/*
 *  debugfs.h - a tiny little debug file system
 *
 *  Copyright (C) 2004 Greg Kroah-Hartman
 *  Copyright (C) 2004 IBM Inc.
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License version
 *  2 as published by the Free Software Foundation.
 *
 *  debugfs is for people to use instead of /proc or /sys.
 *  See Documentation/DocBook/filesystems for more details.
 */

#ifndef _DEBUGFS_H_
#define _DEBUGFS_H_

#include <linux/fs.h>

#include <linux/types.h>

struct file_operations;

struct debugfs_blob_wrapper {
	void *data;
	unsigned long size;
};

extern struct dentry *arch_debugfs_dir;

#if defined(CONFIG_DEBUG_FS)

/* declared over in file.c */
extern const struct file_operations debugfs_file_operations;
extern const struct inode_operations debugfs_link_operations;

struct dentry *debugfs_create_file(const char *name, mode_t mode,
				   struct dentry *parent, void *data,
				   const struct file_operations *fops);

struct dentry *debugfs_create_dir(const char *name, struct dentry *parent);

struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
				      const char *dest);

void debugfs_remove(struct dentry *dentry);
void debugfs_remove_recursive(struct dentry *dentry);

struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
			      struct dentry *new_dir, const char *new_name);

struct dentry *debugfs_create_u8(const char *name, mode_t mode,
				 struct dentry *parent, u8 *value);
struct dentry *debugfs_create_u16(const char *name, mode_t mode,
				  struct dentry *parent, u16 *value);
struct dentry *debugfs_create_u32(const char *name, mode_t mode,
				  struct dentry *parent, u32 *value);
struct dentry *debugfs_create_u64(const char *name, mode_t mode,
				  struct dentry *parent, u64 *value);
struct dentry *debugfs_create_x8(const char *name, mode_t mode,
				 struct dentry *parent, u8 *value);
struct dentry *debugfs_create_x16(const char *name, mode_t mode,
				  struct dentry *parent, u16 *value);
struct dentry *debugfs_create_x32(const char *name, mode_t mode,
				  struct dentry *parent, u32 *value);
struct dentry *debugfs_create_x64(const char *name, mode_t mode,
				  struct dentry *parent, u64 *value);
struct dentry *debugfs_create_size_t(const char *name, mode_t mode,
				     struct dentry *parent, size_t *value);
struct dentry *debugfs_create_bool(const char *name, mode_t mode,
				   struct dentry *parent, u32 *value);

struct dentry *debugfs_create_blob(const char *name, mode_t mode,
				   struct dentry *parent,
				   struct debugfs_blob_wrapper *blob);

bool debugfs_initialized(void);

#else

#include <linux/err.h>

/*
 * We do not return NULL from these functions if CONFIG_DEBUG_FS is not enabled
 * so users have a chance to detect if there was a real error or not.  We don't
 * want to duplicate the design decision mistakes of procfs and devfs again.
 */

static inline struct dentry *debugfs_create_file(const char *name, mode_t mode,
					struct dentry *parent, void *data,
					const struct file_operations *fops)
{
	return ERR_PTR(-ENODEV);
}

static inline struct dentry *debugfs_create_dir(const char *name,
						struct dentry *parent)
{
	return ERR_PTR(-ENODEV);
}

static inline struct dentry *debugfs_create_symlink(const char *name,
						    struct dentry *parent,
						    const char *dest)
{
	return ERR_PTR(-ENODEV);
}

static inline void debugfs_remove(struct dentry *dentry)
{ }

static inline void debugfs_remove_recursive(struct dentry *dentry)
{ }

static inline struct dentry *debugfs_rename(struct dentry *old_dir,
					    struct dentry *old_dentry,
					    struct dentry *new_dir,
					    char *new_name)
{
	return ERR_PTR(-ENODEV);
}

static inline struct dentry *debugfs_create_u8(const char *name, mode_t mode,
					       struct dentry *parent,
					       u8 *value)
{
	return ERR_PTR(-ENODEV);
}

static inline struct dentry *debugfs_create_u16(const char *name, mode_t mode,
						struct dentry *parent,
						u16 *value)
{
	return ERR_PTR(-ENODEV);
}

static inline struct dentry *debugfs_create_u32(const char *name, mode_t mode,
						struct dentry *parent,
						u32 *value)
{
	return ERR_PTR(-ENODEV);
}

static inline struct dentry *debugfs_create_u64(const char *name, mode_t mode,
						struct dentry *parent,
						u64 *value)
{
	return ERR_PTR(-ENODEV);
}

static inline struct dentry *debugfs_create_x8(const char *name, mode_t mode,
					       struct dentry *parent,
					       u8 *value)
{
	return ERR_PTR(-ENODEV);
}

static inline struct dentry *debugfs_create_x16(const char *name, mode_t mode,
						struct dentry *parent,
						u16 *value)
{
	return ERR_PTR(-ENODEV);
}

static inline struct dentry *debugfs_create_x32(const char *name, mode_t mode,
						struct dentry *parent,
						u32 *value)
{
	return ERR_PTR(-ENODEV);
}

static inline struct dentry *debugfs_create_size_t(const char *name, mode_t mode,
						   struct dentry *parent,
						   size_t *value)
{
	return ERR_PTR(-ENODEV);
}

static inline struct dentry *debugfs_create_bool(const char *name, mode_t mode,
						 struct dentry *parent,
						 u32 *value)
{
	return ERR_PTR(-ENODEV);
}

static inline struct dentry *debugfs_create_blob(const char *name, mode_t mode,
						 struct dentry *parent,
						 struct debugfs_blob_wrapper *blob)
{
	return ERR_PTR(-ENODEV);
}

static inline bool debugfs_initialized(void)
{
	return false;
}

#endif

#endif
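The stub section above deliberately returns ERR_PTR(-ENODEV) rather than NULL when CONFIG_DEBUG_FS is disabled, so callers can tell "debugfs not compiled in" apart from a genuine creation failure. A minimal caller sketch of that convention follows; the "example" directory, the "state" file and the fops/priv arguments are hypothetical and not part of the header above.

#include <linux/debugfs.h>
#include <linux/err.h>

static struct dentry *example_dir;

/* Hypothetical setup helper showing the intended error-checking pattern. */
static int example_debugfs_setup(void *priv, const struct file_operations *fops)
{
	struct dentry *file;

	example_dir = debugfs_create_dir("example", NULL);
	if (IS_ERR(example_dir))
		return 0;		/* CONFIG_DEBUG_FS is off: nothing to do */
	if (!example_dir)
		return -ENOMEM;		/* debugfs is available but creation failed */

	file = debugfs_create_file("state", 0444, example_dir, priv, fops);
	if (!file || IS_ERR(file)) {
		debugfs_remove(example_dir);
		return -ENOMEM;
	}
	return 0;
}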
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/Kconfig64
-rw-r--r--drivers/infiniband/Makefile1
-rw-r--r--drivers/infiniband/core/Makefile23
-rw-r--r--drivers/infiniband/core/addr.c128
-rw-r--r--drivers/infiniband/core/agent.c35
-rw-r--r--drivers/infiniband/core/cache.c504
-rw-r--r--drivers/infiniband/core/cgroup.c10
-rw-r--r--drivers/infiniband/core/cm.c3236
-rw-r--r--drivers/infiniband/core/cm_msgs.h791
-rw-r--r--drivers/infiniband/core/cm_trace.c15
-rw-r--r--drivers/infiniband/core/cm_trace.h414
-rw-r--r--drivers/infiniband/core/cma.c2482
-rw-r--r--drivers/infiniband/core/cma_configfs.c67
-rw-r--r--drivers/infiniband/core/cma_priv.h34
-rw-r--r--drivers/infiniband/core/cma_trace.c16
-rw-r--r--drivers/infiniband/core/cma_trace.h361
-rw-r--r--drivers/infiniband/core/core_priv.h135
-rw-r--r--drivers/infiniband/core/counters.c681
-rw-r--r--drivers/infiniband/core/cq.c361
-rw-r--r--drivers/infiniband/core/device.c1200
-rw-r--r--drivers/infiniband/core/fmr_pool.c507
-rw-r--r--drivers/infiniband/core/ib_core_uverbs.c367
-rw-r--r--drivers/infiniband/core/iwcm.c143
-rw-r--r--drivers/infiniband/core/iwcm.h2
-rw-r--r--drivers/infiniband/core/iwpm_msg.c86
-rw-r--r--drivers/infiniband/core/iwpm_util.c105
-rw-r--r--drivers/infiniband/core/iwpm_util.h28
-rw-r--r--drivers/infiniband/core/lag.c135
-rw-r--r--drivers/infiniband/core/mad.c950
-rw-r--r--drivers/infiniband/core/mad_priv.h83
-rw-r--r--drivers/infiniband/core/mad_rmpp.c80
-rw-r--r--drivers/infiniband/core/mr_pool.c18
-rw-r--r--drivers/infiniband/core/multicast.c43
-rw-r--r--drivers/infiniband/core/netlink.c161
-rw-r--r--drivers/infiniband/core/nldev.c1906
-rw-r--r--drivers/infiniband/core/opa_smi.h4
-rw-r--r--drivers/infiniband/core/rdma_core.c431
-rw-r--r--drivers/infiniband/core/rdma_core.h51
-rw-r--r--drivers/infiniband/core/restrack.c261
-rw-r--r--drivers/infiniband/core/restrack.h8
-rw-r--r--drivers/infiniband/core/roce_gid_mgmt.c106
-rw-r--r--drivers/infiniband/core/rw.c306
-rw-r--r--drivers/infiniband/core/sa.h2
-rw-r--r--drivers/infiniband/core/sa_query.c704
-rw-r--r--drivers/infiniband/core/security.c50
-rw-r--r--drivers/infiniband/core/smi.c12
-rw-r--r--drivers/infiniband/core/smi.h4
-rw-r--r--drivers/infiniband/core/sysfs.c1330
-rw-r--r--drivers/infiniband/core/trace.c12
-rw-r--r--drivers/infiniband/core/ucaps.c267
-rw-r--r--drivers/infiniband/core/ucm.c1350
-rw-r--r--drivers/infiniband/core/ucma.c953
-rw-r--r--drivers/infiniband/core/ud_header.c85
-rw-r--r--drivers/infiniband/core/umem.c336
-rw-r--r--drivers/infiniband/core/umem_dmabuf.c275
-rw-r--r--drivers/infiniband/core/umem_odp.c936
-rw-r--r--drivers/infiniband/core/user_mad.c235
-rw-r--r--drivers/infiniband/core/uverbs.h102
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c1208
-rw-r--r--drivers/infiniband/core/uverbs_ioctl.c191
-rw-r--r--drivers/infiniband/core/uverbs_main.c585
-rw-r--r--drivers/infiniband/core/uverbs_marshall.c44
-rw-r--r--drivers/infiniband/core/uverbs_std_types.c163
-rw-r--r--drivers/infiniband/core/uverbs_std_types_async_fd.c79
-rw-r--r--drivers/infiniband/core/uverbs_std_types_counters.c24
-rw-r--r--drivers/infiniband/core/uverbs_std_types_cq.c150
-rw-r--r--drivers/infiniband/core/uverbs_std_types_device.c290
-rw-r--r--drivers/infiniband/core/uverbs_std_types_dm.c6
-rw-r--r--drivers/infiniband/core/uverbs_std_types_dmah.c145
-rw-r--r--drivers/infiniband/core/uverbs_std_types_flow_action.c389
-rw-r--r--drivers/infiniband/core/uverbs_std_types_mr.c349
-rw-r--r--drivers/infiniband/core/uverbs_std_types_qp.c380
-rw-r--r--drivers/infiniband/core/uverbs_std_types_srq.c234
-rw-r--r--drivers/infiniband/core/uverbs_std_types_wq.c194
-rw-r--r--drivers/infiniband/core/uverbs_uapi.c25
-rw-r--r--drivers/infiniband/core/verbs.c1239
-rw-r--r--drivers/infiniband/hw/Makefile11
-rw-r--r--drivers/infiniband/hw/bng_re/Kconfig10
-rw-r--r--drivers/infiniband/hw/bng_re/Makefile8
-rw-r--r--drivers/infiniband/hw/bng_re/bng_debugfs.c39
-rw-r--r--drivers/infiniband/hw/bng_re/bng_debugfs.h12
-rw-r--r--drivers/infiniband/hw/bng_re/bng_dev.c534
-rw-r--r--drivers/infiniband/hw/bng_re/bng_fw.c767
-rw-r--r--drivers/infiniband/hw/bng_re/bng_fw.h211
-rw-r--r--drivers/infiniband/hw/bng_re/bng_re.h85
-rw-r--r--drivers/infiniband/hw/bng_re/bng_res.c279
-rw-r--r--drivers/infiniband/hw/bng_re/bng_res.h215
-rw-r--r--drivers/infiniband/hw/bng_re/bng_sp.c131
-rw-r--r--drivers/infiniband/hw/bng_re/bng_sp.h47
-rw-r--r--drivers/infiniband/hw/bng_re/bng_tlv.h128
-rw-r--r--drivers/infiniband/hw/bnxt_re/Kconfig11
-rw-r--r--drivers/infiniband/hw/bnxt_re/Makefile3
-rw-r--r--drivers/infiniband/hw/bnxt_re/bnxt_re.h209
-rw-r--r--drivers/infiniband/hw/bnxt_re/debugfs.c524
-rw-r--r--drivers/infiniband/hw/bnxt_re/debugfs.h55
-rw-r--r--drivers/infiniband/hw/bnxt_re/hw_counters.c523
-rw-r--r--drivers/infiniband/hw/bnxt_re/hw_counters.h81
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c2736
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.h128
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c2672
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.c2246
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.h368
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.c1263
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.h296
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.c809
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.h470
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.c937
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.h157
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_tlv.h162
-rw-r--r--drivers/infiniband/hw/bnxt_re/roce_hsi.h7355
-rw-r--r--drivers/infiniband/hw/cxgb3/Kconfig18
-rw-r--r--drivers/infiniband/hw/cxgb3/Makefile7
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.c1331
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.h205
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_resource.c344
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_resource.h69
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_wr.h802
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch.c282
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch.h155
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c2258
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.h233
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cq.c230
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_ev.c232
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_mem.c101
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c1402
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.h347
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c1082
-rw-r--r--drivers/infiniband/hw/cxgb3/tcb.h632
-rw-r--r--drivers/infiniband/hw/cxgb4/Kconfig3
-rw-r--r--drivers/infiniband/hw/cxgb4/Makefile1
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c126
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c80
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c32
-rw-r--r--drivers/infiniband/hw/cxgb4/ev.c10
-rw-r--r--drivers/infiniband/hw/cxgb4/id_table.c21
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h92
-rw-r--r--drivers/infiniband/hw/cxgb4/mem.c151
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c234
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c202
-rw-r--r--drivers/infiniband/hw/cxgb4/resource.c18
-rw-r--r--drivers/infiniband/hw/cxgb4/restrack.c28
-rw-r--r--drivers/infiniband/hw/cxgb4/t4.h33
-rw-r--r--drivers/infiniband/hw/cxgb4/t4fw_ri_api.h34
-rw-r--r--drivers/infiniband/hw/efa/efa.h99
-rw-r--r--drivers/infiniband/hw/efa/efa_admin_cmds_defs.h474
-rw-r--r--drivers/infiniband/hw/efa/efa_admin_defs.h49
-rw-r--r--drivers/infiniband/hw/efa/efa_com.c567
-rw-r--r--drivers/infiniband/hw/efa/efa_com.h44
-rw-r--r--drivers/infiniband/hw/efa/efa_com_cmd.c370
-rw-r--r--drivers/infiniband/hw/efa/efa_com_cmd.h115
-rw-r--r--drivers/infiniband/hw/efa/efa_common_defs.h13
-rw-r--r--drivers/infiniband/hw/efa/efa_io_defs.h391
-rw-r--r--drivers/infiniband/hw/efa/efa_main.c401
-rw-r--r--drivers/infiniband/hw/efa/efa_regs_defs.h30
-rw-r--r--drivers/infiniband/hw/efa/efa_verbs.c1473
-rw-r--r--drivers/infiniband/hw/erdma/Kconfig12
-rw-r--r--drivers/infiniband/hw/erdma/Makefile4
-rw-r--r--drivers/infiniband/hw/erdma/erdma.h283
-rw-r--r--drivers/infiniband/hw/erdma/erdma_cm.c1431
-rw-r--r--drivers/infiniband/hw/erdma/erdma_cm.h167
-rw-r--r--drivers/infiniband/hw/erdma/erdma_cmdq.c452
-rw-r--r--drivers/infiniband/hw/erdma/erdma_cq.c268
-rw-r--r--drivers/infiniband/hw/erdma/erdma_eq.c326
-rw-r--r--drivers/infiniband/hw/erdma/erdma_hw.h753
-rw-r--r--drivers/infiniband/hw/erdma/erdma_main.c684
-rw-r--r--drivers/infiniband/hw/erdma/erdma_qp.c757
-rw-r--r--drivers/infiniband/hw/erdma/erdma_verbs.c2300
-rw-r--r--drivers/infiniband/hw/erdma/erdma_verbs.h491
-rw-r--r--drivers/infiniband/hw/hfi1/Kconfig13
-rw-r--r--drivers/infiniband/hw/hfi1/Makefile6
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.c199
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.h48
-rw-r--r--drivers/infiniband/hw/hfi1/aspm.c270
-rw-r--r--drivers/infiniband/hw/hfi1/aspm.h307
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c800
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h70
-rw-r--r--drivers/infiniband/hw/hfi1/chip_registers.h51
-rw-r--r--drivers/infiniband/hw/hfi1/common.h115
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.c113
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.h58
-rw-r--r--drivers/infiniband/hw/hfi1/device.c121
-rw-r--r--drivers/infiniband/hw/hfi1/device.h49
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c530
-rw-r--r--drivers/infiniband/hw/hfi1/efivar.c60
-rw-r--r--drivers/infiniband/hw/hfi1/efivar.h45
-rw-r--r--drivers/infiniband/hw/hfi1/eprom.c45
-rw-r--r--drivers/infiniband/hw/hfi1/eprom.h44
-rw-r--r--drivers/infiniband/hw/hfi1/exp_rcv.c58
-rw-r--r--drivers/infiniband/hw/hfi1/exp_rcv.h53
-rw-r--r--drivers/infiniband/hw/hfi1/fault.c71
-rw-r--r--drivers/infiniband/hw/hfi1/fault.h50
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c237
-rw-r--r--drivers/infiniband/hw/hfi1/firmware.c70
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h373
-rw-r--r--drivers/infiniband/hw/hfi1/init.c326
-rw-r--r--drivers/infiniband/hw/hfi1/intr.c75
-rw-r--r--drivers/infiniband/hw/hfi1/iowait.c8
-rw-r--r--drivers/infiniband/hw/hfi1/iowait.h55
-rw-r--r--drivers/infiniband/hw/hfi1/ipoib.h171
-rw-r--r--drivers/infiniband/hw/hfi1/ipoib_main.c250
-rw-r--r--drivers/infiniband/hw/hfi1/ipoib_rx.c92
-rw-r--r--drivers/infiniband/hw/hfi1/ipoib_tx.c868
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c284
-rw-r--r--drivers/infiniband/hw/hfi1/mad.h48
-rw-r--r--drivers/infiniband/hw/hfi1/mmu_rb.c283
-rw-r--r--drivers/infiniband/hw/hfi1/mmu_rb.h91
-rw-r--r--drivers/infiniband/hw/hfi1/msix.c189
-rw-r--r--drivers/infiniband/hw/hfi1/msix.h52
-rw-r--r--drivers/infiniband/hw/hfi1/netdev.h105
-rw-r--r--drivers/infiniband/hw/hfi1/netdev_rx.c487
-rw-r--r--drivers/infiniband/hw/hfi1/opa_compat.h48
-rw-r--r--drivers/infiniband/hw/hfi1/opfn.c4
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c132
-rw-r--r--drivers/infiniband/hw/hfi1/pin_system.c474
-rw-r--r--drivers/infiniband/hw/hfi1/pinning.h20
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c101
-rw-r--r--drivers/infiniband/hw/hfi1/pio.h55
-rw-r--r--drivers/infiniband/hw/hfi1/pio_copy.c60
-rw-r--r--drivers/infiniband/hw/hfi1/platform.c56
-rw-r--r--drivers/infiniband/hw/hfi1/platform.h45
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c92
-rw-r--r--drivers/infiniband/hw/hfi1/qp.h62
-rw-r--r--drivers/infiniband/hw/hfi1/qsfp.c72
-rw-r--r--drivers/infiniband/hw/hfi1/qsfp.h46
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c193
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c51
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c216
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.h130
-rw-r--r--drivers/infiniband/hw/hfi1/sdma_txreq.h47
-rw-r--r--drivers/infiniband/hw/hfi1/sysfs.c639
-rw-r--r--drivers/infiniband/hw/hfi1/tid_rdma.c319
-rw-r--r--drivers/infiniband/hw/hfi1/tid_rdma.h3
-rw-r--r--drivers/infiniband/hw/hfi1/trace.c91
-rw-r--r--drivers/infiniband/hw/hfi1/trace.h44
-rw-r--r--drivers/infiniband/hw/hfi1/trace_ctxts.h58
-rw-r--r--drivers/infiniband/hw/hfi1/trace_dbg.h62
-rw-r--r--drivers/infiniband/hw/hfi1/trace_ibhdrs.h47
-rw-r--r--drivers/infiniband/hw/hfi1/trace_misc.h49
-rw-r--r--drivers/infiniband/hw/hfi1/trace_mmu.h97
-rw-r--r--drivers/infiniband/hw/hfi1/trace_rc.h49
-rw-r--r--drivers/infiniband/hw/hfi1/trace_rx.h62
-rw-r--r--drivers/infiniband/hw/hfi1/trace_tid.h56
-rw-r--r--drivers/infiniband/hw/hfi1/trace_tx.h296
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c63
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c92
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.c480
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.h63
-rw-r--r--drivers/infiniband/hw/hfi1/user_pages.c115
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c446
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.h75
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c249
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h61
-rw-r--r--drivers/infiniband/hw/hfi1/verbs_txreq.c46
-rw-r--r--drivers/infiniband/hw/hfi1/verbs_txreq.h54
-rw-r--r--drivers/infiniband/hw/hfi1/vnic.h55
-rw-r--r--drivers/infiniband/hw/hfi1/vnic_main.c371
-rw-r--r--drivers/infiniband/hw/hfi1/vnic_sdma.c51
-rw-r--r--drivers/infiniband/hw/hns/Kconfig32
-rw-r--r--drivers/infiniband/hw/hns/Makefile14
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_ah.c126
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_alloc.c291
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_bond.c1012
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_bond.h95
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cmd.c228
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cmd.h29
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_common.h253
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cq.c746
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_db.c40
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_debugfs.c111
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_debugfs.h33
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h1142
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.c1587
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.h110
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.c4734
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.h1099
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c9762
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h2297
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c35
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c989
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c1977
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_pd.c153
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c1837
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_restrack.c242
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_srq.c654
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_trace.h216
-rw-r--r--drivers/infiniband/hw/i40iw/Kconfig8
-rw-r--r--drivers/infiniband/hw/i40iw/Makefile10
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw.h602
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.c4419
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.h462
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_ctrl.c5198
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_d.h1737
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_hmc.c821
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_hmc.h241
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_hw.c852
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_main.c2066
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_osdep.h217
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_p.h128
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_pble.c612
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_pble.h131
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_puda.c1493
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_puda.h188
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_register.h1030
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_status.h101
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_type.h1363
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_uk.c1232
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_user.h430
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_utils.c1553
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c2807
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.h179
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_vf.c85
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_vf.h62
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_virtchnl.c756
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_virtchnl.h124
-rw-r--r--drivers/infiniband/hw/ionic/Kconfig15
-rw-r--r--drivers/infiniband/hw/ionic/Makefile9
-rw-r--r--drivers/infiniband/hw/ionic/ionic_admin.c1229
-rw-r--r--drivers/infiniband/hw/ionic/ionic_controlpath.c2679
-rw-r--r--drivers/infiniband/hw/ionic/ionic_datapath.c1399
-rw-r--r--drivers/infiniband/hw/ionic/ionic_fw.h1029
-rw-r--r--drivers/infiniband/hw/ionic/ionic_hw_stats.c484
-rw-r--r--drivers/infiniband/hw/ionic/ionic_ibdev.c440
-rw-r--r--drivers/infiniband/hw/ionic/ionic_ibdev.h517
-rw-r--r--drivers/infiniband/hw/ionic/ionic_lif_cfg.c111
-rw-r--r--drivers/infiniband/hw/ionic/ionic_lif_cfg.h66
-rw-r--r--drivers/infiniband/hw/ionic/ionic_pgtbl.c143
-rw-r--r--drivers/infiniband/hw/ionic/ionic_queue.c52
-rw-r--r--drivers/infiniband/hw/ionic/ionic_queue.h234
-rw-r--r--drivers/infiniband/hw/ionic/ionic_res.h154
-rw-r--r--drivers/infiniband/hw/irdma/Kconfig14
-rw-r--r--drivers/infiniband/hw/irdma/Makefile31
-rw-r--r--drivers/infiniband/hw/irdma/cm.c4434
-rw-r--r--drivers/infiniband/hw/irdma/cm.h416
-rw-r--r--drivers/infiniband/hw/irdma/ctrl.c6602
-rw-r--r--drivers/infiniband/hw/irdma/defs.h1184
-rw-r--r--drivers/infiniband/hw/irdma/hmc.c709
-rw-r--r--drivers/infiniband/hw/irdma/hmc.h186
-rw-r--r--drivers/infiniband/hw/irdma/hw.c2823
-rw-r--r--drivers/infiniband/hw/irdma/i40iw_hw.c261
-rw-r--r--drivers/infiniband/hw/irdma/i40iw_hw.h162
-rw-r--r--drivers/infiniband/hw/irdma/i40iw_if.c220
-rw-r--r--drivers/infiniband/hw/irdma/icrdma_hw.c205
-rw-r--r--drivers/infiniband/hw/irdma/icrdma_hw.h73
-rw-r--r--drivers/infiniband/hw/irdma/icrdma_if.c347
-rw-r--r--drivers/infiniband/hw/irdma/ig3rdma_hw.c170
-rw-r--r--drivers/infiniband/hw/irdma/ig3rdma_hw.h32
-rw-r--r--drivers/infiniband/hw/irdma/ig3rdma_if.c236
-rw-r--r--drivers/infiniband/hw/irdma/irdma.h174
-rw-r--r--drivers/infiniband/hw/irdma/main.c211
-rw-r--r--drivers/infiniband/hw/irdma/main.h578
-rw-r--r--drivers/infiniband/hw/irdma/osdep.h74
-rw-r--r--drivers/infiniband/hw/irdma/pble.c519
-rw-r--r--drivers/infiniband/hw/irdma/pble.h132
-rw-r--r--drivers/infiniband/hw/irdma/protos.h97
-rw-r--r--drivers/infiniband/hw/irdma/puda.c1718
-rw-r--r--drivers/infiniband/hw/irdma/puda.h182
-rw-r--r--drivers/infiniband/hw/irdma/trace.c112
-rw-r--r--drivers/infiniband/hw/irdma/trace.h3
-rw-r--r--drivers/infiniband/hw/irdma/trace_cm.h460
-rw-r--r--drivers/infiniband/hw/irdma/type.h1674
-rw-r--r--drivers/infiniband/hw/irdma/uda.c265
-rw-r--r--drivers/infiniband/hw/irdma/uda.h87
-rw-r--r--drivers/infiniband/hw/irdma/uda_d.h127
-rw-r--r--drivers/infiniband/hw/irdma/uk.c1930
-rw-r--r--drivers/infiniband/hw/irdma/user.h676
-rw-r--r--drivers/infiniband/hw/irdma/utils.c2508
-rw-r--r--drivers/infiniband/hw/irdma/verbs.c5517
-rw-r--r--drivers/infiniband/hw/irdma/verbs.h341
-rw-r--r--drivers/infiniband/hw/irdma/virtchnl.c618
-rw-r--r--drivers/infiniband/hw/irdma/virtchnl.h176
-rw-r--r--drivers/infiniband/hw/irdma/ws.c406
-rw-r--r--drivers/infiniband/hw/irdma/ws.h41
-rw-r--r--drivers/infiniband/hw/mana/Kconfig10
-rw-r--r--drivers/infiniband/hw/mana/Makefile4
-rw-r--r--drivers/infiniband/hw/mana/ah.c58
-rw-r--r--drivers/infiniband/hw/mana/counters.c179
-rw-r--r--drivers/infiniband/hw/mana/counters.h62
-rw-r--r--drivers/infiniband/hw/mana/cq.c342
-rw-r--r--drivers/infiniband/hw/mana/device.c261
-rw-r--r--drivers/infiniband/hw/mana/main.c1134
-rw-r--r--drivers/infiniband/hw/mana/mana_ib.h738
-rw-r--r--drivers/infiniband/hw/mana/mr.c319
-rw-r--r--drivers/infiniband/hw/mana/qp.c921
-rw-r--r--drivers/infiniband/hw/mana/shadow_queue.h115
-rw-r--r--drivers/infiniband/hw/mana/wq.c92
-rw-r--r--drivers/infiniband/hw/mana/wr.c168
-rw-r--r--drivers/infiniband/hw/mlx4/Kconfig3
-rw-r--r--drivers/infiniband/hw/mlx4/Makefile1
-rw-r--r--drivers/infiniband/hw/mlx4/ah.c16
-rw-r--r--drivers/infiniband/hw/mlx4/alias_GUID.c31
-rw-r--r--drivers/infiniband/hw/mlx4/cm.c198
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c82
-rw-r--r--drivers/infiniband/hw/mlx4/doorbell.c6
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c251
-rw-r--r--drivers/infiniband/hw/mlx4/main.c707
-rw-r--r--drivers/infiniband/hw/mlx4/mcg.c92
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h157
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c459
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c795
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c23
-rw-r--r--drivers/infiniband/hw/mlx4/sysfs.c72
-rw-r--r--drivers/infiniband/hw/mlx5/Kconfig3
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile36
-rw-r--r--drivers/infiniband/hw/mlx5/ah.c55
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c463
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h27
-rw-r--r--drivers/infiniband/hw/mlx5/cong.c69
-rw-r--r--drivers/infiniband/hw/mlx5/counters.c1279
-rw-r--r--drivers/infiniband/hw/mlx5/counters.h19
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c366
-rw-r--r--drivers/infiniband/hw/mlx5/data_direct.c227
-rw-r--r--drivers/infiniband/hw/mlx5/data_direct.h23
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c1887
-rw-r--r--drivers/infiniband/hw/mlx5/devx.h50
-rw-r--r--drivers/infiniband/hw/mlx5/dm.c612
-rw-r--r--drivers/infiniband/hw/mlx5/dm.h68
-rw-r--r--drivers/infiniband/hw/mlx5/dmah.c54
-rw-r--r--drivers/infiniband/hw/mlx5/dmah.h23
-rw-r--r--drivers/infiniband/hw/mlx5/doorbell.c17
-rw-r--r--drivers/infiniband/hw/mlx5/flow.c682
-rw-r--r--drivers/infiniband/hw/mlx5/fs.c3516
-rw-r--r--drivers/infiniband/hw/mlx5/fs.h36
-rw-r--r--drivers/infiniband/hw/mlx5/gsi.c223
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.c335
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.h57
-rw-r--r--drivers/infiniband/hw/mlx5/ib_virt.c41
-rw-r--r--drivers/infiniband/hw/mlx5/macsec.c364
-rw-r--r--drivers/infiniband/hw/mlx5/macsec.h29
-rw-r--r--drivers/infiniband/hw/mlx5/mad.c373
-rw-r--r--drivers/infiniband/hw/mlx5/main.c5640
-rw-r--r--drivers/infiniband/hw/mlx5/mem.c231
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h1185
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c3203
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c1821
-rw-r--r--drivers/infiniband/hw/mlx5/qos.c133
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c5060
-rw-r--r--drivers/infiniband/hw/mlx5/qp.h60
-rw-r--r--drivers/infiniband/hw/mlx5/qpc.c697
-rw-r--r--drivers/infiniband/hw/mlx5/restrack.c217
-rw-r--r--drivers/infiniband/hw/mlx5/restrack.h13
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c97
-rw-r--r--drivers/infiniband/hw/mlx5/srq.h3
-rw-r--r--drivers/infiniband/hw/mlx5/srq_cmd.c225
-rw-r--r--drivers/infiniband/hw/mlx5/std_types.c277
-rw-r--r--drivers/infiniband/hw/mlx5/umr.c1129
-rw-r--r--drivers/infiniband/hw/mlx5/umr.h113
-rw-r--r--drivers/infiniband/hw/mlx5/wr.c1284
-rw-r--r--drivers/infiniband/hw/mlx5/wr.h136
-rw-r--r--drivers/infiniband/hw/mthca/Kconfig5
-rw-r--r--drivers/infiniband/hw/mthca/mthca_allocator.c17
-rw-r--r--drivers/infiniband/hw/mthca/mthca_av.c8
-rw-r--r--drivers/infiniband/hw/mthca/mthca_catas.c4
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cmd.c6
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cq.c12
-rw-r--r--drivers/infiniband/hw/mthca/mthca_dev.h31
-rw-r--r--drivers/infiniband/hw/mthca/mthca_eq.c21
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mad.c76
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c23
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.c35
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.h2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mr.c289
-rw-r--r--drivers/infiniband/hw/mthca/mthca_profile.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c373
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.h50
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c90
-rw-r--r--drivers/infiniband/hw/nes/Kconfig15
-rw-r--r--drivers/infiniband/hw/nes/Makefile3
-rw-r--r--drivers/infiniband/hw/nes/nes.c1205
-rw-r--r--drivers/infiniband/hw/nes/nes.h574
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c3992
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.h470
-rw-r--r--drivers/infiniband/hw/nes/nes_context.h193
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.c3887
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.h1380
-rw-r--r--drivers/infiniband/hw/nes/nes_mgt.c1155
-rw-r--r--drivers/infiniband/hw/nes/nes_mgt.h97
-rw-r--r--drivers/infiniband/hw/nes/nes_nic.c1870
-rw-r--r--drivers/infiniband/hw/nes/nes_utils.c916
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c3759
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.h198
-rw-r--r--drivers/infiniband/hw/ocrdma/Kconfig3
-rw-r--r--drivers/infiniband/hw/ocrdma/Makefile1
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma.h2
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.c39
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.h15
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c35
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.h2
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c79
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_sli.h2
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_stats.c31
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_stats.h3
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c161
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.h29
-rw-r--r--drivers/infiniband/hw/qedr/Kconfig3
-rw-r--r--drivers/infiniband/hw/qedr/Makefile1
-rw-r--r--drivers/infiniband/hw/qedr/main.c154
-rw-r--r--drivers/infiniband/hw/qedr/qedr.h113
-rw-r--r--drivers/infiniband/hw/qedr/qedr_iw_cm.c169
-rw-r--r--drivers/infiniband/hw/qedr/qedr_roce_cm.c18
-rw-r--r--drivers/infiniband/hw/qedr/qedr_roce_cm.h5
-rw-r--r--drivers/infiniband/hw/qedr/verbs.c1354
-rw-r--r--drivers/infiniband/hw/qedr/verbs.h49
-rw-r--r--drivers/infiniband/hw/qib/Kconfig16
-rw-r--r--drivers/infiniband/hw/qib/Makefile17
-rw-r--r--drivers/infiniband/hw/qib/qib.h1525
-rw-r--r--drivers/infiniband/hw/qib/qib_6120_regs.h977
-rw-r--r--drivers/infiniband/hw/qib/qib_7220.h149
-rw-r--r--drivers/infiniband/hw/qib/qib_7220_regs.h1496
-rw-r--r--drivers/infiniband/hw/qib/qib_7322_regs.h3163
-rw-r--r--drivers/infiniband/hw/qib/qib_common.h805
-rw-r--r--drivers/infiniband/hw/qib/qib_debugfs.c274
-rw-r--r--drivers/infiniband/hw/qib/qib_debugfs.h45
-rw-r--r--drivers/infiniband/hw/qib/qib_diag.c906
-rw-r--r--drivers/infiniband/hw/qib/qib_driver.c803
-rw-r--r--drivers/infiniband/hw/qib/qib_eeprom.c271
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c2405
-rw-r--r--drivers/infiniband/hw/qib/qib_fs.c599
-rw-r--r--drivers/infiniband/hw/qib/qib_iba6120.c3537
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7220.c4596
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c8506
-rw-r--r--drivers/infiniband/hw/qib/qib_init.c1799
-rw-r--r--drivers/infiniband/hw/qib/qib_intr.c240
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.c2499
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.h300
-rw-r--r--drivers/infiniband/hw/qib/qib_pcie.c613
-rw-r--r--drivers/infiniband/hw/qib/qib_pio_copy.c64
-rw-r--r--drivers/infiniband/hw/qib/qib_qp.c454
-rw-r--r--drivers/infiniband/hw/qib/qib_qsfp.c549
-rw-r--r--drivers/infiniband/hw/qib/qib_qsfp.h188
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c2153
-rw-r--r--drivers/infiniband/hw/qib/qib_ruc.c314
-rw-r--r--drivers/infiniband/hw/qib/qib_sd7220.c1446
-rw-r--r--drivers/infiniband/hw/qib/qib_sdma.c999
-rw-r--r--drivers/infiniband/hw/qib/qib_sysfs.c852
-rw-r--r--drivers/infiniband/hw/qib/qib_twsi.c501
-rw-r--r--drivers/infiniband/hw/qib/qib_tx.c568
-rw-r--r--drivers/infiniband/hw/qib/qib_uc.c521
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c582
-rw-r--r--drivers/infiniband/hw/qib/qib_user_pages.c143
-rw-r--r--drivers/infiniband/hw/qib/qib_user_sdma.c1461
-rw-r--r--drivers/infiniband/hw/qib/qib_user_sdma.h52
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c1708
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h404
-rw-r--r--drivers/infiniband/hw/qib/qib_wc_ppc64.c62
-rw-r--r--drivers/infiniband/hw/qib/qib_wc_x86_64.c150
-rw-r--r--drivers/infiniband/hw/usnic/Kconfig3
-rw-r--r--drivers/infiniband/hw/usnic/usnic_abi.h2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_debugfs.c1
-rw-r--r--drivers/infiniband/hw/usnic/usnic_fwd.c6
-rw-r--r--drivers/infiniband/hw/usnic/usnic_fwd.h2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib.h6
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_main.c167
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c35
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h10
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_sysfs.c103
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.c158
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.h21
-rw-r--r--drivers/infiniband/hw/usnic/usnic_transport.c1
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.c49
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.h5
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c3
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h4
-rw-r--r--drivers/infiniband/hw/usnic/usnic_vnic.c1
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/Kconfig3
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/Makefile1
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma.h26
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c61
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h15
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c10
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c158
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c9
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c15
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c199
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c13
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c62
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h68
-rw-r--r--drivers/infiniband/sw/Makefile2
-rw-r--r--drivers/infiniband/sw/rdmavt/Kconfig7
-rw-r--r--drivers/infiniband/sw/rdmavt/Makefile1
-rw-r--r--drivers/infiniband/sw/rdmavt/ah.c71
-rw-r--r--drivers/infiniband/sw/rdmavt/ah.h56
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.c306
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.h55
-rw-r--r--drivers/infiniband/sw/rdmavt/mad.c56
-rw-r--r--drivers/infiniband/sw/rdmavt/mad.h52
-rw-r--r--drivers/infiniband/sw/rdmavt/mcast.c58
-rw-r--r--drivers/infiniband/sw/rdmavt/mcast.h50
-rw-r--r--drivers/infiniband/sw/rdmavt/mmap.c49
-rw-r--r--drivers/infiniband/sw/rdmavt/mmap.h50
-rw-r--r--drivers/infiniband/sw/rdmavt/mr.c255
-rw-r--r--drivers/infiniband/sw/rdmavt/mr.h68
-rw-r--r--drivers/infiniband/sw/rdmavt/pd.c47
-rw-r--r--drivers/infiniband/sw/rdmavt/pd.h52
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c749
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.h57
-rw-r--r--drivers/infiniband/sw/rdmavt/rc.c92
-rw-r--r--drivers/infiniband/sw/rdmavt/srq.c127
-rw-r--r--drivers/infiniband/sw/rdmavt/srq.h52
-rw-r--r--drivers/infiniband/sw/rdmavt/trace.c44
-rw-r--r--drivers/infiniband/sw/rdmavt/trace.h46
-rw-r--r--drivers/infiniband/sw/rdmavt/trace_cq.h48
-rw-r--r--drivers/infiniband/sw/rdmavt/trace_mr.h100
-rw-r--r--drivers/infiniband/sw/rdmavt/trace_qp.h48
-rw-r--r--drivers/infiniband/sw/rdmavt/trace_rc.h46
-rw-r--r--drivers/infiniband/sw/rdmavt/trace_rvt.h46
-rw-r--r--drivers/infiniband/sw/rdmavt/trace_tx.h48
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.c158
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.h70
-rw-r--r--drivers/infiniband/sw/rxe/Kconfig10
-rw-r--r--drivers/infiniband/sw/rxe/Makefile4
-rw-r--r--drivers/infiniband/sw/rxe/rxe.c278
-rw-r--r--drivers/infiniband/sw/rxe/rxe.h120
-rw-r--r--drivers/infiniband/sw/rxe/rxe_av.c132
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c414
-rw-r--r--drivers/infiniband/sw/rxe/rxe_cq.c113
-rw-r--r--drivers/infiniband/sw/rxe/rxe_hdr.h257
-rw-r--r--drivers/infiniband/sw/rxe/rxe_hw_counters.c80
-rw-r--r--drivers/infiniband/sw/rxe/rxe_hw_counters.h37
-rw-r--r--drivers/infiniband/sw/rxe/rxe_icrc.c105
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h273
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mcast.c569
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mmap.c39
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mr.c879
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mw.c338
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c357
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.h29
-rw-r--r--drivers/infiniband/sw/rxe/rxe_odp.c577
-rw-r--r--drivers/infiniband/sw/rxe/rxe_opcode.c816
-rw-r--r--drivers/infiniband/sw/rxe/rxe_opcode.h58
-rw-r--r--drivers/infiniband/sw/rxe/rxe_param.h107
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.c616
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.h145
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c740
-rw-r--r--drivers/infiniband/sw/rxe/rxe_queue.c97
-rw-r--r--drivers/infiniband/sw/rxe/rxe_queue.h308
-rw-r--r--drivers/infiniband/sw/rxe/rxe_recv.c296
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c538
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c1125
-rw-r--r--drivers/infiniband/sw/rxe/rxe_srq.c217
-rw-r--r--drivers/infiniband/sw/rxe/rxe_sysfs.c135
-rw-r--r--drivers/infiniband/sw/rxe/rxe_task.c334
-rw-r--r--drivers/infiniband/sw/rxe/rxe_task.h78
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c1484
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.h296
-rw-r--r--drivers/infiniband/sw/siw/Kconfig20
-rw-r--r--drivers/infiniband/sw/siw/Makefile11
-rw-r--r--drivers/infiniband/sw/siw/iwarp.h367
-rw-r--r--drivers/infiniband/sw/siw/siw.h729
-rw-r--r--drivers/infiniband/sw/siw/siw_cm.c2009
-rw-r--r--drivers/infiniband/sw/siw/siw_cm.h133
-rw-r--r--drivers/infiniband/sw/siw/siw_cq.c122
-rw-r--r--drivers/infiniband/sw/siw/siw_main.c508
-rw-r--r--drivers/infiniband/sw/siw/siw_mem.c400
-rw-r--r--drivers/infiniband/sw/siw/siw_mem.h69
-rw-r--r--drivers/infiniband/sw/siw/siw_qp.c1318
-rw-r--r--drivers/infiniband/sw/siw/siw_qp_rx.c1455
-rw-r--r--drivers/infiniband/sw/siw/siw_qp_tx.c1306
-rw-r--r--drivers/infiniband/sw/siw/siw_verbs.c1891
-rw-r--r--drivers/infiniband/sw/siw/siw_verbs.h91
-rw-r--r--drivers/infiniband/ulp/Makefile1
-rw-r--r--drivers/infiniband/ulp/ipoib/Kconfig11
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h43
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c54
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ethtool.c35
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_fs.c50
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c189
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c467
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c52
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_netlink.c29
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c26
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_vlan.c39
-rw-r--r--drivers/infiniband/ulp/iser/Kconfig3
-rw-r--r--drivers/infiniband/ulp/iser/Makefile1
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c230
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h245
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c223
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c428
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c643
-rw-r--r--drivers/infiniband/ulp/isert/Kconfig3
-rw-r--r--drivers/infiniband/ulp/isert/Makefile1
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c549
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.h69
-rw-r--r--drivers/infiniband/ulp/opa_vnic/Kconfig7
-rw-r--r--drivers/infiniband/ulp/opa_vnic/Makefile4
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h33
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c12
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h9
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c1
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c27
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c2
-rw-r--r--drivers/infiniband/ulp/rtrs/Kconfig27
-rw-r--r--drivers/infiniband/ulp/rtrs/Makefile21
-rw-r--r--drivers/infiniband/ulp/rtrs/README213
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c198
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c514
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt-trace.c15
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt-trace.h86
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt.c3207
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt.h252
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-log.h28
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-pri.h408
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c51
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c319
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv-trace.c16
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv-trace.h88
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv.c2346
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv.h156
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs.c645
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs.h188
-rw-r--r--drivers/infiniband/ulp/srp/Kbuild1
-rw-r--r--drivers/infiniband/ulp/srp/Kconfig3
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c1039
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h57
-rw-r--r--drivers/infiniband/ulp/srpt/Kconfig3
-rw-r--r--drivers/infiniband/ulp/srpt/Makefile1
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c683
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.h75
717 files changed, 168443 insertions, 175986 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index cbfbea49f126..794b9778816b 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
menuconfig INFINIBAND
tristate "InfiniBand support"
depends on HAS_IOMEM && HAS_DMA
@@ -6,7 +7,8 @@ menuconfig INFINIBAND
depends on m || IPV6 != m
depends on !ALPHA
select IRQ_POLL
- ---help---
+ select DIMLIB
+ help
Core support for InfiniBand (IB). Make sure to also select
any protocols you wish to use as well as drivers for your
InfiniBand hardware.
@@ -16,7 +18,7 @@ if INFINIBAND
config INFINIBAND_USER_MAD
tristate "InfiniBand userspace MAD support"
depends on INFINIBAND
- ---help---
+ help
Userspace InfiniBand Management Datagram (MAD) support. This
is the kernel side of the userspace MAD support, which allows
userspace processes to send and receive MADs. You will also
@@ -26,7 +28,7 @@ config INFINIBAND_USER_MAD
config INFINIBAND_USER_ACCESS
tristate "InfiniBand userspace access (verbs and CM)"
depends on MMU
- ---help---
+ help
Userspace InfiniBand access support. This enables the
kernel side of userspace verbs and the userspace
communication manager (CM). This allows userspace processes
@@ -35,37 +37,21 @@ config INFINIBAND_USER_ACCESS
libibverbs, libibcm and a hardware driver library from
rdma-core <https://github.com/linux-rdma/rdma-core>.
-config INFINIBAND_USER_ACCESS_UCM
- tristate "Userspace CM (UCM, DEPRECATED)"
- depends on BROKEN || COMPILE_TEST
- depends on INFINIBAND_USER_ACCESS
- help
- The UCM module has known security flaws, which no one is
- interested to fix. The user-space part of this code was
- dropped from the upstream a long time ago.
-
- This option is DEPRECATED and planned to be removed.
-
-config INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI
- bool "Allow experimental legacy verbs in new ioctl uAPI (EXPERIMENTAL)"
- depends on INFINIBAND_USER_ACCESS
- ---help---
- IOCTL based uAPI support for Infiniband is enabled by default for
- new verbs only. This allows userspace to invoke the IOCTL based uAPI
- for current legacy verbs too.
-
config INFINIBAND_USER_MEM
bool
depends on INFINIBAND_USER_ACCESS != n
depends on MMU
+ select DMA_SHARED_BUFFER
default y
config INFINIBAND_ON_DEMAND_PAGING
bool "InfiniBand on-demand paging support"
depends on INFINIBAND_USER_MEM
select MMU_NOTIFIER
+ select INTERVAL_TREE
+ select HMM_MIRROR
default y
- ---help---
+ help
On demand paging support for the InfiniBand subsystem.
Together with driver support this allows registration of
memory regions without pinning their pages, fetching the
@@ -75,7 +61,7 @@ config INFINIBAND_ADDR_TRANS
bool "RDMA/CM"
depends on INFINIBAND
default y
- ---help---
+ help
Support for RDMA communication manager (CM).
This allows for a generic connection abstraction over RDMA.
@@ -83,30 +69,37 @@ config INFINIBAND_ADDR_TRANS_CONFIGFS
bool
depends on INFINIBAND_ADDR_TRANS && CONFIGFS_FS && !(INFINIBAND=y && CONFIGFS_FS=m)
default y
- ---help---
+ help
ConfigFS support for RDMA communication manager (CM).
This allows the user to config the default GID type that the CM
uses for each device, when initiaing new connections.
+config INFINIBAND_VIRT_DMA
+ def_bool !HIGHMEM
+
if INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
-source "drivers/infiniband/hw/mthca/Kconfig"
-source "drivers/infiniband/hw/qib/Kconfig"
-source "drivers/infiniband/hw/cxgb3/Kconfig"
+if !UML
+source "drivers/infiniband/hw/bnxt_re/Kconfig"
+source "drivers/infiniband/hw/bng_re/Kconfig"
source "drivers/infiniband/hw/cxgb4/Kconfig"
source "drivers/infiniband/hw/efa/Kconfig"
-source "drivers/infiniband/hw/i40iw/Kconfig"
+source "drivers/infiniband/hw/erdma/Kconfig"
+source "drivers/infiniband/hw/hfi1/Kconfig"
+source "drivers/infiniband/hw/hns/Kconfig"
+source "drivers/infiniband/hw/ionic/Kconfig"
+source "drivers/infiniband/hw/irdma/Kconfig"
+source "drivers/infiniband/hw/mana/Kconfig"
source "drivers/infiniband/hw/mlx4/Kconfig"
source "drivers/infiniband/hw/mlx5/Kconfig"
-source "drivers/infiniband/hw/nes/Kconfig"
+source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/ocrdma/Kconfig"
-source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
-source "drivers/infiniband/hw/usnic/Kconfig"
-source "drivers/infiniband/hw/hns/Kconfig"
-source "drivers/infiniband/hw/bnxt_re/Kconfig"
-source "drivers/infiniband/hw/hfi1/Kconfig"
source "drivers/infiniband/hw/qedr/Kconfig"
+source "drivers/infiniband/hw/usnic/Kconfig"
+source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
source "drivers/infiniband/sw/rdmavt/Kconfig"
+endif # !UML
source "drivers/infiniband/sw/rxe/Kconfig"
+source "drivers/infiniband/sw/siw/Kconfig"
endif
source "drivers/infiniband/ulp/ipoib/Kconfig"
@@ -116,6 +109,7 @@ source "drivers/infiniband/ulp/srpt/Kconfig"
source "drivers/infiniband/ulp/iser/Kconfig"
source "drivers/infiniband/ulp/isert/Kconfig"
+source "drivers/infiniband/ulp/rtrs/Kconfig"
source "drivers/infiniband/ulp/opa_vnic/Kconfig"
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index fad0b44c356f..8603cdfcfdcb 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_INFINIBAND) += core/
obj-$(CONFIG_INFINIBAND) += hw/
obj-$(CONFIG_INFINIBAND) += ulp/
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 313f2349b518..f483e0c12444 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -6,22 +6,23 @@ obj-$(CONFIG_INFINIBAND) += ib_core.o ib_cm.o iw_cm.o \
$(infiniband-y)
obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o $(user_access-y)
-obj-$(CONFIG_INFINIBAND_USER_ACCESS_UCM) += ib_ucm.o $(user_access-y)
ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
- device.o fmr_pool.o cache.o netlink.o \
+ device.o cache.o netlink.o \
roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
multicast.o mad.o smi.o agent.o mad_rmpp.o \
- nldev.o restrack.o
+ nldev.o restrack.o counters.o ib_core_uverbs.o \
+ trace.o lag.o
ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
-ib_cm-y := cm.o
+ib_cm-y := cm.o cm_trace.o
iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o
-rdma_cm-y := cma.o
+CFLAGS_cma_trace.o += -I$(src)
+rdma_cm-y := cma.o cma_trace.o
rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o
@@ -29,13 +30,17 @@ rdma_ucm-y := ucma.o
ib_umad-y := user_mad.o
-ib_ucm-y := ucm.o
-
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
uverbs_std_types_cq.o \
+ uverbs_std_types_dmah.o \
uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
uverbs_std_types_mr.o uverbs_std_types_counters.o \
- uverbs_uapi.o uverbs_std_types_device.o
-ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
+ uverbs_uapi.o uverbs_std_types_device.o \
+ uverbs_std_types_async_fd.o \
+ uverbs_std_types_srq.o \
+ uverbs_std_types_wq.o \
+ uverbs_std_types_qp.o \
+ ucaps.o
+ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o umem_dmabuf.o
ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 2f7d14159841..61596cda2b65 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -37,7 +37,6 @@
#include <linux/inetdevice.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
-#include <linux/module.h>
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
@@ -76,7 +75,9 @@ static struct workqueue_struct *addr_wq;
static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = {
[LS_NLA_TYPE_DGID] = {.type = NLA_BINARY,
- .len = sizeof(struct rdma_nla_ls_gid)},
+ .len = sizeof(struct rdma_nla_ls_gid),
+ .validation_type = NLA_VALIDATE_MIN,
+ .min = sizeof(struct rdma_nla_ls_gid)},
};
static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh)
@@ -139,7 +140,7 @@ int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
if (ib_nl_is_good_ip_resp(nlh))
ib_nl_process_good_ip_rsep(nlh);
- return skb->len;
+ return 0;
}
static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
@@ -183,7 +184,7 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
/* Repair the nlmsg header length */
nlmsg_end(skb, nlh);
- rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, GFP_KERNEL);
+ rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, GFP_KERNEL);
/* Make the request retry, so when we get the response from userspace
* we will have something.
@@ -337,7 +338,7 @@ static int dst_fetch_ha(const struct dst_entry *dst,
neigh_event_send(n, NULL);
ret = -ENODATA;
} else {
- memcpy(dev_addr->dst_dev_addr, n->ha, MAX_ADDR_LEN);
+ neigh_ha_snapshot(dev_addr->dst_dev_addr, n, dst->dev);
}
neigh_release(n);
@@ -347,16 +348,10 @@ static int dst_fetch_ha(const struct dst_entry *dst,
static bool has_gateway(const struct dst_entry *dst, sa_family_t family)
{
- struct rtable *rt;
- struct rt6_info *rt6;
-
- if (family == AF_INET) {
- rt = container_of(dst, struct rtable, dst);
- return rt->rt_gw_family == AF_INET;
- }
+ if (family == AF_INET)
+ return dst_rtable(dst)->rt_uses_gateway;
- rt6 = container_of(dst, struct rt6_info, dst);
- return rt6->rt6i_flags & RTF_GATEWAY;
+ return dst_rt6_info(dst)->rt6i_flags & RTF_GATEWAY;
}
static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
@@ -371,6 +366,8 @@ static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
(const void *)&dst_in6->sin6_addr;
sa_family_t family = dst_in->sa_family;
+ might_sleep();
+
/* If we have a gateway in IB mode then it must be an IB network */
if (has_gateway(dst, family) && dev_addr->network == RDMA_NETWORK_IB)
return ib_nl_fetch_ha(dev_addr, daddr, seq, family);
@@ -421,16 +418,15 @@ static int addr6_resolve(struct sockaddr *src_sock,
(const struct sockaddr_in6 *)dst_sock;
struct flowi6 fl6;
struct dst_entry *dst;
- int ret;
memset(&fl6, 0, sizeof fl6);
fl6.daddr = dst_in->sin6_addr;
fl6.saddr = src_in->sin6_addr;
fl6.flowi6_oif = addr->bound_dev_if;
- ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6);
- if (ret < 0)
- return ret;
+ dst = ipv6_stub->ipv6_dst_lookup_flow(addr->net, NULL, &fl6, NULL);
+ if (IS_ERR(dst))
+ return PTR_ERR(dst);
if (ipv6_addr_any(&src_in->sin6_addr))
src_in->sin6_addr = fl6.saddr;
@@ -450,63 +446,41 @@ static int addr6_resolve(struct sockaddr *src_sock,
}
#endif
+static bool is_dst_local(const struct dst_entry *dst)
+{
+ if (dst->ops->family == AF_INET)
+ return !!(dst_rtable(dst)->rt_type & RTN_LOCAL);
+ else if (dst->ops->family == AF_INET6)
+ return !!(dst_rt6_info(dst)->rt6i_flags & RTF_LOCAL);
+ else
+ return false;
+}
+
static int addr_resolve_neigh(const struct dst_entry *dst,
const struct sockaddr *dst_in,
struct rdma_dev_addr *addr,
- unsigned int ndev_flags,
u32 seq)
{
- int ret = 0;
-
- if (ndev_flags & IFF_LOOPBACK) {
+ if (is_dst_local(dst)) {
+ /* When the destination is local entry, source and destination
+ * are same. Skip the neighbour lookup.
+ */
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
- } else {
- if (!(ndev_flags & IFF_NOARP)) {
- /* If the device doesn't do ARP internally */
- ret = fetch_ha(dst, addr, dst_in, seq);
- }
+ return 0;
}
- return ret;
-}
-
-static int copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
- const struct sockaddr *dst_in,
- const struct dst_entry *dst,
- const struct net_device *ndev)
-{
- int ret = 0;
-
- if (dst->dev->flags & IFF_LOOPBACK)
- ret = rdma_translate_ip(dst_in, dev_addr);
- else
- rdma_copy_src_l2_addr(dev_addr, dst->dev);
- /*
- * If there's a gateway and type of device not ARPHRD_INFINIBAND,
- * we're definitely in RoCE v2 (as RoCE v1 isn't routable) set the
- * network type accordingly.
- */
- if (has_gateway(dst, dst_in->sa_family) &&
- ndev->type != ARPHRD_INFINIBAND)
- dev_addr->network = dst_in->sa_family == AF_INET ?
- RDMA_NETWORK_IPV4 :
- RDMA_NETWORK_IPV6;
- else
- dev_addr->network = RDMA_NETWORK_IB;
-
- return ret;
+ return fetch_ha(dst, addr, dst_in, seq);
}
static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr,
- unsigned int *ndev_flags,
const struct sockaddr *dst_in,
const struct dst_entry *dst)
{
struct net_device *ndev = READ_ONCE(dst->dev);
- *ndev_flags = ndev->flags;
/* A physical device must be the RDMA device to use */
- if (ndev->flags & IFF_LOOPBACK) {
+ if (is_dst_local(dst)) {
+ int ret;
/*
* RDMA (IB/RoCE, iWarp) doesn't run on lo interface or
* loopback IP address. So if route is resolved to loopback
@@ -516,9 +490,27 @@ static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr,
ndev = rdma_find_ndev_for_src_ip_rcu(dev_net(ndev), dst_in);
if (IS_ERR(ndev))
return -ENODEV;
+ ret = rdma_translate_ip(dst_in, dev_addr);
+ if (ret)
+ return ret;
+ } else {
+ rdma_copy_src_l2_addr(dev_addr, dst->dev);
}
- return copy_src_l2_addr(dev_addr, dst_in, dst, ndev);
+ /*
+ * If there's a gateway and type of device not ARPHRD_INFINIBAND,
+ * we're definitely in RoCE v2 (as RoCE v1 isn't routable) set the
+ * network type accordingly.
+ */
+ if (has_gateway(dst, dst_in->sa_family) &&
+ ndev->type != ARPHRD_INFINIBAND)
+ dev_addr->network = dst_in->sa_family == AF_INET ?
+ RDMA_NETWORK_IPV4 :
+ RDMA_NETWORK_IPV6;
+ else
+ dev_addr->network = RDMA_NETWORK_IB;
+
+ return 0;
}
static int set_addr_netns_by_gid_rcu(struct rdma_dev_addr *addr)
@@ -555,7 +547,6 @@ static int addr_resolve(struct sockaddr *src_in,
u32 seq)
{
struct dst_entry *dst = NULL;
- unsigned int ndev_flags = 0;
struct rtable *rt = NULL;
int ret;
@@ -592,7 +583,7 @@ static int addr_resolve(struct sockaddr *src_in,
rcu_read_unlock();
goto done;
}
- ret = rdma_set_src_addr_rcu(addr, &ndev_flags, dst_in, dst);
+ ret = rdma_set_src_addr_rcu(addr, dst_in, dst);
rcu_read_unlock();
/*
@@ -600,7 +591,7 @@ static int addr_resolve(struct sockaddr *src_in,
* only if src addr translation didn't fail.
*/
if (!ret && resolve_neigh)
- ret = addr_resolve_neigh(dst, dst_in, addr, ndev_flags, seq);
+ ret = addr_resolve_neigh(dst, dst_in, addr, seq);
if (src_in->sa_family == AF_INET)
ip_rt_put(rt);
@@ -646,13 +637,12 @@ static void process_one_req(struct work_struct *_work)
req->callback = NULL;
spin_lock_bh(&lock);
+ /*
+ * Although the work will normally have been canceled by the workqueue,
+ * it can still be requeued as long as it is on the req_list.
+ */
+ cancel_delayed_work(&req->work);
if (!list_empty(&req->list)) {
- /*
- * Although the work will normally have been canceled by the
- * workqueue, it can still be requeued as long as it is on the
- * req_list.
- */
- cancel_delayed_work(&req->work);
list_del_init(&req->list);
kfree(req);
}
@@ -728,6 +718,8 @@ int roce_resolve_route_from_path(struct sa_path_rec *rec,
struct rdma_dev_addr dev_addr = {};
int ret;
+ might_sleep();
+
if (rec->roce.route_resolved)
return 0;
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index f82b4260de42..25a060a28301 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -59,7 +59,16 @@ __ib_get_agent_port(const struct ib_device *device, int port_num)
struct ib_agent_port_private *entry;
list_for_each_entry(entry, &ib_agent_port_list, port_list) {
- if (entry->agent[1]->device == device &&
+ /* Need to check both agent[0] and agent[1], as an agent port
+ * may only have one of them
+ */
+ if (entry->agent[0] &&
+ entry->agent[0]->device == device &&
+ entry->agent[0]->port_num == port_num)
+ return entry;
+
+ if (entry->agent[1] &&
+ entry->agent[1]->device == device &&
entry->agent[1]->port_num == port_num)
return entry;
}
@@ -101,8 +110,7 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *
agent = port_priv->agent[qpn];
ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num);
if (IS_ERR(ah)) {
- dev_err(&device->dev, "ib_create_ah_from_wc error %ld\n",
- PTR_ERR(ah));
+ dev_err(&device->dev, "ib_create_ah_from_wc error %pe\n", ah);
return;
}
@@ -172,14 +180,16 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
}
}
- /* Obtain send only MAD agent for GSI QP */
- port_priv->agent[1] = ib_register_mad_agent(device, port_num,
- IB_QPT_GSI, NULL, 0,
- &agent_send_handler,
- NULL, NULL, 0);
- if (IS_ERR(port_priv->agent[1])) {
- ret = PTR_ERR(port_priv->agent[1]);
- goto error3;
+ if (rdma_cap_ib_cm(device, port_num)) {
+ /* Obtain send only MAD agent for GSI QP */
+ port_priv->agent[1] = ib_register_mad_agent(device, port_num,
+ IB_QPT_GSI, NULL, 0,
+ &agent_send_handler,
+ NULL, NULL, 0);
+ if (IS_ERR(port_priv->agent[1])) {
+ ret = PTR_ERR(port_priv->agent[1]);
+ goto error3;
+ }
}
spin_lock_irqsave(&ib_agent_port_list_lock, flags);
@@ -212,7 +222,8 @@ int ib_agent_port_close(struct ib_device *device, int port_num)
list_del(&port_priv->port_list);
spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
- ib_unregister_mad_agent(port_priv->agent[1]);
+ if (port_priv->agent[1])
+ ib_unregister_mad_agent(port_priv->agent[1]);
if (port_priv->agent[0])
ib_unregister_mad_agent(port_priv->agent[0]);
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 18e476b3ced0..81cf3c902e81 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -33,7 +33,7 @@
* SOFTWARE.
*/
-#include <linux/module.h>
+#include <linux/if_vlan.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
@@ -46,14 +46,13 @@
struct ib_pkey_cache {
int table_len;
- u16 table[0];
+ u16 table[] __counted_by(table_len);
};
struct ib_update_work {
struct work_struct work;
- struct ib_device *device;
- u8 port_num;
- bool enforce_security;
+ struct ib_event event;
+ bool enforce_security;
};
union ib_gid zgid;
@@ -122,7 +121,7 @@ struct ib_gid_table {
u32 default_gid_indices;
};
-static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
+static void dispatch_gid_change_event(struct ib_device *ib_dev, u32 port)
{
struct ib_event event;
@@ -130,11 +129,15 @@ static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
event.element.port_num = port;
event.event = IB_EVENT_GID_CHANGE;
- ib_dispatch_event(&event);
+ ib_dispatch_event_clients(&event);
}
static const char * const gid_type_str[] = {
+ /* IB/RoCE v1 value is set for IB_GID_TYPE_IB and IB_GID_TYPE_ROCE for
+ * user space compatibility reasons.
+ */
[IB_GID_TYPE_IB] = "IB/RoCE v1",
+ [IB_GID_TYPE_ROCE] = "IB/RoCE v1",
[IB_GID_TYPE_ROCE_UDP_ENCAP] = "RoCE v2",
};
@@ -194,7 +197,7 @@ int ib_cache_gid_parse_type_str(const char *buf)
}
EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
-static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port)
+static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u32 port)
{
return device->port_data[port].cache.gid;
}
@@ -234,10 +237,10 @@ static void put_gid_ndev(struct rcu_head *head)
static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
{
struct ib_device *device = entry->attr.device;
- u8 port_num = entry->attr.port_num;
+ u32 port_num = entry->attr.port_num;
struct ib_gid_table *table = rdma_gid_table(device, port_num);
- dev_dbg(&device->dev, "%s port=%d index=%d gid %pI6\n", __func__,
+ dev_dbg(&device->dev, "%s port=%u index=%u gid %pI6\n", __func__,
port_num, entry->attr.index, entry->attr.gid.raw);
write_lock_irq(&table->rwlock);
@@ -279,7 +282,7 @@ static void free_gid_work(struct work_struct *work)
struct ib_gid_table_entry *entry =
container_of(work, struct ib_gid_table_entry, del_work);
struct ib_device *device = entry->attr.device;
- u8 port_num = entry->attr.port_num;
+ u32 port_num = entry->attr.port_num;
struct ib_gid_table *table = rdma_gid_table(device, port_num);
mutex_lock(&table->lock);
@@ -320,7 +323,7 @@ static void store_gid_entry(struct ib_gid_table *table,
{
entry->state = GID_TABLE_ENTRY_VALID;
- dev_dbg(&entry->attr.device->dev, "%s port=%d index=%d gid %pI6\n",
+ dev_dbg(&entry->attr.device->dev, "%s port=%u index=%u gid %pI6\n",
__func__, entry->attr.port_num, entry->attr.index,
entry->attr.gid.raw);
@@ -351,7 +354,7 @@ static int add_roce_gid(struct ib_gid_table_entry *entry)
int ret;
if (!attr->ndev) {
- dev_err(&attr->device->dev, "%s NULL netdev port=%d index=%d\n",
+ dev_err(&attr->device->dev, "%s NULL netdev port=%u index=%u\n",
__func__, attr->port_num, attr->index);
return -EINVAL;
}
@@ -359,7 +362,7 @@ static int add_roce_gid(struct ib_gid_table_entry *entry)
ret = attr->device->ops.add_gid(attr, &entry->context);
if (ret) {
dev_err(&attr->device->dev,
- "%s GID add failed port=%d index=%d\n",
+ "%s GID add failed port=%u index=%u\n",
__func__, attr->port_num, attr->index);
return ret;
}
@@ -376,7 +379,7 @@ static int add_roce_gid(struct ib_gid_table_entry *entry)
* @ix: GID entry index to delete
*
*/
-static void del_gid(struct ib_device *ib_dev, u8 port,
+static void del_gid(struct ib_device *ib_dev, u32 port,
struct ib_gid_table *table, int ix)
{
struct roce_gid_ndev_storage *ndev_storage;
@@ -384,7 +387,7 @@ static void del_gid(struct ib_device *ib_dev, u8 port,
lockdep_assert_held(&table->lock);
- dev_dbg(&ib_dev->dev, "%s port=%d index=%d gid %pI6\n", __func__, port,
+ dev_dbg(&ib_dev->dev, "%s port=%u index=%d gid %pI6\n", __func__, port,
ix, table->data_vec[ix]->attr.gid.raw);
write_lock_irq(&table->rwlock);
@@ -397,6 +400,9 @@ static void del_gid(struct ib_device *ib_dev, u8 port,
table->data_vec[ix] = NULL;
write_unlock_irq(&table->rwlock);
+ if (rdma_cap_roce_gid_table(ib_dev, port))
+ ib_dev->ops.del_gid(&entry->attr, &entry->context);
+
ndev_storage = entry->ndev_storage;
if (ndev_storage) {
entry->ndev_storage = NULL;
@@ -404,9 +410,6 @@ static void del_gid(struct ib_device *ib_dev, u8 port,
call_rcu(&ndev_storage->rcu_head, put_gid_ndev);
}
- if (rdma_cap_roce_gid_table(ib_dev, port))
- ib_dev->ops.del_gid(&entry->attr, &entry->context);
-
put_gid_entry_locked(entry);
}
@@ -540,7 +543,7 @@ static void make_default_gid(struct net_device *dev, union ib_gid *gid)
addrconf_ifid_eui48(&gid->raw[8], dev);
}
-static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
+static int __ib_cache_gid_add(struct ib_device *ib_dev, u32 port,
union ib_gid *gid, struct ib_gid_attr *attr,
unsigned long mask, bool default_gid)
{
@@ -579,12 +582,12 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
out_unlock:
mutex_unlock(&table->lock);
if (ret)
- pr_warn("%s: unable to add gid %pI6 error=%d\n",
- __func__, gid->raw, ret);
+ pr_warn_ratelimited("%s: unable to add gid %pI6 error=%d\n",
+ __func__, gid->raw, ret);
return ret;
}
-int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
+int ib_cache_gid_add(struct ib_device *ib_dev, u32 port,
union ib_gid *gid, struct ib_gid_attr *attr)
{
unsigned long mask = GID_ATTR_FIND_MASK_GID |
@@ -595,7 +598,7 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
}
static int
-_ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
+_ib_cache_gid_del(struct ib_device *ib_dev, u32 port,
union ib_gid *gid, struct ib_gid_attr *attr,
unsigned long mask, bool default_gid)
{
@@ -624,7 +627,7 @@ out_unlock:
return ret;
}
-int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
+int ib_cache_gid_del(struct ib_device *ib_dev, u32 port,
union ib_gid *gid, struct ib_gid_attr *attr)
{
unsigned long mask = GID_ATTR_FIND_MASK_GID |
@@ -635,7 +638,7 @@ int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
return _ib_cache_gid_del(ib_dev, port, gid, attr, mask, false);
}
-int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
+int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u32 port,
struct net_device *ndev)
{
struct ib_gid_table *table;
@@ -666,11 +669,10 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
* rdma_find_gid_by_port - Returns the GID entry attributes when it finds
* a valid GID entry for given search parameters. It searches for the specified
* GID value in the local software cache.
- * @device: The device to query.
+ * @ib_dev: The device to query.
* @gid: The GID value to search for.
* @gid_type: The GID type to search for.
- * @port_num: The port number of the device where the GID value should be
- * searched.
+ * @port: The port number of the device where the GID value should be searched.
* @ndev: In RoCE, the net device of the device. NULL means ignore.
*
* Returns sgid attributes if the GID is found with valid reference or
@@ -681,7 +683,7 @@ const struct ib_gid_attr *
rdma_find_gid_by_port(struct ib_device *ib_dev,
const union ib_gid *gid,
enum ib_gid_type gid_type,
- u8 port, struct net_device *ndev)
+ u32 port, struct net_device *ndev)
{
int local_index;
struct ib_gid_table *table;
@@ -716,7 +718,7 @@ EXPORT_SYMBOL(rdma_find_gid_by_port);
/**
* rdma_find_gid_by_filter - Returns the GID table attribute where a
* specified GID value occurs
- * @device: The device to query.
+ * @ib_dev: The device to query.
* @gid: The GID value to search for.
* @port: The port number of the device where the GID value could be
* searched.
@@ -725,13 +727,14 @@ EXPORT_SYMBOL(rdma_find_gid_by_port);
* otherwise, we continue searching the GID table. It's guaranteed that
* while filter is executed, ndev field is valid and the structure won't
* change. filter is executed in an atomic context. filter must not be NULL.
+ * @context: Private data to pass into the call-back.
*
* rdma_find_gid_by_filter() searches for the specified GID value
* of which the filter function returns true in the port's GID table.
*
*/
const struct ib_gid_attr *rdma_find_gid_by_filter(
- struct ib_device *ib_dev, const union ib_gid *gid, u8 port,
+ struct ib_device *ib_dev, const union ib_gid *gid, u32 port,
bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *,
void *),
void *context)
@@ -791,7 +794,6 @@ err_free_table:
static void release_gid_table(struct ib_device *device,
struct ib_gid_table *table)
{
- bool leak = false;
int i;
if (!table)
@@ -800,43 +802,35 @@ static void release_gid_table(struct ib_device *device,
for (i = 0; i < table->sz; i++) {
if (is_gid_entry_free(table->data_vec[i]))
continue;
- if (kref_read(&table->data_vec[i]->kref) > 1) {
- dev_err(&device->dev,
- "GID entry ref leak for index %d ref=%d\n", i,
- kref_read(&table->data_vec[i]->kref));
- leak = true;
- }
+
+ WARN_ONCE(true,
+ "GID entry ref leak for dev %s index %d ref=%u\n",
+ dev_name(&device->dev), i,
+ kref_read(&table->data_vec[i]->kref));
}
- if (leak)
- return;
+ mutex_destroy(&table->lock);
kfree(table->data_vec);
kfree(table);
}
-static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
+static void cleanup_gid_table_port(struct ib_device *ib_dev, u32 port,
struct ib_gid_table *table)
{
int i;
- bool deleted = false;
if (!table)
return;
mutex_lock(&table->lock);
for (i = 0; i < table->sz; ++i) {
- if (is_gid_entry_valid(table->data_vec[i])) {
+ if (is_gid_entry_valid(table->data_vec[i]))
del_gid(ib_dev, port, table, i);
- deleted = true;
- }
}
mutex_unlock(&table->lock);
-
- if (deleted)
- dispatch_gid_change_event(ib_dev, port);
}
-void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
+void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u32 port,
struct net_device *ndev,
unsigned long gid_type_mask,
enum ib_cache_gid_default_mode mode)
@@ -869,7 +863,7 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
}
}
-static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
+static void gid_table_reserve_default(struct ib_device *ib_dev, u32 port,
struct ib_gid_table *table)
{
unsigned int i;
@@ -886,7 +880,7 @@ static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
static void gid_table_release_one(struct ib_device *ib_dev)
{
- unsigned int p;
+ u32 p;
rdma_for_each_port (ib_dev, p) {
release_gid_table(ib_dev, ib_dev->port_data[p].cache.gid);
@@ -897,7 +891,7 @@ static void gid_table_release_one(struct ib_device *ib_dev)
static int _gid_table_setup_one(struct ib_device *ib_dev)
{
struct ib_gid_table *table;
- unsigned int rdma_port;
+ u32 rdma_port;
rdma_for_each_port (ib_dev, rdma_port) {
table = alloc_gid_table(
@@ -917,7 +911,7 @@ rollback_table_setup:
static void gid_table_cleanup_one(struct ib_device *ib_dev)
{
- unsigned int p;
+ u32 p;
rdma_for_each_port (ib_dev, p)
cleanup_gid_table_port(ib_dev, p,
@@ -952,12 +946,12 @@ static int gid_table_setup_one(struct ib_device *ib_dev)
* Returns 0 on success or appropriate error code.
*
*/
-int rdma_query_gid(struct ib_device *device, u8 port_num,
+int rdma_query_gid(struct ib_device *device, u32 port_num,
int index, union ib_gid *gid)
{
struct ib_gid_table *table;
unsigned long flags;
- int res = -EINVAL;
+ int res;
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
@@ -965,9 +959,15 @@ int rdma_query_gid(struct ib_device *device, u8 port_num,
table = rdma_gid_table(device, port_num);
read_lock_irqsave(&table->rwlock, flags);
- if (index < 0 || index >= table->sz ||
- !is_gid_entry_valid(table->data_vec[index]))
+ if (index < 0 || index >= table->sz) {
+ res = -EINVAL;
goto done;
+ }
+
+ if (!is_gid_entry_valid(table->data_vec[index])) {
+ res = -ENOENT;
+ goto done;
+ }
memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid));
res = 0;
@@ -979,6 +979,23 @@ done:
EXPORT_SYMBOL(rdma_query_gid);
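The split return codes above let callers tell an out-of-range index (-EINVAL) apart from an empty slot (-ENOENT). A minimal sketch of how a consumer might walk one port's GID table with this API; the ibdev and port variables are illustrative:

	static void dump_port_gids(struct ib_device *ibdev, u32 port)
	{
		union ib_gid gid;
		int i, ret;

		for (i = 0; ; i++) {
			ret = rdma_query_gid(ibdev, port, i, &gid);
			if (ret == -EINVAL)
				break;		/* index past the end of the table */
			if (ret == -ENOENT)
				continue;	/* slot allocated but no valid GID */
			if (ret)
				break;
			pr_debug("port %u gid[%d] %pI6\n", port, i, gid.raw);
		}
	}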
/**
+ * rdma_read_gid_hw_context - Read the HW GID context from GID attribute
+ * @attr: Pointer to the GID attribute
+ *
+ * rdma_read_gid_hw_context() reads the driver's GID HW context corresponding
+ * to the SGID attr. Callers are required to already be holding the reference
+ * to an existing GID entry.
+ *
+ * Returns the HW GID context
+ *
+ */
+void *rdma_read_gid_hw_context(const struct ib_gid_attr *attr)
+{
+ return container_of(attr, struct ib_gid_table_entry, attr)->context;
+}
+EXPORT_SYMBOL(rdma_read_gid_hw_context);
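For a driver that stashed per-entry state from its add_gid() callback, that state can be recovered from any sgid_attr handed back to it later. A hedged sketch; struct my_gid_ctx and the helper it feeds are hypothetical driver-side names:

	/* attr must be a GID entry the caller already holds a reference on */
	struct my_gid_ctx *ctx = rdma_read_gid_hw_context(attr);

	if (ctx)
		my_hw_program_sgid(ctx);	/* hypothetical driver helper */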
+
+/**
* rdma_find_gid - Returns SGID attributes if the matching GID is found.
* @device: The device to query.
* @gid: The GID value to search for.
@@ -999,7 +1016,7 @@ const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
unsigned long mask = GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_GID_TYPE;
struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
- unsigned int p;
+ u32 p;
if (ndev)
mask |= GID_ATTR_FIND_MASK_NETDEV;
@@ -1028,7 +1045,7 @@ const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
EXPORT_SYMBOL(rdma_find_gid);
int ib_get_cached_pkey(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
int index,
u16 *pkey)
{
@@ -1039,42 +1056,34 @@ int ib_get_cached_pkey(struct ib_device *device,
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
- read_lock_irqsave(&device->cache.lock, flags);
+ read_lock_irqsave(&device->cache_lock, flags);
cache = device->port_data[port_num].cache.pkey;
- if (index < 0 || index >= cache->table_len)
+ if (!cache || index < 0 || index >= cache->table_len)
ret = -EINVAL;
else
*pkey = cache->table[index];
- read_unlock_irqrestore(&device->cache.lock, flags);
+ read_unlock_irqrestore(&device->cache_lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_get_cached_pkey);
-int ib_get_cached_subnet_prefix(struct ib_device *device,
- u8 port_num,
- u64 *sn_pfx)
+void ib_get_cached_subnet_prefix(struct ib_device *device, u32 port_num,
+ u64 *sn_pfx)
{
unsigned long flags;
- if (!rdma_is_port_valid(device, port_num))
- return -EINVAL;
-
- read_lock_irqsave(&device->cache.lock, flags);
+ read_lock_irqsave(&device->cache_lock, flags);
*sn_pfx = device->port_data[port_num].cache.subnet_prefix;
- read_unlock_irqrestore(&device->cache.lock, flags);
-
- return 0;
+ read_unlock_irqrestore(&device->cache_lock, flags);
}
EXPORT_SYMBOL(ib_get_cached_subnet_prefix);
-int ib_find_cached_pkey(struct ib_device *device,
- u8 port_num,
- u16 pkey,
- u16 *index)
+int ib_find_cached_pkey(struct ib_device *device, u32 port_num,
+ u16 pkey, u16 *index)
{
struct ib_pkey_cache *cache;
unsigned long flags;
@@ -1085,9 +1094,13 @@ int ib_find_cached_pkey(struct ib_device *device,
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
- read_lock_irqsave(&device->cache.lock, flags);
+ read_lock_irqsave(&device->cache_lock, flags);
cache = device->port_data[port_num].cache.pkey;
+ if (!cache) {
+ ret = -EINVAL;
+ goto err;
+ }
*index = -1;
@@ -1097,8 +1110,9 @@ int ib_find_cached_pkey(struct ib_device *device,
*index = i;
ret = 0;
break;
- } else
+ } else {
partial_ix = i;
+ }
}
if (ret && partial_ix >= 0) {
@@ -1106,47 +1120,14 @@ int ib_find_cached_pkey(struct ib_device *device,
ret = 0;
}
- read_unlock_irqrestore(&device->cache.lock, flags);
+err:
+ read_unlock_irqrestore(&device->cache_lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_find_cached_pkey);
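A common use of this lookup is turning the P_Key carried in a path record into the pkey_index needed for an address vector, much as the CM does. A minimal sketch; the surrounding variables and error handling are illustrative:

	u16 pkey_index;
	int ret;

	ret = ib_find_cached_pkey(ibdev, port, be16_to_cpu(path->pkey), &pkey_index);
	if (ret)
		return ret;	/* neither a full- nor a limited-member match */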
-int ib_find_exact_cached_pkey(struct ib_device *device,
- u8 port_num,
- u16 pkey,
- u16 *index)
-{
- struct ib_pkey_cache *cache;
- unsigned long flags;
- int i;
- int ret = -ENOENT;
-
- if (!rdma_is_port_valid(device, port_num))
- return -EINVAL;
-
- read_lock_irqsave(&device->cache.lock, flags);
-
- cache = device->port_data[port_num].cache.pkey;
-
- *index = -1;
-
- for (i = 0; i < cache->table_len; ++i)
- if (cache->table[i] == pkey) {
- *index = i;
- ret = 0;
- break;
- }
-
- read_unlock_irqrestore(&device->cache.lock, flags);
-
- return ret;
-}
-EXPORT_SYMBOL(ib_find_exact_cached_pkey);
-
-int ib_get_cached_lmc(struct ib_device *device,
- u8 port_num,
- u8 *lmc)
+int ib_get_cached_lmc(struct ib_device *device, u32 port_num, u8 *lmc)
{
unsigned long flags;
int ret = 0;
@@ -1154,16 +1135,15 @@ int ib_get_cached_lmc(struct ib_device *device,
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
- read_lock_irqsave(&device->cache.lock, flags);
+ read_lock_irqsave(&device->cache_lock, flags);
*lmc = device->port_data[port_num].cache.lmc;
- read_unlock_irqrestore(&device->cache.lock, flags);
+ read_unlock_irqrestore(&device->cache_lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_get_cached_lmc);
-int ib_get_cached_port_state(struct ib_device *device,
- u8 port_num,
+int ib_get_cached_port_state(struct ib_device *device, u32 port_num,
enum ib_port_state *port_state)
{
unsigned long flags;
@@ -1172,9 +1152,9 @@ int ib_get_cached_port_state(struct ib_device *device,
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
- read_lock_irqsave(&device->cache.lock, flags);
+ read_lock_irqsave(&device->cache_lock, flags);
*port_state = device->port_data[port_num].cache.port_state;
- read_unlock_irqrestore(&device->cache.lock, flags);
+ read_unlock_irqrestore(&device->cache_lock, flags);
return ret;
}
@@ -1197,9 +1177,9 @@ EXPORT_SYMBOL(ib_get_cached_port_state);
* code.
*/
const struct ib_gid_attr *
-rdma_get_gid_attr(struct ib_device *device, u8 port_num, int index)
+rdma_get_gid_attr(struct ib_device *device, u32 port_num, int index)
{
- const struct ib_gid_attr *attr = ERR_PTR(-EINVAL);
+ const struct ib_gid_attr *attr = ERR_PTR(-ENODATA);
struct ib_gid_table *table;
unsigned long flags;
@@ -1223,6 +1203,63 @@ done:
EXPORT_SYMBOL(rdma_get_gid_attr);
/**
+ * rdma_query_gid_table - Reads GID table entries of all the ports of a device up to max_entries.
+ * @device: The device to query.
+ * @entries: Entries where GID entries are returned.
+ * @max_entries: Maximum number of entries that can be returned.
+ * Entries array must be allocated to hold max_entries number of entries.
+ *
+ * Returns number of entries on success or appropriate error code.
+ */
+ssize_t rdma_query_gid_table(struct ib_device *device,
+ struct ib_uverbs_gid_entry *entries,
+ size_t max_entries)
+{
+ const struct ib_gid_attr *gid_attr;
+ ssize_t num_entries = 0, ret;
+ struct ib_gid_table *table;
+ u32 port_num, i;
+ struct net_device *ndev;
+ unsigned long flags;
+
+ rdma_for_each_port(device, port_num) {
+ table = rdma_gid_table(device, port_num);
+ read_lock_irqsave(&table->rwlock, flags);
+ for (i = 0; i < table->sz; i++) {
+ if (!is_gid_entry_valid(table->data_vec[i]))
+ continue;
+ if (num_entries >= max_entries) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ gid_attr = &table->data_vec[i]->attr;
+
+ memcpy(&entries->gid, &gid_attr->gid,
+ sizeof(gid_attr->gid));
+ entries->gid_index = gid_attr->index;
+ entries->port_num = gid_attr->port_num;
+ entries->gid_type = gid_attr->gid_type;
+ ndev = rcu_dereference_protected(
+ gid_attr->ndev,
+ lockdep_is_held(&table->rwlock));
+ if (ndev)
+ entries->netdev_ifindex = ndev->ifindex;
+
+ num_entries++;
+ entries++;
+ }
+ read_unlock_irqrestore(&table->rwlock, flags);
+ }
+
+ return num_entries;
+err:
+ read_unlock_irqrestore(&table->rwlock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_query_gid_table);
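Callers are expected to size the entries array for every port of the device; -EINVAL comes back if the snapshot would overflow it. A hedged sketch of a caller, assuming max_entries was derived from the device's port attributes:

	struct ib_uverbs_gid_entry *entries;
	ssize_t num;

	entries = kcalloc(max_entries, sizeof(*entries), GFP_KERNEL);
	if (!entries)
		return -ENOMEM;

	num = rdma_query_gid_table(ibdev, entries, max_entries);
	if (num >= 0) {
		/* entries[0..num-1] now carry gid, gid_index, port_num,
		 * gid_type and netdev_ifindex for every valid entry.
		 */
	}
	kfree(entries);
	return num < 0 ? num : 0;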
+
+/**
* rdma_put_gid_attr - Release reference to the GID attribute
* @attr: Pointer to the GID attribute whose reference
* needs to be released.
@@ -1278,8 +1315,8 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
struct ib_gid_table_entry *entry =
container_of(attr, struct ib_gid_table_entry, attr);
struct ib_device *device = entry->attr.device;
- struct net_device *ndev = ERR_PTR(-ENODEV);
- u8 port_num = entry->attr.port_num;
+ struct net_device *ndev = ERR_PTR(-EINVAL);
+ u32 port_num = entry->attr.port_num;
struct ib_gid_table *table;
unsigned long flags;
bool valid;
@@ -1290,8 +1327,7 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
valid = is_gid_entry_valid(table->data_vec[attr->index]);
if (valid) {
ndev = rcu_dereference(attr->ndev);
- if (!ndev ||
- (ndev && ((READ_ONCE(ndev->flags) & IFF_UP) == 0)))
+ if (!ndev)
ndev = ERR_PTR(-ENODEV);
}
read_unlock_irqrestore(&table->rwlock, flags);
@@ -1299,9 +1335,10 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
}
EXPORT_SYMBOL(rdma_read_gid_attr_ndev_rcu);
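The netdev returned here is only stable inside the RCU read-side critical section, so callers copy out what they need before unlocking. A minimal sketch:

	struct net_device *ndev;
	int ifindex = 0;

	rcu_read_lock();
	ndev = rdma_read_gid_attr_ndev_rcu(attr);
	if (!IS_ERR(ndev))
		ifindex = ndev->ifindex;
	rcu_read_unlock();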
-static int get_lower_dev_vlan(struct net_device *lower_dev, void *data)
+static int get_lower_dev_vlan(struct net_device *lower_dev,
+ struct netdev_nested_priv *priv)
{
- u16 *vlan_id = data;
+ u16 *vlan_id = (u16 *)priv->data;
if (is_vlan_dev(lower_dev))
*vlan_id = vlan_dev_vlan_id(lower_dev);
@@ -1327,6 +1364,9 @@ static int get_lower_dev_vlan(struct net_device *lower_dev, void *data)
int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
u16 *vlan_id, u8 *smac)
{
+ struct netdev_nested_priv priv = {
+ .data = (void *)vlan_id,
+ };
struct net_device *ndev;
rcu_read_lock();
@@ -1343,11 +1383,11 @@ int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
*vlan_id = vlan_dev_vlan_id(ndev);
} else {
/* If the netdev is an upper device and if its lower
- * device is vlan device, consider vlan id of the
+ * device is vlan device, consider vlan id of
* the lower vlan device for this gid entry.
*/
netdev_walk_all_lower_dev_rcu(attr->ndev,
- get_lower_dev_vlan, vlan_id);
+ get_lower_dev_vlan, &priv);
}
}
rcu_read_unlock();
@@ -1356,7 +1396,7 @@ int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
EXPORT_SYMBOL(rdma_read_gid_l2_fields);
static int config_non_roce_gid_cache(struct ib_device *device,
- u8 port, int gid_tbl_len)
+ u32 port, struct ib_port_attr *tprops)
{
struct ib_gid_attr gid_attr = {};
struct ib_gid_table *table;
@@ -1368,7 +1408,7 @@ static int config_non_roce_gid_cache(struct ib_device *device,
table = rdma_gid_table(device, port);
mutex_lock(&table->lock);
- for (i = 0; i < gid_tbl_len; ++i) {
+ for (i = 0; i < tprops->gid_tbl_len; ++i) {
if (!device->ops.query_gid)
continue;
ret = device->ops.query_gid(device, port, i, &gid_attr.gid);
@@ -1378,7 +1418,20 @@ static int config_non_roce_gid_cache(struct ib_device *device,
i);
goto err;
}
+
+ if (rdma_protocol_iwarp(device, port)) {
+ struct net_device *ndev;
+
+ ndev = ib_device_get_netdev(device, port);
+ if (!ndev)
+ continue;
+ RCU_INIT_POINTER(gid_attr.ndev, ndev);
+ dev_put(ndev);
+ }
+
gid_attr.index = i;
+ tprops->subnet_prefix =
+ be64_to_cpu(gid_attr.gid.global.subnet_prefix);
add_modify_gid(table, &gid_attr);
}
err:
@@ -1386,21 +1439,22 @@ err:
return ret;
}
-static void ib_cache_update(struct ib_device *device,
- u8 port,
- bool enforce_security)
+static int
+ib_cache_update(struct ib_device *device, u32 port, bool update_gids,
+ bool update_pkeys, bool enforce_security)
{
struct ib_port_attr *tprops = NULL;
- struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
+ struct ib_pkey_cache *pkey_cache = NULL;
+ struct ib_pkey_cache *old_pkey_cache = NULL;
int i;
int ret;
if (!rdma_is_port_valid(device, port))
- return;
+ return -EINVAL;
tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
if (!tprops)
- return;
+ return -ENOMEM;
ret = ib_query_port(device, port, tprops);
if (ret) {
@@ -1408,41 +1462,55 @@ static void ib_cache_update(struct ib_device *device,
goto err;
}
- if (!rdma_protocol_roce(device, port)) {
+ if (!rdma_protocol_roce(device, port) && update_gids) {
ret = config_non_roce_gid_cache(device, port,
- tprops->gid_tbl_len);
+ tprops);
if (ret)
goto err;
}
- pkey_cache = kmalloc(struct_size(pkey_cache, table,
- tprops->pkey_tbl_len),
- GFP_KERNEL);
- if (!pkey_cache)
- goto err;
-
- pkey_cache->table_len = tprops->pkey_tbl_len;
+ update_pkeys &= !!tprops->pkey_tbl_len;
- for (i = 0; i < pkey_cache->table_len; ++i) {
- ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
- if (ret) {
- dev_warn(&device->dev,
- "ib_query_pkey failed (%d) for index %d\n",
- ret, i);
+ if (update_pkeys) {
+ pkey_cache = kmalloc(struct_size(pkey_cache, table,
+ tprops->pkey_tbl_len),
+ GFP_KERNEL);
+ if (!pkey_cache) {
+ ret = -ENOMEM;
goto err;
}
- }
- write_lock_irq(&device->cache.lock);
+ pkey_cache->table_len = tprops->pkey_tbl_len;
+
+ for (i = 0; i < pkey_cache->table_len; ++i) {
+ ret = ib_query_pkey(device, port, i,
+ pkey_cache->table + i);
+ if (ret) {
+ dev_warn(&device->dev,
+ "ib_query_pkey failed (%d) for index %d\n",
+ ret, i);
+ goto err;
+ }
+ }
+ }
- old_pkey_cache = device->port_data[port].cache.pkey;
+ write_lock_irq(&device->cache_lock);
- device->port_data[port].cache.pkey = pkey_cache;
+ if (update_pkeys) {
+ old_pkey_cache = device->port_data[port].cache.pkey;
+ device->port_data[port].cache.pkey = pkey_cache;
+ }
device->port_data[port].cache.lmc = tprops->lmc;
+
+ if (device->port_data[port].cache.port_state != IB_PORT_NOP &&
+ device->port_data[port].cache.port_state != tprops->state)
+ ibdev_info(device, "Port: %d Link %s\n", port,
+ ib_port_state_to_str(tprops->state));
+
device->port_data[port].cache.port_state = tprops->state;
device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix;
- write_unlock_irq(&device->cache.lock);
+ write_unlock_irq(&device->cache_lock);
if (enforce_security)
ib_security_cache_change(device,
@@ -1451,74 +1519,110 @@ static void ib_cache_update(struct ib_device *device,
kfree(old_pkey_cache);
kfree(tprops);
- return;
+ return 0;
err:
kfree(pkey_cache);
kfree(tprops);
+ return ret;
+}
+
+static void ib_cache_event_task(struct work_struct *_work)
+{
+ struct ib_update_work *work =
+ container_of(_work, struct ib_update_work, work);
+ int ret;
+
+ /* Before distributing the cache update event, first sync
+ * the cache.
+ */
+ ret = ib_cache_update(work->event.device, work->event.element.port_num,
+ work->event.event == IB_EVENT_GID_CHANGE,
+ work->event.event == IB_EVENT_PKEY_CHANGE,
+ work->enforce_security);
+
+ /* GID event is notified already for individual GID entries by
+ * dispatch_gid_change_event(). Hence, notify for the rest of the
+ * events.
+ */
+ if (!ret && work->event.event != IB_EVENT_GID_CHANGE)
+ ib_dispatch_event_clients(&work->event);
+
+ kfree(work);
}
-static void ib_cache_task(struct work_struct *_work)
+static void ib_generic_event_task(struct work_struct *_work)
{
struct ib_update_work *work =
container_of(_work, struct ib_update_work, work);
- ib_cache_update(work->device,
- work->port_num,
- work->enforce_security);
+ ib_dispatch_event_clients(&work->event);
kfree(work);
}
-static void ib_cache_event(struct ib_event_handler *handler,
- struct ib_event *event)
+static bool is_cache_update_event(const struct ib_event *event)
+{
+ return (event->event == IB_EVENT_PORT_ERR ||
+ event->event == IB_EVENT_PORT_ACTIVE ||
+ event->event == IB_EVENT_LID_CHANGE ||
+ event->event == IB_EVENT_PKEY_CHANGE ||
+ event->event == IB_EVENT_CLIENT_REREGISTER ||
+ event->event == IB_EVENT_GID_CHANGE);
+}
+
+/**
+ * ib_dispatch_event - Dispatch an asynchronous event
+ * @event:Event to dispatch
+ *
+ * Low-level drivers must call ib_dispatch_event() to dispatch the
+ * event to all registered event handlers when an asynchronous event
+ * occurs.
+ */
+void ib_dispatch_event(const struct ib_event *event)
{
struct ib_update_work *work;
- if (event->event == IB_EVENT_PORT_ERR ||
- event->event == IB_EVENT_PORT_ACTIVE ||
- event->event == IB_EVENT_LID_CHANGE ||
- event->event == IB_EVENT_PKEY_CHANGE ||
- event->event == IB_EVENT_CLIENT_REREGISTER ||
- event->event == IB_EVENT_GID_CHANGE) {
- work = kmalloc(sizeof *work, GFP_ATOMIC);
- if (work) {
- INIT_WORK(&work->work, ib_cache_task);
- work->device = event->device;
- work->port_num = event->element.port_num;
- if (event->event == IB_EVENT_PKEY_CHANGE ||
- event->event == IB_EVENT_GID_CHANGE)
- work->enforce_security = true;
- else
- work->enforce_security = false;
-
- queue_work(ib_wq, &work->work);
- }
- }
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return;
+
+ if (is_cache_update_event(event))
+ INIT_WORK(&work->work, ib_cache_event_task);
+ else
+ INIT_WORK(&work->work, ib_generic_event_task);
+
+ work->event = *event;
+ if (event->event == IB_EVENT_PKEY_CHANGE ||
+ event->event == IB_EVENT_GID_CHANGE)
+ work->enforce_security = true;
+
+ queue_work(ib_wq, &work->work);
}
+EXPORT_SYMBOL(ib_dispatch_event);
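Because every event is now pushed through ib_wq, ib_dispatch_event() remains safe to call from the atomic contexts drivers usually raise events in (the work allocation is GFP_ATOMIC). The usual driver-side pattern, with ibdev and port as illustrative variables:

	struct ib_event event = {};

	event.device = ibdev;
	event.element.port_num = port;
	event.event = IB_EVENT_PORT_ACTIVE;
	ib_dispatch_event(&event);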
int ib_cache_setup_one(struct ib_device *device)
{
- unsigned int p;
+ u32 p;
int err;
- rwlock_init(&device->cache.lock);
-
err = gid_table_setup_one(device);
if (err)
return err;
- rdma_for_each_port (device, p)
- ib_cache_update(device, p, true);
+ rdma_for_each_port (device, p) {
+ err = ib_cache_update(device, p, true, true, true);
+ if (err) {
+ gid_table_cleanup_one(device);
+ return err;
+ }
+ }
- INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
- device, ib_cache_event);
- ib_register_event_handler(&device->cache.event_handler);
return 0;
}
void ib_cache_release_one(struct ib_device *device)
{
- unsigned int p;
+ u32 p;
/*
* The release function frees all the cache elements.
@@ -1534,14 +1638,12 @@ void ib_cache_release_one(struct ib_device *device)
void ib_cache_cleanup_one(struct ib_device *device)
{
- /* The cleanup function unregisters the event handler,
- * waits for all in-progress workqueue elements and cleans
- * up the GID cache. This function should be called after
- * the device was removed from the devices list and all
- * clients were removed, so the cache exists but is
+ /* The cleanup function waits for all in-progress workqueue
+ * elements and cleans up the GID cache. This function should be
+ * called after the device was removed from the devices list and
+ * all clients were removed, so the cache exists but is
* non-functional and shouldn't be updated anymore.
*/
- ib_unregister_event_handler(&device->cache.event_handler);
flush_workqueue(ib_wq);
gid_table_cleanup_one(device);
diff --git a/drivers/infiniband/core/cgroup.c b/drivers/infiniband/core/cgroup.c
index 388fd04e5f63..1f037fe01450 100644
--- a/drivers/infiniband/core/cgroup.c
+++ b/drivers/infiniband/core/cgroup.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#include "core_priv.h"
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index da10e6ccb43c..024df6ee239d 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1,36 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2004-2007 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved.
*/
#include <linux/completion.h>
@@ -51,13 +25,18 @@
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
+#include <rdma/ib_sysfs.h>
#include "cm_msgs.h"
#include "core_priv.h"
+#include "cm_trace.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");
+#define CM_DIRECT_RETRY_CTX ((void *) 1UL)
+#define CM_MRA_SETTING 24 /* 4.096us * 2^24 = ~68.7 seconds */
+
static const char * const ibcm_rej_reason_strs[] = {
[IB_CM_REJ_NO_QP] = "no QP",
[IB_CM_REJ_NO_EEC] = "no EEC",
@@ -92,6 +71,8 @@ static const char * const ibcm_rej_reason_strs[] = {
[IB_CM_REJ_INVALID_CLASS_VERSION] = "invalid class version",
[IB_CM_REJ_INVALID_FLOW_LABEL] = "invalid flow label",
[IB_CM_REJ_INVALID_ALT_FLOW_LABEL] = "invalid alt flow label",
+ [IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED] =
+ "vendor option is not supported",
};
const char *__attribute_const__ ibcm_reject_msg(int reason)
@@ -106,8 +87,21 @@ const char *__attribute_const__ ibcm_reject_msg(int reason)
}
EXPORT_SYMBOL(ibcm_reject_msg);
-static void cm_add_one(struct ib_device *device);
+struct cm_id_private;
+struct cm_work;
+static int cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device, void *client_data);
+static void cm_process_work(struct cm_id_private *cm_id_priv,
+ struct cm_work *work);
+static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
+ struct ib_cm_sidr_rep_param *param);
+static void cm_issue_dreq(struct cm_id_private *cm_id_priv);
+static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
+ void *private_data, u8 private_data_len);
+static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
+ enum ib_cm_rej_reason reason, void *ari,
+ u8 ari_length, const void *private_data,
+ u8 private_data_len);
static struct ib_client cm_client = {
.name = "cm",
@@ -130,8 +124,6 @@ static struct ib_cm {
__be32 random_id_operand;
struct list_head timewait_list;
struct workqueue_struct *wq;
- /* Sync on cm change port state */
- spinlock_t state_lock;
} cm;
/* Counter indexes ordered by attribute ID */
@@ -159,77 +151,34 @@ enum {
CM_COUNTER_GROUPS
};
-static char const counter_group_names[CM_COUNTER_GROUPS]
- [sizeof("cm_rx_duplicates")] = {
- "cm_tx_msgs", "cm_tx_retries",
- "cm_rx_msgs", "cm_rx_duplicates"
-};
-
-struct cm_counter_group {
- struct kobject obj;
- atomic_long_t counter[CM_ATTR_COUNT];
-};
-
struct cm_counter_attribute {
- struct attribute attr;
- int index;
-};
-
-#define CM_COUNTER_ATTR(_name, _index) \
-struct cm_counter_attribute cm_##_name##_counter_attr = { \
- .attr = { .name = __stringify(_name), .mode = 0444 }, \
- .index = _index \
-}
-
-static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
-static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
-static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
-static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
-static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
-static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
-static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
-static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
-static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
-static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
-static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);
-
-static struct attribute *cm_counter_default_attrs[] = {
- &cm_req_counter_attr.attr,
- &cm_mra_counter_attr.attr,
- &cm_rej_counter_attr.attr,
- &cm_rep_counter_attr.attr,
- &cm_rtu_counter_attr.attr,
- &cm_dreq_counter_attr.attr,
- &cm_drep_counter_attr.attr,
- &cm_sidr_req_counter_attr.attr,
- &cm_sidr_rep_counter_attr.attr,
- &cm_lap_counter_attr.attr,
- &cm_apr_counter_attr.attr,
- NULL
+ struct ib_port_attribute attr;
+ unsigned short group;
+ unsigned short index;
};
struct cm_port {
struct cm_device *cm_dev;
struct ib_mad_agent *mad_agent;
- struct kobject port_obj;
- u8 port_num;
- struct list_head cm_priv_prim_list;
- struct list_head cm_priv_altr_list;
- struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
+ struct ib_mad_agent *rep_agent;
+ u32 port_num;
+ atomic_long_t counters[CM_COUNTER_GROUPS][CM_ATTR_COUNT];
};
struct cm_device {
+ struct kref kref;
struct list_head list;
+ rwlock_t mad_agent_lock;
struct ib_device *ib_device;
u8 ack_delay;
int going_down;
- struct cm_port *port[0];
+ struct cm_port *port[];
};
struct cm_av {
struct cm_port *port;
- union ib_gid dgid;
struct rdma_ah_attr ah_attr;
+ u16 dlid_datapath;
u16 pkey_index;
u8 timeout;
};
@@ -242,11 +191,11 @@ struct cm_work {
__be32 local_id; /* Established / timewait */
__be32 remote_id;
struct ib_cm_event cm_event;
- struct sa_path_rec path[0];
+ struct sa_path_rec path[];
};
struct cm_timewait_info {
- struct cm_work work; /* Must be first. */
+ struct cm_work work;
struct list_head list;
struct rb_node remote_qp_node;
struct rb_node remote_id_node;
@@ -261,12 +210,15 @@ struct cm_id_private {
struct rb_node service_node;
struct rb_node sidr_id_node;
+ u32 sidr_slid;
spinlock_t lock; /* Do not acquire inside cm.lock */
struct completion comp;
- atomic_t refcount;
+ refcount_t refcount;
/* Number of clients sharing this ib_cm_id. Only valid for listeners.
- * Protected by the cm.lock spinlock. */
+ * Protected by the cm.lock spinlock.
+ */
int listen_sharecount;
+ struct rcu_head rcu;
struct ib_mad_send_buf *msg;
struct cm_timewait_info *timewait_info;
@@ -286,101 +238,155 @@ struct cm_id_private {
__be16 pkey;
u8 private_data_len;
u8 max_cm_retries;
- u8 peer_to_peer;
u8 responder_resources;
u8 initiator_depth;
u8 retry_count;
u8 rnr_retry_count;
- u8 service_timeout;
u8 target_ack_delay;
- struct list_head prim_list;
- struct list_head altr_list;
- /* Indicates that the send port mad is registered and av is set */
- int prim_send_port_not_ready;
- int altr_send_port_not_ready;
-
struct list_head work_list;
atomic_t work_count;
+
+ struct rdma_ucm_ece ece;
};
+static void cm_dev_release(struct kref *kref)
+{
+ struct cm_device *cm_dev = container_of(kref, struct cm_device, kref);
+ u32 i;
+
+ rdma_for_each_port(cm_dev->ib_device, i)
+ kfree(cm_dev->port[i - 1]);
+
+ kfree(cm_dev);
+}
+
+static void cm_device_put(struct cm_device *cm_dev)
+{
+ kref_put(&cm_dev->kref, cm_dev_release);
+}
+
static void cm_work_handler(struct work_struct *work);
static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
{
- if (atomic_dec_and_test(&cm_id_priv->refcount))
+ if (refcount_dec_and_test(&cm_id_priv->refcount))
complete(&cm_id_priv->comp);
}
-static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
- struct ib_mad_send_buf **msg)
+static struct ib_mad_send_buf *
+cm_alloc_msg_agent(struct cm_id_private *cm_id_priv, bool rep_agent)
{
struct ib_mad_agent *mad_agent;
struct ib_mad_send_buf *m;
struct ib_ah *ah;
- struct cm_av *av;
- unsigned long flags, flags2;
- int ret = 0;
- /* don't let the port to be released till the agent is down */
- spin_lock_irqsave(&cm.state_lock, flags2);
- spin_lock_irqsave(&cm.lock, flags);
- if (!cm_id_priv->prim_send_port_not_ready)
- av = &cm_id_priv->av;
- else if (!cm_id_priv->altr_send_port_not_ready &&
- (cm_id_priv->alt_av.port))
- av = &cm_id_priv->alt_av;
- else {
- pr_info("%s: not valid CM id\n", __func__);
- ret = -ENODEV;
- spin_unlock_irqrestore(&cm.lock, flags);
- goto out;
- }
- spin_unlock_irqrestore(&cm.lock, flags);
- /* Make sure the port haven't released the mad yet */
- mad_agent = cm_id_priv->av.port->mad_agent;
+ lockdep_assert_held(&cm_id_priv->lock);
+
+ if (!cm_id_priv->av.port)
+ return ERR_PTR(-EINVAL);
+
+ read_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
+ mad_agent = rep_agent ? cm_id_priv->av.port->rep_agent :
+ cm_id_priv->av.port->mad_agent;
if (!mad_agent) {
- pr_info("%s: not a valid MAD agent\n", __func__);
- ret = -ENODEV;
+ m = ERR_PTR(-EINVAL);
goto out;
}
- ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr, 0);
+
+ ah = rdma_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr, 0);
if (IS_ERR(ah)) {
- ret = PTR_ERR(ah);
+ m = ERR_CAST(ah);
goto out;
}
m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
- av->pkey_index,
+ cm_id_priv->av.pkey_index,
0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
GFP_ATOMIC,
IB_MGMT_BASE_VERSION);
if (IS_ERR(m)) {
rdma_destroy_ah(ah, 0);
- ret = PTR_ERR(m);
goto out;
}
- /* Timeout set by caller if response is expected. */
m->ah = ah;
- m->retries = cm_id_priv->max_cm_retries;
-
- atomic_inc(&cm_id_priv->refcount);
- m->context[0] = cm_id_priv;
- *msg = m;
out:
- spin_unlock_irqrestore(&cm.state_lock, flags2);
- return ret;
+ read_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
+ return m;
}
-static struct ib_mad_send_buf *cm_alloc_response_msg_no_ah(struct cm_port *port,
- struct ib_mad_recv_wc *mad_recv_wc)
+static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
{
- return ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
- 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
- GFP_ATOMIC,
- IB_MGMT_BASE_VERSION);
+ return cm_alloc_msg_agent(cm_id_priv, false);
+}
+
+static void cm_free_msg(struct ib_mad_send_buf *msg)
+{
+ if (msg->ah)
+ rdma_destroy_ah(msg->ah, 0);
+ ib_free_send_mad(msg);
+}
+
+static struct ib_mad_send_buf *
+cm_alloc_priv_msg_rep(struct cm_id_private *cm_id_priv, enum ib_cm_state state,
+ bool rep_agent)
+{
+ struct ib_mad_send_buf *msg;
+
+ lockdep_assert_held(&cm_id_priv->lock);
+
+ msg = cm_alloc_msg_agent(cm_id_priv, rep_agent);
+ if (IS_ERR(msg))
+ return msg;
+
+ cm_id_priv->msg = msg;
+ refcount_inc(&cm_id_priv->refcount);
+ msg->context[0] = cm_id_priv;
+ msg->context[1] = (void *) (unsigned long) state;
+
+ msg->retries = cm_id_priv->max_cm_retries;
+ msg->timeout_ms = cm_id_priv->timeout_ms;
+
+ return msg;
+}
+
+static struct ib_mad_send_buf *
+cm_alloc_priv_msg(struct cm_id_private *cm_id_priv, enum ib_cm_state state)
+{
+ return cm_alloc_priv_msg_rep(cm_id_priv, state, false);
+}
+
+static void cm_free_priv_msg(struct ib_mad_send_buf *msg)
+{
+ struct cm_id_private *cm_id_priv = msg->context[0];
+
+ lockdep_assert_held(&cm_id_priv->lock);
+
+ if (!WARN_ON(cm_id_priv->msg != msg))
+ cm_id_priv->msg = NULL;
+
+ if (msg->ah)
+ rdma_destroy_ah(msg->ah, 0);
+ cm_deref_id(cm_id_priv);
+ ib_free_send_mad(msg);
+}
+
+static struct ib_mad_send_buf *
+cm_alloc_response_msg_no_ah(struct cm_port *port,
+ struct ib_mad_recv_wc *mad_recv_wc,
+ bool direct_retry)
+{
+ struct ib_mad_send_buf *m;
+
+ m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
+ 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
+ GFP_ATOMIC, IB_MGMT_BASE_VERSION);
+ if (!IS_ERR(m))
+ m->context[0] = direct_retry ? CM_DIRECT_RETRY_CTX : NULL;
+
+ return m;
}
static int cm_create_response_msg_ah(struct cm_port *port,
@@ -398,29 +404,21 @@ static int cm_create_response_msg_ah(struct cm_port *port,
return 0;
}
-static void cm_free_msg(struct ib_mad_send_buf *msg)
-{
- if (msg->ah)
- rdma_destroy_ah(msg->ah, 0);
- if (msg->context[0])
- cm_deref_id(msg->context[0]);
- ib_free_send_mad(msg);
-}
-
static int cm_alloc_response_msg(struct cm_port *port,
struct ib_mad_recv_wc *mad_recv_wc,
+ bool direct_retry,
struct ib_mad_send_buf **msg)
{
struct ib_mad_send_buf *m;
int ret;
- m = cm_alloc_response_msg_no_ah(port, mad_recv_wc);
+ m = cm_alloc_response_msg_no_ah(port, mad_recv_wc, direct_retry);
if (IS_ERR(m))
return PTR_ERR(m);
ret = cm_create_response_msg_ah(port, mad_recv_wc, m);
if (ret) {
- cm_free_msg(m);
+ ib_free_send_mad(m);
return ret;
}
@@ -428,8 +426,7 @@ static int cm_alloc_response_msg(struct cm_port *port,
return 0;
}
-static void * cm_copy_private_data(const void *private_data,
- u8 private_data_len)
+static void *cm_copy_private_data(const void *private_data, u8 private_data_len)
{
void *data;
@@ -453,62 +450,38 @@ static void cm_set_private_data(struct cm_id_private *cm_id_priv,
cm_id_priv->private_data_len = private_data_len;
}
-static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
- struct ib_grh *grh, struct cm_av *av)
+static void cm_set_av_port(struct cm_av *av, struct cm_port *port)
{
- struct rdma_ah_attr new_ah_attr;
- int ret;
+ struct cm_port *old_port = av->port;
- av->port = port;
- av->pkey_index = wc->pkey_index;
+ if (old_port == port)
+ return;
- /*
- * av->ah_attr might be initialized based on past wc during incoming
- * connect request or while sending out connect request. So initialize
- * a new ah_attr on stack. If initialization fails, old ah_attr is
- * used for sending any responses. If initialization is successful,
- * than new ah_attr is used by overwriting old one.
- */
- ret = ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
- port->port_num, wc,
- grh, &new_ah_attr);
- if (ret)
- return ret;
+ av->port = port;
+ if (old_port)
+ cm_device_put(old_port->cm_dev);
+ if (port)
+ kref_get(&port->cm_dev->kref);
+}
- rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
- return 0;
+static void cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
+ struct rdma_ah_attr *ah_attr, struct cm_av *av)
+{
+ cm_set_av_port(av, port);
+ av->pkey_index = wc->pkey_index;
+ rdma_move_ah_attr(&av->ah_attr, ah_attr);
}
static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
struct ib_grh *grh, struct cm_av *av)
{
- av->port = port;
+ cm_set_av_port(av, port);
av->pkey_index = wc->pkey_index;
return ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
port->port_num, wc,
grh, &av->ah_attr);
}
-static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
- struct cm_av *av,
- struct cm_port *port)
-{
- unsigned long flags;
- int ret = 0;
-
- spin_lock_irqsave(&cm.lock, flags);
-
- if (&cm_id_priv->av == av)
- list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
- else if (&cm_id_priv->alt_av == av)
- list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
- else
- ret = -EINVAL;
-
- spin_unlock_irqrestore(&cm.lock, flags);
- return ret;
-}
-
static struct cm_port *
get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr)
{
@@ -552,8 +525,7 @@ get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr)
static int cm_init_av_by_path(struct sa_path_rec *path,
const struct ib_gid_attr *sgid_attr,
- struct cm_av *av,
- struct cm_id_private *cm_id_priv)
+ struct cm_av *av)
{
struct rdma_ah_attr new_ah_attr;
struct cm_device *cm_dev;
@@ -570,7 +542,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path,
if (ret)
return ret;
- av->port = port;
+ cm_set_av_port(av, port);
/*
* av->ah_attr might be initialized based on wc or during
@@ -587,60 +559,41 @@ static int cm_init_av_by_path(struct sa_path_rec *path,
return ret;
av->timeout = path->packet_life_time + 1;
-
- ret = add_cm_id_to_port_list(cm_id_priv, av, port);
- if (ret) {
- rdma_destroy_ah_attr(&new_ah_attr);
- return ret;
- }
rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
return 0;
}
-static int cm_alloc_id(struct cm_id_private *cm_id_priv)
+/* Move av created by cm_init_av_by_path(), so av.dgid is not moved */
+static void cm_move_av_from_path(struct cm_av *dest, struct cm_av *src)
{
- int err;
- u32 id;
-
- err = xa_alloc_cyclic_irq(&cm.local_id_table, &id, cm_id_priv,
- xa_limit_32b, &cm.local_id_next, GFP_KERNEL);
-
- cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
- return err;
+ cm_set_av_port(dest, src->port);
+ cm_set_av_port(src, NULL);
+ dest->pkey_index = src->pkey_index;
+ rdma_move_ah_attr(&dest->ah_attr, &src->ah_attr);
+ dest->timeout = src->timeout;
}
-static u32 cm_local_id(__be32 local_id)
+static void cm_destroy_av(struct cm_av *av)
{
- return (__force u32) (local_id ^ cm.random_id_operand);
+ rdma_destroy_ah_attr(&av->ah_attr);
+ cm_set_av_port(av, NULL);
}
-static void cm_free_id(__be32 local_id)
+static u32 cm_local_id(__be32 local_id)
{
- xa_erase_irq(&cm.local_id_table, cm_local_id(local_id));
+ return (__force u32) (local_id ^ cm.random_id_operand);
}
-static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
+static struct cm_id_private *cm_acquire_id(__be32 local_id, __be32 remote_id)
{
struct cm_id_private *cm_id_priv;
+ rcu_read_lock();
cm_id_priv = xa_load(&cm.local_id_table, cm_local_id(local_id));
- if (cm_id_priv) {
- if (cm_id_priv->id.remote_id == remote_id)
- atomic_inc(&cm_id_priv->refcount);
- else
- cm_id_priv = NULL;
- }
-
- return cm_id_priv;
-}
-
-static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
-{
- struct cm_id_private *cm_id_priv;
-
- spin_lock_irq(&cm.lock);
- cm_id_priv = cm_get_id(local_id, remote_id);
- spin_unlock_irq(&cm.lock);
+ if (!cm_id_priv || cm_id_priv->id.remote_id != remote_id ||
+ !refcount_inc_not_zero(&cm_id_priv->refcount))
+ cm_id_priv = NULL;
+ rcu_read_unlock();
return cm_id_priv;
}
@@ -670,22 +623,25 @@ static int be64_gt(__be64 a, __be64 b)
return (__force u64) a > (__force u64) b;
}
-static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
+/*
+ * Inserts a new cm_id_priv into the listen_service_table. Returns cm_id_priv
+ * if the new ID was inserted, NULL if it could not be inserted due to a
+ * collision, or the existing cm_id_priv ready for shared usage.
+ */
+static struct cm_id_private *cm_insert_listen(struct cm_id_private *cm_id_priv,
+ ib_cm_handler shared_handler)
{
struct rb_node **link = &cm.listen_service_table.rb_node;
struct rb_node *parent = NULL;
struct cm_id_private *cur_cm_id_priv;
__be64 service_id = cm_id_priv->id.service_id;
- __be64 service_mask = cm_id_priv->id.service_mask;
+ unsigned long flags;
+ spin_lock_irqsave(&cm.lock, flags);
while (*link) {
parent = *link;
cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
service_node);
- if ((cur_cm_id_priv->id.service_mask & service_id) ==
- (service_mask & cur_cm_id_priv->id.service_id) &&
- (cm_id_priv->id.device == cur_cm_id_priv->id.device))
- return cur_cm_id_priv;
if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
link = &(*link)->rb_left;
@@ -695,26 +651,38 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
link = &(*link)->rb_left;
else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
link = &(*link)->rb_right;
- else
- link = &(*link)->rb_right;
+ else {
+ /*
+ * Sharing an ib_cm_id with different handlers is not
+ * supported
+ */
+ if (cur_cm_id_priv->id.cm_handler != shared_handler ||
+ cur_cm_id_priv->id.context ||
+ WARN_ON(!cur_cm_id_priv->id.cm_handler)) {
+ spin_unlock_irqrestore(&cm.lock, flags);
+ return NULL;
+ }
+ refcount_inc(&cur_cm_id_priv->refcount);
+ cur_cm_id_priv->listen_sharecount++;
+ spin_unlock_irqrestore(&cm.lock, flags);
+ return cur_cm_id_priv;
+ }
}
+ cm_id_priv->listen_sharecount++;
rb_link_node(&cm_id_priv->service_node, parent, link);
rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
- return NULL;
+ spin_unlock_irqrestore(&cm.lock, flags);
+ return cm_id_priv;
}
-static struct cm_id_private * cm_find_listen(struct ib_device *device,
- __be64 service_id)
+static struct cm_id_private *cm_find_listen(struct ib_device *device,
+ __be64 service_id)
{
struct rb_node *node = cm.listen_service_table.rb_node;
struct cm_id_private *cm_id_priv;
while (node) {
cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
- if ((cm_id_priv->id.service_mask & service_id) ==
- cm_id_priv->id.service_id &&
- (cm_id_priv->id.device == device))
- return cm_id_priv;
if (device < cm_id_priv->id.device)
node = node->rb_left;
@@ -724,14 +692,16 @@ static struct cm_id_private * cm_find_listen(struct ib_device *device,
node = node->rb_left;
else if (be64_gt(service_id, cm_id_priv->id.service_id))
node = node->rb_right;
- else
- node = node->rb_right;
+ else {
+ refcount_inc(&cm_id_priv->refcount);
+ return cm_id_priv;
+ }
}
return NULL;
}
-static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
- *timewait_info)
+static struct cm_timewait_info *
+cm_insert_remote_id(struct cm_timewait_info *timewait_info)
{
struct rb_node **link = &cm.remote_id_table.rb_node;
struct rb_node *parent = NULL;
@@ -760,12 +730,14 @@ static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
return NULL;
}
-static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
- __be32 remote_id)
+static struct cm_id_private *cm_find_remote_id(__be64 remote_ca_guid,
+ __be32 remote_id)
{
struct rb_node *node = cm.remote_id_table.rb_node;
struct cm_timewait_info *timewait_info;
+ struct cm_id_private *res = NULL;
+ spin_lock_irq(&cm.lock);
while (node) {
timewait_info = rb_entry(node, struct cm_timewait_info,
remote_id_node);
@@ -777,14 +749,18 @@ static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
node = node->rb_left;
else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
node = node->rb_right;
- else
- return timewait_info;
+ else {
+ res = cm_acquire_id(timewait_info->work.local_id,
+ timewait_info->work.remote_id);
+ break;
+ }
}
- return NULL;
+ spin_unlock_irq(&cm.lock);
+ return res;
}
-static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
- *timewait_info)
+static struct cm_timewait_info *
+cm_insert_remote_qpn(struct cm_timewait_info *timewait_info)
{
struct rb_node **link = &cm.remote_qp_table.rb_node;
struct rb_node *parent = NULL;
@@ -813,13 +789,12 @@ static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
return NULL;
}
-static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
- *cm_id_priv)
+static struct cm_id_private *
+cm_insert_remote_sidr(struct cm_id_private *cm_id_priv)
{
struct rb_node **link = &cm.remote_sidr_table.rb_node;
struct rb_node *parent = NULL;
struct cm_id_private *cur_cm_id_priv;
- union ib_gid *port_gid = &cm_id_priv->av.dgid;
__be32 remote_id = cm_id_priv->id.remote_id;
while (*link) {
@@ -831,12 +806,9 @@ static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id))
link = &(*link)->rb_right;
else {
- int cmp;
- cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
- sizeof *port_gid);
- if (cmp < 0)
+ if (cur_cm_id_priv->sidr_slid < cm_id_priv->sidr_slid)
link = &(*link)->rb_left;
- else if (cmp > 0)
+ else if (cur_cm_id_priv->sidr_slid > cm_id_priv->sidr_slid)
link = &(*link)->rb_right;
else
return cur_cm_id_priv;
@@ -847,21 +819,12 @@ static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
return NULL;
}
-static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
- enum ib_cm_sidr_status status)
-{
- struct ib_cm_sidr_rep_param param;
-
- memset(&param, 0, sizeof param);
- param.status = status;
- ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
-}
-
-struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
- ib_cm_handler cm_handler,
- void *context)
+static struct cm_id_private *cm_alloc_id_priv(struct ib_device *device,
+ ib_cm_handler cm_handler,
+ void *context)
{
struct cm_id_private *cm_id_priv;
+ u32 id;
int ret;
cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
@@ -873,26 +836,54 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
cm_id_priv->id.cm_handler = cm_handler;
cm_id_priv->id.context = context;
cm_id_priv->id.remote_cm_qpn = 1;
- ret = cm_alloc_id(cm_id_priv);
- if (ret)
- goto error;
+ RB_CLEAR_NODE(&cm_id_priv->service_node);
+ RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
spin_lock_init(&cm_id_priv->lock);
init_completion(&cm_id_priv->comp);
INIT_LIST_HEAD(&cm_id_priv->work_list);
- INIT_LIST_HEAD(&cm_id_priv->prim_list);
- INIT_LIST_HEAD(&cm_id_priv->altr_list);
atomic_set(&cm_id_priv->work_count, -1);
- atomic_set(&cm_id_priv->refcount, 1);
- return &cm_id_priv->id;
+ refcount_set(&cm_id_priv->refcount, 1);
+
+ ret = xa_alloc_cyclic(&cm.local_id_table, &id, NULL, xa_limit_32b,
+ &cm.local_id_next, GFP_KERNEL);
+ if (ret < 0)
+ goto error;
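+	/*
+	 * The XOR with random_id_operand keeps the wire-visible local_id from
+	 * being a bare xarray index; the lookup side (cm_local_id()) is
+	 * assumed to apply the same XOR to recover the index.
+	 */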
+ cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
+
+ return cm_id_priv;
error:
kfree(cm_id_priv);
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR(ret);
+}
+
+/*
+ * Make the ID visible to the MAD handlers and other threads that use the
+ * xarray.
+ */
+static void cm_finalize_id(struct cm_id_private *cm_id_priv)
+{
+ xa_store(&cm.local_id_table, cm_local_id(cm_id_priv->id.local_id),
+ cm_id_priv, GFP_ATOMIC);
+}
+
+struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
+ ib_cm_handler cm_handler,
+ void *context)
+{
+ struct cm_id_private *cm_id_priv;
+
+ cm_id_priv = cm_alloc_id_priv(device, cm_handler, context);
+ if (IS_ERR(cm_id_priv))
+ return ERR_CAST(cm_id_priv);
+
+ cm_finalize_id(cm_id_priv);
+ return &cm_id_priv->id;
}
EXPORT_SYMBOL(ib_create_cm_id);
-static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
+static struct cm_work *cm_dequeue_work(struct cm_id_private *cm_id_priv)
{
struct cm_work *work;
@@ -911,6 +902,36 @@ static void cm_free_work(struct cm_work *work)
kfree(work);
}
+static void cm_queue_work_unlock(struct cm_id_private *cm_id_priv,
+ struct cm_work *work)
+ __releases(&cm_id_priv->lock)
+{
+ bool immediate;
+
+ /*
+	 * To deliver the event to the user callback we have to drop the
+	 * spinlock; however, we need to ensure that the user callback is
+	 * single threaded and receives events in temporal order. If events
+	 * are already being processed then thread new events onto a list and
+	 * the thread currently processing will pick them up.
+ */
+ immediate = atomic_inc_and_test(&cm_id_priv->work_count);
+ if (!immediate) {
+ list_add_tail(&work->list, &cm_id_priv->work_list);
+ /*
+		 * This routine always consumes the incoming reference. Once the
+		 * work is queued on the work_list a reference is already held by
+		 * the thread currently running cm_process_work(), so this one is
+		 * not needed.
+ */
+ cm_deref_id(cm_id_priv);
+ }
+ spin_unlock_irq(&cm_id_priv->lock);
+
+ if (immediate)
+ cm_process_work(cm_id_priv, work);
+}
+
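A minimal sketch of the consumer side that pairs with cm_queue_work_unlock()
(illustrative only; the loop below is an assumption about how the delivering
thread drains the list, not a quote of cm_process_work()): work_count starts
at -1, so the first producer's atomic_inc_and_test() lands on zero and
delivers directly, while later producers queue and are picked up here.

	while (!atomic_add_negative(-1, &cm_id_priv->work_count)) {
		spin_lock_irq(&cm_id_priv->lock);
		work = cm_dequeue_work(cm_id_priv);
		spin_unlock_irq(&cm_id_priv->lock);
		if (!work)
			break;
		/* one callback at a time, in temporal order */
		cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
		cm_free_work(work);
	}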
static inline int cm_convert_to_ms(int iba_time)
{
/* approximate conversion to ms from 4.096us x 2^iba_time */
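	/* e.g. iba_time = 18: 4.096us * 2^18 ~= 1.07s (4.096us is about 2^-8 ms, so roughly 2^(18-8) = 1024 ms) */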
@@ -936,8 +957,10 @@ static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
return min(31, ack_timeout);
}
-static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
+static void cm_remove_remote(struct cm_id_private *cm_id_priv)
{
+ struct cm_timewait_info *timewait_info = cm_id_priv->timewait_info;
+
if (timewait_info->inserted_remote_id) {
rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
timewait_info->inserted_remote_id = 0;
@@ -949,7 +972,7 @@ static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
}
}
-static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
+static struct cm_timewait_info *cm_create_timewait_info(__be32 local_id)
{
struct cm_timewait_info *timewait_info;
@@ -969,12 +992,14 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
unsigned long flags;
struct cm_device *cm_dev;
+ lockdep_assert_held(&cm_id_priv->lock);
+
cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client);
if (!cm_dev)
return;
spin_lock_irqsave(&cm.lock, flags);
- cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cm_remove_remote(cm_id_priv);
list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
spin_unlock_irqrestore(&cm.lock, flags);
@@ -993,6 +1018,11 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
msecs_to_jiffies(wait_time));
spin_unlock_irqrestore(&cm.lock, flags);
+ /*
+ * The timewait_info is converted into a work and gets freed during
+ * cm_free_work() in cm_timewait_handler().
+ */
+ BUILD_BUG_ON(offsetof(struct cm_timewait_info, work) != 0);
cm_id_priv->timewait_info = NULL;
}
@@ -1000,122 +1030,157 @@ static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
{
unsigned long flags;
+ lockdep_assert_held(&cm_id_priv->lock);
+
cm_id_priv->id.state = IB_CM_IDLE;
if (cm_id_priv->timewait_info) {
spin_lock_irqsave(&cm.lock, flags);
- cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cm_remove_remote(cm_id_priv);
spin_unlock_irqrestore(&cm.lock, flags);
kfree(cm_id_priv->timewait_info);
cm_id_priv->timewait_info = NULL;
}
}
+static noinline void cm_destroy_id_wait_timeout(struct ib_cm_id *cm_id,
+ enum ib_cm_state old_state)
+{
+ struct cm_id_private *cm_id_priv;
+
+ cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+ pr_err_ratelimited("%s: cm_id=%p timed out. state %d -> %d, refcnt=%d\n", __func__,
+ cm_id, old_state, cm_id->state, refcount_read(&cm_id_priv->refcount));
+}
+
static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
{
struct cm_id_private *cm_id_priv;
+ enum ib_cm_state old_state;
+ unsigned long timeout;
struct cm_work *work;
+ int ret;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-retest:
spin_lock_irq(&cm_id_priv->lock);
+ old_state = cm_id->state;
+retest:
switch (cm_id->state) {
case IB_CM_LISTEN:
- spin_unlock_irq(&cm_id_priv->lock);
-
- spin_lock_irq(&cm.lock);
+ spin_lock(&cm.lock);
if (--cm_id_priv->listen_sharecount > 0) {
/* The id is still shared. */
+ WARN_ON(refcount_read(&cm_id_priv->refcount) == 1);
+ spin_unlock(&cm.lock);
+ spin_unlock_irq(&cm_id_priv->lock);
cm_deref_id(cm_id_priv);
- spin_unlock_irq(&cm.lock);
return;
}
+ cm_id->state = IB_CM_IDLE;
rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
- spin_unlock_irq(&cm.lock);
+ RB_CLEAR_NODE(&cm_id_priv->service_node);
+ spin_unlock(&cm.lock);
break;
case IB_CM_SIDR_REQ_SENT:
cm_id->state = IB_CM_IDLE;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- spin_unlock_irq(&cm_id_priv->lock);
+ ib_cancel_mad(cm_id_priv->msg);
break;
case IB_CM_SIDR_REQ_RCVD:
- spin_unlock_irq(&cm_id_priv->lock);
- cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
- spin_lock_irq(&cm.lock);
- if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
- rb_erase(&cm_id_priv->sidr_id_node,
- &cm.remote_sidr_table);
- spin_unlock_irq(&cm.lock);
+ cm_send_sidr_rep_locked(cm_id_priv,
+ &(struct ib_cm_sidr_rep_param){
+ .status = IB_SIDR_REJECT });
+ /* cm_send_sidr_rep_locked will not move to IDLE if it fails */
+ cm_id->state = IB_CM_IDLE;
break;
case IB_CM_REQ_SENT:
case IB_CM_MRA_REQ_RCVD:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- spin_unlock_irq(&cm_id_priv->lock);
- ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
- &cm_id_priv->id.device->node_guid,
- sizeof cm_id_priv->id.device->node_guid,
- NULL, 0);
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_send_rej_locked(cm_id_priv, IB_CM_REJ_TIMEOUT,
+ &cm_id_priv->id.device->node_guid,
+ sizeof(cm_id_priv->id.device->node_guid),
+ NULL, 0);
break;
case IB_CM_REQ_RCVD:
if (err == -ENOMEM) {
/* Do not reject to allow future retries. */
cm_reset_to_idle(cm_id_priv);
- spin_unlock_irq(&cm_id_priv->lock);
} else {
- spin_unlock_irq(&cm_id_priv->lock);
- ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
- NULL, 0, NULL, 0);
+ cm_send_rej_locked(cm_id_priv,
+ IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
+ NULL, 0);
}
break;
case IB_CM_REP_SENT:
case IB_CM_MRA_REP_RCVD:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- /* Fall through */
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_send_rej_locked(cm_id_priv, IB_CM_REJ_CONSUMER_DEFINED, NULL,
+ 0, NULL, 0);
+ goto retest;
case IB_CM_MRA_REQ_SENT:
case IB_CM_REP_RCVD:
case IB_CM_MRA_REP_SENT:
- spin_unlock_irq(&cm_id_priv->lock);
- ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
- NULL, 0, NULL, 0);
+ cm_send_rej_locked(cm_id_priv, IB_CM_REJ_CONSUMER_DEFINED, NULL,
+ 0, NULL, 0);
break;
case IB_CM_ESTABLISHED:
- spin_unlock_irq(&cm_id_priv->lock);
- if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
+ if (cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
+ cm_id->state = IB_CM_IDLE;
break;
- ib_send_cm_dreq(cm_id, NULL, 0);
+ }
+ cm_issue_dreq(cm_id_priv);
+ cm_enter_timewait(cm_id_priv);
goto retest;
case IB_CM_DREQ_SENT:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->msg);
cm_enter_timewait(cm_id_priv);
- spin_unlock_irq(&cm_id_priv->lock);
- break;
+ goto retest;
case IB_CM_DREQ_RCVD:
- spin_unlock_irq(&cm_id_priv->lock);
- ib_send_cm_drep(cm_id, NULL, 0);
+ cm_send_drep_locked(cm_id_priv, NULL, 0);
+ WARN_ON(cm_id->state != IB_CM_TIMEWAIT);
+ goto retest;
+ case IB_CM_TIMEWAIT:
+ /*
+ * The cm_acquire_id in cm_timewait_handler will stop working
+ * once we do xa_erase below, so just move to idle here for
+ * consistency.
+ */
+ cm_id->state = IB_CM_IDLE;
break;
- default:
- spin_unlock_irq(&cm_id_priv->lock);
+ case IB_CM_IDLE:
break;
}
+ WARN_ON(cm_id->state != IB_CM_IDLE);
- spin_lock_irq(&cm.lock);
- if (!list_empty(&cm_id_priv->altr_list) &&
- (!cm_id_priv->altr_send_port_not_ready))
- list_del(&cm_id_priv->altr_list);
- if (!list_empty(&cm_id_priv->prim_list) &&
- (!cm_id_priv->prim_send_port_not_ready))
- list_del(&cm_id_priv->prim_list);
- spin_unlock_irq(&cm.lock);
+ spin_lock(&cm.lock);
+	/* Required for cleanup paths related to cm_req_handler() */
+ if (cm_id_priv->timewait_info) {
+ cm_remove_remote(cm_id_priv);
+ kfree(cm_id_priv->timewait_info);
+ cm_id_priv->timewait_info = NULL;
+ }
+
+ WARN_ON(cm_id_priv->listen_sharecount);
+ WARN_ON(!RB_EMPTY_NODE(&cm_id_priv->service_node));
+ if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
+ rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
+ spin_unlock(&cm.lock);
+ spin_unlock_irq(&cm_id_priv->lock);
- cm_free_id(cm_id->local_id);
+ xa_erase(&cm.local_id_table, cm_local_id(cm_id->local_id));
cm_deref_id(cm_id_priv);
- wait_for_completion(&cm_id_priv->comp);
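+	/* Per wait iteration: the full retransmit window (max_cm_retries * timeout_ms) plus a 25% margin */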
+ timeout = msecs_to_jiffies((cm_id_priv->max_cm_retries * cm_id_priv->timeout_ms * 5) / 4);
+ do {
+ ret = wait_for_completion_timeout(&cm_id_priv->comp, timeout);
+ if (!ret) /* timeout happened */
+ cm_destroy_id_wait_timeout(cm_id, old_state);
+ } while (!ret);
+
while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
cm_free_work(work);
- rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr);
- rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr);
+ cm_destroy_av(&cm_id_priv->av);
+ cm_destroy_av(&cm_id_priv->alt_av);
kfree(cm_id_priv->private_data);
- kfree(cm_id_priv);
+ kfree_rcu(cm_id_priv, rcu);
}
void ib_destroy_cm_id(struct ib_cm_id *cm_id)
@@ -1124,70 +1189,63 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id)
}
EXPORT_SYMBOL(ib_destroy_cm_id);
+static int cm_init_listen(struct cm_id_private *cm_id_priv, __be64 service_id)
+{
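+	/*
+	 * A caller-chosen service ID must not fall inside the CM-assigned
+	 * range; only the literal IB_CM_ASSIGN_SERVICE_ID request may.
+	 */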
+ if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
+ (service_id != IB_CM_ASSIGN_SERVICE_ID))
+ return -EINVAL;
+
+ if (service_id == IB_CM_ASSIGN_SERVICE_ID)
+ cm_id_priv->id.service_id = cpu_to_be64(cm.listen_service_id++);
+ else
+ cm_id_priv->id.service_id = service_id;
+
+ return 0;
+}
+
/**
- * __ib_cm_listen - Initiates listening on the specified service ID for
+ * ib_cm_listen - Initiates listening on the specified service ID for
* connection and service ID resolution requests.
* @cm_id: Connection identifier associated with the listen request.
* @service_id: Service identifier matched against incoming connection
* and service ID resolution requests. The service ID should be specified
* network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
* assign a service ID to the caller.
- * @service_mask: Mask applied to service ID used to listen across a
- * range of service IDs. If set to 0, the service ID is matched
- * exactly. This parameter is ignored if %service_id is set to
- * IB_CM_ASSIGN_SERVICE_ID.
*/
-static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
- __be64 service_mask)
+int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id)
{
- struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
- int ret = 0;
-
- service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
- service_id &= service_mask;
- if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
- (service_id != IB_CM_ASSIGN_SERVICE_ID))
- return -EINVAL;
-
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- if (cm_id->state != IB_CM_IDLE)
- return -EINVAL;
-
- cm_id->state = IB_CM_LISTEN;
- ++cm_id_priv->listen_sharecount;
+ struct cm_id_private *cm_id_priv =
+ container_of(cm_id, struct cm_id_private, id);
+ unsigned long flags;
+ int ret;
- if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
- cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
- cm_id->service_mask = ~cpu_to_be64(0);
- } else {
- cm_id->service_id = service_id;
- cm_id->service_mask = service_mask;
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ if (cm_id_priv->id.state != IB_CM_IDLE) {
+ ret = -EINVAL;
+ goto out;
}
- cur_cm_id_priv = cm_insert_listen(cm_id_priv);
- if (cur_cm_id_priv) {
- cm_id->state = IB_CM_IDLE;
- --cm_id_priv->listen_sharecount;
+ ret = cm_init_listen(cm_id_priv, service_id);
+ if (ret)
+ goto out;
+
+ if (!cm_insert_listen(cm_id_priv, NULL)) {
ret = -EBUSY;
+ goto out;
}
- return ret;
-}
-int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask)
-{
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&cm.lock, flags);
- ret = __ib_cm_listen(cm_id, service_id, service_mask);
- spin_unlock_irqrestore(&cm.lock, flags);
+ cm_id_priv->id.state = IB_CM_LISTEN;
+ ret = 0;
+out:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_cm_listen);
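A hedged usage sketch of the reworked single-argument listen API (the handler
and the service ID value below are made up for illustration and are not part
of this file):

static int example_cm_handler(struct ib_cm_id *cm_id,
			      const struct ib_cm_event *event)
{
	/* returning non-zero asks the CM to destroy this id */
	return 0;
}

static struct ib_cm_id *example_start_listen(struct ib_device *device)
{
	struct ib_cm_id *cm_id;
	int ret;

	cm_id = ib_create_cm_id(device, example_cm_handler, NULL);
	if (IS_ERR(cm_id))
		return cm_id;

	ret = ib_cm_listen(cm_id, cpu_to_be64(0x1000ULL));
	if (ret) {
		ib_destroy_cm_id(cm_id);
		return ERR_PTR(ret);
	}
	return cm_id;
}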
/**
- * Create a new listening ib_cm_id and listen on the given service ID.
+ * ib_cm_insert_listen - Create a new listening ib_cm_id and listen on
+ * the given service ID.
*
* If there's an existing ID listening on that same device and service ID,
* return it.
@@ -1206,59 +1264,57 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
ib_cm_handler cm_handler,
__be64 service_id)
{
+ struct cm_id_private *listen_id_priv;
struct cm_id_private *cm_id_priv;
- struct ib_cm_id *cm_id;
- unsigned long flags;
int err = 0;
/* Create an ID in advance, since the creation may sleep */
- cm_id = ib_create_cm_id(device, cm_handler, NULL);
- if (IS_ERR(cm_id))
- return cm_id;
+ cm_id_priv = cm_alloc_id_priv(device, cm_handler, NULL);
+ if (IS_ERR(cm_id_priv))
+ return ERR_CAST(cm_id_priv);
- spin_lock_irqsave(&cm.lock, flags);
+ err = cm_init_listen(cm_id_priv, service_id);
+ if (err) {
+ ib_destroy_cm_id(&cm_id_priv->id);
+ return ERR_PTR(err);
+ }
- if (service_id == IB_CM_ASSIGN_SERVICE_ID)
- goto new_id;
-
- /* Find an existing ID */
- cm_id_priv = cm_find_listen(device, service_id);
- if (cm_id_priv) {
- if (cm_id->cm_handler != cm_handler || cm_id->context) {
- /* Sharing an ib_cm_id with different handlers is not
- * supported */
- spin_unlock_irqrestore(&cm.lock, flags);
+ spin_lock_irq(&cm_id_priv->lock);
+ listen_id_priv = cm_insert_listen(cm_id_priv, cm_handler);
+ if (listen_id_priv != cm_id_priv) {
+ spin_unlock_irq(&cm_id_priv->lock);
+ ib_destroy_cm_id(&cm_id_priv->id);
+ if (!listen_id_priv)
return ERR_PTR(-EINVAL);
- }
- atomic_inc(&cm_id_priv->refcount);
- ++cm_id_priv->listen_sharecount;
- spin_unlock_irqrestore(&cm.lock, flags);
-
- ib_destroy_cm_id(cm_id);
- cm_id = &cm_id_priv->id;
- return cm_id;
+ return &listen_id_priv->id;
}
+ cm_id_priv->id.state = IB_CM_LISTEN;
+ spin_unlock_irq(&cm_id_priv->lock);
-new_id:
- /* Use newly created ID */
- err = __ib_cm_listen(cm_id, service_id, 0);
-
- spin_unlock_irqrestore(&cm.lock, flags);
+ /*
+ * A listen ID does not need to be in the xarray since it does not
+ * receive mads, is not placed in the remote_id or remote_qpn rbtree,
+ * and does not enter timewait.
+ */
- if (err) {
- ib_destroy_cm_id(cm_id);
- return ERR_PTR(err);
- }
- return cm_id;
+ return &cm_id_priv->id;
}
EXPORT_SYMBOL(ib_cm_insert_listen);
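A companion sketch (again illustrative, reusing the hypothetical
example_cm_handler above): when another caller asks for the same device and
service ID with the same handler, the already-listening id is returned and
shared instead of allocating a second one.

static struct ib_cm_id *example_shared_listen(struct ib_device *device,
					      __be64 service_id)
{
	struct ib_cm_id *id;

	id = ib_cm_insert_listen(device, example_cm_handler, service_id);
	if (IS_ERR(id))
		return id;
	/* each sharer drops its reference with ib_destroy_cm_id() */
	return id;
}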
static __be64 cm_form_tid(struct cm_id_private *cm_id_priv)
{
- u64 hi_tid, low_tid;
+ u64 hi_tid = 0, low_tid;
+
+ lockdep_assert_held(&cm_id_priv->lock);
- hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
- low_tid = (u64)cm_id_priv->id.local_id;
+ low_tid = (u64)cm_id_priv->id.local_id;
+ if (!cm_id_priv->av.port)
+ return cpu_to_be64(low_tid);
+
+ read_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
+ if (cm_id_priv->av.port->mad_agent)
+ hi_tid = ((u64)cm_id_priv->av.port->mad_agent->hi_tid) << 32;
+ read_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
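+	/* TID layout: MAD agent hi_tid in bits 63:32, this id's local_id in bits 31:0 */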
return cpu_to_be64(hi_tid | low_tid);
}
@@ -1273,6 +1329,13 @@ static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
hdr->tid = tid;
}
+static void cm_format_mad_ece_hdr(struct ib_mad_hdr *hdr, __be16 attr_id,
+ __be64 tid, u32 attr_mod)
+{
+ cm_format_mad_hdr(hdr, attr_id, tid);
+ hdr->attr_mod = cpu_to_be32(attr_mod);
+}
+
static void cm_format_req(struct cm_req_msg *req_msg,
struct cm_id_private *cm_id_priv,
struct ib_cm_req_param *param)
@@ -1280,62 +1343,88 @@ static void cm_format_req(struct cm_req_msg *req_msg,
struct sa_path_rec *pri_path = param->primary_path;
struct sa_path_rec *alt_path = param->alternate_path;
bool pri_ext = false;
+ __be16 lid;
if (pri_path->rec_type == SA_PATH_REC_TYPE_OPA)
pri_ext = opa_is_extended_lid(pri_path->opa.dlid,
pri_path->opa.slid);
- cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
- cm_form_tid(cm_id_priv));
-
- req_msg->local_comm_id = cm_id_priv->id.local_id;
- req_msg->service_id = param->service_id;
- req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
- cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
- cm_req_set_init_depth(req_msg, param->initiator_depth);
- cm_req_set_remote_resp_timeout(req_msg,
- param->remote_cm_response_timeout);
+ cm_format_mad_ece_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
+ cm_form_tid(cm_id_priv), param->ece.attr_mod);
+
+ IBA_SET(CM_REQ_LOCAL_COMM_ID, req_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_REQ_SERVICE_ID, req_msg, be64_to_cpu(param->service_id));
+ IBA_SET(CM_REQ_LOCAL_CA_GUID, req_msg,
+ be64_to_cpu(cm_id_priv->id.device->node_guid));
+ IBA_SET(CM_REQ_LOCAL_QPN, req_msg, param->qp_num);
+ IBA_SET(CM_REQ_INITIATOR_DEPTH, req_msg, param->initiator_depth);
+ IBA_SET(CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT, req_msg,
+ param->remote_cm_response_timeout);
cm_req_set_qp_type(req_msg, param->qp_type);
- cm_req_set_flow_ctrl(req_msg, param->flow_control);
- cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
- cm_req_set_local_resp_timeout(req_msg,
- param->local_cm_response_timeout);
- req_msg->pkey = param->primary_path->pkey;
- cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
- cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);
+ IBA_SET(CM_REQ_END_TO_END_FLOW_CONTROL, req_msg, param->flow_control);
+ IBA_SET(CM_REQ_STARTING_PSN, req_msg, param->starting_psn);
+ IBA_SET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg,
+ param->local_cm_response_timeout);
+ IBA_SET(CM_REQ_PARTITION_KEY, req_msg,
+ be16_to_cpu(param->primary_path->pkey));
+ IBA_SET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg,
+ param->primary_path->mtu);
+ IBA_SET(CM_REQ_MAX_CM_RETRIES, req_msg, param->max_cm_retries);
if (param->qp_type != IB_QPT_XRC_INI) {
- cm_req_set_resp_res(req_msg, param->responder_resources);
- cm_req_set_retry_count(req_msg, param->retry_count);
- cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
- cm_req_set_srq(req_msg, param->srq);
+ IBA_SET(CM_REQ_RESPONDER_RESOURCES, req_msg,
+ param->responder_resources);
+ IBA_SET(CM_REQ_RETRY_COUNT, req_msg, param->retry_count);
+ IBA_SET(CM_REQ_RNR_RETRY_COUNT, req_msg,
+ param->rnr_retry_count);
+ IBA_SET(CM_REQ_SRQ, req_msg, param->srq);
}
- req_msg->primary_local_gid = pri_path->sgid;
- req_msg->primary_remote_gid = pri_path->dgid;
+ *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg) =
+ pri_path->sgid;
+ *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg) =
+ pri_path->dgid;
if (pri_ext) {
- req_msg->primary_local_gid.global.interface_id
- = OPA_MAKE_ID(be32_to_cpu(pri_path->opa.slid));
- req_msg->primary_remote_gid.global.interface_id
- = OPA_MAKE_ID(be32_to_cpu(pri_path->opa.dlid));
+ IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg)
+ ->global.interface_id =
+ OPA_MAKE_ID(be32_to_cpu(pri_path->opa.slid));
+ IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg)
+ ->global.interface_id =
+ OPA_MAKE_ID(be32_to_cpu(pri_path->opa.dlid));
}
if (pri_path->hop_limit <= 1) {
- req_msg->primary_local_lid = pri_ext ? 0 :
- htons(ntohl(sa_path_get_slid(pri_path)));
- req_msg->primary_remote_lid = pri_ext ? 0 :
- htons(ntohl(sa_path_get_dlid(pri_path)));
+ IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(pri_ext ? 0 :
+ htons(ntohl(sa_path_get_slid(
+ pri_path)))));
+ IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
+ be16_to_cpu(pri_ext ? 0 :
+ htons(ntohl(sa_path_get_dlid(
+ pri_path)))));
} else {
+
+ if (param->primary_path_inbound) {
+ lid = param->primary_path_inbound->ib.dlid;
+ IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(lid));
+ } else
+ IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(IB_LID_PERMISSIVE));
+
/* Work-around until there's a way to obtain remote LID info */
- req_msg->primary_local_lid = IB_LID_PERMISSIVE;
- req_msg->primary_remote_lid = IB_LID_PERMISSIVE;
+ IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
+ be16_to_cpu(IB_LID_PERMISSIVE));
}
- cm_req_set_primary_flow_label(req_msg, pri_path->flow_label);
- cm_req_set_primary_packet_rate(req_msg, pri_path->rate);
- req_msg->primary_traffic_class = pri_path->traffic_class;
- req_msg->primary_hop_limit = pri_path->hop_limit;
- cm_req_set_primary_sl(req_msg, pri_path->sl);
- cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1));
- cm_req_set_primary_local_ack_timeout(req_msg,
+ IBA_SET(CM_REQ_PRIMARY_FLOW_LABEL, req_msg,
+ be32_to_cpu(pri_path->flow_label));
+ IBA_SET(CM_REQ_PRIMARY_PACKET_RATE, req_msg, pri_path->rate);
+ IBA_SET(CM_REQ_PRIMARY_TRAFFIC_CLASS, req_msg, pri_path->traffic_class);
+ IBA_SET(CM_REQ_PRIMARY_HOP_LIMIT, req_msg, pri_path->hop_limit);
+ IBA_SET(CM_REQ_PRIMARY_SL, req_msg, pri_path->sl);
+ IBA_SET(CM_REQ_PRIMARY_SUBNET_LOCAL, req_msg,
+ (pri_path->hop_limit <= 1));
+ IBA_SET(CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT, req_msg,
cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
pri_path->packet_life_time));
@@ -1346,46 +1435,60 @@ static void cm_format_req(struct cm_req_msg *req_msg,
alt_ext = opa_is_extended_lid(alt_path->opa.dlid,
alt_path->opa.slid);
- req_msg->alt_local_gid = alt_path->sgid;
- req_msg->alt_remote_gid = alt_path->dgid;
+ *IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg) =
+ alt_path->sgid;
+ *IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg) =
+ alt_path->dgid;
if (alt_ext) {
- req_msg->alt_local_gid.global.interface_id
- = OPA_MAKE_ID(be32_to_cpu(alt_path->opa.slid));
- req_msg->alt_remote_gid.global.interface_id
- = OPA_MAKE_ID(be32_to_cpu(alt_path->opa.dlid));
+ IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID,
+ req_msg)
+ ->global.interface_id =
+ OPA_MAKE_ID(be32_to_cpu(alt_path->opa.slid));
+ IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_REMOTE_PORT_GID,
+ req_msg)
+ ->global.interface_id =
+ OPA_MAKE_ID(be32_to_cpu(alt_path->opa.dlid));
}
if (alt_path->hop_limit <= 1) {
- req_msg->alt_local_lid = alt_ext ? 0 :
- htons(ntohl(sa_path_get_slid(alt_path)));
- req_msg->alt_remote_lid = alt_ext ? 0 :
- htons(ntohl(sa_path_get_dlid(alt_path)));
+ IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(
+ alt_ext ? 0 :
+ htons(ntohl(sa_path_get_slid(
+ alt_path)))));
+ IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg,
+ be16_to_cpu(
+ alt_ext ? 0 :
+ htons(ntohl(sa_path_get_dlid(
+ alt_path)))));
} else {
- req_msg->alt_local_lid = IB_LID_PERMISSIVE;
- req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
+ IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(IB_LID_PERMISSIVE));
+ IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg,
+ be16_to_cpu(IB_LID_PERMISSIVE));
}
- cm_req_set_alt_flow_label(req_msg,
- alt_path->flow_label);
- cm_req_set_alt_packet_rate(req_msg, alt_path->rate);
- req_msg->alt_traffic_class = alt_path->traffic_class;
- req_msg->alt_hop_limit = alt_path->hop_limit;
- cm_req_set_alt_sl(req_msg, alt_path->sl);
- cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1));
- cm_req_set_alt_local_ack_timeout(req_msg,
+ IBA_SET(CM_REQ_ALTERNATE_FLOW_LABEL, req_msg,
+ be32_to_cpu(alt_path->flow_label));
+ IBA_SET(CM_REQ_ALTERNATE_PACKET_RATE, req_msg, alt_path->rate);
+ IBA_SET(CM_REQ_ALTERNATE_TRAFFIC_CLASS, req_msg,
+ alt_path->traffic_class);
+ IBA_SET(CM_REQ_ALTERNATE_HOP_LIMIT, req_msg,
+ alt_path->hop_limit);
+ IBA_SET(CM_REQ_ALTERNATE_SL, req_msg, alt_path->sl);
+ IBA_SET(CM_REQ_ALTERNATE_SUBNET_LOCAL, req_msg,
+ (alt_path->hop_limit <= 1));
+ IBA_SET(CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT, req_msg,
cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
alt_path->packet_life_time));
}
+ IBA_SET(CM_REQ_VENDOR_ID, req_msg, param->ece.vendor_id);
if (param->private_data && param->private_data_len)
- memcpy(req_msg->private_data, param->private_data,
- param->private_data_len);
+ IBA_SET_MEM(CM_REQ_PRIVATE_DATA, req_msg, param->private_data,
+ param->private_data_len);
}
static int cm_validate_req_param(struct ib_cm_req_param *param)
{
- /* peer-to-peer not supported */
- if (param->peer_to_peer)
- return -EINVAL;
-
if (!param->primary_path)
return -EINVAL;
@@ -1408,7 +1511,9 @@ static int cm_validate_req_param(struct ib_cm_req_param *param)
int ib_send_cm_req(struct ib_cm_id *cm_id,
struct ib_cm_req_param *param)
{
+ struct cm_av av = {}, alt_av = {};
struct cm_id_private *cm_id_priv;
+ struct ib_mad_send_buf *msg;
struct cm_req_msg *req_msg;
unsigned long flags;
int ret;
@@ -1420,10 +1525,9 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
/* Verify that we're not in timewait. */
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state != IB_CM_IDLE) {
+ if (cm_id->state != IB_CM_IDLE || WARN_ON(cm_id_priv->timewait_info)) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -1431,22 +1535,23 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
id.local_id);
if (IS_ERR(cm_id_priv->timewait_info)) {
ret = PTR_ERR(cm_id_priv->timewait_info);
- goto out;
+ cm_id_priv->timewait_info = NULL;
+ return ret;
}
ret = cm_init_av_by_path(param->primary_path,
- param->ppath_sgid_attr, &cm_id_priv->av,
- cm_id_priv);
+ param->ppath_sgid_attr, &av);
if (ret)
- goto error1;
+ return ret;
if (param->alternate_path) {
ret = cm_init_av_by_path(param->alternate_path, NULL,
- &cm_id_priv->alt_av, cm_id_priv);
- if (ret)
- goto error1;
+ &alt_av);
+ if (ret) {
+ cm_destroy_av(&av);
+ return ret;
+ }
}
cm_id->service_id = param->service_id;
- cm_id->service_mask = ~cpu_to_be64(0);
cm_id_priv->timeout_ms = cm_convert_to_ms(
param->primary_path->packet_life_time) * 2 +
cm_convert_to_ms(
@@ -1459,33 +1564,42 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
cm_id_priv->pkey = param->primary_path->pkey;
cm_id_priv->qp_type = param->qp_type;
- ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
- if (ret)
- goto error1;
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+ cm_move_av_from_path(&cm_id_priv->av, &av);
+ if (param->primary_path_outbound)
+ cm_id_priv->av.dlid_datapath =
+ be16_to_cpu(param->primary_path_outbound->ib.dlid);
+
+ if (param->alternate_path)
+ cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av);
- req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
+ msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_REQ_SENT);
+ if (IS_ERR(msg)) {
+ ret = PTR_ERR(msg);
+ goto out_unlock;
+ }
+
+ req_msg = (struct cm_req_msg *)msg->mad;
cm_format_req(req_msg, cm_id_priv, param);
cm_id_priv->tid = req_msg->hdr.tid;
- cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
- cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;
- cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
- cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg);
+ cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
+ cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- ret = ib_post_send_mad(cm_id_priv->msg, NULL);
- if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- goto error2;
- }
+ trace_icm_send_req(&cm_id_priv->id);
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret)
+ goto out_free;
BUG_ON(cm_id->state != IB_CM_IDLE);
cm_id->state = IB_CM_REQ_SENT;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return 0;
-
-error2: cm_free_msg(cm_id_priv->msg);
-error1: kfree(cm_id_priv->timewait_info);
-out: return ret;
+out_free:
+ cm_free_priv_msg(msg);
+out_unlock:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return ret;
}
EXPORT_SYMBOL(ib_send_cm_req);
@@ -1499,7 +1613,7 @@ static int cm_issue_rej(struct cm_port *port,
struct cm_rej_msg *rej_msg, *rcv_msg;
int ret;
- ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
+ ret = cm_alloc_response_msg(port, mad_recv_wc, false, &msg);
if (ret)
return ret;
@@ -1508,16 +1622,21 @@ static int cm_issue_rej(struct cm_port *port,
rej_msg = (struct cm_rej_msg *) msg->mad;
cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
- rej_msg->remote_comm_id = rcv_msg->local_comm_id;
- rej_msg->local_comm_id = rcv_msg->remote_comm_id;
- cm_rej_set_msg_rejected(rej_msg, msg_rejected);
- rej_msg->reason = cpu_to_be16(reason);
+ IBA_SET(CM_REJ_REMOTE_COMM_ID, rej_msg,
+ IBA_GET(CM_REJ_LOCAL_COMM_ID, rcv_msg));
+ IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
+ IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg));
+ IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, msg_rejected);
+ IBA_SET(CM_REJ_REASON, rej_msg, reason);
if (ari && ari_length) {
- cm_rej_set_reject_info_len(rej_msg, ari_length);
- memcpy(rej_msg->ari, ari, ari_length);
+ IBA_SET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg, ari_length);
+ IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length);
}
+ trace_icm_issue_rej(
+ IBA_GET(CM_REJ_LOCAL_COMM_ID, rcv_msg),
+ IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg));
ret = ib_post_send_mad(msg, NULL);
if (ret)
cm_free_msg(msg);
@@ -1525,21 +1644,15 @@ static int cm_issue_rej(struct cm_port *port,
return ret;
}
-static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
- __be32 local_qpn, __be32 remote_qpn)
-{
- return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) ||
- ((local_ca_guid == remote_ca_guid) &&
- (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn))));
-}
-
static bool cm_req_has_alt_path(struct cm_req_msg *req_msg)
{
- return ((req_msg->alt_local_lid) ||
- (ib_is_opa_gid(&req_msg->alt_local_gid)));
+ return ((cpu_to_be16(
+ IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg))) ||
+ (ib_is_opa_gid(IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID,
+ req_msg))));
}
-static void cm_path_set_rec_type(struct ib_device *ib_device, u8 port_num,
+static void cm_path_set_rec_type(struct ib_device *ib_device, u32 port_num,
struct sa_path_rec *path, union ib_gid *gid)
{
if (ib_is_opa_gid(gid) && rdma_cap_opa_ah(ib_device, port_num))
@@ -1550,20 +1663,23 @@ static void cm_path_set_rec_type(struct ib_device *ib_device, u8 port_num,
static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg,
struct sa_path_rec *primary_path,
- struct sa_path_rec *alt_path)
+ struct sa_path_rec *alt_path,
+ struct ib_wc *wc)
{
u32 lid;
if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) {
- sa_path_set_dlid(primary_path,
- ntohs(req_msg->primary_local_lid));
+ sa_path_set_dlid(primary_path, wc->slid);
sa_path_set_slid(primary_path,
- ntohs(req_msg->primary_remote_lid));
+ IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID,
+ req_msg));
} else {
- lid = opa_get_lid_from_gid(&req_msg->primary_local_gid);
+ lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
+ CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg));
sa_path_set_dlid(primary_path, lid);
- lid = opa_get_lid_from_gid(&req_msg->primary_remote_gid);
+ lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
+ CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg));
sa_path_set_slid(primary_path, lid);
}
@@ -1571,77 +1687,98 @@ static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg,
return;
if (alt_path->rec_type != SA_PATH_REC_TYPE_OPA) {
- sa_path_set_dlid(alt_path, ntohs(req_msg->alt_local_lid));
- sa_path_set_slid(alt_path, ntohs(req_msg->alt_remote_lid));
+ sa_path_set_dlid(alt_path,
+ IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID,
+ req_msg));
+ sa_path_set_slid(alt_path,
+ IBA_GET(CM_REQ_ALTERNATE_REMOTE_PORT_LID,
+ req_msg));
} else {
- lid = opa_get_lid_from_gid(&req_msg->alt_local_gid);
+ lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
+ CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg));
sa_path_set_dlid(alt_path, lid);
- lid = opa_get_lid_from_gid(&req_msg->alt_remote_gid);
+ lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
+ CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg));
sa_path_set_slid(alt_path, lid);
}
}
static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
struct sa_path_rec *primary_path,
- struct sa_path_rec *alt_path)
+ struct sa_path_rec *alt_path,
+ struct ib_wc *wc)
{
- primary_path->dgid = req_msg->primary_local_gid;
- primary_path->sgid = req_msg->primary_remote_gid;
- primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
- primary_path->hop_limit = req_msg->primary_hop_limit;
- primary_path->traffic_class = req_msg->primary_traffic_class;
+ primary_path->dgid =
+ *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg);
+ primary_path->sgid =
+ *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg);
+ primary_path->flow_label =
+ cpu_to_be32(IBA_GET(CM_REQ_PRIMARY_FLOW_LABEL, req_msg));
+ primary_path->hop_limit = IBA_GET(CM_REQ_PRIMARY_HOP_LIMIT, req_msg);
+ primary_path->traffic_class =
+ IBA_GET(CM_REQ_PRIMARY_TRAFFIC_CLASS, req_msg);
primary_path->reversible = 1;
- primary_path->pkey = req_msg->pkey;
- primary_path->sl = cm_req_get_primary_sl(req_msg);
+ primary_path->pkey =
+ cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
+ primary_path->sl = IBA_GET(CM_REQ_PRIMARY_SL, req_msg);
primary_path->mtu_selector = IB_SA_EQ;
- primary_path->mtu = cm_req_get_path_mtu(req_msg);
+ primary_path->mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
primary_path->rate_selector = IB_SA_EQ;
- primary_path->rate = cm_req_get_primary_packet_rate(req_msg);
+ primary_path->rate = IBA_GET(CM_REQ_PRIMARY_PACKET_RATE, req_msg);
primary_path->packet_life_time_selector = IB_SA_EQ;
primary_path->packet_life_time =
- cm_req_get_primary_local_ack_timeout(req_msg);
+ IBA_GET(CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT, req_msg);
primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
- primary_path->service_id = req_msg->service_id;
+ primary_path->service_id =
+ cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
if (sa_path_is_roce(primary_path))
primary_path->roce.route_resolved = false;
if (cm_req_has_alt_path(req_msg)) {
- alt_path->dgid = req_msg->alt_local_gid;
- alt_path->sgid = req_msg->alt_remote_gid;
- alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
- alt_path->hop_limit = req_msg->alt_hop_limit;
- alt_path->traffic_class = req_msg->alt_traffic_class;
+ alt_path->dgid = *IBA_GET_MEM_PTR(
+ CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg);
+ alt_path->sgid = *IBA_GET_MEM_PTR(
+ CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg);
+ alt_path->flow_label = cpu_to_be32(
+ IBA_GET(CM_REQ_ALTERNATE_FLOW_LABEL, req_msg));
+ alt_path->hop_limit =
+ IBA_GET(CM_REQ_ALTERNATE_HOP_LIMIT, req_msg);
+ alt_path->traffic_class =
+ IBA_GET(CM_REQ_ALTERNATE_TRAFFIC_CLASS, req_msg);
alt_path->reversible = 1;
- alt_path->pkey = req_msg->pkey;
- alt_path->sl = cm_req_get_alt_sl(req_msg);
+ alt_path->pkey =
+ cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
+ alt_path->sl = IBA_GET(CM_REQ_ALTERNATE_SL, req_msg);
alt_path->mtu_selector = IB_SA_EQ;
- alt_path->mtu = cm_req_get_path_mtu(req_msg);
+ alt_path->mtu =
+ IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
alt_path->rate_selector = IB_SA_EQ;
- alt_path->rate = cm_req_get_alt_packet_rate(req_msg);
+ alt_path->rate = IBA_GET(CM_REQ_ALTERNATE_PACKET_RATE, req_msg);
alt_path->packet_life_time_selector = IB_SA_EQ;
alt_path->packet_life_time =
- cm_req_get_alt_local_ack_timeout(req_msg);
+ IBA_GET(CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT, req_msg);
alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
- alt_path->service_id = req_msg->service_id;
+ alt_path->service_id =
+ cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
if (sa_path_is_roce(alt_path))
alt_path->roce.route_resolved = false;
}
- cm_format_path_lid_from_req(req_msg, primary_path, alt_path);
+ cm_format_path_lid_from_req(req_msg, primary_path, alt_path, wc);
}
static u16 cm_get_bth_pkey(struct cm_work *work)
{
struct ib_device *ib_dev = work->port->cm_dev->ib_device;
- u8 port_num = work->port->port_num;
+ u32 port_num = work->port->port_num;
u16 pkey_index = work->mad_recv_wc->wc->pkey_index;
u16 pkey;
int ret;
ret = ib_get_cached_pkey(ib_dev, port_num, pkey_index, &pkey);
if (ret) {
- dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %d, pkey index %d). %d\n",
+ dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %u, pkey index %u). %d\n",
port_num, pkey_index, ret);
return 0;
}
@@ -1650,7 +1787,7 @@ static u16 cm_get_bth_pkey(struct cm_work *work)
}
/**
- * Convert OPA SGID to IB SGID
+ * cm_opa_to_ib_sgid - Convert OPA SGID to IB SGID
* ULPs (such as IPoIB) do not understand OPA GIDs and will
* reject them as the local_gid will not match the sgid. Therefore,
* change the pathrec's SGID to an IB SGID.
@@ -1662,7 +1799,7 @@ static void cm_opa_to_ib_sgid(struct cm_work *work,
struct sa_path_rec *path)
{
struct ib_device *dev = work->port->cm_dev->ib_device;
- u8 port_num = work->port->port_num;
+ u32 port_num = work->port->port_num;
if (rdma_cap_opa_ah(dev, port_num) &&
(ib_is_opa_gid(&path->sgid))) {
@@ -1698,23 +1835,28 @@ static void cm_format_req_event(struct cm_work *work,
} else {
param->alternate_path = NULL;
}
- param->remote_ca_guid = req_msg->local_ca_guid;
- param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
- param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
+ param->remote_ca_guid =
+ cpu_to_be64(IBA_GET(CM_REQ_LOCAL_CA_GUID, req_msg));
+ param->remote_qkey = IBA_GET(CM_REQ_LOCAL_Q_KEY, req_msg);
+ param->remote_qpn = IBA_GET(CM_REQ_LOCAL_QPN, req_msg);
param->qp_type = cm_req_get_qp_type(req_msg);
- param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg));
- param->responder_resources = cm_req_get_init_depth(req_msg);
- param->initiator_depth = cm_req_get_resp_res(req_msg);
+ param->starting_psn = IBA_GET(CM_REQ_STARTING_PSN, req_msg);
+ param->responder_resources = IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg);
+ param->initiator_depth = IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg);
param->local_cm_response_timeout =
- cm_req_get_remote_resp_timeout(req_msg);
- param->flow_control = cm_req_get_flow_ctrl(req_msg);
+ IBA_GET(CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT, req_msg);
+ param->flow_control = IBA_GET(CM_REQ_END_TO_END_FLOW_CONTROL, req_msg);
param->remote_cm_response_timeout =
- cm_req_get_local_resp_timeout(req_msg);
- param->retry_count = cm_req_get_retry_count(req_msg);
- param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
- param->srq = cm_req_get_srq(req_msg);
+ IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg);
+ param->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg);
+ param->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
+ param->srq = IBA_GET(CM_REQ_SRQ, req_msg);
param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
- work->cm_event.private_data = &req_msg->private_data;
+ param->ece.vendor_id = IBA_GET(CM_REQ_VENDOR_ID, req_msg);
+ param->ece.attr_mod = be32_to_cpu(req_msg->hdr.attr_mod);
+
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_REQ_PRIVATE_DATA, req_msg);
}
static void cm_process_work(struct cm_id_private *cm_id_priv,
@@ -1744,58 +1886,67 @@ static void cm_process_work(struct cm_id_private *cm_id_priv,
static void cm_format_mra(struct cm_mra_msg *mra_msg,
struct cm_id_private *cm_id_priv,
- enum cm_msg_response msg_mraed, u8 service_timeout,
+ enum cm_msg_response msg_mraed,
const void *private_data, u8 private_data_len)
{
cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
- cm_mra_set_msg_mraed(mra_msg, msg_mraed);
- mra_msg->local_comm_id = cm_id_priv->id.local_id;
- mra_msg->remote_comm_id = cm_id_priv->id.remote_id;
- cm_mra_set_service_timeout(mra_msg, service_timeout);
+ IBA_SET(CM_MRA_MESSAGE_MRAED, mra_msg, msg_mraed);
+ IBA_SET(CM_MRA_LOCAL_COMM_ID, mra_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_MRA_REMOTE_COMM_ID, mra_msg,
+ be32_to_cpu(cm_id_priv->id.remote_id));
+ IBA_SET(CM_MRA_SERVICE_TIMEOUT, mra_msg, CM_MRA_SETTING);
if (private_data && private_data_len)
- memcpy(mra_msg->private_data, private_data, private_data_len);
+ IBA_SET_MEM(CM_MRA_PRIVATE_DATA, mra_msg, private_data,
+ private_data_len);
}
static void cm_format_rej(struct cm_rej_msg *rej_msg,
struct cm_id_private *cm_id_priv,
- enum ib_cm_rej_reason reason,
- void *ari,
- u8 ari_length,
- const void *private_data,
- u8 private_data_len)
+ enum ib_cm_rej_reason reason, void *ari,
+ u8 ari_length, const void *private_data,
+ u8 private_data_len, enum ib_cm_state state)
{
+ lockdep_assert_held(&cm_id_priv->lock);
+
cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
- rej_msg->remote_comm_id = cm_id_priv->id.remote_id;
+ IBA_SET(CM_REJ_REMOTE_COMM_ID, rej_msg,
+ be32_to_cpu(cm_id_priv->id.remote_id));
- switch(cm_id_priv->id.state) {
+ switch (state) {
case IB_CM_REQ_RCVD:
- rej_msg->local_comm_id = 0;
- cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
+ IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg, be32_to_cpu(0));
+ IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REQ);
break;
case IB_CM_MRA_REQ_SENT:
- rej_msg->local_comm_id = cm_id_priv->id.local_id;
- cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
+ IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REQ);
break;
case IB_CM_REP_RCVD:
case IB_CM_MRA_REP_SENT:
- rej_msg->local_comm_id = cm_id_priv->id.local_id;
- cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP);
+ IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REP);
break;
default:
- rej_msg->local_comm_id = cm_id_priv->id.local_id;
- cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER);
+ IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg,
+ CM_MSG_RESPONSE_OTHER);
break;
}
- rej_msg->reason = cpu_to_be16(reason);
+ IBA_SET(CM_REJ_REASON, rej_msg, reason);
if (ari && ari_length) {
- cm_rej_set_reject_info_len(rej_msg, ari_length);
- memcpy(rej_msg->ari, ari, ari_length);
+ IBA_SET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg, ari_length);
+ IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length);
}
if (private_data && private_data_len)
- memcpy(rej_msg->private_data, private_data, private_data_len);
+ IBA_SET_MEM(CM_REJ_PRIVATE_DATA, rej_msg, private_data,
+ private_data_len);
}
static void cm_dup_req_handler(struct cm_work *work,
@@ -1804,14 +1955,18 @@ static void cm_dup_req_handler(struct cm_work *work,
struct ib_mad_send_buf *msg = NULL;
int ret;
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_REQ_COUNTER]);
+ atomic_long_inc(
+ &work->port->counters[CM_RECV_DUPLICATES][CM_REQ_COUNTER]);
/* Quick state check to discard duplicate REQs. */
- if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
+ spin_lock_irq(&cm_id_priv->lock);
+ if (cm_id_priv->id.state == IB_CM_REQ_RCVD) {
+ spin_unlock_irq(&cm_id_priv->lock);
return;
+ }
+ spin_unlock_irq(&cm_id_priv->lock);
- ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
+ ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, true, &msg);
if (ret)
return;
@@ -1819,19 +1974,21 @@ static void cm_dup_req_handler(struct cm_work *work,
switch (cm_id_priv->id.state) {
case IB_CM_MRA_REQ_SENT:
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
- CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
+ CM_MSG_RESPONSE_REQ,
cm_id_priv->private_data,
cm_id_priv->private_data_len);
break;
case IB_CM_TIMEWAIT:
- cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv,
- IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0);
+ cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv,
+ IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0,
+ IB_CM_TIMEWAIT);
break;
default:
goto unlock;
}
spin_unlock_irq(&cm_id_priv->lock);
+ trace_icm_send_dup_req(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret)
goto free;
@@ -1841,13 +1998,12 @@ unlock: spin_unlock_irq(&cm_id_priv->lock);
free: cm_free_msg(msg);
}
-static struct cm_id_private * cm_match_req(struct cm_work *work,
- struct cm_id_private *cm_id_priv)
+static struct cm_id_private *cm_match_req(struct cm_work *work,
+ struct cm_id_private *cm_id_priv)
{
struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
struct cm_timewait_info *timewait_info;
struct cm_req_msg *req_msg;
- struct ib_cm_id *cm_id;
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1855,7 +2011,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
spin_lock_irq(&cm.lock);
timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
if (timewait_info) {
- cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
+ cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
timewait_info->work.remote_id);
spin_unlock_irq(&cm.lock);
if (cur_cm_id_priv) {
@@ -1868,8 +2024,8 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
/* Check for stale connections. */
timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
if (timewait_info) {
- cm_cleanup_timewait(cm_id_priv->timewait_info);
- cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
+ cm_remove_remote(cm_id_priv);
+ cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
timewait_info->work.remote_id);
spin_unlock_irq(&cm.lock);
@@ -1877,30 +2033,25 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
NULL, 0);
if (cur_cm_id_priv) {
- cm_id = &cur_cm_id_priv->id;
- ib_send_cm_dreq(cm_id, NULL, 0);
+ ib_send_cm_dreq(&cur_cm_id_priv->id, NULL, 0);
cm_deref_id(cur_cm_id_priv);
}
return NULL;
}
/* Find matching listen request. */
- listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
- req_msg->service_id);
+ listen_cm_id_priv = cm_find_listen(
+ cm_id_priv->id.device,
+ cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg)));
if (!listen_cm_id_priv) {
- cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cm_remove_remote(cm_id_priv);
spin_unlock_irq(&cm.lock);
cm_issue_rej(work->port, work->mad_recv_wc,
IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
NULL, 0);
- goto out;
+ return NULL;
}
- atomic_inc(&listen_cm_id_priv->refcount);
- atomic_inc(&cm_id_priv->refcount);
- cm_id_priv->id.state = IB_CM_REQ_RCVD;
- atomic_inc(&cm_id_priv->work_count);
spin_unlock_irq(&cm.lock);
-out:
return listen_cm_id_priv;
}
@@ -1911,30 +2062,37 @@ out:
*/
static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
{
- if (!cm_req_get_primary_subnet_local(req_msg)) {
- if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) {
- req_msg->primary_local_lid = ib_lid_be16(wc->slid);
- cm_req_set_primary_sl(req_msg, wc->sl);
+ if (!IBA_GET(CM_REQ_PRIMARY_SUBNET_LOCAL, req_msg)) {
+ if (cpu_to_be16(IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID,
+ req_msg)) == IB_LID_PERMISSIVE) {
+ IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(ib_lid_be16(wc->slid)));
+ IBA_SET(CM_REQ_PRIMARY_SL, req_msg, wc->sl);
}
- if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE)
- req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits);
+ if (cpu_to_be16(IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID,
+ req_msg)) == IB_LID_PERMISSIVE)
+ IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
+ wc->dlid_path_bits);
}
- if (!cm_req_get_alt_subnet_local(req_msg)) {
- if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) {
- req_msg->alt_local_lid = ib_lid_be16(wc->slid);
- cm_req_set_alt_sl(req_msg, wc->sl);
+ if (!IBA_GET(CM_REQ_ALTERNATE_SUBNET_LOCAL, req_msg)) {
+ if (cpu_to_be16(IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID,
+ req_msg)) == IB_LID_PERMISSIVE) {
+ IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(ib_lid_be16(wc->slid)));
+ IBA_SET(CM_REQ_ALTERNATE_SL, req_msg, wc->sl);
}
- if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE)
- req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits);
+ if (cpu_to_be16(IBA_GET(CM_REQ_ALTERNATE_REMOTE_PORT_LID,
+ req_msg)) == IB_LID_PERMISSIVE)
+ IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg,
+ wc->dlid_path_bits);
}
}
static int cm_req_handler(struct cm_work *work)
{
- struct ib_cm_id *cm_id;
struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
struct cm_req_msg *req_msg;
const struct ib_global_route *grh;
@@ -1943,12 +2101,32 @@ static int cm_req_handler(struct cm_work *work)
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
- cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
- if (IS_ERR(cm_id))
- return PTR_ERR(cm_id);
+ cm_id_priv =
+ cm_alloc_id_priv(work->port->cm_dev->ib_device, NULL, NULL);
+ if (IS_ERR(cm_id_priv))
+ return PTR_ERR(cm_id_priv);
+
+ cm_id_priv->id.remote_id =
+ cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg));
+ cm_id_priv->id.service_id =
+ cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
+ cm_id_priv->tid = req_msg->hdr.tid;
+ cm_id_priv->timeout_ms = cm_convert_to_ms(
+ IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg));
+ cm_id_priv->max_cm_retries = IBA_GET(CM_REQ_MAX_CM_RETRIES, req_msg);
+ cm_id_priv->remote_qpn =
+ cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
+ cm_id_priv->initiator_depth =
+ IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg);
+ cm_id_priv->responder_resources =
+ IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg);
+ cm_id_priv->path_mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
+ cm_id_priv->pkey = cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
+ cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
+ cm_id_priv->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg);
+ cm_id_priv->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
+ cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- cm_id_priv->id.remote_id = req_msg->local_comm_id;
ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
work->mad_recv_wc->recv_buf.grh,
&cm_id_priv->av);
@@ -1958,54 +2136,57 @@ static int cm_req_handler(struct cm_work *work)
id.local_id);
if (IS_ERR(cm_id_priv->timewait_info)) {
ret = PTR_ERR(cm_id_priv->timewait_info);
+ cm_id_priv->timewait_info = NULL;
goto destroy;
}
- cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
- cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
- cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg);
+ cm_id_priv->timewait_info->work.remote_id = cm_id_priv->id.remote_id;
+ cm_id_priv->timewait_info->remote_ca_guid =
+ cpu_to_be64(IBA_GET(CM_REQ_LOCAL_CA_GUID, req_msg));
+ cm_id_priv->timewait_info->remote_qpn = cm_id_priv->remote_qpn;
+
+ /*
+ * Note that the ID pointer is not in the xarray at this point,
+ * so this set is only visible to the local thread.
+ */
+ cm_id_priv->id.state = IB_CM_REQ_RCVD;
listen_cm_id_priv = cm_match_req(work, cm_id_priv);
if (!listen_cm_id_priv) {
- pr_debug("%s: local_id %d, no listen_cm_id_priv\n", __func__,
- be32_to_cpu(cm_id->local_id));
+ trace_icm_no_listener_err(&cm_id_priv->id);
+ cm_id_priv->id.state = IB_CM_IDLE;
ret = -EINVAL;
- goto free_timeinfo;
+ goto destroy;
}
- cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
- cm_id_priv->id.context = listen_cm_id_priv->id.context;
- cm_id_priv->id.service_id = req_msg->service_id;
- cm_id_priv->id.service_mask = ~cpu_to_be64(0);
-
- cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
-
memset(&work->path[0], 0, sizeof(work->path[0]));
if (cm_req_has_alt_path(req_msg))
memset(&work->path[1], 0, sizeof(work->path[1]));
grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr);
gid_attr = grh->sgid_attr;
- if (gid_attr &&
- rdma_protocol_roce(work->port->cm_dev->ib_device,
- work->port->port_num)) {
+ if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE) {
work->path[0].rec_type =
sa_conv_gid_to_pathrec_type(gid_attr->gid_type);
} else {
- cm_path_set_rec_type(work->port->cm_dev->ib_device,
- work->port->port_num,
- &work->path[0],
- &req_msg->primary_local_gid);
+ cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
+ cm_path_set_rec_type(
+ work->port->cm_dev->ib_device, work->port->port_num,
+ &work->path[0],
+ IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID,
+ req_msg));
}
if (cm_req_has_alt_path(req_msg))
work->path[1].rec_type = work->path[0].rec_type;
cm_format_paths_from_req(req_msg, &work->path[0],
- &work->path[1]);
+ &work->path[1], work->mad_recv_wc->wc);
if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
sa_path_set_dmac(&work->path[0],
cm_id_priv->av.ah_attr.roce.dmac);
work->path[0].hop_limit = grh->hop_limit;
- ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av,
- cm_id_priv);
+
+ /* This destroy call is needed to pair with cm_init_av_for_response */
+ cm_destroy_av(&cm_id_priv->av);
+ ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av);
if (ret) {
int err;
@@ -2013,51 +2194,55 @@ static int cm_req_handler(struct cm_work *work)
work->port->port_num, 0,
&work->path[0].sgid);
if (err)
- ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
+ ib_send_cm_rej(&cm_id_priv->id, IB_CM_REJ_INVALID_GID,
NULL, 0, NULL, 0);
else
- ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
+ ib_send_cm_rej(&cm_id_priv->id, IB_CM_REJ_INVALID_GID,
&work->path[0].sgid,
sizeof(work->path[0].sgid),
NULL, 0);
goto rejected;
}
+ if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_IB)
+ cm_id_priv->av.dlid_datapath =
+ IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg);
+
if (cm_req_has_alt_path(req_msg)) {
ret = cm_init_av_by_path(&work->path[1], NULL,
- &cm_id_priv->alt_av, cm_id_priv);
+ &cm_id_priv->alt_av);
if (ret) {
- ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
+ ib_send_cm_rej(&cm_id_priv->id,
+ IB_CM_REJ_INVALID_ALT_GID,
&work->path[0].sgid,
sizeof(work->path[0].sgid), NULL, 0);
goto rejected;
}
}
- cm_id_priv->tid = req_msg->hdr.tid;
- cm_id_priv->timeout_ms = cm_convert_to_ms(
- cm_req_get_local_resp_timeout(req_msg));
- cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
- cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
- cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
- cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg);
- cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg);
- cm_id_priv->pkey = req_msg->pkey;
- cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg);
- cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
- cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
- cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
+ cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
+ cm_id_priv->id.context = listen_cm_id_priv->id.context;
cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
- cm_process_work(cm_id_priv, work);
+
+ /* Now MAD handlers can see the new ID */
+ spin_lock_irq(&cm_id_priv->lock);
+ cm_finalize_id(cm_id_priv);
+
+ /* Refcount belongs to the event, pairs with cm_process_work() */
+ refcount_inc(&cm_id_priv->refcount);
+ cm_queue_work_unlock(cm_id_priv, work);
+ /*
+	 * Since this ID was just created and was not made visible to other MAD
+	 * handlers until the cm_finalize_id() above, we know that
+	 * cm_process_work() will deliver the event and the listen_cm_id
+	 * embedded in the event can be derefed here.
+ */
cm_deref_id(listen_cm_id_priv);
return 0;
rejected:
- atomic_dec(&cm_id_priv->refcount);
cm_deref_id(listen_cm_id_priv);
-free_timeinfo:
- kfree(cm_id_priv->timewait_info);
destroy:
- ib_destroy_cm_id(cm_id);
+ ib_destroy_cm_id(&cm_id_priv->id);
return ret;
}
@@ -2065,30 +2250,41 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,
struct cm_id_private *cm_id_priv,
struct ib_cm_rep_param *param)
{
- cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
- rep_msg->local_comm_id = cm_id_priv->id.local_id;
- rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
- cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
- rep_msg->resp_resources = param->responder_resources;
- cm_rep_set_target_ack_delay(rep_msg,
- cm_id_priv->av.port->cm_dev->ack_delay);
- cm_rep_set_failover(rep_msg, param->failover_accepted);
- cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
- rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
+ cm_format_mad_ece_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid,
+ param->ece.attr_mod);
+ IBA_SET(CM_REP_LOCAL_COMM_ID, rep_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_REP_REMOTE_COMM_ID, rep_msg,
+ be32_to_cpu(cm_id_priv->id.remote_id));
+ IBA_SET(CM_REP_STARTING_PSN, rep_msg, param->starting_psn);
+ IBA_SET(CM_REP_RESPONDER_RESOURCES, rep_msg,
+ param->responder_resources);
+ IBA_SET(CM_REP_TARGET_ACK_DELAY, rep_msg,
+ cm_id_priv->av.port->cm_dev->ack_delay);
+ IBA_SET(CM_REP_FAILOVER_ACCEPTED, rep_msg, param->failover_accepted);
+ IBA_SET(CM_REP_RNR_RETRY_COUNT, rep_msg, param->rnr_retry_count);
+ IBA_SET(CM_REP_LOCAL_CA_GUID, rep_msg,
+ be64_to_cpu(cm_id_priv->id.device->node_guid));
if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
- rep_msg->initiator_depth = param->initiator_depth;
- cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
- cm_rep_set_srq(rep_msg, param->srq);
- cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
+ IBA_SET(CM_REP_INITIATOR_DEPTH, rep_msg,
+ param->initiator_depth);
+ IBA_SET(CM_REP_END_TO_END_FLOW_CONTROL, rep_msg,
+ param->flow_control);
+ IBA_SET(CM_REP_SRQ, rep_msg, param->srq);
+ IBA_SET(CM_REP_LOCAL_QPN, rep_msg, param->qp_num);
} else {
- cm_rep_set_srq(rep_msg, 1);
- cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num));
+ IBA_SET(CM_REP_SRQ, rep_msg, 1);
+ IBA_SET(CM_REP_LOCAL_EE_CONTEXT_NUMBER, rep_msg, param->qp_num);
}
+ IBA_SET(CM_REP_VENDOR_ID_L, rep_msg, param->ece.vendor_id);
+ IBA_SET(CM_REP_VENDOR_ID_M, rep_msg, param->ece.vendor_id >> 8);
+ IBA_SET(CM_REP_VENDOR_ID_H, rep_msg, param->ece.vendor_id >> 16);
+
if (param->private_data && param->private_data_len)
- memcpy(rep_msg->private_data, param->private_data,
- param->private_data_len);
+ IBA_SET_MEM(CM_REP_PRIVATE_DATA, rep_msg, param->private_data,
+ param->private_data_len);
}
int ib_send_cm_rep(struct ib_cm_id *cm_id,
@@ -2108,36 +2304,40 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REQ_RCVD &&
cm_id->state != IB_CM_MRA_REQ_SENT) {
- pr_debug("%s: local_comm_id %d, cm_id->state: %d\n", __func__,
- be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
+ trace_icm_send_rep_err(cm_id_priv->id.local_id, cm_id->state);
ret = -EINVAL;
goto out;
}
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
+ msg = cm_alloc_priv_msg_rep(cm_id_priv, IB_CM_REP_SENT, true);
+ if (IS_ERR(msg)) {
+ ret = PTR_ERR(msg);
goto out;
+ }
rep_msg = (struct cm_rep_msg *) msg->mad;
cm_format_rep(rep_msg, cm_id_priv, param);
- msg->timeout_ms = cm_id_priv->timeout_ms;
- msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
+ trace_icm_send_rep(cm_id);
ret = ib_post_send_mad(msg, NULL);
- if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- cm_free_msg(msg);
- return ret;
- }
+ if (ret)
+ goto out_free;
cm_id->state = IB_CM_REP_SENT;
- cm_id_priv->msg = msg;
cm_id_priv->initiator_depth = param->initiator_depth;
cm_id_priv->responder_resources = param->responder_resources;
- cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
+ cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg));
+ WARN_ONCE(param->qp_num & 0xFF000000,
+ "IBTA declares QPN to be 24 bits, but it is 0x%X\n",
+ param->qp_num);
cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return 0;
-out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+out_free:
+ cm_free_priv_msg(msg);
+out:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_rep);
@@ -2148,11 +2348,14 @@ static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
u8 private_data_len)
{
cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
- rtu_msg->local_comm_id = cm_id_priv->id.local_id;
- rtu_msg->remote_comm_id = cm_id_priv->id.remote_id;
+ IBA_SET(CM_RTU_LOCAL_COMM_ID, rtu_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_RTU_REMOTE_COMM_ID, rtu_msg,
+ be32_to_cpu(cm_id_priv->id.remote_id));
if (private_data && private_data_len)
- memcpy(rtu_msg->private_data, private_data, private_data_len);
+ IBA_SET_MEM(CM_RTU_PRIVATE_DATA, rtu_msg, private_data,
+ private_data_len);
}
int ib_send_cm_rtu(struct ib_cm_id *cm_id,
@@ -2176,19 +2379,21 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REP_RCVD &&
cm_id->state != IB_CM_MRA_REP_SENT) {
- pr_debug("%s: local_id %d, cm_id->state %d\n", __func__,
- be32_to_cpu(cm_id->local_id), cm_id->state);
+ trace_icm_send_cm_rtu_err(cm_id);
ret = -EINVAL;
goto error;
}
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg)) {
+ ret = PTR_ERR(msg);
goto error;
+ }
cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
+ trace_icm_send_rtu(cm_id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -2215,18 +2420,25 @@ static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
param = &work->cm_event.param.rep_rcvd;
- param->remote_ca_guid = rep_msg->local_ca_guid;
- param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
+ param->remote_ca_guid =
+ cpu_to_be64(IBA_GET(CM_REP_LOCAL_CA_GUID, rep_msg));
+ param->remote_qkey = IBA_GET(CM_REP_LOCAL_Q_KEY, rep_msg);
param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type));
- param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
- param->responder_resources = rep_msg->initiator_depth;
- param->initiator_depth = rep_msg->resp_resources;
- param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
- param->failover_accepted = cm_rep_get_failover(rep_msg);
- param->flow_control = cm_rep_get_flow_ctrl(rep_msg);
- param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
- param->srq = cm_rep_get_srq(rep_msg);
- work->cm_event.private_data = &rep_msg->private_data;
+ param->starting_psn = IBA_GET(CM_REP_STARTING_PSN, rep_msg);
+ param->responder_resources = IBA_GET(CM_REP_INITIATOR_DEPTH, rep_msg);
+ param->initiator_depth = IBA_GET(CM_REP_RESPONDER_RESOURCES, rep_msg);
+ param->target_ack_delay = IBA_GET(CM_REP_TARGET_ACK_DELAY, rep_msg);
+ param->failover_accepted = IBA_GET(CM_REP_FAILOVER_ACCEPTED, rep_msg);
+ param->flow_control = IBA_GET(CM_REP_END_TO_END_FLOW_CONTROL, rep_msg);
+ param->rnr_retry_count = IBA_GET(CM_REP_RNR_RETRY_COUNT, rep_msg);
+ param->srq = IBA_GET(CM_REP_SRQ, rep_msg);
+ param->ece.vendor_id = IBA_GET(CM_REP_VENDOR_ID_H, rep_msg) << 16;
+ param->ece.vendor_id |= IBA_GET(CM_REP_VENDOR_ID_M, rep_msg) << 8;
+ param->ece.vendor_id |= IBA_GET(CM_REP_VENDOR_ID_L, rep_msg);
+ param->ece.attr_mod = be32_to_cpu(rep_msg->hdr.attr_mod);
+
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_REP_PRIVATE_DATA, rep_msg);
}
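/*
 * Editor's note, illustrative only (not part of this patch): the ECE
 * vendor_id is a 24-bit value carried in three 8-bit IBA fields.
 * cm_format_rep() above stores it as CM_REP_VENDOR_ID_L/M/H and
 * cm_format_rep_event() reassembles it; a minimal standalone sketch of
 * that round trip, using hypothetical helper names, would be:
 */
static inline void ece_vendor_id_split(u32 vendor_id, u8 *l, u8 *m, u8 *h)
{
	*l = vendor_id & 0xff;		/* CM_REP_VENDOR_ID_L */
	*m = (vendor_id >> 8) & 0xff;	/* CM_REP_VENDOR_ID_M */
	*h = (vendor_id >> 16) & 0xff;	/* CM_REP_VENDOR_ID_H */
}

static inline u32 ece_vendor_id_join(u8 l, u8 m, u8 h)
{
	return ((u32)h << 16) | ((u32)m << 8) | l;
}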
static void cm_dup_rep_handler(struct cm_work *work)
@@ -2237,14 +2449,15 @@ static void cm_dup_rep_handler(struct cm_work *work)
int ret;
rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
- cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id,
- rep_msg->local_comm_id);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)),
+ cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg)));
if (!cm_id_priv)
return;
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_REP_COUNTER]);
- ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
+ atomic_long_inc(
+ &work->port->counters[CM_RECV_DUPLICATES][CM_REP_COUNTER]);
+ ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, true, &msg);
if (ret)
goto deref;
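/*
 * Editor's note, illustrative only (not part of this patch): the old
 * counter_group[CM_RECV_DUPLICATES].counter[CM_REP_COUNTER] indexing is
 * replaced throughout by a plain two-dimensional lookup, which suggests the
 * per-port statistics now sit in something roughly like the declaration
 * below (member name and dimensions assumed from the indexing above,
 * presumably inside struct cm_port):
 *
 *	atomic_long_t counters[CM_COUNTER_GROUPS][CM_ATTR_COUNT];
 */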
@@ -2255,13 +2468,14 @@ static void cm_dup_rep_handler(struct cm_work *work)
cm_id_priv->private_data_len);
else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
- CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
+ CM_MSG_RESPONSE_REP,
cm_id_priv->private_data,
cm_id_priv->private_data_len);
else
goto unlock;
spin_unlock_irq(&cm_id_priv->lock);
+ trace_icm_send_dup_rep(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret)
goto free;
@@ -2278,15 +2492,15 @@ static int cm_rep_handler(struct cm_work *work)
struct cm_rep_msg *rep_msg;
int ret;
struct cm_id_private *cur_cm_id_priv;
- struct ib_cm_id *cm_id;
struct cm_timewait_info *timewait_info;
rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
- cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)), 0);
if (!cm_id_priv) {
cm_dup_rep_handler(work);
- pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__,
- be32_to_cpu(rep_msg->remote_comm_id));
+ trace_icm_remote_no_priv_err(
+ IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
return -EINVAL;
}
@@ -2298,17 +2512,19 @@ static int cm_rep_handler(struct cm_work *work)
case IB_CM_MRA_REQ_RCVD:
break;
default:
- spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
- pr_debug("%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n",
- __func__, cm_id_priv->id.state,
- be32_to_cpu(rep_msg->local_comm_id),
- be32_to_cpu(rep_msg->remote_comm_id));
+ trace_icm_rep_unknown_err(
+ IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
+ IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg),
+ cm_id_priv->id.state);
+ spin_unlock_irq(&cm_id_priv->lock);
goto error;
}
- cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
- cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
+ cm_id_priv->timewait_info->work.remote_id =
+ cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg));
+ cm_id_priv->timewait_info->remote_ca_guid =
+ cpu_to_be64(IBA_GET(CM_REP_LOCAL_CA_GUID, rep_msg));
cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
spin_lock(&cm.lock);
@@ -2317,17 +2533,15 @@ static int cm_rep_handler(struct cm_work *work)
spin_unlock(&cm.lock);
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
- pr_debug("%s: Failed to insert remote id %d\n", __func__,
- be32_to_cpu(rep_msg->remote_comm_id));
+ trace_icm_insert_failed_err(
+ IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
goto error;
}
/* Check for a stale connection. */
timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
if (timewait_info) {
- rb_erase(&cm_id_priv->timewait_info->remote_id_node,
- &cm.remote_id_table);
- cm_id_priv->timewait_info->inserted_remote_id = 0;
- cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
+ cm_remove_remote(cm_id_priv);
+ cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
timewait_info->work.remote_id);
spin_unlock(&cm.lock);
@@ -2336,13 +2550,12 @@ static int cm_rep_handler(struct cm_work *work)
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
NULL, 0);
ret = -EINVAL;
- pr_debug("%s: Stale connection. local_comm_id %d, remote_comm_id %d\n",
- __func__, be32_to_cpu(rep_msg->local_comm_id),
- be32_to_cpu(rep_msg->remote_comm_id));
+ trace_icm_staleconn_err(
+ IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
+ IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
if (cur_cm_id_priv) {
- cm_id = &cur_cm_id_priv->id;
- ib_send_cm_dreq(cm_id, NULL, 0);
+ ib_send_cm_dreq(&cur_cm_id_priv->id, NULL, 0);
cm_deref_id(cur_cm_id_priv);
}
@@ -2351,13 +2564,17 @@ static int cm_rep_handler(struct cm_work *work)
spin_unlock(&cm.lock);
cm_id_priv->id.state = IB_CM_REP_RCVD;
- cm_id_priv->id.remote_id = rep_msg->local_comm_id;
+ cm_id_priv->id.remote_id =
+ cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg));
cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
- cm_id_priv->initiator_depth = rep_msg->resp_resources;
- cm_id_priv->responder_resources = rep_msg->initiator_depth;
- cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
- cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
- cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
+ cm_id_priv->initiator_depth =
+ IBA_GET(CM_REP_RESPONDER_RESOURCES, rep_msg);
+ cm_id_priv->responder_resources =
+ IBA_GET(CM_REP_INITIATOR_DEPTH, rep_msg);
+ cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg));
+ cm_id_priv->rnr_retry_count = IBA_GET(CM_REP_RNR_RETRY_COUNT, rep_msg);
+ cm_id_priv->target_ack_delay =
+ IBA_GET(CM_REP_TARGET_ACK_DELAY, rep_msg);
cm_id_priv->av.timeout =
cm_ack_timeout(cm_id_priv->target_ack_delay,
cm_id_priv->av.timeout - 1);
@@ -2365,18 +2582,8 @@ static int cm_rep_handler(struct cm_work *work)
cm_ack_timeout(cm_id_priv->target_ack_delay,
cm_id_priv->alt_av.timeout - 1);
- /* todo: handle peer_to_peer */
-
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
error:
@@ -2387,7 +2594,6 @@ error:
static int cm_establish_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
- int ret;
/* See comment in cm_establish about lookup. */
cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
@@ -2400,16 +2606,8 @@ static int cm_establish_handler(struct cm_work *work)
goto out;
}
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
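/*
 * Editor's note, illustrative only (not part of this patch): the repeated
 * open-coded sequence deleted in these hunks (bump work_count, queue or
 * deliver, unlock, then cm_process_work()/cm_deref_id()) is consolidated
 * into cm_queue_work_unlock().  Reconstructed from that removed sequence,
 * the helper behaves roughly as below; its real definition appears earlier
 * in cm.c and may differ in detail.
 */
static void cm_queue_work_unlock(struct cm_id_private *cm_id_priv,
				 struct cm_work *work)
{
	int run_now = atomic_inc_and_test(&cm_id_priv->work_count);

	if (!run_now)
		/* another thread is delivering events; it will pick this up */
		list_add_tail(&work->list, &cm_id_priv->work_list);
	spin_unlock_irq(&cm_id_priv->lock);

	if (run_now)
		cm_process_work(cm_id_priv, work);
	else
		cm_deref_id(cm_id_priv);
}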
@@ -2420,36 +2618,29 @@ static int cm_rtu_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_rtu_msg *rtu_msg;
- int ret;
rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
- cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id,
- rtu_msg->local_comm_id);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_RTU_REMOTE_COMM_ID, rtu_msg)),
+ cpu_to_be32(IBA_GET(CM_RTU_LOCAL_COMM_ID, rtu_msg)));
if (!cm_id_priv)
return -EINVAL;
- work->cm_event.private_data = &rtu_msg->private_data;
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_RTU_PRIVATE_DATA, rtu_msg);
spin_lock_irq(&cm_id_priv->lock);
if (cm_id_priv->id.state != IB_CM_REP_SENT &&
cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
spin_unlock_irq(&cm_id_priv->lock);
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_RTU_COUNTER]);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_RTU_COUNTER]);
goto out;
}
cm_id_priv->id.state = IB_CM_ESTABLISHED;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -2463,19 +2654,42 @@ static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
{
cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
cm_form_tid(cm_id_priv));
- dreq_msg->local_comm_id = cm_id_priv->id.local_id;
- dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
- cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
+ IBA_SET(CM_DREQ_LOCAL_COMM_ID, dreq_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_DREQ_REMOTE_COMM_ID, dreq_msg,
+ be32_to_cpu(cm_id_priv->id.remote_id));
+ IBA_SET(CM_DREQ_REMOTE_QPN_EECN, dreq_msg,
+ be32_to_cpu(cm_id_priv->remote_qpn));
if (private_data && private_data_len)
- memcpy(dreq_msg->private_data, private_data, private_data_len);
+ IBA_SET_MEM(CM_DREQ_PRIVATE_DATA, dreq_msg, private_data,
+ private_data_len);
}
-int ib_send_cm_dreq(struct ib_cm_id *cm_id,
- const void *private_data,
+static void cm_issue_dreq(struct cm_id_private *cm_id_priv)
+{
+ struct ib_mad_send_buf *msg;
+ int ret;
+
+ lockdep_assert_held(&cm_id_priv->lock);
+
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg))
+ return;
+
+ cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv, NULL, 0);
+
+ trace_icm_send_dreq(&cm_id_priv->id);
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret)
+ cm_free_msg(msg);
+}
+
+int ib_send_cm_dreq(struct ib_cm_id *cm_id, const void *private_data,
u8 private_data_len)
{
- struct cm_id_private *cm_id_priv;
+ struct cm_id_private *cm_id_priv =
+ container_of(cm_id, struct cm_id_private, id);
struct ib_mad_send_buf *msg;
unsigned long flags;
int ret;
@@ -2483,41 +2697,38 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id,
if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
return -EINVAL;
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state != IB_CM_ESTABLISHED) {
- pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
- be32_to_cpu(cm_id->local_id), cm_id->state);
+ if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
+ trace_icm_dreq_skipped(&cm_id_priv->id);
ret = -EINVAL;
- goto out;
+ goto unlock;
}
- if (cm_id->lap_state == IB_CM_LAP_SENT ||
- cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
+ cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
+ ib_cancel_mad(cm_id_priv->msg);
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret) {
+ msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_DREQ_SENT);
+ if (IS_ERR(msg)) {
cm_enter_timewait(cm_id_priv);
- goto out;
+ ret = PTR_ERR(msg);
+ goto unlock;
}
cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
- msg->timeout_ms = cm_id_priv->timeout_ms;
- msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
+ trace_icm_send_dreq(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
cm_enter_timewait(cm_id_priv);
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- cm_free_msg(msg);
- return ret;
+ cm_free_priv_msg(msg);
+ goto unlock;
}
- cm_id->state = IB_CM_DREQ_SENT;
- cm_id_priv->msg = msg;
-out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ cm_id_priv->id.state = IB_CM_DREQ_SENT;
+unlock:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_dreq);
@@ -2528,58 +2739,68 @@ static void cm_format_drep(struct cm_drep_msg *drep_msg,
u8 private_data_len)
{
cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
- drep_msg->local_comm_id = cm_id_priv->id.local_id;
- drep_msg->remote_comm_id = cm_id_priv->id.remote_id;
+ IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_DREP_REMOTE_COMM_ID, drep_msg,
+ be32_to_cpu(cm_id_priv->id.remote_id));
if (private_data && private_data_len)
- memcpy(drep_msg->private_data, private_data, private_data_len);
+ IBA_SET_MEM(CM_DREP_PRIVATE_DATA, drep_msg, private_data,
+ private_data_len);
}
-int ib_send_cm_drep(struct ib_cm_id *cm_id,
- const void *private_data,
- u8 private_data_len)
+static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
+ void *private_data, u8 private_data_len)
{
- struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
- unsigned long flags;
- void *data;
int ret;
+ lockdep_assert_held(&cm_id_priv->lock);
+
if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
return -EINVAL;
- data = cm_copy_private_data(private_data, private_data_len);
- if (IS_ERR(data))
- return PTR_ERR(data);
-
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state != IB_CM_DREQ_RCVD) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- kfree(data);
- pr_debug("%s: local_id %d, cm_idcm_id->state(%d) != IB_CM_DREQ_RCVD\n",
- __func__, be32_to_cpu(cm_id->local_id), cm_id->state);
+ if (cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
+ trace_icm_send_drep_err(&cm_id_priv->id);
+ kfree(private_data);
return -EINVAL;
}
- cm_set_private_data(cm_id_priv, data, private_data_len);
+ cm_set_private_data(cm_id_priv, private_data, private_data_len);
cm_enter_timewait(cm_id_priv);
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto out;
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
+ trace_icm_send_drep(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_free_msg(msg);
return ret;
}
+ return 0;
+}
+
+int ib_send_cm_drep(struct ib_cm_id *cm_id, const void *private_data,
+ u8 private_data_len)
+{
+ struct cm_id_private *cm_id_priv =
+ container_of(cm_id, struct cm_id_private, id);
+ unsigned long flags;
+ void *data;
+ int ret;
-out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ data = cm_copy_private_data(private_data, private_data_len);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ ret = cm_send_drep_locked(cm_id_priv, data, private_data_len);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_drep);
@@ -2592,7 +2813,7 @@ static int cm_issue_drep(struct cm_port *port,
struct cm_drep_msg *drep_msg;
int ret;
- ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
+ ret = cm_alloc_response_msg(port, mad_recv_wc, true, &msg);
if (ret)
return ret;
@@ -2600,9 +2821,14 @@ static int cm_issue_drep(struct cm_port *port,
drep_msg = (struct cm_drep_msg *) msg->mad;
cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
- drep_msg->remote_comm_id = dreq_msg->local_comm_id;
- drep_msg->local_comm_id = dreq_msg->remote_comm_id;
-
+ IBA_SET(CM_DREP_REMOTE_COMM_ID, drep_msg,
+ IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg));
+ IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg,
+ IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
+
+ trace_icm_issue_drep(
+ IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
+ IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
ret = ib_post_send_mad(msg, NULL);
if (ret)
cm_free_msg(msg);
@@ -2615,43 +2841,45 @@ static int cm_dreq_handler(struct cm_work *work)
struct cm_id_private *cm_id_priv;
struct cm_dreq_msg *dreq_msg;
struct ib_mad_send_buf *msg = NULL;
- int ret;
dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
- cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
- dreq_msg->local_comm_id);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)),
+ cpu_to_be32(IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg)));
if (!cm_id_priv) {
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_DREQ_COUNTER]);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_DREQ_COUNTER]);
cm_issue_drep(work->port, work->mad_recv_wc);
- pr_debug("%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n",
- __func__, be32_to_cpu(dreq_msg->local_comm_id),
- be32_to_cpu(dreq_msg->remote_comm_id));
+ trace_icm_no_priv_err(
+ IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
+ IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
return -EINVAL;
}
- work->cm_event.private_data = &dreq_msg->private_data;
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_DREQ_PRIVATE_DATA, dreq_msg);
spin_lock_irq(&cm_id_priv->lock);
- if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
+ if (cm_id_priv->local_qpn !=
+ cpu_to_be32(IBA_GET(CM_DREQ_REMOTE_QPN_EECN, dreq_msg)))
goto unlock;
switch (cm_id_priv->id.state) {
case IB_CM_REP_SENT:
case IB_CM_DREQ_SENT:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ case IB_CM_MRA_REP_RCVD:
+ ib_cancel_mad(cm_id_priv->msg);
break;
case IB_CM_ESTABLISHED:
if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- break;
- case IB_CM_MRA_REP_RCVD:
+ ib_cancel_mad(cm_id_priv->msg);
break;
case IB_CM_TIMEWAIT:
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_DREQ_COUNTER]);
- msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_DREQ_COUNTER]);
+ msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc,
+ true);
if (IS_ERR(msg))
goto unlock;
@@ -2665,26 +2893,16 @@ static int cm_dreq_handler(struct cm_work *work)
cm_free_msg(msg);
goto deref;
case IB_CM_DREQ_RCVD:
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_DREQ_COUNTER]);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_DREQ_COUNTER]);
goto unlock;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_dreq_unknown_err(&cm_id_priv->id);
goto unlock;
}
cm_id_priv->id.state = IB_CM_DREQ_RCVD;
cm_id_priv->tid = dreq_msg->hdr.tid;
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
unlock: spin_unlock_irq(&cm_id_priv->lock);
@@ -2696,15 +2914,16 @@ static int cm_drep_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_drep_msg *drep_msg;
- int ret;
drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
- cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id,
- drep_msg->local_comm_id);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_DREP_REMOTE_COMM_ID, drep_msg)),
+ cpu_to_be32(IBA_GET(CM_DREP_LOCAL_COMM_ID, drep_msg)));
if (!cm_id_priv)
return -EINVAL;
- work->cm_event.private_data = &drep_msg->private_data;
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_DREP_PRIVATE_DATA, drep_msg);
spin_lock_irq(&cm_id_priv->lock);
if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
@@ -2714,81 +2933,83 @@ static int cm_drep_handler(struct cm_work *work)
}
cm_enter_timewait(cm_id_priv);
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
return -EINVAL;
}
-int ib_send_cm_rej(struct ib_cm_id *cm_id,
- enum ib_cm_rej_reason reason,
- void *ari,
- u8 ari_length,
- const void *private_data,
- u8 private_data_len)
+static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
+ enum ib_cm_rej_reason reason, void *ari,
+ u8 ari_length, const void *private_data,
+ u8 private_data_len)
{
- struct cm_id_private *cm_id_priv;
+ enum ib_cm_state state = cm_id_priv->id.state;
struct ib_mad_send_buf *msg;
- unsigned long flags;
int ret;
+ lockdep_assert_held(&cm_id_priv->lock);
+
if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
(ari && ari_length > IB_CM_REJ_ARI_LENGTH))
return -EINVAL;
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+ trace_icm_send_rej(&cm_id_priv->id, reason);
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- switch (cm_id->state) {
+ switch (state) {
case IB_CM_REQ_SENT:
case IB_CM_MRA_REQ_RCVD:
case IB_CM_REQ_RCVD:
case IB_CM_MRA_REQ_SENT:
case IB_CM_REP_RCVD:
case IB_CM_MRA_REP_SENT:
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (!ret)
- cm_format_rej((struct cm_rej_msg *) msg->mad,
- cm_id_priv, reason, ari, ari_length,
- private_data, private_data_len);
-
cm_reset_to_idle(cm_id_priv);
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
+ cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv, reason,
+ ari, ari_length, private_data, private_data_len,
+ state);
break;
case IB_CM_REP_SENT:
case IB_CM_MRA_REP_RCVD:
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (!ret)
- cm_format_rej((struct cm_rej_msg *) msg->mad,
- cm_id_priv, reason, ari, ari_length,
- private_data, private_data_len);
-
cm_enter_timewait(cm_id_priv);
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
+ cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv, reason,
+ ari, ari_length, private_data, private_data_len,
+ state);
break;
default:
- pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
- be32_to_cpu(cm_id_priv->id.local_id), cm_id->state);
- ret = -EINVAL;
- goto out;
+ trace_icm_send_unknown_rej_err(&cm_id_priv->id);
+ return -EINVAL;
}
- if (ret)
- goto out;
-
ret = ib_post_send_mad(msg, NULL);
- if (ret)
+ if (ret) {
cm_free_msg(msg);
+ return ret;
+ }
+
+ return 0;
+}
-out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+int ib_send_cm_rej(struct ib_cm_id *cm_id, enum ib_cm_rej_reason reason,
+ void *ari, u8 ari_length, const void *private_data,
+ u8 private_data_len)
+{
+ struct cm_id_private *cm_id_priv =
+ container_of(cm_id, struct cm_id_private, id);
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ ret = cm_send_rej_locked(cm_id_priv, reason, ari, ari_length,
+ private_data, private_data_len);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_rej);
@@ -2800,41 +3021,33 @@ static void cm_format_rej_event(struct cm_work *work)
rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
param = &work->cm_event.param.rej_rcvd;
- param->ari = rej_msg->ari;
- param->ari_length = cm_rej_get_reject_info_len(rej_msg);
- param->reason = __be16_to_cpu(rej_msg->reason);
- work->cm_event.private_data = &rej_msg->private_data;
+ param->ari = IBA_GET_MEM_PTR(CM_REJ_ARI, rej_msg);
+ param->ari_length = IBA_GET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg);
+ param->reason = IBA_GET(CM_REJ_REASON, rej_msg);
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_REJ_PRIVATE_DATA, rej_msg);
}
-static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
+static struct cm_id_private *cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
{
- struct cm_timewait_info *timewait_info;
struct cm_id_private *cm_id_priv;
__be32 remote_id;
- remote_id = rej_msg->local_comm_id;
-
- if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
- spin_lock_irq(&cm.lock);
- timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari),
- remote_id);
- if (!timewait_info) {
- spin_unlock_irq(&cm.lock);
- return NULL;
- }
- cm_id_priv = xa_load(&cm.local_id_table,
- cm_local_id(timewait_info->work.local_id));
- if (cm_id_priv) {
- if (cm_id_priv->id.remote_id == remote_id)
- atomic_inc(&cm_id_priv->refcount);
- else
- cm_id_priv = NULL;
- }
- spin_unlock_irq(&cm.lock);
- } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
- cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
+ remote_id = cpu_to_be32(IBA_GET(CM_REJ_LOCAL_COMM_ID, rej_msg));
+
+ if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_TIMEOUT) {
+ cm_id_priv = cm_find_remote_id(
+ *((__be64 *)IBA_GET_MEM_PTR(CM_REJ_ARI, rej_msg)),
+ remote_id);
+ } else if (IBA_GET(CM_REJ_MESSAGE_REJECTED, rej_msg) ==
+ CM_MSG_RESPONSE_REQ)
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_REJ_REMOTE_COMM_ID, rej_msg)),
+ 0);
else
- cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_REJ_REMOTE_COMM_ID, rej_msg)),
+ remote_id);
return cm_id_priv;
}
@@ -2843,7 +3056,6 @@ static int cm_rej_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_rej_msg *rej_msg;
- int ret;
rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_rejected_id(rej_msg);
@@ -2858,18 +3070,18 @@ static int cm_rej_handler(struct cm_work *work)
case IB_CM_MRA_REQ_RCVD:
case IB_CM_REP_SENT:
case IB_CM_MRA_REP_RCVD:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- /* fall through */
+ ib_cancel_mad(cm_id_priv->msg);
+ fallthrough;
case IB_CM_REQ_RCVD:
case IB_CM_MRA_REQ_SENT:
- if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN)
+ if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_STALE_CONN)
cm_enter_timewait(cm_id_priv);
else
cm_reset_to_idle(cm_id_priv);
break;
case IB_CM_DREQ_SENT:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- /* fall through */
+ ib_cancel_mad(cm_id_priv->msg);
+ fallthrough;
case IB_CM_REP_RCVD:
case IB_CM_MRA_REP_SENT:
cm_enter_timewait(cm_id_priv);
@@ -2878,127 +3090,79 @@ static int cm_rej_handler(struct cm_work *work)
if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->msg);
cm_enter_timewait(cm_id_priv);
break;
}
- /* fall through */
+ fallthrough;
default:
+ trace_icm_rej_unknown_err(&cm_id_priv->id);
spin_unlock_irq(&cm_id_priv->lock);
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
- ret = -EINVAL;
goto out;
}
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
return -EINVAL;
}
-int ib_send_cm_mra(struct ib_cm_id *cm_id,
- u8 service_timeout,
- const void *private_data,
- u8 private_data_len)
+int ib_prepare_cm_mra(struct ib_cm_id *cm_id)
{
struct cm_id_private *cm_id_priv;
- struct ib_mad_send_buf *msg;
enum ib_cm_state cm_state;
enum ib_cm_lap_state lap_state;
- enum cm_msg_response msg_response;
- void *data;
unsigned long flags;
- int ret;
-
- if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
- return -EINVAL;
-
- data = cm_copy_private_data(private_data, private_data_len);
- if (IS_ERR(data))
- return PTR_ERR(data);
+ int ret = 0;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
- switch(cm_id_priv->id.state) {
+ switch (cm_id_priv->id.state) {
case IB_CM_REQ_RCVD:
cm_state = IB_CM_MRA_REQ_SENT;
lap_state = cm_id->lap_state;
- msg_response = CM_MSG_RESPONSE_REQ;
break;
case IB_CM_REP_RCVD:
cm_state = IB_CM_MRA_REP_SENT;
lap_state = cm_id->lap_state;
- msg_response = CM_MSG_RESPONSE_REP;
break;
case IB_CM_ESTABLISHED:
if (cm_id->lap_state == IB_CM_LAP_RCVD) {
cm_state = cm_id->state;
lap_state = IB_CM_MRA_LAP_SENT;
- msg_response = CM_MSG_RESPONSE_OTHER;
break;
}
- /* fall through */
+ fallthrough;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_prepare_mra_unknown_err(&cm_id_priv->id);
ret = -EINVAL;
- goto error1;
- }
-
- if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto error1;
-
- cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
- msg_response, service_timeout,
- private_data, private_data_len);
- ret = ib_post_send_mad(msg, NULL);
- if (ret)
- goto error2;
+ goto error_unlock;
}
cm_id->state = cm_state;
cm_id->lap_state = lap_state;
- cm_id_priv->service_timeout = service_timeout;
- cm_set_private_data(cm_id_priv, data, private_data_len);
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- return 0;
+ cm_set_private_data(cm_id_priv, NULL, 0);
-error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- kfree(data);
- return ret;
-
-error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- kfree(data);
- cm_free_msg(msg);
+error_unlock:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
-EXPORT_SYMBOL(ib_send_cm_mra);
+EXPORT_SYMBOL(ib_prepare_cm_mra);
-static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
+static struct cm_id_private *cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
{
- switch (cm_mra_get_msg_mraed(mra_msg)) {
+ switch (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg)) {
case CM_MSG_RESPONSE_REQ:
- return cm_acquire_id(mra_msg->remote_comm_id, 0);
+ return cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_MRA_REMOTE_COMM_ID, mra_msg)),
+ 0);
case CM_MSG_RESPONSE_REP:
case CM_MSG_RESPONSE_OTHER:
- return cm_acquire_id(mra_msg->remote_comm_id,
- mra_msg->local_comm_id);
+ return cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_MRA_REMOTE_COMM_ID, mra_msg)),
+ cpu_to_be32(IBA_GET(CM_MRA_LOCAL_COMM_ID, mra_msg)));
default:
return NULL;
}
@@ -3008,71 +3172,62 @@ static int cm_mra_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_mra_msg *mra_msg;
- int timeout, ret;
+ int timeout;
mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_mraed_id(mra_msg);
if (!cm_id_priv)
return -EINVAL;
- work->cm_event.private_data = &mra_msg->private_data;
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_MRA_PRIVATE_DATA, mra_msg);
work->cm_event.param.mra_rcvd.service_timeout =
- cm_mra_get_service_timeout(mra_msg);
- timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
+ IBA_GET(CM_MRA_SERVICE_TIMEOUT, mra_msg);
+ timeout = cm_convert_to_ms(IBA_GET(CM_MRA_SERVICE_TIMEOUT, mra_msg)) +
cm_convert_to_ms(cm_id_priv->av.timeout);
spin_lock_irq(&cm_id_priv->lock);
switch (cm_id_priv->id.state) {
case IB_CM_REQ_SENT:
- if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
- ib_modify_mad(cm_id_priv->av.port->mad_agent,
- cm_id_priv->msg, timeout))
+ if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
+ CM_MSG_RESPONSE_REQ ||
+ ib_modify_mad(cm_id_priv->msg, timeout))
goto out;
cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
break;
case IB_CM_REP_SENT:
- if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
- ib_modify_mad(cm_id_priv->av.port->mad_agent,
- cm_id_priv->msg, timeout))
+ if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
+ CM_MSG_RESPONSE_REP ||
+ ib_modify_mad(cm_id_priv->msg, timeout))
goto out;
cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
break;
case IB_CM_ESTABLISHED:
- if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
+ if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
+ CM_MSG_RESPONSE_OTHER ||
cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
- ib_modify_mad(cm_id_priv->av.port->mad_agent,
- cm_id_priv->msg, timeout)) {
+ ib_modify_mad(cm_id_priv->msg, timeout)) {
if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
- atomic_long_inc(&work->port->
- counter_group[CM_RECV_DUPLICATES].
- counter[CM_MRA_COUNTER]);
+ atomic_long_inc(
+ &work->port->counters[CM_RECV_DUPLICATES]
+ [CM_MRA_COUNTER]);
goto out;
}
cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
break;
case IB_CM_MRA_REQ_RCVD:
case IB_CM_MRA_REP_RCVD:
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_MRA_COUNTER]);
- /* fall through */
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_MRA_COUNTER]);
+ fallthrough;
default:
- pr_debug("%s local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_mra_unknown_err(&cm_id_priv->id);
goto out;
}
cm_id_priv->msg->context[1] = (void *) (unsigned long)
cm_id_priv->id.state;
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
spin_unlock_irq(&cm_id_priv->lock);
@@ -3080,117 +3235,23 @@ out:
return -EINVAL;
}
-static void cm_format_lap(struct cm_lap_msg *lap_msg,
- struct cm_id_private *cm_id_priv,
- struct sa_path_rec *alternate_path,
- const void *private_data,
- u8 private_data_len)
-{
- bool alt_ext = false;
-
- if (alternate_path->rec_type == SA_PATH_REC_TYPE_OPA)
- alt_ext = opa_is_extended_lid(alternate_path->opa.dlid,
- alternate_path->opa.slid);
- cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
- cm_form_tid(cm_id_priv));
- lap_msg->local_comm_id = cm_id_priv->id.local_id;
- lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
- cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
- /* todo: need remote CM response timeout */
- cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
- lap_msg->alt_local_lid =
- htons(ntohl(sa_path_get_slid(alternate_path)));
- lap_msg->alt_remote_lid =
- htons(ntohl(sa_path_get_dlid(alternate_path)));
- lap_msg->alt_local_gid = alternate_path->sgid;
- lap_msg->alt_remote_gid = alternate_path->dgid;
- if (alt_ext) {
- lap_msg->alt_local_gid.global.interface_id
- = OPA_MAKE_ID(be32_to_cpu(alternate_path->opa.slid));
- lap_msg->alt_remote_gid.global.interface_id
- = OPA_MAKE_ID(be32_to_cpu(alternate_path->opa.dlid));
- }
- cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
- cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class);
- lap_msg->alt_hop_limit = alternate_path->hop_limit;
- cm_lap_set_packet_rate(lap_msg, alternate_path->rate);
- cm_lap_set_sl(lap_msg, alternate_path->sl);
- cm_lap_set_subnet_local(lap_msg, 1); /* local only... */
- cm_lap_set_local_ack_timeout(lap_msg,
- cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
- alternate_path->packet_life_time));
-
- if (private_data && private_data_len)
- memcpy(lap_msg->private_data, private_data, private_data_len);
-}
-
-int ib_send_cm_lap(struct ib_cm_id *cm_id,
- struct sa_path_rec *alternate_path,
- const void *private_data,
- u8 private_data_len)
-{
- struct cm_id_private *cm_id_priv;
- struct ib_mad_send_buf *msg;
- unsigned long flags;
- int ret;
-
- if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE)
- return -EINVAL;
-
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state != IB_CM_ESTABLISHED ||
- (cm_id->lap_state != IB_CM_LAP_UNINIT &&
- cm_id->lap_state != IB_CM_LAP_IDLE)) {
- ret = -EINVAL;
- goto out;
- }
-
- ret = cm_init_av_by_path(alternate_path, NULL, &cm_id_priv->alt_av,
- cm_id_priv);
- if (ret)
- goto out;
- cm_id_priv->alt_av.timeout =
- cm_ack_timeout(cm_id_priv->target_ack_delay,
- cm_id_priv->alt_av.timeout - 1);
-
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto out;
-
- cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
- alternate_path, private_data, private_data_len);
- msg->timeout_ms = cm_id_priv->timeout_ms;
- msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
-
- ret = ib_post_send_mad(msg, NULL);
- if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- cm_free_msg(msg);
- return ret;
- }
-
- cm_id->lap_state = IB_CM_LAP_SENT;
- cm_id_priv->msg = msg;
-
-out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- return ret;
-}
-EXPORT_SYMBOL(ib_send_cm_lap);
-
static void cm_format_path_lid_from_lap(struct cm_lap_msg *lap_msg,
struct sa_path_rec *path)
{
u32 lid;
if (path->rec_type != SA_PATH_REC_TYPE_OPA) {
- sa_path_set_dlid(path, ntohs(lap_msg->alt_local_lid));
- sa_path_set_slid(path, ntohs(lap_msg->alt_remote_lid));
+ sa_path_set_dlid(path, IBA_GET(CM_LAP_ALTERNATE_LOCAL_PORT_LID,
+ lap_msg));
+ sa_path_set_slid(path, IBA_GET(CM_LAP_ALTERNATE_REMOTE_PORT_LID,
+ lap_msg));
} else {
- lid = opa_get_lid_from_gid(&lap_msg->alt_local_gid);
+ lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
+ CM_LAP_ALTERNATE_LOCAL_PORT_GID, lap_msg));
sa_path_set_dlid(path, lid);
- lid = opa_get_lid_from_gid(&lap_msg->alt_remote_gid);
+ lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
+ CM_LAP_ALTERNATE_REMOTE_PORT_GID, lap_msg));
sa_path_set_slid(path, lid);
}
}
@@ -3199,20 +3260,23 @@ static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
struct sa_path_rec *path,
struct cm_lap_msg *lap_msg)
{
- path->dgid = lap_msg->alt_local_gid;
- path->sgid = lap_msg->alt_remote_gid;
- path->flow_label = cm_lap_get_flow_label(lap_msg);
- path->hop_limit = lap_msg->alt_hop_limit;
- path->traffic_class = cm_lap_get_traffic_class(lap_msg);
+ path->dgid = *IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_LOCAL_PORT_GID, lap_msg);
+ path->sgid =
+ *IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_REMOTE_PORT_GID, lap_msg);
+ path->flow_label =
+ cpu_to_be32(IBA_GET(CM_LAP_ALTERNATE_FLOW_LABEL, lap_msg));
+ path->hop_limit = IBA_GET(CM_LAP_ALTERNATE_HOP_LIMIT, lap_msg);
+ path->traffic_class = IBA_GET(CM_LAP_ALTERNATE_TRAFFIC_CLASS, lap_msg);
path->reversible = 1;
path->pkey = cm_id_priv->pkey;
- path->sl = cm_lap_get_sl(lap_msg);
+ path->sl = IBA_GET(CM_LAP_ALTERNATE_SL, lap_msg);
path->mtu_selector = IB_SA_EQ;
path->mtu = cm_id_priv->path_mtu;
path->rate_selector = IB_SA_EQ;
- path->rate = cm_lap_get_packet_rate(lap_msg);
+ path->rate = IBA_GET(CM_LAP_ALTERNATE_PACKET_RATE, lap_msg);
path->packet_life_time_selector = IB_SA_EQ;
- path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg);
+ path->packet_life_time =
+ IBA_GET(CM_LAP_ALTERNATE_LOCAL_ACK_TIMEOUT, lap_msg);
path->packet_life_time -= (path->packet_life_time > 0);
cm_format_path_lid_from_lap(lap_msg, path);
}
@@ -3223,6 +3287,8 @@ static int cm_lap_handler(struct cm_work *work)
struct cm_lap_msg *lap_msg;
struct ib_cm_lap_event_param *param;
struct ib_mad_send_buf *msg = NULL;
+ struct rdma_ah_attr ah_attr;
+ struct cm_av alt_av = {};
int ret;
/* Currently Alternate path messages are not supported for
@@ -3234,22 +3300,42 @@ static int cm_lap_handler(struct cm_work *work)
/* todo: verify LAP request and send reject APR if invalid. */
lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
- cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
- lap_msg->local_comm_id);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_LAP_REMOTE_COMM_ID, lap_msg)),
+ cpu_to_be32(IBA_GET(CM_LAP_LOCAL_COMM_ID, lap_msg)));
if (!cm_id_priv)
return -EINVAL;
param = &work->cm_event.param.lap_rcvd;
memset(&work->path[0], 0, sizeof(work->path[1]));
cm_path_set_rec_type(work->port->cm_dev->ib_device,
- work->port->port_num,
- &work->path[0],
- &lap_msg->alt_local_gid);
+ work->port->port_num, &work->path[0],
+ IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_LOCAL_PORT_GID,
+ lap_msg));
param->alternate_path = &work->path[0];
cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
- work->cm_event.private_data = &lap_msg->private_data;
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_LAP_PRIVATE_DATA, lap_msg);
+
+ ret = ib_init_ah_attr_from_wc(work->port->cm_dev->ib_device,
+ work->port->port_num,
+ work->mad_recv_wc->wc,
+ work->mad_recv_wc->recv_buf.grh,
+ &ah_attr);
+ if (ret)
+ goto deref;
+
+ ret = cm_init_av_by_path(param->alternate_path, NULL, &alt_av);
+ if (ret) {
+ rdma_destroy_ah_attr(&ah_attr);
+ goto deref;
+ }
spin_lock_irq(&cm_id_priv->lock);
+ cm_init_av_for_lap(work->port, work->mad_recv_wc->wc,
+ &ah_attr, &cm_id_priv->av);
+ cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av);
+
if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
goto unlock;
@@ -3258,15 +3344,15 @@ static int cm_lap_handler(struct cm_work *work)
case IB_CM_LAP_IDLE:
break;
case IB_CM_MRA_LAP_SENT:
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_LAP_COUNTER]);
- msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_LAP_COUNTER]);
+ msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc,
+ true);
if (IS_ERR(msg))
goto unlock;
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
CM_MSG_RESPONSE_OTHER,
- cm_id_priv->service_timeout,
cm_id_priv->private_data,
cm_id_priv->private_data_len);
spin_unlock_irq(&cm_id_priv->lock);
@@ -3276,35 +3362,16 @@ static int cm_lap_handler(struct cm_work *work)
cm_free_msg(msg);
goto deref;
case IB_CM_LAP_RCVD:
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_LAP_COUNTER]);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_LAP_COUNTER]);
goto unlock;
default:
goto unlock;
}
- ret = cm_init_av_for_lap(work->port, work->mad_recv_wc->wc,
- work->mad_recv_wc->recv_buf.grh,
- &cm_id_priv->av);
- if (ret)
- goto unlock;
-
- ret = cm_init_av_by_path(param->alternate_path, NULL,
- &cm_id_priv->alt_av, cm_id_priv);
- if (ret)
- goto unlock;
-
cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
cm_id_priv->tid = lap_msg->hdr.tid;
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
unlock: spin_unlock_irq(&cm_id_priv->lock);
@@ -3312,77 +3379,10 @@ deref: cm_deref_id(cm_id_priv);
return -EINVAL;
}
-static void cm_format_apr(struct cm_apr_msg *apr_msg,
- struct cm_id_private *cm_id_priv,
- enum ib_cm_apr_status status,
- void *info,
- u8 info_length,
- const void *private_data,
- u8 private_data_len)
-{
- cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid);
- apr_msg->local_comm_id = cm_id_priv->id.local_id;
- apr_msg->remote_comm_id = cm_id_priv->id.remote_id;
- apr_msg->ap_status = (u8) status;
-
- if (info && info_length) {
- apr_msg->info_length = info_length;
- memcpy(apr_msg->info, info, info_length);
- }
-
- if (private_data && private_data_len)
- memcpy(apr_msg->private_data, private_data, private_data_len);
-}
-
-int ib_send_cm_apr(struct ib_cm_id *cm_id,
- enum ib_cm_apr_status status,
- void *info,
- u8 info_length,
- const void *private_data,
- u8 private_data_len)
-{
- struct cm_id_private *cm_id_priv;
- struct ib_mad_send_buf *msg;
- unsigned long flags;
- int ret;
-
- if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) ||
- (info && info_length > IB_CM_APR_INFO_LENGTH))
- return -EINVAL;
-
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state != IB_CM_ESTABLISHED ||
- (cm_id->lap_state != IB_CM_LAP_RCVD &&
- cm_id->lap_state != IB_CM_MRA_LAP_SENT)) {
- ret = -EINVAL;
- goto out;
- }
-
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto out;
-
- cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
- info, info_length, private_data, private_data_len);
- ret = ib_post_send_mad(msg, NULL);
- if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- cm_free_msg(msg);
- return ret;
- }
-
- cm_id->lap_state = IB_CM_LAP_IDLE;
-out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- return ret;
-}
-EXPORT_SYMBOL(ib_send_cm_apr);
-
static int cm_apr_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_apr_msg *apr_msg;
- int ret;
/* Currently Alternate path messages are not supported for
* RoCE link layer.
@@ -3392,15 +3392,20 @@ static int cm_apr_handler(struct cm_work *work)
return -EINVAL;
apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
- cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
- apr_msg->local_comm_id);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_APR_REMOTE_COMM_ID, apr_msg)),
+ cpu_to_be32(IBA_GET(CM_APR_LOCAL_COMM_ID, apr_msg)));
if (!cm_id_priv)
return -EINVAL; /* Unmatched reply. */
- work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status;
- work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info;
- work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
- work->cm_event.private_data = &apr_msg->private_data;
+ work->cm_event.param.apr_rcvd.ap_status =
+ IBA_GET(CM_APR_AR_STATUS, apr_msg);
+ work->cm_event.param.apr_rcvd.apr_info =
+ IBA_GET_MEM_PTR(CM_APR_ADDITIONAL_INFORMATION, apr_msg);
+ work->cm_event.param.apr_rcvd.info_len =
+ IBA_GET(CM_APR_ADDITIONAL_INFORMATION_LENGTH, apr_msg);
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_APR_PRIVATE_DATA, apr_msg);
spin_lock_irq(&cm_id_priv->lock);
if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
@@ -3410,18 +3415,8 @@ static int cm_apr_handler(struct cm_work *work)
goto out;
}
cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- cm_id_priv->msg = NULL;
-
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -3432,9 +3427,8 @@ static int cm_timewait_handler(struct cm_work *work)
{
struct cm_timewait_info *timewait_info;
struct cm_id_private *cm_id_priv;
- int ret;
- timewait_info = (struct cm_timewait_info *)work;
+ timewait_info = container_of(work, struct cm_timewait_info, work);
spin_lock_irq(&cm.lock);
list_del(&timewait_info->list);
spin_unlock_irq(&cm.lock);
@@ -3451,15 +3445,7 @@ static int cm_timewait_handler(struct cm_work *work)
goto out;
}
cm_id_priv->id.state = IB_CM_IDLE;
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -3472,13 +3458,16 @@ static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
{
cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
cm_form_tid(cm_id_priv));
- sidr_req_msg->request_id = cm_id_priv->id.local_id;
- sidr_req_msg->pkey = param->path->pkey;
- sidr_req_msg->service_id = param->service_id;
+ IBA_SET(CM_SIDR_REQ_REQUESTID, sidr_req_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_SIDR_REQ_PARTITION_KEY, sidr_req_msg,
+ be16_to_cpu(param->path->pkey));
+ IBA_SET(CM_SIDR_REQ_SERVICEID, sidr_req_msg,
+ be64_to_cpu(param->service_id));
if (param->private_data && param->private_data_len)
- memcpy(sidr_req_msg->private_data, param->private_data,
- param->private_data_len);
+ IBA_SET_MEM(CM_SIDR_REQ_PRIVATE_DATA, sidr_req_msg,
+ param->private_data, param->private_data_len);
}
int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
@@ -3486,6 +3475,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
{
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
+ struct cm_av av = {};
unsigned long flags;
int ret;
@@ -3494,40 +3484,40 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
return -EINVAL;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- ret = cm_init_av_by_path(param->path, param->sgid_attr,
- &cm_id_priv->av,
- cm_id_priv);
+ ret = cm_init_av_by_path(param->path, param->sgid_attr, &av);
if (ret)
- goto out;
+ return ret;
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ cm_move_av_from_path(&cm_id_priv->av, &av);
cm_id->service_id = param->service_id;
- cm_id->service_mask = ~cpu_to_be64(0);
cm_id_priv->timeout_ms = param->timeout_ms;
cm_id_priv->max_cm_retries = param->max_cm_retries;
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto out;
-
- cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
- param);
- msg->timeout_ms = cm_id_priv->timeout_ms;
- msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
-
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state == IB_CM_IDLE)
- ret = ib_post_send_mad(msg, NULL);
- else
+ if (cm_id->state != IB_CM_IDLE) {
ret = -EINVAL;
+ goto out_unlock;
+ }
- if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- cm_free_msg(msg);
- goto out;
+ msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_SIDR_REQ_SENT);
+ if (IS_ERR(msg)) {
+ ret = PTR_ERR(msg);
+ goto out_unlock;
}
+
+ cm_format_sidr_req((struct cm_sidr_req_msg *)msg->mad, cm_id_priv,
+ param);
+
+ trace_icm_send_sidr_req(&cm_id_priv->id);
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret)
+ goto out_free;
cm_id->state = IB_CM_SIDR_REQ_SENT;
- cm_id_priv->msg = msg;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-out:
+ return 0;
+out_free:
+ cm_free_priv_msg(msg);
+out_unlock:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_sidr_req);
@@ -3542,72 +3532,86 @@ static void cm_format_sidr_req_event(struct cm_work *work,
sidr_req_msg = (struct cm_sidr_req_msg *)
work->mad_recv_wc->recv_buf.mad;
param = &work->cm_event.param.sidr_req_rcvd;
- param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
+ param->pkey = IBA_GET(CM_SIDR_REQ_PARTITION_KEY, sidr_req_msg);
param->listen_id = listen_id;
- param->service_id = sidr_req_msg->service_id;
+ param->service_id =
+ cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg));
param->bth_pkey = cm_get_bth_pkey(work);
param->port = work->port->port_num;
param->sgid_attr = rx_cm_id->av.ah_attr.grh.sgid_attr;
- work->cm_event.private_data = &sidr_req_msg->private_data;
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_SIDR_REQ_PRIVATE_DATA, sidr_req_msg);
}
static int cm_sidr_req_handler(struct cm_work *work)
{
- struct ib_cm_id *cm_id;
- struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
+ struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
struct cm_sidr_req_msg *sidr_req_msg;
struct ib_wc *wc;
int ret;
- cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
- if (IS_ERR(cm_id))
- return PTR_ERR(cm_id);
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+ cm_id_priv =
+ cm_alloc_id_priv(work->port->cm_dev->ib_device, NULL, NULL);
+ if (IS_ERR(cm_id_priv))
+ return PTR_ERR(cm_id_priv);
/* Record SGID/SLID and request ID for lookup. */
sidr_req_msg = (struct cm_sidr_req_msg *)
work->mad_recv_wc->recv_buf.mad;
+
+ cm_id_priv->id.remote_id =
+ cpu_to_be32(IBA_GET(CM_SIDR_REQ_REQUESTID, sidr_req_msg));
+ cm_id_priv->id.service_id =
+ cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg));
+ cm_id_priv->tid = sidr_req_msg->hdr.tid;
+
wc = work->mad_recv_wc->wc;
- cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
- cm_id_priv->av.dgid.global.interface_id = 0;
+ cm_id_priv->sidr_slid = wc->slid;
ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
work->mad_recv_wc->recv_buf.grh,
&cm_id_priv->av);
if (ret)
goto out;
- cm_id_priv->id.remote_id = sidr_req_msg->request_id;
- cm_id_priv->tid = sidr_req_msg->hdr.tid;
- atomic_inc(&cm_id_priv->work_count);
-
spin_lock_irq(&cm.lock);
- cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
- if (cur_cm_id_priv) {
+ listen_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
+ if (listen_cm_id_priv) {
spin_unlock_irq(&cm.lock);
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_SIDR_REQ_COUNTER]);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_SIDR_REQ_COUNTER]);
goto out; /* Duplicate message. */
}
cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
- cur_cm_id_priv = cm_find_listen(cm_id->device,
- sidr_req_msg->service_id);
- if (!cur_cm_id_priv) {
+ listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
+ cm_id_priv->id.service_id);
+ if (!listen_cm_id_priv) {
spin_unlock_irq(&cm.lock);
- cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
+ ib_send_cm_sidr_rep(&cm_id_priv->id,
+ &(struct ib_cm_sidr_rep_param){
+ .status = IB_SIDR_UNSUPPORTED });
goto out; /* No match. */
}
- atomic_inc(&cur_cm_id_priv->refcount);
- atomic_inc(&cm_id_priv->refcount);
spin_unlock_irq(&cm.lock);
- cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
- cm_id_priv->id.context = cur_cm_id_priv->id.context;
- cm_id_priv->id.service_id = sidr_req_msg->service_id;
- cm_id_priv->id.service_mask = ~cpu_to_be64(0);
+ cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
+ cm_id_priv->id.context = listen_cm_id_priv->id.context;
- cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id);
- cm_process_work(cm_id_priv, work);
- cm_deref_id(cur_cm_id_priv);
+ /*
+ * A SIDR ID does not need to be in the xarray since it does not receive
+ * mads, is not placed in the remote_id or remote_qpn rbtree, and does
+ * not enter timewait.
+ */
+
+ cm_format_sidr_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
+ ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
+ cm_free_work(work);
+ /*
+ * A pointer to the listen_cm_id is held in the event, so this deref
+ * must be after the event is delivered above.
+ */
+ cm_deref_id(listen_cm_id_priv);
+ if (ret)
+ cm_destroy_id(&cm_id_priv->id, ret);
return 0;
out:
ib_destroy_cm_id(&cm_id_priv->id);
@@ -3618,57 +3622,59 @@ static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
struct cm_id_private *cm_id_priv,
struct ib_cm_sidr_rep_param *param)
{
- cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
- cm_id_priv->tid);
- sidr_rep_msg->request_id = cm_id_priv->id.remote_id;
- sidr_rep_msg->status = param->status;
- cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num));
- sidr_rep_msg->service_id = cm_id_priv->id.service_id;
- sidr_rep_msg->qkey = cpu_to_be32(param->qkey);
+ cm_format_mad_ece_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
+ cm_id_priv->tid, param->ece.attr_mod);
+ IBA_SET(CM_SIDR_REP_REQUESTID, sidr_rep_msg,
+ be32_to_cpu(cm_id_priv->id.remote_id));
+ IBA_SET(CM_SIDR_REP_STATUS, sidr_rep_msg, param->status);
+ IBA_SET(CM_SIDR_REP_QPN, sidr_rep_msg, param->qp_num);
+ IBA_SET(CM_SIDR_REP_SERVICEID, sidr_rep_msg,
+ be64_to_cpu(cm_id_priv->id.service_id));
+ IBA_SET(CM_SIDR_REP_Q_KEY, sidr_rep_msg, param->qkey);
+ IBA_SET(CM_SIDR_REP_VENDOR_ID_L, sidr_rep_msg,
+ param->ece.vendor_id & 0xFF);
+ IBA_SET(CM_SIDR_REP_VENDOR_ID_H, sidr_rep_msg,
+ (param->ece.vendor_id >> 8) & 0xFF);
if (param->info && param->info_length)
- memcpy(sidr_rep_msg->info, param->info, param->info_length);
+ IBA_SET_MEM(CM_SIDR_REP_ADDITIONAL_INFORMATION, sidr_rep_msg,
+ param->info, param->info_length);
if (param->private_data && param->private_data_len)
- memcpy(sidr_rep_msg->private_data, param->private_data,
- param->private_data_len);
+ IBA_SET_MEM(CM_SIDR_REP_PRIVATE_DATA, sidr_rep_msg,
+ param->private_data, param->private_data_len);
}
-int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
- struct ib_cm_sidr_rep_param *param)
+static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
+ struct ib_cm_sidr_rep_param *param)
{
- struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
unsigned long flags;
int ret;
+ lockdep_assert_held(&cm_id_priv->lock);
+
if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
(param->private_data &&
param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
return -EINVAL;
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
- ret = -EINVAL;
- goto error;
- }
+ if (cm_id_priv->id.state != IB_CM_SIDR_REQ_RCVD)
+ return -EINVAL;
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto error;
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
param);
+ trace_icm_send_sidr_rep(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_free_msg(msg);
return ret;
}
- cm_id->state = IB_CM_IDLE;
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-
+ cm_id_priv->id.state = IB_CM_IDLE;
spin_lock_irqsave(&cm.lock, flags);
if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) {
rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
@@ -3676,8 +3682,19 @@ int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
}
spin_unlock_irqrestore(&cm.lock, flags);
return 0;
+}
-error: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
+ struct ib_cm_sidr_rep_param *param)
+{
+ struct cm_id_private *cm_id_priv =
+ container_of(cm_id, struct cm_id_private, id);
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ ret = cm_send_sidr_rep_locked(cm_id_priv, param);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_sidr_rep);
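ib_send_cm_sidr_rep() above is split into a thin exported wrapper and a cm_send_sidr_rep_locked() worker that states its locking contract with lockdep_assert_held(), presumably so other paths that already hold cm_id_priv->lock can issue the reply without dropping and re-taking it. Reduced to a skeleton, the pattern looks like this (names hypothetical, illustrative only):

static int foo_action_locked(struct foo_priv *priv, int arg)
{
	lockdep_assert_held(&priv->lock);
	/* ... mutate state that priv->lock protects ... */
	return 0;
}

int foo_action(struct foo_priv *priv, int arg)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&priv->lock, flags);
	ret = foo_action_locked(priv, arg);
	spin_unlock_irqrestore(&priv->lock, flags);
	return ret;
}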
@@ -3691,13 +3708,16 @@ static void cm_format_sidr_rep_event(struct cm_work *work,
sidr_rep_msg = (struct cm_sidr_rep_msg *)
work->mad_recv_wc->recv_buf.mad;
param = &work->cm_event.param.sidr_rep_rcvd;
- param->status = sidr_rep_msg->status;
- param->qkey = be32_to_cpu(sidr_rep_msg->qkey);
- param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
- param->info = &sidr_rep_msg->info;
- param->info_len = sidr_rep_msg->info_length;
+ param->status = IBA_GET(CM_SIDR_REP_STATUS, sidr_rep_msg);
+ param->qkey = IBA_GET(CM_SIDR_REP_Q_KEY, sidr_rep_msg);
+ param->qpn = IBA_GET(CM_SIDR_REP_QPN, sidr_rep_msg);
+ param->info = IBA_GET_MEM_PTR(CM_SIDR_REP_ADDITIONAL_INFORMATION,
+ sidr_rep_msg);
+ param->info_len = IBA_GET(CM_SIDR_REP_ADDITIONAL_INFORMATION_LENGTH,
+ sidr_rep_msg);
param->sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
- work->cm_event.private_data = &sidr_rep_msg->private_data;
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_SIDR_REP_PRIVATE_DATA, sidr_rep_msg);
}
static int cm_sidr_rep_handler(struct cm_work *work)
@@ -3707,7 +3727,8 @@ static int cm_sidr_rep_handler(struct cm_work *work)
sidr_rep_msg = (struct cm_sidr_rep_msg *)
work->mad_recv_wc->recv_buf.mad;
- cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_SIDR_REP_REQUESTID, sidr_rep_msg)), 0);
if (!cm_id_priv)
return -EINVAL; /* Unmatched reply. */
@@ -3717,7 +3738,7 @@ static int cm_sidr_rep_handler(struct cm_work *work)
goto out;
}
cm_id_priv->id.state = IB_CM_IDLE;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->msg);
spin_unlock_irq(&cm_id_priv->lock);
cm_format_sidr_rep_event(work, cm_id_priv);
@@ -3728,25 +3749,29 @@ out:
return -EINVAL;
}
-static void cm_process_send_error(struct ib_mad_send_buf *msg,
+static void cm_process_send_error(struct cm_id_private *cm_id_priv,
+ struct ib_mad_send_buf *msg,
enum ib_wc_status wc_status)
{
- struct cm_id_private *cm_id_priv;
- struct ib_cm_event cm_event;
- enum ib_cm_state state;
+ enum ib_cm_state state = (unsigned long) msg->context[1];
+ struct ib_cm_event cm_event = {};
int ret;
- memset(&cm_event, 0, sizeof cm_event);
- cm_id_priv = msg->context[0];
-
- /* Discard old sends or ones without a response. */
+ /* Discard old sends. */
spin_lock_irq(&cm_id_priv->lock);
- state = (enum ib_cm_state) (unsigned long) msg->context[1];
- if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
- goto discard;
+ if (msg != cm_id_priv->msg) {
+ spin_unlock_irq(&cm_id_priv->lock);
+ cm_free_msg(msg);
+ cm_deref_id(cm_id_priv);
+ return;
+ }
+ cm_free_priv_msg(msg);
+
+ if (state != cm_id_priv->id.state || wc_status == IB_WC_SUCCESS ||
+ wc_status == IB_WC_WR_FLUSH_ERR)
+ goto out_unlock;
- pr_debug_ratelimited("CM: failed sending MAD in state %d. (%s)\n",
- state, ib_wc_status_msg(wc_status));
+ trace_icm_mad_send_err(state, wc_status);
switch (state) {
case IB_CM_REQ_SENT:
case IB_CM_MRA_REQ_RCVD:
@@ -3767,26 +3792,25 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg,
cm_event.event = IB_CM_SIDR_REQ_ERROR;
break;
default:
- goto discard;
+ goto out_unlock;
}
spin_unlock_irq(&cm_id_priv->lock);
cm_event.param.send_status = wc_status;
/* No other events can occur on the cm_id at this point. */
ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
- cm_free_msg(msg);
if (ret)
ib_destroy_cm_id(&cm_id_priv->id);
return;
-discard:
+out_unlock:
spin_unlock_irq(&cm_id_priv->lock);
- cm_free_msg(msg);
}
static void cm_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_send_wc)
{
struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
+ struct cm_id_private *cm_id_priv;
struct cm_port *port;
u16 attr_index;
@@ -3794,33 +3818,22 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent,
attr_index = be16_to_cpu(((struct ib_mad_hdr *)
msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;
- /*
- * If the send was in response to a received message (context[0] is not
- * set to a cm_id), and is not a REJ, then it is a send that was
- * manually retried.
- */
- if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
+ if (msg->context[0] == CM_DIRECT_RETRY_CTX) {
msg->retries = 1;
+ cm_id_priv = NULL;
+ } else {
+ cm_id_priv = msg->context[0];
+ }
- atomic_long_add(1 + msg->retries,
- &port->counter_group[CM_XMIT].counter[attr_index]);
+ atomic_long_add(1 + msg->retries, &port->counters[CM_XMIT][attr_index]);
if (msg->retries)
atomic_long_add(msg->retries,
- &port->counter_group[CM_XMIT_RETRIES].
- counter[attr_index]);
+ &port->counters[CM_XMIT_RETRIES][attr_index]);
- switch (mad_send_wc->status) {
- case IB_WC_SUCCESS:
- case IB_WC_WR_FLUSH_ERR:
+ if (cm_id_priv)
+ cm_process_send_error(cm_id_priv, msg, mad_send_wc->status);
+ else
cm_free_msg(msg);
- break;
- default:
- if (msg->context[0] && msg->context[1])
- cm_process_send_error(msg, mad_send_wc->status);
- else
- cm_free_msg(msg);
- break;
- }
}
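cm_send_handler() above replaces the old heuristic ("context[0] unset and not a REJ means a directly retried response") with an explicit sentinel, CM_DIRECT_RETRY_CTX, stored in msg->context[0]. The sentinel's definition lives elsewhere in cm.c and is not shown in this hunk; the general technique of a distinguished pointer value is sketched below (plain C, value and names hypothetical):

#include <stdio.h>

/* A distinguished pointer value that can never be a real object address. */
#define DIRECT_RETRY_CTX ((void *)1UL)

struct send_buf { void *context[2]; };

static void complete_send(struct send_buf *msg)
{
	if (msg->context[0] == DIRECT_RETRY_CTX)
		printf("directly retried response MAD, no owner to notify\n");
	else
		printf("owned MAD, dispatch error handling to %p\n",
		       msg->context[0]);
}

int main(void)
{
	struct send_buf owned = { .context = { &owned, NULL } };
	struct send_buf retried = { .context = { DIRECT_RETRY_CTX, NULL } };

	complete_send(&owned);
	complete_send(&retried);
	return 0;
}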
static void cm_work_handler(struct work_struct *_work)
@@ -3869,7 +3882,7 @@ static void cm_work_handler(struct work_struct *_work)
ret = cm_timewait_handler(work);
break;
default:
- pr_debug("cm_event.event: 0x%x\n", work->cm_event.event);
+ trace_icm_handler_err(work->cm_event.event);
ret = -EINVAL;
break;
}
@@ -3895,8 +3908,7 @@ static int cm_establish(struct ib_cm_id *cm_id)
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
- switch (cm_id->state)
- {
+ switch (cm_id->state) {
case IB_CM_REP_SENT:
case IB_CM_MRA_REP_RCVD:
cm_id->state = IB_CM_ESTABLISHED;
@@ -3905,8 +3917,7 @@ static int cm_establish(struct ib_cm_id *cm_id)
ret = -EISCONN;
break;
default:
- pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__,
- be32_to_cpu(cm_id->local_id), cm_id->state);
+ trace_icm_establish_err(cm_id);
ret = -EINVAL;
break;
}
@@ -3946,9 +3957,7 @@ out:
static int cm_migrate(struct ib_cm_id *cm_id)
{
struct cm_id_private *cm_id_priv;
- struct cm_av tmp_av;
unsigned long flags;
- int tmp_send_port_not_ready;
int ret = 0;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
@@ -3957,14 +3966,7 @@ static int cm_migrate(struct ib_cm_id *cm_id)
(cm_id->lap_state == IB_CM_LAP_UNINIT ||
cm_id->lap_state == IB_CM_LAP_IDLE)) {
cm_id->lap_state = IB_CM_LAP_IDLE;
- /* Swap address vector */
- tmp_av = cm_id_priv->av;
cm_id_priv->av = cm_id_priv->alt_av;
- cm_id_priv->alt_av = tmp_av;
- /* Swap port send ready state */
- tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready;
- cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready;
- cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready;
} else
ret = -EINVAL;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -4046,8 +4048,7 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent,
}
attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
- atomic_long_inc(&port->counter_group[CM_RECV].
- counter[attr_id - CM_ATTR_ID_OFFSET]);
+ atomic_long_inc(&port->counters[CM_RECV][attr_id - CM_ATTR_ID_OFFSET]);
work = kmalloc(struct_size(work, path, paths), GFP_KERNEL);
if (!work) {
@@ -4095,17 +4096,25 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
IB_QP_PKEY_INDEX | IB_QP_PORT;
qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
- if (cm_id_priv->responder_resources)
+ if (cm_id_priv->responder_resources) {
+ struct ib_device *ib_dev = cm_id_priv->id.device;
+ u64 support_flush = ib_dev->attrs.device_cap_flags &
+ (IB_DEVICE_FLUSH_GLOBAL | IB_DEVICE_FLUSH_PERSISTENT);
+ u32 flushable = support_flush ?
+ (IB_ACCESS_FLUSH_GLOBAL |
+ IB_ACCESS_FLUSH_PERSISTENT) : 0;
+
qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
- IB_ACCESS_REMOTE_ATOMIC;
+ IB_ACCESS_REMOTE_ATOMIC |
+ flushable;
+ }
qp_attr->pkey_index = cm_id_priv->av.pkey_index;
- qp_attr->port_num = cm_id_priv->av.port->port_num;
+ if (cm_id_priv->av.port)
+ qp_attr->port_num = cm_id_priv->av.port->port_num;
ret = 0;
break;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_qp_init_err(&cm_id_priv->id);
ret = -EINVAL;
break;
}
@@ -4132,6 +4141,10 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
IB_QP_DEST_QPN | IB_QP_RQ_PSN;
qp_attr->ah_attr = cm_id_priv->av.ah_attr;
+ if ((qp_attr->ah_attr.type == RDMA_AH_ATTR_TYPE_IB) &&
+ cm_id_priv->av.dlid_datapath &&
+ (cm_id_priv->av.dlid_datapath != 0xffff))
+ qp_attr->ah_attr.ib.dlid = cm_id_priv->av.dlid_datapath;
qp_attr->path_mtu = cm_id_priv->path_mtu;
qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
@@ -4143,7 +4156,8 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
cm_id_priv->responder_resources;
qp_attr->min_rnr_timer = 0;
}
- if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
+ if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr) &&
+ cm_id_priv->alt_av.port) {
*qp_attr_mask |= IB_QP_ALT_PATH;
qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
@@ -4153,9 +4167,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_qp_rtr_err(&cm_id_priv->id);
ret = -EINVAL;
break;
}
@@ -4192,7 +4204,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
qp_attr->retry_cnt = cm_id_priv->retry_count;
qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
- /* fall through */
+ fallthrough;
case IB_QPT_XRC_TGT:
*qp_attr_mask |= IB_QP_TIMEOUT;
qp_attr->timeout = cm_id_priv->av.timeout;
@@ -4206,7 +4218,9 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
}
} else {
*qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
- qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
+ if (cm_id_priv->alt_av.port)
+ qp_attr->alt_port_num =
+ cm_id_priv->alt_av.port->port_num;
qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
@@ -4215,9 +4229,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
- pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n",
- __func__, be32_to_cpu(cm_id_priv->id.local_id),
- cm_id_priv->id.state);
+ trace_icm_qp_rts_err(&cm_id_priv->id);
ret = -EINVAL;
break;
}
@@ -4251,75 +4263,76 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
}
EXPORT_SYMBOL(ib_cm_init_qp_attr);
-static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
- char *buf)
+static ssize_t cm_show_counter(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf)
{
- struct cm_counter_group *group;
- struct cm_counter_attribute *cm_attr;
-
- group = container_of(obj, struct cm_counter_group, obj);
- cm_attr = container_of(attr, struct cm_counter_attribute, attr);
-
- return sprintf(buf, "%ld\n",
- atomic_long_read(&group->counter[cm_attr->index]));
-}
-
-static const struct sysfs_ops cm_counter_ops = {
- .show = cm_show_counter
-};
+ struct cm_counter_attribute *cm_attr =
+ container_of(attr, struct cm_counter_attribute, attr);
+ struct cm_device *cm_dev = ib_get_client_data(ibdev, &cm_client);
-static struct kobj_type cm_counter_obj_type = {
- .sysfs_ops = &cm_counter_ops,
- .default_attrs = cm_counter_default_attrs
-};
+ if (WARN_ON(!cm_dev))
+ return -EINVAL;
-static char *cm_devnode(struct device *dev, umode_t *mode)
-{
- if (mode)
- *mode = 0666;
- return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
+ return sysfs_emit(
+ buf, "%ld\n",
+ atomic_long_read(
+ &cm_dev->port[port_num - 1]
+ ->counters[cm_attr->group][cm_attr->index]));
}
-struct class cm_class = {
- .owner = THIS_MODULE,
- .name = "infiniband_cm",
- .devnode = cm_devnode,
-};
-EXPORT_SYMBOL(cm_class);
-
-static int cm_create_port_fs(struct cm_port *port)
-{
- int i, ret;
-
- for (i = 0; i < CM_COUNTER_GROUPS; i++) {
- ret = ib_port_register_module_stat(port->cm_dev->ib_device,
- port->port_num,
- &port->counter_group[i].obj,
- &cm_counter_obj_type,
- counter_group_names[i]);
- if (ret)
- goto error;
+#define CM_COUNTER_ATTR(_name, _group, _index) \
+ { \
+ .attr = __ATTR(_name, 0444, cm_show_counter, NULL), \
+ .group = _group, .index = _index \
}
- return 0;
-
-error:
- while (i--)
- ib_port_unregister_module_stat(&port->counter_group[i].obj);
- return ret;
-
-}
-
-static void cm_remove_port_fs(struct cm_port *port)
-{
- int i;
-
- for (i = 0; i < CM_COUNTER_GROUPS; i++)
- ib_port_unregister_module_stat(&port->counter_group[i].obj);
+#define CM_COUNTER_GROUP(_group, _name) \
+ static struct cm_counter_attribute cm_counter_attr_##_group[] = { \
+ CM_COUNTER_ATTR(req, _group, CM_REQ_COUNTER), \
+ CM_COUNTER_ATTR(mra, _group, CM_MRA_COUNTER), \
+ CM_COUNTER_ATTR(rej, _group, CM_REJ_COUNTER), \
+ CM_COUNTER_ATTR(rep, _group, CM_REP_COUNTER), \
+ CM_COUNTER_ATTR(rtu, _group, CM_RTU_COUNTER), \
+ CM_COUNTER_ATTR(dreq, _group, CM_DREQ_COUNTER), \
+ CM_COUNTER_ATTR(drep, _group, CM_DREP_COUNTER), \
+ CM_COUNTER_ATTR(sidr_req, _group, CM_SIDR_REQ_COUNTER), \
+ CM_COUNTER_ATTR(sidr_rep, _group, CM_SIDR_REP_COUNTER), \
+ CM_COUNTER_ATTR(lap, _group, CM_LAP_COUNTER), \
+ CM_COUNTER_ATTR(apr, _group, CM_APR_COUNTER), \
+ }; \
+ static struct attribute *cm_counter_attrs_##_group[] = { \
+ &cm_counter_attr_##_group[0].attr.attr, \
+ &cm_counter_attr_##_group[1].attr.attr, \
+ &cm_counter_attr_##_group[2].attr.attr, \
+ &cm_counter_attr_##_group[3].attr.attr, \
+ &cm_counter_attr_##_group[4].attr.attr, \
+ &cm_counter_attr_##_group[5].attr.attr, \
+ &cm_counter_attr_##_group[6].attr.attr, \
+ &cm_counter_attr_##_group[7].attr.attr, \
+ &cm_counter_attr_##_group[8].attr.attr, \
+ &cm_counter_attr_##_group[9].attr.attr, \
+ &cm_counter_attr_##_group[10].attr.attr, \
+ NULL, \
+ }; \
+ static const struct attribute_group cm_counter_group_##_group = { \
+ .name = _name, \
+ .attrs = cm_counter_attrs_##_group, \
+ };
-}
+CM_COUNTER_GROUP(CM_XMIT, "cm_tx_msgs")
+CM_COUNTER_GROUP(CM_XMIT_RETRIES, "cm_tx_retries")
+CM_COUNTER_GROUP(CM_RECV, "cm_rx_msgs")
+CM_COUNTER_GROUP(CM_RECV_DUPLICATES, "cm_rx_duplicates")
+
+static const struct attribute_group *cm_counter_groups[] = {
+ &cm_counter_group_CM_XMIT,
+ &cm_counter_group_CM_XMIT_RETRIES,
+ &cm_counter_group_CM_RECV,
+ &cm_counter_group_CM_RECV_DUPLICATES,
+ NULL,
+};
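CM_COUNTER_GROUP above generates, per counter group, both the attribute objects and the NULL-terminated attribute array that the sysfs group machinery expects, using ## token pasting so each expansion gets unique identifiers. A self-contained illustration of the technique (plain C, not kernel code):

#include <stdio.h>

struct counter { const char *name; long value; };

#define DEFINE_COUNTER_GROUP(_group)					\
	static struct counter counters_##_group[] = {			\
		{ "req", 0 }, { "rep", 0 }, { "rej", 0 },		\
	};								\
	static struct counter *group_##_group[] = {			\
		&counters_##_group[0],					\
		&counters_##_group[1],					\
		&counters_##_group[2],					\
		NULL,							\
	};

DEFINE_COUNTER_GROUP(tx)
DEFINE_COUNTER_GROUP(rx)

static void dump(const char *prefix, struct counter **grp)
{
	for (; *grp; grp++)
		printf("%s/%s = %ld\n", prefix, (*grp)->name, (*grp)->value);
}

int main(void)
{
	dump("tx", group_tx);
	dump("rx", group_rx);
	return 0;
}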
-static void cm_add_one(struct ib_device *ib_device)
+static int cm_add_one(struct ib_device *ib_device)
{
struct cm_device *cm_dev;
struct cm_port *port;
@@ -4333,34 +4346,38 @@ static void cm_add_one(struct ib_device *ib_device)
unsigned long flags;
int ret;
int count = 0;
- u8 i;
+ u32 i;
cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt),
GFP_KERNEL);
if (!cm_dev)
- return;
+ return -ENOMEM;
+ kref_init(&cm_dev->kref);
+ rwlock_init(&cm_dev->mad_agent_lock);
cm_dev->ib_device = ib_device;
cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
cm_dev->going_down = 0;
+ ib_set_client_data(ib_device, &cm_client, cm_dev);
+
set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
- for (i = 1; i <= ib_device->phys_port_cnt; i++) {
+ rdma_for_each_port (ib_device, i) {
if (!rdma_cap_ib_cm(ib_device, i))
continue;
port = kzalloc(sizeof *port, GFP_KERNEL);
- if (!port)
+ if (!port) {
+ ret = -ENOMEM;
goto error1;
+ }
cm_dev->port[i-1] = port;
port->cm_dev = cm_dev;
port->port_num = i;
- INIT_LIST_HEAD(&port->cm_priv_prim_list);
- INIT_LIST_HEAD(&port->cm_priv_altr_list);
-
- ret = cm_create_port_fs(port);
+ ret = ib_port_register_client_groups(ib_device, i,
+ cm_counter_groups);
if (ret)
goto error1;
@@ -4372,30 +4389,47 @@ static void cm_add_one(struct ib_device *ib_device)
cm_recv_handler,
port,
0);
- if (IS_ERR(port->mad_agent))
+ if (IS_ERR(port->mad_agent)) {
+ ret = PTR_ERR(port->mad_agent);
goto error2;
+ }
+
+ port->rep_agent = ib_register_mad_agent(ib_device, i,
+ IB_QPT_GSI,
+ NULL,
+ 0,
+ cm_send_handler,
+ NULL,
+ port,
+ 0);
+ if (IS_ERR(port->rep_agent)) {
+ ret = PTR_ERR(port->rep_agent);
+ goto error3;
+ }
ret = ib_modify_port(ib_device, i, 0, &port_modify);
if (ret)
- goto error3;
+ goto error4;
count++;
}
- if (!count)
+ if (!count) {
+ ret = -EOPNOTSUPP;
goto free;
-
- ib_set_client_data(ib_device, &cm_client, cm_dev);
+ }
write_lock_irqsave(&cm.device_lock, flags);
list_add_tail(&cm_dev->list, &cm.device_list);
write_unlock_irqrestore(&cm.device_lock, flags);
- return;
+ return 0;
+error4:
+ ib_unregister_mad_agent(port->rep_agent);
error3:
ib_unregister_mad_agent(port->mad_agent);
error2:
- cm_remove_port_fs(port);
+ ib_port_unregister_client_groups(ib_device, i, cm_counter_groups);
error1:
port_modify.set_port_cap_mask = 0;
port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
@@ -4405,27 +4439,25 @@ error1:
port = cm_dev->port[i-1];
ib_modify_port(ib_device, port->port_num, 0, &port_modify);
+ ib_unregister_mad_agent(port->rep_agent);
ib_unregister_mad_agent(port->mad_agent);
- cm_remove_port_fs(port);
+ ib_port_unregister_client_groups(ib_device, i,
+ cm_counter_groups);
}
free:
- kfree(cm_dev);
+ cm_device_put(cm_dev);
+ return ret;
}
static void cm_remove_one(struct ib_device *ib_device, void *client_data)
{
struct cm_device *cm_dev = client_data;
struct cm_port *port;
- struct cm_id_private *cm_id_priv;
- struct ib_mad_agent *cur_mad_agent;
struct ib_port_modify port_modify = {
.clr_port_cap_mask = IB_PORT_CM_SUP
};
unsigned long flags;
- int i;
-
- if (!cm_dev)
- return;
+ u32 i;
write_lock_irqsave(&cm.device_lock, flags);
list_del(&cm_dev->list);
@@ -4435,34 +4467,38 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
cm_dev->going_down = 1;
spin_unlock_irq(&cm.lock);
- for (i = 1; i <= ib_device->phys_port_cnt; i++) {
+ rdma_for_each_port (ib_device, i) {
+ struct ib_mad_agent *mad_agent;
+ struct ib_mad_agent *rep_agent;
+
if (!rdma_cap_ib_cm(ib_device, i))
continue;
port = cm_dev->port[i-1];
+ mad_agent = port->mad_agent;
+ rep_agent = port->rep_agent;
ib_modify_port(ib_device, port->port_num, 0, &port_modify);
- /* Mark all the cm_id's as not valid */
- spin_lock_irq(&cm.lock);
- list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list)
- cm_id_priv->altr_send_port_not_ready = 1;
- list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list)
- cm_id_priv->prim_send_port_not_ready = 1;
- spin_unlock_irq(&cm.lock);
/*
* We flush the queue here after going_down is set; this
* verifies that no new work will be queued in the recv handler,
* after which we can call unregister_mad_agent().
*/
flush_workqueue(cm.wq);
- spin_lock_irq(&cm.state_lock);
- cur_mad_agent = port->mad_agent;
+ /*
+ * The above ensures no call paths from the work are running;
+ * the remaining paths all take the mad_agent_lock.
+ */
+ write_lock(&cm_dev->mad_agent_lock);
port->mad_agent = NULL;
- spin_unlock_irq(&cm.state_lock);
- ib_unregister_mad_agent(cur_mad_agent);
- cm_remove_port_fs(port);
+ port->rep_agent = NULL;
+ write_unlock(&cm_dev->mad_agent_lock);
+ ib_unregister_mad_agent(mad_agent);
+ ib_unregister_mad_agent(rep_agent);
+ ib_port_unregister_client_groups(ib_device, i,
+ cm_counter_groups);
}
- kfree(cm_dev);
+ cm_device_put(cm_dev);
}
static int __init ib_cm_init(void)
@@ -4472,23 +4508,16 @@ static int __init ib_cm_init(void)
INIT_LIST_HEAD(&cm.device_list);
rwlock_init(&cm.device_lock);
spin_lock_init(&cm.lock);
- spin_lock_init(&cm.state_lock);
cm.listen_service_table = RB_ROOT;
cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
cm.remote_id_table = RB_ROOT;
cm.remote_qp_table = RB_ROOT;
cm.remote_sidr_table = RB_ROOT;
- xa_init_flags(&cm.local_id_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
+ xa_init_flags(&cm.local_id_table, XA_FLAGS_ALLOC);
get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
INIT_LIST_HEAD(&cm.timewait_list);
- ret = class_register(&cm_class);
- if (ret) {
- ret = -ENOMEM;
- goto error1;
- }
-
- cm.wq = alloc_workqueue("ib_cm", 0, 1);
+ cm.wq = alloc_workqueue("ib_cm", WQ_PERCPU, 1);
if (!cm.wq) {
ret = -ENOMEM;
goto error2;
@@ -4502,8 +4531,6 @@ static int __init ib_cm_init(void)
error3:
destroy_workqueue(cm.wq);
error2:
- class_unregister(&cm_class);
-error1:
return ret;
}
@@ -4524,7 +4551,6 @@ static void __exit ib_cm_cleanup(void)
kfree(timewait_info);
}
- class_unregister(&cm_class);
WARN_ON(!xa_empty(&cm.local_id_table));
}
diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h
index 3d16d614aff6..8462de7ca26e 100644
--- a/drivers/infiniband/core/cm_msgs.h
+++ b/drivers/infiniband/core/cm_msgs.h
@@ -1,39 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2004, 2011 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004 Voltaire Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved.
*/
-#if !defined(CM_MSGS_H)
+#ifndef CM_MSGS_H
#define CM_MSGS_H
+#include <rdma/ibta_vol1_c12.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_cm.h>
@@ -44,120 +19,14 @@
#define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */
-struct cm_req_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 rsvd4;
- __be64 service_id;
- __be64 local_ca_guid;
- __be32 rsvd24;
- __be32 local_qkey;
- /* local QPN:24, responder resources:8 */
- __be32 offset32;
- /* local EECN:24, initiator depth:8 */
- __be32 offset36;
- /*
- * remote EECN:24, remote CM response timeout:5,
- * transport service type:2, end-to-end flow control:1
- */
- __be32 offset40;
- /* starting PSN:24, local CM response timeout:5, retry count:3 */
- __be32 offset44;
- __be16 pkey;
- /* path MTU:4, RDC exists:1, RNR retry count:3. */
- u8 offset50;
- /* max CM Retries:4, SRQ:1, extended transport type:3 */
- u8 offset51;
-
- __be16 primary_local_lid;
- __be16 primary_remote_lid;
- union ib_gid primary_local_gid;
- union ib_gid primary_remote_gid;
- /* flow label:20, rsvd:6, packet rate:6 */
- __be32 primary_offset88;
- u8 primary_traffic_class;
- u8 primary_hop_limit;
- /* SL:4, subnet local:1, rsvd:3 */
- u8 primary_offset94;
- /* local ACK timeout:5, rsvd:3 */
- u8 primary_offset95;
-
- __be16 alt_local_lid;
- __be16 alt_remote_lid;
- union ib_gid alt_local_gid;
- union ib_gid alt_remote_gid;
- /* flow label:20, rsvd:6, packet rate:6 */
- __be32 alt_offset132;
- u8 alt_traffic_class;
- u8 alt_hop_limit;
- /* SL:4, subnet local:1, rsvd:3 */
- u8 alt_offset138;
- /* local ACK timeout:5, rsvd:3 */
- u8 alt_offset139;
-
- u32 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE / sizeof(u32)];
-
-} __packed;
-
-static inline __be32 cm_req_get_local_qpn(struct cm_req_msg *req_msg)
-{
- return cpu_to_be32(be32_to_cpu(req_msg->offset32) >> 8);
-}
-
-static inline void cm_req_set_local_qpn(struct cm_req_msg *req_msg, __be32 qpn)
-{
- req_msg->offset32 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
- (be32_to_cpu(req_msg->offset32) &
- 0x000000FF));
-}
-
-static inline u8 cm_req_get_resp_res(struct cm_req_msg *req_msg)
-{
- return (u8) be32_to_cpu(req_msg->offset32);
-}
-
-static inline void cm_req_set_resp_res(struct cm_req_msg *req_msg, u8 resp_res)
-{
- req_msg->offset32 = cpu_to_be32(resp_res |
- (be32_to_cpu(req_msg->offset32) &
- 0xFFFFFF00));
-}
-
-static inline u8 cm_req_get_init_depth(struct cm_req_msg *req_msg)
-{
- return (u8) be32_to_cpu(req_msg->offset36);
-}
-
-static inline void cm_req_set_init_depth(struct cm_req_msg *req_msg,
- u8 init_depth)
-{
- req_msg->offset36 = cpu_to_be32(init_depth |
- (be32_to_cpu(req_msg->offset36) &
- 0xFFFFFF00));
-}
-
-static inline u8 cm_req_get_remote_resp_timeout(struct cm_req_msg *req_msg)
-{
- return (u8) ((be32_to_cpu(req_msg->offset40) & 0xF8) >> 3);
-}
-
-static inline void cm_req_set_remote_resp_timeout(struct cm_req_msg *req_msg,
- u8 resp_timeout)
-{
- req_msg->offset40 = cpu_to_be32((resp_timeout << 3) |
- (be32_to_cpu(req_msg->offset40) &
- 0xFFFFFF07));
-}
-
static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg)
{
- u8 transport_type = (u8) (be32_to_cpu(req_msg->offset40) & 0x06) >> 1;
- switch(transport_type) {
+ u8 transport_type = IBA_GET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg);
+ switch (transport_type) {
case 0: return IB_QPT_RC;
case 1: return IB_QPT_UC;
case 3:
- switch (req_msg->offset51 & 0x7) {
+ switch (IBA_GET(CM_REQ_EXTENDED_TRANSPORT_TYPE, req_msg)) {
case 1: return IB_QPT_XRC_TGT;
default: return 0;
}
@@ -168,242 +37,19 @@ static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg)
static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg,
enum ib_qp_type qp_type)
{
- switch(qp_type) {
+ switch (qp_type) {
case IB_QPT_UC:
- req_msg->offset40 = cpu_to_be32((be32_to_cpu(
- req_msg->offset40) &
- 0xFFFFFFF9) | 0x2);
+ IBA_SET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg, 1);
break;
case IB_QPT_XRC_INI:
- req_msg->offset40 = cpu_to_be32((be32_to_cpu(
- req_msg->offset40) &
- 0xFFFFFFF9) | 0x6);
- req_msg->offset51 = (req_msg->offset51 & 0xF8) | 1;
+ IBA_SET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg, 3);
+ IBA_SET(CM_REQ_EXTENDED_TRANSPORT_TYPE, req_msg, 1);
break;
default:
- req_msg->offset40 = cpu_to_be32(be32_to_cpu(
- req_msg->offset40) &
- 0xFFFFFFF9);
+ IBA_SET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg, 0);
}
}
-static inline u8 cm_req_get_flow_ctrl(struct cm_req_msg *req_msg)
-{
- return be32_to_cpu(req_msg->offset40) & 0x1;
-}
-
-static inline void cm_req_set_flow_ctrl(struct cm_req_msg *req_msg,
- u8 flow_ctrl)
-{
- req_msg->offset40 = cpu_to_be32((flow_ctrl & 0x1) |
- (be32_to_cpu(req_msg->offset40) &
- 0xFFFFFFFE));
-}
-
-static inline __be32 cm_req_get_starting_psn(struct cm_req_msg *req_msg)
-{
- return cpu_to_be32(be32_to_cpu(req_msg->offset44) >> 8);
-}
-
-static inline void cm_req_set_starting_psn(struct cm_req_msg *req_msg,
- __be32 starting_psn)
-{
- req_msg->offset44 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) |
- (be32_to_cpu(req_msg->offset44) & 0x000000FF));
-}
-
-static inline u8 cm_req_get_local_resp_timeout(struct cm_req_msg *req_msg)
-{
- return (u8) ((be32_to_cpu(req_msg->offset44) & 0xF8) >> 3);
-}
-
-static inline void cm_req_set_local_resp_timeout(struct cm_req_msg *req_msg,
- u8 resp_timeout)
-{
- req_msg->offset44 = cpu_to_be32((resp_timeout << 3) |
- (be32_to_cpu(req_msg->offset44) & 0xFFFFFF07));
-}
-
-static inline u8 cm_req_get_retry_count(struct cm_req_msg *req_msg)
-{
- return (u8) (be32_to_cpu(req_msg->offset44) & 0x7);
-}
-
-static inline void cm_req_set_retry_count(struct cm_req_msg *req_msg,
- u8 retry_count)
-{
- req_msg->offset44 = cpu_to_be32((retry_count & 0x7) |
- (be32_to_cpu(req_msg->offset44) & 0xFFFFFFF8));
-}
-
-static inline u8 cm_req_get_path_mtu(struct cm_req_msg *req_msg)
-{
- return req_msg->offset50 >> 4;
-}
-
-static inline void cm_req_set_path_mtu(struct cm_req_msg *req_msg, u8 path_mtu)
-{
- req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF) | (path_mtu << 4));
-}
-
-static inline u8 cm_req_get_rnr_retry_count(struct cm_req_msg *req_msg)
-{
- return req_msg->offset50 & 0x7;
-}
-
-static inline void cm_req_set_rnr_retry_count(struct cm_req_msg *req_msg,
- u8 rnr_retry_count)
-{
- req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF8) |
- (rnr_retry_count & 0x7));
-}
-
-static inline u8 cm_req_get_max_cm_retries(struct cm_req_msg *req_msg)
-{
- return req_msg->offset51 >> 4;
-}
-
-static inline void cm_req_set_max_cm_retries(struct cm_req_msg *req_msg,
- u8 retries)
-{
- req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF) | (retries << 4));
-}
-
-static inline u8 cm_req_get_srq(struct cm_req_msg *req_msg)
-{
- return (req_msg->offset51 & 0x8) >> 3;
-}
-
-static inline void cm_req_set_srq(struct cm_req_msg *req_msg, u8 srq)
-{
- req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF7) |
- ((srq & 0x1) << 3));
-}
-
-static inline __be32 cm_req_get_primary_flow_label(struct cm_req_msg *req_msg)
-{
- return cpu_to_be32(be32_to_cpu(req_msg->primary_offset88) >> 12);
-}
-
-static inline void cm_req_set_primary_flow_label(struct cm_req_msg *req_msg,
- __be32 flow_label)
-{
- req_msg->primary_offset88 = cpu_to_be32(
- (be32_to_cpu(req_msg->primary_offset88) &
- 0x00000FFF) |
- (be32_to_cpu(flow_label) << 12));
-}
-
-static inline u8 cm_req_get_primary_packet_rate(struct cm_req_msg *req_msg)
-{
- return (u8) (be32_to_cpu(req_msg->primary_offset88) & 0x3F);
-}
-
-static inline void cm_req_set_primary_packet_rate(struct cm_req_msg *req_msg,
- u8 rate)
-{
- req_msg->primary_offset88 = cpu_to_be32(
- (be32_to_cpu(req_msg->primary_offset88) &
- 0xFFFFFFC0) | (rate & 0x3F));
-}
-
-static inline u8 cm_req_get_primary_sl(struct cm_req_msg *req_msg)
-{
- return (u8) (req_msg->primary_offset94 >> 4);
-}
-
-static inline void cm_req_set_primary_sl(struct cm_req_msg *req_msg, u8 sl)
-{
- req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0x0F) |
- (sl << 4));
-}
-
-static inline u8 cm_req_get_primary_subnet_local(struct cm_req_msg *req_msg)
-{
- return (u8) ((req_msg->primary_offset94 & 0x08) >> 3);
-}
-
-static inline void cm_req_set_primary_subnet_local(struct cm_req_msg *req_msg,
- u8 subnet_local)
-{
- req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0xF7) |
- ((subnet_local & 0x1) << 3));
-}
-
-static inline u8 cm_req_get_primary_local_ack_timeout(struct cm_req_msg *req_msg)
-{
- return (u8) (req_msg->primary_offset95 >> 3);
-}
-
-static inline void cm_req_set_primary_local_ack_timeout(struct cm_req_msg *req_msg,
- u8 local_ack_timeout)
-{
- req_msg->primary_offset95 = (u8) ((req_msg->primary_offset95 & 0x07) |
- (local_ack_timeout << 3));
-}
-
-static inline __be32 cm_req_get_alt_flow_label(struct cm_req_msg *req_msg)
-{
- return cpu_to_be32(be32_to_cpu(req_msg->alt_offset132) >> 12);
-}
-
-static inline void cm_req_set_alt_flow_label(struct cm_req_msg *req_msg,
- __be32 flow_label)
-{
- req_msg->alt_offset132 = cpu_to_be32(
- (be32_to_cpu(req_msg->alt_offset132) &
- 0x00000FFF) |
- (be32_to_cpu(flow_label) << 12));
-}
-
-static inline u8 cm_req_get_alt_packet_rate(struct cm_req_msg *req_msg)
-{
- return (u8) (be32_to_cpu(req_msg->alt_offset132) & 0x3F);
-}
-
-static inline void cm_req_set_alt_packet_rate(struct cm_req_msg *req_msg,
- u8 rate)
-{
- req_msg->alt_offset132 = cpu_to_be32(
- (be32_to_cpu(req_msg->alt_offset132) &
- 0xFFFFFFC0) | (rate & 0x3F));
-}
-
-static inline u8 cm_req_get_alt_sl(struct cm_req_msg *req_msg)
-{
- return (u8) (req_msg->alt_offset138 >> 4);
-}
-
-static inline void cm_req_set_alt_sl(struct cm_req_msg *req_msg, u8 sl)
-{
- req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0x0F) |
- (sl << 4));
-}
-
-static inline u8 cm_req_get_alt_subnet_local(struct cm_req_msg *req_msg)
-{
- return (u8) ((req_msg->alt_offset138 & 0x08) >> 3);
-}
-
-static inline void cm_req_set_alt_subnet_local(struct cm_req_msg *req_msg,
- u8 subnet_local)
-{
- req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0xF7) |
- ((subnet_local & 0x1) << 3));
-}
-
-static inline u8 cm_req_get_alt_local_ack_timeout(struct cm_req_msg *req_msg)
-{
- return (u8) (req_msg->alt_offset139 >> 3);
-}
-
-static inline void cm_req_set_alt_local_ack_timeout(struct cm_req_msg *req_msg,
- u8 local_ack_timeout)
-{
- req_msg->alt_offset139 = (u8) ((req_msg->alt_offset139 & 0x07) |
- (local_ack_timeout << 3));
-}
-
/* Message REJected or MRAed */
enum cm_msg_response {
CM_MSG_RESPONSE_REQ = 0x0,
@@ -411,419 +57,12 @@ enum cm_msg_response {
CM_MSG_RESPONSE_OTHER = 0x2
};
-struct cm_mra_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 remote_comm_id;
- /* message MRAed:2, rsvd:6 */
- u8 offset8;
- /* service timeout:5, rsvd:3 */
- u8 offset9;
-
- u8 private_data[IB_CM_MRA_PRIVATE_DATA_SIZE];
-
-} __packed;
-
-static inline u8 cm_mra_get_msg_mraed(struct cm_mra_msg *mra_msg)
-{
- return (u8) (mra_msg->offset8 >> 6);
-}
-
-static inline void cm_mra_set_msg_mraed(struct cm_mra_msg *mra_msg, u8 msg)
-{
- mra_msg->offset8 = (u8) ((mra_msg->offset8 & 0x3F) | (msg << 6));
-}
-
-static inline u8 cm_mra_get_service_timeout(struct cm_mra_msg *mra_msg)
-{
- return (u8) (mra_msg->offset9 >> 3);
-}
-
-static inline void cm_mra_set_service_timeout(struct cm_mra_msg *mra_msg,
- u8 service_timeout)
-{
- mra_msg->offset9 = (u8) ((mra_msg->offset9 & 0x07) |
- (service_timeout << 3));
-}
-
-struct cm_rej_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 remote_comm_id;
- /* message REJected:2, rsvd:6 */
- u8 offset8;
- /* reject info length:7, rsvd:1. */
- u8 offset9;
- __be16 reason;
- u8 ari[IB_CM_REJ_ARI_LENGTH];
-
- u8 private_data[IB_CM_REJ_PRIVATE_DATA_SIZE];
-
-} __packed;
-
-static inline u8 cm_rej_get_msg_rejected(struct cm_rej_msg *rej_msg)
-{
- return (u8) (rej_msg->offset8 >> 6);
-}
-
-static inline void cm_rej_set_msg_rejected(struct cm_rej_msg *rej_msg, u8 msg)
-{
- rej_msg->offset8 = (u8) ((rej_msg->offset8 & 0x3F) | (msg << 6));
-}
-
-static inline u8 cm_rej_get_reject_info_len(struct cm_rej_msg *rej_msg)
-{
- return (u8) (rej_msg->offset9 >> 1);
-}
-
-static inline void cm_rej_set_reject_info_len(struct cm_rej_msg *rej_msg,
- u8 len)
-{
- rej_msg->offset9 = (u8) ((rej_msg->offset9 & 0x1) | (len << 1));
-}
-
-struct cm_rep_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 remote_comm_id;
- __be32 local_qkey;
- /* local QPN:24, rsvd:8 */
- __be32 offset12;
- /* local EECN:24, rsvd:8 */
- __be32 offset16;
- /* starting PSN:24 rsvd:8 */
- __be32 offset20;
- u8 resp_resources;
- u8 initiator_depth;
- /* target ACK delay:5, failover accepted:2, end-to-end flow control:1 */
- u8 offset26;
- /* RNR retry count:3, SRQ:1, rsvd:5 */
- u8 offset27;
- __be64 local_ca_guid;
-
- u8 private_data[IB_CM_REP_PRIVATE_DATA_SIZE];
-
-} __packed;
-
-static inline __be32 cm_rep_get_local_qpn(struct cm_rep_msg *rep_msg)
-{
- return cpu_to_be32(be32_to_cpu(rep_msg->offset12) >> 8);
-}
-
-static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, __be32 qpn)
-{
- rep_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
- (be32_to_cpu(rep_msg->offset12) & 0x000000FF));
-}
-
-static inline __be32 cm_rep_get_local_eecn(struct cm_rep_msg *rep_msg)
-{
- return cpu_to_be32(be32_to_cpu(rep_msg->offset16) >> 8);
-}
-
-static inline void cm_rep_set_local_eecn(struct cm_rep_msg *rep_msg, __be32 eecn)
-{
- rep_msg->offset16 = cpu_to_be32((be32_to_cpu(eecn) << 8) |
- (be32_to_cpu(rep_msg->offset16) & 0x000000FF));
-}
-
static inline __be32 cm_rep_get_qpn(struct cm_rep_msg *rep_msg, enum ib_qp_type qp_type)
{
return (qp_type == IB_QPT_XRC_INI) ?
- cm_rep_get_local_eecn(rep_msg) : cm_rep_get_local_qpn(rep_msg);
-}
-
-static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg)
-{
- return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8);
-}
-
-static inline void cm_rep_set_starting_psn(struct cm_rep_msg *rep_msg,
- __be32 starting_psn)
-{
- rep_msg->offset20 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) |
- (be32_to_cpu(rep_msg->offset20) & 0x000000FF));
-}
-
-static inline u8 cm_rep_get_target_ack_delay(struct cm_rep_msg *rep_msg)
-{
- return (u8) (rep_msg->offset26 >> 3);
-}
-
-static inline void cm_rep_set_target_ack_delay(struct cm_rep_msg *rep_msg,
- u8 target_ack_delay)
-{
- rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0x07) |
- (target_ack_delay << 3));
-}
-
-static inline u8 cm_rep_get_failover(struct cm_rep_msg *rep_msg)
-{
- return (u8) ((rep_msg->offset26 & 0x06) >> 1);
-}
-
-static inline void cm_rep_set_failover(struct cm_rep_msg *rep_msg, u8 failover)
-{
- rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xF9) |
- ((failover & 0x3) << 1));
-}
-
-static inline u8 cm_rep_get_flow_ctrl(struct cm_rep_msg *rep_msg)
-{
- return (u8) (rep_msg->offset26 & 0x01);
-}
-
-static inline void cm_rep_set_flow_ctrl(struct cm_rep_msg *rep_msg,
- u8 flow_ctrl)
-{
- rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xFE) |
- (flow_ctrl & 0x1));
-}
-
-static inline u8 cm_rep_get_rnr_retry_count(struct cm_rep_msg *rep_msg)
-{
- return (u8) (rep_msg->offset27 >> 5);
-}
-
-static inline void cm_rep_set_rnr_retry_count(struct cm_rep_msg *rep_msg,
- u8 rnr_retry_count)
-{
- rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0x1F) |
- (rnr_retry_count << 5));
-}
-
-static inline u8 cm_rep_get_srq(struct cm_rep_msg *rep_msg)
-{
- return (u8) ((rep_msg->offset27 >> 4) & 0x1);
-}
-
-static inline void cm_rep_set_srq(struct cm_rep_msg *rep_msg, u8 srq)
-{
- rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0xEF) |
- ((srq & 0x1) << 4));
-}
-
-struct cm_rtu_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 remote_comm_id;
-
- u8 private_data[IB_CM_RTU_PRIVATE_DATA_SIZE];
-
-} __packed;
-
-struct cm_dreq_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 remote_comm_id;
- /* remote QPN/EECN:24, rsvd:8 */
- __be32 offset8;
-
- u8 private_data[IB_CM_DREQ_PRIVATE_DATA_SIZE];
-
-} __packed;
-
-static inline __be32 cm_dreq_get_remote_qpn(struct cm_dreq_msg *dreq_msg)
-{
- return cpu_to_be32(be32_to_cpu(dreq_msg->offset8) >> 8);
-}
-
-static inline void cm_dreq_set_remote_qpn(struct cm_dreq_msg *dreq_msg, __be32 qpn)
-{
- dreq_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
- (be32_to_cpu(dreq_msg->offset8) & 0x000000FF));
-}
-
-struct cm_drep_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 remote_comm_id;
-
- u8 private_data[IB_CM_DREP_PRIVATE_DATA_SIZE];
-
-} __packed;
-
-struct cm_lap_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 remote_comm_id;
-
- __be32 rsvd8;
- /* remote QPN/EECN:24, remote CM response timeout:5, rsvd:3 */
- __be32 offset12;
- __be32 rsvd16;
-
- __be16 alt_local_lid;
- __be16 alt_remote_lid;
- union ib_gid alt_local_gid;
- union ib_gid alt_remote_gid;
- /* flow label:20, rsvd:4, traffic class:8 */
- __be32 offset56;
- u8 alt_hop_limit;
- /* rsvd:2, packet rate:6 */
- u8 offset61;
- /* SL:4, subnet local:1, rsvd:3 */
- u8 offset62;
- /* local ACK timeout:5, rsvd:3 */
- u8 offset63;
-
- u8 private_data[IB_CM_LAP_PRIVATE_DATA_SIZE];
-} __packed;
-
-static inline __be32 cm_lap_get_remote_qpn(struct cm_lap_msg *lap_msg)
-{
- return cpu_to_be32(be32_to_cpu(lap_msg->offset12) >> 8);
-}
-
-static inline void cm_lap_set_remote_qpn(struct cm_lap_msg *lap_msg, __be32 qpn)
-{
- lap_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
- (be32_to_cpu(lap_msg->offset12) &
- 0x000000FF));
-}
-
-static inline u8 cm_lap_get_remote_resp_timeout(struct cm_lap_msg *lap_msg)
-{
- return (u8) ((be32_to_cpu(lap_msg->offset12) & 0xF8) >> 3);
-}
-
-static inline void cm_lap_set_remote_resp_timeout(struct cm_lap_msg *lap_msg,
- u8 resp_timeout)
-{
- lap_msg->offset12 = cpu_to_be32((resp_timeout << 3) |
- (be32_to_cpu(lap_msg->offset12) &
- 0xFFFFFF07));
-}
-
-static inline __be32 cm_lap_get_flow_label(struct cm_lap_msg *lap_msg)
-{
- return cpu_to_be32(be32_to_cpu(lap_msg->offset56) >> 12);
-}
-
-static inline void cm_lap_set_flow_label(struct cm_lap_msg *lap_msg,
- __be32 flow_label)
-{
- lap_msg->offset56 = cpu_to_be32(
- (be32_to_cpu(lap_msg->offset56) & 0x00000FFF) |
- (be32_to_cpu(flow_label) << 12));
-}
-
-static inline u8 cm_lap_get_traffic_class(struct cm_lap_msg *lap_msg)
-{
- return (u8) be32_to_cpu(lap_msg->offset56);
-}
-
-static inline void cm_lap_set_traffic_class(struct cm_lap_msg *lap_msg,
- u8 traffic_class)
-{
- lap_msg->offset56 = cpu_to_be32(traffic_class |
- (be32_to_cpu(lap_msg->offset56) &
- 0xFFFFFF00));
-}
-
-static inline u8 cm_lap_get_packet_rate(struct cm_lap_msg *lap_msg)
-{
- return lap_msg->offset61 & 0x3F;
-}
-
-static inline void cm_lap_set_packet_rate(struct cm_lap_msg *lap_msg,
- u8 packet_rate)
-{
- lap_msg->offset61 = (packet_rate & 0x3F) | (lap_msg->offset61 & 0xC0);
-}
-
-static inline u8 cm_lap_get_sl(struct cm_lap_msg *lap_msg)
-{
- return lap_msg->offset62 >> 4;
-}
-
-static inline void cm_lap_set_sl(struct cm_lap_msg *lap_msg, u8 sl)
-{
- lap_msg->offset62 = (sl << 4) | (lap_msg->offset62 & 0x0F);
-}
-
-static inline u8 cm_lap_get_subnet_local(struct cm_lap_msg *lap_msg)
-{
- return (lap_msg->offset62 >> 3) & 0x1;
-}
-
-static inline void cm_lap_set_subnet_local(struct cm_lap_msg *lap_msg,
- u8 subnet_local)
-{
- lap_msg->offset62 = ((subnet_local & 0x1) << 3) |
- (lap_msg->offset61 & 0xF7);
-}
-static inline u8 cm_lap_get_local_ack_timeout(struct cm_lap_msg *lap_msg)
-{
- return lap_msg->offset63 >> 3;
-}
-
-static inline void cm_lap_set_local_ack_timeout(struct cm_lap_msg *lap_msg,
- u8 local_ack_timeout)
-{
- lap_msg->offset63 = (local_ack_timeout << 3) |
- (lap_msg->offset63 & 0x07);
-}
-
-struct cm_apr_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 remote_comm_id;
-
- u8 info_length;
- u8 ap_status;
- __be16 rsvd;
- u8 info[IB_CM_APR_INFO_LENGTH];
-
- u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE];
-} __packed;
-
-struct cm_sidr_req_msg {
- struct ib_mad_hdr hdr;
-
- __be32 request_id;
- __be16 pkey;
- __be16 rsvd;
- __be64 service_id;
-
- u32 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE / sizeof(u32)];
-} __packed;
-
-struct cm_sidr_rep_msg {
- struct ib_mad_hdr hdr;
-
- __be32 request_id;
- u8 status;
- u8 info_length;
- __be16 rsvd;
- /* QPN:24, rsvd:8 */
- __be32 offset8;
- __be64 service_id;
- __be32 qkey;
- u8 info[IB_CM_SIDR_REP_INFO_LENGTH];
-
- u8 private_data[IB_CM_SIDR_REP_PRIVATE_DATA_SIZE];
-} __packed;
-
-static inline __be32 cm_sidr_rep_get_qpn(struct cm_sidr_rep_msg *sidr_rep_msg)
-{
- return cpu_to_be32(be32_to_cpu(sidr_rep_msg->offset8) >> 8);
-}
-
-static inline void cm_sidr_rep_set_qpn(struct cm_sidr_rep_msg *sidr_rep_msg,
- __be32 qpn)
-{
- sidr_rep_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
- (be32_to_cpu(sidr_rep_msg->offset8) &
- 0x000000FF));
+ cpu_to_be32(IBA_GET(CM_REP_LOCAL_EE_CONTEXT_NUMBER,
+ rep_msg)) :
+ cpu_to_be32(IBA_GET(CM_REP_LOCAL_QPN, rep_msg));
}
#endif /* CM_MSGS_H */
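Most of this header is removed because the hand-rolled offsetNN fields and shift/mask accessors are superseded by IBA_GET()/IBA_SET() from <rdma/ibta_vol1_c12.h>, which name each CM field after its IBTA wire-format definition and take and return CPU-order values; the be*_to_cpu()/cpu_to_be*() conversions at the cm.c call sites handle byte order at the boundary. The underlying idea, get/set driven by a field description over a big-endian buffer, can be sketched in a self-contained form (illustrative only; the field layout here is invented for the example and is not the kernel's implementation):

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

/* A field is described by its bit offset and width within a 32-bit
 * big-endian word of the message; widths up to 31 bits are assumed. */
struct field_desc { unsigned int bit_offset, width; };

static const struct field_desc EX_QPN = { .bit_offset = 0, .width = 24 };

static uint32_t field_get(const void *buf, struct field_desc f)
{
	uint32_t word;

	memcpy(&word, buf, sizeof(word));
	word = ntohl(word);
	return (word >> (32 - f.bit_offset - f.width)) & ((1u << f.width) - 1);
}

static void field_set(void *buf, struct field_desc f, uint32_t val)
{
	unsigned int shift = 32 - f.bit_offset - f.width;
	uint32_t mask = ((1u << f.width) - 1) << shift;
	uint32_t word;

	memcpy(&word, buf, sizeof(word));
	word = ntohl(word);
	word = (word & ~mask) | ((val << shift) & mask);
	word = htonl(word);
	memcpy(buf, &word, sizeof(word));
}

int main(void)
{
	unsigned char msg[4] = { 0 };

	field_set(msg, EX_QPN, 0x123456);
	printf("qpn = 0x%x\n", field_get(msg, EX_QPN));	/* 0x123456 */
	return 0;
}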
diff --git a/drivers/infiniband/core/cm_trace.c b/drivers/infiniband/core/cm_trace.c
new file mode 100644
index 000000000000..8f3482f66338
--- /dev/null
+++ b/drivers/infiniband/core/cm_trace.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Trace points for the IB Connection Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2020, Oracle and/or its affiliates.
+ */
+
+#include <rdma/rdma_cm.h>
+#include "cma_priv.h"
+
+#define CREATE_TRACE_POINTS
+
+#include "cm_trace.h"
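cm_trace.c exists only so that the tracepoint bodies are emitted once: exactly one translation unit defines CREATE_TRACE_POINTS before including the trace header, and every other includer of cm_trace.h gets declarations only. The single-definition idea can be shown in a stripped-down, runnable form (plain C, not the kernel tracing framework):

#include <stdio.h>

/*
 * Playing the role of the one .c file that defines CREATE_TRACE_POINTS;
 * all other includers would omit this and get only declarations.
 */
#define CREATE_TRACE_POINTS

/* --- what a trace header conceptually expands to --- */
#ifdef CREATE_TRACE_POINTS
#define EVENT(name) int name##_count;		/* emit the definition */
#else
#define EVENT(name) extern int name##_count;	/* emit a declaration only */
#endif

EVENT(icm_send_req)
EVENT(icm_send_rep)
#undef EVENT
/* --- end of the conceptual header --- */

int main(void)
{
	icm_send_req_count++;
	printf("req=%d rep=%d\n", icm_send_req_count, icm_send_rep_count);
	return 0;
}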
diff --git a/drivers/infiniband/core/cm_trace.h b/drivers/infiniband/core/cm_trace.h
new file mode 100644
index 000000000000..4a4987da69d4
--- /dev/null
+++ b/drivers/infiniband/core/cm_trace.h
@@ -0,0 +1,414 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Trace point definitions for the RDMA Connect Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2020 Oracle and/or its affiliates.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ib_cma
+
+#if !defined(_TRACE_IB_CMA_H) || defined(TRACE_HEADER_MULTI_READ)
+
+#define _TRACE_IB_CMA_H
+
+#include <linux/tracepoint.h>
+#include <rdma/ib_cm.h>
+#include <trace/misc/rdma.h>
+
+/*
+ * enum ib_cm_state, from include/rdma/ib_cm.h
+ */
+#define IB_CM_STATE_LIST \
+ ib_cm_state(IDLE) \
+ ib_cm_state(LISTEN) \
+ ib_cm_state(REQ_SENT) \
+ ib_cm_state(REQ_RCVD) \
+ ib_cm_state(MRA_REQ_SENT) \
+ ib_cm_state(MRA_REQ_RCVD) \
+ ib_cm_state(REP_SENT) \
+ ib_cm_state(REP_RCVD) \
+ ib_cm_state(MRA_REP_SENT) \
+ ib_cm_state(MRA_REP_RCVD) \
+ ib_cm_state(ESTABLISHED) \
+ ib_cm_state(DREQ_SENT) \
+ ib_cm_state(DREQ_RCVD) \
+ ib_cm_state(TIMEWAIT) \
+ ib_cm_state(SIDR_REQ_SENT) \
+ ib_cm_state_end(SIDR_REQ_RCVD)
+
+#undef ib_cm_state
+#undef ib_cm_state_end
+#define ib_cm_state(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_state_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_STATE_LIST
+
+#undef ib_cm_state
+#undef ib_cm_state_end
+#define ib_cm_state(x) { IB_CM_##x, #x },
+#define ib_cm_state_end(x) { IB_CM_##x, #x }
+
+#define show_ib_cm_state(x) \
+ __print_symbolic(x, IB_CM_STATE_LIST)
+
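IB_CM_STATE_LIST above is an X-macro: the list of states is written once, and the per-item macros ib_cm_state()/ib_cm_state_end() are redefined between expansions, first to emit TRACE_DEFINE_ENUM() entries and then to build the table that __print_symbolic() consumes. The same re-expansion trick in a self-contained form (plain C):

#include <stdio.h>

#define COLOR_LIST	\
	color(RED)	\
	color(GREEN)	\
	color_end(BLUE)

/* First expansion: the enum itself. */
#undef color
#undef color_end
#define color(x)	COLOR_##x,
#define color_end(x)	COLOR_##x
enum color { COLOR_LIST };

/* Second expansion: a name table for pretty-printing. */
#undef color
#undef color_end
#define color(x)	[COLOR_##x] = #x,
#define color_end(x)	[COLOR_##x] = #x
static const char *color_names[] = { COLOR_LIST };

int main(void)
{
	for (enum color c = COLOR_RED; c <= COLOR_BLUE; c++)
		printf("%d -> %s\n", c, color_names[c]);
	return 0;
}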
+/*
+ * enum ib_cm_lap_state, from include/rdma/ib_cm.h
+ */
+#define IB_CM_LAP_STATE_LIST \
+ ib_cm_lap_state(LAP_UNINIT) \
+ ib_cm_lap_state(LAP_IDLE) \
+ ib_cm_lap_state(LAP_SENT) \
+ ib_cm_lap_state(LAP_RCVD) \
+ ib_cm_lap_state(MRA_LAP_SENT) \
+ ib_cm_lap_state_end(MRA_LAP_RCVD)
+
+#undef ib_cm_lap_state
+#undef ib_cm_lap_state_end
+#define ib_cm_lap_state(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_lap_state_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_LAP_STATE_LIST
+
+#undef ib_cm_lap_state
+#undef ib_cm_lap_state_end
+#define ib_cm_lap_state(x) { IB_CM_##x, #x },
+#define ib_cm_lap_state_end(x) { IB_CM_##x, #x }
+
+#define show_ib_cm_lap_state(x) \
+ __print_symbolic(x, IB_CM_LAP_STATE_LIST)
+
+/*
+ * enum ib_cm_rej_reason, from include/rdma/ib_cm.h
+ */
+#define IB_CM_REJ_REASON_LIST \
+ ib_cm_rej_reason(REJ_NO_QP) \
+ ib_cm_rej_reason(REJ_NO_EEC) \
+ ib_cm_rej_reason(REJ_NO_RESOURCES) \
+ ib_cm_rej_reason(REJ_TIMEOUT) \
+ ib_cm_rej_reason(REJ_UNSUPPORTED) \
+ ib_cm_rej_reason(REJ_INVALID_COMM_ID) \
+ ib_cm_rej_reason(REJ_INVALID_COMM_INSTANCE) \
+ ib_cm_rej_reason(REJ_INVALID_SERVICE_ID) \
+ ib_cm_rej_reason(REJ_INVALID_TRANSPORT_TYPE) \
+ ib_cm_rej_reason(REJ_STALE_CONN) \
+ ib_cm_rej_reason(REJ_RDC_NOT_EXIST) \
+ ib_cm_rej_reason(REJ_INVALID_GID) \
+ ib_cm_rej_reason(REJ_INVALID_LID) \
+ ib_cm_rej_reason(REJ_INVALID_SL) \
+ ib_cm_rej_reason(REJ_INVALID_TRAFFIC_CLASS) \
+ ib_cm_rej_reason(REJ_INVALID_HOP_LIMIT) \
+ ib_cm_rej_reason(REJ_INVALID_PACKET_RATE) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_GID) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_LID) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_SL) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_TRAFFIC_CLASS) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_HOP_LIMIT) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_PACKET_RATE) \
+ ib_cm_rej_reason(REJ_PORT_CM_REDIRECT) \
+ ib_cm_rej_reason(REJ_PORT_REDIRECT) \
+ ib_cm_rej_reason(REJ_INVALID_MTU) \
+ ib_cm_rej_reason(REJ_INSUFFICIENT_RESP_RESOURCES) \
+ ib_cm_rej_reason(REJ_CONSUMER_DEFINED) \
+ ib_cm_rej_reason(REJ_INVALID_RNR_RETRY) \
+ ib_cm_rej_reason(REJ_DUPLICATE_LOCAL_COMM_ID) \
+ ib_cm_rej_reason(REJ_INVALID_CLASS_VERSION) \
+ ib_cm_rej_reason(REJ_INVALID_FLOW_LABEL) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_FLOW_LABEL) \
+ ib_cm_rej_reason_end(REJ_VENDOR_OPTION_NOT_SUPPORTED)
+
+#undef ib_cm_rej_reason
+#undef ib_cm_rej_reason_end
+#define ib_cm_rej_reason(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_rej_reason_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_REJ_REASON_LIST
+
+#undef ib_cm_rej_reason
+#undef ib_cm_rej_reason_end
+#define ib_cm_rej_reason(x) { IB_CM_##x, #x },
+#define ib_cm_rej_reason_end(x) { IB_CM_##x, #x }
+
+#define show_ib_cm_rej_reason(x) \
+ __print_symbolic(x, IB_CM_REJ_REASON_LIST)
+
+DECLARE_EVENT_CLASS(icm_id_class,
+ TP_PROTO(
+ const struct ib_cm_id *cm_id
+ ),
+
+ TP_ARGS(cm_id),
+
+ TP_STRUCT__entry(
+ __field(const void *, cm_id) /* for eBPF scripts */
+ __field(unsigned int, local_id)
+ __field(unsigned int, remote_id)
+ __field(unsigned long, state)
+ __field(unsigned long, lap_state)
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = cm_id;
+ __entry->local_id = be32_to_cpu(cm_id->local_id);
+ __entry->remote_id = be32_to_cpu(cm_id->remote_id);
+ __entry->state = cm_id->state;
+ __entry->lap_state = cm_id->lap_state;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u state=%s lap_state=%s",
+ __entry->local_id, __entry->remote_id,
+ show_ib_cm_state(__entry->state),
+ show_ib_cm_lap_state(__entry->lap_state)
+ )
+);
+
+#define DEFINE_CM_SEND_EVENT(name) \
+ DEFINE_EVENT(icm_id_class, \
+ icm_send_##name, \
+ TP_PROTO( \
+ const struct ib_cm_id *cm_id \
+ ), \
+ TP_ARGS(cm_id))
+
+DEFINE_CM_SEND_EVENT(req);
+DEFINE_CM_SEND_EVENT(rep);
+DEFINE_CM_SEND_EVENT(dup_req);
+DEFINE_CM_SEND_EVENT(dup_rep);
+DEFINE_CM_SEND_EVENT(rtu);
+DEFINE_CM_SEND_EVENT(mra);
+DEFINE_CM_SEND_EVENT(sidr_req);
+DEFINE_CM_SEND_EVENT(sidr_rep);
+DEFINE_CM_SEND_EVENT(dreq);
+DEFINE_CM_SEND_EVENT(drep);
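Each DEFINE_CM_SEND_EVENT(name) line above instantiates an icm_send_<name> tracepoint that reuses the icm_id_class layout, so adding another send-side probe stays a one-liner. Purely as an illustration (this event is not part of the patch), a LAP send probe would be declared the same way:

    DEFINE_CM_SEND_EVENT(lap);  /* would provide trace_icm_send_lap(cm_id) */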
+
+TRACE_EVENT(icm_send_rej,
+ TP_PROTO(
+ const struct ib_cm_id *cm_id,
+ enum ib_cm_rej_reason reason
+ ),
+
+ TP_ARGS(cm_id, reason),
+
+ TP_STRUCT__entry(
+ __field(const void *, cm_id)
+ __field(u32, local_id)
+ __field(u32, remote_id)
+ __field(unsigned long, state)
+ __field(unsigned long, reason)
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = cm_id;
+ __entry->local_id = be32_to_cpu(cm_id->local_id);
+ __entry->remote_id = be32_to_cpu(cm_id->remote_id);
+ __entry->state = cm_id->state;
+ __entry->reason = reason;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u state=%s reason=%s",
+ __entry->local_id, __entry->remote_id,
+ show_ib_cm_state(__entry->state),
+ show_ib_cm_rej_reason(__entry->reason)
+ )
+);
+
+#define DEFINE_CM_ERR_EVENT(name) \
+ DEFINE_EVENT(icm_id_class, \
+ icm_##name##_err, \
+ TP_PROTO( \
+ const struct ib_cm_id *cm_id \
+ ), \
+ TP_ARGS(cm_id))
+
+DEFINE_CM_ERR_EVENT(send_cm_rtu);
+DEFINE_CM_ERR_EVENT(establish);
+DEFINE_CM_ERR_EVENT(no_listener);
+DEFINE_CM_ERR_EVENT(send_drep);
+DEFINE_CM_ERR_EVENT(dreq_unknown);
+DEFINE_CM_ERR_EVENT(send_unknown_rej);
+DEFINE_CM_ERR_EVENT(rej_unknown);
+DEFINE_CM_ERR_EVENT(prepare_mra_unknown);
+DEFINE_CM_ERR_EVENT(mra_unknown);
+DEFINE_CM_ERR_EVENT(qp_init);
+DEFINE_CM_ERR_EVENT(qp_rtr);
+DEFINE_CM_ERR_EVENT(qp_rts);
+
+DEFINE_EVENT(icm_id_class,
+ icm_dreq_skipped,
+ TP_PROTO(
+ const struct ib_cm_id *cm_id
+ ),
+ TP_ARGS(cm_id)
+);

+
+DECLARE_EVENT_CLASS(icm_local_class,
+ TP_PROTO(
+ unsigned int local_id,
+ unsigned int remote_id
+ ),
+
+ TP_ARGS(local_id, remote_id),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, local_id)
+ __field(unsigned int, remote_id)
+ ),
+
+ TP_fast_assign(
+ __entry->local_id = local_id;
+ __entry->remote_id = remote_id;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u",
+ __entry->local_id, __entry->remote_id
+ )
+);
+
+#define DEFINE_CM_LOCAL_EVENT(name) \
+ DEFINE_EVENT(icm_local_class, \
+ icm_##name, \
+ TP_PROTO( \
+ unsigned int local_id, \
+ unsigned int remote_id \
+ ), \
+ TP_ARGS(local_id, remote_id))
+
+DEFINE_CM_LOCAL_EVENT(issue_rej);
+DEFINE_CM_LOCAL_EVENT(issue_drep);
+DEFINE_CM_LOCAL_EVENT(staleconn_err);
+DEFINE_CM_LOCAL_EVENT(no_priv_err);
+
+DECLARE_EVENT_CLASS(icm_remote_class,
+ TP_PROTO(
+ u32 remote_id
+ ),
+
+ TP_ARGS(remote_id),
+
+ TP_STRUCT__entry(
+ __field(u32, remote_id)
+ ),
+
+ TP_fast_assign(
+ __entry->remote_id = remote_id;
+ ),
+
+ TP_printk("remote_id=%u",
+ __entry->remote_id
+ )
+);
+
+#define DEFINE_CM_REMOTE_EVENT(name) \
+ DEFINE_EVENT(icm_remote_class, \
+ icm_##name, \
+ TP_PROTO( \
+ u32 remote_id \
+ ), \
+ TP_ARGS(remote_id))
+
+DEFINE_CM_REMOTE_EVENT(remote_no_priv_err);
+DEFINE_CM_REMOTE_EVENT(insert_failed_err);
+
+TRACE_EVENT(icm_send_rep_err,
+ TP_PROTO(
+ __be32 local_id,
+ enum ib_cm_state state
+ ),
+
+ TP_ARGS(local_id, state),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, local_id)
+ __field(unsigned long, state)
+ ),
+
+ TP_fast_assign(
+ __entry->local_id = be32_to_cpu(local_id);
+ __entry->state = state;
+ ),
+
+ TP_printk("local_id=%u state=%s",
+ __entry->local_id, show_ib_cm_state(__entry->state)
+ )
+);
+
+TRACE_EVENT(icm_rep_unknown_err,
+ TP_PROTO(
+ unsigned int local_id,
+ unsigned int remote_id,
+ enum ib_cm_state state
+ ),
+
+ TP_ARGS(local_id, remote_id, state),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, local_id)
+ __field(unsigned int, remote_id)
+ __field(unsigned long, state)
+ ),
+
+ TP_fast_assign(
+ __entry->local_id = local_id;
+ __entry->remote_id = remote_id;
+ __entry->state = state;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u state=%s",
+ __entry->local_id, __entry->remote_id,
+ show_ib_cm_state(__entry->state)
+ )
+);
+
+TRACE_EVENT(icm_handler_err,
+ TP_PROTO(
+ enum ib_cm_event_type event
+ ),
+
+ TP_ARGS(event),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, event)
+ ),
+
+ TP_fast_assign(
+ __entry->event = event;
+ ),
+
+ TP_printk("unhandled event=%s",
+ rdma_show_ib_cm_event(__entry->event)
+ )
+);
+
+TRACE_EVENT(icm_mad_send_err,
+ TP_PROTO(
+ enum ib_cm_state state,
+ enum ib_wc_status wc_status
+ ),
+
+ TP_ARGS(state, wc_status),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, state)
+ __field(unsigned long, wc_status)
+ ),
+
+ TP_fast_assign(
+ __entry->state = state;
+ __entry->wc_status = wc_status;
+ ),
+
+ TP_printk("state=%s completion status=%s",
+ show_ib_cm_state(__entry->state),
+ rdma_show_wc_status(__entry->wc_status)
+ )
+);
+
+#endif /* _TRACE_IB_CMA_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/infiniband/core
+#define TRACE_INCLUDE_FILE cm_trace
+
+#include <trace/define_trace.h>
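As with any header built on <trace/define_trace.h>, exactly one translation unit in the module is expected to instantiate these tracepoints by defining CREATE_TRACE_POINTS before including the header; a minimal sketch of that consumer side (the file that does this is not shown in this excerpt):

    /* In one .c file only; every other user includes cm_trace.h normally. */
    #define CREATE_TRACE_POINTS
    #include "cm_trace.h"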
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 19f1730a4f24..95e89f5c147c 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1,36 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2005 Voltaire Inc. All rights reserved.
* Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
- * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
+ * Copyright (c) 1999-2019, Mellanox Technologies, Inc. All rights reserved.
* Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/completion.h>
@@ -38,6 +11,7 @@
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
+#include <linux/rbtree.h>
#include <linux/igmp.h>
#include <linux/xarray.h>
#include <linux/inetdevice.h>
@@ -47,6 +21,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/netevent.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/ip_fib.h>
@@ -63,16 +38,15 @@
#include "core_priv.h"
#include "cma_priv.h"
+#include "cma_trace.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
MODULE_LICENSE("Dual BSD/GPL");
#define CMA_CM_RESPONSE_TIMEOUT 20
-#define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000
#define CMA_MAX_CM_RETRIES 15
-#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
-#define CMA_IBOE_PACKET_LIFETIME 18
+#define CMA_IBOE_PACKET_LIFETIME 16
#define CMA_PREFERRED_ROCE_GID_TYPE IB_GID_TYPE_ROCE_UDP_ENCAP
static const char * const cma_events[] = {
@@ -94,6 +68,11 @@ static const char * const cma_events[] = {
[RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit",
};
+static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
+ enum ib_gid_type gid_type);
+
+static void cma_netevent_work_handler(struct work_struct *_work);
+
const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
{
size_t index = event;
@@ -117,7 +96,13 @@ const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
}
EXPORT_SYMBOL(rdma_reject_msg);
-bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
+/**
+ * rdma_is_consumer_reject - return true if the consumer rejected the connect
+ * request.
+ * @id: Communication identifier that received the REJECT event.
+ * @reason: Value returned in the REJECT event status field.
+ */
+static bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
{
if (rdma_ib_or_roce(id->device, id->port_num))
return reason == IB_CM_REJ_CONSUMER_DEFINED;
@@ -128,7 +113,6 @@ bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
WARN_ON_ONCE(1);
return false;
}
-EXPORT_SYMBOL(rdma_is_consumer_reject);
const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
struct rdma_cm_event *ev, u8 *data_len)
@@ -161,20 +145,7 @@ struct iw_cm_id *rdma_iw_cm_id(struct rdma_cm_id *id)
}
EXPORT_SYMBOL(rdma_iw_cm_id);
-/**
- * rdma_res_to_id() - return the rdma_cm_id pointer for this restrack.
- * @res: rdma resource tracking entry pointer
- */
-struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res)
-{
- struct rdma_id_private *id_priv =
- container_of(res, struct rdma_id_private, res);
-
- return &id_priv->id;
-}
-EXPORT_SYMBOL(rdma_res_to_id);
-
-static void cma_add_one(struct ib_device *device);
+static int cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device, void *client_data);
static struct ib_client cma_client = {
@@ -187,6 +158,9 @@ static struct ib_sa_client sa_client;
static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
+static struct rb_root id_table = RB_ROOT;
+/* Serialize operations of id_table tree */
+static DEFINE_SPINLOCK(id_table_lock);
static struct workqueue_struct *cma_wq;
static unsigned int cma_pernet_id;
@@ -221,11 +195,16 @@ struct xarray *cma_pernet_xa(struct net *net, enum rdma_ucm_port_space ps)
}
}
+struct id_table_entry {
+ struct list_head id_list;
+ struct rb_node rb_node;
+};
+
struct cma_device {
struct list_head list;
struct ib_device *device;
struct completion comp;
- atomic_t refcount;
+ refcount_t refcount;
struct list_head id_list;
enum ib_gid_type *default_gid_type;
u8 *default_roce_tos;
@@ -237,14 +216,6 @@ struct rdma_bind_list {
unsigned short port;
};
-struct class_port_info_context {
- struct ib_class_port_info *class_port_info;
- struct ib_device *device;
- struct completion done;
- struct ib_sa_query *sa_query;
- u8 port_num;
-};
-
static int cma_ps_alloc(struct net *net, enum rdma_ucm_port_space ps,
struct rdma_bind_list *bind_list, int snum)
{
@@ -273,9 +244,15 @@ enum {
CMA_OPTION_AFONLY,
};
-void cma_ref_dev(struct cma_device *cma_dev)
+void cma_dev_get(struct cma_device *cma_dev)
{
- atomic_inc(&cma_dev->refcount);
+ refcount_inc(&cma_dev->refcount);
+}
+
+void cma_dev_put(struct cma_device *cma_dev)
+{
+ if (refcount_dec_and_test(&cma_dev->refcount))
+ complete(&cma_dev->comp);
}
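cma_dev_get()/cma_dev_put() pair a refcount_t with the cma_dev completion: the final put signals whoever is waiting for the device to become unused. A hedged sketch of the waiting side, in the style of the device-removal path (the helper name is illustrative):

    /* Sketch only: drop our reference, then block until every other
     * holder has also called cma_dev_put().
     */
    static void cma_dev_wait_unused(struct cma_device *cma_dev)
    {
            cma_dev_put(cma_dev);
            wait_for_completion(&cma_dev->comp);
    }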
struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
@@ -293,13 +270,13 @@ struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
}
if (found_cma_dev)
- cma_ref_dev(found_cma_dev);
+ cma_dev_get(found_cma_dev);
mutex_unlock(&lock);
return found_cma_dev;
}
int cma_get_default_gid_type(struct cma_device *cma_dev,
- unsigned int port)
+ u32 port)
{
if (!rdma_is_port_valid(cma_dev->device, port))
return -EINVAL;
@@ -308,7 +285,7 @@ int cma_get_default_gid_type(struct cma_device *cma_dev,
}
int cma_set_default_gid_type(struct cma_device *cma_dev,
- unsigned int port,
+ u32 port,
enum ib_gid_type default_gid_type)
{
unsigned long supported_gids;
@@ -316,6 +293,10 @@ int cma_set_default_gid_type(struct cma_device *cma_dev,
if (!rdma_is_port_valid(cma_dev->device, port))
return -EINVAL;
+ if (default_gid_type == IB_GID_TYPE_IB &&
+ rdma_protocol_roce_eth_encap(cma_dev->device, port))
+ default_gid_type = IB_GID_TYPE_ROCE;
+
supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
if (!(supported_gids & 1 << default_gid_type))
@@ -327,7 +308,7 @@ int cma_set_default_gid_type(struct cma_device *cma_dev,
return 0;
}
-int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port)
+int cma_get_default_roce_tos(struct cma_device *cma_dev, u32 port)
{
if (!rdma_is_port_valid(cma_dev->device, port))
return -EINVAL;
@@ -335,7 +316,7 @@ int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port)
return cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)];
}
-int cma_set_default_roce_tos(struct cma_device *cma_dev, unsigned int port,
+int cma_set_default_roce_tos(struct cma_device *cma_dev, u32 port,
u8 default_roce_tos)
{
if (!rdma_is_port_valid(cma_dev->device, port))
@@ -361,12 +342,15 @@ struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
struct cma_multicast {
struct rdma_id_private *id_priv;
union {
- struct ib_sa_multicast *ib;
- } multicast;
+ struct ib_sa_multicast *sa_mc;
+ struct {
+ struct work_struct work;
+ struct rdma_cm_event event;
+ } iboe_join;
+ };
struct list_head list;
void *context;
struct sockaddr_storage addr;
- struct kref mcref;
u8 join_state;
};
@@ -378,18 +362,6 @@ struct cma_work {
struct rdma_cm_event event;
};
-struct cma_ndev_work {
- struct work_struct work;
- struct rdma_id_private *id;
- struct rdma_cm_event event;
-};
-
-struct iboe_mcast_work {
- struct work_struct work;
- struct rdma_id_private *id;
- struct cma_multicast *mc;
-};
-
union cma_ip_addr {
struct in6_addr ip6;
struct {
@@ -419,23 +391,21 @@ struct cma_req_info {
u16 pkey;
};
-static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
-{
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&id_priv->lock, flags);
- ret = (id_priv->state == comp);
- spin_unlock_irqrestore(&id_priv->lock, flags);
- return ret;
-}
-
static int cma_comp_exch(struct rdma_id_private *id_priv,
enum rdma_cm_state comp, enum rdma_cm_state exch)
{
unsigned long flags;
int ret;
+ /*
+ * The FSM uses a funny double locking where state is protected by both
+ * the handler_mutex and the spinlock. State is not allowed to change
+ * to/from a handler_mutex protected value without also holding
+ * handler_mutex.
+ */
+ if (comp == RDMA_CM_CONNECT || exch == RDMA_CM_CONNECT)
+ lockdep_assert_held(&id_priv->handler_mutex);
+
spin_lock_irqsave(&id_priv->lock, flags);
if ((ret = (id_priv->state == comp)))
id_priv->state = exch;
@@ -443,27 +413,24 @@ static int cma_comp_exch(struct rdma_id_private *id_priv,
return ret;
}
-static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
- enum rdma_cm_state exch)
+static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
{
- unsigned long flags;
- enum rdma_cm_state old;
+ return hdr->ip_version >> 4;
+}
- spin_lock_irqsave(&id_priv->lock, flags);
- old = id_priv->state;
- id_priv->state = exch;
- spin_unlock_irqrestore(&id_priv->lock, flags);
- return old;
+static void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
+{
+ hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}
-static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
+static struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
{
- return hdr->ip_version >> 4;
+ return (struct sockaddr *)&id_priv->id.route.addr.src_addr;
}
-static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
+static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
{
- hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
+ return (struct sockaddr *)&id_priv->id.route.addr.dst_addr;
}
static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
@@ -486,19 +453,135 @@ static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
return (in_dev) ? 0 : -ENODEV;
}
+static int compare_netdev_and_ip(int ifindex_a, struct sockaddr *sa,
+ struct id_table_entry *entry_b)
+{
+ struct rdma_id_private *id_priv = list_first_entry(
+ &entry_b->id_list, struct rdma_id_private, id_list_entry);
+ int ifindex_b = id_priv->id.route.addr.dev_addr.bound_dev_if;
+ struct sockaddr *sb = cma_dst_addr(id_priv);
+
+ if (ifindex_a != ifindex_b)
+ return (ifindex_a > ifindex_b) ? 1 : -1;
+
+ if (sa->sa_family != sb->sa_family)
+ return sa->sa_family - sb->sa_family;
+
+ if (sa->sa_family == AF_INET &&
+ __builtin_object_size(sa, 0) >= sizeof(struct sockaddr_in)) {
+ return memcmp(&((struct sockaddr_in *)sa)->sin_addr,
+ &((struct sockaddr_in *)sb)->sin_addr,
+ sizeof(((struct sockaddr_in *)sa)->sin_addr));
+ }
+
+ if (sa->sa_family == AF_INET6 &&
+ __builtin_object_size(sa, 0) >= sizeof(struct sockaddr_in6)) {
+ return ipv6_addr_cmp(&((struct sockaddr_in6 *)sa)->sin6_addr,
+ &((struct sockaddr_in6 *)sb)->sin6_addr);
+ }
+
+ return -1;
+}
+
+static int cma_add_id_to_tree(struct rdma_id_private *node_id_priv)
+{
+ struct rb_node **new, *parent = NULL;
+ struct id_table_entry *this, *node;
+ unsigned long flags;
+ int result;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&id_table_lock, flags);
+ new = &id_table.rb_node;
+ while (*new) {
+ this = container_of(*new, struct id_table_entry, rb_node);
+ result = compare_netdev_and_ip(
+ node_id_priv->id.route.addr.dev_addr.bound_dev_if,
+ cma_dst_addr(node_id_priv), this);
+
+ parent = *new;
+ if (result < 0)
+ new = &((*new)->rb_left);
+ else if (result > 0)
+ new = &((*new)->rb_right);
+ else {
+ list_add_tail(&node_id_priv->id_list_entry,
+ &this->id_list);
+ kfree(node);
+ goto unlock;
+ }
+ }
+
+ INIT_LIST_HEAD(&node->id_list);
+ list_add_tail(&node_id_priv->id_list_entry, &node->id_list);
+
+ rb_link_node(&node->rb_node, parent, new);
+ rb_insert_color(&node->rb_node, &id_table);
+
+unlock:
+ spin_unlock_irqrestore(&id_table_lock, flags);
+ return 0;
+}
+
+static struct id_table_entry *
+node_from_ndev_ip(struct rb_root *root, int ifindex, struct sockaddr *sa)
+{
+ struct rb_node *node = root->rb_node;
+ struct id_table_entry *data;
+ int result;
+
+ while (node) {
+ data = container_of(node, struct id_table_entry, rb_node);
+ result = compare_netdev_and_ip(ifindex, sa, data);
+ if (result < 0)
+ node = node->rb_left;
+ else if (result > 0)
+ node = node->rb_right;
+ else
+ return data;
+ }
+
+ return NULL;
+}
+
+static void cma_remove_id_from_tree(struct rdma_id_private *id_priv)
+{
+ struct id_table_entry *data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&id_table_lock, flags);
+ if (list_empty(&id_priv->id_list_entry))
+ goto out;
+
+ data = node_from_ndev_ip(&id_table,
+ id_priv->id.route.addr.dev_addr.bound_dev_if,
+ cma_dst_addr(id_priv));
+ if (!data)
+ goto out;
+
+ list_del_init(&id_priv->id_list_entry);
+ if (list_empty(&data->id_list)) {
+ rb_erase(&data->rb_node, &id_table);
+ kfree(data);
+ }
+out:
+ spin_unlock_irqrestore(&id_table_lock, flags);
+}
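cma_remove_id_from_tree() above shows the intended access pattern for the new id_table: every traversal via node_from_ndev_ip() and every structural change happens under id_table_lock, with the (bound ifindex, destination address) pair as the key ordered by compare_netdev_and_ip(). A read-side lookup would take the same shape; this is a sketch only, and a real caller would need to take a reference (e.g. cma_id_get()) before dropping the lock:

    /* Sketch: find the first rdma_id_private bound to (ifindex, addr). */
    static struct rdma_id_private *cma_find_id(int ifindex, struct sockaddr *addr)
    {
            struct rdma_id_private *id_priv = NULL;
            struct id_table_entry *entry;
            unsigned long flags;

            spin_lock_irqsave(&id_table_lock, flags);
            entry = node_from_ndev_ip(&id_table, ifindex, addr);
            if (entry)
                    id_priv = list_first_entry(&entry->id_list,
                                               struct rdma_id_private,
                                               id_list_entry);
            spin_unlock_irqrestore(&id_table_lock, flags);
            return id_priv;
    }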
+
static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev)
{
- cma_ref_dev(cma_dev);
+ cma_dev_get(cma_dev);
id_priv->cma_dev = cma_dev;
id_priv->id.device = cma_dev->device;
id_priv->id.route.addr.dev_addr.transport =
rdma_node_get_transport(cma_dev->device->node_type);
- list_add_tail(&id_priv->list, &cma_dev->id_list);
- if (id_priv->res.kern_name)
- rdma_restrack_kadd(&id_priv->res);
- else
- rdma_restrack_uadd(&id_priv->res);
+ list_add_tail(&id_priv->device_item, &cma_dev->id_list);
+
+ trace_cm_id_attach(id_priv, cma_dev->device);
}
static void cma_attach_to_dev(struct rdma_id_private *id_priv,
@@ -510,60 +593,30 @@ static void cma_attach_to_dev(struct rdma_id_private *id_priv,
rdma_start_port(cma_dev->device)];
}
-void cma_deref_dev(struct cma_device *cma_dev)
-{
- if (atomic_dec_and_test(&cma_dev->refcount))
- complete(&cma_dev->comp);
-}
-
-static inline void release_mc(struct kref *kref)
-{
- struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
-
- kfree(mc->multicast.ib);
- kfree(mc);
-}
-
static void cma_release_dev(struct rdma_id_private *id_priv)
{
mutex_lock(&lock);
- list_del(&id_priv->list);
- cma_deref_dev(id_priv->cma_dev);
+ list_del_init(&id_priv->device_item);
+ cma_dev_put(id_priv->cma_dev);
id_priv->cma_dev = NULL;
+ id_priv->id.device = NULL;
+ if (id_priv->id.route.addr.dev_addr.sgid_attr) {
+ rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
+ id_priv->id.route.addr.dev_addr.sgid_attr = NULL;
+ }
mutex_unlock(&lock);
}
-static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
-{
- return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
-}
-
-static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
-{
- return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
-}
-
static inline unsigned short cma_family(struct rdma_id_private *id_priv)
{
return id_priv->id.route.addr.src_addr.ss_family;
}
-static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
+static int cma_set_default_qkey(struct rdma_id_private *id_priv)
{
struct ib_sa_mcmember_rec rec;
int ret = 0;
- if (id_priv->qkey) {
- if (qkey && id_priv->qkey != qkey)
- return -EINVAL;
- return 0;
- }
-
- if (qkey) {
- id_priv->qkey = qkey;
- return 0;
- }
-
switch (id_priv->id.ps) {
case RDMA_PS_UDP:
case RDMA_PS_IB:
@@ -583,6 +636,16 @@ static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
return ret;
}
+static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
+{
+ if (!qkey ||
+ (id_priv->qkey && (id_priv->qkey != qkey)))
+ return -EINVAL;
+
+ id_priv->qkey = qkey;
+ return 0;
+}
+
static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
{
dev_addr->dev_type = ARPHRD_INFINIBAND;
@@ -605,37 +668,90 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
}
static const struct ib_gid_attr *
-cma_validate_port(struct ib_device *device, u8 port,
+cma_validate_port(struct ib_device *device, u32 port,
enum ib_gid_type gid_type,
union ib_gid *gid,
struct rdma_id_private *id_priv)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ const struct ib_gid_attr *sgid_attr = ERR_PTR(-ENODEV);
int bound_if_index = dev_addr->bound_dev_if;
- const struct ib_gid_attr *sgid_attr;
int dev_type = dev_addr->dev_type;
struct net_device *ndev = NULL;
+ struct net_device *pdev = NULL;
if (!rdma_dev_access_netns(device, id_priv->id.route.addr.dev_addr.net))
- return ERR_PTR(-ENODEV);
+ goto out;
if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
- return ERR_PTR(-ENODEV);
+ goto out;
if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
- return ERR_PTR(-ENODEV);
+ goto out;
+
+ /*
+ * For drivers that do not associate more than one net device with
+ * their gid tables, such as iWARP drivers, it is sufficient to
+ * return the first table entry.
+ *
+ * Other driver classes might be included in the future.
+ */
+ if (rdma_protocol_iwarp(device, port)) {
+ sgid_attr = rdma_get_gid_attr(device, port, 0);
+ if (IS_ERR(sgid_attr))
+ goto out;
- if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
- ndev = dev_get_by_index(dev_addr->net, bound_if_index);
- if (!ndev)
- return ERR_PTR(-ENODEV);
+ rcu_read_lock();
+ ndev = rcu_dereference(sgid_attr->ndev);
+ if (ndev->ifindex != bound_if_index) {
+ pdev = dev_get_by_index_rcu(dev_addr->net, bound_if_index);
+ if (pdev) {
+ if (is_vlan_dev(pdev)) {
+ pdev = vlan_dev_real_dev(pdev);
+ if (ndev->ifindex == pdev->ifindex)
+ bound_if_index = pdev->ifindex;
+ }
+ if (is_vlan_dev(ndev)) {
+ pdev = vlan_dev_real_dev(ndev);
+ if (bound_if_index == pdev->ifindex)
+ bound_if_index = ndev->ifindex;
+ }
+ }
+ }
+ if (!net_eq(dev_net(ndev), dev_addr->net) ||
+ ndev->ifindex != bound_if_index) {
+ rdma_put_gid_attr(sgid_attr);
+ sgid_attr = ERR_PTR(-ENODEV);
+ }
+ rcu_read_unlock();
+ goto out;
+ }
+
+ /*
+ * For an RXE device, it should work with TUN devices and normal ethernet
+ * devices. Use driver_id to check whether a device is an RXE device.
+ * ARPHRD_NONE means a TUN device.
+ */
+ if (device->ops.driver_id == RDMA_DRIVER_RXE) {
+ if ((dev_type == ARPHRD_NONE || dev_type == ARPHRD_ETHER)
+ && rdma_protocol_roce(device, port)) {
+ ndev = dev_get_by_index(dev_addr->net, bound_if_index);
+ if (!ndev)
+ goto out;
+ }
} else {
- gid_type = IB_GID_TYPE_IB;
+ if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
+ ndev = dev_get_by_index(dev_addr->net, bound_if_index);
+ if (!ndev)
+ goto out;
+ } else {
+ gid_type = IB_GID_TYPE_IB;
+ }
}
sgid_attr = rdma_find_gid_by_port(device, gid, gid_type, port, ndev);
- if (ndev)
- dev_put(ndev);
+ dev_put(ndev);
+out:
return sgid_attr;
}
@@ -663,7 +779,7 @@ static int cma_acquire_dev_by_src_ip(struct rdma_id_private *id_priv)
struct cma_device *cma_dev;
enum ib_gid_type gid_type;
int ret = -ENODEV;
- unsigned int port;
+ u32 port;
if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
id_priv->id.ps == RDMA_PS_IPOIB)
@@ -743,6 +859,7 @@ static int cma_ib_acquire_dev(struct rdma_id_private *id_priv,
mutex_lock(&lock);
cma_attach_to_dev(id_priv, listen_id_priv->cma_dev);
mutex_unlock(&lock);
+ rdma_restrack_add(&id_priv->res);
return 0;
}
@@ -755,7 +872,7 @@ static int cma_iw_acquire_dev(struct rdma_id_private *id_priv,
enum ib_gid_type gid_type;
int ret = -ENODEV;
union ib_gid gid;
- u8 port;
+ u32 port;
if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
id_priv->id.ps == RDMA_PS_IPOIB)
@@ -779,7 +896,7 @@ static int cma_iw_acquire_dev(struct rdma_id_private *id_priv,
}
list_for_each_entry(cma_dev, &dev_list, list) {
- for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
+ rdma_for_each_port (cma_dev->device, port) {
if (listen_id_priv->cma_dev == cma_dev &&
listen_id_priv->id.port_num == port)
continue;
@@ -797,8 +914,10 @@ static int cma_iw_acquire_dev(struct rdma_id_private *id_priv,
}
out:
- if (!ret)
+ if (!ret) {
cma_attach_to_dev(id_priv, cma_dev);
+ rdma_restrack_add(&id_priv->res);
+ }
mutex_unlock(&lock);
return ret;
@@ -812,9 +931,10 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
struct cma_device *cma_dev, *cur_dev;
struct sockaddr_ib *addr;
union ib_gid gid, sgid, *dgid;
+ unsigned int p;
u16 pkey, index;
- u8 p;
enum ib_port_state port_state;
+ int ret;
int i;
cma_dev = NULL;
@@ -824,7 +944,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
mutex_lock(&lock);
list_for_each_entry(cur_dev, &dev_list, list) {
- for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
+ rdma_for_each_port (cur_dev->device, p) {
if (!rdma_cap_af_ib(cur_dev->device, p))
continue;
@@ -833,9 +953,14 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
continue;
- for (i = 0; !rdma_query_gid(cur_dev->device,
- p, i, &gid);
- i++) {
+
+ for (i = 0; i < cur_dev->device->port_data[p].immutable.gid_tbl_len;
+ ++i) {
+ ret = rdma_query_gid(cur_dev->device, p, i,
+ &gid);
+ if (ret)
+ continue;
+
if (!memcmp(&gid, dgid, sizeof(gid))) {
cma_dev = cur_dev;
sgid = gid;
@@ -859,6 +984,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
found:
cma_attach_to_dev(id_priv, cma_dev);
+ rdma_restrack_add(&id_priv->res);
mutex_unlock(&lock);
addr = (struct sockaddr_ib *)cma_src_addr(id_priv);
memcpy(&addr->sib_addr, &sgid, sizeof(sgid));
@@ -866,16 +992,21 @@ found:
return 0;
}
-static void cma_deref_id(struct rdma_id_private *id_priv)
+static void cma_id_get(struct rdma_id_private *id_priv)
{
- if (atomic_dec_and_test(&id_priv->refcount))
+ refcount_inc(&id_priv->refcount);
+}
+
+static void cma_id_put(struct rdma_id_private *id_priv)
+{
+ if (refcount_dec_and_test(&id_priv->refcount))
complete(&id_priv->comp);
}
-struct rdma_cm_id *__rdma_create_id(struct net *net,
- rdma_cm_event_handler event_handler,
- void *context, enum rdma_ucm_port_space ps,
- enum ib_qp_type qp_type, const char *caller)
+static struct rdma_id_private *
+__rdma_create_id(struct net *net, rdma_cm_event_handler event_handler,
+ void *context, enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type, const struct rdma_id_private *parent)
{
struct rdma_id_private *id_priv;
@@ -883,8 +1014,6 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
if (!id_priv)
return ERR_PTR(-ENOMEM);
- rdma_restrack_set_task(&id_priv->res, caller);
- id_priv->res.type = RDMA_RESTRACK_CM_ID;
id_priv->state = RDMA_CM_IDLE;
id_priv->id.context = context;
id_priv->id.event_handler = event_handler;
@@ -892,21 +1021,61 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
id_priv->id.qp_type = qp_type;
id_priv->tos_set = false;
id_priv->timeout_set = false;
+ id_priv->min_rnr_timer_set = false;
id_priv->gid_type = IB_GID_TYPE_IB;
spin_lock_init(&id_priv->lock);
mutex_init(&id_priv->qp_mutex);
init_completion(&id_priv->comp);
- atomic_set(&id_priv->refcount, 1);
+ refcount_set(&id_priv->refcount, 1);
mutex_init(&id_priv->handler_mutex);
+ INIT_LIST_HEAD(&id_priv->device_item);
+ INIT_LIST_HEAD(&id_priv->id_list_entry);
INIT_LIST_HEAD(&id_priv->listen_list);
INIT_LIST_HEAD(&id_priv->mc_list);
get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
id_priv->id.route.addr.dev_addr.net = get_net(net);
id_priv->seq_num &= 0x00ffffff;
+ INIT_WORK(&id_priv->id.net_work, cma_netevent_work_handler);
- return &id_priv->id;
+ rdma_restrack_new(&id_priv->res, RDMA_RESTRACK_CM_ID);
+ if (parent)
+ rdma_restrack_parent_name(&id_priv->res, &parent->res);
+
+ return id_priv;
}
-EXPORT_SYMBOL(__rdma_create_id);
+
+struct rdma_cm_id *
+__rdma_create_kernel_id(struct net *net, rdma_cm_event_handler event_handler,
+ void *context, enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type, const char *caller)
+{
+ struct rdma_id_private *ret;
+
+ ret = __rdma_create_id(net, event_handler, context, ps, qp_type, NULL);
+ if (IS_ERR(ret))
+ return ERR_CAST(ret);
+
+ rdma_restrack_set_name(&ret->res, caller);
+ return &ret->id;
+}
+EXPORT_SYMBOL(__rdma_create_kernel_id);
+
+struct rdma_cm_id *rdma_create_user_id(rdma_cm_event_handler event_handler,
+ void *context,
+ enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type)
+{
+ struct rdma_id_private *ret;
+
+ ret = __rdma_create_id(current->nsproxy->net_ns, event_handler, context,
+ ps, qp_type, NULL);
+ if (IS_ERR(ret))
+ return ERR_CAST(ret);
+
+ rdma_restrack_set_name(&ret->res, NULL);
+ return &ret->id;
+}
+EXPORT_SYMBOL(rdma_create_user_id);
static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
@@ -955,27 +1124,34 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (id->device != pd->device)
- return -EINVAL;
+ if (id->device != pd->device) {
+ ret = -EINVAL;
+ goto out_err;
+ }
qp_init_attr->port_num = id->port_num;
qp = ib_create_qp(pd, qp_init_attr);
- if (IS_ERR(qp))
- return PTR_ERR(qp);
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
+ goto out_err;
+ }
if (id->qp_type == IB_QPT_UD)
ret = cma_init_ud_qp(id_priv, qp);
else
ret = cma_init_conn_qp(id_priv, qp);
if (ret)
- goto err;
+ goto out_destroy;
id->qp = qp;
id_priv->qp_num = qp->qp_num;
id_priv->srq = (qp->srq != NULL);
+ trace_cm_qp_create(id_priv, pd, qp_init_attr, 0);
return 0;
-err:
+out_destroy:
ib_destroy_qp(qp);
+out_err:
+ trace_cm_qp_create(id_priv, pd, qp_init_attr, ret);
return ret;
}
EXPORT_SYMBOL(rdma_create_qp);
@@ -985,6 +1161,7 @@ void rdma_destroy_qp(struct rdma_cm_id *id)
struct rdma_id_private *id_priv;
id_priv = container_of(id, struct rdma_id_private, id);
+ trace_cm_qp_destroy(id_priv);
mutex_lock(&id_priv->qp_mutex);
ib_destroy_qp(id_priv->id.qp);
id_priv->id.qp = NULL;
@@ -1093,7 +1270,7 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
if (id_priv->id.qp_type == IB_QPT_UD) {
- ret = cma_set_qkey(id_priv, 0);
+ ret = cma_set_default_qkey(id_priv);
if (ret)
return ret;
@@ -1131,12 +1308,16 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
qp_attr_mask);
qp_attr->port_num = id_priv->id.port_num;
*qp_attr_mask |= IB_QP_PORT;
- } else
+ } else {
ret = -ENOSYS;
+ }
if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set)
qp_attr->timeout = id_priv->timeout;
+ if ((*qp_attr_mask & IB_QP_MIN_RNR_TIMER) && id_priv->min_rnr_timer_set)
+ qp_attr->min_rnr_timer = id_priv->min_rnr_timer;
+
return ret;
}
EXPORT_SYMBOL(rdma_init_qp_attr);
@@ -1423,7 +1604,7 @@ static bool validate_ipv4_net_dev(struct net_device *net_dev,
return false;
memset(&fl4, 0, sizeof(fl4));
- fl4.flowi4_iif = net_dev->ifindex;
+ fl4.flowi4_oif = net_dev->ifindex;
fl4.daddr = daddr;
fl4.saddr = saddr;
@@ -1577,7 +1758,7 @@ static bool cma_match_private_data(struct rdma_id_private *id_priv,
static bool cma_protocol_roce(const struct rdma_cm_id *id)
{
struct ib_device *device = id->device;
- const int port_num = id->port_num ?: rdma_start_port(device);
+ const u32 port_num = id->port_num ?: rdma_start_port(device);
return rdma_protocol_roce(device, port_num);
}
@@ -1631,6 +1812,8 @@ static struct rdma_id_private *cma_find_listener(
{
struct rdma_id_private *id_priv, *id_priv_dev;
+ lockdep_assert_held(&lock);
+
if (!bind_list)
return ERR_PTR(-EINVAL);
@@ -1641,7 +1824,7 @@ static struct rdma_id_private *cma_find_listener(
return id_priv;
list_for_each_entry(id_priv_dev,
&id_priv->listen_list,
- listen_list) {
+ listen_item) {
if (id_priv_dev->id.device == cm_id->device &&
cma_match_net_dev(&id_priv_dev->id,
net_dev, req))
@@ -1677,6 +1860,7 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id,
}
}
+ mutex_lock(&lock);
/*
* Net namespace might be getting deleted while route lookup,
* cm_id lookup is in progress. Therefore, perform netdevice
@@ -1705,8 +1889,8 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id,
}
if (!validate_net_dev(*net_dev,
- (struct sockaddr *)&req->listen_addr_storage,
- (struct sockaddr *)&req->src_addr_storage)) {
+ (struct sockaddr *)&req->src_addr_storage,
+ (struct sockaddr *)&req->listen_addr_storage)) {
id_priv = ERR_PTR(-EHOSTUNREACH);
goto err;
}
@@ -1718,6 +1902,7 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id,
id_priv = cma_find_listener(bind_list, cm_id, ib_event, req, *net_dev);
err:
rcu_read_unlock();
+ mutex_unlock(&lock);
if (IS_ERR(id_priv) && *net_dev) {
dev_put(*net_dev);
*net_dev = NULL;
@@ -1738,28 +1923,36 @@ static void cma_cancel_route(struct rdma_id_private *id_priv)
}
}
-static void cma_cancel_listens(struct rdma_id_private *id_priv)
+static void _cma_cancel_listens(struct rdma_id_private *id_priv)
{
struct rdma_id_private *dev_id_priv;
+ lockdep_assert_held(&lock);
+
/*
* Remove from listen_any_list to prevent added devices from spawning
* additional listen requests.
*/
- mutex_lock(&lock);
- list_del(&id_priv->list);
+ list_del_init(&id_priv->listen_any_item);
while (!list_empty(&id_priv->listen_list)) {
- dev_id_priv = list_entry(id_priv->listen_list.next,
- struct rdma_id_private, listen_list);
+ dev_id_priv =
+ list_first_entry(&id_priv->listen_list,
+ struct rdma_id_private, listen_item);
/* sync with device removal to avoid duplicate destruction */
- list_del_init(&dev_id_priv->list);
- list_del(&dev_id_priv->listen_list);
+ list_del_init(&dev_id_priv->device_item);
+ list_del_init(&dev_id_priv->listen_item);
mutex_unlock(&lock);
rdma_destroy_id(&dev_id_priv->id);
mutex_lock(&lock);
}
+}
+
+static void cma_cancel_listens(struct rdma_id_private *id_priv)
+{
+ mutex_lock(&lock);
+ _cma_cancel_listens(id_priv);
mutex_unlock(&lock);
}
@@ -1768,6 +1961,14 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv,
{
switch (state) {
case RDMA_CM_ADDR_QUERY:
+ /*
+ * We can avoid doing the rdma_addr_cancel() based on state,
+ * only RDMA_CM_ADDR_QUERY has a work that could still execute.
+ * Notice that the addr_handler work could still be exiting
+ * outside this state, however due to the interaction with the
+ * handler_mutex the work is guaranteed not to touch id_priv
+ * during exit.
+ */
rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
break;
case RDMA_CM_ROUTE_QUERY:
@@ -1799,19 +2000,39 @@ static void cma_release_port(struct rdma_id_private *id_priv)
mutex_unlock(&lock);
}
-static void cma_leave_roce_mc_group(struct rdma_id_private *id_priv,
- struct cma_multicast *mc)
+static void destroy_mc(struct rdma_id_private *id_priv,
+ struct cma_multicast *mc)
{
- struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
- struct net_device *ndev = NULL;
+ bool send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
- if (dev_addr->bound_dev_if)
- ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
- if (ndev) {
- cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false);
+ if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num))
+ ib_sa_free_multicast(mc->sa_mc);
+
+ if (rdma_protocol_roce(id_priv->id.device, id_priv->id.port_num)) {
+ struct rdma_dev_addr *dev_addr =
+ &id_priv->id.route.addr.dev_addr;
+ struct net_device *ndev = NULL;
+
+ if (dev_addr->bound_dev_if)
+ ndev = dev_get_by_index(dev_addr->net,
+ dev_addr->bound_dev_if);
+ if (ndev && !send_only) {
+ enum ib_gid_type gid_type;
+ union ib_gid mgid;
+
+ gid_type = id_priv->cma_dev->default_gid_type
+ [id_priv->id.port_num -
+ rdma_start_port(
+ id_priv->cma_dev->device)];
+ cma_iboe_set_mgid((struct sockaddr *)&mc->addr, &mgid,
+ gid_type);
+ cma_igmp_send(ndev, &mgid, false);
+ }
dev_put(ndev);
+
+ cancel_work_sync(&mc->iboe_join.work);
}
- kref_put(&mc->mcref, release_mc);
+ kfree(mc);
}
static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
@@ -1819,36 +2040,20 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
struct cma_multicast *mc;
while (!list_empty(&id_priv->mc_list)) {
- mc = container_of(id_priv->mc_list.next,
- struct cma_multicast, list);
+ mc = list_first_entry(&id_priv->mc_list, struct cma_multicast,
+ list);
list_del(&mc->list);
- if (rdma_cap_ib_mcast(id_priv->cma_dev->device,
- id_priv->id.port_num)) {
- ib_sa_free_multicast(mc->multicast.ib);
- kfree(mc);
- } else {
- cma_leave_roce_mc_group(id_priv, mc);
- }
+ destroy_mc(id_priv, mc);
}
}
-void rdma_destroy_id(struct rdma_cm_id *id)
+static void _destroy_id(struct rdma_id_private *id_priv,
+ enum rdma_cm_state state)
{
- struct rdma_id_private *id_priv;
- enum rdma_cm_state state;
-
- id_priv = container_of(id, struct rdma_id_private, id);
- state = cma_exch(id_priv, RDMA_CM_DESTROYING);
cma_cancel_operation(id_priv, state);
- /*
- * Wait for any active callback to finish. New callbacks will find
- * the id_priv state set to destroying and abort.
- */
- mutex_lock(&id_priv->handler_mutex);
- mutex_unlock(&id_priv->handler_mutex);
-
rdma_restrack_del(&id_priv->res);
+ cma_remove_id_from_tree(id_priv);
if (id_priv->cma_dev) {
if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
if (id_priv->cm_id.ib)
@@ -1862,20 +2067,56 @@ void rdma_destroy_id(struct rdma_cm_id *id)
}
cma_release_port(id_priv);
- cma_deref_id(id_priv);
+ cma_id_put(id_priv);
wait_for_completion(&id_priv->comp);
if (id_priv->internal_id)
- cma_deref_id(id_priv->id.context);
+ cma_id_put(id_priv->id.context);
kfree(id_priv->id.route.path_rec);
-
- if (id_priv->id.route.addr.dev_addr.sgid_attr)
- rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
+ kfree(id_priv->id.route.path_rec_inbound);
+ kfree(id_priv->id.route.path_rec_outbound);
+ kfree(id_priv->id.route.service_recs);
put_net(id_priv->id.route.addr.dev_addr.net);
kfree(id_priv);
}
+
+/*
+ * destroy an ID from within the handler_mutex. This ensures that no other
+ * handlers can start running concurrently.
+ */
+static void destroy_id_handler_unlock(struct rdma_id_private *id_priv)
+ __releases(&id_priv->handler_mutex)
+{
+ enum rdma_cm_state state;
+ unsigned long flags;
+
+ trace_cm_id_destroy(id_priv);
+
+ /*
+ * Setting the state to destroyed under the handler mutex provides a
+ * fence against calling handler callbacks. If this is invoked due to
+ * the failure of a handler callback then it guarantees that no future
+ * handlers will be called.
+ */
+ lockdep_assert_held(&id_priv->handler_mutex);
+ spin_lock_irqsave(&id_priv->lock, flags);
+ state = id_priv->state;
+ id_priv->state = RDMA_CM_DESTROYING;
+ spin_unlock_irqrestore(&id_priv->lock, flags);
+ mutex_unlock(&id_priv->handler_mutex);
+ _destroy_id(id_priv, state);
+}
+
+void rdma_destroy_id(struct rdma_cm_id *id)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ mutex_lock(&id_priv->handler_mutex);
+ destroy_id_handler_unlock(id_priv);
+}
EXPORT_SYMBOL(rdma_destroy_id);
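The rework above centralizes the destroy-on-nonzero contract: when a ULP event handler returns a non-zero value, the CMA core tears the ID down itself via destroy_id_handler_unlock() while still holding handler_mutex, so the handler must not also call rdma_destroy_id() on that ID for the same event. A hedged consumer-side sketch (handler name and error policy are illustrative):

    /* Sketch of a ULP handler relying on the destroy-on-nonzero rule. */
    static int my_cm_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
    {
            switch (event->event) {
            case RDMA_CM_EVENT_REJECTED:
            case RDMA_CM_EVENT_CONNECT_ERROR:
                    /* Non-zero asks the core to destroy @id; do not also
                     * call rdma_destroy_id() from here.
                     */
                    return -ECONNRESET;
            default:
                    return 0;       /* keep the ID alive */
            }
    }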
static int cma_rep_recv(struct rdma_id_private *id_priv)
@@ -1890,6 +2131,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
if (ret)
goto reject;
+ trace_cm_send_rtu(id_priv);
ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
if (ret)
goto reject;
@@ -1898,6 +2140,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
reject:
pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret);
cma_modify_qp_err(id_priv);
+ trace_cm_send_rej(id_priv);
ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
NULL, 0, NULL, 0);
return ret;
@@ -1915,6 +2158,22 @@ static void cma_set_rep_event_data(struct rdma_cm_event *event,
event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
event->param.conn.srq = rep_data->srq;
event->param.conn.qp_num = rep_data->remote_qpn;
+
+ event->ece.vendor_id = rep_data->ece.vendor_id;
+ event->ece.attr_mod = rep_data->ece.attr_mod;
+}
+
+static int cma_cm_event_handler(struct rdma_id_private *id_priv,
+ struct rdma_cm_event *event)
+{
+ int ret;
+
+ lockdep_assert_held(&id_priv->handler_mutex);
+
+ trace_cm_event_handler(id_priv, event);
+ ret = id_priv->id.event_handler(&id_priv->id, event);
+ trace_cm_event_done(id_priv, event, ret);
+ return ret;
}
static int cma_ib_handler(struct ib_cm_id *cm_id,
@@ -1922,13 +2181,15 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
{
struct rdma_id_private *id_priv = cm_id->context;
struct rdma_cm_event event = {};
- int ret = 0;
+ enum rdma_cm_state state;
+ int ret;
mutex_lock(&id_priv->handler_mutex);
+ state = READ_ONCE(id_priv->state);
if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
- id_priv->state != RDMA_CM_CONNECT) ||
+ state != RDMA_CM_CONNECT) ||
(ib_event->event == IB_CM_TIMEWAIT_EXIT &&
- id_priv->state != RDMA_CM_DISCONNECT))
+ state != RDMA_CM_DISCONNECT))
goto out;
switch (ib_event->event) {
@@ -1938,9 +2199,11 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
event.status = -ETIMEDOUT;
break;
case IB_CM_REP_RECEIVED:
- if (cma_comp(id_priv, RDMA_CM_CONNECT) &&
- (id_priv->id.qp_type != IB_QPT_UD))
- ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
+ if (state == RDMA_CM_CONNECT &&
+ (id_priv->id.qp_type != IB_QPT_UD)) {
+ trace_cm_prepare_mra(id_priv);
+ ib_prepare_cm_mra(cm_id);
+ }
if (id_priv->id.qp) {
event.status = cma_rep_recv(id_priv);
event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
@@ -1956,7 +2219,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
event.event = RDMA_CM_EVENT_ESTABLISHED;
break;
case IB_CM_DREQ_ERROR:
- event.status = -ETIMEDOUT; /* fall through */
+ event.status = -ETIMEDOUT;
+ fallthrough;
case IB_CM_DREQ_RECEIVED:
case IB_CM_DREP_RECEIVED:
if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT,
@@ -1985,18 +2249,16 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
goto out;
}
- ret = id_priv->id.event_handler(&id_priv->id, &event);
+ ret = cma_cm_event_handler(id_priv, &event);
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.ib = NULL;
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- rdma_destroy_id(&id_priv->id);
+ destroy_id_handler_unlock(id_priv);
return ret;
}
out:
mutex_unlock(&id_priv->handler_mutex);
- return ret;
+ return 0;
}
static struct rdma_id_private *
@@ -2015,28 +2277,29 @@ cma_ib_new_conn_id(const struct rdma_cm_id *listen_id,
int ret;
listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
- id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
- listen_id->event_handler, listen_id->context,
- listen_id->ps, ib_event->param.req_rcvd.qp_type,
- listen_id_priv->res.kern_name);
- if (IS_ERR(id))
+ id_priv = __rdma_create_id(listen_id->route.addr.dev_addr.net,
+ listen_id->event_handler, listen_id->context,
+ listen_id->ps,
+ ib_event->param.req_rcvd.qp_type,
+ listen_id_priv);
+ if (IS_ERR(id_priv))
return NULL;
- id_priv = container_of(id, struct rdma_id_private, id);
+ id = &id_priv->id;
if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
(struct sockaddr *)&id->route.addr.dst_addr,
listen_id, ib_event, ss_family, service_id))
goto err;
rt = &id->route;
- rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
- rt->path_rec = kmalloc_array(rt->num_paths, sizeof(*rt->path_rec),
- GFP_KERNEL);
+ rt->num_pri_alt_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
+ rt->path_rec = kmalloc_array(rt->num_pri_alt_paths,
+ sizeof(*rt->path_rec), GFP_KERNEL);
if (!rt->path_rec)
goto err;
rt->path_rec[0] = *path;
- if (rt->num_paths == 2)
+ if (rt->num_pri_alt_paths == 2)
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
if (net_dev) {
@@ -2076,13 +2339,13 @@ cma_ib_new_udp_id(const struct rdma_cm_id *listen_id,
int ret;
listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
- id = __rdma_create_id(net, listen_id->event_handler, listen_id->context,
- listen_id->ps, IB_QPT_UD,
- listen_id_priv->res.kern_name);
- if (IS_ERR(id))
+ id_priv = __rdma_create_id(net, listen_id->event_handler,
+ listen_id->context, listen_id->ps, IB_QPT_UD,
+ listen_id_priv);
+ if (IS_ERR(id_priv))
return NULL;
- id_priv = container_of(id, struct rdma_id_private, id);
+ id = &id_priv->id;
if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
(struct sockaddr *)&id->route.addr.dst_addr,
listen_id, ib_event, ss_family,
@@ -2120,6 +2383,9 @@ static void cma_set_req_event_data(struct rdma_cm_event *event,
event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
event->param.conn.srq = req_data->srq;
event->param.conn.qp_num = req_data->remote_qpn;
+
+ event->ece.vendor_id = req_data->ece.vendor_id;
+ event->ece.attr_mod = req_data->ece.attr_mod;
}
static int cma_ib_check_req_qp_type(const struct rdma_cm_id *id,
@@ -2146,15 +2412,16 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
if (IS_ERR(listen_id))
return PTR_ERR(listen_id);
+ trace_cm_req_handler(listen_id, ib_event->event);
if (!cma_ib_check_req_qp_type(&listen_id->id, ib_event)) {
ret = -EINVAL;
goto net_dev_put;
}
mutex_lock(&listen_id->handler_mutex);
- if (listen_id->state != RDMA_CM_LISTEN) {
+ if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN) {
ret = -ECONNABORTED;
- goto err1;
+ goto err_unlock;
}
offset = cma_user_data_offset(listen_id);
@@ -2171,57 +2438,41 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
}
if (!conn_id) {
ret = -ENOMEM;
- goto err1;
+ goto err_unlock;
}
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
ret = cma_ib_acquire_dev(conn_id, listen_id, &req);
- if (ret)
- goto err2;
+ if (ret) {
+ destroy_id_handler_unlock(conn_id);
+ goto err_unlock;
+ }
conn_id->cm_id.ib = cm_id;
cm_id->context = conn_id;
cm_id->cm_handler = cma_ib_handler;
- /*
- * Protect against the user destroying conn_id from another thread
- * until we're done accessing it.
- */
- atomic_inc(&conn_id->refcount);
- ret = conn_id->id.event_handler(&conn_id->id, &event);
- if (ret)
- goto err3;
- /*
- * Acquire mutex to prevent user executing rdma_destroy_id()
- * while we're accessing the cm_id.
- */
- mutex_lock(&lock);
- if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
- (conn_id->id.qp_type != IB_QPT_UD))
- ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
- mutex_unlock(&lock);
- mutex_unlock(&conn_id->handler_mutex);
- mutex_unlock(&listen_id->handler_mutex);
- cma_deref_id(conn_id);
- if (net_dev)
- dev_put(net_dev);
- return 0;
+ ret = cma_cm_event_handler(conn_id, &event);
+ if (ret) {
+ /* Destroy the CM ID by returning a non-zero value. */
+ conn_id->cm_id.ib = NULL;
+ mutex_unlock(&listen_id->handler_mutex);
+ destroy_id_handler_unlock(conn_id);
+ goto net_dev_put;
+ }
-err3:
- cma_deref_id(conn_id);
- /* Destroy the CM ID by returning a non-zero value. */
- conn_id->cm_id.ib = NULL;
-err2:
- cma_exch(conn_id, RDMA_CM_DESTROYING);
+ if (READ_ONCE(conn_id->state) == RDMA_CM_CONNECT &&
+ conn_id->id.qp_type != IB_QPT_UD) {
+ trace_cm_prepare_mra(cm_id->context);
+ ib_prepare_cm_mra(cm_id);
+ }
mutex_unlock(&conn_id->handler_mutex);
-err1:
+
+err_unlock:
mutex_unlock(&listen_id->handler_mutex);
- if (conn_id)
- rdma_destroy_id(&conn_id->id);
net_dev_put:
- if (net_dev)
- dev_put(net_dev);
+ dev_put(net_dev);
return ret;
}
@@ -2271,7 +2522,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state != RDMA_CM_CONNECT)
+ if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
goto out;
switch (iw_event->event) {
@@ -2313,13 +2564,11 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
event.status = iw_event->status;
event.param.conn.private_data = iw_event->private_data;
event.param.conn.private_data_len = iw_event->private_data_len;
- ret = id_priv->id.event_handler(&id_priv->id, &event);
+ ret = cma_cm_event_handler(id_priv, &event);
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.iw = NULL;
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- rdma_destroy_id(&id_priv->id);
+ destroy_id_handler_unlock(id_priv);
return ret;
}
@@ -2331,7 +2580,6 @@ out:
static int iw_conn_req_handler(struct iw_cm_id *cm_id,
struct iw_cm_event *iw_event)
{
- struct rdma_cm_id *new_cm_id;
struct rdma_id_private *listen_id, *conn_id;
struct rdma_cm_event event = {};
int ret = -ECONNABORTED;
@@ -2347,35 +2595,33 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
listen_id = cm_id->context;
mutex_lock(&listen_id->handler_mutex);
- if (listen_id->state != RDMA_CM_LISTEN)
+ if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN)
goto out;
/* Create a new RDMA id for the new IW CM ID */
- new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
- listen_id->id.event_handler,
- listen_id->id.context,
- RDMA_PS_TCP, IB_QPT_RC,
- listen_id->res.kern_name);
- if (IS_ERR(new_cm_id)) {
+ conn_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
+ listen_id->id.event_handler,
+ listen_id->id.context, RDMA_PS_TCP,
+ IB_QPT_RC, listen_id);
+ if (IS_ERR(conn_id)) {
ret = -ENOMEM;
goto out;
}
- conn_id = container_of(new_cm_id, struct rdma_id_private, id);
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
conn_id->state = RDMA_CM_CONNECT;
ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr);
if (ret) {
- mutex_unlock(&conn_id->handler_mutex);
- rdma_destroy_id(new_cm_id);
- goto out;
+ mutex_unlock(&listen_id->handler_mutex);
+ destroy_id_handler_unlock(conn_id);
+ return ret;
}
ret = cma_iw_acquire_dev(conn_id, listen_id);
if (ret) {
- mutex_unlock(&conn_id->handler_mutex);
- rdma_destroy_id(new_cm_id);
- goto out;
+ mutex_unlock(&listen_id->handler_mutex);
+ destroy_id_handler_unlock(conn_id);
+ return ret;
}
conn_id->cm_id.iw = cm_id;
@@ -2385,24 +2631,16 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
- /*
- * Protect against the user destroying conn_id from another thread
- * until we're done accessing it.
- */
- atomic_inc(&conn_id->refcount);
- ret = conn_id->id.event_handler(&conn_id->id, &event);
+ ret = cma_cm_event_handler(conn_id, &event);
if (ret) {
/* User wants to destroy the CM ID */
conn_id->cm_id.iw = NULL;
- cma_exch(conn_id, RDMA_CM_DESTROYING);
- mutex_unlock(&conn_id->handler_mutex);
- cma_deref_id(conn_id);
- rdma_destroy_id(&conn_id->id);
- goto out;
+ mutex_unlock(&listen_id->handler_mutex);
+ destroy_id_handler_unlock(conn_id);
+ return ret;
}
mutex_unlock(&conn_id->handler_mutex);
- cma_deref_id(conn_id);
out:
mutex_unlock(&listen_id->handler_mutex);
@@ -2437,8 +2675,11 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
if (IS_ERR(id))
return PTR_ERR(id);
+ mutex_lock(&id_priv->qp_mutex);
id->tos = id_priv->tos;
id->tos_set = id_priv->tos_set;
+ mutex_unlock(&id_priv->qp_mutex);
+ id->afonly = id_priv->afonly;
id_priv->cm_id.iw = id;
memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv),
@@ -2459,56 +2700,88 @@ static int cma_listen_handler(struct rdma_cm_id *id,
{
struct rdma_id_private *id_priv = id->context;
+ /* Listening IDs are always destroyed on removal */
+ if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
+ return -1;
+
id->context = id_priv->id.context;
id->event_handler = id_priv->id.event_handler;
+ trace_cm_event_handler(id_priv, event);
return id_priv->id.event_handler(id, event);
}
-static void cma_listen_on_dev(struct rdma_id_private *id_priv,
- struct cma_device *cma_dev)
+static int cma_listen_on_dev(struct rdma_id_private *id_priv,
+ struct cma_device *cma_dev,
+ struct rdma_id_private **to_destroy)
{
struct rdma_id_private *dev_id_priv;
- struct rdma_cm_id *id;
struct net *net = id_priv->id.route.addr.dev_addr.net;
int ret;
- if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
- return;
+ lockdep_assert_held(&lock);
- id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
- id_priv->id.qp_type, id_priv->res.kern_name);
- if (IS_ERR(id))
- return;
+ *to_destroy = NULL;
+ if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
+ return 0;
- dev_id_priv = container_of(id, struct rdma_id_private, id);
+ dev_id_priv =
+ __rdma_create_id(net, cma_listen_handler, id_priv,
+ id_priv->id.ps, id_priv->id.qp_type, id_priv);
+ if (IS_ERR(dev_id_priv))
+ return PTR_ERR(dev_id_priv);
dev_id_priv->state = RDMA_CM_ADDR_BOUND;
memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
rdma_addr_size(cma_src_addr(id_priv)));
_cma_attach_to_dev(dev_id_priv, cma_dev);
- list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
- atomic_inc(&id_priv->refcount);
+ rdma_restrack_add(&dev_id_priv->res);
+ cma_id_get(id_priv);
dev_id_priv->internal_id = 1;
dev_id_priv->afonly = id_priv->afonly;
+ mutex_lock(&id_priv->qp_mutex);
dev_id_priv->tos_set = id_priv->tos_set;
dev_id_priv->tos = id_priv->tos;
+ mutex_unlock(&id_priv->qp_mutex);
- ret = rdma_listen(id, id_priv->backlog);
+ ret = rdma_listen(&dev_id_priv->id, id_priv->backlog);
if (ret)
- dev_warn(&cma_dev->device->dev,
- "RDMA CMA: cma_listen_on_dev, error %d\n", ret);
+ goto err_listen;
+ list_add_tail(&dev_id_priv->listen_item, &id_priv->listen_list);
+ return 0;
+err_listen:
+ /* Caller must destroy this after releasing lock */
+ *to_destroy = dev_id_priv;
+ dev_warn(&cma_dev->device->dev, "RDMA CMA: %s, error %d\n", __func__, ret);
+ return ret;
}
-static void cma_listen_on_all(struct rdma_id_private *id_priv)
+static int cma_listen_on_all(struct rdma_id_private *id_priv)
{
+ struct rdma_id_private *to_destroy;
struct cma_device *cma_dev;
+ int ret;
mutex_lock(&lock);
- list_add_tail(&id_priv->list, &listen_any_list);
- list_for_each_entry(cma_dev, &dev_list, list)
- cma_listen_on_dev(id_priv, cma_dev);
+ list_add_tail(&id_priv->listen_any_item, &listen_any_list);
+ list_for_each_entry(cma_dev, &dev_list, list) {
+ ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy);
+ if (ret) {
+ /* Prevent racing with cma_process_remove() */
+ if (to_destroy)
+ list_del_init(&to_destroy->device_item);
+ goto err_listen;
+ }
+ }
+ mutex_unlock(&lock);
+ return 0;
+
+err_listen:
+ _cma_cancel_listens(id_priv);
mutex_unlock(&lock);
+ if (to_destroy)
+ rdma_destroy_id(&to_destroy->id);
+ return ret;
}
void rdma_set_service_type(struct rdma_cm_id *id, int tos)
@@ -2516,8 +2789,10 @@ void rdma_set_service_type(struct rdma_cm_id *id, int tos)
struct rdma_id_private *id_priv;
id_priv = container_of(id, struct rdma_id_private, id);
+ mutex_lock(&id_priv->qp_mutex);
id_priv->tos = (u8) tos;
id_priv->tos_set = true;
+ mutex_unlock(&id_priv->qp_mutex);
}
EXPORT_SYMBOL(rdma_set_service_type);
@@ -2530,7 +2805,9 @@ EXPORT_SYMBOL(rdma_set_service_type);
* This function should be called before rdma_connect() on active side,
* and on passive side before rdma_accept(). It is applicable to primary
* path only. The timeout will affect the local side of the QP, it is not
- * negotiated with remote side and zero disables the timer.
+ * negotiated with the remote side, and zero disables the timer. If it is
+ * set before rdma_resolve_route(), the value is also used to determine the
+ * PacketLifeTime for RoCE.
*
* Return: 0 for success
*/
@@ -2538,37 +2815,128 @@ int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
{
struct rdma_id_private *id_priv;
- if (id->qp_type != IB_QPT_RC)
+ if (id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_INI)
return -EINVAL;
id_priv = container_of(id, struct rdma_id_private, id);
+ mutex_lock(&id_priv->qp_mutex);
id_priv->timeout = timeout;
id_priv->timeout_set = true;
+ mutex_unlock(&id_priv->qp_mutex);
return 0;
}
EXPORT_SYMBOL(rdma_set_ack_timeout);
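/*
 * Illustrative ULP usage (editor's sketch, not part of this patch): set the
 * local ACK timeout before resolving the route so that, on RoCE, the same
 * value also seeds PacketLifeTime. The cm_id is assumed to already be in
 * the ADDR_RESOLVED state; the exponent and timeout_ms are placeholders.
 */
static int example_set_timeout_and_resolve(struct rdma_cm_id *cm_id)
{
	int ret;

	/* IBTA exponent: 4.096 us * 2^14 is roughly a 67 ms local ACK timeout */
	ret = rdma_set_ack_timeout(cm_id, 14);
	if (ret)
		return ret;

	return rdma_resolve_route(cm_id, 2000);
}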
+/**
+ * rdma_set_min_rnr_timer() - Set the minimum RNR Retry timer of the
+ * QP associated with a connection identifier.
+ * @id: Communication identifier associated with the service type.
+ * @min_rnr_timer: 5-bit value encoded as Table 45: "Encoding for RNR NAK
+ * Timer Field" in the IBTA specification.
+ *
+ * This function should be called before rdma_connect() on active
+ * side, and on passive side before rdma_accept(). The timer value
+ * will be associated with the local QP. When it receives a send it is
+ * not read to handle, typically if the receive queue is empty, an RNR
+ * Retry NAK is returned to the requester with the min_rnr_timer
+ * encoded. The requester will then wait at least the time specified
+ * in the NAK before retrying. The default is zero, which translates
+ * to a minimum RNR Timer value of 655 ms.
+ *
+ * Return: 0 for success
+ */
+int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer)
+{
+ struct rdma_id_private *id_priv;
+
+ /* It is a five-bit value */
+ if (min_rnr_timer & 0xe0)
+ return -EINVAL;
+
+ if (WARN_ON(id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_TGT))
+ return -EINVAL;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ mutex_lock(&id_priv->qp_mutex);
+ id_priv->min_rnr_timer = min_rnr_timer;
+ id_priv->min_rnr_timer_set = true;
+ mutex_unlock(&id_priv->qp_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL(rdma_set_min_rnr_timer);
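/*
 * Illustrative ULP usage (editor's sketch, not part of this patch): raise the
 * minimum RNR NAK timer before rdma_connect()/rdma_accept() so a peer backs
 * off when the local receive queue is empty. Only valid for RC and XRC_TGT
 * QP types; the value 16 encodes roughly 2.56 ms per the IBTA table.
 */
static int example_set_min_rnr(struct rdma_cm_id *cm_id)
{
	return rdma_set_min_rnr_timer(cm_id, 16);
}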
+
+static int route_set_path_rec_inbound(struct cma_work *work,
+ struct sa_path_rec *path_rec)
+{
+ struct rdma_route *route = &work->id->id.route;
+
+ if (!route->path_rec_inbound) {
+ route->path_rec_inbound =
+ kzalloc(sizeof(*route->path_rec_inbound), GFP_KERNEL);
+ if (!route->path_rec_inbound)
+ return -ENOMEM;
+ }
+
+ *route->path_rec_inbound = *path_rec;
+ return 0;
+}
+
+static int route_set_path_rec_outbound(struct cma_work *work,
+ struct sa_path_rec *path_rec)
+{
+ struct rdma_route *route = &work->id->id.route;
+
+ if (!route->path_rec_outbound) {
+ route->path_rec_outbound =
+ kzalloc(sizeof(*route->path_rec_outbound), GFP_KERNEL);
+ if (!route->path_rec_outbound)
+ return -ENOMEM;
+ }
+
+ *route->path_rec_outbound = *path_rec;
+ return 0;
+}
+
static void cma_query_handler(int status, struct sa_path_rec *path_rec,
- void *context)
+ unsigned int num_prs, void *context)
{
struct cma_work *work = context;
struct rdma_route *route;
+ int i;
route = &work->id->id.route;
- if (!status) {
- route->num_paths = 1;
- *route->path_rec = *path_rec;
- } else {
- work->old_state = RDMA_CM_ROUTE_QUERY;
- work->new_state = RDMA_CM_ADDR_RESOLVED;
- work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
- work->event.status = status;
- pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n",
- status);
+ if (status)
+ goto fail;
+
+ for (i = 0; i < num_prs; i++) {
+ if (!path_rec[i].flags || (path_rec[i].flags & IB_PATH_GMP))
+ *route->path_rec = path_rec[i];
+ else if (path_rec[i].flags & IB_PATH_INBOUND)
+ status = route_set_path_rec_inbound(work, &path_rec[i]);
+ else if (path_rec[i].flags & IB_PATH_OUTBOUND)
+ status = route_set_path_rec_outbound(work,
+ &path_rec[i]);
+ else
+ status = -EINVAL;
+
+ if (status)
+ goto fail;
}
+ route->num_pri_alt_paths = 1;
+ queue_work(cma_wq, &work->work);
+ return;
+
+fail:
+ work->old_state = RDMA_CM_ROUTE_QUERY;
+ work->new_state = RDMA_CM_ADDR_RESOLVED;
+ work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
+ work->event.status = status;
+ pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n",
+ status);
queue_work(cma_wq, &work->work);
}
@@ -2625,49 +2993,54 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv,
return (id_priv->query_id < 0) ? id_priv->query_id : 0;
}
-static void cma_work_handler(struct work_struct *_work)
+static void cma_iboe_join_work_handler(struct work_struct *work)
{
- struct cma_work *work = container_of(_work, struct cma_work, work);
- struct rdma_id_private *id_priv = work->id;
- int destroy = 0;
+ struct cma_multicast *mc =
+ container_of(work, struct cma_multicast, iboe_join.work);
+ struct rdma_cm_event *event = &mc->iboe_join.event;
+ struct rdma_id_private *id_priv = mc->id_priv;
+ int ret;
mutex_lock(&id_priv->handler_mutex);
- if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
- goto out;
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
+ goto out_unlock;
- if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- destroy = 1;
- }
-out:
+ ret = cma_cm_event_handler(id_priv, event);
+ WARN_ON(ret);
+
+out_unlock:
mutex_unlock(&id_priv->handler_mutex);
- cma_deref_id(id_priv);
- if (destroy)
- rdma_destroy_id(&id_priv->id);
- kfree(work);
+ if (event->event == RDMA_CM_EVENT_MULTICAST_JOIN)
+ rdma_destroy_ah_attr(&event->param.ud.ah_attr);
}
-static void cma_ndev_work_handler(struct work_struct *_work)
+static void cma_work_handler(struct work_struct *_work)
{
- struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
+ struct cma_work *work = container_of(_work, struct cma_work, work);
struct rdma_id_private *id_priv = work->id;
- int destroy = 0;
mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state == RDMA_CM_DESTROYING ||
- id_priv->state == RDMA_CM_DEVICE_REMOVAL)
- goto out;
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
+ goto out_unlock;
+ if (work->old_state != 0 || work->new_state != 0) {
+ if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
+ goto out_unlock;
+ }
- if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- destroy = 1;
+ if (cma_cm_event_handler(id_priv, &work->event)) {
+ cma_id_put(id_priv);
+ destroy_id_handler_unlock(id_priv);
+ goto out_free;
}
-out:
+out_unlock:
mutex_unlock(&id_priv->handler_mutex);
- cma_deref_id(id_priv);
- if (destroy)
- rdma_destroy_id(&id_priv->id);
+ cma_id_put(id_priv);
+out_free:
+ if (work->event.event == RDMA_CM_EVENT_MULTICAST_JOIN)
+ rdma_destroy_ah_attr(&work->event.param.ud.ah_attr);
kfree(work);
}
@@ -2681,14 +3054,19 @@ static void cma_init_resolve_route_work(struct cma_work *work,
work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
}
-static void cma_init_resolve_addr_work(struct cma_work *work,
- struct rdma_id_private *id_priv)
+static void enqueue_resolve_addr_work(struct cma_work *work,
+ struct rdma_id_private *id_priv)
{
+ /* Balances with cma_id_put() in cma_work_handler */
+ cma_id_get(id_priv);
+
work->id = id_priv;
INIT_WORK(&work->work, cma_work_handler);
work->old_state = RDMA_CM_ADDR_QUERY;
work->new_state = RDMA_CM_ADDR_RESOLVED;
work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
+
+ queue_work(cma_wq, &work->work);
}
static int cma_resolve_ib_route(struct rdma_id_private *id_priv,
@@ -2704,7 +3082,8 @@ static int cma_resolve_ib_route(struct rdma_id_private *id_priv,
cma_init_resolve_route_work(work, id_priv);
- route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
+ if (!route->path_rec)
+ route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
if (!route->path_rec) {
ret = -ENOMEM;
goto err1;
@@ -2802,7 +3181,7 @@ int rdma_set_ib_path(struct rdma_cm_id *id,
dev_put(ndev);
}
- id->route.num_paths = 1;
+ id->route.num_pri_alt_paths = 1;
return 0;
err_free:
@@ -2827,22 +3206,86 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv)
return 0;
}
-static int iboe_tos_to_sl(struct net_device *ndev, int tos)
+static int get_vlan_ndev_tc(struct net_device *vlan_ndev, int prio)
{
- int prio;
struct net_device *dev;
- prio = rt_tos2priority(tos);
- dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev;
+ dev = vlan_dev_real_dev(vlan_ndev);
if (dev->num_tc)
return netdev_get_prio_tc_map(dev, prio);
-#if IS_ENABLED(CONFIG_VLAN_8021Q)
+ return (vlan_dev_get_egress_qos_mask(vlan_ndev, prio) &
+ VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+}
+
+struct iboe_prio_tc_map {
+ int input_prio;
+ int output_tc;
+ bool found;
+};
+
+static int get_lower_vlan_dev_tc(struct net_device *dev,
+ struct netdev_nested_priv *priv)
+{
+ struct iboe_prio_tc_map *map = (struct iboe_prio_tc_map *)priv->data;
+
+ if (is_vlan_dev(dev))
+ map->output_tc = get_vlan_ndev_tc(dev, map->input_prio);
+ else if (dev->num_tc)
+ map->output_tc = netdev_get_prio_tc_map(dev, map->input_prio);
+ else
+ map->output_tc = 0;
+ /* We are interested only in the first-level VLAN device, so always
+ * return 1 to stop iterating over lower-level devices.
+ */
+ map->found = true;
+ return 1;
+}
+
+static int iboe_tos_to_sl(struct net_device *ndev, int tos)
+{
+ struct iboe_prio_tc_map prio_tc_map = {};
+ int prio = rt_tos2priority(tos);
+ struct netdev_nested_priv priv;
+
+ /* If VLAN device, get it directly from the VLAN netdev */
if (is_vlan_dev(ndev))
- return (vlan_dev_get_egress_qos_mask(ndev, prio) &
- VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
-#endif
- return 0;
+ return get_vlan_ndev_tc(ndev, prio);
+
+ prio_tc_map.input_prio = prio;
+ priv.data = (void *)&prio_tc_map;
+ rcu_read_lock();
+ netdev_walk_all_lower_dev_rcu(ndev,
+ get_lower_vlan_dev_tc,
+ &priv);
+ rcu_read_unlock();
+ /* If a map is found on a lower device, use it; otherwise fall back
+ * to the current netdevice's priority-to-TC map.
+ */
+ if (prio_tc_map.found)
+ return prio_tc_map.output_tc;
+ else if (ndev->num_tc)
+ return netdev_get_prio_tc_map(ndev, prio);
+ else
+ return 0;
+}
+
+static __be32 cma_get_roce_udp_flow_label(struct rdma_id_private *id_priv)
+{
+ struct sockaddr_in6 *addr6;
+ u16 dport, sport;
+ u32 hash, fl;
+
+ addr6 = (struct sockaddr_in6 *)cma_src_addr(id_priv);
+ fl = be32_to_cpu(addr6->sin6_flowinfo) & IB_GRH_FLOWLABEL_MASK;
+ if ((cma_family(id_priv) != AF_INET6) || !fl) {
+ dport = be16_to_cpu(cma_port(cma_dst_addr(id_priv)));
+ sport = be16_to_cpu(cma_port(cma_src_addr(id_priv)));
+ hash = (u32)sport * 31 + dport;
+ fl = hash & IB_GRH_FLOWLABEL_MASK;
+ }
+
+ return cpu_to_be32(fl);
}
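/*
 * Editor's illustration (not part of this patch): with no IPv6 flow label
 * present, a source port of 1 and a destination port of 2 hash to
 * 1 * 31 + 2 = 33, so the GRH flow label becomes 33 & IB_GRH_FLOWLABEL_MASK,
 * i.e. 0x21.
 */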
static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
@@ -2855,8 +3298,11 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
rdma_start_port(id_priv->cma_dev->device)];
- u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
+ u8 tos;
+ mutex_lock(&id_priv->qp_mutex);
+ tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
+ mutex_unlock(&id_priv->qp_mutex);
work = kzalloc(sizeof *work, GFP_KERNEL);
if (!work)
@@ -2868,7 +3314,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
goto err1;
}
- route->num_paths = 1;
+ route->num_pri_alt_paths = 1;
ndev = cma_iboe_set_path_rec_l2_fields(id_priv);
if (!ndev) {
@@ -2893,15 +3339,33 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
route->path_rec->traffic_class = tos;
route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
route->path_rec->rate_selector = IB_SA_EQ;
- route->path_rec->rate = iboe_get_rate(ndev);
+ route->path_rec->rate = IB_RATE_PORT_CURRENT;
dev_put(ndev);
route->path_rec->packet_life_time_selector = IB_SA_EQ;
- route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
+ /* In case ACK timeout is set, use this value to calculate
+ * PacketLifeTime. As per IBTA 12.7.34,
+ * local ACK timeout = (2 * PacketLifeTime + Local CA's ACK delay).
+ * Assuming a negligible local ACK delay, we can use
+ * PacketLifeTime = local ACK timeout/2
+ * as a reasonable approximation for RoCE networks.
+ */
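	/*
	 * Editor's illustration (not part of this patch): a local ACK timeout
	 * exponent of 14 (~67 ms) yields packet_life_time = 13 (~33.6 ms),
	 * i.e. half the local ACK timeout, as approximated above.
	 */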
+ mutex_lock(&id_priv->qp_mutex);
+ if (id_priv->timeout_set && id_priv->timeout)
+ route->path_rec->packet_life_time = id_priv->timeout - 1;
+ else
+ route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
+ mutex_unlock(&id_priv->qp_mutex);
+
if (!route->path_rec->mtu) {
ret = -EINVAL;
goto err2;
}
+ if (rdma_protocol_roce_udp_encap(id_priv->id.device,
+ id_priv->id.port_num))
+ route->path_rec->flow_label =
+ cma_get_roce_udp_flow_label(id_priv);
+
cma_init_resolve_route_work(work, id_priv);
queue_work(cma_wq, &work->work);
@@ -2910,6 +3374,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
err2:
kfree(route->path_rec);
route->path_rec = NULL;
+ route->num_pri_alt_paths = 0;
err1:
kfree(work);
return ret;
@@ -2918,17 +3383,28 @@ err1:
int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
{
struct rdma_id_private *id_priv;
+ enum rdma_cm_state state;
int ret;
+ if (!timeout_ms)
+ return -EINVAL;
+
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY))
+ state = id_priv->state;
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
+ RDMA_CM_ROUTE_QUERY) &&
+ !cma_comp_exch(id_priv, RDMA_CM_ADDRINFO_RESOLVED,
+ RDMA_CM_ROUTE_QUERY))
return -EINVAL;
- atomic_inc(&id_priv->refcount);
+ cma_id_get(id_priv);
if (rdma_cap_ib_sa(id->device, id->port_num))
ret = cma_resolve_ib_route(id_priv, timeout_ms);
- else if (rdma_protocol_roce(id->device, id->port_num))
+ else if (rdma_protocol_roce(id->device, id->port_num)) {
ret = cma_resolve_iboe_route(id_priv);
+ if (!ret)
+ cma_add_id_to_tree(id_priv);
+ }
else if (rdma_protocol_iwarp(id->device, id->port_num))
ret = cma_resolve_iw_route(id_priv);
else
@@ -2939,8 +3415,8 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
return 0;
err:
- cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED);
- cma_deref_id(id_priv);
+ cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, state);
+ cma_id_put(id_priv);
return ret;
}
EXPORT_SYMBOL(rdma_resolve_route);
@@ -2967,9 +3443,9 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
struct cma_device *cma_dev, *cur_dev;
union ib_gid gid;
enum ib_port_state port_state;
+ unsigned int p;
u16 pkey;
int ret;
- u8 p;
cma_dev = NULL;
mutex_lock(&lock);
@@ -2981,7 +3457,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
if (!cma_dev)
cma_dev = cur_dev;
- for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
+ rdma_for_each_port (cur_dev->device, p) {
if (!ib_get_cached_port_state(cur_dev->device, p, &port_state) &&
port_state == IB_PORT_ACTIVE) {
cma_dev = cur_dev;
@@ -3014,6 +3490,7 @@ port_found:
ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
id_priv->id.port_num = p;
cma_attach_to_dev(id_priv, cma_dev);
+ rdma_restrack_add(&id_priv->res);
cma_set_loopback(cma_src_addr(id_priv));
out:
mutex_unlock(&lock);
@@ -3046,7 +3523,8 @@ static void addr_handler(int status, struct sockaddr *src_addr,
if (status)
pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n",
status);
- } else {
+ rdma_restrack_add(&id_priv->res);
+ } else if (status) {
pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status);
}
@@ -3061,10 +3539,8 @@ static void addr_handler(int status, struct sockaddr *src_addr,
} else
event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
- if (id_priv->id.event_handler(&id_priv->id, &event)) {
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- rdma_destroy_id(&id_priv->id);
+ if (cma_cm_event_handler(id_priv, &event)) {
+ destroy_id_handler_unlock(id_priv);
return;
}
out:
@@ -3090,8 +3566,7 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
- cma_init_resolve_addr_work(work, id_priv);
- queue_work(cma_wq, &work->work);
+ enqueue_resolve_addr_work(work, id_priv);
return 0;
err:
kfree(work);
@@ -3116,77 +3591,13 @@ static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
&(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
- cma_init_resolve_addr_work(work, id_priv);
- queue_work(cma_wq, &work->work);
+ enqueue_resolve_addr_work(work, id_priv);
return 0;
err:
kfree(work);
return ret;
}
-static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
- const struct sockaddr *dst_addr)
-{
- if (!src_addr || !src_addr->sa_family) {
- src_addr = (struct sockaddr *) &id->route.addr.src_addr;
- src_addr->sa_family = dst_addr->sa_family;
- if (IS_ENABLED(CONFIG_IPV6) &&
- dst_addr->sa_family == AF_INET6) {
- struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr;
- struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr;
- src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
- if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
- id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id;
- } else if (dst_addr->sa_family == AF_IB) {
- ((struct sockaddr_ib *) src_addr)->sib_pkey =
- ((struct sockaddr_ib *) dst_addr)->sib_pkey;
- }
- }
- return rdma_bind_addr(id, src_addr);
-}
-
-int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
- const struct sockaddr *dst_addr, unsigned long timeout_ms)
-{
- struct rdma_id_private *id_priv;
- int ret;
-
- id_priv = container_of(id, struct rdma_id_private, id);
- if (id_priv->state == RDMA_CM_IDLE) {
- ret = cma_bind_addr(id, src_addr, dst_addr);
- if (ret)
- return ret;
- }
-
- if (cma_family(id_priv) != dst_addr->sa_family)
- return -EINVAL;
-
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
- return -EINVAL;
-
- memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
- if (cma_any_addr(dst_addr)) {
- ret = cma_resolve_loopback(id_priv);
- } else {
- if (dst_addr->sa_family == AF_IB) {
- ret = cma_resolve_ib_addr(id_priv);
- } else {
- ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
- &id->route.addr.dev_addr,
- timeout_ms, addr_handler,
- false, id_priv);
- }
- }
- if (ret)
- goto err;
-
- return 0;
-err:
- cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
- return ret;
-}
-EXPORT_SYMBOL(rdma_resolve_addr);
-
int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
{
struct rdma_id_private *id_priv;
@@ -3195,7 +3606,8 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
id_priv = container_of(id, struct rdma_id_private, id);
spin_lock_irqsave(&id_priv->lock, flags);
- if (reuse || id_priv->state == RDMA_CM_IDLE) {
+ if ((reuse && id_priv->state != RDMA_CM_LISTEN) ||
+ id_priv->state == RDMA_CM_IDLE) {
id_priv->reuseaddr = reuse;
ret = 0;
} else {
@@ -3234,6 +3646,8 @@ static void cma_bind_port(struct rdma_bind_list *bind_list,
u64 sid, mask;
__be16 port;
+ lockdep_assert_held(&lock);
+
addr = cma_src_addr(id_priv);
port = htons(bind_list->port);
@@ -3262,6 +3676,8 @@ static int cma_alloc_port(enum rdma_ucm_port_space ps,
struct rdma_bind_list *bind_list;
int ret;
+ lockdep_assert_held(&lock);
+
bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
if (!bind_list)
return -ENOMEM;
@@ -3288,6 +3704,8 @@ static int cma_port_is_unique(struct rdma_bind_list *bind_list,
struct sockaddr *saddr = cma_src_addr(id_priv);
__be16 dport = cma_port(daddr);
+ lockdep_assert_held(&lock);
+
hlist_for_each_entry(cur_id, &bind_list->owners, node) {
struct sockaddr *cur_daddr = cma_dst_addr(cur_id);
struct sockaddr *cur_saddr = cma_src_addr(cur_id);
@@ -3327,9 +3745,11 @@ static int cma_alloc_any_port(enum rdma_ucm_port_space ps,
unsigned int rover;
struct net *net = id_priv->id.route.addr.dev_addr.net;
+ lockdep_assert_held(&lock);
+
inet_get_local_port_range(net, &low, &high);
remaining = (high - low) + 1;
- rover = prandom_u32() % remaining + low;
+ rover = get_random_u32_inclusive(low, remaining + low - 1);
retry:
if (last_used_port != rover) {
struct rdma_bind_list *bind_list;
@@ -3374,13 +3794,14 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
struct rdma_id_private *cur_id;
struct sockaddr *addr, *cur_addr;
+ lockdep_assert_held(&lock);
+
addr = cma_src_addr(id_priv);
hlist_for_each_entry(cur_id, &bind_list->owners, node) {
if (id_priv == cur_id)
continue;
- if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr &&
- cur_id->reuseaddr)
+ if (reuseaddr && cur_id->reuseaddr)
continue;
cur_addr = cma_src_addr(cur_id);
@@ -3404,6 +3825,8 @@ static int cma_use_port(enum rdma_ucm_port_space ps,
unsigned short snum;
int ret;
+ lockdep_assert_held(&lock);
+
snum = ntohs(cma_port(cma_src_addr(id_priv)));
if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
return -EACCES;
@@ -3419,18 +3842,6 @@ static int cma_use_port(enum rdma_ucm_port_space ps,
return ret;
}
-static int cma_bind_listen(struct rdma_id_private *id_priv)
-{
- struct rdma_bind_list *bind_list = id_priv->bind_list;
- int ret = 0;
-
- mutex_lock(&lock);
- if (bind_list->owners.first->next)
- ret = cma_check_port(bind_list, id_priv, 0);
- mutex_unlock(&lock);
- return ret;
-}
-
static enum rdma_ucm_port_space
cma_select_inet_ps(struct rdma_id_private *id_priv)
{
@@ -3524,28 +3935,41 @@ static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
int rdma_listen(struct rdma_cm_id *id, int backlog)
{
- struct rdma_id_private *id_priv;
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
int ret;
- id_priv = container_of(id, struct rdma_id_private, id);
- if (id_priv->state == RDMA_CM_IDLE) {
- id->route.addr.src_addr.ss_family = AF_INET;
- ret = rdma_bind_addr(id, cma_src_addr(id_priv));
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) {
+ struct sockaddr_in any_in = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_ANY),
+ };
+
+ /* For a well-behaved ULP the state will be RDMA_CM_IDLE */
+ ret = rdma_bind_addr(id, (struct sockaddr *)&any_in);
if (ret)
return ret;
+ if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
+ RDMA_CM_LISTEN)))
+ return -EINVAL;
}
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN))
- return -EINVAL;
-
+ /*
+ * Once the ID reaches RDMA_CM_LISTEN it may no longer be reusable
+ * and has to be unique in the bind list.
+ */
if (id_priv->reuseaddr) {
- ret = cma_bind_listen(id_priv);
+ mutex_lock(&lock);
+ ret = cma_check_port(id_priv->bind_list, id_priv, 0);
+ if (!ret)
+ id_priv->reuseaddr = 0;
+ mutex_unlock(&lock);
if (ret)
goto err;
}
id_priv->backlog = backlog;
- if (id->device) {
+ if (id_priv->cma_dev) {
if (rdma_cap_ib_cm(id->device, 1)) {
ret = cma_ib_listen(id_priv);
if (ret)
@@ -3558,38 +3982,44 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
ret = -ENOSYS;
goto err;
}
- } else
- cma_listen_on_all(id_priv);
+ } else {
+ ret = cma_listen_on_all(id_priv);
+ if (ret)
+ goto err;
+ }
return 0;
err:
id_priv->backlog = 0;
+ /*
+ * None of the failure paths that lead here allow the req handlers to
+ * have run.
+ */
cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND);
return ret;
}
EXPORT_SYMBOL(rdma_listen);
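/*
 * Illustrative passive-side setup (editor's sketch, not part of this patch):
 * create an ID, bind a wildcard IPv4 address and listen. example_handler,
 * the port number and the backlog are placeholders chosen by the ULP.
 */
static struct rdma_cm_id *example_listen(rdma_cm_event_handler example_handler)
{
	struct sockaddr_in sin = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_ANY),
		.sin_port = htons(20049),
	};
	struct rdma_cm_id *cm_id;
	int ret;

	cm_id = rdma_create_id(&init_net, example_handler, NULL, RDMA_PS_TCP,
			       IB_QPT_RC);
	if (IS_ERR(cm_id))
		return cm_id;

	ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
	if (!ret)
		ret = rdma_listen(cm_id, 16);
	if (ret) {
		rdma_destroy_id(cm_id);
		return ERR_PTR(ret);
	}
	return cm_id;
}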
-int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
+static int rdma_bind_addr_dst(struct rdma_id_private *id_priv,
+ struct sockaddr *addr, const struct sockaddr *daddr)
{
- struct rdma_id_private *id_priv;
+ struct sockaddr *id_daddr;
int ret;
- struct sockaddr *daddr;
if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 &&
addr->sa_family != AF_IB)
return -EAFNOSUPPORT;
- id_priv = container_of(id, struct rdma_id_private, id);
if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND))
return -EINVAL;
- ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
+ ret = cma_check_linklocal(&id_priv->id.route.addr.dev_addr, addr);
if (ret)
goto err1;
memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));
if (!cma_any_addr(addr)) {
- ret = cma_translate_addr(addr, &id->route.addr.dev_addr);
+ ret = cma_translate_addr(addr, &id_priv->id.route.addr.dev_addr);
if (ret)
goto err1;
@@ -3609,22 +4039,148 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
}
#endif
}
- daddr = cma_dst_addr(id_priv);
- daddr->sa_family = addr->sa_family;
+ id_daddr = cma_dst_addr(id_priv);
+ if (daddr != id_daddr)
+ memcpy(id_daddr, daddr, rdma_addr_size(addr));
+ id_daddr->sa_family = addr->sa_family;
ret = cma_get_port(id_priv);
if (ret)
goto err2;
+ if (!cma_any_addr(addr))
+ rdma_restrack_add(&id_priv->res);
return 0;
err2:
- rdma_restrack_del(&id_priv->res);
if (id_priv->cma_dev)
cma_release_dev(id_priv);
err1:
cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
return ret;
}
+
+static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+ struct sockaddr_storage zero_sock = {};
+
+ if (src_addr && src_addr->sa_family)
+ return rdma_bind_addr_dst(id_priv, src_addr, dst_addr);
+
+ /*
+ * When the src_addr is not specified, automatically supply a wildcard (any) address
+ */
+ zero_sock.ss_family = dst_addr->sa_family;
+ if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *src_addr6 =
+ (struct sockaddr_in6 *)&zero_sock;
+ struct sockaddr_in6 *dst_addr6 =
+ (struct sockaddr_in6 *)dst_addr;
+
+ src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
+ if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ id->route.addr.dev_addr.bound_dev_if =
+ dst_addr6->sin6_scope_id;
+ } else if (dst_addr->sa_family == AF_IB) {
+ ((struct sockaddr_ib *)&zero_sock)->sib_pkey =
+ ((struct sockaddr_ib *)dst_addr)->sib_pkey;
+ }
+ return rdma_bind_addr_dst(id_priv, (struct sockaddr *)&zero_sock, dst_addr);
+}
+
+/*
+ * If required, resolve the source address for bind and leave the id_priv in
+ * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior
+ * calls made by the ULP; a previously bound ID will not be re-bound and src_addr is
+ * ignored.
+ */
+static int resolve_prepare_src(struct rdma_id_private *id_priv,
+ struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr)
+{
+ int ret;
+
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
+ /* For a well-behaved ULP the state will be RDMA_CM_IDLE */
+ ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr);
+ if (ret)
+ return ret;
+ if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
+ RDMA_CM_ADDR_QUERY)))
+ return -EINVAL;
+
+ } else {
+ memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
+ }
+
+ if (cma_family(id_priv) != dst_addr->sa_family) {
+ ret = -EINVAL;
+ goto err_state;
+ }
+ return 0;
+
+err_state:
+ cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
+ return ret;
+}
+
+int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr, unsigned long timeout_ms)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+ int ret;
+
+ ret = resolve_prepare_src(id_priv, src_addr, dst_addr);
+ if (ret)
+ return ret;
+
+ if (cma_any_addr(dst_addr)) {
+ ret = cma_resolve_loopback(id_priv);
+ } else {
+ if (dst_addr->sa_family == AF_IB) {
+ ret = cma_resolve_ib_addr(id_priv);
+ } else {
+ /*
+ * The FSM can return to RDMA_CM_ADDR_BOUND after
+ * rdma_resolve_ip() is called, e.g. through the error
+ * path in addr_handler(). If this happens the existing
+ * request must be canceled before issuing a new one.
+ * Since canceling a request is a bit slow and this
+ * oddball path is rare, keep track once a request has
+ * been issued. The flag effectively becomes permanent
+ * state, since the only cancel happens here,
+ * immediately before rdma_resolve_ip().
+ */
+ if (id_priv->used_resolve_ip)
+ rdma_addr_cancel(&id->route.addr.dev_addr);
+ else
+ id_priv->used_resolve_ip = 1;
+ ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
+ &id->route.addr.dev_addr,
+ timeout_ms, addr_handler,
+ false, id_priv);
+ }
+ }
+ if (ret)
+ goto err;
+
+ return 0;
+err:
+ cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_resolve_addr);
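/*
 * Illustrative active-side start (editor's sketch, not part of this patch):
 * kick off address resolution with a NULL source so the core supplies a
 * wildcard; the rest of the sequence (route resolution, connect) continues
 * from the ULP's event handler once RDMA_CM_EVENT_ADDR_RESOLVED arrives.
 * dst and the 2000 ms timeout are placeholders chosen by the ULP.
 */
static int example_start_connect(struct rdma_cm_id *cm_id,
				 struct sockaddr *dst)
{
	return rdma_resolve_addr(cm_id, NULL, dst, 2000);
}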
+
+int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ return rdma_bind_addr_dst(id_priv, addr, cma_dst_addr(id_priv));
+}
EXPORT_SYMBOL(rdma_bind_addr);
static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv)
@@ -3664,10 +4220,10 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
struct rdma_cm_event event = {};
const struct ib_cm_sidr_rep_event_param *rep =
&ib_event->param.sidr_rep_rcvd;
- int ret = 0;
+ int ret;
mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state != RDMA_CM_CONNECT)
+ if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
goto out;
switch (ib_event->event) {
@@ -3708,20 +4264,18 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
goto out;
}
- ret = id_priv->id.event_handler(&id_priv->id, &event);
+ ret = cma_cm_event_handler(id_priv, &event);
rdma_destroy_ah_attr(&event.param.ud.ah_attr);
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.ib = NULL;
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- rdma_destroy_id(&id_priv->id);
+ destroy_id_handler_unlock(id_priv);
return ret;
}
out:
mutex_unlock(&id_priv->handler_mutex);
- return ret;
+ return 0;
}
static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
@@ -3735,8 +4289,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
memset(&req, 0, sizeof req);
offset = cma_user_data_offset(id_priv);
- req.private_data_len = offset + conn_param->private_data_len;
- if (req.private_data_len < conn_param->private_data_len)
+ if (check_add_overflow(offset, conn_param->private_data_len, &req.private_data_len))
return -EINVAL;
if (req.private_data_len) {
@@ -3772,6 +4325,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
req.max_cm_retries = CMA_MAX_CM_RETRIES;
+ trace_cm_send_sidr_req(id_priv);
ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
if (ret) {
ib_destroy_cm_id(id_priv->cm_id.ib);
@@ -3794,8 +4348,7 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
memset(&req, 0, sizeof req);
offset = cma_user_data_offset(id_priv);
- req.private_data_len = offset + conn_param->private_data_len;
- if (req.private_data_len < conn_param->private_data_len)
+ if (check_add_overflow(offset, conn_param->private_data_len, &req.private_data_len))
return -EINVAL;
if (req.private_data_len) {
@@ -3826,7 +4379,9 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
}
req.primary_path = &route->path_rec[0];
- if (route->num_paths == 2)
+ req.primary_path_inbound = route->path_rec_inbound;
+ req.primary_path_outbound = route->path_rec_outbound;
+ if (route->num_pri_alt_paths == 2)
req.alternate_path = &route->path_rec[1];
req.ppath_sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr;
@@ -3844,7 +4399,10 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
req.max_cm_retries = CMA_MAX_CM_RETRIES;
req.srq = id_priv->srq ? 1 : 0;
+ req.ece.vendor_id = id_priv->ece.vendor_id;
+ req.ece.attr_mod = id_priv->ece.attr_mod;
+ trace_cm_send_req(id_priv);
ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
out:
if (ret && !IS_ERR(id)) {
@@ -3867,8 +4425,11 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
if (IS_ERR(cm_id))
return PTR_ERR(cm_id);
+ mutex_lock(&id_priv->qp_mutex);
cm_id->tos = id_priv->tos;
cm_id->tos_set = id_priv->tos_set;
+ mutex_unlock(&id_priv->qp_mutex);
+
id_priv->cm_id.iw = cm_id;
memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
@@ -3899,12 +4460,23 @@ out:
return ret;
}
-int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
+/**
+ * rdma_connect_locked - Initiate an active connection request.
+ * @id: Connection identifier to connect.
+ * @conn_param: Connection information used for connected QPs.
+ *
+ * Same as rdma_connect() but can only be called from the
+ * RDMA_CM_EVENT_ROUTE_RESOLVED handler callback.
+ */
+int rdma_connect_locked(struct rdma_cm_id *id,
+ struct rdma_conn_param *conn_param)
{
- struct rdma_id_private *id_priv;
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
int ret;
- id_priv = container_of(id, struct rdma_id_private, id);
+ lockdep_assert_held(&id_priv->handler_mutex);
+
if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))
return -EINVAL;
@@ -3918,20 +4490,66 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
ret = cma_resolve_ib_udp(id_priv, conn_param);
else
ret = cma_connect_ib(id_priv, conn_param);
- } else if (rdma_cap_iw_cm(id->device, id->port_num))
+ } else if (rdma_cap_iw_cm(id->device, id->port_num)) {
ret = cma_connect_iw(id_priv, conn_param);
- else
+ } else {
ret = -ENOSYS;
+ }
if (ret)
- goto err;
-
+ goto err_state;
return 0;
-err:
+err_state:
cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED);
return ret;
}
+EXPORT_SYMBOL(rdma_connect_locked);
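/*
 * Illustrative handler fragment (editor's sketch, not part of this patch):
 * the event handler already runs under the handler_mutex, so a connect
 * issued from the ROUTE_RESOLVED callback must use rdma_connect_locked()
 * rather than rdma_connect(). The conn_param contents are placeholders.
 */
static int example_handler_connect(struct rdma_cm_id *cm_id,
				   struct rdma_cm_event *event)
{
	struct rdma_conn_param param = {
		.responder_resources = 1,
		.initiator_depth = 1,
		.retry_count = 7,
	};

	if (event->event != RDMA_CM_EVENT_ROUTE_RESOLVED)
		return 0;

	/* a non-zero return from the handler destroys the cm_id */
	return rdma_connect_locked(cm_id, &param);
}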
+
+/**
+ * rdma_connect - Initiate an active connection request.
+ * @id: Connection identifier to connect.
+ * @conn_param: Connection information used for connected QPs.
+ *
+ * Users must have resolved a route for the rdma_cm_id to connect with by having
+ * called rdma_resolve_route before calling this routine.
+ *
+ * This call will either connect to a remote QP or obtain remote QP information
+ * for unconnected rdma_cm_id's. The actual operation is based on the
+ * rdma_cm_id's port space.
+ */
+int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+ int ret;
+
+ mutex_lock(&id_priv->handler_mutex);
+ ret = rdma_connect_locked(id, conn_param);
+ mutex_unlock(&id_priv->handler_mutex);
+ return ret;
+}
EXPORT_SYMBOL(rdma_connect);
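/*
 * Illustrative process-context connect (editor's sketch, not part of this
 * patch): a ULP whose event handler merely completes "done" on
 * RDMA_CM_EVENT_ROUTE_RESOLVED can call plain rdma_connect() afterwards;
 * unlike rdma_connect_locked() it takes the handler_mutex itself.
 */
static int example_connect_after_route(struct rdma_cm_id *cm_id,
				       struct completion *done,
				       struct rdma_conn_param *param)
{
	wait_for_completion(done);
	return rdma_connect(cm_id, param);
}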
+/**
+ * rdma_connect_ece - Initiate an active connection request with ECE data.
+ * @id: Connection identifier to connect.
+ * @conn_param: Connection information used for connected QPs.
+ * @ece: ECE parameters
+ *
+ * See rdma_connect() explanation.
+ */
+int rdma_connect_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+ struct rdma_ucm_ece *ece)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ id_priv->ece.vendor_id = ece->vendor_id;
+ id_priv->ece.attr_mod = ece->attr_mod;
+
+ return rdma_connect(id, conn_param);
+}
+EXPORT_SYMBOL(rdma_connect_ece);
+
static int cma_accept_ib(struct rdma_id_private *id_priv,
struct rdma_conn_param *conn_param)
{
@@ -3957,7 +4575,10 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
rep.flow_control = conn_param->flow_control;
rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
rep.srq = id_priv->srq ? 1 : 0;
+ rep.ece.vendor_id = id_priv->ece.vendor_id;
+ rep.ece.attr_mod = id_priv->ece.attr_mod;
+ trace_cm_send_rep(id_priv);
ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
out:
return ret;
@@ -3980,9 +4601,9 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,
iw_param.ird = conn_param->responder_resources;
iw_param.private_data = conn_param->private_data;
iw_param.private_data_len = conn_param->private_data_len;
- if (id_priv->id.qp) {
+ if (id_priv->id.qp)
iw_param.qpn = id_priv->qp_num;
- } else
+ else
iw_param.qpn = conn_param->qp_num;
return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
@@ -3998,29 +4619,53 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
memset(&rep, 0, sizeof rep);
rep.status = status;
if (status == IB_SIDR_SUCCESS) {
- ret = cma_set_qkey(id_priv, qkey);
+ if (qkey)
+ ret = cma_set_qkey(id_priv, qkey);
+ else
+ ret = cma_set_default_qkey(id_priv);
if (ret)
return ret;
rep.qp_num = id_priv->qp_num;
rep.qkey = id_priv->qkey;
+
+ rep.ece.vendor_id = id_priv->ece.vendor_id;
+ rep.ece.attr_mod = id_priv->ece.attr_mod;
}
+
rep.private_data = private_data;
rep.private_data_len = private_data_len;
+ trace_cm_send_sidr_rep(id_priv);
return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
}
-int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
- const char *caller)
+/**
+ * rdma_accept - Called to accept a connection request or response.
+ * @id: Connection identifier associated with the request.
+ * @conn_param: Information needed to establish the connection. This must be
+ * provided if accepting a connection request. If accepting a connection
+ * response, this parameter must be NULL.
+ *
+ * Typically, this routine is only called by the listener to accept a connection
+ * request. It must also be called on the active side of a connection if the
+ * user is performing their own QP transitions.
+ *
+ * In the case of error, a reject message is sent to the remote side and the
+ * state of the qp associated with the id is modified to error, such that any
+ * previously posted receive buffers would be flushed.
+ *
+ * This function is for use by kernel ULPs and must be called from under the
+ * handler callback.
+ */
+int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
{
- struct rdma_id_private *id_priv;
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
int ret;
- id_priv = container_of(id, struct rdma_id_private, id);
-
- rdma_restrack_set_task(&id_priv->res, caller);
+ lockdep_assert_held(&id_priv->handler_mutex);
- if (!cma_comp(id_priv, RDMA_CM_CONNECT))
+ if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
return -EINVAL;
if (!id->qp && conn_param) {
@@ -4044,21 +4689,52 @@ int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
else
ret = cma_rep_recv(id_priv);
}
- } else if (rdma_cap_iw_cm(id->device, id->port_num))
+ } else if (rdma_cap_iw_cm(id->device, id->port_num)) {
ret = cma_accept_iw(id_priv, conn_param);
- else
+ } else {
ret = -ENOSYS;
-
+ }
if (ret)
goto reject;
return 0;
reject:
cma_modify_qp_err(id_priv);
- rdma_reject(id, NULL, 0);
+ rdma_reject(id, NULL, 0, IB_CM_REJ_CONSUMER_DEFINED);
return ret;
}
-EXPORT_SYMBOL(__rdma_accept);
+EXPORT_SYMBOL(rdma_accept);
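/*
 * Illustrative listener callback (editor's sketch, not part of this patch):
 * accept directly from the CONNECT_REQUEST event, which already holds the
 * handler_mutex. example_have_resources() is a hypothetical ULP admission
 * check; QP creation is omitted for brevity.
 */
static bool example_have_resources(void)
{
	return true;	/* placeholder for a real ULP admission check */
}

static int example_listen_handler(struct rdma_cm_id *new_id,
				  struct rdma_cm_event *event)
{
	struct rdma_conn_param param = {
		.responder_resources = 1,
		.initiator_depth = 1,
	};

	if (event->event != RDMA_CM_EVENT_CONNECT_REQUEST)
		return 0;

	if (!example_have_resources()) {
		/* decline the request with a consumer-defined reject */
		rdma_reject(new_id, NULL, 0, IB_CM_REJ_CONSUMER_DEFINED);
		/* a non-zero return lets the core destroy the new cm_id */
		return -ECONNREFUSED;
	}

	/* rdma_accept() already sends a reject itself if it fails */
	return rdma_accept(new_id, &param);
}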
+
+int rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+ struct rdma_ucm_ece *ece)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ id_priv->ece.vendor_id = ece->vendor_id;
+ id_priv->ece.attr_mod = ece->attr_mod;
+
+ return rdma_accept(id, conn_param);
+}
+EXPORT_SYMBOL(rdma_accept_ece);
+
+void rdma_lock_handler(struct rdma_cm_id *id)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ mutex_lock(&id_priv->handler_mutex);
+}
+EXPORT_SYMBOL(rdma_lock_handler);
+
+void rdma_unlock_handler(struct rdma_cm_id *id)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ mutex_unlock(&id_priv->handler_mutex);
+}
+EXPORT_SYMBOL(rdma_unlock_handler);
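/*
 * Illustrative deferred accept (editor's sketch, not part of this patch):
 * a ULP that postpones the accept decision to process context must take the
 * handler lock explicitly, since rdma_accept() asserts that it is held. The
 * ULP is assumed to have kept new_id alive from its CONNECT_REQUEST handler.
 */
static int example_deferred_accept(struct rdma_cm_id *new_id,
				   struct rdma_conn_param *param)
{
	int ret;

	rdma_lock_handler(new_id);
	ret = rdma_accept(new_id, param);
	rdma_unlock_handler(new_id);
	return ret;
}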
int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
{
@@ -4082,7 +4758,7 @@ int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
EXPORT_SYMBOL(rdma_notify);
int rdma_reject(struct rdma_cm_id *id, const void *private_data,
- u8 private_data_len)
+ u8 private_data_len, u8 reason)
{
struct rdma_id_private *id_priv;
int ret;
@@ -4092,18 +4768,20 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
return -EINVAL;
if (rdma_cap_ib_cm(id->device, id->port_num)) {
- if (id->qp_type == IB_QPT_UD)
+ if (id->qp_type == IB_QPT_UD) {
ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0,
private_data, private_data_len);
- else
- ret = ib_send_cm_rej(id_priv->cm_id.ib,
- IB_CM_REJ_CONSUMER_DEFINED, NULL,
- 0, private_data, private_data_len);
+ } else {
+ trace_cm_send_rej(id_priv);
+ ret = ib_send_cm_rej(id_priv->cm_id.ib, reason, NULL, 0,
+ private_data, private_data_len);
+ }
} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
ret = iw_cm_reject(id_priv->cm_id.iw,
private_data, private_data_len);
- } else
+ } else {
ret = -ENOSYS;
+ }
return ret;
}
@@ -4123,8 +4801,13 @@ int rdma_disconnect(struct rdma_cm_id *id)
if (ret)
goto out;
/* Initiate or respond to a disconnect. */
- if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
- ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
+ trace_cm_disconnect(id_priv);
+ if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) {
+ if (!ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0))
+ trace_cm_sent_drep(id_priv);
+ } else {
+ trace_cm_sent_dreq(id_priv);
+ }
} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
} else
@@ -4135,70 +4818,68 @@ out:
}
EXPORT_SYMBOL(rdma_disconnect);
+static void cma_make_mc_event(int status, struct rdma_id_private *id_priv,
+ struct ib_sa_multicast *multicast,
+ struct rdma_cm_event *event,
+ struct cma_multicast *mc)
+{
+ struct rdma_dev_addr *dev_addr;
+ enum ib_gid_type gid_type;
+ struct net_device *ndev;
+
+ if (status)
+ pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n",
+ status);
+
+ event->status = status;
+ event->param.ud.private_data = mc->context;
+ if (status) {
+ event->event = RDMA_CM_EVENT_MULTICAST_ERROR;
+ return;
+ }
+
+ dev_addr = &id_priv->id.route.addr.dev_addr;
+ ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
+ gid_type =
+ id_priv->cma_dev
+ ->default_gid_type[id_priv->id.port_num -
+ rdma_start_port(
+ id_priv->cma_dev->device)];
+
+ event->event = RDMA_CM_EVENT_MULTICAST_JOIN;
+ if (ib_init_ah_from_mcmember(id_priv->id.device, id_priv->id.port_num,
+ &multicast->rec, ndev, gid_type,
+ &event->param.ud.ah_attr)) {
+ event->event = RDMA_CM_EVENT_MULTICAST_ERROR;
+ goto out;
+ }
+
+ event->param.ud.qp_num = 0xFFFFFF;
+ event->param.ud.qkey = id_priv->qkey;
+
+out:
+ dev_put(ndev);
+}
+
static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
{
- struct rdma_id_private *id_priv;
struct cma_multicast *mc = multicast->context;
+ struct rdma_id_private *id_priv = mc->id_priv;
struct rdma_cm_event event = {};
int ret = 0;
- id_priv = mc->id_priv;
mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state != RDMA_CM_ADDR_BOUND &&
- id_priv->state != RDMA_CM_ADDR_RESOLVED)
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING)
goto out;
- if (!status)
- status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
- else
- pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n",
- status);
- mutex_lock(&id_priv->qp_mutex);
- if (!status && id_priv->id.qp) {
- status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
- be16_to_cpu(multicast->rec.mlid));
- if (status)
- pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to attach QP. status %d\n",
- status);
+ ret = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
+ if (!ret) {
+ cma_make_mc_event(status, id_priv, multicast, &event, mc);
+ ret = cma_cm_event_handler(id_priv, &event);
}
- mutex_unlock(&id_priv->qp_mutex);
-
- event.status = status;
- event.param.ud.private_data = mc->context;
- if (!status) {
- struct rdma_dev_addr *dev_addr =
- &id_priv->id.route.addr.dev_addr;
- struct net_device *ndev =
- dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
- enum ib_gid_type gid_type =
- id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
- rdma_start_port(id_priv->cma_dev->device)];
-
- event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
- ret = ib_init_ah_from_mcmember(id_priv->id.device,
- id_priv->id.port_num,
- &multicast->rec,
- ndev, gid_type,
- &event.param.ud.ah_attr);
- if (ret)
- event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
-
- event.param.ud.qp_num = 0xFFFFFF;
- event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
- if (ndev)
- dev_put(ndev);
- } else
- event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
-
- ret = id_priv->id.event_handler(&id_priv->id, &event);
-
rdma_destroy_ah_attr(&event.param.ud.ah_attr);
- if (ret) {
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- rdma_destroy_id(&id_priv->id);
- return 0;
- }
+ WARN_ON(ret);
out:
mutex_unlock(&id_priv->handler_mutex);
@@ -4249,9 +4930,11 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
if (ret)
return ret;
- ret = cma_set_qkey(id_priv, 0);
- if (ret)
- return ret;
+ if (!id_priv->qkey) {
+ ret = cma_set_default_qkey(id_priv);
+ if (ret)
+ return ret;
+ }
cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
rec.qkey = cpu_to_be32(id_priv->qkey);
@@ -4259,17 +4942,6 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
rec.join_state = mc->join_state;
- if ((rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) &&
- (!ib_sa_sendonly_fullmem_support(&sa_client,
- id_priv->id.device,
- id_priv->id.port_num))) {
- dev_warn(
- &id_priv->id.device->dev,
- "RDMA CM: port %u Unable to multicast join: SM doesn't support Send Only Full Member option\n",
- id_priv->id.port_num);
- return -EOPNOTSUPP;
- }
-
comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
@@ -4283,23 +4955,10 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
IB_SA_MCMEMBER_REC_MTU |
IB_SA_MCMEMBER_REC_HOP_LIMIT;
- mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
- id_priv->id.port_num, &rec,
- comp_mask, GFP_KERNEL,
- cma_ib_mc_handler, mc);
- return PTR_ERR_OR_ZERO(mc->multicast.ib);
-}
-
-static void iboe_mcast_work_handler(struct work_struct *work)
-{
- struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
- struct cma_multicast *mc = mw->mc;
- struct ib_sa_multicast *m = mc->multicast.ib;
-
- mc->multicast.ib->context = mc;
- cma_ib_mc_handler(0, m);
- kref_put(&mc->mcref, release_mc);
- kfree(mw);
+ mc->sa_mc = ib_sa_join_multicast(&sa_client, id_priv->id.device,
+ id_priv->id.port_num, &rec, comp_mask,
+ GFP_KERNEL, cma_ib_mc_handler, mc);
+ return PTR_ERR_OR_ZERO(mc->sa_mc);
}
static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
@@ -4334,52 +4993,38 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
struct cma_multicast *mc)
{
- struct iboe_mcast_work *work;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
int err = 0;
struct sockaddr *addr = (struct sockaddr *)&mc->addr;
struct net_device *ndev = NULL;
+ struct ib_sa_multicast ib = {};
enum ib_gid_type gid_type;
bool send_only;
send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
- if (cma_zero_addr((struct sockaddr *)&mc->addr))
+ if (cma_zero_addr(addr))
return -EINVAL;
- work = kzalloc(sizeof *work, GFP_KERNEL);
- if (!work)
- return -ENOMEM;
-
- mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
- if (!mc->multicast.ib) {
- err = -ENOMEM;
- goto out1;
- }
-
gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
rdma_start_port(id_priv->cma_dev->device)];
- cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type);
-
- mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
- if (id_priv->id.ps == RDMA_PS_UDP)
- mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
+ cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type);
+ ib.rec.pkey = cpu_to_be16(0xffff);
if (dev_addr->bound_dev_if)
ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
- if (!ndev) {
- err = -ENODEV;
- goto out2;
- }
- mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
- mc->multicast.ib->rec.hop_limit = 1;
- mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
+ if (!ndev)
+ return -ENODEV;
+
+ ib.rec.rate = IB_RATE_PORT_CURRENT;
+ ib.rec.hop_limit = 1;
+ ib.rec.mtu = iboe_get_mtu(ndev->mtu);
if (addr->sa_family == AF_INET) {
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
- mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
+ ib.rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
if (!send_only) {
- err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
+ err = cma_igmp_send(ndev, &ib.rec.mgid,
true);
}
}
@@ -4388,44 +5033,41 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
err = -ENOTSUPP;
}
dev_put(ndev);
- if (err || !mc->multicast.ib->rec.mtu) {
- if (!err)
- err = -EINVAL;
- goto out2;
- }
- rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
- &mc->multicast.ib->rec.port_gid);
- work->id = id_priv;
- work->mc = mc;
- INIT_WORK(&work->work, iboe_mcast_work_handler);
- kref_get(&mc->mcref);
- queue_work(cma_wq, &work->work);
+ if (err || !ib.rec.mtu)
+ return err ?: -EINVAL;
- return 0;
+ if (!id_priv->qkey)
+ cma_set_default_qkey(id_priv);
-out2:
- kfree(mc->multicast.ib);
-out1:
- kfree(work);
- return err;
+ rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
+ &ib.rec.port_gid);
+ INIT_WORK(&mc->iboe_join.work, cma_iboe_join_work_handler);
+ cma_make_mc_event(0, id_priv, &ib, &mc->iboe_join.event, mc);
+ queue_work(cma_wq, &mc->iboe_join.work);
+ return 0;
}
int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
u8 join_state, void *context)
{
- struct rdma_id_private *id_priv;
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
struct cma_multicast *mc;
int ret;
- if (!id->device)
+ /* Not supported for kernel QPs */
+ if (WARN_ON(id->qp))
return -EINVAL;
- id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
- !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
+ /* ULP is calling this wrong. */
+ if (!id->device || (READ_ONCE(id_priv->state) != RDMA_CM_ADDR_BOUND &&
+ READ_ONCE(id_priv->state) != RDMA_CM_ADDR_RESOLVED))
return -EINVAL;
- mc = kmalloc(sizeof *mc, GFP_KERNEL);
+ if (id_priv->id.qp_type != IB_QPT_UD)
+ return -EINVAL;
+
+ mc = kzalloc(sizeof(*mc), GFP_KERNEL);
if (!mc)
return -ENOMEM;
@@ -4435,7 +5077,6 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
mc->join_state = join_state;
if (rdma_protocol_roce(id->device, id->port_num)) {
- kref_init(&mc->mcref);
ret = cma_iboe_join_multicast(id_priv, mc);
if (ret)
goto out_err;
@@ -4467,25 +5108,14 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
id_priv = container_of(id, struct rdma_id_private, id);
spin_lock_irq(&id_priv->lock);
list_for_each_entry(mc, &id_priv->mc_list, list) {
- if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
- list_del(&mc->list);
- spin_unlock_irq(&id_priv->lock);
-
- if (id->qp)
- ib_detach_mcast(id->qp,
- &mc->multicast.ib->rec.mgid,
- be16_to_cpu(mc->multicast.ib->rec.mlid));
-
- BUG_ON(id_priv->cma_dev->device != id->device);
-
- if (rdma_cap_ib_mcast(id->device, id->port_num)) {
- ib_sa_free_multicast(mc->multicast.ib);
- kfree(mc);
- } else if (rdma_protocol_roce(id->device, id->port_num)) {
- cma_leave_roce_mc_group(id_priv, mc);
- }
- return;
- }
+ if (memcmp(&mc->addr, addr, rdma_addr_size(addr)) != 0)
+ continue;
+ list_del(&mc->list);
+ spin_unlock_irq(&id_priv->lock);
+
+ WARN_ON(id_priv->cma_dev->device != id->device);
+ destroy_mc(id_priv, mc);
+ return;
}
spin_unlock_irq(&id_priv->lock);
}
@@ -4494,7 +5124,7 @@ EXPORT_SYMBOL(rdma_leave_multicast);
static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
{
struct rdma_dev_addr *dev_addr;
- struct cma_ndev_work *work;
+ struct cma_work *work;
dev_addr = &id_priv->id.route.addr.dev_addr;
@@ -4507,10 +5137,10 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
if (!work)
return -ENOMEM;
- INIT_WORK(&work->work, cma_ndev_work_handler);
+ INIT_WORK(&work->work, cma_work_handler);
work->id = id_priv;
work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
- atomic_inc(&id_priv->refcount);
+ cma_id_get(id_priv);
queue_work(cma_wq, &work->work);
}
@@ -4533,7 +5163,7 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
mutex_lock(&lock);
list_for_each_entry(cma_dev, &dev_list, list)
- list_for_each_entry(id_priv, &cma_dev->id_list, list) {
+ list_for_each_entry(id_priv, &cma_dev->id_list, device_item) {
ret = cma_netdev_change(ndev, id_priv);
if (ret)
goto out;
@@ -4544,33 +5174,192 @@ out:
return ret;
}
+static void cma_netevent_work_handler(struct work_struct *_work)
+{
+ struct rdma_id_private *id_priv =
+ container_of(_work, struct rdma_id_private, id.net_work);
+ struct rdma_cm_event event = {};
+
+ mutex_lock(&id_priv->handler_mutex);
+
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
+ goto out_unlock;
+
+ event.event = RDMA_CM_EVENT_UNREACHABLE;
+ event.status = -ETIMEDOUT;
+
+ if (cma_cm_event_handler(id_priv, &event)) {
+ __acquire(&id_priv->handler_mutex);
+ id_priv->cm_id.ib = NULL;
+ cma_id_put(id_priv);
+ destroy_id_handler_unlock(id_priv);
+ return;
+ }
+
+out_unlock:
+ mutex_unlock(&id_priv->handler_mutex);
+ cma_id_put(id_priv);
+}
+
+static int cma_netevent_callback(struct notifier_block *self,
+ unsigned long event, void *ctx)
+{
+ struct id_table_entry *ips_node = NULL;
+ struct rdma_id_private *current_id;
+ struct neighbour *neigh = ctx;
+ unsigned long flags;
+
+ if (event != NETEVENT_NEIGH_UPDATE)
+ return NOTIFY_DONE;
+
+ spin_lock_irqsave(&id_table_lock, flags);
+ if (neigh->tbl->family == AF_INET6) {
+ struct sockaddr_in6 neigh_sock_6;
+
+ neigh_sock_6.sin6_family = AF_INET6;
+ neigh_sock_6.sin6_addr = *(struct in6_addr *)neigh->primary_key;
+ ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
+ (struct sockaddr *)&neigh_sock_6);
+ } else if (neigh->tbl->family == AF_INET) {
+ struct sockaddr_in neigh_sock_4;
+
+ neigh_sock_4.sin_family = AF_INET;
+ neigh_sock_4.sin_addr.s_addr = *(__be32 *)(neigh->primary_key);
+ ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
+ (struct sockaddr *)&neigh_sock_4);
+ } else
+ goto out;
+
+ if (!ips_node)
+ goto out;
+
+ list_for_each_entry(current_id, &ips_node->id_list, id_list_entry) {
+ if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr,
+ neigh->ha, ETH_ALEN))
+ continue;
+ cma_id_get(current_id);
+ if (!queue_work(cma_wq, &current_id->id.net_work))
+ cma_id_put(current_id);
+ }
+out:
+ spin_unlock_irqrestore(&id_table_lock, flags);
+ return NOTIFY_DONE;
+}
+
static struct notifier_block cma_nb = {
.notifier_call = cma_netdev_callback
};
-static void cma_add_one(struct ib_device *device)
+static struct notifier_block cma_netevent_cb = {
+ .notifier_call = cma_netevent_callback
+};
+
+static void cma_send_device_removal_put(struct rdma_id_private *id_priv)
{
+ struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL };
+ enum rdma_cm_state state;
+ unsigned long flags;
+
+ mutex_lock(&id_priv->handler_mutex);
+ /* Record that we want to remove the device */
+ spin_lock_irqsave(&id_priv->lock, flags);
+ state = id_priv->state;
+ if (state == RDMA_CM_DESTROYING || state == RDMA_CM_DEVICE_REMOVAL) {
+ spin_unlock_irqrestore(&id_priv->lock, flags);
+ mutex_unlock(&id_priv->handler_mutex);
+ cma_id_put(id_priv);
+ return;
+ }
+ id_priv->state = RDMA_CM_DEVICE_REMOVAL;
+ spin_unlock_irqrestore(&id_priv->lock, flags);
+
+ if (cma_cm_event_handler(id_priv, &event)) {
+ /*
+ * At this point the ULP promises it won't call
+ * rdma_destroy_id() concurrently
+ */
+ cma_id_put(id_priv);
+ mutex_unlock(&id_priv->handler_mutex);
+ trace_cm_id_destroy(id_priv);
+ _destroy_id(id_priv, state);
+ return;
+ }
+ mutex_unlock(&id_priv->handler_mutex);
+
+ /*
+ * If this races with destroy then the thread that first sets a
+ * destroying state does the cancel.
+ */
+ cma_cancel_operation(id_priv, state);
+ cma_id_put(id_priv);
+}
+
+static void cma_process_remove(struct cma_device *cma_dev)
+{
+ mutex_lock(&lock);
+ while (!list_empty(&cma_dev->id_list)) {
+ struct rdma_id_private *id_priv = list_first_entry(
+ &cma_dev->id_list, struct rdma_id_private, device_item);
+
+ list_del_init(&id_priv->listen_item);
+ list_del_init(&id_priv->device_item);
+ cma_id_get(id_priv);
+ mutex_unlock(&lock);
+
+ cma_send_device_removal_put(id_priv);
+
+ mutex_lock(&lock);
+ }
+ mutex_unlock(&lock);
+
+ cma_dev_put(cma_dev);
+ wait_for_completion(&cma_dev->comp);
+}
+
+static bool cma_supported(struct ib_device *device)
+{
+ u32 i;
+
+ rdma_for_each_port(device, i) {
+ if (rdma_cap_ib_cm(device, i) || rdma_cap_iw_cm(device, i))
+ return true;
+ }
+ return false;
+}
+
+static int cma_add_one(struct ib_device *device)
+{
+ struct rdma_id_private *to_destroy;
struct cma_device *cma_dev;
struct rdma_id_private *id_priv;
- unsigned int i;
unsigned long supported_gids = 0;
+ int ret;
+ u32 i;
+
+ if (!cma_supported(device))
+ return -EOPNOTSUPP;
- cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
+ cma_dev = kmalloc(sizeof(*cma_dev), GFP_KERNEL);
if (!cma_dev)
- return;
+ return -ENOMEM;
cma_dev->device = device;
cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
sizeof(*cma_dev->default_gid_type),
GFP_KERNEL);
- if (!cma_dev->default_gid_type)
+ if (!cma_dev->default_gid_type) {
+ ret = -ENOMEM;
goto free_cma_dev;
+ }
cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
sizeof(*cma_dev->default_roce_tos),
GFP_KERNEL);
- if (!cma_dev->default_roce_tos)
+ if (!cma_dev->default_roce_tos) {
+ ret = -ENOMEM;
goto free_gid_type;
+ }
rdma_for_each_port (device, i) {
supported_gids = roce_gid_type_mask_support(device, i);
@@ -4585,86 +5374,42 @@ static void cma_add_one(struct ib_device *device)
}
init_completion(&cma_dev->comp);
- atomic_set(&cma_dev->refcount, 1);
+ refcount_set(&cma_dev->refcount, 1);
INIT_LIST_HEAD(&cma_dev->id_list);
ib_set_client_data(device, &cma_client, cma_dev);
mutex_lock(&lock);
list_add_tail(&cma_dev->list, &dev_list);
- list_for_each_entry(id_priv, &listen_any_list, list)
- cma_listen_on_dev(id_priv, cma_dev);
+ list_for_each_entry(id_priv, &listen_any_list, listen_any_item) {
+ ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy);
+ if (ret)
+ goto free_listen;
+ }
mutex_unlock(&lock);
- return;
+ trace_cm_add_one(device);
+ return 0;
+
+free_listen:
+ list_del(&cma_dev->list);
+ mutex_unlock(&lock);
+ /* cma_process_remove() will delete to_destroy */
+ cma_process_remove(cma_dev);
+ kfree(cma_dev->default_roce_tos);
free_gid_type:
kfree(cma_dev->default_gid_type);
free_cma_dev:
kfree(cma_dev);
-
- return;
-}
-
-static int cma_remove_id_dev(struct rdma_id_private *id_priv)
-{
- struct rdma_cm_event event = {};
- enum rdma_cm_state state;
- int ret = 0;
-
- /* Record that we want to remove the device */
- state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
- if (state == RDMA_CM_DESTROYING)
- return 0;
-
- cma_cancel_operation(id_priv, state);
- mutex_lock(&id_priv->handler_mutex);
-
- /* Check for destruction from another callback. */
- if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
- goto out;
-
- event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
- ret = id_priv->id.event_handler(&id_priv->id, &event);
-out:
- mutex_unlock(&id_priv->handler_mutex);
return ret;
}
-static void cma_process_remove(struct cma_device *cma_dev)
-{
- struct rdma_id_private *id_priv;
- int ret;
-
- mutex_lock(&lock);
- while (!list_empty(&cma_dev->id_list)) {
- id_priv = list_entry(cma_dev->id_list.next,
- struct rdma_id_private, list);
-
- list_del(&id_priv->listen_list);
- list_del_init(&id_priv->list);
- atomic_inc(&id_priv->refcount);
- mutex_unlock(&lock);
-
- ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
- cma_deref_id(id_priv);
- if (ret)
- rdma_destroy_id(&id_priv->id);
-
- mutex_lock(&lock);
- }
- mutex_unlock(&lock);
-
- cma_deref_dev(cma_dev);
- wait_for_completion(&cma_dev->comp);
-}
-
static void cma_remove_one(struct ib_device *device, void *client_data)
{
struct cma_device *cma_dev = client_data;
- if (!cma_dev)
- return;
+ trace_cm_remove_one(device);
mutex_lock(&lock);
list_del(&cma_dev->list);
@@ -4709,6 +5454,19 @@ static int __init cma_init(void)
{
int ret;
+ /*
+ * There is a rare lock ordering dependency in cma_netdev_callback()
+ * that only happens when bonding is enabled. Teach lockdep that rtnl
+ * must never be nested under lock so it can find these without having
+ * to test with bonding.
+ */
+ if (IS_ENABLED(CONFIG_LOCKDEP)) {
+ rtnl_lock();
+ mutex_lock(&lock);
+ mutex_unlock(&lock);
+ rtnl_unlock();
+ }
+
cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM);
if (!cma_wq)
return -ENOMEM;
@@ -4719,18 +5477,25 @@ static int __init cma_init(void)
ib_sa_register_client(&sa_client);
register_netdevice_notifier(&cma_nb);
+ register_netevent_notifier(&cma_netevent_cb);
ret = ib_register_client(&cma_client);
if (ret)
goto err;
- cma_configfs_init();
+ ret = cma_configfs_init();
+ if (ret)
+ goto err_ib;
return 0;
+err_ib:
+ ib_unregister_client(&cma_client);
err:
+ unregister_netevent_notifier(&cma_netevent_cb);
unregister_netdevice_notifier(&cma_nb);
ib_sa_unregister_client(&sa_client);
+ unregister_pernet_subsys(&cma_pernet_operations);
err_wq:
destroy_workqueue(cma_wq);
return ret;
@@ -4740,6 +5505,7 @@ static void __exit cma_cleanup(void)
{
cma_configfs_exit();
ib_unregister_client(&cma_client);
+ unregister_netevent_notifier(&cma_netevent_cb);
unregister_netdevice_notifier(&cma_nb);
ib_sa_unregister_client(&sa_client);
unregister_pernet_subsys(&cma_pernet_operations);
@@ -4748,3 +5514,129 @@ static void __exit cma_cleanup(void)
module_init(cma_init);
module_exit(cma_cleanup);
+
+static void cma_query_ib_service_handler(int status,
+ struct sa_service_rec *recs,
+ unsigned int num_recs, void *context)
+{
+ struct cma_work *work = context;
+ struct rdma_id_private *id_priv = work->id;
+ struct sockaddr_ib *addr;
+
+ if (status)
+ goto fail;
+
+ if (!num_recs) {
+ status = -ENOENT;
+ goto fail;
+ }
+
+ if (id_priv->id.route.service_recs) {
+ status = -EALREADY;
+ goto fail;
+ }
+
+ id_priv->id.route.service_recs =
+ kmalloc_array(num_recs, sizeof(*recs), GFP_KERNEL);
+ if (!id_priv->id.route.service_recs) {
+ status = -ENOMEM;
+ goto fail;
+ }
+
+ id_priv->id.route.num_service_recs = num_recs;
+ memcpy(id_priv->id.route.service_recs, recs, sizeof(*recs) * num_recs);
+
+ addr = (struct sockaddr_ib *)&id_priv->id.route.addr.dst_addr;
+ addr->sib_family = AF_IB;
+ addr->sib_addr = *(struct ib_addr *)&recs->gid;
+ addr->sib_pkey = recs->pkey;
+ addr->sib_sid = recs->id;
+ rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr,
+ (union ib_gid *)&addr->sib_addr);
+ ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr,
+ ntohs(addr->sib_pkey));
+
+ queue_work(cma_wq, &work->work);
+ return;
+
+fail:
+ work->old_state = RDMA_CM_ADDRINFO_QUERY;
+ work->new_state = RDMA_CM_ADDR_BOUND;
+ work->event.event = RDMA_CM_EVENT_ADDRINFO_ERROR;
+ work->event.status = status;
+ pr_debug_ratelimited(
+ "RDMA CM: SERVICE_ERROR: failed to query service record. status %d\n",
+ status);
+ queue_work(cma_wq, &work->work);
+}
+
+static int cma_resolve_ib_service(struct rdma_id_private *id_priv,
+ struct rdma_ucm_ib_service *ibs)
+{
+ struct sa_service_rec sr = {};
+ ib_sa_comp_mask mask = 0;
+ struct cma_work *work;
+
+ work = kzalloc(sizeof(*work), GFP_KERNEL);
+ if (!work)
+ return -ENOMEM;
+
+ cma_id_get(id_priv);
+
+ work->id = id_priv;
+ INIT_WORK(&work->work, cma_work_handler);
+ work->old_state = RDMA_CM_ADDRINFO_QUERY;
+ work->new_state = RDMA_CM_ADDRINFO_RESOLVED;
+ work->event.event = RDMA_CM_EVENT_ADDRINFO_RESOLVED;
+
+ if (ibs->flags & RDMA_USER_CM_IB_SERVICE_FLAG_ID) {
+ sr.id = cpu_to_be64(ibs->service_id);
+ mask |= IB_SA_SERVICE_REC_SERVICE_ID;
+ }
+ if (ibs->flags & RDMA_USER_CM_IB_SERVICE_FLAG_NAME) {
+ strscpy(sr.name, ibs->service_name, sizeof(sr.name));
+ mask |= IB_SA_SERVICE_REC_SERVICE_NAME;
+ }
+
+ id_priv->query_id = ib_sa_service_rec_get(&sa_client,
+ id_priv->id.device,
+ id_priv->id.port_num,
+ &sr, mask,
+ 2000, GFP_KERNEL,
+ cma_query_ib_service_handler,
+ work, &id_priv->query);
+
+ if (id_priv->query_id < 0) {
+ cma_id_put(id_priv);
+ kfree(work);
+ return id_priv->query_id;
+ }
+
+ return 0;
+}
+
+int rdma_resolve_ib_service(struct rdma_cm_id *id,
+ struct rdma_ucm_ib_service *ibs)
+{
+ struct rdma_id_private *id_priv;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (!id_priv->cma_dev ||
+ !cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDRINFO_QUERY))
+ return -EINVAL;
+
+ if (rdma_cap_ib_sa(id->device, id->port_num))
+ ret = cma_resolve_ib_service(id_priv, ibs);
+ else
+ ret = -EOPNOTSUPP;
+
+ if (ret)
+ goto err;
+
+ return 0;
+err:
+ cma_comp_exch(id_priv, RDMA_CM_ADDRINFO_QUERY, RDMA_CM_ADDR_BOUND);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_resolve_ib_service);
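For orientation, a minimal sketch of how a kernel ULP might drive the rdma_resolve_ib_service() call added above. Only the rdma_* call, the flag, the struct fields and the ADDRINFO events come from this patch; the wrapper function and its error handling are illustrative assumptions.

/* Illustrative only: resolve an IB service by name on a cm_id that is
 * already bound to an AF_IB address and attached to a device. Completion
 * is reported to the cm_id's event handler as RDMA_CM_EVENT_ADDRINFO_RESOLVED
 * (records land in id->route.service_recs) or RDMA_CM_EVENT_ADDRINFO_ERROR.
 */
static int example_resolve_service(struct rdma_cm_id *id, const char *name)
{
	struct rdma_ucm_ib_service ibs = {};

	ibs.flags = RDMA_USER_CM_IB_SERVICE_FLAG_NAME;
	strscpy(ibs.service_name, name, sizeof(ibs.service_name));

	return rdma_resolve_ib_service(id, &ibs);
}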
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 3ec2c415bb70..f2fb2d8a6597 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-#include <linux/module.h>
#include <linux/configfs.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
@@ -43,7 +42,7 @@ struct cma_device;
struct cma_dev_group;
struct cma_dev_port_group {
- unsigned int port_num;
+ u32 port_num;
struct cma_dev_group *cma_dev_group;
struct config_group group;
};
@@ -94,7 +93,7 @@ static int cma_configfs_params_get(struct config_item *item,
static void cma_configfs_params_put(struct cma_device *cma_dev)
{
- cma_deref_dev(cma_dev);
+ cma_dev_put(cma_dev);
}
static ssize_t default_roce_mode_show(struct config_item *item,
@@ -115,7 +114,7 @@ static ssize_t default_roce_mode_show(struct config_item *item,
if (gid_type < 0)
return gid_type;
- return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_type));
+ return sysfs_emit(buf, "%s\n", ib_cache_gid_type_str(gid_type));
}
static ssize_t default_roce_mode_store(struct config_item *item,
@@ -123,16 +122,19 @@ static ssize_t default_roce_mode_store(struct config_item *item,
{
struct cma_device *cma_dev;
struct cma_dev_port_group *group;
- int gid_type = ib_cache_gid_parse_type_str(buf);
+ int gid_type;
ssize_t ret;
- if (gid_type < 0)
- return -EINVAL;
-
ret = cma_configfs_params_get(item, &cma_dev, &group);
if (ret)
return ret;
+ gid_type = ib_cache_gid_parse_type_str(buf);
+ if (gid_type < 0) {
+ cma_configfs_params_put(cma_dev);
+ return -EINVAL;
+ }
+
ret = cma_set_default_gid_type(cma_dev, group->port_num, gid_type);
cma_configfs_params_put(cma_dev);
@@ -156,7 +158,7 @@ static ssize_t default_roce_tos_show(struct config_item *item, char *buf)
tos = cma_get_default_roce_tos(cma_dev, group->port_num);
cma_configfs_params_put(cma_dev);
- return sprintf(buf, "%u\n", tos);
+ return sysfs_emit(buf, "%u\n", tos);
}
static ssize_t default_roce_tos_store(struct config_item *item,
@@ -197,11 +199,10 @@ static const struct config_item_type cma_port_group_type = {
static int make_cma_ports(struct cma_dev_group *cma_dev_group,
struct cma_device *cma_dev)
{
- struct ib_device *ibdev;
- unsigned int i;
- unsigned int ports_num;
struct cma_dev_port_group *ports;
- int err;
+ struct ib_device *ibdev;
+ u32 ports_num;
+ u32 i;
ibdev = cma_get_ib_dev(cma_dev);
@@ -212,13 +213,11 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group,
ports = kcalloc(ports_num, sizeof(*cma_dev_group->ports),
GFP_KERNEL);
- if (!ports) {
- err = -ENOMEM;
- goto free;
- }
+ if (!ports)
+ return -ENOMEM;
for (i = 0; i < ports_num; i++) {
- char port_str[10];
+ char port_str[11];
ports[i].port_num = i + 1;
snprintf(port_str, sizeof(port_str), "%u", i + 1);
@@ -231,12 +230,7 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group,
}
cma_dev_group->ports = ports;
-
return 0;
-free:
- kfree(ports);
- cma_dev_group->ports = NULL;
- return err;
}
static void release_cma_dev(struct config_item *item)
@@ -298,7 +292,7 @@ static struct config_group *make_cma_dev(struct config_group *group,
goto fail;
}
- strlcpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
+ strscpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
config_group_init_type_name(&cma_dev_group->ports_group, "ports",
&cma_ports_group_type);
@@ -312,18 +306,31 @@ static struct config_group *make_cma_dev(struct config_group *group,
configfs_add_default_group(&cma_dev_group->ports_group,
&cma_dev_group->device_group);
- cma_deref_dev(cma_dev);
+ cma_dev_put(cma_dev);
return &cma_dev_group->device_group;
fail:
if (cma_dev)
- cma_deref_dev(cma_dev);
+ cma_dev_put(cma_dev);
kfree(cma_dev_group);
return ERR_PTR(err);
}
+static void drop_cma_dev(struct config_group *cgroup, struct config_item *item)
+{
+ struct config_group *group =
+ container_of(item, struct config_group, cg_item);
+ struct cma_dev_group *cma_dev_group =
+ container_of(group, struct cma_dev_group, device_group);
+
+ configfs_remove_default_groups(&cma_dev_group->ports_group);
+ configfs_remove_default_groups(&cma_dev_group->device_group);
+ config_item_put(item);
+}
+
static struct configfs_group_operations cma_subsys_group_ops = {
.make_group = make_cma_dev,
+ .drop_item = drop_cma_dev,
};
static const struct config_item_type cma_subsys_type = {
@@ -342,12 +349,18 @@ static struct configfs_subsystem cma_subsys = {
int __init cma_configfs_init(void)
{
+ int ret;
+
config_group_init(&cma_subsys.su_group);
mutex_init(&cma_subsys.su_mutex);
- return configfs_register_subsystem(&cma_subsys);
+ ret = configfs_register_subsystem(&cma_subsys);
+ if (ret)
+ mutex_destroy(&cma_subsys.su_mutex);
+ return ret;
}
void __exit cma_configfs_exit(void)
{
configfs_unregister_subsystem(&cma_subsys);
+ mutex_destroy(&cma_subsys.su_mutex);
}
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
index ca7307277518..c604b601f4d9 100644
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -47,7 +47,9 @@ enum rdma_cm_state {
RDMA_CM_ADDR_BOUND,
RDMA_CM_LISTEN,
RDMA_CM_DEVICE_REMOVAL,
- RDMA_CM_DESTROYING
+ RDMA_CM_DESTROYING,
+ RDMA_CM_ADDRINFO_QUERY,
+ RDMA_CM_ADDRINFO_RESOLVED
};
struct rdma_id_private {
@@ -55,8 +57,16 @@ struct rdma_id_private {
struct rdma_bind_list *bind_list;
struct hlist_node node;
- struct list_head list; /* listen_any_list or cma_device.list */
- struct list_head listen_list; /* per device listens */
+ union {
+ struct list_head device_item; /* On cma_device->id_list */
+ struct list_head listen_any_item; /* On listen_any_list */
+ };
+ union {
+ /* On rdma_id_private->listen_list */
+ struct list_head listen_item;
+ struct list_head listen_list;
+ };
+ struct list_head id_list_entry;
struct cma_device *cma_dev;
struct list_head mc_list;
@@ -66,7 +76,7 @@ struct rdma_id_private {
struct mutex qp_mutex;
struct completion comp;
- atomic_t refcount;
+ refcount_t refcount;
struct mutex handler_mutex;
int backlog;
@@ -86,15 +96,19 @@ struct rdma_id_private {
u8 tos;
u8 tos_set:1;
u8 timeout_set:1;
+ u8 min_rnr_timer_set:1;
u8 reuseaddr;
u8 afonly;
u8 timeout;
+ u8 min_rnr_timer;
+ u8 used_resolve_ip;
enum ib_gid_type gid_type;
/*
* Internal to RDMA/core, don't use in the drivers
*/
struct rdma_restrack_entry res;
+ struct rdma_ucm_ece ece;
};
#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
@@ -111,16 +125,16 @@ static inline void cma_configfs_exit(void)
}
#endif
-void cma_ref_dev(struct cma_device *dev);
-void cma_deref_dev(struct cma_device *dev);
+void cma_dev_get(struct cma_device *dev);
+void cma_dev_put(struct cma_device *dev);
typedef bool (*cma_device_filter)(struct ib_device *, void *);
struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
void *cookie);
-int cma_get_default_gid_type(struct cma_device *dev, unsigned int port);
-int cma_set_default_gid_type(struct cma_device *dev, unsigned int port,
+int cma_get_default_gid_type(struct cma_device *dev, u32 port);
+int cma_set_default_gid_type(struct cma_device *dev, u32 port,
enum ib_gid_type default_gid_type);
-int cma_get_default_roce_tos(struct cma_device *dev, unsigned int port);
-int cma_set_default_roce_tos(struct cma_device *dev, unsigned int port,
+int cma_get_default_roce_tos(struct cma_device *dev, u32 port);
+int cma_set_default_roce_tos(struct cma_device *dev, u32 port,
u8 default_roce_tos);
struct ib_device *cma_get_ib_dev(struct cma_device *dev);
diff --git a/drivers/infiniband/core/cma_trace.c b/drivers/infiniband/core/cma_trace.c
new file mode 100644
index 000000000000..b314a281e10e
--- /dev/null
+++ b/drivers/infiniband/core/cma_trace.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Trace points for the RDMA Connection Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#define CREATE_TRACE_POINTS
+
+#include <rdma/rdma_cm.h>
+#include <rdma/ib_cm.h>
+#include "cma_priv.h"
+
+#include "cma_trace.h"
diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h
new file mode 100644
index 000000000000..3456d5f3aa47
--- /dev/null
+++ b/drivers/infiniband/core/cma_trace.h
@@ -0,0 +1,361 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Trace point definitions for the RDMA Connection Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rdma_cma
+
+#if !defined(_TRACE_RDMA_CMA_H) || defined(TRACE_HEADER_MULTI_READ)
+
+#define _TRACE_RDMA_CMA_H
+
+#include <linux/tracepoint.h>
+#include <trace/misc/rdma.h>
+
+
+DECLARE_EVENT_CLASS(cma_fsm_class,
+ TP_PROTO(
+ const struct rdma_id_private *id_priv
+ ),
+
+ TP_ARGS(id_priv),
+
+ TP_STRUCT__entry(
+ __field(u32, cm_id)
+ __field(u32, tos)
+ __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+ __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = id_priv->res.id;
+ __entry->tos = id_priv->tos;
+ memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+ sizeof(struct sockaddr_in6));
+ memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+
+ TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u",
+ __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos
+ )
+);
+
+#define DEFINE_CMA_FSM_EVENT(name) \
+ DEFINE_EVENT(cma_fsm_class, cm_##name, \
+ TP_PROTO( \
+ const struct rdma_id_private *id_priv \
+ ), \
+ TP_ARGS(id_priv))
+
+DEFINE_CMA_FSM_EVENT(send_rtu);
+DEFINE_CMA_FSM_EVENT(send_rej);
+DEFINE_CMA_FSM_EVENT(prepare_mra);
+DEFINE_CMA_FSM_EVENT(send_sidr_req);
+DEFINE_CMA_FSM_EVENT(send_sidr_rep);
+DEFINE_CMA_FSM_EVENT(disconnect);
+DEFINE_CMA_FSM_EVENT(sent_drep);
+DEFINE_CMA_FSM_EVENT(sent_dreq);
+DEFINE_CMA_FSM_EVENT(id_destroy);
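As a hedged aside, each DEFINE_CMA_FSM_EVENT(foo) above generates a trace_cm_foo(id_priv) call site for cma.c; the helper below is purely illustrative (trace_cm_id_destroy() is the variant this patch already calls from _destroy_id()).

/* Illustrative only: emitting one of the FSM tracepoints defined above. */
static void example_trace_disconnect(struct rdma_id_private *id_priv)
{
	trace_cm_disconnect(id_priv);	/* logs cm.id, src/dst address, tos */
}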
+
+TRACE_EVENT(cm_id_attach,
+ TP_PROTO(
+ const struct rdma_id_private *id_priv,
+ const struct ib_device *device
+ ),
+
+ TP_ARGS(id_priv, device),
+
+ TP_STRUCT__entry(
+ __field(u32, cm_id)
+ __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+ __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+ __string(devname, device->name)
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = id_priv->res.id;
+ memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+ sizeof(struct sockaddr_in6));
+ memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+ sizeof(struct sockaddr_in6));
+ __assign_str(devname);
+ ),
+
+ TP_printk("cm.id=%u src=%pISpc dst=%pISpc device=%s",
+ __entry->cm_id, __entry->srcaddr, __entry->dstaddr,
+ __get_str(devname)
+ )
+);
+
+DECLARE_EVENT_CLASS(cma_qp_class,
+ TP_PROTO(
+ const struct rdma_id_private *id_priv
+ ),
+
+ TP_ARGS(id_priv),
+
+ TP_STRUCT__entry(
+ __field(u32, cm_id)
+ __field(u32, tos)
+ __field(u32, qp_num)
+ __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+ __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = id_priv->res.id;
+ __entry->tos = id_priv->tos;
+ __entry->qp_num = id_priv->qp_num;
+ memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+ sizeof(struct sockaddr_in6));
+ memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+
+ TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u qp_num=%u",
+ __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos,
+ __entry->qp_num
+ )
+);
+
+#define DEFINE_CMA_QP_EVENT(name) \
+ DEFINE_EVENT(cma_qp_class, cm_##name, \
+ TP_PROTO( \
+ const struct rdma_id_private *id_priv \
+ ), \
+ TP_ARGS(id_priv))
+
+DEFINE_CMA_QP_EVENT(send_req);
+DEFINE_CMA_QP_EVENT(send_rep);
+DEFINE_CMA_QP_EVENT(qp_destroy);
+
+/*
+ * enum ib_qp_type, from include/rdma/ib_verbs.h
+ */
+#define IB_QP_TYPE_LIST \
+ ib_qp_type(SMI) \
+ ib_qp_type(GSI) \
+ ib_qp_type(RC) \
+ ib_qp_type(UC) \
+ ib_qp_type(UD) \
+ ib_qp_type(RAW_IPV6) \
+ ib_qp_type(RAW_ETHERTYPE) \
+ ib_qp_type(RAW_PACKET) \
+ ib_qp_type(XRC_INI) \
+ ib_qp_type_end(XRC_TGT)
+
+#undef ib_qp_type
+#undef ib_qp_type_end
+
+#define ib_qp_type(x) TRACE_DEFINE_ENUM(IB_QPT_##x);
+#define ib_qp_type_end(x) TRACE_DEFINE_ENUM(IB_QPT_##x);
+
+IB_QP_TYPE_LIST
+
+#undef ib_qp_type
+#undef ib_qp_type_end
+
+#define ib_qp_type(x) { IB_QPT_##x, #x },
+#define ib_qp_type_end(x) { IB_QPT_##x, #x }
+
+#define rdma_show_qp_type(x) \
+ __print_symbolic(x, IB_QP_TYPE_LIST)
+
+
+TRACE_EVENT(cm_qp_create,
+ TP_PROTO(
+ const struct rdma_id_private *id_priv,
+ const struct ib_pd *pd,
+ const struct ib_qp_init_attr *qp_init_attr,
+ int rc
+ ),
+
+ TP_ARGS(id_priv, pd, qp_init_attr, rc),
+
+ TP_STRUCT__entry(
+ __field(u32, cm_id)
+ __field(u32, pd_id)
+ __field(u32, tos)
+ __field(u32, qp_num)
+ __field(u32, send_wr)
+ __field(u32, recv_wr)
+ __field(int, rc)
+ __field(unsigned long, qp_type)
+ __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+ __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = id_priv->res.id;
+ __entry->pd_id = pd->res.id;
+ __entry->tos = id_priv->tos;
+ __entry->send_wr = qp_init_attr->cap.max_send_wr;
+ __entry->recv_wr = qp_init_attr->cap.max_recv_wr;
+ __entry->rc = rc;
+ if (!rc) {
+ __entry->qp_num = id_priv->qp_num;
+ __entry->qp_type = id_priv->id.qp_type;
+ } else {
+ __entry->qp_num = 0;
+ __entry->qp_type = 0;
+ }
+ memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+ sizeof(struct sockaddr_in6));
+ memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+
+ TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u pd.id=%u qp_type=%s"
+ " send_wr=%u recv_wr=%u qp_num=%u rc=%d",
+ __entry->cm_id, __entry->srcaddr, __entry->dstaddr,
+ __entry->tos, __entry->pd_id,
+ rdma_show_qp_type(__entry->qp_type), __entry->send_wr,
+ __entry->recv_wr, __entry->qp_num, __entry->rc
+ )
+);
+
+TRACE_EVENT(cm_req_handler,
+ TP_PROTO(
+ const struct rdma_id_private *id_priv,
+ int event
+ ),
+
+ TP_ARGS(id_priv, event),
+
+ TP_STRUCT__entry(
+ __field(u32, cm_id)
+ __field(u32, tos)
+ __field(unsigned long, event)
+ __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+ __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = id_priv->res.id;
+ __entry->tos = id_priv->tos;
+ __entry->event = event;
+ memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+ sizeof(struct sockaddr_in6));
+ memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+
+ TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u %s (%lu)",
+ __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos,
+ rdma_show_ib_cm_event(__entry->event), __entry->event
+ )
+);
+
+TRACE_EVENT(cm_event_handler,
+ TP_PROTO(
+ const struct rdma_id_private *id_priv,
+ const struct rdma_cm_event *event
+ ),
+
+ TP_ARGS(id_priv, event),
+
+ TP_STRUCT__entry(
+ __field(u32, cm_id)
+ __field(u32, tos)
+ __field(unsigned long, event)
+ __field(int, status)
+ __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+ __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = id_priv->res.id;
+ __entry->tos = id_priv->tos;
+ __entry->event = event->event;
+ __entry->status = event->status;
+ memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+ sizeof(struct sockaddr_in6));
+ memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+
+ TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u %s (%lu/%d)",
+ __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos,
+ rdma_show_cm_event(__entry->event), __entry->event,
+ __entry->status
+ )
+);
+
+TRACE_EVENT(cm_event_done,
+ TP_PROTO(
+ const struct rdma_id_private *id_priv,
+ const struct rdma_cm_event *event,
+ int result
+ ),
+
+ TP_ARGS(id_priv, event, result),
+
+ TP_STRUCT__entry(
+ __field(u32, cm_id)
+ __field(u32, tos)
+ __field(unsigned long, event)
+ __field(int, result)
+ __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+ __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = id_priv->res.id;
+ __entry->tos = id_priv->tos;
+ __entry->event = event->event;
+ __entry->result = result;
+ memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+ sizeof(struct sockaddr_in6));
+ memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+
+ TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u %s consumer returns %d",
+ __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos,
+ rdma_show_cm_event(__entry->event), __entry->result
+ )
+);
+
+DECLARE_EVENT_CLASS(cma_client_class,
+ TP_PROTO(
+ const struct ib_device *device
+ ),
+
+ TP_ARGS(device),
+
+ TP_STRUCT__entry(
+ __string(name, device->name)
+ ),
+
+ TP_fast_assign(
+ __assign_str(name);
+ ),
+
+ TP_printk("device name=%s",
+ __get_str(name)
+ )
+);
+
+#define DEFINE_CMA_CLIENT_EVENT(name) \
+ DEFINE_EVENT(cma_client_class, cm_##name, \
+ TP_PROTO( \
+ const struct ib_device *device \
+ ), \
+ TP_ARGS(device))
+
+DEFINE_CMA_CLIENT_EVENT(add_one);
+DEFINE_CMA_CLIENT_EVENT(remove_one);
+
+#endif /* _TRACE_RDMA_CMA_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE cma_trace
+
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index ff40a450b5d2..05102769a918 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -36,12 +36,15 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/cgroup_rdma.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <rdma/ib_verbs.h>
#include <rdma/opa_addr.h>
#include <rdma/ib_mad.h>
#include <rdma/restrack.h>
#include "mad_priv.h"
+#include "restrack.h"
/* Total number of ports combined across all struct ib_devices's */
#define RDMA_MAX_PORTS 8192
@@ -54,21 +57,38 @@ struct pkey_index_qp_list {
struct list_head qp_list;
};
+/**
+ * struct rdma_dev_net - rdma net namespace metadata for a net
+ * @nl_sock: Pointer to netlink socket
+ * @net: Pointer to owner net namespace
+ * @id: xarray id to identify the net namespace.
+ */
+struct rdma_dev_net {
+ struct sock *nl_sock;
+ possible_net_t net;
+ u32 id;
+};
+
extern const struct attribute_group ib_dev_attr_group;
extern bool ib_devices_shared_netns;
+extern unsigned int rdma_dev_net_id;
+
+static inline struct rdma_dev_net *rdma_net_to_dev_net(struct net *net)
+{
+ return net_generic(net, rdma_dev_net_id);
+}
-int ib_device_register_sysfs(struct ib_device *device);
-void ib_device_unregister_sysfs(struct ib_device *device);
int ib_device_rename(struct ib_device *ibdev, const char *name);
+int ib_device_set_dim(struct ib_device *ibdev, u8 use_dim);
-typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
+typedef void (*roce_netdev_callback)(struct ib_device *device, u32 port,
struct net_device *idev, void *cookie);
-typedef bool (*roce_netdev_filter)(struct ib_device *device, u8 port,
+typedef bool (*roce_netdev_filter)(struct ib_device *device, u32 port,
struct net_device *idev, void *cookie);
struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
- unsigned int port);
+ u32 port);
void ib_enum_roce_netdev(struct ib_device *ib_dev,
roce_netdev_filter filter,
@@ -88,6 +108,15 @@ typedef int (*nldev_callback)(struct ib_device *device,
int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
struct netlink_callback *cb);
+struct ib_client_nl_info {
+ struct sk_buff *nl_msg;
+ struct device *cdev;
+ u32 port;
+ u64 abi;
+};
+int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name,
+ struct ib_client_nl_info *res);
+
enum ib_cache_gid_default_mode {
IB_CACHE_GID_DEFAULT_MODE_SET,
IB_CACHE_GID_DEFAULT_MODE_DELETE
@@ -97,28 +126,29 @@ int ib_cache_gid_parse_type_str(const char *buf);
const char *ib_cache_gid_type_str(enum ib_gid_type gid_type);
-void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
+void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u32 port,
struct net_device *ndev,
unsigned long gid_type_mask,
enum ib_cache_gid_default_mode mode);
-int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
+int ib_cache_gid_add(struct ib_device *ib_dev, u32 port,
union ib_gid *gid, struct ib_gid_attr *attr);
-int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
+int ib_cache_gid_del(struct ib_device *ib_dev, u32 port,
union ib_gid *gid, struct ib_gid_attr *attr);
-int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
+int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u32 port,
struct net_device *ndev);
int roce_gid_mgmt_init(void);
void roce_gid_mgmt_cleanup(void);
-unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port);
+unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u32 port);
int ib_cache_setup_one(struct ib_device *device);
void ib_cache_cleanup_one(struct ib_device *device);
void ib_cache_release_one(struct ib_device *device);
+void ib_dispatch_event_clients(struct ib_event *event);
#ifdef CONFIG_CGROUP_RDMA
void ib_device_register_rdmacg(struct ib_device *device);
@@ -169,7 +199,7 @@ void ib_mad_cleanup(void);
int ib_sa_init(void);
void ib_sa_cleanup(void);
-int rdma_nl_init(void);
+void rdma_nl_init(void);
void rdma_nl_exit(void);
int ib_nl_handle_resolve_resp(struct sk_buff *skb,
@@ -182,15 +212,15 @@ int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
struct nlmsghdr *nlh,
struct netlink_ext_ack *extack);
-int ib_get_cached_subnet_prefix(struct ib_device *device,
- u8 port_num,
- u64 *sn_pfx);
+void ib_get_cached_subnet_prefix(struct ib_device *device,
+ u32 port_num,
+ u64 *sn_pfx);
#ifdef CONFIG_SECURITY_INFINIBAND
void ib_security_release_port_pkey_list(struct ib_device *device);
void ib_security_cache_change(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
u64 subnet_prefix);
int ib_security_modify_qp(struct ib_qp *qp,
@@ -215,7 +245,7 @@ static inline void ib_security_release_port_pkey_list(struct ib_device *device)
}
static inline void ib_security_cache_change(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
u64 subnet_prefix)
{
}
@@ -286,46 +316,15 @@ struct ib_device *ib_device_get_by_index(const struct net *net, u32 index);
void nldev_init(void);
void nldev_exit(void);
-static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
- struct ib_pd *pd,
- struct ib_qp_init_attr *attr,
- struct ib_udata *udata,
- struct ib_uobject *uobj)
-{
- struct ib_qp *qp;
-
- if (!dev->ops.create_qp)
- return ERR_PTR(-EOPNOTSUPP);
-
- qp = dev->ops.create_qp(pd, attr, udata);
- if (IS_ERR(qp))
- return qp;
-
- qp->device = dev;
- qp->pd = pd;
- qp->uobject = uobj;
- qp->real_qp = qp;
- /*
- * We don't track XRC QPs for now, because they don't have PD
- * and more importantly they are created internaly by driver,
- * see mlx5 create_dev_resources() as an example.
- */
- if (attr->qp_type < IB_QPT_XRC_INI) {
- qp->res.type = RDMA_RESTRACK_QP;
- if (uobj)
- rdma_restrack_uadd(&qp->res);
- else
- rdma_restrack_kadd(&qp->res);
- } else
- qp->res.valid = false;
-
- return qp;
-}
+struct ib_qp *ib_create_qp_user(struct ib_device *dev, struct ib_pd *pd,
+ struct ib_qp_init_attr *attr,
+ struct ib_udata *udata,
+ struct ib_uqp_object *uobj, const char *caller);
+
+void ib_qp_usecnt_inc(struct ib_qp *qp);
+void ib_qp_usecnt_dec(struct ib_qp *qp);
struct rdma_dev_addr;
-int rdma_resolve_ip_route(struct sockaddr *src_addr,
- const struct sockaddr *dst_addr,
- struct rdma_dev_addr *addr);
int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
const union ib_gid *dgid,
@@ -342,14 +341,34 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr);
void ib_free_port_attrs(struct ib_core_device *coredev);
int ib_setup_port_attrs(struct ib_core_device *coredev);
+struct rdma_hw_stats *ib_get_hw_stats_port(struct ib_device *ibdev, u32 port_num);
+void ib_device_release_hw_stats(struct hw_stats_device_data *data);
+int ib_setup_device_attrs(struct ib_device *ibdev);
int rdma_compatdev_set(u8 enable);
-int ib_port_register_module_stat(struct ib_device *device, u8 port_num,
- struct kobject *kobj, struct kobj_type *ktype,
- const char *name);
-void ib_port_unregister_module_stat(struct kobject *kobj);
+int ib_port_register_client_groups(struct ib_device *ibdev, u32 port_num,
+ const struct attribute_group **groups);
+void ib_port_unregister_client_groups(struct ib_device *ibdev, u32 port_num,
+ const struct attribute_group **groups);
int ib_device_set_netns_put(struct sk_buff *skb,
struct ib_device *dev, u32 ns_fd);
+
+int rdma_nl_net_init(struct rdma_dev_net *rnet);
+void rdma_nl_net_exit(struct rdma_dev_net *rnet);
+
+struct rdma_umap_priv {
+ struct vm_area_struct *vma;
+ struct list_head list;
+ struct rdma_user_mmap_entry *entry;
+};
+
+void rdma_umap_priv_init(struct rdma_umap_priv *priv,
+ struct vm_area_struct *vma,
+ struct rdma_user_mmap_entry *entry);
+
+void ib_cq_pool_cleanup(struct ib_device *dev);
+
+bool rdma_nl_get_privileged_qkey(void);
#endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
new file mode 100644
index 000000000000..c3aa6d7fc66b
--- /dev/null
+++ b/drivers/infiniband/core/counters.c
@@ -0,0 +1,681 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2019 Mellanox Technologies. All rights reserved.
+ */
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_counter.h>
+
+#include "core_priv.h"
+#include "restrack.h"
+
+#define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID)
+
+static int __counter_set_mode(struct rdma_port_counter *port_counter,
+ enum rdma_nl_counter_mode new_mode,
+ enum rdma_nl_counter_mask new_mask,
+ bool bind_opcnt)
+{
+ if (new_mode == RDMA_COUNTER_MODE_AUTO) {
+ if (new_mask & (~ALL_AUTO_MODE_MASKS))
+ return -EINVAL;
+ if (port_counter->num_counters)
+ return -EBUSY;
+ }
+
+ port_counter->mode.mode = new_mode;
+ port_counter->mode.mask = new_mask;
+ port_counter->mode.bind_opcnt = bind_opcnt;
+ return 0;
+}
+
+/*
+ * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode
+ *
+ * @dev: Device to operate
+ * @port: Port to use
+ * @mask: Mask to configure
+ * @extack: Message to the user
+ *
+ * Return 0 on success. If the counter mode wasn't changed then it is
+ * considered a success as well.
+ * Return -EBUSY when changing to auto mode while there are bound counters.
+ *
+ */
+int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port,
+ enum rdma_nl_counter_mask mask,
+ bool bind_opcnt,
+ struct netlink_ext_ack *extack)
+{
+ struct rdma_port_counter *port_counter;
+ enum rdma_nl_counter_mode mode;
+ int ret;
+
+ port_counter = &dev->port_data[port].port_counter;
+ if (!port_counter->hstats)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&port_counter->lock);
+ if (mask)
+ mode = RDMA_COUNTER_MODE_AUTO;
+ else
+ mode = (port_counter->num_counters) ? RDMA_COUNTER_MODE_MANUAL :
+ RDMA_COUNTER_MODE_NONE;
+
+ if (port_counter->mode.mode == mode &&
+ port_counter->mode.mask == mask &&
+ port_counter->mode.bind_opcnt == bind_opcnt) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = __counter_set_mode(port_counter, mode, mask, bind_opcnt);
+
+out:
+ mutex_unlock(&port_counter->lock);
+ if (ret == -EBUSY)
+ NL_SET_ERR_MSG(
+ extack,
+ "Modifying auto mode is not allowed when there is a bound QP");
+ return ret;
+}
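A hedged sketch of a caller of the helper above (in the kernel it is reached from the rdma statistics netlink path); the wrapper function and the choice of port 1 are illustrative, while the mask bits and the signature come from the code itself.

/* Illustrative only: switch port 1 of @dev to auto mode, grouping counters
 * per (qp_type, pid) and binding the optional per-op counters as well.
 */
static int example_enable_auto_counters(struct ib_device *dev,
					struct netlink_ext_ack *extack)
{
	return rdma_counter_set_auto_mode(dev, 1,
					  RDMA_COUNTER_MASK_QP_TYPE |
					  RDMA_COUNTER_MASK_PID,
					  true, extack);
}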
+
+static void auto_mode_init_counter(struct rdma_counter *counter,
+ const struct ib_qp *qp,
+ enum rdma_nl_counter_mask new_mask)
+{
+ struct auto_mode_param *param = &counter->mode.param;
+
+ counter->mode.mode = RDMA_COUNTER_MODE_AUTO;
+ counter->mode.mask = new_mask;
+
+ if (new_mask & RDMA_COUNTER_MASK_QP_TYPE)
+ param->qp_type = qp->qp_type;
+}
+
+static int __rdma_counter_bind_qp(struct rdma_counter *counter,
+ struct ib_qp *qp, u32 port)
+{
+ int ret;
+
+ if (qp->counter)
+ return -EINVAL;
+
+ if (!qp->device->ops.counter_bind_qp)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&counter->lock);
+ ret = qp->device->ops.counter_bind_qp(counter, qp, port);
+ mutex_unlock(&counter->lock);
+
+ return ret;
+}
+
+int rdma_counter_modify(struct ib_device *dev, u32 port,
+ unsigned int index, bool enable)
+{
+ struct rdma_hw_stats *stats;
+ int ret = 0;
+
+ if (!dev->ops.modify_hw_stat)
+ return -EOPNOTSUPP;
+
+ stats = ib_get_hw_stats_port(dev, port);
+ if (!stats || index >= stats->num_counters ||
+ !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
+ return -EINVAL;
+
+ mutex_lock(&stats->lock);
+
+ if (enable != test_bit(index, stats->is_disabled))
+ goto out;
+
+ ret = dev->ops.modify_hw_stat(dev, port, index, enable);
+ if (ret)
+ goto out;
+
+ if (enable)
+ clear_bit(index, stats->is_disabled);
+ else
+ set_bit(index, stats->is_disabled);
+out:
+ mutex_unlock(&stats->lock);
+ return ret;
+}
+
+static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
+ struct ib_qp *qp,
+ enum rdma_nl_counter_mode mode,
+ bool bind_opcnt)
+{
+ struct rdma_port_counter *port_counter;
+ struct rdma_counter *counter;
+ int ret;
+
+ if (!dev->ops.counter_dealloc || !dev->ops.counter_alloc_stats)
+ return NULL;
+
+ counter = rdma_zalloc_drv_obj(dev, rdma_counter);
+ if (!counter)
+ return NULL;
+
+ counter->device = dev;
+ counter->port = port;
+
+ dev->ops.counter_init(counter);
+
+ rdma_restrack_new(&counter->res, RDMA_RESTRACK_COUNTER);
+ counter->stats = dev->ops.counter_alloc_stats(counter);
+ if (!counter->stats)
+ goto err_stats;
+
+ port_counter = &dev->port_data[port].port_counter;
+ mutex_lock(&port_counter->lock);
+ switch (mode) {
+ case RDMA_COUNTER_MODE_MANUAL:
+ ret = __counter_set_mode(port_counter, RDMA_COUNTER_MODE_MANUAL,
+ 0, bind_opcnt);
+ if (ret) {
+ mutex_unlock(&port_counter->lock);
+ goto err_mode;
+ }
+ break;
+ case RDMA_COUNTER_MODE_AUTO:
+ auto_mode_init_counter(counter, qp, port_counter->mode.mask);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ mutex_unlock(&port_counter->lock);
+ goto err_mode;
+ }
+
+ port_counter->num_counters++;
+ mutex_unlock(&port_counter->lock);
+
+ counter->mode.mode = mode;
+ counter->mode.bind_opcnt = bind_opcnt;
+ kref_init(&counter->kref);
+ mutex_init(&counter->lock);
+
+ ret = __rdma_counter_bind_qp(counter, qp, port);
+ if (ret)
+ goto err_mode;
+
+ rdma_restrack_parent_name(&counter->res, &qp->res);
+ rdma_restrack_add(&counter->res);
+ return counter;
+
+err_mode:
+ rdma_free_hw_stats_struct(counter->stats);
+err_stats:
+ rdma_restrack_put(&counter->res);
+ kfree(counter);
+ return NULL;
+}
+
+static void rdma_counter_free(struct rdma_counter *counter)
+{
+ struct rdma_port_counter *port_counter;
+
+ port_counter = &counter->device->port_data[counter->port].port_counter;
+ mutex_lock(&port_counter->lock);
+ port_counter->num_counters--;
+ if (!port_counter->num_counters &&
+ (port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL))
+ __counter_set_mode(port_counter, RDMA_COUNTER_MODE_NONE, 0,
+ false);
+
+ mutex_unlock(&port_counter->lock);
+
+ rdma_restrack_del(&counter->res);
+ rdma_free_hw_stats_struct(counter->stats);
+ kfree(counter);
+}
+
+static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter,
+ enum rdma_nl_counter_mask auto_mask)
+{
+ struct auto_mode_param *param = &counter->mode.param;
+ bool match = true;
+
+ if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE)
+ match &= (param->qp_type == qp->qp_type);
+
+ if (auto_mask & RDMA_COUNTER_MASK_PID)
+ match &= (task_pid_nr(counter->res.task) ==
+ task_pid_nr(qp->res.task));
+
+ return match;
+}
+
+static int __rdma_counter_unbind_qp(struct ib_qp *qp, u32 port)
+{
+ struct rdma_counter *counter = qp->counter;
+ int ret;
+
+ if (!qp->device->ops.counter_unbind_qp)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&counter->lock);
+ ret = qp->device->ops.counter_unbind_qp(qp, port);
+ mutex_unlock(&counter->lock);
+
+ return ret;
+}
+
+static void counter_history_stat_update(struct rdma_counter *counter)
+{
+ struct ib_device *dev = counter->device;
+ struct rdma_port_counter *port_counter;
+ int i;
+
+ port_counter = &dev->port_data[counter->port].port_counter;
+ if (!port_counter->hstats)
+ return;
+
+ rdma_counter_query_stats(counter);
+
+ for (i = 0; i < counter->stats->num_counters; i++)
+ port_counter->hstats->value[i] += counter->stats->value[i];
+}
+
+/*
+ * rdma_get_counter_auto_mode - Find the counter that @qp should be bound
+ * to in auto mode
+ *
+ * Return: The counter (with ref-count increased) if found
+ */
+static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp,
+ u32 port)
+{
+ struct rdma_port_counter *port_counter;
+ struct rdma_counter *counter = NULL;
+ struct ib_device *dev = qp->device;
+ struct rdma_restrack_entry *res;
+ struct rdma_restrack_root *rt;
+ unsigned long id = 0;
+
+ port_counter = &dev->port_data[port].port_counter;
+ rt = &dev->res[RDMA_RESTRACK_COUNTER];
+ xa_lock(&rt->xa);
+ xa_for_each(&rt->xa, id, res) {
+ counter = container_of(res, struct rdma_counter, res);
+ if ((counter->device != qp->device) || (counter->port != port))
+ goto next;
+
+ if (auto_mode_match(qp, counter, port_counter->mode.mask))
+ break;
+next:
+ counter = NULL;
+ }
+
+ if (counter && !kref_get_unless_zero(&counter->kref))
+ counter = NULL;
+
+ xa_unlock(&rt->xa);
+ return counter;
+}
+
+static void counter_release(struct kref *kref)
+{
+ struct rdma_counter *counter;
+
+ counter = container_of(kref, struct rdma_counter, kref);
+ counter_history_stat_update(counter);
+ counter->device->ops.counter_dealloc(counter);
+ rdma_counter_free(counter);
+}
+
+/*
+ * rdma_counter_bind_qp_auto - Check and bind the QP to a counter based on
+ * the auto-mode rule
+ */
+int rdma_counter_bind_qp_auto(struct ib_qp *qp, u32 port)
+{
+ struct rdma_port_counter *port_counter;
+ struct ib_device *dev = qp->device;
+ struct rdma_counter *counter;
+ int ret;
+
+ if (!rdma_restrack_is_tracked(&qp->res) || rdma_is_kernel_res(&qp->res))
+ return 0;
+
+ if (!rdma_is_port_valid(dev, port))
+ return -EINVAL;
+
+ port_counter = &dev->port_data[port].port_counter;
+ if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO)
+ return 0;
+
+ counter = rdma_get_counter_auto_mode(qp, port);
+ if (counter) {
+ ret = __rdma_counter_bind_qp(counter, qp, port);
+ if (ret) {
+ kref_put(&counter->kref, counter_release);
+ return ret;
+ }
+ } else {
+ counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_AUTO,
+ port_counter->mode.bind_opcnt);
+ if (!counter)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/*
+ * rdma_counter_unbind_qp - Unbind a qp from a counter
+ * @force:
+ * true - Decrease the counter ref-count anyway (e.g., qp destroy)
+ */
+int rdma_counter_unbind_qp(struct ib_qp *qp, u32 port, bool force)
+{
+ struct rdma_counter *counter = qp->counter;
+ int ret;
+
+ if (!counter)
+ return -EINVAL;
+
+ ret = __rdma_counter_unbind_qp(qp, port);
+ if (ret && !force)
+ return ret;
+
+ kref_put(&counter->kref, counter_release);
+ return 0;
+}
+
+int rdma_counter_query_stats(struct rdma_counter *counter)
+{
+ struct ib_device *dev = counter->device;
+ int ret;
+
+ if (!dev->ops.counter_update_stats)
+ return -EINVAL;
+
+ mutex_lock(&counter->lock);
+ ret = dev->ops.counter_update_stats(counter);
+ mutex_unlock(&counter->lock);
+
+ return ret;
+}
+
+static u64 get_running_counters_hwstat_sum(struct ib_device *dev,
+ u32 port, u32 index)
+{
+ struct rdma_restrack_entry *res;
+ struct rdma_restrack_root *rt;
+ struct rdma_counter *counter;
+ unsigned long id = 0;
+ u64 sum = 0;
+
+ rt = &dev->res[RDMA_RESTRACK_COUNTER];
+ xa_lock(&rt->xa);
+ xa_for_each(&rt->xa, id, res) {
+ if (!rdma_restrack_get(res))
+ continue;
+
+ xa_unlock(&rt->xa);
+
+ counter = container_of(res, struct rdma_counter, res);
+ if ((counter->device != dev) || (counter->port != port) ||
+ rdma_counter_query_stats(counter))
+ goto next;
+
+ sum += counter->stats->value[index];
+
+next:
+ xa_lock(&rt->xa);
+ rdma_restrack_put(res);
+ }
+
+ xa_unlock(&rt->xa);
+ return sum;
+}
+
+/*
+ * rdma_counter_get_hwstat_value() - Get the sum value of all counters on a
+ * specific port, including the running ones and history data
+ */
+u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u32 port, u32 index)
+{
+ struct rdma_port_counter *port_counter;
+ u64 sum;
+
+ port_counter = &dev->port_data[port].port_counter;
+ if (!port_counter->hstats)
+ return 0;
+
+ sum = get_running_counters_hwstat_sum(dev, port, index);
+ sum += port_counter->hstats->value[index];
+
+ return sum;
+}
+
+static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num)
+{
+ struct rdma_restrack_entry *res = NULL;
+ struct ib_qp *qp = NULL;
+
+ res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_QP, qp_num);
+ if (IS_ERR(res))
+ return NULL;
+
+ qp = container_of(res, struct ib_qp, res);
+ if (qp->qp_type == IB_QPT_RAW_PACKET && !rdma_dev_has_raw_cap(dev))
+ goto err;
+
+ return qp;
+
+err:
+ rdma_restrack_put(res);
+ return NULL;
+}
+
+static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev,
+ u32 counter_id)
+{
+ struct rdma_restrack_entry *res;
+ struct rdma_counter *counter;
+
+ res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_COUNTER, counter_id);
+ if (IS_ERR(res))
+ return NULL;
+
+ counter = container_of(res, struct rdma_counter, res);
+ kref_get(&counter->kref);
+ rdma_restrack_put(res);
+
+ return counter;
+}
+
+/*
+ * rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id
+ */
+int rdma_counter_bind_qpn(struct ib_device *dev, u32 port,
+ u32 qp_num, u32 counter_id)
+{
+ struct rdma_port_counter *port_counter;
+ struct rdma_counter *counter;
+ struct ib_qp *qp;
+ int ret;
+
+ port_counter = &dev->port_data[port].port_counter;
+ if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO)
+ return -EINVAL;
+
+ qp = rdma_counter_get_qp(dev, qp_num);
+ if (!qp)
+ return -ENOENT;
+
+ counter = rdma_get_counter_by_id(dev, counter_id);
+ if (!counter) {
+ ret = -ENOENT;
+ goto err;
+ }
+
+ if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) {
+ ret = -EINVAL;
+ goto err_task;
+ }
+
+ if ((counter->device != qp->device) || (counter->port != qp->port)) {
+ ret = -EINVAL;
+ goto err_task;
+ }
+
+ ret = __rdma_counter_bind_qp(counter, qp, port);
+ if (ret)
+ goto err_task;
+
+ rdma_restrack_put(&qp->res);
+ return 0;
+
+err_task:
+ kref_put(&counter->kref, counter_release);
+err:
+ rdma_restrack_put(&qp->res);
+ return ret;
+}
+
+/*
+ * rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it
+ * The id of new counter is returned in @counter_id
+ */
+int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u32 port,
+ u32 qp_num, u32 *counter_id)
+{
+ struct rdma_port_counter *port_counter;
+ struct rdma_counter *counter;
+ struct ib_qp *qp;
+ int ret;
+
+ if (!rdma_is_port_valid(dev, port))
+ return -EINVAL;
+
+ port_counter = &dev->port_data[port].port_counter;
+ if (!port_counter->hstats)
+ return -EOPNOTSUPP;
+
+ if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO)
+ return -EINVAL;
+
+ qp = rdma_counter_get_qp(dev, qp_num);
+ if (!qp)
+ return -ENOENT;
+
+ if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_MANUAL, true);
+ if (!counter) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ if (counter_id)
+ *counter_id = counter->id;
+
+ rdma_restrack_put(&qp->res);
+ return 0;
+
+err:
+ rdma_restrack_put(&qp->res);
+ return ret;
+}
+
+/*
+ * rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter
+ */
+int rdma_counter_unbind_qpn(struct ib_device *dev, u32 port,
+ u32 qp_num, u32 counter_id)
+{
+ struct rdma_port_counter *port_counter;
+ struct ib_qp *qp;
+ int ret;
+
+ if (!rdma_is_port_valid(dev, port))
+ return -EINVAL;
+
+ qp = rdma_counter_get_qp(dev, qp_num);
+ if (!qp)
+ return -ENOENT;
+
+ if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ port_counter = &dev->port_data[port].port_counter;
+ if (!qp->counter || qp->counter->id != counter_id ||
+ port_counter->mode.mode != RDMA_COUNTER_MODE_MANUAL) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = rdma_counter_unbind_qp(qp, port, false);
+
+out:
+ rdma_restrack_put(&qp->res);
+ return ret;
+}
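To tie the manual-mode helpers together, a hedged sketch of the bind/unbind round trip built from rdma_counter_bind_qpn_alloc() and rdma_counter_unbind_qpn(); the wrapper is illustrative and error handling is trimmed.

/* Illustrative only: allocate a manual counter for QP @qp_num on @port,
 * read it, then release it again.
 */
static int example_manual_counter(struct ib_device *dev, u32 port, u32 qp_num)
{
	u32 counter_id;
	int ret;

	ret = rdma_counter_bind_qpn_alloc(dev, port, qp_num, &counter_id);
	if (ret)
		return ret;

	/* ... e.g. sum hw stats via rdma_counter_get_hwstat_value() ... */

	return rdma_counter_unbind_qpn(dev, port, qp_num, counter_id);
}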
+
+int rdma_counter_get_mode(struct ib_device *dev, u32 port,
+ enum rdma_nl_counter_mode *mode,
+ enum rdma_nl_counter_mask *mask,
+ bool *opcnt)
+{
+ struct rdma_port_counter *port_counter;
+
+ port_counter = &dev->port_data[port].port_counter;
+ *mode = port_counter->mode.mode;
+ *mask = port_counter->mode.mask;
+ *opcnt = port_counter->mode.bind_opcnt;
+
+ return 0;
+}
+
+void rdma_counter_init(struct ib_device *dev)
+{
+ struct rdma_port_counter *port_counter;
+ u32 port, i;
+
+ if (!dev->port_data)
+ return;
+
+ rdma_for_each_port(dev, port) {
+ port_counter = &dev->port_data[port].port_counter;
+ port_counter->mode.mode = RDMA_COUNTER_MODE_NONE;
+ mutex_init(&port_counter->lock);
+
+ if (!dev->ops.alloc_hw_port_stats)
+ continue;
+
+ port_counter->hstats = dev->ops.alloc_hw_port_stats(dev, port);
+ if (!port_counter->hstats)
+ goto fail;
+ }
+
+ return;
+
+fail:
+ for (i = port; i >= rdma_start_port(dev); i--) {
+ port_counter = &dev->port_data[port].port_counter;
+ rdma_free_hw_stats_struct(port_counter->hstats);
+ port_counter->hstats = NULL;
+ mutex_destroy(&port_counter->lock);
+ }
+}
+
+void rdma_counter_release(struct ib_device *dev)
+{
+ struct rdma_port_counter *port_counter;
+ u32 port;
+
+ rdma_for_each_port(dev, port) {
+ port_counter = &dev->port_data[port].port_counter;
+ rdma_free_hw_stats_struct(port_counter->hstats);
+ mutex_destroy(&port_counter->lock);
+ }
+}
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index a4c81992267c..584537c71545 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -1,20 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2015 HGST, a Western Digital Company.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
-#include <linux/module.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>
+#include "core_priv.h"
+
+#include <trace/events/rdma_core.h>
+/* Max size for shared CQ, may require tuning */
+#define IB_MAX_SHARED_CQ_SZ 4096U
+
/* # of WCs to poll for with a single call to ib_poll_cq */
#define IB_POLL_BATCH 16
#define IB_POLL_BATCH_DIRECT 8
@@ -26,18 +23,86 @@
#define IB_POLL_FLAGS \
(IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)
+static const struct dim_cq_moder
+rdma_dim_prof[RDMA_DIM_PARAMS_NUM_PROFILES] = {
+ {1, 0, 1, 0},
+ {1, 0, 4, 0},
+ {2, 0, 4, 0},
+ {2, 0, 8, 0},
+ {4, 0, 8, 0},
+ {16, 0, 8, 0},
+ {16, 0, 16, 0},
+ {32, 0, 16, 0},
+ {32, 0, 32, 0},
+};
+
+static void ib_cq_rdma_dim_work(struct work_struct *w)
+{
+ struct dim *dim = container_of(w, struct dim, work);
+ struct ib_cq *cq = dim->priv;
+
+ u16 usec = rdma_dim_prof[dim->profile_ix].usec;
+ u16 comps = rdma_dim_prof[dim->profile_ix].comps;
+
+ dim->state = DIM_START_MEASURE;
+
+ trace_cq_modify(cq, comps, usec);
+ cq->device->ops.modify_cq(cq, comps, usec);
+}
+
+static void rdma_dim_init(struct ib_cq *cq)
+{
+ struct dim *dim;
+
+ if (!cq->device->ops.modify_cq || !cq->device->use_cq_dim ||
+ cq->poll_ctx == IB_POLL_DIRECT)
+ return;
+
+ dim = kzalloc(sizeof(struct dim), GFP_KERNEL);
+ if (!dim)
+ return;
+
+ dim->state = DIM_START_MEASURE;
+ dim->tune_state = DIM_GOING_RIGHT;
+ dim->profile_ix = RDMA_DIM_START_PROFILE;
+ dim->priv = cq;
+ cq->dim = dim;
+
+ INIT_WORK(&dim->work, ib_cq_rdma_dim_work);
+}
+
+static void rdma_dim_destroy(struct ib_cq *cq)
+{
+ if (!cq->dim)
+ return;
+
+ cancel_work_sync(&cq->dim->work);
+ kfree(cq->dim);
+}
+
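The DIM machinery above only engages for devices that opt in. As a hedged note, the opt-in is the use_cq_dim flag on the ib_device (checked in rdma_dim_init() above); the snippet is illustrative rather than lifted from any particular provider driver.

/* Illustrative only: a provider driver whose modify_cq can retune moderation
 * opts kernel CQs on this device into dynamic interrupt moderation.
 */
static void example_enable_cq_dim(struct ib_device *ibdev)
{
	ibdev->use_cq_dim = true;
}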
+static int __poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
+{
+ int rc;
+
+ rc = ib_poll_cq(cq, num_entries, wc);
+ trace_cq_poll(cq, num_entries, rc);
+ return rc;
+}
+
static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs,
int batch)
{
int i, n, completed = 0;
+ trace_cq_process(cq);
+
/*
* budget might be (-1) if the caller does not
* want to bound this call, thus we need unsigned
* minimum here.
*/
- while ((n = ib_poll_cq(cq, min_t(u32, batch,
- budget - completed), wcs)) > 0) {
+ while ((n = __poll_cq(cq, min_t(u32, batch,
+ budget - completed), wcs)) > 0) {
for (i = 0; i < n; i++) {
struct ib_wc *wc = &wcs[i];
@@ -57,7 +122,7 @@ static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs,
}
/**
- * ib_process_direct_cq - process a CQ in caller context
+ * ib_process_cq_direct - process a CQ in caller context
* @cq: CQ to process
* @budget: number of CQEs to poll for
*
@@ -86,20 +151,27 @@ static void ib_cq_completion_direct(struct ib_cq *cq, void *private)
static int ib_poll_handler(struct irq_poll *iop, int budget)
{
struct ib_cq *cq = container_of(iop, struct ib_cq, iop);
+ struct dim *dim = cq->dim;
int completed;
completed = __ib_process_cq(cq, budget, cq->wc, IB_POLL_BATCH);
if (completed < budget) {
irq_poll_complete(&cq->iop);
- if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
+ if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0) {
+ trace_cq_reschedule(cq);
irq_poll_sched(&cq->iop);
+ }
}
+ if (dim)
+ rdma_dim(dim, completed);
+
return completed;
}
static void ib_cq_completion_softirq(struct ib_cq *cq, void *private)
{
+ trace_cq_schedule(cq);
irq_poll_sched(&cq->iop);
}
@@ -113,10 +185,13 @@ static void ib_cq_poll_work(struct work_struct *work)
if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
queue_work(cq->comp_wq, &cq->work);
+ else if (cq->dim)
+ rdma_dim(cq->dim, completed);
}
static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
{
+ trace_cq_schedule(cq);
queue_work(cq->comp_wq, &cq->work);
}
@@ -128,17 +203,15 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
* @comp_vector: HCA completion vectors for this CQ
* @poll_ctx: context to poll the CQ from.
* @caller: module owner name.
- * @udata: Valid user data or NULL for kernel object
*
* This is the proper interface to allocate a CQ for in-kernel users. A
* CQ allocated with this interface will automatically be polled from the
* specified context. The ULP must use wr->wr_cqe instead of wr->wr_id
* to use this CQ abstraction.
*/
-struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
- int nr_cqe, int comp_vector,
- enum ib_poll_context poll_ctx,
- const char *caller, struct ib_udata *udata)
+struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe,
+ int comp_vector, enum ib_poll_context poll_ctx,
+ const char *caller)
{
struct ib_cq_init_attr cq_attr = {
.cqe = nr_cqe,
@@ -147,24 +220,28 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
struct ib_cq *cq;
int ret = -ENOMEM;
- cq = dev->ops.create_cq(dev, &cq_attr, NULL);
- if (IS_ERR(cq))
- return cq;
+ cq = rdma_zalloc_drv_obj(dev, ib_cq);
+ if (!cq)
+ return ERR_PTR(ret);
cq->device = dev;
- cq->uobject = NULL;
- cq->event_handler = NULL;
cq->cq_context = private;
cq->poll_ctx = poll_ctx;
atomic_set(&cq->usecnt, 0);
+ cq->comp_vector = comp_vector;
cq->wc = kmalloc_array(IB_POLL_BATCH, sizeof(*cq->wc), GFP_KERNEL);
if (!cq->wc)
- goto out_destroy_cq;
+ goto out_free_cq;
+
+ rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+ rdma_restrack_set_name(&cq->res, caller);
- cq->res.type = RDMA_RESTRACK_CQ;
- rdma_restrack_set_task(&cq->res, caller);
- rdma_restrack_kadd(&cq->res);
+ ret = dev->ops.create_cq(cq, &cq_attr, NULL);
+ if (ret)
+ goto out_free_wc;
+
+ rdma_dim_init(cq);
switch (cq->poll_ctx) {
case IB_POLL_DIRECT:
@@ -186,31 +263,71 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
break;
default:
ret = -EINVAL;
- goto out_free_wc;
+ goto out_destroy_cq;
}
+ rdma_restrack_add(&cq->res);
+ trace_cq_alloc(cq, nr_cqe, comp_vector, poll_ctx);
return cq;
+out_destroy_cq:
+ rdma_dim_destroy(cq);
+ cq->device->ops.destroy_cq(cq, NULL);
out_free_wc:
+ rdma_restrack_put(&cq->res);
kfree(cq->wc);
- rdma_restrack_del(&cq->res);
-out_destroy_cq:
- cq->device->ops.destroy_cq(cq, udata);
+out_free_cq:
+ kfree(cq);
+ trace_cq_alloc_error(nr_cqe, comp_vector, poll_ctx, ret);
return ERR_PTR(ret);
}
-EXPORT_SYMBOL(__ib_alloc_cq_user);
+EXPORT_SYMBOL(__ib_alloc_cq);
+
+/**
+ * __ib_alloc_cq_any - allocate a completion queue
+ * @dev: device to allocate the CQ for
+ * @private: driver private data, accessible from cq->cq_context
+ * @nr_cqe: number of CQEs to allocate
+ * @poll_ctx: context to poll the CQ from
+ * @caller: module owner name
+ *
+ * Attempt to spread ULP Completion Queues over each device's interrupt
+ * vectors. A simple best-effort mechanism is used.
+ */
+struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
+ int nr_cqe, enum ib_poll_context poll_ctx,
+ const char *caller)
+{
+ static atomic_t counter;
+ int comp_vector = 0;
+
+ if (dev->num_comp_vectors > 1)
+ comp_vector =
+ atomic_inc_return(&counter) %
+ min_t(int, dev->num_comp_vectors, num_online_cpus());
+
+ return __ib_alloc_cq(dev, private, nr_cqe, comp_vector, poll_ctx,
+ caller);
+}
+EXPORT_SYMBOL(__ib_alloc_cq_any);
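For orientation, a minimal caller-side sketch, assuming a hypothetical ULP structure; ib_alloc_cq_any() and ib_free_cq() are the wrappers ULPs normally call rather than the __-prefixed helpers above:

/* Illustrative only: allocate a softirq-polled CQ on any vector, free it later. */
static int example_ulp_setup_cq(struct ib_device *dev, struct example_ulp *ulp)
{
	ulp->cq = ib_alloc_cq_any(dev, ulp, 128, IB_POLL_SOFTIRQ);
	if (IS_ERR(ulp->cq))
		return PTR_ERR(ulp->cq);
	return 0;
}

static void example_ulp_teardown_cq(struct example_ulp *ulp)
{
	ib_free_cq(ulp->cq);
}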
/**
* ib_free_cq - free a completion queue
* @cq: completion queue to free.
- * @udata: User data or NULL for kernel object
*/
-void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
+void ib_free_cq(struct ib_cq *cq)
{
- int ret;
+ int ret = 0;
if (WARN_ON_ONCE(atomic_read(&cq->usecnt)))
return;
+ if (WARN_ON_ONCE(cq->cqe_used))
+ return;
+
+ if (cq->device->ops.pre_destroy_cq) {
+ ret = cq->device->ops.pre_destroy_cq(cq);
+ WARN_ONCE(ret, "Disable of kernel CQ shouldn't fail");
+ }
switch (cq->poll_ctx) {
case IB_POLL_DIRECT:
@@ -226,9 +343,173 @@ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
WARN_ON_ONCE(1);
}
- kfree(cq->wc);
+ rdma_dim_destroy(cq);
+ trace_cq_free(cq);
+ if (cq->device->ops.post_destroy_cq)
+ cq->device->ops.post_destroy_cq(cq);
+ else
+ ret = cq->device->ops.destroy_cq(cq, NULL);
+ WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail");
rdma_restrack_del(&cq->res);
- ret = cq->device->ops.destroy_cq(cq, udata);
- WARN_ON_ONCE(ret);
+ kfree(cq->wc);
+ kfree(cq);
+}
+EXPORT_SYMBOL(ib_free_cq);
+
+void ib_cq_pool_cleanup(struct ib_device *dev)
+{
+ struct ib_cq *cq, *n;
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(dev->cq_pools); i++) {
+ list_for_each_entry_safe(cq, n, &dev->cq_pools[i],
+ pool_entry) {
+ WARN_ON(cq->cqe_used);
+ list_del(&cq->pool_entry);
+ cq->shared = false;
+ ib_free_cq(cq);
+ }
+ }
+}
+
+static int ib_alloc_cqs(struct ib_device *dev, unsigned int nr_cqes,
+ enum ib_poll_context poll_ctx)
+{
+ LIST_HEAD(tmp_list);
+ unsigned int nr_cqs, i;
+ struct ib_cq *cq, *n;
+ int ret;
+
+ if (poll_ctx > IB_POLL_LAST_POOL_TYPE) {
+ WARN_ON_ONCE(poll_ctx > IB_POLL_LAST_POOL_TYPE);
+ return -EINVAL;
+ }
+
+ /*
+ * Allocate at least as many CQEs as requested, and otherwise
+ * a reasonable batch size so that we can share CQs between
+ * multiple users instead of allocating a larger number of CQs.
+ */
+ nr_cqes = min_t(unsigned int, dev->attrs.max_cqe,
+ max(nr_cqes, IB_MAX_SHARED_CQ_SZ));
+ nr_cqs = min_t(unsigned int, dev->num_comp_vectors, num_online_cpus());
+ for (i = 0; i < nr_cqs; i++) {
+ cq = ib_alloc_cq(dev, NULL, nr_cqes, i, poll_ctx);
+ if (IS_ERR(cq)) {
+ ret = PTR_ERR(cq);
+ goto out_free_cqs;
+ }
+ cq->shared = true;
+ list_add_tail(&cq->pool_entry, &tmp_list);
+ }
+
+ spin_lock_irq(&dev->cq_pools_lock);
+ list_splice(&tmp_list, &dev->cq_pools[poll_ctx]);
+ spin_unlock_irq(&dev->cq_pools_lock);
+
+ return 0;
+
+out_free_cqs:
+ list_for_each_entry_safe(cq, n, &tmp_list, pool_entry) {
+ cq->shared = false;
+ ib_free_cq(cq);
+ }
+ return ret;
+}
+
+/**
+ * ib_cq_pool_get() - Find the least used completion queue that matches
+ * a given cpu hint (or least used for wild card affinity) and fits
+ * nr_cqe.
+ * @dev: rdma device
+ * @nr_cqe: number of needed cqe entries
+ * @comp_vector_hint: completion vector hint (-1) for the driver to assign
+ * a comp vector based on an internal counter
+ * @poll_ctx: cq polling context
+ *
+ * Finds a cq that satisfies @comp_vector_hint and @nr_cqe requirements and
+ * claims entries in it for us. If there is no available cq, allocate
+ * a new cq with the requirements and add it to the device pool.
+ * IB_POLL_DIRECT cannot be used for shared cqs so it is not a valid value
+ * for @poll_ctx.
+ */
+struct ib_cq *ib_cq_pool_get(struct ib_device *dev, unsigned int nr_cqe,
+ int comp_vector_hint,
+ enum ib_poll_context poll_ctx)
+{
+ static unsigned int default_comp_vector;
+ unsigned int vector, num_comp_vectors;
+ struct ib_cq *cq, *found = NULL;
+ int ret;
+
+ if (poll_ctx > IB_POLL_LAST_POOL_TYPE) {
+ WARN_ON_ONCE(poll_ctx > IB_POLL_LAST_POOL_TYPE);
+ return ERR_PTR(-EINVAL);
+ }
+
+ num_comp_vectors =
+ min_t(unsigned int, dev->num_comp_vectors, num_online_cpus());
+ /* Project the affinity to the device completion vector range */
+ if (comp_vector_hint < 0) {
+ comp_vector_hint =
+ (READ_ONCE(default_comp_vector) + 1) % num_comp_vectors;
+ WRITE_ONCE(default_comp_vector, comp_vector_hint);
+ }
+ vector = comp_vector_hint % num_comp_vectors;
+
+ /*
+ * Find the least used CQ with correct affinity and
+ * enough free CQ entries
+ */
+ while (!found) {
+ spin_lock_irq(&dev->cq_pools_lock);
+ list_for_each_entry(cq, &dev->cq_pools[poll_ctx],
+ pool_entry) {
+ /*
+ * Check to see if we have found a CQ with the
+ * correct completion vector
+ */
+ if (vector != cq->comp_vector)
+ continue;
+ if (cq->cqe_used + nr_cqe > cq->cqe)
+ continue;
+ found = cq;
+ break;
+ }
+
+ if (found) {
+ found->cqe_used += nr_cqe;
+ spin_unlock_irq(&dev->cq_pools_lock);
+
+ return found;
+ }
+ spin_unlock_irq(&dev->cq_pools_lock);
+
+ /*
+ * Didn't find a match or ran out of CQs in the device
+ * pool, allocate a new array of CQs.
+ */
+ ret = ib_alloc_cqs(dev, nr_cqe, poll_ctx);
+ if (ret)
+ return ERR_PTR(ret);
+ }
+
+ return found;
+}
+EXPORT_SYMBOL(ib_cq_pool_get);
+
+/**
+ * ib_cq_pool_put - Return a CQ taken from a shared pool.
+ * @cq: The CQ to return.
+ * @nr_cqe: The max number of cqes that the user had requested.
+ */
+void ib_cq_pool_put(struct ib_cq *cq, unsigned int nr_cqe)
+{
+ if (WARN_ON_ONCE(nr_cqe > cq->cqe_used))
+ return;
+
+ spin_lock_irq(&cq->device->cq_pools_lock);
+ cq->cqe_used -= nr_cqe;
+ spin_unlock_irq(&cq->device->cq_pools_lock);
}
-EXPORT_SYMBOL(ib_free_cq_user);
+EXPORT_SYMBOL(ib_cq_pool_put);
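A short sketch of the intended pool usage (hypothetical caller; nr_cqe is the per-user CQE budget that must be handed back via ib_cq_pool_put()):

/* Illustrative only: claim CQEs from the shared pool, then return them. */
static struct ib_cq *example_get_shared_cq(struct ib_device *dev,
					   unsigned int nr_cqe)
{
	/* -1 lets the core spread users across completion vectors */
	return ib_cq_pool_get(dev, nr_cqe, -1, IB_POLL_SOFTIRQ);
}

static void example_put_shared_cq(struct ib_cq *cq, unsigned int nr_cqe)
{
	ib_cq_pool_put(cq, nr_cqe);
}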
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 78dc07c6ac4b..13e8a1714bbd 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -39,13 +39,13 @@
#include <linux/init.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
-#include <net/netns/generic.h>
#include <linux/security.h>
#include <linux/notifier.h>
#include <linux/hashtable.h>
#include <rdma/rdma_netlink.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
+#include <rdma/rdma_counter.h>
#include "core_priv.h"
#include "restrack.h"
@@ -58,6 +58,7 @@ struct workqueue_struct *ib_comp_wq;
struct workqueue_struct *ib_comp_unbound_wq;
struct workqueue_struct *ib_wq;
EXPORT_SYMBOL_GPL(ib_wq);
+static struct workqueue_struct *ib_unreg_wq;
/*
* Each of the three rwsem locks (devices, clients, client_data) protects the
@@ -93,28 +94,24 @@ static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC);
static DECLARE_RWSEM(devices_rwsem);
#define DEVICE_REGISTERED XA_MARK_1
-static LIST_HEAD(client_list);
+static u32 highest_client_id;
#define CLIENT_REGISTERED XA_MARK_1
static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC);
static DECLARE_RWSEM(clients_rwsem);
+static void ib_client_put(struct ib_client *client)
+{
+ if (refcount_dec_and_test(&client->uses))
+ complete(&client->uses_zero);
+}
+
/*
* If client_data is registered then the corresponding client must also still
* be registered.
*/
#define CLIENT_DATA_REGISTERED XA_MARK_1
-/**
- * struct rdma_dev_net - rdma net namespace metadata for a net
- * @net: Pointer to owner net namespace
- * @id: xarray id to identify the net namespace.
- */
-struct rdma_dev_net {
- possible_net_t net;
- u32 id;
-};
-
-static unsigned int rdma_dev_net_id;
+unsigned int rdma_dev_net_id;
/*
* A list of net namespaces is maintained in an xarray. This is necessary
@@ -132,17 +129,14 @@ module_param_named(netns_mode, ib_devices_shared_netns, bool, 0444);
MODULE_PARM_DESC(netns_mode,
"Share device among net namespaces; default=1 (shared)");
/**
- * rdma_dev_access_netns() - Return whether a rdma device can be accessed
+ * rdma_dev_access_netns() - Return whether an rdma device can be accessed
* from a specified net namespace or not.
- * @device: Pointer to rdma device which needs to be checked
+ * @dev: Pointer to rdma device which needs to be checked
* @net: Pointer to net namespace for which access is to be checked
*
- * rdma_dev_access_netns() - Return whether a rdma device can be accessed
- * from a specified net namespace or not. When
- * rdma device is in shared mode, it ignores the
- * net namespace. When rdma device is exclusive
- * to a net namespace, rdma device net namespace is
- * checked against the specified one.
+ * When the rdma device is in shared mode, it ignores the net namespace.
+ * When the rdma device is exclusive to a net namespace, rdma device net
+ * namespace is checked against the specified one.
*/
bool rdma_dev_access_netns(const struct ib_device *dev, const struct net *net)
{
@@ -151,6 +145,33 @@ bool rdma_dev_access_netns(const struct ib_device *dev, const struct net *net)
}
EXPORT_SYMBOL(rdma_dev_access_netns);
+/**
+ * rdma_dev_has_raw_cap() - Returns whether a specified rdma device has
+ * CAP_NET_RAW capability or not.
+ *
+ * @dev: Pointer to rdma device whose capability to be checked
+ *
+ * Returns true if an rdma device's owning user namespace has CAP_NET_RAW
+ * capability, otherwise false. When the rdma subsystem is in legacy shared
+ * network namespace mode, the default net namespace is considered.
+ */
+bool rdma_dev_has_raw_cap(const struct ib_device *dev)
+{
+ const struct net *net;
+
+ /* Network namespace is the resource whose user namespace
+ * is to be considered. When in shared mode, there is no reliable
+ * network namespace resource, so consider the default net namespace.
+ */
+ if (ib_devices_shared_netns)
+ net = &init_net;
+ else
+ net = read_pnet(&dev->coredev.rdma_net);
+
+ return ns_capable(net->user_ns, CAP_NET_RAW);
+}
+EXPORT_SYMBOL(rdma_dev_has_raw_cap);
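As a hedged usage sketch (the surrounding helper is hypothetical), callers are expected to gate raw-packet resources on this device-level check rather than on the current task's capabilities:

/* Illustrative only: refuse raw-packet objects when the owning namespace lacks CAP_NET_RAW. */
static int example_check_raw_allowed(struct ib_device *dev)
{
	if (!rdma_dev_has_raw_cap(dev))
		return -EPERM;
	return 0;
}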
+
/*
* xarray has this behavior where it won't iterate over NULL values stored in
* allocated arrays. So we need our own iterator to see all values stored in
@@ -215,23 +236,6 @@ static void __ibdev_printk(const char *level, const struct ib_device *ibdev,
printk("%s(NULL ib_device): %pV", level, vaf);
}
-void ibdev_printk(const char *level, const struct ib_device *ibdev,
- const char *format, ...)
-{
- struct va_format vaf;
- va_list args;
-
- va_start(args, format);
-
- vaf.fmt = format;
- vaf.va = &args;
-
- __ibdev_printk(level, ibdev, &vaf);
-
- va_end(args);
-}
-EXPORT_SYMBOL(ibdev_printk);
-
#define define_ibdev_printk_level(func, level) \
void func(const struct ib_device *ibdev, const char *fmt, ...) \
{ \
@@ -270,7 +274,7 @@ struct ib_port_data_rcu {
struct ib_port_data pdata[];
};
-static int ib_device_check_mandatory(struct ib_device *device)
+static void ib_device_check_mandatory(struct ib_device *device)
{
#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device_ops, x), #x }
static const struct {
@@ -279,7 +283,6 @@ static int ib_device_check_mandatory(struct ib_device *device)
} mandatory_table[] = {
IB_MANDATORY_FUNC(query_device),
IB_MANDATORY_FUNC(query_port),
- IB_MANDATORY_FUNC(query_pkey),
IB_MANDATORY_FUNC(alloc_pd),
IB_MANDATORY_FUNC(dealloc_pd),
IB_MANDATORY_FUNC(create_qp),
@@ -292,6 +295,7 @@ static int ib_device_check_mandatory(struct ib_device *device)
IB_MANDATORY_FUNC(poll_cq),
IB_MANDATORY_FUNC(req_notify_cq),
IB_MANDATORY_FUNC(get_dma_mr),
+ IB_MANDATORY_FUNC(reg_user_mr),
IB_MANDATORY_FUNC(dereg_mr),
IB_MANDATORY_FUNC(get_port_immutable)
};
@@ -305,8 +309,6 @@ static int ib_device_check_mandatory(struct ib_device *device)
break;
}
}
-
- return 0;
}
/*
@@ -375,7 +377,7 @@ struct ib_device *ib_device_get_by_name(const char *name,
down_read(&devices_rwsem);
device = __ib_device_get_by_name(name);
if (device && driver_id != RDMA_DRIVER_UNKNOWN &&
- device->driver_id != driver_id)
+ device->ops.driver_id != driver_id)
device = NULL;
if (device) {
@@ -409,27 +411,54 @@ static int rename_compat_devs(struct ib_device *device)
int ib_device_rename(struct ib_device *ibdev, const char *name)
{
+ unsigned long index;
+ void *client_data;
int ret;
down_write(&devices_rwsem);
if (!strcmp(name, dev_name(&ibdev->dev))) {
- ret = 0;
- goto out;
+ up_write(&devices_rwsem);
+ return 0;
}
if (__ib_device_get_by_name(name)) {
- ret = -EEXIST;
- goto out;
+ up_write(&devices_rwsem);
+ return -EEXIST;
}
ret = device_rename(&ibdev->dev, name);
- if (ret)
- goto out;
- strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
+ if (ret) {
+ up_write(&devices_rwsem);
+ return ret;
+ }
+
+ strscpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
ret = rename_compat_devs(ibdev);
-out:
- up_write(&devices_rwsem);
- return ret;
+
+ downgrade_write(&devices_rwsem);
+ down_read(&ibdev->client_data_rwsem);
+ xan_for_each_marked(&ibdev->client_data, index, client_data,
+ CLIENT_DATA_REGISTERED) {
+ struct ib_client *client = xa_load(&clients, index);
+
+ if (!client || !client->rename)
+ continue;
+
+ client->rename(ibdev, client_data);
+ }
+ up_read(&ibdev->client_data_rwsem);
+ rdma_nl_notify_event(ibdev, 0, RDMA_RENAME_EVENT);
+ up_read(&devices_rwsem);
+ return 0;
+}
+
+int ib_device_set_dim(struct ib_device *ibdev, u8 use_dim)
+{
+ if (use_dim > 1)
+ return -EINVAL;
+ ibdev->use_cq_dim = use_dim;
+
+ return 0;
}
static int alloc_name(struct ib_device *ibdev, const char *name)
@@ -440,7 +469,7 @@ static int alloc_name(struct ib_device *ibdev, const char *name)
int rc;
int i;
- lockdep_assert_held_exclusive(&devices_rwsem);
+ lockdep_assert_held_write(&devices_rwsem);
ida_init(&inuse);
xa_for_each (&devices, index, device) {
char buf[IB_DEVICE_NAME_MAX];
@@ -474,18 +503,27 @@ static void ib_device_release(struct device *device)
free_netdevs(dev);
WARN_ON(refcount_read(&dev->refcount));
- ib_cache_release_one(dev);
- ib_security_release_port_pkey_list(dev);
- xa_destroy(&dev->compat_devs);
- xa_destroy(&dev->client_data);
- if (dev->port_data)
+ if (dev->hw_stats_data)
+ ib_device_release_hw_stats(dev->hw_stats_data);
+ if (dev->port_data) {
+ ib_cache_release_one(dev);
+ ib_security_release_port_pkey_list(dev);
+ rdma_counter_release(dev);
kfree_rcu(container_of(dev->port_data, struct ib_port_data_rcu,
pdata[0]),
rcu_head);
+ }
+
+ mutex_destroy(&dev->subdev_lock);
+ mutex_destroy(&dev->unregistration_lock);
+ mutex_destroy(&dev->compat_devs_mutex);
+
+ xa_destroy(&dev->compat_devs);
+ xa_destroy(&dev->client_data);
kfree_rcu(dev, rcu_head);
}
-static int ib_device_uevent(struct device *device,
+static int ib_device_uevent(const struct device *device,
struct kobj_uevent_env *env)
{
if (add_uevent_var(env, "NAME=%s", dev_name(device)))
@@ -498,9 +536,9 @@ static int ib_device_uevent(struct device *device,
return 0;
}
-static const void *net_namespace(struct device *d)
+static const void *net_namespace(const struct device *d)
{
- struct ib_core_device *coredev =
+ const struct ib_core_device *coredev =
container_of(d, struct ib_core_device, dev);
return read_pnet(&coredev->rdma_net);
@@ -517,6 +555,8 @@ static struct class ib_class = {
static void rdma_init_coredev(struct ib_core_device *coredev,
struct ib_device *dev, struct net *net)
{
+ bool is_full_dev = &dev->coredev == coredev;
+
/* This BUILD_BUG_ON is intended to catch layout change
* of union of ib_core_device and device.
* dev must be the first element as ib_core and providers
@@ -528,6 +568,13 @@ static void rdma_init_coredev(struct ib_core_device *coredev,
coredev->dev.class = &ib_class;
coredev->dev.groups = dev->groups;
+
+ /*
+ * Don't expose hw counters outside of the init namespace.
+ */
+ if (!is_full_dev && dev->hw_stats_attr_index)
+ coredev->dev.groups[dev->hw_stats_attr_index] = NULL;
+
device_initialize(&coredev->dev);
coredev->owner = dev;
INIT_LIST_HEAD(&coredev->port_list);
@@ -537,6 +584,8 @@ static void rdma_init_coredev(struct ib_core_device *coredev,
/**
* _ib_alloc_device - allocate an IB device struct
* @size:size of structure to allocate
+ * @net: network namespace the device should be located in; the namespace
+ * must stay valid until ib_register_device() is completed.
*
* Low-level drivers should use ib_alloc_device() to allocate &struct
* ib_device. @size is the size of the structure to be allocated,
@@ -544,9 +593,10 @@ static void rdma_init_coredev(struct ib_core_device *coredev,
* ib_dealloc_device() must be used to free structures allocated with
* ib_alloc_device().
*/
-struct ib_device *_ib_alloc_device(size_t size)
+struct ib_device *_ib_alloc_device(size_t size, struct net *net)
{
struct ib_device *device;
+ unsigned int i;
if (WARN_ON(size < sizeof(struct ib_device)))
return NULL;
@@ -560,11 +610,19 @@ struct ib_device *_ib_alloc_device(size_t size)
return NULL;
}
- device->groups[0] = &ib_dev_attr_group;
- rdma_init_coredev(&device->coredev, device, &init_net);
+ /* ib_devices_shared_netns can't change while we have active namespaces
+ * in the system which means either init_net is passed or the user has
+ * no idea what they are doing.
+ *
+ * To avoid breaking backward compatibility, when in shared mode,
+ * force to init the device in the init_net.
+ */
+ net = ib_devices_shared_netns ? &init_net : net;
+ rdma_init_coredev(&device->coredev, device, net);
INIT_LIST_HEAD(&device->event_handler_list);
- spin_lock_init(&device->event_handler_lock);
+ spin_lock_init(&device->qp_open_list_lock);
+ init_rwsem(&device->event_handler_rwsem);
mutex_init(&device->unregistration_lock);
/*
* client_data needs to be alloc because we don't want our mark to be
@@ -577,6 +635,48 @@ struct ib_device *_ib_alloc_device(size_t size)
init_completion(&device->unreg_completion);
INIT_WORK(&device->unregistration_work, ib_unregister_work);
+ spin_lock_init(&device->cq_pools_lock);
+ for (i = 0; i < ARRAY_SIZE(device->cq_pools); i++)
+ INIT_LIST_HEAD(&device->cq_pools[i]);
+
+ rwlock_init(&device->cache_lock);
+
+ device->uverbs_cmd_mask =
+ BIT_ULL(IB_USER_VERBS_CMD_ALLOC_MW) |
+ BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) |
+ BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) |
+ BIT_ULL(IB_USER_VERBS_CMD_CLOSE_XRCD) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_XSRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_MW) |
+ BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) |
+ BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) |
+ BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) |
+ BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) |
+ BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) |
+ BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_OPEN_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_OPEN_XRCD) |
+ BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) |
+ BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_REG_MR) |
+ BIT_ULL(IB_USER_VERBS_CMD_REREG_MR) |
+ BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ);
+
+ mutex_init(&device->subdev_lock);
+ INIT_LIST_HEAD(&device->subdev_list_head);
+ INIT_LIST_HEAD(&device->subdev_list);
+
return device;
}
EXPORT_SYMBOL(_ib_alloc_device);
@@ -633,6 +733,14 @@ static int add_client_context(struct ib_device *device,
down_write(&device->client_data_rwsem);
/*
+ * So long as the client is registered hold both the client and device
+ * unregistration locks.
+ */
+ if (!refcount_inc_not_zero(&client->uses))
+ goto out_unlock;
+ refcount_inc(&device->refcount);
+
+ /*
* Another caller to add_client_context got here first and has already
* completely initialized context.
*/
@@ -645,8 +753,20 @@ static int add_client_context(struct ib_device *device,
if (ret)
goto out;
downgrade_write(&device->client_data_rwsem);
- if (client->add)
- client->add(device);
+ if (client->add) {
+ if (client->add(device)) {
+ /*
+ * If a client fails to add then the error code is
+ * ignored, but we won't call any more ops on this
+ * client.
+ */
+ xa_erase(&device->client_data, client->client_id);
+ up_read(&device->client_data_rwsem);
+ ib_device_put(device);
+ ib_client_put(client);
+ return 0;
+ }
+ }
/* Readers shall not see a client until add has been completed */
xa_set_mark(&device->client_data, client->client_id,
@@ -655,6 +775,9 @@ static int add_client_context(struct ib_device *device,
return 0;
out:
+ ib_device_put(device);
+ ib_client_put(client);
+out_unlock:
up_write(&device->client_data_rwsem);
return ret;
}
@@ -674,7 +797,7 @@ static void remove_client_context(struct ib_device *device,
client_data = xa_load(&device->client_data, client_id);
xa_clear_mark(&device->client_data, client_id, CLIENT_DATA_REGISTERED);
client = xa_load(&clients, client_id);
- downgrade_write(&device->client_data_rwsem);
+ up_write(&device->client_data_rwsem);
/*
* Notice we cannot be holding any exclusive locks when calling the
@@ -684,23 +807,19 @@ static void remove_client_context(struct ib_device *device,
*
* For this reason clients and drivers should not call the
* unregistration functions while holding any locks.
- *
- * It tempting to drop the client_data_rwsem too, but this is required
- * to ensure that unregister_client does not return until all clients
- * are completely unregistered, which is required to avoid module
- * unloading races.
*/
if (client->remove)
client->remove(device, client_data);
xa_erase(&device->client_data, client_id);
- up_read(&device->client_data_rwsem);
+ ib_device_put(device);
+ ib_client_put(client);
}
static int alloc_port_data(struct ib_device *device)
{
struct ib_port_data_rcu *pdata_rcu;
- unsigned int port;
+ u32 port;
if (device->port_data)
return 0;
@@ -709,6 +828,10 @@ static int alloc_port_data(struct ib_device *device)
if (WARN_ON(!device->phys_port_cnt))
return -EINVAL;
+ /* Reserve U32_MAX so the logic to go over all the ports is sane */
+ if (WARN_ON(device->phys_port_cnt == U32_MAX))
+ return -EINVAL;
+
/*
* device->port_data is indexed directly by the port number to make
* access to this data as efficient as possible.
@@ -717,7 +840,7 @@ static int alloc_port_data(struct ib_device *device)
* empty slots at the beginning.
*/
pdata_rcu = kzalloc(struct_size(pdata_rcu, pdata,
- rdma_end_port(device) + 1),
+ size_add(rdma_end_port(device), 1)),
GFP_KERNEL);
if (!pdata_rcu)
return -ENOMEM;
@@ -740,7 +863,7 @@ static int alloc_port_data(struct ib_device *device)
return 0;
}
-static int verify_immutable(const struct ib_device *dev, u8 port)
+static int verify_immutable(const struct ib_device *dev, u32 port)
{
return WARN_ON(!rdma_cap_ib_mad(dev, port) &&
rdma_max_mad_size(dev, port) != 0);
@@ -748,7 +871,7 @@ static int verify_immutable(const struct ib_device *dev, u8 port)
static int setup_port_data(struct ib_device *device)
{
- unsigned int port;
+ u32 port;
int ret;
ret = alloc_port_data(device);
@@ -769,6 +892,20 @@ static int setup_port_data(struct ib_device *device)
return 0;
}
+/**
+ * ib_port_immutable_read() - Read rdma port's immutable data
+ * @dev: IB device
+ * @port: port number whose immutable data to read. It starts with index 1 and
+ * is valid up to and including rdma_end_port().
+ */
+const struct ib_port_immutable*
+ib_port_immutable_read(struct ib_device *dev, unsigned int port)
+{
+ WARN_ON(!rdma_is_port_valid(dev, port));
+ return &dev->port_data[port].immutable;
+}
+EXPORT_SYMBOL(ib_port_immutable_read);
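A usage sketch, assuming the caller already holds a valid port number as the WARN_ON expects; the field names come from struct ib_port_immutable:

/* Illustrative only: check a port's core capability flags. */
static bool example_port_supports_ib_mad(struct ib_device *dev, u32 port)
{
	const struct ib_port_immutable *immutable = ib_port_immutable_read(dev, port);

	return immutable->core_cap_flags & RDMA_CORE_CAP_IB_MAD;
}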
+
void ib_get_device_fw_str(struct ib_device *dev, char *str)
{
if (dev->ops.get_dev_fw_str)
@@ -789,15 +926,8 @@ static void ib_policy_change_task(struct work_struct *work)
rdma_for_each_port (dev, i) {
u64 sp;
- int ret = ib_get_cached_subnet_prefix(dev,
- i,
- &sp);
-
- WARN_ONCE(ret,
- "ib_get_cached_subnet_prefix err: %d, this should never happen here\n",
- ret);
- if (!ret)
- ib_security_cache_change(dev, i, sp);
+ ib_get_cached_subnet_prefix(dev, i, &sp);
+ ib_security_cache_change(dev, i, sp);
}
}
up_read(&devices_rwsem);
@@ -865,7 +995,9 @@ static int add_one_compat_dev(struct ib_device *device,
cdev->dev.parent = device->dev.parent;
rdma_init_coredev(cdev, device, read_pnet(&rnet->net));
cdev->dev.release = compatdev_release;
- dev_set_name(&cdev->dev, "%s", dev_name(&device->dev));
+ ret = dev_set_name(&cdev->dev, "%s", dev_name(&device->dev));
+ if (ret)
+ goto add_err;
ret = device_add(&cdev->dev);
if (ret)
@@ -1019,7 +1151,7 @@ int rdma_compatdev_set(u8 enable)
static void rdma_dev_exit_net(struct net *net)
{
- struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id);
+ struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
struct ib_device *dev;
unsigned long index;
int ret;
@@ -1053,25 +1185,32 @@ static void rdma_dev_exit_net(struct net *net)
}
up_read(&devices_rwsem);
+ rdma_nl_net_exit(rnet);
xa_erase(&rdma_nets, rnet->id);
}
static __net_init int rdma_dev_init_net(struct net *net)
{
- struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id);
+ struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
unsigned long index;
struct ib_device *dev;
int ret;
+ write_pnet(&rnet->net, net);
+
+ ret = rdma_nl_net_init(rnet);
+ if (ret)
+ return ret;
+
/* No need to create any compat devices in default init_net. */
if (net_eq(net, &init_net))
return 0;
- write_pnet(&rnet->net, net);
-
ret = xa_alloc(&rdma_nets, &rnet->id, rnet, xa_limit_32b, GFP_KERNEL);
- if (ret)
+ if (ret) {
+ rdma_nl_net_exit(rnet);
return ret;
+ }
down_read(&devices_rwsem);
xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
@@ -1114,7 +1253,7 @@ static int assign_name(struct ib_device *device, const char *name)
ret = -ENFILE;
goto out;
}
- strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);
+ strscpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);
ret = xa_alloc_cyclic(&devices, &device->index, device, xa_limit_31b,
&last_id, GFP_KERNEL);
@@ -1126,44 +1265,6 @@ out:
return ret;
}
-static void setup_dma_device(struct ib_device *device)
-{
- struct device *parent = device->dev.parent;
-
- WARN_ON_ONCE(device->dma_device);
- if (device->dev.dma_ops) {
- /*
- * The caller provided custom DMA operations. Copy the
- * DMA-related fields that are used by e.g. dma_alloc_coherent()
- * into device->dev.
- */
- device->dma_device = &device->dev;
- if (!device->dev.dma_mask) {
- if (parent)
- device->dev.dma_mask = parent->dma_mask;
- else
- WARN_ON_ONCE(true);
- }
- if (!device->dev.coherent_dma_mask) {
- if (parent)
- device->dev.coherent_dma_mask =
- parent->coherent_dma_mask;
- else
- WARN_ON_ONCE(true);
- }
- } else {
- /*
- * The caller did not provide custom DMA operations. Use the
- * DMA mapping operations of the parent device.
- */
- WARN_ON_ONCE(!parent);
- device->dma_device = parent;
- }
- /* Setup default max segment size for all IB devices */
- dma_set_max_seg_size(device->dma_device, SZ_2G);
-
-}
-
/*
* setup_device() allocates memory and sets up data that requires calling the
* device ops, this is the only reason these actions are not done during
@@ -1174,11 +1275,7 @@ static int setup_device(struct ib_device *device)
struct ib_udata uhw = {.outlen = 0, .inlen = 0};
int ret;
- setup_dma_device(device);
-
- ret = ib_device_check_mandatory(device);
- if (ret)
- return ret;
+ ib_device_check_mandatory(device);
ret = setup_port_data(device);
if (ret) {
@@ -1199,7 +1296,7 @@ static int setup_device(struct ib_device *device)
static void disable_device(struct ib_device *device)
{
- struct ib_client *client;
+ u32 cid;
WARN_ON(!refcount_read(&device->refcount));
@@ -1207,10 +1304,21 @@ static void disable_device(struct ib_device *device)
xa_clear_mark(&devices, device->index, DEVICE_REGISTERED);
up_write(&devices_rwsem);
+ /*
+ * Remove clients in LIFO order, see assign_client_id. This could be
+ * more efficient if xarray learns to reverse iterate. Since no new
+ * clients can be added to this ib_device past this point we only need
+ * the maximum possible client_id value here.
+ */
down_read(&clients_rwsem);
- list_for_each_entry_reverse(client, &client_list, list)
- remove_client_context(device, client->client_id);
+ cid = highest_client_id;
up_read(&clients_rwsem);
+ while (cid) {
+ cid--;
+ remove_client_context(device, cid);
+ }
+
+ ib_cq_pool_cleanup(device);
/* Pairs with refcount_set in enable_device */
ib_device_put(device);
@@ -1269,9 +1377,49 @@ out:
return ret;
}
+static void prevent_dealloc_device(struct ib_device *ib_dev)
+{
+}
+
+static void ib_device_notify_register(struct ib_device *device)
+{
+ struct net_device *netdev;
+ u32 port;
+ int ret;
+
+ down_read(&devices_rwsem);
+
+ /* Mark for userspace that device is ready */
+ kobject_uevent(&device->dev.kobj, KOBJ_ADD);
+
+ ret = rdma_nl_notify_event(device, 0, RDMA_REGISTER_EVENT);
+ if (ret)
+ goto out;
+
+ rdma_for_each_port(device, port) {
+ netdev = ib_device_get_netdev(device, port);
+ if (!netdev)
+ continue;
+
+ ret = rdma_nl_notify_event(device, port,
+ RDMA_NETDEV_ATTACH_EVENT);
+ dev_put(netdev);
+ if (ret)
+ goto out;
+ }
+
+out:
+ up_read(&devices_rwsem);
+}
+
/**
* ib_register_device - Register an IB device with IB core
- * @device:Device to register
+ * @device: Device to register
+ * @name: unique string device name. This may include a '%' which will
+ * cause a unique index to be added to the passed device name.
+ * @dma_device: pointer to a DMA-capable device. If %NULL, then the IB
+ * device will be used. In this case the caller should fully
+ * set up the ibdev for DMA. This usually means using dma_virt_ops.
*
* Low-level drivers use ib_register_device() to register their
* devices with the IB core. All registered clients will receive a
@@ -1282,7 +1430,8 @@ out:
* asynchronously then the device pointer may become freed as soon as this
* function returns.
*/
-int ib_register_device(struct ib_device *device, const char *name)
+int ib_register_device(struct ib_device *device, const char *name,
+ struct device *dma_device)
{
int ret;
@@ -1290,6 +1439,14 @@ int ib_register_device(struct ib_device *device, const char *name)
if (ret)
return ret;
+ /*
+ * If the caller does not provide a DMA capable device then the IB core
+ * will set up ib_sge and scatterlist structures that stash the kernel
+ * virtual address into the address field.
+ */
+ WARN_ON(dma_device && !dma_device->dma_parms);
+ device->dma_device = dma_device;
+
ret = setup_device(device);
if (ret)
return ret;
@@ -1301,8 +1458,16 @@ int ib_register_device(struct ib_device *device, const char *name)
return ret;
}
+ device->groups[0] = &ib_dev_attr_group;
+ device->groups[1] = device->ops.device_group;
+ ret = ib_setup_device_attrs(device);
+ if (ret)
+ goto cache_cleanup;
+
ib_device_register_rdmacg(device);
+ rdma_counter_init(device);
+
/*
* Ensure that ADD uevent is not fired because it
* is too early and the device is not initialized yet.
@@ -1312,7 +1477,7 @@ int ib_register_device(struct ib_device *device, const char *name)
if (ret)
goto cg_cleanup;
- ret = ib_device_register_sysfs(device);
+ ret = ib_setup_port_attrs(&device->coredev);
if (ret) {
dev_warn(&device->dev,
"Couldn't register device with driver model\n");
@@ -1320,9 +1485,6 @@ int ib_register_device(struct ib_device *device, const char *name)
}
ret = enable_device_and_get(device);
- dev_set_uevent_suppress(&device->dev, false);
- /* Mark for userspace that device is ready */
- kobject_uevent(&device->dev.kobj, KOBJ_ADD);
if (ret) {
void (*dealloc_fn)(struct ib_device *);
@@ -1334,16 +1496,21 @@ int ib_register_device(struct ib_device *device, const char *name)
* possibility for a parallel unregistration along with this
* error flow. Since we have a refcount here we know any
* parallel flow is stopped in disable_device and will see the
- * NULL pointers, causing the responsibility to
+ * special dealloc_driver pointer, causing the responsibility to
* ib_dealloc_device() to revert back to this thread.
*/
dealloc_fn = device->ops.dealloc_driver;
- device->ops.dealloc_driver = NULL;
+ device->ops.dealloc_driver = prevent_dealloc_device;
ib_device_put(device);
__ib_unregister_device(device);
device->ops.dealloc_driver = dealloc_fn;
+ dev_set_uevent_suppress(&device->dev, false);
return ret;
}
+ dev_set_uevent_suppress(&device->dev, false);
+
+ ib_device_notify_register(device);
+
ib_device_put(device);
return 0;
@@ -1353,6 +1520,7 @@ dev_cleanup:
cg_cleanup:
dev_set_uevent_suppress(&device->dev, false);
ib_device_unregister_rdmacg(device);
+cache_cleanup:
ib_cache_cleanup_one(device);
return ret;
}
@@ -1361,9 +1529,21 @@ EXPORT_SYMBOL(ib_register_device);
/* Callers must hold a get on the device. */
static void __ib_unregister_device(struct ib_device *ib_dev)
{
+ struct ib_device *sub, *tmp;
+
+ mutex_lock(&ib_dev->subdev_lock);
+ list_for_each_entry_safe_reverse(sub, tmp,
+ &ib_dev->subdev_list_head,
+ subdev_list) {
+ list_del(&sub->subdev_list);
+ ib_dev->ops.del_sub_dev(sub);
+ ib_device_put(ib_dev);
+ }
+ mutex_unlock(&ib_dev->subdev_lock);
+
/*
* We have a registration lock so that all the calls to unregister are
- * fully fenced, once any unregister returns the device is truely
+ * fully fenced, once any unregister returns the device is truly
* unregistered even if multiple callers are unregistering it at the
* same time. This also interacts with the registration flow and
* provides sane semantics if register and unregister are racing.
@@ -1373,11 +1553,12 @@ static void __ib_unregister_device(struct ib_device *ib_dev)
goto out;
disable_device(ib_dev);
+ rdma_nl_notify_event(ib_dev, 0, RDMA_UNREGISTER_EVENT);
/* Expedite removing unregistered pointers from the hash table */
free_netdevs(ib_dev);
- ib_device_unregister_sysfs(ib_dev);
+ ib_free_port_attrs(&ib_dev->coredev);
device_del(&ib_dev->dev);
ib_device_unregister_rdmacg(ib_dev);
ib_cache_cleanup_one(ib_dev);
@@ -1386,7 +1567,8 @@ static void __ib_unregister_device(struct ib_device *ib_dev)
* Drivers using the new flow may not call ib_dealloc_device except
* in error unwind prior to registration success.
*/
- if (ib_dev->ops.dealloc_driver) {
+ if (ib_dev->ops.dealloc_driver &&
+ ib_dev->ops.dealloc_driver != prevent_dealloc_device) {
WARN_ON(kref_read(&ib_dev->dev.kobj.kref) <= 1);
ib_dealloc_device(ib_dev);
}
@@ -1396,7 +1578,7 @@ out:
/**
* ib_unregister_device - Unregister an IB device
- * @device: The device to unregister
+ * @ib_dev: The device to unregister
*
* Unregister an IB device. All clients will receive a remove callback.
*
@@ -1418,7 +1600,7 @@ EXPORT_SYMBOL(ib_unregister_device);
/**
* ib_unregister_device_and_put - Unregister a device while holding a 'get'
- * device: The device to unregister
+ * @ib_dev: The device to unregister
*
* This is the same as ib_unregister_device(), except it includes an internal
* ib_device_put() that should match a 'get' obtained by the caller.
@@ -1461,7 +1643,7 @@ void ib_unregister_driver(enum rdma_driver_id driver_id)
down_read(&devices_rwsem);
xa_for_each (&devices, index, ib_dev) {
- if (ib_dev->driver_id != driver_id)
+ if (ib_dev->ops.driver_id != driver_id)
continue;
get_device(&ib_dev->dev);
@@ -1488,7 +1670,7 @@ static void ib_unregister_work(struct work_struct *work)
/**
* ib_unregister_device_queued - Unregister a device using a work queue
- * device: The device to unregister
+ * @ib_dev: The device to unregister
*
* This schedules an asynchronous unregistration using a WQ for the device. A
* driver should use this to avoid holding locks while doing unregistration,
@@ -1502,7 +1684,7 @@ void ib_unregister_device_queued(struct ib_device *ib_dev)
WARN_ON(!refcount_read(&ib_dev->refcount));
WARN_ON(!ib_dev->ops.dealloc_driver);
get_device(&ib_dev->dev);
- if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work))
+ if (!queue_work(ib_unreg_wq, &ib_dev->unregistration_work))
put_device(&ib_dev->dev);
}
EXPORT_SYMBOL(ib_unregister_device_queued);
@@ -1594,13 +1776,11 @@ int ib_device_set_netns_put(struct sk_buff *skb,
}
/*
- * Currently supported only for those providers which support
- * disassociation and don't do port specific sysfs init. Once a
- * port_cleanup infrastructure is implemented, this limitation will be
- * removed.
+ * All the ib_clients, including uverbs, are reset when the namespace is
+ * changed and this cannot be blocked waiting for userspace to do
+ * something, so disassociation is mandatory.
*/
- if (!dev->ops.disassociate_ucontext || dev->ops.init_port ||
- ib_devices_shared_netns) {
+ if (!dev->ops.disassociate_ucontext || ib_devices_shared_netns) {
ret = -EOPNOTSUPP;
goto ns_err;
}
@@ -1631,32 +1811,30 @@ static int assign_client_id(struct ib_client *client)
{
int ret;
- down_write(&clients_rwsem);
+ lockdep_assert_held(&clients_rwsem);
/*
* The add/remove callbacks must be called in FIFO/LIFO order. To
* achieve this we assign client_ids so they are sorted in
- * registration order, and retain a linked list we can reverse iterate
- * to get the LIFO order. The extra linked list can go away if xarray
- * learns to reverse iterate.
+ * registration order.
*/
- if (list_empty(&client_list)) {
- client->client_id = 0;
- } else {
- struct ib_client *last;
-
- last = list_last_entry(&client_list, struct ib_client, list);
- client->client_id = last->client_id + 1;
- }
+ client->client_id = highest_client_id;
ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL);
if (ret)
- goto out;
+ return ret;
+ highest_client_id++;
xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
- list_add_tail(&client->list, &client_list);
+ return 0;
+}
-out:
+static void remove_client_id(struct ib_client *client)
+{
+ down_write(&clients_rwsem);
+ xa_erase(&clients, client->client_id);
+ for (; highest_client_id; highest_client_id--)
+ if (xa_load(&clients, highest_client_id - 1))
+ break;
up_write(&clients_rwsem);
- return ret;
}
/**
@@ -1676,23 +1854,35 @@ int ib_register_client(struct ib_client *client)
{
struct ib_device *device;
unsigned long index;
+ bool need_unreg = false;
int ret;
+ refcount_set(&client->uses, 1);
+ init_completion(&client->uses_zero);
+
+ /*
+ * The devices_rwsem is held in write mode to ensure that a racing
+ * ib_register_device() sees a consistent view of clients and devices.
+ */
+ down_write(&devices_rwsem);
+ down_write(&clients_rwsem);
ret = assign_client_id(client);
if (ret)
- return ret;
+ goto out;
- down_read(&devices_rwsem);
+ need_unreg = true;
xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) {
ret = add_client_context(device, client);
- if (ret) {
- up_read(&devices_rwsem);
- ib_unregister_client(client);
- return ret;
- }
+ if (ret)
+ goto out;
}
- up_read(&devices_rwsem);
- return 0;
+ ret = 0;
+out:
+ up_write(&clients_rwsem);
+ up_write(&devices_rwsem);
+ if (need_unreg && ret)
+ ib_unregister_client(client);
+ return ret;
}
EXPORT_SYMBOL(ib_register_client);
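A minimal client sketch matching the new contract where ->add() may fail (names are hypothetical; the client is registered with ib_register_client() from module init and torn down with ib_unregister_client()):

/* Illustrative only: a client whose add callback can now return an error. */
static struct ib_client example_client;

static int example_client_add(struct ib_device *device)
{
	void *priv = kzalloc(sizeof(unsigned long), GFP_KERNEL);

	if (!priv)
		return -ENOMEM;	/* the core ignores the value but stops calling us */
	ib_set_client_data(device, &example_client, priv);
	return 0;
}

static void example_client_remove(struct ib_device *device, void *client_data)
{
	kfree(client_data);
}

static struct ib_client example_client = {
	.name	= "example",
	.add	= example_client_add,
	.remove	= example_client_remove,
};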
@@ -1713,24 +1903,131 @@ void ib_unregister_client(struct ib_client *client)
unsigned long index;
down_write(&clients_rwsem);
+ ib_client_put(client);
xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED);
up_write(&clients_rwsem);
- /*
- * Every device still known must be serialized to make sure we are
- * done with the client callbacks before we return.
- */
- down_read(&devices_rwsem);
- xa_for_each (&devices, index, device)
+
+ /* We do not want to have locks while calling client->remove() */
+ rcu_read_lock();
+ xa_for_each (&devices, index, device) {
+ if (!ib_device_try_get(device))
+ continue;
+ rcu_read_unlock();
+
remove_client_context(device, client->client_id);
- up_read(&devices_rwsem);
- down_write(&clients_rwsem);
- list_del(&client->list);
- xa_erase(&clients, client->client_id);
- up_write(&clients_rwsem);
+ ib_device_put(device);
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+
+ /*
+ * remove_client_context() is not a fence, it can return even though a
+ * removal is ongoing. Wait until all removals are completed.
+ */
+ wait_for_completion(&client->uses_zero);
+ remove_client_id(client);
}
EXPORT_SYMBOL(ib_unregister_client);
+static int __ib_get_global_client_nl_info(const char *client_name,
+ struct ib_client_nl_info *res)
+{
+ struct ib_client *client;
+ unsigned long index;
+ int ret = -ENOENT;
+
+ down_read(&clients_rwsem);
+ xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) {
+ if (strcmp(client->name, client_name) != 0)
+ continue;
+ if (!client->get_global_nl_info) {
+ ret = -EOPNOTSUPP;
+ break;
+ }
+ ret = client->get_global_nl_info(res);
+ if (WARN_ON(ret == -ENOENT))
+ ret = -EINVAL;
+ if (!ret && res->cdev)
+ get_device(res->cdev);
+ break;
+ }
+ up_read(&clients_rwsem);
+ return ret;
+}
+
+static int __ib_get_client_nl_info(struct ib_device *ibdev,
+ const char *client_name,
+ struct ib_client_nl_info *res)
+{
+ unsigned long index;
+ void *client_data;
+ int ret = -ENOENT;
+
+ down_read(&ibdev->client_data_rwsem);
+ xan_for_each_marked (&ibdev->client_data, index, client_data,
+ CLIENT_DATA_REGISTERED) {
+ struct ib_client *client = xa_load(&clients, index);
+
+ if (!client || strcmp(client->name, client_name) != 0)
+ continue;
+ if (!client->get_nl_info) {
+ ret = -EOPNOTSUPP;
+ break;
+ }
+ ret = client->get_nl_info(ibdev, client_data, res);
+ if (WARN_ON(ret == -ENOENT))
+ ret = -EINVAL;
+
+ /*
+ * The cdev is guaranteed valid as long as we are inside the
+ * client_data_rwsem as remove_one can't be called. Keep it
+ * valid for the caller.
+ */
+ if (!ret && res->cdev)
+ get_device(res->cdev);
+ break;
+ }
+ up_read(&ibdev->client_data_rwsem);
+
+ return ret;
+}
+
+/**
+ * ib_get_client_nl_info - Fetch the nl_info from a client
+ * @ibdev: IB device
+ * @client_name: Name of the client
+ * @res: Result of the query
+ */
+int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name,
+ struct ib_client_nl_info *res)
+{
+ int ret;
+
+ if (ibdev)
+ ret = __ib_get_client_nl_info(ibdev, client_name, res);
+ else
+ ret = __ib_get_global_client_nl_info(client_name, res);
+#ifdef CONFIG_MODULES
+ if (ret == -ENOENT) {
+ request_module("rdma-client-%s", client_name);
+ if (ibdev)
+ ret = __ib_get_client_nl_info(ibdev, client_name, res);
+ else
+ ret = __ib_get_global_client_nl_info(client_name, res);
+ }
+#endif
+ if (ret) {
+ if (ret == -ENOENT)
+ return -EOPNOTSUPP;
+ return ret;
+ }
+
+ if (WARN_ON(!res->cdev))
+ return -EINVAL;
+ return 0;
+}
+
/**
* ib_set_client_data - Set IB client context
* @device:Device to set context for
@@ -1762,17 +2059,15 @@ EXPORT_SYMBOL(ib_set_client_data);
*
* ib_register_event_handler() registers an event handler that will be
* called back when asynchronous IB events occur (as defined in
- * chapter 11 of the InfiniBand Architecture Specification). This
- * callback may occur in interrupt context.
+ * chapter 11 of the InfiniBand Architecture Specification). This
+ * callback occurs in workqueue context.
*/
void ib_register_event_handler(struct ib_event_handler *event_handler)
{
- unsigned long flags;
-
- spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
+ down_write(&event_handler->device->event_handler_rwsem);
list_add_tail(&event_handler->list,
&event_handler->device->event_handler_list);
- spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
+ up_write(&event_handler->device->event_handler_rwsem);
}
EXPORT_SYMBOL(ib_register_event_handler);
@@ -1785,35 +2080,83 @@ EXPORT_SYMBOL(ib_register_event_handler);
*/
void ib_unregister_event_handler(struct ib_event_handler *event_handler)
{
- unsigned long flags;
-
- spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
+ down_write(&event_handler->device->event_handler_rwsem);
list_del(&event_handler->list);
- spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
+ up_write(&event_handler->device->event_handler_rwsem);
}
EXPORT_SYMBOL(ib_unregister_event_handler);
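A consumer sketch; with the switch to an rwsem the handler runs from workqueue context as noted above (INIT_IB_EVENT_HANDLER and the prototypes come from rdma/ib_verbs.h, the surrounding names are hypothetical):

/* Illustrative only: log port state changes for a device. */
static void example_event_handler(struct ib_event_handler *handler,
				  struct ib_event *event)
{
	if (event->event == IB_EVENT_PORT_ACTIVE ||
	    event->event == IB_EVENT_PORT_ERR)
		pr_info("port %u changed state\n", event->element.port_num);
}

static struct ib_event_handler example_handler;

static void example_watch_device(struct ib_device *device)
{
	INIT_IB_EVENT_HANDLER(&example_handler, device, example_event_handler);
	ib_register_event_handler(&example_handler);
}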
-/**
- * ib_dispatch_event - Dispatch an asynchronous event
- * @event:Event to dispatch
- *
- * Low-level drivers must call ib_dispatch_event() to dispatch the
- * event to all registered event handlers when an asynchronous event
- * occurs.
- */
-void ib_dispatch_event(struct ib_event *event)
+void ib_dispatch_event_clients(struct ib_event *event)
{
- unsigned long flags;
struct ib_event_handler *handler;
- spin_lock_irqsave(&event->device->event_handler_lock, flags);
+ down_read(&event->device->event_handler_rwsem);
list_for_each_entry(handler, &event->device->event_handler_list, list)
handler->handler(handler, event);
- spin_unlock_irqrestore(&event->device->event_handler_lock, flags);
+ up_read(&event->device->event_handler_rwsem);
+}
+
+static int iw_query_port(struct ib_device *device,
+ u32 port_num,
+ struct ib_port_attr *port_attr)
+{
+ struct in_device *inetdev;
+ struct net_device *netdev;
+
+ memset(port_attr, 0, sizeof(*port_attr));
+
+ netdev = ib_device_get_netdev(device, port_num);
+ if (!netdev)
+ return -ENODEV;
+
+ port_attr->max_mtu = IB_MTU_4096;
+ port_attr->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
+
+ if (!netif_carrier_ok(netdev)) {
+ port_attr->state = IB_PORT_DOWN;
+ port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
+ } else {
+ rcu_read_lock();
+ inetdev = __in_dev_get_rcu(netdev);
+
+ if (inetdev && inetdev->ifa_list) {
+ port_attr->state = IB_PORT_ACTIVE;
+ port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
+ } else {
+ port_attr->state = IB_PORT_INIT;
+ port_attr->phys_state =
+ IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING;
+ }
+
+ rcu_read_unlock();
+ }
+
+ dev_put(netdev);
+ return device->ops.query_port(device, port_num, port_attr);
+}
+
+static int __ib_query_port(struct ib_device *device,
+ u32 port_num,
+ struct ib_port_attr *port_attr)
+{
+ int err;
+
+ memset(port_attr, 0, sizeof(*port_attr));
+
+ err = device->ops.query_port(device, port_num, port_attr);
+ if (err || port_attr->subnet_prefix)
+ return err;
+
+ if (rdma_port_get_link_layer(device, port_num) !=
+ IB_LINK_LAYER_INFINIBAND)
+ return 0;
+
+ ib_get_cached_subnet_prefix(device, port_num,
+ &port_attr->subnet_prefix);
+ return 0;
}
-EXPORT_SYMBOL(ib_dispatch_event);
/**
* ib_query_port - Query IB port attributes
@@ -1825,29 +2168,16 @@ EXPORT_SYMBOL(ib_dispatch_event);
* @port_attr pointer.
*/
int ib_query_port(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
struct ib_port_attr *port_attr)
{
- union ib_gid gid;
- int err;
-
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
- memset(port_attr, 0, sizeof(*port_attr));
- err = device->ops.query_port(device, port_num, port_attr);
- if (err || port_attr->subnet_prefix)
- return err;
-
- if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND)
- return 0;
-
- err = device->ops.query_gid(device, port_num, 0, &gid);
- if (err)
- return err;
-
- port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix);
- return 0;
+ if (rdma_protocol_iwarp(device, port_num))
+ return iw_query_port(device, port_num, port_attr);
+ else
+ return __ib_query_port(device, port_num, port_attr);
}
EXPORT_SYMBOL(ib_query_port);
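A caller-side sketch (hypothetical helper) of the usual pattern around ib_query_port():

/* Illustrative only: check whether a port is usable before posting to it. */
static bool example_port_is_active(struct ib_device *device, u32 port_num)
{
	struct ib_port_attr attr;

	if (ib_query_port(device, port_num, &attr))
		return false;
	return attr.state == IB_PORT_ACTIVE;
}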
@@ -1890,13 +2220,17 @@ static void add_ndev_hash(struct ib_port_data *pdata)
* NETDEV_UNREGISTER event.
*/
int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
- unsigned int port)
+ u32 port)
{
+ enum rdma_nl_notify_event_type etype;
struct net_device *old_ndev;
struct ib_port_data *pdata;
unsigned long flags;
int ret;
+ if (!rdma_is_port_valid(ib_dev, port))
+ return -EINVAL;
+
/*
* Drivers wish to call this before ib_register_driver, so we have to
* setup the port data early.
@@ -1905,9 +2239,6 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
if (ret)
return ret;
- if (!rdma_is_port_valid(ib_dev, port))
- return -EINVAL;
-
pdata = &ib_dev->port_data[port];
spin_lock_irqsave(&pdata->netdev_lock, flags);
old_ndev = rcu_dereference_protected(
@@ -1917,14 +2248,19 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
return 0;
}
- if (ndev)
- dev_hold(ndev);
rcu_assign_pointer(pdata->netdev, ndev);
+ netdev_put(old_ndev, &pdata->netdev_tracker);
+ netdev_hold(ndev, &pdata->netdev_tracker, GFP_ATOMIC);
spin_unlock_irqrestore(&pdata->netdev_lock, flags);
add_ndev_hash(pdata);
- if (old_ndev)
- dev_put(old_ndev);
+
+ /* Make sure that the device is registered before we send events */
+ if (xa_load(&devices, ib_dev->index) != ib_dev)
+ return 0;
+
+ etype = ndev ? RDMA_NETDEV_ATTACH_EVENT : RDMA_NETDEV_DETACH_EVENT;
+ rdma_nl_notify_event(ib_dev, port, etype);
return 0;
}
@@ -1933,7 +2269,10 @@ EXPORT_SYMBOL(ib_device_set_netdev);
static void free_netdevs(struct ib_device *ib_dev)
{
unsigned long flags;
- unsigned int port;
+ u32 port;
+
+ if (!ib_dev->port_data)
+ return;
rdma_for_each_port (ib_dev, port) {
struct ib_port_data *pdata = &ib_dev->port_data[port];
@@ -1954,14 +2293,14 @@ static void free_netdevs(struct ib_device *ib_dev)
* comparisons after the put
*/
rcu_assign_pointer(pdata->netdev, NULL);
- dev_put(ndev);
+ netdev_put(ndev, &pdata->netdev_tracker);
}
spin_unlock_irqrestore(&pdata->netdev_lock, flags);
}
}
struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
- unsigned int port)
+ u32 port)
{
struct ib_port_data *pdata;
struct net_device *res;
@@ -1969,6 +2308,9 @@ struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
if (!rdma_is_port_valid(ib_dev, port))
return NULL;
+ if (!ib_dev->port_data)
+ return NULL;
+
pdata = &ib_dev->port_data[port];
/*
@@ -1981,22 +2323,40 @@ struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
spin_lock(&pdata->netdev_lock);
res = rcu_dereference_protected(
pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
- if (res)
- dev_hold(res);
+ dev_hold(res);
spin_unlock(&pdata->netdev_lock);
}
- /*
- * If we are starting to unregister expedite things by preventing
- * propagation of an unregistering netdev.
- */
- if (res && res->reg_state != NETREG_REGISTERED) {
- dev_put(res);
- return NULL;
+ return res;
+}
+EXPORT_SYMBOL(ib_device_get_netdev);
+
+/**
+ * ib_query_netdev_port - Query the port number of a net_device
+ * associated with an ibdev
+ * @ibdev: IB device
+ * @ndev: Network device
+ * @port: IB port the net_device is connected to
+ */
+int ib_query_netdev_port(struct ib_device *ibdev, struct net_device *ndev,
+ u32 *port)
+{
+ struct net_device *ib_ndev;
+ u32 port_num;
+
+ rdma_for_each_port(ibdev, port_num) {
+ ib_ndev = ib_device_get_netdev(ibdev, port_num);
+ if (ndev == ib_ndev) {
+ *port = port_num;
+ dev_put(ib_ndev);
+ return 0;
+ }
+ dev_put(ib_ndev);
}
- return res;
+ return -ENOENT;
}
+EXPORT_SYMBOL(ib_query_netdev_port);
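A small caller sketch (hypothetical) resolving which IB port a netdev belongs to before acting on a netdev event:

/* Illustrative only: map a netdev back to its IB port. */
static void example_handle_netdev(struct ib_device *ibdev, struct net_device *ndev)
{
	u32 port;

	if (ib_query_netdev_port(ibdev, ndev, &port))
		return;		/* ndev is not attached to this device */
	pr_debug("%s is bound to %s port %u\n", ndev->name,
		 dev_name(&ibdev->dev), port);
}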
/**
* ib_device_get_by_netdev - Find an IB device associated with a netdev
@@ -2018,7 +2378,7 @@ struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
(uintptr_t)ndev) {
if (rcu_access_pointer(cur->netdev) == ndev &&
(driver_id == RDMA_DRIVER_UNKNOWN ||
- cur->ib_dev->driver_id == driver_id) &&
+ cur->ib_dev->ops.driver_id == driver_id) &&
ib_device_try_get(cur->ib_dev)) {
res = cur->ib_dev;
break;
@@ -2048,7 +2408,7 @@ void ib_enum_roce_netdev(struct ib_device *ib_dev,
roce_netdev_callback cb,
void *cookie)
{
- unsigned int port;
+ u32 port;
rdma_for_each_port (ib_dev, port)
if (rdma_protocol_roce(ib_dev, port)) {
@@ -2057,9 +2417,7 @@ void ib_enum_roce_netdev(struct ib_device *ib_dev,
if (filter(ib_dev, port, idev, filter_cookie))
cb(ib_dev, port, idev, cookie);
-
- if (idev)
- dev_put(idev);
+ dev_put(idev);
}
}
@@ -2088,7 +2446,7 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
up_read(&devices_rwsem);
}
-/**
+/*
* ib_enum_all_devs - enumerate all ib_devices
* @cb: Callback to call for each found ib_device
*
@@ -2126,11 +2484,14 @@ int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
* ib_query_pkey() fetches the specified P_Key table entry.
*/
int ib_query_pkey(struct ib_device *device,
- u8 port_num, u16 index, u16 *pkey)
+ u32 port_num, u16 index, u16 *pkey)
{
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
+ if (!device->ops.query_pkey)
+ return -EOPNOTSUPP;
+
return device->ops.query_pkey(device, port_num, index, pkey);
}
EXPORT_SYMBOL(ib_query_pkey);
@@ -2149,7 +2510,7 @@ int ib_modify_device(struct ib_device *device,
struct ib_device_modify *device_modify)
{
if (!device->ops.modify_device)
- return -ENOSYS;
+ return -EOPNOTSUPP;
return device->ops.modify_device(device, device_modify_mask,
device_modify);
@@ -2168,7 +2529,7 @@ EXPORT_SYMBOL(ib_modify_device);
* @port_modify_mask and @port_modify structure.
*/
int ib_modify_port(struct ib_device *device,
- u8 port_num, int port_modify_mask,
+ u32 port_num, int port_modify_mask,
struct ib_port_modify *port_modify)
{
int rc;
@@ -2180,8 +2541,12 @@ int ib_modify_port(struct ib_device *device,
rc = device->ops.modify_port(device, port_num,
port_modify_mask,
port_modify);
+ else if (rdma_protocol_roce(device, port_num) &&
+ ((port_modify->set_port_cap_mask & ~IB_PORT_CM_SUP) == 0 ||
+ (port_modify->clr_port_cap_mask & ~IB_PORT_CM_SUP) == 0))
+ rc = 0;
else
- rc = rdma_protocol_roce(device, port_num) ? 0 : -ENOSYS;
+ rc = -EOPNOTSUPP;
return rc;
}
EXPORT_SYMBOL(ib_modify_port);
@@ -2196,10 +2561,10 @@ EXPORT_SYMBOL(ib_modify_port);
* parameter may be NULL.
*/
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
- u8 *port_num, u16 *index)
+ u32 *port_num, u16 *index)
{
union ib_gid tmp_gid;
- unsigned int port;
+ u32 port;
int ret, i;
rdma_for_each_port (device, port) {
@@ -2210,7 +2575,8 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid,
++i) {
ret = rdma_query_gid(device, port, i, &tmp_gid);
if (ret)
- return ret;
+ continue;
+
if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
*port_num = port;
if (index)
@@ -2233,7 +2599,7 @@ EXPORT_SYMBOL(ib_find_gid);
* @index: The index into the PKey table where the PKey was found.
*/
int ib_find_pkey(struct ib_device *device,
- u8 port_num, u16 pkey, u16 *index)
+ u32 port_num, u16 pkey, u16 *index)
{
int ret, i;
u16 tmp_pkey;
@@ -2276,7 +2642,7 @@ EXPORT_SYMBOL(ib_find_pkey);
*
*/
struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
- u8 port,
+ u32 port,
u16 pkey,
const union ib_gid *gid,
const struct sockaddr *addr)
@@ -2323,12 +2689,30 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
#define SET_OBJ_SIZE(ptr, name) SET_DEVICE_OP(ptr, size_##name)
+ if (ops->driver_id != RDMA_DRIVER_UNKNOWN) {
+ WARN_ON(dev_ops->driver_id != RDMA_DRIVER_UNKNOWN &&
+ dev_ops->driver_id != ops->driver_id);
+ dev_ops->driver_id = ops->driver_id;
+ }
+ if (ops->owner) {
+ WARN_ON(dev_ops->owner && dev_ops->owner != ops->owner);
+ dev_ops->owner = ops->owner;
+ }
+ if (ops->uverbs_abi_ver)
+ dev_ops->uverbs_abi_ver = ops->uverbs_abi_ver;
+
+ dev_ops->uverbs_no_driver_id_binding |=
+ ops->uverbs_no_driver_id_binding;
+
SET_DEVICE_OP(dev_ops, add_gid);
+ SET_DEVICE_OP(dev_ops, add_sub_dev);
SET_DEVICE_OP(dev_ops, advise_mr);
SET_DEVICE_OP(dev_ops, alloc_dm);
- SET_DEVICE_OP(dev_ops, alloc_fmr);
- SET_DEVICE_OP(dev_ops, alloc_hw_stats);
+ SET_DEVICE_OP(dev_ops, alloc_dmah);
+ SET_DEVICE_OP(dev_ops, alloc_hw_device_stats);
+ SET_DEVICE_OP(dev_ops, alloc_hw_port_stats);
SET_DEVICE_OP(dev_ops, alloc_mr);
+ SET_DEVICE_OP(dev_ops, alloc_mr_integrity);
SET_DEVICE_OP(dev_ops, alloc_mw);
SET_DEVICE_OP(dev_ops, alloc_pd);
SET_DEVICE_OP(dev_ops, alloc_rdma_netdev);
@@ -2336,23 +2720,31 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, alloc_xrcd);
SET_DEVICE_OP(dev_ops, attach_mcast);
SET_DEVICE_OP(dev_ops, check_mr_status);
+ SET_DEVICE_OP(dev_ops, counter_alloc_stats);
+ SET_DEVICE_OP(dev_ops, counter_bind_qp);
+ SET_DEVICE_OP(dev_ops, counter_dealloc);
+ SET_DEVICE_OP(dev_ops, counter_init);
+ SET_DEVICE_OP(dev_ops, counter_unbind_qp);
+ SET_DEVICE_OP(dev_ops, counter_update_stats);
SET_DEVICE_OP(dev_ops, create_ah);
SET_DEVICE_OP(dev_ops, create_counters);
SET_DEVICE_OP(dev_ops, create_cq);
+ SET_DEVICE_OP(dev_ops, create_cq_umem);
SET_DEVICE_OP(dev_ops, create_flow);
- SET_DEVICE_OP(dev_ops, create_flow_action_esp);
SET_DEVICE_OP(dev_ops, create_qp);
SET_DEVICE_OP(dev_ops, create_rwq_ind_table);
SET_DEVICE_OP(dev_ops, create_srq);
+ SET_DEVICE_OP(dev_ops, create_user_ah);
SET_DEVICE_OP(dev_ops, create_wq);
SET_DEVICE_OP(dev_ops, dealloc_dm);
+ SET_DEVICE_OP(dev_ops, dealloc_dmah);
SET_DEVICE_OP(dev_ops, dealloc_driver);
- SET_DEVICE_OP(dev_ops, dealloc_fmr);
SET_DEVICE_OP(dev_ops, dealloc_mw);
SET_DEVICE_OP(dev_ops, dealloc_pd);
SET_DEVICE_OP(dev_ops, dealloc_ucontext);
SET_DEVICE_OP(dev_ops, dealloc_xrcd);
SET_DEVICE_OP(dev_ops, del_gid);
+ SET_DEVICE_OP(dev_ops, del_sub_dev);
SET_DEVICE_OP(dev_ops, dereg_mr);
SET_DEVICE_OP(dev_ops, destroy_ah);
SET_DEVICE_OP(dev_ops, destroy_counters);
@@ -2363,22 +2755,33 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, destroy_rwq_ind_table);
SET_DEVICE_OP(dev_ops, destroy_srq);
SET_DEVICE_OP(dev_ops, destroy_wq);
+ SET_DEVICE_OP(dev_ops, device_group);
SET_DEVICE_OP(dev_ops, detach_mcast);
SET_DEVICE_OP(dev_ops, disassociate_ucontext);
SET_DEVICE_OP(dev_ops, drain_rq);
SET_DEVICE_OP(dev_ops, drain_sq);
SET_DEVICE_OP(dev_ops, enable_driver);
- SET_DEVICE_OP(dev_ops, fill_res_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_cm_id_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_cq_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_cq_entry_raw);
+ SET_DEVICE_OP(dev_ops, fill_res_mr_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_mr_entry_raw);
+ SET_DEVICE_OP(dev_ops, fill_res_qp_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_qp_entry_raw);
+ SET_DEVICE_OP(dev_ops, fill_res_srq_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_srq_entry_raw);
+ SET_DEVICE_OP(dev_ops, fill_stat_mr_entry);
SET_DEVICE_OP(dev_ops, get_dev_fw_str);
SET_DEVICE_OP(dev_ops, get_dma_mr);
SET_DEVICE_OP(dev_ops, get_hw_stats);
SET_DEVICE_OP(dev_ops, get_link_layer);
SET_DEVICE_OP(dev_ops, get_netdev);
+ SET_DEVICE_OP(dev_ops, get_numa_node);
SET_DEVICE_OP(dev_ops, get_port_immutable);
SET_DEVICE_OP(dev_ops, get_vector_affinity);
SET_DEVICE_OP(dev_ops, get_vf_config);
+ SET_DEVICE_OP(dev_ops, get_vf_guid);
SET_DEVICE_OP(dev_ops, get_vf_stats);
- SET_DEVICE_OP(dev_ops, init_port);
SET_DEVICE_OP(dev_ops, iw_accept);
SET_DEVICE_OP(dev_ops, iw_add_ref);
SET_DEVICE_OP(dev_ops, iw_connect);
@@ -2388,18 +2791,22 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, iw_reject);
SET_DEVICE_OP(dev_ops, iw_rem_ref);
SET_DEVICE_OP(dev_ops, map_mr_sg);
- SET_DEVICE_OP(dev_ops, map_phys_fmr);
+ SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
SET_DEVICE_OP(dev_ops, mmap);
+ SET_DEVICE_OP(dev_ops, mmap_free);
SET_DEVICE_OP(dev_ops, modify_ah);
SET_DEVICE_OP(dev_ops, modify_cq);
SET_DEVICE_OP(dev_ops, modify_device);
- SET_DEVICE_OP(dev_ops, modify_flow_action_esp);
+ SET_DEVICE_OP(dev_ops, modify_hw_stat);
SET_DEVICE_OP(dev_ops, modify_port);
SET_DEVICE_OP(dev_ops, modify_qp);
SET_DEVICE_OP(dev_ops, modify_srq);
SET_DEVICE_OP(dev_ops, modify_wq);
SET_DEVICE_OP(dev_ops, peek_cq);
+ SET_DEVICE_OP(dev_ops, pre_destroy_cq);
SET_DEVICE_OP(dev_ops, poll_cq);
+ SET_DEVICE_OP(dev_ops, port_groups);
+ SET_DEVICE_OP(dev_ops, post_destroy_cq);
SET_DEVICE_OP(dev_ops, post_recv);
SET_DEVICE_OP(dev_ops, post_send);
SET_DEVICE_OP(dev_ops, post_srq_recv);
@@ -2411,25 +2818,99 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, query_port);
SET_DEVICE_OP(dev_ops, query_qp);
SET_DEVICE_OP(dev_ops, query_srq);
+ SET_DEVICE_OP(dev_ops, query_ucontext);
SET_DEVICE_OP(dev_ops, rdma_netdev_get_params);
SET_DEVICE_OP(dev_ops, read_counters);
SET_DEVICE_OP(dev_ops, reg_dm_mr);
SET_DEVICE_OP(dev_ops, reg_user_mr);
- SET_DEVICE_OP(dev_ops, req_ncomp_notif);
+ SET_DEVICE_OP(dev_ops, reg_user_mr_dmabuf);
SET_DEVICE_OP(dev_ops, req_notify_cq);
SET_DEVICE_OP(dev_ops, rereg_user_mr);
SET_DEVICE_OP(dev_ops, resize_cq);
SET_DEVICE_OP(dev_ops, set_vf_guid);
SET_DEVICE_OP(dev_ops, set_vf_link_state);
- SET_DEVICE_OP(dev_ops, unmap_fmr);
+ SET_DEVICE_OP(dev_ops, ufile_hw_cleanup);
+ SET_DEVICE_OP(dev_ops, report_port_event);
SET_OBJ_SIZE(dev_ops, ib_ah);
+ SET_OBJ_SIZE(dev_ops, ib_counters);
+ SET_OBJ_SIZE(dev_ops, ib_cq);
+ SET_OBJ_SIZE(dev_ops, ib_dmah);
+ SET_OBJ_SIZE(dev_ops, ib_mw);
SET_OBJ_SIZE(dev_ops, ib_pd);
+ SET_OBJ_SIZE(dev_ops, ib_qp);
+ SET_OBJ_SIZE(dev_ops, ib_rwq_ind_table);
SET_OBJ_SIZE(dev_ops, ib_srq);
SET_OBJ_SIZE(dev_ops, ib_ucontext);
+ SET_OBJ_SIZE(dev_ops, ib_xrcd);
+ SET_OBJ_SIZE(dev_ops, rdma_counter);
}
EXPORT_SYMBOL(ib_set_device_ops);
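For orientation, here is a minimal, hedged sketch of how a provider typically feeds this function: the driver fills a static const struct ib_device_ops and hands it to ib_set_device_ops() before registering the device, which is where the owner, driver_id and uverbs_abi_ver fields merged above come from. The "exdev" names below are illustrative assumptions, not part of this patch; only the ib_* helpers and INIT_RDMA_OBJ_SIZE() are existing kernel APIs.
/* Minimal sketch only -- verbs callbacks and error handling omitted. */
struct exdev_pd       { struct ib_pd ibpd; };
struct exdev_ucontext { struct ib_ucontext ibucontext; };
struct exdev_device   { struct ib_device ibdev; };
static const struct ib_device_ops exdev_dev_ops = {
	.owner		= THIS_MODULE,
	.driver_id	= RDMA_DRIVER_UNKNOWN,	/* a real driver uses its own ID */
	.uverbs_abi_ver	= 1,
	/* .query_device, .query_port, .alloc_pd, ... would be set here */
	/* structure sizes consumed by the SET_OBJ_SIZE() calls above */
	INIT_RDMA_OBJ_SIZE(ib_pd, exdev_pd, ibpd),
	INIT_RDMA_OBJ_SIZE(ib_ucontext, exdev_ucontext, ibucontext),
};
static int exdev_register(struct device *dma_device)
{
	struct exdev_device *dev = ib_alloc_device(exdev_device, ibdev);
	if (!dev)
		return -ENOMEM;
	ib_set_device_ops(&dev->ibdev, &exdev_dev_ops);
	return ib_register_device(&dev->ibdev, "exdev%d", dma_device);
}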
+int ib_add_sub_device(struct ib_device *parent,
+ enum rdma_nl_dev_type type,
+ const char *name)
+{
+ struct ib_device *sub;
+ int ret = 0;
+
+ if (!parent->ops.add_sub_dev || !parent->ops.del_sub_dev)
+ return -EOPNOTSUPP;
+
+ if (!ib_device_try_get(parent))
+ return -EINVAL;
+
+ sub = parent->ops.add_sub_dev(parent, type, name);
+ if (IS_ERR(sub)) {
+ ib_device_put(parent);
+ return PTR_ERR(sub);
+ }
+
+ sub->type = type;
+ sub->parent = parent;
+
+ mutex_lock(&parent->subdev_lock);
+ list_add_tail(&parent->subdev_list_head, &sub->subdev_list);
+ mutex_unlock(&parent->subdev_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_add_sub_device);
+
+int ib_del_sub_device_and_put(struct ib_device *sub)
+{
+ struct ib_device *parent = sub->parent;
+
+ if (!parent)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&parent->subdev_lock);
+ list_del(&sub->subdev_list);
+ mutex_unlock(&parent->subdev_lock);
+
+ ib_device_put(sub);
+ parent->ops.del_sub_dev(sub);
+ ib_device_put(parent);
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_del_sub_device_and_put);
+
+#ifdef CONFIG_INFINIBAND_VIRT_DMA
+int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents)
+{
+ struct scatterlist *s;
+ int i;
+
+ for_each_sg(sg, s, nents, i) {
+ sg_dma_address(s) = (uintptr_t)sg_virt(s);
+ sg_dma_len(s) = s->length;
+ }
+ return nents;
+}
+EXPORT_SYMBOL(ib_dma_virt_map_sg);
+#endif /* CONFIG_INFINIBAND_VIRT_DMA */
+
static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
[RDMA_NL_LS_OP_RESOLVE] = {
.doit = ib_nl_handle_resolve_resp,
@@ -2445,29 +2926,121 @@ static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
},
};
+void ib_dispatch_port_state_event(struct ib_device *ibdev, struct net_device *ndev)
+{
+ enum ib_port_state curr_state;
+ struct ib_event ibevent = {};
+ u32 port;
+
+ if (ib_query_netdev_port(ibdev, ndev, &port))
+ return;
+
+ curr_state = ib_get_curr_port_state(ndev);
+
+ write_lock_irq(&ibdev->cache_lock);
+ if (ibdev->port_data[port].cache.last_port_state == curr_state) {
+ write_unlock_irq(&ibdev->cache_lock);
+ return;
+ }
+ ibdev->port_data[port].cache.last_port_state = curr_state;
+ write_unlock_irq(&ibdev->cache_lock);
+
+ ibevent.event = (curr_state == IB_PORT_DOWN) ?
+ IB_EVENT_PORT_ERR : IB_EVENT_PORT_ACTIVE;
+ ibevent.device = ibdev;
+ ibevent.element.port_num = port;
+ ib_dispatch_event(&ibevent);
+}
+EXPORT_SYMBOL(ib_dispatch_port_state_event);
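As a usage note, a hedged sketch of the driver side (an assumption, not taken from this patch): a provider that implements the new .report_port_event hook to filter netdev events can still hand the surviving events to the generic dispatcher exported above.
/* Illustrative .report_port_event implementation for a hypothetical driver. */
static void exdev_report_port_event(struct ib_device *ibdev,
				    struct net_device *ndev,
				    unsigned long event)
{
	if (event != NETDEV_UP && event != NETDEV_DOWN && event != NETDEV_CHANGE)
		return;
	ib_dispatch_port_state_event(ibdev, ndev);
}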
+
+static void handle_port_event(struct net_device *ndev, unsigned long event)
+{
+ struct ib_device *ibdev;
+
+ /* Currently, link events in bonding scenarios are still
+ * reported by drivers that support bonding.
+ */
+ if (netif_is_lag_master(ndev) || netif_is_lag_port(ndev))
+ return;
+
+ ibdev = ib_device_get_by_netdev(ndev, RDMA_DRIVER_UNKNOWN);
+ if (!ibdev)
+ return;
+
+ if (ibdev->ops.report_port_event) {
+ ibdev->ops.report_port_event(ibdev, ndev, event);
+ goto put_ibdev;
+ }
+
+ ib_dispatch_port_state_event(ibdev, ndev);
+
+put_ibdev:
+ ib_device_put(ibdev);
+}
+
+static int ib_netdevice_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+ struct ib_device *ibdev;
+ u32 port;
+
+ switch (event) {
+ case NETDEV_CHANGENAME:
+ ibdev = ib_device_get_by_netdev(ndev, RDMA_DRIVER_UNKNOWN);
+ if (!ibdev)
+ return NOTIFY_DONE;
+
+ if (ib_query_netdev_port(ibdev, ndev, &port)) {
+ ib_device_put(ibdev);
+ break;
+ }
+
+ rdma_nl_notify_event(ibdev, port, RDMA_NETDEV_RENAME_EVENT);
+ ib_device_put(ibdev);
+ break;
+
+ case NETDEV_UP:
+ case NETDEV_CHANGE:
+ case NETDEV_DOWN:
+ handle_port_event(ndev, event);
+ break;
+
+ default:
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block nb_netdevice = {
+ .notifier_call = ib_netdevice_event,
+};
+
static int __init ib_core_init(void)
{
- int ret;
+ int ret = -ENOMEM;
- ib_wq = alloc_workqueue("infiniband", 0, 0);
+ ib_wq = alloc_workqueue("infiniband", WQ_PERCPU, 0);
if (!ib_wq)
return -ENOMEM;
- ib_comp_wq = alloc_workqueue("ib-comp-wq",
- WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
- if (!ib_comp_wq) {
- ret = -ENOMEM;
+ ib_unreg_wq = alloc_workqueue("ib-unreg-wq", WQ_UNBOUND,
+ WQ_UNBOUND_MAX_ACTIVE);
+ if (!ib_unreg_wq)
goto err;
- }
+
+ ib_comp_wq = alloc_workqueue("ib-comp-wq",
+ WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS | WQ_PERCPU, 0);
+ if (!ib_comp_wq)
+ goto err_unbound;
ib_comp_unbound_wq =
alloc_workqueue("ib-comp-unb-wq",
WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
- if (!ib_comp_unbound_wq) {
- ret = -ENOMEM;
+ if (!ib_comp_unbound_wq)
goto err_comp;
- }
ret = class_register(&ib_class);
if (ret) {
@@ -2475,15 +3048,11 @@ static int __init ib_core_init(void)
goto err_comp_unbound;
}
- ret = rdma_nl_init();
- if (ret) {
- pr_warn("Couldn't init IB netlink interface: err %d\n", ret);
- goto err_sysfs;
- }
+ rdma_nl_init();
ret = addr_init();
if (ret) {
- pr_warn("Could't init IB address resolution\n");
+ pr_warn("Couldn't init IB address resolution\n");
goto err_ibnl;
}
@@ -2499,7 +3068,7 @@ static int __init ib_core_init(void)
goto err_mad;
}
- ret = register_lsm_notifier(&ibdev_lsm_nb);
+ ret = register_blocking_lsm_notifier(&ibdev_lsm_nb);
if (ret) {
pr_warn("Couldn't register LSM notifier. ret %d\n", ret);
goto err_sa;
@@ -2513,12 +3082,22 @@ static int __init ib_core_init(void)
nldev_init();
rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table);
- roce_gid_mgmt_init();
+ ret = roce_gid_mgmt_init();
+ if (ret) {
+ pr_warn("Couldn't init RoCE GID management\n");
+ goto err_parent;
+ }
+
+ register_netdevice_notifier(&nb_netdevice);
return 0;
+err_parent:
+ rdma_nl_unregister(RDMA_NL_LS);
+ nldev_exit();
+ unregister_pernet_device(&rdma_dev_net_ops);
err_compat:
- unregister_lsm_notifier(&ibdev_lsm_nb);
+ unregister_blocking_lsm_notifier(&ibdev_lsm_nb);
err_sa:
ib_sa_cleanup();
err_mad:
@@ -2526,13 +3105,13 @@ err_mad:
err_addr:
addr_cleanup();
err_ibnl:
- rdma_nl_exit();
-err_sysfs:
class_unregister(&ib_class);
err_comp_unbound:
destroy_workqueue(ib_comp_unbound_wq);
err_comp:
destroy_workqueue(ib_comp_wq);
+err_unbound:
+ destroy_workqueue(ib_unreg_wq);
err:
destroy_workqueue(ib_wq);
return ret;
@@ -2540,11 +3119,12 @@ err:
static void __exit ib_core_cleanup(void)
{
+ unregister_netdevice_notifier(&nb_netdevice);
roce_gid_mgmt_cleanup();
- nldev_exit();
rdma_nl_unregister(RDMA_NL_LS);
+ nldev_exit();
unregister_pernet_device(&rdma_dev_net_ops);
- unregister_lsm_notifier(&ibdev_lsm_nb);
+ unregister_blocking_lsm_notifier(&ibdev_lsm_nb);
ib_sa_cleanup();
ib_mad_cleanup();
addr_cleanup();
@@ -2554,7 +3134,7 @@ static void __exit ib_core_cleanup(void)
destroy_workqueue(ib_comp_wq);
/* Make sure that any pending umem accounting work is done. */
destroy_workqueue(ib_wq);
- flush_workqueue(system_unbound_wq);
+ destroy_workqueue(ib_unreg_wq);
WARN_ON(!xa_empty(&clients));
WARN_ON(!xa_empty(&devices));
}
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
deleted file mode 100644
index 7d841b689a1e..000000000000
--- a/drivers/infiniband/core/fmr_pool.c
+++ /dev/null
@@ -1,507 +0,0 @@
-/*
- * Copyright (c) 2004 Topspin Communications. All rights reserved.
- * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/errno.h>
-#include <linux/spinlock.h>
-#include <linux/export.h>
-#include <linux/slab.h>
-#include <linux/jhash.h>
-#include <linux/kthread.h>
-
-#include <rdma/ib_fmr_pool.h>
-
-#include "core_priv.h"
-
-#define PFX "fmr_pool: "
-
-enum {
- IB_FMR_MAX_REMAPS = 32,
-
- IB_FMR_HASH_BITS = 8,
- IB_FMR_HASH_SIZE = 1 << IB_FMR_HASH_BITS,
- IB_FMR_HASH_MASK = IB_FMR_HASH_SIZE - 1
-};
-
-/*
- * If an FMR is not in use, then the list member will point to either
- * its pool's free_list (if the FMR can be mapped again; that is,
- * remap_count < pool->max_remaps) or its pool's dirty_list (if the
- * FMR needs to be unmapped before being remapped). In either of
- * these cases it is a bug if the ref_count is not 0. In other words,
- * if ref_count is > 0, then the list member must not be linked into
- * either free_list or dirty_list.
- *
- * The cache_node member is used to link the FMR into a cache bucket
- * (if caching is enabled). This is independent of the reference
- * count of the FMR. When a valid FMR is released, its ref_count is
- * decremented, and if ref_count reaches 0, the FMR is placed in
- * either free_list or dirty_list as appropriate. However, it is not
- * removed from the cache and may be "revived" if a call to
- * ib_fmr_register_physical() occurs before the FMR is remapped. In
- * this case we just increment the ref_count and remove the FMR from
- * free_list/dirty_list.
- *
- * Before we remap an FMR from free_list, we remove it from the cache
- * (to prevent another user from obtaining a stale FMR). When an FMR
- * is released, we add it to the tail of the free list, so that our
- * cache eviction policy is "least recently used."
- *
- * All manipulation of ref_count, list and cache_node is protected by
- * pool_lock to maintain consistency.
- */
-
-struct ib_fmr_pool {
- spinlock_t pool_lock;
-
- int pool_size;
- int max_pages;
- int max_remaps;
- int dirty_watermark;
- int dirty_len;
- struct list_head free_list;
- struct list_head dirty_list;
- struct hlist_head *cache_bucket;
-
- void (*flush_function)(struct ib_fmr_pool *pool,
- void * arg);
- void *flush_arg;
-
- struct kthread_worker *worker;
- struct kthread_work work;
-
- atomic_t req_ser;
- atomic_t flush_ser;
-
- wait_queue_head_t force_wait;
-};
-
-static inline u32 ib_fmr_hash(u64 first_page)
-{
- return jhash_2words((u32) first_page, (u32) (first_page >> 32), 0) &
- (IB_FMR_HASH_SIZE - 1);
-}
-
-/* Caller must hold pool_lock */
-static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool,
- u64 *page_list,
- int page_list_len,
- u64 io_virtual_address)
-{
- struct hlist_head *bucket;
- struct ib_pool_fmr *fmr;
-
- if (!pool->cache_bucket)
- return NULL;
-
- bucket = pool->cache_bucket + ib_fmr_hash(*page_list);
-
- hlist_for_each_entry(fmr, bucket, cache_node)
- if (io_virtual_address == fmr->io_virtual_address &&
- page_list_len == fmr->page_list_len &&
- !memcmp(page_list, fmr->page_list,
- page_list_len * sizeof *page_list))
- return fmr;
-
- return NULL;
-}
-
-static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
-{
- int ret;
- struct ib_pool_fmr *fmr;
- LIST_HEAD(unmap_list);
- LIST_HEAD(fmr_list);
-
- spin_lock_irq(&pool->pool_lock);
-
- list_for_each_entry(fmr, &pool->dirty_list, list) {
- hlist_del_init(&fmr->cache_node);
- fmr->remap_count = 0;
- list_add_tail(&fmr->fmr->list, &fmr_list);
-
-#ifdef DEBUG
- if (fmr->ref_count !=0) {
- pr_warn(PFX "Unmapping FMR 0x%08x with ref count %d\n",
- fmr, fmr->ref_count);
- }
-#endif
- }
-
- list_splice_init(&pool->dirty_list, &unmap_list);
- pool->dirty_len = 0;
-
- spin_unlock_irq(&pool->pool_lock);
-
- if (list_empty(&unmap_list)) {
- return;
- }
-
- ret = ib_unmap_fmr(&fmr_list);
- if (ret)
- pr_warn(PFX "ib_unmap_fmr returned %d\n", ret);
-
- spin_lock_irq(&pool->pool_lock);
- list_splice(&unmap_list, &pool->free_list);
- spin_unlock_irq(&pool->pool_lock);
-}
-
-static void ib_fmr_cleanup_func(struct kthread_work *work)
-{
- struct ib_fmr_pool *pool = container_of(work, struct ib_fmr_pool, work);
-
- ib_fmr_batch_release(pool);
- atomic_inc(&pool->flush_ser);
- wake_up_interruptible(&pool->force_wait);
-
- if (pool->flush_function)
- pool->flush_function(pool, pool->flush_arg);
-
- if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0)
- kthread_queue_work(pool->worker, &pool->work);
-}
-
-/**
- * ib_create_fmr_pool - Create an FMR pool
- * @pd:Protection domain for FMRs
- * @params:FMR pool parameters
- *
- * Create a pool of FMRs. Return value is pointer to new pool or
- * error code if creation failed.
- */
-struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
- struct ib_fmr_pool_param *params)
-{
- struct ib_device *device;
- struct ib_fmr_pool *pool;
- int i;
- int ret;
- int max_remaps;
-
- if (!params)
- return ERR_PTR(-EINVAL);
-
- device = pd->device;
- if (!device->ops.alloc_fmr || !device->ops.dealloc_fmr ||
- !device->ops.map_phys_fmr || !device->ops.unmap_fmr) {
- dev_info(&device->dev, "Device does not support FMRs\n");
- return ERR_PTR(-ENOSYS);
- }
-
- if (!device->attrs.max_map_per_fmr)
- max_remaps = IB_FMR_MAX_REMAPS;
- else
- max_remaps = device->attrs.max_map_per_fmr;
-
- pool = kmalloc(sizeof *pool, GFP_KERNEL);
- if (!pool)
- return ERR_PTR(-ENOMEM);
-
- pool->cache_bucket = NULL;
- pool->flush_function = params->flush_function;
- pool->flush_arg = params->flush_arg;
-
- INIT_LIST_HEAD(&pool->free_list);
- INIT_LIST_HEAD(&pool->dirty_list);
-
- if (params->cache) {
- pool->cache_bucket =
- kmalloc_array(IB_FMR_HASH_SIZE,
- sizeof(*pool->cache_bucket),
- GFP_KERNEL);
- if (!pool->cache_bucket) {
- ret = -ENOMEM;
- goto out_free_pool;
- }
-
- for (i = 0; i < IB_FMR_HASH_SIZE; ++i)
- INIT_HLIST_HEAD(pool->cache_bucket + i);
- }
-
- pool->pool_size = 0;
- pool->max_pages = params->max_pages_per_fmr;
- pool->max_remaps = max_remaps;
- pool->dirty_watermark = params->dirty_watermark;
- pool->dirty_len = 0;
- spin_lock_init(&pool->pool_lock);
- atomic_set(&pool->req_ser, 0);
- atomic_set(&pool->flush_ser, 0);
- init_waitqueue_head(&pool->force_wait);
-
- pool->worker =
- kthread_create_worker(0, "ib_fmr(%s)", dev_name(&device->dev));
- if (IS_ERR(pool->worker)) {
- pr_warn(PFX "couldn't start cleanup kthread worker\n");
- ret = PTR_ERR(pool->worker);
- goto out_free_pool;
- }
- kthread_init_work(&pool->work, ib_fmr_cleanup_func);
-
- {
- struct ib_pool_fmr *fmr;
- struct ib_fmr_attr fmr_attr = {
- .max_pages = params->max_pages_per_fmr,
- .max_maps = pool->max_remaps,
- .page_shift = params->page_shift
- };
- int bytes_per_fmr = sizeof *fmr;
-
- if (pool->cache_bucket)
- bytes_per_fmr += params->max_pages_per_fmr * sizeof (u64);
-
- for (i = 0; i < params->pool_size; ++i) {
- fmr = kmalloc(bytes_per_fmr, GFP_KERNEL);
- if (!fmr)
- goto out_fail;
-
- fmr->pool = pool;
- fmr->remap_count = 0;
- fmr->ref_count = 0;
- INIT_HLIST_NODE(&fmr->cache_node);
-
- fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr);
- if (IS_ERR(fmr->fmr)) {
- pr_warn(PFX "fmr_create failed for FMR %d\n",
- i);
- kfree(fmr);
- goto out_fail;
- }
-
- list_add_tail(&fmr->list, &pool->free_list);
- ++pool->pool_size;
- }
- }
-
- return pool;
-
- out_free_pool:
- kfree(pool->cache_bucket);
- kfree(pool);
-
- return ERR_PTR(ret);
-
- out_fail:
- ib_destroy_fmr_pool(pool);
-
- return ERR_PTR(-ENOMEM);
-}
-EXPORT_SYMBOL(ib_create_fmr_pool);
-
-/**
- * ib_destroy_fmr_pool - Free FMR pool
- * @pool:FMR pool to free
- *
- * Destroy an FMR pool and free all associated resources.
- */
-void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
-{
- struct ib_pool_fmr *fmr;
- struct ib_pool_fmr *tmp;
- LIST_HEAD(fmr_list);
- int i;
-
- kthread_destroy_worker(pool->worker);
- ib_fmr_batch_release(pool);
-
- i = 0;
- list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) {
- if (fmr->remap_count) {
- INIT_LIST_HEAD(&fmr_list);
- list_add_tail(&fmr->fmr->list, &fmr_list);
- ib_unmap_fmr(&fmr_list);
- }
- ib_dealloc_fmr(fmr->fmr);
- list_del(&fmr->list);
- kfree(fmr);
- ++i;
- }
-
- if (i < pool->pool_size)
- pr_warn(PFX "pool still has %d regions registered\n",
- pool->pool_size - i);
-
- kfree(pool->cache_bucket);
- kfree(pool);
-}
-EXPORT_SYMBOL(ib_destroy_fmr_pool);
-
-/**
- * ib_flush_fmr_pool - Invalidate all unmapped FMRs
- * @pool:FMR pool to flush
- *
- * Ensure that all unmapped FMRs are fully invalidated.
- */
-int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
-{
- int serial;
- struct ib_pool_fmr *fmr, *next;
-
- /*
- * The free_list holds FMRs that may have been used
- * but have not been remapped enough times to be dirty.
- * Put them on the dirty list now so that the cleanup
- * thread will reap them too.
- */
- spin_lock_irq(&pool->pool_lock);
- list_for_each_entry_safe(fmr, next, &pool->free_list, list) {
- if (fmr->remap_count > 0)
- list_move(&fmr->list, &pool->dirty_list);
- }
- spin_unlock_irq(&pool->pool_lock);
-
- serial = atomic_inc_return(&pool->req_ser);
- kthread_queue_work(pool->worker, &pool->work);
-
- if (wait_event_interruptible(pool->force_wait,
- atomic_read(&pool->flush_ser) - serial >= 0))
- return -EINTR;
-
- return 0;
-}
-EXPORT_SYMBOL(ib_flush_fmr_pool);
-
-/**
- * ib_fmr_pool_map_phys - Map an FMR from an FMR pool.
- * @pool_handle: FMR pool to allocate FMR from
- * @page_list: List of pages to map
- * @list_len: Number of pages in @page_list
- * @io_virtual_address: I/O virtual address for new FMR
- */
-struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
- u64 *page_list,
- int list_len,
- u64 io_virtual_address)
-{
- struct ib_fmr_pool *pool = pool_handle;
- struct ib_pool_fmr *fmr;
- unsigned long flags;
- int result;
-
- if (list_len < 1 || list_len > pool->max_pages)
- return ERR_PTR(-EINVAL);
-
- spin_lock_irqsave(&pool->pool_lock, flags);
- fmr = ib_fmr_cache_lookup(pool,
- page_list,
- list_len,
- io_virtual_address);
- if (fmr) {
- /* found in cache */
- ++fmr->ref_count;
- if (fmr->ref_count == 1) {
- list_del(&fmr->list);
- }
-
- spin_unlock_irqrestore(&pool->pool_lock, flags);
-
- return fmr;
- }
-
- if (list_empty(&pool->free_list)) {
- spin_unlock_irqrestore(&pool->pool_lock, flags);
- return ERR_PTR(-EAGAIN);
- }
-
- fmr = list_entry(pool->free_list.next, struct ib_pool_fmr, list);
- list_del(&fmr->list);
- hlist_del_init(&fmr->cache_node);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
-
- result = ib_map_phys_fmr(fmr->fmr, page_list, list_len,
- io_virtual_address);
-
- if (result) {
- spin_lock_irqsave(&pool->pool_lock, flags);
- list_add(&fmr->list, &pool->free_list);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
-
- pr_warn(PFX "fmr_map returns %d\n", result);
-
- return ERR_PTR(result);
- }
-
- ++fmr->remap_count;
- fmr->ref_count = 1;
-
- if (pool->cache_bucket) {
- fmr->io_virtual_address = io_virtual_address;
- fmr->page_list_len = list_len;
- memcpy(fmr->page_list, page_list, list_len * sizeof(*page_list));
-
- spin_lock_irqsave(&pool->pool_lock, flags);
- hlist_add_head(&fmr->cache_node,
- pool->cache_bucket + ib_fmr_hash(fmr->page_list[0]));
- spin_unlock_irqrestore(&pool->pool_lock, flags);
- }
-
- return fmr;
-}
-EXPORT_SYMBOL(ib_fmr_pool_map_phys);
-
-/**
- * ib_fmr_pool_unmap - Unmap FMR
- * @fmr:FMR to unmap
- *
- * Unmap an FMR. The FMR mapping may remain valid until the FMR is
- * reused (or until ib_flush_fmr_pool() is called).
- */
-void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
-{
- struct ib_fmr_pool *pool;
- unsigned long flags;
-
- pool = fmr->pool;
-
- spin_lock_irqsave(&pool->pool_lock, flags);
-
- --fmr->ref_count;
- if (!fmr->ref_count) {
- if (fmr->remap_count < pool->max_remaps) {
- list_add_tail(&fmr->list, &pool->free_list);
- } else {
- list_add_tail(&fmr->list, &pool->dirty_list);
- if (++pool->dirty_len >= pool->dirty_watermark) {
- atomic_inc(&pool->req_ser);
- kthread_queue_work(pool->worker, &pool->work);
- }
- }
- }
-
-#ifdef DEBUG
- if (fmr->ref_count < 0)
- pr_warn(PFX "FMR %p has ref count %d < 0\n",
- fmr, fmr->ref_count);
-#endif
-
- spin_unlock_irqrestore(&pool->pool_lock, flags);
-}
-EXPORT_SYMBOL(ib_fmr_pool_unmap);
diff --git a/drivers/infiniband/core/ib_core_uverbs.c b/drivers/infiniband/core/ib_core_uverbs.c
new file mode 100644
index 000000000000..b51bd7087a88
--- /dev/null
+++ b/drivers/infiniband/core/ib_core_uverbs.c
@@ -0,0 +1,367 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2019 Marvell. All rights reserved.
+ */
+#include <linux/xarray.h>
+#include "uverbs.h"
+#include "core_priv.h"
+
+/**
+ * rdma_umap_priv_init() - Initialize the private data of a vma
+ *
+ * @priv: The already allocated private data
+ * @vma: The vm area struct that needs private data
+ * @entry: entry into the mmap_xa that needs to be linked with
+ * this vma
+ *
+ * Each time we map IO memory into user space this keeps track of the
+ * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space
+ * to point to the zero page and allow the hot unplug to proceed.
+ *
+ * This is necessary for cases like PCI physical hot unplug as the actual BAR
+ * memory may vanish after this and access to it from userspace could MCE.
+ *
+ * RDMA drivers supporting disassociation must have their user space designed
+ * to cope in some way with their IO pages going to the zero page.
+ *
+ */
+void rdma_umap_priv_init(struct rdma_umap_priv *priv,
+ struct vm_area_struct *vma,
+ struct rdma_user_mmap_entry *entry)
+{
+ struct ib_uverbs_file *ufile = vma->vm_file->private_data;
+
+ priv->vma = vma;
+ if (entry) {
+ kref_get(&entry->ref);
+ priv->entry = entry;
+ }
+ vma->vm_private_data = priv;
+ /* vm_ops is set up in ib_uverbs_mmap() to avoid module dependencies */
+
+ mutex_lock(&ufile->umap_lock);
+ list_add(&priv->list, &ufile->umaps);
+ mutex_unlock(&ufile->umap_lock);
+}
+EXPORT_SYMBOL(rdma_umap_priv_init);
+
+/**
+ * rdma_user_mmap_io() - Map IO memory into a process
+ *
+ * @ucontext: associated user context
+ * @vma: the vma related to the current mmap call
+ * @pfn: pfn to map
+ * @size: size to map
+ * @prot: pgprot to use in remap call
+ * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL
+ * if mmap_entry is not used by the driver
+ *
+ * This is to be called by drivers as part of their mmap() functions if they
+ * wish to send something like PCI-E BAR memory to userspace.
+ *
+ * Return -EINVAL on wrong flags or size, -EAGAIN on failure to map, and 0
+ * on success.
+ */
+int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
+ unsigned long pfn, unsigned long size, pgprot_t prot,
+ struct rdma_user_mmap_entry *entry)
+{
+ struct ib_uverbs_file *ufile = ucontext->ufile;
+ struct rdma_umap_priv *priv;
+
+ if (!(vma->vm_flags & VM_SHARED))
+ return -EINVAL;
+
+ if (vma->vm_end - vma->vm_start != size)
+ return -EINVAL;
+
+ /* Driver is using this wrong, must be called by ib_uverbs_mmap */
+ if (WARN_ON(!vma->vm_file ||
+ vma->vm_file->private_data != ufile))
+ return -EINVAL;
+ lockdep_assert_held(&ufile->device->disassociate_srcu);
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ vma->vm_page_prot = prot;
+ if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
+ kfree(priv);
+ return -EAGAIN;
+ }
+
+ rdma_umap_priv_init(priv, vma, entry);
+ return 0;
+}
+EXPORT_SYMBOL(rdma_user_mmap_io);
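A hedged sketch of the consuming side, assuming a hypothetical "exdev" driver that embeds struct rdma_user_mmap_entry in its own entry type; only the rdma_user_mmap_*() helpers defined in this file are real APIs, everything else is illustrative.
/* Illustrative ->mmap() callback built on the helpers in this file. */
struct exdev_mmap_entry {
	struct rdma_user_mmap_entry rdma_entry;
	unsigned long pfn;	/* BAR page to expose; driver specific */
};
static int exdev_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma)
{
	struct rdma_user_mmap_entry *rdma_entry;
	struct exdev_mmap_entry *entry;
	int ret;
	rdma_entry = rdma_user_mmap_entry_get(ucontext, vma);
	if (!rdma_entry)
		return -EINVAL;
	entry = container_of(rdma_entry, struct exdev_mmap_entry, rdma_entry);
	ret = rdma_user_mmap_io(ucontext, vma, entry->pfn,
				vma->vm_end - vma->vm_start,
				pgprot_noncached(vma->vm_page_prot),
				rdma_entry);
	rdma_user_mmap_entry_put(rdma_entry);
	return ret;
}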
+
+/**
+ * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa
+ *
+ * @ucontext: associated user context
+ * @pgoff: The mmap offset >> PAGE_SHIFT
+ *
+ * This function is called when a user tries to mmap with an offset (returned
+ * by rdma_user_mmap_get_offset()) it initially received from the driver. The
+ * rdma_user_mmap_entry was created by the function
+ * rdma_user_mmap_entry_insert(). This function increases the refcnt of the
+ * entry so that it won't be deleted from the xarray in the meantime.
+ *
+ * Return a reference to the entry if it exists, or NULL if there is no
+ * match. rdma_user_mmap_entry_put() must be called to put the reference.
+ */
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
+ unsigned long pgoff)
+{
+ struct rdma_user_mmap_entry *entry;
+
+ if (pgoff > U32_MAX)
+ return NULL;
+
+ xa_lock(&ucontext->mmap_xa);
+
+ entry = xa_load(&ucontext->mmap_xa, pgoff);
+
+ /*
+ * If the refcount is zero, the entry is already being deleted.
+ * driver_removed indicates that no further mmaps are possible and that
+ * we are waiting for the active VMAs to be closed.
+ */
+ if (!entry || entry->start_pgoff != pgoff || entry->driver_removed ||
+ !kref_get_unless_zero(&entry->ref))
+ goto err;
+
+ xa_unlock(&ucontext->mmap_xa);
+
+ ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] returned\n",
+ pgoff, entry->npages);
+
+ return entry;
+
+err:
+ xa_unlock(&ucontext->mmap_xa);
+ return NULL;
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff);
+
+/**
+ * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa
+ *
+ * @ucontext: associated user context
+ * @vma: the vma being mmap'd into
+ *
+ * This function is like rdma_user_mmap_entry_get_pgoff() except that it also
+ * checks that the VMA is correct.
+ */
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
+ struct vm_area_struct *vma)
+{
+ struct rdma_user_mmap_entry *entry;
+
+ if (!(vma->vm_flags & VM_SHARED))
+ return NULL;
+ entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
+ if (!entry)
+ return NULL;
+ if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) {
+ rdma_user_mmap_entry_put(entry);
+ return NULL;
+ }
+ return entry;
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_get);
+
+static void rdma_user_mmap_entry_free(struct kref *kref)
+{
+ struct rdma_user_mmap_entry *entry =
+ container_of(kref, struct rdma_user_mmap_entry, ref);
+ struct ib_ucontext *ucontext = entry->ucontext;
+ unsigned long i;
+
+ /*
+ * Erase all xarray slots occupied by this single entry; this is deferred
+ * until all VMAs are closed so that the mmap offsets remain unique.
+ */
+ xa_lock(&ucontext->mmap_xa);
+ for (i = 0; i < entry->npages; i++)
+ __xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i);
+ xa_unlock(&ucontext->mmap_xa);
+
+ ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] removed\n",
+ entry->start_pgoff, entry->npages);
+
+ if (ucontext->device->ops.mmap_free)
+ ucontext->device->ops.mmap_free(entry);
+}
+
+/**
+ * rdma_user_mmap_entry_put() - Drop reference to the mmap entry
+ *
+ * @entry: an entry in the mmap_xa
+ *
+ * This function is called when the mapping is closed if it was
+ * an io mapping, or when the driver is done with the entry for
+ * some other reason.
+ * It should be called after rdma_user_mmap_entry_get() once the
+ * entry is no longer needed. The entry is erased and freed once
+ * its refcount reaches zero.
+ */
+void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry)
+{
+ kref_put(&entry->ref, rdma_user_mmap_entry_free);
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_put);
+
+/**
+ * rdma_user_mmap_entry_remove() - Drop reference to entry and
+ * mark it as unmmapable
+ *
+ * @entry: the entry to remove from the mmap_xa
+ *
+ * Drivers can call this to prevent userspace from creating more mappings for
+ * entry; however, existing mmaps continue to exist and ops->mmap_free() will
+ * not be called until all user mmaps are destroyed.
+ */
+void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
+{
+ if (!entry)
+ return;
+
+ xa_lock(&entry->ucontext->mmap_xa);
+ entry->driver_removed = true;
+ xa_unlock(&entry->ucontext->mmap_xa);
+ kref_put(&entry->ref, rdma_user_mmap_entry_free);
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
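Continuing the hypothetical exdev sketch from the rdma_user_mmap_io() example above, the teardown side pairs rdma_user_mmap_entry_remove() with an ops.mmap_free implementation, because the containing structure may only be freed once the last VMA reference has gone away.
/* Illustrative only: _remove() blocks new mmaps and drops the xarray
 * reference; ops.mmap_free frees the container when the refcount hits zero. */
static void exdev_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
{
	kfree(container_of(rdma_entry, struct exdev_mmap_entry, rdma_entry));
}
static void exdev_db_entry_destroy(struct exdev_mmap_entry *entry)
{
	rdma_user_mmap_entry_remove(&entry->rdma_entry);
}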
+
+/**
+ * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa
+ * in a given range.
+ *
+ * @ucontext: associated user context.
+ * @entry: the entry to insert into the mmap_xa
+ * @length: length of the address that will be mmapped
+ * @min_pgoff: minimum pgoff to be returned
+ * @max_pgoff: maximum pgoff to be returned
+ *
+ * This function should be called by drivers that use the rdma_user_mmap
+ * interface for implementing their mmap syscall. A database of mmap offsets
+ * is handled in the core and helper functions are provided to insert entries
+ * into the database and extract entries when the user calls mmap with the
+ * given offset. The function allocates a unique page offset in a given range
+ * that should be provided to the user; the user will use the offset to
+ * retrieve information such as the address to be mapped and how.
+ *
+ * Return: 0 on success and -ENOMEM on failure
+ */
+int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
+ struct rdma_user_mmap_entry *entry,
+ size_t length, u32 min_pgoff,
+ u32 max_pgoff)
+{
+ struct ib_uverbs_file *ufile = ucontext->ufile;
+ XA_STATE(xas, &ucontext->mmap_xa, min_pgoff);
+ u32 xa_first, xa_last, npages;
+ int err;
+ u32 i;
+
+ if (!entry)
+ return -EINVAL;
+
+ kref_init(&entry->ref);
+ entry->ucontext = ucontext;
+
+ /*
+ * We want the whole allocation to be done without interruption from a
+ * different thread. The allocation requires finding a free range and then
+ * storing into it. During the xa_insert the lock could be released,
+ * possibly allowing another thread to choose the same range.
+ */
+ mutex_lock(&ufile->umap_lock);
+
+ xa_lock(&ucontext->mmap_xa);
+
+ /* We want to find an empty range */
+ npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE);
+ entry->npages = npages;
+ while (true) {
+ /* First find an empty index */
+ xas_find_marked(&xas, max_pgoff, XA_FREE_MARK);
+ if (xas.xa_node == XAS_RESTART)
+ goto err_unlock;
+
+ xa_first = xas.xa_index;
+
+ /* Is there enough room to have the range? */
+ if (check_add_overflow(xa_first, npages, &xa_last))
+ goto err_unlock;
+
+ /*
+ * Now look for the next present entry. If an entry doesn't
+ * exist, we found an empty range and can proceed.
+ */
+ xas_next_entry(&xas, xa_last - 1);
+ if (xas.xa_node == XAS_BOUNDS || xas.xa_index >= xa_last)
+ break;
+ }
+
+ for (i = xa_first; i < xa_last; i++) {
+ err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL);
+ if (err)
+ goto err_undo;
+ }
+
+ /*
+ * Internally the kernel uses a page offset; in libc mmap() this is a byte
+ * offset. Drivers should not return the raw pgoff to userspace.
+ */
+ entry->start_pgoff = xa_first;
+ xa_unlock(&ucontext->mmap_xa);
+ mutex_unlock(&ufile->umap_lock);
+
+ ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#x] inserted\n",
+ entry->start_pgoff, npages);
+
+ return 0;
+
+err_undo:
+ for (; i > xa_first; i--)
+ __xa_erase(&ucontext->mmap_xa, i - 1);
+
+err_unlock:
+ xa_unlock(&ucontext->mmap_xa);
+ mutex_unlock(&ufile->umap_lock);
+ return -ENOMEM;
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range);
+
+/**
+ * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa.
+ *
+ * @ucontext: associated user context.
+ * @entry: the entry to insert into the mmap_xa
+ * @length: length of the address that will be mmapped
+ *
+ * This function should be called by drivers that use the rdma_user_mmap
+ * interface for handling user mmapped addresses. The database is handled in
+ * the core and helper functions are provided to insert entries into the
+ * database and extract entries when the user calls mmap with the given offset.
+ * The function allocates a unique page offset that should be provided to
+ * the user; the user will use the offset to retrieve information such as
+ * the address to be mapped and how.
+ *
+ * Return: 0 on success and -ENOMEM on failure
+ */
+int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
+ struct rdma_user_mmap_entry *entry,
+ size_t length)
+{
+ return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0,
+ U32_MAX);
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_insert);
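And the allocation side of the same hypothetical flow: insert the entry, then hand back to userspace the byte offset returned by rdma_user_mmap_get_offset() (an existing helper in <rdma/ib_verbs.h>); the function and structure names remain illustrative assumptions.
/* Illustrative only: publish one page worth of mapping to userspace. */
static int exdev_db_entry_create(struct ib_ucontext *ucontext,
				 unsigned long pfn, u64 *mmap_offset)
{
	struct exdev_mmap_entry *entry;
	int ret;
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->pfn = pfn;
	ret = rdma_user_mmap_entry_insert(ucontext, &entry->rdma_entry,
					  PAGE_SIZE);
	if (ret) {
		kfree(entry);
		return ret;
	}
	*mmap_offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
	return 0;
}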
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 72141c5b7c95..62410578dec3 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -109,9 +109,10 @@ static struct ctl_table iwcm_ctl_table[] = {
.data = &default_backlog,
.maxlen = sizeof(default_backlog),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
},
- { }
};
/*
@@ -144,8 +145,8 @@ static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv)
if (list_empty(&cm_id_priv->work_free_list))
return NULL;
- work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work,
- free_list);
+ work = list_first_entry(&cm_id_priv->work_free_list, struct iwcm_work,
+ free_list);
list_del_init(&work->free_list);
return work;
}
@@ -159,8 +160,10 @@ static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv)
{
struct list_head *e, *tmp;
- list_for_each_safe(e, tmp, &cm_id_priv->work_free_list)
+ list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) {
+ list_del(e);
kfree(list_entry(e, struct iwcm_work, free_list));
+ }
}
static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
@@ -205,25 +208,24 @@ static void free_cm_id(struct iwcm_id_private *cm_id_priv)
/*
* Release a reference on cm_id. If the last reference is being
- * released, free the cm_id and return 1.
+ * released, free the cm_id and return 'true'.
*/
-static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
+static bool iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
{
- BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
- if (atomic_dec_and_test(&cm_id_priv->refcount)) {
+ if (refcount_dec_and_test(&cm_id_priv->refcount)) {
BUG_ON(!list_empty(&cm_id_priv->work_list));
free_cm_id(cm_id_priv);
- return 1;
+ return true;
}
- return 0;
+ return false;
}
static void add_ref(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
- atomic_inc(&cm_id_priv->refcount);
+ refcount_inc(&cm_id_priv->refcount);
}
static void rem_ref(struct iw_cm_id *cm_id)
@@ -255,7 +257,7 @@ struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
cm_id_priv->id.add_ref = add_ref;
cm_id_priv->id.rem_ref = rem_ref;
spin_lock_init(&cm_id_priv->lock);
- atomic_set(&cm_id_priv->refcount, 1);
+ refcount_set(&cm_id_priv->refcount, 1);
init_waitqueue_head(&cm_id_priv->connect_wait);
init_completion(&cm_id_priv->destroy_comp);
INIT_LIST_HEAD(&cm_id_priv->work_list);
@@ -366,12 +368,12 @@ EXPORT_SYMBOL(iw_cm_disconnect);
/*
* CM_ID <-- DESTROYING
*
- * Clean up all resources associated with the connection and release
- * the initial reference taken by iw_create_cm_id.
+ * Clean up all resources associated with the connection.
*/
static void destroy_cm_id(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
+ struct ib_qp *qp;
unsigned long flags;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
@@ -389,6 +391,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags);
spin_lock_irqsave(&cm_id_priv->lock, flags);
+ qp = cm_id_priv->qp;
+ cm_id_priv->qp = NULL;
+
switch (cm_id_priv->state) {
case IW_CM_STATE_LISTEN:
cm_id_priv->state = IW_CM_STATE_DESTROYING;
@@ -401,7 +406,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
cm_id_priv->state = IW_CM_STATE_DESTROYING;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
/* Abrupt close of the connection */
- (void)iwcm_modify_qp_err(cm_id_priv->qp);
+ (void)iwcm_modify_qp_err(qp);
spin_lock_irqsave(&cm_id_priv->lock, flags);
break;
case IW_CM_STATE_IDLE:
@@ -426,29 +431,30 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
BUG();
break;
}
- if (cm_id_priv->qp) {
- cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp);
- cm_id_priv->qp = NULL;
- }
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ if (qp)
+ cm_id_priv->id.device->ops.iw_rem_ref(qp);
if (cm_id->mapped) {
iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr);
iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM);
}
-
- (void)iwcm_deref_id(cm_id_priv);
}
/*
- * This function is only called by the application thread and cannot
- * be called by the event thread. The function will wait for all
- * references to be released on the cm_id and then kfree the cm_id
- * object.
+ * Destroy cm_id. If the cm_id still has other references, wait for all
+ * references to be released on the cm_id and then release the initial
+ * reference taken by iw_create_cm_id.
*/
void iw_destroy_cm_id(struct iw_cm_id *cm_id)
{
+ struct iwcm_id_private *cm_id_priv;
+
+ cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
destroy_cm_id(cm_id);
+ if (refcount_read(&cm_id_priv->refcount) > 1)
+ flush_workqueue(iwcm_wq);
+ iwcm_deref_id(cm_id_priv);
}
EXPORT_SYMBOL(iw_destroy_cm_id);
@@ -671,11 +677,11 @@ int iw_cm_accept(struct iw_cm_id *cm_id,
BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
cm_id_priv->state = IW_CM_STATE_IDLE;
spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id_priv->qp) {
- cm_id->device->ops.iw_rem_ref(qp);
- cm_id_priv->qp = NULL;
- }
+ qp = cm_id_priv->qp;
+ cm_id_priv->qp = NULL;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ if (qp)
+ cm_id->device->ops.iw_rem_ref(qp);
clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
wake_up_all(&cm_id_priv->connect_wait);
}
@@ -696,7 +702,7 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
struct iwcm_id_private *cm_id_priv;
int ret;
unsigned long flags;
- struct ib_qp *qp;
+ struct ib_qp *qp = NULL;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
@@ -730,13 +736,13 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
return 0; /* success */
spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id_priv->qp) {
- cm_id->device->ops.iw_rem_ref(qp);
- cm_id_priv->qp = NULL;
- }
+ qp = cm_id_priv->qp;
+ cm_id_priv->qp = NULL;
cm_id_priv->state = IW_CM_STATE_IDLE;
err:
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ if (qp)
+ cm_id->device->ops.iw_rem_ref(qp);
clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
wake_up_all(&cm_id_priv->connect_wait);
return ret;
@@ -878,6 +884,7 @@ static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
struct iw_cm_event *iw_event)
{
+ struct ib_qp *qp = NULL;
unsigned long flags;
int ret;
@@ -896,11 +903,13 @@ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
} else {
/* REJECTED or RESET */
- cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp);
+ qp = cm_id_priv->qp;
cm_id_priv->qp = NULL;
cm_id_priv->state = IW_CM_STATE_IDLE;
}
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ if (qp)
+ cm_id_priv->id.device->ops.iw_rem_ref(qp);
ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
if (iw_event->private_data_len)
@@ -942,21 +951,18 @@ static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
struct iw_cm_event *iw_event)
{
+ struct ib_qp *qp;
unsigned long flags;
- int ret = 0;
+ int ret = 0, notify_event = 0;
spin_lock_irqsave(&cm_id_priv->lock, flags);
+ qp = cm_id_priv->qp;
+ cm_id_priv->qp = NULL;
- if (cm_id_priv->qp) {
- cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp);
- cm_id_priv->qp = NULL;
- }
switch (cm_id_priv->state) {
case IW_CM_STATE_ESTABLISHED:
case IW_CM_STATE_CLOSING:
cm_id_priv->state = IW_CM_STATE_IDLE;
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
- spin_lock_irqsave(&cm_id_priv->lock, flags);
+ notify_event = 1;
break;
case IW_CM_STATE_DESTROYING:
break;
@@ -965,6 +971,10 @@ static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
}
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ if (qp)
+ cm_id_priv->id.device->ops.iw_rem_ref(qp);
+ if (notify_event)
+ ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
return ret;
}
@@ -1011,30 +1021,27 @@ static void cm_work_handler(struct work_struct *_work)
struct iw_cm_event levent;
struct iwcm_id_private *cm_id_priv = work->cm_id;
unsigned long flags;
- int empty;
int ret = 0;
spin_lock_irqsave(&cm_id_priv->lock, flags);
- empty = list_empty(&cm_id_priv->work_list);
- while (!empty) {
- work = list_entry(cm_id_priv->work_list.next,
- struct iwcm_work, list);
+ while (!list_empty(&cm_id_priv->work_list)) {
+ work = list_first_entry(&cm_id_priv->work_list,
+ struct iwcm_work, list);
list_del_init(&work->list);
- empty = list_empty(&cm_id_priv->work_list);
levent = work->event;
put_work(work);
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
ret = process_event(cm_id_priv, &levent);
- if (ret)
+ if (ret) {
destroy_cm_id(&cm_id_priv->id);
+ WARN_ON_ONCE(iwcm_deref_id(cm_id_priv));
+ }
} else
pr_debug("dropping event %d\n", levent.event);
if (iwcm_deref_id(cm_id_priv))
return;
- if (empty)
- return;
spin_lock_irqsave(&cm_id_priv->lock, flags);
}
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -1086,12 +1093,9 @@ static int cm_event_handler(struct iw_cm_id *cm_id,
}
}
- atomic_inc(&cm_id_priv->refcount);
- if (list_empty(&cm_id_priv->work_list)) {
- list_add_tail(&work->list, &cm_id_priv->work_list);
- queue_work(iwcm_wq, &work->work);
- } else
- list_add_tail(&work->list, &cm_id_priv->work_list);
+ refcount_inc(&cm_id_priv->refcount);
+ list_add_tail(&work->list, &cm_id_priv->work_list);
+ queue_work(iwcm_wq, &work->work);
out:
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
@@ -1179,29 +1183,34 @@ static int __init iw_cm_init(void)
ret = iwpm_init(RDMA_NL_IWCM);
if (ret)
- pr_err("iw_cm: couldn't init iwpm\n");
- else
- rdma_nl_register(RDMA_NL_IWCM, iwcm_nl_cb_table);
- iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", 0);
+ return ret;
+
+ iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM);
if (!iwcm_wq)
- return -ENOMEM;
+ goto err_alloc;
iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm",
iwcm_ctl_table);
if (!iwcm_ctl_table_hdr) {
pr_err("iw_cm: couldn't register sysctl paths\n");
- destroy_workqueue(iwcm_wq);
- return -ENOMEM;
+ goto err_sysctl;
}
+ rdma_nl_register(RDMA_NL_IWCM, iwcm_nl_cb_table);
return 0;
+
+err_sysctl:
+ destroy_workqueue(iwcm_wq);
+err_alloc:
+ iwpm_exit(RDMA_NL_IWCM);
+ return -ENOMEM;
}
static void __exit iw_cm_cleanup(void)
{
+ rdma_nl_unregister(RDMA_NL_IWCM);
unregister_net_sysctl_table(iwcm_ctl_table_hdr);
destroy_workqueue(iwcm_wq);
- rdma_nl_unregister(RDMA_NL_IWCM);
iwpm_exit(RDMA_NL_IWCM);
}
diff --git a/drivers/infiniband/core/iwcm.h b/drivers/infiniband/core/iwcm.h
index 82c2cd1b0a80..bf74639be128 100644
--- a/drivers/infiniband/core/iwcm.h
+++ b/drivers/infiniband/core/iwcm.h
@@ -52,7 +52,7 @@ struct iwcm_id_private {
wait_queue_head_t connect_wait;
struct list_head work_list;
spinlock_t lock;
- atomic_t refcount;
+ refcount_t refcount;
struct list_head work_free_list;
};
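An aside on the atomic_t to refcount_t conversion in this header and in the iwcm.c hunks above: refcount_t saturates and WARNs on increment-from-zero and on underflow, which is why the old open-coded BUG_ON(atomic_read(&cm_id_priv->refcount) == 0) check could simply be dropped. The generic pattern, as a hedged standalone sketch with illustrative names:
/* Minimal refcount_t usage pattern (needs <linux/refcount.h> and <linux/slab.h>). */
struct obj {
	refcount_t refcount;
};
static void obj_get(struct obj *o)
{
	refcount_inc(&o->refcount);	/* WARNs if the count was already zero */
}
static void obj_put(struct obj *o)
{
	if (refcount_dec_and_test(&o->refcount))
		kfree(o);
}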
diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
index 2452b0ddcf0d..3c9a9869212b 100644
--- a/drivers/infiniband/core/iwpm_msg.c
+++ b/drivers/infiniband/core/iwpm_msg.c
@@ -69,10 +69,6 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
const char *err_str = "";
int ret = -EINVAL;
- if (!iwpm_valid_client(nl_client)) {
- err_str = "Invalid port mapper client";
- goto pid_query_error;
- }
if (iwpm_check_registration(nl_client, IWPM_REG_VALID) ||
iwpm_user_pid == IWPM_PID_UNAVAILABLE)
return 0;
@@ -112,7 +108,7 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
pr_debug("%s: Multicasting a nlmsg (dev = %s ifname = %s iwpm = %s)\n",
__func__, pm_msg->dev_name, pm_msg->if_name, iwpm_ulib_name);
- ret = rdma_nl_multicast(skb, RDMA_NL_GROUP_IWPM, GFP_KERNEL);
+ ret = rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_IWPM, GFP_KERNEL);
if (ret) {
skb = NULL; /* skb is freed in the netlink send-op handling */
iwpm_user_pid = IWPM_PID_UNAVAILABLE;
@@ -123,9 +119,8 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
ret = iwpm_wait_complete_req(nlmsg_request);
return ret;
pid_query_error:
- pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
- if (skb)
- dev_kfree_skb(skb);
+ pr_info("%s: %s (client = %u)\n", __func__, err_str, nl_client);
+ dev_kfree_skb(skb);
if (nlmsg_request)
iwpm_free_nlmsg_request(&nlmsg_request->kref);
return ret;
@@ -154,10 +149,6 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
const char *err_str = "";
int ret = -EINVAL;
- if (!iwpm_valid_client(nl_client)) {
- err_str = "Invalid port mapper client";
- goto add_mapping_error;
- }
if (!iwpm_valid_pid())
return 0;
if (!iwpm_check_registration(nl_client, IWPM_REG_VALID)) {
@@ -202,7 +193,7 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
nlmsg_end(skb, nlh);
nlmsg_request->req_buffer = pm_msg;
- ret = rdma_nl_unicast_wait(skb, iwpm_user_pid);
+ ret = rdma_nl_unicast_wait(&init_net, skb, iwpm_user_pid);
if (ret) {
skb = NULL; /* skb is freed in the netlink send-op handling */
iwpm_user_pid = IWPM_PID_UNDEFINED;
@@ -212,10 +203,9 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
ret = iwpm_wait_complete_req(nlmsg_request);
return ret;
add_mapping_error:
- pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+ pr_info("%s: %s (client = %u)\n", __func__, err_str, nl_client);
add_mapping_error_nowarn:
- if (skb)
- dev_kfree_skb(skb);
+ dev_kfree_skb(skb);
if (nlmsg_request)
iwpm_free_nlmsg_request(&nlmsg_request->kref);
return ret;
@@ -242,10 +232,6 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
const char *err_str = "";
int ret = -EINVAL;
- if (!iwpm_valid_client(nl_client)) {
- err_str = "Invalid port mapper client";
- goto query_mapping_error;
- }
if (!iwpm_valid_pid())
return 0;
if (!iwpm_check_registration(nl_client, IWPM_REG_VALID)) {
@@ -297,7 +283,7 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
nlmsg_end(skb, nlh);
nlmsg_request->req_buffer = pm_msg;
- ret = rdma_nl_unicast_wait(skb, iwpm_user_pid);
+ ret = rdma_nl_unicast_wait(&init_net, skb, iwpm_user_pid);
if (ret) {
skb = NULL; /* skb is freed in the netlink send-op handling */
err_str = "Unable to send a nlmsg";
@@ -306,10 +292,9 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
ret = iwpm_wait_complete_req(nlmsg_request);
return ret;
query_mapping_error:
- pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+ pr_info("%s: %s (client = %u)\n", __func__, err_str, nl_client);
query_mapping_error_nowarn:
- if (skb)
- dev_kfree_skb(skb);
+ dev_kfree_skb(skb);
if (nlmsg_request)
iwpm_free_nlmsg_request(&nlmsg_request->kref);
return ret;
@@ -334,10 +319,6 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client)
const char *err_str = "";
int ret = -EINVAL;
- if (!iwpm_valid_client(nl_client)) {
- err_str = "Invalid port mapper client";
- goto remove_mapping_error;
- }
if (!iwpm_valid_pid())
return 0;
if (iwpm_check_registration(nl_client, IWPM_REG_UNDEF)) {
@@ -364,7 +345,7 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client)
nlmsg_end(skb, nlh);
- ret = rdma_nl_unicast_wait(skb, iwpm_user_pid);
+ ret = rdma_nl_unicast_wait(&init_net, skb, iwpm_user_pid);
if (ret) {
skb = NULL; /* skb is freed in the netlink send-op handling */
iwpm_user_pid = IWPM_PID_UNDEFINED;
@@ -375,7 +356,7 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client)
"remove_mapping: Local sockaddr:");
return 0;
remove_mapping_error:
- pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+ pr_info("%s: %s (client = %u)\n", __func__, err_str, nl_client);
if (skb)
dev_kfree_skb_any(skb);
return ret;
@@ -395,7 +376,7 @@ static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = {
/**
* iwpm_register_pid_cb - Process the port mapper response to
* iwpm_register_pid query
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*
* If successful, the function receives the userspace port mapper pid
@@ -434,7 +415,7 @@ int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb)
strcmp(iwpm_ulib_name, iwpm_name) ||
iwpm_version < IWPM_UABI_VERSION_MIN) {
- pr_info("%s: Incorrect info (dev = %s name = %s version = %d)\n",
+ pr_info("%s: Incorrect info (dev = %s name = %s version = %u)\n",
__func__, dev_name, iwpm_name, iwpm_version);
nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
goto register_pid_response_exit;
@@ -442,13 +423,12 @@ int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb)
iwpm_user_pid = cb->nlh->nlmsg_pid;
iwpm_ulib_version = iwpm_version;
if (iwpm_ulib_version < IWPM_UABI_VERSION)
- pr_warn_once("%s: Down level iwpmd/pid %u. Continuing...",
+ pr_warn_once("%s: Down level iwpmd/pid %d. Continuing...",
__func__, iwpm_user_pid);
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n",
__func__, iwpm_user_pid);
- if (iwpm_valid_client(nl_client))
- iwpm_set_registration(nl_client, IWPM_REG_VALID);
+ iwpm_set_registration(nl_client, IWPM_REG_VALID);
register_pid_response_exit:
nlmsg_request->request_done = 1;
/* always for found nlmsg_request */
@@ -471,7 +451,7 @@ static const struct nla_policy resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = {
/**
* iwpm_add_mapping_cb - Process the port mapper response to
* iwpm_add_mapping request
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*/
int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb)
@@ -531,7 +511,8 @@ add_mapping_response_exit:
}
/* netlink attribute policy for the response to add and query mapping request
- * and response with remote address info */
+ * and response with remote address info
+ */
static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = {
[IWPM_NLA_RQUERY_MAPPING_SEQ] = { .type = NLA_U32 },
[IWPM_NLA_RQUERY_LOCAL_ADDR] = {
@@ -548,7 +529,7 @@ static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] =
/**
* iwpm_add_and_query_mapping_cb - Process the port mapper response to
* iwpm_add_and_query_mapping request
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*/
int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
@@ -630,7 +611,7 @@ query_mapping_response_exit:
/**
* iwpm_remote_info_cb - Process remote connecting peer address info, which
* the port mapper has received from the connecting peer
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*
* Stores the IPv4/IPv6 address info in a hash table
@@ -651,11 +632,6 @@ int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
return ret;
nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
- if (!iwpm_valid_client(nl_client)) {
- pr_info("%s: Invalid port mapper client = %d\n",
- __func__, nl_client);
- return ret;
- }
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
local_sockaddr = (struct sockaddr_storage *)
@@ -709,7 +685,7 @@ static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = {
/**
* iwpm_mapping_info_cb - Process a notification that the userspace
* port mapper daemon is started
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*
* Using the received port mapper pid, send all the local mapping
@@ -733,22 +709,17 @@ int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
iwpm_version = nla_get_u16(nltb[IWPM_NLA_MAPINFO_ULIB_VER]);
if (strcmp(iwpm_ulib_name, iwpm_name) ||
iwpm_version < IWPM_UABI_VERSION_MIN) {
- pr_info("%s: Invalid port mapper name = %s version = %d\n",
+ pr_info("%s: Invalid port mapper name = %s version = %u\n",
__func__, iwpm_name, iwpm_version);
return ret;
}
nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
- if (!iwpm_valid_client(nl_client)) {
- pr_info("%s: Invalid port mapper client = %d\n",
- __func__, nl_client);
- return ret;
- }
iwpm_set_registration(nl_client, IWPM_REG_INCOMPL);
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
iwpm_user_pid = cb->nlh->nlmsg_pid;
if (iwpm_ulib_version < IWPM_UABI_VERSION)
- pr_warn_once("%s: Down level iwpmd/pid %u. Continuing...",
+ pr_warn_once("%s: Down level iwpmd/pid %d. Continuing...",
__func__, iwpm_user_pid);
if (!iwpm_mapinfo_available())
@@ -769,7 +740,7 @@ static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = {
/**
* iwpm_ack_mapping_info_cb - Process the port mapper ack for
* the provided local mapping info records
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*/
int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
@@ -799,7 +770,7 @@ static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = {
/**
* iwpm_mapping_error_cb - Process port mapper notification for error
*
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*/
int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb)
@@ -844,7 +815,7 @@ static const struct nla_policy hello_policy[IWPM_NLA_HELLO_MAX] = {
/**
* iwpm_hello_cb - Process a hello message from iwpmd
*
- * @skb:
+ * @skb: The socket buffer
* @cb: Contains the received message (payload and netlink header)
*
* Using the received port mapper pid, send the kernel's abi_version
@@ -865,11 +836,6 @@ int iwpm_hello_cb(struct sk_buff *skb, struct netlink_callback *cb)
}
abi_version = nla_get_u16(nltb[IWPM_NLA_HELLO_ABI_VERSION]);
nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
- if (!iwpm_valid_client(nl_client)) {
- pr_info("%s: Invalid port mapper client = %d\n",
- __func__, nl_client);
- return ret;
- }
iwpm_set_registration(nl_client, IWPM_REG_INCOMPL);
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
iwpm_ulib_version = min_t(u16, IWPM_UABI_VERSION, abi_version);
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index 41929bb83739..eecb369898f5 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -48,7 +48,6 @@ static DEFINE_SPINLOCK(iwpm_mapinfo_lock);
static struct hlist_head *iwpm_reminfo_bucket;
static DEFINE_SPINLOCK(iwpm_reminfo_lock);
-static DEFINE_MUTEX(iwpm_admin_lock);
static struct iwpm_admin_data iwpm_admin;
/**
@@ -59,35 +58,21 @@ static struct iwpm_admin_data iwpm_admin;
*/
int iwpm_init(u8 nl_client)
{
- int ret = 0;
- mutex_lock(&iwpm_admin_lock);
- if (atomic_read(&iwpm_admin.refcount) == 0) {
- iwpm_hash_bucket = kcalloc(IWPM_MAPINFO_HASH_SIZE,
- sizeof(struct hlist_head),
- GFP_KERNEL);
- if (!iwpm_hash_bucket) {
- ret = -ENOMEM;
- goto init_exit;
- }
- iwpm_reminfo_bucket = kcalloc(IWPM_REMINFO_HASH_SIZE,
- sizeof(struct hlist_head),
- GFP_KERNEL);
- if (!iwpm_reminfo_bucket) {
- kfree(iwpm_hash_bucket);
- ret = -ENOMEM;
- goto init_exit;
- }
- }
- atomic_inc(&iwpm_admin.refcount);
-init_exit:
- mutex_unlock(&iwpm_admin_lock);
- if (!ret) {
- iwpm_set_valid(nl_client, 1);
- iwpm_set_registration(nl_client, IWPM_REG_UNDEF);
- pr_debug("%s: Mapinfo and reminfo tables are created\n",
- __func__);
+ iwpm_hash_bucket = kcalloc(IWPM_MAPINFO_HASH_SIZE,
+ sizeof(struct hlist_head), GFP_KERNEL);
+ if (!iwpm_hash_bucket)
+ return -ENOMEM;
+
+ iwpm_reminfo_bucket = kcalloc(IWPM_REMINFO_HASH_SIZE,
+ sizeof(struct hlist_head), GFP_KERNEL);
+ if (!iwpm_reminfo_bucket) {
+ kfree(iwpm_hash_bucket);
+ return -ENOMEM;
}
- return ret;
+
+ iwpm_set_registration(nl_client, IWPM_REG_UNDEF);
+ pr_debug("%s: Mapinfo and reminfo tables are created\n", __func__);
+ return 0;
}
static void free_hash_bucket(void);
@@ -101,22 +86,9 @@ static void free_reminfo_bucket(void);
*/
int iwpm_exit(u8 nl_client)
{
-
- if (!iwpm_valid_client(nl_client))
- return -EINVAL;
- mutex_lock(&iwpm_admin_lock);
- if (atomic_read(&iwpm_admin.refcount) == 0) {
- mutex_unlock(&iwpm_admin_lock);
- pr_err("%s Incorrect usage - negative refcount\n", __func__);
- return -EINVAL;
- }
- if (atomic_dec_and_test(&iwpm_admin.refcount)) {
- free_hash_bucket();
- free_reminfo_bucket();
- pr_debug("%s: Resources are destroyed\n", __func__);
- }
- mutex_unlock(&iwpm_admin_lock);
- iwpm_set_valid(nl_client, 0);
+ free_hash_bucket();
+ free_reminfo_bucket();
+ pr_debug("%s: Resources are destroyed\n", __func__);
iwpm_set_registration(nl_client, IWPM_REG_UNDEF);
return 0;
}
@@ -127,8 +99,8 @@ static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage *,
/**
* iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address
* info in a hash table
- * @local_addr: Local ip/tcp address
- * @mapped_addr: Mapped local ip/tcp address
+ * @local_sockaddr: Local ip/tcp address
+ * @mapped_sockaddr: Mapped local ip/tcp address
* @nl_client: The index of the netlink client
* @map_flags: IWPM mapping flags
*/
@@ -141,8 +113,6 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
unsigned long flags;
int ret = -EINVAL;
- if (!iwpm_valid_client(nl_client))
- return ret;
map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL);
if (!map_info)
return -ENOMEM;
@@ -174,7 +144,7 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
/**
* iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address
* info from the hash table
- * @local_addr: Local ip/tcp address
+ * @local_sockaddr: Local ip/tcp address
* @mapped_local_addr: Mapped local ip/tcp address
*
* Returns err code if mapping info is not found in the hash table,
@@ -302,10 +272,6 @@ int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr,
unsigned long flags;
int ret = -EINVAL;
- if (!iwpm_valid_client(nl_client)) {
- pr_info("%s: Invalid client = %d\n", __func__, nl_client);
- return ret;
- }
spin_lock_irqsave(&iwpm_reminfo_lock, flags);
if (iwpm_reminfo_bucket) {
hash_bucket_head = get_reminfo_hash_bucket(
@@ -341,7 +307,7 @@ get_remote_info_exit:
struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq,
u8 nl_client, gfp_t gfp)
{
- struct iwpm_nlmsg_request *nlmsg_request = NULL;
+ struct iwpm_nlmsg_request *nlmsg_request;
unsigned long flags;
nlmsg_request = kzalloc(sizeof(struct iwpm_nlmsg_request), gfp);
@@ -420,16 +386,6 @@ int iwpm_get_nlmsg_seq(void)
return atomic_inc_return(&iwpm_admin.nlmsg_seq);
}
-int iwpm_valid_client(u8 nl_client)
-{
- return iwpm_admin.client_list[nl_client];
-}
-
-void iwpm_set_valid(u8 nl_client, int valid)
-{
- iwpm_admin.client_list[nl_client] = valid;
-}
-
/* valid client */
u32 iwpm_get_registration(u8 nl_client)
{
@@ -645,18 +601,17 @@ static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid)
nlmsg_end(skb, nlh);
- ret = rdma_nl_unicast(skb, iwpm_pid);
+ ret = rdma_nl_unicast(&init_net, skb, iwpm_pid);
if (ret) {
skb = NULL;
err_str = "Unable to send a nlmsg";
goto mapinfo_num_error;
}
- pr_debug("%s: Sent mapping number = %d\n", __func__, mapping_num);
+ pr_debug("%s: Sent mapping number = %u\n", __func__, mapping_num);
return 0;
mapinfo_num_error:
pr_info("%s: %s\n", __func__, err_str);
- if (skb)
- dev_kfree_skb(skb);
+ dev_kfree_skb(skb);
return ret;
}
@@ -674,7 +629,7 @@ static int send_nlmsg_done(struct sk_buff *skb, u8 nl_client, int iwpm_pid)
return -ENOMEM;
}
nlh->nlmsg_type = NLMSG_DONE;
- ret = rdma_nl_unicast(skb, iwpm_pid);
+ ret = rdma_nl_unicast(&init_net, skb, iwpm_pid);
if (ret)
pr_warn("%s Unable to send a nlmsg\n", __func__);
return ret;
@@ -778,8 +733,7 @@ send_mapping_info_unlock:
send_mapping_info_exit:
if (ret) {
pr_warn("%s: %s (ret = %d)\n", __func__, err_str, ret);
- if (skb)
- dev_kfree_skb(skb);
+ dev_kfree_skb(skb);
return ret;
}
send_nlmsg_done(skb, nl_client, iwpm_pid);
@@ -808,7 +762,7 @@ int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version)
{
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
- const char *err_str = "";
+ const char *err_str;
int ret = -EINVAL;
skb = iwpm_create_nlmsg(RDMA_NL_IWPM_HELLO, &nlh, nl_client);
@@ -824,7 +778,7 @@ int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version)
goto hello_num_error;
nlmsg_end(skb, nlh);
- ret = rdma_nl_unicast(skb, iwpm_pid);
+ ret = rdma_nl_unicast(&init_net, skb, iwpm_pid);
if (ret) {
skb = NULL;
err_str = "Unable to send a nlmsg";
@@ -834,7 +788,6 @@ int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version)
return 0;
hello_num_error:
pr_info("%s: %s\n", __func__, err_str);
- if (skb)
- dev_kfree_skb(skb);
+ dev_kfree_skb(skb);
return ret;
}
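
With the hunks above applied, iwpm_init() and iwpm_exit() become a plain allocate/free pair: no mutex, no reference count, and no valid-client bookkeeping, since only a single port-mapper user remains. A minimal caller sketch, not part of the patch; the nl_client value, includes, and error handling are illustrative assumptions:

#include <rdma/iw_portmap.h>
#include <rdma/rdma_netlink.h>

/* Sketch of the simplified lifetime, assuming the iWARP CM is the sole user. */
static int demo_iwpm_bringup(void)
{
	int ret;

	/* Allocates the mapinfo and reminfo hash tables. */
	ret = iwpm_init(RDMA_NL_IWCM);
	if (ret)
		return ret;

	/* ... iwpm_create_mapinfo()/iwpm_remove_mapinfo() would be called here ... */

	/* Frees both tables and resets the registration state to IWPM_REG_UNDEF. */
	iwpm_exit(RDMA_NL_IWCM);
	return 0;
}
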
diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h
index 7e2bcc72f66c..d6fc8402158a 100644
--- a/drivers/infiniband/core/iwpm_util.h
+++ b/drivers/infiniband/core/iwpm_util.h
@@ -33,7 +33,6 @@
#ifndef _IWPM_UTIL_H
#define _IWPM_UTIL_H
-#include <linux/module.h>
#include <linux/io.h>
#include <linux/in.h>
#include <linux/in6.h>
@@ -90,9 +89,7 @@ struct iwpm_remote_info {
};
struct iwpm_admin_data {
- atomic_t refcount;
atomic_t nlmsg_seq;
- int client_list[RDMA_NL_NUM_CLIENTS];
u32 reg_list[RDMA_NL_NUM_CLIENTS];
};
@@ -141,29 +138,13 @@ int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request);
int iwpm_get_nlmsg_seq(void);
/**
- * iwpm_add_reminfo - Add remote address info of the connecting peer
+ * iwpm_add_remote_info - Add remote address info of the connecting peer
* to the remote info hash table
* @reminfo: The remote info to be added
*/
void iwpm_add_remote_info(struct iwpm_remote_info *reminfo);
/**
- * iwpm_valid_client - Check if the port mapper client is valid
- * @nl_client: The index of the netlink client
- *
- * Valid clients need to call iwpm_init() before using
- * the port mapper
- */
-int iwpm_valid_client(u8 nl_client);
-
-/**
- * iwpm_set_valid - Set the port mapper client to valid or not
- * @nl_client: The index of the netlink client
- * @valid: 1 if valid or 0 if invalid
- */
-void iwpm_set_valid(u8 nl_client, int valid);
-
-/**
* iwpm_check_registration - Check if the client registration
* matches the given one
* @nl_client: The index of the netlink client
@@ -183,7 +164,7 @@ u32 iwpm_check_registration(u8 nl_client, u32 reg);
void iwpm_set_registration(u8 nl_client, u32 reg);
/**
- * iwpm_get_registration
+ * iwpm_get_registration - Get the client registration
* @nl_client: The index of the netlink client
*
* Returns the client registration type
@@ -210,8 +191,10 @@ int iwpm_mapinfo_available(void);
/**
* iwpm_compare_sockaddr - Compare two sockaddr storage structs
+ * @a_sockaddr: first sockaddr to compare
+ * @b_sockaddr: second sockaddr to compare
*
- * Returns 0 if they are holding the same ip/tcp address info,
+ * Return: 0 if they are holding the same ip/tcp address info,
* otherwise returns 1
*/
int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr,
@@ -272,6 +255,7 @@ void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg);
* iwpm_send_hello - Send hello response to iwpmd
*
* @nl_client: The index of the netlink client
+ * @iwpm_pid: The pid of the user space port mapper
* @abi_version: The kernel's abi_version
*
* Returns 0 on success or a negative error code
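
The registration helpers documented in this header (iwpm_set_registration(), iwpm_check_registration(), iwpm_get_registration()) track how far the iwpmd handshake has progressed for a netlink client, and they are all that remains now that the valid-client table is gone. A hedged consumer sketch; the IWPM_REG_* flag values come from iwpm_util.h, and the policy shown is an assumption for illustration only:

/* Sketch only: decide whether a port-mapping request may be issued. */
static bool demo_mapping_allowed(u8 nl_client)
{
	/* Registration stays IWPM_REG_UNDEF until iwpmd announces itself. */
	if (iwpm_get_registration(nl_client) == IWPM_REG_UNDEF)
		return false;

	/* Usable once the hello/ABI exchange marked the client VALID. */
	return !!iwpm_check_registration(nl_client, IWPM_REG_VALID);
}
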
diff --git a/drivers/infiniband/core/lag.c b/drivers/infiniband/core/lag.c
new file mode 100644
index 000000000000..8fd80adfe833
--- /dev/null
+++ b/drivers/infiniband/core/lag.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
+ */
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_cache.h>
+#include <rdma/lag.h>
+
+static struct sk_buff *rdma_build_skb(struct net_device *netdev,
+ struct rdma_ah_attr *ah_attr,
+ gfp_t flags)
+{
+ struct ipv6hdr *ip6h;
+ struct sk_buff *skb;
+ struct ethhdr *eth;
+ struct iphdr *iph;
+ struct udphdr *uh;
+ u8 smac[ETH_ALEN];
+ bool is_ipv4;
+ int hdr_len;
+
+ is_ipv4 = ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw);
+ hdr_len = ETH_HLEN + sizeof(struct udphdr) + LL_RESERVED_SPACE(netdev);
+ hdr_len += is_ipv4 ? sizeof(struct iphdr) : sizeof(struct ipv6hdr);
+
+ skb = alloc_skb(hdr_len, flags);
+ if (!skb)
+ return NULL;
+
+ skb->dev = netdev;
+ skb_reserve(skb, hdr_len);
+ skb_push(skb, sizeof(struct udphdr));
+ skb_reset_transport_header(skb);
+ uh = udp_hdr(skb);
+ uh->source =
+ htons(rdma_flow_label_to_udp_sport(ah_attr->grh.flow_label));
+ uh->dest = htons(ROCE_V2_UDP_DPORT);
+ uh->len = htons(sizeof(struct udphdr));
+
+ if (is_ipv4) {
+ skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
+ iph->frag_off = 0;
+ iph->version = 4;
+ iph->protocol = IPPROTO_UDP;
+ iph->ihl = 0x5;
+ iph->tot_len = htons(sizeof(struct udphdr) + sizeof(struct iphdr));
+ memcpy(&iph->saddr, ah_attr->grh.sgid_attr->gid.raw + 12,
+ sizeof(struct in_addr));
+ memcpy(&iph->daddr, ah_attr->grh.dgid.raw + 12,
+ sizeof(struct in_addr));
+ } else {
+ skb_push(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ ip6h = ipv6_hdr(skb);
+ ip6h->version = 6;
+ ip6h->nexthdr = IPPROTO_UDP;
+ memcpy(&ip6h->flow_lbl, &ah_attr->grh.flow_label,
+ sizeof(*ip6h->flow_lbl));
+ memcpy(&ip6h->saddr, ah_attr->grh.sgid_attr->gid.raw,
+ sizeof(struct in6_addr));
+ memcpy(&ip6h->daddr, ah_attr->grh.dgid.raw,
+ sizeof(struct in6_addr));
+ }
+
+ skb_push(skb, sizeof(struct ethhdr));
+ skb_reset_mac_header(skb);
+ eth = eth_hdr(skb);
+ skb->protocol = eth->h_proto = htons(is_ipv4 ? ETH_P_IP : ETH_P_IPV6);
+ rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr, NULL, smac);
+ memcpy(eth->h_source, smac, ETH_ALEN);
+ memcpy(eth->h_dest, ah_attr->roce.dmac, ETH_ALEN);
+
+ return skb;
+}
+
+static struct net_device *rdma_get_xmit_slave_udp(struct ib_device *device,
+ struct net_device *master,
+ struct rdma_ah_attr *ah_attr,
+ gfp_t flags)
+{
+ struct net_device *slave;
+ struct sk_buff *skb;
+
+ skb = rdma_build_skb(master, ah_attr, flags);
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+
+ rcu_read_lock();
+ slave = netdev_get_xmit_slave(master, skb,
+ !!(device->lag_flags &
+ RDMA_LAG_FLAGS_HASH_ALL_SLAVES));
+ dev_hold(slave);
+ rcu_read_unlock();
+ kfree_skb(skb);
+ return slave;
+}
+
+void rdma_lag_put_ah_roce_slave(struct net_device *xmit_slave)
+{
+ dev_put(xmit_slave);
+}
+
+struct net_device *rdma_lag_get_ah_roce_slave(struct ib_device *device,
+ struct rdma_ah_attr *ah_attr,
+ gfp_t flags)
+{
+ struct net_device *slave = NULL;
+ struct net_device *master;
+
+ if (!(ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE &&
+ ah_attr->grh.sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
+ ah_attr->grh.flow_label))
+ return NULL;
+
+ rcu_read_lock();
+ master = rdma_read_gid_attr_ndev_rcu(ah_attr->grh.sgid_attr);
+ if (IS_ERR(master)) {
+ rcu_read_unlock();
+ return master;
+ }
+ dev_hold(master);
+ rcu_read_unlock();
+
+ if (!netif_is_bond_master(master))
+ goto put;
+
+ slave = rdma_get_xmit_slave_udp(device, master, ah_attr, flags);
+put:
+ dev_put(master);
+ return slave;
+}
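
The new lag.c builds a throw-away RoCEv2 UDP skb from the AH attributes and asks the bonding driver which slave that flow would hash to, so RoCE traffic follows the same LAG distribution as ordinary sockets. A hedged driver-side sketch of consuming the two exported helpers when creating an address handle; the wrapper function and debug print are illustrative assumptions, not part of the patch:

#include <linux/netdevice.h>
#include <rdma/ib_verbs.h>
#include <rdma/lag.h>

/* Sketch: resolve the bond slave a new RoCE AH would egress through. */
static int demo_resolve_ah_slave(struct ib_device *ibdev,
				 struct rdma_ah_attr *ah_attr)
{
	struct net_device *slave;

	/* NULL means no LAG decision is needed; ERR_PTR means the lookup failed. */
	slave = rdma_lag_get_ah_roce_slave(ibdev, ah_attr, GFP_KERNEL);
	if (IS_ERR(slave))
		return PTR_ERR(slave);

	if (slave) {
		pr_debug("AH egresses via %s\n", netdev_name(slave));
		/* A real driver would record this netdev in its HW AH here. */
		rdma_lag_put_ah_roce_slave(slave);
	}
	return 0;
}
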
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index cc99479b2c09..8f26bfb69586 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -59,9 +59,6 @@ static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr,
struct ib_mad_qp_info *qp_info,
struct trace_event_raw_ib_mad_send_template *entry)
{
- u16 pkey;
- struct ib_device *dev = qp_info->port_priv->device;
- u8 pnum = qp_info->port_priv->port_num;
struct ib_ud_wr *wr = &mad_send_wr->send_wr;
struct rdma_ah_attr attr = {};
@@ -69,8 +66,6 @@ static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr,
/* These are common */
entry->sl = attr.sl;
- ib_query_pkey(dev, pnum, wr->pkey_index, &pkey);
- entry->pkey = pkey;
entry->rqpn = wr->remote_qpn;
entry->rqkey = wr->remote_qkey;
entry->dlid = rdma_ah_get_dlid(&attr);
@@ -85,7 +80,6 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests
module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
-/* Client ID 0 is used for snoop-only clients */
static DEFINE_XARRAY_ALLOC1(ib_mad_clients);
static u32 ib_mad_client_next;
static struct list_head ib_mad_port_list;
@@ -119,7 +113,7 @@ static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc);
* Assumes ib_mad_port_list_lock is being held
*/
static inline struct ib_mad_port_private *
-__ib_get_mad_port(struct ib_device *device, int port_num)
+__ib_get_mad_port(struct ib_device *device, u32 port_num)
{
struct ib_mad_port_private *entry;
@@ -135,7 +129,7 @@ __ib_get_mad_port(struct ib_device *device, int port_num)
* for a device/port
*/
static inline struct ib_mad_port_private *
-ib_get_mad_port(struct ib_device *device, int port_num)
+ib_get_mad_port(struct ib_device *device, u32 port_num)
{
struct ib_mad_port_private *entry;
unsigned long flags;
@@ -156,8 +150,7 @@ static inline u8 convert_mgmt_class(u8 mgmt_class)
static int get_spl_qp_index(enum ib_qp_type qp_type)
{
- switch (qp_type)
- {
+ switch (qp_type) {
case IB_QPT_SMI:
return 0;
case IB_QPT_GSI:
@@ -217,13 +210,36 @@ int ib_response_mad(const struct ib_mad_hdr *hdr)
}
EXPORT_SYMBOL(ib_response_mad);
+#define SOL_FC_MAX_DEFAULT_FRAC 4
+#define SOL_FC_MAX_SA_FRAC 32
+
+static int get_sol_fc_max_outstanding(struct ib_mad_reg_req *mad_reg_req)
+{
+ if (!mad_reg_req)
+ /* Send only agent */
+ return mad_recvq_size / SOL_FC_MAX_DEFAULT_FRAC;
+
+ switch (mad_reg_req->mgmt_class) {
+ case IB_MGMT_CLASS_CM:
+ return mad_recvq_size / SOL_FC_MAX_DEFAULT_FRAC;
+ case IB_MGMT_CLASS_SUBN_ADM:
+ return mad_recvq_size / SOL_FC_MAX_SA_FRAC;
+ case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+ case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
+ return min(mad_recvq_size, IB_MAD_QP_RECV_SIZE) /
+ SOL_FC_MAX_DEFAULT_FRAC;
+ default:
+ return 0;
+ }
+}
+
/*
* ib_register_mad_agent - Register to send/receive MADs
*
* Context: Process context.
*/
struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
enum ib_qp_type qp_type,
struct ib_mad_reg_req *mad_reg_req,
u8 rmpp_version,
@@ -353,7 +369,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
/* Validate device and port */
port_priv = ib_get_mad_port(device, port_num);
if (!port_priv) {
- dev_dbg_ratelimited(&device->dev, "%s: Invalid port %d\n",
+ dev_dbg_ratelimited(&device->dev, "%s: Invalid port %u\n",
__func__, port_num);
ret = ERR_PTR(-ENODEV);
goto error1;
@@ -398,13 +414,17 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
spin_lock_init(&mad_agent_priv->lock);
INIT_LIST_HEAD(&mad_agent_priv->send_list);
INIT_LIST_HEAD(&mad_agent_priv->wait_list);
- INIT_LIST_HEAD(&mad_agent_priv->done_list);
INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
+ INIT_LIST_HEAD(&mad_agent_priv->backlog_list);
INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends);
INIT_LIST_HEAD(&mad_agent_priv->local_list);
INIT_WORK(&mad_agent_priv->local_work, local_completions);
- atomic_set(&mad_agent_priv->refcount, 1);
+ refcount_set(&mad_agent_priv->refcount, 1);
init_completion(&mad_agent_priv->comp);
+ mad_agent_priv->sol_fc_send_count = 0;
+ mad_agent_priv->sol_fc_wait_count = 0;
+ mad_agent_priv->sol_fc_max =
+ recv_handler ? get_sol_fc_max_outstanding(mad_reg_req) : 0;
ret2 = ib_mad_agent_security_setup(&mad_agent_priv->agent, qp_type);
if (ret2) {
@@ -483,141 +503,12 @@ error1:
}
EXPORT_SYMBOL(ib_register_mad_agent);
-static inline int is_snooping_sends(int mad_snoop_flags)
-{
- return (mad_snoop_flags &
- (/*IB_MAD_SNOOP_POSTED_SENDS |
- IB_MAD_SNOOP_RMPP_SENDS |*/
- IB_MAD_SNOOP_SEND_COMPLETIONS /*|
- IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS*/));
-}
-
-static inline int is_snooping_recvs(int mad_snoop_flags)
-{
- return (mad_snoop_flags &
- (IB_MAD_SNOOP_RECVS /*|
- IB_MAD_SNOOP_RMPP_RECVS*/));
-}
-
-static int register_snoop_agent(struct ib_mad_qp_info *qp_info,
- struct ib_mad_snoop_private *mad_snoop_priv)
-{
- struct ib_mad_snoop_private **new_snoop_table;
- unsigned long flags;
- int i;
-
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- /* Check for empty slot in array. */
- for (i = 0; i < qp_info->snoop_table_size; i++)
- if (!qp_info->snoop_table[i])
- break;
-
- if (i == qp_info->snoop_table_size) {
- /* Grow table. */
- new_snoop_table = krealloc(qp_info->snoop_table,
- sizeof mad_snoop_priv *
- (qp_info->snoop_table_size + 1),
- GFP_ATOMIC);
- if (!new_snoop_table) {
- i = -ENOMEM;
- goto out;
- }
-
- qp_info->snoop_table = new_snoop_table;
- qp_info->snoop_table_size++;
- }
- qp_info->snoop_table[i] = mad_snoop_priv;
- atomic_inc(&qp_info->snoop_count);
-out:
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
- return i;
-}
-
-struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
- u8 port_num,
- enum ib_qp_type qp_type,
- int mad_snoop_flags,
- ib_mad_snoop_handler snoop_handler,
- ib_mad_recv_handler recv_handler,
- void *context)
-{
- struct ib_mad_port_private *port_priv;
- struct ib_mad_agent *ret;
- struct ib_mad_snoop_private *mad_snoop_priv;
- int qpn;
- int err;
-
- /* Validate parameters */
- if ((is_snooping_sends(mad_snoop_flags) && !snoop_handler) ||
- (is_snooping_recvs(mad_snoop_flags) && !recv_handler)) {
- ret = ERR_PTR(-EINVAL);
- goto error1;
- }
- qpn = get_spl_qp_index(qp_type);
- if (qpn == -1) {
- ret = ERR_PTR(-EINVAL);
- goto error1;
- }
- port_priv = ib_get_mad_port(device, port_num);
- if (!port_priv) {
- ret = ERR_PTR(-ENODEV);
- goto error1;
- }
- /* Allocate structures */
- mad_snoop_priv = kzalloc(sizeof *mad_snoop_priv, GFP_KERNEL);
- if (!mad_snoop_priv) {
- ret = ERR_PTR(-ENOMEM);
- goto error1;
- }
-
- /* Now, fill in the various structures */
- mad_snoop_priv->qp_info = &port_priv->qp_info[qpn];
- mad_snoop_priv->agent.device = device;
- mad_snoop_priv->agent.recv_handler = recv_handler;
- mad_snoop_priv->agent.snoop_handler = snoop_handler;
- mad_snoop_priv->agent.context = context;
- mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp;
- mad_snoop_priv->agent.port_num = port_num;
- mad_snoop_priv->mad_snoop_flags = mad_snoop_flags;
- init_completion(&mad_snoop_priv->comp);
-
- err = ib_mad_agent_security_setup(&mad_snoop_priv->agent, qp_type);
- if (err) {
- ret = ERR_PTR(err);
- goto error2;
- }
-
- mad_snoop_priv->snoop_index = register_snoop_agent(
- &port_priv->qp_info[qpn],
- mad_snoop_priv);
- if (mad_snoop_priv->snoop_index < 0) {
- ret = ERR_PTR(mad_snoop_priv->snoop_index);
- goto error3;
- }
-
- atomic_set(&mad_snoop_priv->refcount, 1);
- return &mad_snoop_priv->agent;
-error3:
- ib_mad_agent_security_cleanup(&mad_snoop_priv->agent);
-error2:
- kfree(mad_snoop_priv);
-error1:
- return ret;
-}
-EXPORT_SYMBOL(ib_register_mad_snoop);
-
static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
{
- if (atomic_dec_and_test(&mad_agent_priv->refcount))
+ if (refcount_dec_and_test(&mad_agent_priv->refcount))
complete(&mad_agent_priv->comp);
}
-static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv)
-{
- if (atomic_dec_and_test(&mad_snoop_priv->refcount))
- complete(&mad_snoop_priv->comp);
-}
-
static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
{
struct ib_mad_port_private *port_priv;
@@ -639,10 +530,10 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
flush_workqueue(port_priv->wq);
- ib_cancel_rmpp_recvs(mad_agent_priv);
deref_mad_agent(mad_agent_priv);
wait_for_completion(&mad_agent_priv->comp);
+ ib_cancel_rmpp_recvs(mad_agent_priv);
ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
@@ -650,25 +541,6 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
kfree_rcu(mad_agent_priv, rcu);
}
-static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
-{
- struct ib_mad_qp_info *qp_info;
- unsigned long flags;
-
- qp_info = mad_snoop_priv->qp_info;
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- qp_info->snoop_table[mad_snoop_priv->snoop_index] = NULL;
- atomic_dec(&qp_info->snoop_count);
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
-
- deref_snoop_agent(mad_snoop_priv);
- wait_for_completion(&mad_snoop_priv->comp);
-
- ib_mad_agent_security_cleanup(&mad_snoop_priv->agent);
-
- kfree(mad_snoop_priv);
-}
-
/*
* ib_unregister_mad_agent - Unregisters a client from using MAD services
*
@@ -677,20 +549,11 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
{
struct ib_mad_agent_private *mad_agent_priv;
- struct ib_mad_snoop_private *mad_snoop_priv;
-
- /* If the TID is zero, the agent can only snoop. */
- if (mad_agent->hi_tid) {
- mad_agent_priv = container_of(mad_agent,
- struct ib_mad_agent_private,
- agent);
- unregister_mad_agent(mad_agent_priv);
- } else {
- mad_snoop_priv = container_of(mad_agent,
- struct ib_mad_snoop_private,
- agent);
- unregister_mad_snoop(mad_snoop_priv);
- }
+
+ mad_agent_priv = container_of(mad_agent,
+ struct ib_mad_agent_private,
+ agent);
+ unregister_mad_agent(mad_agent_priv);
}
EXPORT_SYMBOL(ib_unregister_mad_agent);
@@ -706,59 +569,8 @@ static void dequeue_mad(struct ib_mad_list_head *mad_list)
spin_unlock_irqrestore(&mad_queue->lock, flags);
}
-static void snoop_send(struct ib_mad_qp_info *qp_info,
- struct ib_mad_send_buf *send_buf,
- struct ib_mad_send_wc *mad_send_wc,
- int mad_snoop_flags)
-{
- struct ib_mad_snoop_private *mad_snoop_priv;
- unsigned long flags;
- int i;
-
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- for (i = 0; i < qp_info->snoop_table_size; i++) {
- mad_snoop_priv = qp_info->snoop_table[i];
- if (!mad_snoop_priv ||
- !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
- continue;
-
- atomic_inc(&mad_snoop_priv->refcount);
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
- mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent,
- send_buf, mad_send_wc);
- deref_snoop_agent(mad_snoop_priv);
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- }
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
-}
-
-static void snoop_recv(struct ib_mad_qp_info *qp_info,
- struct ib_mad_recv_wc *mad_recv_wc,
- int mad_snoop_flags)
-{
- struct ib_mad_snoop_private *mad_snoop_priv;
- unsigned long flags;
- int i;
-
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- for (i = 0; i < qp_info->snoop_table_size; i++) {
- mad_snoop_priv = qp_info->snoop_table[i];
- if (!mad_snoop_priv ||
- !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
- continue;
-
- atomic_inc(&mad_snoop_priv->refcount);
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
- mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, NULL,
- mad_recv_wc);
- deref_snoop_agent(mad_snoop_priv);
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- }
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
-}
-
static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid,
- u16 pkey_index, u8 port_num, struct ib_wc *wc)
+ u16 pkey_index, u32 port_num, struct ib_wc *wc)
{
memset(wc, 0, sizeof *wc);
wc->wr_cqe = cqe;
@@ -817,7 +629,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
struct ib_mad_port_private *port_priv;
struct ib_mad_agent_private *recv_mad_agent = NULL;
struct ib_device *device = mad_agent_priv->agent.device;
- u8 port_num;
+ u32 port_num;
struct ib_wc mad_wc;
struct ib_ud_wr *send_wr = &mad_send_wr->send_wr;
size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv);
@@ -913,11 +725,10 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
/* No GRH for DR SMP */
ret = device->ops.process_mad(device, 0, port_num, &mad_wc, NULL,
- (const struct ib_mad_hdr *)smp, mad_size,
- (struct ib_mad_hdr *)mad_priv->mad,
- &mad_size, &out_mad_pkey_index);
- switch (ret)
- {
+ (const struct ib_mad *)smp,
+ (struct ib_mad *)mad_priv->mad, &mad_size,
+ &out_mad_pkey_index);
+ switch (ret) {
case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY:
if (ib_response_mad((const struct ib_mad_hdr *)mad_priv->mad) &&
mad_agent_priv->agent.recv_handler) {
@@ -927,7 +738,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
* Reference MAD agent until receive
* side of local completion handled
*/
- atomic_inc(&mad_agent_priv->refcount);
+ refcount_inc(&mad_agent_priv->refcount);
} else
kfree(mad_priv);
break;
@@ -967,7 +778,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
local->return_wc_byte_len = mad_size;
}
/* Reference MAD agent until send side of local completion handled */
- atomic_inc(&mad_agent_priv->refcount);
+ refcount_inc(&mad_agent_priv->refcount);
/* Queue local completion to local list */
spin_lock_irqsave(&mad_agent_priv->lock, flags);
list_add_tail(&local->completion_list, &mad_agent_priv->local_list);
@@ -1016,7 +827,7 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
/* Allocate data segments. */
for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
- seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
+ seg = kmalloc(sizeof(*seg) + seg_size, gfp_mask);
if (!seg) {
free_send_rmpp_list(send_wr);
return -ENOMEM;
@@ -1046,12 +857,11 @@ int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent)
}
EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent);
-struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
- u32 remote_qpn, u16 pkey_index,
- int rmpp_active,
- int hdr_len, int data_len,
- gfp_t gfp_mask,
- u8 base_version)
+struct ib_mad_send_buf *ib_create_send_mad(struct ib_mad_agent *mad_agent,
+ u32 remote_qpn, u16 pkey_index,
+ int rmpp_active, int hdr_len,
+ int data_len, gfp_t gfp_mask,
+ u8 base_version)
{
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_wr_private *mad_send_wr;
@@ -1125,7 +935,7 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
}
mad_send_wr->send_buf.mad_agent = mad_agent;
- atomic_inc(&mad_agent_priv->refcount);
+ refcount_inc(&mad_agent_priv->refcount);
return &mad_send_wr->send_buf;
}
EXPORT_SYMBOL(ib_create_send_mad);
@@ -1272,6 +1082,180 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
return ret;
}
+static void handle_queued_state(struct ib_mad_send_wr_private *mad_send_wr,
+ struct ib_mad_agent_private *mad_agent_priv)
+{
+ if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP) {
+ mad_agent_priv->sol_fc_wait_count--;
+ list_move_tail(&mad_send_wr->agent_list,
+ &mad_agent_priv->backlog_list);
+ } else {
+ expect_mad_state(mad_send_wr, IB_MAD_STATE_INIT);
+ list_add_tail(&mad_send_wr->agent_list,
+ &mad_agent_priv->backlog_list);
+ }
+}
+
+static void handle_send_state(struct ib_mad_send_wr_private *mad_send_wr,
+ struct ib_mad_agent_private *mad_agent_priv)
+{
+ if (mad_send_wr->state == IB_MAD_STATE_INIT) {
+ list_add_tail(&mad_send_wr->agent_list,
+ &mad_agent_priv->send_list);
+ } else {
+ expect_mad_state2(mad_send_wr, IB_MAD_STATE_WAIT_RESP,
+ IB_MAD_STATE_QUEUED);
+ list_move_tail(&mad_send_wr->agent_list,
+ &mad_agent_priv->send_list);
+ }
+
+ if (mad_send_wr->is_solicited_fc) {
+ if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP)
+ mad_agent_priv->sol_fc_wait_count--;
+ mad_agent_priv->sol_fc_send_count++;
+ }
+}
+
+static void handle_wait_state(struct ib_mad_send_wr_private *mad_send_wr,
+ struct ib_mad_agent_private *mad_agent_priv)
+{
+ struct ib_mad_send_wr_private *temp_mad_send_wr;
+ struct list_head *list_item;
+ unsigned long delay;
+
+ expect_mad_state3(mad_send_wr, IB_MAD_STATE_SEND_START,
+ IB_MAD_STATE_WAIT_RESP, IB_MAD_STATE_CANCELED);
+ if (mad_send_wr->state == IB_MAD_STATE_SEND_START &&
+ mad_send_wr->is_solicited_fc) {
+ mad_agent_priv->sol_fc_send_count--;
+ mad_agent_priv->sol_fc_wait_count++;
+ }
+
+ list_del_init(&mad_send_wr->agent_list);
+ delay = mad_send_wr->timeout;
+ mad_send_wr->timeout += jiffies;
+
+ if (delay) {
+ list_for_each_prev(list_item,
+ &mad_agent_priv->wait_list) {
+ temp_mad_send_wr = list_entry(
+ list_item,
+ struct ib_mad_send_wr_private,
+ agent_list);
+ if (time_after(mad_send_wr->timeout,
+ temp_mad_send_wr->timeout))
+ break;
+ }
+ } else {
+ list_item = &mad_agent_priv->wait_list;
+ }
+
+ list_add(&mad_send_wr->agent_list, list_item);
+}
+
+static void handle_early_resp_state(struct ib_mad_send_wr_private *mad_send_wr,
+ struct ib_mad_agent_private *mad_agent_priv)
+{
+ expect_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START);
+ mad_agent_priv->sol_fc_send_count -= mad_send_wr->is_solicited_fc;
+}
+
+static void handle_canceled_state(struct ib_mad_send_wr_private *mad_send_wr,
+ struct ib_mad_agent_private *mad_agent_priv)
+{
+ not_expect_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
+ if (mad_send_wr->is_solicited_fc) {
+ if (mad_send_wr->state == IB_MAD_STATE_SEND_START)
+ mad_agent_priv->sol_fc_send_count--;
+ else if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP)
+ mad_agent_priv->sol_fc_wait_count--;
+ }
+}
+
+static void handle_done_state(struct ib_mad_send_wr_private *mad_send_wr,
+ struct ib_mad_agent_private *mad_agent_priv)
+{
+ if (mad_send_wr->is_solicited_fc) {
+ if (mad_send_wr->state == IB_MAD_STATE_SEND_START)
+ mad_agent_priv->sol_fc_send_count--;
+ else if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP)
+ mad_agent_priv->sol_fc_wait_count--;
+ }
+
+ list_del_init(&mad_send_wr->agent_list);
+}
+
+void change_mad_state(struct ib_mad_send_wr_private *mad_send_wr,
+ enum ib_mad_state new_state)
+{
+ struct ib_mad_agent_private *mad_agent_priv =
+ mad_send_wr->mad_agent_priv;
+
+ switch (new_state) {
+ case IB_MAD_STATE_INIT:
+ break;
+ case IB_MAD_STATE_QUEUED:
+ handle_queued_state(mad_send_wr, mad_agent_priv);
+ break;
+ case IB_MAD_STATE_SEND_START:
+ handle_send_state(mad_send_wr, mad_agent_priv);
+ break;
+ case IB_MAD_STATE_WAIT_RESP:
+ handle_wait_state(mad_send_wr, mad_agent_priv);
+ if (mad_send_wr->state == IB_MAD_STATE_CANCELED)
+ return;
+ break;
+ case IB_MAD_STATE_EARLY_RESP:
+ handle_early_resp_state(mad_send_wr, mad_agent_priv);
+ break;
+ case IB_MAD_STATE_CANCELED:
+ handle_canceled_state(mad_send_wr, mad_agent_priv);
+ break;
+ case IB_MAD_STATE_DONE:
+ handle_done_state(mad_send_wr, mad_agent_priv);
+ break;
+ }
+
+ mad_send_wr->state = new_state;
+}
+
+static bool is_solicited_fc_mad(struct ib_mad_send_wr_private *mad_send_wr)
+{
+ struct ib_rmpp_mad *rmpp_mad;
+ u8 mgmt_class;
+
+ if (!mad_send_wr->timeout)
+ return 0;
+
+ rmpp_mad = mad_send_wr->send_buf.mad;
+ if (mad_send_wr->mad_agent_priv->agent.rmpp_version &&
+ (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE))
+ return 0;
+
+ mgmt_class =
+ ((struct ib_mad_hdr *)mad_send_wr->send_buf.mad)->mgmt_class;
+ return mgmt_class == IB_MGMT_CLASS_CM ||
+ mgmt_class == IB_MGMT_CLASS_SUBN_ADM ||
+ mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+ mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE;
+}
+
+static bool mad_is_for_backlog(struct ib_mad_send_wr_private *mad_send_wr)
+{
+ struct ib_mad_agent_private *mad_agent_priv =
+ mad_send_wr->mad_agent_priv;
+
+ if (!mad_send_wr->is_solicited_fc || !mad_agent_priv->sol_fc_max)
+ return false;
+
+ if (!list_empty(&mad_agent_priv->backlog_list))
+ return true;
+
+ return mad_agent_priv->sol_fc_send_count +
+ mad_agent_priv->sol_fc_wait_count >=
+ mad_agent_priv->sol_fc_max;
+}
+
/*
* ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
* with the registered client
@@ -1297,9 +1281,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
if (ret)
goto error;
- if (!send_buf->mad_agent->send_handler ||
- (send_buf->timeout_ms &&
- !send_buf->mad_agent->recv_handler)) {
+ if (!send_buf->mad_agent->send_handler) {
ret = -EINVAL;
goto error;
}
@@ -1335,15 +1317,19 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
mad_send_wr->max_retries = send_buf->retries;
mad_send_wr->retries_left = send_buf->retries;
send_buf->retries = 0;
- /* Reference for work request to QP + response */
- mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
- mad_send_wr->status = IB_WC_SUCCESS;
+ change_mad_state(mad_send_wr, IB_MAD_STATE_INIT);
/* Reference MAD agent until send completes */
- atomic_inc(&mad_agent_priv->refcount);
+ refcount_inc(&mad_agent_priv->refcount);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
- list_add_tail(&mad_send_wr->agent_list,
- &mad_agent_priv->send_list);
+ mad_send_wr->is_solicited_fc = is_solicited_fc_mad(mad_send_wr);
+ if (mad_is_for_backlog(mad_send_wr)) {
+ change_mad_state(mad_send_wr, IB_MAD_STATE_QUEUED);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ return 0;
+ }
+
+ change_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
@@ -1355,9 +1341,9 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
if (ret < 0) {
/* Fail send request */
spin_lock_irqsave(&mad_agent_priv->lock, flags);
- list_del(&mad_send_wr->agent_list);
+ change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
goto error;
}
}
@@ -1397,25 +1383,6 @@ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc)
}
EXPORT_SYMBOL(ib_free_recv_mad);
-struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp,
- u8 rmpp_version,
- ib_mad_send_handler send_handler,
- ib_mad_recv_handler recv_handler,
- void *context)
-{
- return ERR_PTR(-EINVAL); /* XXX: for now */
-}
-EXPORT_SYMBOL(ib_redirect_mad_qp);
-
-int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
- struct ib_wc *wc)
-{
- dev_err(&mad_agent->device->dev,
- "ib_process_mad_wc() not implemented yet\n");
- return 0;
-}
-EXPORT_SYMBOL(ib_process_mad_wc);
-
static int method_in_use(struct ib_mad_mgmt_method_table **method,
struct ib_mad_reg_req *mad_reg_req)
{
@@ -1503,11 +1470,9 @@ static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method,
int i;
/* Remove any methods for this mad agent */
- for (i = 0; i < IB_MGMT_MAX_METHODS; i++) {
- if (method->agent[i] == agent) {
+ for (i = 0; i < IB_MGMT_MAX_METHODS; i++)
+ if (method->agent[i] == agent)
method->agent[i] = NULL;
- }
- }
}
static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
@@ -1682,9 +1647,8 @@ static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv)
* Was MAD registration request supplied
* with original registration ?
*/
- if (!agent_priv->reg_req) {
+ if (!agent_priv->reg_req)
goto out;
- }
port_priv = agent_priv->qp_info->port_priv;
mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class);
@@ -1782,7 +1746,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
hi_tid = be64_to_cpu(mad_hdr->tid) >> 32;
rcu_read_lock();
mad_agent = xa_load(&ib_mad_clients, hi_tid);
- if (mad_agent && !atomic_inc_not_zero(&mad_agent->refcount))
+ if (mad_agent && !refcount_inc_not_zero(&mad_agent->refcount))
mad_agent = NULL;
rcu_read_unlock();
} else {
@@ -1834,14 +1798,14 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
}
}
if (mad_agent)
- atomic_inc(&mad_agent->refcount);
+ refcount_inc(&mad_agent->refcount);
out:
spin_unlock_irqrestore(&port_priv->reg_lock, flags);
}
if (mad_agent && !mad_agent->agent.recv_handler) {
dev_notice(&port_priv->device->dev,
- "No receive handler for client %p on port %d\n",
+ "No receive handler for client %p on port %u\n",
&mad_agent->agent, port_priv->port_num);
deref_mad_agent(mad_agent);
mad_agent = NULL;
@@ -1860,7 +1824,7 @@ static int validate_mad(const struct ib_mad_hdr *mad_hdr,
/* Make sure MAD base version is understood */
if (mad_hdr->base_version != IB_MGMT_BASE_VERSION &&
(!opa || mad_hdr->base_version != OPA_MGMT_BASE_VERSION)) {
- pr_err("MAD received with unsupported base version %d %s\n",
+ pr_err("MAD received with unsupported base version %u %s\n",
mad_hdr->base_version, opa ? "(opa)" : "");
goto out;
}
@@ -1905,15 +1869,16 @@ static inline int rcv_has_same_class(const struct ib_mad_send_wr_private *wr,
rwc->recv_buf.mad->mad_hdr.mgmt_class;
}
-static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv,
- const struct ib_mad_send_wr_private *wr,
- const struct ib_mad_recv_wc *rwc )
+static inline int
+rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv,
+ const struct ib_mad_send_wr_private *wr,
+ const struct ib_mad_recv_wc *rwc)
{
struct rdma_ah_attr attr;
u8 send_resp, rcv_resp;
union ib_gid sgid;
struct ib_device *device = mad_agent_priv->agent.device;
- u8 port_num = mad_agent_priv->agent.port_num;
+ u32 port_num = mad_agent_priv->agent.port_num;
u8 lmc;
bool has_grh;
@@ -1984,7 +1949,19 @@ ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv,
*/
(is_direct(mad_hdr->mgmt_class) ||
rcv_has_same_gid(mad_agent_priv, wr, wc)))
- return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
+ return (wr->state != IB_MAD_STATE_CANCELED) ? wr : NULL;
+ }
+
+ list_for_each_entry(wr, &mad_agent_priv->backlog_list, agent_list) {
+ if ((wr->tid == mad_hdr->tid) &&
+ rcv_has_same_class(wr, wc) &&
+ /*
+ * Don't check GID for direct routed MADs.
+ * These might have permissive LIDs.
+ */
+ (is_direct(mad_hdr->mgmt_class) ||
+ rcv_has_same_gid(mad_agent_priv, wr, wc)))
+ return (wr->state != IB_MAD_STATE_CANCELED) ? wr : NULL;
}
/*
@@ -2003,17 +1980,55 @@ ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv,
(is_direct(mad_hdr->mgmt_class) ||
rcv_has_same_gid(mad_agent_priv, wr, wc)))
/* Verify request has not been canceled */
- return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
+ return (wr->state != IB_MAD_STATE_CANCELED) ? wr : NULL;
}
return NULL;
}
+static void
+process_backlog_mads(struct ib_mad_agent_private *mad_agent_priv)
+{
+ struct ib_mad_send_wr_private *mad_send_wr;
+ struct ib_mad_send_wc mad_send_wc = {};
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ while (!list_empty(&mad_agent_priv->backlog_list) &&
+ (mad_agent_priv->sol_fc_send_count +
+ mad_agent_priv->sol_fc_wait_count <
+ mad_agent_priv->sol_fc_max)) {
+ mad_send_wr = list_entry(mad_agent_priv->backlog_list.next,
+ struct ib_mad_send_wr_private,
+ agent_list);
+ change_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ ret = ib_send_mad(mad_send_wr);
+ if (ret) {
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ deref_mad_agent(mad_agent_priv);
+ change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
+ mad_send_wc.status = IB_WC_LOC_QP_OP_ERR;
+ mad_agent_priv->agent.send_handler(
+ &mad_agent_priv->agent, &mad_send_wc);
+ }
+
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ }
+
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+}
+
void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr)
{
mad_send_wr->timeout = 0;
- if (mad_send_wr->refcount == 1)
- list_move_tail(&mad_send_wr->agent_list,
- &mad_send_wr->mad_agent_priv->done_list);
+ if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP ||
+ mad_send_wr->state == IB_MAD_STATE_QUEUED)
+ change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
+ else
+ change_mad_state(mad_send_wr, IB_MAD_STATE_EARLY_RESP);
}
static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
@@ -2022,6 +2037,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
struct ib_mad_send_wr_private *mad_send_wr;
struct ib_mad_send_wc mad_send_wc;
unsigned long flags;
+ bool is_mad_done;
int ret;
INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
@@ -2059,16 +2075,18 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
mad_agent_priv->agent.recv_handler(
&mad_agent_priv->agent, NULL,
mad_recv_wc);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
} else {
/* not user rmpp, revert to normal behavior and
- * drop the mad */
+ * drop the mad
+ */
ib_free_recv_mad(mad_recv_wc);
deref_mad_agent(mad_agent_priv);
return;
}
} else {
ib_mark_mad_done(mad_send_wr);
+ is_mad_done = (mad_send_wr->state == IB_MAD_STATE_DONE);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
/* Defined behavior is to complete response before request */
@@ -2076,26 +2094,27 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
&mad_agent_priv->agent,
&mad_send_wr->send_buf,
mad_recv_wc);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
- mad_send_wc.status = IB_WC_SUCCESS;
- mad_send_wc.vendor_err = 0;
- mad_send_wc.send_buf = &mad_send_wr->send_buf;
- ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
+ if (is_mad_done) {
+ mad_send_wc.status = IB_WC_SUCCESS;
+ mad_send_wc.vendor_err = 0;
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
+ ib_mad_complete_send_wr(mad_send_wr,
+ &mad_send_wc);
+ }
}
} else {
mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, NULL,
mad_recv_wc);
deref_mad_agent(mad_agent_priv);
}
-
- return;
}
static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv,
const struct ib_mad_qp_info *qp_info,
const struct ib_wc *wc,
- int port_num,
+ u32 port_num,
struct ib_mad_private *recv,
struct ib_mad_private *response)
{
@@ -2182,7 +2201,7 @@ static enum smi_action
handle_opa_smi(struct ib_mad_port_private *port_priv,
struct ib_mad_qp_info *qp_info,
struct ib_wc *wc,
- int port_num,
+ u32 port_num,
struct ib_mad_private *recv,
struct ib_mad_private *response)
{
@@ -2238,7 +2257,7 @@ static enum smi_action
handle_smi(struct ib_mad_port_private *port_priv,
struct ib_mad_qp_info *qp_info,
struct ib_wc *wc,
- int port_num,
+ u32 port_num,
struct ib_mad_private *recv,
struct ib_mad_private *response,
bool opa)
@@ -2262,7 +2281,7 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
struct ib_mad_private_header *mad_priv_hdr;
struct ib_mad_private *recv, *response = NULL;
struct ib_mad_agent_private *mad_agent;
- int port_num;
+ u32 port_num;
int ret = IB_MAD_RESULT_SUCCESS;
size_t mad_size;
u16 resp_mad_pkey_index = 0;
@@ -2308,9 +2327,6 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad;
recv->header.recv_wc.recv_buf.grh = &recv->grh;
- if (atomic_read(&qp_info->snoop_count))
- snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS);
-
/* Validate MAD */
if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa))
goto out;
@@ -2340,9 +2356,9 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
if (port_priv->device->ops.process_mad) {
ret = port_priv->device->ops.process_mad(
port_priv->device, 0, port_priv->port_num, wc,
- &recv->grh, (const struct ib_mad_hdr *)recv->mad,
- recv->mad_size, (struct ib_mad_hdr *)response->mad,
- &mad_size, &resp_mad_pkey_index);
+ &recv->grh, (const struct ib_mad *)recv->mad,
+ (struct ib_mad *)response->mad, &mad_size,
+ &resp_mad_pkey_index);
if (opa)
wc->pkey_index = resp_mad_pkey_index;
@@ -2414,29 +2430,11 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
{
struct ib_mad_agent_private *mad_agent_priv;
- struct ib_mad_send_wr_private *temp_mad_send_wr;
- struct list_head *list_item;
unsigned long delay;
mad_agent_priv = mad_send_wr->mad_agent_priv;
- list_del(&mad_send_wr->agent_list);
-
delay = mad_send_wr->timeout;
- mad_send_wr->timeout += jiffies;
-
- if (delay) {
- list_for_each_prev(list_item, &mad_agent_priv->wait_list) {
- temp_mad_send_wr = list_entry(list_item,
- struct ib_mad_send_wr_private,
- agent_list);
- if (time_after(mad_send_wr->timeout,
- temp_mad_send_wr->timeout))
- break;
- }
- }
- else
- list_item = &mad_agent_priv->wait_list;
- list_add(&mad_send_wr->agent_list, list_item);
+ change_mad_state(mad_send_wr, IB_MAD_STATE_WAIT_RESP);
/* Reschedule a work item if we have a shorter timeout */
if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list)
@@ -2470,32 +2468,28 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
} else
ret = IB_RMPP_RESULT_UNHANDLED;
- if (mad_send_wc->status != IB_WC_SUCCESS &&
- mad_send_wr->status == IB_WC_SUCCESS) {
- mad_send_wr->status = mad_send_wc->status;
- mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
- }
-
- if (--mad_send_wr->refcount > 0) {
- if (mad_send_wr->refcount == 1 && mad_send_wr->timeout &&
- mad_send_wr->status == IB_WC_SUCCESS) {
- wait_for_response(mad_send_wr);
- }
+ if (mad_send_wr->state == IB_MAD_STATE_CANCELED)
+ mad_send_wc->status = IB_WC_WR_FLUSH_ERR;
+ else if (mad_send_wr->state == IB_MAD_STATE_SEND_START &&
+ mad_send_wr->timeout) {
+ wait_for_response(mad_send_wr);
goto done;
}
/* Remove send from MAD agent and notify client of completion */
- list_del(&mad_send_wr->agent_list);
+ if (mad_send_wr->state != IB_MAD_STATE_DONE)
+ change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
adjust_timeout(mad_agent_priv);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
- if (mad_send_wr->status != IB_WC_SUCCESS )
- mad_send_wc->status = mad_send_wr->status;
- if (ret == IB_RMPP_RESULT_INTERNAL)
+ if (ret == IB_RMPP_RESULT_INTERNAL) {
ib_rmpp_send_handler(mad_send_wc);
- else
+ } else {
+ if (mad_send_wr->is_solicited_fc)
+ process_backlog_mads(mad_agent_priv);
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
mad_send_wc);
+ }
/* Release reference on agent taken when sending */
deref_mad_agent(mad_agent_priv);
@@ -2557,9 +2551,6 @@ retry:
mad_send_wc.send_buf = &mad_send_wr->send_buf;
mad_send_wc.status = wc->status;
mad_send_wc.vendor_err = wc->vendor_err;
- if (atomic_read(&qp_info->snoop_count))
- snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc,
- IB_MAD_SNOOP_SEND_COMPLETIONS);
ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
if (queued_send_wr) {
@@ -2640,40 +2631,53 @@ static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
return true;
}
+static void clear_mad_error_list(struct list_head *list,
+ enum ib_wc_status wc_status,
+ struct ib_mad_agent_private *mad_agent_priv)
+{
+ struct ib_mad_send_wr_private *mad_send_wr, *n;
+ struct ib_mad_send_wc mad_send_wc;
+
+ mad_send_wc.status = wc_status;
+ mad_send_wc.vendor_err = 0;
+
+ list_for_each_entry_safe(mad_send_wr, n, list, agent_list) {
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
+ mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
+ &mad_send_wc);
+ deref_mad_agent(mad_agent_priv);
+ }
+}
+
static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
{
unsigned long flags;
struct ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr;
- struct ib_mad_send_wc mad_send_wc;
struct list_head cancel_list;
INIT_LIST_HEAD(&cancel_list);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
- &mad_agent_priv->send_list, agent_list) {
- if (mad_send_wr->status == IB_WC_SUCCESS) {
- mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
- mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
- }
- }
-
- /* Empty wait list to prevent receives from finding a request */
- list_splice_init(&mad_agent_priv->wait_list, &cancel_list);
- spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ &mad_agent_priv->send_list, agent_list)
+ change_mad_state(mad_send_wr, IB_MAD_STATE_CANCELED);
- /* Report all cancelled requests */
- mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
- mad_send_wc.vendor_err = 0;
+ /* Empty wait & backlog list to prevent receives from finding request */
+ list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
+ &mad_agent_priv->wait_list, agent_list) {
+ change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
+ list_add_tail(&mad_send_wr->agent_list, &cancel_list);
+ }
list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
- &cancel_list, agent_list) {
- mad_send_wc.send_buf = &mad_send_wr->send_buf;
- list_del(&mad_send_wr->agent_list);
- mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
- &mad_send_wc);
- atomic_dec(&mad_agent_priv->refcount);
+ &mad_agent_priv->backlog_list, agent_list) {
+ change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
+ list_add_tail(&mad_send_wr->agent_list, &cancel_list);
}
+
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ /* Report all cancelled requests */
+ clear_mad_error_list(&cancel_list, IB_WC_WR_FLUSH_ERR, mad_agent_priv);
}
static struct ib_mad_send_wr_private*
@@ -2695,31 +2699,40 @@ find_send_wr(struct ib_mad_agent_private *mad_agent_priv,
&mad_send_wr->send_buf == send_buf)
return mad_send_wr;
}
+
+ list_for_each_entry(mad_send_wr, &mad_agent_priv->backlog_list,
+ agent_list) {
+ if (&mad_send_wr->send_buf == send_buf)
+ return mad_send_wr;
+ }
+
return NULL;
}
-int ib_modify_mad(struct ib_mad_agent *mad_agent,
- struct ib_mad_send_buf *send_buf, u32 timeout_ms)
+int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms)
{
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_wr_private *mad_send_wr;
unsigned long flags;
int active;
- mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
- agent);
+ if (!send_buf)
+ return -EINVAL;
+
+ mad_agent_priv = container_of(send_buf->mad_agent,
+ struct ib_mad_agent_private, agent);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
- if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
+ if (!mad_send_wr || mad_send_wr->state == IB_MAD_STATE_CANCELED) {
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
return -EINVAL;
}
- active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1);
- if (!timeout_ms) {
- mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
- mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
- }
+ active = ((mad_send_wr->state == IB_MAD_STATE_SEND_START) ||
+ (mad_send_wr->state == IB_MAD_STATE_EARLY_RESP) ||
+ (mad_send_wr->state == IB_MAD_STATE_QUEUED && timeout_ms));
+ if (!timeout_ms)
+ change_mad_state(mad_send_wr, IB_MAD_STATE_CANCELED);
mad_send_wr->send_buf.timeout_ms = timeout_ms;
if (active)
@@ -2732,13 +2745,6 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent,
}
EXPORT_SYMBOL(ib_modify_mad);
-void ib_cancel_mad(struct ib_mad_agent *mad_agent,
- struct ib_mad_send_buf *send_buf)
-{
- ib_modify_mad(mad_agent, send_buf, 0);
-}
-EXPORT_SYMBOL(ib_cancel_mad);
-
static void local_completions(struct work_struct *work)
{
struct ib_mad_agent_private *mad_agent_priv;
@@ -2801,16 +2807,12 @@ static void local_completions(struct work_struct *work)
local->mad_priv->header.recv_wc.recv_buf.grh = NULL;
local->mad_priv->header.recv_wc.recv_buf.mad =
(struct ib_mad *)local->mad_priv->mad;
- if (atomic_read(&recv_mad_agent->qp_info->snoop_count))
- snoop_recv(recv_mad_agent->qp_info,
- &local->mad_priv->header.recv_wc,
- IB_MAD_SNOOP_RECVS);
recv_mad_agent->agent.recv_handler(
&recv_mad_agent->agent,
&local->mad_send_wr->send_buf,
&local->mad_priv->header.recv_wc);
spin_lock_irqsave(&recv_mad_agent->lock, flags);
- atomic_dec(&recv_mad_agent->refcount);
+ deref_mad_agent(recv_mad_agent);
spin_unlock_irqrestore(&recv_mad_agent->lock, flags);
}
@@ -2819,15 +2821,11 @@ local_send_completion:
mad_send_wc.status = IB_WC_SUCCESS;
mad_send_wc.vendor_err = 0;
mad_send_wc.send_buf = &local->mad_send_wr->send_buf;
- if (atomic_read(&mad_agent_priv->qp_info->snoop_count))
- snoop_send(mad_agent_priv->qp_info,
- &local->mad_send_wr->send_buf,
- &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS);
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
&mad_send_wc);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
if (free_mad)
kfree(local->mad_priv);
kfree(local);
@@ -2846,6 +2844,11 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
mad_send_wr->send_buf.retries++;
mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
+ if (mad_send_wr->is_solicited_fc &&
+ !list_empty(&mad_send_wr->mad_agent_priv->backlog_list)) {
+ change_mad_state(mad_send_wr, IB_MAD_STATE_QUEUED);
+ return 0;
+ }
if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) {
ret = ib_retry_rmpp(mad_send_wr);
@@ -2863,24 +2866,25 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
} else
ret = ib_send_mad(mad_send_wr);
- if (!ret) {
- mad_send_wr->refcount++;
- list_add_tail(&mad_send_wr->agent_list,
- &mad_send_wr->mad_agent_priv->send_list);
- }
+ if (!ret)
+ change_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START);
+
return ret;
}
static void timeout_sends(struct work_struct *work)
{
- struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_wr_private *mad_send_wr;
- struct ib_mad_send_wc mad_send_wc;
+ struct ib_mad_agent_private *mad_agent_priv;
+ struct list_head timeout_list;
+ struct list_head cancel_list;
+ struct list_head *list_item;
unsigned long flags, delay;
mad_agent_priv = container_of(work, struct ib_mad_agent_private,
timed_work.work);
- mad_send_wc.vendor_err = 0;
+ INIT_LIST_HEAD(&timeout_list);
+ INIT_LIST_HEAD(&cancel_list);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
while (!list_empty(&mad_agent_priv->wait_list)) {
@@ -2898,25 +2902,22 @@ static void timeout_sends(struct work_struct *work)
break;
}
- list_del(&mad_send_wr->agent_list);
- if (mad_send_wr->status == IB_WC_SUCCESS &&
- !retry_send(mad_send_wr))
- continue;
-
- spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
-
- if (mad_send_wr->status == IB_WC_SUCCESS)
- mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR;
+ if (mad_send_wr->state == IB_MAD_STATE_CANCELED)
+ list_item = &cancel_list;
+ else if (retry_send(mad_send_wr))
+ list_item = &timeout_list;
else
- mad_send_wc.status = mad_send_wr->status;
- mad_send_wc.send_buf = &mad_send_wr->send_buf;
- mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
- &mad_send_wc);
+ continue;
- atomic_dec(&mad_agent_priv->refcount);
- spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
+ list_add_tail(&mad_send_wr->agent_list, list_item);
}
+
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ process_backlog_mads(mad_agent_priv);
+ clear_mad_error_list(&timeout_list, IB_WC_RESP_TIMEOUT_ERR,
+ mad_agent_priv);
+ clear_mad_error_list(&cancel_list, IB_WC_WR_FLUSH_ERR, mad_agent_priv);
}
/*
@@ -2926,11 +2927,11 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
struct ib_mad_private *mad)
{
unsigned long flags;
- int post, ret;
struct ib_mad_private *mad_priv;
struct ib_sge sg_list;
struct ib_recv_wr recv_wr;
struct ib_mad_queue *recv_queue = &qp_info->recv_queue;
+ int ret = 0;
/* Initialize common scatter list fields */
sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey;
@@ -2940,7 +2941,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
recv_wr.sg_list = &sg_list;
recv_wr.num_sge = 1;
- do {
+ while (true) {
/* Allocate and map receive buffer */
if (mad) {
mad_priv = mad;
@@ -2948,10 +2949,8 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
} else {
mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv),
GFP_ATOMIC);
- if (!mad_priv) {
- ret = -ENOMEM;
- break;
- }
+ if (!mad_priv)
+ return -ENOMEM;
}
sg_list.length = mad_priv_dma_size(mad_priv);
sg_list.addr = ib_dma_map_single(qp_info->port_priv->device,
@@ -2961,35 +2960,40 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device,
sg_list.addr))) {
ret = -ENOMEM;
- break;
+ goto free_mad_priv;
}
mad_priv->header.mapping = sg_list.addr;
mad_priv->header.mad_list.mad_queue = recv_queue;
mad_priv->header.mad_list.cqe.done = ib_mad_recv_done;
recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe;
-
- /* Post receive WR */
spin_lock_irqsave(&recv_queue->lock, flags);
- post = (++recv_queue->count < recv_queue->max_active);
- list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list);
+ if (recv_queue->count >= recv_queue->max_active) {
+ /* Fully populated the receive queue */
+ spin_unlock_irqrestore(&recv_queue->lock, flags);
+ break;
+ }
+ recv_queue->count++;
+ list_add_tail(&mad_priv->header.mad_list.list,
+ &recv_queue->list);
spin_unlock_irqrestore(&recv_queue->lock, flags);
+
ret = ib_post_recv(qp_info->qp, &recv_wr, NULL);
if (ret) {
spin_lock_irqsave(&recv_queue->lock, flags);
list_del(&mad_priv->header.mad_list.list);
recv_queue->count--;
spin_unlock_irqrestore(&recv_queue->lock, flags);
- ib_dma_unmap_single(qp_info->port_priv->device,
- mad_priv->header.mapping,
- mad_priv_dma_size(mad_priv),
- DMA_FROM_DEVICE);
- kfree(mad_priv);
dev_err(&qp_info->port_priv->device->dev,
"ib_post_recv failed: %d\n", ret);
break;
}
- } while (post);
+ }
+ ib_dma_unmap_single(qp_info->port_priv->device,
+ mad_priv->header.mapping,
+ mad_priv_dma_size(mad_priv), DMA_FROM_DEVICE);
+free_mad_priv:
+ kfree(mad_priv);
return ret;
}
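
One subtlety of the rewritten ib_mad_post_receive_mads() is that the loop always exits holding one buffer that was never handed to hardware, either because the queue is already fully populated or because posting failed, so the unmap/free at the bottom covers both exits. A hedged sketch of that shape, with a hypothetical my_rx_queue and a stand-in my_map_and_post() that is not a real MAD helper:

#include <linux/slab.h>
#include <linux/errno.h>

struct my_rx_queue {
	int count;
	int max_active;
};

/* Stand-in for the real map/post step; always "succeeds" here. */
static int my_map_and_post(struct my_rx_queue *q, void *buf)
{
	q->count++;
	return 0;
}

static int refill_queue(struct my_rx_queue *q)
{
	void *buf;
	int ret = 0;

	while (true) {
		buf = kzalloc(64, GFP_ATOMIC);
		if (!buf)
			return -ENOMEM;

		if (q->count >= q->max_active)	/* fully populated: ret stays 0 */
			break;

		ret = my_map_and_post(q, buf);
		if (ret)			/* post failed: keep ret, stop */
			break;
	}

	/* The buffer from the final iteration never reached hardware. */
	kfree(buf);
	return ret;
}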
@@ -3118,7 +3122,7 @@ static void qp_event_handler(struct ib_event *event, void *qp_context)
/* It's worse than that! He's dead, Jim! */
dev_err(&qp_info->port_priv->device->dev,
- "Fatal error (%d) on MAD QP (%d)\n",
+ "Fatal error (%d) on MAD QP (%u)\n",
event->event, qp_info->qp->qp_num);
}
@@ -3138,10 +3142,6 @@ static void init_mad_qp(struct ib_mad_port_private *port_priv,
init_mad_queue(qp_info, &qp_info->send_queue);
init_mad_queue(qp_info, &qp_info->recv_queue);
INIT_LIST_HEAD(&qp_info->overflow_list);
- spin_lock_init(&qp_info->snoop_lock);
- qp_info->snoop_table = NULL;
- qp_info->snoop_table_size = 0;
- atomic_set(&qp_info->snoop_count, 0);
}
static int create_mad_qp(struct ib_mad_qp_info *qp_info,
@@ -3185,7 +3185,6 @@ static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
return;
ib_destroy_qp(qp_info->qp);
- kfree(qp_info->snoop_table);
}
/*
@@ -3193,12 +3192,11 @@ static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
* Create the QP, PD, MR, and CQ if needed
*/
static int ib_mad_port_open(struct ib_device *device,
- int port_num)
+ u32 port_num)
{
int ret, cq_size;
struct ib_mad_port_private *port_priv;
unsigned long flags;
- char name[sizeof "ib_mad123"];
int has_smi;
if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE))
@@ -3224,18 +3222,18 @@ static int ib_mad_port_open(struct ib_device *device,
if (has_smi)
cq_size *= 2;
+ port_priv->pd = ib_alloc_pd(device, 0);
+ if (IS_ERR(port_priv->pd)) {
+ dev_err(&device->dev, "Couldn't create ib_mad PD\n");
+ ret = PTR_ERR(port_priv->pd);
+ goto error3;
+ }
+
port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
IB_POLL_UNBOUND_WORKQUEUE);
if (IS_ERR(port_priv->cq)) {
dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
ret = PTR_ERR(port_priv->cq);
- goto error3;
- }
-
- port_priv->pd = ib_alloc_pd(device, 0);
- if (IS_ERR(port_priv->pd)) {
- dev_err(&device->dev, "Couldn't create ib_mad PD\n");
- ret = PTR_ERR(port_priv->pd);
goto error4;
}
@@ -3244,12 +3242,15 @@ static int ib_mad_port_open(struct ib_device *device,
if (ret)
goto error6;
}
- ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI);
- if (ret)
- goto error7;
- snprintf(name, sizeof name, "ib_mad%d", port_num);
- port_priv->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
+ if (rdma_cap_ib_cm(device, port_num)) {
+ ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI);
+ if (ret)
+ goto error7;
+ }
+
+ port_priv->wq = alloc_ordered_workqueue("ib_mad%u", WQ_MEM_RECLAIM,
+ port_num);
if (!port_priv->wq) {
ret = -ENOMEM;
goto error8;
@@ -3278,11 +3279,11 @@ error8:
error7:
destroy_mad_qp(&port_priv->qp_info[0]);
error6:
- ib_dealloc_pd(port_priv->pd);
-error4:
ib_free_cq(port_priv->cq);
cleanup_recv_queue(&port_priv->qp_info[1]);
cleanup_recv_queue(&port_priv->qp_info[0]);
+error4:
+ ib_dealloc_pd(port_priv->pd);
error3:
kfree(port_priv);
@@ -3294,7 +3295,7 @@ error3:
* If there are no classes using the port, free the port
* resources (CQ, MR, PD, QP) and remove the port's info structure
*/
-static int ib_mad_port_close(struct ib_device *device, int port_num)
+static int ib_mad_port_close(struct ib_device *device, u32 port_num)
{
struct ib_mad_port_private *port_priv;
unsigned long flags;
@@ -3303,7 +3304,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
port_priv = __ib_get_mad_port(device, port_num);
if (port_priv == NULL) {
spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
- dev_err(&device->dev, "Port %d not found\n", port_num);
+ dev_err(&device->dev, "Port %u not found\n", port_num);
return -ENODEV;
}
list_del_init(&port_priv->port_list);
@@ -3312,8 +3313,8 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
destroy_workqueue(port_priv->wq);
destroy_mad_qp(&port_priv->qp_info[1]);
destroy_mad_qp(&port_priv->qp_info[0]);
- ib_dealloc_pd(port_priv->pd);
ib_free_cq(port_priv->cq);
+ ib_dealloc_pd(port_priv->pd);
cleanup_recv_queue(&port_priv->qp_info[1]);
cleanup_recv_queue(&port_priv->qp_info[0]);
/* XXX: Handle deallocation of MAD registration tables */
@@ -3323,9 +3324,11 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
return 0;
}
-static void ib_mad_init_device(struct ib_device *device)
+static int ib_mad_init_device(struct ib_device *device)
{
int start, i;
+ unsigned int count = 0;
+ int ret;
start = rdma_start_port(device);
@@ -3333,17 +3336,23 @@ static void ib_mad_init_device(struct ib_device *device)
if (!rdma_cap_ib_mad(device, i))
continue;
- if (ib_mad_port_open(device, i)) {
+ ret = ib_mad_port_open(device, i);
+ if (ret) {
dev_err(&device->dev, "Couldn't open port %d\n", i);
goto error;
}
- if (ib_agent_port_open(device, i)) {
+ ret = ib_agent_port_open(device, i);
+ if (ret) {
dev_err(&device->dev,
"Couldn't open port %d for agents\n", i);
goto error_agent;
}
+ count++;
}
- return;
+ if (!count)
+ return -EOPNOTSUPP;
+
+ return 0;
error_agent:
if (ib_mad_port_close(device, i))
@@ -3360,6 +3369,7 @@ error:
if (ib_mad_port_close(device, i))
dev_err(&device->dev, "Couldn't close port %d\n", i);
}
+ return ret;
}
static void ib_mad_remove_device(struct ib_device *device, void *client_data)
@@ -3372,9 +3382,9 @@ static void ib_mad_remove_device(struct ib_device *device, void *client_data)
if (ib_agent_port_close(device, i))
dev_err(&device->dev,
- "Couldn't close port %d for agents\n", i);
+ "Couldn't close port %u for agents\n", i);
if (ib_mad_port_close(device, i))
- dev_err(&device->dev, "Couldn't close port %d\n", i);
+ dev_err(&device->dev, "Couldn't close port %u\n", i);
}
}
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 956b3a7dfed7..f444357d33f4 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -79,13 +79,13 @@ struct ib_mad_private {
struct ib_mad_private_header header;
size_t mad_size;
struct ib_grh grh;
- u8 mad[0];
+ u8 mad[];
} __packed;
struct ib_rmpp_segment {
struct list_head list;
u32 num;
- u8 data[0];
+ u8 data[];
};
struct ib_mad_agent_private {
@@ -95,15 +95,18 @@ struct ib_mad_agent_private {
spinlock_t lock;
struct list_head send_list;
+ unsigned int sol_fc_send_count;
struct list_head wait_list;
- struct list_head done_list;
+ unsigned int sol_fc_wait_count;
struct delayed_work timed_work;
unsigned long timeout;
struct list_head local_list;
struct work_struct local_work;
struct list_head rmpp_list;
+ unsigned int sol_fc_max;
+ struct list_head backlog_list;
- atomic_t refcount;
+ refcount_t refcount;
union {
struct completion comp;
struct rcu_head rcu;
@@ -115,10 +118,35 @@ struct ib_mad_snoop_private {
struct ib_mad_qp_info *qp_info;
int snoop_index;
int mad_snoop_flags;
- atomic_t refcount;
struct completion comp;
};
+enum ib_mad_state {
+ /* MAD is in the making and is not yet in any list */
+ IB_MAD_STATE_INIT,
+ /* MAD is in backlog list */
+ IB_MAD_STATE_QUEUED,
+ /*
+ * MAD was sent to the QP and is waiting for completion
+ * notification in send list.
+ */
+ IB_MAD_STATE_SEND_START,
+ /*
+ * MAD send completed successfully, waiting for a response
+ * in wait list.
+ */
+ IB_MAD_STATE_WAIT_RESP,
+ /*
+ * Response came early, before send completion notification,
+ * in send list.
+ */
+ IB_MAD_STATE_EARLY_RESP,
+ /* MAD was canceled while in wait or send list */
+ IB_MAD_STATE_CANCELED,
+ /* MAD processing completed, MAD in no list */
+ IB_MAD_STATE_DONE
+};
+
struct ib_mad_send_wr_private {
struct ib_mad_list_head mad_list;
struct list_head agent_list;
@@ -133,8 +161,6 @@ struct ib_mad_send_wr_private {
int max_retries;
int retries_left;
int retry;
- int refcount;
- enum ib_wc_status status;
/* RMPP control */
struct list_head rmpp_list;
@@ -144,8 +170,48 @@ struct ib_mad_send_wr_private {
int seg_num;
int newwin;
int pad;
+
+ enum ib_mad_state state;
+
+ /* Solicited MAD flow control */
+ bool is_solicited_fc;
};
+static inline void expect_mad_state(struct ib_mad_send_wr_private *mad_send_wr,
+ enum ib_mad_state expected_state)
+{
+ if (IS_ENABLED(CONFIG_LOCKDEP))
+ WARN_ON(mad_send_wr->state != expected_state);
+}
+
+static inline void expect_mad_state2(struct ib_mad_send_wr_private *mad_send_wr,
+ enum ib_mad_state expected_state1,
+ enum ib_mad_state expected_state2)
+{
+ if (IS_ENABLED(CONFIG_LOCKDEP))
+ WARN_ON(mad_send_wr->state != expected_state1 &&
+ mad_send_wr->state != expected_state2);
+}
+
+static inline void expect_mad_state3(struct ib_mad_send_wr_private *mad_send_wr,
+ enum ib_mad_state expected_state1,
+ enum ib_mad_state expected_state2,
+ enum ib_mad_state expected_state3)
+{
+ if (IS_ENABLED(CONFIG_LOCKDEP))
+ WARN_ON(mad_send_wr->state != expected_state1 &&
+ mad_send_wr->state != expected_state2 &&
+ mad_send_wr->state != expected_state3);
+}
+
+static inline void
+not_expect_mad_state(struct ib_mad_send_wr_private *mad_send_wr,
+ enum ib_mad_state wrong_state)
+{
+ if (IS_ENABLED(CONFIG_LOCKDEP))
+ WARN_ON(mad_send_wr->state == wrong_state);
+}
+
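
The new enum ib_mad_state and the expect_mad_state*() helpers above replace the old refcount/status pair with an explicit lifecycle. A hedged sketch of how a caller might combine them; change_mad_state() is only declared later in this header, and mark_wait_for_response() is a hypothetical caller invented for the example.

/* Hypothetical caller; assumes the agent lock is held, as the real
 * send/wait list manipulation requires.
 */
static void mark_wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
{
	/* Debug-only assertion: the MAD must still be on the send list. */
	expect_mad_state2(mad_send_wr, IB_MAD_STATE_SEND_START,
			  IB_MAD_STATE_EARLY_RESP);

	/* INIT -> SEND_START -> WAIT_RESP -> DONE is the common lifecycle. */
	change_mad_state(mad_send_wr, IB_MAD_STATE_WAIT_RESP);
}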
struct ib_mad_local_private {
struct list_head completion_list;
struct ib_mad_private *mad_priv;
@@ -223,4 +289,7 @@ void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr);
void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
unsigned long timeout_ms);
+void change_mad_state(struct ib_mad_send_wr_private *mad_send_wr,
+ enum ib_mad_state new_state);
+
#endif /* __IB_MAD_PRIV_H__ */
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 5ec57abc0849..1c5e0eaf1c94 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -40,8 +40,7 @@
enum rmpp_state {
RMPP_STATE_ACTIVE,
RMPP_STATE_TIMEOUT,
- RMPP_STATE_COMPLETE,
- RMPP_STATE_CANCELING
+ RMPP_STATE_COMPLETE
};
struct mad_rmpp_recv {
@@ -52,7 +51,7 @@ struct mad_rmpp_recv {
struct completion comp;
enum rmpp_state state;
spinlock_t lock;
- atomic_t refcount;
+ refcount_t refcount;
struct ib_ah *ah;
struct ib_mad_recv_wc *rmpp_wc;
@@ -73,7 +72,7 @@ struct mad_rmpp_recv {
static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
{
- if (atomic_dec_and_test(&rmpp_recv->refcount))
+ if (refcount_dec_and_test(&rmpp_recv->refcount))
complete(&rmpp_recv->comp);
}
@@ -92,22 +91,18 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent)
spin_lock_irqsave(&agent->lock, flags);
list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
- if (rmpp_recv->state != RMPP_STATE_COMPLETE)
- ib_free_recv_mad(rmpp_recv->rmpp_wc);
- rmpp_recv->state = RMPP_STATE_CANCELING;
- }
- spin_unlock_irqrestore(&agent->lock, flags);
-
- list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
cancel_delayed_work(&rmpp_recv->timeout_work);
cancel_delayed_work(&rmpp_recv->cleanup_work);
}
+ spin_unlock_irqrestore(&agent->lock, flags);
flush_workqueue(agent->qp_info->port_priv->wq);
list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv,
&agent->rmpp_list, list) {
list_del(&rmpp_recv->list);
+ if (rmpp_recv->state != RMPP_STATE_COMPLETE)
+ ib_free_recv_mad(rmpp_recv->rmpp_wc);
destroy_rmpp_recv(rmpp_recv);
}
}
@@ -163,7 +158,7 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc,
recv_wc->recv_buf.grh, agent->port_num);
if (IS_ERR(ah))
- return (void *) ah;
+ return ERR_CAST(ah);
hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class);
msg = ib_create_send_mad(agent, recv_wc->wc->src_qp,
@@ -272,10 +267,6 @@ static void recv_cleanup_handler(struct work_struct *work)
unsigned long flags;
spin_lock_irqsave(&rmpp_recv->agent->lock, flags);
- if (rmpp_recv->state == RMPP_STATE_CANCELING) {
- spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
- return;
- }
list_del(&rmpp_recv->list);
spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
destroy_rmpp_recv(rmpp_recv);
@@ -305,7 +296,7 @@ create_rmpp_recv(struct ib_mad_agent_private *agent,
INIT_DELAYED_WORK(&rmpp_recv->cleanup_work, recv_cleanup_handler);
spin_lock_init(&rmpp_recv->lock);
rmpp_recv->state = RMPP_STATE_ACTIVE;
- atomic_set(&rmpp_recv->refcount, 1);
+ refcount_set(&rmpp_recv->refcount, 1);
rmpp_recv->rmpp_wc = mad_recv_wc;
rmpp_recv->cur_seg_buf = &mad_recv_wc->recv_buf;
@@ -357,7 +348,7 @@ acquire_rmpp_recv(struct ib_mad_agent_private *agent,
spin_lock_irqsave(&agent->lock, flags);
rmpp_recv = find_rmpp_recv(agent, mad_recv_wc);
if (rmpp_recv)
- atomic_inc(&rmpp_recv->refcount);
+ refcount_inc(&rmpp_recv->refcount);
spin_unlock_irqrestore(&agent->lock, flags);
return rmpp_recv;
}
@@ -391,8 +382,8 @@ static inline int get_seg_num(struct ib_mad_recv_buf *seg)
return be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num);
}
-static inline struct ib_mad_recv_buf * get_next_seg(struct list_head *rmpp_list,
- struct ib_mad_recv_buf *seg)
+static inline struct ib_mad_recv_buf *get_next_seg(struct list_head *rmpp_list,
+ struct ib_mad_recv_buf *seg)
{
if (seg->list.next == rmpp_list)
return NULL;
@@ -405,8 +396,8 @@ static inline int window_size(struct ib_mad_agent_private *agent)
return max(agent->qp_info->recv_queue.max_active >> 3, 1);
}
-static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list,
- int seg_num)
+static struct ib_mad_recv_buf *find_seg_location(struct list_head *rmpp_list,
+ int seg_num)
{
struct ib_mad_recv_buf *seg_buf;
int cur_seg_num;
@@ -458,7 +449,7 @@ static inline int get_mad_len(struct mad_rmpp_recv *rmpp_recv)
return hdr_size + rmpp_recv->seg_num * data_size - pad;
}
-static struct ib_mad_recv_wc * complete_rmpp(struct mad_rmpp_recv *rmpp_recv)
+static struct ib_mad_recv_wc *complete_rmpp(struct mad_rmpp_recv *rmpp_recv)
{
struct ib_mad_recv_wc *rmpp_wc;
@@ -553,7 +544,7 @@ start_rmpp(struct ib_mad_agent_private *agent,
destroy_rmpp_recv(rmpp_recv);
return continue_rmpp(agent, mad_recv_wc);
}
- atomic_inc(&rmpp_recv->refcount);
+ refcount_inc(&rmpp_recv->refcount);
if (get_last_flag(&mad_recv_wc->recv_buf)) {
rmpp_recv->state = RMPP_STATE_COMPLETE;
@@ -617,16 +608,20 @@ static void abort_send(struct ib_mad_agent_private *agent,
goto out; /* Unmatched send */
if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
- (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
+ (!mad_send_wr->timeout) ||
+ (mad_send_wr->state == IB_MAD_STATE_CANCELED))
goto out; /* Send is already done */
ib_mark_mad_done(mad_send_wr);
+ if (mad_send_wr->state == IB_MAD_STATE_DONE) {
+ spin_unlock_irqrestore(&agent->lock, flags);
+ wc.status = IB_WC_REM_ABORT_ERR;
+ wc.vendor_err = rmpp_status;
+ wc.send_buf = &mad_send_wr->send_buf;
+ ib_mad_complete_send_wr(mad_send_wr, &wc);
+ return;
+ }
spin_unlock_irqrestore(&agent->lock, flags);
-
- wc.status = IB_WC_REM_ABORT_ERR;
- wc.vendor_err = rmpp_status;
- wc.send_buf = &mad_send_wr->send_buf;
- ib_mad_complete_send_wr(mad_send_wr, &wc);
return;
out:
spin_unlock_irqrestore(&agent->lock, flags);
@@ -693,7 +688,8 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
}
if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
- (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
+ (!mad_send_wr->timeout) ||
+ (mad_send_wr->state == IB_MAD_STATE_CANCELED))
goto out; /* Send is already done */
if (seg_num > mad_send_wr->send_buf.seg_count ||
@@ -718,21 +714,24 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
struct ib_mad_send_wc wc;
ib_mark_mad_done(mad_send_wr);
+ if (mad_send_wr->state == IB_MAD_STATE_DONE) {
+ spin_unlock_irqrestore(&agent->lock, flags);
+ wc.status = IB_WC_SUCCESS;
+ wc.vendor_err = 0;
+ wc.send_buf = &mad_send_wr->send_buf;
+ ib_mad_complete_send_wr(mad_send_wr, &wc);
+ return;
+ }
spin_unlock_irqrestore(&agent->lock, flags);
-
- wc.status = IB_WC_SUCCESS;
- wc.vendor_err = 0;
- wc.send_buf = &mad_send_wr->send_buf;
- ib_mad_complete_send_wr(mad_send_wr, &wc);
return;
}
- if (mad_send_wr->refcount == 1)
+ if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP)
ib_reset_mad_timeout(mad_send_wr,
mad_send_wr->send_buf.timeout_ms);
spin_unlock_irqrestore(&agent->lock, flags);
ack_ds_ack(agent, mad_recv_wc);
return;
- } else if (mad_send_wr->refcount == 1 &&
+ } else if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP &&
mad_send_wr->seg_num < mad_send_wr->newwin &&
mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) {
/* Send failure will just result in a timeout/retry */
@@ -740,7 +739,7 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
if (ret)
goto out;
- mad_send_wr->refcount++;
+ change_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START);
list_move_tail(&mad_send_wr->agent_list,
&mad_send_wr->mad_agent_priv->send_list);
}
@@ -899,7 +898,6 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
mad_send_wr->newwin = init_newwin(mad_send_wr);
/* We need to wait for the final ACK even if there isn't a response */
- mad_send_wr->refcount += (mad_send_wr->timeout == 0);
ret = send_next_seg(mad_send_wr);
if (!ret)
return IB_RMPP_RESULT_CONSUMED;
@@ -921,7 +919,7 @@ int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
return IB_RMPP_RESULT_INTERNAL; /* ACK, STOP, or ABORT */
if (mad_send_wc->status != IB_WC_SUCCESS ||
- mad_send_wr->status != IB_WC_SUCCESS)
+ mad_send_wr->state == IB_MAD_STATE_CANCELED)
return IB_RMPP_RESULT_PROCESSED; /* Canceled or send error */
if (!mad_send_wr->timeout)
diff --git a/drivers/infiniband/core/mr_pool.c b/drivers/infiniband/core/mr_pool.c
index 49d478b2ea94..c0e2df128b34 100644
--- a/drivers/infiniband/core/mr_pool.c
+++ b/drivers/infiniband/core/mr_pool.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2016 HGST, a Western Digital Company.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#include <rdma/ib_verbs.h>
#include <rdma/mr_pool.h>
@@ -42,14 +34,18 @@ void ib_mr_pool_put(struct ib_qp *qp, struct list_head *list, struct ib_mr *mr)
EXPORT_SYMBOL(ib_mr_pool_put);
int ib_mr_pool_init(struct ib_qp *qp, struct list_head *list, int nr,
- enum ib_mr_type type, u32 max_num_sg)
+ enum ib_mr_type type, u32 max_num_sg, u32 max_num_meta_sg)
{
struct ib_mr *mr;
unsigned long flags;
int ret, i;
for (i = 0; i < nr; i++) {
- mr = ib_alloc_mr(qp->pd, type, max_num_sg);
+ if (type == IB_MR_TYPE_INTEGRITY)
+ mr = ib_alloc_mr_integrity(qp->pd, max_num_sg,
+ max_num_meta_sg);
+ else
+ mr = ib_alloc_mr(qp->pd, type, max_num_sg);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
goto out;
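
The extra max_num_meta_sg parameter lets ib_mr_pool_init() fill a pool with integrity MRs instead of plain ones. A hedged usage sketch follows, assuming the QP's sig_mrs list and the matching ib_mr_pool_destroy() helper from rdma/mr_pool.h; the pool size and SG limits are made-up values.

#include <rdma/ib_verbs.h>
#include <rdma/mr_pool.h>

/* Sketch: pre-allocate 16 integrity MRs on a QP's sig_mrs list. */
static int setup_pi_mr_pool(struct ib_qp *qp)
{
	int ret;

	ret = ib_mr_pool_init(qp, &qp->sig_mrs, 16, IB_MR_TYPE_INTEGRITY,
			      4 /* max_num_sg */, 4 /* max_num_meta_sg */);
	if (ret)
		return ret;

	/* ... ib_mr_pool_get()/ib_mr_pool_put() around each I/O ... */

	ib_mr_pool_destroy(qp, &qp->sig_mrs);
	return 0;
}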
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index cd338ddc4a39..a236532a9026 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -42,7 +42,7 @@
#include <rdma/ib_cache.h>
#include "sa.h"
-static void mcast_add_one(struct ib_device *device);
+static int mcast_add_one(struct ib_device *device);
static void mcast_remove_one(struct ib_device *device, void *client_data);
static struct ib_client mcast_client = {
@@ -61,9 +61,9 @@ struct mcast_port {
struct mcast_device *dev;
spinlock_t lock;
struct rb_root table;
- atomic_t refcount;
+ refcount_t refcount;
struct completion comp;
- u8 port_num;
+ u32 port_num;
};
struct mcast_device {
@@ -71,7 +71,7 @@ struct mcast_device {
struct ib_event_handler event_handler;
int start_port;
int end_port;
- struct mcast_port port[0];
+ struct mcast_port port[];
};
enum mcast_state {
@@ -117,7 +117,7 @@ struct mcast_member {
struct mcast_group *group;
struct list_head list;
enum mcast_state state;
- atomic_t refcount;
+ refcount_t refcount;
struct completion comp;
};
@@ -178,7 +178,7 @@ static struct mcast_group *mcast_insert(struct mcast_port *port,
static void deref_port(struct mcast_port *port)
{
- if (atomic_dec_and_test(&port->refcount))
+ if (refcount_dec_and_test(&port->refcount))
complete(&port->comp);
}
@@ -199,7 +199,7 @@ static void release_group(struct mcast_group *group)
static void deref_member(struct mcast_member *member)
{
- if (atomic_dec_and_test(&member->refcount))
+ if (refcount_dec_and_test(&member->refcount))
complete(&member->comp);
}
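
deref_port() and deref_member() above pair a refcount_t with a completion so teardown can wait for the last reference to drop. A minimal hedged sketch of that idiom, with illustrative names:

#include <linux/refcount.h>
#include <linux/completion.h>

struct my_obj {
	refcount_t refcount;
	struct completion comp;
};

static void my_obj_init(struct my_obj *obj)
{
	refcount_set(&obj->refcount, 1);	/* creator's reference */
	init_completion(&obj->comp);
}

static void my_obj_put(struct my_obj *obj)
{
	if (refcount_dec_and_test(&obj->refcount))
		complete(&obj->comp);
}

static void my_obj_teardown(struct my_obj *obj)
{
	my_obj_put(obj);			/* drop the creator's reference */
	wait_for_completion(&obj->comp);	/* wait for all other users */
	/* now safe to free obj */
}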
@@ -401,7 +401,7 @@ static void process_group_error(struct mcast_group *group)
while (!list_empty(&group->active_list)) {
member = list_entry(group->active_list.next,
struct mcast_member, list);
- atomic_inc(&member->refcount);
+ refcount_inc(&member->refcount);
list_del_init(&member->list);
adjust_membership(group, member->multicast.rec.join_state, -1);
member->state = MCAST_ERROR;
@@ -445,7 +445,7 @@ retest:
struct mcast_member, list);
multicast = &member->multicast;
join_state = multicast->rec.join_state;
- atomic_inc(&member->refcount);
+ refcount_inc(&member->refcount);
if (join_state == (group->rec.join_state & join_state)) {
status = cmp_rec(&group->rec, &multicast->rec,
@@ -497,7 +497,7 @@ static void process_join_error(struct mcast_group *group, int status)
member = list_entry(group->pending_list.next,
struct mcast_member, list);
if (group->last_join == member) {
- atomic_inc(&member->refcount);
+ refcount_inc(&member->refcount);
list_del_init(&member->list);
spin_unlock_irq(&group->lock);
ret = member->multicast.callback(status, &member->multicast);
@@ -589,7 +589,7 @@ static struct mcast_group *acquire_group(struct mcast_port *port,
kfree(group);
group = cur_group;
} else
- atomic_inc(&port->refcount);
+ refcount_inc(&port->refcount);
found:
atomic_inc(&group->refcount);
spin_unlock_irqrestore(&port->lock, flags);
@@ -605,7 +605,7 @@ found:
*/
struct ib_sa_multicast *
ib_sa_join_multicast(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
+ struct ib_device *device, u32 port_num,
struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
int (*callback)(int status,
@@ -632,7 +632,7 @@ ib_sa_join_multicast(struct ib_sa_client *client,
member->multicast.callback = callback;
member->multicast.context = context;
init_completion(&member->comp);
- atomic_set(&member->refcount, 1);
+ refcount_set(&member->refcount, 1);
member->state = MCAST_JOINING;
member->group = acquire_group(&dev->port[port_num - dev->start_port],
@@ -690,7 +690,7 @@ void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
}
EXPORT_SYMBOL(ib_sa_free_multicast);
-int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
+int ib_sa_get_mcmember_rec(struct ib_device *device, u32 port_num,
union ib_gid *mgid, struct ib_sa_mcmember_rec *rec)
{
struct mcast_device *dev;
@@ -721,6 +721,7 @@ EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
* member record and gid of the device.
* @device: RDMA device
* @port_num: Port of the rdma device to consider
+ * @rec: Multicast member record to use
* @ndev: Optional netdevice, applicable only for RoCE
* @gid_type: GID type to consider
* @ah_attr: AH attribute to fillup on successful completion
@@ -731,7 +732,7 @@ EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
* success or appropriate error code.
*
*/
-int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
+int ib_init_ah_from_mcmember(struct ib_device *device, u32 port_num,
struct ib_sa_mcmember_rec *rec,
struct net_device *ndev,
enum ib_gid_type gid_type,
@@ -815,7 +816,7 @@ static void mcast_event_handler(struct ib_event_handler *handler,
}
}
-static void mcast_add_one(struct ib_device *device)
+static int mcast_add_one(struct ib_device *device)
{
struct mcast_device *dev;
struct mcast_port *port;
@@ -825,7 +826,7 @@ static void mcast_add_one(struct ib_device *device)
dev = kmalloc(struct_size(dev, port, device->phys_port_cnt),
GFP_KERNEL);
if (!dev)
- return;
+ return -ENOMEM;
dev->start_port = rdma_start_port(device);
dev->end_port = rdma_end_port(device);
@@ -839,13 +840,13 @@ static void mcast_add_one(struct ib_device *device)
spin_lock_init(&port->lock);
port->table = RB_ROOT;
init_completion(&port->comp);
- atomic_set(&port->refcount, 1);
+ refcount_set(&port->refcount, 1);
++count;
}
if (!count) {
kfree(dev);
- return;
+ return -EOPNOTSUPP;
}
dev->device = device;
@@ -853,6 +854,7 @@ static void mcast_add_one(struct ib_device *device)
INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler);
ib_register_event_handler(&dev->event_handler);
+ return 0;
}
static void mcast_remove_one(struct ib_device *device, void *client_data)
@@ -861,9 +863,6 @@ static void mcast_remove_one(struct ib_device *device, void *client_data)
struct mcast_port *port;
int i;
- if (!dev)
- return;
-
ib_unregister_event_handler(&dev->event_handler);
flush_workqueue(mcast_wq);
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index eecfc0b377c9..def14c54b648 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -36,20 +36,25 @@
#include <linux/export.h>
#include <net/netlink.h>
#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <net/sock.h>
#include <rdma/rdma_netlink.h>
#include <linux/module.h>
#include "core_priv.h"
-static DEFINE_MUTEX(rdma_nl_mutex);
-static struct sock *nls;
static struct {
- const struct rdma_nl_cbs *cb_table;
+ const struct rdma_nl_cbs *cb_table;
+ /* Synchronizes between ongoing netlink commands and netlink client
+ * unregistration.
+ */
+ struct rw_semaphore sem;
} rdma_nl_types[RDMA_NL_NUM_CLIENTS];
bool rdma_nl_chk_listeners(unsigned int group)
{
- return netlink_has_listeners(nls, group);
+ struct rdma_dev_net *rnet = rdma_net_to_dev_net(&init_net);
+
+ return netlink_has_listeners(rnet->nl_sock, group);
}
EXPORT_SYMBOL(rdma_nl_chk_listeners);
@@ -70,65 +75,56 @@ static bool is_nl_msg_valid(unsigned int type, unsigned int op)
if (type >= RDMA_NL_NUM_CLIENTS)
return false;
- return (op < max_num_ops[type]) ? true : false;
+ return op < max_num_ops[type];
}
-static bool is_nl_valid(unsigned int type, unsigned int op)
+static const struct rdma_nl_cbs *
+get_cb_table(const struct sk_buff *skb, unsigned int type, unsigned int op)
{
const struct rdma_nl_cbs *cb_table;
- if (!is_nl_msg_valid(type, op))
- return false;
+ /*
+	 * Currently only the NLDEV client supports netlink commands in a
+	 * non-init_net net namespace.
+ */
+ if (sock_net(skb->sk) != &init_net && type != RDMA_NL_NLDEV)
+ return NULL;
- if (!rdma_nl_types[type].cb_table) {
- mutex_unlock(&rdma_nl_mutex);
- request_module("rdma-netlink-subsys-%d", type);
- mutex_lock(&rdma_nl_mutex);
- }
+ cb_table = READ_ONCE(rdma_nl_types[type].cb_table);
+ if (!cb_table) {
+ /*
+ * Didn't get valid reference of the table, attempt module
+		 * Didn't get a valid reference to the table; attempt a module
+		 * load once.
+ up_read(&rdma_nl_types[type].sem);
- cb_table = rdma_nl_types[type].cb_table;
+ request_module("rdma-netlink-subsys-%u", type);
+ down_read(&rdma_nl_types[type].sem);
+ cb_table = READ_ONCE(rdma_nl_types[type].cb_table);
+ }
if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit))
- return false;
- return true;
+ return NULL;
+ return cb_table;
}
void rdma_nl_register(unsigned int index,
const struct rdma_nl_cbs cb_table[])
{
- mutex_lock(&rdma_nl_mutex);
- if (!is_nl_msg_valid(index, 0)) {
- /*
- * All clients are not interesting in success/failure of
- * this call. They want to see the print to error log and
- * continue their initialization. Print warning for them,
- * because it is programmer's error to be here.
- */
- mutex_unlock(&rdma_nl_mutex);
- WARN(true,
- "The not-valid %u index was supplied to RDMA netlink\n",
- index);
+ if (WARN_ON(!is_nl_msg_valid(index, 0)) ||
+ WARN_ON(READ_ONCE(rdma_nl_types[index].cb_table)))
return;
- }
- if (rdma_nl_types[index].cb_table) {
- mutex_unlock(&rdma_nl_mutex);
- WARN(true,
- "The %u index is already registered in RDMA netlink\n",
- index);
- return;
- }
-
- rdma_nl_types[index].cb_table = cb_table;
- mutex_unlock(&rdma_nl_mutex);
+	/* Pairs with the READ_ONCE in get_cb_table() */
+ smp_store_release(&rdma_nl_types[index].cb_table, cb_table);
}
EXPORT_SYMBOL(rdma_nl_register);
void rdma_nl_unregister(unsigned int index)
{
- mutex_lock(&rdma_nl_mutex);
+ down_write(&rdma_nl_types[index].sem);
rdma_nl_types[index].cb_table = NULL;
- mutex_unlock(&rdma_nl_mutex);
+ up_write(&rdma_nl_types[index].sem);
}
EXPORT_SYMBOL(rdma_nl_unregister);
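
The registration path above publishes cb_table with smp_store_release() while readers in get_cb_table() use READ_ONCE() under a per-client rw_semaphore, so unregistration can wait out in-flight commands without a global mutex. A hedged, generic sketch of the same scheme (my_client/my_ops are illustrative):

#include <linux/kernel.h>
#include <linux/rwsem.h>
#include <linux/errno.h>

struct my_ops {
	int (*doit)(void *arg);
};

struct my_client {
	const struct my_ops *ops;	/* published table, NULL when unregistered */
	struct rw_semaphore sem;	/* held for read across a command */
};

static int my_dispatch(struct my_client *c, void *arg)
{
	const struct my_ops *ops;
	int err = -EINVAL;

	down_read(&c->sem);
	ops = READ_ONCE(c->ops);	/* pairs with smp_store_release() below */
	if (ops && ops->doit)
		err = ops->doit(arg);
	up_read(&c->sem);
	return err;
}

static void my_register(struct my_client *c, const struct my_ops *ops)
{
	smp_store_release(&c->ops, ops);
}

static void my_unregister(struct my_client *c)
{
	down_write(&c->sem);		/* waits for in-flight my_dispatch() */
	c->ops = NULL;
	up_write(&c->sem);
}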
@@ -160,15 +156,21 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
unsigned int index = RDMA_NL_GET_CLIENT(type);
unsigned int op = RDMA_NL_GET_OP(type);
const struct rdma_nl_cbs *cb_table;
+ int err = -EINVAL;
- if (!is_nl_valid(index, op))
+ if (!is_nl_msg_valid(index, op))
return -EINVAL;
- cb_table = rdma_nl_types[index].cb_table;
+ down_read(&rdma_nl_types[index].sem);
+ cb_table = get_cb_table(skb, index, op);
+ if (!cb_table)
+ goto done;
if ((cb_table[op].flags & RDMA_NL_ADMIN_PERM) &&
- !netlink_capable(skb, CAP_NET_ADMIN))
- return -EPERM;
+ !netlink_capable(skb, CAP_NET_ADMIN)) {
+ err = -EPERM;
+ goto done;
+ }
/*
* LS responses overload the 0x100 (NLM_F_ROOT) flag. Don't
@@ -176,8 +178,8 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
*/
if (index == RDMA_NL_LS) {
if (cb_table[op].doit)
- return cb_table[op].doit(skb, nlh, extack);
- return -EINVAL;
+ err = cb_table[op].doit(skb, nlh, extack);
+ goto done;
}
/* FIXME: Convert IWCM to properly handle doit callbacks */
if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_IWCM) {
@@ -185,14 +187,15 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
.dump = cb_table[op].dump,
};
if (c.dump)
- return netlink_dump_start(nls, skb, nlh, &c);
- return -EINVAL;
+ err = netlink_dump_start(skb->sk, skb, nlh, &c);
+ goto done;
}
if (cb_table[op].doit)
- return cb_table[op].doit(skb, nlh, extack);
-
- return 0;
+ err = cb_table[op].doit(skb, nlh, extack);
+done:
+ up_read(&rdma_nl_types[index].sem);
+ return err;
}
/*
@@ -253,57 +256,77 @@ skip:
static void rdma_nl_rcv(struct sk_buff *skb)
{
- mutex_lock(&rdma_nl_mutex);
rdma_nl_rcv_skb(skb, &rdma_nl_rcv_msg);
- mutex_unlock(&rdma_nl_mutex);
}
-int rdma_nl_unicast(struct sk_buff *skb, u32 pid)
+int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid)
{
+ struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
int err;
- err = netlink_unicast(nls, skb, pid, MSG_DONTWAIT);
+ err = netlink_unicast(rnet->nl_sock, skb, pid, MSG_DONTWAIT);
return (err < 0) ? err : 0;
}
EXPORT_SYMBOL(rdma_nl_unicast);
-int rdma_nl_unicast_wait(struct sk_buff *skb, __u32 pid)
+int rdma_nl_unicast_wait(struct net *net, struct sk_buff *skb, __u32 pid)
{
+ struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
int err;
- err = netlink_unicast(nls, skb, pid, 0);
+ err = netlink_unicast(rnet->nl_sock, skb, pid, 0);
return (err < 0) ? err : 0;
}
EXPORT_SYMBOL(rdma_nl_unicast_wait);
-int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags)
+int rdma_nl_multicast(struct net *net, struct sk_buff *skb,
+ unsigned int group, gfp_t flags)
{
- return nlmsg_multicast(nls, skb, 0, group, flags);
+ struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
+
+ return nlmsg_multicast(rnet->nl_sock, skb, 0, group, flags);
}
EXPORT_SYMBOL(rdma_nl_multicast);
-int __init rdma_nl_init(void)
+void rdma_nl_init(void)
{
+ int idx;
+
+ for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++)
+ init_rwsem(&rdma_nl_types[idx].sem);
+}
+
+void rdma_nl_exit(void)
+{
+ int idx;
+
+ for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++)
+ WARN(rdma_nl_types[idx].cb_table,
+ "Netlink client %d wasn't released prior to unloading %s\n",
+ idx, KBUILD_MODNAME);
+}
+
+int rdma_nl_net_init(struct rdma_dev_net *rnet)
+{
+ struct net *net = read_pnet(&rnet->net);
struct netlink_kernel_cfg cfg = {
.input = rdma_nl_rcv,
+ .flags = NL_CFG_F_NONROOT_RECV,
};
+ struct sock *nls;
- nls = netlink_kernel_create(&init_net, NETLINK_RDMA, &cfg);
+ nls = netlink_kernel_create(net, NETLINK_RDMA, &cfg);
if (!nls)
return -ENOMEM;
nls->sk_sndtimeo = 10 * HZ;
+ rnet->nl_sock = nls;
return 0;
}
-void rdma_nl_exit(void)
+void rdma_nl_net_exit(struct rdma_dev_net *rnet)
{
- int idx;
-
- for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++)
- rdma_nl_unregister(idx);
-
- netlink_kernel_release(nls);
+ netlink_kernel_release(rnet->nl_sock);
}
MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_RDMA);
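
With rdma_nl_net_init()/rdma_nl_net_exit() above, each network namespace owns its own NETLINK_RDMA socket instead of a single init_net one. A hedged sketch of how the per-net setup is typically wired up with register_pernet_subsys(); the rdma_dev_net layout beyond nl_sock is assumed, not shown in this hunk.

#include <net/net_namespace.h>
#include <net/netns/generic.h>

/* Illustrative pernet glue; the real registration lives in device.c. */
static unsigned int my_rdma_net_id;

static __net_init int my_rdma_net_init(struct net *net)
{
	struct rdma_dev_net *rnet = net_generic(net, my_rdma_net_id);

	return rdma_nl_net_init(rnet);	/* creates the per-net nl_sock */
}

static __net_exit void my_rdma_net_exit(struct net *net)
{
	struct rdma_dev_net *rnet = net_generic(net, my_rdma_net_id);

	rdma_nl_net_exit(rnet);		/* releases the per-net nl_sock */
}

static struct pernet_operations my_rdma_net_ops = {
	.init = my_rdma_net_init,
	.exit = my_rdma_net_exit,
	.id = &my_rdma_net_id,
	.size = sizeof(struct rdma_dev_net),
};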
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 69188cbbd99b..2220a2dfab24 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -41,85 +41,137 @@
#include "core_priv.h"
#include "cma_priv.h"
#include "restrack.h"
+#include "uverbs.h"
+/*
+ * Determines whether a non-privileged user is allowed to specify a
+ * controlled QKEY. When true, non-privileged users may do so.
+ */
+static bool privileged_qkey;
+
+typedef int (*res_fill_func_t)(struct sk_buff*, bool,
+ struct rdma_restrack_entry*, uint32_t);
+
+/*
+ * Sort array elements by the netlink attribute name
+ */
static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
- [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING,
- .len = IB_DEVICE_NAME_MAX - 1},
- [RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING,
- .len = IB_FW_VERSION_NAME_MAX - 1},
- [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 },
- [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 },
- [RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 },
- [RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 },
- [RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 },
- [RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
- [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
- [RDMA_NLDEV_ATTR_RES_SUMMARY] = { .type = NLA_NESTED },
- [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY] = { .type = NLA_NESTED },
- [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME] = { .type = NLA_NUL_STRING,
- .len = 16 },
- [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR] = { .type = NLA_U64 },
- [RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED },
- [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED },
- [RDMA_NLDEV_ATTR_RES_LQPN] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_SQ_PSN] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
- [RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 },
- [RDMA_NLDEV_ATTR_RES_STATE] = { .type = NLA_U8 },
- [RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING,
- .len = TASK_COMM_LEN },
+ [RDMA_NLDEV_ATTR_CHARDEV] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_CHARDEV_ABI] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_CHARDEV_NAME] = { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
+ [RDMA_NLDEV_ATTR_CHARDEV_TYPE] = { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE },
+ [RDMA_NLDEV_ATTR_DEV_DIM] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING,
+ .len = IB_DEVICE_NAME_MAX },
+ [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
+ [RDMA_NLDEV_ATTR_DRIVER] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_DRIVER_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_DRIVER_STRING] = { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
+ [RDMA_NLDEV_ATTR_DRIVER_S32] = { .type = NLA_S32 },
+ [RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 },
+ [RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
+ [RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING,
+ .len = IFNAMSIZ },
+ [RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING,
+ .len = IFNAMSIZ },
+ [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_RES_CM_ID] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_CM_IDN] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY] = { .type = NLA_NESTED },
- [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_SRC_ADDR] = {
- .len = sizeof(struct __kernel_sockaddr_storage) },
- [RDMA_NLDEV_ATTR_RES_DST_ADDR] = {
- .len = sizeof(struct __kernel_sockaddr_storage) },
[RDMA_NLDEV_ATTR_RES_CQ] = { .type = NLA_NESTED },
- [RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED },
[RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 },
- [RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 },
- [RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED },
- [RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED },
- [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_CQN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_CTX] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_CTX_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_DST_ADDR] = {
+ .len = sizeof(struct __kernel_sockaddr_storage) },
[RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
+ [RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_LQPN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED },
[RDMA_NLDEV_ATTR_RES_MRLEN] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_MRN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_RES_PD] = { .type = NLA_NESTED },
- [RDMA_NLDEV_ATTR_RES_PD_ENTRY] = { .type = NLA_NESTED },
- [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING,
- .len = IFNAMSIZ },
- [RDMA_NLDEV_ATTR_DRIVER] = { .type = NLA_NESTED },
- [RDMA_NLDEV_ATTR_DRIVER_ENTRY] = { .type = NLA_NESTED },
- [RDMA_NLDEV_ATTR_DRIVER_STRING] = { .type = NLA_NUL_STRING,
- .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
- [RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = { .type = NLA_U8 },
- [RDMA_NLDEV_ATTR_DRIVER_S32] = { .type = NLA_S32 },
- [RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 },
- [RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 },
[RDMA_NLDEV_ATTR_RES_PDN] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_CQN] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_MRN] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_CM_IDN] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 },
- [RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING,
- .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
- [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 },
- [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING,
- .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
+ [RDMA_NLDEV_ATTR_RES_PD_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_RAW] = { .type = NLA_BINARY },
+ [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_SQ_PSN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_SRC_ADDR] = {
+ .len = sizeof(struct __kernel_sockaddr_storage) },
+ [RDMA_NLDEV_ATTR_RES_STATE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_SUMMARY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
+ [RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_SUBTYPE] = { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
+ [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_SRQ] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_SRQN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_SRQ_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_MIN_RANGE] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_MAX_RANGE] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_STAT_MODE] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_STAT_RES] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_STAT_COUNTER] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_STAT_COUNTER_ID] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_STAT_HWCOUNTERS] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING },
+ [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID] = { .type = NLA_U32 },
[RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 },
+ [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 },
+ [RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_DRIVER_DETAILS] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_DEV_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_PARENT_NAME] = { .type = NLA_NUL_STRING },
+ [RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_EVENT_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED] = { .type = NLA_U8 },
};
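
Each entry in nldev_policy above constrains one netlink attribute by type and length; handlers then parse requests against it. A hedged sketch of a doit-style handler body as if it sat inside nldev.c; the exact parse helper variant differs across kernel versions, and the attribute chosen here is just an example.

#include <net/netlink.h>

static int example_get_dev_index(struct sk_buff *skb, struct nlmsghdr *nlh,
				 struct netlink_ext_ack *extack, u32 *index)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	int err;

	/* Types and lengths are validated against nldev_policy[] here. */
	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
			  extack);
	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
		return -EINVAL;

	*index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	return 0;
}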
static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@@ -159,6 +211,19 @@ static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
return 0;
}
+int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name,
+ const char *str)
+{
+ if (put_driver_name_print_type(msg, name,
+ RDMA_NLDEV_PRINT_TYPE_UNSPEC))
+ return -EMSGSIZE;
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str))
+ return -EMSGSIZE;
+
+ return 0;
+}
+EXPORT_SYMBOL(rdma_nl_put_driver_string);
+
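
The newly exported rdma_nl_put_driver_string() lets a driver emit a named string inside its driver-details nest, alongside the existing u32/u64 helpers. A hedged sketch of a driver-side .fill_res_mr_entry callback using it; the "page_shift" and "state" keys and their values are made up for the example.

#include <linux/errno.h>
#include <rdma/ib_verbs.h>
#include <rdma/restrack.h>

static int mydrv_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr)
{
	if (rdma_nl_put_driver_u32(msg, "page_shift", 12))
		return -EMSGSIZE;

	return rdma_nl_put_driver_string(msg, "state", "registered");
}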
int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
{
return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
@@ -188,6 +253,12 @@ int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);
+bool rdma_nl_get_privileged_qkey(void)
+{
+ return privileged_qkey;
+}
+EXPORT_SYMBOL(rdma_nl_get_privileged_qkey);
+
static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
{
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
@@ -203,7 +274,7 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
{
char fw[IB_FW_VERSION_NAME_MAX];
int ret = 0;
- u8 port;
+ u32 port;
if (fill_nldev_handle(msg, device))
return -EMSGSIZE;
@@ -232,6 +303,21 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
return -EMSGSIZE;
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
return -EMSGSIZE;
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim))
+ return -EMSGSIZE;
+
+ if (device->type &&
+ nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_TYPE, device->type))
+ return -EMSGSIZE;
+
+ if (device->parent &&
+ nla_put_string(msg, RDMA_NLDEV_ATTR_PARENT_NAME,
+ dev_name(&device->parent->dev)))
+ return -EMSGSIZE;
+
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE,
+ device->name_assign_type))
+ return -EMSGSIZE;
/*
* Link type is determined on first port and mlx4 device
@@ -306,8 +392,7 @@ static int fill_port_info(struct sk_buff *msg,
}
out:
- if (netdev)
- dev_put(netdev);
+ dev_put(netdev);
return ret;
}
@@ -335,7 +420,8 @@ err:
return -EMSGSIZE;
}
-static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
+static int fill_res_info(struct sk_buff *msg, struct ib_device *device,
+ bool show_details)
{
static const char * const names[RDMA_RESTRACK_MAX] = {
[RDMA_RESTRACK_PD] = "pd",
@@ -344,6 +430,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
[RDMA_RESTRACK_CM_ID] = "cm_id",
[RDMA_RESTRACK_MR] = "mr",
[RDMA_RESTRACK_CTX] = "ctx",
+ [RDMA_RESTRACK_SRQ] = "srq",
};
struct nlattr *table_attr;
@@ -359,8 +446,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
if (!names[i])
continue;
- curr = rdma_restrack_count(device, i,
- task_active_pid_ns(current));
+ curr = rdma_restrack_count(device, i, show_details);
ret = fill_res_info_entry(msg, names[i], curr);
if (ret)
goto err;
@@ -377,35 +463,41 @@ err:
static int fill_res_name_pid(struct sk_buff *msg,
struct rdma_restrack_entry *res)
{
+ int err = 0;
+
/*
	 * For user resources, user space should read /proc/PID/comm to get
	 * the name of the task.
*/
if (rdma_is_kernel_res(res)) {
- if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
- res->kern_name))
- return -EMSGSIZE;
+ err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
+ res->kern_name);
} else {
- if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
- task_pid_vnr(res->task)))
- return -EMSGSIZE;
+ pid_t pid;
+
+ pid = task_pid_vnr(res->task);
+ /*
+		 * A dead (zombie) task yields a zero PID here; there is
+		 * no need to report it.
+ */
+ if (pid)
+ /*
+ * This part is racy, task can be killed and PID will
+ * be zero right here but it is ok, next query won't
+ * return PID. We don't promise real-time reflection
+ * of SW objects.
+ */
+ err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid);
}
- return 0;
-}
-static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg,
- struct rdma_restrack_entry *res)
-{
- if (!dev->ops.fill_res_entry)
- return false;
- return dev->ops.fill_res_entry(msg, res);
+ return err ? -EMSGSIZE : 0;
}
-static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
- struct rdma_restrack_entry *res, uint32_t port)
+static int fill_res_qp_entry_query(struct sk_buff *msg,
+ struct rdma_restrack_entry *res,
+ struct ib_device *dev,
+ struct ib_qp *qp)
{
- struct ib_qp *qp = container_of(res, struct ib_qp, res);
- struct ib_device *dev = qp->device;
struct ib_qp_init_attr qp_init_attr;
struct ib_qp_attr qp_attr;
int ret;
@@ -414,16 +506,6 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
if (ret)
return ret;
- if (port && port != qp_attr.port_num)
- return -EAGAIN;
-
- /* In create_qp() port is not set yet */
- if (qp_attr.port_num &&
- nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp_attr.port_num))
- goto err;
-
- if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
- goto err;
if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
qp_attr.dest_qp_num))
@@ -447,19 +529,53 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
goto err;
+ if (dev->ops.fill_res_qp_entry)
+ return dev->ops.fill_res_qp_entry(msg, qp);
+ return 0;
+
+err: return -EMSGSIZE;
+}
+
+static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_qp *qp = container_of(res, struct ib_qp, res);
+ struct ib_device *dev = qp->device;
+ int ret;
+
+ if (port && port != qp->port)
+ return -EAGAIN;
+
+ /* In create_qp() port is not set yet */
+ if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
+ return -EMSGSIZE;
+
+ ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num);
+ if (ret)
+ return -EMSGSIZE;
+
if (!rdma_is_kernel_res(res) &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
- goto err;
+ return -EMSGSIZE;
- if (fill_res_name_pid(msg, res))
- goto err;
+ ret = fill_res_name_pid(msg, res);
+ if (ret)
+ return -EMSGSIZE;
- if (fill_res_entry(dev, msg, res))
- goto err;
+ return fill_res_qp_entry_query(msg, res, dev, qp);
+}
- return 0;
+static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_qp *qp = container_of(res, struct ib_qp, res);
+ struct ib_device *dev = qp->device;
-err: return -EMSGSIZE;
+ if (port && port != qp->port)
+ return -EAGAIN;
+ if (!dev->ops.fill_res_qp_entry_raw)
+ return -EINVAL;
+ return dev->ops.fill_res_qp_entry_raw(msg, qp);
}
static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
@@ -471,7 +587,7 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
struct rdma_cm_id *cm_id = &id_priv->id;
if (port && port != cm_id->port_num)
- return 0;
+ return -EAGAIN;
if (cm_id->port_num &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
@@ -507,9 +623,8 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
if (fill_res_name_pid(msg, res))
goto err;
- if (fill_res_entry(dev, msg, res))
- goto err;
-
+ if (dev->ops.fill_res_cm_id_entry)
+ return dev->ops.fill_res_cm_id_entry(msg, cm_id);
return 0;
err: return -EMSGSIZE;
@@ -522,32 +637,42 @@ static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
struct ib_device *dev = cq->device;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
- goto err;
+ return -EMSGSIZE;
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
- goto err;
+ return -EMSGSIZE;
/* Poll context is only valid for kernel CQs */
if (rdma_is_kernel_res(res) &&
nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
- goto err;
+ return -EMSGSIZE;
+
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
+ return -EMSGSIZE;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
- goto err;
+ return -EMSGSIZE;
if (!rdma_is_kernel_res(res) &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
- cq->uobject->context->res.id))
- goto err;
+ cq->uobject->uevent.uobject.context->res.id))
+ return -EMSGSIZE;
if (fill_res_name_pid(msg, res))
- goto err;
+ return -EMSGSIZE;
- if (fill_res_entry(dev, msg, res))
- goto err;
+ return (dev->ops.fill_res_cq_entry) ?
+ dev->ops.fill_res_cq_entry(msg, cq) : 0;
+}
- return 0;
+static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_cq *cq = container_of(res, struct ib_cq, res);
+ struct ib_device *dev = cq->device;
-err: return -EMSGSIZE;
+ if (!dev->ops.fill_res_cq_entry_raw)
+ return -EINVAL;
+ return dev->ops.fill_res_cq_entry_raw(msg, cq);
}
static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
@@ -558,38 +683,45 @@ static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
if (has_cap_net_admin) {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
- goto err;
+ return -EMSGSIZE;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
- goto err;
+ return -EMSGSIZE;
}
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
RDMA_NLDEV_ATTR_PAD))
- goto err;
+ return -EMSGSIZE;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
- goto err;
+ return -EMSGSIZE;
if (!rdma_is_kernel_res(res) &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
- goto err;
+ return -EMSGSIZE;
if (fill_res_name_pid(msg, res))
- goto err;
+ return -EMSGSIZE;
- if (fill_res_entry(dev, msg, res))
- goto err;
+ return (dev->ops.fill_res_mr_entry) ?
+ dev->ops.fill_res_mr_entry(msg, mr) :
+ 0;
+}
- return 0;
+static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_mr *mr = container_of(res, struct ib_mr, res);
+ struct ib_device *dev = mr->pd->device;
-err: return -EMSGSIZE;
+ if (!dev->ops.fill_res_mr_entry_raw)
+ return -EINVAL;
+ return dev->ops.fill_res_mr_entry_raw(msg, mr);
}
static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
struct rdma_restrack_entry *res, uint32_t port)
{
struct ib_pd *pd = container_of(res, struct ib_pd, res);
- struct ib_device *dev = pd->device;
if (has_cap_net_admin) {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
@@ -612,15 +744,327 @@ static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
pd->uobject->context->res.id))
goto err;
+ return fill_res_name_pid(msg, res);
+
+err: return -EMSGSIZE;
+}
+
+static int fill_res_ctx_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_ucontext *ctx = container_of(res, struct ib_ucontext, res);
+
+ if (rdma_is_kernel_res(res))
+ return 0;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, ctx->res.id))
+ return -EMSGSIZE;
+
+ return fill_res_name_pid(msg, res);
+}
+
+static int fill_res_range_qp_entry(struct sk_buff *msg, uint32_t min_range,
+ uint32_t max_range)
+{
+ struct nlattr *entry_attr;
+
+ if (!min_range)
+ return 0;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
+ if (!entry_attr)
+ return -EMSGSIZE;
+
+ if (min_range == max_range) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, min_range))
+ goto err;
+ } else {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MIN_RANGE, min_range))
+ goto err;
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MAX_RANGE, max_range))
+ goto err;
+ }
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+ return -EMSGSIZE;
+}
+
+static int fill_res_srq_qps(struct sk_buff *msg, struct ib_srq *srq)
+{
+ uint32_t min_range = 0, prev = 0;
+ struct rdma_restrack_entry *res;
+ struct rdma_restrack_root *rt;
+ struct nlattr *table_attr;
+ struct ib_qp *qp = NULL;
+ unsigned long id = 0;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ rt = &srq->device->res[RDMA_RESTRACK_QP];
+ xa_lock(&rt->xa);
+ xa_for_each(&rt->xa, id, res) {
+ if (!rdma_restrack_get(res))
+ continue;
+
+ qp = container_of(res, struct ib_qp, res);
+ if (!qp->srq || (qp->srq->res.id != srq->res.id)) {
+ rdma_restrack_put(res);
+ continue;
+ }
+
+ if (qp->qp_num < prev)
+ /* qp_num should be ascending */
+ goto err_loop;
+
+ if (min_range == 0) {
+ min_range = qp->qp_num;
+ } else if (qp->qp_num > (prev + 1)) {
+ if (fill_res_range_qp_entry(msg, min_range, prev))
+ goto err_loop;
+
+ min_range = qp->qp_num;
+ }
+ prev = qp->qp_num;
+ rdma_restrack_put(res);
+ }
+
+ xa_unlock(&rt->xa);
+
+ if (fill_res_range_qp_entry(msg, min_range, prev))
+ goto err;
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err_loop:
+ rdma_restrack_put(res);
+ xa_unlock(&rt->xa);
+err:
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
+}
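For illustration with made-up numbers: given an SRQ whose attached QPs have qp_num 20, 21, 22 and 40, the loop above emits two nested RDMA_NLDEV_ATTR_RES_QP_ENTRY attributes, one carrying MIN_RANGE=20 with MAX_RANGE=22 and one carrying only LQPN=40, since a single-element range degenerates to a plain LQPN in fill_res_range_qp_entry(). The coalescing relies on qp_num values arriving in ascending order, which is what the defensive check that aborts the dump protects.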
+
+static int fill_res_srq_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_srq *srq = container_of(res, struct ib_srq, res);
+ struct ib_device *dev = srq->device;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SRQN, srq->res.id))
+ goto err;
+
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, srq->srq_type))
+ goto err;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, srq->pd->res.id))
+ goto err;
+
+ if (ib_srq_has_cq(srq->srq_type)) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN,
+ srq->ext.cq->res.id))
+ goto err;
+ }
+
+ if (fill_res_srq_qps(msg, srq))
+ goto err;
+
if (fill_res_name_pid(msg, res))
goto err;
- if (fill_res_entry(dev, msg, res))
+ if (dev->ops.fill_res_srq_entry)
+ return dev->ops.fill_res_srq_entry(msg, srq);
+
+ return 0;
+
+err:
+ return -EMSGSIZE;
+}
+
+static int fill_res_srq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_srq *srq = container_of(res, struct ib_srq, res);
+ struct ib_device *dev = srq->device;
+
+ if (!dev->ops.fill_res_srq_entry_raw)
+ return -EINVAL;
+ return dev->ops.fill_res_srq_entry_raw(msg, srq);
+}
+
+static int fill_stat_counter_mode(struct sk_buff *msg,
+ struct rdma_counter *counter)
+{
+ struct rdma_counter_mode *m = &counter->mode;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode))
+ return -EMSGSIZE;
+
+ if (m->mode == RDMA_COUNTER_MODE_AUTO) {
+ if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) &&
+ nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type))
+ return -EMSGSIZE;
+
+ if ((m->mask & RDMA_COUNTER_MASK_PID) &&
+ fill_res_name_pid(msg, &counter->res))
+ return -EMSGSIZE;
+ }
+
+ return 0;
+}
+
+static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn)
+{
+ struct nlattr *entry_attr;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
+ if (!entry_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn))
+ goto err;
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+ return -EMSGSIZE;
+}
+
+static int fill_stat_counter_qps(struct sk_buff *msg,
+ struct rdma_counter *counter)
+{
+ struct rdma_restrack_entry *res;
+ struct rdma_restrack_root *rt;
+ struct nlattr *table_attr;
+ struct ib_qp *qp = NULL;
+ unsigned long id = 0;
+ int ret = 0;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ rt = &counter->device->res[RDMA_RESTRACK_QP];
+ xa_lock(&rt->xa);
+ xa_for_each(&rt->xa, id, res) {
+ qp = container_of(res, struct ib_qp, res);
+ if (!qp->counter || (qp->counter->id != counter->id))
+ continue;
+
+ ret = fill_stat_counter_qp_entry(msg, qp->qp_num);
+ if (ret)
+ goto err;
+ }
+
+ xa_unlock(&rt->xa);
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err:
+ xa_unlock(&rt->xa);
+ nla_nest_cancel(msg, table_attr);
+ return ret;
+}
+
+int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name,
+ u64 value)
+{
+ struct nlattr *entry_attr;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
+ if (!entry_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
+ name))
+ goto err;
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE,
+ value, RDMA_NLDEV_ATTR_PAD))
goto err;
+ nla_nest_end(msg, entry_attr);
return 0;
-err: return -EMSGSIZE;
+err:
+ nla_nest_cancel(msg, entry_attr);
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry);
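Because rdma_nl_stat_hwcounter_entry() is exported, a driver can reuse it from its own fill hooks. A minimal sketch, where the mydrv_mr structure and its page_faults counter are invented for the example:

static int mydrv_fill_stat_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr)
{
	/* hypothetical driver-private MR wrapper around struct ib_mr */
	struct mydrv_mr *mr = container_of(ibmr, struct mydrv_mr, ibmr);

	return rdma_nl_stat_hwcounter_entry(msg, "page_faults",
					    atomic64_read(&mr->page_faults));
}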
+
+static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_mr *mr = container_of(res, struct ib_mr, res);
+ struct ib_device *dev = mr->pd->device;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
+ goto err;
+
+ if (dev->ops.fill_stat_mr_entry)
+ return dev->ops.fill_stat_mr_entry(msg, mr);
+ return 0;
+
+err:
+ return -EMSGSIZE;
+}
+
+static int fill_stat_counter_hwcounters(struct sk_buff *msg,
+ struct rdma_counter *counter)
+{
+ struct rdma_hw_stats *st = counter->stats;
+ struct nlattr *table_attr;
+ int i;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ mutex_lock(&st->lock);
+ for (i = 0; i < st->num_counters; i++) {
+ if (test_bit(i, st->is_disabled))
+ continue;
+ if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name,
+ st->value[i]))
+ goto err;
+ }
+ mutex_unlock(&st->lock);
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err:
+ mutex_unlock(&st->lock);
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
+}
+
+static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res,
+ uint32_t port)
+{
+ struct rdma_counter *counter =
+ container_of(res, struct rdma_counter, res);
+
+ if (port && port != counter->port)
+ return -EAGAIN;
+
+ /* Dump it even if the query failed */
+ rdma_counter_query_stats(counter);
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) ||
+ fill_stat_counter_mode(msg, counter) ||
+ fill_stat_counter_qps(msg, counter) ||
+ fill_stat_counter_hwcounters(msg, counter))
+ return -EMSGSIZE;
+
+ return 0;
}
static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -632,8 +1076,8 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
u32 index;
int err;
- err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
- nldev_policy, extack);
+ err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NL_VALIDATE_LIBERAL, extack);
if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
return -EINVAL;
@@ -652,6 +1096,10 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
0, 0);
+ if (!nlh) {
+ err = -EMSGSIZE;
+ goto err_free;
+ }
err = fill_dev_info(msg, device);
if (err)
@@ -660,7 +1108,7 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nlmsg_end(msg, nlh);
ib_device_put(device);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
err_free:
nlmsg_free(msg);
@@ -677,8 +1125,8 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
u32 index;
int err;
- err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
- nldev_policy, extack);
+ err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, extack);
if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
return -EINVAL;
@@ -690,8 +1138,12 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
char name[IB_DEVICE_NAME_MAX] = {};
- nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
+ nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
IB_DEVICE_NAME_MAX);
+ if (strlen(name) == 0) {
+ err = -EINVAL;
+ goto done;
+ }
err = ib_device_rename(device, name);
goto done;
}
@@ -704,6 +1156,14 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
goto put_done;
}
+ if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) {
+ u8 use_dim;
+
+ use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]);
+ err = ib_device_set_dim(device, use_dim);
+ goto done;
+ }
+
done:
ib_device_put(device);
put_done:
@@ -725,7 +1185,7 @@ static int _nldev_get_dumpit(struct ib_device *device,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
0, NLM_F_MULTI);
- if (fill_dev_info(skb, device)) {
+ if (!nlh || fill_dev_info(skb, device)) {
nlmsg_cancel(skb, nlh);
goto out;
}
@@ -757,8 +1217,8 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
u32 port;
int err;
- err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
- nldev_policy, extack);
+ err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NL_VALIDATE_LIBERAL, extack);
if (err ||
!tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
!tb[RDMA_NLDEV_ATTR_PORT_INDEX])
@@ -784,6 +1244,10 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
0, 0);
+ if (!nlh) {
+ err = -EMSGSIZE;
+ goto err_free;
+ }
err = fill_port_info(msg, device, port, sock_net(skb->sk));
if (err)
@@ -792,7 +1256,7 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nlmsg_end(msg, nlh);
ib_device_put(device);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
err_free:
nlmsg_free(msg);
@@ -813,8 +1277,8 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
int err;
unsigned int p;
- err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
- nldev_policy, NULL);
+ err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NL_VALIDATE_LIBERAL, NULL);
if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
return -EINVAL;
@@ -845,7 +1309,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
RDMA_NLDEV_CMD_PORT_GET),
0, NLM_F_MULTI);
- if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
+ if (!nlh || fill_port_info(skb, device, p, sock_net(skb->sk))) {
nlmsg_cancel(skb, nlh);
goto out;
}
@@ -863,13 +1327,14 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ bool show_details = false;
struct ib_device *device;
struct sk_buff *msg;
u32 index;
int ret;
- ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
- nldev_policy, extack);
+ ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NL_VALIDATE_LIBERAL, extack);
if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
return -EINVAL;
@@ -878,6 +1343,9 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!device)
return -EINVAL;
+ if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS])
+ show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]);
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg) {
ret = -ENOMEM;
@@ -887,14 +1355,18 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
0, 0);
+ if (!nlh) {
+ ret = -EMSGSIZE;
+ goto err_free;
+ }
- ret = fill_res_info(msg, device);
+ ret = fill_res_info(msg, device, show_details);
if (ret)
goto err_free;
nlmsg_end(msg, nlh);
ib_device_put(device);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
err_free:
nlmsg_free(msg);
@@ -918,7 +1390,7 @@ static int _nldev_res_get_dumpit(struct ib_device *device,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
0, NLM_F_MULTI);
- if (fill_res_info(skb, device)) {
+ if (!nlh || fill_res_info(skb, device, false)) {
nlmsg_cancel(skb, nlh);
goto out;
}
@@ -938,10 +1410,7 @@ static int nldev_res_get_dumpit(struct sk_buff *skb,
}
struct nldev_fill_res_entry {
- int (*fill_res_func)(struct sk_buff *msg, bool has_cap_net_admin,
- struct rdma_restrack_entry *res, u32 port);
enum rdma_nldev_attr nldev_attr;
- enum rdma_nldev_command nldev_cmd;
u8 flags;
u32 entry;
u32 id;
@@ -953,59 +1422,58 @@ enum nldev_res_flags {
static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
[RDMA_RESTRACK_QP] = {
- .fill_res_func = fill_res_qp_entry,
- .nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_LQPN,
},
[RDMA_RESTRACK_CM_ID] = {
- .fill_res_func = fill_res_cm_id_entry,
- .nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
},
[RDMA_RESTRACK_CQ] = {
- .fill_res_func = fill_res_cq_entry,
- .nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
.flags = NLDEV_PER_DEV,
.entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_CQN,
},
[RDMA_RESTRACK_MR] = {
- .fill_res_func = fill_res_mr_entry,
- .nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
.flags = NLDEV_PER_DEV,
.entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_MRN,
},
[RDMA_RESTRACK_PD] = {
- .fill_res_func = fill_res_pd_entry,
- .nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
.flags = NLDEV_PER_DEV,
.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
.id = RDMA_NLDEV_ATTR_RES_PDN,
},
-};
+ [RDMA_RESTRACK_COUNTER] = {
+ .nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
+ .entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
+ .id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
+ },
+ [RDMA_RESTRACK_CTX] = {
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_CTX,
+ .flags = NLDEV_PER_DEV,
+ .entry = RDMA_NLDEV_ATTR_RES_CTX_ENTRY,
+ .id = RDMA_NLDEV_ATTR_RES_CTXN,
+ },
+ [RDMA_RESTRACK_SRQ] = {
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_SRQ,
+ .flags = NLDEV_PER_DEV,
+ .entry = RDMA_NLDEV_ATTR_RES_SRQ_ENTRY,
+ .id = RDMA_NLDEV_ATTR_RES_SRQN,
+ },
-static bool is_visible_in_pid_ns(struct rdma_restrack_entry *res)
-{
- /*
- * 1. Kern resources should be visible in init name space only
- * 2. Present only resources visible in the current namespace
- */
- if (rdma_is_kernel_res(res))
- return task_active_pid_ns(current) == &init_pid_ns;
- return task_active_pid_ns(current) == task_active_pid_ns(res->task);
-}
+};
-static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack,
- enum rdma_restrack_type res_type)
+static noinline_for_stack int
+res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack,
+ enum rdma_restrack_type res_type,
+ res_fill_func_t fill_func)
{
const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
@@ -1016,8 +1484,8 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct sk_buff *msg;
int ret;
- ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
- nldev_policy, extack);
+ ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NL_VALIDATE_LIBERAL, extack);
if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id])
return -EINVAL;
@@ -1047,35 +1515,32 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
goto err;
}
- if (!is_visible_in_pid_ns(res)) {
- ret = -ENOENT;
- goto err_get;
- }
-
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg) {
ret = -ENOMEM;
- goto err;
+ goto err_get;
}
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
- RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NL_GET_OP(nlh->nlmsg_type)),
0, 0);
- if (fill_nldev_handle(msg, device)) {
+ if (!nlh || fill_nldev_handle(msg, device)) {
ret = -EMSGSIZE;
goto err_free;
}
has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);
- ret = fe->fill_res_func(msg, has_cap_net_admin, res, port);
- rdma_restrack_put(res);
+
+ ret = fill_func(msg, has_cap_net_admin, res, port);
if (ret)
goto err_free;
+ rdma_restrack_put(res);
nlmsg_end(msg, nlh);
ib_device_put(device);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
err_free:
nlmsg_free(msg);
@@ -1088,13 +1553,15 @@ err:
static int res_get_common_dumpit(struct sk_buff *skb,
struct netlink_callback *cb,
- enum rdma_restrack_type res_type)
+ enum rdma_restrack_type res_type,
+ res_fill_func_t fill_func)
{
const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
struct rdma_restrack_entry *res;
struct rdma_restrack_root *rt;
int err, ret = 0, idx = 0;
+ bool show_details = false;
struct nlattr *table_attr;
struct nlattr *entry_attr;
struct ib_device *device;
@@ -1105,8 +1572,8 @@ static int res_get_common_dumpit(struct sk_buff *skb,
u32 index, port = 0;
bool filled = false;
- err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
- nldev_policy, NULL);
+ err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NL_VALIDATE_LIBERAL, NULL);
/*
* Right now, we are expecting the device index to get res information,
* but it is possible to extend this code to return all devices in
@@ -1123,6 +1590,9 @@ static int res_get_common_dumpit(struct sk_buff *skb,
if (!device)
return -EINVAL;
+ if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS])
+ show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]);
+
/*
* If no PORT_INDEX is supplied, we will return all QPs from that device
*/
@@ -1135,10 +1605,11 @@ static int res_get_common_dumpit(struct sk_buff *skb,
}
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
- RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NL_GET_OP(cb->nlh->nlmsg_type)),
0, NLM_F_MULTI);
- if (fill_nldev_handle(skb, device)) {
+ if (!nlh || fill_nldev_handle(skb, device)) {
ret = -EMSGSIZE;
goto err;
}
@@ -1159,8 +1630,8 @@ static int res_get_common_dumpit(struct sk_buff *skb,
* objects.
*/
xa_for_each(&rt->xa, id, res) {
- if (!is_visible_in_pid_ns(res))
- continue;
+ if (xa_get_mark(&rt->xa, res->id, RESTRACK_DD) && !show_details)
+ goto next;
if (idx < start || !rdma_restrack_get(res))
goto next;
@@ -1176,7 +1647,8 @@ static int res_get_common_dumpit(struct sk_buff *skb,
goto msg_full;
}
- ret = fe->fill_res_func(skb, has_cap_net_admin, res, port);
+ ret = fill_func(skb, has_cap_net_admin, res, port);
+
rdma_restrack_put(res);
if (ret) {
@@ -1223,20 +1695,29 @@ err_index:
static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \
struct netlink_callback *cb) \
{ \
- return res_get_common_dumpit(skb, cb, type); \
+ return res_get_common_dumpit(skb, cb, type, \
+ fill_res_##name##_entry); \
} \
static int nldev_res_get_##name##_doit(struct sk_buff *skb, \
struct nlmsghdr *nlh, \
struct netlink_ext_ack *extack) \
{ \
- return res_get_common_doit(skb, nlh, extack, type); \
+ return res_get_common_doit(skb, nlh, extack, type, \
+ fill_res_##name##_entry); \
}
RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
+RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP);
RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
+RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
+RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR);
+RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
+RES_GET_FUNCS(ctx, RDMA_RESTRACK_CTX);
+RES_GET_FUNCS(srq, RDMA_RESTRACK_SRQ);
+RES_GET_FUNCS(srq_raw, RDMA_RESTRACK_SRQ);
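To make the macro concrete, RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ) expands to roughly the following pair, which is what the RDMA_NLDEV_CMD_RES_CQ_GET_RAW entry in nldev_cb_table further down points at:

static int nldev_res_get_cq_raw_dumpit(struct sk_buff *skb,
				       struct netlink_callback *cb)
{
	return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CQ,
				     fill_res_cq_raw_entry);
}

static int nldev_res_get_cq_raw_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
				     struct netlink_ext_ack *extack)
{
	return res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_CQ,
				   fill_res_cq_raw_entry);
}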
static LIST_HEAD(link_ops);
static DECLARE_RWSEM(link_ops_rwsem);
@@ -1284,22 +1765,22 @@ static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
char type[IFNAMSIZ];
int err;
- err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
- nldev_policy, extack);
+ err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, extack);
if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
!tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
return -EINVAL;
- nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
+ nla_strscpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
sizeof(ibdev_name));
- if (strchr(ibdev_name, '%'))
+ if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0)
return -EINVAL;
- nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
- nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
+ nla_strscpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
+ nla_strscpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
sizeof(ndev_name));
- ndev = dev_get_by_name(&init_net, ndev_name);
+ ndev = dev_get_by_name(sock_net(skb->sk), ndev_name);
if (!ndev)
return -ENODEV;
@@ -1328,8 +1809,8 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
u32 index;
int err;
- err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
- nldev_policy, extack);
+ err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, extack);
if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
return -EINVAL;
@@ -1338,7 +1819,7 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!device)
return -EINVAL;
- if (!(device->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG)) {
+ if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) {
ib_device_put(device);
return -EINVAL;
}
@@ -1347,6 +1828,94 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
}
+static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE];
+ struct ib_client_nl_info data = {};
+ struct ib_device *ibdev = NULL;
+ struct sk_buff *msg;
+ u32 index;
+ int err;
+
+ err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
+ NL_VALIDATE_LIBERAL, extack);
+ if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE])
+ return -EINVAL;
+
+ nla_strscpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
+ sizeof(client_name));
+
+ if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ ibdev = ib_device_get_by_index(sock_net(skb->sk), index);
+ if (!ibdev)
+ return -EINVAL;
+
+ if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
+ data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ if (!rdma_is_port_valid(ibdev, data.port)) {
+ err = -EINVAL;
+ goto out_put;
+ }
+ } else {
+ data.port = -1;
+ }
+ } else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
+ return -EINVAL;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ err = -ENOMEM;
+ goto out_put;
+ }
+ nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NLDEV_CMD_GET_CHARDEV),
+ 0, 0);
+ if (!nlh) {
+ err = -EMSGSIZE;
+ goto out_nlmsg;
+ }
+
+ data.nl_msg = msg;
+ err = ib_get_client_nl_info(ibdev, client_name, &data);
+ if (err)
+ goto out_nlmsg;
+
+ err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV,
+ huge_encode_dev(data.cdev->devt),
+ RDMA_NLDEV_ATTR_PAD);
+ if (err)
+ goto out_data;
+ err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi,
+ RDMA_NLDEV_ATTR_PAD);
+ if (err)
+ goto out_data;
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME,
+ dev_name(data.cdev))) {
+ err = -EMSGSIZE;
+ goto out_data;
+ }
+
+ nlmsg_end(msg, nlh);
+ put_device(data.cdev);
+ if (ibdev)
+ ib_device_put(ibdev);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
+
+out_data:
+ put_device(data.cdev);
+out_nlmsg:
+ nlmsg_free(msg);
+out_put:
+ if (ibdev)
+ ib_device_put(ibdev);
+ return err;
+}
+
static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -1354,8 +1923,8 @@ static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct sk_buff *msg;
int err;
- err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
- nldev_policy, extack);
+ err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NL_VALIDATE_LIBERAL, extack);
if (err)
return err;
@@ -1367,6 +1936,10 @@ static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
RDMA_NLDEV_CMD_SYS_GET),
0, 0);
+ if (!nlh) {
+ nlmsg_free(msg);
+ return -EMSGSIZE;
+ }
err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
(u8)ib_devices_shared_netns);
@@ -1374,29 +1947,694 @@ static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nlmsg_free(msg);
return err;
}
+
+ err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE,
+ (u8)privileged_qkey);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_MONITOR_MODE, 1);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+ /*
+ * Copy-on-fork is supported.
+ * See commits:
+ * 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes")
+ * 4eae4efa2c29 ("hugetlb: do early cow when page pinned on src mm")
+ * for more details. Don't backport this without them.
+ *
+ * Return value ignored on purpose, assume copy-on-fork is not
+ * supported in case of failure.
+ */
+ nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, 1);
+
nlmsg_end(msg, nlh);
- return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
+}
+
+static int nldev_set_sys_set_netns_doit(struct nlattr *tb[])
+{
+ u8 enable;
+ int err;
+
+ enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
+ /* Only 0 and 1 are supported */
+ if (enable > 1)
+ return -EINVAL;
+
+ err = rdma_compatdev_set(enable);
+ return err;
+}
+
+static int nldev_set_sys_set_pqkey_doit(struct nlattr *tb[])
+{
+ u8 enable;
+
+ enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE]);
+ /* Only 0 and 1 are supported */
+ if (enable > 1)
+ return -EINVAL;
+
+ privileged_qkey = enable;
+ return 0;
}
static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
- u8 enable;
int err;
err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
nldev_policy, extack);
- if (err || !tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
+ if (err)
return -EINVAL;
- enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
- /* Only 0 and 1 are supported */
- if (enable > 1)
+ if (tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
+ return nldev_set_sys_set_netns_doit(tb);
+
+ if (tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE])
+ return nldev_set_sys_set_pqkey_doit(tb);
+
+ return -EINVAL;
+}
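For orientation only (the exact syntax belongs to the iproute2 rdma tool, not to this patch): these two sysctl-like toggles are what userspace is expected to flip with commands along the lines of "rdma system set netns exclusive" and "rdma system set privileged-qkey on"; on the kernel side only the values 0 and 1 are accepted, as the helpers above check.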
+
+
+static int nldev_stat_set_mode_doit(struct sk_buff *msg,
+ struct netlink_ext_ack *extack,
+ struct nlattr *tb[],
+ struct ib_device *device, u32 port)
+{
+ u32 mode, mask = 0, qpn, cntn = 0;
+ bool opcnt = false;
+ int ret;
+
+ /* Currently only QP counters are supported */
+ if (!tb[RDMA_NLDEV_ATTR_STAT_RES] ||
+ nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
return -EINVAL;
- err = rdma_compatdev_set(enable);
- return err;
+ if (tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED])
+ opcnt = !!nla_get_u8(
+ tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED]);
+
+ mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
+ if (mode == RDMA_COUNTER_MODE_AUTO) {
+ if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
+ mask = nla_get_u32(
+ tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
+ return rdma_counter_set_auto_mode(device, port, mask, opcnt,
+ extack);
+ }
+
+ if (!tb[RDMA_NLDEV_ATTR_RES_LQPN])
+ return -EINVAL;
+
+ qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
+ if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
+ cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
+ ret = rdma_counter_bind_qpn(device, port, qpn, cntn);
+ if (ret)
+ return ret;
+ } else {
+ ret = rdma_counter_bind_qpn_alloc(device, port, qpn, &cntn);
+ if (ret)
+ return ret;
+ }
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
+ ret = -EMSGSIZE;
+ goto err_fill;
+ }
+
+ return 0;
+
+err_fill:
+ rdma_counter_unbind_qpn(device, port, qpn, cntn);
+ return ret;
+}
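In other words: auto mode hands the whole port over to rdma_counter_set_auto_mode() with the requested grouping mask, while manual mode binds one LQPN either to an existing counter (when RDMA_NLDEV_ATTR_STAT_COUNTER_ID is supplied) or to a freshly allocated one, and the reply echoes the counter ID and LQPN back so userspace learns which counter the QP landed on. With the iproute2 rdma tool this is assumed to correspond to commands along the lines of "rdma statistic qp set link mlx5_0/1 auto type on" versus "rdma statistic qp bind link mlx5_0/1 lqpn 178".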
+
+static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[],
+ struct ib_device *device,
+ u32 port)
+{
+ struct rdma_hw_stats *stats;
+ struct nlattr *entry_attr;
+ unsigned long *target;
+ int rem, i, ret = 0;
+ u32 index;
+
+ stats = ib_get_hw_stats_port(device, port);
+ if (!stats)
+ return -EINVAL;
+
+ target = kcalloc(BITS_TO_LONGS(stats->num_counters),
+ sizeof(*stats->is_disabled), GFP_KERNEL);
+ if (!target)
+ return -ENOMEM;
+
+ nla_for_each_nested(entry_attr, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS],
+ rem) {
+ index = nla_get_u32(entry_attr);
+ if ((index >= stats->num_counters) ||
+ !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ set_bit(index, target);
+ }
+
+ for (i = 0; i < stats->num_counters; i++) {
+ if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL))
+ continue;
+
+ ret = rdma_counter_modify(device, port, i, test_bit(i, target));
+ if (ret)
+ goto out;
+ }
+
+out:
+ kfree(target);
+ return ret;
+}
+
+static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct ib_device *device;
+ struct sk_buff *msg;
+ u32 index, port;
+ int ret;
+
+ ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
+ extack);
+ if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
+ !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), index);
+ if (!device)
+ return -EINVAL;
+
+ port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ if (!rdma_is_port_valid(device, port)) {
+ ret = -EINVAL;
+ goto err_put_device;
+ }
+
+ if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] &&
+ !tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
+ ret = -EINVAL;
+ goto err_put_device;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto err_put_device;
+ }
+ nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NLDEV_CMD_STAT_SET),
+ 0, 0);
+ if (!nlh || fill_nldev_handle(msg, device) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
+ ret = -EMSGSIZE;
+ goto err_free_msg;
+ }
+
+ if (tb[RDMA_NLDEV_ATTR_STAT_MODE]) {
+ ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port);
+ if (ret)
+ goto err_free_msg;
+ }
+
+ if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
+ ret = nldev_stat_set_counter_dynamic_doit(tb, device, port);
+ if (ret)
+ goto err_free_msg;
+ }
+
+ nlmsg_end(msg, nlh);
+ ib_device_put(device);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
+
+err_free_msg:
+ nlmsg_free(msg);
+err_put_device:
+ ib_device_put(device);
+ return ret;
+}
+
+static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct ib_device *device;
+ struct sk_buff *msg;
+ u32 index, port, qpn, cntn;
+ int ret;
+
+ ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, extack);
+ if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
+ !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] ||
+ !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] ||
+ !tb[RDMA_NLDEV_ATTR_RES_LQPN])
+ return -EINVAL;
+
+ if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), index);
+ if (!device)
+ return -EINVAL;
+
+ port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ if (!rdma_is_port_valid(device, port)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NLDEV_CMD_STAT_SET),
+ 0, 0);
+ if (!nlh) {
+ ret = -EMSGSIZE;
+ goto err_fill;
+ }
+
+ cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
+ qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
+ if (fill_nldev_handle(msg, device) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
+ ret = -EMSGSIZE;
+ goto err_fill;
+ }
+
+ ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
+ if (ret)
+ goto err_fill;
+
+ nlmsg_end(msg, nlh);
+ ib_device_put(device);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
+
+err_fill:
+ nlmsg_free(msg);
+err:
+ ib_device_put(device);
+ return ret;
+}
+
+static noinline_for_stack int
+stat_get_doit_default_counter(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack,
+ struct nlattr *tb[])
+{
+ struct rdma_hw_stats *stats;
+ struct nlattr *table_attr;
+ struct ib_device *device;
+ int ret, num_cnts, i;
+ struct sk_buff *msg;
+ u32 index, port;
+ u64 v;
+
+ if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), index);
+ if (!device)
+ return -EINVAL;
+
+ if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ stats = ib_get_hw_stats_port(device, port);
+ if (!stats) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NLDEV_CMD_STAT_GET),
+ 0, 0);
+
+ if (!nlh || fill_nldev_handle(msg, device) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
+ ret = -EMSGSIZE;
+ goto err_msg;
+ }
+
+ mutex_lock(&stats->lock);
+
+ num_cnts = device->ops.get_hw_stats(device, stats, port, 0);
+ if (num_cnts < 0) {
+ ret = -EINVAL;
+ goto err_stats;
+ }
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
+ if (!table_attr) {
+ ret = -EMSGSIZE;
+ goto err_stats;
+ }
+ for (i = 0; i < num_cnts; i++) {
+ if (test_bit(i, stats->is_disabled))
+ continue;
+
+ v = stats->value[i] +
+ rdma_counter_get_hwstat_value(device, port, i);
+ if (rdma_nl_stat_hwcounter_entry(msg,
+ stats->descs[i].name, v)) {
+ ret = -EMSGSIZE;
+ goto err_table;
+ }
+ }
+ nla_nest_end(msg, table_attr);
+
+ mutex_unlock(&stats->lock);
+ nlmsg_end(msg, nlh);
+ ib_device_put(device);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
+
+err_table:
+ nla_nest_cancel(msg, table_attr);
+err_stats:
+ mutex_unlock(&stats->lock);
+err_msg:
+ nlmsg_free(msg);
+err:
+ ib_device_put(device);
+ return ret;
+}
+
+static noinline_for_stack int
+stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack, struct nlattr *tb[])
+
+{
+ static enum rdma_nl_counter_mode mode;
+ static enum rdma_nl_counter_mask mask;
+ struct ib_device *device;
+ struct sk_buff *msg;
+ u32 index, port;
+ bool opcnt;
+ int ret;
+
+ if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
+ return nldev_res_get_counter_doit(skb, nlh, extack);
+
+ if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] ||
+ !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), index);
+ if (!device)
+ return -EINVAL;
+
+ port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ if (!rdma_is_port_valid(device, port)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NLDEV_CMD_STAT_GET),
+ 0, 0);
+ if (!nlh) {
+ ret = -EMSGSIZE;
+ goto err_msg;
+ }
+
+ ret = rdma_counter_get_mode(device, port, &mode, &mask, &opcnt);
+ if (ret)
+ goto err_msg;
+
+ if (fill_nldev_handle(msg, device) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) {
+ ret = -EMSGSIZE;
+ goto err_msg;
+ }
+
+ if ((mode == RDMA_COUNTER_MODE_AUTO) &&
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) {
+ ret = -EMSGSIZE;
+ goto err_msg;
+ }
+
+ if ((mode == RDMA_COUNTER_MODE_AUTO) &&
+ nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED, opcnt)) {
+ ret = -EMSGSIZE;
+ goto err_msg;
+ }
+
+ nlmsg_end(msg, nlh);
+ ib_device_put(device);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
+
+err_msg:
+ nlmsg_free(msg);
+err:
+ ib_device_put(device);
+ return ret;
+}
+
+static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ int ret;
+
+ ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NL_VALIDATE_LIBERAL, extack);
+ if (ret)
+ return -EINVAL;
+
+ if (!tb[RDMA_NLDEV_ATTR_STAT_RES])
+ return stat_get_doit_default_counter(skb, nlh, extack, tb);
+
+ switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
+ case RDMA_NLDEV_ATTR_RES_QP:
+ ret = stat_get_doit_qp(skb, nlh, extack, tb);
+ break;
+ case RDMA_NLDEV_ATTR_RES_MR:
+ ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR,
+ fill_stat_mr_entry);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int nldev_stat_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ int ret;
+
+ ret = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NL_VALIDATE_LIBERAL, NULL);
+ if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES])
+ return -EINVAL;
+
+ switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
+ case RDMA_NLDEV_ATTR_RES_QP:
+ ret = nldev_res_get_counter_dumpit(skb, cb);
+ break;
+ case RDMA_NLDEV_ATTR_RES_MR:
+ ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR,
+ fill_stat_mr_entry);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int nldev_stat_get_counter_status_doit(struct sk_buff *skb,
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry;
+ struct rdma_hw_stats *stats;
+ struct ib_device *device;
+ struct sk_buff *msg;
+ u32 devid, port;
+ int ret, i;
+
+ ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NL_VALIDATE_LIBERAL, extack);
+ if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
+ !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
+ return -EINVAL;
+
+ devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), devid);
+ if (!device)
+ return -EINVAL;
+
+ port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ if (!rdma_is_port_valid(device, port)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ stats = ib_get_hw_stats_port(device, port);
+ if (!stats) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ nlh = nlmsg_put(
+ msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS),
+ 0, 0);
+
+ ret = -EMSGSIZE;
+ if (!nlh || fill_nldev_handle(msg, device) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
+ goto err_msg;
+
+ table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
+ if (!table)
+ goto err_msg;
+
+ mutex_lock(&stats->lock);
+ for (i = 0; i < stats->num_counters; i++) {
+ entry = nla_nest_start(msg,
+ RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
+ if (!entry)
+ goto err_msg_table;
+
+ if (nla_put_string(msg,
+ RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
+ stats->descs[i].name) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i))
+ goto err_msg_entry;
+
+ if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) &&
+ (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC,
+ !test_bit(i, stats->is_disabled))))
+ goto err_msg_entry;
+
+ nla_nest_end(msg, entry);
+ }
+ mutex_unlock(&stats->lock);
+
+ nla_nest_end(msg, table);
+ nlmsg_end(msg, nlh);
+ ib_device_put(device);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
+
+err_msg_entry:
+ nla_nest_cancel(msg, entry);
+err_msg_table:
+ mutex_unlock(&stats->lock);
+ nla_nest_cancel(msg, table);
+err_msg:
+ nlmsg_free(msg);
+err:
+ ib_device_put(device);
+ return ret;
+}
+
+static int nldev_newdev(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ enum rdma_nl_dev_type type;
+ struct ib_device *parent;
+ char name[IFNAMSIZ] = {};
+ u32 parentid;
+ int ret;
+
+ ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, extack);
+ if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
+ !tb[RDMA_NLDEV_ATTR_DEV_NAME] || !tb[RDMA_NLDEV_ATTR_DEV_TYPE])
+ return -EINVAL;
+
+ nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], sizeof(name));
+ type = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_TYPE]);
+ parentid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ parent = ib_device_get_by_index(sock_net(skb->sk), parentid);
+ if (!parent)
+ return -EINVAL;
+
+ ret = ib_add_sub_device(parent, type, name);
+ ib_device_put(parent);
+
+ return ret;
+}
+
+static int nldev_deldev(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct ib_device *device;
+ u32 devid;
+ int ret;
+
+ ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, extack);
+ if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+ return -EINVAL;
+
+ devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), devid);
+ if (!device)
+ return -EINVAL;
+
+ return ib_del_sub_device_and_put(device);
}
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
@@ -1404,6 +2642,9 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
.doit = nldev_get_doit,
.dump = nldev_get_dumpit,
},
+ [RDMA_NLDEV_CMD_GET_CHARDEV] = {
+ .doit = nldev_get_chardev,
+ },
[RDMA_NLDEV_CMD_SET] = {
.doit = nldev_set_doit,
.flags = RDMA_NL_ADMIN_PERM,
@@ -1444,6 +2685,14 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
.doit = nldev_res_get_pd_doit,
.dump = nldev_res_get_pd_dumpit,
},
+ [RDMA_NLDEV_CMD_RES_CTX_GET] = {
+ .doit = nldev_res_get_ctx_doit,
+ .dump = nldev_res_get_ctx_dumpit,
+ },
+ [RDMA_NLDEV_CMD_RES_SRQ_GET] = {
+ .doit = nldev_res_get_srq_doit,
+ .dump = nldev_res_get_srq_dumpit,
+ },
[RDMA_NLDEV_CMD_SYS_GET] = {
.doit = nldev_sys_get_doit,
},
@@ -1451,14 +2700,219 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
.doit = nldev_set_sys_set_doit,
.flags = RDMA_NL_ADMIN_PERM,
},
+ [RDMA_NLDEV_CMD_STAT_SET] = {
+ .doit = nldev_stat_set_doit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_STAT_GET] = {
+ .doit = nldev_stat_get_doit,
+ .dump = nldev_stat_get_dumpit,
+ },
+ [RDMA_NLDEV_CMD_STAT_DEL] = {
+ .doit = nldev_stat_del_doit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_RES_QP_GET_RAW] = {
+ .doit = nldev_res_get_qp_raw_doit,
+ .dump = nldev_res_get_qp_raw_dumpit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = {
+ .doit = nldev_res_get_cq_raw_doit,
+ .dump = nldev_res_get_cq_raw_dumpit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_RES_MR_GET_RAW] = {
+ .doit = nldev_res_get_mr_raw_doit,
+ .dump = nldev_res_get_mr_raw_dumpit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_RES_SRQ_GET_RAW] = {
+ .doit = nldev_res_get_srq_raw_doit,
+ .dump = nldev_res_get_srq_raw_dumpit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_STAT_GET_STATUS] = {
+ .doit = nldev_stat_get_counter_status_doit,
+ },
+ [RDMA_NLDEV_CMD_NEWDEV] = {
+ .doit = nldev_newdev,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_DELDEV] = {
+ .doit = nldev_deldev,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
};
+static int fill_mon_netdev_rename(struct sk_buff *msg,
+ struct ib_device *device, u32 port,
+ const struct net *net)
+{
+ struct net_device *netdev = ib_device_get_netdev(device, port);
+ int ret = 0;
+
+ if (!netdev || !net_eq(dev_net(netdev), net))
+ goto out;
+
+ ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
+ if (ret)
+ goto out;
+ ret = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
+out:
+ dev_put(netdev);
+ return ret;
+}
+
+static int fill_mon_netdev_association(struct sk_buff *msg,
+ struct ib_device *device, u32 port,
+ const struct net *net)
+{
+ struct net_device *netdev = ib_device_get_netdev(device, port);
+ int ret = 0;
+
+ if (netdev && !net_eq(dev_net(netdev), net))
+ goto out;
+
+ ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index);
+ if (ret)
+ goto out;
+
+ ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
+ dev_name(&device->dev));
+ if (ret)
+ goto out;
+
+ ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port);
+ if (ret)
+ goto out;
+
+ if (netdev) {
+ ret = nla_put_u32(msg,
+ RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
+ if (ret)
+ goto out;
+
+ ret = nla_put_string(msg,
+ RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
+ }
+
+out:
+ dev_put(netdev);
+ return ret;
+}
+
+static void rdma_nl_notify_err_msg(struct ib_device *device, u32 port_num,
+ enum rdma_nl_notify_event_type type)
+{
+ struct net_device *netdev;
+
+ switch (type) {
+ case RDMA_REGISTER_EVENT:
+ dev_warn_ratelimited(&device->dev,
+ "Failed to send RDMA monitor register device event\n");
+ break;
+ case RDMA_UNREGISTER_EVENT:
+ dev_warn_ratelimited(&device->dev,
+ "Failed to send RDMA monitor unregister device event\n");
+ break;
+ case RDMA_NETDEV_ATTACH_EVENT:
+ netdev = ib_device_get_netdev(device, port_num);
+ dev_warn_ratelimited(&device->dev,
+ "Failed to send RDMA monitor netdev attach event: port %d netdev %d\n",
+ port_num, netdev->ifindex);
+ dev_put(netdev);
+ break;
+ case RDMA_NETDEV_DETACH_EVENT:
+ dev_warn_ratelimited(&device->dev,
+ "Failed to send RDMA monitor netdev detach event: port %d\n",
+ port_num);
+ break;
+ case RDMA_RENAME_EVENT:
+ dev_warn_ratelimited(&device->dev,
+ "Failed to send RDMA monitor rename device event\n");
+ break;
+
+ case RDMA_NETDEV_RENAME_EVENT:
+ netdev = ib_device_get_netdev(device, port_num);
+ dev_warn_ratelimited(&device->dev,
+ "Failed to send RDMA monitor netdev rename event: port %d netdev %d\n",
+ port_num, netdev->ifindex);
+ dev_put(netdev);
+ break;
+ default:
+ break;
+ }
+}
+
+int rdma_nl_notify_event(struct ib_device *device, u32 port_num,
+ enum rdma_nl_notify_event_type type)
+{
+ struct sk_buff *skb;
+ int ret = -EMSGSIZE;
+ struct net *net;
+ void *nlh;
+
+ net = read_pnet(&device->coredev.rdma_net);
+ if (!net)
+ return -EINVAL;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+ nlh = nlmsg_put(skb, 0, 0,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_MONITOR),
+ 0, 0);
+ if (!nlh)
+ goto err_free;
+
+ switch (type) {
+ case RDMA_REGISTER_EVENT:
+ case RDMA_UNREGISTER_EVENT:
+ case RDMA_RENAME_EVENT:
+ ret = fill_nldev_handle(skb, device);
+ if (ret)
+ goto err_free;
+ break;
+ case RDMA_NETDEV_ATTACH_EVENT:
+ case RDMA_NETDEV_DETACH_EVENT:
+ ret = fill_mon_netdev_association(skb, device, port_num, net);
+ if (ret)
+ goto err_free;
+ break;
+ case RDMA_NETDEV_RENAME_EVENT:
+ ret = fill_mon_netdev_rename(skb, device, port_num, net);
+ if (ret)
+ goto err_free;
+ break;
+ default:
+ break;
+ }
+
+ ret = nla_put_u8(skb, RDMA_NLDEV_ATTR_EVENT_TYPE, type);
+ if (ret)
+ goto err_free;
+
+ nlmsg_end(skb, nlh);
+ ret = rdma_nl_multicast(net, skb, RDMA_NL_GROUP_NOTIFY, GFP_KERNEL);
+ if (ret && ret != -ESRCH) {
+ skb = NULL; /* skb is freed in the netlink send-op handling */
+ goto err_free;
+ }
+ return 0;
+
+err_free:
+ rdma_nl_notify_err_msg(device, port_num, type);
+ nlmsg_free(skb);
+ return ret;
+}
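A caller in the core is expected to use this as, for example, rdma_nl_notify_event(device, port_num, RDMA_NETDEV_ATTACH_EVENT); note that -ESRCH from rdma_nl_multicast(), meaning nobody has joined RDMA_NL_GROUP_NOTIFY, is deliberately not reported as a failure.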
+
void __init nldev_init(void)
{
rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
}
-void __exit nldev_exit(void)
+void nldev_exit(void)
{
rdma_nl_unregister(RDMA_NL_NLDEV);
}
diff --git a/drivers/infiniband/core/opa_smi.h b/drivers/infiniband/core/opa_smi.h
index af4879bdf3d6..64e2822af70f 100644
--- a/drivers/infiniband/core/opa_smi.h
+++ b/drivers/infiniband/core/opa_smi.h
@@ -40,11 +40,11 @@
#include "smi.h"
enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, bool is_switch,
- int port_num, int phys_port_cnt);
+ u32 port_num, int phys_port_cnt);
int opa_smi_get_fwd_port(struct opa_smp *smp);
extern enum smi_forward_action opa_smi_check_forward_dr_smp(struct opa_smp *smp);
extern enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp,
- bool is_switch, int port_num);
+ bool is_switch, u32 port_num);
/*
* Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index ccf4d069c25c..18918f463361 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -42,29 +42,24 @@
#include "core_priv.h"
#include "rdma_core.h"
-void uverbs_uobject_get(struct ib_uobject *uobject)
-{
- kref_get(&uobject->ref);
-}
-
static void uverbs_uobject_free(struct kref *ref)
{
- struct ib_uobject *uobj =
- container_of(ref, struct ib_uobject, ref);
-
- if (uobj->uapi_object->type_class->needs_kfree_rcu)
- kfree_rcu(uobj, rcu);
- else
- kfree(uobj);
+ kfree_rcu(container_of(ref, struct ib_uobject, ref), rcu);
}
+/*
+ * In order to indicate that we no longer need this uobject, uverbs_uobject_put
+ * is called. When the reference count drops to zero, the uobject is freed.
+ * For example, this is used when attaching a completion channel to a CQ.
+ */
void uverbs_uobject_put(struct ib_uobject *uobject)
{
kref_put(&uobject->ref, uverbs_uobject_free);
}
+EXPORT_SYMBOL(uverbs_uobject_put);
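A minimal sketch of the pairing the comment above describes; uverbs_uobject_get() is the kref_get() wrapper that this hunk removes from this file (assumed to remain available as an inline elsewhere), and do_something_with() is a stand-in for the actual user:

	uverbs_uobject_get(uobj);		/* kref_get(&uobj->ref) */
	do_something_with(uobj->object);	/* hypothetical consumer */
	uverbs_uobject_put(uobj);		/* last put frees via kfree_rcu() */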
-static int uverbs_try_lock_object(struct ib_uobject *uobj,
- enum rdma_lookup_mode mode)
+int uverbs_try_lock_object(struct ib_uobject *uobj,
+ enum rdma_lookup_mode mode)
{
/*
* When a shared access is required, we use a positive counter. Each
@@ -73,7 +68,7 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj,
* In exclusive access mode, we check that the counter is zero (nobody
* claimed this object) and we set it to -1. Releasing a shared access
* lock is done simply by decreasing the counter. As for exclusive
- * access locks, since only a single one of them is is allowed
+ * access locks, since only a single one of them is allowed
* concurrently, setting the counter to zero is enough for releasing
* this lock.
*/
@@ -89,6 +84,7 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj,
}
return 0;
}
+EXPORT_SYMBOL(uverbs_try_lock_object);
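The counter protocol spelled out in the comment can be sketched as below; this is an illustration only, the real implementation sits in the context lines this hunk does not show:

/* usecnt == 0: free, > 0: that many shared readers, -1: exclusively owned */
static int try_lock_sketch(atomic_t *usecnt, bool exclusive)
{
	if (exclusive)
		return atomic_cmpxchg(usecnt, 0, -1) == 0 ? 0 : -EBUSY;
	/* take a shared reference unless the object is held exclusively */
	return atomic_add_unless(usecnt, 1, -1) ? 0 : -EBUSY;
}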
static void assert_uverbs_usecnt(struct ib_uobject *uobj,
enum rdma_lookup_mode mode)
@@ -117,7 +113,7 @@ static void assert_uverbs_usecnt(struct ib_uobject *uobj,
* however the type's allocat_commit function cannot have been called and the
* uobject cannot be on the uobjects_lists
*
- * For RDMA_REMOVE_DESTROY the caller shold be holding a kref (eg via
+ * For RDMA_REMOVE_DESTROY the caller should be holding a kref (eg via
* rdma_lookup_get_uobject) and the object is left in a state where the caller
* needs to call rdma_lookup_put_uobject.
*
@@ -135,34 +131,26 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
lockdep_assert_held(&ufile->hw_destroy_rwsem);
assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE);
- if (uobj->object) {
- ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason,
- attrs);
- if (ret) {
- if (ib_is_destroy_retryable(ret, reason, uobj))
- return ret;
-
- /* Nothing to be done, dangle the memory and move on */
- WARN(true,
- "ib_uverbs: failed to remove uobject id %d, driver err=%d",
- uobj->id, ret);
- }
-
- uobj->object = NULL;
- }
-
if (reason == RDMA_REMOVE_ABORT) {
WARN_ON(!list_empty(&uobj->list));
WARN_ON(!uobj->context);
uobj->uapi_object->type_class->alloc_abort(uobj);
+ } else if (uobj->object) {
+ ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason,
+ attrs);
+ if (ret)
+ /* Nothing to be done, wait until the ucontext cleans it up */
+ return ret;
+
+ uobj->object = NULL;
}
uobj->context = NULL;
/*
- * For DESTROY the usecnt is held write locked, the caller is expected
- * to put it unlock and put the object when done with it. Only DESTROY
- * can remove the IDR handle.
+ * For DESTROY the usecnt is not changed, the caller is expected to
+ * manage it via uobj_put_destroy(). Only DESTROY can remove the IDR
+ * handle.
*/
if (reason != RDMA_REMOVE_DESTROY)
atomic_set(&uobj->usecnt, 0);
@@ -194,7 +182,7 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
/*
* This calls uverbs_destroy_uobject() using the RDMA_REMOVE_DESTROY
* sequence. It should only be used from command callbacks. On success the
- * caller must pair this with rdma_lookup_put_uobject(LOOKUP_WRITE). This
+ * caller must pair this with uobj_put_destroy(). This
* version requires the caller to have already obtained an
* LOOKUP_DESTROY uobject kref.
*/
@@ -205,6 +193,13 @@ int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs)
down_read(&ufile->hw_destroy_rwsem);
+ /*
+ * Once the uobject is destroyed by RDMA_REMOVE_DESTROY then it is left
+ * write locked as the callers put it back with UVERBS_LOOKUP_DESTROY.
+ * This is because any other concurrent thread can still see the object
+ * in the xarray due to RCU. Leaving it locked ensures nothing else will
+ * touch it.
+ */
ret = uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE);
if (ret)
goto out_unlock;
@@ -223,7 +218,7 @@ out_unlock:
/*
* uobj_get_destroy destroys the HW object and returns a handle to the uobj
* with a NULL object pointer. The caller must pair this with
- * uverbs_put_destroy.
+ * uobj_put_destroy().
*/
struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
u32 id, struct uverbs_attr_bundle *attrs)
@@ -257,21 +252,25 @@ int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
uobj = __uobj_get_destroy(obj, id, attrs);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
-
- rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
+ uobj_put_destroy(uobj);
return 0;
}
/* alloc_uobj must be undone by uverbs_destroy_uobject() */
-static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
+static struct ib_uobject *alloc_uobj(struct uverbs_attr_bundle *attrs,
const struct uverbs_api_object *obj)
{
+ struct ib_uverbs_file *ufile = attrs->ufile;
struct ib_uobject *uobj;
- struct ib_ucontext *ucontext;
- ucontext = ib_uverbs_get_ucontext_file(ufile);
- if (IS_ERR(ucontext))
- return ERR_CAST(ucontext);
+ if (!attrs->context) {
+ struct ib_ucontext *ucontext =
+ ib_uverbs_get_ucontext_file(ufile);
+
+ if (IS_ERR(ucontext))
+ return ERR_CAST(ucontext);
+ attrs->context = ucontext;
+ }
uobj = kzalloc(obj->type_attrs->obj_size, GFP_KERNEL);
if (!uobj)
@@ -281,7 +280,7 @@ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
* The object is added to the list in the commit stage.
*/
uobj->ufile = ufile;
- uobj->context = ucontext;
+ uobj->context = attrs->context;
INIT_LIST_HEAD(&uobj->list);
uobj->uapi_object = obj;
/*
@@ -358,11 +357,11 @@ lookup_get_fd_uobject(const struct uverbs_api_object *obj,
uobject = f->private_data;
/*
- * fget(id) ensures we are not currently running uverbs_close_fd,
- * and the caller is expected to ensure that uverbs_close_fd is never
- * done while a call top lookup is possible.
+ * fget(id) ensures we are not currently running
+ * uverbs_uobject_fd_release(), and the caller is expected to ensure
+ * that release is never done while a call to lookup is possible.
*/
- if (f->f_op != fd_type->fops) {
+ if (f->f_op != fd_type->fops || uobject->ufile != ufile) {
fput(f);
return ERR_PTR(-EBADF);
}
@@ -424,12 +423,12 @@ free:
static struct ib_uobject *
alloc_begin_idr_uobject(const struct uverbs_api_object *obj,
- struct ib_uverbs_file *ufile)
+ struct uverbs_attr_bundle *attrs)
{
int ret;
struct ib_uobject *uobj;
- uobj = alloc_uobj(ufile, obj);
+ uobj = alloc_uobj(attrs, obj);
if (IS_ERR(uobj))
return uobj;
@@ -445,7 +444,7 @@ alloc_begin_idr_uobject(const struct uverbs_api_object *obj,
return uobj;
remove:
- xa_erase(&ufile->idr, uobj->id);
+ xa_erase(&attrs->ufile->idr, uobj->id);
uobj_put:
uverbs_uobject_put(uobj);
return ERR_PTR(ret);
@@ -453,31 +452,54 @@ uobj_put:
static struct ib_uobject *
alloc_begin_fd_uobject(const struct uverbs_api_object *obj,
- struct ib_uverbs_file *ufile)
+ struct uverbs_attr_bundle *attrs)
{
+ const struct uverbs_obj_fd_type *fd_type;
int new_fd;
- struct ib_uobject *uobj;
+ struct ib_uobject *uobj, *ret;
+ struct file *filp;
+
+ uobj = alloc_uobj(attrs, obj);
+ if (IS_ERR(uobj))
+ return uobj;
+
+ fd_type =
+ container_of(obj->type_attrs, struct uverbs_obj_fd_type, type);
+ if (WARN_ON(fd_type->fops->release != &uverbs_uobject_fd_release &&
+ fd_type->fops->release != &uverbs_async_event_release)) {
+ ret = ERR_PTR(-EINVAL);
+ goto err_fd;
+ }
new_fd = get_unused_fd_flags(O_CLOEXEC);
- if (new_fd < 0)
- return ERR_PTR(new_fd);
+ if (new_fd < 0) {
+ ret = ERR_PTR(new_fd);
+ goto err_fd;
+ }
- uobj = alloc_uobj(ufile, obj);
- if (IS_ERR(uobj)) {
- put_unused_fd(new_fd);
- return uobj;
+ /* Note that uverbs_uobject_fd_release() is called during abort */
+ filp = anon_inode_getfile(fd_type->name, fd_type->fops, NULL,
+ fd_type->flags);
+ if (IS_ERR(filp)) {
+ ret = ERR_CAST(filp);
+ goto err_getfile;
}
+ uobj->object = filp;
uobj->id = new_fd;
- uobj->ufile = ufile;
-
return uobj;
+
+err_getfile:
+ put_unused_fd(new_fd);
+err_fd:
+ uverbs_uobject_put(uobj);
+ return ret;
}
struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
- struct ib_uverbs_file *ufile,
struct uverbs_attr_bundle *attrs)
{
+ struct ib_uverbs_file *ufile = attrs->ufile;
struct ib_uobject *ret;
if (IS_ERR(obj))
@@ -491,13 +513,11 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
if (!down_read_trylock(&ufile->hw_destroy_rwsem))
return ERR_PTR(-EIO);
- ret = obj->type_class->alloc_begin(obj, ufile);
+ ret = obj->type_class->alloc_begin(obj, attrs);
if (IS_ERR(ret)) {
up_read(&ufile->hw_destroy_rwsem);
return ret;
}
- if (attrs)
- attrs->context = ret->context;
return ret;
}
@@ -518,12 +538,7 @@ static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj,
struct uverbs_obj_idr_type, type);
int ret = idr_type->destroy_object(uobj, why, attrs);
- /*
- * We can only fail gracefully if the user requested to destroy the
- * object or when a retry may be called upon an error.
- * In the rest of the cases, just remove whatever you can.
- */
- if (ib_is_destroy_retryable(ret, why, uobj))
+ if (ret)
return ret;
if (why == RDMA_REMOVE_ABORT)
@@ -544,6 +559,9 @@ static void remove_handle_idr_uobject(struct ib_uobject *uobj)
static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
{
+ struct file *filp = uobj->object;
+
+ fput(filp);
put_unused_fd(uobj->id);
}
@@ -553,11 +571,8 @@ static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj,
{
const struct uverbs_obj_fd_type *fd_type = container_of(
uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
- int ret = fd_type->context_closed(uobj, why);
-
- if (ib_is_destroy_retryable(ret, why, uobj))
- return ret;
+ fd_type->destroy_object(uobj, why);
return 0;
}
@@ -565,7 +580,7 @@ static void remove_handle_fd_uobject(struct ib_uobject *uobj)
{
}
-static int alloc_commit_idr_uobject(struct ib_uobject *uobj)
+static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
{
struct ib_uverbs_file *ufile = uobj->ufile;
void *old;
@@ -579,33 +594,35 @@ static int alloc_commit_idr_uobject(struct ib_uobject *uobj)
*/
old = xa_store(&ufile->idr, uobj->id, uobj, GFP_KERNEL);
WARN_ON(old != NULL);
-
- return 0;
}
-static int alloc_commit_fd_uobject(struct ib_uobject *uobj)
+static void swap_idr_uobjects(struct ib_uobject *obj_old,
+ struct ib_uobject *obj_new)
{
- const struct uverbs_obj_fd_type *fd_type = container_of(
- uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
- int fd = uobj->id;
- struct file *filp;
+ struct ib_uverbs_file *ufile = obj_old->ufile;
+ void *old;
/*
- * The kref for uobj is moved into filp->private data and put in
- * uverbs_close_fd(). Once alloc_commit() succeeds uverbs_close_fd()
- * must be guaranteed to be called from the provided fops release
- * callback.
+ * obj_new must be an object that has been allocated but not yet committed;
+ * this moves the pre-committed state to obj_old, and obj_new must still be
+ * committed.
*/
- filp = anon_inode_getfile(fd_type->name,
- fd_type->fops,
- uobj,
- fd_type->flags);
- if (IS_ERR(filp))
- return PTR_ERR(filp);
+ old = xa_cmpxchg(&ufile->idr, obj_old->id, obj_old, XA_ZERO_ENTRY,
+ GFP_KERNEL);
+ if (WARN_ON(old != obj_old))
+ return;
- uobj->object = filp;
+ swap(obj_old->id, obj_new->id);
- /* Matching put will be done in uverbs_close_fd() */
+ old = xa_cmpxchg(&ufile->idr, obj_old->id, NULL, obj_old, GFP_KERNEL);
+ WARN_ON(old != NULL);
+}
+
+static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
+{
+ int fd = uobj->id;
+ struct file *filp = uobj->object;
+
+ /* Matching put will be done in uverbs_uobject_fd_release() */
kref_get(&uobj->ufile->ref);
/* This shouldn't be used anymore. Use the file object instead */
@@ -613,11 +630,10 @@ static int alloc_commit_fd_uobject(struct ib_uobject *uobj)
/*
* NOTE: Once we install the file we lose ownership of our kref on
- * uobj. It will be put by uverbs_close_fd()
+ * uobj. It will be put by uverbs_uobject_fd_release()
*/
+ filp->private_data = uobj;
fd_install(fd, filp);
-
- return 0;
}
/*
@@ -625,19 +641,10 @@ static int alloc_commit_fd_uobject(struct ib_uobject *uobj)
* caller can no longer assume uobj is valid. If this function fails it
* destroys the uboject, including the attached HW object.
*/
-int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj,
- struct uverbs_attr_bundle *attrs)
+void rdma_alloc_commit_uobject(struct ib_uobject *uobj,
+ struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_file *ufile = attrs->ufile;
- int ret;
-
- /* alloc_commit consumes the uobj kref */
- ret = uobj->uapi_object->type_class->alloc_commit(uobj);
- if (ret) {
- uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs);
- up_read(&ufile->hw_destroy_rwsem);
- return ret;
- }
/* kref is held so long as the uobj is on the uobj list. */
uverbs_uobject_get(uobj);
@@ -648,10 +655,40 @@ int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj,
/* matches atomic_set(-1) in alloc_uobj */
atomic_set(&uobj->usecnt, 0);
+ /* alloc_commit consumes the uobj kref */
+ uobj->uapi_object->type_class->alloc_commit(uobj);
+
/* Matches the down_read in rdma_alloc_begin_uobject */
up_read(&ufile->hw_destroy_rwsem);
+}
- return 0;
+/*
+ * new_uobj will be assigned to the handle currently used by to_uobj, and
+ * to_uobj will be destroyed.
+ *
+ * Upon return the caller must do:
+ * rdma_alloc_commit_uobject(new_uobj)
+ * uobj_put_destroy(to_uobj)
+ *
+ * to_uobj must have a write get but the put mode switches to destroy once
+ * this is called.
+ */
+void rdma_assign_uobject(struct ib_uobject *to_uobj, struct ib_uobject *new_uobj,
+ struct uverbs_attr_bundle *attrs)
+{
+ assert_uverbs_usecnt(new_uobj, UVERBS_LOOKUP_WRITE);
+
+ if (WARN_ON(to_uobj->uapi_object != new_uobj->uapi_object ||
+ !to_uobj->uapi_object->type_class->swap_uobjects))
+ return;
+
+ to_uobj->uapi_object->type_class->swap_uobjects(to_uobj, new_uobj);
+
+ /*
+ * If this fails then the uobject is still completely valid (though with
+ * a new ID) and we leak it until context close.
+ */
+ uverbs_destroy_uobject(to_uobj, RDMA_REMOVE_DESTROY, attrs);
}
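A minimal sketch of the calling convention documented above, assuming the caller already holds a write get on to_uobj and has allocated new_uobj with rdma_alloc_begin_uobject(); the surrounding handler code is omitted and hypothetical:

	/* Sketch only -- not part of this patch */
	rdma_assign_uobject(to_uobj, new_uobj, attrs);

	/* As required by the contract documented above: */
	rdma_alloc_commit_uobject(new_uobj, attrs);	/* commit the replacement */
	uobj_put_destroy(to_uobj);			/* drop the destroy-mode get */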
/*
@@ -659,11 +696,25 @@ int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj,
* object and anything else connected to uobj before calling this.
*/
void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
- struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs,
+ bool hw_obj_valid)
{
struct ib_uverbs_file *ufile = uobj->ufile;
+ int ret;
+
+ if (hw_obj_valid) {
+ ret = uobj->uapi_object->type_class->destroy_hw(
+ uobj, RDMA_REMOVE_ABORT, attrs);
+ /*
+ * If the driver couldn't destroy the object then go ahead and
+ * commit it. Objects that can't be destroyed are only leaked
+ * during FD close, after the driver has had a few more chances
+ * to destroy them.
+ */
+ if (WARN_ON(ret))
+ return rdma_alloc_commit_uobject(uobj, attrs);
+ }
- uobj->object = NULL;
uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs);
/* Matches the down_read in rdma_alloc_begin_uobject */
@@ -681,7 +732,10 @@ static void lookup_put_fd_uobject(struct ib_uobject *uobj,
struct file *filp = uobj->object;
WARN_ON(mode != UVERBS_LOOKUP_READ);
- /* This indirectly calls uverbs_close_fd and free the object */
+ /*
+ * This indirectly calls uverbs_uobject_fd_release() and frees the
+ * object.
+ */
fput(filp);
}
@@ -689,7 +743,6 @@ void rdma_lookup_put_uobject(struct ib_uobject *uobj,
enum rdma_lookup_mode mode)
{
assert_uverbs_usecnt(uobj, mode);
- uobj->uapi_object->type_class->lookup_put(uobj, mode);
/*
* In order to unlock an object, either decrease its usecnt for
* read access or zero it in case of exclusive access. See
@@ -706,6 +759,7 @@ void rdma_lookup_put_uobject(struct ib_uobject *uobj,
break;
}
+ uobj->uapi_object->type_class->lookup_put(uobj, mode);
/* Pairs with the kref obtained by type->lookup_get */
uverbs_uobject_put(uobj);
}
@@ -744,33 +798,33 @@ const struct uverbs_obj_type_class uverbs_idr_class = {
.lookup_put = lookup_put_idr_uobject,
.destroy_hw = destroy_hw_idr_uobject,
.remove_handle = remove_handle_idr_uobject,
- /*
- * When we destroy an object, we first just lock it for WRITE and
- * actually DESTROY it in the finalize stage. So, the problematic
- * scenario is when we just started the finalize stage of the
- * destruction (nothing was executed yet). Now, the other thread
- * fetched the object for READ access, but it didn't lock it yet.
- * The DESTROY thread continues and starts destroying the object.
- * When the other thread continue - without the RCU, it would
- * access freed memory. However, the rcu_read_lock delays the free
- * until the rcu_read_lock of the READ operation quits. Since the
- * exclusive lock of the object is still taken by the DESTROY flow, the
- * READ operation will get -EBUSY and it'll just bail out.
- */
- .needs_kfree_rcu = true,
+ .swap_uobjects = swap_idr_uobjects,
};
EXPORT_SYMBOL(uverbs_idr_class);
-void uverbs_close_fd(struct file *f)
+/*
+ * Users of UVERBS_TYPE_ALLOC_FD should set this function as the struct
+ * file_operations release method.
+ */
+int uverbs_uobject_fd_release(struct inode *inode, struct file *filp)
{
- struct ib_uobject *uobj = f->private_data;
- struct ib_uverbs_file *ufile = uobj->ufile;
- struct uverbs_attr_bundle attrs = {
- .context = uobj->context,
- .ufile = ufile,
- };
+ struct ib_uverbs_file *ufile;
+ struct ib_uobject *uobj;
+
+ /*
+ * This can only happen if the fput came from alloc_abort_fd_uobject()
+ */
+ if (!filp->private_data)
+ return 0;
+ uobj = filp->private_data;
+ ufile = uobj->ufile;
if (down_read_trylock(&ufile->hw_destroy_rwsem)) {
+ struct uverbs_attr_bundle attrs = {
+ .context = uobj->context,
+ .ufile = ufile,
+ };
+
/*
* lookup_get_fd_uobject holds the kref on the struct file any
* time a FD uobj is locked, which prevents this release
@@ -782,13 +836,14 @@ void uverbs_close_fd(struct file *f)
up_read(&ufile->hw_destroy_rwsem);
}
- /* Matches the get in alloc_begin_fd_uobject */
+ /* Matches the get in alloc_commit_fd_uobject() */
kref_put(&ufile->ref, ib_uverbs_release_file);
/* Pairs with filp->private_data in alloc_begin_fd_uobject */
uverbs_uobject_put(uobj);
+ return 0;
}
-EXPORT_SYMBOL(uverbs_close_fd);
+EXPORT_SYMBOL(uverbs_uobject_fd_release);
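A minimal sketch, assuming a hypothetical FD-backed uobject type, of how a user of UVERBS_TYPE_ALLOC_FD wires this release method into its file_operations (which the type declaration then references); the name example_uobj_fops is illustrative only:

/* Illustrative only: fops referenced from an UVERBS_TYPE_ALLOC_FD() type */
static const struct file_operations example_uobj_fops = {
	.owner	 = THIS_MODULE,
	.release = uverbs_uobject_fd_release,
};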
/*
* Drop the ucontext off the ufile and completely disconnect it from the
@@ -817,6 +872,7 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
rdma_restrack_del(&ucontext->res);
ib_dev->ops.dealloc_ucontext(ucontext);
+ WARN_ON(!xa_empty(&ucontext->mmap_xa));
kfree(ucontext);
ufile->ucontext = NULL;
@@ -825,9 +881,14 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
enum rdma_remove_reason reason)
{
+ struct uverbs_attr_bundle attrs = { .ufile = ufile };
+ struct ib_ucontext *ucontext = ufile->ucontext;
+ struct ib_device *ib_dev = ucontext->device;
struct ib_uobject *obj, *next_obj;
int ret = -EINVAL;
- struct uverbs_attr_bundle attrs = { .ufile = ufile };
+
+ if (ib_dev->ops.ufile_hw_cleanup)
+ ib_dev->ops.ufile_hw_cleanup(ufile);
/*
* This shouldn't run while executing other commands on this
@@ -845,18 +906,23 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
* racing with a lookup_get.
*/
WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE));
+ if (reason == RDMA_REMOVE_DRIVER_FAILURE)
+ obj->object = NULL;
if (!uverbs_destroy_uobject(obj, reason, &attrs))
ret = 0;
else
atomic_set(&obj->usecnt, 0);
}
+
+ if (reason == RDMA_REMOVE_DRIVER_FAILURE) {
+ WARN_ON(!list_empty(&ufile->uobjects));
+ return 0;
+ }
return ret;
}
/*
- * Destroy the uncontext and every uobject associated with it. If called with
- * reason != RDMA_REMOVE_CLOSE this will not return until the destruction has
- * been completed and ufile->ucontext is NULL.
+ * Destroy the ucontext and every uobject associated with it.
*
* This is internally locked and can be called in parallel from multiple
* contexts.
@@ -864,22 +930,6 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
enum rdma_remove_reason reason)
{
- if (reason == RDMA_REMOVE_CLOSE) {
- /*
- * During destruction we might trigger something that
- * synchronously calls release on any file descriptor. For
- * this reason all paths that come from file_operations
- * release must use try_lock. They can progress knowing that
- * there is an ongoing uverbs_destroy_ufile_hw that will clean
- * up the driver resources.
- */
- if (!mutex_trylock(&ufile->ucontext_lock))
- return;
-
- } else {
- mutex_lock(&ufile->ucontext_lock);
- }
-
down_write(&ufile->hw_destroy_rwsem);
/*
@@ -889,26 +939,16 @@ void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
if (!ufile->ucontext)
goto done;
- ufile->ucontext->closing = true;
- ufile->ucontext->cleanup_retryable = true;
- while (!list_empty(&ufile->uobjects))
- if (__uverbs_cleanup_ufile(ufile, reason)) {
- /*
- * No entry was cleaned-up successfully during this
- * iteration
- */
- break;
- }
-
- ufile->ucontext->cleanup_retryable = false;
- if (!list_empty(&ufile->uobjects))
- __uverbs_cleanup_ufile(ufile, reason);
+ while (!list_empty(&ufile->uobjects) &&
+ !__uverbs_cleanup_ufile(ufile, reason)) {
+ }
+ if (WARN_ON(!list_empty(&ufile->uobjects)))
+ __uverbs_cleanup_ufile(ufile, RDMA_REMOVE_DRIVER_FAILURE);
ufile_destroy_ucontext(ufile, reason);
done:
up_write(&ufile->hw_destroy_rwsem);
- mutex_unlock(&ufile->ucontext_lock);
}
const struct uverbs_obj_type_class uverbs_fd_class = {
@@ -919,7 +959,6 @@ const struct uverbs_obj_type_class uverbs_fd_class = {
.lookup_put = lookup_put_fd_uobject,
.destroy_hw = destroy_hw_fd_uobject,
.remove_handle = remove_handle_fd_uobject,
- .needs_kfree_rcu = false,
};
EXPORT_SYMBOL(uverbs_fd_class);
@@ -942,19 +981,17 @@ uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access,
return rdma_lookup_get_uobject(obj, attrs->ufile, id,
UVERBS_LOOKUP_WRITE, attrs);
case UVERBS_ACCESS_NEW:
- return rdma_alloc_begin_uobject(obj, attrs->ufile, attrs);
+ return rdma_alloc_begin_uobject(obj, attrs);
default:
WARN_ON(true);
return ERR_PTR(-EOPNOTSUPP);
}
}
-int uverbs_finalize_object(struct ib_uobject *uobj,
- enum uverbs_obj_access access, bool commit,
- struct uverbs_attr_bundle *attrs)
+void uverbs_finalize_object(struct ib_uobject *uobj,
+ enum uverbs_obj_access access, bool hw_obj_valid,
+ bool commit, struct uverbs_attr_bundle *attrs)
{
- int ret = 0;
-
/*
* refcounts should be handled at the object level and not at the
* uobject level. Refcounts of the objects themselves are done in
@@ -974,14 +1011,40 @@ int uverbs_finalize_object(struct ib_uobject *uobj,
break;
case UVERBS_ACCESS_NEW:
if (commit)
- ret = rdma_alloc_commit_uobject(uobj, attrs);
+ rdma_alloc_commit_uobject(uobj, attrs);
else
- rdma_alloc_abort_uobject(uobj, attrs);
+ rdma_alloc_abort_uobject(uobj, attrs, hw_obj_valid);
break;
default:
WARN_ON(true);
- ret = -EOPNOTSUPP;
}
+}
- return ret;
+/**
+ * rdma_uattrs_has_raw_cap() - Returns whether the RDMA device linked to the
+ * uverbs attributes file has CAP_NET_RAW
+ * capability or not.
+ *
+ * @attrs: Pointer to uverbs attributes
+ *
+ * Returns true if the RDMA device's owning user namespace has CAP_NET_RAW
+ * capability, otherwise false.
+ */
+bool rdma_uattrs_has_raw_cap(const struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_file *ufile = attrs->ufile;
+ struct ib_ucontext *ucontext;
+ bool has_cap = false;
+ int srcu_key;
+
+ srcu_key = srcu_read_lock(&ufile->device->disassociate_srcu);
+ ucontext = ib_uverbs_get_ucontext_file(ufile);
+ if (IS_ERR(ucontext))
+ goto out;
+ has_cap = rdma_dev_has_raw_cap(ucontext->device);
+
+out:
+ srcu_read_unlock(&ufile->device->disassociate_srcu, srcu_key);
+ return has_cap;
}
+EXPORT_SYMBOL(rdma_uattrs_has_raw_cap);
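A hedged usage sketch: a uverbs handler creating a privileged (raw packet / flow steering) resource could gate on this helper; the handler name below is hypothetical:

/* Illustrative only */
static int example_create_raw_flow(struct uverbs_attr_bundle *attrs)
{
	if (!rdma_uattrs_has_raw_cap(attrs))
		return -EPERM;

	/* ... create the raw flow steering rule ... */
	return 0;
}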
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index 5445323629b5..a59b087611cb 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -51,29 +51,6 @@ void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs);
/*
- * uverbs_uobject_get is called in order to increase the reference count on
- * an uobject. This is useful when a handler wants to keep the uobject's memory
- * alive, regardless if this uobject is still alive in the context's objects
- * repository. Objects are put via uverbs_uobject_put.
- */
-void uverbs_uobject_get(struct ib_uobject *uobject);
-
-/*
- * In order to indicate we no longer needs this uobject, uverbs_uobject_put
- * is called. When the reference count is decreased, the uobject is freed.
- * For example, this is used when attaching a completion channel to a CQ.
- */
-void uverbs_uobject_put(struct ib_uobject *uobject);
-
-/* Indicate this fd is no longer used by this consumer, but its memory isn't
- * necessarily released yet. When the last reference is put, we release the
- * memory. After this call is executed, calling uverbs_uobject_get isn't
- * allowed.
- * This must be called from the release file_operations of the file!
- */
-void uverbs_close_fd(struct file *f);
-
-/*
* Get an ib_uobject that corresponds to the given id from ufile, assuming
* the object is from the given type. Lock it to the required access when
* applicable.
@@ -86,30 +63,17 @@ struct ib_uobject *
uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access,
s64 id, struct uverbs_attr_bundle *attrs);
-/*
- * Note that certain finalize stages could return a status:
- * (a) alloc_commit could return a failure if the object is committed at the
- * same time when the context is destroyed.
- * (b) remove_commit could fail if the object wasn't destroyed successfully.
- * Since multiple objects could be finalized in one transaction, it is very NOT
- * recommended to have several finalize actions which have side effects.
- * For example, it's NOT recommended to have a certain action which has both
- * a commit action and a destroy action or two destroy objects in the same
- * action. The rule of thumb is to have one destroy or commit action with
- * multiple lookups.
- * The first non zero return value of finalize_object is returned from this
- * function. For example, this could happen when we couldn't destroy an
- * object.
- */
-int uverbs_finalize_object(struct ib_uobject *uobj,
- enum uverbs_obj_access access, bool commit,
- struct uverbs_attr_bundle *attrs);
+void uverbs_finalize_object(struct ib_uobject *uobj,
+ enum uverbs_obj_access access, bool hw_obj_valid,
+ bool commit, struct uverbs_attr_bundle *attrs);
int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx);
void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile);
void release_ufile_idr_uobject(struct ib_uverbs_file *ufile);
+struct ib_udata *uverbs_get_cleared_udata(struct uverbs_attr_bundle *attrs);
+
/*
* This is the runtime description of the uverbs API, used by the syscall
* machinery to validate and dispatch calls.
@@ -187,13 +151,18 @@ void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
unsigned int num_attrs);
void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile);
+extern const struct uapi_definition uverbs_def_obj_async_fd[];
extern const struct uapi_definition uverbs_def_obj_counters[];
extern const struct uapi_definition uverbs_def_obj_cq[];
extern const struct uapi_definition uverbs_def_obj_device[];
extern const struct uapi_definition uverbs_def_obj_dm[];
+extern const struct uapi_definition uverbs_def_obj_dmah[];
extern const struct uapi_definition uverbs_def_obj_flow_action[];
extern const struct uapi_definition uverbs_def_obj_intf[];
extern const struct uapi_definition uverbs_def_obj_mr[];
+extern const struct uapi_definition uverbs_def_obj_qp[];
+extern const struct uapi_definition uverbs_def_obj_srq[];
+extern const struct uapi_definition uverbs_def_obj_wq[];
extern const struct uapi_definition uverbs_def_write_intf[];
static inline const struct uverbs_api_write_method *
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 3b5ff2f7b5f8..b097cfcade1c 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -6,6 +6,7 @@
#include <rdma/rdma_cm.h>
#include <rdma/ib_verbs.h>
#include <rdma/restrack.h>
+#include <rdma/rdma_counter.h>
#include <linux/mutex.h>
#include <linux/sched/task.h>
#include <linux/pid_namespace.h>
@@ -36,20 +37,6 @@ int rdma_restrack_init(struct ib_device *dev)
return 0;
}
-static const char *type2str(enum rdma_restrack_type type)
-{
- static const char * const names[RDMA_RESTRACK_MAX] = {
- [RDMA_RESTRACK_PD] = "PD",
- [RDMA_RESTRACK_CQ] = "CQ",
- [RDMA_RESTRACK_QP] = "QP",
- [RDMA_RESTRACK_CM_ID] = "CM_ID",
- [RDMA_RESTRACK_MR] = "MR",
- [RDMA_RESTRACK_CTX] = "CTX",
- };
-
- return names[type];
-};
-
/**
* rdma_restrack_clean() - clean resource tracking
* @dev: IB device
@@ -57,47 +44,14 @@ static const char *type2str(enum rdma_restrack_type type)
void rdma_restrack_clean(struct ib_device *dev)
{
struct rdma_restrack_root *rt = dev->res;
- struct rdma_restrack_entry *e;
- char buf[TASK_COMM_LEN];
- bool found = false;
- const char *owner;
int i;
for (i = 0 ; i < RDMA_RESTRACK_MAX; i++) {
struct xarray *xa = &dev->res[i].xa;
- if (!xa_empty(xa)) {
- unsigned long index;
-
- if (!found) {
- pr_err("restrack: %s", CUT_HERE);
- dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n");
- }
- xa_for_each(xa, index, e) {
- if (rdma_is_kernel_res(e)) {
- owner = e->kern_name;
- } else {
- /*
- * There is no need to call get_task_struct here,
- * because we can be here only if there are more
- * get_task_struct() call than put_task_struct().
- */
- get_task_comm(buf, e->task);
- owner = buf;
- }
-
- pr_err("restrack: %s %s object allocated by %s is not freed\n",
- rdma_is_kernel_res(e) ? "Kernel" :
- "User",
- type2str(e->type), owner);
- }
- found = true;
- }
+ WARN_ON(!xa_empty(xa));
xa_destroy(xa);
}
- if (found)
- pr_err("restrack: %s", CUT_HERE);
-
kfree(rt);
}
@@ -105,10 +59,10 @@ void rdma_restrack_clean(struct ib_device *dev)
* rdma_restrack_count() - the current usage of specific object
* @dev: IB device
* @type: actual type of object to operate
- * @ns: PID namespace
+ * @show_details: count driver specific objects
*/
int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type,
- struct pid_namespace *ns)
+ bool show_details)
{
struct rdma_restrack_root *rt = &dev->res[type];
struct rdma_restrack_entry *e;
@@ -117,42 +71,15 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type,
xa_lock(&rt->xa);
xas_for_each(&xas, e, U32_MAX) {
- if (ns == &init_pid_ns ||
- (!rdma_is_kernel_res(e) &&
- ns == task_active_pid_ns(e->task)))
- cnt++;
+ if (xa_get_mark(&rt->xa, e->id, RESTRACK_DD) && !show_details)
+ continue;
+ cnt++;
}
xa_unlock(&rt->xa);
return cnt;
}
EXPORT_SYMBOL(rdma_restrack_count);
-static void set_kern_name(struct rdma_restrack_entry *res)
-{
- struct ib_pd *pd;
-
- switch (res->type) {
- case RDMA_RESTRACK_QP:
- pd = container_of(res, struct ib_qp, res)->pd;
- if (!pd) {
- WARN_ONCE(true, "XRC QPs are not supported\n");
- /* Survive, despite the programmer's error */
- res->kern_name = " ";
- }
- break;
- case RDMA_RESTRACK_MR:
- pd = container_of(res, struct ib_mr, res)->pd;
- break;
- default:
- /* Other types set kern_name directly */
- pd = NULL;
- break;
- }
-
- if (pd)
- res->kern_name = pd->res.kern_name;
-}
-
static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
{
switch (res->type) {
@@ -169,85 +96,136 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
return container_of(res, struct ib_mr, res)->device;
case RDMA_RESTRACK_CTX:
return container_of(res, struct ib_ucontext, res)->device;
+ case RDMA_RESTRACK_COUNTER:
+ return container_of(res, struct rdma_counter, res)->device;
+ case RDMA_RESTRACK_SRQ:
+ return container_of(res, struct ib_srq, res)->device;
+ case RDMA_RESTRACK_DMAH:
+ return container_of(res, struct ib_dmah, res)->device;
default:
WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
return NULL;
}
}
-void rdma_restrack_set_task(struct rdma_restrack_entry *res,
- const char *caller)
+/**
+ * rdma_restrack_attach_task() - attach the task onto this resource,
+ * valid for user space restrack entries.
+ * @res: resource entry
+ * @task: the task to attach
+ */
+static void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
+ struct task_struct *task)
{
- if (caller) {
- res->kern_name = caller;
+ if (WARN_ON_ONCE(!task))
return;
- }
if (res->task)
put_task_struct(res->task);
- get_task_struct(current);
- res->task = current;
+ get_task_struct(task);
+ res->task = task;
+ res->user = true;
}
-EXPORT_SYMBOL(rdma_restrack_set_task);
-static void rdma_restrack_add(struct rdma_restrack_entry *res)
+/**
+ * rdma_restrack_set_name() - set the task for this resource
+ * @res: resource entry
+ * @caller: kernel name, the current task will be used if the caller is NULL.
+ */
+void rdma_restrack_set_name(struct rdma_restrack_entry *res, const char *caller)
{
- struct ib_device *dev = res_to_dev(res);
- struct rdma_restrack_root *rt;
- int ret;
-
- if (!dev)
+ if (caller) {
+ res->kern_name = caller;
return;
-
- rt = &dev->res[res->type];
-
- kref_init(&res->kref);
- init_completion(&res->comp);
- if (res->type != RDMA_RESTRACK_QP)
- ret = xa_alloc_cyclic(&rt->xa, &res->id, res, xa_limit_32b,
- &rt->next_id, GFP_KERNEL);
- else {
- /* Special case to ensure that LQPN points to right QP */
- struct ib_qp *qp = container_of(res, struct ib_qp, res);
-
- ret = xa_insert(&rt->xa, qp->qp_num, res, GFP_KERNEL);
- res->id = ret ? 0 : qp->qp_num;
}
- if (!ret)
- res->valid = true;
+ rdma_restrack_attach_task(res, current);
}
+EXPORT_SYMBOL(rdma_restrack_set_name);
/**
- * rdma_restrack_kadd() - add kernel object to the reource tracking database
- * @res: resource entry
+ * rdma_restrack_parent_name() - set the restrack name properties based
+ * on parent restrack
+ * @dst: destination resource entry
+ * @parent: parent resource entry
+ */
+void rdma_restrack_parent_name(struct rdma_restrack_entry *dst,
+ const struct rdma_restrack_entry *parent)
+{
+ if (rdma_is_kernel_res(parent))
+ dst->kern_name = parent->kern_name;
+ else
+ rdma_restrack_attach_task(dst, parent->task);
+}
+EXPORT_SYMBOL(rdma_restrack_parent_name);
+
+/**
+ * rdma_restrack_new() - Initialize a new restrack entry so that the _put()
+ * interface can release its memory fully automatically.
+ * @res: Entry to initialize
+ * @type: Restrack type
*/
-void rdma_restrack_kadd(struct rdma_restrack_entry *res)
+void rdma_restrack_new(struct rdma_restrack_entry *res,
+ enum rdma_restrack_type type)
{
- res->task = NULL;
- set_kern_name(res);
- res->user = false;
- rdma_restrack_add(res);
+ kref_init(&res->kref);
+ init_completion(&res->comp);
+ res->type = type;
}
-EXPORT_SYMBOL(rdma_restrack_kadd);
+EXPORT_SYMBOL(rdma_restrack_new);
/**
- * rdma_restrack_uadd() - add user object to the reource tracking database
+ * rdma_restrack_add() - add object to the resource tracking database
* @res: resource entry
*/
-void rdma_restrack_uadd(struct rdma_restrack_entry *res)
+void rdma_restrack_add(struct rdma_restrack_entry *res)
{
- if (res->type != RDMA_RESTRACK_CM_ID)
- res->task = NULL;
+ struct ib_device *dev = res_to_dev(res);
+ struct rdma_restrack_root *rt;
+ int ret = 0;
- if (!res->task)
- rdma_restrack_set_task(res, NULL);
- res->kern_name = NULL;
+ if (!dev)
+ return;
- res->user = true;
- rdma_restrack_add(res);
+ if (res->no_track)
+ goto out;
+
+ rt = &dev->res[res->type];
+
+ if (res->type == RDMA_RESTRACK_QP) {
+ /* Special case to ensure that LQPN points to right QP */
+ struct ib_qp *qp = container_of(res, struct ib_qp, res);
+
+ WARN_ONCE(qp->qp_num >> 24 || qp->port >> 8,
+ "QP number 0x%0X and port 0x%0X", qp->qp_num,
+ qp->port);
+ res->id = qp->qp_num;
+ if (qp->qp_type == IB_QPT_SMI || qp->qp_type == IB_QPT_GSI)
+ res->id |= qp->port << 24;
+ ret = xa_insert(&rt->xa, res->id, res, GFP_KERNEL);
+ if (ret)
+ res->id = 0;
+
+ if (qp->qp_type >= IB_QPT_DRIVER)
+ xa_set_mark(&rt->xa, res->id, RESTRACK_DD);
+ } else if (res->type == RDMA_RESTRACK_COUNTER) {
+ /* Special case to ensure that cntn points to right counter */
+ struct rdma_counter *counter;
+
+ counter = container_of(res, struct rdma_counter, res);
+ ret = xa_insert(&rt->xa, counter->id, res, GFP_KERNEL);
+ res->id = ret ? 0 : counter->id;
+ } else {
+ ret = xa_alloc_cyclic(&rt->xa, &res->id, res, xa_limit_32b,
+ &rt->next_id, GFP_KERNEL);
+ ret = (ret < 0) ? ret : 0;
+ }
+
+out:
+ if (!ret)
+ res->valid = true;
}
-EXPORT_SYMBOL(rdma_restrack_uadd);
+EXPORT_SYMBOL(rdma_restrack_add);
int __must_check rdma_restrack_get(struct rdma_restrack_entry *res)
{
@@ -285,6 +263,10 @@ static void restrack_release(struct kref *kref)
struct rdma_restrack_entry *res;
res = container_of(kref, struct rdma_restrack_entry, kref);
+ if (res->task) {
+ put_task_struct(res->task);
+ res->task = NULL;
+ }
complete(&res->comp);
}
@@ -294,13 +276,25 @@ int rdma_restrack_put(struct rdma_restrack_entry *res)
}
EXPORT_SYMBOL(rdma_restrack_put);
+/**
+ * rdma_restrack_del() - delete object from the resource tracking database
+ * @res: resource entry
+ */
void rdma_restrack_del(struct rdma_restrack_entry *res)
{
struct rdma_restrack_entry *old;
struct rdma_restrack_root *rt;
struct ib_device *dev;
- if (!res->valid)
+ if (!res->valid) {
+ if (res->task) {
+ put_task_struct(res->task);
+ res->task = NULL;
+ }
+ return;
+ }
+
+ if (res->no_track)
goto out;
dev = res_to_dev(res);
@@ -311,15 +305,10 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
old = xa_erase(&rt->xa, res->id);
WARN_ON(old != res);
- res->valid = false;
+out:
+ res->valid = false;
rdma_restrack_put(res);
wait_for_completion(&res->comp);
-
-out:
- if (res->task) {
- put_task_struct(res->task);
- res->task = NULL;
- }
}
EXPORT_SYMBOL(rdma_restrack_del);
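For orientation, a minimal sketch of the reworked restrack lifecycle for a tracked object such as a PD; the helper names are hypothetical and the real call sites live in the core/uverbs object creation and destruction paths:

/* Illustrative lifecycle only */
static void example_track_pd(struct ib_pd *pd, const char *caller)
{
	rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD);
	rdma_restrack_set_name(&pd->res, caller);  /* NULL => attach current task */
	rdma_restrack_add(&pd->res);
}

static void example_untrack_pd(struct ib_pd *pd)
{
	rdma_restrack_del(&pd->res);
}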
diff --git a/drivers/infiniband/core/restrack.h b/drivers/infiniband/core/restrack.h
index 09a1fbdf578e..6a04fc41f738 100644
--- a/drivers/infiniband/core/restrack.h
+++ b/drivers/infiniband/core/restrack.h
@@ -25,4 +25,12 @@ struct rdma_restrack_root {
int rdma_restrack_init(struct ib_device *dev);
void rdma_restrack_clean(struct ib_device *dev);
+void rdma_restrack_add(struct rdma_restrack_entry *res);
+void rdma_restrack_del(struct rdma_restrack_entry *res);
+void rdma_restrack_new(struct rdma_restrack_entry *res,
+ enum rdma_restrack_type type);
+void rdma_restrack_set_name(struct rdma_restrack_entry *res,
+ const char *caller);
+void rdma_restrack_parent_name(struct rdma_restrack_entry *dst,
+ const struct rdma_restrack_entry *parent);
#endif /* _RDMA_CORE_RESTRACK_H_ */
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 558de0b9895c..a9f2c6b1b29e 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -70,7 +70,7 @@ struct netdev_event_work {
};
static const struct {
- bool (*is_supported)(const struct ib_device *device, u8 port_num);
+ bool (*is_supported)(const struct ib_device *device, u32 port_num);
enum ib_gid_type gid_type;
} PORT_CAP_TO_GID_TYPE[] = {
{rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
@@ -79,7 +79,7 @@ static const struct {
#define CAP_TO_GID_TABLE_SIZE ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)
-unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
+unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u32 port)
{
int i;
unsigned int ret_flags = 0;
@@ -96,7 +96,7 @@ unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
EXPORT_SYMBOL(roce_gid_type_mask_support);
static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
- u8 port, union ib_gid *gid,
+ u32 port, union ib_gid *gid,
struct ib_gid_attr *gid_attr)
{
int i;
@@ -144,7 +144,7 @@ static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_de
#define REQUIRED_BOND_STATES (BONDING_SLAVE_STATE_ACTIVE | \
BONDING_SLAVE_STATE_NA)
static bool
-is_eth_port_of_netdev_filter(struct ib_device *ib_dev, u8 port,
+is_eth_port_of_netdev_filter(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
struct net_device *real_dev;
@@ -168,7 +168,7 @@ is_eth_port_of_netdev_filter(struct ib_device *ib_dev, u8 port,
}
static bool
-is_eth_port_inactive_slave_filter(struct ib_device *ib_dev, u8 port,
+is_eth_port_inactive_slave_filter(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
struct net_device *master_dev;
@@ -186,18 +186,19 @@ is_eth_port_inactive_slave_filter(struct ib_device *ib_dev, u8 port,
return res;
}
-/** is_ndev_for_default_gid_filter - Check if a given netdevice
+/**
+ * is_ndev_for_default_gid_filter - Check if a given netdevice
* can be considered for default GIDs or not.
* @ib_dev: IB device to check
* @port: Port to consider for adding default GID
* @rdma_ndev: rdma netdevice pointer
- * @cookie_ndev: Netdevice to consider to form a default GID
+ * @cookie: Netdevice to consider to form a default GID
*
* is_ndev_for_default_gid_filter() returns true if a given netdevice can be
* considered for deriving default RoCE GID, returns false otherwise.
*/
static bool
-is_ndev_for_default_gid_filter(struct ib_device *ib_dev, u8 port,
+is_ndev_for_default_gid_filter(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
struct net_device *cookie_ndev = cookie;
@@ -223,13 +224,13 @@ is_ndev_for_default_gid_filter(struct ib_device *ib_dev, u8 port,
return res;
}
-static bool pass_all_filter(struct ib_device *ib_dev, u8 port,
+static bool pass_all_filter(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
return true;
}
-static bool upper_device_filter(struct ib_device *ib_dev, u8 port,
+static bool upper_device_filter(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
bool res;
@@ -249,7 +250,7 @@ static bool upper_device_filter(struct ib_device *ib_dev, u8 port,
/**
* is_upper_ndev_bond_master_filter - Check if a given netdevice
- * is bond master device of netdevice of the the RDMA device of port.
+ * is bond master device of netdevice of the RDMA device of port.
* @ib_dev: IB device to check
* @port: Port to consider for adding default GID
* @rdma_ndev: Pointer to rdma netdevice
@@ -260,7 +261,7 @@ static bool upper_device_filter(struct ib_device *ib_dev, u8 port,
* not have been established as slave device yet.
*/
static bool
-is_upper_ndev_bond_master_filter(struct ib_device *ib_dev, u8 port,
+is_upper_ndev_bond_master_filter(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev,
void *cookie)
{
@@ -280,7 +281,7 @@ is_upper_ndev_bond_master_filter(struct ib_device *ib_dev, u8 port,
static void update_gid_ip(enum gid_op_type gid_op,
struct ib_device *ib_dev,
- u8 port, struct net_device *ndev,
+ u32 port, struct net_device *ndev,
struct sockaddr *addr)
{
union ib_gid gid;
@@ -294,7 +295,7 @@ static void update_gid_ip(enum gid_op_type gid_op,
}
static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
- u8 port,
+ u32 port,
struct net_device *rdma_ndev,
struct net_device *event_ndev)
{
@@ -328,8 +329,9 @@ static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
}
static void enum_netdev_ipv4_ips(struct ib_device *ib_dev,
- u8 port, struct net_device *ndev)
+ u32 port, struct net_device *ndev)
{
+ const struct in_ifaddr *ifa;
struct in_device *in_dev;
struct sin_list {
struct list_head list;
@@ -349,7 +351,7 @@ static void enum_netdev_ipv4_ips(struct ib_device *ib_dev,
return;
}
- for_ifa(in_dev) {
+ in_dev_for_each_ifa_rcu(ifa, in_dev) {
struct sin_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
if (!entry)
@@ -359,7 +361,7 @@ static void enum_netdev_ipv4_ips(struct ib_device *ib_dev,
entry->ip.sin_addr.s_addr = ifa->ifa_address;
list_add_tail(&entry->list, &sin_list);
}
- endfor_ifa(in_dev);
+
rcu_read_unlock();
list_for_each_entry_safe(sin_iter, sin_temp, &sin_list, list) {
@@ -371,7 +373,7 @@ static void enum_netdev_ipv4_ips(struct ib_device *ib_dev,
}
static void enum_netdev_ipv6_ips(struct ib_device *ib_dev,
- u8 port, struct net_device *ndev)
+ u32 port, struct net_device *ndev)
{
struct inet6_ifaddr *ifp;
struct inet6_dev *in6_dev;
@@ -416,7 +418,7 @@ static void enum_netdev_ipv6_ips(struct ib_device *ib_dev,
}
}
-static void _add_netdev_ips(struct ib_device *ib_dev, u8 port,
+static void _add_netdev_ips(struct ib_device *ib_dev, u32 port,
struct net_device *ndev)
{
enum_netdev_ipv4_ips(ib_dev, port, ndev);
@@ -424,13 +426,13 @@ static void _add_netdev_ips(struct ib_device *ib_dev, u8 port,
enum_netdev_ipv6_ips(ib_dev, port, ndev);
}
-static void add_netdev_ips(struct ib_device *ib_dev, u8 port,
+static void add_netdev_ips(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
_add_netdev_ips(ib_dev, port, cookie);
}
-static void del_netdev_ips(struct ib_device *ib_dev, u8 port,
+static void del_netdev_ips(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
ib_cache_gid_del_all_netdev_gids(ib_dev, port, cookie);
@@ -445,7 +447,7 @@ static void del_netdev_ips(struct ib_device *ib_dev, u8 port,
*
* del_default_gids() deletes the default GIDs of the event/cookie netdevice.
*/
-static void del_default_gids(struct ib_device *ib_dev, u8 port,
+static void del_default_gids(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
struct net_device *cookie_ndev = cookie;
@@ -457,7 +459,7 @@ static void del_default_gids(struct ib_device *ib_dev, u8 port,
IB_CACHE_GID_DEFAULT_MODE_DELETE);
}
-static void add_default_gids(struct ib_device *ib_dev, u8 port,
+static void add_default_gids(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
struct net_device *event_ndev = cookie;
@@ -469,7 +471,7 @@ static void add_default_gids(struct ib_device *ib_dev, u8 port,
}
static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
- u8 port,
+ u32 port,
struct net_device *rdma_ndev,
void *cookie)
{
@@ -504,7 +506,7 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
* rdma_roce_rescan_device - Rescan all of the network devices in the system
* and add their gids, as needed, to the relevant RoCE devices.
*
- * @device: the rdma device
+ * @ib_dev: the rdma device
*/
void rdma_roce_rescan_device(struct ib_device *ib_dev)
{
@@ -513,8 +515,29 @@ void rdma_roce_rescan_device(struct ib_device *ib_dev)
}
EXPORT_SYMBOL(rdma_roce_rescan_device);
+/**
+ * rdma_roce_rescan_port - Rescan all of the network devices in the system
+ * and add their gids if relevant to the port of the RoCE device.
+ *
+ * @ib_dev: IB device
+ * @port: Port number
+ */
+void rdma_roce_rescan_port(struct ib_device *ib_dev, u32 port)
+{
+ struct net_device *ndev = NULL;
+
+ if (rdma_protocol_roce(ib_dev, port)) {
+ ndev = ib_device_get_netdev(ib_dev, port);
+ if (!ndev)
+ return;
+ enum_all_gids_of_dev_cb(ib_dev, port, ndev, ndev);
+ dev_put(ndev);
+ }
+}
+EXPORT_SYMBOL(rdma_roce_rescan_port);
+
static void callback_for_addr_gid_device_scan(struct ib_device *device,
- u8 port,
+ u32 port,
struct net_device *rdma_ndev,
void *cookie)
{
@@ -530,10 +553,11 @@ struct upper_list {
struct net_device *upper;
};
-static int netdev_upper_walk(struct net_device *upper, void *data)
+static int netdev_upper_walk(struct net_device *upper,
+ struct netdev_nested_priv *priv)
{
struct upper_list *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
- struct list_head *upper_list = data;
+ struct list_head *upper_list = (struct list_head *)priv->data;
if (!entry)
return 0;
@@ -545,19 +569,21 @@ static int netdev_upper_walk(struct net_device *upper, void *data)
return 0;
}
-static void handle_netdev_upper(struct ib_device *ib_dev, u8 port,
+static void handle_netdev_upper(struct ib_device *ib_dev, u32 port,
void *cookie,
void (*handle_netdev)(struct ib_device *ib_dev,
- u8 port,
+ u32 port,
struct net_device *ndev))
{
struct net_device *ndev = cookie;
+ struct netdev_nested_priv priv;
struct upper_list *upper_iter;
struct upper_list *upper_temp;
LIST_HEAD(upper_list);
+ priv.data = &upper_list;
rcu_read_lock();
- netdev_walk_all_upper_dev_rcu(ndev, netdev_upper_walk, &upper_list);
+ netdev_walk_all_upper_dev_rcu(ndev, netdev_upper_walk, &priv);
rcu_read_unlock();
handle_netdev(ib_dev, port, ndev);
@@ -570,25 +596,26 @@ static void handle_netdev_upper(struct ib_device *ib_dev, u8 port,
}
}
-static void _roce_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
- struct net_device *event_ndev)
+void roce_del_all_netdev_gids(struct ib_device *ib_dev,
+ u32 port, struct net_device *ndev)
{
- ib_cache_gid_del_all_netdev_gids(ib_dev, port, event_ndev);
+ ib_cache_gid_del_all_netdev_gids(ib_dev, port, ndev);
}
+EXPORT_SYMBOL(roce_del_all_netdev_gids);
-static void del_netdev_upper_ips(struct ib_device *ib_dev, u8 port,
+static void del_netdev_upper_ips(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
- handle_netdev_upper(ib_dev, port, cookie, _roce_del_all_netdev_gids);
+ handle_netdev_upper(ib_dev, port, cookie, roce_del_all_netdev_gids);
}
-static void add_netdev_upper_ips(struct ib_device *ib_dev, u8 port,
+static void add_netdev_upper_ips(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
handle_netdev_upper(ib_dev, port, cookie, _add_netdev_ips);
}
-static void del_netdev_default_ips_join(struct ib_device *ib_dev, u8 port,
+static void del_netdev_default_ips_join(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev,
void *cookie)
{
@@ -596,8 +623,7 @@ static void del_netdev_default_ips_join(struct ib_device *ib_dev, u8 port,
rcu_read_lock();
master_ndev = netdev_master_upper_dev_get_rcu(rdma_ndev);
- if (master_ndev)
- dev_hold(master_ndev);
+ dev_hold(master_ndev);
rcu_read_unlock();
if (master_ndev) {
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index 89a5be3a2f97..6354ddf2a274 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -1,15 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2016 HGST, a Western Digital Company.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
+#include <linux/memremap.h>
#include <linux/moduleparam.h>
#include <linux/slab.h>
#include <linux/pci-p2pdma.h>
@@ -28,14 +21,17 @@ module_param_named(force_mr, rdma_rw_force_mr, bool, 0);
MODULE_PARM_DESC(force_mr, "Force usage of MRs for RDMA READ/WRITE operations");
/*
- * Check if the device might use memory registration. This is currently only
- * true for iWarp devices. In the future we can hopefully fine tune this based
- * on HCA driver input.
+ * Report whether memory registration should be used. Memory registration must
+ * be used for iWarp devices because of iWARP-specific limitations. Memory
+ * registration is also enabled if registering memory might yield better
+ * performance than using multiple SGE entries, see rdma_rw_io_needs_mr().
*/
-static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num)
+static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u32 port_num)
{
if (rdma_protocol_iwarp(dev, port_num))
return true;
+ if (dev->attrs.max_sgl_rd)
+ return true;
if (unlikely(rdma_rw_force_mr))
return true;
return false;
@@ -43,34 +39,61 @@ static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num)
/*
* Check if the device will use memory registration for this RW operation.
- * We currently always use memory registrations for iWarp RDMA READs, and
- * have a debug option to force usage of MRs.
- *
- * XXX: In the future we can hopefully fine tune this based on HCA driver
- * input.
+ * For RDMA READs we must use MRs on iWarp and can optionally use them as an
+ * optimization otherwise. Additionally we have a debug option to force usage
+ * of MRs to help testing this code path.
*/
-static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num,
+static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u32 port_num,
enum dma_data_direction dir, int dma_nents)
{
- if (rdma_protocol_iwarp(dev, port_num) && dir == DMA_FROM_DEVICE)
- return true;
+ if (dir == DMA_FROM_DEVICE) {
+ if (rdma_protocol_iwarp(dev, port_num))
+ return true;
+ if (dev->attrs.max_sgl_rd && dma_nents > dev->attrs.max_sgl_rd)
+ return true;
+ }
if (unlikely(rdma_rw_force_mr))
return true;
return false;
}
-static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev)
+static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev,
+ bool pi_support)
{
+ u32 max_pages;
+
+ if (pi_support)
+ max_pages = dev->attrs.max_pi_fast_reg_page_list_len;
+ else
+ max_pages = dev->attrs.max_fast_reg_page_list_len;
+
/* arbitrary limit to avoid allocating gigantic resources */
- return min_t(u32, dev->attrs.max_fast_reg_page_list_len, 256);
+ return min_t(u32, max_pages, 256);
+}
+
+static inline int rdma_rw_inv_key(struct rdma_rw_reg_ctx *reg)
+{
+ int count = 0;
+
+ if (reg->mr->need_inval) {
+ reg->inv_wr.opcode = IB_WR_LOCAL_INV;
+ reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey;
+ reg->inv_wr.next = &reg->reg_wr.wr;
+ count++;
+ } else {
+ reg->inv_wr.next = NULL;
+ }
+
+ return count;
}
/* Caller must have zero-initialized *reg. */
-static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num,
+static int rdma_rw_init_one_mr(struct ib_qp *qp, u32 port_num,
struct rdma_rw_reg_ctx *reg, struct scatterlist *sg,
u32 sg_cnt, u32 offset)
{
- u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device);
+ u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
+ qp->integrity_en);
u32 nents = min(sg_cnt, pages_per_mr);
int count = 0, ret;
@@ -78,14 +101,7 @@ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num,
if (!reg->mr)
return -EAGAIN;
- if (reg->mr->need_inval) {
- reg->inv_wr.opcode = IB_WR_LOCAL_INV;
- reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey;
- reg->inv_wr.next = &reg->reg_wr.wr;
- count++;
- } else {
- reg->inv_wr.next = NULL;
- }
+ count += rdma_rw_inv_key(reg);
ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE);
if (ret < 0 || ret < nents) {
@@ -106,14 +122,15 @@ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num,
}
static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
- u8 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset,
+ u32 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset,
u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
struct rdma_rw_reg_ctx *prev = NULL;
- u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device);
+ u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
+ qp->integrity_en);
int i, j, ret = 0, count = 0;
- ctx->nr_ops = (sg_cnt + pages_per_mr - 1) / pages_per_mr;
+ ctx->nr_ops = DIV_ROUND_UP(sg_cnt, pages_per_mr);
ctx->reg = kcalloc(ctx->nr_ops, sizeof(*ctx->reg), GFP_KERNEL);
if (!ctx->reg) {
ret = -ENOMEM;
@@ -272,21 +289,21 @@ static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
* Returns the number of WQEs that will be needed on the workqueue if
* successful, or a negative error code.
*/
-int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
+int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num,
struct scatterlist *sg, u32 sg_cnt, u32 sg_offset,
u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
struct ib_device *dev = qp->pd->device;
+ struct sg_table sgt = {
+ .sgl = sg,
+ .orig_nents = sg_cnt,
+ };
int ret;
- if (is_pci_p2pdma_page(sg_page(sg)))
- ret = pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir);
- else
- ret = ib_dma_map_sg(dev, sg, sg_cnt, dir);
-
- if (!ret)
- return -ENOMEM;
- sg_cnt = ret;
+ ret = ib_dma_map_sgtable_attrs(dev, &sgt, dir, 0);
+ if (ret)
+ return ret;
+ sg_cnt = sgt.nents;
/*
* Skip to the S/G entry that sg_offset falls into:
@@ -322,7 +339,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
return ret;
out_unmap_sg:
- ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
+ ib_dma_unmap_sgtable_attrs(dev, &sgt, dir, 0);
return ret;
}
EXPORT_SYMBOL(rdma_rw_ctx_init);
@@ -345,96 +362,85 @@ EXPORT_SYMBOL(rdma_rw_ctx_init);
* successful, or a negative error code.
*/
int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
- u8 port_num, struct scatterlist *sg, u32 sg_cnt,
+ u32 port_num, struct scatterlist *sg, u32 sg_cnt,
struct scatterlist *prot_sg, u32 prot_sg_cnt,
struct ib_sig_attrs *sig_attrs,
u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
struct ib_device *dev = qp->pd->device;
- u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device);
+ u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
+ qp->integrity_en);
+ struct sg_table sgt = {
+ .sgl = sg,
+ .orig_nents = sg_cnt,
+ };
+ struct sg_table prot_sgt = {
+ .sgl = prot_sg,
+ .orig_nents = prot_sg_cnt,
+ };
struct ib_rdma_wr *rdma_wr;
- struct ib_send_wr *prev_wr = NULL;
int count = 0, ret;
if (sg_cnt > pages_per_mr || prot_sg_cnt > pages_per_mr) {
- pr_err("SG count too large\n");
+ pr_err("SG count too large: sg_cnt=%u, prot_sg_cnt=%u, pages_per_mr=%u\n",
+ sg_cnt, prot_sg_cnt, pages_per_mr);
return -EINVAL;
}
- ret = ib_dma_map_sg(dev, sg, sg_cnt, dir);
- if (!ret)
- return -ENOMEM;
- sg_cnt = ret;
+ ret = ib_dma_map_sgtable_attrs(dev, &sgt, dir, 0);
+ if (ret)
+ return ret;
- ret = ib_dma_map_sg(dev, prot_sg, prot_sg_cnt, dir);
- if (!ret) {
- ret = -ENOMEM;
- goto out_unmap_sg;
+ if (prot_sg_cnt) {
+ ret = ib_dma_map_sgtable_attrs(dev, &prot_sgt, dir, 0);
+ if (ret)
+ goto out_unmap_sg;
}
- prot_sg_cnt = ret;
ctx->type = RDMA_RW_SIG_MR;
ctx->nr_ops = 1;
- ctx->sig = kcalloc(1, sizeof(*ctx->sig), GFP_KERNEL);
- if (!ctx->sig) {
+ ctx->reg = kzalloc(sizeof(*ctx->reg), GFP_KERNEL);
+ if (!ctx->reg) {
ret = -ENOMEM;
goto out_unmap_prot_sg;
}
- ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->data, sg, sg_cnt, 0);
- if (ret < 0)
- goto out_free_ctx;
- count += ret;
- prev_wr = &ctx->sig->data.reg_wr.wr;
-
- ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->prot,
- prot_sg, prot_sg_cnt, 0);
- if (ret < 0)
- goto out_destroy_data_mr;
- count += ret;
-
- if (ctx->sig->prot.inv_wr.next)
- prev_wr->next = &ctx->sig->prot.inv_wr;
- else
- prev_wr->next = &ctx->sig->prot.reg_wr.wr;
- prev_wr = &ctx->sig->prot.reg_wr.wr;
-
- ctx->sig->sig_mr = ib_mr_pool_get(qp, &qp->sig_mrs);
- if (!ctx->sig->sig_mr) {
+ ctx->reg->mr = ib_mr_pool_get(qp, &qp->sig_mrs);
+ if (!ctx->reg->mr) {
ret = -EAGAIN;
- goto out_destroy_prot_mr;
+ goto out_free_ctx;
}
- if (ctx->sig->sig_mr->need_inval) {
- memset(&ctx->sig->sig_inv_wr, 0, sizeof(ctx->sig->sig_inv_wr));
+ count += rdma_rw_inv_key(ctx->reg);
- ctx->sig->sig_inv_wr.opcode = IB_WR_LOCAL_INV;
- ctx->sig->sig_inv_wr.ex.invalidate_rkey = ctx->sig->sig_mr->rkey;
+ memcpy(ctx->reg->mr->sig_attrs, sig_attrs, sizeof(struct ib_sig_attrs));
- prev_wr->next = &ctx->sig->sig_inv_wr;
- prev_wr = &ctx->sig->sig_inv_wr;
+ ret = ib_map_mr_sg_pi(ctx->reg->mr, sg, sgt.nents, NULL, prot_sg,
+ prot_sgt.nents, NULL, SZ_4K);
+ if (unlikely(ret)) {
+ pr_err("failed to map PI sg (%u)\n",
+ sgt.nents + prot_sgt.nents);
+ goto out_destroy_sig_mr;
}
- ctx->sig->sig_wr.wr.opcode = IB_WR_REG_SIG_MR;
- ctx->sig->sig_wr.wr.wr_cqe = NULL;
- ctx->sig->sig_wr.wr.sg_list = &ctx->sig->data.sge;
- ctx->sig->sig_wr.wr.num_sge = 1;
- ctx->sig->sig_wr.access_flags = IB_ACCESS_LOCAL_WRITE;
- ctx->sig->sig_wr.sig_attrs = sig_attrs;
- ctx->sig->sig_wr.sig_mr = ctx->sig->sig_mr;
- if (prot_sg_cnt)
- ctx->sig->sig_wr.prot = &ctx->sig->prot.sge;
- prev_wr->next = &ctx->sig->sig_wr.wr;
- prev_wr = &ctx->sig->sig_wr.wr;
+ ctx->reg->reg_wr.wr.opcode = IB_WR_REG_MR_INTEGRITY;
+ ctx->reg->reg_wr.wr.wr_cqe = NULL;
+ ctx->reg->reg_wr.wr.num_sge = 0;
+ ctx->reg->reg_wr.wr.send_flags = 0;
+ ctx->reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE;
+ if (rdma_protocol_iwarp(qp->device, port_num))
+ ctx->reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE;
+ ctx->reg->reg_wr.mr = ctx->reg->mr;
+ ctx->reg->reg_wr.key = ctx->reg->mr->lkey;
count++;
- ctx->sig->sig_sge.addr = 0;
- ctx->sig->sig_sge.length = ctx->sig->data.sge.length;
- if (sig_attrs->wire.sig_type != IB_SIG_TYPE_NONE)
- ctx->sig->sig_sge.length += ctx->sig->prot.sge.length;
+ ctx->reg->sge.addr = ctx->reg->mr->iova;
+ ctx->reg->sge.length = ctx->reg->mr->length;
+ if (sig_attrs->wire.sig_type == IB_SIG_TYPE_NONE)
+ ctx->reg->sge.length -= ctx->reg->mr->sig_attrs->meta_length;
- rdma_wr = &ctx->sig->data.wr;
- rdma_wr->wr.sg_list = &ctx->sig->sig_sge;
+ rdma_wr = &ctx->reg->wr;
+ rdma_wr->wr.sg_list = &ctx->reg->sge;
rdma_wr->wr.num_sge = 1;
rdma_wr->remote_addr = remote_addr;
rdma_wr->rkey = rkey;
@@ -442,23 +448,20 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
else
rdma_wr->wr.opcode = IB_WR_RDMA_READ;
- prev_wr->next = &rdma_wr->wr;
- prev_wr = &rdma_wr->wr;
+ ctx->reg->reg_wr.wr.next = &rdma_wr->wr;
count++;
return count;
-out_destroy_prot_mr:
- if (prot_sg_cnt)
- ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr);
-out_destroy_data_mr:
- ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr);
+out_destroy_sig_mr:
+ ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr);
out_free_ctx:
- kfree(ctx->sig);
+ kfree(ctx->reg);
out_unmap_prot_sg:
- ib_dma_unmap_sg(dev, prot_sg, prot_sg_cnt, dir);
+ if (prot_sgt.nents)
+ ib_dma_unmap_sgtable_attrs(dev, &prot_sgt, dir, 0);
out_unmap_sg:
- ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
+ ib_dma_unmap_sgtable_attrs(dev, &sgt, dir, 0);
return ret;
}
EXPORT_SYMBOL(rdma_rw_ctx_signature_init);
@@ -492,28 +495,13 @@ static void rdma_rw_update_lkey(struct rdma_rw_reg_ctx *reg, bool need_inval)
* completion notification.
*/
struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
- u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
+ u32 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
{
struct ib_send_wr *first_wr, *last_wr;
int i;
switch (ctx->type) {
case RDMA_RW_SIG_MR:
- rdma_rw_update_lkey(&ctx->sig->data, true);
- if (ctx->sig->prot.mr)
- rdma_rw_update_lkey(&ctx->sig->prot, true);
-
- ctx->sig->sig_mr->need_inval = true;
- ib_update_fast_reg_key(ctx->sig->sig_mr,
- ib_inc_rkey(ctx->sig->sig_mr->lkey));
- ctx->sig->sig_sge.lkey = ctx->sig->sig_mr->lkey;
-
- if (ctx->sig->data.inv_wr.next)
- first_wr = &ctx->sig->data.inv_wr;
- else
- first_wr = &ctx->sig->data.reg_wr.wr;
- last_wr = &ctx->sig->data.wr.wr;
- break;
case RDMA_RW_MR:
for (i = 0; i < ctx->nr_ops; i++) {
rdma_rw_update_lkey(&ctx->reg[i],
@@ -564,7 +552,7 @@ EXPORT_SYMBOL(rdma_rw_ctx_wrs);
* is not set @cqe must be set so that the caller gets a completion
* notification.
*/
-int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
+int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num,
struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
{
struct ib_send_wr *first_wr;
@@ -583,8 +571,9 @@ EXPORT_SYMBOL(rdma_rw_ctx_post);
* @sg_cnt: number of entries in @sg
* @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
*/
-void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
- struct scatterlist *sg, u32 sg_cnt, enum dma_data_direction dir)
+void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ u32 port_num, struct scatterlist *sg, u32 sg_cnt,
+ enum dma_data_direction dir)
{
int i;
@@ -605,15 +594,13 @@ void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
break;
}
- /* P2PDMA contexts do not need to be unmapped */
- if (!is_pci_p2pdma_page(sg_page(sg)))
- ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
+ ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy);
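A minimal caller-side sketch of the flow these helpers implement, assuming the usual <rdma/rw.h> API; the function name, the read_cqe completion entry and the trimmed error handling are editorial, not part of the patch:

static int ulp_rdma_read_example(struct ib_qp *qp, u32 port_num,
				 struct scatterlist *sg, u32 sg_cnt,
				 u64 remote_addr, u32 rkey,
				 struct ib_cqe *read_cqe)
{
	struct rdma_rw_ctx ctx;
	int ret;

	/* Map the local pages and build the RDMA READ work request chain. */
	ret = rdma_rw_ctx_init(&ctx, qp, port_num, sg, sg_cnt, 0,
			       remote_addr, rkey, DMA_FROM_DEVICE);
	if (ret < 0)
		return ret;

	/* Post the chain; the completion is reported through read_cqe. */
	ret = rdma_rw_ctx_post(&ctx, qp, port_num, read_cqe, NULL);
	if (ret)
		rdma_rw_ctx_destroy(&ctx, qp, port_num, sg, sg_cnt,
				    DMA_FROM_DEVICE);
	/* On success the completion handler calls rdma_rw_ctx_destroy(). */
	return ret;
}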
/**
* rdma_rw_ctx_destroy_signature - release all resources allocated by
- * rdma_rw_ctx_init_signature
+ * rdma_rw_ctx_signature_init
* @ctx: context to release
* @qp: queue pair to operate on
* @port_num: port num to which the connection is bound
@@ -624,23 +611,19 @@ EXPORT_SYMBOL(rdma_rw_ctx_destroy);
* @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
*/
void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
- u8 port_num, struct scatterlist *sg, u32 sg_cnt,
+ u32 port_num, struct scatterlist *sg, u32 sg_cnt,
struct scatterlist *prot_sg, u32 prot_sg_cnt,
enum dma_data_direction dir)
{
if (WARN_ON_ONCE(ctx->type != RDMA_RW_SIG_MR))
return;
- ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr);
- ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
+ ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr);
+ kfree(ctx->reg);
- if (ctx->sig->prot.mr) {
- ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr);
+ if (prot_sg_cnt)
ib_dma_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir);
- }
-
- ib_mr_pool_put(qp, &qp->sig_mrs, ctx->sig->sig_mr);
- kfree(ctx->sig);
+ ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature);
@@ -655,13 +638,13 @@ EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature);
* compute max_rdma_ctxts and the size of the transport's Send and
* Send Completion Queues.
*/
-unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num,
+unsigned int rdma_rw_mr_factor(struct ib_device *device, u32 port_num,
unsigned int maxpages)
{
unsigned int mr_pages;
if (rdma_rw_can_use_mr(device, port_num))
- mr_pages = rdma_rw_fr_page_list_len(device);
+ mr_pages = rdma_rw_fr_page_list_len(device, false);
else
mr_pages = device->attrs.max_sge_rd;
return DIV_ROUND_UP(maxpages, mr_pages);
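A sketch of how a ULP would typically consume this factor when sizing a queue pair (editorial, not from the patch; queue_depth and MAX_PAGES_PER_IO are hypothetical ULP values):

static void ulp_size_qp_example(struct ib_device *dev, u32 port_num,
				struct ib_qp_init_attr *attr,
				unsigned int queue_depth)
{
	/*
	 * One rdma_rw context per in-flight request, each able to carry up
	 * to MAX_PAGES_PER_IO pages split over as many MRs as needed.
	 */
	attr->cap.max_rdma_ctxs = queue_depth *
			rdma_rw_mr_factor(dev, port_num, MAX_PAGES_PER_IO);
	rdma_rw_init_qp(dev, attr);	/* grows max_send_wr, see below */
}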
@@ -683,13 +666,12 @@ void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr)
factor = 1;
/*
- * If the devices needs MRs to perform RDMA READ or WRITE operations,
+ * If the device needs MRs to perform RDMA READ or WRITE operations,
* we'll need two additional MRs for the registrations and the
* invalidation.
*/
- if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN)
- factor += 6; /* (inv + reg) * (data + prot + sig) */
- else if (rdma_rw_can_use_mr(dev, attr->port_num))
+ if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN ||
+ rdma_rw_can_use_mr(dev, attr->port_num))
factor += 2; /* inv + reg */
attr->cap.max_send_wr += factor * attr->cap.max_rdma_ctxs;
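To make the sizing above concrete (an editorial example, not from the patch): with attr->cap.max_rdma_ctxs = 128, a device that needs MRs, or a QP created with IB_QP_CREATE_INTEGRITY_EN, gets factor = 1 + 2 = 3 and therefore 3 * 128 = 384 extra send work requests, while a device that can post plain RDMA READ/WRITE adds only 1 * 128 = 128.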
@@ -705,22 +687,24 @@ void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr)
int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr)
{
struct ib_device *dev = qp->pd->device;
- u32 nr_mrs = 0, nr_sig_mrs = 0;
+ u32 nr_mrs = 0, nr_sig_mrs = 0, max_num_sg = 0;
int ret = 0;
- if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) {
+ if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) {
nr_sig_mrs = attr->cap.max_rdma_ctxs;
- nr_mrs = attr->cap.max_rdma_ctxs * 2;
+ nr_mrs = attr->cap.max_rdma_ctxs;
+ max_num_sg = rdma_rw_fr_page_list_len(dev, true);
} else if (rdma_rw_can_use_mr(dev, attr->port_num)) {
nr_mrs = attr->cap.max_rdma_ctxs;
+ max_num_sg = rdma_rw_fr_page_list_len(dev, false);
}
if (nr_mrs) {
ret = ib_mr_pool_init(qp, &qp->rdma_mrs, nr_mrs,
IB_MR_TYPE_MEM_REG,
- rdma_rw_fr_page_list_len(dev));
+ max_num_sg, 0);
if (ret) {
- pr_err("%s: failed to allocated %d MRs\n",
+ pr_err("%s: failed to allocated %u MRs\n",
__func__, nr_mrs);
return ret;
}
@@ -728,10 +712,10 @@ int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr)
if (nr_sig_mrs) {
ret = ib_mr_pool_init(qp, &qp->sig_mrs, nr_sig_mrs,
- IB_MR_TYPE_SIGNATURE, 2);
+ IB_MR_TYPE_INTEGRITY, max_num_sg, max_num_sg);
if (ret) {
- pr_err("%s: failed to allocated %d SIG MRs\n",
- __func__, nr_mrs);
+ pr_err("%s: failed to allocated %u SIG MRs\n",
+ __func__, nr_sig_mrs);
goto out_free_rdma_mrs;
}
}
diff --git a/drivers/infiniband/core/sa.h b/drivers/infiniband/core/sa.h
index cbaaaa92fff3..143de37ae598 100644
--- a/drivers/infiniband/core/sa.h
+++ b/drivers/infiniband/core/sa.h
@@ -49,7 +49,7 @@ static inline void ib_sa_client_put(struct ib_sa_client *client)
}
int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num, u8 method,
+ struct ib_device *device, u32 port_num, u8 method,
struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask,
unsigned long timeout_ms, gfp_t gfp_mask,
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 7d8071c7e564..c23e9c847314 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -32,7 +32,6 @@
* SOFTWARE.
*/
-#include <linux/module.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/random.h>
@@ -51,6 +50,7 @@
#include <rdma/ib_marshall.h>
#include <rdma/ib_addr.h>
#include <rdma/opa_addr.h>
+#include <rdma/rdma_cm.h>
#include "sa.h"
#include "core_priv.h"
@@ -95,17 +95,20 @@ struct ib_sa_port {
struct delayed_work ib_cpi_work;
spinlock_t classport_lock; /* protects class port info set */
spinlock_t ah_lock;
- u8 port_num;
+ u32 port_num;
};
struct ib_sa_device {
int start_port, end_port;
struct ib_event_handler event_handler;
- struct ib_sa_port port[0];
+ struct ib_sa_port port[];
};
struct ib_sa_query {
- void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
+ void (*callback)(struct ib_sa_query *sa_query, int status,
+ struct ib_sa_mad *mad);
+ void (*rmpp_callback)(struct ib_sa_query *sa_query, int status,
+ struct ib_mad_recv_wc *mad);
void (*release)(struct ib_sa_query *);
struct ib_sa_client *client;
struct ib_sa_port *port;
@@ -123,14 +126,9 @@ struct ib_sa_query {
#define IB_SA_CANCEL 0x00000002
#define IB_SA_QUERY_OPA 0x00000004
-struct ib_sa_service_query {
- void (*callback)(int, struct ib_sa_service_rec *, void *);
- void *context;
- struct ib_sa_query sa_query;
-};
-
struct ib_sa_path_query {
- void (*callback)(int, struct sa_path_rec *, void *);
+ void (*callback)(int status, struct sa_path_rec *rec,
+ unsigned int num_paths, void *context);
void *context;
struct ib_sa_query sa_query;
struct sa_path_rec *conv_pr;
@@ -154,6 +152,13 @@ struct ib_sa_mcmember_query {
struct ib_sa_query sa_query;
};
+struct ib_sa_service_query {
+ void (*callback)(int status, struct sa_service_rec *rec,
+ unsigned int num_services, void *context);
+ void *context;
+ struct ib_sa_query sa_query;
+};
+
static LIST_HEAD(ib_nl_request_list);
static DEFINE_SPINLOCK(ib_nl_request_lock);
static atomic_t ib_nl_sa_request_seq;
@@ -174,7 +179,7 @@ static const struct nla_policy ib_nl_policy[LS_NLA_TYPE_MAX] = {
};
-static void ib_sa_add_one(struct ib_device *device);
+static int ib_sa_add_one(struct ib_device *device);
static void ib_sa_remove_one(struct ib_device *device, void *client_data);
static struct ib_client sa_client = {
@@ -190,7 +195,7 @@ static u32 tid;
#define PATH_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct sa_path_rec, field), \
- .struct_size_bytes = sizeof((struct sa_path_rec *)0)->field, \
+ .struct_size_bytes = sizeof_field(struct sa_path_rec, field), \
.field_name = "sa_path_rec:" #field
static const struct ib_field path_rec_table[] = {
@@ -292,7 +297,7 @@ static const struct ib_field path_rec_table[] = {
.struct_offset_bytes = \
offsetof(struct sa_path_rec, field), \
.struct_size_bytes = \
- sizeof((struct sa_path_rec *)0)->field, \
+ sizeof_field(struct sa_path_rec, field), \
.field_name = "sa_path_rec:" #field
static const struct ib_field opa_path_rec_table[] = {
@@ -420,7 +425,7 @@ static const struct ib_field opa_path_rec_table[] = {
#define MCMEMBER_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field), \
- .struct_size_bytes = sizeof ((struct ib_sa_mcmember_rec *) 0)->field, \
+ .struct_size_bytes = sizeof_field(struct ib_sa_mcmember_rec, field), \
.field_name = "sa_mcmember_rec:" #field
static const struct ib_field mcmember_rec_table[] = {
@@ -502,57 +507,9 @@ static const struct ib_field mcmember_rec_table[] = {
.size_bits = 23 },
};
-#define SERVICE_REC_FIELD(field) \
- .struct_offset_bytes = offsetof(struct ib_sa_service_rec, field), \
- .struct_size_bytes = sizeof ((struct ib_sa_service_rec *) 0)->field, \
- .field_name = "sa_service_rec:" #field
-
-static const struct ib_field service_rec_table[] = {
- { SERVICE_REC_FIELD(id),
- .offset_words = 0,
- .offset_bits = 0,
- .size_bits = 64 },
- { SERVICE_REC_FIELD(gid),
- .offset_words = 2,
- .offset_bits = 0,
- .size_bits = 128 },
- { SERVICE_REC_FIELD(pkey),
- .offset_words = 6,
- .offset_bits = 0,
- .size_bits = 16 },
- { SERVICE_REC_FIELD(lease),
- .offset_words = 7,
- .offset_bits = 0,
- .size_bits = 32 },
- { SERVICE_REC_FIELD(key),
- .offset_words = 8,
- .offset_bits = 0,
- .size_bits = 128 },
- { SERVICE_REC_FIELD(name),
- .offset_words = 12,
- .offset_bits = 0,
- .size_bits = 64*8 },
- { SERVICE_REC_FIELD(data8),
- .offset_words = 28,
- .offset_bits = 0,
- .size_bits = 16*8 },
- { SERVICE_REC_FIELD(data16),
- .offset_words = 32,
- .offset_bits = 0,
- .size_bits = 8*16 },
- { SERVICE_REC_FIELD(data32),
- .offset_words = 36,
- .offset_bits = 0,
- .size_bits = 4*32 },
- { SERVICE_REC_FIELD(data64),
- .offset_words = 40,
- .offset_bits = 0,
- .size_bits = 2*64 },
-};
-
#define CLASSPORTINFO_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_class_port_info, field), \
- .struct_size_bytes = sizeof((struct ib_class_port_info *)0)->field, \
+ .struct_size_bytes = sizeof_field(struct ib_class_port_info, field), \
.field_name = "ib_class_port_info:" #field
static const struct ib_field ib_classport_info_rec_table[] = {
@@ -630,7 +587,7 @@ static const struct ib_field ib_classport_info_rec_table[] = {
.struct_offset_bytes =\
offsetof(struct opa_class_port_info, field), \
.struct_size_bytes = \
- sizeof((struct opa_class_port_info *)0)->field, \
+ sizeof_field(struct opa_class_port_info, field), \
.field_name = "opa_class_port_info:" #field
static const struct ib_field opa_classport_info_rec_table[] = {
@@ -710,7 +667,7 @@ static const struct ib_field opa_classport_info_rec_table[] = {
#define GUIDINFO_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \
- .struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \
+ .struct_size_bytes = sizeof_field(struct ib_sa_guidinfo_rec, field), \
.field_name = "sa_guidinfo_rec:" #field
static const struct ib_field guidinfo_rec_table[] = {
@@ -736,6 +693,60 @@ static const struct ib_field guidinfo_rec_table[] = {
.size_bits = 512 },
};
+#define SERVICE_REC_FIELD(field) \
+ .struct_offset_bytes = offsetof(struct sa_service_rec, field), \
+ .struct_size_bytes = sizeof_field(struct sa_service_rec, field), \
+ .field_name = "sa_service_rec:" #field
+
+static const struct ib_field service_rec_table[] = {
+ { SERVICE_REC_FIELD(id),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 64 },
+ { SERVICE_REC_FIELD(gid),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { SERVICE_REC_FIELD(pkey),
+ .offset_words = 6,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { RESERVED,
+ .offset_words = 6,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { SERVICE_REC_FIELD(lease),
+ .offset_words = 7,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { SERVICE_REC_FIELD(key),
+ .offset_words = 8,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { SERVICE_REC_FIELD(name),
+ .offset_words = 12,
+ .offset_bits = 0,
+ .size_bits = 512 },
+ { SERVICE_REC_FIELD(data_8),
+ .offset_words = 28,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { SERVICE_REC_FIELD(data_16),
+ .offset_words = 32,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { SERVICE_REC_FIELD(data_32),
+ .offset_words = 36,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { SERVICE_REC_FIELD(data_64),
+ .offset_words = 40,
+ .offset_bits = 0,
+ .size_bits = 128 },
+};
+
+#define RDMA_PRIMARY_PATH_MAX_REC_NUM 3
+
static inline void ib_sa_disable_local_svc(struct ib_sa_query *query)
{
query->flags &= ~IB_SA_ENABLE_LOCAL_SERVICE;
@@ -760,13 +771,14 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
/* Construct the family header first */
header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
- memcpy(header->device_name, dev_name(&query->port->agent->device->dev),
- LS_DEVICE_NAME_MAX);
+ strscpy_pad(header->device_name,
+ dev_name(&query->port->agent->device->dev),
+ LS_DEVICE_NAME_MAX);
header->port_num = query->port->port_num;
if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) &&
sa_rec->reversible != 0)
- query->path_use = LS_RESOLVE_PATH_USE_GMP;
+ query->path_use = LS_RESOLVE_PATH_USE_ALL;
else
query->path_use = LS_RESOLVE_PATH_USE_UNIDIRECTIONAL;
header->path_use = query->path_use;
@@ -829,13 +841,20 @@ static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask)
return len;
}
-static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
+static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
{
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
void *data;
struct ib_sa_mad *mad;
int len;
+ unsigned long flags;
+ unsigned long delay;
+ gfp_t gfp_flag;
+ int ret;
+
+ INIT_LIST_HEAD(&query->list);
+ query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);
mad = query->mad_buf->mad;
len = ib_nl_get_path_rec_attrs_len(mad->sa_hdr.comp_mask);
@@ -860,36 +879,25 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
/* Repair the nlmsg header length */
nlmsg_end(skb, nlh);
- return rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask);
-}
+ gfp_flag = ((gfp_mask & GFP_ATOMIC) == GFP_ATOMIC) ? GFP_ATOMIC :
+ GFP_NOWAIT;
-static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
-{
- unsigned long flags;
- unsigned long delay;
- int ret;
+ spin_lock_irqsave(&ib_nl_request_lock, flags);
+ ret = rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_flag);
- INIT_LIST_HEAD(&query->list);
- query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);
+ if (ret)
+ goto out;
- /* Put the request on the list first.*/
- spin_lock_irqsave(&ib_nl_request_lock, flags);
+ /* Put the request on the list.*/
delay = msecs_to_jiffies(sa_local_svc_timeout_ms);
query->timeout = delay + jiffies;
list_add_tail(&query->list, &ib_nl_request_list);
/* Start the timeout if this is the only request */
if (ib_nl_request_list.next == &query->list)
queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
- spin_unlock_irqrestore(&ib_nl_request_lock, flags);
- ret = ib_nl_send_msg(query, gfp_mask);
- if (ret) {
- ret = -EIO;
- /* Remove the request */
- spin_lock_irqsave(&ib_nl_request_lock, flags);
- list_del(&query->list);
- spin_unlock_irqrestore(&ib_nl_request_lock, flags);
- }
+out:
+ spin_unlock_irqrestore(&ib_nl_request_lock, flags);
return ret;
}
@@ -923,50 +931,77 @@ static void send_handler(struct ib_mad_agent *agent,
static void ib_nl_process_good_resolve_rsp(struct ib_sa_query *query,
const struct nlmsghdr *nlh)
{
+ struct sa_path_rec recs[RDMA_PRIMARY_PATH_MAX_REC_NUM];
+ struct ib_sa_path_query *path_query;
+ struct ib_path_rec_data *rec_data;
struct ib_mad_send_wc mad_send_wc;
- struct ib_sa_mad *mad = NULL;
const struct nlattr *head, *curr;
- struct ib_path_rec_data *rec;
- int len, rem;
+ struct ib_sa_mad *mad = NULL;
+ int len, rem, status = -EIO;
+ unsigned int num_prs = 0;
u32 mask = 0;
- int status = -EIO;
-
- if (query->callback) {
- head = (const struct nlattr *) nlmsg_data(nlh);
- len = nlmsg_len(nlh);
- switch (query->path_use) {
- case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL:
- mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND;
- break;
- case LS_RESOLVE_PATH_USE_ALL:
- case LS_RESOLVE_PATH_USE_GMP:
- default:
- mask = IB_PATH_PRIMARY | IB_PATH_GMP |
- IB_PATH_BIDIRECTIONAL;
- break;
- }
- nla_for_each_attr(curr, head, len, rem) {
- if (curr->nla_type == LS_NLA_TYPE_PATH_RECORD) {
- rec = nla_data(curr);
- /*
- * Get the first one. In the future, we may
- * need to get up to 6 pathrecords.
- */
- if ((rec->flags & mask) == mask) {
- mad = query->mad_buf->mad;
- mad->mad_hdr.method |=
- IB_MGMT_METHOD_RESP;
- memcpy(mad->data, rec->path_rec,
- sizeof(rec->path_rec));
- status = 0;
- break;
- }
- }
+ if (!query->callback)
+ goto out;
+
+ path_query = container_of(query, struct ib_sa_path_query, sa_query);
+ mad = query->mad_buf->mad;
+
+ head = (const struct nlattr *) nlmsg_data(nlh);
+ len = nlmsg_len(nlh);
+ switch (query->path_use) {
+ case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL:
+ mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND;
+ break;
+
+ case LS_RESOLVE_PATH_USE_ALL:
+ mask = IB_PATH_PRIMARY;
+ break;
+
+ case LS_RESOLVE_PATH_USE_GMP:
+ default:
+ mask = IB_PATH_PRIMARY | IB_PATH_GMP |
+ IB_PATH_BIDIRECTIONAL;
+ break;
+ }
+
+ nla_for_each_attr(curr, head, len, rem) {
+ if (curr->nla_type != LS_NLA_TYPE_PATH_RECORD)
+ continue;
+
+ rec_data = nla_data(curr);
+ if ((rec_data->flags & mask) != mask)
+ continue;
+
+ if ((query->flags & IB_SA_QUERY_OPA) ||
+ path_query->conv_pr) {
+ mad->mad_hdr.method |= IB_MGMT_METHOD_RESP;
+ memcpy(mad->data, rec_data->path_rec,
+ sizeof(rec_data->path_rec));
+ query->callback(query, 0, mad);
+ goto out;
}
- query->callback(query, status, mad);
+
+ status = 0;
+ ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
+ rec_data->path_rec, &recs[num_prs]);
+ recs[num_prs].flags = rec_data->flags;
+ recs[num_prs].rec_type = SA_PATH_REC_TYPE_IB;
+ sa_path_set_dmac_zero(&recs[num_prs]);
+
+ num_prs++;
+ if (num_prs >= RDMA_PRIMARY_PATH_MAX_REC_NUM)
+ break;
}
+ if (!status) {
+ mad->mad_hdr.method |= IB_MGMT_METHOD_RESP;
+ path_query->callback(status, recs, num_prs,
+ path_query->context);
+ } else
+ query->callback(query, status, mad);
+
+out:
mad_send_wc.send_buf = query->mad_buf;
mad_send_wc.status = IB_WC_SUCCESS;
send_handler(query->mad_buf->mad_agent, &mad_send_wc);
@@ -1039,6 +1074,8 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb,
if (timeout > IB_SA_LOCAL_SVC_TIMEOUT_MAX)
timeout = IB_SA_LOCAL_SVC_TIMEOUT_MAX;
+ spin_lock_irqsave(&ib_nl_request_lock, flags);
+
delta = timeout - sa_local_svc_timeout_ms;
if (delta < 0)
abs_delta = -delta;
@@ -1046,7 +1083,6 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb,
abs_delta = delta;
if (delta != 0) {
- spin_lock_irqsave(&ib_nl_request_lock, flags);
sa_local_svc_timeout_ms = timeout;
list_for_each_entry(query, &ib_nl_request_list, list) {
if (delta < 0 && abs_delta > query->timeout)
@@ -1064,11 +1100,12 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb,
if (delay)
mod_delayed_work(ib_nl_wq, &ib_nl_timed_work,
(unsigned long)delay);
- spin_unlock_irqrestore(&ib_nl_request_lock, flags);
}
+ spin_unlock_irqrestore(&ib_nl_request_lock, flags);
+
settimeout_out:
- return skb->len;
+ return 0;
}
static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
@@ -1092,10 +1129,9 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb,
struct netlink_ext_ack *extack)
{
unsigned long flags;
- struct ib_sa_query *query;
+ struct ib_sa_query *query = NULL, *iter;
struct ib_mad_send_buf *send_buf;
struct ib_mad_send_wc mad_send_wc;
- int found = 0;
int ret;
if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
@@ -1103,20 +1139,21 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb,
return -EPERM;
spin_lock_irqsave(&ib_nl_request_lock, flags);
- list_for_each_entry(query, &ib_nl_request_list, list) {
+ list_for_each_entry(iter, &ib_nl_request_list, list) {
/*
* If the query is cancelled, let the timeout routine
* take care of it.
*/
- if (nlh->nlmsg_seq == query->seq) {
- found = !ib_sa_query_cancelled(query);
- if (found)
- list_del(&query->list);
+ if (nlh->nlmsg_seq == iter->seq) {
+ if (!ib_sa_query_cancelled(iter)) {
+ list_del(&iter->list);
+ query = iter;
+ }
break;
}
}
- if (!found) {
+ if (!query) {
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
goto resp_out;
}
@@ -1139,7 +1176,7 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb,
}
resp_out:
- return skb->len;
+ return 0;
}
static void free_sm_ah(struct kref *kref)
@@ -1176,7 +1213,6 @@ EXPORT_SYMBOL(ib_sa_unregister_client);
void ib_sa_cancel_query(int id, struct ib_sa_query *query)
{
unsigned long flags;
- struct ib_mad_agent *agent;
struct ib_mad_send_buf *mad_buf;
xa_lock_irqsave(&queries, flags);
@@ -1184,7 +1220,6 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query)
xa_unlock_irqrestore(&queries, flags);
return;
}
- agent = query->port->agent;
mad_buf = query->mad_buf;
xa_unlock_irqrestore(&queries, flags);
@@ -1194,11 +1229,11 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query)
* sent to the MAD layer and has to be cancelled from there.
*/
if (!ib_nl_cancel_request(query))
- ib_cancel_mad(agent, mad_buf);
+ ib_cancel_mad(mad_buf);
}
EXPORT_SYMBOL(ib_sa_cancel_query);
-static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
+static u8 get_src_path_mask(struct ib_device *device, u32 port_num)
{
struct ib_sa_device *sa_dev;
struct ib_sa_port *port;
@@ -1217,7 +1252,7 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
return src_path_mask;
}
-static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
+static int init_ah_attr_grh_fields(struct ib_device *device, u32 port_num,
struct sa_path_rec *rec,
struct rdma_ah_attr *ah_attr,
const struct ib_gid_attr *gid_attr)
@@ -1246,7 +1281,7 @@ static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
* @port_num: Port on the specified device.
* @rec: path record entry to use for ah attributes initialization.
* @ah_attr: address handle attributes to initialization from path record.
- * @sgid_attr: SGID attribute to consider during initialization.
+ * @gid_attr: SGID attribute to consider during initialization.
*
* When ib_init_ah_attr_from_path() returns success,
* (a) for IB link layer it optionally contains a reference to SGID attribute
@@ -1255,7 +1290,7 @@ static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
* User must invoke rdma_destroy_ah_attr() to release reference to SGID
* attributes which are initialized using ib_init_ah_attr_from_path().
*/
-int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
+int ib_init_ah_attr_from_path(struct ib_device *device, u32 port_num,
struct sa_path_rec *rec,
struct rdma_ah_attr *ah_attr,
const struct ib_gid_attr *gid_attr)
@@ -1364,6 +1399,7 @@ static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
{
unsigned long flags;
int ret, id;
+ const int nmbr_sa_query_retries = 10;
xa_lock_irqsave(&queries, flags);
ret = __xa_alloc(&queries, &id, query, xa_limit_32b, gfp_mask);
@@ -1371,7 +1407,13 @@ static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
if (ret < 0)
return ret;
- query->mad_buf->timeout_ms = timeout_ms;
+ query->mad_buf->timeout_ms = timeout_ms / nmbr_sa_query_retries;
+ query->mad_buf->retries = nmbr_sa_query_retries;
+ if (!query->mad_buf->timeout_ms) {
+ /* Special case, very small timeout_ms */
+ query->mad_buf->timeout_ms = 1;
+ query->mad_buf->retries = timeout_ms;
+ }
query->mad_buf->context[0] = query;
query->id = id;
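To make the new retry handling concrete (an editorial example, not from the patch): a caller passing timeout_ms = 1000 now gets 10 tries of 100 ms each instead of a single 1000 ms try, while a very small value such as timeout_ms = 5 hits the special case and is sent with a 1 ms per-try timeout and 5 retries.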
@@ -1411,18 +1453,28 @@ void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute)
}
EXPORT_SYMBOL(ib_sa_pack_path);
+void ib_sa_pack_service(struct sa_service_rec *rec, void *attribute)
+{
+ ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table), rec,
+ attribute);
+}
+EXPORT_SYMBOL(ib_sa_pack_service);
+
+void ib_sa_unpack_service(void *attribute, struct sa_service_rec *rec)
+{
+ ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table), attribute,
+ rec);
+}
+EXPORT_SYMBOL(ib_sa_unpack_service);
+
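A minimal round-trip sketch for the two exported helpers (editorial, not from the patch), using the wire layout that service_rec_table describes:

static void service_rec_roundtrip_example(struct ib_sa_mad *mad,
					  struct sa_service_rec *in,
					  struct sa_service_rec *out)
{
	ib_sa_pack_service(in, mad->data);	/* host struct -> MAD payload */
	ib_sa_unpack_service(mad->data, out);	/* MAD payload -> host struct */
}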
static bool ib_sa_opa_pathrecord_support(struct ib_sa_client *client,
- struct ib_device *device,
- u8 port_num)
+ struct ib_sa_device *sa_dev,
+ u32 port_num)
{
- struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
struct ib_sa_port *port;
unsigned long flags;
bool ret = false;
- if (!sa_dev)
- return ret;
-
port = &sa_dev->port[port_num - sa_dev->start_port];
spin_lock_irqsave(&port->classport_lock, flags);
if (!port->classport_info.valid)
@@ -1442,24 +1494,24 @@ enum opa_pr_supported {
PR_IB_SUPPORTED
};
-/**
- * Check if current PR query can be an OPA query.
- * Retuns PR_NOT_SUPPORTED if a path record query is not
+/*
+ * opa_pr_query_possible - Check if current PR query can be an OPA query.
+ *
+ * Returns PR_NOT_SUPPORTED if a path record query is not
* possible, PR_OPA_SUPPORTED if an OPA path record query
* is possible and PR_IB_SUPPORTED if an IB path record
* query is possible.
*/
static int opa_pr_query_possible(struct ib_sa_client *client,
- struct ib_device *device,
- u8 port_num,
- struct sa_path_rec *rec)
+ struct ib_sa_device *sa_dev,
+ struct ib_device *device, u32 port_num)
{
struct ib_port_attr port_attr;
if (ib_query_port(device, port_num, &port_attr))
return PR_NOT_SUPPORTED;
- if (ib_sa_opa_pathrecord_support(client, device, port_num))
+ if (ib_sa_opa_pathrecord_support(client, sa_dev, port_num))
return PR_OPA_SUPPORTED;
if (port_attr.lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
@@ -1469,40 +1521,101 @@ static int opa_pr_query_possible(struct ib_sa_client *client,
}
static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
- int status,
- struct ib_sa_mad *mad)
+ int status, struct ib_sa_mad *mad)
{
struct ib_sa_path_query *query =
container_of(sa_query, struct ib_sa_path_query, sa_query);
+ struct sa_path_rec rec = {};
- if (mad) {
- struct sa_path_rec rec;
+ if (!mad) {
+ query->callback(status, NULL, 0, query->context);
+ return;
+ }
- if (sa_query->flags & IB_SA_QUERY_OPA) {
- ib_unpack(opa_path_rec_table,
- ARRAY_SIZE(opa_path_rec_table),
- mad->data, &rec);
- rec.rec_type = SA_PATH_REC_TYPE_OPA;
- query->callback(status, &rec, query->context);
- } else {
- ib_unpack(path_rec_table,
- ARRAY_SIZE(path_rec_table),
- mad->data, &rec);
- rec.rec_type = SA_PATH_REC_TYPE_IB;
- sa_path_set_dmac_zero(&rec);
+ if (sa_query->flags & IB_SA_QUERY_OPA) {
+ ib_unpack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table),
+ mad->data, &rec);
+ rec.rec_type = SA_PATH_REC_TYPE_OPA;
+ query->callback(status, &rec, 1, query->context);
+ return;
+ }
- if (query->conv_pr) {
- struct sa_path_rec opa;
+ ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
+ mad->data, &rec);
+ rec.rec_type = SA_PATH_REC_TYPE_IB;
+ sa_path_set_dmac_zero(&rec);
- memset(&opa, 0, sizeof(struct sa_path_rec));
- sa_convert_path_ib_to_opa(&opa, &rec);
- query->callback(status, &opa, query->context);
- } else {
- query->callback(status, &rec, query->context);
+ if (query->conv_pr) {
+ struct sa_path_rec opa;
+
+ memset(&opa, 0, sizeof(struct sa_path_rec));
+ sa_convert_path_ib_to_opa(&opa, &rec);
+ query->callback(status, &opa, 1, query->context);
+ } else {
+ query->callback(status, &rec, 1, query->context);
+ }
+}
+
+#define IB_SA_DATA_OFFS 56
+#define IB_SERVICE_REC_SZ 176
+
+static void ib_unpack_service_rmpp(struct sa_service_rec *rec,
+ struct ib_mad_recv_wc *mad_wc,
+ int num_services)
+{
+ unsigned int cp_sz, data_i, data_size, rec_i = 0, buf_i = 0;
+ struct ib_mad_recv_buf *mad_buf;
+ u8 buf[IB_SERVICE_REC_SZ];
+ u8 *data;
+
+ data_size = sizeof(((struct ib_sa_mad *) mad_buf->mad)->data);
+
+ list_for_each_entry(mad_buf, &mad_wc->rmpp_list, list) {
+ data = ((struct ib_sa_mad *) mad_buf->mad)->data;
+ data_i = 0;
+ while (data_i < data_size && rec_i < num_services) {
+ cp_sz = min(IB_SERVICE_REC_SZ - buf_i,
+ data_size - data_i);
+ memcpy(buf + buf_i, data + data_i, cp_sz);
+ data_i += cp_sz;
+ buf_i += cp_sz;
+ if (buf_i == IB_SERVICE_REC_SZ) {
+ ib_sa_unpack_service(buf, rec + rec_i);
+ buf_i = 0;
+ rec_i++;
}
}
- } else
- query->callback(status, NULL, query->context);
+ }
+}
+
+static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query, int status,
+ struct ib_mad_recv_wc *mad_wc)
+{
+ struct ib_sa_service_query *query =
+ container_of(sa_query, struct ib_sa_service_query, sa_query);
+ struct sa_service_rec *rec;
+ int num_services;
+
+ if (!mad_wc || !mad_wc->recv_buf.mad) {
+ query->callback(status, NULL, 0, query->context);
+ return;
+ }
+
+ num_services = (mad_wc->mad_len - IB_SA_DATA_OFFS) / IB_SERVICE_REC_SZ;
+ if (!num_services) {
+ query->callback(-ENODATA, NULL, 0, query->context);
+ return;
+ }
+
+ rec = kmalloc_array(num_services, sizeof(*rec), GFP_KERNEL);
+ if (!rec) {
+ query->callback(-ENOMEM, NULL, 0, query->context);
+ return;
+ }
+
+ ib_unpack_service_rmpp(rec, mad_wc, num_services);
+ query->callback(status, rec, num_services, query->context);
+ kfree(rec);
}
static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
@@ -1514,6 +1627,14 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
kfree(query);
}
+static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
+{
+ struct ib_sa_service_query *query =
+ container_of(sa_query, struct ib_sa_service_query, sa_query);
+
+ kfree(query);
+}
+
/**
* ib_sa_path_rec_get - Start a Path get query
* @client:SA client
@@ -1540,13 +1661,13 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
* the query.
*/
int ib_sa_path_rec_get(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
+ struct ib_device *device, u32 port_num,
struct sa_path_rec *rec,
ib_sa_comp_mask comp_mask,
unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct sa_path_rec *resp,
- void *context),
+ unsigned int num_paths, void *context),
void *context,
struct ib_sa_query **sa_query)
{
@@ -1574,7 +1695,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
query->sa_query.port = port;
if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
- status = opa_pr_query_possible(client, device, port_num, rec);
+ status = opa_pr_query_possible(client, sa_dev, device, port_num);
if (status == PR_NOT_SUPPORTED) {
ret = -EINVAL;
goto err1;
@@ -1644,89 +1765,61 @@ err1:
}
EXPORT_SYMBOL(ib_sa_path_rec_get);
-static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query,
- int status,
- struct ib_sa_mad *mad)
-{
- struct ib_sa_service_query *query =
- container_of(sa_query, struct ib_sa_service_query, sa_query);
-
- if (mad) {
- struct ib_sa_service_rec rec;
-
- ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table),
- mad->data, &rec);
- query->callback(status, &rec, query->context);
- } else
- query->callback(status, NULL, query->context);
-}
-
-static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
-{
- kfree(container_of(sa_query, struct ib_sa_service_query, sa_query));
-}
-
/**
- * ib_sa_service_rec_query - Start Service Record operation
- * @client:SA client
- * @device:device to send request on
- * @port_num: port number to send request on
- * @method:SA method - should be get, set, or delete
- * @rec:Service Record to send in request
- * @comp_mask:component mask to send in request
- * @timeout_ms:time to wait for response
- * @gfp_mask:GFP mask to use for internal allocations
- * @callback:function called when request completes, times out or is
+ * ib_sa_service_rec_get - Start a Service get query
+ * @client: SA client
+ * @device: device to send query on
+ * @port_num: port number to send query on
+ * @rec: Service Record to send in query
+ * @comp_mask: component mask to send in query
+ * @timeout_ms: time to wait for response
+ * @gfp_mask: GFP mask to use for internal allocations
+ * @callback: function called when query completes, times out or is
* canceled
- * @context:opaque user context passed to callback
- * @sa_query:request context, used to cancel request
+ * @context: opaque user context passed to callback
+ * @sa_query: query context, used to cancel query
*
- * Send a Service Record set/get/delete to the SA to register,
- * unregister or query a service record.
- * The callback function will be called when the request completes (or
+ * Send a Service Record Get query to the SA to look up service records. The
+ * callback function will be called when the query completes (or
* fails); status is 0 for a successful response, -EINTR if the query
* is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
* occurred sending the query. The resp parameter of the callback is
* only valid if status is 0.
*
- * If the return value of ib_sa_service_rec_query() is negative, it is an
- * error code. Otherwise it is a request ID that can be used to cancel
+ * If the return value of ib_sa_service_rec_get() is negative, it is an
+ * error code. Otherwise it is a query ID that can be used to cancel
* the query.
*/
-int ib_sa_service_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num, u8 method,
- struct ib_sa_service_rec *rec,
- ib_sa_comp_mask comp_mask,
- unsigned long timeout_ms, gfp_t gfp_mask,
- void (*callback)(int status,
- struct ib_sa_service_rec *resp,
- void *context),
- void *context,
- struct ib_sa_query **sa_query)
+int ib_sa_service_rec_get(struct ib_sa_client *client,
+ struct ib_device *device, u32 port_num,
+ struct sa_service_rec *rec,
+ ib_sa_comp_mask comp_mask,
+ unsigned long timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct sa_service_rec *resp,
+ unsigned int num_services,
+ void *context),
+ void *context, struct ib_sa_query **sa_query)
{
- struct ib_sa_service_query *query;
struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
- struct ib_sa_port *port;
+ struct ib_sa_service_query *query;
struct ib_mad_agent *agent;
+ struct ib_sa_port *port;
struct ib_sa_mad *mad;
int ret;
if (!sa_dev)
return -ENODEV;
- port = &sa_dev->port[port_num - sa_dev->start_port];
+ port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
- if (method != IB_MGMT_METHOD_GET &&
- method != IB_MGMT_METHOD_SET &&
- method != IB_SA_METHOD_DELETE)
- return -EINVAL;
-
query = kzalloc(sizeof(*query), gfp_mask);
if (!query)
return -ENOMEM;
- query->sa_query.port = port;
+ query->sa_query.port = port;
+
ret = alloc_mad(&query->sa_query, gfp_mask);
if (ret)
goto err1;
@@ -1739,16 +1832,17 @@ int ib_sa_service_rec_query(struct ib_sa_client *client,
mad = query->sa_query.mad_buf->mad;
init_mad(&query->sa_query, agent);
- query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
- query->sa_query.release = ib_sa_service_rec_release;
- mad->mad_hdr.method = method;
- mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
- mad->sa_hdr.comp_mask = comp_mask;
+ query->sa_query.rmpp_callback = callback ? ib_sa_service_rec_callback :
+ NULL;
+ query->sa_query.release = ib_sa_service_rec_release;
+ mad->mad_hdr.method = IB_MGMT_METHOD_GET_TABLE;
+ mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
+ mad->sa_hdr.comp_mask = comp_mask;
- ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table),
- rec, mad->data);
+ ib_sa_pack_service(rec, mad->data);
*sa_query = &query->sa_query;
+ query->sa_query.mad_buf->context[1] = rec;
ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
if (ret < 0)
@@ -1760,16 +1854,14 @@ err2:
*sa_query = NULL;
ib_sa_client_put(query->sa_query.client);
free_mad(&query->sa_query);
-
err1:
kfree(query);
return ret;
}
-EXPORT_SYMBOL(ib_sa_service_rec_query);
+EXPORT_SYMBOL(ib_sa_service_rec_get);
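A hypothetical caller of the renamed query (editorial sketch, not from the patch); note that the record array handed to the callback is freed by the SA layer as soon as the callback returns:

static void example_service_cb(int status, struct sa_service_rec *recs,
			       unsigned int num_services, void *context)
{
	if (status)
		return;
	/* recs[0..num_services - 1] are only valid inside this callback. */
	pr_debug("resolved %u service records\n", num_services);
}

static int example_query_services(struct ib_sa_client *client,
				  struct ib_device *device, u32 port_num,
				  struct sa_service_rec *rec,
				  ib_sa_comp_mask comp_mask,
				  struct ib_sa_query **sa_query)
{
	/* 3000 ms total budget, split across retries by send_mad() above. */
	return ib_sa_service_rec_get(client, device, port_num, rec, comp_mask,
				     3000, GFP_KERNEL, example_service_cb,
				     NULL, sa_query);
}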
static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
- int status,
- struct ib_sa_mad *mad)
+ int status, struct ib_sa_mad *mad)
{
struct ib_sa_mcmember_query *query =
container_of(sa_query, struct ib_sa_mcmember_query, sa_query);
@@ -1790,7 +1882,7 @@ static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
}
int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
+ struct ib_device *device, u32 port_num,
u8 method,
struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask,
@@ -1860,8 +1952,7 @@ err1:
/* Support GuidInfoRecord */
static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query,
- int status,
- struct ib_sa_mad *mad)
+ int status, struct ib_sa_mad *mad)
{
struct ib_sa_guidinfo_query *query =
container_of(sa_query, struct ib_sa_guidinfo_query, sa_query);
@@ -1882,7 +1973,7 @@ static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query)
}
int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
+ struct ib_device *device, u32 port_num,
struct ib_sa_guidinfo_rec *rec,
ib_sa_comp_mask comp_mask, u8 method,
unsigned long timeout_ms, gfp_t gfp_mask,
@@ -1957,30 +2048,6 @@ err1:
}
EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
-bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client,
- struct ib_device *device,
- u8 port_num)
-{
- struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
- struct ib_sa_port *port;
- bool ret = false;
- unsigned long flags;
-
- if (!sa_dev)
- return ret;
-
- port = &sa_dev->port[port_num - sa_dev->start_port];
-
- spin_lock_irqsave(&port->classport_lock, flags);
- if ((port->classport_info.valid) &&
- (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_IB))
- ret = ib_get_cpi_capmask2(&port->classport_info.data.ib)
- & IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT;
- spin_unlock_irqrestore(&port->classport_lock, flags);
- return ret;
-}
-EXPORT_SYMBOL(ib_sa_sendonly_fullmem_support);
-
struct ib_classport_info_context {
struct completion done;
struct ib_sa_query *sa_query;
@@ -1994,8 +2061,7 @@ static void ib_classportinfo_cb(void *context)
}
static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
- int status,
- struct ib_sa_mad *mad)
+ int status, struct ib_sa_mad *mad)
{
unsigned long flags;
struct ib_sa_classport_info_query *query =
@@ -2163,23 +2229,29 @@ static void send_handler(struct ib_mad_agent *agent,
{
struct ib_sa_query *query = mad_send_wc->send_buf->context[0];
unsigned long flags;
+ int status = 0;
- if (query->callback)
+ if (query->callback || query->rmpp_callback) {
switch (mad_send_wc->status) {
case IB_WC_SUCCESS:
/* No callback -- already got recv */
break;
case IB_WC_RESP_TIMEOUT_ERR:
- query->callback(query, -ETIMEDOUT, NULL);
+ status = -ETIMEDOUT;
break;
case IB_WC_WR_FLUSH_ERR:
- query->callback(query, -EINTR, NULL);
+ status = -EINTR;
break;
default:
- query->callback(query, -EIO, NULL);
+ status = -EIO;
break;
}
+ if (status)
+ query->callback ? query->callback(query, status, NULL) :
+ query->rmpp_callback(query, status, NULL);
+ }
+
xa_lock_irqsave(&queries, flags);
__xa_erase(&queries, query->id);
xa_unlock_irqrestore(&queries, flags);
@@ -2195,17 +2267,25 @@ static void recv_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct ib_sa_query *query;
+ struct ib_mad *mad;
+
if (!send_buf)
return;
query = send_buf->context[0];
- if (query->callback) {
+ mad = mad_recv_wc->recv_buf.mad;
+
+ if (query->rmpp_callback) {
+ if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
+ query->rmpp_callback(query, mad->mad_hdr.status ?
+ -EINVAL : 0, mad_recv_wc);
+ else
+ query->rmpp_callback(query, -EIO, NULL);
+ } else if (query->callback) {
if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
- query->callback(query,
- mad_recv_wc->recv_buf.mad->mad_hdr.status ?
- -EINVAL : 0,
- (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
+ query->callback(query, mad->mad_hdr.status ?
+ -EINVAL : 0, (struct ib_sa_mad *)mad);
else
query->callback(query, -EIO, NULL);
}
@@ -2295,7 +2375,7 @@ static void ib_sa_event(struct ib_event_handler *handler,
unsigned long flags;
struct ib_sa_device *sa_dev =
container_of(handler, typeof(*sa_dev), event_handler);
- u8 port_num = event->element.port_num - sa_dev->start_port;
+ u32 port_num = event->element.port_num - sa_dev->start_port;
struct ib_sa_port *port = &sa_dev->port[port_num];
if (!rdma_cap_ib_sa(handler->device, port->port_num))
@@ -2325,18 +2405,21 @@ static void ib_sa_event(struct ib_event_handler *handler,
}
}
-static void ib_sa_add_one(struct ib_device *device)
+static int ib_sa_add_one(struct ib_device *device)
{
struct ib_sa_device *sa_dev;
int s, e, i;
int count = 0;
+ int ret;
s = rdma_start_port(device);
e = rdma_end_port(device);
- sa_dev = kzalloc(struct_size(sa_dev, port, e - s + 1), GFP_KERNEL);
+ sa_dev = kzalloc(struct_size(sa_dev, port,
+ size_add(size_sub(e, s), 1)),
+ GFP_KERNEL);
if (!sa_dev)
- return;
+ return -ENOMEM;
sa_dev->start_port = s;
sa_dev->end_port = e;
@@ -2354,10 +2437,13 @@ static void ib_sa_add_one(struct ib_device *device)
sa_dev->port[i].agent =
ib_register_mad_agent(device, i + s, IB_QPT_GSI,
- NULL, 0, send_handler,
- recv_handler, sa_dev, 0);
- if (IS_ERR(sa_dev->port[i].agent))
+ NULL, IB_MGMT_RMPP_VERSION,
+ send_handler, recv_handler,
+ sa_dev, 0);
+ if (IS_ERR(sa_dev->port[i].agent)) {
+ ret = PTR_ERR(sa_dev->port[i].agent);
goto err;
+ }
INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work,
@@ -2366,8 +2452,10 @@ static void ib_sa_add_one(struct ib_device *device)
count++;
}
- if (!count)
+ if (!count) {
+ ret = -EOPNOTSUPP;
goto free;
+ }
ib_set_client_data(device, &sa_client, sa_dev);
@@ -2386,7 +2474,7 @@ static void ib_sa_add_one(struct ib_device *device)
update_sm_ah(&sa_dev->port[i].update_task);
}
- return;
+ return 0;
err:
while (--i >= 0) {
@@ -2395,7 +2483,7 @@ err:
}
free:
kfree(sa_dev);
- return;
+ return ret;
}
static void ib_sa_remove_one(struct ib_device *device, void *client_data)
@@ -2403,9 +2491,6 @@ static void ib_sa_remove_one(struct ib_device *device, void *client_data)
struct ib_sa_device *sa_dev = client_data;
int i;
- if (!sa_dev)
- return;
-
ib_unregister_event_handler(&sa_dev->event_handler);
flush_workqueue(ib_wq);
@@ -2463,7 +2548,6 @@ err1:
void ib_sa_cleanup(void)
{
cancel_delayed_work(&ib_nl_timed_work);
- flush_workqueue(ib_nl_wq);
destroy_workqueue(ib_nl_wq);
mcast_cleanup();
ib_unregister_client(&sa_client);
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index 1ab423b19f77..3512c2e54efc 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -72,7 +72,7 @@ static int get_pkey_and_subnet_prefix(struct ib_port_pkey *pp,
if (ret)
return ret;
- ret = ib_get_cached_subnet_prefix(dev, pp->port_num, subnet_prefix);
+ ib_get_cached_subnet_prefix(dev, pp->port_num, subnet_prefix);
return ret;
}
@@ -193,7 +193,7 @@ static void qp_to_error(struct ib_qp_security *sec)
static inline void check_pkey_qps(struct pkey_index_qp_list *pkey,
struct ib_device *device,
- u8 port_num,
+ u32 port_num,
u64 subnet_prefix)
{
struct ib_port_pkey *pp, *tmp_pp;
@@ -245,7 +245,7 @@ static int port_pkey_list_insert(struct ib_port_pkey *pp)
struct pkey_index_qp_list *tmp_pkey;
struct pkey_index_qp_list *pkey;
struct ib_device *dev;
- u8 port_num = pp->port_num;
+ u32 port_num = pp->port_num;
int ret = 0;
if (pp->state != IB_PORT_PKEY_VALID)
@@ -339,27 +339,20 @@ static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp,
if (!new_pps)
return NULL;
- if (qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) {
- if (!qp_pps) {
- new_pps->main.port_num = qp_attr->port_num;
- new_pps->main.pkey_index = qp_attr->pkey_index;
- } else {
- new_pps->main.port_num = (qp_attr_mask & IB_QP_PORT) ?
- qp_attr->port_num :
- qp_pps->main.port_num;
-
- new_pps->main.pkey_index =
- (qp_attr_mask & IB_QP_PKEY_INDEX) ?
- qp_attr->pkey_index :
- qp_pps->main.pkey_index;
- }
- new_pps->main.state = IB_PORT_PKEY_VALID;
- } else if (qp_pps) {
+ if (qp_attr_mask & IB_QP_PORT)
+ new_pps->main.port_num = qp_attr->port_num;
+ else if (qp_pps)
new_pps->main.port_num = qp_pps->main.port_num;
+
+ if (qp_attr_mask & IB_QP_PKEY_INDEX)
+ new_pps->main.pkey_index = qp_attr->pkey_index;
+ else if (qp_pps)
new_pps->main.pkey_index = qp_pps->main.pkey_index;
- if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID)
- new_pps->main.state = IB_PORT_PKEY_VALID;
- }
+
+ if (((qp_attr_mask & IB_QP_PKEY_INDEX) &&
+ (qp_attr_mask & IB_QP_PORT)) ||
+ (qp_pps && qp_pps->main.state != IB_PORT_PKEY_NOT_VALID))
+ new_pps->main.state = IB_PORT_PKEY_VALID;
if (qp_attr_mask & IB_QP_ALT_PATH) {
new_pps->alt.port_num = qp_attr->alt_port_num;
@@ -426,7 +419,7 @@ int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev)
int ret;
rdma_for_each_port (dev, i) {
- is_ib = rdma_protocol_ib(dev, i++);
+ is_ib = rdma_protocol_ib(dev, i);
if (is_ib)
break;
}
@@ -545,7 +538,7 @@ void ib_destroy_qp_security_end(struct ib_qp_security *sec)
}
void ib_security_cache_change(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
u64 subnet_prefix)
{
struct pkey_index_qp_list *pkey;
@@ -593,7 +586,7 @@ int ib_security_modify_qp(struct ib_qp *qp,
WARN_ONCE((qp_attr_mask & IB_QP_PORT &&
rdma_protocol_ib(real_qp->device, qp_attr->port_num) &&
!real_qp->qp_sec),
- "%s: QP security is not initialized for IB QP: %d\n",
+ "%s: QP security is not initialized for IB QP: %u\n",
__func__, real_qp->qp_num);
/* The port/pkey settings are maintained only for the real QP. Open
@@ -656,7 +649,7 @@ int ib_security_modify_qp(struct ib_qp *qp,
}
static int ib_security_pkey_access(struct ib_device *dev,
- u8 port_num,
+ u32 port_num,
u16 pkey_index,
void *sec)
{
@@ -671,10 +664,7 @@ static int ib_security_pkey_access(struct ib_device *dev,
if (ret)
return ret;
- ret = ib_get_cached_subnet_prefix(dev, port_num, &subnet_prefix);
-
- if (ret)
- return ret;
+ ib_get_cached_subnet_prefix(dev, port_num, &subnet_prefix);
return security_ib_pkey_access(sec, subnet_prefix, pkey);
}
diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c
index f19b23817c2b..45f09b75c893 100644
--- a/drivers/infiniband/core/smi.c
+++ b/drivers/infiniband/core/smi.c
@@ -41,7 +41,7 @@
#include "smi.h"
#include "opa_smi.h"
-static enum smi_action __smi_handle_dr_smp_send(bool is_switch, int port_num,
+static enum smi_action __smi_handle_dr_smp_send(bool is_switch, u32 port_num,
u8 *hop_ptr, u8 hop_cnt,
const u8 *initial_path,
const u8 *return_path,
@@ -127,7 +127,7 @@ static enum smi_action __smi_handle_dr_smp_send(bool is_switch, int port_num,
* Return IB_SMI_DISCARD if the SMP should be discarded
*/
enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
- bool is_switch, int port_num)
+ bool is_switch, u32 port_num)
{
return __smi_handle_dr_smp_send(is_switch, port_num,
&smp->hop_ptr, smp->hop_cnt,
@@ -139,7 +139,7 @@ enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
}
enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp,
- bool is_switch, int port_num)
+ bool is_switch, u32 port_num)
{
return __smi_handle_dr_smp_send(is_switch, port_num,
&smp->hop_ptr, smp->hop_cnt,
@@ -152,7 +152,7 @@ enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp,
OPA_LID_PERMISSIVE);
}
-static enum smi_action __smi_handle_dr_smp_recv(bool is_switch, int port_num,
+static enum smi_action __smi_handle_dr_smp_recv(bool is_switch, u32 port_num,
int phys_port_cnt,
u8 *hop_ptr, u8 hop_cnt,
const u8 *initial_path,
@@ -238,7 +238,7 @@ static enum smi_action __smi_handle_dr_smp_recv(bool is_switch, int port_num,
* Return IB_SMI_DISCARD if the SMP should be dropped
*/
enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, bool is_switch,
- int port_num, int phys_port_cnt)
+ u32 port_num, int phys_port_cnt)
{
return __smi_handle_dr_smp_recv(is_switch, port_num, phys_port_cnt,
&smp->hop_ptr, smp->hop_cnt,
@@ -254,7 +254,7 @@ enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, bool is_switch,
* Return IB_SMI_DISCARD if the SMP should be dropped
*/
enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, bool is_switch,
- int port_num, int phys_port_cnt)
+ u32 port_num, int phys_port_cnt)
{
return __smi_handle_dr_smp_recv(is_switch, port_num, phys_port_cnt,
&smp->hop_ptr, smp->hop_cnt,
diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h
index 91d9b353ab85..e350ed623c45 100644
--- a/drivers/infiniband/core/smi.h
+++ b/drivers/infiniband/core/smi.h
@@ -52,11 +52,11 @@ enum smi_forward_action {
};
enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, bool is_switch,
- int port_num, int phys_port_cnt);
+ u32 port_num, int phys_port_cnt);
int smi_get_fwd_port(struct ib_smp *smp);
extern enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp);
extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
- bool is_switch, int port_num);
+ bool is_switch, u32 port_num);
/*
* Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index c78d0c9646ae..0ed862b38b44 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -43,207 +43,261 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_pma.h>
#include <rdma/ib_cache.h>
+#include <rdma/rdma_counter.h>
+#include <rdma/ib_sysfs.h>
-struct ib_port;
+struct port_table_attribute {
+ struct ib_port_attribute attr;
+ char name[8];
+ int index;
+ __be16 attr_id;
+};
struct gid_attr_group {
- struct ib_port *port;
- struct kobject kobj;
- struct attribute_group ndev;
- struct attribute_group type;
+ struct ib_port *port;
+ struct kobject kobj;
+ struct attribute_group groups[2];
+ const struct attribute_group *groups_list[3];
+ struct port_table_attribute attrs_list[];
};
+
struct ib_port {
- struct kobject kobj;
- struct ib_device *ibdev;
+ struct kobject kobj;
+ struct ib_device *ibdev;
struct gid_attr_group *gid_attr_group;
- struct attribute_group gid_group;
- struct attribute_group pkey_group;
- struct attribute_group *pma_table;
- struct attribute_group *hw_stats_ag;
- struct rdma_hw_stats *hw_stats;
- u8 port_num;
+ struct hw_stats_port_data *hw_stats_data;
+
+ struct attribute_group groups[3];
+ const struct attribute_group *groups_list[5];
+ u32 port_num;
+ struct port_table_attribute attrs_list[];
};
-struct port_attribute {
- struct attribute attr;
- ssize_t (*show)(struct ib_port *, struct port_attribute *, char *buf);
- ssize_t (*store)(struct ib_port *, struct port_attribute *,
+struct hw_stats_device_attribute {
+ struct device_attribute attr;
+ ssize_t (*show)(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ unsigned int index, unsigned int port_num, char *buf);
+ ssize_t (*store)(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ unsigned int index, unsigned int port_num,
const char *buf, size_t count);
};
-#define PORT_ATTR(_name, _mode, _show, _store) \
-struct port_attribute port_attr_##_name = __ATTR(_name, _mode, _show, _store)
-
-#define PORT_ATTR_RO(_name) \
-struct port_attribute port_attr_##_name = __ATTR_RO(_name)
+struct hw_stats_port_attribute {
+ struct ib_port_attribute attr;
+ ssize_t (*show)(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ unsigned int index, unsigned int port_num, char *buf);
+ ssize_t (*store)(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ unsigned int index, unsigned int port_num,
+ const char *buf, size_t count);
+};
-struct port_table_attribute {
- struct port_attribute attr;
- char name[8];
- int index;
- __be16 attr_id;
+struct hw_stats_device_data {
+ struct attribute_group group;
+ struct rdma_hw_stats *stats;
+ struct hw_stats_device_attribute attrs[];
};
-struct hw_stats_attribute {
- struct attribute attr;
- ssize_t (*show)(struct kobject *kobj,
- struct attribute *attr, char *buf);
- ssize_t (*store)(struct kobject *kobj,
- struct attribute *attr,
- const char *buf,
- size_t count);
- int index;
- u8 port_num;
+struct hw_stats_port_data {
+ struct rdma_hw_stats *stats;
+ struct hw_stats_port_attribute attrs[];
};
static ssize_t port_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
- struct port_attribute *port_attr =
- container_of(attr, struct port_attribute, attr);
+ struct ib_port_attribute *port_attr =
+ container_of(attr, struct ib_port_attribute, attr);
struct ib_port *p = container_of(kobj, struct ib_port, kobj);
if (!port_attr->show)
return -EIO;
- return port_attr->show(p, port_attr, buf);
+ return port_attr->show(p->ibdev, p->port_num, port_attr, buf);
}
static ssize_t port_attr_store(struct kobject *kobj,
struct attribute *attr,
const char *buf, size_t count)
{
- struct port_attribute *port_attr =
- container_of(attr, struct port_attribute, attr);
+ struct ib_port_attribute *port_attr =
+ container_of(attr, struct ib_port_attribute, attr);
struct ib_port *p = container_of(kobj, struct ib_port, kobj);
if (!port_attr->store)
return -EIO;
- return port_attr->store(p, port_attr, buf, count);
+ return port_attr->store(p->ibdev, p->port_num, port_attr, buf, count);
}
+struct ib_device *ib_port_sysfs_get_ibdev_kobj(struct kobject *kobj,
+ u32 *port_num)
+{
+ struct ib_port *port = container_of(kobj, struct ib_port, kobj);
+
+ *port_num = port->port_num;
+ return port->ibdev;
+}
+EXPORT_SYMBOL(ib_port_sysfs_get_ibdev_kobj);
+
static const struct sysfs_ops port_sysfs_ops = {
.show = port_attr_show,
.store = port_attr_store
};
+static ssize_t hw_stat_device_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hw_stats_device_attribute *stat_attr =
+ container_of(attr, struct hw_stats_device_attribute, attr);
+ struct ib_device *ibdev = container_of(dev, struct ib_device, dev);
+
+ return stat_attr->show(ibdev, ibdev->hw_stats_data->stats,
+ stat_attr - ibdev->hw_stats_data->attrs, 0, buf);
+}
+
+static ssize_t hw_stat_device_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct hw_stats_device_attribute *stat_attr =
+ container_of(attr, struct hw_stats_device_attribute, attr);
+ struct ib_device *ibdev = container_of(dev, struct ib_device, dev);
+
+ return stat_attr->store(ibdev, ibdev->hw_stats_data->stats,
+ stat_attr - ibdev->hw_stats_data->attrs, 0, buf,
+ count);
+}
+
+static ssize_t hw_stat_port_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf)
+{
+ struct hw_stats_port_attribute *stat_attr =
+ container_of(attr, struct hw_stats_port_attribute, attr);
+ struct ib_port *port = ibdev->port_data[port_num].sysfs;
+
+ return stat_attr->show(ibdev, port->hw_stats_data->stats,
+ stat_attr - port->hw_stats_data->attrs,
+ port->port_num, buf);
+}
+
+static ssize_t hw_stat_port_store(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct hw_stats_port_attribute *stat_attr =
+ container_of(attr, struct hw_stats_port_attribute, attr);
+ struct ib_port *port = ibdev->port_data[port_num].sysfs;
+
+ return stat_attr->store(ibdev, port->hw_stats_data->stats,
+ stat_attr - port->hw_stats_data->attrs,
+ port->port_num, buf, count);
+}
+
static ssize_t gid_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
- struct port_attribute *port_attr =
- container_of(attr, struct port_attribute, attr);
+ struct ib_port_attribute *port_attr =
+ container_of(attr, struct ib_port_attribute, attr);
struct ib_port *p = container_of(kobj, struct gid_attr_group,
kobj)->port;
if (!port_attr->show)
return -EIO;
- return port_attr->show(p, port_attr, buf);
+ return port_attr->show(p->ibdev, p->port_num, port_attr, buf);
}
static const struct sysfs_ops gid_attr_sysfs_ops = {
.show = gid_attr_show
};
-static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
- char *buf)
+static ssize_t state_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
- static const char *state_name[] = {
- [IB_PORT_NOP] = "NOP",
- [IB_PORT_DOWN] = "DOWN",
- [IB_PORT_INIT] = "INIT",
- [IB_PORT_ARMED] = "ARMED",
- [IB_PORT_ACTIVE] = "ACTIVE",
- [IB_PORT_ACTIVE_DEFER] = "ACTIVE_DEFER"
- };
-
- ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
- return sprintf(buf, "%d: %s\n", attr.state,
- attr.state >= 0 && attr.state < ARRAY_SIZE(state_name) ?
- state_name[attr.state] : "UNKNOWN");
+ return sysfs_emit(buf, "%d: %s\n", attr.state,
+ ib_port_state_to_str(attr.state));
}
-static ssize_t lid_show(struct ib_port *p, struct port_attribute *unused,
- char *buf)
+static ssize_t lid_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
- ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
- return sprintf(buf, "0x%x\n", attr.lid);
+ return sysfs_emit(buf, "0x%x\n", attr.lid);
}
-static ssize_t lid_mask_count_show(struct ib_port *p,
- struct port_attribute *unused,
- char *buf)
+static ssize_t lid_mask_count_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
- ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
- return sprintf(buf, "%d\n", attr.lmc);
+ return sysfs_emit(buf, "%u\n", attr.lmc);
}
-static ssize_t sm_lid_show(struct ib_port *p, struct port_attribute *unused,
- char *buf)
+static ssize_t sm_lid_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
- ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
- return sprintf(buf, "0x%x\n", attr.sm_lid);
+ return sysfs_emit(buf, "0x%x\n", attr.sm_lid);
}
-static ssize_t sm_sl_show(struct ib_port *p, struct port_attribute *unused,
- char *buf)
+static ssize_t sm_sl_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
- ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
- return sprintf(buf, "%d\n", attr.sm_sl);
+ return sysfs_emit(buf, "%u\n", attr.sm_sl);
}
-static ssize_t cap_mask_show(struct ib_port *p, struct port_attribute *unused,
- char *buf)
+static ssize_t cap_mask_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
- ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
- return sprintf(buf, "0x%08x\n", attr.port_cap_flags);
+ return sysfs_emit(buf, "0x%08x\n", attr.port_cap_flags);
}
-static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
- char *buf)
+static ssize_t rate_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
struct ib_port_attr attr;
char *speed = "";
int rate; /* in deci-Gb/sec */
ssize_t ret;
- ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
@@ -272,6 +326,14 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
speed = " HDR";
rate = 500;
break;
+ case IB_SPEED_NDR:
+ speed = " NDR";
+ rate = 1000;
+ break;
+ case IB_SPEED_XDR:
+ speed = " XDR";
+ rate = 2000;
+ break;
case IB_SPEED_SDR:
default: /* default to SDR for invalid rates */
speed = " SDR";
@@ -283,115 +345,136 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
if (rate < 0)
return -EINVAL;
- return sprintf(buf, "%d%s Gb/sec (%dX%s)\n",
- rate / 10, rate % 10 ? ".5" : "",
- ib_width_enum_to_int(attr.active_width), speed);
+ return sysfs_emit(buf, "%d%s Gb/sec (%dX%s)\n", rate / 10,
+ rate % 10 ? ".5" : "",
+ ib_width_enum_to_int(attr.active_width), speed);
}
-static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused,
- char *buf)
+static const char *phys_state_to_str(enum ib_port_phys_state phys_state)
+{
+ static const char *phys_state_str[] = {
+ "<unknown>",
+ "Sleep",
+ "Polling",
+ "Disabled",
+ "PortConfigurationTraining",
+ "LinkUp",
+ "LinkErrorRecovery",
+ "Phy Test",
+ };
+
+ if (phys_state < ARRAY_SIZE(phys_state_str))
+ return phys_state_str[phys_state];
+ return "<unknown>";
+}
+
+static ssize_t phys_state_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
- ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
- switch (attr.phys_state) {
- case 1: return sprintf(buf, "1: Sleep\n");
- case 2: return sprintf(buf, "2: Polling\n");
- case 3: return sprintf(buf, "3: Disabled\n");
- case 4: return sprintf(buf, "4: PortConfigurationTraining\n");
- case 5: return sprintf(buf, "5: LinkUp\n");
- case 6: return sprintf(buf, "6: LinkErrorRecovery\n");
- case 7: return sprintf(buf, "7: Phy Test\n");
- default: return sprintf(buf, "%d: <unknown>\n", attr.phys_state);
- }
+ return sysfs_emit(buf, "%u: %s\n", attr.phys_state,
+ phys_state_to_str(attr.phys_state));
}
-static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused,
- char *buf)
+static ssize_t link_layer_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
- switch (rdma_port_get_link_layer(p->ibdev, p->port_num)) {
+ const char *output;
+
+ switch (rdma_port_get_link_layer(ibdev, port_num)) {
case IB_LINK_LAYER_INFINIBAND:
- return sprintf(buf, "%s\n", "InfiniBand");
+ output = "InfiniBand";
+ break;
case IB_LINK_LAYER_ETHERNET:
- return sprintf(buf, "%s\n", "Ethernet");
+ output = "Ethernet";
+ break;
default:
- return sprintf(buf, "%s\n", "Unknown");
+ output = "Unknown";
+ break;
}
+
+ return sysfs_emit(buf, "%s\n", output);
}
-static PORT_ATTR_RO(state);
-static PORT_ATTR_RO(lid);
-static PORT_ATTR_RO(lid_mask_count);
-static PORT_ATTR_RO(sm_lid);
-static PORT_ATTR_RO(sm_sl);
-static PORT_ATTR_RO(cap_mask);
-static PORT_ATTR_RO(rate);
-static PORT_ATTR_RO(phys_state);
-static PORT_ATTR_RO(link_layer);
+static IB_PORT_ATTR_RO(state);
+static IB_PORT_ATTR_RO(lid);
+static IB_PORT_ATTR_RO(lid_mask_count);
+static IB_PORT_ATTR_RO(sm_lid);
+static IB_PORT_ATTR_RO(sm_sl);
+static IB_PORT_ATTR_RO(cap_mask);
+static IB_PORT_ATTR_RO(rate);
+static IB_PORT_ATTR_RO(phys_state);
+static IB_PORT_ATTR_RO(link_layer);
static struct attribute *port_default_attrs[] = {
- &port_attr_state.attr,
- &port_attr_lid.attr,
- &port_attr_lid_mask_count.attr,
- &port_attr_sm_lid.attr,
- &port_attr_sm_sl.attr,
- &port_attr_cap_mask.attr,
- &port_attr_rate.attr,
- &port_attr_phys_state.attr,
- &port_attr_link_layer.attr,
+ &ib_port_attr_state.attr,
+ &ib_port_attr_lid.attr,
+ &ib_port_attr_lid_mask_count.attr,
+ &ib_port_attr_sm_lid.attr,
+ &ib_port_attr_sm_sl.attr,
+ &ib_port_attr_cap_mask.attr,
+ &ib_port_attr_rate.attr,
+ &ib_port_attr_phys_state.attr,
+ &ib_port_attr_link_layer.attr,
NULL
};
+ATTRIBUTE_GROUPS(port_default);
-static size_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf)
+static ssize_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf)
{
struct net_device *ndev;
- size_t ret = -EINVAL;
+ int ret = -EINVAL;
rcu_read_lock();
ndev = rcu_dereference(gid_attr->ndev);
if (ndev)
- ret = sprintf(buf, "%s\n", ndev->name);
+ ret = sysfs_emit(buf, "%s\n", ndev->name);
rcu_read_unlock();
return ret;
}
-static size_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf)
+static ssize_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf)
{
- return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type));
+ return sysfs_emit(buf, "%s\n",
+ ib_cache_gid_type_str(gid_attr->gid_type));
}
static ssize_t _show_port_gid_attr(
- struct ib_port *p, struct port_attribute *attr, char *buf,
- size_t (*print)(const struct ib_gid_attr *gid_attr, char *buf))
+ struct ib_device *ibdev, u32 port_num, struct ib_port_attribute *attr,
+ char *buf,
+ ssize_t (*print)(const struct ib_gid_attr *gid_attr, char *buf))
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
const struct ib_gid_attr *gid_attr;
ssize_t ret;
- gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index);
+ gid_attr = rdma_get_gid_attr(ibdev, port_num, tab_attr->index);
if (IS_ERR(gid_attr))
- return PTR_ERR(gid_attr);
+ /* -EINVAL is returned for user space compatibility reasons. */
+ return -EINVAL;
ret = print(gid_attr, buf);
rdma_put_gid_attr(gid_attr);
return ret;
}
-static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
- char *buf)
+static ssize_t show_port_gid(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf)
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
const struct ib_gid_attr *gid_attr;
- ssize_t ret;
+ int len;
- gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index);
+ gid_attr = rdma_get_gid_attr(ibdev, port_num, tab_attr->index);
if (IS_ERR(gid_attr)) {
const union ib_gid zgid = {};
@@ -404,54 +487,56 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
* space throwing such error on fail to read gid, return zero
* GID as before. This maintains backward compatibility.
*/
- return sprintf(buf, "%pI6\n", zgid.raw);
+ return sysfs_emit(buf, "%pI6\n", zgid.raw);
}
- ret = sprintf(buf, "%pI6\n", gid_attr->gid.raw);
+ len = sysfs_emit(buf, "%pI6\n", gid_attr->gid.raw);
rdma_put_gid_attr(gid_attr);
- return ret;
+ return len;
}
-static ssize_t show_port_gid_attr_ndev(struct ib_port *p,
- struct port_attribute *attr, char *buf)
+static ssize_t show_port_gid_attr_ndev(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr,
+ char *buf)
{
- return _show_port_gid_attr(p, attr, buf, print_ndev);
+ return _show_port_gid_attr(ibdev, port_num, attr, buf, print_ndev);
}
-static ssize_t show_port_gid_attr_gid_type(struct ib_port *p,
- struct port_attribute *attr,
+static ssize_t show_port_gid_attr_gid_type(struct ib_device *ibdev,
+ u32 port_num,
+ struct ib_port_attribute *attr,
char *buf)
{
- return _show_port_gid_attr(p, attr, buf, print_gid_type);
+ return _show_port_gid_attr(ibdev, port_num, attr, buf, print_gid_type);
}
-static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
- char *buf)
+static ssize_t show_port_pkey(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf)
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
u16 pkey;
- ssize_t ret;
+ int ret;
- ret = ib_query_pkey(p->ibdev, p->port_num, tab_attr->index, &pkey);
+ ret = ib_query_pkey(ibdev, port_num, tab_attr->index, &pkey);
if (ret)
return ret;
- return sprintf(buf, "0x%04x\n", pkey);
+ return sysfs_emit(buf, "0x%04x\n", pkey);
}
#define PORT_PMA_ATTR(_name, _counter, _width, _offset) \
struct port_table_attribute port_pma_attr_##_name = { \
.attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
.index = (_offset) | ((_width) << 16) | ((_counter) << 24), \
- .attr_id = IB_PMA_PORT_COUNTERS , \
+ .attr_id = IB_PMA_PORT_COUNTERS, \
}
#define PORT_PMA_ATTR_EXT(_name, _width, _offset) \
struct port_table_attribute port_pma_attr_ext_##_name = { \
.attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
.index = (_offset) | ((_width) << 16), \
- .attr_id = IB_PMA_PORT_COUNTERS_EXT , \
+ .attr_id = IB_PMA_PORT_COUNTERS_EXT, \
}
/*
@@ -470,8 +555,8 @@ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr,
if (!dev->ops.process_mad)
return -ENOSYS;
- in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
- out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
+ out_mad = kzalloc(sizeof(*out_mad), GFP_KERNEL);
if (!in_mad || !out_mad) {
ret = -ENOMEM;
goto out;
@@ -486,10 +571,8 @@ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr,
if (attr != IB_PMA_CLASS_PORT_INFO)
in_mad->data[41] = port_num; /* PortSelect field */
- if ((dev->ops.process_mad(dev, IB_MAD_IGNORE_MKEY,
- port_num, NULL, NULL,
- (const struct ib_mad_hdr *)in_mad, mad_size,
- (struct ib_mad_hdr *)out_mad, &mad_size,
+ if ((dev->ops.process_mad(dev, IB_MAD_IGNORE_MKEY, port_num, NULL, NULL,
+ in_mad, out_mad, &mad_size,
&out_mad_pkey_index) &
(IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) !=
(IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) {
@@ -504,47 +587,45 @@ out:
return ret;
}
-static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
- char *buf)
+static ssize_t show_pma_counter(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf)
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
int offset = tab_attr->index & 0xffff;
int width = (tab_attr->index >> 16) & 0xff;
- ssize_t ret;
+ int ret;
u8 data[8];
+ int len;
- ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data,
+ ret = get_perf_mad(ibdev, port_num, tab_attr->attr_id, &data,
40 + offset / 8, sizeof(data));
if (ret < 0)
return ret;
switch (width) {
case 4:
- ret = sprintf(buf, "%u\n", (*data >>
- (4 - (offset % 8))) & 0xf);
+ len = sysfs_emit(buf, "%d\n",
+ (*data >> (4 - (offset % 8))) & 0xf);
break;
case 8:
- ret = sprintf(buf, "%u\n", *data);
+ len = sysfs_emit(buf, "%u\n", *data);
break;
case 16:
- ret = sprintf(buf, "%u\n",
- be16_to_cpup((__be16 *)data));
+ len = sysfs_emit(buf, "%u\n", be16_to_cpup((__be16 *)data));
break;
case 32:
- ret = sprintf(buf, "%u\n",
- be32_to_cpup((__be32 *)data));
+ len = sysfs_emit(buf, "%u\n", be32_to_cpup((__be32 *)data));
break;
case 64:
- ret = sprintf(buf, "%llu\n",
- be64_to_cpup((__be64 *)data));
+ len = sysfs_emit(buf, "%llu\n", be64_to_cpup((__be64 *)data));
break;
-
default:
- ret = 0;
+ len = 0;
+ break;
}
- return ret;
+ return len;
}
static PORT_PMA_ATTR(symbol_error , 0, 16, 32);
@@ -644,72 +725,49 @@ static struct attribute *pma_attrs_noietf[] = {
NULL
};
-static struct attribute_group pma_group = {
+static const struct attribute_group pma_group = {
.name = "counters",
.attrs = pma_attrs
};
-static struct attribute_group pma_group_ext = {
+static const struct attribute_group pma_group_ext = {
.name = "counters",
.attrs = pma_attrs_ext
};
-static struct attribute_group pma_group_noietf = {
+static const struct attribute_group pma_group_noietf = {
.name = "counters",
.attrs = pma_attrs_noietf
};
static void ib_port_release(struct kobject *kobj)
{
- struct ib_port *p = container_of(kobj, struct ib_port, kobj);
- struct attribute *a;
+ struct ib_port *port = container_of(kobj, struct ib_port, kobj);
int i;
- if (p->gid_group.attrs) {
- for (i = 0; (a = p->gid_group.attrs[i]); ++i)
- kfree(a);
-
- kfree(p->gid_group.attrs);
- }
-
- if (p->pkey_group.attrs) {
- for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
- kfree(a);
-
- kfree(p->pkey_group.attrs);
- }
-
- kfree(p);
+ for (i = 0; i != ARRAY_SIZE(port->groups); i++)
+ kfree(port->groups[i].attrs);
+ if (port->hw_stats_data)
+ rdma_free_hw_stats_struct(port->hw_stats_data->stats);
+ kfree(port->hw_stats_data);
+ kvfree(port);
}
static void ib_port_gid_attr_release(struct kobject *kobj)
{
- struct gid_attr_group *g = container_of(kobj, struct gid_attr_group,
- kobj);
- struct attribute *a;
+ struct gid_attr_group *gid_attr_group =
+ container_of(kobj, struct gid_attr_group, kobj);
int i;
- if (g->ndev.attrs) {
- for (i = 0; (a = g->ndev.attrs[i]); ++i)
- kfree(a);
-
- kfree(g->ndev.attrs);
- }
-
- if (g->type.attrs) {
- for (i = 0; (a = g->type.attrs[i]); ++i)
- kfree(a);
-
- kfree(g->type.attrs);
- }
-
- kfree(g);
+ for (i = 0; i != ARRAY_SIZE(gid_attr_group->groups); i++)
+ kfree(gid_attr_group->groups[i].attrs);
+ kfree(gid_attr_group);
}
static struct kobj_type port_type = {
.release = ib_port_release,
.sysfs_ops = &port_sysfs_ops,
- .default_attrs = port_default_attrs
+ .default_groups = port_default_groups,
};
static struct kobj_type gid_attr_type = {
@@ -717,55 +775,12 @@ static struct kobj_type gid_attr_type = {
.release = ib_port_gid_attr_release
};
-static struct attribute **
-alloc_group_attrs(ssize_t (*show)(struct ib_port *,
- struct port_attribute *, char *buf),
- int len)
-{
- struct attribute **tab_attr;
- struct port_table_attribute *element;
- int i;
-
- tab_attr = kcalloc(1 + len, sizeof(struct attribute *), GFP_KERNEL);
- if (!tab_attr)
- return NULL;
-
- for (i = 0; i < len; i++) {
- element = kzalloc(sizeof(struct port_table_attribute),
- GFP_KERNEL);
- if (!element)
- goto err;
-
- if (snprintf(element->name, sizeof(element->name),
- "%d", i) >= sizeof(element->name)) {
- kfree(element);
- goto err;
- }
-
- element->attr.attr.name = element->name;
- element->attr.attr.mode = S_IRUGO;
- element->attr.show = show;
- element->index = i;
- sysfs_attr_init(&element->attr.attr);
-
- tab_attr[i] = &element->attr.attr;
- }
-
- return tab_attr;
-
-err:
- while (--i >= 0)
- kfree(tab_attr[i]);
- kfree(tab_attr);
- return NULL;
-}
-
/*
* Figure out which counter table to use depending on
* the device capabilities.
*/
-static struct attribute_group *get_counter_table(struct ib_device *dev,
- int port_num)
+static const struct attribute_group *get_counter_table(struct ib_device *dev,
+ int port_num)
{
struct ib_class_port_info cpi;
@@ -785,7 +800,7 @@ static struct attribute_group *get_counter_table(struct ib_device *dev,
}
static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats,
- u8 port_num, int index)
+ u32 port_num, int index)
{
int ret;
@@ -800,74 +815,50 @@ static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats,
return 0;
}
-static ssize_t print_hw_stat(struct rdma_hw_stats *stats, int index, char *buf)
+static int print_hw_stat(struct ib_device *dev, int port_num,
+ struct rdma_hw_stats *stats, int index, char *buf)
{
- return sprintf(buf, "%llu\n", stats->value[index]);
+ u64 v = rdma_counter_get_hwstat_value(dev, port_num, index);
+
+ return sysfs_emit(buf, "%llu\n", stats->value[index] + v);
}
-static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr,
- char *buf)
+static ssize_t show_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats, unsigned int index,
+ unsigned int port_num, char *buf)
{
- struct ib_device *dev;
- struct ib_port *port;
- struct hw_stats_attribute *hsa;
- struct rdma_hw_stats *stats;
int ret;
- hsa = container_of(attr, struct hw_stats_attribute, attr);
- if (!hsa->port_num) {
- dev = container_of((struct device *)kobj,
- struct ib_device, dev);
- stats = dev->hw_stats;
- } else {
- port = container_of(kobj, struct ib_port, kobj);
- dev = port->ibdev;
- stats = port->hw_stats;
- }
mutex_lock(&stats->lock);
- ret = update_hw_stats(dev, stats, hsa->port_num, hsa->index);
+ ret = update_hw_stats(ibdev, stats, port_num, index);
if (ret)
goto unlock;
- ret = print_hw_stat(stats, hsa->index, buf);
+ ret = print_hw_stat(ibdev, port_num, stats, index, buf);
unlock:
mutex_unlock(&stats->lock);
return ret;
}
-static ssize_t show_stats_lifespan(struct kobject *kobj,
- struct attribute *attr,
+static ssize_t show_stats_lifespan(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ unsigned int index, unsigned int port_num,
char *buf)
{
- struct hw_stats_attribute *hsa;
- struct rdma_hw_stats *stats;
int msecs;
- hsa = container_of(attr, struct hw_stats_attribute, attr);
- if (!hsa->port_num) {
- struct ib_device *dev = container_of((struct device *)kobj,
- struct ib_device, dev);
-
- stats = dev->hw_stats;
- } else {
- struct ib_port *p = container_of(kobj, struct ib_port, kobj);
-
- stats = p->hw_stats;
- }
-
mutex_lock(&stats->lock);
msecs = jiffies_to_msecs(stats->lifespan);
mutex_unlock(&stats->lock);
- return sprintf(buf, "%d\n", msecs);
+ return sysfs_emit(buf, "%d\n", msecs);
}
-static ssize_t set_stats_lifespan(struct kobject *kobj,
- struct attribute *attr,
- const char *buf, size_t count)
+static ssize_t set_stats_lifespan(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ unsigned int index, unsigned int port_num,
+ const char *buf, size_t count)
{
- struct hw_stats_attribute *hsa;
- struct rdma_hw_stats *stats;
int msecs;
int jiffies;
int ret;
@@ -878,17 +869,6 @@ static ssize_t set_stats_lifespan(struct kobject *kobj,
if (msecs < 0 || msecs > 10000)
return -EINVAL;
jiffies = msecs_to_jiffies(msecs);
- hsa = container_of(attr, struct hw_stats_attribute, attr);
- if (!hsa->port_num) {
- struct ib_device *dev = container_of((struct device *)kobj,
- struct ib_device, dev);
-
- stats = dev->hw_stats;
- } else {
- struct ib_port *p = container_of(kobj, struct ib_port, kobj);
-
- stats = p->hw_stats;
- }
mutex_lock(&stats->lock);
stats->lifespan = jiffies;
@@ -897,298 +877,433 @@ static ssize_t set_stats_lifespan(struct kobject *kobj,
return count;
}
-static void free_hsag(struct kobject *kobj, struct attribute_group *attr_group)
+static struct hw_stats_device_data *
+alloc_hw_stats_device(struct ib_device *ibdev)
{
- struct attribute **attr;
-
- sysfs_remove_group(kobj, attr_group);
+ struct hw_stats_device_data *data;
+ struct rdma_hw_stats *stats;
- for (attr = attr_group->attrs; *attr; attr++)
- kfree(*attr);
- kfree(attr_group);
-}
+ if (!ibdev->ops.alloc_hw_device_stats)
+ return ERR_PTR(-EOPNOTSUPP);
+ stats = ibdev->ops.alloc_hw_device_stats(ibdev);
+ if (!stats)
+ return ERR_PTR(-ENOMEM);
+ if (!stats->descs || stats->num_counters <= 0)
+ goto err_free_stats;
-static struct attribute *alloc_hsa(int index, u8 port_num, const char *name)
-{
- struct hw_stats_attribute *hsa;
+ /*
+ * Two extra attribute elements here, one for the lifespan entry and
+ * one to NULL terminate the list for the sysfs core code
+ */
+ data = kzalloc(struct_size(data, attrs, size_add(stats->num_counters, 1)),
+ GFP_KERNEL);
+ if (!data)
+ goto err_free_stats;
+ data->group.attrs = kcalloc(stats->num_counters + 2,
+ sizeof(*data->group.attrs), GFP_KERNEL);
+ if (!data->group.attrs)
+ goto err_free_data;
- hsa = kmalloc(sizeof(*hsa), GFP_KERNEL);
- if (!hsa)
- return NULL;
+ data->group.name = "hw_counters";
+ data->stats = stats;
+ return data;
- hsa->attr.name = (char *)name;
- hsa->attr.mode = S_IRUGO;
- hsa->show = show_hw_stats;
- hsa->store = NULL;
- hsa->index = index;
- hsa->port_num = port_num;
+err_free_data:
+ kfree(data);
+err_free_stats:
+ rdma_free_hw_stats_struct(stats);
+ return ERR_PTR(-ENOMEM);
+}
- return &hsa->attr;
+void ib_device_release_hw_stats(struct hw_stats_device_data *data)
+{
+ kfree(data->group.attrs);
+ rdma_free_hw_stats_struct(data->stats);
+ kfree(data);
}
-static struct attribute *alloc_hsa_lifespan(char *name, u8 port_num)
+int ib_setup_device_attrs(struct ib_device *ibdev)
{
- struct hw_stats_attribute *hsa;
+ struct hw_stats_device_attribute *attr;
+ struct hw_stats_device_data *data;
+ bool opstat_skipped = false;
+ int i, ret, pos = 0;
+
+ data = alloc_hw_stats_device(ibdev);
+ if (IS_ERR(data)) {
+ if (PTR_ERR(data) == -EOPNOTSUPP)
+ return 0;
+ return PTR_ERR(data);
+ }
+ ibdev->hw_stats_data = data;
- hsa = kmalloc(sizeof(*hsa), GFP_KERNEL);
- if (!hsa)
- return NULL;
+ ret = ibdev->ops.get_hw_stats(ibdev, data->stats, 0,
+ data->stats->num_counters);
+ if (ret != data->stats->num_counters) {
+ if (WARN_ON(ret >= 0))
+ return -EINVAL;
+ return ret;
+ }
+
+ data->stats->timestamp = jiffies;
+
+ for (i = 0; i < data->stats->num_counters; i++) {
+ if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) {
+ opstat_skipped = true;
+ continue;
+ }
- hsa->attr.name = name;
- hsa->attr.mode = S_IWUSR | S_IRUGO;
- hsa->show = show_stats_lifespan;
- hsa->store = set_stats_lifespan;
- hsa->index = 0;
- hsa->port_num = port_num;
+ WARN_ON(opstat_skipped);
+ attr = &data->attrs[pos];
+ sysfs_attr_init(&attr->attr.attr);
+ attr->attr.attr.name = data->stats->descs[i].name;
+ attr->attr.attr.mode = 0444;
+ attr->attr.show = hw_stat_device_show;
+ attr->show = show_hw_stats;
+ data->group.attrs[pos] = &attr->attr.attr;
+ pos++;
+ }
- return &hsa->attr;
+ attr = &data->attrs[pos];
+ sysfs_attr_init(&attr->attr.attr);
+ attr->attr.attr.name = "lifespan";
+ attr->attr.attr.mode = 0644;
+ attr->attr.show = hw_stat_device_show;
+ attr->show = show_stats_lifespan;
+ attr->attr.store = hw_stat_device_store;
+ attr->store = set_stats_lifespan;
+ data->group.attrs[pos] = &attr->attr.attr;
+ for (i = 0; i != ARRAY_SIZE(ibdev->groups); i++)
+ if (!ibdev->groups[i]) {
+ ibdev->groups[i] = &data->group;
+ ibdev->hw_stats_attr_index = i;
+ return 0;
+ }
+ WARN(true, "struct ib_device->groups is too small");
+ return -EINVAL;
}
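/*
 * With the group stored in ibdev->groups, the device-wide counters show up
 * as /sys/class/infiniband/<dev>/hw_counters/<counter-name> together with a
 * writable "lifespan" entry that bounds how stale a cached value may be
 * (0-10000 ms, see set_stats_lifespan() above).
 */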
-static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
- u8 port_num)
+static struct hw_stats_port_data *
+alloc_hw_stats_port(struct ib_port *port, struct attribute_group *group)
{
- struct attribute_group *hsag;
+ struct ib_device *ibdev = port->ibdev;
+ struct hw_stats_port_data *data;
struct rdma_hw_stats *stats;
- int i, ret;
-
- stats = device->ops.alloc_hw_stats(device, port_num);
+ if (!ibdev->ops.alloc_hw_port_stats)
+ return ERR_PTR(-EOPNOTSUPP);
+ stats = ibdev->ops.alloc_hw_port_stats(port->ibdev, port->port_num);
if (!stats)
- return;
-
- if (!stats->names || stats->num_counters <= 0)
+ return ERR_PTR(-ENOMEM);
+ if (!stats->descs || stats->num_counters <= 0)
goto err_free_stats;
/*
 * Two extra attribute elements here, one for the lifespan entry and
* one to NULL terminate the list for the sysfs core code
*/
- hsag = kzalloc(sizeof(*hsag) +
- sizeof(void *) * (stats->num_counters + 2),
+ data = kzalloc(struct_size(data, attrs, size_add(stats->num_counters, 1)),
GFP_KERNEL);
- if (!hsag)
+ if (!data)
goto err_free_stats;
+ group->attrs = kcalloc(stats->num_counters + 2,
+ sizeof(*group->attrs), GFP_KERNEL);
+ if (!group->attrs)
+ goto err_free_data;
- ret = device->ops.get_hw_stats(device, stats, port_num,
- stats->num_counters);
- if (ret != stats->num_counters)
- goto err_free_hsag;
+ group->name = "hw_counters";
+ data->stats = stats;
+ return data;
- stats->timestamp = jiffies;
-
- hsag->name = "hw_counters";
- hsag->attrs = (void *)hsag + sizeof(*hsag);
+err_free_data:
+ kfree(data);
+err_free_stats:
+ rdma_free_hw_stats_struct(stats);
+ return ERR_PTR(-ENOMEM);
+}
- for (i = 0; i < stats->num_counters; i++) {
- hsag->attrs[i] = alloc_hsa(i, port_num, stats->names[i]);
- if (!hsag->attrs[i])
- goto err;
- sysfs_attr_init(hsag->attrs[i]);
+static int setup_hw_port_stats(struct ib_port *port,
+ struct attribute_group *group)
+{
+ struct hw_stats_port_attribute *attr;
+ struct hw_stats_port_data *data;
+ bool opstat_skipped = false;
+ int i, ret, pos = 0;
+
+ data = alloc_hw_stats_port(port, group);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ ret = port->ibdev->ops.get_hw_stats(port->ibdev, data->stats,
+ port->port_num,
+ data->stats->num_counters);
+ if (ret != data->stats->num_counters) {
+ if (WARN_ON(ret >= 0))
+ return -EINVAL;
+ return ret;
}
- mutex_init(&stats->lock);
- /* treat an error here as non-fatal */
- hsag->attrs[i] = alloc_hsa_lifespan("lifespan", port_num);
- if (hsag->attrs[i])
- sysfs_attr_init(hsag->attrs[i]);
+ data->stats->timestamp = jiffies;
- if (port) {
- struct kobject *kobj = &port->kobj;
- ret = sysfs_create_group(kobj, hsag);
- if (ret)
- goto err;
- port->hw_stats_ag = hsag;
- port->hw_stats = stats;
- } else {
- struct kobject *kobj = &device->dev.kobj;
- ret = sysfs_create_group(kobj, hsag);
- if (ret)
- goto err;
- device->hw_stats_ag = hsag;
- device->hw_stats = stats;
+ for (i = 0; i < data->stats->num_counters; i++) {
+ if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) {
+ opstat_skipped = true;
+ continue;
+ }
+
+ WARN_ON(opstat_skipped);
+ attr = &data->attrs[pos];
+ sysfs_attr_init(&attr->attr.attr);
+ attr->attr.attr.name = data->stats->descs[i].name;
+ attr->attr.attr.mode = 0444;
+ attr->attr.show = hw_stat_port_show;
+ attr->show = show_hw_stats;
+ group->attrs[pos] = &attr->attr.attr;
+ pos++;
}
- return;
+ attr = &data->attrs[pos];
+ sysfs_attr_init(&attr->attr.attr);
+ attr->attr.attr.name = "lifespan";
+ attr->attr.attr.mode = 0644;
+ attr->attr.show = hw_stat_port_show;
+ attr->show = show_stats_lifespan;
+ attr->attr.store = hw_stat_port_store;
+ attr->store = set_stats_lifespan;
+ group->attrs[pos] = &attr->attr.attr;
+
+ port->hw_stats_data = data;
+ return 0;
+}
-err:
- for (; i >= 0; i--)
- kfree(hsag->attrs[i]);
-err_free_hsag:
- kfree(hsag);
-err_free_stats:
- kfree(stats);
- return;
+struct rdma_hw_stats *ib_get_hw_stats_port(struct ib_device *ibdev,
+ u32 port_num)
+{
+ if (!ibdev->port_data || !rdma_is_port_valid(ibdev, port_num) ||
+ !ibdev->port_data[port_num].sysfs->hw_stats_data)
+ return NULL;
+ return ibdev->port_data[port_num].sysfs->hw_stats_data->stats;
}
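/*
 * Driver-side sketch of the hooks consumed by alloc_hw_stats_port() and
 * setup_hw_port_stats() above (the device-wide variant,
 * ops.alloc_hw_device_stats(ibdev), follows the same pattern without a
 * port argument).  struct rdma_stat_desc, rdma_alloc_hw_stats_struct() and
 * RDMA_HW_STATS_DEFAULT_LIFESPAN are assumed counterparts of the
 * rdma_free_hw_stats_struct()/descs[] usage seen above; every "demo_*"
 * name is hypothetical.
 */
static const struct rdma_stat_desc demo_port_descs[] = {
	{ .name = "demo_rx_packets" },
	{ .name = "demo_tx_packets" },
};

static struct rdma_hw_stats *demo_alloc_hw_port_stats(struct ib_device *ibdev,
						       u32 port_num)
{
	/* One stats object per port; the core frees it through
	 * rdma_free_hw_stats_struct() when the port kobject is released. */
	return rdma_alloc_hw_stats_struct(demo_port_descs,
					  ARRAY_SIZE(demo_port_descs),
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}

static int demo_get_hw_stats(struct ib_device *ibdev,
			     struct rdma_hw_stats *stats, u32 port_num,
			     int index)
{
	/* Refresh the cached values; returning num_counters tells the core
	 * the whole table is valid (see the ret != num_counters checks). */
	stats->value[0] = demo_read_counter(ibdev, port_num, 0); /* hypothetical */
	stats->value[1] = demo_read_counter(ibdev, port_num, 1); /* hypothetical */
	return ARRAY_SIZE(demo_port_descs);
}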
-static int add_port(struct ib_core_device *coredev, int port_num)
+static int
+alloc_port_table_group(const char *name, struct attribute_group *group,
+ struct port_table_attribute *attrs, size_t num,
+ ssize_t (*show)(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *, char *buf))
{
- struct ib_device *device = rdma_device_to_ibdev(&coredev->dev);
- bool is_full_dev = &device->coredev == coredev;
- struct ib_port *p;
- struct ib_port_attr attr;
+ struct attribute **attr_list;
int i;
- int ret;
-
- ret = ib_query_port(device, port_num, &attr);
- if (ret)
- return ret;
- p = kzalloc(sizeof *p, GFP_KERNEL);
- if (!p)
+ attr_list = kcalloc(num + 1, sizeof(*attr_list), GFP_KERNEL);
+ if (!attr_list)
return -ENOMEM;
- p->ibdev = device;
- p->port_num = port_num;
+ for (i = 0; i < num; i++) {
+ struct port_table_attribute *element = &attrs[i];
- ret = kobject_init_and_add(&p->kobj, &port_type,
- coredev->ports_kobj,
- "%d", port_num);
- if (ret) {
- kfree(p);
- return ret;
- }
+ if (snprintf(element->name, sizeof(element->name), "%d", i) >=
+ sizeof(element->name))
+ goto err;
- p->gid_attr_group = kzalloc(sizeof(*p->gid_attr_group), GFP_KERNEL);
- if (!p->gid_attr_group) {
- ret = -ENOMEM;
- goto err_put;
- }
+ sysfs_attr_init(&element->attr.attr);
+ element->attr.attr.name = element->name;
+ element->attr.attr.mode = 0444;
+ element->attr.show = show;
+ element->index = i;
- p->gid_attr_group->port = p;
- ret = kobject_init_and_add(&p->gid_attr_group->kobj, &gid_attr_type,
- &p->kobj, "gid_attrs");
- if (ret) {
- kfree(p->gid_attr_group);
- goto err_put;
+ attr_list[i] = &element->attr.attr;
}
+ group->name = name;
+ group->attrs = attr_list;
+ return 0;
+err:
+ kfree(attr_list);
+ return -EINVAL;
+}
- if (device->ops.process_mad && is_full_dev) {
- p->pma_table = get_counter_table(device, port_num);
- ret = sysfs_create_group(&p->kobj, p->pma_table);
- if (ret)
- goto err_put_gid_attrs;
- }
+/*
+ * Create the sysfs:
+ * ibp0s9/ports/XX/gid_attrs/{ndevs,types}/YYY
+ * YYY is the gid table index in decimal
+ */
+static int setup_gid_attrs(struct ib_port *port,
+ const struct ib_port_attr *attr)
+{
+ struct gid_attr_group *gid_attr_group;
+ int ret;
- p->gid_group.name = "gids";
- p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
- if (!p->gid_group.attrs) {
- ret = -ENOMEM;
- goto err_remove_pma;
- }
+ gid_attr_group = kzalloc(struct_size(gid_attr_group, attrs_list,
+ size_mul(attr->gid_tbl_len, 2)),
+ GFP_KERNEL);
+ if (!gid_attr_group)
+ return -ENOMEM;
+ gid_attr_group->port = port;
+ kobject_init(&gid_attr_group->kobj, &gid_attr_type);
- ret = sysfs_create_group(&p->kobj, &p->gid_group);
+ ret = alloc_port_table_group("ndevs", &gid_attr_group->groups[0],
+ gid_attr_group->attrs_list,
+ attr->gid_tbl_len,
+ show_port_gid_attr_ndev);
if (ret)
- goto err_free_gid;
+ goto err_put;
+ gid_attr_group->groups_list[0] = &gid_attr_group->groups[0];
- p->gid_attr_group->ndev.name = "ndevs";
- p->gid_attr_group->ndev.attrs = alloc_group_attrs(show_port_gid_attr_ndev,
- attr.gid_tbl_len);
- if (!p->gid_attr_group->ndev.attrs) {
- ret = -ENOMEM;
- goto err_remove_gid;
- }
+ ret = alloc_port_table_group(
+ "types", &gid_attr_group->groups[1],
+ gid_attr_group->attrs_list + attr->gid_tbl_len,
+ attr->gid_tbl_len, show_port_gid_attr_gid_type);
+ if (ret)
+ goto err_put;
+ gid_attr_group->groups_list[1] = &gid_attr_group->groups[1];
- ret = sysfs_create_group(&p->gid_attr_group->kobj,
- &p->gid_attr_group->ndev);
+ ret = kobject_add(&gid_attr_group->kobj, &port->kobj, "gid_attrs");
if (ret)
- goto err_free_gid_ndev;
+ goto err_put;
+ ret = sysfs_create_groups(&gid_attr_group->kobj,
+ gid_attr_group->groups_list);
+ if (ret)
+ goto err_del;
+ port->gid_attr_group = gid_attr_group;
+ return 0;
- p->gid_attr_group->type.name = "types";
- p->gid_attr_group->type.attrs = alloc_group_attrs(show_port_gid_attr_gid_type,
- attr.gid_tbl_len);
- if (!p->gid_attr_group->type.attrs) {
- ret = -ENOMEM;
- goto err_remove_gid_ndev;
- }
+err_del:
+ kobject_del(&gid_attr_group->kobj);
+err_put:
+ kobject_put(&gid_attr_group->kobj);
+ return ret;
+}
- ret = sysfs_create_group(&p->gid_attr_group->kobj,
- &p->gid_attr_group->type);
- if (ret)
- goto err_free_gid_type;
+static void destroy_gid_attrs(struct ib_port *port)
+{
+ struct gid_attr_group *gid_attr_group = port->gid_attr_group;
- p->pkey_group.name = "pkeys";
- p->pkey_group.attrs = alloc_group_attrs(show_port_pkey,
- attr.pkey_tbl_len);
- if (!p->pkey_group.attrs) {
- ret = -ENOMEM;
- goto err_remove_gid_type;
- }
+ if (!gid_attr_group)
+ return;
+ sysfs_remove_groups(&gid_attr_group->kobj, gid_attr_group->groups_list);
+ kobject_del(&gid_attr_group->kobj);
+ kobject_put(&gid_attr_group->kobj);
+}
+
+/*
+ * Create the sysfs:
+ * ibp0s9/ports/XX/{gids,pkeys,counters}/YYY
+ */
+static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num,
+ const struct ib_port_attr *attr)
+{
+ struct ib_device *device = rdma_device_to_ibdev(&coredev->dev);
+ bool is_full_dev = &device->coredev == coredev;
+ const struct attribute_group **cur_group;
+ struct ib_port *p;
+ int ret;
- ret = sysfs_create_group(&p->kobj, &p->pkey_group);
+ p = kvzalloc(struct_size(p, attrs_list,
+ size_add(attr->gid_tbl_len, attr->pkey_tbl_len)),
+ GFP_KERNEL);
+ if (!p)
+ return ERR_PTR(-ENOMEM);
+ p->ibdev = device;
+ p->port_num = port_num;
+ kobject_init(&p->kobj, &port_type);
+
+ if (device->port_data && is_full_dev)
+ device->port_data[port_num].sysfs = p;
+
+ cur_group = p->groups_list;
+ ret = alloc_port_table_group("gids", &p->groups[0], p->attrs_list,
+ attr->gid_tbl_len, show_port_gid);
if (ret)
- goto err_free_pkey;
+ goto err_put;
+ *cur_group++ = &p->groups[0];
- if (device->ops.init_port && is_full_dev) {
- ret = device->ops.init_port(device, port_num, &p->kobj);
+ if (attr->pkey_tbl_len) {
+ ret = alloc_port_table_group("pkeys", &p->groups[1],
+ p->attrs_list + attr->gid_tbl_len,
+ attr->pkey_tbl_len, show_port_pkey);
if (ret)
- goto err_remove_pkey;
+ goto err_put;
+ *cur_group++ = &p->groups[1];
}
/*
* If port == 0, it means hw_counters are per device and not per
- * port, so holder should be device. Therefore skip per port conunter
- * initialization.
+ * port, so holder should be device. Therefore skip per port
+ * counter initialization.
*/
- if (device->ops.alloc_hw_stats && port_num && is_full_dev)
- setup_hw_stats(device, p, port_num);
-
- list_add_tail(&p->kobj.entry, &coredev->port_list);
-
- kobject_uevent(&p->kobj, KOBJ_ADD);
- return 0;
-
-err_remove_pkey:
- sysfs_remove_group(&p->kobj, &p->pkey_group);
-
-err_free_pkey:
- for (i = 0; i < attr.pkey_tbl_len; ++i)
- kfree(p->pkey_group.attrs[i]);
-
- kfree(p->pkey_group.attrs);
- p->pkey_group.attrs = NULL;
-
-err_remove_gid_type:
- sysfs_remove_group(&p->gid_attr_group->kobj,
- &p->gid_attr_group->type);
-
-err_free_gid_type:
- for (i = 0; i < attr.gid_tbl_len; ++i)
- kfree(p->gid_attr_group->type.attrs[i]);
+ if (port_num && is_full_dev) {
+ ret = setup_hw_port_stats(p, &p->groups[2]);
+ if (ret && ret != -EOPNOTSUPP)
+ goto err_put;
+ if (!ret)
+ *cur_group++ = &p->groups[2];
+ }
- kfree(p->gid_attr_group->type.attrs);
- p->gid_attr_group->type.attrs = NULL;
+ if (device->ops.process_mad && is_full_dev)
+ *cur_group++ = get_counter_table(device, port_num);
-err_remove_gid_ndev:
- sysfs_remove_group(&p->gid_attr_group->kobj,
- &p->gid_attr_group->ndev);
+ ret = kobject_add(&p->kobj, coredev->ports_kobj, "%d", port_num);
+ if (ret)
+ goto err_put;
+ ret = sysfs_create_groups(&p->kobj, p->groups_list);
+ if (ret)
+ goto err_del;
+ if (is_full_dev) {
+ ret = sysfs_create_groups(&p->kobj, device->ops.port_groups);
+ if (ret)
+ goto err_groups;
+ }
-err_free_gid_ndev:
- for (i = 0; i < attr.gid_tbl_len; ++i)
- kfree(p->gid_attr_group->ndev.attrs[i]);
+ list_add_tail(&p->kobj.entry, &coredev->port_list);
+ return p;
- kfree(p->gid_attr_group->ndev.attrs);
- p->gid_attr_group->ndev.attrs = NULL;
+err_groups:
+ sysfs_remove_groups(&p->kobj, p->groups_list);
+err_del:
+ kobject_del(&p->kobj);
+err_put:
+ if (device->port_data && is_full_dev)
+ device->port_data[port_num].sysfs = NULL;
+ kobject_put(&p->kobj);
+ return ERR_PTR(ret);
+}
-err_remove_gid:
- sysfs_remove_group(&p->kobj, &p->gid_group);
+static void destroy_port(struct ib_core_device *coredev, struct ib_port *port)
+{
+ bool is_full_dev = &port->ibdev->coredev == coredev;
-err_free_gid:
- for (i = 0; i < attr.gid_tbl_len; ++i)
- kfree(p->gid_group.attrs[i]);
+ list_del(&port->kobj.entry);
+ if (is_full_dev)
+ sysfs_remove_groups(&port->kobj, port->ibdev->ops.port_groups);
- kfree(p->gid_group.attrs);
- p->gid_group.attrs = NULL;
+ sysfs_remove_groups(&port->kobj, port->groups_list);
+ kobject_del(&port->kobj);
-err_remove_pma:
- if (p->pma_table)
- sysfs_remove_group(&p->kobj, p->pma_table);
+ if (port->ibdev->port_data &&
+ port->ibdev->port_data[port->port_num].sysfs == port)
+ port->ibdev->port_data[port->port_num].sysfs = NULL;
-err_put_gid_attrs:
- kobject_put(&p->gid_attr_group->kobj);
+ kobject_put(&port->kobj);
+}
-err_put:
- kobject_put(&p->kobj);
- return ret;
+static const char *node_type_string(int node_type)
+{
+ switch (node_type) {
+ case RDMA_NODE_IB_CA:
+ return "CA";
+ case RDMA_NODE_IB_SWITCH:
+ return "switch";
+ case RDMA_NODE_IB_ROUTER:
+ return "router";
+ case RDMA_NODE_RNIC:
+ return "RNIC";
+ case RDMA_NODE_USNIC:
+ return "usNIC";
+ case RDMA_NODE_USNIC_UDP:
+ return "usNIC UDP";
+ case RDMA_NODE_UNSPECIFIED:
+ return "unspecified";
+ }
+ return "<unknown>";
}
static ssize_t node_type_show(struct device *device,
@@ -1196,16 +1311,8 @@ static ssize_t node_type_show(struct device *device,
{
struct ib_device *dev = rdma_device_to_ibdev(device);
- switch (dev->node_type) {
- case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type);
- case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type);
- case RDMA_NODE_USNIC: return sprintf(buf, "%d: usNIC\n", dev->node_type);
- case RDMA_NODE_USNIC_UDP: return sprintf(buf, "%d: usNIC UDP\n", dev->node_type);
- case RDMA_NODE_UNSPECIFIED: return sprintf(buf, "%d: unspecified\n", dev->node_type);
- case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
- case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
- default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
- }
+ return sysfs_emit(buf, "%u: %s\n", dev->node_type,
+ node_type_string(dev->node_type));
}
static DEVICE_ATTR_RO(node_type);
@@ -1213,12 +1320,13 @@ static ssize_t sys_image_guid_show(struct device *device,
struct device_attribute *dev_attr, char *buf)
{
struct ib_device *dev = rdma_device_to_ibdev(device);
+ __be16 *guid = (__be16 *)&dev->attrs.sys_image_guid;
- return sprintf(buf, "%04x:%04x:%04x:%04x\n",
- be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[0]),
- be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[1]),
- be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]),
- be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3]));
+ return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n",
+ be16_to_cpu(guid[0]),
+ be16_to_cpu(guid[1]),
+ be16_to_cpu(guid[2]),
+ be16_to_cpu(guid[3]));
}
static DEVICE_ATTR_RO(sys_image_guid);
@@ -1226,12 +1334,13 @@ static ssize_t node_guid_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct ib_device *dev = rdma_device_to_ibdev(device);
+ __be16 *node_guid = (__be16 *)&dev->node_guid;
- return sprintf(buf, "%04x:%04x:%04x:%04x\n",
- be16_to_cpu(((__be16 *) &dev->node_guid)[0]),
- be16_to_cpu(((__be16 *) &dev->node_guid)[1]),
- be16_to_cpu(((__be16 *) &dev->node_guid)[2]),
- be16_to_cpu(((__be16 *) &dev->node_guid)[3]));
+ return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n",
+ be16_to_cpu(node_guid[0]),
+ be16_to_cpu(node_guid[1]),
+ be16_to_cpu(node_guid[2]),
+ be16_to_cpu(node_guid[3]));
}
static DEVICE_ATTR_RO(node_guid);
@@ -1240,7 +1349,7 @@ static ssize_t node_desc_show(struct device *device,
{
struct ib_device *dev = rdma_device_to_ibdev(device);
- return sprintf(buf, "%.64s\n", dev->node_desc);
+ return sysfs_emit(buf, "%.64s\n", dev->node_desc);
}
static ssize_t node_desc_store(struct device *device,
@@ -1252,7 +1361,7 @@ static ssize_t node_desc_store(struct device *device,
int ret;
if (!dev->ops.modify_device)
- return -EIO;
+ return -EOPNOTSUPP;
memcpy(desc.node_desc, buf, min_t(int, count, IB_DEVICE_NODE_DESC_MAX));
ret = ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &desc);
@@ -1267,10 +1376,11 @@ static ssize_t fw_ver_show(struct device *device, struct device_attribute *attr,
char *buf)
{
struct ib_device *dev = rdma_device_to_ibdev(device);
+ char version[IB_FW_VERSION_NAME_MAX] = {};
- ib_get_device_fw_str(dev, buf);
- strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX);
- return strlen(buf);
+ ib_get_device_fw_str(dev, version);
+
+ return sysfs_emit(buf, "%s\n", version);
}
static DEVICE_ATTR_RO(fw_ver);
@@ -1294,21 +1404,8 @@ void ib_free_port_attrs(struct ib_core_device *coredev)
list_for_each_entry_safe(p, t, &coredev->port_list, entry) {
struct ib_port *port = container_of(p, struct ib_port, kobj);
- list_del(&p->entry);
- if (port->hw_stats_ag)
- free_hsag(&port->kobj, port->hw_stats_ag);
- kfree(port->hw_stats);
-
- if (port->pma_table)
- sysfs_remove_group(p, port->pma_table);
- sysfs_remove_group(p, &port->pkey_group);
- sysfs_remove_group(p, &port->gid_group);
- sysfs_remove_group(&port->gid_attr_group->kobj,
- &port->gid_attr_group->ndev);
- sysfs_remove_group(&port->gid_attr_group->kobj,
- &port->gid_attr_group->type);
- kobject_put(&port->gid_attr_group->kobj);
- kobject_put(p);
+ destroy_gid_attrs(port);
+ destroy_port(coredev, port);
}
kobject_put(coredev->ports_kobj);
@@ -1317,7 +1414,7 @@ void ib_free_port_attrs(struct ib_core_device *coredev)
int ib_setup_port_attrs(struct ib_core_device *coredev)
{
struct ib_device *device = rdma_device_to_ibdev(&coredev->dev);
- unsigned int port;
+ u32 port_num;
int ret;
coredev->ports_kobj = kobject_create_and_add("ports",
@@ -1325,12 +1422,24 @@ int ib_setup_port_attrs(struct ib_core_device *coredev)
if (!coredev->ports_kobj)
return -ENOMEM;
- rdma_for_each_port (device, port) {
- ret = add_port(coredev, port);
+ rdma_for_each_port (device, port_num) {
+ struct ib_port_attr attr;
+ struct ib_port *port;
+
+ ret = ib_query_port(device, port_num, &attr);
if (ret)
goto err_put;
- }
+ port = setup_port(coredev, port_num, &attr);
+ if (IS_ERR(port)) {
+ ret = PTR_ERR(port);
+ goto err_put;
+ }
+
+ ret = setup_gid_attrs(port, &attr);
+ if (ret)
+ goto err_put;
+ }
return 0;
err_put:
@@ -1338,68 +1447,27 @@ err_put:
return ret;
}
-int ib_device_register_sysfs(struct ib_device *device)
-{
- int ret;
-
- ret = ib_setup_port_attrs(&device->coredev);
- if (ret)
- return ret;
-
- if (device->ops.alloc_hw_stats)
- setup_hw_stats(device, NULL, 0);
-
- return 0;
-}
-
-void ib_device_unregister_sysfs(struct ib_device *device)
-{
- if (device->hw_stats_ag)
- free_hsag(&device->dev.kobj, device->hw_stats_ag);
- kfree(device->hw_stats);
-
- ib_free_port_attrs(&device->coredev);
-}
-
/**
- * ib_port_register_module_stat - add module counters under relevant port
- * of IB device.
+ * ib_port_register_client_groups - Add an ib_client's attributes to the port
*
- * @device: IB device to add counters
+ * @ibdev: IB device to add counters
* @port_num: valid port number
- * @kobj: pointer to the kobject to initialize
- * @ktype: pointer to the ktype for this kobject.
- * @name: the name of the kobject
+ * @groups: Group list of attributes
+ *
+ * Do not use. Only for legacy sysfs compatibility.
*/
-int ib_port_register_module_stat(struct ib_device *device, u8 port_num,
- struct kobject *kobj, struct kobj_type *ktype,
- const char *name)
+int ib_port_register_client_groups(struct ib_device *ibdev, u32 port_num,
+ const struct attribute_group **groups)
{
- struct kobject *p, *t;
- int ret;
-
- list_for_each_entry_safe(p, t, &device->coredev.port_list, entry) {
- struct ib_port *port = container_of(p, struct ib_port, kobj);
-
- if (port->port_num != port_num)
- continue;
-
- ret = kobject_init_and_add(kobj, ktype, &port->kobj, "%s",
- name);
- if (ret)
- return ret;
- }
-
- return 0;
+ return sysfs_create_groups(&ibdev->port_data[port_num].sysfs->kobj,
+ groups);
}
-EXPORT_SYMBOL(ib_port_register_module_stat);
+EXPORT_SYMBOL(ib_port_register_client_groups);
-/**
- * ib_port_unregister_module_stat - release module counters
- * @kobj: pointer to the kobject to release
- */
-void ib_port_unregister_module_stat(struct kobject *kobj)
+void ib_port_unregister_client_groups(struct ib_device *ibdev, u32 port_num,
+ const struct attribute_group **groups)
{
- kobject_put(kobj);
+ return sysfs_remove_groups(&ibdev->port_data[port_num].sysfs->kobj,
+ groups);
}
-EXPORT_SYMBOL(ib_port_unregister_module_stat);
+EXPORT_SYMBOL(ib_port_unregister_client_groups);
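/*
 * Usage sketch for the client-group helpers above: a legacy ib_client can
 * hang its own attributes under an existing ports/<N>/ directory.
 * IB_PORT_ATTR_RO() and rdma_for_each_port() are used exactly as elsewhere
 * in this patch; the "demo_*" names (and the omitted error unwind) are
 * illustrative only.
 */
static ssize_t demo_show(struct ib_device *ibdev, u32 port_num,
			 struct ib_port_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%u\n", port_num);
}
static IB_PORT_ATTR_RO(demo);

static struct attribute *demo_port_attrs[] = {
	&ib_port_attr_demo.attr,
	NULL,
};
static const struct attribute_group demo_port_group = {
	.name = "demo",
	.attrs = demo_port_attrs,
};
static const struct attribute_group *demo_port_groups[] = {
	&demo_port_group,
	NULL,
};

static int demo_client_add(struct ib_device *ibdev)
{
	u32 port_num;
	int ret;

	rdma_for_each_port(ibdev, port_num) {
		/* Creates ports/<port_num>/demo/demo on this device. */
		ret = ib_port_register_client_groups(ibdev, port_num,
						     demo_port_groups);
		if (ret)
			return ret;
	}
	return 0;
}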
diff --git a/drivers/infiniband/core/trace.c b/drivers/infiniband/core/trace.c
new file mode 100644
index 000000000000..31e7860d35bf
--- /dev/null
+++ b/drivers/infiniband/core/trace.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Trace points for core RDMA functions.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#define CREATE_TRACE_POINTS
+
+#include <trace/events/rdma_core.h>
diff --git a/drivers/infiniband/core/ucaps.c b/drivers/infiniband/core/ucaps.c
new file mode 100644
index 000000000000..de5cb8bf0a61
--- /dev/null
+++ b/drivers/infiniband/core/ucaps.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#include <linux/kref.h>
+#include <linux/cdev.h>
+#include <linux/mutex.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <rdma/ib_ucaps.h>
+
+#define RDMA_UCAP_FIRST RDMA_UCAP_MLX5_CTRL_LOCAL
+
+static DEFINE_MUTEX(ucaps_mutex);
+static struct ib_ucap *ucaps_list[RDMA_UCAP_MAX];
+static bool ucaps_class_is_registered;
+static dev_t ucaps_base_dev;
+
+struct ib_ucap {
+ struct cdev cdev;
+ struct device dev;
+ struct kref ref;
+};
+
+static const char *ucap_names[RDMA_UCAP_MAX] = {
+ [RDMA_UCAP_MLX5_CTRL_LOCAL] = "mlx5_perm_ctrl_local",
+ [RDMA_UCAP_MLX5_CTRL_OTHER_VHCA] = "mlx5_perm_ctrl_other_vhca"
+};
+
+static char *ucaps_devnode(const struct device *dev, umode_t *mode)
+{
+ if (mode)
+ *mode = 0600;
+
+ return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
+}
+
+static const struct class ucaps_class = {
+ .name = "infiniband_ucaps",
+ .devnode = ucaps_devnode,
+};
+
+static const struct file_operations ucaps_cdev_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+};
+
+/**
+ * ib_cleanup_ucaps - cleanup all API resources and class.
+ *
+ * This is called once, when removing the ib_uverbs module.
+ */
+void ib_cleanup_ucaps(void)
+{
+ mutex_lock(&ucaps_mutex);
+ if (!ucaps_class_is_registered) {
+ mutex_unlock(&ucaps_mutex);
+ return;
+ }
+
+ for (int i = RDMA_UCAP_FIRST; i < RDMA_UCAP_MAX; i++)
+ WARN_ON(ucaps_list[i]);
+
+ class_unregister(&ucaps_class);
+ ucaps_class_is_registered = false;
+ unregister_chrdev_region(ucaps_base_dev, RDMA_UCAP_MAX);
+ mutex_unlock(&ucaps_mutex);
+}
+
+static int get_ucap_from_devt(dev_t devt, u64 *idx_mask)
+{
+ for (int type = RDMA_UCAP_FIRST; type < RDMA_UCAP_MAX; type++) {
+ if (ucaps_list[type] && ucaps_list[type]->dev.devt == devt) {
+ *idx_mask |= 1 << type;
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+static int get_devt_from_fd(unsigned int fd, dev_t *ret_dev)
+{
+ struct file *file;
+
+ file = fget(fd);
+ if (!file)
+ return -EBADF;
+
+ *ret_dev = file_inode(file)->i_rdev;
+ fput(file);
+ return 0;
+}
+
+/**
+ * ib_ucaps_init - Initialization required before ucap creation.
+ *
+ * Return: 0 on success, or a negative errno value on failure
+ */
+static int ib_ucaps_init(void)
+{
+ int ret = 0;
+
+ if (ucaps_class_is_registered)
+ return ret;
+
+ ret = class_register(&ucaps_class);
+ if (ret)
+ return ret;
+
+ ret = alloc_chrdev_region(&ucaps_base_dev, 0, RDMA_UCAP_MAX,
+ ucaps_class.name);
+ if (ret < 0) {
+ class_unregister(&ucaps_class);
+ return ret;
+ }
+
+ ucaps_class_is_registered = true;
+
+ return 0;
+}
+
+static void ucap_dev_release(struct device *device)
+{
+ struct ib_ucap *ucap = container_of(device, struct ib_ucap, dev);
+
+ kfree(ucap);
+}
+
+/**
+ * ib_create_ucap - Add a ucap character device
+ * @type: UCAP type
+ *
+ * Creates a ucap character device in the /dev/infiniband directory. By default,
+ * the device has root-only read-write access.
+ *
+ * A driver may call this multiple times with the same UCAP type. A reference
+ * count tracks creations and deletions.
+ *
+ * Return: 0 on success, or a negative errno value on failure
+ */
+int ib_create_ucap(enum rdma_user_cap type)
+{
+ struct ib_ucap *ucap;
+ int ret;
+
+ if (type >= RDMA_UCAP_MAX)
+ return -EINVAL;
+
+ mutex_lock(&ucaps_mutex);
+ ret = ib_ucaps_init();
+ if (ret)
+ goto unlock;
+
+ ucap = ucaps_list[type];
+ if (ucap) {
+ kref_get(&ucap->ref);
+ mutex_unlock(&ucaps_mutex);
+ return 0;
+ }
+
+ ucap = kzalloc(sizeof(*ucap), GFP_KERNEL);
+ if (!ucap) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ device_initialize(&ucap->dev);
+ ucap->dev.class = &ucaps_class;
+ ucap->dev.devt = MKDEV(MAJOR(ucaps_base_dev), type);
+ ucap->dev.release = ucap_dev_release;
+ ret = dev_set_name(&ucap->dev, "%s", ucap_names[type]);
+ if (ret)
+ goto err_device;
+
+ cdev_init(&ucap->cdev, &ucaps_cdev_fops);
+ ucap->cdev.owner = THIS_MODULE;
+
+ ret = cdev_device_add(&ucap->cdev, &ucap->dev);
+ if (ret)
+ goto err_device;
+
+ kref_init(&ucap->ref);
+ ucaps_list[type] = ucap;
+ mutex_unlock(&ucaps_mutex);
+
+ return 0;
+
+err_device:
+ put_device(&ucap->dev);
+unlock:
+ mutex_unlock(&ucaps_mutex);
+ return ret;
+}
+EXPORT_SYMBOL(ib_create_ucap);
+
+static void ib_release_ucap(struct kref *ref)
+{
+ struct ib_ucap *ucap = container_of(ref, struct ib_ucap, ref);
+ enum rdma_user_cap type;
+
+ for (type = RDMA_UCAP_FIRST; type < RDMA_UCAP_MAX; type++) {
+ if (ucaps_list[type] == ucap)
+ break;
+ }
+ WARN_ON(type == RDMA_UCAP_MAX);
+
+ ucaps_list[type] = NULL;
+ cdev_device_del(&ucap->cdev, &ucap->dev);
+ put_device(&ucap->dev);
+}
+
+/**
+ * ib_remove_ucap - Remove a ucap character device
+ * @type: User cap type
+ *
+ * Removes the ucap character device according to type. The device is completely
+ * removed from the filesystem when its reference count reaches 0.
+ */
+void ib_remove_ucap(enum rdma_user_cap type)
+{
+ struct ib_ucap *ucap;
+
+ mutex_lock(&ucaps_mutex);
+ ucap = ucaps_list[type];
+ if (WARN_ON(!ucap))
+ goto end;
+
+ kref_put(&ucap->ref, ib_release_ucap);
+end:
+ mutex_unlock(&ucaps_mutex);
+}
+EXPORT_SYMBOL(ib_remove_ucap);
+
+/**
+ * ib_get_ucaps - Get bitmask of ucap types from file descriptors
+ * @fds: Array of file descriptors
+ * @fd_count: Number of file descriptors in the array
+ * @idx_mask: Bitmask to be updated based on the ucaps in the fd list
+ *
+ * Given an array of file descriptors, this function returns a bitmask of
+ * the ucaps where a bit is set if an FD for that ucap type was in the array.
+ *
+ * Return: 0 on success, or a negative errno value on failure
+ */
+int ib_get_ucaps(int *fds, int fd_count, uint64_t *idx_mask)
+{
+ int ret = 0;
+ dev_t dev;
+
+ *idx_mask = 0;
+ mutex_lock(&ucaps_mutex);
+ for (int i = 0; i < fd_count; i++) {
+ ret = get_devt_from_fd(fds[i], &dev);
+ if (ret)
+ goto end;
+
+ ret = get_ucap_from_devt(dev, idx_mask);
+ if (ret)
+ goto end;
+ }
+
+end:
+ mutex_unlock(&ucaps_mutex);
+ return ret;
+}
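/*
 * Usage sketch for the ucap API above: a driver creates the capability
 * node once per exported capability and removes it symmetrically, while a
 * verbs handler can translate user-supplied fds into a capability mask
 * with ib_get_ucaps().  The "demo_*" callers are hypothetical.
 */
static int demo_enable_local_ctrl(void)
{
	/* Creates /dev/infiniband/mlx5_perm_ctrl_local (root-only, 0600). */
	return ib_create_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
}

static void demo_disable_local_ctrl(void)
{
	ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
}

static int demo_has_local_ctrl(int *fds, int fd_count)
{
	uint64_t mask;
	int ret;

	/* mask gets bit RDMA_UCAP_MLX5_CTRL_LOCAL set iff one of the fds
	 * refers to that capability's char device. */
	ret = ib_get_ucaps(fds, fd_count, &mask);
	if (ret)
		return ret;
	return !!(mask & (1ULL << RDMA_UCAP_MLX5_CTRL_LOCAL));
}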
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
deleted file mode 100644
index 8e7da2d41fd8..000000000000
--- a/drivers/infiniband/core/ucm.c
+++ /dev/null
@@ -1,1350 +0,0 @@
-/*
- * Copyright (c) 2005 Topspin Communications. All rights reserved.
- * Copyright (c) 2005 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/completion.h>
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/err.h>
-#include <linux/poll.h>
-#include <linux/sched.h>
-#include <linux/file.h>
-#include <linux/mount.h>
-#include <linux/cdev.h>
-#include <linux/xarray.h>
-#include <linux/mutex.h>
-#include <linux/slab.h>
-
-#include <linux/nospec.h>
-
-#include <linux/uaccess.h>
-
-#include <rdma/ib.h>
-#include <rdma/ib_cm.h>
-#include <rdma/ib_user_cm.h>
-#include <rdma/ib_marshall.h>
-
-#include "core_priv.h"
-
-MODULE_AUTHOR("Libor Michalek");
-MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access");
-MODULE_LICENSE("Dual BSD/GPL");
-
-struct ib_ucm_device {
- int devnum;
- struct cdev cdev;
- struct device dev;
- struct ib_device *ib_dev;
-};
-
-struct ib_ucm_file {
- struct mutex file_mutex;
- struct file *filp;
- struct ib_ucm_device *device;
-
- struct list_head ctxs;
- struct list_head events;
- wait_queue_head_t poll_wait;
-};
-
-struct ib_ucm_context {
- int id;
- struct completion comp;
- atomic_t ref;
- int events_reported;
-
- struct ib_ucm_file *file;
- struct ib_cm_id *cm_id;
- __u64 uid;
-
- struct list_head events; /* list of pending events. */
- struct list_head file_list; /* member in file ctx list */
-};
-
-struct ib_ucm_event {
- struct ib_ucm_context *ctx;
- struct list_head file_list; /* member in file event list */
- struct list_head ctx_list; /* member in ctx event list */
-
- struct ib_cm_id *cm_id;
- struct ib_ucm_event_resp resp;
- void *data;
- void *info;
- int data_len;
- int info_len;
-};
-
-enum {
- IB_UCM_MAJOR = 231,
- IB_UCM_BASE_MINOR = 224,
- IB_UCM_MAX_DEVICES = RDMA_MAX_PORTS,
- IB_UCM_NUM_FIXED_MINOR = 32,
- IB_UCM_NUM_DYNAMIC_MINOR = IB_UCM_MAX_DEVICES - IB_UCM_NUM_FIXED_MINOR,
-};
-
-#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR)
-static dev_t dynamic_ucm_dev;
-
-static void ib_ucm_add_one(struct ib_device *device);
-static void ib_ucm_remove_one(struct ib_device *device, void *client_data);
-
-static struct ib_client ucm_client = {
- .name = "ucm",
- .add = ib_ucm_add_one,
- .remove = ib_ucm_remove_one
-};
-
-static DEFINE_XARRAY_ALLOC(ctx_id_table);
-static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES);
-
-static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id)
-{
- struct ib_ucm_context *ctx;
-
- xa_lock(&ctx_id_table);
- ctx = xa_load(&ctx_id_table, id);
- if (!ctx)
- ctx = ERR_PTR(-ENOENT);
- else if (ctx->file != file)
- ctx = ERR_PTR(-EINVAL);
- else
- atomic_inc(&ctx->ref);
- xa_unlock(&ctx_id_table);
-
- return ctx;
-}
-
-static void ib_ucm_ctx_put(struct ib_ucm_context *ctx)
-{
- if (atomic_dec_and_test(&ctx->ref))
- complete(&ctx->comp);
-}
-
-static inline int ib_ucm_new_cm_id(int event)
-{
- return event == IB_CM_REQ_RECEIVED || event == IB_CM_SIDR_REQ_RECEIVED;
-}
-
-static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx)
-{
- struct ib_ucm_event *uevent;
-
- mutex_lock(&ctx->file->file_mutex);
- list_del(&ctx->file_list);
- while (!list_empty(&ctx->events)) {
-
- uevent = list_entry(ctx->events.next,
- struct ib_ucm_event, ctx_list);
- list_del(&uevent->file_list);
- list_del(&uevent->ctx_list);
- mutex_unlock(&ctx->file->file_mutex);
-
- /* clear incoming connections. */
- if (ib_ucm_new_cm_id(uevent->resp.event))
- ib_destroy_cm_id(uevent->cm_id);
-
- kfree(uevent);
- mutex_lock(&ctx->file->file_mutex);
- }
- mutex_unlock(&ctx->file->file_mutex);
-}
-
-static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file)
-{
- struct ib_ucm_context *ctx;
-
- ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
- if (!ctx)
- return NULL;
-
- atomic_set(&ctx->ref, 1);
- init_completion(&ctx->comp);
- ctx->file = file;
- INIT_LIST_HEAD(&ctx->events);
-
- if (xa_alloc(&ctx_id_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL))
- goto error;
-
- list_add_tail(&ctx->file_list, &file->ctxs);
- return ctx;
-
-error:
- kfree(ctx);
- return NULL;
-}
-
-static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq,
- const struct ib_cm_req_event_param *kreq)
-{
- ureq->remote_ca_guid = kreq->remote_ca_guid;
- ureq->remote_qkey = kreq->remote_qkey;
- ureq->remote_qpn = kreq->remote_qpn;
- ureq->qp_type = kreq->qp_type;
- ureq->starting_psn = kreq->starting_psn;
- ureq->responder_resources = kreq->responder_resources;
- ureq->initiator_depth = kreq->initiator_depth;
- ureq->local_cm_response_timeout = kreq->local_cm_response_timeout;
- ureq->flow_control = kreq->flow_control;
- ureq->remote_cm_response_timeout = kreq->remote_cm_response_timeout;
- ureq->retry_count = kreq->retry_count;
- ureq->rnr_retry_count = kreq->rnr_retry_count;
- ureq->srq = kreq->srq;
- ureq->port = kreq->port;
-
- ib_copy_path_rec_to_user(&ureq->primary_path, kreq->primary_path);
- if (kreq->alternate_path)
- ib_copy_path_rec_to_user(&ureq->alternate_path,
- kreq->alternate_path);
-}
-
-static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep,
- const struct ib_cm_rep_event_param *krep)
-{
- urep->remote_ca_guid = krep->remote_ca_guid;
- urep->remote_qkey = krep->remote_qkey;
- urep->remote_qpn = krep->remote_qpn;
- urep->starting_psn = krep->starting_psn;
- urep->responder_resources = krep->responder_resources;
- urep->initiator_depth = krep->initiator_depth;
- urep->target_ack_delay = krep->target_ack_delay;
- urep->failover_accepted = krep->failover_accepted;
- urep->flow_control = krep->flow_control;
- urep->rnr_retry_count = krep->rnr_retry_count;
- urep->srq = krep->srq;
-}
-
-static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep,
- const struct ib_cm_sidr_rep_event_param *krep)
-{
- urep->status = krep->status;
- urep->qkey = krep->qkey;
- urep->qpn = krep->qpn;
-};
-
-static int ib_ucm_event_process(const struct ib_cm_event *evt,
- struct ib_ucm_event *uvt)
-{
- void *info = NULL;
-
- switch (evt->event) {
- case IB_CM_REQ_RECEIVED:
- ib_ucm_event_req_get(&uvt->resp.u.req_resp,
- &evt->param.req_rcvd);
- uvt->data_len = IB_CM_REQ_PRIVATE_DATA_SIZE;
- uvt->resp.present = IB_UCM_PRES_PRIMARY;
- uvt->resp.present |= (evt->param.req_rcvd.alternate_path ?
- IB_UCM_PRES_ALTERNATE : 0);
- break;
- case IB_CM_REP_RECEIVED:
- ib_ucm_event_rep_get(&uvt->resp.u.rep_resp,
- &evt->param.rep_rcvd);
- uvt->data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
- break;
- case IB_CM_RTU_RECEIVED:
- uvt->data_len = IB_CM_RTU_PRIVATE_DATA_SIZE;
- uvt->resp.u.send_status = evt->param.send_status;
- break;
- case IB_CM_DREQ_RECEIVED:
- uvt->data_len = IB_CM_DREQ_PRIVATE_DATA_SIZE;
- uvt->resp.u.send_status = evt->param.send_status;
- break;
- case IB_CM_DREP_RECEIVED:
- uvt->data_len = IB_CM_DREP_PRIVATE_DATA_SIZE;
- uvt->resp.u.send_status = evt->param.send_status;
- break;
- case IB_CM_MRA_RECEIVED:
- uvt->resp.u.mra_resp.timeout =
- evt->param.mra_rcvd.service_timeout;
- uvt->data_len = IB_CM_MRA_PRIVATE_DATA_SIZE;
- break;
- case IB_CM_REJ_RECEIVED:
- uvt->resp.u.rej_resp.reason = evt->param.rej_rcvd.reason;
- uvt->data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
- uvt->info_len = evt->param.rej_rcvd.ari_length;
- info = evt->param.rej_rcvd.ari;
- break;
- case IB_CM_LAP_RECEIVED:
- ib_copy_path_rec_to_user(&uvt->resp.u.lap_resp.path,
- evt->param.lap_rcvd.alternate_path);
- uvt->data_len = IB_CM_LAP_PRIVATE_DATA_SIZE;
- uvt->resp.present = IB_UCM_PRES_ALTERNATE;
- break;
- case IB_CM_APR_RECEIVED:
- uvt->resp.u.apr_resp.status = evt->param.apr_rcvd.ap_status;
- uvt->data_len = IB_CM_APR_PRIVATE_DATA_SIZE;
- uvt->info_len = evt->param.apr_rcvd.info_len;
- info = evt->param.apr_rcvd.apr_info;
- break;
- case IB_CM_SIDR_REQ_RECEIVED:
- uvt->resp.u.sidr_req_resp.pkey =
- evt->param.sidr_req_rcvd.pkey;
- uvt->resp.u.sidr_req_resp.port =
- evt->param.sidr_req_rcvd.port;
- uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE;
- break;
- case IB_CM_SIDR_REP_RECEIVED:
- ib_ucm_event_sidr_rep_get(&uvt->resp.u.sidr_rep_resp,
- &evt->param.sidr_rep_rcvd);
- uvt->data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
- uvt->info_len = evt->param.sidr_rep_rcvd.info_len;
- info = evt->param.sidr_rep_rcvd.info;
- break;
- default:
- uvt->resp.u.send_status = evt->param.send_status;
- break;
- }
-
- if (uvt->data_len) {
- uvt->data = kmemdup(evt->private_data, uvt->data_len, GFP_KERNEL);
- if (!uvt->data)
- goto err1;
-
- uvt->resp.present |= IB_UCM_PRES_DATA;
- }
-
- if (uvt->info_len) {
- uvt->info = kmemdup(info, uvt->info_len, GFP_KERNEL);
- if (!uvt->info)
- goto err2;
-
- uvt->resp.present |= IB_UCM_PRES_INFO;
- }
- return 0;
-
-err2:
- kfree(uvt->data);
-err1:
- return -ENOMEM;
-}
-
-static int ib_ucm_event_handler(struct ib_cm_id *cm_id,
- const struct ib_cm_event *event)
-{
- struct ib_ucm_event *uevent;
- struct ib_ucm_context *ctx;
- int result = 0;
-
- ctx = cm_id->context;
-
- uevent = kzalloc(sizeof *uevent, GFP_KERNEL);
- if (!uevent)
- goto err1;
-
- uevent->ctx = ctx;
- uevent->cm_id = cm_id;
- uevent->resp.uid = ctx->uid;
- uevent->resp.id = ctx->id;
- uevent->resp.event = event->event;
-
- result = ib_ucm_event_process(event, uevent);
- if (result)
- goto err2;
-
- mutex_lock(&ctx->file->file_mutex);
- list_add_tail(&uevent->file_list, &ctx->file->events);
- list_add_tail(&uevent->ctx_list, &ctx->events);
- wake_up_interruptible(&ctx->file->poll_wait);
- mutex_unlock(&ctx->file->file_mutex);
- return 0;
-
-err2:
- kfree(uevent);
-err1:
- /* Destroy new cm_id's */
- return ib_ucm_new_cm_id(event->event);
-}
-
-static ssize_t ib_ucm_event(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_ucm_context *ctx;
- struct ib_ucm_event_get cmd;
- struct ib_ucm_event *uevent;
- int result = 0;
-
- if (out_len < sizeof(struct ib_ucm_event_resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- mutex_lock(&file->file_mutex);
- while (list_empty(&file->events)) {
- mutex_unlock(&file->file_mutex);
-
- if (file->filp->f_flags & O_NONBLOCK)
- return -EAGAIN;
-
- if (wait_event_interruptible(file->poll_wait,
- !list_empty(&file->events)))
- return -ERESTARTSYS;
-
- mutex_lock(&file->file_mutex);
- }
-
- uevent = list_entry(file->events.next, struct ib_ucm_event, file_list);
-
- if (ib_ucm_new_cm_id(uevent->resp.event)) {
- ctx = ib_ucm_ctx_alloc(file);
- if (!ctx) {
- result = -ENOMEM;
- goto done;
- }
-
- ctx->cm_id = uevent->cm_id;
- ctx->cm_id->context = ctx;
- uevent->resp.id = ctx->id;
- }
-
- if (copy_to_user(u64_to_user_ptr(cmd.response),
- &uevent->resp, sizeof(uevent->resp))) {
- result = -EFAULT;
- goto done;
- }
-
- if (uevent->data) {
- if (cmd.data_len < uevent->data_len) {
- result = -ENOMEM;
- goto done;
- }
- if (copy_to_user(u64_to_user_ptr(cmd.data),
- uevent->data, uevent->data_len)) {
- result = -EFAULT;
- goto done;
- }
- }
-
- if (uevent->info) {
- if (cmd.info_len < uevent->info_len) {
- result = -ENOMEM;
- goto done;
- }
- if (copy_to_user(u64_to_user_ptr(cmd.info),
- uevent->info, uevent->info_len)) {
- result = -EFAULT;
- goto done;
- }
- }
-
- list_del(&uevent->file_list);
- list_del(&uevent->ctx_list);
- uevent->ctx->events_reported++;
-
- kfree(uevent->data);
- kfree(uevent->info);
- kfree(uevent);
-done:
- mutex_unlock(&file->file_mutex);
- return result;
-}
-
-static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_ucm_create_id cmd;
- struct ib_ucm_create_id_resp resp;
- struct ib_ucm_context *ctx;
- int result;
-
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- mutex_lock(&file->file_mutex);
- ctx = ib_ucm_ctx_alloc(file);
- mutex_unlock(&file->file_mutex);
- if (!ctx)
- return -ENOMEM;
-
- ctx->uid = cmd.uid;
- ctx->cm_id = ib_create_cm_id(file->device->ib_dev,
- ib_ucm_event_handler, ctx);
- if (IS_ERR(ctx->cm_id)) {
- result = PTR_ERR(ctx->cm_id);
- goto err1;
- }
-
- resp.id = ctx->id;
- if (copy_to_user(u64_to_user_ptr(cmd.response),
- &resp, sizeof(resp))) {
- result = -EFAULT;
- goto err2;
- }
- return 0;
-
-err2:
- ib_destroy_cm_id(ctx->cm_id);
-err1:
- xa_erase(&ctx_id_table, ctx->id);
- kfree(ctx);
- return result;
-}
-
-static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_ucm_destroy_id cmd;
- struct ib_ucm_destroy_id_resp resp;
- struct ib_ucm_context *ctx;
- int result = 0;
-
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- xa_lock(&ctx_id_table);
- ctx = xa_load(&ctx_id_table, cmd.id);
- if (!ctx)
- ctx = ERR_PTR(-ENOENT);
- else if (ctx->file != file)
- ctx = ERR_PTR(-EINVAL);
- else
- __xa_erase(&ctx_id_table, ctx->id);
- xa_unlock(&ctx_id_table);
-
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- ib_ucm_ctx_put(ctx);
- wait_for_completion(&ctx->comp);
-
- /* No new events will be generated after destroying the cm_id. */
- ib_destroy_cm_id(ctx->cm_id);
- /* Cleanup events not yet reported to the user. */
- ib_ucm_cleanup_events(ctx);
-
- resp.events_reported = ctx->events_reported;
- if (copy_to_user(u64_to_user_ptr(cmd.response),
- &resp, sizeof(resp)))
- result = -EFAULT;
-
- kfree(ctx);
- return result;
-}
-
-static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_ucm_attr_id_resp resp;
- struct ib_ucm_attr_id cmd;
- struct ib_ucm_context *ctx;
- int result = 0;
-
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- resp.service_id = ctx->cm_id->service_id;
- resp.service_mask = ctx->cm_id->service_mask;
- resp.local_id = ctx->cm_id->local_id;
- resp.remote_id = ctx->cm_id->remote_id;
-
- if (copy_to_user(u64_to_user_ptr(cmd.response),
- &resp, sizeof(resp)))
- result = -EFAULT;
-
- ib_ucm_ctx_put(ctx);
- return result;
-}
-
-static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_uverbs_qp_attr resp;
- struct ib_ucm_init_qp_attr cmd;
- struct ib_ucm_context *ctx;
- struct ib_qp_attr qp_attr;
- int result = 0;
-
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- resp.qp_attr_mask = 0;
- memset(&qp_attr, 0, sizeof qp_attr);
- qp_attr.qp_state = cmd.qp_state;
- result = ib_cm_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
- if (result)
- goto out;
-
- ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
-
- if (copy_to_user(u64_to_user_ptr(cmd.response),
- &resp, sizeof(resp)))
- result = -EFAULT;
-
-out:
- ib_ucm_ctx_put(ctx);
- return result;
-}
-
-static int ucm_validate_listen(__be64 service_id, __be64 service_mask)
-{
- service_id &= service_mask;
-
- if (((service_id & IB_CMA_SERVICE_ID_MASK) == IB_CMA_SERVICE_ID) ||
- ((service_id & IB_SDP_SERVICE_ID_MASK) == IB_SDP_SERVICE_ID))
- return -EINVAL;
-
- return 0;
-}
-
-static ssize_t ib_ucm_listen(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_ucm_listen cmd;
- struct ib_ucm_context *ctx;
- int result;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- result = ucm_validate_listen(cmd.service_id, cmd.service_mask);
- if (result)
- goto out;
-
- result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask);
-out:
- ib_ucm_ctx_put(ctx);
- return result;
-}
-
-static ssize_t ib_ucm_notify(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_ucm_notify cmd;
- struct ib_ucm_context *ctx;
- int result;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- result = ib_cm_notify(ctx->cm_id, (enum ib_event_type) cmd.event);
- ib_ucm_ctx_put(ctx);
- return result;
-}
-
-static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len)
-{
- void *data;
-
- *dest = NULL;
-
- if (!len)
- return 0;
-
- data = memdup_user(u64_to_user_ptr(src), len);
- if (IS_ERR(data))
- return PTR_ERR(data);
-
- *dest = data;
- return 0;
-}
-
-static int ib_ucm_path_get(struct sa_path_rec **path, u64 src)
-{
- struct ib_user_path_rec upath;
- struct sa_path_rec *sa_path;
-
- *path = NULL;
-
- if (!src)
- return 0;
-
- sa_path = kmalloc(sizeof(*sa_path), GFP_KERNEL);
- if (!sa_path)
- return -ENOMEM;
-
- if (copy_from_user(&upath, u64_to_user_ptr(src),
- sizeof(upath))) {
-
- kfree(sa_path);
- return -EFAULT;
- }
-
- ib_copy_path_rec_from_user(sa_path, &upath);
- *path = sa_path;
- return 0;
-}
-
-static ssize_t ib_ucm_send_req(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_cm_req_param param;
- struct ib_ucm_context *ctx;
- struct ib_ucm_req cmd;
- int result;
-
- param.private_data = NULL;
- param.primary_path = NULL;
- param.alternate_path = NULL;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- result = ib_ucm_alloc_data(&param.private_data, cmd.data, cmd.len);
- if (result)
- goto done;
-
- result = ib_ucm_path_get(&param.primary_path, cmd.primary_path);
- if (result)
- goto done;
-
- result = ib_ucm_path_get(&param.alternate_path, cmd.alternate_path);
- if (result)
- goto done;
-
- param.private_data_len = cmd.len;
- param.service_id = cmd.sid;
- param.qp_num = cmd.qpn;
- param.qp_type = cmd.qp_type;
- param.starting_psn = cmd.psn;
- param.peer_to_peer = cmd.peer_to_peer;
- param.responder_resources = cmd.responder_resources;
- param.initiator_depth = cmd.initiator_depth;
- param.remote_cm_response_timeout = cmd.remote_cm_response_timeout;
- param.flow_control = cmd.flow_control;
- param.local_cm_response_timeout = cmd.local_cm_response_timeout;
- param.retry_count = cmd.retry_count;
- param.rnr_retry_count = cmd.rnr_retry_count;
- param.max_cm_retries = cmd.max_cm_retries;
- param.srq = cmd.srq;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (!IS_ERR(ctx)) {
- result = ib_send_cm_req(ctx->cm_id, &param);
- ib_ucm_ctx_put(ctx);
- } else
- result = PTR_ERR(ctx);
-
-done:
- kfree(param.private_data);
- kfree(param.primary_path);
- kfree(param.alternate_path);
- return result;
-}
-
-static ssize_t ib_ucm_send_rep(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_cm_rep_param param;
- struct ib_ucm_context *ctx;
- struct ib_ucm_rep cmd;
- int result;
-
- param.private_data = NULL;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- result = ib_ucm_alloc_data(&param.private_data, cmd.data, cmd.len);
- if (result)
- return result;
-
- param.qp_num = cmd.qpn;
- param.starting_psn = cmd.psn;
- param.private_data_len = cmd.len;
- param.responder_resources = cmd.responder_resources;
- param.initiator_depth = cmd.initiator_depth;
- param.failover_accepted = cmd.failover_accepted;
- param.flow_control = cmd.flow_control;
- param.rnr_retry_count = cmd.rnr_retry_count;
- param.srq = cmd.srq;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (!IS_ERR(ctx)) {
- ctx->uid = cmd.uid;
- result = ib_send_cm_rep(ctx->cm_id, &param);
- ib_ucm_ctx_put(ctx);
- } else
- result = PTR_ERR(ctx);
-
- kfree(param.private_data);
- return result;
-}
-
-static ssize_t ib_ucm_send_private_data(struct ib_ucm_file *file,
- const char __user *inbuf, int in_len,
- int (*func)(struct ib_cm_id *cm_id,
- const void *private_data,
- u8 private_data_len))
-{
- struct ib_ucm_private_data cmd;
- struct ib_ucm_context *ctx;
- const void *private_data = NULL;
- int result;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- result = ib_ucm_alloc_data(&private_data, cmd.data, cmd.len);
- if (result)
- return result;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (!IS_ERR(ctx)) {
- result = func(ctx->cm_id, private_data, cmd.len);
- ib_ucm_ctx_put(ctx);
- } else
- result = PTR_ERR(ctx);
-
- kfree(private_data);
- return result;
-}
-
-static ssize_t ib_ucm_send_rtu(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_rtu);
-}
-
-static ssize_t ib_ucm_send_dreq(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_dreq);
-}
-
-static ssize_t ib_ucm_send_drep(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_drep);
-}
-
-static ssize_t ib_ucm_send_info(struct ib_ucm_file *file,
- const char __user *inbuf, int in_len,
- int (*func)(struct ib_cm_id *cm_id,
- int status,
- const void *info,
- u8 info_len,
- const void *data,
- u8 data_len))
-{
- struct ib_ucm_context *ctx;
- struct ib_ucm_info cmd;
- const void *data = NULL;
- const void *info = NULL;
- int result;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- result = ib_ucm_alloc_data(&data, cmd.data, cmd.data_len);
- if (result)
- goto done;
-
- result = ib_ucm_alloc_data(&info, cmd.info, cmd.info_len);
- if (result)
- goto done;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (!IS_ERR(ctx)) {
- result = func(ctx->cm_id, cmd.status, info, cmd.info_len,
- data, cmd.data_len);
- ib_ucm_ctx_put(ctx);
- } else
- result = PTR_ERR(ctx);
-
-done:
- kfree(data);
- kfree(info);
- return result;
-}
-
-static ssize_t ib_ucm_send_rej(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_rej);
-}
-
-static ssize_t ib_ucm_send_apr(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_apr);
-}
-
-static ssize_t ib_ucm_send_mra(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_ucm_context *ctx;
- struct ib_ucm_mra cmd;
- const void *data = NULL;
- int result;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- result = ib_ucm_alloc_data(&data, cmd.data, cmd.len);
- if (result)
- return result;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (!IS_ERR(ctx)) {
- result = ib_send_cm_mra(ctx->cm_id, cmd.timeout, data, cmd.len);
- ib_ucm_ctx_put(ctx);
- } else
- result = PTR_ERR(ctx);
-
- kfree(data);
- return result;
-}
-
-static ssize_t ib_ucm_send_lap(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_ucm_context *ctx;
- struct sa_path_rec *path = NULL;
- struct ib_ucm_lap cmd;
- const void *data = NULL;
- int result;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- result = ib_ucm_alloc_data(&data, cmd.data, cmd.len);
- if (result)
- goto done;
-
- result = ib_ucm_path_get(&path, cmd.path);
- if (result)
- goto done;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (!IS_ERR(ctx)) {
- result = ib_send_cm_lap(ctx->cm_id, path, data, cmd.len);
- ib_ucm_ctx_put(ctx);
- } else
- result = PTR_ERR(ctx);
-
-done:
- kfree(data);
- kfree(path);
- return result;
-}
-
-static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_cm_sidr_req_param param = {};
- struct ib_ucm_context *ctx;
- struct ib_ucm_sidr_req cmd;
- int result;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- result = ib_ucm_alloc_data(&param.private_data, cmd.data, cmd.len);
- if (result)
- goto done;
-
- result = ib_ucm_path_get(&param.path, cmd.path);
- if (result)
- goto done;
-
- param.private_data_len = cmd.len;
- param.service_id = cmd.sid;
- param.timeout_ms = cmd.timeout;
- param.max_cm_retries = cmd.max_cm_retries;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (!IS_ERR(ctx)) {
- result = ib_send_cm_sidr_req(ctx->cm_id, &param);
- ib_ucm_ctx_put(ctx);
- } else
- result = PTR_ERR(ctx);
-
-done:
- kfree(param.private_data);
- kfree(param.path);
- return result;
-}
-
-static ssize_t ib_ucm_send_sidr_rep(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_cm_sidr_rep_param param;
- struct ib_ucm_sidr_rep cmd;
- struct ib_ucm_context *ctx;
- int result;
-
- param.info = NULL;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- result = ib_ucm_alloc_data(&param.private_data,
- cmd.data, cmd.data_len);
- if (result)
- goto done;
-
- result = ib_ucm_alloc_data(&param.info, cmd.info, cmd.info_len);
- if (result)
- goto done;
-
- param.qp_num = cmd.qpn;
- param.qkey = cmd.qkey;
- param.status = cmd.status;
- param.info_length = cmd.info_len;
- param.private_data_len = cmd.data_len;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (!IS_ERR(ctx)) {
- result = ib_send_cm_sidr_rep(ctx->cm_id, &param);
- ib_ucm_ctx_put(ctx);
- } else
- result = PTR_ERR(ctx);
-
-done:
- kfree(param.private_data);
- kfree(param.info);
- return result;
-}
-
-static ssize_t (*ucm_cmd_table[])(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len) = {
- [IB_USER_CM_CMD_CREATE_ID] = ib_ucm_create_id,
- [IB_USER_CM_CMD_DESTROY_ID] = ib_ucm_destroy_id,
- [IB_USER_CM_CMD_ATTR_ID] = ib_ucm_attr_id,
- [IB_USER_CM_CMD_LISTEN] = ib_ucm_listen,
- [IB_USER_CM_CMD_NOTIFY] = ib_ucm_notify,
- [IB_USER_CM_CMD_SEND_REQ] = ib_ucm_send_req,
- [IB_USER_CM_CMD_SEND_REP] = ib_ucm_send_rep,
- [IB_USER_CM_CMD_SEND_RTU] = ib_ucm_send_rtu,
- [IB_USER_CM_CMD_SEND_DREQ] = ib_ucm_send_dreq,
- [IB_USER_CM_CMD_SEND_DREP] = ib_ucm_send_drep,
- [IB_USER_CM_CMD_SEND_REJ] = ib_ucm_send_rej,
- [IB_USER_CM_CMD_SEND_MRA] = ib_ucm_send_mra,
- [IB_USER_CM_CMD_SEND_LAP] = ib_ucm_send_lap,
- [IB_USER_CM_CMD_SEND_APR] = ib_ucm_send_apr,
- [IB_USER_CM_CMD_SEND_SIDR_REQ] = ib_ucm_send_sidr_req,
- [IB_USER_CM_CMD_SEND_SIDR_REP] = ib_ucm_send_sidr_rep,
- [IB_USER_CM_CMD_EVENT] = ib_ucm_event,
- [IB_USER_CM_CMD_INIT_QP_ATTR] = ib_ucm_init_qp_attr,
-};
-
-static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
- size_t len, loff_t *pos)
-{
- struct ib_ucm_file *file = filp->private_data;
- struct ib_ucm_cmd_hdr hdr;
- ssize_t result;
-
- if (!ib_safe_file_access(filp)) {
- pr_err_once("ucm_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
- task_tgid_vnr(current), current->comm);
- return -EACCES;
- }
-
- if (len < sizeof(hdr))
- return -EINVAL;
-
- if (copy_from_user(&hdr, buf, sizeof(hdr)))
- return -EFAULT;
-
- if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))
- return -EINVAL;
- hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucm_cmd_table));
-
- if (hdr.in + sizeof(hdr) > len)
- return -EINVAL;
-
- result = ucm_cmd_table[hdr.cmd](file, buf + sizeof(hdr),
- hdr.in, hdr.out);
- if (!result)
- result = len;
-
- return result;
-}
-
-static __poll_t ib_ucm_poll(struct file *filp,
- struct poll_table_struct *wait)
-{
- struct ib_ucm_file *file = filp->private_data;
- __poll_t mask = 0;
-
- poll_wait(filp, &file->poll_wait, wait);
-
- if (!list_empty(&file->events))
- mask = EPOLLIN | EPOLLRDNORM;
-
- return mask;
-}
-
-/*
- * ib_ucm_open() does not need the BKL:
- *
- * - no global state is referred to;
- * - there is no ioctl method to race against;
- * - no further module initialization is required for open to work
- * after the device is registered.
- */
-static int ib_ucm_open(struct inode *inode, struct file *filp)
-{
- struct ib_ucm_file *file;
-
- file = kmalloc(sizeof(*file), GFP_KERNEL);
- if (!file)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&file->events);
- INIT_LIST_HEAD(&file->ctxs);
- init_waitqueue_head(&file->poll_wait);
-
- mutex_init(&file->file_mutex);
-
- filp->private_data = file;
- file->filp = filp;
- file->device = container_of(inode->i_cdev, struct ib_ucm_device, cdev);
-
- return stream_open(inode, filp);
-}
-
-static int ib_ucm_close(struct inode *inode, struct file *filp)
-{
- struct ib_ucm_file *file = filp->private_data;
- struct ib_ucm_context *ctx;
-
- mutex_lock(&file->file_mutex);
- while (!list_empty(&file->ctxs)) {
- ctx = list_entry(file->ctxs.next,
- struct ib_ucm_context, file_list);
- mutex_unlock(&file->file_mutex);
-
- xa_erase(&ctx_id_table, ctx->id);
- ib_destroy_cm_id(ctx->cm_id);
- ib_ucm_cleanup_events(ctx);
- kfree(ctx);
-
- mutex_lock(&file->file_mutex);
- }
- mutex_unlock(&file->file_mutex);
- kfree(file);
- return 0;
-}
-
-static void ib_ucm_release_dev(struct device *dev)
-{
- struct ib_ucm_device *ucm_dev;
-
- ucm_dev = container_of(dev, struct ib_ucm_device, dev);
- kfree(ucm_dev);
-}
-
-static void ib_ucm_free_dev(struct ib_ucm_device *ucm_dev)
-{
- clear_bit(ucm_dev->devnum, dev_map);
-}
-
-static const struct file_operations ucm_fops = {
- .owner = THIS_MODULE,
- .open = ib_ucm_open,
- .release = ib_ucm_close,
- .write = ib_ucm_write,
- .poll = ib_ucm_poll,
- .llseek = no_llseek,
-};
-
-static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct ib_ucm_device *ucm_dev;
-
- ucm_dev = container_of(dev, struct ib_ucm_device, dev);
- return sprintf(buf, "%s\n", ucm_dev->ib_dev->name);
-}
-static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
-
-static void ib_ucm_add_one(struct ib_device *device)
-{
- int devnum;
- dev_t base;
- struct ib_ucm_device *ucm_dev;
-
- if (!device->ops.alloc_ucontext || !rdma_cap_ib_cm(device, 1))
- return;
-
- ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL);
- if (!ucm_dev)
- return;
-
- device_initialize(&ucm_dev->dev);
- ucm_dev->ib_dev = device;
- ucm_dev->dev.release = ib_ucm_release_dev;
-
- devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
- if (devnum >= IB_UCM_MAX_DEVICES)
- goto err;
- ucm_dev->devnum = devnum;
- set_bit(devnum, dev_map);
- if (devnum >= IB_UCM_NUM_FIXED_MINOR)
- base = dynamic_ucm_dev + devnum - IB_UCM_NUM_FIXED_MINOR;
- else
- base = IB_UCM_BASE_DEV + devnum;
-
- cdev_init(&ucm_dev->cdev, &ucm_fops);
- ucm_dev->cdev.owner = THIS_MODULE;
- kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum);
-
- ucm_dev->dev.class = &cm_class;
- ucm_dev->dev.parent = device->dev.parent;
- ucm_dev->dev.devt = base;
-
- dev_set_name(&ucm_dev->dev, "ucm%d", ucm_dev->devnum);
- if (cdev_device_add(&ucm_dev->cdev, &ucm_dev->dev))
- goto err_devnum;
-
- if (device_create_file(&ucm_dev->dev, &dev_attr_ibdev))
- goto err_dev;
-
- ib_set_client_data(device, &ucm_client, ucm_dev);
- return;
-
-err_dev:
- cdev_device_del(&ucm_dev->cdev, &ucm_dev->dev);
-err_devnum:
- ib_ucm_free_dev(ucm_dev);
-err:
- put_device(&ucm_dev->dev);
- return;
-}
-
-static void ib_ucm_remove_one(struct ib_device *device, void *client_data)
-{
- struct ib_ucm_device *ucm_dev = client_data;
-
- if (!ucm_dev)
- return;
-
- cdev_device_del(&ucm_dev->cdev, &ucm_dev->dev);
- ib_ucm_free_dev(ucm_dev);
- put_device(&ucm_dev->dev);
-}
-
-static CLASS_ATTR_STRING(abi_version, S_IRUGO,
- __stringify(IB_USER_CM_ABI_VERSION));
-
-static int __init ib_ucm_init(void)
-{
- int ret;
-
- ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR,
- "infiniband_cm");
- if (ret) {
- pr_err("ucm: couldn't register device number\n");
- goto error1;
- }
-
- ret = alloc_chrdev_region(&dynamic_ucm_dev, 0, IB_UCM_NUM_DYNAMIC_MINOR,
- "infiniband_cm");
- if (ret) {
- pr_err("ucm: couldn't register dynamic device number\n");
- goto err_alloc;
- }
-
- ret = class_create_file(&cm_class, &class_attr_abi_version.attr);
- if (ret) {
- pr_err("ucm: couldn't create abi_version attribute\n");
- goto error2;
- }
-
- ret = ib_register_client(&ucm_client);
- if (ret) {
- pr_err("ucm: couldn't register client\n");
- goto error3;
- }
- return 0;
-
-error3:
- class_remove_file(&cm_class, &class_attr_abi_version.attr);
-error2:
- unregister_chrdev_region(dynamic_ucm_dev, IB_UCM_NUM_DYNAMIC_MINOR);
-err_alloc:
- unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR);
-error1:
- return ret;
-}
-
-static void __exit ib_ucm_cleanup(void)
-{
- ib_unregister_client(&ucm_client);
- class_remove_file(&cm_class, &class_attr_abi_version.attr);
- unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR);
- unregister_chrdev_region(dynamic_ucm_dev, IB_UCM_NUM_DYNAMIC_MINOR);
- WARN_ON(!xa_empty(&ctx_id_table));
-}
-
-module_init(ib_ucm_init);
-module_exit(ib_ucm_cleanup);
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 140a338a135f..ec3be65a2b88 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -52,6 +52,9 @@
#include <rdma/rdma_cm_ib.h>
#include <rdma/ib_addr.h>
#include <rdma/ib.h>
+#include <rdma/ib_cm.h>
+#include <rdma/rdma_netlink.h>
+#include "core_priv.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
@@ -66,9 +69,10 @@ static struct ctl_table ucma_ctl_table[] = {
.data = &max_backlog,
.maxlen = sizeof max_backlog,
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
},
- { }
};
struct ucma_file {
@@ -77,34 +81,28 @@ struct ucma_file {
struct list_head ctx_list;
struct list_head event_list;
wait_queue_head_t poll_wait;
- struct workqueue_struct *close_wq;
};
struct ucma_context {
- int id;
+ u32 id;
struct completion comp;
- atomic_t ref;
+ refcount_t ref;
int events_reported;
- int backlog;
+ atomic_t backlog;
struct ucma_file *file;
struct rdma_cm_id *cm_id;
+ struct mutex mutex;
u64 uid;
struct list_head list;
struct list_head mc_list;
- /* mark that device is in process of destroying the internal HW
- * resources, protected by the global mut
- */
- int closing;
- /* sync between removal event and id destroy, protected by file mut */
- int destroying;
struct work_struct close_work;
};
struct ucma_multicast {
struct ucma_context *ctx;
- int id;
+ u32 id;
int events_reported;
u64 uid;
@@ -115,28 +113,27 @@ struct ucma_multicast {
struct ucma_event {
struct ucma_context *ctx;
+ struct ucma_context *conn_req_ctx;
struct ucma_multicast *mc;
struct list_head list;
- struct rdma_cm_id *cm_id;
struct rdma_ucm_event_resp resp;
- struct work_struct close_work;
};
-static DEFINE_MUTEX(mut);
-static DEFINE_IDR(ctx_idr);
-static DEFINE_IDR(multicast_idr);
+static DEFINE_XARRAY_ALLOC(ctx_table);
+static DEFINE_XARRAY_ALLOC(multicast_table);
static const struct file_operations ucma_fops;
+static int ucma_destroy_private_ctx(struct ucma_context *ctx);
static inline struct ucma_context *_ucma_find_context(int id,
struct ucma_file *file)
{
struct ucma_context *ctx;
- ctx = idr_find(&ctx_idr, id);
+ ctx = xa_load(&ctx_table, id);
if (!ctx)
ctx = ERR_PTR(-ENOENT);
- else if (ctx->file != file || !ctx->cm_id)
+ else if (ctx->file != file)
ctx = ERR_PTR(-EINVAL);
return ctx;
}
@@ -145,21 +142,18 @@ static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
{
struct ucma_context *ctx;
- mutex_lock(&mut);
+ xa_lock(&ctx_table);
ctx = _ucma_find_context(id, file);
- if (!IS_ERR(ctx)) {
- if (ctx->closing)
- ctx = ERR_PTR(-EIO);
- else
- atomic_inc(&ctx->ref);
- }
- mutex_unlock(&mut);
+ if (!IS_ERR(ctx))
+ if (!refcount_inc_not_zero(&ctx->ref))
+ ctx = ERR_PTR(-ENXIO);
+ xa_unlock(&ctx_table);
return ctx;
}
static void ucma_put_ctx(struct ucma_context *ctx)
{
- if (atomic_dec_and_test(&ctx->ref))
+ if (refcount_dec_and_test(&ctx->ref))
complete(&ctx->comp);
}
@@ -180,26 +174,21 @@ static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id)
return ctx;
}
-static void ucma_close_event_id(struct work_struct *work)
-{
- struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work);
-
- rdma_destroy_id(uevent_close->cm_id);
- kfree(uevent_close);
-}
-
static void ucma_close_id(struct work_struct *work)
{
struct ucma_context *ctx = container_of(work, struct ucma_context, close_work);
/* once all inflight tasks are finished, we close all underlying
* resources. The context is still alive until it is explicitly destroyed
- * by its creator.
+ * by its creator. This puts back the xarray's reference.
*/
ucma_put_ctx(ctx);
wait_for_completion(&ctx->comp);
/* No new events will be generated after destroying the id. */
rdma_destroy_id(ctx->cm_id);
+
+ /* Reading the cm_id without holding a positive ref is not allowed */
+ ctx->cm_id = NULL;
}
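+
+/*
+ * Reference pattern used above (a sketch, not a new API): the xarray entry
+ * owns the initial reference set in ucma_set_ctx_cm_id(), command handlers
+ * take a temporary reference via ucma_get_ctx()/refcount_inc_not_zero() and
+ * drop it with ucma_put_ctx(). The final put completes ctx->comp, so
+ * teardown follows roughly:
+ *
+ *	ucma_put_ctx(ctx);                  <- drop the xarray's reference
+ *	wait_for_completion(&ctx->comp);    <- wait for in-flight handlers
+ *	rdma_destroy_id(ctx->cm_id);        <- no new events after this point
+ */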
static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
@@ -211,46 +200,32 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
return NULL;
INIT_WORK(&ctx->close_work, ucma_close_id);
- atomic_set(&ctx->ref, 1);
init_completion(&ctx->comp);
INIT_LIST_HEAD(&ctx->mc_list);
+ /* So list_del() will work if we don't do ucma_finish_ctx() */
+ INIT_LIST_HEAD(&ctx->list);
ctx->file = file;
+ mutex_init(&ctx->mutex);
- mutex_lock(&mut);
- ctx->id = idr_alloc(&ctx_idr, ctx, 0, 0, GFP_KERNEL);
- mutex_unlock(&mut);
- if (ctx->id < 0)
- goto error;
-
- list_add_tail(&ctx->list, &file->ctx_list);
+ if (xa_alloc(&ctx_table, &ctx->id, NULL, xa_limit_32b, GFP_KERNEL)) {
+ kfree(ctx);
+ return NULL;
+ }
return ctx;
-
-error:
- kfree(ctx);
- return NULL;
}
-static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
+static void ucma_set_ctx_cm_id(struct ucma_context *ctx,
+ struct rdma_cm_id *cm_id)
{
- struct ucma_multicast *mc;
-
- mc = kzalloc(sizeof(*mc), GFP_KERNEL);
- if (!mc)
- return NULL;
-
- mutex_lock(&mut);
- mc->id = idr_alloc(&multicast_idr, NULL, 0, 0, GFP_KERNEL);
- mutex_unlock(&mut);
- if (mc->id < 0)
- goto error;
-
- mc->ctx = ctx;
- list_add_tail(&mc->list, &ctx->mc_list);
- return mc;
+ refcount_set(&ctx->ref, 1);
+ ctx->cm_id = cm_id;
+}
-error:
- kfree(mc);
- return NULL;
+static void ucma_finish_ctx(struct ucma_context *ctx)
+{
+ lockdep_assert_held(&ctx->file->mut);
+ list_add_tail(&ctx->list, &ctx->file->ctx_list);
+ xa_store(&ctx_table, ctx->id, ctx, GFP_KERNEL);
}
static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
@@ -260,7 +235,7 @@ static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
memcpy(dst->private_data, src->private_data,
src->private_data_len);
dst->private_data_len = src->private_data_len;
- dst->responder_resources =src->responder_resources;
+ dst->responder_resources = src->responder_resources;
dst->initiator_depth = src->initiator_depth;
dst->flow_control = src->flow_control;
dst->retry_count = src->retry_count;
@@ -282,10 +257,15 @@ static void ucma_copy_ud_event(struct ib_device *device,
dst->qkey = src->qkey;
}
-static void ucma_set_event_context(struct ucma_context *ctx,
- struct rdma_cm_event *event,
- struct ucma_event *uevent)
+static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx,
+ struct rdma_cm_event *event)
{
+ struct ucma_event *uevent;
+
+ uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
+ if (!uevent)
+ return NULL;
+
uevent->ctx = ctx;
switch (event->event) {
case RDMA_CM_EVENT_MULTICAST_JOIN:
@@ -300,44 +280,60 @@ static void ucma_set_event_context(struct ucma_context *ctx,
uevent->resp.id = ctx->id;
break;
}
+ uevent->resp.event = event->event;
+ uevent->resp.status = event->status;
+
+ if (event->event == RDMA_CM_EVENT_ADDRINFO_RESOLVED)
+ goto out;
+
+ if (ctx->cm_id->qp_type == IB_QPT_UD)
+ ucma_copy_ud_event(ctx->cm_id->device, &uevent->resp.param.ud,
+ &event->param.ud);
+ else
+ ucma_copy_conn_event(&uevent->resp.param.conn,
+ &event->param.conn);
+
+out:
+ uevent->resp.ece.vendor_id = event->ece.vendor_id;
+ uevent->resp.ece.attr_mod = event->ece.attr_mod;
+ return uevent;
}
-/* Called with file->mut locked for the relevant context. */
-static void ucma_removal_event_handler(struct rdma_cm_id *cm_id)
+static int ucma_connect_event_handler(struct rdma_cm_id *cm_id,
+ struct rdma_cm_event *event)
{
- struct ucma_context *ctx = cm_id->context;
- struct ucma_event *con_req_eve;
- int event_found = 0;
+ struct ucma_context *listen_ctx = cm_id->context;
+ struct ucma_context *ctx;
+ struct ucma_event *uevent;
- if (ctx->destroying)
- return;
+ if (!atomic_add_unless(&listen_ctx->backlog, -1, 0))
+ return -ENOMEM;
+ ctx = ucma_alloc_ctx(listen_ctx->file);
+ if (!ctx)
+ goto err_backlog;
+ ucma_set_ctx_cm_id(ctx, cm_id);
- /* only if context is pointing to cm_id that it owns it and can be
- * queued to be closed, otherwise that cm_id is an inflight one that
- * is part of that context event list pending to be detached and
- * reattached to its new context as part of ucma_get_event,
- * handled separately below.
- */
- if (ctx->cm_id == cm_id) {
- mutex_lock(&mut);
- ctx->closing = 1;
- mutex_unlock(&mut);
- queue_work(ctx->file->close_wq, &ctx->close_work);
- return;
- }
+ uevent = ucma_create_uevent(listen_ctx, event);
+ if (!uevent)
+ goto err_alloc;
+ uevent->conn_req_ctx = ctx;
+ uevent->resp.id = ctx->id;
- list_for_each_entry(con_req_eve, &ctx->file->event_list, list) {
- if (con_req_eve->cm_id == cm_id &&
- con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
- list_del(&con_req_eve->list);
- INIT_WORK(&con_req_eve->close_work, ucma_close_event_id);
- queue_work(ctx->file->close_wq, &con_req_eve->close_work);
- event_found = 1;
- break;
- }
- }
- if (!event_found)
- pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n");
+ ctx->cm_id->context = ctx;
+
+ mutex_lock(&ctx->file->mut);
+ ucma_finish_ctx(ctx);
+ list_add_tail(&uevent->list, &ctx->file->event_list);
+ mutex_unlock(&ctx->file->mut);
+ wake_up_interruptible(&ctx->file->poll_wait);
+ return 0;
+
+err_alloc:
+ ucma_destroy_private_ctx(ctx);
+err_backlog:
+ atomic_inc(&listen_ctx->backlog);
+ /* Returning error causes the new ID to be destroyed */
+ return -ENOMEM;
}
static int ucma_event_handler(struct rdma_cm_id *cm_id,
@@ -345,69 +341,49 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
{
struct ucma_event *uevent;
struct ucma_context *ctx = cm_id->context;
- int ret = 0;
- uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
- if (!uevent)
- return event->event == RDMA_CM_EVENT_CONNECT_REQUEST;
+ if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
+ return ucma_connect_event_handler(cm_id, event);
- mutex_lock(&ctx->file->mut);
- uevent->cm_id = cm_id;
- ucma_set_event_context(ctx, event, uevent);
- uevent->resp.event = event->event;
- uevent->resp.status = event->status;
- if (cm_id->qp_type == IB_QPT_UD)
- ucma_copy_ud_event(cm_id->device, &uevent->resp.param.ud,
- &event->param.ud);
- else
- ucma_copy_conn_event(&uevent->resp.param.conn,
- &event->param.conn);
-
- if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
- if (!ctx->backlog) {
- ret = -ENOMEM;
- kfree(uevent);
- goto out;
- }
- ctx->backlog--;
- } else if (!ctx->uid || ctx->cm_id != cm_id) {
- /*
- * We ignore events for new connections until userspace has set
- * their context. This can only happen if an error occurs on a
- * new connection before the user accepts it. This is okay,
- * since the accept will just fail later. However, we do need
- * to release the underlying HW resources in case of a device
- * removal event.
- */
- if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
- ucma_removal_event_handler(cm_id);
-
- kfree(uevent);
- goto out;
+ /*
+ * We ignore events for new connections until userspace has set their
+ * context. This can only happen if an error occurs on a new connection
+ * before the user accepts it. This is okay, since the accept will just
+ * fail later. However, we do need to release the underlying HW
+ * resources in case of a device removal event.
+ */
+ if (ctx->uid) {
+ uevent = ucma_create_uevent(ctx, event);
+ if (!uevent)
+ return 0;
+
+ mutex_lock(&ctx->file->mut);
+ list_add_tail(&uevent->list, &ctx->file->event_list);
+ mutex_unlock(&ctx->file->mut);
+ wake_up_interruptible(&ctx->file->poll_wait);
}
- list_add_tail(&uevent->list, &ctx->file->event_list);
- wake_up_interruptible(&ctx->file->poll_wait);
- if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
- ucma_removal_event_handler(cm_id);
-out:
- mutex_unlock(&ctx->file->mut);
- return ret;
+ if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
+ xa_lock(&ctx_table);
+ if (xa_load(&ctx_table, ctx->id) == ctx)
+ queue_work(system_dfl_wq, &ctx->close_work);
+ xa_unlock(&ctx_table);
+ }
+ return 0;
}
static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
int in_len, int out_len)
{
- struct ucma_context *ctx;
struct rdma_ucm_get_event cmd;
struct ucma_event *uevent;
- int ret = 0;
/*
* Old 32 bit user space does not send the 4 byte padding in the
* reserved field. We don't care, allow it to keep working.
*/
- if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved))
+ if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved) -
+ sizeof(uevent->resp.ece))
return -ENOSPC;
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
@@ -427,35 +403,25 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
mutex_lock(&file->mut);
}
- uevent = list_entry(file->event_list.next, struct ucma_event, list);
-
- if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
- ctx = ucma_alloc_ctx(file);
- if (!ctx) {
- ret = -ENOMEM;
- goto done;
- }
- uevent->ctx->backlog++;
- ctx->cm_id = uevent->cm_id;
- ctx->cm_id->context = ctx;
- uevent->resp.id = ctx->id;
- }
+ uevent = list_first_entry(&file->event_list, struct ucma_event, list);
if (copy_to_user(u64_to_user_ptr(cmd.response),
&uevent->resp,
min_t(size_t, out_len, sizeof(uevent->resp)))) {
- ret = -EFAULT;
- goto done;
+ mutex_unlock(&file->mut);
+ return -EFAULT;
}
list_del(&uevent->list);
uevent->ctx->events_reported++;
if (uevent->mc)
uevent->mc->events_reported++;
- kfree(uevent);
-done:
+ if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
+ atomic_inc(&uevent->ctx->backlog);
mutex_unlock(&file->mut);
- return ret;
+
+ kfree(uevent);
+ return 0;
}
static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
@@ -496,40 +462,32 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
if (ret)
return ret;
- mutex_lock(&file->mut);
ctx = ucma_alloc_ctx(file);
- mutex_unlock(&file->mut);
if (!ctx)
return -ENOMEM;
ctx->uid = cmd.uid;
- cm_id = __rdma_create_id(current->nsproxy->net_ns,
- ucma_event_handler, ctx, cmd.ps, qp_type, NULL);
+ cm_id = rdma_create_user_id(ucma_event_handler, ctx, cmd.ps, qp_type);
if (IS_ERR(cm_id)) {
ret = PTR_ERR(cm_id);
goto err1;
}
+ ucma_set_ctx_cm_id(ctx, cm_id);
resp.id = ctx->id;
if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp))) {
ret = -EFAULT;
- goto err2;
+ goto err1;
}
- ctx->cm_id = cm_id;
+ mutex_lock(&file->mut);
+ ucma_finish_ctx(ctx);
+ mutex_unlock(&file->mut);
return 0;
-err2:
- rdma_destroy_id(cm_id);
err1:
- mutex_lock(&mut);
- idr_remove(&ctx_idr, ctx->id);
- mutex_unlock(&mut);
- mutex_lock(&file->mut);
- list_del(&ctx->list);
- mutex_unlock(&file->mut);
- kfree(ctx);
+ ucma_destroy_private_ctx(ctx);
return ret;
}
@@ -537,19 +495,25 @@ static void ucma_cleanup_multicast(struct ucma_context *ctx)
{
struct ucma_multicast *mc, *tmp;
- mutex_lock(&mut);
+ xa_lock(&multicast_table);
list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
list_del(&mc->list);
- idr_remove(&multicast_idr, mc->id);
+ /*
+ * At this point mc->ctx->ref is 0 so the mc cannot leave the
+ * lock on the reader and this is enough serialization
+ */
+ __xa_erase(&multicast_table, mc->id);
kfree(mc);
}
- mutex_unlock(&mut);
+ xa_unlock(&multicast_table);
}
static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
{
struct ucma_event *uevent, *tmp;
+ rdma_lock_handler(mc->ctx->cm_id);
+ mutex_lock(&mc->ctx->file->mut);
list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
if (uevent->mc != mc)
continue;
@@ -557,45 +521,75 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
list_del(&uevent->list);
kfree(uevent);
}
+ mutex_unlock(&mc->ctx->file->mut);
+ rdma_unlock_handler(mc->ctx->cm_id);
}
-/*
- * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At
- * this point, no new events will be reported from the hardware. However, we
- * still need to cleanup the UCMA context for this ID. Specifically, there
- * might be events that have not yet been consumed by the user space software.
- * These might include pending connect requests which we have not completed
- * processing. We cannot call rdma_destroy_id while holding the lock of the
- * context (file->mut), as it might cause a deadlock. We therefore extract all
- * relevant events from the context pending events list while holding the
- * mutex. After that we release them as needed.
- */
-static int ucma_free_ctx(struct ucma_context *ctx)
+static int ucma_cleanup_ctx_events(struct ucma_context *ctx)
{
int events_reported;
struct ucma_event *uevent, *tmp;
LIST_HEAD(list);
-
- ucma_cleanup_multicast(ctx);
-
- /* Cleanup events not yet reported to the user. */
+ /* Cleanup events not yet reported to the user.*/
mutex_lock(&ctx->file->mut);
list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
- if (uevent->ctx == ctx)
+ if (uevent->ctx != ctx)
+ continue;
+
+ if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST &&
+ xa_cmpxchg(&ctx_table, uevent->conn_req_ctx->id,
+ uevent->conn_req_ctx, XA_ZERO_ENTRY,
+ GFP_KERNEL) == uevent->conn_req_ctx) {
list_move_tail(&uevent->list, &list);
+ continue;
+ }
+ list_del(&uevent->list);
+ kfree(uevent);
}
list_del(&ctx->list);
+ events_reported = ctx->events_reported;
mutex_unlock(&ctx->file->mut);
+ /*
+ * If this was a listening ID then any connections spawned from it that
+ * have not been delivered to userspace are cleaned up too. Must be done
+ * outside any locks.
+ */
list_for_each_entry_safe(uevent, tmp, &list, list) {
- list_del(&uevent->list);
- if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
- rdma_destroy_id(uevent->cm_id);
+ ucma_destroy_private_ctx(uevent->conn_req_ctx);
kfree(uevent);
}
+ return events_reported;
+}
- events_reported = ctx->events_reported;
+/*
+ * When this is called the xarray must have a XA_ZERO_ENTRY in the ctx->id (i.e.
+ * the ctx is not public to the user). This is either because:
+ * - ucma_finish_ctx() hasn't been called
+ * - xa_cmpxchg() succeeded in removing the entry (only one thread can succeed)
+ */
+static int ucma_destroy_private_ctx(struct ucma_context *ctx)
+{
+ int events_reported;
+
+ /*
+ * Destroy the underlying cm_id. New work queuing is prevented now by
+ * the removal from the xarray. Once the work is canceled, the ref will either
+ * be 0 because the work ran to completion and consumed the ref from the
+ * xarray, or it will be positive because we still have the ref from the
+ * xarray. The ref can also be 0 in cases where cm_id was never set.
+ */
+ cancel_work_sync(&ctx->close_work);
+ if (refcount_read(&ctx->ref))
+ ucma_close_id(&ctx->close_work);
+
+ events_reported = ucma_cleanup_ctx_events(ctx);
+ ucma_cleanup_multicast(ctx);
+
+ WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, XA_ZERO_ENTRY, NULL,
+ GFP_KERNEL) != NULL);
+ mutex_destroy(&ctx->mutex);
kfree(ctx);
return events_reported;
}
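+
+/*
+ * Lifecycle of a ctx in ctx_table (a sketch of the scheme used above):
+ * xa_alloc(..., NULL, ...) in ucma_alloc_ctx() reserves the ID as a zero
+ * entry that still reads back as NULL, ucma_finish_ctx() publishes the real
+ * pointer with xa_store(), and the destroy paths unpublish it again with
+ * xa_cmpxchg(ctx -> XA_ZERO_ENTRY) so that only one caller can reach
+ * ucma_destroy_private_ctx(), which finally swaps the zero entry for NULL
+ * and frees the ID.
+ */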
@@ -614,33 +608,19 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- mutex_lock(&mut);
+ xa_lock(&ctx_table);
ctx = _ucma_find_context(cmd.id, file);
- if (!IS_ERR(ctx))
- idr_remove(&ctx_idr, ctx->id);
- mutex_unlock(&mut);
+ if (!IS_ERR(ctx)) {
+ if (__xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY,
+ GFP_KERNEL) != ctx)
+ ctx = ERR_PTR(-ENOENT);
+ }
+ xa_unlock(&ctx_table);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- mutex_lock(&ctx->file->mut);
- ctx->destroying = 1;
- mutex_unlock(&ctx->file->mut);
-
- flush_workqueue(ctx->file->close_wq);
- /* At this point it's guaranteed that there is no inflight
- * closing task */
- mutex_lock(&mut);
- if (!ctx->closing) {
- mutex_unlock(&mut);
- ucma_put_ctx(ctx);
- wait_for_completion(&ctx->comp);
- rdma_destroy_id(ctx->cm_id);
- } else {
- mutex_unlock(&mut);
- }
-
- resp.events_reported = ucma_free_ctx(ctx);
+ resp.events_reported = ucma_destroy_private_ctx(ctx);
if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
@@ -665,7 +645,10 @@ static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
+ mutex_unlock(&ctx->mutex);
+
ucma_put_ctx(ctx);
return ret;
}
@@ -688,7 +671,9 @@ static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -712,8 +697,10 @@ static ssize_t ucma_resolve_ip(struct ucma_file *file,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
(struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -738,8 +725,32 @@ static ssize_t ucma_resolve_addr(struct ucma_file *file,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
(struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
+ mutex_unlock(&ctx->mutex);
+ ucma_put_ctx(ctx);
+ return ret;
+}
+
+static ssize_t ucma_resolve_ib_service(struct ucma_file *file,
+ const char __user *inbuf, int in_len,
+ int out_len)
+{
+ struct rdma_ucm_resolve_ib_service cmd;
+ struct ucma_context *ctx;
+ int ret;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ mutex_lock(&ctx->mutex);
+ ret = rdma_resolve_ib_service(ctx->cm_id, &cmd.ibs);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -759,7 +770,9 @@ static ssize_t ucma_resolve_route(struct ucma_file *file,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -769,8 +782,8 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
{
struct rdma_dev_addr *dev_addr;
- resp->num_paths = route->num_paths;
- switch (route->num_paths) {
+ resp->num_paths = route->num_pri_alt_paths;
+ switch (route->num_pri_alt_paths) {
case 0:
dev_addr = &route->addr.dev_addr;
rdma_addr_get_dgid(dev_addr,
@@ -782,7 +795,7 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
case 2:
ib_copy_path_rec_to_user(&resp->ib_route[1],
&route->path_rec[1]);
- /* fall through */
+ fallthrough;
case 1:
ib_copy_path_rec_to_user(&resp->ib_route[0],
&route->path_rec[0]);
@@ -796,8 +809,8 @@ static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
struct rdma_route *route)
{
- resp->num_paths = route->num_paths;
- switch (route->num_paths) {
+ resp->num_paths = route->num_pri_alt_paths;
+ switch (route->num_pri_alt_paths) {
case 0:
rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr,
(union ib_gid *)&resp->ib_route[0].dgid);
@@ -808,7 +821,7 @@ static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
case 2:
ib_copy_path_rec_to_user(&resp->ib_route[1],
&route->path_rec[1]);
- /* fall through */
+ fallthrough;
case 1:
ib_copy_path_rec_to_user(&resp->ib_route[0],
&route->path_rec[0]);
@@ -838,7 +851,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
struct sockaddr *addr;
int ret = 0;
- if (out_len < sizeof(resp))
+ if (out_len < offsetof(struct rdma_ucm_query_route_resp, ibdev_index))
return -ENOSPC;
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
@@ -848,6 +861,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
memset(&resp, 0, sizeof resp);
addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
@@ -861,6 +875,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
goto out;
resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
+ resp.ibdev_index = ctx->cm_id->device->index;
resp.port_num = ctx->cm_id->port_num;
if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num))
@@ -871,8 +886,9 @@ static ssize_t ucma_query_route(struct ucma_file *file,
ucma_copy_iw_route(&resp, &ctx->cm_id->route);
out:
- if (copy_to_user(u64_to_user_ptr(cmd.response),
- &resp, sizeof(resp)))
+ mutex_unlock(&ctx->mutex);
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp,
+ min_t(size_t, out_len, sizeof(resp))))
ret = -EFAULT;
ucma_put_ctx(ctx);
@@ -886,6 +902,7 @@ static void ucma_query_device_addr(struct rdma_cm_id *cm_id,
return;
resp->node_guid = (__force __u64) cm_id->device->node_guid;
+ resp->ibdev_index = cm_id->device->index;
resp->port_num = cm_id->port_num;
resp->pkey = (__force __u16) cpu_to_be16(
ib_addr_get_pkey(&cm_id->route.addr.dev_addr));
@@ -898,7 +915,7 @@ static ssize_t ucma_query_addr(struct ucma_context *ctx,
struct sockaddr *addr;
int ret = 0;
- if (out_len < sizeof(resp))
+ if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index))
return -ENOSPC;
memset(&resp, 0, sizeof resp);
@@ -913,7 +930,7 @@ static ssize_t ucma_query_addr(struct ucma_context *ctx,
ucma_query_device_addr(ctx->cm_id, &resp);
- if (copy_to_user(response, &resp, sizeof(resp)))
+ if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp))))
ret = -EFAULT;
return ret;
@@ -932,7 +949,7 @@ static ssize_t ucma_query_path(struct ucma_context *ctx,
if (!resp)
return -ENOMEM;
- resp->num_paths = ctx->cm_id->route.num_paths;
+ resp->num_paths = ctx->cm_id->route.num_pri_alt_paths;
for (i = 0, out_len -= sizeof(*resp);
i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
i++, out_len -= sizeof(struct ib_path_rec_data)) {
@@ -951,8 +968,7 @@ static ssize_t ucma_query_path(struct ucma_context *ctx,
}
}
- if (copy_to_user(response, resp,
- sizeof(*resp) + (i * sizeof(struct ib_path_rec_data))))
+ if (copy_to_user(response, resp, struct_size(resp, path_data, i)))
ret = -EFAULT;
kfree(resp);
@@ -966,7 +982,7 @@ static ssize_t ucma_query_gid(struct ucma_context *ctx,
struct sockaddr_ib *addr;
int ret = 0;
- if (out_len < sizeof(resp))
+ if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index))
return -ENOSPC;
memset(&resp, 0, sizeof resp);
@@ -999,12 +1015,49 @@ static ssize_t ucma_query_gid(struct ucma_context *ctx,
&ctx->cm_id->route.addr.dst_addr);
}
- if (copy_to_user(response, &resp, sizeof(resp)))
+ if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp))))
ret = -EFAULT;
return ret;
}
+static ssize_t ucma_query_ib_service(struct ucma_context *ctx,
+ void __user *response, int out_len)
+{
+ struct rdma_ucm_query_ib_service_resp *resp;
+ int n, ret = 0;
+
+ if (out_len < sizeof(struct rdma_ucm_query_ib_service_resp))
+ return -ENOSPC;
+
+ if (!ctx->cm_id->route.service_recs)
+ return -ENODATA;
+
+ resp = kzalloc(out_len, GFP_KERNEL);
+ if (!resp)
+ return -ENOMEM;
+
+ resp->num_service_recs = ctx->cm_id->route.num_service_recs;
+
+ n = (out_len - sizeof(struct rdma_ucm_query_ib_service_resp)) /
+ sizeof(struct ib_user_service_rec);
+
+ if (!n)
+ goto out;
+
+ if (n > ctx->cm_id->route.num_service_recs)
+ n = ctx->cm_id->route.num_service_recs;
+
+ memcpy(resp->recs, ctx->cm_id->route.service_recs,
+ sizeof(*resp->recs) * n);
+ if (copy_to_user(response, resp, struct_size(resp, recs, n)))
+ ret = -EFAULT;
+
+out:
+ kfree(resp);
+ return ret;
+}
+
static ssize_t ucma_query(struct ucma_file *file,
const char __user *inbuf,
int in_len, int out_len)
@@ -1022,6 +1075,7 @@ static ssize_t ucma_query(struct ucma_file *file,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
switch (cmd.option) {
case RDMA_USER_CM_QUERY_ADDR:
ret = ucma_query_addr(ctx, response, out_len);
@@ -1032,10 +1086,14 @@ static ssize_t ucma_query(struct ucma_file *file,
case RDMA_USER_CM_QUERY_GID:
ret = ucma_query_gid(ctx, response, out_len);
break;
+ case RDMA_USER_CM_QUERY_IB_SERVICE:
+ ret = ucma_query_ib_service(ctx, response, out_len);
+ break;
default:
ret = -ENOSYS;
break;
}
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
@@ -1047,25 +1105,30 @@ static void ucma_copy_conn_param(struct rdma_cm_id *id,
{
dst->private_data = src->private_data;
dst->private_data_len = src->private_data_len;
- dst->responder_resources =src->responder_resources;
+ dst->responder_resources = src->responder_resources;
dst->initiator_depth = src->initiator_depth;
dst->flow_control = src->flow_control;
dst->retry_count = src->retry_count;
dst->rnr_retry_count = src->rnr_retry_count;
dst->srq = src->srq;
- dst->qp_num = src->qp_num;
+ dst->qp_num = src->qp_num & 0xFFFFFF;
dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0;
}
static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
int in_len, int out_len)
{
- struct rdma_ucm_connect cmd;
struct rdma_conn_param conn_param;
+ struct rdma_ucm_ece ece = {};
+ struct rdma_ucm_connect cmd;
struct ucma_context *ctx;
+ size_t in_size;
int ret;
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ if (in_len < offsetofend(typeof(cmd), reserved))
+ return -EINVAL;
+ in_size = min_t(size_t, in_len, sizeof(cmd));
+ if (copy_from_user(&cmd, inbuf, in_size))
return -EFAULT;
if (!cmd.conn_param.valid)
@@ -1076,7 +1139,14 @@ static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
return PTR_ERR(ctx);
ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
- ret = rdma_connect(ctx->cm_id, &conn_param);
+ if (offsetofend(typeof(cmd), ece) <= in_size) {
+ ece.vendor_id = cmd.ece.vendor_id;
+ ece.attr_mod = cmd.ece.attr_mod;
+ }
+
+ mutex_lock(&ctx->mutex);
+ ret = rdma_connect_ece(ctx->cm_id, &conn_param, &ece);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
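Annotation (not part of the patch): ucma_connect now accepts both the legacy and the ECE-extended command layouts by length. Anything shorter than offsetofend(..., reserved) is rejected, longer input is clamped to sizeof(cmd), and the ece fields are honoured only when the copied region actually covers them. A hedged sketch of the same idiom; demo_cmd and its fields are placeholders, not the real rdma_ucm_connect layout:

#include <linux/kernel.h>
#include <linux/stddef.h>
#include <linux/uaccess.h>

struct demo_cmd {
	__u64 response;
	__u32 id;
	__u32 reserved;		/* end of the original ABI */
	__u32 ece_vendor_id;	/* extension added later */
	__u32 ece_attr_mod;
};

static int demo_parse_cmd(struct demo_cmd *cmd, const void __user *inbuf,
			  int in_len)
{
	size_t in_size;

	if (in_len < offsetofend(typeof(*cmd), reserved))
		return -EINVAL;			/* shorter than the old ABI */

	in_size = min_t(size_t, in_len, sizeof(*cmd));
	memset(cmd, 0, sizeof(*cmd));		/* absent new fields read as 0 */
	if (copy_from_user(cmd, inbuf, in_size))
		return -EFAULT;

	/* Use the extension only if the caller's buffer covered it. */
	if (offsetofend(typeof(*cmd), ece_attr_mod) > in_size)
		cmd->ece_vendor_id = cmd->ece_attr_mod = 0;
	return 0;
}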
@@ -1095,9 +1165,13 @@ static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ?
- cmd.backlog : max_backlog;
- ret = rdma_listen(ctx->cm_id, ctx->backlog);
+ if (cmd.backlog <= 0 || cmd.backlog > max_backlog)
+ cmd.backlog = max_backlog;
+ atomic_set(&ctx->backlog, cmd.backlog);
+
+ mutex_lock(&ctx->mutex);
+ ret = rdma_listen(ctx->cm_id, cmd.backlog);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -1107,26 +1181,44 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
{
struct rdma_ucm_accept cmd;
struct rdma_conn_param conn_param;
+ struct rdma_ucm_ece ece = {};
struct ucma_context *ctx;
+ size_t in_size;
int ret;
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ if (in_len < offsetofend(typeof(cmd), reserved))
+ return -EINVAL;
+ in_size = min_t(size_t, in_len, sizeof(cmd));
+ if (copy_from_user(&cmd, inbuf, in_size))
return -EFAULT;
ctx = ucma_get_ctx_dev(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ if (offsetofend(typeof(cmd), ece) <= in_size) {
+ ece.vendor_id = cmd.ece.vendor_id;
+ ece.attr_mod = cmd.ece.attr_mod;
+ }
+
if (cmd.conn_param.valid) {
ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
- mutex_lock(&file->mut);
- ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
- if (!ret)
+ mutex_lock(&ctx->mutex);
+ rdma_lock_handler(ctx->cm_id);
+ ret = rdma_accept_ece(ctx->cm_id, &conn_param, &ece);
+ if (!ret) {
+ /* The uid must be set atomically with the handler */
ctx->uid = cmd.uid;
- mutex_unlock(&file->mut);
- } else
- ret = __rdma_accept(ctx->cm_id, NULL, NULL);
-
+ }
+ rdma_unlock_handler(ctx->cm_id);
+ mutex_unlock(&ctx->mutex);
+ } else {
+ mutex_lock(&ctx->mutex);
+ rdma_lock_handler(ctx->cm_id);
+ ret = rdma_accept_ece(ctx->cm_id, NULL, &ece);
+ rdma_unlock_handler(ctx->cm_id);
+ mutex_unlock(&ctx->mutex);
+ }
ucma_put_ctx(ctx);
return ret;
}
@@ -1141,11 +1233,25 @@ static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
+ if (!cmd.reason)
+ cmd.reason = IB_CM_REJ_CONSUMER_DEFINED;
+
+ switch (cmd.reason) {
+ case IB_CM_REJ_CONSUMER_DEFINED:
+ case IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED:
+ break;
+ default:
+ return -EINVAL;
+ }
+
ctx = ucma_get_ctx_dev(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len);
+ mutex_lock(&ctx->mutex);
+ ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len,
+ cmd.reason);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -1164,7 +1270,9 @@ static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
ret = rdma_disconnect(ctx->cm_id);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -1195,7 +1303,9 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file,
resp.qp_attr_mask = 0;
memset(&qp_attr, 0, sizeof qp_attr);
qp_attr.qp_state = cmd.qp_state;
+ mutex_lock(&ctx->mutex);
ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
+ mutex_unlock(&ctx->mutex);
if (ret)
goto out;
@@ -1281,9 +1391,13 @@ static int ucma_set_ib_path(struct ucma_context *ctx,
struct sa_path_rec opa;
sa_convert_path_ib_to_opa(&opa, &sa_path);
+ mutex_lock(&ctx->mutex);
ret = rdma_set_ib_path(ctx->cm_id, &opa);
+ mutex_unlock(&ctx->mutex);
} else {
+ mutex_lock(&ctx->mutex);
ret = rdma_set_ib_path(ctx->cm_id, &sa_path);
+ mutex_unlock(&ctx->mutex);
}
if (ret)
return ret;
@@ -1316,7 +1430,9 @@ static int ucma_set_option_level(struct ucma_context *ctx, int level,
switch (level) {
case RDMA_OPTION_ID:
+ mutex_lock(&ctx->mutex);
ret = ucma_set_option_id(ctx, optname, optval, optlen);
+ mutex_unlock(&ctx->mutex);
break;
case RDMA_OPTION_IB:
ret = ucma_set_option_ib(ctx, optname, optval, optlen);
@@ -1376,8 +1492,10 @@ static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
if (ctx->cm_id->device)
ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
@@ -1411,46 +1529,59 @@ static ssize_t ucma_process_join(struct ucma_file *file,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- mutex_lock(&file->mut);
- mc = ucma_alloc_multicast(ctx);
+ mc = kzalloc(sizeof(*mc), GFP_KERNEL);
if (!mc) {
ret = -ENOMEM;
- goto err1;
+ goto err_put_ctx;
}
+
+ mc->ctx = ctx;
mc->join_state = join_state;
mc->uid = cmd->uid;
memcpy(&mc->addr, addr, cmd->addr_size);
+
+ xa_lock(&multicast_table);
+ if (__xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
+ GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto err_free_mc;
+ }
+
+ list_add_tail(&mc->list, &ctx->mc_list);
+ xa_unlock(&multicast_table);
+
+ mutex_lock(&ctx->mutex);
ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
join_state, mc);
+ mutex_unlock(&ctx->mutex);
if (ret)
- goto err2;
+ goto err_xa_erase;
resp.id = mc->id;
if (copy_to_user(u64_to_user_ptr(cmd->response),
&resp, sizeof(resp))) {
ret = -EFAULT;
- goto err3;
+ goto err_leave_multicast;
}
- mutex_lock(&mut);
- idr_replace(&multicast_idr, mc, mc->id);
- mutex_unlock(&mut);
+ xa_store(&multicast_table, mc->id, mc, 0);
- mutex_unlock(&file->mut);
ucma_put_ctx(ctx);
return 0;
-err3:
+err_leave_multicast:
+ mutex_lock(&ctx->mutex);
rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
+ mutex_unlock(&ctx->mutex);
ucma_cleanup_mc_events(mc);
-err2:
- mutex_lock(&mut);
- idr_remove(&multicast_idr, mc->id);
- mutex_unlock(&mut);
+err_xa_erase:
+ xa_lock(&multicast_table);
list_del(&mc->list);
+ __xa_erase(&multicast_table, mc->id);
+err_free_mc:
+ xa_unlock(&multicast_table);
kfree(mc);
-err1:
- mutex_unlock(&file->mut);
+err_put_ctx:
ucma_put_ctx(ctx);
return ret;
}
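Annotation (not part of the patch): the rewritten join path allocates the multicast ID with a NULL placeholder under xa_lock and only publishes the real pointer with xa_store() after rdma_join_multicast() succeeds, so a concurrent lookup can never see a half-initialized mc. A hedged sketch of the reserve-then-publish pattern; demo_table and demo_publish() are placeholders:

#include <linux/xarray.h>

static DEFINE_XARRAY_ALLOC(demo_table);	/* stand-in for multicast_table */

static int demo_publish(void *obj, u32 *out_id)
{
	int ret;

	/* Reserve an ID with a NULL entry: the index is taken, but
	 * xa_load() on it still returns NULL to other threads. */
	ret = xa_alloc(&demo_table, out_id, NULL, xa_limit_32b, GFP_KERNEL);
	if (ret)
		return ret;

	/* ... long-running setup of obj that may sleep or fail ... */

	/* Publish: lookups now see the fully initialized object. */
	xa_store(&demo_table, *out_id, obj, GFP_KERNEL);
	return 0;
}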
@@ -1508,28 +1639,30 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- mutex_lock(&mut);
- mc = idr_find(&multicast_idr, cmd.id);
+ xa_lock(&multicast_table);
+ mc = xa_load(&multicast_table, cmd.id);
if (!mc)
mc = ERR_PTR(-ENOENT);
- else if (mc->ctx->file != file)
+ else if (READ_ONCE(mc->ctx->file) != file)
mc = ERR_PTR(-EINVAL);
- else if (!atomic_inc_not_zero(&mc->ctx->ref))
+ else if (!refcount_inc_not_zero(&mc->ctx->ref))
mc = ERR_PTR(-ENXIO);
- else
- idr_remove(&multicast_idr, mc->id);
- mutex_unlock(&mut);
if (IS_ERR(mc)) {
+ xa_unlock(&multicast_table);
ret = PTR_ERR(mc);
goto out;
}
+ list_del(&mc->list);
+ __xa_erase(&multicast_table, mc->id);
+ xa_unlock(&multicast_table);
+
+ mutex_lock(&mc->ctx->mutex);
rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
- mutex_lock(&mc->ctx->file->mut);
+ mutex_unlock(&mc->ctx->mutex);
+
ucma_cleanup_mc_events(mc);
- list_del(&mc->list);
- mutex_unlock(&mc->ctx->file->mut);
ucma_put_ctx(mc->ctx);
resp.events_reported = mc->events_reported;
@@ -1542,46 +1675,15 @@ out:
return ret;
}
-static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2)
-{
- /* Acquire mutex's based on pointer comparison to prevent deadlock. */
- if (file1 < file2) {
- mutex_lock(&file1->mut);
- mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING);
- } else {
- mutex_lock(&file2->mut);
- mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING);
- }
-}
-
-static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2)
-{
- if (file1 < file2) {
- mutex_unlock(&file2->mut);
- mutex_unlock(&file1->mut);
- } else {
- mutex_unlock(&file1->mut);
- mutex_unlock(&file2->mut);
- }
-}
-
-static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file)
-{
- struct ucma_event *uevent, *tmp;
-
- list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list)
- if (uevent->ctx == ctx)
- list_move_tail(&uevent->list, &file->event_list);
-}
-
static ssize_t ucma_migrate_id(struct ucma_file *new_file,
const char __user *inbuf,
int in_len, int out_len)
{
struct rdma_ucm_migrate_id cmd;
struct rdma_ucm_migrate_resp resp;
+ struct ucma_event *uevent, *tmp;
struct ucma_context *ctx;
- struct fd f;
+ LIST_HEAD(event_list);
struct ucma_file *cur_file;
int ret = 0;
@@ -1589,50 +1691,106 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
return -EFAULT;
/* Get current fd to protect against it being closed */
- f = fdget(cmd.fd);
- if (!f.file)
+ CLASS(fd, f)(cmd.fd);
+ if (fd_empty(f))
return -ENOENT;
- if (f.file->f_op != &ucma_fops) {
- ret = -EINVAL;
- goto file_put;
- }
+ if (fd_file(f)->f_op != &ucma_fops)
+ return -EINVAL;
+ cur_file = fd_file(f)->private_data;
/* Validate current fd and prevent destruction of id. */
- ctx = ucma_get_ctx(f.file->private_data, cmd.id);
- if (IS_ERR(ctx)) {
- ret = PTR_ERR(ctx);
- goto file_put;
- }
+ ctx = ucma_get_ctx(cur_file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
- cur_file = ctx->file;
- if (cur_file == new_file) {
- resp.events_reported = ctx->events_reported;
- goto response;
+ rdma_lock_handler(ctx->cm_id);
+ /*
+ * ctx->file can only be changed under the handler & xa_lock. xa_load()
+ * must be checked again to ensure the ctx hasn't begun destruction
+ * since the ucma_get_ctx().
+ */
+ xa_lock(&ctx_table);
+ if (_ucma_find_context(cmd.id, cur_file) != ctx) {
+ xa_unlock(&ctx_table);
+ ret = -ENOENT;
+ goto err_unlock;
}
+ ctx->file = new_file;
+ xa_unlock(&ctx_table);
+ mutex_lock(&cur_file->mut);
+ list_del(&ctx->list);
/*
- * Migrate events between fd's, maintaining order, and avoiding new
- * events being added before existing events.
+ * At this point lock_handler() prevents addition of new uevents for
+ * this ctx.
*/
- ucma_lock_files(cur_file, new_file);
- mutex_lock(&mut);
-
- list_move_tail(&ctx->list, &new_file->ctx_list);
- ucma_move_events(ctx, new_file);
- ctx->file = new_file;
+ list_for_each_entry_safe(uevent, tmp, &cur_file->event_list, list)
+ if (uevent->ctx == ctx)
+ list_move_tail(&uevent->list, &event_list);
resp.events_reported = ctx->events_reported;
+ mutex_unlock(&cur_file->mut);
- mutex_unlock(&mut);
- ucma_unlock_files(cur_file, new_file);
+ mutex_lock(&new_file->mut);
+ list_add_tail(&ctx->list, &new_file->ctx_list);
+ list_splice_tail(&event_list, &new_file->event_list);
+ mutex_unlock(&new_file->mut);
-response:
if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
+err_unlock:
+ rdma_unlock_handler(ctx->cm_id);
+ ucma_put_ctx(ctx);
+ return ret;
+}
+
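Annotation (not part of the patch): the migrate path above uses the scope-based CLASS(fd, ...) guard, so the fd reference is dropped automatically when f goes out of scope and the old file_put error label disappears. A minimal sketch of the guard; demo_check_fd() and expected_fops are placeholders:

#include <linux/file.h>

static int demo_check_fd(int num, const struct file_operations *expected_fops)
{
	CLASS(fd, f)(num);	/* takes the reference; released on every return */

	if (fd_empty(f))
		return -ENOENT;
	if (fd_file(f)->f_op != expected_fops)
		return -EINVAL;
	return 0;
}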
+static ssize_t ucma_write_cm_event(struct ucma_file *file,
+ const char __user *inbuf, int in_len,
+ int out_len)
+{
+ struct rdma_ucm_write_cm_event cmd;
+ struct rdma_cm_event event = {};
+ struct ucma_event *uevent;
+ struct ucma_context *ctx;
+ int ret = 0;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ if ((cmd.event != RDMA_CM_EVENT_USER) &&
+ (cmd.event != RDMA_CM_EVENT_INTERNAL))
+ return -EINVAL;
+
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ event.event = cmd.event;
+ event.status = cmd.status;
+ event.param.arg = cmd.param.arg;
+
+ uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
+ if (!uevent) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ uevent->ctx = ctx;
+ uevent->resp.uid = ctx->uid;
+ uevent->resp.id = ctx->id;
+ uevent->resp.event = event.event;
+ uevent->resp.status = event.status;
+ memcpy(uevent->resp.param.arg32, &event.param.arg,
+ sizeof(event.param.arg));
+
+ mutex_lock(&ctx->file->mut);
+ list_add_tail(&uevent->list, &ctx->file->event_list);
+ mutex_unlock(&ctx->file->mut);
+ wake_up_interruptible(&ctx->file->poll_wait);
+
+out:
ucma_put_ctx(ctx);
-file_put:
- fdput(f);
return ret;
}
@@ -1661,7 +1819,9 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
[RDMA_USER_CM_CMD_QUERY] = ucma_query,
[RDMA_USER_CM_CMD_BIND] = ucma_bind,
[RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr,
- [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast
+ [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast,
+ [RDMA_USER_CM_CMD_RESOLVE_IB_SERVICE] = ucma_resolve_ib_service,
+ [RDMA_USER_CM_CMD_WRITE_CM_EVENT] = ucma_write_cm_event,
};
static ssize_t ucma_write(struct file *filp, const char __user *buf,
@@ -1672,8 +1832,8 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf,
ssize_t ret;
if (!ib_safe_file_access(filp)) {
- pr_err_once("ucma_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
- task_tgid_vnr(current), current->comm);
+ pr_err_once("%s: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
+ __func__, task_tgid_vnr(current), current->comm);
return -EACCES;
}
@@ -1729,13 +1889,6 @@ static int ucma_open(struct inode *inode, struct file *filp)
if (!file)
return -ENOMEM;
- file->close_wq = alloc_ordered_workqueue("ucma_close_id",
- WQ_MEM_RECLAIM);
- if (!file->close_wq) {
- kfree(file);
- return -ENOMEM;
- }
-
INIT_LIST_HEAD(&file->event_list);
INIT_LIST_HEAD(&file->ctx_list);
init_waitqueue_head(&file->poll_wait);
@@ -1750,40 +1903,23 @@ static int ucma_open(struct inode *inode, struct file *filp)
static int ucma_close(struct inode *inode, struct file *filp)
{
struct ucma_file *file = filp->private_data;
- struct ucma_context *ctx, *tmp;
-
- mutex_lock(&file->mut);
- list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) {
- ctx->destroying = 1;
- mutex_unlock(&file->mut);
-
- mutex_lock(&mut);
- idr_remove(&ctx_idr, ctx->id);
- mutex_unlock(&mut);
- flush_workqueue(file->close_wq);
- /* At that step once ctx was marked as destroying and workqueue
- * was flushed we are safe from any inflights handlers that
- * might put other closing task.
- */
- mutex_lock(&mut);
- if (!ctx->closing) {
- mutex_unlock(&mut);
- ucma_put_ctx(ctx);
- wait_for_completion(&ctx->comp);
- /* rdma_destroy_id ensures that no event handlers are
- * inflight for that id before releasing it.
- */
- rdma_destroy_id(ctx->cm_id);
- } else {
- mutex_unlock(&mut);
- }
+ /*
+ * All paths that touch ctx_list or a ctx starting from write() are
+ * prevented by this being a FD release function. The list_add_tail() in
+ * ucma_connect_event_handler() can run concurrently, however it only
+ * adds to the list *after* a listening ID. By only reading the first of
+ * the list, and relying on ucma_destroy_private_ctx() to block
+ * ucma_connect_event_handler(), no additional locking is needed.
+ */
+ while (!list_empty(&file->ctx_list)) {
+ struct ucma_context *ctx = list_first_entry(
+ &file->ctx_list, struct ucma_context, list);
- ucma_free_ctx(ctx);
- mutex_lock(&file->mut);
+ WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY,
+ GFP_KERNEL) != ctx);
+ ucma_destroy_private_ctx(ctx);
}
- mutex_unlock(&file->mut);
- destroy_workqueue(file->close_wq);
kfree(file);
return 0;
}
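Annotation (not part of the patch): ucma_close swaps each context's slot to XA_ZERO_ENTRY before destroying it, so the index stays allocated (the ID cannot be reused while teardown is in flight) while xa_load() from other paths already returns NULL. A hedged sketch of that hide-then-erase step; demo_ names are placeholders:

#include <linux/xarray.h>

static void demo_hide_then_destroy(struct xarray *demo_table, u32 id, void *obj)
{
	/* Swap the pointer for a reserved (zero) entry: xa_load() now returns
	 * NULL, but the index stays allocated so the ID cannot be reused. */
	WARN_ON(xa_cmpxchg(demo_table, id, obj, XA_ZERO_ENTRY,
			   GFP_KERNEL) != obj);

	/* ... tear obj down with no new lookups possible ... */

	xa_erase(demo_table, id);	/* finally release the index */
}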
@@ -1794,7 +1930,6 @@ static const struct file_operations ucma_fops = {
.release = ucma_close,
.write = ucma_write,
.poll = ucma_poll,
- .llseek = no_llseek,
};
static struct miscdevice ucma_misc = {
@@ -1805,13 +1940,25 @@ static struct miscdevice ucma_misc = {
.fops = &ucma_fops,
};
-static ssize_t show_abi_version(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static int ucma_get_global_nl_info(struct ib_client_nl_info *res)
{
- return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
+ res->abi = RDMA_USER_CM_ABI_VERSION;
+ res->cdev = ucma_misc.this_device;
+ return 0;
}
-static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
+
+static struct ib_client rdma_cma_client = {
+ .name = "rdma_cm",
+ .get_global_nl_info = ucma_get_global_nl_info,
+};
+MODULE_ALIAS_RDMA_CLIENT("rdma_cm");
+
+static ssize_t abi_version_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
+}
+static DEVICE_ATTR_RO(abi_version);
static int __init ucma_init(void)
{
@@ -1833,7 +1980,14 @@ static int __init ucma_init(void)
ret = -ENOMEM;
goto err2;
}
+
+ ret = ib_register_client(&rdma_cma_client);
+ if (ret)
+ goto err3;
+
return 0;
+err3:
+ unregister_net_sysctl_table(ucma_ctl_table_hdr);
err2:
device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
err1:
@@ -1843,11 +1997,10 @@ err1:
static void __exit ucma_cleanup(void)
{
+ ib_unregister_client(&rdma_cma_client);
unregister_net_sysctl_table(ucma_ctl_table_hdr);
device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
misc_deregister(&ucma_misc);
- idr_destroy(&ctx_idr);
- idr_destroy(&multicast_idr);
}
module_init(ucma_init);
diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c
index 29a45d2f8898..8d3dfef9ebaa 100644
--- a/drivers/infiniband/core/ud_header.c
+++ b/drivers/infiniband/core/ud_header.c
@@ -41,7 +41,7 @@
#define STRUCT_FIELD(header, field) \
.struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \
- .struct_size_bytes = sizeof ((struct ib_unpacked_ ## header *) 0)->field, \
+ .struct_size_bytes = sizeof_field(struct ib_unpacked_ ## header, field), \
.field_name = #header ":" #field
static const struct ib_field lrh_table[] = {
@@ -462,86 +462,3 @@ int ib_ud_header_pack(struct ib_ud_header *header,
return len;
}
EXPORT_SYMBOL(ib_ud_header_pack);
-
-/**
- * ib_ud_header_unpack - Unpack UD header struct from wire format
- * @header:UD header struct
- * @buf:Buffer to pack into
- *
- * ib_ud_header_pack() unpacks the UD header structure @header from wire
- * format in the buffer @buf.
- */
-int ib_ud_header_unpack(void *buf,
- struct ib_ud_header *header)
-{
- ib_unpack(lrh_table, ARRAY_SIZE(lrh_table),
- buf, &header->lrh);
- buf += IB_LRH_BYTES;
-
- if (header->lrh.link_version != 0) {
- pr_warn("Invalid LRH.link_version %d\n",
- header->lrh.link_version);
- return -EINVAL;
- }
-
- switch (header->lrh.link_next_header) {
- case IB_LNH_IBA_LOCAL:
- header->grh_present = 0;
- break;
-
- case IB_LNH_IBA_GLOBAL:
- header->grh_present = 1;
- ib_unpack(grh_table, ARRAY_SIZE(grh_table),
- buf, &header->grh);
- buf += IB_GRH_BYTES;
-
- if (header->grh.ip_version != 6) {
- pr_warn("Invalid GRH.ip_version %d\n",
- header->grh.ip_version);
- return -EINVAL;
- }
- if (header->grh.next_header != 0x1b) {
- pr_warn("Invalid GRH.next_header 0x%02x\n",
- header->grh.next_header);
- return -EINVAL;
- }
- break;
-
- default:
- pr_warn("Invalid LRH.link_next_header %d\n",
- header->lrh.link_next_header);
- return -EINVAL;
- }
-
- ib_unpack(bth_table, ARRAY_SIZE(bth_table),
- buf, &header->bth);
- buf += IB_BTH_BYTES;
-
- switch (header->bth.opcode) {
- case IB_OPCODE_UD_SEND_ONLY:
- header->immediate_present = 0;
- break;
- case IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE:
- header->immediate_present = 1;
- break;
- default:
- pr_warn("Invalid BTH.opcode 0x%02x\n", header->bth.opcode);
- return -EINVAL;
- }
-
- if (header->bth.transport_header_version != 0) {
- pr_warn("Invalid BTH.transport_header_version %d\n",
- header->bth.transport_header_version);
- return -EINVAL;
- }
-
- ib_unpack(deth_table, ARRAY_SIZE(deth_table),
- buf, &header->deth);
- buf += IB_DETH_BYTES;
-
- if (header->immediate_present)
- memcpy(&header->immediate_data, buf, sizeof header->immediate_data);
-
- return 0;
-}
-EXPORT_SYMBOL(ib_ud_header_unpack);
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index e7ea819fcb11..8137031c2a65 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -2,6 +2,7 @@
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2020 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -39,94 +40,32 @@
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
+#include <linux/count_zeros.h>
#include <rdma/ib_umem_odp.h>
#include "uverbs.h"
-static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
-{
- struct sg_page_iter sg_iter;
- struct page *page;
-
- if (umem->nmap > 0)
- ib_dma_unmap_sg(dev, umem->sg_head.sgl, umem->sg_nents,
- DMA_BIDIRECTIONAL);
-
- for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
- page = sg_page_iter_page(&sg_iter);
- if (!PageDirty(page) && umem->writable && dirty)
- set_page_dirty_lock(page);
- put_page(page);
- }
-
- sg_free_table(&umem->sg_head);
-}
+#define RESCHED_LOOP_CNT_THRESHOLD 0x1000
-/* ib_umem_add_sg_table - Add N contiguous pages to scatter table
- *
- * sg: current scatterlist entry
- * page_list: array of npage struct page pointers
- * npages: number of pages in page_list
- * max_seg_sz: maximum segment size in bytes
- * nents: [out] number of entries in the scatterlist
- *
- * Return new end of scatterlist
- */
-static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg,
- struct page **page_list,
- unsigned long npages,
- unsigned int max_seg_sz,
- int *nents)
+static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
- unsigned long first_pfn;
- unsigned long i = 0;
- bool update_cur_sg = false;
- bool first = !sg_page(sg);
-
- /* Check if new page_list is contiguous with end of previous page_list.
- * sg->length here is a multiple of PAGE_SIZE and sg->offset is 0.
- */
- if (!first && (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) ==
- page_to_pfn(page_list[0])))
- update_cur_sg = true;
-
- while (i != npages) {
- unsigned long len;
- struct page *first_page = page_list[i];
+ bool make_dirty = umem->writable && dirty;
+ struct scatterlist *sg;
+ unsigned int i;
- first_pfn = page_to_pfn(first_page);
+ if (dirty)
+ ib_dma_unmap_sgtable_attrs(dev, &umem->sgt_append.sgt,
+ DMA_BIDIRECTIONAL, 0);
- /* Compute the number of contiguous pages we have starting
- * at i
- */
- for (len = 0; i != npages &&
- first_pfn + len == page_to_pfn(page_list[i]) &&
- len < (max_seg_sz >> PAGE_SHIFT);
- len++)
- i++;
-
- /* Squash N contiguous pages from page_list into current sge */
- if (update_cur_sg) {
- if ((max_seg_sz - sg->length) >= (len << PAGE_SHIFT)) {
- sg_set_page(sg, sg_page(sg),
- sg->length + (len << PAGE_SHIFT),
- 0);
- update_cur_sg = false;
- continue;
- }
- update_cur_sg = false;
- }
+ for_each_sgtable_sg(&umem->sgt_append.sgt, sg, i) {
+ unpin_user_page_range_dirty_lock(sg_page(sg),
+ DIV_ROUND_UP(sg->length, PAGE_SIZE), make_dirty);
- /* Squash N contiguous pages into next sge or first sge */
- if (!first)
- sg = sg_next(sg);
-
- (*nents)++;
- sg_set_page(sg, first_page, len << PAGE_SHIFT, 0);
- first = false;
+ if (i && !(i % RESCHED_LOOP_CNT_THRESHOLD))
+ cond_resched();
}
- return sg;
+ sg_free_append_table(&umem->sgt_append);
}
/**
@@ -147,77 +86,94 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
unsigned long pgsz_bitmap,
unsigned long virt)
{
- struct scatterlist *sg;
- unsigned int best_pg_bit;
+ unsigned long curr_len = 0;
+ dma_addr_t curr_base = ~0;
unsigned long va, pgoff;
+ struct scatterlist *sg;
dma_addr_t mask;
+ dma_addr_t end;
int i;
- /* At minimum, drivers must support PAGE_SIZE or smaller */
- if (WARN_ON(!(pgsz_bitmap & GENMASK(PAGE_SHIFT, 0))))
- return 0;
+ umem->iova = va = virt;
- va = virt;
- /* max page size not to exceed MR length */
- mask = roundup_pow_of_two(umem->length);
+ if (umem->is_odp) {
+ unsigned int page_size = BIT(to_ib_umem_odp(umem)->page_shift);
+
+ /* ODP must always be self consistent. */
+ if (!(pgsz_bitmap & page_size))
+ return 0;
+ return page_size;
+ }
+
+ /* The best result is the smallest page size that results in the minimum
+ * number of required pages. Compute the largest page size that could
+ * work based on VA address bits that don't change.
+ */
+ mask = pgsz_bitmap &
+ GENMASK(BITS_PER_LONG - 1,
+ bits_per((umem->length - 1 + virt) ^ virt));
/* offset into first SGL */
pgoff = umem->address & ~PAGE_MASK;
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
- /* Walk SGL and reduce max page size if VA/PA bits differ
- * for any address.
+ for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) {
+ /* If the current entry is physically contiguous with the previous
+ * one, no need to take its start addresses into consideration.
*/
- mask |= (sg_dma_address(sg) + pgoff) ^ va;
- if (i && i != (umem->nmap - 1))
- /* restrict by length as well for interior SGEs */
- mask |= sg_dma_len(sg);
+ if (check_add_overflow(curr_base, curr_len, &end) ||
+ end != sg_dma_address(sg)) {
+
+ curr_base = sg_dma_address(sg);
+ curr_len = 0;
+
+ /* Reduce max page size if VA/PA bits differ */
+ mask |= (curr_base + pgoff) ^ va;
+
+ /* The alignment of any VA matching a discontinuity point
+ * in the physical memory sets the maximum possible page
+ * size as this must be a starting point of a new page that
+ * needs to be aligned.
+ */
+ if (i != 0)
+ mask |= va;
+ }
+
+ curr_len += sg_dma_len(sg);
va += sg_dma_len(sg) - pgoff;
+
pgoff = 0;
}
- best_pg_bit = rdma_find_pg_bit(mask, pgsz_bitmap);
- return BIT_ULL(best_pg_bit);
+ /* The mask accumulates 1's in each position where the VA and physical
+ * address differ, thus the length of trailing 0 is the largest page
+ * size that can pass the VA through to the physical.
+ */
+ if (mask)
+ pgsz_bitmap &= GENMASK(count_trailing_zeros(mask), 0);
+ return pgsz_bitmap ? rounddown_pow_of_two(pgsz_bitmap) : 0;
}
EXPORT_SYMBOL(ib_umem_find_best_pgsz);
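Annotation (not part of the patch): the mask accumulated above has a bit set at every position where the IOVA and the DMA addresses (or interior segment boundaries) differ, so the lowest set bit bounds the largest page size that can translate the VA straight through. A hedged sketch of only that final selection step:

#include <linux/bits.h>
#include <linux/count_zeros.h>
#include <linux/log2.h>

static unsigned long demo_best_pgsz(unsigned long pgsz_bitmap,
				    unsigned long mask)
{
	if (mask)	/* keep only page sizes up to 2^(lowest differing bit) */
		pgsz_bitmap &= GENMASK(count_trailing_zeros(mask), 0);
	return pgsz_bitmap ? rounddown_pow_of_two(pgsz_bitmap) : 0;
}

For example (illustrative numbers), with iova 0x201000 mapped at DMA address 0x84201000 in one contiguous segment, the XOR is 0x84000000 and its lowest set bit is bit 26, so any supported page size up to 64 MiB still maps the VA directly; the result is the largest supported size at or below that.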
/**
* ib_umem_get - Pin and DMA map userspace memory.
*
- * If access flags indicate ODP memory, avoid pinning. Instead, stores
- * the mm for future page fault handling in conjunction with MMU notifiers.
- *
- * @udata: userspace context to pin memory for
+ * @device: IB device to connect UMEM
* @addr: userspace virtual address to start at
* @size: length of region to pin
* @access: IB_ACCESS_xxx flags for memory being pinned
- * @dmasync: flush in-flight DMA when the memory region is written
*/
-struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
- size_t size, int access, int dmasync)
+struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
+ size_t size, int access)
{
- struct ib_ucontext *context;
struct ib_umem *umem;
struct page **page_list;
unsigned long lock_limit;
unsigned long new_pinned;
unsigned long cur_base;
+ unsigned long dma_attr = 0;
struct mm_struct *mm;
unsigned long npages;
- int ret;
- unsigned long dma_attrs = 0;
- struct scatterlist *sg;
- unsigned int gup_flags = FOLL_WRITE;
-
- if (!udata)
- return ERR_PTR(-EIO);
-
- context = container_of(udata, struct uverbs_attr_bundle, driver_udata)
- ->context;
- if (!context)
- return ERR_PTR(-EIO);
-
- if (dmasync)
- dma_attrs |= DMA_ATTR_WRITE_BARRIER;
+ int pinned, ret;
+ unsigned int gup_flags = FOLL_LONGTERM;
/*
* If the combination of the addr and size requested for this memory
@@ -230,37 +186,24 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
if (!can_do_mlock())
return ERR_PTR(-EPERM);
- if (access & IB_ACCESS_ON_DEMAND) {
- umem = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL);
- if (!umem)
- return ERR_PTR(-ENOMEM);
- umem->is_odp = 1;
- } else {
- umem = kzalloc(sizeof(*umem), GFP_KERNEL);
- if (!umem)
- return ERR_PTR(-ENOMEM);
- }
+ if (access & IB_ACCESS_ON_DEMAND)
+ return ERR_PTR(-EOPNOTSUPP);
- umem->context = context;
+ umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+ if (!umem)
+ return ERR_PTR(-ENOMEM);
+ umem->ibdev = device;
umem->length = size;
umem->address = addr;
- umem->page_shift = PAGE_SHIFT;
+ /*
+ * Drivers should call ib_umem_find_best_pgsz() to set the iova
+ * correctly.
+ */
+ umem->iova = addr;
umem->writable = ib_access_writable(access);
umem->owning_mm = mm = current->mm;
mmgrab(mm);
- if (access & IB_ACCESS_ON_DEMAND) {
- if (WARN_ON_ONCE(!context->invalidate_range)) {
- ret = -EINVAL;
- goto umem_kfree;
- }
-
- ret = ib_umem_odp_get(to_ib_umem_odp(umem), access);
- if (ret)
- goto umem_kfree;
- return umem;
- }
-
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list) {
ret = -ENOMEM;
@@ -284,56 +227,44 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
cur_base = addr & PAGE_MASK;
- ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
- if (ret)
- goto vma;
-
- if (!umem->writable)
- gup_flags |= FOLL_FORCE;
-
- sg = umem->sg_head.sgl;
+ if (umem->writable)
+ gup_flags |= FOLL_WRITE;
while (npages) {
- down_read(&mm->mmap_sem);
- ret = get_user_pages(cur_base,
- min_t(unsigned long, npages,
- PAGE_SIZE / sizeof (struct page *)),
- gup_flags | FOLL_LONGTERM,
- page_list, NULL);
- if (ret < 0) {
- up_read(&mm->mmap_sem);
+ cond_resched();
+ pinned = pin_user_pages_fast(cur_base,
+ min_t(unsigned long, npages,
+ PAGE_SIZE /
+ sizeof(struct page *)),
+ gup_flags, page_list);
+ if (pinned < 0) {
+ ret = pinned;
goto umem_release;
}
- cur_base += ret * PAGE_SIZE;
- npages -= ret;
-
- sg = ib_umem_add_sg_table(sg, page_list, ret,
- dma_get_max_seg_size(context->device->dma_device),
- &umem->sg_nents);
-
- up_read(&mm->mmap_sem);
+ cur_base += pinned * PAGE_SIZE;
+ npages -= pinned;
+ ret = sg_alloc_append_table_from_pages(
+ &umem->sgt_append, page_list, pinned, 0,
+ pinned << PAGE_SHIFT, ib_dma_max_seg_size(device),
+ npages, GFP_KERNEL);
+ if (ret) {
+ unpin_user_pages_dirty_lock(page_list, pinned, 0);
+ goto umem_release;
+ }
}
- sg_mark_end(sg);
-
- umem->nmap = ib_dma_map_sg_attrs(context->device,
- umem->sg_head.sgl,
- umem->sg_nents,
- DMA_BIDIRECTIONAL,
- dma_attrs);
+ if (access & IB_ACCESS_RELAXED_ORDERING)
+ dma_attr |= DMA_ATTR_WEAK_ORDERING;
- if (!umem->nmap) {
- ret = -ENOMEM;
+ ret = ib_dma_map_sgtable_attrs(device, &umem->sgt_append.sgt,
+ DMA_BIDIRECTIONAL, dma_attr);
+ if (ret)
goto umem_release;
- }
-
- ret = 0;
goto out;
umem_release:
- __ib_umem_release(context->device, umem, 0);
-vma:
+ __ib_umem_release(device, umem, 0);
atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
out:
free_page((unsigned long) page_list);
@@ -346,51 +277,27 @@ umem_kfree:
}
EXPORT_SYMBOL(ib_umem_get);
-static void __ib_umem_release_tail(struct ib_umem *umem)
-{
- mmdrop(umem->owning_mm);
- if (umem->is_odp)
- kfree(to_ib_umem_odp(umem));
- else
- kfree(umem);
-}
-
/**
* ib_umem_release - release memory pinned with ib_umem_get
* @umem: umem struct to release
*/
void ib_umem_release(struct ib_umem *umem)
{
- if (umem->is_odp) {
- ib_umem_odp_release(to_ib_umem_odp(umem));
- __ib_umem_release_tail(umem);
+ if (!umem)
return;
- }
+ if (umem->is_dmabuf)
+ return ib_umem_dmabuf_release(to_ib_umem_dmabuf(umem));
+ if (umem->is_odp)
+ return ib_umem_odp_release(to_ib_umem_odp(umem));
- __ib_umem_release(umem->context->device, umem, 1);
+ __ib_umem_release(umem->ibdev, umem, 1);
atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
- __ib_umem_release_tail(umem);
+ mmdrop(umem->owning_mm);
+ kfree(umem);
}
EXPORT_SYMBOL(ib_umem_release);
-int ib_umem_page_count(struct ib_umem *umem)
-{
- int i;
- int n;
- struct scatterlist *sg;
-
- if (umem->is_odp)
- return ib_umem_num_pages(umem);
-
- n = 0;
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
- n += sg_dma_len(sg) >> umem->page_shift;
-
- return n;
-}
-EXPORT_SYMBOL(ib_umem_page_count);
-
/*
* Copy from the given ib_umem's pages to the given buffer.
*
@@ -408,12 +315,13 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
int ret;
if (offset > umem->length || length > umem->length - offset) {
- pr_err("ib_umem_copy_from not in range. offset: %zd umem length: %zd end: %zd\n",
- offset, umem->length, end);
+ pr_err("%s not in range. offset: %zd umem length: %zd end: %zd\n",
+ __func__, offset, umem->length, end);
return -EINVAL;
}
- ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->sg_nents, dst, length,
+ ret = sg_pcopy_to_buffer(umem->sgt_append.sgt.sgl,
+ umem->sgt_append.sgt.orig_nents, dst, length,
offset + ib_umem_offset(umem));
if (ret < 0)
diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c
new file mode 100644
index 000000000000..0ec2e4120cc9
--- /dev/null
+++ b/drivers/infiniband/core/umem_dmabuf.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright (c) 2020 Intel Corporation. All rights reserved.
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/dma-resv.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+
+#include "uverbs.h"
+
+MODULE_IMPORT_NS("DMA_BUF");
+
+int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf)
+{
+ struct sg_table *sgt;
+ struct scatterlist *sg;
+ unsigned long start, end, cur = 0;
+ unsigned int nmap = 0;
+ long ret;
+ int i;
+
+ dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);
+
+ if (umem_dmabuf->revoked)
+ return -EINVAL;
+
+ if (umem_dmabuf->sgt)
+ goto wait_fence;
+
+ sgt = dma_buf_map_attachment(umem_dmabuf->attach,
+ DMA_BIDIRECTIONAL);
+ if (IS_ERR(sgt))
+ return PTR_ERR(sgt);
+
+ /* modify the sg list in-place to match umem address and length */
+
+ start = ALIGN_DOWN(umem_dmabuf->umem.address, PAGE_SIZE);
+ end = ALIGN(umem_dmabuf->umem.address + umem_dmabuf->umem.length,
+ PAGE_SIZE);
+ for_each_sgtable_dma_sg(sgt, sg, i) {
+ if (start < cur + sg_dma_len(sg) && cur < end)
+ nmap++;
+ if (cur <= start && start < cur + sg_dma_len(sg)) {
+ unsigned long offset = start - cur;
+
+ umem_dmabuf->first_sg = sg;
+ umem_dmabuf->first_sg_offset = offset;
+ sg_dma_address(sg) += offset;
+ sg_dma_len(sg) -= offset;
+ cur += offset;
+ }
+ if (cur < end && end <= cur + sg_dma_len(sg)) {
+ unsigned long trim = cur + sg_dma_len(sg) - end;
+
+ umem_dmabuf->last_sg = sg;
+ umem_dmabuf->last_sg_trim = trim;
+ sg_dma_len(sg) -= trim;
+ break;
+ }
+ cur += sg_dma_len(sg);
+ }
+
+ umem_dmabuf->umem.sgt_append.sgt.sgl = umem_dmabuf->first_sg;
+ umem_dmabuf->umem.sgt_append.sgt.nents = nmap;
+ umem_dmabuf->sgt = sgt;
+
+wait_fence:
+ /*
+ * Although the sg list is valid now, the content of the pages
+ * may not be up-to-date. Wait for the exporter to finish
+ * the migration.
+ */
+ ret = dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv,
+ DMA_RESV_USAGE_KERNEL,
+ false, MAX_SCHEDULE_TIMEOUT);
+ if (ret < 0)
+ return ret;
+ if (ret == 0)
+ return -ETIMEDOUT;
+ return 0;
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_map_pages);
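Annotation (not part of the patch): a worked example of the in-place trim above, with illustrative numbers and 4 KiB pages. With umem.address = 0x3000 and umem.length = 0x4000, start = 0x3000 and end = 0x7000. If the exporter returns two sg entries of 0x5000 and 0x4000 bytes, the first becomes first_sg with first_sg_offset = 0x3000 (dma_address advanced by 0x3000, length cut to 0x2000), the second becomes last_sg with last_sg_trim = 0x2000 (length cut to 0x2000), and nmap = 2. ib_umem_dmabuf_unmap_pages() undoes exactly those two adjustments before handing the sg table back to the exporter.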
+
+void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf)
+{
+ dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);
+
+ if (!umem_dmabuf->sgt)
+ return;
+
+ /* restore the original sg list */
+ if (umem_dmabuf->first_sg) {
+ sg_dma_address(umem_dmabuf->first_sg) -=
+ umem_dmabuf->first_sg_offset;
+ sg_dma_len(umem_dmabuf->first_sg) +=
+ umem_dmabuf->first_sg_offset;
+ umem_dmabuf->first_sg = NULL;
+ umem_dmabuf->first_sg_offset = 0;
+ }
+ if (umem_dmabuf->last_sg) {
+ sg_dma_len(umem_dmabuf->last_sg) +=
+ umem_dmabuf->last_sg_trim;
+ umem_dmabuf->last_sg = NULL;
+ umem_dmabuf->last_sg_trim = 0;
+ }
+
+ dma_buf_unmap_attachment(umem_dmabuf->attach, umem_dmabuf->sgt,
+ DMA_BIDIRECTIONAL);
+
+ umem_dmabuf->sgt = NULL;
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_unmap_pages);
+
+static struct ib_umem_dmabuf *
+ib_umem_dmabuf_get_with_dma_device(struct ib_device *device,
+ struct device *dma_device,
+ unsigned long offset, size_t size,
+ int fd, int access,
+ const struct dma_buf_attach_ops *ops)
+{
+ struct dma_buf *dmabuf;
+ struct ib_umem_dmabuf *umem_dmabuf;
+ struct ib_umem *umem;
+ unsigned long end;
+ struct ib_umem_dmabuf *ret = ERR_PTR(-EINVAL);
+
+ if (check_add_overflow(offset, (unsigned long)size, &end))
+ return ret;
+
+ if (unlikely(!ops || !ops->move_notify))
+ return ret;
+
+ dmabuf = dma_buf_get(fd);
+ if (IS_ERR(dmabuf))
+ return ERR_CAST(dmabuf);
+
+ if (dmabuf->size < end)
+ goto out_release_dmabuf;
+
+ umem_dmabuf = kzalloc(sizeof(*umem_dmabuf), GFP_KERNEL);
+ if (!umem_dmabuf) {
+ ret = ERR_PTR(-ENOMEM);
+ goto out_release_dmabuf;
+ }
+
+ umem = &umem_dmabuf->umem;
+ umem->ibdev = device;
+ umem->length = size;
+ umem->address = offset;
+ umem->writable = ib_access_writable(access);
+ umem->is_dmabuf = 1;
+
+ if (!ib_umem_num_pages(umem))
+ goto out_free_umem;
+
+ umem_dmabuf->attach = dma_buf_dynamic_attach(
+ dmabuf,
+ dma_device,
+ ops,
+ umem_dmabuf);
+ if (IS_ERR(umem_dmabuf->attach)) {
+ ret = ERR_CAST(umem_dmabuf->attach);
+ goto out_free_umem;
+ }
+ return umem_dmabuf;
+
+out_free_umem:
+ kfree(umem_dmabuf);
+
+out_release_dmabuf:
+ dma_buf_put(dmabuf);
+ return ret;
+}
+
+struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device,
+ unsigned long offset, size_t size,
+ int fd, int access,
+ const struct dma_buf_attach_ops *ops)
+{
+ return ib_umem_dmabuf_get_with_dma_device(device, device->dma_device,
+ offset, size, fd, access, ops);
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_get);
+
+static void
+ib_umem_dmabuf_unsupported_move_notify(struct dma_buf_attachment *attach)
+{
+ struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv;
+
+ ibdev_warn_ratelimited(umem_dmabuf->umem.ibdev,
+ "Invalidate callback should not be called when memory is pinned\n");
+}
+
+static struct dma_buf_attach_ops ib_umem_dmabuf_attach_pinned_ops = {
+ .allow_peer2peer = true,
+ .move_notify = ib_umem_dmabuf_unsupported_move_notify,
+};
+
+struct ib_umem_dmabuf *
+ib_umem_dmabuf_get_pinned_with_dma_device(struct ib_device *device,
+ struct device *dma_device,
+ unsigned long offset, size_t size,
+ int fd, int access)
+{
+ struct ib_umem_dmabuf *umem_dmabuf;
+ int err;
+
+ umem_dmabuf = ib_umem_dmabuf_get_with_dma_device(device, dma_device, offset,
+ size, fd, access,
+ &ib_umem_dmabuf_attach_pinned_ops);
+ if (IS_ERR(umem_dmabuf))
+ return umem_dmabuf;
+
+ dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL);
+ err = dma_buf_pin(umem_dmabuf->attach);
+ if (err)
+ goto err_release;
+ umem_dmabuf->pinned = 1;
+
+ err = ib_umem_dmabuf_map_pages(umem_dmabuf);
+ if (err)
+ goto err_unpin;
+ dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
+
+ return umem_dmabuf;
+
+err_unpin:
+ dma_buf_unpin(umem_dmabuf->attach);
+err_release:
+ dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
+ ib_umem_release(&umem_dmabuf->umem);
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_get_pinned_with_dma_device);
+
+struct ib_umem_dmabuf *ib_umem_dmabuf_get_pinned(struct ib_device *device,
+ unsigned long offset,
+ size_t size, int fd,
+ int access)
+{
+ return ib_umem_dmabuf_get_pinned_with_dma_device(device, device->dma_device,
+ offset, size, fd, access);
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_get_pinned);
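Annotation (not part of the patch): typical driver-side use of the pinned dma-buf helper added here is to turn a dma-buf fd into a mapped umem and release it with ib_umem_release(). A hedged usage sketch; demo_reg_dmabuf_mr() and its arguments are illustrative, not an existing driver entry point:

#include <linux/err.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_verbs.h>

static struct ib_umem_dmabuf *demo_reg_dmabuf_mr(struct ib_device *ibdev,
						 unsigned long offset,
						 size_t length, int dmabuf_fd)
{
	struct ib_umem_dmabuf *umem_dmabuf;

	/* The dma-buf is pinned up front, so move_notify must never fire. */
	umem_dmabuf = ib_umem_dmabuf_get_pinned(ibdev, offset, length,
						dmabuf_fd,
						IB_ACCESS_LOCAL_WRITE);
	if (IS_ERR(umem_dmabuf))
		return umem_dmabuf;

	/* umem_dmabuf->umem.sgt_append.sgt now holds the mapped window;
	 * release later with ib_umem_release(&umem_dmabuf->umem). */
	return umem_dmabuf;
}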
+
+void ib_umem_dmabuf_revoke(struct ib_umem_dmabuf *umem_dmabuf)
+{
+ struct dma_buf *dmabuf = umem_dmabuf->attach->dmabuf;
+
+ dma_resv_lock(dmabuf->resv, NULL);
+ if (umem_dmabuf->revoked)
+ goto end;
+ ib_umem_dmabuf_unmap_pages(umem_dmabuf);
+ if (umem_dmabuf->pinned) {
+ dma_buf_unpin(umem_dmabuf->attach);
+ umem_dmabuf->pinned = 0;
+ }
+ umem_dmabuf->revoked = 1;
+end:
+ dma_resv_unlock(dmabuf->resv);
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_revoke);
+
+void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf)
+{
+ struct dma_buf *dmabuf = umem_dmabuf->attach->dmabuf;
+
+ ib_umem_dmabuf_revoke(umem_dmabuf);
+
+ dma_buf_detach(dmabuf, umem_dmabuf->attach);
+ dma_buf_put(dmabuf);
+ kfree(umem_dmabuf);
+}
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index f962b5bbfa40..572a91a62a7b 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -39,424 +39,241 @@
#include <linux/export.h>
#include <linux/vmalloc.h>
#include <linux/hugetlb.h>
-#include <linux/interval_tree_generic.h>
+#include <linux/interval_tree.h>
+#include <linux/hmm.h>
+#include <linux/hmm-dma.h>
#include <linux/pagemap.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
-/*
- * The ib_umem list keeps track of memory regions for which the HW
- * device request to receive notification when the related memory
- * mapping is changed.
- *
- * ib_umem_lock protects the list.
- */
-
-static u64 node_start(struct umem_odp_node *n)
-{
- struct ib_umem_odp *umem_odp =
- container_of(n, struct ib_umem_odp, interval_tree);
-
- return ib_umem_start(&umem_odp->umem);
-}
+#include "uverbs.h"
-/* Note that the representation of the intervals in the interval tree
- * considers the ending point as contained in the interval, while the
- * function ib_umem_end returns the first address which is not contained
- * in the umem.
- */
-static u64 node_last(struct umem_odp_node *n)
+static void ib_init_umem_implicit_odp(struct ib_umem_odp *umem_odp)
{
- struct ib_umem_odp *umem_odp =
- container_of(n, struct ib_umem_odp, interval_tree);
-
- return ib_umem_end(&umem_odp->umem) - 1;
-}
-
-INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last,
- node_start, node_last, static, rbt_ib_umem)
-
-static void ib_umem_notifier_start_account(struct ib_umem_odp *umem_odp)
-{
- mutex_lock(&umem_odp->umem_mutex);
- if (umem_odp->notifiers_count++ == 0)
- /*
- * Initialize the completion object for waiting on
- * notifiers. Since notifier_count is zero, no one should be
- * waiting right now.
- */
- reinit_completion(&umem_odp->notifier_completion);
- mutex_unlock(&umem_odp->umem_mutex);
+ umem_odp->is_implicit_odp = 1;
+ umem_odp->umem.is_odp = 1;
+ mutex_init(&umem_odp->umem_mutex);
}
-static void ib_umem_notifier_end_account(struct ib_umem_odp *umem_odp)
+static int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
+ const struct mmu_interval_notifier_ops *ops)
{
- mutex_lock(&umem_odp->umem_mutex);
- /*
- * This sequence increase will notify the QP page fault that the page
- * that is going to be mapped in the spte could have been freed.
- */
- ++umem_odp->notifiers_seq;
- if (--umem_odp->notifiers_count == 0)
- complete_all(&umem_odp->notifier_completion);
- mutex_unlock(&umem_odp->umem_mutex);
-}
+ struct ib_device *dev = umem_odp->umem.ibdev;
+ size_t page_size = 1UL << umem_odp->page_shift;
+ struct hmm_dma_map *map;
+ unsigned long start;
+ unsigned long end;
+ size_t nr_entries;
+ int ret = 0;
-static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp,
- u64 start, u64 end, void *cookie)
-{
- struct ib_umem *umem = &umem_odp->umem;
+ umem_odp->umem.is_odp = 1;
+ mutex_init(&umem_odp->umem_mutex);
+ start = ALIGN_DOWN(umem_odp->umem.address, page_size);
+ if (check_add_overflow(umem_odp->umem.address,
+ (unsigned long)umem_odp->umem.length, &end))
+ return -EOVERFLOW;
+ end = ALIGN(end, page_size);
+ if (unlikely(end < page_size))
+ return -EOVERFLOW;
/*
- * Increase the number of notifiers running, to
- * prevent any further fault handling on this MR.
+ * The mmu notifier can be called within reclaim contexts and takes the
+ * umem_mutex. This is rare to trigger in testing, teach lockdep about
+ * it.
*/
- ib_umem_notifier_start_account(umem_odp);
- umem_odp->dying = 1;
- /* Make sure that the fact the umem is dying is out before we release
- * all pending page faults. */
- smp_wmb();
- complete_all(&umem_odp->notifier_completion);
- umem->context->invalidate_range(umem_odp, ib_umem_start(umem),
- ib_umem_end(umem));
- return 0;
-}
-
-static void ib_umem_notifier_release(struct mmu_notifier *mn,
- struct mm_struct *mm)
-{
- struct ib_ucontext_per_mm *per_mm =
- container_of(mn, struct ib_ucontext_per_mm, mn);
-
- down_read(&per_mm->umem_rwsem);
- if (per_mm->active)
- rbt_ib_umem_for_each_in_range(
- &per_mm->umem_tree, 0, ULLONG_MAX,
- ib_umem_notifier_release_trampoline, true, NULL);
- up_read(&per_mm->umem_rwsem);
-}
-
-static int invalidate_range_start_trampoline(struct ib_umem_odp *item,
- u64 start, u64 end, void *cookie)
-{
- ib_umem_notifier_start_account(item);
- item->umem.context->invalidate_range(item, start, end);
- return 0;
-}
-
-static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
- const struct mmu_notifier_range *range)
-{
- struct ib_ucontext_per_mm *per_mm =
- container_of(mn, struct ib_ucontext_per_mm, mn);
-
- if (mmu_notifier_range_blockable(range))
- down_read(&per_mm->umem_rwsem);
- else if (!down_read_trylock(&per_mm->umem_rwsem))
- return -EAGAIN;
-
- if (!per_mm->active) {
- up_read(&per_mm->umem_rwsem);
- /*
- * At this point active is permanently set and visible to this
- * CPU without a lock, that fact is relied on to skip the unlock
- * in range_end.
- */
- return 0;
+ if (IS_ENABLED(CONFIG_LOCKDEP)) {
+ fs_reclaim_acquire(GFP_KERNEL);
+ mutex_lock(&umem_odp->umem_mutex);
+ mutex_unlock(&umem_odp->umem_mutex);
+ fs_reclaim_release(GFP_KERNEL);
}
- return rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, range->start,
- range->end,
- invalidate_range_start_trampoline,
- mmu_notifier_range_blockable(range),
- NULL);
-}
-
-static int invalidate_range_end_trampoline(struct ib_umem_odp *item, u64 start,
- u64 end, void *cookie)
-{
- ib_umem_notifier_end_account(item);
- return 0;
-}
-
-static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
- const struct mmu_notifier_range *range)
-{
- struct ib_ucontext_per_mm *per_mm =
- container_of(mn, struct ib_ucontext_per_mm, mn);
-
- if (unlikely(!per_mm->active))
- return;
+ nr_entries = (end - start) >> PAGE_SHIFT;
+ if (!(nr_entries * PAGE_SIZE / page_size))
+ return -EINVAL;
- rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, range->start,
- range->end,
- invalidate_range_end_trampoline, true, NULL);
- up_read(&per_mm->umem_rwsem);
-}
+ map = &umem_odp->map;
+ if (ib_uses_virt_dma(dev)) {
+ map->pfn_list = kvcalloc(nr_entries, sizeof(*map->pfn_list),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!map->pfn_list)
+ ret = -ENOMEM;
+ } else
+ ret = hmm_dma_map_alloc(dev->dma_device, map,
+ (end - start) >> PAGE_SHIFT,
+ 1 << umem_odp->page_shift);
+ if (ret)
+ return ret;
+
+ ret = mmu_interval_notifier_insert(&umem_odp->notifier,
+ umem_odp->umem.owning_mm, start,
+ end - start, ops);
+ if (ret)
+ goto out_free_map;
-static const struct mmu_notifier_ops ib_umem_notifiers = {
- .release = ib_umem_notifier_release,
- .invalidate_range_start = ib_umem_notifier_invalidate_range_start,
- .invalidate_range_end = ib_umem_notifier_invalidate_range_end,
-};
+ return 0;
-static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp)
-{
- struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
- struct ib_umem *umem = &umem_odp->umem;
-
- down_write(&per_mm->umem_rwsem);
- if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
- rbt_ib_umem_insert(&umem_odp->interval_tree,
- &per_mm->umem_tree);
- up_write(&per_mm->umem_rwsem);
+out_free_map:
+ if (ib_uses_virt_dma(dev))
+ kvfree(map->pfn_list);
+ else
+ hmm_dma_map_free(dev->dma_device, map);
+ return ret;
}
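Annotation (not part of the patch): the IS_ENABLED(CONFIG_LOCKDEP) block above takes umem_mutex once inside a simulated reclaim section, so lockdep records the mmu-notifier-under-reclaim dependency even when testing never exercises it. A hedged sketch of the idiom with a placeholder lock:

#include <linux/mutex.h>
#include <linux/sched/mm.h>

static void demo_teach_lockdep(struct mutex *demo_lock)
{
	if (IS_ENABLED(CONFIG_LOCKDEP)) {
		/* Pretend to be in reclaim, then take and drop the lock so
		 * lockdep learns "this lock may be taken under reclaim". */
		fs_reclaim_acquire(GFP_KERNEL);
		mutex_lock(demo_lock);
		mutex_unlock(demo_lock);
		fs_reclaim_release(GFP_KERNEL);
	}
}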
-static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp)
+/**
+ * ib_umem_odp_alloc_implicit - Allocate a parent implicit ODP umem
+ *
+ * Implicit ODP umems do not have a VA range and do not have any page lists.
+ * They exist only to hold the per_mm reference to help the driver create
+ * children umems.
+ *
+ * @device: IB device to create UMEM
+ * @access: ib_reg_mr access flags
+ */
+struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_device *device,
+ int access)
{
- struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
- struct ib_umem *umem = &umem_odp->umem;
-
- down_write(&per_mm->umem_rwsem);
- if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
- rbt_ib_umem_remove(&umem_odp->interval_tree,
- &per_mm->umem_tree);
- complete_all(&umem_odp->notifier_completion);
+ struct ib_umem *umem;
+ struct ib_umem_odp *umem_odp;
- up_write(&per_mm->umem_rwsem);
-}
+ if (access & IB_ACCESS_HUGETLB)
+ return ERR_PTR(-EINVAL);
-static struct ib_ucontext_per_mm *alloc_per_mm(struct ib_ucontext *ctx,
- struct mm_struct *mm)
-{
- struct ib_ucontext_per_mm *per_mm;
- int ret;
-
- per_mm = kzalloc(sizeof(*per_mm), GFP_KERNEL);
- if (!per_mm)
+ umem_odp = kzalloc(sizeof(*umem_odp), GFP_KERNEL);
+ if (!umem_odp)
return ERR_PTR(-ENOMEM);
-
- per_mm->context = ctx;
- per_mm->mm = mm;
- per_mm->umem_tree = RB_ROOT_CACHED;
- init_rwsem(&per_mm->umem_rwsem);
- per_mm->active = true;
-
- rcu_read_lock();
- per_mm->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
- rcu_read_unlock();
-
- WARN_ON(mm != current->mm);
-
- per_mm->mn.ops = &ib_umem_notifiers;
- ret = mmu_notifier_register(&per_mm->mn, per_mm->mm);
- if (ret) {
- dev_err(&ctx->device->dev,
- "Failed to register mmu_notifier %d\n", ret);
- goto out_pid;
- }
-
- list_add(&per_mm->ucontext_list, &ctx->per_mm_list);
- return per_mm;
-
-out_pid:
- put_pid(per_mm->tgid);
- kfree(per_mm);
- return ERR_PTR(ret);
-}
-
-static int get_per_mm(struct ib_umem_odp *umem_odp)
-{
- struct ib_ucontext *ctx = umem_odp->umem.context;
- struct ib_ucontext_per_mm *per_mm;
-
- /*
- * Generally speaking we expect only one or two per_mm in this list,
- * so no reason to optimize this search today.
- */
- mutex_lock(&ctx->per_mm_list_lock);
- list_for_each_entry(per_mm, &ctx->per_mm_list, ucontext_list) {
- if (per_mm->mm == umem_odp->umem.owning_mm)
- goto found;
- }
-
- per_mm = alloc_per_mm(ctx, umem_odp->umem.owning_mm);
- if (IS_ERR(per_mm)) {
- mutex_unlock(&ctx->per_mm_list_lock);
- return PTR_ERR(per_mm);
- }
-
-found:
- umem_odp->per_mm = per_mm;
- per_mm->odp_mrs_count++;
- mutex_unlock(&ctx->per_mm_list_lock);
-
- return 0;
+ umem = &umem_odp->umem;
+ umem->ibdev = device;
+ umem->writable = ib_access_writable(access);
+ umem->owning_mm = current->mm;
+ umem_odp->page_shift = PAGE_SHIFT;
+
+ umem_odp->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
+ ib_init_umem_implicit_odp(umem_odp);
+ return umem_odp;
}
+EXPORT_SYMBOL(ib_umem_odp_alloc_implicit);
-static void free_per_mm(struct rcu_head *rcu)
-{
- kfree(container_of(rcu, struct ib_ucontext_per_mm, rcu));
-}
-
-static void put_per_mm(struct ib_umem_odp *umem_odp)
+/**
+ * ib_umem_odp_alloc_child - Allocate a child ODP umem under an implicit
+ * parent ODP umem
+ *
+ * @root: The parent umem enclosing the child. This must be allocated using
+ * ib_umem_odp_alloc_implicit()
+ * @addr: The starting userspace VA
+ * @size: The length of the userspace VA
+ * @ops: MMU interval ops, currently only @invalidate
+ */
+struct ib_umem_odp *
+ib_umem_odp_alloc_child(struct ib_umem_odp *root, unsigned long addr,
+ size_t size,
+ const struct mmu_interval_notifier_ops *ops)
{
- struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
- struct ib_ucontext *ctx = umem_odp->umem.context;
- bool need_free;
-
- mutex_lock(&ctx->per_mm_list_lock);
- umem_odp->per_mm = NULL;
- per_mm->odp_mrs_count--;
- need_free = per_mm->odp_mrs_count == 0;
- if (need_free)
- list_del(&per_mm->ucontext_list);
- mutex_unlock(&ctx->per_mm_list_lock);
-
- if (!need_free)
- return;
-
/*
- * NOTE! mmu_notifier_unregister() can happen between a start/end
- * callback, resulting in an start/end, and thus an unbalanced
- * lock. This doesn't really matter to us since we are about to kfree
- * the memory that holds the lock, however LOCKDEP doesn't like this.
+ * Caller must ensure that root cannot be freed during the call to
+ * ib_umem_odp_alloc_child().
*/
- down_write(&per_mm->umem_rwsem);
- per_mm->active = false;
- up_write(&per_mm->umem_rwsem);
-
- WARN_ON(!RB_EMPTY_ROOT(&per_mm->umem_tree.rb_root));
- mmu_notifier_unregister_no_release(&per_mm->mn, per_mm->mm);
- put_pid(per_mm->tgid);
- mmu_notifier_call_srcu(&per_mm->rcu, free_per_mm);
-}
-
-struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root,
- unsigned long addr, size_t size)
-{
- struct ib_ucontext_per_mm *per_mm = root->per_mm;
- struct ib_ucontext *ctx = per_mm->context;
struct ib_umem_odp *odp_data;
struct ib_umem *umem;
- int pages = size >> PAGE_SHIFT;
int ret;
+ if (WARN_ON(!root->is_implicit_odp))
+ return ERR_PTR(-EINVAL);
+
odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
if (!odp_data)
return ERR_PTR(-ENOMEM);
umem = &odp_data->umem;
- umem->context = ctx;
+ umem->ibdev = root->umem.ibdev;
umem->length = size;
umem->address = addr;
- umem->page_shift = PAGE_SHIFT;
umem->writable = root->umem.writable;
- umem->is_odp = 1;
- odp_data->per_mm = per_mm;
- umem->owning_mm = per_mm->mm;
- mmgrab(umem->owning_mm);
-
- mutex_init(&odp_data->umem_mutex);
- init_completion(&odp_data->notifier_completion);
-
- odp_data->page_list =
- vzalloc(array_size(pages, sizeof(*odp_data->page_list)));
- if (!odp_data->page_list) {
- ret = -ENOMEM;
- goto out_odp_data;
- }
-
- odp_data->dma_list =
- vzalloc(array_size(pages, sizeof(*odp_data->dma_list)));
- if (!odp_data->dma_list) {
- ret = -ENOMEM;
- goto out_page_list;
- }
+ umem->owning_mm = root->umem.owning_mm;
+ odp_data->page_shift = PAGE_SHIFT;
+ odp_data->notifier.ops = ops;
/*
- * Caller must ensure that the umem_odp that the per_mm came from
- * cannot be freed during the call to ib_alloc_odp_umem.
+ * A mmget must be held when registering a notifier, the owning_mm only
+ * has a mm_grab at this point.
*/
- mutex_lock(&ctx->per_mm_list_lock);
- per_mm->odp_mrs_count++;
- mutex_unlock(&ctx->per_mm_list_lock);
- add_umem_to_per_mm(odp_data);
+ if (!mmget_not_zero(umem->owning_mm)) {
+ ret = -EFAULT;
+ goto out_free;
+ }
+ odp_data->tgid = get_pid(root->tgid);
+ ret = ib_init_umem_odp(odp_data, ops);
+ if (ret)
+ goto out_tgid;
+ mmput(umem->owning_mm);
return odp_data;
-out_page_list:
- vfree(odp_data->page_list);
-out_odp_data:
- mmdrop(umem->owning_mm);
+out_tgid:
+ put_pid(odp_data->tgid);
+ mmput(umem->owning_mm);
+out_free:
kfree(odp_data);
return ERR_PTR(ret);
}
-EXPORT_SYMBOL(ib_alloc_odp_umem);
+EXPORT_SYMBOL(ib_umem_odp_alloc_child);
-int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
+/**
+ * ib_umem_odp_get - Create a umem_odp for a userspace va
+ *
+ * @device: IB device struct to get UMEM
+ * @addr: userspace virtual address to start at
+ * @size: length of region to pin
+ * @access: IB_ACCESS_xxx flags for memory being pinned
+ * @ops: MMU interval ops, currently only @invalidate
+ *
+ * The driver should use this function when the access flags indicate ODP
+ * memory. It avoids pinning and instead stores the mm for future page fault
+ * handling in conjunction with MMU notifiers.
+ */
+struct ib_umem_odp *ib_umem_odp_get(struct ib_device *device,
+ unsigned long addr, size_t size, int access,
+ const struct mmu_interval_notifier_ops *ops)
{
- struct ib_umem *umem = &umem_odp->umem;
- /*
- * NOTE: This must called in a process context where umem->owning_mm
- * == current->mm
- */
- struct mm_struct *mm = umem->owning_mm;
- int ret_val;
-
- if (access & IB_ACCESS_HUGETLB) {
- struct vm_area_struct *vma;
- struct hstate *h;
-
- down_read(&mm->mmap_sem);
- vma = find_vma(mm, ib_umem_start(umem));
- if (!vma || !is_vm_hugetlb_page(vma)) {
- up_read(&mm->mmap_sem);
- return -EINVAL;
- }
- h = hstate_vma(vma);
- umem->page_shift = huge_page_shift(h);
- up_read(&mm->mmap_sem);
- }
-
- mutex_init(&umem_odp->umem_mutex);
-
- init_completion(&umem_odp->notifier_completion);
-
- if (ib_umem_num_pages(umem)) {
- umem_odp->page_list =
- vzalloc(array_size(sizeof(*umem_odp->page_list),
- ib_umem_num_pages(umem)));
- if (!umem_odp->page_list)
- return -ENOMEM;
-
- umem_odp->dma_list =
- vzalloc(array_size(sizeof(*umem_odp->dma_list),
- ib_umem_num_pages(umem)));
- if (!umem_odp->dma_list) {
- ret_val = -ENOMEM;
- goto out_page_list;
- }
- }
+ struct ib_umem_odp *umem_odp;
+ int ret;
- ret_val = get_per_mm(umem_odp);
- if (ret_val)
- goto out_dma_list;
- add_umem_to_per_mm(umem_odp);
+ if (WARN_ON_ONCE(!(access & IB_ACCESS_ON_DEMAND)))
+ return ERR_PTR(-EINVAL);
- return 0;
+ umem_odp = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL);
+ if (!umem_odp)
+ return ERR_PTR(-ENOMEM);
-out_dma_list:
- vfree(umem_odp->dma_list);
-out_page_list:
- vfree(umem_odp->page_list);
- return ret_val;
+ umem_odp->umem.ibdev = device;
+ umem_odp->umem.length = size;
+ umem_odp->umem.address = addr;
+ umem_odp->umem.writable = ib_access_writable(access);
+ umem_odp->umem.owning_mm = current->mm;
+ umem_odp->notifier.ops = ops;
+
+ umem_odp->page_shift = PAGE_SHIFT;
+#ifdef CONFIG_HUGETLB_PAGE
+ if (access & IB_ACCESS_HUGETLB)
+ umem_odp->page_shift = HPAGE_SHIFT;
+#endif
+
+ umem_odp->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
+ ret = ib_init_umem_odp(umem_odp, ops);
+ if (ret)
+ goto err_put_pid;
+ return umem_odp;
+
+err_put_pid:
+ put_pid(umem_odp->tgid);
+ kfree(umem_odp);
+ return ERR_PTR(ret);
}
+EXPORT_SYMBOL(ib_umem_odp_get);
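Annotation (not part of the patch): ib_umem_odp_get() is normally paired with an mmu_interval_notifier invalidate callback supplied by the driver. The callback shape below follows the mmu_interval_notifier_ops definition; the body and all demo_ names are only a placeholder sketch:

#include <linux/mmu_notifier.h>
#include <rdma/ib_umem_odp.h>

static bool demo_invalidate(struct mmu_interval_notifier *mni,
			    const struct mmu_notifier_range *range,
			    unsigned long cur_seq)
{
	struct ib_umem_odp *umem_odp =
		container_of(mni, struct ib_umem_odp, notifier);

	if (!mmu_notifier_range_blockable(range))
		return false;

	mutex_lock(&umem_odp->umem_mutex);
	mmu_interval_set_seq(mni, cur_seq);
	/* ... zap the affected HW mappings, then unmap the DMA pages ... */
	mutex_unlock(&umem_odp->umem_mutex);
	return true;
}

static const struct mmu_interval_notifier_ops demo_ops = {
	.invalidate = demo_invalidate,
};

/* umem_odp = ib_umem_odp_get(ibdev, addr, len,
 *			       IB_ACCESS_ON_DEMAND | IB_ACCESS_LOCAL_WRITE,
 *			       &demo_ops); */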
-void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
+static void ib_umem_odp_free(struct ib_umem_odp *umem_odp)
{
- struct ib_umem *umem = &umem_odp->umem;
+ struct ib_device *dev = umem_odp->umem.ibdev;
/*
* Ensure that no more pages are mapped in the umem.
@@ -464,107 +281,36 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
* It is the driver's responsibility to ensure, before calling us,
* that the hardware will not attempt to access the MR any more.
*/
- ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem),
- ib_umem_end(umem));
-
- remove_umem_from_per_mm(umem_odp);
- put_per_mm(umem_odp);
- vfree(umem_odp->dma_list);
- vfree(umem_odp->page_list);
+ mutex_lock(&umem_odp->umem_mutex);
+ ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
+ ib_umem_end(umem_odp));
+ mutex_unlock(&umem_odp->umem_mutex);
+ mmu_interval_notifier_remove(&umem_odp->notifier);
+ if (ib_uses_virt_dma(dev))
+ kvfree(umem_odp->map.pfn_list);
+ else
+ hmm_dma_map_free(dev->dma_device, &umem_odp->map);
}
-/*
- * Map for DMA and insert a single page into the on-demand paging page tables.
- *
- * @umem: the umem to insert the page to.
- * @page_index: index in the umem to add the page to.
- * @page: the page struct to map and add.
- * @access_mask: access permissions needed for this page.
- * @current_seq: sequence number for synchronization with invalidations.
- * the sequence number is taken from
- * umem_odp->notifiers_seq.
- *
- * The function returns -EFAULT if the DMA mapping operation fails. It returns
- * -EAGAIN if a concurrent invalidation prevents us from updating the page.
- *
- * The page is released via put_page even if the operation failed. For
- * on-demand pinning, the page is released whenever it isn't stored in the
- * umem.
- */
-static int ib_umem_odp_map_dma_single_page(
- struct ib_umem_odp *umem_odp,
- int page_index,
- struct page *page,
- u64 access_mask,
- unsigned long current_seq)
+void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
{
- struct ib_umem *umem = &umem_odp->umem;
- struct ib_device *dev = umem->context->device;
- dma_addr_t dma_addr;
- int remove_existing_mapping = 0;
- int ret = 0;
-
- /*
- * Note: we avoid writing if seq is different from the initial seq, to
- * handle case of a racing notifier. This check also allows us to bail
- * early if we have a notifier running in parallel with us.
- */
- if (ib_umem_mmu_notifier_retry(umem_odp, current_seq)) {
- ret = -EAGAIN;
- goto out;
- }
- if (!(umem_odp->dma_list[page_index])) {
- dma_addr = ib_dma_map_page(dev,
- page,
- 0, BIT(umem->page_shift),
- DMA_BIDIRECTIONAL);
- if (ib_dma_mapping_error(dev, dma_addr)) {
- ret = -EFAULT;
- goto out;
- }
- umem_odp->dma_list[page_index] = dma_addr | access_mask;
- umem_odp->page_list[page_index] = page;
- umem_odp->npages++;
- } else if (umem_odp->page_list[page_index] == page) {
- umem_odp->dma_list[page_index] |= access_mask;
- } else {
- pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
- umem_odp->page_list[page_index], page);
- /* Better remove the mapping now, to prevent any further
- * damage. */
- remove_existing_mapping = 1;
- }
+ if (!umem_odp->is_implicit_odp)
+ ib_umem_odp_free(umem_odp);
-out:
- put_page(page);
-
- if (remove_existing_mapping) {
- ib_umem_notifier_start_account(umem_odp);
- umem->context->invalidate_range(
- umem_odp,
- ib_umem_start(umem) + (page_index << umem->page_shift),
- ib_umem_start(umem) +
- ((page_index + 1) << umem->page_shift));
- ib_umem_notifier_end_account(umem_odp);
- ret = -EAGAIN;
- }
-
- return ret;
+ put_pid(umem_odp->tgid);
+ kfree(umem_odp);
}
+EXPORT_SYMBOL(ib_umem_odp_release);
/**
- * ib_umem_odp_map_dma_pages - Pin and DMA map userspace memory in an ODP MR.
+ * ib_umem_odp_map_dma_and_lock - DMA map userspace memory in an ODP MR and lock it.
*
- * Pins the range of pages passed in the argument, and maps them to
- * DMA addresses. The DMA addresses of the mapped pages is updated in
- * umem_odp->dma_list.
+ * Maps the range passed in the argument to DMA addresses.
+ * Upon success the ODP MR will be locked to let caller complete its device
+ * page table update.
*
* Returns the number of pages mapped on success, or a negative error code
* on failure.
- * An -EAGAIN error code is returned when a concurrent mmu notifier prevents
- * the function from completing its task.
- * An -ENOENT error code indicates that userspace process is being terminated
- * and mm was already destroyed.
* @umem_odp: the umem to map and pin
* @user_virt: the address from which we need to map.
* @bcnt: the minimal number of bytes to pin and map. The mapping might be
@@ -573,229 +319,151 @@ out:
* the return value.
* @access_mask: bit mask of the requested access permissions for the given
* range.
- * @current_seq: the MMU notifiers sequance value for synchronization with
- * invalidations. the sequance number is read from
- * umem_odp->notifiers_seq before calling this function
+ * @fault: whether faulting is required for the given range
*/
-int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
- u64 bcnt, u64 access_mask,
- unsigned long current_seq)
+int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 user_virt,
+ u64 bcnt, u64 access_mask, bool fault)
+ __acquires(&umem_odp->umem_mutex)
{
- struct ib_umem *umem = &umem_odp->umem;
struct task_struct *owning_process = NULL;
struct mm_struct *owning_mm = umem_odp->umem.owning_mm;
- struct page **local_page_list = NULL;
- u64 page_mask, off;
- int j, k, ret = 0, start_idx, npages = 0, page_shift;
- unsigned int flags = 0;
- phys_addr_t p = 0;
-
- if (access_mask == 0)
- return -EINVAL;
-
- if (user_virt < ib_umem_start(umem) ||
- user_virt + bcnt > ib_umem_end(umem))
+ int pfn_index, dma_index, ret = 0, start_idx;
+ unsigned int page_shift, hmm_order, pfn_start_idx;
+ unsigned long num_pfns, current_seq;
+ struct hmm_range range = {};
+ unsigned long timeout;
+
+ if (user_virt < ib_umem_start(umem_odp) ||
+ user_virt + bcnt > ib_umem_end(umem_odp))
return -EFAULT;
- local_page_list = (struct page **)__get_free_page(GFP_KERNEL);
- if (!local_page_list)
- return -ENOMEM;
-
- page_shift = umem->page_shift;
- page_mask = ~(BIT(page_shift) - 1);
- off = user_virt & (~page_mask);
- user_virt = user_virt & page_mask;
- bcnt += off; /* Charge for the first page offset as well. */
+ page_shift = umem_odp->page_shift;
/*
* owning_process is allowed to be NULL; this means the mm somehow exists
* beyond the lifetime of the originating process. Presumably
* mmget_not_zero will fail in this case.
*/
- owning_process = get_pid_task(umem_odp->per_mm->tgid, PIDTYPE_PID);
+ owning_process = get_pid_task(umem_odp->tgid, PIDTYPE_PID);
if (!owning_process || !mmget_not_zero(owning_mm)) {
ret = -EINVAL;
goto out_put_task;
}
- if (access_mask & ODP_WRITE_ALLOWED_BIT)
- flags |= FOLL_WRITE;
+ range.notifier = &umem_odp->notifier;
+ range.start = ALIGN_DOWN(user_virt, 1UL << page_shift);
+ range.end = ALIGN(user_virt + bcnt, 1UL << page_shift);
+ pfn_start_idx = (range.start - ib_umem_start(umem_odp)) >> PAGE_SHIFT;
+ num_pfns = (range.end - range.start) >> PAGE_SHIFT;
+ if (fault) {
+ range.default_flags = HMM_PFN_REQ_FAULT;
+
+ if (access_mask & HMM_PFN_WRITE)
+ range.default_flags |= HMM_PFN_REQ_WRITE;
+ }
+
+ range.hmm_pfns = &(umem_odp->map.pfn_list[pfn_start_idx]);
+ timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+
+retry:
+ current_seq = range.notifier_seq =
+ mmu_interval_read_begin(&umem_odp->notifier);
- start_idx = (user_virt - ib_umem_start(umem)) >> page_shift;
- k = start_idx;
+ mmap_read_lock(owning_mm);
+ ret = hmm_range_fault(&range);
+ mmap_read_unlock(owning_mm);
+ if (unlikely(ret)) {
+ if (ret == -EBUSY && !time_after(jiffies, timeout))
+ goto retry;
+ goto out_put_mm;
+ }
- while (bcnt > 0) {
- const size_t gup_num_pages = min_t(size_t,
- (bcnt + BIT(page_shift) - 1) >> page_shift,
- PAGE_SIZE / sizeof(struct page *));
+ start_idx = (range.start - ib_umem_start(umem_odp)) >> page_shift;
+ dma_index = start_idx;
+
+ mutex_lock(&umem_odp->umem_mutex);
+ if (mmu_interval_read_retry(&umem_odp->notifier, current_seq)) {
+ mutex_unlock(&umem_odp->umem_mutex);
+ goto retry;
+ }
+
+ for (pfn_index = 0; pfn_index < num_pfns;
+ pfn_index += 1 << (page_shift - PAGE_SHIFT), dma_index++) {
- down_read(&owning_mm->mmap_sem);
/*
- * Note: this might result in redundent page getting. We can
- * avoid this by checking dma_list to be 0 before calling
- * get_user_pages. However, this make the code much more
- * complex (and doesn't gain us much performance in most use
- * cases).
+ * Since we asked for hmm_range_fault() to populate
+ * pages it shouldn't return an error entry on success.
*/
- npages = get_user_pages_remote(owning_process, owning_mm,
- user_virt, gup_num_pages,
- flags, local_page_list, NULL, NULL);
- up_read(&owning_mm->mmap_sem);
-
- if (npages < 0) {
- if (npages != -EAGAIN)
- pr_warn("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
- else
- pr_debug("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
- break;
- }
+ WARN_ON(fault && range.hmm_pfns[pfn_index] & HMM_PFN_ERROR);
+ WARN_ON(fault && !(range.hmm_pfns[pfn_index] & HMM_PFN_VALID));
+ if (!(range.hmm_pfns[pfn_index] & HMM_PFN_VALID))
+ continue;
- bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
- mutex_lock(&umem_odp->umem_mutex);
- for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) {
- if (user_virt & ~page_mask) {
- p += PAGE_SIZE;
- if (page_to_phys(local_page_list[j]) != p) {
- ret = -EFAULT;
- break;
- }
- put_page(local_page_list[j]);
- continue;
- }
-
- ret = ib_umem_odp_map_dma_single_page(
- umem_odp, k, local_page_list[j],
- access_mask, current_seq);
- if (ret < 0) {
- if (ret != -EAGAIN)
- pr_warn("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
- else
- pr_debug("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
- break;
- }
-
- p = page_to_phys(local_page_list[j]);
- k++;
- }
- mutex_unlock(&umem_odp->umem_mutex);
+ if (range.hmm_pfns[pfn_index] & HMM_PFN_DMA_MAPPED)
+ continue;
- if (ret < 0) {
- /*
- * Release pages, remembering that the first page
- * to hit an error was already released by
- * ib_umem_odp_map_dma_single_page().
- */
- if (npages - (j + 1) > 0)
- release_pages(&local_page_list[j+1],
- npages - (j + 1));
+ hmm_order = hmm_pfn_to_map_order(range.hmm_pfns[pfn_index]);
+ /* If a hugepage was detected and ODP wasn't set for it, the umem
+ * page_shift will be used; the opposite case is an error.
+ */
+ if (hmm_order + PAGE_SHIFT < page_shift) {
+ ret = -EINVAL;
+ ibdev_dbg(umem_odp->umem.ibdev,
+ "%s: un-expected hmm_order %u, page_shift %u\n",
+ __func__, hmm_order, page_shift);
break;
}
}
+ /* upon success the lock should stay held for the caller */
+ if (!ret)
+ ret = dma_index - start_idx;
+ else
+ mutex_unlock(&umem_odp->umem_mutex);
- if (ret >= 0) {
- if (npages < 0 && k == start_idx)
- ret = npages;
- else
- ret = k - start_idx;
- }
-
- mmput(owning_mm);
+out_put_mm:
+ mmput_async(owning_mm);
out_put_task:
if (owning_process)
put_task_struct(owning_process);
- free_page((unsigned long)local_page_list);
return ret;
}
-EXPORT_SYMBOL(ib_umem_odp_map_dma_pages);
+EXPORT_SYMBOL(ib_umem_odp_map_dma_and_lock);
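A brief sketch of the calling convention this new function establishes: on success the umem_mutex is returned held, so the caller can install the mapping in its device page table before any invalidation can run, and must drop the lock afterwards. my_program_device_pagetable() is a placeholder for the driver-specific update step, not an API defined here.

	npages = ib_umem_odp_map_dma_and_lock(odp, user_va, bcnt,
					      access_mask, fault);
	if (npages < 0)
		return npages;

	/* umem_mutex is still held here: program the HW page table from
	 * odp->map while invalidations are excluded, then drop the lock.
	 */
	err = my_program_device_pagetable(odp, user_va, npages); /* placeholder */
	mutex_unlock(&odp->umem_mutex);

	return err ?: npages;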
void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
u64 bound)
{
- struct ib_umem *umem = &umem_odp->umem;
- int idx;
+ struct ib_device *dev = umem_odp->umem.ibdev;
u64 addr;
- struct ib_device *dev = umem->context->device;
-
- virt = max_t(u64, virt, ib_umem_start(umem));
- bound = min_t(u64, bound, ib_umem_end(umem));
- /* Note that during the run of this function, the
- * notifiers_count of the MR is > 0, preventing any racing
- * faults from completion. We might be racing with other
- * invalidations, so we must make sure we free each page only
- * once. */
- mutex_lock(&umem_odp->umem_mutex);
- for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) {
- idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
- if (umem_odp->page_list[idx]) {
- struct page *page = umem_odp->page_list[idx];
- dma_addr_t dma = umem_odp->dma_list[idx];
- dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK;
-
- WARN_ON(!dma_addr);
-
- ib_dma_unmap_page(dev, dma_addr, PAGE_SIZE,
- DMA_BIDIRECTIONAL);
- if (dma & ODP_WRITE_ALLOWED_BIT) {
- struct page *head_page = compound_head(page);
- /*
- * set_page_dirty prefers being called with
- * the page lock. However, MMU notifiers are
- * called sometimes with and sometimes without
- * the lock. We rely on the umem_mutex instead
- * to prevent other mmu notifiers from
- * continuing and allowing the page mapping to
- * be removed.
- */
- set_page_dirty(head_page);
- }
- umem_odp->page_list[idx] = NULL;
- umem_odp->dma_list[idx] = 0;
- umem_odp->npages--;
- }
- }
- mutex_unlock(&umem_odp->umem_mutex);
-}
-EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
-/* @last is not a part of the interval. See comment for function
- * node_last.
- */
-int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
- u64 start, u64 last,
- umem_call_back cb,
- bool blockable,
- void *cookie)
-{
- int ret_val = 0;
- struct umem_odp_node *node, *next;
- struct ib_umem_odp *umem;
-
- if (unlikely(start == last))
- return ret_val;
-
- for (node = rbt_ib_umem_iter_first(root, start, last - 1);
- node; node = next) {
- /* TODO move the blockable decision up to the callback */
- if (!blockable)
- return -EAGAIN;
- next = rbt_ib_umem_iter_next(node, start, last - 1);
- umem = container_of(node, struct ib_umem_odp, interval_tree);
- ret_val = cb(umem, start, last, cookie) || ret_val;
- }
+ lockdep_assert_held(&umem_odp->umem_mutex);
- return ret_val;
-}
-EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range);
-
-struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root,
- u64 addr, u64 length)
-{
- struct umem_odp_node *node;
+ virt = max_t(u64, virt, ib_umem_start(umem_odp));
+ bound = min_t(u64, bound, ib_umem_end(umem_odp));
+ for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) {
+ u64 offset = addr - ib_umem_start(umem_odp);
+ size_t idx = offset >> umem_odp->page_shift;
+ unsigned long pfn = umem_odp->map.pfn_list[idx];
- node = rbt_ib_umem_iter_first(root, addr, addr + length - 1);
- if (node)
- return container_of(node, struct ib_umem_odp, interval_tree);
- return NULL;
+ if (!hmm_dma_unmap_pfn(dev->dma_device, &umem_odp->map, idx))
+ goto clear;
+ if (pfn & HMM_PFN_WRITE) {
+ struct page *page = hmm_pfn_to_page(pfn);
+ struct page *head_page = compound_head(page);
+ /*
+ * set_page_dirty prefers being called with
+ * the page lock. However, MMU notifiers are
+ * called sometimes with and sometimes without
+ * the lock. We rely on the umem_mutex instead
+ * to prevent other mmu notifiers from
+ * continuing and allowing the page mapping to
+ * be removed.
+ */
+ set_page_dirty(head_page);
+ }
+ umem_odp->npages--;
+clear:
+ umem_odp->map.pfn_list[idx] &= ~HMM_PFN_FLAGS;
+ }
}
-EXPORT_SYMBOL(rbt_ib_umem_lookup);
+EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 671f07ba1fad..fd67fc9fe85a 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -49,11 +49,13 @@
#include <linux/sched.h>
#include <linux/semaphore.h>
#include <linux/slab.h>
+#include <linux/nospec.h>
#include <linux/uaccess.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_user_mad.h>
+#include <rdma/rdma_netlink.h>
#include "core_priv.h"
@@ -61,6 +63,8 @@ MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace MAD packet access");
MODULE_LICENSE("Dual BSD/GPL");
+#define MAX_UMAD_RECV_LIST_SIZE 200000
+
enum {
IB_UMAD_MAX_PORTS = RDMA_MAX_PORTS,
IB_UMAD_MAX_AGENTS = 32,
@@ -99,7 +103,7 @@ struct ib_umad_port {
struct ib_device *ib_dev;
struct ib_umad_device *umad_dev;
int dev_num;
- u8 port_num;
+ u32 port_num;
};
struct ib_umad_device {
@@ -111,6 +115,7 @@ struct ib_umad_file {
struct mutex mutex;
struct ib_umad_port *port;
struct list_head recv_list;
+ atomic_t recv_list_size;
struct list_head send_list;
struct list_head port_list;
spinlock_t send_lock;
@@ -129,6 +134,11 @@ struct ib_umad_packet {
struct ib_user_mad mad;
};
+struct ib_rmpp_mad_hdr {
+ struct ib_mad_hdr mad_hdr;
+ struct ib_rmpp_hdr rmpp_hdr;
+} __packed;
+
#define CREATE_TRACE_POINTS
#include <trace/events/ib_umad.h>
@@ -140,7 +150,7 @@ static dev_t dynamic_issm_dev;
static DEFINE_IDA(umad_ida);
-static void ib_umad_add_one(struct ib_device *device);
+static int ib_umad_add_one(struct ib_device *device);
static void ib_umad_remove_one(struct ib_device *device, void *client_data);
static void ib_umad_dev_free(struct kref *kref)
@@ -163,8 +173,8 @@ static void ib_umad_dev_put(struct ib_umad_device *dev)
static int hdr_size(struct ib_umad_file *file)
{
- return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) :
- sizeof (struct ib_user_mad_hdr_old);
+ return file->use_pkey_index ? sizeof(struct ib_user_mad_hdr) :
+ sizeof(struct ib_user_mad_hdr_old);
}
/* caller must hold file->mutex */
@@ -173,24 +183,28 @@ static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id)
return file->agents_dead ? NULL : file->agent[id];
}
-static int queue_packet(struct ib_umad_file *file,
- struct ib_mad_agent *agent,
- struct ib_umad_packet *packet)
+static int queue_packet(struct ib_umad_file *file, struct ib_mad_agent *agent,
+ struct ib_umad_packet *packet, bool is_recv_mad)
{
int ret = 1;
mutex_lock(&file->mutex);
+ if (is_recv_mad &&
+ atomic_read(&file->recv_list_size) > MAX_UMAD_RECV_LIST_SIZE)
+ goto unlock;
+
for (packet->mad.hdr.id = 0;
packet->mad.hdr.id < IB_UMAD_MAX_AGENTS;
packet->mad.hdr.id++)
if (agent == __get_agent(file, packet->mad.hdr.id)) {
list_add_tail(&packet->list, &file->recv_list);
+ atomic_inc(&file->recv_list_size);
wake_up_interruptible(&file->recv_wait);
ret = 0;
break;
}
-
+unlock:
mutex_unlock(&file->mutex);
return ret;
@@ -217,7 +231,7 @@ static void send_handler(struct ib_mad_agent *agent,
if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
packet->length = IB_MGMT_MAD_HDR;
packet->mad.hdr.status = ETIMEDOUT;
- if (!queue_packet(file, agent, packet))
+ if (!queue_packet(file, agent, packet, false))
return;
}
kfree(packet);
@@ -277,7 +291,7 @@ static void recv_handler(struct ib_mad_agent *agent,
rdma_destroy_ah_attr(&ah_attr);
}
- if (queue_packet(file, agent, packet))
+ if (queue_packet(file, agent, packet, true))
goto err2;
return;
@@ -377,6 +391,11 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
mutex_lock(&file->mutex);
+ if (file->agents_dead) {
+ mutex_unlock(&file->mutex);
+ return -EIO;
+ }
+
while (list_empty(&file->recv_list)) {
mutex_unlock(&file->mutex);
@@ -390,8 +409,14 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
mutex_lock(&file->mutex);
}
+ if (file->agents_dead) {
+ mutex_unlock(&file->mutex);
+ return -EIO;
+ }
+
packet = list_entry(file->recv_list.next, struct ib_umad_packet, list);
list_del(&packet->list);
+ atomic_dec(&file->recv_list_size);
mutex_unlock(&file->mutex);
@@ -404,6 +429,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
/* Requeue packet */
mutex_lock(&file->mutex);
list_add(&packet->list, &file->recv_list);
+ atomic_inc(&file->recv_list_size);
mutex_unlock(&file->mutex);
} else {
if (packet->recv_wc)
@@ -482,11 +508,11 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct ib_umad_file *file = filp->private_data;
+ struct ib_rmpp_mad_hdr *rmpp_mad_hdr;
struct ib_umad_packet *packet;
struct ib_mad_agent *agent;
struct rdma_ah_attr ah_attr;
struct ib_ah *ah;
- struct ib_rmpp_mad *rmpp_mad;
__be64 *tid;
int ret, data_len, hdr_len, copy_offset, rmpp_active;
u8 base_version;
@@ -494,7 +520,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
if (count < hdr_size(file) + IB_MGMT_RMPP_HDR)
return -EINVAL;
- packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
+ packet = kzalloc(sizeof(*packet) + IB_MGMT_RMPP_HDR, GFP_KERNEL);
if (!packet)
return -ENOMEM;
@@ -522,7 +548,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
agent = __get_agent(file, packet->mad.hdr.id);
if (!agent) {
- ret = -EINVAL;
+ ret = -EIO;
goto err_up;
}
@@ -548,13 +574,13 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
goto err_up;
}
- rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data;
- hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class);
+ rmpp_mad_hdr = (struct ib_rmpp_mad_hdr *)packet->mad.data;
+ hdr_len = ib_get_mad_data_offset(rmpp_mad_hdr->mad_hdr.mgmt_class);
- if (ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)
+ if (ib_is_mad_class_rmpp(rmpp_mad_hdr->mad_hdr.mgmt_class)
&& ib_mad_kernel_rmpp_agent(agent)) {
copy_offset = IB_MGMT_RMPP_HDR;
- rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
+ rmpp_active = ib_get_rmpp_flags(&rmpp_mad_hdr->rmpp_hdr) &
IB_MGMT_RMPP_FLAG_ACTIVE;
} else {
copy_offset = IB_MGMT_MAD_HDR;
@@ -603,12 +629,12 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid;
*tid = cpu_to_be64(((u64) agent->hi_tid) << 32 |
(be64_to_cpup(tid) & 0xffffffff));
- rmpp_mad->mad_hdr.tid = *tid;
+ rmpp_mad_hdr->mad_hdr.tid = *tid;
}
if (!ib_mad_kernel_rmpp_agent(agent)
- && ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)
- && (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) {
+ && ib_is_mad_class_rmpp(rmpp_mad_hdr->mad_hdr.mgmt_class)
+ && (ib_get_rmpp_flags(&rmpp_mad_hdr->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) {
spin_lock_irq(&file->send_lock);
list_add_tail(&packet->list, &file->send_list);
spin_unlock_irq(&file->send_lock);
@@ -651,10 +677,14 @@ static __poll_t ib_umad_poll(struct file *filp, struct poll_table_struct *wait)
/* we will always be able to post a MAD send */
__poll_t mask = EPOLLOUT | EPOLLWRNORM;
+ mutex_lock(&file->mutex);
poll_wait(filp, &file->recv_wait, wait);
if (!list_empty(&file->recv_list))
mask |= EPOLLIN | EPOLLRDNORM;
+ if (file->agents_dead)
+ mask = EPOLLERR;
+ mutex_unlock(&file->mutex);
return mask;
}
@@ -672,8 +702,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
mutex_lock(&file->mutex);
if (!file->port->ib_dev) {
- dev_notice(&file->port->dev,
- "ib_umad_reg_agent: invalid device\n");
+ dev_notice(&file->port->dev, "%s: invalid device\n", __func__);
ret = -EPIPE;
goto out;
}
@@ -685,7 +714,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
if (ureq.qpn != 0 && ureq.qpn != 1) {
dev_notice(&file->port->dev,
- "ib_umad_reg_agent: invalid QPN %d specified\n",
+ "%s: invalid QPN %u specified\n", __func__,
ureq.qpn);
ret = -EINVAL;
goto out;
@@ -695,9 +724,9 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
if (!__get_agent(file, agent_id))
goto found;
- dev_notice(&file->port->dev,
- "ib_umad_reg_agent: Max Agents (%u) reached\n",
+ dev_notice(&file->port->dev, "%s: Max Agents (%u) reached\n", __func__,
IB_UMAD_MAX_AGENTS);
+
ret = -ENOMEM;
goto out;
@@ -744,7 +773,7 @@ found:
"process %s did not enable P_Key index support.\n",
current->comm);
dev_warn(&file->port->dev,
- " Documentation/infiniband/user_mad.txt has info on the new ABI.\n");
+ " Documentation/infiniband/user_mad.rst has info on the new ABI.\n");
}
}
@@ -774,8 +803,7 @@ static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg)
mutex_lock(&file->mutex);
if (!file->port->ib_dev) {
- dev_notice(&file->port->dev,
- "ib_umad_reg_agent2: invalid device\n");
+ dev_notice(&file->port->dev, "%s: invalid device\n", __func__);
ret = -EPIPE;
goto out;
}
@@ -786,17 +814,16 @@ static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg)
}
if (ureq.qpn != 0 && ureq.qpn != 1) {
- dev_notice(&file->port->dev,
- "ib_umad_reg_agent2: invalid QPN %d specified\n",
- ureq.qpn);
+ dev_notice(&file->port->dev, "%s: invalid QPN %u specified\n",
+ __func__, ureq.qpn);
ret = -EINVAL;
goto out;
}
if (ureq.flags & ~IB_USER_MAD_REG_FLAGS_CAP) {
dev_notice(&file->port->dev,
- "ib_umad_reg_agent2 failed: invalid registration flags specified 0x%x; supported 0x%x\n",
- ureq.flags, IB_USER_MAD_REG_FLAGS_CAP);
+ "%s failed: invalid registration flags specified 0x%x; supported 0x%x\n",
+ __func__, ureq.flags, IB_USER_MAD_REG_FLAGS_CAP);
ret = -EINVAL;
if (put_user((u32)IB_USER_MAD_REG_FLAGS_CAP,
@@ -811,8 +838,7 @@ static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg)
if (!__get_agent(file, agent_id))
goto found;
- dev_notice(&file->port->dev,
- "ib_umad_reg_agent2: Max Agents (%u) reached\n",
+ dev_notice(&file->port->dev, "%s: Max Agents (%u) reached\n", __func__,
IB_UMAD_MAX_AGENTS);
ret = -ENOMEM;
goto out;
@@ -824,7 +850,7 @@ found:
req.mgmt_class_version = ureq.mgmt_class_version;
if (ureq.oui & 0xff000000) {
dev_notice(&file->port->dev,
- "ib_umad_reg_agent2 failed: oui invalid 0x%08x\n",
+ "%s failed: oui invalid 0x%08x\n", __func__,
ureq.oui);
ret = -EINVAL;
goto out;
@@ -883,11 +909,14 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
if (get_user(id, arg))
return -EFAULT;
+ if (id >= IB_UMAD_MAX_AGENTS)
+ return -EINVAL;
mutex_lock(&file->port->file_mutex);
mutex_lock(&file->mutex);
- if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) {
+ id = array_index_nospec(id, IB_UMAD_MAX_AGENTS);
+ if (!__get_agent(file, id)) {
ret = -EINVAL;
goto out;
}
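The array_index_nospec() clamp added above is the standard Spectre-v1 pattern from <linux/nospec.h>: validate the index first, then clamp it so a speculatively executed out-of-bounds load cannot leak data. In generic form (table, idx and entry are illustrative names only):

	if (idx >= ARRAY_SIZE(table))
		return -EINVAL;
	idx = array_index_nospec(idx, ARRAY_SIZE(table));
	entry = table[idx];	/* safe even under speculation */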
@@ -1037,7 +1066,7 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
ib_unregister_mad_agent(file->agent[i]);
mutex_unlock(&file->port->file_mutex);
-
+ mutex_destroy(&file->mutex);
kfree(file);
return 0;
}
@@ -1053,7 +1082,6 @@ static const struct file_operations umad_fops = {
#endif
.open = ib_umad_open,
.release = ib_umad_close,
- .llseek = no_llseek,
};
static int ib_umad_sm_open(struct inode *inode, struct file *filp)
@@ -1121,14 +1149,61 @@ static const struct file_operations umad_sm_fops = {
.owner = THIS_MODULE,
.open = ib_umad_sm_open,
.release = ib_umad_sm_close,
- .llseek = no_llseek,
};
+static struct ib_umad_port *get_port(struct ib_device *ibdev,
+ struct ib_umad_device *umad_dev,
+ u32 port)
+{
+ if (!umad_dev)
+ return ERR_PTR(-EOPNOTSUPP);
+ if (!rdma_is_port_valid(ibdev, port))
+ return ERR_PTR(-EINVAL);
+ if (!rdma_cap_ib_mad(ibdev, port))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ return &umad_dev->ports[port - rdma_start_port(ibdev)];
+}
+
+static int ib_umad_get_nl_info(struct ib_device *ibdev, void *client_data,
+ struct ib_client_nl_info *res)
+{
+ struct ib_umad_port *port = get_port(ibdev, client_data, res->port);
+
+ if (IS_ERR(port))
+ return PTR_ERR(port);
+
+ res->abi = IB_USER_MAD_ABI_VERSION;
+ res->cdev = &port->dev;
+ return 0;
+}
+
static struct ib_client umad_client = {
.name = "umad",
.add = ib_umad_add_one,
- .remove = ib_umad_remove_one
+ .remove = ib_umad_remove_one,
+ .get_nl_info = ib_umad_get_nl_info,
};
+MODULE_ALIAS_RDMA_CLIENT("umad");
+
+static int ib_issm_get_nl_info(struct ib_device *ibdev, void *client_data,
+ struct ib_client_nl_info *res)
+{
+ struct ib_umad_port *port = get_port(ibdev, client_data, res->port);
+
+ if (IS_ERR(port))
+ return PTR_ERR(port);
+
+ res->abi = IB_USER_MAD_ABI_VERSION;
+ res->cdev = &port->sm_dev;
+ return 0;
+}
+
+static struct ib_client issm_client = {
+ .name = "issm",
+ .get_nl_info = ib_issm_get_nl_info,
+};
+MODULE_ALIAS_RDMA_CLIENT("issm");
static ssize_t ibdev_show(struct device *dev, struct device_attribute *attr,
char *buf)
@@ -1138,7 +1213,7 @@ static ssize_t ibdev_show(struct device *dev, struct device_attribute *attr,
if (!port)
return -ENODEV;
- return sprintf(buf, "%s\n", dev_name(&port->ib_dev->dev));
+ return sysfs_emit(buf, "%s\n", dev_name(&port->ib_dev->dev));
}
static DEVICE_ATTR_RO(ibdev);
@@ -1150,7 +1225,7 @@ static ssize_t port_show(struct device *dev, struct device_attribute *attr,
if (!port)
return -ENODEV;
- return sprintf(buf, "%d\n", port->port_num);
+ return sysfs_emit(buf, "%d\n", port->port_num);
}
static DEVICE_ATTR_RO(port);
@@ -1161,15 +1236,15 @@ static struct attribute *umad_class_dev_attrs[] = {
};
ATTRIBUTE_GROUPS(umad_class_dev);
-static char *umad_devnode(struct device *dev, umode_t *mode)
+static char *umad_devnode(const struct device *dev, umode_t *mode)
{
return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
}
-static ssize_t abi_version_show(struct class *class,
- struct class_attribute *attr, char *buf)
+static ssize_t abi_version_show(const struct class *class,
+ const struct class_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION);
+ return sysfs_emit(buf, "%d\n", IB_USER_MAD_ABI_VERSION);
}
static CLASS_ATTR_RO(abi_version);
@@ -1244,15 +1319,17 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
if (ret)
goto err_cdev;
- ib_umad_init_port_dev(&port->sm_dev, port, device);
- port->sm_dev.devt = base_issm;
- dev_set_name(&port->sm_dev, "issm%d", port->dev_num);
- cdev_init(&port->sm_cdev, &umad_sm_fops);
- port->sm_cdev.owner = THIS_MODULE;
+ if (rdma_cap_ib_smi(device, port_num)) {
+ ib_umad_init_port_dev(&port->sm_dev, port, device);
+ port->sm_dev.devt = base_issm;
+ dev_set_name(&port->sm_dev, "issm%d", port->dev_num);
+ cdev_init(&port->sm_cdev, &umad_sm_fops);
+ port->sm_cdev.owner = THIS_MODULE;
- ret = cdev_device_add(&port->sm_cdev, &port->sm_dev);
- if (ret)
- goto err_dev;
+ ret = cdev_device_add(&port->sm_cdev, &port->sm_dev);
+ if (ret)
+ goto err_dev;
+ }
return 0;
@@ -1268,8 +1345,15 @@ err_cdev:
static void ib_umad_kill_port(struct ib_umad_port *port)
{
struct ib_umad_file *file;
+ bool has_smi = false;
int id;
+ if (rdma_cap_ib_smi(port->ib_dev, port->port_num)) {
+ cdev_device_del(&port->sm_cdev, &port->sm_dev);
+ has_smi = true;
+ }
+ cdev_device_del(&port->cdev, &port->dev);
+
mutex_lock(&port->file_mutex);
/* Mark ib_dev NULL and block ioctl or other file ops to progress
@@ -1280,6 +1364,7 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
list_for_each_entry(file, &port->file_list, port_list) {
mutex_lock(&file->mutex);
file->agents_dead = 1;
+ wake_up_interruptible(&file->recv_wait);
mutex_unlock(&file->mutex);
for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id)
@@ -1289,46 +1374,51 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
mutex_unlock(&port->file_mutex);
- cdev_device_del(&port->sm_cdev, &port->sm_dev);
- cdev_device_del(&port->cdev, &port->dev);
ida_free(&umad_ida, port->dev_num);
/* balances device_initialize() */
- put_device(&port->sm_dev);
+ if (has_smi)
+ put_device(&port->sm_dev);
put_device(&port->dev);
}
-static void ib_umad_add_one(struct ib_device *device)
+static int ib_umad_add_one(struct ib_device *device)
{
struct ib_umad_device *umad_dev;
int s, e, i;
int count = 0;
+ int ret;
s = rdma_start_port(device);
e = rdma_end_port(device);
- umad_dev = kzalloc(struct_size(umad_dev, ports, e - s + 1), GFP_KERNEL);
+ umad_dev = kzalloc(struct_size(umad_dev, ports,
+ size_add(size_sub(e, s), 1)),
+ GFP_KERNEL);
if (!umad_dev)
- return;
+ return -ENOMEM;
kref_init(&umad_dev->kref);
for (i = s; i <= e; ++i) {
if (!rdma_cap_ib_mad(device, i))
continue;
- if (ib_umad_init_port(device, i, umad_dev,
- &umad_dev->ports[i - s]))
+ ret = ib_umad_init_port(device, i, umad_dev,
+ &umad_dev->ports[i - s]);
+ if (ret)
goto err;
count++;
}
- if (!count)
+ if (!count) {
+ ret = -EOPNOTSUPP;
goto free;
+ }
ib_set_client_data(device, &umad_client, umad_dev);
- return;
+ return 0;
err:
while (--i >= s) {
@@ -1340,6 +1430,7 @@ err:
free:
/* balances kref_init */
ib_umad_dev_put(umad_dev);
+ return ret;
}
static void ib_umad_remove_one(struct ib_device *device, void *client_data)
@@ -1347,9 +1438,6 @@ static void ib_umad_remove_one(struct ib_device *device, void *client_data)
struct ib_umad_device *umad_dev = client_data;
unsigned int i;
- if (!umad_dev)
- return;
-
rdma_for_each_port (device, i) {
if (rdma_cap_ib_mad(device, i))
ib_umad_kill_port(
@@ -1387,13 +1475,17 @@ static int __init ib_umad_init(void)
}
ret = ib_register_client(&umad_client);
- if (ret) {
- pr_err("couldn't register ib_umad client\n");
+ if (ret)
goto out_class;
- }
+
+ ret = ib_register_client(&issm_client);
+ if (ret)
+ goto out_client;
return 0;
+out_client:
+ ib_unregister_client(&umad_client);
out_class:
class_unregister(&umad_class);
@@ -1411,6 +1503,7 @@ out:
static void __exit ib_umad_cleanup(void)
{
+ ib_unregister_client(&issm_client);
ib_unregister_client(&umad_client);
class_unregister(&umad_class);
unregister_chrdev_region(base_umad_dev,
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 1e5aeb39f774..797e2fcc8072 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -97,8 +97,8 @@ ib_uverbs_init_udata_buf_or_null(struct ib_udata *udata,
*/
struct ib_uverbs_device {
- atomic_t refcount;
- int num_comp_vectors;
+ refcount_t refcount;
+ u32 num_comp_vectors;
struct completion comp;
struct device dev;
/* First group for device attributes, NULL terminated array */
@@ -111,7 +111,6 @@ struct ib_uverbs_device {
struct srcu_struct disassociate_srcu;
struct mutex lists_mutex; /* protect lists */
struct list_head uverbs_file_list;
- struct list_head uverbs_events_file_list;
struct uverbs_api *uapi;
};
@@ -124,10 +123,9 @@ struct ib_uverbs_event_queue {
};
struct ib_uverbs_async_event_file {
+ struct ib_uobject uobj;
struct ib_uverbs_event_queue ev_queue;
- struct ib_uverbs_file *uverbs_file;
- struct kref ref;
- struct list_head list;
+ struct ib_event_handler event_handler;
};
struct ib_uverbs_completion_event_file {
@@ -135,36 +133,6 @@ struct ib_uverbs_completion_event_file {
struct ib_uverbs_event_queue ev_queue;
};
-struct ib_uverbs_file {
- struct kref ref;
- struct ib_uverbs_device *device;
- struct mutex ucontext_lock;
- /*
- * ucontext must be accessed via ib_uverbs_get_ucontext() or with
- * ucontext_lock held
- */
- struct ib_ucontext *ucontext;
- struct ib_event_handler event_handler;
- struct ib_uverbs_async_event_file *async_file;
- struct list_head list;
-
- /*
- * To access the uobjects list hw_destroy_rwsem must be held for write
- * OR hw_destroy_rwsem held for read AND uobjects_lock held.
- * hw_destroy_rwsem should be called across any destruction of the HW
- * object of an associated uobject.
- */
- struct rw_semaphore hw_destroy_rwsem;
- spinlock_t uobjects_lock;
- struct list_head uobjects;
-
- struct mutex umap_lock;
- struct list_head umaps;
- struct page *disassociate_page;
-
- struct xarray idr;
-};
-
struct ib_uverbs_event {
union {
struct ib_uverbs_async_event_desc async;
@@ -183,6 +151,8 @@ struct ib_uverbs_mcast_entry {
struct ib_uevent_object {
struct ib_uobject uobject;
+ struct ib_uverbs_async_event_file *event_file;
+ /* List member for ib_uverbs_async_event_file list */
struct list_head event_list;
u32 events_reported;
};
@@ -210,34 +180,35 @@ struct ib_uwq_object {
};
struct ib_ucq_object {
- struct ib_uobject uobject;
+ struct ib_uevent_object uevent;
struct list_head comp_list;
- struct list_head async_list;
u32 comp_events_reported;
- u32 async_events_reported;
};
extern const struct file_operations uverbs_event_fops;
+extern const struct file_operations uverbs_async_event_fops;
void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue);
-struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
- struct ib_device *ib_dev);
-void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file);
+void ib_uverbs_init_async_event_file(struct ib_uverbs_async_event_file *ev_file);
+void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue);
void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res);
+int uverbs_async_event_release(struct inode *inode, struct file *filp);
-void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
- struct ib_uverbs_completion_event_file *ev_file,
+int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs);
+int ib_init_ucontext(struct uverbs_attr_bundle *attrs);
+
+void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file,
struct ib_ucq_object *uobj);
-void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
- struct ib_uevent_object *uobj);
+void ib_uverbs_release_uevent(struct ib_uevent_object *uobj);
void ib_uverbs_release_file(struct kref *ref);
+void ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file,
+ __u64 element, __u64 event,
+ struct list_head *obj_list, u32 *counter);
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
-void ib_uverbs_event_handler(struct ib_event_handler *handler,
- struct ib_event *event);
int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd,
enum rdma_remove_reason why,
struct uverbs_attr_bundle *attrs);
@@ -276,23 +247,6 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
size_t kern_filter_sz,
union ib_flow_spec *ib_spec);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DEVICE);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_PD);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_MR);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_CQ);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_QP);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_AH);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_MW);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_SRQ);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_WQ);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_XRCD);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DM);
-extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS);
-
/*
* ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the
* PortInfo CapabilityMask, but was extended with unique bits.
@@ -314,6 +268,24 @@ static inline u32 make_port_cap_flags(const struct ib_port_attr *attr)
return res;
}
+static inline struct ib_uverbs_async_event_file *
+ib_uverbs_get_async_event(struct uverbs_attr_bundle *attrs,
+ u16 id)
+{
+ struct ib_uobject *async_ev_file_uobj;
+ struct ib_uverbs_async_event_file *async_ev_file;
+
+ async_ev_file_uobj = uverbs_attr_get_uobject(attrs, id);
+ if (IS_ERR(async_ev_file_uobj))
+ async_ev_file = READ_ONCE(attrs->ufile->default_async_file);
+ else
+ async_ev_file = container_of(async_ev_file_uobj,
+ struct ib_uverbs_async_event_file,
+ uobj);
+ if (async_ev_file)
+ uverbs_uobject_get(&async_ev_file->uobj);
+ return async_ev_file;
+}
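For illustration, how a uobject-creating method handler might use this helper; MY_METHOD_EVENT_FD_ATTR is a placeholder attribute id, not something defined by this patch.

	/* resolve the event file selected by userspace (or the file's default)
	 * and take a reference on it for the lifetime of the new object
	 */
	obj->uevent.event_file =
		ib_uverbs_get_async_event(attrs, MY_METHOD_EVENT_FD_ATTR);

	/* on destroy or error unwind the reference must be released again */
	if (obj->uevent.event_file)
		uverbs_uobject_put(&obj->uevent.event_file->uobj);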
void copy_port_attr_to_resp(struct ib_port_attr *attr,
struct ib_uverbs_query_port_resp *resp,
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 5a3a1780ceea..ce16404cdfb8 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -42,6 +42,7 @@
#include <rdma/uverbs_types.h>
#include <rdma/uverbs_std_types.h>
+#include <rdma/ib_ucaps.h>
#include "rdma_core.h"
#include "uverbs.h"
@@ -161,7 +162,7 @@ static const void __user *uverbs_request_next_ptr(struct uverbs_req_iter *iter,
{
const void __user *res = iter->cur;
- if (iter->cur + len > iter->end)
+ if (len > iter->end - iter->cur)
return (void __force __user *)ERR_PTR(-ENOSPC);
iter->cur += len;
return res;
@@ -174,6 +175,17 @@ static int uverbs_request_finish(struct uverbs_req_iter *iter)
return 0;
}
+/*
+ * When calling a destroy function during an error unwind we need to pass in
+ * the udata that is sanitized of all user arguments, i.e. from the driver's
+ * perspective it looks like no udata was passed.
+ */
+struct ib_udata *uverbs_get_cleared_udata(struct uverbs_attr_bundle *attrs)
+{
+ attrs->driver_udata = (struct ib_udata){};
+ return &attrs->driver_udata;
+}
+
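A short sketch of the intended use, mirroring the ib_dealloc_xrcd_user() call later in this patch: when an error unwind must destroy a just-created driver object, pass a cleared udata so the driver does not try to parse stale user arguments. The surrounding error label is illustrative.

	if (ret) {
		/* unwind: destroy with an empty udata */
		ib_dealloc_xrcd_user(xrcd, uverbs_get_cleared_udata(attrs));
		goto err;
	}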
static struct ib_uverbs_completion_event_file *
_ib_uverbs_lookup_comp_file(s32 fd, struct uverbs_attr_bundle *attrs)
{
@@ -181,7 +193,7 @@ _ib_uverbs_lookup_comp_file(s32 fd, struct uverbs_attr_bundle *attrs)
fd, attrs);
if (IS_ERR(uobj))
- return (void *)uobj;
+ return ERR_CAST(uobj);
uverbs_uobject_get(uobj);
uobj_put_read(uobj);
@@ -192,84 +204,74 @@ _ib_uverbs_lookup_comp_file(s32 fd, struct uverbs_attr_bundle *attrs)
#define ib_uverbs_lookup_comp_file(_fd, _ufile) \
_ib_uverbs_lookup_comp_file((_fd)*typecheck(s32, _fd), _ufile)
-static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
+int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_file *file = attrs->ufile;
- struct ib_uverbs_get_context cmd;
- struct ib_uverbs_get_context_resp resp;
- struct ib_ucontext *ucontext;
- struct file *filp;
- struct ib_rdmacg_object cg_obj;
+ struct ib_uverbs_file *ufile = attrs->ufile;
+ struct ib_ucontext *ucontext;
struct ib_device *ib_dev;
- int ret;
- ret = uverbs_request(attrs, &cmd, sizeof(cmd));
- if (ret)
- return ret;
+ ib_dev = srcu_dereference(ufile->device->ib_dev,
+ &ufile->device->disassociate_srcu);
+ if (!ib_dev)
+ return -EIO;
- mutex_lock(&file->ucontext_lock);
- ib_dev = srcu_dereference(file->device->ib_dev,
- &file->device->disassociate_srcu);
- if (!ib_dev) {
- ret = -EIO;
- goto err;
- }
+ ucontext = rdma_zalloc_drv_obj(ib_dev, ib_ucontext);
+ if (!ucontext)
+ return -ENOMEM;
+ ucontext->device = ib_dev;
+ ucontext->ufile = ufile;
+ xa_init_flags(&ucontext->mmap_xa, XA_FLAGS_ALLOC);
+
+ rdma_restrack_new(&ucontext->res, RDMA_RESTRACK_CTX);
+ rdma_restrack_set_name(&ucontext->res, NULL);
+ attrs->context = ucontext;
+ return 0;
+}
+
+int ib_init_ucontext(struct uverbs_attr_bundle *attrs)
+{
+ struct ib_ucontext *ucontext = attrs->context;
+ struct ib_uverbs_file *file = attrs->ufile;
+ int *fd_array;
+ int fd_count;
+ int ret;
+
+ if (!down_read_trylock(&file->hw_destroy_rwsem))
+ return -EIO;
+ mutex_lock(&file->ucontext_lock);
if (file->ucontext) {
ret = -EINVAL;
goto err;
}
- ret = ib_rdmacg_try_charge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
+ ret = ib_rdmacg_try_charge(&ucontext->cg_obj, ucontext->device,
+ RDMACG_RESOURCE_HCA_HANDLE);
if (ret)
goto err;
- ucontext = rdma_zalloc_drv_obj(ib_dev, ib_ucontext);
- if (!ucontext) {
- ret = -ENOMEM;
- goto err_alloc;
- }
-
- attrs->context = ucontext;
-
- ucontext->res.type = RDMA_RESTRACK_CTX;
- ucontext->device = ib_dev;
- ucontext->cg_obj = cg_obj;
- /* ufile is required when some objects are released */
- ucontext->ufile = file;
-
- ucontext->closing = false;
- ucontext->cleanup_retryable = false;
-
- mutex_init(&ucontext->per_mm_list_lock);
- INIT_LIST_HEAD(&ucontext->per_mm_list);
-
- ret = get_unused_fd_flags(O_CLOEXEC);
- if (ret < 0)
- goto err_free;
- resp.async_fd = ret;
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_GET_CONTEXT_FD_ARR)) {
+ fd_count = uverbs_attr_ptr_get_array_size(attrs,
+ UVERBS_ATTR_GET_CONTEXT_FD_ARR,
+ sizeof(int));
+ if (fd_count < 0) {
+ ret = fd_count;
+ goto err_uncharge;
+ }
- filp = ib_uverbs_alloc_async_event_file(file, ib_dev);
- if (IS_ERR(filp)) {
- ret = PTR_ERR(filp);
- goto err_fd;
+ fd_array = uverbs_attr_get_alloced_ptr(attrs,
+ UVERBS_ATTR_GET_CONTEXT_FD_ARR);
+ ret = ib_get_ucaps(fd_array, fd_count, &ucontext->enabled_caps);
+ if (ret)
+ goto err_uncharge;
}
- resp.num_comp_vectors = file->device->num_comp_vectors;
-
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_file;
-
- ret = ib_dev->ops.alloc_ucontext(ucontext, &attrs->driver_udata);
+ ret = ucontext->device->ops.alloc_ucontext(ucontext,
+ &attrs->driver_udata);
if (ret)
- goto err_file;
- if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
- ucontext->invalidate_range = NULL;
-
- rdma_restrack_uadd(&ucontext->res);
+ goto err_uncharge;
- fd_install(resp.async_fd, filp);
+ rdma_restrack_add(&ucontext->res);
/*
* Make sure that ib_uverbs_get_ucontext() sees the pointer update
@@ -278,24 +280,63 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
smp_store_release(&file->ucontext, ucontext);
mutex_unlock(&file->ucontext_lock);
-
+ up_read(&file->hw_destroy_rwsem);
return 0;
-err_file:
- ib_uverbs_free_async_event_file(file);
- fput(filp);
+err_uncharge:
+ ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device,
+ RDMACG_RESOURCE_HCA_HANDLE);
+err:
+ mutex_unlock(&file->ucontext_lock);
+ up_read(&file->hw_destroy_rwsem);
+ return ret;
+}
-err_fd:
- put_unused_fd(resp.async_fd);
+static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_get_context_resp resp;
+ struct ib_uverbs_get_context cmd;
+ struct ib_device *ib_dev;
+ struct ib_uobject *uobj;
+ int ret;
-err_free:
- kfree(ucontext);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
-err_alloc:
- ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
+ ret = ib_alloc_ucontext(attrs);
+ if (ret)
+ return ret;
-err:
- mutex_unlock(&file->ucontext_lock);
+ uobj = uobj_alloc(UVERBS_OBJECT_ASYNC_EVENT, attrs, &ib_dev);
+ if (IS_ERR(uobj)) {
+ ret = PTR_ERR(uobj);
+ goto err_ucontext;
+ }
+
+ resp = (struct ib_uverbs_get_context_resp){
+ .num_comp_vectors = attrs->ufile->device->num_comp_vectors,
+ .async_fd = uobj->id,
+ };
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
+ goto err_uobj;
+
+ ret = ib_init_ucontext(attrs);
+ if (ret)
+ goto err_uobj;
+
+ ib_uverbs_init_async_event_file(
+ container_of(uobj, struct ib_uverbs_async_event_file, uobj));
+ rdma_alloc_commit_uobject(uobj, attrs);
+ return 0;
+
+err_uobj:
+ rdma_alloc_abort_uobject(uobj, attrs, false);
+err_ucontext:
+ rdma_restrack_put(&attrs->context->res);
+ kfree(attrs->context);
+ attrs->context = NULL;
return ret;
}
@@ -315,7 +356,7 @@ static void copy_query_dev_fields(struct ib_ucontext *ucontext,
resp->hw_ver = attr->hw_ver;
resp->max_qp = attr->max_qp;
resp->max_qp_wr = attr->max_qp_wr;
- resp->device_cap_flags = lower_32_bits(attr->device_cap_flags);
+ resp->device_cap_flags = lower_32_bits(attr->device_cap_flags);
resp->max_sge = min(attr->max_send_sge, attr->max_recv_sge);
resp->max_sge_rd = attr->max_sge_rd;
resp->max_cq = attr->max_cq;
@@ -337,14 +378,12 @@ static void copy_query_dev_fields(struct ib_ucontext *ucontext,
resp->max_mcast_qp_attach = attr->max_mcast_qp_attach;
resp->max_total_mcast_qp_attach = attr->max_total_mcast_qp_attach;
resp->max_ah = attr->max_ah;
- resp->max_fmr = attr->max_fmr;
- resp->max_map_per_fmr = attr->max_map_per_fmr;
resp->max_srq = attr->max_srq;
resp->max_srq_wr = attr->max_srq_wr;
resp->max_srq_sge = attr->max_srq_sge;
resp->max_pkeys = attr->max_pkeys;
resp->local_ca_ack_delay = attr->local_ca_ack_delay;
- resp->phys_port_cnt = ib_dev->phys_port_cnt;
+ resp->phys_port_cnt = min_t(u32, ib_dev->phys_port_cnt, U8_MAX);
}
static int ib_uverbs_query_device(struct uverbs_attr_bundle *attrs)
@@ -398,8 +437,8 @@ static int ib_uverbs_query_port(struct uverbs_attr_bundle *attrs)
static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
{
+ struct ib_uverbs_alloc_pd_resp resp = {};
struct ib_uverbs_alloc_pd cmd;
- struct ib_uverbs_alloc_pd_resp resp;
struct ib_uobject *uobj;
struct ib_pd *pd;
int ret;
@@ -421,29 +460,24 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
pd->device = ib_dev;
pd->uobject = uobj;
- pd->__internal_mr = NULL;
atomic_set(&pd->usecnt, 0);
- pd->res.type = RDMA_RESTRACK_PD;
+
+ rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD);
+ rdma_restrack_set_name(&pd->res, NULL);
ret = ib_dev->ops.alloc_pd(pd, &attrs->driver_udata);
if (ret)
goto err_alloc;
+ rdma_restrack_add(&pd->res);
uobj->object = pd;
- memset(&resp, 0, sizeof resp);
- resp.pd_handle = uobj->id;
- rdma_restrack_uadd(&pd->res);
+ uobj_finalize_uobj_create(uobj, attrs);
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_copy;
-
- return uobj_alloc_commit(uobj, attrs);
+ resp.pd_handle = uobj->id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
-err_copy:
- ib_dealloc_pd_user(pd, &attrs->driver_udata);
- pd = NULL;
err_alloc:
+ rdma_restrack_put(&pd->res);
kfree(pd);
err:
uobj_alloc_abort(uobj, attrs);
@@ -550,15 +584,15 @@ static void xrcd_table_delete(struct ib_uverbs_device *dev,
static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_device *ibudev = attrs->ufile->device;
+ struct ib_uverbs_open_xrcd_resp resp = {};
struct ib_uverbs_open_xrcd cmd;
- struct ib_uverbs_open_xrcd_resp resp;
struct ib_uxrcd_object *obj;
struct ib_xrcd *xrcd = NULL;
- struct fd f = {NULL, 0};
struct inode *inode = NULL;
- int ret = 0;
int new_xrcd = 0;
struct ib_device *ib_dev;
+ struct fd f = EMPTY_FD;
+ int ret;
ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
@@ -569,12 +603,12 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs)
if (cmd.fd != -1) {
/* search for file descriptor */
f = fdget(cmd.fd);
- if (!f.file) {
+ if (fd_empty(f)) {
ret = -EBADF;
goto err_tree_mutex_unlock;
}
- inode = file_inode(f.file);
+ inode = file_inode(fd_file(f));
xrcd = find_xrcd(ibudev, inode);
if (!xrcd && !(cmd.oflags & O_CREAT)) {
/* no file descriptor. Need CREATE flag */
@@ -596,24 +630,16 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs)
}
if (!xrcd) {
- xrcd = ib_dev->ops.alloc_xrcd(ib_dev, &attrs->driver_udata);
+ xrcd = ib_alloc_xrcd_user(ib_dev, inode, &attrs->driver_udata);
if (IS_ERR(xrcd)) {
ret = PTR_ERR(xrcd);
goto err;
}
-
- xrcd->inode = inode;
- xrcd->device = ib_dev;
- atomic_set(&xrcd->usecnt, 0);
- mutex_init(&xrcd->tgt_qp_mutex);
- INIT_LIST_HEAD(&xrcd->tgt_qp_list);
new_xrcd = 1;
}
atomic_set(&obj->refcnt, 0);
obj->uobject.object = xrcd;
- memset(&resp, 0, sizeof resp);
- resp.xrcd_handle = obj->uobject.id;
if (inode) {
if (new_xrcd) {
@@ -625,33 +651,22 @@ static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs)
atomic_inc(&xrcd->usecnt);
}
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_copy;
-
- if (f.file)
- fdput(f);
+ fdput(f);
mutex_unlock(&ibudev->xrcd_tree_mutex);
+ uobj_finalize_uobj_create(&obj->uobject, attrs);
- return uobj_alloc_commit(&obj->uobject, attrs);
-
-err_copy:
- if (inode) {
- if (new_xrcd)
- xrcd_table_delete(ibudev, inode);
- atomic_dec(&xrcd->usecnt);
- }
+ resp.xrcd_handle = obj->uobject.id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
err_dealloc_xrcd:
- ib_dealloc_xrcd(xrcd, &attrs->driver_udata);
+ ib_dealloc_xrcd_user(xrcd, uverbs_get_cleared_udata(attrs));
err:
uobj_alloc_abort(&obj->uobject, attrs);
err_tree_mutex_unlock:
- if (f.file)
- fdput(f);
+ fdput(f);
mutex_unlock(&ibudev->xrcd_tree_mutex);
@@ -682,9 +697,8 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd,
if (inode && !atomic_dec_and_test(&xrcd->usecnt))
return 0;
- ret = ib_dealloc_xrcd(xrcd, &attrs->driver_udata);
-
- if (ib_is_destroy_retryable(ret, why, uobject)) {
+ ret = ib_dealloc_xrcd_user(xrcd, &attrs->driver_udata);
+ if (ret) {
atomic_inc(&xrcd->usecnt);
return ret;
}
@@ -692,13 +706,13 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd,
if (inode)
xrcd_table_delete(dev, inode);
- return ret;
+ return 0;
}
static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
{
+ struct ib_uverbs_reg_mr_resp resp = {};
struct ib_uverbs_reg_mr cmd;
- struct ib_uverbs_reg_mr_resp resp;
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_mr *mr;
@@ -712,31 +726,22 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
return -EINVAL;
- ret = ib_check_mr_access(cmd.access_flags);
- if (ret)
- return ret;
-
uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
- if (!pd) {
- ret = -EINVAL;
+ ret = ib_check_mr_access(ib_dev, cmd.access_flags);
+ if (ret)
goto err_free;
- }
- if (cmd.access_flags & IB_ACCESS_ON_DEMAND) {
- if (!(pd->device->attrs.device_cap_flags &
- IB_DEVICE_ON_DEMAND_PAGING)) {
- pr_debug("ODP support not available\n");
- ret = -EINVAL;
- goto err_put;
- }
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
+ if (IS_ERR(pd)) {
+ ret = PTR_ERR(pd);
+ goto err_free;
}
mr = pd->device->ops.reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
- cmd.access_flags,
+ cmd.access_flags, NULL,
&attrs->driver_udata);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
@@ -745,33 +750,29 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
mr->device = pd->device;
mr->pd = pd;
+ mr->type = IB_MR_TYPE_USER;
mr->dm = NULL;
+ mr->sig_attrs = NULL;
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
- mr->res.type = RDMA_RESTRACK_MR;
- rdma_restrack_uadd(&mr->res);
+ mr->iova = cmd.hca_va;
+ mr->length = cmd.length;
- uobj->object = mr;
-
- memset(&resp, 0, sizeof resp);
- resp.lkey = mr->lkey;
- resp.rkey = mr->rkey;
- resp.mr_handle = uobj->id;
-
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_copy;
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_set_name(&mr->res, NULL);
+ rdma_restrack_add(&mr->res);
+ uobj->object = mr;
uobj_put_obj_read(pd);
+ uobj_finalize_uobj_create(uobj, attrs);
- return uobj_alloc_commit(uobj, attrs);
-
-err_copy:
- ib_dereg_mr_user(mr, &attrs->driver_udata);
+ resp.lkey = mr->lkey;
+ resp.rkey = mr->rkey;
+ resp.mr_handle = uobj->id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
err_put:
uobj_put_obj_read(pd);
-
err_free:
uobj_alloc_abort(uobj, attrs);
return ret;
@@ -781,23 +782,28 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_rereg_mr cmd;
struct ib_uverbs_rereg_mr_resp resp;
- struct ib_pd *pd = NULL;
struct ib_mr *mr;
- struct ib_pd *old_pd;
int ret;
struct ib_uobject *uobj;
+ struct ib_uobject *new_uobj;
+ struct ib_device *ib_dev;
+ struct ib_pd *orig_pd;
+ struct ib_pd *new_pd;
+ struct ib_mr *new_mr;
ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
- if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags)
+ if (!cmd.flags)
return -EINVAL;
+ if (cmd.flags & ~IB_MR_REREG_SUPPORTED)
+ return -EOPNOTSUPP;
+
if ((cmd.flags & IB_MR_REREG_TRANS) &&
- (!cmd.start || !cmd.hca_va || 0 >= cmd.length ||
- (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
- return -EINVAL;
+ (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
+ return -EINVAL;
uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, attrs);
if (IS_ERR(uobj))
@@ -811,32 +817,72 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
}
if (cmd.flags & IB_MR_REREG_ACCESS) {
- ret = ib_check_mr_access(cmd.access_flags);
+ ret = ib_check_mr_access(mr->device, cmd.access_flags);
if (ret)
goto put_uobjs;
}
+ orig_pd = mr->pd;
if (cmd.flags & IB_MR_REREG_PD) {
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
- attrs);
- if (!pd) {
- ret = -EINVAL;
+ new_pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
+ attrs);
+ if (IS_ERR(new_pd)) {
+ ret = PTR_ERR(new_pd);
goto put_uobjs;
}
+ } else {
+ new_pd = mr->pd;
}
- old_pd = mr->pd;
- ret = mr->device->ops.rereg_user_mr(mr, cmd.flags, cmd.start,
- cmd.length, cmd.hca_va,
- cmd.access_flags, pd,
- &attrs->driver_udata);
- if (ret)
+ /*
+ * The driver might create a new HW object as part of the rereg; we need
+ * to have a uobject ready to hold it.
+ */
+ new_uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev);
+ if (IS_ERR(new_uobj)) {
+ ret = PTR_ERR(new_uobj);
goto put_uobj_pd;
+ }
- if (cmd.flags & IB_MR_REREG_PD) {
- atomic_inc(&pd->usecnt);
- mr->pd = pd;
- atomic_dec(&old_pd->usecnt);
+ new_mr = ib_dev->ops.rereg_user_mr(mr, cmd.flags, cmd.start, cmd.length,
+ cmd.hca_va, cmd.access_flags, new_pd,
+ &attrs->driver_udata);
+ if (IS_ERR(new_mr)) {
+ ret = PTR_ERR(new_mr);
+ goto put_new_uobj;
+ }
+ if (new_mr) {
+ new_mr->device = new_pd->device;
+ new_mr->pd = new_pd;
+ new_mr->type = IB_MR_TYPE_USER;
+ new_mr->uobject = uobj;
+ atomic_inc(&new_pd->usecnt);
+ new_uobj->object = new_mr;
+
+ rdma_restrack_new(&new_mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_set_name(&new_mr->res, NULL);
+ rdma_restrack_add(&new_mr->res);
+
+ /*
+ * The new uobj for the new HW object is put into the same spot
+ * in the IDR and the old uobj & HW object is deleted.
+ */
+ rdma_assign_uobject(uobj, new_uobj, attrs);
+ rdma_alloc_commit_uobject(new_uobj, attrs);
+ uobj_put_destroy(uobj);
+ new_uobj = NULL;
+ uobj = NULL;
+ mr = new_mr;
+ } else {
+ if (cmd.flags & IB_MR_REREG_PD) {
+ atomic_dec(&orig_pd->usecnt);
+ mr->pd = new_pd;
+ atomic_inc(&new_pd->usecnt);
+ }
+ if (cmd.flags & IB_MR_REREG_TRANS) {
+ mr->iova = cmd.hca_va;
+ mr->length = cmd.length;
+ }
}
memset(&resp, 0, sizeof(resp));
@@ -845,12 +891,16 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
ret = uverbs_response(attrs, &resp, sizeof(resp));
+put_new_uobj:
+ if (new_uobj)
+ uobj_alloc_abort(new_uobj, attrs);
put_uobj_pd:
if (cmd.flags & IB_MR_REREG_PD)
- uobj_put_obj_read(pd);
+ uobj_put_obj_read(new_pd);
put_uobjs:
- uobj_put_write(uobj);
+ if (uobj)
+ uobj_put_write(uobj);
return ret;
}
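Annotation, not part of the patch: rereg_user_mr now returns a struct ib_mr * instead of an int. The driver either updates the existing MR in place and returns NULL, returns a freshly allocated MR when the rereg forces a new HW object, or returns ERR_PTR() on failure; the core then swaps the uobject with rdma_assign_uobject() as shown above. A minimal sketch of a hypothetical driver callback under those assumptions (the hypo_* names and struct hypo_mr are made up):

/* Hypothetical driver, shown only to illustrate the new return contract. */
struct ib_mr *hypo_rereg_user_mr(struct ib_mr *mr, int flags, u64 start,
				 u64 length, u64 virt_addr, int access_flags,
				 struct ib_pd *pd, struct ib_udata *udata)
{
	struct hypo_mr *hmr = container_of(mr, struct hypo_mr, ibmr);

	if (!(flags & IB_MR_REREG_TRANS)) {
		/* Access flags/PD can be changed in place: return NULL and
		 * keep the existing MR and uobject. */
		hypo_update_mr_attrs(hmr, flags, access_flags, pd);
		return NULL;
	}

	/* The translation changed, so build a replacement MR. The core takes
	 * over refcounting, restrack and uobject replacement for it. */
	return hypo_create_user_mr(pd, start, length, virt_addr, access_flags,
				   udata); /* or ERR_PTR(-errno) on failure */
}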
@@ -870,7 +920,7 @@ static int ib_uverbs_dereg_mr(struct uverbs_attr_bundle *attrs)
static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_alloc_mw cmd;
- struct ib_uverbs_alloc_mw_resp resp;
+ struct ib_uverbs_alloc_mw_resp resp = {};
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_mw *mw;
@@ -886,8 +936,8 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
return PTR_ERR(uobj);
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
- if (!pd) {
- ret = -EINVAL;
+ if (IS_ERR(pd)) {
+ ret = PTR_ERR(pd);
goto err_free;
}
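Annotation, not part of the patch: the repeated `!pd` to `IS_ERR(pd)` churn in this and the following hunks reflects uobj_get_obj_read() now returning an ERR_PTR() value instead of NULL on a failed handle lookup, so handlers propagate the real error code rather than a blanket -EINVAL:

	pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
	if (IS_ERR(pd)) {
		ret = PTR_ERR(pd);	/* e.g. -ENOENT for a stale handle */
		goto err_free;
	}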
@@ -896,32 +946,33 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
goto err_put;
}
- mw = pd->device->ops.alloc_mw(pd, cmd.mw_type, &attrs->driver_udata);
- if (IS_ERR(mw)) {
- ret = PTR_ERR(mw);
+ mw = rdma_zalloc_drv_obj(ib_dev, ib_mw);
+ if (!mw) {
+ ret = -ENOMEM;
goto err_put;
}
- mw->device = pd->device;
- mw->pd = pd;
+ mw->device = ib_dev;
+ mw->pd = pd;
mw->uobject = uobj;
+ mw->type = cmd.mw_type;
+
+ ret = pd->device->ops.alloc_mw(mw, &attrs->driver_udata);
+ if (ret)
+ goto err_alloc;
+
atomic_inc(&pd->usecnt);
uobj->object = mw;
+ uobj_put_obj_read(pd);
+ uobj_finalize_uobj_create(uobj, attrs);
- memset(&resp, 0, sizeof(resp));
- resp.rkey = mw->rkey;
+ resp.rkey = mw->rkey;
resp.mw_handle = uobj->id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_copy;
-
- uobj_put_obj_read(pd);
- return uobj_alloc_commit(uobj, attrs);
-
-err_copy:
- uverbs_dealloc_mw(mw);
+err_alloc:
+ kfree(mw);
err_put:
uobj_put_obj_read(pd);
err_free:
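Annotation, not part of the patch: alloc_mw moves to the core-allocated object scheme. The core calls rdma_zalloc_drv_obj(ib_dev, ib_mw), pre-fills device, pd, uobject and type, and the driver op now receives the pre-allocated struct ib_mw and returns an int, filling in the rkey. A hypothetical driver side under that assumption (the driver advertises its object size with INIT_RDMA_OBJ_SIZE(ib_mw, hypo_mw, ibmw) in its ib_device_ops; hypo_* names are made up):

struct hypo_mw {
	struct ib_mw ibmw;	/* must stay the first member */
	u32 hw_handle;
};

static int hypo_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
{
	struct hypo_mw *mw = container_of(ibmw, struct hypo_mw, ibmw);
	int err;

	err = hypo_hw_create_mw(ibmw->device, ibmw->pd, ibmw->type,
				&mw->hw_handle, &ibmw->rkey);
	if (err)
		return err;	/* the core kfree()s the object on failure */
	return 0;
}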
@@ -958,39 +1009,33 @@ static int ib_uverbs_create_comp_channel(struct uverbs_attr_bundle *attrs)
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- resp.fd = uobj->id;
-
ev_file = container_of(uobj, struct ib_uverbs_completion_event_file,
uobj);
ib_uverbs_init_event_queue(&ev_file->ev_queue);
+ uobj_finalize_uobj_create(uobj, attrs);
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret) {
- uobj_alloc_abort(uobj, attrs);
- return ret;
- }
-
- return uobj_alloc_commit(uobj, attrs);
+ resp.fd = uobj->id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
- struct ib_uverbs_ex_create_cq *cmd)
+static int create_cq(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_ex_create_cq *cmd)
{
struct ib_ucq_object *obj;
struct ib_uverbs_completion_event_file *ev_file = NULL;
struct ib_cq *cq;
int ret;
- struct ib_uverbs_ex_create_cq_resp resp;
+ struct ib_uverbs_ex_create_cq_resp resp = {};
struct ib_cq_init_attr attr = {};
struct ib_device *ib_dev;
if (cmd->comp_vector >= attrs->ufile->device->num_comp_vectors)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, attrs,
&ib_dev);
if (IS_ERR(obj))
- return obj;
+ return PTR_ERR(obj);
if (cmd->comp_channel >= 0) {
ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, attrs);
@@ -1000,65 +1045,60 @@ static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
}
}
- obj->uobject.user_handle = cmd->user_handle;
- obj->comp_events_reported = 0;
- obj->async_events_reported = 0;
+ obj->uevent.uobject.user_handle = cmd->user_handle;
INIT_LIST_HEAD(&obj->comp_list);
- INIT_LIST_HEAD(&obj->async_list);
+ INIT_LIST_HEAD(&obj->uevent.event_list);
attr.cqe = cmd->cqe;
attr.comp_vector = cmd->comp_vector;
attr.flags = cmd->flags;
- cq = ib_dev->ops.create_cq(ib_dev, &attr, &attrs->driver_udata);
- if (IS_ERR(cq)) {
- ret = PTR_ERR(cq);
+ cq = rdma_zalloc_drv_obj(ib_dev, ib_cq);
+ if (!cq) {
+ ret = -ENOMEM;
goto err_file;
}
-
cq->device = ib_dev;
- cq->uobject = &obj->uobject;
+ cq->uobject = obj;
cq->comp_handler = ib_uverbs_comp_handler;
cq->event_handler = ib_uverbs_cq_event_handler;
cq->cq_context = ev_file ? &ev_file->ev_queue : NULL;
atomic_set(&cq->usecnt, 0);
- obj->uobject.object = cq;
- memset(&resp, 0, sizeof resp);
- resp.base.cq_handle = obj->uobject.id;
- resp.base.cqe = cq->cqe;
- resp.response_length = uverbs_response_length(attrs, sizeof(resp));
-
- cq->res.type = RDMA_RESTRACK_CQ;
- rdma_restrack_uadd(&cq->res);
+ rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+ rdma_restrack_set_name(&cq->res, NULL);
- ret = uverbs_response(attrs, &resp, sizeof(resp));
+ ret = ib_dev->ops.create_cq(cq, &attr, attrs);
if (ret)
- goto err_cb;
+ goto err_free;
+ rdma_restrack_add(&cq->res);
- ret = uobj_alloc_commit(&obj->uobject, attrs);
- if (ret)
- return ERR_PTR(ret);
- return obj;
+ obj->uevent.uobject.object = cq;
+ obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file);
+ if (obj->uevent.event_file)
+ uverbs_uobject_get(&obj->uevent.event_file->uobj);
+ uobj_finalize_uobj_create(&obj->uevent.uobject, attrs);
-err_cb:
- ib_destroy_cq(cq);
+ resp.base.cq_handle = obj->uevent.uobject.id;
+ resp.base.cqe = cq->cqe;
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+ return uverbs_response(attrs, &resp, sizeof(resp));
+err_free:
+ rdma_restrack_put(&cq->res);
+ kfree(cq);
err_file:
if (ev_file)
- ib_uverbs_release_ucq(attrs->ufile, ev_file, obj);
-
+ ib_uverbs_release_ucq(ev_file, obj);
err:
- uobj_alloc_abort(&obj->uobject, attrs);
-
- return ERR_PTR(ret);
+ uobj_alloc_abort(&obj->uevent.uobject, attrs);
+ return ret;
}
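Annotation, not part of the patch: create_cq follows the same allocation scheme. The core zero-allocates the ib_cq, fills the common fields and the restrack entry, and the driver op takes the pre-allocated CQ and returns an int; on error the core undoes the restrack bookkeeping and frees the object, as the err_free label above shows. A sketch of a hypothetical driver callback matching the call signature used in this hunk (hypo_* names are made up):

static int hypo_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
			  struct uverbs_attr_bundle *attrs)
{
	struct hypo_cq *cq = container_of(ibcq, struct hypo_cq, ibcq);
	int err;

	if (attr->flags)
		return -EOPNOTSUPP;	/* no creation flags supported here */

	err = hypo_hw_create_cq(ibcq->device, attr->cqe, attr->comp_vector, cq,
				&attrs->driver_udata);
	if (err)
		return err;

	ibcq->cqe = attr->cqe;	/* report the usable CQ depth back */
	return 0;
}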
static int ib_uverbs_create_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_cq cmd;
struct ib_uverbs_ex_create_cq cmd_ex;
- struct ib_ucq_object *obj;
int ret;
ret = uverbs_request(attrs, &cmd, sizeof(cmd));
@@ -1071,14 +1111,12 @@ static int ib_uverbs_create_cq(struct uverbs_attr_bundle *attrs)
cmd_ex.comp_vector = cmd.comp_vector;
cmd_ex.comp_channel = cmd.comp_channel;
- obj = create_cq(attrs, &cmd_ex);
- return PTR_ERR_OR_ZERO(obj);
+ return create_cq(attrs, &cmd_ex);
}
static int ib_uverbs_ex_create_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_ex_create_cq cmd;
- struct ib_ucq_object *obj;
int ret;
ret = uverbs_request(attrs, &cmd, sizeof(cmd));
@@ -1091,8 +1129,7 @@ static int ib_uverbs_ex_create_cq(struct uverbs_attr_bundle *attrs)
if (cmd.reserved)
return -EINVAL;
- obj = create_cq(attrs, &cmd);
- return PTR_ERR_OR_ZERO(obj);
+ return create_cq(attrs, &cmd);
}
static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs)
@@ -1100,15 +1137,15 @@ static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs)
struct ib_uverbs_resize_cq cmd;
struct ib_uverbs_resize_cq_resp resp = {};
struct ib_cq *cq;
- int ret = -EINVAL;
+ int ret;
ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
- if (!cq)
- return -EINVAL;
+ if (IS_ERR(cq))
+ return PTR_ERR(cq);
ret = cq->device->ops.resize_cq(cq, cmd.cqe, &attrs->driver_udata);
if (ret)
@@ -1118,7 +1155,8 @@ static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs)
ret = uverbs_response(attrs, &resp, sizeof(resp));
out:
- uobj_put_obj_read(cq);
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
return ret;
}
@@ -1168,8 +1206,8 @@ static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs)
return ret;
cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
- if (!cq)
- return -EINVAL;
+ if (IS_ERR(cq))
+ return PTR_ERR(cq);
/* we copy a struct ib_uverbs_poll_cq_resp to user space */
header_ptr = attrs->ucore.outbuf;
@@ -1201,7 +1239,8 @@ static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs)
ret = uverbs_output_written(attrs, UVERBS_ATTR_CORE_OUT);
out_put:
- uobj_put_obj_read(cq);
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
return ret;
}
@@ -1216,14 +1255,14 @@ static int ib_uverbs_req_notify_cq(struct uverbs_attr_bundle *attrs)
return ret;
cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
- if (!cq)
- return -EINVAL;
+ if (IS_ERR(cq))
+ return PTR_ERR(cq);
ib_req_notify_cq(cq, cmd.solicited_only ?
IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
- uobj_put_obj_read(cq);
-
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
return 0;
}
@@ -1243,10 +1282,10 @@ static int ib_uverbs_destroy_cq(struct uverbs_attr_bundle *attrs)
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- obj = container_of(uobj, struct ib_ucq_object, uobject);
+ obj = container_of(uobj, struct ib_ucq_object, uevent.uobject);
memset(&resp, 0, sizeof(resp));
resp.comp_events_reported = obj->comp_events_reported;
- resp.async_events_reported = obj->async_events_reported;
+ resp.async_events_reported = obj->uevent.events_reported;
uobj_put_destroy(uobj);
@@ -1265,14 +1304,27 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
struct ib_srq *srq = NULL;
struct ib_qp *qp;
struct ib_qp_init_attr attr = {};
- struct ib_uverbs_ex_create_qp_resp resp;
+ struct ib_uverbs_ex_create_qp_resp resp = {};
int ret;
struct ib_rwq_ind_table *ind_tbl = NULL;
bool has_sq = true;
struct ib_device *ib_dev;
- if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
- return -EPERM;
+ switch (cmd->qp_type) {
+ case IB_QPT_RAW_PACKET:
+ if (!rdma_uattrs_has_raw_cap(attrs))
+ return -EPERM;
+ fallthrough;
+ case IB_QPT_RC:
+ case IB_QPT_UC:
+ case IB_QPT_UD:
+ case IB_QPT_XRC_INI:
+ case IB_QPT_XRC_TGT:
+ case IB_QPT_DRIVER:
+ break;
+ default:
+ return -EINVAL;
+ }
obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs,
&ib_dev);
@@ -1286,8 +1338,8 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
ind_tbl = uobj_get_obj_read(rwq_ind_table,
UVERBS_OBJECT_RWQ_IND_TBL,
cmd->rwq_ind_tbl_handle, attrs);
- if (!ind_tbl) {
- ret = -EINVAL;
+ if (IS_ERR(ind_tbl)) {
+ ret = PTR_ERR(ind_tbl);
goto err_put;
}
@@ -1325,8 +1377,10 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
if (cmd->is_srq) {
srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ,
cmd->srq_handle, attrs);
- if (!srq || srq->srq_type == IB_SRQT_XRC) {
- ret = -EINVAL;
+ if (IS_ERR(srq) ||
+ srq->srq_type == IB_SRQT_XRC) {
+ ret = IS_ERR(srq) ? PTR_ERR(srq) : -EINVAL;
goto err_put;
}
}
@@ -1336,23 +1390,29 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
rcq = uobj_get_obj_read(
cq, UVERBS_OBJECT_CQ,
cmd->recv_cq_handle, attrs);
- if (!rcq) {
- ret = -EINVAL;
+ if (IS_ERR(rcq)) {
+ ret = PTR_ERR(rcq);
goto err_put;
}
}
}
}
- if (has_sq)
+ if (has_sq) {
scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
cmd->send_cq_handle, attrs);
- if (!ind_tbl)
+ if (IS_ERR(scq)) {
+ ret = PTR_ERR(scq);
+ goto err_put;
+ }
+ }
+
+ if (!ind_tbl && cmd->qp_type != IB_QPT_XRC_INI)
rcq = rcq ?: scq;
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle,
attrs);
- if (!pd || (!scq && has_sq)) {
- ret = -EINVAL;
+ if (IS_ERR(pd)) {
+ ret = PTR_ERR(pd);
goto err_put;
}
@@ -1360,7 +1420,6 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
}
attr.event_handler = ib_uverbs_qp_event_handler;
- attr.qp_context = attrs->ufile;
attr.send_cq = scq;
attr.recv_cq = rcq;
attr.srq = srq;
@@ -1368,7 +1427,6 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
attr.sq_sig_type = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR :
IB_SIGNAL_REQ_WR;
attr.qp_type = cmd->qp_type;
- attr.create_flags = 0;
attr.cap.max_send_wr = cmd->max_send_wr;
attr.cap.max_recv_wr = cmd->max_recv_wr;
@@ -1376,7 +1434,6 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
attr.cap.max_recv_sge = cmd->max_recv_sge;
attr.cap.max_inline_data = cmd->max_inline_data;
- obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
INIT_LIST_HEAD(&obj->mcast_list);
@@ -1394,7 +1451,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
}
if (attr.create_flags & IB_QP_CREATE_SOURCE_QPN) {
- if (!capable(CAP_NET_RAW)) {
+ if (!rdma_uattrs_has_raw_cap(attrs)) {
ret = -EPERM;
goto err_put;
}
@@ -1402,61 +1459,18 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
attr.source_qpn = cmd->source_qpn;
}
- if (cmd->qp_type == IB_QPT_XRC_TGT)
- qp = ib_create_qp(pd, &attr);
- else
- qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata,
- &obj->uevent.uobject);
-
+ qp = ib_create_qp_user(device, pd, &attr, &attrs->driver_udata, obj,
+ KBUILD_MODNAME);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
goto err_put;
}
-
- if (cmd->qp_type != IB_QPT_XRC_TGT) {
- ret = ib_create_qp_security(qp, device);
- if (ret)
- goto err_cb;
-
- qp->pd = pd;
- qp->send_cq = attr.send_cq;
- qp->recv_cq = attr.recv_cq;
- qp->srq = attr.srq;
- qp->rwq_ind_tbl = ind_tbl;
- qp->event_handler = attr.event_handler;
- qp->qp_context = attr.qp_context;
- qp->qp_type = attr.qp_type;
- atomic_set(&qp->usecnt, 0);
- atomic_inc(&pd->usecnt);
- qp->port = 0;
- if (attr.send_cq)
- atomic_inc(&attr.send_cq->usecnt);
- if (attr.recv_cq)
- atomic_inc(&attr.recv_cq->usecnt);
- if (attr.srq)
- atomic_inc(&attr.srq->usecnt);
- if (ind_tbl)
- atomic_inc(&ind_tbl->usecnt);
- } else {
- /* It is done in _ib_create_qp for other QP types */
- qp->uobject = &obj->uevent.uobject;
- }
+ ib_qp_usecnt_inc(qp);
obj->uevent.uobject.object = qp;
-
- memset(&resp, 0, sizeof resp);
- resp.base.qpn = qp->qp_num;
- resp.base.qp_handle = obj->uevent.uobject.id;
- resp.base.max_recv_sge = attr.cap.max_recv_sge;
- resp.base.max_send_sge = attr.cap.max_send_sge;
- resp.base.max_recv_wr = attr.cap.max_recv_wr;
- resp.base.max_send_wr = attr.cap.max_send_wr;
- resp.base.max_inline_data = attr.cap.max_inline_data;
- resp.response_length = uverbs_response_length(attrs, sizeof(resp));
-
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_cb;
+ obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file);
+ if (obj->uevent.event_file)
+ uverbs_uobject_get(&obj->uevent.event_file->uobj);
if (xrcd) {
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
@@ -1468,30 +1482,43 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
if (pd)
uobj_put_obj_read(pd);
if (scq)
- uobj_put_obj_read(scq);
+ rdma_lookup_put_uobject(&scq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (rcq && rcq != scq)
- uobj_put_obj_read(rcq);
+ rdma_lookup_put_uobject(&rcq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (srq)
- uobj_put_obj_read(srq);
+ rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (ind_tbl)
uobj_put_obj_read(ind_tbl);
+ uobj_finalize_uobj_create(&obj->uevent.uobject, attrs);
- return uobj_alloc_commit(&obj->uevent.uobject, attrs);
-err_cb:
- ib_destroy_qp(qp);
+ resp.base.qpn = qp->qp_num;
+ resp.base.qp_handle = obj->uevent.uobject.id;
+ resp.base.max_recv_sge = attr.cap.max_recv_sge;
+ resp.base.max_send_sge = attr.cap.max_send_sge;
+ resp.base.max_recv_wr = attr.cap.max_recv_wr;
+ resp.base.max_send_wr = attr.cap.max_send_wr;
+ resp.base.max_inline_data = attr.cap.max_inline_data;
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+ return uverbs_response(attrs, &resp, sizeof(resp));
err_put:
if (!IS_ERR(xrcd_uobj))
uobj_put_read(xrcd_uobj);
- if (pd)
+ if (!IS_ERR_OR_NULL(pd))
uobj_put_obj_read(pd);
- if (scq)
- uobj_put_obj_read(scq);
- if (rcq && rcq != scq)
- uobj_put_obj_read(rcq);
- if (srq)
- uobj_put_obj_read(srq);
- if (ind_tbl)
+ if (!IS_ERR_OR_NULL(scq))
+ rdma_lookup_put_uobject(&scq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ if (!IS_ERR_OR_NULL(rcq) && rcq != scq)
+ rdma_lookup_put_uobject(&rcq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ if (!IS_ERR_OR_NULL(srq))
+ rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ if (!IS_ERR_OR_NULL(ind_tbl))
uobj_put_obj_read(ind_tbl);
uobj_alloc_abort(&obj->uevent.uobject, attrs);
@@ -1546,14 +1573,14 @@ static int ib_uverbs_ex_create_qp(struct uverbs_attr_bundle *attrs)
static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs)
{
+ struct ib_uverbs_create_qp_resp resp = {};
struct ib_uverbs_open_qp cmd;
- struct ib_uverbs_create_qp_resp resp;
struct ib_uqp_object *obj;
struct ib_xrcd *xrcd;
- struct ib_uobject *uninitialized_var(xrcd_uobj);
struct ib_qp *qp;
- struct ib_qp_open_attr attr;
+ struct ib_qp_open_attr attr = {};
int ret;
+ struct ib_uobject *xrcd_uobj;
struct ib_device *ib_dev;
ret = uverbs_request(attrs, &cmd, sizeof(cmd));
@@ -1578,11 +1605,9 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs)
}
attr.event_handler = ib_uverbs_qp_event_handler;
- attr.qp_context = attrs->ufile;
attr.qp_num = cmd.qpn;
attr.qp_type = cmd.qp_type;
- obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
INIT_LIST_HEAD(&obj->mcast_list);
@@ -1595,23 +1620,16 @@ static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs)
obj->uevent.uobject.object = qp;
obj->uevent.uobject.user_handle = cmd.user_handle;
- memset(&resp, 0, sizeof resp);
- resp.qpn = qp->qp_num;
- resp.qp_handle = obj->uevent.uobject.id;
-
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_destroy;
-
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
atomic_inc(&obj->uxrcd->refcnt);
- qp->uobject = &obj->uevent.uobject;
+ qp->uobject = obj;
uobj_put_read(xrcd_uobj);
+ uobj_finalize_uobj_create(&obj->uevent.uobject, attrs);
- return uobj_alloc_commit(&obj->uevent.uobject, attrs);
+ resp.qpn = qp->qp_num;
+ resp.qp_handle = obj->uevent.uobject.id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
-err_destroy:
- ib_destroy_qp(qp);
err_xrcd:
uobj_put_read(xrcd_uobj);
err_put:
@@ -1662,14 +1680,15 @@ static int ib_uverbs_query_qp(struct uverbs_attr_bundle *attrs)
}
qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
- if (!qp) {
- ret = -EINVAL;
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
goto out;
}
ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr);
- uobj_put_obj_read(qp);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (ret)
goto out;
@@ -1767,8 +1786,8 @@ static int modify_qp(struct uverbs_attr_bundle *attrs,
qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle,
attrs);
- if (!qp) {
- ret = -EINVAL;
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
goto out;
}
@@ -1856,8 +1875,15 @@ static int modify_qp(struct uverbs_attr_bundle *attrs,
attr->path_mtu = cmd->base.path_mtu;
if (cmd->base.attr_mask & IB_QP_PATH_MIG_STATE)
attr->path_mig_state = cmd->base.path_mig_state;
- if (cmd->base.attr_mask & IB_QP_QKEY)
+ if (cmd->base.attr_mask & IB_QP_QKEY) {
+ if (cmd->base.qkey & IB_QP_SET_QKEY &&
+ !(rdma_nl_get_privileged_qkey() ||
+ rdma_uattrs_has_raw_cap(attrs))) {
+ ret = -EPERM;
+ goto release_qp;
+ }
attr->qkey = cmd->base.qkey;
+ }
if (cmd->base.attr_mask & IB_QP_RQ_PSN)
attr->rq_psn = cmd->base.rq_psn;
if (cmd->base.attr_mask & IB_QP_SQ_PSN)
@@ -1906,7 +1932,8 @@ static int modify_qp(struct uverbs_attr_bundle *attrs,
&attrs->driver_udata);
release_qp:
- uobj_put_obj_read(qp);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
out:
kfree(attr);
@@ -1922,8 +1949,7 @@ static int ib_uverbs_modify_qp(struct uverbs_attr_bundle *attrs)
if (ret)
return ret;
- if (cmd.base.attr_mask &
- ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1))
+ if (cmd.base.attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
return -EOPNOTSUPP;
return modify_qp(attrs, &cmd);
@@ -1945,10 +1971,7 @@ static int ib_uverbs_ex_modify_qp(struct uverbs_attr_bundle *attrs)
* Last bit is reserved for extending the attr_mask by
* using another field.
*/
- BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31));
-
- if (cmd.base.attr_mask &
- ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1))
+ if (cmd.base.attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT))
return -EOPNOTSUPP;
ret = modify_qp(attrs, &cmd);
@@ -1985,12 +2008,13 @@ static int ib_uverbs_destroy_qp(struct uverbs_attr_bundle *attrs)
static void *alloc_wr(size_t wr_size, __u32 num_sge)
{
- if (num_sge >= (U32_MAX - ALIGN(wr_size, sizeof (struct ib_sge))) /
- sizeof (struct ib_sge))
+ if (num_sge >= (U32_MAX - ALIGN(wr_size, sizeof(struct ib_sge))) /
+ sizeof(struct ib_sge))
return NULL;
- return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) +
- num_sge * sizeof (struct ib_sge), GFP_KERNEL);
+ return kmalloc(ALIGN(wr_size, sizeof(struct ib_sge)) +
+ num_sge * sizeof(struct ib_sge),
+ GFP_KERNEL);
}
static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs)
@@ -2012,11 +2036,13 @@ static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs)
ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
if (ret)
return ret;
- wqes = uverbs_request_next_ptr(&iter, cmd.wqe_size * cmd.wr_count);
+ wqes = uverbs_request_next_ptr(&iter, size_mul(cmd.wqe_size,
+ cmd.wr_count));
if (IS_ERR(wqes))
return PTR_ERR(wqes);
- sgls = uverbs_request_next_ptr(
- &iter, cmd.sge_count * sizeof(struct ib_uverbs_sge));
+ sgls = uverbs_request_next_ptr(&iter,
+ size_mul(cmd.sge_count,
+ sizeof(struct ib_uverbs_sge)));
if (IS_ERR(sgls))
return PTR_ERR(sgls);
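Annotation, not part of the patch: the open-coded `cmd.wqe_size * cmd.wr_count` and `cmd.sge_count * sizeof(...)` products become size_mul() from <linux/overflow.h>, so a userspace-controlled product can no longer wrap around and alias a short length; the oversized request then fails the bounds check inside uverbs_request_next_ptr(). For example:

	/* Both counts are u32, so a plain multiply wraps at 32 bits even on a
	 * 64-bit kernel; size_mul() widens to size_t (and saturates at
	 * SIZE_MAX if that still overflows), so the length check in
	 * uverbs_request_next_ptr() fails instead of under-reading. */
	size_t len = size_mul(cmd.wqe_size, cmd.wr_count);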
ret = uverbs_request_finish(&iter);
@@ -2028,8 +2054,8 @@ static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs)
return -ENOMEM;
qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
- if (!qp) {
- ret = -EINVAL;
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
goto out;
}
@@ -2066,9 +2092,9 @@ static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs)
ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH,
user_wr->wr.ud.ah, attrs);
- if (!ud->ah) {
+ if (IS_ERR(ud->ah)) {
+ ret = PTR_ERR(ud->ah);
kfree(ud);
- ret = -EINVAL;
goto out_put;
}
ud->remote_qpn = user_wr->wr.ud.remote_qpn;
@@ -2170,7 +2196,8 @@ static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs)
ret = ret2;
out_put:
- uobj_put_obj_read(qp);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
while (wr) {
if (is_ud && ud_wr(wr)->ah)
@@ -2198,14 +2225,14 @@ ib_uverbs_unmarshall_recv(struct uverbs_req_iter *iter, u32 wr_count,
const struct ib_sge __user *sgls;
const void __user *wqes;
- if (wqe_size < sizeof (struct ib_uverbs_recv_wr))
+ if (wqe_size < sizeof(struct ib_uverbs_recv_wr))
return ERR_PTR(-EINVAL);
- wqes = uverbs_request_next_ptr(iter, wqe_size * wr_count);
+ wqes = uverbs_request_next_ptr(iter, size_mul(wqe_size, wr_count));
if (IS_ERR(wqes))
return ERR_CAST(wqes);
- sgls = uverbs_request_next_ptr(
- iter, sge_count * sizeof(struct ib_uverbs_sge));
+ sgls = uverbs_request_next_ptr(iter, size_mul(sge_count,
+ sizeof(struct ib_uverbs_sge)));
if (IS_ERR(sgls))
return ERR_CAST(sgls);
ret = uverbs_request_finish(iter);
@@ -2231,14 +2258,14 @@ ib_uverbs_unmarshall_recv(struct uverbs_req_iter *iter, u32 wr_count,
}
if (user_wr->num_sge >=
- (U32_MAX - ALIGN(sizeof *next, sizeof (struct ib_sge))) /
- sizeof (struct ib_sge)) {
+ (U32_MAX - ALIGN(sizeof(*next), sizeof(struct ib_sge))) /
+ sizeof(struct ib_sge)) {
ret = -EINVAL;
goto err;
}
- next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
- user_wr->num_sge * sizeof (struct ib_sge),
+ next = kmalloc(ALIGN(sizeof(*next), sizeof(struct ib_sge)) +
+ user_wr->num_sge * sizeof(struct ib_sge),
GFP_KERNEL);
if (!next) {
ret = -ENOMEM;
@@ -2256,8 +2283,8 @@ ib_uverbs_unmarshall_recv(struct uverbs_req_iter *iter, u32 wr_count,
next->num_sge = user_wr->num_sge;
if (next->num_sge) {
- next->sg_list = (void *) next +
- ALIGN(sizeof *next, sizeof (struct ib_sge));
+ next->sg_list = (void *)next +
+ ALIGN(sizeof(*next), sizeof(struct ib_sge));
if (copy_from_user(next->sg_list, sgls + sg_ind,
next->num_sge *
sizeof(struct ib_sge))) {
@@ -2304,15 +2331,16 @@ static int ib_uverbs_post_recv(struct uverbs_attr_bundle *attrs)
return PTR_ERR(wr);
qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
- if (!qp) {
- ret = -EINVAL;
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
goto out;
}
resp.bad_wr = 0;
ret = qp->device->ops.post_recv(qp->real_qp, wr, &bad_wr);
- uobj_put_obj_read(qp);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (ret) {
for (next = wr; next; next = next->next) {
++resp.bad_wr;
@@ -2354,15 +2382,16 @@ static int ib_uverbs_post_srq_recv(struct uverbs_attr_bundle *attrs)
return PTR_ERR(wr);
srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
- if (!srq) {
- ret = -EINVAL;
+ if (IS_ERR(srq)) {
+ ret = PTR_ERR(srq);
goto out;
}
resp.bad_wr = 0;
ret = srq->device->ops.post_srq_recv(srq, wr, &bad_wr);
- uobj_put_obj_read(srq);
+ rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (ret)
for (next = wr; next; next = next->next) {
@@ -2410,8 +2439,8 @@ static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs)
}
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
- if (!pd) {
- ret = -EINVAL;
+ if (IS_ERR(pd)) {
+ ret = PTR_ERR(pd);
goto err;
}
@@ -2442,22 +2471,14 @@ static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs)
ah->uobject = uobj;
uobj->user_handle = cmd.user_handle;
uobj->object = ah;
-
- resp.ah_handle = uobj->id;
-
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_copy;
-
uobj_put_obj_read(pd);
- return uobj_alloc_commit(uobj, attrs);
+ uobj_finalize_uobj_create(uobj, attrs);
-err_copy:
- rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
+ resp.ah_handle = uobj->id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
err_put:
uobj_put_obj_read(pd);
-
err:
uobj_alloc_abort(uobj, attrs);
return ret;
@@ -2488,10 +2509,10 @@ static int ib_uverbs_attach_mcast(struct uverbs_attr_bundle *attrs)
return ret;
qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
- if (!qp)
- return -EINVAL;
+ if (IS_ERR(qp))
+ return PTR_ERR(qp);
- obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+ obj = qp->uobject;
mutex_lock(&obj->mcast_lock);
list_for_each_entry(mcast, &obj->mcast_list, list)
@@ -2518,7 +2539,8 @@ static int ib_uverbs_attach_mcast(struct uverbs_attr_bundle *attrs)
out_put:
mutex_unlock(&obj->mcast_lock);
- uobj_put_obj_read(qp);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
return ret;
}
@@ -2529,7 +2551,7 @@ static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs)
struct ib_uqp_object *obj;
struct ib_qp *qp;
struct ib_uverbs_mcast_entry *mcast;
- int ret = -EINVAL;
+ int ret;
bool found = false;
ret = uverbs_request(attrs, &cmd, sizeof(cmd));
@@ -2537,10 +2559,10 @@ static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs)
return ret;
qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
- if (!qp)
- return -EINVAL;
+ if (IS_ERR(qp))
+ return PTR_ERR(qp);
- obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+ obj = qp->uobject;
mutex_lock(&obj->mcast_lock);
list_for_each_entry(mcast, &obj->mcast_list, list)
@@ -2561,7 +2583,8 @@ static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs)
out_put:
mutex_unlock(&obj->mcast_lock);
- uobj_put_obj_read(qp);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
return ret;
}
@@ -2671,8 +2694,8 @@ static int kern_spec_to_ib_spec_action(struct uverbs_attr_bundle *attrs,
UVERBS_OBJECT_FLOW_ACTION,
kern_spec->action.handle,
attrs);
- if (!ib_spec->action.act)
- return -EINVAL;
+ if (IS_ERR(ib_spec->action.act))
+ return PTR_ERR(ib_spec->action.act);
ib_spec->action.size =
sizeof(struct ib_flow_spec_action_handle);
flow_resources_add(uflow_res,
@@ -2689,8 +2712,8 @@ static int kern_spec_to_ib_spec_action(struct uverbs_attr_bundle *attrs,
UVERBS_OBJECT_COUNTERS,
kern_spec->flow_count.handle,
attrs);
- if (!ib_spec->flow_count.counters)
- return -EINVAL;
+ if (IS_ERR(ib_spec->flow_count.counters))
+ return PTR_ERR(ib_spec->flow_count.counters);
ib_spec->flow_count.size =
sizeof(struct ib_flow_spec_action_count);
flow_resources_add(uflow_res,
@@ -2704,12 +2727,6 @@ static int kern_spec_to_ib_spec_action(struct uverbs_attr_bundle *attrs,
return 0;
}
-static size_t kern_spec_filter_sz(const struct ib_uverbs_flow_spec_hdr *spec)
-{
- /* Returns user space filter size, includes padding */
- return (spec->size - sizeof(struct ib_uverbs_flow_spec_hdr)) / 2;
-}
-
static ssize_t spec_filter_size(const void *kern_spec_filter, u16 kern_filter_size,
u16 ib_real_filter_sz)
{
@@ -2748,7 +2765,7 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
case IB_FLOW_SPEC_ETH:
- ib_filter_sz = offsetof(struct ib_flow_eth_filter, real_sz);
+ ib_filter_sz = sizeof(struct ib_flow_eth_filter);
actual_filter_sz = spec_filter_size(kern_spec_mask,
kern_filter_sz,
ib_filter_sz);
@@ -2759,7 +2776,7 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
memcpy(&ib_spec->eth.mask, kern_spec_mask, actual_filter_sz);
break;
case IB_FLOW_SPEC_IPV4:
- ib_filter_sz = offsetof(struct ib_flow_ipv4_filter, real_sz);
+ ib_filter_sz = sizeof(struct ib_flow_ipv4_filter);
actual_filter_sz = spec_filter_size(kern_spec_mask,
kern_filter_sz,
ib_filter_sz);
@@ -2770,7 +2787,7 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
memcpy(&ib_spec->ipv4.mask, kern_spec_mask, actual_filter_sz);
break;
case IB_FLOW_SPEC_IPV6:
- ib_filter_sz = offsetof(struct ib_flow_ipv6_filter, real_sz);
+ ib_filter_sz = sizeof(struct ib_flow_ipv6_filter);
actual_filter_sz = spec_filter_size(kern_spec_mask,
kern_filter_sz,
ib_filter_sz);
@@ -2786,7 +2803,7 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
break;
case IB_FLOW_SPEC_TCP:
case IB_FLOW_SPEC_UDP:
- ib_filter_sz = offsetof(struct ib_flow_tcp_udp_filter, real_sz);
+ ib_filter_sz = sizeof(struct ib_flow_tcp_udp_filter);
actual_filter_sz = spec_filter_size(kern_spec_mask,
kern_filter_sz,
ib_filter_sz);
@@ -2797,7 +2814,7 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
memcpy(&ib_spec->tcp_udp.mask, kern_spec_mask, actual_filter_sz);
break;
case IB_FLOW_SPEC_VXLAN_TUNNEL:
- ib_filter_sz = offsetof(struct ib_flow_tunnel_filter, real_sz);
+ ib_filter_sz = sizeof(struct ib_flow_tunnel_filter);
actual_filter_sz = spec_filter_size(kern_spec_mask,
kern_filter_sz,
ib_filter_sz);
@@ -2812,7 +2829,7 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
return -EINVAL;
break;
case IB_FLOW_SPEC_ESP:
- ib_filter_sz = offsetof(struct ib_flow_esp_filter, real_sz);
+ ib_filter_sz = sizeof(struct ib_flow_esp_filter);
actual_filter_sz = spec_filter_size(kern_spec_mask,
kern_filter_sz,
ib_filter_sz);
@@ -2823,7 +2840,7 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
memcpy(&ib_spec->esp.mask, kern_spec_mask, actual_filter_sz);
break;
case IB_FLOW_SPEC_GRE:
- ib_filter_sz = offsetof(struct ib_flow_gre_filter, real_sz);
+ ib_filter_sz = sizeof(struct ib_flow_gre_filter);
actual_filter_sz = spec_filter_size(kern_spec_mask,
kern_filter_sz,
ib_filter_sz);
@@ -2834,7 +2851,7 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
memcpy(&ib_spec->gre.mask, kern_spec_mask, actual_filter_sz);
break;
case IB_FLOW_SPEC_MPLS:
- ib_filter_sz = offsetof(struct ib_flow_mpls_filter, real_sz);
+ ib_filter_sz = sizeof(struct ib_flow_mpls_filter);
actual_filter_sz = spec_filter_size(kern_spec_mask,
kern_filter_sz,
ib_filter_sz);
@@ -2853,11 +2870,16 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
union ib_flow_spec *ib_spec)
{
- ssize_t kern_filter_sz;
+ size_t kern_filter_sz;
void *kern_spec_mask;
void *kern_spec_val;
- kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr);
+ if (check_sub_overflow((size_t)kern_spec->hdr.size,
+ sizeof(struct ib_uverbs_flow_spec_hdr),
+ &kern_filter_sz))
+ return -EINVAL;
+
+ kern_filter_sz /= 2;
kern_spec_val = (void *)kern_spec +
sizeof(struct ib_uverbs_flow_spec_hdr);
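Annotation, not part of the patch: the removed kern_spec_filter_sz() helper subtracted the header size without checking that hdr.size was at least that large, so a short user-supplied size produced a huge unsigned result. check_sub_overflow() from <linux/overflow.h> returns true when the subtraction wraps, turning that case into a clean -EINVAL. In isolation:

	size_t filter_sz;

	/* If the user-supplied hdr.size is smaller than the header itself,
	 * the old helper returned a huge "filter size"; the overflow helper
	 * flags the wrap so the handler bails out instead. */
	if (check_sub_overflow((size_t)kern_spec->hdr.size,
			       sizeof(struct ib_uverbs_flow_spec_hdr),
			       &filter_sz))
		return -EINVAL;
	filter_sz /= 2;	/* the spec carries value then mask, equally sized */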
@@ -2909,26 +2931,25 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs)
return PTR_ERR(obj);
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
- if (!pd) {
- err = -EINVAL;
+ if (IS_ERR(pd)) {
+ err = PTR_ERR(pd);
goto err_uobj;
}
cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
- if (!cq) {
- err = -EINVAL;
+ if (IS_ERR(cq)) {
+ err = PTR_ERR(cq);
goto err_put_pd;
}
wq_init_attr.cq = cq;
wq_init_attr.max_sge = cmd.max_sge;
wq_init_attr.max_wr = cmd.max_wr;
- wq_init_attr.wq_context = attrs->ufile;
wq_init_attr.wq_type = cmd.wq_type;
wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
wq_init_attr.create_flags = cmd.create_flags;
- obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
+ obj->uevent.uobject.user_handle = cmd.user_handle;
wq = pd->device->ops.create_wq(pd, &wq_init_attr, &attrs->driver_udata);
if (IS_ERR(wq)) {
@@ -2936,37 +2957,34 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs)
goto err_put_cq;
}
- wq->uobject = &obj->uevent.uobject;
+ wq->uobject = obj;
obj->uevent.uobject.object = wq;
wq->wq_type = wq_init_attr.wq_type;
wq->cq = cq;
wq->pd = pd;
wq->device = pd->device;
- wq->wq_context = wq_init_attr.wq_context;
atomic_set(&wq->usecnt, 0);
atomic_inc(&pd->usecnt);
atomic_inc(&cq->usecnt);
- wq->uobject = &obj->uevent.uobject;
- obj->uevent.uobject.object = wq;
+ obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file);
+ if (obj->uevent.event_file)
+ uverbs_uobject_get(&obj->uevent.event_file->uobj);
+
+ uobj_put_obj_read(pd);
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ uobj_finalize_uobj_create(&obj->uevent.uobject, attrs);
- memset(&resp, 0, sizeof(resp));
resp.wq_handle = obj->uevent.uobject.id;
resp.max_sge = wq_init_attr.max_sge;
resp.max_wr = wq_init_attr.max_wr;
resp.wqn = wq->wq_num;
resp.response_length = uverbs_response_length(attrs, sizeof(resp));
- err = uverbs_response(attrs, &resp, sizeof(resp));
- if (err)
- goto err_copy;
-
- uobj_put_obj_read(pd);
- uobj_put_obj_read(cq);
- return uobj_alloc_commit(&obj->uevent.uobject, attrs);
+ return uverbs_response(attrs, &resp, sizeof(resp));
-err_copy:
- ib_destroy_wq(wq, &attrs->driver_udata);
err_put_cq:
- uobj_put_obj_read(cq);
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
err_put_pd:
uobj_put_obj_read(pd);
err_uobj:
@@ -3021,18 +3039,36 @@ static int ib_uverbs_ex_modify_wq(struct uverbs_attr_bundle *attrs)
return -EINVAL;
wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, attrs);
- if (!wq)
- return -EINVAL;
+ if (IS_ERR(wq))
+ return PTR_ERR(wq);
- wq_attr.curr_wq_state = cmd.curr_wq_state;
- wq_attr.wq_state = cmd.wq_state;
if (cmd.attr_mask & IB_WQ_FLAGS) {
wq_attr.flags = cmd.flags;
wq_attr.flags_mask = cmd.flags_mask;
}
+
+ if (cmd.attr_mask & IB_WQ_CUR_STATE) {
+ if (cmd.curr_wq_state > IB_WQS_ERR)
+ return -EINVAL;
+
+ wq_attr.curr_wq_state = cmd.curr_wq_state;
+ } else {
+ wq_attr.curr_wq_state = wq->state;
+ }
+
+ if (cmd.attr_mask & IB_WQ_STATE) {
+ if (cmd.wq_state > IB_WQS_ERR)
+ return -EINVAL;
+
+ wq_attr.wq_state = cmd.wq_state;
+ } else {
+ wq_attr.wq_state = wq_attr.curr_wq_state;
+ }
+
ret = wq->device->ops.modify_wq(wq, &wq_attr, cmd.attr_mask,
&attrs->driver_udata);
- uobj_put_obj_read(wq);
+ rdma_lookup_put_uobject(&wq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
return ret;
}
@@ -3040,14 +3076,14 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_ex_create_rwq_ind_table cmd;
struct ib_uverbs_ex_create_rwq_ind_table_resp resp = {};
- struct ib_uobject *uobj;
+ struct ib_uobject *uobj;
int err;
struct ib_rwq_ind_table_init_attr init_attr = {};
struct ib_rwq_ind_table *rwq_ind_tbl;
- struct ib_wq **wqs = NULL;
+ struct ib_wq **wqs = NULL;
u32 *wqs_handles = NULL;
struct ib_wq *wq = NULL;
- int i, j, num_read_wqs;
+ int i, num_read_wqs;
u32 num_wq_handles;
struct uverbs_req_iter iter;
struct ib_device *ib_dev;
@@ -3087,12 +3123,13 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
num_read_wqs++) {
wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ,
wqs_handles[num_read_wqs], attrs);
- if (!wq) {
- err = -EINVAL;
+ if (IS_ERR(wq)) {
+ err = PTR_ERR(wq);
goto put_wqs;
}
wqs[num_read_wqs] = wq;
+ atomic_inc(&wqs[num_read_wqs]->usecnt);
}
uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, attrs, &ib_dev);
@@ -3101,17 +3138,15 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
goto put_wqs;
}
- init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
- init_attr.ind_tbl = wqs;
-
- rwq_ind_tbl = ib_dev->ops.create_rwq_ind_table(ib_dev, &init_attr,
- &attrs->driver_udata);
-
- if (IS_ERR(rwq_ind_tbl)) {
- err = PTR_ERR(rwq_ind_tbl);
+ rwq_ind_tbl = rdma_zalloc_drv_obj(ib_dev, ib_rwq_ind_table);
+ if (!rwq_ind_tbl) {
+ err = -ENOMEM;
goto err_uobj;
}
+ init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
+ init_attr.ind_tbl = wqs;
+
rwq_ind_tbl->ind_tbl = wqs;
rwq_ind_tbl->log_ind_tbl_size = init_attr.log_ind_tbl_size;
rwq_ind_tbl->uobject = uobj;
@@ -3119,31 +3154,32 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
rwq_ind_tbl->device = ib_dev;
atomic_set(&rwq_ind_tbl->usecnt, 0);
+ err = ib_dev->ops.create_rwq_ind_table(rwq_ind_tbl, &init_attr,
+ &attrs->driver_udata);
+ if (err)
+ goto err_create;
+
for (i = 0; i < num_wq_handles; i++)
- atomic_inc(&wqs[i]->usecnt);
+ rdma_lookup_put_uobject(&wqs[i]->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ kfree(wqs_handles);
+ uobj_finalize_uobj_create(uobj, attrs);
resp.ind_tbl_handle = uobj->id;
resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num;
resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+ return uverbs_response(attrs, &resp, sizeof(resp));
- err = uverbs_response(attrs, &resp, sizeof(resp));
- if (err)
- goto err_copy;
-
- kfree(wqs_handles);
-
- for (j = 0; j < num_read_wqs; j++)
- uobj_put_obj_read(wqs[j]);
-
- return uobj_alloc_commit(uobj, attrs);
-
-err_copy:
- ib_destroy_rwq_ind_table(rwq_ind_tbl);
+err_create:
+ kfree(rwq_ind_tbl);
err_uobj:
uobj_alloc_abort(uobj, attrs);
put_wqs:
- for (j = 0; j < num_read_wqs; j++)
- uobj_put_obj_read(wqs[j]);
+ for (i = 0; i < num_read_wqs; i++) {
+ rdma_lookup_put_uobject(&wqs[i]->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ atomic_dec(&wqs[i]->usecnt);
+ }
err_free:
kfree(wqs_handles);
kfree(wqs);
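Annotation, not part of the patch: RWQ indirection tables switch to the same core-allocation pattern; the core zero-allocates the table, fills ind_tbl, log_ind_tbl_size, uobject and device, and the driver op programs the HW and returns an int. A hypothetical driver side matching the call above (hypo_* names are made up):

static int hypo_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl,
				     struct ib_rwq_ind_table_init_attr *init_attr,
				     struct ib_udata *udata)
{
	struct hypo_rqt *rqt = container_of(ib_rwq_ind_tbl, struct hypo_rqt, ibtbl);
	u32 nwq = 1U << init_attr->log_ind_tbl_size;

	/* Program the HW with init_attr->ind_tbl[0..nwq-1]->wq_num; on
	 * failure just return -errno, the core kfree()s the half-built table. */
	return hypo_hw_create_rqt(ib_rwq_ind_tbl->device, init_attr->ind_tbl,
				  nwq, &rqt->rqtn);
}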
@@ -3169,7 +3205,7 @@ static int ib_uverbs_ex_destroy_rwq_ind_table(struct uverbs_attr_bundle *attrs)
static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_flow cmd;
- struct ib_uverbs_create_flow_resp resp;
+ struct ib_uverbs_create_flow_resp resp = {};
struct ib_uobject *uobj;
struct ib_flow *flow_id;
struct ib_uverbs_flow_attr *kern_flow_attr;
@@ -3190,7 +3226,7 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
if (cmd.comp_mask)
return -EINVAL;
- if (!capable(CAP_NET_RAW))
+ if (!rdma_uattrs_has_raw_cap(attrs))
return -EPERM;
if (cmd.flow_attr.flags >= IB_FLOW_ATTR_FLAGS_RESERVED)
@@ -3237,12 +3273,17 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
goto err_free_attr;
}
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
- if (!qp) {
+ if (!rdma_is_port_valid(uobj->context->device, cmd.flow_attr.port)) {
err = -EINVAL;
goto err_uobj;
}
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
+ if (IS_ERR(qp)) {
+ err = PTR_ERR(qp);
+ goto err_uobj;
+ }
+
if (qp->qp_type != IB_QPT_UD && qp->qp_type != IB_QPT_RAW_PACKET) {
err = -EINVAL;
goto err_put;
@@ -3286,14 +3327,14 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
ib_spec += ((union ib_flow_spec *) ib_spec)->size;
}
if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) {
- pr_warn("create flow failed, flow %d: %d bytes left from uverb cmd\n",
+ pr_warn("create flow failed, flow %d: %u bytes left from uverb cmd\n",
i, cmd.flow_attr.size);
err = -EINVAL;
goto err_free;
}
- flow_id = qp->device->ops.create_flow(
- qp, flow_attr, IB_FLOW_DOMAIN_USER, &attrs->driver_udata);
+ flow_id = qp->device->ops.create_flow(qp, flow_attr,
+ &attrs->driver_udata);
if (IS_ERR(flow_id)) {
err = PTR_ERR(flow_id);
@@ -3302,27 +3343,24 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
ib_set_flow(uobj, flow_id, qp, qp->device, uflow_res);
- memset(&resp, 0, sizeof(resp));
- resp.flow_handle = uobj->id;
-
- err = uverbs_response(attrs, &resp, sizeof(resp));
- if (err)
- goto err_copy;
-
- uobj_put_obj_read(qp);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
kfree(flow_attr);
+
if (cmd.flow_attr.num_of_specs)
kfree(kern_flow_attr);
- return uobj_alloc_commit(uobj, attrs);
-err_copy:
- if (!qp->device->ops.destroy_flow(flow_id))
- atomic_dec(&qp->usecnt);
+ uobj_finalize_uobj_create(uobj, attrs);
+
+ resp.flow_handle = uobj->id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
+
err_free:
ib_uverbs_flow_resources_free(uflow_res);
err_free_flow_attr:
kfree(flow_attr);
err_put:
- uobj_put_obj_read(qp);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
err_uobj:
uobj_alloc_abort(uobj, attrs);
err_free_attr:
@@ -3350,13 +3388,13 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
struct ib_uverbs_create_xsrq *cmd,
struct ib_udata *udata)
{
- struct ib_uverbs_create_srq_resp resp;
+ struct ib_uverbs_create_srq_resp resp = {};
struct ib_usrq_object *obj;
struct ib_pd *pd;
struct ib_srq *srq;
- struct ib_uobject *uninitialized_var(xrcd_uobj);
struct ib_srq_init_attr attr;
int ret;
+ struct ib_uobject *xrcd_uobj;
struct ib_device *ib_dev;
obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, attrs,
@@ -3388,92 +3426,63 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
if (ib_srq_has_cq(cmd->srq_type)) {
attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
cmd->cq_handle, attrs);
- if (!attr.ext.cq) {
- ret = -EINVAL;
+ if (IS_ERR(attr.ext.cq)) {
+ ret = PTR_ERR(attr.ext.cq);
goto err_put_xrcd;
}
}
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, attrs);
- if (!pd) {
- ret = -EINVAL;
+ if (IS_ERR(pd)) {
+ ret = PTR_ERR(pd);
goto err_put_cq;
}
attr.event_handler = ib_uverbs_srq_event_handler;
- attr.srq_context = attrs->ufile;
attr.srq_type = cmd->srq_type;
attr.attr.max_wr = cmd->max_wr;
attr.attr.max_sge = cmd->max_sge;
attr.attr.srq_limit = cmd->srq_limit;
- obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
+ obj->uevent.uobject.user_handle = cmd->user_handle;
- srq = rdma_zalloc_drv_obj(ib_dev, ib_srq);
- if (!srq) {
- ret = -ENOMEM;
- goto err_put;
- }
-
- srq->device = pd->device;
- srq->pd = pd;
- srq->srq_type = cmd->srq_type;
- srq->uobject = &obj->uevent.uobject;
- srq->event_handler = attr.event_handler;
- srq->srq_context = attr.srq_context;
-
- ret = pd->device->ops.create_srq(srq, &attr, udata);
- if (ret)
- goto err_free;
-
- if (ib_srq_has_cq(cmd->srq_type)) {
- srq->ext.cq = attr.ext.cq;
- atomic_inc(&attr.ext.cq->usecnt);
- }
-
- if (cmd->srq_type == IB_SRQT_XRC) {
- srq->ext.xrc.xrcd = attr.ext.xrc.xrcd;
- atomic_inc(&attr.ext.xrc.xrcd->usecnt);
+ srq = ib_create_srq_user(pd, &attr, obj, udata);
+ if (IS_ERR(srq)) {
+ ret = PTR_ERR(srq);
+ goto err_put_pd;
}
- atomic_inc(&pd->usecnt);
- atomic_set(&srq->usecnt, 0);
-
obj->uevent.uobject.object = srq;
obj->uevent.uobject.user_handle = cmd->user_handle;
+ obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file);
+ if (obj->uevent.event_file)
+ uverbs_uobject_get(&obj->uevent.event_file->uobj);
- memset(&resp, 0, sizeof resp);
- resp.srq_handle = obj->uevent.uobject.id;
- resp.max_wr = attr.attr.max_wr;
- resp.max_sge = attr.attr.max_sge;
if (cmd->srq_type == IB_SRQT_XRC)
resp.srqn = srq->ext.xrc.srq_num;
- ret = uverbs_response(attrs, &resp, sizeof(resp));
- if (ret)
- goto err_copy;
-
if (cmd->srq_type == IB_SRQT_XRC)
uobj_put_read(xrcd_uobj);
if (ib_srq_has_cq(cmd->srq_type))
- uobj_put_obj_read(attr.ext.cq);
+ rdma_lookup_put_uobject(&attr.ext.cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
uobj_put_obj_read(pd);
- return uobj_alloc_commit(&obj->uevent.uobject, attrs);
+ uobj_finalize_uobj_create(&obj->uevent.uobject, attrs);
-err_copy:
- ib_destroy_srq_user(srq, &attrs->driver_udata);
+ resp.srq_handle = obj->uevent.uobject.id;
+ resp.max_wr = attr.attr.max_wr;
+ resp.max_sge = attr.attr.max_sge;
+ return uverbs_response(attrs, &resp, sizeof(resp));
-err_free:
- kfree(srq);
-err_put:
+err_put_pd:
uobj_put_obj_read(pd);
-
err_put_cq:
if (ib_srq_has_cq(cmd->srq_type))
- uobj_put_obj_read(attr.ext.cq);
+ rdma_lookup_put_uobject(&attr.ext.cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
err_put_xrcd:
if (cmd->srq_type == IB_SRQT_XRC) {
@@ -3532,8 +3541,8 @@ static int ib_uverbs_modify_srq(struct uverbs_attr_bundle *attrs)
return ret;
srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
- if (!srq)
- return -EINVAL;
+ if (IS_ERR(srq))
+ return PTR_ERR(srq);
attr.max_wr = cmd.max_wr;
attr.srq_limit = cmd.srq_limit;
@@ -3541,7 +3550,8 @@ static int ib_uverbs_modify_srq(struct uverbs_attr_bundle *attrs)
ret = srq->device->ops.modify_srq(srq, &attr, cmd.attr_mask,
&attrs->driver_udata);
- uobj_put_obj_read(srq);
+ rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
return ret;
}
@@ -3559,12 +3569,13 @@ static int ib_uverbs_query_srq(struct uverbs_attr_bundle *attrs)
return ret;
srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
- if (!srq)
- return -EINVAL;
+ if (IS_ERR(srq))
+ return PTR_ERR(srq);
ret = ib_query_srq(srq, &attr);
- uobj_put_obj_read(srq);
+ rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (ret)
return ret;
@@ -3684,13 +3695,13 @@ static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs)
return -EOPNOTSUPP;
cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
- if (!cq)
- return -EINVAL;
+ if (IS_ERR(cq))
+ return PTR_ERR(cq);
ret = rdma_set_cq_moderation(cq, cmd.attr.cq_count, cmd.attr.cq_period);
- uobj_put_obj_read(cq);
-
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
return ret;
}
@@ -3703,13 +3714,10 @@ static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs)
* trailing driver_data flex array. In this case the size of the base struct
* cannot be changed.
*/
-#define offsetof_after(_struct, _member) \
- (offsetof(_struct, _member) + sizeof(((_struct *)NULL)->_member))
-
#define UAPI_DEF_WRITE_IO(req, resp) \
.write.has_resp = 1 + \
BUILD_BUG_ON_ZERO(offsetof(req, response) != 0) + \
- BUILD_BUG_ON_ZERO(sizeof(((req *)0)->response) != \
+ BUILD_BUG_ON_ZERO(sizeof_field(req, response) != \
sizeof(u64)), \
.write.req_size = sizeof(req), .write.resp_size = sizeof(resp)
@@ -3736,11 +3744,11 @@ static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs)
*/
#define UAPI_DEF_WRITE_IO_EX(req, req_last_member, resp, resp_last_member) \
.write.has_resp = 1, \
- .write.req_size = offsetof_after(req, req_last_member), \
- .write.resp_size = offsetof_after(resp, resp_last_member)
+ .write.req_size = offsetofend(req, req_last_member), \
+ .write.resp_size = offsetofend(resp, resp_last_member)
#define UAPI_DEF_WRITE_I_EX(req, req_last_member) \
- .write.req_size = offsetof_after(req, req_last_member)
+ .write.req_size = offsetofend(req, req_last_member)
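Annotation, not part of the patch: the local offsetof_after() macro duplicated offsetofend(), and the `sizeof(((req *)0)->response)` idiom is what sizeof_field() spells out; both helpers live in <linux/stddef.h>. For example (struct demo_resp is a made-up layout):

#include <linux/build_bug.h>
#include <linux/stddef.h>

struct demo_resp {
	__u64 response;
	__u32 handle;
	__u32 reserved;
};

/* offsetofend(T, m) == offsetof(T, m) + sizeof_field(T, m) */
static_assert(offsetofend(struct demo_resp, handle) == 12);
static_assert(sizeof_field(struct demo_resp, response) == 8);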
const struct uapi_definition uverbs_def_write_intf[] = {
DECLARE_UVERBS_OBJECT(
@@ -3749,13 +3757,13 @@ const struct uapi_definition uverbs_def_write_intf[] = {
ib_uverbs_create_ah,
UAPI_DEF_WRITE_UDATA_IO(
struct ib_uverbs_create_ah,
- struct ib_uverbs_create_ah_resp),
- UAPI_DEF_METHOD_NEEDS_FN(create_ah)),
+ struct ib_uverbs_create_ah_resp)),
DECLARE_UVERBS_WRITE(
IB_USER_VERBS_CMD_DESTROY_AH,
ib_uverbs_destroy_ah,
- UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah),
- UAPI_DEF_METHOD_NEEDS_FN(destroy_ah))),
+ UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah)),
+ UAPI_DEF_OBJ_NEEDS_FN(create_user_ah),
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_ah)),
DECLARE_UVERBS_OBJECT(
UVERBS_OBJECT_COMP_CHANNEL,
@@ -3809,7 +3817,7 @@ const struct uapi_definition uverbs_def_write_intf[] = {
IB_USER_VERBS_EX_CMD_MODIFY_CQ,
ib_uverbs_ex_modify_cq,
UAPI_DEF_WRITE_I(struct ib_uverbs_ex_modify_cq),
- UAPI_DEF_METHOD_NEEDS_FN(create_cq))),
+ UAPI_DEF_METHOD_NEEDS_FN(modify_cq))),
DECLARE_UVERBS_OBJECT(
UVERBS_OBJECT_DEVICE,
@@ -4055,8 +4063,7 @@ const struct uapi_definition uverbs_def_write_intf[] = {
DECLARE_UVERBS_WRITE(
IB_USER_VERBS_CMD_CLOSE_XRCD,
ib_uverbs_close_xrcd,
- UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd),
- UAPI_DEF_METHOD_NEEDS_FN(dealloc_xrcd)),
+ UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd)),
DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_QP,
ib_uverbs_open_qp,
UAPI_DEF_WRITE_UDATA_IO(
@@ -4066,8 +4073,9 @@ const struct uapi_definition uverbs_def_write_intf[] = {
ib_uverbs_open_xrcd,
UAPI_DEF_WRITE_UDATA_IO(
struct ib_uverbs_open_xrcd,
- struct ib_uverbs_open_xrcd_resp),
- UAPI_DEF_METHOD_NEEDS_FN(alloc_xrcd))),
+ struct ib_uverbs_open_xrcd_resp)),
+ UAPI_DEF_OBJ_NEEDS_FN(alloc_xrcd),
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_xrcd)),
{},
};
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 829b0c6944d8..f80da6a67e24 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -36,13 +36,15 @@
#include "uverbs.h"
struct bundle_alloc_head {
- struct bundle_alloc_head *next;
+ struct_group_tagged(bundle_alloc_head_hdr, hdr,
+ struct bundle_alloc_head *next;
+ );
u8 data[];
};
struct bundle_priv {
/* Must be first */
- struct bundle_alloc_head alloc_head;
+ struct bundle_alloc_head_hdr alloc_head;
struct bundle_alloc_head *allocated_mem;
size_t internal_avail;
size_t internal_used;
@@ -58,12 +60,13 @@ struct bundle_priv {
DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN);
DECLARE_BITMAP(spec_finalize, UVERBS_API_ATTR_BKEY_LEN);
+ DECLARE_BITMAP(uobj_hw_obj_valid, UVERBS_API_ATTR_BKEY_LEN);
/*
* Must be last. bundle ends in a flex array which overlaps
* internal_buffer.
*/
- struct uverbs_attr_bundle bundle;
+ struct uverbs_attr_bundle_hdr bundle;
u64 internal_buffer[32];
};
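Annotation, not part of the patch: struct_group_tagged() (from <linux/stddef.h>) wraps the listed members in an anonymous union so they remain addressable directly while also being usable as a named, separately-typed header struct. That lets bundle_priv embed only the fixed-size *_hdr types (the full structs end in flexible arrays, which should not sit in the middle of another struct) while handlers keep receiving the full types, recovered with container_of(&bundle->hdr, ...) as the later hunks do. The bundle_alloc_head change expands to roughly:

/* Rough expansion of the struct_group_tagged() form used above. */
struct bundle_alloc_head {
	union {
		struct {
			struct bundle_alloc_head *next;
		};
		struct bundle_alloc_head_hdr {
			struct bundle_alloc_head *next;
		} hdr;
	};
	u8 data[];
};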
@@ -76,9 +79,10 @@ void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
unsigned int num_attrs)
{
struct bundle_priv *pbundle;
+ struct uverbs_attr_bundle *bundle;
size_t bundle_size =
offsetof(struct bundle_priv, internal_buffer) +
- sizeof(*pbundle->bundle.attrs) * method_elm->key_bitmap_len +
+ sizeof(*bundle->attrs) * method_elm->key_bitmap_len +
sizeof(*pbundle->uattrs) * num_attrs;
method_elm->use_stack = bundle_size <= sizeof(*pbundle);
@@ -90,7 +94,7 @@ void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
}
/**
- * uverbs_alloc() - Quickly allocate memory for use with a bundle
+ * _uverbs_alloc() - Quickly allocate memory for use with a bundle
* @bundle: The bundle
* @size: Number of bytes to allocate
* @flags: Allocator flags
@@ -106,7 +110,7 @@ __malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size,
gfp_t flags)
{
struct bundle_priv *pbundle =
- container_of(bundle, struct bundle_priv, bundle);
+ container_of(&bundle->hdr, struct bundle_priv, bundle);
size_t new_used;
void *res;
@@ -127,7 +131,7 @@ __malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size,
res = (void *)pbundle->internal_buffer + pbundle->internal_used;
pbundle->internal_used =
ALIGN(new_used, sizeof(*pbundle->internal_buffer));
- if (flags & __GFP_ZERO)
+ if (want_init_on_alloc(flags))
memset(res, 0, size);
return res;
}
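Annotation, not part of the patch: testing __GFP_ZERO alone misses the init_on_alloc hardening mode, in which the page and slab allocators zero every allocation regardless of gfp flags; want_init_on_alloc() from <linux/mm.h> covers both, keeping the bundle's internal bump allocator consistent with kmalloc(). Its semantics, paraphrased:

/* Paraphrased; see want_init_on_alloc() in include/linux/mm.h. */
static inline bool want_init_on_alloc(gfp_t flags)
{
	if (static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, &init_on_alloc))
		return true;	/* init_on_alloc hardening enabled */
	return flags & __GFP_ZERO;
}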
@@ -136,7 +140,7 @@ EXPORT_SYMBOL(_uverbs_alloc);
static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
u16 len)
{
- if (uattr->len > sizeof(((struct ib_uverbs_attr *)0)->data))
+ if (uattr->len > sizeof_field(struct ib_uverbs_attr, data))
return ib_is_buffer_cleared(u64_to_user_ptr(uattr->data) + len,
uattr->len - len);
@@ -148,7 +152,7 @@ static int uverbs_set_output(const struct uverbs_attr_bundle *bundle,
const struct uverbs_attr *attr)
{
struct bundle_priv *pbundle =
- container_of(bundle, struct bundle_priv, bundle);
+ container_of(&bundle->hdr, struct bundle_priv, bundle);
u16 flags;
flags = pbundle->uattrs[attr->ptr_attr.uattr_idx].flags |
@@ -165,6 +169,8 @@ static int uverbs_process_idrs_array(struct bundle_priv *pbundle,
struct ib_uverbs_attr *uattr,
u32 attr_bkey)
{
+ struct uverbs_attr_bundle *bundle =
+ container_of(&pbundle->bundle, struct uverbs_attr_bundle, hdr);
const struct uverbs_attr_spec *spec = &attr_uapi->spec;
size_t array_len;
u32 *idr_vals;
@@ -183,7 +189,7 @@ static int uverbs_process_idrs_array(struct bundle_priv *pbundle,
return -EINVAL;
attr->uobjects =
- uverbs_alloc(&pbundle->bundle,
+ uverbs_alloc(bundle,
array_size(array_len, sizeof(*attr->uobjects)));
if (IS_ERR(attr->uobjects))
return PTR_ERR(attr->uobjects);
@@ -208,7 +214,7 @@ static int uverbs_process_idrs_array(struct bundle_priv *pbundle,
for (i = 0; i != array_len; i++) {
attr->uobjects[i] = uverbs_get_uobject_from_file(
spec->u2.objs_arr.obj_type, spec->u2.objs_arr.access,
- idr_vals[i], &pbundle->bundle);
+ idr_vals[i], bundle);
if (IS_ERR(attr->uobjects[i])) {
ret = PTR_ERR(attr->uobjects[i]);
break;
@@ -220,24 +226,18 @@ static int uverbs_process_idrs_array(struct bundle_priv *pbundle,
return ret;
}
-static int uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi,
- struct uverbs_objs_arr_attr *attr,
- bool commit, struct uverbs_attr_bundle *attrs)
+static void uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi,
+ struct uverbs_objs_arr_attr *attr,
+ bool commit,
+ struct uverbs_attr_bundle *attrs)
{
const struct uverbs_attr_spec *spec = &attr_uapi->spec;
- int current_ret;
- int ret = 0;
size_t i;
- for (i = 0; i != attr->len; i++) {
- current_ret = uverbs_finalize_object(attr->uobjects[i],
- spec->u2.objs_arr.access,
- commit, attrs);
- if (!ret)
- ret = current_ret;
- }
-
- return ret;
+ for (i = 0; i != attr->len; i++)
+ uverbs_finalize_object(attr->uobjects[i],
+ spec->u2.objs_arr.access, false, commit,
+ attrs);
}
static int uverbs_process_attr(struct bundle_priv *pbundle,
@@ -245,7 +245,9 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
struct ib_uverbs_attr *uattr, u32 attr_bkey)
{
const struct uverbs_attr_spec *spec = &attr_uapi->spec;
- struct uverbs_attr *e = &pbundle->bundle.attrs[attr_bkey];
+ struct uverbs_attr_bundle *bundle =
+ container_of(&pbundle->bundle, struct uverbs_attr_bundle, hdr);
+ struct uverbs_attr *e = &bundle->attrs[attr_bkey];
const struct uverbs_attr_spec *val_spec = spec;
struct uverbs_obj_attr *o_attr;
@@ -264,7 +266,7 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
return -EOPNOTSUPP;
e->ptr_attr.enum_id = uattr->attr_data.enum_data.elem_id;
- /* fall through */
+ fallthrough;
case UVERBS_ATTR_TYPE_PTR_IN:
/* Ensure that any data provided by userspace beyond the known
* struct is zero. Userspace that knows how to use some future
@@ -276,7 +278,7 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
!uverbs_is_attr_cleared(uattr, val_spec->u.ptr.len))
return -EOPNOTSUPP;
- /* fall through */
+ fallthrough;
case UVERBS_ATTR_TYPE_PTR_OUT:
if (uattr->len < val_spec->u.ptr.min_len ||
(!val_spec->zero_trailing &&
@@ -293,7 +295,7 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
if (val_spec->alloc_and_copy && !uverbs_attr_ptr_is_inline(e)) {
void *p;
- p = uverbs_alloc(&pbundle->bundle, uattr->len);
+ p = uverbs_alloc(bundle, uattr->len);
if (IS_ERR(p))
return PTR_ERR(p);
@@ -326,7 +328,7 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
*/
o_attr->uobject = uverbs_get_uobject_from_file(
spec->u.obj.obj_type, spec->u.obj.access,
- uattr->data_s64, &pbundle->bundle);
+ uattr->data_s64, bundle);
if (IS_ERR(o_attr->uobject))
return PTR_ERR(o_attr->uobject);
__set_bit(attr_bkey, pbundle->uobj_finalize);
@@ -342,6 +344,14 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
break;
+ case UVERBS_ATTR_TYPE_RAW_FD:
+ if (uattr->attr_data.reserved || uattr->len != 0 ||
+ uattr->data_s64 < INT_MIN || uattr->data_s64 > INT_MAX)
+ return -EINVAL;
+ /* _uverbs_get_const_signed() is the accessor */
+ e->ptr_attr.data = uattr->data_s64;
+ break;
+
case UVERBS_ATTR_TYPE_IDRS_ARRAY:
return uverbs_process_idrs_array(pbundle, attr_uapi,
&e->objs_arr_attr, uattr,
@@ -419,6 +429,8 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle,
unsigned int num_attrs)
{
int (*handler)(struct uverbs_attr_bundle *attrs);
+ struct uverbs_attr_bundle *bundle =
+ container_of(&pbundle->bundle, struct uverbs_attr_bundle, hdr);
size_t uattrs_size = array_size(sizeof(*pbundle->uattrs), num_attrs);
unsigned int destroy_bkey = pbundle->method_elm->destroy_bkey;
unsigned int i;
@@ -431,7 +443,7 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle,
if (!handler)
return -EIO;
- pbundle->uattrs = uverbs_alloc(&pbundle->bundle, uattrs_size);
+ pbundle->uattrs = uverbs_alloc(bundle, uattrs_size);
if (IS_ERR(pbundle->uattrs))
return PTR_ERR(pbundle->uattrs);
if (copy_from_user(pbundle->uattrs, pbundle->user_attrs, uattrs_size))
@@ -450,25 +462,23 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle,
return -EINVAL;
if (pbundle->method_elm->has_udata)
- uverbs_fill_udata(&pbundle->bundle,
- &pbundle->bundle.driver_udata,
+ uverbs_fill_udata(bundle, &pbundle->bundle.driver_udata,
UVERBS_ATTR_UHW_IN, UVERBS_ATTR_UHW_OUT);
else
pbundle->bundle.driver_udata = (struct ib_udata){};
if (destroy_bkey != UVERBS_API_ATTR_BKEY_LEN) {
- struct uverbs_obj_attr *destroy_attr =
- &pbundle->bundle.attrs[destroy_bkey].obj_attr;
+ struct uverbs_obj_attr *destroy_attr = &bundle->attrs[destroy_bkey].obj_attr;
- ret = uobj_destroy(destroy_attr->uobject, &pbundle->bundle);
+ ret = uobj_destroy(destroy_attr->uobject, bundle);
if (ret)
return ret;
__clear_bit(destroy_bkey, pbundle->uobj_finalize);
- ret = handler(&pbundle->bundle);
+ ret = handler(bundle);
uobj_put_destroy(destroy_attr->uobject);
} else {
- ret = handler(&pbundle->bundle);
+ ret = handler(bundle);
}
/*
@@ -478,10 +488,10 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle,
*/
if (!ret && pbundle->method_elm->has_udata) {
const struct uverbs_attr *attr =
- uverbs_attr_get(&pbundle->bundle, UVERBS_ATTR_UHW_OUT);
+ uverbs_attr_get(bundle, UVERBS_ATTR_UHW_OUT);
if (!IS_ERR(attr))
- ret = uverbs_set_output(&pbundle->bundle, attr);
+ ret = uverbs_set_output(bundle, attr);
}
/*
@@ -495,35 +505,33 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle,
return ret;
}
-static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
+static void bundle_destroy(struct bundle_priv *pbundle, bool commit)
{
unsigned int key_bitmap_len = pbundle->method_elm->key_bitmap_len;
+ struct uverbs_attr_bundle *bundle =
+ container_of(&pbundle->bundle, struct uverbs_attr_bundle, hdr);
struct bundle_alloc_head *memblock;
unsigned int i;
- int ret = 0;
/* fast path for simple uobjects */
i = -1;
while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len,
i + 1)) < key_bitmap_len) {
- struct uverbs_attr *attr = &pbundle->bundle.attrs[i];
- int current_ret;
+ struct uverbs_attr *attr = &bundle->attrs[i];
- current_ret = uverbs_finalize_object(
+ uverbs_finalize_object(
attr->obj_attr.uobject,
- attr->obj_attr.attr_elm->spec.u.obj.access, commit,
- &pbundle->bundle);
- if (!ret)
- ret = current_ret;
+ attr->obj_attr.attr_elm->spec.u.obj.access,
+ test_bit(i, pbundle->uobj_hw_obj_valid),
+ commit, bundle);
}
i = -1;
while ((i = find_next_bit(pbundle->spec_finalize, key_bitmap_len,
i + 1)) < key_bitmap_len) {
- struct uverbs_attr *attr = &pbundle->bundle.attrs[i];
+ struct uverbs_attr *attr = &bundle->attrs[i];
const struct uverbs_api_attr *attr_uapi;
void __rcu **slot;
- int current_ret;
slot = uapi_get_attr_for_method(
pbundle,
@@ -534,11 +542,8 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
attr_uapi = rcu_dereference_protected(*slot, true);
if (attr_uapi->spec.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) {
- current_ret = uverbs_free_idrs_array(
- attr_uapi, &attr->objs_arr_attr, commit,
- &pbundle->bundle);
- if (!ret)
- ret = current_ret;
+ uverbs_free_idrs_array(attr_uapi, &attr->objs_arr_attr,
+ commit, bundle);
}
}
@@ -548,8 +553,6 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
memblock = memblock->next;
kvfree(tmp);
}
-
- return ret;
}
static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
@@ -562,7 +565,6 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
struct bundle_priv *pbundle;
struct bundle_priv onstack;
void __rcu **slot;
- int destroy_ret;
int ret;
if (unlikely(hdr->driver_id != uapi->driver_id))
@@ -584,7 +586,8 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
method_elm->bundle_size -
offsetof(struct bundle_priv, internal_buffer);
pbundle->alloc_head.next = NULL;
- pbundle->allocated_mem = &pbundle->alloc_head;
+ pbundle->allocated_mem = container_of(&pbundle->alloc_head,
+ struct bundle_alloc_head, hdr);
} else {
pbundle = &onstack;
pbundle->internal_avail = sizeof(pbundle->internal_buffer);
@@ -602,18 +605,18 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
pbundle->user_attrs = user_attrs;
pbundle->internal_used = ALIGN(pbundle->method_elm->key_bitmap_len *
- sizeof(*pbundle->bundle.attrs),
- sizeof(*pbundle->internal_buffer));
+ sizeof(*container_of(&pbundle->bundle,
+ struct uverbs_attr_bundle, hdr)->attrs),
+ sizeof(*pbundle->internal_buffer));
memset(pbundle->bundle.attr_present, 0,
sizeof(pbundle->bundle.attr_present));
memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize));
memset(pbundle->spec_finalize, 0, sizeof(pbundle->spec_finalize));
+ memset(pbundle->uobj_hw_obj_valid, 0,
+ sizeof(pbundle->uobj_hw_obj_valid));
ret = ib_uverbs_run_method(pbundle, hdr->num_attrs);
- destroy_ret = bundle_destroy(pbundle, ret == 0);
- if (unlikely(destroy_ret && !ret))
- return destroy_ret;
-
+ bundle_destroy(pbundle, ret == 0);
return ret;
}
@@ -707,11 +710,13 @@ void uverbs_fill_udata(struct uverbs_attr_bundle *bundle,
unsigned int attr_out)
{
struct bundle_priv *pbundle =
- container_of(bundle, struct bundle_priv, bundle);
+ container_of(&bundle->hdr, struct bundle_priv, bundle);
+ struct uverbs_attr_bundle *bundle_aux =
+ container_of(&pbundle->bundle, struct uverbs_attr_bundle, hdr);
const struct uverbs_attr *in =
- uverbs_attr_get(&pbundle->bundle, attr_in);
+ uverbs_attr_get(bundle_aux, attr_in);
const struct uverbs_attr *out =
- uverbs_attr_get(&pbundle->bundle, attr_out);
+ uverbs_attr_get(bundle_aux, attr_out);
if (!IS_ERR(in)) {
udata->inlen = in->ptr_attr.len;
@@ -767,9 +772,10 @@ int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx)
return uverbs_set_output(bundle, attr);
}
-int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
- size_t idx, s64 lower_bound, u64 upper_bound,
- s64 *def_val)
+int _uverbs_get_const_signed(s64 *to,
+ const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, s64 lower_bound, u64 upper_bound,
+ s64 *def_val)
{
const struct uverbs_attr *attr;
@@ -788,13 +794,39 @@ int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
return 0;
}
-EXPORT_SYMBOL(_uverbs_get_const);
+EXPORT_SYMBOL(_uverbs_get_const_signed);
+
+int _uverbs_get_const_unsigned(u64 *to,
+ const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, u64 upper_bound, u64 *def_val)
+{
+ const struct uverbs_attr *attr;
+
+ attr = uverbs_attr_get(attrs_bundle, idx);
+ if (IS_ERR(attr)) {
+ if ((PTR_ERR(attr) != -ENOENT) || !def_val)
+ return PTR_ERR(attr);
+
+ *to = *def_val;
+ } else {
+ *to = attr->ptr_attr.data;
+ }
+
+ if (*to > upper_bound)
+ return -EINVAL;
+
+ return 0;
+}
+EXPORT_SYMBOL(_uverbs_get_const_unsigned);
int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
size_t idx, const void *from, size_t size)
{
const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx);
+ if (IS_ERR(attr))
+ return PTR_ERR(attr);
+
if (size < attr->ptr_attr.len) {
if (clear_user(u64_to_user_ptr(attr->ptr_attr.data) + size,
attr->ptr_attr.len - size))
@@ -802,3 +834,16 @@ int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
}
return uverbs_copy_to(bundle, idx, from, size);
}
+EXPORT_SYMBOL(uverbs_copy_to_struct_or_zero);
+
+/* Once called, an abort will call through to the type's destroy_hw() */
+void uverbs_finalize_uobj_create(const struct uverbs_attr_bundle *bundle,
+ u16 idx)
+{
+ struct bundle_priv *pbundle =
+ container_of(&bundle->hdr, struct bundle_priv, bundle);
+
+ __set_bit(uapi_bkey_attr(uapi_key_attr(idx)),
+ pbundle->uobj_hw_obj_valid);
+}
+EXPORT_SYMBOL(uverbs_finalize_uobj_create);
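As a hedged illustration of the two helpers added above, a driver method handler could combine _uverbs_get_const_unsigned() and uverbs_finalize_uobj_create() roughly as sketched below. EXAMPLE_ATTR_HANDLE, EXAMPLE_ATTR_FLAGS and example_program_hw() are assumptions for this sketch only, not part of the patch.

/* Sketch only: the attribute ids and the HW helper are hypothetical. */
static int example_create_handler(struct uverbs_attr_bundle *attrs)
{
	struct ib_uobject *uobj =
		uverbs_attr_get_uobject(attrs, EXAMPLE_ATTR_HANDLE);
	u64 flags = 0;
	int ret;

	/* Unsigned constant attribute, bounds-checked against U32_MAX. */
	ret = _uverbs_get_const_unsigned(&flags, attrs, EXAMPLE_ATTR_FLAGS,
					 U32_MAX, NULL);
	if (ret)
		return ret;

	ret = example_program_hw(uobj, flags);
	if (ret)
		return ret;

	/*
	 * Once the HW object exists, mark it so that a later abort goes
	 * through the type's destroy_hw(), as the comment above explains.
	 */
	uverbs_finalize_uobj_create(attrs, EXAMPLE_ATTR_HANDLE);
	return 0;
}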
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 84a5e9a6d483..973fe2c7ef53 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -51,6 +51,8 @@
#include <rdma/ib.h>
#include <rdma/uverbs_std_types.h>
+#include <rdma/rdma_netlink.h>
+#include <rdma/ib_ucaps.h>
#include "uverbs.h"
#include "core_priv.h"
@@ -71,11 +73,23 @@ enum {
#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
static dev_t dynamic_uverbs_dev;
-static struct class *uverbs_class;
static DEFINE_IDA(uverbs_ida);
-static void ib_uverbs_add_one(struct ib_device *device);
+static int ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
+static struct ib_client uverbs_client;
+
+static char *uverbs_devnode(const struct device *dev, umode_t *mode)
+{
+ if (mode)
+ *mode = 0666;
+ return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
+}
+
+static const struct class uverbs_class = {
+ .name = "infiniband_verbs",
+ .devnode = uverbs_devnode,
+};
/*
* Must be called with the ufile->device->disassociate_srcu held, and the lock
@@ -107,8 +121,11 @@ int uverbs_dealloc_mw(struct ib_mw *mw)
int ret;
ret = mw->device->ops.dealloc_mw(mw);
- if (!ret)
- atomic_dec(&pd->usecnt);
+ if (ret)
+ return ret;
+
+ atomic_dec(&pd->usecnt);
+ kfree(mw);
return ret;
}
@@ -119,20 +136,13 @@ static void ib_uverbs_release_dev(struct device *device)
uverbs_destroy_api(dev->uapi);
cleanup_srcu_struct(&dev->disassociate_srcu);
+ mutex_destroy(&dev->lists_mutex);
+ mutex_destroy(&dev->xrcd_tree_mutex);
kfree(dev);
}
-static void ib_uverbs_release_async_event_file(struct kref *ref)
-{
- struct ib_uverbs_async_event_file *file =
- container_of(ref, struct ib_uverbs_async_event_file, ref);
-
- kfree(file);
-}
-
-void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
- struct ib_uverbs_completion_event_file *ev_file,
- struct ib_ucq_object *uobj)
+void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file,
+ struct ib_ucq_object *uobj)
{
struct ib_uverbs_event *evt, *tmp;
@@ -147,25 +157,24 @@ void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
uverbs_uobject_put(&ev_file->uobj);
}
- spin_lock_irq(&file->async_file->ev_queue.lock);
- list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
- list_del(&evt->list);
- kfree(evt);
- }
- spin_unlock_irq(&file->async_file->ev_queue.lock);
+ ib_uverbs_release_uevent(&uobj->uevent);
}
-void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
- struct ib_uevent_object *uobj)
+void ib_uverbs_release_uevent(struct ib_uevent_object *uobj)
{
+ struct ib_uverbs_async_event_file *async_file = uobj->event_file;
struct ib_uverbs_event *evt, *tmp;
- spin_lock_irq(&file->async_file->ev_queue.lock);
+ if (!async_file)
+ return;
+
+ spin_lock_irq(&async_file->ev_queue.lock);
list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
- spin_unlock_irq(&file->async_file->ev_queue.lock);
+ spin_unlock_irq(&async_file->ev_queue.lock);
+ uverbs_uobject_put(&async_file->uobj);
}
void ib_uverbs_detach_umcast(struct ib_qp *qp,
@@ -198,24 +207,25 @@ void ib_uverbs_release_file(struct kref *ref)
ib_dev = srcu_dereference(file->device->ib_dev,
&file->device->disassociate_srcu);
if (ib_dev && !ib_dev->ops.disassociate_ucontext)
- module_put(ib_dev->owner);
+ module_put(ib_dev->ops.owner);
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
- if (atomic_dec_and_test(&file->device->refcount))
+ if (refcount_dec_and_test(&file->device->refcount))
ib_uverbs_comp_dev(file->device);
- if (file->async_file)
- kref_put(&file->async_file->ref,
- ib_uverbs_release_async_event_file);
+ if (file->default_async_file)
+ uverbs_uobject_put(&file->default_async_file->uobj);
put_device(&file->device->dev);
if (file->disassociate_page)
__free_pages(file->disassociate_page, 0);
+ mutex_destroy(&file->disassociation_lock);
+ mutex_destroy(&file->umap_lock);
+ mutex_destroy(&file->ucontext_lock);
kfree(file);
}
static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
- struct ib_uverbs_file *uverbs_file,
struct file *filp, char __user *buf,
size_t count, loff_t *pos,
size_t eventsz)
@@ -226,25 +236,20 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
spin_lock_irq(&ev_queue->lock);
while (list_empty(&ev_queue->event_list)) {
- spin_unlock_irq(&ev_queue->lock);
+ if (ev_queue->is_closed) {
+ spin_unlock_irq(&ev_queue->lock);
+ return -EIO;
+ }
+ spin_unlock_irq(&ev_queue->lock);
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
if (wait_event_interruptible(ev_queue->poll_wait,
(!list_empty(&ev_queue->event_list) ||
- /* The barriers built into wait_event_interruptible()
- * and wake_up() guarentee this will see the null set
- * without using RCU
- */
- !uverbs_file->device->ib_dev)))
+ ev_queue->is_closed)))
return -ERESTARTSYS;
- /* If device was disassociated and no event exists set an error */
- if (list_empty(&ev_queue->event_list) &&
- !uverbs_file->device->ib_dev)
- return -EIO;
-
spin_lock_irq(&ev_queue->lock);
}
@@ -280,8 +285,7 @@ static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf,
{
struct ib_uverbs_async_event_file *file = filp->private_data;
- return ib_uverbs_event_read(&file->ev_queue, file->uverbs_file, filp,
- buf, count, pos,
+ return ib_uverbs_event_read(&file->ev_queue, filp, buf, count, pos,
sizeof(struct ib_uverbs_async_event_desc));
}
@@ -291,9 +295,8 @@ static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
struct ib_uverbs_completion_event_file *comp_ev_file =
filp->private_data;
- return ib_uverbs_event_read(&comp_ev_file->ev_queue,
- comp_ev_file->uobj.ufile, filp,
- buf, count, pos,
+ return ib_uverbs_event_read(&comp_ev_file->ev_queue, filp, buf, count,
+ pos,
sizeof(struct ib_uverbs_comp_event_desc));
}
@@ -308,6 +311,8 @@ static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
spin_lock_irq(&ev_queue->lock);
if (!list_empty(&ev_queue->event_list))
pollflags = EPOLLIN | EPOLLRDNORM;
+ else if (ev_queue->is_closed)
+ pollflags = EPOLLERR;
spin_unlock_irq(&ev_queue->lock);
return pollflags;
@@ -316,7 +321,9 @@ static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
static __poll_t ib_uverbs_async_event_poll(struct file *filp,
struct poll_table_struct *wait)
{
- return ib_uverbs_event_poll(filp->private_data, filp, wait);
+ struct ib_uverbs_async_event_file *file = filp->private_data;
+
+ return ib_uverbs_event_poll(&file->ev_queue, filp, wait);
}
static __poll_t ib_uverbs_comp_event_poll(struct file *filp,
@@ -330,9 +337,9 @@ static __poll_t ib_uverbs_comp_event_poll(struct file *filp,
static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on)
{
- struct ib_uverbs_event_queue *ev_queue = filp->private_data;
+ struct ib_uverbs_async_event_file *file = filp->private_data;
- return fasync_helper(fd, filp, on, &ev_queue->async_queue);
+ return fasync_helper(fd, filp, on, &file->ev_queue.async_queue);
}
static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
@@ -343,72 +350,20 @@ static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue);
}
-static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp)
-{
- struct ib_uverbs_async_event_file *file = filp->private_data;
- struct ib_uverbs_file *uverbs_file = file->uverbs_file;
- struct ib_uverbs_event *entry, *tmp;
- int closed_already = 0;
-
- mutex_lock(&uverbs_file->device->lists_mutex);
- spin_lock_irq(&file->ev_queue.lock);
- closed_already = file->ev_queue.is_closed;
- file->ev_queue.is_closed = 1;
- list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
- if (entry->counter)
- list_del(&entry->obj_list);
- kfree(entry);
- }
- spin_unlock_irq(&file->ev_queue.lock);
- if (!closed_already) {
- list_del(&file->list);
- ib_unregister_event_handler(&uverbs_file->event_handler);
- }
- mutex_unlock(&uverbs_file->device->lists_mutex);
-
- kref_put(&uverbs_file->ref, ib_uverbs_release_file);
- kref_put(&file->ref, ib_uverbs_release_async_event_file);
-
- return 0;
-}
-
-static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp)
-{
- struct ib_uobject *uobj = filp->private_data;
- struct ib_uverbs_completion_event_file *file = container_of(
- uobj, struct ib_uverbs_completion_event_file, uobj);
- struct ib_uverbs_event *entry, *tmp;
-
- spin_lock_irq(&file->ev_queue.lock);
- list_for_each_entry_safe(entry, tmp, &file->ev_queue.event_list, list) {
- if (entry->counter)
- list_del(&entry->obj_list);
- kfree(entry);
- }
- file->ev_queue.is_closed = 1;
- spin_unlock_irq(&file->ev_queue.lock);
-
- uverbs_close_fd(filp);
-
- return 0;
-}
-
const struct file_operations uverbs_event_fops = {
.owner = THIS_MODULE,
.read = ib_uverbs_comp_event_read,
.poll = ib_uverbs_comp_event_poll,
- .release = ib_uverbs_comp_event_close,
+ .release = uverbs_uobject_fd_release,
.fasync = ib_uverbs_comp_event_fasync,
- .llseek = no_llseek,
};
-static const struct file_operations uverbs_async_event_fops = {
+const struct file_operations uverbs_async_event_fops = {
.owner = THIS_MODULE,
.read = ib_uverbs_async_event_read,
.poll = ib_uverbs_async_event_poll,
- .release = ib_uverbs_async_event_close,
+ .release = uverbs_async_event_release,
.fasync = ib_uverbs_async_event_fasync,
- .llseek = no_llseek,
};
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
@@ -433,9 +388,9 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
return;
}
- uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
+ uobj = cq->uobject;
- entry->desc.comp.cq_handle = cq->uobject->user_handle;
+ entry->desc.comp.cq_handle = cq->uobject->uevent.uobject.user_handle;
entry->counter = &uobj->comp_events_reported;
list_add_tail(&entry->list, &ev_queue->event_list);
@@ -446,102 +401,81 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN);
}
-static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
- __u64 element, __u64 event,
- struct list_head *obj_list,
- u32 *counter)
+void ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file,
+ __u64 element, __u64 event,
+ struct list_head *obj_list, u32 *counter)
{
struct ib_uverbs_event *entry;
unsigned long flags;
- spin_lock_irqsave(&file->async_file->ev_queue.lock, flags);
- if (file->async_file->ev_queue.is_closed) {
- spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
+ if (!async_file)
+ return;
+
+ spin_lock_irqsave(&async_file->ev_queue.lock, flags);
+ if (async_file->ev_queue.is_closed) {
+ spin_unlock_irqrestore(&async_file->ev_queue.lock, flags);
return;
}
entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
if (!entry) {
- spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
+ spin_unlock_irqrestore(&async_file->ev_queue.lock, flags);
return;
}
- entry->desc.async.element = element;
+ entry->desc.async.element = element;
entry->desc.async.event_type = event;
- entry->desc.async.reserved = 0;
- entry->counter = counter;
+ entry->desc.async.reserved = 0;
+ entry->counter = counter;
- list_add_tail(&entry->list, &file->async_file->ev_queue.event_list);
+ list_add_tail(&entry->list, &async_file->ev_queue.event_list);
if (obj_list)
list_add_tail(&entry->obj_list, obj_list);
- spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
+ spin_unlock_irqrestore(&async_file->ev_queue.lock, flags);
- wake_up_interruptible(&file->async_file->ev_queue.poll_wait);
- kill_fasync(&file->async_file->ev_queue.async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&async_file->ev_queue.poll_wait);
+ kill_fasync(&async_file->ev_queue.async_queue, SIGIO, POLL_IN);
}
-void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
+static void uverbs_uobj_event(struct ib_uevent_object *eobj,
+ struct ib_event *event)
{
- struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
- struct ib_ucq_object, uobject);
+ ib_uverbs_async_handler(eobj->event_file,
+ eobj->uobject.user_handle, event->event,
+ &eobj->event_list, &eobj->events_reported);
+}
- ib_uverbs_async_handler(uobj->uobject.ufile, uobj->uobject.user_handle,
- event->event, &uobj->async_list,
- &uobj->async_events_reported);
+void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
+{
+ uverbs_uobj_event(&event->element.cq->uobject->uevent, event);
}
void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
{
- struct ib_uevent_object *uobj;
-
/* for XRC target qp's, check that qp is live */
if (!event->element.qp->uobject)
return;
- uobj = container_of(event->element.qp->uobject,
- struct ib_uevent_object, uobject);
-
- ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
- event->event, &uobj->event_list,
- &uobj->events_reported);
+ uverbs_uobj_event(&event->element.qp->uobject->uevent, event);
}
void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr)
{
- struct ib_uevent_object *uobj = container_of(event->element.wq->uobject,
- struct ib_uevent_object, uobject);
-
- ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
- event->event, &uobj->event_list,
- &uobj->events_reported);
+ uverbs_uobj_event(&event->element.wq->uobject->uevent, event);
}
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
{
- struct ib_uevent_object *uobj;
-
- uobj = container_of(event->element.srq->uobject,
- struct ib_uevent_object, uobject);
-
- ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
- event->event, &uobj->event_list,
- &uobj->events_reported);
+ uverbs_uobj_event(&event->element.srq->uobject->uevent, event);
}
-void ib_uverbs_event_handler(struct ib_event_handler *handler,
- struct ib_event *event)
+static void ib_uverbs_event_handler(struct ib_event_handler *handler,
+ struct ib_event *event)
{
- struct ib_uverbs_file *file =
- container_of(handler, struct ib_uverbs_file, event_handler);
-
- ib_uverbs_async_handler(file, event->element.port_num, event->event,
- NULL, NULL);
-}
-
-void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file)
-{
- kref_put(&file->async_file->ref, ib_uverbs_release_async_event_file);
- file->async_file = NULL;
+ ib_uverbs_async_handler(
+ container_of(handler, struct ib_uverbs_async_event_file,
+ event_handler),
+ event->element.port_num, event->event, NULL, NULL);
}
void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue)
@@ -553,45 +487,26 @@ void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue)
ev_queue->async_queue = NULL;
}
-struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
- struct ib_device *ib_dev)
+void ib_uverbs_init_async_event_file(
+ struct ib_uverbs_async_event_file *async_file)
{
- struct ib_uverbs_async_event_file *ev_file;
- struct file *filp;
-
- ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL);
- if (!ev_file)
- return ERR_PTR(-ENOMEM);
-
- ib_uverbs_init_event_queue(&ev_file->ev_queue);
- ev_file->uverbs_file = uverbs_file;
- kref_get(&ev_file->uverbs_file->ref);
- kref_init(&ev_file->ref);
- filp = anon_inode_getfile("[infinibandevent]", &uverbs_async_event_fops,
- ev_file, O_RDONLY);
- if (IS_ERR(filp))
- goto err_put_refs;
-
- mutex_lock(&uverbs_file->device->lists_mutex);
- list_add_tail(&ev_file->list,
- &uverbs_file->device->uverbs_events_file_list);
- mutex_unlock(&uverbs_file->device->lists_mutex);
-
- WARN_ON(uverbs_file->async_file);
- uverbs_file->async_file = ev_file;
- kref_get(&uverbs_file->async_file->ref);
- INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
- ib_dev,
- ib_uverbs_event_handler);
- ib_register_event_handler(&uverbs_file->event_handler);
- /* At that point async file stuff was fully set */
+ struct ib_uverbs_file *uverbs_file = async_file->uobj.ufile;
+ struct ib_device *ib_dev = async_file->uobj.context->device;
- return filp;
+ ib_uverbs_init_event_queue(&async_file->ev_queue);
-err_put_refs:
- kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file);
- kref_put(&ev_file->ref, ib_uverbs_release_async_event_file);
- return filp;
+ /* The first async_event_file becomes the default one for the file. */
+ mutex_lock(&uverbs_file->ucontext_lock);
+ if (!uverbs_file->default_async_file) {
+ /* Pairs with the put in ib_uverbs_release_file */
+ uverbs_uobject_get(&async_file->uobj);
+ smp_store_release(&uverbs_file->default_async_file, async_file);
+ }
+ mutex_unlock(&uverbs_file->ucontext_lock);
+
+ INIT_IB_EVENT_HANDLER(&async_file->event_handler, ib_dev,
+ ib_uverbs_event_handler);
+ ib_register_event_handler(&async_file->event_handler);
}
static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
@@ -632,7 +547,7 @@ static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
if (hdr->in_words * 4 != count)
return -EINVAL;
- if (count < method_elm->req_size + sizeof(hdr)) {
+ if (count < method_elm->req_size + sizeof(*hdr)) {
/*
* rdma-core v18 and v19 have a bug where they send DESTROY_CQ
* with a 16 byte write instead of 24. Old kernels didn't
@@ -699,6 +614,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
memset(bundle.attr_present, 0, sizeof(bundle.attr_present));
bundle.ufile = file;
bundle.context = NULL; /* only valid if bundle has uobject */
+ bundle.uobject = NULL;
if (!method_elm->is_ex) {
size_t in_len = hdr.in_words * 4 - sizeof(hdr);
size_t out_len = hdr.out_words * 4;
@@ -762,11 +678,16 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
}
ret = method_elm->handler(&bundle);
+ if (bundle.uobject)
+ uverbs_finalize_object(bundle.uobject, UVERBS_ACCESS_NEW, true,
+ !ret, &bundle);
out_unlock:
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
return (ret) ? : count;
}
+static const struct vm_operations_struct rdma_umap_ops;
+
static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct ib_uverbs_file *file = filp->private_data;
@@ -781,45 +702,18 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
goto out;
}
+ mutex_lock(&file->disassociation_lock);
+
+ vma->vm_ops = &rdma_umap_ops;
ret = ucontext->device->ops.mmap(ucontext, vma);
+
+ mutex_unlock(&file->disassociation_lock);
out:
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
return ret;
}
/*
- * Each time we map IO memory into user space this keeps track of the mapping.
- * When the device is hot-unplugged we 'zap' the mmaps in user space to point
- * to the zero page and allow the hot unplug to proceed.
- *
- * This is necessary for cases like PCI physical hot unplug as the actual BAR
- * memory may vanish after this and access to it from userspace could MCE.
- *
- * RDMA drivers supporting disassociation must have their user space designed
- * to cope in some way with their IO pages going to the zero page.
- */
-struct rdma_umap_priv {
- struct vm_area_struct *vma;
- struct list_head list;
-};
-
-static const struct vm_operations_struct rdma_umap_ops;
-
-static void rdma_umap_priv_init(struct rdma_umap_priv *priv,
- struct vm_area_struct *vma)
-{
- struct ib_uverbs_file *ufile = vma->vm_file->private_data;
-
- priv->vma = vma;
- vma->vm_private_data = priv;
- vma->vm_ops = &rdma_umap_ops;
-
- mutex_lock(&ufile->umap_lock);
- list_add(&priv->list, &ufile->umaps);
- mutex_unlock(&ufile->umap_lock);
-}
-
-/*
* The VMA has been dup'd, initialize the vm_private_data with a new tracking
* struct
*/
@@ -835,6 +729,8 @@ static void rdma_umap_open(struct vm_area_struct *vma)
/* We are racing with disassociation */
if (!down_read_trylock(&ufile->hw_destroy_rwsem))
goto out_zap;
+ mutex_lock(&ufile->disassociation_lock);
+
/*
* Disassociation already completed, the VMA should already be zapped.
*/
@@ -844,12 +740,14 @@ static void rdma_umap_open(struct vm_area_struct *vma)
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
goto out_unlock;
- rdma_umap_priv_init(priv, vma);
+ rdma_umap_priv_init(priv, vma, opriv->entry);
+ mutex_unlock(&ufile->disassociation_lock);
up_read(&ufile->hw_destroy_rwsem);
return;
out_unlock:
+ mutex_unlock(&ufile->disassociation_lock);
up_read(&ufile->hw_destroy_rwsem);
out_zap:
/*
@@ -875,6 +773,9 @@ static void rdma_umap_close(struct vm_area_struct *vma)
* this point.
*/
mutex_lock(&ufile->umap_lock);
+ if (priv->entry)
+ rdma_user_mmap_entry_put(priv->entry);
+
list_del(&priv->list);
mutex_unlock(&ufile->umap_lock);
kfree(priv);
@@ -926,49 +827,11 @@ static const struct vm_operations_struct rdma_umap_ops = {
.fault = rdma_umap_fault,
};
-/*
- * Map IO memory into a process. This is to be called by drivers as part of
- * their mmap() functions if they wish to send something like PCI-E BAR memory
- * to userspace.
- */
-int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
- unsigned long pfn, unsigned long size, pgprot_t prot)
-{
- struct ib_uverbs_file *ufile = ucontext->ufile;
- struct rdma_umap_priv *priv;
-
- if (!(vma->vm_flags & VM_SHARED))
- return -EINVAL;
-
- if (vma->vm_end - vma->vm_start != size)
- return -EINVAL;
-
- /* Driver is using this wrong, must be called by ib_uverbs_mmap */
- if (WARN_ON(!vma->vm_file ||
- vma->vm_file->private_data != ufile))
- return -EINVAL;
- lockdep_assert_held(&ufile->device->disassociate_srcu);
-
- priv = kzalloc(sizeof(*priv), GFP_KERNEL);
- if (!priv)
- return -ENOMEM;
-
- vma->vm_page_prot = prot;
- if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
- kfree(priv);
- return -EAGAIN;
- }
-
- rdma_umap_priv_init(priv, vma);
- return 0;
-}
-EXPORT_SYMBOL(rdma_user_mmap_io);
-
void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
{
struct rdma_umap_priv *priv, *next_priv;
- lockdep_assert_held(&ufile->hw_destroy_rwsem);
+ mutex_lock(&ufile->disassociation_lock);
while (1) {
struct mm_struct *mm = NULL;
@@ -984,24 +847,28 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
ret = mmget_not_zero(mm);
if (!ret) {
list_del_init(&priv->list);
+ if (priv->entry) {
+ rdma_user_mmap_entry_put(priv->entry);
+ priv->entry = NULL;
+ }
mm = NULL;
continue;
}
break;
}
mutex_unlock(&ufile->umap_lock);
- if (!mm)
+ if (!mm) {
+ mutex_unlock(&ufile->disassociation_lock);
return;
+ }
/*
- * The umap_lock is nested under mmap_sem since it used within
+ * The umap_lock is nested under mmap_lock since it is used within
* the vma_ops callbacks, so we have to clean the list one mm
* at a time to get the lock ordering right. Typically there
* will only be one mm, so no big deal.
*/
- down_read(&mm->mmap_sem);
- if (!mmget_still_valid(mm))
- goto skip_mm;
+ mmap_read_lock(mm);
mutex_lock(&ufile->umap_lock);
list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
list) {
@@ -1013,14 +880,42 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
zap_vma_ptes(vma, vma->vm_start,
vma->vm_end - vma->vm_start);
+
+ if (priv->entry) {
+ rdma_user_mmap_entry_put(priv->entry);
+ priv->entry = NULL;
+ }
}
mutex_unlock(&ufile->umap_lock);
- skip_mm:
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
mmput(mm);
}
+
+ mutex_unlock(&ufile->disassociation_lock);
}
+/**
+ * rdma_user_mmap_disassociate() - Revoke mmaps for a device
+ * @device: device to revoke
+ *
+ * This function should be called by drivers that need to disable mmaps for the
+ * device, for instance because it is going to be reset.
+ */
+void rdma_user_mmap_disassociate(struct ib_device *device)
+{
+ struct ib_uverbs_device *uverbs_dev =
+ ib_get_client_data(device, &uverbs_client);
+ struct ib_uverbs_file *ufile;
+
+ mutex_lock(&uverbs_dev->lists_mutex);
+ list_for_each_entry(ufile, &uverbs_dev->uverbs_file_list, list) {
+ if (ufile->ucontext)
+ uverbs_user_mmap_disassociate(ufile);
+ }
+ mutex_unlock(&uverbs_dev->lists_mutex);
+}
+EXPORT_SYMBOL(rdma_user_mmap_disassociate);
+
/*
* ib_uverbs_open() does not need the BKL:
*
@@ -1041,7 +936,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
int srcu_key;
dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
- if (!atomic_inc_not_zero(&dev->refcount))
+ if (!refcount_inc_not_zero(&dev->refcount))
return -ENXIO;
get_device(&dev->dev);
@@ -1065,7 +960,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
module_dependent = !(ib_dev->ops.disassociate_ucontext);
if (module_dependent) {
- if (!try_module_get(ib_dev->owner)) {
+ if (!try_module_get(ib_dev->ops.owner)) {
ret = -ENODEV;
goto err;
}
@@ -1090,6 +985,8 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
mutex_init(&file->umap_lock);
INIT_LIST_HEAD(&file->umaps);
+ mutex_init(&file->disassociation_lock);
+
filp->private_data = file;
list_add_tail(&file->list, &dev->uverbs_file_list);
mutex_unlock(&dev->lists_mutex);
@@ -1100,12 +997,12 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
return stream_open(inode, filp);
err_module:
- module_put(ib_dev->owner);
+ module_put(ib_dev->ops.owner);
err:
mutex_unlock(&dev->lists_mutex);
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
- if (atomic_dec_and_test(&dev->refcount))
+ if (refcount_dec_and_test(&dev->refcount))
ib_uverbs_comp_dev(dev);
put_device(&dev->dev);
@@ -1132,9 +1029,8 @@ static const struct file_operations uverbs_fops = {
.write = ib_uverbs_write,
.open = ib_uverbs_open,
.release = ib_uverbs_close,
- .llseek = no_llseek,
.unlocked_ioctl = ib_uverbs_ioctl,
- .compat_ioctl = ib_uverbs_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static const struct file_operations uverbs_mmap_fops = {
@@ -1143,17 +1039,45 @@ static const struct file_operations uverbs_mmap_fops = {
.mmap = ib_uverbs_mmap,
.open = ib_uverbs_open,
.release = ib_uverbs_close,
- .llseek = no_llseek,
.unlocked_ioctl = ib_uverbs_ioctl,
- .compat_ioctl = ib_uverbs_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
+static int ib_uverbs_get_nl_info(struct ib_device *ibdev, void *client_data,
+ struct ib_client_nl_info *res)
+{
+ struct ib_uverbs_device *uverbs_dev = client_data;
+ int ret;
+
+ if (res->port != -1)
+ return -EINVAL;
+
+ res->abi = ibdev->ops.uverbs_abi_ver;
+ res->cdev = &uverbs_dev->dev;
+
+ /*
+ * To support DRIVER_ID binding in userspace, some of the drivers need
+ * upgrading to expose their PCI dependent revision information
+ * through get_context instead of relying on modalias matching. When
+ * the drivers are fixed they can drop this flag.
+ */
+ if (!ibdev->ops.uverbs_no_driver_id_binding) {
+ ret = nla_put_u32(res->nl_msg, RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID,
+ ibdev->ops.driver_id);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
static struct ib_client uverbs_client = {
.name = "uverbs",
.no_kverbs_req = true,
.add = ib_uverbs_add_one,
- .remove = ib_uverbs_remove_one
+ .remove = ib_uverbs_remove_one,
+ .get_nl_info = ib_uverbs_get_nl_info,
};
+MODULE_ALIAS_RDMA_CLIENT("uverbs");
static ssize_t ibdev_show(struct device *device, struct device_attribute *attr,
char *buf)
@@ -1167,7 +1091,7 @@ static ssize_t ibdev_show(struct device *device, struct device_attribute *attr,
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
if (ib_dev)
- ret = sprintf(buf, "%s\n", dev_name(&ib_dev->dev));
+ ret = sysfs_emit(buf, "%s\n", dev_name(&ib_dev->dev));
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
return ret;
@@ -1186,7 +1110,7 @@ static ssize_t abi_version_show(struct device *device,
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
if (ib_dev)
- ret = sprintf(buf, "%d\n", ib_dev->uverbs_abi_ver);
+ ret = sysfs_emit(buf, "%u\n", ib_dev->ops.uverbs_abi_ver);
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
return ret;
@@ -1219,53 +1143,56 @@ static int ib_uverbs_create_uapi(struct ib_device *device,
return 0;
}
-static void ib_uverbs_add_one(struct ib_device *device)
+static int ib_uverbs_add_one(struct ib_device *device)
{
int devnum;
dev_t base;
struct ib_uverbs_device *uverbs_dev;
int ret;
- if (!device->ops.alloc_ucontext)
- return;
+ if (!device->ops.alloc_ucontext ||
+ device->type == RDMA_DEVICE_TYPE_SMI)
+ return -EOPNOTSUPP;
uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL);
if (!uverbs_dev)
- return;
+ return -ENOMEM;
ret = init_srcu_struct(&uverbs_dev->disassociate_srcu);
if (ret) {
kfree(uverbs_dev);
- return;
+ return -ENOMEM;
}
device_initialize(&uverbs_dev->dev);
- uverbs_dev->dev.class = uverbs_class;
+ uverbs_dev->dev.class = &uverbs_class;
uverbs_dev->dev.parent = device->dev.parent;
uverbs_dev->dev.release = ib_uverbs_release_dev;
uverbs_dev->groups[0] = &dev_attr_group;
uverbs_dev->dev.groups = uverbs_dev->groups;
- atomic_set(&uverbs_dev->refcount, 1);
+ refcount_set(&uverbs_dev->refcount, 1);
init_completion(&uverbs_dev->comp);
uverbs_dev->xrcd_tree = RB_ROOT;
mutex_init(&uverbs_dev->xrcd_tree_mutex);
mutex_init(&uverbs_dev->lists_mutex);
INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
- INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
rcu_assign_pointer(uverbs_dev->ib_dev, device);
uverbs_dev->num_comp_vectors = device->num_comp_vectors;
devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1,
GFP_KERNEL);
- if (devnum < 0)
+ if (devnum < 0) {
+ ret = -ENOMEM;
goto err;
+ }
uverbs_dev->devnum = devnum;
if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
else
base = IB_UVERBS_BASE_DEV + devnum;
- if (ib_uverbs_create_uapi(device, uverbs_dev))
+ ret = ib_uverbs_create_uapi(device, uverbs_dev);
+ if (ret)
goto err_uapi;
uverbs_dev->dev.devt = base;
@@ -1280,30 +1207,25 @@ static void ib_uverbs_add_one(struct ib_device *device)
goto err_uapi;
ib_set_client_data(device, &uverbs_client, uverbs_dev);
- return;
+ return 0;
err_uapi:
ida_free(&uverbs_ida, devnum);
err:
- if (atomic_dec_and_test(&uverbs_dev->refcount))
+ if (refcount_dec_and_test(&uverbs_dev->refcount))
ib_uverbs_comp_dev(uverbs_dev);
wait_for_completion(&uverbs_dev->comp);
put_device(&uverbs_dev->dev);
- return;
+ return ret;
}
static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
struct ib_device *ib_dev)
{
struct ib_uverbs_file *file;
- struct ib_uverbs_async_event_file *event_file;
- struct ib_event event;
/* Pending running commands to terminate */
uverbs_disassociate_api_pre(uverbs_dev);
- event.event = IB_EVENT_DEVICE_FATAL;
- event.element.port_num = 0;
- event.device = ib_dev;
mutex_lock(&uverbs_dev->lists_mutex);
while (!list_empty(&uverbs_dev->uverbs_file_list)) {
@@ -1319,31 +1241,11 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
*/
mutex_unlock(&uverbs_dev->lists_mutex);
- ib_uverbs_event_handler(&file->event_handler, &event);
uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE);
kref_put(&file->ref, ib_uverbs_release_file);
mutex_lock(&uverbs_dev->lists_mutex);
}
-
- while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
- event_file = list_first_entry(&uverbs_dev->
- uverbs_events_file_list,
- struct ib_uverbs_async_event_file,
- list);
- spin_lock_irq(&event_file->ev_queue.lock);
- event_file->ev_queue.is_closed = 1;
- spin_unlock_irq(&event_file->ev_queue.lock);
-
- list_del(&event_file->list);
- ib_unregister_event_handler(
- &event_file->uverbs_file->event_handler);
- event_file->uverbs_file->event_handler.device =
- NULL;
-
- wake_up_interruptible(&event_file->ev_queue.poll_wait);
- kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN);
- }
mutex_unlock(&uverbs_dev->lists_mutex);
uverbs_disassociate_api(uverbs_dev->uapi);
@@ -1354,9 +1256,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
struct ib_uverbs_device *uverbs_dev = client_data;
int wait_clients = 1;
- if (!uverbs_dev)
- return;
-
cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev);
ida_free(&uverbs_ida, uverbs_dev->devnum);
@@ -1376,7 +1275,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
wait_clients = 0;
}
- if (atomic_dec_and_test(&uverbs_dev->refcount))
+ if (refcount_dec_and_test(&uverbs_dev->refcount))
ib_uverbs_comp_dev(uverbs_dev);
if (wait_clients)
wait_for_completion(&uverbs_dev->comp);
@@ -1384,13 +1283,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
put_device(&uverbs_dev->dev);
}
-static char *uverbs_devnode(struct device *dev, umode_t *mode)
-{
- if (mode)
- *mode = 0666;
- return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
-}
-
static int __init ib_uverbs_init(void)
{
int ret;
@@ -1411,16 +1303,13 @@ static int __init ib_uverbs_init(void)
goto out_alloc;
}
- uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
- if (IS_ERR(uverbs_class)) {
- ret = PTR_ERR(uverbs_class);
+ ret = class_register(&uverbs_class);
+ if (ret) {
pr_err("user_verbs: couldn't create class infiniband_verbs\n");
goto out_chrdev;
}
- uverbs_class->devnode = uverbs_devnode;
-
- ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);
+ ret = class_create_file(&uverbs_class, &class_attr_abi_version.attr);
if (ret) {
pr_err("user_verbs: couldn't create abi_version attribute\n");
goto out_class;
@@ -1435,7 +1324,7 @@ static int __init ib_uverbs_init(void)
return 0;
out_class:
- class_destroy(uverbs_class);
+ class_unregister(&uverbs_class);
out_chrdev:
unregister_chrdev_region(dynamic_uverbs_dev,
@@ -1452,11 +1341,13 @@ out:
static void __exit ib_uverbs_cleanup(void)
{
ib_unregister_client(&uverbs_client);
- class_destroy(uverbs_class);
+ class_unregister(&uverbs_class);
unregister_chrdev_region(IB_UVERBS_BASE_DEV,
IB_UVERBS_NUM_FIXED_MINOR);
unregister_chrdev_region(dynamic_uverbs_dev,
IB_UVERBS_NUM_DYNAMIC_MINOR);
+ ib_cleanup_ucaps();
+ mmu_notifier_synchronize();
}
module_init(ib_uverbs_init);
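The kernel-doc added above states that rdma_user_mmap_disassociate() is for drivers that must revoke user mmaps, for instance before resetting the device. A minimal, hedged sketch of such a caller follows; example_dev and example_hw_reset() are assumptions, not part of this patch.

struct example_dev {
	struct ib_device ibdev;
	/* ... driver-private state ... */
};

static int example_reset(struct example_dev *dev)
{
	/* Zap user mappings first so stale BAR pages cannot be touched. */
	rdma_user_mmap_disassociate(&dev->ibdev);

	return example_hw_reset(dev);
}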
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index b8d715c68ca4..e803f609ec87 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -66,7 +66,7 @@ void ib_copy_ah_attr_to_user(struct ib_device *device,
struct rdma_ah_attr *src = ah_attr;
struct rdma_ah_attr conv_ah;
- memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved));
+ memset(&dst->grh, 0, sizeof(dst->grh));
if ((ah_attr->type == RDMA_AH_ATTR_TYPE_OPA) &&
(rdma_ah_get_dlid(ah_attr) > be16_to_cpu(IB_LID_PERMISSIVE)) &&
@@ -171,45 +171,3 @@ void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
__ib_copy_path_rec_to_user(dst, src);
}
EXPORT_SYMBOL(ib_copy_path_rec_to_user);
-
-void ib_copy_path_rec_from_user(struct sa_path_rec *dst,
- struct ib_user_path_rec *src)
-{
- u32 slid, dlid;
-
- memset(dst, 0, sizeof(*dst));
- if ((ib_is_opa_gid((union ib_gid *)src->sgid)) ||
- (ib_is_opa_gid((union ib_gid *)src->dgid))) {
- dst->rec_type = SA_PATH_REC_TYPE_OPA;
- slid = opa_get_lid_from_gid((union ib_gid *)src->sgid);
- dlid = opa_get_lid_from_gid((union ib_gid *)src->dgid);
- } else {
- dst->rec_type = SA_PATH_REC_TYPE_IB;
- slid = ntohs(src->slid);
- dlid = ntohs(src->dlid);
- }
- memcpy(dst->dgid.raw, src->dgid, sizeof dst->dgid);
- memcpy(dst->sgid.raw, src->sgid, sizeof dst->sgid);
-
- sa_path_set_dlid(dst, dlid);
- sa_path_set_slid(dst, slid);
- sa_path_set_raw_traffic(dst, src->raw_traffic);
- dst->flow_label = src->flow_label;
- dst->hop_limit = src->hop_limit;
- dst->traffic_class = src->traffic_class;
- dst->reversible = src->reversible;
- dst->numb_path = src->numb_path;
- dst->pkey = src->pkey;
- dst->sl = src->sl;
- dst->mtu_selector = src->mtu_selector;
- dst->mtu = src->mtu;
- dst->rate_selector = src->rate_selector;
- dst->rate = src->rate;
- dst->packet_life_time = src->packet_life_time;
- dst->preference = src->preference;
- dst->packet_life_time_selector = src->packet_life_time_selector;
-
- /* TODO: No need to set this */
- sa_path_set_dmac_zero(dst);
-}
-EXPORT_SYMBOL(ib_copy_path_rec_from_user);
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index 35b2e2c640cc..13776a66e2e4 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -75,96 +75,28 @@ static int uverbs_free_mw(struct ib_uobject *uobject,
return uverbs_dealloc_mw((struct ib_mw *)uobject->object);
}
-static int uverbs_free_qp(struct ib_uobject *uobject,
- enum rdma_remove_reason why,
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_qp *qp = uobject->object;
- struct ib_uqp_object *uqp =
- container_of(uobject, struct ib_uqp_object, uevent.uobject);
- int ret;
-
- /*
- * If this is a user triggered destroy then do not allow destruction
- * until the user cleans up all the mcast bindings. Unlike in other
- * places we forcibly clean up the mcast attachments for !DESTROY
- * because the mcast attaches are not ubojects and will not be
- * destroyed by anything else during cleanup processing.
- */
- if (why == RDMA_REMOVE_DESTROY) {
- if (!list_empty(&uqp->mcast_list))
- return -EBUSY;
- } else if (qp == qp->real_qp) {
- ib_uverbs_detach_umcast(qp, uqp);
- }
-
- ret = ib_destroy_qp_user(qp, &attrs->driver_udata);
- if (ib_is_destroy_retryable(ret, why, uobject))
- return ret;
-
- if (uqp->uxrcd)
- atomic_dec(&uqp->uxrcd->refcnt);
-
- ib_uverbs_release_uevent(attrs->ufile, &uqp->uevent);
- return ret;
-}
-
static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject,
enum rdma_remove_reason why,
struct uverbs_attr_bundle *attrs)
{
struct ib_rwq_ind_table *rwq_ind_tbl = uobject->object;
struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
- int ret;
-
- ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
- if (ib_is_destroy_retryable(ret, why, uobject))
- return ret;
-
- kfree(ind_tbl);
- return ret;
-}
+ u32 table_size = (1 << rwq_ind_tbl->log_ind_tbl_size);
+ int ret, i;
-static int uverbs_free_wq(struct ib_uobject *uobject,
- enum rdma_remove_reason why,
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_wq *wq = uobject->object;
- struct ib_uwq_object *uwq =
- container_of(uobject, struct ib_uwq_object, uevent.uobject);
- int ret;
-
- ret = ib_destroy_wq(wq, &attrs->driver_udata);
- if (ib_is_destroy_retryable(ret, why, uobject))
- return ret;
-
- ib_uverbs_release_uevent(attrs->ufile, &uwq->uevent);
- return ret;
-}
-
-static int uverbs_free_srq(struct ib_uobject *uobject,
- enum rdma_remove_reason why,
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_srq *srq = uobject->object;
- struct ib_uevent_object *uevent =
- container_of(uobject, struct ib_uevent_object, uobject);
- enum ib_srq_type srq_type = srq->srq_type;
- int ret;
+ if (atomic_read(&rwq_ind_tbl->usecnt))
+ return -EBUSY;
- ret = ib_destroy_srq_user(srq, &attrs->driver_udata);
- if (ib_is_destroy_retryable(ret, why, uobject))
+ ret = rwq_ind_tbl->device->ops.destroy_rwq_ind_table(rwq_ind_tbl);
+ if (ret)
return ret;
- if (srq_type == IB_SRQT_XRC) {
- struct ib_usrq_object *us =
- container_of(uevent, struct ib_usrq_object, uevent);
-
- atomic_dec(&us->uxrcd->refcnt);
- }
+ for (i = 0; i < table_size; i++)
+ atomic_dec(&ind_tbl[i]->usecnt);
- ib_uverbs_release_uevent(attrs->ufile, uevent);
- return ret;
+ kfree(rwq_ind_tbl);
+ kfree(ind_tbl);
+ return 0;
}
static int uverbs_free_xrcd(struct ib_uobject *uobject,
@@ -176,9 +108,8 @@ static int uverbs_free_xrcd(struct ib_uobject *uobject,
container_of(uobject, struct ib_uxrcd_object, uobject);
int ret;
- ret = ib_destroy_usecnt(&uxrcd->refcnt, why, uobject);
- if (ret)
- return ret;
+ if (atomic_read(&uxrcd->refcnt))
+ return -EBUSY;
mutex_lock(&attrs->ufile->device->xrcd_tree_mutex);
ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why, attrs);
@@ -192,34 +123,47 @@ static int uverbs_free_pd(struct ib_uobject *uobject,
struct uverbs_attr_bundle *attrs)
{
struct ib_pd *pd = uobject->object;
- int ret;
- ret = ib_destroy_usecnt(&pd->usecnt, why, uobject);
- if (ret)
- return ret;
+ if (atomic_read(&pd->usecnt))
+ return -EBUSY;
- ib_dealloc_pd_user(pd, &attrs->driver_udata);
- return 0;
+ return ib_dealloc_pd_user(pd, &attrs->driver_udata);
}
-static int uverbs_hot_unplug_completion_event_file(struct ib_uobject *uobj,
- enum rdma_remove_reason why)
+void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue)
{
- struct ib_uverbs_completion_event_file *comp_event_file =
- container_of(uobj, struct ib_uverbs_completion_event_file,
- uobj);
- struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue;
+ struct ib_uverbs_event *entry, *tmp;
spin_lock_irq(&event_queue->lock);
+ /*
+ * The user must ensure that no new items are added to the event_list
+ * once is_closed is set.
+ */
event_queue->is_closed = 1;
spin_unlock_irq(&event_queue->lock);
+ wake_up_interruptible(&event_queue->poll_wait);
+ kill_fasync(&event_queue->async_queue, SIGIO, POLL_IN);
- if (why == RDMA_REMOVE_DRIVER_REMOVE) {
- wake_up_interruptible(&event_queue->poll_wait);
- kill_fasync(&event_queue->async_queue, SIGIO, POLL_IN);
+ spin_lock_irq(&event_queue->lock);
+ list_for_each_entry_safe(entry, tmp, &event_queue->event_list, list) {
+ if (entry->counter)
+ list_del(&entry->obj_list);
+ list_del(&entry->list);
+ kfree(entry);
}
- return 0;
-};
+ spin_unlock_irq(&event_queue->lock);
+}
+
+static void
+uverbs_completion_event_file_destroy_uobj(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ struct ib_uverbs_completion_event_file *file =
+ container_of(uobj, struct ib_uverbs_completion_event_file,
+ uobj);
+
+ ib_uverbs_free_event_queue(&file->ev_queue);
+}
int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs)
{
@@ -230,15 +174,11 @@ EXPORT_SYMBOL(uverbs_destroy_def_handler);
DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_COMP_CHANNEL,
UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file),
- uverbs_hot_unplug_completion_event_file,
+ uverbs_completion_event_file_destroy_uobj,
&uverbs_event_fops,
"[infinibandevent]",
O_RDONLY));
-DECLARE_UVERBS_NAMED_OBJECT(
- UVERBS_OBJECT_QP,
- UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp));
-
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
UVERBS_METHOD_MW_DESTROY,
UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MW_HANDLE,
@@ -250,11 +190,6 @@ DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW,
UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw),
&UVERBS_METHOD(UVERBS_METHOD_MW_DESTROY));
-DECLARE_UVERBS_NAMED_OBJECT(
- UVERBS_OBJECT_SRQ,
- UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object),
- uverbs_free_srq));
-
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
UVERBS_METHOD_AH_DESTROY,
UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_AH_HANDLE,
@@ -279,10 +214,6 @@ DECLARE_UVERBS_NAMED_OBJECT(
uverbs_free_flow),
&UVERBS_METHOD(UVERBS_METHOD_FLOW_DESTROY));
-DECLARE_UVERBS_NAMED_OBJECT(
- UVERBS_OBJECT_WQ,
- UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq));
-
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
UVERBS_METHOD_RWQ_IND_TBL_DESTROY,
UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_RWQ_IND_TBL_HANDLE,
@@ -323,18 +254,12 @@ const struct uapi_definition uverbs_def_obj_intf[] = {
UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_COMP_CHANNEL,
UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)),
- UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_QP,
- UAPI_DEF_OBJ_NEEDS_FN(destroy_qp)),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_AH,
UAPI_DEF_OBJ_NEEDS_FN(destroy_ah)),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MW,
UAPI_DEF_OBJ_NEEDS_FN(dealloc_mw)),
- UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_SRQ,
- UAPI_DEF_OBJ_NEEDS_FN(destroy_srq)),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_FLOW,
UAPI_DEF_OBJ_NEEDS_FN(destroy_flow)),
- UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_WQ,
- UAPI_DEF_OBJ_NEEDS_FN(destroy_wq)),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
UVERBS_OBJECT_RWQ_IND_TBL,
UAPI_DEF_OBJ_NEEDS_FN(destroy_rwq_ind_table)),
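A recurring change in this file is the destroy convention: type free callbacks now check their own use counts and fail with -EBUSY instead of routing through ib_destroy_usecnt()/ib_is_destroy_retryable(). A hedged sketch of the resulting shape for a generic object (example_obj and example_destroy_hw() are hypothetical):

static int example_free_obj(struct ib_uobject *uobject,
			    enum rdma_remove_reason why,
			    struct uverbs_attr_bundle *attrs)
{
	struct example_obj *obj = uobject->object;

	/* Refuse destruction while other objects still reference this one. */
	if (atomic_read(&obj->usecnt))
		return -EBUSY;

	return example_destroy_hw(obj, &attrs->driver_udata);
}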
diff --git a/drivers/infiniband/core/uverbs_std_types_async_fd.c b/drivers/infiniband/core/uverbs_std_types_async_fd.c
new file mode 100644
index 000000000000..cc24cfdf7aee
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_async_fd.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include <rdma/uverbs_ioctl.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int UVERBS_HANDLER(UVERBS_METHOD_ASYNC_EVENT_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_METHOD_ASYNC_EVENT_ALLOC);
+
+ ib_uverbs_init_async_event_file(
+ container_of(uobj, struct ib_uverbs_async_event_file, uobj));
+ return 0;
+}
+
+static void uverbs_async_event_destroy_uobj(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ struct ib_uverbs_async_event_file *event_file =
+ container_of(uobj, struct ib_uverbs_async_event_file, uobj);
+
+ ib_unregister_event_handler(&event_file->event_handler);
+
+ if (why == RDMA_REMOVE_DRIVER_REMOVE)
+ ib_uverbs_async_handler(event_file, 0, IB_EVENT_DEVICE_FATAL,
+ NULL, NULL);
+}
+
+int uverbs_async_event_release(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_async_event_file *event_file;
+ struct ib_uobject *uobj = filp->private_data;
+ int ret;
+
+ if (!uobj)
+ return uverbs_uobject_fd_release(inode, filp);
+
+ event_file =
+ container_of(uobj, struct ib_uverbs_async_event_file, uobj);
+
+ /*
+ * The async event FD has to deliver IB_EVENT_DEVICE_FATAL even after
+ * disassociation, so cleaning the event list must only happen after
+ * release. The user knows it has reached the end of the event stream
+ * when it sees IB_EVENT_DEVICE_FATAL.
+ */
+ uverbs_uobject_get(uobj);
+ ret = uverbs_uobject_fd_release(inode, filp);
+ ib_uverbs_free_event_queue(&event_file->ev_queue);
+ uverbs_uobject_put(uobj);
+ return ret;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_ASYNC_EVENT_ALLOC,
+ UVERBS_ATTR_FD(UVERBS_ATTR_ASYNC_EVENT_ALLOC_FD_HANDLE,
+ UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_async_event_file),
+ uverbs_async_event_destroy_uobj,
+ &uverbs_async_event_fops,
+ "[infinibandevent]",
+ O_RDONLY),
+ &UVERBS_METHOD(UVERBS_METHOD_ASYNC_EVENT_ALLOC));
+
+const struct uapi_definition uverbs_def_obj_async_fd[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_ASYNC_EVENT),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
index 9f013304e677..381aa5797641 100644
--- a/drivers/infiniband/core/uverbs_std_types_counters.c
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -42,11 +42,14 @@ static int uverbs_free_counters(struct ib_uobject *uobject,
struct ib_counters *counters = uobject->object;
int ret;
- ret = ib_destroy_usecnt(&counters->usecnt, why, uobject);
+ if (atomic_read(&counters->usecnt))
+ return -EBUSY;
+
+ ret = counters->device->ops.destroy_counters(counters);
if (ret)
return ret;
-
- return counters->device->ops.destroy_counters(counters);
+ kfree(counters);
+ return 0;
}
static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(
@@ -66,20 +69,19 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(
if (!ib_dev->ops.create_counters)
return -EOPNOTSUPP;
- counters = ib_dev->ops.create_counters(ib_dev, attrs);
- if (IS_ERR(counters)) {
- ret = PTR_ERR(counters);
- goto err_create_counters;
- }
+ counters = rdma_zalloc_drv_obj(ib_dev, ib_counters);
+ if (!counters)
+ return -ENOMEM;
counters->device = ib_dev;
counters->uobject = uobj;
uobj->object = counters;
atomic_set(&counters->usecnt, 0);
- return 0;
+ ret = ib_dev->ops.create_counters(counters, attrs);
+ if (ret)
+ kfree(counters);
-err_create_counters:
return ret;
}
@@ -105,6 +107,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(
return ret;
uattr = uverbs_attr_get(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF);
+ if (IS_ERR(uattr))
+ return PTR_ERR(uattr);
read_attr.ncounters = uattr->ptr_attr.len / sizeof(u64);
read_attr.counters_buff = uverbs_zalloc(
attrs, array_size(read_attr.ncounters, sizeof(u64)));
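With the allocation moved into the core via rdma_zalloc_drv_obj() above, a driver's create_counters() now receives a pre-zeroed ib_counters whose device and uobject fields are already set, and returns an int. A hedged sketch of a driver-side implementation (struct example_counters and example_hw_alloc_counters() are assumptions for illustration):

struct example_counters {
	struct ib_counters ibcntrs;
	u32 hw_id;
};

static int example_create_counters(struct ib_counters *counters,
				   struct uverbs_attr_bundle *attrs)
{
	struct example_counters *ctrs =
		container_of(counters, struct example_counters, ibcntrs);

	/* Only HW setup is left to the driver; the core frees on failure. */
	return example_hw_alloc_counters(ctrs);
}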
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index db5c46a1bb2d..fab5d914029d 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -33,6 +33,7 @@
#include <rdma/uverbs_std_types.h>
#include "rdma_core.h"
#include "uverbs.h"
+#include "restrack.h"
static int uverbs_free_cq(struct ib_uobject *uobject,
enum rdma_remove_reason why,
@@ -41,21 +42,20 @@ static int uverbs_free_cq(struct ib_uobject *uobject,
struct ib_cq *cq = uobject->object;
struct ib_uverbs_event_queue *ev_queue = cq->cq_context;
struct ib_ucq_object *ucq =
- container_of(uobject, struct ib_ucq_object, uobject);
+ container_of(uobject, struct ib_ucq_object, uevent.uobject);
int ret;
ret = ib_destroy_cq_user(cq, &attrs->driver_udata);
- if (ib_is_destroy_retryable(ret, why, uobject))
+ if (ret)
return ret;
ib_uverbs_release_ucq(
- attrs->ufile,
ev_queue ? container_of(ev_queue,
struct ib_uverbs_completion_event_file,
ev_queue) :
NULL,
ucq);
- return ret;
+ return 0;
}
static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
@@ -63,16 +63,22 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
{
struct ib_ucq_object *obj = container_of(
uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE),
- typeof(*obj), uobject);
+ typeof(*obj), uevent.uobject);
+ struct ib_uverbs_completion_event_file *ev_file = NULL;
struct ib_device *ib_dev = attrs->context->device;
- int ret;
- u64 user_handle;
+ struct ib_umem_dmabuf *umem_dmabuf;
struct ib_cq_init_attr attr = {};
- struct ib_cq *cq;
- struct ib_uverbs_completion_event_file *ev_file = NULL;
struct ib_uobject *ev_file_uobj;
+ struct ib_umem *umem = NULL;
+ u64 buffer_length;
+ u64 buffer_offset;
+ struct ib_cq *cq;
+ u64 user_handle;
+ u64 buffer_va;
+ int buffer_fd;
+ int ret;
- if (!ib_dev->ops.create_cq || !ib_dev->ops.destroy_cq)
+ if ((!ib_dev->ops.create_cq && !ib_dev->ops.create_cq_umem) || !ib_dev->ops.destroy_cq)
return -EOPNOTSUPP;
ret = uverbs_copy_from(&attr.comp_vector, attrs,
@@ -101,43 +107,111 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
uverbs_uobject_get(ev_file_uobj);
}
+ obj->uevent.event_file = ib_uverbs_get_async_event(
+ attrs, UVERBS_ATTR_CREATE_CQ_EVENT_FD);
+
if (attr.comp_vector >= attrs->ufile->device->num_comp_vectors) {
ret = -EINVAL;
goto err_event_file;
}
- obj->comp_events_reported = 0;
- obj->async_events_reported = 0;
INIT_LIST_HEAD(&obj->comp_list);
- INIT_LIST_HEAD(&obj->async_list);
+ INIT_LIST_HEAD(&obj->uevent.event_list);
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_VA)) {
- cq = ib_dev->ops.create_cq(ib_dev, &attr, &attrs->driver_udata);
- if (IS_ERR(cq)) {
- ret = PTR_ERR(cq);
+ ret = uverbs_copy_from(&buffer_va, attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_VA);
+ if (ret)
+ goto err_event_file;
+
+ ret = uverbs_copy_from(&buffer_length, attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_LENGTH);
+ if (ret)
+ goto err_event_file;
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_FD) ||
+ uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_OFFSET) ||
+ !ib_dev->ops.create_cq_umem) {
+ ret = -EINVAL;
+ goto err_event_file;
+ }
+
+ umem = ib_umem_get(ib_dev, buffer_va, buffer_length, IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(umem)) {
+ ret = PTR_ERR(umem);
+ goto err_event_file;
+ }
+ } else if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_FD)) {
+
+ ret = uverbs_get_raw_fd(&buffer_fd, attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_FD);
+ if (ret)
+ goto err_event_file;
+
+ ret = uverbs_copy_from(&buffer_offset, attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_OFFSET);
+ if (ret)
+ goto err_event_file;
+
+ ret = uverbs_copy_from(&buffer_length, attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_LENGTH);
+ if (ret)
+ goto err_event_file;
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_VA) ||
+ !ib_dev->ops.create_cq_umem) {
+ ret = -EINVAL;
+ goto err_event_file;
+ }
+
+ umem_dmabuf = ib_umem_dmabuf_get_pinned(ib_dev, buffer_offset, buffer_length,
+ buffer_fd, IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(umem_dmabuf)) {
+ ret = PTR_ERR(umem_dmabuf);
+ goto err_event_file;
+ }
+ umem = &umem_dmabuf->umem;
+ } else if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_OFFSET) ||
+ uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_LENGTH) ||
+ !ib_dev->ops.create_cq) {
+ ret = -EINVAL;
+ goto err_event_file;
+ }
+
+ cq = rdma_zalloc_drv_obj(ib_dev, ib_cq);
+ if (!cq) {
+ ret = -ENOMEM;
+ ib_umem_release(umem);
goto err_event_file;
}
cq->device = ib_dev;
- cq->uobject = &obj->uobject;
+ cq->uobject = obj;
cq->comp_handler = ib_uverbs_comp_handler;
cq->event_handler = ib_uverbs_cq_event_handler;
cq->cq_context = ev_file ? &ev_file->ev_queue : NULL;
- obj->uobject.object = cq;
- obj->uobject.user_handle = user_handle;
atomic_set(&cq->usecnt, 0);
- cq->res.type = RDMA_RESTRACK_CQ;
- rdma_restrack_uadd(&cq->res);
- ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe,
- sizeof(cq->cqe));
+ rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+ rdma_restrack_set_name(&cq->res, NULL);
+
+ ret = umem ? ib_dev->ops.create_cq_umem(cq, &attr, umem, attrs) :
+ ib_dev->ops.create_cq(cq, &attr, attrs);
if (ret)
- goto err_cq;
+ goto err_free;
- return 0;
-err_cq:
- ib_destroy_cq(cq);
+ obj->uevent.uobject.object = cq;
+ obj->uevent.uobject.user_handle = user_handle;
+ rdma_restrack_add(&cq->res);
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE);
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe,
+ sizeof(cq->cqe));
+ return ret;
+
+err_free:
+ ib_umem_release(umem);
+ rdma_restrack_put(&cq->res);
+ kfree(cq);
err_event_file:
+ if (obj->uevent.event_file)
+ uverbs_uobject_put(&obj->uevent.event_file->uobj);
if (ev_file)
uverbs_uobject_put(ev_file_uobj);
return ret;
@@ -167,6 +241,21 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE,
UVERBS_ATTR_TYPE(u32),
UA_MANDATORY),
+ UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_EVENT_FD,
+ UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_BUFFER_VA,
+ UVERBS_ATTR_TYPE(u64),
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_BUFFER_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_OPTIONAL),
+ UVERBS_ATTR_RAW_FD(UVERBS_ATTR_CREATE_CQ_BUFFER_FD,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_BUFFER_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_OPTIONAL),
UVERBS_ATTR_UHW());
static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(
@@ -175,10 +264,10 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(
struct ib_uobject *uobj =
uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE);
struct ib_ucq_object *obj =
- container_of(uobj, struct ib_ucq_object, uobject);
+ container_of(uobj, struct ib_ucq_object, uevent.uobject);
struct ib_uverbs_destroy_cq_resp resp = {
.comp_events_reported = obj->comp_events_reported,
- .async_events_reported = obj->async_events_reported
+ .async_events_reported = obj->uevent.events_reported
};
return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_CQ_RESP, &resp,
@@ -198,11 +287,8 @@ DECLARE_UVERBS_NAMED_METHOD(
DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_CQ,
UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), uverbs_free_cq),
-
-#if IS_ENABLED(CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI)
&UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE),
&UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY)
-#endif
);
const struct uapi_definition uverbs_def_obj_cq[] = {
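The extended CQ_CREATE handler above accepts three mutually exclusive buffer descriptions: a user VA range, a dmabuf fd plus offset, or no buffer at all. A standalone restatement of the accepted attribute combinations (hypothetical helper, not kernel code):

#include <errno.h>
#include <stdbool.h>

enum cq_buffer_mode { CQ_BUF_NONE, CQ_BUF_VA, CQ_BUF_DMABUF };

/* has_* mirrors uverbs_attr_is_valid() on the new CREATE_CQ_BUFFER_* attrs */
static int classify_cq_buffer(bool has_va, bool has_fd, bool has_offset,
			      bool has_length, enum cq_buffer_mode *mode)
{
	if (has_va && has_length && !has_fd && !has_offset) {
		*mode = CQ_BUF_VA;	/* ib_umem_get() on a user VA range */
		return 0;
	}
	if (has_fd && has_offset && has_length && !has_va) {
		*mode = CQ_BUF_DMABUF;	/* ib_umem_dmabuf_get_pinned() on the fd */
		return 0;
	}
	if (!has_va && !has_fd && !has_offset && !has_length) {
		*mode = CQ_BUF_NONE;	/* classic ops.create_cq() path */
		return 0;
	}
	return -EINVAL;			/* any other combination is rejected */
}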
diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c
index 2a3f2f01028d..c0fd283d9d6c 100644
--- a/drivers/infiniband/core/uverbs_std_types_device.c
+++ b/drivers/infiniband/core/uverbs_std_types_device.c
@@ -3,11 +3,13 @@
* Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
*/
+#include <linux/overflow.h>
#include <rdma/uverbs_std_types.h>
#include "rdma_core.h"
#include "uverbs.h"
#include <rdma/uverbs_ioctl.h>
#include <rdma/opa_addr.h>
+#include <rdma/ib_cache.h>
/*
* This ioctl method allows calling any defined write or write_ex
@@ -38,7 +40,12 @@ static int UVERBS_HANDLER(UVERBS_METHOD_INVOKE_WRITE)(
attrs->ucore.outlen < method_elm->resp_size)
return -ENOSPC;
- return method_elm->handler(attrs);
+ attrs->uobject = NULL;
+ rc = method_elm->handler(attrs);
+ if (attrs->uobject)
+ uverbs_finalize_object(attrs->uobject, UVERBS_ACCESS_NEW, true,
+ !rc, attrs);
+ return rc;
}
DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_INVOKE_WRITE,
@@ -110,8 +117,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_INFO_HANDLES)(
return ret;
uapi_object = uapi_get_object(attrs->ufile->device->uapi, object_id);
- if (!uapi_object)
- return -EINVAL;
+ if (IS_ERR(uapi_object))
+ return PTR_ERR(uapi_object);
handles = gather_objects_handle(attrs->ufile, uapi_object, attrs,
out_len, &total);
@@ -160,7 +167,8 @@ void copy_port_attr_to_resp(struct ib_port_attr *attr,
resp->subnet_timeout = attr->subnet_timeout;
resp->init_type_reply = attr->init_type_reply;
resp->active_width = attr->active_width;
- resp->active_speed = attr->active_speed;
+ /* This ABI needs to be extended to report speeds greater than IB_SPEED_NDR */
+ resp->active_speed = min_t(u16, attr->active_speed, IB_SPEED_NDR);
resp->phys_state = attr->phys_state;
resp->link_layer = rdma_port_get_link_layer(ib_dev, port_num);
}
@@ -195,11 +203,253 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_PORT)(
copy_port_attr_to_resp(&attr, &resp.legacy_resp, ib_dev, port_num);
resp.port_cap_flags2 = attr.port_cap_flags2;
+ resp.active_speed_ex = attr.active_speed;
return uverbs_copy_to_struct_or_zero(attrs, UVERBS_ATTR_QUERY_PORT_RESP,
&resp, sizeof(resp));
}
+static int UVERBS_HANDLER(UVERBS_METHOD_GET_CONTEXT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ u32 num_comp = attrs->ufile->device->num_comp_vectors;
+ u64 core_support = IB_UVERBS_CORE_SUPPORT_OPTIONAL_MR_ACCESS;
+ int ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS,
+ &num_comp, sizeof(num_comp));
+ if (IS_UVERBS_COPY_ERR(ret))
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT,
+ &core_support, sizeof(core_support));
+ if (IS_UVERBS_COPY_ERR(ret))
+ return ret;
+
+ ret = ib_alloc_ucontext(attrs);
+ if (ret)
+ return ret;
+ ret = ib_init_ucontext(attrs);
+ if (ret) {
+ kfree(attrs->context);
+ attrs->context = NULL;
+ return ret;
+ }
+ return 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_CONTEXT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ u64 core_support = IB_UVERBS_CORE_SUPPORT_OPTIONAL_MR_ACCESS;
+ struct ib_ucontext *ucontext;
+ struct ib_device *ib_dev;
+ u32 num_comp;
+ int ret;
+
+ ucontext = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
+ ib_dev = ucontext->device;
+
+ if (!ib_dev->ops.query_ucontext)
+ return -EOPNOTSUPP;
+
+ num_comp = attrs->ufile->device->num_comp_vectors;
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_CONTEXT_NUM_COMP_VECTORS,
+ &num_comp, sizeof(num_comp));
+ if (IS_UVERBS_COPY_ERR(ret))
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_CONTEXT_CORE_SUPPORT,
+ &core_support, sizeof(core_support));
+ if (IS_UVERBS_COPY_ERR(ret))
+ return ret;
+
+ return ucontext->device->ops.query_ucontext(ucontext, attrs);
+}
+
+static int copy_gid_entries_to_user(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_gid_entry *entries,
+ size_t num_entries, size_t user_entry_size)
+{
+ const struct uverbs_attr *attr;
+ void __user *user_entries;
+ size_t copy_len;
+ int ret;
+ int i;
+
+ if (user_entry_size == sizeof(*entries)) {
+ ret = uverbs_copy_to(attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+ entries, sizeof(*entries) * num_entries);
+ return ret;
+ }
+
+ copy_len = min_t(size_t, user_entry_size, sizeof(*entries));
+ attr = uverbs_attr_get(attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES);
+ if (IS_ERR(attr))
+ return PTR_ERR(attr);
+
+ user_entries = u64_to_user_ptr(attr->ptr_attr.data);
+ for (i = 0; i < num_entries; i++) {
+ if (copy_to_user(user_entries, entries, copy_len))
+ return -EFAULT;
+
+ if (user_entry_size > sizeof(*entries)) {
+ if (clear_user(user_entries + sizeof(*entries),
+ user_entry_size - sizeof(*entries)))
+ return -EFAULT;
+ }
+
+ entries++;
+ user_entries += user_entry_size;
+ }
+
+ return uverbs_output_written(attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_gid_entry *entries;
+ struct ib_ucontext *ucontext;
+ struct ib_device *ib_dev;
+ size_t user_entry_size;
+ ssize_t num_entries;
+ int max_entries;
+ u32 flags;
+ int ret;
+
+ ret = uverbs_get_flags32(&flags, attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_FLAGS, 0);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_const(&user_entry_size, attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_ENTRY_SIZE);
+ if (ret)
+ return ret;
+
+ if (!user_entry_size)
+ return -EINVAL;
+
+ max_entries = uverbs_attr_ptr_get_array_size(
+ attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+ user_entry_size);
+ if (max_entries <= 0)
+ return max_entries ?: -EINVAL;
+
+ ucontext = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
+ ib_dev = ucontext->device;
+
+ entries = uverbs_kcalloc(attrs, max_entries, sizeof(*entries));
+ if (IS_ERR(entries))
+ return PTR_ERR(entries);
+
+ num_entries = rdma_query_gid_table(ib_dev, entries, max_entries);
+ if (num_entries < 0)
+ return -EINVAL;
+
+ ret = copy_gid_entries_to_user(attrs, entries, num_entries,
+ user_entry_size);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_RESP_NUM_ENTRIES,
+ &num_entries, sizeof(num_entries));
+ return ret;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_ENTRY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_gid_entry entry = {};
+ const struct ib_gid_attr *gid_attr;
+ struct ib_ucontext *ucontext;
+ struct ib_device *ib_dev;
+ struct net_device *ndev;
+ u32 gid_index;
+ u32 port_num;
+ u32 flags;
+ int ret;
+
+ ret = uverbs_get_flags32(&flags, attrs,
+ UVERBS_ATTR_QUERY_GID_ENTRY_FLAGS, 0);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_const(&port_num, attrs,
+ UVERBS_ATTR_QUERY_GID_ENTRY_PORT);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_const(&gid_index, attrs,
+ UVERBS_ATTR_QUERY_GID_ENTRY_GID_INDEX);
+ if (ret)
+ return ret;
+
+ ucontext = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
+ ib_dev = ucontext->device;
+
+ if (!rdma_is_port_valid(ib_dev, port_num))
+ return -EINVAL;
+
+ gid_attr = rdma_get_gid_attr(ib_dev, port_num, gid_index);
+ if (IS_ERR(gid_attr))
+ return PTR_ERR(gid_attr);
+
+ memcpy(&entry.gid, &gid_attr->gid, sizeof(gid_attr->gid));
+ entry.gid_index = gid_attr->index;
+ entry.port_num = gid_attr->port_num;
+ entry.gid_type = gid_attr->gid_type;
+
+ rcu_read_lock();
+ ndev = rdma_read_gid_attr_ndev_rcu(gid_attr);
+ if (IS_ERR(ndev)) {
+ if (PTR_ERR(ndev) != -ENODEV) {
+ ret = PTR_ERR(ndev);
+ rcu_read_unlock();
+ goto out;
+ }
+ } else {
+ entry.netdev_ifindex = ndev->ifindex;
+ }
+ rcu_read_unlock();
+
+ ret = uverbs_copy_to_struct_or_zero(
+ attrs, UVERBS_ATTR_QUERY_GID_ENTRY_RESP_ENTRY, &entry,
+ sizeof(entry));
+out:
+ rdma_put_gid_attr(gid_attr);
+ return ret;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_GET_CONTEXT,
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS,
+ UVERBS_ATTR_TYPE(u32), UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT,
+ UVERBS_ATTR_TYPE(u64), UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_GET_CONTEXT_FD_ARR,
+ UVERBS_ATTR_MIN_SIZE(sizeof(int)),
+ UA_OPTIONAL,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_UHW());
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QUERY_CONTEXT,
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_CONTEXT_NUM_COMP_VECTORS,
+ UVERBS_ATTR_TYPE(u32), UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_CONTEXT_CORE_SUPPORT,
+ UVERBS_ATTR_TYPE(u64), UA_OPTIONAL));
+
DECLARE_UVERBS_NAMED_METHOD(
UVERBS_METHOD_INFO_HANDLES,
/* Also includes any device specific object ids */
@@ -216,13 +466,41 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_PTR_OUT(
UVERBS_ATTR_QUERY_PORT_RESP,
UVERBS_ATTR_STRUCT(struct ib_uverbs_query_port_resp_ex,
- reserved),
+ active_speed_ex),
UA_MANDATORY));
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QUERY_GID_TABLE,
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_TABLE_ENTRY_SIZE, u64,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_QUERY_GID_TABLE_FLAGS, u32,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+ UVERBS_ATTR_MIN_SIZE(0), UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_GID_TABLE_RESP_NUM_ENTRIES,
+ UVERBS_ATTR_TYPE(u64), UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QUERY_GID_ENTRY,
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_ENTRY_PORT, u32,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_ENTRY_GID_INDEX, u32,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_QUERY_GID_ENTRY_FLAGS, u32,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_GID_ENTRY_RESP_ENTRY,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_gid_entry,
+ netdev_ifindex),
+ UA_MANDATORY));
+
DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE,
+ &UVERBS_METHOD(UVERBS_METHOD_GET_CONTEXT),
&UVERBS_METHOD(UVERBS_METHOD_INVOKE_WRITE),
&UVERBS_METHOD(UVERBS_METHOD_INFO_HANDLES),
- &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT));
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT),
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_CONTEXT),
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_GID_TABLE),
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_GID_ENTRY));
const struct uapi_definition uverbs_def_obj_device[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DEVICE),
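copy_gid_entries_to_user() above has to cope with a userspace entry size that differs from the kernel's struct ib_uverbs_gid_entry. A standalone sketch of that copy-and-pad rule (userspace C for illustration only, with memcpy/memset standing in for copy_to_user/clear_user):

#include <stddef.h>
#include <string.h>

/* copy min(kernel, user) bytes per entry and zero any extra space userspace asked for */
static void copy_entries(void *dst, size_t user_entry_size,
			 const void *src, size_t kern_entry_size, size_t n)
{
	size_t copy_len = user_entry_size < kern_entry_size ?
			  user_entry_size : kern_entry_size;

	for (size_t i = 0; i < n; i++) {
		char *d = (char *)dst + i * user_entry_size;
		const char *s = (const char *)src + i * kern_entry_size;

		memcpy(d, s, copy_len);
		if (user_entry_size > kern_entry_size)
			memset(d + kern_entry_size, 0,
			       user_entry_size - kern_entry_size);
	}
}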
diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c
index d5a1de33c2c9..98c522cf86d6 100644
--- a/drivers/infiniband/core/uverbs_std_types_dm.c
+++ b/drivers/infiniband/core/uverbs_std_types_dm.c
@@ -39,11 +39,9 @@ static int uverbs_free_dm(struct ib_uobject *uobject,
struct uverbs_attr_bundle *attrs)
{
struct ib_dm *dm = uobject->object;
- int ret;
- ret = ib_destroy_usecnt(&dm->usecnt, why, uobject);
- if (ret)
- return ret;
+ if (atomic_read(&dm->usecnt))
+ return -EBUSY;
return dm->device->ops.dealloc_dm(dm, attrs);
}
diff --git a/drivers/infiniband/core/uverbs_std_types_dmah.c b/drivers/infiniband/core/uverbs_std_types_dmah.c
new file mode 100644
index 000000000000..453ce656c6f2
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_dmah.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#include "rdma_core.h"
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+#include "restrack.h"
+
+static int uverbs_free_dmah(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_dmah *dmah = uobject->object;
+ int ret;
+
+ if (atomic_read(&dmah->usecnt))
+ return -EBUSY;
+
+ ret = dmah->device->ops.dealloc_dmah(dmah, attrs);
+ if (ret)
+ return ret;
+
+ rdma_restrack_del(&dmah->res);
+ kfree(dmah);
+ return 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_DMAH_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DMAH_HANDLE)
+ ->obj_attr.uobject;
+ struct ib_device *ib_dev = attrs->context->device;
+ struct ib_dmah *dmah;
+ int ret;
+
+ dmah = rdma_zalloc_drv_obj(ib_dev, ib_dmah);
+ if (!dmah)
+ return -ENOMEM;
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_ALLOC_DMAH_CPU_ID)) {
+ ret = uverbs_copy_from(&dmah->cpu_id, attrs,
+ UVERBS_ATTR_ALLOC_DMAH_CPU_ID);
+ if (ret)
+ goto err;
+
+ if (!cpumask_test_cpu(dmah->cpu_id, current->cpus_ptr)) {
+ ret = -EPERM;
+ goto err;
+ }
+
+ dmah->valid_fields |= BIT(IB_DMAH_CPU_ID_EXISTS);
+ }
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_ALLOC_DMAH_TPH_MEM_TYPE)) {
+ dmah->mem_type = uverbs_attr_get_enum_id(attrs,
+ UVERBS_ATTR_ALLOC_DMAH_TPH_MEM_TYPE);
+ dmah->valid_fields |= BIT(IB_DMAH_MEM_TYPE_EXISTS);
+ }
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_ALLOC_DMAH_PH)) {
+ ret = uverbs_copy_from(&dmah->ph, attrs,
+ UVERBS_ATTR_ALLOC_DMAH_PH);
+ if (ret)
+ goto err;
+
+ /* Per PCIe spec 6.2-1.0, only the lowest two bits are applicable */
+ if (dmah->ph & 0xFC) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ dmah->valid_fields |= BIT(IB_DMAH_PH_EXISTS);
+ }
+
+ dmah->device = ib_dev;
+ dmah->uobject = uobj;
+ atomic_set(&dmah->usecnt, 0);
+
+ rdma_restrack_new(&dmah->res, RDMA_RESTRACK_DMAH);
+ rdma_restrack_set_name(&dmah->res, NULL);
+
+ ret = ib_dev->ops.alloc_dmah(dmah, attrs);
+ if (ret) {
+ rdma_restrack_put(&dmah->res);
+ goto err;
+ }
+
+ uobj->object = dmah;
+ rdma_restrack_add(&dmah->res);
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_ALLOC_DMAH_HANDLE);
+ return 0;
+err:
+ kfree(dmah);
+ return ret;
+}
+
+static const struct uverbs_attr_spec uverbs_dmah_mem_type[] = {
+ [TPH_MEM_TYPE_VM] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_NO_DATA(),
+ },
+ [TPH_MEM_TYPE_PM] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_NO_DATA(),
+ },
+};
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_DMAH_ALLOC,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DMAH_HANDLE,
+ UVERBS_OBJECT_DMAH,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DMAH_CPU_ID,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_ALLOC_DMAH_TPH_MEM_TYPE,
+ uverbs_dmah_mem_type,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DMAH_PH,
+ UVERBS_ATTR_TYPE(u8),
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_DMAH_FREE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DMA_HANDLE,
+ UVERBS_OBJECT_DMAH,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DMAH,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_dmah),
+ &UVERBS_METHOD(UVERBS_METHOD_DMAH_ALLOC),
+ &UVERBS_METHOD(UVERBS_METHOD_DMAH_FREE));
+
+const struct uapi_definition uverbs_def_obj_dmah[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DMAH,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_dmah),
+ UAPI_DEF_OBJ_NEEDS_FN(alloc_dmah)),
+ {}
+};
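The PH check in UVERBS_METHOD_DMAH_ALLOC above only allows the two low-order processing-hint bits. A minimal standalone restatement of that rule:

#include <errno.h>
#include <stdint.h>

static int validate_ph(uint8_t ph)
{
	if (ph & 0xFC)		/* bits 7:2 are reserved and must be zero */
		return -EINVAL;
	return 0;		/* bits 1:0 carry the TPH processing hint */
}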
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index 459cf165b231..0ddcf6da66c4 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -39,394 +39,13 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject,
struct uverbs_attr_bundle *attrs)
{
struct ib_flow_action *action = uobject->object;
- int ret;
- ret = ib_destroy_usecnt(&action->usecnt, why, uobject);
- if (ret)
- return ret;
+ if (atomic_read(&action->usecnt))
+ return -EBUSY;
return action->device->ops.destroy_flow_action(action);
}
-static u64 esp_flags_uverbs_to_verbs(struct uverbs_attr_bundle *attrs,
- u32 flags, bool is_modify)
-{
- u64 verbs_flags = flags;
-
- if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ESN))
- verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED;
-
- if (is_modify && uverbs_attr_is_valid(attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS))
- verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS;
-
- return verbs_flags;
-};
-
-static int validate_flow_action_esp_keymat_aes_gcm(struct ib_flow_action_attrs_esp_keymats *keymat)
-{
- struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm =
- &keymat->keymat.aes_gcm;
-
- if (aes_gcm->iv_algo > IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
- return -EOPNOTSUPP;
-
- if (aes_gcm->key_len != 32 &&
- aes_gcm->key_len != 24 &&
- aes_gcm->key_len != 16)
- return -EINVAL;
-
- if (aes_gcm->icv_len != 16 &&
- aes_gcm->icv_len != 8 &&
- aes_gcm->icv_len != 12)
- return -EINVAL;
-
- return 0;
-}
-
-static int (* const flow_action_esp_keymat_validate[])(struct ib_flow_action_attrs_esp_keymats *keymat) = {
- [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = validate_flow_action_esp_keymat_aes_gcm,
-};
-
-static int flow_action_esp_replay_none(struct ib_flow_action_attrs_esp_replays *replay,
- bool is_modify)
-{
- /* This is used in order to modify an esp flow action with an enabled
- * replay protection to a disabled one. This is only supported via
- * modify, as in create verb we can simply drop the REPLAY attribute and
- * achieve the same thing.
- */
- return is_modify ? 0 : -EINVAL;
-}
-
-static int flow_action_esp_replay_def_ok(struct ib_flow_action_attrs_esp_replays *replay,
- bool is_modify)
-{
- /* Some replay protections could always be enabled without validating
- * anything.
- */
- return 0;
-}
-
-static int (* const flow_action_esp_replay_validate[])(struct ib_flow_action_attrs_esp_replays *replay,
- bool is_modify) = {
- [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = flow_action_esp_replay_none,
- [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = flow_action_esp_replay_def_ok,
-};
-
-static int parse_esp_ip(enum ib_flow_spec_type proto,
- const void __user *val_ptr,
- size_t len, union ib_flow_spec *out)
-{
- int ret;
- const struct ib_uverbs_flow_ipv4_filter ipv4 = {
- .src_ip = cpu_to_be32(0xffffffffUL),
- .dst_ip = cpu_to_be32(0xffffffffUL),
- .proto = 0xff,
- .tos = 0xff,
- .ttl = 0xff,
- .flags = 0xff,
- };
- const struct ib_uverbs_flow_ipv6_filter ipv6 = {
- .src_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
- .dst_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
- .flow_label = cpu_to_be32(0xffffffffUL),
- .next_hdr = 0xff,
- .traffic_class = 0xff,
- .hop_limit = 0xff,
- };
- union {
- struct ib_uverbs_flow_ipv4_filter ipv4;
- struct ib_uverbs_flow_ipv6_filter ipv6;
- } user_val = {};
- const void *user_pmask;
- size_t val_len;
-
- /* If the flow IPv4/IPv6 flow specifications are extended, the mask
- * should be changed as well.
- */
- BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv4_filter, flags) +
- sizeof(ipv4.flags) != sizeof(ipv4));
- BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv6_filter, reserved) +
- sizeof(ipv6.reserved) != sizeof(ipv6));
-
- switch (proto) {
- case IB_FLOW_SPEC_IPV4:
- if (len > sizeof(user_val.ipv4) &&
- !ib_is_buffer_cleared(val_ptr + sizeof(user_val.ipv4),
- len - sizeof(user_val.ipv4)))
- return -EOPNOTSUPP;
-
- val_len = min_t(size_t, len, sizeof(user_val.ipv4));
- ret = copy_from_user(&user_val.ipv4, val_ptr,
- val_len);
- if (ret)
- return -EFAULT;
-
- user_pmask = &ipv4;
- break;
- case IB_FLOW_SPEC_IPV6:
- if (len > sizeof(user_val.ipv6) &&
- !ib_is_buffer_cleared(val_ptr + sizeof(user_val.ipv6),
- len - sizeof(user_val.ipv6)))
- return -EOPNOTSUPP;
-
- val_len = min_t(size_t, len, sizeof(user_val.ipv6));
- ret = copy_from_user(&user_val.ipv6, val_ptr,
- val_len);
- if (ret)
- return -EFAULT;
-
- user_pmask = &ipv6;
- break;
- default:
- return -EOPNOTSUPP;
- }
-
- return ib_uverbs_kern_spec_to_ib_spec_filter(proto, user_pmask,
- &user_val,
- val_len, out);
-}
-
-static int flow_action_esp_get_encap(struct ib_flow_spec_list *out,
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uverbs_flow_action_esp_encap uverbs_encap;
- int ret;
-
- ret = uverbs_copy_from(&uverbs_encap, attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP);
- if (ret)
- return ret;
-
- /* We currently support only one encap */
- if (uverbs_encap.next_ptr)
- return -EOPNOTSUPP;
-
- if (uverbs_encap.type != IB_FLOW_SPEC_IPV4 &&
- uverbs_encap.type != IB_FLOW_SPEC_IPV6)
- return -EOPNOTSUPP;
-
- return parse_esp_ip(uverbs_encap.type,
- u64_to_user_ptr(uverbs_encap.val_ptr),
- uverbs_encap.len,
- &out->spec);
-}
-
-struct ib_flow_action_esp_attr {
- struct ib_flow_action_attrs_esp hdr;
- struct ib_flow_action_attrs_esp_keymats keymat;
- struct ib_flow_action_attrs_esp_replays replay;
- /* We currently support only one spec */
- struct ib_flow_spec_list encap;
-};
-
-#define ESP_LAST_SUPPORTED_FLAG IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW
-static int parse_flow_action_esp(struct ib_device *ib_dev,
- struct uverbs_attr_bundle *attrs,
- struct ib_flow_action_esp_attr *esp_attr,
- bool is_modify)
-{
- struct ib_uverbs_flow_action_esp uverbs_esp = {};
- int ret;
-
- /* Optional param, if it doesn't exist, we get -ENOENT and skip it */
- ret = uverbs_copy_from(&esp_attr->hdr.esn, attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_ESN);
- if (IS_UVERBS_COPY_ERR(ret))
- return ret;
-
- /* This can be called from FLOW_ACTION_ESP_MODIFY where
- * UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS is optional
- */
- if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS)) {
- ret = uverbs_copy_from_or_zero(&uverbs_esp, attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS);
- if (ret)
- return ret;
-
- if (uverbs_esp.flags & ~((ESP_LAST_SUPPORTED_FLAG << 1) - 1))
- return -EOPNOTSUPP;
-
- esp_attr->hdr.spi = uverbs_esp.spi;
- esp_attr->hdr.seq = uverbs_esp.seq;
- esp_attr->hdr.tfc_pad = uverbs_esp.tfc_pad;
- esp_attr->hdr.hard_limit_pkts = uverbs_esp.hard_limit_pkts;
- }
- esp_attr->hdr.flags = esp_flags_uverbs_to_verbs(attrs, uverbs_esp.flags,
- is_modify);
-
- if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT)) {
- esp_attr->keymat.protocol =
- uverbs_attr_get_enum_id(attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT);
- ret = uverbs_copy_from_or_zero(&esp_attr->keymat.keymat,
- attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT);
- if (ret)
- return ret;
-
- ret = flow_action_esp_keymat_validate[esp_attr->keymat.protocol](&esp_attr->keymat);
- if (ret)
- return ret;
-
- esp_attr->hdr.keymat = &esp_attr->keymat;
- }
-
- if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY)) {
- esp_attr->replay.protocol =
- uverbs_attr_get_enum_id(attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY);
-
- ret = uverbs_copy_from_or_zero(&esp_attr->replay.replay,
- attrs,
- UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY);
- if (ret)
- return ret;
-
- ret = flow_action_esp_replay_validate[esp_attr->replay.protocol](&esp_attr->replay,
- is_modify);
- if (ret)
- return ret;
-
- esp_attr->hdr.replay = &esp_attr->replay;
- }
-
- if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP)) {
- ret = flow_action_esp_get_encap(&esp_attr->encap, attrs);
- if (ret)
- return ret;
-
- esp_attr->hdr.encap = &esp_attr->encap;
- }
-
- return 0;
-}
-
-static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uobject *uobj = uverbs_attr_get_uobject(
- attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE);
- struct ib_device *ib_dev = attrs->context->device;
- int ret;
- struct ib_flow_action *action;
- struct ib_flow_action_esp_attr esp_attr = {};
-
- if (!ib_dev->ops.create_flow_action_esp)
- return -EOPNOTSUPP;
-
- ret = parse_flow_action_esp(ib_dev, attrs, &esp_attr, false);
- if (ret)
- return ret;
-
- /* No need to check as this attribute is marked as MANDATORY */
- action = ib_dev->ops.create_flow_action_esp(ib_dev, &esp_attr.hdr,
- attrs);
- if (IS_ERR(action))
- return PTR_ERR(action);
-
- uverbs_flow_action_fill_action(action, uobj, ib_dev,
- IB_FLOW_ACTION_ESP);
-
- return 0;
-}
-
-static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uobject *uobj = uverbs_attr_get_uobject(
- attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE);
- struct ib_flow_action *action = uobj->object;
- int ret;
- struct ib_flow_action_esp_attr esp_attr = {};
-
- if (!action->device->ops.modify_flow_action_esp)
- return -EOPNOTSUPP;
-
- ret = parse_flow_action_esp(action->device, attrs, &esp_attr, true);
- if (ret)
- return ret;
-
- if (action->type != IB_FLOW_ACTION_ESP)
- return -EINVAL;
-
- return action->device->ops.modify_flow_action_esp(action,
- &esp_attr.hdr,
- attrs);
-}
-
-static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = {
- [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_STRUCT(
- struct ib_uverbs_flow_action_esp_keymat_aes_gcm,
- aes_key),
- },
-};
-
-static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = {
- [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_NO_DATA(),
- },
- [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp,
- size),
- },
-};
-
-DECLARE_UVERBS_NAMED_METHOD(
- UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
- UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_NEW,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
- UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp,
- hard_limit_pkts),
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
- UVERBS_ATTR_TYPE(__u32),
- UA_OPTIONAL),
- UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
- uverbs_flow_action_esp_keymat,
- UA_MANDATORY),
- UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
- uverbs_flow_action_esp_replay,
- UA_OPTIONAL),
- UVERBS_ATTR_PTR_IN(
- UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
- UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap),
- UA_OPTIONAL));
-
-DECLARE_UVERBS_NAMED_METHOD(
- UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
- UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_WRITE,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
- UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp,
- hard_limit_pkts),
- UA_OPTIONAL),
- UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
- UVERBS_ATTR_TYPE(__u32),
- UA_OPTIONAL),
- UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
- uverbs_flow_action_esp_keymat,
- UA_OPTIONAL),
- UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
- uverbs_flow_action_esp_replay,
- UA_OPTIONAL),
- UVERBS_ATTR_PTR_IN(
- UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
- UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap),
- UA_OPTIONAL));
-
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
UVERBS_METHOD_FLOW_ACTION_DESTROY,
UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
@@ -437,9 +56,7 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY(
DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_FLOW_ACTION,
UVERBS_TYPE_ALLOC_IDR(uverbs_free_flow_action),
- &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE),
- &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY),
- &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY));
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY));
const struct uapi_definition uverbs_def_obj_flow_action[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
index 610d3b9f7654..570b9656801d 100644
--- a/drivers/infiniband/core/uverbs_std_types_mr.c
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2020, Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -33,6 +34,7 @@
#include "rdma_core.h"
#include "uverbs.h"
#include <rdma/uverbs_std_types.h>
+#include "restrack.h"
static int uverbs_free_mr(struct ib_uobject *uobject,
enum rdma_remove_reason why,
@@ -69,7 +71,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_ADVISE_MR)(
num_sge = uverbs_attr_ptr_get_array_size(
attrs, UVERBS_ATTR_ADVISE_MR_SGE_LIST, sizeof(struct ib_sge));
- if (num_sge < 0)
+ if (num_sge <= 0)
return num_sge;
sg_list = uverbs_attr_get_alloced_ptr(attrs,
@@ -114,7 +116,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
if (!(attr.access_flags & IB_ZERO_BASED))
return -EINVAL;
- ret = ib_check_mr_access(attr.access_flags);
+ ret = ib_check_mr_access(ib_dev, attr.access_flags);
if (ret)
return ret;
@@ -128,28 +130,268 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
mr->device = pd->device;
mr->pd = pd;
+ mr->type = IB_MR_TYPE_DM;
mr->dm = dm;
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
atomic_inc(&dm->usecnt);
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_set_name(&mr->res, NULL);
+ rdma_restrack_add(&mr->res);
uobj->object = mr;
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE);
+
ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_LKEY, &mr->lkey,
sizeof(mr->lkey));
if (ret)
- goto err_dereg;
+ return ret;
ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
&mr->rkey, sizeof(mr->rkey));
+ return ret;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_MR)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_mr *mr =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_QUERY_MR_HANDLE);
+ int ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_LKEY, &mr->lkey,
+ sizeof(mr->lkey));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_RKEY,
+ &mr->rkey, sizeof(mr->rkey));
+
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_LENGTH,
+ &mr->length, sizeof(mr->length));
+
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_IOVA,
+ &mr->iova, sizeof(mr->iova));
+
+ return IS_UVERBS_COPY_ERR(ret) ? ret : 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_REG_DMABUF_MR)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_REG_DMABUF_MR_HANDLE);
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DMABUF_MR_PD_HANDLE);
+ struct ib_device *ib_dev = pd->device;
+
+ u64 offset, length, iova;
+ u32 fd, access_flags;
+ struct ib_mr *mr;
+ int ret;
+
+ if (!ib_dev->ops.reg_user_mr_dmabuf)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_copy_from(&offset, attrs,
+ UVERBS_ATTR_REG_DMABUF_MR_OFFSET);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_from(&length, attrs,
+ UVERBS_ATTR_REG_DMABUF_MR_LENGTH);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_from(&iova, attrs,
+ UVERBS_ATTR_REG_DMABUF_MR_IOVA);
+ if (ret)
+ return ret;
+
+ if ((offset & ~PAGE_MASK) != (iova & ~PAGE_MASK))
+ return -EINVAL;
+
+ ret = uverbs_copy_from(&fd, attrs,
+ UVERBS_ATTR_REG_DMABUF_MR_FD);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_flags32(&access_flags, attrs,
+ UVERBS_ATTR_REG_DMABUF_MR_ACCESS_FLAGS,
+ IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_ATOMIC |
+ IB_ACCESS_RELAXED_ORDERING);
+ if (ret)
+ return ret;
+
+ ret = ib_check_mr_access(ib_dev, access_flags);
+ if (ret)
+ return ret;
+
+ mr = pd->device->ops.reg_user_mr_dmabuf(pd, offset, length, iova, fd,
+ access_flags, NULL,
+ attrs);
+ if (IS_ERR(mr))
+ return PTR_ERR(mr);
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->type = IB_MR_TYPE_USER;
+ mr->uobject = uobj;
+ atomic_inc(&pd->usecnt);
+
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_set_name(&mr->res, NULL);
+ rdma_restrack_add(&mr->res);
+ uobj->object = mr;
+
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_REG_DMABUF_MR_HANDLE);
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DMABUF_MR_RESP_LKEY,
+ &mr->lkey, sizeof(mr->lkey));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DMABUF_MR_RESP_RKEY,
+ &mr->rkey, sizeof(mr->rkey));
+ return ret;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_REG_MR)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_REG_MR_HANDLE);
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_MR_PD_HANDLE);
+ u32 valid_access_flags = IB_ACCESS_SUPPORTED;
+ u64 length, iova, fd_offset = 0, addr = 0;
+ struct ib_device *ib_dev = pd->device;
+ struct ib_dmah *dmah = NULL;
+ bool has_fd_offset = false;
+ bool has_addr = false;
+ bool has_fd = false;
+ u32 access_flags;
+ struct ib_mr *mr;
+ int fd;
+ int ret;
+
+ ret = uverbs_copy_from(&iova, attrs, UVERBS_ATTR_REG_MR_IOVA);
if (ret)
- goto err_dereg;
+ return ret;
+
+ ret = uverbs_copy_from(&length, attrs, UVERBS_ATTR_REG_MR_LENGTH);
+ if (ret)
+ return ret;
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_REG_MR_ADDR)) {
+ ret = uverbs_copy_from(&addr, attrs,
+ UVERBS_ATTR_REG_MR_ADDR);
+ if (ret)
+ return ret;
+ has_addr = true;
+ }
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_REG_MR_FD_OFFSET)) {
+ ret = uverbs_copy_from(&fd_offset, attrs,
+ UVERBS_ATTR_REG_MR_FD_OFFSET);
+ if (ret)
+ return ret;
+ has_fd_offset = true;
+ }
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_REG_MR_FD)) {
+ ret = uverbs_get_raw_fd(&fd, attrs,
+ UVERBS_ATTR_REG_MR_FD);
+ if (ret)
+ return ret;
+ has_fd = true;
+ }
+
+ if (has_fd) {
+ if (!ib_dev->ops.reg_user_mr_dmabuf)
+ return -EOPNOTSUPP;
+
+ /* An FD requires an offset and cannot be combined with addr */
+ if (!has_fd_offset || has_addr)
+ return -EINVAL;
+
+ if ((fd_offset & ~PAGE_MASK) != (iova & ~PAGE_MASK))
+ return -EINVAL;
+
+ valid_access_flags = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_ATOMIC |
+ IB_ACCESS_RELAXED_ORDERING;
+ } else {
+ if (!has_addr || has_fd_offset)
+ return -EINVAL;
+
+ if ((addr & ~PAGE_MASK) != (iova & ~PAGE_MASK))
+ return -EINVAL;
+ }
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_REG_MR_DMA_HANDLE)) {
+ dmah = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_REG_MR_DMA_HANDLE);
+ if (IS_ERR(dmah))
+ return PTR_ERR(dmah);
+ }
+
+ ret = uverbs_get_flags32(&access_flags, attrs,
+ UVERBS_ATTR_REG_MR_ACCESS_FLAGS,
+ valid_access_flags);
+ if (ret)
+ return ret;
+
+ ret = ib_check_mr_access(ib_dev, access_flags);
+ if (ret)
+ return ret;
+
+ if (has_fd)
+ mr = pd->device->ops.reg_user_mr_dmabuf(pd, fd_offset, length,
+ iova, fd, access_flags,
+ dmah, attrs);
+ else
+ mr = pd->device->ops.reg_user_mr(pd, addr, length, iova,
+ access_flags, dmah, NULL);
- return 0;
+ if (IS_ERR(mr))
+ return PTR_ERR(mr);
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->type = IB_MR_TYPE_USER;
+ mr->uobject = uobj;
+ atomic_inc(&pd->usecnt);
+ if (dmah) {
+ mr->dmah = dmah;
+ atomic_inc(&dmah->usecnt);
+ }
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_set_name(&mr->res, NULL);
+ rdma_restrack_add(&mr->res);
+ uobj->object = mr;
-err_dereg:
- ib_dereg_mr_user(mr, &attrs->driver_udata);
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_REG_MR_HANDLE);
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_MR_RESP_LKEY,
+ &mr->lkey, sizeof(mr->lkey));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_MR_RESP_RKEY,
+ &mr->rkey, sizeof(mr->rkey));
return ret;
}
@@ -171,6 +413,25 @@ DECLARE_UVERBS_NAMED_METHOD(
UA_ALLOC_AND_COPY));
DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QUERY_MR,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_QUERY_MR_HANDLE,
+ UVERBS_OBJECT_MR,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_RKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_LKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_IOVA,
+ UVERBS_ATTR_TYPE(u64),
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD(
UVERBS_METHOD_DM_MR_REG,
UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE,
UVERBS_OBJECT_MR,
@@ -199,6 +460,75 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_TYPE(u32),
UA_MANDATORY));
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_REG_DMABUF_MR,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DMABUF_MR_HANDLE,
+ UVERBS_OBJECT_MR,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DMABUF_MR_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_IOVA,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_FD,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_REG_DMABUF_MR_ACCESS_FLAGS,
+ enum ib_access_flags),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DMABUF_MR_RESP_LKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DMABUF_MR_RESP_RKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_REG_MR,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_MR_HANDLE,
+ UVERBS_OBJECT_MR,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_MR_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_MR_DMA_HANDLE,
+ UVERBS_OBJECT_DMAH,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_MR_IOVA,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_MR_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_REG_MR_ACCESS_FLAGS,
+ enum ib_access_flags,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_MR_ADDR,
+ UVERBS_ATTR_TYPE(u64),
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_MR_FD_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_OPTIONAL),
+ UVERBS_ATTR_RAW_FD(UVERBS_ATTR_REG_MR_FD,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_MR_RESP_LKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_MR_RESP_RKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
UVERBS_METHOD_MR_DESTROY,
UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MR_HANDLE,
@@ -209,9 +539,12 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY(
DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_MR,
UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr),
+ &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR),
&UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG),
&UVERBS_METHOD(UVERBS_METHOD_MR_DESTROY),
- &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR));
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_MR),
+ &UVERBS_METHOD(UVERBS_METHOD_REG_DMABUF_MR),
+ &UVERBS_METHOD(UVERBS_METHOD_REG_MR));
const struct uapi_definition uverbs_def_obj_mr[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MR,
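Both REG_DMABUF_MR and the fd path of REG_MR above require the dmabuf offset and the requested iova to land at the same offset within a page. A standalone sketch of that check (PAGE_SIZE hard-coded here purely for illustration):

#include <stdbool.h>
#include <stdint.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

static bool dmabuf_offsets_compatible(uint64_t offset, uint64_t iova)
{
	/* the in-page offsets must match so the HW can map page-aligned chunks */
	return (offset & ~PAGE_MASK) == (iova & ~PAGE_MASK);
}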
diff --git a/drivers/infiniband/core/uverbs_std_types_qp.c b/drivers/infiniband/core/uverbs_std_types_qp.c
new file mode 100644
index 000000000000..be0730e8509e
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_qp.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+#include "core_priv.h"
+
+static int uverbs_free_qp(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_qp *qp = uobject->object;
+ struct ib_uqp_object *uqp =
+ container_of(uobject, struct ib_uqp_object, uevent.uobject);
+ int ret;
+
+ /*
+ * If this is a user triggered destroy then do not allow destruction
+ * until the user cleans up all the mcast bindings. Unlike in other
+ * places we forcibly clean up the mcast attachments for !DESTROY
+ * because the mcast attaches are not ubojects and will not be
+ * destroyed by anything else during cleanup processing.
+ */
+ if (why == RDMA_REMOVE_DESTROY) {
+ if (!list_empty(&uqp->mcast_list))
+ return -EBUSY;
+ } else if (qp == qp->real_qp) {
+ ib_uverbs_detach_umcast(qp, uqp);
+ }
+
+ ret = ib_destroy_qp_user(qp, &attrs->driver_udata);
+ if (ret)
+ return ret;
+
+ if (uqp->uxrcd)
+ atomic_dec(&uqp->uxrcd->refcnt);
+
+ ib_uverbs_release_uevent(&uqp->uevent);
+ return 0;
+}
+
+static int check_creation_flags(enum ib_qp_type qp_type,
+ u32 create_flags)
+{
+ create_flags &= ~IB_UVERBS_QP_CREATE_SQ_SIG_ALL;
+
+ if (!create_flags || qp_type == IB_QPT_DRIVER)
+ return 0;
+
+ if (qp_type != IB_QPT_RAW_PACKET && qp_type != IB_QPT_UD)
+ return -EINVAL;
+
+ if ((create_flags & IB_UVERBS_QP_CREATE_SCATTER_FCS ||
+ create_flags & IB_UVERBS_QP_CREATE_CVLAN_STRIPPING) &&
+ qp_type != IB_QPT_RAW_PACKET)
+ return -EINVAL;
+
+ return 0;
+}
+
+static void set_caps(struct ib_qp_init_attr *attr,
+ struct ib_uverbs_qp_cap *cap, bool req)
+{
+ if (req) {
+ attr->cap.max_send_wr = cap->max_send_wr;
+ attr->cap.max_recv_wr = cap->max_recv_wr;
+ attr->cap.max_send_sge = cap->max_send_sge;
+ attr->cap.max_recv_sge = cap->max_recv_sge;
+ attr->cap.max_inline_data = cap->max_inline_data;
+ } else {
+ cap->max_send_wr = attr->cap.max_send_wr;
+ cap->max_recv_wr = attr->cap.max_recv_wr;
+ cap->max_send_sge = attr->cap.max_send_sge;
+ cap->max_recv_sge = attr->cap.max_recv_sge;
+ cap->max_inline_data = attr->cap.max_inline_data;
+ }
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QP_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uqp_object *obj = container_of(
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_QP_HANDLE),
+ typeof(*obj), uevent.uobject);
+ struct ib_qp_init_attr attr = {};
+ struct ib_uverbs_qp_cap cap = {};
+ struct ib_rwq_ind_table *rwq_ind_tbl = NULL;
+ struct ib_qp *qp;
+ struct ib_pd *pd = NULL;
+ struct ib_srq *srq = NULL;
+ struct ib_cq *recv_cq = NULL;
+ struct ib_cq *send_cq = NULL;
+ struct ib_xrcd *xrcd = NULL;
+ struct ib_uobject *xrcd_uobj = NULL;
+ struct ib_device *device;
+ u64 user_handle;
+ int ret;
+
+ ret = uverbs_copy_from_or_zero(&cap, attrs,
+ UVERBS_ATTR_CREATE_QP_CAP);
+ if (!ret)
+ ret = uverbs_copy_from(&user_handle, attrs,
+ UVERBS_ATTR_CREATE_QP_USER_HANDLE);
+ if (!ret)
+ ret = uverbs_get_const(&attr.qp_type, attrs,
+ UVERBS_ATTR_CREATE_QP_TYPE);
+ if (ret)
+ return ret;
+
+ switch (attr.qp_type) {
+ case IB_QPT_XRC_TGT:
+ if (uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_RECV_CQ_HANDLE) ||
+ uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE) ||
+ uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_PD_HANDLE) ||
+ uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_IND_TABLE_HANDLE))
+ return -EINVAL;
+
+ xrcd_uobj = uverbs_attr_get_uobject(attrs,
+ UVERBS_ATTR_CREATE_QP_XRCD_HANDLE);
+ if (IS_ERR(xrcd_uobj))
+ return PTR_ERR(xrcd_uobj);
+
+ xrcd = (struct ib_xrcd *)xrcd_uobj->object;
+ if (!xrcd)
+ return -EINVAL;
+ device = xrcd->device;
+ break;
+ case IB_UVERBS_QPT_RAW_PACKET:
+ if (!rdma_uattrs_has_raw_cap(attrs))
+ return -EPERM;
+ fallthrough;
+ case IB_UVERBS_QPT_RC:
+ case IB_UVERBS_QPT_UC:
+ case IB_UVERBS_QPT_UD:
+ case IB_UVERBS_QPT_XRC_INI:
+ case IB_UVERBS_QPT_DRIVER:
+ if (uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_XRCD_HANDLE) ||
+ (uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_SRQ_HANDLE) &&
+ attr.qp_type == IB_QPT_XRC_INI))
+ return -EINVAL;
+
+ pd = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_QP_PD_HANDLE);
+ if (IS_ERR(pd))
+ return PTR_ERR(pd);
+
+ rwq_ind_tbl = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_QP_IND_TABLE_HANDLE);
+ if (!IS_ERR(rwq_ind_tbl)) {
+ if (cap.max_recv_wr || cap.max_recv_sge ||
+ uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_RECV_CQ_HANDLE) ||
+ uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_SRQ_HANDLE))
+ return -EINVAL;
+
+ /* send_cq is optional */
+ if (cap.max_send_wr) {
+ send_cq = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE);
+ if (IS_ERR(send_cq))
+ return PTR_ERR(send_cq);
+ }
+ attr.rwq_ind_tbl = rwq_ind_tbl;
+ } else {
+ send_cq = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE);
+ if (IS_ERR(send_cq))
+ return PTR_ERR(send_cq);
+
+ if (attr.qp_type != IB_QPT_XRC_INI) {
+ recv_cq = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_QP_RECV_CQ_HANDLE);
+ if (IS_ERR(recv_cq))
+ return PTR_ERR(recv_cq);
+ }
+ }
+
+ device = pd->device;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ret = uverbs_get_flags32(&attr.create_flags, attrs,
+ UVERBS_ATTR_CREATE_QP_FLAGS,
+ IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
+ IB_UVERBS_QP_CREATE_SCATTER_FCS |
+ IB_UVERBS_QP_CREATE_CVLAN_STRIPPING |
+ IB_UVERBS_QP_CREATE_PCI_WRITE_END_PADDING |
+ IB_UVERBS_QP_CREATE_SQ_SIG_ALL);
+ if (ret)
+ return ret;
+
+ ret = check_creation_flags(attr.qp_type, attr.create_flags);
+ if (ret)
+ return ret;
+
+ if (uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_SOURCE_QPN)) {
+ ret = uverbs_copy_from(&attr.source_qpn, attrs,
+ UVERBS_ATTR_CREATE_QP_SOURCE_QPN);
+ if (ret)
+ return ret;
+ attr.create_flags |= IB_QP_CREATE_SOURCE_QPN;
+ }
+
+ srq = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_QP_SRQ_HANDLE);
+ if (!IS_ERR(srq)) {
+ if ((srq->srq_type == IB_SRQT_XRC &&
+ attr.qp_type != IB_QPT_XRC_TGT) ||
+ (srq->srq_type != IB_SRQT_XRC &&
+ attr.qp_type == IB_QPT_XRC_TGT))
+ return -EINVAL;
+ attr.srq = srq;
+ }
+
+ obj->uevent.event_file = ib_uverbs_get_async_event(attrs,
+ UVERBS_ATTR_CREATE_QP_EVENT_FD);
+ INIT_LIST_HEAD(&obj->uevent.event_list);
+ INIT_LIST_HEAD(&obj->mcast_list);
+ obj->uevent.uobject.user_handle = user_handle;
+ attr.event_handler = ib_uverbs_qp_event_handler;
+ attr.send_cq = send_cq;
+ attr.recv_cq = recv_cq;
+ attr.xrcd = xrcd;
+ if (attr.create_flags & IB_UVERBS_QP_CREATE_SQ_SIG_ALL) {
+ /* This create flag is uverbs-only and must be masked out before
+ * calling the drivers. It exists so the ioctl path does not need an
+ * extra user attribute just for choosing the signalling type.
+ */
+ attr.create_flags &= ~IB_UVERBS_QP_CREATE_SQ_SIG_ALL;
+ attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+ } else {
+ attr.sq_sig_type = IB_SIGNAL_REQ_WR;
+ }
+
+ set_caps(&attr, &cap, true);
+ mutex_init(&obj->mcast_lock);
+
+ qp = ib_create_qp_user(device, pd, &attr, &attrs->driver_udata, obj,
+ KBUILD_MODNAME);
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
+ goto err_put;
+ }
+ ib_qp_usecnt_inc(qp);
+
+ if (attr.qp_type == IB_QPT_XRC_TGT) {
+ obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
+ uobject);
+ atomic_inc(&obj->uxrcd->refcnt);
+ }
+
+ obj->uevent.uobject.object = qp;
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_QP_HANDLE);
+
+ set_caps(&attr, &cap, false);
+ ret = uverbs_copy_to_struct_or_zero(attrs,
+ UVERBS_ATTR_CREATE_QP_RESP_CAP, &cap,
+ sizeof(cap));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_QP_RESP_QP_NUM,
+ &qp->qp_num,
+ sizeof(qp->qp_num));
+
+ return ret;
+err_put:
+ if (obj->uevent.event_file)
+ uverbs_uobject_put(&obj->uevent.event_file->uobj);
+ return ret;
+};
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QP_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_HANDLE,
+ UVERBS_OBJECT_QP,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_XRCD_HANDLE,
+ UVERBS_OBJECT_XRCD,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_SRQ_HANDLE,
+ UVERBS_OBJECT_SRQ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE,
+ UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_RECV_CQ_HANDLE,
+ UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_IND_TABLE_HANDLE,
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_QP_USER_HANDLE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_QP_CAP,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_qp_cap,
+ max_inline_data),
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_CREATE_QP_TYPE,
+ enum ib_uverbs_qp_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_CREATE_QP_FLAGS,
+ enum ib_uverbs_qp_create_flags,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_QP_SOURCE_QPN,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_QP_EVENT_FD,
+ UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_QP_RESP_CAP,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_qp_cap,
+ max_inline_data),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_QP_RESP_QP_NUM,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_UHW());
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QP_DESTROY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_QP_HANDLE);
+ struct ib_uqp_object *obj =
+ container_of(uobj, struct ib_uqp_object, uevent.uobject);
+ struct ib_uverbs_destroy_qp_resp resp = {
+ .events_reported = obj->uevent.events_reported
+ };
+
+ return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_QP_RESP, &resp,
+ sizeof(resp));
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QP_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_QP_HANDLE,
+ UVERBS_OBJECT_QP,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_QP_RESP,
+ UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_qp_resp),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_QP,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp),
+ &UVERBS_METHOD(UVERBS_METHOD_QP_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_QP_DESTROY));
+
+const struct uapi_definition uverbs_def_obj_qp[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_QP,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_qp)),
+ {}
+};
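check_creation_flags() in the new QP_CREATE handler above ignores the uverbs-only SQ_SIG_ALL bit and restricts the remaining create flags by QP type. A standalone restatement of that policy (enum and flag values stubbed for illustration, not the kernel definitions):

#include <errno.h>
#include <stdint.h>

/* stubbed stand-ins for the uverbs enums used above */
#define QP_CREATE_SQ_SIG_ALL		(1u << 0)
#define QP_CREATE_SCATTER_FCS		(1u << 1)
#define QP_CREATE_CVLAN_STRIPPING	(1u << 2)
enum qp_type { QPT_RC, QPT_UC, QPT_UD, QPT_RAW_PACKET, QPT_DRIVER };

static int check_qp_create_flags(enum qp_type qp_type, uint32_t create_flags)
{
	create_flags &= ~QP_CREATE_SQ_SIG_ALL;	/* uverbs-only, hidden from drivers */

	if (!create_flags || qp_type == QPT_DRIVER)
		return 0;			/* nothing left to police, or driver QP */

	if (qp_type != QPT_RAW_PACKET && qp_type != QPT_UD)
		return -EINVAL;			/* other flags only valid on RAW_PACKET/UD */

	if ((create_flags & (QP_CREATE_SCATTER_FCS | QP_CREATE_CVLAN_STRIPPING)) &&
	    qp_type != QPT_RAW_PACKET)
		return -EINVAL;			/* these two are RAW_PACKET only */

	return 0;
}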
diff --git a/drivers/infiniband/core/uverbs_std_types_srq.c b/drivers/infiniband/core/uverbs_std_types_srq.c
new file mode 100644
index 000000000000..e5513f828bdc
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_srq.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int uverbs_free_srq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_srq *srq = uobject->object;
+ struct ib_uevent_object *uevent =
+ container_of(uobject, struct ib_uevent_object, uobject);
+ enum ib_srq_type srq_type = srq->srq_type;
+ int ret;
+
+ ret = ib_destroy_srq_user(srq, &attrs->driver_udata);
+ if (ret)
+ return ret;
+
+ if (srq_type == IB_SRQT_XRC) {
+ struct ib_usrq_object *us =
+ container_of(uobject, struct ib_usrq_object,
+ uevent.uobject);
+
+ atomic_dec(&us->uxrcd->refcnt);
+ }
+
+ ib_uverbs_release_uevent(uevent);
+ return 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_SRQ_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_usrq_object *obj = container_of(
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_SRQ_HANDLE),
+ typeof(*obj), uevent.uobject);
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_CREATE_SRQ_PD_HANDLE);
+ struct ib_srq_init_attr attr = {};
+ struct ib_uobject *xrcd_uobj;
+ struct ib_srq *srq;
+ u64 user_handle;
+ int ret;
+
+ ret = uverbs_copy_from(&attr.attr.max_sge, attrs,
+ UVERBS_ATTR_CREATE_SRQ_MAX_SGE);
+ if (!ret)
+ ret = uverbs_copy_from(&attr.attr.max_wr, attrs,
+ UVERBS_ATTR_CREATE_SRQ_MAX_WR);
+ if (!ret)
+ ret = uverbs_copy_from(&attr.attr.srq_limit, attrs,
+ UVERBS_ATTR_CREATE_SRQ_LIMIT);
+ if (!ret)
+ ret = uverbs_copy_from(&user_handle, attrs,
+ UVERBS_ATTR_CREATE_SRQ_USER_HANDLE);
+ if (!ret)
+ ret = uverbs_get_const(&attr.srq_type, attrs,
+ UVERBS_ATTR_CREATE_SRQ_TYPE);
+ if (ret)
+ return ret;
+
+ if (ib_srq_has_cq(attr.srq_type)) {
+ attr.ext.cq = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_SRQ_CQ_HANDLE);
+ if (IS_ERR(attr.ext.cq))
+ return PTR_ERR(attr.ext.cq);
+ }
+
+ switch (attr.srq_type) {
+ case IB_UVERBS_SRQT_XRC:
+ xrcd_uobj = uverbs_attr_get_uobject(attrs,
+ UVERBS_ATTR_CREATE_SRQ_XRCD_HANDLE);
+ if (IS_ERR(xrcd_uobj))
+ return PTR_ERR(xrcd_uobj);
+
+ attr.ext.xrc.xrcd = (struct ib_xrcd *)xrcd_uobj->object;
+ if (!attr.ext.xrc.xrcd)
+ return -EINVAL;
+ obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
+ uobject);
+ atomic_inc(&obj->uxrcd->refcnt);
+ break;
+ case IB_UVERBS_SRQT_TM:
+ ret = uverbs_copy_from(&attr.ext.tag_matching.max_num_tags,
+ attrs,
+ UVERBS_ATTR_CREATE_SRQ_MAX_NUM_TAGS);
+ if (ret)
+ return ret;
+ break;
+ case IB_UVERBS_SRQT_BASIC:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ obj->uevent.event_file = ib_uverbs_get_async_event(attrs,
+ UVERBS_ATTR_CREATE_SRQ_EVENT_FD);
+ INIT_LIST_HEAD(&obj->uevent.event_list);
+ attr.event_handler = ib_uverbs_srq_event_handler;
+ obj->uevent.uobject.user_handle = user_handle;
+
+ srq = ib_create_srq_user(pd, &attr, obj, &attrs->driver_udata);
+ if (IS_ERR(srq)) {
+ ret = PTR_ERR(srq);
+ goto err;
+ }
+
+ obj->uevent.uobject.object = srq;
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_SRQ_HANDLE);
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_SRQ_RESP_MAX_WR,
+ &attr.attr.max_wr,
+ sizeof(attr.attr.max_wr));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_SRQ_RESP_MAX_SGE,
+ &attr.attr.max_sge,
+ sizeof(attr.attr.max_sge));
+ if (ret)
+ return ret;
+
+ if (attr.srq_type == IB_SRQT_XRC) {
+ ret = uverbs_copy_to(attrs,
+ UVERBS_ATTR_CREATE_SRQ_RESP_SRQ_NUM,
+ &srq->ext.xrc.srq_num,
+ sizeof(srq->ext.xrc.srq_num));
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+err:
+ if (obj->uevent.event_file)
+ uverbs_uobject_put(&obj->uevent.event_file->uobj);
+ if (attr.srq_type == IB_SRQT_XRC)
+ atomic_dec(&obj->uxrcd->refcnt);
+ return ret;
+};
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_SRQ_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_SRQ_HANDLE,
+ UVERBS_OBJECT_SRQ,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_SRQ_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_CREATE_SRQ_TYPE,
+ enum ib_uverbs_srq_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_USER_HANDLE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_MAX_WR,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_MAX_SGE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_LIMIT,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_SRQ_XRCD_HANDLE,
+ UVERBS_OBJECT_XRCD,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_SRQ_CQ_HANDLE,
+ UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_MAX_NUM_TAGS,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_SRQ_EVENT_FD,
+ UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_SRQ_RESP_MAX_WR,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_SRQ_RESP_MAX_SGE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_SRQ_RESP_SRQ_NUM,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_UHW());
+
+static int UVERBS_HANDLER(UVERBS_METHOD_SRQ_DESTROY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_SRQ_HANDLE);
+ struct ib_usrq_object *obj =
+ container_of(uobj, struct ib_usrq_object, uevent.uobject);
+ struct ib_uverbs_destroy_srq_resp resp = {
+ .events_reported = obj->uevent.events_reported
+ };
+
+ return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_SRQ_RESP, &resp,
+ sizeof(resp));
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_SRQ_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_SRQ_HANDLE,
+ UVERBS_OBJECT_SRQ,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_SRQ_RESP,
+ UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_srq_resp),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_SRQ,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object),
+ uverbs_free_srq),
+ &UVERBS_METHOD(UVERBS_METHOD_SRQ_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_SRQ_DESTROY)
+);
+
+const struct uapi_definition uverbs_def_obj_srq[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_SRQ,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_srq)),
+ {}
+};
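On the userspace side these methods are reached through the uverbs ioctl() interface. A rough sketch of what an rdma-core call site for SRQ_DESTROY could look like; the DECLARE_COMMAND_BUFFER/fill_attr_*/execute_ioctl helpers are rdma-core internals quoted from memory, so treat the exact names and signatures as assumptions rather than part of this patch:

/* Userspace sketch (rdma-core style): destroy an SRQ via the ioctl path. */
static int example_destroy_srq(struct ibv_context *ctx, uint32_t srq_handle)
{
	struct ib_uverbs_destroy_srq_resp resp;
	DECLARE_COMMAND_BUFFER(cmd, UVERBS_OBJECT_SRQ,
			       UVERBS_METHOD_SRQ_DESTROY, 2);

	fill_attr_in_obj(cmd, UVERBS_ATTR_DESTROY_SRQ_HANDLE, srq_handle);
	fill_attr_out_ptr(cmd, UVERBS_ATTR_DESTROY_SRQ_RESP, &resp);

	/* on success resp.events_reported mirrors obj->uevent.events_reported */
	return execute_ioctl(ctx, cmd);
}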
diff --git a/drivers/infiniband/core/uverbs_std_types_wq.c b/drivers/infiniband/core/uverbs_std_types_wq.c
new file mode 100644
index 000000000000..7ded8339346f
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_wq.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int uverbs_free_wq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_wq *wq = uobject->object;
+ struct ib_uwq_object *uwq =
+ container_of(uobject, struct ib_uwq_object, uevent.uobject);
+ int ret;
+
+ ret = ib_destroy_wq_user(wq, &attrs->driver_udata);
+ if (ret)
+ return ret;
+
+ ib_uverbs_release_uevent(&uwq->uevent);
+ return 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_WQ_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uwq_object *obj = container_of(
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_WQ_HANDLE),
+ typeof(*obj), uevent.uobject);
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_CREATE_WQ_PD_HANDLE);
+ struct ib_cq *cq =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_CREATE_WQ_CQ_HANDLE);
+ struct ib_wq_init_attr wq_init_attr = {};
+ struct ib_wq *wq;
+ u64 user_handle;
+ int ret;
+
+ ret = uverbs_get_flags32(&wq_init_attr.create_flags, attrs,
+ UVERBS_ATTR_CREATE_WQ_FLAGS,
+ IB_UVERBS_WQ_FLAGS_CVLAN_STRIPPING |
+ IB_UVERBS_WQ_FLAGS_SCATTER_FCS |
+ IB_UVERBS_WQ_FLAGS_DELAY_DROP |
+ IB_UVERBS_WQ_FLAGS_PCI_WRITE_END_PADDING);
+ if (!ret)
+ ret = uverbs_copy_from(&wq_init_attr.max_sge, attrs,
+ UVERBS_ATTR_CREATE_WQ_MAX_SGE);
+ if (!ret)
+ ret = uverbs_copy_from(&wq_init_attr.max_wr, attrs,
+ UVERBS_ATTR_CREATE_WQ_MAX_WR);
+ if (!ret)
+ ret = uverbs_copy_from(&user_handle, attrs,
+ UVERBS_ATTR_CREATE_WQ_USER_HANDLE);
+ if (!ret)
+ ret = uverbs_get_const(&wq_init_attr.wq_type, attrs,
+ UVERBS_ATTR_CREATE_WQ_TYPE);
+ if (ret)
+ return ret;
+
+ if (wq_init_attr.wq_type != IB_WQT_RQ)
+ return -EINVAL;
+
+ obj->uevent.event_file = ib_uverbs_get_async_event(attrs,
+ UVERBS_ATTR_CREATE_WQ_EVENT_FD);
+ obj->uevent.uobject.user_handle = user_handle;
+ INIT_LIST_HEAD(&obj->uevent.event_list);
+ wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
+ wq_init_attr.wq_context = attrs->ufile;
+ wq_init_attr.cq = cq;
+
+ wq = pd->device->ops.create_wq(pd, &wq_init_attr, &attrs->driver_udata);
+ if (IS_ERR(wq)) {
+ ret = PTR_ERR(wq);
+ goto err;
+ }
+
+ obj->uevent.uobject.object = wq;
+ wq->wq_type = wq_init_attr.wq_type;
+ wq->cq = cq;
+ wq->pd = pd;
+ wq->device = pd->device;
+ wq->wq_context = wq_init_attr.wq_context;
+ atomic_set(&wq->usecnt, 0);
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&cq->usecnt);
+ wq->uobject = obj;
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_WQ_HANDLE);
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_WQ_RESP_MAX_WR,
+ &wq_init_attr.max_wr,
+ sizeof(wq_init_attr.max_wr));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_WQ_RESP_MAX_SGE,
+ &wq_init_attr.max_sge,
+ sizeof(wq_init_attr.max_sge));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_WQ_RESP_WQ_NUM,
+ &wq->wq_num,
+ sizeof(wq->wq_num));
+ return ret;
+
+err:
+ if (obj->uevent.event_file)
+ uverbs_uobject_put(&obj->uevent.event_file->uobj);
+ return ret;
+};
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_WQ_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_WQ_HANDLE,
+ UVERBS_OBJECT_WQ,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_WQ_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_CREATE_WQ_TYPE,
+ enum ib_wq_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_WQ_USER_HANDLE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_WQ_MAX_WR,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_WQ_MAX_SGE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_CREATE_WQ_FLAGS,
+ enum ib_uverbs_wq_flags,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_WQ_CQ_HANDLE,
+ UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_WQ_EVENT_FD,
+ UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_WQ_RESP_MAX_WR,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_WQ_RESP_MAX_SGE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_WQ_RESP_WQ_NUM,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_UHW());
+
+static int UVERBS_HANDLER(UVERBS_METHOD_WQ_DESTROY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_WQ_HANDLE);
+ struct ib_uwq_object *obj =
+ container_of(uobj, struct ib_uwq_object, uevent.uobject);
+
+ return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_WQ_RESP,
+ &obj->uevent.events_reported,
+ sizeof(obj->uevent.events_reported));
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_WQ_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_WQ_HANDLE,
+ UVERBS_OBJECT_WQ,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_WQ_RESP,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_WQ,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq),
+ &UVERBS_METHOD(UVERBS_METHOD_WQ_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_WQ_DESTROY)
+);
+
+const struct uapi_definition uverbs_def_obj_wq[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_WQ,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_wq)),
+ {}
+};
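The WQ create handler above calls straight into pd->device->ops.create_wq() and then fills in the generic ib_wq fields and takes the reference counts itself, so a driver only has to produce the WQ object. A minimal, hypothetical driver-side sketch of that contract (not a real driver; a real implementation would embed ib_wq in its own structure and program hardware):

/* Hypothetical ops.create_wq implementation matching the handler above. */
static struct ib_wq *demo_create_wq(struct ib_pd *pd,
				    struct ib_wq_init_attr *init_attr,
				    struct ib_udata *udata)
{
	struct ib_wq *wq = kzalloc(sizeof(*wq), GFP_KERNEL);

	if (!wq)
		return ERR_PTR(-ENOMEM);

	/* program the hardware RQ from init_attr->max_wr / max_sge here and
	 * write back the values actually granted
	 */
	wq->wq_num = 0;		/* hypothetical hardware-assigned number */
	wq->state = IB_WQS_RESET;

	/* the core sets wq->device/pd/cq/wq_type and takes the refcounts */
	return wq;
}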
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
index 7a987acf0c0b..e00ea63175bd 100644
--- a/drivers/infiniband/core/uverbs_uapi.c
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -22,6 +22,8 @@ static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size)
return ERR_PTR(-EOVERFLOW);
elm = kzalloc(alloc_size, GFP_KERNEL);
+ if (!elm)
+ return ERR_PTR(-ENOMEM);
rc = radix_tree_insert(&uapi->radix, key, elm);
if (rc) {
kfree(elm);
@@ -77,10 +79,7 @@ static int uapi_create_write(struct uverbs_api *uapi,
method_elm->is_ex = def->write.is_ex;
method_elm->handler = def->func_write;
- if (def->write.is_ex)
- method_elm->disabled = !(ibdev->uverbs_ex_cmd_mask &
- BIT_ULL(def->write.command_num));
- else
+ if (!def->write.is_ex)
method_elm->disabled = !(ibdev->uverbs_cmd_mask &
BIT_ULL(def->write.command_num));
@@ -193,9 +192,9 @@ static int uapi_merge_obj_tree(struct uverbs_api *uapi,
* disassociation, and the FD types require the driver to use
* struct file_operations.owner to prevent the driver module
* code from unloading while the file is open. This provides
- * enough safety that uverbs_close_fd() will continue to work.
- * Drivers using FD are responsible to handle disassociation of
- * the device on their own.
+ * enough safety that uverbs_uobject_fd_release() will
+ * continue to work. Drivers using FD are responsible to
+ * handle disassociation of the device on their own.
*/
if (WARN_ON(is_driver &&
obj->type_attrs->type_class != &uverbs_idr_class &&
@@ -448,6 +447,9 @@ static int uapi_finalize(struct uverbs_api *uapi)
uapi->num_write_ex = max_write_ex + 1;
data = kmalloc_array(uapi->num_write + uapi->num_write_ex,
sizeof(*uapi->write_methods), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
for (i = 0; i != uapi->num_write + uapi->num_write_ex; i++)
data[i] = &uapi->notsupp_method;
uapi->write_methods = data;
@@ -518,7 +520,7 @@ static void uapi_key_okay(u32 key)
count++;
if (uapi_key_is_attr(key))
count++;
- WARN(count != 1, "Bad count %d key=%x", count, key);
+ WARN(count != 1, "Bad count %u key=%x", count, key);
}
static void uapi_finalize_disable(struct uverbs_api *uapi)
@@ -624,13 +626,18 @@ void uverbs_destroy_api(struct uverbs_api *uapi)
}
static const struct uapi_definition uverbs_core_api[] = {
+ UAPI_DEF_CHAIN(uverbs_def_obj_async_fd),
UAPI_DEF_CHAIN(uverbs_def_obj_counters),
UAPI_DEF_CHAIN(uverbs_def_obj_cq),
UAPI_DEF_CHAIN(uverbs_def_obj_device),
UAPI_DEF_CHAIN(uverbs_def_obj_dm),
+ UAPI_DEF_CHAIN(uverbs_def_obj_dmah),
UAPI_DEF_CHAIN(uverbs_def_obj_flow_action),
UAPI_DEF_CHAIN(uverbs_def_obj_intf),
UAPI_DEF_CHAIN(uverbs_def_obj_mr),
+ UAPI_DEF_CHAIN(uverbs_def_obj_qp),
+ UAPI_DEF_CHAIN(uverbs_def_obj_srq),
+ UAPI_DEF_CHAIN(uverbs_def_obj_wq),
UAPI_DEF_CHAIN(uverbs_def_write_intf),
{},
};
@@ -645,7 +652,7 @@ struct uverbs_api *uverbs_alloc_api(struct ib_device *ibdev)
return ERR_PTR(-ENOMEM);
INIT_RADIX_TREE(&uapi->radix, GFP_KERNEL);
- uapi->driver_id = ibdev->driver_id;
+ uapi->driver_id = ibdev->ops.driver_id;
rc = uapi_merge_def(uapi, ibdev, uverbs_core_api, false);
if (rc)
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index e666a1f7608d..11b1a194de44 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -50,8 +50,10 @@
#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>
#include <rdma/rw.h>
+#include <rdma/lag.h>
#include "core_priv.h"
+#include <trace/events/rdma_core.h>
static int ib_resolve_eth_dmac(struct ib_device *device,
struct rdma_ah_attr *ah_attr);
@@ -94,10 +96,10 @@ static const char * const wc_statuses[] = {
[IB_WC_LOC_EEC_OP_ERR] = "local EE context operation error",
[IB_WC_LOC_PROT_ERR] = "local protection error",
[IB_WC_WR_FLUSH_ERR] = "WR flushed",
- [IB_WC_MW_BIND_ERR] = "memory management operation error",
+ [IB_WC_MW_BIND_ERR] = "memory bind operation error",
[IB_WC_BAD_RESP_ERR] = "bad response error",
[IB_WC_LOC_ACCESS_ERR] = "local access error",
- [IB_WC_REM_INV_REQ_ERR] = "invalid request error",
+ [IB_WC_REM_INV_REQ_ERR] = "remote invalid request error",
[IB_WC_REM_ACCESS_ERR] = "remote access error",
[IB_WC_REM_OP_ERR] = "remote operation error",
[IB_WC_RETRY_EXC_ERR] = "transport retry counter exceeded",
@@ -145,6 +147,8 @@ __attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
case IB_RATE_50_GBPS: return 20;
case IB_RATE_400_GBPS: return 160;
case IB_RATE_600_GBPS: return 240;
+ case IB_RATE_800_GBPS: return 320;
+ case IB_RATE_1600_GBPS: return 640;
default: return -1;
}
}
@@ -174,6 +178,8 @@ __attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
case 20: return IB_RATE_50_GBPS;
case 160: return IB_RATE_400_GBPS;
case 240: return IB_RATE_600_GBPS;
+ case 320: return IB_RATE_800_GBPS;
+ case 640: return IB_RATE_1600_GBPS;
default: return IB_RATE_PORT_CURRENT;
}
}
@@ -203,13 +209,15 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate)
case IB_RATE_50_GBPS: return 53125;
case IB_RATE_400_GBPS: return 425000;
case IB_RATE_600_GBPS: return 637500;
+ case IB_RATE_800_GBPS: return 850000;
+ case IB_RATE_1600_GBPS: return 1700000;
default: return -1;
}
}
EXPORT_SYMBOL(ib_rate_to_mbps);
__attribute_const__ enum rdma_transport_type
-rdma_node_get_transport(enum rdma_node_type node_type)
+rdma_node_get_transport(unsigned int node_type)
{
if (node_type == RDMA_NODE_USNIC)
@@ -225,7 +233,8 @@ rdma_node_get_transport(enum rdma_node_type node_type)
}
EXPORT_SYMBOL(rdma_node_get_transport);
-enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num)
+enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device,
+ u32 port_num)
{
enum rdma_transport_type lt;
if (device->ops.get_link_layer)
@@ -242,8 +251,10 @@ EXPORT_SYMBOL(rdma_port_get_link_layer);
/* Protection domains */
/**
- * ib_alloc_pd - Allocates an unused protection domain.
+ * __ib_alloc_pd - Allocates an unused protection domain.
* @device: The device on which to allocate the protection domain.
+ * @flags: protection domain flags
+ * @caller: caller's build-time module name
*
* A protection domain object provides an association between QPs, shared
* receive queues, address handles, memory regions, and memory windows.
@@ -263,22 +274,20 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
return ERR_PTR(-ENOMEM);
pd->device = device;
- pd->uobject = NULL;
- pd->__internal_mr = NULL;
- atomic_set(&pd->usecnt, 0);
pd->flags = flags;
- pd->res.type = RDMA_RESTRACK_PD;
- rdma_restrack_set_task(&pd->res, caller);
+ rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD);
+ rdma_restrack_set_name(&pd->res, caller);
ret = device->ops.alloc_pd(pd, NULL);
if (ret) {
+ rdma_restrack_put(&pd->res);
kfree(pd);
return ERR_PTR(ret);
}
- rdma_restrack_kadd(&pd->res);
+ rdma_restrack_add(&pd->res);
- if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
+ if (device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY)
pd->local_dma_lkey = device->local_dma_lkey;
else
mr_access_flags |= IB_ACCESS_LOCAL_WRITE;
@@ -299,12 +308,13 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
mr->device = pd->device;
mr->pd = pd;
+ mr->type = IB_MR_TYPE_DMA;
mr->uobject = NULL;
mr->need_inval = false;
pd->__internal_mr = mr;
- if (!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY))
+ if (!(device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY))
pd->local_dma_lkey = pd->__internal_mr->lkey;
if (flags & IB_PD_UNSAFE_GLOBAL_RKEY)
@@ -316,7 +326,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
EXPORT_SYMBOL(__ib_alloc_pd);
/**
- * ib_dealloc_pd - Deallocates a protection domain.
+ * ib_dealloc_pd_user - Deallocates a protection domain.
* @pd: The protection domain to deallocate.
* @udata: Valid user data or NULL for kernel object
*
@@ -324,7 +334,7 @@ EXPORT_SYMBOL(__ib_alloc_pd);
* exist. The caller is responsible to synchronously destroy them and
* guarantee no new allocations will happen.
*/
-void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata)
+int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata)
{
int ret;
@@ -334,13 +344,13 @@ void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata)
pd->__internal_mr = NULL;
}
- /* uverbs manipulates usecnt with proper locking, while the kabi
- requires the caller to guarantee we can't race here. */
- WARN_ON(atomic_read(&pd->usecnt));
+ ret = pd->device->ops.dealloc_pd(pd, udata);
+ if (ret)
+ return ret;
rdma_restrack_del(&pd->res);
- pd->device->ops.dealloc_pd(pd, udata);
kfree(pd);
+ return ret;
}
EXPORT_SYMBOL(ib_dealloc_pd_user);
@@ -362,7 +372,7 @@ void rdma_copy_ah_attr(struct rdma_ah_attr *dest,
EXPORT_SYMBOL(rdma_copy_ah_attr);
/**
- * rdma_replace_ah_attr - Replace valid ah_attr with new new one.
+ * rdma_replace_ah_attr - Replace valid ah_attr with new one.
* @old: Pointer to existing ah_attr which needs to be replaced.
* old is assumed to be valid or zero'd
* @new: Pointer to the new ah_attr.
@@ -496,15 +506,17 @@ rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr,
static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
u32 flags,
- struct ib_udata *udata)
+ struct ib_udata *udata,
+ struct net_device *xmit_slave)
{
+ struct rdma_ah_init_attr init_attr = {};
struct ib_device *device = pd->device;
struct ib_ah *ah;
int ret;
might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE);
- if (!device->ops.create_ah)
+ if (!udata && !device->ops.create_ah)
return ERR_PTR(-EOPNOTSUPP);
ah = rdma_zalloc_drv_obj_gfp(
@@ -517,9 +529,17 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
ah->pd = pd;
ah->type = ah_attr->type;
ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL);
+ init_attr.ah_attr = ah_attr;
+ init_attr.flags = flags;
+ init_attr.xmit_slave = xmit_slave;
- ret = device->ops.create_ah(ah, ah_attr, flags, udata);
+ if (udata)
+ ret = device->ops.create_user_ah(ah, &init_attr, udata);
+ else
+ ret = device->ops.create_ah(ah, &init_attr, NULL);
if (ret) {
+ if (ah->sgid_attr)
+ rdma_put_gid_attr(ah->sgid_attr);
kfree(ah);
return ERR_PTR(ret);
}
@@ -543,15 +563,22 @@ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
u32 flags)
{
const struct ib_gid_attr *old_sgid_attr;
+ struct net_device *slave;
struct ib_ah *ah;
int ret;
ret = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
if (ret)
return ERR_PTR(ret);
-
- ah = _rdma_create_ah(pd, ah_attr, flags, NULL);
-
+ slave = rdma_lag_get_ah_roce_slave(pd->device, ah_attr,
+ (flags & RDMA_CREATE_AH_SLEEPABLE) ?
+ GFP_KERNEL : GFP_ATOMIC);
+ if (IS_ERR(slave)) {
+ rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
+ return ERR_CAST(slave);
+ }
+ ah = _rdma_create_ah(pd, ah_attr, flags, NULL, slave);
+ rdma_lag_put_ah_roce_slave(slave);
rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
return ah;
}
@@ -590,7 +617,8 @@ struct ib_ah *rdma_create_user_ah(struct ib_pd *pd,
}
}
- ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE, udata);
+ ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE,
+ udata, NULL);
out:
rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
@@ -632,7 +660,7 @@ int ib_get_rdma_header_version(const union rdma_network_hdr *hdr)
EXPORT_SYMBOL(ib_get_rdma_header_version);
static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
const struct ib_grh *grh)
{
int grh_version;
@@ -661,20 +689,21 @@ static bool find_gid_index(const union ib_gid *gid,
void *context)
{
struct find_gid_index_context *ctx = context;
+ u16 vlan_id = 0xffff;
+ int ret;
if (ctx->gid_type != gid_attr->gid_type)
return false;
- if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) ||
- (is_vlan_dev(gid_attr->ndev) &&
- vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id))
+ ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL);
+ if (ret)
return false;
- return true;
+ return ctx->vlan_id == vlan_id;
}
static const struct ib_gid_attr *
-get_sgid_attr_from_eth(struct ib_device *device, u8 port_num,
+get_sgid_attr_from_eth(struct ib_device *device, u32 port_num,
u16 vlan_id, const union ib_gid *sgid,
enum ib_gid_type gid_type)
{
@@ -709,7 +738,7 @@ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
(struct in6_addr *)dgid);
return 0;
} else if (net_type == RDMA_NETWORK_IPV6 ||
- net_type == RDMA_NETWORK_IB) {
+ net_type == RDMA_NETWORK_IB || RDMA_NETWORK_ROCE_V1) {
*dgid = hdr->ibgrh.dgid;
*sgid = hdr->ibgrh.sgid;
return 0;
@@ -721,7 +750,7 @@ EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
/* Resolve destination mac address and hop limit for unicast destination
* GID entry, considering the source GID entry as well.
- * ah_attribute must have have valid port_num, sgid_index.
+ * ah_attribute must have valid port_num, sgid_index.
*/
static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
struct rdma_ah_attr *ah_attr)
@@ -761,7 +790,7 @@ static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
* On success the caller is responsible to call rdma_destroy_ah_attr on the
* attr.
*/
-int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
+int ib_init_ah_attr_from_wc(struct ib_device *device, u32 port_num,
const struct ib_wc *wc, const struct ib_grh *grh,
struct rdma_ah_attr *ah_attr)
{
@@ -892,7 +921,7 @@ void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr)
EXPORT_SYMBOL(rdma_destroy_ah_attr);
struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
- const struct ib_grh *grh, u8 port_num)
+ const struct ib_grh *grh, u32 port_num)
{
struct rdma_ah_attr ah_attr;
struct ib_ah *ah;
@@ -945,32 +974,50 @@ int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata)
{
const struct ib_gid_attr *sgid_attr = ah->sgid_attr;
struct ib_pd *pd;
+ int ret;
might_sleep_if(flags & RDMA_DESTROY_AH_SLEEPABLE);
pd = ah->pd;
- ah->device->ops.destroy_ah(ah, flags);
+ ret = ah->device->ops.destroy_ah(ah, flags);
+ if (ret)
+ return ret;
+
atomic_dec(&pd->usecnt);
if (sgid_attr)
rdma_put_gid_attr(sgid_attr);
kfree(ah);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(rdma_destroy_ah_user);
/* Shared receive queues */
-struct ib_srq *ib_create_srq(struct ib_pd *pd,
- struct ib_srq_init_attr *srq_init_attr)
+/**
+ * ib_create_srq_user - Creates a SRQ associated with the specified protection
+ * domain.
+ * @pd: The protection domain associated with the SRQ.
+ * @srq_init_attr: A list of initial attributes required to create the
+ * SRQ. If SRQ creation succeeds, then the attributes are updated to
+ * the actual capabilities of the created SRQ.
+ * @uobject: uobject pointer if this is not a kernel SRQ
+ * @udata: udata pointer if this is not a kernel SRQ
+ *
+ * srq_attr->max_wr and srq_attr->max_sge are read to determine the
+ * requested size of the SRQ, and set to the actual values allocated
+ * on return. If ib_create_srq() succeeds, then max_wr and max_sge
+ * will always be at least as large as the requested values.
+ */
+struct ib_srq *ib_create_srq_user(struct ib_pd *pd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_usrq_object *uobject,
+ struct ib_udata *udata)
{
struct ib_srq *srq;
int ret;
- if (!pd->device->ops.create_srq)
- return ERR_PTR(-EOPNOTSUPP);
-
srq = rdma_zalloc_drv_obj(pd->device, ib_srq);
if (!srq)
return ERR_PTR(-ENOMEM);
@@ -980,6 +1027,7 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
srq->event_handler = srq_init_attr->event_handler;
srq->srq_context = srq_init_attr->srq_context;
srq->srq_type = srq_init_attr->srq_type;
+ srq->uobject = uobject;
if (ib_srq_has_cq(srq->srq_type)) {
srq->ext.cq = srq_init_attr->ext.cq;
@@ -987,14 +1035,19 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
}
if (srq->srq_type == IB_SRQT_XRC) {
srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd;
- atomic_inc(&srq->ext.xrc.xrcd->usecnt);
+ if (srq->ext.xrc.xrcd)
+ atomic_inc(&srq->ext.xrc.xrcd->usecnt);
}
atomic_inc(&pd->usecnt);
- ret = pd->device->ops.create_srq(srq, srq_init_attr, NULL);
+ rdma_restrack_new(&srq->res, RDMA_RESTRACK_SRQ);
+ rdma_restrack_parent_name(&srq->res, &pd->res);
+
+ ret = pd->device->ops.create_srq(srq, srq_init_attr, udata);
if (ret) {
- atomic_dec(&srq->pd->usecnt);
- if (srq->srq_type == IB_SRQT_XRC)
+ rdma_restrack_put(&srq->res);
+ atomic_dec(&pd->usecnt);
+ if (srq->srq_type == IB_SRQT_XRC && srq->ext.xrc.xrcd)
atomic_dec(&srq->ext.xrc.xrcd->usecnt);
if (ib_srq_has_cq(srq->srq_type))
atomic_dec(&srq->ext.cq->usecnt);
@@ -1002,9 +1055,11 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
return ERR_PTR(ret);
}
+ rdma_restrack_add(&srq->res);
+
return srq;
}
-EXPORT_SYMBOL(ib_create_srq);
+EXPORT_SYMBOL(ib_create_srq_user);
int ib_modify_srq(struct ib_srq *srq,
struct ib_srq_attr *srq_attr,
@@ -1026,41 +1081,49 @@ EXPORT_SYMBOL(ib_query_srq);
int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata)
{
+ int ret;
+
if (atomic_read(&srq->usecnt))
return -EBUSY;
- srq->device->ops.destroy_srq(srq, udata);
+ ret = srq->device->ops.destroy_srq(srq, udata);
+ if (ret)
+ return ret;
atomic_dec(&srq->pd->usecnt);
- if (srq->srq_type == IB_SRQT_XRC)
+ if (srq->srq_type == IB_SRQT_XRC && srq->ext.xrc.xrcd)
atomic_dec(&srq->ext.xrc.xrcd->usecnt);
if (ib_srq_has_cq(srq->srq_type))
atomic_dec(&srq->ext.cq->usecnt);
+ rdma_restrack_del(&srq->res);
kfree(srq);
- return 0;
+ return ret;
}
EXPORT_SYMBOL(ib_destroy_srq_user);
/* Queue pairs */
+static void __ib_qp_event_handler(struct ib_event *event, void *context)
+{
+ struct ib_qp *qp = event->element.qp;
+
+ if (event->event == IB_EVENT_QP_LAST_WQE_REACHED)
+ complete(&qp->srq_completion);
+ if (qp->registered_event_handler)
+ qp->registered_event_handler(event, qp->qp_context);
+}
+
static void __ib_shared_qp_event_handler(struct ib_event *event, void *context)
{
struct ib_qp *qp = context;
unsigned long flags;
- spin_lock_irqsave(&qp->device->event_handler_lock, flags);
+ spin_lock_irqsave(&qp->device->qp_open_list_lock, flags);
list_for_each_entry(event->element.qp, &qp->open_list, open_list)
if (event->element.qp->event_handler)
event->element.qp->event_handler(event, event->element.qp->qp_context);
- spin_unlock_irqrestore(&qp->device->event_handler_lock, flags);
-}
-
-static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp)
-{
- mutex_lock(&xrcd->tgt_qp_mutex);
- list_add(&qp->xrcd_list, &xrcd->tgt_qp_list);
- mutex_unlock(&xrcd->tgt_qp_mutex);
+ spin_unlock_irqrestore(&qp->device->qp_open_list_lock, flags);
}
static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp,
@@ -1090,9 +1153,9 @@ static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp,
qp->qp_num = real_qp->qp_num;
qp->qp_type = real_qp->qp_type;
- spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
+ spin_lock_irqsave(&real_qp->device->qp_open_list_lock, flags);
list_add(&qp->open_list, &real_qp->open_list);
- spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
+ spin_unlock_irqrestore(&real_qp->device->qp_open_list_lock, flags);
return qp;
}
@@ -1105,25 +1168,24 @@ struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
if (qp_open_attr->qp_type != IB_QPT_XRC_TGT)
return ERR_PTR(-EINVAL);
- qp = ERR_PTR(-EINVAL);
- mutex_lock(&xrcd->tgt_qp_mutex);
- list_for_each_entry(real_qp, &xrcd->tgt_qp_list, xrcd_list) {
- if (real_qp->qp_num == qp_open_attr->qp_num) {
- qp = __ib_open_qp(real_qp, qp_open_attr->event_handler,
- qp_open_attr->qp_context);
- break;
- }
+ down_read(&xrcd->tgt_qps_rwsem);
+ real_qp = xa_load(&xrcd->tgt_qps, qp_open_attr->qp_num);
+ if (!real_qp) {
+ up_read(&xrcd->tgt_qps_rwsem);
+ return ERR_PTR(-EINVAL);
}
- mutex_unlock(&xrcd->tgt_qp_mutex);
+ qp = __ib_open_qp(real_qp, qp_open_attr->event_handler,
+ qp_open_attr->qp_context);
+ up_read(&xrcd->tgt_qps_rwsem);
return qp;
}
EXPORT_SYMBOL(ib_open_qp);
static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp,
- struct ib_qp_init_attr *qp_init_attr,
- struct ib_udata *udata)
+ struct ib_qp_init_attr *qp_init_attr)
{
struct ib_qp *real_qp = qp;
+ int err;
qp->event_handler = __ib_shared_qp_event_handler;
qp->qp_context = qp;
@@ -1139,84 +1201,171 @@ static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp,
if (IS_ERR(qp))
return qp;
- __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
+ err = xa_err(xa_store(&qp_init_attr->xrcd->tgt_qps, real_qp->qp_num,
+ real_qp, GFP_KERNEL));
+ if (err) {
+ ib_close_qp(qp);
+ return ERR_PTR(err);
+ }
return qp;
}
-struct ib_qp *ib_create_qp_user(struct ib_pd *pd,
- struct ib_qp_init_attr *qp_init_attr,
- struct ib_udata *udata)
+static struct ib_qp *create_qp(struct ib_device *dev, struct ib_pd *pd,
+ struct ib_qp_init_attr *attr,
+ struct ib_udata *udata,
+ struct ib_uqp_object *uobj, const char *caller)
{
- struct ib_device *device = pd ? pd->device : qp_init_attr->xrcd->device;
+ struct ib_udata dummy = {};
struct ib_qp *qp;
int ret;
- if (qp_init_attr->rwq_ind_tbl &&
- (qp_init_attr->recv_cq ||
- qp_init_attr->srq || qp_init_attr->cap.max_recv_wr ||
- qp_init_attr->cap.max_recv_sge))
- return ERR_PTR(-EINVAL);
-
- /*
- * If the callers is using the RDMA API calculate the resources
- * needed for the RDMA READ/WRITE operations.
- *
- * Note that these callers need to pass in a port number.
- */
- if (qp_init_attr->cap.max_rdma_ctxs)
- rdma_rw_init_qp(device, qp_init_attr);
+ if (!dev->ops.create_qp)
+ return ERR_PTR(-EOPNOTSUPP);
- qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL);
- if (IS_ERR(qp))
- return qp;
+ qp = rdma_zalloc_drv_obj_numa(dev, ib_qp);
+ if (!qp)
+ return ERR_PTR(-ENOMEM);
- ret = ib_create_qp_security(qp, device);
- if (ret)
- goto err;
+ qp->device = dev;
+ qp->pd = pd;
+ qp->uobject = uobj;
+ qp->real_qp = qp;
- qp->qp_type = qp_init_attr->qp_type;
- qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl;
+ qp->qp_type = attr->qp_type;
+ qp->rwq_ind_tbl = attr->rwq_ind_tbl;
+ qp->srq = attr->srq;
+ qp->event_handler = __ib_qp_event_handler;
+ qp->registered_event_handler = attr->event_handler;
+ qp->port = attr->port_num;
+ qp->qp_context = attr->qp_context;
- atomic_set(&qp->usecnt, 0);
- qp->mrs_used = 0;
spin_lock_init(&qp->mr_lock);
INIT_LIST_HEAD(&qp->rdma_mrs);
INIT_LIST_HEAD(&qp->sig_mrs);
- qp->port = 0;
+ init_completion(&qp->srq_completion);
- if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) {
- struct ib_qp *xrc_qp =
- create_xrc_qp_user(qp, qp_init_attr, udata);
+ qp->send_cq = attr->send_cq;
+ qp->recv_cq = attr->recv_cq;
- if (IS_ERR(xrc_qp)) {
- ret = PTR_ERR(xrc_qp);
- goto err;
- }
- return xrc_qp;
- }
+ rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP);
+ WARN_ONCE(!udata && !caller, "Missing kernel QP owner");
+ rdma_restrack_set_name(&qp->res, udata ? NULL : caller);
+ ret = dev->ops.create_qp(qp, attr, udata);
+ if (ret)
+ goto err_create;
- qp->event_handler = qp_init_attr->event_handler;
- qp->qp_context = qp_init_attr->qp_context;
- if (qp_init_attr->qp_type == IB_QPT_XRC_INI) {
- qp->recv_cq = NULL;
- qp->srq = NULL;
- } else {
- qp->recv_cq = qp_init_attr->recv_cq;
- if (qp_init_attr->recv_cq)
- atomic_inc(&qp_init_attr->recv_cq->usecnt);
- qp->srq = qp_init_attr->srq;
- if (qp->srq)
- atomic_inc(&qp_init_attr->srq->usecnt);
+ /*
+ * TODO: The mlx4 internally overwrites send_cq and recv_cq.
+ * Unfortunately, it is not an easy task to fix that driver.
+ */
+ qp->send_cq = attr->send_cq;
+ qp->recv_cq = attr->recv_cq;
+
+ ret = ib_create_qp_security(qp, dev);
+ if (ret)
+ goto err_security;
+
+ rdma_restrack_add(&qp->res);
+ return qp;
+
+err_security:
+ qp->device->ops.destroy_qp(qp, udata ? &dummy : NULL);
+err_create:
+ rdma_restrack_put(&qp->res);
+ kfree(qp);
+ return ERR_PTR(ret);
+
+}
+
+/**
+ * ib_create_qp_user - Creates a QP associated with the specified protection
+ * domain.
+ * @dev: IB device
+ * @pd: The protection domain associated with the QP.
+ * @attr: A list of initial attributes required to create the
+ * QP. If QP creation succeeds, then the attributes are updated to
+ * the actual capabilities of the created QP.
+ * @udata: User data
+ * @uobj: uverbs object
+ * @caller: caller's build-time module name
+ */
+struct ib_qp *ib_create_qp_user(struct ib_device *dev, struct ib_pd *pd,
+ struct ib_qp_init_attr *attr,
+ struct ib_udata *udata,
+ struct ib_uqp_object *uobj, const char *caller)
+{
+ struct ib_qp *qp, *xrc_qp;
+
+ if (attr->qp_type == IB_QPT_XRC_TGT)
+ qp = create_qp(dev, pd, attr, NULL, NULL, caller);
+ else
+ qp = create_qp(dev, pd, attr, udata, uobj, NULL);
+ if (attr->qp_type != IB_QPT_XRC_TGT || IS_ERR(qp))
+ return qp;
+
+ xrc_qp = create_xrc_qp_user(qp, attr);
+ if (IS_ERR(xrc_qp)) {
+ ib_destroy_qp(qp);
+ return xrc_qp;
}
- qp->send_cq = qp_init_attr->send_cq;
- qp->xrcd = NULL;
+ xrc_qp->uobject = uobj;
+ return xrc_qp;
+}
+EXPORT_SYMBOL(ib_create_qp_user);
- atomic_inc(&pd->usecnt);
- if (qp_init_attr->send_cq)
- atomic_inc(&qp_init_attr->send_cq->usecnt);
- if (qp_init_attr->rwq_ind_tbl)
+void ib_qp_usecnt_inc(struct ib_qp *qp)
+{
+ if (qp->pd)
+ atomic_inc(&qp->pd->usecnt);
+ if (qp->send_cq)
+ atomic_inc(&qp->send_cq->usecnt);
+ if (qp->recv_cq)
+ atomic_inc(&qp->recv_cq->usecnt);
+ if (qp->srq)
+ atomic_inc(&qp->srq->usecnt);
+ if (qp->rwq_ind_tbl)
atomic_inc(&qp->rwq_ind_tbl->usecnt);
+}
+EXPORT_SYMBOL(ib_qp_usecnt_inc);
+
+void ib_qp_usecnt_dec(struct ib_qp *qp)
+{
+ if (qp->rwq_ind_tbl)
+ atomic_dec(&qp->rwq_ind_tbl->usecnt);
+ if (qp->srq)
+ atomic_dec(&qp->srq->usecnt);
+ if (qp->recv_cq)
+ atomic_dec(&qp->recv_cq->usecnt);
+ if (qp->send_cq)
+ atomic_dec(&qp->send_cq->usecnt);
+ if (qp->pd)
+ atomic_dec(&qp->pd->usecnt);
+}
+EXPORT_SYMBOL(ib_qp_usecnt_dec);
+
+struct ib_qp *ib_create_qp_kernel(struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr,
+ const char *caller)
+{
+ struct ib_device *device = pd->device;
+ struct ib_qp *qp;
+ int ret;
+
+ /*
+ * If the caller is using the RDMA API, calculate the resources
+ * needed for the RDMA READ/WRITE operations.
+ *
+ * Note that these callers need to pass in a port number.
+ */
+ if (qp_init_attr->cap.max_rdma_ctxs)
+ rdma_rw_init_qp(device, qp_init_attr);
+
+ qp = create_qp(device, pd, qp_init_attr, NULL, NULL, caller);
+ if (IS_ERR(qp))
+ return qp;
+
+ ib_qp_usecnt_inc(qp);
if (qp_init_attr->cap.max_rdma_ctxs) {
ret = rdma_rw_init_mrs(qp, qp_init_attr);
@@ -1232,6 +1381,8 @@ struct ib_qp *ib_create_qp_user(struct ib_pd *pd,
qp->max_write_sge = qp_init_attr->cap.max_send_sge;
qp->max_read_sge = min_t(u32, qp_init_attr->cap.max_send_sge,
device->attrs.max_sge_rd);
+ if (qp_init_attr->create_flags & IB_QP_CREATE_INTEGRITY_EN)
+ qp->integrity_en = true;
return qp;
@@ -1240,7 +1391,7 @@ err:
return ERR_PTR(ret);
}
-EXPORT_SYMBOL(ib_create_qp_user);
+EXPORT_SYMBOL(ib_create_qp_kernel);
static const struct {
int valid;
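After this split, in-kernel consumers go through ib_create_qp_kernel() (usually via a wrapper that supplies the module name), while uverbs uses ib_create_qp_user() and manages the reference counts through ib_qp_usecnt_inc()/ib_qp_usecnt_dec(). A minimal sketch of a kernel caller, assuming pd and cq were already created:

struct ib_qp_init_attr init_attr = {
	.send_cq  = cq,
	.recv_cq  = cq,
	.qp_type  = IB_QPT_RC,
	.cap = {
		.max_send_wr  = 16,
		.max_recv_wr  = 16,
		.max_send_sge = 1,
		.max_recv_sge = 1,
	},
};
struct ib_qp *qp;

qp = ib_create_qp_kernel(pd, &init_attr, KBUILD_MODNAME);
if (IS_ERR(qp))
	return PTR_ERR(qp);
/* ib_qp_usecnt_inc() has already taken the PD/CQ references here */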
@@ -1613,22 +1764,48 @@ static bool is_qp_type_connected(const struct ib_qp *qp)
qp->qp_type == IB_QPT_XRC_TGT);
}
-/**
+/*
* IB core internal function to perform QP attributes modification.
*/
static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
- u8 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+ u32 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
const struct ib_gid_attr *old_sgid_attr_av;
const struct ib_gid_attr *old_sgid_attr_alt_av;
int ret;
+ attr->xmit_slave = NULL;
if (attr_mask & IB_QP_AV) {
ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr,
&old_sgid_attr_av);
if (ret)
return ret;
+
+ if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
+ is_qp_type_connected(qp)) {
+ struct net_device *slave;
+
+ /*
+ * If the user provided the qp_attr then we have to
+ * resolve it. Kernel users have to provide already
+ * resolved rdma_ah_attr's.
+ */
+ if (udata) {
+ ret = ib_resolve_eth_dmac(qp->device,
+ &attr->ah_attr);
+ if (ret)
+ goto out_av;
+ }
+ slave = rdma_lag_get_ah_roce_slave(qp->device,
+ &attr->ah_attr,
+ GFP_KERNEL);
+ if (IS_ERR(slave)) {
+ ret = PTR_ERR(slave);
+ goto out_av;
+ }
+ attr->xmit_slave = slave;
+ }
}
if (attr_mask & IB_QP_ALT_PATH) {
/*
@@ -1650,23 +1827,11 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
if (!(rdma_protocol_ib(qp->device,
attr->alt_ah_attr.port_num) &&
rdma_protocol_ib(qp->device, port))) {
- ret = EINVAL;
+ ret = -EINVAL;
goto out;
}
}
- /*
- * If the user provided the qp_attr then we have to resolve it. Kernel
- * users have to provide already resolved rdma_ah_attr's
- */
- if (udata && (attr_mask & IB_QP_AV) &&
- attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
- is_qp_type_connected(qp)) {
- ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
- if (ret)
- goto out;
- }
-
if (rdma_ib_or_roce(qp->device, port)) {
if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
dev_warn(&qp->device->dev,
@@ -1683,6 +1848,14 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
}
}
+ /*
+ * Bind this qp to a counter automatically based on the rdma counter
+ * rules. This only set in RST2INIT with port specified
+ */
+ if (!qp->counter && (attr_mask & IB_QP_PORT) &&
+ ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT))
+ rdma_counter_bind_qp_auto(qp, attr->port_num);
+
ret = ib_security_modify_qp(qp, attr, attr_mask, udata);
if (ret)
goto out;
@@ -1700,8 +1873,10 @@ out:
if (attr_mask & IB_QP_ALT_PATH)
rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av);
out_av:
- if (attr_mask & IB_QP_AV)
+ if (attr_mask & IB_QP_AV) {
+ rdma_lag_put_ah_roce_slave(attr->xmit_slave);
rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av);
+ }
return ret;
}
@@ -1723,12 +1898,95 @@ int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
}
EXPORT_SYMBOL(ib_modify_qp_with_udata);
-int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width)
+static void ib_get_width_and_speed(u32 netdev_speed, u32 lanes,
+ u16 *speed, u8 *width)
+{
+ if (!lanes) {
+ if (netdev_speed <= SPEED_1000) {
+ *width = IB_WIDTH_1X;
+ *speed = IB_SPEED_SDR;
+ } else if (netdev_speed <= SPEED_10000) {
+ *width = IB_WIDTH_1X;
+ *speed = IB_SPEED_FDR10;
+ } else if (netdev_speed <= SPEED_20000) {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_DDR;
+ } else if (netdev_speed <= SPEED_25000) {
+ *width = IB_WIDTH_1X;
+ *speed = IB_SPEED_EDR;
+ } else if (netdev_speed <= SPEED_40000) {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_FDR10;
+ } else if (netdev_speed <= SPEED_50000) {
+ *width = IB_WIDTH_2X;
+ *speed = IB_SPEED_EDR;
+ } else if (netdev_speed <= SPEED_100000) {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_EDR;
+ } else if (netdev_speed <= SPEED_200000) {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_HDR;
+ } else {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_NDR;
+ }
+
+ return;
+ }
+
+ switch (lanes) {
+ case 1:
+ *width = IB_WIDTH_1X;
+ break;
+ case 2:
+ *width = IB_WIDTH_2X;
+ break;
+ case 4:
+ *width = IB_WIDTH_4X;
+ break;
+ case 8:
+ *width = IB_WIDTH_8X;
+ break;
+ case 12:
+ *width = IB_WIDTH_12X;
+ break;
+ default:
+ *width = IB_WIDTH_1X;
+ }
+
+ switch (netdev_speed / lanes) {
+ case SPEED_2500:
+ *speed = IB_SPEED_SDR;
+ break;
+ case SPEED_5000:
+ *speed = IB_SPEED_DDR;
+ break;
+ case SPEED_10000:
+ *speed = IB_SPEED_FDR10;
+ break;
+ case SPEED_14000:
+ *speed = IB_SPEED_FDR;
+ break;
+ case SPEED_25000:
+ *speed = IB_SPEED_EDR;
+ break;
+ case SPEED_50000:
+ *speed = IB_SPEED_HDR;
+ break;
+ case SPEED_100000:
+ *speed = IB_SPEED_NDR;
+ break;
+ default:
+ *speed = IB_SPEED_SDR;
+ }
+}
+
+int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width)
{
int rc;
u32 netdev_speed;
struct net_device *netdev;
- struct ethtool_link_ksettings lksettings;
+ struct ethtool_link_ksettings lksettings = {};
if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET)
return -EINVAL;
@@ -1743,33 +2001,17 @@ int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width)
dev_put(netdev);
- if (!rc) {
+ if (!rc && lksettings.base.speed != (u32)SPEED_UNKNOWN) {
netdev_speed = lksettings.base.speed;
} else {
netdev_speed = SPEED_1000;
- pr_warn("%s speed is unknown, defaulting to %d\n", netdev->name,
- netdev_speed);
+ if (rc)
+ pr_warn("%s speed is unknown, defaulting to %u\n",
+ netdev->name, netdev_speed);
}
- if (netdev_speed <= SPEED_1000) {
- *width = IB_WIDTH_1X;
- *speed = IB_SPEED_SDR;
- } else if (netdev_speed <= SPEED_10000) {
- *width = IB_WIDTH_1X;
- *speed = IB_SPEED_FDR10;
- } else if (netdev_speed <= SPEED_20000) {
- *width = IB_WIDTH_4X;
- *speed = IB_SPEED_DDR;
- } else if (netdev_speed <= SPEED_25000) {
- *width = IB_WIDTH_1X;
- *speed = IB_SPEED_EDR;
- } else if (netdev_speed <= SPEED_40000) {
- *width = IB_WIDTH_4X;
- *speed = IB_SPEED_FDR10;
- } else {
- *width = IB_WIDTH_4X;
- *speed = IB_SPEED_EDR;
- }
+ ib_get_width_and_speed(netdev_speed, lksettings.lanes,
+ speed, width);
return 0;
}
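Concretely, the new ib_get_width_and_speed() helper (static to this file; the calls below are purely illustrative) resolves a couple of representative ethtool readings like this:

u16 speed;
u8 width;

/* 100G link that reports 4 lanes: 100000 / 4 = 25000 per lane */
ib_get_width_and_speed(SPEED_100000, 4, &speed, &width);
/* -> width == IB_WIDTH_4X, speed == IB_SPEED_EDR */

/* 50G link with no lane information falls back to the speed-only table */
ib_get_width_and_speed(SPEED_50000, 0, &speed, &width);
/* -> width == IB_WIDTH_2X, speed == IB_SPEED_EDR */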
@@ -1806,9 +2048,9 @@ int ib_close_qp(struct ib_qp *qp)
if (real_qp == qp)
return -EINVAL;
- spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
+ spin_lock_irqsave(&real_qp->device->qp_open_list_lock, flags);
list_del(&qp->open_list);
- spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
+ spin_unlock_irqrestore(&real_qp->device->qp_open_list_lock, flags);
atomic_dec(&real_qp->usecnt);
if (qp->qp_sec)
@@ -1827,21 +2069,18 @@ static int __ib_destroy_shared_qp(struct ib_qp *qp)
real_qp = qp->real_qp;
xrcd = real_qp->xrcd;
-
- mutex_lock(&xrcd->tgt_qp_mutex);
+ down_write(&xrcd->tgt_qps_rwsem);
ib_close_qp(qp);
if (atomic_read(&real_qp->usecnt) == 0)
- list_del(&real_qp->xrcd_list);
+ xa_erase(&xrcd->tgt_qps, real_qp->qp_num);
else
real_qp = NULL;
- mutex_unlock(&xrcd->tgt_qp_mutex);
+ up_write(&xrcd->tgt_qps_rwsem);
if (real_qp) {
ret = ib_destroy_qp(real_qp);
if (!ret)
atomic_dec(&xrcd->usecnt);
- else
- __ib_insert_xrcd_qp(xrcd, real_qp);
}
return 0;
@@ -1851,10 +2090,6 @@ int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata)
{
const struct ib_gid_attr *alt_path_sgid_attr = qp->alt_path_sgid_attr;
const struct ib_gid_attr *av_sgid_attr = qp->av_sgid_attr;
- struct ib_pd *pd;
- struct ib_cq *scq, *rcq;
- struct ib_srq *srq;
- struct ib_rwq_ind_table *ind_tbl;
struct ib_qp_security *sec;
int ret;
@@ -1866,11 +2101,6 @@ int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata)
if (qp->real_qp != qp)
return __ib_destroy_shared_qp(qp);
- pd = qp->pd;
- scq = qp->send_cq;
- rcq = qp->recv_cq;
- srq = qp->srq;
- ind_tbl = qp->rwq_ind_tbl;
sec = qp->qp_sec;
if (sec)
ib_destroy_qp_security_begin(sec);
@@ -1878,30 +2108,25 @@ int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata)
if (!qp->uobject)
rdma_rw_cleanup_mrs(qp);
- rdma_restrack_del(&qp->res);
+ rdma_counter_unbind_qp(qp, qp->port, true);
ret = qp->device->ops.destroy_qp(qp, udata);
- if (!ret) {
- if (alt_path_sgid_attr)
- rdma_put_gid_attr(alt_path_sgid_attr);
- if (av_sgid_attr)
- rdma_put_gid_attr(av_sgid_attr);
- if (pd)
- atomic_dec(&pd->usecnt);
- if (scq)
- atomic_dec(&scq->usecnt);
- if (rcq)
- atomic_dec(&rcq->usecnt);
- if (srq)
- atomic_dec(&srq->usecnt);
- if (ind_tbl)
- atomic_dec(&ind_tbl->usecnt);
- if (sec)
- ib_destroy_qp_security_end(sec);
- } else {
+ if (ret) {
if (sec)
ib_destroy_qp_security_abort(sec);
+ return ret;
}
+ if (alt_path_sgid_attr)
+ rdma_put_gid_attr(alt_path_sgid_attr);
+ if (av_sgid_attr)
+ rdma_put_gid_attr(av_sgid_attr);
+
+ ib_qp_usecnt_dec(qp);
+ if (sec)
+ ib_destroy_qp_security_end(sec);
+
+ rdma_restrack_del(&qp->res);
+ kfree(qp);
return ret;
}
EXPORT_SYMBOL(ib_destroy_qp_user);
@@ -1916,27 +2141,39 @@ struct ib_cq *__ib_create_cq(struct ib_device *device,
const char *caller)
{
struct ib_cq *cq;
+ int ret;
+
+ cq = rdma_zalloc_drv_obj(device, ib_cq);
+ if (!cq)
+ return ERR_PTR(-ENOMEM);
+
+ cq->device = device;
+ cq->uobject = NULL;
+ cq->comp_handler = comp_handler;
+ cq->event_handler = event_handler;
+ cq->cq_context = cq_context;
+ atomic_set(&cq->usecnt, 0);
+
+ rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+ rdma_restrack_set_name(&cq->res, caller);
- cq = device->ops.create_cq(device, cq_attr, NULL);
-
- if (!IS_ERR(cq)) {
- cq->device = device;
- cq->uobject = NULL;
- cq->comp_handler = comp_handler;
- cq->event_handler = event_handler;
- cq->cq_context = cq_context;
- atomic_set(&cq->usecnt, 0);
- cq->res.type = RDMA_RESTRACK_CQ;
- rdma_restrack_set_task(&cq->res, caller);
- rdma_restrack_kadd(&cq->res);
+ ret = device->ops.create_cq(cq, cq_attr, NULL);
+ if (ret) {
+ rdma_restrack_put(&cq->res);
+ kfree(cq);
+ return ERR_PTR(ret);
}
+ rdma_restrack_add(&cq->res);
return cq;
}
EXPORT_SYMBOL(__ib_create_cq);
int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
+ if (cq->shared)
+ return -EOPNOTSUPP;
+
return cq->device->ops.modify_cq ?
cq->device->ops.modify_cq(cq, cq_count,
cq_period) : -EOPNOTSUPP;
@@ -1945,16 +2182,29 @@ EXPORT_SYMBOL(rdma_set_cq_moderation);
int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata)
{
+ int ret;
+
+ if (WARN_ON_ONCE(cq->shared))
+ return -EOPNOTSUPP;
+
if (atomic_read(&cq->usecnt))
return -EBUSY;
+ ret = cq->device->ops.destroy_cq(cq, udata);
+ if (ret)
+ return ret;
+
rdma_restrack_del(&cq->res);
- return cq->device->ops.destroy_cq(cq, udata);
+ kfree(cq);
+ return ret;
}
EXPORT_SYMBOL(ib_destroy_cq_user);
int ib_resize_cq(struct ib_cq *cq, int cqe)
{
+ if (cq->shared)
+ return -EOPNOTSUPP;
+
return cq->device->ops.resize_cq ?
cq->device->ops.resize_cq(cq, cqe, NULL) : -EOPNOTSUPP;
}
@@ -1962,18 +2212,73 @@ EXPORT_SYMBOL(ib_resize_cq);
/* Memory regions */
+struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags)
+{
+ struct ib_mr *mr;
+
+ if (access_flags & IB_ACCESS_ON_DEMAND) {
+ if (!(pd->device->attrs.kernel_cap_flags &
+ IBK_ON_DEMAND_PAGING)) {
+ pr_debug("ODP support not available\n");
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ mr = pd->device->ops.reg_user_mr(pd, start, length, virt_addr,
+ access_flags, NULL, NULL);
+
+ if (IS_ERR(mr))
+ return mr;
+
+ mr->device = pd->device;
+ mr->type = IB_MR_TYPE_USER;
+ mr->pd = pd;
+ mr->dm = NULL;
+ atomic_inc(&pd->usecnt);
+ mr->iova = virt_addr;
+ mr->length = length;
+
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_parent_name(&mr->res, &pd->res);
+ rdma_restrack_add(&mr->res);
+
+ return mr;
+}
+EXPORT_SYMBOL(ib_reg_user_mr);
+
+int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
+ u32 flags, struct ib_sge *sg_list, u32 num_sge)
+{
+ if (!pd->device->ops.advise_mr)
+ return -EOPNOTSUPP;
+
+ if (!num_sge)
+ return 0;
+
+ return pd->device->ops.advise_mr(pd, advice, flags, sg_list, num_sge,
+ NULL);
+}
+EXPORT_SYMBOL(ib_advise_mr);
+
int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata)
{
struct ib_pd *pd = mr->pd;
struct ib_dm *dm = mr->dm;
+ struct ib_dmah *dmah = mr->dmah;
+ struct ib_sig_attrs *sig_attrs = mr->sig_attrs;
int ret;
+ trace_mr_dereg(mr);
rdma_restrack_del(&mr->res);
ret = mr->device->ops.dereg_mr(mr, udata);
if (!ret) {
atomic_dec(&pd->usecnt);
if (dm)
atomic_dec(&dm->usecnt);
+ if (dmah)
+ atomic_dec(&dmah->usecnt);
+ kfree(sig_attrs);
}
return ret;
@@ -1985,7 +2290,6 @@ EXPORT_SYMBOL(ib_dereg_mr_user);
* @pd: protection domain associated with the region
* @mr_type: memory region type
* @max_num_sg: maximum sg entries available for registration.
- * @udata: user data or null for kernel objects
*
* Notes:
* Memory registration page/sg lists must not exceed max_num_sg.
@@ -1993,77 +2297,104 @@ EXPORT_SYMBOL(ib_dereg_mr_user);
* max_num_sg * used_page_size.
*
*/
-struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg)
{
struct ib_mr *mr;
- if (!pd->device->ops.alloc_mr)
- return ERR_PTR(-EOPNOTSUPP);
+ if (!pd->device->ops.alloc_mr) {
+ mr = ERR_PTR(-EOPNOTSUPP);
+ goto out;
+ }
- mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg, udata);
- if (!IS_ERR(mr)) {
- mr->device = pd->device;
- mr->pd = pd;
- mr->dm = NULL;
- mr->uobject = NULL;
- atomic_inc(&pd->usecnt);
- mr->need_inval = false;
- mr->res.type = RDMA_RESTRACK_MR;
- rdma_restrack_kadd(&mr->res);
+ if (mr_type == IB_MR_TYPE_INTEGRITY) {
+ WARN_ON_ONCE(1);
+ mr = ERR_PTR(-EINVAL);
+ goto out;
}
+ mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg);
+ if (IS_ERR(mr))
+ goto out;
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->dm = NULL;
+ mr->uobject = NULL;
+ atomic_inc(&pd->usecnt);
+ mr->need_inval = false;
+ mr->type = mr_type;
+ mr->sig_attrs = NULL;
+
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_parent_name(&mr->res, &pd->res);
+ rdma_restrack_add(&mr->res);
+out:
+ trace_mr_alloc(pd, mr_type, max_num_sg, mr);
return mr;
}
-EXPORT_SYMBOL(ib_alloc_mr_user);
-
-/* "Fast" memory regions */
+EXPORT_SYMBOL(ib_alloc_mr);
-struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
- int mr_access_flags,
- struct ib_fmr_attr *fmr_attr)
+/**
+ * ib_alloc_mr_integrity() - Allocates an integrity memory region
+ * @pd: protection domain associated with the region
+ * @max_num_data_sg: maximum data sg entries available for registration
+ * @max_num_meta_sg: maximum metadata sg entries available for
+ * registration
+ *
+ * Notes:
+ * Memory registration page/sg lists must not exceed max_num_sg,
+ * also the integrity page/sg lists must not exceed max_num_meta_sg.
+ *
+ */
+struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
+ u32 max_num_data_sg,
+ u32 max_num_meta_sg)
{
- struct ib_fmr *fmr;
-
- if (!pd->device->ops.alloc_fmr)
- return ERR_PTR(-EOPNOTSUPP);
+ struct ib_mr *mr;
+ struct ib_sig_attrs *sig_attrs;
- fmr = pd->device->ops.alloc_fmr(pd, mr_access_flags, fmr_attr);
- if (!IS_ERR(fmr)) {
- fmr->device = pd->device;
- fmr->pd = pd;
- atomic_inc(&pd->usecnt);
+ if (!pd->device->ops.alloc_mr_integrity ||
+ !pd->device->ops.map_mr_sg_pi) {
+ mr = ERR_PTR(-EOPNOTSUPP);
+ goto out;
}
- return fmr;
-}
-EXPORT_SYMBOL(ib_alloc_fmr);
-
-int ib_unmap_fmr(struct list_head *fmr_list)
-{
- struct ib_fmr *fmr;
-
- if (list_empty(fmr_list))
- return 0;
+ if (!max_num_meta_sg) {
+ mr = ERR_PTR(-EINVAL);
+ goto out;
+ }
- fmr = list_entry(fmr_list->next, struct ib_fmr, list);
- return fmr->device->ops.unmap_fmr(fmr_list);
-}
-EXPORT_SYMBOL(ib_unmap_fmr);
+ sig_attrs = kzalloc(sizeof(struct ib_sig_attrs), GFP_KERNEL);
+ if (!sig_attrs) {
+ mr = ERR_PTR(-ENOMEM);
+ goto out;
+ }
-int ib_dealloc_fmr(struct ib_fmr *fmr)
-{
- struct ib_pd *pd;
- int ret;
+ mr = pd->device->ops.alloc_mr_integrity(pd, max_num_data_sg,
+ max_num_meta_sg);
+ if (IS_ERR(mr)) {
+ kfree(sig_attrs);
+ goto out;
+ }
- pd = fmr->pd;
- ret = fmr->device->ops.dealloc_fmr(fmr);
- if (!ret)
- atomic_dec(&pd->usecnt);
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->dm = NULL;
+ mr->uobject = NULL;
+ atomic_inc(&pd->usecnt);
+ mr->need_inval = false;
+ mr->type = IB_MR_TYPE_INTEGRITY;
+ mr->sig_attrs = sig_attrs;
- return ret;
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_parent_name(&mr->res, &pd->res);
+ rdma_restrack_add(&mr->res);
+out:
+ trace_mr_integ_alloc(pd, max_num_data_sg, max_num_meta_sg, mr);
+ return mr;
}
-EXPORT_SYMBOL(ib_dealloc_fmr);
+EXPORT_SYMBOL(ib_alloc_mr_integrity);
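A hedged consumer-side sketch of the new allocator, assuming the device advertises the integrity ops (alloc_mr_integrity/map_mr_sg_pi) and that pd already exists:

struct ib_mr *mr;

mr = ib_alloc_mr_integrity(pd, 16 /* data SGEs */, 16 /* metadata SGEs */);
if (IS_ERR(mr))
	return PTR_ERR(mr);

/* mr->type is IB_MR_TYPE_INTEGRITY and mr->sig_attrs is allocated;
 * ib_dereg_mr() releases the MR and frees sig_attrs (see
 * ib_dereg_mr_user() above).
 */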
/* Multicast groups */
@@ -2072,7 +2403,7 @@ static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid)
struct ib_qp_init_attr init_attr = {};
struct ib_qp_attr attr = {};
int num_eth_ports = 0;
- int port;
+ unsigned int port;
/* If QP state >= init, it is assigned to a port and we can check this
* port only.
@@ -2087,7 +2418,7 @@ static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid)
}
/* Can't get a quick answer, iterate over all ports */
- for (port = 0; port < qp->device->phys_port_cnt; port++)
+ rdma_for_each_port(qp->device, port)
if (rdma_port_get_link_layer(qp->device, port) !=
IB_LINK_LAYER_INFINIBAND)
num_eth_ports++;
@@ -2141,44 +2472,61 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
}
EXPORT_SYMBOL(ib_detach_mcast);
-struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller)
+/**
+ * ib_alloc_xrcd_user - Allocates an XRC domain.
+ * @device: The device on which to allocate the XRC domain.
+ * @inode: inode to connect XRCD
+ * @udata: Valid user data or NULL for kernel object
+ */
+struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device,
+ struct inode *inode, struct ib_udata *udata)
{
struct ib_xrcd *xrcd;
+ int ret;
if (!device->ops.alloc_xrcd)
return ERR_PTR(-EOPNOTSUPP);
- xrcd = device->ops.alloc_xrcd(device, NULL);
- if (!IS_ERR(xrcd)) {
- xrcd->device = device;
- xrcd->inode = NULL;
- atomic_set(&xrcd->usecnt, 0);
- mutex_init(&xrcd->tgt_qp_mutex);
- INIT_LIST_HEAD(&xrcd->tgt_qp_list);
- }
+ xrcd = rdma_zalloc_drv_obj(device, ib_xrcd);
+ if (!xrcd)
+ return ERR_PTR(-ENOMEM);
+ xrcd->device = device;
+ xrcd->inode = inode;
+ atomic_set(&xrcd->usecnt, 0);
+ init_rwsem(&xrcd->tgt_qps_rwsem);
+ xa_init(&xrcd->tgt_qps);
+
+ ret = device->ops.alloc_xrcd(xrcd, udata);
+ if (ret)
+ goto err;
return xrcd;
+err:
+ kfree(xrcd);
+ return ERR_PTR(ret);
}
-EXPORT_SYMBOL(__ib_alloc_xrcd);
+EXPORT_SYMBOL(ib_alloc_xrcd_user);
-int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
+/**
+ * ib_dealloc_xrcd_user - Deallocates an XRC domain.
+ * @xrcd: The XRC domain to deallocate.
+ * @udata: Valid user data or NULL for kernel object
+ */
+int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata)
{
- struct ib_qp *qp;
int ret;
if (atomic_read(&xrcd->usecnt))
return -EBUSY;
- while (!list_empty(&xrcd->tgt_qp_list)) {
- qp = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, xrcd_list);
- ret = ib_destroy_qp(qp);
- if (ret)
- return ret;
- }
-
- return xrcd->device->ops.dealloc_xrcd(xrcd, udata);
+ WARN_ON(!xa_empty(&xrcd->tgt_qps));
+ ret = xrcd->device->ops.dealloc_xrcd(xrcd, udata);
+ if (ret)
+ return ret;
+ kfree(xrcd);
+ return ret;
}
-EXPORT_SYMBOL(ib_dealloc_xrcd);
+EXPORT_SYMBOL(ib_dealloc_xrcd_user);
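For a kernel-internal caller both inode and udata are simply NULL; a minimal sketch pairing the two entry points, assuming device is a registered ib_device:

struct ib_xrcd *xrcd;
int ret;

xrcd = ib_alloc_xrcd_user(device, NULL, NULL);	/* kernel object: no inode/udata */
if (IS_ERR(xrcd))
	return PTR_ERR(xrcd);

/* ... create/open XRC TGT QPs tracked in xrcd->tgt_qps ... */

ret = ib_dealloc_xrcd_user(xrcd, NULL);		/* returns -EBUSY while still in use */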
/**
* ib_create_wq - Creates a WQ associated with the specified protection
@@ -2220,110 +2568,28 @@ struct ib_wq *ib_create_wq(struct ib_pd *pd,
EXPORT_SYMBOL(ib_create_wq);
/**
- * ib_destroy_wq - Destroys the specified user WQ.
+ * ib_destroy_wq_user - Destroys the specified user WQ.
* @wq: The WQ to destroy.
* @udata: Valid user data
*/
-int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
+int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata)
{
- int err;
struct ib_cq *cq = wq->cq;
struct ib_pd *pd = wq->pd;
+ int ret;
if (atomic_read(&wq->usecnt))
return -EBUSY;
- err = wq->device->ops.destroy_wq(wq, udata);
- if (!err) {
- atomic_dec(&pd->usecnt);
- atomic_dec(&cq->usecnt);
- }
- return err;
-}
-EXPORT_SYMBOL(ib_destroy_wq);
-
-/**
- * ib_modify_wq - Modifies the specified WQ.
- * @wq: The WQ to modify.
- * @wq_attr: On input, specifies the WQ attributes to modify.
- * @wq_attr_mask: A bit-mask used to specify which attributes of the WQ
- * are being modified.
- * On output, the current values of selected WQ attributes are returned.
- */
-int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
- u32 wq_attr_mask)
-{
- int err;
-
- if (!wq->device->ops.modify_wq)
- return -EOPNOTSUPP;
-
- err = wq->device->ops.modify_wq(wq, wq_attr, wq_attr_mask, NULL);
- return err;
-}
-EXPORT_SYMBOL(ib_modify_wq);
-
-/*
- * ib_create_rwq_ind_table - Creates a RQ Indirection Table.
- * @device: The device on which to create the rwq indirection table.
- * @ib_rwq_ind_table_init_attr: A list of initial attributes required to
- * create the Indirection Table.
- *
- * Note: The life time of ib_rwq_ind_table_init_attr->ind_tbl is not less
- * than the created ib_rwq_ind_table object and the caller is responsible
- * for its memory allocation/free.
- */
-struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr)
-{
- struct ib_rwq_ind_table *rwq_ind_table;
- int i;
- u32 table_size;
-
- if (!device->ops.create_rwq_ind_table)
- return ERR_PTR(-EOPNOTSUPP);
-
- table_size = (1 << init_attr->log_ind_tbl_size);
- rwq_ind_table = device->ops.create_rwq_ind_table(device,
- init_attr, NULL);
- if (IS_ERR(rwq_ind_table))
- return rwq_ind_table;
-
- rwq_ind_table->ind_tbl = init_attr->ind_tbl;
- rwq_ind_table->log_ind_tbl_size = init_attr->log_ind_tbl_size;
- rwq_ind_table->device = device;
- rwq_ind_table->uobject = NULL;
- atomic_set(&rwq_ind_table->usecnt, 0);
-
- for (i = 0; i < table_size; i++)
- atomic_inc(&rwq_ind_table->ind_tbl[i]->usecnt);
-
- return rwq_ind_table;
-}
-EXPORT_SYMBOL(ib_create_rwq_ind_table);
-
-/*
- * ib_destroy_rwq_ind_table - Destroys the specified Indirection Table.
- * @wq_ind_table: The Indirection Table to destroy.
-*/
-int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table)
-{
- int err, i;
- u32 table_size = (1 << rwq_ind_table->log_ind_tbl_size);
- struct ib_wq **ind_tbl = rwq_ind_table->ind_tbl;
-
- if (atomic_read(&rwq_ind_table->usecnt))
- return -EBUSY;
-
- err = rwq_ind_table->device->ops.destroy_rwq_ind_table(rwq_ind_table);
- if (!err) {
- for (i = 0; i < table_size; i++)
- atomic_dec(&ind_tbl[i]->usecnt);
- }
+ ret = wq->device->ops.destroy_wq(wq, udata);
+ if (ret)
+ return ret;
- return err;
+ atomic_dec(&pd->usecnt);
+ atomic_dec(&cq->usecnt);
+ return ret;
}
-EXPORT_SYMBOL(ib_destroy_rwq_ind_table);
+EXPORT_SYMBOL(ib_destroy_wq_user);
int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status)
@@ -2335,7 +2601,7 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
}
EXPORT_SYMBOL(ib_check_mr_status);
-int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
+int ib_set_vf_link_state(struct ib_device *device, int vf, u32 port,
int state)
{
if (!device->ops.set_vf_link_state)
@@ -2345,7 +2611,7 @@ int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
}
EXPORT_SYMBOL(ib_set_vf_link_state);
-int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
+int ib_get_vf_config(struct ib_device *device, int vf, u32 port,
struct ifla_vf_info *info)
{
if (!device->ops.get_vf_config)
@@ -2355,7 +2621,7 @@ int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
}
EXPORT_SYMBOL(ib_get_vf_config);
-int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
+int ib_get_vf_stats(struct ib_device *device, int vf, u32 port,
struct ifla_vf_stats *stats)
{
if (!device->ops.get_vf_stats)
@@ -2365,7 +2631,7 @@ int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
}
EXPORT_SYMBOL(ib_get_vf_stats);
-int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
+int ib_set_vf_guid(struct ib_device *device, int vf, u32 port, u64 guid,
int type)
{
if (!device->ops.set_vf_guid)
@@ -2375,6 +2641,53 @@ int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
}
EXPORT_SYMBOL(ib_set_vf_guid);
+int ib_get_vf_guid(struct ib_device *device, int vf, u32 port,
+ struct ifla_vf_guid *node_guid,
+ struct ifla_vf_guid *port_guid)
+{
+ if (!device->ops.get_vf_guid)
+ return -EOPNOTSUPP;
+
+ return device->ops.get_vf_guid(device, vf, port, node_guid, port_guid);
+}
+EXPORT_SYMBOL(ib_get_vf_guid);
+/**
+ * ib_map_mr_sg_pi() - Map the dma mapped SG lists for PI (protection
+ * information) and set an appropriate memory region for registration.
+ * @mr: memory region
+ * @data_sg: dma mapped scatterlist for data
+ * @data_sg_nents: number of entries in data_sg
+ * @data_sg_offset: offset in bytes into data_sg
+ * @meta_sg: dma mapped scatterlist for metadata
+ * @meta_sg_nents: number of entries in meta_sg
+ * @meta_sg_offset: offset in bytes into meta_sg
+ * @page_size: page vector desired page size
+ *
+ * Constraints:
+ * - The MR must be allocated with type IB_MR_TYPE_INTEGRITY.
+ *
+ * Return: 0 on success.
+ *
+ * After this completes successfully, the memory region
+ * is ready for registration.
+ */
+int ib_map_mr_sg_pi(struct ib_mr *mr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset, unsigned int page_size)
+{
+ if (unlikely(!mr->device->ops.map_mr_sg_pi ||
+ WARN_ON_ONCE(mr->type != IB_MR_TYPE_INTEGRITY)))
+ return -EOPNOTSUPP;
+
+ mr->page_size = page_size;
+
+ return mr->device->ops.map_mr_sg_pi(mr, data_sg, data_sg_nents,
+ data_sg_offset, meta_sg,
+ meta_sg_nents, meta_sg_offset);
+}
+EXPORT_SYMBOL(ib_map_mr_sg_pi);
+
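/*
 * Illustrative sketch (editorial addition, not part of this patch): how a
 * consumer might pair ib_alloc_mr_integrity() with ib_map_mr_sg_pi() before
 * posting an IB_WR_REG_MR_INTEGRITY work request. example_reg_pi_mr() and
 * its arguments are hypothetical; both scatterlists are assumed to be DMA
 * mapped already, and sig_attrs setup is omitted for brevity.
 */
static int example_reg_pi_mr(struct ib_pd *pd,
			     struct scatterlist *data_sg, int data_nents,
			     struct scatterlist *meta_sg, int meta_nents)
{
	struct ib_mr *mr;
	int ret;

	mr = ib_alloc_mr_integrity(pd, data_nents, meta_nents);
	if (IS_ERR(mr))
		return PTR_ERR(mr);

	/* PAGE_SIZE is just an example block size choice. */
	ret = ib_map_mr_sg_pi(mr, data_sg, data_nents, NULL,
			      meta_sg, meta_nents, NULL, PAGE_SIZE);
	if (ret < 0) {
		ib_dereg_mr(mr);
		return ret;
	}

	/* mr can now be registered with an IB_WR_REG_MR_INTEGRITY WR. */
	return 0;
}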
/**
* ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list
* and set it the memory region.
@@ -2385,6 +2698,7 @@ EXPORT_SYMBOL(ib_set_vf_guid);
* @page_size: page vector desired page size
*
* Constraints:
+ *
* - The first sg element is allowed to have an offset.
* - Each sg element must either be aligned to page_size or virtually
* contiguous to the previous element. In case an sg element has a
@@ -2418,10 +2732,12 @@ EXPORT_SYMBOL(ib_map_mr_sg);
* @mr: memory region
* @sgl: dma mapped scatterlist
* @sg_nents: number of entries in sg
- * @sg_offset_p: IN: start offset in bytes into sg
- * OUT: offset in bytes for element n of the sg of the first
+ * @sg_offset_p: ==== =======================================================
+ * IN start offset in bytes into sg
+ * OUT offset in bytes for element n of the sg of the first
* byte that has not been processed where n is the return
* value of this function.
+ * ==== =======================================================
* @set_page: driver page assignment function pointer
*
* Core service helper for drivers to convert the largest
@@ -2586,6 +2902,72 @@ static void __ib_drain_rq(struct ib_qp *qp)
wait_for_completion(&rdrain.done);
}
+/*
+ * __ib_drain_srq() - Block until Last WQE Reached event arrives, or timeout
+ * expires.
+ * @qp: queue pair associated with SRQ to drain
+ *
+ * Quoting 10.3.1 Queue Pair and EE Context States:
+ *
+ * Note, for QPs that are associated with an SRQ, the Consumer should take the
+ * QP through the Error State before invoking a Destroy QP or a Modify QP to the
+ * Reset State. The Consumer may invoke the Destroy QP without first performing
+ * a Modify QP to the Error State and waiting for the Affiliated Asynchronous
+ * Last WQE Reached Event. However, if the Consumer does not wait for the
+ * Affiliated Asynchronous Last WQE Reached Event, then WQE and Data Segment
+ * leakage may occur. Therefore, it is good programming practice to tear down a
+ * QP that is associated with an SRQ by using the following process:
+ *
+ * - Put the QP in the Error State
+ * - Wait for the Affiliated Asynchronous Last WQE Reached Event;
+ * - either:
+ * drain the CQ by invoking the Poll CQ verb and either wait for CQ
+ * to be empty or the number of Poll CQ operations has exceeded
+ * CQ capacity size;
+ * - or
+ * post another WR that completes on the same CQ and wait for this
+ * WR to return as a WC;
+ * - and then invoke a Destroy QP or Reset QP.
+ *
+ * We use the first option.
+ */
+static void __ib_drain_srq(struct ib_qp *qp)
+{
+ struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+ struct ib_cq *cq;
+ int n, polled = 0;
+ int ret;
+
+ if (!qp->srq) {
+ WARN_ONCE(1, "QP 0x%p is not associated with SRQ\n", qp);
+ return;
+ }
+
+ ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+ if (ret) {
+ WARN_ONCE(ret, "failed to drain shared recv queue: %d\n", ret);
+ return;
+ }
+
+ if (ib_srq_has_cq(qp->srq->srq_type)) {
+ cq = qp->srq->ext.cq;
+ } else if (qp->recv_cq) {
+ cq = qp->recv_cq;
+ } else {
+ WARN_ONCE(1, "QP 0x%p has no CQ associated with SRQ\n", qp);
+ return;
+ }
+
+ if (wait_for_completion_timeout(&qp->srq_completion, 60 * HZ) > 0) {
+ while (polled != cq->cqe) {
+ n = ib_process_cq_direct(cq, cq->cqe - polled);
+ if (!n)
+ return;
+ polled += n;
+ }
+ }
+}
+
/**
* ib_drain_sq() - Block until all SQ CQEs have been consumed by the
* application.
@@ -2611,6 +2993,7 @@ void ib_drain_sq(struct ib_qp *qp)
qp->device->ops.drain_sq(qp);
else
__ib_drain_sq(qp);
+ trace_cq_drain_complete(qp->send_cq);
}
EXPORT_SYMBOL(ib_drain_sq);
@@ -2639,6 +3022,7 @@ void ib_drain_rq(struct ib_qp *qp)
qp->device->ops.drain_rq(qp);
else
__ib_drain_rq(qp);
+ trace_cq_drain_complete(qp->recv_cq);
}
EXPORT_SYMBOL(ib_drain_rq);
@@ -2662,10 +3046,12 @@ void ib_drain_qp(struct ib_qp *qp)
ib_drain_sq(qp);
if (!qp->srq)
ib_drain_rq(qp);
+ else
+ __ib_drain_srq(qp);
}
EXPORT_SYMBOL(ib_drain_qp);
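/*
 * Illustrative sketch (editorial addition, not part of this patch): a ULP
 * tearing down a connection typically drains before destroy, so that all
 * in-flight completions are reaped whether the QP uses its own RQ or an
 * SRQ. example_teardown_qp() is hypothetical.
 */
static void example_teardown_qp(struct ib_qp *qp)
{
	ib_drain_qp(qp);	/* drains the SQ, and the RQ or the SRQ path */
	ib_destroy_qp(qp);
}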
-struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
+struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num,
enum rdma_netdev_t type, const char *name,
unsigned char name_assign_type,
void (*setup)(struct net_device *))
@@ -2691,7 +3077,7 @@ struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
}
EXPORT_SYMBOL(rdma_alloc_netdev);
-int rdma_init_netdev(struct ib_device *device, u8 port_num,
+int rdma_init_netdev(struct ib_device *device, u32 port_num,
enum rdma_netdev_t type, const char *name,
unsigned char name_assign_type,
void (*setup)(struct net_device *),
@@ -2729,20 +3115,73 @@ EXPORT_SYMBOL(__rdma_block_iter_start);
bool __rdma_block_iter_next(struct ib_block_iter *biter)
{
unsigned int block_offset;
+ unsigned int delta;
if (!biter->__sg_nents || !biter->__sg)
return false;
biter->__dma_addr = sg_dma_address(biter->__sg) + biter->__sg_advance;
block_offset = biter->__dma_addr & (BIT_ULL(biter->__pg_bit) - 1);
- biter->__sg_advance += BIT_ULL(biter->__pg_bit) - block_offset;
+ delta = BIT_ULL(biter->__pg_bit) - block_offset;
- if (biter->__sg_advance >= sg_dma_len(biter->__sg)) {
+ while (biter->__sg_nents && biter->__sg &&
+ sg_dma_len(biter->__sg) - biter->__sg_advance <= delta) {
+ delta -= sg_dma_len(biter->__sg) - biter->__sg_advance;
biter->__sg_advance = 0;
biter->__sg = sg_next(biter->__sg);
biter->__sg_nents--;
}
+ biter->__sg_advance += delta;
return true;
}
EXPORT_SYMBOL(__rdma_block_iter_next);
+
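/*
 * Illustrative sketch (editorial addition, not part of this patch): the
 * block iterator is normally consumed through rdma_umem_for_each_dma_block()
 * from rdma/ib_umem.h, which yields block-size-aligned DMA addresses even
 * when individual SG entries are smaller than the chosen block size (the
 * case the loop above was extended to handle). example_fill_pas() and its
 * pas array are hypothetical.
 */
static void example_fill_pas(struct ib_umem *umem, u64 *pas,
			     unsigned long page_size)
{
	struct ib_block_iter biter;
	int i = 0;

	rdma_umem_for_each_dma_block(umem, &biter, page_size)
		pas[i++] = rdma_block_iter_dma_address(&biter);
}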
+/**
+ * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct
+ * for the drivers.
+ * @descs: array of static descriptors
+ * @num_counters: number of elements in array
+ * @lifespan: milliseconds between updates
+ */
+struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
+ const struct rdma_stat_desc *descs, int num_counters,
+ unsigned long lifespan)
+{
+ struct rdma_hw_stats *stats;
+
+ stats = kzalloc(struct_size(stats, value, num_counters), GFP_KERNEL);
+ if (!stats)
+ return NULL;
+
+ stats->is_disabled = kcalloc(BITS_TO_LONGS(num_counters),
+ sizeof(*stats->is_disabled), GFP_KERNEL);
+ if (!stats->is_disabled)
+ goto err;
+
+ stats->descs = descs;
+ stats->num_counters = num_counters;
+ stats->lifespan = msecs_to_jiffies(lifespan);
+ mutex_init(&stats->lock);
+
+ return stats;
+
+err:
+ kfree(stats);
+ return NULL;
+}
+EXPORT_SYMBOL(rdma_alloc_hw_stats_struct);
+
+/**
+ * rdma_free_hw_stats_struct - Helper function to release rdma_hw_stats
+ * @stats: statistics to release
+ */
+void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats)
+{
+ if (!stats)
+ return;
+
+ kfree(stats->is_disabled);
+ kfree(stats);
+}
+EXPORT_SYMBOL(rdma_free_hw_stats_struct);
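/*
 * Illustrative sketch (editorial addition, not part of this patch): a driver
 * would normally build its rdma_hw_stats from a static descriptor table in
 * its .alloc_hw_port_stats callback, and the core releases it later via
 * rdma_free_hw_stats_struct(). The descriptor names and example_*
 * identifiers below are made up.
 */
static const struct rdma_stat_desc example_port_stat_descs[] = {
	[0].name = "rx_example_packets",
	[1].name = "tx_example_packets",
};

static struct rdma_hw_stats *example_alloc_hw_port_stats(struct ib_device *ibdev,
							  u32 port_num)
{
	/* lifespan is in milliseconds; the core default is commonly used */
	return rdma_alloc_hw_stats_struct(example_port_stat_descs,
					  ARRAY_SIZE(example_port_stat_descs),
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}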
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index 77094be1b262..c42b22ac3303 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -1,17 +1,18 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_INFINIBAND_MTHCA) += mthca/
-obj-$(CONFIG_INFINIBAND_QIB) += qib/
-obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/
obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/
obj-$(CONFIG_INFINIBAND_EFA) += efa/
-obj-$(CONFIG_INFINIBAND_I40IW) += i40iw/
+obj-$(CONFIG_INFINIBAND_IRDMA) += irdma/
+obj-$(CONFIG_MANA_INFINIBAND) += mana/
obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/
-obj-$(CONFIG_INFINIBAND_NES) += nes/
obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/
obj-$(CONFIG_INFINIBAND_VMWARE_PVRDMA) += vmw_pvrdma/
obj-$(CONFIG_INFINIBAND_USNIC) += usnic/
obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/
-obj-$(CONFIG_INFINIBAND_HNS) += hns/
+obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns/
obj-$(CONFIG_INFINIBAND_QEDR) += qedr/
obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re/
+obj-$(CONFIG_INFINIBAND_BNG_RE) += bng_re/
+obj-$(CONFIG_INFINIBAND_ERDMA) += erdma/
+obj-$(CONFIG_INFINIBAND_IONIC) += ionic/
diff --git a/drivers/infiniband/hw/bng_re/Kconfig b/drivers/infiniband/hw/bng_re/Kconfig
new file mode 100644
index 000000000000..85845f72c64d
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/Kconfig
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config INFINIBAND_BNG_RE
+ tristate "Broadcom Next generation RoCE HCA support"
+ depends on 64BIT
+ depends on INET && DCB && BNGE
+ help
+ This driver supports Broadcom Next generation
+ 50/100/200/400/800 gigabit RoCE HCAs. The module
+ will be called bng_re. To compile this driver
+ as a module, choose M here.
diff --git a/drivers/infiniband/hw/bng_re/Makefile b/drivers/infiniband/hw/bng_re/Makefile
new file mode 100644
index 000000000000..c6aaaf853c77
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+ccflags-y := -I $(srctree)/drivers/net/ethernet/broadcom/bnge -I $(srctree)/drivers/infiniband/hw/bnxt_re
+
+obj-$(CONFIG_INFINIBAND_BNG_RE) += bng_re.o
+
+bng_re-y := bng_dev.o bng_fw.o \
+ bng_res.o bng_sp.o \
+ bng_debugfs.o
diff --git a/drivers/infiniband/hw/bng_re/bng_debugfs.c b/drivers/infiniband/hw/bng_re/bng_debugfs.c
new file mode 100644
index 000000000000..9ec5a8785250
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/bng_debugfs.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2025 Broadcom.
+#include <linux/debugfs.h>
+#include <linux/pci.h>
+
+#include <rdma/ib_verbs.h>
+
+#include "bng_res.h"
+#include "bng_fw.h"
+#include "bnge.h"
+#include "bnge_auxr.h"
+#include "bng_re.h"
+#include "bng_debugfs.h"
+
+static struct dentry *bng_re_debugfs_root;
+
+void bng_re_debugfs_add_pdev(struct bng_re_dev *rdev)
+{
+ struct pci_dev *pdev = rdev->aux_dev->pdev;
+
+ rdev->dbg_root =
+ debugfs_create_dir(dev_name(&pdev->dev), bng_re_debugfs_root);
+}
+
+void bng_re_debugfs_rem_pdev(struct bng_re_dev *rdev)
+{
+ debugfs_remove_recursive(rdev->dbg_root);
+ rdev->dbg_root = NULL;
+}
+
+void bng_re_register_debugfs(void)
+{
+ bng_re_debugfs_root = debugfs_create_dir("bng_re", NULL);
+}
+
+void bng_re_unregister_debugfs(void)
+{
+ debugfs_remove(bng_re_debugfs_root);
+}
diff --git a/drivers/infiniband/hw/bng_re/bng_debugfs.h b/drivers/infiniband/hw/bng_re/bng_debugfs.h
new file mode 100644
index 000000000000..baef71df4242
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/bng_debugfs.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2025 Broadcom.
+
+#ifndef __BNG_RE_DEBUGFS__
+#define __BNG_RE_DEBUGFS__
+
+void bng_re_debugfs_add_pdev(struct bng_re_dev *rdev);
+void bng_re_debugfs_rem_pdev(struct bng_re_dev *rdev);
+
+void bng_re_register_debugfs(void);
+void bng_re_unregister_debugfs(void);
+#endif
diff --git a/drivers/infiniband/hw/bng_re/bng_dev.c b/drivers/infiniband/hw/bng_re/bng_dev.c
new file mode 100644
index 000000000000..d8f8d7f7075f
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/bng_dev.c
@@ -0,0 +1,534 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2025 Broadcom.
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/auxiliary_bus.h>
+
+#include <rdma/ib_verbs.h>
+
+#include "bng_res.h"
+#include "bng_sp.h"
+#include "bng_fw.h"
+#include "bnge.h"
+#include "bnge_auxr.h"
+#include "bng_re.h"
+#include "bnge_hwrm.h"
+#include "bng_debugfs.h"
+
+MODULE_AUTHOR("Siva Reddy Kallam <siva.kallam@broadcom.com>");
+MODULE_DESCRIPTION(BNG_RE_DESC);
+MODULE_LICENSE("Dual BSD/GPL");
+
+static struct bng_re_dev *bng_re_dev_add(struct auxiliary_device *adev,
+ struct bnge_auxr_dev *aux_dev)
+{
+ struct bng_re_dev *rdev;
+
+ /* Allocate bng_re_dev instance */
+ rdev = ib_alloc_device(bng_re_dev, ibdev);
+ if (!rdev) {
+ pr_err("%s: bng_re_dev allocation failure!", KBUILD_MODNAME);
+ return NULL;
+ }
+
+ /* Assign auxiliary device specific data */
+ rdev->netdev = aux_dev->net;
+ rdev->aux_dev = aux_dev;
+ rdev->adev = adev;
+ rdev->fn_id = rdev->aux_dev->pdev->devfn;
+
+ return rdev;
+}
+
+static int bng_re_register_netdev(struct bng_re_dev *rdev)
+{
+ struct bnge_auxr_dev *aux_dev;
+
+ aux_dev = rdev->aux_dev;
+ return bnge_register_dev(aux_dev, rdev->adev);
+}
+
+static void bng_re_destroy_chip_ctx(struct bng_re_dev *rdev)
+{
+ struct bng_re_chip_ctx *chip_ctx;
+
+ if (!rdev->chip_ctx)
+ return;
+
+ kfree(rdev->dev_attr);
+ rdev->dev_attr = NULL;
+
+ chip_ctx = rdev->chip_ctx;
+ rdev->chip_ctx = NULL;
+ rdev->rcfw.res = NULL;
+ rdev->bng_res.cctx = NULL;
+ rdev->bng_res.pdev = NULL;
+ kfree(chip_ctx);
+}
+
+static int bng_re_setup_chip_ctx(struct bng_re_dev *rdev)
+{
+ struct bng_re_chip_ctx *chip_ctx;
+ struct bnge_auxr_dev *aux_dev;
+ int rc = -ENOMEM;
+
+ aux_dev = rdev->aux_dev;
+ rdev->bng_res.pdev = aux_dev->pdev;
+ rdev->rcfw.res = &rdev->bng_res;
+ chip_ctx = kzalloc(sizeof(*chip_ctx), GFP_KERNEL);
+ if (!chip_ctx)
+ return -ENOMEM;
+ chip_ctx->chip_num = aux_dev->chip_num;
+ chip_ctx->hw_stats_size = aux_dev->hw_ring_stats_size;
+
+ rdev->chip_ctx = chip_ctx;
+ rdev->bng_res.cctx = rdev->chip_ctx;
+ rdev->dev_attr = kzalloc(sizeof(*rdev->dev_attr), GFP_KERNEL);
+ if (!rdev->dev_attr)
+ goto free_chip_ctx;
+ rdev->bng_res.dattr = rdev->dev_attr;
+
+ return 0;
+free_chip_ctx:
+ kfree(rdev->chip_ctx);
+ rdev->chip_ctx = NULL;
+ return rc;
+}
+
+static void bng_re_init_hwrm_hdr(struct input *hdr, u16 opcd)
+{
+ hdr->req_type = cpu_to_le16(opcd);
+ hdr->cmpl_ring = cpu_to_le16(-1);
+ hdr->target_id = cpu_to_le16(-1);
+}
+
+static void bng_re_fill_fw_msg(struct bnge_fw_msg *fw_msg, void *msg,
+ int msg_len, void *resp, int resp_max_len,
+ int timeout)
+{
+ fw_msg->msg = msg;
+ fw_msg->msg_len = msg_len;
+ fw_msg->resp = resp;
+ fw_msg->resp_max_len = resp_max_len;
+ fw_msg->timeout = timeout;
+}
+
+static int bng_re_net_ring_free(struct bng_re_dev *rdev,
+ u16 fw_ring_id, int type)
+{
+ struct bnge_auxr_dev *aux_dev = rdev->aux_dev;
+ struct hwrm_ring_free_input req = {};
+ struct hwrm_ring_free_output resp;
+ struct bnge_fw_msg fw_msg = {};
+ int rc = -EINVAL;
+
+ if (!rdev)
+ return rc;
+
+ if (!aux_dev)
+ return rc;
+
+ bng_re_init_hwrm_hdr((void *)&req, HWRM_RING_FREE);
+ req.ring_type = type;
+ req.ring_id = cpu_to_le16(fw_ring_id);
+ bng_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), BNGE_DFLT_HWRM_CMD_TIMEOUT);
+ rc = bnge_send_msg(aux_dev, &fw_msg);
+ if (rc)
+ ibdev_err(&rdev->ibdev, "Failed to free HW ring %d: %#x",
+ req.ring_id, rc);
+ return rc;
+}
+
+static int bng_re_net_ring_alloc(struct bng_re_dev *rdev,
+ struct bng_re_ring_attr *ring_attr,
+ u16 *fw_ring_id)
+{
+ struct bnge_auxr_dev *aux_dev = rdev->aux_dev;
+ struct hwrm_ring_alloc_input req = {};
+ struct hwrm_ring_alloc_output resp;
+ struct bnge_fw_msg fw_msg = {};
+ int rc = -EINVAL;
+
+ if (!aux_dev)
+ return rc;
+
+ bng_re_init_hwrm_hdr((void *)&req, HWRM_RING_ALLOC);
+ req.enables = 0;
+ req.page_tbl_addr = cpu_to_le64(ring_attr->dma_arr[0]);
+ if (ring_attr->pages > 1) {
+ /* Page size is in log2 units */
+ req.page_size = BNGE_PAGE_SHIFT;
+ req.page_tbl_depth = 1;
+ }
+ req.fbo = 0;
+ /* Association of ring index with doorbell index and MSIX number */
+ req.logical_id = cpu_to_le16(ring_attr->lrid);
+ req.length = cpu_to_le32(ring_attr->depth + 1);
+ req.ring_type = ring_attr->type;
+ req.int_mode = ring_attr->mode;
+ bng_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), BNGE_DFLT_HWRM_CMD_TIMEOUT);
+ rc = bnge_send_msg(aux_dev, &fw_msg);
+ if (!rc)
+ *fw_ring_id = le16_to_cpu(resp.ring_id);
+
+ return rc;
+}
+
+static int bng_re_stats_ctx_free(struct bng_re_dev *rdev)
+{
+ struct bnge_auxr_dev *aux_dev = rdev->aux_dev;
+ struct hwrm_stat_ctx_free_input req = {};
+ struct hwrm_stat_ctx_free_output resp = {};
+ struct bnge_fw_msg fw_msg = {};
+ int rc = -EINVAL;
+
+ if (!aux_dev)
+ return rc;
+
+ bng_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_FREE);
+ req.stat_ctx_id = cpu_to_le32(rdev->stats_ctx.fw_id);
+ bng_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), BNGE_DFLT_HWRM_CMD_TIMEOUT);
+ rc = bnge_send_msg(aux_dev, &fw_msg);
+ if (rc)
+ ibdev_err(&rdev->ibdev, "Failed to free HW stats context %#x",
+ rc);
+
+ return rc;
+}
+
+static int bng_re_stats_ctx_alloc(struct bng_re_dev *rdev)
+{
+ struct bnge_auxr_dev *aux_dev = rdev->aux_dev;
+ struct bng_re_stats *stats = &rdev->stats_ctx;
+ struct hwrm_stat_ctx_alloc_output resp = {};
+ struct hwrm_stat_ctx_alloc_input req = {};
+ struct bnge_fw_msg fw_msg = {};
+ int rc = -EINVAL;
+
+ stats->fw_id = BNGE_INVALID_STATS_CTX_ID;
+
+ if (!aux_dev)
+ return rc;
+
+ bng_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_ALLOC);
+ req.update_period_ms = cpu_to_le32(1000);
+ req.stats_dma_addr = cpu_to_le64(stats->dma_map);
+ req.stats_dma_length = cpu_to_le16(rdev->chip_ctx->hw_stats_size);
+ req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE;
+ bng_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), BNGE_DFLT_HWRM_CMD_TIMEOUT);
+ rc = bnge_send_msg(aux_dev, &fw_msg);
+ if (!rc)
+ stats->fw_id = le32_to_cpu(resp.stat_ctx_id);
+ return rc;
+}
+
+static void bng_re_query_hwrm_version(struct bng_re_dev *rdev)
+{
+ struct bnge_auxr_dev *aux_dev = rdev->aux_dev;
+ struct hwrm_ver_get_output ver_get_resp = {};
+ struct hwrm_ver_get_input ver_get_req = {};
+ struct bng_re_chip_ctx *cctx;
+ struct bnge_fw_msg fw_msg = {};
+ int rc;
+
+ bng_re_init_hwrm_hdr((void *)&ver_get_req, HWRM_VER_GET);
+ ver_get_req.hwrm_intf_maj = HWRM_VERSION_MAJOR;
+ ver_get_req.hwrm_intf_min = HWRM_VERSION_MINOR;
+ ver_get_req.hwrm_intf_upd = HWRM_VERSION_UPDATE;
+ bng_re_fill_fw_msg(&fw_msg, (void *)&ver_get_req, sizeof(ver_get_req),
+ (void *)&ver_get_resp, sizeof(ver_get_resp),
+ BNGE_DFLT_HWRM_CMD_TIMEOUT);
+ rc = bnge_send_msg(aux_dev, &fw_msg);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to query HW version, rc = 0x%x",
+ rc);
+ return;
+ }
+
+ cctx = rdev->chip_ctx;
+ cctx->hwrm_intf_ver =
+ (u64)le16_to_cpu(ver_get_resp.hwrm_intf_major) << 48 |
+ (u64)le16_to_cpu(ver_get_resp.hwrm_intf_minor) << 32 |
+ (u64)le16_to_cpu(ver_get_resp.hwrm_intf_build) << 16 |
+ le16_to_cpu(ver_get_resp.hwrm_intf_patch);
+
+ cctx->hwrm_cmd_max_timeout = le16_to_cpu(ver_get_resp.max_req_timeout);
+
+ if (!cctx->hwrm_cmd_max_timeout)
+ cctx->hwrm_cmd_max_timeout = BNG_ROCE_FW_MAX_TIMEOUT;
+}
+
+static void bng_re_dev_uninit(struct bng_re_dev *rdev)
+{
+ int rc;
+
+ bng_re_debugfs_rem_pdev(rdev);
+
+ if (test_and_clear_bit(BNG_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) {
+ rc = bng_re_deinit_rcfw(&rdev->rcfw);
+ if (rc)
+ ibdev_warn(&rdev->ibdev,
+ "Failed to deinitialize RCFW: %#x", rc);
+ bng_re_stats_ctx_free(rdev);
+ bng_re_free_stats_ctx_mem(rdev->bng_res.pdev, &rdev->stats_ctx);
+ bng_re_disable_rcfw_channel(&rdev->rcfw);
+ bng_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id,
+ RING_ALLOC_REQ_RING_TYPE_NQ);
+ bng_re_free_rcfw_channel(&rdev->rcfw);
+ }
+
+ kfree(rdev->nqr);
+ rdev->nqr = NULL;
+ bng_re_destroy_chip_ctx(rdev);
+ if (test_and_clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags))
+ bnge_unregister_dev(rdev->aux_dev);
+}
+
+static int bng_re_dev_init(struct bng_re_dev *rdev)
+{
+ struct bng_re_ring_attr rattr = {};
+ struct bng_re_creq_ctx *creq;
+ u32 db_offt;
+ int vid;
+ u8 type;
+ int rc;
+
+ /* Register a new RoCE device instance with the netdev */
+ rc = bng_re_register_netdev(rdev);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to register with netedev: %#x\n", rc);
+ return -EINVAL;
+ }
+
+ set_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+
+ if (rdev->aux_dev->auxr_info->msix_requested < BNG_RE_MIN_MSIX) {
+ ibdev_err(&rdev->ibdev,
+ "RoCE requires minimum 2 MSI-X vectors, but only %d reserved\n",
+ rdev->aux_dev->auxr_info->msix_requested);
+ bnge_unregister_dev(rdev->aux_dev);
+ clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+ return -EINVAL;
+ }
+ ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n",
+ rdev->aux_dev->auxr_info->msix_requested);
+
+ rc = bng_re_setup_chip_ctx(rdev);
+ if (rc) {
+ bnge_unregister_dev(rdev->aux_dev);
+ clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+ ibdev_err(&rdev->ibdev, "Failed to get chip context\n");
+ return -EINVAL;
+ }
+
+ bng_re_query_hwrm_version(rdev);
+
+ rc = bng_re_alloc_fw_channel(&rdev->bng_res, &rdev->rcfw);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to allocate RCFW Channel: %#x\n", rc);
+ goto fail;
+ }
+
+ /* Allocate nq record memory */
+ rdev->nqr = kzalloc(sizeof(*rdev->nqr), GFP_KERNEL);
+ if (!rdev->nqr) {
+ bng_re_destroy_chip_ctx(rdev);
+ bnge_unregister_dev(rdev->aux_dev);
+ clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+ return -ENOMEM;
+ }
+
+ rdev->nqr->num_msix = rdev->aux_dev->auxr_info->msix_requested;
+ memcpy(rdev->nqr->msix_entries, rdev->aux_dev->msix_info,
+ sizeof(struct bnge_msix_info) * rdev->nqr->num_msix);
+
+ type = RING_ALLOC_REQ_RING_TYPE_NQ;
+ creq = &rdev->rcfw.creq;
+ rattr.dma_arr = creq->hwq.pbl[BNG_PBL_LVL_0].pg_map_arr;
+ rattr.pages = creq->hwq.pbl[creq->hwq.level].pg_count;
+ rattr.type = type;
+ rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX;
+ rattr.depth = BNG_FW_CREQE_MAX_CNT - 1;
+ rattr.lrid = rdev->nqr->msix_entries[BNG_RE_CREQ_NQ_IDX].ring_idx;
+ rc = bng_re_net_ring_alloc(rdev, &rattr, &creq->ring_id);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to allocate CREQ: %#x\n", rc);
+ goto free_rcfw;
+ }
+ db_offt = rdev->nqr->msix_entries[BNG_RE_CREQ_NQ_IDX].db_offset;
+ vid = rdev->nqr->msix_entries[BNG_RE_CREQ_NQ_IDX].vector;
+
+ rc = bng_re_enable_fw_channel(&rdev->rcfw,
+ vid, db_offt);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to enable RCFW channel: %#x\n",
+ rc);
+ goto free_ring;
+ }
+
+ rc = bng_re_get_dev_attr(&rdev->rcfw);
+ if (rc)
+ goto disable_rcfw;
+
+ bng_re_debugfs_add_pdev(rdev);
+ rc = bng_re_alloc_stats_ctx_mem(rdev->bng_res.pdev, rdev->chip_ctx,
+ &rdev->stats_ctx);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to allocate stats context: %#x\n", rc);
+ goto disable_rcfw;
+ }
+
+ rc = bng_re_stats_ctx_alloc(rdev);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to allocate QPLIB context: %#x\n", rc);
+ goto free_stats_ctx;
+ }
+
+ rc = bng_re_init_rcfw(&rdev->rcfw, &rdev->stats_ctx);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to initialize RCFW: %#x\n", rc);
+ goto free_sctx;
+ }
+ set_bit(BNG_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags);
+
+ return 0;
+free_sctx:
+ bng_re_stats_ctx_free(rdev);
+free_stats_ctx:
+ bng_re_free_stats_ctx_mem(rdev->bng_res.pdev, &rdev->stats_ctx);
+disable_rcfw:
+ bng_re_disable_rcfw_channel(&rdev->rcfw);
+free_ring:
+ bng_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type);
+free_rcfw:
+ bng_re_free_rcfw_channel(&rdev->rcfw);
+fail:
+ bng_re_dev_uninit(rdev);
+ return rc;
+}
+
+static int bng_re_add_device(struct auxiliary_device *adev)
+{
+ struct bnge_auxr_priv *auxr_priv =
+ container_of(adev, struct bnge_auxr_priv, aux_dev);
+ struct bng_re_en_dev_info *dev_info;
+ struct bng_re_dev *rdev;
+ int rc;
+
+ dev_info = auxiliary_get_drvdata(adev);
+
+ rdev = bng_re_dev_add(adev, auxr_priv->auxr_dev);
+ if (!rdev) {
+ rc = -ENOMEM;
+ goto exit;
+ }
+
+ dev_info->rdev = rdev;
+
+ rc = bng_re_dev_init(rdev);
+ if (rc)
+ goto re_dev_dealloc;
+
+ return 0;
+
+re_dev_dealloc:
+ ib_dealloc_device(&rdev->ibdev);
+exit:
+ return rc;
+}
+
+static void bng_re_remove_device(struct bng_re_dev *rdev,
+ struct auxiliary_device *aux_dev)
+{
+ bng_re_dev_uninit(rdev);
+ ib_dealloc_device(&rdev->ibdev);
+}
+
+static int bng_re_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct bnge_auxr_priv *aux_priv =
+ container_of(adev, struct bnge_auxr_priv, aux_dev);
+ struct bng_re_en_dev_info *en_info;
+ int rc;
+
+ en_info = kzalloc(sizeof(*en_info), GFP_KERNEL);
+ if (!en_info)
+ return -ENOMEM;
+
+ en_info->auxr_dev = aux_priv->auxr_dev;
+
+ auxiliary_set_drvdata(adev, en_info);
+
+ rc = bng_re_add_device(adev);
+ if (rc)
+ kfree(en_info);
+
+ return rc;
+}
+
+static void bng_re_remove(struct auxiliary_device *adev)
+{
+ struct bng_re_en_dev_info *dev_info = auxiliary_get_drvdata(adev);
+ struct bng_re_dev *rdev;
+
+ rdev = dev_info->rdev;
+
+ if (rdev)
+ bng_re_remove_device(rdev, adev);
+ kfree(dev_info);
+}
+
+static const struct auxiliary_device_id bng_re_id_table[] = {
+ { .name = BNG_RE_ADEV_NAME ".rdma", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, bng_re_id_table);
+
+static struct auxiliary_driver bng_re_driver = {
+ .name = "rdma",
+ .probe = bng_re_probe,
+ .remove = bng_re_remove,
+ .id_table = bng_re_id_table,
+};
+
+static int __init bng_re_mod_init(void)
+{
+ int rc;
+
+ bng_re_register_debugfs();
+
+ rc = auxiliary_driver_register(&bng_re_driver);
+ if (rc) {
+ pr_err("%s: Failed to register auxiliary driver\n",
+ KBUILD_MODNAME);
+ goto unreg_debugfs;
+ }
+ return 0;
+unreg_debugfs:
+ bng_re_unregister_debugfs();
+ return rc;
+}
+
+static void __exit bng_re_mod_exit(void)
+{
+ auxiliary_driver_unregister(&bng_re_driver);
+ bng_re_unregister_debugfs();
+}
+
+module_init(bng_re_mod_init);
+module_exit(bng_re_mod_exit);
diff --git a/drivers/infiniband/hw/bng_re/bng_fw.c b/drivers/infiniband/hw/bng_re/bng_fw.c
new file mode 100644
index 000000000000..7d9539113cf5
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/bng_fw.c
@@ -0,0 +1,767 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2025 Broadcom.
+#include <linux/pci.h>
+
+#include "roce_hsi.h"
+#include "bng_res.h"
+#include "bng_fw.h"
+#include "bng_sp.h"
+
+/**
+ * bng_re_map_rc - map return type based on opcode
+ * @opcode: roce slow path opcode
+ *
+ * case #1
+ * Firmware-initiated error recovery is a safe state machine, and the
+ * driver can consider all underlying rdma resources to be freed.
+ * In this state it is safe to return success for opcodes that destroy
+ * rdma resources (like destroy qp, destroy cq, etc.).
+ *
+ * case #2
+ * If the driver detects a potential firmware stall, the state machine is
+ * not safe and the driver cannot assume that the underlying rdma resources
+ * have been freed.
+ * In this state it is not safe to return success for opcodes that destroy
+ * rdma resources (like destroy qp, destroy cq, etc.).
+ *
+ * Scope of this helper function is only for case #1.
+ *
+ * Returns:
+ * 0 to communicate success to the caller.
+ * Non-zero error code to communicate failure to the caller.
+ */
+static int bng_re_map_rc(u8 opcode)
+{
+ switch (opcode) {
+ case CMDQ_BASE_OPCODE_DESTROY_QP:
+ case CMDQ_BASE_OPCODE_DESTROY_SRQ:
+ case CMDQ_BASE_OPCODE_DESTROY_CQ:
+ case CMDQ_BASE_OPCODE_DEALLOCATE_KEY:
+ case CMDQ_BASE_OPCODE_DEREGISTER_MR:
+ case CMDQ_BASE_OPCODE_DELETE_GID:
+ case CMDQ_BASE_OPCODE_DESTROY_QP1:
+ case CMDQ_BASE_OPCODE_DESTROY_AH:
+ case CMDQ_BASE_OPCODE_DEINITIALIZE_FW:
+ case CMDQ_BASE_OPCODE_MODIFY_ROCE_CC:
+ case CMDQ_BASE_OPCODE_SET_LINK_AGGR_MODE:
+ return 0;
+ default:
+ return -ETIMEDOUT;
+ }
+}
+
+void bng_re_free_rcfw_channel(struct bng_re_rcfw *rcfw)
+{
+ kfree(rcfw->crsqe_tbl);
+ bng_re_free_hwq(rcfw->res, &rcfw->cmdq.hwq);
+ bng_re_free_hwq(rcfw->res, &rcfw->creq.hwq);
+ rcfw->pdev = NULL;
+}
+
+int bng_re_alloc_fw_channel(struct bng_re_res *res,
+ struct bng_re_rcfw *rcfw)
+{
+ struct bng_re_hwq_attr hwq_attr = {};
+ struct bng_re_sg_info sginfo = {};
+ struct bng_re_cmdq_ctx *cmdq;
+ struct bng_re_creq_ctx *creq;
+
+ rcfw->pdev = res->pdev;
+ cmdq = &rcfw->cmdq;
+ creq = &rcfw->creq;
+ rcfw->res = res;
+
+ sginfo.pgsize = PAGE_SIZE;
+ sginfo.pgshft = PAGE_SHIFT;
+
+ hwq_attr.sginfo = &sginfo;
+ hwq_attr.res = rcfw->res;
+ hwq_attr.depth = BNG_FW_CREQE_MAX_CNT;
+ hwq_attr.stride = BNG_FW_CREQE_UNITS;
+ hwq_attr.type = BNG_HWQ_TYPE_QUEUE;
+
+ if (bng_re_alloc_init_hwq(&creq->hwq, &hwq_attr)) {
+ dev_err(&rcfw->pdev->dev,
+ "HW channel CREQ allocation failed\n");
+ goto fail;
+ }
+
+ rcfw->cmdq_depth = BNG_FW_CMDQE_MAX_CNT;
+
+ sginfo.pgsize = bng_fw_cmdqe_page_size(rcfw->cmdq_depth);
+ hwq_attr.depth = rcfw->cmdq_depth & 0x7FFFFFFF;
+ hwq_attr.stride = BNG_FW_CMDQE_UNITS;
+ hwq_attr.type = BNG_HWQ_TYPE_CTX;
+ if (bng_re_alloc_init_hwq(&cmdq->hwq, &hwq_attr)) {
+ dev_err(&rcfw->pdev->dev,
+ "HW channel CMDQ allocation failed\n");
+ goto fail;
+ }
+
+ rcfw->crsqe_tbl = kcalloc(cmdq->hwq.max_elements,
+ sizeof(*rcfw->crsqe_tbl), GFP_KERNEL);
+ if (!rcfw->crsqe_tbl)
+ goto fail;
+
+ spin_lock_init(&rcfw->tbl_lock);
+
+ rcfw->max_timeout = res->cctx->hwrm_cmd_max_timeout;
+ return 0;
+
+fail:
+ bng_re_free_rcfw_channel(rcfw);
+ return -ENOMEM;
+}
+
+static int bng_re_process_qp_event(struct bng_re_rcfw *rcfw,
+ struct creq_qp_event *qp_event,
+ u32 *num_wait)
+{
+ struct bng_re_hwq *hwq = &rcfw->cmdq.hwq;
+ struct bng_re_crsqe *crsqe;
+ u32 req_size;
+ u16 cookie;
+ bool is_waiter_alive;
+ struct pci_dev *pdev;
+ u32 wait_cmds = 0;
+ int rc = 0;
+
+ pdev = rcfw->pdev;
+ switch (qp_event->event) {
+ case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
+ dev_err(&pdev->dev, "Received QP error notification\n");
+ break;
+ default:
+ /*
+ * Command Response
+ * cmdq->lock needs to be acquired to synchronize
+ * the command send and completion reaping. This function
+ * is always called with creq->lock held. Using
+ * the nested variant of spin_lock.
+ *
+ */
+
+ spin_lock_nested(&hwq->lock, SINGLE_DEPTH_NESTING);
+ cookie = le16_to_cpu(qp_event->cookie);
+ cookie &= BNG_FW_MAX_COOKIE_VALUE;
+ crsqe = &rcfw->crsqe_tbl[cookie];
+
+ if (WARN_ONCE(test_bit(FIRMWARE_STALL_DETECTED,
+ &rcfw->cmdq.flags),
+ "Unreponsive rcfw channel detected.!!")) {
+ dev_info(&pdev->dev,
+ "rcfw timedout: cookie = %#x, free_slots = %d",
+ cookie, crsqe->free_slots);
+ spin_unlock(&hwq->lock);
+ return rc;
+ }
+
+ if (crsqe->is_waiter_alive) {
+ if (crsqe->resp) {
+ memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
+ /* Insert write memory barrier to ensure that
+ * response data is copied before clearing the
+ * flags
+ */
+ smp_wmb();
+ }
+ }
+
+ wait_cmds++;
+
+ req_size = crsqe->req_size;
+ is_waiter_alive = crsqe->is_waiter_alive;
+
+ crsqe->req_size = 0;
+ if (!is_waiter_alive)
+ crsqe->resp = NULL;
+
+ crsqe->is_in_used = false;
+
+ hwq->cons += req_size;
+
+ spin_unlock(&hwq->lock);
+ }
+ *num_wait += wait_cmds;
+ return rc;
+}
+
+/* function events */
+static int bng_re_process_func_event(struct bng_re_rcfw *rcfw,
+ struct creq_func_event *func_event)
+{
+ switch (func_event->event) {
+ case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
+ case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR:
+ case CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR:
+ case CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR:
+ case CREQ_FUNC_EVENT_EVENT_CQ_ERROR:
+ case CREQ_FUNC_EVENT_EVENT_TQM_ERROR:
+ case CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR:
+ case CREQ_FUNC_EVENT_EVENT_CFCS_ERROR:
+ case CREQ_FUNC_EVENT_EVENT_CFCC_ERROR:
+ case CREQ_FUNC_EVENT_EVENT_CFCM_ERROR:
+ case CREQ_FUNC_EVENT_EVENT_TIM_ERROR:
+ case CREQ_FUNC_EVENT_EVENT_VF_COMM_REQUEST:
+ case CREQ_FUNC_EVENT_EVENT_RESOURCE_EXHAUSTED:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* CREQ Completion handlers */
+static void bng_re_service_creq(struct tasklet_struct *t)
+{
+ struct bng_re_rcfw *rcfw = from_tasklet(rcfw, t, creq.creq_tasklet);
+ struct bng_re_creq_ctx *creq = &rcfw->creq;
+ u32 type, budget = BNG_FW_CREQ_ENTRY_POLL_BUDGET;
+ struct bng_re_hwq *hwq = &creq->hwq;
+ struct creq_base *creqe;
+ u32 num_wakeup = 0;
+ u32 hw_polled = 0;
+
+ /* Service the CREQ until budget is over */
+ spin_lock_bh(&hwq->lock);
+ while (budget > 0) {
+ creqe = bng_re_get_qe(hwq, hwq->cons, NULL);
+ if (!BNG_FW_CREQ_CMP_VALID(creqe, creq->creq_db.dbinfo.flags))
+ break;
+ /* The valid test of the entry must be done first before
+ * reading any further.
+ */
+ dma_rmb();
+
+ type = creqe->type & CREQ_BASE_TYPE_MASK;
+ switch (type) {
+ case CREQ_BASE_TYPE_QP_EVENT:
+ bng_re_process_qp_event
+ (rcfw, (struct creq_qp_event *)creqe,
+ &num_wakeup);
+ creq->stats.creq_qp_event_processed++;
+ break;
+ case CREQ_BASE_TYPE_FUNC_EVENT:
+ if (!bng_re_process_func_event
+ (rcfw, (struct creq_func_event *)creqe))
+ creq->stats.creq_func_event_processed++;
+ else
+ dev_warn(&rcfw->pdev->dev,
+ "aeqe:%#x Not handled\n", type);
+ break;
+ default:
+ if (type != ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT)
+ dev_warn(&rcfw->pdev->dev,
+ "creqe with event 0x%x not handled\n",
+ type);
+ break;
+ }
+ budget--;
+ hw_polled++;
+ bng_re_hwq_incr_cons(hwq->max_elements, &hwq->cons,
+ 1, &creq->creq_db.dbinfo.flags);
+ }
+
+ if (hw_polled)
+ bng_re_ring_nq_db(&creq->creq_db.dbinfo,
+ rcfw->res->cctx, true);
+ spin_unlock_bh(&hwq->lock);
+ if (num_wakeup)
+ wake_up_nr(&rcfw->cmdq.waitq, num_wakeup);
+}
+
+static int __send_message_basic_sanity(struct bng_re_rcfw *rcfw,
+ struct bng_re_cmdqmsg *msg,
+ u8 opcode)
+{
+ struct bng_re_cmdq_ctx *cmdq;
+
+ cmdq = &rcfw->cmdq;
+
+ if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags))
+ return -ETIMEDOUT;
+
+ if (test_bit(FIRMWARE_INITIALIZED_FLAG, &cmdq->flags) &&
+ opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) {
+ dev_err(&rcfw->pdev->dev, "RCFW already initialized!");
+ return -EINVAL;
+ }
+
+ if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &cmdq->flags) &&
+ (opcode != CMDQ_BASE_OPCODE_QUERY_FUNC &&
+ opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW &&
+ opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) {
+ dev_err(&rcfw->pdev->dev,
+ "RCFW not initialized, reject opcode 0x%x",
+ opcode);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int __send_message(struct bng_re_rcfw *rcfw,
+ struct bng_re_cmdqmsg *msg, u8 opcode)
+{
+ u32 bsize, free_slots, required_slots;
+ struct bng_re_cmdq_ctx *cmdq;
+ struct bng_re_crsqe *crsqe;
+ struct bng_fw_cmdqe *cmdqe;
+ struct bng_re_hwq *hwq;
+ u32 sw_prod, cmdq_prod;
+ struct pci_dev *pdev;
+ u16 cookie;
+ u8 *preq;
+
+ cmdq = &rcfw->cmdq;
+ hwq = &cmdq->hwq;
+ pdev = rcfw->pdev;
+
+ /* Cmdq entries are 16-byte units; each request can consume one or more
+ * cmdqe
+ */
+ spin_lock_bh(&hwq->lock);
+ required_slots = bng_re_get_cmd_slots(msg->req);
+ free_slots = HWQ_FREE_SLOTS(hwq);
+ cookie = cmdq->seq_num & BNG_FW_MAX_COOKIE_VALUE;
+ crsqe = &rcfw->crsqe_tbl[cookie];
+
+ if (required_slots >= free_slots) {
+ dev_info_ratelimited(&pdev->dev,
+ "CMDQ is full req/free %d/%d!",
+ required_slots, free_slots);
+ spin_unlock_bh(&hwq->lock);
+ return -EAGAIN;
+ }
+ __set_cmdq_base_cookie(msg->req, msg->req_sz, cpu_to_le16(cookie));
+
+ bsize = bng_re_set_cmd_slots(msg->req);
+ crsqe->free_slots = free_slots;
+ crsqe->resp = (struct creq_qp_event *)msg->resp;
+ crsqe->is_waiter_alive = true;
+ crsqe->is_in_used = true;
+ crsqe->opcode = opcode;
+
+ crsqe->req_size = __get_cmdq_base_cmd_size(msg->req, msg->req_sz);
+ if (__get_cmdq_base_resp_size(msg->req, msg->req_sz) && msg->sb) {
+ struct bng_re_rcfw_sbuf *sbuf = msg->sb;
+
+ __set_cmdq_base_resp_addr(msg->req, msg->req_sz,
+ cpu_to_le64(sbuf->dma_addr));
+ __set_cmdq_base_resp_size(msg->req, msg->req_sz,
+ ALIGN(sbuf->size,
+ BNG_FW_CMDQE_UNITS) /
+ BNG_FW_CMDQE_UNITS);
+ }
+
+ preq = (u8 *)msg->req;
+ do {
+ /* Locate the next cmdq slot */
+ sw_prod = HWQ_CMP(hwq->prod, hwq);
+ cmdqe = bng_re_get_qe(hwq, sw_prod, NULL);
+ /* Copy a segment of the req cmd to the cmdq */
+ memset(cmdqe, 0, sizeof(*cmdqe));
+ memcpy(cmdqe, preq, min_t(u32, bsize, sizeof(*cmdqe)));
+ preq += min_t(u32, bsize, sizeof(*cmdqe));
+ bsize -= min_t(u32, bsize, sizeof(*cmdqe));
+ hwq->prod++;
+ } while (bsize > 0);
+ cmdq->seq_num++;
+
+ cmdq_prod = hwq->prod & 0xFFFF;
+ if (test_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags)) {
+ /* The very first doorbell write
+ * is required to set this flag
+ * which prompts the FW to reset
+ * its internal pointers
+ */
+ cmdq_prod |= BIT(FIRMWARE_FIRST_FLAG);
+ clear_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags);
+ }
+ /* ring CMDQ DB */
+ wmb();
+ writel(cmdq_prod, cmdq->cmdq_mbox.prod);
+ writel(BNG_FW_CMDQ_TRIG_VAL, cmdq->cmdq_mbox.db);
+ spin_unlock_bh(&hwq->lock);
+ /* The command has been queued; its completion is reported via the CREQ */
+ return 0;
+}
+
+/**
+ * __wait_for_resp - Wait for a command response without holding the CPU
+ * @rcfw: rcfw channel instance of rdev
+ * @cookie: cookie to track the command
+ *
+ * Wait for command completion in sleepable context.
+ *
+ * Returns:
+ * 0 if the command is completed by firmware.
+ * Non-zero error code otherwise.
+ */
+static int __wait_for_resp(struct bng_re_rcfw *rcfw, u16 cookie)
+{
+ struct bng_re_cmdq_ctx *cmdq;
+ struct bng_re_crsqe *crsqe;
+
+ cmdq = &rcfw->cmdq;
+ crsqe = &rcfw->crsqe_tbl[cookie];
+
+ do {
+ wait_event_timeout(cmdq->waitq,
+ !crsqe->is_in_used,
+ secs_to_jiffies(rcfw->max_timeout));
+
+ if (!crsqe->is_in_used)
+ return 0;
+
+ bng_re_service_creq(&rcfw->creq.creq_tasklet);
+
+ if (!crsqe->is_in_used)
+ return 0;
+ } while (true);
+}
+
+/**
+ * bng_re_rcfw_send_message - interface to send
+ * and complete rcfw command.
+ * @rcfw: rcfw channel instance of rdev
+ * @msg: message to send
+ *
+ * This function does not account for the shadow queue depth. It sends
+ * the command unconditionally as long as the send queue is not full.
+ *
+ * Returns:
+ * 0 if the command is completed by firmware.
+ * Non-zero if the command is not completed by firmware.
+ */
+int bng_re_rcfw_send_message(struct bng_re_rcfw *rcfw,
+ struct bng_re_cmdqmsg *msg)
+{
+ struct creq_qp_event *evnt = (struct creq_qp_event *)msg->resp;
+ struct bng_re_crsqe *crsqe;
+ u16 cookie;
+ int rc;
+ u8 opcode;
+
+ opcode = __get_cmdq_base_opcode(msg->req, msg->req_sz);
+
+ rc = __send_message_basic_sanity(rcfw, msg, opcode);
+ if (rc)
+ return rc == -ENXIO ? bng_re_map_rc(opcode) : rc;
+
+ rc = __send_message(rcfw, msg, opcode);
+ if (rc)
+ return rc;
+
+ cookie = le16_to_cpu(__get_cmdq_base_cookie(msg->req, msg->req_sz))
+ & BNG_FW_MAX_COOKIE_VALUE;
+
+ rc = __wait_for_resp(rcfw, cookie);
+
+ if (rc) {
+ spin_lock_bh(&rcfw->cmdq.hwq.lock);
+ crsqe = &rcfw->crsqe_tbl[cookie];
+ crsqe->is_waiter_alive = false;
+ if (rc == -ENODEV)
+ set_bit(FIRMWARE_STALL_DETECTED, &rcfw->cmdq.flags);
+ spin_unlock_bh(&rcfw->cmdq.hwq.lock);
+ return -ETIMEDOUT;
+ }
+
+ if (evnt->status) {
+ /* failed with status */
+ dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x status %#x\n",
+ cookie, opcode, evnt->status);
+ rc = -EIO;
+ }
+
+ return rc;
+}
+
+static int bng_re_map_cmdq_mbox(struct bng_re_rcfw *rcfw)
+{
+ struct bng_re_cmdq_mbox *mbox;
+ resource_size_t bar_reg;
+ struct pci_dev *pdev;
+
+ pdev = rcfw->pdev;
+ mbox = &rcfw->cmdq.cmdq_mbox;
+
+ mbox->reg.bar_id = BNG_FW_COMM_PCI_BAR_REGION;
+ mbox->reg.len = BNG_FW_COMM_SIZE;
+ mbox->reg.bar_base = pci_resource_start(pdev, mbox->reg.bar_id);
+ if (!mbox->reg.bar_base) {
+ dev_err(&pdev->dev,
+ "CMDQ BAR region %d resc start is 0!\n",
+ mbox->reg.bar_id);
+ return -ENOMEM;
+ }
+
+ bar_reg = mbox->reg.bar_base + BNG_FW_COMM_BASE_OFFSET;
+ mbox->reg.len = BNG_FW_COMM_SIZE;
+ mbox->reg.bar_reg = ioremap(bar_reg, mbox->reg.len);
+ if (!mbox->reg.bar_reg) {
+ dev_err(&pdev->dev,
+ "CMDQ BAR region %d mapping failed\n",
+ mbox->reg.bar_id);
+ return -ENOMEM;
+ }
+
+ mbox->prod = (void __iomem *)(mbox->reg.bar_reg +
+ BNG_FW_PF_VF_COMM_PROD_OFFSET);
+ mbox->db = (void __iomem *)(mbox->reg.bar_reg + BNG_FW_COMM_TRIG_OFFSET);
+ return 0;
+}
+
+static irqreturn_t bng_re_creq_irq(int irq, void *dev_instance)
+{
+ struct bng_re_rcfw *rcfw = dev_instance;
+ struct bng_re_creq_ctx *creq;
+ struct bng_re_hwq *hwq;
+ u32 sw_cons;
+
+ creq = &rcfw->creq;
+ hwq = &creq->hwq;
+ /* Prefetch the CREQ element */
+ sw_cons = HWQ_CMP(hwq->cons, hwq);
+ bng_re_get_qe(hwq, sw_cons, NULL);
+
+ tasklet_schedule(&creq->creq_tasklet);
+ return IRQ_HANDLED;
+}
+
+int bng_re_rcfw_start_irq(struct bng_re_rcfw *rcfw, int msix_vector,
+ bool need_init)
+{
+ struct bng_re_creq_ctx *creq;
+ struct bng_re_res *res;
+ int rc;
+
+ creq = &rcfw->creq;
+ res = rcfw->res;
+
+ if (creq->irq_handler_avail)
+ return -EFAULT;
+
+ creq->msix_vec = msix_vector;
+ if (need_init)
+ tasklet_setup(&creq->creq_tasklet, bng_re_service_creq);
+ else
+ tasklet_enable(&creq->creq_tasklet);
+
+ creq->irq_name = kasprintf(GFP_KERNEL, "bng_re-creq@pci:%s",
+ pci_name(res->pdev));
+ if (!creq->irq_name)
+ return -ENOMEM;
+ rc = request_irq(creq->msix_vec, bng_re_creq_irq, 0,
+ creq->irq_name, rcfw);
+ if (rc) {
+ kfree(creq->irq_name);
+ creq->irq_name = NULL;
+ tasklet_disable(&creq->creq_tasklet);
+ return rc;
+ }
+ creq->irq_handler_avail = true;
+
+ bng_re_ring_nq_db(&creq->creq_db.dbinfo, res->cctx, true);
+ atomic_inc(&rcfw->rcfw_intr_enabled);
+
+ return 0;
+}
+
+static int bng_re_map_creq_db(struct bng_re_rcfw *rcfw, u32 reg_offt)
+{
+ struct bng_re_creq_db *creq_db;
+ resource_size_t bar_reg;
+ struct pci_dev *pdev;
+
+ pdev = rcfw->pdev;
+ creq_db = &rcfw->creq.creq_db;
+
+ creq_db->dbinfo.flags = 0;
+ creq_db->reg.bar_id = BNG_FW_COMM_CONS_PCI_BAR_REGION;
+ creq_db->reg.bar_base = pci_resource_start(pdev, creq_db->reg.bar_id);
+ if (!creq_db->reg.bar_base)
+ dev_err(&pdev->dev,
+ "CREQ BAR region %d resc start is 0!",
+ creq_db->reg.bar_id);
+
+ bar_reg = creq_db->reg.bar_base + reg_offt;
+
+ creq_db->reg.len = BNG_FW_CREQ_DB_LEN;
+ creq_db->reg.bar_reg = ioremap(bar_reg, creq_db->reg.len);
+ if (!creq_db->reg.bar_reg) {
+ dev_err(&pdev->dev,
+ "CREQ BAR region %d mapping failed",
+ creq_db->reg.bar_id);
+ return -ENOMEM;
+ }
+ creq_db->dbinfo.db = creq_db->reg.bar_reg;
+ creq_db->dbinfo.hwq = &rcfw->creq.hwq;
+ creq_db->dbinfo.xid = rcfw->creq.ring_id;
+ return 0;
+}
+
+void bng_re_rcfw_stop_irq(struct bng_re_rcfw *rcfw, bool kill)
+{
+ struct bng_re_creq_ctx *creq;
+
+ creq = &rcfw->creq;
+
+ if (!creq->irq_handler_avail)
+ return;
+
+ creq->irq_handler_avail = false;
+ /* Mask h/w interrupts */
+ bng_re_ring_nq_db(&creq->creq_db.dbinfo, rcfw->res->cctx, false);
+ /* Sync with last running IRQ-handler */
+ synchronize_irq(creq->msix_vec);
+ free_irq(creq->msix_vec, rcfw);
+ kfree(creq->irq_name);
+ creq->irq_name = NULL;
+ atomic_set(&rcfw->rcfw_intr_enabled, 0);
+ if (kill)
+ tasklet_kill(&creq->creq_tasklet);
+ tasklet_disable(&creq->creq_tasklet);
+}
+
+void bng_re_disable_rcfw_channel(struct bng_re_rcfw *rcfw)
+{
+ struct bng_re_creq_ctx *creq;
+ struct bng_re_cmdq_ctx *cmdq;
+
+ creq = &rcfw->creq;
+ cmdq = &rcfw->cmdq;
+ /* Make sure the HW channel is stopped! */
+ bng_re_rcfw_stop_irq(rcfw, true);
+
+ iounmap(cmdq->cmdq_mbox.reg.bar_reg);
+ iounmap(creq->creq_db.reg.bar_reg);
+
+ cmdq->cmdq_mbox.reg.bar_reg = NULL;
+ creq->creq_db.reg.bar_reg = NULL;
+ creq->msix_vec = 0;
+}
+
+static void bng_re_start_rcfw(struct bng_re_rcfw *rcfw)
+{
+ struct bng_re_cmdq_ctx *cmdq;
+ struct bng_re_creq_ctx *creq;
+ struct bng_re_cmdq_mbox *mbox;
+ struct cmdq_init init = {0};
+
+ cmdq = &rcfw->cmdq;
+ creq = &rcfw->creq;
+ mbox = &cmdq->cmdq_mbox;
+
+ init.cmdq_pbl = cpu_to_le64(cmdq->hwq.pbl[BNG_PBL_LVL_0].pg_map_arr[0]);
+ init.cmdq_size_cmdq_lvl =
+ cpu_to_le16(((rcfw->cmdq_depth <<
+ CMDQ_INIT_CMDQ_SIZE_SFT) &
+ CMDQ_INIT_CMDQ_SIZE_MASK) |
+ ((cmdq->hwq.level <<
+ CMDQ_INIT_CMDQ_LVL_SFT) &
+ CMDQ_INIT_CMDQ_LVL_MASK));
+ init.creq_ring_id = cpu_to_le16(creq->ring_id);
+ /* Write to the mailbox register */
+ __iowrite32_copy(mbox->reg.bar_reg, &init, sizeof(init) / 4);
+}
+
+int bng_re_enable_fw_channel(struct bng_re_rcfw *rcfw,
+ int msix_vector,
+ int cp_bar_reg_off)
+{
+ struct bng_re_cmdq_ctx *cmdq;
+ int rc;
+
+ cmdq = &rcfw->cmdq;
+
+ /* Assign defaults */
+ cmdq->seq_num = 0;
+ set_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags);
+ init_waitqueue_head(&cmdq->waitq);
+
+ rc = bng_re_map_cmdq_mbox(rcfw);
+ if (rc)
+ return rc;
+
+ rc = bng_re_map_creq_db(rcfw, cp_bar_reg_off);
+ if (rc)
+ return rc;
+
+ rc = bng_re_rcfw_start_irq(rcfw, msix_vector, true);
+ if (rc) {
+ dev_err(&rcfw->pdev->dev,
+ "Failed to request IRQ for CREQ rc = 0x%x\n", rc);
+ bng_re_disable_rcfw_channel(rcfw);
+ return rc;
+ }
+
+ bng_re_start_rcfw(rcfw);
+ return 0;
+}
+
+int bng_re_deinit_rcfw(struct bng_re_rcfw *rcfw)
+{
+ struct creq_deinitialize_fw_resp resp = {};
+ struct cmdq_deinitialize_fw req = {};
+ struct bng_re_cmdqmsg msg = {};
+ int rc;
+
+ bng_re_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_DEINITIALIZE_FW,
+ sizeof(req));
+ bng_re_fill_cmdqmsg(&msg, &req, &resp, NULL,
+ sizeof(req), sizeof(resp), 0);
+ rc = bng_re_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ return rc;
+
+ clear_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->cmdq.flags);
+ return 0;
+}
+
+static inline bool _is_hw_retx_supported(u16 dev_cap_flags)
+{
+ return dev_cap_flags &
+ (CREQ_QUERY_FUNC_RESP_SB_HW_REQUESTER_RETX_ENABLED |
+ CREQ_QUERY_FUNC_RESP_SB_HW_RESPONDER_RETX_ENABLED);
+}
+
+#define BNG_RE_HW_RETX(a) _is_hw_retx_supported((a))
+
+static inline bool _is_optimize_modify_qp_supported(u16 dev_cap_ext_flags2)
+{
+ return dev_cap_ext_flags2 &
+ CREQ_QUERY_FUNC_RESP_SB_OPTIMIZE_MODIFY_QP_SUPPORTED;
+}
+
+int bng_re_init_rcfw(struct bng_re_rcfw *rcfw,
+ struct bng_re_stats *stats_ctx)
+{
+ struct creq_initialize_fw_resp resp = {};
+ struct cmdq_initialize_fw req = {};
+ struct bng_re_cmdqmsg msg = {};
+ int rc;
+ u16 flags = 0;
+
+ bng_re_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_INITIALIZE_FW,
+ sizeof(req));
+ /* Supply (log-base-2-of-host-page-size - base-page-shift)
+ * to bono to adjust the doorbell page sizes.
+ */
+ req.log2_dbr_pg_size = cpu_to_le16(PAGE_SHIFT -
+ BNG_FW_DBR_BASE_PAGE_SHIFT);
+ if (BNG_RE_HW_RETX(rcfw->res->dattr->dev_cap_flags))
+ flags |= CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED;
+ if (_is_optimize_modify_qp_supported(rcfw->res->dattr->dev_cap_flags2))
+ flags |= CMDQ_INITIALIZE_FW_FLAGS_OPTIMIZE_MODIFY_QP_SUPPORTED;
+ req.flags |= cpu_to_le16(flags);
+ req.stat_ctx_id = cpu_to_le32(stats_ctx->fw_id);
+ bng_re_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
+ rc = bng_re_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ return rc;
+ set_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->cmdq.flags);
+ return 0;
+}
diff --git a/drivers/infiniband/hw/bng_re/bng_fw.h b/drivers/infiniband/hw/bng_re/bng_fw.h
new file mode 100644
index 000000000000..c89c926ec2fc
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/bng_fw.h
@@ -0,0 +1,211 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2025 Broadcom.
+
+#ifndef __BNG_FW_H__
+#define __BNG_FW_H__
+
+#include "bng_tlv.h"
+
+/* FW DB related */
+#define BNG_FW_CMDQ_TRIG_VAL 1
+#define BNG_FW_COMM_PCI_BAR_REGION 0
+#define BNG_FW_COMM_CONS_PCI_BAR_REGION 2
+#define BNG_FW_DBR_BASE_PAGE_SHIFT 12
+#define BNG_FW_COMM_SIZE 0x104
+#define BNG_FW_COMM_BASE_OFFSET 0x600
+#define BNG_FW_COMM_TRIG_OFFSET 0x100
+#define BNG_FW_PF_VF_COMM_PROD_OFFSET 0xc
+#define BNG_FW_CREQ_DB_LEN 8
+
+/* CREQ */
+#define BNG_FW_CREQE_MAX_CNT (64 * 1024)
+#define BNG_FW_CREQE_UNITS 16
+#define BNG_FW_CREQ_ENTRY_POLL_BUDGET 0x100
+#define BNG_FW_CREQ_CMP_VALID(hdr, pass) \
+ (!!((hdr)->v & CREQ_BASE_V) == \
+ !((pass) & BNG_RE_FLAG_EPOCH_CONS_MASK))
+
+/* CMDQ */
+struct bng_fw_cmdqe {
+ u8 data[16];
+};
+
+#define BNG_FW_CMDQE_MAX_CNT 8192
+#define BNG_FW_CMDQE_UNITS sizeof(struct bng_fw_cmdqe)
+#define BNG_FW_CMDQE_BYTES(depth) ((depth) * BNG_FW_CMDQE_UNITS)
+
+#define BNG_FW_MAX_COOKIE_VALUE (BNG_FW_CMDQE_MAX_CNT - 1)
+#define BNG_FW_CMD_IS_BLOCKING 0x8000
+
+/* Crsq buf is 1024-Byte */
+struct bng_re_crsbe {
+ u8 data[1024];
+};
+
+static inline u32 bng_fw_cmdqe_npages(u32 depth)
+{
+ u32 npages;
+
+ npages = BNG_FW_CMDQE_BYTES(depth) / PAGE_SIZE;
+ if (BNG_FW_CMDQE_BYTES(depth) % PAGE_SIZE)
+ npages++;
+ return npages;
+}
+
+static inline u32 bng_fw_cmdqe_page_size(u32 depth)
+{
+ return (bng_fw_cmdqe_npages(depth) * PAGE_SIZE);
+}
+
+struct bng_re_cmdq_mbox {
+ struct bng_re_reg_desc reg;
+ void __iomem *prod;
+ void __iomem *db;
+};
+
+/* HWQ */
+struct bng_re_cmdq_ctx {
+ struct bng_re_hwq hwq;
+ struct bng_re_cmdq_mbox cmdq_mbox;
+ unsigned long flags;
+#define FIRMWARE_INITIALIZED_FLAG (0)
+#define FIRMWARE_STALL_DETECTED (3)
+#define FIRMWARE_FIRST_FLAG (31)
+ wait_queue_head_t waitq;
+ u32 seq_num;
+};
+
+struct bng_re_creq_db {
+ struct bng_re_reg_desc reg;
+ struct bng_re_db_info dbinfo;
+};
+
+struct bng_re_creq_stat {
+ u64 creq_qp_event_processed;
+ u64 creq_func_event_processed;
+};
+
+struct bng_re_creq_ctx {
+ struct bng_re_hwq hwq;
+ struct bng_re_creq_db creq_db;
+ struct bng_re_creq_stat stats;
+ struct tasklet_struct creq_tasklet;
+ u16 ring_id;
+ int msix_vec;
+ bool irq_handler_avail;
+ char *irq_name;
+};
+
+struct bng_re_crsqe {
+ struct creq_qp_event *resp;
+ u32 req_size;
+ /* Free slots at the time of submission */
+ u32 free_slots;
+ u8 opcode;
+ bool is_waiter_alive;
+ bool is_in_used;
+};
+
+struct bng_re_rcfw_sbuf {
+ void *sb;
+ dma_addr_t dma_addr;
+ u32 size;
+};
+
+/* RoCE FW Communication Channels */
+struct bng_re_rcfw {
+ struct pci_dev *pdev;
+ struct bng_re_res *res;
+ struct bng_re_cmdq_ctx cmdq;
+ struct bng_re_creq_ctx creq;
+ struct bng_re_crsqe *crsqe_tbl;
+ /* To synchronize the qp-handle hash table */
+ spinlock_t tbl_lock;
+ u32 cmdq_depth;
+ /* cached from chip cctx for quick reference in slow path */
+ u16 max_timeout;
+ atomic_t rcfw_intr_enabled;
+};
+
+struct bng_re_cmdqmsg {
+ struct cmdq_base *req;
+ struct creq_base *resp;
+ void *sb;
+ u32 req_sz;
+ u32 res_sz;
+ u8 block;
+};
+
+static inline void bng_re_rcfw_cmd_prep(struct cmdq_base *req,
+ u8 opcode, u8 cmd_size)
+{
+ req->opcode = opcode;
+ req->cmd_size = cmd_size;
+}
+
+static inline void bng_re_fill_cmdqmsg(struct bng_re_cmdqmsg *msg,
+ void *req, void *resp, void *sb,
+ u32 req_sz, u32 res_sz, u8 block)
+{
+ msg->req = req;
+ msg->resp = resp;
+ msg->sb = sb;
+ msg->req_sz = req_sz;
+ msg->res_sz = res_sz;
+ msg->block = block;
+}
+
+/* Get the number of command units required for the req. The
+ * function returns the correct value only if it is called before
+ * bng_re_set_cmd_slots()
+ */
+static inline u32 bng_re_get_cmd_slots(struct cmdq_base *req)
+{
+ u32 cmd_units = 0;
+
+ if (HAS_TLV_HEADER(req)) {
+ struct roce_tlv *tlv_req = (struct roce_tlv *)req;
+
+ cmd_units = tlv_req->total_size;
+ } else {
+ cmd_units = (req->cmd_size + BNG_FW_CMDQE_UNITS - 1) /
+ BNG_FW_CMDQE_UNITS;
+ }
+
+ return cmd_units;
+}
+
+static inline u32 bng_re_set_cmd_slots(struct cmdq_base *req)
+{
+ u32 cmd_byte = 0;
+
+ if (HAS_TLV_HEADER(req)) {
+ struct roce_tlv *tlv_req = (struct roce_tlv *)req;
+
+ cmd_byte = tlv_req->total_size * BNG_FW_CMDQE_UNITS;
+ } else {
+ cmd_byte = req->cmd_size;
+ req->cmd_size = (req->cmd_size + BNG_FW_CMDQE_UNITS - 1) /
+ BNG_FW_CMDQE_UNITS;
+ }
+
+ return cmd_byte;
+}
+
+void bng_re_free_rcfw_channel(struct bng_re_rcfw *rcfw);
+int bng_re_alloc_fw_channel(struct bng_re_res *res,
+ struct bng_re_rcfw *rcfw);
+int bng_re_enable_fw_channel(struct bng_re_rcfw *rcfw,
+ int msix_vector,
+ int cp_bar_reg_off);
+void bng_re_disable_rcfw_channel(struct bng_re_rcfw *rcfw);
+int bng_re_rcfw_start_irq(struct bng_re_rcfw *rcfw, int msix_vector,
+ bool need_init);
+void bng_re_rcfw_stop_irq(struct bng_re_rcfw *rcfw, bool kill);
+int bng_re_rcfw_send_message(struct bng_re_rcfw *rcfw,
+ struct bng_re_cmdqmsg *msg);
+int bng_re_init_rcfw(struct bng_re_rcfw *rcfw,
+ struct bng_re_stats *stats_ctx);
+int bng_re_deinit_rcfw(struct bng_re_rcfw *rcfw);
+#endif
diff --git a/drivers/infiniband/hw/bng_re/bng_re.h b/drivers/infiniband/hw/bng_re/bng_re.h
new file mode 100644
index 000000000000..dae4862621a7
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/bng_re.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2025 Broadcom.
+
+#ifndef __BNG_RE_H__
+#define __BNG_RE_H__
+
+#include "bng_res.h"
+
+#define BNG_RE_ADEV_NAME "bng_en"
+
+#define BNG_RE_DESC "Broadcom 800G RoCE Driver"
+
+#define rdev_to_dev(rdev) ((rdev) ? (&(rdev)->ibdev.dev) : NULL)
+
+#define BNG_RE_MIN_MSIX 2
+#define BNG_RE_MAX_MSIX BNGE_MAX_ROCE_MSIX
+
+#define BNG_RE_CREQ_NQ_IDX 0
+
+#define BNGE_INVALID_STATS_CTX_ID -1
+/* NQ specific structures */
+struct bng_re_nq_db {
+ struct bng_re_reg_desc reg;
+ struct bng_re_db_info dbinfo;
+};
+
+struct bng_re_nq {
+ struct pci_dev *pdev;
+ struct bng_re_res *res;
+ char *name;
+ struct bng_re_hwq hwq;
+ struct bng_re_nq_db nq_db;
+ u16 ring_id;
+ int msix_vec;
+ cpumask_t mask;
+ struct tasklet_struct nq_tasklet;
+ bool requested;
+ int budget;
+ u32 load;
+
+ struct workqueue_struct *cqn_wq;
+};
+
+struct bng_re_nq_record {
+ struct bnge_msix_info msix_entries[BNG_RE_MAX_MSIX];
+ struct bng_re_nq nq[BNG_RE_MAX_MSIX];
+ int num_msix;
+ /* serialize NQ access */
+ struct mutex load_lock;
+};
+
+struct bng_re_en_dev_info {
+ struct bng_re_dev *rdev;
+ struct bnge_auxr_dev *auxr_dev;
+};
+
+struct bng_re_ring_attr {
+ dma_addr_t *dma_arr;
+ int pages;
+ int type;
+ u32 depth;
+ u32 lrid; /* Logical ring id */
+ u8 mode;
+};
+
+struct bng_re_dev {
+ struct ib_device ibdev;
+ unsigned long flags;
+#define BNG_RE_FLAG_NETDEV_REGISTERED 0
+#define BNG_RE_FLAG_RCFW_CHANNEL_EN 1
+ struct net_device *netdev;
+ struct auxiliary_device *adev;
+ struct bnge_auxr_dev *aux_dev;
+ struct bng_re_chip_ctx *chip_ctx;
+ int fn_id;
+ struct bng_re_res bng_res;
+ struct bng_re_rcfw rcfw;
+ struct bng_re_nq_record *nqr;
+ /* Device Resources */
+ struct bng_re_dev_attr *dev_attr;
+ struct dentry *dbg_root;
+ struct bng_re_stats stats_ctx;
+};
+
+#endif
diff --git a/drivers/infiniband/hw/bng_re/bng_res.c b/drivers/infiniband/hw/bng_re/bng_res.c
new file mode 100644
index 000000000000..c50823758b53
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/bng_res.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2025 Broadcom.
+
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+#include <rdma/ib_umem.h>
+
+#include <linux/bnxt/hsi.h>
+#include "bng_res.h"
+#include "roce_hsi.h"
+
+/* Stats */
+void bng_re_free_stats_ctx_mem(struct pci_dev *pdev,
+ struct bng_re_stats *stats)
+{
+ if (stats->dma) {
+ dma_free_coherent(&pdev->dev, stats->size,
+ stats->dma, stats->dma_map);
+ }
+ memset(stats, 0, sizeof(*stats));
+ stats->fw_id = -1;
+}
+
+int bng_re_alloc_stats_ctx_mem(struct pci_dev *pdev,
+ struct bng_re_chip_ctx *cctx,
+ struct bng_re_stats *stats)
+{
+ memset(stats, 0, sizeof(*stats));
+ stats->fw_id = -1;
+ stats->size = cctx->hw_stats_size;
+ stats->dma = dma_alloc_coherent(&pdev->dev, stats->size,
+ &stats->dma_map, GFP_KERNEL);
+ if (!stats->dma)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void bng_free_pbl(struct bng_re_res *res, struct bng_re_pbl *pbl)
+{
+ struct pci_dev *pdev = res->pdev;
+ int i;
+
+ for (i = 0; i < pbl->pg_count; i++) {
+ if (pbl->pg_arr[i])
+ dma_free_coherent(&pdev->dev, pbl->pg_size,
+ (void *)((unsigned long)
+ pbl->pg_arr[i] &
+ PAGE_MASK),
+ pbl->pg_map_arr[i]);
+ else
+ dev_warn(&pdev->dev,
+ "PBL free pg_arr[%d] empty?!\n", i);
+ pbl->pg_arr[i] = NULL;
+ }
+
+ vfree(pbl->pg_arr);
+ pbl->pg_arr = NULL;
+ vfree(pbl->pg_map_arr);
+ pbl->pg_map_arr = NULL;
+ pbl->pg_count = 0;
+ pbl->pg_size = 0;
+}
+
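+/* Allocate sginfo->npages DMA-coherent pages of sginfo->pgsize bytes for one
+ * PBL level and record their kernel and DMA addresses; a no-op when the
+ * caller sets sginfo->nopte.
+ */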
+static int bng_alloc_pbl(struct bng_re_res *res,
+ struct bng_re_pbl *pbl,
+ struct bng_re_sg_info *sginfo)
+{
+ struct pci_dev *pdev = res->pdev;
+ u32 pages;
+ int i;
+
+ if (sginfo->nopte)
+ return 0;
+ pages = sginfo->npages;
+
+ /* page ptr arrays */
+ pbl->pg_arr = vmalloc_array(pages, sizeof(void *));
+ if (!pbl->pg_arr)
+ return -ENOMEM;
+
+ pbl->pg_map_arr = vmalloc_array(pages, sizeof(dma_addr_t));
+ if (!pbl->pg_map_arr) {
+ vfree(pbl->pg_arr);
+ pbl->pg_arr = NULL;
+ return -ENOMEM;
+ }
+ pbl->pg_count = 0;
+ pbl->pg_size = sginfo->pgsize;
+
+ for (i = 0; i < pages; i++) {
+ pbl->pg_arr[i] = dma_alloc_coherent(&pdev->dev,
+ pbl->pg_size,
+ &pbl->pg_map_arr[i],
+ GFP_KERNEL);
+ if (!pbl->pg_arr[i])
+ goto fail;
+ pbl->pg_count++;
+ }
+
+ return 0;
+fail:
+ bng_free_pbl(res, pbl);
+ return -ENOMEM;
+}
+
+void bng_re_free_hwq(struct bng_re_res *res,
+ struct bng_re_hwq *hwq)
+{
+ int i;
+
+ if (!hwq->max_elements)
+ return;
+ if (hwq->level >= BNG_PBL_LVL_MAX)
+ return;
+
+ for (i = 0; i < hwq->level + 1; i++)
+ bng_free_pbl(res, &hwq->pbl[i]);
+
+ hwq->level = BNG_PBL_LVL_MAX;
+ hwq->max_elements = 0;
+ hwq->element_size = 0;
+ hwq->prod = 0;
+ hwq->cons = 0;
+}
+
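+/* PBL indirection levels used by bng_re_alloc_init_hwq():
+ *   BNG_PBL_LVL_0 - queue fits in a single page, mapped directly
+ *   BNG_PBL_LVL_1 - one level of PBL pages pointing at the queue pages
+ *   BNG_PBL_LVL_2 - PDE page -> PBL pages -> queue pages
+ */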
+/* All HWQs are power of 2 in size */
+int bng_re_alloc_init_hwq(struct bng_re_hwq *hwq,
+ struct bng_re_hwq_attr *hwq_attr)
+{
+ u32 npages, pg_size;
+ struct bng_re_sg_info sginfo = {};
+ u32 depth, stride, npbl, npde;
+ dma_addr_t *src_phys_ptr, **dst_virt_ptr;
+ struct bng_re_res *res;
+ struct pci_dev *pdev;
+ int i, rc, lvl;
+
+ res = hwq_attr->res;
+ pdev = res->pdev;
+ pg_size = hwq_attr->sginfo->pgsize;
+ hwq->level = BNG_PBL_LVL_MAX;
+
+ depth = roundup_pow_of_two(hwq_attr->depth);
+ stride = roundup_pow_of_two(hwq_attr->stride);
+
+ npages = (depth * stride) / pg_size;
+ if ((depth * stride) % pg_size)
+ npages++;
+ if (!npages)
+ return -EINVAL;
+ hwq_attr->sginfo->npages = npages;
+
+ if (npages == MAX_PBL_LVL_0_PGS && !hwq_attr->sginfo->nopte) {
+ /* This request is Level 0, map PTE */
+ rc = bng_alloc_pbl(res, &hwq->pbl[BNG_PBL_LVL_0], hwq_attr->sginfo);
+ if (rc)
+ goto fail;
+ hwq->level = BNG_PBL_LVL_0;
+ goto done;
+ }
+
+ if (npages >= MAX_PBL_LVL_0_PGS) {
+ if (npages > MAX_PBL_LVL_1_PGS) {
+ u32 flag = PTU_PTE_VALID;
+ /* 2 levels of indirection */
+ npbl = npages >> MAX_PBL_LVL_1_PGS_SHIFT;
+ if (npages % BIT(MAX_PBL_LVL_1_PGS_SHIFT))
+ npbl++;
+ npde = npbl >> MAX_PDL_LVL_SHIFT;
+ if (npbl % BIT(MAX_PDL_LVL_SHIFT))
+ npde++;
+ /* Alloc PDE pages */
+ sginfo.pgsize = npde * pg_size;
+ sginfo.npages = 1;
+ rc = bng_alloc_pbl(res, &hwq->pbl[BNG_PBL_LVL_0], &sginfo);
+ if (rc)
+ goto fail;
+
+ /* Alloc PBL pages */
+ sginfo.npages = npbl;
+ sginfo.pgsize = PAGE_SIZE;
+ rc = bng_alloc_pbl(res, &hwq->pbl[BNG_PBL_LVL_1], &sginfo);
+ if (rc)
+ goto fail;
+ /* Fill PDL with PBL page pointers */
+ dst_virt_ptr =
+ (dma_addr_t **)hwq->pbl[BNG_PBL_LVL_0].pg_arr;
+ src_phys_ptr = hwq->pbl[BNG_PBL_LVL_1].pg_map_arr;
+ for (i = 0; i < hwq->pbl[BNG_PBL_LVL_1].pg_count; i++)
+ dst_virt_ptr[0][i] = src_phys_ptr[i] | flag;
+
+ /* Alloc or init PTEs */
+ rc = bng_alloc_pbl(res, &hwq->pbl[BNG_PBL_LVL_2],
+ hwq_attr->sginfo);
+ if (rc)
+ goto fail;
+ hwq->level = BNG_PBL_LVL_2;
+ if (hwq_attr->sginfo->nopte)
+ goto done;
+ /* Fill PBLs with PTE pointers */
+ dst_virt_ptr =
+ (dma_addr_t **)hwq->pbl[BNG_PBL_LVL_1].pg_arr;
+ src_phys_ptr = hwq->pbl[BNG_PBL_LVL_2].pg_map_arr;
+ for (i = 0; i < hwq->pbl[BNG_PBL_LVL_2].pg_count; i++) {
+ dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
+ src_phys_ptr[i] | PTU_PTE_VALID;
+ }
+ if (hwq_attr->type == BNG_HWQ_TYPE_QUEUE) {
+ /* Find the last pg of the size */
+ i = hwq->pbl[BNG_PBL_LVL_2].pg_count;
+ dst_virt_ptr[PTR_PG(i - 1)][PTR_IDX(i - 1)] |=
+ PTU_PTE_LAST;
+ if (i > 1)
+ dst_virt_ptr[PTR_PG(i - 2)]
+ [PTR_IDX(i - 2)] |=
+ PTU_PTE_NEXT_TO_LAST;
+ }
+ } else { /* pages < 512 npbl = 1, npde = 0 */
+ u32 flag = PTU_PTE_VALID;
+
+ /* 1 level of indirection */
+ npbl = npages >> MAX_PBL_LVL_1_PGS_SHIFT;
+ if (npages % BIT(MAX_PBL_LVL_1_PGS_SHIFT))
+ npbl++;
+ sginfo.npages = npbl;
+ sginfo.pgsize = PAGE_SIZE;
+ /* Alloc PBL page */
+ rc = bng_alloc_pbl(res, &hwq->pbl[BNG_PBL_LVL_0], &sginfo);
+ if (rc)
+ goto fail;
+ /* Alloc or init PTEs */
+ rc = bng_alloc_pbl(res, &hwq->pbl[BNG_PBL_LVL_1],
+ hwq_attr->sginfo);
+ if (rc)
+ goto fail;
+ hwq->level = BNG_PBL_LVL_1;
+ if (hwq_attr->sginfo->nopte)
+ goto done;
+ /* Fill PBL with PTE pointers */
+ dst_virt_ptr =
+ (dma_addr_t **)hwq->pbl[BNG_PBL_LVL_0].pg_arr;
+ src_phys_ptr = hwq->pbl[BNG_PBL_LVL_1].pg_map_arr;
+ for (i = 0; i < hwq->pbl[BNG_PBL_LVL_1].pg_count; i++)
+ dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
+ src_phys_ptr[i] | flag;
+ if (hwq_attr->type == BNG_HWQ_TYPE_QUEUE) {
+ /* Find the last pg of the size */
+ i = hwq->pbl[BNG_PBL_LVL_1].pg_count;
+ dst_virt_ptr[PTR_PG(i - 1)][PTR_IDX(i - 1)] |=
+ PTU_PTE_LAST;
+ if (i > 1)
+ dst_virt_ptr[PTR_PG(i - 2)]
+ [PTR_IDX(i - 2)] |=
+ PTU_PTE_NEXT_TO_LAST;
+ }
+ }
+ }
+done:
+ hwq->prod = 0;
+ hwq->cons = 0;
+ hwq->pdev = pdev;
+ hwq->depth = hwq_attr->depth;
+ hwq->max_elements = hwq->depth;
+ hwq->element_size = stride;
+ hwq->qe_ppg = pg_size / stride;
+ /* For direct access to the elements */
+ lvl = hwq->level;
+ if (hwq_attr->sginfo->nopte && hwq->level)
+ lvl = hwq->level - 1;
+ hwq->pbl_ptr = hwq->pbl[lvl].pg_arr;
+ hwq->pbl_dma_ptr = hwq->pbl[lvl].pg_map_arr;
+ spin_lock_init(&hwq->lock);
+
+ return 0;
+fail:
+ bng_re_free_hwq(res, hwq);
+ return -ENOMEM;
+}
diff --git a/drivers/infiniband/hw/bng_re/bng_res.h b/drivers/infiniband/hw/bng_re/bng_res.h
new file mode 100644
index 000000000000..9997f86d6a0e
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/bng_res.h
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2025 Broadcom.
+
+#ifndef __BNG_RES_H__
+#define __BNG_RES_H__
+
+#include "roce_hsi.h"
+
+#define BNG_ROCE_FW_MAX_TIMEOUT 60
+
+#define PTR_CNT_PER_PG (PAGE_SIZE / sizeof(void *))
+#define PTR_MAX_IDX_PER_PG (PTR_CNT_PER_PG - 1)
+#define PTR_PG(x) (((x) & ~PTR_MAX_IDX_PER_PG) / PTR_CNT_PER_PG)
+#define PTR_IDX(x) ((x) & PTR_MAX_IDX_PER_PG)
+
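+/* prod and cons are free-running indices; mask them to the power-of-two
+ * queue depth to get the ring offset and the number of free slots.
+ */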
+#define HWQ_CMP(idx, hwq) ((idx) & ((hwq)->max_elements - 1))
+#define HWQ_FREE_SLOTS(hwq) (hwq->max_elements - \
+ ((HWQ_CMP(hwq->prod, hwq)\
+ - HWQ_CMP(hwq->cons, hwq))\
+ & (hwq->max_elements - 1)))
+
+#define MAX_PBL_LVL_0_PGS 1
+#define MAX_PBL_LVL_1_PGS 512
+#define MAX_PBL_LVL_1_PGS_SHIFT 9
+#define MAX_PBL_LVL_1_PGS_FOR_LVL_2 256
+#define MAX_PBL_LVL_2_PGS (256 * 512)
+#define MAX_PDL_LVL_SHIFT 9
+
+#define BNG_RE_DBR_VALID (0x1UL << 26)
+#define BNG_RE_DBR_EPOCH_SHIFT 24
+#define BNG_RE_DBR_TOGGLE_SHIFT 25
+
+#define BNG_MAX_TQM_ALLOC_REQ 48
+
+struct bng_re_reg_desc {
+ u8 bar_id;
+ resource_size_t bar_base;
+ unsigned long offset;
+ void __iomem *bar_reg;
+ size_t len;
+};
+
+struct bng_re_db_info {
+ void __iomem *db;
+ void __iomem *priv_db;
+ struct bng_re_hwq *hwq;
+ u32 xid;
+ u32 max_slot;
+ u32 flags;
+ u8 toggle;
+};
+
+enum bng_re_db_info_flags_mask {
+ BNG_RE_FLAG_EPOCH_CONS_SHIFT = 0x0UL,
+ BNG_RE_FLAG_EPOCH_PROD_SHIFT = 0x1UL,
+ BNG_RE_FLAG_EPOCH_CONS_MASK = 0x1UL,
+ BNG_RE_FLAG_EPOCH_PROD_MASK = 0x2UL,
+};
+
+enum bng_re_db_epoch_flag_shift {
+ BNG_RE_DB_EPOCH_CONS_SHIFT = BNG_RE_DBR_EPOCH_SHIFT,
+ BNG_RE_DB_EPOCH_PROD_SHIFT = (BNG_RE_DBR_EPOCH_SHIFT - 1),
+};
+
+struct bng_re_chip_ctx {
+ u16 chip_num;
+ u16 hw_stats_size;
+ u64 hwrm_intf_ver;
+ u16 hwrm_cmd_max_timeout;
+};
+
+struct bng_re_pbl {
+ u32 pg_count;
+ u32 pg_size;
+ void **pg_arr;
+ dma_addr_t *pg_map_arr;
+};
+
+enum bng_re_pbl_lvl {
+ BNG_PBL_LVL_0,
+ BNG_PBL_LVL_1,
+ BNG_PBL_LVL_2,
+ BNG_PBL_LVL_MAX
+};
+
+enum bng_re_hwq_type {
+ BNG_HWQ_TYPE_CTX,
+ BNG_HWQ_TYPE_QUEUE
+};
+
+struct bng_re_sg_info {
+ u32 npages;
+ u32 pgshft;
+ u32 pgsize;
+ bool nopte;
+};
+
+struct bng_re_hwq_attr {
+ struct bng_re_res *res;
+ struct bng_re_sg_info *sginfo;
+ enum bng_re_hwq_type type;
+ u32 depth;
+ u32 stride;
+ u32 aux_stride;
+ u32 aux_depth;
+};
+
+struct bng_re_hwq {
+ struct pci_dev *pdev;
+ /* lock to protect hwq */
+ spinlock_t lock;
+ struct bng_re_pbl pbl[BNG_PBL_LVL_MAX + 1];
+ /* Valid values: 0, 1, 2 */
+ enum bng_re_pbl_lvl level;
+ /* PBL entries */
+ void **pbl_ptr;
+ /* PBL dma_addr */
+ dma_addr_t *pbl_dma_ptr;
+ u32 max_elements;
+ u32 depth;
+ u16 element_size;
+ u32 prod;
+ u32 cons;
+ /* queue entry per page */
+ u16 qe_ppg;
+};
+
+struct bng_re_stats {
+ dma_addr_t dma_map;
+ void *dma;
+ u32 size;
+ u32 fw_id;
+};
+
+struct bng_re_res {
+ struct pci_dev *pdev;
+ struct bng_re_chip_ctx *cctx;
+ struct bng_re_dev_attr *dattr;
+};
+
+static inline void *bng_re_get_qe(struct bng_re_hwq *hwq,
+ u32 indx, u64 *pg)
+{
+ u32 pg_num, pg_idx;
+
+ pg_num = (indx / hwq->qe_ppg);
+ pg_idx = (indx % hwq->qe_ppg);
+ if (pg)
+ *pg = (u64)&hwq->pbl_ptr[pg_num];
+ return (void *)(hwq->pbl_ptr[pg_num] + hwq->element_size * pg_idx);
+}
+
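+/* Compose the 64-bit doorbell value: xid, RoCE path, type and valid bit in
+ * the upper 32 bits; ring index (including the epoch bit) and the toggle
+ * bit in the lower 32 bits.
+ */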
+#define BNG_RE_INIT_DBHDR(xid, type, indx, toggle) \
+ (((u64)(((xid) & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | \
+ (type) | BNG_RE_DBR_VALID) << 32) | (indx) | \
+ (((u32)(toggle)) << (BNG_RE_DBR_TOGGLE_SHIFT)))
+
+static inline void bng_re_ring_db(struct bng_re_db_info *info,
+ u32 type)
+{
+ u64 key = 0;
+ u32 indx;
+ u8 toggle = 0;
+
+ if (type == DBC_DBC_TYPE_CQ_ARMALL ||
+ type == DBC_DBC_TYPE_CQ_ARMSE)
+ toggle = info->toggle;
+
+ indx = (info->hwq->cons & DBC_DBC_INDEX_MASK) |
+ ((info->flags & BNG_RE_FLAG_EPOCH_CONS_MASK) <<
+ BNG_RE_DB_EPOCH_CONS_SHIFT);
+
+ key = BNG_RE_INIT_DBHDR(info->xid, type, indx, toggle);
+ writeq(key, info->db);
+}
+
+static inline void bng_re_ring_nq_db(struct bng_re_db_info *info,
+ struct bng_re_chip_ctx *cctx,
+ bool arm)
+{
+ u32 type;
+
+ type = arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ;
+ bng_re_ring_db(info, type);
+}
+
+static inline void bng_re_hwq_incr_cons(u32 max_elements, u32 *cons, u32 cnt,
+ u32 *dbinfo_flags)
+{
+ /* Move cons and flip the epoch bit when the index wraps around */
+ *cons += cnt;
+ if (*cons >= max_elements) {
+ *cons %= max_elements;
+ *dbinfo_flags ^= 1UL << BNG_RE_FLAG_EPOCH_CONS_SHIFT;
+ }
+}
+
+static inline bool _is_max_srq_ext_supported(u16 dev_cap_ext_flags_2)
+{
+ return !!(dev_cap_ext_flags_2 & CREQ_QUERY_FUNC_RESP_SB_MAX_SRQ_EXTENDED);
+}
+
+void bng_re_free_hwq(struct bng_re_res *res,
+ struct bng_re_hwq *hwq);
+
+int bng_re_alloc_init_hwq(struct bng_re_hwq *hwq,
+ struct bng_re_hwq_attr *hwq_attr);
+
+void bng_re_free_stats_ctx_mem(struct pci_dev *pdev,
+ struct bng_re_stats *stats);
+
+int bng_re_alloc_stats_ctx_mem(struct pci_dev *pdev,
+ struct bng_re_chip_ctx *cctx,
+ struct bng_re_stats *stats);
+#endif
diff --git a/drivers/infiniband/hw/bng_re/bng_sp.c b/drivers/infiniband/hw/bng_re/bng_sp.c
new file mode 100644
index 000000000000..83099e05328d
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/bng_sp.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2025 Broadcom.
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+
+#include "bng_res.h"
+#include "bng_fw.h"
+#include "bng_sp.h"
+#include "bng_tlv.h"
+
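+/* Atomic ops are usable only when the PCIe function has AtomicOp
+ * requester enabled.
+ */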
+static bool bng_re_is_atomic_cap(struct bng_re_rcfw *rcfw)
+{
+ u16 pcie_ctl2 = 0;
+
+ pcie_capability_read_word(rcfw->pdev, PCI_EXP_DEVCTL2, &pcie_ctl2);
+ return (pcie_ctl2 & PCI_EXP_DEVCTL2_ATOMIC_REQ);
+}
+
+static void bng_re_query_version(struct bng_re_rcfw *rcfw,
+ char *fw_ver)
+{
+ struct creq_query_version_resp resp = {};
+ struct bng_re_cmdqmsg msg = {};
+ struct cmdq_query_version req = {};
+ int rc;
+
+ bng_re_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_QUERY_VERSION,
+ sizeof(req));
+
+ bng_re_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
+ rc = bng_re_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ return;
+ fw_ver[0] = resp.fw_maj;
+ fw_ver[1] = resp.fw_minor;
+ fw_ver[2] = resp.fw_bld;
+ fw_ver[3] = resp.fw_rsvd;
+}
+
+int bng_re_get_dev_attr(struct bng_re_rcfw *rcfw)
+{
+ struct bng_re_dev_attr *attr = rcfw->res->dattr;
+ struct creq_query_func_resp resp = {};
+ struct bng_re_cmdqmsg msg = {};
+ struct creq_query_func_resp_sb *sb;
+ struct bng_re_rcfw_sbuf sbuf;
+ struct cmdq_query_func req = {};
+ u8 *tqm_alloc;
+ int i, rc;
+ u32 temp;
+
+ bng_re_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_QUERY_FUNC,
+ sizeof(req));
+
+ sbuf.size = ALIGN(sizeof(*sb), BNG_FW_CMDQE_UNITS);
+ sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
+ &sbuf.dma_addr, GFP_KERNEL);
+ if (!sbuf.sb)
+ return -ENOMEM;
+ sb = sbuf.sb;
+ req.resp_size = sbuf.size / BNG_FW_CMDQE_UNITS;
+ bng_re_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
+ sizeof(resp), 0);
+ rc = bng_re_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ goto bail;
+ /* Extract the context from the side buffer */
+ attr->max_qp = le32_to_cpu(sb->max_qp);
+ /* max_qp value reported by FW doesn't include QP1 */
+ attr->max_qp += 1;
+ attr->max_qp_rd_atom =
+ sb->max_qp_rd_atom > BNG_RE_MAX_OUT_RD_ATOM ?
+ BNG_RE_MAX_OUT_RD_ATOM : sb->max_qp_rd_atom;
+ attr->max_qp_init_rd_atom =
+ sb->max_qp_init_rd_atom > BNG_RE_MAX_OUT_RD_ATOM ?
+ BNG_RE_MAX_OUT_RD_ATOM : sb->max_qp_init_rd_atom;
+ attr->max_qp_wqes = le16_to_cpu(sb->max_qp_wr) - 1;
+
+ /* Adjust for max_qp_wqes for variable wqe */
+ attr->max_qp_wqes = min_t(u32, attr->max_qp_wqes, BNG_VAR_MAX_WQE - 1);
+
+ attr->max_qp_sges = min_t(u32, sb->max_sge_var_wqe, BNG_VAR_MAX_SGE);
+ attr->max_cq = le32_to_cpu(sb->max_cq);
+ attr->max_cq_wqes = le32_to_cpu(sb->max_cqe);
+ attr->max_cq_sges = attr->max_qp_sges;
+ attr->max_mr = le32_to_cpu(sb->max_mr);
+ attr->max_mw = le32_to_cpu(sb->max_mw);
+
+ attr->max_mr_size = le64_to_cpu(sb->max_mr_size);
+ attr->max_pd = 64 * 1024;
+ attr->max_raw_ethy_qp = le32_to_cpu(sb->max_raw_eth_qp);
+ attr->max_ah = le32_to_cpu(sb->max_ah);
+
+ attr->max_srq = le16_to_cpu(sb->max_srq);
+ attr->max_srq_wqes = le32_to_cpu(sb->max_srq_wr) - 1;
+ attr->max_srq_sges = sb->max_srq_sge;
+ attr->max_pkey = 1;
+ attr->max_inline_data = le32_to_cpu(sb->max_inline_data);
+ /*
+ * Read the max gid supported by HW.
+ * For each GID entry in the HW table we consume two entries
+ * in the kernel GID table, so the max_gid reported to the stack
+ * can be up to twice the value reported by the HW, capped at
+ * BNG_RE_NUM_GIDS_SUPPORTED (256).
+ */
+ attr->max_sgid = le32_to_cpu(sb->max_gid);
+ attr->max_sgid = min_t(u32, BNG_RE_NUM_GIDS_SUPPORTED, 2 * attr->max_sgid);
+ attr->dev_cap_flags = le16_to_cpu(sb->dev_cap_flags);
+ attr->dev_cap_flags2 = le16_to_cpu(sb->dev_cap_ext_flags_2);
+
+ if (_is_max_srq_ext_supported(attr->dev_cap_flags2))
+ attr->max_srq += le16_to_cpu(sb->max_srq_ext);
+
+ bng_re_query_version(rcfw, attr->fw_ver);
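+ /* Each 32-bit word packs four single-byte TQM allocation requests */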
+ for (i = 0; i < BNG_MAX_TQM_ALLOC_REQ / 4; i++) {
+ temp = le32_to_cpu(sb->tqm_alloc_reqs[i]);
+ tqm_alloc = (u8 *)&temp;
+ attr->tqm_alloc_reqs[i * 4] = *tqm_alloc;
+ attr->tqm_alloc_reqs[i * 4 + 1] = *(++tqm_alloc);
+ attr->tqm_alloc_reqs[i * 4 + 2] = *(++tqm_alloc);
+ attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc);
+ }
+
+ attr->max_dpi = le32_to_cpu(sb->max_dpi);
+ attr->is_atomic = bng_re_is_atomic_cap(rcfw);
+bail:
+ dma_free_coherent(&rcfw->pdev->dev, sbuf.size,
+ sbuf.sb, sbuf.dma_addr);
+ return rc;
+}
diff --git a/drivers/infiniband/hw/bng_re/bng_sp.h b/drivers/infiniband/hw/bng_re/bng_sp.h
new file mode 100644
index 000000000000..e15190515ed1
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/bng_sp.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2025 Broadcom.
+
+#ifndef __BNG_SP_H__
+#define __BNG_SP_H__
+
+#include "bng_fw.h"
+
+#define BNG_VAR_MAX_WQE 4352
+#define BNG_VAR_MAX_SGE 13
+
+struct bng_re_dev_attr {
+#define FW_VER_ARR_LEN 4
+ u8 fw_ver[FW_VER_ARR_LEN];
+#define BNG_RE_NUM_GIDS_SUPPORTED 256
+ u16 max_sgid;
+ u16 max_mrw;
+ u32 max_qp;
+#define BNG_RE_MAX_OUT_RD_ATOM 126
+ u32 max_qp_rd_atom;
+ u32 max_qp_init_rd_atom;
+ u32 max_qp_wqes;
+ u32 max_qp_sges;
+ u32 max_cq;
+ u32 max_cq_wqes;
+ u32 max_cq_sges;
+ u32 max_mr;
+ u64 max_mr_size;
+ u32 max_pd;
+ u32 max_mw;
+ u32 max_raw_ethy_qp;
+ u32 max_ah;
+ u32 max_srq;
+ u32 max_srq_wqes;
+ u32 max_srq_sges;
+ u32 max_pkey;
+ u32 max_inline_data;
+ u32 l2_db_size;
+ u8 tqm_alloc_reqs[BNG_MAX_TQM_ALLOC_REQ];
+ bool is_atomic;
+ u16 dev_cap_flags;
+ u16 dev_cap_flags2;
+ u32 max_dpi;
+};
+
+int bng_re_get_dev_attr(struct bng_re_rcfw *rcfw);
+#endif
diff --git a/drivers/infiniband/hw/bng_re/bng_tlv.h b/drivers/infiniband/hw/bng_re/bng_tlv.h
new file mode 100644
index 000000000000..278f4922962d
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/bng_tlv.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+#ifndef __BNG_TLV_H__
+#define __BNG_TLV_H__
+
+#include "roce_hsi.h"
+
+struct roce_tlv {
+ struct tlv tlv;
+ u8 total_size; // in units of 16 byte chunks
+ u8 unused[7]; // for 16 byte alignment
+};
+
+/*
+ * TLV size in units of 16 byte chunks
+ */
+#define TLV_SIZE ((sizeof(struct roce_tlv) + 15) / 16)
+/*
+ * TLV length in bytes
+ */
+#define TLV_BYTES (TLV_SIZE * 16)
+
+#define HAS_TLV_HEADER(msg) (le16_to_cpu(((struct tlv *)(msg))->cmd_discr) == CMD_DISCR_TLV_ENCAP)
+#define GET_TLV_DATA(tlv) ((void *)&((uint8_t *)(tlv))[TLV_BYTES])
+
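+/* When a command is TLV encapsulated, the real cmdq_base follows the
+ * roce_tlv header; these accessors read and write the base fields in
+ * either layout.
+ */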
+static inline u8 __get_cmdq_base_opcode(struct cmdq_base *req, u32 size)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ return ((struct cmdq_base *)GET_TLV_DATA(req))->opcode;
+ else
+ return req->opcode;
+}
+
+static inline void __set_cmdq_base_opcode(struct cmdq_base *req,
+ u32 size, u8 val)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ ((struct cmdq_base *)GET_TLV_DATA(req))->opcode = val;
+ else
+ req->opcode = val;
+}
+
+static inline __le16 __get_cmdq_base_cookie(struct cmdq_base *req, u32 size)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ return ((struct cmdq_base *)GET_TLV_DATA(req))->cookie;
+ else
+ return req->cookie;
+}
+
+static inline void __set_cmdq_base_cookie(struct cmdq_base *req,
+ u32 size, __le16 val)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ ((struct cmdq_base *)GET_TLV_DATA(req))->cookie = val;
+ else
+ req->cookie = val;
+}
+
+static inline __le64 __get_cmdq_base_resp_addr(struct cmdq_base *req, u32 size)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ return ((struct cmdq_base *)GET_TLV_DATA(req))->resp_addr;
+ else
+ return req->resp_addr;
+}
+
+static inline void __set_cmdq_base_resp_addr(struct cmdq_base *req,
+ u32 size, __le64 val)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ ((struct cmdq_base *)GET_TLV_DATA(req))->resp_addr = val;
+ else
+ req->resp_addr = val;
+}
+
+static inline u8 __get_cmdq_base_resp_size(struct cmdq_base *req, u32 size)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ return ((struct cmdq_base *)GET_TLV_DATA(req))->resp_size;
+ else
+ return req->resp_size;
+}
+
+static inline void __set_cmdq_base_resp_size(struct cmdq_base *req,
+ u32 size, u8 val)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ ((struct cmdq_base *)GET_TLV_DATA(req))->resp_size = val;
+ else
+ req->resp_size = val;
+}
+
+static inline u8 __get_cmdq_base_cmd_size(struct cmdq_base *req, u32 size)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ return ((struct roce_tlv *)(req))->total_size;
+ else
+ return req->cmd_size;
+}
+
+static inline void __set_cmdq_base_cmd_size(struct cmdq_base *req,
+ u32 size, u8 val)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ ((struct cmdq_base *)GET_TLV_DATA(req))->cmd_size = val;
+ else
+ req->cmd_size = val;
+}
+
+static inline __le16 __get_cmdq_base_flags(struct cmdq_base *req, u32 size)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ return ((struct cmdq_base *)GET_TLV_DATA(req))->flags;
+ else
+ return req->flags;
+}
+
+static inline void __set_cmdq_base_flags(struct cmdq_base *req,
+ u32 size, __le16 val)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ ((struct cmdq_base *)GET_TLV_DATA(req))->flags = val;
+ else
+ req->flags = val;
+}
+
+#endif /* __BNG_TLV_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/Kconfig b/drivers/infiniband/hw/bnxt_re/Kconfig
index 51e8234520a9..6a17f5cdb020 100644
--- a/drivers/infiniband/hw/bnxt_re/Kconfig
+++ b/drivers/infiniband/hw/bnxt_re/Kconfig
@@ -1,10 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
config INFINIBAND_BNXT_RE
- tristate "Broadcom Netxtreme HCA support"
- depends on 64BIT
- depends on ETHERNET && NETDEVICES && PCI && INET && DCB
- select NET_VENDOR_BROADCOM
- select BNXT
- ---help---
+ tristate "Broadcom Netxtreme HCA support"
+ depends on 64BIT
+ depends on INET && DCB && BNXT
+ help
This driver supports Broadcom NetXtreme-E 10/25/40/50 gigabit
RoCE HCAs. To compile this driver as a module, choose M here:
the module will be called bnxt_re.
diff --git a/drivers/infiniband/hw/bnxt_re/Makefile b/drivers/infiniband/hw/bnxt_re/Makefile
index ee9bb1be61ea..f63417d2ccc6 100644
--- a/drivers/infiniband/hw/bnxt_re/Makefile
+++ b/drivers/infiniband/hw/bnxt_re/Makefile
@@ -4,4 +4,5 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/broadcom/bnxt
obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re.o
bnxt_re-y := main.o ib_verbs.o \
qplib_res.o qplib_rcfw.o \
- qplib_sp.o qplib_fp.o hw_counters.o
+ qplib_sp.o qplib_fp.o hw_counters.o \
+ debugfs.o
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index e55a1666c0cd..3a7ce4729fcf 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -39,33 +39,20 @@
#ifndef __BNXT_RE_H__
#define __BNXT_RE_H__
+#include <rdma/uverbs_ioctl.h>
+#include "hw_counters.h"
+#include <linux/hashtable.h>
#define ROCE_DRV_MODULE_NAME "bnxt_re"
#define BNXT_RE_DESC "Broadcom NetXtreme-C/E RoCE Driver"
-#define BNXT_RE_PAGE_SHIFT_4K (12)
-#define BNXT_RE_PAGE_SHIFT_8K (13)
-#define BNXT_RE_PAGE_SHIFT_64K (16)
-#define BNXT_RE_PAGE_SHIFT_2M (21)
-#define BNXT_RE_PAGE_SHIFT_8M (23)
-#define BNXT_RE_PAGE_SHIFT_1G (30)
-#define BNXT_RE_PAGE_SIZE_4K BIT(BNXT_RE_PAGE_SHIFT_4K)
-#define BNXT_RE_PAGE_SIZE_8K BIT(BNXT_RE_PAGE_SHIFT_8K)
-#define BNXT_RE_PAGE_SIZE_64K BIT(BNXT_RE_PAGE_SHIFT_64K)
-#define BNXT_RE_PAGE_SIZE_2M BIT(BNXT_RE_PAGE_SHIFT_2M)
-#define BNXT_RE_PAGE_SIZE_8M BIT(BNXT_RE_PAGE_SHIFT_8M)
-#define BNXT_RE_PAGE_SIZE_1G BIT(BNXT_RE_PAGE_SHIFT_1G)
+#define BNXT_RE_PAGE_SHIFT_1G (30)
+#define BNXT_RE_PAGE_SIZE_SUPPORTED 0x7FFFF000 /* 4kb - 1G */
#define BNXT_RE_MAX_MR_SIZE_LOW BIT_ULL(BNXT_RE_PAGE_SHIFT_1G)
#define BNXT_RE_MAX_MR_SIZE_HIGH BIT_ULL(39)
#define BNXT_RE_MAX_MR_SIZE BNXT_RE_MAX_MR_SIZE_HIGH
-#define BNXT_RE_MAX_QPC_COUNT (64 * 1024)
-#define BNXT_RE_MAX_MRW_COUNT (64 * 1024)
-#define BNXT_RE_MAX_SRQC_COUNT (64 * 1024)
-#define BNXT_RE_MAX_CQ_COUNT (64 * 1024)
-#define BNXT_RE_MAX_MRW_COUNT_64K (64 * 1024)
-#define BNXT_RE_MAX_MRW_COUNT_256K (256 * 1024)
/* Number of MRs to reserve for PF, leaving remainder for VFs */
#define BNXT_RE_RESVD_MR_FOR_PF (32 * 1024)
@@ -89,13 +76,24 @@
#define BNXT_RE_DEFAULT_ACK_DELAY 16
-struct bnxt_re_work {
- struct work_struct work;
- unsigned long event;
- struct bnxt_re_dev *rdev;
- struct net_device *vlan_dev;
+struct bnxt_re_ring_attr {
+ dma_addr_t *dma_arr;
+ int pages;
+ int type;
+ u32 depth;
+ u32 lrid; /* Logical ring id */
+ u8 mode;
};
+/*
+ * Data structure and defines to handle
+ * recovery
+ */
+#define BNXT_RE_PRE_RECOVERY_REMOVE 0x1
+#define BNXT_RE_COMPLETE_REMOVE 0x2
+#define BNXT_RE_POST_RECOVERY_INIT 0x4
+#define BNXT_RE_COMPLETE_INIT 0x8
+
struct bnxt_re_sqp_entries {
struct bnxt_qplib_sge sge;
u64 wrid;
@@ -104,74 +102,137 @@ struct bnxt_re_sqp_entries {
struct bnxt_re_qp *qp1_qp;
};
-#define BNXT_RE_MIN_MSIX 2
-#define BNXT_RE_MAX_MSIX 9
+#define BNXT_RE_MAX_GSI_SQP_ENTRIES 1024
+struct bnxt_re_gsi_context {
+ struct bnxt_re_qp *gsi_qp;
+ struct bnxt_re_qp *gsi_sqp;
+ struct bnxt_re_ah *gsi_sah;
+ struct bnxt_re_sqp_entries *sqp_tbl;
+};
+
+struct bnxt_re_en_dev_info {
+ struct bnxt_en_dev *en_dev;
+ struct bnxt_re_dev *rdev;
+};
+
#define BNXT_RE_AEQ_IDX 0
#define BNXT_RE_NQ_IDX 1
+#define BNXT_RE_GEN_P5_MAX_VF 64
+
+struct bnxt_re_pacing {
+ u64 dbr_db_fifo_reg_off;
+ void *dbr_page;
+ u64 dbr_bar_addr;
+ u32 pacing_algo_th;
+ u32 do_pacing_save;
+ u32 dbq_pacing_time; /* ms */
+ u32 dbr_def_do_pacing;
+ bool dbr_pacing;
+ struct mutex dbq_lock; /* synchronize db pacing algo */
+};
+
+#define BNXT_RE_MAX_DBR_DO_PACING 0xFFFF
+#define BNXT_RE_DBR_PACING_TIME 5 /* ms */
+#define BNXT_RE_PACING_ALGO_THRESHOLD 250 /* Entries in DB FIFO */
+#define BNXT_RE_PACING_ALARM_TH_MULTIPLE 2 /* Multiple of pacing algo threshold */
+/* Default do_pacing value when there is no congestion */
+#define BNXT_RE_DBR_DO_PACING_NO_CONGESTION 0x7F /* 1 in 512 probability */
+
+#define BNXT_RE_MAX_FIFO_DEPTH_P5 0x2c00
+#define BNXT_RE_MAX_FIFO_DEPTH_P7 0x8000
+
+#define BNXT_RE_MAX_FIFO_DEPTH(ctx) \
+ (bnxt_qplib_is_chip_gen_p7((ctx)) ? \
+ BNXT_RE_MAX_FIFO_DEPTH_P7 :\
+ BNXT_RE_MAX_FIFO_DEPTH_P5)
+
+#define BNXT_RE_GRC_FIFO_REG_BASE 0x2000
+
+#define BNXT_RE_MIN_MSIX 2
+#define BNXT_RE_MAX_MSIX BNXT_MAX_ROCE_MSIX
+struct bnxt_re_nq_record {
+ struct bnxt_msix_entry msix_entries[BNXT_RE_MAX_MSIX];
+ struct bnxt_qplib_nq nq[BNXT_RE_MAX_MSIX];
+ int num_msix;
+ /* serialize NQ access */
+ struct mutex load_lock;
+};
+
+#define MAX_CQ_HASH_BITS (16)
+#define MAX_SRQ_HASH_BITS (16)
+
+static inline bool bnxt_re_chip_gen_p7(u16 chip_num)
+{
+ return (chip_num == CHIP_NUM_58818 ||
+ chip_num == CHIP_NUM_57608);
+}
struct bnxt_re_dev {
struct ib_device ibdev;
struct list_head list;
unsigned long flags;
#define BNXT_RE_FLAG_NETDEV_REGISTERED 0
-#define BNXT_RE_FLAG_IBDEV_REGISTERED 1
-#define BNXT_RE_FLAG_GOT_MSIX 2
+#define BNXT_RE_FLAG_STATS_CTX3_ALLOC 1
#define BNXT_RE_FLAG_HAVE_L2_REF 3
#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4
-#define BNXT_RE_FLAG_QOS_WORK_REG 5
#define BNXT_RE_FLAG_RESOURCES_ALLOCATED 7
#define BNXT_RE_FLAG_RESOURCES_INITIALIZED 8
+#define BNXT_RE_FLAG_ERR_DEVICE_DETACHED 17
#define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29
struct net_device *netdev;
+ struct auxiliary_device *adev;
unsigned int version, major, minor;
- struct bnxt_qplib_chip_ctx chip_ctx;
+ struct bnxt_qplib_chip_ctx *chip_ctx;
struct bnxt_en_dev *en_dev;
- struct bnxt_msix_entry msix_entries[BNXT_RE_MAX_MSIX];
- int num_msix;
int id;
- struct delayed_work worker;
- u8 cur_prio_map;
- u8 active_speed;
- u8 active_width;
-
- /* FP Notification Queue (CQ & SRQ) */
- struct tasklet_struct nq_task;
-
/* RCFW Channel */
struct bnxt_qplib_rcfw rcfw;
- /* NQ */
- struct bnxt_qplib_nq nq[BNXT_RE_MAX_MSIX];
+ /* NQ record */
+ struct bnxt_re_nq_record *nqr;
/* Device Resources */
- struct bnxt_qplib_dev_attr dev_attr;
+ struct bnxt_qplib_dev_attr *dev_attr;
struct bnxt_qplib_ctx qplib_ctx;
struct bnxt_qplib_res qplib_res;
struct bnxt_qplib_dpi dpi_privileged;
+ struct bnxt_qplib_cq_coal_param cq_coalescing;
- atomic_t qp_count;
struct mutex qp_lock; /* protect qp list */
struct list_head qp_list;
- atomic_t cq_count;
- atomic_t srq_count;
- atomic_t mr_count;
- atomic_t mw_count;
- atomic_t sched_count;
/* Max of 2 lossless traffic class supported per port */
u16 cosq[2];
- /* QP for for handling QP1 packets */
- u32 sqp_id;
- struct bnxt_re_qp *qp1_sqp;
- struct bnxt_re_ah *sqp_ah;
- struct bnxt_re_sqp_entries sqp_tbl[1024];
+ /* QP for handling QP1 packets */
+ struct bnxt_re_gsi_context gsi_ctx;
+ struct bnxt_re_stats stats;
atomic_t nq_alloc_cnt;
u32 is_virtfn;
u32 num_vfs;
- struct bnxt_qplib_roce_stats stats;
+ struct bnxt_re_pacing pacing;
+ struct work_struct dbq_fifo_check_work;
+ struct delayed_work dbq_pacing_work;
+ DECLARE_HASHTABLE(cq_hash, MAX_CQ_HASH_BITS);
+ DECLARE_HASHTABLE(srq_hash, MAX_SRQ_HASH_BITS);
+ struct dentry *dbg_root;
+ struct dentry *qp_debugfs;
+ unsigned long event_bitmap;
+ struct bnxt_qplib_cc_param cc_param;
+ struct workqueue_struct *dcb_wq;
+ struct dentry *cc_config;
+ struct bnxt_re_dbg_cc_config_params *cc_config_params;
+ struct dentry *cq_coal_cfg;
+ struct bnxt_re_dbg_cq_coal_params *cq_coal_cfg_params;
+#define BNXT_VPD_FLD_LEN 32
+ char board_partno[BNXT_VPD_FLD_LEN];
+ /* RoCE mirror */
+ u16 mirror_vnic_id;
+ union ib_gid ugid;
+ u32 ugid_index;
+ u8 sniffer_flow_created : 1;
};
#define to_bnxt_re_dev(ptr, member) \
@@ -181,6 +242,17 @@ struct bnxt_re_dev {
#define BNXT_RE_ROCEV2_IPV4_PACKET 2
#define BNXT_RE_ROCEV2_IPV6_PACKET 3
+#define BNXT_RE_CHECK_RC(x) ((x) && ((x) != -ETIMEDOUT))
+void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev);
+
+int bnxt_re_assign_pma_port_counters(struct bnxt_re_dev *rdev, struct ib_mad *out_mad);
+int bnxt_re_assign_pma_port_ext_counters(struct bnxt_re_dev *rdev,
+ struct ib_mad *out_mad);
+
+void bnxt_re_hwrm_free_vnic(struct bnxt_re_dev *rdev);
+int bnxt_re_hwrm_alloc_vnic(struct bnxt_re_dev *rdev);
+int bnxt_re_hwrm_cfg_vnic(struct bnxt_re_dev *rdev, u32 qp_id);
+
static inline struct device *rdev_to_dev(struct bnxt_re_dev *rdev)
{
if (rdev)
@@ -188,4 +260,33 @@ static inline struct device *rdev_to_dev(struct bnxt_re_dev *rdev)
return NULL;
}
+extern const struct uapi_definition bnxt_re_uapi_defs[];
+
+static inline void bnxt_re_set_pacing_dev_state(struct bnxt_re_dev *rdev)
+{
+ rdev->qplib_res.pacing_data->dev_err_state =
+ test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
+}
+
+static inline int bnxt_re_read_context_allowed(struct bnxt_re_dev *rdev)
+{
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ||
+ rdev->rcfw.res->cctx->hwrm_intf_ver < HWRM_VERSION_READ_CTX)
+ return -EOPNOTSUPP;
+ return 0;
+}
+
+#define BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P5 1088
+#define BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P5 128
+#define BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P5 128
+#define BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P5 192
+
+#define BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P7 1088
+#define BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P7 192
+#define BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P7 192
+#define BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P7 192
+
+#define BNXT_RE_HWRM_CMD_TIMEOUT(rdev) \
+ ((rdev)->chip_ctx->hwrm_cmd_max_timeout * 1000)
+
#endif
diff --git a/drivers/infiniband/hw/bnxt_re/debugfs.c b/drivers/infiniband/hw/bnxt_re/debugfs.c
new file mode 100644
index 000000000000..88817c86ae24
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/debugfs.c
@@ -0,0 +1,524 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * Copyright (c) 2024, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * Description: Debugfs component of the bnxt_re driver
+ */
+
+#include <linux/debugfs.h>
+#include <linux/pci.h>
+#include <linux/seq_file.h>
+#include <rdma/ib_addr.h>
+
+#include "bnxt_ulp.h"
+#include "roce_hsi.h"
+#include "qplib_res.h"
+#include "qplib_sp.h"
+#include "qplib_fp.h"
+#include "qplib_rcfw.h"
+#include "bnxt_re.h"
+#include "ib_verbs.h"
+#include "debugfs.h"
+
+static struct dentry *bnxt_re_debugfs_root;
+
+static const char * const bnxt_re_cq_coal_str[] = {
+ "buf_maxtime",
+ "normal_maxbuf",
+ "during_maxbuf",
+ "en_ring_idle_mode",
+ "enable",
+};
+
+static const char * const bnxt_re_cc_gen0_name[] = {
+ "enable_cc",
+ "run_avg_weight_g",
+ "num_phase_per_state",
+ "init_cr",
+ "init_tr",
+ "tos_ecn",
+ "tos_dscp",
+ "alt_vlan_pcp",
+ "alt_vlan_dscp",
+ "rtt",
+ "cc_mode",
+ "tcp_cp",
+ "tx_queue",
+ "inactivity_cp",
+};
+
+static inline const char *bnxt_re_qp_state_str(u8 state)
+{
+ switch (state) {
+ case CMDQ_MODIFY_QP_NEW_STATE_RESET:
+ return "RST";
+ case CMDQ_MODIFY_QP_NEW_STATE_INIT:
+ return "INIT";
+ case CMDQ_MODIFY_QP_NEW_STATE_RTR:
+ return "RTR";
+ case CMDQ_MODIFY_QP_NEW_STATE_RTS:
+ return "RTS";
+ case CMDQ_MODIFY_QP_NEW_STATE_SQE:
+ return "SQER";
+ case CMDQ_MODIFY_QP_NEW_STATE_SQD:
+ return "SQD";
+ case CMDQ_MODIFY_QP_NEW_STATE_ERR:
+ return "ERR";
+ default:
+ return "Invalid QP state";
+ }
+}
+
+static inline const char *bnxt_re_qp_type_str(u8 type)
+{
+ switch (type) {
+ case CMDQ_CREATE_QP1_TYPE_GSI: return "QP1";
+ case CMDQ_CREATE_QP_TYPE_GSI: return "QP1";
+ case CMDQ_CREATE_QP_TYPE_RC: return "RC";
+ case CMDQ_CREATE_QP_TYPE_UD: return "UD";
+ case CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE: return "RAW_ETHERTYPE";
+ default: return "Invalid transport type";
+ }
+}
+
+static ssize_t qp_info_read(struct file *filep,
+ char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct bnxt_re_qp *qp = filep->private_data;
+ char *buf;
+ int len;
+
+ if (*ppos)
+ return 0;
+
+ buf = kasprintf(GFP_KERNEL,
+ "QPN\t\t: %d\n"
+ "transport\t: %s\n"
+ "state\t\t: %s\n"
+ "mtu\t\t: %d\n"
+ "timeout\t\t: %d\n"
+ "remote QPN\t: %d\n",
+ qp->qplib_qp.id,
+ bnxt_re_qp_type_str(qp->qplib_qp.type),
+ bnxt_re_qp_state_str(qp->qplib_qp.state),
+ qp->qplib_qp.mtu,
+ qp->qplib_qp.timeout,
+ qp->qplib_qp.dest_qpn);
+ if (!buf)
+ return -ENOMEM;
+ if (count < strlen(buf)) {
+ kfree(buf);
+ return -ENOSPC;
+ }
+ len = simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
+ kfree(buf);
+ return len;
+}
+
+static const struct file_operations debugfs_qp_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = qp_info_read,
+};
+
+void bnxt_re_debug_add_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp)
+{
+ char resn[32];
+
+ sprintf(resn, "0x%x", qp->qplib_qp.id);
+ qp->dentry = debugfs_create_file(resn, 0400, rdev->qp_debugfs, qp, &debugfs_qp_fops);
+}
+
+void bnxt_re_debug_rem_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp)
+{
+ debugfs_remove(qp->dentry);
+}
+
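+/* The debugfs file offset is the bit position of the corresponding field in
+ * the CMDQ_MODIFY_ROCE_CC modify mask; return the current value of that
+ * field from the queried CC parameters.
+ */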
+static int map_cc_config_offset_gen0_ext0(u32 offset, struct bnxt_qplib_cc_param *ccparam, u32 *val)
+{
+ u64 map_offset;
+
+ map_offset = BIT(offset);
+
+ switch (map_offset) {
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC:
+ *val = ccparam->enable;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_G:
+ *val = ccparam->g;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_NUMPHASEPERSTATE:
+ *val = ccparam->nph_per_state;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_CR:
+ *val = ccparam->init_cr;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_TR:
+ *val = ccparam->init_tr;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN:
+ *val = ccparam->tos_ecn;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP:
+ *val = ccparam->tos_dscp;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_VLAN_PCP:
+ *val = ccparam->alt_vlan_pcp;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP:
+ *val = ccparam->alt_tos_dscp;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_RTT:
+ *val = ccparam->rtt;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE:
+ *val = ccparam->cc_mode;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TCP_CP:
+ *val = ccparam->tcp_cp;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INACTIVITY_CP:
+ *val = ccparam->inact_th;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static ssize_t bnxt_re_cc_config_get(struct file *filp, char __user *buffer,
+ size_t usr_buf_len, loff_t *ppos)
+{
+ struct bnxt_re_cc_param *dbg_cc_param = filp->private_data;
+ struct bnxt_re_dev *rdev = dbg_cc_param->rdev;
+ struct bnxt_qplib_cc_param ccparam = {};
+ u32 offset = dbg_cc_param->offset;
+ char buf[16];
+ u32 val;
+ int rc;
+
+ rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, &ccparam);
+ if (rc)
+ return rc;
+
+ rc = map_cc_config_offset_gen0_ext0(offset, &ccparam, &val);
+ if (rc)
+ return rc;
+
+ rc = snprintf(buf, sizeof(buf), "%d\n", val);
+ if (rc < 0)
+ return rc;
+
+ return simple_read_from_buffer(buffer, usr_buf_len, ppos, (u8 *)(buf), rc);
+}
+
+static int bnxt_re_fill_gen0_ext0(struct bnxt_qplib_cc_param *ccparam, u32 offset, u32 val)
+{
+ u32 modify_mask;
+
+ modify_mask = BIT(offset);
+
+ switch (modify_mask) {
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC:
+ ccparam->enable = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_G:
+ ccparam->g = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_NUMPHASEPERSTATE:
+ ccparam->nph_per_state = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_CR:
+ ccparam->init_cr = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_TR:
+ ccparam->init_tr = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN:
+ ccparam->tos_ecn = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP:
+ ccparam->tos_dscp = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_VLAN_PCP:
+ ccparam->alt_vlan_pcp = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP:
+ ccparam->alt_tos_dscp = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_RTT:
+ ccparam->rtt = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE:
+ ccparam->cc_mode = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TCP_CP:
+ ccparam->tcp_cp = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TX_QUEUE:
+ return -EOPNOTSUPP;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INACTIVITY_CP:
+ ccparam->inact_th = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TIME_PER_PHASE:
+ ccparam->time_pph = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_PKTS_PER_PHASE:
+ ccparam->pkts_pph = val;
+ break;
+ }
+
+ ccparam->mask = modify_mask;
+ return 0;
+}
+
+static int bnxt_re_configure_cc(struct bnxt_re_dev *rdev, u32 gen_ext, u32 offset, u32 val)
+{
+ struct bnxt_qplib_cc_param ccparam = { };
+ int rc;
+
+ if (gen_ext != CC_CONFIG_GEN0_EXT0)
+ return -EOPNOTSUPP;
+
+ rc = bnxt_re_fill_gen0_ext0(&ccparam, offset, val);
+ if (rc)
+ return rc;
+
+ bnxt_qplib_modify_cc(&rdev->qplib_res, &ccparam);
+ return 0;
+}
+
+static ssize_t bnxt_re_cc_config_set(struct file *filp, const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct bnxt_re_cc_param *dbg_cc_param = filp->private_data;
+ struct bnxt_re_dev *rdev = dbg_cc_param->rdev;
+ u32 offset = dbg_cc_param->offset;
+ u8 cc_gen = dbg_cc_param->cc_gen;
+ char buf[16];
+ u32 val;
+ int rc;
+
+ if (count >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(buf, buffer, count))
+ return -EFAULT;
+
+ buf[count] = '\0';
+ if (kstrtou32(buf, 0, &val))
+ return -EINVAL;
+
+ rc = bnxt_re_configure_cc(rdev, cc_gen, offset, val);
+ return rc ? rc : count;
+}
+
+static const struct file_operations bnxt_re_cc_config_ops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = bnxt_re_cc_config_get,
+ .write = bnxt_re_cc_config_set,
+};
+
+static int info_show(struct seq_file *m, void *unused)
+{
+ struct bnxt_re_dev *rdev = m->private;
+ struct bnxt_re_res_cntrs *res_s = &rdev->stats.res;
+
+ seq_puts(m, "Info:\n");
+ seq_printf(m, "Device Name\t\t: %s\n", dev_name(&rdev->ibdev.dev));
+ seq_printf(m, "PD Watermark\t\t: %llu\n", res_s->pd_watermark);
+ seq_printf(m, "AH Watermark\t\t: %llu\n", res_s->ah_watermark);
+ seq_printf(m, "QP Watermark\t\t: %llu\n", res_s->qp_watermark);
+ seq_printf(m, "RC QP Watermark\t\t: %llu\n", res_s->rc_qp_watermark);
+ seq_printf(m, "UD QP Watermark\t\t: %llu\n", res_s->ud_qp_watermark);
+ seq_printf(m, "SRQ Watermark\t\t: %llu\n", res_s->srq_watermark);
+ seq_printf(m, "CQ Watermark\t\t: %llu\n", res_s->cq_watermark);
+ seq_printf(m, "MR Watermark\t\t: %llu\n", res_s->mr_watermark);
+ seq_printf(m, "MW Watermark\t\t: %llu\n", res_s->mw_watermark);
+ seq_printf(m, "CQ Resize Count\t\t: %d\n", atomic_read(&res_s->resize_count));
+ if (rdev->pacing.dbr_pacing) {
+ seq_printf(m, "DB Pacing Reschedule\t: %llu\n", rdev->stats.pacing.resched);
+ seq_printf(m, "DB Pacing Complete\t: %llu\n", rdev->stats.pacing.complete);
+ seq_printf(m, "DB Pacing Alerts\t: %llu\n", rdev->stats.pacing.alerts);
+ seq_printf(m, "DB FIFO Register\t: 0x%x\n",
+ readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off));
+ }
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(info);
+
+static void bnxt_re_debugfs_add_info(struct bnxt_re_dev *rdev)
+{
+ debugfs_create_file("info", 0400, rdev->dbg_root, rdev, &info_fops);
+}
+
+static ssize_t cq_coal_cfg_write(struct file *file,
+ const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct seq_file *s = file->private_data;
+ struct bnxt_re_cq_coal_param *param = s->private;
+ struct bnxt_re_dev *rdev = param->rdev;
+ int offset = param->offset;
+ char lbuf[16] = { };
+ u32 val;
+
+ if (count >= sizeof(lbuf))
+ return -EINVAL;
+
+ if (copy_from_user(lbuf, buf, count))
+ return -EFAULT;
+
+ lbuf[sizeof(lbuf) - 1] = '\0';
+
+ if (kstrtou32(lbuf, 0, &val))
+ return -EINVAL;
+
+ switch (offset) {
+ case BNXT_RE_COAL_CQ_BUF_MAXTIME:
+ if (val < 1 || val > BNXT_QPLIB_CQ_COAL_MAX_BUF_MAXTIME)
+ return -EINVAL;
+ rdev->cq_coalescing.buf_maxtime = val;
+ break;
+ case BNXT_RE_COAL_CQ_NORMAL_MAXBUF:
+ if (val < 1 || val > BNXT_QPLIB_CQ_COAL_MAX_NORMAL_MAXBUF)
+ return -EINVAL;
+ rdev->cq_coalescing.normal_maxbuf = val;
+ break;
+ case BNXT_RE_COAL_CQ_DURING_MAXBUF:
+ if (val < 1 || val > BNXT_QPLIB_CQ_COAL_MAX_DURING_MAXBUF)
+ return -EINVAL;
+ rdev->cq_coalescing.during_maxbuf = val;
+ break;
+ case BNXT_RE_COAL_CQ_EN_RING_IDLE_MODE:
+ if (val > BNXT_QPLIB_CQ_COAL_MAX_EN_RING_IDLE_MODE)
+ return -EINVAL;
+ rdev->cq_coalescing.en_ring_idle_mode = val;
+ break;
+ case BNXT_RE_COAL_CQ_ENABLE:
+ if (val > 1)
+ return -EINVAL;
+ rdev->cq_coalescing.enable = val;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return count;
+}
+
+static int cq_coal_cfg_show(struct seq_file *s, void *unused)
+{
+ struct bnxt_re_cq_coal_param *param = s->private;
+ struct bnxt_re_dev *rdev = param->rdev;
+ int offset = param->offset;
+ u32 val = 0;
+
+ switch (offset) {
+ case BNXT_RE_COAL_CQ_BUF_MAXTIME:
+ val = rdev->cq_coalescing.buf_maxtime;
+ break;
+ case BNXT_RE_COAL_CQ_NORMAL_MAXBUF:
+ val = rdev->cq_coalescing.normal_maxbuf;
+ break;
+ case BNXT_RE_COAL_CQ_DURING_MAXBUF:
+ val = rdev->cq_coalescing.during_maxbuf;
+ break;
+ case BNXT_RE_COAL_CQ_EN_RING_IDLE_MODE:
+ val = rdev->cq_coalescing.en_ring_idle_mode;
+ break;
+ case BNXT_RE_COAL_CQ_ENABLE:
+ val = rdev->cq_coalescing.enable;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ seq_printf(s, "%u\n", val);
+ return 0;
+}
+DEFINE_SHOW_STORE_ATTRIBUTE(cq_coal_cfg);
+
+static void bnxt_re_cleanup_cq_coal_debugfs(struct bnxt_re_dev *rdev)
+{
+ debugfs_remove_recursive(rdev->cq_coal_cfg);
+ kfree(rdev->cq_coal_cfg_params);
+}
+
+static void bnxt_re_init_cq_coal_debugfs(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_re_dbg_cq_coal_params *dbg_cq_coal_params;
+ int i;
+
+ if (!_is_cq_coalescing_supported(rdev->dev_attr->dev_cap_flags2))
+ return;
+
+ dbg_cq_coal_params = kzalloc(sizeof(*dbg_cq_coal_params), GFP_KERNEL);
+ if (!dbg_cq_coal_params)
+ return;
+
+ rdev->cq_coal_cfg = debugfs_create_dir("cq_coal_cfg", rdev->dbg_root);
+ rdev->cq_coal_cfg_params = dbg_cq_coal_params;
+
+ for (i = 0; i < BNXT_RE_COAL_CQ_MAX; i++) {
+ dbg_cq_coal_params->params[i].offset = i;
+ dbg_cq_coal_params->params[i].rdev = rdev;
+ debugfs_create_file(bnxt_re_cq_coal_str[i],
+ 0600, rdev->cq_coal_cfg,
+ &dbg_cq_coal_params->params[i],
+ &cq_coal_cfg_fops);
+ }
+}
+
+void bnxt_re_debugfs_add_pdev(struct bnxt_re_dev *rdev)
+{
+ struct pci_dev *pdev = rdev->en_dev->pdev;
+ struct bnxt_re_dbg_cc_config_params *cc_params;
+ int i;
+
+ rdev->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), bnxt_re_debugfs_root);
+
+ rdev->qp_debugfs = debugfs_create_dir("QPs", rdev->dbg_root);
+ rdev->cc_config = debugfs_create_dir("cc_config", rdev->dbg_root);
+
+ bnxt_re_debugfs_add_info(rdev);
+
+ rdev->cc_config_params = kzalloc(sizeof(*cc_params), GFP_KERNEL);
+
+ for (i = 0; i < BNXT_RE_CC_PARAM_GEN0; i++) {
+ struct bnxt_re_cc_param *tmp_params = &rdev->cc_config_params->gen0_parms[i];
+
+ tmp_params->rdev = rdev;
+ tmp_params->offset = i;
+ tmp_params->cc_gen = CC_CONFIG_GEN0_EXT0;
+ tmp_params->dentry = debugfs_create_file(bnxt_re_cc_gen0_name[i], 0400,
+ rdev->cc_config, tmp_params,
+ &bnxt_re_cc_config_ops);
+ }
+
+ bnxt_re_init_cq_coal_debugfs(rdev);
+}
+
+void bnxt_re_debugfs_rem_pdev(struct bnxt_re_dev *rdev)
+{
+ bnxt_re_cleanup_cq_coal_debugfs(rdev);
+ debugfs_remove_recursive(rdev->qp_debugfs);
+ debugfs_remove_recursive(rdev->cc_config);
+ kfree(rdev->cc_config_params);
+ debugfs_remove_recursive(rdev->dbg_root);
+ rdev->dbg_root = NULL;
+}
+
+void bnxt_re_register_debugfs(void)
+{
+ bnxt_re_debugfs_root = debugfs_create_dir("bnxt_re", NULL);
+}
+
+void bnxt_re_unregister_debugfs(void)
+{
+ debugfs_remove(bnxt_re_debugfs_root);
+}
diff --git a/drivers/infiniband/hw/bnxt_re/debugfs.h b/drivers/infiniband/hw/bnxt_re/debugfs.h
new file mode 100644
index 000000000000..98f4620ef245
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/debugfs.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright (c) 2024, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * Description: Debugfs header
+ */
+
+#ifndef __BNXT_RE_DEBUGFS__
+#define __BNXT_RE_DEBUGFS__
+
+void bnxt_re_debug_add_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp);
+void bnxt_re_debug_rem_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp);
+
+void bnxt_re_debugfs_add_pdev(struct bnxt_re_dev *rdev);
+void bnxt_re_debugfs_rem_pdev(struct bnxt_re_dev *rdev);
+
+void bnxt_re_register_debugfs(void);
+void bnxt_re_unregister_debugfs(void);
+
+#define CC_CONFIG_GEN_EXT(x, y) (((x) << 16) | (y))
+#define CC_CONFIG_GEN0_EXT0 CC_CONFIG_GEN_EXT(0, 0)
+
+#define BNXT_RE_CC_PARAM_GEN0 14
+
+struct bnxt_re_cc_param {
+ struct bnxt_re_dev *rdev;
+ struct dentry *dentry;
+ u32 offset;
+ u8 cc_gen;
+};
+
+struct bnxt_re_dbg_cc_config_params {
+ struct bnxt_re_cc_param gen0_parms[BNXT_RE_CC_PARAM_GEN0];
+};
+
+struct bnxt_re_cq_coal_param {
+ struct bnxt_re_dev *rdev;
+ u32 offset;
+};
+
+enum bnxt_re_cq_coal_types {
+ BNXT_RE_COAL_CQ_BUF_MAXTIME,
+ BNXT_RE_COAL_CQ_NORMAL_MAXBUF,
+ BNXT_RE_COAL_CQ_DURING_MAXBUF,
+ BNXT_RE_COAL_CQ_EN_RING_IDLE_MODE,
+ BNXT_RE_COAL_CQ_ENABLE,
+ BNXT_RE_COAL_CQ_MAX
+
+};
+
+struct bnxt_re_dbg_cq_coal_params {
+ struct bnxt_re_cq_coal_param params[BNXT_RE_COAL_CQ_MAX];
+};
+#endif
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c
index 604b71875f5f..651cf9d0e0c7 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.c
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c
@@ -37,18 +37,11 @@
*
*/
-#include <linux/interrupt.h>
#include <linux/types.h>
-#include <linux/spinlock.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
#include <linux/pci.h>
-#include <linux/prefetch.h>
-#include <linux/delay.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_pma.h>
-#include <rdma/ib_addr.h>
-
-#include "bnxt_ulp.h"
#include "roce_hsi.h"
#include "qplib_res.h"
#include "qplib_sp.h"
@@ -57,192 +50,384 @@
#include "bnxt_re.h"
#include "hw_counters.h"
-static const char * const bnxt_re_stat_name[] = {
- [BNXT_RE_ACTIVE_QP] = "active_qps",
- [BNXT_RE_ACTIVE_SRQ] = "active_srqs",
- [BNXT_RE_ACTIVE_CQ] = "active_cqs",
- [BNXT_RE_ACTIVE_MR] = "active_mrs",
- [BNXT_RE_ACTIVE_MW] = "active_mws",
- [BNXT_RE_RX_PKTS] = "rx_pkts",
- [BNXT_RE_RX_BYTES] = "rx_bytes",
- [BNXT_RE_TX_PKTS] = "tx_pkts",
- [BNXT_RE_TX_BYTES] = "tx_bytes",
- [BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors",
- [BNXT_RE_RX_DROPS] = "rx_roce_drops",
- [BNXT_RE_RX_DISCARDS] = "rx_roce_discards",
- [BNXT_RE_TO_RETRANSMITS] = "to_retransmits",
- [BNXT_RE_SEQ_ERR_NAKS_RCVD] = "seq_err_naks_rcvd",
- [BNXT_RE_MAX_RETRY_EXCEEDED] = "max_retry_exceeded",
- [BNXT_RE_RNR_NAKS_RCVD] = "rnr_naks_rcvd",
- [BNXT_RE_MISSING_RESP] = "missin_resp",
- [BNXT_RE_UNRECOVERABLE_ERR] = "unrecoverable_err",
- [BNXT_RE_BAD_RESP_ERR] = "bad_resp_err",
- [BNXT_RE_LOCAL_QP_OP_ERR] = "local_qp_op_err",
- [BNXT_RE_LOCAL_PROTECTION_ERR] = "local_protection_err",
- [BNXT_RE_MEM_MGMT_OP_ERR] = "mem_mgmt_op_err",
- [BNXT_RE_REMOTE_INVALID_REQ_ERR] = "remote_invalid_req_err",
- [BNXT_RE_REMOTE_ACCESS_ERR] = "remote_access_err",
- [BNXT_RE_REMOTE_OP_ERR] = "remote_op_err",
- [BNXT_RE_DUP_REQ] = "dup_req",
- [BNXT_RE_RES_EXCEED_MAX] = "res_exceed_max",
- [BNXT_RE_RES_LENGTH_MISMATCH] = "res_length_mismatch",
- [BNXT_RE_RES_EXCEEDS_WQE] = "res_exceeds_wqe",
- [BNXT_RE_RES_OPCODE_ERR] = "res_opcode_err",
- [BNXT_RE_RES_RX_INVALID_RKEY] = "res_rx_invalid_rkey",
- [BNXT_RE_RES_RX_DOMAIN_ERR] = "res_rx_domain_err",
- [BNXT_RE_RES_RX_NO_PERM] = "res_rx_no_perm",
- [BNXT_RE_RES_RX_RANGE_ERR] = "res_rx_range_err",
- [BNXT_RE_RES_TX_INVALID_RKEY] = "res_tx_invalid_rkey",
- [BNXT_RE_RES_TX_DOMAIN_ERR] = "res_tx_domain_err",
- [BNXT_RE_RES_TX_NO_PERM] = "res_tx_no_perm",
- [BNXT_RE_RES_TX_RANGE_ERR] = "res_tx_range_err",
- [BNXT_RE_RES_IRRQ_OFLOW] = "res_irrq_oflow",
- [BNXT_RE_RES_UNSUP_OPCODE] = "res_unsup_opcode",
- [BNXT_RE_RES_UNALIGNED_ATOMIC] = "res_unaligned_atomic",
- [BNXT_RE_RES_REM_INV_ERR] = "res_rem_inv_err",
- [BNXT_RE_RES_MEM_ERROR] = "res_mem_err",
- [BNXT_RE_RES_SRQ_ERR] = "res_srq_err",
- [BNXT_RE_RES_CMP_ERR] = "res_cmp_err",
- [BNXT_RE_RES_INVALID_DUP_RKEY] = "res_invalid_dup_rkey",
- [BNXT_RE_RES_WQE_FORMAT_ERR] = "res_wqe_format_err",
- [BNXT_RE_RES_CQ_LOAD_ERR] = "res_cq_load_err",
- [BNXT_RE_RES_SRQ_LOAD_ERR] = "res_srq_load_err",
- [BNXT_RE_RES_TX_PCI_ERR] = "res_tx_pci_err",
- [BNXT_RE_RES_RX_PCI_ERR] = "res_rx_pci_err",
- [BNXT_RE_OUT_OF_SEQ_ERR] = "oos_drop_count"
+static const struct rdma_stat_desc bnxt_re_stat_descs[] = {
+ [BNXT_RE_RX_PKTS].name = "rx_pkts",
+ [BNXT_RE_RX_BYTES].name = "rx_bytes",
+ [BNXT_RE_TX_PKTS].name = "tx_pkts",
+ [BNXT_RE_TX_BYTES].name = "tx_bytes",
+ [BNXT_RE_RECOVERABLE_ERRORS].name = "recoverable_errors",
+ [BNXT_RE_TX_ERRORS].name = "tx_roce_errors",
+ [BNXT_RE_TX_DISCARDS].name = "tx_roce_discards",
+ [BNXT_RE_RX_ERRORS].name = "rx_roce_errors",
+ [BNXT_RE_RX_DISCARDS].name = "rx_roce_discards",
+ [BNXT_RE_TO_RETRANSMITS].name = "local_ack_timeout_err",
+ [BNXT_RE_SEQ_ERR_NAKS_RCVD].name = "packet_seq_err",
+ [BNXT_RE_MAX_RETRY_EXCEEDED].name = "max_retry_exceeded",
+ [BNXT_RE_RNR_NAKS_RCVD].name = "rnr_nak_retry_err",
+ [BNXT_RE_MISSING_RESP].name = "implied_nak_seq_err",
+ [BNXT_RE_UNRECOVERABLE_ERR].name = "unrecoverable_err",
+ [BNXT_RE_BAD_RESP_ERR].name = "bad_resp_err",
+ [BNXT_RE_LOCAL_QP_OP_ERR].name = "local_qp_op_err",
+ [BNXT_RE_LOCAL_PROTECTION_ERR].name = "local_protection_err",
+ [BNXT_RE_MEM_MGMT_OP_ERR].name = "mem_mgmt_op_err",
+ [BNXT_RE_REMOTE_INVALID_REQ_ERR].name = "req_remote_invalid_request",
+ [BNXT_RE_REMOTE_ACCESS_ERR].name = "req_remote_access_errors",
+ [BNXT_RE_REMOTE_OP_ERR].name = "remote_op_err",
+ [BNXT_RE_DUP_REQ].name = "duplicate_request",
+ [BNXT_RE_RES_EXCEED_MAX].name = "res_exceed_max",
+ [BNXT_RE_RES_LENGTH_MISMATCH].name = "resp_local_length_error",
+ [BNXT_RE_RES_EXCEEDS_WQE].name = "res_exceeds_wqe",
+ [BNXT_RE_RES_OPCODE_ERR].name = "res_opcode_err",
+ [BNXT_RE_RES_RX_INVALID_RKEY].name = "res_rx_invalid_rkey",
+ [BNXT_RE_RES_RX_DOMAIN_ERR].name = "res_rx_domain_err",
+ [BNXT_RE_RES_RX_NO_PERM].name = "res_rx_no_perm",
+ [BNXT_RE_RES_RX_RANGE_ERR].name = "res_rx_range_err",
+ [BNXT_RE_RES_TX_INVALID_RKEY].name = "res_tx_invalid_rkey",
+ [BNXT_RE_RES_TX_DOMAIN_ERR].name = "res_tx_domain_err",
+ [BNXT_RE_RES_TX_NO_PERM].name = "res_tx_no_perm",
+ [BNXT_RE_RES_TX_RANGE_ERR].name = "res_tx_range_err",
+ [BNXT_RE_RES_IRRQ_OFLOW].name = "res_irrq_oflow",
+ [BNXT_RE_RES_UNSUP_OPCODE].name = "res_unsup_opcode",
+ [BNXT_RE_RES_UNALIGNED_ATOMIC].name = "res_unaligned_atomic",
+ [BNXT_RE_RES_REM_INV_ERR].name = "res_rem_inv_err",
+ [BNXT_RE_RES_MEM_ERROR].name = "res_mem_err",
+ [BNXT_RE_RES_SRQ_ERR].name = "res_srq_err",
+ [BNXT_RE_RES_CMP_ERR].name = "res_cmp_err",
+ [BNXT_RE_RES_INVALID_DUP_RKEY].name = "res_invalid_dup_rkey",
+ [BNXT_RE_RES_WQE_FORMAT_ERR].name = "res_wqe_format_err",
+ [BNXT_RE_RES_CQ_LOAD_ERR].name = "res_cq_load_err",
+ [BNXT_RE_RES_SRQ_LOAD_ERR].name = "res_srq_load_err",
+ [BNXT_RE_RES_TX_PCI_ERR].name = "res_tx_pci_err",
+ [BNXT_RE_RES_RX_PCI_ERR].name = "res_rx_pci_err",
+ [BNXT_RE_OUT_OF_SEQ_ERR].name = "out_of_sequence",
+ [BNXT_RE_TX_ATOMIC_REQ].name = "tx_atomic_req",
+ [BNXT_RE_TX_READ_REQ].name = "tx_read_req",
+ [BNXT_RE_TX_READ_RES].name = "tx_read_resp",
+ [BNXT_RE_TX_WRITE_REQ].name = "tx_write_req",
+ [BNXT_RE_TX_SEND_REQ].name = "tx_send_req",
+ [BNXT_RE_TX_ROCE_PKTS].name = "tx_roce_only_pkts",
+ [BNXT_RE_TX_ROCE_BYTES].name = "tx_roce_only_bytes",
+ [BNXT_RE_RX_ATOMIC_REQ].name = "rx_atomic_requests",
+ [BNXT_RE_RX_READ_REQ].name = "rx_read_requests",
+ [BNXT_RE_RX_READ_RESP].name = "rx_read_resp",
+ [BNXT_RE_RX_WRITE_REQ].name = "rx_write_requests",
+ [BNXT_RE_RX_SEND_REQ].name = "rx_send_req",
+ [BNXT_RE_RX_ROCE_PKTS].name = "rx_roce_only_pkts",
+ [BNXT_RE_RX_ROCE_BYTES].name = "rx_roce_only_bytes",
+ [BNXT_RE_RX_ROCE_GOOD_PKTS].name = "rx_roce_good_pkts",
+ [BNXT_RE_RX_ROCE_GOOD_BYTES].name = "rx_roce_good_bytes",
+ [BNXT_RE_OOB].name = "out_of_buffer",
+ [BNXT_RE_TX_CNP].name = "np_cnp_pkts",
+ [BNXT_RE_RX_CNP].name = "rp_cnp_handled",
+ [BNXT_RE_RX_ECN].name = "np_ecn_marked_roce_packets",
+ [BNXT_RE_REQ_CQE_ERROR].name = "req_cqe_error",
+ [BNXT_RE_RESP_CQE_ERROR].name = "resp_cqe_error",
+ [BNXT_RE_RESP_REMOTE_ACCESS_ERRS].name = "resp_remote_access_errors",
};
+static void bnxt_re_copy_ext_stats(struct bnxt_re_dev *rdev,
+ struct rdma_hw_stats *stats,
+ struct bnxt_qplib_ext_stat *s)
+{
+ stats->value[BNXT_RE_TX_ATOMIC_REQ] = s->tx_atomic_req;
+ stats->value[BNXT_RE_TX_READ_REQ] = s->tx_read_req;
+ stats->value[BNXT_RE_TX_READ_RES] = s->tx_read_res;
+ stats->value[BNXT_RE_TX_WRITE_REQ] = s->tx_write_req;
+ stats->value[BNXT_RE_TX_SEND_REQ] = s->tx_send_req;
+ stats->value[BNXT_RE_TX_ROCE_PKTS] = s->tx_roce_pkts;
+ stats->value[BNXT_RE_TX_ROCE_BYTES] = s->tx_roce_bytes;
+ stats->value[BNXT_RE_RX_ATOMIC_REQ] = s->rx_atomic_req;
+ stats->value[BNXT_RE_RX_READ_REQ] = s->rx_read_req;
+ stats->value[BNXT_RE_RX_READ_RESP] = s->rx_read_res;
+ stats->value[BNXT_RE_RX_WRITE_REQ] = s->rx_write_req;
+ stats->value[BNXT_RE_RX_SEND_REQ] = s->rx_send_req;
+ stats->value[BNXT_RE_RX_ROCE_PKTS] = s->rx_roce_pkts;
+ stats->value[BNXT_RE_RX_ROCE_BYTES] = s->rx_roce_bytes;
+ stats->value[BNXT_RE_RX_ROCE_GOOD_PKTS] = s->rx_roce_good_pkts;
+ stats->value[BNXT_RE_RX_ROCE_GOOD_BYTES] = s->rx_roce_good_bytes;
+ stats->value[BNXT_RE_OOB] = s->rx_out_of_buffer;
+ stats->value[BNXT_RE_TX_CNP] = s->tx_cnp;
+ stats->value[BNXT_RE_RX_CNP] = s->rx_cnp;
+ stats->value[BNXT_RE_RX_ECN] = s->rx_ecn_marked;
+ stats->value[BNXT_RE_OUT_OF_SEQ_ERR] = s->rx_out_of_sequence;
+}
+
+static int bnxt_re_get_ext_stat(struct bnxt_re_dev *rdev,
+ struct rdma_hw_stats *stats)
+{
+ struct bnxt_qplib_ext_stat *estat = &rdev->stats.rstat.ext_stat;
+ u32 fid;
+ int rc;
+
+ fid = PCI_FUNC(rdev->en_dev->pdev->devfn);
+ rc = bnxt_qplib_qext_stat(&rdev->rcfw, fid, estat);
+ if (rc)
+ goto done;
+ bnxt_re_copy_ext_stats(rdev, stats, estat);
+
+done:
+ return rc;
+}
+
+static void bnxt_re_copy_err_stats(struct bnxt_re_dev *rdev,
+ struct rdma_hw_stats *stats,
+ struct bnxt_qplib_roce_stats *err_s)
+{
+ stats->value[BNXT_RE_TO_RETRANSMITS] =
+ err_s->to_retransmits;
+ stats->value[BNXT_RE_SEQ_ERR_NAKS_RCVD] =
+ err_s->seq_err_naks_rcvd;
+ stats->value[BNXT_RE_MAX_RETRY_EXCEEDED] =
+ err_s->max_retry_exceeded;
+ stats->value[BNXT_RE_RNR_NAKS_RCVD] =
+ err_s->rnr_naks_rcvd;
+ stats->value[BNXT_RE_MISSING_RESP] =
+ err_s->missing_resp;
+ stats->value[BNXT_RE_UNRECOVERABLE_ERR] =
+ err_s->unrecoverable_err;
+ stats->value[BNXT_RE_BAD_RESP_ERR] =
+ err_s->bad_resp_err;
+ stats->value[BNXT_RE_LOCAL_QP_OP_ERR] =
+ err_s->local_qp_op_err;
+ stats->value[BNXT_RE_LOCAL_PROTECTION_ERR] =
+ err_s->local_protection_err;
+ stats->value[BNXT_RE_MEM_MGMT_OP_ERR] =
+ err_s->mem_mgmt_op_err;
+ stats->value[BNXT_RE_REMOTE_INVALID_REQ_ERR] =
+ err_s->remote_invalid_req_err;
+ stats->value[BNXT_RE_REMOTE_ACCESS_ERR] =
+ err_s->remote_access_err;
+ stats->value[BNXT_RE_REMOTE_OP_ERR] =
+ err_s->remote_op_err;
+ stats->value[BNXT_RE_DUP_REQ] =
+ err_s->dup_req;
+ stats->value[BNXT_RE_RES_EXCEED_MAX] =
+ err_s->res_exceed_max;
+ stats->value[BNXT_RE_RES_LENGTH_MISMATCH] =
+ err_s->res_length_mismatch;
+ stats->value[BNXT_RE_RES_EXCEEDS_WQE] =
+ err_s->res_exceeds_wqe;
+ stats->value[BNXT_RE_RES_OPCODE_ERR] =
+ err_s->res_opcode_err;
+ stats->value[BNXT_RE_RES_RX_INVALID_RKEY] =
+ err_s->res_rx_invalid_rkey;
+ stats->value[BNXT_RE_RES_RX_DOMAIN_ERR] =
+ err_s->res_rx_domain_err;
+ stats->value[BNXT_RE_RES_RX_NO_PERM] =
+ err_s->res_rx_no_perm;
+ stats->value[BNXT_RE_RES_RX_RANGE_ERR] =
+ err_s->res_rx_range_err;
+ stats->value[BNXT_RE_RES_TX_INVALID_RKEY] =
+ err_s->res_tx_invalid_rkey;
+ stats->value[BNXT_RE_RES_TX_DOMAIN_ERR] =
+ err_s->res_tx_domain_err;
+ stats->value[BNXT_RE_RES_TX_NO_PERM] =
+ err_s->res_tx_no_perm;
+ stats->value[BNXT_RE_RES_TX_RANGE_ERR] =
+ err_s->res_tx_range_err;
+ stats->value[BNXT_RE_RES_IRRQ_OFLOW] =
+ err_s->res_irrq_oflow;
+ stats->value[BNXT_RE_RES_UNSUP_OPCODE] =
+ err_s->res_unsup_opcode;
+ stats->value[BNXT_RE_RES_UNALIGNED_ATOMIC] =
+ err_s->res_unaligned_atomic;
+ stats->value[BNXT_RE_RES_REM_INV_ERR] =
+ err_s->res_rem_inv_err;
+ stats->value[BNXT_RE_RES_MEM_ERROR] =
+ err_s->res_mem_error;
+ stats->value[BNXT_RE_RES_SRQ_ERR] =
+ err_s->res_srq_err;
+ stats->value[BNXT_RE_RES_CMP_ERR] =
+ err_s->res_cmp_err;
+ stats->value[BNXT_RE_RES_INVALID_DUP_RKEY] =
+ err_s->res_invalid_dup_rkey;
+ stats->value[BNXT_RE_RES_WQE_FORMAT_ERR] =
+ err_s->res_wqe_format_err;
+ stats->value[BNXT_RE_RES_CQ_LOAD_ERR] =
+ err_s->res_cq_load_err;
+ stats->value[BNXT_RE_RES_SRQ_LOAD_ERR] =
+ err_s->res_srq_load_err;
+ stats->value[BNXT_RE_RES_TX_PCI_ERR] =
+ err_s->res_tx_pci_err;
+ stats->value[BNXT_RE_RES_RX_PCI_ERR] =
+ err_s->res_rx_pci_err;
+ stats->value[BNXT_RE_OUT_OF_SEQ_ERR] =
+ err_s->res_oos_drop_count;
+ stats->value[BNXT_RE_REQ_CQE_ERROR] =
+ err_s->bad_resp_err +
+ err_s->local_qp_op_err +
+ err_s->local_protection_err +
+ err_s->mem_mgmt_op_err +
+ err_s->remote_invalid_req_err +
+ err_s->remote_access_err +
+ err_s->remote_op_err;
+ stats->value[BNXT_RE_RESP_CQE_ERROR] =
+ err_s->res_cmp_err +
+ err_s->res_cq_load_err;
+ stats->value[BNXT_RE_RESP_REMOTE_ACCESS_ERRS] =
+ err_s->res_rx_no_perm +
+ err_s->res_tx_no_perm;
+}
+
+int bnxt_re_assign_pma_port_ext_counters(struct bnxt_re_dev *rdev, struct ib_mad *out_mad)
+{
+ struct ib_pma_portcounters_ext *pma_cnt_ext;
+ struct bnxt_qplib_ext_stat *estat = &rdev->stats.rstat.ext_stat;
+ struct ctx_hw_stats *hw_stats = NULL;
+ int rc;
+
+ hw_stats = rdev->qplib_ctx.stats.dma;
+
+ pma_cnt_ext = (struct ib_pma_portcounters_ext *)(out_mad->data + 40);
+ if (_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags)) {
+ u32 fid = PCI_FUNC(rdev->en_dev->pdev->devfn);
+
+ rc = bnxt_qplib_qext_stat(&rdev->rcfw, fid, estat);
+ if (rc)
+ return rc;
+ }
+
+ pma_cnt_ext = (struct ib_pma_portcounters_ext *)(out_mad->data + 40);
+ if ((bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) && rdev->is_virtfn) ||
+ !bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) {
+ pma_cnt_ext->port_xmit_data =
+ cpu_to_be64(le64_to_cpu(hw_stats->tx_ucast_bytes) / 4);
+ pma_cnt_ext->port_rcv_data =
+ cpu_to_be64(le64_to_cpu(hw_stats->rx_ucast_bytes) / 4);
+ pma_cnt_ext->port_xmit_packets =
+ cpu_to_be64(le64_to_cpu(hw_stats->tx_ucast_pkts));
+ pma_cnt_ext->port_rcv_packets =
+ cpu_to_be64(le64_to_cpu(hw_stats->rx_ucast_pkts));
+ pma_cnt_ext->port_unicast_rcv_packets =
+ cpu_to_be64(le64_to_cpu(hw_stats->rx_ucast_pkts));
+ pma_cnt_ext->port_unicast_xmit_packets =
+ cpu_to_be64(le64_to_cpu(hw_stats->tx_ucast_pkts));
+
+ } else {
+ pma_cnt_ext->port_rcv_packets = cpu_to_be64(estat->rx_roce_good_pkts);
+ pma_cnt_ext->port_rcv_data = cpu_to_be64(estat->rx_roce_good_bytes / 4);
+ pma_cnt_ext->port_xmit_packets = cpu_to_be64(estat->tx_roce_pkts);
+ pma_cnt_ext->port_xmit_data = cpu_to_be64(estat->tx_roce_bytes / 4);
+ pma_cnt_ext->port_unicast_rcv_packets = cpu_to_be64(estat->rx_roce_good_pkts);
+ pma_cnt_ext->port_unicast_xmit_packets = cpu_to_be64(estat->tx_roce_pkts);
+ }
+ return 0;
+}
+
+int bnxt_re_assign_pma_port_counters(struct bnxt_re_dev *rdev, struct ib_mad *out_mad)
+{
+ struct bnxt_qplib_ext_stat *estat = &rdev->stats.rstat.ext_stat;
+ struct ib_pma_portcounters *pma_cnt;
+ struct ctx_hw_stats *hw_stats = NULL;
+ int rc;
+
+ hw_stats = rdev->qplib_ctx.stats.dma;
+
+ pma_cnt = (struct ib_pma_portcounters *)(out_mad->data + 40);
+ if (_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags)) {
+ u32 fid = PCI_FUNC(rdev->en_dev->pdev->devfn);
+
+ rc = bnxt_qplib_qext_stat(&rdev->rcfw, fid, estat);
+ if (rc)
+ return rc;
+ }
+ if ((bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) && rdev->is_virtfn) ||
+ !bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) {
+ pma_cnt->port_rcv_packets =
+ cpu_to_be32((u32)(le64_to_cpu(hw_stats->rx_ucast_pkts)) & 0xFFFFFFFF);
+ pma_cnt->port_rcv_data =
+ cpu_to_be32((u32)((le64_to_cpu(hw_stats->rx_ucast_bytes) &
+ 0xFFFFFFFF) / 4));
+ pma_cnt->port_xmit_packets =
+ cpu_to_be32((u32)(le64_to_cpu(hw_stats->tx_ucast_pkts)) & 0xFFFFFFFF);
+ pma_cnt->port_xmit_data =
+ cpu_to_be32((u32)((le64_to_cpu(hw_stats->tx_ucast_bytes)
+ & 0xFFFFFFFF) / 4));
+ } else {
+ pma_cnt->port_rcv_packets = cpu_to_be32(estat->rx_roce_good_pkts);
+ pma_cnt->port_rcv_data = cpu_to_be32((estat->rx_roce_good_bytes / 4));
+ pma_cnt->port_xmit_packets = cpu_to_be32(estat->tx_roce_pkts);
+ pma_cnt->port_xmit_data = cpu_to_be32((estat->tx_roce_bytes / 4));
+ }
+ pma_cnt->port_rcv_constraint_errors = (u8)(le64_to_cpu(hw_stats->rx_discard_pkts) & 0xFF);
+ pma_cnt->port_rcv_errors = cpu_to_be16((u16)(le64_to_cpu(hw_stats->rx_error_pkts)
+ & 0xFFFF));
+ pma_cnt->port_xmit_constraint_errors = (u8)(le64_to_cpu(hw_stats->tx_error_pkts) & 0xFF);
+ pma_cnt->port_xmit_discards = cpu_to_be16((u16)(le64_to_cpu(hw_stats->tx_discard_pkts)
+ & 0xFFFF));
+
+ return 0;
+}
+
int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
- u8 port, int index)
+ u32 port, int index)
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
- struct ctx_hw_stats *bnxt_re_stats = rdev->qplib_ctx.stats.dma;
+ struct bnxt_qplib_roce_stats *err_s = NULL;
+ struct ctx_hw_stats *hw_stats = NULL;
int rc = 0;
+ hw_stats = rdev->qplib_ctx.stats.dma;
if (!port || !stats)
return -EINVAL;
- stats->value[BNXT_RE_ACTIVE_QP] = atomic_read(&rdev->qp_count);
- stats->value[BNXT_RE_ACTIVE_SRQ] = atomic_read(&rdev->srq_count);
- stats->value[BNXT_RE_ACTIVE_CQ] = atomic_read(&rdev->cq_count);
- stats->value[BNXT_RE_ACTIVE_MR] = atomic_read(&rdev->mr_count);
- stats->value[BNXT_RE_ACTIVE_MW] = atomic_read(&rdev->mw_count);
- if (bnxt_re_stats) {
+ if (hw_stats) {
stats->value[BNXT_RE_RECOVERABLE_ERRORS] =
- le64_to_cpu(bnxt_re_stats->tx_bcast_pkts);
- stats->value[BNXT_RE_RX_DROPS] =
- le64_to_cpu(bnxt_re_stats->rx_drop_pkts);
+ le64_to_cpu(hw_stats->tx_bcast_pkts);
+ stats->value[BNXT_RE_TX_DISCARDS] =
+ le64_to_cpu(hw_stats->tx_discard_pkts);
+ stats->value[BNXT_RE_TX_ERRORS] =
+ le64_to_cpu(hw_stats->tx_error_pkts);
+ stats->value[BNXT_RE_RX_ERRORS] =
+ le64_to_cpu(hw_stats->rx_error_pkts);
stats->value[BNXT_RE_RX_DISCARDS] =
- le64_to_cpu(bnxt_re_stats->rx_discard_pkts);
+ le64_to_cpu(hw_stats->rx_discard_pkts);
stats->value[BNXT_RE_RX_PKTS] =
- le64_to_cpu(bnxt_re_stats->rx_ucast_pkts);
+ le64_to_cpu(hw_stats->rx_ucast_pkts);
stats->value[BNXT_RE_RX_BYTES] =
- le64_to_cpu(bnxt_re_stats->rx_ucast_bytes);
+ le64_to_cpu(hw_stats->rx_ucast_bytes);
stats->value[BNXT_RE_TX_PKTS] =
- le64_to_cpu(bnxt_re_stats->tx_ucast_pkts);
+ le64_to_cpu(hw_stats->tx_ucast_pkts);
stats->value[BNXT_RE_TX_BYTES] =
- le64_to_cpu(bnxt_re_stats->tx_ucast_bytes);
+ le64_to_cpu(hw_stats->tx_ucast_bytes);
}
+ err_s = &rdev->stats.rstat.errs;
if (test_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags)) {
- rc = bnxt_qplib_get_roce_stats(&rdev->rcfw, &rdev->stats);
- if (rc)
+ rc = bnxt_qplib_get_roce_stats(&rdev->rcfw, err_s);
+ if (rc) {
clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS,
&rdev->flags);
- stats->value[BNXT_RE_TO_RETRANSMITS] =
- rdev->stats.to_retransmits;
- stats->value[BNXT_RE_SEQ_ERR_NAKS_RCVD] =
- rdev->stats.seq_err_naks_rcvd;
- stats->value[BNXT_RE_MAX_RETRY_EXCEEDED] =
- rdev->stats.max_retry_exceeded;
- stats->value[BNXT_RE_RNR_NAKS_RCVD] =
- rdev->stats.rnr_naks_rcvd;
- stats->value[BNXT_RE_MISSING_RESP] =
- rdev->stats.missing_resp;
- stats->value[BNXT_RE_UNRECOVERABLE_ERR] =
- rdev->stats.unrecoverable_err;
- stats->value[BNXT_RE_BAD_RESP_ERR] =
- rdev->stats.bad_resp_err;
- stats->value[BNXT_RE_LOCAL_QP_OP_ERR] =
- rdev->stats.local_qp_op_err;
- stats->value[BNXT_RE_LOCAL_PROTECTION_ERR] =
- rdev->stats.local_protection_err;
- stats->value[BNXT_RE_MEM_MGMT_OP_ERR] =
- rdev->stats.mem_mgmt_op_err;
- stats->value[BNXT_RE_REMOTE_INVALID_REQ_ERR] =
- rdev->stats.remote_invalid_req_err;
- stats->value[BNXT_RE_REMOTE_ACCESS_ERR] =
- rdev->stats.remote_access_err;
- stats->value[BNXT_RE_REMOTE_OP_ERR] =
- rdev->stats.remote_op_err;
- stats->value[BNXT_RE_DUP_REQ] =
- rdev->stats.dup_req;
- stats->value[BNXT_RE_RES_EXCEED_MAX] =
- rdev->stats.res_exceed_max;
- stats->value[BNXT_RE_RES_LENGTH_MISMATCH] =
- rdev->stats.res_length_mismatch;
- stats->value[BNXT_RE_RES_EXCEEDS_WQE] =
- rdev->stats.res_exceeds_wqe;
- stats->value[BNXT_RE_RES_OPCODE_ERR] =
- rdev->stats.res_opcode_err;
- stats->value[BNXT_RE_RES_RX_INVALID_RKEY] =
- rdev->stats.res_rx_invalid_rkey;
- stats->value[BNXT_RE_RES_RX_DOMAIN_ERR] =
- rdev->stats.res_rx_domain_err;
- stats->value[BNXT_RE_RES_RX_NO_PERM] =
- rdev->stats.res_rx_no_perm;
- stats->value[BNXT_RE_RES_RX_RANGE_ERR] =
- rdev->stats.res_rx_range_err;
- stats->value[BNXT_RE_RES_TX_INVALID_RKEY] =
- rdev->stats.res_tx_invalid_rkey;
- stats->value[BNXT_RE_RES_TX_DOMAIN_ERR] =
- rdev->stats.res_tx_domain_err;
- stats->value[BNXT_RE_RES_TX_NO_PERM] =
- rdev->stats.res_tx_no_perm;
- stats->value[BNXT_RE_RES_TX_RANGE_ERR] =
- rdev->stats.res_tx_range_err;
- stats->value[BNXT_RE_RES_IRRQ_OFLOW] =
- rdev->stats.res_irrq_oflow;
- stats->value[BNXT_RE_RES_UNSUP_OPCODE] =
- rdev->stats.res_unsup_opcode;
- stats->value[BNXT_RE_RES_UNALIGNED_ATOMIC] =
- rdev->stats.res_unaligned_atomic;
- stats->value[BNXT_RE_RES_REM_INV_ERR] =
- rdev->stats.res_rem_inv_err;
- stats->value[BNXT_RE_RES_MEM_ERROR] =
- rdev->stats.res_mem_error;
- stats->value[BNXT_RE_RES_SRQ_ERR] =
- rdev->stats.res_srq_err;
- stats->value[BNXT_RE_RES_CMP_ERR] =
- rdev->stats.res_cmp_err;
- stats->value[BNXT_RE_RES_INVALID_DUP_RKEY] =
- rdev->stats.res_invalid_dup_rkey;
- stats->value[BNXT_RE_RES_WQE_FORMAT_ERR] =
- rdev->stats.res_wqe_format_err;
- stats->value[BNXT_RE_RES_CQ_LOAD_ERR] =
- rdev->stats.res_cq_load_err;
- stats->value[BNXT_RE_RES_SRQ_LOAD_ERR] =
- rdev->stats.res_srq_load_err;
- stats->value[BNXT_RE_RES_TX_PCI_ERR] =
- rdev->stats.res_tx_pci_err;
- stats->value[BNXT_RE_RES_RX_PCI_ERR] =
- rdev->stats.res_rx_pci_err;
- stats->value[BNXT_RE_OUT_OF_SEQ_ERR] =
- rdev->stats.res_oos_drop_count;
+ goto done;
+ }
+ bnxt_re_copy_err_stats(rdev, stats, err_s);
+ if (bnxt_ext_stats_supported(rdev->chip_ctx, rdev->dev_attr->dev_cap_flags,
+ rdev->is_virtfn)) {
+ rc = bnxt_re_get_ext_stat(rdev, stats);
+ if (rc) {
+ clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS,
+ &rdev->flags);
+ goto done;
+ }
+ }
}
- return ARRAY_SIZE(bnxt_re_stat_name);
+done:
+ return bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ?
+ BNXT_RE_NUM_EXT_COUNTERS : BNXT_RE_NUM_STD_COUNTERS;
}
-struct rdma_hw_stats *bnxt_re_ib_alloc_hw_stats(struct ib_device *ibdev,
- u8 port_num)
+struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num)
{
- BUILD_BUG_ON(ARRAY_SIZE(bnxt_re_stat_name) != BNXT_RE_NUM_COUNTERS);
- /* We support only per port stats */
- if (!port_num)
- return NULL;
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ int num_counters = 0;
+
+ if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+ num_counters = BNXT_RE_NUM_EXT_COUNTERS;
+ else
+ num_counters = BNXT_RE_NUM_STD_COUNTERS;
- return rdma_alloc_hw_stats_struct(bnxt_re_stat_name,
- ARRAY_SIZE(bnxt_re_stat_name),
+ return rdma_alloc_hw_stats_struct(bnxt_re_stat_descs, num_counters,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
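The hw_counters.c changes above replace the flat array of counter name strings with rdma_stat_desc entries indexed by the bnxt_re_hw_stats enum, and size the per-port stats buffer by chip generation (extended counters only on the newer parts). As a rough, self-contained illustration of that pattern only — not driver code, and using hypothetical names such as my_stat_descs and my_alloc_stats — a userspace sketch could look like this:

/* Sketch only: mirrors the descriptor-table pattern used above with
 * hypothetical names; it is not part of the bnxt_re driver. */
#include <stdio.h>
#include <stdlib.h>

enum my_hw_stats {
	MY_RX_PKTS,
	MY_TX_PKTS,
	MY_NUM_STD_COUNTERS,		/* standard counters end here */
	MY_RX_CNP = MY_NUM_STD_COUNTERS,
	MY_TX_CNP,
	MY_NUM_EXT_COUNTERS		/* extended counters for newer chips */
};

struct my_stat_desc {
	const char *name;
};

/* Designated initializers keep each enum index paired with its name. */
static const struct my_stat_desc my_stat_descs[] = {
	[MY_RX_PKTS] = { .name = "rx_pkts" },
	[MY_TX_PKTS] = { .name = "tx_pkts" },
	[MY_RX_CNP]  = { .name = "rp_cnp_handled" },
	[MY_TX_CNP]  = { .name = "np_cnp_pkts" },
};

struct my_hw_stats_buf {
	int num_counters;
	unsigned long long value[MY_NUM_EXT_COUNTERS];
};

/* Size the buffer by capability, as the driver does per chip generation. */
static struct my_hw_stats_buf *my_alloc_stats(int has_ext_stats)
{
	struct my_hw_stats_buf *s = calloc(1, sizeof(*s));

	if (!s)
		return NULL;
	s->num_counters = has_ext_stats ? MY_NUM_EXT_COUNTERS :
					  MY_NUM_STD_COUNTERS;
	return s;
}

int main(void)
{
	struct my_hw_stats_buf *s = my_alloc_stats(1);

	if (!s)
		return 1;
	s->value[MY_RX_PKTS] = 42;
	for (int i = 0; i < s->num_counters; i++)
		printf("%-20s %llu\n", my_stat_descs[i].name, s->value[i]);
	free(s);
	return 0;
}

Keeping the enum and the descriptor table aligned this way is what lets bnxt_re_ib_get_hw_stats() above fill stats->value[] directly by enum index and report either BNXT_RE_NUM_STD_COUNTERS or BNXT_RE_NUM_EXT_COUNTERS from bnxt_re_ib_alloc_hw_port_stats().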
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.h b/drivers/infiniband/hw/bnxt_re/hw_counters.h
index 76399f477e5c..09d371d442aa 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.h
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.h
@@ -41,17 +41,14 @@
#define __BNXT_RE_HW_STATS_H__
enum bnxt_re_hw_stats {
- BNXT_RE_ACTIVE_QP,
- BNXT_RE_ACTIVE_SRQ,
- BNXT_RE_ACTIVE_CQ,
- BNXT_RE_ACTIVE_MR,
- BNXT_RE_ACTIVE_MW,
BNXT_RE_RX_PKTS,
BNXT_RE_RX_BYTES,
BNXT_RE_TX_PKTS,
BNXT_RE_TX_BYTES,
BNXT_RE_RECOVERABLE_ERRORS,
- BNXT_RE_RX_DROPS,
+ BNXT_RE_TX_ERRORS,
+ BNXT_RE_TX_DISCARDS,
+ BNXT_RE_RX_ERRORS,
BNXT_RE_RX_DISCARDS,
BNXT_RE_TO_RETRANSMITS,
BNXT_RE_SEQ_ERR_NAKS_RCVD,
@@ -93,12 +90,76 @@ enum bnxt_re_hw_stats {
BNXT_RE_RES_TX_PCI_ERR,
BNXT_RE_RES_RX_PCI_ERR,
BNXT_RE_OUT_OF_SEQ_ERR,
- BNXT_RE_NUM_COUNTERS
+ BNXT_RE_TX_ATOMIC_REQ,
+ BNXT_RE_TX_READ_REQ,
+ BNXT_RE_TX_READ_RES,
+ BNXT_RE_TX_WRITE_REQ,
+ BNXT_RE_TX_SEND_REQ,
+ BNXT_RE_TX_ROCE_PKTS,
+ BNXT_RE_TX_ROCE_BYTES,
+ BNXT_RE_RX_ATOMIC_REQ,
+ BNXT_RE_RX_READ_REQ,
+ BNXT_RE_RX_READ_RESP,
+ BNXT_RE_RX_WRITE_REQ,
+ BNXT_RE_RX_SEND_REQ,
+ BNXT_RE_RX_ROCE_PKTS,
+ BNXT_RE_RX_ROCE_BYTES,
+ BNXT_RE_RX_ROCE_GOOD_PKTS,
+ BNXT_RE_RX_ROCE_GOOD_BYTES,
+ BNXT_RE_OOB,
+ BNXT_RE_TX_CNP,
+ BNXT_RE_RX_CNP,
+ BNXT_RE_RX_ECN,
+ BNXT_RE_REQ_CQE_ERROR,
+ BNXT_RE_RESP_CQE_ERROR,
+ BNXT_RE_RESP_REMOTE_ACCESS_ERRS,
+ BNXT_RE_NUM_EXT_COUNTERS
};
-struct rdma_hw_stats *bnxt_re_ib_alloc_hw_stats(struct ib_device *ibdev,
- u8 port_num);
+#define BNXT_RE_NUM_STD_COUNTERS (BNXT_RE_OUT_OF_SEQ_ERR + 1)
+
+struct bnxt_re_db_pacing_stats {
+ u64 resched;
+ u64 complete;
+ u64 alerts;
+};
+
+struct bnxt_re_res_cntrs {
+ atomic_t qp_count;
+ atomic_t rc_qp_count;
+ atomic_t ud_qp_count;
+ atomic_t cq_count;
+ atomic_t srq_count;
+ atomic_t mr_count;
+ atomic_t mw_count;
+ atomic_t ah_count;
+ atomic_t pd_count;
+ atomic_t resize_count;
+ u64 qp_watermark;
+ u64 rc_qp_watermark;
+ u64 ud_qp_watermark;
+ u64 cq_watermark;
+ u64 srq_watermark;
+ u64 mr_watermark;
+ u64 mw_watermark;
+ u64 ah_watermark;
+ u64 pd_watermark;
+};
+
+struct bnxt_re_rstat {
+ struct bnxt_qplib_roce_stats errs;
+ struct bnxt_qplib_ext_stat ext_stat;
+};
+
+struct bnxt_re_stats {
+ struct bnxt_re_rstat rstat;
+ struct bnxt_re_res_cntrs res;
+ struct bnxt_re_db_pacing_stats pacing;
+};
+
+struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num);
int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
- u8 port, int index);
+ u32 port, int index);
#endif /* __BNXT_RE_HW_STATS_H__ */
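bnxt_re_assign_pma_port_ext_counters() and bnxt_re_assign_pma_port_counters() above fill the PMA PortCounters(Ext) attribute at a fixed 40-byte offset into the MAD data buffer, converting values to big-endian and dividing the byte counters by 4 because PortXmitData/PortRcvData are specified in 4-octet units. A minimal userspace sketch of just that packing follows; the helper name fill_port_counters_ext and the field offsets used here are simplified assumptions for illustration, not the kernel's ib_pma_portcounters_ext layout:

/* Sketch only: shows the unit/endianness handling used when filling the
 * PMA counters above, with simplified stand-in offsets. */
#include <stdint.h>
#include <stdio.h>

/* Store a 64-bit value big-endian, as the PMA counters are on the wire. */
static void put_be64(uint8_t *p, uint64_t v)
{
	for (int i = 0; i < 8; i++)
		p[i] = (uint8_t)(v >> (56 - 8 * i));
}

/* Hypothetical helper: write data/packet counters into the attribute area
 * that starts 40 bytes into the MAD data buffer, dividing the byte
 * counters by 4 since the data counters count 4-octet words. */
static void fill_port_counters_ext(uint8_t *mad_data,
				   uint64_t tx_bytes, uint64_t rx_bytes,
				   uint64_t tx_pkts, uint64_t rx_pkts)
{
	uint8_t *attr = mad_data + 40;

	put_be64(attr +  8, tx_bytes / 4);	/* PortXmitData */
	put_be64(attr + 16, rx_bytes / 4);	/* PortRcvData */
	put_be64(attr + 24, tx_pkts);		/* PortXmitPkts */
	put_be64(attr + 32, rx_pkts);		/* PortRcvPkts */
}

int main(void)
{
	uint8_t mad_data[256] = {0};

	fill_port_counters_ext(mad_data, 4096, 8192, 10, 20);
	/* PortXmitData (4096 / 4 = 1024) lands big-endian at offset 40 + 8 */
	printf("PortXmitData: 0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
	       mad_data[48], mad_data[49], mad_data[50], mad_data[51],
	       mad_data[52], mad_data[53], mad_data[54], mad_data[55]);
	return 0;
}

In the driver the same conversion is applied with cpu_to_be64()/cpu_to_be32(), sourcing either the DMA'ed ctx_hw_stats counters or the firmware extended statistics depending on chip generation and VF status, as the two branches in the functions above show.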
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 2c3685faa57a..f19b55c13d58 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -41,6 +41,7 @@
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
+#include <net/addrconf.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
@@ -48,9 +49,9 @@
#include <rdma/ib_addr.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_cache.h>
+#include <rdma/ib_pma.h>
#include <rdma/uverbs_ioctl.h>
-
-#include "bnxt_ulp.h"
+#include <linux/hashtable.h>
#include "roce_hsi.h"
#include "qplib_res.h"
@@ -60,6 +61,16 @@
#include "bnxt_re.h"
#include "ib_verbs.h"
+#include "debugfs.h"
+
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_std_types.h>
+
+#include <rdma/ib_user_ioctl_cmds.h>
+
+#define UVERBS_MODULE_NAME bnxt_re
+#include <rdma/uverbs_named_ioctl.h>
+
#include <rdma/bnxt_re-abi.h>
static int __from_ib_access_flags(int iflags)
@@ -83,9 +94,9 @@ static int __from_ib_access_flags(int iflags)
return qflags;
};
-static enum ib_access_flags __to_ib_access_flags(int qflags)
+static int __to_ib_access_flags(int qflags)
{
- enum ib_access_flags iflags = 0;
+ int iflags = 0;
if (qflags & BNXT_QPLIB_ACCESS_LOCAL_WRITE)
iflags |= IB_ACCESS_LOCAL_WRITE;
@@ -102,7 +113,57 @@ static enum ib_access_flags __to_ib_access_flags(int qflags)
if (qflags & BNXT_QPLIB_ACCESS_ON_DEMAND)
iflags |= IB_ACCESS_ON_DEMAND;
return iflags;
-};
+}
+
+static u8 __qp_access_flags_from_ib(struct bnxt_qplib_chip_ctx *cctx, int iflags)
+{
+ u8 qflags = 0;
+
+ if (!bnxt_qplib_is_chip_gen_p5_p7(cctx))
+ /* For Wh+ */
+ return (u8)__from_ib_access_flags(iflags);
+
+ /* For P5, P7 and later chips */
+ if (iflags & IB_ACCESS_LOCAL_WRITE)
+ qflags |= CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE;
+ if (iflags & IB_ACCESS_REMOTE_WRITE)
+ qflags |= CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE;
+ if (iflags & IB_ACCESS_REMOTE_READ)
+ qflags |= CMDQ_MODIFY_QP_ACCESS_REMOTE_READ;
+ if (iflags & IB_ACCESS_REMOTE_ATOMIC)
+ qflags |= CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC;
+
+ return qflags;
+}
+
+static int __qp_access_flags_to_ib(struct bnxt_qplib_chip_ctx *cctx, u8 qflags)
+{
+ int iflags = 0;
+
+ if (!bnxt_qplib_is_chip_gen_p5_p7(cctx))
+ /* For Wh+ */
+ return __to_ib_access_flags(qflags);
+
+ /* For P5, P7 and later chips */
+ if (qflags & CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE)
+ iflags |= IB_ACCESS_LOCAL_WRITE;
+ if (qflags & CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE)
+ iflags |= IB_ACCESS_REMOTE_WRITE;
+ if (qflags & CMDQ_MODIFY_QP_ACCESS_REMOTE_READ)
+ iflags |= IB_ACCESS_REMOTE_READ;
+ if (qflags & CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC)
+ iflags |= IB_ACCESS_REMOTE_ATOMIC;
+
+ return iflags;
+}
+
+static void bnxt_re_check_and_set_relaxed_ordering(struct bnxt_re_dev *rdev,
+ struct bnxt_qplib_mrw *qplib_mr)
+{
+ if (_is_relaxed_ordering_supported(rdev->dev_attr->dev_cap_flags2) &&
+ pcie_relaxed_ordering_enabled(rdev->en_dev->pdev))
+ qplib_mr->flags |= CMDQ_REGISTER_MR_FLAGS_ENABLE_RO;
+}
static int bnxt_re_build_sgl(struct ib_sge *ib_sg_list,
struct bnxt_qplib_sge *sg_list, int num)
@@ -124,20 +185,20 @@ int bnxt_re_query_device(struct ib_device *ibdev,
struct ib_udata *udata)
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
- struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+ struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr;
memset(ib_attr, 0, sizeof(*ib_attr));
memcpy(&ib_attr->fw_ver, dev_attr->fw_ver,
min(sizeof(dev_attr->fw_ver),
sizeof(ib_attr->fw_ver)));
- bnxt_qplib_get_guid(rdev->netdev->dev_addr,
- (u8 *)&ib_attr->sys_image_guid);
+ addrconf_addr_eui48((u8 *)&ib_attr->sys_image_guid,
+ rdev->netdev->dev_addr);
ib_attr->max_mr_size = BNXT_RE_MAX_MR_SIZE;
- ib_attr->page_size_cap = BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M;
+ ib_attr->page_size_cap = BNXT_RE_PAGE_SIZE_SUPPORTED;
ib_attr->vendor_id = rdev->en_dev->pdev->vendor;
ib_attr->vendor_part_id = rdev->en_dev->pdev->device;
- ib_attr->hw_ver = rdev->en_dev->pdev->subsystem_device;
+ ib_attr->hw_ver = rdev->en_dev->pdev->revision;
ib_attr->max_qp = dev_attr->max_qp;
ib_attr->max_qp_wr = dev_attr->max_qp_wqes;
ib_attr->device_cap_flags =
@@ -145,13 +206,13 @@ int bnxt_re_query_device(struct ib_device *ibdev,
| IB_DEVICE_RC_RNR_NAK_GEN
| IB_DEVICE_SHUTDOWN_PORT
| IB_DEVICE_SYS_IMAGE_GUID
- | IB_DEVICE_LOCAL_DMA_LKEY
| IB_DEVICE_RESIZE_MAX_WR
| IB_DEVICE_PORT_ACTIVE_EVENT
| IB_DEVICE_N_NOTIFY_CQ
| IB_DEVICE_MEM_WINDOW
| IB_DEVICE_MEM_WINDOW_TYPE_2B
| IB_DEVICE_MEM_MGT_EXTENSIONS;
+ ib_attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
ib_attr->max_send_sge = dev_attr->max_qp_sges;
ib_attr->max_recv_sge = dev_attr->max_qp_sges;
ib_attr->max_sge_rd = dev_attr->max_qp_sges;
@@ -163,6 +224,10 @@ int bnxt_re_query_device(struct ib_device *ibdev,
ib_attr->max_qp_init_rd_atom = dev_attr->max_qp_init_rd_atom;
ib_attr->atomic_cap = IB_ATOMIC_NONE;
ib_attr->masked_atomic_cap = IB_ATOMIC_NONE;
+ if (dev_attr->is_atomic) {
+ ib_attr->atomic_cap = IB_ATOMIC_GLOB;
+ ib_attr->masked_atomic_cap = IB_ATOMIC_GLOB;
+ }
ib_attr->max_ee_rd_atom = 0;
ib_attr->max_res_rd_atom = 0;
@@ -177,9 +242,6 @@ int bnxt_re_query_device(struct ib_device *ibdev,
ib_attr->max_total_mcast_qp_attach = 0;
ib_attr->max_ah = dev_attr->max_ah;
- ib_attr->max_fmr = 0;
- ib_attr->max_map_per_fmr = 0;
-
ib_attr->max_srq = dev_attr->max_srq;
ib_attr->max_srq_wr = dev_attr->max_srq_wqes;
ib_attr->max_srq_sge = dev_attr->max_srq_sges;
@@ -195,39 +257,40 @@ int bnxt_re_modify_device(struct ib_device *ibdev,
int device_modify_mask,
struct ib_device_modify *device_modify)
{
- switch (device_modify_mask) {
- case IB_DEVICE_MODIFY_SYS_IMAGE_GUID:
- /* Modify the GUID requires the modification of the GID table */
- /* GUID should be made as READ-ONLY */
- break;
- case IB_DEVICE_MODIFY_NODE_DESC:
- /* Node Desc should be made as READ-ONLY */
- break;
- default:
- break;
- }
+ ibdev_dbg(ibdev, "Modify device with mask 0x%x", device_modify_mask);
+
+ if (device_modify_mask & ~IB_DEVICE_MODIFY_NODE_DESC)
+ return -EOPNOTSUPP;
+
+ if (!(device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC))
+ return 0;
+
+ memcpy(ibdev->node_desc, device_modify->node_desc, IB_DEVICE_NODE_DESC_MAX);
return 0;
}
/* Port */
-int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
+int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num,
struct ib_port_attr *port_attr)
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
- struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+ struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr;
+ int rc;
memset(port_attr, 0, sizeof(*port_attr));
if (netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev)) {
port_attr->state = IB_PORT_ACTIVE;
- port_attr->phys_state = 5;
+ port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
} else {
port_attr->state = IB_PORT_DOWN;
- port_attr->phys_state = 3;
+ port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
}
port_attr->max_mtu = IB_MTU_4096;
port_attr->active_mtu = iboe_get_mtu(rdev->netdev->mtu);
- port_attr->gid_tbl_len = dev_attr->max_sgid;
+ /* One GID is reserved for RawEth QP. Report one less */
+ port_attr->gid_tbl_len = (rdev->rcfw.roce_mirror ? (dev_attr->max_sgid - 1) :
+ dev_attr->max_sgid);
port_attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
IB_PORT_DEVICE_MGMT_SUP |
IB_PORT_VENDOR_CLASS_SUP;
@@ -244,13 +307,13 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
port_attr->sm_sl = 0;
port_attr->subnet_timeout = 0;
port_attr->init_type_reply = 0;
- port_attr->active_speed = rdev->active_speed;
- port_attr->active_width = rdev->active_width;
+ rc = ib_get_eth_speed(&rdev->ibdev, port_num, &port_attr->active_speed,
+ &port_attr->active_width);
- return 0;
+ return rc;
}
-int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num,
+int bnxt_re_get_port_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable)
{
struct ib_port_attr port_attr;
@@ -271,27 +334,26 @@ void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str)
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d.%d",
- rdev->dev_attr.fw_ver[0], rdev->dev_attr.fw_ver[1],
- rdev->dev_attr.fw_ver[2], rdev->dev_attr.fw_ver[3]);
+ rdev->dev_attr->fw_ver[0], rdev->dev_attr->fw_ver[1],
+ rdev->dev_attr->fw_ver[2], rdev->dev_attr->fw_ver[3]);
}
-int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num,
+int bnxt_re_query_pkey(struct ib_device *ibdev, u32 port_num,
u16 index, u16 *pkey)
{
- struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ if (index > 0)
+ return -EINVAL;
- /* Ignore port_num */
+ *pkey = IB_DEFAULT_PKEY_FULL;
- memset(pkey, 0, sizeof(*pkey));
- return bnxt_qplib_get_pkey(&rdev->qplib_res,
- &rdev->qplib_res.pkey_tbl, index, pkey);
+ return 0;
}
-int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num,
+int bnxt_re_query_gid(struct ib_device *ibdev, u32 port_num,
int index, union ib_gid *gid)
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
- int rc = 0;
+ int rc;
/* Ignore port_num */
memset(gid, 0, sizeof(*gid));
@@ -308,16 +370,18 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context)
struct bnxt_re_dev *rdev = to_bnxt_re_dev(attr->device, ibdev);
struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
struct bnxt_qplib_gid *gid_to_del;
+ u16 vlan_id = 0xFFFF;
/* Delete the entry from the hardware */
ctx = *context;
if (!ctx)
return -EINVAL;
- if (sgid_tbl && sgid_tbl->active) {
+ if (sgid_tbl->active) {
if (ctx->idx >= sgid_tbl->max)
return -EINVAL;
- gid_to_del = &sgid_tbl->tbl[ctx->idx];
+ gid_to_del = &sgid_tbl->tbl[ctx->idx].gid;
+ vlan_id = sgid_tbl->tbl[ctx->idx].vlan_id;
/* DEL_GID is called in WQ context(netdevice_event_work_handler)
* or via the ib_unregister_device path. In the former case QP1
* may not be destroyed yet, in which case just return as FW
@@ -328,17 +392,18 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context)
*/
if (ctx->idx == 0 &&
rdma_link_local_addr((struct in6_addr *)gid_to_del) &&
- ctx->refcnt == 1 && rdev->qp1_sqp) {
- dev_dbg(rdev_to_dev(rdev),
- "Trying to delete GID0 while QP1 is alive\n");
+ ctx->refcnt == 1 && rdev->gsi_ctx.gsi_sqp) {
+ ibdev_dbg(&rdev->ibdev,
+ "Trying to delete GID0 while QP1 is alive\n");
return -EFAULT;
}
ctx->refcnt--;
if (!ctx->refcnt) {
- rc = bnxt_qplib_del_sgid(sgid_tbl, gid_to_del, true);
+ rc = bnxt_qplib_del_sgid(sgid_tbl, gid_to_del,
+ vlan_id, true);
if (rc) {
- dev_err(rdev_to_dev(rdev),
- "Failed to remove GID: %#x", rc);
+ ibdev_err(&rdev->ibdev,
+ "Failed to remove GID: %#x", rc);
} else {
ctx_tbl = sgid_tbl->ctx;
ctx_tbl[ctx->idx] = NULL;
@@ -366,7 +431,7 @@ int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context)
rc = bnxt_qplib_add_sgid(sgid_tbl, (struct bnxt_qplib_gid *)&attr->gid,
rdev->qplib_res.netdev->dev_addr,
- vlan_id, true, &tbl_idx);
+ vlan_id, true, &tbl_idx, false, 0);
if (rc == -EALREADY) {
ctx_tbl = sgid_tbl->ctx;
ctx_tbl[tbl_idx]->refcnt++;
@@ -375,7 +440,7 @@ int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context)
}
if (rc < 0) {
- dev_err(rdev_to_dev(rdev), "Failed to add GID: %#x", rc);
+ ibdev_err(&rdev->ibdev, "Failed to add GID: %#x", rc);
return rc;
}
@@ -392,7 +457,7 @@ int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context)
}
enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
- u8 port_num)
+ u32 port_num)
{
return IB_LINK_LAYER_ETHERNET;
}
@@ -404,6 +469,10 @@ static void bnxt_re_create_fence_wqe(struct bnxt_re_pd *pd)
struct bnxt_re_fence_data *fence = &pd->fence;
struct ib_mr *ib_mr = &fence->mr->ib_mr;
struct bnxt_qplib_swqe *wqe = &fence->bind_wqe;
+ struct bnxt_re_dev *rdev = pd->rdev;
+
+ if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+ return;
memset(wqe, 0, sizeof(*wqe));
wqe->type = BNXT_QPLIB_SWQE_TYPE_BIND_MW;
@@ -438,12 +507,12 @@ static int bnxt_re_bind_fence_mw(struct bnxt_qplib_qp *qplib_qp)
wqe.bind.r_key = fence->bind_rkey;
fence->bind_rkey = ib_inc_rkey(fence->bind_rkey);
- dev_dbg(rdev_to_dev(qp->rdev),
- "Posting bind fence-WQE: rkey: %#x QP: %d PD: %p\n",
+ ibdev_dbg(&qp->rdev->ibdev,
+ "Posting bind fence-WQE: rkey: %#x QP: %d PD: %p\n",
wqe.bind.r_key, qp->qplib_qp.id, pd);
rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe);
if (rc) {
- dev_err(rdev_to_dev(qp->rdev), "Failed to bind fence-WQE\n");
+ ibdev_err(&qp->rdev->ibdev, "Failed to bind fence-WQE\n");
return rc;
}
bnxt_qplib_post_send_db(&qp->qplib_qp);
@@ -458,6 +527,9 @@ static void bnxt_re_destroy_fence_mr(struct bnxt_re_pd *pd)
struct device *dev = &rdev->en_dev->pdev->dev;
struct bnxt_re_mr *mr = fence->mr;
+ if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+ return;
+
if (fence->mw) {
bnxt_re_dealloc_mw(fence->mw);
fence->mw = NULL;
@@ -487,14 +559,16 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
struct bnxt_re_mr *mr = NULL;
dma_addr_t dma_addr = 0;
struct ib_mw *mw;
- u64 pbl_tbl;
int rc;
+ if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+ return 0;
+
dma_addr = dma_map_single(dev, fence->va, BNXT_RE_FENCE_BYTES,
DMA_BIDIRECTIONAL);
rc = dma_mapping_error(dev, dma_addr);
if (rc) {
- dev_err(rdev_to_dev(rdev), "Failed to dma-map fence-MR-mem\n");
+ ibdev_err(&rdev->ibdev, "Failed to dma-map fence-MR-mem\n");
rc = -EIO;
fence->dma_addr = 0;
goto fail;
@@ -511,22 +585,26 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
mr->rdev = rdev;
mr->qplib_mr.pd = &pd->qplib_pd;
mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
- mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags);
- rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
- if (rc) {
- dev_err(rdev_to_dev(rdev), "Failed to alloc fence-HW-MR\n");
- goto fail;
- }
+ mr->qplib_mr.access_flags = __from_ib_access_flags(mr_access_flags);
+ if (!_is_alloc_mr_unified(rdev->dev_attr->dev_cap_flags)) {
+ rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to alloc fence-HW-MR\n");
+ goto fail;
+ }
- /* Register MR */
- mr->ib_mr.lkey = mr->qplib_mr.lkey;
+ /* Register MR */
+ mr->ib_mr.lkey = mr->qplib_mr.lkey;
+ } else {
+ mr->qplib_mr.flags = CMDQ_REGISTER_MR_FLAGS_ALLOC_MR;
+ }
mr->qplib_mr.va = (u64)(unsigned long)fence->va;
mr->qplib_mr.total_size = BNXT_RE_FENCE_BYTES;
- pbl_tbl = dma_addr;
- rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl_tbl,
- BNXT_RE_FENCE_PBL_SIZE, false, PAGE_SIZE);
+ rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, NULL,
+ BNXT_RE_FENCE_PBL_SIZE, PAGE_SIZE,
+ _is_alloc_mr_unified(rdev->dev_attr->dev_cap_flags));
if (rc) {
- dev_err(rdev_to_dev(rdev), "Failed to register fence-MR\n");
+ ibdev_err(&rdev->ibdev, "Failed to register fence-MR\n");
goto fail;
}
mr->ib_mr.rkey = mr->qplib_mr.rkey;
@@ -534,8 +612,8 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
/* Create a fence MW only for kernel consumers */
mw = bnxt_re_alloc_mw(&pd->ib_pd, IB_MW_TYPE_1, NULL);
if (IS_ERR(mw)) {
- dev_err(rdev_to_dev(rdev),
- "Failed to create fence-MW for PD: %p\n", pd);
+ ibdev_err(&rdev->ibdev,
+ "Failed to create fence-MW for PD: %p\n", pd);
rc = PTR_ERR(mw);
goto fail;
}
@@ -549,17 +627,69 @@ fail:
return rc;
}
+static struct bnxt_re_user_mmap_entry*
+bnxt_re_mmap_entry_insert(struct bnxt_re_ucontext *uctx, u64 mem_offset,
+ enum bnxt_re_mmap_flag mmap_flag, u64 *offset)
+{
+ struct bnxt_re_user_mmap_entry *entry;
+ int ret;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return NULL;
+
+ entry->mem_offset = mem_offset;
+ entry->mmap_flag = mmap_flag;
+ entry->uctx = uctx;
+
+ switch (mmap_flag) {
+ case BNXT_RE_MMAP_SH_PAGE:
+ ret = rdma_user_mmap_entry_insert_exact(&uctx->ib_uctx,
+ &entry->rdma_entry, PAGE_SIZE, 0);
+ break;
+ case BNXT_RE_MMAP_UC_DB:
+ case BNXT_RE_MMAP_WC_DB:
+ case BNXT_RE_MMAP_DBR_BAR:
+ case BNXT_RE_MMAP_DBR_PAGE:
+ case BNXT_RE_MMAP_TOGGLE_PAGE:
+ ret = rdma_user_mmap_entry_insert(&uctx->ib_uctx,
+ &entry->rdma_entry, PAGE_SIZE);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ if (ret) {
+ kfree(entry);
+ return NULL;
+ }
+ if (offset)
+ *offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
+
+ return entry;
+}
+
/* Protection Domains */
-void bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata)
+int bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata)
{
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
struct bnxt_re_dev *rdev = pd->rdev;
+ if (udata) {
+ rdma_user_mmap_entry_remove(pd->pd_db_mmap);
+ pd->pd_db_mmap = NULL;
+ }
+
bnxt_re_destroy_fence_mr(pd);
- if (pd->qplib_pd.id)
- bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
- &pd->qplib_pd);
+ if (pd->qplib_pd.id) {
+ if (!bnxt_qplib_dealloc_pd(&rdev->qplib_res,
+ &rdev->qplib_res.pd_tbl,
+ &pd->qplib_pd))
+ atomic_dec(&rdev->stats.res.pd_count);
+ }
+ return 0;
}
int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
@@ -569,25 +699,27 @@ int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
struct bnxt_re_ucontext *ucntx = rdma_udata_to_drv_context(
udata, struct bnxt_re_ucontext, ib_uctx);
struct bnxt_re_pd *pd = container_of(ibpd, struct bnxt_re_pd, ib_pd);
- int rc;
+ struct bnxt_re_user_mmap_entry *entry = NULL;
+ u32 active_pds;
+ int rc = 0;
pd->rdev = rdev;
- if (bnxt_qplib_alloc_pd(&rdev->qplib_res.pd_tbl, &pd->qplib_pd)) {
- dev_err(rdev_to_dev(rdev), "Failed to allocate HW PD");
+ if (bnxt_qplib_alloc_pd(&rdev->qplib_res, &pd->qplib_pd)) {
+ ibdev_err(&rdev->ibdev, "Failed to allocate HW PD");
rc = -ENOMEM;
goto fail;
}
if (udata) {
- struct bnxt_re_pd_resp resp;
+ struct bnxt_re_pd_resp resp = {};
if (!ucntx->dpi.dbr) {
/* Allocate DPI in alloc_pd to avoid failing of
* ibv_devinfo and family of application when DPIs
* are depleted.
*/
- if (bnxt_qplib_alloc_dpi(&rdev->qplib_res.dpi_tbl,
- &ucntx->dpi, ucntx)) {
+ if (bnxt_qplib_alloc_dpi(&rdev->qplib_res,
+ &ucntx->dpi, ucntx, BNXT_QPLIB_DPI_TYPE_UC)) {
rc = -ENOMEM;
goto dbfail;
}
@@ -596,20 +728,33 @@ int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
resp.pdid = pd->qplib_pd.id;
/* Still allow mapping this DBR to the new user PD. */
resp.dpi = ucntx->dpi.dpi;
- resp.dbr = (u64)ucntx->dpi.umdbr;
- rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ entry = bnxt_re_mmap_entry_insert(ucntx, (u64)ucntx->dpi.umdbr,
+ BNXT_RE_MMAP_UC_DB, &resp.dbr);
+
+ if (!entry) {
+ rc = -ENOMEM;
+ goto dbfail;
+ }
+
+ pd->pd_db_mmap = &entry->rdma_entry;
+
+ rc = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
if (rc) {
- dev_err(rdev_to_dev(rdev),
- "Failed to copy user response\n");
+ rdma_user_mmap_entry_remove(pd->pd_db_mmap);
+ rc = -EFAULT;
goto dbfail;
}
}
if (!udata)
if (bnxt_re_create_fence_mr(pd))
- dev_warn(rdev_to_dev(rdev),
- "Failed to create Fence-MR\n");
+ ibdev_warn(&rdev->ibdev,
+ "Failed to create Fence-MR\n");
+ active_pds = atomic_inc_return(&rdev->stats.res.pd_count);
+ if (active_pds > rdev->stats.res.pd_watermark)
+ rdev->stats.res.pd_watermark = active_pds;
+
return 0;
dbfail:
bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
@@ -619,13 +764,24 @@ fail:
}
/* Address Handles */
-void bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags)
+int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags)
{
struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah);
struct bnxt_re_dev *rdev = ah->rdev;
+ bool block = true;
+ int rc;
- bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah,
- !(flags & RDMA_DESTROY_AH_SLEEPABLE));
+ block = !(flags & RDMA_DESTROY_AH_SLEEPABLE);
+ rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah, block);
+ if (BNXT_RE_CHECK_RC(rc)) {
+ if (rc == -ETIMEDOUT)
+ rc = 0;
+ else
+ goto fail;
+ }
+ atomic_dec(&rdev->stats.res.ah_count);
+fail:
+ return rc;
}
static u8 bnxt_re_stack_to_dev_nw_type(enum rdma_network_type ntype)
@@ -646,20 +802,23 @@ static u8 bnxt_re_stack_to_dev_nw_type(enum rdma_network_type ntype)
return nw_type;
}
-int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr,
- u32 flags, struct ib_udata *udata)
+int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
{
struct ib_pd *ib_pd = ib_ah->pd;
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
struct bnxt_re_dev *rdev = pd->rdev;
const struct ib_gid_attr *sgid_attr;
+ struct bnxt_re_gid_ctx *ctx;
struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah);
+ u32 active_ahs;
u8 nw_type;
int rc;
if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) {
- dev_err(rdev_to_dev(rdev), "Failed to alloc AH: GRH not set");
+ ibdev_err(&rdev->ibdev, "Failed to alloc AH: GRH not set");
return -EINVAL;
}
@@ -669,28 +828,28 @@ int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr,
/* Supply the configuration for the HW */
memcpy(ah->qplib_ah.dgid.data, grh->dgid.raw,
sizeof(union ib_gid));
- /*
- * If RoCE V2 is enabled, stack will have two entries for
- * each GID entry. Avoiding this duplicte entry in HW. Dividing
- * the GID index by 2 for RoCE V2
+ sgid_attr = grh->sgid_attr;
+ /* Get the HW context of the GID. The reference
+ * of GID table entry is already taken by the caller.
*/
- ah->qplib_ah.sgid_index = grh->sgid_index / 2;
+ ctx = rdma_read_gid_hw_context(sgid_attr);
+ ah->qplib_ah.sgid_index = ctx->idx;
ah->qplib_ah.host_sgid_index = grh->sgid_index;
ah->qplib_ah.traffic_class = grh->traffic_class;
ah->qplib_ah.flow_label = grh->flow_label;
ah->qplib_ah.hop_limit = grh->hop_limit;
ah->qplib_ah.sl = rdma_ah_get_sl(ah_attr);
- sgid_attr = grh->sgid_attr;
/* Get network header type for this GID */
nw_type = rdma_gid_attr_network_type(sgid_attr);
ah->qplib_ah.nw_type = bnxt_re_stack_to_dev_nw_type(nw_type);
memcpy(ah->qplib_ah.dmac, ah_attr->roce.dmac, ETH_ALEN);
rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah,
- !(flags & RDMA_CREATE_AH_SLEEPABLE));
+ !(init_attr->flags &
+ RDMA_CREATE_AH_SLEEPABLE));
if (rc) {
- dev_err(rdev_to_dev(rdev), "Failed to allocate HW AH");
+ ibdev_err(&rdev->ibdev, "Failed to allocate HW AH");
return rc;
}
@@ -707,15 +866,13 @@ int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr,
wmb(); /* make sure cache is updated. */
spin_unlock_irqrestore(&uctx->sh_lock, flag);
}
+ active_ahs = atomic_inc_return(&rdev->stats.res.ah_count);
+ if (active_ahs > rdev->stats.res.ah_watermark)
+ rdev->stats.res.ah_watermark = active_ahs;
return 0;
}
-int bnxt_re_modify_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr)
-{
- return 0;
-}
-
int bnxt_re_query_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr)
{
struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah);
@@ -757,20 +914,77 @@ void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp,
spin_unlock_irqrestore(&qp->scq->cq_lock, flags);
}
+static void bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp)
+{
+ struct bnxt_re_qp *gsi_sqp;
+ struct bnxt_re_ah *gsi_sah;
+ struct bnxt_re_dev *rdev;
+ int rc;
+
+ rdev = qp->rdev;
+ gsi_sqp = rdev->gsi_ctx.gsi_sqp;
+ gsi_sah = rdev->gsi_ctx.gsi_sah;
+
+ ibdev_dbg(&rdev->ibdev, "Destroy the shadow AH\n");
+ bnxt_qplib_destroy_ah(&rdev->qplib_res,
+ &gsi_sah->qplib_ah,
+ true);
+ atomic_dec(&rdev->stats.res.ah_count);
+ bnxt_qplib_clean_qp(&qp->qplib_qp);
+
+ ibdev_dbg(&rdev->ibdev, "Destroy the shadow QP\n");
+ rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &gsi_sqp->qplib_qp);
+ if (rc)
+ ibdev_err(&rdev->ibdev, "Destroy Shadow QP failed");
+
+ bnxt_qplib_free_qp_res(&rdev->qplib_res, &gsi_sqp->qplib_qp);
+
+ /* remove from active qp list */
+ mutex_lock(&rdev->qp_lock);
+ list_del(&gsi_sqp->list);
+ mutex_unlock(&rdev->qp_lock);
+ atomic_dec(&rdev->stats.res.qp_count);
+
+ kfree(rdev->gsi_ctx.sqp_tbl);
+ kfree(gsi_sah);
+ kfree(gsi_sqp);
+ rdev->gsi_ctx.gsi_sqp = NULL;
+ rdev->gsi_ctx.gsi_sah = NULL;
+ rdev->gsi_ctx.sqp_tbl = NULL;
+}
+
+static void bnxt_re_del_unique_gid(struct bnxt_re_dev *rdev)
+{
+ int rc;
+
+ if (!rdev->rcfw.roce_mirror)
+ return;
+
+ rc = bnxt_qplib_del_sgid(&rdev->qplib_res.sgid_tbl,
+ (struct bnxt_qplib_gid *)&rdev->ugid,
+ 0xFFFF, true);
+ if (rc)
+ dev_err(rdev_to_dev(rdev), "Failed to delete unique GID, rc: %d\n", rc);
+}
+
/* Queue Pairs */
int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
{
struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+ struct bnxt_qplib_qp *qplib_qp = &qp->qplib_qp;
struct bnxt_re_dev *rdev = qp->rdev;
+ struct bnxt_qplib_nq *scq_nq = NULL;
+ struct bnxt_qplib_nq *rcq_nq = NULL;
unsigned int flags;
int rc;
+ bnxt_re_debug_rem_qpinfo(rdev, qp);
+
bnxt_qplib_flush_cqn_wq(&qp->qplib_qp);
+
rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp);
- if (rc) {
- dev_err(rdev_to_dev(rdev), "Failed to destroy HW QP");
- return rc;
- }
+ if (rc)
+ ibdev_err(&rdev->ibdev, "Failed to destroy HW QP");
if (rdma_is_kernel_res(&qp->ib_qp.res)) {
flags = bnxt_re_lock_cqs(qp);
@@ -780,41 +994,33 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
bnxt_qplib_free_qp_res(&rdev->qplib_res, &qp->qplib_qp);
- if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp) {
- bnxt_qplib_destroy_ah(&rdev->qplib_res, &rdev->sqp_ah->qplib_ah,
- false);
-
- bnxt_qplib_clean_qp(&qp->qplib_qp);
- rc = bnxt_qplib_destroy_qp(&rdev->qplib_res,
- &rdev->qp1_sqp->qplib_qp);
- if (rc) {
- dev_err(rdev_to_dev(rdev),
- "Failed to destroy Shadow QP");
- return rc;
- }
- bnxt_qplib_free_qp_res(&rdev->qplib_res,
- &rdev->qp1_sqp->qplib_qp);
- mutex_lock(&rdev->qp_lock);
- list_del(&rdev->qp1_sqp->list);
- atomic_dec(&rdev->qp_count);
- mutex_unlock(&rdev->qp_lock);
-
- kfree(rdev->sqp_ah);
- kfree(rdev->qp1_sqp);
- rdev->qp1_sqp = NULL;
- rdev->sqp_ah = NULL;
- }
-
- if (!IS_ERR_OR_NULL(qp->rumem))
- ib_umem_release(qp->rumem);
- if (!IS_ERR_OR_NULL(qp->sumem))
- ib_umem_release(qp->sumem);
+ if (ib_qp->qp_type == IB_QPT_GSI && rdev->gsi_ctx.gsi_sqp)
+ bnxt_re_destroy_gsi_sqp(qp);
mutex_lock(&rdev->qp_lock);
list_del(&qp->list);
- atomic_dec(&rdev->qp_count);
mutex_unlock(&rdev->qp_lock);
- kfree(qp);
+ atomic_dec(&rdev->stats.res.qp_count);
+ if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_RC)
+ atomic_dec(&rdev->stats.res.rc_qp_count);
+ else if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD)
+ atomic_dec(&rdev->stats.res.ud_qp_count);
+
+ if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE)
+ bnxt_re_del_unique_gid(rdev);
+
+ ib_umem_release(qp->rumem);
+ ib_umem_release(qp->sumem);
+
+ /* Flush all the entries of notification queue associated with
+ * given qp.
+ */
+ scq_nq = qplib_qp->scq->nq;
+ rcq_nq = qplib_qp->rcq->nq;
+ bnxt_re_synchronize_nq(scq_nq);
+ if (scq_nq != rcq_nq)
+ bnxt_re_synchronize_nq(rcq_nq);
+
return 0;
}
@@ -827,54 +1033,124 @@ static u8 __from_ib_qp_type(enum ib_qp_type type)
return CMDQ_CREATE_QP_TYPE_RC;
case IB_QPT_UD:
return CMDQ_CREATE_QP_TYPE_UD;
+ case IB_QPT_RAW_PACKET:
+ return CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE;
default:
return IB_QPT_MAX;
}
}
+static u16 bnxt_re_setup_rwqe_size(struct bnxt_qplib_qp *qplqp,
+ int rsge, int max)
+{
+ if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC)
+ rsge = max;
+ return bnxt_re_get_rwqe_size(rsge);
+}
+
+static u16 bnxt_re_get_wqe_size(int ilsize, int nsge)
+{
+ u16 wqe_size, calc_ils;
+
+ wqe_size = bnxt_re_get_swqe_size(nsge);
+ if (ilsize) {
+ calc_ils = sizeof(struct sq_send_hdr) + ilsize;
+ wqe_size = max_t(u16, calc_ils, wqe_size);
+ wqe_size = ALIGN(wqe_size, sizeof(struct sq_send_hdr));
+ }
+ return wqe_size;
+}
+
+static int bnxt_re_setup_swqe_size(struct bnxt_re_qp *qp,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct bnxt_qplib_dev_attr *dev_attr;
+ struct bnxt_qplib_qp *qplqp;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_qplib_q *sq;
+ int align, ilsize;
+
+ rdev = qp->rdev;
+ qplqp = &qp->qplib_qp;
+ sq = &qplqp->sq;
+ dev_attr = rdev->dev_attr;
+
+ align = sizeof(struct sq_send_hdr);
+ ilsize = ALIGN(init_attr->cap.max_inline_data, align);
+
+ /* For gen p4 and gen p5 fixed wqe compatibility mode
+ * wqe size is fixed to 128 bytes - ie 6 SGEs
+ */
+ if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) {
+ sq->wqe_size = bnxt_re_get_swqe_size(BNXT_STATIC_MAX_SGE);
+ sq->max_sge = BNXT_STATIC_MAX_SGE;
+ } else {
+ sq->wqe_size = bnxt_re_get_wqe_size(ilsize, sq->max_sge);
+ if (sq->wqe_size > bnxt_re_get_swqe_size(dev_attr->max_qp_sges))
+ return -EINVAL;
+ }
+
+ if (init_attr->cap.max_inline_data) {
+ qplqp->max_inline_data = sq->wqe_size -
+ sizeof(struct sq_send_hdr);
+ init_attr->cap.max_inline_data = qplqp->max_inline_data;
+ }
+
+ return 0;
+}
+
static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
- struct bnxt_re_qp *qp, struct ib_udata *udata)
+ struct bnxt_re_qp *qp, struct bnxt_re_ucontext *cntx,
+ struct bnxt_re_qp_req *ureq)
{
- struct bnxt_re_qp_req ureq;
- struct bnxt_qplib_qp *qplib_qp = &qp->qplib_qp;
- struct ib_umem *umem;
+ struct bnxt_qplib_qp *qplib_qp;
int bytes = 0, psn_sz;
- struct bnxt_re_ucontext *cntx = rdma_udata_to_drv_context(
- udata, struct bnxt_re_ucontext, ib_uctx);
+ struct ib_umem *umem;
+ int psn_nume;
- if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
- return -EFAULT;
+ qplib_qp = &qp->qplib_qp;
- bytes = (qplib_qp->sq.max_wqe * BNXT_QPLIB_MAX_SQE_ENTRY_SIZE);
+ bytes = (qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size);
/* Consider mapping PSN search memory only for RC QPs. */
if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) {
- psn_sz = bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ?
- sizeof(struct sq_psn_search_ext) :
- sizeof(struct sq_psn_search);
- bytes += (qplib_qp->sq.max_wqe * psn_sz);
+ psn_sz = bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ?
+ sizeof(struct sq_psn_search_ext) :
+ sizeof(struct sq_psn_search);
+ if (cntx && bnxt_re_is_var_size_supported(rdev, cntx)) {
+ psn_nume = ureq->sq_slots;
+ } else {
+ psn_nume = (qplib_qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ?
+ qplib_qp->sq.max_wqe : ((qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size) /
+ sizeof(struct bnxt_qplib_sge));
+ }
+ if (_is_host_msn_table(rdev->qplib_res.dattr->dev_cap_flags2))
+ psn_nume = roundup_pow_of_two(psn_nume);
+ bytes += (psn_nume * psn_sz);
}
+
bytes = PAGE_ALIGN(bytes);
- umem = ib_umem_get(udata, ureq.qpsva, bytes, IB_ACCESS_LOCAL_WRITE, 1);
+ umem = ib_umem_get(&rdev->ibdev, ureq->qpsva, bytes,
+ IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(umem))
return PTR_ERR(umem);
qp->sumem = umem;
- qplib_qp->sq.sg_info.sglist = umem->sg_head.sgl;
- qplib_qp->sq.sg_info.npages = ib_umem_num_pages(umem);
- qplib_qp->sq.sg_info.nmap = umem->nmap;
- qplib_qp->qp_handle = ureq.qp_handle;
+ qplib_qp->sq.sg_info.umem = umem;
+ qplib_qp->sq.sg_info.pgsize = PAGE_SIZE;
+ qplib_qp->sq.sg_info.pgshft = PAGE_SHIFT;
+ qplib_qp->qp_handle = ureq->qp_handle;
if (!qp->qplib_qp.srq) {
- bytes = (qplib_qp->rq.max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
+ bytes = (qplib_qp->rq.max_wqe * qplib_qp->rq.wqe_size);
bytes = PAGE_ALIGN(bytes);
- umem = ib_umem_get(udata, ureq.qprva, bytes,
- IB_ACCESS_LOCAL_WRITE, 1);
+ umem = ib_umem_get(&rdev->ibdev, ureq->qprva, bytes,
+ IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(umem))
goto rqfail;
qp->rumem = umem;
- qplib_qp->rq.sg_info.sglist = umem->sg_head.sgl;
- qplib_qp->rq.sg_info.npages = ib_umem_num_pages(umem);
- qplib_qp->rq.sg_info.nmap = umem->nmap;
+ qplib_qp->rq.sg_info.umem = umem;
+ qplib_qp->rq.sg_info.pgsize = PAGE_SIZE;
+ qplib_qp->rq.sg_info.pgshft = PAGE_SHIFT;
}
qplib_qp->dpi = &cntx->dpi;
@@ -922,10 +1198,11 @@ static struct bnxt_re_ah *bnxt_re_create_shadow_qp_ah
rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah, false);
if (rc) {
- dev_err(rdev_to_dev(rdev),
- "Failed to allocate HW AH for Shadow QP");
+ ibdev_err(&rdev->ibdev,
+ "Failed to allocate HW AH for Shadow QP");
goto fail;
}
+ atomic_inc(&rdev->stats.res.ah_count);
return ah;
@@ -960,18 +1237,26 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp
qp->qplib_qp.sig_type = true;
/* Shadow QP SQ depth should be same as QP1 RQ depth */
+ qp->qplib_qp.sq.wqe_size = bnxt_re_get_wqe_size(0, 6);
qp->qplib_qp.sq.max_wqe = qp1_qp->rq.max_wqe;
+ qp->qplib_qp.sq.max_sw_wqe = qp1_qp->rq.max_wqe;
qp->qplib_qp.sq.max_sge = 2;
/* Q full delta can be 1 since it is internal QP */
qp->qplib_qp.sq.q_full_delta = 1;
+ qp->qplib_qp.sq.sg_info.pgsize = PAGE_SIZE;
+ qp->qplib_qp.sq.sg_info.pgshft = PAGE_SHIFT;
qp->qplib_qp.scq = qp1_qp->scq;
qp->qplib_qp.rcq = qp1_qp->rcq;
+ qp->qplib_qp.rq.wqe_size = bnxt_re_get_rwqe_size(6);
qp->qplib_qp.rq.max_wqe = qp1_qp->rq.max_wqe;
+ qp->qplib_qp.rq.max_sw_wqe = qp1_qp->rq.max_wqe;
qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge;
/* Q full delta can be 1 since it is internal QP */
qp->qplib_qp.rq.q_full_delta = 1;
+ qp->qplib_qp.rq.sg_info.pgsize = PAGE_SIZE;
+ qp->qplib_qp.rq.sg_info.pgshft = PAGE_SHIFT;
qp->qplib_qp.mtu = qp1_qp->mtu;
@@ -983,13 +1268,11 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp
if (rc)
goto fail;
- rdev->sqp_id = qp->qplib_qp.id;
-
spin_lock_init(&qp->sq_lock);
INIT_LIST_HEAD(&qp->list);
mutex_lock(&rdev->qp_lock);
list_add_tail(&qp->list, &rdev->qp_list);
- atomic_inc(&rdev->qp_count);
+ atomic_inc(&rdev->stats.res.qp_count);
mutex_unlock(&rdev->qp_lock);
return qp;
fail:
@@ -997,219 +1280,464 @@ fail:
return NULL;
}
-struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
- struct ib_qp_init_attr *qp_init_attr,
- struct ib_udata *udata)
+static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp,
+ struct ib_qp_init_attr *init_attr,
+ struct bnxt_re_ucontext *uctx)
{
- struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
- struct bnxt_re_dev *rdev = pd->rdev;
- struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
- struct bnxt_re_qp *qp;
- struct bnxt_re_cq *cq;
- struct bnxt_re_srq *srq;
- int rc, entries;
+ struct bnxt_qplib_dev_attr *dev_attr;
+ struct bnxt_qplib_qp *qplqp;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_qplib_q *rq;
+ int entries;
- if ((qp_init_attr->cap.max_send_wr > dev_attr->max_qp_wqes) ||
- (qp_init_attr->cap.max_recv_wr > dev_attr->max_qp_wqes) ||
- (qp_init_attr->cap.max_send_sge > dev_attr->max_qp_sges) ||
- (qp_init_attr->cap.max_recv_sge > dev_attr->max_qp_sges) ||
- (qp_init_attr->cap.max_inline_data > dev_attr->max_inline_data))
- return ERR_PTR(-EINVAL);
+ rdev = qp->rdev;
+ qplqp = &qp->qplib_qp;
+ rq = &qplqp->rq;
+ dev_attr = rdev->dev_attr;
- qp = kzalloc(sizeof(*qp), GFP_KERNEL);
- if (!qp)
- return ERR_PTR(-ENOMEM);
+ if (init_attr->srq) {
+ struct bnxt_re_srq *srq;
- qp->rdev = rdev;
- ether_addr_copy(qp->qplib_qp.smac, rdev->netdev->dev_addr);
- qp->qplib_qp.pd = &pd->qplib_pd;
- qp->qplib_qp.qp_handle = (u64)(unsigned long)(&qp->qplib_qp);
- qp->qplib_qp.type = __from_ib_qp_type(qp_init_attr->qp_type);
-
- if (qp_init_attr->qp_type == IB_QPT_GSI &&
- bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx))
- qp->qplib_qp.type = CMDQ_CREATE_QP_TYPE_GSI;
- if (qp->qplib_qp.type == IB_QPT_MAX) {
- dev_err(rdev_to_dev(rdev), "QP type 0x%x not supported",
- qp->qplib_qp.type);
- rc = -EINVAL;
- goto fail;
+ srq = container_of(init_attr->srq, struct bnxt_re_srq, ib_srq);
+ qplqp->srq = &srq->qplib_srq;
+ rq->max_wqe = 0;
+ } else {
+ rq->max_sge = init_attr->cap.max_recv_sge;
+ if (rq->max_sge > dev_attr->max_qp_sges)
+ rq->max_sge = dev_attr->max_qp_sges;
+ init_attr->cap.max_recv_sge = rq->max_sge;
+ rq->wqe_size = bnxt_re_setup_rwqe_size(qplqp, rq->max_sge,
+ dev_attr->max_qp_sges);
+ /* Allocate 1 more than what's provided so posting max doesn't
+ * mean empty.
+ */
+ entries = bnxt_re_init_depth(init_attr->cap.max_recv_wr + 1, uctx);
+ rq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1);
+ rq->max_sw_wqe = rq->max_wqe;
+ rq->q_full_delta = 0;
+ rq->sg_info.pgsize = PAGE_SIZE;
+ rq->sg_info.pgshft = PAGE_SHIFT;
}
- qp->qplib_qp.max_inline_data = qp_init_attr->cap.max_inline_data;
- qp->qplib_qp.sig_type = ((qp_init_attr->sq_sig_type ==
- IB_SIGNAL_ALL_WR) ? true : false);
+ return 0;
+}
+
+static void bnxt_re_adjust_gsi_rq_attr(struct bnxt_re_qp *qp)
+{
+ struct bnxt_qplib_dev_attr *dev_attr;
+ struct bnxt_qplib_qp *qplqp;
+ struct bnxt_re_dev *rdev;
- qp->qplib_qp.sq.max_sge = qp_init_attr->cap.max_send_sge;
- if (qp->qplib_qp.sq.max_sge > dev_attr->max_qp_sges)
- qp->qplib_qp.sq.max_sge = dev_attr->max_qp_sges;
+ rdev = qp->rdev;
+ qplqp = &qp->qplib_qp;
+ dev_attr = rdev->dev_attr;
- if (qp_init_attr->send_cq) {
- cq = container_of(qp_init_attr->send_cq, struct bnxt_re_cq,
- ib_cq);
- if (!cq) {
- dev_err(rdev_to_dev(rdev), "Send CQ not found");
- rc = -EINVAL;
- goto fail;
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) {
+ qplqp->rq.max_sge = dev_attr->max_qp_sges;
+ if (qplqp->rq.max_sge > dev_attr->max_qp_sges)
+ qplqp->rq.max_sge = dev_attr->max_qp_sges;
+ qplqp->rq.max_sge = 6;
+ }
+}
+
+static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp,
+ struct ib_qp_init_attr *init_attr,
+ struct bnxt_re_ucontext *uctx,
+ struct bnxt_re_qp_req *ureq)
+{
+ struct bnxt_qplib_dev_attr *dev_attr;
+ struct bnxt_qplib_qp *qplqp;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_qplib_q *sq;
+ int diff = 0;
+ int entries;
+ int rc;
+
+ rdev = qp->rdev;
+ qplqp = &qp->qplib_qp;
+ sq = &qplqp->sq;
+ dev_attr = rdev->dev_attr;
+
+ sq->max_sge = init_attr->cap.max_send_sge;
+ entries = init_attr->cap.max_send_wr;
+ if (uctx && qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) {
+ sq->max_wqe = ureq->sq_slots;
+ sq->max_sw_wqe = ureq->sq_slots;
+ sq->wqe_size = sizeof(struct sq_sge);
+ } else {
+ if (sq->max_sge > dev_attr->max_qp_sges) {
+ sq->max_sge = dev_attr->max_qp_sges;
+ init_attr->cap.max_send_sge = sq->max_sge;
}
- qp->qplib_qp.scq = &cq->qplib_cq;
+
+ rc = bnxt_re_setup_swqe_size(qp, init_attr);
+ if (rc)
+ return rc;
+
+ /* Allocate 128 + 1 more than what's provided */
+ diff = (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) ?
+ 0 : BNXT_QPLIB_RESERVED_QP_WRS;
+ entries = bnxt_re_init_depth(entries + diff + 1, uctx);
+ sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + diff + 1);
+ if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE)
+ sq->max_sw_wqe = bnxt_qplib_get_depth(sq, qplqp->wqe_mode, true);
+ else
+ sq->max_sw_wqe = sq->max_wqe;
+
+ }
+ sq->q_full_delta = diff + 1;
+ /*
+ * Reserving one slot for Phantom WQE. Application can
+ * post one extra entry in this case. But allowing this to avoid
+ * unexpected Queue full condition
+ */
+ qplqp->sq.q_full_delta -= 1;
+ qplqp->sq.sg_info.pgsize = PAGE_SIZE;
+ qplqp->sq.sg_info.pgshft = PAGE_SHIFT;
+
+ return 0;
+}
+
+static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp,
+ struct ib_qp_init_attr *init_attr,
+ struct bnxt_re_ucontext *uctx)
+{
+ struct bnxt_qplib_dev_attr *dev_attr;
+ struct bnxt_qplib_qp *qplqp;
+ struct bnxt_re_dev *rdev;
+ int entries;
+
+ rdev = qp->rdev;
+ qplqp = &qp->qplib_qp;
+ dev_attr = rdev->dev_attr;
+
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) {
+ entries = bnxt_re_init_depth(init_attr->cap.max_send_wr + 1, uctx);
+ qplqp->sq.max_wqe = min_t(u32, entries,
+ dev_attr->max_qp_wqes + 1);
+ qplqp->sq.q_full_delta = qplqp->sq.max_wqe -
+ init_attr->cap.max_send_wr;
+ qplqp->sq.max_sge++; /* Need one extra sge to put UD header */
+ if (qplqp->sq.max_sge > dev_attr->max_qp_sges)
+ qplqp->sq.max_sge = dev_attr->max_qp_sges;
+ }
+}
+
+static int bnxt_re_init_qp_type(struct bnxt_re_dev *rdev,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct bnxt_qplib_chip_ctx *chip_ctx;
+ int qptype;
+
+ chip_ctx = rdev->chip_ctx;
+
+ qptype = __from_ib_qp_type(init_attr->qp_type);
+ if (qptype == IB_QPT_MAX) {
+ ibdev_err(&rdev->ibdev, "QP type 0x%x not supported", qptype);
+ qptype = -EOPNOTSUPP;
+ goto out;
+ }
+
+ if (bnxt_qplib_is_chip_gen_p5_p7(chip_ctx) &&
+ init_attr->qp_type == IB_QPT_GSI)
+ qptype = CMDQ_CREATE_QP_TYPE_GSI;
+out:
+ return qptype;
+}
+
+static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct bnxt_re_ucontext *uctx,
+ struct bnxt_re_qp_req *ureq)
+{
+ struct bnxt_qplib_dev_attr *dev_attr;
+ struct bnxt_qplib_qp *qplqp;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_cq *cq;
+ int rc = 0, qptype;
+
+ rdev = qp->rdev;
+ qplqp = &qp->qplib_qp;
+ dev_attr = rdev->dev_attr;
+
+ /* Setup misc params */
+ ether_addr_copy(qplqp->smac, rdev->netdev->dev_addr);
+ qplqp->pd = &pd->qplib_pd;
+ qplqp->qp_handle = (u64)qplqp;
+ qplqp->max_inline_data = init_attr->cap.max_inline_data;
+ qplqp->sig_type = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
+ qptype = bnxt_re_init_qp_type(rdev, init_attr);
+ if (qptype < 0) {
+ rc = qptype;
+ goto out;
+ }
+ qplqp->type = (u8)qptype;
+ qplqp->wqe_mode = bnxt_re_is_var_size_supported(rdev, uctx);
+ if (init_attr->qp_type == IB_QPT_RC) {
+ qplqp->max_rd_atomic = dev_attr->max_qp_rd_atom;
+ qplqp->max_dest_rd_atomic = dev_attr->max_qp_init_rd_atom;
+ }
+ qplqp->mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu));
+ qplqp->dpi = &rdev->dpi_privileged; /* Doorbell page */
+ if (init_attr->create_flags) {
+ ibdev_dbg(&rdev->ibdev,
+ "QP create flags 0x%x not supported",
+ init_attr->create_flags);
+ return -EOPNOTSUPP;
+ }
+
+ /* Setup CQs */
+ if (init_attr->send_cq) {
+ cq = container_of(init_attr->send_cq, struct bnxt_re_cq, ib_cq);
+ qplqp->scq = &cq->qplib_cq;
qp->scq = cq;
}
- if (qp_init_attr->recv_cq) {
- cq = container_of(qp_init_attr->recv_cq, struct bnxt_re_cq,
- ib_cq);
- if (!cq) {
- dev_err(rdev_to_dev(rdev), "Receive CQ not found");
- rc = -EINVAL;
- goto fail;
- }
- qp->qplib_qp.rcq = &cq->qplib_cq;
+ if (init_attr->recv_cq) {
+ cq = container_of(init_attr->recv_cq, struct bnxt_re_cq, ib_cq);
+ qplqp->rcq = &cq->qplib_cq;
qp->rcq = cq;
}
- if (qp_init_attr->srq) {
- srq = container_of(qp_init_attr->srq, struct bnxt_re_srq,
- ib_srq);
- if (!srq) {
- dev_err(rdev_to_dev(rdev), "SRQ not found");
- rc = -EINVAL;
- goto fail;
- }
- qp->qplib_qp.srq = &srq->qplib_srq;
- qp->qplib_qp.rq.max_wqe = 0;
- } else {
- /* Allocate 1 more than what's provided so posting max doesn't
- * mean empty
- */
- entries = roundup_pow_of_two(qp_init_attr->cap.max_recv_wr + 1);
- qp->qplib_qp.rq.max_wqe = min_t(u32, entries,
- dev_attr->max_qp_wqes + 1);
+ /* Setup RQ/SRQ */
+ rc = bnxt_re_init_rq_attr(qp, init_attr, uctx);
+ if (rc)
+ goto out;
+ if (init_attr->qp_type == IB_QPT_GSI)
+ bnxt_re_adjust_gsi_rq_attr(qp);
+
+ /* Setup SQ */
+ rc = bnxt_re_init_sq_attr(qp, init_attr, uctx, ureq);
+ if (rc)
+ goto out;
+ if (init_attr->qp_type == IB_QPT_GSI)
+ bnxt_re_adjust_gsi_sq_attr(qp, init_attr, uctx);
+
+ if (uctx) /* This will update DPI and qp_handle */
+ rc = bnxt_re_init_user_qp(rdev, pd, qp, uctx, ureq);
+out:
+ return rc;
+}
- qp->qplib_qp.rq.q_full_delta = qp->qplib_qp.rq.max_wqe -
- qp_init_attr->cap.max_recv_wr;
+static int bnxt_re_create_shadow_gsi(struct bnxt_re_qp *qp,
+ struct bnxt_re_pd *pd)
+{
+ struct bnxt_re_sqp_entries *sqp_tbl;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_qp *sqp;
+ struct bnxt_re_ah *sah;
+ int rc = 0;
+
+ rdev = qp->rdev;
+ /* Create a shadow QP to handle the QP1 traffic */
+ sqp_tbl = kcalloc(BNXT_RE_MAX_GSI_SQP_ENTRIES, sizeof(*sqp_tbl),
+ GFP_KERNEL);
+ if (!sqp_tbl)
+ return -ENOMEM;
+ rdev->gsi_ctx.sqp_tbl = sqp_tbl;
- qp->qplib_qp.rq.max_sge = qp_init_attr->cap.max_recv_sge;
- if (qp->qplib_qp.rq.max_sge > dev_attr->max_qp_sges)
- qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
+ sqp = bnxt_re_create_shadow_qp(pd, &rdev->qplib_res, &qp->qplib_qp);
+ if (!sqp) {
+ rc = -ENODEV;
+ ibdev_err(&rdev->ibdev, "Failed to create Shadow QP for QP1");
+ goto out;
+ }
+ rdev->gsi_ctx.gsi_sqp = sqp;
+
+ sqp->rcq = qp->rcq;
+ sqp->scq = qp->scq;
+ sah = bnxt_re_create_shadow_qp_ah(pd, &rdev->qplib_res,
+ &qp->qplib_qp);
+ if (!sah) {
+ bnxt_qplib_destroy_qp(&rdev->qplib_res,
+ &sqp->qplib_qp);
+ rc = -ENODEV;
+ ibdev_err(&rdev->ibdev,
+ "Failed to create AH entry for ShadowQP");
+ goto out;
}
+ rdev->gsi_ctx.gsi_sah = sah;
- qp->qplib_qp.mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu));
+ return 0;
+out:
+ kfree(sqp_tbl);
+ return rc;
+}
- if (qp_init_attr->qp_type == IB_QPT_GSI &&
- !(bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx))) {
- /* Allocate 1 more than what's provided */
- entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr + 1);
- qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
- dev_attr->max_qp_wqes + 1);
- qp->qplib_qp.sq.q_full_delta = qp->qplib_qp.sq.max_wqe -
- qp_init_attr->cap.max_send_wr;
- qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
- if (qp->qplib_qp.rq.max_sge > dev_attr->max_qp_sges)
- qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
- qp->qplib_qp.sq.max_sge++;
- if (qp->qplib_qp.sq.max_sge > dev_attr->max_qp_sges)
- qp->qplib_qp.sq.max_sge = dev_attr->max_qp_sges;
-
- qp->qplib_qp.rq_hdr_buf_size =
- BNXT_QPLIB_MAX_QP1_RQ_HDR_SIZE_V2;
-
- qp->qplib_qp.sq_hdr_buf_size =
- BNXT_QPLIB_MAX_QP1_SQ_HDR_SIZE_V2;
- qp->qplib_qp.dpi = &rdev->dpi_privileged;
- rc = bnxt_qplib_create_qp1(&rdev->qplib_res, &qp->qplib_qp);
- if (rc) {
- dev_err(rdev_to_dev(rdev), "Failed to create HW QP1");
- goto fail;
- }
- /* Create a shadow QP to handle the QP1 traffic */
- rdev->qp1_sqp = bnxt_re_create_shadow_qp(pd, &rdev->qplib_res,
- &qp->qplib_qp);
- if (!rdev->qp1_sqp) {
- rc = -EINVAL;
- dev_err(rdev_to_dev(rdev),
- "Failed to create Shadow QP for QP1");
- goto qp_destroy;
- }
- rdev->sqp_ah = bnxt_re_create_shadow_qp_ah(pd, &rdev->qplib_res,
- &qp->qplib_qp);
- if (!rdev->sqp_ah) {
- bnxt_qplib_destroy_qp(&rdev->qplib_res,
- &rdev->qp1_sqp->qplib_qp);
- rc = -EINVAL;
- dev_err(rdev_to_dev(rdev),
- "Failed to create AH entry for ShadowQP");
- goto qp_destroy;
- }
+static int bnxt_re_create_gsi_qp(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct bnxt_re_dev *rdev;
+ struct bnxt_qplib_qp *qplqp;
+ int rc;
- } else {
- /* Allocate 128 + 1 more than what's provided */
- entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr +
- BNXT_QPLIB_RESERVED_QP_WRS + 1);
- qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
- dev_attr->max_qp_wqes +
- BNXT_QPLIB_RESERVED_QP_WRS + 1);
- qp->qplib_qp.sq.q_full_delta = BNXT_QPLIB_RESERVED_QP_WRS + 1;
+ rdev = qp->rdev;
+ qplqp = &qp->qplib_qp;
- /*
- * Reserving one slot for Phantom WQE. Application can
- * post one extra entry in this case. But allowing this to avoid
- * unexpected Queue full condition
- */
+ qplqp->rq_hdr_buf_size = BNXT_QPLIB_MAX_QP1_RQ_HDR_SIZE_V2;
+ qplqp->sq_hdr_buf_size = BNXT_QPLIB_MAX_QP1_SQ_HDR_SIZE_V2;
- qp->qplib_qp.sq.q_full_delta -= 1;
+ rc = bnxt_qplib_create_qp1(&rdev->qplib_res, qplqp);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "create HW QP1 failed!");
+ goto out;
+ }
- qp->qplib_qp.max_rd_atomic = dev_attr->max_qp_rd_atom;
- qp->qplib_qp.max_dest_rd_atomic = dev_attr->max_qp_init_rd_atom;
- if (udata) {
- rc = bnxt_re_init_user_qp(rdev, pd, qp, udata);
- if (rc)
- goto fail;
- } else {
- qp->qplib_qp.dpi = &rdev->dpi_privileged;
- }
+ rc = bnxt_re_create_shadow_gsi(qp, pd);
+out:
+ return rc;
+}
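/*
 * Descriptive note, not part of the patch: on the pre-P5 chips handled
 * here, QP1 receive completions are not consumed directly.  The payload is
 * re-posted to the shadow QP created by bnxt_re_create_shadow_gsi() and
 * relayed through it (see bnxt_re_process_raw_qp_pkt_rx() further down),
 * which is why the shadow QP shares the GSI QP's send and receive CQs.
 */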
+static bool bnxt_re_test_qp_limits(struct bnxt_re_dev *rdev,
+ struct ib_qp_init_attr *init_attr,
+ struct bnxt_qplib_dev_attr *dev_attr)
+{
+ bool rc = true;
+
+ if (init_attr->cap.max_send_wr > dev_attr->max_qp_wqes ||
+ init_attr->cap.max_recv_wr > dev_attr->max_qp_wqes ||
+ init_attr->cap.max_send_sge > dev_attr->max_qp_sges ||
+ init_attr->cap.max_recv_sge > dev_attr->max_qp_sges ||
+ init_attr->cap.max_inline_data > dev_attr->max_inline_data) {
+ ibdev_err(&rdev->ibdev,
+ "Create QP failed - max exceeded! 0x%x/0x%x 0x%x/0x%x 0x%x/0x%x 0x%x/0x%x 0x%x/0x%x",
+ init_attr->cap.max_send_wr, dev_attr->max_qp_wqes,
+ init_attr->cap.max_recv_wr, dev_attr->max_qp_wqes,
+ init_attr->cap.max_send_sge, dev_attr->max_qp_sges,
+ init_attr->cap.max_recv_sge, dev_attr->max_qp_sges,
+ init_attr->cap.max_inline_data,
+ dev_attr->max_inline_data);
+ rc = false;
+ }
+ return rc;
+}
+
+static int bnxt_re_add_unique_gid(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_ctx *hctx = &rdev->qplib_ctx;
+ struct bnxt_qplib_res *res = &rdev->qplib_res;
+ int rc;
+
+ if (!rdev->rcfw.roce_mirror)
+ return 0;
+
+ rdev->ugid.global.subnet_prefix = cpu_to_be64(0xfe8000000000abcdLL);
+ addrconf_ifid_eui48(&rdev->ugid.raw[8], rdev->netdev);
+
+ rc = bnxt_qplib_add_sgid(&res->sgid_tbl,
+ (struct bnxt_qplib_gid *)&rdev->ugid,
+ rdev->qplib_res.netdev->dev_addr,
+ 0xFFFF, true, &rdev->ugid_index, true,
+ hctx->stats3.fw_id);
+ if (rc)
+ dev_err(rdev_to_dev(rdev), "Failed to add unique GID. rc = %d\n", rc);
+
+ return rc;
+}
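/*
 * Illustrative sketch, not part of the patch: addrconf_ifid_eui48() above
 * fills the low 8 bytes of the unique GID from the netdev MAC in EUI-64
 * form - 0xFF,0xFE inserted in the middle and the universal/local bit
 * flipped.  A standalone equivalent:
 */
static void mac_to_eui64(const unsigned char mac[6], unsigned char eui[8])
{
	eui[0] = mac[0] ^ 0x02;	/* flip the universal/local bit */
	eui[1] = mac[1];
	eui[2] = mac[2];
	eui[3] = 0xFF;		/* EUI-48 -> EUI-64 filler bytes */
	eui[4] = 0xFE;
	eui[5] = mac[3];
	eui[6] = mac[4];
	eui[7] = mac[5];
}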
+
+int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata)
+{
+ struct bnxt_qplib_dev_attr *dev_attr;
+ struct bnxt_re_ucontext *uctx;
+ struct bnxt_re_qp_req ureq;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_pd *pd;
+ struct bnxt_re_qp *qp;
+ struct ib_pd *ib_pd;
+ u32 active_qps;
+ int rc;
+
+ ib_pd = ib_qp->pd;
+ pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ rdev = pd->rdev;
+ dev_attr = rdev->dev_attr;
+ qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+
+ uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx);
+ if (udata)
+ if (ib_copy_from_udata(&ureq, udata, min(udata->inlen, sizeof(ureq))))
+ return -EFAULT;
+
+ rc = bnxt_re_test_qp_limits(rdev, qp_init_attr, dev_attr);
+ if (!rc) {
+ rc = -EINVAL;
+ goto fail;
+ }
+
+ qp->rdev = rdev;
+ rc = bnxt_re_init_qp_attr(qp, pd, qp_init_attr, uctx, &ureq);
+ if (rc)
+ goto fail;
+
+ if (qp_init_attr->qp_type == IB_QPT_GSI &&
+ !(bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))) {
+ rc = bnxt_re_create_gsi_qp(qp, pd, qp_init_attr);
+ if (rc == -ENODEV)
+ goto qp_destroy;
+ if (rc)
+ goto fail;
+ } else {
rc = bnxt_qplib_create_qp(&rdev->qplib_res, &qp->qplib_qp);
if (rc) {
- dev_err(rdev_to_dev(rdev), "Failed to create HW QP");
+ ibdev_err(&rdev->ibdev, "Failed to create HW QP");
goto free_umem;
}
+ if (udata) {
+ struct bnxt_re_qp_resp resp;
+
+ resp.qpid = qp->qplib_qp.id;
+ resp.rsvd = 0;
+ rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to copy QP udata");
+ goto qp_destroy;
+ }
+ }
+ }
+
+ /* RawEth QP support is added to capture TCP packet dumps, so a
+ * unique SGID is used to avoid skewing the statistics on the
+ * per-function stats_ctx.
+ */
+ if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE) {
+ rc = bnxt_re_add_unique_gid(rdev);
+ if (rc)
+ goto qp_destroy;
+ qp->qplib_qp.ugid_index = rdev->ugid_index;
}
qp->ib_qp.qp_num = qp->qplib_qp.id;
+ if (qp_init_attr->qp_type == IB_QPT_GSI)
+ rdev->gsi_ctx.gsi_qp = qp;
spin_lock_init(&qp->sq_lock);
spin_lock_init(&qp->rq_lock);
-
- if (udata) {
- struct bnxt_re_qp_resp resp;
-
- resp.qpid = qp->ib_qp.qp_num;
- resp.rsvd = 0;
- rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
- if (rc) {
- dev_err(rdev_to_dev(rdev), "Failed to copy QP udata");
- goto qp_destroy;
- }
- }
INIT_LIST_HEAD(&qp->list);
mutex_lock(&rdev->qp_lock);
list_add_tail(&qp->list, &rdev->qp_list);
- atomic_inc(&rdev->qp_count);
mutex_unlock(&rdev->qp_lock);
+ active_qps = atomic_inc_return(&rdev->stats.res.qp_count);
+ if (active_qps > rdev->stats.res.qp_watermark)
+ rdev->stats.res.qp_watermark = active_qps;
+ if (qp_init_attr->qp_type == IB_QPT_RC) {
+ active_qps = atomic_inc_return(&rdev->stats.res.rc_qp_count);
+ if (active_qps > rdev->stats.res.rc_qp_watermark)
+ rdev->stats.res.rc_qp_watermark = active_qps;
+ } else if (qp_init_attr->qp_type == IB_QPT_UD) {
+ active_qps = atomic_inc_return(&rdev->stats.res.ud_qp_count);
+ if (active_qps > rdev->stats.res.ud_qp_watermark)
+ rdev->stats.res.ud_qp_watermark = active_qps;
+ }
+ bnxt_re_debug_add_qpinfo(rdev, qp);
- return &qp->ib_qp;
+ return 0;
qp_destroy:
bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp);
free_umem:
- if (udata) {
- if (qp->rumem)
- ib_umem_release(qp->rumem);
- if (qp->sumem)
- ib_umem_release(qp->sumem);
- }
+ ib_umem_release(qp->rumem);
+ ib_umem_release(qp->sumem);
fail:
- kfree(qp);
- return ERR_PTR(rc);
+ return rc;
}
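/*
 * Descriptive note, not part of the patch: bnxt_re_create_qp() now returns
 * an int and operates on an ib_qp embedded in bnxt_re_qp, i.e. the QP
 * object is allocated by the RDMA core rather than kzalloc'd/kfree'd here,
 * which is why the ERR_PTR() returns and kfree(qp) in the error path are
 * gone.  The driver is then expected to advertise its QP structure size to
 * the core, presumably via INIT_RDMA_OBJ_SIZE(ib_qp, bnxt_re_qp, ib_qp) in
 * its ib_device_ops (not shown in this hunk).
 */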
static u8 __from_ib_qp_state(enum ib_qp_state state)
@@ -1291,22 +1819,21 @@ static enum ib_mtu __to_ib_mtu(u32 mtu)
}
/* Shared Receive Queues */
-void bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata)
+int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata)
{
struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq,
ib_srq);
struct bnxt_re_dev *rdev = srq->rdev;
struct bnxt_qplib_srq *qplib_srq = &srq->qplib_srq;
- struct bnxt_qplib_nq *nq = NULL;
- if (qplib_srq->cq)
- nq = qplib_srq->cq->nq;
+ if (rdev->chip_ctx->modes.toggle_bits & BNXT_QPLIB_SRQ_TOGGLE_BIT) {
+ free_page((unsigned long)srq->uctx_srq_page);
+ hash_del(&srq->hash_entry);
+ }
bnxt_qplib_destroy_srq(&rdev->qplib_res, qplib_srq);
- if (srq->umem)
- ib_umem_release(srq->umem);
- atomic_dec(&rdev->srq_count);
- if (nq)
- nq->budget--;
+ ib_umem_release(srq->umem);
+ atomic_dec(&rdev->stats.res.srq_count);
+ return 0;
}
static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev,
@@ -1324,16 +1851,17 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev,
if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
return -EFAULT;
- bytes = (qplib_srq->max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
+ bytes = (qplib_srq->max_wqe * qplib_srq->wqe_size);
bytes = PAGE_ALIGN(bytes);
- umem = ib_umem_get(udata, ureq.srqva, bytes, IB_ACCESS_LOCAL_WRITE, 1);
+ umem = ib_umem_get(&rdev->ibdev, ureq.srqva, bytes,
+ IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(umem))
return PTR_ERR(umem);
srq->umem = umem;
- qplib_srq->sg_info.sglist = umem->sg_head.sgl;
- qplib_srq->sg_info.npages = ib_umem_num_pages(umem);
- qplib_srq->sg_info.nmap = umem->nmap;
+ qplib_srq->sg_info.umem = umem;
+ qplib_srq->sg_info.pgsize = PAGE_SIZE;
+ qplib_srq->sg_info.pgshft = PAGE_SHIFT;
qplib_srq->srq_handle = ureq.srq_handle;
qplib_srq->dpi = &cntx->dpi;
@@ -1344,17 +1872,23 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata)
{
- struct ib_pd *ib_pd = ib_srq->pd;
- struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
- struct bnxt_re_dev *rdev = pd->rdev;
- struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
- struct bnxt_re_srq *srq =
- container_of(ib_srq, struct bnxt_re_srq, ib_srq);
- struct bnxt_qplib_nq *nq = NULL;
+ struct bnxt_qplib_dev_attr *dev_attr;
+ struct bnxt_re_ucontext *uctx;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_srq *srq;
+ struct bnxt_re_pd *pd;
+ struct ib_pd *ib_pd;
+ u32 active_srqs;
int rc, entries;
+ ib_pd = ib_srq->pd;
+ pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ rdev = pd->rdev;
+ dev_attr = rdev->dev_attr;
+ srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq);
+
if (srq_init_attr->attr.max_wr >= dev_attr->max_srq_wqes) {
- dev_err(rdev_to_dev(rdev), "Create CQ failed - max exceeded");
+ ibdev_err(&rdev->ibdev, "Create CQ failed - max exceeded");
rc = -EINVAL;
goto exit;
}
@@ -1364,22 +1898,26 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
goto exit;
}
+ uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx);
srq->rdev = rdev;
srq->qplib_srq.pd = &pd->qplib_pd;
srq->qplib_srq.dpi = &rdev->dpi_privileged;
/* Allocate 1 more than what's provided so posting max doesn't
* mean empty
*/
- entries = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1);
+ entries = bnxt_re_init_depth(srq_init_attr->attr.max_wr + 1, uctx);
if (entries > dev_attr->max_srq_wqes + 1)
entries = dev_attr->max_srq_wqes + 1;
-
srq->qplib_srq.max_wqe = entries;
+
srq->qplib_srq.max_sge = srq_init_attr->attr.max_sge;
+ /* 128 byte wqe size for SRQ. So use max sges */
+ srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size(dev_attr->max_srq_sges);
srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit;
srq->srq_limit = srq_init_attr->attr.srq_limit;
- srq->qplib_srq.eventq_hw_ring_id = rdev->nq[0].ring_id;
- nq = &rdev->nq[0];
+ srq->qplib_srq.eventq_hw_ring_id = rdev->nqr->nq[0].ring_id;
+ srq->qplib_srq.sg_info.pgsize = PAGE_SIZE;
+ srq->qplib_srq.sg_info.pgshft = PAGE_SHIFT;
if (udata) {
rc = bnxt_re_init_user_srq(rdev, pd, srq, udata);
@@ -1389,31 +1927,40 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
rc = bnxt_qplib_create_srq(&rdev->qplib_res, &srq->qplib_srq);
if (rc) {
- dev_err(rdev_to_dev(rdev), "Create HW SRQ failed!");
+ ibdev_err(&rdev->ibdev, "Create HW SRQ failed!");
goto fail;
}
if (udata) {
- struct bnxt_re_srq_resp resp;
+ struct bnxt_re_srq_resp resp = {};
resp.srqid = srq->qplib_srq.id;
+ if (rdev->chip_ctx->modes.toggle_bits & BNXT_QPLIB_SRQ_TOGGLE_BIT) {
+ hash_add(rdev->srq_hash, &srq->hash_entry, srq->qplib_srq.id);
+ srq->uctx_srq_page = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!srq->uctx_srq_page) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ resp.comp_mask |= BNXT_RE_SRQ_TOGGLE_PAGE_SUPPORT;
+ }
rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
if (rc) {
- dev_err(rdev_to_dev(rdev), "SRQ copy to udata failed!");
+ ibdev_err(&rdev->ibdev, "SRQ copy to udata failed!");
bnxt_qplib_destroy_srq(&rdev->qplib_res,
&srq->qplib_srq);
- goto exit;
+ goto fail;
}
}
- if (nq)
- nq->budget++;
- atomic_inc(&rdev->srq_count);
+ active_srqs = atomic_inc_return(&rdev->stats.res.srq_count);
+ if (active_srqs > rdev->stats.res.srq_watermark)
+ rdev->stats.res.srq_watermark = active_srqs;
+ spin_lock_init(&srq->lock);
return 0;
fail:
- if (srq->umem)
- ib_umem_release(srq->umem);
+ ib_umem_release(srq->umem);
exit:
return rc;
}
@@ -1425,33 +1972,28 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr,
struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq,
ib_srq);
struct bnxt_re_dev *rdev = srq->rdev;
- int rc;
switch (srq_attr_mask) {
case IB_SRQ_MAX_WR:
/* SRQ resize is not supported */
- break;
+ return -EINVAL;
case IB_SRQ_LIMIT:
/* Change the SRQ threshold */
if (srq_attr->srq_limit > srq->qplib_srq.max_wqe)
return -EINVAL;
srq->qplib_srq.threshold = srq_attr->srq_limit;
- rc = bnxt_qplib_modify_srq(&rdev->qplib_res, &srq->qplib_srq);
- if (rc) {
- dev_err(rdev_to_dev(rdev), "Modify HW SRQ failed!");
- return rc;
- }
+ bnxt_qplib_srq_arm_db(&srq->qplib_srq.dbinfo, srq->qplib_srq.threshold);
+
/* On success, update the shadow */
srq->srq_limit = srq_attr->srq_limit;
/* No need to Build and send response back to udata */
- break;
+ return 0;
default:
- dev_err(rdev_to_dev(rdev),
- "Unsupported srq_attr_mask 0x%x", srq_attr_mask);
+ ibdev_err(&rdev->ibdev,
+ "Unsupported srq_attr_mask 0x%x", srq_attr_mask);
return -EINVAL;
}
- return 0;
}
int bnxt_re_query_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr)
@@ -1466,7 +2008,7 @@ int bnxt_re_query_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr)
tsrq.qplib_srq.id = srq->qplib_srq.id;
rc = bnxt_qplib_query_srq(&rdev->qplib_res, &tsrq.qplib_srq);
if (rc) {
- dev_err(rdev_to_dev(rdev), "Query HW SRQ failed!");
+ ibdev_err(&rdev->ibdev, "Query HW SRQ failed!");
return rc;
}
srq_attr->max_wr = srq->qplib_srq.max_wqe;
@@ -1508,8 +2050,8 @@ static int bnxt_re_modify_shadow_qp(struct bnxt_re_dev *rdev,
struct bnxt_re_qp *qp1_qp,
int qp_attr_mask)
{
- struct bnxt_re_qp *qp = rdev->qp1_sqp;
- int rc = 0;
+ struct bnxt_re_qp *qp = rdev->gsi_ctx.gsi_sqp;
+ int rc;
if (qp_attr_mask & IB_QP_STATE) {
qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_STATE;
@@ -1532,8 +2074,7 @@ static int bnxt_re_modify_shadow_qp(struct bnxt_re_dev *rdev,
rc = bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp);
if (rc)
- dev_err(rdev_to_dev(rdev),
- "Failed to modify Shadow QP for QP1");
+ ibdev_err(&rdev->ibdev, "Failed to modify Shadow QP for QP1");
return rc;
}
@@ -1542,27 +2083,30 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
{
struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
struct bnxt_re_dev *rdev = qp->rdev;
- struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+ struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr;
enum ib_qp_state curr_qp_state, new_qp_state;
int rc, entries;
unsigned int flags;
u8 nw_type;
+ if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
qp->qplib_qp.modify_flags = 0;
if (qp_attr_mask & IB_QP_STATE) {
curr_qp_state = __to_ib_qp_state(qp->qplib_qp.cur_qp_state);
new_qp_state = qp_attr->qp_state;
if (!ib_modify_qp_is_ok(curr_qp_state, new_qp_state,
ib_qp->qp_type, qp_attr_mask)) {
- dev_err(rdev_to_dev(rdev),
- "Invalid attribute mask: %#x specified ",
- qp_attr_mask);
- dev_err(rdev_to_dev(rdev),
- "for qpn: %#x type: %#x",
- ib_qp->qp_num, ib_qp->qp_type);
- dev_err(rdev_to_dev(rdev),
- "curr_qp_state=0x%x, new_qp_state=0x%x\n",
- curr_qp_state, new_qp_state);
+ ibdev_err(&rdev->ibdev,
+ "Invalid attribute mask: %#x specified ",
+ qp_attr_mask);
+ ibdev_err(&rdev->ibdev,
+ "for qpn: %#x type: %#x",
+ ib_qp->qp_num, ib_qp->qp_type);
+ ibdev_err(&rdev->ibdev,
+ "curr_qp_state=0x%x, new_qp_state=0x%x\n",
+ curr_qp_state, new_qp_state);
return -EINVAL;
}
qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_STATE;
@@ -1570,18 +2114,16 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
if (!qp->sumem &&
qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
- dev_dbg(rdev_to_dev(rdev),
- "Move QP = %p to flush list\n",
- qp);
+ ibdev_dbg(&rdev->ibdev,
+ "Move QP = %p to flush list\n", qp);
flags = bnxt_re_lock_cqs(qp);
bnxt_qplib_add_flush_qp(&qp->qplib_qp);
bnxt_re_unlock_cqs(qp, flags);
}
if (!qp->sumem &&
qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_RESET) {
- dev_dbg(rdev_to_dev(rdev),
- "Move QP = %p out of flush list\n",
- qp);
+ ibdev_dbg(&rdev->ibdev,
+ "Move QP = %p out of flush list\n", qp);
flags = bnxt_re_lock_cqs(qp);
bnxt_qplib_clean_qp(&qp->qplib_qp);
bnxt_re_unlock_cqs(qp, flags);
@@ -1595,12 +2137,10 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
if (qp_attr_mask & IB_QP_ACCESS_FLAGS) {
qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS;
qp->qplib_qp.access =
- __from_ib_access_flags(qp_attr->qp_access_flags);
+ __qp_access_flags_from_ib(qp->qplib_qp.cctx,
+ qp_attr->qp_access_flags);
/* LOCAL_WRITE access must be set to allow RC receive */
- qp->qplib_qp.access |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
- /* Temp: Set all params on QP as of now */
- qp->qplib_qp.access |= CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE;
- qp->qplib_qp.access |= CMDQ_MODIFY_QP_ACCESS_REMOTE_READ;
+ qp->qplib_qp.access |= CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE;
}
if (qp_attr_mask & IB_QP_PKEY_INDEX) {
qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY;
@@ -1614,6 +2154,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
const struct ib_global_route *grh =
rdma_ah_read_grh(&qp_attr->ah_attr);
const struct ib_gid_attr *sgid_attr;
+ struct bnxt_re_gid_ctx *ctx;
qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_DGID |
CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL |
@@ -1625,19 +2166,19 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
memcpy(qp->qplib_qp.ah.dgid.data, grh->dgid.raw,
sizeof(qp->qplib_qp.ah.dgid.data));
qp->qplib_qp.ah.flow_label = grh->flow_label;
- /* If RoCE V2 is enabled, stack will have two entries for
- * each GID entry. Avoiding this duplicte entry in HW. Dividing
- * the GID index by 2 for RoCE V2
+ sgid_attr = grh->sgid_attr;
+ /* Get the HW context of the GID. The reference
+ * of GID table entry is already taken by the caller.
*/
- qp->qplib_qp.ah.sgid_index = grh->sgid_index / 2;
+ ctx = rdma_read_gid_hw_context(sgid_attr);
+ qp->qplib_qp.ah.sgid_index = ctx->idx;
qp->qplib_qp.ah.host_sgid_index = grh->sgid_index;
qp->qplib_qp.ah.hop_limit = grh->hop_limit;
- qp->qplib_qp.ah.traffic_class = grh->traffic_class;
+ qp->qplib_qp.ah.traffic_class = grh->traffic_class >> 2;
qp->qplib_qp.ah.sl = rdma_ah_get_sl(&qp_attr->ah_attr);
ether_addr_copy(qp->qplib_qp.ah.dmac,
qp_attr->ah_attr.roce.dmac);
- sgid_attr = qp_attr->ah_attr.grh.sgid_attr;
rc = rdma_read_gid_l2_fields(sgid_attr, NULL,
&qp->qplib_qp.smac[0]);
if (rc)
@@ -1660,18 +2201,20 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
}
}
- if (qp_attr_mask & IB_QP_PATH_MTU) {
- qp->qplib_qp.modify_flags |=
- CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU;
- qp->qplib_qp.path_mtu = __from_ib_mtu(qp_attr->path_mtu);
- qp->qplib_qp.mtu = ib_mtu_enum_to_int(qp_attr->path_mtu);
- } else if (qp_attr->qp_state == IB_QPS_RTR) {
- qp->qplib_qp.modify_flags |=
- CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU;
- qp->qplib_qp.path_mtu =
- __from_ib_mtu(iboe_get_mtu(rdev->netdev->mtu));
- qp->qplib_qp.mtu =
- ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu));
+ if (qp_attr->qp_state == IB_QPS_RTR) {
+ enum ib_mtu qpmtu;
+
+ qpmtu = iboe_get_mtu(rdev->netdev->mtu);
+ if (qp_attr_mask & IB_QP_PATH_MTU) {
+ if (ib_mtu_enum_to_int(qp_attr->path_mtu) >
+ ib_mtu_enum_to_int(qpmtu))
+ return -EINVAL;
+ qpmtu = qp_attr->path_mtu;
+ }
+
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU;
+ qp->qplib_qp.path_mtu = __from_ib_mtu(qpmtu);
+ qp->qplib_qp.mtu = ib_mtu_enum_to_int(qpmtu);
}
if (qp_attr_mask & IB_QP_TIMEOUT) {
@@ -1711,10 +2254,10 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
if (qp_attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
if (qp_attr->max_dest_rd_atomic >
dev_attr->max_qp_init_rd_atom) {
- dev_err(rdev_to_dev(rdev),
- "max_dest_rd_atomic requested%d is > dev_max%d",
- qp_attr->max_dest_rd_atomic,
- dev_attr->max_qp_init_rd_atom);
+ ibdev_err(&rdev->ibdev,
+ "max_dest_rd_atomic requested%d is > dev_max%d",
+ qp_attr->max_dest_rd_atomic,
+ dev_attr->max_qp_init_rd_atom);
return -EINVAL;
}
@@ -1723,6 +2266,9 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
qp->qplib_qp.max_dest_rd_atomic = qp_attr->max_dest_rd_atomic;
}
if (qp_attr_mask & IB_QP_CAP) {
+ struct bnxt_re_ucontext *uctx =
+ rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx);
+
qp->qplib_qp.modify_flags |=
CMDQ_MODIFY_QP_MODIFY_MASK_SQ_SIZE |
CMDQ_MODIFY_QP_MODIFY_MASK_RQ_SIZE |
@@ -1735,11 +2281,11 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
(qp_attr->cap.max_recv_sge >= dev_attr->max_qp_sges) ||
(qp_attr->cap.max_inline_data >=
dev_attr->max_inline_data)) {
- dev_err(rdev_to_dev(rdev),
- "Create QP failed - max exceeded");
+ ibdev_err(&rdev->ibdev,
+ "Create QP failed - max exceeded");
return -EINVAL;
}
- entries = roundup_pow_of_two(qp_attr->cap.max_send_wr);
+ entries = bnxt_re_init_depth(qp_attr->cap.max_send_wr, uctx);
qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
dev_attr->max_qp_wqes + 1);
qp->qplib_qp.sq.q_full_delta = qp->qplib_qp.sq.max_wqe -
@@ -1752,9 +2298,10 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
qp->qplib_qp.sq.q_full_delta -= 1;
qp->qplib_qp.sq.max_sge = qp_attr->cap.max_send_sge;
if (qp->qplib_qp.rq.max_wqe) {
- entries = roundup_pow_of_two(qp_attr->cap.max_recv_wr);
+ entries = bnxt_re_init_depth(qp_attr->cap.max_recv_wr, uctx);
qp->qplib_qp.rq.max_wqe =
min_t(u32, entries, dev_attr->max_qp_wqes + 1);
+ qp->qplib_qp.rq.max_sw_wqe = qp->qplib_qp.rq.max_wqe;
qp->qplib_qp.rq.q_full_delta = qp->qplib_qp.rq.max_wqe -
qp_attr->cap.max_recv_wr;
qp->qplib_qp.rq.max_sge = qp_attr->cap.max_recv_sge;
@@ -1769,10 +2316,10 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
}
rc = bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp);
if (rc) {
- dev_err(rdev_to_dev(rdev), "Failed to modify HW QP");
+ ibdev_err(&rdev->ibdev, "Failed to modify HW QP");
return rc;
}
- if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp)
+ if (ib_qp->qp_type == IB_QPT_GSI && rdev->gsi_ctx.gsi_sqp)
rc = bnxt_re_modify_shadow_qp(rdev, qp, qp_attr_mask);
return rc;
}
@@ -1794,16 +2341,18 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
rc = bnxt_qplib_query_qp(&rdev->qplib_res, qplib_qp);
if (rc) {
- dev_err(rdev_to_dev(rdev), "Failed to query HW QP");
+ ibdev_err(&rdev->ibdev, "Failed to query HW QP");
goto out;
}
qp_attr->qp_state = __to_ib_qp_state(qplib_qp->state);
+ qp_attr->cur_qp_state = __to_ib_qp_state(qplib_qp->cur_qp_state);
qp_attr->en_sqd_async_notify = qplib_qp->en_sqd_async_notify ? 1 : 0;
- qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp->access);
+ qp_attr->qp_access_flags = __qp_access_flags_to_ib(qp->qplib_qp.cctx,
+ qplib_qp->access);
qp_attr->pkey_index = qplib_qp->pkey_index;
qp_attr->qkey = qplib_qp->qkey;
qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
- rdma_ah_set_grh(&qp_attr->ah_attr, NULL, qplib_qp->ah.flow_label,
+ rdma_ah_set_grh(&qp_attr->ah_attr, NULL, qplib_qp->udp_sport,
qplib_qp->ah.host_sgid_index,
qplib_qp->ah.hop_limit,
qplib_qp->ah.traffic_class);
@@ -1815,6 +2364,7 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
qp_attr->retry_cnt = qplib_qp->retry_cnt;
qp_attr->rnr_retry = qplib_qp->rnr_retry;
qp_attr->min_rnr_timer = qplib_qp->min_rnr_timer;
+ qp_attr->port_num = __to_ib_port_num(qplib_qp->port_id);
qp_attr->rq_psn = qplib_qp->rq.psn;
qp_attr->max_rd_atomic = qplib_qp->max_rd_atomic;
qp_attr->sq_psn = qplib_qp->sq.psn;
@@ -1857,7 +2407,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
u8 ip_version = 0;
u16 vlan_id = 0xFFFF;
void *buf;
- int i, rc = 0;
+ int i, rc;
memset(&qp->qp1_hdr, 0, sizeof(qp->qp1_hdr));
@@ -1895,7 +2445,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
}
is_eth = true;
- is_vlan = (vlan_id && (vlan_id < 0x1000)) ? true : false;
+ is_vlan = vlan_id && (vlan_id < 0x1000);
ib_ud_header_init(payload_size, !is_eth, is_eth, is_vlan, is_grh,
ip_version, is_udp, 0, &qp->qp1_hdr);
@@ -1999,7 +2549,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
wqe->num_sge++;
} else {
- dev_err(rdev_to_dev(qp->rdev), "QP1 buffer is empty!");
+ ibdev_err(&qp->rdev->ibdev, "QP1 buffer is empty!");
rc = -ENOMEM;
}
return rc;
@@ -2016,9 +2566,12 @@ static int bnxt_re_build_qp1_shadow_qp_recv(struct bnxt_re_qp *qp,
struct bnxt_qplib_swqe *wqe,
int payload_size)
{
+ struct bnxt_re_sqp_entries *sqp_entry;
struct bnxt_qplib_sge ref, sge;
+ struct bnxt_re_dev *rdev;
u32 rq_prod_index;
- struct bnxt_re_sqp_entries *sqp_entry;
+
+ rdev = qp->rdev;
rq_prod_index = bnxt_qplib_get_rq_prod_index(&qp->qplib_qp);
@@ -2033,7 +2586,7 @@ static int bnxt_re_build_qp1_shadow_qp_recv(struct bnxt_re_qp *qp,
ref.lkey = wqe->sg_list[0].lkey;
ref.size = wqe->sg_list[0].size;
- sqp_entry = &qp->rdev->sqp_tbl[rq_prod_index];
+ sqp_entry = &rdev->gsi_ctx.sqp_tbl[rq_prod_index];
/* SGE 1 */
wqe->sg_list[0].addr = sge.addr;
@@ -2075,7 +2628,7 @@ static int bnxt_re_build_send_wqe(struct bnxt_re_qp *qp,
break;
case IB_WR_SEND_WITH_IMM:
wqe->type = BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM;
- wqe->send.imm_data = wr->ex.imm_data;
+ wqe->send.imm_data = be32_to_cpu(wr->ex.imm_data);
break;
case IB_WR_SEND_WITH_INV:
wqe->type = BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV;
@@ -2105,7 +2658,7 @@ static int bnxt_re_build_rdma_wqe(const struct ib_send_wr *wr,
break;
case IB_WR_RDMA_WRITE_WITH_IMM:
wqe->type = BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM;
- wqe->rdma.imm_data = wr->ex.imm_data;
+ wqe->rdma.imm_data = be32_to_cpu(wr->ex.imm_data);
break;
case IB_WR_RDMA_READ:
wqe->type = BNXT_QPLIB_SWQE_TYPE_RDMA_READ;
@@ -2161,11 +2714,6 @@ static int bnxt_re_build_inv_wqe(const struct ib_send_wr *wr,
wqe->type = BNXT_QPLIB_SWQE_TYPE_LOCAL_INV;
wqe->local_inv.inv_l_key = wr->ex.invalidate_rkey;
- /* Need unconditional fence for local invalidate
- * opcode to work as expected.
- */
- wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
-
if (wr->send_flags & IB_SEND_SIGNALED)
wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
if (wr->send_flags & IB_SEND_SOLICITED)
@@ -2185,15 +2733,9 @@ static int bnxt_re_build_reg_wqe(const struct ib_reg_wr *wr,
wqe->frmr.pbl_dma_ptr = qplib_frpl->hwq.pbl_dma_ptr[0];
wqe->frmr.page_list = mr->pages;
wqe->frmr.page_list_len = mr->npages;
- wqe->frmr.levels = qplib_frpl->hwq.level + 1;
+ wqe->frmr.levels = qplib_frpl->hwq.level;
wqe->type = BNXT_QPLIB_SWQE_TYPE_REG_MR;
- /* Need unconditional fence for reg_mr
- * opcode to function as expected.
- */
-
- wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
-
if (wr->wr.send_flags & IB_SEND_SIGNALED)
wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
@@ -2210,7 +2752,8 @@ static int bnxt_re_build_reg_wqe(const struct ib_reg_wr *wr,
wqe->frmr.l_key = wr->key;
wqe->frmr.length = wr->mr->length;
- wqe->frmr.pbl_pg_sz_log = (wr->mr->page_size >> PAGE_SHIFT_4K) - 1;
+ wqe->frmr.pbl_pg_sz_log = ilog2(PAGE_SIZE >> PAGE_SHIFT_4K);
+ wqe->frmr.pg_sz_log = ilog2(wr->mr->page_size >> PAGE_SHIFT_4K);
wqe->frmr.va = wr->mr->iova;
return 0;
}
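/*
 * Worked example, not part of the patch: assuming PAGE_SHIFT_4K is 12, the
 * two log fields above encode page sizes in 4K units, e.g.
 *
 *	wr->mr->page_size =  4K -> pg_sz_log = ilog2( 4K >> 12) = 0
 *	wr->mr->page_size = 64K -> pg_sz_log = ilog2(64K >> 12) = 4
 *	wr->mr->page_size =  2M -> pg_sz_log = ilog2( 2M >> 12) = 9
 *
 * while pbl_pg_sz_log describes the host PAGE_SIZE pages backing the PBL
 * itself (0 on a 4K-page kernel).
 */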
@@ -2232,8 +2775,8 @@ static int bnxt_re_copy_inline_data(struct bnxt_re_dev *rdev,
if ((sge_len + wqe->inline_len) >
BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) {
- dev_err(rdev_to_dev(rdev),
- "Inline data size requested > supported value");
+ ibdev_err(&rdev->ibdev,
+ "Inline data size requested > supported value");
return -EINVAL;
}
sge_len = wr->sg_list[i].length;
@@ -2280,21 +2823,18 @@ static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev,
struct bnxt_re_qp *qp,
const struct ib_send_wr *wr)
{
- struct bnxt_qplib_swqe wqe;
int rc = 0, payload_sz = 0;
unsigned long flags;
spin_lock_irqsave(&qp->sq_lock, flags);
- memset(&wqe, 0, sizeof(wqe));
while (wr) {
- /* House keeping */
- memset(&wqe, 0, sizeof(wqe));
+ struct bnxt_qplib_swqe wqe = {};
/* Common */
wqe.num_sge = wr->num_sge;
if (wr->num_sge > qp->qplib_qp.sq.max_sge) {
- dev_err(rdev_to_dev(rdev),
- "Limit exceeded for Send SGEs");
+ ibdev_err(&rdev->ibdev,
+ "Limit exceeded for Send SGEs");
rc = -EINVAL;
goto bad;
}
@@ -2313,19 +2853,32 @@ static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev,
rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe);
bad:
if (rc) {
- dev_err(rdev_to_dev(rdev),
- "Post send failed opcode = %#x rc = %d",
- wr->opcode, rc);
+ ibdev_err(&rdev->ibdev,
+ "Post send failed opcode = %#x rc = %d",
+ wr->opcode, rc);
break;
}
wr = wr->next;
}
bnxt_qplib_post_send_db(&qp->qplib_qp);
- bnxt_ud_qp_hw_stall_workaround(qp);
+ if (!bnxt_qplib_is_chip_gen_p5_p7(qp->rdev->chip_ctx))
+ bnxt_ud_qp_hw_stall_workaround(qp);
spin_unlock_irqrestore(&qp->sq_lock, flags);
return rc;
}
+static void bnxt_re_legacy_set_uc_fence(struct bnxt_qplib_swqe *wqe)
+{
+ /* Need unconditional fence for non-wire memory opcode
+ * to work as expected.
+ */
+ if (wqe->type == BNXT_QPLIB_SWQE_TYPE_LOCAL_INV ||
+ wqe->type == BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR ||
+ wqe->type == BNXT_QPLIB_SWQE_TYPE_REG_MR ||
+ wqe->type == BNXT_QPLIB_SWQE_TYPE_BIND_MW)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+}
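/*
 * Descriptive note, not part of the patch: the unconditional-fence flag
 * removed from bnxt_re_build_inv_wqe() and bnxt_re_build_reg_wqe() above is
 * now set in this single helper, and bnxt_re_post_send() applies it only on
 * the pre-gen-P5/P7 chips that need the workaround.
 */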
+
int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr)
{
@@ -2342,8 +2895,8 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr,
/* Common */
wqe.num_sge = wr->num_sge;
if (wr->num_sge > qp->qplib_qp.sq.max_sge) {
- dev_err(rdev_to_dev(qp->rdev),
- "Limit exceeded for Send SGEs");
+ ibdev_err(&qp->rdev->ibdev,
+ "Limit exceeded for Send SGEs");
rc = -EINVAL;
goto bad;
}
@@ -2374,7 +2927,7 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr,
default:
break;
}
- /* fall through */
+ fallthrough;
case IB_WR_SEND_WITH_INV:
rc = bnxt_re_build_send_wqe(qp, wr, &wqe);
break;
@@ -2388,8 +2941,8 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr,
rc = bnxt_re_build_atomic_wqe(wr, &wqe);
break;
case IB_WR_RDMA_READ_WITH_INV:
- dev_err(rdev_to_dev(qp->rdev),
- "RDMA Read with Invalidate is not supported");
+ ibdev_err(&qp->rdev->ibdev,
+ "RDMA Read with Invalidate is not supported");
rc = -EINVAL;
goto bad;
case IB_WR_LOCAL_INV:
@@ -2400,25 +2953,29 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr,
break;
default:
/* Unsupported WRs */
- dev_err(rdev_to_dev(qp->rdev),
- "WR (%#x) is not supported", wr->opcode);
+ ibdev_err(&qp->rdev->ibdev,
+ "WR (%#x) is not supported", wr->opcode);
rc = -EINVAL;
goto bad;
}
- if (!rc)
+ if (!rc) {
+ if (!bnxt_qplib_is_chip_gen_p5_p7(qp->rdev->chip_ctx))
+ bnxt_re_legacy_set_uc_fence(&wqe);
rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe);
+ }
bad:
if (rc) {
- dev_err(rdev_to_dev(qp->rdev),
- "post_send failed op:%#x qps = %#x rc = %d\n",
- wr->opcode, qp->qplib_qp.state, rc);
+ ibdev_err(&qp->rdev->ibdev,
+ "post_send failed op:%#x qps = %#x rc = %d\n",
+ wr->opcode, qp->qplib_qp.state, rc);
*bad_wr = wr;
break;
}
wr = wr->next;
}
bnxt_qplib_post_send_db(&qp->qplib_qp);
- bnxt_ud_qp_hw_stall_workaround(qp);
+ if (!bnxt_qplib_is_chip_gen_p5_p7(qp->rdev->chip_ctx))
+ bnxt_ud_qp_hw_stall_workaround(qp);
spin_unlock_irqrestore(&qp->sq_lock, flags);
return rc;
@@ -2431,7 +2988,6 @@ static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev,
struct bnxt_qplib_swqe wqe;
int rc = 0;
- memset(&wqe, 0, sizeof(wqe));
while (wr) {
/* House keeping */
memset(&wqe, 0, sizeof(wqe));
@@ -2439,8 +2995,8 @@ static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev,
/* Common */
wqe.num_sge = wr->num_sge;
if (wr->num_sge > qp->qplib_qp.rq.max_sge) {
- dev_err(rdev_to_dev(rdev),
- "Limit exceeded for Receive SGEs");
+ ibdev_err(&rdev->ibdev,
+ "Limit exceeded for Receive SGEs");
rc = -EINVAL;
break;
}
@@ -2476,8 +3032,8 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr,
/* Common */
wqe.num_sge = wr->num_sge;
if (wr->num_sge > qp->qplib_qp.rq.max_sge) {
- dev_err(rdev_to_dev(qp->rdev),
- "Limit exceeded for Receive SGEs");
+ ibdev_err(&qp->rdev->ibdev,
+ "Limit exceeded for Receive SGEs");
rc = -EINVAL;
*bad_wr = wr;
break;
@@ -2516,81 +3072,103 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr,
return rc;
}
+static struct bnxt_qplib_nq *bnxt_re_get_nq(struct bnxt_re_dev *rdev)
+{
+ int min, indx;
+
+ mutex_lock(&rdev->nqr->load_lock);
+ for (indx = 0, min = 0; indx < (rdev->nqr->num_msix - 1); indx++) {
+ if (rdev->nqr->nq[min].load > rdev->nqr->nq[indx].load)
+ min = indx;
+ }
+ rdev->nqr->nq[min].load++;
+ mutex_unlock(&rdev->nqr->load_lock);
+
+ return &rdev->nqr->nq[min];
+}
+
+static void bnxt_re_put_nq(struct bnxt_re_dev *rdev, struct bnxt_qplib_nq *nq)
+{
+ mutex_lock(&rdev->nqr->load_lock);
+ nq->load--;
+ mutex_unlock(&rdev->nqr->load_lock);
+}
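/*
 * Illustrative sketch, not part of the patch: bnxt_re_get_nq() replaces the
 * old round-robin CNQ assignment with a least-loaded pick.  Stripped of the
 * locking and driver types, the selection is simply:
 */
static int least_loaded_index(const unsigned int *load, int count)
{
	int i, min = 0;

	for (i = 1; i < count; i++)
		if (load[i] < load[min])
			min = i;
	return min;	/* caller increments load[min]; decremented on destroy */
}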
+
/* Completion Queues */
int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
- int rc;
- struct bnxt_re_cq *cq;
+ struct bnxt_qplib_chip_ctx *cctx;
struct bnxt_qplib_nq *nq;
struct bnxt_re_dev *rdev;
+ struct bnxt_re_cq *cq;
cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
rdev = cq->rdev;
nq = cq->qplib_cq.nq;
+ cctx = rdev->chip_ctx;
- rc = bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq);
- if (rc) {
- dev_err(rdev_to_dev(rdev), "Failed to destroy HW CQ");
- return rc;
+ if (cctx->modes.toggle_bits & BNXT_QPLIB_CQ_TOGGLE_BIT) {
+ free_page((unsigned long)cq->uctx_cq_page);
+ hash_del(&cq->hash_entry);
}
- if (!IS_ERR_OR_NULL(cq->umem))
- ib_umem_release(cq->umem);
+ bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq);
- atomic_dec(&rdev->cq_count);
- nq->budget--;
- kfree(cq->cql);
- kfree(cq);
+ bnxt_re_put_nq(rdev, nq);
+ ib_umem_release(cq->umem);
+ atomic_dec(&rdev->stats.res.cq_count);
+ kfree(cq->cql);
return 0;
}
-struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs)
{
- struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
- struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
- struct bnxt_re_cq *cq = NULL;
- int rc, entries;
+ struct bnxt_re_cq *cq = container_of(ibcq, struct bnxt_re_cq, ib_cq);
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibcq->device, ibdev);
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct bnxt_re_ucontext *uctx =
+ rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx);
+ struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr;
+ struct bnxt_qplib_chip_ctx *cctx;
int cqe = attr->cqe;
- struct bnxt_qplib_nq *nq = NULL;
- unsigned int nq_alloc_cnt;
+ int rc, entries;
+ u32 active_cqs;
+
+ if (attr->flags)
+ return -EOPNOTSUPP;
/* Validate CQ fields */
if (cqe < 1 || cqe > dev_attr->max_cq_wqes) {
- dev_err(rdev_to_dev(rdev), "Failed to create CQ -max exceeded");
- return ERR_PTR(-EINVAL);
+ ibdev_err(&rdev->ibdev, "Failed to create CQ -max exceeded");
+ return -EINVAL;
}
- cq = kzalloc(sizeof(*cq), GFP_KERNEL);
- if (!cq)
- return ERR_PTR(-ENOMEM);
cq->rdev = rdev;
+ cctx = rdev->chip_ctx;
cq->qplib_cq.cq_handle = (u64)(unsigned long)(&cq->qplib_cq);
- entries = roundup_pow_of_two(cqe + 1);
+ entries = bnxt_re_init_depth(cqe + 1, uctx);
if (entries > dev_attr->max_cq_wqes + 1)
entries = dev_attr->max_cq_wqes + 1;
+ cq->qplib_cq.sg_info.pgsize = PAGE_SIZE;
+ cq->qplib_cq.sg_info.pgshft = PAGE_SHIFT;
if (udata) {
struct bnxt_re_cq_req req;
- struct bnxt_re_ucontext *uctx = rdma_udata_to_drv_context(
- udata, struct bnxt_re_ucontext, ib_uctx);
if (ib_copy_from_udata(&req, udata, sizeof(req))) {
rc = -EFAULT;
goto fail;
}
- cq->umem = ib_umem_get(udata, req.cq_va,
+ cq->umem = ib_umem_get(&rdev->ibdev, req.cq_va,
entries * sizeof(struct cq_base),
- IB_ACCESS_LOCAL_WRITE, 1);
+ IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(cq->umem)) {
rc = PTR_ERR(cq->umem);
goto fail;
}
- cq->qplib_cq.sg_info.sglist = cq->umem->sg_head.sgl;
- cq->qplib_cq.sg_info.npages = ib_umem_num_pages(cq->umem);
- cq->qplib_cq.sg_info.nmap = cq->umem->nmap;
+ cq->qplib_cq.sg_info.umem = cq->umem;
cq->qplib_cq.dpi = &uctx->dpi;
} else {
cq->max_cql = min_t(u32, entries, MAX_CQL_PER_POLL);
@@ -2603,53 +3181,159 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
cq->qplib_cq.dpi = &rdev->dpi_privileged;
}
- /*
- * Allocating the NQ in a round robin fashion. nq_alloc_cnt is a
- * used for getting the NQ index.
- */
- nq_alloc_cnt = atomic_inc_return(&rdev->nq_alloc_cnt);
- nq = &rdev->nq[nq_alloc_cnt % (rdev->num_msix - 1)];
cq->qplib_cq.max_wqe = entries;
- cq->qplib_cq.cnq_hw_ring_id = nq->ring_id;
- cq->qplib_cq.nq = nq;
+ cq->qplib_cq.coalescing = &rdev->cq_coalescing;
+ cq->qplib_cq.nq = bnxt_re_get_nq(rdev);
+ cq->qplib_cq.cnq_hw_ring_id = cq->qplib_cq.nq->ring_id;
rc = bnxt_qplib_create_cq(&rdev->qplib_res, &cq->qplib_cq);
if (rc) {
- dev_err(rdev_to_dev(rdev), "Failed to create HW CQ");
+ ibdev_err(&rdev->ibdev, "Failed to create HW CQ");
goto fail;
}
cq->ib_cq.cqe = entries;
cq->cq_period = cq->qplib_cq.period;
- nq->budget++;
- atomic_inc(&rdev->cq_count);
+ active_cqs = atomic_inc_return(&rdev->stats.res.cq_count);
+ if (active_cqs > rdev->stats.res.cq_watermark)
+ rdev->stats.res.cq_watermark = active_cqs;
spin_lock_init(&cq->cq_lock);
if (udata) {
- struct bnxt_re_cq_resp resp;
+ struct bnxt_re_cq_resp resp = {};
+ if (cctx->modes.toggle_bits & BNXT_QPLIB_CQ_TOGGLE_BIT) {
+ hash_add(rdev->cq_hash, &cq->hash_entry, cq->qplib_cq.id);
+ /* Allocate a page */
+ cq->uctx_cq_page = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!cq->uctx_cq_page) {
+ rc = -ENOMEM;
+ goto c2fail;
+ }
+ resp.comp_mask |= BNXT_RE_CQ_TOGGLE_PAGE_SUPPORT;
+ }
resp.cqid = cq->qplib_cq.id;
resp.tail = cq->qplib_cq.hwq.cons;
resp.phase = cq->qplib_cq.period;
resp.rsvd = 0;
- rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ rc = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
if (rc) {
- dev_err(rdev_to_dev(rdev), "Failed to copy CQ udata");
+ ibdev_err(&rdev->ibdev, "Failed to copy CQ udata");
bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq);
- goto c2fail;
+ goto free_mem;
}
}
- return &cq->ib_cq;
+ return 0;
+free_mem:
+ free_page((unsigned long)cq->uctx_cq_page);
c2fail:
- if (udata)
- ib_umem_release(cq->umem);
+ ib_umem_release(cq->umem);
fail:
kfree(cq->cql);
- kfree(cq);
- return ERR_PTR(rc);
+ return rc;
+}
+
+static void bnxt_re_resize_cq_complete(struct bnxt_re_cq *cq)
+{
+ struct bnxt_re_dev *rdev = cq->rdev;
+
+ bnxt_qplib_resize_cq_complete(&rdev->qplib_res, &cq->qplib_cq);
+
+ cq->qplib_cq.max_wqe = cq->resize_cqe;
+ if (cq->resize_umem) {
+ ib_umem_release(cq->umem);
+ cq->umem = cq->resize_umem;
+ cq->resize_umem = NULL;
+ cq->resize_cqe = 0;
+ }
+}
+
+int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
+{
+ struct bnxt_qplib_sg_info sg_info = {};
+ struct bnxt_qplib_dpi *orig_dpi = NULL;
+ struct bnxt_qplib_dev_attr *dev_attr;
+ struct bnxt_re_ucontext *uctx = NULL;
+ struct bnxt_re_resize_cq_req req;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_cq *cq;
+ int rc, entries;
+
+ cq = container_of(ibcq, struct bnxt_re_cq, ib_cq);
+ rdev = cq->rdev;
+ dev_attr = rdev->dev_attr;
+ if (!ibcq->uobject) {
+ ibdev_err(&rdev->ibdev, "Kernel CQ Resize not supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (cq->resize_umem) {
+ ibdev_err(&rdev->ibdev, "Resize CQ %#x failed - Busy",
+ cq->qplib_cq.id);
+ return -EBUSY;
+ }
+
+ /* Check the requested cq depth out of supported depth */
+ if (cqe < 1 || cqe > dev_attr->max_cq_wqes) {
+ ibdev_err(&rdev->ibdev, "Resize CQ %#x failed - out of range cqe %d",
+ cq->qplib_cq.id, cqe);
+ return -EINVAL;
+ }
+
+ uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx);
+ entries = bnxt_re_init_depth(cqe + 1, uctx);
+ if (entries > dev_attr->max_cq_wqes + 1)
+ entries = dev_attr->max_cq_wqes + 1;
+
+ /* uverbs consumer */
+ if (ib_copy_from_udata(&req, udata, sizeof(req))) {
+ rc = -EFAULT;
+ goto fail;
+ }
+
+ cq->resize_umem = ib_umem_get(&rdev->ibdev, req.cq_va,
+ entries * sizeof(struct cq_base),
+ IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(cq->resize_umem)) {
+ rc = PTR_ERR(cq->resize_umem);
+ ibdev_err(&rdev->ibdev, "%s: ib_umem_get failed! rc = %pe\n",
+ __func__, cq->resize_umem);
+ cq->resize_umem = NULL;
+ goto fail;
+ }
+ cq->resize_cqe = entries;
+ memcpy(&sg_info, &cq->qplib_cq.sg_info, sizeof(sg_info));
+ orig_dpi = cq->qplib_cq.dpi;
+
+ cq->qplib_cq.sg_info.umem = cq->resize_umem;
+ cq->qplib_cq.sg_info.pgsize = PAGE_SIZE;
+ cq->qplib_cq.sg_info.pgshft = PAGE_SHIFT;
+ cq->qplib_cq.dpi = &uctx->dpi;
+
+ rc = bnxt_qplib_resize_cq(&rdev->qplib_res, &cq->qplib_cq, entries);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Resize HW CQ %#x failed!",
+ cq->qplib_cq.id);
+ goto fail;
+ }
+
+ cq->ib_cq.cqe = cq->resize_cqe;
+ atomic_inc(&rdev->stats.res.resize_count);
+
+ return 0;
+
+fail:
+ if (cq->resize_umem) {
+ ib_umem_release(cq->resize_umem);
+ cq->resize_umem = NULL;
+ cq->resize_cqe = 0;
+ memcpy(&cq->qplib_cq.sg_info, &sg_info, sizeof(sg_info));
+ cq->qplib_cq.dpi = orig_dpi;
+ }
+ return rc;
}
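/*
 * Descriptive note, not part of the patch: bnxt_re_resize_cq() only pins
 * the new user buffer (resize_umem) and issues the firmware resize; the
 * switch-over to the new umem happens later in bnxt_re_resize_cq_complete(),
 * driven from bnxt_re_poll_cq() once the consumer polls the resized CQ (see
 * the cq->resize_umem check added there).
 */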
static u8 __req_to_ib_wc_status(u8 qstatus)
@@ -2868,54 +3552,50 @@ static bool bnxt_re_is_loopback_packet(struct bnxt_re_dev *rdev,
return rc;
}
-static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *qp1_qp,
+static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *gsi_qp,
struct bnxt_qplib_cqe *cqe)
{
- struct bnxt_re_dev *rdev = qp1_qp->rdev;
+ struct bnxt_re_dev *rdev = gsi_qp->rdev;
struct bnxt_re_sqp_entries *sqp_entry = NULL;
- struct bnxt_re_qp *qp = rdev->qp1_sqp;
+ struct bnxt_re_qp *gsi_sqp = rdev->gsi_ctx.gsi_sqp;
+ dma_addr_t shrq_hdr_buf_map;
+ struct ib_sge s_sge[2] = {};
+ struct ib_sge r_sge[2] = {};
+ struct bnxt_re_ah *gsi_sah;
+ struct ib_recv_wr rwr = {};
+ dma_addr_t rq_hdr_buf_map;
+ struct ib_ud_wr udwr = {};
struct ib_send_wr *swr;
- struct ib_ud_wr udwr;
- struct ib_recv_wr rwr;
+ u32 skip_bytes = 0;
int pkt_type = 0;
- u32 tbl_idx;
void *rq_hdr_buf;
- dma_addr_t rq_hdr_buf_map;
- dma_addr_t shrq_hdr_buf_map;
u32 offset = 0;
- u32 skip_bytes = 0;
- struct ib_sge s_sge[2];
- struct ib_sge r_sge[2];
+ u32 tbl_idx;
int rc;
- memset(&udwr, 0, sizeof(udwr));
- memset(&rwr, 0, sizeof(rwr));
- memset(&s_sge, 0, sizeof(s_sge));
- memset(&r_sge, 0, sizeof(r_sge));
-
swr = &udwr.wr;
tbl_idx = cqe->wr_id;
- rq_hdr_buf = qp1_qp->qplib_qp.rq_hdr_buf +
- (tbl_idx * qp1_qp->qplib_qp.rq_hdr_buf_size);
- rq_hdr_buf_map = bnxt_qplib_get_qp_buf_from_index(&qp1_qp->qplib_qp,
+ rq_hdr_buf = gsi_qp->qplib_qp.rq_hdr_buf +
+ (tbl_idx * gsi_qp->qplib_qp.rq_hdr_buf_size);
+ rq_hdr_buf_map = bnxt_qplib_get_qp_buf_from_index(&gsi_qp->qplib_qp,
tbl_idx);
/* Shadow QP header buffer */
- shrq_hdr_buf_map = bnxt_qplib_get_qp_buf_from_index(&qp->qplib_qp,
+ shrq_hdr_buf_map = bnxt_qplib_get_qp_buf_from_index(&gsi_qp->qplib_qp,
tbl_idx);
- sqp_entry = &rdev->sqp_tbl[tbl_idx];
+ sqp_entry = &rdev->gsi_ctx.sqp_tbl[tbl_idx];
/* Store this cqe */
memcpy(&sqp_entry->cqe, cqe, sizeof(struct bnxt_qplib_cqe));
- sqp_entry->qp1_qp = qp1_qp;
+ sqp_entry->qp1_qp = gsi_qp;
/* Find packet type from the cqe */
pkt_type = bnxt_re_check_packet_type(cqe->raweth_qp1_flags,
cqe->raweth_qp1_flags2);
if (pkt_type < 0) {
- dev_err(rdev_to_dev(rdev), "Invalid packet\n");
+ ibdev_err(&rdev->ibdev, "Invalid packet\n");
return -EINVAL;
}
@@ -2962,10 +3642,10 @@ static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *qp1_qp,
rwr.wr_id = tbl_idx;
rwr.next = NULL;
- rc = bnxt_re_post_recv_shadow_qp(rdev, qp, &rwr);
+ rc = bnxt_re_post_recv_shadow_qp(rdev, gsi_sqp, &rwr);
if (rc) {
- dev_err(rdev_to_dev(rdev),
- "Failed to post Rx buffers to shadow QP");
+ ibdev_err(&rdev->ibdev,
+ "Failed to post Rx buffers to shadow QP");
return -ENOMEM;
}
@@ -2974,15 +3654,13 @@ static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *qp1_qp,
swr->wr_id = tbl_idx;
swr->opcode = IB_WR_SEND;
swr->next = NULL;
-
- udwr.ah = &rdev->sqp_ah->ib_ah;
- udwr.remote_qpn = rdev->qp1_sqp->qplib_qp.id;
- udwr.remote_qkey = rdev->qp1_sqp->qplib_qp.qkey;
+ gsi_sah = rdev->gsi_ctx.gsi_sah;
+ udwr.ah = &gsi_sah->ib_ah;
+ udwr.remote_qpn = gsi_sqp->qplib_qp.id;
+ udwr.remote_qkey = gsi_sqp->qplib_qp.qkey;
/* post data received in the send queue */
- rc = bnxt_re_post_send_shadow_qp(rdev, qp, swr);
-
- return 0;
+ return bnxt_re_post_send_shadow_qp(rdev, gsi_sqp, swr);
}
static void bnxt_re_process_res_rawqp1_wc(struct ib_wc *wc,
@@ -2993,6 +3671,19 @@ static void bnxt_re_process_res_rawqp1_wc(struct ib_wc *wc,
wc->wc_flags |= IB_WC_GRH;
}
+static bool bnxt_re_check_if_vlan_valid(struct bnxt_re_dev *rdev,
+ u16 vlan_id)
+{
+ /*
+ * Check if the vlan is configured in the host. If not configured, it
+ * can be a transparent VLAN. So dont report the vlan id.
+ */
+ if (!__vlan_find_dev_deep_rcu(rdev->netdev,
+ htons(ETH_P_8021Q), vlan_id))
+ return false;
+ return true;
+}
+
static bool bnxt_re_is_vlan_pkt(struct bnxt_qplib_cqe *orig_cqe,
u16 *vid, u8 *sl)
{
@@ -3034,12 +3725,12 @@ static void bnxt_re_process_res_rc_wc(struct ib_wc *wc,
wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
}
-static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *qp,
+static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *gsi_sqp,
struct ib_wc *wc,
struct bnxt_qplib_cqe *cqe)
{
- struct bnxt_re_dev *rdev = qp->rdev;
- struct bnxt_re_qp *qp1_qp = NULL;
+ struct bnxt_re_dev *rdev = gsi_sqp->rdev;
+ struct bnxt_re_qp *gsi_qp = NULL;
struct bnxt_qplib_cqe *orig_cqe = NULL;
struct bnxt_re_sqp_entries *sqp_entry = NULL;
int nw_type;
@@ -3049,21 +3740,23 @@ static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *qp,
tbl_idx = cqe->wr_id;
- sqp_entry = &rdev->sqp_tbl[tbl_idx];
- qp1_qp = sqp_entry->qp1_qp;
+ sqp_entry = &rdev->gsi_ctx.sqp_tbl[tbl_idx];
+ gsi_qp = sqp_entry->qp1_qp;
orig_cqe = &sqp_entry->cqe;
wc->wr_id = sqp_entry->wrid;
wc->byte_len = orig_cqe->length;
- wc->qp = &qp1_qp->ib_qp;
+ wc->qp = &gsi_qp->ib_qp;
- wc->ex.imm_data = orig_cqe->immdata;
+ wc->ex.imm_data = cpu_to_be32(orig_cqe->immdata);
wc->src_qp = orig_cqe->src_qp;
memcpy(wc->smac, orig_cqe->smac, ETH_ALEN);
if (bnxt_re_is_vlan_pkt(orig_cqe, &vlan_id, &sl)) {
- wc->vlan_id = vlan_id;
- wc->sl = sl;
- wc->wc_flags |= IB_WC_WITH_VLAN;
+ if (bnxt_re_check_if_vlan_valid(rdev, vlan_id)) {
+ wc->vlan_id = vlan_id;
+ wc->sl = sl;
+ wc->wc_flags |= IB_WC_WITH_VLAN;
+ }
}
wc->port_num = 1;
wc->vendor_err = orig_cqe->status;
@@ -3084,8 +3777,11 @@ static void bnxt_re_process_res_ud_wc(struct bnxt_re_qp *qp,
struct ib_wc *wc,
struct bnxt_qplib_cqe *cqe)
{
+ struct bnxt_re_dev *rdev;
+ u16 vlan_id = 0;
u8 nw_type;
+ rdev = qp->rdev;
wc->opcode = IB_WC_RECV;
wc->status = __rc_to_ib_wc_status(cqe->status);
@@ -3097,9 +3793,12 @@ static void bnxt_re_process_res_ud_wc(struct bnxt_re_qp *qp,
memcpy(wc->smac, cqe->smac, ETH_ALEN);
wc->wc_flags |= IB_WC_WITH_SMAC;
if (cqe->flags & CQ_RES_UD_FLAGS_META_FORMAT_VLAN) {
- wc->vlan_id = (cqe->cfa_meta & 0xFFF);
- if (wc->vlan_id < 0x1000)
- wc->wc_flags |= IB_WC_WITH_VLAN;
+ vlan_id = (cqe->cfa_meta & 0xFFF);
+ }
+ /* Mark only if vlan_id is non zero */
+ if (vlan_id && bnxt_re_check_if_vlan_valid(rdev, vlan_id)) {
+ wc->vlan_id = vlan_id;
+ wc->wc_flags |= IB_WC_WITH_VLAN;
}
nw_type = (cqe->flags & CQ_RES_UD_FLAGS_ROCE_IP_VER_MASK) >>
CQ_RES_UD_FLAGS_ROCE_IP_VER_SFT;
@@ -3113,18 +3812,18 @@ static int send_phantom_wqe(struct bnxt_re_qp *qp)
{
struct bnxt_qplib_qp *lib_qp = &qp->qplib_qp;
unsigned long flags;
- int rc = 0;
+ int rc;
spin_lock_irqsave(&qp->sq_lock, flags);
rc = bnxt_re_bind_fence_mw(lib_qp);
if (!rc) {
lib_qp->sq.phantom_wqe_cnt++;
- dev_dbg(&lib_qp->sq.hwq.pdev->dev,
- "qp %#x sq->prod %#x sw_prod %#x phantom_wqe_cnt %d\n",
- lib_qp->id, lib_qp->sq.hwq.prod,
- HWQ_CMP(lib_qp->sq.hwq.prod, &lib_qp->sq.hwq),
- lib_qp->sq.phantom_wqe_cnt);
+ ibdev_dbg(&qp->rdev->ibdev,
+ "qp %#x sq->prod %#x sw_prod %#x phantom_wqe_cnt %d\n",
+ lib_qp->id, lib_qp->sq.hwq.prod,
+ HWQ_CMP(lib_qp->sq.hwq.prod, &lib_qp->sq.hwq),
+ lib_qp->sq.phantom_wqe_cnt);
}
spin_unlock_irqrestore(&qp->sq_lock, flags);
@@ -3134,7 +3833,7 @@ static int send_phantom_wqe(struct bnxt_re_qp *qp)
int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
{
struct bnxt_re_cq *cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
- struct bnxt_re_qp *qp;
+ struct bnxt_re_qp *qp, *sh_qp;
struct bnxt_qplib_cqe *cqe;
int i, ncqe, budget;
struct bnxt_qplib_q *sq;
@@ -3143,11 +3842,20 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
struct bnxt_re_sqp_entries *sqp_entry = NULL;
unsigned long flags;
+ /* User CQ; the only processing we do is to
+ * complete any pending CQ resize operation.
+ */
+ if (cq->umem) {
+ if (cq->resize_umem)
+ bnxt_re_resize_cq_complete(cq);
+ return 0;
+ }
+
spin_lock_irqsave(&cq->cq_lock, flags);
budget = min_t(u32, num_entries, cq->max_cql);
num_entries = budget;
if (!cq->cql) {
- dev_err(rdev_to_dev(cq->rdev), "POLL CQ : no CQL to use");
+ ibdev_err(&cq->rdev->ibdev, "POLL CQ : no CQL to use");
goto exit;
}
cqe = &cq->cql[0];
@@ -3160,8 +3868,8 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
qp = container_of(lib_qp,
struct bnxt_re_qp, qplib_qp);
if (send_phantom_wqe(qp) == -ENOMEM)
- dev_err(rdev_to_dev(cq->rdev),
- "Phantom failed! Scheduled to send again\n");
+ ibdev_err(&cq->rdev->ibdev,
+ "Phantom failed! Scheduled to send again\n");
else
sq->send_phantom = false;
}
@@ -3184,13 +3892,11 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
((struct bnxt_qplib_qp *)
(unsigned long)(cqe->qp_handle),
struct bnxt_re_qp, qplib_qp);
- if (!qp) {
- dev_err(rdev_to_dev(cq->rdev),
- "POLL CQ : bad QP handle");
- continue;
- }
wc->qp = &qp->ib_qp;
- wc->ex.imm_data = cqe->immdata;
+ if (cqe->flags & CQ_RES_RC_FLAGS_IMM)
+ wc->ex.imm_data = cpu_to_be32(cqe->immdata);
+ else
+ wc->ex.invalidate_rkey = cqe->invrkey;
wc->src_qp = cqe->src_qp;
memcpy(wc->smac, cqe->smac, ETH_ALEN);
wc->port_num = 1;
@@ -3198,8 +3904,9 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
switch (cqe->opcode) {
case CQ_BASE_CQE_TYPE_REQ:
- if (qp->rdev->qp1_sqp && qp->qplib_qp.id ==
- qp->rdev->qp1_sqp->qplib_qp.id) {
+ sh_qp = qp->rdev->gsi_ctx.gsi_sqp;
+ if (sh_qp &&
+ qp->qplib_qp.id == sh_qp->qplib_qp.id) {
/* Handle this completion with
* the stored completion
*/
@@ -3225,7 +3932,7 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
* stored in the table
*/
tbl_idx = cqe->wr_id;
- sqp_entry = &cq->rdev->sqp_tbl[tbl_idx];
+ sqp_entry = &cq->rdev->gsi_ctx.sqp_tbl[tbl_idx];
wc->wr_id = sqp_entry->wrid;
bnxt_re_process_res_rawqp1_wc(wc, cqe);
break;
@@ -3233,8 +3940,9 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
bnxt_re_process_res_rc_wc(wc, cqe);
break;
case CQ_BASE_CQE_TYPE_RES_UD:
- if (qp->rdev->qp1_sqp && qp->qplib_qp.id ==
- qp->rdev->qp1_sqp->qplib_qp.id) {
+ sh_qp = qp->rdev->gsi_ctx.gsi_sqp;
+ if (sh_qp &&
+ qp->qplib_qp.id == sh_qp->qplib_qp.id) {
/* Handle this completion with
* the stored completion
*/
@@ -3249,9 +3957,9 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
bnxt_re_process_res_ud_wc(qp, wc, cqe);
break;
default:
- dev_err(rdev_to_dev(cq->rdev),
- "POLL CQ : type 0x%x not handled",
- cqe->opcode);
+ ibdev_err(&cq->rdev->ibdev,
+ "POLL CQ : type 0x%x not handled",
+ cqe->opcode);
continue;
}
wc++;
@@ -3297,7 +4005,7 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags)
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
struct bnxt_re_dev *rdev = pd->rdev;
struct bnxt_re_mr *mr;
- u64 pbl = 0;
+ u32 active_mrs;
int rc;
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
@@ -3306,9 +4014,12 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags)
mr->rdev = rdev;
mr->qplib_mr.pd = &pd->qplib_pd;
- mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags);
+ mr->qplib_mr.access_flags = __from_ib_access_flags(mr_access_flags);
mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+ if (mr_access_flags & IB_ACCESS_RELAXED_ORDERING)
+ bnxt_re_check_and_set_relaxed_ordering(rdev, &mr->qplib_mr);
+
/* Allocate and register 0 as the address */
rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
if (rc)
@@ -3316,8 +4027,8 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags)
mr->qplib_mr.hwq.level = PBL_LVL_MAX;
mr->qplib_mr.total_size = -1; /* Infinite length */
- rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl, 0, false,
- PAGE_SIZE);
+ rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, NULL, 0,
+ PAGE_SIZE, false);
if (rc)
goto fail_mr;
@@ -3325,7 +4036,9 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags)
if (mr_access_flags & (IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_ATOMIC))
mr->ib_mr.rkey = mr->ib_mr.lkey;
- atomic_inc(&rdev->mr_count);
+ active_mrs = atomic_inc_return(&rdev->stats.res.mr_count);
+ if (active_mrs > rdev->stats.res.mr_watermark)
+ rdev->stats.res.mr_watermark = active_mrs;
return &mr->ib_mr;
@@ -3343,8 +4056,10 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
int rc;
rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
- if (rc)
- dev_err(rdev_to_dev(rdev), "Dereg MR failed: %#x\n", rc);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Dereg MR failed: %#x\n", rc);
+ return rc;
+ }
if (mr->pages) {
rc = bnxt_qplib_free_fast_reg_page_list(&rdev->qplib_res,
@@ -3353,11 +4068,10 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
mr->npages = 0;
mr->pages = NULL;
}
- if (!IS_ERR_OR_NULL(mr->ib_umem))
- ib_umem_release(mr->ib_umem);
+ ib_umem_release(mr->ib_umem);
kfree(mr);
- atomic_dec(&rdev->mr_count);
+ atomic_dec(&rdev->stats.res.mr_count);
return rc;
}
@@ -3382,15 +4096,16 @@ int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents,
}
struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
struct bnxt_re_dev *rdev = pd->rdev;
struct bnxt_re_mr *mr = NULL;
+ u32 active_mrs;
int rc;
if (type != IB_MR_TYPE_MEM_REG) {
- dev_dbg(rdev_to_dev(rdev), "MR type 0x%x not supported", type);
+ ibdev_dbg(&rdev->ibdev, "MR type 0x%x not supported", type);
return ERR_PTR(-EINVAL);
}
if (max_num_sg > MAX_PBL_LVL_1_PGS)
@@ -3402,7 +4117,7 @@ struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type,
mr->rdev = rdev;
mr->qplib_mr.pd = &pd->qplib_pd;
- mr->qplib_mr.flags = BNXT_QPLIB_FR_PMR;
+ mr->qplib_mr.access_flags = BNXT_QPLIB_FR_PMR;
mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
@@ -3420,12 +4135,14 @@ struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type,
rc = bnxt_qplib_alloc_fast_reg_page_list(&rdev->qplib_res,
&mr->qplib_frpl, max_num_sg);
if (rc) {
- dev_err(rdev_to_dev(rdev),
- "Failed to allocate HW FR page list");
+ ibdev_err(&rdev->ibdev,
+ "Failed to allocate HW FR page list");
goto fail_mr;
}
- atomic_inc(&rdev->mr_count);
+ active_mrs = atomic_inc_return(&rdev->stats.res.mr_count);
+ if (active_mrs > rdev->stats.res.mr_watermark)
+ rdev->stats.res.mr_watermark = active_mrs;
return &mr->ib_mr;
fail_mr:
@@ -3443,6 +4160,7 @@ struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
struct bnxt_re_dev *rdev = pd->rdev;
struct bnxt_re_mw *mw;
+ u32 active_mws;
int rc;
mw = kzalloc(sizeof(*mw), GFP_KERNEL);
@@ -3456,12 +4174,14 @@ struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B);
rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mw->qplib_mw);
if (rc) {
- dev_err(rdev_to_dev(rdev), "Allocate MW failed!");
+ ibdev_err(&rdev->ibdev, "Allocate MW failed!");
goto fail;
}
mw->ib_mw.rkey = mw->qplib_mw.rkey;
- atomic_inc(&rdev->mw_count);
+ active_mws = atomic_inc_return(&rdev->stats.res.mw_count);
+ if (active_mws > rdev->stats.res.mw_watermark)
+ rdev->stats.res.mw_watermark = active_mws;
return &mw->ib_mw;
fail:
@@ -3477,141 +4197,83 @@ int bnxt_re_dealloc_mw(struct ib_mw *ib_mw)
rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mw->qplib_mw);
if (rc) {
- dev_err(rdev_to_dev(rdev), "Free MW failed: %#x\n", rc);
+ ibdev_err(&rdev->ibdev, "Free MW failed: %#x\n", rc);
return rc;
}
kfree(mw);
- atomic_dec(&rdev->mw_count);
+ atomic_dec(&rdev->stats.res.mw_count);
return rc;
}
-static int bnxt_re_page_size_ok(int page_shift)
-{
- switch (page_shift) {
- case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4K:
- case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_8K:
- case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_64K:
- case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_2M:
- case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_256K:
- case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1M:
- case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4M:
- case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1G:
- return 1;
- default:
- return 0;
- }
-}
-
-static int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig,
- int page_shift)
-{
- u64 *pbl_tbl = pbl_tbl_orig;
- u64 page_size = BIT_ULL(page_shift);
- struct ib_block_iter biter;
-
- rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, page_size)
- *pbl_tbl++ = rdma_block_iter_dma_address(&biter);
-
- return pbl_tbl - pbl_tbl_orig;
-}
-
-/* uverbs */
-struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
- u64 virt_addr, int mr_access_flags,
- struct ib_udata *udata)
+static struct ib_mr *__bnxt_re_user_reg_mr(struct ib_pd *ib_pd, u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_umem *umem)
{
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
struct bnxt_re_dev *rdev = pd->rdev;
+ unsigned long page_size;
struct bnxt_re_mr *mr;
- struct ib_umem *umem;
- u64 *pbl_tbl = NULL;
- int umem_pgs, page_shift, rc;
+ int umem_pgs, rc;
+ u32 active_mrs;
if (length > BNXT_RE_MAX_MR_SIZE) {
- dev_err(rdev_to_dev(rdev), "MR Size: %lld > Max supported:%lld\n",
- length, BNXT_RE_MAX_MR_SIZE);
+ ibdev_err(&rdev->ibdev, "MR Size: %lld > Max supported:%lld\n",
+ length, BNXT_RE_MAX_MR_SIZE);
return ERR_PTR(-ENOMEM);
}
+ page_size = ib_umem_find_best_pgsz(umem, BNXT_RE_PAGE_SIZE_SUPPORTED, virt_addr);
+ if (!page_size) {
+ ibdev_err(&rdev->ibdev, "umem page size unsupported!");
+ return ERR_PTR(-EINVAL);
+ }
+
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
mr->rdev = rdev;
mr->qplib_mr.pd = &pd->qplib_pd;
- mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags);
+ mr->qplib_mr.access_flags = __from_ib_access_flags(mr_access_flags);
mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_MR;
- rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
- if (rc) {
- dev_err(rdev_to_dev(rdev), "Failed to allocate MR");
- goto free_mr;
- }
- /* The fixed portion of the rkey is the same as the lkey */
- mr->ib_mr.rkey = mr->qplib_mr.rkey;
-
- umem = ib_umem_get(udata, start, length, mr_access_flags, 0);
- if (IS_ERR(umem)) {
- dev_err(rdev_to_dev(rdev), "Failed to get umem");
- rc = -EFAULT;
- goto free_mrw;
+ if (!_is_alloc_mr_unified(rdev->dev_attr->dev_cap_flags)) {
+ rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to allocate MR rc = %d", rc);
+ rc = -EIO;
+ goto free_mr;
+ }
+ /* The fixed portion of the rkey is the same as the lkey */
+ mr->ib_mr.rkey = mr->qplib_mr.rkey;
+ } else {
+ mr->qplib_mr.flags = CMDQ_REGISTER_MR_FLAGS_ALLOC_MR;
}
mr->ib_umem = umem;
-
mr->qplib_mr.va = virt_addr;
- umem_pgs = ib_umem_page_count(umem);
- if (!umem_pgs) {
- dev_err(rdev_to_dev(rdev), "umem is invalid!");
- rc = -EINVAL;
- goto free_umem;
- }
mr->qplib_mr.total_size = length;
- pbl_tbl = kcalloc(umem_pgs, sizeof(u64 *), GFP_KERNEL);
- if (!pbl_tbl) {
- rc = -ENOMEM;
- goto free_umem;
- }
-
- page_shift = __ffs(ib_umem_find_best_pgsz(umem,
- BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M,
- virt_addr));
-
- if (!bnxt_re_page_size_ok(page_shift)) {
- dev_err(rdev_to_dev(rdev), "umem page size unsupported!");
- rc = -EFAULT;
- goto fail;
- }
-
- if (page_shift == BNXT_RE_PAGE_SHIFT_4K &&
- length > BNXT_RE_MAX_MR_SIZE_LOW) {
- dev_err(rdev_to_dev(rdev), "Requested MR Sz:%llu Max sup:%llu",
- length, (u64)BNXT_RE_MAX_MR_SIZE_LOW);
- rc = -EINVAL;
- goto fail;
- }
+ if (mr_access_flags & IB_ACCESS_RELAXED_ORDERING)
+ bnxt_re_check_and_set_relaxed_ordering(rdev, &mr->qplib_mr);
- /* Map umem buf ptrs to the PBL */
- umem_pgs = fill_umem_pbl_tbl(umem, pbl_tbl, page_shift);
- rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl,
- umem_pgs, false, 1 << page_shift);
+ umem_pgs = ib_umem_num_dma_blocks(umem, page_size);
+ rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, umem,
+ umem_pgs, page_size,
+ _is_alloc_mr_unified(rdev->dev_attr->dev_cap_flags));
if (rc) {
- dev_err(rdev_to_dev(rdev), "Failed to register user MR");
- goto fail;
+ ibdev_err(&rdev->ibdev, "Failed to register user MR - rc = %d\n", rc);
+ rc = -EIO;
+ goto free_mrw;
}
- kfree(pbl_tbl);
-
mr->ib_mr.lkey = mr->qplib_mr.lkey;
mr->ib_mr.rkey = mr->qplib_mr.lkey;
- atomic_inc(&rdev->mr_count);
+ active_mrs = atomic_inc_return(&rdev->stats.res.mr_count);
+ if (active_mrs > rdev->stats.res.mr_watermark)
+ rdev->stats.res.mr_watermark = active_mrs;
return &mr->ib_mr;
-fail:
- kfree(pbl_tbl);
-free_umem:
- ib_umem_release(umem);
+
free_mrw:
bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
free_mr:
@@ -3619,23 +4281,75 @@ free_mr:
return ERR_PTR(rc);
}
+struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_dmah *dmah,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct ib_umem *umem;
+ struct ib_mr *ib_mr;
+
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ umem = ib_umem_get(&rdev->ibdev, start, length, mr_access_flags);
+ if (IS_ERR(umem))
+ return ERR_CAST(umem);
+
+ ib_mr = __bnxt_re_user_reg_mr(ib_pd, length, virt_addr, mr_access_flags, umem);
+ if (IS_ERR(ib_mr))
+ ib_umem_release(umem);
+ return ib_mr;
+}
+
+struct ib_mr *bnxt_re_reg_user_mr_dmabuf(struct ib_pd *ib_pd, u64 start,
+ u64 length, u64 virt_addr, int fd,
+ int mr_access_flags,
+ struct ib_dmah *dmah,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct ib_umem_dmabuf *umem_dmabuf;
+ struct ib_umem *umem;
+ struct ib_mr *ib_mr;
+
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ umem_dmabuf = ib_umem_dmabuf_get_pinned(&rdev->ibdev, start, length,
+ fd, mr_access_flags);
+ if (IS_ERR(umem_dmabuf))
+ return ERR_CAST(umem_dmabuf);
+
+ umem = &umem_dmabuf->umem;
+
+ ib_mr = __bnxt_re_user_reg_mr(ib_pd, length, virt_addr, mr_access_flags, umem);
+ if (IS_ERR(ib_mr))
+ ib_umem_release(umem);
+ return ib_mr;
+}
+
int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata)
{
struct ib_device *ibdev = ctx->device;
struct bnxt_re_ucontext *uctx =
container_of(ctx, struct bnxt_re_ucontext, ib_uctx);
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
- struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
- struct bnxt_re_uctx_resp resp;
+ struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr;
+ struct bnxt_re_user_mmap_entry *entry;
+ struct bnxt_re_uctx_resp resp = {};
+ struct bnxt_re_uctx_req ureq = {};
u32 chip_met_rev_num = 0;
int rc;
- dev_dbg(rdev_to_dev(rdev), "ABI version requested %d",
- ibdev->uverbs_abi_ver);
+ ibdev_dbg(ibdev, "ABI version requested %u", ibdev->ops.uverbs_abi_ver);
- if (ibdev->uverbs_abi_ver != BNXT_RE_ABI_VERSION) {
- dev_dbg(rdev_to_dev(rdev), " is different from the device %d ",
- BNXT_RE_ABI_VERSION);
+ if (ibdev->ops.uverbs_abi_ver != BNXT_RE_ABI_VERSION) {
+ ibdev_dbg(ibdev, " is different from the device %d ",
+ BNXT_RE_ABI_VERSION);
return -EPERM;
}
@@ -3649,25 +4363,53 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata)
spin_lock_init(&uctx->sh_lock);
resp.comp_mask = BNXT_RE_UCNTX_CMASK_HAVE_CCTX;
- chip_met_rev_num = rdev->chip_ctx.chip_num;
- chip_met_rev_num |= ((u32)rdev->chip_ctx.chip_rev & 0xFF) <<
+ chip_met_rev_num = rdev->chip_ctx->chip_num;
+ chip_met_rev_num |= ((u32)rdev->chip_ctx->chip_rev & 0xFF) <<
BNXT_RE_CHIP_ID0_CHIP_REV_SFT;
- chip_met_rev_num |= ((u32)rdev->chip_ctx.chip_metal & 0xFF) <<
+ chip_met_rev_num |= ((u32)rdev->chip_ctx->chip_metal & 0xFF) <<
BNXT_RE_CHIP_ID0_CHIP_MET_SFT;
resp.chip_id0 = chip_met_rev_num;
- /* Future extension of chip info */
- resp.chip_id1 = 0;
/* Temp, use xa_alloc instead */
resp.dev_id = rdev->en_dev->pdev->devfn;
resp.max_qp = rdev->qplib_ctx.qpc_count;
resp.pg_size = PAGE_SIZE;
resp.cqe_sz = sizeof(struct cq_base);
resp.max_cqd = dev_attr->max_cq_wqes;
- resp.rsvd = 0;
+
+ if (rdev->chip_ctx->modes.db_push)
+ resp.comp_mask |= BNXT_RE_UCNTX_CMASK_WC_DPI_ENABLED;
+
+ entry = bnxt_re_mmap_entry_insert(uctx, 0, BNXT_RE_MMAP_SH_PAGE, NULL);
+ if (!entry) {
+ rc = -ENOMEM;
+ goto cfail;
+ }
+ uctx->shpage_mmap = &entry->rdma_entry;
+ if (rdev->pacing.dbr_pacing)
+ resp.comp_mask |= BNXT_RE_UCNTX_CMASK_DBR_PACING_ENABLED;
+
+ if (_is_host_msn_table(rdev->qplib_res.dattr->dev_cap_flags2))
+ resp.comp_mask |= BNXT_RE_UCNTX_CMASK_MSN_TABLE_ENABLED;
+
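+ /* Optional capabilities are granted only when the user library
+ * explicitly asks for them through the (optional) bnxt_re_uctx_req.
+ */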
+ if (udata->inlen >= sizeof(ureq)) {
+ rc = ib_copy_from_udata(&ureq, udata, min(udata->inlen, sizeof(ureq)));
+ if (rc)
+ goto cfail;
+ if (ureq.comp_mask & BNXT_RE_COMP_MASK_REQ_UCNTX_POW2_SUPPORT) {
+ resp.comp_mask |= BNXT_RE_UCNTX_CMASK_POW2_DISABLED;
+ uctx->cmask |= BNXT_RE_UCNTX_CAP_POW2_DISABLED;
+ }
+ if (ureq.comp_mask & BNXT_RE_COMP_MASK_REQ_UCNTX_VAR_WQE_SUPPORT) {
+ resp.comp_mask |= BNXT_RE_UCNTX_CMASK_HAVE_MODE;
+ resp.mode = rdev->chip_ctx->modes.wqe_mode;
+ if (resp.mode == BNXT_QPLIB_WQE_MODE_VARIABLE)
+ uctx->cmask |= BNXT_RE_UCNTX_CAP_VAR_WQE_ENABLED;
+ }
+ }
rc = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
if (rc) {
- dev_err(rdev_to_dev(rdev), "Failed to copy user context");
+ ibdev_err(ibdev, "Failed to copy user context");
rc = -EFAULT;
goto cfail;
}
@@ -3688,6 +4430,8 @@ void bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx)
struct bnxt_re_dev *rdev = uctx->rdev;
+ rdma_user_mmap_entry_remove(uctx->shpage_mmap);
+ uctx->shpage_mmap = NULL;
if (uctx->shpg)
free_page((unsigned long)uctx->shpg);
@@ -3695,40 +4439,500 @@ void bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx)
/* Free DPI only if this is the first PD allocated by the
* application and mark the context dpi as NULL
*/
- bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
- &rdev->qplib_res.dpi_tbl, &uctx->dpi);
+ bnxt_qplib_dealloc_dpi(&rdev->qplib_res, &uctx->dpi);
uctx->dpi.dbr = NULL;
}
}
+static int bnxt_re_setup_vnic(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp)
+{
+ int rc;
+
+ rc = bnxt_re_hwrm_alloc_vnic(rdev);
+ if (rc)
+ return rc;
+
+ rc = bnxt_re_hwrm_cfg_vnic(rdev, qp->qplib_qp.id);
+ if (rc)
+ goto out_free_vnic;
+
+ return 0;
+out_free_vnic:
+ bnxt_re_hwrm_free_vnic(rdev);
+ return rc;
+}
+
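+/* Only a single sniffer flow (used for RoCE mirroring) is supported at a
+ * time, and only when firmware advertises the roce_mirror capability.
+ */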
+struct ib_flow *bnxt_re_create_flow(struct ib_qp *ib_qp,
+ struct ib_flow_attr *attr,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+ struct bnxt_re_dev *rdev = qp->rdev;
+ struct bnxt_re_flow *flow;
+ int rc;
+
+ if (attr->type != IB_FLOW_ATTR_SNIFFER ||
+ !rdev->rcfw.roce_mirror)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&rdev->qp_lock);
+ if (rdev->sniffer_flow_created) {
+ ibdev_err(&rdev->ibdev, "RoCE mirroring is already configured\n");
+ mutex_unlock(&rdev->qp_lock);
+ return ERR_PTR(-EBUSY);
+ }
+
+ flow = kzalloc(sizeof(*flow), GFP_KERNEL);
+ if (!flow) {
+ mutex_unlock(&rdev->qp_lock);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ flow->rdev = rdev;
+
+ rc = bnxt_re_setup_vnic(rdev, qp);
+ if (rc)
+ goto out_free_flow;
+
+ rc = bnxt_qplib_create_flow(&rdev->qplib_res);
+ if (rc)
+ goto out_free_vnic;
+
+ rdev->sniffer_flow_created = 1;
+ mutex_unlock(&rdev->qp_lock);
+
+ return &flow->ib_flow;
+
+out_free_vnic:
+ bnxt_re_hwrm_free_vnic(rdev);
+out_free_flow:
+ mutex_unlock(&rdev->qp_lock);
+ kfree(flow);
+ return ERR_PTR(rc);
+}
+
+int bnxt_re_destroy_flow(struct ib_flow *flow_id)
+{
+ struct bnxt_re_flow *flow =
+ container_of(flow_id, struct bnxt_re_flow, ib_flow);
+ struct bnxt_re_dev *rdev = flow->rdev;
+ int rc;
+
+ mutex_lock(&rdev->qp_lock);
+ rc = bnxt_qplib_destroy_flow(&rdev->qplib_res);
+ if (rc)
+ ibdev_dbg(&rdev->ibdev, "failed to destroy_flow rc = %d\n", rc);
+ rdev->sniffer_flow_created = 0;
+
+ bnxt_re_hwrm_free_vnic(rdev);
+ mutex_unlock(&rdev->qp_lock);
+ kfree(flow);
+
+ return rc;
+}
+
+static struct bnxt_re_cq *bnxt_re_search_for_cq(struct bnxt_re_dev *rdev, u32 cq_id)
+{
+ struct bnxt_re_cq *cq = NULL, *tmp_cq;
+
+ hash_for_each_possible(rdev->cq_hash, tmp_cq, hash_entry, cq_id) {
+ if (tmp_cq->qplib_cq.id == cq_id) {
+ cq = tmp_cq;
+ break;
+ }
+ }
+ return cq;
+}
+
+static struct bnxt_re_srq *bnxt_re_search_for_srq(struct bnxt_re_dev *rdev, u32 srq_id)
+{
+ struct bnxt_re_srq *srq = NULL, *tmp_srq;
+
+ hash_for_each_possible(rdev->srq_hash, tmp_srq, hash_entry, srq_id) {
+ if (tmp_srq->qplib_srq.id == srq_id) {
+ srq = tmp_srq;
+ break;
+ }
+ }
+ return srq;
+}
+
/* Helper function to mmap the virtual memory from user app */
int bnxt_re_mmap(struct ib_ucontext *ib_uctx, struct vm_area_struct *vma)
{
struct bnxt_re_ucontext *uctx = container_of(ib_uctx,
struct bnxt_re_ucontext,
ib_uctx);
- struct bnxt_re_dev *rdev = uctx->rdev;
+ struct bnxt_re_user_mmap_entry *bnxt_entry;
+ struct rdma_user_mmap_entry *rdma_entry;
+ int ret = 0;
u64 pfn;
- if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ rdma_entry = rdma_user_mmap_entry_get(&uctx->ib_uctx, vma);
+ if (!rdma_entry)
return -EINVAL;
- if (vma->vm_pgoff) {
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
- PAGE_SIZE, vma->vm_page_prot)) {
- dev_err(rdev_to_dev(rdev), "Failed to map DPI");
- return -EAGAIN;
+ bnxt_entry = container_of(rdma_entry, struct bnxt_re_user_mmap_entry,
+ rdma_entry);
+
+ switch (bnxt_entry->mmap_flag) {
+ case BNXT_RE_MMAP_WC_DB:
+ pfn = bnxt_entry->mem_offset >> PAGE_SHIFT;
+ ret = rdma_user_mmap_io(ib_uctx, vma, pfn, PAGE_SIZE,
+ pgprot_writecombine(vma->vm_page_prot),
+ rdma_entry);
+ break;
+ case BNXT_RE_MMAP_UC_DB:
+ pfn = bnxt_entry->mem_offset >> PAGE_SHIFT;
+ ret = rdma_user_mmap_io(ib_uctx, vma, pfn, PAGE_SIZE,
+ pgprot_noncached(vma->vm_page_prot),
+ rdma_entry);
+ break;
+ case BNXT_RE_MMAP_SH_PAGE:
+ ret = vm_insert_page(vma, vma->vm_start, virt_to_page(uctx->shpg));
+ break;
+ case BNXT_RE_MMAP_DBR_BAR:
+ pfn = bnxt_entry->mem_offset >> PAGE_SHIFT;
+ ret = rdma_user_mmap_io(ib_uctx, vma, pfn, PAGE_SIZE,
+ pgprot_noncached(vma->vm_page_prot),
+ rdma_entry);
+ break;
+ case BNXT_RE_MMAP_DBR_PAGE:
+ case BNXT_RE_MMAP_TOGGLE_PAGE:
+ /* Driver doesn't expect write access for user space */
+ if (vma->vm_flags & VM_WRITE)
+ ret = -EFAULT;
+ else
+ ret = vm_insert_page(vma, vma->vm_start,
+ virt_to_page((void *)bnxt_entry->mem_offset));
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ rdma_user_mmap_entry_put(rdma_entry);
+ return ret;
+}
+
+void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
+{
+ struct bnxt_re_user_mmap_entry *bnxt_entry;
+
+ bnxt_entry = container_of(rdma_entry, struct bnxt_re_user_mmap_entry,
+ rdma_entry);
+
+ kfree(bnxt_entry);
+}
+
+int bnxt_re_process_mad(struct ib_device *ibdev, int mad_flags,
+ u32 port_num, const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh,
+ const struct ib_mad *in_mad, struct ib_mad *out_mad,
+ size_t *out_mad_size, u16 *out_mad_pkey_index)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct ib_class_port_info cpi = {};
+ int ret = IB_MAD_RESULT_SUCCESS;
+ int rc = 0;
+
+ if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
+ return ret;
+
+ switch (in_mad->mad_hdr.attr_id) {
+ case IB_PMA_CLASS_PORT_INFO:
+ cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
+ memcpy((out_mad->data + 40), &cpi, sizeof(cpi));
+ break;
+ case IB_PMA_PORT_COUNTERS_EXT:
+ rc = bnxt_re_assign_pma_port_ext_counters(rdev, out_mad);
+ break;
+ case IB_PMA_PORT_COUNTERS:
+ rc = bnxt_re_assign_pma_port_counters(rdev, out_mad);
+ break;
+ default:
+ rc = -EINVAL;
+ break;
+ }
+ if (rc)
+ return IB_MAD_RESULT_FAILURE;
+ ret |= IB_MAD_RESULT_REPLY;
+ return ret;
+}
+
+static int UVERBS_HANDLER(BNXT_RE_METHOD_NOTIFY_DRV)(struct uverbs_attr_bundle *attrs)
+{
+ struct bnxt_re_ucontext *uctx;
+
+ uctx = container_of(ib_uverbs_get_ucontext(attrs), struct bnxt_re_ucontext, ib_uctx);
+ bnxt_re_pacing_alert(uctx->rdev);
+ return 0;
+}
+
+static int UVERBS_HANDLER(BNXT_RE_METHOD_ALLOC_PAGE)(struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, BNXT_RE_ALLOC_PAGE_HANDLE);
+ enum bnxt_re_alloc_page_type alloc_type;
+ struct bnxt_re_user_mmap_entry *entry;
+ enum bnxt_re_mmap_flag mmap_flag;
+ struct bnxt_qplib_chip_ctx *cctx;
+ struct bnxt_re_ucontext *uctx;
+ struct bnxt_re_dev *rdev;
+ u64 mmap_offset;
+ u32 length;
+ u32 dpi;
+ u64 addr;
+ int err;
+
+ uctx = container_of(ib_uverbs_get_ucontext(attrs), struct bnxt_re_ucontext, ib_uctx);
+ if (IS_ERR(uctx))
+ return PTR_ERR(uctx);
+
+ err = uverbs_get_const(&alloc_type, attrs, BNXT_RE_ALLOC_PAGE_TYPE);
+ if (err)
+ return err;
+
+ rdev = uctx->rdev;
+ cctx = rdev->chip_ctx;
+
+ switch (alloc_type) {
+ case BNXT_RE_ALLOC_WC_PAGE:
+ if (cctx->modes.db_push) {
+ if (bnxt_qplib_alloc_dpi(&rdev->qplib_res, &uctx->wcdpi,
+ uctx, BNXT_QPLIB_DPI_TYPE_WC))
+ return -ENOMEM;
+ length = PAGE_SIZE;
+ dpi = uctx->wcdpi.dpi;
+ addr = (u64)uctx->wcdpi.umdbr;
+ mmap_flag = BNXT_RE_MMAP_WC_DB;
+ } else {
+ return -EINVAL;
}
- } else {
- pfn = virt_to_phys(uctx->shpg) >> PAGE_SHIFT;
- if (remap_pfn_range(vma, vma->vm_start,
- pfn, PAGE_SIZE, vma->vm_page_prot)) {
- dev_err(rdev_to_dev(rdev),
- "Failed to map shared page");
- return -EAGAIN;
+
+ break;
+ case BNXT_RE_ALLOC_DBR_BAR_PAGE:
+ length = PAGE_SIZE;
+ addr = (u64)rdev->pacing.dbr_bar_addr;
+ mmap_flag = BNXT_RE_MMAP_DBR_BAR;
+ break;
+
+ case BNXT_RE_ALLOC_DBR_PAGE:
+ length = PAGE_SIZE;
+ addr = (u64)rdev->pacing.dbr_page;
+ mmap_flag = BNXT_RE_MMAP_DBR_PAGE;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ entry = bnxt_re_mmap_entry_insert(uctx, addr, mmap_flag, &mmap_offset);
+ if (!entry)
+ return -ENOMEM;
+
+ uobj->object = entry;
+ uverbs_finalize_uobj_create(attrs, BNXT_RE_ALLOC_PAGE_HANDLE);
+ err = uverbs_copy_to(attrs, BNXT_RE_ALLOC_PAGE_MMAP_OFFSET,
+ &mmap_offset, sizeof(mmap_offset));
+ if (err)
+ return err;
+
+ err = uverbs_copy_to(attrs, BNXT_RE_ALLOC_PAGE_MMAP_LENGTH,
+ &length, sizeof(length));
+ if (err)
+ return err;
+
+ err = uverbs_copy_to(attrs, BNXT_RE_ALLOC_PAGE_DPI,
+ &dpi, sizeof(dpi));
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int alloc_page_obj_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct bnxt_re_user_mmap_entry *entry = uobject->object;
+ struct bnxt_re_ucontext *uctx = entry->uctx;
+
+ switch (entry->mmap_flag) {
+ case BNXT_RE_MMAP_WC_DB:
+ if (uctx && uctx->wcdpi.dbr) {
+ struct bnxt_re_dev *rdev = uctx->rdev;
+
+ bnxt_qplib_dealloc_dpi(&rdev->qplib_res, &uctx->wcdpi);
+ uctx->wcdpi.dbr = NULL;
}
+ break;
+ case BNXT_RE_MMAP_DBR_BAR:
+ case BNXT_RE_MMAP_DBR_PAGE:
+ break;
+ default:
+ goto exit;
+ }
+ rdma_user_mmap_entry_remove(&entry->rdma_entry);
+exit:
+ return 0;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(BNXT_RE_METHOD_ALLOC_PAGE,
+ UVERBS_ATTR_IDR(BNXT_RE_ALLOC_PAGE_HANDLE,
+ BNXT_RE_OBJECT_ALLOC_PAGE,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(BNXT_RE_ALLOC_PAGE_TYPE,
+ enum bnxt_re_alloc_page_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(BNXT_RE_ALLOC_PAGE_MMAP_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(BNXT_RE_ALLOC_PAGE_MMAP_LENGTH,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(BNXT_RE_ALLOC_PAGE_DPI,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(BNXT_RE_METHOD_DESTROY_PAGE,
+ UVERBS_ATTR_IDR(BNXT_RE_DESTROY_PAGE_HANDLE,
+ BNXT_RE_OBJECT_ALLOC_PAGE,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(BNXT_RE_OBJECT_ALLOC_PAGE,
+ UVERBS_TYPE_ALLOC_IDR(alloc_page_obj_cleanup),
+ &UVERBS_METHOD(BNXT_RE_METHOD_ALLOC_PAGE),
+ &UVERBS_METHOD(BNXT_RE_METHOD_DESTROY_PAGE));
+
+DECLARE_UVERBS_NAMED_METHOD(BNXT_RE_METHOD_NOTIFY_DRV);
+
+DECLARE_UVERBS_GLOBAL_METHODS(BNXT_RE_OBJECT_NOTIFY_DRV,
+ &UVERBS_METHOD(BNXT_RE_METHOD_NOTIFY_DRV));
+
+/* Toggle MEM */
+static int UVERBS_HANDLER(BNXT_RE_METHOD_GET_TOGGLE_MEM)(struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, BNXT_RE_TOGGLE_MEM_HANDLE);
+ enum bnxt_re_mmap_flag mmap_flag = BNXT_RE_MMAP_TOGGLE_PAGE;
+ enum bnxt_re_get_toggle_mem_type res_type;
+ struct bnxt_re_user_mmap_entry *entry;
+ struct bnxt_re_ucontext *uctx;
+ struct ib_ucontext *ib_uctx;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_srq *srq;
+ u32 length = PAGE_SIZE;
+ struct bnxt_re_cq *cq;
+ u64 mem_offset;
+ u32 offset = 0;
+ u64 addr = 0;
+ u32 res_id;
+ int err;
+
+ ib_uctx = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ib_uctx))
+ return PTR_ERR(ib_uctx);
+
+ err = uverbs_get_const(&res_type, attrs, BNXT_RE_TOGGLE_MEM_TYPE);
+ if (err)
+ return err;
+
+ uctx = container_of(ib_uctx, struct bnxt_re_ucontext, ib_uctx);
+ rdev = uctx->rdev;
+ err = uverbs_copy_from(&res_id, attrs, BNXT_RE_TOGGLE_MEM_RES_ID);
+ if (err)
+ return err;
+
+ switch (res_type) {
+ case BNXT_RE_CQ_TOGGLE_MEM:
+ cq = bnxt_re_search_for_cq(rdev, res_id);
+ if (!cq)
+ return -EINVAL;
+
+ addr = (u64)cq->uctx_cq_page;
+ break;
+ case BNXT_RE_SRQ_TOGGLE_MEM:
+ srq = bnxt_re_search_for_srq(rdev, res_id);
+ if (!srq)
+ return -EINVAL;
+
+ addr = (u64)srq->uctx_srq_page;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
}
+ entry = bnxt_re_mmap_entry_insert(uctx, addr, mmap_flag, &mem_offset);
+ if (!entry)
+ return -ENOMEM;
+
+ uobj->object = entry;
+ uverbs_finalize_uobj_create(attrs, BNXT_RE_TOGGLE_MEM_HANDLE);
+ err = uverbs_copy_to(attrs, BNXT_RE_TOGGLE_MEM_MMAP_PAGE,
+ &mem_offset, sizeof(mem_offset));
+ if (err)
+ return err;
+
+ err = uverbs_copy_to(attrs, BNXT_RE_TOGGLE_MEM_MMAP_LENGTH,
+ &length, sizeof(length));
+ if (err)
+ return err;
+
+ err = uverbs_copy_to(attrs, BNXT_RE_TOGGLE_MEM_MMAP_OFFSET,
+ &offset, sizeof(offset));
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int get_toggle_mem_obj_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct bnxt_re_user_mmap_entry *entry = uobject->object;
+
+ rdma_user_mmap_entry_remove(&entry->rdma_entry);
return 0;
}
+
+DECLARE_UVERBS_NAMED_METHOD(BNXT_RE_METHOD_GET_TOGGLE_MEM,
+ UVERBS_ATTR_IDR(BNXT_RE_TOGGLE_MEM_HANDLE,
+ BNXT_RE_OBJECT_GET_TOGGLE_MEM,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(BNXT_RE_TOGGLE_MEM_TYPE,
+ enum bnxt_re_get_toggle_mem_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(BNXT_RE_TOGGLE_MEM_RES_ID,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(BNXT_RE_TOGGLE_MEM_MMAP_PAGE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(BNXT_RE_TOGGLE_MEM_MMAP_OFFSET,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(BNXT_RE_TOGGLE_MEM_MMAP_LENGTH,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(BNXT_RE_METHOD_RELEASE_TOGGLE_MEM,
+ UVERBS_ATTR_IDR(BNXT_RE_RELEASE_TOGGLE_MEM_HANDLE,
+ BNXT_RE_OBJECT_GET_TOGGLE_MEM,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(BNXT_RE_OBJECT_GET_TOGGLE_MEM,
+ UVERBS_TYPE_ALLOC_IDR(get_toggle_mem_obj_cleanup),
+ &UVERBS_METHOD(BNXT_RE_METHOD_GET_TOGGLE_MEM),
+ &UVERBS_METHOD(BNXT_RE_METHOD_RELEASE_TOGGLE_MEM));
+
+const struct uapi_definition bnxt_re_uapi_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(BNXT_RE_OBJECT_ALLOC_PAGE),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(BNXT_RE_OBJECT_NOTIFY_DRV),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(BNXT_RE_OBJECT_GET_TOGGLE_MEM),
+ {}
+};
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index 09a33049e42f..76ba9ab04d5c 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -60,6 +60,8 @@ struct bnxt_re_pd {
struct bnxt_re_dev *rdev;
struct bnxt_qplib_pd qplib_pd;
struct bnxt_re_fence_data fence;
+ struct rdma_user_mmap_entry *pd_db_mmap;
+ struct rdma_user_mmap_entry *pd_wcdb_mmap;
};
struct bnxt_re_ah {
@@ -75,12 +77,14 @@ struct bnxt_re_srq {
struct bnxt_qplib_srq qplib_srq;
struct ib_umem *umem;
spinlock_t lock; /* protect srq */
+ void *uctx_srq_page;
+ struct hlist_node hash_entry;
};
struct bnxt_re_qp {
+ struct ib_qp ib_qp;
struct list_head list;
struct bnxt_re_dev *rdev;
- struct ib_qp ib_qp;
spinlock_t sq_lock; /* protect sq */
spinlock_t rq_lock; /* protect rq */
struct bnxt_qplib_qp qplib_qp;
@@ -91,19 +95,24 @@ struct bnxt_re_qp {
struct ib_ud_header qp1_hdr;
struct bnxt_re_cq *scq;
struct bnxt_re_cq *rcq;
+ struct dentry *dentry;
};
struct bnxt_re_cq {
+ struct ib_cq ib_cq;
struct bnxt_re_dev *rdev;
spinlock_t cq_lock; /* protect cq */
u16 cq_count;
u16 cq_period;
- struct ib_cq ib_cq;
struct bnxt_qplib_cq qplib_cq;
struct bnxt_qplib_cqe *cql;
#define MAX_CQL_PER_POLL 1024
u32 max_cql;
struct ib_umem *umem;
+ struct ib_umem *resize_umem;
+ int resize_cqe;
+ void *uctx_cq_page;
+ struct hlist_node hash_entry;
};
struct bnxt_re_mr {
@@ -122,12 +131,6 @@ struct bnxt_re_frpl {
u64 *page_list;
};
-struct bnxt_re_fmr {
- struct bnxt_re_dev *rdev;
- struct ib_fmr ib_fmr;
- struct bnxt_qplib_mrw qplib_fmr;
-};
-
struct bnxt_re_mw {
struct bnxt_re_dev *rdev;
struct ib_mw ib_mw;
@@ -138,36 +141,89 @@ struct bnxt_re_ucontext {
struct ib_ucontext ib_uctx;
struct bnxt_re_dev *rdev;
struct bnxt_qplib_dpi dpi;
+ struct bnxt_qplib_dpi wcdpi;
void *shpg;
spinlock_t sh_lock; /* protect shpg */
+ struct rdma_user_mmap_entry *shpage_mmap;
+ u64 cmask;
+};
+
+enum bnxt_re_mmap_flag {
+ BNXT_RE_MMAP_SH_PAGE,
+ BNXT_RE_MMAP_UC_DB,
+ BNXT_RE_MMAP_WC_DB,
+ BNXT_RE_MMAP_DBR_PAGE,
+ BNXT_RE_MMAP_DBR_BAR,
+ BNXT_RE_MMAP_TOGGLE_PAGE,
+};
+
+struct bnxt_re_user_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
+ struct bnxt_re_ucontext *uctx;
+ u64 mem_offset;
+ u8 mmap_flag;
+};
+
+struct bnxt_re_flow {
+ struct ib_flow ib_flow;
+ struct bnxt_re_dev *rdev;
+};
+
+static inline u16 bnxt_re_get_swqe_size(int nsge)
+{
+ return sizeof(struct sq_send_hdr) + nsge * sizeof(struct sq_sge);
+}
+
+static inline u16 bnxt_re_get_rwqe_size(int nsge)
+{
+ return sizeof(struct rq_wqe_hdr) + (nsge * sizeof(struct sq_sge));
+}
+
+enum {
+ BNXT_RE_UCNTX_CAP_POW2_DISABLED = 0x1ULL,
+ BNXT_RE_UCNTX_CAP_VAR_WQE_ENABLED = 0x2ULL,
};
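+/* Round the requested queue depth up to a power of two only for user
+ * contexts that have not opted out via BNXT_RE_UCNTX_CAP_POW2_DISABLED;
+ * kernel callers (no uctx) keep the requested depth as-is.
+ */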
+static inline u32 bnxt_re_init_depth(u32 ent, struct bnxt_re_ucontext *uctx)
+{
+ return uctx ? (uctx->cmask & BNXT_RE_UCNTX_CAP_POW2_DISABLED) ?
+ ent : roundup_pow_of_two(ent) : ent;
+}
+
+static inline bool bnxt_re_is_var_size_supported(struct bnxt_re_dev *rdev,
+ struct bnxt_re_ucontext *uctx)
+{
+ if (uctx)
+ return uctx->cmask & BNXT_RE_UCNTX_CAP_VAR_WQE_ENABLED;
+ else
+ return rdev->chip_ctx->modes.wqe_mode;
+}
+
int bnxt_re_query_device(struct ib_device *ibdev,
struct ib_device_attr *ib_attr,
struct ib_udata *udata);
int bnxt_re_modify_device(struct ib_device *ibdev,
int device_modify_mask,
struct ib_device_modify *device_modify);
-int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
+int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num,
struct ib_port_attr *port_attr);
-int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num,
+int bnxt_re_get_port_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable);
void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str);
-int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num,
+int bnxt_re_query_pkey(struct ib_device *ibdev, u32 port_num,
u16 index, u16 *pkey);
int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context);
int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context);
-int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num,
+int bnxt_re_query_gid(struct ib_device *ibdev, u32 port_num,
int index, union ib_gid *gid);
enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
- u8 port_num);
+ u32 port_num);
int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-void bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags,
+int bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
-int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
-void bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags);
+int bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags);
int bnxt_re_create_srq(struct ib_srq *srq,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata);
@@ -175,12 +231,11 @@ int bnxt_re_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr,
enum ib_srq_attr_mask srq_attr_mask,
struct ib_udata *udata);
int bnxt_re_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
-void bnxt_re_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
+int bnxt_re_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
int bnxt_re_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *recv_wr,
const struct ib_recv_wr **bad_recv_wr);
-struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *qp_init_attr,
- struct ib_udata *udata);
+int bnxt_re_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata);
int bnxt_re_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_udata *udata);
int bnxt_re_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
@@ -190,9 +245,9 @@ int bnxt_re_post_send(struct ib_qp *qp, const struct ib_send_wr *send_wr,
const struct ib_send_wr **bad_send_wr);
int bnxt_re_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
const struct ib_recv_wr **bad_recv_wr);
-struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
int bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int bnxt_re_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
int bnxt_re_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
@@ -201,17 +256,40 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
+ u32 max_num_sg);
int bnxt_re_dereg_mr(struct ib_mr *mr, struct ib_udata *udata);
struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
struct ib_udata *udata);
int bnxt_re_dealloc_mw(struct ib_mw *mw);
struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata);
+struct ib_mr *bnxt_re_reg_user_mr_dmabuf(struct ib_pd *ib_pd, u64 start,
+ u64 length, u64 virt_addr,
+ int fd, int mr_access_flags,
+ struct ib_dmah *dmah,
+ struct uverbs_attr_bundle *attrs);
int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata);
void bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
+struct ib_flow *bnxt_re_create_flow(struct ib_qp *ib_qp,
+ struct ib_flow_attr *attr,
+ struct ib_udata *udata);
+int bnxt_re_destroy_flow(struct ib_flow *flow_id);
+
int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
+
+int bnxt_re_process_mad(struct ib_device *device, int process_mad_flags,
+ u32 port_num, const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh,
+ const struct ib_mad *in_mad, struct ib_mad *out_mad,
+ size_t *out_mad_size, u16 *out_mad_pkey_index);
+
+static inline u32 __to_ib_port_num(u16 port_id)
+{
+ return (u32)port_id + 1;
+}
unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp);
void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp, unsigned long flags);
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 814f959c7db9..73003ad25ee8 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -48,11 +48,13 @@
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <linux/if_ether.h>
+#include <linux/auxiliary_bus.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_addr.h>
+#include <linux/hashtable.h>
#include "bnxt_ulp.h"
#include "roce_hsi.h"
@@ -65,52 +67,157 @@
#include <rdma/bnxt_re-abi.h>
#include "bnxt.h"
#include "hw_counters.h"
+#include "debugfs.h"
static char version[] =
BNXT_RE_DESC "\n";
MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>");
-MODULE_DESCRIPTION(BNXT_RE_DESC " Driver");
+MODULE_DESCRIPTION(BNXT_RE_DESC);
MODULE_LICENSE("Dual BSD/GPL");
/* globals */
-static struct list_head bnxt_re_dev_list = LIST_HEAD_INIT(bnxt_re_dev_list);
-/* Mutex to protect the list of bnxt_re devices added */
-static DEFINE_MUTEX(bnxt_re_dev_lock);
-static struct workqueue_struct *bnxt_re_wq;
-static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev);
+static DEFINE_MUTEX(bnxt_re_mutex);
+
+static int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev);
+static int bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev);
+
+static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len,
+ u32 *offset);
+static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp,
+ u8 port_num, enum ib_event_type event);
+static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_chip_ctx *cctx;
+ struct bnxt_en_dev *en_dev;
+ struct bnxt_qplib_res *res;
+ u32 l2db_len = 0;
+ u32 offset = 0;
+ u32 barlen;
+ int rc;
+
+ res = &rdev->qplib_res;
+ en_dev = rdev->en_dev;
+ cctx = rdev->chip_ctx;
+
+ /* Issue qcfg */
+ rc = bnxt_re_hwrm_qcfg(rdev, &l2db_len, &offset);
+ if (rc)
+ dev_info(rdev_to_dev(rdev),
+ "Couldn't get DB bar size, Low latency framework is disabled\n");
+ /* set register offsets for both UC and WC */
+ if (bnxt_qplib_is_chip_gen_p7(cctx)) {
+ res->dpi_tbl.ucreg.offset = offset;
+ res->dpi_tbl.wcreg.offset = en_dev->l2_db_size;
+ } else {
+ res->dpi_tbl.ucreg.offset = res->is_vf ? BNXT_QPLIB_DBR_VF_DB_OFFSET :
+ BNXT_QPLIB_DBR_PF_DB_OFFSET;
+ res->dpi_tbl.wcreg.offset = res->dpi_tbl.ucreg.offset;
+ }
+
+ /* If WC mapping is disabled by the L2 driver, then en_dev->l2_db_size
+ * equals the actual DB BAR size. This indicates that L2 is mapping the
+ * entire bar as UC; the RoCE driver can't enable WC mapping in that case
+ * and DB push will be disabled.
+ */
+ barlen = pci_resource_len(res->pdev, RCFW_DBR_PCI_BAR_REGION);
+ if (cctx->modes.db_push && l2db_len && en_dev->l2_db_size != barlen) {
+ res->dpi_tbl.wcreg.offset = en_dev->l2_db_size;
+ dev_info(rdev_to_dev(rdev), "Low latency framework is enabled\n");
+ }
+}
+
+static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_chip_ctx *cctx;
+
+ cctx = rdev->chip_ctx;
+ cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ?
+ BNXT_QPLIB_WQE_MODE_VARIABLE : BNXT_QPLIB_WQE_MODE_STATIC;
+ if (bnxt_re_hwrm_qcaps(rdev))
+ dev_err(rdev_to_dev(rdev),
+ "Failed to query hwrm qcaps\n");
+ if (bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx)) {
+ cctx->modes.toggle_bits |= BNXT_QPLIB_CQ_TOGGLE_BIT;
+ cctx->modes.toggle_bits |= BNXT_QPLIB_SRQ_TOGGLE_BIT;
+ }
+}
static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev)
{
+ struct bnxt_qplib_chip_ctx *chip_ctx;
+
+ if (!rdev->chip_ctx)
+ return;
+
+ kfree(rdev->dev_attr);
+ rdev->dev_attr = NULL;
+
+ chip_ctx = rdev->chip_ctx;
+ rdev->chip_ctx = NULL;
rdev->rcfw.res = NULL;
rdev->qplib_res.cctx = NULL;
+ rdev->qplib_res.pdev = NULL;
+ rdev->qplib_res.netdev = NULL;
+ kfree(chip_ctx);
}
static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev)
{
+ struct bnxt_qplib_chip_ctx *chip_ctx;
struct bnxt_en_dev *en_dev;
- struct bnxt *bp;
+ int rc = -ENOMEM;
en_dev = rdev->en_dev;
- bp = netdev_priv(en_dev->net);
- rdev->chip_ctx.chip_num = bp->chip_num;
+ rdev->qplib_res.pdev = en_dev->pdev;
+ chip_ctx = kzalloc(sizeof(*chip_ctx), GFP_KERNEL);
+ if (!chip_ctx)
+ return -ENOMEM;
+ chip_ctx->chip_num = en_dev->chip_num;
+ chip_ctx->hw_stats_size = en_dev->hw_ring_stats_size;
+
+ rdev->chip_ctx = chip_ctx;
/* rest of the members to follow eventually */
- rdev->qplib_res.cctx = &rdev->chip_ctx;
+ rdev->qplib_res.cctx = rdev->chip_ctx;
rdev->rcfw.res = &rdev->qplib_res;
+ rdev->dev_attr = kzalloc(sizeof(*rdev->dev_attr), GFP_KERNEL);
+ if (!rdev->dev_attr)
+ goto free_chip_ctx;
+ rdev->qplib_res.dattr = rdev->dev_attr;
+ rdev->qplib_res.is_vf = BNXT_EN_VF(en_dev);
+ rdev->qplib_res.en_dev = en_dev;
+
+ rc = bnxt_re_query_hwrm_intf_version(rdev);
+ if (rc)
+ goto free_dev_attr;
+ bnxt_re_set_drv_mode(rdev);
+
+ bnxt_re_set_db_offset(rdev);
+ rc = bnxt_qplib_map_db_bar(&rdev->qplib_res);
+ if (rc)
+ goto free_dev_attr;
+
+ if (bnxt_qplib_determine_atomics(en_dev->pdev))
+ ibdev_info(&rdev->ibdev,
+ "platform doesn't support global atomics.");
return 0;
+free_dev_attr:
+ kfree(rdev->dev_attr);
+ rdev->dev_attr = NULL;
+free_chip_ctx:
+ kfree(rdev->chip_ctx);
+ rdev->chip_ctx = NULL;
+ return rc;
}
/* SR-IOV helper functions */
static void bnxt_re_get_sriov_func_type(struct bnxt_re_dev *rdev)
{
- struct bnxt *bp;
-
- bp = netdev_priv(rdev->en_dev->net);
- if (BNXT_VF(bp))
+ if (BNXT_EN_VF(rdev->en_dev))
rdev->is_virtfn = 1;
}
@@ -119,104 +226,237 @@ static void bnxt_re_get_sriov_func_type(struct bnxt_re_dev *rdev)
* reserved for the function. The driver may choose to allocate fewer
* resources than the firmware maximum.
*/
-static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
+static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev)
{
- u32 vf_qps = 0, vf_srqs = 0, vf_cqs = 0, vf_mrws = 0, vf_gids = 0;
- u32 i;
- u32 vf_pct;
- u32 num_vfs;
- struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+ struct bnxt_qplib_dev_attr *attr;
+ struct bnxt_qplib_ctx *ctx;
+ int i;
- rdev->qplib_ctx.qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT,
- dev_attr->max_qp);
+ attr = rdev->dev_attr;
+ ctx = &rdev->qplib_ctx;
- rdev->qplib_ctx.mrw_count = BNXT_RE_MAX_MRW_COUNT_256K;
+ ctx->qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT,
+ attr->max_qp);
+ ctx->mrw_count = BNXT_RE_MAX_MRW_COUNT_256K;
/* Use max_mr from fw since max_mrw does not get set */
- rdev->qplib_ctx.mrw_count = min_t(u32, rdev->qplib_ctx.mrw_count,
- dev_attr->max_mr);
- rdev->qplib_ctx.srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT,
- dev_attr->max_srq);
- rdev->qplib_ctx.cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT,
- dev_attr->max_cq);
-
- for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
- rdev->qplib_ctx.tqm_count[i] =
- rdev->dev_attr.tqm_alloc_reqs[i];
-
- if (rdev->num_vfs) {
- /*
- * Reserve a set of resources for the PF. Divide the remaining
- * resources among the VFs
- */
- vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF;
- num_vfs = 100 * rdev->num_vfs;
- vf_qps = (rdev->qplib_ctx.qpc_count * vf_pct) / num_vfs;
- vf_srqs = (rdev->qplib_ctx.srqc_count * vf_pct) / num_vfs;
- vf_cqs = (rdev->qplib_ctx.cq_count * vf_pct) / num_vfs;
- /*
- * The driver allows many more MRs than other resources. If the
- * firmware does also, then reserve a fixed amount for the PF
- * and divide the rest among VFs. VFs may use many MRs for NFS
- * mounts, ISER, NVME applications, etc. If the firmware
- * severely restricts the number of MRs, then let PF have
- * half and divide the rest among VFs, as for the other
- * resource types.
- */
- if (rdev->qplib_ctx.mrw_count < BNXT_RE_MAX_MRW_COUNT_64K)
- vf_mrws = rdev->qplib_ctx.mrw_count * vf_pct / num_vfs;
- else
- vf_mrws = (rdev->qplib_ctx.mrw_count -
- BNXT_RE_RESVD_MR_FOR_PF) / rdev->num_vfs;
- vf_gids = BNXT_RE_MAX_GID_PER_VF;
- }
- rdev->qplib_ctx.vf_res.max_mrw_per_vf = vf_mrws;
- rdev->qplib_ctx.vf_res.max_gid_per_vf = vf_gids;
- rdev->qplib_ctx.vf_res.max_qp_per_vf = vf_qps;
- rdev->qplib_ctx.vf_res.max_srq_per_vf = vf_srqs;
- rdev->qplib_ctx.vf_res.max_cq_per_vf = vf_cqs;
+ ctx->mrw_count = min_t(u32, ctx->mrw_count, attr->max_mr);
+ ctx->srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT,
+ attr->max_srq);
+ ctx->cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, attr->max_cq);
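+ /* TQM allocation counts are only needed for chips older than Gen P5/P7. */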
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+ for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
+ rdev->qplib_ctx.tqm_ctx.qcount[i] =
+ rdev->dev_attr->tqm_alloc_reqs[i];
}
-/* for handling bnxt_en callbacks later */
-static void bnxt_re_stop(void *p)
+static void bnxt_re_limit_vf_res(struct bnxt_qplib_ctx *qplib_ctx, u32 num_vf)
{
+ struct bnxt_qplib_vf_res *vf_res;
+ u32 mrws = 0;
+ u32 vf_pct;
+ u32 nvfs;
+
+ vf_res = &qplib_ctx->vf_res;
+ /*
+ * Reserve a set of resources for the PF. Divide the remaining
+ * resources among the VFs
+ */
+ vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF;
+ nvfs = num_vf;
+ num_vf = 100 * num_vf;
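+ /* e.g. (hypothetical numbers): with 4 VFs and vf_pct = 75, each VF is
+ * granted (qpc_count * 75) / 400 QPs; SRQs and CQs are divided the same way.
+ */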
+ vf_res->max_qp_per_vf = (qplib_ctx->qpc_count * vf_pct) / num_vf;
+ vf_res->max_srq_per_vf = (qplib_ctx->srqc_count * vf_pct) / num_vf;
+ vf_res->max_cq_per_vf = (qplib_ctx->cq_count * vf_pct) / num_vf;
+ /*
+ * The driver allows many more MRs than other resources. If the
+ * firmware does also, then reserve a fixed amount for the PF and
+ * divide the rest among VFs. VFs may use many MRs for NFS
+ * mounts, ISER, NVME applications, etc. If the firmware severely
+ * restricts the number of MRs, then let PF have half and divide
+ * the rest among VFs, as for the other resource types.
+ */
+ if (qplib_ctx->mrw_count < BNXT_RE_MAX_MRW_COUNT_64K) {
+ mrws = qplib_ctx->mrw_count * vf_pct;
+ nvfs = num_vf;
+ } else {
+ mrws = qplib_ctx->mrw_count - BNXT_RE_RESVD_MR_FOR_PF;
+ }
+ vf_res->max_mrw_per_vf = (mrws / nvfs);
+ vf_res->max_gid_per_vf = BNXT_RE_MAX_GID_PER_VF;
}
-static void bnxt_re_start(void *p)
+static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
{
+ u32 num_vfs;
+
+ memset(&rdev->qplib_ctx.vf_res, 0, sizeof(struct bnxt_qplib_vf_res));
+ bnxt_re_limit_pf_res(rdev);
+
+ num_vfs = bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ?
+ BNXT_RE_GEN_P5_MAX_VF : rdev->num_vfs;
+ if (num_vfs)
+ bnxt_re_limit_vf_res(&rdev->qplib_ctx, num_vfs);
}
-static void bnxt_re_sriov_config(void *p, int num_vfs)
+static void bnxt_re_vf_res_config(struct bnxt_re_dev *rdev)
{
- struct bnxt_re_dev *rdev = p;
-
- if (!rdev)
+ /*
+ * Use the total VF count since the actual VF count may not be
+ * available at this point.
+ */
+ rdev->num_vfs = pci_sriov_get_totalvfs(rdev->en_dev->pdev);
+ if (!rdev->num_vfs)
return;
- rdev->num_vfs = num_vfs;
bnxt_re_set_resource_limits(rdev);
bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw,
&rdev->qplib_ctx);
}
-static void bnxt_re_shutdown(void *p)
+struct bnxt_re_dcb_work {
+ struct work_struct work;
+ struct bnxt_re_dev *rdev;
+ struct hwrm_async_event_cmpl cmpl;
+};
+
+static bool bnxt_re_is_qp1_qp(struct bnxt_re_qp *qp)
+{
+ return qp->ib_qp.qp_type == IB_QPT_GSI;
+}
+
+static struct bnxt_re_qp *bnxt_re_get_qp1_qp(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_re_qp *qp;
+
+ mutex_lock(&rdev->qp_lock);
+ list_for_each_entry(qp, &rdev->qp_list, list) {
+ if (bnxt_re_is_qp1_qp(qp)) {
+ mutex_unlock(&rdev->qp_lock);
+ return qp;
+ }
+ }
+ mutex_unlock(&rdev->qp_lock);
+ return NULL;
+}
+
+static int bnxt_re_update_qp1_tos_dscp(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_re_qp *qp;
+
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+ return 0;
+
+ qp = bnxt_re_get_qp1_qp(rdev);
+ if (!qp)
+ return 0;
+
+ qp->qplib_qp.modify_flags = CMDQ_MODIFY_QP_MODIFY_MASK_TOS_DSCP;
+ qp->qplib_qp.tos_dscp = rdev->cc_param.qp1_tos_dscp;
+
+ return bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp);
+}
+
+static void bnxt_re_init_dcb_wq(struct bnxt_re_dev *rdev)
+{
+ rdev->dcb_wq = create_singlethread_workqueue("bnxt_re_dcb_wq");
+}
+
+static void bnxt_re_uninit_dcb_wq(struct bnxt_re_dev *rdev)
+{
+ if (!rdev->dcb_wq)
+ return;
+ destroy_workqueue(rdev->dcb_wq);
+}
+
+static void bnxt_re_dcb_wq_task(struct work_struct *work)
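+/* DCB configuration changes are handled in process context: re-query the
+ * congestion-control parameters and, if the ToS/DSCP for QP1 changed,
+ * apply it to the GSI QP.
+ */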
+{
+ struct bnxt_re_dcb_work *dcb_work =
+ container_of(work, struct bnxt_re_dcb_work, work);
+ struct bnxt_re_dev *rdev = dcb_work->rdev;
+ struct bnxt_qplib_cc_param *cc_param;
+ int rc;
+
+ if (!rdev)
+ goto free_dcb;
+
+ cc_param = &rdev->cc_param;
+ rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, cc_param);
+ if (rc) {
+ ibdev_dbg(&rdev->ibdev, "Failed to query ccparam rc:%d", rc);
+ goto free_dcb;
+ }
+ if (cc_param->qp1_tos_dscp != cc_param->tos_dscp) {
+ cc_param->qp1_tos_dscp = cc_param->tos_dscp;
+ rc = bnxt_re_update_qp1_tos_dscp(rdev);
+ if (rc) {
+ ibdev_dbg(&rdev->ibdev, "%s: Failed to modify QP1 rc:%d",
+ __func__, rc);
+ goto free_dcb;
+ }
+ }
+
+free_dcb:
+ kfree(dcb_work);
+}
+
+static void bnxt_re_async_notifier(void *handle, struct hwrm_async_event_cmpl *cmpl)
{
- struct bnxt_re_dev *rdev = p;
+ struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle);
+ struct bnxt_re_dcb_work *dcb_work;
+ struct bnxt_re_dev *rdev;
+ u32 data1, data2;
+ u16 event_id;
+ rdev = en_info->rdev;
if (!rdev)
return;
- bnxt_re_ib_unreg(rdev);
+ event_id = le16_to_cpu(cmpl->event_id);
+ data1 = le32_to_cpu(cmpl->event_data1);
+ data2 = le32_to_cpu(cmpl->event_data2);
+
+ ibdev_dbg(&rdev->ibdev, "Async event_id = %d data1 = %d data2 = %d",
+ event_id, data1, data2);
+
+ switch (event_id) {
+ case ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE:
+ dcb_work = kzalloc(sizeof(*dcb_work), GFP_ATOMIC);
+ if (!dcb_work)
+ break;
+
+ dcb_work->rdev = rdev;
+ memcpy(&dcb_work->cmpl, cmpl, sizeof(*cmpl));
+ INIT_WORK(&dcb_work->work, bnxt_re_dcb_wq_task);
+ queue_work(rdev->dcb_wq, &dcb_work->work);
+ break;
+ default:
+ break;
+ }
}
-static void bnxt_re_stop_irq(void *handle)
+static void bnxt_re_stop_irq(void *handle, bool reset)
{
- struct bnxt_re_dev *rdev = (struct bnxt_re_dev *)handle;
- struct bnxt_qplib_rcfw *rcfw = &rdev->rcfw;
+ struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle);
+ struct bnxt_qplib_rcfw *rcfw;
+ struct bnxt_re_dev *rdev;
struct bnxt_qplib_nq *nq;
int indx;
- for (indx = BNXT_RE_NQ_IDX; indx < rdev->num_msix; indx++) {
- nq = &rdev->nq[indx - 1];
+ rdev = en_info->rdev;
+ if (!rdev)
+ return;
+ rcfw = &rdev->rcfw;
+
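+ /* On a reset notification, mark the device detached so outstanding
+ * firmware commands bail out, and report a fatal device event.
+ */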
+ if (reset) {
+ set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
+ set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
+ wake_up_all(&rdev->rcfw.cmdq.waitq);
+ bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
+ IB_EVENT_DEVICE_FATAL);
+ }
+
+ for (indx = BNXT_RE_NQ_IDX; indx < rdev->nqr->num_msix; indx++) {
+ nq = &rdev->nqr->nq[indx - 1];
bnxt_qplib_nq_stop_irq(nq, false);
}
@@ -225,207 +465,505 @@ static void bnxt_re_stop_irq(void *handle)
static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent)
{
- struct bnxt_re_dev *rdev = (struct bnxt_re_dev *)handle;
- struct bnxt_msix_entry *msix_ent = rdev->msix_entries;
- struct bnxt_qplib_rcfw *rcfw = &rdev->rcfw;
+ struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle);
+ struct bnxt_msix_entry *msix_ent;
+ struct bnxt_qplib_rcfw *rcfw;
+ struct bnxt_re_dev *rdev;
struct bnxt_qplib_nq *nq;
int indx, rc;
+ rdev = en_info->rdev;
+ if (!rdev)
+ return;
+ msix_ent = rdev->nqr->msix_entries;
+ rcfw = &rdev->rcfw;
if (!ent) {
/* Not setting the f/w timeout bit in rcfw.
* During driver unload, the first command
* to f/w will time out and that will set the
* timeout bit.
*/
- dev_err(rdev_to_dev(rdev), "Failed to re-start IRQs\n");
+ ibdev_err(&rdev->ibdev, "Failed to re-start IRQs\n");
return;
}
/* Vectors may change after restart, so update with new vectors
* in the device structure.
*/
- for (indx = 0; indx < rdev->num_msix; indx++)
- rdev->msix_entries[indx].vector = ent[indx].vector;
+ for (indx = 0; indx < rdev->nqr->num_msix; indx++)
+ rdev->nqr->msix_entries[indx].vector = ent[indx].vector;
- bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector,
- false);
- for (indx = BNXT_RE_NQ_IDX ; indx < rdev->num_msix; indx++) {
- nq = &rdev->nq[indx - 1];
+ rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector,
+ false);
+ if (rc) {
+ ibdev_warn(&rdev->ibdev, "Failed to reinit CREQ\n");
+ return;
+ }
+ for (indx = BNXT_RE_NQ_IDX ; indx < rdev->nqr->num_msix; indx++) {
+ nq = &rdev->nqr->nq[indx - 1];
rc = bnxt_qplib_nq_start_irq(nq, indx - 1,
msix_ent[indx].vector, false);
- if (rc)
- dev_warn(rdev_to_dev(rdev),
- "Failed to reinit NQ index %d\n", indx - 1);
+ if (rc) {
+ ibdev_warn(&rdev->ibdev, "Failed to reinit NQ index %d\n",
+ indx - 1);
+ return;
+ }
}
}
static struct bnxt_ulp_ops bnxt_re_ulp_ops = {
- .ulp_async_notifier = NULL,
- .ulp_stop = bnxt_re_stop,
- .ulp_start = bnxt_re_start,
- .ulp_sriov_config = bnxt_re_sriov_config,
- .ulp_shutdown = bnxt_re_shutdown,
+ .ulp_async_notifier = bnxt_re_async_notifier,
.ulp_irq_stop = bnxt_re_stop_irq,
.ulp_irq_restart = bnxt_re_start_irq
};
/* RoCE -> Net driver */
-/* Driver registration routines used to let the networking driver (bnxt_en)
- * to know that the RoCE driver is now installed
- */
-static int bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev)
+static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev)
{
struct bnxt_en_dev *en_dev;
+
+ en_dev = rdev->en_dev;
+ return bnxt_register_dev(en_dev, &bnxt_re_ulp_ops, rdev->adev);
+}
+
+static void bnxt_re_init_hwrm_hdr(struct input *hdr, u16 opcd)
+{
+ hdr->req_type = cpu_to_le16(opcd);
+ hdr->cmpl_ring = cpu_to_le16(-1);
+ hdr->target_id = cpu_to_le16(-1);
+}
+
+static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg,
+ int msg_len, void *resp, int resp_max_len,
+ int timeout)
+{
+ fw_msg->msg = msg;
+ fw_msg->msg_len = msg_len;
+ fw_msg->resp = resp;
+ fw_msg->resp_max_len = resp_max_len;
+ fw_msg->timeout = timeout;
+}
+
+void bnxt_re_hwrm_free_vnic(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_vnic_free_input req = {};
+ struct bnxt_fw_msg fw_msg = {};
int rc;
- if (!rdev)
- return -EINVAL;
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_VNIC_FREE);
- en_dev = rdev->en_dev;
+ req.vnic_id = cpu_to_le32(rdev->mirror_vnic_id);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), NULL,
+ 0, BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
+ rc = bnxt_send_msg(en_dev, &fw_msg);
+ if (rc)
+ ibdev_dbg(&rdev->ibdev,
+ "Failed to free vnic, rc = %d\n", rc);
+}
+
+int bnxt_re_hwrm_alloc_vnic(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_vnic_alloc_output resp = {};
+ struct hwrm_vnic_alloc_input req = {};
+ struct bnxt_fw_msg fw_msg = {};
+ int rc;
+
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_VNIC_ALLOC);
+
+ req.vnic_id = cpu_to_le16(rdev->mirror_vnic_id);
+ req.flags = cpu_to_le32(VNIC_ALLOC_REQ_FLAGS_VNIC_ID_VALID);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
+ rc = bnxt_send_msg(en_dev, &fw_msg);
+ if (rc)
+ ibdev_dbg(&rdev->ibdev,
+ "Failed to alloc vnic, rc = %d\n", rc);
- rc = en_dev->en_ops->bnxt_unregister_device(rdev->en_dev,
- BNXT_ROCE_ULP);
return rc;
}
-static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev)
+int bnxt_re_hwrm_cfg_vnic(struct bnxt_re_dev *rdev, u32 qp_id)
{
- struct bnxt_en_dev *en_dev;
- int rc = 0;
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_vnic_cfg_input req = {};
+ struct bnxt_fw_msg fw_msg = {};
+ int rc;
- if (!rdev)
- return -EINVAL;
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_VNIC_CFG);
- en_dev = rdev->en_dev;
+ req.flags = cpu_to_le32(VNIC_CFG_REQ_FLAGS_ROCE_ONLY_VNIC_MODE);
+ req.enables = cpu_to_le32(VNIC_CFG_REQ_ENABLES_RAW_QP_ID |
+ VNIC_CFG_REQ_ENABLES_MRU);
+ req.vnic_id = cpu_to_le16(rdev->mirror_vnic_id);
+ req.raw_qp_id = cpu_to_le32(qp_id);
+ req.mru = cpu_to_le16(rdev->netdev->mtu + VLAN_ETH_HLEN);
+
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), NULL,
+ 0, BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
+ rc = bnxt_send_msg(en_dev, &fw_msg);
+ if (rc)
+ ibdev_dbg(&rdev->ibdev,
+ "Failed to cfg vnic, rc = %d\n", rc);
- rc = en_dev->en_ops->bnxt_register_device(en_dev, BNXT_ROCE_ULP,
- &bnxt_re_ulp_ops, rdev);
- rdev->qplib_res.pdev = rdev->en_dev->pdev;
return rc;
}
-static int bnxt_re_free_msix(struct bnxt_re_dev *rdev)
+/* Query device config using common hwrm */
+static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len,
+ u32 *offset)
{
- struct bnxt_en_dev *en_dev;
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_func_qcfg_output resp = {0};
+ struct hwrm_func_qcfg_input req = {0};
+ struct bnxt_fw_msg fw_msg = {};
int rc;
- if (!rdev)
- return -EINVAL;
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_QCFG);
+ req.fid = cpu_to_le16(0xffff);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
+ rc = bnxt_send_msg(en_dev, &fw_msg);
+ if (!rc) {
+ *db_len = PAGE_ALIGN(le16_to_cpu(resp.l2_doorbell_bar_size_kb) * 1024);
+ *offset = PAGE_ALIGN(le16_to_cpu(resp.legacy_l2_db_size_kb) * 1024);
+ rdev->mirror_vnic_id = le16_to_cpu(resp.mirror_vnic_id);
+ }
+ return rc;
+}
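
HWRM_FUNC_QCFG reports the doorbell BAR geometry in KiB; the driver converts both fields to bytes and page-aligns them to get the length of the doorbell region and the offset past the legacy L2 doorbells. A small sketch of that arithmetic, assuming a 4 KiB page size and hypothetical response values:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE_BYTES 4096u
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE_BYTES - 1) & ~(PAGE_SIZE_BYTES - 1))

int main(void)
{
        /* Hypothetical HWRM_FUNC_QCFG response fields, in KiB */
        uint16_t l2_doorbell_bar_size_kb = 512;
        uint16_t legacy_l2_db_size_kb = 4;

        uint32_t db_len = PAGE_ALIGN(l2_doorbell_bar_size_kb * 1024u);
        uint32_t offset = PAGE_ALIGN(legacy_l2_db_size_kb * 1024u);

        printf("RoCE doorbell region: offset 0x%x, length 0x%x\n",
               offset, db_len);
        return 0;
}
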
- en_dev = rdev->en_dev;
+/* Query function capabilities using common hwrm */
+int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_func_qcaps_output resp = {};
+ struct hwrm_func_qcaps_input req = {};
+ struct bnxt_qplib_chip_ctx *cctx;
+ struct bnxt_fw_msg fw_msg = {};
+ u32 flags_ext2;
+ int rc;
+ cctx = rdev->chip_ctx;
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_QCAPS);
+ req.fid = cpu_to_le16(0xffff);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
- rc = en_dev->en_ops->bnxt_free_msix(rdev->en_dev, BNXT_ROCE_ULP);
+ rc = bnxt_send_msg(en_dev, &fw_msg);
+ if (rc)
+ return rc;
+ cctx->modes.db_push = le32_to_cpu(resp.flags) & FUNC_QCAPS_RESP_FLAGS_WCB_PUSH_MODE;
- return rc;
+ flags_ext2 = le32_to_cpu(resp.flags_ext2);
+ cctx->modes.dbr_pacing = flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_DBR_PACING_EXT_SUPPORTED ||
+ flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_DBR_PACING_V0_SUPPORTED;
+ cctx->modes.roce_mirror = !!(le32_to_cpu(resp.flags_ext3) &
+ FUNC_QCAPS_RESP_FLAGS_EXT3_MIRROR_ON_ROCE_SUPPORTED);
+ return 0;
}
-static int bnxt_re_request_msix(struct bnxt_re_dev *rdev)
+static int bnxt_re_hwrm_dbr_pacing_qcfg(struct bnxt_re_dev *rdev)
{
- int rc = 0, num_msix_want = BNXT_RE_MAX_MSIX, num_msix_got;
- struct bnxt_en_dev *en_dev;
+ struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
+ struct hwrm_func_dbr_pacing_qcfg_output resp = {};
+ struct hwrm_func_dbr_pacing_qcfg_input req = {};
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct bnxt_qplib_chip_ctx *cctx;
+ struct bnxt_fw_msg fw_msg = {};
+ int rc;
- if (!rdev)
- return -EINVAL;
+ cctx = rdev->chip_ctx;
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_DBR_PACING_QCFG);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
+ rc = bnxt_send_msg(en_dev, &fw_msg);
+ if (rc)
+ return rc;
- en_dev = rdev->en_dev;
+ if ((le32_to_cpu(resp.dbr_stat_db_fifo_reg) &
+ FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK) ==
+ FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_GRC)
+ cctx->dbr_stat_db_fifo =
+ le32_to_cpu(resp.dbr_stat_db_fifo_reg) &
+ ~FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK;
- num_msix_want = min_t(u32, BNXT_RE_MAX_MSIX, num_online_cpus());
+ pacing_data->fifo_max_depth = le32_to_cpu(resp.dbr_stat_db_max_fifo_depth);
+ if (!pacing_data->fifo_max_depth)
+ pacing_data->fifo_max_depth = BNXT_RE_MAX_FIFO_DEPTH(cctx);
+ pacing_data->fifo_room_mask = le32_to_cpu(resp.dbr_stat_db_fifo_reg_fifo_room_mask);
+ pacing_data->fifo_room_shift = resp.dbr_stat_db_fifo_reg_fifo_room_shift;
- num_msix_got = en_dev->en_ops->bnxt_request_msix(en_dev, BNXT_ROCE_ULP,
- rdev->msix_entries,
- num_msix_want);
- if (num_msix_got < BNXT_RE_MIN_MSIX) {
- rc = -EINVAL;
- goto done;
+ return 0;
+}
+
+/* Update the pacing tunable parameters to the default values */
+static void bnxt_re_set_default_pacing_data(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
+
+ pacing_data->do_pacing = rdev->pacing.dbr_def_do_pacing;
+ pacing_data->pacing_th = rdev->pacing.pacing_algo_th;
+ pacing_data->alarm_th =
+ pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE;
+}
+
+static u32 __get_fifo_occupancy(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
+ u32 read_val, fifo_occup;
+
+ read_val = readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off);
+ fifo_occup = pacing_data->fifo_max_depth -
+ ((read_val & pacing_data->fifo_room_mask) >>
+ pacing_data->fifo_room_shift);
+ return fifo_occup;
+}
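
The doorbell FIFO register reports free room (masked and shifted); occupancy is the maximum depth minus that room. A standalone sketch of the same arithmetic, using illustrative mask, shift and depth values in place of the ones returned by FUNC_DBR_PACING_QCFG:

#include <stdio.h>
#include <stdint.h>

/* Illustrative values only -- the real mask, shift and depth come from
 * the FUNC_DBR_PACING_QCFG response, not from these constants. */
#define FIFO_MAX_DEPTH   0x2c00u
#define FIFO_ROOM_MASK   0x1fff0000u
#define FIFO_ROOM_SHIFT  16

static uint32_t fifo_occupancy(uint32_t reg_val)
{
        /* room = free slots reported by hardware; occupancy = depth - room */
        uint32_t room = (reg_val & FIFO_ROOM_MASK) >> FIFO_ROOM_SHIFT;
        return FIFO_MAX_DEPTH - room;
}

int main(void)
{
        /* A hypothetical register read reporting 0x2a00 free slots */
        uint32_t reg_val = 0x2a00u << FIFO_ROOM_SHIFT;

        printf("occupancy = 0x%x\n", fifo_occupancy(reg_val)); /* 0x200 */
        return 0;
}
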
+
+static bool is_dbr_fifo_full(struct bnxt_re_dev *rdev)
+{
+ u32 max_occup, fifo_occup;
+
+ fifo_occup = __get_fifo_occupancy(rdev);
+ max_occup = BNXT_RE_MAX_FIFO_DEPTH(rdev->chip_ctx) - 1;
+ if (fifo_occup == max_occup)
+ return true;
+
+ return false;
+}
+
+static void __wait_for_fifo_occupancy_below_th(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
+ u32 retry_fifo_check = 1000;
+ u32 fifo_occup;
+
+ /* loop shouldn't run infinitely as the occupancy usually goes
+ * below pacing algo threshold as soon as pacing kicks in.
+ */
+ while (1) {
+ fifo_occup = __get_fifo_occupancy(rdev);
+ /* Fifo occupancy cannot be greater than the MAX FIFO depth */
+ if (fifo_occup > pacing_data->fifo_max_depth)
+ break;
+
+ if (fifo_occup < pacing_data->pacing_th)
+ break;
+ if (!retry_fifo_check--) {
+ dev_info_once(rdev_to_dev(rdev),
+ "%s: fifo_occup = 0x%xfifo_max_depth = 0x%x pacing_th = 0x%x\n",
+ __func__, fifo_occup, pacing_data->fifo_max_depth,
+ pacing_data->pacing_th);
+ break;
+ }
+
+ }
+}
+
+static void bnxt_re_db_fifo_check(struct work_struct *work)
+{
+ struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
+ dbq_fifo_check_work);
+ struct bnxt_qplib_db_pacing_data *pacing_data;
+ u32 pacing_save;
+
+ if (!mutex_trylock(&rdev->pacing.dbq_lock))
+ return;
+ pacing_data = rdev->qplib_res.pacing_data;
+ pacing_save = rdev->pacing.do_pacing_save;
+ __wait_for_fifo_occupancy_below_th(rdev);
+ cancel_delayed_work_sync(&rdev->dbq_pacing_work);
+ if (pacing_save > rdev->pacing.dbr_def_do_pacing) {
+ /* Double the do_pacing value during the congestion */
+ pacing_save = pacing_save << 1;
+ } else {
+ /*
+ * When new congestion is detected, increase do_pacing by a factor
+ * of 8 and pacing_th by a factor of 4. Raising pacing_th gives the
+ * queue more room to drain without going empty, and more headroom
+ * to grow without raising another alarm.
+ */
+ pacing_save = pacing_save << 3;
+ pacing_data->pacing_th = rdev->pacing.pacing_algo_th * 4;
}
- if (num_msix_got != num_msix_want) {
- dev_warn(rdev_to_dev(rdev),
- "Requested %d MSI-X vectors, got %d\n",
- num_msix_want, num_msix_got);
+
+ if (pacing_save > BNXT_RE_MAX_DBR_DO_PACING)
+ pacing_save = BNXT_RE_MAX_DBR_DO_PACING;
+
+ pacing_data->do_pacing = pacing_save;
+ rdev->pacing.do_pacing_save = pacing_data->do_pacing;
+ pacing_data->alarm_th =
+ pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE;
+ schedule_delayed_work(&rdev->dbq_pacing_work,
+ msecs_to_jiffies(rdev->pacing.dbq_pacing_time));
+ rdev->stats.pacing.alerts++;
+ mutex_unlock(&rdev->pacing.dbq_lock);
+}
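
The escalation policy above is easiest to see as plain arithmetic: a fresh alarm multiplies do_pacing by 8 (and pacing_th by 4), and while congestion persists each pass doubles it until the cap is reached. The sketch below replays that sequence with invented default and cap values standing in for BNXT_RE_DBR_DO_PACING_NO_CONGESTION and BNXT_RE_MAX_DBR_DO_PACING:

#include <stdio.h>
#include <stdint.h>

/* Hypothetical tunables for illustration only */
#define DEF_DO_PACING  0x10u
#define MAX_DO_PACING  0xffffu

static uint32_t escalate(uint32_t pacing_save)
{
        if (pacing_save > DEF_DO_PACING)
                pacing_save <<= 1;      /* congestion persists: double */
        else
                pacing_save <<= 3;      /* new congestion: 8x */

        if (pacing_save > MAX_DO_PACING)
                pacing_save = MAX_DO_PACING;
        return pacing_save;
}

int main(void)
{
        uint32_t p = DEF_DO_PACING;
        int i;

        for (i = 0; i < 5; i++) {
                p = escalate(p);
                printf("alarm %d: do_pacing = 0x%x\n", i + 1, p);
        }
        /* 0x80, 0x100, 0x200, 0x400, 0x800 ... until the cap is hit */
        return 0;
}
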
+
+static void bnxt_re_pacing_timer_exp(struct work_struct *work)
+{
+ struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
+ dbq_pacing_work.work);
+ struct bnxt_qplib_db_pacing_data *pacing_data;
+ u32 fifo_occup;
+
+ if (!mutex_trylock(&rdev->pacing.dbq_lock))
+ return;
+
+ pacing_data = rdev->qplib_res.pacing_data;
+ fifo_occup = __get_fifo_occupancy(rdev);
+
+ if (fifo_occup > pacing_data->pacing_th)
+ goto restart_timer;
+
+ /*
+ * Instead of returning immediately to the default do_pacing,
+ * reduce it by 1/8 and restart the timer.
+ */
+ pacing_data->do_pacing = pacing_data->do_pacing - (pacing_data->do_pacing >> 3);
+ pacing_data->do_pacing = max_t(u32, rdev->pacing.dbr_def_do_pacing, pacing_data->do_pacing);
+ if (pacing_data->do_pacing <= rdev->pacing.dbr_def_do_pacing) {
+ bnxt_re_set_default_pacing_data(rdev);
+ rdev->stats.pacing.complete++;
+ goto dbq_unlock;
}
- rdev->num_msix = num_msix_got;
-done:
- return rc;
+
+restart_timer:
+ schedule_delayed_work(&rdev->dbq_pacing_work,
+ msecs_to_jiffies(rdev->pacing.dbq_pacing_time));
+ rdev->stats.pacing.resched++;
+dbq_unlock:
+ rdev->pacing.do_pacing_save = pacing_data->do_pacing;
+ mutex_unlock(&rdev->pacing.dbq_lock);
}
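
The timer path relaxes do_pacing geometrically: each expiry subtracts one eighth of the current value, and once it reaches the default the pacing data is reset. A standalone sketch of the decay, with an invented starting value and default:

#include <stdio.h>
#include <stdint.h>

#define DEF_DO_PACING  0x10u    /* illustrative default */

int main(void)
{
        uint32_t do_pacing = 0x800;     /* hypothetical congested value */
        int ticks = 0;

        while (do_pacing > DEF_DO_PACING) {
                /* reduce by 1/8 per timer expiry, never below the default */
                do_pacing -= do_pacing >> 3;
                if (do_pacing < DEF_DO_PACING)
                        do_pacing = DEF_DO_PACING;
                ticks++;
        }
        printf("back to default after %d timer expiries\n", ticks);
        return 0;
}
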
-static void bnxt_re_init_hwrm_hdr(struct bnxt_re_dev *rdev, struct input *hdr,
- u16 opcd, u16 crid, u16 trid)
+void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev)
{
- hdr->req_type = cpu_to_le16(opcd);
- hdr->cmpl_ring = cpu_to_le16(crid);
- hdr->target_id = cpu_to_le16(trid);
+ struct bnxt_qplib_db_pacing_data *pacing_data;
+
+ if (!rdev->pacing.dbr_pacing)
+ return;
+ mutex_lock(&rdev->pacing.dbq_lock);
+ pacing_data = rdev->qplib_res.pacing_data;
+
+ /*
+ * Increase the alarm_th to max so that other user lib instances do not
+ * keep alerting the driver.
+ */
+ pacing_data->alarm_th = pacing_data->fifo_max_depth;
+ pacing_data->do_pacing = BNXT_RE_MAX_DBR_DO_PACING;
+ cancel_work_sync(&rdev->dbq_fifo_check_work);
+ schedule_work(&rdev->dbq_fifo_check_work);
+ mutex_unlock(&rdev->pacing.dbq_lock);
}
-static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg,
- int msg_len, void *resp, int resp_max_len,
- int timeout)
+static int bnxt_re_initialize_dbr_pacing(struct bnxt_re_dev *rdev)
{
- fw_msg->msg = msg;
- fw_msg->msg_len = msg_len;
- fw_msg->resp = resp;
- fw_msg->resp_max_len = resp_max_len;
- fw_msg->timeout = timeout;
+ /* Allocate a page for app use */
+ rdev->pacing.dbr_page = (void *)__get_free_page(GFP_KERNEL);
+ if (!rdev->pacing.dbr_page)
+ return -ENOMEM;
+
+ memset((u8 *)rdev->pacing.dbr_page, 0, PAGE_SIZE);
+ rdev->qplib_res.pacing_data = (struct bnxt_qplib_db_pacing_data *)rdev->pacing.dbr_page;
+
+ if (bnxt_re_hwrm_dbr_pacing_qcfg(rdev)) {
+ free_page((u64)rdev->pacing.dbr_page);
+ rdev->pacing.dbr_page = NULL;
+ return -EIO;
+ }
+
+ /* MAP HW window 2 for reading db fifo depth */
+ writel(rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_BASE_MASK,
+ rdev->en_dev->bar0 + BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4);
+ rdev->pacing.dbr_db_fifo_reg_off =
+ (rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_OFFSET_MASK) +
+ BNXT_RE_GRC_FIFO_REG_BASE;
+ rdev->pacing.dbr_bar_addr =
+ pci_resource_start(rdev->qplib_res.pdev, 0) + rdev->pacing.dbr_db_fifo_reg_off;
+
+ if (is_dbr_fifo_full(rdev)) {
+ free_page((u64)rdev->pacing.dbr_page);
+ rdev->pacing.dbr_page = NULL;
+ return -EIO;
+ }
+
+ rdev->pacing.pacing_algo_th = BNXT_RE_PACING_ALGO_THRESHOLD;
+ rdev->pacing.dbq_pacing_time = BNXT_RE_DBR_PACING_TIME;
+ rdev->pacing.dbr_def_do_pacing = BNXT_RE_DBR_DO_PACING_NO_CONGESTION;
+ rdev->pacing.do_pacing_save = rdev->pacing.dbr_def_do_pacing;
+ rdev->qplib_res.pacing_data->grc_reg_offset = rdev->pacing.dbr_db_fifo_reg_off;
+ bnxt_re_set_default_pacing_data(rdev);
+ /* Initialize worker for DBR Pacing */
+ INIT_WORK(&rdev->dbq_fifo_check_work, bnxt_re_db_fifo_check);
+ INIT_DELAYED_WORK(&rdev->dbq_pacing_work, bnxt_re_pacing_timer_exp);
+ return 0;
+}
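
The stat FIFO GRC address is split in two: the page-aligned part is programmed into GRC window 2, and the low offset bits are added to the fixed window-2 base to form the BAR0 offset later used by readl(). A sketch of that split under assumed mask and window values (the real BNXT_GRC_BASE_MASK, BNXT_GRC_OFFSET_MASK and BNXT_RE_GRC_FIFO_REG_BASE come from the bnxt headers):

#include <stdio.h>
#include <stdint.h>

/* Assumed layout: 4 KiB GRC window pages, window 2 starting at 0x2000 in
 * BAR0. The exact values here are illustrative, not the driver's. */
#define GRC_BASE_MASK      0xfffff000u
#define GRC_OFFSET_MASK    0x00000ffcu
#define GRC_WINDOW2_BASE   0x2000u

int main(void)
{
        uint32_t dbr_stat_db_fifo = 0x0012a340; /* hypothetical GRC address */
        uint32_t window_base = dbr_stat_db_fifo & GRC_BASE_MASK;
        uint32_t bar0_offset = (dbr_stat_db_fifo & GRC_OFFSET_MASK) +
                               GRC_WINDOW2_BASE;

        printf("program window base 0x%x, then readl(bar0 + 0x%x)\n",
               window_base, bar0_offset);
        return 0;
}
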
+
+static void bnxt_re_deinitialize_dbr_pacing(struct bnxt_re_dev *rdev)
+{
+ cancel_work_sync(&rdev->dbq_fifo_check_work);
+ cancel_delayed_work_sync(&rdev->dbq_pacing_work);
+ if (rdev->pacing.dbr_page)
+ free_page((u64)rdev->pacing.dbr_page);
+
+ rdev->pacing.dbr_page = NULL;
+ rdev->pacing.dbr_pacing = false;
}
static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev,
u16 fw_ring_id, int type)
{
struct bnxt_en_dev *en_dev = rdev->en_dev;
- struct hwrm_ring_free_input req = {0};
+ struct hwrm_ring_free_input req = {};
struct hwrm_ring_free_output resp;
- struct bnxt_fw_msg fw_msg;
+ struct bnxt_fw_msg fw_msg = {};
int rc = -EINVAL;
- if (!en_dev)
- return rc;
-
- memset(&fw_msg, 0, sizeof(fw_msg));
+ if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
+ return 0;
- bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_FREE, -1, -1);
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_RING_FREE);
req.ring_type = type;
req.ring_id = cpu_to_le16(fw_ring_id);
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
- sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
- rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+ sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
+ rc = bnxt_send_msg(en_dev, &fw_msg);
if (rc)
- dev_err(rdev_to_dev(rdev),
- "Failed to free HW ring:%d :%#x", req.ring_id, rc);
+ ibdev_err(&rdev->ibdev, "Failed to free HW ring:%d :%#x",
+ req.ring_id, rc);
return rc;
}
-static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, dma_addr_t *dma_arr,
- int pages, int type, u32 ring_mask,
- u32 map_index, u16 *fw_ring_id)
+static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev,
+ struct bnxt_re_ring_attr *ring_attr,
+ u16 *fw_ring_id)
{
struct bnxt_en_dev *en_dev = rdev->en_dev;
- struct hwrm_ring_alloc_input req = {0};
+ struct hwrm_ring_alloc_input req = {};
struct hwrm_ring_alloc_output resp;
- struct bnxt_fw_msg fw_msg;
+ struct bnxt_fw_msg fw_msg = {};
int rc = -EINVAL;
- if (!en_dev)
- return rc;
-
- memset(&fw_msg, 0, sizeof(fw_msg));
- bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_ALLOC, -1, -1);
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_RING_ALLOC);
req.enables = 0;
- req.page_tbl_addr = cpu_to_le64(dma_arr[0]);
- if (pages > 1) {
+ req.page_tbl_addr = cpu_to_le64(ring_attr->dma_arr[0]);
+ if (ring_attr->pages > 1) {
/* Page size is in log2 units */
req.page_size = BNXT_PAGE_SHIFT;
req.page_tbl_depth = 1;
}
req.fbo = 0;
/* Association of ring index with doorbell index and MSIX number */
- req.logical_id = cpu_to_le16(map_index);
- req.length = cpu_to_le32(ring_mask + 1);
- req.ring_type = type;
- req.int_mode = RING_ALLOC_REQ_INT_MODE_MSIX;
+ req.logical_id = cpu_to_le16(ring_attr->lrid);
+ req.length = cpu_to_le32(ring_attr->depth + 1);
+ req.ring_type = ring_attr->type;
+ req.int_mode = ring_attr->mode;
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
- sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
- rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+ sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
+ rc = bnxt_send_msg(en_dev, &fw_msg);
if (!rc)
*fw_ring_id = le16_to_cpu(resp.ring_id);
@@ -436,168 +974,361 @@ static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev,
u32 fw_stats_ctx_id)
{
struct bnxt_en_dev *en_dev = rdev->en_dev;
- struct hwrm_stat_ctx_free_input req = {0};
- struct bnxt_fw_msg fw_msg;
+ struct hwrm_stat_ctx_free_input req = {};
+ struct hwrm_stat_ctx_free_output resp = {};
+ struct bnxt_fw_msg fw_msg = {};
int rc = -EINVAL;
- if (!en_dev)
- return rc;
-
- memset(&fw_msg, 0, sizeof(fw_msg));
+ if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
+ return 0;
- bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_FREE, -1, -1);
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_FREE);
req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id);
- bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&req,
- sizeof(req), DFLT_HWRM_CMD_TIMEOUT);
- rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
+ rc = bnxt_send_msg(en_dev, &fw_msg);
if (rc)
- dev_err(rdev_to_dev(rdev),
- "Failed to free HW stats context %#x", rc);
+ ibdev_err(&rdev->ibdev, "Failed to free HW stats context %#x",
+ rc);
return rc;
}
static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
- dma_addr_t dma_map,
- u32 *fw_stats_ctx_id)
+ struct bnxt_qplib_stats *stats)
{
- struct hwrm_stat_ctx_alloc_output resp = {0};
- struct hwrm_stat_ctx_alloc_input req = {0};
+ struct bnxt_qplib_chip_ctx *chip_ctx = rdev->chip_ctx;
+ struct hwrm_stat_ctx_alloc_output resp = {};
+ struct hwrm_stat_ctx_alloc_input req = {};
struct bnxt_en_dev *en_dev = rdev->en_dev;
- struct bnxt_fw_msg fw_msg;
+ struct bnxt_fw_msg fw_msg = {};
int rc = -EINVAL;
- *fw_stats_ctx_id = INVALID_STATS_CTX_ID;
-
- if (!en_dev)
- return rc;
-
- memset(&fw_msg, 0, sizeof(fw_msg));
+ stats->fw_id = INVALID_STATS_CTX_ID;
- bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1);
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_ALLOC);
req.update_period_ms = cpu_to_le32(1000);
- req.stats_dma_addr = cpu_to_le64(dma_map);
+ req.stats_dma_addr = cpu_to_le64(stats->dma_map);
+ req.stats_dma_length = cpu_to_le16(chip_ctx->hw_stats_size);
req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE;
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
- sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
- rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+ sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
+ rc = bnxt_send_msg(en_dev, &fw_msg);
if (!rc)
- *fw_stats_ctx_id = le32_to_cpu(resp.stat_ctx_id);
+ stats->fw_id = le32_to_cpu(resp.stat_ctx_id);
return rc;
}
+static void bnxt_re_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+}
+
/* Device */
+static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct bnxt_re_dev *rdev =
+ rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);
+
+ return sysfs_emit(buf, "0x%x\n", rdev->en_dev->pdev->revision);
+}
+static DEVICE_ATTR_RO(hw_rev);
-static bool is_bnxt_re_dev(struct net_device *netdev)
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
- struct ethtool_drvinfo drvinfo;
+ struct bnxt_re_dev *rdev =
+ rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);
- if (netdev->ethtool_ops && netdev->ethtool_ops->get_drvinfo) {
- memset(&drvinfo, 0, sizeof(drvinfo));
- netdev->ethtool_ops->get_drvinfo(netdev, &drvinfo);
+ return sysfs_emit(buf, "0x%x\n", rdev->en_dev->pdev->device);
+}
+static DEVICE_ATTR_RO(hca_type);
- if (strcmp(drvinfo.driver, "bnxt_en"))
- return false;
- return true;
- }
- return false;
+static ssize_t board_id_show(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct bnxt_re_dev *rdev = rdma_device_to_drv_device(device,
+ struct bnxt_re_dev, ibdev);
+ char buffer[BNXT_VPD_FLD_LEN] = {};
+
+ if (!rdev->is_virtfn)
+ memcpy(buffer, rdev->board_partno, BNXT_VPD_FLD_LEN - 1);
+ else
+ scnprintf(buffer, BNXT_VPD_FLD_LEN, "0x%x-VF",
+ rdev->en_dev->pdev->device);
+
+ return sysfs_emit(buf, "%s\n", buffer);
}
+static DEVICE_ATTR_RO(board_id);
+
+static struct attribute *bnxt_re_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ NULL
+};
+
+static const struct attribute_group bnxt_re_dev_attr_group = {
+ .attrs = bnxt_re_attributes,
+};
-static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev)
+static int bnxt_re_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr)
{
- struct bnxt_re_dev *rdev;
+ struct bnxt_qplib_hwq *mr_hwq;
+ struct nlattr *table_attr;
+ struct bnxt_re_mr *mr;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
+ mr_hwq = &mr->qplib_mr.hwq;
+
+ if (rdma_nl_put_driver_u32(msg, "page_size",
+ mr_hwq->qe_ppg * mr_hwq->element_size))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "max_elements", mr_hwq->max_elements))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "element_size", mr_hwq->element_size))
+ goto err;
+ if (rdma_nl_put_driver_u64_hex(msg, "hwq", (unsigned long)mr_hwq))
+ goto err;
+ if (rdma_nl_put_driver_u64_hex(msg, "va", mr->qplib_mr.va))
+ goto err;
+
+ nla_nest_end(msg, table_attr);
+ return 0;
- rcu_read_lock();
- list_for_each_entry_rcu(rdev, &bnxt_re_dev_list, list) {
- if (rdev->netdev == netdev) {
- rcu_read_unlock();
- return rdev;
- }
- }
- rcu_read_unlock();
- return NULL;
+err:
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
}
-static void bnxt_re_dev_unprobe(struct net_device *netdev,
- struct bnxt_en_dev *en_dev)
+static int bnxt_re_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr)
{
- dev_put(netdev);
- module_put(en_dev->pdev->driver->driver.owner);
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_mr *mr;
+ int err, len;
+ void *data;
+
+ mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
+ rdev = mr->rdev;
+
+ err = bnxt_re_read_context_allowed(rdev);
+ if (err)
+ return err;
+
+ len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P7 :
+ BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P5;
+ data = kzalloc(len, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ err = bnxt_qplib_read_context(&rdev->rcfw, CMDQ_READ_CONTEXT_TYPE_MRW,
+ mr->qplib_mr.lkey, len, data);
+ if (!err)
+ err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
+
+ kfree(data);
+ return err;
}
-static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
+static int bnxt_re_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq)
{
- struct bnxt *bp = netdev_priv(netdev);
- struct bnxt_en_dev *en_dev;
- struct pci_dev *pdev;
+ struct bnxt_qplib_hwq *cq_hwq;
+ struct nlattr *table_attr;
+ struct bnxt_re_cq *cq;
+
+ cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
+ cq_hwq = &cq->qplib_cq.hwq;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ if (rdma_nl_put_driver_u32(msg, "cq_depth", cq_hwq->depth))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "max_elements", cq_hwq->max_elements))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "element_size", cq_hwq->element_size))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "max_wqe", cq->qplib_cq.max_wqe))
+ goto err;
+
+ nla_nest_end(msg, table_attr);
+ return 0;
- /* Call bnxt_en's RoCE probe via indirect API */
- if (!bp->ulp_probe)
- return ERR_PTR(-EINVAL);
+err:
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
+}
- en_dev = bp->ulp_probe(netdev);
- if (IS_ERR(en_dev))
- return en_dev;
+static int bnxt_re_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq)
+{
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_cq *cq;
+ int err, len;
+ void *data;
- pdev = en_dev->pdev;
- if (!pdev)
- return ERR_PTR(-EINVAL);
+ cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
+ rdev = cq->rdev;
- if (!(en_dev->flags & BNXT_EN_FLAG_ROCE_CAP)) {
- dev_info(&pdev->dev,
- "%s: probe error: RoCE is not supported on this device",
- ROCE_DRV_MODULE_NAME);
- return ERR_PTR(-ENODEV);
- }
+ err = bnxt_re_read_context_allowed(rdev);
+ if (err)
+ return err;
- /* Bump net device reference count */
- if (!try_module_get(pdev->driver->driver.owner))
- return ERR_PTR(-ENODEV);
+ len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P7 :
+ BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P5;
+ data = kzalloc(len, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
- dev_hold(netdev);
+ err = bnxt_qplib_read_context(&rdev->rcfw,
+ CMDQ_READ_CONTEXT_TYPE_CQ,
+ cq->qplib_cq.id, len, data);
+ if (!err)
+ err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
- return en_dev;
+ kfree(data);
+ return err;
}
-static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
- char *buf)
+static int bnxt_re_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp)
{
- struct bnxt_re_dev *rdev =
- rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);
+ struct bnxt_qplib_qp *qplib_qp;
+ struct nlattr *table_attr;
+ struct bnxt_re_qp *qp;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+ qplib_qp = &qp->qplib_qp;
+
+ if (rdma_nl_put_driver_u32(msg, "sq_max_wqe", qplib_qp->sq.max_wqe))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "sq_max_sge", qplib_qp->sq.max_sge))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "sq_wqe_size", qplib_qp->sq.wqe_size))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "sq_swq_start", qplib_qp->sq.swq_start))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "sq_swq_last", qplib_qp->sq.swq_last))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "rq_max_wqe", qplib_qp->rq.max_wqe))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "rq_max_sge", qplib_qp->rq.max_sge))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "rq_wqe_size", qplib_qp->rq.wqe_size))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "rq_swq_start", qplib_qp->rq.swq_start))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "rq_swq_last", qplib_qp->rq.swq_last))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "timeout", qplib_qp->timeout))
+ goto err;
+
+ nla_nest_end(msg, table_attr);
+ return 0;
- return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
+err:
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
}
-static DEVICE_ATTR_RO(hw_rev);
-static ssize_t hca_type_show(struct device *device,
- struct device_attribute *attr, char *buf)
+static int bnxt_re_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp)
{
- struct bnxt_re_dev *rdev =
- rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibqp->device, ibdev);
+ int err, len;
+ void *data;
+
+ err = bnxt_re_read_context_allowed(rdev);
+ if (err)
+ return err;
+
+ len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P7 :
+ BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P5;
+ data = kzalloc(len, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ err = bnxt_qplib_read_context(&rdev->rcfw, CMDQ_READ_CONTEXT_TYPE_QPC,
+ ibqp->qp_num, len, data);
+ if (!err)
+ err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
- return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
+ kfree(data);
+ return err;
}
-static DEVICE_ATTR_RO(hca_type);
-static struct attribute *bnxt_re_attributes[] = {
- &dev_attr_hw_rev.attr,
- &dev_attr_hca_type.attr,
- NULL
-};
+static int bnxt_re_fill_res_srq_entry(struct sk_buff *msg, struct ib_srq *ib_srq)
+{
+ struct nlattr *table_attr;
+ struct bnxt_re_srq *srq;
-static const struct attribute_group bnxt_re_dev_attr_group = {
- .attrs = bnxt_re_attributes,
-};
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq);
+
+ if (rdma_nl_put_driver_u32_hex(msg, "wqe_size", srq->qplib_srq.wqe_size))
+ goto err;
+ if (rdma_nl_put_driver_u32_hex(msg, "max_wqe", srq->qplib_srq.max_wqe))
+ goto err;
+ if (rdma_nl_put_driver_u32_hex(msg, "max_sge", srq->qplib_srq.max_sge))
+ goto err;
+ if (rdma_nl_put_driver_u32_hex(msg, "srq_limit", srq->qplib_srq.threshold))
+ goto err;
-static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev)
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
+}
+
+static int bnxt_re_fill_res_srq_entry_raw(struct sk_buff *msg, struct ib_srq *ib_srq)
{
- ib_unregister_device(&rdev->ibdev);
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_srq *srq;
+ int err, len;
+ void *data;
+
+ srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq);
+ rdev = srq->rdev;
+
+ err = bnxt_re_read_context_allowed(rdev);
+ if (err)
+ return err;
+
+ len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P7 :
+ BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P5;
+
+ data = kzalloc(len, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ err = bnxt_qplib_read_context(&rdev->rcfw, CMDQ_READ_CONTEXT_TYPE_SRQ,
+ srq->qplib_srq.id, len, data);
+ if (!err)
+ err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
+
+ kfree(data);
+ return err;
}
static const struct ib_device_ops bnxt_re_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_BNXT_RE,
+ .uverbs_abi_ver = BNXT_RE_ABI_VERSION,
+
.add_gid = bnxt_re_add_gid,
- .alloc_hw_stats = bnxt_re_ib_alloc_hw_stats,
+ .alloc_hw_port_stats = bnxt_re_ib_alloc_hw_port_stats,
.alloc_mr = bnxt_re_alloc_mr,
.alloc_pd = bnxt_re_alloc_pd,
.alloc_ucontext = bnxt_re_alloc_ucontext,
@@ -605,6 +1336,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
.create_cq = bnxt_re_create_cq,
.create_qp = bnxt_re_create_qp,
.create_srq = bnxt_re_create_srq,
+ .create_user_ah = bnxt_re_create_ah,
.dealloc_pd = bnxt_re_dealloc_pd,
.dealloc_ucontext = bnxt_re_dealloc_ucontext,
.del_gid = bnxt_re_del_gid,
@@ -613,6 +1345,8 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
.destroy_cq = bnxt_re_destroy_cq,
.destroy_qp = bnxt_re_destroy_qp,
.destroy_srq = bnxt_re_destroy_srq,
+ .device_group = &bnxt_re_dev_attr_group,
+ .disassociate_ucontext = bnxt_re_disassociate_ucontext,
.get_dev_fw_str = bnxt_re_query_fw_str,
.get_dma_mr = bnxt_re_get_dma_mr,
.get_hw_stats = bnxt_re_ib_get_hw_stats,
@@ -620,102 +1354,77 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
.get_port_immutable = bnxt_re_get_port_immutable,
.map_mr_sg = bnxt_re_map_mr_sg,
.mmap = bnxt_re_mmap,
- .modify_ah = bnxt_re_modify_ah,
- .modify_device = bnxt_re_modify_device,
+ .mmap_free = bnxt_re_mmap_free,
.modify_qp = bnxt_re_modify_qp,
.modify_srq = bnxt_re_modify_srq,
.poll_cq = bnxt_re_poll_cq,
.post_recv = bnxt_re_post_recv,
.post_send = bnxt_re_post_send,
.post_srq_recv = bnxt_re_post_srq_recv,
+ .process_mad = bnxt_re_process_mad,
.query_ah = bnxt_re_query_ah,
.query_device = bnxt_re_query_device,
+ .modify_device = bnxt_re_modify_device,
.query_pkey = bnxt_re_query_pkey,
.query_port = bnxt_re_query_port,
.query_qp = bnxt_re_query_qp,
.query_srq = bnxt_re_query_srq,
.reg_user_mr = bnxt_re_reg_user_mr,
+ .reg_user_mr_dmabuf = bnxt_re_reg_user_mr_dmabuf,
.req_notify_cq = bnxt_re_req_notify_cq,
+ .resize_cq = bnxt_re_resize_cq,
+ .create_flow = bnxt_re_create_flow,
+ .destroy_flow = bnxt_re_destroy_flow,
INIT_RDMA_OBJ_SIZE(ib_ah, bnxt_re_ah, ib_ah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, bnxt_re_cq, ib_cq),
INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd),
+ INIT_RDMA_OBJ_SIZE(ib_qp, bnxt_re_qp, ib_qp),
INIT_RDMA_OBJ_SIZE(ib_srq, bnxt_re_srq, ib_srq),
INIT_RDMA_OBJ_SIZE(ib_ucontext, bnxt_re_ucontext, ib_uctx),
};
+static const struct ib_device_ops restrack_ops = {
+ .fill_res_cq_entry = bnxt_re_fill_res_cq_entry,
+ .fill_res_cq_entry_raw = bnxt_re_fill_res_cq_entry_raw,
+ .fill_res_qp_entry = bnxt_re_fill_res_qp_entry,
+ .fill_res_qp_entry_raw = bnxt_re_fill_res_qp_entry_raw,
+ .fill_res_mr_entry = bnxt_re_fill_res_mr_entry,
+ .fill_res_mr_entry_raw = bnxt_re_fill_res_mr_entry_raw,
+ .fill_res_srq_entry = bnxt_re_fill_res_srq_entry,
+ .fill_res_srq_entry_raw = bnxt_re_fill_res_srq_entry_raw,
+};
+
static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
{
struct ib_device *ibdev = &rdev->ibdev;
int ret;
/* ib device init */
- ibdev->owner = THIS_MODULE;
ibdev->node_type = RDMA_NODE_IB_CA;
- strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA",
- strlen(BNXT_RE_DESC) + 5);
+ strscpy(ibdev->node_desc, BNXT_RE_DESC " HCA");
ibdev->phys_port_cnt = 1;
- bnxt_qplib_get_guid(rdev->netdev->dev_addr, (u8 *)&ibdev->node_guid);
+ addrconf_addr_eui48((u8 *)&ibdev->node_guid, rdev->netdev->dev_addr);
- ibdev->num_comp_vectors = 1;
+ ibdev->num_comp_vectors = rdev->nqr->num_msix - 1;
ibdev->dev.parent = &rdev->en_dev->pdev->dev;
ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY;
- /* User space */
- ibdev->uverbs_abi_ver = BNXT_RE_ABI_VERSION;
- ibdev->uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_REREG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_AH) |
- (1ull << IB_USER_VERBS_CMD_QUERY_AH) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_AH);
- /* POLL_CQ and REQ_NOTIFY_CQ is directly handled in libbnxt_re */
-
-
- rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group);
- ibdev->driver_id = RDMA_DRIVER_BNXT_RE;
+ if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
+ ibdev->driver_def = bnxt_re_uapi_defs;
+
ib_set_device_ops(ibdev, &bnxt_re_dev_ops);
+ ib_set_device_ops(ibdev, &restrack_ops);
ret = ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1);
if (ret)
return ret;
- return ib_register_device(ibdev, "bnxt_re%d");
-}
-
-static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
-{
- dev_put(rdev->netdev);
- rdev->netdev = NULL;
-
- mutex_lock(&bnxt_re_dev_lock);
- list_del_rcu(&rdev->list);
- mutex_unlock(&bnxt_re_dev_lock);
-
- synchronize_rcu();
-
- ib_dealloc_device(&rdev->ibdev);
- /* rdev is gone */
+ dma_set_max_seg_size(&rdev->en_dev->pdev->dev, UINT_MAX);
+ ibdev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ);
+ return ib_register_device(ibdev, "bnxt_re%d", &rdev->en_dev->pdev->dev);
}
-static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev,
+static struct bnxt_re_dev *bnxt_re_dev_add(struct auxiliary_device *adev,
struct bnxt_en_dev *en_dev)
{
struct bnxt_re_dev *rdev;
@@ -723,28 +1432,38 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev,
/* Allocate bnxt_re_dev instance here */
rdev = ib_alloc_device(bnxt_re_dev, ibdev);
if (!rdev) {
- dev_err(NULL, "%s: bnxt_re_dev allocation failure!",
- ROCE_DRV_MODULE_NAME);
+ ibdev_err(NULL, "%s: bnxt_re_dev allocation failure!",
+ ROCE_DRV_MODULE_NAME);
return NULL;
}
/* Default values */
- rdev->netdev = netdev;
- dev_hold(rdev->netdev);
+ rdev->netdev = en_dev->net;
rdev->en_dev = en_dev;
+ rdev->adev = adev;
rdev->id = rdev->en_dev->pdev->devfn;
INIT_LIST_HEAD(&rdev->qp_list);
mutex_init(&rdev->qp_lock);
- atomic_set(&rdev->qp_count, 0);
- atomic_set(&rdev->cq_count, 0);
- atomic_set(&rdev->srq_count, 0);
- atomic_set(&rdev->mr_count, 0);
- atomic_set(&rdev->mw_count, 0);
+ mutex_init(&rdev->pacing.dbq_lock);
+ atomic_set(&rdev->stats.res.qp_count, 0);
+ atomic_set(&rdev->stats.res.cq_count, 0);
+ atomic_set(&rdev->stats.res.srq_count, 0);
+ atomic_set(&rdev->stats.res.mr_count, 0);
+ atomic_set(&rdev->stats.res.mw_count, 0);
+ atomic_set(&rdev->stats.res.ah_count, 0);
+ atomic_set(&rdev->stats.res.pd_count, 0);
rdev->cosq[0] = 0xFFFF;
rdev->cosq[1] = 0xFFFF;
+ rdev->cq_coalescing.enable = 1;
+ rdev->cq_coalescing.buf_maxtime = BNXT_QPLIB_CQ_COAL_DEF_BUF_MAXTIME;
+ if (bnxt_re_chip_gen_p7(en_dev->chip_num)) {
+ rdev->cq_coalescing.normal_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P7;
+ rdev->cq_coalescing.during_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P7;
+ } else {
+ rdev->cq_coalescing.normal_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P5;
+ rdev->cq_coalescing.during_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P5;
+ }
+ rdev->cq_coalescing.en_ring_idle_mode = BNXT_QPLIB_CQ_COAL_DEF_EN_RING_IDLE_MODE;
- mutex_lock(&bnxt_re_dev_lock);
- list_add_tail_rcu(&rdev->list, &bnxt_re_dev_list);
- mutex_unlock(&bnxt_re_dev_lock);
return rdev;
}
@@ -783,24 +1502,163 @@ static int bnxt_re_handle_unaffi_async_event(struct creq_func_event
static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event,
struct bnxt_re_qp *qp)
{
- struct ib_event event;
+ struct creq_qp_error_notification *err_event;
+ struct bnxt_re_srq *srq = NULL;
+ struct ib_event event = {};
unsigned int flags;
- if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+ if (qp->qplib_qp.srq)
+ srq = container_of(qp->qplib_qp.srq, struct bnxt_re_srq,
+ qplib_srq);
+
+ if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR &&
+ rdma_is_kernel_res(&qp->ib_qp.res)) {
flags = bnxt_re_lock_cqs(qp);
bnxt_qplib_add_flush_qp(&qp->qplib_qp);
bnxt_re_unlock_cqs(qp, flags);
}
- memset(&event, 0, sizeof(event));
- if (qp->qplib_qp.srq) {
- event.device = &qp->rdev->ibdev;
- event.element.qp = &qp->ib_qp;
- event.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ event.device = &qp->rdev->ibdev;
+ event.element.qp = &qp->ib_qp;
+ event.event = IB_EVENT_QP_FATAL;
+
+ err_event = (struct creq_qp_error_notification *)qp_event;
+
+ switch (err_event->req_err_state_reason) {
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_OPCODE_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TIMEOUT_RETRY_LIMIT:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RNR_TIMEOUT_RETRY_LIMIT:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_2:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_3:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_READ_RESP:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_BIND:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_FAST_REG:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_INVALIDATE:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RETRAN_LOCAL_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_AV_DOMAIN_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_PROD_WQE_MSMTCH_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_PSN_RANGE_CHECK_ERROR:
+ event.event = IB_EVENT_QP_ACCESS_ERR;
+ break;
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_1:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_4:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_READ_RESP_LENGTH:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_WQE_FORMAT_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ORRQ_FORMAT_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_AVID_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_SERV_TYPE_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_OP_ERROR:
+ event.event = IB_EVENT_QP_REQ_ERR;
+ break;
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RX_MEMORY_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TX_MEMORY_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_CMP_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_CQ_LOAD_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TX_PCI_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RX_PCI_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RETX_SETUP_ERROR:
+ event.event = IB_EVENT_QP_FATAL;
+ break;
+
+ default:
+ break;
+ }
+
+ switch (err_event->res_err_state_reason) {
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_EXCEED_MAX:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PAYLOAD_LENGTH_MISMATCH:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PSN_SEQ_ERROR_RETRY_LIMIT:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_INVALID_R_KEY:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_DOMAIN_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_NO_PERMISSION:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_RANGE_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_INVALID_R_KEY:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_DOMAIN_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_NO_PERMISSION:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_RANGE_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_UNALIGN_ATOMIC:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PSN_NOT_FOUND:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_INVALID_DUP_RKEY:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_IRRQ_FORMAT_ERROR:
+ event.event = IB_EVENT_QP_ACCESS_ERR;
+ break;
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_EXCEEDS_WQE:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_WQE_FORMAT_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_UNSUPPORTED_OPCODE:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_REM_INVALIDATE:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_OPCODE_ERROR:
+ event.event = IB_EVENT_QP_REQ_ERR;
+ break;
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_IRRQ_OFLOW:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_CMP_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_CQ_LOAD_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_PCI_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_PCI_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_MEMORY_ERROR:
+ event.event = IB_EVENT_QP_FATAL;
+ break;
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_SRQ_LOAD_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_SRQ_ERROR:
+ if (srq)
+ event.event = IB_EVENT_SRQ_ERR;
+ break;
+ default:
+ break;
+ }
+
+ if (err_event->res_err_state_reason || err_event->req_err_state_reason) {
+ ibdev_dbg(&qp->rdev->ibdev,
+ "%s %s qp_id: %d cons (%d %d) req (%d %d) res (%d %d)\n",
+ __func__, rdma_is_kernel_res(&qp->ib_qp.res) ? "kernel" : "user",
+ qp->qplib_qp.id,
+ err_event->sq_cons_idx,
+ err_event->rq_cons_idx,
+ err_event->req_slow_path_state,
+ err_event->req_err_state_reason,
+ err_event->res_slow_path_state,
+ err_event->res_err_state_reason);
+ } else {
+ if (srq)
+ event.event = IB_EVENT_QP_LAST_WQE_REACHED;
}
- if (event.device && qp->ib_qp.event_handler)
+ if (event.event == IB_EVENT_SRQ_ERR && srq->ib_srq.event_handler) {
+ (*srq->ib_srq.event_handler)(&event,
+ srq->ib_srq.srq_context);
+ } else if (event.device && qp->ib_qp.event_handler) {
qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
+ }
+
+ return 0;
+}
+
+static int bnxt_re_handle_cq_async_error(void *event, struct bnxt_re_cq *cq)
+{
+ struct creq_cq_error_notification *cqerr;
+ struct ib_event ibevent = {};
+
+ cqerr = event;
+ switch (cqerr->cq_err_reason) {
+ case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_INVALID_ERROR:
+ case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_OVERFLOW_ERROR:
+ case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_LOAD_ERROR:
+ case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_INVALID_ERROR:
+ case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_OVERFLOW_ERROR:
+ case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_LOAD_ERROR:
+ ibevent.event = IB_EVENT_CQ_ERR;
+ break;
+ default:
+ break;
+ }
+
+ if (ibevent.event == IB_EVENT_CQ_ERR && cq->ib_cq.event_handler) {
+ ibevent.element.cq = &cq->ib_cq;
+ ibevent.device = &cq->rdev->ibdev;
+
+ ibdev_dbg(&cq->rdev->ibdev,
+ "%s err reason %d\n", __func__, cqerr->cq_err_reason);
+ cq->ib_cq.event_handler(&ibevent, cq->ib_cq.cq_context);
+ }
return 0;
}
@@ -808,6 +1666,10 @@ static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event,
static int bnxt_re_handle_affi_async_event(struct creq_qp_event *affi_async,
void *obj)
{
+ struct bnxt_qplib_qp *lib_qp;
+ struct bnxt_qplib_cq *lib_cq;
+ struct bnxt_re_qp *qp;
+ struct bnxt_re_cq *cq;
int rc = 0;
u8 event;
@@ -815,11 +1677,19 @@ static int bnxt_re_handle_affi_async_event(struct creq_qp_event *affi_async,
return rc; /* QP was already dead, still return success */
event = affi_async->event;
- if (event == CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION) {
- struct bnxt_qplib_qp *lib_qp = obj;
- struct bnxt_re_qp *qp = container_of(lib_qp, struct bnxt_re_qp,
- qplib_qp);
+ switch (event) {
+ case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
+ lib_qp = obj;
+ qp = container_of(lib_qp, struct bnxt_re_qp, qplib_qp);
rc = bnxt_re_handle_qp_async_event(affi_async, qp);
+ break;
+ case CREQ_QP_EVENT_EVENT_CQ_ERROR_NOTIFICATION:
+ lib_cq = obj;
+ cq = container_of(lib_cq, struct bnxt_re_cq, qplib_cq);
+ rc = bnxt_re_handle_cq_async_error(affi_async, cq);
+ break;
+ default:
+ rc = -EINVAL;
}
return rc;
}
@@ -850,28 +1720,17 @@ static int bnxt_re_srqn_handler(struct bnxt_qplib_nq *nq,
struct bnxt_re_srq *srq = container_of(handle, struct bnxt_re_srq,
qplib_srq);
struct ib_event ib_event;
- int rc = 0;
- if (!srq) {
- dev_err(NULL, "%s: SRQ is NULL, SRQN not handled",
- ROCE_DRV_MODULE_NAME);
- rc = -EINVAL;
- goto done;
- }
ib_event.device = &srq->rdev->ibdev;
ib_event.element.srq = &srq->ib_srq;
- if (event == NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT)
- ib_event.event = IB_EVENT_SRQ_LIMIT_REACHED;
- else
- ib_event.event = IB_EVENT_SRQ_ERR;
if (srq->ib_srq.event_handler) {
- /* Lock event_handler? */
+ if (event == NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT)
+ ib_event.event = IB_EVENT_SRQ_LIMIT_REACHED;
(*srq->ib_srq.event_handler)(&ib_event,
srq->ib_srq.srq_context);
}
-done:
- return rc;
+ return 0;
}
static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
@@ -880,31 +1739,18 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
struct bnxt_re_cq *cq = container_of(handle, struct bnxt_re_cq,
qplib_cq);
- if (!cq) {
- dev_err(NULL, "%s: CQ is NULL, CQN not handled",
- ROCE_DRV_MODULE_NAME);
- return -EINVAL;
- }
- if (cq->ib_cq.comp_handler) {
- /* Lock comp_handler? */
+ if (cq->ib_cq.comp_handler)
(*cq->ib_cq.comp_handler)(&cq->ib_cq, cq->ib_cq.cq_context);
- }
return 0;
}
-static u32 bnxt_re_get_nqdb_offset(struct bnxt_re_dev *rdev, u16 indx)
-{
- return bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx) ?
- 0x10000 : rdev->msix_entries[indx].db_offset;
-}
-
static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
{
int i;
- for (i = 1; i < rdev->num_msix; i++)
- bnxt_qplib_disable_nq(&rdev->nq[i - 1]);
+ for (i = 1; i < rdev->nqr->num_msix; i++)
+ bnxt_qplib_disable_nq(&rdev->nqr->nq[i - 1]);
if (rdev->qplib_res.rcfw)
bnxt_qplib_cleanup_res(&rdev->qplib_res);
@@ -918,15 +1764,17 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
bnxt_qplib_init_res(&rdev->qplib_res);
- for (i = 1; i < rdev->num_msix ; i++) {
- db_offt = bnxt_re_get_nqdb_offset(rdev, i);
- rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1],
- i - 1, rdev->msix_entries[i].vector,
+ mutex_init(&rdev->nqr->load_lock);
+
+ for (i = 1; i < rdev->nqr->num_msix ; i++) {
+ db_offt = rdev->nqr->msix_entries[i].db_offset;
+ rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nqr->nq[i - 1],
+ i - 1, rdev->nqr->msix_entries[i].vector,
db_offt, &bnxt_re_cqn_handler,
&bnxt_re_srqn_handler);
if (rc) {
- dev_err(rdev_to_dev(rdev),
- "Failed to enable NQ with rc = 0x%x", rc);
+ ibdev_err(&rdev->ibdev,
+ "Failed to enable NQ with rc = 0x%x", rc);
goto fail;
}
num_vec_enabled++;
@@ -934,20 +1782,22 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
return 0;
fail:
for (i = num_vec_enabled; i >= 0; i--)
- bnxt_qplib_disable_nq(&rdev->nq[i]);
+ bnxt_qplib_disable_nq(&rdev->nqr->nq[i]);
return rc;
}
static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev)
{
+ struct bnxt_qplib_nq *nq;
u8 type;
int i;
- for (i = 0; i < rdev->num_msix - 1; i++) {
- type = bnxt_qplib_get_ring_type(&rdev->chip_ctx);
- bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type);
- rdev->nq[i].res = NULL;
- bnxt_qplib_free_nq(&rdev->nq[i]);
+ for (i = 0; i < rdev->nqr->num_msix - 1; i++) {
+ type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
+ nq = &rdev->nqr->nq[i];
+ bnxt_re_net_ring_free(rdev, nq->ring_id, type);
+ bnxt_qplib_free_nq(nq);
+ nq->res = NULL;
}
}
@@ -957,7 +1807,6 @@ static void bnxt_re_free_res(struct bnxt_re_dev *rdev)
if (rdev->qplib_res.dpi_tbl.max) {
bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
- &rdev->qplib_res.dpi_tbl,
&rdev->dpi_privileged);
}
if (rdev->qplib_res.rcfw) {
@@ -968,65 +1817,63 @@ static void bnxt_re_free_res(struct bnxt_re_dev *rdev)
static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
{
+ struct bnxt_re_ring_attr rattr = {};
int num_vec_created = 0;
- dma_addr_t *pg_map;
- int rc = 0, i;
- int pages;
+ int rc, i;
u8 type;
/* Configure and allocate resources for qplib */
rdev->qplib_res.rcfw = &rdev->rcfw;
- rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr,
- rdev->is_virtfn);
+ rc = bnxt_qplib_get_dev_attr(&rdev->rcfw);
if (rc)
goto fail;
- rc = bnxt_qplib_alloc_res(&rdev->qplib_res, rdev->en_dev->pdev,
- rdev->netdev, &rdev->dev_attr);
+ rc = bnxt_qplib_alloc_res(&rdev->qplib_res, rdev->netdev);
if (rc)
goto fail;
- rc = bnxt_qplib_alloc_dpi(&rdev->qplib_res.dpi_tbl,
+ rc = bnxt_qplib_alloc_dpi(&rdev->qplib_res,
&rdev->dpi_privileged,
- rdev);
+ rdev, BNXT_QPLIB_DPI_TYPE_KERNEL);
if (rc)
goto dealloc_res;
- for (i = 0; i < rdev->num_msix - 1; i++) {
- rdev->nq[i].res = &rdev->qplib_res;
- rdev->nq[i].hwq.max_elements = BNXT_RE_MAX_CQ_COUNT +
- BNXT_RE_MAX_SRQC_COUNT + 2;
- rc = bnxt_qplib_alloc_nq(rdev->en_dev->pdev, &rdev->nq[i]);
+ for (i = 0; i < rdev->nqr->num_msix - 1; i++) {
+ struct bnxt_qplib_nq *nq;
+
+ nq = &rdev->nqr->nq[i];
+ nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT;
+ rc = bnxt_qplib_alloc_nq(&rdev->qplib_res, nq);
if (rc) {
- dev_err(rdev_to_dev(rdev), "Alloc Failed NQ%d rc:%#x",
- i, rc);
+ ibdev_err(&rdev->ibdev, "Alloc Failed NQ%d rc:%#x",
+ i, rc);
goto free_nq;
}
- type = bnxt_qplib_get_ring_type(&rdev->chip_ctx);
- pg_map = rdev->nq[i].hwq.pbl[PBL_LVL_0].pg_map_arr;
- pages = rdev->nq[i].hwq.pbl[rdev->nq[i].hwq.level].pg_count;
- rc = bnxt_re_net_ring_alloc(rdev, pg_map, pages, type,
- BNXT_QPLIB_NQE_MAX_CNT - 1,
- rdev->msix_entries[i + 1].ring_idx,
- &rdev->nq[i].ring_id);
+ type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
+ rattr.dma_arr = nq->hwq.pbl[PBL_LVL_0].pg_map_arr;
+ rattr.pages = nq->hwq.pbl[rdev->nqr->nq[i].hwq.level].pg_count;
+ rattr.type = type;
+ rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX;
+ rattr.depth = BNXT_QPLIB_NQE_MAX_CNT - 1;
+ rattr.lrid = rdev->nqr->msix_entries[i + 1].ring_idx;
+ rc = bnxt_re_net_ring_alloc(rdev, &rattr, &nq->ring_id);
if (rc) {
- dev_err(rdev_to_dev(rdev),
- "Failed to allocate NQ fw id with rc = 0x%x",
- rc);
- bnxt_qplib_free_nq(&rdev->nq[i]);
+ ibdev_err(&rdev->ibdev,
+ "Failed to allocate NQ fw id with rc = 0x%x",
+ rc);
+ bnxt_qplib_free_nq(nq);
goto free_nq;
}
num_vec_created++;
}
return 0;
free_nq:
- for (i = num_vec_created; i >= 0; i--) {
- type = bnxt_qplib_get_ring_type(&rdev->chip_ctx);
- bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type);
- bnxt_qplib_free_nq(&rdev->nq[i]);
+ for (i = num_vec_created - 1; i >= 0; i--) {
+ type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
+ bnxt_re_net_ring_free(rdev, rdev->nqr->nq[i].ring_id, type);
+ bnxt_qplib_free_nq(&rdev->nqr->nq[i]);
}
bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
- &rdev->qplib_res.dpi_tbl,
&rdev->dpi_privileged);
dealloc_res:
bnxt_qplib_free_res(&rdev->qplib_res);
@@ -1055,63 +1902,17 @@ static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp,
}
}
-#define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN 0x02
-static int bnxt_re_query_hwrm_pri2cos(struct bnxt_re_dev *rdev, u8 dir,
- u64 *cid_map)
-{
- struct hwrm_queue_pri2cos_qcfg_input req = {0};
- struct bnxt *bp = netdev_priv(rdev->netdev);
- struct hwrm_queue_pri2cos_qcfg_output resp;
- struct bnxt_en_dev *en_dev = rdev->en_dev;
- struct bnxt_fw_msg fw_msg;
- u32 flags = 0;
- u8 *qcfgmap, *tmp_map;
- int rc = 0, i;
-
- if (!cid_map)
- return -EINVAL;
-
- memset(&fw_msg, 0, sizeof(fw_msg));
- bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
- HWRM_QUEUE_PRI2COS_QCFG, -1, -1);
- flags |= (dir & 0x01);
- flags |= HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN;
- req.flags = cpu_to_le32(flags);
- req.port_id = bp->pf.port_id;
-
- bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
- sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
- rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
- if (rc)
- return rc;
-
- if (resp.queue_cfg_info) {
- dev_warn(rdev_to_dev(rdev),
- "Asymmetric cos queue configuration detected");
- dev_warn(rdev_to_dev(rdev),
- " on device, QoS may not be fully functional\n");
- }
- qcfgmap = &resp.pri0_cos_queue_id;
- tmp_map = (u8 *)cid_map;
- for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
- tmp_map[i] = qcfgmap[i];
-
- return rc;
-}
-
static bool bnxt_re_is_qp1_or_shadow_qp(struct bnxt_re_dev *rdev,
struct bnxt_re_qp *qp)
{
- return (qp->ib_qp.qp_type == IB_QPT_GSI) || (qp == rdev->qp1_sqp);
+ return (qp->ib_qp.qp_type == IB_QPT_GSI) ||
+ (qp == rdev->gsi_ctx.gsi_sqp);
}
static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev)
{
- int mask = IB_QP_STATE;
- struct ib_qp_attr qp_attr;
struct bnxt_re_qp *qp;
- qp_attr.qp_state = IB_QPS_ERR;
mutex_lock(&rdev->qp_lock);
list_for_each_entry(qp, &rdev->qp_list, list) {
/* Modify the state of all QPs except QP1/Shadow QP */
@@ -1119,173 +1920,233 @@ static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev)
if (qp->qplib_qp.state !=
CMDQ_MODIFY_QP_NEW_STATE_RESET &&
qp->qplib_qp.state !=
- CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+ CMDQ_MODIFY_QP_NEW_STATE_ERR)
bnxt_re_dispatch_event(&rdev->ibdev, &qp->ib_qp,
1, IB_EVENT_QP_FATAL);
- bnxt_re_modify_qp(&qp->ib_qp, &qp_attr, mask,
- NULL);
- }
}
}
mutex_unlock(&rdev->qp_lock);
}
-static int bnxt_re_update_gid(struct bnxt_re_dev *rdev)
+static void bnxt_re_net_unregister_async_event(struct bnxt_re_dev *rdev)
{
- struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
- struct bnxt_qplib_gid gid;
- u16 gid_idx, index;
- int rc = 0;
+ if (rdev->is_virtfn)
+ return;
- if (!test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))
- return 0;
+ memset(&rdev->event_bitmap, 0, sizeof(rdev->event_bitmap));
+ bnxt_register_async_events(rdev->en_dev, &rdev->event_bitmap,
+ ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE);
+}
- if (!sgid_tbl) {
- dev_err(rdev_to_dev(rdev), "QPLIB: SGID table not allocated");
- return -EINVAL;
+static void bnxt_re_net_register_async_event(struct bnxt_re_dev *rdev)
+{
+ if (rdev->is_virtfn)
+ return;
+
+ rdev->event_bitmap |= (1 << ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE);
+ bnxt_register_async_events(rdev->en_dev, &rdev->event_bitmap,
+ ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE);
+}
+
+static void bnxt_re_read_vpd_info(struct bnxt_re_dev *rdev)
+{
+ struct pci_dev *pdev = rdev->en_dev->pdev;
+ unsigned int vpd_size, kw_len;
+ int pos, size;
+ u8 *vpd_data;
+
+ vpd_data = pci_vpd_alloc(pdev, &vpd_size);
+ if (IS_ERR(vpd_data)) {
+ pci_warn(pdev, "Unable to read VPD, err=%pe\n", vpd_data);
+ return;
}
- for (index = 0; index < sgid_tbl->active; index++) {
- gid_idx = sgid_tbl->hw_id[index];
+ pos = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size,
+ PCI_VPD_RO_KEYWORD_PARTNO, &kw_len);
+ if (pos < 0)
+ goto free;
- if (!memcmp(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero,
- sizeof(bnxt_qplib_gid_zero)))
- continue;
- /* need to modify the VLAN enable setting of non VLAN GID only
- * as setting is done for VLAN GID while adding GID
- */
- if (sgid_tbl->vlan[index])
- continue;
+ size = min_t(int, kw_len, BNXT_VPD_FLD_LEN - 1);
+ memcpy(rdev->board_partno, &vpd_data[pos], size);
+free:
+ kfree(vpd_data);
+}
- memcpy(&gid, &sgid_tbl->tbl[index], sizeof(gid));
+static int bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_ver_get_output resp = {};
+ struct hwrm_ver_get_input req = {};
+ struct bnxt_qplib_chip_ctx *cctx;
+ struct bnxt_fw_msg fw_msg = {};
+ int rc;
- rc = bnxt_qplib_update_sgid(sgid_tbl, &gid, gid_idx,
- rdev->qplib_res.netdev->dev_addr);
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_VER_GET);
+ req.hwrm_intf_maj = HWRM_VERSION_MAJOR;
+ req.hwrm_intf_min = HWRM_VERSION_MINOR;
+ req.hwrm_intf_upd = HWRM_VERSION_UPDATE;
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ rc = bnxt_send_msg(en_dev, &fw_msg);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to query HW version, rc = 0x%x",
+ rc);
+ return rc;
}
- return rc;
+ cctx = rdev->chip_ctx;
+ cctx->hwrm_intf_ver =
+ (u64)le16_to_cpu(resp.hwrm_intf_major) << 48 |
+ (u64)le16_to_cpu(resp.hwrm_intf_minor) << 32 |
+ (u64)le16_to_cpu(resp.hwrm_intf_build) << 16 |
+ le16_to_cpu(resp.hwrm_intf_patch);
+
+ cctx->hwrm_cmd_max_timeout = le16_to_cpu(resp.max_req_timeout);
+
+ if (!cctx->hwrm_cmd_max_timeout)
+ cctx->hwrm_cmd_max_timeout = RCFW_FW_STALL_MAX_TIMEOUT;
+
+ return 0;
}
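
The four 16-bit interface version fields from HWRM_VER_GET are packed into one 64-bit value so later checks reduce to a single integer comparison. A small sketch of the packing and a hypothetical feature gate built on it:

#include <stdio.h>
#include <stdint.h>

static uint64_t pack_hwrm_ver(uint16_t major, uint16_t minor,
                              uint16_t build, uint16_t patch)
{
        return (uint64_t)major << 48 | (uint64_t)minor << 32 |
               (uint64_t)build << 16 | patch;
}

int main(void)
{
        /* Hypothetical firmware interface version 1.10.2.95 */
        uint64_t ver = pack_hwrm_ver(1, 10, 2, 95);

        printf("hwrm_intf_ver = 0x%016llx\n", (unsigned long long)ver);
        /* e.g. a feature gate: only use an API at or above 1.10.0.0 */
        if (ver >= pack_hwrm_ver(1, 10, 0, 0))
                printf("interface >= 1.10.0.0\n");
        return 0;
}
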
-static u32 bnxt_re_get_priority_mask(struct bnxt_re_dev *rdev)
+static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
{
- u32 prio_map = 0, tmp_map = 0;
- struct net_device *netdev;
- struct dcb_app app;
+ int rc;
+ u32 event;
- netdev = rdev->netdev;
+ /* Register ib dev */
+ rc = bnxt_re_register_ib(rdev);
+ if (rc) {
+ pr_err("Failed to register with IB: %#x\n", rc);
+ return rc;
+ }
+ dev_info(rdev_to_dev(rdev), "Device registered with IB successfully");
+ set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
- memset(&app, 0, sizeof(app));
- app.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE;
- app.protocol = ETH_P_IBOE;
- tmp_map = dcb_ieee_getapp_mask(netdev, &app);
- prio_map = tmp_map;
+ event = netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev) ?
+ IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
- app.selector = IEEE_8021QAZ_APP_SEL_DGRAM;
- app.protocol = ROCE_V2_UDP_DPORT;
- tmp_map = dcb_ieee_getapp_mask(netdev, &app);
- prio_map |= tmp_map;
+ bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, event);
- return prio_map;
+ return rc;
}
-static void bnxt_re_parse_cid_map(u8 prio_map, u8 *cid_map, u16 *cosq)
+static int bnxt_re_alloc_nqr_mem(struct bnxt_re_dev *rdev)
{
- u16 prio;
- u8 id;
+ rdev->nqr = kzalloc(sizeof(*rdev->nqr), GFP_KERNEL);
+ if (!rdev->nqr)
+ return -ENOMEM;
- for (prio = 0, id = 0; prio < 8; prio++) {
- if (prio_map & (1 << prio)) {
- cosq[id] = cid_map[prio];
- id++;
- if (id == 2) /* Max 2 tcs supported */
- break;
- }
+ return 0;
+}
+
+static void bnxt_re_free_nqr_mem(struct bnxt_re_dev *rdev)
+{
+ kfree(rdev->nqr);
+ rdev->nqr = NULL;
+}
+
+/* When DEL_GID fails, the driver does not free the GID ctx memory.
+ * To avoid a memory leak, free that memory during unload.
+ */
+static void bnxt_re_free_gid_ctx(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
+ struct bnxt_re_gid_ctx *ctx, **ctx_tbl;
+ int i;
+
+ if (!sgid_tbl->active)
+ return;
+
+ ctx_tbl = sgid_tbl->ctx;
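+	/* An hw_id of 0xFFFF marks an unused SGID slot; only populated
+	 * entries have a GID context to free.
+	 */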
+ for (i = 0; i < sgid_tbl->max; i++) {
+ if (sgid_tbl->hw_id[i] == 0xFFFF)
+ continue;
+
+ ctx = ctx_tbl[i];
+ kfree(ctx);
}
}
-static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
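+/* Allocate the stats context memory and register it with the firmware;
+ * if the firmware call fails, the DMA memory is released again.
+ */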
+static int bnxt_re_get_stats_ctx(struct bnxt_re_dev *rdev)
{
- u8 prio_map = 0;
- u64 cid_map;
+ struct bnxt_qplib_ctx *hctx = &rdev->qplib_ctx;
+ struct bnxt_qplib_res *res = &rdev->qplib_res;
int rc;
- /* Get priority for roce */
- prio_map = bnxt_re_get_priority_mask(rdev);
+ rc = bnxt_qplib_alloc_stats_ctx(res->pdev, res->cctx, &hctx->stats);
+ if (rc)
+ return rc;
+
+ rc = bnxt_re_net_stats_ctx_alloc(rdev, &hctx->stats);
+ if (rc)
+ goto free_stat_mem;
+
+ return 0;
+free_stat_mem:
+ bnxt_qplib_free_stats_ctx(res->pdev, &hctx->stats);
- if (prio_map == rdev->cur_prio_map)
+ return rc;
+}
+
+static int bnxt_re_get_stats3_ctx(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_ctx *hctx = &rdev->qplib_ctx;
+ struct bnxt_qplib_res *res = &rdev->qplib_res;
+ int rc;
+
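+	/* The third stats context is used only when RoCE mirroring is
+	 * supported (rcfw.roce_mirror); skip the allocation otherwise.
+	 */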
+ if (!rdev->rcfw.roce_mirror)
return 0;
- rdev->cur_prio_map = prio_map;
- /* Get cosq id for this priority */
- rc = bnxt_re_query_hwrm_pri2cos(rdev, 0, &cid_map);
- if (rc) {
- dev_warn(rdev_to_dev(rdev), "no cos for p_mask %x\n", prio_map);
- return rc;
- }
- /* Parse CoS IDs for app priority */
- bnxt_re_parse_cid_map(prio_map, (u8 *)&cid_map, rdev->cosq);
- /* Config BONO. */
- rc = bnxt_qplib_map_tc2cos(&rdev->qplib_res, rdev->cosq);
- if (rc) {
- dev_warn(rdev_to_dev(rdev), "no tc for cos{%x, %x}\n",
- rdev->cosq[0], rdev->cosq[1]);
+ rc = bnxt_qplib_alloc_stats_ctx(res->pdev, res->cctx, &hctx->stats3);
+ if (rc)
return rc;
- }
-
- /* Actual priorities are not programmed as they are already
- * done by L2 driver; just enable or disable priority vlan tagging
- */
- if ((prio_map == 0 && rdev->qplib_res.prio) ||
- (prio_map != 0 && !rdev->qplib_res.prio)) {
- rdev->qplib_res.prio = prio_map ? true : false;
- bnxt_re_update_gid(rdev);
- }
+ rc = bnxt_re_net_stats_ctx_alloc(rdev, &hctx->stats3);
+ if (rc)
+ goto free_stat_mem;
return 0;
+free_stat_mem:
+ bnxt_qplib_free_stats_ctx(res->pdev, &hctx->stats3);
+
+ return rc;
}
-static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
+static void bnxt_re_put_stats3_ctx(struct bnxt_re_dev *rdev)
{
- struct bnxt_en_dev *en_dev = rdev->en_dev;
- struct hwrm_ver_get_output resp = {0};
- struct hwrm_ver_get_input req = {0};
- struct bnxt_fw_msg fw_msg;
- int rc = 0;
+ struct bnxt_qplib_ctx *hctx = &rdev->qplib_ctx;
+ struct bnxt_qplib_res *res = &rdev->qplib_res;
- memset(&fw_msg, 0, sizeof(fw_msg));
- bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
- HWRM_VER_GET, -1, -1);
- req.hwrm_intf_maj = HWRM_VERSION_MAJOR;
- req.hwrm_intf_min = HWRM_VERSION_MINOR;
- req.hwrm_intf_upd = HWRM_VERSION_UPDATE;
- bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
- sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
- rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
- if (rc) {
- dev_err(rdev_to_dev(rdev),
- "Failed to query HW version, rc = 0x%x", rc);
+ if (!rdev->rcfw.roce_mirror)
return;
- }
- rdev->qplib_ctx.hwrm_intf_ver =
- (u64)resp.hwrm_intf_major << 48 |
- (u64)resp.hwrm_intf_minor << 32 |
- (u64)resp.hwrm_intf_build << 16 |
- resp.hwrm_intf_patch;
+
+ bnxt_re_net_stats_ctx_free(rdev, hctx->stats3.fw_id);
+ bnxt_qplib_free_stats_ctx(res->pdev, &hctx->stats3);
}
-static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
+static void bnxt_re_put_stats_ctx(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_ctx *hctx = &rdev->qplib_ctx;
+ struct bnxt_qplib_res *res = &rdev->qplib_res;
+
+ bnxt_re_net_stats_ctx_free(rdev, hctx->stats.fw_id);
+ bnxt_qplib_free_stats_ctx(res->pdev, &hctx->stats);
+}
+
+static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type)
{
u8 type;
int rc;
- if (test_and_clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) {
- /* Cleanup ib dev */
- bnxt_re_unregister_ib(rdev);
- }
- if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
- cancel_delayed_work_sync(&rdev->worker);
+ bnxt_re_debugfs_rem_pdev(rdev);
+
+ bnxt_re_net_unregister_async_event(rdev);
+ bnxt_re_uninit_dcb_wq(rdev);
+
+ bnxt_re_put_stats3_ctx(rdev);
+ bnxt_re_free_gid_ctx(rdev);
if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED,
&rdev->flags))
bnxt_re_cleanup_res(rdev);
@@ -1295,138 +2156,157 @@ static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) {
rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw);
if (rc)
- dev_warn(rdev_to_dev(rdev),
- "Failed to deinitialize RCFW: %#x", rc);
- bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
- bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx);
+ ibdev_warn(&rdev->ibdev,
+ "Failed to deinitialize RCFW: %#x", rc);
+ bnxt_re_put_stats_ctx(rdev);
+ bnxt_qplib_free_hwctx(&rdev->qplib_res, &rdev->qplib_ctx);
bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
- type = bnxt_qplib_get_ring_type(&rdev->chip_ctx);
- bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, type);
+ type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
+ bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type);
bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
}
- if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags)) {
- rc = bnxt_re_free_msix(rdev);
- if (rc)
- dev_warn(rdev_to_dev(rdev),
- "Failed to free MSI-X vectors: %#x", rc);
- }
- bnxt_re_destroy_chip_ctx(rdev);
- if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) {
- rc = bnxt_re_unregister_netdev(rdev);
- if (rc)
- dev_warn(rdev_to_dev(rdev),
- "Failed to unregister with netdev: %#x", rc);
- }
-}
+ rdev->nqr->num_msix = 0;
-/* worker thread for polling periodic events. Now used for QoS programming*/
-static void bnxt_re_worker(struct work_struct *work)
-{
- struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
- worker.work);
+ if (rdev->pacing.dbr_pacing)
+ bnxt_re_deinitialize_dbr_pacing(rdev);
- bnxt_re_setup_qos(rdev);
- schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
+ bnxt_re_free_nqr_mem(rdev);
+ bnxt_re_destroy_chip_ctx(rdev);
+ if (op_type == BNXT_RE_COMPLETE_REMOVE) {
+ if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags))
+ bnxt_unregister_dev(rdev->en_dev);
+ }
}
-static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
+static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
{
- dma_addr_t *pg_map;
- u32 db_offt, ridx;
- int pages, vid;
- bool locked;
+ struct bnxt_re_ring_attr rattr = {};
+ struct bnxt_qplib_creq_ctx *creq;
+ u32 db_offt;
+ int vid;
u8 type;
int rc;
- /* Acquire rtnl lock through out this function */
- rtnl_lock();
- locked = true;
+ if (op_type == BNXT_RE_COMPLETE_INIT) {
+ /* Register a new RoCE device instance with the netdev */
+ rc = bnxt_re_register_netdev(rdev);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to register with Ethernet driver, rc %d\n",
+ rc);
+ return rc;
+ }
+ }
+ set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
- /* Registered a new RoCE device instance to netdev */
- rc = bnxt_re_register_netdev(rdev);
- if (rc) {
- rtnl_unlock();
- pr_err("Failed to register with netedev: %#x\n", rc);
+ if (rdev->en_dev->ulp_tbl->msix_requested < BNXT_RE_MIN_MSIX) {
+ ibdev_err(&rdev->ibdev,
+ "RoCE requires minimum 2 MSI-X vectors, but only %d reserved\n",
+ rdev->en_dev->ulp_tbl->msix_requested);
+ bnxt_unregister_dev(rdev->en_dev);
+ clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
return -EINVAL;
}
- set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+ ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n",
+ rdev->en_dev->ulp_tbl->msix_requested);
rc = bnxt_re_setup_chip_ctx(rdev);
if (rc) {
- dev_err(rdev_to_dev(rdev), "Failed to get chip context\n");
+ bnxt_unregister_dev(rdev->en_dev);
+ clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+ ibdev_err(&rdev->ibdev, "Failed to get chip context\n");
return -EINVAL;
}
- /* Check whether VF or PF */
- bnxt_re_get_sriov_func_type(rdev);
-
- rc = bnxt_re_request_msix(rdev);
+ rc = bnxt_re_alloc_nqr_mem(rdev);
if (rc) {
- pr_err("Failed to get MSI-X vectors: %#x\n", rc);
- rc = -EINVAL;
- goto fail;
+ bnxt_re_destroy_chip_ctx(rdev);
+ bnxt_unregister_dev(rdev->en_dev);
+ clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+ return rc;
}
- set_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags);
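+	/* Cache the number of MSI-X vectors and their entries reserved by
+	 * the L2 driver; the NQ setup code uses this copy later.
+	 */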
+ rdev->nqr->num_msix = rdev->en_dev->ulp_tbl->msix_requested;
+ memcpy(rdev->nqr->msix_entries, rdev->en_dev->msix_entries,
+ sizeof(struct bnxt_msix_entry) * rdev->nqr->num_msix);
- bnxt_re_query_hwrm_intf_version(rdev);
+ /* Check whether VF or PF */
+ bnxt_re_get_sriov_func_type(rdev);
/* Establish RCFW Communication Channel to initialize the context
* memory for the function and all child VFs
*/
- rc = bnxt_qplib_alloc_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw,
- &rdev->qplib_ctx,
- BNXT_RE_MAX_QPC_COUNT);
+ rc = bnxt_qplib_alloc_rcfw_channel(&rdev->qplib_res, &rdev->rcfw,
+ &rdev->qplib_ctx);
if (rc) {
- pr_err("Failed to allocate RCFW Channel: %#x\n", rc);
+ ibdev_err(&rdev->ibdev,
+ "Failed to allocate RCFW Channel: %#x\n", rc);
goto fail;
}
- type = bnxt_qplib_get_ring_type(&rdev->chip_ctx);
- pg_map = rdev->rcfw.creq.pbl[PBL_LVL_0].pg_map_arr;
- pages = rdev->rcfw.creq.pbl[rdev->rcfw.creq.level].pg_count;
- ridx = rdev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx;
- rc = bnxt_re_net_ring_alloc(rdev, pg_map, pages, type,
- BNXT_QPLIB_CREQE_MAX_CNT - 1,
- ridx, &rdev->rcfw.creq_ring_id);
+
+ type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
+ creq = &rdev->rcfw.creq;
+ rattr.dma_arr = creq->hwq.pbl[PBL_LVL_0].pg_map_arr;
+ rattr.pages = creq->hwq.pbl[creq->hwq.level].pg_count;
+ rattr.type = type;
+ rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX;
+ rattr.depth = BNXT_QPLIB_CREQE_MAX_CNT - 1;
+ rattr.lrid = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].ring_idx;
+ rc = bnxt_re_net_ring_alloc(rdev, &rattr, &creq->ring_id);
if (rc) {
- pr_err("Failed to allocate CREQ: %#x\n", rc);
+ ibdev_err(&rdev->ibdev, "Failed to allocate CREQ: %#x\n", rc);
goto free_rcfw;
}
- db_offt = bnxt_re_get_nqdb_offset(rdev, BNXT_RE_AEQ_IDX);
- vid = rdev->msix_entries[BNXT_RE_AEQ_IDX].vector;
- rc = bnxt_qplib_enable_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw,
- vid, db_offt, rdev->is_virtfn,
+ db_offt = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].db_offset;
+ vid = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].vector;
+ rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw,
+ vid, db_offt,
&bnxt_re_aeq_handler);
if (rc) {
- pr_err("Failed to enable RCFW channel: %#x\n", rc);
+ ibdev_err(&rdev->ibdev, "Failed to enable RCFW channel: %#x\n",
+ rc);
goto free_ring;
}
- rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr,
- rdev->is_virtfn);
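+	/* Doorbell (DBR) pacing is optional: if its initialization fails,
+	 * continue without pacing instead of failing device init.
+	 */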
+ if (bnxt_qplib_dbr_pacing_en(rdev->chip_ctx)) {
+ rc = bnxt_re_initialize_dbr_pacing(rdev);
+ if (!rc) {
+ rdev->pacing.dbr_pacing = true;
+ } else {
+ ibdev_err(&rdev->ibdev,
+ "DBR pacing disabled with error : %d\n", rc);
+ rdev->pacing.dbr_pacing = false;
+ }
+ }
+ rc = bnxt_qplib_get_dev_attr(&rdev->rcfw);
if (rc)
goto disable_rcfw;
- if (!rdev->is_virtfn)
- bnxt_re_set_resource_limits(rdev);
- rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0,
- bnxt_qplib_is_chip_gen_p5(&rdev->chip_ctx));
- if (rc) {
- pr_err("Failed to allocate QPLIB context: %#x\n", rc);
- goto disable_rcfw;
+ bnxt_qplib_query_version(&rdev->rcfw);
+ bnxt_re_set_resource_limits(rdev);
+
+ if (!rdev->is_virtfn &&
+ !bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) {
+ rc = bnxt_qplib_alloc_hwctx(&rdev->qplib_res, &rdev->qplib_ctx);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to allocate hw context: %#x\n", rc);
+ goto disable_rcfw;
+ }
}
- rc = bnxt_re_net_stats_ctx_alloc(rdev,
- rdev->qplib_ctx.stats.dma_map,
- &rdev->qplib_ctx.stats.fw_id);
+
+ rc = bnxt_re_get_stats_ctx(rdev);
if (rc) {
- pr_err("Failed to allocate stats context: %#x\n", rc);
+ ibdev_err(&rdev->ibdev,
+ "Failed to allocate stats context: %#x\n", rc);
goto free_ctx;
}
rc = bnxt_qplib_init_rcfw(&rdev->rcfw, &rdev->qplib_ctx,
rdev->is_virtfn);
if (rc) {
- pr_err("Failed to initialize RCFW: %#x\n", rc);
+ ibdev_err(&rdev->ibdev,
+ "Failed to initialize RCFW: %#x\n", rc);
goto free_sctx;
}
set_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags);
@@ -1434,306 +2314,300 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
/* Resources based on the 'new' device caps */
rc = bnxt_re_alloc_res(rdev);
if (rc) {
- pr_err("Failed to allocate resources: %#x\n", rc);
+ ibdev_err(&rdev->ibdev,
+ "Failed to allocate resources: %#x\n", rc);
goto fail;
}
set_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags);
rc = bnxt_re_init_res(rdev);
if (rc) {
- pr_err("Failed to initialize resources: %#x\n", rc);
+ ibdev_err(&rdev->ibdev,
+ "Failed to initialize resources: %#x\n", rc);
goto fail;
}
set_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, &rdev->flags);
if (!rdev->is_virtfn) {
- rc = bnxt_re_setup_qos(rdev);
+ /* Query f/w defaults of CC params */
+ rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, &rdev->cc_param);
if (rc)
- pr_info("RoCE priority not yet configured\n");
+ ibdev_warn(&rdev->ibdev, "Failed to query CC defaults\n");
- INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker);
- set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags);
- schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
+ if (!(rdev->qplib_res.en_dev->flags & BNXT_EN_FLAG_ROCE_VF_RES_MGMT))
+ bnxt_re_vf_res_config(rdev);
}
+ hash_init(rdev->cq_hash);
+ if (rdev->chip_ctx->modes.toggle_bits & BNXT_QPLIB_SRQ_TOGGLE_BIT)
+ hash_init(rdev->srq_hash);
- rtnl_unlock();
- locked = false;
+ bnxt_re_debugfs_add_pdev(rdev);
- /* Register ib dev */
- rc = bnxt_re_register_ib(rdev);
- if (rc) {
- pr_err("Failed to register with IB: %#x\n", rc);
+ bnxt_re_init_dcb_wq(rdev);
+ bnxt_re_net_register_async_event(rdev);
+
+ if (!rdev->is_virtfn)
+ bnxt_re_read_vpd_info(rdev);
+
+ rc = bnxt_re_get_stats3_ctx(rdev);
+ if (rc)
goto fail;
- }
- set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
- dev_info(rdev_to_dev(rdev), "Device registered successfully");
- ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
- &rdev->active_width);
- set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
- bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE);
- bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_GID_CHANGE);
return 0;
free_sctx:
bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
free_ctx:
- bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx);
+ bnxt_qplib_free_hwctx(&rdev->qplib_res, &rdev->qplib_ctx);
disable_rcfw:
bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
free_ring:
- type = bnxt_qplib_get_ring_type(&rdev->chip_ctx);
- bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, type);
+ type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
+ bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type);
free_rcfw:
bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
fail:
- if (!locked)
- rtnl_lock();
- bnxt_re_ib_unreg(rdev);
- rtnl_unlock();
+ bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE);
return rc;
}
-static void bnxt_re_dev_unreg(struct bnxt_re_dev *rdev)
+static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable)
{
- struct bnxt_en_dev *en_dev = rdev->en_dev;
- struct net_device *netdev = rdev->netdev;
+ struct bnxt_qplib_cc_param cc_param = {};
+
+ /* Do not enable congestion control on VFs */
+ if (rdev->is_virtfn)
+ return;
- bnxt_re_dev_remove(rdev);
+ /* Currently enabled only for GenP5 and newer (P7) adapters */
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+ return;
+
+ if (enable) {
+ cc_param.enable = 1;
+ cc_param.tos_ecn = 1;
+ }
- if (netdev)
- bnxt_re_dev_unprobe(netdev, en_dev);
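+	/* Tell the firmware which CC fields this request modifies. */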
+ cc_param.mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC |
+ CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN);
+
+ if (bnxt_qplib_modify_cc(&rdev->qplib_res, &cc_param))
+ ibdev_err(&rdev->ibdev, "Failed to setup CC enable = %d\n", enable);
}
-static int bnxt_re_dev_reg(struct bnxt_re_dev **rdev, struct net_device *netdev)
+static void bnxt_re_update_en_info_rdev(struct bnxt_re_dev *rdev,
+ struct bnxt_re_en_dev_info *en_info,
+ struct auxiliary_device *adev)
{
+ /* Before updating the rdev pointer in the bnxt_re_en_dev_info
+ * structure, take the rtnl lock to avoid L2 ULP callbacks
+ * dereferencing a stale rdev pointer. This applies everywhere the
+ * rdev pointer in bnxt_re_en_dev_info is updated.
+ */
+ rtnl_lock();
+ en_info->rdev = rdev;
+ rtnl_unlock();
+}
+
+static int bnxt_re_add_device(struct auxiliary_device *adev, u8 op_type)
+{
+ struct bnxt_aux_priv *aux_priv =
+ container_of(adev, struct bnxt_aux_priv, aux_dev);
+ struct bnxt_re_en_dev_info *en_info;
struct bnxt_en_dev *en_dev;
- int rc = 0;
+ struct bnxt_re_dev *rdev;
+ int rc;
- if (!is_bnxt_re_dev(netdev))
- return -ENODEV;
+ en_info = auxiliary_get_drvdata(adev);
+ en_dev = en_info->en_dev;
- en_dev = bnxt_re_dev_probe(netdev);
- if (IS_ERR(en_dev)) {
- if (en_dev != ERR_PTR(-ENODEV))
- pr_err("%s: Failed to probe\n", ROCE_DRV_MODULE_NAME);
- rc = PTR_ERR(en_dev);
- goto exit;
- }
- *rdev = bnxt_re_dev_add(netdev, en_dev);
- if (!*rdev) {
+
+ rdev = bnxt_re_dev_add(adev, en_dev);
+ if (!rdev || !rdev_to_dev(rdev)) {
rc = -ENOMEM;
- bnxt_re_dev_unprobe(netdev, en_dev);
goto exit;
}
+
+ bnxt_re_update_en_info_rdev(rdev, en_info, adev);
+
+ rc = bnxt_re_dev_init(rdev, op_type);
+ if (rc)
+ goto re_dev_dealloc;
+
+ rc = bnxt_re_ib_init(rdev);
+ if (rc) {
+ pr_err("Failed to register with IB: %s",
+ aux_priv->aux_dev.name);
+ goto re_dev_uninit;
+ }
+
+ bnxt_re_setup_cc(rdev, true);
+
+ return 0;
+
+re_dev_uninit:
+ bnxt_re_update_en_info_rdev(NULL, en_info, adev);
+ bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE);
+re_dev_dealloc:
+ ib_dealloc_device(&rdev->ibdev);
exit:
return rc;
}
-static void bnxt_re_remove_one(struct bnxt_re_dev *rdev)
+#define BNXT_ADEV_NAME "bnxt_en"
+
+static void bnxt_re_remove_device(struct bnxt_re_dev *rdev, u8 op_type,
+ struct auxiliary_device *aux_dev)
{
- pci_dev_put(rdev->en_dev->pdev);
+ bnxt_re_setup_cc(rdev, false);
+ ib_unregister_device(&rdev->ibdev);
+ bnxt_re_dev_uninit(rdev, op_type);
+ ib_dealloc_device(&rdev->ibdev);
}
-/* Handle all deferred netevents tasks */
-static void bnxt_re_task(struct work_struct *work)
+static void bnxt_re_remove(struct auxiliary_device *adev)
{
- struct bnxt_re_work *re_work;
+ struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
struct bnxt_re_dev *rdev;
- int rc = 0;
- re_work = container_of(work, struct bnxt_re_work, work);
- rdev = re_work->rdev;
+ mutex_lock(&bnxt_re_mutex);
+ rdev = en_info->rdev;
- if (re_work->event != NETDEV_REGISTER &&
- !test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))
- return;
+ if (rdev)
+ bnxt_re_remove_device(rdev, BNXT_RE_COMPLETE_REMOVE, adev);
+ kfree(en_info);
+ mutex_unlock(&bnxt_re_mutex);
+}
- switch (re_work->event) {
- case NETDEV_REGISTER:
- rc = bnxt_re_ib_reg(rdev);
- if (rc) {
- dev_err(rdev_to_dev(rdev),
- "Failed to register with IB: %#x", rc);
- bnxt_re_remove_one(rdev);
- bnxt_re_dev_unreg(rdev);
- goto exit;
- }
- break;
- case NETDEV_UP:
- bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
- IB_EVENT_PORT_ACTIVE);
- break;
- case NETDEV_DOWN:
- bnxt_re_dev_stop(rdev);
- break;
- case NETDEV_CHANGE:
- if (!netif_carrier_ok(rdev->netdev))
- bnxt_re_dev_stop(rdev);
- else if (netif_carrier_ok(rdev->netdev))
- bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
- IB_EVENT_PORT_ACTIVE);
- ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
- &rdev->active_width);
- break;
- default:
- break;
+static int bnxt_re_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct bnxt_aux_priv *aux_priv =
+ container_of(adev, struct bnxt_aux_priv, aux_dev);
+ struct bnxt_re_en_dev_info *en_info;
+ struct bnxt_en_dev *en_dev;
+ int rc;
+
+ en_dev = aux_priv->edev;
+
+ mutex_lock(&bnxt_re_mutex);
+ en_info = kzalloc(sizeof(*en_info), GFP_KERNEL);
+ if (!en_info) {
+ mutex_unlock(&bnxt_re_mutex);
+ return -ENOMEM;
}
- smp_mb__before_atomic();
- atomic_dec(&rdev->sched_count);
-exit:
- kfree(re_work);
+ en_info->en_dev = en_dev;
+
+ auxiliary_set_drvdata(adev, en_info);
+
+ rc = bnxt_re_add_device(adev, BNXT_RE_COMPLETE_INIT);
+ if (rc)
+ kfree(en_info);
+
+ mutex_unlock(&bnxt_re_mutex);
+
+ return rc;
}
-static void bnxt_re_init_one(struct bnxt_re_dev *rdev)
+static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state)
{
- pci_dev_get(rdev->en_dev->pdev);
+ struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
+ struct bnxt_en_dev *en_dev;
+ struct bnxt_re_dev *rdev;
+
+ rdev = en_info->rdev;
+ en_dev = en_info->en_dev;
+ mutex_lock(&bnxt_re_mutex);
+
+ ibdev_info(&rdev->ibdev, "Handle device suspend call");
+ /* Check the current device state from bnxt_en_dev and move the
+ * device to the detached state if FW_FATAL_COND is set.
+ * This prevents further commands from reaching the HW during
+ * clean-up when the device is already in an error state.
+ */
+ if (test_bit(BNXT_STATE_FW_FATAL_COND, &rdev->en_dev->en_state)) {
+ set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
+ set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
+ wake_up_all(&rdev->rcfw.cmdq.waitq);
+ bnxt_re_dev_stop(rdev);
+ }
+
+ if (rdev->pacing.dbr_pacing)
+ bnxt_re_set_pacing_dev_state(rdev);
+
+ ibdev_info(&rdev->ibdev, "%s: L2 driver notified to stop en_state 0x%lx",
+ __func__, en_dev->en_state);
+ bnxt_re_remove_device(rdev, BNXT_RE_PRE_RECOVERY_REMOVE, adev);
+ bnxt_re_update_en_info_rdev(NULL, en_info, adev);
+ mutex_unlock(&bnxt_re_mutex);
+
+ return 0;
}
-/*
- * "Notifier chain callback can be invoked for the same chain from
- * different CPUs at the same time".
- *
- * For cases when the netdev is already present, our call to the
- * register_netdevice_notifier() will actually get the rtnl_lock()
- * before sending NETDEV_REGISTER and (if up) NETDEV_UP
- * events.
- *
- * But for cases when the netdev is not already present, the notifier
- * chain is subjected to be invoked from different CPUs simultaneously.
- *
- * This is protected by the netdev_mutex.
- */
-static int bnxt_re_netdev_event(struct notifier_block *notifier,
- unsigned long event, void *ptr)
+static int bnxt_re_resume(struct auxiliary_device *adev)
{
- struct net_device *real_dev, *netdev = netdev_notifier_info_to_dev(ptr);
- struct bnxt_re_work *re_work;
+ struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
struct bnxt_re_dev *rdev;
- int rc = 0;
- bool sch_work = false;
- real_dev = rdma_vlan_dev_real_dev(netdev);
- if (!real_dev)
- real_dev = netdev;
+ mutex_lock(&bnxt_re_mutex);
+ bnxt_re_add_device(adev, BNXT_RE_POST_RECOVERY_INIT);
+ rdev = en_info->rdev;
+ ibdev_info(&rdev->ibdev, "Device resume completed");
+ mutex_unlock(&bnxt_re_mutex);
- rdev = bnxt_re_from_netdev(real_dev);
- if (!rdev && event != NETDEV_REGISTER)
- goto exit;
- if (real_dev != netdev)
- goto exit;
+ return 0;
+}
- switch (event) {
- case NETDEV_REGISTER:
- if (rdev)
- break;
- rc = bnxt_re_dev_reg(&rdev, real_dev);
- if (rc == -ENODEV)
- break;
- if (rc) {
- pr_err("Failed to register with the device %s: %#x\n",
- real_dev->name, rc);
- break;
- }
- bnxt_re_init_one(rdev);
- sch_work = true;
- break;
+static void bnxt_re_shutdown(struct auxiliary_device *adev)
+{
+ struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
+ struct bnxt_re_dev *rdev;
- case NETDEV_UNREGISTER:
- /* netdev notifier will call NETDEV_UNREGISTER again later since
- * we are still holding the reference to the netdev
- */
- if (atomic_read(&rdev->sched_count) > 0)
- goto exit;
- bnxt_re_ib_unreg(rdev);
- bnxt_re_remove_one(rdev);
- bnxt_re_dev_unreg(rdev);
- break;
+ rdev = en_info->rdev;
+ ib_unregister_device(&rdev->ibdev);
+ bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE);
+}
- default:
- sch_work = true;
- break;
- }
- if (sch_work) {
- /* Allocate for the deferred task */
- re_work = kzalloc(sizeof(*re_work), GFP_ATOMIC);
- if (re_work) {
- re_work->rdev = rdev;
- re_work->event = event;
- re_work->vlan_dev = (real_dev == netdev ?
- NULL : netdev);
- INIT_WORK(&re_work->work, bnxt_re_task);
- atomic_inc(&rdev->sched_count);
- queue_work(bnxt_re_wq, &re_work->work);
- }
- }
+static const struct auxiliary_device_id bnxt_re_id_table[] = {
+ { .name = BNXT_ADEV_NAME ".rdma", },
+ {},
+};
-exit:
- return NOTIFY_DONE;
-}
+MODULE_DEVICE_TABLE(auxiliary, bnxt_re_id_table);
-static struct notifier_block bnxt_re_netdev_notifier = {
- .notifier_call = bnxt_re_netdev_event
+static struct auxiliary_driver bnxt_re_driver = {
+ .name = "rdma",
+ .probe = bnxt_re_probe,
+ .remove = bnxt_re_remove,
+ .shutdown = bnxt_re_shutdown,
+ .suspend = bnxt_re_suspend,
+ .resume = bnxt_re_resume,
+ .id_table = bnxt_re_id_table,
};
static int __init bnxt_re_mod_init(void)
{
- int rc = 0;
+ int rc;
pr_info("%s: %s", ROCE_DRV_MODULE_NAME, version);
+ bnxt_re_register_debugfs();
- bnxt_re_wq = create_singlethread_workqueue("bnxt_re");
- if (!bnxt_re_wq)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&bnxt_re_dev_list);
-
- rc = register_netdevice_notifier(&bnxt_re_netdev_notifier);
+ rc = auxiliary_driver_register(&bnxt_re_driver);
if (rc) {
- pr_err("%s: Cannot register to netdevice_notifier",
- ROCE_DRV_MODULE_NAME);
- goto err_netdev;
+ pr_err("%s: Failed to register auxiliary driver\n",
+ ROCE_DRV_MODULE_NAME);
+ goto err_debug;
}
return 0;
-
-err_netdev:
- destroy_workqueue(bnxt_re_wq);
-
+err_debug:
+ bnxt_re_unregister_debugfs();
return rc;
}
static void __exit bnxt_re_mod_exit(void)
{
- struct bnxt_re_dev *rdev, *next;
- LIST_HEAD(to_be_deleted);
-
- mutex_lock(&bnxt_re_dev_lock);
- /* Free all adapter allocated resources */
- if (!list_empty(&bnxt_re_dev_list))
- list_splice_init(&bnxt_re_dev_list, &to_be_deleted);
- mutex_unlock(&bnxt_re_dev_lock);
- /*
- * Cleanup the devices in reverse order so that the VF device
- * cleanup is done before PF cleanup
- */
- list_for_each_entry_safe_reverse(rdev, next, &to_be_deleted, list) {
- dev_info(rdev_to_dev(rdev), "Unregistering Device");
- /*
- * Flush out any scheduled tasks before destroying the
- * resources
- */
- flush_workqueue(bnxt_re_wq);
- bnxt_re_dev_stop(rdev);
- /* Acquire the rtnl_lock as the L2 resources are freed here */
- rtnl_lock();
- bnxt_re_ib_unreg(rdev);
- rtnl_unlock();
- bnxt_re_remove_one(rdev);
- bnxt_re_dev_unreg(rdev);
- }
- unregister_netdevice_notifier(&bnxt_re_netdev_notifier);
- if (bnxt_re_wq)
- destroy_workqueue(bnxt_re_wq);
+ auxiliary_driver_unregister(&bnxt_re_driver);
+ bnxt_re_unregister_debugfs();
}
module_init(bnxt_re_mod_init);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 958c1ff9c515..c88f049136fc 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -43,8 +43,10 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/pci.h>
+#include <linux/delay.h>
#include <linux/prefetch.h>
#include <linux/if_ether.h>
+#include <rdma/ib_mad.h>
#include "roce_hsi.h"
@@ -52,10 +54,12 @@
#include "qplib_rcfw.h"
#include "qplib_sp.h"
#include "qplib_fp.h"
+#include <rdma/ib_addr.h>
+#include "bnxt_ulp.h"
+#include "bnxt_re.h"
+#include "ib_verbs.h"
-static void bnxt_qplib_arm_cq_enable(struct bnxt_qplib_cq *cq);
static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp);
-static void bnxt_qplib_arm_srq(struct bnxt_qplib_srq *srq, u32 arm_type);
static void bnxt_qplib_cancel_phantom_processing(struct bnxt_qplib_qp *qp)
{
@@ -179,11 +183,11 @@ static void bnxt_qplib_free_qp_hdr_buf(struct bnxt_qplib_res *res,
if (qp->rq_hdr_buf)
dma_free_coherent(&res->pdev->dev,
- rq->hwq.max_elements * qp->rq_hdr_buf_size,
+ rq->max_wqe * qp->rq_hdr_buf_size,
qp->rq_hdr_buf, qp->rq_hdr_buf_map);
if (qp->sq_hdr_buf)
dma_free_coherent(&res->pdev->dev,
- sq->hwq.max_elements * qp->sq_hdr_buf_size,
+ sq->max_wqe * qp->sq_hdr_buf_size,
qp->sq_hdr_buf, qp->sq_hdr_buf_map);
qp->rq_hdr_buf = NULL;
qp->sq_hdr_buf = NULL;
@@ -200,10 +204,9 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
struct bnxt_qplib_q *sq = &qp->sq;
int rc = 0;
- if (qp->sq_hdr_buf_size && sq->hwq.max_elements) {
+ if (qp->sq_hdr_buf_size && sq->max_wqe) {
qp->sq_hdr_buf = dma_alloc_coherent(&res->pdev->dev,
- sq->hwq.max_elements *
- qp->sq_hdr_buf_size,
+ sq->max_wqe * qp->sq_hdr_buf_size,
&qp->sq_hdr_buf_map, GFP_KERNEL);
if (!qp->sq_hdr_buf) {
rc = -ENOMEM;
@@ -213,9 +216,9 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
}
}
- if (qp->rq_hdr_buf_size && rq->hwq.max_elements) {
+ if (qp->rq_hdr_buf_size && rq->max_wqe) {
qp->rq_hdr_buf = dma_alloc_coherent(&res->pdev->dev,
- rq->hwq.max_elements *
+ rq->max_wqe *
qp->rq_hdr_buf_size,
&qp->rq_hdr_buf_map,
GFP_KERNEL);
@@ -233,27 +236,20 @@ fail:
return rc;
}
-static void bnxt_qplib_service_nq(unsigned long data)
+static void clean_nq(struct bnxt_qplib_nq *nq, struct bnxt_qplib_cq *cq)
{
- struct bnxt_qplib_nq *nq = (struct bnxt_qplib_nq *)data;
struct bnxt_qplib_hwq *hwq = &nq->hwq;
struct nq_base *nqe, **nq_ptr;
- struct bnxt_qplib_cq *cq;
- int num_cqne_processed = 0;
- int num_srqne_processed = 0;
- u32 sw_cons, raw_cons;
- u16 type;
int budget = nq->budget;
uintptr_t q_handle;
- bool gen_p5 = bnxt_qplib_is_chip_gen_p5(nq->res->cctx);
+ u16 type;
+ spin_lock_bh(&hwq->lock);
/* Service the NQ until empty */
- raw_cons = hwq->cons;
while (budget--) {
- sw_cons = HWQ_CMP(raw_cons, hwq);
nq_ptr = (struct nq_base **)hwq->pbl_ptr;
- nqe = &nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)];
- if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements))
+ nqe = &nq_ptr[NQE_PG(hwq->cons)][NQE_IDX(hwq->cons)];
+ if (!NQE_CMP_VALID(nqe, nq->nq_db.dbinfo.flags))
break;
/*
@@ -271,33 +267,114 @@ static void bnxt_qplib_service_nq(unsigned long data)
q_handle = le32_to_cpu(nqcne->cq_handle_low);
q_handle |= (u64)le32_to_cpu(nqcne->cq_handle_high)
<< 32;
+ if ((unsigned long)cq == q_handle) {
+ nqcne->cq_handle_low = 0;
+ nqcne->cq_handle_high = 0;
+ cq->cnq_events++;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons,
+ 1, &nq->nq_db.dbinfo.flags);
+ }
+ spin_unlock_bh(&hwq->lock);
+}
+
+/* Wait until all NQEs for this CQ have been received and clean up any
+ * NQEs still associated with it.
+ */
+static void __wait_for_all_nqes(struct bnxt_qplib_cq *cq, u16 cnq_events)
+{
+ u32 retry_cnt = 100;
+
+ while (retry_cnt--) {
+ if (cnq_events == cq->cnq_events)
+ return;
+ usleep_range(50, 100);
+ clean_nq(cq->nq, cq);
+ }
+}
+
+static void bnxt_qplib_service_nq(struct tasklet_struct *t)
+{
+ struct bnxt_qplib_nq *nq = from_tasklet(nq, t, nq_tasklet);
+ struct bnxt_qplib_hwq *hwq = &nq->hwq;
+ struct bnxt_qplib_cq *cq;
+ int budget = nq->budget;
+ struct nq_base *nqe;
+ uintptr_t q_handle;
+ u32 hw_polled = 0;
+ u16 type;
+
+ spin_lock_bh(&hwq->lock);
+ /* Service the NQ until empty */
+ while (budget--) {
+ nqe = bnxt_qplib_get_qe(hwq, hwq->cons, NULL);
+ if (!NQE_CMP_VALID(nqe, nq->nq_db.dbinfo.flags))
+ break;
+
+ /*
+ * The valid test of the entry must be done first before
+ * reading any further.
+ */
+ dma_rmb();
+
+ type = le16_to_cpu(nqe->info10_type) & NQ_BASE_TYPE_MASK;
+ switch (type) {
+ case NQ_BASE_TYPE_CQ_NOTIFICATION:
+ {
+ struct nq_cn *nqcne = (struct nq_cn *)nqe;
+ struct bnxt_re_cq *cq_p;
+
+ q_handle = le32_to_cpu(nqcne->cq_handle_low);
+ q_handle |= (u64)le32_to_cpu(nqcne->cq_handle_high)
+ << 32;
cq = (struct bnxt_qplib_cq *)(unsigned long)q_handle;
- bnxt_qplib_arm_cq_enable(cq);
+ if (!cq)
+ break;
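+			/* Latch the toggle bit carried in the notification and
+			 * mirror it into the user-mapped CQ page, if one exists.
+			 */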
+ cq->toggle = (le16_to_cpu(nqe->info10_type) &
+ NQ_CN_TOGGLE_MASK) >> NQ_CN_TOGGLE_SFT;
+ cq->dbinfo.toggle = cq->toggle;
+ cq_p = container_of(cq, struct bnxt_re_cq, qplib_cq);
+ if (cq_p->uctx_cq_page)
+ *((u32 *)cq_p->uctx_cq_page) = cq->toggle;
+
+ bnxt_qplib_armen_db(&cq->dbinfo,
+ DBC_DBC_TYPE_CQ_ARMENA);
spin_lock_bh(&cq->compl_lock);
atomic_set(&cq->arm_state, 0);
- if (!nq->cqn_handler(nq, (cq)))
- num_cqne_processed++;
- else
+ if (nq->cqn_handler(nq, (cq)))
dev_warn(&nq->pdev->dev,
"cqn - type 0x%x not handled\n", type);
+ cq->cnq_events++;
spin_unlock_bh(&cq->compl_lock);
break;
}
case NQ_BASE_TYPE_SRQ_EVENT:
{
+ struct bnxt_qplib_srq *srq;
+ struct bnxt_re_srq *srq_p;
struct nq_srq_event *nqsrqe =
(struct nq_srq_event *)nqe;
q_handle = le32_to_cpu(nqsrqe->srq_handle_low);
q_handle |= (u64)le32_to_cpu(nqsrqe->srq_handle_high)
<< 32;
- bnxt_qplib_arm_srq((struct bnxt_qplib_srq *)q_handle,
- DBC_DBC_TYPE_SRQ_ARMENA);
- if (!nq->srqn_handler(nq,
- (struct bnxt_qplib_srq *)q_handle,
- nqsrqe->event))
- num_srqne_processed++;
- else
+ srq = (struct bnxt_qplib_srq *)q_handle;
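+			/* As with CQs, latch the toggle bit and mirror it into
+			 * the user-mapped SRQ page when one is registered.
+			 */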
+ srq->toggle = (le16_to_cpu(nqe->info10_type) & NQ_CN_TOGGLE_MASK)
+ >> NQ_CN_TOGGLE_SFT;
+ srq->dbinfo.toggle = srq->toggle;
+ srq_p = container_of(srq, struct bnxt_re_srq, qplib_srq);
+ if (srq_p->uctx_srq_page)
+ *((u32 *)srq_p->uctx_srq_page) = srq->toggle;
+ bnxt_qplib_armen_db(&srq->dbinfo,
+ DBC_DBC_TYPE_SRQ_ARMENA);
+ if (nq->srqn_handler(nq,
+ (struct bnxt_qplib_srq *)q_handle,
+ nqsrqe->event))
dev_warn(&nq->pdev->dev,
"SRQ event 0x%x not handled\n",
nqsrqe->event);
@@ -310,50 +387,67 @@ static void bnxt_qplib_service_nq(unsigned long data)
"nqe with type = 0x%x not handled\n", type);
break;
}
- raw_cons++;
- }
- if (hwq->cons != raw_cons) {
- hwq->cons = raw_cons;
- bnxt_qplib_ring_nq_db_rearm(nq->bar_reg_iomem, hwq->cons,
- hwq->max_elements, nq->ring_id,
- gen_p5);
+ hw_polled++;
+ bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons,
+ 1, &nq->nq_db.dbinfo.flags);
}
+ if (hw_polled)
+ bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, true);
+ spin_unlock_bh(&hwq->lock);
+}
+
+/* bnxt_re_synchronize_nq - poll the notification queue synchronously.
+ * @nq - notification queue pointer
+ *
+ * This function polls all pending entries of the given notification queue.
+ * It is useful for draining notification entries while resources are
+ * being torn down.
+ */
+
+void bnxt_re_synchronize_nq(struct bnxt_qplib_nq *nq)
+{
+ int budget = nq->budget;
+
+ nq->budget = nq->hwq.max_elements;
+ bnxt_qplib_service_nq(&nq->nq_tasklet);
+ nq->budget = budget;
}
static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance)
{
struct bnxt_qplib_nq *nq = dev_instance;
struct bnxt_qplib_hwq *hwq = &nq->hwq;
- struct nq_base **nq_ptr;
u32 sw_cons;
/* Prefetch the NQ element */
sw_cons = HWQ_CMP(hwq->cons, hwq);
- nq_ptr = (struct nq_base **)nq->hwq.pbl_ptr;
- prefetch(&nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)]);
+ prefetch(bnxt_qplib_get_qe(hwq, sw_cons, NULL));
/* Fan out to CPU affinitized kthreads? */
- tasklet_schedule(&nq->worker);
+ tasklet_schedule(&nq->nq_tasklet);
return IRQ_HANDLED;
}
void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill)
{
- bool gen_p5 = bnxt_qplib_is_chip_gen_p5(nq->res->cctx);
- tasklet_disable(&nq->worker);
+ if (!nq->requested)
+ return;
+
+ nq->requested = false;
/* Mask h/w interrupt */
- bnxt_qplib_ring_nq_db(nq->bar_reg_iomem, nq->hwq.cons,
- nq->hwq.max_elements, nq->ring_id, gen_p5);
+ bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, false);
/* Sync with last running IRQ handler */
- synchronize_irq(nq->vector);
+ synchronize_irq(nq->msix_vec);
+ irq_set_affinity_hint(nq->msix_vec, NULL);
+ free_irq(nq->msix_vec, nq);
+ kfree(nq->name);
+ nq->name = NULL;
+
if (kill)
- tasklet_kill(&nq->worker);
- if (nq->requested) {
- irq_set_affinity_hint(nq->vector, NULL);
- free_irq(nq->vector, nq);
- nq->requested = false;
- }
+ tasklet_kill(&nq->nq_tasklet);
+ tasklet_disable(&nq->nq_tasklet);
}
void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
@@ -364,89 +458,114 @@ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
}
/* Make sure the HW is stopped! */
- if (nq->requested)
- bnxt_qplib_nq_stop_irq(nq, true);
+ bnxt_qplib_nq_stop_irq(nq, true);
- if (nq->bar_reg_iomem)
- iounmap(nq->bar_reg_iomem);
- nq->bar_reg_iomem = NULL;
+ if (nq->nq_db.reg.bar_reg) {
+ iounmap(nq->nq_db.reg.bar_reg);
+ nq->nq_db.reg.bar_reg = NULL;
+ }
nq->cqn_handler = NULL;
nq->srqn_handler = NULL;
- nq->vector = 0;
+ nq->msix_vec = 0;
}
int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
int msix_vector, bool need_init)
{
- bool gen_p5 = bnxt_qplib_is_chip_gen_p5(nq->res->cctx);
+ struct bnxt_qplib_res *res = nq->res;
int rc;
if (nq->requested)
return -EFAULT;
- nq->vector = msix_vector;
+ nq->msix_vec = msix_vector;
if (need_init)
- tasklet_init(&nq->worker, bnxt_qplib_service_nq,
- (unsigned long)nq);
+ tasklet_setup(&nq->nq_tasklet, bnxt_qplib_service_nq);
else
- tasklet_enable(&nq->worker);
+ tasklet_enable(&nq->nq_tasklet);
- snprintf(nq->name, sizeof(nq->name), "bnxt_qplib_nq-%d", nq_indx);
- rc = request_irq(nq->vector, bnxt_qplib_nq_irq, 0, nq->name, nq);
- if (rc)
+ nq->name = kasprintf(GFP_KERNEL, "bnxt_re-nq-%d@pci:%s",
+ nq_indx, pci_name(res->pdev));
+ if (!nq->name)
+ return -ENOMEM;
+ rc = request_irq(nq->msix_vec, bnxt_qplib_nq_irq, 0, nq->name, nq);
+ if (rc) {
+ kfree(nq->name);
+ nq->name = NULL;
+ tasklet_disable(&nq->nq_tasklet);
return rc;
+ }
cpumask_clear(&nq->mask);
cpumask_set_cpu(nq_indx, &nq->mask);
- rc = irq_set_affinity_hint(nq->vector, &nq->mask);
+ rc = irq_set_affinity_hint(nq->msix_vec, &nq->mask);
if (rc) {
dev_warn(&nq->pdev->dev,
"set affinity failed; vector: %d nq_idx: %d\n",
- nq->vector, nq_indx);
+ nq->msix_vec, nq_indx);
}
nq->requested = true;
- bnxt_qplib_ring_nq_db_rearm(nq->bar_reg_iomem, nq->hwq.cons,
- nq->hwq.max_elements, nq->ring_id, gen_p5);
+ bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, res->cctx, true);
return rc;
}
+static int bnxt_qplib_map_nq_db(struct bnxt_qplib_nq *nq, u32 reg_offt)
+{
+ resource_size_t reg_base;
+ struct bnxt_qplib_nq_db *nq_db;
+ struct pci_dev *pdev;
+
+ pdev = nq->pdev;
+ nq_db = &nq->nq_db;
+
+ nq_db->dbinfo.flags = 0;
+ nq_db->reg.bar_id = NQ_CONS_PCI_BAR_REGION;
+ nq_db->reg.bar_base = pci_resource_start(pdev, nq_db->reg.bar_id);
+ if (!nq_db->reg.bar_base) {
+ dev_err(&pdev->dev, "QPLIB: NQ BAR region %d resc start is 0!",
+ nq_db->reg.bar_id);
+ return -ENOMEM;
+ }
+
+ reg_base = nq_db->reg.bar_base + reg_offt;
+ /* Unconditionally map 8 bytes to support 57500 series */
+ nq_db->reg.len = 8;
+ nq_db->reg.bar_reg = ioremap(reg_base, nq_db->reg.len);
+ if (!nq_db->reg.bar_reg) {
+ dev_err(&pdev->dev, "QPLIB: NQ BAR region %d mapping failed",
+ nq_db->reg.bar_id);
+ return -ENOMEM;
+ }
+
+ nq_db->dbinfo.db = nq_db->reg.bar_reg;
+ nq_db->dbinfo.hwq = &nq->hwq;
+ nq_db->dbinfo.xid = nq->ring_id;
+
+ return 0;
+}
+
int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
int nq_idx, int msix_vector, int bar_reg_offset,
- int (*cqn_handler)(struct bnxt_qplib_nq *nq,
- struct bnxt_qplib_cq *),
- int (*srqn_handler)(struct bnxt_qplib_nq *nq,
- struct bnxt_qplib_srq *,
- u8 event))
+ cqn_handler_t cqn_handler,
+ srqn_handler_t srqn_handler)
{
- resource_size_t nq_base;
- int rc = -1;
-
- if (cqn_handler)
- nq->cqn_handler = cqn_handler;
+ int rc;
- if (srqn_handler)
- nq->srqn_handler = srqn_handler;
+ nq->pdev = pdev;
+ nq->cqn_handler = cqn_handler;
+ nq->srqn_handler = srqn_handler;
+ nq->load = 0;
/* Have a task to schedule CQ notifiers in post send case */
nq->cqn_wq = create_singlethread_workqueue("bnxt_qplib_nq");
if (!nq->cqn_wq)
return -ENOMEM;
- nq->bar_reg = NQ_CONS_PCI_BAR_REGION;
- nq->bar_reg_off = bar_reg_offset;
- nq_base = pci_resource_start(pdev, nq->bar_reg);
- if (!nq_base) {
- rc = -ENOMEM;
- goto fail;
- }
- /* Unconditionally map 8 bytes to support 57500 series */
- nq->bar_reg_iomem = ioremap_nocache(nq_base + nq->bar_reg_off, 8);
- if (!nq->bar_reg_iomem) {
- rc = -ENOMEM;
+ rc = bnxt_qplib_map_nq_db(nq, bar_reg_offset);
+ if (rc)
goto fail;
- }
rc = bnxt_qplib_nq_start_irq(nq, nq_idx, msix_vector, true);
if (rc) {
@@ -464,97 +583,86 @@ fail:
void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq)
{
if (nq->hwq.max_elements) {
- bnxt_qplib_free_hwq(nq->pdev, &nq->hwq);
+ bnxt_qplib_free_hwq(nq->res, &nq->hwq);
nq->hwq.max_elements = 0;
}
}
-int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq)
+int bnxt_qplib_alloc_nq(struct bnxt_qplib_res *res, struct bnxt_qplib_nq *nq)
{
- u8 hwq_type;
+ struct bnxt_qplib_hwq_attr hwq_attr = {};
+ struct bnxt_qplib_sg_info sginfo = {};
- nq->pdev = pdev;
+ nq->pdev = res->pdev;
+ nq->res = res;
if (!nq->hwq.max_elements ||
nq->hwq.max_elements > BNXT_QPLIB_NQE_MAX_CNT)
nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT;
- hwq_type = bnxt_qplib_get_hwq_type(nq->res);
- if (bnxt_qplib_alloc_init_hwq(nq->pdev, &nq->hwq, NULL,
- &nq->hwq.max_elements,
- BNXT_QPLIB_MAX_NQE_ENTRY_SIZE, 0,
- PAGE_SIZE, hwq_type))
- return -ENOMEM;
+ sginfo.pgsize = PAGE_SIZE;
+ sginfo.pgshft = PAGE_SHIFT;
+ hwq_attr.res = res;
+ hwq_attr.sginfo = &sginfo;
+ hwq_attr.depth = nq->hwq.max_elements;
+ hwq_attr.stride = sizeof(struct nq_base);
+ hwq_attr.type = bnxt_qplib_get_hwq_type(nq->res);
+ if (bnxt_qplib_alloc_init_hwq(&nq->hwq, &hwq_attr)) {
+ dev_err(&nq->pdev->dev, "FP NQ allocation failed");
+ return -ENOMEM;
+ }
nq->budget = 8;
return 0;
}
/* SRQ */
-static void bnxt_qplib_arm_srq(struct bnxt_qplib_srq *srq, u32 arm_type)
-{
- struct bnxt_qplib_hwq *srq_hwq = &srq->hwq;
- void __iomem *db;
- u32 sw_prod;
- u64 val = 0;
-
- /* Ring DB */
- sw_prod = (arm_type == DBC_DBC_TYPE_SRQ_ARM) ?
- srq->threshold : HWQ_CMP(srq_hwq->prod, srq_hwq);
- db = (arm_type == DBC_DBC_TYPE_SRQ_ARMENA) ? srq->dbr_base :
- srq->dpi->dbr;
- val = ((srq->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | arm_type;
- val <<= 32;
- val |= (sw_prod << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK;
- writeq(val, db);
-}
-
void bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res,
struct bnxt_qplib_srq *srq)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
- struct cmdq_destroy_srq req;
- struct creq_destroy_srq_resp resp;
- u16 cmd_flags = 0;
+ struct creq_destroy_srq_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_destroy_srq req = {};
int rc;
- RCFW_CMD_PREP(req, DESTROY_SRQ, cmd_flags);
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_DESTROY_SRQ,
+ sizeof(req));
/* Configure the request */
req.srq_cid = cpu_to_le32(srq->id);
- rc = bnxt_qplib_rcfw_send_message(rcfw, (struct cmdq_base *)&req,
- (struct creq_base *)&resp, NULL, 0);
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
kfree(srq->swq);
if (rc)
return;
- bnxt_qplib_free_hwq(res->pdev, &srq->hwq);
+ bnxt_qplib_free_hwq(res, &srq->hwq);
}
int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
struct bnxt_qplib_srq *srq)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
- struct cmdq_create_srq req;
- struct creq_create_srq_resp resp;
+ struct bnxt_qplib_hwq_attr hwq_attr = {};
+ struct creq_create_srq_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_create_srq req = {};
struct bnxt_qplib_pbl *pbl;
- u16 cmd_flags = 0;
+ u16 pg_sz_lvl;
int rc, idx;
- srq->hwq.max_elements = srq->max_wqe;
- rc = bnxt_qplib_alloc_init_hwq(res->pdev, &srq->hwq, &srq->sg_info,
- &srq->hwq.max_elements,
- BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0,
- PAGE_SIZE, HWQ_TYPE_QUEUE);
+ hwq_attr.res = res;
+ hwq_attr.sginfo = &srq->sg_info;
+ hwq_attr.depth = srq->max_wqe;
+ hwq_attr.stride = srq->wqe_size;
+ hwq_attr.type = HWQ_TYPE_QUEUE;
+ rc = bnxt_qplib_alloc_init_hwq(&srq->hwq, &hwq_attr);
if (rc)
- goto exit;
-
- srq->swq = kcalloc(srq->hwq.max_elements, sizeof(*srq->swq),
- GFP_KERNEL);
- if (!srq->swq) {
- rc = -ENOMEM;
- goto fail;
- }
-
- RCFW_CMD_PREP(req, CREATE_SRQ, cmd_flags);
+ return rc;
+ srq->dbinfo.flags = 0;
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_CREATE_SRQ,
+ sizeof(req));
/* Configure the request */
req.dpi = cpu_to_le32(srq->dpi->dpi);
@@ -562,97 +670,82 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
req.srq_size = cpu_to_le16((u16)srq->hwq.max_elements);
pbl = &srq->hwq.pbl[PBL_LVL_0];
- req.pg_size_lvl = cpu_to_le16((((u16)srq->hwq.level &
- CMDQ_CREATE_SRQ_LVL_MASK) <<
- CMDQ_CREATE_SRQ_LVL_SFT) |
- (pbl->pg_size == ROCE_PG_SIZE_4K ?
- CMDQ_CREATE_SRQ_PG_SIZE_PG_4K :
- pbl->pg_size == ROCE_PG_SIZE_8K ?
- CMDQ_CREATE_SRQ_PG_SIZE_PG_8K :
- pbl->pg_size == ROCE_PG_SIZE_64K ?
- CMDQ_CREATE_SRQ_PG_SIZE_PG_64K :
- pbl->pg_size == ROCE_PG_SIZE_2M ?
- CMDQ_CREATE_SRQ_PG_SIZE_PG_2M :
- pbl->pg_size == ROCE_PG_SIZE_8M ?
- CMDQ_CREATE_SRQ_PG_SIZE_PG_8M :
- pbl->pg_size == ROCE_PG_SIZE_1G ?
- CMDQ_CREATE_SRQ_PG_SIZE_PG_1G :
- CMDQ_CREATE_SRQ_PG_SIZE_PG_4K));
+ pg_sz_lvl = ((u16)bnxt_qplib_base_pg_size(&srq->hwq) <<
+ CMDQ_CREATE_SRQ_PG_SIZE_SFT);
+ pg_sz_lvl |= (srq->hwq.level & CMDQ_CREATE_SRQ_LVL_MASK) <<
+ CMDQ_CREATE_SRQ_LVL_SFT;
+ req.pg_size_lvl = cpu_to_le16(pg_sz_lvl);
req.pbl = cpu_to_le64(pbl->pg_map_arr[0]);
req.pd_id = cpu_to_le32(srq->pd->id);
req.eventq_id = cpu_to_le16(srq->eventq_hw_ring_id);
- rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- (void *)&resp, NULL, 0);
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
goto fail;
spin_lock_init(&srq->lock);
srq->start_idx = 0;
srq->last_idx = srq->hwq.max_elements - 1;
- for (idx = 0; idx < srq->hwq.max_elements; idx++)
- srq->swq[idx].next_idx = idx + 1;
- srq->swq[srq->last_idx].next_idx = -1;
+ if (!srq->hwq.is_user) {
+ srq->swq = kcalloc(srq->hwq.max_elements, sizeof(*srq->swq),
+ GFP_KERNEL);
+ if (!srq->swq) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ for (idx = 0; idx < srq->hwq.max_elements; idx++)
+ srq->swq[idx].next_idx = idx + 1;
+ srq->swq[srq->last_idx].next_idx = -1;
+ }
srq->id = le32_to_cpu(resp.xid);
- srq->dbr_base = res->dpi_tbl.dbr_bar_reg_iomem;
- if (srq->threshold)
- bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ_ARMENA);
- srq->arm_req = false;
+ srq->dbinfo.hwq = &srq->hwq;
+ srq->dbinfo.xid = srq->id;
+ srq->dbinfo.db = srq->dpi->dbr;
+ srq->dbinfo.max_slot = 1;
+ srq->dbinfo.priv_db = res->dpi_tbl.priv_db;
+ bnxt_qplib_armen_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ_ARMENA);
return 0;
fail:
- bnxt_qplib_free_hwq(res->pdev, &srq->hwq);
+ bnxt_qplib_free_hwq(res, &srq->hwq);
kfree(srq->swq);
-exit:
- return rc;
-}
-
-int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res,
- struct bnxt_qplib_srq *srq)
-{
- struct bnxt_qplib_hwq *srq_hwq = &srq->hwq;
- u32 sw_prod, sw_cons, count = 0;
- sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq);
- sw_cons = HWQ_CMP(srq_hwq->cons, srq_hwq);
-
- count = sw_prod > sw_cons ? sw_prod - sw_cons :
- srq_hwq->max_elements - sw_cons + sw_prod;
- if (count > srq->threshold) {
- srq->arm_req = false;
- bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ_ARM);
- } else {
- /* Deferred arming */
- srq->arm_req = true;
- }
-
- return 0;
+ return rc;
}
int bnxt_qplib_query_srq(struct bnxt_qplib_res *res,
struct bnxt_qplib_srq *srq)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
- struct cmdq_query_srq req;
- struct creq_query_srq_resp resp;
- struct bnxt_qplib_rcfw_sbuf *sbuf;
+ struct creq_query_srq_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct bnxt_qplib_rcfw_sbuf sbuf;
struct creq_query_srq_resp_sb *sb;
- u16 cmd_flags = 0;
- int rc = 0;
+ struct cmdq_query_srq req = {};
+ int rc;
- RCFW_CMD_PREP(req, QUERY_SRQ, cmd_flags);
- req.srq_cid = cpu_to_le32(srq->id);
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_QUERY_SRQ,
+ sizeof(req));
/* Configure the request */
- sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
- if (!sbuf)
+ sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS);
+ sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
+ &sbuf.dma_addr, GFP_KERNEL);
+ if (!sbuf.sb)
return -ENOMEM;
- sb = sbuf->sb;
- rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
- (void *)sbuf, 0);
- srq->threshold = le16_to_cpu(sb->srq_limit);
- bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+ req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS;
+ req.srq_cid = cpu_to_le32(srq->id);
+ sb = sbuf.sb;
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (!rc)
+ srq->threshold = le16_to_cpu(sb->srq_limit);
+ dma_free_coherent(&rcfw->pdev->dev, sbuf.size,
+ sbuf.sb, sbuf.dma_addr);
return rc;
}
@@ -661,27 +754,23 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
struct bnxt_qplib_swqe *wqe)
{
struct bnxt_qplib_hwq *srq_hwq = &srq->hwq;
- struct rq_wqe *srqe, **srqe_ptr;
+ struct rq_wqe *srqe;
struct sq_sge *hw_sge;
- u32 sw_prod, sw_cons, count = 0;
- int i, rc = 0, next;
+ int i, next;
spin_lock(&srq_hwq->lock);
if (srq->start_idx == srq->last_idx) {
dev_err(&srq_hwq->pdev->dev,
"FP: SRQ (0x%x) is full!\n", srq->id);
- rc = -EINVAL;
spin_unlock(&srq_hwq->lock);
- goto done;
+ return -EINVAL;
}
next = srq->start_idx;
srq->start_idx = srq->swq[next].next_idx;
spin_unlock(&srq_hwq->lock);
- sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq);
- srqe_ptr = (struct rq_wqe **)srq_hwq->pbl_ptr;
- srqe = &srqe_ptr[RQE_PG(sw_prod)][RQE_IDX(sw_prod)];
- memset(srqe, 0, BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
+ srqe = bnxt_qplib_get_qe(srq_hwq, srq_hwq->prod, NULL);
+ memset(srqe, 0, srq->wqe_size);
/* Calculate wqe_size16 and data_len */
for (i = 0, hw_sge = (struct sq_sge *)srqe->data;
i < wqe->num_sge; i++, hw_sge++) {
@@ -696,185 +785,199 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
srqe->wr_id[0] = cpu_to_le32((u32)next);
srq->swq[next].wr_id = wqe->wr_id;
- srq_hwq->prod++;
+ bnxt_qplib_hwq_incr_prod(&srq->dbinfo, srq_hwq, srq->dbinfo.max_slot);
- spin_lock(&srq_hwq->lock);
- sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq);
- /* retaining srq_hwq->cons for this logic
- * actually the lock is only required to
- * read srq_hwq->cons.
- */
- sw_cons = HWQ_CMP(srq_hwq->cons, srq_hwq);
- count = sw_prod > sw_cons ? sw_prod - sw_cons :
- srq_hwq->max_elements - sw_cons + sw_prod;
- spin_unlock(&srq_hwq->lock);
/* Ring DB */
- bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ);
- if (srq->arm_req == true && count > srq->threshold) {
- srq->arm_req = false;
- bnxt_qplib_arm_srq(srq, DBC_DBC_TYPE_SRQ_ARM);
- }
-done:
- return rc;
+ bnxt_qplib_ring_prod_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ);
+
+ return 0;
}
/* QP */
+
+static int bnxt_qplib_alloc_init_swq(struct bnxt_qplib_q *que)
+{
+ int indx;
+
+ que->swq = kcalloc(que->max_sw_wqe, sizeof(*que->swq), GFP_KERNEL);
+ if (!que->swq)
+ return -ENOMEM;
+
+ que->swq_start = 0;
+ que->swq_last = que->max_sw_wqe - 1;
+ for (indx = 0; indx < que->max_sw_wqe; indx++)
+ que->swq[indx].next_idx = indx + 1;
+ que->swq[que->swq_last].next_idx = 0; /* Make it circular */
+ que->swq_last = 0;
+
+ return 0;
+}
+
int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
{
+ struct bnxt_qplib_hwq_attr hwq_attr = {};
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
- struct cmdq_create_qp1 req;
- struct creq_create_qp1_resp resp;
- struct bnxt_qplib_pbl *pbl;
+ struct creq_create_qp1_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
struct bnxt_qplib_q *sq = &qp->sq;
struct bnxt_qplib_q *rq = &qp->rq;
- int rc;
- u16 cmd_flags = 0;
+ struct cmdq_create_qp1 req = {};
+ struct bnxt_qplib_pbl *pbl;
u32 qp_flags = 0;
+ u8 pg_sz_lvl;
+ u32 tbl_indx;
+ int rc;
- RCFW_CMD_PREP(req, CREATE_QP1, cmd_flags);
-
+ sq->dbinfo.flags = 0;
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_CREATE_QP1,
+ sizeof(req));
/* General */
req.type = qp->type;
req.dpi = cpu_to_le32(qp->dpi->dpi);
req.qp_handle = cpu_to_le64(qp->qp_handle);
/* SQ */
- sq->hwq.max_elements = sq->max_wqe;
- rc = bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, NULL,
- &sq->hwq.max_elements,
- BNXT_QPLIB_MAX_SQE_ENTRY_SIZE, 0,
- PAGE_SIZE, HWQ_TYPE_QUEUE);
+ hwq_attr.res = res;
+ hwq_attr.sginfo = &sq->sg_info;
+ hwq_attr.stride = sizeof(struct sq_sge);
+ hwq_attr.depth = bnxt_qplib_get_depth(sq, qp->wqe_mode, false);
+ hwq_attr.type = HWQ_TYPE_QUEUE;
+ rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr);
if (rc)
- goto exit;
+ return rc;
- sq->swq = kcalloc(sq->hwq.max_elements, sizeof(*sq->swq), GFP_KERNEL);
- if (!sq->swq) {
- rc = -ENOMEM;
+ rc = bnxt_qplib_alloc_init_swq(sq);
+ if (rc)
goto fail_sq;
- }
+
+ req.sq_size = cpu_to_le32(bnxt_qplib_set_sq_size(sq, qp->wqe_mode));
pbl = &sq->hwq.pbl[PBL_LVL_0];
req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
- req.sq_pg_size_sq_lvl =
- ((sq->hwq.level & CMDQ_CREATE_QP1_SQ_LVL_MASK)
- << CMDQ_CREATE_QP1_SQ_LVL_SFT) |
- (pbl->pg_size == ROCE_PG_SIZE_4K ?
- CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_4K :
- pbl->pg_size == ROCE_PG_SIZE_8K ?
- CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8K :
- pbl->pg_size == ROCE_PG_SIZE_64K ?
- CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_64K :
- pbl->pg_size == ROCE_PG_SIZE_2M ?
- CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_2M :
- pbl->pg_size == ROCE_PG_SIZE_8M ?
- CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8M :
- pbl->pg_size == ROCE_PG_SIZE_1G ?
- CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_1G :
- CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_4K);
-
- if (qp->scq)
- req.scq_cid = cpu_to_le32(qp->scq->id);
-
- qp_flags |= CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE;
+ pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) <<
+ CMDQ_CREATE_QP1_SQ_PG_SIZE_SFT);
+ pg_sz_lvl |= (sq->hwq.level & CMDQ_CREATE_QP1_SQ_LVL_MASK);
+ req.sq_pg_size_sq_lvl = pg_sz_lvl;
+ req.sq_fwo_sq_sge =
+ cpu_to_le16((sq->max_sge & CMDQ_CREATE_QP1_SQ_SGE_MASK) <<
+ CMDQ_CREATE_QP1_SQ_SGE_SFT);
+ req.scq_cid = cpu_to_le32(qp->scq->id);
/* RQ */
if (rq->max_wqe) {
- rq->hwq.max_elements = qp->rq.max_wqe;
- rc = bnxt_qplib_alloc_init_hwq(res->pdev, &rq->hwq, NULL,
- &rq->hwq.max_elements,
- BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0,
- PAGE_SIZE, HWQ_TYPE_QUEUE);
+ rq->dbinfo.flags = 0;
+ hwq_attr.res = res;
+ hwq_attr.sginfo = &rq->sg_info;
+ hwq_attr.stride = sizeof(struct sq_sge);
+ hwq_attr.depth = bnxt_qplib_get_depth(rq, qp->wqe_mode, false);
+ hwq_attr.type = HWQ_TYPE_QUEUE;
+ rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr);
+ if (rc)
+ goto sq_swq;
+ rc = bnxt_qplib_alloc_init_swq(rq);
if (rc)
- goto fail_sq;
-
- rq->swq = kcalloc(rq->hwq.max_elements, sizeof(*rq->swq),
- GFP_KERNEL);
- if (!rq->swq) {
- rc = -ENOMEM;
goto fail_rq;
- }
+ req.rq_size = cpu_to_le32(rq->max_wqe);
pbl = &rq->hwq.pbl[PBL_LVL_0];
req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
- req.rq_pg_size_rq_lvl =
- ((rq->hwq.level & CMDQ_CREATE_QP1_RQ_LVL_MASK) <<
- CMDQ_CREATE_QP1_RQ_LVL_SFT) |
- (pbl->pg_size == ROCE_PG_SIZE_4K ?
- CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_4K :
- pbl->pg_size == ROCE_PG_SIZE_8K ?
- CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8K :
- pbl->pg_size == ROCE_PG_SIZE_64K ?
- CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_64K :
- pbl->pg_size == ROCE_PG_SIZE_2M ?
- CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_2M :
- pbl->pg_size == ROCE_PG_SIZE_8M ?
- CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8M :
- pbl->pg_size == ROCE_PG_SIZE_1G ?
- CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_1G :
- CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_4K);
- if (qp->rcq)
- req.rcq_cid = cpu_to_le32(qp->rcq->id);
- }
-
+ pg_sz_lvl = (bnxt_qplib_base_pg_size(&rq->hwq) <<
+ CMDQ_CREATE_QP1_RQ_PG_SIZE_SFT);
+ pg_sz_lvl |= (rq->hwq.level & CMDQ_CREATE_QP1_RQ_LVL_MASK);
+ req.rq_pg_size_rq_lvl = pg_sz_lvl;
+ req.rq_fwo_rq_sge =
+ cpu_to_le16((rq->max_sge &
+ CMDQ_CREATE_QP1_RQ_SGE_MASK) <<
+ CMDQ_CREATE_QP1_RQ_SGE_SFT);
+ }
+ req.rcq_cid = cpu_to_le32(qp->rcq->id);
/* Header buffer - allow hdr_buf pass in */
rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp);
if (rc) {
rc = -ENOMEM;
- goto fail;
+ goto rq_rwq;
}
+ qp_flags |= CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE;
req.qp_flags = cpu_to_le32(qp_flags);
- req.sq_size = cpu_to_le32(sq->hwq.max_elements);
- req.rq_size = cpu_to_le32(rq->hwq.max_elements);
-
- req.sq_fwo_sq_sge =
- cpu_to_le16((sq->max_sge & CMDQ_CREATE_QP1_SQ_SGE_MASK) <<
- CMDQ_CREATE_QP1_SQ_SGE_SFT);
- req.rq_fwo_rq_sge =
- cpu_to_le16((rq->max_sge & CMDQ_CREATE_QP1_RQ_SGE_MASK) <<
- CMDQ_CREATE_QP1_RQ_SGE_SFT);
-
req.pd_id = cpu_to_le32(qp->pd->id);
- rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- (void *)&resp, NULL, 0);
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
goto fail;
qp->id = le32_to_cpu(resp.xid);
qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
- rcfw->qp_tbl[qp->id].qp_id = qp->id;
- rcfw->qp_tbl[qp->id].qp_handle = (void *)qp;
+ qp->cctx = res->cctx;
+ sq->dbinfo.hwq = &sq->hwq;
+ sq->dbinfo.xid = qp->id;
+ sq->dbinfo.db = qp->dpi->dbr;
+ sq->dbinfo.max_slot = bnxt_qplib_set_sq_max_slot(qp->wqe_mode);
+ if (rq->max_wqe) {
+ rq->dbinfo.hwq = &rq->hwq;
+ rq->dbinfo.xid = qp->id;
+ rq->dbinfo.db = qp->dpi->dbr;
+ rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size);
+ }
+ tbl_indx = map_qp_id_to_tbl_indx(qp->id, rcfw);
+ rcfw->qp_tbl[tbl_indx].qp_id = qp->id;
+ rcfw->qp_tbl[tbl_indx].qp_handle = (void *)qp;
return 0;
fail:
bnxt_qplib_free_qp_hdr_buf(res, qp);
-fail_rq:
- bnxt_qplib_free_hwq(res->pdev, &rq->hwq);
+rq_rwq:
kfree(rq->swq);
-fail_sq:
- bnxt_qplib_free_hwq(res->pdev, &sq->hwq);
+fail_rq:
+ bnxt_qplib_free_hwq(res, &rq->hwq);
+sq_swq:
kfree(sq->swq);
-exit:
+fail_sq:
+ bnxt_qplib_free_hwq(res, &sq->hwq);
return rc;
}
+static void bnxt_qplib_init_psn_ptr(struct bnxt_qplib_qp *qp, int size)
+{
+ struct bnxt_qplib_hwq *hwq;
+ struct bnxt_qplib_q *sq;
+ u64 fpsne, psn_pg;
+ u16 indx_pad = 0;
+
+ sq = &qp->sq;
+ hwq = &sq->hwq;
+ /* First psn entry */
+ fpsne = (u64)bnxt_qplib_get_qe(hwq, hwq->depth, &psn_pg);
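+	/* If the PSN/MSN area does not start on a page boundary, record the entry offset within that page */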
+ if (!IS_ALIGNED(fpsne, PAGE_SIZE))
+ indx_pad = (fpsne & ~PAGE_MASK) / size;
+ hwq->pad_pgofft = indx_pad;
+ hwq->pad_pg = (u64 *)psn_pg;
+ hwq->pad_stride = size;
+}
+
int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
- unsigned long int psn_search, poff = 0;
- struct sq_psn_search **psn_search_ptr;
+ struct bnxt_qplib_hwq_attr hwq_attr = {};
+ struct bnxt_qplib_sg_info sginfo = {};
+ struct creq_create_qp_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
struct bnxt_qplib_q *sq = &qp->sq;
struct bnxt_qplib_q *rq = &qp->rq;
- int i, rc, req_size, psn_sz = 0;
- struct sq_send **hw_sq_send_ptr;
- struct creq_create_qp_resp resp;
+ struct cmdq_create_qp req = {};
+ int rc, req_size, psn_sz = 0;
struct bnxt_qplib_hwq *xrrq;
- u16 cmd_flags = 0, max_ssge;
- struct cmdq_create_qp req;
struct bnxt_qplib_pbl *pbl;
u32 qp_flags = 0;
- u16 max_rsge;
+ u8 pg_sz_lvl;
+ u32 tbl_indx;
+ u16 nsge;
- RCFW_CMD_PREP(req, CREATE_QP, cmd_flags);
+ qp->is_host_msn_tbl = _is_host_msn_table(res->dattr->dev_cap_flags2);
+ sq->dbinfo.flags = 0;
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_CREATE_QP,
+ sizeof(req));
/* General */
req.type = qp->type;
@@ -883,144 +986,110 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
/* SQ */
if (qp->type == CMDQ_CREATE_QP_TYPE_RC) {
- psn_sz = bnxt_qplib_is_chip_gen_p5(res->cctx) ?
+ psn_sz = bnxt_qplib_is_chip_gen_p5_p7(res->cctx) ?
sizeof(struct sq_psn_search_ext) :
sizeof(struct sq_psn_search);
+
+ if (qp->is_host_msn_tbl) {
+ psn_sz = sizeof(struct sq_msn_search);
+ qp->msn = 0;
+ }
}
- sq->hwq.max_elements = sq->max_wqe;
- rc = bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, &sq->sg_info,
- &sq->hwq.max_elements,
- BNXT_QPLIB_MAX_SQE_ENTRY_SIZE,
- psn_sz,
- PAGE_SIZE, HWQ_TYPE_QUEUE);
+
+ hwq_attr.res = res;
+ hwq_attr.sginfo = &sq->sg_info;
+ hwq_attr.stride = sizeof(struct sq_sge);
+ hwq_attr.depth = bnxt_qplib_get_depth(sq, qp->wqe_mode, true);
+ hwq_attr.aux_stride = psn_sz;
+ hwq_attr.aux_depth = psn_sz ? bnxt_qplib_set_sq_size(sq, qp->wqe_mode)
+ : 0;
+ /* Update msn tbl size */
+ if (qp->is_host_msn_tbl && psn_sz) {
+ if (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC)
+ hwq_attr.aux_depth =
+ roundup_pow_of_two(bnxt_qplib_set_sq_size(sq, qp->wqe_mode));
+ else
+ hwq_attr.aux_depth =
+ roundup_pow_of_two(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)) / 2;
+ qp->msn_tbl_sz = hwq_attr.aux_depth;
+ qp->msn = 0;
+ }
+
+ hwq_attr.type = HWQ_TYPE_QUEUE;
+ rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr);
if (rc)
- goto exit;
+ return rc;
- sq->swq = kcalloc(sq->hwq.max_elements, sizeof(*sq->swq), GFP_KERNEL);
- if (!sq->swq) {
- rc = -ENOMEM;
- goto fail_sq;
- }
- hw_sq_send_ptr = (struct sq_send **)sq->hwq.pbl_ptr;
- if (psn_sz) {
- psn_search_ptr = (struct sq_psn_search **)
- &hw_sq_send_ptr[get_sqe_pg
- (sq->hwq.max_elements)];
- psn_search = (unsigned long int)
- &hw_sq_send_ptr[get_sqe_pg(sq->hwq.max_elements)]
- [get_sqe_idx(sq->hwq.max_elements)];
- if (psn_search & ~PAGE_MASK) {
- /* If the psn_search does not start on a page boundary,
- * then calculate the offset
- */
- poff = (psn_search & ~PAGE_MASK) /
- BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE;
- }
- for (i = 0; i < sq->hwq.max_elements; i++) {
- sq->swq[i].psn_search =
- &psn_search_ptr[get_psne_pg(i + poff)]
- [get_psne_idx(i + poff)];
- /*psns_ext will be used only for P5 chips. */
- sq->swq[i].psn_ext =
- (struct sq_psn_search_ext *)
- &psn_search_ptr[get_psne_pg(i + poff)]
- [get_psne_idx(i + poff)];
- }
+ if (!sq->hwq.is_user) {
+ rc = bnxt_qplib_alloc_init_swq(sq);
+ if (rc)
+ goto fail_sq;
+
+ if (psn_sz)
+ bnxt_qplib_init_psn_ptr(qp, psn_sz);
}
+ req.sq_size = cpu_to_le32(bnxt_qplib_set_sq_size(sq, qp->wqe_mode));
pbl = &sq->hwq.pbl[PBL_LVL_0];
req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
- req.sq_pg_size_sq_lvl =
- ((sq->hwq.level & CMDQ_CREATE_QP_SQ_LVL_MASK)
- << CMDQ_CREATE_QP_SQ_LVL_SFT) |
- (pbl->pg_size == ROCE_PG_SIZE_4K ?
- CMDQ_CREATE_QP_SQ_PG_SIZE_PG_4K :
- pbl->pg_size == ROCE_PG_SIZE_8K ?
- CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8K :
- pbl->pg_size == ROCE_PG_SIZE_64K ?
- CMDQ_CREATE_QP_SQ_PG_SIZE_PG_64K :
- pbl->pg_size == ROCE_PG_SIZE_2M ?
- CMDQ_CREATE_QP_SQ_PG_SIZE_PG_2M :
- pbl->pg_size == ROCE_PG_SIZE_8M ?
- CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8M :
- pbl->pg_size == ROCE_PG_SIZE_1G ?
- CMDQ_CREATE_QP_SQ_PG_SIZE_PG_1G :
- CMDQ_CREATE_QP_SQ_PG_SIZE_PG_4K);
-
- if (qp->scq)
- req.scq_cid = cpu_to_le32(qp->scq->id);
-
- qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE;
- qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED;
- if (qp->sig_type)
- qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION;
+ pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) <<
+ CMDQ_CREATE_QP_SQ_PG_SIZE_SFT);
+ pg_sz_lvl |= (sq->hwq.level & CMDQ_CREATE_QP_SQ_LVL_MASK);
+ req.sq_pg_size_sq_lvl = pg_sz_lvl;
+ req.sq_fwo_sq_sge =
+ cpu_to_le16(((sq->max_sge & CMDQ_CREATE_QP_SQ_SGE_MASK) <<
+ CMDQ_CREATE_QP_SQ_SGE_SFT) | 0);
+ req.scq_cid = cpu_to_le32(qp->scq->id);
/* RQ */
- if (rq->max_wqe) {
- rq->hwq.max_elements = rq->max_wqe;
- rc = bnxt_qplib_alloc_init_hwq(res->pdev, &rq->hwq,
- &rq->sg_info,
- &rq->hwq.max_elements,
- BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0,
- PAGE_SIZE, HWQ_TYPE_QUEUE);
+ if (!qp->srq) {
+ rq->dbinfo.flags = 0;
+ hwq_attr.res = res;
+ hwq_attr.sginfo = &rq->sg_info;
+ hwq_attr.stride = sizeof(struct sq_sge);
+ hwq_attr.depth = bnxt_qplib_get_depth(rq, qp->wqe_mode, false);
+ hwq_attr.aux_stride = 0;
+ hwq_attr.aux_depth = 0;
+ hwq_attr.type = HWQ_TYPE_QUEUE;
+ rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr);
if (rc)
- goto fail_sq;
-
- rq->swq = kcalloc(rq->hwq.max_elements, sizeof(*rq->swq),
- GFP_KERNEL);
- if (!rq->swq) {
- rc = -ENOMEM;
- goto fail_rq;
+ goto sq_swq;
+ if (!rq->hwq.is_user) {
+ rc = bnxt_qplib_alloc_init_swq(rq);
+ if (rc)
+ goto fail_rq;
}
+
+ req.rq_size = cpu_to_le32(rq->max_wqe);
pbl = &rq->hwq.pbl[PBL_LVL_0];
req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
- req.rq_pg_size_rq_lvl =
- ((rq->hwq.level & CMDQ_CREATE_QP_RQ_LVL_MASK) <<
- CMDQ_CREATE_QP_RQ_LVL_SFT) |
- (pbl->pg_size == ROCE_PG_SIZE_4K ?
- CMDQ_CREATE_QP_RQ_PG_SIZE_PG_4K :
- pbl->pg_size == ROCE_PG_SIZE_8K ?
- CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8K :
- pbl->pg_size == ROCE_PG_SIZE_64K ?
- CMDQ_CREATE_QP_RQ_PG_SIZE_PG_64K :
- pbl->pg_size == ROCE_PG_SIZE_2M ?
- CMDQ_CREATE_QP_RQ_PG_SIZE_PG_2M :
- pbl->pg_size == ROCE_PG_SIZE_8M ?
- CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8M :
- pbl->pg_size == ROCE_PG_SIZE_1G ?
- CMDQ_CREATE_QP_RQ_PG_SIZE_PG_1G :
- CMDQ_CREATE_QP_RQ_PG_SIZE_PG_4K);
+ pg_sz_lvl = (bnxt_qplib_base_pg_size(&rq->hwq) <<
+ CMDQ_CREATE_QP_RQ_PG_SIZE_SFT);
+ pg_sz_lvl |= (rq->hwq.level & CMDQ_CREATE_QP_RQ_LVL_MASK);
+ req.rq_pg_size_rq_lvl = pg_sz_lvl;
+ nsge = (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ?
+ 6 : rq->max_sge;
+ req.rq_fwo_rq_sge =
+ cpu_to_le16(((nsge &
+ CMDQ_CREATE_QP_RQ_SGE_MASK) <<
+ CMDQ_CREATE_QP_RQ_SGE_SFT) | 0);
} else {
/* SRQ */
- if (qp->srq) {
- qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_SRQ_USED;
- req.srq_cid = cpu_to_le32(qp->srq->id);
- }
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_SRQ_USED;
+ req.srq_cid = cpu_to_le32(qp->srq->id);
}
+ req.rcq_cid = cpu_to_le32(qp->rcq->id);
- if (qp->rcq)
- req.rcq_cid = cpu_to_le32(qp->rcq->id);
- req.qp_flags = cpu_to_le32(qp_flags);
- req.sq_size = cpu_to_le32(sq->hwq.max_elements);
- req.rq_size = cpu_to_le32(rq->hwq.max_elements);
- qp->sq_hdr_buf = NULL;
- qp->rq_hdr_buf = NULL;
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE;
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED;
+ if (qp->sig_type)
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION;
+ if (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE)
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED;
+ if (bnxt_ext_stats_supported(res->cctx, res->dattr->dev_cap_flags, res->is_vf))
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_EXT_STATS_ENABLED;
- rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp);
- if (rc)
- goto fail_rq;
+ req.qp_flags = cpu_to_le32(qp_flags);
- /* CTRL-22434: Irrespective of the requested SGE count on the SQ
- * always create the QP with max send sges possible if the requested
- * inline size is greater than 0.
- */
- max_ssge = qp->max_inline_data ? 6 : sq->max_sge;
- req.sq_fwo_sq_sge = cpu_to_le16(
- ((max_ssge & CMDQ_CREATE_QP_SQ_SGE_MASK)
- << CMDQ_CREATE_QP_SQ_SGE_SFT) | 0);
- max_rsge = bnxt_qplib_is_chip_gen_p5(res->cctx) ? 6 : rq->max_sge;
- req.rq_fwo_rq_sge = cpu_to_le16(
- ((max_rsge & CMDQ_CREATE_QP_RQ_SGE_MASK)
- << CMDQ_CREATE_QP_RQ_SGE_SFT) | 0);
/* ORRQ and IRRQ */
if (psn_sz) {
xrrq = &qp->orrq;
@@ -1029,12 +1098,19 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
req_size = xrrq->max_elements *
BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE + PAGE_SIZE - 1;
req_size &= ~(PAGE_SIZE - 1);
- rc = bnxt_qplib_alloc_init_hwq(res->pdev, xrrq, NULL,
- &xrrq->max_elements,
- BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE,
- 0, req_size, HWQ_TYPE_CTX);
+ sginfo.pgsize = req_size;
+ sginfo.pgshft = PAGE_SHIFT;
+
+ hwq_attr.res = res;
+ hwq_attr.sginfo = &sginfo;
+ hwq_attr.depth = xrrq->max_elements;
+ hwq_attr.stride = BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE;
+ hwq_attr.aux_stride = 0;
+ hwq_attr.aux_depth = 0;
+ hwq_attr.type = HWQ_TYPE_CTX;
+ rc = bnxt_qplib_alloc_init_hwq(xrrq, &hwq_attr);
if (rc)
- goto fail_buf_free;
+ goto rq_swq;
pbl = &xrrq->pbl[PBL_LVL_0];
req.orrq_addr = cpu_to_le64(pbl->pg_map_arr[0]);
@@ -1044,11 +1120,10 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
req_size = xrrq->max_elements *
BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE + PAGE_SIZE - 1;
req_size &= ~(PAGE_SIZE - 1);
-
- rc = bnxt_qplib_alloc_init_hwq(res->pdev, xrrq, NULL,
- &xrrq->max_elements,
- BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE,
- 0, req_size, HWQ_TYPE_CTX);
+ sginfo.pgsize = req_size;
+ hwq_attr.depth = xrrq->max_elements;
+ hwq_attr.stride = BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE;
+ rc = bnxt_qplib_alloc_init_hwq(xrrq, &hwq_attr);
if (rc)
goto fail_orrq;
@@ -1057,36 +1132,46 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
}
req.pd_id = cpu_to_le32(qp->pd->id);
- rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- (void *)&resp, NULL, 0);
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
goto fail;
qp->id = le32_to_cpu(resp.xid);
qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
- qp->cctx = res->cctx;
INIT_LIST_HEAD(&qp->sq_flush);
INIT_LIST_HEAD(&qp->rq_flush);
- rcfw->qp_tbl[qp->id].qp_id = qp->id;
- rcfw->qp_tbl[qp->id].qp_handle = (void *)qp;
+ qp->cctx = res->cctx;
+ sq->dbinfo.hwq = &sq->hwq;
+ sq->dbinfo.xid = qp->id;
+ sq->dbinfo.db = qp->dpi->dbr;
+ sq->dbinfo.max_slot = bnxt_qplib_set_sq_max_slot(qp->wqe_mode);
+ if (rq->max_wqe) {
+ rq->dbinfo.hwq = &rq->hwq;
+ rq->dbinfo.xid = qp->id;
+ rq->dbinfo.db = qp->dpi->dbr;
+ rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size);
+ }
+ spin_lock_bh(&rcfw->tbl_lock);
+ tbl_indx = map_qp_id_to_tbl_indx(qp->id, rcfw);
+ rcfw->qp_tbl[tbl_indx].qp_id = qp->id;
+ rcfw->qp_tbl[tbl_indx].qp_handle = (void *)qp;
+ spin_unlock_bh(&rcfw->tbl_lock);
return 0;
-
fail:
- if (qp->irrq.max_elements)
- bnxt_qplib_free_hwq(res->pdev, &qp->irrq);
+ bnxt_qplib_free_hwq(res, &qp->irrq);
fail_orrq:
- if (qp->orrq.max_elements)
- bnxt_qplib_free_hwq(res->pdev, &qp->orrq);
-fail_buf_free:
- bnxt_qplib_free_qp_hdr_buf(res, qp);
-fail_rq:
- bnxt_qplib_free_hwq(res->pdev, &rq->hwq);
+ bnxt_qplib_free_hwq(res, &qp->orrq);
+rq_swq:
kfree(rq->swq);
-fail_sq:
- bnxt_qplib_free_hwq(res->pdev, &sq->hwq);
+fail_rq:
+ bnxt_qplib_free_hwq(res, &rq->hwq);
+sq_swq:
kfree(sq->swq);
-exit:
+fail_sq:
+ bnxt_qplib_free_hwq(res, &sq->hwq);
return rc;
}
@@ -1104,8 +1189,6 @@ static void __modify_flags_from_init_state(struct bnxt_qplib_qp *qp)
qp->path_mtu =
CMDQ_MODIFY_QP_PATH_MTU_MTU_2048;
}
- qp->modify_flags &=
- ~CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID;
/* Bono FW require the max_dest_rd_atomic to be >= 1 */
if (qp->max_dest_rd_atomic < 1)
qp->max_dest_rd_atomic = 1;
@@ -1179,20 +1262,73 @@ static void __filter_modify_flags(struct bnxt_qplib_qp *qp)
}
}
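+/* Add the modify-QP mask bits that firmware mandates for the given QP type and state transition */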
+static void bnxt_set_mandatory_attributes(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_qp *qp,
+ struct cmdq_modify_qp *req)
+{
+ u32 mandatory_flags = 0;
+
+ if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_RC)
+ mandatory_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS;
+
+ if (qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_INIT &&
+ qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTR) {
+ if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_RC && qp->srq)
+ req->flags = cpu_to_le16(CMDQ_MODIFY_QP_FLAGS_SRQ_USED);
+ mandatory_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY;
+ }
+
+ if (_is_min_rnr_in_rtr_rts_mandatory(res->dattr->dev_cap_flags2) &&
+ (qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_RTR &&
+ qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTS)) {
+ if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_RC)
+ mandatory_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_MIN_RNR_TIMER;
+ }
+
+ if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_UD ||
+ qp->type == CMDQ_MODIFY_QP_QP_TYPE_GSI)
+ mandatory_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_QKEY;
+
+ qp->modify_flags |= mandatory_flags;
+ req->qp_type = qp->type;
+}
+
+static bool is_optimized_state_transition(struct bnxt_qplib_qp *qp)
+{
+ if ((qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_INIT &&
+ qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTR) ||
+ (qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_RTR &&
+ qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTS))
+ return true;
+
+ return false;
+}
+
int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
{
+ struct bnxt_qplib_sgid_tbl *sgid_tbl = &res->sgid_tbl;
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
- struct cmdq_modify_qp req;
- struct creq_modify_qp_resp resp;
- u16 cmd_flags = 0, pkey;
+ struct creq_modify_qp_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_modify_qp req = {};
+ u16 vlan_pcp_vlan_dei_vlan_id;
u32 temp32[4];
u32 bmask;
int rc;
- RCFW_CMD_PREP(req, MODIFY_QP, cmd_flags);
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_MODIFY_QP,
+ sizeof(req));
/* Filter out the qp_attr_mask based on the state->new transition */
__filter_modify_flags(qp);
+ if (qp->modify_flags & CMDQ_MODIFY_QP_MODIFY_MASK_STATE) {
+ /* Set mandatory attributes for INIT -> RTR and RTR -> RTS transition */
+ if (_is_optimize_modify_qp_supported(res->dattr->dev_cap_flags2) &&
+ is_optimized_state_transition(qp))
+ bnxt_set_mandatory_attributes(res, qp, &req);
+ }
bmask = qp->modify_flags;
req.modify_mask = cpu_to_le32(qp->modify_flags);
req.qp_cid = cpu_to_le32(qp->id);
@@ -1207,11 +1343,9 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS)
req.access = qp->access;
- if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_PKEY) {
- if (!bnxt_qplib_get_pkey(res, &res->pkey_tbl,
- qp->pkey_index, &pkey))
- req.pkey = cpu_to_le16(pkey);
- }
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_PKEY)
+ req.pkey = cpu_to_le16(IB_DEFAULT_PKEY_FULL);
+
if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_QKEY)
req.qkey = cpu_to_le32(qp->qkey);
@@ -1225,9 +1359,14 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL)
req.flow_label = cpu_to_le32(qp->ah.flow_label);
- if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX)
- req.sgid_index = cpu_to_le16(res->sgid_tbl.hw_id
- [qp->ah.sgid_index]);
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX) {
+ if (qp->type == CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE)
+ req.sgid_index =
+ cpu_to_le16(sgid_tbl->hw_id[qp->ugid_index]);
+ else
+ req.sgid_index =
+ cpu_to_le16(sgid_tbl->hw_id[qp->ah.sgid_index]);
+ }
if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_HOP_LIMIT)
req.hop_limit = qp->ah.hop_limit;
@@ -1239,7 +1378,7 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
memcpy(req.dest_mac, qp->ah.dmac, 6);
if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU)
- req.path_mtu = qp->path_mtu;
+ req.path_mtu_pingpong_push_enable |= qp->path_mtu;
if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_TIMEOUT)
req.timeout = qp->timeout;
@@ -1275,10 +1414,19 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_DEST_QP_ID)
req.dest_qp_id = cpu_to_le32(qp->dest_qpn);
- req.vlan_pcp_vlan_dei_vlan_id = cpu_to_le16(qp->vlan_id);
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID) {
+ vlan_pcp_vlan_dei_vlan_id =
+ ((res->sgid_tbl.tbl[qp->ah.sgid_index].vlan_id <<
+ CMDQ_MODIFY_QP_VLAN_ID_SFT) &
+ CMDQ_MODIFY_QP_VLAN_ID_MASK);
+ vlan_pcp_vlan_dei_vlan_id |=
+ ((qp->ah.sl << CMDQ_MODIFY_QP_VLAN_PCP_SFT) &
+ CMDQ_MODIFY_QP_VLAN_PCP_MASK);
+ req.vlan_pcp_vlan_dei_vlan_id = cpu_to_le16(vlan_pcp_vlan_dei_vlan_id);
+ }
- rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- (void *)&resp, NULL, 0);
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
return rc;
qp->cur_qp_state = qp->state;
@@ -1288,36 +1436,41 @@ int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
- struct cmdq_query_qp req;
- struct creq_query_qp_resp resp;
- struct bnxt_qplib_rcfw_sbuf *sbuf;
+ struct creq_query_qp_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct bnxt_qplib_rcfw_sbuf sbuf;
struct creq_query_qp_resp_sb *sb;
- u16 cmd_flags = 0;
+ struct cmdq_query_qp req = {};
u32 temp32[4];
- int i, rc = 0;
+ int i, rc;
- RCFW_CMD_PREP(req, QUERY_QP, cmd_flags);
-
- sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
- if (!sbuf)
+ sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS);
+ sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
+ &sbuf.dma_addr, GFP_KERNEL);
+ if (!sbuf.sb)
return -ENOMEM;
- sb = sbuf->sb;
+ sb = sbuf.sb;
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_QUERY_QP,
+ sizeof(req));
req.qp_cid = cpu_to_le32(qp->id);
- req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
- rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
- (void *)sbuf, 0);
+ req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS;
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
goto bail;
/* Extract the context from the side buffer */
qp->state = sb->en_sqd_async_notify_state &
CREQ_QUERY_QP_RESP_SB_STATE_MASK;
qp->en_sqd_async_notify = sb->en_sqd_async_notify_state &
- CREQ_QUERY_QP_RESP_SB_EN_SQD_ASYNC_NOTIFY ?
- true : false;
+ CREQ_QUERY_QP_RESP_SB_EN_SQD_ASYNC_NOTIFY;
qp->access = sb->access;
qp->pkey_index = le16_to_cpu(sb->pkey);
qp->qkey = le32_to_cpu(sb->qkey);
+ qp->udp_sport = le16_to_cpu(sb->udp_src_port);
temp32[0] = le32_to_cpu(sb->dgid[0]);
temp32[1] = le32_to_cpu(sb->dgid[1]);
@@ -1363,21 +1516,25 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
qp->dest_qpn = le32_to_cpu(sb->dest_qp_id);
memcpy(qp->smac, sb->src_mac, 6);
qp->vlan_id = le16_to_cpu(sb->vlan_pcp_vlan_dei_vlan_id);
+ qp->port_id = le16_to_cpu(sb->port_id);
bail:
- bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+ dma_free_coherent(&rcfw->pdev->dev, sbuf.size,
+ sbuf.sb, sbuf.dma_addr);
return rc;
}
static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp)
{
struct bnxt_qplib_hwq *cq_hwq = &cq->hwq;
- struct cq_base *hw_cqe, **hw_cqe_ptr;
+ u32 peek_flags, peek_cons;
+ struct cq_base *hw_cqe;
int i;
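+	/* Walk the CQ from the current consumer index, carrying the epoch flags used by the valid-bit check */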
+ peek_flags = cq->dbinfo.flags;
+ peek_cons = cq_hwq->cons;
for (i = 0; i < cq_hwq->max_elements; i++) {
- hw_cqe_ptr = (struct cq_base **)cq_hwq->pbl_ptr;
- hw_cqe = &hw_cqe_ptr[CQE_PG(i)][CQE_IDX(i)];
- if (!CQE_CMP_VALID(hw_cqe, i, cq_hwq->max_elements))
+ hw_cqe = bnxt_qplib_get_qe(cq_hwq, peek_cons, NULL);
+ if (!CQE_CMP_VALID(hw_cqe, peek_flags))
continue;
/*
* The valid test of the entry must be done first before
@@ -1407,6 +1564,8 @@ static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp)
default:
break;
}
+ bnxt_qplib_hwq_incr_cons(cq_hwq->max_elements, &peek_cons,
+ 1, &peek_flags);
}
}
@@ -1414,22 +1573,31 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res,
struct bnxt_qplib_qp *qp)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
- struct cmdq_destroy_qp req;
- struct creq_destroy_qp_resp resp;
- u16 cmd_flags = 0;
+ struct creq_destroy_qp_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_destroy_qp req = {};
+ u32 tbl_indx;
int rc;
- rcfw->qp_tbl[qp->id].qp_id = BNXT_QPLIB_QP_ID_INVALID;
- rcfw->qp_tbl[qp->id].qp_handle = NULL;
+ spin_lock_bh(&rcfw->tbl_lock);
+ tbl_indx = map_qp_id_to_tbl_indx(qp->id, rcfw);
+ rcfw->qp_tbl[tbl_indx].qp_id = BNXT_QPLIB_QP_ID_INVALID;
+ rcfw->qp_tbl[tbl_indx].qp_handle = NULL;
+ spin_unlock_bh(&rcfw->tbl_lock);
- RCFW_CMD_PREP(req, DESTROY_QP, cmd_flags);
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_DESTROY_QP,
+ sizeof(req));
req.qp_cid = cpu_to_le32(qp->id);
- rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- (void *)&resp, NULL, 0);
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc) {
- rcfw->qp_tbl[qp->id].qp_id = qp->id;
- rcfw->qp_tbl[qp->id].qp_handle = qp;
+ spin_lock_bh(&rcfw->tbl_lock);
+ rcfw->qp_tbl[tbl_indx].qp_id = qp->id;
+ rcfw->qp_tbl[tbl_indx].qp_handle = qp;
+ spin_unlock_bh(&rcfw->tbl_lock);
return rc;
}
@@ -1440,16 +1608,16 @@ void bnxt_qplib_free_qp_res(struct bnxt_qplib_res *res,
struct bnxt_qplib_qp *qp)
{
bnxt_qplib_free_qp_hdr_buf(res, qp);
- bnxt_qplib_free_hwq(res->pdev, &qp->sq.hwq);
+ bnxt_qplib_free_hwq(res, &qp->sq.hwq);
kfree(qp->sq.swq);
- bnxt_qplib_free_hwq(res->pdev, &qp->rq.hwq);
+ bnxt_qplib_free_hwq(res, &qp->rq.hwq);
kfree(qp->rq.swq);
if (qp->irrq.max_elements)
- bnxt_qplib_free_hwq(res->pdev, &qp->irrq);
+ bnxt_qplib_free_hwq(res, &qp->irrq);
if (qp->orrq.max_elements)
- bnxt_qplib_free_hwq(res->pdev, &qp->orrq);
+ bnxt_qplib_free_hwq(res, &qp->orrq);
}
@@ -1462,7 +1630,7 @@ void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp,
memset(sge, 0, sizeof(*sge));
if (qp->sq_hdr_buf) {
- sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
+ sw_prod = sq->swq_start;
sge->addr = (dma_addr_t)(qp->sq_hdr_buf_map +
sw_prod * qp->sq_hdr_buf_size);
sge->lkey = 0xFFFFFFFF;
@@ -1476,7 +1644,7 @@ u32 bnxt_qplib_get_rq_prod_index(struct bnxt_qplib_qp *qp)
{
struct bnxt_qplib_q *rq = &qp->rq;
- return HWQ_CMP(rq->hwq.prod, &rq->hwq);
+ return rq->swq_start;
}
dma_addr_t bnxt_qplib_get_qp_buf_from_index(struct bnxt_qplib_qp *qp, u32 index)
@@ -1493,7 +1661,7 @@ void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp,
memset(sge, 0, sizeof(*sge));
if (qp->rq_hdr_buf) {
- sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
+ sw_prod = rq->swq_start;
sge->addr = (dma_addr_t)(qp->rq_hdr_buf_map +
sw_prod * qp->rq_hdr_buf_size);
sge->lkey = 0xFFFFFFFF;
@@ -1503,142 +1671,298 @@ void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp,
return NULL;
}
+/* Fill the MSN table entry for the next PSN row */
+static void bnxt_qplib_fill_msn_search(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe,
+ struct bnxt_qplib_swq *swq)
+{
+ struct sq_msn_search *msns;
+ u32 start_psn, next_psn;
+ u16 start_idx;
+
+ msns = (struct sq_msn_search *)swq->psn_search;
+ msns->start_idx_next_psn_start_psn = 0;
+
+ start_psn = swq->start_psn;
+ next_psn = swq->next_psn;
+ start_idx = swq->slot_idx;
+ msns->start_idx_next_psn_start_psn |=
+ bnxt_re_update_msn_tbl(start_idx, next_psn, start_psn);
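+	/* Advance the MSN producer index, wrapping at the MSN table size */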
+ qp->msn++;
+ qp->msn %= qp->msn_tbl_sz;
+}
+
+static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe,
+ struct bnxt_qplib_swq *swq)
+{
+ struct sq_psn_search_ext *psns_ext;
+ struct sq_psn_search *psns;
+ u32 flg_npsn;
+ u32 op_spsn;
+
+ if (!swq->psn_search)
+ return;
+	/* Handle the MSN table differently based on the device capability flags */
+ if (qp->is_host_msn_tbl) {
+ bnxt_qplib_fill_msn_search(qp, wqe, swq);
+ return;
+ }
+	psns = swq->psn_search;
+ psns_ext = swq->psn_ext;
+
+ op_spsn = ((swq->start_psn << SQ_PSN_SEARCH_START_PSN_SFT) &
+ SQ_PSN_SEARCH_START_PSN_MASK);
+ op_spsn |= ((wqe->type << SQ_PSN_SEARCH_OPCODE_SFT) &
+ SQ_PSN_SEARCH_OPCODE_MASK);
+ flg_npsn = ((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) &
+ SQ_PSN_SEARCH_NEXT_PSN_MASK);
+
+ if (bnxt_qplib_is_chip_gen_p5_p7(qp->cctx)) {
+ psns_ext->opcode_start_psn = cpu_to_le32(op_spsn);
+ psns_ext->flags_next_psn = cpu_to_le32(flg_npsn);
+ psns_ext->start_slot_idx = cpu_to_le16(swq->slot_idx);
+ } else {
+ psns->opcode_start_psn = cpu_to_le32(op_spsn);
+ psns->flags_next_psn = cpu_to_le32(flg_npsn);
+ }
+}
+
+static unsigned int bnxt_qplib_put_inline(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe,
+ u32 *idx)
+{
+ struct bnxt_qplib_hwq *hwq;
+ int len, t_len, offt;
+ bool pull_dst = true;
+ void *il_dst = NULL;
+ void *il_src = NULL;
+ int t_cplen, cplen;
+ int indx;
+
+ hwq = &qp->sq.hwq;
+ t_len = 0;
+ for (indx = 0; indx < wqe->num_sge; indx++) {
+ len = wqe->sg_list[indx].size;
+ il_src = (void *)wqe->sg_list[indx].addr;
+ t_len += len;
+ if (t_len > qp->max_inline_data)
+ return BNXT_RE_INVAL_MSG_SIZE;
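+		/* Copy this SGE into successive 16-byte SQ slots, pulling a fresh destination slot whenever the current one fills */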
+ while (len) {
+ if (pull_dst) {
+ pull_dst = false;
+ il_dst = bnxt_qplib_get_prod_qe(hwq, *idx);
+ (*idx)++;
+ t_cplen = 0;
+ offt = 0;
+ }
+ cplen = min_t(int, len, sizeof(struct sq_sge));
+ cplen = min_t(int, cplen,
+ (sizeof(struct sq_sge) - offt));
+ memcpy(il_dst, il_src, cplen);
+ t_cplen += cplen;
+ il_src += cplen;
+ il_dst += cplen;
+ offt += cplen;
+ len -= cplen;
+ if (t_cplen == sizeof(struct sq_sge))
+ pull_dst = true;
+ }
+ }
+
+ return t_len;
+}
+
+static unsigned int bnxt_qplib_put_sges(struct bnxt_qplib_hwq *hwq,
+ struct bnxt_qplib_sge *ssge,
+ u32 nsge, u32 *idx)
+{
+ struct sq_sge *dsge;
+ int indx, len = 0;
+
+ for (indx = 0; indx < nsge; indx++, (*idx)++) {
+ dsge = bnxt_qplib_get_prod_qe(hwq, *idx);
+ dsge->va_or_pa = cpu_to_le64(ssge[indx].addr);
+ dsge->l_key = cpu_to_le32(ssge[indx].lkey);
+ dsge->size = cpu_to_le32(ssge[indx].size);
+ len += ssge[indx].size;
+ }
+
+ return len;
+}
+
+static u16 bnxt_qplib_required_slots(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe,
+ u16 *wqe_sz, u16 *qdf, u8 mode)
+{
+ u32 ilsize, bytes;
+ u16 nsge;
+ u16 slot;
+
+ nsge = wqe->num_sge;
+	/* Using sq_send_hdr here is a slight misnomer; the header size is the same for the RQ as well. */
+ bytes = sizeof(struct sq_send_hdr) + nsge * sizeof(struct sq_sge);
+ if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) {
+ ilsize = bnxt_qplib_calc_ilsize(wqe, qp->max_inline_data);
+ bytes = ALIGN(ilsize, sizeof(struct sq_sge));
+ bytes += sizeof(struct sq_send_hdr);
+ }
+
+ *qdf = __xlate_qfd(qp->sq.q_full_delta, bytes);
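+	/* Each SQ slot is 16 bytes (one struct sq_sge) */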
+ slot = bytes >> 4;
+ *wqe_sz = slot;
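+	/* Static WQE mode always consumes a full 128-byte (8-slot) WQE */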
+ if (mode == BNXT_QPLIB_WQE_MODE_STATIC)
+ slot = 8;
+ return slot;
+}
+
+static void bnxt_qplib_pull_psn_buff(struct bnxt_qplib_qp *qp, struct bnxt_qplib_q *sq,
+ struct bnxt_qplib_swq *swq, bool hw_retx)
+{
+ struct bnxt_qplib_hwq *hwq;
+ u32 pg_num, pg_indx;
+ void *buff;
+ u32 tail;
+
+ hwq = &sq->hwq;
+ if (!hwq->pad_pg)
+ return;
+ tail = swq->slot_idx / sq->dbinfo.max_slot;
+ if (hw_retx) {
+ /* For HW retx use qp msn index */
+ tail = qp->msn;
+ tail %= qp->msn_tbl_sz;
+ }
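+	/* Locate this WQE's entry within the padded PSN/MSN pages recorded at QP init time */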
+ pg_num = (tail + hwq->pad_pgofft) / (PAGE_SIZE / hwq->pad_stride);
+ pg_indx = (tail + hwq->pad_pgofft) % (PAGE_SIZE / hwq->pad_stride);
+ buff = (void *)(hwq->pad_pg[pg_num] + pg_indx * hwq->pad_stride);
+ swq->psn_ext = buff;
+ swq->psn_search = buff;
+}
+
void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp)
{
struct bnxt_qplib_q *sq = &qp->sq;
- u32 sw_prod;
- u64 val = 0;
- val = (((qp->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) |
- DBC_DBC_TYPE_SQ);
- val <<= 32;
- sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
- val |= (sw_prod << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK;
- /* Flush all the WQE writes to HW */
- writeq(val, qp->dpi->dbr);
+ bnxt_qplib_ring_prod_db(&sq->dbinfo, DBC_DBC_TYPE_SQ);
}
int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
struct bnxt_qplib_swqe *wqe)
{
+ struct bnxt_qplib_nq_work *nq_work = NULL;
+ int i, rc = 0, data_len = 0, pkt_num = 0;
struct bnxt_qplib_q *sq = &qp->sq;
+ struct bnxt_qplib_hwq *hwq;
struct bnxt_qplib_swq *swq;
- struct sq_send *hw_sq_send_hdr, **hw_sq_send_ptr;
- struct sq_sge *hw_sge;
- struct bnxt_qplib_nq_work *nq_work = NULL;
bool sch_handler = false;
- u32 sw_prod;
- u8 wqe_size16;
- int i, rc = 0, data_len = 0, pkt_num = 0;
+ u32 wqe_idx, slots, idx;
+ u16 wqe_sz, qdf = 0;
+ bool msn_update;
+ void *base_hdr;
+ void *ext_hdr;
__le32 temp32;
- if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS) {
- if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
- sch_handler = true;
- dev_dbg(&sq->hwq.pdev->dev,
- "%s Error QP. Scheduling for poll_cq\n",
- __func__);
- goto queue_err;
- }
+ hwq = &sq->hwq;
+ if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS &&
+ qp->state != CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+ dev_err(&hwq->pdev->dev,
+ "QPLIB: FP: QP (0x%x) is in the 0x%x state",
+ qp->id, qp->state);
+ rc = -EINVAL;
+ goto done;
}
- if (bnxt_qplib_queue_full(sq)) {
- dev_err(&sq->hwq.pdev->dev,
+ slots = bnxt_qplib_required_slots(qp, wqe, &wqe_sz, &qdf, qp->wqe_mode);
+ if (bnxt_qplib_queue_full(sq, slots + qdf)) {
+ dev_err(&hwq->pdev->dev,
"prod = %#x cons = %#x qdepth = %#x delta = %#x\n",
- sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements,
- sq->q_full_delta);
+ hwq->prod, hwq->cons, hwq->depth, sq->q_full_delta);
rc = -ENOMEM;
goto done;
}
- sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
- swq = &sq->swq[sw_prod];
+
+ swq = bnxt_qplib_get_swqe(sq, &wqe_idx);
+ bnxt_qplib_pull_psn_buff(qp, sq, swq, qp->is_host_msn_tbl);
+
+ idx = 0;
+ swq->slot_idx = hwq->prod;
+ swq->slots = slots;
swq->wr_id = wqe->wr_id;
swq->type = wqe->type;
swq->flags = wqe->flags;
+ swq->start_psn = sq->psn & BTH_PSN_MASK;
if (qp->sig_type)
swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP;
- swq->start_psn = sq->psn & BTH_PSN_MASK;
- hw_sq_send_ptr = (struct sq_send **)sq->hwq.pbl_ptr;
- hw_sq_send_hdr = &hw_sq_send_ptr[get_sqe_pg(sw_prod)]
- [get_sqe_idx(sw_prod)];
+ if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+ sch_handler = true;
+ dev_dbg(&hwq->pdev->dev,
+ "%s Error QP. Scheduling for poll_cq\n", __func__);
+ goto queue_err;
+ }
- memset(hw_sq_send_hdr, 0, BNXT_QPLIB_MAX_SQE_ENTRY_SIZE);
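+	/* Each SQ WQE begins with two 16-byte header slots: a base header and an extended header */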
+ base_hdr = bnxt_qplib_get_prod_qe(hwq, idx++);
+ ext_hdr = bnxt_qplib_get_prod_qe(hwq, idx++);
+ memset(base_hdr, 0, sizeof(struct sq_sge));
+ memset(ext_hdr, 0, sizeof(struct sq_sge));
- if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) {
+ if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE)
/* Copy the inline data */
- if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) {
- dev_warn(&sq->hwq.pdev->dev,
- "Inline data length > 96 detected\n");
- data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH;
- } else {
- data_len = wqe->inline_len;
- }
- memcpy(hw_sq_send_hdr->data, wqe->inline_data, data_len);
- wqe_size16 = (data_len + 15) >> 4;
- } else {
- for (i = 0, hw_sge = (struct sq_sge *)hw_sq_send_hdr->data;
- i < wqe->num_sge; i++, hw_sge++) {
- hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr);
- hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey);
- hw_sge->size = cpu_to_le32(wqe->sg_list[i].size);
- data_len += wqe->sg_list[i].size;
- }
- /* Each SGE entry = 1 WQE size16 */
- wqe_size16 = wqe->num_sge;
- /* HW requires wqe size has room for atleast one SGE even if
- * none was supplied by ULP
- */
- if (!wqe->num_sge)
- wqe_size16++;
+ data_len = bnxt_qplib_put_inline(qp, wqe, &idx);
+ else
+ data_len = bnxt_qplib_put_sges(hwq, wqe->sg_list, wqe->num_sge,
+ &idx);
+ if (data_len > BNXT_RE_MAX_MSG_SIZE) {
+ rc = -EINVAL;
+ goto done;
}
-
+	/* Make sure we update the MSN table only for WQEs that go out on the wire */
+ msn_update = true;
/* Specifics */
switch (wqe->type) {
case BNXT_QPLIB_SWQE_TYPE_SEND:
if (qp->type == CMDQ_CREATE_QP1_TYPE_GSI) {
+ struct sq_send_raweth_qp1_hdr *sqe = base_hdr;
+ struct sq_raw_ext_hdr *ext_sqe = ext_hdr;
/* Assemble info for Raw Ethertype QPs */
- struct sq_send_raweth_qp1 *sqe =
- (struct sq_send_raweth_qp1 *)hw_sq_send_hdr;
sqe->wqe_type = wqe->type;
sqe->flags = wqe->flags;
- sqe->wqe_size = wqe_size16 +
- ((offsetof(typeof(*sqe), data) + 15) >> 4);
+ sqe->wqe_size = wqe_sz;
sqe->cfa_action = cpu_to_le16(wqe->rawqp1.cfa_action);
sqe->lflags = cpu_to_le16(wqe->rawqp1.lflags);
sqe->length = cpu_to_le32(data_len);
- sqe->cfa_meta = cpu_to_le32((wqe->rawqp1.cfa_meta &
+ ext_sqe->cfa_meta = cpu_to_le32((wqe->rawqp1.cfa_meta &
SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_MASK) <<
SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_SFT);
break;
}
- /* fall thru */
+ fallthrough;
case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM:
case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV:
{
- struct sq_send *sqe = (struct sq_send *)hw_sq_send_hdr;
+ struct sq_ud_ext_hdr *ext_sqe = ext_hdr;
+ struct sq_send_hdr *sqe = base_hdr;
sqe->wqe_type = wqe->type;
sqe->flags = wqe->flags;
- sqe->wqe_size = wqe_size16 +
- ((offsetof(typeof(*sqe), data) + 15) >> 4);
- sqe->inv_key_or_imm_data = cpu_to_le32(
- wqe->send.inv_key);
+ sqe->wqe_size = wqe_sz;
+ sqe->inv_key_or_imm_data = cpu_to_le32(wqe->send.inv_key);
if (qp->type == CMDQ_CREATE_QP_TYPE_UD ||
qp->type == CMDQ_CREATE_QP_TYPE_GSI) {
sqe->q_key = cpu_to_le32(wqe->send.q_key);
- sqe->dst_qp = cpu_to_le32(
- wqe->send.dst_qp & SQ_SEND_DST_QP_MASK);
sqe->length = cpu_to_le32(data_len);
- sqe->avid = cpu_to_le32(wqe->send.avid &
- SQ_SEND_AVID_MASK);
sq->psn = (sq->psn + 1) & BTH_PSN_MASK;
+ ext_sqe->dst_qp = cpu_to_le32(wqe->send.dst_qp &
+ SQ_SEND_DST_QP_MASK);
+ ext_sqe->avid = cpu_to_le32(wqe->send.avid &
+ SQ_SEND_AVID_MASK);
+ msn_update = false;
} else {
sqe->length = cpu_to_le32(data_len);
- sqe->dst_qp = 0;
- sqe->avid = 0;
if (qp->mtu)
pkt_num = (data_len + qp->mtu - 1) / qp->mtu;
if (!pkt_num)
@@ -1651,16 +1975,16 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM:
case BNXT_QPLIB_SWQE_TYPE_RDMA_READ:
{
- struct sq_rdma *sqe = (struct sq_rdma *)hw_sq_send_hdr;
+ struct sq_rdma_ext_hdr *ext_sqe = ext_hdr;
+ struct sq_rdma_hdr *sqe = base_hdr;
sqe->wqe_type = wqe->type;
sqe->flags = wqe->flags;
- sqe->wqe_size = wqe_size16 +
- ((offsetof(typeof(*sqe), data) + 15) >> 4);
+ sqe->wqe_size = wqe_sz;
sqe->imm_data = cpu_to_le32(wqe->rdma.inv_key);
sqe->length = cpu_to_le32((u32)data_len);
- sqe->remote_va = cpu_to_le64(wqe->rdma.remote_va);
- sqe->remote_key = cpu_to_le32(wqe->rdma.r_key);
+ ext_sqe->remote_va = cpu_to_le64(wqe->rdma.remote_va);
+ ext_sqe->remote_key = cpu_to_le32(wqe->rdma.r_key);
if (qp->mtu)
pkt_num = (data_len + qp->mtu - 1) / qp->mtu;
if (!pkt_num)
@@ -1671,14 +1995,15 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
case BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP:
case BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD:
{
- struct sq_atomic *sqe = (struct sq_atomic *)hw_sq_send_hdr;
+ struct sq_atomic_ext_hdr *ext_sqe = ext_hdr;
+ struct sq_atomic_hdr *sqe = base_hdr;
sqe->wqe_type = wqe->type;
sqe->flags = wqe->flags;
sqe->remote_key = cpu_to_le32(wqe->atomic.r_key);
sqe->remote_va = cpu_to_le64(wqe->atomic.remote_va);
- sqe->swap_data = cpu_to_le64(wqe->atomic.swap_data);
- sqe->cmp_data = cpu_to_le64(wqe->atomic.cmp_data);
+ ext_sqe->swap_data = cpu_to_le64(wqe->atomic.swap_data);
+ ext_sqe->cmp_data = cpu_to_le64(wqe->atomic.cmp_data);
if (qp->mtu)
pkt_num = (data_len + qp->mtu - 1) / qp->mtu;
if (!pkt_num)
@@ -1688,18 +2013,18 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
}
case BNXT_QPLIB_SWQE_TYPE_LOCAL_INV:
{
- struct sq_localinvalidate *sqe =
- (struct sq_localinvalidate *)hw_sq_send_hdr;
+ struct sq_localinvalidate *sqe = base_hdr;
sqe->wqe_type = wqe->type;
sqe->flags = wqe->flags;
sqe->inv_l_key = cpu_to_le32(wqe->local_inv.inv_l_key);
-
+ msn_update = false;
break;
}
case BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR:
{
- struct sq_fr_pmr *sqe = (struct sq_fr_pmr *)hw_sq_send_hdr;
+ struct sq_fr_pmr_ext_hdr *ext_sqe = ext_hdr;
+ struct sq_fr_pmr_hdr *sqe = base_hdr;
sqe->wqe_type = wqe->type;
sqe->flags = wqe->flags;
@@ -1723,14 +2048,16 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
wqe->frmr.pbl_ptr[i] = cpu_to_le64(
wqe->frmr.page_list[i] |
PTU_PTE_VALID);
- sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr);
- sqe->va = cpu_to_le64(wqe->frmr.va);
+ ext_sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr);
+ ext_sqe->va = cpu_to_le64(wqe->frmr.va);
+ msn_update = false;
break;
}
case BNXT_QPLIB_SWQE_TYPE_BIND_MW:
{
- struct sq_bind *sqe = (struct sq_bind *)hw_sq_send_hdr;
+ struct sq_bind_ext_hdr *ext_sqe = ext_hdr;
+ struct sq_bind_hdr *sqe = base_hdr;
sqe->wqe_type = wqe->type;
sqe->flags = wqe->flags;
@@ -1739,9 +2066,9 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
(wqe->bind.zero_based ? SQ_BIND_ZERO_BASED : 0);
sqe->parent_l_key = cpu_to_le32(wqe->bind.parent_l_key);
sqe->l_key = cpu_to_le32(wqe->bind.r_key);
- sqe->va = cpu_to_le64(wqe->bind.va);
- temp32 = cpu_to_le32(wqe->bind.length);
- memcpy(&sqe->length, &temp32, sizeof(wqe->bind.length));
+ ext_sqe->va = cpu_to_le64(wqe->bind.va);
+ ext_sqe->length_lo = cpu_to_le32(wqe->bind.length);
+ msn_update = false;
break;
}
default:
@@ -1749,44 +2076,14 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
rc = -EINVAL;
goto done;
}
- swq->next_psn = sq->psn & BTH_PSN_MASK;
- if (swq->psn_search) {
- u32 opcd_spsn;
- u32 flg_npsn;
-
- opcd_spsn = ((swq->start_psn << SQ_PSN_SEARCH_START_PSN_SFT) &
- SQ_PSN_SEARCH_START_PSN_MASK);
- opcd_spsn |= ((wqe->type << SQ_PSN_SEARCH_OPCODE_SFT) &
- SQ_PSN_SEARCH_OPCODE_MASK);
- flg_npsn = ((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) &
- SQ_PSN_SEARCH_NEXT_PSN_MASK);
- if (bnxt_qplib_is_chip_gen_p5(qp->cctx)) {
- swq->psn_ext->opcode_start_psn =
- cpu_to_le32(opcd_spsn);
- swq->psn_ext->flags_next_psn =
- cpu_to_le32(flg_npsn);
- } else {
- swq->psn_search->opcode_start_psn =
- cpu_to_le32(opcd_spsn);
- swq->psn_search->flags_next_psn =
- cpu_to_le32(flg_npsn);
- }
+ if (!qp->is_host_msn_tbl || msn_update) {
+ swq->next_psn = sq->psn & BTH_PSN_MASK;
+ bnxt_qplib_fill_psn_search(qp, wqe, swq);
}
queue_err:
- if (sch_handler) {
- /* Store the ULP info in the software structures */
- sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
- swq = &sq->swq[sw_prod];
- swq->wr_id = wqe->wr_id;
- swq->type = wqe->type;
- swq->flags = wqe->flags;
- if (qp->sig_type)
- swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP;
- swq->start_psn = sq->psn & BTH_PSN_MASK;
- }
- sq->hwq.prod++;
+ bnxt_qplib_swq_mod_start(sq, wqe_idx);
+ bnxt_qplib_hwq_incr_prod(&sq->dbinfo, hwq, swq->slots);
qp->wqe_cnt++;
-
done:
if (sch_handler) {
nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC);
@@ -1796,7 +2093,7 @@ done:
INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task);
queue_work(qp->scq->nq->cqn_wq, &nq_work->work);
} else {
- dev_err(&sq->hwq.pdev->dev,
+ dev_err(&hwq->pdev->dev,
"FP: Failed to allocate SQ nq_work!\n");
rc = -ENOMEM;
}
@@ -1807,77 +2104,74 @@ done:
void bnxt_qplib_post_recv_db(struct bnxt_qplib_qp *qp)
{
struct bnxt_qplib_q *rq = &qp->rq;
- u32 sw_prod;
- u64 val = 0;
- val = (((qp->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) |
- DBC_DBC_TYPE_RQ);
- val <<= 32;
- sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
- val |= (sw_prod << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK;
- /* Flush the writes to HW Rx WQE before the ringing Rx DB */
- writeq(val, qp->dpi->dbr);
+ bnxt_qplib_ring_prod_db(&rq->dbinfo, DBC_DBC_TYPE_RQ);
}
int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
struct bnxt_qplib_swqe *wqe)
{
- struct bnxt_qplib_q *rq = &qp->rq;
- struct rq_wqe *rqe, **rqe_ptr;
- struct sq_sge *hw_sge;
struct bnxt_qplib_nq_work *nq_work = NULL;
+ struct bnxt_qplib_q *rq = &qp->rq;
+ struct rq_wqe_hdr *base_hdr;
+ struct rq_ext_hdr *ext_hdr;
+ struct bnxt_qplib_hwq *hwq;
+ struct bnxt_qplib_swq *swq;
bool sch_handler = false;
- u32 sw_prod;
- int i, rc = 0;
+ u32 wqe_idx, idx;
+ u16 wqe_sz;
+ int rc = 0;
- if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
- sch_handler = true;
- dev_dbg(&rq->hwq.pdev->dev,
- "%s: Error QP. Scheduling for poll_cq\n", __func__);
- goto queue_err;
+ hwq = &rq->hwq;
+ if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_RESET) {
+ dev_err(&hwq->pdev->dev,
+ "QPLIB: FP: QP (0x%x) is in the 0x%x state",
+ qp->id, qp->state);
+ rc = -EINVAL;
+ goto done;
}
- if (bnxt_qplib_queue_full(rq)) {
- dev_err(&rq->hwq.pdev->dev,
+
+ if (bnxt_qplib_queue_full(rq, rq->dbinfo.max_slot)) {
+ dev_err(&hwq->pdev->dev,
"FP: QP (0x%x) RQ is full!\n", qp->id);
rc = -EINVAL;
goto done;
}
- sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
- rq->swq[sw_prod].wr_id = wqe->wr_id;
-
- rqe_ptr = (struct rq_wqe **)rq->hwq.pbl_ptr;
- rqe = &rqe_ptr[RQE_PG(sw_prod)][RQE_IDX(sw_prod)];
- memset(rqe, 0, BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
+ swq = bnxt_qplib_get_swqe(rq, &wqe_idx);
+ swq->wr_id = wqe->wr_id;
+ swq->slots = rq->dbinfo.max_slot;
- /* Calculate wqe_size16 and data_len */
- for (i = 0, hw_sge = (struct sq_sge *)rqe->data;
- i < wqe->num_sge; i++, hw_sge++) {
- hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr);
- hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey);
- hw_sge->size = cpu_to_le32(wqe->sg_list[i].size);
+ if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+ sch_handler = true;
+ dev_dbg(&hwq->pdev->dev,
+ "%s: Error QP. Scheduling for poll_cq\n", __func__);
+ goto queue_err;
}
- rqe->wqe_type = wqe->type;
- rqe->flags = wqe->flags;
- rqe->wqe_size = wqe->num_sge +
- ((offsetof(typeof(*rqe), data) + 15) >> 4);
- /* HW requires wqe size has room for atleast one SGE even if none
- * was supplied by ULP
- */
- if (!wqe->num_sge)
- rqe->wqe_size++;
-
- /* Supply the rqe->wr_id index to the wr_id_tbl for now */
- rqe->wr_id[0] = cpu_to_le32(sw_prod);
+ idx = 0;
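+	/* The RQ WQE likewise begins with a base header slot and an extended header slot */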
+ base_hdr = bnxt_qplib_get_prod_qe(hwq, idx++);
+ ext_hdr = bnxt_qplib_get_prod_qe(hwq, idx++);
+ memset(base_hdr, 0, sizeof(struct sq_sge));
+ memset(ext_hdr, 0, sizeof(struct sq_sge));
+ wqe_sz = (sizeof(struct rq_wqe_hdr) +
+ wqe->num_sge * sizeof(struct sq_sge)) >> 4;
+ bnxt_qplib_put_sges(hwq, wqe->sg_list, wqe->num_sge, &idx);
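+	/* HW requires at least one SGE in the WQE; post a zero-length SGE if the ULP supplied none */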
+ if (!wqe->num_sge) {
+ struct sq_sge *sge;
+
+ sge = bnxt_qplib_get_prod_qe(hwq, idx++);
+ sge->size = 0;
+ wqe_sz++;
+ }
+ base_hdr->wqe_type = wqe->type;
+ base_hdr->flags = wqe->flags;
+ base_hdr->wqe_size = wqe_sz;
+ base_hdr->wr_id[0] = cpu_to_le32(wqe_idx);
queue_err:
- if (sch_handler) {
- /* Store the ULP info in the software structures */
- sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
- rq->swq[sw_prod].wr_id = wqe->wr_id;
- }
-
- rq->hwq.prod++;
+ bnxt_qplib_swq_mod_start(rq, wqe_idx);
+ bnxt_qplib_hwq_incr_prod(&rq->dbinfo, hwq, swq->slots);
+done:
if (sch_handler) {
nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC);
if (nq_work) {
@@ -1886,97 +2180,87 @@ queue_err:
INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task);
queue_work(qp->rcq->nq->cqn_wq, &nq_work->work);
} else {
- dev_err(&rq->hwq.pdev->dev,
+ dev_err(&hwq->pdev->dev,
"FP: Failed to allocate RQ nq_work!\n");
rc = -ENOMEM;
}
}
-done:
+
return rc;
}
/* CQ */
-
-/* Spinlock must be held */
-static void bnxt_qplib_arm_cq_enable(struct bnxt_qplib_cq *cq)
-{
- u64 val = 0;
-
- val = ((cq->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) |
- DBC_DBC_TYPE_CQ_ARMENA;
- val <<= 32;
- /* Flush memory writes before enabling the CQ */
- writeq(val, cq->dbr_base);
-}
-
-static void bnxt_qplib_arm_cq(struct bnxt_qplib_cq *cq, u32 arm_type)
-{
- struct bnxt_qplib_hwq *cq_hwq = &cq->hwq;
- u32 sw_cons;
- u64 val = 0;
-
- /* Ring DB */
- val = ((cq->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | arm_type;
- val <<= 32;
- sw_cons = HWQ_CMP(cq_hwq->cons, cq_hwq);
- val |= (sw_cons << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK;
- /* flush memory writes before arming the CQ */
- writeq(val, cq->dpi->dbr);
-}
-
int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
- struct cmdq_create_cq req;
- struct creq_create_cq_resp resp;
+ struct bnxt_qplib_hwq_attr hwq_attr = {};
+ struct creq_create_cq_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_create_cq req = {};
struct bnxt_qplib_pbl *pbl;
- u16 cmd_flags = 0;
+ u32 coalescing = 0;
+ u32 pg_sz_lvl;
int rc;
- cq->hwq.max_elements = cq->max_wqe;
- rc = bnxt_qplib_alloc_init_hwq(res->pdev, &cq->hwq, &cq->sg_info,
- &cq->hwq.max_elements,
- BNXT_QPLIB_MAX_CQE_ENTRY_SIZE, 0,
- PAGE_SIZE, HWQ_TYPE_QUEUE);
- if (rc)
- goto exit;
-
- RCFW_CMD_PREP(req, CREATE_CQ, cmd_flags);
-
if (!cq->dpi) {
dev_err(&rcfw->pdev->dev,
"FP: CREATE_CQ failed due to NULL DPI\n");
return -EINVAL;
}
+
+ cq->dbinfo.flags = 0;
+ hwq_attr.res = res;
+ hwq_attr.depth = cq->max_wqe;
+ hwq_attr.stride = sizeof(struct cq_base);
+ hwq_attr.type = HWQ_TYPE_QUEUE;
+ hwq_attr.sginfo = &cq->sg_info;
+ rc = bnxt_qplib_alloc_init_hwq(&cq->hwq, &hwq_attr);
+ if (rc)
+ return rc;
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_CREATE_CQ,
+ sizeof(req));
+
req.dpi = cpu_to_le32(cq->dpi->dpi);
req.cq_handle = cpu_to_le64(cq->cq_handle);
+ req.cq_size = cpu_to_le32(cq->max_wqe);
+
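+	/* Program CQ coalescing parameters when the device supports it and the caller has enabled it */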
+ if (_is_cq_coalescing_supported(res->dattr->dev_cap_flags2) &&
+ cq->coalescing->enable) {
+ req.flags |= cpu_to_le16(CMDQ_CREATE_CQ_FLAGS_COALESCING_VALID);
+ coalescing |= ((cq->coalescing->buf_maxtime <<
+ CMDQ_CREATE_CQ_BUF_MAXTIME_SFT) &
+ CMDQ_CREATE_CQ_BUF_MAXTIME_MASK);
+ coalescing |= ((cq->coalescing->normal_maxbuf <<
+ CMDQ_CREATE_CQ_NORMAL_MAXBUF_SFT) &
+ CMDQ_CREATE_CQ_NORMAL_MAXBUF_MASK);
+ coalescing |= ((cq->coalescing->during_maxbuf <<
+ CMDQ_CREATE_CQ_DURING_MAXBUF_SFT) &
+ CMDQ_CREATE_CQ_DURING_MAXBUF_MASK);
+ if (cq->coalescing->en_ring_idle_mode)
+ coalescing |= CMDQ_CREATE_CQ_ENABLE_RING_IDLE_MODE;
+ else
+ coalescing &= ~CMDQ_CREATE_CQ_ENABLE_RING_IDLE_MODE;
+ req.coalescing = cpu_to_le32(coalescing);
+ }
- req.cq_size = cpu_to_le32(cq->hwq.max_elements);
pbl = &cq->hwq.pbl[PBL_LVL_0];
- req.pg_size_lvl = cpu_to_le32(
- ((cq->hwq.level & CMDQ_CREATE_CQ_LVL_MASK) <<
- CMDQ_CREATE_CQ_LVL_SFT) |
- (pbl->pg_size == ROCE_PG_SIZE_4K ? CMDQ_CREATE_CQ_PG_SIZE_PG_4K :
- pbl->pg_size == ROCE_PG_SIZE_8K ? CMDQ_CREATE_CQ_PG_SIZE_PG_8K :
- pbl->pg_size == ROCE_PG_SIZE_64K ? CMDQ_CREATE_CQ_PG_SIZE_PG_64K :
- pbl->pg_size == ROCE_PG_SIZE_2M ? CMDQ_CREATE_CQ_PG_SIZE_PG_2M :
- pbl->pg_size == ROCE_PG_SIZE_8M ? CMDQ_CREATE_CQ_PG_SIZE_PG_8M :
- pbl->pg_size == ROCE_PG_SIZE_1G ? CMDQ_CREATE_CQ_PG_SIZE_PG_1G :
- CMDQ_CREATE_CQ_PG_SIZE_PG_4K));
-
+ pg_sz_lvl = (bnxt_qplib_base_pg_size(&cq->hwq) <<
+ CMDQ_CREATE_CQ_PG_SIZE_SFT);
+ pg_sz_lvl |= (cq->hwq.level & CMDQ_CREATE_CQ_LVL_MASK);
+ req.pg_size_lvl = cpu_to_le32(pg_sz_lvl);
req.pbl = cpu_to_le64(pbl->pg_map_arr[0]);
-
req.cq_fco_cnq_id = cpu_to_le32(
(cq->cnq_hw_ring_id & CMDQ_CREATE_CQ_CNQ_ID_MASK) <<
CMDQ_CREATE_CQ_CNQ_ID_SFT);
-
- rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- (void *)&resp, NULL, 0);
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
goto fail;
cq->id = le32_to_cpu(resp.xid);
- cq->dbr_base = res->dpi_tbl.dbr_bar_reg_iomem;
cq->period = BNXT_QPLIB_QUEUE_START_PERIOD;
init_waitqueue_head(&cq->waitq);
INIT_LIST_HEAD(&cq->sqf_head);
@@ -1984,51 +2268,112 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
spin_lock_init(&cq->compl_lock);
spin_lock_init(&cq->flush_lock);
- bnxt_qplib_arm_cq_enable(cq);
+ cq->dbinfo.hwq = &cq->hwq;
+ cq->dbinfo.xid = cq->id;
+ cq->dbinfo.db = cq->dpi->dbr;
+ cq->dbinfo.priv_db = res->dpi_tbl.priv_db;
+ cq->dbinfo.flags = 0;
+ cq->dbinfo.toggle = 0;
+
+ bnxt_qplib_armen_db(&cq->dbinfo, DBC_DBC_TYPE_CQ_ARMENA);
+
return 0;
fail:
- bnxt_qplib_free_hwq(res->pdev, &cq->hwq);
-exit:
+ bnxt_qplib_free_hwq(res, &cq->hwq);
+ return rc;
+}
+
+void bnxt_qplib_resize_cq_complete(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_cq *cq)
+{
+ bnxt_qplib_free_hwq(res, &cq->hwq);
+ memcpy(&cq->hwq, &cq->resize_hwq, sizeof(cq->hwq));
+ /* Reset only the cons bit in the flags */
+ cq->dbinfo.flags &= ~(1UL << BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT);
+}
+
+int bnxt_qplib_resize_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq,
+ int new_cqes)
+{
+ struct bnxt_qplib_hwq_attr hwq_attr = {};
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
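+		/* VLAN ID comes from the SGID table entry; PCP comes from the AH service level */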
+ struct creq_resize_cq_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_resize_cq req = {};
+ struct bnxt_qplib_pbl *pbl;
+ u32 pg_sz, lvl, new_sz;
+ int rc;
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_RESIZE_CQ,
+ sizeof(req));
+ hwq_attr.sginfo = &cq->sg_info;
+ hwq_attr.res = res;
+ hwq_attr.depth = new_cqes;
+ hwq_attr.stride = sizeof(struct cq_base);
+ hwq_attr.type = HWQ_TYPE_QUEUE;
+ rc = bnxt_qplib_alloc_init_hwq(&cq->resize_hwq, &hwq_attr);
+ if (rc)
+ return rc;
+
+ req.cq_cid = cpu_to_le32(cq->id);
+ pbl = &cq->resize_hwq.pbl[PBL_LVL_0];
+ pg_sz = bnxt_qplib_base_pg_size(&cq->resize_hwq);
+ lvl = (cq->resize_hwq.level << CMDQ_RESIZE_CQ_LVL_SFT) &
+ CMDQ_RESIZE_CQ_LVL_MASK;
+ new_sz = (new_cqes << CMDQ_RESIZE_CQ_NEW_CQ_SIZE_SFT) &
+ CMDQ_RESIZE_CQ_NEW_CQ_SIZE_MASK;
+ req.new_cq_size_pg_size_lvl = cpu_to_le32(new_sz | pg_sz | lvl);
+ req.new_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
return rc;
}
int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
- struct cmdq_destroy_cq req;
- struct creq_destroy_cq_resp resp;
- u16 cmd_flags = 0;
+ struct creq_destroy_cq_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_destroy_cq req = {};
+ u16 total_cnq_events;
int rc;
- RCFW_CMD_PREP(req, DESTROY_CQ, cmd_flags);
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_DESTROY_CQ,
+ sizeof(req));
req.cq_cid = cpu_to_le32(cq->id);
- rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- (void *)&resp, NULL, 0);
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
return rc;
- bnxt_qplib_free_hwq(res->pdev, &cq->hwq);
+ total_cnq_events = le16_to_cpu(resp.total_cnq_events);
+ __wait_for_all_nqes(cq, total_cnq_events);
+ bnxt_qplib_free_hwq(res, &cq->hwq);
return 0;
}
static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp,
struct bnxt_qplib_cqe **pcqe, int *budget)
{
- u32 sw_prod, sw_cons;
struct bnxt_qplib_cqe *cqe;
+ u32 start, last;
int rc = 0;
/* Now complete all outstanding SQEs with FLUSHED_ERR */
- sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
+ start = sq->swq_start;
cqe = *pcqe;
while (*budget) {
- sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
- if (sw_cons == sw_prod) {
+ last = sq->swq_last;
+ if (start == last)
break;
- }
/* Skip the FENCE WQE completions */
- if (sq->swq[sw_cons].wr_id == BNXT_QPLIB_FENCE_WRID) {
+ if (sq->swq[last].wr_id == BNXT_QPLIB_FENCE_WRID) {
bnxt_qplib_cancel_phantom_processing(qp);
goto skip_compl;
}
@@ -2036,16 +2381,18 @@ static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp,
cqe->status = CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR;
cqe->opcode = CQ_BASE_CQE_TYPE_REQ;
cqe->qp_handle = (u64)(unsigned long)qp;
- cqe->wr_id = sq->swq[sw_cons].wr_id;
+ cqe->wr_id = sq->swq[last].wr_id;
cqe->src_qp = qp->id;
- cqe->type = sq->swq[sw_cons].type;
+ cqe->type = sq->swq[last].type;
cqe++;
(*budget)--;
skip_compl:
- sq->hwq.cons++;
+ bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons,
+ sq->swq[last].slots, &sq->dbinfo.flags);
+ sq->swq_last = sq->swq[last].next_idx;
}
*pcqe = cqe;
- if (!(*budget) && HWQ_CMP(sq->hwq.cons, &sq->hwq) != sw_prod)
+ if (!(*budget) && sq->swq_last != start)
/* Out of budget */
rc = -EAGAIN;
@@ -2056,9 +2403,9 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp,
struct bnxt_qplib_cqe **pcqe, int *budget)
{
struct bnxt_qplib_cqe *cqe;
- u32 sw_prod, sw_cons;
- int rc = 0;
+ u32 start, last;
int opcode = 0;
+ int rc = 0;
switch (qp->type) {
case CMDQ_CREATE_QP1_TYPE_GSI:
@@ -2074,24 +2421,26 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp,
}
/* Flush the rest of the RQ */
- sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
+ start = rq->swq_start;
cqe = *pcqe;
while (*budget) {
- sw_cons = HWQ_CMP(rq->hwq.cons, &rq->hwq);
- if (sw_cons == sw_prod)
+ last = rq->swq_last;
+ if (last == start)
break;
memset(cqe, 0, sizeof(*cqe));
cqe->status =
CQ_RES_RC_STATUS_WORK_REQUEST_FLUSHED_ERR;
cqe->opcode = opcode;
cqe->qp_handle = (unsigned long)qp;
- cqe->wr_id = rq->swq[sw_cons].wr_id;
+ cqe->wr_id = rq->swq[last].wr_id;
cqe++;
(*budget)--;
- rq->hwq.cons++;
+ bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons,
+ rq->swq[last].slots, &rq->dbinfo.flags);
+ rq->swq_last = rq->swq[last].next_idx;
}
*pcqe = cqe;
- if (!*budget && HWQ_CMP(rq->hwq.cons, &rq->hwq) != sw_prod)
+ if (!*budget && rq->swq_last != start)
/* Out of budget */
rc = -EAGAIN;
@@ -2114,20 +2463,20 @@ void bnxt_qplib_mark_qp_error(void *qp_handle)
 * CQE is tracked from sw_cq_cons to max_elements but valid only if VALID=1
*/
static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
- u32 cq_cons, u32 sw_sq_cons, u32 cqe_sq_cons)
+ u32 cq_cons, u32 swq_last, u32 cqe_sq_cons)
{
+ u32 peek_sw_cq_cons, peek_sq_cons_idx, peek_flags;
struct bnxt_qplib_q *sq = &qp->sq;
- struct bnxt_qplib_swq *swq;
- u32 peek_sw_cq_cons, peek_raw_cq_cons, peek_sq_cons_idx;
- struct cq_base *peek_hwcqe, **peek_hw_cqe_ptr;
struct cq_req *peek_req_hwcqe;
struct bnxt_qplib_qp *peek_qp;
struct bnxt_qplib_q *peek_sq;
+ struct bnxt_qplib_swq *swq;
+ struct cq_base *peek_hwcqe;
int i, rc = 0;
/* Normal mode */
/* Check for the psn_search marking before completing */
- swq = &sq->swq[sw_sq_cons];
+ swq = &sq->swq[swq_last];
if (swq->psn_search &&
le32_to_cpu(swq->psn_search->flags_next_psn) & 0x80000000) {
/* Unmark */
@@ -2136,29 +2485,25 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
& ~0x80000000);
dev_dbg(&cq->hwq.pdev->dev,
"FP: Process Req cq_cons=0x%x qp=0x%x sq cons sw=0x%x cqe=0x%x marked!\n",
- cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
+ cq_cons, qp->id, swq_last, cqe_sq_cons);
sq->condition = true;
sq->send_phantom = true;
/* TODO: Only ARM if the previous SQE is ARMALL */
- bnxt_qplib_arm_cq(cq, DBC_DBC_TYPE_CQ_ARMALL);
-
+ bnxt_qplib_ring_db(&cq->dbinfo, DBC_DBC_TYPE_CQ_ARMALL);
rc = -EAGAIN;
goto out;
}
if (sq->condition) {
/* Peek at the completions */
- peek_raw_cq_cons = cq->hwq.cons;
+ peek_flags = cq->dbinfo.flags;
peek_sw_cq_cons = cq_cons;
i = cq->hwq.max_elements;
while (i--) {
- peek_sw_cq_cons = HWQ_CMP((peek_sw_cq_cons), &cq->hwq);
- peek_hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr;
- peek_hwcqe = &peek_hw_cqe_ptr[CQE_PG(peek_sw_cq_cons)]
- [CQE_IDX(peek_sw_cq_cons)];
+ peek_hwcqe = bnxt_qplib_get_qe(&cq->hwq,
+ peek_sw_cq_cons, NULL);
/* If the next hwcqe is VALID */
- if (CQE_CMP_VALID(peek_hwcqe, peek_raw_cq_cons,
- cq->hwq.max_elements)) {
+ if (CQE_CMP_VALID(peek_hwcqe, peek_flags)) {
/*
* The valid test of the entry must be done first before
* reading any further.
@@ -2175,9 +2520,10 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
le64_to_cpu
(peek_req_hwcqe->qp_handle));
peek_sq = &peek_qp->sq;
- peek_sq_cons_idx = HWQ_CMP(le16_to_cpu(
- peek_req_hwcqe->sq_cons_idx) - 1
- , &sq->hwq);
+ peek_sq_cons_idx =
+ ((le16_to_cpu(
+ peek_req_hwcqe->sq_cons_idx)
+ - 1) % sq->max_wqe);
/* If the hwcqe's sq's wr_id matches */
if (peek_sq == sq &&
sq->swq[peek_sq_cons_idx].wr_id ==
@@ -2200,28 +2546,56 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
rc = -EINVAL;
goto out;
}
- peek_sw_cq_cons++;
- peek_raw_cq_cons++;
+ bnxt_qplib_hwq_incr_cons(cq->hwq.max_elements,
+ &peek_sw_cq_cons,
+ 1, &peek_flags);
}
dev_err(&cq->hwq.pdev->dev,
"Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x\n",
- cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
+ cq_cons, qp->id, swq_last, cqe_sq_cons);
rc = -EINVAL;
}
out:
return rc;
}
+static int bnxt_qplib_get_cqe_sq_cons(struct bnxt_qplib_q *sq, u32 cqe_slot)
+{
+ struct bnxt_qplib_hwq *sq_hwq;
+ struct bnxt_qplib_swq *swq;
+ int cqe_sq_cons = -1;
+ u32 start, last;
+
+ sq_hwq = &sq->hwq;
+
+ start = sq->swq_start;
+ last = sq->swq_last;
+
+ while (last != start) {
+ swq = &sq->swq[last];
+ if (swq->slot_idx == cqe_slot) {
+ cqe_sq_cons = swq->next_idx;
+ dev_err(&sq_hwq->pdev->dev, "%s: Found cons wqe = %d slot = %d\n",
+ __func__, cqe_sq_cons, cqe_slot);
+ break;
+ }
+
+ last = swq->next_idx;
+ }
+ return cqe_sq_cons;
+}
+
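The new bnxt_qplib_get_cqe_sq_cons() above recovers the SQ consumer index for a variable-size WQE by walking the software queue, whose entries are chained through next_idx, from the oldest outstanding entry (swq_last) toward the next free one (swq_start) until an entry's starting slot matches the slot index reported in the CQE. A rough standalone sketch of that walk, using hypothetical names and data rather than the driver's structures:

#include <stdio.h>

struct demo_swq {
	unsigned int slot_idx;	/* first hardware slot used by this WQE */
	unsigned int next_idx;	/* index of the next software-queue entry */
};

/* Return the next_idx of the entry starting at cqe_slot, or -1 if none. */
static int demo_find_cons(const struct demo_swq *swq, unsigned int start,
			  unsigned int last, unsigned int cqe_slot)
{
	while (last != start) {
		if (swq[last].slot_idx == cqe_slot)
			return (int)swq[last].next_idx;
		last = swq[last].next_idx;
	}
	return -1;
}

int main(void)
{
	/* Three posted WQEs starting at hardware slots 0, 4 and 6. */
	struct demo_swq swq[8] = {
		[0] = { .slot_idx = 0, .next_idx = 1 },
		[1] = { .slot_idx = 4, .next_idx = 2 },
		[2] = { .slot_idx = 6, .next_idx = 3 },
	};

	/* Oldest outstanding entry is 0; the producer will use 3 next. */
	printf("cons = %d\n", demo_find_cons(swq, 3, 0, 4)); /* prints 2 */
	return 0;
}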
static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
struct cq_req *hwcqe,
struct bnxt_qplib_cqe **pcqe, int *budget,
u32 cq_cons, struct bnxt_qplib_qp **lib_qp)
{
+ struct bnxt_qplib_swq *swq;
+ struct bnxt_qplib_cqe *cqe;
+ u32 cqe_sq_cons, slot_num;
struct bnxt_qplib_qp *qp;
struct bnxt_qplib_q *sq;
- struct bnxt_qplib_cqe *cqe;
- u32 sw_sq_cons, cqe_sq_cons;
- struct bnxt_qplib_swq *swq;
+ int cqe_cons;
int rc = 0;
qp = (struct bnxt_qplib_qp *)((unsigned long)
@@ -2233,31 +2607,37 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
}
sq = &qp->sq;
- cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq);
- if (cqe_sq_cons > sq->hwq.max_elements) {
- dev_err(&cq->hwq.pdev->dev,
- "FP: CQ Process req reported sq_cons_idx 0x%x which exceeded max 0x%x\n",
- cqe_sq_cons, sq->hwq.max_elements);
- return -EINVAL;
- }
-
+ cqe_sq_cons = le16_to_cpu(hwcqe->sq_cons_idx) % sq->max_sw_wqe;
if (qp->sq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
"%s: QP in Flush QP = %p\n", __func__, qp);
goto done;
}
+
+ if (__is_err_cqe_for_var_wqe(qp, hwcqe->status)) {
+ slot_num = le16_to_cpu(hwcqe->sq_cons_idx);
+ cqe_cons = bnxt_qplib_get_cqe_sq_cons(sq, slot_num);
+ if (cqe_cons < 0) {
+ dev_err(&cq->hwq.pdev->dev, "%s: Wrong SQ cons cqe_slot_indx = %d\n",
+ __func__, slot_num);
+ goto done;
+ }
+ cqe_sq_cons = cqe_cons;
+ dev_err(&cq->hwq.pdev->dev, "%s: cqe_sq_cons = %d swq_last = %d swq_start = %d\n",
+ __func__, cqe_sq_cons, sq->swq_last, sq->swq_start);
+ }
+
/* Require to walk the sq's swq to fabricate CQEs for all previously
* signaled SWQEs due to CQE aggregation from the current sq cons
* to the cqe_sq_cons
*/
cqe = *pcqe;
while (*budget) {
- sw_sq_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
- if (sw_sq_cons == cqe_sq_cons)
+ if (sq->swq_last == cqe_sq_cons)
/* Done */
break;
- swq = &sq->swq[sw_sq_cons];
+ swq = &sq->swq[sq->swq_last];
memset(cqe, 0, sizeof(*cqe));
cqe->opcode = CQ_BASE_CQE_TYPE_REQ;
cqe->qp_handle = (u64)(unsigned long)qp;
@@ -2271,38 +2651,42 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
* of the request being signaled or not, it must complete with
* the hwcqe error status
*/
- if (HWQ_CMP((sw_sq_cons + 1), &sq->hwq) == cqe_sq_cons &&
+ if (swq->next_idx == cqe_sq_cons &&
hwcqe->status != CQ_REQ_STATUS_OK) {
cqe->status = hwcqe->status;
dev_err(&cq->hwq.pdev->dev,
"FP: CQ Processed Req wr_id[%d] = 0x%llx with status 0x%x\n",
- sw_sq_cons, cqe->wr_id, cqe->status);
+ sq->swq_last, cqe->wr_id, cqe->status);
cqe++;
(*budget)--;
bnxt_qplib_mark_qp_error(qp);
/* Add qp to flush list of the CQ */
bnxt_qplib_add_flush_qp(qp);
} else {
- if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
- /* Before we complete, do WA 9060 */
- if (do_wa9060(qp, cq, cq_cons, sw_sq_cons,
+ /* Before we complete, do WA 9060 */
+ if (!bnxt_qplib_is_chip_gen_p5_p7(qp->cctx)) {
+ if (do_wa9060(qp, cq, cq_cons, sq->swq_last,
cqe_sq_cons)) {
*lib_qp = qp;
goto out;
}
+ }
+ if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
cqe->status = CQ_REQ_STATUS_OK;
cqe++;
(*budget)--;
}
}
skip:
- sq->hwq.cons++;
+ bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons,
+ swq->slots, &sq->dbinfo.flags);
+ sq->swq_last = swq->next_idx;
if (sq->single)
break;
}
out:
*pcqe = cqe;
- if (HWQ_CMP(sq->hwq.cons, &sq->hwq) != cqe_sq_cons) {
+ if (sq->swq_last != cqe_sq_cons) {
/* Out of budget */
rc = -EAGAIN;
goto done;
@@ -2322,7 +2706,8 @@ static void bnxt_qplib_release_srqe(struct bnxt_qplib_srq *srq, u32 tag)
srq->swq[srq->last_idx].next_idx = (int)tag;
srq->last_idx = (int)tag;
srq->swq[srq->last_idx].next_idx = -1;
- srq->hwq.cons++; /* Support for SRQE counter */
+ bnxt_qplib_hwq_incr_cons(srq->hwq.max_elements, &srq->hwq.cons,
+ srq->dbinfo.max_slot, &srq->dbinfo.flags);
spin_unlock(&srq->hwq.lock);
}
@@ -2331,12 +2716,11 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
struct bnxt_qplib_cqe **pcqe,
int *budget)
{
- struct bnxt_qplib_qp *qp;
- struct bnxt_qplib_q *rq;
struct bnxt_qplib_srq *srq;
struct bnxt_qplib_cqe *cqe;
+ struct bnxt_qplib_qp *qp;
+ struct bnxt_qplib_q *rq;
u32 wr_id_idx;
- int rc = 0;
qp = (struct bnxt_qplib_qp *)((unsigned long)
le64_to_cpu(hwcqe->qp_handle));
@@ -2347,7 +2731,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
"%s: QP in Flush QP = %p\n", __func__, qp);
- goto done;
+ return 0;
}
cqe = *pcqe;
@@ -2377,17 +2761,24 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
(*budget)--;
*pcqe = cqe;
} else {
+ struct bnxt_qplib_swq *swq;
+
rq = &qp->rq;
- if (wr_id_idx >= rq->hwq.max_elements) {
+ if (wr_id_idx > (rq->max_wqe - 1)) {
dev_err(&cq->hwq.pdev->dev,
"FP: CQ Process RC wr_id idx 0x%x exceeded RQ max 0x%x\n",
- wr_id_idx, rq->hwq.max_elements);
+ wr_id_idx, rq->max_wqe);
return -EINVAL;
}
- cqe->wr_id = rq->swq[wr_id_idx].wr_id;
+ if (wr_id_idx != rq->swq_last)
+ return -EINVAL;
+ swq = &rq->swq[rq->swq_last];
+ cqe->wr_id = swq->wr_id;
cqe++;
(*budget)--;
- rq->hwq.cons++;
+ bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons,
+ swq->slots, &rq->dbinfo.flags);
+ rq->swq_last = swq->next_idx;
*pcqe = cqe;
if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
@@ -2397,8 +2788,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
}
}
-done:
- return rc;
+ return 0;
}
static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
@@ -2406,12 +2796,11 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
struct bnxt_qplib_cqe **pcqe,
int *budget)
{
- struct bnxt_qplib_qp *qp;
- struct bnxt_qplib_q *rq;
struct bnxt_qplib_srq *srq;
struct bnxt_qplib_cqe *cqe;
+ struct bnxt_qplib_qp *qp;
+ struct bnxt_qplib_q *rq;
u32 wr_id_idx;
- int rc = 0;
qp = (struct bnxt_qplib_qp *)((unsigned long)
le64_to_cpu(hwcqe->qp_handle));
@@ -2422,11 +2811,11 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
"%s: QP in Flush QP = %p\n", __func__, qp);
- goto done;
+ return 0;
}
cqe = *pcqe;
cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK;
- cqe->length = (u32)le16_to_cpu(hwcqe->length);
+ cqe->length = le16_to_cpu(hwcqe->length) & CQ_RES_UD_LENGTH_MASK;
cqe->cfa_meta = le16_to_cpu(hwcqe->cfa_metadata);
cqe->invrkey = le32_to_cpu(hwcqe->imm_data);
cqe->flags = le16_to_cpu(hwcqe->flags);
@@ -2458,18 +2847,25 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
(*budget)--;
*pcqe = cqe;
} else {
+ struct bnxt_qplib_swq *swq;
+
rq = &qp->rq;
- if (wr_id_idx >= rq->hwq.max_elements) {
+ if (wr_id_idx > (rq->max_wqe - 1)) {
dev_err(&cq->hwq.pdev->dev,
"FP: CQ Process UD wr_id idx 0x%x exceeded RQ max 0x%x\n",
- wr_id_idx, rq->hwq.max_elements);
+ wr_id_idx, rq->max_wqe);
return -EINVAL;
}
- cqe->wr_id = rq->swq[wr_id_idx].wr_id;
+ if (rq->swq_last != wr_id_idx)
+ return -EINVAL;
+ swq = &rq->swq[rq->swq_last];
+ cqe->wr_id = swq->wr_id;
cqe++;
(*budget)--;
- rq->hwq.cons++;
+ bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons,
+ swq->slots, &rq->dbinfo.flags);
+ rq->swq_last = swq->next_idx;
*pcqe = cqe;
if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
@@ -2478,23 +2874,18 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
bnxt_qplib_add_flush_qp(qp);
}
}
-done:
- return rc;
+
+ return 0;
}
bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq)
{
- struct cq_base *hw_cqe, **hw_cqe_ptr;
- u32 sw_cons, raw_cons;
+ struct cq_base *hw_cqe;
bool rc = true;
- raw_cons = cq->hwq.cons;
- sw_cons = HWQ_CMP(raw_cons, &cq->hwq);
- hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr;
- hw_cqe = &hw_cqe_ptr[CQE_PG(sw_cons)][CQE_IDX(sw_cons)];
-
+ hw_cqe = bnxt_qplib_get_qe(&cq->hwq, cq->hwq.cons, NULL);
/* Check for Valid bit. If the CQE is valid, return false */
- rc = !CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements);
+ rc = !CQE_CMP_VALID(hw_cqe, cq->dbinfo.flags);
return rc;
}
@@ -2508,7 +2899,6 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
struct bnxt_qplib_srq *srq;
struct bnxt_qplib_cqe *cqe;
u32 wr_id_idx;
- int rc = 0;
qp = (struct bnxt_qplib_qp *)((unsigned long)
le64_to_cpu(hwcqe->qp_handle));
@@ -2519,7 +2909,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
"%s: QP in Flush QP = %p\n", __func__, qp);
- goto done;
+ return 0;
}
cqe = *pcqe;
cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK;
@@ -2562,17 +2952,24 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
(*budget)--;
*pcqe = cqe;
} else {
+ struct bnxt_qplib_swq *swq;
+
rq = &qp->rq;
- if (wr_id_idx >= rq->hwq.max_elements) {
+ if (wr_id_idx > (rq->max_wqe - 1)) {
dev_err(&cq->hwq.pdev->dev,
"FP: CQ Process Raw/QP1 RQ wr_id idx 0x%x exceeded RQ max 0x%x\n",
- wr_id_idx, rq->hwq.max_elements);
+ wr_id_idx, rq->max_wqe);
return -EINVAL;
}
- cqe->wr_id = rq->swq[wr_id_idx].wr_id;
+ if (rq->swq_last != wr_id_idx)
+ return -EINVAL;
+ swq = &rq->swq[rq->swq_last];
+ cqe->wr_id = swq->wr_id;
cqe++;
(*budget)--;
- rq->hwq.cons++;
+ bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons,
+ swq->slots, &rq->dbinfo.flags);
+ rq->swq_last = swq->next_idx;
*pcqe = cqe;
if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
@@ -2582,8 +2979,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
}
}
-done:
- return rc;
+ return 0;
}
static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
@@ -2594,7 +2990,7 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
struct bnxt_qplib_qp *qp;
struct bnxt_qplib_q *sq, *rq;
struct bnxt_qplib_cqe *cqe;
- u32 sw_cons = 0, cqe_cons;
+ u32 swq_last = 0, cqe_cons;
int rc = 0;
/* Check the Status */
@@ -2605,11 +3001,8 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
qp = (struct bnxt_qplib_qp *)((unsigned long)
le64_to_cpu(hwcqe->qp_handle));
- if (!qp) {
- dev_err(&cq->hwq.pdev->dev,
- "FP: CQ Process terminal qp is NULL\n");
+ if (!qp)
return -EINVAL;
- }
/* Must block new posting of SQ and RQ */
qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
@@ -2620,13 +3013,7 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
cqe_cons = le16_to_cpu(hwcqe->sq_cons_idx);
if (cqe_cons == 0xFFFF)
goto do_rq;
-
- if (cqe_cons > sq->hwq.max_elements) {
- dev_err(&cq->hwq.pdev->dev,
- "FP: CQ Process terminal reported sq_cons_idx 0x%x which exceeded max 0x%x\n",
- cqe_cons, sq->hwq.max_elements);
- goto do_rq;
- }
+ cqe_cons %= sq->max_sw_wqe;
if (qp->sq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
@@ -2640,24 +3027,26 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
*/
cqe = *pcqe;
while (*budget) {
- sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
- if (sw_cons == cqe_cons)
+ swq_last = sq->swq_last;
+ if (swq_last == cqe_cons)
break;
- if (sq->swq[sw_cons].flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
+ if (sq->swq[swq_last].flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
memset(cqe, 0, sizeof(*cqe));
cqe->status = CQ_REQ_STATUS_OK;
cqe->opcode = CQ_BASE_CQE_TYPE_REQ;
cqe->qp_handle = (u64)(unsigned long)qp;
cqe->src_qp = qp->id;
- cqe->wr_id = sq->swq[sw_cons].wr_id;
- cqe->type = sq->swq[sw_cons].type;
+ cqe->wr_id = sq->swq[swq_last].wr_id;
+ cqe->type = sq->swq[swq_last].type;
cqe++;
(*budget)--;
}
- sq->hwq.cons++;
+ bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons,
+ sq->swq[swq_last].slots, &sq->dbinfo.flags);
+ sq->swq_last = sq->swq[swq_last].next_idx;
}
*pcqe = cqe;
- if (!(*budget) && sw_cons != cqe_cons) {
+ if (!(*budget) && swq_last != cqe_cons) {
/* Out of budget */
rc = -EAGAIN;
goto sq_done;
@@ -2669,10 +3058,11 @@ do_rq:
cqe_cons = le16_to_cpu(hwcqe->rq_cons_idx);
if (cqe_cons == 0xFFFF) {
goto done;
- } else if (cqe_cons > rq->hwq.max_elements) {
+ } else if (cqe_cons > rq->max_wqe - 1) {
dev_err(&cq->hwq.pdev->dev,
"FP: CQ Processed terminal reported rq_cons_idx 0x%x exceeds max 0x%x\n",
- cqe_cons, rq->hwq.max_elements);
+ cqe_cons, rq->max_wqe);
+ rc = -EINVAL;
goto done;
}
@@ -2736,20 +3126,18 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
int num_cqes, struct bnxt_qplib_qp **lib_qp)
{
- struct cq_base *hw_cqe, **hw_cqe_ptr;
- u32 sw_cons, raw_cons;
+ struct cq_base *hw_cqe;
int budget, rc = 0;
+ u32 hw_polled = 0;
+ u8 type;
- raw_cons = cq->hwq.cons;
budget = num_cqes;
while (budget) {
- sw_cons = HWQ_CMP(raw_cons, &cq->hwq);
- hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr;
- hw_cqe = &hw_cqe_ptr[CQE_PG(sw_cons)][CQE_IDX(sw_cons)];
+ hw_cqe = bnxt_qplib_get_qe(&cq->hwq, cq->hwq.cons, NULL);
/* Check for Valid bit */
- if (!CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements))
+ if (!CQE_CMP_VALID(hw_cqe, cq->dbinfo.flags))
break;
/*
@@ -2758,12 +3146,13 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
*/
dma_rmb();
/* From the device's respective CQE format to qplib_wc*/
- switch (hw_cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK) {
+ type = hw_cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK;
+ switch (type) {
case CQ_BASE_CQE_TYPE_REQ:
rc = bnxt_qplib_cq_process_req(cq,
(struct cq_req *)hw_cqe,
&cqe, &budget,
- sw_cons, lib_qp);
+ cq->hwq.cons, lib_qp);
break;
case CQ_BASE_CQE_TYPE_RES_RC:
rc = bnxt_qplib_cq_process_res_rc(cq,
@@ -2805,23 +3194,26 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
/* Error while processing the CQE, just skip to the
* next one
*/
- dev_err(&cq->hwq.pdev->dev,
- "process_cqe error rc = 0x%x\n", rc);
+ if (type != CQ_BASE_CQE_TYPE_TERMINAL)
+ dev_err(&cq->hwq.pdev->dev,
+ "process_cqe error rc = 0x%x\n", rc);
}
- raw_cons++;
- }
- if (cq->hwq.cons != raw_cons) {
- cq->hwq.cons = raw_cons;
- bnxt_qplib_arm_cq(cq, DBC_DBC_TYPE_CQ);
+ hw_polled++;
+ bnxt_qplib_hwq_incr_cons(cq->hwq.max_elements, &cq->hwq.cons,
+ 1, &cq->dbinfo.flags);
+
}
+ if (hw_polled)
+ bnxt_qplib_ring_db(&cq->dbinfo, DBC_DBC_TYPE_CQ);
exit:
return num_cqes - budget;
}
void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type)
{
+ cq->dbinfo.toggle = cq->toggle;
if (arm_type)
- bnxt_qplib_arm_cq(cq, arm_type);
+ bnxt_qplib_ring_db(&cq->dbinfo, arm_type);
/* Using cq->arm_state variable to track whether to issue cq handler */
atomic_set(&cq->arm_state, 1);
}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index 99e0a13cbefa..1b414a73b46d 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -39,12 +39,60 @@
#ifndef __BNXT_QPLIB_FP_H__
#define __BNXT_QPLIB_FP_H__
+#include <rdma/bnxt_re-abi.h>
+
+/* A few helper structures are temporarily defined here;
+ * they should be removed once roce_hsi.h is updated
+ * in the original code base.
+ */
+struct sq_ud_ext_hdr {
+ __le32 dst_qp;
+ __le32 avid;
+ __le64 rsvd;
+};
+
+struct sq_raw_ext_hdr {
+ __le32 cfa_meta;
+ __le32 rsvd0;
+ __le64 rsvd1;
+};
+
+struct sq_rdma_ext_hdr {
+ __le64 remote_va;
+ __le32 remote_key;
+ __le32 rsvd;
+};
+
+struct sq_atomic_ext_hdr {
+ __le64 swap_data;
+ __le64 cmp_data;
+};
+
+struct sq_fr_pmr_ext_hdr {
+ __le64 pblptr;
+ __le64 va;
+};
+
+struct sq_bind_ext_hdr {
+ __le64 va;
+ __le32 length_lo;
+ __le32 length_hi;
+};
+
+struct rq_ext_hdr {
+ __le64 rsvd1;
+ __le64 rsvd2;
+};
+
+/* Helper structures end */
+
struct bnxt_qplib_srq {
struct bnxt_qplib_pd *pd;
struct bnxt_qplib_dpi *dpi;
- void __iomem *dbr_base;
+ struct bnxt_qplib_db_info dbinfo;
u64 srq_handle;
u32 id;
+ u16 wqe_size;
u32 max_wqe;
u32 max_sge;
u32 threshold;
@@ -57,6 +105,7 @@ struct bnxt_qplib_srq {
struct bnxt_qplib_sg_info sg_info;
u16 eventq_hw_ring_id;
spinlock_t lock; /* protect SRQE link list */
+ u8 toggle;
};
struct bnxt_qplib_sge {
@@ -65,38 +114,6 @@ struct bnxt_qplib_sge {
u32 size;
};
-#define BNXT_QPLIB_MAX_SQE_ENTRY_SIZE sizeof(struct sq_send)
-
-#define SQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_SQE_ENTRY_SIZE)
-#define SQE_MAX_IDX_PER_PG (SQE_CNT_PER_PG - 1)
-
-static inline u32 get_sqe_pg(u32 val)
-{
- return ((val & ~SQE_MAX_IDX_PER_PG) / SQE_CNT_PER_PG);
-}
-
-static inline u32 get_sqe_idx(u32 val)
-{
- return (val & SQE_MAX_IDX_PER_PG);
-}
-
-#define BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE sizeof(struct sq_psn_search)
-
-#define PSNE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE)
-#define PSNE_MAX_IDX_PER_PG (PSNE_CNT_PER_PG - 1)
-
-static inline u32 get_psne_pg(u32 val)
-{
- return ((val & ~PSNE_MAX_IDX_PER_PG) / PSNE_CNT_PER_PG);
-}
-
-static inline u32 get_psne_idx(u32 val)
-{
- return (val & PSNE_MAX_IDX_PER_PG);
-}
-
-#define BNXT_QPLIB_QP_MAX_SGL 6
-
struct bnxt_qplib_swq {
u64 wr_id;
int next_idx;
@@ -104,6 +121,8 @@ struct bnxt_qplib_swq {
u8 flags;
u32 start_psn;
u32 next_psn;
+ u32 slot_idx;
+ u8 slots;
struct sq_psn_search *psn_search;
struct sq_psn_search_ext *psn_ext;
};
@@ -134,7 +153,7 @@ struct bnxt_qplib_swqe {
#define BNXT_QPLIB_SWQE_FLAGS_UC_FENCE BIT(2)
#define BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT BIT(3)
#define BNXT_QPLIB_SWQE_FLAGS_INLINE BIT(4)
- struct bnxt_qplib_sge sg_list[BNXT_QPLIB_QP_MAX_SGL];
+ struct bnxt_qplib_sge sg_list[BNXT_VAR_MAX_SGE];
int num_sge;
/* Max inline data is 96 bytes */
u32 inline_len;
@@ -145,12 +164,12 @@ struct bnxt_qplib_swqe {
/* Send, with imm, inval key */
struct {
union {
- __be32 imm_data;
+ u32 imm_data;
u32 inv_key;
};
u32 q_key;
u32 dst_qp;
- u16 avid;
+ u32 avid;
} send;
/* Send Raw Ethernet and QP1 */
@@ -163,7 +182,7 @@ struct bnxt_qplib_swqe {
/* RDMA write, with imm, read */
struct {
union {
- __be32 imm_data;
+ u32 imm_data;
u32 inv_key;
};
u64 remote_va;
@@ -226,18 +245,14 @@ struct bnxt_qplib_swqe {
};
};
-#define BNXT_QPLIB_MAX_RQE_ENTRY_SIZE sizeof(struct rq_wqe)
-
-#define RQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_RQE_ENTRY_SIZE)
-#define RQE_MAX_IDX_PER_PG (RQE_CNT_PER_PG - 1)
-#define RQE_PG(x) (((x) & ~RQE_MAX_IDX_PER_PG) / RQE_CNT_PER_PG)
-#define RQE_IDX(x) ((x) & RQE_MAX_IDX_PER_PG)
-
struct bnxt_qplib_q {
struct bnxt_qplib_hwq hwq;
struct bnxt_qplib_swq *swq;
+ struct bnxt_qplib_db_info dbinfo;
struct bnxt_qplib_sg_info sg_info;
u32 max_wqe;
+ u32 max_sw_wqe;
+ u16 wqe_size;
u16 q_full_delta;
u16 max_sge;
u32 psn;
@@ -248,6 +263,8 @@ struct bnxt_qplib_q {
u32 phantom_cqe_cnt;
u32 next_cq_cons;
bool flushed;
+ u32 swq_start;
+ u32 swq_last;
};
struct bnxt_qplib_qp {
@@ -255,13 +272,14 @@ struct bnxt_qplib_qp {
struct bnxt_qplib_dpi *dpi;
struct bnxt_qplib_chip_ctx *cctx;
u64 qp_handle;
-#define BNXT_QPLIB_QP_ID_INVALID 0xFFFFFFFF
+#define BNXT_QPLIB_QP_ID_INVALID 0xFFFFFFFF
u32 id;
u8 type;
u8 sig_type;
- u32 modify_flags;
+ u8 wqe_mode;
u8 state;
u8 cur_qp_state;
+ u64 modify_flags;
u32 max_inline_data;
u32 mtu;
u8 path_mtu;
@@ -280,6 +298,8 @@ struct bnxt_qplib_qp {
u32 dest_qpn;
u8 smac[6];
u16 vlan_id;
+ u16 port_id;
+ u16 udp_sport;
u8 nw_type;
struct bnxt_qplib_ah ah;
@@ -321,8 +341,16 @@ struct bnxt_qplib_qp {
dma_addr_t rq_hdr_buf_map;
struct list_head sq_flush;
struct list_head rq_flush;
+ u32 msn;
+ u32 msn_tbl_sz;
+ bool is_host_msn_tbl;
+ u8 tos_dscp;
+ u32 ugid_index;
};
+#define BNXT_RE_MAX_MSG_SIZE 0x80000000
+#define BNXT_RE_INVAL_MSG_SIZE 0xFFFFFFFF
+
#define BNXT_QPLIB_MAX_CQE_ENTRY_SIZE sizeof(struct cq_base)
#define CQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_CQE_ENTRY_SIZE)
@@ -331,17 +359,56 @@ struct bnxt_qplib_qp {
#define CQE_IDX(x) ((x) & CQE_MAX_IDX_PER_PG)
#define ROCE_CQE_CMP_V 0
-#define CQE_CMP_VALID(hdr, raw_cons, cp_bit) \
+#define CQE_CMP_VALID(hdr, pass) \
(!!((hdr)->cqe_type_toggle & CQ_BASE_TOGGLE) == \
- !((raw_cons) & (cp_bit)))
+ !((pass) & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK))
+
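The reworked CQE_CMP_VALID() no longer derives the expected phase from a free-running consumer counter; instead the driver keeps an epoch flag in dbinfo.flags and flips it each time the consumer index wraps, so an entry is valid only while its hardware toggle bit matches the phase the consumer expects. A minimal, self-contained sketch of that scheme (names and the four-entry ring are illustrative only, not driver code):

#include <stdbool.h>
#include <stdio.h>

#define DEMO_DEPTH 4

struct demo_entry {
	unsigned int toggle;	/* written by the "hardware" producer */
};

/* Valid only when the entry's toggle matches the phase the consumer expects. */
static bool demo_entry_valid(const struct demo_entry *e, unsigned int epoch)
{
	return !!e->toggle == !(epoch & 1);
}

static void demo_incr_cons(unsigned int *cons, unsigned int *epoch)
{
	if (++(*cons) == DEMO_DEPTH) {	/* wrap: flip the software epoch */
		*cons = 0;
		*epoch ^= 1;
	}
}

int main(void)
{
	struct demo_entry ring[DEMO_DEPTH] = { {1}, {1}, {0}, {0} };
	unsigned int cons = 0, epoch = 0;

	/* Only the first two entries were produced in the current phase. */
	while (demo_entry_valid(&ring[cons], epoch)) {
		printf("consume entry %u\n", cons);
		demo_incr_cons(&cons, &epoch);
	}
	return 0;
}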
+static inline u32 __bnxt_qplib_get_avail(struct bnxt_qplib_hwq *hwq)
+{
+ int cons, prod, avail;
+
+ cons = hwq->cons;
+ prod = hwq->prod;
+ avail = cons - prod;
+ if (cons <= prod)
+ avail += hwq->depth;
+ return avail;
+}
-static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *qplib_q)
+static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *que,
+ u8 slots)
{
- return HWQ_CMP((qplib_q->hwq.prod + qplib_q->q_full_delta),
- &qplib_q->hwq) == HWQ_CMP(qplib_q->hwq.cons,
- &qplib_q->hwq);
+ struct bnxt_qplib_hwq *hwq;
+ int avail;
+
+ hwq = &que->hwq;
+ /* False full is possible, retrying post-send makes sense */
+ avail = hwq->cons - hwq->prod;
+ if (hwq->cons <= hwq->prod)
+ avail += hwq->depth;
+ return avail <= slots;
}
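The queue-full test above treats prod and cons as indexes into a ring of depth slots and reports full when the remaining free slots would not fit the slots requested by the caller. A small standalone illustration of the same arithmetic (hypothetical names, not driver code):

#include <stdbool.h>
#include <stdio.h>

static int demo_free_slots(unsigned int cons, unsigned int prod,
			   unsigned int depth)
{
	int avail = (int)cons - (int)prod;

	if (cons <= prod)
		avail += depth;
	return avail;
}

static bool demo_queue_full(unsigned int cons, unsigned int prod,
			    unsigned int depth, unsigned int slots_needed)
{
	return demo_free_slots(cons, prod, depth) <= (int)slots_needed;
}

int main(void)
{
	/* depth 16, producer at 10, consumer at 3: 9 slots are still free */
	printf("free = %d\n", demo_free_slots(3, 10, 16));
	printf("full for 9-slot WQE? %d\n", demo_queue_full(3, 10, 16, 9));
	printf("full for 4-slot WQE? %d\n", demo_queue_full(3, 10, 16, 4));
	return 0;
}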
+/* CQ coalescing parameters */
+struct bnxt_qplib_cq_coal_param {
+ u16 buf_maxtime;
+ u8 normal_maxbuf;
+ u8 during_maxbuf;
+ u8 en_ring_idle_mode;
+ u8 enable;
+};
+
+#define BNXT_QPLIB_CQ_COAL_DEF_BUF_MAXTIME 0x1
+#define BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P7 0x8
+#define BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P7 0x8
+#define BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P5 0x1
+#define BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P5 0x1
+#define BNXT_QPLIB_CQ_COAL_DEF_EN_RING_IDLE_MODE 0x1
+#define BNXT_QPLIB_CQ_COAL_MAX_BUF_MAXTIME 0x1bf
+#define BNXT_QPLIB_CQ_COAL_MAX_NORMAL_MAXBUF 0x1f
+#define BNXT_QPLIB_CQ_COAL_MAX_DURING_MAXBUF 0x1f
+#define BNXT_QPLIB_CQ_COAL_MAX_EN_RING_IDLE_MODE 0x1
+
struct bnxt_qplib_cqe {
u8 status;
u8 type;
@@ -350,7 +417,7 @@ struct bnxt_qplib_cqe {
u16 cfa_meta;
u64 wr_id;
union {
- __be32 immdata;
+ u32 immdata;
u32 invrkey;
};
u64 qp_handle;
@@ -370,17 +437,19 @@ struct bnxt_qplib_cqe {
#define BNXT_QPLIB_QUEUE_START_PERIOD 0x01
struct bnxt_qplib_cq {
struct bnxt_qplib_dpi *dpi;
- void __iomem *dbr_base;
+ struct bnxt_qplib_db_info dbinfo;
u32 max_wqe;
u32 id;
u16 count;
u16 period;
struct bnxt_qplib_hwq hwq;
+ struct bnxt_qplib_hwq resize_hwq;
u32 cnq_hw_ring_id;
struct bnxt_qplib_nq *nq;
bool resize_in_progress;
struct bnxt_qplib_sg_info sg_info;
u64 cq_handle;
+ u8 toggle;
#define CQ_RESIZE_WAIT_TIME_MS 500
unsigned long flags;
@@ -401,6 +470,8 @@ struct bnxt_qplib_cq {
* of the same QP while manipulating the flush list.
*/
spinlock_t flush_lock; /* QP flush management */
+ u16 cnq_events;
+ struct bnxt_qplib_cq_coal_param *coalescing;
};
#define BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE sizeof(struct xrrq_irrq)
@@ -417,9 +488,9 @@ struct bnxt_qplib_cq {
#define NQE_PG(x) (((x) & ~NQE_MAX_IDX_PER_PG) / NQE_CNT_PER_PG)
#define NQE_IDX(x) ((x) & NQE_MAX_IDX_PER_PG)
-#define NQE_CMP_VALID(hdr, raw_cons, cp_bit) \
+#define NQE_CMP_VALID(hdr, pass) \
(!!(le32_to_cpu((hdr)->info63_v[0]) & NQ_BASE_V) == \
- !((raw_cons) & (cp_bit)))
+ !((pass) & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK))
#define BNXT_QPLIB_NQE_MAX_CNT (128 * 1024)
@@ -433,66 +504,33 @@ struct bnxt_qplib_cq {
NQ_DB_IDX_VALID | \
NQ_DB_IRQ_DIS)
-static inline void bnxt_qplib_ring_nq_db64(void __iomem *db, u32 index,
- u32 xid, bool arm)
-{
- u64 val;
-
- val = xid & DBC_DBC_XID_MASK;
- val |= DBC_DBC_PATH_ROCE;
- val |= arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ;
- val <<= 32;
- val |= index & DBC_DBC_INDEX_MASK;
- writeq(val, db);
-}
-
-static inline void bnxt_qplib_ring_nq_db_rearm(void __iomem *db, u32 raw_cons,
- u32 max_elements, u32 xid,
- bool gen_p5)
-{
- u32 index = raw_cons & (max_elements - 1);
-
- if (gen_p5)
- bnxt_qplib_ring_nq_db64(db, index, xid, true);
- else
- writel(NQ_DB_CP_FLAGS_REARM | (index & DBC_DBC32_XID_MASK), db);
-}
-
-static inline void bnxt_qplib_ring_nq_db(void __iomem *db, u32 raw_cons,
- u32 max_elements, u32 xid,
- bool gen_p5)
-{
- u32 index = raw_cons & (max_elements - 1);
+struct bnxt_qplib_nq_db {
+ struct bnxt_qplib_reg_desc reg;
+ struct bnxt_qplib_db_info dbinfo;
+};
- if (gen_p5)
- bnxt_qplib_ring_nq_db64(db, index, xid, false);
- else
- writel(NQ_DB_CP_FLAGS | (index & DBC_DBC32_XID_MASK), db);
-}
+typedef int (*cqn_handler_t)(struct bnxt_qplib_nq *nq,
+ struct bnxt_qplib_cq *cq);
+typedef int (*srqn_handler_t)(struct bnxt_qplib_nq *nq,
+ struct bnxt_qplib_srq *srq, u8 event);
struct bnxt_qplib_nq {
- struct pci_dev *pdev;
- struct bnxt_qplib_res *res;
-
- int vector;
- cpumask_t mask;
- int budget;
- bool requested;
- struct tasklet_struct worker;
- struct bnxt_qplib_hwq hwq;
-
- u16 bar_reg;
- u32 bar_reg_off;
- u16 ring_id;
- void __iomem *bar_reg_iomem;
-
- int (*cqn_handler)(struct bnxt_qplib_nq *nq,
- struct bnxt_qplib_cq *cq);
- int (*srqn_handler)(struct bnxt_qplib_nq *nq,
- struct bnxt_qplib_srq *srq,
- u8 event);
- struct workqueue_struct *cqn_wq;
- char name[32];
+ struct pci_dev *pdev;
+ struct bnxt_qplib_res *res;
+ char *name;
+ struct bnxt_qplib_hwq hwq;
+ struct bnxt_qplib_nq_db nq_db;
+ u16 ring_id;
+ int msix_vec;
+ cpumask_t mask;
+ struct tasklet_struct nq_tasklet;
+ bool requested;
+ int budget;
+ u32 load;
+
+ cqn_handler_t cqn_handler;
+ srqn_handler_t srqn_handler;
+ struct workqueue_struct *cqn_wq;
};
struct bnxt_qplib_nq_work {
@@ -507,15 +545,10 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
int msix_vector, bool need_init);
int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
int nq_idx, int msix_vector, int bar_reg_offset,
- int (*cqn_handler)(struct bnxt_qplib_nq *nq,
- struct bnxt_qplib_cq *cq),
- int (*srqn_handler)(struct bnxt_qplib_nq *nq,
- struct bnxt_qplib_srq *srq,
- u8 event));
+ cqn_handler_t cqn_handler,
+ srqn_handler_t srq_handler);
int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
struct bnxt_qplib_srq *srq);
-int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res,
- struct bnxt_qplib_srq *srq);
int bnxt_qplib_query_srq(struct bnxt_qplib_res *res,
struct bnxt_qplib_srq *srq);
void bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res,
@@ -544,13 +577,17 @@ void bnxt_qplib_post_recv_db(struct bnxt_qplib_qp *qp);
int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
struct bnxt_qplib_swqe *wqe);
int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
+int bnxt_qplib_resize_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq,
+ int new_cqes);
+void bnxt_qplib_resize_cq_complete(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_cq *cq);
int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
int num, struct bnxt_qplib_qp **qp);
bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq);
void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type);
void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq);
-int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq);
+int bnxt_qplib_alloc_nq(struct bnxt_qplib_res *res, struct bnxt_qplib_nq *nq);
void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp);
void bnxt_qplib_acquire_cq_locks(struct bnxt_qplib_qp *qp,
unsigned long *flags);
@@ -560,4 +597,93 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
struct bnxt_qplib_cqe *cqe,
int num_cqes);
void bnxt_qplib_flush_cqn_wq(struct bnxt_qplib_qp *qp);
+void bnxt_re_synchronize_nq(struct bnxt_qplib_nq *nq);
+
+static inline void *bnxt_qplib_get_swqe(struct bnxt_qplib_q *que, u32 *swq_idx)
+{
+ u32 idx;
+
+ idx = que->swq_start;
+ if (swq_idx)
+ *swq_idx = idx;
+ return &que->swq[idx];
+}
+
+static inline void bnxt_qplib_swq_mod_start(struct bnxt_qplib_q *que, u32 idx)
+{
+ que->swq_start = que->swq[idx].next_idx;
+}
+
+static inline u32 bnxt_qplib_get_depth(struct bnxt_qplib_q *que, u8 wqe_mode, bool is_sq)
+{
+ u32 slots;
+
+ /* Queue depth is the number of slots. */
+ slots = (que->wqe_size * que->max_wqe) / sizeof(struct sq_sge);
+ /* For variable WQE mode, need to align the slots to 256 */
+ if (wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE && is_sq)
+ slots = ALIGN(slots, BNXT_VAR_MAX_SLOT_ALIGN);
+ return slots;
+}
+
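bnxt_qplib_get_depth() sizes the queue in 16-byte slots rather than WQEs: the WQE size times the WQE count divided by the SGE size gives the slot count. A worked example under assumed sizes (a 16-byte sq_sge and a 128-byte static send WQE), not taken from the driver:

#include <stdio.h>

int main(void)
{
	unsigned int sge_size = 16;	/* assumed sizeof(struct sq_sge) */
	unsigned int wqe_size = 128;	/* assumed static-mode WQE size  */
	unsigned int max_wqe = 256;

	unsigned int slots_per_wqe = wqe_size / sge_size;	 /* 8    */
	unsigned int depth = (wqe_size * max_wqe) / sge_size;	 /* 2048 */

	printf("slots per WQE = %u, queue depth = %u slots\n",
	       slots_per_wqe, depth);
	return 0;
}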
+static inline u32 bnxt_qplib_set_sq_size(struct bnxt_qplib_q *que, u8 wqe_mode)
+{
+ return (wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ?
+ que->max_wqe : bnxt_qplib_get_depth(que, wqe_mode, true);
+}
+
+static inline u32 bnxt_qplib_set_sq_max_slot(u8 wqe_mode)
+{
+ return (wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ?
+ sizeof(struct sq_send) / sizeof(struct sq_sge) : 1;
+}
+
+static inline u32 bnxt_qplib_set_rq_max_slot(u32 wqe_size)
+{
+ return (wqe_size / sizeof(struct sq_sge));
+}
+
+static inline u16 __xlate_qfd(u16 delta, u16 wqe_bytes)
+{
+ /* For Cu/Wh delta = 128, stride = 16, wqe_bytes = 128
+ * For Gen-p5 B/C mode delta = 0, stride = 16, wqe_bytes = 128.
+ * For Gen-p5 delta = 0, stride = 16, 32 <= wqe_bytes <= 512.
+ * when 8916 is disabled.
+ */
+ return (delta * wqe_bytes) / sizeof(struct sq_sge);
+}
+
+static inline u16 bnxt_qplib_calc_ilsize(struct bnxt_qplib_swqe *wqe, u16 max)
+{
+ u16 size = 0;
+ int indx;
+
+ for (indx = 0; indx < wqe->num_sge; indx++)
+ size += wqe->sg_list[indx].size;
+ if (size > max)
+ size = max;
+
+ return size;
+}
+
+/* MSN table update inline */
+static inline __le64 bnxt_re_update_msn_tbl(u32 st_idx, u32 npsn, u32 start_psn)
+{
+ return cpu_to_le64((((u64)(st_idx) << SQ_MSN_SEARCH_START_IDX_SFT) &
+ SQ_MSN_SEARCH_START_IDX_MASK) |
+ (((u64)(npsn) << SQ_MSN_SEARCH_NEXT_PSN_SFT) &
+ SQ_MSN_SEARCH_NEXT_PSN_MASK) |
+ (((start_psn) << SQ_MSN_SEARCH_START_PSN_SFT) &
+ SQ_MSN_SEARCH_START_PSN_MASK));
+}
+
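bnxt_re_update_msn_tbl() packs a WQE's start slot index, next PSN and start PSN into a single little-endian 64-bit MSN-table entry using the SQ_MSN_SEARCH_* shifts and masks. The sketch below shows the same packing pattern with an assumed field layout (start index in bits 63:48, next PSN in 47:24, start PSN in 23:0); the authoritative layout comes from the hardware interface header, and the endianness conversion is omitted here:

#include <stdint.h>
#include <stdio.h>

#define DEMO_START_IDX_SFT	48
#define DEMO_START_IDX_MASK	0xffff000000000000ULL
#define DEMO_NEXT_PSN_SFT	24
#define DEMO_NEXT_PSN_MASK	0x0000ffffff000000ULL
#define DEMO_START_PSN_SFT	0
#define DEMO_START_PSN_MASK	0x0000000000ffffffULL

static uint64_t demo_msn_entry(uint32_t st_idx, uint32_t npsn, uint32_t spsn)
{
	/* OR three shifted-and-masked fields into one 64-bit word */
	return (((uint64_t)st_idx << DEMO_START_IDX_SFT) & DEMO_START_IDX_MASK) |
	       (((uint64_t)npsn << DEMO_NEXT_PSN_SFT) & DEMO_NEXT_PSN_MASK) |
	       (((uint64_t)spsn << DEMO_START_PSN_SFT) & DEMO_START_PSN_MASK);
}

int main(void)
{
	/* WQE starting at slot 8, start PSN 0x101, next PSN 0x105 */
	printf("entry = 0x%016llx\n",
	       (unsigned long long)demo_msn_entry(8, 0x105, 0x101));
	return 0;
}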
+static inline bool __is_var_wqe(struct bnxt_qplib_qp *qp)
+{
+ return (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE);
+}
+
+static inline bool __is_err_cqe_for_var_wqe(struct bnxt_qplib_qp *qp, u8 status)
+{
+ return (status != CQ_REQ_STATUS_OK) && __is_var_wqe(qp);
+}
#endif /* __BNXT_QPLIB_FP_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 48b04d2f175f..295a9610f3e6 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -49,204 +49,535 @@
#include "qplib_rcfw.h"
#include "qplib_sp.h"
#include "qplib_fp.h"
+#include "qplib_tlv.h"
-static void bnxt_qplib_service_creq(unsigned long data);
+static void bnxt_qplib_service_creq(struct tasklet_struct *t);
-/* Hardware communication channel */
+/**
+ * bnxt_qplib_map_rc - map return type based on opcode
+ * @opcode: roce slow path opcode
+ *
+ * case #1
+ * Firmware-initiated error recovery is a safe state machine, and the
+ * driver can consider all underlying rdma resources as freed.
+ * In this state, it is safe to return success for opcodes related to
+ * destroying rdma resources (like destroy qp, destroy cq etc.).
+ *
+ * case #2
+ * If the driver detects a potential firmware stall, the state machine is
+ * not safe, and the driver cannot consider the underlying rdma resources
+ * as freed.
+ * In this state, it is not safe to return success for opcodes related to
+ * destroying rdma resources (like destroy qp, destroy cq etc.).
+ *
+ * Scope of this helper function is only for case #1.
+ *
+ * Returns:
+ * 0 to communicate success to caller.
+ * Non zero error code to communicate failure to caller.
+ */
+static int bnxt_qplib_map_rc(u8 opcode)
+{
+ switch (opcode) {
+ case CMDQ_BASE_OPCODE_DESTROY_QP:
+ case CMDQ_BASE_OPCODE_DESTROY_SRQ:
+ case CMDQ_BASE_OPCODE_DESTROY_CQ:
+ case CMDQ_BASE_OPCODE_DEALLOCATE_KEY:
+ case CMDQ_BASE_OPCODE_DEREGISTER_MR:
+ case CMDQ_BASE_OPCODE_DELETE_GID:
+ case CMDQ_BASE_OPCODE_DESTROY_QP1:
+ case CMDQ_BASE_OPCODE_DESTROY_AH:
+ case CMDQ_BASE_OPCODE_DEINITIALIZE_FW:
+ case CMDQ_BASE_OPCODE_MODIFY_ROCE_CC:
+ case CMDQ_BASE_OPCODE_SET_LINK_AGGR_MODE:
+ return 0;
+ default:
+ return -ETIMEDOUT;
+ }
+}
+
+/**
+ * bnxt_re_is_fw_stalled - Check firmware health
+ * @rcfw: rcfw channel instance of rdev
+ * @cookie: cookie to track the command
+ *
+ * If firmware has not responded to any rcfw command within
+ * rcfw->max_timeout, consider firmware as stalled.
+ *
+ * Returns:
+ * 0 if firmware is responding
+ * -ENODEV if firmware is not responding
+ */
+static int bnxt_re_is_fw_stalled(struct bnxt_qplib_rcfw *rcfw,
+ u16 cookie)
+{
+ struct bnxt_qplib_cmdq_ctx *cmdq;
+ struct bnxt_qplib_crsqe *crsqe;
+
+ crsqe = &rcfw->crsqe_tbl[cookie];
+ cmdq = &rcfw->cmdq;
+
+ if (time_after(jiffies, cmdq->last_seen +
+ (rcfw->max_timeout * HZ))) {
+ dev_warn_ratelimited(&rcfw->pdev->dev,
+ "%s: FW STALL Detected. cmdq[%#x]=%#x waited (%d > %d) msec active %d ",
+ __func__, cookie, crsqe->opcode,
+ jiffies_to_msecs(jiffies - cmdq->last_seen),
+ rcfw->max_timeout * 1000,
+ crsqe->is_in_used);
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+/**
+ * __wait_for_resp - Don't hold the cpu context and wait for response
+ * @rcfw: rcfw channel instance of rdev
+ * @cookie: cookie to track the command
+ *
+ * Wait for command completion in sleepable context.
+ *
+ * Returns:
+ * 0 if command is completed by firmware.
+ * Non zero error code for rest of the case.
+ */
static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
{
- u16 cbit;
- int rc;
+ struct bnxt_qplib_cmdq_ctx *cmdq;
+ struct bnxt_qplib_crsqe *crsqe;
+ int ret;
+
+ cmdq = &rcfw->cmdq;
+ crsqe = &rcfw->crsqe_tbl[cookie];
+
+ do {
+ if (test_bit(ERR_DEVICE_DETACHED, &cmdq->flags))
+ return bnxt_qplib_map_rc(crsqe->opcode);
+ if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags))
+ return -ETIMEDOUT;
+
+ wait_event_timeout(cmdq->waitq,
+ !crsqe->is_in_used ||
+ test_bit(ERR_DEVICE_DETACHED, &cmdq->flags),
+ secs_to_jiffies(rcfw->max_timeout));
- cbit = cookie % rcfw->cmdq_depth;
- rc = wait_event_timeout(rcfw->waitq,
- !test_bit(cbit, rcfw->cmdq_bitmap),
- msecs_to_jiffies(RCFW_CMD_WAIT_TIME_MS));
- return rc ? 0 : -ETIMEDOUT;
+ if (!crsqe->is_in_used)
+ return 0;
+
+ bnxt_qplib_service_creq(&rcfw->creq.creq_tasklet);
+
+ if (!crsqe->is_in_used)
+ return 0;
+
+ ret = bnxt_re_is_fw_stalled(rcfw, cookie);
+ if (ret)
+ return ret;
+
+ } while (true);
};
+/**
+ * __block_for_resp - hold the cpu context and wait for response
+ * @rcfw: rcfw channel instance of rdev
+ * @cookie: cookie to track the command
+ *
+ * This function will hold the cpu (non-sleepable context) and
+ * wait for command completion. The maximum holding interval is 8 seconds.
+ *
+ * Returns:
+ * -ETIMEDOUT if command is not completed in specific time interval.
+ * 0 if command is completed by firmware.
+ */
static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
{
- u32 count = RCFW_BLOCKED_CMD_WAIT_COUNT;
- u16 cbit;
+ struct bnxt_qplib_cmdq_ctx *cmdq = &rcfw->cmdq;
+ struct bnxt_qplib_crsqe *crsqe;
+ unsigned long issue_time = 0;
+
+ issue_time = jiffies;
+ crsqe = &rcfw->crsqe_tbl[cookie];
- cbit = cookie % rcfw->cmdq_depth;
- if (!test_bit(cbit, rcfw->cmdq_bitmap))
- goto done;
do {
- mdelay(1); /* 1m sec */
- bnxt_qplib_service_creq((unsigned long)rcfw);
- } while (test_bit(cbit, rcfw->cmdq_bitmap) && --count);
-done:
- return count ? 0 : -ETIMEDOUT;
+ if (test_bit(ERR_DEVICE_DETACHED, &cmdq->flags))
+ return bnxt_qplib_map_rc(crsqe->opcode);
+ if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags))
+ return -ETIMEDOUT;
+
+ udelay(1);
+
+ bnxt_qplib_service_creq(&rcfw->creq.creq_tasklet);
+ if (!crsqe->is_in_used)
+ return 0;
+
+ } while (time_before(jiffies, issue_time + (8 * HZ)));
+
+ return -ETIMEDOUT;
};
-static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
- struct creq_base *resp, void *sb, u8 is_block)
+/* __send_message_no_waiter - get cookie and post the message.
+ * @rcfw: rcfw channel instance of rdev
+ * @msg: qplib message internal
+ *
+ * This function just posts and does not bother about completion.
+ * The current design of this function is:
+ * user must hold the completion queue hwq->lock.
+ * user must have used existing completion and free the resources.
+ * this function will not check queue full condition.
+ * this function will explicitly set is_waiter_alive=false.
+ * The current use case is to send destroy_ah when create_ah completes
+ * after the waiter of create_ah has been lost. It can be extended to
+ * other use cases as well.
+ *
+ * Returns: Nothing
+ *
+ */
+static void __send_message_no_waiter(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_cmdqmsg *msg)
{
- struct bnxt_qplib_cmdqe *cmdqe, **cmdq_ptr;
- struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
- u32 cmdq_depth = rcfw->cmdq_depth;
- struct bnxt_qplib_crsq *crsqe;
+ struct bnxt_qplib_cmdq_ctx *cmdq = &rcfw->cmdq;
+ struct bnxt_qplib_hwq *hwq = &cmdq->hwq;
+ struct bnxt_qplib_crsqe *crsqe;
+ struct bnxt_qplib_cmdqe *cmdqe;
u32 sw_prod, cmdq_prod;
- unsigned long flags;
- u32 size, opcode;
- u16 cookie, cbit;
+ u16 cookie;
+ u32 bsize;
u8 *preq;
- opcode = req->opcode;
- if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
- (opcode != CMDQ_BASE_OPCODE_QUERY_FUNC &&
- opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW &&
- opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) {
- dev_err(&rcfw->pdev->dev,
- "RCFW not initialized, reject opcode 0x%x\n", opcode);
- return -EINVAL;
- }
+ cookie = cmdq->seq_num & RCFW_MAX_COOKIE_VALUE;
+ __set_cmdq_base_cookie(msg->req, msg->req_sz, cpu_to_le16(cookie));
+ crsqe = &rcfw->crsqe_tbl[cookie];
- if (test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
- opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) {
- dev_err(&rcfw->pdev->dev, "RCFW already initialized!\n");
- return -EINVAL;
- }
+ /* Set cmd_size in terms of 16B slots in req. */
+ bsize = bnxt_qplib_set_cmd_slots(msg->req);
+ /* GET_CMD_SIZE would return number of slots in either case of tlv
+ * and non-tlv commands after call to bnxt_qplib_set_cmd_slots()
+ */
+ crsqe->is_internal_cmd = true;
+ crsqe->is_waiter_alive = false;
+ crsqe->is_in_used = true;
+ crsqe->req_size = __get_cmdq_base_cmd_size(msg->req, msg->req_sz);
- if (test_bit(FIRMWARE_TIMED_OUT, &rcfw->flags))
- return -ETIMEDOUT;
+ preq = (u8 *)msg->req;
+ do {
+ /* Locate the next cmdq slot */
+ sw_prod = HWQ_CMP(hwq->prod, hwq);
+ cmdqe = bnxt_qplib_get_qe(hwq, sw_prod, NULL);
+ /* Copy a segment of the req cmd to the cmdq */
+ memset(cmdqe, 0, sizeof(*cmdqe));
+ memcpy(cmdqe, preq, min_t(u32, bsize, sizeof(*cmdqe)));
+ preq += min_t(u32, bsize, sizeof(*cmdqe));
+ bsize -= min_t(u32, bsize, sizeof(*cmdqe));
+ hwq->prod++;
+ } while (bsize > 0);
+ cmdq->seq_num++;
+
+ cmdq_prod = hwq->prod;
+ atomic_inc(&rcfw->timeout_send);
+ /* ring CMDQ DB */
+ wmb();
+ writel(cmdq_prod, cmdq->cmdq_mbox.prod);
+ writel(RCFW_CMDQ_TRIG_VAL, cmdq->cmdq_mbox.db);
+}
+
+static int __send_message(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_cmdqmsg *msg, u8 opcode)
+{
+ u32 bsize, free_slots, required_slots;
+ struct bnxt_qplib_cmdq_ctx *cmdq;
+ struct bnxt_qplib_crsqe *crsqe;
+ struct bnxt_qplib_cmdqe *cmdqe;
+ struct bnxt_qplib_hwq *hwq;
+ u32 sw_prod, cmdq_prod;
+ struct pci_dev *pdev;
+ u16 cookie;
+ u8 *preq;
+
+ cmdq = &rcfw->cmdq;
+ hwq = &cmdq->hwq;
+ pdev = rcfw->pdev;
/* Cmdq are in 16-byte units, each request can consume 1 or more
* cmdqe
*/
- spin_lock_irqsave(&cmdq->lock, flags);
- if (req->cmd_size >= HWQ_FREE_SLOTS(cmdq)) {
- dev_err(&rcfw->pdev->dev, "RCFW: CMDQ is full!\n");
- spin_unlock_irqrestore(&cmdq->lock, flags);
+ spin_lock_bh(&hwq->lock);
+ required_slots = bnxt_qplib_get_cmd_slots(msg->req);
+ free_slots = HWQ_FREE_SLOTS(hwq);
+ cookie = cmdq->seq_num & RCFW_MAX_COOKIE_VALUE;
+ crsqe = &rcfw->crsqe_tbl[cookie];
+
+ if (required_slots >= free_slots) {
+ dev_info_ratelimited(&pdev->dev,
+ "CMDQ is full req/free %d/%d!",
+ required_slots, free_slots);
+ spin_unlock_bh(&hwq->lock);
return -EAGAIN;
}
-
-
- cookie = rcfw->seq_num & RCFW_MAX_COOKIE_VALUE;
- cbit = cookie % rcfw->cmdq_depth;
- if (is_block)
+ if (msg->block)
cookie |= RCFW_CMD_IS_BLOCKING;
-
- set_bit(cbit, rcfw->cmdq_bitmap);
- req->cookie = cpu_to_le16(cookie);
- crsqe = &rcfw->crsqe_tbl[cbit];
- if (crsqe->resp) {
- spin_unlock_irqrestore(&cmdq->lock, flags);
- return -EBUSY;
- }
- memset(resp, 0, sizeof(*resp));
- crsqe->resp = (struct creq_qp_event *)resp;
- crsqe->resp->cookie = req->cookie;
- crsqe->req_size = req->cmd_size;
- if (req->resp_size && sb) {
- struct bnxt_qplib_rcfw_sbuf *sbuf = sb;
-
- req->resp_addr = cpu_to_le64(sbuf->dma_addr);
- req->resp_size = (sbuf->size + BNXT_QPLIB_CMDQE_UNITS - 1) /
- BNXT_QPLIB_CMDQE_UNITS;
+ __set_cmdq_base_cookie(msg->req, msg->req_sz, cpu_to_le16(cookie));
+
+ bsize = bnxt_qplib_set_cmd_slots(msg->req);
+ crsqe->free_slots = free_slots;
+ crsqe->resp = (struct creq_qp_event *)msg->resp;
+ crsqe->resp->cookie = cpu_to_le16(cookie);
+ crsqe->is_internal_cmd = false;
+ crsqe->is_waiter_alive = true;
+ crsqe->is_in_used = true;
+ crsqe->opcode = opcode;
+
+ crsqe->req_size = __get_cmdq_base_cmd_size(msg->req, msg->req_sz);
+ if (__get_cmdq_base_resp_size(msg->req, msg->req_sz) && msg->sb) {
+ struct bnxt_qplib_rcfw_sbuf *sbuf = msg->sb;
+
+ __set_cmdq_base_resp_addr(msg->req, msg->req_sz,
+ cpu_to_le64(sbuf->dma_addr));
+ __set_cmdq_base_resp_size(msg->req, msg->req_sz,
+ ALIGN(sbuf->size,
+ BNXT_QPLIB_CMDQE_UNITS) /
+ BNXT_QPLIB_CMDQE_UNITS);
}
- cmdq_ptr = (struct bnxt_qplib_cmdqe **)cmdq->pbl_ptr;
- preq = (u8 *)req;
- size = req->cmd_size * BNXT_QPLIB_CMDQE_UNITS;
+ preq = (u8 *)msg->req;
do {
/* Locate the next cmdq slot */
- sw_prod = HWQ_CMP(cmdq->prod, cmdq);
- cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod, cmdq_depth)]
- [get_cmdq_idx(sw_prod, cmdq_depth)];
- if (!cmdqe) {
- dev_err(&rcfw->pdev->dev,
- "RCFW request failed with no cmdqe!\n");
- goto done;
- }
+ sw_prod = HWQ_CMP(hwq->prod, hwq);
+ cmdqe = bnxt_qplib_get_qe(hwq, sw_prod, NULL);
/* Copy a segment of the req cmd to the cmdq */
memset(cmdqe, 0, sizeof(*cmdqe));
- memcpy(cmdqe, preq, min_t(u32, size, sizeof(*cmdqe)));
- preq += min_t(u32, size, sizeof(*cmdqe));
- size -= min_t(u32, size, sizeof(*cmdqe));
- cmdq->prod++;
- rcfw->seq_num++;
- } while (size > 0);
-
- rcfw->seq_num++;
-
- cmdq_prod = cmdq->prod;
- if (test_bit(FIRMWARE_FIRST_FLAG, &rcfw->flags)) {
+ memcpy(cmdqe, preq, min_t(u32, bsize, sizeof(*cmdqe)));
+ preq += min_t(u32, bsize, sizeof(*cmdqe));
+ bsize -= min_t(u32, bsize, sizeof(*cmdqe));
+ hwq->prod++;
+ } while (bsize > 0);
+ cmdq->seq_num++;
+
+ cmdq_prod = hwq->prod & 0xFFFF;
+ if (test_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags)) {
/* The very first doorbell write
* is required to set this flag
* which prompts the FW to reset
* its internal pointers
*/
cmdq_prod |= BIT(FIRMWARE_FIRST_FLAG);
- clear_bit(FIRMWARE_FIRST_FLAG, &rcfw->flags);
+ clear_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags);
}
-
/* ring CMDQ DB */
wmb();
- writel(cmdq_prod, rcfw->cmdq_bar_reg_iomem +
- rcfw->cmdq_bar_reg_prod_off);
- writel(RCFW_CMDQ_TRIG_VAL, rcfw->cmdq_bar_reg_iomem +
- rcfw->cmdq_bar_reg_trig_off);
-done:
- spin_unlock_irqrestore(&cmdq->lock, flags);
+ writel(cmdq_prod, cmdq->cmdq_mbox.prod);
+ writel(RCFW_CMDQ_TRIG_VAL, cmdq->cmdq_mbox.db);
+ print_hex_dump_bytes("req: ", DUMP_PREFIX_OFFSET, msg->req, msg->req_sz);
+ spin_unlock_bh(&hwq->lock);
/* Return the CREQ response pointer */
return 0;
}
-int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
- struct cmdq_base *req,
- struct creq_base *resp,
- void *sb, u8 is_block)
+/**
+ * __poll_for_resp - self poll completion for rcfw command
+ * @rcfw: rcfw channel instance of rdev
+ * @cookie: cookie to track the command
+ *
+ * It works the same as __wait_for_resp, except this function
+ * self-polls at short intervals since the interrupt is disabled.
+ * This function cannot be called from a non-sleepable context.
+ *
+ * Returns:
+ * -ETIMEDOUT if command is not completed in specific time interval.
+ * 0 if command is completed by firmware.
+ */
+static int __poll_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
{
- struct creq_qp_event *evnt = (struct creq_qp_event *)resp;
- u16 cookie;
- u8 opcode, retry_cnt = 0xFF;
- int rc = 0;
+ struct bnxt_qplib_cmdq_ctx *cmdq = &rcfw->cmdq;
+ struct bnxt_qplib_crsqe *crsqe;
+ unsigned long issue_time;
+ int ret;
- do {
- opcode = req->opcode;
- rc = __send_message(rcfw, req, resp, sb, is_block);
- cookie = le16_to_cpu(req->cookie) & RCFW_MAX_COOKIE_VALUE;
- if (!rc)
- break;
+ issue_time = jiffies;
+ crsqe = &rcfw->crsqe_tbl[cookie];
- if (!retry_cnt || (rc != -EAGAIN && rc != -EBUSY)) {
- /* send failed */
- dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x send failed\n",
- cookie, opcode);
- return rc;
+ do {
+ if (test_bit(ERR_DEVICE_DETACHED, &cmdq->flags))
+ return bnxt_qplib_map_rc(crsqe->opcode);
+ if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags))
+ return -ETIMEDOUT;
+
+ usleep_range(1000, 1001);
+
+ bnxt_qplib_service_creq(&rcfw->creq.creq_tasklet);
+ if (!crsqe->is_in_used)
+ return 0;
+ if (jiffies_to_msecs(jiffies - issue_time) >
+ (rcfw->max_timeout * 1000)) {
+ ret = bnxt_re_is_fw_stalled(rcfw, cookie);
+ if (ret)
+ return ret;
}
- is_block ? mdelay(1) : usleep_range(500, 1000);
+ } while (true);
+};
+
+static int __send_message_basic_sanity(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_cmdqmsg *msg,
+ u8 opcode)
+{
+ struct bnxt_qplib_cmdq_ctx *cmdq;
+
+ cmdq = &rcfw->cmdq;
+
+ /* Prevent posting if f/w is not in a state to process */
+ if (test_bit(ERR_DEVICE_DETACHED, &rcfw->cmdq.flags))
+ return -ENXIO;
+
+ if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags))
+ return -ETIMEDOUT;
+
+ if (test_bit(FIRMWARE_INITIALIZED_FLAG, &cmdq->flags) &&
+ opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: RCFW already initialized!");
+ return -EINVAL;
+ }
- } while (retry_cnt--);
+ if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &cmdq->flags) &&
+ (opcode != CMDQ_BASE_OPCODE_QUERY_FUNC &&
+ opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW &&
+ opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: RCFW not initialized, reject opcode 0x%x",
+ opcode);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+/* This function just posts and does not bother about completion */
+static void __destroy_timedout_ah(struct bnxt_qplib_rcfw *rcfw,
+ struct creq_create_ah_resp *create_ah_resp)
+{
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_destroy_ah req = {};
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_DESTROY_AH,
+ sizeof(req));
+ req.ah_cid = create_ah_resp->xid;
+ msg.req = (struct cmdq_base *)&req;
+ msg.req_sz = sizeof(req);
+ __send_message_no_waiter(rcfw, &msg);
+ dev_info_ratelimited(&rcfw->pdev->dev,
+ "From %s: ah_cid = %d timeout_send %d\n",
+ __func__, req.ah_cid,
+ atomic_read(&rcfw->timeout_send));
+}
- if (is_block)
+/**
+ * __bnxt_qplib_rcfw_send_message - qplib interface to send
+ * and complete rcfw command.
+ * @rcfw: rcfw channel instance of rdev
+ * @msg: qplib message internal
+ *
+ * This function does not account for the shadow queue depth. It sends
+ * all commands unconditionally as long as the send queue is not full.
+ *
+ * Returns:
+ * 0 if command completed by firmware.
+ * Non zero if the command is not completed by firmware.
+ */
+static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_cmdqmsg *msg)
+{
+ struct creq_qp_event *evnt = (struct creq_qp_event *)msg->resp;
+ struct bnxt_qplib_crsqe *crsqe;
+ u16 cookie;
+ int rc;
+ u8 opcode;
+
+ opcode = __get_cmdq_base_opcode(msg->req, msg->req_sz);
+
+ rc = __send_message_basic_sanity(rcfw, msg, opcode);
+ if (rc)
+ return rc == -ENXIO ? bnxt_qplib_map_rc(opcode) : rc;
+
+ rc = __send_message(rcfw, msg, opcode);
+ if (rc)
+ return rc;
+
+ cookie = le16_to_cpu(__get_cmdq_base_cookie(msg->req, msg->req_sz))
+ & RCFW_MAX_COOKIE_VALUE;
+
+ if (msg->block)
rc = __block_for_resp(rcfw, cookie);
- else
+ else if (atomic_read(&rcfw->rcfw_intr_enabled))
rc = __wait_for_resp(rcfw, cookie);
+ else
+ rc = __poll_for_resp(rcfw, cookie);
+
if (rc) {
- /* timed out */
- dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x timedout (%d)msec\n",
- cookie, opcode, RCFW_CMD_WAIT_TIME_MS);
- set_bit(FIRMWARE_TIMED_OUT, &rcfw->flags);
- return rc;
+ spin_lock_bh(&rcfw->cmdq.hwq.lock);
+ crsqe = &rcfw->crsqe_tbl[cookie];
+ crsqe->is_waiter_alive = false;
+ if (rc == -ENODEV)
+ set_bit(FIRMWARE_STALL_DETECTED, &rcfw->cmdq.flags);
+ spin_unlock_bh(&rcfw->cmdq.hwq.lock);
+ return -ETIMEDOUT;
}
if (evnt->status) {
/* failed with status */
dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x status %#x\n",
cookie, opcode, evnt->status);
- rc = -EFAULT;
+ rc = -EIO;
}
return rc;
}
+
+/**
+ * bnxt_qplib_rcfw_send_message - qplib interface to send
+ * and complete rcfw command.
+ * @rcfw: rcfw channel instance of rdev
+ * @msg: qplib message internal
+ *
+ * The driver interacts with firmware through the rcfw channel/slow path
+ * in two ways:
+ * a. Blocking rcfw command send. In this path, the driver cannot hold
+ * the context for a long period since it holds the cpu until the
+ * command completes.
+ * b. Non-blocking rcfw command send. In this path, the driver can hold
+ * the context for a longer period. There may be many pending commands
+ * waiting for completion because of the non-blocking nature.
+ *
+ * The driver uses a shadow queue depth. The current queue depth of 8K
+ * (given the rcfw message size, there can be ~4K rcfw commands actually
+ * outstanding) is not optimal for rcfw command processing in firmware.
+ *
+ * Restrict to at most #RCFW_CMD_NON_BLOCKING_SHADOW_QD non-blocking rcfw commands.
+ * Allow all blocking commands as long as the queue is not full.
+ *
+ * Returns:
+ * 0 if command completed by firmware.
+ * Non zero if the command is not completed by firmware.
+ */
+int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_cmdqmsg *msg)
+{
+ int ret;
+
+ if (!msg->block) {
+ down(&rcfw->rcfw_inflight);
+ ret = __bnxt_qplib_rcfw_send_message(rcfw, msg);
+ up(&rcfw->rcfw_inflight);
+ } else {
+ ret = __bnxt_qplib_rcfw_send_message(rcfw, msg);
+ }
+
+ return ret;
+}
+
/* Completions */
static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw,
struct creq_func_event *func_event)
{
+ int rc;
+
switch (func_event->event) {
case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
break;
@@ -280,37 +611,47 @@ static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw,
default:
return -EINVAL;
}
- return 0;
+
+ rc = rcfw->creq.aeq_handler(rcfw, (void *)func_event, NULL);
+ return rc;
}
static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
- struct creq_qp_event *qp_event)
+ struct creq_qp_event *qp_event,
+ u32 *num_wait)
{
- struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
struct creq_qp_error_notification *err_event;
- struct bnxt_qplib_crsq *crsqe;
- unsigned long flags;
+ struct bnxt_qplib_hwq *hwq = &rcfw->cmdq.hwq;
+ struct bnxt_qplib_crsqe *crsqe;
+ u32 qp_id, tbl_indx, req_size;
struct bnxt_qplib_qp *qp;
- u16 cbit, blocked = 0;
- u16 cookie;
- __le16 mcookie;
- u32 qp_id;
+ u16 cookie, blocked = 0;
+ bool is_waiter_alive;
+ struct pci_dev *pdev;
+ u32 wait_cmds = 0;
+ int rc = 0;
+ pdev = rcfw->pdev;
+ print_hex_dump_bytes("event: ", DUMP_PREFIX_OFFSET, qp_event, sizeof(*qp_event));
switch (qp_event->event) {
case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
err_event = (struct creq_qp_error_notification *)qp_event;
qp_id = le32_to_cpu(err_event->xid);
- qp = rcfw->qp_tbl[qp_id].qp_handle;
- dev_dbg(&rcfw->pdev->dev,
- "Received QP error notification\n");
- dev_dbg(&rcfw->pdev->dev,
+ spin_lock(&rcfw->tbl_lock);
+ tbl_indx = map_qp_id_to_tbl_indx(qp_id, rcfw);
+ qp = rcfw->qp_tbl[tbl_indx].qp_handle;
+ if (!qp) {
+ spin_unlock(&rcfw->tbl_lock);
+ break;
+ }
+ bnxt_qplib_mark_qp_error(qp);
+ rc = rcfw->creq.aeq_handler(rcfw, qp_event, qp);
+ spin_unlock(&rcfw->tbl_lock);
+ dev_dbg(&pdev->dev, "Received QP error notification\n");
+ dev_dbg(&pdev->dev,
"qpid 0x%x, req_err=0x%x, resp_err=0x%x\n",
qp_id, err_event->req_err_state_reason,
err_event->res_err_state_reason);
- if (!qp)
- break;
- bnxt_qplib_mark_qp_error(qp);
- rcfw->aeq_handler(rcfw, qp_event, qp);
break;
default:
/*
@@ -322,75 +663,107 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
*
*/
- spin_lock_irqsave_nested(&cmdq->lock, flags,
- SINGLE_DEPTH_NESTING);
+ spin_lock_nested(&hwq->lock, SINGLE_DEPTH_NESTING);
cookie = le16_to_cpu(qp_event->cookie);
- mcookie = qp_event->cookie;
blocked = cookie & RCFW_CMD_IS_BLOCKING;
cookie &= RCFW_MAX_COOKIE_VALUE;
- cbit = cookie % rcfw->cmdq_depth;
- crsqe = &rcfw->crsqe_tbl[cbit];
- if (crsqe->resp &&
- crsqe->resp->cookie == mcookie) {
- memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
- crsqe->resp = NULL;
- } else {
- if (crsqe->resp && crsqe->resp->cookie)
- dev_err(&rcfw->pdev->dev,
- "CMD %s cookie sent=%#x, recd=%#x\n",
- crsqe->resp ? "mismatch" : "collision",
- crsqe->resp ? crsqe->resp->cookie : 0,
- mcookie);
+ crsqe = &rcfw->crsqe_tbl[cookie];
+
+ if (WARN_ONCE(test_bit(FIRMWARE_STALL_DETECTED,
+ &rcfw->cmdq.flags),
+ "QPLIB: Unreponsive rcfw channel detected.!!")) {
+ dev_info(&pdev->dev,
+ "rcfw timedout: cookie = %#x, free_slots = %d",
+ cookie, crsqe->free_slots);
+ spin_unlock(&hwq->lock);
+ return rc;
}
- if (!test_and_clear_bit(cbit, rcfw->cmdq_bitmap))
- dev_warn(&rcfw->pdev->dev,
- "CMD bit %d was not requested\n", cbit);
- cmdq->cons += crsqe->req_size;
+
+ if (crsqe->is_internal_cmd && !qp_event->status)
+ atomic_dec(&rcfw->timeout_send);
+
+ if (crsqe->is_waiter_alive) {
+ if (crsqe->resp) {
+ memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
+ /* Insert write memory barrier to ensure that
+ * response data is copied before clearing the
+ * flags
+ */
+ smp_wmb();
+ }
+ if (!blocked)
+ wait_cmds++;
+ }
+
+ req_size = crsqe->req_size;
+ is_waiter_alive = crsqe->is_waiter_alive;
+
crsqe->req_size = 0;
+ if (!is_waiter_alive)
+ crsqe->resp = NULL;
- if (!blocked)
- wake_up(&rcfw->waitq);
- spin_unlock_irqrestore(&cmdq->lock, flags);
+ crsqe->is_in_used = false;
+
+ hwq->cons += req_size;
+
+ /* This handles the following scenario -
+ * Create AH completed successfully in firmware, but the
+ * completion took so long that the driver already lost the
+ * create_ah context of the caller.
+ * We have already returned failure for the create_ah verb,
+ * so destroy the same address vector since it is no longer
+ * used in the stack. We don't care about completion in
+ * __send_message_no_waiter.
+ * If destroy_ah fails in firmware, an AH resource will leak;
+ * this is relatively non-critical and unlikely, and the
+ * current design does not handle such a case.
+ */
+ if (!is_waiter_alive && !qp_event->status &&
+ qp_event->event == CREQ_QP_EVENT_EVENT_CREATE_AH)
+ __destroy_timedout_ah(rcfw,
+ (struct creq_create_ah_resp *)
+ qp_event);
+ spin_unlock(&hwq->lock);
}
- return 0;
+ *num_wait += wait_cmds;
+ return rc;
}
/* SP - CREQ Completion handlers */
-static void bnxt_qplib_service_creq(unsigned long data)
+static void bnxt_qplib_service_creq(struct tasklet_struct *t)
{
- struct bnxt_qplib_rcfw *rcfw = (struct bnxt_qplib_rcfw *)data;
- bool gen_p5 = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx);
- struct bnxt_qplib_hwq *creq = &rcfw->creq;
+ struct bnxt_qplib_rcfw *rcfw = from_tasklet(rcfw, t, creq.creq_tasklet);
+ struct bnxt_qplib_creq_ctx *creq = &rcfw->creq;
u32 type, budget = CREQ_ENTRY_POLL_BUDGET;
- struct creq_base *creqe, **creq_ptr;
- u32 sw_cons, raw_cons;
- unsigned long flags;
+ struct bnxt_qplib_hwq *hwq = &creq->hwq;
+ struct creq_base *creqe;
+ u32 num_wakeup = 0;
+ u32 hw_polled = 0;
/* Service the CREQ until budget is over */
- spin_lock_irqsave(&creq->lock, flags);
- raw_cons = creq->cons;
+ spin_lock_bh(&hwq->lock);
while (budget > 0) {
- sw_cons = HWQ_CMP(raw_cons, creq);
- creq_ptr = (struct creq_base **)creq->pbl_ptr;
- creqe = &creq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)];
- if (!CREQ_CMP_VALID(creqe, raw_cons, creq->max_elements))
+ creqe = bnxt_qplib_get_qe(hwq, hwq->cons, NULL);
+ if (!CREQ_CMP_VALID(creqe, creq->creq_db.dbinfo.flags))
break;
/* The valid test of the entry must be done first before
* reading any further.
*/
dma_rmb();
+ rcfw->cmdq.last_seen = jiffies;
type = creqe->type & CREQ_BASE_TYPE_MASK;
switch (type) {
case CREQ_BASE_TYPE_QP_EVENT:
bnxt_qplib_process_qp_event
- (rcfw, (struct creq_qp_event *)creqe);
- rcfw->creq_qp_event_processed++;
+ (rcfw, (struct creq_qp_event *)creqe,
+ &num_wakeup);
+ creq->stats.creq_qp_event_processed++;
break;
case CREQ_BASE_TYPE_FUNC_EVENT:
if (!bnxt_qplib_process_func_event
(rcfw, (struct creq_func_event *)creqe))
- rcfw->creq_func_event_processed++;
+ creq->stats.creq_func_event_processed++;
else
dev_warn(&rcfw->pdev->dev,
"aeqe:%#x Not handled\n", type);
@@ -402,32 +775,34 @@ static void bnxt_qplib_service_creq(unsigned long data)
type);
break;
}
- raw_cons++;
budget--;
+ hw_polled++;
+ bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons,
+ 1, &creq->creq_db.dbinfo.flags);
}
- if (creq->cons != raw_cons) {
- creq->cons = raw_cons;
- bnxt_qplib_ring_creq_db_rearm(rcfw->creq_bar_reg_iomem,
- raw_cons, creq->max_elements,
- rcfw->creq_ring_id, gen_p5);
- }
- spin_unlock_irqrestore(&creq->lock, flags);
+ if (hw_polled)
+ bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo,
+ rcfw->res->cctx, true);
+ spin_unlock_bh(&hwq->lock);
+ if (num_wakeup)
+ wake_up_nr(&rcfw->cmdq.waitq, num_wakeup);
}
static irqreturn_t bnxt_qplib_creq_irq(int irq, void *dev_instance)
{
struct bnxt_qplib_rcfw *rcfw = dev_instance;
- struct bnxt_qplib_hwq *creq = &rcfw->creq;
- struct creq_base **creq_ptr;
+ struct bnxt_qplib_creq_ctx *creq;
+ struct bnxt_qplib_hwq *hwq;
u32 sw_cons;
+ creq = &rcfw->creq;
+ hwq = &creq->hwq;
/* Prefetch the CREQ element */
- sw_cons = HWQ_CMP(creq->cons, creq);
- creq_ptr = (struct creq_base **)rcfw->creq.pbl_ptr;
- prefetch(&creq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)]);
+ sw_cons = HWQ_CMP(hwq->cons, hwq);
+ prefetch(bnxt_qplib_get_qe(hwq, sw_cons, NULL));
- tasklet_schedule(&rcfw->worker);
+ tasklet_schedule(&creq->creq_tasklet);
return IRQ_HANDLED;
}
@@ -435,47 +810,37 @@ static irqreturn_t bnxt_qplib_creq_irq(int irq, void *dev_instance)
/* RCFW */
int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw)
{
- struct cmdq_deinitialize_fw req;
- struct creq_deinitialize_fw_resp resp;
- u16 cmd_flags = 0;
+ struct creq_deinitialize_fw_resp resp = {};
+ struct cmdq_deinitialize_fw req = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
int rc;
- RCFW_CMD_PREP(req, DEINITIALIZE_FW, cmd_flags);
- rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
- NULL, 0);
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_DEINITIALIZE_FW,
+ sizeof(req));
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL,
+ sizeof(req), sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
return rc;
- clear_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags);
+ clear_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->cmdq.flags);
return 0;
}
-static int __get_pbl_pg_idx(struct bnxt_qplib_pbl *pbl)
-{
- return (pbl->pg_size == ROCE_PG_SIZE_4K ?
- CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_4K :
- pbl->pg_size == ROCE_PG_SIZE_8K ?
- CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_8K :
- pbl->pg_size == ROCE_PG_SIZE_64K ?
- CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_64K :
- pbl->pg_size == ROCE_PG_SIZE_2M ?
- CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_2M :
- pbl->pg_size == ROCE_PG_SIZE_8M ?
- CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_8M :
- pbl->pg_size == ROCE_PG_SIZE_1G ?
- CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_1G :
- CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_4K);
-}
-
int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
struct bnxt_qplib_ctx *ctx, int is_virtfn)
{
- struct cmdq_initialize_fw req;
- struct creq_initialize_fw_resp resp;
- u16 cmd_flags = 0, level;
+ struct creq_initialize_fw_resp resp = {};
+ struct cmdq_initialize_fw req = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ u16 flags = 0;
+ u8 pgsz, lvl;
int rc;
- RCFW_CMD_PREP(req, INITIALIZE_FW, cmd_flags);
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_INITIALIZE_FW,
+ sizeof(req));
/* Supply (log-base-2-of-host-page-size - base-page-shift)
* to bono to adjust the doorbell page sizes.
*/
@@ -488,34 +853,33 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
* shall setup this area for VF. Skipping the
* HW programming
*/
- if (is_virtfn || bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx))
+ if (is_virtfn || bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx))
goto skip_ctx_setup;
- level = ctx->qpc_tbl.level;
- req.qpc_pg_size_qpc_lvl = (level << CMDQ_INITIALIZE_FW_QPC_LVL_SFT) |
- __get_pbl_pg_idx(&ctx->qpc_tbl.pbl[level]);
- level = ctx->mrw_tbl.level;
- req.mrw_pg_size_mrw_lvl = (level << CMDQ_INITIALIZE_FW_MRW_LVL_SFT) |
- __get_pbl_pg_idx(&ctx->mrw_tbl.pbl[level]);
- level = ctx->srqc_tbl.level;
- req.srq_pg_size_srq_lvl = (level << CMDQ_INITIALIZE_FW_SRQ_LVL_SFT) |
- __get_pbl_pg_idx(&ctx->srqc_tbl.pbl[level]);
- level = ctx->cq_tbl.level;
- req.cq_pg_size_cq_lvl = (level << CMDQ_INITIALIZE_FW_CQ_LVL_SFT) |
- __get_pbl_pg_idx(&ctx->cq_tbl.pbl[level]);
- level = ctx->srqc_tbl.level;
- req.srq_pg_size_srq_lvl = (level << CMDQ_INITIALIZE_FW_SRQ_LVL_SFT) |
- __get_pbl_pg_idx(&ctx->srqc_tbl.pbl[level]);
- level = ctx->cq_tbl.level;
- req.cq_pg_size_cq_lvl = (level << CMDQ_INITIALIZE_FW_CQ_LVL_SFT) |
- __get_pbl_pg_idx(&ctx->cq_tbl.pbl[level]);
- level = ctx->tim_tbl.level;
- req.tim_pg_size_tim_lvl = (level << CMDQ_INITIALIZE_FW_TIM_LVL_SFT) |
- __get_pbl_pg_idx(&ctx->tim_tbl.pbl[level]);
- level = ctx->tqm_pde_level;
- req.tqm_pg_size_tqm_lvl = (level << CMDQ_INITIALIZE_FW_TQM_LVL_SFT) |
- __get_pbl_pg_idx(&ctx->tqm_pde.pbl[level]);
-
+ lvl = ctx->qpc_tbl.level;
+ pgsz = bnxt_qplib_base_pg_size(&ctx->qpc_tbl);
+ req.qpc_pg_size_qpc_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) |
+ lvl;
+ lvl = ctx->mrw_tbl.level;
+ pgsz = bnxt_qplib_base_pg_size(&ctx->mrw_tbl);
+ req.mrw_pg_size_mrw_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) |
+ lvl;
+ lvl = ctx->srqc_tbl.level;
+ pgsz = bnxt_qplib_base_pg_size(&ctx->srqc_tbl);
+ req.srq_pg_size_srq_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) |
+ lvl;
+ lvl = ctx->cq_tbl.level;
+ pgsz = bnxt_qplib_base_pg_size(&ctx->cq_tbl);
+ req.cq_pg_size_cq_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) |
+ lvl;
+ lvl = ctx->tim_tbl.level;
+ pgsz = bnxt_qplib_base_pg_size(&ctx->tim_tbl);
+ req.tim_pg_size_tim_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) |
+ lvl;
+ lvl = ctx->tqm_ctx.pde.level;
+ pgsz = bnxt_qplib_base_pg_size(&ctx->tqm_ctx.pde);
+ req.tqm_pg_size_tqm_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) |
+ lvl;
req.qpc_page_dir =
cpu_to_le64(ctx->qpc_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
req.mrw_page_dir =
@@ -527,83 +891,91 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
req.tim_page_dir =
cpu_to_le64(ctx->tim_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
req.tqm_page_dir =
- cpu_to_le64(ctx->tqm_pde.pbl[PBL_LVL_0].pg_map_arr[0]);
+ cpu_to_le64(ctx->tqm_ctx.pde.pbl[PBL_LVL_0].pg_map_arr[0]);
req.number_of_qp = cpu_to_le32(ctx->qpc_tbl.max_elements);
req.number_of_mrw = cpu_to_le32(ctx->mrw_tbl.max_elements);
req.number_of_srq = cpu_to_le32(ctx->srqc_tbl.max_elements);
req.number_of_cq = cpu_to_le32(ctx->cq_tbl.max_elements);
- req.max_qp_per_vf = cpu_to_le32(ctx->vf_res.max_qp_per_vf);
- req.max_mrw_per_vf = cpu_to_le32(ctx->vf_res.max_mrw_per_vf);
- req.max_srq_per_vf = cpu_to_le32(ctx->vf_res.max_srq_per_vf);
- req.max_cq_per_vf = cpu_to_le32(ctx->vf_res.max_cq_per_vf);
- req.max_gid_per_vf = cpu_to_le32(ctx->vf_res.max_gid_per_vf);
-
skip_ctx_setup:
+ if (BNXT_RE_HW_RETX(rcfw->res->dattr->dev_cap_flags))
+ flags |= CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED;
+ if (_is_optimize_modify_qp_supported(rcfw->res->dattr->dev_cap_flags2))
+ flags |= CMDQ_INITIALIZE_FW_FLAGS_OPTIMIZE_MODIFY_QP_SUPPORTED;
+ if (rcfw->res->en_dev->flags & BNXT_EN_FLAG_ROCE_VF_RES_MGMT)
+ flags |= CMDQ_INITIALIZE_FW_FLAGS_L2_VF_RESOURCE_MGMT;
+ if (bnxt_qplib_roce_mirror_supported(rcfw->res->cctx)) {
+ flags |= CMDQ_INITIALIZE_FW_FLAGS_MIRROR_ON_ROCE_SUPPORTED;
+ rcfw->roce_mirror = true;
+ }
+ req.flags |= cpu_to_le16(flags);
req.stat_ctx_id = cpu_to_le32(ctx->stats.fw_id);
- rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
- NULL, 0);
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
return rc;
- set_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags);
+ set_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->cmdq.flags);
return 0;
}
void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
{
- kfree(rcfw->qp_tbl);
kfree(rcfw->crsqe_tbl);
- bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->cmdq);
- bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->creq);
+ bnxt_qplib_free_hwq(rcfw->res, &rcfw->cmdq.hwq);
+ bnxt_qplib_free_hwq(rcfw->res, &rcfw->creq.hwq);
rcfw->pdev = NULL;
}
-int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
+int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
struct bnxt_qplib_rcfw *rcfw,
- struct bnxt_qplib_ctx *ctx,
- int qp_tbl_sz)
+ struct bnxt_qplib_ctx *ctx)
{
- u8 hwq_type;
-
- rcfw->pdev = pdev;
- rcfw->creq.max_elements = BNXT_QPLIB_CREQE_MAX_CNT;
- hwq_type = bnxt_qplib_get_hwq_type(rcfw->res);
- if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->creq, NULL,
- &rcfw->creq.max_elements,
- BNXT_QPLIB_CREQE_UNITS,
- 0, PAGE_SIZE, hwq_type)) {
+ struct bnxt_qplib_hwq_attr hwq_attr = {};
+ struct bnxt_qplib_sg_info sginfo = {};
+ struct bnxt_qplib_cmdq_ctx *cmdq;
+ struct bnxt_qplib_creq_ctx *creq;
+
+ rcfw->pdev = res->pdev;
+ cmdq = &rcfw->cmdq;
+ creq = &rcfw->creq;
+ rcfw->res = res;
+
+ sginfo.pgsize = PAGE_SIZE;
+ sginfo.pgshft = PAGE_SHIFT;
+
+ hwq_attr.sginfo = &sginfo;
+ hwq_attr.res = rcfw->res;
+ hwq_attr.depth = BNXT_QPLIB_CREQE_MAX_CNT;
+ hwq_attr.stride = BNXT_QPLIB_CREQE_UNITS;
+ hwq_attr.type = bnxt_qplib_get_hwq_type(res);
+
+ if (bnxt_qplib_alloc_init_hwq(&creq->hwq, &hwq_attr)) {
dev_err(&rcfw->pdev->dev,
"HW channel CREQ allocation failed\n");
goto fail;
}
- if (ctx->hwrm_intf_ver < HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK)
- rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT_256;
- else
- rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT_8192;
-
- rcfw->cmdq.max_elements = rcfw->cmdq_depth;
- if (bnxt_qplib_alloc_init_hwq
- (rcfw->pdev, &rcfw->cmdq, NULL,
- &rcfw->cmdq.max_elements,
- BNXT_QPLIB_CMDQE_UNITS, 0,
- bnxt_qplib_cmdqe_page_size(rcfw->cmdq_depth),
- HWQ_TYPE_CTX)) {
+
+ rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT;
+
+ sginfo.pgsize = bnxt_qplib_cmdqe_page_size(rcfw->cmdq_depth);
+ hwq_attr.depth = rcfw->cmdq_depth & 0x7FFFFFFF;
+ hwq_attr.stride = BNXT_QPLIB_CMDQE_UNITS;
+ hwq_attr.type = HWQ_TYPE_CTX;
+ if (bnxt_qplib_alloc_init_hwq(&cmdq->hwq, &hwq_attr)) {
dev_err(&rcfw->pdev->dev,
"HW channel CMDQ allocation failed\n");
goto fail;
}
- rcfw->crsqe_tbl = kcalloc(rcfw->cmdq.max_elements,
+ rcfw->crsqe_tbl = kcalloc(cmdq->hwq.max_elements,
sizeof(*rcfw->crsqe_tbl), GFP_KERNEL);
if (!rcfw->crsqe_tbl)
goto fail;
- rcfw->qp_tbl_size = qp_tbl_sz;
- rcfw->qp_tbl = kcalloc(qp_tbl_sz, sizeof(struct bnxt_qplib_qp_node),
- GFP_KERNEL);
- if (!rcfw->qp_tbl)
- goto fail;
+ spin_lock_init(&rcfw->tbl_lock);
+
+ rcfw->max_timeout = res->cctx->hwrm_cmd_max_timeout;
return 0;
@@ -614,137 +986,206 @@ fail:
void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill)
{
- bool gen_p5 = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx);
+ struct bnxt_qplib_creq_ctx *creq;
+
+ creq = &rcfw->creq;
- tasklet_disable(&rcfw->worker);
+ if (!creq->requested)
+ return;
+
+ creq->requested = false;
/* Mask h/w interrupts */
- bnxt_qplib_ring_creq_db(rcfw->creq_bar_reg_iomem, rcfw->creq.cons,
- rcfw->creq.max_elements, rcfw->creq_ring_id,
- gen_p5);
+ bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, rcfw->res->cctx, false);
/* Sync with last running IRQ-handler */
- synchronize_irq(rcfw->vector);
+ synchronize_irq(creq->msix_vec);
+ free_irq(creq->msix_vec, rcfw);
+ kfree(creq->irq_name);
+ creq->irq_name = NULL;
+ atomic_set(&rcfw->rcfw_intr_enabled, 0);
if (kill)
- tasklet_kill(&rcfw->worker);
-
- if (rcfw->requested) {
- free_irq(rcfw->vector, rcfw);
- rcfw->requested = false;
- }
+ tasklet_kill(&creq->creq_tasklet);
+ tasklet_disable(&creq->creq_tasklet);
}
void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
{
- unsigned long indx;
+ struct bnxt_qplib_creq_ctx *creq;
+ struct bnxt_qplib_cmdq_ctx *cmdq;
+ creq = &rcfw->creq;
+ cmdq = &rcfw->cmdq;
+ /* Make sure the HW channel is stopped! */
bnxt_qplib_rcfw_stop_irq(rcfw, true);
- iounmap(rcfw->cmdq_bar_reg_iomem);
- iounmap(rcfw->creq_bar_reg_iomem);
+ iounmap(cmdq->cmdq_mbox.reg.bar_reg);
+ iounmap(creq->creq_db.reg.bar_reg);
- indx = find_first_bit(rcfw->cmdq_bitmap, rcfw->bmap_size);
- if (indx != rcfw->bmap_size)
- dev_err(&rcfw->pdev->dev,
- "disabling RCFW with pending cmd-bit %lx\n", indx);
- kfree(rcfw->cmdq_bitmap);
- rcfw->bmap_size = 0;
-
- rcfw->cmdq_bar_reg_iomem = NULL;
- rcfw->creq_bar_reg_iomem = NULL;
- rcfw->aeq_handler = NULL;
- rcfw->vector = 0;
+ cmdq->cmdq_mbox.reg.bar_reg = NULL;
+ creq->creq_db.reg.bar_reg = NULL;
+ creq->aeq_handler = NULL;
+ creq->msix_vec = 0;
}
int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
bool need_init)
{
- bool gen_p5 = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx);
+ struct bnxt_qplib_creq_ctx *creq;
+ struct bnxt_qplib_res *res;
int rc;
- if (rcfw->requested)
+ creq = &rcfw->creq;
+ res = rcfw->res;
+
+ if (creq->requested)
return -EFAULT;
- rcfw->vector = msix_vector;
+ creq->msix_vec = msix_vector;
if (need_init)
- tasklet_init(&rcfw->worker,
- bnxt_qplib_service_creq, (unsigned long)rcfw);
+ tasklet_setup(&creq->creq_tasklet, bnxt_qplib_service_creq);
else
- tasklet_enable(&rcfw->worker);
- rc = request_irq(rcfw->vector, bnxt_qplib_creq_irq, 0,
- "bnxt_qplib_creq", rcfw);
- if (rc)
+ tasklet_enable(&creq->creq_tasklet);
+
+ creq->irq_name = kasprintf(GFP_KERNEL, "bnxt_re-creq@pci:%s",
+ pci_name(res->pdev));
+ if (!creq->irq_name)
+ return -ENOMEM;
+ rc = request_irq(creq->msix_vec, bnxt_qplib_creq_irq, 0,
+ creq->irq_name, rcfw);
+ if (rc) {
+ kfree(creq->irq_name);
+ creq->irq_name = NULL;
+ tasklet_disable(&creq->creq_tasklet);
return rc;
- rcfw->requested = true;
- bnxt_qplib_ring_creq_db_rearm(rcfw->creq_bar_reg_iomem,
- rcfw->creq.cons, rcfw->creq.max_elements,
- rcfw->creq_ring_id, gen_p5);
+ }
+ creq->requested = true;
+
+ bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, res->cctx, true);
+ atomic_inc(&rcfw->rcfw_intr_enabled);
return 0;
}
-int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
- struct bnxt_qplib_rcfw *rcfw,
- int msix_vector,
- int cp_bar_reg_off, int virt_fn,
- int (*aeq_handler)(struct bnxt_qplib_rcfw *,
- void *, void *))
+static int bnxt_qplib_map_cmdq_mbox(struct bnxt_qplib_rcfw *rcfw)
{
- resource_size_t res_base;
- struct cmdq_init init;
- u16 bmap_size;
- int rc;
-
- /* General */
- rcfw->seq_num = 0;
- set_bit(FIRMWARE_FIRST_FLAG, &rcfw->flags);
- bmap_size = BITS_TO_LONGS(rcfw->cmdq_depth) * sizeof(unsigned long);
- rcfw->cmdq_bitmap = kzalloc(bmap_size, GFP_KERNEL);
- if (!rcfw->cmdq_bitmap)
- return -ENOMEM;
- rcfw->bmap_size = bmap_size;
-
- /* CMDQ */
- rcfw->cmdq_bar_reg = RCFW_COMM_PCI_BAR_REGION;
- res_base = pci_resource_start(pdev, rcfw->cmdq_bar_reg);
- if (!res_base)
+ struct bnxt_qplib_cmdq_mbox *mbox;
+ resource_size_t bar_reg;
+ struct pci_dev *pdev;
+
+ pdev = rcfw->pdev;
+ mbox = &rcfw->cmdq.cmdq_mbox;
+
+ mbox->reg.bar_id = RCFW_COMM_PCI_BAR_REGION;
+ mbox->reg.len = RCFW_COMM_SIZE;
+ mbox->reg.bar_base = pci_resource_start(pdev, mbox->reg.bar_id);
+ if (!mbox->reg.bar_base) {
+ dev_err(&pdev->dev,
+ "QPLIB: CMDQ BAR region %d resc start is 0!\n",
+ mbox->reg.bar_id);
return -ENOMEM;
+ }
- rcfw->cmdq_bar_reg_iomem = ioremap_nocache(res_base +
- RCFW_COMM_BASE_OFFSET,
- RCFW_COMM_SIZE);
- if (!rcfw->cmdq_bar_reg_iomem) {
- dev_err(&rcfw->pdev->dev, "CMDQ BAR region %d mapping failed\n",
- rcfw->cmdq_bar_reg);
+ bar_reg = mbox->reg.bar_base + RCFW_COMM_BASE_OFFSET;
+ mbox->reg.len = RCFW_COMM_SIZE;
+ mbox->reg.bar_reg = ioremap(bar_reg, mbox->reg.len);
+ if (!mbox->reg.bar_reg) {
+ dev_err(&pdev->dev,
+ "QPLIB: CMDQ BAR region %d mapping failed\n",
+ mbox->reg.bar_id);
return -ENOMEM;
}
- rcfw->cmdq_bar_reg_prod_off = virt_fn ? RCFW_VF_COMM_PROD_OFFSET :
- RCFW_PF_COMM_PROD_OFFSET;
-
- rcfw->cmdq_bar_reg_trig_off = RCFW_COMM_TRIG_OFFSET;
+ mbox->prod = (void __iomem *)(mbox->reg.bar_reg +
+ RCFW_PF_VF_COMM_PROD_OFFSET);
+ mbox->db = (void __iomem *)(mbox->reg.bar_reg + RCFW_COMM_TRIG_OFFSET);
+ return 0;
+}
- /* CREQ */
- rcfw->creq_bar_reg = RCFW_COMM_CONS_PCI_BAR_REGION;
- res_base = pci_resource_start(pdev, rcfw->creq_bar_reg);
- if (!res_base)
- dev_err(&rcfw->pdev->dev,
- "CREQ BAR region %d resc start is 0!\n",
- rcfw->creq_bar_reg);
+static int bnxt_qplib_map_creq_db(struct bnxt_qplib_rcfw *rcfw, u32 reg_offt)
+{
+ struct bnxt_qplib_creq_db *creq_db;
+ resource_size_t bar_reg;
+ struct pci_dev *pdev;
+
+ pdev = rcfw->pdev;
+ creq_db = &rcfw->creq.creq_db;
+
+ creq_db->dbinfo.flags = 0;
+ creq_db->reg.bar_id = RCFW_COMM_CONS_PCI_BAR_REGION;
+ creq_db->reg.bar_base = pci_resource_start(pdev, creq_db->reg.bar_id);
+ if (!creq_db->reg.bar_base)
+ dev_err(&pdev->dev,
+ "QPLIB: CREQ BAR region %d resc start is 0!",
+ creq_db->reg.bar_id);
+
+ bar_reg = creq_db->reg.bar_base + reg_offt;
/* Unconditionally map 8 bytes to support 57500 series */
- rcfw->creq_bar_reg_iomem = ioremap_nocache(res_base + cp_bar_reg_off,
- 8);
- if (!rcfw->creq_bar_reg_iomem) {
- dev_err(&rcfw->pdev->dev, "CREQ BAR region %d mapping failed\n",
- rcfw->creq_bar_reg);
- iounmap(rcfw->cmdq_bar_reg_iomem);
- rcfw->cmdq_bar_reg_iomem = NULL;
+ creq_db->reg.len = 8;
+ creq_db->reg.bar_reg = ioremap(bar_reg, creq_db->reg.len);
+ if (!creq_db->reg.bar_reg) {
+ dev_err(&pdev->dev,
+ "QPLIB: CREQ BAR region %d mapping failed",
+ creq_db->reg.bar_id);
return -ENOMEM;
}
- rcfw->creq_qp_event_processed = 0;
- rcfw->creq_func_event_processed = 0;
+ creq_db->dbinfo.db = creq_db->reg.bar_reg;
+ creq_db->dbinfo.hwq = &rcfw->creq.hwq;
+ creq_db->dbinfo.xid = rcfw->creq.ring_id;
+ return 0;
+}
+
+static void bnxt_qplib_start_rcfw(struct bnxt_qplib_rcfw *rcfw)
+{
+ struct bnxt_qplib_cmdq_ctx *cmdq;
+ struct bnxt_qplib_creq_ctx *creq;
+ struct bnxt_qplib_cmdq_mbox *mbox;
+ struct cmdq_init init = {0};
+
+ cmdq = &rcfw->cmdq;
+ creq = &rcfw->creq;
+ mbox = &cmdq->cmdq_mbox;
+
+ init.cmdq_pbl = cpu_to_le64(cmdq->hwq.pbl[PBL_LVL_0].pg_map_arr[0]);
+ init.cmdq_size_cmdq_lvl =
+ cpu_to_le16(((rcfw->cmdq_depth <<
+ CMDQ_INIT_CMDQ_SIZE_SFT) &
+ CMDQ_INIT_CMDQ_SIZE_MASK) |
+ ((cmdq->hwq.level <<
+ CMDQ_INIT_CMDQ_LVL_SFT) &
+ CMDQ_INIT_CMDQ_LVL_MASK));
+ init.creq_ring_id = cpu_to_le16(creq->ring_id);
+ /* Write to the Bono mailbox register */
+ __iowrite32_copy(mbox->reg.bar_reg, &init, sizeof(init) / 4);
+}
+
+int bnxt_qplib_enable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw,
+ int msix_vector,
+ int cp_bar_reg_off,
+ aeq_handler_t aeq_handler)
+{
+ struct bnxt_qplib_cmdq_ctx *cmdq;
+ struct bnxt_qplib_creq_ctx *creq;
+ int rc;
+
+ cmdq = &rcfw->cmdq;
+ creq = &rcfw->creq;
+
+ /* Clear to defaults */
+
+ cmdq->seq_num = 0;
+ set_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags);
+ init_waitqueue_head(&cmdq->waitq);
- if (aeq_handler)
- rcfw->aeq_handler = aeq_handler;
- init_waitqueue_head(&rcfw->waitq);
+ creq->stats.creq_qp_event_processed = 0;
+ creq->stats.creq_func_event_processed = 0;
+ creq->aeq_handler = aeq_handler;
+
+ rc = bnxt_qplib_map_cmdq_mbox(rcfw);
+ if (rc)
+ return rc;
+
+ rc = bnxt_qplib_map_creq_db(rcfw, cp_bar_reg_off);
+ if (rc)
+ return rc;
rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_vector, true);
if (rc) {
@@ -754,46 +1195,8 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
return rc;
}
- init.cmdq_pbl = cpu_to_le64(rcfw->cmdq.pbl[PBL_LVL_0].pg_map_arr[0]);
- init.cmdq_size_cmdq_lvl = cpu_to_le16(
- ((rcfw->cmdq_depth << CMDQ_INIT_CMDQ_SIZE_SFT) &
- CMDQ_INIT_CMDQ_SIZE_MASK) |
- ((rcfw->cmdq.level << CMDQ_INIT_CMDQ_LVL_SFT) &
- CMDQ_INIT_CMDQ_LVL_MASK));
- init.creq_ring_id = cpu_to_le16(rcfw->creq_ring_id);
+ sema_init(&rcfw->rcfw_inflight, RCFW_CMD_NON_BLOCKING_SHADOW_QD);
+ bnxt_qplib_start_rcfw(rcfw);
- /* Write to the Bono mailbox register */
- __iowrite32_copy(rcfw->cmdq_bar_reg_iomem, &init, sizeof(init) / 4);
return 0;
}
-
-struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf(
- struct bnxt_qplib_rcfw *rcfw,
- u32 size)
-{
- struct bnxt_qplib_rcfw_sbuf *sbuf;
-
- sbuf = kzalloc(sizeof(*sbuf), GFP_ATOMIC);
- if (!sbuf)
- return NULL;
-
- sbuf->size = size;
- sbuf->sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf->size,
- &sbuf->dma_addr, GFP_ATOMIC);
- if (!sbuf->sb)
- goto bail;
-
- return sbuf;
-bail:
- kfree(sbuf);
- return NULL;
-}
-
-void bnxt_qplib_rcfw_free_sbuf(struct bnxt_qplib_rcfw *rcfw,
- struct bnxt_qplib_rcfw_sbuf *sbuf)
-{
- if (sbuf->sb)
- dma_free_coherent(&rcfw->pdev->dev, sbuf->size,
- sbuf->sb, sbuf->dma_addr);
- kfree(sbuf);
-}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 2138533bb642..988c89b4232e 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -39,39 +39,40 @@
#ifndef __BNXT_QPLIB_RCFW_H__
#define __BNXT_QPLIB_RCFW_H__
+#include "qplib_tlv.h"
+
#define RCFW_CMDQ_TRIG_VAL 1
#define RCFW_COMM_PCI_BAR_REGION 0
#define RCFW_COMM_CONS_PCI_BAR_REGION 2
#define RCFW_COMM_BASE_OFFSET 0x600
-#define RCFW_PF_COMM_PROD_OFFSET 0xc
-#define RCFW_VF_COMM_PROD_OFFSET 0xc
+#define RCFW_PF_VF_COMM_PROD_OFFSET 0xc
#define RCFW_COMM_TRIG_OFFSET 0x100
#define RCFW_COMM_SIZE 0x104
#define RCFW_DBR_PCI_BAR_REGION 2
#define RCFW_DBR_BASE_PAGE_SHIFT 12
-
-#define RCFW_CMD_PREP(req, CMD, cmd_flags) \
- do { \
- memset(&(req), 0, sizeof((req))); \
- (req).opcode = CMDQ_BASE_OPCODE_##CMD; \
- (req).cmd_size = (sizeof((req)) + \
- BNXT_QPLIB_CMDQE_UNITS - 1) / \
- BNXT_QPLIB_CMDQE_UNITS; \
- (req).flags = cpu_to_le16(cmd_flags); \
- } while (0)
-
-#define RCFW_CMD_WAIT_TIME_MS 20000 /* 20 Seconds timeout */
+#define RCFW_FW_STALL_MAX_TIMEOUT 40
 /* Cmdq contains a fixed number of 16-byte slots */
struct bnxt_qplib_cmdqe {
u8 data[16];
};
-/* CMDQ elements */
-#define BNXT_QPLIB_CMDQE_MAX_CNT_256 256
-#define BNXT_QPLIB_CMDQE_MAX_CNT_8192 8192
#define BNXT_QPLIB_CMDQE_UNITS sizeof(struct bnxt_qplib_cmdqe)
+
+static inline void bnxt_qplib_rcfw_cmd_prep(struct cmdq_base *req,
+ u8 opcode, u8 cmd_size)
+{
+ req->opcode = opcode;
+ req->cmd_size = cmd_size;
+}
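+/* Usage sketch, following the DEINITIALIZE_FW path in qplib_rcfw.c:
+ *
+ *	struct cmdq_deinitialize_fw req = {};
+ *
+ *	bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ *				 CMDQ_BASE_OPCODE_DEINITIALIZE_FW,
+ *				 sizeof(req));
+ *
+ * cmd_size is stored in bytes here; bnxt_qplib_set_cmd_slots() is
+ * expected to convert it to 16-byte command units before submission.
+ */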
+
+/* Shadow queue depth for non-blocking commands */
+#define RCFW_CMD_NON_BLOCKING_SHADOW_QD 64
+#define RCFW_CMD_WAIT_TIME_MS 20000 /* 20 Seconds timeout */
+
+/* CMDQ elements */
+#define BNXT_QPLIB_CMDQE_MAX_CNT 8192
#define BNXT_QPLIB_CMDQE_BYTES(depth) ((depth) * BNXT_QPLIB_CMDQE_UNITS)
static inline u32 bnxt_qplib_cmdqe_npages(u32 depth)
@@ -89,36 +90,50 @@ static inline u32 bnxt_qplib_cmdqe_page_size(u32 depth)
return (bnxt_qplib_cmdqe_npages(depth) * PAGE_SIZE);
}
-static inline u32 bnxt_qplib_cmdqe_cnt_per_pg(u32 depth)
+/* Get the number of command units required for the req. This
+ * function returns the correct value only if it is called before
+ * the request is converted by bnxt_qplib_set_cmd_slots().
+ */
+static inline u32 bnxt_qplib_get_cmd_slots(struct cmdq_base *req)
{
- return (bnxt_qplib_cmdqe_page_size(depth) /
- BNXT_QPLIB_CMDQE_UNITS);
-}
+ u32 cmd_units = 0;
-#define MAX_CMDQ_IDX(depth) ((depth) - 1)
+ if (HAS_TLV_HEADER(req)) {
+ struct roce_tlv *tlv_req = (struct roce_tlv *)req;
-static inline u32 bnxt_qplib_max_cmdq_idx_per_pg(u32 depth)
-{
- return (bnxt_qplib_cmdqe_cnt_per_pg(depth) - 1);
+ cmd_units = tlv_req->total_size;
+ } else {
+ cmd_units = (req->cmd_size + BNXT_QPLIB_CMDQE_UNITS - 1) /
+ BNXT_QPLIB_CMDQE_UNITS;
+ }
+
+ return cmd_units;
}
-#define RCFW_MAX_COOKIE_VALUE 0x7FFF
-#define RCFW_CMD_IS_BLOCKING 0x8000
-#define RCFW_BLOCKED_CMD_WAIT_COUNT 0x4E20
+static inline u32 bnxt_qplib_set_cmd_slots(struct cmdq_base *req)
+{
+ u32 cmd_byte = 0;
-#define HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK 0x1000900020011ULL
+ if (HAS_TLV_HEADER(req)) {
+ struct roce_tlv *tlv_req = (struct roce_tlv *)req;
-static inline u32 get_cmdq_pg(u32 val, u32 depth)
-{
- return (val & ~(bnxt_qplib_max_cmdq_idx_per_pg(depth))) /
- (bnxt_qplib_cmdqe_cnt_per_pg(depth));
-}
+ cmd_byte = tlv_req->total_size * BNXT_QPLIB_CMDQE_UNITS;
+ } else {
+ cmd_byte = req->cmd_size;
+ req->cmd_size = (req->cmd_size + BNXT_QPLIB_CMDQE_UNITS - 1) /
+ BNXT_QPLIB_CMDQE_UNITS;
+ }
-static inline u32 get_cmdq_idx(u32 val, u32 depth)
-{
- return val & (bnxt_qplib_max_cmdq_idx_per_pg(depth));
+ return cmd_byte;
}
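+/* Worked example for a non-TLV command (sizes are illustrative): a
+ * 64-byte request needs (64 + 15) / 16 = 4 command units, so
+ * bnxt_qplib_get_cmd_slots() returns 4 while cmd_size still holds the
+ * byte count.  bnxt_qplib_set_cmd_slots() then rewrites cmd_size to 4
+ * units and returns the original 64 bytes.
+ */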
+#define RCFW_MAX_COOKIE_VALUE (BNXT_QPLIB_CMDQE_MAX_CNT - 1)
+#define RCFW_CMD_IS_BLOCKING 0x8000
+
+#define HWRM_VERSION_DEV_ATTR_MAX_DPI 0x1000A0000000DULL
+/* HWRM version 1.10.3.18 */
+#define HWRM_VERSION_READ_CTX 0x1000A00030012
+
/* Crsq buf is 1024-Byte */
struct bnxt_qplib_crsbe {
u8 data[1024];
@@ -128,83 +143,23 @@ struct bnxt_qplib_crsbe {
/* Allocate 1 per QP for async error notification for now */
#define BNXT_QPLIB_CREQE_MAX_CNT (64 * 1024)
#define BNXT_QPLIB_CREQE_UNITS 16 /* 16-Bytes per prod unit */
-#define BNXT_QPLIB_CREQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_CREQE_UNITS)
-
-#define MAX_CREQ_IDX (BNXT_QPLIB_CREQE_MAX_CNT - 1)
-#define MAX_CREQ_IDX_PER_PG (BNXT_QPLIB_CREQE_CNT_PER_PG - 1)
-
-static inline u32 get_creq_pg(u32 val)
-{
- return (val & ~MAX_CREQ_IDX_PER_PG) / BNXT_QPLIB_CREQE_CNT_PER_PG;
-}
-
-static inline u32 get_creq_idx(u32 val)
-{
- return val & MAX_CREQ_IDX_PER_PG;
-}
-
-#define BNXT_QPLIB_CREQE_PER_PG (PAGE_SIZE / sizeof(struct creq_base))
-
-#define CREQ_CMP_VALID(hdr, raw_cons, cp_bit) \
+#define CREQ_CMP_VALID(hdr, pass) \
(!!((hdr)->v & CREQ_BASE_V) == \
- !((raw_cons) & (cp_bit)))
-
-#define CREQ_DB_KEY_CP (0x2 << CMPL_DOORBELL_KEY_SFT)
-#define CREQ_DB_IDX_VALID CMPL_DOORBELL_IDX_VALID
-#define CREQ_DB_IRQ_DIS CMPL_DOORBELL_MASK
-#define CREQ_DB_CP_FLAGS_REARM (CREQ_DB_KEY_CP | \
- CREQ_DB_IDX_VALID)
-#define CREQ_DB_CP_FLAGS (CREQ_DB_KEY_CP | \
- CREQ_DB_IDX_VALID | \
- CREQ_DB_IRQ_DIS)
-
-static inline void bnxt_qplib_ring_creq_db64(void __iomem *db, u32 index,
- u32 xid, bool arm)
-{
- u64 val = 0;
-
- val = xid & DBC_DBC_XID_MASK;
- val |= DBC_DBC_PATH_ROCE;
- val |= arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ;
- val <<= 32;
- val |= index & DBC_DBC_INDEX_MASK;
-
- writeq(val, db);
-}
-
-static inline void bnxt_qplib_ring_creq_db_rearm(void __iomem *db, u32 raw_cons,
- u32 max_elements, u32 xid,
- bool gen_p5)
-{
- u32 index = raw_cons & (max_elements - 1);
-
- if (gen_p5)
- bnxt_qplib_ring_creq_db64(db, index, xid, true);
- else
- writel(CREQ_DB_CP_FLAGS_REARM | (index & DBC_DBC32_XID_MASK),
- db);
-}
-
-static inline void bnxt_qplib_ring_creq_db(void __iomem *db, u32 raw_cons,
- u32 max_elements, u32 xid,
- bool gen_p5)
-{
- u32 index = raw_cons & (max_elements - 1);
-
- if (gen_p5)
- bnxt_qplib_ring_creq_db64(db, index, xid, true);
- else
- writel(CREQ_DB_CP_FLAGS | (index & DBC_DBC32_XID_MASK),
- db);
-}
-
+ !((pass) & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK))
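+/* This replaces the old raw_cons/cp_bit check: the producer is
+ * expected to toggle the CREQ_BASE_V bit in each entry every time the
+ * ring wraps, while the consumer toggles its epoch flag (kept in
+ * dbinfo.flags and advanced by bnxt_qplib_hwq_incr_cons()), so an
+ * entry is treated as valid only when its V bit matches the phase
+ * expected for the current pass.
+ */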
#define CREQ_ENTRY_POLL_BUDGET 0x100
/* HWQ */
+typedef int (*aeq_handler_t)(struct bnxt_qplib_rcfw *, void *, void *);
-struct bnxt_qplib_crsq {
+struct bnxt_qplib_crsqe {
struct creq_qp_event *resp;
u32 req_size;
+ /* Free slots at the time of submission */
+ u32 free_slots;
+ u8 opcode;
+ bool is_waiter_alive;
+ bool is_internal_cmd;
+ bool is_in_used;
};
struct bnxt_qplib_rcfw_sbuf {
@@ -220,63 +175,103 @@ struct bnxt_qplib_qp_node {
#define BNXT_QPLIB_OOS_COUNT_MASK 0xFFFFFFFF
+#define FIRMWARE_INITIALIZED_FLAG (0)
+#define FIRMWARE_FIRST_FLAG (31)
+#define FIRMWARE_STALL_DETECTED (3)
+#define ERR_DEVICE_DETACHED (4)
+
+struct bnxt_qplib_cmdq_mbox {
+ struct bnxt_qplib_reg_desc reg;
+ void __iomem *prod;
+ void __iomem *db;
+};
+
+struct bnxt_qplib_cmdq_ctx {
+ struct bnxt_qplib_hwq hwq;
+ struct bnxt_qplib_cmdq_mbox cmdq_mbox;
+ wait_queue_head_t waitq;
+ unsigned long flags;
+ unsigned long last_seen;
+ u32 seq_num;
+};
+
+struct bnxt_qplib_creq_db {
+ struct bnxt_qplib_reg_desc reg;
+ struct bnxt_qplib_db_info dbinfo;
+};
+
+struct bnxt_qplib_creq_stat {
+ u64 creq_qp_event_processed;
+ u64 creq_func_event_processed;
+};
+
+struct bnxt_qplib_creq_ctx {
+ struct bnxt_qplib_hwq hwq;
+ struct bnxt_qplib_creq_db creq_db;
+ struct bnxt_qplib_creq_stat stats;
+ struct tasklet_struct creq_tasklet;
+ aeq_handler_t aeq_handler;
+ u16 ring_id;
+ int msix_vec;
+ bool requested; /* irq handler installed */
+ char *irq_name;
+};
+
/* RCFW Communication Channels */
struct bnxt_qplib_rcfw {
struct pci_dev *pdev;
struct bnxt_qplib_res *res;
- int vector;
- struct tasklet_struct worker;
- bool requested;
- unsigned long *cmdq_bitmap;
- u32 bmap_size;
- unsigned long flags;
-#define FIRMWARE_INITIALIZED_FLAG 0
-#define FIRMWARE_FIRST_FLAG 31
-#define FIRMWARE_TIMED_OUT 3
- wait_queue_head_t waitq;
- int (*aeq_handler)(struct bnxt_qplib_rcfw *,
- void *, void *);
- u32 seq_num;
-
- /* Bar region info */
- void __iomem *cmdq_bar_reg_iomem;
- u16 cmdq_bar_reg;
- u16 cmdq_bar_reg_prod_off;
- u16 cmdq_bar_reg_trig_off;
- u16 creq_ring_id;
- u16 creq_bar_reg;
- void __iomem *creq_bar_reg_iomem;
-
- /* Cmd-Resp and Async Event notification queue */
- struct bnxt_qplib_hwq creq;
- u64 creq_qp_event_processed;
- u64 creq_func_event_processed;
-
- /* Actual Cmd and Resp Queues */
- struct bnxt_qplib_hwq cmdq;
- struct bnxt_qplib_crsq *crsqe_tbl;
+ struct bnxt_qplib_cmdq_ctx cmdq;
+ struct bnxt_qplib_creq_ctx creq;
+ struct bnxt_qplib_crsqe *crsqe_tbl;
int qp_tbl_size;
struct bnxt_qplib_qp_node *qp_tbl;
+ /* To synchronize the qp-handle hash table */
+ spinlock_t tbl_lock;
u64 oos_prev;
u32 init_oos_stats;
u32 cmdq_depth;
+ atomic_t rcfw_intr_enabled;
+ struct semaphore rcfw_inflight;
+ atomic_t timeout_send;
+ /* cached from chip cctx for quick reference in slow path */
+ u16 max_timeout;
+ bool roce_mirror;
};
+struct bnxt_qplib_cmdqmsg {
+ struct cmdq_base *req;
+ struct creq_base *resp;
+ void *sb;
+ u32 req_sz;
+ u32 res_sz;
+ u8 block;
+};
+
+static inline void bnxt_qplib_fill_cmdqmsg(struct bnxt_qplib_cmdqmsg *msg,
+ void *req, void *resp, void *sb,
+ u32 req_sz, u32 res_sz, u8 block)
+{
+ msg->req = req;
+ msg->resp = resp;
+ msg->sb = sb;
+ msg->req_sz = req_sz;
+ msg->res_sz = res_sz;
+ msg->block = block;
+}
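+/* Usage sketch, mirroring bnxt_qplib_deinit_rcfw():
+ *
+ *	struct creq_deinitialize_fw_resp resp = {};
+ *	struct cmdq_deinitialize_fw req = {};
+ *	struct bnxt_qplib_cmdqmsg msg = {};
+ *
+ *	bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL,
+ *				sizeof(req), sizeof(resp), 0);
+ *	rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ *
+ * sb carries the optional side buffer (the old sbuf argument used by
+ * query commands) and a non-zero block requests the blocking
+ * completion path (the old is_block argument).
+ */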
+
void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
-int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
+int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
struct bnxt_qplib_rcfw *rcfw,
- struct bnxt_qplib_ctx *ctx,
- int qp_tbl_sz);
+ struct bnxt_qplib_ctx *ctx);
void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill);
void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
bool need_init);
-int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
- struct bnxt_qplib_rcfw *rcfw,
+int bnxt_qplib_enable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw,
int msix_vector,
- int cp_bar_reg_off, int virt_fn,
- int (*aeq_handler)(struct bnxt_qplib_rcfw *,
- void *aeqe, void *obj));
+ int cp_bar_reg_off,
+ aeq_handler_t aeq_handler);
struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf(
struct bnxt_qplib_rcfw *rcfw,
@@ -284,11 +279,16 @@ struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf(
void bnxt_qplib_rcfw_free_sbuf(struct bnxt_qplib_rcfw *rcfw,
struct bnxt_qplib_rcfw_sbuf *sbuf);
int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
- struct cmdq_base *req, struct creq_base *resp,
- void *sbuf, u8 is_block);
+ struct bnxt_qplib_cmdqmsg *msg);
int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw);
int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
struct bnxt_qplib_ctx *ctx, int is_virtfn);
void bnxt_qplib_mark_qp_error(void *qp_handle);
+
+static inline u32 map_qp_id_to_tbl_indx(u32 qid, struct bnxt_qplib_rcfw *rcfw)
+{
+ /* Last index of the qp_tbl is for QP1, i.e. qp_tbl_size - 1 */
+ return (qid == 1) ? rcfw->qp_tbl_size - 1 : (qid % (rcfw->qp_tbl_size - 2));
+}
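+/* Worked example with a hypothetical qp_tbl_size of 64: QP1 always
+ * maps to the last index, 63, while any other qid hashes into the
+ * remaining slots as qid % 62, e.g. qid 130 lands at index 6.  The
+ * modulo mapping can alias different qids to the same slot.
+ */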
#endif /* __BNXT_QPLIB_RCFW_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index 37928b1111df..875d7b52c06a 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -44,20 +44,20 @@
#include <linux/inetdevice.h>
#include <linux/dma-mapping.h>
#include <linux/if_vlan.h>
+#include <linux/vmalloc.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+
#include "roce_hsi.h"
#include "qplib_res.h"
#include "qplib_sp.h"
#include "qplib_rcfw.h"
-static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
- struct bnxt_qplib_stats *stats);
-static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
- struct bnxt_qplib_stats *stats);
-
/* PBL */
-static void __free_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
+static void __free_pbl(struct bnxt_qplib_res *res, struct bnxt_qplib_pbl *pbl,
bool is_umem)
{
+ struct pci_dev *pdev = res->pdev;
int i;
if (!is_umem) {
@@ -74,37 +74,60 @@ static void __free_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
pbl->pg_arr[i] = NULL;
}
}
- kfree(pbl->pg_arr);
+ vfree(pbl->pg_arr);
pbl->pg_arr = NULL;
- kfree(pbl->pg_map_arr);
+ vfree(pbl->pg_map_arr);
pbl->pg_map_arr = NULL;
pbl->pg_count = 0;
pbl->pg_size = 0;
}
-static int __alloc_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
- struct scatterlist *sghead, u32 pages,
- u32 nmaps, u32 pg_size)
+static void bnxt_qplib_fill_user_dma_pages(struct bnxt_qplib_pbl *pbl,
+ struct bnxt_qplib_sg_info *sginfo)
+{
+ struct ib_block_iter biter;
+ int i = 0;
+
+ rdma_umem_for_each_dma_block(sginfo->umem, &biter, sginfo->pgsize) {
+ pbl->pg_map_arr[i] = rdma_block_iter_dma_address(&biter);
+ pbl->pg_arr[i] = NULL;
+ pbl->pg_count++;
+ i++;
+ }
+}
+
+static int __alloc_pbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pbl *pbl,
+ struct bnxt_qplib_sg_info *sginfo)
{
- struct sg_dma_page_iter sg_iter;
+ struct pci_dev *pdev = res->pdev;
bool is_umem = false;
+ u32 pages;
int i;
+ if (sginfo->nopte)
+ return 0;
+ if (sginfo->umem)
+ pages = ib_umem_num_dma_blocks(sginfo->umem, sginfo->pgsize);
+ else
+ pages = sginfo->npages;
/* page ptr arrays */
- pbl->pg_arr = kcalloc(pages, sizeof(void *), GFP_KERNEL);
+ pbl->pg_arr = vmalloc_array(pages, sizeof(void *));
if (!pbl->pg_arr)
return -ENOMEM;
+ memset(pbl->pg_arr, 0, pages * sizeof(void *));
- pbl->pg_map_arr = kcalloc(pages, sizeof(dma_addr_t), GFP_KERNEL);
+ pbl->pg_map_arr = vmalloc_array(pages, sizeof(dma_addr_t));
if (!pbl->pg_map_arr) {
- kfree(pbl->pg_arr);
+ vfree(pbl->pg_arr);
pbl->pg_arr = NULL;
return -ENOMEM;
}
+ memset(pbl->pg_map_arr, 0, pages * sizeof(dma_addr_t));
pbl->pg_count = 0;
- pbl->pg_size = pg_size;
+ pbl->pg_size = sginfo->pgsize;
- if (!sghead) {
+ if (!sginfo->umem) {
for (i = 0; i < pages; i++) {
pbl->pg_arr[i] = dma_alloc_coherent(&pdev->dev,
pbl->pg_size,
@@ -115,25 +138,19 @@ static int __alloc_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
pbl->pg_count++;
}
} else {
- i = 0;
is_umem = true;
- for_each_sg_dma_page(sghead, &sg_iter, nmaps, 0) {
- pbl->pg_map_arr[i] = sg_page_iter_dma_address(&sg_iter);
- pbl->pg_arr[i] = NULL;
- pbl->pg_count++;
- i++;
- }
+ bnxt_qplib_fill_user_dma_pages(pbl, sginfo);
}
return 0;
-
fail:
- __free_pbl(pdev, pbl, is_umem);
+ __free_pbl(res, pbl, is_umem);
return -ENOMEM;
}
/* HWQ */
-void bnxt_qplib_free_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq)
+void bnxt_qplib_free_hwq(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_hwq *hwq)
{
int i;
@@ -144,9 +161,9 @@ void bnxt_qplib_free_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq)
for (i = 0; i < hwq->level + 1; i++) {
if (i == hwq->level)
- __free_pbl(pdev, &hwq->pbl[i], hwq->is_user);
+ __free_pbl(res, &hwq->pbl[i], hwq->is_user);
else
- __free_pbl(pdev, &hwq->pbl[i], false);
+ __free_pbl(res, &hwq->pbl[i], false);
}
hwq->level = PBL_LVL_MAX;
@@ -158,79 +175,96 @@ void bnxt_qplib_free_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq)
}
/* All HWQs are power of 2 in size */
-int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq,
- struct bnxt_qplib_sg_info *sg_info,
- u32 *elements, u32 element_size, u32 aux,
- u32 pg_size, enum bnxt_qplib_hwq_type hwq_type)
+
+int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq,
+ struct bnxt_qplib_hwq_attr *hwq_attr)
{
- u32 pages, maps, slots, size, aux_pages = 0, aux_size = 0;
+ u32 npages, aux_slots, pg_size, aux_pages = 0, aux_size = 0;
+ struct bnxt_qplib_sg_info sginfo = {};
+ u32 depth, stride, npbl, npde;
dma_addr_t *src_phys_ptr, **dst_virt_ptr;
- struct scatterlist *sghead = NULL;
- int i, rc;
+ struct bnxt_qplib_res *res;
+ struct pci_dev *pdev;
+ int i, rc, lvl;
+ res = hwq_attr->res;
+ pdev = res->pdev;
+ pg_size = hwq_attr->sginfo->pgsize;
hwq->level = PBL_LVL_MAX;
- slots = roundup_pow_of_two(*elements);
- if (aux) {
- aux_size = roundup_pow_of_two(aux);
- aux_pages = (slots * aux_size) / pg_size;
- if ((slots * aux_size) % pg_size)
+ depth = roundup_pow_of_two(hwq_attr->depth);
+ stride = roundup_pow_of_two(hwq_attr->stride);
+ if (hwq_attr->aux_depth) {
+ aux_slots = hwq_attr->aux_depth;
+ aux_size = roundup_pow_of_two(hwq_attr->aux_stride);
+ aux_pages = (aux_slots * aux_size) / pg_size;
+ if ((aux_slots * aux_size) % pg_size)
aux_pages++;
}
- size = roundup_pow_of_two(element_size);
- if (sg_info)
- sghead = sg_info->sglist;
-
- if (!sghead) {
+ if (!hwq_attr->sginfo->umem) {
hwq->is_user = false;
- pages = (slots * size) / pg_size + aux_pages;
- if ((slots * size) % pg_size)
- pages++;
- if (!pages)
+ npages = (depth * stride) / pg_size + aux_pages;
+ if ((depth * stride) % pg_size)
+ npages++;
+ if (!npages)
return -EINVAL;
- maps = 0;
+ hwq_attr->sginfo->npages = npages;
} else {
+ npages = ib_umem_num_dma_blocks(hwq_attr->sginfo->umem,
+ hwq_attr->sginfo->pgsize);
hwq->is_user = true;
- pages = sg_info->npages;
- maps = sg_info->nmap;
}
- /* Alloc the 1st memory block; can be a PDL/PTL/PBL */
- if (sghead && (pages == MAX_PBL_LVL_0_PGS))
- rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_0], sghead,
- pages, maps, pg_size);
- else
- rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_0], NULL,
- 1, 0, pg_size);
- if (rc)
- goto fail;
-
- hwq->level = PBL_LVL_0;
+ if (npages == MAX_PBL_LVL_0_PGS && !hwq_attr->sginfo->nopte) {
+ /* This request is Level 0, map PTE */
+ rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_0], hwq_attr->sginfo);
+ if (rc)
+ goto fail;
+ hwq->level = PBL_LVL_0;
+ goto done;
+ }
- if (pages > MAX_PBL_LVL_0_PGS) {
- if (pages > MAX_PBL_LVL_1_PGS) {
+ if (npages >= MAX_PBL_LVL_0_PGS) {
+ if (npages > MAX_PBL_LVL_1_PGS) {
+ u32 flag = (hwq_attr->type == HWQ_TYPE_L2_CMPL) ?
+ 0 : PTU_PTE_VALID;
/* 2 levels of indirection */
- rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_1], NULL,
- MAX_PBL_LVL_1_PGS_FOR_LVL_2,
- 0, pg_size);
+ npbl = npages >> MAX_PBL_LVL_1_PGS_SHIFT;
+ if (npages % BIT(MAX_PBL_LVL_1_PGS_SHIFT))
+ npbl++;
+ npde = npbl >> MAX_PDL_LVL_SHIFT;
+ if (npbl % BIT(MAX_PDL_LVL_SHIFT))
+ npde++;
+ /* Alloc PDE pages */
+ sginfo.pgsize = npde * pg_size;
+ sginfo.npages = 1;
+ rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_0], &sginfo);
+ if (rc)
+ goto fail;
+
+ /* Alloc PBL pages */
+ sginfo.npages = npbl;
+ sginfo.pgsize = PAGE_SIZE;
+ rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_1], &sginfo);
if (rc)
goto fail;
- /* Fill in lvl0 PBL */
+ /* Fill PDL with PBL page pointers */
dst_virt_ptr =
(dma_addr_t **)hwq->pbl[PBL_LVL_0].pg_arr;
src_phys_ptr = hwq->pbl[PBL_LVL_1].pg_map_arr;
for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; i++)
- dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
- src_phys_ptr[i] | PTU_PDE_VALID;
- hwq->level = PBL_LVL_1;
+ dst_virt_ptr[0][i] = src_phys_ptr[i] | flag;
- rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_2], sghead,
- pages, maps, pg_size);
+ /* Alloc or init PTEs */
+ rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_2],
+ hwq_attr->sginfo);
if (rc)
goto fail;
-
- /* Fill in lvl1 PBL */
+ hwq->level = PBL_LVL_2;
+ if (hwq_attr->sginfo->nopte)
+ goto done;
+ /* Fill PBLs with PTE pointers */
dst_virt_ptr =
(dma_addr_t **)hwq->pbl[PBL_LVL_1].pg_arr;
src_phys_ptr = hwq->pbl[PBL_LVL_2].pg_map_arr;
@@ -238,7 +272,7 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq,
dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
src_phys_ptr[i] | PTU_PTE_VALID;
}
- if (hwq_type == HWQ_TYPE_QUEUE) {
+ if (hwq_attr->type == HWQ_TYPE_QUEUE) {
/* Find the last pg of the size */
i = hwq->pbl[PBL_LVL_2].pg_count;
dst_virt_ptr[PTR_PG(i - 1)][PTR_IDX(i - 1)] |=
@@ -248,25 +282,36 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq,
[PTR_IDX(i - 2)] |=
PTU_PTE_NEXT_TO_LAST;
}
- hwq->level = PBL_LVL_2;
- } else {
- u32 flag = hwq_type == HWQ_TYPE_L2_CMPL ? 0 :
- PTU_PTE_VALID;
+ } else { /* npages < 512: npbl = 1, npde = 0 */
+ u32 flag = (hwq_attr->type == HWQ_TYPE_L2_CMPL) ?
+ 0 : PTU_PTE_VALID;
/* 1 level of indirection */
- rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_1], sghead,
- pages, maps, pg_size);
+ npbl = npages >> MAX_PBL_LVL_1_PGS_SHIFT;
+ if (npages % BIT(MAX_PBL_LVL_1_PGS_SHIFT))
+ npbl++;
+ sginfo.npages = npbl;
+ sginfo.pgsize = PAGE_SIZE;
+ /* Alloc PBL page */
+ rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_0], &sginfo);
+ if (rc)
+ goto fail;
+ /* Alloc or init PTEs */
+ rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_1],
+ hwq_attr->sginfo);
if (rc)
goto fail;
- /* Fill in lvl0 PBL */
+ hwq->level = PBL_LVL_1;
+ if (hwq_attr->sginfo->nopte)
+ goto done;
+ /* Fill PBL with PTE pointers */
dst_virt_ptr =
(dma_addr_t **)hwq->pbl[PBL_LVL_0].pg_arr;
src_phys_ptr = hwq->pbl[PBL_LVL_1].pg_map_arr;
- for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; i++) {
+ for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; i++)
dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
src_phys_ptr[i] | flag;
- }
- if (hwq_type == HWQ_TYPE_QUEUE) {
+ if (hwq_attr->type == HWQ_TYPE_QUEUE) {
/* Find the last pg of the size */
i = hwq->pbl[PBL_LVL_1].pg_count;
dst_virt_ptr[PTR_PG(i - 1)][PTR_IDX(i - 1)] |=
@@ -276,46 +321,145 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq,
[PTR_IDX(i - 2)] |=
PTU_PTE_NEXT_TO_LAST;
}
- hwq->level = PBL_LVL_1;
}
}
- hwq->pdev = pdev;
- spin_lock_init(&hwq->lock);
+done:
hwq->prod = 0;
hwq->cons = 0;
- *elements = hwq->max_elements = slots;
- hwq->element_size = size;
-
+ hwq->pdev = pdev;
+ hwq->depth = hwq_attr->depth;
+ hwq->max_elements = hwq->depth;
+ hwq->element_size = stride;
+ hwq->qe_ppg = pg_size / stride;
/* For direct access to the elements */
- hwq->pbl_ptr = hwq->pbl[hwq->level].pg_arr;
- hwq->pbl_dma_ptr = hwq->pbl[hwq->level].pg_map_arr;
+ lvl = hwq->level;
+ if (hwq_attr->sginfo->nopte && hwq->level)
+ lvl = hwq->level - 1;
+ hwq->pbl_ptr = hwq->pbl[lvl].pg_arr;
+ hwq->pbl_dma_ptr = hwq->pbl[lvl].pg_map_arr;
+ spin_lock_init(&hwq->lock);
return 0;
-
fail:
- bnxt_qplib_free_hwq(pdev, hwq);
+ bnxt_qplib_free_hwq(res, hwq);
return -ENOMEM;
}
/* Context Tables */
-void bnxt_qplib_free_ctx(struct pci_dev *pdev,
- struct bnxt_qplib_ctx *ctx)
+void bnxt_qplib_free_hwctx(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_ctx *ctx)
{
int i;
- bnxt_qplib_free_hwq(pdev, &ctx->qpc_tbl);
- bnxt_qplib_free_hwq(pdev, &ctx->mrw_tbl);
- bnxt_qplib_free_hwq(pdev, &ctx->srqc_tbl);
- bnxt_qplib_free_hwq(pdev, &ctx->cq_tbl);
- bnxt_qplib_free_hwq(pdev, &ctx->tim_tbl);
+ bnxt_qplib_free_hwq(res, &ctx->qpc_tbl);
+ bnxt_qplib_free_hwq(res, &ctx->mrw_tbl);
+ bnxt_qplib_free_hwq(res, &ctx->srqc_tbl);
+ bnxt_qplib_free_hwq(res, &ctx->cq_tbl);
+ bnxt_qplib_free_hwq(res, &ctx->tim_tbl);
for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
- bnxt_qplib_free_hwq(pdev, &ctx->tqm_tbl[i]);
- bnxt_qplib_free_hwq(pdev, &ctx->tqm_pde);
- bnxt_qplib_free_stats_ctx(pdev, &ctx->stats);
+ bnxt_qplib_free_hwq(res, &ctx->tqm_ctx.qtbl[i]);
+ /* restore original pde level before destroy */
+ ctx->tqm_ctx.pde.level = ctx->tqm_ctx.pde_level;
+ bnxt_qplib_free_hwq(res, &ctx->tqm_ctx.pde);
+}
+
+static int bnxt_qplib_alloc_tqm_rings(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_ctx *ctx)
+{
+ struct bnxt_qplib_hwq_attr hwq_attr = {};
+ struct bnxt_qplib_sg_info sginfo = {};
+ struct bnxt_qplib_tqm_ctx *tqmctx;
+ int rc;
+ int i;
+
+ tqmctx = &ctx->tqm_ctx;
+
+ sginfo.pgsize = PAGE_SIZE;
+ sginfo.pgshft = PAGE_SHIFT;
+ hwq_attr.sginfo = &sginfo;
+ hwq_attr.res = res;
+ hwq_attr.type = HWQ_TYPE_CTX;
+ hwq_attr.depth = 512;
+ hwq_attr.stride = sizeof(u64);
+ /* Alloc pdl buffer */
+ rc = bnxt_qplib_alloc_init_hwq(&tqmctx->pde, &hwq_attr);
+ if (rc)
+ goto out;
+ /* Save original pdl level */
+ tqmctx->pde_level = tqmctx->pde.level;
+
+ hwq_attr.stride = 1;
+ for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) {
+ if (!tqmctx->qcount[i])
+ continue;
+ hwq_attr.depth = ctx->qpc_count * tqmctx->qcount[i];
+ rc = bnxt_qplib_alloc_init_hwq(&tqmctx->qtbl[i], &hwq_attr);
+ if (rc)
+ goto out;
+ }
+out:
+ return rc;
+}
+
+static void bnxt_qplib_map_tqm_pgtbl(struct bnxt_qplib_tqm_ctx *ctx)
+{
+ struct bnxt_qplib_hwq *tbl;
+ dma_addr_t *dma_ptr;
+ __le64 **pbl_ptr, *ptr;
+ int i, j, k;
+ int fnz_idx = -1;
+ int pg_count;
+
+ pbl_ptr = (__le64 **)ctx->pde.pbl_ptr;
+
+ for (i = 0, j = 0; i < MAX_TQM_ALLOC_REQ;
+ i++, j += MAX_TQM_ALLOC_BLK_SIZE) {
+ tbl = &ctx->qtbl[i];
+ if (!tbl->max_elements)
+ continue;
+ if (fnz_idx == -1)
+ fnz_idx = i; /* first non-zero index */
+ switch (tbl->level) {
+ case PBL_LVL_2:
+ pg_count = tbl->pbl[PBL_LVL_1].pg_count;
+ for (k = 0; k < pg_count; k++) {
+ ptr = &pbl_ptr[PTR_PG(j + k)][PTR_IDX(j + k)];
+ dma_ptr = &tbl->pbl[PBL_LVL_1].pg_map_arr[k];
+ *ptr = cpu_to_le64(*dma_ptr | PTU_PTE_VALID);
+ }
+ break;
+ case PBL_LVL_1:
+ case PBL_LVL_0:
+ default:
+ ptr = &pbl_ptr[PTR_PG(j)][PTR_IDX(j)];
+ *ptr = cpu_to_le64(tbl->pbl[PBL_LVL_0].pg_map_arr[0] |
+ PTU_PTE_VALID);
+ break;
+ }
+ }
+ if (fnz_idx == -1)
+ fnz_idx = 0;
+ /* update pde level as per page table programming */
+ ctx->pde.level = (ctx->qtbl[fnz_idx].level == PBL_LVL_2) ? PBL_LVL_2 :
+ ctx->qtbl[fnz_idx].level + 1;
+}
+
+static int bnxt_qplib_setup_tqm_rings(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_ctx *ctx)
+{
+ int rc;
+
+ rc = bnxt_qplib_alloc_tqm_rings(res, ctx);
+ if (rc)
+ goto fail;
+
+ bnxt_qplib_map_tqm_pgtbl(&ctx->tqm_ctx);
+fail:
+ return rc;
}
/*
- * Routine: bnxt_qplib_alloc_ctx
+ * Routine: bnxt_qplib_alloc_hwctx
* Description:
* Context tables are memories which are used by the chip fw.
* The 6 tables defined are:
@@ -335,140 +479,66 @@ void bnxt_qplib_free_ctx(struct pci_dev *pdev,
* Returns:
* 0 if success, else -ERRORS
*/
-int bnxt_qplib_alloc_ctx(struct pci_dev *pdev,
- struct bnxt_qplib_ctx *ctx,
- bool virt_fn, bool is_p5)
+int bnxt_qplib_alloc_hwctx(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_ctx *ctx)
{
- int i, j, k, rc = 0;
- int fnz_idx = -1;
- __le64 **pbl_ptr;
-
- if (virt_fn || is_p5)
- goto stats_alloc;
+ struct bnxt_qplib_hwq_attr hwq_attr = {};
+ struct bnxt_qplib_sg_info sginfo = {};
+ int rc;
/* QPC Tables */
- ctx->qpc_tbl.max_elements = ctx->qpc_count;
- rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->qpc_tbl, NULL,
- &ctx->qpc_tbl.max_elements,
- BNXT_QPLIB_MAX_QP_CTX_ENTRY_SIZE, 0,
- PAGE_SIZE, HWQ_TYPE_CTX);
+ sginfo.pgsize = PAGE_SIZE;
+ sginfo.pgshft = PAGE_SHIFT;
+ hwq_attr.sginfo = &sginfo;
+
+ hwq_attr.res = res;
+ hwq_attr.depth = ctx->qpc_count;
+ hwq_attr.stride = BNXT_QPLIB_MAX_QP_CTX_ENTRY_SIZE;
+ hwq_attr.type = HWQ_TYPE_CTX;
+ rc = bnxt_qplib_alloc_init_hwq(&ctx->qpc_tbl, &hwq_attr);
if (rc)
goto fail;
/* MRW Tables */
- ctx->mrw_tbl.max_elements = ctx->mrw_count;
- rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->mrw_tbl, NULL,
- &ctx->mrw_tbl.max_elements,
- BNXT_QPLIB_MAX_MRW_CTX_ENTRY_SIZE, 0,
- PAGE_SIZE, HWQ_TYPE_CTX);
+ hwq_attr.depth = ctx->mrw_count;
+ hwq_attr.stride = BNXT_QPLIB_MAX_MRW_CTX_ENTRY_SIZE;
+ rc = bnxt_qplib_alloc_init_hwq(&ctx->mrw_tbl, &hwq_attr);
if (rc)
goto fail;
/* SRQ Tables */
- ctx->srqc_tbl.max_elements = ctx->srqc_count;
- rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->srqc_tbl, NULL,
- &ctx->srqc_tbl.max_elements,
- BNXT_QPLIB_MAX_SRQ_CTX_ENTRY_SIZE, 0,
- PAGE_SIZE, HWQ_TYPE_CTX);
+ hwq_attr.depth = ctx->srqc_count;
+ hwq_attr.stride = BNXT_QPLIB_MAX_SRQ_CTX_ENTRY_SIZE;
+ rc = bnxt_qplib_alloc_init_hwq(&ctx->srqc_tbl, &hwq_attr);
if (rc)
goto fail;
/* CQ Tables */
- ctx->cq_tbl.max_elements = ctx->cq_count;
- rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->cq_tbl, NULL,
- &ctx->cq_tbl.max_elements,
- BNXT_QPLIB_MAX_CQ_CTX_ENTRY_SIZE, 0,
- PAGE_SIZE, HWQ_TYPE_CTX);
+ hwq_attr.depth = ctx->cq_count;
+ hwq_attr.stride = BNXT_QPLIB_MAX_CQ_CTX_ENTRY_SIZE;
+ rc = bnxt_qplib_alloc_init_hwq(&ctx->cq_tbl, &hwq_attr);
if (rc)
goto fail;
/* TQM Buffer */
- ctx->tqm_pde.max_elements = 512;
- rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tqm_pde, NULL,
- &ctx->tqm_pde.max_elements, sizeof(u64),
- 0, PAGE_SIZE, HWQ_TYPE_CTX);
+ rc = bnxt_qplib_setup_tqm_rings(res, ctx);
if (rc)
goto fail;
-
- for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) {
- if (!ctx->tqm_count[i])
- continue;
- ctx->tqm_tbl[i].max_elements = ctx->qpc_count *
- ctx->tqm_count[i];
- rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tqm_tbl[i], NULL,
- &ctx->tqm_tbl[i].max_elements, 1,
- 0, PAGE_SIZE, HWQ_TYPE_CTX);
- if (rc)
- goto fail;
- }
- pbl_ptr = (__le64 **)ctx->tqm_pde.pbl_ptr;
- for (i = 0, j = 0; i < MAX_TQM_ALLOC_REQ;
- i++, j += MAX_TQM_ALLOC_BLK_SIZE) {
- if (!ctx->tqm_tbl[i].max_elements)
- continue;
- if (fnz_idx == -1)
- fnz_idx = i;
- switch (ctx->tqm_tbl[i].level) {
- case PBL_LVL_2:
- for (k = 0; k < ctx->tqm_tbl[i].pbl[PBL_LVL_1].pg_count;
- k++)
- pbl_ptr[PTR_PG(j + k)][PTR_IDX(j + k)] =
- cpu_to_le64(
- ctx->tqm_tbl[i].pbl[PBL_LVL_1].pg_map_arr[k]
- | PTU_PTE_VALID);
- break;
- case PBL_LVL_1:
- case PBL_LVL_0:
- default:
- pbl_ptr[PTR_PG(j)][PTR_IDX(j)] = cpu_to_le64(
- ctx->tqm_tbl[i].pbl[PBL_LVL_0].pg_map_arr[0] |
- PTU_PTE_VALID);
- break;
- }
- }
- if (fnz_idx == -1)
- fnz_idx = 0;
- ctx->tqm_pde_level = ctx->tqm_tbl[fnz_idx].level == PBL_LVL_2 ?
- PBL_LVL_2 : ctx->tqm_tbl[fnz_idx].level + 1;
-
/* TIM Buffer */
ctx->tim_tbl.max_elements = ctx->qpc_count * 16;
- rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tim_tbl, NULL,
- &ctx->tim_tbl.max_elements, 1,
- 0, PAGE_SIZE, HWQ_TYPE_CTX);
- if (rc)
- goto fail;
-
-stats_alloc:
- /* Stats */
- rc = bnxt_qplib_alloc_stats_ctx(pdev, &ctx->stats);
+ hwq_attr.depth = ctx->qpc_count * 16;
+ hwq_attr.stride = 1;
+ rc = bnxt_qplib_alloc_init_hwq(&ctx->tim_tbl, &hwq_attr);
if (rc)
goto fail;
return 0;
fail:
- bnxt_qplib_free_ctx(pdev, ctx);
+ bnxt_qplib_free_hwctx(res, ctx);
return rc;
}
-/* GUID */
-void bnxt_qplib_get_guid(u8 *dev_addr, u8 *guid)
-{
- u8 mac[ETH_ALEN];
-
- /* MAC-48 to EUI-64 mapping */
- memcpy(mac, dev_addr, ETH_ALEN);
- guid[0] = mac[0] ^ 2;
- guid[1] = mac[1];
- guid[2] = mac[2];
- guid[3] = 0xff;
- guid[4] = 0xfe;
- guid[5] = mac[3];
- guid[6] = mac[4];
- guid[7] = mac[5];
-}
-
static void bnxt_qplib_free_sgid_tbl(struct bnxt_qplib_res *res,
struct bnxt_qplib_sgid_tbl *sgid_tbl)
{
@@ -488,7 +558,7 @@ static int bnxt_qplib_alloc_sgid_tbl(struct bnxt_qplib_res *res,
struct bnxt_qplib_sgid_tbl *sgid_tbl,
u16 max)
{
- sgid_tbl->tbl = kcalloc(max, sizeof(struct bnxt_qplib_gid), GFP_KERNEL);
+ sgid_tbl->tbl = kcalloc(max, sizeof(*sgid_tbl->tbl), GFP_KERNEL);
if (!sgid_tbl->tbl)
return -ENOMEM;
@@ -526,9 +596,10 @@ static void bnxt_qplib_cleanup_sgid_tbl(struct bnxt_qplib_res *res,
for (i = 0; i < sgid_tbl->max; i++) {
if (memcmp(&sgid_tbl->tbl[i], &bnxt_qplib_gid_zero,
sizeof(bnxt_qplib_gid_zero)))
- bnxt_qplib_del_sgid(sgid_tbl, &sgid_tbl->tbl[i], true);
+ bnxt_qplib_del_sgid(sgid_tbl, &sgid_tbl->tbl[i].gid,
+ sgid_tbl->tbl[i].vlan_id, true);
}
- memset(sgid_tbl->tbl, 0, sizeof(struct bnxt_qplib_gid) * sgid_tbl->max);
+ memset(sgid_tbl->tbl, 0, sizeof(*sgid_tbl->tbl) * sgid_tbl->max);
memset(sgid_tbl->hw_id, -1, sizeof(u16) * sgid_tbl->max);
memset(sgid_tbl->vlan, 0, sizeof(u8) * sgid_tbl->max);
sgid_tbl->active = 0;
@@ -537,61 +608,53 @@ static void bnxt_qplib_cleanup_sgid_tbl(struct bnxt_qplib_res *res,
static void bnxt_qplib_init_sgid_tbl(struct bnxt_qplib_sgid_tbl *sgid_tbl,
struct net_device *netdev)
{
- memset(sgid_tbl->tbl, 0, sizeof(struct bnxt_qplib_gid) * sgid_tbl->max);
- memset(sgid_tbl->hw_id, -1, sizeof(u16) * sgid_tbl->max);
-}
+ u32 i;
-static void bnxt_qplib_free_pkey_tbl(struct bnxt_qplib_res *res,
- struct bnxt_qplib_pkey_tbl *pkey_tbl)
-{
- if (!pkey_tbl->tbl)
- dev_dbg(&res->pdev->dev, "PKEY tbl not present\n");
- else
- kfree(pkey_tbl->tbl);
+ for (i = 0; i < sgid_tbl->max; i++)
+ sgid_tbl->tbl[i].vlan_id = 0xffff;
- pkey_tbl->tbl = NULL;
- pkey_tbl->max = 0;
- pkey_tbl->active = 0;
+ memset(sgid_tbl->hw_id, -1, sizeof(u16) * sgid_tbl->max);
}
-static int bnxt_qplib_alloc_pkey_tbl(struct bnxt_qplib_res *res,
- struct bnxt_qplib_pkey_tbl *pkey_tbl,
- u16 max)
-{
- pkey_tbl->tbl = kcalloc(max, sizeof(u16), GFP_KERNEL);
- if (!pkey_tbl->tbl)
- return -ENOMEM;
-
- pkey_tbl->max = max;
- return 0;
-};
-
/* PDs */
-int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pdt, struct bnxt_qplib_pd *pd)
+int bnxt_qplib_alloc_pd(struct bnxt_qplib_res *res, struct bnxt_qplib_pd *pd)
{
+ struct bnxt_qplib_pd_tbl *pdt = &res->pd_tbl;
u32 bit_num;
+ int rc = 0;
+ mutex_lock(&res->pd_tbl_lock);
bit_num = find_first_bit(pdt->tbl, pdt->max);
- if (bit_num == pdt->max)
- return -ENOMEM;
+ if (bit_num == pdt->max) {
+ rc = -ENOMEM;
+ goto exit;
+ }
/* Found unused PD */
clear_bit(bit_num, pdt->tbl);
pd->id = bit_num;
- return 0;
+exit:
+ mutex_unlock(&res->pd_tbl_lock);
+ return rc;
}
int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res,
struct bnxt_qplib_pd_tbl *pdt,
struct bnxt_qplib_pd *pd)
{
+ int rc = 0;
+
+ mutex_lock(&res->pd_tbl_lock);
if (test_and_set_bit(pd->id, pdt->tbl)) {
dev_warn(&res->pdev->dev, "Freeing an unused PD? pdn = %d\n",
pd->id);
- return -EINVAL;
+ rc = -EINVAL;
+ goto exit;
}
pd->id = 0;
- return 0;
+exit:
+ mutex_unlock(&res->pd_tbl_lock);
+ return rc;
}
static void bnxt_qplib_free_pd_tbl(struct bnxt_qplib_pd_tbl *pdt)
@@ -616,49 +679,82 @@ static int bnxt_qplib_alloc_pd_tbl(struct bnxt_qplib_res *res,
pdt->max = max;
memset((u8 *)pdt->tbl, 0xFF, bytes);
+ mutex_init(&res->pd_tbl_lock);
return 0;
}
/* DPIs */
-int bnxt_qplib_alloc_dpi(struct bnxt_qplib_dpi_tbl *dpit,
- struct bnxt_qplib_dpi *dpi,
- void *app)
+int bnxt_qplib_alloc_dpi(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_dpi *dpi,
+ void *app, u8 type)
{
+ struct bnxt_qplib_dpi_tbl *dpit = &res->dpi_tbl;
+ struct bnxt_qplib_reg_desc *reg;
u32 bit_num;
+ u64 umaddr;
+
+ reg = &dpit->wcreg;
+ mutex_lock(&res->dpi_tbl_lock);
bit_num = find_first_bit(dpit->tbl, dpit->max);
- if (bit_num == dpit->max)
+ if (bit_num == dpit->max) {
+ mutex_unlock(&res->dpi_tbl_lock);
return -ENOMEM;
+ }
/* Found unused DPI */
clear_bit(bit_num, dpit->tbl);
dpit->app_tbl[bit_num] = app;
- dpi->dpi = bit_num;
- dpi->dbr = dpit->dbr_bar_reg_iomem + (bit_num * PAGE_SIZE);
- dpi->umdbr = dpit->unmapped_dbr + (bit_num * PAGE_SIZE);
+ dpi->bit = bit_num;
+ dpi->dpi = bit_num + (reg->offset - dpit->ucreg.offset) / PAGE_SIZE;
+
+ umaddr = reg->bar_base + reg->offset + bit_num * PAGE_SIZE;
+ dpi->umdbr = umaddr;
+
+ switch (type) {
+ case BNXT_QPLIB_DPI_TYPE_KERNEL:
+ /* privileged dbr was already mapped; just initialize it */
+ dpi->umdbr = dpit->ucreg.bar_base +
+ dpit->ucreg.offset + bit_num * PAGE_SIZE;
+ dpi->dbr = dpit->priv_db;
+ dpi->dpi = dpi->bit;
+ break;
+ case BNXT_QPLIB_DPI_TYPE_WC:
+ dpi->dbr = ioremap_wc(umaddr, PAGE_SIZE);
+ break;
+ default:
+ dpi->dbr = ioremap(umaddr, PAGE_SIZE);
+ break;
+ }
+ dpi->type = type;
+ mutex_unlock(&res->dpi_tbl_lock);
return 0;
+
}
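+/* Summary of the three DPI types handled above: kernel DPIs reuse the
+ * already-mapped privileged doorbell page (dpit->priv_db), WC DPIs map
+ * their doorbell page with ioremap_wc() (write-combining), and any
+ * other type falls back to a plain uncached ioremap().
+ */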
int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res,
- struct bnxt_qplib_dpi_tbl *dpit,
- struct bnxt_qplib_dpi *dpi)
+ struct bnxt_qplib_dpi *dpi)
{
- if (dpi->dpi >= dpit->max) {
- dev_warn(&res->pdev->dev, "Invalid DPI? dpi = %d\n", dpi->dpi);
- return -EINVAL;
- }
- if (test_and_set_bit(dpi->dpi, dpit->tbl)) {
- dev_warn(&res->pdev->dev, "Freeing an unused DPI? dpi = %d\n",
- dpi->dpi);
+ struct bnxt_qplib_dpi_tbl *dpit = &res->dpi_tbl;
+
+ mutex_lock(&res->dpi_tbl_lock);
+ if (dpi->dpi && dpi->type != BNXT_QPLIB_DPI_TYPE_KERNEL)
+ pci_iounmap(res->pdev, dpi->dbr);
+
+ if (test_and_set_bit(dpi->bit, dpit->tbl)) {
+ dev_warn(&res->pdev->dev,
+ "Freeing an unused DPI? dpi = %d, bit = %d\n",
+ dpi->dpi, dpi->bit);
+ mutex_unlock(&res->dpi_tbl_lock);
return -EINVAL;
}
if (dpit->app_tbl)
- dpit->app_tbl[dpi->dpi] = NULL;
+ dpit->app_tbl[dpi->bit] = NULL;
memset(dpi, 0, sizeof(*dpi));
-
+ mutex_unlock(&res->dpi_tbl_lock);
return 0;
}
@@ -667,52 +763,38 @@ static void bnxt_qplib_free_dpi_tbl(struct bnxt_qplib_res *res,
{
kfree(dpit->tbl);
kfree(dpit->app_tbl);
- if (dpit->dbr_bar_reg_iomem)
- pci_iounmap(res->pdev, dpit->dbr_bar_reg_iomem);
- memset(dpit, 0, sizeof(*dpit));
+ dpit->tbl = NULL;
+ dpit->app_tbl = NULL;
+ dpit->max = 0;
}
-static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
- struct bnxt_qplib_dpi_tbl *dpit,
- u32 dbr_offset)
+static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_dev_attr *dev_attr)
{
- u32 dbr_bar_reg = RCFW_DBR_PCI_BAR_REGION;
- resource_size_t bar_reg_base;
- u32 dbr_len, bytes;
-
- if (dpit->dbr_bar_reg_iomem) {
- dev_err(&res->pdev->dev, "DBR BAR region %d already mapped\n",
- dbr_bar_reg);
- return -EALREADY;
- }
-
- bar_reg_base = pci_resource_start(res->pdev, dbr_bar_reg);
- if (!bar_reg_base) {
- dev_err(&res->pdev->dev, "BAR region %d resc start failed\n",
- dbr_bar_reg);
- return -ENOMEM;
- }
+ struct bnxt_qplib_dpi_tbl *dpit;
+ struct bnxt_qplib_reg_desc *reg;
+ unsigned long bar_len;
+ u32 dbr_offset;
+ u32 bytes;
- dbr_len = pci_resource_len(res->pdev, dbr_bar_reg) - dbr_offset;
- if (!dbr_len || ((dbr_len & (PAGE_SIZE - 1)) != 0)) {
- dev_err(&res->pdev->dev, "Invalid DBR length %d\n", dbr_len);
- return -ENOMEM;
- }
+ dpit = &res->dpi_tbl;
+ reg = &dpit->wcreg;
- dpit->dbr_bar_reg_iomem = ioremap_nocache(bar_reg_base + dbr_offset,
- dbr_len);
- if (!dpit->dbr_bar_reg_iomem) {
- dev_err(&res->pdev->dev,
- "FP: DBR BAR region %d mapping failed\n", dbr_bar_reg);
- return -ENOMEM;
+ if (!bnxt_qplib_is_chip_gen_p5_p7(res->cctx)) {
+ /* Offset should come from the L2 driver */
+ dbr_offset = dev_attr->l2_db_size;
+ dpit->ucreg.offset = dbr_offset;
+ dpit->wcreg.offset = dbr_offset;
}
- dpit->unmapped_dbr = bar_reg_base + dbr_offset;
- dpit->max = dbr_len / PAGE_SIZE;
+ bar_len = pci_resource_len(res->pdev, reg->bar_id);
+ dpit->max = (bar_len - reg->offset) / PAGE_SIZE;
+ if (dev_attr->max_dpi)
+ dpit->max = min_t(u32, dpit->max, dev_attr->max_dpi);
- dpit->app_tbl = kcalloc(dpit->max, sizeof(void *), GFP_KERNEL);
+ dpit->app_tbl = kcalloc(dpit->max, sizeof(void *), GFP_KERNEL);
if (!dpit->app_tbl)
- goto unmap_io;
+ return -ENOMEM;
bytes = dpit->max >> 3;
if (!bytes)
@@ -722,39 +804,20 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
if (!dpit->tbl) {
kfree(dpit->app_tbl);
dpit->app_tbl = NULL;
- goto unmap_io;
+ return -ENOMEM;
}
memset((u8 *)dpit->tbl, 0xFF, bytes);
+ mutex_init(&res->dpi_tbl_lock);
+ dpit->priv_db = dpit->ucreg.bar_reg + dpit->ucreg.offset;
return 0;
-unmap_io:
- pci_iounmap(res->pdev, dpit->dbr_bar_reg_iomem);
- return -ENOMEM;
-}
-
-/* PKEYs */
-static void bnxt_qplib_cleanup_pkey_tbl(struct bnxt_qplib_pkey_tbl *pkey_tbl)
-{
- memset(pkey_tbl->tbl, 0, sizeof(u16) * pkey_tbl->max);
- pkey_tbl->active = 0;
-}
-
-static void bnxt_qplib_init_pkey_tbl(struct bnxt_qplib_res *res,
- struct bnxt_qplib_pkey_tbl *pkey_tbl)
-{
- u16 pkey = 0xFFFF;
-
- memset(pkey_tbl->tbl, 0, sizeof(u16) * pkey_tbl->max);
-
- /* pkey default = 0xFFFF */
- bnxt_qplib_add_pkey(res, pkey_tbl, &pkey, false);
}
/* Stats */
-static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
- struct bnxt_qplib_stats *stats)
+void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_stats *stats)
{
if (stats->dma) {
dma_free_coherent(&pdev->dev, stats->size,
@@ -764,16 +827,13 @@ static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
stats->fw_id = -1;
}
-static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
- struct bnxt_qplib_stats *stats)
+int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_chip_ctx *cctx,
+ struct bnxt_qplib_stats *stats)
{
memset(stats, 0, sizeof(*stats));
stats->fw_id = -1;
- /* 128 byte aligned context memory is required only for 57500.
- * However making this unconditional, it does not harm previous
- * generation.
- */
- stats->size = ALIGN(sizeof(struct ctx_hw_stats), 128);
+ stats->size = cctx->hw_stats_size;
stats->dma = dma_alloc_coherent(&pdev->dev, stats->size,
&stats->dma_map, GFP_KERNEL);
if (!stats->dma) {
@@ -785,43 +845,41 @@ static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
void bnxt_qplib_cleanup_res(struct bnxt_qplib_res *res)
{
- bnxt_qplib_cleanup_pkey_tbl(&res->pkey_tbl);
bnxt_qplib_cleanup_sgid_tbl(res, &res->sgid_tbl);
}
int bnxt_qplib_init_res(struct bnxt_qplib_res *res)
{
bnxt_qplib_init_sgid_tbl(&res->sgid_tbl, res->netdev);
- bnxt_qplib_init_pkey_tbl(res, &res->pkey_tbl);
return 0;
}
void bnxt_qplib_free_res(struct bnxt_qplib_res *res)
{
- bnxt_qplib_free_pkey_tbl(res, &res->pkey_tbl);
+ kfree(res->rcfw->qp_tbl);
bnxt_qplib_free_sgid_tbl(res, &res->sgid_tbl);
bnxt_qplib_free_pd_tbl(&res->pd_tbl);
bnxt_qplib_free_dpi_tbl(res, &res->dpi_tbl);
-
- res->netdev = NULL;
- res->pdev = NULL;
}
-int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev,
- struct net_device *netdev,
- struct bnxt_qplib_dev_attr *dev_attr)
+int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct net_device *netdev)
{
- int rc = 0;
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct bnxt_qplib_dev_attr *dev_attr;
+ int rc;
- res->pdev = pdev;
res->netdev = netdev;
+ dev_attr = res->dattr;
- rc = bnxt_qplib_alloc_sgid_tbl(res, &res->sgid_tbl, dev_attr->max_sgid);
- if (rc)
- goto fail;
+ /* Allocate one extra to hold the QP1 entries */
+ rcfw->qp_tbl_size = max_t(u32, BNXT_RE_MAX_QPC_COUNT + 1, dev_attr->max_qp);
+ rcfw->qp_tbl = kcalloc(rcfw->qp_tbl_size, sizeof(struct bnxt_qplib_qp_node),
+ GFP_KERNEL);
+ if (!rcfw->qp_tbl)
+ return -ENOMEM;
- rc = bnxt_qplib_alloc_pkey_tbl(res, &res->pkey_tbl, dev_attr->max_pkey);
+ rc = bnxt_qplib_alloc_sgid_tbl(res, &res->sgid_tbl, dev_attr->max_sgid);
if (rc)
goto fail;
@@ -829,7 +887,7 @@ int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev,
if (rc)
goto fail;
- rc = bnxt_qplib_alloc_dpi_tbl(res, &res->dpi_tbl, dev_attr->l2_db_size);
+ rc = bnxt_qplib_alloc_dpi_tbl(res, dev_attr);
if (rc)
goto fail;
@@ -838,3 +896,60 @@ fail:
bnxt_qplib_free_res(res);
return rc;
}
+
+void bnxt_qplib_unmap_db_bar(struct bnxt_qplib_res *res)
+{
+ struct bnxt_qplib_reg_desc *reg;
+
+ reg = &res->dpi_tbl.ucreg;
+ if (reg->bar_reg)
+ pci_iounmap(res->pdev, reg->bar_reg);
+ reg->bar_reg = NULL;
+ reg->bar_base = 0;
+ reg->len = 0;
+ reg->bar_id = 0;
+}
+
+int bnxt_qplib_map_db_bar(struct bnxt_qplib_res *res)
+{
+ struct bnxt_qplib_reg_desc *ucreg;
+ struct bnxt_qplib_reg_desc *wcreg;
+
+ wcreg = &res->dpi_tbl.wcreg;
+ wcreg->bar_id = RCFW_DBR_PCI_BAR_REGION;
+ wcreg->bar_base = pci_resource_start(res->pdev, wcreg->bar_id);
+
+ ucreg = &res->dpi_tbl.ucreg;
+ ucreg->bar_id = RCFW_DBR_PCI_BAR_REGION;
+ ucreg->bar_base = pci_resource_start(res->pdev, ucreg->bar_id);
+ ucreg->len = ucreg->offset + PAGE_SIZE;
+ if (!ucreg->len || ((ucreg->len & (PAGE_SIZE - 1)) != 0)) {
+ dev_err(&res->pdev->dev, "QPLIB: invalid dbr length %d",
+ (int)ucreg->len);
+ return -EINVAL;
+ }
+ ucreg->bar_reg = ioremap(ucreg->bar_base, ucreg->len);
+ if (!ucreg->bar_reg) {
+ dev_err(&res->pdev->dev, "privileged dpi map failed!");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+int bnxt_qplib_determine_atomics(struct pci_dev *dev)
+{
+ int comp;
+ u16 ctl2;
+
+ comp = pci_enable_atomic_ops_to_root(dev,
+ PCI_EXP_DEVCAP2_ATOMIC_COMP32);
+ if (comp)
+ return -EOPNOTSUPP;
+ comp = pci_enable_atomic_ops_to_root(dev,
+ PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+ if (comp)
+ return -EOPNOTSUPP;
+ pcie_capability_read_word(dev, PCI_EXP_DEVCTL2, &ctl2);
+ return !(ctl2 & PCI_EXP_DEVCTL2_ATOMIC_REQ);
+}
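Taken together with bnxt_qplib_determine_atomics() and the map/unmap helpers above, the reworked API separates BAR mapping from DPI allocation. Below is a minimal usage sketch (editorial, not part of the patch) of the order a caller is expected to follow; error unwinding is trimmed and the surrounding driver state (rcfw, dattr) is assumed to be initialised already.

/* Editorial sketch only -- assumes the qplib_res.h declarations from this patch. */
static int example_setup_doorbells(struct bnxt_qplib_res *res,
				   struct net_device *netdev, void *app)
{
	struct bnxt_qplib_dpi priv_dpi = {};
	int rc;

	/* Map only the privileged (ucreg) doorbell range up front */
	rc = bnxt_qplib_map_db_bar(res);
	if (rc)
		return rc;

	/* Sets up dpi_tbl (ucreg/wcreg offsets, bitmap, dpi_tbl_lock) */
	rc = bnxt_qplib_alloc_res(res, netdev);
	if (rc)
		goto unmap;

	/* Kernel DPI reuses the already mapped ucreg page; no ioremap here */
	rc = bnxt_qplib_alloc_dpi(res, &priv_dpi, app, BNXT_QPLIB_DPI_TYPE_KERNEL);
	if (rc)
		goto free_res;
	return 0;

free_res:
	bnxt_qplib_free_res(res);
unmap:
	bnxt_qplib_unmap_db_bar(res);
	return rc;
}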
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index 30c42c92fac7..2ea3b7f232a3 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -39,8 +39,65 @@
#ifndef __BNXT_QPLIB_RES_H__
#define __BNXT_QPLIB_RES_H__
+#include "bnxt_ulp.h"
+
extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero;
+#define CHIP_NUM_57508 0x1750
+#define CHIP_NUM_57504 0x1751
+#define CHIP_NUM_57502 0x1752
+#define CHIP_NUM_58818 0xd818
+#define CHIP_NUM_57608 0x1760
+
+#define BNXT_RE_MAX_QPC_COUNT (64 * 1024)
+#define BNXT_RE_MAX_MRW_COUNT (64 * 1024)
+#define BNXT_RE_MAX_SRQC_COUNT (64 * 1024)
+#define BNXT_RE_MAX_CQ_COUNT (64 * 1024)
+#define BNXT_RE_MAX_MRW_COUNT_64K (64 * 1024)
+#define BNXT_RE_MAX_MRW_COUNT_256K (256 * 1024)
+
+#define BNXT_QPLIB_DBR_VALID (0x1UL << 26)
+#define BNXT_QPLIB_DBR_EPOCH_SHIFT 24
+#define BNXT_QPLIB_DBR_TOGGLE_SHIFT 25
+
+struct bnxt_qplib_drv_modes {
+ u8 wqe_mode;
+ bool db_push;
+ bool dbr_pacing;
+ u32 toggle_bits;
+ u8 roce_mirror;
+};
+
+enum bnxt_re_toggle_modes {
+ BNXT_QPLIB_CQ_TOGGLE_BIT = 0x1,
+ BNXT_QPLIB_SRQ_TOGGLE_BIT = 0x2,
+};
+
+struct bnxt_qplib_chip_ctx {
+ u16 chip_num;
+ u8 chip_rev;
+ u8 chip_metal;
+ u16 hw_stats_size;
+ u16 hwrm_cmd_max_timeout;
+ struct bnxt_qplib_drv_modes modes;
+ u64 hwrm_intf_ver;
+ u32 dbr_stat_db_fifo;
+};
+
+struct bnxt_qplib_db_pacing_data {
+ u32 do_pacing;
+ u32 pacing_th;
+ u32 alarm_th;
+ u32 fifo_max_depth;
+ u32 fifo_room_mask;
+ u32 fifo_room_shift;
+ u32 grc_reg_offset;
+ u32 dev_err_state;
+};
+
+#define BNXT_QPLIB_DBR_PF_DB_OFFSET 0x10000
+#define BNXT_QPLIB_DBR_VF_DB_OFFSET 0x4000
+
#define PTR_CNT_PER_PG (PAGE_SIZE / sizeof(void *))
#define PTR_MAX_IDX_PER_PG (PTR_CNT_PER_PG - 1)
#define PTR_PG(x) (((x) & ~PTR_MAX_IDX_PER_PG) / PTR_CNT_PER_PG)
@@ -55,7 +112,8 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero;
enum bnxt_qplib_hwq_type {
HWQ_TYPE_CTX,
HWQ_TYPE_QUEUE,
- HWQ_TYPE_L2_CMPL
+ HWQ_TYPE_L2_CMPL,
+ HWQ_TYPE_MR
};
#define MAX_PBL_LVL_0_PGS 1
@@ -63,6 +121,7 @@ enum bnxt_qplib_hwq_type {
#define MAX_PBL_LVL_1_PGS_SHIFT 9
#define MAX_PBL_LVL_1_PGS_FOR_LVL_2 256
#define MAX_PBL_LVL_2_PGS (256 * 512)
+#define MAX_PDL_LVL_SHIFT 9
enum bnxt_qplib_pbl_lvl {
PBL_LVL_0,
@@ -78,6 +137,23 @@ enum bnxt_qplib_pbl_lvl {
#define ROCE_PG_SIZE_8M (8 * 1024 * 1024)
#define ROCE_PG_SIZE_1G (1024 * 1024 * 1024)
+enum bnxt_qplib_hwrm_pg_size {
+ BNXT_QPLIB_HWRM_PG_SIZE_4K = 0,
+ BNXT_QPLIB_HWRM_PG_SIZE_8K = 1,
+ BNXT_QPLIB_HWRM_PG_SIZE_64K = 2,
+ BNXT_QPLIB_HWRM_PG_SIZE_2M = 3,
+ BNXT_QPLIB_HWRM_PG_SIZE_8M = 4,
+ BNXT_QPLIB_HWRM_PG_SIZE_1G = 5,
+};
+
+struct bnxt_qplib_reg_desc {
+ u8 bar_id;
+ resource_size_t bar_base;
+ unsigned long offset;
+ void __iomem *bar_reg;
+ size_t len;
+};
+
struct bnxt_qplib_pbl {
u32 pg_count;
u32 pg_size;
@@ -85,23 +161,68 @@ struct bnxt_qplib_pbl {
dma_addr_t *pg_map_arr;
};
+struct bnxt_qplib_sg_info {
+ struct ib_umem *umem;
+ u32 npages;
+ u32 pgshft;
+ u32 pgsize;
+ bool nopte;
+};
+
+struct bnxt_qplib_hwq_attr {
+ struct bnxt_qplib_res *res;
+ struct bnxt_qplib_sg_info *sginfo;
+ enum bnxt_qplib_hwq_type type;
+ u32 depth;
+ u32 stride;
+ u32 aux_stride;
+ u32 aux_depth;
+};
+
struct bnxt_qplib_hwq {
struct pci_dev *pdev;
/* lock to protect qplib_hwq */
spinlock_t lock;
- struct bnxt_qplib_pbl pbl[PBL_LVL_MAX];
+ struct bnxt_qplib_pbl pbl[PBL_LVL_MAX + 1];
enum bnxt_qplib_pbl_lvl level; /* 0, 1, or 2 */
/* ptr for easy access to the PBL entries */
void **pbl_ptr;
/* ptr for easy access to the dma_addr */
dma_addr_t *pbl_dma_ptr;
u32 max_elements;
+ u32 depth;
u16 element_size; /* Size of each entry */
+ u16 qe_ppg; /* queue entry per page */
u32 prod; /* raw */
u32 cons; /* raw */
u8 cp_bit;
u8 is_user;
+ u64 *pad_pg;
+ u32 pad_stride;
+ u32 pad_pgofft;
+};
+
+struct bnxt_qplib_db_info {
+ void __iomem *db;
+ void __iomem *priv_db;
+ struct bnxt_qplib_hwq *hwq;
+ u32 xid;
+ u32 max_slot;
+ u32 flags;
+ u8 toggle;
+};
+
+enum bnxt_qplib_db_info_flags_mask {
+ BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT = 0x0UL,
+ BNXT_QPLIB_FLAG_EPOCH_PROD_SHIFT = 0x1UL,
+ BNXT_QPLIB_FLAG_EPOCH_CONS_MASK = 0x1UL,
+ BNXT_QPLIB_FLAG_EPOCH_PROD_MASK = 0x2UL,
+};
+
+enum bnxt_qplib_db_epoch_flag_shift {
+ BNXT_QPLIB_DB_EPOCH_CONS_SHIFT = BNXT_QPLIB_DBR_EPOCH_SHIFT,
+ BNXT_QPLIB_DB_EPOCH_PROD_SHIFT = (BNXT_QPLIB_DBR_EPOCH_SHIFT - 1),
};
/* Tables */
@@ -111,7 +232,7 @@ struct bnxt_qplib_pd_tbl {
};
struct bnxt_qplib_sgid_tbl {
- struct bnxt_qplib_gid *tbl;
+ struct bnxt_qplib_gid_info *tbl;
u16 *hw_id;
u16 max;
u16 active;
@@ -119,24 +240,27 @@ struct bnxt_qplib_sgid_tbl {
u8 *vlan;
};
-struct bnxt_qplib_pkey_tbl {
- u16 *tbl;
- u16 max;
- u16 active;
+enum {
+ BNXT_QPLIB_DPI_TYPE_KERNEL = 0,
+ BNXT_QPLIB_DPI_TYPE_UC = 1,
+ BNXT_QPLIB_DPI_TYPE_WC = 2
};
struct bnxt_qplib_dpi {
u32 dpi;
+ u32 bit;
void __iomem *dbr;
u64 umdbr;
+ u8 type;
};
struct bnxt_qplib_dpi_tbl {
void **app_tbl;
unsigned long *tbl;
u16 max;
- void __iomem *dbr_bar_reg_iomem;
- u64 unmapped_dbr;
+ struct bnxt_qplib_reg_desc ucreg; /* Hold entire DB bar. */
+ struct bnxt_qplib_reg_desc wcreg;
+ void __iomem *priv_db;
};
struct bnxt_qplib_stats {
@@ -159,6 +283,15 @@ struct bnxt_qplib_vf_res {
#define BNXT_QPLIB_MAX_CQ_CTX_ENTRY_SIZE 64
#define BNXT_QPLIB_MAX_MRW_CTX_ENTRY_SIZE 128
+#define MAX_TQM_ALLOC_REQ 48
+#define MAX_TQM_ALLOC_BLK_SIZE 8
+struct bnxt_qplib_tqm_ctx {
+ struct bnxt_qplib_hwq pde;
+ u8 pde_level; /* Original level */
+ struct bnxt_qplib_hwq qtbl[MAX_TQM_ALLOC_REQ];
+ u8 qcount[MAX_TQM_ALLOC_REQ];
+};
+
struct bnxt_qplib_ctx {
u32 qpc_count;
struct bnxt_qplib_hwq qpc_tbl;
@@ -169,61 +302,113 @@ struct bnxt_qplib_ctx {
u32 cq_count;
struct bnxt_qplib_hwq cq_tbl;
struct bnxt_qplib_hwq tim_tbl;
-#define MAX_TQM_ALLOC_REQ 48
-#define MAX_TQM_ALLOC_BLK_SIZE 8
- u8 tqm_count[MAX_TQM_ALLOC_REQ];
- struct bnxt_qplib_hwq tqm_pde;
- u32 tqm_pde_level;
- struct bnxt_qplib_hwq tqm_tbl[MAX_TQM_ALLOC_REQ];
+ struct bnxt_qplib_tqm_ctx tqm_ctx;
struct bnxt_qplib_stats stats;
+ struct bnxt_qplib_stats stats3;
struct bnxt_qplib_vf_res vf_res;
- u64 hwrm_intf_ver;
};
-struct bnxt_qplib_chip_ctx {
- u16 chip_num;
- u8 chip_rev;
- u8 chip_metal;
-};
-
-#define CHIP_NUM_57500 0x1750
-
struct bnxt_qplib_res {
struct pci_dev *pdev;
struct bnxt_qplib_chip_ctx *cctx;
+ struct bnxt_qplib_dev_attr *dattr;
struct net_device *netdev;
-
+ struct bnxt_en_dev *en_dev;
struct bnxt_qplib_rcfw *rcfw;
struct bnxt_qplib_pd_tbl pd_tbl;
+ /* To protect the pd table bit map */
+ struct mutex pd_tbl_lock;
struct bnxt_qplib_sgid_tbl sgid_tbl;
- struct bnxt_qplib_pkey_tbl pkey_tbl;
struct bnxt_qplib_dpi_tbl dpi_tbl;
+ /* To protect the dpi table bit map */
+ struct mutex dpi_tbl_lock;
bool prio;
+ bool is_vf;
+ struct bnxt_qplib_db_pacing_data *pacing_data;
};
+static inline bool bnxt_qplib_is_chip_gen_p7(struct bnxt_qplib_chip_ctx *cctx)
+{
+ return (cctx->chip_num == CHIP_NUM_58818 ||
+ cctx->chip_num == CHIP_NUM_57608);
+}
+
static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx)
{
- return (cctx->chip_num == CHIP_NUM_57500);
+ return (cctx->chip_num == CHIP_NUM_57508 ||
+ cctx->chip_num == CHIP_NUM_57504 ||
+ cctx->chip_num == CHIP_NUM_57502);
+}
+
+static inline bool bnxt_qplib_is_chip_gen_p5_p7(struct bnxt_qplib_chip_ctx *cctx)
+{
+ return bnxt_qplib_is_chip_gen_p5(cctx) || bnxt_qplib_is_chip_gen_p7(cctx);
}
static inline u8 bnxt_qplib_get_hwq_type(struct bnxt_qplib_res *res)
{
- return bnxt_qplib_is_chip_gen_p5(res->cctx) ?
+ return bnxt_qplib_is_chip_gen_p5_p7(res->cctx) ?
HWQ_TYPE_QUEUE : HWQ_TYPE_L2_CMPL;
}
static inline u8 bnxt_qplib_get_ring_type(struct bnxt_qplib_chip_ctx *cctx)
{
- return bnxt_qplib_is_chip_gen_p5(cctx) ?
+ return bnxt_qplib_is_chip_gen_p5_p7(cctx) ?
RING_ALLOC_REQ_RING_TYPE_NQ :
RING_ALLOC_REQ_RING_TYPE_ROCE_CMPL;
}
-struct bnxt_qplib_sg_info {
- struct scatterlist *sglist;
- u32 nmap;
- u32 npages;
-};
+static inline u8 bnxt_qplib_base_pg_size(struct bnxt_qplib_hwq *hwq)
+{
+ u8 pg_size = BNXT_QPLIB_HWRM_PG_SIZE_4K;
+ struct bnxt_qplib_pbl *pbl;
+
+ pbl = &hwq->pbl[PBL_LVL_0];
+ switch (pbl->pg_size) {
+ case ROCE_PG_SIZE_4K:
+ pg_size = BNXT_QPLIB_HWRM_PG_SIZE_4K;
+ break;
+ case ROCE_PG_SIZE_8K:
+ pg_size = BNXT_QPLIB_HWRM_PG_SIZE_8K;
+ break;
+ case ROCE_PG_SIZE_64K:
+ pg_size = BNXT_QPLIB_HWRM_PG_SIZE_64K;
+ break;
+ case ROCE_PG_SIZE_2M:
+ pg_size = BNXT_QPLIB_HWRM_PG_SIZE_2M;
+ break;
+ case ROCE_PG_SIZE_8M:
+ pg_size = BNXT_QPLIB_HWRM_PG_SIZE_8M;
+ break;
+ case ROCE_PG_SIZE_1G:
+ pg_size = BNXT_QPLIB_HWRM_PG_SIZE_1G;
+ break;
+ default:
+ break;
+ }
+
+ return pg_size;
+}
+
+static inline void *bnxt_qplib_get_qe(struct bnxt_qplib_hwq *hwq,
+ u32 indx, u64 *pg)
+{
+ u32 pg_num, pg_idx;
+
+ pg_num = (indx / hwq->qe_ppg);
+ pg_idx = (indx % hwq->qe_ppg);
+ if (pg)
+ *pg = (u64)&hwq->pbl_ptr[pg_num];
+ return (void *)(hwq->pbl_ptr[pg_num] + hwq->element_size * pg_idx);
+}
+
+static inline void *bnxt_qplib_get_prod_qe(struct bnxt_qplib_hwq *hwq, u32 idx)
+{
+ idx += hwq->prod;
+ if (idx >= hwq->depth)
+ idx -= hwq->depth;
+ return bnxt_qplib_get_qe(hwq, idx, NULL);
+}
#define to_bnxt_qplib(ptr, type, member) \
container_of(ptr, type, member)
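The bnxt_qplib_get_qe() helper added just above turns a flat element index into a (PBL page, in-page slot) pair using qe_ppg, the number of queue entries per page. A standalone sketch of that indexing with made-up sizes:

/* Illustrative only -- sizes are assumed examples. */
#include <stdio.h>

int main(void)
{
	unsigned int element_size = 64;             /* assumed queue-entry size */
	unsigned int qe_ppg = 4096 / element_size;  /* entries per 4K PBL page */
	unsigned int indx = 100;                    /* flat element index */

	unsigned int pg_num = indx / qe_ppg;        /* which PBL page */
	unsigned int pg_idx = indx % qe_ppg;        /* slot within that page */

	printf("element %u -> page %u, byte offset 0x%x\n",
	       indx, pg_num, pg_idx * element_size);
	return 0;
}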
@@ -231,32 +416,211 @@ struct bnxt_qplib_sg_info {
struct bnxt_qplib_pd;
struct bnxt_qplib_dev_attr;
-void bnxt_qplib_free_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq);
-int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq,
- struct bnxt_qplib_sg_info *sg_info, u32 *elements,
- u32 elements_per_page, u32 aux, u32 pg_size,
- enum bnxt_qplib_hwq_type hwq_type);
-void bnxt_qplib_get_guid(u8 *dev_addr, u8 *guid);
-int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pd_tbl,
+void bnxt_qplib_free_hwq(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_hwq *hwq);
+int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq,
+ struct bnxt_qplib_hwq_attr *hwq_attr);
+int bnxt_qplib_alloc_pd(struct bnxt_qplib_res *res,
struct bnxt_qplib_pd *pd);
int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res,
struct bnxt_qplib_pd_tbl *pd_tbl,
struct bnxt_qplib_pd *pd);
-int bnxt_qplib_alloc_dpi(struct bnxt_qplib_dpi_tbl *dpit,
- struct bnxt_qplib_dpi *dpi,
- void *app);
+int bnxt_qplib_alloc_dpi(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_dpi *dpi,
+ void *app, u8 type);
int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res,
- struct bnxt_qplib_dpi_tbl *dpi_tbl,
struct bnxt_qplib_dpi *dpi);
void bnxt_qplib_cleanup_res(struct bnxt_qplib_res *res);
int bnxt_qplib_init_res(struct bnxt_qplib_res *res);
void bnxt_qplib_free_res(struct bnxt_qplib_res *res);
-int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev,
- struct net_device *netdev,
- struct bnxt_qplib_dev_attr *dev_attr);
-void bnxt_qplib_free_ctx(struct pci_dev *pdev,
- struct bnxt_qplib_ctx *ctx);
-int bnxt_qplib_alloc_ctx(struct pci_dev *pdev,
- struct bnxt_qplib_ctx *ctx,
- bool virt_fn, bool is_p5);
+int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct net_device *netdev);
+void bnxt_qplib_free_hwctx(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_ctx *ctx);
+int bnxt_qplib_alloc_hwctx(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_ctx *ctx);
+int bnxt_qplib_map_db_bar(struct bnxt_qplib_res *res);
+void bnxt_qplib_unmap_db_bar(struct bnxt_qplib_res *res);
+
+int bnxt_qplib_determine_atomics(struct pci_dev *dev);
+int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_chip_ctx *cctx,
+ struct bnxt_qplib_stats *stats);
+void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_stats *stats);
+
+static inline void bnxt_qplib_hwq_incr_prod(struct bnxt_qplib_db_info *dbinfo,
+ struct bnxt_qplib_hwq *hwq, u32 cnt)
+{
+ /* move prod and update the toggle/epoch bit on wrap-around */
+ hwq->prod += cnt;
+ if (hwq->prod >= hwq->depth) {
+ hwq->prod %= hwq->depth;
+ dbinfo->flags ^= 1UL << BNXT_QPLIB_FLAG_EPOCH_PROD_SHIFT;
+ }
+}
+
+static inline void bnxt_qplib_hwq_incr_cons(u32 max_elements, u32 *cons, u32 cnt,
+ u32 *dbinfo_flags)
+{
+ /* move cons and update the toggle/epoch bit on wrap-around */
+ *cons += cnt;
+ if (*cons >= max_elements) {
+ *cons %= max_elements;
+ *dbinfo_flags ^= 1UL << BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT;
+ }
+}
+
+static inline void bnxt_qplib_ring_db32(struct bnxt_qplib_db_info *info,
+ bool arm)
+{
+ u32 key = 0;
+
+ key |= info->hwq->cons | (CMPL_DOORBELL_IDX_VALID |
+ (CMPL_DOORBELL_KEY_CMPL & CMPL_DOORBELL_KEY_MASK));
+ if (!arm)
+ key |= CMPL_DOORBELL_MASK;
+ writel(key, info->db);
+}
+
+#define BNXT_QPLIB_INIT_DBHDR(xid, type, indx, toggle) \
+ (((u64)(((xid) & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | \
+ (type) | BNXT_QPLIB_DBR_VALID) << 32) | (indx) | \
+ (((u32)(toggle)) << (BNXT_QPLIB_DBR_TOGGLE_SHIFT)))
+
+static inline void bnxt_qplib_ring_db(struct bnxt_qplib_db_info *info,
+ u32 type)
+{
+ u64 key = 0;
+ u32 indx;
+ u8 toggle = 0;
+
+ if (type == DBC_DBC_TYPE_CQ_ARMALL ||
+ type == DBC_DBC_TYPE_CQ_ARMSE)
+ toggle = info->toggle;
+
+ indx = (info->hwq->cons & DBC_DBC_INDEX_MASK) |
+ ((info->flags & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK) <<
+ BNXT_QPLIB_DB_EPOCH_CONS_SHIFT);
+
+ key = BNXT_QPLIB_INIT_DBHDR(info->xid, type, indx, toggle);
+ writeq(key, info->db);
+}
+
+static inline void bnxt_qplib_ring_prod_db(struct bnxt_qplib_db_info *info,
+ u32 type)
+{
+ u64 key = 0;
+ u32 indx;
+
+ indx = (((info->hwq->prod / info->max_slot) & DBC_DBC_INDEX_MASK) |
+ ((info->flags & BNXT_QPLIB_FLAG_EPOCH_PROD_MASK) <<
+ BNXT_QPLIB_DB_EPOCH_PROD_SHIFT));
+ key = BNXT_QPLIB_INIT_DBHDR(info->xid, type, indx, 0);
+ writeq(key, info->db);
+}
+
+static inline void bnxt_qplib_armen_db(struct bnxt_qplib_db_info *info,
+ u32 type)
+{
+ u64 key = 0;
+ u8 toggle = 0;
+
+ if (type == DBC_DBC_TYPE_CQ_ARMENA || type == DBC_DBC_TYPE_SRQ_ARMENA)
+ toggle = info->toggle;
+ /* Index always at 0 */
+ key = BNXT_QPLIB_INIT_DBHDR(info->xid, type, 0, toggle);
+ writeq(key, info->priv_db);
+}
+
+static inline void bnxt_qplib_srq_arm_db(struct bnxt_qplib_db_info *info,
+ u32 th)
+{
+ u64 key = 0;
+
+ key = BNXT_QPLIB_INIT_DBHDR(info->xid, DBC_DBC_TYPE_SRQ_ARM, th, info->toggle);
+ writeq(key, info->priv_db);
+}
+
+static inline void bnxt_qplib_ring_nq_db(struct bnxt_qplib_db_info *info,
+ struct bnxt_qplib_chip_ctx *cctx,
+ bool arm)
+{
+ u32 type;
+
+ type = arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ;
+ if (bnxt_qplib_is_chip_gen_p5_p7(cctx))
+ bnxt_qplib_ring_db(info, type);
+ else
+ bnxt_qplib_ring_db32(info, arm);
+}
+
+static inline bool _is_ext_stats_supported(u16 dev_cap_flags)
+{
+ return dev_cap_flags &
+ CREQ_QUERY_FUNC_RESP_SB_EXT_STATS;
+}
+
+static inline int bnxt_ext_stats_supported(struct bnxt_qplib_chip_ctx *ctx,
+ u16 flags, bool virtfn)
+{
+ /* ext stats supported if cap flag is set AND is a PF OR a Thor2 VF */
+ return (_is_ext_stats_supported(flags) &&
+ ((virtfn && bnxt_qplib_is_chip_gen_p7(ctx)) || (!virtfn)));
+}
+
+static inline bool _is_hw_retx_supported(u16 dev_cap_flags)
+{
+ return dev_cap_flags &
+ (CREQ_QUERY_FUNC_RESP_SB_HW_REQUESTER_RETX_ENABLED |
+ CREQ_QUERY_FUNC_RESP_SB_HW_RESPONDER_RETX_ENABLED);
+}
+
+#define BNXT_RE_HW_RETX(a) _is_hw_retx_supported((a))
+
+static inline bool _is_host_msn_table(u16 dev_cap_ext_flags2)
+{
+ return (dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_MASK) ==
+ CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_HOST_MSN_TABLE;
+}
+
+static inline u8 bnxt_qplib_dbr_pacing_en(struct bnxt_qplib_chip_ctx *cctx)
+{
+ return cctx->modes.dbr_pacing;
+}
+
+static inline u8 bnxt_qplib_roce_mirror_supported(struct bnxt_qplib_chip_ctx *cctx)
+{
+ return cctx->modes.roce_mirror;
+}
+
+static inline bool _is_alloc_mr_unified(u16 dev_cap_flags)
+{
+ return dev_cap_flags & CREQ_QUERY_FUNC_RESP_SB_MR_REGISTER_ALLOC;
+}
+
+static inline bool _is_relaxed_ordering_supported(u16 dev_cap_ext_flags2)
+{
+ return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_MEMORY_REGION_RO_SUPPORTED;
+}
+
+static inline bool _is_optimize_modify_qp_supported(u16 dev_cap_ext_flags2)
+{
+ return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_OPTIMIZE_MODIFY_QP_SUPPORTED;
+}
+
+static inline bool _is_min_rnr_in_rtr_rts_mandatory(u16 dev_cap_ext_flags2)
+{
+ return !!(dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_MIN_RNR_RTR_RTS_OPT_SUPPORTED);
+}
+
+static inline bool _is_cq_coalescing_supported(u16 dev_cap_ext_flags2)
+{
+ return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_CQ_COALESCING_SUPPORTED;
+}
+
+static inline bool _is_max_srq_ext_supported(u16 dev_cap_ext_flags_2)
+{
+ return !!(dev_cap_ext_flags_2 & CREQ_QUERY_FUNC_RESP_SB_MAX_SRQ_EXTENDED);
+}
+
#endif /* __BNXT_QPLIB_RES_H__ */
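The doorbell helpers above all funnel through BNXT_QPLIB_INIT_DBHDR: the upper 32 bits carry the xid, doorbell type, RoCE path selector and the valid bit, while the lower 32 bits carry the ring index plus the epoch bit (flipped by bnxt_qplib_hwq_incr_prod()/_cons() on wrap-around) and the toggle bit. The standalone sketch below mirrors that layout; the valid bit (26) and toggle shift (25) match the definitions in this header, whereas the XID mask and path bit are assumed example values, since the real DBC_DBC_* constants live in the hardware interface header not shown here.

/* Illustrative only -- not part of the patch. */
#include <stdint.h>
#include <stdio.h>

#define EX_DBC_XID_MASK     0xfffffUL       /* assumed; real value in the HW header */
#define EX_DBC_PATH_ROCE    (1UL << 30)     /* assumed path-select bit */
#define EX_DBR_VALID        (1UL << 26)     /* matches BNXT_QPLIB_DBR_VALID */
#define EX_DBR_TOGGLE_SHIFT 25              /* matches BNXT_QPLIB_DBR_TOGGLE_SHIFT */

static uint64_t ex_init_dbhdr(uint32_t xid, uint32_t type, uint32_t indx,
			      uint8_t toggle)
{
	return (((uint64_t)((xid & EX_DBC_XID_MASK) | EX_DBC_PATH_ROCE |
			    type | EX_DBR_VALID) << 32) |
		indx | ((uint32_t)toggle << EX_DBR_TOGGLE_SHIFT));
}

int main(void)
{
	/* xid 0x42, type 0, ring index 7, toggle set */
	uint64_t key = ex_init_dbhdr(0x42, 0, 7, 1);

	printf("db key = 0x%016llx\n", (unsigned long long)key);
	return 0;
}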
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 48793d3512ac..408a34df2667 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -48,81 +48,107 @@
#include "qplib_res.h"
#include "qplib_rcfw.h"
#include "qplib_sp.h"
+#include "qplib_tlv.h"
const struct bnxt_qplib_gid bnxt_qplib_gid_zero = {{ 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0 } };
/* Device */
-static void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw,
- char *fw_ver)
+static bool bnxt_qplib_is_atomic_cap(struct bnxt_qplib_rcfw *rcfw)
{
- struct cmdq_query_version req;
- struct creq_query_version_resp resp;
- u16 cmd_flags = 0;
- int rc = 0;
+ u16 pcie_ctl2 = 0;
- RCFW_CMD_PREP(req, QUERY_VERSION, cmd_flags);
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx))
+ return false;
- rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- (void *)&resp, NULL, 0);
+ pcie_capability_read_word(rcfw->pdev, PCI_EXP_DEVCTL2, &pcie_ctl2);
+ return (pcie_ctl2 & PCI_EXP_DEVCTL2_ATOMIC_REQ);
+}
+
+void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw)
+{
+ struct creq_query_version_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_query_version req = {};
+ struct bnxt_qplib_dev_attr *attr;
+ int rc;
+
+ attr = rcfw->res->dattr;
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_QUERY_VERSION,
+ sizeof(req));
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
return;
- fw_ver[0] = resp.fw_maj;
- fw_ver[1] = resp.fw_minor;
- fw_ver[2] = resp.fw_bld;
- fw_ver[3] = resp.fw_rsvd;
+ attr->fw_ver[0] = resp.fw_maj;
+ attr->fw_ver[1] = resp.fw_minor;
+ attr->fw_ver[2] = resp.fw_bld;
+ attr->fw_ver[3] = resp.fw_rsvd;
}
-int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
- struct bnxt_qplib_dev_attr *attr, bool vf)
+int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw)
{
- struct cmdq_query_func req;
- struct creq_query_func_resp resp;
- struct bnxt_qplib_rcfw_sbuf *sbuf;
+ struct bnxt_qplib_dev_attr *attr = rcfw->res->dattr;
+ struct creq_query_func_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
struct creq_query_func_resp_sb *sb;
- u16 cmd_flags = 0;
- u32 temp;
+ struct bnxt_qplib_rcfw_sbuf sbuf;
+ struct bnxt_qplib_chip_ctx *cctx;
+ struct cmdq_query_func req = {};
u8 *tqm_alloc;
- int i, rc = 0;
+ int i, rc;
+ u32 temp;
- RCFW_CMD_PREP(req, QUERY_FUNC, cmd_flags);
+ cctx = rcfw->res->cctx;
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_QUERY_FUNC,
+ sizeof(req));
- sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
- if (!sbuf) {
- dev_err(&rcfw->pdev->dev,
- "SP: QUERY_FUNC alloc side buffer failed\n");
+ sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS);
+ sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
+ &sbuf.dma_addr, GFP_KERNEL);
+ if (!sbuf.sb)
return -ENOMEM;
- }
-
- sb = sbuf->sb;
- req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
- rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
- (void *)sbuf, 0);
+ sb = sbuf.sb;
+ req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS;
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
goto bail;
/* Extract the context from the side buffer */
attr->max_qp = le32_to_cpu(sb->max_qp);
- /* max_qp value reported by FW for PF doesn't include the QP1 for PF */
- if (!vf)
- attr->max_qp += 1;
+ /* max_qp value reported by FW doesn't include the QP1 */
+ attr->max_qp += 1;
attr->max_qp_rd_atom =
sb->max_qp_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ?
BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_rd_atom;
attr->max_qp_init_rd_atom =
sb->max_qp_init_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ?
BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_init_rd_atom;
- attr->max_qp_wqes = le16_to_cpu(sb->max_qp_wr);
- /*
- * 128 WQEs needs to be reserved for the HW (8916). Prevent
- * reporting the max number
- */
- attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS;
- attr->max_qp_sges = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx) ?
- 6 : sb->max_sge;
+ attr->max_qp_wqes = le16_to_cpu(sb->max_qp_wr) - 1;
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx)) {
+ /*
+ * 128 WQEs need to be reserved for the HW (8916). Prevent
+ * reporting the max number on legacy devices
+ */
+ attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS + 1;
+ }
+
+ /* Adjust for max_qp_wqes for variable wqe */
+ if (cctx->modes.wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE)
+ attr->max_qp_wqes = BNXT_VAR_MAX_WQE - 1;
+
+ attr->max_qp_sges = cctx->modes.wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE ?
+ min_t(u32, sb->max_sge_var_wqe, BNXT_VAR_MAX_SGE) : 6;
attr->max_cq = le32_to_cpu(sb->max_cq);
attr->max_cq_wqes = le32_to_cpu(sb->max_cqe);
+ if (!bnxt_qplib_is_chip_gen_p7(rcfw->res->cctx))
+ attr->max_cq_wqes = min_t(u32, BNXT_QPLIB_MAX_CQ_WQES, attr->max_cq_wqes);
attr->max_cq_sges = attr->max_qp_sges;
attr->max_mr = le32_to_cpu(sb->max_mr);
attr->max_mw = le32_to_cpu(sb->max_mw);
@@ -132,29 +158,27 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
attr->max_raw_ethy_qp = le32_to_cpu(sb->max_raw_eth_qp);
attr->max_ah = le32_to_cpu(sb->max_ah);
- attr->max_fmr = le32_to_cpu(sb->max_fmr);
- attr->max_map_per_fmr = sb->max_map_per_fmr;
-
attr->max_srq = le16_to_cpu(sb->max_srq);
attr->max_srq_wqes = le32_to_cpu(sb->max_srq_wr) - 1;
attr->max_srq_sges = sb->max_srq_sge;
- attr->max_pkey = le32_to_cpu(sb->max_pkeys);
+ attr->max_pkey = 1;
+ attr->max_inline_data = attr->max_qp_sges * sizeof(struct sq_sge);
+ if (!bnxt_qplib_is_chip_gen_p7(rcfw->res->cctx))
+ attr->l2_db_size = (sb->l2_db_space_size + 1) *
+ (0x01 << RCFW_DBR_BASE_PAGE_SHIFT);
/*
- * Some versions of FW reports more than 0xFFFF.
- * Restrict it for now to 0xFFFF to avoid
- * reporting trucated value
+ * Read the max gid supported by HW.
+ * For each entry in the HW GID table, we consume 2
+ * GID entries in the kernel GID table, so max_gid reported
+ * to the stack can be up to twice the value reported by the
+ * HW, capped at 256 GIDs.
*/
- if (attr->max_pkey > 0xFFFF) {
- /* ib_port_attr::pkey_tbl_len is u16 */
- attr->max_pkey = 0xFFFF;
- }
-
- attr->max_inline_data = le32_to_cpu(sb->max_inline_data);
- attr->l2_db_size = (sb->l2_db_space_size + 1) *
- (0x01 << RCFW_DBR_BASE_PAGE_SHIFT);
attr->max_sgid = le32_to_cpu(sb->max_gid);
+ attr->max_sgid = min_t(u32, BNXT_QPLIB_NUM_GIDS_SUPPORTED, 2 * attr->max_sgid);
+ attr->dev_cap_flags = le16_to_cpu(sb->dev_cap_flags);
+ attr->dev_cap_flags2 = le16_to_cpu(sb->dev_cap_ext_flags_2);
- bnxt_qplib_query_version(rcfw, attr->fw_ver);
+ if (_is_max_srq_ext_supported(attr->dev_cap_flags2))
+ attr->max_srq += le16_to_cpu(sb->max_srq_ext);
for (i = 0; i < MAX_TQM_ALLOC_REQ / 4; i++) {
temp = le32_to_cpu(sb->tqm_alloc_reqs[i]);
@@ -165,9 +189,13 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc);
}
- attr->is_atomic = false;
+ if (rcfw->res->cctx->hwrm_intf_ver >= HWRM_VERSION_DEV_ATTR_MAX_DPI)
+ attr->max_dpi = le32_to_cpu(sb->max_dpi);
+
+ attr->is_atomic = bnxt_qplib_is_atomic_cap(rcfw);
bail:
- bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+ dma_free_coherent(&rcfw->pdev->dev, sbuf.size,
+ sbuf.sb, sbuf.dma_addr);
return rc;
}
@@ -175,12 +203,14 @@ int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res,
struct bnxt_qplib_rcfw *rcfw,
struct bnxt_qplib_ctx *ctx)
{
- struct cmdq_set_func_resources req;
- struct creq_set_func_resources_resp resp;
- u16 cmd_flags = 0;
- int rc = 0;
+ struct creq_set_func_resources_resp resp = {};
+ struct cmdq_set_func_resources req = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ int rc;
- RCFW_CMD_PREP(req, SET_FUNC_RESOURCES, cmd_flags);
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_SET_FUNC_RESOURCES,
+ sizeof(req));
req.number_of_qp = cpu_to_le32(ctx->qpc_count);
req.number_of_mrw = cpu_to_le32(ctx->mrw_count);
@@ -193,9 +223,9 @@ int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res,
req.max_cq_per_vf = cpu_to_le32(ctx->vf_res.max_cq_per_vf);
req.max_gid_per_vf = cpu_to_le32(ctx->vf_res.max_gid_per_vf);
- rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- (void *)&resp,
- NULL, 0);
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc) {
dev_err(&res->pdev->dev, "Failed to set function resources\n");
}
@@ -213,12 +243,12 @@ int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
index, sgid_tbl->max);
return -EINVAL;
}
- memcpy(gid, &sgid_tbl->tbl[index], sizeof(*gid));
+ memcpy(gid, &sgid_tbl->tbl[index].gid, sizeof(*gid));
return 0;
}
int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
- struct bnxt_qplib_gid *gid, bool update)
+ struct bnxt_qplib_gid *gid, u16 vlan_id, bool update)
{
struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl,
struct bnxt_qplib_res,
@@ -226,17 +256,14 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
int index;
- if (!sgid_tbl) {
- dev_err(&res->pdev->dev, "SGID table not allocated\n");
- return -EINVAL;
- }
/* Do we need a sgid_lock here? */
if (!sgid_tbl->active) {
dev_err(&res->pdev->dev, "SGID table has no active entries\n");
return -ENOMEM;
}
for (index = 0; index < sgid_tbl->max; index++) {
- if (!memcmp(&sgid_tbl->tbl[index], gid, sizeof(*gid)))
+ if (!memcmp(&sgid_tbl->tbl[index].gid, gid, sizeof(*gid)) &&
+ vlan_id == sgid_tbl->tbl[index].vlan_id)
break;
}
if (index == sgid_tbl->max) {
@@ -245,25 +272,29 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
}
/* Remove GID from the SGID table */
if (update) {
- struct cmdq_delete_gid req;
- struct creq_delete_gid_resp resp;
- u16 cmd_flags = 0;
+ struct creq_delete_gid_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_delete_gid req = {};
int rc;
- RCFW_CMD_PREP(req, DELETE_GID, cmd_flags);
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_DELETE_GID,
+ sizeof(req));
if (sgid_tbl->hw_id[index] == 0xFFFF) {
dev_err(&res->pdev->dev,
"GID entry contains an invalid HW id\n");
return -EINVAL;
}
req.gid_index = cpu_to_le16(sgid_tbl->hw_id[index]);
- rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- (void *)&resp, NULL, 0);
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
return rc;
}
- memcpy(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero,
+ memcpy(&sgid_tbl->tbl[index].gid, &bnxt_qplib_gid_zero,
sizeof(bnxt_qplib_gid_zero));
+ sgid_tbl->tbl[index].vlan_id = 0xFFFF;
sgid_tbl->vlan[index] = 0;
sgid_tbl->active--;
dev_dbg(&res->pdev->dev,
@@ -276,8 +307,9 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
}
int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
- struct bnxt_qplib_gid *gid, u8 *smac, u16 vlan_id,
- bool update, u32 *index)
+ struct bnxt_qplib_gid *gid, const u8 *smac,
+ u16 vlan_id, bool update, u32 *index,
+ bool is_ugid, u32 stats_ctx_id)
{
struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl,
struct bnxt_qplib_res,
@@ -285,10 +317,6 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
int i, free_idx;
- if (!sgid_tbl) {
- dev_err(&res->pdev->dev, "SGID table not allocated\n");
- return -EINVAL;
- }
/* Do we need a sgid_lock here? */
if (sgid_tbl->active == sgid_tbl->max) {
dev_err(&res->pdev->dev, "SGID table is full\n");
@@ -296,7 +324,8 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
}
free_idx = sgid_tbl->max;
for (i = 0; i < sgid_tbl->max; i++) {
- if (!memcmp(&sgid_tbl->tbl[i], gid, sizeof(*gid))) {
+ if (!memcmp(&sgid_tbl->tbl[i], gid, sizeof(*gid)) &&
+ sgid_tbl->tbl[i].vlan_id == vlan_id) {
dev_dbg(&res->pdev->dev,
"SGID entry already exist in entry %d!\n", i);
*index = i;
@@ -313,12 +342,14 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
return -ENOMEM;
}
if (update) {
- struct cmdq_add_gid req;
- struct creq_add_gid_resp resp;
- u16 cmd_flags = 0;
+ struct creq_add_gid_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_add_gid req = {};
int rc;
- RCFW_CMD_PREP(req, ADD_GID, cmd_flags);
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_ADD_GID,
+ sizeof(req));
req.gid[0] = cpu_to_be32(((u32 *)gid->data)[3]);
req.gid[1] = cpu_to_be32(((u32 *)gid->data)[2]);
@@ -343,14 +374,19 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
req.src_mac[1] = cpu_to_be16(((u16 *)smac)[1]);
req.src_mac[2] = cpu_to_be16(((u16 *)smac)[2]);
- rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- (void *)&resp, NULL, 0);
+ req.stats_ctx = cpu_to_le16(CMDQ_ADD_GID_STATS_CTX_STATS_CTX_VALID |
+ (u16)stats_ctx_id);
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
return rc;
sgid_tbl->hw_id[free_idx] = le32_to_cpu(resp.xid);
}
/* Add GID to the sgid_tbl */
memcpy(&sgid_tbl->tbl[free_idx], gid, sizeof(*gid));
+ sgid_tbl->tbl[free_idx].vlan_id = vlan_id;
sgid_tbl->active++;
if (vlan_id != 0xFFFF)
sgid_tbl->vlan[free_idx] = 1;
@@ -364,143 +400,21 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
return 0;
}
-int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
- struct bnxt_qplib_gid *gid, u16 gid_idx,
- u8 *smac)
-{
- struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl,
- struct bnxt_qplib_res,
- sgid_tbl);
- struct bnxt_qplib_rcfw *rcfw = res->rcfw;
- struct creq_modify_gid_resp resp;
- struct cmdq_modify_gid req;
- int rc;
- u16 cmd_flags = 0;
-
- RCFW_CMD_PREP(req, MODIFY_GID, cmd_flags);
-
- req.gid[0] = cpu_to_be32(((u32 *)gid->data)[3]);
- req.gid[1] = cpu_to_be32(((u32 *)gid->data)[2]);
- req.gid[2] = cpu_to_be32(((u32 *)gid->data)[1]);
- req.gid[3] = cpu_to_be32(((u32 *)gid->data)[0]);
- if (res->prio) {
- req.vlan |= cpu_to_le16
- (CMDQ_ADD_GID_VLAN_TPID_TPID_8100 |
- CMDQ_ADD_GID_VLAN_VLAN_EN);
- }
-
- /* MAC in network format */
- req.src_mac[0] = cpu_to_be16(((u16 *)smac)[0]);
- req.src_mac[1] = cpu_to_be16(((u16 *)smac)[1]);
- req.src_mac[2] = cpu_to_be16(((u16 *)smac)[2]);
-
- req.gid_index = cpu_to_le16(gid_idx);
-
- rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- (void *)&resp, NULL, 0);
- return rc;
-}
-
-/* pkeys */
-int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res,
- struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 index,
- u16 *pkey)
-{
- if (index == 0xFFFF) {
- *pkey = 0xFFFF;
- return 0;
- }
- if (index >= pkey_tbl->max) {
- dev_err(&res->pdev->dev,
- "Index %d exceeded PKEY table max (%d)\n",
- index, pkey_tbl->max);
- return -EINVAL;
- }
- memcpy(pkey, &pkey_tbl->tbl[index], sizeof(*pkey));
- return 0;
-}
-
-int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
- struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey,
- bool update)
-{
- int i, rc = 0;
-
- if (!pkey_tbl) {
- dev_err(&res->pdev->dev, "PKEY table not allocated\n");
- return -EINVAL;
- }
-
- /* Do we need a pkey_lock here? */
- if (!pkey_tbl->active) {
- dev_err(&res->pdev->dev, "PKEY table has no active entries\n");
- return -ENOMEM;
- }
- for (i = 0; i < pkey_tbl->max; i++) {
- if (!memcmp(&pkey_tbl->tbl[i], pkey, sizeof(*pkey)))
- break;
- }
- if (i == pkey_tbl->max) {
- dev_err(&res->pdev->dev,
- "PKEY 0x%04x not found in the pkey table\n", *pkey);
- return -ENOMEM;
- }
- memset(&pkey_tbl->tbl[i], 0, sizeof(*pkey));
- pkey_tbl->active--;
-
- /* unlock */
- return rc;
-}
-
-int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
- struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey,
- bool update)
-{
- int i, free_idx, rc = 0;
-
- if (!pkey_tbl) {
- dev_err(&res->pdev->dev, "PKEY table not allocated\n");
- return -EINVAL;
- }
-
- /* Do we need a pkey_lock here? */
- if (pkey_tbl->active == pkey_tbl->max) {
- dev_err(&res->pdev->dev, "PKEY table is full\n");
- return -ENOMEM;
- }
- free_idx = pkey_tbl->max;
- for (i = 0; i < pkey_tbl->max; i++) {
- if (!memcmp(&pkey_tbl->tbl[i], pkey, sizeof(*pkey)))
- return -EALREADY;
- else if (!pkey_tbl->tbl[i] && free_idx == pkey_tbl->max)
- free_idx = i;
- }
- if (free_idx == pkey_tbl->max) {
- dev_err(&res->pdev->dev,
- "PKEY table is FULL but count is not MAX??\n");
- return -ENOMEM;
- }
- /* Add PKEY to the pkey_tbl */
- memcpy(&pkey_tbl->tbl[free_idx], pkey, sizeof(*pkey));
- pkey_tbl->active++;
-
- /* unlock */
- return rc;
-}
-
/* AH */
int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
bool block)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
- struct cmdq_create_ah req;
- struct creq_create_ah_resp resp;
- u16 cmd_flags = 0;
+ struct creq_create_ah_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_create_ah req = {};
u32 temp32[4];
u16 temp16[3];
int rc;
- RCFW_CMD_PREP(req, CREATE_AH, cmd_flags);
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_CREATE_AH,
+ sizeof(req));
memcpy(temp32, ah->dgid.data, sizeof(struct bnxt_qplib_gid));
req.dgid[0] = cpu_to_le32(temp32[0]);
@@ -523,8 +437,9 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
req.dest_mac[1] = cpu_to_le16(temp16[1]);
req.dest_mac[2] = cpu_to_le16(temp16[2]);
- rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
- NULL, block);
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), block);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
return rc;
@@ -532,30 +447,35 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
return 0;
}
-void bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
- bool block)
+int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
+ bool block)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
- struct cmdq_destroy_ah req;
- struct creq_destroy_ah_resp resp;
- u16 cmd_flags = 0;
+ struct creq_destroy_ah_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_destroy_ah req = {};
+ int rc;
/* Clean up the AH table in the device */
- RCFW_CMD_PREP(req, DESTROY_AH, cmd_flags);
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_DESTROY_AH,
+ sizeof(req));
req.ah_cid = cpu_to_le32(ah->id);
- bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, NULL,
- block);
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), block);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ return rc;
}
/* MRW */
int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
{
+ struct creq_deallocate_key_resp resp = {};
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
- struct cmdq_deallocate_key req;
- struct creq_deallocate_key_resp resp;
- u16 cmd_flags = 0;
+ struct cmdq_deallocate_key req = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
int rc;
if (mrw->lkey == 0xFFFFFFFF) {
@@ -563,7 +483,9 @@ int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
return 0;
}
- RCFW_CMD_PREP(req, DEALLOCATE_KEY, cmd_flags);
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_DEALLOCATE_KEY,
+ sizeof(req));
req.mrw_flags = mrw->type;
@@ -574,14 +496,15 @@ int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
else
req.key = cpu_to_le32(mrw->lkey);
- rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
- NULL, 0);
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
return rc;
/* Free the qplib's MRW memory */
if (mrw->hwq.max_elements)
- bnxt_qplib_free_hwq(res->pdev, &mrw->hwq);
+ bnxt_qplib_free_hwq(res, &mrw->hwq);
return 0;
}
@@ -589,26 +512,29 @@ int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
- struct cmdq_allocate_mrw req;
- struct creq_allocate_mrw_resp resp;
- u16 cmd_flags = 0;
+ struct creq_allocate_mrw_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_allocate_mrw req = {};
unsigned long tmp;
int rc;
- RCFW_CMD_PREP(req, ALLOCATE_MRW, cmd_flags);
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_ALLOCATE_MRW,
+ sizeof(req));
req.pd_id = cpu_to_le32(mrw->pd->id);
req.mrw_flags = mrw->type;
if ((mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR &&
- mrw->flags & BNXT_QPLIB_FR_PMR) ||
+ mrw->access_flags & BNXT_QPLIB_FR_PMR) ||
mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A ||
mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B)
req.access = CMDQ_ALLOCATE_MRW_ACCESS_CONSUMER_OWNED_KEY;
tmp = (unsigned long)mrw;
req.mrw_handle = cpu_to_le64(tmp);
- rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- (void *)&resp, NULL, 0);
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
return rc;
@@ -625,16 +551,19 @@ int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
bool block)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
- struct cmdq_deregister_mr req;
- struct creq_deregister_mr_resp resp;
- u16 cmd_flags = 0;
+ struct creq_deregister_mr_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_deregister_mr req = {};
int rc;
- RCFW_CMD_PREP(req, DEREGISTER_MR, cmd_flags);
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_DEREGISTER_MR,
+ sizeof(req));
req.lkey = cpu_to_le32(mrw->lkey);
- rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- (void *)&resp, NULL, block);
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), block);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
return rc;
@@ -642,61 +571,53 @@ int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
if (mrw->hwq.max_elements) {
mrw->va = 0;
mrw->total_size = 0;
- bnxt_qplib_free_hwq(res->pdev, &mrw->hwq);
+ bnxt_qplib_free_hwq(res, &mrw->hwq);
}
return 0;
}
int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
- u64 *pbl_tbl, int num_pbls, bool block, u32 buf_pg_size)
+ struct ib_umem *umem, int num_pbls, u32 buf_pg_size, bool unified_mr)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
- struct cmdq_register_mr req;
- struct creq_register_mr_resp resp;
- u16 cmd_flags = 0, level;
- int pg_ptrs, pages, i, rc;
- dma_addr_t **pbl_ptr;
+ struct bnxt_qplib_hwq_attr hwq_attr = {};
+ struct bnxt_qplib_sg_info sginfo = {};
+ struct creq_register_mr_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_register_mr req = {};
+ int pages, rc;
u32 pg_size;
+ u16 level;
if (num_pbls) {
+ pages = roundup_pow_of_two(num_pbls);
/* Allocate memory for the non-leaf pages to store buf ptrs.
* Non-leaf pages always use the system PAGE_SIZE
*/
- pg_ptrs = roundup_pow_of_two(num_pbls);
- pages = pg_ptrs >> MAX_PBL_LVL_1_PGS_SHIFT;
- if (!pages)
- pages++;
-
- if (pages > MAX_PBL_LVL_1_PGS) {
- dev_err(&res->pdev->dev,
- "SP: Reg MR pages requested (0x%x) exceeded max (0x%x)\n",
- pages, MAX_PBL_LVL_1_PGS);
- return -ENOMEM;
- }
/* Free the hwq if it already exists; must be a rereg */
if (mr->hwq.max_elements)
- bnxt_qplib_free_hwq(res->pdev, &mr->hwq);
-
- mr->hwq.max_elements = pages;
- /* Use system PAGE_SIZE */
- rc = bnxt_qplib_alloc_init_hwq(res->pdev, &mr->hwq, NULL,
- &mr->hwq.max_elements,
- PAGE_SIZE, 0, PAGE_SIZE,
- HWQ_TYPE_CTX);
+ bnxt_qplib_free_hwq(res, &mr->hwq);
+ hwq_attr.res = res;
+ hwq_attr.depth = pages;
+ hwq_attr.stride = sizeof(dma_addr_t);
+ hwq_attr.type = HWQ_TYPE_MR;
+ hwq_attr.sginfo = &sginfo;
+ hwq_attr.sginfo->umem = umem;
+ hwq_attr.sginfo->npages = pages;
+ hwq_attr.sginfo->pgsize = buf_pg_size;
+ hwq_attr.sginfo->pgshft = ilog2(buf_pg_size);
+ rc = bnxt_qplib_alloc_init_hwq(&mr->hwq, &hwq_attr);
if (rc) {
dev_err(&res->pdev->dev,
"SP: Reg MR memory allocation failed\n");
return -ENOMEM;
}
- /* Write to the hwq */
- pbl_ptr = (dma_addr_t **)mr->hwq.pbl_ptr;
- for (i = 0; i < num_pbls; i++)
- pbl_ptr[PTR_PG(i)][PTR_IDX(i)] =
- (pbl_tbl[i] & PAGE_MASK) | PTU_PTE_VALID;
}
- RCFW_CMD_PREP(req, REGISTER_MR, cmd_flags);
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_REGISTER_MR,
+ sizeof(req));
/* Configure the request */
if (mr->hwq.level == PBL_LVL_MAX) {
@@ -705,7 +626,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
req.pbl = 0;
pg_size = PAGE_SIZE;
} else {
- level = mr->hwq.level + 1;
+ level = mr->hwq.level;
req.pbl = cpu_to_le64(mr->hwq.pbl[PBL_LVL_0].pg_map_arr[0]);
}
pg_size = buf_pg_size ? buf_pg_size : PAGE_SIZE;
@@ -716,21 +637,30 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
req.log2_pbl_pg_size = cpu_to_le16(((ilog2(PAGE_SIZE) <<
CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_SFT) &
CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_MASK));
- req.access = (mr->flags & 0xFFFF);
+ req.access = (mr->access_flags & BNXT_QPLIB_MR_ACCESS_MASK);
req.va = cpu_to_le64(mr->va);
req.key = cpu_to_le32(mr->lkey);
+ if (unified_mr)
+ req.key = cpu_to_le32(mr->pd->id);
+ req.flags = cpu_to_le16(mr->flags);
req.mr_size = cpu_to_le64(mr->total_size);
- rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
- (void *)&resp, NULL, block);
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
goto fail;
+ if (unified_mr) {
+ mr->lkey = le32_to_cpu(resp.xid);
+ mr->rkey = mr->lkey;
+ }
+
return 0;
fail:
if (mr->hwq.max_elements)
- bnxt_qplib_free_hwq(res->pdev, &mr->hwq);
+ bnxt_qplib_free_hwq(res, &mr->hwq);
return rc;
}
@@ -738,6 +668,8 @@ int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res,
struct bnxt_qplib_frpl *frpl,
int max_pg_ptrs)
{
+ struct bnxt_qplib_hwq_attr hwq_attr = {};
+ struct bnxt_qplib_sg_info sginfo = {};
int pg_ptrs, pages, rc;
/* Re-calculate the max to fit the HWQ allocation model */
@@ -749,10 +681,15 @@ int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res,
if (pages > MAX_PBL_LVL_1_PGS)
return -ENOMEM;
- frpl->hwq.max_elements = pages;
- rc = bnxt_qplib_alloc_init_hwq(res->pdev, &frpl->hwq, NULL,
- &frpl->hwq.max_elements, PAGE_SIZE, 0,
- PAGE_SIZE, HWQ_TYPE_CTX);
+ sginfo.pgsize = PAGE_SIZE;
+ sginfo.nopte = true;
+
+ hwq_attr.res = res;
+ hwq_attr.depth = pg_ptrs;
+ hwq_attr.stride = PAGE_SIZE;
+ hwq_attr.sginfo = &sginfo;
+ hwq_attr.type = HWQ_TYPE_CTX;
+ rc = bnxt_qplib_alloc_init_hwq(&frpl->hwq, &hwq_attr);
if (!rc)
frpl->max_pg_ptrs = pg_ptrs;
@@ -762,48 +699,35 @@ int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res,
int bnxt_qplib_free_fast_reg_page_list(struct bnxt_qplib_res *res,
struct bnxt_qplib_frpl *frpl)
{
- bnxt_qplib_free_hwq(res->pdev, &frpl->hwq);
+ bnxt_qplib_free_hwq(res, &frpl->hwq);
return 0;
}
-int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids)
-{
- struct bnxt_qplib_rcfw *rcfw = res->rcfw;
- struct cmdq_map_tc_to_cos req;
- struct creq_map_tc_to_cos_resp resp;
- u16 cmd_flags = 0;
-
- RCFW_CMD_PREP(req, MAP_TC_TO_COS, cmd_flags);
- req.cos0 = cpu_to_le16(cids[0]);
- req.cos1 = cpu_to_le16(cids[1]);
-
- return bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
- NULL, 0);
-}
-
int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
struct bnxt_qplib_roce_stats *stats)
{
- struct cmdq_query_roce_stats req;
- struct creq_query_roce_stats_resp resp;
- struct bnxt_qplib_rcfw_sbuf *sbuf;
+ struct creq_query_roce_stats_resp resp = {};
struct creq_query_roce_stats_resp_sb *sb;
- u16 cmd_flags = 0;
- int rc = 0;
+ struct cmdq_query_roce_stats req = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct bnxt_qplib_rcfw_sbuf sbuf;
+ int rc;
- RCFW_CMD_PREP(req, QUERY_ROCE_STATS, cmd_flags);
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_QUERY_ROCE_STATS,
+ sizeof(req));
- sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
- if (!sbuf) {
- dev_err(&rcfw->pdev->dev,
- "SP: QUERY_ROCE_STATS alloc side buffer failed\n");
+ sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS);
+ sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
+ &sbuf.dma_addr, GFP_KERNEL);
+ if (!sbuf.sb)
return -ENOMEM;
- }
+ sb = sbuf.sb;
- sb = sbuf->sb;
- req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
- rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
- (void *)sbuf, 0);
+ req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS;
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
goto bail;
/* Extract the context from the side buffer */
@@ -857,6 +781,365 @@ int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
}
bail:
- bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+ dma_free_coherent(&rcfw->pdev->dev, sbuf.size,
+ sbuf.sb, sbuf.dma_addr);
return rc;
}
+
+int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid,
+ struct bnxt_qplib_ext_stat *estat)
+{
+ struct creq_query_roce_stats_ext_resp resp = {};
+ struct creq_query_roce_stats_ext_resp_sb *sb;
+ struct cmdq_query_roce_stats_ext req = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct bnxt_qplib_rcfw_sbuf sbuf;
+ int rc;
+
+ sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS);
+ sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
+ &sbuf.dma_addr, GFP_KERNEL);
+ if (!sbuf.sb)
+ return -ENOMEM;
+
+ sb = sbuf.sb;
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_QUERY_ROCE_STATS,
+ sizeof(req));
+
+ req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS;
+ req.resp_addr = cpu_to_le64(sbuf.dma_addr);
+ if (bnxt_qplib_is_chip_gen_p7(rcfw->res->cctx) && rcfw->res->is_vf)
+ req.function_id =
+ cpu_to_le32(CMDQ_QUERY_ROCE_STATS_EXT_VF_VALID |
+ (fid << CMDQ_QUERY_ROCE_STATS_EXT_VF_NUM_SFT));
+ else
+ req.function_id = cpu_to_le32(fid);
+ req.flags = cpu_to_le16(CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_FUNCTION_ID);
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ goto bail;
+
+ estat->tx_atomic_req = le64_to_cpu(sb->tx_atomic_req_pkts);
+ estat->tx_read_req = le64_to_cpu(sb->tx_read_req_pkts);
+ estat->tx_read_res = le64_to_cpu(sb->tx_read_res_pkts);
+ estat->tx_write_req = le64_to_cpu(sb->tx_write_req_pkts);
+ estat->tx_send_req = le64_to_cpu(sb->tx_send_req_pkts);
+ estat->tx_roce_pkts = le64_to_cpu(sb->tx_roce_pkts);
+ estat->tx_roce_bytes = le64_to_cpu(sb->tx_roce_bytes);
+ estat->rx_atomic_req = le64_to_cpu(sb->rx_atomic_req_pkts);
+ estat->rx_read_req = le64_to_cpu(sb->rx_read_req_pkts);
+ estat->rx_read_res = le64_to_cpu(sb->rx_read_res_pkts);
+ estat->rx_write_req = le64_to_cpu(sb->rx_write_req_pkts);
+ estat->rx_send_req = le64_to_cpu(sb->rx_send_req_pkts);
+ estat->rx_roce_pkts = le64_to_cpu(sb->rx_roce_pkts);
+ estat->rx_roce_bytes = le64_to_cpu(sb->rx_roce_bytes);
+ estat->rx_roce_good_pkts = le64_to_cpu(sb->rx_roce_good_pkts);
+ estat->rx_roce_good_bytes = le64_to_cpu(sb->rx_roce_good_bytes);
+ estat->rx_out_of_buffer = le64_to_cpu(sb->rx_out_of_buffer_pkts);
+ estat->rx_out_of_sequence = le64_to_cpu(sb->rx_out_of_sequence_pkts);
+ estat->tx_cnp = le64_to_cpu(sb->tx_cnp_pkts);
+ estat->rx_cnp = le64_to_cpu(sb->rx_cnp_pkts);
+ estat->rx_ecn_marked = le64_to_cpu(sb->rx_ecn_marked_pkts);
+
+bail:
+ dma_free_coherent(&rcfw->pdev->dev, sbuf.size,
+ sbuf.sb, sbuf.dma_addr);
+ return rc;
+}
+
+static void bnxt_qplib_fill_cc_gen1(struct cmdq_modify_roce_cc_gen1_tlv *ext_req,
+ struct bnxt_qplib_cc_param_ext *cc_ext)
+{
+ ext_req->modify_mask = cpu_to_le64(cc_ext->ext_mask);
+ cc_ext->ext_mask = 0;
+ ext_req->inactivity_th_hi = cpu_to_le16(cc_ext->inact_th_hi);
+ ext_req->min_time_between_cnps = cpu_to_le16(cc_ext->min_delta_cnp);
+ ext_req->init_cp = cpu_to_le16(cc_ext->init_cp);
+ ext_req->tr_update_mode = cc_ext->tr_update_mode;
+ ext_req->tr_update_cycles = cc_ext->tr_update_cyls;
+ ext_req->fr_num_rtts = cc_ext->fr_rtt;
+ ext_req->ai_rate_increase = cc_ext->ai_rate_incr;
+ ext_req->reduction_relax_rtts_th = cpu_to_le16(cc_ext->rr_rtt_th);
+ ext_req->additional_relax_cr_th = cpu_to_le16(cc_ext->ar_cr_th);
+ ext_req->cr_min_th = cpu_to_le16(cc_ext->cr_min_th);
+ ext_req->bw_avg_weight = cc_ext->bw_avg_weight;
+ ext_req->actual_cr_factor = cc_ext->cr_factor;
+ ext_req->max_cp_cr_th = cpu_to_le16(cc_ext->cr_th_max_cp);
+ ext_req->cp_bias_en = cc_ext->cp_bias_en;
+ ext_req->cp_bias = cc_ext->cp_bias;
+ ext_req->cnp_ecn = cc_ext->cnp_ecn;
+ ext_req->rtt_jitter_en = cc_ext->rtt_jitter_en;
+ ext_req->link_bytes_per_usec = cpu_to_le16(cc_ext->bytes_per_usec);
+ ext_req->reset_cc_cr_th = cpu_to_le16(cc_ext->cc_cr_reset_th);
+ ext_req->cr_width = cc_ext->cr_width;
+ ext_req->quota_period_min = cc_ext->min_quota;
+ ext_req->quota_period_max = cc_ext->max_quota;
+ ext_req->quota_period_abs_max = cc_ext->abs_max_quota;
+ ext_req->tr_lower_bound = cpu_to_le16(cc_ext->tr_lb);
+ ext_req->cr_prob_factor = cc_ext->cr_prob_fac;
+ ext_req->tr_prob_factor = cc_ext->tr_prob_fac;
+ ext_req->fairness_cr_th = cpu_to_le16(cc_ext->fair_cr_th);
+ ext_req->red_div = cc_ext->red_div;
+ ext_req->cnp_ratio_th = cc_ext->cnp_ratio_th;
+ ext_req->exp_ai_rtts = cpu_to_le16(cc_ext->ai_ext_rtt);
+ ext_req->exp_ai_cr_cp_ratio = cc_ext->exp_crcp_ratio;
+ ext_req->use_rate_table = cc_ext->low_rate_en;
+ ext_req->cp_exp_update_th = cpu_to_le16(cc_ext->cpcr_update_th);
+ ext_req->high_exp_ai_rtts_th1 = cpu_to_le16(cc_ext->ai_rtt_th1);
+ ext_req->high_exp_ai_rtts_th2 = cpu_to_le16(cc_ext->ai_rtt_th2);
+ ext_req->actual_cr_cong_free_rtts_th = cpu_to_le16(cc_ext->cf_rtt_th);
+ ext_req->severe_cong_cr_th1 = cpu_to_le16(cc_ext->sc_cr_th1);
+ ext_req->severe_cong_cr_th2 = cpu_to_le16(cc_ext->sc_cr_th2);
+ ext_req->link64B_per_rtt = cpu_to_le32(cc_ext->l64B_per_rtt);
+ ext_req->cc_ack_bytes = cc_ext->cc_ack_bytes;
+}
+
+int bnxt_qplib_modify_cc(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_cc_param *cc_param)
+{
+ struct bnxt_qplib_tlv_modify_cc_req tlv_req = {};
+ struct creq_modify_roce_cc_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_modify_roce_cc *req;
+ int req_size;
+ void *cmd;
+ int rc;
+
+ /* Prepare the older base command */
+ req = &tlv_req.base_req;
+ cmd = req;
+ req_size = sizeof(*req);
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)req, CMDQ_BASE_OPCODE_MODIFY_ROCE_CC,
+ sizeof(*req));
+ req->modify_mask = cpu_to_le32(cc_param->mask);
+ req->enable_cc = cc_param->enable;
+ req->g = cc_param->g;
+ req->num_phases_per_state = cc_param->nph_per_state;
+ req->time_per_phase = cc_param->time_pph;
+ req->pkts_per_phase = cc_param->pkts_pph;
+ req->init_cr = cpu_to_le16(cc_param->init_cr);
+ req->init_tr = cpu_to_le16(cc_param->init_tr);
+ req->tos_dscp_tos_ecn = (cc_param->tos_dscp << CMDQ_MODIFY_ROCE_CC_TOS_DSCP_SFT) |
+ (cc_param->tos_ecn & CMDQ_MODIFY_ROCE_CC_TOS_ECN_MASK);
+ req->alt_vlan_pcp = cc_param->alt_vlan_pcp;
+ req->alt_tos_dscp = cpu_to_le16(cc_param->alt_tos_dscp);
+ req->rtt = cpu_to_le16(cc_param->rtt);
+ req->tcp_cp = cpu_to_le16(cc_param->tcp_cp);
+ req->cc_mode = cc_param->cc_mode;
+ req->inactivity_th = cpu_to_le16(cc_param->inact_th);
+
+ /* For chip gen P5 onwards, fill the extended command and TLV headers */
+ if (bnxt_qplib_is_chip_gen_p5_p7(res->cctx)) {
+ struct roce_tlv *hdr;
+ u32 payload;
+ u32 chunks;
+
+ cmd = &tlv_req;
+ req_size = sizeof(tlv_req);
+ /* Prepare primary tlv header */
+ hdr = &tlv_req.tlv_hdr;
+ chunks = CHUNKS(sizeof(struct bnxt_qplib_tlv_modify_cc_req));
+ payload = sizeof(struct cmdq_modify_roce_cc);
+ __roce_1st_tlv_prep(hdr, chunks, payload, true);
+ /* Prepare secondary tlv header */
+ hdr = (struct roce_tlv *)&tlv_req.ext_req;
+ payload = sizeof(struct cmdq_modify_roce_cc_gen1_tlv) -
+ sizeof(struct roce_tlv);
+ __roce_ext_tlv_prep(hdr, TLV_TYPE_MODIFY_ROCE_CC_GEN1, payload, false, true);
+ bnxt_qplib_fill_cc_gen1(&tlv_req.ext_req, &cc_param->cc_ext);
+ }
+
+ bnxt_qplib_fill_cmdqmsg(&msg, cmd, &resp, NULL, req_size,
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(res->rcfw, &msg);
+ return rc;
+}
+
+int bnxt_qplib_read_context(struct bnxt_qplib_rcfw *rcfw, u8 res_type,
+ u32 xid, u32 resp_size, void *resp_va)
+{
+ struct creq_read_context resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_read_context req = {};
+ struct bnxt_qplib_rcfw_sbuf sbuf;
+ int rc;
+
+ sbuf.size = resp_size;
+ sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
+ &sbuf.dma_addr, GFP_KERNEL);
+ if (!sbuf.sb)
+ return -ENOMEM;
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_READ_CONTEXT, sizeof(req));
+ req.resp_addr = cpu_to_le64(sbuf.dma_addr);
+ req.resp_size = resp_size / BNXT_QPLIB_CMDQE_UNITS;
+
+ req.xid = cpu_to_le32(xid);
+ req.type = res_type;
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ goto free_mem;
+
+ memcpy(resp_va, sbuf.sb, resp_size);
+free_mem:
+ dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr);
+ return rc;
+}
+
+static void bnxt_qplib_read_cc_gen1(struct bnxt_qplib_cc_param_ext *cc_ext,
+ struct creq_query_roce_cc_gen1_resp_sb_tlv *sb)
+{
+ cc_ext->inact_th_hi = le16_to_cpu(sb->inactivity_th_hi);
+ cc_ext->min_delta_cnp = le16_to_cpu(sb->min_time_between_cnps);
+ cc_ext->init_cp = le16_to_cpu(sb->init_cp);
+ cc_ext->tr_update_mode = sb->tr_update_mode;
+ cc_ext->tr_update_cyls = sb->tr_update_cycles;
+ cc_ext->fr_rtt = sb->fr_num_rtts;
+ cc_ext->ai_rate_incr = sb->ai_rate_increase;
+ cc_ext->rr_rtt_th = le16_to_cpu(sb->reduction_relax_rtts_th);
+ cc_ext->ar_cr_th = le16_to_cpu(sb->additional_relax_cr_th);
+ cc_ext->cr_min_th = le16_to_cpu(sb->cr_min_th);
+ cc_ext->bw_avg_weight = sb->bw_avg_weight;
+ cc_ext->cr_factor = sb->actual_cr_factor;
+ cc_ext->cr_th_max_cp = le16_to_cpu(sb->max_cp_cr_th);
+ cc_ext->cp_bias_en = sb->cp_bias_en;
+ cc_ext->cp_bias = sb->cp_bias;
+ cc_ext->cnp_ecn = sb->cnp_ecn;
+ cc_ext->rtt_jitter_en = sb->rtt_jitter_en;
+ cc_ext->bytes_per_usec = le16_to_cpu(sb->link_bytes_per_usec);
+ cc_ext->cc_cr_reset_th = le16_to_cpu(sb->reset_cc_cr_th);
+ cc_ext->cr_width = sb->cr_width;
+ cc_ext->min_quota = sb->quota_period_min;
+ cc_ext->max_quota = sb->quota_period_max;
+ cc_ext->abs_max_quota = sb->quota_period_abs_max;
+ cc_ext->tr_lb = le16_to_cpu(sb->tr_lower_bound);
+ cc_ext->cr_prob_fac = sb->cr_prob_factor;
+ cc_ext->tr_prob_fac = sb->tr_prob_factor;
+ cc_ext->fair_cr_th = le16_to_cpu(sb->fairness_cr_th);
+ cc_ext->red_div = sb->red_div;
+ cc_ext->cnp_ratio_th = sb->cnp_ratio_th;
+ cc_ext->ai_ext_rtt = le16_to_cpu(sb->exp_ai_rtts);
+ cc_ext->exp_crcp_ratio = sb->exp_ai_cr_cp_ratio;
+ cc_ext->low_rate_en = sb->use_rate_table;
+ cc_ext->cpcr_update_th = le16_to_cpu(sb->cp_exp_update_th);
+ cc_ext->ai_rtt_th1 = le16_to_cpu(sb->high_exp_ai_rtts_th1);
+ cc_ext->ai_rtt_th2 = le16_to_cpu(sb->high_exp_ai_rtts_th2);
+ cc_ext->cf_rtt_th = le16_to_cpu(sb->actual_cr_cong_free_rtts_th);
+ cc_ext->sc_cr_th1 = le16_to_cpu(sb->severe_cong_cr_th1);
+ cc_ext->sc_cr_th2 = le16_to_cpu(sb->severe_cong_cr_th2);
+ cc_ext->l64B_per_rtt = le32_to_cpu(sb->link64B_per_rtt);
+ cc_ext->cc_ack_bytes = sb->cc_ack_bytes;
+ cc_ext->reduce_cf_rtt_th = le16_to_cpu(sb->reduce_init_cong_free_rtts_th);
+}
+
+int bnxt_qplib_query_cc_param(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_cc_param *cc_param)
+{
+ struct bnxt_qplib_tlv_query_rcc_sb *ext_sb;
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct creq_query_roce_cc_resp resp = {};
+ struct creq_query_roce_cc_resp_sb *sb;
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_query_roce_cc req = {};
+ struct bnxt_qplib_rcfw_sbuf sbuf;
+ size_t resp_size;
+ int rc;
+
+ /* Query the parameters from the chip */
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_QUERY_ROCE_CC,
+ sizeof(req));
+ if (bnxt_qplib_is_chip_gen_p5_p7(res->cctx))
+ resp_size = sizeof(*ext_sb);
+ else
+ resp_size = sizeof(*sb);
+
+ sbuf.size = ALIGN(resp_size, BNXT_QPLIB_CMDQE_UNITS);
+ sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
+ &sbuf.dma_addr, GFP_KERNEL);
+ if (!sbuf.sb)
+ return -ENOMEM;
+
+ req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS;
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(res->rcfw, &msg);
+ if (rc)
+ goto out;
+
+ ext_sb = sbuf.sb;
+ sb = bnxt_qplib_is_chip_gen_p5_p7(res->cctx) ? &ext_sb->base_sb :
+ (struct creq_query_roce_cc_resp_sb *)ext_sb;
+
+ cc_param->enable = sb->enable_cc & CREQ_QUERY_ROCE_CC_RESP_SB_ENABLE_CC;
+ cc_param->tos_ecn = (sb->tos_dscp_tos_ecn &
+ CREQ_QUERY_ROCE_CC_RESP_SB_TOS_ECN_MASK) >>
+ CREQ_QUERY_ROCE_CC_RESP_SB_TOS_ECN_SFT;
+ cc_param->tos_dscp = (sb->tos_dscp_tos_ecn &
+ CREQ_QUERY_ROCE_CC_RESP_SB_TOS_DSCP_MASK) >>
+ CREQ_QUERY_ROCE_CC_RESP_SB_TOS_DSCP_SFT;
+ cc_param->alt_tos_dscp = sb->alt_tos_dscp;
+ cc_param->alt_vlan_pcp = sb->alt_vlan_pcp;
+
+ cc_param->g = sb->g;
+ cc_param->nph_per_state = sb->num_phases_per_state;
+ cc_param->init_cr = le16_to_cpu(sb->init_cr);
+ cc_param->init_tr = le16_to_cpu(sb->init_tr);
+ cc_param->cc_mode = sb->cc_mode;
+ cc_param->inact_th = le16_to_cpu(sb->inactivity_th);
+ cc_param->rtt = le16_to_cpu(sb->rtt);
+ cc_param->tcp_cp = le16_to_cpu(sb->tcp_cp);
+ cc_param->time_pph = sb->time_per_phase;
+ cc_param->pkts_pph = sb->pkts_per_phase;
+ if (bnxt_qplib_is_chip_gen_p5_p7(res->cctx)) {
+ bnxt_qplib_read_cc_gen1(&cc_param->cc_ext, &ext_sb->gen1_sb);
+ cc_param->inact_th |= (cc_param->cc_ext.inact_th_hi & 0x3F) << 16;
+ }
+out:
+ dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr);
+ return rc;
+}
+
+int bnxt_qplib_create_flow(struct bnxt_qplib_res *res)
+{
+ struct creq_roce_mirror_cfg_resp resp = {};
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_roce_mirror_cfg req = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_ROCE_MIRROR_CFG,
+ sizeof(req));
+
+ req.mirror_flags = (u8)CMDQ_ROCE_MIRROR_CFG_MIRROR_ENABLE;
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ return bnxt_qplib_rcfw_send_message(rcfw, &msg);
+}
+
+int bnxt_qplib_destroy_flow(struct bnxt_qplib_res *res)
+{
+ struct creq_roce_mirror_cfg_resp resp = {};
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_roce_mirror_cfg req = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_ROCE_MIRROR_CFG,
+ sizeof(req));
+
+ req.mirror_flags &= ~((u8)CMDQ_ROCE_MIRROR_CFG_MIRROR_ENABLE);
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+
+ return bnxt_qplib_rcfw_send_message(rcfw, &msg);
+}
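The query helpers above (bnxt_qplib_read_context() and bnxt_qplib_query_cc_param()) hand the firmware a DMA side buffer and describe its size in CMDQE units rather than bytes; bnxt_qplib_query_cc_param() additionally rounds the buffer size up to a multiple of BNXT_QPLIB_CMDQE_UNITS. A minimal stand-alone sketch of that sizing arithmetic follows; the value of BNXT_QPLIB_CMDQE_UNITS is a placeholder here, since the real constant is defined elsewhere in the driver, not in this patch.

#include <stdio.h>

/* Placeholder: the real BNXT_QPLIB_CMDQE_UNITS lives elsewhere in the driver. */
#define BNXT_QPLIB_CMDQE_UNITS 16U

/* Round x up to a multiple of a (a must be a power of two). */
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned int resp_size = 200;	/* hypothetical response block size in bytes */
	unsigned int sbuf_size = ALIGN_UP(resp_size, BNXT_QPLIB_CMDQE_UNITS);

	/* req.resp_size is programmed in CMDQE units, not bytes. */
	printf("sbuf.size = %u bytes, req.resp_size = %u units\n",
	       sbuf_size, sbuf_size / BNXT_QPLIB_CMDQE_UNITS);
	return 0;
}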
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index 0ec3b12b0bcd..5a45c55c6464 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -40,13 +40,13 @@
#ifndef __BNXT_QPLIB_SP_H__
#define __BNXT_QPLIB_SP_H__
+#include <rdma/bnxt_re-abi.h>
#define BNXT_QPLIB_RESERVED_QP_WRS 128
-#define PCI_EXP_DEVCTL2_ATOMIC_REQ 0x0040
-
struct bnxt_qplib_dev_attr {
#define FW_VER_ARR_LEN 4
u8 fw_ver[FW_VER_ARR_LEN];
+#define BNXT_QPLIB_NUM_GIDS_SUPPORTED 256
u16 max_sgid;
u16 max_mrw;
u32 max_qp;
@@ -56,6 +56,7 @@ struct bnxt_qplib_dev_attr {
u32 max_qp_wqes;
u32 max_qp_sges;
u32 max_cq;
+#define BNXT_QPLIB_MAX_CQ_WQES 0xfffff
u32 max_cq_wqes;
u32 max_cq_sges;
u32 max_mr;
@@ -64,8 +65,6 @@ struct bnxt_qplib_dev_attr {
u32 max_mw;
u32 max_raw_ethy_qp;
u32 max_ah;
- u32 max_fmr;
- u32 max_map_per_fmr;
u32 max_srq;
u32 max_srq_wqes;
u32 max_srq_sges;
@@ -74,6 +73,9 @@ struct bnxt_qplib_dev_attr {
u32 l2_db_size;
u8 tqm_alloc_reqs[MAX_TQM_ALLOC_REQ];
bool is_atomic;
+ u16 dev_cap_flags;
+ u16 dev_cap_flags2;
+ u32 max_dpi;
};
struct bnxt_qplib_pd {
@@ -84,6 +86,11 @@ struct bnxt_qplib_gid {
u8 data[16];
};
+struct bnxt_qplib_gid_info {
+ struct bnxt_qplib_gid gid;
+ u16 vlan_id;
+};
+
struct bnxt_qplib_ah {
struct bnxt_qplib_gid dgid;
struct bnxt_qplib_pd *pd;
@@ -103,7 +110,8 @@ struct bnxt_qplib_ah {
struct bnxt_qplib_mrw {
struct bnxt_qplib_pd *pd;
int type;
- u32 flags;
+ u32 access_flags;
+#define BNXT_QPLIB_MR_ACCESS_MASK 0xFF
#define BNXT_QPLIB_FR_PMR 0x80000000
u32 lkey;
u32 rkey;
@@ -111,6 +119,7 @@ struct bnxt_qplib_mrw {
u64 va;
u64 total_size;
u32 npages;
+ u16 flags;
u64 mr_handle;
struct bnxt_qplib_hwq hwq;
};
@@ -217,40 +226,122 @@ struct bnxt_qplib_roce_stats {
/* port 3 active qps */
};
+struct bnxt_qplib_ext_stat {
+ u64 tx_atomic_req;
+ u64 tx_read_req;
+ u64 tx_read_res;
+ u64 tx_write_req;
+ u64 tx_send_req;
+ u64 tx_roce_pkts;
+ u64 tx_roce_bytes;
+ u64 rx_atomic_req;
+ u64 rx_read_req;
+ u64 rx_read_res;
+ u64 rx_write_req;
+ u64 rx_send_req;
+ u64 rx_roce_pkts;
+ u64 rx_roce_bytes;
+ u64 rx_roce_good_pkts;
+ u64 rx_roce_good_bytes;
+ u64 rx_out_of_buffer;
+ u64 rx_out_of_sequence;
+ u64 tx_cnp;
+ u64 rx_cnp;
+ u64 rx_ecn_marked;
+};
+
+struct bnxt_qplib_cc_param_ext {
+ u64 ext_mask;
+ u16 inact_th_hi;
+ u16 min_delta_cnp;
+ u16 init_cp;
+ u8 tr_update_mode;
+ u8 tr_update_cyls;
+ u8 fr_rtt;
+ u8 ai_rate_incr;
+ u16 rr_rtt_th;
+ u16 ar_cr_th;
+ u16 cr_min_th;
+ u8 bw_avg_weight;
+ u8 cr_factor;
+ u16 cr_th_max_cp;
+ u8 cp_bias_en;
+ u8 cp_bias;
+ u8 cnp_ecn;
+ u8 rtt_jitter_en;
+ u16 bytes_per_usec;
+ u16 cc_cr_reset_th;
+ u8 cr_width;
+ u8 min_quota;
+ u8 max_quota;
+ u8 abs_max_quota;
+ u16 tr_lb;
+ u8 cr_prob_fac;
+ u8 tr_prob_fac;
+ u16 fair_cr_th;
+ u8 red_div;
+ u8 cnp_ratio_th;
+ u16 ai_ext_rtt;
+ u8 exp_crcp_ratio;
+ u8 low_rate_en;
+ u16 cpcr_update_th;
+ u16 ai_rtt_th1;
+ u16 ai_rtt_th2;
+ u16 cf_rtt_th;
+ u16 sc_cr_th1; /* severe congestion cr threshold 1 */
+ u16 sc_cr_th2; /* severe congestion cr threshold 2 */
+ u32 l64B_per_rtt;
+ u8 cc_ack_bytes;
+ u16 reduce_cf_rtt_th;
+};
+
+struct bnxt_qplib_cc_param {
+ u8 alt_vlan_pcp;
+ u8 qp1_tos_dscp;
+ u16 alt_tos_dscp;
+ u8 cc_mode;
+ u8 enable;
+ u16 inact_th;
+ u16 init_cr;
+ u16 init_tr;
+ u16 rtt;
+ u8 g;
+ u8 nph_per_state;
+ u8 time_pph;
+ u8 pkts_pph;
+ u8 tos_ecn;
+ u8 tos_dscp;
+ u16 tcp_cp;
+ struct bnxt_qplib_cc_param_ext cc_ext;
+ u32 mask;
+};
+
int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
struct bnxt_qplib_sgid_tbl *sgid_tbl, int index,
struct bnxt_qplib_gid *gid);
int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
- struct bnxt_qplib_gid *gid, bool update);
+ struct bnxt_qplib_gid *gid, u16 vlan_id, bool update);
int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
- struct bnxt_qplib_gid *gid, u8 *mac, u16 vlan_id,
- bool update, u32 *index);
+ struct bnxt_qplib_gid *gid, const u8 *mac, u16 vlan_id,
+ bool update, u32 *index,
+ bool is_ugid, u32 stats_ctx_id);
int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
- struct bnxt_qplib_gid *gid, u16 gid_idx, u8 *smac);
-int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res,
- struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 index,
- u16 *pkey);
-int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
- struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey,
- bool update);
-int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
- struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey,
- bool update);
-int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
- struct bnxt_qplib_dev_attr *attr, bool vf);
+ struct bnxt_qplib_gid *gid, u16 gid_idx,
+ const u8 *smac);
+int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw);
int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res,
struct bnxt_qplib_rcfw *rcfw,
struct bnxt_qplib_ctx *ctx);
int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
bool block);
-void bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
- bool block);
+int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
+ bool block);
int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res,
struct bnxt_qplib_mrw *mrw);
int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
bool block);
int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
- u64 *pbl_tbl, int num_pbls, bool block, u32 buf_pg_size);
+ struct ib_umem *umem, int num_pbls, u32 buf_pg_size, bool unified_mr);
int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr);
int bnxt_qplib_alloc_fast_reg_mr(struct bnxt_qplib_res *res,
struct bnxt_qplib_mrw *mr, int max);
@@ -258,7 +349,25 @@ int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res,
struct bnxt_qplib_frpl *frpl, int max);
int bnxt_qplib_free_fast_reg_page_list(struct bnxt_qplib_res *res,
struct bnxt_qplib_frpl *frpl);
-int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids);
int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
struct bnxt_qplib_roce_stats *stats);
+int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid,
+ struct bnxt_qplib_ext_stat *estat);
+int bnxt_qplib_modify_cc(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_cc_param *cc_param);
+int bnxt_qplib_read_context(struct bnxt_qplib_rcfw *rcfw, u8 type, u32 xid,
+ u32 resp_size, void *resp_va);
+int bnxt_qplib_query_cc_param(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_cc_param *cc_param);
+void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw);
+int bnxt_qplib_create_flow(struct bnxt_qplib_res *res);
+int bnxt_qplib_destroy_flow(struct bnxt_qplib_res *res);
+
+#define BNXT_VAR_MAX_WQE 4352
+#define BNXT_VAR_MAX_SLOT_ALIGN 256
+#define BNXT_VAR_MAX_SGE 13
+#define BNXT_RE_MAX_RQ_WQES 65536
+
+#define BNXT_STATIC_MAX_SGE 6
+
#endif /* __BNXT_QPLIB_SP_H__*/
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_tlv.h b/drivers/infiniband/hw/bnxt_re/qplib_tlv.h
new file mode 100644
index 000000000000..ae96a75d7f31
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_tlv.h
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+#ifndef __QPLIB_TLV_H__
+#define __QPLIB_TLV_H__
+
+struct roce_tlv {
+ struct tlv tlv;
+ u8 total_size; // in units of 16 byte chunks
+ u8 unused[7]; // for 16 byte alignment
+};
+
+#define CHUNK_SIZE 16
+#define CHUNKS(x) (((x) + CHUNK_SIZE - 1) / CHUNK_SIZE)
+
+static inline void __roce_1st_tlv_prep(struct roce_tlv *rtlv, u8 tot_chunks,
+ u16 content_bytes, u8 flags)
+{
+ rtlv->tlv.cmd_discr = cpu_to_le16(CMD_DISCR_TLV_ENCAP);
+ rtlv->tlv.tlv_type = cpu_to_le16(TLV_TYPE_ROCE_SP_COMMAND);
+ rtlv->tlv.length = cpu_to_le16(content_bytes);
+ rtlv->tlv.flags = TLV_FLAGS_REQUIRED;
+ rtlv->tlv.flags |= flags ? TLV_FLAGS_MORE : 0;
+ rtlv->total_size = (tot_chunks);
+}
+
+static inline void __roce_ext_tlv_prep(struct roce_tlv *rtlv, u16 tlv_type,
+ u16 content_bytes, u8 more, u8 flags)
+{
+ rtlv->tlv.cmd_discr = cpu_to_le16(CMD_DISCR_TLV_ENCAP);
+ rtlv->tlv.tlv_type = cpu_to_le16(tlv_type);
+ rtlv->tlv.length = cpu_to_le16(content_bytes);
+ rtlv->tlv.flags |= more ? TLV_FLAGS_MORE : 0;
+ rtlv->tlv.flags |= flags ? TLV_FLAGS_REQUIRED : 0;
+}
+
+/*
+ * TLV size in units of 16 byte chunks
+ */
+#define TLV_SIZE ((sizeof(struct roce_tlv) + 15) / 16)
+/*
+ * TLV length in bytes
+ */
+#define TLV_BYTES (TLV_SIZE * 16)
+
+#define HAS_TLV_HEADER(msg) (le16_to_cpu(((struct tlv *)(msg))->cmd_discr) == CMD_DISCR_TLV_ENCAP)
+#define GET_TLV_DATA(tlv) ((void *)&((uint8_t *)(tlv))[TLV_BYTES])
+
+static inline u8 __get_cmdq_base_opcode(struct cmdq_base *req, u32 size)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ return ((struct cmdq_base *)GET_TLV_DATA(req))->opcode;
+ else
+ return req->opcode;
+}
+
+static inline void __set_cmdq_base_opcode(struct cmdq_base *req,
+ u32 size, u8 val)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ ((struct cmdq_base *)GET_TLV_DATA(req))->opcode = val;
+ else
+ req->opcode = val;
+}
+
+static inline __le16 __get_cmdq_base_cookie(struct cmdq_base *req, u32 size)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ return ((struct cmdq_base *)GET_TLV_DATA(req))->cookie;
+ else
+ return req->cookie;
+}
+
+static inline void __set_cmdq_base_cookie(struct cmdq_base *req,
+ u32 size, __le16 val)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ ((struct cmdq_base *)GET_TLV_DATA(req))->cookie = val;
+ else
+ req->cookie = val;
+}
+
+static inline __le64 __get_cmdq_base_resp_addr(struct cmdq_base *req, u32 size)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ return ((struct cmdq_base *)GET_TLV_DATA(req))->resp_addr;
+ else
+ return req->resp_addr;
+}
+
+static inline void __set_cmdq_base_resp_addr(struct cmdq_base *req,
+ u32 size, __le64 val)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ ((struct cmdq_base *)GET_TLV_DATA(req))->resp_addr = val;
+ else
+ req->resp_addr = val;
+}
+
+static inline u8 __get_cmdq_base_resp_size(struct cmdq_base *req, u32 size)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ return ((struct cmdq_base *)GET_TLV_DATA(req))->resp_size;
+ else
+ return req->resp_size;
+}
+
+static inline void __set_cmdq_base_resp_size(struct cmdq_base *req,
+ u32 size, u8 val)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ ((struct cmdq_base *)GET_TLV_DATA(req))->resp_size = val;
+ else
+ req->resp_size = val;
+}
+
+static inline u8 __get_cmdq_base_cmd_size(struct cmdq_base *req, u32 size)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ return ((struct roce_tlv *)(req))->total_size;
+ else
+ return req->cmd_size;
+}
+
+static inline void __set_cmdq_base_cmd_size(struct cmdq_base *req,
+ u32 size, u8 val)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ ((struct cmdq_base *)GET_TLV_DATA(req))->cmd_size = val;
+ else
+ req->cmd_size = val;
+}
+
+static inline __le16 __get_cmdq_base_flags(struct cmdq_base *req, u32 size)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ return ((struct cmdq_base *)GET_TLV_DATA(req))->flags;
+ else
+ return req->flags;
+}
+
+static inline void __set_cmdq_base_flags(struct cmdq_base *req,
+ u32 size, __le16 val)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ ((struct cmdq_base *)GET_TLV_DATA(req))->flags = val;
+ else
+ req->flags = val;
+}
+
+struct bnxt_qplib_tlv_modify_cc_req {
+ struct roce_tlv tlv_hdr;
+ struct cmdq_modify_roce_cc base_req;
+ __le64 tlvpad;
+ struct cmdq_modify_roce_cc_gen1_tlv ext_req;
+};
+
+struct bnxt_qplib_tlv_query_rcc_sb {
+ struct roce_tlv tlv_hdr;
+ struct creq_query_roce_cc_resp_sb base_sb;
+ struct creq_query_roce_cc_gen1_resp_sb_tlv gen1_sb;
+};
+#endif /* __QPLIB_TLV_H__ */
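To make the chunk arithmetic used by __roce_1st_tlv_prep() and bnxt_qplib_modify_cc() a little more concrete, here is a small stand-alone sketch. The byte sizes are placeholders standing in for sizeof() of the roce_hsi.h structures; only the CHUNKS() macro and the member order of struct bnxt_qplib_tlv_modify_cc_req are taken from this header.

#include <stdio.h>

#define CHUNK_SIZE 16
#define CHUNKS(x) (((x) + CHUNK_SIZE - 1) / CHUNK_SIZE)

int main(void)
{
	/* Placeholder sizes standing in for sizeof(struct ...). */
	unsigned int roce_tlv_bytes = 16;	/* struct roce_tlv */
	unsigned int base_req_bytes = 64;	/* struct cmdq_modify_roce_cc */
	unsigned int ext_req_bytes  = 80;	/* struct cmdq_modify_roce_cc_gen1_tlv */

	/* struct bnxt_qplib_tlv_modify_cc_req layout:
	 * tlv_hdr + base_req + 8-byte tlvpad + ext_req
	 */
	unsigned int total = roce_tlv_bytes + base_req_bytes + 8 + ext_req_bytes;

	/* First TLV: total_size is expressed in 16-byte chunks,
	 * its length field carries the base command size in bytes.
	 */
	printf("total_size = %u chunks\n", CHUNKS(total));

	/* Extended TLV: its length excludes its own roce_tlv header. */
	printf("gen1 payload = %u bytes\n", ext_req_bytes - roce_tlv_bytes);
	return 0;
}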
diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
index e4b09e7c2175..99ecd72e72e2 100644
--- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -39,1396 +39,1381 @@
#ifndef __BNXT_RE_HSI_H__
#define __BNXT_RE_HSI_H__
-/* include bnxt_hsi.h from bnxt_en driver */
-#include "bnxt_hsi.h"
+/* include linux/bnxt/hsi.h */
+#include <linux/bnxt/hsi.h>
-/* CMP Door Bell Format (4 bytes) */
+/* tx_doorbell (size:32b/4B) */
+struct tx_doorbell {
+ __le32 key_idx;
+ #define TX_DOORBELL_IDX_MASK 0xffffffUL
+ #define TX_DOORBELL_IDX_SFT 0
+ #define TX_DOORBELL_KEY_MASK 0xf0000000UL
+ #define TX_DOORBELL_KEY_SFT 28
+ #define TX_DOORBELL_KEY_TX (0x0UL << 28)
+ #define TX_DOORBELL_KEY_LAST TX_DOORBELL_KEY_TX
+};
+
+/* rx_doorbell (size:32b/4B) */
+struct rx_doorbell {
+ __le32 key_idx;
+ #define RX_DOORBELL_IDX_MASK 0xffffffUL
+ #define RX_DOORBELL_IDX_SFT 0
+ #define RX_DOORBELL_KEY_MASK 0xf0000000UL
+ #define RX_DOORBELL_KEY_SFT 28
+ #define RX_DOORBELL_KEY_RX (0x1UL << 28)
+ #define RX_DOORBELL_KEY_LAST RX_DOORBELL_KEY_RX
+};
+
+/* cmpl_doorbell (size:32b/4B) */
struct cmpl_doorbell {
- __le32 key_mask_valid_idx;
- #define CMPL_DOORBELL_IDX_MASK 0xffffffUL
- #define CMPL_DOORBELL_IDX_SFT 0
- #define CMPL_DOORBELL_RESERVED_MASK 0x3000000UL
- #define CMPL_DOORBELL_RESERVED_SFT 24
- #define CMPL_DOORBELL_IDX_VALID 0x4000000UL
- #define CMPL_DOORBELL_MASK 0x8000000UL
- #define CMPL_DOORBELL_KEY_MASK 0xf0000000UL
- #define CMPL_DOORBELL_KEY_SFT 28
- #define CMPL_DOORBELL_KEY_CMPL (0x2UL << 28)
-};
-
-/* Status Door Bell Format (4 bytes) */
+ __le32 key_mask_valid_idx;
+ #define CMPL_DOORBELL_IDX_MASK 0xffffffUL
+ #define CMPL_DOORBELL_IDX_SFT 0
+ #define CMPL_DOORBELL_IDX_VALID 0x4000000UL
+ #define CMPL_DOORBELL_MASK 0x8000000UL
+ #define CMPL_DOORBELL_KEY_MASK 0xf0000000UL
+ #define CMPL_DOORBELL_KEY_SFT 28
+ #define CMPL_DOORBELL_KEY_CMPL (0x2UL << 28)
+ #define CMPL_DOORBELL_KEY_LAST CMPL_DOORBELL_KEY_CMPL
+};
+
+/* status_doorbell (size:32b/4B) */
struct status_doorbell {
- __le32 key_idx;
- #define STATUS_DOORBELL_IDX_MASK 0xffffffUL
- #define STATUS_DOORBELL_IDX_SFT 0
- #define STATUS_DOORBELL_RESERVED_MASK 0xf000000UL
- #define STATUS_DOORBELL_RESERVED_SFT 24
- #define STATUS_DOORBELL_KEY_MASK 0xf0000000UL
- #define STATUS_DOORBELL_KEY_SFT 28
- #define STATUS_DOORBELL_KEY_STAT (0x3UL << 28)
-};
-
-/* RoCE Host Structures */
-
-/* Doorbell Structures */
-/* dbc_dbc (size:64b/8B) */
-struct dbc_dbc {
- __le32 index;
- #define DBC_DBC_INDEX_MASK 0xffffffUL
- #define DBC_DBC_INDEX_SFT 0
- __le32 type_path_xid;
- #define DBC_DBC_XID_MASK 0xfffffUL
- #define DBC_DBC_XID_SFT 0
- #define DBC_DBC_PATH_MASK 0x3000000UL
- #define DBC_DBC_PATH_SFT 24
- #define DBC_DBC_PATH_ROCE (0x0UL << 24)
- #define DBC_DBC_PATH_L2 (0x1UL << 24)
- #define DBC_DBC_PATH_ENGINE (0x2UL << 24)
- #define DBC_DBC_PATH_LAST DBC_DBC_PATH_ENGINE
- #define DBC_DBC_DEBUG_TRACE 0x8000000UL
- #define DBC_DBC_TYPE_MASK 0xf0000000UL
- #define DBC_DBC_TYPE_SFT 28
- #define DBC_DBC_TYPE_SQ (0x0UL << 28)
- #define DBC_DBC_TYPE_RQ (0x1UL << 28)
- #define DBC_DBC_TYPE_SRQ (0x2UL << 28)
- #define DBC_DBC_TYPE_SRQ_ARM (0x3UL << 28)
- #define DBC_DBC_TYPE_CQ (0x4UL << 28)
- #define DBC_DBC_TYPE_CQ_ARMSE (0x5UL << 28)
- #define DBC_DBC_TYPE_CQ_ARMALL (0x6UL << 28)
- #define DBC_DBC_TYPE_CQ_ARMENA (0x7UL << 28)
- #define DBC_DBC_TYPE_SRQ_ARMENA (0x8UL << 28)
- #define DBC_DBC_TYPE_CQ_CUTOFF_ACK (0x9UL << 28)
- #define DBC_DBC_TYPE_NQ (0xaUL << 28)
- #define DBC_DBC_TYPE_NQ_ARM (0xbUL << 28)
- #define DBC_DBC_TYPE_NULL (0xfUL << 28)
- #define DBC_DBC_TYPE_LAST DBC_DBC_TYPE_NULL
-};
-
-/* dbc_dbc32 (size:32b/4B) */
-struct dbc_dbc32 {
- __le32 type_abs_incr_xid;
- #define DBC_DBC32_XID_MASK 0xfffffUL
- #define DBC_DBC32_XID_SFT 0
- #define DBC_DBC32_PATH_MASK 0xc00000UL
- #define DBC_DBC32_PATH_SFT 22
- #define DBC_DBC32_PATH_ROCE (0x0UL << 22)
- #define DBC_DBC32_PATH_L2 (0x1UL << 22)
- #define DBC_DBC32_PATH_LAST DBC_DBC32_PATH_L2
- #define DBC_DBC32_INCR_MASK 0xf000000UL
- #define DBC_DBC32_INCR_SFT 24
- #define DBC_DBC32_ABS 0x10000000UL
- #define DBC_DBC32_TYPE_MASK 0xe0000000UL
- #define DBC_DBC32_TYPE_SFT 29
- #define DBC_DBC32_TYPE_SQ (0x0UL << 29)
- #define DBC_DBC32_TYPE_LAST DBC_DBC32_TYPE_SQ
-};
-
-/* SQ WQE Structures */
-/* Base SQ WQE (8 bytes) */
-struct sq_base {
- u8 wqe_type;
- #define SQ_BASE_WQE_TYPE_SEND 0x0UL
- #define SQ_BASE_WQE_TYPE_SEND_W_IMMEAD 0x1UL
- #define SQ_BASE_WQE_TYPE_SEND_W_INVALID 0x2UL
- #define SQ_BASE_WQE_TYPE_WRITE_WQE 0x4UL
- #define SQ_BASE_WQE_TYPE_WRITE_W_IMMEAD 0x5UL
- #define SQ_BASE_WQE_TYPE_READ_WQE 0x6UL
- #define SQ_BASE_WQE_TYPE_ATOMIC_CS 0x8UL
- #define SQ_BASE_WQE_TYPE_ATOMIC_FA 0xbUL
- #define SQ_BASE_WQE_TYPE_LOCAL_INVALID 0xcUL
- #define SQ_BASE_WQE_TYPE_FR_PMR 0xdUL
- #define SQ_BASE_WQE_TYPE_BIND 0xeUL
- u8 unused_0[7];
-};
-
-/* WQE SGE (16 bytes) */
-struct sq_sge {
- __le64 va_or_pa;
- __le32 l_key;
- __le32 size;
+ __le32 key_idx;
+ #define STATUS_DOORBELL_IDX_MASK 0xffffffUL
+ #define STATUS_DOORBELL_IDX_SFT 0
+ #define STATUS_DOORBELL_KEY_MASK 0xf0000000UL
+ #define STATUS_DOORBELL_KEY_SFT 28
+ #define STATUS_DOORBELL_KEY_STAT (0x3UL << 28)
+ #define STATUS_DOORBELL_KEY_LAST STATUS_DOORBELL_KEY_STAT
};
-/* PSN Search Structure (8 bytes) */
-struct sq_psn_search {
- __le32 opcode_start_psn;
- #define SQ_PSN_SEARCH_START_PSN_MASK 0xffffffUL
- #define SQ_PSN_SEARCH_START_PSN_SFT 0
- #define SQ_PSN_SEARCH_OPCODE_MASK 0xff000000UL
- #define SQ_PSN_SEARCH_OPCODE_SFT 24
- __le32 flags_next_psn;
- #define SQ_PSN_SEARCH_NEXT_PSN_MASK 0xffffffUL
- #define SQ_PSN_SEARCH_NEXT_PSN_SFT 0
- #define SQ_PSN_SEARCH_FLAGS_MASK 0xff000000UL
- #define SQ_PSN_SEARCH_FLAGS_SFT 24
+/* cmdq_init (size:128b/16B) */
+struct cmdq_init {
+ __le64 cmdq_pbl;
+ __le16 cmdq_size_cmdq_lvl;
+ #define CMDQ_INIT_CMDQ_LVL_MASK 0x3UL
+ #define CMDQ_INIT_CMDQ_LVL_SFT 0
+ #define CMDQ_INIT_CMDQ_SIZE_MASK 0xfffcUL
+ #define CMDQ_INIT_CMDQ_SIZE_SFT 2
+ __le16 creq_ring_id;
+ __le32 prod_idx;
};
-/* sq_psn_search_ext (size:128b/16B) */
-struct sq_psn_search_ext {
- __le32 opcode_start_psn;
- #define SQ_PSN_SEARCH_EXT_START_PSN_MASK 0xffffffUL
- #define SQ_PSN_SEARCH_EXT_START_PSN_SFT 0
- #define SQ_PSN_SEARCH_EXT_OPCODE_MASK 0xff000000UL
- #define SQ_PSN_SEARCH_EXT_OPCODE_SFT 24
- __le32 flags_next_psn;
- #define SQ_PSN_SEARCH_EXT_NEXT_PSN_MASK 0xffffffUL
- #define SQ_PSN_SEARCH_EXT_NEXT_PSN_SFT 0
- #define SQ_PSN_SEARCH_EXT_FLAGS_MASK 0xff000000UL
- #define SQ_PSN_SEARCH_EXT_FLAGS_SFT 24
- __le16 start_slot_idx;
- __le16 reserved16;
- __le32 reserved32;
+/* cmdq_base (size:128b/16B) */
+struct cmdq_base {
+ u8 opcode;
+ #define CMDQ_BASE_OPCODE_CREATE_QP 0x1UL
+ #define CMDQ_BASE_OPCODE_DESTROY_QP 0x2UL
+ #define CMDQ_BASE_OPCODE_MODIFY_QP 0x3UL
+ #define CMDQ_BASE_OPCODE_QUERY_QP 0x4UL
+ #define CMDQ_BASE_OPCODE_CREATE_SRQ 0x5UL
+ #define CMDQ_BASE_OPCODE_DESTROY_SRQ 0x6UL
+ #define CMDQ_BASE_OPCODE_QUERY_SRQ 0x8UL
+ #define CMDQ_BASE_OPCODE_CREATE_CQ 0x9UL
+ #define CMDQ_BASE_OPCODE_DESTROY_CQ 0xaUL
+ #define CMDQ_BASE_OPCODE_RESIZE_CQ 0xcUL
+ #define CMDQ_BASE_OPCODE_ALLOCATE_MRW 0xdUL
+ #define CMDQ_BASE_OPCODE_DEALLOCATE_KEY 0xeUL
+ #define CMDQ_BASE_OPCODE_REGISTER_MR 0xfUL
+ #define CMDQ_BASE_OPCODE_DEREGISTER_MR 0x10UL
+ #define CMDQ_BASE_OPCODE_ADD_GID 0x11UL
+ #define CMDQ_BASE_OPCODE_DELETE_GID 0x12UL
+ #define CMDQ_BASE_OPCODE_MODIFY_GID 0x17UL
+ #define CMDQ_BASE_OPCODE_QUERY_GID 0x18UL
+ #define CMDQ_BASE_OPCODE_CREATE_QP1 0x13UL
+ #define CMDQ_BASE_OPCODE_DESTROY_QP1 0x14UL
+ #define CMDQ_BASE_OPCODE_CREATE_AH 0x15UL
+ #define CMDQ_BASE_OPCODE_DESTROY_AH 0x16UL
+ #define CMDQ_BASE_OPCODE_INITIALIZE_FW 0x80UL
+ #define CMDQ_BASE_OPCODE_DEINITIALIZE_FW 0x81UL
+ #define CMDQ_BASE_OPCODE_STOP_FUNC 0x82UL
+ #define CMDQ_BASE_OPCODE_QUERY_FUNC 0x83UL
+ #define CMDQ_BASE_OPCODE_SET_FUNC_RESOURCES 0x84UL
+ #define CMDQ_BASE_OPCODE_READ_CONTEXT 0x85UL
+ #define CMDQ_BASE_OPCODE_VF_BACKCHANNEL_REQUEST 0x86UL
+ #define CMDQ_BASE_OPCODE_READ_VF_MEMORY 0x87UL
+ #define CMDQ_BASE_OPCODE_COMPLETE_VF_REQUEST 0x88UL
+ #define CMDQ_BASE_OPCODE_EXTEND_CONTEXT_ARRRAY 0x89UL
+ #define CMDQ_BASE_OPCODE_MAP_TC_TO_COS 0x8aUL
+ #define CMDQ_BASE_OPCODE_QUERY_VERSION 0x8bUL
+ #define CMDQ_BASE_OPCODE_MODIFY_ROCE_CC 0x8cUL
+ #define CMDQ_BASE_OPCODE_QUERY_ROCE_CC 0x8dUL
+ #define CMDQ_BASE_OPCODE_QUERY_ROCE_STATS 0x8eUL
+ #define CMDQ_BASE_OPCODE_SET_LINK_AGGR_MODE 0x8fUL
+ #define CMDQ_BASE_OPCODE_MODIFY_CQ 0x90UL
+ #define CMDQ_BASE_OPCODE_QUERY_QP_EXTEND 0x91UL
+ #define CMDQ_BASE_OPCODE_QUERY_ROCE_STATS_EXT 0x92UL
+ #define CMDQ_BASE_OPCODE_ROCE_MIRROR_CFG 0x99UL
+ #define CMDQ_BASE_OPCODE_LAST CMDQ_BASE_OPCODE_ROCE_MIRROR_CFG
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
};
-/* Send SQ WQE (40 bytes) */
-struct sq_send {
- u8 wqe_type;
- #define SQ_SEND_WQE_TYPE_SEND 0x0UL
- #define SQ_SEND_WQE_TYPE_SEND_W_IMMEAD 0x1UL
- #define SQ_SEND_WQE_TYPE_SEND_W_INVALID 0x2UL
- u8 flags;
- #define SQ_SEND_FLAGS_SIGNAL_COMP 0x1UL
- #define SQ_SEND_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
- #define SQ_SEND_FLAGS_UC_FENCE 0x4UL
- #define SQ_SEND_FLAGS_SE 0x8UL
- #define SQ_SEND_FLAGS_INLINE 0x10UL
- u8 wqe_size;
- u8 reserved8_1;
- __le32 inv_key_or_imm_data;
- __le32 length;
- __le32 q_key;
- __le32 dst_qp;
- #define SQ_SEND_DST_QP_MASK 0xffffffUL
- #define SQ_SEND_DST_QP_SFT 0
- #define SQ_SEND_RESERVED8_2_MASK 0xff000000UL
- #define SQ_SEND_RESERVED8_2_SFT 24
- __le32 avid;
- #define SQ_SEND_AVID_MASK 0xfffffUL
- #define SQ_SEND_AVID_SFT 0
- #define SQ_SEND_RESERVED_AVID_MASK 0xfff00000UL
- #define SQ_SEND_RESERVED_AVID_SFT 20
- __le64 reserved64;
- __le32 data[24];
-};
-
-/* Send Raw Ethernet and QP1 SQ WQE (40 bytes) */
-struct sq_send_raweth_qp1 {
- u8 wqe_type;
- #define SQ_SEND_RAWETH_QP1_WQE_TYPE_SEND 0x0UL
- u8 flags;
- #define SQ_SEND_RAWETH_QP1_FLAGS_SIGNAL_COMP 0x1UL
- #define SQ_SEND_RAWETH_QP1_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
- #define SQ_SEND_RAWETH_QP1_FLAGS_UC_FENCE 0x4UL
- #define SQ_SEND_RAWETH_QP1_FLAGS_SE 0x8UL
- #define SQ_SEND_RAWETH_QP1_FLAGS_INLINE 0x10UL
- u8 wqe_size;
- u8 reserved8;
- __le16 lflags;
- #define SQ_SEND_RAWETH_QP1_LFLAGS_TCP_UDP_CHKSUM 0x1UL
- #define SQ_SEND_RAWETH_QP1_LFLAGS_IP_CHKSUM 0x2UL
- #define SQ_SEND_RAWETH_QP1_LFLAGS_NOCRC 0x4UL
- #define SQ_SEND_RAWETH_QP1_LFLAGS_STAMP 0x8UL
- #define SQ_SEND_RAWETH_QP1_LFLAGS_T_IP_CHKSUM 0x10UL
- #define SQ_SEND_RAWETH_QP1_LFLAGS_RESERVED1_1 0x20UL
- #define SQ_SEND_RAWETH_QP1_LFLAGS_RESERVED1_2 0x40UL
- #define SQ_SEND_RAWETH_QP1_LFLAGS_RESERVED1_3 0x80UL
- #define SQ_SEND_RAWETH_QP1_LFLAGS_ROCE_CRC 0x100UL
- #define SQ_SEND_RAWETH_QP1_LFLAGS_FCOE_CRC 0x200UL
- __le16 cfa_action;
- __le32 length;
- __le32 reserved32_1;
- __le32 cfa_meta;
- #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_MASK 0xfffUL
- #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_SFT 0
- #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_DE 0x1000UL
- #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_PRI_MASK 0xe000UL
- #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_PRI_SFT 13
- #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_MASK 0x70000UL
- #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_SFT 16
- #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID88A8 (0x0UL << 16)
- #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID8100 (0x1UL << 16)
- #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID9100 (0x2UL << 16)
- #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID9200 (0x3UL << 16)
- #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID9300 (0x4UL << 16)
- #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPIDCFG (0x5UL << 16)
- #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_LAST \
- SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPIDCFG
- #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_RESERVED_MASK 0xff80000UL
- #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_RESERVED_SFT 19
- #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_MASK 0xf0000000UL
- #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_SFT 28
- #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_NONE (0x0UL << 28)
- #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_VLAN_TAG (0x1UL << 28)
- #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_LAST \
- SQ_SEND_RAWETH_QP1_CFA_META_KEY_VLAN_TAG
- __le32 reserved32_2;
- __le64 reserved64;
- __le32 data[24];
-};
-
-/* RDMA SQ WQE (40 bytes) */
-struct sq_rdma {
- u8 wqe_type;
- #define SQ_RDMA_WQE_TYPE_WRITE_WQE 0x4UL
- #define SQ_RDMA_WQE_TYPE_WRITE_W_IMMEAD 0x5UL
- #define SQ_RDMA_WQE_TYPE_READ_WQE 0x6UL
- u8 flags;
- #define SQ_RDMA_FLAGS_SIGNAL_COMP 0x1UL
- #define SQ_RDMA_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
- #define SQ_RDMA_FLAGS_UC_FENCE 0x4UL
- #define SQ_RDMA_FLAGS_SE 0x8UL
- #define SQ_RDMA_FLAGS_INLINE 0x10UL
- u8 wqe_size;
- u8 reserved8;
- __le32 imm_data;
- __le32 length;
- __le32 reserved32_1;
- __le64 remote_va;
- __le32 remote_key;
- __le32 reserved32_2;
- __le32 data[24];
-};
-
-/* Atomic SQ WQE (40 bytes) */
-struct sq_atomic {
- u8 wqe_type;
- #define SQ_ATOMIC_WQE_TYPE_ATOMIC_CS 0x8UL
- #define SQ_ATOMIC_WQE_TYPE_ATOMIC_FA 0xbUL
- u8 flags;
- #define SQ_ATOMIC_FLAGS_SIGNAL_COMP 0x1UL
- #define SQ_ATOMIC_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
- #define SQ_ATOMIC_FLAGS_UC_FENCE 0x4UL
- #define SQ_ATOMIC_FLAGS_SE 0x8UL
- #define SQ_ATOMIC_FLAGS_INLINE 0x10UL
- __le16 reserved16;
- __le32 remote_key;
- __le64 remote_va;
- __le64 swap_data;
- __le64 cmp_data;
- __le32 data[24];
-};
-
-/* Local Invalidate SQ WQE (40 bytes) */
-struct sq_localinvalidate {
- u8 wqe_type;
- #define SQ_LOCALINVALIDATE_WQE_TYPE_LOCAL_INVALID 0xcUL
- u8 flags;
- #define SQ_LOCALINVALIDATE_FLAGS_SIGNAL_COMP 0x1UL
- #define SQ_LOCALINVALIDATE_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
- #define SQ_LOCALINVALIDATE_FLAGS_UC_FENCE 0x4UL
- #define SQ_LOCALINVALIDATE_FLAGS_SE 0x8UL
- #define SQ_LOCALINVALIDATE_FLAGS_INLINE 0x10UL
- __le16 reserved16;
- __le32 inv_l_key;
- __le64 reserved64;
- __le32 reserved128[4];
- __le32 data[24];
-};
-
-/* FR-PMR SQ WQE (40 bytes) */
-struct sq_fr_pmr {
- u8 wqe_type;
- #define SQ_FR_PMR_WQE_TYPE_FR_PMR 0xdUL
- u8 flags;
- #define SQ_FR_PMR_FLAGS_SIGNAL_COMP 0x1UL
- #define SQ_FR_PMR_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
- #define SQ_FR_PMR_FLAGS_UC_FENCE 0x4UL
- #define SQ_FR_PMR_FLAGS_SE 0x8UL
- #define SQ_FR_PMR_FLAGS_INLINE 0x10UL
- u8 access_cntl;
- #define SQ_FR_PMR_ACCESS_CNTL_LOCAL_WRITE 0x1UL
- #define SQ_FR_PMR_ACCESS_CNTL_REMOTE_READ 0x2UL
- #define SQ_FR_PMR_ACCESS_CNTL_REMOTE_WRITE 0x4UL
- #define SQ_FR_PMR_ACCESS_CNTL_REMOTE_ATOMIC 0x8UL
- #define SQ_FR_PMR_ACCESS_CNTL_WINDOW_BIND 0x10UL
- u8 zero_based_page_size_log;
- #define SQ_FR_PMR_PAGE_SIZE_LOG_MASK 0x1fUL
- #define SQ_FR_PMR_PAGE_SIZE_LOG_SFT 0
- #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_4K 0x0UL
- #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_8K 0x1UL
- #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_64K 0x4UL
- #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_256K 0x6UL
- #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_1M 0x8UL
- #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_2M 0x9UL
- #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_4M 0xaUL
- #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_1G 0x12UL
- #define SQ_FR_PMR_ZERO_BASED 0x20UL
- #define SQ_FR_PMR_RESERVED2_MASK 0xc0UL
- #define SQ_FR_PMR_RESERVED2_SFT 6
- __le32 l_key;
- u8 length[5];
- u8 reserved8_1;
- u8 reserved8_2;
- u8 numlevels_pbl_page_size_log;
- #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_MASK 0x1fUL
- #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_SFT 0
- #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_4K 0x0UL
- #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_8K 0x1UL
- #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_64K 0x4UL
- #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_256K 0x6UL
- #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_1M 0x8UL
- #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_2M 0x9UL
- #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_4M 0xaUL
- #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_1G 0x12UL
- #define SQ_FR_PMR_RESERVED1 0x20UL
- #define SQ_FR_PMR_NUMLEVELS_MASK 0xc0UL
- #define SQ_FR_PMR_NUMLEVELS_SFT 6
- #define SQ_FR_PMR_NUMLEVELS_PHYSICAL (0x0UL << 6)
- #define SQ_FR_PMR_NUMLEVELS_LAYER1 (0x1UL << 6)
- #define SQ_FR_PMR_NUMLEVELS_LAYER2 (0x2UL << 6)
- __le64 pblptr;
- __le64 va;
- __le32 data[24];
-};
-
-/* Bind SQ WQE (40 bytes) */
-struct sq_bind {
- u8 wqe_type;
- #define SQ_BIND_WQE_TYPE_BIND 0xeUL
- u8 flags;
- #define SQ_BIND_FLAGS_SIGNAL_COMP 0x1UL
- #define SQ_BIND_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
- #define SQ_BIND_FLAGS_UC_FENCE 0x4UL
- #define SQ_BIND_FLAGS_SE 0x8UL
- #define SQ_BIND_FLAGS_INLINE 0x10UL
- u8 access_cntl;
- #define SQ_BIND_ACCESS_CNTL_LOCAL_WRITE 0x1UL
- #define SQ_BIND_ACCESS_CNTL_REMOTE_READ 0x2UL
- #define SQ_BIND_ACCESS_CNTL_REMOTE_WRITE 0x4UL
- #define SQ_BIND_ACCESS_CNTL_REMOTE_ATOMIC 0x8UL
- #define SQ_BIND_ACCESS_CNTL_WINDOW_BIND 0x10UL
- u8 reserved8_1;
- u8 mw_type_zero_based;
- #define SQ_BIND_ZERO_BASED 0x1UL
- #define SQ_BIND_MW_TYPE 0x2UL
- #define SQ_BIND_MW_TYPE_TYPE1 (0x0UL << 1)
- #define SQ_BIND_MW_TYPE_TYPE2 (0x1UL << 1)
- #define SQ_BIND_RESERVED6_MASK 0xfcUL
- #define SQ_BIND_RESERVED6_SFT 2
- u8 reserved8_2;
- __le16 reserved16;
- __le32 parent_l_key;
- __le32 l_key;
- __le64 va;
- u8 length[5];
- u8 data_reserved24[99];
- #define SQ_BIND_RESERVED24_MASK 0xffffff00UL
- #define SQ_BIND_RESERVED24_SFT 8
- #define SQ_BIND_DATA_MASK 0xffffffffUL
- #define SQ_BIND_DATA_SFT 0
-};
-
-/* RQ/SRQ WQE Structures */
-/* RQ/SRQ WQE (40 bytes) */
-struct rq_wqe {
- u8 wqe_type;
- #define RQ_WQE_WQE_TYPE_RCV 0x80UL
- u8 flags;
- u8 wqe_size;
- u8 reserved8;
- __le32 reserved32;
- __le32 wr_id[2];
- #define RQ_WQE_WR_ID_MASK 0xfffffUL
- #define RQ_WQE_WR_ID_SFT 0
- #define RQ_WQE_RESERVED44_MASK 0xfff00000UL
- #define RQ_WQE_RESERVED44_SFT 20
- __le32 reserved128[4];
- __le32 data[24];
-};
-
-/* CQ CQE Structures */
-/* Base CQE (32 bytes) */
-struct cq_base {
- __le64 reserved64_1;
- __le64 reserved64_2;
- __le64 reserved64_3;
- u8 cqe_type_toggle;
- #define CQ_BASE_TOGGLE 0x1UL
- #define CQ_BASE_CQE_TYPE_MASK 0x1eUL
- #define CQ_BASE_CQE_TYPE_SFT 1
- #define CQ_BASE_CQE_TYPE_REQ (0x0UL << 1)
- #define CQ_BASE_CQE_TYPE_RES_RC (0x1UL << 1)
- #define CQ_BASE_CQE_TYPE_RES_UD (0x2UL << 1)
- #define CQ_BASE_CQE_TYPE_RES_RAWETH_QP1 (0x3UL << 1)
- #define CQ_BASE_CQE_TYPE_TERMINAL (0xeUL << 1)
- #define CQ_BASE_CQE_TYPE_CUT_OFF (0xfUL << 1)
- #define CQ_BASE_RESERVED3_MASK 0xe0UL
- #define CQ_BASE_RESERVED3_SFT 5
- u8 status;
- __le16 reserved16;
- __le32 reserved32;
-};
-
-/* Requester CQ CQE (32 bytes) */
-struct cq_req {
- __le64 qp_handle;
- __le16 sq_cons_idx;
- __le16 reserved16_1;
- __le32 reserved32_2;
- __le64 reserved64;
- u8 cqe_type_toggle;
- #define CQ_REQ_TOGGLE 0x1UL
- #define CQ_REQ_CQE_TYPE_MASK 0x1eUL
- #define CQ_REQ_CQE_TYPE_SFT 1
- #define CQ_REQ_CQE_TYPE_REQ (0x0UL << 1)
- #define CQ_REQ_RESERVED3_MASK 0xe0UL
- #define CQ_REQ_RESERVED3_SFT 5
- u8 status;
- #define CQ_REQ_STATUS_OK 0x0UL
- #define CQ_REQ_STATUS_BAD_RESPONSE_ERR 0x1UL
- #define CQ_REQ_STATUS_LOCAL_LENGTH_ERR 0x2UL
- #define CQ_REQ_STATUS_LOCAL_QP_OPERATION_ERR 0x3UL
- #define CQ_REQ_STATUS_LOCAL_PROTECTION_ERR 0x4UL
- #define CQ_REQ_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
- #define CQ_REQ_STATUS_REMOTE_INVALID_REQUEST_ERR 0x6UL
- #define CQ_REQ_STATUS_REMOTE_ACCESS_ERR 0x7UL
- #define CQ_REQ_STATUS_REMOTE_OPERATION_ERR 0x8UL
- #define CQ_REQ_STATUS_RNR_NAK_RETRY_CNT_ERR 0x9UL
- #define CQ_REQ_STATUS_TRANSPORT_RETRY_CNT_ERR 0xaUL
- #define CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR 0xbUL
- __le16 reserved16_2;
- __le32 reserved32_1;
-};
-
-/* Responder RC CQE (32 bytes) */
-struct cq_res_rc {
- __le32 length;
- __le32 imm_data_or_inv_r_key;
- __le64 qp_handle;
- __le64 mr_handle;
- u8 cqe_type_toggle;
- #define CQ_RES_RC_TOGGLE 0x1UL
- #define CQ_RES_RC_CQE_TYPE_MASK 0x1eUL
- #define CQ_RES_RC_CQE_TYPE_SFT 1
- #define CQ_RES_RC_CQE_TYPE_RES_RC (0x1UL << 1)
- #define CQ_RES_RC_RESERVED3_MASK 0xe0UL
- #define CQ_RES_RC_RESERVED3_SFT 5
- u8 status;
- #define CQ_RES_RC_STATUS_OK 0x0UL
- #define CQ_RES_RC_STATUS_LOCAL_ACCESS_ERROR 0x1UL
- #define CQ_RES_RC_STATUS_LOCAL_LENGTH_ERR 0x2UL
- #define CQ_RES_RC_STATUS_LOCAL_PROTECTION_ERR 0x3UL
- #define CQ_RES_RC_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
- #define CQ_RES_RC_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
- #define CQ_RES_RC_STATUS_REMOTE_INVALID_REQUEST_ERR 0x6UL
- #define CQ_RES_RC_STATUS_WORK_REQUEST_FLUSHED_ERR 0x7UL
- #define CQ_RES_RC_STATUS_HW_FLUSH_ERR 0x8UL
- __le16 flags;
- #define CQ_RES_RC_FLAGS_SRQ 0x1UL
- #define CQ_RES_RC_FLAGS_SRQ_RQ (0x0UL << 0)
- #define CQ_RES_RC_FLAGS_SRQ_SRQ (0x1UL << 0)
- #define CQ_RES_RC_FLAGS_SRQ_LAST CQ_RES_RC_FLAGS_SRQ_SRQ
- #define CQ_RES_RC_FLAGS_IMM 0x2UL
- #define CQ_RES_RC_FLAGS_INV 0x4UL
- #define CQ_RES_RC_FLAGS_RDMA 0x8UL
- #define CQ_RES_RC_FLAGS_RDMA_SEND (0x0UL << 3)
- #define CQ_RES_RC_FLAGS_RDMA_RDMA_WRITE (0x1UL << 3)
- #define CQ_RES_RC_FLAGS_RDMA_LAST CQ_RES_RC_FLAGS_RDMA_RDMA_WRITE
- __le32 srq_or_rq_wr_id;
- #define CQ_RES_RC_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
- #define CQ_RES_RC_SRQ_OR_RQ_WR_ID_SFT 0
- #define CQ_RES_RC_RESERVED12_MASK 0xfff00000UL
- #define CQ_RES_RC_RESERVED12_SFT 20
-};
-
-/* Responder UD CQE (32 bytes) */
-struct cq_res_ud {
- __le16 length;
- #define CQ_RES_UD_LENGTH_MASK 0x3fffUL
- #define CQ_RES_UD_LENGTH_SFT 0
- __le16 cfa_metadata;
- #define CQ_RES_UD_CFA_METADATA_VID_MASK 0xfffUL
- #define CQ_RES_UD_CFA_METADATA_VID_SFT 0
- #define CQ_RES_UD_CFA_METADATA_DE 0x1000UL
- #define CQ_RES_UD_CFA_METADATA_PRI_MASK 0xe000UL
- #define CQ_RES_UD_CFA_METADATA_PRI_SFT 13
- __le32 imm_data;
- __le64 qp_handle;
- __le16 src_mac[3];
- __le16 src_qp_low;
- u8 cqe_type_toggle;
- #define CQ_RES_UD_TOGGLE 0x1UL
- #define CQ_RES_UD_CQE_TYPE_MASK 0x1eUL
- #define CQ_RES_UD_CQE_TYPE_SFT 1
- #define CQ_RES_UD_CQE_TYPE_RES_UD (0x2UL << 1)
- u8 status;
- #define CQ_RES_UD_STATUS_OK 0x0UL
- #define CQ_RES_UD_STATUS_LOCAL_ACCESS_ERROR 0x1UL
- #define CQ_RES_UD_STATUS_HW_LOCAL_LENGTH_ERR 0x2UL
- #define CQ_RES_UD_STATUS_LOCAL_PROTECTION_ERR 0x3UL
- #define CQ_RES_UD_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
- #define CQ_RES_UD_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
- #define CQ_RES_UD_STATUS_WORK_REQUEST_FLUSHED_ERR 0x7UL
- #define CQ_RES_UD_STATUS_HW_FLUSH_ERR 0x8UL
- __le16 flags;
- #define CQ_RES_UD_FLAGS_SRQ 0x1UL
- #define CQ_RES_UD_FLAGS_SRQ_RQ (0x0UL << 0)
- #define CQ_RES_UD_FLAGS_SRQ_SRQ (0x1UL << 0)
- #define CQ_RES_UD_FLAGS_SRQ_LAST CQ_RES_UD_FLAGS_SRQ_SRQ
- #define CQ_RES_UD_FLAGS_IMM 0x2UL
- #define CQ_RES_UD_FLAGS_UNUSED_MASK 0xcUL
- #define CQ_RES_UD_FLAGS_UNUSED_SFT 2
- #define CQ_RES_UD_FLAGS_ROCE_IP_VER_MASK 0x30UL
- #define CQ_RES_UD_FLAGS_ROCE_IP_VER_SFT 4
- #define CQ_RES_UD_FLAGS_ROCE_IP_VER_V1 (0x0UL << 4)
- #define CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV4 (0x2UL << 4)
- #define CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV6 (0x3UL << 4)
- #define CQ_RES_UD_FLAGS_ROCE_IP_VER_LAST \
- CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV6
- #define CQ_RES_UD_FLAGS_META_FORMAT_MASK 0x3c0UL
- #define CQ_RES_UD_FLAGS_META_FORMAT_SFT 6
- #define CQ_RES_UD_FLAGS_META_FORMAT_NONE (0x0UL << 6)
- #define CQ_RES_UD_FLAGS_META_FORMAT_VLAN (0x1UL << 6)
- #define CQ_RES_UD_FLAGS_META_FORMAT_TUNNEL_ID (0x2UL << 6)
- #define CQ_RES_UD_FLAGS_META_FORMAT_CHDR_DATA (0x3UL << 6)
- #define CQ_RES_UD_FLAGS_META_FORMAT_HDR_OFFSET (0x4UL << 6)
- #define CQ_RES_UD_FLAGS_META_FORMAT_LAST \
- CQ_RES_UD_FLAGS_META_FORMAT_HDR_OFFSET
- #define CQ_RES_UD_FLAGS_EXT_META_FORMAT_MASK 0xc00UL
- #define CQ_RES_UD_FLAGS_EXT_META_FORMAT_SFT 10
-
- __le32 src_qp_high_srq_or_rq_wr_id;
- #define CQ_RES_UD_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
- #define CQ_RES_UD_SRQ_OR_RQ_WR_ID_SFT 0
- #define CQ_RES_UD_SRC_QP_HIGH_MASK 0xff000000UL
- #define CQ_RES_UD_SRC_QP_HIGH_SFT 24
-};
-
-/* Responder RawEth and QP1 CQE (32 bytes) */
-struct cq_res_raweth_qp1 {
- __le16 length;
- #define CQ_RES_RAWETH_QP1_LENGTH_MASK 0x3fffUL
- #define CQ_RES_RAWETH_QP1_LENGTH_SFT 0
- #define CQ_RES_RAWETH_QP1_RESERVED2_MASK 0xc000UL
- #define CQ_RES_RAWETH_QP1_RESERVED2_SFT 14
- __le16 raweth_qp1_flags;
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ERROR 0x1UL
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_RESERVED5_1_MASK 0x3eUL
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_RESERVED5_1_SFT 1
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_MASK 0x3c0UL
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_SFT 6
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_NOT_KNOWN (0x0UL << 6)
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_IP (0x1UL << 6)
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_TCP (0x2UL << 6)
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_UDP (0x3UL << 6)
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_FCOE (0x4UL << 6)
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_ROCE (0x5UL << 6)
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_ICMP (0x7UL << 6)
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_PTP_WO_TIMESTAMP \
- (0x8UL << 6)
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_PTP_W_TIMESTAMP \
- (0x9UL << 6)
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_LAST \
- CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_PTP_W_TIMESTAMP
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_MASK 0x3ffUL
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_SFT 0
- #define CQ_RES_RAWETH_QP1_RESERVED6_MASK 0xfc00UL
- #define CQ_RES_RAWETH_QP1_RESERVED6_SFT 10
- __le16 raweth_qp1_errors;
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_RESERVED4_MASK 0xfUL
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_RESERVED4_SFT 0
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_IP_CS_ERROR 0x10UL
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_L4_CS_ERROR 0x20UL
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_IP_CS_ERROR 0x40UL
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_L4_CS_ERROR 0x80UL
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_CRC_ERROR 0x100UL
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_MASK 0xe00UL
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_SFT 9
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_NO_ERROR \
- (0x0UL << 9)
- #define \
- CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_VERSION \
- (0x1UL << 9)
- #define \
- CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_HDR_LEN \
- (0x2UL << 9)
- #define \
- CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_TUNNEL_TOTAL_ERROR \
- (0x3UL << 9)
- #define \
- CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_IP_TOTAL_ERROR \
- (0x4UL << 9)
- #define \
- CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_UDP_TOTAL_ERROR \
- (0x5UL << 9)
- #define \
- CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_TTL \
- (0x6UL << 9)
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_LAST \
- CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_TTL
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_MASK 0xf000UL
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_SFT 12
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_NO_ERROR \
- (0x0UL << 12)
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L3_BAD_VERSION \
- (0x1UL << 12)
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L3_BAD_HDR_LEN \
- (0x2UL << 12)
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L3_BAD_TTL \
- (0x3UL << 12)
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_IP_TOTAL_ERROR \
- (0x4UL << 12)
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_UDP_TOTAL_ERROR \
- (0x5UL << 12)
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_HDR_LEN \
- (0x6UL << 12)
- #define \
- CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_HDR_LEN_TOO_SMALL\
- (0x7UL << 12)
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_OPT_LEN \
- (0x8UL << 12)
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_LAST \
- CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_OPT_LEN
- __le16 raweth_qp1_cfa_code;
- __le64 qp_handle;
- __le32 raweth_qp1_flags2;
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_IP_CS_CALC 0x1UL
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_L4_CS_CALC 0x2UL
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_T_IP_CS_CALC 0x4UL
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_T_L4_CS_CALC 0x8UL
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_MASK 0xf0UL
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_SFT 4
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_NONE \
- (0x0UL << 4)
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_VLAN \
- (0x1UL << 4)
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_LAST\
- CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_VLAN
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_IP_TYPE 0x100UL
- __le32 raweth_qp1_metadata;
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_VID_MASK 0xfffUL
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_VID_SFT 0
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_DE 0x1000UL
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_PRI_MASK 0xe000UL
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_PRI_SFT 13
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_TPID_MASK 0xffff0000UL
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_TPID_SFT 16
- u8 cqe_type_toggle;
- #define CQ_RES_RAWETH_QP1_TOGGLE 0x1UL
- #define CQ_RES_RAWETH_QP1_CQE_TYPE_MASK 0x1eUL
- #define CQ_RES_RAWETH_QP1_CQE_TYPE_SFT 1
- #define CQ_RES_RAWETH_QP1_CQE_TYPE_RES_RAWETH_QP1 (0x3UL << 1)
- #define CQ_RES_RAWETH_QP1_RESERVED3_MASK 0xe0UL
- #define CQ_RES_RAWETH_QP1_RESERVED3_SFT 5
- u8 status;
- #define CQ_RES_RAWETH_QP1_STATUS_OK 0x0UL
- #define CQ_RES_RAWETH_QP1_STATUS_LOCAL_ACCESS_ERROR 0x1UL
- #define CQ_RES_RAWETH_QP1_STATUS_HW_LOCAL_LENGTH_ERR 0x2UL
- #define CQ_RES_RAWETH_QP1_STATUS_LOCAL_PROTECTION_ERR 0x3UL
- #define CQ_RES_RAWETH_QP1_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
- #define CQ_RES_RAWETH_QP1_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
- #define CQ_RES_RAWETH_QP1_STATUS_WORK_REQUEST_FLUSHED_ERR 0x7UL
- #define CQ_RES_RAWETH_QP1_STATUS_HW_FLUSH_ERR 0x8UL
- __le16 flags;
- #define CQ_RES_RAWETH_QP1_FLAGS_SRQ 0x1UL
- #define CQ_RES_RAWETH_QP1_FLAGS_SRQ_RQ 0x0UL
- #define CQ_RES_RAWETH_QP1_FLAGS_SRQ_SRQ 0x1UL
- #define CQ_RES_RAWETH_QP1_FLAGS_SRQ_LAST \
- CQ_RES_RAWETH_QP1_FLAGS_SRQ_SRQ
- __le32 raweth_qp1_payload_offset_srq_or_rq_wr_id;
- #define CQ_RES_RAWETH_QP1_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
- #define CQ_RES_RAWETH_QP1_SRQ_OR_RQ_WR_ID_SFT 0
- #define CQ_RES_RAWETH_QP1_RESERVED4_MASK 0xf00000UL
- #define CQ_RES_RAWETH_QP1_RESERVED4_SFT 20
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_PAYLOAD_OFFSET_MASK 0xff000000UL
- #define CQ_RES_RAWETH_QP1_RAWETH_QP1_PAYLOAD_OFFSET_SFT 24
-};
-
-/* Terminal CQE (32 bytes) */
-struct cq_terminal {
- __le64 qp_handle;
- __le16 sq_cons_idx;
- __le16 rq_cons_idx;
- __le32 reserved32_1;
- __le64 reserved64_3;
- u8 cqe_type_toggle;
- #define CQ_TERMINAL_TOGGLE 0x1UL
- #define CQ_TERMINAL_CQE_TYPE_MASK 0x1eUL
- #define CQ_TERMINAL_CQE_TYPE_SFT 1
- #define CQ_TERMINAL_CQE_TYPE_TERMINAL (0xeUL << 1)
- #define CQ_TERMINAL_RESERVED3_MASK 0xe0UL
- #define CQ_TERMINAL_RESERVED3_SFT 5
- u8 status;
- #define CQ_TERMINAL_STATUS_OK 0x0UL
- __le16 reserved16;
- __le32 reserved32_2;
-};
-
-/* Cutoff CQE (32 bytes) */
-struct cq_cutoff {
- __le64 reserved64_1;
- __le64 reserved64_2;
- __le64 reserved64_3;
- u8 cqe_type_toggle;
- #define CQ_CUTOFF_TOGGLE 0x1UL
- #define CQ_CUTOFF_CQE_TYPE_MASK 0x1eUL
- #define CQ_CUTOFF_CQE_TYPE_SFT 1
- #define CQ_CUTOFF_CQE_TYPE_CUT_OFF (0xfUL << 1)
- #define CQ_CUTOFF_RESERVED3_MASK 0xe0UL
- #define CQ_CUTOFF_RESERVED3_SFT 5
- u8 status;
- #define CQ_CUTOFF_STATUS_OK 0x0UL
- __le16 reserved16;
- __le32 reserved32;
-};
-
-/* Notification Queue (NQ) Structures */
-/* Base NQ Record (16 bytes) */
-struct nq_base {
- __le16 info10_type;
- #define NQ_BASE_TYPE_MASK 0x3fUL
- #define NQ_BASE_TYPE_SFT 0
- #define NQ_BASE_TYPE_CQ_NOTIFICATION 0x30UL
- #define NQ_BASE_TYPE_SRQ_EVENT 0x32UL
- #define NQ_BASE_TYPE_DBQ_EVENT 0x34UL
- #define NQ_BASE_TYPE_QP_EVENT 0x38UL
- #define NQ_BASE_TYPE_FUNC_EVENT 0x3aUL
- #define NQ_BASE_INFO10_MASK 0xffc0UL
- #define NQ_BASE_INFO10_SFT 6
- __le16 info16;
- __le32 info32;
- __le32 info63_v[2];
- #define NQ_BASE_V 0x1UL
- #define NQ_BASE_INFO63_MASK 0xfffffffeUL
- #define NQ_BASE_INFO63_SFT 1
-};
-
-/* Completion Queue Notification (16 bytes) */
-struct nq_cn {
- __le16 type;
- #define NQ_CN_TYPE_MASK 0x3fUL
- #define NQ_CN_TYPE_SFT 0
- #define NQ_CN_TYPE_CQ_NOTIFICATION 0x30UL
- #define NQ_CN_RESERVED9_MASK 0xffc0UL
- #define NQ_CN_RESERVED9_SFT 6
- __le16 reserved16;
- __le32 cq_handle_low;
- __le32 v;
- #define NQ_CN_V 0x1UL
- #define NQ_CN_RESERVED31_MASK 0xfffffffeUL
- #define NQ_CN_RESERVED31_SFT 1
- __le32 cq_handle_high;
-};
-
-/* SRQ Event Notification (16 bytes) */
-struct nq_srq_event {
- u8 type;
- #define NQ_SRQ_EVENT_TYPE_MASK 0x3fUL
- #define NQ_SRQ_EVENT_TYPE_SFT 0
- #define NQ_SRQ_EVENT_TYPE_SRQ_EVENT 0x32UL
- #define NQ_SRQ_EVENT_RESERVED1_MASK 0xc0UL
- #define NQ_SRQ_EVENT_RESERVED1_SFT 6
- u8 event;
- #define NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT 0x1UL
- __le16 reserved16;
- __le32 srq_handle_low;
- __le32 v;
- #define NQ_SRQ_EVENT_V 0x1UL
- #define NQ_SRQ_EVENT_RESERVED31_MASK 0xfffffffeUL
- #define NQ_SRQ_EVENT_RESERVED31_SFT 1
- __le32 srq_handle_high;
-};
-
-/* DBQ Async Event Notification (16 bytes) */
-struct nq_dbq_event {
- u8 type;
- #define NQ_DBQ_EVENT_TYPE_MASK 0x3fUL
- #define NQ_DBQ_EVENT_TYPE_SFT 0
- #define NQ_DBQ_EVENT_TYPE_DBQ_EVENT 0x34UL
- #define NQ_DBQ_EVENT_RESERVED1_MASK 0xc0UL
- #define NQ_DBQ_EVENT_RESERVED1_SFT 6
- u8 event;
- #define NQ_DBQ_EVENT_EVENT_DBQ_THRESHOLD_EVENT 0x1UL
- __le16 db_pfid;
- #define NQ_DBQ_EVENT_DB_PFID_MASK 0xfUL
- #define NQ_DBQ_EVENT_DB_PFID_SFT 0
- #define NQ_DBQ_EVENT_RESERVED12_MASK 0xfff0UL
- #define NQ_DBQ_EVENT_RESERVED12_SFT 4
- __le32 db_dpi;
- #define NQ_DBQ_EVENT_DB_DPI_MASK 0xfffffUL
- #define NQ_DBQ_EVENT_DB_DPI_SFT 0
- #define NQ_DBQ_EVENT_RESERVED12_2_MASK 0xfff00000UL
- #define NQ_DBQ_EVENT_RESERVED12_2_SFT 20
- __le32 v;
- #define NQ_DBQ_EVENT_V 0x1UL
- #define NQ_DBQ_EVENT_RESERVED32_MASK 0xfffffffeUL
- #define NQ_DBQ_EVENT_RESERVED32_SFT 1
- __le32 db_type_db_xid;
- #define NQ_DBQ_EVENT_DB_XID_MASK 0xfffffUL
- #define NQ_DBQ_EVENT_DB_XID_SFT 0
- #define NQ_DBQ_EVENT_RESERVED8_MASK 0xff00000UL
- #define NQ_DBQ_EVENT_RESERVED8_SFT 20
- #define NQ_DBQ_EVENT_DB_TYPE_MASK 0xf0000000UL
- #define NQ_DBQ_EVENT_DB_TYPE_SFT 28
-};
-
-/* Read Request/Response Queue Structures */
-/* Input Read Request Queue (IRRQ) Message (32 bytes) */
-struct xrrq_irrq {
- __le16 credits_type;
- #define XRRQ_IRRQ_TYPE 0x1UL
- #define XRRQ_IRRQ_TYPE_READ_REQ 0x0UL
- #define XRRQ_IRRQ_TYPE_ATOMIC_REQ 0x1UL
- #define XRRQ_IRRQ_RESERVED10_MASK 0x7feUL
- #define XRRQ_IRRQ_RESERVED10_SFT 1
- #define XRRQ_IRRQ_CREDITS_MASK 0xf800UL
- #define XRRQ_IRRQ_CREDITS_SFT 11
- __le16 reserved16;
- __le32 reserved32;
- __le32 psn;
- #define XRRQ_IRRQ_PSN_MASK 0xffffffUL
- #define XRRQ_IRRQ_PSN_SFT 0
- #define XRRQ_IRRQ_RESERVED8_1_MASK 0xff000000UL
- #define XRRQ_IRRQ_RESERVED8_1_SFT 24
- __le32 msn;
- #define XRRQ_IRRQ_MSN_MASK 0xffffffUL
- #define XRRQ_IRRQ_MSN_SFT 0
- #define XRRQ_IRRQ_RESERVED8_2_MASK 0xff000000UL
- #define XRRQ_IRRQ_RESERVED8_2_SFT 24
- __le64 va_or_atomic_result;
- __le32 rdma_r_key;
- __le32 length;
-};
-
-/* Output Read Request Queue (ORRQ) Message (32 bytes) */
-struct xrrq_orrq {
- __le16 num_sges_type;
- #define XRRQ_ORRQ_TYPE 0x1UL
- #define XRRQ_ORRQ_TYPE_READ_REQ 0x0UL
- #define XRRQ_ORRQ_TYPE_ATOMIC_REQ 0x1UL
- #define XRRQ_ORRQ_RESERVED10_MASK 0x7feUL
- #define XRRQ_ORRQ_RESERVED10_SFT 1
- #define XRRQ_ORRQ_NUM_SGES_MASK 0xf800UL
- #define XRRQ_ORRQ_NUM_SGES_SFT 11
- __le16 reserved16;
- __le32 length;
- __le32 psn;
- #define XRRQ_ORRQ_PSN_MASK 0xffffffUL
- #define XRRQ_ORRQ_PSN_SFT 0
- #define XRRQ_ORRQ_RESERVED8_1_MASK 0xff000000UL
- #define XRRQ_ORRQ_RESERVED8_1_SFT 24
- __le32 end_psn;
- #define XRRQ_ORRQ_END_PSN_MASK 0xffffffUL
- #define XRRQ_ORRQ_END_PSN_SFT 0
- #define XRRQ_ORRQ_RESERVED8_2_MASK 0xff000000UL
- #define XRRQ_ORRQ_RESERVED8_2_SFT 24
- __le64 first_sge_phy_or_sing_sge_va;
- __le32 single_sge_l_key;
- __le32 single_sge_size;
-};
-
-/* Page Buffer List Memory Structures (PBL) */
-/* Page Table Entry (PTE) (8 bytes) */
-struct ptu_pte {
- __le32 page_next_to_last_last_valid[2];
- #define PTU_PTE_VALID 0x1UL
- #define PTU_PTE_LAST 0x2UL
- #define PTU_PTE_NEXT_TO_LAST 0x4UL
- #define PTU_PTE_PAGE_MASK 0xfffff000UL
- #define PTU_PTE_PAGE_SFT 12
+/* creq_base (size:128b/16B) */
+struct creq_base {
+ u8 type;
+ #define CREQ_BASE_TYPE_MASK 0x3fUL
+ #define CREQ_BASE_TYPE_SFT 0
+ #define CREQ_BASE_TYPE_QP_EVENT 0x38UL
+ #define CREQ_BASE_TYPE_FUNC_EVENT 0x3aUL
+ #define CREQ_BASE_TYPE_LAST CREQ_BASE_TYPE_FUNC_EVENT
+ u8 reserved56[7];
+ u8 v;
+ #define CREQ_BASE_V 0x1UL
+ u8 event;
+ u8 reserved48[6];
};
-/* Page Directory Entry (PDE) (8 bytes) */
-struct ptu_pde {
- __le32 page_valid[2];
- #define PTU_PDE_VALID 0x1UL
- #define PTU_PDE_PAGE_MASK 0xfffff000UL
- #define PTU_PDE_PAGE_SFT 12
+/* cmdq_query_version (size:128b/16B) */
+struct cmdq_query_version {
+ u8 opcode;
+ #define CMDQ_QUERY_VERSION_OPCODE_QUERY_VERSION 0x8bUL
+ #define CMDQ_QUERY_VERSION_OPCODE_LAST CMDQ_QUERY_VERSION_OPCODE_QUERY_VERSION
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
};
-/* RoCE Fastpath Host Structures */
-/* Command Queue (CMDQ) Interface */
-/* Init CMDQ (16 bytes) */
-struct cmdq_init {
- __le64 cmdq_pbl;
- __le16 cmdq_size_cmdq_lvl;
- #define CMDQ_INIT_CMDQ_LVL_MASK 0x3UL
- #define CMDQ_INIT_CMDQ_LVL_SFT 0
- #define CMDQ_INIT_CMDQ_SIZE_MASK 0xfffcUL
- #define CMDQ_INIT_CMDQ_SIZE_SFT 2
- __le16 creq_ring_id;
- __le32 prod_idx;
+/* creq_query_version_resp (size:128b/16B) */
+struct creq_query_version_resp {
+ u8 type;
+ #define CREQ_QUERY_VERSION_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_VERSION_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_VERSION_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_VERSION_RESP_TYPE_LAST CREQ_QUERY_VERSION_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ u8 fw_maj;
+ u8 fw_minor;
+ u8 fw_bld;
+ u8 fw_rsvd;
+ u8 v;
+ #define CREQ_QUERY_VERSION_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_QUERY_VERSION_RESP_EVENT_QUERY_VERSION 0x8bUL
+ #define CREQ_QUERY_VERSION_RESP_EVENT_LAST \
+ CREQ_QUERY_VERSION_RESP_EVENT_QUERY_VERSION
+ __le16 reserved16;
+ u8 intf_maj;
+ u8 intf_minor;
+ u8 intf_bld;
+ u8 intf_rsvd;
};
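
/*
 * A minimal sketch of consuming the response above, assuming kernel string
 * helpers; the buffer and helper name are hypothetical. The firmware and
 * interface versions are typically rendered as dotted strings.
 */
static inline void creq_query_version_fw_string(const struct creq_query_version_resp *resp,
						char *buf, size_t len)
{
	snprintf(buf, len, "%d.%d.%d.%d", resp->fw_maj, resp->fw_minor,
		 resp->fw_bld, resp->fw_rsvd);
}
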
-/* Update CMDQ producer index (16 bytes) */
-struct cmdq_update {
- __le64 reserved64;
- __le32 reserved32;
- __le32 prod_idx;
+/* cmdq_initialize_fw (size:896b/112B) */
+struct cmdq_initialize_fw {
+ u8 opcode;
+ #define CMDQ_INITIALIZE_FW_OPCODE_INITIALIZE_FW 0x80UL
+ #define CMDQ_INITIALIZE_FW_OPCODE_LAST CMDQ_INITIALIZE_FW_OPCODE_INITIALIZE_FW
+ u8 cmd_size;
+ __le16 flags;
+ #define CMDQ_INITIALIZE_FW_FLAGS_MRAV_RESERVATION_SPLIT 0x1UL
+ #define CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED 0x2UL
+ #define CMDQ_INITIALIZE_FW_FLAGS_OPTIMIZE_MODIFY_QP_SUPPORTED 0x8UL
+ #define CMDQ_INITIALIZE_FW_FLAGS_L2_VF_RESOURCE_MGMT 0x10UL
+ #define CMDQ_INITIALIZE_FW_FLAGS_MIRROR_ON_ROCE_SUPPORTED 0x80UL
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ u8 qpc_pg_size_qpc_lvl;
+ #define CMDQ_INITIALIZE_FW_QPC_LVL_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_QPC_LVL_SFT 0
+ #define CMDQ_INITIALIZE_FW_QPC_LVL_LVL_0 0x0UL
+ #define CMDQ_INITIALIZE_FW_QPC_LVL_LVL_1 0x1UL
+ #define CMDQ_INITIALIZE_FW_QPC_LVL_LVL_2 0x2UL
+ #define CMDQ_INITIALIZE_FW_QPC_LVL_LAST CMDQ_INITIALIZE_FW_QPC_LVL_LVL_2
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT 4
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_1G (0x5UL << 4)
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_LAST CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_1G
+ u8 mrw_pg_size_mrw_lvl;
+ #define CMDQ_INITIALIZE_FW_MRW_LVL_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_MRW_LVL_SFT 0
+ #define CMDQ_INITIALIZE_FW_MRW_LVL_LVL_0 0x0UL
+ #define CMDQ_INITIALIZE_FW_MRW_LVL_LVL_1 0x1UL
+ #define CMDQ_INITIALIZE_FW_MRW_LVL_LVL_2 0x2UL
+ #define CMDQ_INITIALIZE_FW_MRW_LVL_LAST CMDQ_INITIALIZE_FW_MRW_LVL_LVL_2
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_SFT 4
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_1G (0x5UL << 4)
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_LAST CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_1G
+ u8 srq_pg_size_srq_lvl;
+ #define CMDQ_INITIALIZE_FW_SRQ_LVL_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_SRQ_LVL_SFT 0
+ #define CMDQ_INITIALIZE_FW_SRQ_LVL_LVL_0 0x0UL
+ #define CMDQ_INITIALIZE_FW_SRQ_LVL_LVL_1 0x1UL
+ #define CMDQ_INITIALIZE_FW_SRQ_LVL_LVL_2 0x2UL
+ #define CMDQ_INITIALIZE_FW_SRQ_LVL_LAST CMDQ_INITIALIZE_FW_SRQ_LVL_LVL_2
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_SFT 4
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_1G (0x5UL << 4)
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_LAST CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_1G
+ u8 cq_pg_size_cq_lvl;
+ #define CMDQ_INITIALIZE_FW_CQ_LVL_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_CQ_LVL_SFT 0
+ #define CMDQ_INITIALIZE_FW_CQ_LVL_LVL_0 0x0UL
+ #define CMDQ_INITIALIZE_FW_CQ_LVL_LVL_1 0x1UL
+ #define CMDQ_INITIALIZE_FW_CQ_LVL_LVL_2 0x2UL
+ #define CMDQ_INITIALIZE_FW_CQ_LVL_LAST CMDQ_INITIALIZE_FW_CQ_LVL_LVL_2
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_SFT 4
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_1G (0x5UL << 4)
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_LAST CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_1G
+ u8 tqm_pg_size_tqm_lvl;
+ #define CMDQ_INITIALIZE_FW_TQM_LVL_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_TQM_LVL_SFT 0
+ #define CMDQ_INITIALIZE_FW_TQM_LVL_LVL_0 0x0UL
+ #define CMDQ_INITIALIZE_FW_TQM_LVL_LVL_1 0x1UL
+ #define CMDQ_INITIALIZE_FW_TQM_LVL_LVL_2 0x2UL
+ #define CMDQ_INITIALIZE_FW_TQM_LVL_LAST CMDQ_INITIALIZE_FW_TQM_LVL_LVL_2
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_SFT 4
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_1G (0x5UL << 4)
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_LAST CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_1G
+ u8 tim_pg_size_tim_lvl;
+ #define CMDQ_INITIALIZE_FW_TIM_LVL_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_TIM_LVL_SFT 0
+ #define CMDQ_INITIALIZE_FW_TIM_LVL_LVL_0 0x0UL
+ #define CMDQ_INITIALIZE_FW_TIM_LVL_LVL_1 0x1UL
+ #define CMDQ_INITIALIZE_FW_TIM_LVL_LVL_2 0x2UL
+ #define CMDQ_INITIALIZE_FW_TIM_LVL_LAST CMDQ_INITIALIZE_FW_TIM_LVL_LVL_2
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_SFT 4
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_1G (0x5UL << 4)
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_LAST CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_1G
+ __le16 log2_dbr_pg_size;
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_SFT 0
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_4K 0x0UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_8K 0x1UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_16K 0x2UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_32K 0x3UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_64K 0x4UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_128K 0x5UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_256K 0x6UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_512K 0x7UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_1M 0x8UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_2M 0x9UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_4M 0xaUL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_8M 0xbUL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_16M 0xcUL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_32M 0xdUL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_64M 0xeUL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_128M 0xfUL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_LAST \
+ CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_128M
+ #define CMDQ_INITIALIZE_FW_RSVD_MASK 0xfff0UL
+ #define CMDQ_INITIALIZE_FW_RSVD_SFT 4
+ __le64 qpc_page_dir;
+ __le64 mrw_page_dir;
+ __le64 srq_page_dir;
+ __le64 cq_page_dir;
+ __le64 tqm_page_dir;
+ __le64 tim_page_dir;
+ __le32 number_of_qp;
+ __le32 number_of_mrw;
+ __le32 number_of_srq;
+ __le32 number_of_cq;
+ __le32 max_qp_per_vf;
+ __le32 max_mrw_per_vf;
+ __le32 max_srq_per_vf;
+ __le32 max_cq_per_vf;
+ __le32 max_gid_per_vf;
+ __le32 stat_ctx_id;
};
-/* CMDQ common header structure (16 bytes) */
-struct cmdq_base {
- u8 opcode;
- #define CMDQ_BASE_OPCODE_CREATE_QP 0x1UL
- #define CMDQ_BASE_OPCODE_DESTROY_QP 0x2UL
- #define CMDQ_BASE_OPCODE_MODIFY_QP 0x3UL
- #define CMDQ_BASE_OPCODE_QUERY_QP 0x4UL
- #define CMDQ_BASE_OPCODE_CREATE_SRQ 0x5UL
- #define CMDQ_BASE_OPCODE_DESTROY_SRQ 0x6UL
- #define CMDQ_BASE_OPCODE_QUERY_SRQ 0x8UL
- #define CMDQ_BASE_OPCODE_CREATE_CQ 0x9UL
- #define CMDQ_BASE_OPCODE_DESTROY_CQ 0xaUL
- #define CMDQ_BASE_OPCODE_RESIZE_CQ 0xcUL
- #define CMDQ_BASE_OPCODE_ALLOCATE_MRW 0xdUL
- #define CMDQ_BASE_OPCODE_DEALLOCATE_KEY 0xeUL
- #define CMDQ_BASE_OPCODE_REGISTER_MR 0xfUL
- #define CMDQ_BASE_OPCODE_DEREGISTER_MR 0x10UL
- #define CMDQ_BASE_OPCODE_ADD_GID 0x11UL
- #define CMDQ_BASE_OPCODE_DELETE_GID 0x12UL
- #define CMDQ_BASE_OPCODE_MODIFY_GID 0x17UL
- #define CMDQ_BASE_OPCODE_QUERY_GID 0x18UL
- #define CMDQ_BASE_OPCODE_CREATE_QP1 0x13UL
- #define CMDQ_BASE_OPCODE_DESTROY_QP1 0x14UL
- #define CMDQ_BASE_OPCODE_CREATE_AH 0x15UL
- #define CMDQ_BASE_OPCODE_DESTROY_AH 0x16UL
- #define CMDQ_BASE_OPCODE_INITIALIZE_FW 0x80UL
- #define CMDQ_BASE_OPCODE_DEINITIALIZE_FW 0x81UL
- #define CMDQ_BASE_OPCODE_STOP_FUNC 0x82UL
- #define CMDQ_BASE_OPCODE_QUERY_FUNC 0x83UL
- #define CMDQ_BASE_OPCODE_SET_FUNC_RESOURCES 0x84UL
- #define CMDQ_BASE_OPCODE_READ_CONTEXT 0x85UL
- #define CMDQ_BASE_OPCODE_VF_BACKCHANNEL_REQUEST 0x86UL
- #define CMDQ_BASE_OPCODE_READ_VF_MEMORY 0x87UL
- #define CMDQ_BASE_OPCODE_COMPLETE_VF_REQUEST 0x88UL
- #define CMDQ_BASE_OPCODE_EXTEND_CONTEXT_ARRRAY 0x89UL
- #define CMDQ_BASE_OPCODE_MAP_TC_TO_COS 0x8aUL
- #define CMDQ_BASE_OPCODE_QUERY_VERSION 0x8bUL
- #define CMDQ_BASE_OPCODE_MODIFY_CC 0x8cUL
- #define CMDQ_BASE_OPCODE_QUERY_CC 0x8dUL
- #define CMDQ_BASE_OPCODE_QUERY_ROCE_STATS 0x8eUL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
-};
-
-/* Create QP command (96 bytes) */
+/* creq_initialize_fw_resp (size:128b/16B) */
+struct creq_initialize_fw_resp {
+ u8 type;
+ #define CREQ_INITIALIZE_FW_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_INITIALIZE_FW_RESP_TYPE_SFT 0
+ #define CREQ_INITIALIZE_FW_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_INITIALIZE_FW_RESP_TYPE_LAST CREQ_INITIALIZE_FW_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_INITIALIZE_FW_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_INITIALIZE_FW_RESP_EVENT_INITIALIZE_FW 0x80UL
+ #define CREQ_INITIALIZE_FW_RESP_EVENT_LAST \
+ CREQ_INITIALIZE_FW_RESP_EVENT_INITIALIZE_FW
+ u8 reserved48[6];
+};
+
+/* cmdq_deinitialize_fw (size:128b/16B) */
+struct cmdq_deinitialize_fw {
+ u8 opcode;
+ #define CMDQ_DEINITIALIZE_FW_OPCODE_DEINITIALIZE_FW 0x81UL
+ #define CMDQ_DEINITIALIZE_FW_OPCODE_LAST \
+ CMDQ_DEINITIALIZE_FW_OPCODE_DEINITIALIZE_FW
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+};
+
+/* creq_deinitialize_fw_resp (size:128b/16B) */
+struct creq_deinitialize_fw_resp {
+ u8 type;
+ #define CREQ_DEINITIALIZE_FW_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DEINITIALIZE_FW_RESP_TYPE_SFT 0
+ #define CREQ_DEINITIALIZE_FW_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DEINITIALIZE_FW_RESP_TYPE_LAST CREQ_DEINITIALIZE_FW_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_DEINITIALIZE_FW_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_DEINITIALIZE_FW_RESP_EVENT_DEINITIALIZE_FW 0x81UL
+ #define CREQ_DEINITIALIZE_FW_RESP_EVENT_LAST \
+ CREQ_DEINITIALIZE_FW_RESP_EVENT_DEINITIALIZE_FW
+ u8 reserved48[6];
+};
+
+/* cmdq_create_qp (size:832b/104B) */
struct cmdq_create_qp {
- u8 opcode;
- #define CMDQ_CREATE_QP_OPCODE_CREATE_QP 0x1UL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- __le64 qp_handle;
- __le32 qp_flags;
- #define CMDQ_CREATE_QP_QP_FLAGS_SRQ_USED 0x1UL
- #define CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION 0x2UL
- #define CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE 0x4UL
- #define CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED 0x8UL
- u8 type;
- #define CMDQ_CREATE_QP_TYPE_RC 0x2UL
- #define CMDQ_CREATE_QP_TYPE_UD 0x4UL
- #define CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE 0x6UL
- #define CMDQ_CREATE_QP_TYPE_GSI 0x7UL
- u8 sq_pg_size_sq_lvl;
- #define CMDQ_CREATE_QP_SQ_LVL_MASK 0xfUL
- #define CMDQ_CREATE_QP_SQ_LVL_SFT 0
- #define CMDQ_CREATE_QP_SQ_LVL_LVL_0 0x0UL
- #define CMDQ_CREATE_QP_SQ_LVL_LVL_1 0x1UL
- #define CMDQ_CREATE_QP_SQ_LVL_LVL_2 0x2UL
- #define CMDQ_CREATE_QP_SQ_PG_SIZE_MASK 0xf0UL
- #define CMDQ_CREATE_QP_SQ_PG_SIZE_SFT 4
- #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_4K (0x0UL << 4)
- #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8K (0x1UL << 4)
- #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_64K (0x2UL << 4)
- #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_2M (0x3UL << 4)
- #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8M (0x4UL << 4)
- #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_1G (0x5UL << 4)
- u8 rq_pg_size_rq_lvl;
- #define CMDQ_CREATE_QP_RQ_LVL_MASK 0xfUL
- #define CMDQ_CREATE_QP_RQ_LVL_SFT 0
- #define CMDQ_CREATE_QP_RQ_LVL_LVL_0 0x0UL
- #define CMDQ_CREATE_QP_RQ_LVL_LVL_1 0x1UL
- #define CMDQ_CREATE_QP_RQ_LVL_LVL_2 0x2UL
- #define CMDQ_CREATE_QP_RQ_PG_SIZE_MASK 0xf0UL
- #define CMDQ_CREATE_QP_RQ_PG_SIZE_SFT 4
- #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_4K (0x0UL << 4)
- #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8K (0x1UL << 4)
- #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_64K (0x2UL << 4)
- #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_2M (0x3UL << 4)
- #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8M (0x4UL << 4)
- #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_1G (0x5UL << 4)
- u8 unused_0;
- __le32 dpi;
- __le32 sq_size;
- __le32 rq_size;
- __le16 sq_fwo_sq_sge;
- #define CMDQ_CREATE_QP_SQ_SGE_MASK 0xfUL
- #define CMDQ_CREATE_QP_SQ_SGE_SFT 0
- #define CMDQ_CREATE_QP_SQ_FWO_MASK 0xfff0UL
- #define CMDQ_CREATE_QP_SQ_FWO_SFT 4
- __le16 rq_fwo_rq_sge;
- #define CMDQ_CREATE_QP_RQ_SGE_MASK 0xfUL
- #define CMDQ_CREATE_QP_RQ_SGE_SFT 0
- #define CMDQ_CREATE_QP_RQ_FWO_MASK 0xfff0UL
- #define CMDQ_CREATE_QP_RQ_FWO_SFT 4
- __le32 scq_cid;
- __le32 rcq_cid;
- __le32 srq_cid;
- __le32 pd_id;
- __le64 sq_pbl;
- __le64 rq_pbl;
- __le64 irrq_addr;
- __le64 orrq_addr;
-};
-
-/* Destroy QP command (24 bytes) */
+ u8 opcode;
+ #define CMDQ_CREATE_QP_OPCODE_CREATE_QP 0x1UL
+ #define CMDQ_CREATE_QP_OPCODE_LAST CMDQ_CREATE_QP_OPCODE_CREATE_QP
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le64 qp_handle;
+ __le32 qp_flags;
+ #define CMDQ_CREATE_QP_QP_FLAGS_SRQ_USED 0x1UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION 0x2UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE 0x4UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED 0x8UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED 0x10UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_OPTIMIZED_TRANSMIT_ENABLED 0x20UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_RESPONDER_UD_CQE_WITH_CFA 0x40UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_EXT_STATS_ENABLED 0x80UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_EXPRESS_MODE_ENABLED 0x100UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_STEERING_TAG_VALID 0x200UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_RDMA_READ_OR_ATOMICS_USED 0x400UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_LAST \
+ CMDQ_CREATE_QP_QP_FLAGS_RDMA_READ_OR_ATOMICS_USED
+ u8 type;
+ #define CMDQ_CREATE_QP_TYPE_RC 0x2UL
+ #define CMDQ_CREATE_QP_TYPE_UD 0x4UL
+ #define CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE 0x6UL
+ #define CMDQ_CREATE_QP_TYPE_GSI 0x7UL
+ #define CMDQ_CREATE_QP_TYPE_LAST CMDQ_CREATE_QP_TYPE_GSI
+ u8 sq_pg_size_sq_lvl;
+ #define CMDQ_CREATE_QP_SQ_LVL_MASK 0xfUL
+ #define CMDQ_CREATE_QP_SQ_LVL_SFT 0
+ #define CMDQ_CREATE_QP_SQ_LVL_LVL_0 0x0UL
+ #define CMDQ_CREATE_QP_SQ_LVL_LVL_1 0x1UL
+ #define CMDQ_CREATE_QP_SQ_LVL_LVL_2 0x2UL
+ #define CMDQ_CREATE_QP_SQ_LVL_LAST CMDQ_CREATE_QP_SQ_LVL_LVL_2
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_SFT 4
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_1G (0x5UL << 4)
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_LAST CMDQ_CREATE_QP_SQ_PG_SIZE_PG_1G
+ u8 rq_pg_size_rq_lvl;
+ #define CMDQ_CREATE_QP_RQ_LVL_MASK 0xfUL
+ #define CMDQ_CREATE_QP_RQ_LVL_SFT 0
+ #define CMDQ_CREATE_QP_RQ_LVL_LVL_0 0x0UL
+ #define CMDQ_CREATE_QP_RQ_LVL_LVL_1 0x1UL
+ #define CMDQ_CREATE_QP_RQ_LVL_LVL_2 0x2UL
+ #define CMDQ_CREATE_QP_RQ_LVL_LAST CMDQ_CREATE_QP_RQ_LVL_LVL_2
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_SFT 4
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_1G (0x5UL << 4)
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_LAST CMDQ_CREATE_QP_RQ_PG_SIZE_PG_1G
+ u8 unused_0;
+ __le32 dpi;
+ __le32 sq_size;
+ __le32 rq_size;
+ __le16 sq_fwo_sq_sge;
+ #define CMDQ_CREATE_QP_SQ_SGE_MASK 0xfUL
+ #define CMDQ_CREATE_QP_SQ_SGE_SFT 0
+ #define CMDQ_CREATE_QP_SQ_FWO_MASK 0xfff0UL
+ #define CMDQ_CREATE_QP_SQ_FWO_SFT 4
+ __le16 rq_fwo_rq_sge;
+ #define CMDQ_CREATE_QP_RQ_SGE_MASK 0xfUL
+ #define CMDQ_CREATE_QP_RQ_SGE_SFT 0
+ #define CMDQ_CREATE_QP_RQ_FWO_MASK 0xfff0UL
+ #define CMDQ_CREATE_QP_RQ_FWO_SFT 4
+ __le32 scq_cid;
+ __le32 rcq_cid;
+ __le32 srq_cid;
+ __le32 pd_id;
+ __le64 sq_pbl;
+ __le64 rq_pbl;
+ __le64 irrq_addr;
+ __le64 orrq_addr;
+ __le32 request_xid;
+ __le16 steering_tag;
+ __le16 reserved16;
+};
+
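/*
 * A minimal packing sketch for the request above, assuming kernel byte-order
 * helpers; the helper name and chosen values are hypothetical. Plain values
 * go through their _SFT/_MASK pair, while pre-shifted encodings such as the
 * PG_SIZE values are OR'ed in directly.
 */
static inline void cmdq_create_qp_pack_example(struct cmdq_create_qp *req)
{
	req->opcode = CMDQ_CREATE_QP_OPCODE_CREATE_QP;
	req->type = CMDQ_CREATE_QP_TYPE_RC;
	req->qp_flags = cpu_to_le32(CMDQ_CREATE_QP_QP_FLAGS_SRQ_USED);
	/* level-1 PBL with 4K pages for the send queue */
	req->sq_pg_size_sq_lvl = ((CMDQ_CREATE_QP_SQ_LVL_LVL_1 <<
				   CMDQ_CREATE_QP_SQ_LVL_SFT) &
				  CMDQ_CREATE_QP_SQ_LVL_MASK) |
				 CMDQ_CREATE_QP_SQ_PG_SIZE_PG_4K;
}
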
+/* creq_create_qp_resp (size:128b/16B) */
+struct creq_create_qp_resp {
+ u8 type;
+ #define CREQ_CREATE_QP_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_CREATE_QP_RESP_TYPE_SFT 0
+ #define CREQ_CREATE_QP_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_CREATE_QP_RESP_TYPE_LAST CREQ_CREATE_QP_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_CREATE_QP_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_CREATE_QP_RESP_EVENT_CREATE_QP 0x1UL
+ #define CREQ_CREATE_QP_RESP_EVENT_LAST CREQ_CREATE_QP_RESP_EVENT_CREATE_QP
+ u8 optimized_transmit_enabled;
+ u8 reserved48[5];
+};
+
+/* cmdq_destroy_qp (size:192b/24B) */
struct cmdq_destroy_qp {
- u8 opcode;
- #define CMDQ_DESTROY_QP_OPCODE_DESTROY_QP 0x2UL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- __le32 qp_cid;
- __le32 unused_0;
-};
-
-/* Modify QP command (112 bytes) */
+ u8 opcode;
+ #define CMDQ_DESTROY_QP_OPCODE_DESTROY_QP 0x2UL
+ #define CMDQ_DESTROY_QP_OPCODE_LAST CMDQ_DESTROY_QP_OPCODE_DESTROY_QP
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 qp_cid;
+ __le32 unused_0;
+};
+
+/* creq_destroy_qp_resp (size:128b/16B) */
+struct creq_destroy_qp_resp {
+ u8 type;
+ #define CREQ_DESTROY_QP_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DESTROY_QP_RESP_TYPE_SFT 0
+ #define CREQ_DESTROY_QP_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DESTROY_QP_RESP_TYPE_LAST CREQ_DESTROY_QP_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DESTROY_QP_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_DESTROY_QP_RESP_EVENT_DESTROY_QP 0x2UL
+ #define CREQ_DESTROY_QP_RESP_EVENT_LAST CREQ_DESTROY_QP_RESP_EVENT_DESTROY_QP
+ u8 reserved48[6];
+};
+
+/* cmdq_modify_qp (size:1024b/128B) */
struct cmdq_modify_qp {
- u8 opcode;
- #define CMDQ_MODIFY_QP_OPCODE_MODIFY_QP 0x3UL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- __le32 modify_mask;
- #define CMDQ_MODIFY_QP_MODIFY_MASK_STATE 0x1UL
+ u8 opcode;
+ #define CMDQ_MODIFY_QP_OPCODE_MODIFY_QP 0x3UL
+ #define CMDQ_MODIFY_QP_OPCODE_LAST CMDQ_MODIFY_QP_OPCODE_MODIFY_QP
+ u8 cmd_size;
+ __le16 flags;
+ #define CMDQ_MODIFY_QP_FLAGS_SRQ_USED 0x1UL
+ __le16 cookie;
+ u8 resp_size;
+ u8 qp_type;
+ #define CMDQ_MODIFY_QP_QP_TYPE_RC 0x2UL
+ #define CMDQ_MODIFY_QP_QP_TYPE_UD 0x4UL
+ #define CMDQ_MODIFY_QP_QP_TYPE_RAW_ETHERTYPE 0x6UL
+ #define CMDQ_MODIFY_QP_QP_TYPE_GSI 0x7UL
+ #define CMDQ_MODIFY_QP_QP_TYPE_LAST CMDQ_MODIFY_QP_QP_TYPE_GSI
+ __le64 resp_addr;
+ __le32 modify_mask;
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_STATE 0x1UL
#define CMDQ_MODIFY_QP_MODIFY_MASK_EN_SQD_ASYNC_NOTIFY 0x2UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS 0x4UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_PKEY 0x8UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_QKEY 0x10UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_DGID 0x20UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL 0x40UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX 0x80UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_HOP_LIMIT 0x100UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_TRAFFIC_CLASS 0x200UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_DEST_MAC 0x400UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU 0x1000UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_TIMEOUT 0x2000UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_RETRY_CNT 0x4000UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_RNR_RETRY 0x8000UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_RQ_PSN 0x10000UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_MAX_RD_ATOMIC 0x20000UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_MIN_RNR_TIMER 0x40000UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_SQ_PSN 0x80000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS 0x4UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_PKEY 0x8UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_QKEY 0x10UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_DGID 0x20UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL 0x40UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX 0x80UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_HOP_LIMIT 0x100UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_TRAFFIC_CLASS 0x200UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_DEST_MAC 0x400UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_PINGPONG_PUSH_MODE 0x800UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU 0x1000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_TIMEOUT 0x2000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_RETRY_CNT 0x4000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_RNR_RETRY 0x8000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_RQ_PSN 0x10000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_MAX_RD_ATOMIC 0x20000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_MIN_RNR_TIMER 0x40000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_SQ_PSN 0x80000UL
#define CMDQ_MODIFY_QP_MODIFY_MASK_MAX_DEST_RD_ATOMIC 0x100000UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_SQ_SIZE 0x200000UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_RQ_SIZE 0x400000UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_SQ_SGE 0x800000UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_RQ_SGE 0x1000000UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_MAX_INLINE_DATA 0x2000000UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_DEST_QP_ID 0x4000000UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_SRC_MAC 0x8000000UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID 0x10000000UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_ENABLE_CC 0x20000000UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_TOS_ECN 0x40000000UL
- #define CMDQ_MODIFY_QP_MODIFY_MASK_TOS_DSCP 0x80000000UL
- __le32 qp_cid;
- u8 network_type_en_sqd_async_notify_new_state;
- #define CMDQ_MODIFY_QP_NEW_STATE_MASK 0xfUL
- #define CMDQ_MODIFY_QP_NEW_STATE_SFT 0
- #define CMDQ_MODIFY_QP_NEW_STATE_RESET 0x0UL
- #define CMDQ_MODIFY_QP_NEW_STATE_INIT 0x1UL
- #define CMDQ_MODIFY_QP_NEW_STATE_RTR 0x2UL
- #define CMDQ_MODIFY_QP_NEW_STATE_RTS 0x3UL
- #define CMDQ_MODIFY_QP_NEW_STATE_SQD 0x4UL
- #define CMDQ_MODIFY_QP_NEW_STATE_SQE 0x5UL
- #define CMDQ_MODIFY_QP_NEW_STATE_ERR 0x6UL
- #define CMDQ_MODIFY_QP_EN_SQD_ASYNC_NOTIFY 0x10UL
- #define CMDQ_MODIFY_QP_NETWORK_TYPE_MASK 0xc0UL
- #define CMDQ_MODIFY_QP_NETWORK_TYPE_SFT 6
- #define CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1 (0x0UL << 6)
- #define CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4 (0x2UL << 6)
- #define CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6 (0x3UL << 6)
- u8 access;
- #define CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE 0x1UL
- #define CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE 0x2UL
- #define CMDQ_MODIFY_QP_ACCESS_REMOTE_READ 0x4UL
- #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC 0x8UL
- __le16 pkey;
- __le32 qkey;
- __le32 dgid[4];
- __le32 flow_label;
- __le16 sgid_index;
- u8 hop_limit;
- u8 traffic_class;
- __le16 dest_mac[3];
- u8 tos_dscp_tos_ecn;
- #define CMDQ_MODIFY_QP_TOS_ECN_MASK 0x3UL
- #define CMDQ_MODIFY_QP_TOS_ECN_SFT 0
- #define CMDQ_MODIFY_QP_TOS_DSCP_MASK 0xfcUL
- #define CMDQ_MODIFY_QP_TOS_DSCP_SFT 2
- u8 path_mtu;
- #define CMDQ_MODIFY_QP_PATH_MTU_MASK 0xf0UL
- #define CMDQ_MODIFY_QP_PATH_MTU_SFT 4
- #define CMDQ_MODIFY_QP_PATH_MTU_MTU_256 (0x0UL << 4)
- #define CMDQ_MODIFY_QP_PATH_MTU_MTU_512 (0x1UL << 4)
- #define CMDQ_MODIFY_QP_PATH_MTU_MTU_1024 (0x2UL << 4)
- #define CMDQ_MODIFY_QP_PATH_MTU_MTU_2048 (0x3UL << 4)
- #define CMDQ_MODIFY_QP_PATH_MTU_MTU_4096 (0x4UL << 4)
- #define CMDQ_MODIFY_QP_PATH_MTU_MTU_8192 (0x5UL << 4)
- u8 timeout;
- u8 retry_cnt;
- u8 rnr_retry;
- u8 min_rnr_timer;
- __le32 rq_psn;
- __le32 sq_psn;
- u8 max_rd_atomic;
- u8 max_dest_rd_atomic;
- __le16 enable_cc;
- #define CMDQ_MODIFY_QP_ENABLE_CC 0x1UL
- __le32 sq_size;
- __le32 rq_size;
- __le16 sq_sge;
- __le16 rq_sge;
- __le32 max_inline_data;
- __le32 dest_qp_id;
- __le32 unused_3;
- __le16 src_mac[3];
- __le16 vlan_pcp_vlan_dei_vlan_id;
- #define CMDQ_MODIFY_QP_VLAN_ID_MASK 0xfffUL
- #define CMDQ_MODIFY_QP_VLAN_ID_SFT 0
- #define CMDQ_MODIFY_QP_VLAN_DEI 0x1000UL
- #define CMDQ_MODIFY_QP_VLAN_PCP_MASK 0xe000UL
- #define CMDQ_MODIFY_QP_VLAN_PCP_SFT 13
-};
-
-/* Query QP command (24 bytes) */
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_SQ_SIZE 0x200000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_RQ_SIZE 0x400000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_SQ_SGE 0x800000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_RQ_SGE 0x1000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_MAX_INLINE_DATA 0x2000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_DEST_QP_ID 0x4000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_SRC_MAC 0x8000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID 0x10000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_ENABLE_CC 0x20000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_TOS_ECN 0x40000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_TOS_DSCP 0x80000000UL
+ __le32 qp_cid;
+ u8 network_type_en_sqd_async_notify_new_state;
+ #define CMDQ_MODIFY_QP_NEW_STATE_MASK 0xfUL
+ #define CMDQ_MODIFY_QP_NEW_STATE_SFT 0
+ #define CMDQ_MODIFY_QP_NEW_STATE_RESET 0x0UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_INIT 0x1UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_RTR 0x2UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_RTS 0x3UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_SQD 0x4UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_SQE 0x5UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_ERR 0x6UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_LAST CMDQ_MODIFY_QP_NEW_STATE_ERR
+ #define CMDQ_MODIFY_QP_EN_SQD_ASYNC_NOTIFY 0x10UL
+ #define CMDQ_MODIFY_QP_UNUSED1 0x20UL
+ #define CMDQ_MODIFY_QP_NETWORK_TYPE_MASK 0xc0UL
+ #define CMDQ_MODIFY_QP_NETWORK_TYPE_SFT 6
+ #define CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1 (0x0UL << 6)
+ #define CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4 (0x2UL << 6)
+ #define CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6 (0x3UL << 6)
+ #define CMDQ_MODIFY_QP_NETWORK_TYPE_LAST CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6
+ u8 access;
+ #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC_REMOTE_READ_REMOTE_WRITE_LOCAL_WRITE_MASK 0xffUL
+ #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC_REMOTE_READ_REMOTE_WRITE_LOCAL_WRITE_SFT 0
+ #define CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE 0x1UL
+ #define CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE 0x2UL
+ #define CMDQ_MODIFY_QP_ACCESS_REMOTE_READ 0x4UL
+ #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC 0x8UL
+ __le16 pkey;
+ __le32 qkey;
+ __le32 dgid[4];
+ __le32 flow_label;
+ __le16 sgid_index;
+ u8 hop_limit;
+ u8 traffic_class;
+ __le16 dest_mac[3];
+ u8 tos_dscp_tos_ecn;
+ #define CMDQ_MODIFY_QP_TOS_ECN_MASK 0x3UL
+ #define CMDQ_MODIFY_QP_TOS_ECN_SFT 0
+ #define CMDQ_MODIFY_QP_TOS_DSCP_MASK 0xfcUL
+ #define CMDQ_MODIFY_QP_TOS_DSCP_SFT 2
+ u8 path_mtu_pingpong_push_enable;
+ #define CMDQ_MODIFY_QP_PINGPONG_PUSH_ENABLE 0x1UL
+ #define CMDQ_MODIFY_QP_UNUSED3_MASK 0xeUL
+ #define CMDQ_MODIFY_QP_UNUSED3_SFT 1
+ #define CMDQ_MODIFY_QP_PATH_MTU_MASK 0xf0UL
+ #define CMDQ_MODIFY_QP_PATH_MTU_SFT 4
+ #define CMDQ_MODIFY_QP_PATH_MTU_MTU_256 (0x0UL << 4)
+ #define CMDQ_MODIFY_QP_PATH_MTU_MTU_512 (0x1UL << 4)
+ #define CMDQ_MODIFY_QP_PATH_MTU_MTU_1024 (0x2UL << 4)
+ #define CMDQ_MODIFY_QP_PATH_MTU_MTU_2048 (0x3UL << 4)
+ #define CMDQ_MODIFY_QP_PATH_MTU_MTU_4096 (0x4UL << 4)
+ #define CMDQ_MODIFY_QP_PATH_MTU_MTU_8192 (0x5UL << 4)
+ #define CMDQ_MODIFY_QP_PATH_MTU_LAST CMDQ_MODIFY_QP_PATH_MTU_MTU_8192
+ u8 timeout;
+ u8 retry_cnt;
+ u8 rnr_retry;
+ u8 min_rnr_timer;
+ __le32 rq_psn;
+ __le32 sq_psn;
+ u8 max_rd_atomic;
+ u8 max_dest_rd_atomic;
+ __le16 enable_cc;
+ #define CMDQ_MODIFY_QP_ENABLE_CC 0x1UL
+ #define CMDQ_MODIFY_QP_UNUSED15_MASK 0xfffeUL
+ #define CMDQ_MODIFY_QP_UNUSED15_SFT 1
+ __le32 sq_size;
+ __le32 rq_size;
+ __le16 sq_sge;
+ __le16 rq_sge;
+ __le32 max_inline_data;
+ __le32 dest_qp_id;
+ __le32 pingpong_push_dpi;
+ __le16 src_mac[3];
+ __le16 vlan_pcp_vlan_dei_vlan_id;
+ #define CMDQ_MODIFY_QP_VLAN_ID_MASK 0xfffUL
+ #define CMDQ_MODIFY_QP_VLAN_ID_SFT 0
+ #define CMDQ_MODIFY_QP_VLAN_DEI 0x1000UL
+ #define CMDQ_MODIFY_QP_VLAN_PCP_MASK 0xe000UL
+ #define CMDQ_MODIFY_QP_VLAN_PCP_SFT 13
+ __le64 irrq_addr;
+ __le64 orrq_addr;
+ __le32 ext_modify_mask;
+ #define CMDQ_MODIFY_QP_EXT_MODIFY_MASK_EXT_STATS_CTX 0x1UL
+ #define CMDQ_MODIFY_QP_EXT_MODIFY_MASK_SCHQ_ID_VALID 0x2UL
+ __le32 ext_stats_ctx_id;
+ __le16 schq_id;
+ __le16 unused_0;
+ __le32 reserved32;
+};
+
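/*
 * A minimal sketch of driving the request above for an INIT->RTR style
 * transition, assuming kernel byte-order helpers; the helper name and the
 * selected attributes are hypothetical. Each attribute written must have the
 * corresponding bit set in modify_mask.
 */
static inline void cmdq_modify_qp_rtr_example(struct cmdq_modify_qp *req)
{
	req->opcode = CMDQ_MODIFY_QP_OPCODE_MODIFY_QP;
	req->modify_mask = cpu_to_le32(CMDQ_MODIFY_QP_MODIFY_MASK_STATE |
				       CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU);
	req->network_type_en_sqd_async_notify_new_state =
		(CMDQ_MODIFY_QP_NEW_STATE_RTR & CMDQ_MODIFY_QP_NEW_STATE_MASK) |
		CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4;
	req->path_mtu_pingpong_push_enable = CMDQ_MODIFY_QP_PATH_MTU_MTU_2048;
}
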
+/* creq_modify_qp_resp (size:128b/16B) */
+struct creq_modify_qp_resp {
+ u8 type;
+ #define CREQ_MODIFY_QP_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_MODIFY_QP_RESP_TYPE_SFT 0
+ #define CREQ_MODIFY_QP_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_MODIFY_QP_RESP_TYPE_LAST CREQ_MODIFY_QP_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_MODIFY_QP_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_MODIFY_QP_RESP_EVENT_MODIFY_QP 0x3UL
+ #define CREQ_MODIFY_QP_RESP_EVENT_LAST CREQ_MODIFY_QP_RESP_EVENT_MODIFY_QP
+ u8 pingpong_push_state_index_enabled;
+ #define CREQ_MODIFY_QP_RESP_PINGPONG_PUSH_ENABLED 0x1UL
+ #define CREQ_MODIFY_QP_RESP_PINGPONG_PUSH_INDEX_MASK 0xeUL
+ #define CREQ_MODIFY_QP_RESP_PINGPONG_PUSH_INDEX_SFT 1
+ #define CREQ_MODIFY_QP_RESP_PINGPONG_PUSH_STATE 0x10UL
+ u8 reserved8;
+ __le32 lag_src_mac;
+};
+
+/* cmdq_query_qp (size:192b/24B) */
struct cmdq_query_qp {
- u8 opcode;
- #define CMDQ_QUERY_QP_OPCODE_QUERY_QP 0x4UL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- __le32 qp_cid;
- __le32 unused_0;
-};
-
-/* Create SRQ command (48 bytes) */
+ u8 opcode;
+ #define CMDQ_QUERY_QP_OPCODE_QUERY_QP 0x4UL
+ #define CMDQ_QUERY_QP_OPCODE_LAST CMDQ_QUERY_QP_OPCODE_QUERY_QP
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 qp_cid;
+ __le32 unused_0;
+};
+
+/* creq_query_qp_resp (size:128b/16B) */
+struct creq_query_qp_resp {
+ u8 type;
+ #define CREQ_QUERY_QP_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_QP_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_QP_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_QP_RESP_TYPE_LAST CREQ_QUERY_QP_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_QP_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_QUERY_QP_RESP_EVENT_QUERY_QP 0x4UL
+ #define CREQ_QUERY_QP_RESP_EVENT_LAST CREQ_QUERY_QP_RESP_EVENT_QUERY_QP
+ u8 reserved48[6];
+};
+
+/* creq_query_qp_resp_sb (size:832b/104B) */
+struct creq_query_qp_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_QP_RESP_SB_OPCODE_QUERY_QP 0x4UL
+ #define CREQ_QUERY_QP_RESP_SB_OPCODE_LAST CREQ_QUERY_QP_RESP_SB_OPCODE_QUERY_QP
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ __le32 xid;
+ u8 en_sqd_async_notify_state;
+ #define CREQ_QUERY_QP_RESP_SB_STATE_MASK 0xfUL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_SFT 0
+ #define CREQ_QUERY_QP_RESP_SB_STATE_RESET 0x0UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_INIT 0x1UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_RTR 0x2UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_RTS 0x3UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_SQD 0x4UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_SQE 0x5UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_ERR 0x6UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_LAST CREQ_QUERY_QP_RESP_SB_STATE_ERR
+ #define CREQ_QUERY_QP_RESP_SB_EN_SQD_ASYNC_NOTIFY 0x10UL
+ #define CREQ_QUERY_QP_RESP_SB_UNUSED3_MASK 0xe0UL
+ #define CREQ_QUERY_QP_RESP_SB_UNUSED3_SFT 5
+ u8 access;
+ #define CREQ_QUERY_QP_RESP_SB_ACCESS_REMOTE_ATOMIC_REMOTE_READ_REMOTE_WRITE_LOCAL_WRITE_MASK\
+ 0xffUL
+ #define CREQ_QUERY_QP_RESP_SB_ACCESS_REMOTE_ATOMIC_REMOTE_READ_REMOTE_WRITE_LOCAL_WRITE_SFT\
+ 0
+ #define CREQ_QUERY_QP_RESP_SB_ACCESS_LOCAL_WRITE 0x1UL
+ #define CREQ_QUERY_QP_RESP_SB_ACCESS_REMOTE_WRITE 0x2UL
+ #define CREQ_QUERY_QP_RESP_SB_ACCESS_REMOTE_READ 0x4UL
+ #define CREQ_QUERY_QP_RESP_SB_ACCESS_REMOTE_ATOMIC 0x8UL
+ __le16 pkey;
+ __le32 qkey;
+ __le16 udp_src_port;
+ __le16 reserved16;
+ __le32 dgid[4];
+ __le32 flow_label;
+ __le16 sgid_index;
+ u8 hop_limit;
+ u8 traffic_class;
+ __le16 dest_mac[3];
+ __le16 path_mtu_dest_vlan_id;
+ #define CREQ_QUERY_QP_RESP_SB_DEST_VLAN_ID_MASK 0xfffUL
+ #define CREQ_QUERY_QP_RESP_SB_DEST_VLAN_ID_SFT 0
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MASK 0xf000UL
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_SFT 12
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_256 (0x0UL << 12)
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_512 (0x1UL << 12)
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_1024 (0x2UL << 12)
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_2048 (0x3UL << 12)
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_4096 (0x4UL << 12)
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_8192 (0x5UL << 12)
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_LAST CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_8192
+ u8 timeout;
+ u8 retry_cnt;
+ u8 rnr_retry;
+ u8 min_rnr_timer;
+ __le32 rq_psn;
+ __le32 sq_psn;
+ u8 max_rd_atomic;
+ u8 max_dest_rd_atomic;
+ u8 tos_dscp_tos_ecn;
+ #define CREQ_QUERY_QP_RESP_SB_TOS_ECN_MASK 0x3UL
+ #define CREQ_QUERY_QP_RESP_SB_TOS_ECN_SFT 0
+ #define CREQ_QUERY_QP_RESP_SB_TOS_DSCP_MASK 0xfcUL
+ #define CREQ_QUERY_QP_RESP_SB_TOS_DSCP_SFT 2
+ u8 enable_cc;
+ #define CREQ_QUERY_QP_RESP_SB_ENABLE_CC 0x1UL
+ __le32 sq_size;
+ __le32 rq_size;
+ __le16 sq_sge;
+ __le16 rq_sge;
+ __le32 max_inline_data;
+ __le32 dest_qp_id;
+ __le16 port_id;
+ u8 unused_0;
+ u8 stat_collection_id;
+ __le16 src_mac[3];
+ __le16 vlan_pcp_vlan_dei_vlan_id;
+ #define CREQ_QUERY_QP_RESP_SB_VLAN_ID_MASK 0xfffUL
+ #define CREQ_QUERY_QP_RESP_SB_VLAN_ID_SFT 0
+ #define CREQ_QUERY_QP_RESP_SB_VLAN_DEI 0x1000UL
+ #define CREQ_QUERY_QP_RESP_SB_VLAN_PCP_MASK 0xe000UL
+ #define CREQ_QUERY_QP_RESP_SB_VLAN_PCP_SFT 13
+};
+
+/* cmdq_query_qp_extend (size:192b/24B) */
+struct cmdq_query_qp_extend {
+ u8 opcode;
+ #define CMDQ_QUERY_QP_EXTEND_OPCODE_QUERY_QP_EXTEND 0x91UL
+ #define CMDQ_QUERY_QP_EXTEND_OPCODE_LAST CMDQ_QUERY_QP_EXTEND_OPCODE_QUERY_QP_EXTEND
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 num_qps;
+ __le64 resp_addr;
+ __le32 function_id;
+ #define CMDQ_QUERY_QP_EXTEND_PF_NUM_MASK 0xffUL
+ #define CMDQ_QUERY_QP_EXTEND_PF_NUM_SFT 0
+ #define CMDQ_QUERY_QP_EXTEND_VF_NUM_MASK 0xffff00UL
+ #define CMDQ_QUERY_QP_EXTEND_VF_NUM_SFT 8
+ #define CMDQ_QUERY_QP_EXTEND_VF_VALID 0x1000000UL
+ __le32 current_index;
+};
+
+/* creq_query_qp_extend_resp (size:128b/16B) */
+struct creq_query_qp_extend_resp {
+ u8 type;
+ #define CREQ_QUERY_QP_EXTEND_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_QP_EXTEND_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_QP_EXTEND_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_TYPE_LAST CREQ_QUERY_QP_EXTEND_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_QP_EXTEND_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_QUERY_QP_EXTEND_RESP_EVENT_QUERY_QP_EXTEND 0x91UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_EVENT_LAST CREQ_QUERY_QP_EXTEND_RESP_EVENT_QUERY_QP_EXTEND
+ __le16 reserved16;
+ __le32 current_index;
+};
+
+/* creq_query_qp_extend_resp_sb (size:384b/48B) */
+struct creq_query_qp_extend_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_OPCODE_QUERY_QP_EXTEND 0x91UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_OPCODE_LAST \
+ CREQ_QUERY_QP_EXTEND_RESP_SB_OPCODE_QUERY_QP_EXTEND
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ __le32 xid;
+ u8 state;
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_STATE_MASK 0xfUL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_STATE_SFT 0
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_STATE_RESET 0x0UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_STATE_INIT 0x1UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_STATE_RTR 0x2UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_STATE_RTS 0x3UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_STATE_SQD 0x4UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_STATE_SQE 0x5UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_STATE_ERR 0x6UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_STATE_LAST CREQ_QUERY_QP_EXTEND_RESP_SB_STATE_ERR
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_UNUSED4_MASK 0xf0UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_UNUSED4_SFT 4
+ u8 reserved_8;
+ __le16 port_id;
+ __le32 qkey;
+ __le16 sgid_index;
+ u8 network_type;
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_NETWORK_TYPE_ROCEV1 0x0UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_NETWORK_TYPE_ROCEV2_IPV4 0x2UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_NETWORK_TYPE_ROCEV2_IPV6 0x3UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_NETWORK_TYPE_LAST \
+ CREQ_QUERY_QP_EXTEND_RESP_SB_NETWORK_TYPE_ROCEV2_IPV6
+ u8 unused_0;
+ __le32 dgid[4];
+ __le32 dest_qp_id;
+ u8 stat_collection_id;
+ u8 reservred_8;
+ __le16 reserved_16;
+};
+
+/* creq_query_qp_extend_resp_sb_tlv (size:512b/64B) */
+struct creq_query_qp_extend_resp_sb_tlv {
+ __le16 cmd_discr;
+ u8 reserved_8b;
+ u8 tlv_flags;
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_TLV_FLAGS_MORE 0x1UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_TLV_FLAGS_MORE_LAST 0x0UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_TLV_FLAGS_MORE_NOT_LAST 0x1UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_TLV_FLAGS_REQUIRED 0x2UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_TLV_FLAGS_REQUIRED_NO (0x0UL << 1)
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_TLV_FLAGS_REQUIRED_YES (0x1UL << 1)
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_TLV_FLAGS_REQUIRED_LAST \
+ CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_TLV_FLAGS_REQUIRED_YES
+ __le16 tlv_type;
+ __le16 length;
+ u8 total_size;
+ u8 reserved56[7];
+ u8 opcode;
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_OPCODE_QUERY_QP_EXTEND 0x91UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_OPCODE_LAST \
+ CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_OPCODE_QUERY_QP_EXTEND
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ __le32 xid;
+ u8 state;
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_STATE_MASK 0xfUL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_STATE_SFT 0
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_STATE_RESET 0x0UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_STATE_INIT 0x1UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_STATE_RTR 0x2UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_STATE_RTS 0x3UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_STATE_SQD 0x4UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_STATE_SQE 0x5UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_STATE_ERR 0x6UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_STATE_LAST \
+ CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_STATE_ERR
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_UNUSED4_MASK 0xf0UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_UNUSED4_SFT 4
+ u8 reserved_8;
+ __le16 port_id;
+ __le32 qkey;
+ __le16 sgid_index;
+ u8 network_type;
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_NETWORK_TYPE_ROCEV1 0x0UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_NETWORK_TYPE_ROCEV2_IPV4 0x2UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_NETWORK_TYPE_ROCEV2_IPV6 0x3UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_NETWORK_TYPE_LAST \
+ CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_NETWORK_TYPE_ROCEV2_IPV6
+ u8 unused_0;
+ __le32 dgid[4];
+ __le32 dest_qp_id;
+ u8 stat_collection_id;
+ u8 reservred_8;
+ __le16 reserved_16;
+};
+
+/* cmdq_create_srq (size:448b/56B) */
struct cmdq_create_srq {
- u8 opcode;
- #define CMDQ_CREATE_SRQ_OPCODE_CREATE_SRQ 0x5UL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- __le64 srq_handle;
- __le16 pg_size_lvl;
- #define CMDQ_CREATE_SRQ_LVL_MASK 0x3UL
- #define CMDQ_CREATE_SRQ_LVL_SFT 0
- #define CMDQ_CREATE_SRQ_LVL_LVL_0 0x0UL
- #define CMDQ_CREATE_SRQ_LVL_LVL_1 0x1UL
- #define CMDQ_CREATE_SRQ_LVL_LVL_2 0x2UL
- #define CMDQ_CREATE_SRQ_PG_SIZE_MASK 0x1cUL
- #define CMDQ_CREATE_SRQ_PG_SIZE_SFT 2
- #define CMDQ_CREATE_SRQ_PG_SIZE_PG_4K (0x0UL << 2)
- #define CMDQ_CREATE_SRQ_PG_SIZE_PG_8K (0x1UL << 2)
- #define CMDQ_CREATE_SRQ_PG_SIZE_PG_64K (0x2UL << 2)
- #define CMDQ_CREATE_SRQ_PG_SIZE_PG_2M (0x3UL << 2)
- #define CMDQ_CREATE_SRQ_PG_SIZE_PG_8M (0x4UL << 2)
- #define CMDQ_CREATE_SRQ_PG_SIZE_PG_1G (0x5UL << 2)
- __le16 eventq_id;
- #define CMDQ_CREATE_SRQ_EVENTQ_ID_MASK 0xfffUL
- #define CMDQ_CREATE_SRQ_EVENTQ_ID_SFT 0
- __le16 srq_size;
- __le16 srq_fwo;
- __le32 dpi;
- __le32 pd_id;
- __le64 pbl;
-};
-
-/* Destroy SRQ command (24 bytes) */
+ u8 opcode;
+ #define CMDQ_CREATE_SRQ_OPCODE_CREATE_SRQ 0x5UL
+ #define CMDQ_CREATE_SRQ_OPCODE_LAST CMDQ_CREATE_SRQ_OPCODE_CREATE_SRQ
+ u8 cmd_size;
+ __le16 flags;
+ #define CMDQ_CREATE_SRQ_FLAGS_STEERING_TAG_VALID 0x1UL
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le64 srq_handle;
+ __le16 pg_size_lvl;
+ #define CMDQ_CREATE_SRQ_LVL_MASK 0x3UL
+ #define CMDQ_CREATE_SRQ_LVL_SFT 0
+ #define CMDQ_CREATE_SRQ_LVL_LVL_0 0x0UL
+ #define CMDQ_CREATE_SRQ_LVL_LVL_1 0x1UL
+ #define CMDQ_CREATE_SRQ_LVL_LVL_2 0x2UL
+ #define CMDQ_CREATE_SRQ_LVL_LAST CMDQ_CREATE_SRQ_LVL_LVL_2
+ #define CMDQ_CREATE_SRQ_PG_SIZE_MASK 0x1cUL
+ #define CMDQ_CREATE_SRQ_PG_SIZE_SFT 2
+ #define CMDQ_CREATE_SRQ_PG_SIZE_PG_4K (0x0UL << 2)
+ #define CMDQ_CREATE_SRQ_PG_SIZE_PG_8K (0x1UL << 2)
+ #define CMDQ_CREATE_SRQ_PG_SIZE_PG_64K (0x2UL << 2)
+ #define CMDQ_CREATE_SRQ_PG_SIZE_PG_2M (0x3UL << 2)
+ #define CMDQ_CREATE_SRQ_PG_SIZE_PG_8M (0x4UL << 2)
+ #define CMDQ_CREATE_SRQ_PG_SIZE_PG_1G (0x5UL << 2)
+ #define CMDQ_CREATE_SRQ_PG_SIZE_LAST CMDQ_CREATE_SRQ_PG_SIZE_PG_1G
+ #define CMDQ_CREATE_SRQ_UNUSED11_MASK 0xffe0UL
+ #define CMDQ_CREATE_SRQ_UNUSED11_SFT 5
+ __le16 eventq_id;
+ #define CMDQ_CREATE_SRQ_EVENTQ_ID_MASK 0xfffUL
+ #define CMDQ_CREATE_SRQ_EVENTQ_ID_SFT 0
+ #define CMDQ_CREATE_SRQ_UNUSED4_MASK 0xf000UL
+ #define CMDQ_CREATE_SRQ_UNUSED4_SFT 12
+ __le16 srq_size;
+ __le16 srq_fwo;
+ __le32 dpi;
+ __le32 pd_id;
+ __le64 pbl;
+ __le16 steering_tag;
+ u8 reserved48[6];
+};
+
+/* creq_create_srq_resp (size:128b/16B) */
+struct creq_create_srq_resp {
+ u8 type;
+ #define CREQ_CREATE_SRQ_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_CREATE_SRQ_RESP_TYPE_SFT 0
+ #define CREQ_CREATE_SRQ_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_CREATE_SRQ_RESP_TYPE_LAST CREQ_CREATE_SRQ_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_CREATE_SRQ_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_CREATE_SRQ_RESP_EVENT_CREATE_SRQ 0x5UL
+ #define CREQ_CREATE_SRQ_RESP_EVENT_LAST CREQ_CREATE_SRQ_RESP_EVENT_CREATE_SRQ
+ u8 reserved48[6];
+};
+
+/* cmdq_destroy_srq (size:192b/24B) */
struct cmdq_destroy_srq {
- u8 opcode;
- #define CMDQ_DESTROY_SRQ_OPCODE_DESTROY_SRQ 0x6UL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- __le32 srq_cid;
- __le32 unused_0;
-};
-
-/* Query SRQ command (24 bytes) */
+ u8 opcode;
+ #define CMDQ_DESTROY_SRQ_OPCODE_DESTROY_SRQ 0x6UL
+ #define CMDQ_DESTROY_SRQ_OPCODE_LAST CMDQ_DESTROY_SRQ_OPCODE_DESTROY_SRQ
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 srq_cid;
+ __le32 unused_0;
+};
+
+/* creq_destroy_srq_resp (size:128b/16B) */
+struct creq_destroy_srq_resp {
+ u8 type;
+ #define CREQ_DESTROY_SRQ_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DESTROY_SRQ_RESP_TYPE_SFT 0
+ #define CREQ_DESTROY_SRQ_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DESTROY_SRQ_RESP_TYPE_LAST CREQ_DESTROY_SRQ_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DESTROY_SRQ_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_DESTROY_SRQ_RESP_EVENT_DESTROY_SRQ 0x6UL
+ #define CREQ_DESTROY_SRQ_RESP_EVENT_LAST CREQ_DESTROY_SRQ_RESP_EVENT_DESTROY_SRQ
+ __le16 enable_for_arm[3];
+ #define CREQ_DESTROY_SRQ_RESP_UNUSED0_MASK 0xffffUL
+ #define CREQ_DESTROY_SRQ_RESP_UNUSED0_SFT 0
+ #define CREQ_DESTROY_SRQ_RESP_ENABLE_FOR_ARM_MASK 0x30000UL
+ #define CREQ_DESTROY_SRQ_RESP_ENABLE_FOR_ARM_SFT 16
+};
+
+/* cmdq_query_srq (size:192b/24B) */
struct cmdq_query_srq {
- u8 opcode;
- #define CMDQ_QUERY_SRQ_OPCODE_QUERY_SRQ 0x8UL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- __le32 srq_cid;
- __le32 unused_0;
-};
-
-/* Create CQ command (48 bytes) */
+ u8 opcode;
+ #define CMDQ_QUERY_SRQ_OPCODE_QUERY_SRQ 0x8UL
+ #define CMDQ_QUERY_SRQ_OPCODE_LAST CMDQ_QUERY_SRQ_OPCODE_QUERY_SRQ
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 srq_cid;
+ __le32 unused_0;
+};
+
+/* creq_query_srq_resp (size:128b/16B) */
+struct creq_query_srq_resp {
+ u8 type;
+ #define CREQ_QUERY_SRQ_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_SRQ_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_SRQ_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_SRQ_RESP_TYPE_LAST CREQ_QUERY_SRQ_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_SRQ_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_QUERY_SRQ_RESP_EVENT_QUERY_SRQ 0x8UL
+ #define CREQ_QUERY_SRQ_RESP_EVENT_LAST CREQ_QUERY_SRQ_RESP_EVENT_QUERY_SRQ
+ u8 reserved48[6];
+};
+
+/* creq_query_srq_resp_sb (size:256b/32B) */
+struct creq_query_srq_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_SRQ_RESP_SB_OPCODE_QUERY_SRQ 0x8UL
+ #define CREQ_QUERY_SRQ_RESP_SB_OPCODE_LAST CREQ_QUERY_SRQ_RESP_SB_OPCODE_QUERY_SRQ
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ __le32 xid;
+ __le16 srq_limit;
+ __le16 reserved16;
+ __le32 data[4];
+};
+
+/* cmdq_create_cq (size:448b/56B) */
struct cmdq_create_cq {
- u8 opcode;
- #define CMDQ_CREATE_CQ_OPCODE_CREATE_CQ 0x9UL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- __le64 cq_handle;
- __le32 pg_size_lvl;
- #define CMDQ_CREATE_CQ_LVL_MASK 0x3UL
- #define CMDQ_CREATE_CQ_LVL_SFT 0
- #define CMDQ_CREATE_CQ_LVL_LVL_0 0x0UL
- #define CMDQ_CREATE_CQ_LVL_LVL_1 0x1UL
- #define CMDQ_CREATE_CQ_LVL_LVL_2 0x2UL
- #define CMDQ_CREATE_CQ_PG_SIZE_MASK 0x1cUL
- #define CMDQ_CREATE_CQ_PG_SIZE_SFT 2
- #define CMDQ_CREATE_CQ_PG_SIZE_PG_4K (0x0UL << 2)
- #define CMDQ_CREATE_CQ_PG_SIZE_PG_8K (0x1UL << 2)
- #define CMDQ_CREATE_CQ_PG_SIZE_PG_64K (0x2UL << 2)
- #define CMDQ_CREATE_CQ_PG_SIZE_PG_2M (0x3UL << 2)
- #define CMDQ_CREATE_CQ_PG_SIZE_PG_8M (0x4UL << 2)
- #define CMDQ_CREATE_CQ_PG_SIZE_PG_1G (0x5UL << 2)
- __le32 cq_fco_cnq_id;
- #define CMDQ_CREATE_CQ_CNQ_ID_MASK 0xfffUL
- #define CMDQ_CREATE_CQ_CNQ_ID_SFT 0
- #define CMDQ_CREATE_CQ_CQ_FCO_MASK 0xfffff000UL
- #define CMDQ_CREATE_CQ_CQ_FCO_SFT 12
- __le32 dpi;
- __le32 cq_size;
- __le64 pbl;
-};
-
-/* Destroy CQ command (24 bytes) */
+ u8 opcode;
+ #define CMDQ_CREATE_CQ_OPCODE_CREATE_CQ 0x9UL
+ #define CMDQ_CREATE_CQ_OPCODE_LAST CMDQ_CREATE_CQ_OPCODE_CREATE_CQ
+ u8 cmd_size;
+ __le16 flags;
+ #define CMDQ_CREATE_CQ_FLAGS_DISABLE_CQ_OVERFLOW_DETECTION 0x1UL
+ #define CMDQ_CREATE_CQ_FLAGS_STEERING_TAG_VALID 0x2UL
+ #define CMDQ_CREATE_CQ_FLAGS_INFINITE_CQ_MODE 0x4UL
+ #define CMDQ_CREATE_CQ_FLAGS_COALESCING_VALID 0x8UL
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le64 cq_handle;
+ __le32 pg_size_lvl;
+ #define CMDQ_CREATE_CQ_LVL_MASK 0x3UL
+ #define CMDQ_CREATE_CQ_LVL_SFT 0
+ #define CMDQ_CREATE_CQ_LVL_LVL_0 0x0UL
+ #define CMDQ_CREATE_CQ_LVL_LVL_1 0x1UL
+ #define CMDQ_CREATE_CQ_LVL_LVL_2 0x2UL
+ #define CMDQ_CREATE_CQ_LVL_LAST CMDQ_CREATE_CQ_LVL_LVL_2
+ #define CMDQ_CREATE_CQ_PG_SIZE_MASK 0x1cUL
+ #define CMDQ_CREATE_CQ_PG_SIZE_SFT 2
+ #define CMDQ_CREATE_CQ_PG_SIZE_PG_4K (0x0UL << 2)
+ #define CMDQ_CREATE_CQ_PG_SIZE_PG_8K (0x1UL << 2)
+ #define CMDQ_CREATE_CQ_PG_SIZE_PG_64K (0x2UL << 2)
+ #define CMDQ_CREATE_CQ_PG_SIZE_PG_2M (0x3UL << 2)
+ #define CMDQ_CREATE_CQ_PG_SIZE_PG_8M (0x4UL << 2)
+ #define CMDQ_CREATE_CQ_PG_SIZE_PG_1G (0x5UL << 2)
+ #define CMDQ_CREATE_CQ_PG_SIZE_LAST CMDQ_CREATE_CQ_PG_SIZE_PG_1G
+ #define CMDQ_CREATE_CQ_UNUSED27_MASK 0xffffffe0UL
+ #define CMDQ_CREATE_CQ_UNUSED27_SFT 5
+ __le32 cq_fco_cnq_id;
+ #define CMDQ_CREATE_CQ_CNQ_ID_MASK 0xfffUL
+ #define CMDQ_CREATE_CQ_CNQ_ID_SFT 0
+ #define CMDQ_CREATE_CQ_CQ_FCO_MASK 0xfffff000UL
+ #define CMDQ_CREATE_CQ_CQ_FCO_SFT 12
+ __le32 dpi;
+ __le32 cq_size;
+ __le64 pbl;
+ __le16 steering_tag;
+ u8 reserved48[2];
+ __le32 coalescing;
+ #define CMDQ_CREATE_CQ_BUF_MAXTIME_MASK 0x1ffUL
+ #define CMDQ_CREATE_CQ_BUF_MAXTIME_SFT 0
+ #define CMDQ_CREATE_CQ_NORMAL_MAXBUF_MASK 0x3e00UL
+ #define CMDQ_CREATE_CQ_NORMAL_MAXBUF_SFT 9
+ #define CMDQ_CREATE_CQ_DURING_MAXBUF_MASK 0x7c000UL
+ #define CMDQ_CREATE_CQ_DURING_MAXBUF_SFT 14
+ #define CMDQ_CREATE_CQ_ENABLE_RING_IDLE_MODE 0x80000UL
+ #define CMDQ_CREATE_CQ_UNUSED12_MASK 0xfff00000UL
+ #define CMDQ_CREATE_CQ_UNUSED12_SFT 20
+ __le64 reserved64;
+};
+
+/* creq_create_cq_resp (size:128b/16B) */
+struct creq_create_cq_resp {
+ u8 type;
+ #define CREQ_CREATE_CQ_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_CREATE_CQ_RESP_TYPE_SFT 0
+ #define CREQ_CREATE_CQ_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_CREATE_CQ_RESP_TYPE_LAST CREQ_CREATE_CQ_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_CREATE_CQ_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_CREATE_CQ_RESP_EVENT_CREATE_CQ 0x9UL
+ #define CREQ_CREATE_CQ_RESP_EVENT_LAST CREQ_CREATE_CQ_RESP_EVENT_CREATE_CQ
+ u8 reserved48[6];
+};
+
+/* cmdq_destroy_cq (size:192b/24B) */
struct cmdq_destroy_cq {
- u8 opcode;
- #define CMDQ_DESTROY_CQ_OPCODE_DESTROY_CQ 0xaUL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- __le32 cq_cid;
- __le32 unused_0;
-};
-
-/* Resize CQ command (40 bytes) */
+ u8 opcode;
+ #define CMDQ_DESTROY_CQ_OPCODE_DESTROY_CQ 0xaUL
+ #define CMDQ_DESTROY_CQ_OPCODE_LAST CMDQ_DESTROY_CQ_OPCODE_DESTROY_CQ
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 cq_cid;
+ __le32 unused_0;
+};
+
+/* creq_destroy_cq_resp (size:128b/16B) */
+struct creq_destroy_cq_resp {
+ u8 type;
+ #define CREQ_DESTROY_CQ_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DESTROY_CQ_RESP_TYPE_SFT 0
+ #define CREQ_DESTROY_CQ_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DESTROY_CQ_RESP_TYPE_LAST CREQ_DESTROY_CQ_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DESTROY_CQ_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_DESTROY_CQ_RESP_EVENT_DESTROY_CQ 0xaUL
+ #define CREQ_DESTROY_CQ_RESP_EVENT_LAST CREQ_DESTROY_CQ_RESP_EVENT_DESTROY_CQ
+ __le16 cq_arm_lvl;
+ #define CREQ_DESTROY_CQ_RESP_CQ_ARM_LVL_MASK 0x3UL
+ #define CREQ_DESTROY_CQ_RESP_CQ_ARM_LVL_SFT 0
+ __le16 total_cnq_events;
+ __le16 reserved16;
+};
+
+/* cmdq_resize_cq (size:320b/40B) */
struct cmdq_resize_cq {
- u8 opcode;
- #define CMDQ_RESIZE_CQ_OPCODE_RESIZE_CQ 0xcUL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- __le32 cq_cid;
- __le32 new_cq_size_pg_size_lvl;
- #define CMDQ_RESIZE_CQ_LVL_MASK 0x3UL
- #define CMDQ_RESIZE_CQ_LVL_SFT 0
- #define CMDQ_RESIZE_CQ_LVL_LVL_0 0x0UL
- #define CMDQ_RESIZE_CQ_LVL_LVL_1 0x1UL
- #define CMDQ_RESIZE_CQ_LVL_LVL_2 0x2UL
- #define CMDQ_RESIZE_CQ_PG_SIZE_MASK 0x1cUL
- #define CMDQ_RESIZE_CQ_PG_SIZE_SFT 2
- #define CMDQ_RESIZE_CQ_PG_SIZE_PG_4K (0x0UL << 2)
- #define CMDQ_RESIZE_CQ_PG_SIZE_PG_8K (0x1UL << 2)
- #define CMDQ_RESIZE_CQ_PG_SIZE_PG_64K (0x2UL << 2)
- #define CMDQ_RESIZE_CQ_PG_SIZE_PG_2M (0x3UL << 2)
- #define CMDQ_RESIZE_CQ_PG_SIZE_PG_8M (0x4UL << 2)
- #define CMDQ_RESIZE_CQ_PG_SIZE_PG_1G (0x5UL << 2)
- #define CMDQ_RESIZE_CQ_NEW_CQ_SIZE_MASK 0x1fffe0UL
- #define CMDQ_RESIZE_CQ_NEW_CQ_SIZE_SFT 5
- __le64 new_pbl;
- __le32 new_cq_fco;
- __le32 unused_2;
-};
-
-/* Allocate MRW command (32 bytes) */
+ u8 opcode;
+ #define CMDQ_RESIZE_CQ_OPCODE_RESIZE_CQ 0xcUL
+ #define CMDQ_RESIZE_CQ_OPCODE_LAST CMDQ_RESIZE_CQ_OPCODE_RESIZE_CQ
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 cq_cid;
+ __le32 new_cq_size_pg_size_lvl;
+ #define CMDQ_RESIZE_CQ_LVL_MASK 0x3UL
+ #define CMDQ_RESIZE_CQ_LVL_SFT 0
+ #define CMDQ_RESIZE_CQ_LVL_LVL_0 0x0UL
+ #define CMDQ_RESIZE_CQ_LVL_LVL_1 0x1UL
+ #define CMDQ_RESIZE_CQ_LVL_LVL_2 0x2UL
+ #define CMDQ_RESIZE_CQ_LVL_LAST CMDQ_RESIZE_CQ_LVL_LVL_2
+ #define CMDQ_RESIZE_CQ_PG_SIZE_MASK 0x1cUL
+ #define CMDQ_RESIZE_CQ_PG_SIZE_SFT 2
+ #define CMDQ_RESIZE_CQ_PG_SIZE_PG_4K (0x0UL << 2)
+ #define CMDQ_RESIZE_CQ_PG_SIZE_PG_8K (0x1UL << 2)
+ #define CMDQ_RESIZE_CQ_PG_SIZE_PG_64K (0x2UL << 2)
+ #define CMDQ_RESIZE_CQ_PG_SIZE_PG_2M (0x3UL << 2)
+ #define CMDQ_RESIZE_CQ_PG_SIZE_PG_8M (0x4UL << 2)
+ #define CMDQ_RESIZE_CQ_PG_SIZE_PG_1G (0x5UL << 2)
+ #define CMDQ_RESIZE_CQ_PG_SIZE_LAST CMDQ_RESIZE_CQ_PG_SIZE_PG_1G
+ #define CMDQ_RESIZE_CQ_NEW_CQ_SIZE_MASK 0x1fffffe0UL
+ #define CMDQ_RESIZE_CQ_NEW_CQ_SIZE_SFT 5
+ __le64 new_pbl;
+ __le32 new_cq_fco;
+ __le32 unused_0;
+};
+
+/* creq_resize_cq_resp (size:128b/16B) */
+struct creq_resize_cq_resp {
+ u8 type;
+ #define CREQ_RESIZE_CQ_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_RESIZE_CQ_RESP_TYPE_SFT 0
+ #define CREQ_RESIZE_CQ_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_RESIZE_CQ_RESP_TYPE_LAST CREQ_RESIZE_CQ_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_RESIZE_CQ_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_RESIZE_CQ_RESP_EVENT_RESIZE_CQ 0xcUL
+ #define CREQ_RESIZE_CQ_RESP_EVENT_LAST CREQ_RESIZE_CQ_RESP_EVENT_RESIZE_CQ
+ u8 reserved48[6];
+};
+
+/* cmdq_allocate_mrw (size:256b/32B) */
struct cmdq_allocate_mrw {
- u8 opcode;
- #define CMDQ_ALLOCATE_MRW_OPCODE_ALLOCATE_MRW 0xdUL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- __le64 mrw_handle;
- u8 mrw_flags;
- #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MASK 0xfUL
- #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_SFT 0
- #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MR 0x0UL
- #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR 0x1UL
- #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1 0x2UL
- #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A 0x3UL
- #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B 0x4UL
- u8 access;
- #define CMDQ_ALLOCATE_MRW_ACCESS_RESERVED_MASK 0x1fUL
- #define CMDQ_ALLOCATE_MRW_ACCESS_RESERVED_SFT 0
- #define CMDQ_ALLOCATE_MRW_ACCESS_CONSUMER_OWNED_KEY 0x20UL
- __le16 unused_1;
- __le32 pd_id;
-};
-
-/* De-allocate key command (24 bytes) */
+ u8 opcode;
+ #define CMDQ_ALLOCATE_MRW_OPCODE_ALLOCATE_MRW 0xdUL
+ #define CMDQ_ALLOCATE_MRW_OPCODE_LAST CMDQ_ALLOCATE_MRW_OPCODE_ALLOCATE_MRW
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le64 mrw_handle;
+ u8 mrw_flags;
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MASK 0xfUL
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_SFT 0
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MR 0x0UL
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR 0x1UL
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1 0x2UL
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A 0x3UL
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B 0x4UL
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_LAST CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B
+ #define CMDQ_ALLOCATE_MRW_STEERING_TAG_VALID 0x10UL
+ #define CMDQ_ALLOCATE_MRW_UNUSED4_MASK 0xe0UL
+ #define CMDQ_ALLOCATE_MRW_UNUSED4_SFT 5
+ u8 access;
+ #define CMDQ_ALLOCATE_MRW_ACCESS_CONSUMER_OWNED_KEY 0x20UL
+ __le16 steering_tag;
+ __le32 pd_id;
+};
+
+/* creq_allocate_mrw_resp (size:128b/16B) */
+struct creq_allocate_mrw_resp {
+ u8 type;
+ #define CREQ_ALLOCATE_MRW_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_ALLOCATE_MRW_RESP_TYPE_SFT 0
+ #define CREQ_ALLOCATE_MRW_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_ALLOCATE_MRW_RESP_TYPE_LAST CREQ_ALLOCATE_MRW_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_ALLOCATE_MRW_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_ALLOCATE_MRW_RESP_EVENT_ALLOCATE_MRW 0xdUL
+ #define CREQ_ALLOCATE_MRW_RESP_EVENT_LAST CREQ_ALLOCATE_MRW_RESP_EVENT_ALLOCATE_MRW
+ u8 reserved48[6];
+};
+
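/*
 * Illustrative sketch, not part of the patch above: every creq_*_resp shares
 * the type/status/cookie/xid/v/event layout, so a consumer can poll the V bit
 * and then dispatch on the event code.  The helper below is an assumption for
 * illustration; expected_v is the valid-bit phase the driver expects for the
 * current pass over the CREQ ring.
 */
static inline bool bnxt_re_allocate_mrw_resp_valid(const struct creq_allocate_mrw_resp *resp,
						   u8 expected_v)
{
	return (resp->v & CREQ_ALLOCATE_MRW_RESP_V) ==
	       (expected_v & CREQ_ALLOCATE_MRW_RESP_V);
}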
+/* cmdq_deallocate_key (size:192b/24B) */
struct cmdq_deallocate_key {
- u8 opcode;
- #define CMDQ_DEALLOCATE_KEY_OPCODE_DEALLOCATE_KEY 0xeUL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- u8 mrw_flags;
- #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MASK 0xfUL
- #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_SFT 0
- #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MR 0x0UL
- #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_PMR 0x1UL
- #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MW_TYPE1 0x2UL
- #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MW_TYPE2A 0x3UL
- #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MW_TYPE2B 0x4UL
- u8 unused_1[3];
- __le32 key;
-};
-
-/* Register MR command (48 bytes) */
+ u8 opcode;
+ #define CMDQ_DEALLOCATE_KEY_OPCODE_DEALLOCATE_KEY 0xeUL
+ #define CMDQ_DEALLOCATE_KEY_OPCODE_LAST CMDQ_DEALLOCATE_KEY_OPCODE_DEALLOCATE_KEY
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ u8 mrw_flags;
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MASK 0xfUL
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_SFT 0
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MR 0x0UL
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_PMR 0x1UL
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MW_TYPE1 0x2UL
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MW_TYPE2A 0x3UL
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MW_TYPE2B 0x4UL
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_LAST CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MW_TYPE2B
+ #define CMDQ_DEALLOCATE_KEY_UNUSED4_MASK 0xf0UL
+ #define CMDQ_DEALLOCATE_KEY_UNUSED4_SFT 4
+ u8 unused24[3];
+ __le32 key;
+};
+
+/* creq_deallocate_key_resp (size:128b/16B) */
+struct creq_deallocate_key_resp {
+ u8 type;
+ #define CREQ_DEALLOCATE_KEY_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DEALLOCATE_KEY_RESP_TYPE_SFT 0
+ #define CREQ_DEALLOCATE_KEY_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DEALLOCATE_KEY_RESP_TYPE_LAST CREQ_DEALLOCATE_KEY_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DEALLOCATE_KEY_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_DEALLOCATE_KEY_RESP_EVENT_DEALLOCATE_KEY 0xeUL
+ #define CREQ_DEALLOCATE_KEY_RESP_EVENT_LAST CREQ_DEALLOCATE_KEY_RESP_EVENT_DEALLOCATE_KEY
+ __le16 reserved16;
+ __le32 bound_window_info;
+};
+
+/* cmdq_register_mr (size:448b/56B) */
struct cmdq_register_mr {
- u8 opcode;
- #define CMDQ_REGISTER_MR_OPCODE_REGISTER_MR 0xfUL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- u8 log2_pg_size_lvl;
- #define CMDQ_REGISTER_MR_LVL_MASK 0x3UL
- #define CMDQ_REGISTER_MR_LVL_SFT 0
- #define CMDQ_REGISTER_MR_LVL_LVL_0 0x0UL
- #define CMDQ_REGISTER_MR_LVL_LVL_1 0x1UL
- #define CMDQ_REGISTER_MR_LVL_LVL_2 0x2UL
+ u8 opcode;
+ #define CMDQ_REGISTER_MR_OPCODE_REGISTER_MR 0xfUL
+ #define CMDQ_REGISTER_MR_OPCODE_LAST CMDQ_REGISTER_MR_OPCODE_REGISTER_MR
+ u8 cmd_size;
+ __le16 flags;
+ #define CMDQ_REGISTER_MR_FLAGS_ALLOC_MR 0x1UL
+ #define CMDQ_REGISTER_MR_FLAGS_STEERING_TAG_VALID 0x2UL
+ #define CMDQ_REGISTER_MR_FLAGS_ENABLE_RO 0x4UL
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ u8 log2_pg_size_lvl;
+ #define CMDQ_REGISTER_MR_LVL_MASK 0x3UL
+ #define CMDQ_REGISTER_MR_LVL_SFT 0
+ #define CMDQ_REGISTER_MR_LVL_LVL_0 0x0UL
+ #define CMDQ_REGISTER_MR_LVL_LVL_1 0x1UL
+ #define CMDQ_REGISTER_MR_LVL_LVL_2 0x2UL
#define CMDQ_REGISTER_MR_LVL_LAST CMDQ_REGISTER_MR_LVL_LVL_2
- #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_MASK 0x7cUL
- #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_SFT 2
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_MASK 0x7cUL
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_SFT 2
#define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_4K (0xcUL << 2)
#define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_8K (0xdUL << 2)
#define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_64K (0x10UL << 2)
@@ -1437,16 +1422,15 @@ struct cmdq_register_mr {
#define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_2M (0x15UL << 2)
#define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_4M (0x16UL << 2)
#define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_1G (0x1eUL << 2)
- #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_LAST \
- CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_1G
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_LAST CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_1G
#define CMDQ_REGISTER_MR_UNUSED1 0x80UL
- u8 access;
- #define CMDQ_REGISTER_MR_ACCESS_LOCAL_WRITE 0x1UL
- #define CMDQ_REGISTER_MR_ACCESS_REMOTE_READ 0x2UL
- #define CMDQ_REGISTER_MR_ACCESS_REMOTE_WRITE 0x4UL
- #define CMDQ_REGISTER_MR_ACCESS_REMOTE_ATOMIC 0x8UL
- #define CMDQ_REGISTER_MR_ACCESS_MW_BIND 0x10UL
- #define CMDQ_REGISTER_MR_ACCESS_ZERO_BASED 0x20UL
+ u8 access;
+ #define CMDQ_REGISTER_MR_ACCESS_LOCAL_WRITE 0x1UL
+ #define CMDQ_REGISTER_MR_ACCESS_REMOTE_READ 0x2UL
+ #define CMDQ_REGISTER_MR_ACCESS_REMOTE_WRITE 0x4UL
+ #define CMDQ_REGISTER_MR_ACCESS_REMOTE_ATOMIC 0x8UL
+ #define CMDQ_REGISTER_MR_ACCESS_MW_BIND 0x10UL
+ #define CMDQ_REGISTER_MR_ACCESS_ZERO_BASED 0x20UL
__le16 log2_pbl_pg_size;
#define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_MASK 0x1fUL
#define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_SFT 0
@@ -1458,1447 +1442,511 @@ struct cmdq_register_mr {
#define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_2M 0x15UL
#define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4M 0x16UL
#define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1G 0x1eUL
- #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_LAST \
- CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1G
+ #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_LAST CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1G
#define CMDQ_REGISTER_MR_UNUSED11_MASK 0xffe0UL
#define CMDQ_REGISTER_MR_UNUSED11_SFT 5
- __le32 key;
- __le64 pbl;
- __le64 va;
- __le64 mr_size;
+ __le32 key;
+ __le64 pbl;
+ __le64 va;
+ __le64 mr_size;
+ __le16 steering_tag;
+ u8 reserved48[6];
};
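/*
 * Illustrative sketch, not part of the patch above: the ACCESS_* values are
 * single-bit flags and are simply OR'ed into the access byte of
 * cmdq_register_mr.  The helper name and its boolean parameters are
 * assumptions for illustration only.
 */
static inline u8 bnxt_re_build_mr_access(bool remote, bool mw_bind)
{
	u8 access = CMDQ_REGISTER_MR_ACCESS_LOCAL_WRITE;

	if (remote)
		access |= CMDQ_REGISTER_MR_ACCESS_REMOTE_READ |
			  CMDQ_REGISTER_MR_ACCESS_REMOTE_WRITE |
			  CMDQ_REGISTER_MR_ACCESS_REMOTE_ATOMIC;
	if (mw_bind)
		access |= CMDQ_REGISTER_MR_ACCESS_MW_BIND;
	return access;
}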
-/* Deregister MR command (24 bytes) */
-struct cmdq_deregister_mr {
- u8 opcode;
- #define CMDQ_DEREGISTER_MR_OPCODE_DEREGISTER_MR 0x10UL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- __le32 lkey;
- __le32 unused_0;
-};
-
-/* Add GID command (48 bytes) */
-struct cmdq_add_gid {
- u8 opcode;
- #define CMDQ_ADD_GID_OPCODE_ADD_GID 0x11UL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- __be32 gid[4];
- __be16 src_mac[3];
- __le16 vlan;
- #define CMDQ_ADD_GID_VLAN_VLAN_ID_MASK 0xfffUL
- #define CMDQ_ADD_GID_VLAN_VLAN_ID_SFT 0
- #define CMDQ_ADD_GID_VLAN_TPID_MASK 0x7000UL
- #define CMDQ_ADD_GID_VLAN_TPID_SFT 12
- #define CMDQ_ADD_GID_VLAN_TPID_TPID_88A8 (0x0UL << 12)
- #define CMDQ_ADD_GID_VLAN_TPID_TPID_8100 (0x1UL << 12)
- #define CMDQ_ADD_GID_VLAN_TPID_TPID_9100 (0x2UL << 12)
- #define CMDQ_ADD_GID_VLAN_TPID_TPID_9200 (0x3UL << 12)
- #define CMDQ_ADD_GID_VLAN_TPID_TPID_9300 (0x4UL << 12)
- #define CMDQ_ADD_GID_VLAN_TPID_TPID_CFG1 (0x5UL << 12)
- #define CMDQ_ADD_GID_VLAN_TPID_TPID_CFG2 (0x6UL << 12)
- #define CMDQ_ADD_GID_VLAN_TPID_TPID_CFG3 (0x7UL << 12)
- #define CMDQ_ADD_GID_VLAN_TPID_LAST CMDQ_ADD_GID_VLAN_TPID_TPID_CFG3
- #define CMDQ_ADD_GID_VLAN_VLAN_EN 0x8000UL
- __le16 ipid;
- __le16 stats_ctx;
- #define CMDQ_ADD_GID_STATS_CTX_STATS_CTX_ID_MASK 0x7fffUL
- #define CMDQ_ADD_GID_STATS_CTX_STATS_CTX_ID_SFT 0
- #define CMDQ_ADD_GID_STATS_CTX_STATS_CTX_VALID 0x8000UL
- __le32 unused_0;
-};
-
-/* Delete GID command (24 bytes) */
-struct cmdq_delete_gid {
- u8 opcode;
- #define CMDQ_DELETE_GID_OPCODE_DELETE_GID 0x12UL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- __le16 gid_index;
- __le16 unused_0;
- __le32 unused_1;
-};
-
-/* Modify GID command (48 bytes) */
-struct cmdq_modify_gid {
- u8 opcode;
- #define CMDQ_MODIFY_GID_OPCODE_MODIFY_GID 0x17UL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- __be32 gid[4];
- __be16 src_mac[3];
- __le16 vlan;
- #define CMDQ_MODIFY_GID_VLAN_VLAN_ID_MASK 0xfffUL
- #define CMDQ_MODIFY_GID_VLAN_VLAN_ID_SFT 0
- #define CMDQ_MODIFY_GID_VLAN_TPID_MASK 0x7000UL
- #define CMDQ_MODIFY_GID_VLAN_TPID_SFT 12
- #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_88A8 (0x0UL << 12)
- #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_8100 (0x1UL << 12)
- #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_9100 (0x2UL << 12)
- #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_9200 (0x3UL << 12)
- #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_9300 (0x4UL << 12)
- #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_CFG1 (0x5UL << 12)
- #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_CFG2 (0x6UL << 12)
- #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_CFG3 (0x7UL << 12)
- #define CMDQ_MODIFY_GID_VLAN_TPID_LAST \
- CMDQ_MODIFY_GID_VLAN_TPID_TPID_CFG3
- #define CMDQ_MODIFY_GID_VLAN_VLAN_EN 0x8000UL
- __le16 ipid;
- __le16 gid_index;
- __le16 stats_ctx;
- #define CMDQ_MODIFY_GID_STATS_CTX_STATS_CTX_ID_MASK 0x7fffUL
- #define CMDQ_MODIFY_GID_STATS_CTX_STATS_CTX_ID_SFT 0
- #define CMDQ_MODIFY_GID_STATS_CTX_STATS_CTX_VALID 0x8000UL
- __le16 unused_0;
-};
-
-/* Query GID command (24 bytes) */
-struct cmdq_query_gid {
- u8 opcode;
- #define CMDQ_QUERY_GID_OPCODE_QUERY_GID 0x18UL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- __le16 gid_index;
- __le16 unused_0;
- __le32 unused_1;
-};
-
-/* Create QP1 command (80 bytes) */
-struct cmdq_create_qp1 {
- u8 opcode;
- #define CMDQ_CREATE_QP1_OPCODE_CREATE_QP1 0x13UL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- __le64 qp_handle;
- __le32 qp_flags;
- #define CMDQ_CREATE_QP1_QP_FLAGS_SRQ_USED 0x1UL
- #define CMDQ_CREATE_QP1_QP_FLAGS_FORCE_COMPLETION 0x2UL
- #define CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE 0x4UL
- u8 type;
- #define CMDQ_CREATE_QP1_TYPE_GSI 0x1UL
- u8 sq_pg_size_sq_lvl;
- #define CMDQ_CREATE_QP1_SQ_LVL_MASK 0xfUL
- #define CMDQ_CREATE_QP1_SQ_LVL_SFT 0
- #define CMDQ_CREATE_QP1_SQ_LVL_LVL_0 0x0UL
- #define CMDQ_CREATE_QP1_SQ_LVL_LVL_1 0x1UL
- #define CMDQ_CREATE_QP1_SQ_LVL_LVL_2 0x2UL
- #define CMDQ_CREATE_QP1_SQ_PG_SIZE_MASK 0xf0UL
- #define CMDQ_CREATE_QP1_SQ_PG_SIZE_SFT 4
- #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_4K (0x0UL << 4)
- #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8K (0x1UL << 4)
- #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_64K (0x2UL << 4)
- #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_2M (0x3UL << 4)
- #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8M (0x4UL << 4)
- #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_1G (0x5UL << 4)
- u8 rq_pg_size_rq_lvl;
- #define CMDQ_CREATE_QP1_RQ_LVL_MASK 0xfUL
- #define CMDQ_CREATE_QP1_RQ_LVL_SFT 0
- #define CMDQ_CREATE_QP1_RQ_LVL_LVL_0 0x0UL
- #define CMDQ_CREATE_QP1_RQ_LVL_LVL_1 0x1UL
- #define CMDQ_CREATE_QP1_RQ_LVL_LVL_2 0x2UL
- #define CMDQ_CREATE_QP1_RQ_PG_SIZE_MASK 0xf0UL
- #define CMDQ_CREATE_QP1_RQ_PG_SIZE_SFT 4
- #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_4K (0x0UL << 4)
- #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8K (0x1UL << 4)
- #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_64K (0x2UL << 4)
- #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_2M (0x3UL << 4)
- #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8M (0x4UL << 4)
- #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_1G (0x5UL << 4)
- u8 unused_0;
- __le32 dpi;
- __le32 sq_size;
- __le32 rq_size;
- __le16 sq_fwo_sq_sge;
- #define CMDQ_CREATE_QP1_SQ_SGE_MASK 0xfUL
- #define CMDQ_CREATE_QP1_SQ_SGE_SFT 0
- #define CMDQ_CREATE_QP1_SQ_FWO_MASK 0xfff0UL
- #define CMDQ_CREATE_QP1_SQ_FWO_SFT 4
- __le16 rq_fwo_rq_sge;
- #define CMDQ_CREATE_QP1_RQ_SGE_MASK 0xfUL
- #define CMDQ_CREATE_QP1_RQ_SGE_SFT 0
- #define CMDQ_CREATE_QP1_RQ_FWO_MASK 0xfff0UL
- #define CMDQ_CREATE_QP1_RQ_FWO_SFT 4
- __le32 scq_cid;
- __le32 rcq_cid;
- __le32 srq_cid;
- __le32 pd_id;
- __le64 sq_pbl;
- __le64 rq_pbl;
-};
-
-/* Destroy QP1 command (24 bytes) */
-struct cmdq_destroy_qp1 {
- u8 opcode;
- #define CMDQ_DESTROY_QP1_OPCODE_DESTROY_QP1 0x14UL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- __le32 qp1_cid;
- __le32 unused_0;
-};
-
-/* Create AH command (64 bytes) */
-struct cmdq_create_ah {
- u8 opcode;
- #define CMDQ_CREATE_AH_OPCODE_CREATE_AH 0x15UL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- __le64 ah_handle;
- __le32 dgid[4];
- u8 type;
- #define CMDQ_CREATE_AH_TYPE_V1 0x0UL
- #define CMDQ_CREATE_AH_TYPE_V2IPV4 0x2UL
- #define CMDQ_CREATE_AH_TYPE_V2IPV6 0x3UL
- u8 hop_limit;
- __le16 sgid_index;
- __le32 dest_vlan_id_flow_label;
- #define CMDQ_CREATE_AH_FLOW_LABEL_MASK 0xfffffUL
- #define CMDQ_CREATE_AH_FLOW_LABEL_SFT 0
- #define CMDQ_CREATE_AH_DEST_VLAN_ID_MASK 0xfff00000UL
- #define CMDQ_CREATE_AH_DEST_VLAN_ID_SFT 20
- __le32 pd_id;
- __le32 unused_0;
- __le16 dest_mac[3];
- u8 traffic_class;
- u8 unused_1;
-};
-
-/* Destroy AH command (24 bytes) */
-struct cmdq_destroy_ah {
- u8 opcode;
- #define CMDQ_DESTROY_AH_OPCODE_DESTROY_AH 0x16UL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- __le32 ah_cid;
- __le32 unused_0;
-};
-
-/* Initialize Firmware command (112 bytes) */
-struct cmdq_initialize_fw {
- u8 opcode;
- #define CMDQ_INITIALIZE_FW_OPCODE_INITIALIZE_FW 0x80UL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- u8 qpc_pg_size_qpc_lvl;
- #define CMDQ_INITIALIZE_FW_QPC_LVL_MASK 0xfUL
- #define CMDQ_INITIALIZE_FW_QPC_LVL_SFT 0
- #define CMDQ_INITIALIZE_FW_QPC_LVL_LVL_0 0x0UL
- #define CMDQ_INITIALIZE_FW_QPC_LVL_LVL_1 0x1UL
- #define CMDQ_INITIALIZE_FW_QPC_LVL_LVL_2 0x2UL
- #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_MASK 0xf0UL
- #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT 4
- #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_4K (0x0UL << 4)
- #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_8K (0x1UL << 4)
- #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_64K (0x2UL << 4)
- #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_2M (0x3UL << 4)
- #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_8M (0x4UL << 4)
- #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_1G (0x5UL << 4)
- u8 mrw_pg_size_mrw_lvl;
- #define CMDQ_INITIALIZE_FW_MRW_LVL_MASK 0xfUL
- #define CMDQ_INITIALIZE_FW_MRW_LVL_SFT 0
- #define CMDQ_INITIALIZE_FW_MRW_LVL_LVL_0 0x0UL
- #define CMDQ_INITIALIZE_FW_MRW_LVL_LVL_1 0x1UL
- #define CMDQ_INITIALIZE_FW_MRW_LVL_LVL_2 0x2UL
- #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_MASK 0xf0UL
- #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_SFT 4
- #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_4K (0x0UL << 4)
- #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_8K (0x1UL << 4)
- #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_64K (0x2UL << 4)
- #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_2M (0x3UL << 4)
- #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_8M (0x4UL << 4)
- #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_1G (0x5UL << 4)
- u8 srq_pg_size_srq_lvl;
- #define CMDQ_INITIALIZE_FW_SRQ_LVL_MASK 0xfUL
- #define CMDQ_INITIALIZE_FW_SRQ_LVL_SFT 0
- #define CMDQ_INITIALIZE_FW_SRQ_LVL_LVL_0 0x0UL
- #define CMDQ_INITIALIZE_FW_SRQ_LVL_LVL_1 0x1UL
- #define CMDQ_INITIALIZE_FW_SRQ_LVL_LVL_2 0x2UL
- #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_MASK 0xf0UL
- #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_SFT 4
- #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_4K (0x0UL << 4)
- #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_8K (0x1UL << 4)
- #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_64K (0x2UL << 4)
- #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_2M (0x3UL << 4)
- #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_8M (0x4UL << 4)
- #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_1G (0x5UL << 4)
- u8 cq_pg_size_cq_lvl;
- #define CMDQ_INITIALIZE_FW_CQ_LVL_MASK 0xfUL
- #define CMDQ_INITIALIZE_FW_CQ_LVL_SFT 0
- #define CMDQ_INITIALIZE_FW_CQ_LVL_LVL_0 0x0UL
- #define CMDQ_INITIALIZE_FW_CQ_LVL_LVL_1 0x1UL
- #define CMDQ_INITIALIZE_FW_CQ_LVL_LVL_2 0x2UL
- #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_MASK 0xf0UL
- #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_SFT 4
- #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_4K (0x0UL << 4)
- #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_8K (0x1UL << 4)
- #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_64K (0x2UL << 4)
- #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_2M (0x3UL << 4)
- #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_8M (0x4UL << 4)
- #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_1G (0x5UL << 4)
- u8 tqm_pg_size_tqm_lvl;
- #define CMDQ_INITIALIZE_FW_TQM_LVL_MASK 0xfUL
- #define CMDQ_INITIALIZE_FW_TQM_LVL_SFT 0
- #define CMDQ_INITIALIZE_FW_TQM_LVL_LVL_0 0x0UL
- #define CMDQ_INITIALIZE_FW_TQM_LVL_LVL_1 0x1UL
- #define CMDQ_INITIALIZE_FW_TQM_LVL_LVL_2 0x2UL
- #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_MASK 0xf0UL
- #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_SFT 4
- #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_4K (0x0UL << 4)
- #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_8K (0x1UL << 4)
- #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_64K (0x2UL << 4)
- #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_2M (0x3UL << 4)
- #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_8M (0x4UL << 4)
- #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_1G (0x5UL << 4)
- u8 tim_pg_size_tim_lvl;
- #define CMDQ_INITIALIZE_FW_TIM_LVL_MASK 0xfUL
- #define CMDQ_INITIALIZE_FW_TIM_LVL_SFT 0
- #define CMDQ_INITIALIZE_FW_TIM_LVL_LVL_0 0x0UL
- #define CMDQ_INITIALIZE_FW_TIM_LVL_LVL_1 0x1UL
- #define CMDQ_INITIALIZE_FW_TIM_LVL_LVL_2 0x2UL
- #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_MASK 0xf0UL
- #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_SFT 4
- #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_4K (0x0UL << 4)
- #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_8K (0x1UL << 4)
- #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_64K (0x2UL << 4)
- #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_2M (0x3UL << 4)
- #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_8M (0x4UL << 4)
- #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_1G (0x5UL << 4)
- /* This value is (log-base-2-of-DBR-page-size - 12).
- * 0 for 4KB. HW supported values are enumerated below.
- */
- __le16 log2_dbr_pg_size;
- #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_MASK 0xfUL
- #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_SFT 0
- #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_4K 0x0UL
- #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_8K 0x1UL
- #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_16K 0x2UL
- #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_32K 0x3UL
- #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_64K 0x4UL
- #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_128K 0x5UL
- #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_256K 0x6UL
- #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_512K 0x7UL
- #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_1M 0x8UL
- #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_2M 0x9UL
- #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_4M 0xaUL
- #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_8M 0xbUL
- #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_16M 0xcUL
- #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_32M 0xdUL
- #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_64M 0xeUL
- #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_128M 0xfUL
- #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_LAST \
- CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_128M
- __le64 qpc_page_dir;
- __le64 mrw_page_dir;
- __le64 srq_page_dir;
- __le64 cq_page_dir;
- __le64 tqm_page_dir;
- __le64 tim_page_dir;
- __le32 number_of_qp;
- __le32 number_of_mrw;
- __le32 number_of_srq;
- __le32 number_of_cq;
- __le32 max_qp_per_vf;
- __le32 max_mrw_per_vf;
- __le32 max_srq_per_vf;
- __le32 max_cq_per_vf;
- __le32 max_gid_per_vf;
- __le32 stat_ctx_id;
-};
-
-/* De-initialize Firmware command (16 bytes) */
-struct cmdq_deinitialize_fw {
- u8 opcode;
- #define CMDQ_DEINITIALIZE_FW_OPCODE_DEINITIALIZE_FW 0x81UL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
-};
-
-/* Stop function command (16 bytes) */
-struct cmdq_stop_func {
- u8 opcode;
- #define CMDQ_STOP_FUNC_OPCODE_STOP_FUNC 0x82UL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
-};
-
-/* Query function command (16 bytes) */
-struct cmdq_query_func {
- u8 opcode;
- #define CMDQ_QUERY_FUNC_OPCODE_QUERY_FUNC 0x83UL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
+/* creq_register_mr_resp (size:128b/16B) */
+struct creq_register_mr_resp {
+ u8 type;
+ #define CREQ_REGISTER_MR_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_REGISTER_MR_RESP_TYPE_SFT 0
+ #define CREQ_REGISTER_MR_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_REGISTER_MR_RESP_TYPE_LAST CREQ_REGISTER_MR_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_REGISTER_MR_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_REGISTER_MR_RESP_EVENT_REGISTER_MR 0xfUL
+ #define CREQ_REGISTER_MR_RESP_EVENT_LAST CREQ_REGISTER_MR_RESP_EVENT_REGISTER_MR
+ u8 reserved48[6];
};
-/* Set function resources command (16 bytes) */
-struct cmdq_set_func_resources {
- u8 opcode;
- #define CMDQ_SET_FUNC_RESOURCES_OPCODE_SET_FUNC_RESOURCES 0x84UL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- __le32 number_of_qp;
- __le32 number_of_mrw;
- __le32 number_of_srq;
- __le32 number_of_cq;
- __le32 max_qp_per_vf;
- __le32 max_mrw_per_vf;
- __le32 max_srq_per_vf;
- __le32 max_cq_per_vf;
- __le32 max_gid_per_vf;
- __le32 stat_ctx_id;
-};
-
-/* Read hardware resource context command (24 bytes) */
-struct cmdq_read_context {
- u8 opcode;
- #define CMDQ_READ_CONTEXT_OPCODE_READ_CONTEXT 0x85UL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- __le32 type_xid;
- #define CMDQ_READ_CONTEXT_XID_MASK 0xffffffUL
- #define CMDQ_READ_CONTEXT_XID_SFT 0
- #define CMDQ_READ_CONTEXT_TYPE_MASK 0xff000000UL
- #define CMDQ_READ_CONTEXT_TYPE_SFT 24
- #define CMDQ_READ_CONTEXT_TYPE_QPC (0x0UL << 24)
- #define CMDQ_READ_CONTEXT_TYPE_CQ (0x1UL << 24)
- #define CMDQ_READ_CONTEXT_TYPE_MRW (0x2UL << 24)
- #define CMDQ_READ_CONTEXT_TYPE_SRQ (0x3UL << 24)
- __le32 unused_0;
-};
-
-/* Map TC to COS. Can only be issued from a PF (24 bytes) */
-struct cmdq_map_tc_to_cos {
- u8 opcode;
- #define CMDQ_MAP_TC_TO_COS_OPCODE_MAP_TC_TO_COS 0x8aUL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
- __le16 cos0;
- #define CMDQ_MAP_TC_TO_COS_COS0_NO_CHANGE 0xffffUL
- __le16 cos1;
- #define CMDQ_MAP_TC_TO_COS_COS1_DISABLE 0x8000UL
- #define CMDQ_MAP_TC_TO_COS_COS1_NO_CHANGE 0xffffUL
- __le32 unused_0;
-};
-
-/* Query version command (16 bytes) */
-struct cmdq_query_version {
- u8 opcode;
- #define CMDQ_QUERY_VERSION_OPCODE_QUERY_VERSION 0x8bUL
- u8 cmd_size;
- __le16 flags;
- __le16 cookie;
- u8 resp_size;
- u8 reserved8;
- __le64 resp_addr;
-};
-
-/* Command-Response Event Queue (CREQ) Structures */
-/* Base CREQ Record (16 bytes) */
-struct creq_base {
- u8 type;
- #define CREQ_BASE_TYPE_MASK 0x3fUL
- #define CREQ_BASE_TYPE_SFT 0
- #define CREQ_BASE_TYPE_QP_EVENT 0x38UL
- #define CREQ_BASE_TYPE_FUNC_EVENT 0x3aUL
- #define CREQ_BASE_RESERVED2_MASK 0xc0UL
- #define CREQ_BASE_RESERVED2_SFT 6
- u8 reserved56[7];
- u8 v;
- #define CREQ_BASE_V 0x1UL
- #define CREQ_BASE_RESERVED7_MASK 0xfeUL
- #define CREQ_BASE_RESERVED7_SFT 1
- u8 event;
- __le16 reserved48[3];
-};
-
-/* RoCE Function Async Event Notification (16 bytes) */
-struct creq_func_event {
- u8 type;
- #define CREQ_FUNC_EVENT_TYPE_MASK 0x3fUL
- #define CREQ_FUNC_EVENT_TYPE_SFT 0
- #define CREQ_FUNC_EVENT_TYPE_FUNC_EVENT 0x3aUL
- #define CREQ_FUNC_EVENT_RESERVED2_MASK 0xc0UL
- #define CREQ_FUNC_EVENT_RESERVED2_SFT 6
- u8 reserved56[7];
- u8 v;
- #define CREQ_FUNC_EVENT_V 0x1UL
- #define CREQ_FUNC_EVENT_RESERVED7_MASK 0xfeUL
- #define CREQ_FUNC_EVENT_RESERVED7_SFT 1
- u8 event;
- #define CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR 0x1UL
- #define CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR 0x2UL
- #define CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR 0x3UL
- #define CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR 0x4UL
- #define CREQ_FUNC_EVENT_EVENT_CQ_ERROR 0x5UL
- #define CREQ_FUNC_EVENT_EVENT_TQM_ERROR 0x6UL
- #define CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR 0x7UL
- #define CREQ_FUNC_EVENT_EVENT_CFCS_ERROR 0x8UL
- #define CREQ_FUNC_EVENT_EVENT_CFCC_ERROR 0x9UL
- #define CREQ_FUNC_EVENT_EVENT_CFCM_ERROR 0xaUL
- #define CREQ_FUNC_EVENT_EVENT_TIM_ERROR 0xbUL
- #define CREQ_FUNC_EVENT_EVENT_VF_COMM_REQUEST 0x80UL
- #define CREQ_FUNC_EVENT_EVENT_RESOURCE_EXHAUSTED 0x81UL
- __le16 reserved48[3];
-};
-
-/* RoCE Slowpath Command Completion (16 bytes) */
-struct creq_qp_event {
- u8 type;
- #define CREQ_QP_EVENT_TYPE_MASK 0x3fUL
- #define CREQ_QP_EVENT_TYPE_SFT 0
- #define CREQ_QP_EVENT_TYPE_QP_EVENT 0x38UL
- #define CREQ_QP_EVENT_RESERVED2_MASK 0xc0UL
- #define CREQ_QP_EVENT_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 reserved32;
- u8 v;
- #define CREQ_QP_EVENT_V 0x1UL
- #define CREQ_QP_EVENT_RESERVED7_MASK 0xfeUL
- #define CREQ_QP_EVENT_RESERVED7_SFT 1
- u8 event;
- #define CREQ_QP_EVENT_EVENT_CREATE_QP 0x1UL
- #define CREQ_QP_EVENT_EVENT_DESTROY_QP 0x2UL
- #define CREQ_QP_EVENT_EVENT_MODIFY_QP 0x3UL
- #define CREQ_QP_EVENT_EVENT_QUERY_QP 0x4UL
- #define CREQ_QP_EVENT_EVENT_CREATE_SRQ 0x5UL
- #define CREQ_QP_EVENT_EVENT_DESTROY_SRQ 0x6UL
- #define CREQ_QP_EVENT_EVENT_QUERY_SRQ 0x8UL
- #define CREQ_QP_EVENT_EVENT_CREATE_CQ 0x9UL
- #define CREQ_QP_EVENT_EVENT_DESTROY_CQ 0xaUL
- #define CREQ_QP_EVENT_EVENT_RESIZE_CQ 0xcUL
- #define CREQ_QP_EVENT_EVENT_ALLOCATE_MRW 0xdUL
- #define CREQ_QP_EVENT_EVENT_DEALLOCATE_KEY 0xeUL
- #define CREQ_QP_EVENT_EVENT_REGISTER_MR 0xfUL
- #define CREQ_QP_EVENT_EVENT_DEREGISTER_MR 0x10UL
- #define CREQ_QP_EVENT_EVENT_ADD_GID 0x11UL
- #define CREQ_QP_EVENT_EVENT_DELETE_GID 0x12UL
- #define CREQ_QP_EVENT_EVENT_MODIFY_GID 0x17UL
- #define CREQ_QP_EVENT_EVENT_QUERY_GID 0x18UL
- #define CREQ_QP_EVENT_EVENT_CREATE_QP1 0x13UL
- #define CREQ_QP_EVENT_EVENT_DESTROY_QP1 0x14UL
- #define CREQ_QP_EVENT_EVENT_CREATE_AH 0x15UL
- #define CREQ_QP_EVENT_EVENT_DESTROY_AH 0x16UL
- #define CREQ_QP_EVENT_EVENT_INITIALIZE_FW 0x80UL
- #define CREQ_QP_EVENT_EVENT_DEINITIALIZE_FW 0x81UL
- #define CREQ_QP_EVENT_EVENT_STOP_FUNC 0x82UL
- #define CREQ_QP_EVENT_EVENT_QUERY_FUNC 0x83UL
- #define CREQ_QP_EVENT_EVENT_SET_FUNC_RESOURCES 0x84UL
- #define CREQ_QP_EVENT_EVENT_MAP_TC_TO_COS 0x8aUL
- #define CREQ_QP_EVENT_EVENT_QUERY_VERSION 0x8bUL
- #define CREQ_QP_EVENT_EVENT_MODIFY_CC 0x8cUL
- #define CREQ_QP_EVENT_EVENT_QUERY_CC 0x8dUL
- #define CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION 0xc0UL
- __le16 reserved48[3];
-};
-
-/* Create QP command response (16 bytes) */
-struct creq_create_qp_resp {
- u8 type;
- #define CREQ_CREATE_QP_RESP_TYPE_MASK 0x3fUL
- #define CREQ_CREATE_QP_RESP_TYPE_SFT 0
- #define CREQ_CREATE_QP_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_CREATE_QP_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_CREATE_QP_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 xid;
- u8 v;
- #define CREQ_CREATE_QP_RESP_V 0x1UL
- #define CREQ_CREATE_QP_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_CREATE_QP_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_CREATE_QP_RESP_EVENT_CREATE_QP 0x1UL
- __le16 reserved48[3];
-};
-
-/* Destroy QP command response (16 bytes) */
-struct creq_destroy_qp_resp {
- u8 type;
- #define CREQ_DESTROY_QP_RESP_TYPE_MASK 0x3fUL
- #define CREQ_DESTROY_QP_RESP_TYPE_SFT 0
- #define CREQ_DESTROY_QP_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_DESTROY_QP_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_DESTROY_QP_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 xid;
- u8 v;
- #define CREQ_DESTROY_QP_RESP_V 0x1UL
- #define CREQ_DESTROY_QP_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_DESTROY_QP_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_DESTROY_QP_RESP_EVENT_DESTROY_QP 0x2UL
- __le16 reserved48[3];
-};
-
-/* Modify QP command response (16 bytes) */
-struct creq_modify_qp_resp {
- u8 type;
- #define CREQ_MODIFY_QP_RESP_TYPE_MASK 0x3fUL
- #define CREQ_MODIFY_QP_RESP_TYPE_SFT 0
- #define CREQ_MODIFY_QP_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_MODIFY_QP_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_MODIFY_QP_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 xid;
- u8 v;
- #define CREQ_MODIFY_QP_RESP_V 0x1UL
- #define CREQ_MODIFY_QP_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_MODIFY_QP_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_MODIFY_QP_RESP_EVENT_MODIFY_QP 0x3UL
- __le16 reserved48[3];
-};
-
-/* cmdq_query_roce_stats (size:128b/16B) */
-struct cmdq_query_roce_stats {
+/* cmdq_deregister_mr (size:192b/24B) */
+struct cmdq_deregister_mr {
u8 opcode;
- #define CMDQ_QUERY_ROCE_STATS_OPCODE_QUERY_ROCE_STATS 0x8eUL
- #define CMDQ_QUERY_ROCE_STATS_OPCODE_LAST \
- CMDQ_QUERY_ROCE_STATS_OPCODE_QUERY_ROCE_STATS
+ #define CMDQ_DEREGISTER_MR_OPCODE_DEREGISTER_MR 0x10UL
+ #define CMDQ_DEREGISTER_MR_OPCODE_LAST CMDQ_DEREGISTER_MR_OPCODE_DEREGISTER_MR
u8 cmd_size;
__le16 flags;
__le16 cookie;
u8 resp_size;
u8 reserved8;
__le64 resp_addr;
+ __le32 lkey;
+ __le32 unused_0;
};
-/* Query QP command response (16 bytes) */
-struct creq_query_qp_resp {
- u8 type;
- #define CREQ_QUERY_QP_RESP_TYPE_MASK 0x3fUL
- #define CREQ_QUERY_QP_RESP_TYPE_SFT 0
- #define CREQ_QUERY_QP_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_QUERY_QP_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_QUERY_QP_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 size;
- u8 v;
- #define CREQ_QUERY_QP_RESP_V 0x1UL
- #define CREQ_QUERY_QP_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_QUERY_QP_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_QUERY_QP_RESP_EVENT_QUERY_QP 0x4UL
- __le16 reserved48[3];
-};
-
-/* Query QP command response side buffer structure (104 bytes) */
-struct creq_query_qp_resp_sb {
- u8 opcode;
- #define CREQ_QUERY_QP_RESP_SB_OPCODE_QUERY_QP 0x4UL
- u8 status;
- __le16 cookie;
- __le16 flags;
- u8 resp_size;
- u8 reserved8;
- __le32 xid;
- u8 en_sqd_async_notify_state;
- #define CREQ_QUERY_QP_RESP_SB_STATE_MASK 0xfUL
- #define CREQ_QUERY_QP_RESP_SB_STATE_SFT 0
- #define CREQ_QUERY_QP_RESP_SB_STATE_RESET 0x0UL
- #define CREQ_QUERY_QP_RESP_SB_STATE_INIT 0x1UL
- #define CREQ_QUERY_QP_RESP_SB_STATE_RTR 0x2UL
- #define CREQ_QUERY_QP_RESP_SB_STATE_RTS 0x3UL
- #define CREQ_QUERY_QP_RESP_SB_STATE_SQD 0x4UL
- #define CREQ_QUERY_QP_RESP_SB_STATE_SQE 0x5UL
- #define CREQ_QUERY_QP_RESP_SB_STATE_ERR 0x6UL
- #define CREQ_QUERY_QP_RESP_SB_EN_SQD_ASYNC_NOTIFY 0x10UL
- u8 access;
- #define CREQ_QUERY_QP_RESP_SB_ACCESS_LOCAL_WRITE 0x1UL
- #define CREQ_QUERY_QP_RESP_SB_ACCESS_REMOTE_WRITE 0x2UL
- #define CREQ_QUERY_QP_RESP_SB_ACCESS_REMOTE_READ 0x4UL
- #define CREQ_QUERY_QP_RESP_SB_ACCESS_REMOTE_ATOMIC 0x8UL
- __le16 pkey;
- __le32 qkey;
- __le32 reserved32;
- __le32 dgid[4];
- __le32 flow_label;
- __le16 sgid_index;
- u8 hop_limit;
- u8 traffic_class;
- __le16 dest_mac[3];
- __le16 path_mtu_dest_vlan_id;
- #define CREQ_QUERY_QP_RESP_SB_DEST_VLAN_ID_MASK 0xfffUL
- #define CREQ_QUERY_QP_RESP_SB_DEST_VLAN_ID_SFT 0
- #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MASK 0xf000UL
- #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_SFT 12
- #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_256 (0x0UL << 12)
- #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_512 (0x1UL << 12)
- #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_1024 (0x2UL << 12)
- #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_2048 (0x3UL << 12)
- #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_4096 (0x4UL << 12)
- #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_8192 (0x5UL << 12)
- u8 timeout;
- u8 retry_cnt;
- u8 rnr_retry;
- u8 min_rnr_timer;
- __le32 rq_psn;
- __le32 sq_psn;
- u8 max_rd_atomic;
- u8 max_dest_rd_atomic;
- u8 tos_dscp_tos_ecn;
- #define CREQ_QUERY_QP_RESP_SB_TOS_ECN_MASK 0x3UL
- #define CREQ_QUERY_QP_RESP_SB_TOS_ECN_SFT 0
- #define CREQ_QUERY_QP_RESP_SB_TOS_DSCP_MASK 0xfcUL
- #define CREQ_QUERY_QP_RESP_SB_TOS_DSCP_SFT 2
- u8 enable_cc;
- #define CREQ_QUERY_QP_RESP_SB_ENABLE_CC 0x1UL
- #define CREQ_QUERY_QP_RESP_SB_RESERVED7_MASK 0xfeUL
- #define CREQ_QUERY_QP_RESP_SB_RESERVED7_SFT 1
- __le32 sq_size;
- __le32 rq_size;
- __le16 sq_sge;
- __le16 rq_sge;
- __le32 max_inline_data;
- __le32 dest_qp_id;
- __le32 unused_1;
- __le16 src_mac[3];
- __le16 vlan_pcp_vlan_dei_vlan_id;
- #define CREQ_QUERY_QP_RESP_SB_VLAN_ID_MASK 0xfffUL
- #define CREQ_QUERY_QP_RESP_SB_VLAN_ID_SFT 0
- #define CREQ_QUERY_QP_RESP_SB_VLAN_DEI 0x1000UL
- #define CREQ_QUERY_QP_RESP_SB_VLAN_PCP_MASK 0xe000UL
- #define CREQ_QUERY_QP_RESP_SB_VLAN_PCP_SFT 13
-};
-
-/* Create SRQ command response (16 bytes) */
-struct creq_create_srq_resp {
- u8 type;
- #define CREQ_CREATE_SRQ_RESP_TYPE_MASK 0x3fUL
- #define CREQ_CREATE_SRQ_RESP_TYPE_SFT 0
- #define CREQ_CREATE_SRQ_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_CREATE_SRQ_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_CREATE_SRQ_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 xid;
- u8 v;
- #define CREQ_CREATE_SRQ_RESP_V 0x1UL
- #define CREQ_CREATE_SRQ_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_CREATE_SRQ_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_CREATE_SRQ_RESP_EVENT_CREATE_SRQ 0x5UL
- __le16 reserved48[3];
-};
-
-/* Destroy SRQ command response (16 bytes) */
-struct creq_destroy_srq_resp {
- u8 type;
- #define CREQ_DESTROY_SRQ_RESP_TYPE_MASK 0x3fUL
- #define CREQ_DESTROY_SRQ_RESP_TYPE_SFT 0
- #define CREQ_DESTROY_SRQ_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_DESTROY_SRQ_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_DESTROY_SRQ_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 xid;
- u8 v;
- #define CREQ_DESTROY_SRQ_RESP_V 0x1UL
- #define CREQ_DESTROY_SRQ_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_DESTROY_SRQ_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_DESTROY_SRQ_RESP_EVENT_DESTROY_SRQ 0x6UL
- __le16 enable_for_arm[3];
- #define CREQ_DESTROY_SRQ_RESP_ENABLE_FOR_ARM_MASK 0x30000UL
- #define CREQ_DESTROY_SRQ_RESP_ENABLE_FOR_ARM_SFT 16
- #define CREQ_DESTROY_SRQ_RESP_RESERVED46_MASK 0xfffc0000UL
- #define CREQ_DESTROY_SRQ_RESP_RESERVED46_SFT 18
-};
-
-/* Query SRQ command response (16 bytes) */
-struct creq_query_srq_resp {
- u8 type;
- #define CREQ_QUERY_SRQ_RESP_TYPE_MASK 0x3fUL
- #define CREQ_QUERY_SRQ_RESP_TYPE_SFT 0
- #define CREQ_QUERY_SRQ_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_QUERY_SRQ_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_QUERY_SRQ_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 size;
- u8 v;
- #define CREQ_QUERY_SRQ_RESP_V 0x1UL
- #define CREQ_QUERY_SRQ_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_QUERY_SRQ_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_QUERY_SRQ_RESP_EVENT_QUERY_SRQ 0x8UL
- __le16 reserved48[3];
-};
-
-/* Query SRQ command response side buffer structure (24 bytes) */
-struct creq_query_srq_resp_sb {
- u8 opcode;
- #define CREQ_QUERY_SRQ_RESP_SB_OPCODE_QUERY_SRQ 0x8UL
- u8 status;
- __le16 cookie;
- __le16 flags;
- u8 resp_size;
- u8 reserved8;
- __le32 xid;
- __le16 srq_limit;
- __le16 reserved16;
- __le32 data[4];
-};
-
-/* Create CQ command Response (16 bytes) */
-struct creq_create_cq_resp {
- u8 type;
- #define CREQ_CREATE_CQ_RESP_TYPE_MASK 0x3fUL
- #define CREQ_CREATE_CQ_RESP_TYPE_SFT 0
- #define CREQ_CREATE_CQ_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_CREATE_CQ_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_CREATE_CQ_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 xid;
- u8 v;
- #define CREQ_CREATE_CQ_RESP_V 0x1UL
- #define CREQ_CREATE_CQ_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_CREATE_CQ_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_CREATE_CQ_RESP_EVENT_CREATE_CQ 0x9UL
- __le16 reserved48[3];
-};
-
-/* Destroy CQ command response (16 bytes) */
-struct creq_destroy_cq_resp {
- u8 type;
- #define CREQ_DESTROY_CQ_RESP_TYPE_MASK 0x3fUL
- #define CREQ_DESTROY_CQ_RESP_TYPE_SFT 0
- #define CREQ_DESTROY_CQ_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_DESTROY_CQ_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_DESTROY_CQ_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 xid;
- u8 v;
- #define CREQ_DESTROY_CQ_RESP_V 0x1UL
- #define CREQ_DESTROY_CQ_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_DESTROY_CQ_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_DESTROY_CQ_RESP_EVENT_DESTROY_CQ 0xaUL
- __le16 cq_arm_lvl;
- #define CREQ_DESTROY_CQ_RESP_CQ_ARM_LVL_MASK 0x3UL
- #define CREQ_DESTROY_CQ_RESP_CQ_ARM_LVL_SFT 0
- #define CREQ_DESTROY_CQ_RESP_RESERVED14_MASK 0xfffcUL
- #define CREQ_DESTROY_CQ_RESP_RESERVED14_SFT 2
- __le16 total_cnq_events;
- __le16 reserved16;
-};
-
-/* Resize CQ command response (16 bytes) */
-struct creq_resize_cq_resp {
- u8 type;
- #define CREQ_RESIZE_CQ_RESP_TYPE_MASK 0x3fUL
- #define CREQ_RESIZE_CQ_RESP_TYPE_SFT 0
- #define CREQ_RESIZE_CQ_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_RESIZE_CQ_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_RESIZE_CQ_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 xid;
- u8 v;
- #define CREQ_RESIZE_CQ_RESP_V 0x1UL
- #define CREQ_RESIZE_CQ_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_RESIZE_CQ_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_RESIZE_CQ_RESP_EVENT_RESIZE_CQ 0xcUL
- __le16 reserved48[3];
-};
-
-/* Allocate MRW command response (16 bytes) */
-struct creq_allocate_mrw_resp {
- u8 type;
- #define CREQ_ALLOCATE_MRW_RESP_TYPE_MASK 0x3fUL
- #define CREQ_ALLOCATE_MRW_RESP_TYPE_SFT 0
- #define CREQ_ALLOCATE_MRW_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_ALLOCATE_MRW_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_ALLOCATE_MRW_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 xid;
- u8 v;
- #define CREQ_ALLOCATE_MRW_RESP_V 0x1UL
- #define CREQ_ALLOCATE_MRW_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_ALLOCATE_MRW_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_ALLOCATE_MRW_RESP_EVENT_ALLOCATE_MRW 0xdUL
- __le16 reserved48[3];
-};
-
-/* De-allocate key command response (16 bytes) */
-struct creq_deallocate_key_resp {
- u8 type;
- #define CREQ_DEALLOCATE_KEY_RESP_TYPE_MASK 0x3fUL
- #define CREQ_DEALLOCATE_KEY_RESP_TYPE_SFT 0
- #define CREQ_DEALLOCATE_KEY_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_DEALLOCATE_KEY_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_DEALLOCATE_KEY_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 xid;
- u8 v;
- #define CREQ_DEALLOCATE_KEY_RESP_V 0x1UL
- #define CREQ_DEALLOCATE_KEY_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_DEALLOCATE_KEY_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_DEALLOCATE_KEY_RESP_EVENT_DEALLOCATE_KEY 0xeUL
- __le16 reserved16;
- __le32 bound_window_info;
-};
-
-/* Register MR command response (16 bytes) */
-struct creq_register_mr_resp {
- u8 type;
- #define CREQ_REGISTER_MR_RESP_TYPE_MASK 0x3fUL
- #define CREQ_REGISTER_MR_RESP_TYPE_SFT 0
- #define CREQ_REGISTER_MR_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_REGISTER_MR_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_REGISTER_MR_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 xid;
- u8 v;
- #define CREQ_REGISTER_MR_RESP_V 0x1UL
- #define CREQ_REGISTER_MR_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_REGISTER_MR_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_REGISTER_MR_RESP_EVENT_REGISTER_MR 0xfUL
- __le16 reserved48[3];
-};
-
-/* Deregister MR command response (16 bytes) */
+/* creq_deregister_mr_resp (size:128b/16B) */
struct creq_deregister_mr_resp {
- u8 type;
- #define CREQ_DEREGISTER_MR_RESP_TYPE_MASK 0x3fUL
- #define CREQ_DEREGISTER_MR_RESP_TYPE_SFT 0
- #define CREQ_DEREGISTER_MR_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_DEREGISTER_MR_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_DEREGISTER_MR_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 xid;
- u8 v;
- #define CREQ_DEREGISTER_MR_RESP_V 0x1UL
- #define CREQ_DEREGISTER_MR_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_DEREGISTER_MR_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_DEREGISTER_MR_RESP_EVENT_DEREGISTER_MR 0x10UL
- __le16 reserved16;
- __le32 bound_windows;
-};
-
-/* Add GID command response (16 bytes) */
+ u8 type;
+ #define CREQ_DEREGISTER_MR_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DEREGISTER_MR_RESP_TYPE_SFT 0
+ #define CREQ_DEREGISTER_MR_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DEREGISTER_MR_RESP_TYPE_LAST CREQ_DEREGISTER_MR_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DEREGISTER_MR_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_DEREGISTER_MR_RESP_EVENT_DEREGISTER_MR 0x10UL
+ #define CREQ_DEREGISTER_MR_RESP_EVENT_LAST CREQ_DEREGISTER_MR_RESP_EVENT_DEREGISTER_MR
+ __le16 reserved16;
+ __le32 bound_windows;
+};
+
+/* cmdq_add_gid (size:384b/48B) */
+struct cmdq_add_gid {
+ u8 opcode;
+ #define CMDQ_ADD_GID_OPCODE_ADD_GID 0x11UL
+ #define CMDQ_ADD_GID_OPCODE_LAST CMDQ_ADD_GID_OPCODE_ADD_GID
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __be32 gid[4];
+ __be16 src_mac[3];
+ __le16 vlan;
+ #define CMDQ_ADD_GID_VLAN_VLAN_EN_TPID_VLAN_ID_MASK 0xffffUL
+ #define CMDQ_ADD_GID_VLAN_VLAN_EN_TPID_VLAN_ID_SFT 0
+ #define CMDQ_ADD_GID_VLAN_VLAN_ID_MASK 0xfffUL
+ #define CMDQ_ADD_GID_VLAN_VLAN_ID_SFT 0
+ #define CMDQ_ADD_GID_VLAN_TPID_MASK 0x7000UL
+ #define CMDQ_ADD_GID_VLAN_TPID_SFT 12
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_88A8 (0x0UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_8100 (0x1UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_9100 (0x2UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_9200 (0x3UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_9300 (0x4UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_CFG1 (0x5UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_CFG2 (0x6UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_CFG3 (0x7UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_LAST CMDQ_ADD_GID_VLAN_TPID_TPID_CFG3
+ #define CMDQ_ADD_GID_VLAN_VLAN_EN 0x8000UL
+ __le16 ipid;
+ __le16 stats_ctx;
+ #define CMDQ_ADD_GID_STATS_CTX_STATS_CTX_VALID_STATS_CTX_ID_MASK 0xffffUL
+ #define CMDQ_ADD_GID_STATS_CTX_STATS_CTX_VALID_STATS_CTX_ID_SFT 0
+ #define CMDQ_ADD_GID_STATS_CTX_STATS_CTX_ID_MASK 0x7fffUL
+ #define CMDQ_ADD_GID_STATS_CTX_STATS_CTX_ID_SFT 0
+ #define CMDQ_ADD_GID_STATS_CTX_STATS_CTX_VALID 0x8000UL
+ __le32 unused_0;
+};
+
+/* creq_add_gid_resp (size:128b/16B) */
struct creq_add_gid_resp {
- u8 type;
- #define CREQ_ADD_GID_RESP_TYPE_MASK 0x3fUL
- #define CREQ_ADD_GID_RESP_TYPE_SFT 0
- #define CREQ_ADD_GID_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_ADD_GID_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_ADD_GID_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 xid;
- u8 v;
- #define CREQ_ADD_GID_RESP_V 0x1UL
- #define CREQ_ADD_GID_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_ADD_GID_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_ADD_GID_RESP_EVENT_ADD_GID 0x11UL
- __le16 reserved48[3];
-};
-
-/* Delete GID command response (16 bytes) */
+ u8 type;
+ #define CREQ_ADD_GID_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_ADD_GID_RESP_TYPE_SFT 0
+ #define CREQ_ADD_GID_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_ADD_GID_RESP_TYPE_LAST CREQ_ADD_GID_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_ADD_GID_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_ADD_GID_RESP_EVENT_ADD_GID 0x11UL
+ #define CREQ_ADD_GID_RESP_EVENT_LAST CREQ_ADD_GID_RESP_EVENT_ADD_GID
+ u8 reserved48[6];
+};
+
+/* cmdq_delete_gid (size:192b/24B) */
+struct cmdq_delete_gid {
+ u8 opcode;
+ #define CMDQ_DELETE_GID_OPCODE_DELETE_GID 0x12UL
+ #define CMDQ_DELETE_GID_OPCODE_LAST CMDQ_DELETE_GID_OPCODE_DELETE_GID
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le16 gid_index;
+ u8 unused_0[6];
+};
+
+/* creq_delete_gid_resp (size:128b/16B) */
struct creq_delete_gid_resp {
- u8 type;
- #define CREQ_DELETE_GID_RESP_TYPE_MASK 0x3fUL
- #define CREQ_DELETE_GID_RESP_TYPE_SFT 0
- #define CREQ_DELETE_GID_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_DELETE_GID_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_DELETE_GID_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 xid;
- u8 v;
- #define CREQ_DELETE_GID_RESP_V 0x1UL
- #define CREQ_DELETE_GID_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_DELETE_GID_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_DELETE_GID_RESP_EVENT_DELETE_GID 0x12UL
- __le16 reserved48[3];
-};
-
-/* Modify GID command response (16 bytes) */
+ u8 type;
+ #define CREQ_DELETE_GID_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DELETE_GID_RESP_TYPE_SFT 0
+ #define CREQ_DELETE_GID_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DELETE_GID_RESP_TYPE_LAST CREQ_DELETE_GID_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DELETE_GID_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_DELETE_GID_RESP_EVENT_DELETE_GID 0x12UL
+ #define CREQ_DELETE_GID_RESP_EVENT_LAST CREQ_DELETE_GID_RESP_EVENT_DELETE_GID
+ u8 reserved48[6];
+};
+
+/* cmdq_modify_gid (size:384b/48B) */
+struct cmdq_modify_gid {
+ u8 opcode;
+ #define CMDQ_MODIFY_GID_OPCODE_MODIFY_GID 0x17UL
+ #define CMDQ_MODIFY_GID_OPCODE_LAST CMDQ_MODIFY_GID_OPCODE_MODIFY_GID
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __be32 gid[4];
+ __be16 src_mac[3];
+ __le16 vlan;
+ #define CMDQ_MODIFY_GID_VLAN_VLAN_ID_MASK 0xfffUL
+ #define CMDQ_MODIFY_GID_VLAN_VLAN_ID_SFT 0
+ #define CMDQ_MODIFY_GID_VLAN_TPID_MASK 0x7000UL
+ #define CMDQ_MODIFY_GID_VLAN_TPID_SFT 12
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_88A8 (0x0UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_8100 (0x1UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_9100 (0x2UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_9200 (0x3UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_9300 (0x4UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_CFG1 (0x5UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_CFG2 (0x6UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_CFG3 (0x7UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_LAST CMDQ_MODIFY_GID_VLAN_TPID_TPID_CFG3
+ #define CMDQ_MODIFY_GID_VLAN_VLAN_EN 0x8000UL
+ __le16 ipid;
+ __le16 gid_index;
+ __le16 stats_ctx;
+ #define CMDQ_MODIFY_GID_STATS_CTX_STATS_CTX_ID_MASK 0x7fffUL
+ #define CMDQ_MODIFY_GID_STATS_CTX_STATS_CTX_ID_SFT 0
+ #define CMDQ_MODIFY_GID_STATS_CTX_STATS_CTX_VALID 0x8000UL
+ __le16 unused_0;
+};
+
+/* creq_modify_gid_resp (size:128b/16B) */
struct creq_modify_gid_resp {
- u8 type;
- #define CREQ_MODIFY_GID_RESP_TYPE_MASK 0x3fUL
- #define CREQ_MODIFY_GID_RESP_TYPE_SFT 0
- #define CREQ_MODIFY_GID_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_MODIFY_GID_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_MODIFY_GID_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 xid;
- u8 v;
- #define CREQ_MODIFY_GID_RESP_V 0x1UL
- #define CREQ_MODIFY_GID_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_MODIFY_GID_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_MODIFY_GID_RESP_EVENT_ADD_GID 0x11UL
- __le16 reserved48[3];
-};
-
-/* Query GID command response (16 bytes) */
+ u8 type;
+ #define CREQ_MODIFY_GID_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_MODIFY_GID_RESP_TYPE_SFT 0
+ #define CREQ_MODIFY_GID_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_MODIFY_GID_RESP_TYPE_LAST CREQ_MODIFY_GID_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_MODIFY_GID_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_MODIFY_GID_RESP_EVENT_ADD_GID 0x11UL
+ #define CREQ_MODIFY_GID_RESP_EVENT_LAST CREQ_MODIFY_GID_RESP_EVENT_ADD_GID
+ u8 reserved48[6];
+};
+
+/* cmdq_query_gid (size:192b/24B) */
+struct cmdq_query_gid {
+ u8 opcode;
+ #define CMDQ_QUERY_GID_OPCODE_QUERY_GID 0x18UL
+ #define CMDQ_QUERY_GID_OPCODE_LAST CMDQ_QUERY_GID_OPCODE_QUERY_GID
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le16 gid_index;
+ u8 unused16[6];
+};
+
+/* creq_query_gid_resp (size:128b/16B) */
struct creq_query_gid_resp {
- u8 type;
- #define CREQ_QUERY_GID_RESP_TYPE_MASK 0x3fUL
- #define CREQ_QUERY_GID_RESP_TYPE_SFT 0
- #define CREQ_QUERY_GID_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_QUERY_GID_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_QUERY_GID_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 size;
- u8 v;
- #define CREQ_QUERY_GID_RESP_V 0x1UL
- #define CREQ_QUERY_GID_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_QUERY_GID_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_QUERY_GID_RESP_EVENT_QUERY_GID 0x18UL
- __le16 reserved48[3];
-};
-
-/* Query GID command response side buffer structure (40 bytes) */
+ u8 type;
+ #define CREQ_QUERY_GID_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_GID_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_GID_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_GID_RESP_TYPE_LAST CREQ_QUERY_GID_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_GID_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_QUERY_GID_RESP_EVENT_QUERY_GID 0x18UL
+ #define CREQ_QUERY_GID_RESP_EVENT_LAST CREQ_QUERY_GID_RESP_EVENT_QUERY_GID
+ u8 reserved48[6];
+};
+
+/* creq_query_gid_resp_sb (size:320b/40B) */
struct creq_query_gid_resp_sb {
- u8 opcode;
- #define CREQ_QUERY_GID_RESP_SB_OPCODE_QUERY_GID 0x18UL
- u8 status;
- __le16 cookie;
- __le16 flags;
- u8 resp_size;
- u8 reserved8;
- __le32 gid[4];
- __le16 src_mac[3];
- __le16 vlan;
- #define CREQ_QUERY_GID_RESP_SB_VLAN_VLAN_ID_MASK 0xfffUL
- #define CREQ_QUERY_GID_RESP_SB_VLAN_VLAN_ID_SFT 0
- #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_MASK 0x7000UL
- #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_SFT 12
- #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_88A8 (0x0UL << 12)
- #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_8100 (0x1UL << 12)
- #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_9100 (0x2UL << 12)
- #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_9200 (0x3UL << 12)
- #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_9300 (0x4UL << 12)
- #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_CFG1 (0x5UL << 12)
- #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_CFG2 (0x6UL << 12)
- #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_CFG3 (0x7UL << 12)
- #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_LAST \
- CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_CFG3
- #define CREQ_QUERY_GID_RESP_SB_VLAN_VLAN_EN 0x8000UL
- __le16 ipid;
- __le16 gid_index;
- __le32 unused_0;
-};
-
-/* Create QP1 command response (16 bytes) */
+ u8 opcode;
+ #define CREQ_QUERY_GID_RESP_SB_OPCODE_QUERY_GID 0x18UL
+ #define CREQ_QUERY_GID_RESP_SB_OPCODE_LAST CREQ_QUERY_GID_RESP_SB_OPCODE_QUERY_GID
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ __le32 gid[4];
+ __le16 src_mac[3];
+ __le16 vlan;
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_VLAN_EN_TPID_VLAN_ID_MASK 0xffffUL
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_VLAN_EN_TPID_VLAN_ID_SFT 0
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_VLAN_ID_MASK 0xfffUL
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_VLAN_ID_SFT 0
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_MASK 0x7000UL
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_SFT 12
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_88A8 (0x0UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_8100 (0x1UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_9100 (0x2UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_9200 (0x3UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_9300 (0x4UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_CFG1 (0x5UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_CFG2 (0x6UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_CFG3 (0x7UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_LAST CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_CFG3
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_VLAN_EN 0x8000UL
+ __le16 ipid;
+ __le16 gid_index;
+ __le32 unused_0;
+};
+
+/* cmdq_create_qp1 (size:640b/80B) */
+struct cmdq_create_qp1 {
+ u8 opcode;
+ #define CMDQ_CREATE_QP1_OPCODE_CREATE_QP1 0x13UL
+ #define CMDQ_CREATE_QP1_OPCODE_LAST CMDQ_CREATE_QP1_OPCODE_CREATE_QP1
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le64 qp_handle;
+ __le32 qp_flags;
+ #define CMDQ_CREATE_QP1_QP_FLAGS_SRQ_USED 0x1UL
+ #define CMDQ_CREATE_QP1_QP_FLAGS_FORCE_COMPLETION 0x2UL
+ #define CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE 0x4UL
+ #define CMDQ_CREATE_QP1_QP_FLAGS_LAST CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE
+ u8 type;
+ #define CMDQ_CREATE_QP1_TYPE_GSI 0x1UL
+ #define CMDQ_CREATE_QP1_TYPE_LAST CMDQ_CREATE_QP1_TYPE_GSI
+ u8 sq_pg_size_sq_lvl;
+ #define CMDQ_CREATE_QP1_SQ_LVL_MASK 0xfUL
+ #define CMDQ_CREATE_QP1_SQ_LVL_SFT 0
+ #define CMDQ_CREATE_QP1_SQ_LVL_LVL_0 0x0UL
+ #define CMDQ_CREATE_QP1_SQ_LVL_LVL_1 0x1UL
+ #define CMDQ_CREATE_QP1_SQ_LVL_LVL_2 0x2UL
+ #define CMDQ_CREATE_QP1_SQ_LVL_LAST CMDQ_CREATE_QP1_SQ_LVL_LVL_2
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_SFT 4
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_1G (0x5UL << 4)
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_LAST CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_1G
+ u8 rq_pg_size_rq_lvl;
+ #define CMDQ_CREATE_QP1_RQ_LVL_MASK 0xfUL
+ #define CMDQ_CREATE_QP1_RQ_LVL_SFT 0
+ #define CMDQ_CREATE_QP1_RQ_LVL_LVL_0 0x0UL
+ #define CMDQ_CREATE_QP1_RQ_LVL_LVL_1 0x1UL
+ #define CMDQ_CREATE_QP1_RQ_LVL_LVL_2 0x2UL
+ #define CMDQ_CREATE_QP1_RQ_LVL_LAST CMDQ_CREATE_QP1_RQ_LVL_LVL_2
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_SFT 4
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_1G (0x5UL << 4)
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_LAST CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_1G
+ u8 unused_0;
+ __le32 dpi;
+ __le32 sq_size;
+ __le32 rq_size;
+ __le16 sq_fwo_sq_sge;
+ #define CMDQ_CREATE_QP1_SQ_SGE_MASK 0xfUL
+ #define CMDQ_CREATE_QP1_SQ_SGE_SFT 0
+ #define CMDQ_CREATE_QP1_SQ_FWO_MASK 0xfff0UL
+ #define CMDQ_CREATE_QP1_SQ_FWO_SFT 4
+ __le16 rq_fwo_rq_sge;
+ #define CMDQ_CREATE_QP1_RQ_SGE_MASK 0xfUL
+ #define CMDQ_CREATE_QP1_RQ_SGE_SFT 0
+ #define CMDQ_CREATE_QP1_RQ_FWO_MASK 0xfff0UL
+ #define CMDQ_CREATE_QP1_RQ_FWO_SFT 4
+ __le32 scq_cid;
+ __le32 rcq_cid;
+ __le32 srq_cid;
+ __le32 pd_id;
+ __le64 sq_pbl;
+ __le64 rq_pbl;
+};
+
+/* creq_create_qp1_resp (size:128b/16B) */
struct creq_create_qp1_resp {
- u8 type;
- #define CREQ_CREATE_QP1_RESP_TYPE_MASK 0x3fUL
- #define CREQ_CREATE_QP1_RESP_TYPE_SFT 0
- #define CREQ_CREATE_QP1_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_CREATE_QP1_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_CREATE_QP1_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 xid;
- u8 v;
- #define CREQ_CREATE_QP1_RESP_V 0x1UL
- #define CREQ_CREATE_QP1_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_CREATE_QP1_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_CREATE_QP1_RESP_EVENT_CREATE_QP1 0x13UL
- __le16 reserved48[3];
-};
-
-/* Destroy QP1 command response (16 bytes) */
+ u8 type;
+ #define CREQ_CREATE_QP1_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_CREATE_QP1_RESP_TYPE_SFT 0
+ #define CREQ_CREATE_QP1_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_CREATE_QP1_RESP_TYPE_LAST CREQ_CREATE_QP1_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_CREATE_QP1_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_CREATE_QP1_RESP_EVENT_CREATE_QP1 0x13UL
+ #define CREQ_CREATE_QP1_RESP_EVENT_LAST CREQ_CREATE_QP1_RESP_EVENT_CREATE_QP1
+ u8 reserved48[6];
+};
+
+/* cmdq_destroy_qp1 (size:192b/24B) */
+struct cmdq_destroy_qp1 {
+ u8 opcode;
+ #define CMDQ_DESTROY_QP1_OPCODE_DESTROY_QP1 0x14UL
+ #define CMDQ_DESTROY_QP1_OPCODE_LAST CMDQ_DESTROY_QP1_OPCODE_DESTROY_QP1
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 qp1_cid;
+ __le32 unused_0;
+};
+
+/* creq_destroy_qp1_resp (size:128b/16B) */
struct creq_destroy_qp1_resp {
- u8 type;
- #define CREQ_DESTROY_QP1_RESP_TYPE_MASK 0x3fUL
- #define CREQ_DESTROY_QP1_RESP_TYPE_SFT 0
- #define CREQ_DESTROY_QP1_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_DESTROY_QP1_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_DESTROY_QP1_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 xid;
- u8 v;
- #define CREQ_DESTROY_QP1_RESP_V 0x1UL
- #define CREQ_DESTROY_QP1_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_DESTROY_QP1_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_DESTROY_QP1_RESP_EVENT_DESTROY_QP1 0x14UL
- __le16 reserved48[3];
-};
-
-/* Create AH command response (16 bytes) */
+ u8 type;
+ #define CREQ_DESTROY_QP1_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DESTROY_QP1_RESP_TYPE_SFT 0
+ #define CREQ_DESTROY_QP1_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DESTROY_QP1_RESP_TYPE_LAST CREQ_DESTROY_QP1_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DESTROY_QP1_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_DESTROY_QP1_RESP_EVENT_DESTROY_QP1 0x14UL
+ #define CREQ_DESTROY_QP1_RESP_EVENT_LAST CREQ_DESTROY_QP1_RESP_EVENT_DESTROY_QP1
+ u8 reserved48[6];
+};
+
+/* cmdq_create_ah (size:512b/64B) */
+struct cmdq_create_ah {
+ u8 opcode;
+ #define CMDQ_CREATE_AH_OPCODE_CREATE_AH 0x15UL
+ #define CMDQ_CREATE_AH_OPCODE_LAST CMDQ_CREATE_AH_OPCODE_CREATE_AH
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le64 ah_handle;
+ __le32 dgid[4];
+ u8 type;
+ #define CMDQ_CREATE_AH_TYPE_V1 0x0UL
+ #define CMDQ_CREATE_AH_TYPE_V2IPV4 0x2UL
+ #define CMDQ_CREATE_AH_TYPE_V2IPV6 0x3UL
+ #define CMDQ_CREATE_AH_TYPE_LAST CMDQ_CREATE_AH_TYPE_V2IPV6
+ u8 hop_limit;
+ __le16 sgid_index;
+ __le32 dest_vlan_id_flow_label;
+ #define CMDQ_CREATE_AH_FLOW_LABEL_MASK 0xfffffUL
+ #define CMDQ_CREATE_AH_FLOW_LABEL_SFT 0
+ #define CMDQ_CREATE_AH_DEST_VLAN_ID_MASK 0xfff00000UL
+ #define CMDQ_CREATE_AH_DEST_VLAN_ID_SFT 20
+ __le32 pd_id;
+ __le32 unused_0;
+ __le16 dest_mac[3];
+ u8 traffic_class;
+ u8 enable_cc;
+ #define CMDQ_CREATE_AH_ENABLE_CC 0x1UL
+};
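/*
 * Editorial sketch, not part of the generated HSI: dest_mac carries the
 * 6-byte destination MAC as three 16-bit words and dgid carries the
 * 16-byte destination GID as four 32-bit words. Assuming the caller
 * already holds both in the byte order the device expects, a hypothetical
 * helper would simply copy them in:
 */
static inline void bnxt_re_ah_set_dest(struct cmdq_create_ah *req,
				       const u8 *mac, const u8 *gid)
{
	memcpy(req->dest_mac, mac, 6);	/* 3 x __le16 == 6 bytes */
	memcpy(req->dgid, gid, 16);	/* 4 x __le32 == 16 bytes */
}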
+
+/* creq_create_ah_resp (size:128b/16B) */
struct creq_create_ah_resp {
- u8 type;
- #define CREQ_CREATE_AH_RESP_TYPE_MASK 0x3fUL
- #define CREQ_CREATE_AH_RESP_TYPE_SFT 0
- #define CREQ_CREATE_AH_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_CREATE_AH_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_CREATE_AH_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 xid;
- u8 v;
- #define CREQ_CREATE_AH_RESP_V 0x1UL
- #define CREQ_CREATE_AH_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_CREATE_AH_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_CREATE_AH_RESP_EVENT_CREATE_AH 0x15UL
- __le16 reserved48[3];
-};
-
-/* Destroy AH command response (16 bytes) */
+ u8 type;
+ #define CREQ_CREATE_AH_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_CREATE_AH_RESP_TYPE_SFT 0
+ #define CREQ_CREATE_AH_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_CREATE_AH_RESP_TYPE_LAST CREQ_CREATE_AH_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_CREATE_AH_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_CREATE_AH_RESP_EVENT_CREATE_AH 0x15UL
+ #define CREQ_CREATE_AH_RESP_EVENT_LAST CREQ_CREATE_AH_RESP_EVENT_CREATE_AH
+ u8 reserved48[6];
+};
+
+/* cmdq_destroy_ah (size:192b/24B) */
+struct cmdq_destroy_ah {
+ u8 opcode;
+ #define CMDQ_DESTROY_AH_OPCODE_DESTROY_AH 0x16UL
+ #define CMDQ_DESTROY_AH_OPCODE_LAST CMDQ_DESTROY_AH_OPCODE_DESTROY_AH
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 ah_cid;
+ __le32 unused_0;
+};
+
+/* creq_destroy_ah_resp (size:128b/16B) */
struct creq_destroy_ah_resp {
- u8 type;
- #define CREQ_DESTROY_AH_RESP_TYPE_MASK 0x3fUL
- #define CREQ_DESTROY_AH_RESP_TYPE_SFT 0
- #define CREQ_DESTROY_AH_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_DESTROY_AH_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_DESTROY_AH_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 xid;
- u8 v;
- #define CREQ_DESTROY_AH_RESP_V 0x1UL
- #define CREQ_DESTROY_AH_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_DESTROY_AH_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_DESTROY_AH_RESP_EVENT_DESTROY_AH 0x16UL
- __le16 reserved48[3];
-};
-
-/* Initialize Firmware command response (16 bytes) */
-struct creq_initialize_fw_resp {
- u8 type;
- #define CREQ_INITIALIZE_FW_RESP_TYPE_MASK 0x3fUL
- #define CREQ_INITIALIZE_FW_RESP_TYPE_SFT 0
- #define CREQ_INITIALIZE_FW_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_INITIALIZE_FW_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_INITIALIZE_FW_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 reserved32;
- u8 v;
- #define CREQ_INITIALIZE_FW_RESP_V 0x1UL
- #define CREQ_INITIALIZE_FW_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_INITIALIZE_FW_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_INITIALIZE_FW_RESP_EVENT_INITIALIZE_FW 0x80UL
- __le16 reserved48[3];
-};
-
-/* De-initialize Firmware command response (16 bytes) */
-struct creq_deinitialize_fw_resp {
- u8 type;
- #define CREQ_DEINITIALIZE_FW_RESP_TYPE_MASK 0x3fUL
- #define CREQ_DEINITIALIZE_FW_RESP_TYPE_SFT 0
- #define CREQ_DEINITIALIZE_FW_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_DEINITIALIZE_FW_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_DEINITIALIZE_FW_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 reserved32;
- u8 v;
- #define CREQ_DEINITIALIZE_FW_RESP_V 0x1UL
- #define CREQ_DEINITIALIZE_FW_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_DEINITIALIZE_FW_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_DEINITIALIZE_FW_RESP_EVENT_DEINITIALIZE_FW 0x81UL
- __le16 reserved48[3];
-};
-
-/* Stop function command response (16 bytes) */
-struct creq_stop_func_resp {
- u8 type;
- #define CREQ_STOP_FUNC_RESP_TYPE_MASK 0x3fUL
- #define CREQ_STOP_FUNC_RESP_TYPE_SFT 0
- #define CREQ_STOP_FUNC_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_STOP_FUNC_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_STOP_FUNC_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 reserved32;
- u8 v;
- #define CREQ_STOP_FUNC_RESP_V 0x1UL
- #define CREQ_STOP_FUNC_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_STOP_FUNC_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_STOP_FUNC_RESP_EVENT_STOP_FUNC 0x82UL
- __le16 reserved48[3];
-};
-
-/* Query function command response (16 bytes) */
-struct creq_query_func_resp {
- u8 type;
- #define CREQ_QUERY_FUNC_RESP_TYPE_MASK 0x3fUL
- #define CREQ_QUERY_FUNC_RESP_TYPE_SFT 0
- #define CREQ_QUERY_FUNC_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_QUERY_FUNC_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_QUERY_FUNC_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 size;
- u8 v;
- #define CREQ_QUERY_FUNC_RESP_V 0x1UL
- #define CREQ_QUERY_FUNC_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_QUERY_FUNC_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_QUERY_FUNC_RESP_EVENT_QUERY_FUNC 0x83UL
- __le16 reserved48[3];
-};
-
-/* Query function command response side buffer structure (88 bytes) */
-struct creq_query_func_resp_sb {
- u8 opcode;
- #define CREQ_QUERY_FUNC_RESP_SB_OPCODE_QUERY_FUNC 0x83UL
- u8 status;
- __le16 cookie;
- __le16 flags;
- u8 resp_size;
- u8 reserved8;
- __le64 max_mr_size;
- __le32 max_qp;
- __le16 max_qp_wr;
- __le16 dev_cap_flags;
- #define CREQ_QUERY_FUNC_RESP_SB_DEV_CAP_FLAGS_RESIZE_QP 0x1UL
- __le32 max_cq;
- __le32 max_cqe;
- __le32 max_pd;
- u8 max_sge;
- u8 max_srq_sge;
- u8 max_qp_rd_atom;
- u8 max_qp_init_rd_atom;
- __le32 max_mr;
- __le32 max_mw;
- __le32 max_raw_eth_qp;
- __le32 max_ah;
- __le32 max_fmr;
- __le32 max_srq_wr;
- __le32 max_pkeys;
- __le32 max_inline_data;
- u8 max_map_per_fmr;
- u8 l2_db_space_size;
- __le16 max_srq;
- __le32 max_gid;
- __le32 tqm_alloc_reqs[12];
- __le32 max_dpi;
- __le32 reserved_32;
-};
-
-/* Set resources command response (16 bytes) */
-struct creq_set_func_resources_resp {
- u8 type;
- #define CREQ_SET_FUNC_RESOURCES_RESP_TYPE_MASK 0x3fUL
- #define CREQ_SET_FUNC_RESOURCES_RESP_TYPE_SFT 0
- #define CREQ_SET_FUNC_RESOURCES_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_SET_FUNC_RESOURCES_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_SET_FUNC_RESOURCES_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 reserved32;
- u8 v;
- #define CREQ_SET_FUNC_RESOURCES_RESP_V 0x1UL
- #define CREQ_SET_FUNC_RESOURCES_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_SET_FUNC_RESOURCES_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_SET_FUNC_RESOURCES_RESP_EVENT_SET_FUNC_RESOURCES 0x84UL
- __le16 reserved48[3];
+ u8 type;
+ #define CREQ_DESTROY_AH_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DESTROY_AH_RESP_TYPE_SFT 0
+ #define CREQ_DESTROY_AH_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DESTROY_AH_RESP_TYPE_LAST CREQ_DESTROY_AH_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DESTROY_AH_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_DESTROY_AH_RESP_EVENT_DESTROY_AH 0x16UL
+ #define CREQ_DESTROY_AH_RESP_EVENT_LAST CREQ_DESTROY_AH_RESP_EVENT_DESTROY_AH
+ u8 reserved48[6];
};
-/* Map TC to COS response (16 bytes) */
-struct creq_map_tc_to_cos_resp {
- u8 type;
- #define CREQ_MAP_TC_TO_COS_RESP_TYPE_MASK 0x3fUL
- #define CREQ_MAP_TC_TO_COS_RESP_TYPE_SFT 0
- #define CREQ_MAP_TC_TO_COS_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_MAP_TC_TO_COS_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_MAP_TC_TO_COS_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 reserved32;
- u8 v;
- #define CREQ_MAP_TC_TO_COS_RESP_V 0x1UL
- #define CREQ_MAP_TC_TO_COS_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_MAP_TC_TO_COS_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_MAP_TC_TO_COS_RESP_EVENT_MAP_TC_TO_COS 0x8aUL
- __le16 reserved48[3];
-};
-
-/* Query version response (16 bytes) */
-struct creq_query_version_resp {
- u8 type;
- #define CREQ_QUERY_VERSION_RESP_TYPE_MASK 0x3fUL
- #define CREQ_QUERY_VERSION_RESP_TYPE_SFT 0
- #define CREQ_QUERY_VERSION_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_QUERY_VERSION_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_QUERY_VERSION_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- u8 fw_maj;
- u8 fw_minor;
- u8 fw_bld;
- u8 fw_rsvd;
- u8 v;
- #define CREQ_QUERY_VERSION_RESP_V 0x1UL
- #define CREQ_QUERY_VERSION_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_QUERY_VERSION_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_QUERY_VERSION_RESP_EVENT_QUERY_VERSION 0x8bUL
- __le16 reserved16;
- u8 intf_maj;
- u8 intf_minor;
- u8 intf_bld;
- u8 intf_rsvd;
-};
-
-/* Modify congestion control command response (16 bytes) */
-struct creq_modify_cc_resp {
- u8 type;
- #define CREQ_MODIFY_CC_RESP_TYPE_MASK 0x3fUL
- #define CREQ_MODIFY_CC_RESP_TYPE_SFT 0
- #define CREQ_MODIFY_CC_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_MODIFY_CC_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_MODIFY_CC_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 reserved32;
- u8 v;
- #define CREQ_MODIFY_CC_RESP_V 0x1UL
- #define CREQ_MODIFY_CC_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_MODIFY_CC_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_MODIFY_CC_RESP_EVENT_MODIFY_CC 0x8cUL
- __le16 reserved48[3];
-};
-
-/* Query congestion control command response (16 bytes) */
-struct creq_query_cc_resp {
- u8 type;
- #define CREQ_QUERY_CC_RESP_TYPE_MASK 0x3fUL
- #define CREQ_QUERY_CC_RESP_TYPE_SFT 0
- #define CREQ_QUERY_CC_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_QUERY_CC_RESP_RESERVED2_MASK 0xc0UL
- #define CREQ_QUERY_CC_RESP_RESERVED2_SFT 6
- u8 status;
- __le16 cookie;
- __le32 size;
- u8 v;
- #define CREQ_QUERY_CC_RESP_V 0x1UL
- #define CREQ_QUERY_CC_RESP_RESERVED7_MASK 0xfeUL
- #define CREQ_QUERY_CC_RESP_RESERVED7_SFT 1
- u8 event;
- #define CREQ_QUERY_CC_RESP_EVENT_QUERY_CC 0x8dUL
- __le16 reserved48[3];
-};
-
-/* Query congestion control command response side buffer structure (32 bytes) */
-struct creq_query_cc_resp_sb {
- u8 opcode;
- #define CREQ_QUERY_CC_RESP_SB_OPCODE_QUERY_CC 0x8dUL
- u8 status;
- __le16 cookie;
- __le16 flags;
- u8 resp_size;
- u8 reserved8;
- u8 enable_cc;
- #define CREQ_QUERY_CC_RESP_SB_ENABLE_CC 0x1UL
- u8 g;
- #define CREQ_QUERY_CC_RESP_SB_G_MASK 0x7UL
- #define CREQ_QUERY_CC_RESP_SB_G_SFT 0
- u8 num_phases_per_state;
- __le16 init_cr;
- u8 unused_2;
- __le16 unused_3;
- u8 unused_4;
- __le16 init_tr;
- u8 tos_dscp_tos_ecn;
- #define CREQ_QUERY_CC_RESP_SB_TOS_ECN_MASK 0x3UL
- #define CREQ_QUERY_CC_RESP_SB_TOS_ECN_SFT 0
- #define CREQ_QUERY_CC_RESP_SB_TOS_DSCP_MASK 0xfcUL
- #define CREQ_QUERY_CC_RESP_SB_TOS_DSCP_SFT 2
- __le64 reserved64;
- __le64 reserved64_1;
+/* cmdq_query_roce_stats (size:192b/24B) */
+struct cmdq_query_roce_stats {
+ u8 opcode;
+ #define CMDQ_QUERY_ROCE_STATS_OPCODE_QUERY_ROCE_STATS 0x8eUL
+ #define CMDQ_QUERY_ROCE_STATS_OPCODE_LAST CMDQ_QUERY_ROCE_STATS_OPCODE_QUERY_ROCE_STATS
+ u8 cmd_size;
+ __le16 flags;
+ #define CMDQ_QUERY_ROCE_STATS_FLAGS_COLLECTION_ID 0x1UL
+ #define CMDQ_QUERY_ROCE_STATS_FLAGS_FUNCTION_ID 0x2UL
+ __le16 cookie;
+ u8 resp_size;
+ u8 collection_id;
+ __le64 resp_addr;
+ __le32 function_id;
+ #define CMDQ_QUERY_ROCE_STATS_PF_NUM_MASK 0xffUL
+ #define CMDQ_QUERY_ROCE_STATS_PF_NUM_SFT 0
+ #define CMDQ_QUERY_ROCE_STATS_VF_NUM_MASK 0xffff00UL
+ #define CMDQ_QUERY_ROCE_STATS_VF_NUM_SFT 8
+ #define CMDQ_QUERY_ROCE_STATS_VF_VALID 0x1000000UL
+ __le32 reserved32;
};
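/*
 * Editorial sketch, not part of the generated header: when
 * CMDQ_QUERY_ROCE_STATS_FLAGS_FUNCTION_ID is set in 'flags', function_id
 * encodes the PF number, the VF number and a VF-valid bit via the
 * MASK/SFT defines above. A hypothetical encoder:
 */
static inline __le32 bnxt_re_stats_fn_id(u8 pf, u16 vf, bool vf_valid)
{
	u32 val;

	val = (pf & CMDQ_QUERY_ROCE_STATS_PF_NUM_MASK) |
	      ((vf << CMDQ_QUERY_ROCE_STATS_VF_NUM_SFT) &
	       CMDQ_QUERY_ROCE_STATS_VF_NUM_MASK);
	if (vf_valid)
		val |= CMDQ_QUERY_ROCE_STATS_VF_VALID;
	return cpu_to_le32(val);
}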
/* creq_query_roce_stats_resp (size:128b/16B) */
@@ -2907,8 +1955,7 @@ struct creq_query_roce_stats_resp {
#define CREQ_QUERY_ROCE_STATS_RESP_TYPE_MASK 0x3fUL
#define CREQ_QUERY_ROCE_STATS_RESP_TYPE_SFT 0
#define CREQ_QUERY_ROCE_STATS_RESP_TYPE_QP_EVENT 0x38UL
- #define CREQ_QUERY_ROCE_STATS_RESP_TYPE_LAST \
- CREQ_QUERY_ROCE_STATS_RESP_TYPE_QP_EVENT
+ #define CREQ_QUERY_ROCE_STATS_RESP_TYPE_LAST CREQ_QUERY_ROCE_STATS_RESP_TYPE_QP_EVENT
u8 status;
__le16 cookie;
__le32 size;
@@ -2916,17 +1963,17 @@ struct creq_query_roce_stats_resp {
#define CREQ_QUERY_ROCE_STATS_RESP_V 0x1UL
u8 event;
#define CREQ_QUERY_ROCE_STATS_RESP_EVENT_QUERY_ROCE_STATS 0x8eUL
- #define CREQ_QUERY_ROCE_STATS_RESP_EVENT_LAST \
- CREQ_QUERY_ROCE_STATS_RESP_EVENT_QUERY_ROCE_STATS
+ #define CREQ_QUERY_ROCE_STATS_RESP_EVENT_LAST \
+ CREQ_QUERY_ROCE_STATS_RESP_EVENT_QUERY_ROCE_STATS
u8 reserved48[6];
};
-/* creq_query_roce_stats_resp_sb (size:2624b/328B) */
+/* creq_query_roce_stats_resp_sb (size:2944b/368B) */
struct creq_query_roce_stats_resp_sb {
u8 opcode;
#define CREQ_QUERY_ROCE_STATS_RESP_SB_OPCODE_QUERY_ROCE_STATS 0x8eUL
#define CREQ_QUERY_ROCE_STATS_RESP_SB_OPCODE_LAST \
- CREQ_QUERY_ROCE_STATS_RESP_SB_OPCODE_QUERY_ROCE_STATS
+ CREQ_QUERY_ROCE_STATS_RESP_SB_OPCODE_QUERY_ROCE_STATS
u8 status;
__le16 cookie;
__le16 flags;
@@ -2973,46 +2020,2710 @@ struct creq_query_roce_stats_resp_sb {
__le64 res_srq_load_err;
__le64 res_tx_pci_err;
__le64 res_rx_pci_err;
- __le64 res_oos_drop_count;
- __le64 active_qp_count_p0;
- __le64 active_qp_count_p1;
- __le64 active_qp_count_p2;
- __le64 active_qp_count_p3;
+ __le64 res_oos_drop_count;
+ __le64 active_qp_count_p0;
+ __le64 active_qp_count_p1;
+ __le64 active_qp_count_p2;
+ __le64 active_qp_count_p3;
+};
+
+/* cmdq_query_roce_stats_ext (size:192b/24B) */
+struct cmdq_query_roce_stats_ext {
+ u8 opcode;
+ #define CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_QUERY_ROCE_STATS 0x92UL
+ #define CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_LAST \
+ CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_QUERY_ROCE_STATS
+ u8 cmd_size;
+ __le16 flags;
+ #define CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_COLLECTION_ID 0x1UL
+ #define CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_FUNCTION_ID 0x2UL
+ __le16 cookie;
+ u8 resp_size;
+ u8 collection_id;
+ __le64 resp_addr;
+ __le32 function_id;
+ #define CMDQ_QUERY_ROCE_STATS_EXT_PF_NUM_MASK 0xffUL
+ #define CMDQ_QUERY_ROCE_STATS_EXT_PF_NUM_SFT 0
+ #define CMDQ_QUERY_ROCE_STATS_EXT_VF_NUM_MASK 0xffff00UL
+ #define CMDQ_QUERY_ROCE_STATS_EXT_VF_NUM_SFT 8
+ #define CMDQ_QUERY_ROCE_STATS_EXT_VF_VALID 0x1000000UL
+ __le32 reserved32;
+};
+
+/* creq_query_roce_stats_ext_resp (size:128b/16B) */
+struct creq_query_roce_stats_ext_resp {
+ u8 type;
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_LAST \
+ CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_EVENT_QUERY_ROCE_STATS_EXT 0x92UL
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_EVENT_LAST \
+ CREQ_QUERY_ROCE_STATS_EXT_RESP_EVENT_QUERY_ROCE_STATS_EXT
+ u8 reserved48[6];
+};
+
+/* creq_query_roce_stats_ext_resp_sb (size:1856b/232B) */
+struct creq_query_roce_stats_ext_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_SB_OPCODE_QUERY_ROCE_STATS_EXT 0x92UL
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_SB_OPCODE_LAST \
+ CREQ_QUERY_ROCE_STATS_EXT_RESP_SB_OPCODE_QUERY_ROCE_STATS_EXT
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 rsvd;
+ __le64 tx_atomic_req_pkts;
+ __le64 tx_read_req_pkts;
+ __le64 tx_read_res_pkts;
+ __le64 tx_write_req_pkts;
+ __le64 tx_send_req_pkts;
+ __le64 tx_roce_pkts;
+ __le64 tx_roce_bytes;
+ __le64 rx_atomic_req_pkts;
+ __le64 rx_read_req_pkts;
+ __le64 rx_read_res_pkts;
+ __le64 rx_write_req_pkts;
+ __le64 rx_send_req_pkts;
+ __le64 rx_roce_pkts;
+ __le64 rx_roce_bytes;
+ __le64 rx_roce_good_pkts;
+ __le64 rx_roce_good_bytes;
+ __le64 rx_out_of_buffer_pkts;
+ __le64 rx_out_of_sequence_pkts;
+ __le64 tx_cnp_pkts;
+ __le64 rx_cnp_pkts;
+ __le64 rx_ecn_marked_pkts;
+ __le64 tx_cnp_bytes;
+ __le64 rx_cnp_bytes;
+ __le64 seq_err_naks_rcvd;
+ __le64 rnr_naks_rcvd;
+ __le64 missing_resp;
+ __le64 to_retransmit;
+ __le64 dup_req;
+};
+
+/* cmdq_roce_mirror_cfg (size:192b/24B) */
+struct cmdq_roce_mirror_cfg {
+ u8 opcode;
+ #define CMDQ_ROCE_MIRROR_CFG_OPCODE_ROCE_MIRROR_CFG 0x99UL
+ #define CMDQ_ROCE_MIRROR_CFG_OPCODE_LAST \
+ CMDQ_ROCE_MIRROR_CFG_OPCODE_ROCE_MIRROR_CFG
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ u8 mirror_flags;
+ #define CMDQ_ROCE_MIRROR_CFG_MIRROR_ENABLE 0x1UL
+ u8 rsvd[7];
+};
+
+/* creq_roce_mirror_cfg_resp (size:128b/16B) */
+struct creq_roce_mirror_cfg_resp {
+ u8 type;
+ #define CREQ_ROCE_MIRROR_CFG_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_ROCE_MIRROR_CFG_RESP_TYPE_SFT 0
+ #define CREQ_ROCE_MIRROR_CFG_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_ROCE_MIRROR_CFG_RESP_TYPE_LAST \
+ CREQ_ROCE_MIRROR_CFG_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_ROCE_MIRROR_CFG_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_ROCE_MIRROR_CFG_RESP_EVENT_ROCE_MIRROR_CFG 0x99UL
+ #define CREQ_ROCE_MIRROR_CFG_RESP_EVENT_LAST \
+ CREQ_ROCE_MIRROR_CFG_RESP_EVENT_ROCE_MIRROR_CFG
+ u8 reserved48[6];
+};
+
+/* cmdq_query_func (size:128b/16B) */
+struct cmdq_query_func {
+ u8 opcode;
+ #define CMDQ_QUERY_FUNC_OPCODE_QUERY_FUNC 0x83UL
+ #define CMDQ_QUERY_FUNC_OPCODE_LAST CMDQ_QUERY_FUNC_OPCODE_QUERY_FUNC
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+};
+
+/* creq_query_func_resp (size:128b/16B) */
+struct creq_query_func_resp {
+ u8 type;
+ #define CREQ_QUERY_FUNC_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_FUNC_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_FUNC_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_FUNC_RESP_TYPE_LAST CREQ_QUERY_FUNC_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_FUNC_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_QUERY_FUNC_RESP_EVENT_QUERY_FUNC 0x83UL
+ #define CREQ_QUERY_FUNC_RESP_EVENT_LAST CREQ_QUERY_FUNC_RESP_EVENT_QUERY_FUNC
+ u8 reserved48[6];
+};
+
+/* creq_query_func_resp_sb (size:1088b/136B) */
+struct creq_query_func_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_FUNC_RESP_SB_OPCODE_QUERY_FUNC 0x83UL
+ #define CREQ_QUERY_FUNC_RESP_SB_OPCODE_LAST CREQ_QUERY_FUNC_RESP_SB_OPCODE_QUERY_FUNC
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 max_mr_size;
+ __le32 max_qp;
+ __le16 max_qp_wr;
+ __le16 dev_cap_flags;
+ #define CREQ_QUERY_FUNC_RESP_SB_RESIZE_QP 0x1UL
+ #define CREQ_QUERY_FUNC_RESP_SB_CC_GENERATION_MASK 0xeUL
+ #define CREQ_QUERY_FUNC_RESP_SB_CC_GENERATION_SFT 1
+ #define CREQ_QUERY_FUNC_RESP_SB_CC_GENERATION_CC_GEN0 (0x0UL << 1)
+ #define CREQ_QUERY_FUNC_RESP_SB_CC_GENERATION_CC_GEN1 (0x1UL << 1)
+ #define CREQ_QUERY_FUNC_RESP_SB_CC_GENERATION_CC_GEN1_EXT (0x2UL << 1)
+ #define CREQ_QUERY_FUNC_RESP_SB_CC_GENERATION_LAST \
+ CREQ_QUERY_FUNC_RESP_SB_CC_GENERATION_CC_GEN1_EXT
+ #define CREQ_QUERY_FUNC_RESP_SB_EXT_STATS 0x10UL
+ #define CREQ_QUERY_FUNC_RESP_SB_MR_REGISTER_ALLOC 0x20UL
+ #define CREQ_QUERY_FUNC_RESP_SB_OPTIMIZED_TRANSMIT_ENABLED 0x40UL
+ #define CREQ_QUERY_FUNC_RESP_SB_CQE_V2 0x80UL
+ #define CREQ_QUERY_FUNC_RESP_SB_PINGPONG_PUSH_MODE 0x100UL
+ #define CREQ_QUERY_FUNC_RESP_SB_HW_REQUESTER_RETX_ENABLED 0x200UL
+ #define CREQ_QUERY_FUNC_RESP_SB_HW_RESPONDER_RETX_ENABLED 0x400UL
+ __le32 max_cq;
+ __le32 max_cqe;
+ __le32 max_pd;
+ u8 max_sge;
+ u8 max_srq_sge;
+ u8 max_qp_rd_atom;
+ u8 max_qp_init_rd_atom;
+ __le32 max_mr;
+ __le32 max_mw;
+ __le32 max_raw_eth_qp;
+ __le32 max_ah;
+ __le32 max_fmr;
+ __le32 max_srq_wr;
+ __le32 max_pkeys;
+ __le32 max_inline_data;
+ u8 max_map_per_fmr;
+ u8 l2_db_space_size;
+ __le16 max_srq;
+ __le32 max_gid;
+ __le32 tqm_alloc_reqs[12];
+ __le32 max_dpi;
+ u8 max_sge_var_wqe;
+ u8 dev_cap_ext_flags;
+ #define CREQ_QUERY_FUNC_RESP_SB_ATOMIC_OPS_NOT_SUPPORTED 0x1UL
+ #define CREQ_QUERY_FUNC_RESP_SB_DRV_VERSION_RGTR_SUPPORTED 0x2UL
+ #define CREQ_QUERY_FUNC_RESP_SB_CREATE_QP_BATCH_SUPPORTED 0x4UL
+ #define CREQ_QUERY_FUNC_RESP_SB_DESTROY_QP_BATCH_SUPPORTED 0x8UL
+ #define CREQ_QUERY_FUNC_RESP_SB_ROCE_STATS_EXT_CTX_SUPPORTED 0x10UL
+ #define CREQ_QUERY_FUNC_RESP_SB_CREATE_SRQ_SGE_SUPPORTED 0x20UL
+ #define CREQ_QUERY_FUNC_RESP_SB_FIXED_SIZE_WQE_DISABLED 0x40UL
+ #define CREQ_QUERY_FUNC_RESP_SB_DCN_SUPPORTED 0x80UL
+ __le16 max_inline_data_var_wqe;
+ __le32 start_qid;
+ u8 max_msn_table_size;
+ u8 reserved8_1;
+ __le16 dev_cap_ext_flags_2;
+ #define CREQ_QUERY_FUNC_RESP_SB_OPTIMIZE_MODIFY_QP_SUPPORTED 0x1UL
+ #define CREQ_QUERY_FUNC_RESP_SB_CHANGE_UDP_SRC_PORT_WQE_SUPPORTED 0x2UL
+ #define CREQ_QUERY_FUNC_RESP_SB_CQ_COALESCING_SUPPORTED 0x4UL
+ #define CREQ_QUERY_FUNC_RESP_SB_MEMORY_REGION_RO_SUPPORTED 0x8UL
+ #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_MASK 0x30UL
+ #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_SFT 4
+ #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_HOST_PSN_TABLE (0x0UL << 4)
+ #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_HOST_MSN_TABLE (0x1UL << 4)
+ #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_IQM_MSN_TABLE (0x2UL << 4)
+ #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_LAST \
+ CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_IQM_MSN_TABLE
+ #define CREQ_QUERY_FUNC_RESP_SB_MAX_SRQ_EXTENDED 0x40UL
+ #define CREQ_QUERY_FUNC_RESP_SB_MIN_RNR_RTR_RTS_OPT_SUPPORTED 0x1000UL
+ __le16 max_xp_qp_size;
+ __le16 create_qp_batch_size;
+ __le16 destroy_qp_batch_size;
+ __le16 max_srq_ext;
+ __le64 reserved64;
+};
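/*
 * Editorial sketch, not part of the generated header: dev_cap_flags mixes
 * single-bit capabilities with the multi-bit CC_GENERATION field, which is
 * extracted with its MASK/SFT pair. Hypothetical helpers a consumer of the
 * side buffer might use:
 */
static inline bool bnxt_re_sb_has_ext_stats(const struct creq_query_func_resp_sb *sb)
{
	return !!(le16_to_cpu(sb->dev_cap_flags) &
		  CREQ_QUERY_FUNC_RESP_SB_EXT_STATS);
}

static inline u16 bnxt_re_sb_cc_gen(const struct creq_query_func_resp_sb *sb)
{
	return (le16_to_cpu(sb->dev_cap_flags) &
		CREQ_QUERY_FUNC_RESP_SB_CC_GENERATION_MASK) >>
	       CREQ_QUERY_FUNC_RESP_SB_CC_GENERATION_SFT;
}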
+
+/* cmdq_set_func_resources (size:448b/56B) */
+struct cmdq_set_func_resources {
+ u8 opcode;
+ #define CMDQ_SET_FUNC_RESOURCES_OPCODE_SET_FUNC_RESOURCES 0x84UL
+ #define CMDQ_SET_FUNC_RESOURCES_OPCODE_LAST\
+ CMDQ_SET_FUNC_RESOURCES_OPCODE_SET_FUNC_RESOURCES
+ u8 cmd_size;
+ __le16 flags;
+ #define CMDQ_SET_FUNC_RESOURCES_FLAGS_MRAV_RESERVATION_SPLIT 0x1UL
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 number_of_qp;
+ __le32 number_of_mrw;
+ __le32 number_of_srq;
+ __le32 number_of_cq;
+ __le32 max_qp_per_vf;
+ __le32 max_mrw_per_vf;
+ __le32 max_srq_per_vf;
+ __le32 max_cq_per_vf;
+ __le32 max_gid_per_vf;
+ __le32 stat_ctx_id;
};
-/* QP error notification event (16 bytes) */
+/* creq_set_func_resources_resp (size:128b/16B) */
+struct creq_set_func_resources_resp {
+ u8 type;
+ #define CREQ_SET_FUNC_RESOURCES_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_SET_FUNC_RESOURCES_RESP_TYPE_SFT 0
+ #define CREQ_SET_FUNC_RESOURCES_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_SET_FUNC_RESOURCES_RESP_TYPE_LAST CREQ_SET_FUNC_RESOURCES_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_SET_FUNC_RESOURCES_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_SET_FUNC_RESOURCES_RESP_EVENT_SET_FUNC_RESOURCES 0x84UL
+ #define CREQ_SET_FUNC_RESOURCES_RESP_EVENT_LAST \
+ CREQ_SET_FUNC_RESOURCES_RESP_EVENT_SET_FUNC_RESOURCES
+ u8 reserved48[6];
+};
+
+/* cmdq_read_context (size:192b/24B) */
+struct cmdq_read_context {
+ u8 opcode;
+ #define CMDQ_READ_CONTEXT_OPCODE_READ_CONTEXT 0x85UL
+ #define CMDQ_READ_CONTEXT_OPCODE_LAST CMDQ_READ_CONTEXT_OPCODE_READ_CONTEXT
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 xid;
+ u8 type;
+ #define CMDQ_READ_CONTEXT_TYPE_QPC 0x0UL
+ #define CMDQ_READ_CONTEXT_TYPE_CQ 0x1UL
+ #define CMDQ_READ_CONTEXT_TYPE_MRW 0x2UL
+ #define CMDQ_READ_CONTEXT_TYPE_SRQ 0x3UL
+ #define CMDQ_READ_CONTEXT_TYPE_LAST CMDQ_READ_CONTEXT_TYPE_SRQ
+ u8 unused_0[3];
+};
+
+/* creq_read_context (size:128b/16B) */
+struct creq_read_context {
+ u8 type;
+ #define CREQ_READ_CONTEXT_TYPE_MASK 0x3fUL
+ #define CREQ_READ_CONTEXT_TYPE_SFT 0
+ #define CREQ_READ_CONTEXT_TYPE_QP_EVENT 0x38UL
+ #define CREQ_READ_CONTEXT_TYPE_LAST CREQ_READ_CONTEXT_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_READ_CONTEXT_V 0x1UL
+ u8 event;
+ #define CREQ_READ_CONTEXT_EVENT_READ_CONTEXT 0x85UL
+ #define CREQ_READ_CONTEXT_EVENT_LAST CREQ_READ_CONTEXT_EVENT_READ_CONTEXT
+ __le16 reserved16;
+ __le32 reserved_32;
+};
+
+/* cmdq_map_tc_to_cos (size:192b/24B) */
+struct cmdq_map_tc_to_cos {
+ u8 opcode;
+ #define CMDQ_MAP_TC_TO_COS_OPCODE_MAP_TC_TO_COS 0x8aUL
+ #define CMDQ_MAP_TC_TO_COS_OPCODE_LAST CMDQ_MAP_TC_TO_COS_OPCODE_MAP_TC_TO_COS
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le16 cos0;
+ #define CMDQ_MAP_TC_TO_COS_COS0_NO_CHANGE 0xffffUL
+ #define CMDQ_MAP_TC_TO_COS_COS0_LAST CMDQ_MAP_TC_TO_COS_COS0_NO_CHANGE
+ __le16 cos1;
+ #define CMDQ_MAP_TC_TO_COS_COS1_DISABLE 0x8000UL
+ #define CMDQ_MAP_TC_TO_COS_COS1_NO_CHANGE 0xffffUL
+ #define CMDQ_MAP_TC_TO_COS_COS1_LAST CMDQ_MAP_TC_TO_COS_COS1_NO_CHANGE
+ __le32 unused_0;
+};
+
+/* creq_map_tc_to_cos_resp (size:128b/16B) */
+struct creq_map_tc_to_cos_resp {
+ u8 type;
+ #define CREQ_MAP_TC_TO_COS_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_MAP_TC_TO_COS_RESP_TYPE_SFT 0
+ #define CREQ_MAP_TC_TO_COS_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_MAP_TC_TO_COS_RESP_TYPE_LAST CREQ_MAP_TC_TO_COS_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_MAP_TC_TO_COS_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_MAP_TC_TO_COS_RESP_EVENT_MAP_TC_TO_COS 0x8aUL
+ #define CREQ_MAP_TC_TO_COS_RESP_EVENT_LAST CREQ_MAP_TC_TO_COS_RESP_EVENT_MAP_TC_TO_COS
+ u8 reserved48[6];
+};
+
+/* cmdq_query_roce_cc (size:128b/16B) */
+struct cmdq_query_roce_cc {
+ u8 opcode;
+ #define CMDQ_QUERY_ROCE_CC_OPCODE_QUERY_ROCE_CC 0x8dUL
+ #define CMDQ_QUERY_ROCE_CC_OPCODE_LAST CMDQ_QUERY_ROCE_CC_OPCODE_QUERY_ROCE_CC
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+};
+
+/* creq_query_roce_cc_resp (size:128b/16B) */
+struct creq_query_roce_cc_resp {
+ u8 type;
+ #define CREQ_QUERY_ROCE_CC_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_ROCE_CC_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_ROCE_CC_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_ROCE_CC_RESP_TYPE_LAST CREQ_QUERY_ROCE_CC_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_ROCE_CC_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_QUERY_ROCE_CC_RESP_EVENT_QUERY_ROCE_CC 0x8dUL
+ #define CREQ_QUERY_ROCE_CC_RESP_EVENT_LAST CREQ_QUERY_ROCE_CC_RESP_EVENT_QUERY_ROCE_CC
+ u8 reserved48[6];
+};
+
+/* creq_query_roce_cc_resp_sb (size:256b/32B) */
+struct creq_query_roce_cc_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_OPCODE_QUERY_ROCE_CC 0x8dUL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_OPCODE_LAST \
+ CREQ_QUERY_ROCE_CC_RESP_SB_OPCODE_QUERY_ROCE_CC
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ u8 enable_cc;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_ENABLE_CC 0x1UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_UNUSED7_MASK 0xfeUL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_UNUSED7_SFT 1
+ u8 tos_dscp_tos_ecn;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TOS_ECN_MASK 0x3UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TOS_ECN_SFT 0
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TOS_DSCP_MASK 0xfcUL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TOS_DSCP_SFT 2
+ u8 g;
+ u8 num_phases_per_state;
+ __le16 init_cr;
+ __le16 init_tr;
+ u8 alt_vlan_pcp;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_ALT_VLAN_PCP_MASK 0x7UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_ALT_VLAN_PCP_SFT 0
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_RSVD1_MASK 0xf8UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_RSVD1_SFT 3
+ u8 alt_tos_dscp;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_ALT_TOS_DSCP_MASK 0x3fUL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_ALT_TOS_DSCP_SFT 0
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_RSVD4_MASK 0xc0UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_RSVD4_SFT 6
+ u8 cc_mode;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_CC_MODE_DCTCP 0x0UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_CC_MODE_PROBABILISTIC 0x1UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_CC_MODE_LAST \
+ CREQ_QUERY_ROCE_CC_RESP_SB_CC_MODE_PROBABILISTIC
+ u8 tx_queue;
+ __le16 rtt;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_RTT_MASK 0x3fffUL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_RTT_SFT 0
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_RSVD5_MASK 0xc000UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_RSVD5_SFT 14
+ __le16 tcp_cp;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TCP_CP_MASK 0x3ffUL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TCP_CP_SFT 0
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_RSVD6_MASK 0xfc00UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_RSVD6_SFT 10
+ __le16 inactivity_th;
+ u8 pkts_per_phase;
+ u8 time_per_phase;
+ __le32 reserved32;
+};
+
+/* creq_query_roce_cc_resp_sb_tlv (size:384b/48B) */
+struct creq_query_roce_cc_resp_sb_tlv {
+ __le16 cmd_discr;
+ u8 reserved_8b;
+ u8 tlv_flags;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TLV_FLAGS_MORE 0x1UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TLV_FLAGS_MORE_LAST 0x0UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TLV_FLAGS_MORE_NOT_LAST 0x1UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TLV_FLAGS_REQUIRED 0x2UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TLV_FLAGS_REQUIRED_NO (0x0UL << 1)
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TLV_FLAGS_REQUIRED_YES (0x1UL << 1)
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TLV_FLAGS_REQUIRED_LAST \
+ CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TLV_FLAGS_REQUIRED_YES
+ __le16 tlv_type;
+ __le16 length;
+ u8 total_size;
+ u8 reserved56[7];
+ u8 opcode;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_OPCODE_QUERY_ROCE_CC 0x8dUL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_OPCODE_LAST \
+ CREQ_QUERY_ROCE_CC_RESP_SB_TLV_OPCODE_QUERY_ROCE_CC
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ u8 enable_cc;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_ENABLE_CC 0x1UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_UNUSED7_MASK 0xfeUL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_UNUSED7_SFT 1
+ u8 tos_dscp_tos_ecn;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TOS_ECN_MASK 0x3UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TOS_ECN_SFT 0
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TOS_DSCP_MASK 0xfcUL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TOS_DSCP_SFT 2
+ u8 g;
+ u8 num_phases_per_state;
+ __le16 init_cr;
+ __le16 init_tr;
+ u8 alt_vlan_pcp;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_ALT_VLAN_PCP_MASK 0x7UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_ALT_VLAN_PCP_SFT 0
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_RSVD1_MASK 0xf8UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_RSVD1_SFT 3
+ u8 alt_tos_dscp;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_ALT_TOS_DSCP_MASK 0x3fUL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_ALT_TOS_DSCP_SFT 0
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_RSVD4_MASK 0xc0UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_RSVD4_SFT 6
+ u8 cc_mode;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_CC_MODE_DCTCP 0x0UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_CC_MODE_PROBABILISTIC 0x1UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_CC_MODE_LAST\
+ CREQ_QUERY_ROCE_CC_RESP_SB_TLV_CC_MODE_PROBABILISTIC
+ u8 tx_queue;
+ __le16 rtt;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_RTT_MASK 0x3fffUL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_RTT_SFT 0
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_RSVD5_MASK 0xc000UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_RSVD5_SFT 14
+ __le16 tcp_cp;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TCP_CP_MASK 0x3ffUL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TCP_CP_SFT 0
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_RSVD6_MASK 0xfc00UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_RSVD6_SFT 10
+ __le16 inactivity_th;
+ u8 pkts_per_phase;
+ u8 time_per_phase;
+ __le32 reserved32;
+};
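/*
 * Editorial sketch, not part of the generated header: the *_sb_tlv
 * variants prepend a TLV header (cmd_discr, tlv_flags, tlv_type, length)
 * to the plain side-buffer payload. TLV_FLAGS_MORE marks whether further
 * TLVs follow and TLV_FLAGS_REQUIRED whether an unknown type may be
 * ignored. A hypothetical end-of-chain check:
 */
static inline bool bnxt_re_cc_tlv_is_last(const struct creq_query_roce_cc_resp_sb_tlv *sb)
{
	return (sb->tlv_flags & CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TLV_FLAGS_MORE) ==
	       CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TLV_FLAGS_MORE_LAST;
}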
+
+/* creq_query_roce_cc_gen1_resp_sb_tlv (size:704b/88B) */
+struct creq_query_roce_cc_gen1_resp_sb_tlv {
+ __le16 cmd_discr;
+ u8 reserved_8b;
+ u8 tlv_flags;
+ #define CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_TLV_FLAGS_MORE 0x1UL
+ #define CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_TLV_FLAGS_MORE_LAST 0x0UL
+ #define CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_TLV_FLAGS_MORE_NOT_LAST 0x1UL
+ #define CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_TLV_FLAGS_REQUIRED 0x2UL
+ #define CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_TLV_FLAGS_REQUIRED_NO (0x0UL << 1)
+ #define CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_TLV_FLAGS_REQUIRED_YES (0x1UL << 1)
+ #define CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_TLV_FLAGS_REQUIRED_LAST \
+ CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_TLV_FLAGS_REQUIRED_YES
+ __le16 tlv_type;
+ __le16 length;
+ __le64 reserved64;
+ __le16 inactivity_th_hi;
+ __le16 min_time_between_cnps;
+ __le16 init_cp;
+ u8 tr_update_mode;
+ u8 tr_update_cycles;
+ u8 fr_num_rtts;
+ u8 ai_rate_increase;
+ __le16 reduction_relax_rtts_th;
+ __le16 additional_relax_cr_th;
+ __le16 cr_min_th;
+ u8 bw_avg_weight;
+ u8 actual_cr_factor;
+ __le16 max_cp_cr_th;
+ u8 cp_bias_en;
+ u8 cp_bias;
+ u8 cnp_ecn;
+ #define CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_CNP_ECN_NOT_ECT 0x0UL
+ #define CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_CNP_ECN_ECT_1 0x1UL
+ #define CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_CNP_ECN_ECT_0 0x2UL
+ #define CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_CNP_ECN_LAST \
+ CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_CNP_ECN_ECT_0
+ u8 rtt_jitter_en;
+ __le16 link_bytes_per_usec;
+ __le16 reset_cc_cr_th;
+ u8 cr_width;
+ u8 quota_period_min;
+ u8 quota_period_max;
+ u8 quota_period_abs_max;
+ __le16 tr_lower_bound;
+ u8 cr_prob_factor;
+ u8 tr_prob_factor;
+ __le16 fairness_cr_th;
+ u8 red_div;
+ u8 cnp_ratio_th;
+ __le16 exp_ai_rtts;
+ u8 exp_ai_cr_cp_ratio;
+ u8 use_rate_table;
+ __le16 cp_exp_update_th;
+ __le16 high_exp_ai_rtts_th1;
+ __le16 high_exp_ai_rtts_th2;
+ __le16 actual_cr_cong_free_rtts_th;
+ __le16 severe_cong_cr_th1;
+ __le16 severe_cong_cr_th2;
+ __le32 link64B_per_rtt;
+ u8 cc_ack_bytes;
+ u8 reduce_init_en;
+ __le16 reduce_init_cong_free_rtts_th;
+ u8 random_no_red_en;
+ u8 actual_cr_shift_correction_en;
+ u8 quota_period_adjust_en;
+ u8 reserved[5];
+};
+
+/* cmdq_modify_roce_cc (size:448b/56B) */
+struct cmdq_modify_roce_cc {
+ u8 opcode;
+ #define CMDQ_MODIFY_ROCE_CC_OPCODE_MODIFY_ROCE_CC 0x8cUL
+ #define CMDQ_MODIFY_ROCE_CC_OPCODE_LAST CMDQ_MODIFY_ROCE_CC_OPCODE_MODIFY_ROCE_CC
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 modify_mask;
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC 0x1UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_G 0x2UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_NUMPHASEPERSTATE 0x4UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_CR 0x8UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_TR 0x10UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN 0x20UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP 0x40UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_VLAN_PCP 0x80UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP 0x100UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_RTT 0x200UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE 0x400UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TCP_CP 0x800UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TX_QUEUE 0x1000UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INACTIVITY_CP 0x2000UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TIME_PER_PHASE 0x4000UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_PKTS_PER_PHASE 0x8000UL
+ u8 enable_cc;
+ #define CMDQ_MODIFY_ROCE_CC_ENABLE_CC 0x1UL
+ #define CMDQ_MODIFY_ROCE_CC_RSVD1_MASK 0xfeUL
+ #define CMDQ_MODIFY_ROCE_CC_RSVD1_SFT 1
+ u8 g;
+ u8 num_phases_per_state;
+ u8 pkts_per_phase;
+ __le16 init_cr;
+ __le16 init_tr;
+ u8 tos_dscp_tos_ecn;
+ #define CMDQ_MODIFY_ROCE_CC_TOS_ECN_MASK 0x3UL
+ #define CMDQ_MODIFY_ROCE_CC_TOS_ECN_SFT 0
+ #define CMDQ_MODIFY_ROCE_CC_TOS_DSCP_MASK 0xfcUL
+ #define CMDQ_MODIFY_ROCE_CC_TOS_DSCP_SFT 2
+ u8 alt_vlan_pcp;
+ #define CMDQ_MODIFY_ROCE_CC_ALT_VLAN_PCP_MASK 0x7UL
+ #define CMDQ_MODIFY_ROCE_CC_ALT_VLAN_PCP_SFT 0
+ #define CMDQ_MODIFY_ROCE_CC_RSVD3_MASK 0xf8UL
+ #define CMDQ_MODIFY_ROCE_CC_RSVD3_SFT 3
+ __le16 alt_tos_dscp;
+ #define CMDQ_MODIFY_ROCE_CC_ALT_TOS_DSCP_MASK 0x3fUL
+ #define CMDQ_MODIFY_ROCE_CC_ALT_TOS_DSCP_SFT 0
+ #define CMDQ_MODIFY_ROCE_CC_RSVD4_MASK 0xffc0UL
+ #define CMDQ_MODIFY_ROCE_CC_RSVD4_SFT 6
+ __le16 rtt;
+ #define CMDQ_MODIFY_ROCE_CC_RTT_MASK 0x3fffUL
+ #define CMDQ_MODIFY_ROCE_CC_RTT_SFT 0
+ #define CMDQ_MODIFY_ROCE_CC_RSVD5_MASK 0xc000UL
+ #define CMDQ_MODIFY_ROCE_CC_RSVD5_SFT 14
+ __le16 tcp_cp;
+ #define CMDQ_MODIFY_ROCE_CC_TCP_CP_MASK 0x3ffUL
+ #define CMDQ_MODIFY_ROCE_CC_TCP_CP_SFT 0
+ #define CMDQ_MODIFY_ROCE_CC_RSVD6_MASK 0xfc00UL
+ #define CMDQ_MODIFY_ROCE_CC_RSVD6_SFT 10
+ u8 cc_mode;
+ #define CMDQ_MODIFY_ROCE_CC_CC_MODE_DCTCP_CC_MODE 0x0UL
+ #define CMDQ_MODIFY_ROCE_CC_CC_MODE_PROBABILISTIC_CC_MODE 0x1UL
+ #define CMDQ_MODIFY_ROCE_CC_CC_MODE_LAST CMDQ_MODIFY_ROCE_CC_CC_MODE_PROBABILISTIC_CC_MODE
+ u8 tx_queue;
+ __le16 inactivity_th;
+ u8 time_per_phase;
+ u8 reserved8_1;
+ __le16 reserved16;
+ __le32 reserved32;
+ __le64 reserved64;
+};
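/*
 * Editorial sketch, not part of the generated header: modify_mask selects
 * which of the following fields firmware should apply; bits left clear
 * leave the corresponding setting untouched. A hypothetical caller
 * changing only the DSCP marking might fill the request like this:
 *
 *	req.modify_mask = cpu_to_le32(CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP);
 *	req.tos_dscp_tos_ecn = (dscp << CMDQ_MODIFY_ROCE_CC_TOS_DSCP_SFT) &
 *			       CMDQ_MODIFY_ROCE_CC_TOS_DSCP_MASK;
 */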
+
+/* cmdq_modify_roce_cc_tlv (size:640b/80B) */
+struct cmdq_modify_roce_cc_tlv {
+ __le16 cmd_discr;
+ u8 reserved_8b;
+ u8 tlv_flags;
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TLV_FLAGS_MORE 0x1UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TLV_FLAGS_MORE_LAST 0x0UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TLV_FLAGS_MORE_NOT_LAST 0x1UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TLV_FLAGS_REQUIRED 0x2UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TLV_FLAGS_REQUIRED_NO (0x0UL << 1)
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TLV_FLAGS_REQUIRED_YES (0x1UL << 1)
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TLV_FLAGS_REQUIRED_LAST \
+ CMDQ_MODIFY_ROCE_CC_TLV_TLV_FLAGS_REQUIRED_YES
+ __le16 tlv_type;
+ __le16 length;
+ u8 total_size;
+ u8 reserved56[7];
+ u8 opcode;
+ #define CMDQ_MODIFY_ROCE_CC_TLV_OPCODE_MODIFY_ROCE_CC 0x8cUL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_OPCODE_LAST CMDQ_MODIFY_ROCE_CC_TLV_OPCODE_MODIFY_ROCE_CC
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 modify_mask;
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_ENABLE_CC 0x1UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_G 0x2UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_NUMPHASEPERSTATE 0x4UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_INIT_CR 0x8UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_INIT_TR 0x10UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_TOS_ECN 0x20UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_TOS_DSCP 0x40UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_ALT_VLAN_PCP 0x80UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_ALT_TOS_DSCP 0x100UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_RTT 0x200UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_CC_MODE 0x400UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_TCP_CP 0x800UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_TX_QUEUE 0x1000UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_INACTIVITY_CP 0x2000UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_TIME_PER_PHASE 0x4000UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_PKTS_PER_PHASE 0x8000UL
+ u8 enable_cc;
+ #define CMDQ_MODIFY_ROCE_CC_TLV_ENABLE_CC 0x1UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_RSVD1_MASK 0xfeUL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_RSVD1_SFT 1
+ u8 g;
+ u8 num_phases_per_state;
+ u8 pkts_per_phase;
+ __le16 init_cr;
+ __le16 init_tr;
+ u8 tos_dscp_tos_ecn;
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TOS_ECN_MASK 0x3UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TOS_ECN_SFT 0
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TOS_DSCP_MASK 0xfcUL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TOS_DSCP_SFT 2
+ u8 alt_vlan_pcp;
+ #define CMDQ_MODIFY_ROCE_CC_TLV_ALT_VLAN_PCP_MASK 0x7UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_ALT_VLAN_PCP_SFT 0
+ #define CMDQ_MODIFY_ROCE_CC_TLV_RSVD3_MASK 0xf8UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_RSVD3_SFT 3
+ __le16 alt_tos_dscp;
+ #define CMDQ_MODIFY_ROCE_CC_TLV_ALT_TOS_DSCP_MASK 0x3fUL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_ALT_TOS_DSCP_SFT 0
+ #define CMDQ_MODIFY_ROCE_CC_TLV_RSVD4_MASK 0xffc0UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_RSVD4_SFT 6
+ __le16 rtt;
+ #define CMDQ_MODIFY_ROCE_CC_TLV_RTT_MASK 0x3fffUL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_RTT_SFT 0
+ #define CMDQ_MODIFY_ROCE_CC_TLV_RSVD5_MASK 0xc000UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_RSVD5_SFT 14
+ __le16 tcp_cp;
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TCP_CP_MASK 0x3ffUL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TCP_CP_SFT 0
+ #define CMDQ_MODIFY_ROCE_CC_TLV_RSVD6_MASK 0xfc00UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_RSVD6_SFT 10
+ u8 cc_mode;
+ #define CMDQ_MODIFY_ROCE_CC_TLV_CC_MODE_DCTCP_CC_MODE 0x0UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_CC_MODE_PROBABILISTIC_CC_MODE 0x1UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_CC_MODE_LAST\
+ CMDQ_MODIFY_ROCE_CC_TLV_CC_MODE_PROBABILISTIC_CC_MODE
+ u8 tx_queue;
+ __le16 inactivity_th;
+ u8 time_per_phase;
+ u8 reserved8_1;
+ __le16 reserved16;
+ __le32 reserved32;
+ __le64 reserved64;
+ __le64 reservedtlvpad;
+};
+
+/* cmdq_modify_roce_cc_gen1_tlv (size:768b/96B) */
+struct cmdq_modify_roce_cc_gen1_tlv {
+ __le16 cmd_discr;
+ u8 reserved_8b;
+ u8 tlv_flags;
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_TLV_FLAGS_MORE 0x1UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_TLV_FLAGS_MORE_LAST 0x0UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_TLV_FLAGS_MORE_NOT_LAST 0x1UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_TLV_FLAGS_REQUIRED 0x2UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_TLV_FLAGS_REQUIRED_NO (0x0UL << 1)
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_TLV_FLAGS_REQUIRED_YES (0x1UL << 1)
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_TLV_FLAGS_REQUIRED_LAST\
+ CMDQ_MODIFY_ROCE_CC_GEN1_TLV_TLV_FLAGS_REQUIRED_YES
+ __le16 tlv_type;
+ __le16 length;
+ __le64 reserved64;
+ __le64 modify_mask;
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_MIN_TIME_BETWEEN_CNPS 0x1UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_INIT_CP 0x2UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_TR_UPDATE_MODE 0x4UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_TR_UPDATE_CYCLES 0x8UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_FR_NUM_RTTS 0x10UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_AI_RATE_INCREASE 0x20UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_REDUCTION_RELAX_RTTS_TH 0x40UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_ADDITIONAL_RELAX_CR_TH 0x80UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_CR_MIN_TH 0x100UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_BW_AVG_WEIGHT 0x200UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_ACTUAL_CR_FACTOR 0x400UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_MAX_CP_CR_TH 0x800UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_CP_BIAS_EN 0x1000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_CP_BIAS 0x2000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_CNP_ECN 0x4000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_RTT_JITTER_EN 0x8000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_LINK_BYTES_PER_USEC 0x10000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_RESET_CC_CR_TH 0x20000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_CR_WIDTH 0x40000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_QUOTA_PERIOD_MIN 0x80000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_QUOTA_PERIOD_MAX 0x100000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_QUOTA_PERIOD_ABS_MAX 0x200000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_TR_LOWER_BOUND 0x400000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_CR_PROB_FACTOR 0x800000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_TR_PROB_FACTOR 0x1000000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_FAIRNESS_CR_TH 0x2000000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_RED_DIV 0x4000000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_CNP_RATIO_TH 0x8000000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_EXP_AI_RTTS 0x10000000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_EXP_AI_CR_CP_RATIO 0x20000000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_CP_EXP_UPDATE_TH 0x40000000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_HIGH_EXP_AI_RTTS_TH1 0x80000000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_HIGH_EXP_AI_RTTS_TH2 0x100000000ULL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_USE_RATE_TABLE 0x200000000ULL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_LINK64B_PER_RTT 0x400000000ULL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_ACTUAL_CR_CONG_FREE_RTTS_TH 0x800000000ULL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_SEVERE_CONG_CR_TH1 0x1000000000ULL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_SEVERE_CONG_CR_TH2 0x2000000000ULL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_CC_ACK_BYTES 0x4000000000ULL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_REDUCE_INIT_EN 0x8000000000ULL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_REDUCE_INIT_CONG_FREE_RTTS_TH \
+ 0x10000000000ULL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_RANDOM_NO_RED_EN 0x20000000000ULL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_ACTUAL_CR_SHIFT_CORRECTION_EN \
+ 0x40000000000ULL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_QUOTA_PERIOD_ADJUST_EN 0x80000000000ULL
+ __le16 inactivity_th_hi;
+ __le16 min_time_between_cnps;
+ __le16 init_cp;
+ u8 tr_update_mode;
+ u8 tr_update_cycles;
+ u8 fr_num_rtts;
+ u8 ai_rate_increase;
+ __le16 reduction_relax_rtts_th;
+ __le16 additional_relax_cr_th;
+ __le16 cr_min_th;
+ u8 bw_avg_weight;
+ u8 actual_cr_factor;
+ __le16 max_cp_cr_th;
+ u8 cp_bias_en;
+ u8 cp_bias;
+ u8 cnp_ecn;
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_CNP_ECN_NOT_ECT 0x0UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_CNP_ECN_ECT_1 0x1UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_CNP_ECN_ECT_0 0x2UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_CNP_ECN_LAST CMDQ_MODIFY_ROCE_CC_GEN1_TLV_CNP_ECN_ECT_0
+ u8 rtt_jitter_en;
+ __le16 link_bytes_per_usec;
+ __le16 reset_cc_cr_th;
+ u8 cr_width;
+ u8 quota_period_min;
+ u8 quota_period_max;
+ u8 quota_period_abs_max;
+ __le16 tr_lower_bound;
+ u8 cr_prob_factor;
+ u8 tr_prob_factor;
+ __le16 fairness_cr_th;
+ u8 red_div;
+ u8 cnp_ratio_th;
+ __le16 exp_ai_rtts;
+ u8 exp_ai_cr_cp_ratio;
+ u8 use_rate_table;
+ __le16 cp_exp_update_th;
+ __le16 high_exp_ai_rtts_th1;
+ __le16 high_exp_ai_rtts_th2;
+ __le16 actual_cr_cong_free_rtts_th;
+ __le16 severe_cong_cr_th1;
+ __le16 severe_cong_cr_th2;
+ __le32 link64B_per_rtt;
+ u8 cc_ack_bytes;
+ u8 reduce_init_en;
+ __le16 reduce_init_cong_free_rtts_th;
+ u8 random_no_red_en;
+ u8 actual_cr_shift_correction_en;
+ u8 quota_period_adjust_en;
+ u8 reserved[5];
+};
+
+/* creq_modify_roce_cc_resp (size:128b/16B) */
+struct creq_modify_roce_cc_resp {
+ u8 type;
+ #define CREQ_MODIFY_ROCE_CC_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_MODIFY_ROCE_CC_RESP_TYPE_SFT 0
+ #define CREQ_MODIFY_ROCE_CC_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_MODIFY_ROCE_CC_RESP_TYPE_LAST CREQ_MODIFY_ROCE_CC_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_MODIFY_ROCE_CC_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_MODIFY_ROCE_CC_RESP_EVENT_MODIFY_ROCE_CC 0x8cUL
+ #define CREQ_MODIFY_ROCE_CC_RESP_EVENT_LAST CREQ_MODIFY_ROCE_CC_RESP_EVENT_MODIFY_ROCE_CC
+ u8 reserved48[6];
+};
+
+/* cmdq_set_link_aggr_mode_cc (size:320b/40B) */
+struct cmdq_set_link_aggr_mode_cc {
+ u8 opcode;
+ #define CMDQ_SET_LINK_AGGR_MODE_OPCODE_SET_LINK_AGGR_MODE 0x8fUL
+ #define CMDQ_SET_LINK_AGGR_MODE_OPCODE_LAST \
+ CMDQ_SET_LINK_AGGR_MODE_OPCODE_SET_LINK_AGGR_MODE
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 modify_mask;
+ #define CMDQ_SET_LINK_AGGR_MODE_MODIFY_MASK_AGGR_EN 0x1UL
+ #define CMDQ_SET_LINK_AGGR_MODE_MODIFY_MASK_ACTIVE_PORT_MAP 0x2UL
+ #define CMDQ_SET_LINK_AGGR_MODE_MODIFY_MASK_MEMBER_PORT_MAP 0x4UL
+ #define CMDQ_SET_LINK_AGGR_MODE_MODIFY_MASK_AGGR_MODE 0x8UL
+ #define CMDQ_SET_LINK_AGGR_MODE_MODIFY_MASK_STAT_CTX_ID 0x10UL
+ u8 aggr_enable;
+ #define CMDQ_SET_LINK_AGGR_MODE_AGGR_ENABLE 0x1UL
+ #define CMDQ_SET_LINK_AGGR_MODE_RSVD1_MASK 0xfeUL
+ #define CMDQ_SET_LINK_AGGR_MODE_RSVD1_SFT 1
+ u8 active_port_map;
+ #define CMDQ_SET_LINK_AGGR_MODE_ACTIVE_PORT_MAP_MASK 0xfUL
+ #define CMDQ_SET_LINK_AGGR_MODE_ACTIVE_PORT_MAP_SFT 0
+ #define CMDQ_SET_LINK_AGGR_MODE_RSVD2_MASK 0xf0UL
+ #define CMDQ_SET_LINK_AGGR_MODE_RSVD2_SFT 4
+ u8 member_port_map;
+ u8 link_aggr_mode;
+ #define CMDQ_SET_LINK_AGGR_MODE_AGGR_MODE_ACTIVE_ACTIVE 0x1UL
+ #define CMDQ_SET_LINK_AGGR_MODE_AGGR_MODE_ACTIVE_BACKUP 0x2UL
+ #define CMDQ_SET_LINK_AGGR_MODE_AGGR_MODE_BALANCE_XOR 0x3UL
+ #define CMDQ_SET_LINK_AGGR_MODE_AGGR_MODE_802_3_AD 0x4UL
+ #define CMDQ_SET_LINK_AGGR_MODE_AGGR_MODE_LAST CMDQ_SET_LINK_AGGR_MODE_AGGR_MODE_802_3_AD
+ __le16 stat_ctx_id[4];
+ __le64 rsvd1;
+};
+
+/* creq_set_link_aggr_mode_resources_resp (size:128b/16B) */
+struct creq_set_link_aggr_mode_resources_resp {
+ u8 type;
+ #define CREQ_SET_LINK_AGGR_MODE_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_SET_LINK_AGGR_MODE_RESP_TYPE_SFT 0
+ #define CREQ_SET_LINK_AGGR_MODE_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_SET_LINK_AGGR_MODE_RESP_TYPE_LAST CREQ_SET_LINK_AGGR_MODE_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_SET_LINK_AGGR_MODE_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_SET_LINK_AGGR_MODE_RESP_EVENT_SET_LINK_AGGR_MODE 0x8fUL
+ #define CREQ_SET_LINK_AGGR_MODE_RESP_EVENT_LAST\
+ CREQ_SET_LINK_AGGR_MODE_RESP_EVENT_SET_LINK_AGGR_MODE
+ u8 reserved48[6];
+};
+
+/* creq_func_event (size:128b/16B) */
+struct creq_func_event {
+ u8 type;
+ #define CREQ_FUNC_EVENT_TYPE_MASK 0x3fUL
+ #define CREQ_FUNC_EVENT_TYPE_SFT 0
+ #define CREQ_FUNC_EVENT_TYPE_FUNC_EVENT 0x3aUL
+ #define CREQ_FUNC_EVENT_TYPE_LAST CREQ_FUNC_EVENT_TYPE_FUNC_EVENT
+ u8 reserved56[7];
+ u8 v;
+ #define CREQ_FUNC_EVENT_V 0x1UL
+ u8 event;
+ #define CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR 0x1UL
+ #define CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR 0x2UL
+ #define CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR 0x3UL
+ #define CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR 0x4UL
+ #define CREQ_FUNC_EVENT_EVENT_CQ_ERROR 0x5UL
+ #define CREQ_FUNC_EVENT_EVENT_TQM_ERROR 0x6UL
+ #define CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR 0x7UL
+ #define CREQ_FUNC_EVENT_EVENT_CFCS_ERROR 0x8UL
+ #define CREQ_FUNC_EVENT_EVENT_CFCC_ERROR 0x9UL
+ #define CREQ_FUNC_EVENT_EVENT_CFCM_ERROR 0xaUL
+ #define CREQ_FUNC_EVENT_EVENT_TIM_ERROR 0xbUL
+ #define CREQ_FUNC_EVENT_EVENT_VF_COMM_REQUEST 0x80UL
+ #define CREQ_FUNC_EVENT_EVENT_RESOURCE_EXHAUSTED 0x81UL
+ #define CREQ_FUNC_EVENT_EVENT_LAST CREQ_FUNC_EVENT_EVENT_RESOURCE_EXHAUSTED
+ u8 reserved48[6];
+};
+
+/* creq_qp_event (size:128b/16B) */
+struct creq_qp_event {
+ u8 type;
+ #define CREQ_QP_EVENT_TYPE_MASK 0x3fUL
+ #define CREQ_QP_EVENT_TYPE_SFT 0
+ #define CREQ_QP_EVENT_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QP_EVENT_TYPE_LAST CREQ_QP_EVENT_TYPE_QP_EVENT
+ u8 status;
+ #define CREQ_QP_EVENT_STATUS_SUCCESS 0x0UL
+ #define CREQ_QP_EVENT_STATUS_FAIL 0x1UL
+ #define CREQ_QP_EVENT_STATUS_RESOURCES 0x2UL
+ #define CREQ_QP_EVENT_STATUS_INVALID_CMD 0x3UL
+ #define CREQ_QP_EVENT_STATUS_NOT_IMPLEMENTED 0x4UL
+ #define CREQ_QP_EVENT_STATUS_INVALID_PARAMETER 0x5UL
+ #define CREQ_QP_EVENT_STATUS_HARDWARE_ERROR 0x6UL
+ #define CREQ_QP_EVENT_STATUS_INTERNAL_ERROR 0x7UL
+ #define CREQ_QP_EVENT_STATUS_LAST CREQ_QP_EVENT_STATUS_INTERNAL_ERROR
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_QP_EVENT_V 0x1UL
+ u8 event;
+ #define CREQ_QP_EVENT_EVENT_CREATE_QP 0x1UL
+ #define CREQ_QP_EVENT_EVENT_DESTROY_QP 0x2UL
+ #define CREQ_QP_EVENT_EVENT_MODIFY_QP 0x3UL
+ #define CREQ_QP_EVENT_EVENT_QUERY_QP 0x4UL
+ #define CREQ_QP_EVENT_EVENT_CREATE_SRQ 0x5UL
+ #define CREQ_QP_EVENT_EVENT_DESTROY_SRQ 0x6UL
+ #define CREQ_QP_EVENT_EVENT_QUERY_SRQ 0x8UL
+ #define CREQ_QP_EVENT_EVENT_CREATE_CQ 0x9UL
+ #define CREQ_QP_EVENT_EVENT_DESTROY_CQ 0xaUL
+ #define CREQ_QP_EVENT_EVENT_RESIZE_CQ 0xcUL
+ #define CREQ_QP_EVENT_EVENT_ALLOCATE_MRW 0xdUL
+ #define CREQ_QP_EVENT_EVENT_DEALLOCATE_KEY 0xeUL
+ #define CREQ_QP_EVENT_EVENT_REGISTER_MR 0xfUL
+ #define CREQ_QP_EVENT_EVENT_DEREGISTER_MR 0x10UL
+ #define CREQ_QP_EVENT_EVENT_ADD_GID 0x11UL
+ #define CREQ_QP_EVENT_EVENT_DELETE_GID 0x12UL
+ #define CREQ_QP_EVENT_EVENT_MODIFY_GID 0x17UL
+ #define CREQ_QP_EVENT_EVENT_QUERY_GID 0x18UL
+ #define CREQ_QP_EVENT_EVENT_CREATE_QP1 0x13UL
+ #define CREQ_QP_EVENT_EVENT_DESTROY_QP1 0x14UL
+ #define CREQ_QP_EVENT_EVENT_CREATE_AH 0x15UL
+ #define CREQ_QP_EVENT_EVENT_DESTROY_AH 0x16UL
+ #define CREQ_QP_EVENT_EVENT_INITIALIZE_FW 0x80UL
+ #define CREQ_QP_EVENT_EVENT_DEINITIALIZE_FW 0x81UL
+ #define CREQ_QP_EVENT_EVENT_STOP_FUNC 0x82UL
+ #define CREQ_QP_EVENT_EVENT_QUERY_FUNC 0x83UL
+ #define CREQ_QP_EVENT_EVENT_SET_FUNC_RESOURCES 0x84UL
+ #define CREQ_QP_EVENT_EVENT_READ_CONTEXT 0x85UL
+ #define CREQ_QP_EVENT_EVENT_MAP_TC_TO_COS 0x8aUL
+ #define CREQ_QP_EVENT_EVENT_QUERY_VERSION 0x8bUL
+ #define CREQ_QP_EVENT_EVENT_MODIFY_CC 0x8cUL
+ #define CREQ_QP_EVENT_EVENT_QUERY_CC 0x8dUL
+ #define CREQ_QP_EVENT_EVENT_QUERY_ROCE_STATS 0x8eUL
+ #define CREQ_QP_EVENT_EVENT_SET_LINK_AGGR_MODE 0x8fUL
+ #define CREQ_QP_EVENT_EVENT_QUERY_QP_EXTEND 0x91UL
+ #define CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION 0xc0UL
+ #define CREQ_QP_EVENT_EVENT_CQ_ERROR_NOTIFICATION 0xc1UL
+ #define CREQ_QP_EVENT_EVENT_LAST CREQ_QP_EVENT_EVENT_CQ_ERROR_NOTIFICATION
+ u8 reserved48[6];
+};
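/*
 * Editorial sketch, not part of the generated header: creq_qp_event is the
 * generic shape shared by the command completions above; 'event' echoes
 * the opcode of the command being completed, 'cookie' matches the cookie
 * from the originating cmdq_* request, and 'status' uses the
 * CREQ_QP_EVENT_STATUS_* codes. A hypothetical success check:
 */
static inline bool bnxt_re_creq_ok(const struct creq_qp_event *ev)
{
	return ev->status == CREQ_QP_EVENT_STATUS_SUCCESS;
}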
+
+/* creq_qp_error_notification (size:128b/16B) */
struct creq_qp_error_notification {
- u8 type;
- #define CREQ_QP_ERROR_NOTIFICATION_TYPE_MASK 0x3fUL
- #define CREQ_QP_ERROR_NOTIFICATION_TYPE_SFT 0
- #define CREQ_QP_ERROR_NOTIFICATION_TYPE_QP_EVENT 0x38UL
- #define CREQ_QP_ERROR_NOTIFICATION_RESERVED2_MASK 0xc0UL
- #define CREQ_QP_ERROR_NOTIFICATION_RESERVED2_SFT 6
- u8 status;
- u8 req_slow_path_state;
- u8 req_err_state_reason;
- __le32 xid;
- u8 v;
- #define CREQ_QP_ERROR_NOTIFICATION_V 0x1UL
- #define CREQ_QP_ERROR_NOTIFICATION_RESERVED7_MASK 0xfeUL
- #define CREQ_QP_ERROR_NOTIFICATION_RESERVED7_SFT 1
- u8 event;
+ u8 type;
+ #define CREQ_QP_ERROR_NOTIFICATION_TYPE_MASK 0x3fUL
+ #define CREQ_QP_ERROR_NOTIFICATION_TYPE_SFT 0
+ #define CREQ_QP_ERROR_NOTIFICATION_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QP_ERROR_NOTIFICATION_TYPE_LAST CREQ_QP_ERROR_NOTIFICATION_TYPE_QP_EVENT
+ u8 status;
+ u8 req_slow_path_state;
+ u8 req_err_state_reason;
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_NO_ERROR 0x0UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_OPCODE_ERROR 0x1UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TIMEOUT_RETRY_LIMIT 0x2UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RNR_TIMEOUT_RETRY_LIMIT 0x3UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_1 0x4UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_2 0x5UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_3 0x6UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_4 0x7UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RX_MEMORY_ERROR 0x8UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TX_MEMORY_ERROR 0x9UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_READ_RESP_LENGTH 0xaUL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_READ_RESP 0xbUL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_BIND 0xcUL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_FAST_REG 0xdUL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_INVALIDATE 0xeUL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_CMP_ERROR 0xfUL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RETRAN_LOCAL_ERROR 0x10UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_WQE_FORMAT_ERROR 0x11UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ORRQ_FORMAT_ERROR 0x12UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_AVID_ERROR 0x13UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_AV_DOMAIN_ERROR 0x14UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_CQ_LOAD_ERROR 0x15UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_SERV_TYPE_ERROR 0x16UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_OP_ERROR 0x17UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TX_PCI_ERROR 0x18UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RX_PCI_ERROR 0x19UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_PROD_WQE_MSMTCH_ERROR 0x1aUL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_PSN_RANGE_CHECK_ERROR 0x1bUL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RETX_SETUP_ERROR 0x1cUL
+ __le32 xid;
+ u8 v;
+ #define CREQ_QP_ERROR_NOTIFICATION_V 0x1UL
+ u8 event;
#define CREQ_QP_ERROR_NOTIFICATION_EVENT_QP_ERROR_NOTIFICATION 0xc0UL
- u8 res_slow_path_state;
- u8 res_err_state_reason;
- __le16 sq_cons_idx;
- __le16 rq_cons_idx;
+ #define CREQ_QP_ERROR_NOTIFICATION_EVENT_LAST \
+ CREQ_QP_ERROR_NOTIFICATION_EVENT_QP_ERROR_NOTIFICATION
+ u8 res_slow_path_state;
+ u8 res_err_state_reason;
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_NO_ERROR 0x0UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_EXCEED_MAX 0x1UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PAYLOAD_LENGTH_MISMATCH 0x2UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_EXCEEDS_WQE 0x3UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_OPCODE_ERROR 0x4UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PSN_SEQ_ERROR_RETRY_LIMIT 0x5UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_INVALID_R_KEY 0x6UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_DOMAIN_ERROR 0x7UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_NO_PERMISSION 0x8UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_RANGE_ERROR 0x9UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_INVALID_R_KEY 0xaUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_DOMAIN_ERROR 0xbUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_NO_PERMISSION 0xcUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_RANGE_ERROR 0xdUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_IRRQ_OFLOW 0xeUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_UNSUPPORTED_OPCODE 0xfUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_UNALIGN_ATOMIC 0x10UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_REM_INVALIDATE 0x11UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_MEMORY_ERROR 0x12UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_SRQ_ERROR 0x13UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_CMP_ERROR 0x14UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_INVALID_DUP_RKEY 0x15UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_WQE_FORMAT_ERROR 0x16UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_IRRQ_FORMAT_ERROR 0x17UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_CQ_LOAD_ERROR 0x18UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_SRQ_LOAD_ERROR 0x19UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_PCI_ERROR 0x1bUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_PCI_ERROR 0x1cUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PSN_NOT_FOUND 0x1dUL
+ __le16 sq_cons_idx;
+ __le16 rq_cons_idx;
};
-/* RoCE Slowpath HSI Specification 1.6.0 */
-#define ROCE_SP_HSI_VERSION_MAJOR 1
-#define ROCE_SP_HSI_VERSION_MINOR 6
-#define ROCE_SP_HSI_VERSION_UPDATE 0
+/* creq_cq_error_notification (size:128b/16B) */
+struct creq_cq_error_notification {
+ u8 type;
+ #define CREQ_CQ_ERROR_NOTIFICATION_TYPE_MASK 0x3fUL
+ #define CREQ_CQ_ERROR_NOTIFICATION_TYPE_SFT 0
+ #define CREQ_CQ_ERROR_NOTIFICATION_TYPE_CQ_EVENT 0x38UL
+ #define CREQ_CQ_ERROR_NOTIFICATION_TYPE_LAST CREQ_CQ_ERROR_NOTIFICATION_TYPE_CQ_EVENT
+ u8 status;
+ u8 cq_err_reason;
+ #define CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_INVALID_ERROR 0x1UL
+ #define CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_OVERFLOW_ERROR 0x2UL
+ #define CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_LOAD_ERROR 0x3UL
+ #define CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_INVALID_ERROR 0x4UL
+ #define CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_OVERFLOW_ERROR 0x5UL
+ #define CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_LOAD_ERROR 0x6UL
+ #define CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_LAST \
+ CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_LOAD_ERROR
+ u8 reserved8;
+ __le32 xid;
+ u8 v;
+ #define CREQ_CQ_ERROR_NOTIFICATION_V 0x1UL
+ u8 event;
+ #define CREQ_CQ_ERROR_NOTIFICATION_EVENT_CQ_ERROR_NOTIFICATION 0xc1UL
+ #define CREQ_CQ_ERROR_NOTIFICATION_EVENT_LAST \
+ CREQ_CQ_ERROR_NOTIFICATION_EVENT_CQ_ERROR_NOTIFICATION
+ u8 reserved48[6];
+};
-#define ROCE_SP_HSI_VERSION_STR "1.6.0"
-/*
- * Following is the signature for ROCE_SP_HSI message field that indicates not
- * applicable (All F's). Need to cast it the size of the field if needed.
- */
-#define ROCE_SP_HSI_NA_SIGNATURE ((__le32)(-1))
-#endif /* __BNXT_RE_HSI_H__ */
+/* sq_base (size:64b/8B) */
+struct sq_base {
+ u8 wqe_type;
+ #define SQ_BASE_WQE_TYPE_SEND 0x0UL
+ #define SQ_BASE_WQE_TYPE_SEND_W_IMMEAD 0x1UL
+ #define SQ_BASE_WQE_TYPE_SEND_W_INVALID 0x2UL
+ #define SQ_BASE_WQE_TYPE_WRITE_WQE 0x4UL
+ #define SQ_BASE_WQE_TYPE_WRITE_W_IMMEAD 0x5UL
+ #define SQ_BASE_WQE_TYPE_READ_WQE 0x6UL
+ #define SQ_BASE_WQE_TYPE_ATOMIC_CS 0x8UL
+ #define SQ_BASE_WQE_TYPE_ATOMIC_FA 0xbUL
+ #define SQ_BASE_WQE_TYPE_LOCAL_INVALID 0xcUL
+ #define SQ_BASE_WQE_TYPE_FR_PMR 0xdUL
+ #define SQ_BASE_WQE_TYPE_BIND 0xeUL
+ #define SQ_BASE_WQE_TYPE_FR_PPMR 0xfUL
+ #define SQ_BASE_WQE_TYPE_LAST SQ_BASE_WQE_TYPE_FR_PPMR
+ u8 unused_0[7];
+};
+
+/* sq_sge (size:128b/16B) */
+struct sq_sge {
+ __le64 va_or_pa;
+ __le32 l_key;
+ __le32 size;
+};
+
+/* sq_psn_search (size:64b/8B) */
+struct sq_psn_search {
+ __le32 opcode_start_psn;
+ #define SQ_PSN_SEARCH_START_PSN_MASK 0xffffffUL
+ #define SQ_PSN_SEARCH_START_PSN_SFT 0
+ #define SQ_PSN_SEARCH_OPCODE_MASK 0xff000000UL
+ #define SQ_PSN_SEARCH_OPCODE_SFT 24
+ __le32 flags_next_psn;
+ #define SQ_PSN_SEARCH_NEXT_PSN_MASK 0xffffffUL
+ #define SQ_PSN_SEARCH_NEXT_PSN_SFT 0
+ #define SQ_PSN_SEARCH_FLAGS_MASK 0xff000000UL
+ #define SQ_PSN_SEARCH_FLAGS_SFT 24
+};
+
+/* sq_psn_search_ext (size:128b/16B) */
+struct sq_psn_search_ext {
+ __le32 opcode_start_psn;
+ #define SQ_PSN_SEARCH_EXT_START_PSN_MASK 0xffffffUL
+ #define SQ_PSN_SEARCH_EXT_START_PSN_SFT 0
+ #define SQ_PSN_SEARCH_EXT_OPCODE_MASK 0xff000000UL
+ #define SQ_PSN_SEARCH_EXT_OPCODE_SFT 24
+ __le32 flags_next_psn;
+ #define SQ_PSN_SEARCH_EXT_NEXT_PSN_MASK 0xffffffUL
+ #define SQ_PSN_SEARCH_EXT_NEXT_PSN_SFT 0
+ #define SQ_PSN_SEARCH_EXT_FLAGS_MASK 0xff000000UL
+ #define SQ_PSN_SEARCH_EXT_FLAGS_SFT 24
+ __le16 start_slot_idx;
+ __le16 reserved16;
+ __le32 reserved32;
+};
+
+/* sq_msn_search (size:64b/8B) */
+struct sq_msn_search {
+ __le64 start_idx_next_psn_start_psn;
+ #define SQ_MSN_SEARCH_START_PSN_MASK 0xffffffUL
+ #define SQ_MSN_SEARCH_START_PSN_SFT 0
+ #define SQ_MSN_SEARCH_NEXT_PSN_MASK 0xffffff000000ULL
+ #define SQ_MSN_SEARCH_NEXT_PSN_SFT 24
+ #define SQ_MSN_SEARCH_START_IDX_MASK 0xffff000000000000ULL
+ #define SQ_MSN_SEARCH_START_IDX_SFT 48
+};
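+
+ /*
+  * Illustrative sketch only, not part of the generated HSI definitions: the
+  * MASK/SFT pairs above describe fields packed into a single little-endian
+  * quadword, so an MSN search entry would typically be composed by shifting
+  * each field into place before the endian conversion.  The helper name and
+  * argument types below are hypothetical.
+  */
+ static inline __le64 sq_msn_search_entry(u16 start_idx, u32 next_psn, u32 start_psn)
+ {
+ 	u64 val;
+
+ 	val = ((u64)start_idx << SQ_MSN_SEARCH_START_IDX_SFT) &
+ 	      SQ_MSN_SEARCH_START_IDX_MASK;
+ 	val |= ((u64)next_psn << SQ_MSN_SEARCH_NEXT_PSN_SFT) &
+ 	       SQ_MSN_SEARCH_NEXT_PSN_MASK;
+ 	val |= ((u64)start_psn << SQ_MSN_SEARCH_START_PSN_SFT) &
+ 	       SQ_MSN_SEARCH_START_PSN_MASK;
+ 	return cpu_to_le64(val);
+ }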
+
+/* sq_send (size:1024b/128B) */
+struct sq_send {
+ u8 wqe_type;
+ #define SQ_SEND_WQE_TYPE_SEND 0x0UL
+ #define SQ_SEND_WQE_TYPE_SEND_W_IMMEAD 0x1UL
+ #define SQ_SEND_WQE_TYPE_SEND_W_INVALID 0x2UL
+ #define SQ_SEND_WQE_TYPE_LAST SQ_SEND_WQE_TYPE_SEND_W_INVALID
+ u8 flags;
+ #define SQ_SEND_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_MASK 0xffUL
+ #define SQ_SEND_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_SFT 0
+ #define SQ_SEND_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_SEND_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_SEND_FLAGS_UC_FENCE 0x4UL
+ #define SQ_SEND_FLAGS_SE 0x8UL
+ #define SQ_SEND_FLAGS_INLINE 0x10UL
+ #define SQ_SEND_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_SEND_FLAGS_DEBUG_TRACE 0x40UL
+ u8 wqe_size;
+ u8 reserved8_1;
+ __le32 inv_key_or_imm_data;
+ __le32 length;
+ __le32 q_key;
+ __le32 dst_qp;
+ #define SQ_SEND_DST_QP_MASK 0xffffffUL
+ #define SQ_SEND_DST_QP_SFT 0
+ __le32 avid;
+ #define SQ_SEND_AVID_MASK 0xfffffUL
+ #define SQ_SEND_AVID_SFT 0
+ __le32 reserved32;
+ __le32 timestamp;
+ #define SQ_SEND_TIMESTAMP_MASK 0xffffffUL
+ #define SQ_SEND_TIMESTAMP_SFT 0
+ __le32 data[24];
+};
+
+/* sq_send_hdr (size:256b/32B) */
+struct sq_send_hdr {
+ u8 wqe_type;
+ #define SQ_SEND_HDR_WQE_TYPE_SEND 0x0UL
+ #define SQ_SEND_HDR_WQE_TYPE_SEND_W_IMMEAD 0x1UL
+ #define SQ_SEND_HDR_WQE_TYPE_SEND_W_INVALID 0x2UL
+ #define SQ_SEND_HDR_WQE_TYPE_LAST SQ_SEND_HDR_WQE_TYPE_SEND_W_INVALID
+ u8 flags;
+ #define SQ_SEND_HDR_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_MASK 0xffUL
+ #define SQ_SEND_HDR_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_SFT 0
+ #define SQ_SEND_HDR_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_SEND_HDR_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_SEND_HDR_FLAGS_UC_FENCE 0x4UL
+ #define SQ_SEND_HDR_FLAGS_SE 0x8UL
+ #define SQ_SEND_HDR_FLAGS_INLINE 0x10UL
+ #define SQ_SEND_HDR_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_SEND_HDR_FLAGS_DEBUG_TRACE 0x40UL
+ u8 wqe_size;
+ u8 reserved8_1;
+ __le32 inv_key_or_imm_data;
+ __le32 length;
+ __le32 q_key;
+ __le32 dst_qp;
+ #define SQ_SEND_HDR_DST_QP_MASK 0xffffffUL
+ #define SQ_SEND_HDR_DST_QP_SFT 0
+ __le32 avid;
+ #define SQ_SEND_HDR_AVID_MASK 0xfffffUL
+ #define SQ_SEND_HDR_AVID_SFT 0
+ __le32 reserved32;
+ __le32 timestamp;
+ #define SQ_SEND_HDR_TIMESTAMP_MASK 0xffffffUL
+ #define SQ_SEND_HDR_TIMESTAMP_SFT 0
+};
+
+/* sq_send_raweth_qp1 (size:1024b/128B) */
+struct sq_send_raweth_qp1 {
+ u8 wqe_type;
+ #define SQ_SEND_RAWETH_QP1_WQE_TYPE_SEND 0x0UL
+ #define SQ_SEND_RAWETH_QP1_WQE_TYPE_LAST SQ_SEND_RAWETH_QP1_WQE_TYPE_SEND
+ u8 flags;
+ #define SQ_SEND_RAWETH_QP1_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_MASK \
+ 0xffUL
+ #define SQ_SEND_RAWETH_QP1_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_SFT \
+ 0
+ #define SQ_SEND_RAWETH_QP1_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_SEND_RAWETH_QP1_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_SEND_RAWETH_QP1_FLAGS_UC_FENCE 0x4UL
+ #define SQ_SEND_RAWETH_QP1_FLAGS_SE 0x8UL
+ #define SQ_SEND_RAWETH_QP1_FLAGS_INLINE 0x10UL
+ #define SQ_SEND_RAWETH_QP1_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_SEND_RAWETH_QP1_FLAGS_DEBUG_TRACE 0x40UL
+ u8 wqe_size;
+ u8 reserved8;
+ __le16 lflags;
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_TCP_UDP_CHKSUM 0x1UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_IP_CHKSUM 0x2UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_NOCRC 0x4UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_STAMP 0x8UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_T_IP_CHKSUM 0x10UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_ROCE_CRC 0x100UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_FCOE_CRC 0x200UL
+ __le16 cfa_action;
+ __le32 length;
+ __le32 reserved32_1;
+ __le32 cfa_meta;
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_MASK 0xfffUL
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_SFT 0
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_DE 0x1000UL
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_PRI_MASK 0xe000UL
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_PRI_SFT 13
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_MASK 0x70000UL
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_SFT 16
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID88A8 (0x0UL << 16)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID8100 (0x1UL << 16)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID9100 (0x2UL << 16)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID9200 (0x3UL << 16)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID9300 (0x4UL << 16)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPIDCFG (0x5UL << 16)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_LAST\
+ SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPIDCFG
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_RESERVED_MASK 0xff80000UL
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_RESERVED_SFT 19
+ #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_MASK 0xf0000000UL
+ #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_SFT 28
+ #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_NONE (0x0UL << 28)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_VLAN_TAG (0x1UL << 28)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_LAST SQ_SEND_RAWETH_QP1_CFA_META_KEY_VLAN_TAG
+ __le32 reserved32_2;
+ __le32 reserved32_3;
+ __le32 timestamp;
+ #define SQ_SEND_RAWETH_QP1_TIMESTAMP_MASK 0xffffffUL
+ #define SQ_SEND_RAWETH_QP1_TIMESTAMP_SFT 0
+ __le32 data[24];
+};
+
+/* sq_send_raweth_qp1_hdr (size:256b/32B) */
+struct sq_send_raweth_qp1_hdr {
+ u8 wqe_type;
+ #define SQ_SEND_RAWETH_QP1_HDR_WQE_TYPE_SEND 0x0UL
+ #define SQ_SEND_RAWETH_QP1_HDR_WQE_TYPE_LAST SQ_SEND_RAWETH_QP1_HDR_WQE_TYPE_SEND
+ u8 flags;
+ #define \
+ SQ_SEND_RAWETH_QP1_HDR_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_MASK 0xffUL
+ #define SQ_SEND_RAWETH_QP1_HDR_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_SFT\
+ 0
+ #define SQ_SEND_RAWETH_QP1_HDR_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_SEND_RAWETH_QP1_HDR_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_SEND_RAWETH_QP1_HDR_FLAGS_UC_FENCE 0x4UL
+ #define SQ_SEND_RAWETH_QP1_HDR_FLAGS_SE 0x8UL
+ #define SQ_SEND_RAWETH_QP1_HDR_FLAGS_INLINE 0x10UL
+ #define SQ_SEND_RAWETH_QP1_HDR_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_SEND_RAWETH_QP1_HDR_FLAGS_DEBUG_TRACE 0x40UL
+ u8 wqe_size;
+ u8 reserved8;
+ __le16 lflags;
+ #define SQ_SEND_RAWETH_QP1_HDR_LFLAGS_TCP_UDP_CHKSUM 0x1UL
+ #define SQ_SEND_RAWETH_QP1_HDR_LFLAGS_IP_CHKSUM 0x2UL
+ #define SQ_SEND_RAWETH_QP1_HDR_LFLAGS_NOCRC 0x4UL
+ #define SQ_SEND_RAWETH_QP1_HDR_LFLAGS_STAMP 0x8UL
+ #define SQ_SEND_RAWETH_QP1_HDR_LFLAGS_T_IP_CHKSUM 0x10UL
+ #define SQ_SEND_RAWETH_QP1_HDR_LFLAGS_ROCE_CRC 0x100UL
+ #define SQ_SEND_RAWETH_QP1_HDR_LFLAGS_FCOE_CRC 0x200UL
+ __le16 cfa_action;
+ __le32 length;
+ __le32 reserved32_1;
+ __le32 cfa_meta;
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_VID_MASK 0xfffUL
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_VID_SFT 0
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_DE 0x1000UL
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_PRI_MASK 0xe000UL
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_PRI_SFT 13
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_TPID_MASK 0x70000UL
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_TPID_SFT 16
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_TPID_TPID88A8 (0x0UL << 16)
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_TPID_TPID8100 (0x1UL << 16)
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_TPID_TPID9100 (0x2UL << 16)
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_TPID_TPID9200 (0x3UL << 16)
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_TPID_TPID9300 (0x4UL << 16)
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_TPID_TPIDCFG (0x5UL << 16)
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_TPID_LAST\
+ SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_TPID_TPIDCFG
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_RESERVED_MASK 0xff80000UL
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_RESERVED_SFT 19
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_KEY_MASK 0xf0000000UL
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_KEY_SFT 28
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_KEY_NONE (0x0UL << 28)
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_KEY_VLAN_TAG (0x1UL << 28)
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_KEY_LAST\
+ SQ_SEND_RAWETH_QP1_HDR_CFA_META_KEY_VLAN_TAG
+ __le32 reserved32_2;
+ __le32 reserved32_3;
+ __le32 timestamp;
+ #define SQ_SEND_RAWETH_QP1_HDR_TIMESTAMP_MASK 0xffffffUL
+ #define SQ_SEND_RAWETH_QP1_HDR_TIMESTAMP_SFT 0
+};
+
+/* sq_rdma (size:1024b/128B) */
+struct sq_rdma {
+ u8 wqe_type;
+ #define SQ_RDMA_WQE_TYPE_WRITE_WQE 0x4UL
+ #define SQ_RDMA_WQE_TYPE_WRITE_W_IMMEAD 0x5UL
+ #define SQ_RDMA_WQE_TYPE_READ_WQE 0x6UL
+ #define SQ_RDMA_WQE_TYPE_LAST SQ_RDMA_WQE_TYPE_READ_WQE
+ u8 flags;
+ #define SQ_RDMA_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_MASK 0xffUL
+ #define SQ_RDMA_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_SFT 0
+ #define SQ_RDMA_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_RDMA_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_RDMA_FLAGS_UC_FENCE 0x4UL
+ #define SQ_RDMA_FLAGS_SE 0x8UL
+ #define SQ_RDMA_FLAGS_INLINE 0x10UL
+ #define SQ_RDMA_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_RDMA_FLAGS_DEBUG_TRACE 0x40UL
+ u8 wqe_size;
+ u8 reserved8;
+ __le32 imm_data;
+ __le32 length;
+ __le32 reserved32_1;
+ __le64 remote_va;
+ __le32 remote_key;
+ __le32 timestamp;
+ #define SQ_RDMA_TIMESTAMP_MASK 0xffffffUL
+ #define SQ_RDMA_TIMESTAMP_SFT 0
+ __le32 data[24];
+};
+
+/* sq_rdma_hdr (size:256b/32B) */
+struct sq_rdma_hdr {
+ u8 wqe_type;
+ #define SQ_RDMA_HDR_WQE_TYPE_WRITE_WQE 0x4UL
+ #define SQ_RDMA_HDR_WQE_TYPE_WRITE_W_IMMEAD 0x5UL
+ #define SQ_RDMA_HDR_WQE_TYPE_READ_WQE 0x6UL
+ #define SQ_RDMA_HDR_WQE_TYPE_LAST SQ_RDMA_HDR_WQE_TYPE_READ_WQE
+ u8 flags;
+ #define SQ_RDMA_HDR_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_MASK 0xffUL
+ #define SQ_RDMA_HDR_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_SFT 0
+ #define SQ_RDMA_HDR_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_RDMA_HDR_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_RDMA_HDR_FLAGS_UC_FENCE 0x4UL
+ #define SQ_RDMA_HDR_FLAGS_SE 0x8UL
+ #define SQ_RDMA_HDR_FLAGS_INLINE 0x10UL
+ #define SQ_RDMA_HDR_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_RDMA_HDR_FLAGS_DEBUG_TRACE 0x40UL
+ u8 wqe_size;
+ u8 reserved8;
+ __le32 imm_data;
+ __le32 length;
+ __le32 reserved32_1;
+ __le64 remote_va;
+ __le32 remote_key;
+ __le32 timestamp;
+ #define SQ_RDMA_HDR_TIMESTAMP_MASK 0xffffffUL
+ #define SQ_RDMA_HDR_TIMESTAMP_SFT 0
+};
+
+/* sq_atomic (size:1024b/128B) */
+struct sq_atomic {
+ u8 wqe_type;
+ #define SQ_ATOMIC_WQE_TYPE_ATOMIC_CS 0x8UL
+ #define SQ_ATOMIC_WQE_TYPE_ATOMIC_FA 0xbUL
+ #define SQ_ATOMIC_WQE_TYPE_LAST SQ_ATOMIC_WQE_TYPE_ATOMIC_FA
+ u8 flags;
+ #define SQ_ATOMIC_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_MASK 0xffUL
+ #define SQ_ATOMIC_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_SFT 0
+ #define SQ_ATOMIC_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_ATOMIC_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_ATOMIC_FLAGS_UC_FENCE 0x4UL
+ #define SQ_ATOMIC_FLAGS_SE 0x8UL
+ #define SQ_ATOMIC_FLAGS_INLINE 0x10UL
+ #define SQ_ATOMIC_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_ATOMIC_FLAGS_DEBUG_TRACE 0x40UL
+ __le16 reserved16;
+ __le32 remote_key;
+ __le64 remote_va;
+ __le64 swap_data;
+ __le64 cmp_data;
+ __le32 data[24];
+};
+
+/* sq_atomic_hdr (size:256b/32B) */
+struct sq_atomic_hdr {
+ u8 wqe_type;
+ #define SQ_ATOMIC_HDR_WQE_TYPE_ATOMIC_CS 0x8UL
+ #define SQ_ATOMIC_HDR_WQE_TYPE_ATOMIC_FA 0xbUL
+ #define SQ_ATOMIC_HDR_WQE_TYPE_LAST SQ_ATOMIC_HDR_WQE_TYPE_ATOMIC_FA
+ u8 flags;
+ #define SQ_ATOMIC_HDR_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_MASK 0xffUL
+ #define SQ_ATOMIC_HDR_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_SFT 0
+ #define SQ_ATOMIC_HDR_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_ATOMIC_HDR_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_ATOMIC_HDR_FLAGS_UC_FENCE 0x4UL
+ #define SQ_ATOMIC_HDR_FLAGS_SE 0x8UL
+ #define SQ_ATOMIC_HDR_FLAGS_INLINE 0x10UL
+ #define SQ_ATOMIC_HDR_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_ATOMIC_HDR_FLAGS_DEBUG_TRACE 0x40UL
+ __le16 reserved16;
+ __le32 remote_key;
+ __le64 remote_va;
+ __le64 swap_data;
+ __le64 cmp_data;
+};
+
+/* sq_localinvalidate (size:1024b/128B) */
+struct sq_localinvalidate {
+ u8 wqe_type;
+ #define SQ_LOCALINVALIDATE_WQE_TYPE_LOCAL_INVALID 0xcUL
+ #define SQ_LOCALINVALIDATE_WQE_TYPE_LAST SQ_LOCALINVALIDATE_WQE_TYPE_LOCAL_INVALID
+ u8 flags;
+ #define SQ_LOCALINVALIDATE_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_MASK\
+ 0xffUL
+ #define SQ_LOCALINVALIDATE_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_SFT\
+ 0
+ #define SQ_LOCALINVALIDATE_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_LOCALINVALIDATE_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_LOCALINVALIDATE_FLAGS_UC_FENCE 0x4UL
+ #define SQ_LOCALINVALIDATE_FLAGS_SE 0x8UL
+ #define SQ_LOCALINVALIDATE_FLAGS_INLINE 0x10UL
+ #define SQ_LOCALINVALIDATE_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_LOCALINVALIDATE_FLAGS_DEBUG_TRACE 0x40UL
+ __le16 reserved16;
+ __le32 inv_l_key;
+ __le64 reserved64;
+ u8 reserved128[16];
+ __le32 data[24];
+};
+
+/* sq_localinvalidate_hdr (size:256b/32B) */
+struct sq_localinvalidate_hdr {
+ u8 wqe_type;
+ #define SQ_LOCALINVALIDATE_HDR_WQE_TYPE_LOCAL_INVALID 0xcUL
+ #define SQ_LOCALINVALIDATE_HDR_WQE_TYPE_LAST SQ_LOCALINVALIDATE_HDR_WQE_TYPE_LOCAL_INVALID
+ u8 flags;
+ #define \
+ SQ_LOCALINVALIDATE_HDR_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_MASK 0xffUL
+ #define SQ_LOCALINVALIDATE_HDR_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_SFT\
+ 0
+ #define SQ_LOCALINVALIDATE_HDR_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_LOCALINVALIDATE_HDR_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_LOCALINVALIDATE_HDR_FLAGS_UC_FENCE 0x4UL
+ #define SQ_LOCALINVALIDATE_HDR_FLAGS_SE 0x8UL
+ #define SQ_LOCALINVALIDATE_HDR_FLAGS_INLINE 0x10UL
+ #define SQ_LOCALINVALIDATE_HDR_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_LOCALINVALIDATE_HDR_FLAGS_DEBUG_TRACE 0x40UL
+ __le16 reserved16;
+ __le32 inv_l_key;
+ __le64 reserved64;
+ u8 reserved128[16];
+};
+
+/* sq_fr_pmr (size:1024b/128B) */
+struct sq_fr_pmr {
+ u8 wqe_type;
+ #define SQ_FR_PMR_WQE_TYPE_FR_PMR 0xdUL
+ #define SQ_FR_PMR_WQE_TYPE_LAST SQ_FR_PMR_WQE_TYPE_FR_PMR
+ u8 flags;
+ #define SQ_FR_PMR_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_FR_PMR_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_FR_PMR_FLAGS_UC_FENCE 0x4UL
+ #define SQ_FR_PMR_FLAGS_SE 0x8UL
+ #define SQ_FR_PMR_FLAGS_INLINE 0x10UL
+ #define SQ_FR_PMR_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_FR_PMR_FLAGS_DEBUG_TRACE 0x40UL
+ u8 access_cntl;
+ #define SQ_FR_PMR_ACCESS_CNTL_LOCAL_WRITE 0x1UL
+ #define SQ_FR_PMR_ACCESS_CNTL_REMOTE_READ 0x2UL
+ #define SQ_FR_PMR_ACCESS_CNTL_REMOTE_WRITE 0x4UL
+ #define SQ_FR_PMR_ACCESS_CNTL_REMOTE_ATOMIC 0x8UL
+ #define SQ_FR_PMR_ACCESS_CNTL_WINDOW_BIND 0x10UL
+ u8 zero_based_page_size_log;
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_MASK 0x1fUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_SFT 0
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_4K 0x0UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_8K 0x1UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_16K 0x2UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_32K 0x3UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_64K 0x4UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_128K 0x5UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_256K 0x6UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_512K 0x7UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_1M 0x8UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_2M 0x9UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_4M 0xaUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_8M 0xbUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_16M 0xcUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_32M 0xdUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_64M 0xeUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_128M 0xfUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_256M 0x10UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_512M 0x11UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_1G 0x12UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_2G 0x13UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_4G 0x14UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_8G 0x15UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_16G 0x16UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_32G 0x17UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_64G 0x18UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_128G 0x19UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_256G 0x1aUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_512G 0x1bUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_1T 0x1cUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_2T 0x1dUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_4T 0x1eUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_8T 0x1fUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_LAST SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_8T
+ #define SQ_FR_PMR_ZERO_BASED 0x20UL
+ __le32 l_key;
+ u8 length[5];
+ u8 reserved8_1;
+ u8 reserved8_2;
+ u8 numlevels_pbl_page_size_log;
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_MASK 0x1fUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_SFT 0
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_4K 0x0UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_8K 0x1UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_16K 0x2UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_32K 0x3UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_64K 0x4UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_128K 0x5UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_256K 0x6UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_512K 0x7UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_1M 0x8UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_2M 0x9UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_4M 0xaUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_8M 0xbUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_16M 0xcUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_32M 0xdUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_64M 0xeUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_128M 0xfUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_256M 0x10UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_512M 0x11UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_1G 0x12UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_2G 0x13UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_4G 0x14UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_8G 0x15UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_16G 0x16UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_32G 0x17UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_64G 0x18UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_128G 0x19UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_256G 0x1aUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_512G 0x1bUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_1T 0x1cUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_2T 0x1dUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_4T 0x1eUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_8T 0x1fUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_LAST SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_8T
+ #define SQ_FR_PMR_NUMLEVELS_MASK 0xc0UL
+ #define SQ_FR_PMR_NUMLEVELS_SFT 6
+ #define SQ_FR_PMR_NUMLEVELS_PHYSICAL (0x0UL << 6)
+ #define SQ_FR_PMR_NUMLEVELS_LAYER1 (0x1UL << 6)
+ #define SQ_FR_PMR_NUMLEVELS_LAYER2 (0x2UL << 6)
+ #define SQ_FR_PMR_NUMLEVELS_LAST SQ_FR_PMR_NUMLEVELS_LAYER2
+ __le64 pblptr;
+ __le64 va;
+ __le32 data[24];
+};
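+
+ /*
+  * Illustrative sketch only, not part of the generated HSI definitions: the
+  * PAGE_SIZE_LOG encodings above count powers of two starting at 4KB
+  * (PGSZ_4K is 0, PGSZ_8K is 1, and so on), so the page size in bytes can
+  * be recovered as 4KB shifted left by the encoded value.  The helper name
+  * is hypothetical.
+  */
+ static inline u64 sq_fr_pmr_page_size(u8 zero_based_page_size_log)
+ {
+ 	return 4096ULL << (zero_based_page_size_log & SQ_FR_PMR_PAGE_SIZE_LOG_MASK);
+ }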
+
+/* sq_fr_pmr_hdr (size:256b/32B) */
+struct sq_fr_pmr_hdr {
+ u8 wqe_type;
+ #define SQ_FR_PMR_HDR_WQE_TYPE_FR_PMR 0xdUL
+ #define SQ_FR_PMR_HDR_WQE_TYPE_LAST SQ_FR_PMR_HDR_WQE_TYPE_FR_PMR
+ u8 flags;
+ #define SQ_FR_PMR_HDR_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_FR_PMR_HDR_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_FR_PMR_HDR_FLAGS_UC_FENCE 0x4UL
+ #define SQ_FR_PMR_HDR_FLAGS_SE 0x8UL
+ #define SQ_FR_PMR_HDR_FLAGS_INLINE 0x10UL
+ #define SQ_FR_PMR_HDR_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_FR_PMR_HDR_FLAGS_DEBUG_TRACE 0x40UL
+ u8 access_cntl;
+ #define SQ_FR_PMR_HDR_ACCESS_CNTL_LOCAL_WRITE 0x1UL
+ #define SQ_FR_PMR_HDR_ACCESS_CNTL_REMOTE_READ 0x2UL
+ #define SQ_FR_PMR_HDR_ACCESS_CNTL_REMOTE_WRITE 0x4UL
+ #define SQ_FR_PMR_HDR_ACCESS_CNTL_REMOTE_ATOMIC 0x8UL
+ #define SQ_FR_PMR_HDR_ACCESS_CNTL_WINDOW_BIND 0x10UL
+ u8 zero_based_page_size_log;
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_MASK 0x1fUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_SFT 0
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_4K 0x0UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_8K 0x1UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_16K 0x2UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_32K 0x3UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_64K 0x4UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_128K 0x5UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_256K 0x6UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_512K 0x7UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_1M 0x8UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_2M 0x9UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_4M 0xaUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_8M 0xbUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_16M 0xcUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_32M 0xdUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_64M 0xeUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_128M 0xfUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_256M 0x10UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_512M 0x11UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_1G 0x12UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_2G 0x13UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_4G 0x14UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_8G 0x15UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_16G 0x16UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_32G 0x17UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_64G 0x18UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_128G 0x19UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_256G 0x1aUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_512G 0x1bUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_1T 0x1cUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_2T 0x1dUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_4T 0x1eUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_8T 0x1fUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_LAST SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_8T
+ #define SQ_FR_PMR_HDR_ZERO_BASED 0x20UL
+ __le32 l_key;
+ u8 length[5];
+ u8 reserved8_1;
+ u8 reserved8_2;
+ u8 numlevels_pbl_page_size_log;
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_MASK 0x1fUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_SFT 0
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_4K 0x0UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_8K 0x1UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_16K 0x2UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_32K 0x3UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_64K 0x4UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_128K 0x5UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_256K 0x6UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_512K 0x7UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_1M 0x8UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_2M 0x9UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_4M 0xaUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_8M 0xbUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_16M 0xcUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_32M 0xdUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_64M 0xeUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_128M 0xfUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_256M 0x10UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_512M 0x11UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_1G 0x12UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_2G 0x13UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_4G 0x14UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_8G 0x15UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_16G 0x16UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_32G 0x17UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_64G 0x18UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_128G 0x19UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_256G 0x1aUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_512G 0x1bUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_1T 0x1cUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_2T 0x1dUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_4T 0x1eUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_8T 0x1fUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_LAST SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_8T
+ #define SQ_FR_PMR_HDR_NUMLEVELS_MASK 0xc0UL
+ #define SQ_FR_PMR_HDR_NUMLEVELS_SFT 6
+ #define SQ_FR_PMR_HDR_NUMLEVELS_PHYSICAL (0x0UL << 6)
+ #define SQ_FR_PMR_HDR_NUMLEVELS_LAYER1 (0x1UL << 6)
+ #define SQ_FR_PMR_HDR_NUMLEVELS_LAYER2 (0x2UL << 6)
+ #define SQ_FR_PMR_HDR_NUMLEVELS_LAST SQ_FR_PMR_HDR_NUMLEVELS_LAYER2
+ __le64 pblptr;
+ __le64 va;
+};
+
+/* sq_bind (size:1024b/128B) */
+struct sq_bind {
+ u8 wqe_type;
+ #define SQ_BIND_WQE_TYPE_BIND 0xeUL
+ #define SQ_BIND_WQE_TYPE_LAST SQ_BIND_WQE_TYPE_BIND
+ u8 flags;
+ #define SQ_BIND_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_MASK 0xffUL
+ #define SQ_BIND_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_SFT 0
+ #define SQ_BIND_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_BIND_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_BIND_FLAGS_UC_FENCE 0x4UL
+ #define SQ_BIND_FLAGS_SE 0x8UL
+ #define SQ_BIND_FLAGS_INLINE 0x10UL
+ #define SQ_BIND_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_BIND_FLAGS_DEBUG_TRACE 0x40UL
+ u8 access_cntl;
+ #define \
+ SQ_BIND_ACCESS_CNTL_WINDOW_BIND_REMOTE_ATOMIC_REMOTE_WRITE_REMOTE_READ_LOCAL_WRITE_MASK\
+ 0xffUL
+ #define \
+ SQ_BIND_ACCESS_CNTL_WINDOW_BIND_REMOTE_ATOMIC_REMOTE_WRITE_REMOTE_READ_LOCAL_WRITE_SFT 0
+ #define SQ_BIND_ACCESS_CNTL_LOCAL_WRITE 0x1UL
+ #define SQ_BIND_ACCESS_CNTL_REMOTE_READ 0x2UL
+ #define SQ_BIND_ACCESS_CNTL_REMOTE_WRITE 0x4UL
+ #define SQ_BIND_ACCESS_CNTL_REMOTE_ATOMIC 0x8UL
+ #define SQ_BIND_ACCESS_CNTL_WINDOW_BIND 0x10UL
+ u8 reserved8_1;
+ u8 mw_type_zero_based;
+ #define SQ_BIND_ZERO_BASED 0x1UL
+ #define SQ_BIND_MW_TYPE 0x2UL
+ #define SQ_BIND_MW_TYPE_TYPE1 (0x0UL << 1)
+ #define SQ_BIND_MW_TYPE_TYPE2 (0x1UL << 1)
+ #define SQ_BIND_MW_TYPE_LAST SQ_BIND_MW_TYPE_TYPE2
+ u8 reserved8_2;
+ __le16 reserved16;
+ __le32 parent_l_key;
+ __le32 l_key;
+ __le64 va;
+ u8 length[5];
+ u8 reserved24[3];
+ __le32 data[24];
+};
+
+/* sq_bind_hdr (size:256b/32B) */
+struct sq_bind_hdr {
+ u8 wqe_type;
+ #define SQ_BIND_HDR_WQE_TYPE_BIND 0xeUL
+ #define SQ_BIND_HDR_WQE_TYPE_LAST SQ_BIND_HDR_WQE_TYPE_BIND
+ u8 flags;
+ #define SQ_BIND_HDR_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_MASK 0xffUL
+ #define SQ_BIND_HDR_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_SFT 0
+ #define SQ_BIND_HDR_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_BIND_HDR_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_BIND_HDR_FLAGS_UC_FENCE 0x4UL
+ #define SQ_BIND_HDR_FLAGS_SE 0x8UL
+ #define SQ_BIND_HDR_FLAGS_INLINE 0x10UL
+ #define SQ_BIND_HDR_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_BIND_HDR_FLAGS_DEBUG_TRACE 0x40UL
+ u8 access_cntl;
+ #define \
+ SQ_BIND_HDR_ACCESS_CNTL_WINDOW_BIND_REMOTE_ATOMIC_REMOTE_WRITE_REMOTE_READ_LOCAL_WRITE_MASK\
+ 0xffUL
+ #define \
+ SQ_BIND_HDR_ACCESS_CNTL_WINDOW_BIND_REMOTE_ATOMIC_REMOTE_WRITE_REMOTE_READ_LOCAL_WRITE_SFT \
+ 0
+ #define SQ_BIND_HDR_ACCESS_CNTL_LOCAL_WRITE 0x1UL
+ #define SQ_BIND_HDR_ACCESS_CNTL_REMOTE_READ 0x2UL
+ #define SQ_BIND_HDR_ACCESS_CNTL_REMOTE_WRITE 0x4UL
+ #define SQ_BIND_HDR_ACCESS_CNTL_REMOTE_ATOMIC 0x8UL
+ #define SQ_BIND_HDR_ACCESS_CNTL_WINDOW_BIND 0x10UL
+ u8 reserved8_1;
+ u8 mw_type_zero_based;
+ #define SQ_BIND_HDR_ZERO_BASED 0x1UL
+ #define SQ_BIND_HDR_MW_TYPE 0x2UL
+ #define SQ_BIND_HDR_MW_TYPE_TYPE1 (0x0UL << 1)
+ #define SQ_BIND_HDR_MW_TYPE_TYPE2 (0x1UL << 1)
+ #define SQ_BIND_HDR_MW_TYPE_LAST SQ_BIND_HDR_MW_TYPE_TYPE2
+ u8 reserved8_2;
+ __le16 reserved16;
+ __le32 parent_l_key;
+ __le32 l_key;
+ __le64 va;
+ u8 length[5];
+ u8 reserved24[3];
+};
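+
+ /*
+  * Illustrative sketch only, not part of the generated HSI definitions: the
+  * 5-byte length[] arrays above carry a 40-bit byte count.  Assuming the
+  * same least-significant-byte-first ordering as the rest of these
+  * little-endian structures, the field would be filled one byte at a time
+  * as below.  The helper name is hypothetical.
+  */
+ static inline void sq_bind_set_length(struct sq_bind_hdr *bind, u64 len)
+ {
+ 	int i;
+
+ 	for (i = 0; i < 5; i++)
+ 		bind->length[i] = (len >> (i * 8)) & 0xff;
+ }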
+
+/* rq_wqe (size:1024b/128B) */
+struct rq_wqe {
+ u8 wqe_type;
+ #define RQ_WQE_WQE_TYPE_RCV 0x80UL
+ #define RQ_WQE_WQE_TYPE_LAST RQ_WQE_WQE_TYPE_RCV
+ u8 flags;
+ u8 wqe_size;
+ u8 reserved8;
+ __le32 reserved32;
+ __le32 wr_id[2];
+ #define RQ_WQE_WR_ID_MASK 0xfffffUL
+ #define RQ_WQE_WR_ID_SFT 0
+ u8 reserved128[16];
+ __le32 data[24];
+};
+
+/* rq_wqe_hdr (size:256b/32B) */
+struct rq_wqe_hdr {
+ u8 wqe_type;
+ #define RQ_WQE_HDR_WQE_TYPE_RCV 0x80UL
+ #define RQ_WQE_HDR_WQE_TYPE_LAST RQ_WQE_HDR_WQE_TYPE_RCV
+ u8 flags;
+ u8 wqe_size;
+ u8 reserved8;
+ __le32 reserved32;
+ __le32 wr_id[2];
+ #define RQ_WQE_HDR_WR_ID_MASK 0xfffffUL
+ #define RQ_WQE_HDR_WR_ID_SFT 0
+ u8 reserved128[16];
+};
+
+/* cq_base (size:256b/32B) */
+struct cq_base {
+ __le64 reserved64_1;
+ __le64 reserved64_2;
+ __le64 reserved64_3;
+ u8 cqe_type_toggle;
+ #define CQ_BASE_TOGGLE 0x1UL
+ #define CQ_BASE_CQE_TYPE_MASK 0x1eUL
+ #define CQ_BASE_CQE_TYPE_SFT 1
+ #define CQ_BASE_CQE_TYPE_REQ (0x0UL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_RC (0x1UL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_UD (0x2UL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_RAWETH_QP1 (0x3UL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_UD_CFA (0x4UL << 1)
+ #define CQ_BASE_CQE_TYPE_REQ_V3 (0x8UL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_RC_V3 (0x9UL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_UD_V3 (0xaUL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_RAWETH_QP1_V3 (0xbUL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_UD_CFA_V3 (0xcUL << 1)
+ #define CQ_BASE_CQE_TYPE_NO_OP (0xdUL << 1)
+ #define CQ_BASE_CQE_TYPE_TERMINAL (0xeUL << 1)
+ #define CQ_BASE_CQE_TYPE_CUT_OFF (0xfUL << 1)
+ #define CQ_BASE_CQE_TYPE_LAST CQ_BASE_CQE_TYPE_CUT_OFF
+ u8 status;
+ #define CQ_BASE_STATUS_OK 0x0UL
+ #define CQ_BASE_STATUS_BAD_RESPONSE_ERR 0x1UL
+ #define CQ_BASE_STATUS_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_BASE_STATUS_HW_LOCAL_LENGTH_ERR 0x3UL
+ #define CQ_BASE_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
+ #define CQ_BASE_STATUS_LOCAL_PROTECTION_ERR 0x5UL
+ #define CQ_BASE_STATUS_LOCAL_ACCESS_ERROR 0x6UL
+ #define CQ_BASE_STATUS_MEMORY_MGT_OPERATION_ERR 0x7UL
+ #define CQ_BASE_STATUS_REMOTE_INVALID_REQUEST_ERR 0x8UL
+ #define CQ_BASE_STATUS_REMOTE_ACCESS_ERR 0x9UL
+ #define CQ_BASE_STATUS_REMOTE_OPERATION_ERR 0xaUL
+ #define CQ_BASE_STATUS_RNR_NAK_RETRY_CNT_ERR 0xbUL
+ #define CQ_BASE_STATUS_TRANSPORT_RETRY_CNT_ERR 0xcUL
+ #define CQ_BASE_STATUS_WORK_REQUEST_FLUSHED_ERR 0xdUL
+ #define CQ_BASE_STATUS_HW_FLUSH_ERR 0xeUL
+ #define CQ_BASE_STATUS_OVERFLOW_ERR 0xfUL
+ #define CQ_BASE_STATUS_LAST CQ_BASE_STATUS_OVERFLOW_ERR
+ __le16 reserved16;
+ __le32 opaque;
+};
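+
+ /*
+  * Illustrative sketch only, not part of the generated HSI definitions:
+  * software typically classifies a completion by extracting the CQE type
+  * with the MASK/SFT pair above; the low TOGGLE bit, which the hardware is
+  * expected to flip on each pass through the CQ ring, lets the consumer
+  * tell newly written entries from stale ones.  The helper name is
+  * hypothetical.
+  */
+ static inline u8 cq_base_cqe_type(const struct cq_base *cqe)
+ {
+ 	return (cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK) >>
+ 	       CQ_BASE_CQE_TYPE_SFT;
+ }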
+
+/* cq_req (size:256b/32B) */
+struct cq_req {
+ __le64 qp_handle;
+ __le16 sq_cons_idx;
+ __le16 reserved16_1;
+ __le32 reserved32_2;
+ __le64 reserved64;
+ u8 cqe_type_toggle;
+ #define CQ_REQ_TOGGLE 0x1UL
+ #define CQ_REQ_CQE_TYPE_MASK 0x1eUL
+ #define CQ_REQ_CQE_TYPE_SFT 1
+ #define CQ_REQ_CQE_TYPE_REQ (0x0UL << 1)
+ #define CQ_REQ_CQE_TYPE_LAST CQ_REQ_CQE_TYPE_REQ
+ #define CQ_REQ_PUSH 0x20UL
+ u8 status;
+ #define CQ_REQ_STATUS_OK 0x0UL
+ #define CQ_REQ_STATUS_BAD_RESPONSE_ERR 0x1UL
+ #define CQ_REQ_STATUS_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_REQ_STATUS_LOCAL_QP_OPERATION_ERR 0x3UL
+ #define CQ_REQ_STATUS_LOCAL_PROTECTION_ERR 0x4UL
+ #define CQ_REQ_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
+ #define CQ_REQ_STATUS_REMOTE_INVALID_REQUEST_ERR 0x6UL
+ #define CQ_REQ_STATUS_REMOTE_ACCESS_ERR 0x7UL
+ #define CQ_REQ_STATUS_REMOTE_OPERATION_ERR 0x8UL
+ #define CQ_REQ_STATUS_RNR_NAK_RETRY_CNT_ERR 0x9UL
+ #define CQ_REQ_STATUS_TRANSPORT_RETRY_CNT_ERR 0xaUL
+ #define CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR 0xbUL
+ #define CQ_REQ_STATUS_LAST CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR
+ __le16 reserved16_2;
+ __le32 reserved32_1;
+};
+
+/* cq_res_rc (size:256b/32B) */
+struct cq_res_rc {
+ __le32 length;
+ __le32 imm_data_or_inv_r_key;
+ __le64 qp_handle;
+ __le64 mr_handle;
+ u8 cqe_type_toggle;
+ #define CQ_RES_RC_TOGGLE 0x1UL
+ #define CQ_RES_RC_CQE_TYPE_MASK 0x1eUL
+ #define CQ_RES_RC_CQE_TYPE_SFT 1
+ #define CQ_RES_RC_CQE_TYPE_RES_RC (0x1UL << 1)
+ #define CQ_RES_RC_CQE_TYPE_LAST CQ_RES_RC_CQE_TYPE_RES_RC
+ u8 status;
+ #define CQ_RES_RC_STATUS_OK 0x0UL
+ #define CQ_RES_RC_STATUS_LOCAL_ACCESS_ERROR 0x1UL
+ #define CQ_RES_RC_STATUS_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_RES_RC_STATUS_LOCAL_PROTECTION_ERR 0x3UL
+ #define CQ_RES_RC_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
+ #define CQ_RES_RC_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
+ #define CQ_RES_RC_STATUS_REMOTE_INVALID_REQUEST_ERR 0x6UL
+ #define CQ_RES_RC_STATUS_WORK_REQUEST_FLUSHED_ERR 0x7UL
+ #define CQ_RES_RC_STATUS_HW_FLUSH_ERR 0x8UL
+ #define CQ_RES_RC_STATUS_LAST CQ_RES_RC_STATUS_HW_FLUSH_ERR
+ __le16 flags;
+ #define CQ_RES_RC_FLAGS_SRQ 0x1UL
+ #define CQ_RES_RC_FLAGS_SRQ_RQ 0x0UL
+ #define CQ_RES_RC_FLAGS_SRQ_SRQ 0x1UL
+ #define CQ_RES_RC_FLAGS_SRQ_LAST CQ_RES_RC_FLAGS_SRQ_SRQ
+ #define CQ_RES_RC_FLAGS_IMM 0x2UL
+ #define CQ_RES_RC_FLAGS_INV 0x4UL
+ #define CQ_RES_RC_FLAGS_RDMA 0x8UL
+ #define CQ_RES_RC_FLAGS_RDMA_SEND (0x0UL << 3)
+ #define CQ_RES_RC_FLAGS_RDMA_RDMA_WRITE (0x1UL << 3)
+ #define CQ_RES_RC_FLAGS_RDMA_LAST CQ_RES_RC_FLAGS_RDMA_RDMA_WRITE
+ __le32 srq_or_rq_wr_id;
+ #define CQ_RES_RC_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
+ #define CQ_RES_RC_SRQ_OR_RQ_WR_ID_SFT 0
+};
+
+/* cq_res_ud (size:256b/32B) */
+struct cq_res_ud {
+ __le16 length;
+ #define CQ_RES_UD_LENGTH_MASK 0x3fffUL
+ #define CQ_RES_UD_LENGTH_SFT 0
+ __le16 cfa_metadata;
+ #define CQ_RES_UD_CFA_METADATA_VID_MASK 0xfffUL
+ #define CQ_RES_UD_CFA_METADATA_VID_SFT 0
+ #define CQ_RES_UD_CFA_METADATA_DE 0x1000UL
+ #define CQ_RES_UD_CFA_METADATA_PRI_MASK 0xe000UL
+ #define CQ_RES_UD_CFA_METADATA_PRI_SFT 13
+ __le32 imm_data;
+ __le64 qp_handle;
+ __le16 src_mac[3];
+ __le16 src_qp_low;
+ u8 cqe_type_toggle;
+ #define CQ_RES_UD_TOGGLE 0x1UL
+ #define CQ_RES_UD_CQE_TYPE_MASK 0x1eUL
+ #define CQ_RES_UD_CQE_TYPE_SFT 1
+ #define CQ_RES_UD_CQE_TYPE_RES_UD (0x2UL << 1)
+ #define CQ_RES_UD_CQE_TYPE_LAST CQ_RES_UD_CQE_TYPE_RES_UD
+ u8 status;
+ #define CQ_RES_UD_STATUS_OK 0x0UL
+ #define CQ_RES_UD_STATUS_LOCAL_ACCESS_ERROR 0x1UL
+ #define CQ_RES_UD_STATUS_HW_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_RES_UD_STATUS_LOCAL_PROTECTION_ERR 0x3UL
+ #define CQ_RES_UD_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
+ #define CQ_RES_UD_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
+ #define CQ_RES_UD_STATUS_WORK_REQUEST_FLUSHED_ERR 0x7UL
+ #define CQ_RES_UD_STATUS_HW_FLUSH_ERR 0x8UL
+ #define CQ_RES_UD_STATUS_LAST CQ_RES_UD_STATUS_HW_FLUSH_ERR
+ __le16 flags;
+ #define CQ_RES_UD_FLAGS_SRQ 0x1UL
+ #define CQ_RES_UD_FLAGS_SRQ_RQ 0x0UL
+ #define CQ_RES_UD_FLAGS_SRQ_SRQ 0x1UL
+ #define CQ_RES_UD_FLAGS_SRQ_LAST CQ_RES_UD_FLAGS_SRQ_SRQ
+ #define CQ_RES_UD_FLAGS_IMM 0x2UL
+ #define CQ_RES_UD_FLAGS_UNUSED_MASK 0xcUL
+ #define CQ_RES_UD_FLAGS_UNUSED_SFT 2
+ #define CQ_RES_UD_FLAGS_ROCE_IP_VER_MASK 0x30UL
+ #define CQ_RES_UD_FLAGS_ROCE_IP_VER_SFT 4
+ #define CQ_RES_UD_FLAGS_ROCE_IP_VER_V1 (0x0UL << 4)
+ #define CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV4 (0x2UL << 4)
+ #define CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV6 (0x3UL << 4)
+ #define CQ_RES_UD_FLAGS_ROCE_IP_VER_LAST CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV6
+ #define CQ_RES_UD_FLAGS_META_FORMAT_MASK 0x3c0UL
+ #define CQ_RES_UD_FLAGS_META_FORMAT_SFT 6
+ #define CQ_RES_UD_FLAGS_META_FORMAT_NONE (0x0UL << 6)
+ #define CQ_RES_UD_FLAGS_META_FORMAT_VLAN (0x1UL << 6)
+ #define CQ_RES_UD_FLAGS_META_FORMAT_TUNNEL_ID (0x2UL << 6)
+ #define CQ_RES_UD_FLAGS_META_FORMAT_CHDR_DATA (0x3UL << 6)
+ #define CQ_RES_UD_FLAGS_META_FORMAT_HDR_OFFSET (0x4UL << 6)
+ #define CQ_RES_UD_FLAGS_META_FORMAT_LAST CQ_RES_UD_FLAGS_META_FORMAT_HDR_OFFSET
+ #define CQ_RES_UD_FLAGS_EXT_META_FORMAT_MASK 0xc00UL
+ #define CQ_RES_UD_FLAGS_EXT_META_FORMAT_SFT 10
+ __le32 src_qp_high_srq_or_rq_wr_id;
+ #define CQ_RES_UD_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
+ #define CQ_RES_UD_SRQ_OR_RQ_WR_ID_SFT 0
+ #define CQ_RES_UD_SRC_QP_HIGH_MASK 0xff000000UL
+ #define CQ_RES_UD_SRC_QP_HIGH_SFT 24
+};
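+
+ /*
+  * Illustrative sketch only, not part of the generated HSI definitions: the
+  * source QP number of a UD completion is split between src_qp_low and the
+  * SRC_QP_HIGH field of the final dword, so it would typically be
+  * reassembled as below.  The helper name is hypothetical.
+  */
+ static inline u32 cq_res_ud_src_qp(const struct cq_res_ud *cqe)
+ {
+ 	u32 high = (le32_to_cpu(cqe->src_qp_high_srq_or_rq_wr_id) &
+ 		    CQ_RES_UD_SRC_QP_HIGH_MASK) >> CQ_RES_UD_SRC_QP_HIGH_SFT;
+
+ 	return le16_to_cpu(cqe->src_qp_low) | (high << 16);
+ }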
+
+/* cq_res_ud_v2 (size:256b/32B) */
+struct cq_res_ud_v2 {
+ __le16 length;
+ #define CQ_RES_UD_V2_LENGTH_MASK 0x3fffUL
+ #define CQ_RES_UD_V2_LENGTH_SFT 0
+ __le16 cfa_metadata0;
+ #define CQ_RES_UD_V2_CFA_METADATA0_VID_MASK 0xfffUL
+ #define CQ_RES_UD_V2_CFA_METADATA0_VID_SFT 0
+ #define CQ_RES_UD_V2_CFA_METADATA0_DE 0x1000UL
+ #define CQ_RES_UD_V2_CFA_METADATA0_PRI_MASK 0xe000UL
+ #define CQ_RES_UD_V2_CFA_METADATA0_PRI_SFT 13
+ __le32 imm_data;
+ __le64 qp_handle;
+ __le16 src_mac[3];
+ __le16 src_qp_low;
+ u8 cqe_type_toggle;
+ #define CQ_RES_UD_V2_TOGGLE 0x1UL
+ #define CQ_RES_UD_V2_CQE_TYPE_MASK 0x1eUL
+ #define CQ_RES_UD_V2_CQE_TYPE_SFT 1
+ #define CQ_RES_UD_V2_CQE_TYPE_RES_UD (0x2UL << 1)
+ #define CQ_RES_UD_V2_CQE_TYPE_LAST CQ_RES_UD_V2_CQE_TYPE_RES_UD
+ u8 status;
+ #define CQ_RES_UD_V2_STATUS_OK 0x0UL
+ #define CQ_RES_UD_V2_STATUS_LOCAL_ACCESS_ERROR 0x1UL
+ #define CQ_RES_UD_V2_STATUS_HW_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_RES_UD_V2_STATUS_LOCAL_PROTECTION_ERR 0x3UL
+ #define CQ_RES_UD_V2_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
+ #define CQ_RES_UD_V2_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
+ #define CQ_RES_UD_V2_STATUS_WORK_REQUEST_FLUSHED_ERR 0x7UL
+ #define CQ_RES_UD_V2_STATUS_HW_FLUSH_ERR 0x8UL
+ #define CQ_RES_UD_V2_STATUS_LAST CQ_RES_UD_V2_STATUS_HW_FLUSH_ERR
+ __le16 flags;
+ #define CQ_RES_UD_V2_FLAGS_SRQ 0x1UL
+ #define CQ_RES_UD_V2_FLAGS_SRQ_RQ 0x0UL
+ #define CQ_RES_UD_V2_FLAGS_SRQ_SRQ 0x1UL
+ #define CQ_RES_UD_V2_FLAGS_SRQ_LAST CQ_RES_UD_V2_FLAGS_SRQ_SRQ
+ #define CQ_RES_UD_V2_FLAGS_IMM 0x2UL
+ #define CQ_RES_UD_V2_FLAGS_UNUSED_MASK 0xcUL
+ #define CQ_RES_UD_V2_FLAGS_UNUSED_SFT 2
+ #define CQ_RES_UD_V2_FLAGS_ROCE_IP_VER_MASK 0x30UL
+ #define CQ_RES_UD_V2_FLAGS_ROCE_IP_VER_SFT 4
+ #define CQ_RES_UD_V2_FLAGS_ROCE_IP_VER_V1 (0x0UL << 4)
+ #define CQ_RES_UD_V2_FLAGS_ROCE_IP_VER_V2IPV4 (0x2UL << 4)
+ #define CQ_RES_UD_V2_FLAGS_ROCE_IP_VER_V2IPV6 (0x3UL << 4)
+ #define CQ_RES_UD_V2_FLAGS_ROCE_IP_VER_LAST CQ_RES_UD_V2_FLAGS_ROCE_IP_VER_V2IPV6
+ #define CQ_RES_UD_V2_FLAGS_META_FORMAT_MASK 0x3c0UL
+ #define CQ_RES_UD_V2_FLAGS_META_FORMAT_SFT 6
+ #define CQ_RES_UD_V2_FLAGS_META_FORMAT_NONE (0x0UL << 6)
+ #define CQ_RES_UD_V2_FLAGS_META_FORMAT_ACT_REC_PTR (0x1UL << 6)
+ #define CQ_RES_UD_V2_FLAGS_META_FORMAT_TUNNEL_ID (0x2UL << 6)
+ #define CQ_RES_UD_V2_FLAGS_META_FORMAT_CHDR_DATA (0x3UL << 6)
+ #define CQ_RES_UD_V2_FLAGS_META_FORMAT_HDR_OFFSET (0x4UL << 6)
+ #define CQ_RES_UD_V2_FLAGS_META_FORMAT_LAST CQ_RES_UD_V2_FLAGS_META_FORMAT_HDR_OFFSET
+ __le32 src_qp_high_srq_or_rq_wr_id;
+ #define CQ_RES_UD_V2_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
+ #define CQ_RES_UD_V2_SRQ_OR_RQ_WR_ID_SFT 0
+ #define CQ_RES_UD_V2_CFA_METADATA1_MASK 0xf00000UL
+ #define CQ_RES_UD_V2_CFA_METADATA1_SFT 20
+ #define CQ_RES_UD_V2_CFA_METADATA1_TPID_SEL_MASK 0x700000UL
+ #define CQ_RES_UD_V2_CFA_METADATA1_TPID_SEL_SFT 20
+ #define CQ_RES_UD_V2_CFA_METADATA1_TPID_SEL_TPID88A8 (0x0UL << 20)
+ #define CQ_RES_UD_V2_CFA_METADATA1_TPID_SEL_TPID8100 (0x1UL << 20)
+ #define CQ_RES_UD_V2_CFA_METADATA1_TPID_SEL_TPID9100 (0x2UL << 20)
+ #define CQ_RES_UD_V2_CFA_METADATA1_TPID_SEL_TPID9200 (0x3UL << 20)
+ #define CQ_RES_UD_V2_CFA_METADATA1_TPID_SEL_TPID9300 (0x4UL << 20)
+ #define CQ_RES_UD_V2_CFA_METADATA1_TPID_SEL_TPIDCFG (0x5UL << 20)
+ #define CQ_RES_UD_V2_CFA_METADATA1_TPID_SEL_LAST CQ_RES_UD_V2_CFA_METADATA1_TPID_SEL_TPIDCFG
+ #define CQ_RES_UD_V2_CFA_METADATA1_VALID 0x800000UL
+ #define CQ_RES_UD_V2_SRC_QP_HIGH_MASK 0xff000000UL
+ #define CQ_RES_UD_V2_SRC_QP_HIGH_SFT 24
+};
+
+/* cq_res_ud_cfa (size:256b/32B) */
+struct cq_res_ud_cfa {
+ __le16 length;
+ #define CQ_RES_UD_CFA_LENGTH_MASK 0x3fffUL
+ #define CQ_RES_UD_CFA_LENGTH_SFT 0
+ __le16 cfa_code;
+ __le32 imm_data;
+ __le32 qid;
+ #define CQ_RES_UD_CFA_QID_MASK 0xfffffUL
+ #define CQ_RES_UD_CFA_QID_SFT 0
+ __le32 cfa_metadata;
+ #define CQ_RES_UD_CFA_CFA_METADATA_VID_MASK 0xfffUL
+ #define CQ_RES_UD_CFA_CFA_METADATA_VID_SFT 0
+ #define CQ_RES_UD_CFA_CFA_METADATA_DE 0x1000UL
+ #define CQ_RES_UD_CFA_CFA_METADATA_PRI_MASK 0xe000UL
+ #define CQ_RES_UD_CFA_CFA_METADATA_PRI_SFT 13
+ #define CQ_RES_UD_CFA_CFA_METADATA_TPID_MASK 0xffff0000UL
+ #define CQ_RES_UD_CFA_CFA_METADATA_TPID_SFT 16
+ __le16 src_mac[3];
+ __le16 src_qp_low;
+ u8 cqe_type_toggle;
+ #define CQ_RES_UD_CFA_TOGGLE 0x1UL
+ #define CQ_RES_UD_CFA_CQE_TYPE_MASK 0x1eUL
+ #define CQ_RES_UD_CFA_CQE_TYPE_SFT 1
+ #define CQ_RES_UD_CFA_CQE_TYPE_RES_UD_CFA (0x4UL << 1)
+ #define CQ_RES_UD_CFA_CQE_TYPE_LAST CQ_RES_UD_CFA_CQE_TYPE_RES_UD_CFA
+ u8 status;
+ #define CQ_RES_UD_CFA_STATUS_OK 0x0UL
+ #define CQ_RES_UD_CFA_STATUS_LOCAL_ACCESS_ERROR 0x1UL
+ #define CQ_RES_UD_CFA_STATUS_HW_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_RES_UD_CFA_STATUS_LOCAL_PROTECTION_ERR 0x3UL
+ #define CQ_RES_UD_CFA_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
+ #define CQ_RES_UD_CFA_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
+ #define CQ_RES_UD_CFA_STATUS_WORK_REQUEST_FLUSHED_ERR 0x7UL
+ #define CQ_RES_UD_CFA_STATUS_HW_FLUSH_ERR 0x8UL
+ #define CQ_RES_UD_CFA_STATUS_LAST CQ_RES_UD_CFA_STATUS_HW_FLUSH_ERR
+ __le16 flags;
+ #define CQ_RES_UD_CFA_FLAGS_SRQ 0x1UL
+ #define CQ_RES_UD_CFA_FLAGS_SRQ_RQ 0x0UL
+ #define CQ_RES_UD_CFA_FLAGS_SRQ_SRQ 0x1UL
+ #define CQ_RES_UD_CFA_FLAGS_SRQ_LAST CQ_RES_UD_CFA_FLAGS_SRQ_SRQ
+ #define CQ_RES_UD_CFA_FLAGS_IMM 0x2UL
+ #define CQ_RES_UD_CFA_FLAGS_UNUSED_MASK 0xcUL
+ #define CQ_RES_UD_CFA_FLAGS_UNUSED_SFT 2
+ #define CQ_RES_UD_CFA_FLAGS_ROCE_IP_VER_MASK 0x30UL
+ #define CQ_RES_UD_CFA_FLAGS_ROCE_IP_VER_SFT 4
+ #define CQ_RES_UD_CFA_FLAGS_ROCE_IP_VER_V1 (0x0UL << 4)
+ #define CQ_RES_UD_CFA_FLAGS_ROCE_IP_VER_V2IPV4 (0x2UL << 4)
+ #define CQ_RES_UD_CFA_FLAGS_ROCE_IP_VER_V2IPV6 (0x3UL << 4)
+ #define CQ_RES_UD_CFA_FLAGS_ROCE_IP_VER_LAST CQ_RES_UD_CFA_FLAGS_ROCE_IP_VER_V2IPV6
+ #define CQ_RES_UD_CFA_FLAGS_META_FORMAT_MASK 0x3c0UL
+ #define CQ_RES_UD_CFA_FLAGS_META_FORMAT_SFT 6
+ #define CQ_RES_UD_CFA_FLAGS_META_FORMAT_NONE (0x0UL << 6)
+ #define CQ_RES_UD_CFA_FLAGS_META_FORMAT_VLAN (0x1UL << 6)
+ #define CQ_RES_UD_CFA_FLAGS_META_FORMAT_TUNNEL_ID (0x2UL << 6)
+ #define CQ_RES_UD_CFA_FLAGS_META_FORMAT_CHDR_DATA (0x3UL << 6)
+ #define CQ_RES_UD_CFA_FLAGS_META_FORMAT_HDR_OFFSET (0x4UL << 6)
+ #define CQ_RES_UD_CFA_FLAGS_META_FORMAT_LAST CQ_RES_UD_CFA_FLAGS_META_FORMAT_HDR_OFFSET
+ #define CQ_RES_UD_CFA_FLAGS_EXT_META_FORMAT_MASK 0xc00UL
+ #define CQ_RES_UD_CFA_FLAGS_EXT_META_FORMAT_SFT 10
+ __le32 src_qp_high_srq_or_rq_wr_id;
+ #define CQ_RES_UD_CFA_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
+ #define CQ_RES_UD_CFA_SRQ_OR_RQ_WR_ID_SFT 0
+ #define CQ_RES_UD_CFA_SRC_QP_HIGH_MASK 0xff000000UL
+ #define CQ_RES_UD_CFA_SRC_QP_HIGH_SFT 24
+};
+
+/* cq_res_ud_cfa_v2 (size:256b/32B) */
+struct cq_res_ud_cfa_v2 {
+ __le16 length;
+ #define CQ_RES_UD_CFA_V2_LENGTH_MASK 0x3fffUL
+ #define CQ_RES_UD_CFA_V2_LENGTH_SFT 0
+ __le16 cfa_metadata0;
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA0_VID_MASK 0xfffUL
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA0_VID_SFT 0
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA0_DE 0x1000UL
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA0_PRI_MASK 0xe000UL
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA0_PRI_SFT 13
+ __le32 imm_data;
+ __le32 qid;
+ #define CQ_RES_UD_CFA_V2_QID_MASK 0xfffffUL
+ #define CQ_RES_UD_CFA_V2_QID_SFT 0
+ __le32 cfa_metadata2;
+ __le16 src_mac[3];
+ __le16 src_qp_low;
+ u8 cqe_type_toggle;
+ #define CQ_RES_UD_CFA_V2_TOGGLE 0x1UL
+ #define CQ_RES_UD_CFA_V2_CQE_TYPE_MASK 0x1eUL
+ #define CQ_RES_UD_CFA_V2_CQE_TYPE_SFT 1
+ #define CQ_RES_UD_CFA_V2_CQE_TYPE_RES_UD_CFA (0x4UL << 1)
+ #define CQ_RES_UD_CFA_V2_CQE_TYPE_LAST CQ_RES_UD_CFA_V2_CQE_TYPE_RES_UD_CFA
+ u8 status;
+ #define CQ_RES_UD_CFA_V2_STATUS_OK 0x0UL
+ #define CQ_RES_UD_CFA_V2_STATUS_LOCAL_ACCESS_ERROR 0x1UL
+ #define CQ_RES_UD_CFA_V2_STATUS_HW_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_RES_UD_CFA_V2_STATUS_LOCAL_PROTECTION_ERR 0x3UL
+ #define CQ_RES_UD_CFA_V2_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
+ #define CQ_RES_UD_CFA_V2_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
+ #define CQ_RES_UD_CFA_V2_STATUS_WORK_REQUEST_FLUSHED_ERR 0x7UL
+ #define CQ_RES_UD_CFA_V2_STATUS_HW_FLUSH_ERR 0x8UL
+ #define CQ_RES_UD_CFA_V2_STATUS_LAST CQ_RES_UD_CFA_V2_STATUS_HW_FLUSH_ERR
+ __le16 flags;
+ #define CQ_RES_UD_CFA_V2_FLAGS_SRQ 0x1UL
+ #define CQ_RES_UD_CFA_V2_FLAGS_SRQ_RQ 0x0UL
+ #define CQ_RES_UD_CFA_V2_FLAGS_SRQ_SRQ 0x1UL
+ #define CQ_RES_UD_CFA_V2_FLAGS_SRQ_LAST CQ_RES_UD_CFA_V2_FLAGS_SRQ_SRQ
+ #define CQ_RES_UD_CFA_V2_FLAGS_IMM 0x2UL
+ #define CQ_RES_UD_CFA_V2_FLAGS_UNUSED_MASK 0xcUL
+ #define CQ_RES_UD_CFA_V2_FLAGS_UNUSED_SFT 2
+ #define CQ_RES_UD_CFA_V2_FLAGS_ROCE_IP_VER_MASK 0x30UL
+ #define CQ_RES_UD_CFA_V2_FLAGS_ROCE_IP_VER_SFT 4
+ #define CQ_RES_UD_CFA_V2_FLAGS_ROCE_IP_VER_V1 (0x0UL << 4)
+ #define CQ_RES_UD_CFA_V2_FLAGS_ROCE_IP_VER_V2IPV4 (0x2UL << 4)
+ #define CQ_RES_UD_CFA_V2_FLAGS_ROCE_IP_VER_V2IPV6 (0x3UL << 4)
+ #define CQ_RES_UD_CFA_V2_FLAGS_ROCE_IP_VER_LAST CQ_RES_UD_CFA_V2_FLAGS_ROCE_IP_VER_V2IPV6
+ #define CQ_RES_UD_CFA_V2_FLAGS_META_FORMAT_MASK 0x3c0UL
+ #define CQ_RES_UD_CFA_V2_FLAGS_META_FORMAT_SFT 6
+ #define CQ_RES_UD_CFA_V2_FLAGS_META_FORMAT_NONE (0x0UL << 6)
+ #define CQ_RES_UD_CFA_V2_FLAGS_META_FORMAT_ACT_REC_PTR (0x1UL << 6)
+ #define CQ_RES_UD_CFA_V2_FLAGS_META_FORMAT_TUNNEL_ID (0x2UL << 6)
+ #define CQ_RES_UD_CFA_V2_FLAGS_META_FORMAT_CHDR_DATA (0x3UL << 6)
+ #define CQ_RES_UD_CFA_V2_FLAGS_META_FORMAT_HDR_OFFSET (0x4UL << 6)
+ #define CQ_RES_UD_CFA_V2_FLAGS_META_FORMAT_LAST \
+ CQ_RES_UD_CFA_V2_FLAGS_META_FORMAT_HDR_OFFSET
+ __le32 src_qp_high_srq_or_rq_wr_id;
+ #define CQ_RES_UD_CFA_V2_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
+ #define CQ_RES_UD_CFA_V2_SRQ_OR_RQ_WR_ID_SFT 0
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA1_MASK 0xf00000UL
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA1_SFT 20
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA1_TPID_SEL_MASK 0x700000UL
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA1_TPID_SEL_SFT 20
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA1_TPID_SEL_TPID88A8 (0x0UL << 20)
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA1_TPID_SEL_TPID8100 (0x1UL << 20)
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA1_TPID_SEL_TPID9100 (0x2UL << 20)
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA1_TPID_SEL_TPID9200 (0x3UL << 20)
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA1_TPID_SEL_TPID9300 (0x4UL << 20)
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA1_TPID_SEL_TPIDCFG (0x5UL << 20)
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA1_TPID_SEL_LAST \
+ CQ_RES_UD_CFA_V2_CFA_METADATA1_TPID_SEL_TPIDCFG
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA1_VALID 0x800000UL
+ #define CQ_RES_UD_CFA_V2_SRC_QP_HIGH_MASK 0xff000000UL
+ #define CQ_RES_UD_CFA_V2_SRC_QP_HIGH_SFT 24
+};
+
+/* cq_res_raweth_qp1 (size:256b/32B) */
+struct cq_res_raweth_qp1 {
+ __le16 length;
+ #define CQ_RES_RAWETH_QP1_LENGTH_MASK 0x3fffUL
+ #define CQ_RES_RAWETH_QP1_LENGTH_SFT 0
+ __le16 raweth_qp1_flags;
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_MASK 0x3ffUL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_SFT 0
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ERROR 0x1UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_MASK 0x3c0UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_SFT 6
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_NOT_KNOWN (0x0UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_IP (0x1UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_TCP (0x2UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_UDP (0x3UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_FCOE (0x4UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_ROCE (0x5UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_ICMP (0x7UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_PTP_WO_TIMESTAMP (0x8UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_PTP_W_TIMESTAMP (0x9UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_LAST \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_PTP_W_TIMESTAMP
+ __le16 raweth_qp1_errors;
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_IP_CS_ERROR 0x10UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_L4_CS_ERROR 0x20UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_IP_CS_ERROR 0x40UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_L4_CS_ERROR 0x80UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_CRC_ERROR 0x100UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_MASK 0xe00UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_SFT 9
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_NO_ERROR (0x0UL << 9)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_VERSION (0x1UL << 9)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_HDR_LEN (0x2UL << 9)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_TUNNEL_TOTAL_ERROR (0x3UL << 9)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_IP_TOTAL_ERROR (0x4UL << 9)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_UDP_TOTAL_ERROR (0x5UL << 9)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_TTL (0x6UL << 9)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_LAST \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_TTL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_MASK 0xf000UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_SFT 12
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_NO_ERROR (0x0UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L3_BAD_VERSION (0x1UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L3_BAD_HDR_LEN (0x2UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L3_BAD_TTL (0x3UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_IP_TOTAL_ERROR (0x4UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_UDP_TOTAL_ERROR (0x5UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_HDR_LEN (0x6UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_HDR_LEN_TOO_SMALL (0x7UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_OPT_LEN (0x8UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_LAST \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_OPT_LEN
+ __le16 raweth_qp1_cfa_code;
+ __le64 qp_handle;
+ __le32 raweth_qp1_flags2;
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_IP_CS_CALC 0x1UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_L4_CS_CALC 0x2UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_T_IP_CS_CALC 0x4UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_T_L4_CS_CALC 0x8UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_MASK 0xf0UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_SFT 4
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_NONE (0x0UL << 4)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_VLAN (0x1UL << 4)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_TUNNEL_ID (0x2UL << 4)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_CHDR_DATA (0x3UL << 4)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_HDR_OFFSET (0x4UL << 4)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_LAST \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_HDR_OFFSET
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_IP_TYPE 0x100UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_COMPLETE_CHECKSUM_CALC 0x200UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_EXT_META_FORMAT_MASK 0xc00UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_EXT_META_FORMAT_SFT 10
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_COMPLETE_CHECKSUM_MASK 0xffff0000UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_COMPLETE_CHECKSUM_SFT 16
+ __le32 raweth_qp1_metadata;
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_PRI_DE_VID_MASK 0xffffUL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_PRI_DE_VID_SFT 0
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_VID_MASK 0xfffUL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_VID_SFT 0
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_DE 0x1000UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_PRI_MASK 0xe000UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_PRI_SFT 13
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_TPID_MASK 0xffff0000UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_TPID_SFT 16
+ u8 cqe_type_toggle;
+ #define CQ_RES_RAWETH_QP1_TOGGLE 0x1UL
+ #define CQ_RES_RAWETH_QP1_CQE_TYPE_MASK 0x1eUL
+ #define CQ_RES_RAWETH_QP1_CQE_TYPE_SFT 1
+ #define CQ_RES_RAWETH_QP1_CQE_TYPE_RES_RAWETH_QP1 (0x3UL << 1)
+ #define CQ_RES_RAWETH_QP1_CQE_TYPE_LAST CQ_RES_RAWETH_QP1_CQE_TYPE_RES_RAWETH_QP1
+ u8 status;
+ #define CQ_RES_RAWETH_QP1_STATUS_OK 0x0UL
+ #define CQ_RES_RAWETH_QP1_STATUS_LOCAL_ACCESS_ERROR 0x1UL
+ #define CQ_RES_RAWETH_QP1_STATUS_HW_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_RES_RAWETH_QP1_STATUS_LOCAL_PROTECTION_ERR 0x3UL
+ #define CQ_RES_RAWETH_QP1_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
+ #define CQ_RES_RAWETH_QP1_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
+ #define CQ_RES_RAWETH_QP1_STATUS_WORK_REQUEST_FLUSHED_ERR 0x7UL
+ #define CQ_RES_RAWETH_QP1_STATUS_HW_FLUSH_ERR 0x8UL
+ #define CQ_RES_RAWETH_QP1_STATUS_LAST CQ_RES_RAWETH_QP1_STATUS_HW_FLUSH_ERR
+ __le16 flags;
+ #define CQ_RES_RAWETH_QP1_FLAGS_SRQ 0x1UL
+ #define CQ_RES_RAWETH_QP1_FLAGS_SRQ_RQ 0x0UL
+ #define CQ_RES_RAWETH_QP1_FLAGS_SRQ_SRQ 0x1UL
+ #define CQ_RES_RAWETH_QP1_FLAGS_SRQ_LAST CQ_RES_RAWETH_QP1_FLAGS_SRQ_SRQ
+ __le32 raweth_qp1_payload_offset_srq_or_rq_wr_id;
+ #define CQ_RES_RAWETH_QP1_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
+ #define CQ_RES_RAWETH_QP1_SRQ_OR_RQ_WR_ID_SFT 0
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_PAYLOAD_OFFSET_MASK 0xff000000UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_PAYLOAD_OFFSET_SFT 24
+};
+
+/* cq_res_raweth_qp1_v2 (size:256b/32B) */
+struct cq_res_raweth_qp1_v2 {
+ __le16 length;
+ #define CQ_RES_RAWETH_QP1_V2_LENGTH_MASK 0x3fffUL
+ #define CQ_RES_RAWETH_QP1_V2_LENGTH_SFT 0
+ __le16 raweth_qp1_flags;
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_MASK 0x3ffUL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_SFT 0
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ERROR 0x1UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_MASK 0x3c0UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_SFT 6
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_NOT_KNOWN (0x0UL << 6)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_IP (0x1UL << 6)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_TCP (0x2UL << 6)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_UDP (0x3UL << 6)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_FCOE (0x4UL << 6)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_ROCE (0x5UL << 6)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_ICMP (0x7UL << 6)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_PTP_WO_TIMESTAMP (0x8UL << 6)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_PTP_W_TIMESTAMP (0x9UL << 6)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_LAST \
+ CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_PTP_W_TIMESTAMP
+ __le16 raweth_qp1_errors;
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_IP_CS_ERROR 0x10UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_L4_CS_ERROR 0x20UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_IP_CS_ERROR 0x40UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_L4_CS_ERROR 0x80UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_CRC_ERROR 0x100UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_PKT_ERROR_MASK 0xe00UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_PKT_ERROR_SFT 9
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_PKT_ERROR_NO_ERROR (0x0UL << 9)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_VERSION (0x1UL << 9)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_HDR_LEN (0x2UL << 9)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_PKT_ERROR_TUNNEL_TOTAL_ERROR (0x3UL << 9)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_IP_TOTAL_ERROR (0x4UL << 9)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_UDP_TOTAL_ERROR (0x5UL << 9)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_TTL (0x6UL << 9)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_PKT_ERROR_LAST \
+ CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_TTL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_MASK 0xf000UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_SFT 12
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_NO_ERROR (0x0UL << 12)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_L3_BAD_VERSION (0x1UL << 12)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_L3_BAD_HDR_LEN (0x2UL << 12)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_L3_BAD_TTL (0x3UL << 12)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_IP_TOTAL_ERROR (0x4UL << 12)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_UDP_TOTAL_ERROR (0x5UL << 12)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_HDR_LEN (0x6UL << 12)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_HDR_LEN_TOO_SMALL \
+ (0x7UL << 12)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_OPT_LEN \
+ (0x8UL << 12)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_LAST \
+ CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_OPT_LEN
+ __le16 cfa_metadata0;
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA0_VID_MASK 0xfffUL
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA0_VID_SFT 0
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA0_DE 0x1000UL
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA0_PRI_MASK 0xe000UL
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA0_PRI_SFT 13
+ __le64 qp_handle;
+ __le32 raweth_qp1_flags2;
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_CS_ALL_OK_MODE 0x8UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_META_FORMAT_MASK 0xf0UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_META_FORMAT_SFT 4
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_META_FORMAT_NONE (0x0UL << 4)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_META_FORMAT_ACT_REC_PTR (0x1UL << 4)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_META_FORMAT_TUNNEL_ID (0x2UL << 4)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_META_FORMAT_CHDR_DATA (0x3UL << 4)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_META_FORMAT_HDR_OFFSET (0x4UL << 4)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_META_FORMAT_LAST \
+ CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_META_FORMAT_HDR_OFFSET
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_IP_TYPE 0x100UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_COMPLETE_CHECKSUM_CALC 0x200UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_CS_OK_MASK 0xfc00UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_CS_OK_SFT 10
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_COMPLETE_CHECKSUM_MASK 0xffff0000UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_COMPLETE_CHECKSUM_SFT 16
+ __le32 cfa_metadata2;
+ u8 cqe_type_toggle;
+ #define CQ_RES_RAWETH_QP1_V2_TOGGLE 0x1UL
+ #define CQ_RES_RAWETH_QP1_V2_CQE_TYPE_MASK 0x1eUL
+ #define CQ_RES_RAWETH_QP1_V2_CQE_TYPE_SFT 1
+ #define CQ_RES_RAWETH_QP1_V2_CQE_TYPE_RES_RAWETH_QP1 (0x3UL << 1)
+ #define CQ_RES_RAWETH_QP1_V2_CQE_TYPE_LAST CQ_RES_RAWETH_QP1_V2_CQE_TYPE_RES_RAWETH_QP1
+ u8 status;
+ #define CQ_RES_RAWETH_QP1_V2_STATUS_OK 0x0UL
+ #define CQ_RES_RAWETH_QP1_V2_STATUS_LOCAL_ACCESS_ERROR 0x1UL
+ #define CQ_RES_RAWETH_QP1_V2_STATUS_HW_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_RES_RAWETH_QP1_V2_STATUS_LOCAL_PROTECTION_ERR 0x3UL
+ #define CQ_RES_RAWETH_QP1_V2_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
+ #define CQ_RES_RAWETH_QP1_V2_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
+ #define CQ_RES_RAWETH_QP1_V2_STATUS_WORK_REQUEST_FLUSHED_ERR 0x7UL
+ #define CQ_RES_RAWETH_QP1_V2_STATUS_HW_FLUSH_ERR 0x8UL
+ #define CQ_RES_RAWETH_QP1_V2_STATUS_LAST CQ_RES_RAWETH_QP1_V2_STATUS_HW_FLUSH_ERR
+ __le16 flags;
+ #define CQ_RES_RAWETH_QP1_V2_FLAGS_SRQ 0x1UL
+ #define CQ_RES_RAWETH_QP1_V2_FLAGS_SRQ_RQ 0x0UL
+ #define CQ_RES_RAWETH_QP1_V2_FLAGS_SRQ_SRQ 0x1UL
+ #define CQ_RES_RAWETH_QP1_V2_FLAGS_SRQ_LAST CQ_RES_RAWETH_QP1_V2_FLAGS_SRQ_SRQ
+ __le32 raweth_qp1_payload_offset_srq_or_rq_wr_id;
+ #define CQ_RES_RAWETH_QP1_V2_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
+ #define CQ_RES_RAWETH_QP1_V2_SRQ_OR_RQ_WR_ID_SFT 0
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_MASK 0xf00000UL
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_SFT 20
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_TPID_SEL_MASK 0x700000UL
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_TPID_SEL_SFT 20
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_TPID_SEL_TPID88A8 (0x0UL << 20)
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_TPID_SEL_TPID8100 (0x1UL << 20)
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_TPID_SEL_TPID9100 (0x2UL << 20)
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_TPID_SEL_TPID9200 (0x3UL << 20)
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_TPID_SEL_TPID9300 (0x4UL << 20)
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_TPID_SEL_TPIDCFG (0x5UL << 20)
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_TPID_SEL_LAST \
+ CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_TPID_SEL_TPIDCFG
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_VALID 0x800000UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_PAYLOAD_OFFSET_MASK 0xff000000UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_PAYLOAD_OFFSET_SFT 24
+};
+
+/* cq_terminal (size:256b/32B) */
+struct cq_terminal {
+ __le64 qp_handle;
+ __le16 sq_cons_idx;
+ __le16 rq_cons_idx;
+ __le32 reserved32_1;
+ __le64 reserved64_3;
+ u8 cqe_type_toggle;
+ #define CQ_TERMINAL_TOGGLE 0x1UL
+ #define CQ_TERMINAL_CQE_TYPE_MASK 0x1eUL
+ #define CQ_TERMINAL_CQE_TYPE_SFT 1
+ #define CQ_TERMINAL_CQE_TYPE_TERMINAL (0xeUL << 1)
+ #define CQ_TERMINAL_CQE_TYPE_LAST CQ_TERMINAL_CQE_TYPE_TERMINAL
+ u8 status;
+ #define CQ_TERMINAL_STATUS_OK 0x0UL
+ #define CQ_TERMINAL_STATUS_LAST CQ_TERMINAL_STATUS_OK
+ __le16 reserved16;
+ __le32 reserved32_2;
+};
+
+/* cq_cutoff (size:256b/32B) */
+struct cq_cutoff {
+ __le64 reserved64_1;
+ __le64 reserved64_2;
+ __le64 reserved64_3;
+ u8 cqe_type_toggle;
+ #define CQ_CUTOFF_TOGGLE 0x1UL
+ #define CQ_CUTOFF_CQE_TYPE_MASK 0x1eUL
+ #define CQ_CUTOFF_CQE_TYPE_SFT 1
+ #define CQ_CUTOFF_CQE_TYPE_CUT_OFF (0xfUL << 1)
+ #define CQ_CUTOFF_CQE_TYPE_LAST CQ_CUTOFF_CQE_TYPE_CUT_OFF
+ #define CQ_CUTOFF_RESIZE_TOGGLE_MASK 0x60UL
+ #define CQ_CUTOFF_RESIZE_TOGGLE_SFT 5
+ u8 status;
+ #define CQ_CUTOFF_STATUS_OK 0x0UL
+ #define CQ_CUTOFF_STATUS_LAST CQ_CUTOFF_STATUS_OK
+ __le16 reserved16;
+ __le32 reserved32;
+};
+
+/* nq_base (size:128b/16B) */
+struct nq_base {
+ __le16 info10_type;
+ #define NQ_BASE_TYPE_MASK 0x3fUL
+ #define NQ_BASE_TYPE_SFT 0
+ #define NQ_BASE_TYPE_CQ_NOTIFICATION 0x30UL
+ #define NQ_BASE_TYPE_SRQ_EVENT 0x32UL
+ #define NQ_BASE_TYPE_DBQ_EVENT 0x34UL
+ #define NQ_BASE_TYPE_QP_EVENT 0x38UL
+ #define NQ_BASE_TYPE_FUNC_EVENT 0x3aUL
+ #define NQ_BASE_TYPE_LAST NQ_BASE_TYPE_FUNC_EVENT
+ #define NQ_BASE_INFO10_MASK 0xffc0UL
+ #define NQ_BASE_INFO10_SFT 6
+ __le16 info16;
+ __le32 info32;
+ __le32 info63_v[2];
+ #define NQ_BASE_V 0x1UL
+ #define NQ_BASE_INFO63_MASK 0xfffffffeUL
+ #define NQ_BASE_INFO63_SFT 1
+};
+
+/* nq_cn (size:128b/16B) */
+struct nq_cn {
+ __le16 type;
+ #define NQ_CN_TYPE_MASK 0x3fUL
+ #define NQ_CN_TYPE_SFT 0
+ #define NQ_CN_TYPE_CQ_NOTIFICATION 0x30UL
+ #define NQ_CN_TYPE_LAST NQ_CN_TYPE_CQ_NOTIFICATION
+ #define NQ_CN_TOGGLE_MASK 0xc0UL
+ #define NQ_CN_TOGGLE_SFT 6
+ __le16 reserved16;
+ __le32 cq_handle_low;
+ __le32 v;
+ #define NQ_CN_V 0x1UL
+ __le32 cq_handle_high;
+};
+
+/* nq_srq_event (size:128b/16B) */
+struct nq_srq_event {
+ u8 type;
+ #define NQ_SRQ_EVENT_TYPE_MASK 0x3fUL
+ #define NQ_SRQ_EVENT_TYPE_SFT 0
+ #define NQ_SRQ_EVENT_TYPE_SRQ_EVENT 0x32UL
+ #define NQ_SRQ_EVENT_TYPE_LAST NQ_SRQ_EVENT_TYPE_SRQ_EVENT
+ #define NQ_SRQ_EVENT_TOGGLE_MASK 0xc0UL
+ #define NQ_SRQ_EVENT_TOGGLE_SFT 6
+ u8 event;
+ #define NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT 0x1UL
+ #define NQ_SRQ_EVENT_EVENT_LAST NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT
+ __le16 reserved16;
+ __le32 srq_handle_low;
+ __le32 v;
+ #define NQ_SRQ_EVENT_V 0x1UL
+ __le32 srq_handle_high;
+};
+
+/* nq_dbq_event (size:128b/16B) */
+struct nq_dbq_event {
+ u8 type;
+ #define NQ_DBQ_EVENT_TYPE_MASK 0x3fUL
+ #define NQ_DBQ_EVENT_TYPE_SFT 0
+ #define NQ_DBQ_EVENT_TYPE_DBQ_EVENT 0x34UL
+ #define NQ_DBQ_EVENT_TYPE_LAST NQ_DBQ_EVENT_TYPE_DBQ_EVENT
+ u8 event;
+ #define NQ_DBQ_EVENT_EVENT_DBQ_THRESHOLD_EVENT 0x1UL
+ #define NQ_DBQ_EVENT_EVENT_LAST NQ_DBQ_EVENT_EVENT_DBQ_THRESHOLD_EVENT
+ __le16 db_pfid;
+ #define NQ_DBQ_EVENT_DB_PFID_MASK 0xfUL
+ #define NQ_DBQ_EVENT_DB_PFID_SFT 0
+ __le32 db_dpi;
+ #define NQ_DBQ_EVENT_DB_DPI_MASK 0xfffffUL
+ #define NQ_DBQ_EVENT_DB_DPI_SFT 0
+ __le32 v;
+ #define NQ_DBQ_EVENT_V 0x1UL
+ __le32 db_type_db_xid;
+ #define NQ_DBQ_EVENT_DB_XID_MASK 0xfffffUL
+ #define NQ_DBQ_EVENT_DB_XID_SFT 0
+ #define NQ_DBQ_EVENT_DB_TYPE_MASK 0xf0000000UL
+ #define NQ_DBQ_EVENT_DB_TYPE_SFT 28
+};
+
+/* xrrq_irrq (size:256b/32B) */
+struct xrrq_irrq {
+ __le16 credits_type;
+ #define XRRQ_IRRQ_TYPE 0x1UL
+ #define XRRQ_IRRQ_TYPE_READ_REQ 0x0UL
+ #define XRRQ_IRRQ_TYPE_ATOMIC_REQ 0x1UL
+ #define XRRQ_IRRQ_TYPE_LAST XRRQ_IRRQ_TYPE_ATOMIC_REQ
+ #define XRRQ_IRRQ_CREDITS_MASK 0xf800UL
+ #define XRRQ_IRRQ_CREDITS_SFT 11
+ __le16 reserved16;
+ __le32 reserved32;
+ __le32 psn;
+ #define XRRQ_IRRQ_PSN_MASK 0xffffffUL
+ #define XRRQ_IRRQ_PSN_SFT 0
+ __le32 msn;
+ #define XRRQ_IRRQ_MSN_MASK 0xffffffUL
+ #define XRRQ_IRRQ_MSN_SFT 0
+ __le64 va_or_atomic_result;
+ __le32 rdma_r_key;
+ __le32 length;
+};
+
+/* xrrq_orrq (size:256b/32B) */
+struct xrrq_orrq {
+ __le16 num_sges_type;
+ #define XRRQ_ORRQ_TYPE 0x1UL
+ #define XRRQ_ORRQ_TYPE_READ_REQ 0x0UL
+ #define XRRQ_ORRQ_TYPE_ATOMIC_REQ 0x1UL
+ #define XRRQ_ORRQ_TYPE_LAST XRRQ_ORRQ_TYPE_ATOMIC_REQ
+ #define XRRQ_ORRQ_NUM_SGES_MASK 0xf800UL
+ #define XRRQ_ORRQ_NUM_SGES_SFT 11
+ __le16 reserved16;
+ __le32 length;
+ __le32 psn;
+ #define XRRQ_ORRQ_PSN_MASK 0xffffffUL
+ #define XRRQ_ORRQ_PSN_SFT 0
+ __le32 end_psn;
+ #define XRRQ_ORRQ_END_PSN_MASK 0xffffffUL
+ #define XRRQ_ORRQ_END_PSN_SFT 0
+ __le64 first_sge_phy_or_sing_sge_va;
+ __le32 single_sge_l_key;
+ __le32 single_sge_size;
+};
+
+/* ptu_pte (size:64b/8B) */
+struct ptu_pte {
+ __le32 page_next_to_last_last_valid[2];
+ #define PTU_PTE_VALID 0x1UL
+ #define PTU_PTE_LAST 0x2UL
+ #define PTU_PTE_NEXT_TO_LAST 0x4UL
+ #define PTU_PTE_UNUSED_MASK 0xff8UL
+ #define PTU_PTE_UNUSED_SFT 3
+ #define PTU_PTE_PAGE_MASK 0xfffff000UL
+ #define PTU_PTE_PAGE_SFT 12
+};
+
+/* ptu_pde (size:64b/8B) */
+struct ptu_pde {
+ __le32 page_valid[2];
+ #define PTU_PDE_VALID 0x1UL
+ #define PTU_PDE_UNUSED_MASK 0xffeUL
+ #define PTU_PDE_UNUSED_SFT 1
+ #define PTU_PDE_PAGE_MASK 0xfffff000UL
+ #define PTU_PDE_PAGE_SFT 12
+};
+
+#endif /* ___BNXT_RE_HSI_H__ */
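The completion-ring structures added above are typically decoded by byte-swapping each little-endian word to host order and then masking it with the matching *_MASK define; the enumerator values are already shifted into place, so a masked field can be compared against them directly. A minimal sketch of that pattern for the v2 raw-Ethernet/QP1 CQE (the helper name is illustrative and not part of the patch):

/* Illustrative only: classify a v2 raw-Ethernet/QP1 completion by its
 * inner-packet type, comparing the masked field against the enumerators
 * defined above (which are already shifted into position). */
static bool example_cqe_is_roce(const struct cq_res_raweth_qp1_v2 *cqe)
{
	u16 flags = le16_to_cpu(cqe->raweth_qp1_flags);

	return (flags & CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_MASK) ==
	       CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_ROCE;
}

The same mask-and-compare (or mask-and-shift with the *_SFT value) applies to every other field in these structures.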
diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig
deleted file mode 100644
index a7b77cb3d5d5..000000000000
--- a/drivers/infiniband/hw/cxgb3/Kconfig
+++ /dev/null
@@ -1,18 +0,0 @@
-config INFINIBAND_CXGB3
- tristate "Chelsio RDMA Driver"
- depends on CHELSIO_T3
- select GENERIC_ALLOCATOR
- ---help---
- This is an iWARP/RDMA driver for the Chelsio T3 1GbE and
- 10GbE adapters.
-
- For general information about Chelsio and our products, visit
- our website at <http://www.chelsio.com>.
-
- For customer support, please visit our customer support page at
- <http://www.chelsio.com/support.html>.
-
- Please send feedback to <linux-bugs@chelsio.com>.
-
- To compile this driver as a module, choose M here: the module
- will be called iw_cxgb3.
diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile
deleted file mode 100644
index 34bb86a6ae3a..000000000000
--- a/drivers/infiniband/hw/cxgb3/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb3
-
-obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o
-
-iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \
- iwch_provider.o iwch.o cxio_hal.o cxio_resource.o
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
deleted file mode 100644
index 8ac72ac7cbac..000000000000
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ /dev/null
@@ -1,1331 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <asm/delay.h>
-
-#include <linux/mutex.h>
-#include <linux/netdevice.h>
-#include <linux/sched.h>
-#include <linux/spinlock.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/slab.h>
-#include <net/net_namespace.h>
-
-#include "cxio_resource.h"
-#include "cxio_hal.h"
-#include "cxgb3_offload.h"
-#include "sge_defs.h"
-
-static LIST_HEAD(rdev_list);
-static cxio_hal_ev_callback_func_t cxio_ev_cb = NULL;
-
-static struct cxio_rdev *cxio_hal_find_rdev_by_name(char *dev_name)
-{
- struct cxio_rdev *rdev;
-
- list_for_each_entry(rdev, &rdev_list, entry)
- if (!strcmp(rdev->dev_name, dev_name))
- return rdev;
- return NULL;
-}
-
-static struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev *tdev)
-{
- struct cxio_rdev *rdev;
-
- list_for_each_entry(rdev, &rdev_list, entry)
- if (rdev->t3cdev_p == tdev)
- return rdev;
- return NULL;
-}
-
-int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
- enum t3_cq_opcode op, u32 credit)
-{
- int ret;
- struct t3_cqe *cqe;
- u32 rptr;
-
- struct rdma_cq_op setup;
- setup.id = cq->cqid;
- setup.credits = (op == CQ_CREDIT_UPDATE) ? credit : 0;
- setup.op = op;
- ret = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_OP, &setup);
-
- if ((ret < 0) || (op == CQ_CREDIT_UPDATE))
- return ret;
-
- /*
- * If the rearm returned an index other than our current index,
- * then there might be CQE's in flight (being DMA'd). We must wait
- * here for them to complete or the consumer can miss a notification.
- */
- if (Q_PTR2IDX((cq->rptr), cq->size_log2) != ret) {
- int i=0;
-
- rptr = cq->rptr;
-
- /*
- * Keep the generation correct by bumping rptr until it
- * matches the index returned by the rearm - 1.
- */
- while (Q_PTR2IDX((rptr+1), cq->size_log2) != ret)
- rptr++;
-
- /*
- * Now rptr is the index for the (last) cqe that was
- * in-flight at the time the HW rearmed the CQ. We
- * spin until that CQE is valid.
- */
- cqe = cq->queue + Q_PTR2IDX(rptr, cq->size_log2);
- while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) {
- udelay(1);
- if (i++ > 1000000) {
- pr_err("%s: stalled rnic\n", rdev_p->dev_name);
- return -EIO;
- }
- }
-
- return 1;
- }
-
- return 0;
-}
-
-static int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid)
-{
- struct rdma_cq_setup setup;
- setup.id = cqid;
- setup.base_addr = 0; /* NULL address */
- setup.size = 0; /* disable the CQ */
- setup.credits = 0;
- setup.credit_thres = 0;
- setup.ovfl_mode = 0;
- return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
-}
-
-static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
-{
- u64 sge_cmd;
- struct t3_modify_qp_wr *wqe;
- struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
- if (!skb) {
- pr_debug("%s alloc_skb failed\n", __func__);
- return -ENOMEM;
- }
- wqe = skb_put_zero(skb, sizeof(*wqe));
- build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD,
- T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 0, qpid, 7,
- T3_SOPEOP);
- wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
- sge_cmd = qpid << 8 | 3;
- wqe->sge_cmd = cpu_to_be64(sge_cmd);
- skb->priority = CPL_PRIORITY_CONTROL;
- return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
-}
-
-int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel)
-{
- struct rdma_cq_setup setup;
- int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe);
-
- size += 1; /* one extra page for storing cq-in-err state */
- cq->cqid = cxio_hal_get_cqid(rdev_p->rscp);
- if (!cq->cqid)
- return -ENOMEM;
- if (kernel) {
- cq->sw_queue = kzalloc(size, GFP_KERNEL);
- if (!cq->sw_queue)
- return -ENOMEM;
- }
- cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), size,
- &(cq->dma_addr), GFP_KERNEL);
- if (!cq->queue) {
- kfree(cq->sw_queue);
- return -ENOMEM;
- }
- dma_unmap_addr_set(cq, mapping, cq->dma_addr);
- memset(cq->queue, 0, size);
- setup.id = cq->cqid;
- setup.base_addr = (u64) (cq->dma_addr);
- setup.size = 1UL << cq->size_log2;
- setup.credits = 65535;
- setup.credit_thres = 1;
- if (rdev_p->t3cdev_p->type != T3A)
- setup.ovfl_mode = 0;
- else
- setup.ovfl_mode = 1;
- return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
-}
-
-#ifdef notyet
-int cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
-{
- struct rdma_cq_setup setup;
- setup.id = cq->cqid;
- setup.base_addr = (u64) (cq->dma_addr);
- setup.size = 1UL << cq->size_log2;
- setup.credits = setup.size;
- setup.credit_thres = setup.size; /* TBD: overflow recovery */
- setup.ovfl_mode = 1;
- return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
-}
-#endif
-
-static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
-{
- struct cxio_qpid_list *entry;
- u32 qpid;
- int i;
-
- mutex_lock(&uctx->lock);
- if (!list_empty(&uctx->qpids)) {
- entry = list_entry(uctx->qpids.next, struct cxio_qpid_list,
- entry);
- list_del(&entry->entry);
- qpid = entry->qpid;
- kfree(entry);
- } else {
- qpid = cxio_hal_get_qpid(rdev_p->rscp);
- if (!qpid)
- goto out;
- for (i = qpid+1; i & rdev_p->qpmask; i++) {
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
- if (!entry)
- break;
- entry->qpid = i;
- list_add_tail(&entry->entry, &uctx->qpids);
- }
- }
-out:
- mutex_unlock(&uctx->lock);
- pr_debug("%s qpid 0x%x\n", __func__, qpid);
- return qpid;
-}
-
-static void put_qpid(struct cxio_rdev *rdev_p, u32 qpid,
- struct cxio_ucontext *uctx)
-{
- struct cxio_qpid_list *entry;
-
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
- if (!entry)
- return;
- pr_debug("%s qpid 0x%x\n", __func__, qpid);
- entry->qpid = qpid;
- mutex_lock(&uctx->lock);
- list_add_tail(&entry->entry, &uctx->qpids);
- mutex_unlock(&uctx->lock);
-}
-
-void cxio_release_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
-{
- struct list_head *pos, *nxt;
- struct cxio_qpid_list *entry;
-
- mutex_lock(&uctx->lock);
- list_for_each_safe(pos, nxt, &uctx->qpids) {
- entry = list_entry(pos, struct cxio_qpid_list, entry);
- list_del_init(&entry->entry);
- if (!(entry->qpid & rdev_p->qpmask))
- cxio_hal_put_qpid(rdev_p->rscp, entry->qpid);
- kfree(entry);
- }
- mutex_unlock(&uctx->lock);
-}
-
-void cxio_init_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
-{
- INIT_LIST_HEAD(&uctx->qpids);
- mutex_init(&uctx->lock);
-}
-
-int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
- struct t3_wq *wq, struct cxio_ucontext *uctx)
-{
- int depth = 1UL << wq->size_log2;
- int rqsize = 1UL << wq->rq_size_log2;
-
- wq->qpid = get_qpid(rdev_p, uctx);
- if (!wq->qpid)
- return -ENOMEM;
-
- wq->rq = kcalloc(depth, sizeof(struct t3_swrq), GFP_KERNEL);
- if (!wq->rq)
- goto err1;
-
- wq->rq_addr = cxio_hal_rqtpool_alloc(rdev_p, rqsize);
- if (!wq->rq_addr)
- goto err2;
-
- wq->sq = kcalloc(depth, sizeof(struct t3_swsq), GFP_KERNEL);
- if (!wq->sq)
- goto err3;
-
- wq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev),
- depth * sizeof(union t3_wr),
- &(wq->dma_addr), GFP_KERNEL);
- if (!wq->queue)
- goto err4;
-
- dma_unmap_addr_set(wq, mapping, wq->dma_addr);
- wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
- if (!kernel_domain)
- wq->udb = (u64)rdev_p->rnic_info.udbell_physbase +
- (wq->qpid << rdev_p->qpshift);
- wq->rdev = rdev_p;
- pr_debug("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n",
- __func__, wq->qpid, wq->doorbell, (unsigned long long)wq->udb);
- return 0;
-err4:
- kfree(wq->sq);
-err3:
- cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, rqsize);
-err2:
- kfree(wq->rq);
-err1:
- put_qpid(rdev_p, wq->qpid, uctx);
- return -ENOMEM;
-}
-
-int cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
-{
- int err;
- err = cxio_hal_clear_cq_ctx(rdev_p, cq->cqid);
- kfree(cq->sw_queue);
- dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
- (1UL << (cq->size_log2))
- * sizeof(struct t3_cqe) + 1, cq->queue,
- dma_unmap_addr(cq, mapping));
- cxio_hal_put_cqid(rdev_p->rscp, cq->cqid);
- return err;
-}
-
-int cxio_destroy_qp(struct cxio_rdev *rdev_p, struct t3_wq *wq,
- struct cxio_ucontext *uctx)
-{
- dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
- (1UL << (wq->size_log2))
- * sizeof(union t3_wr), wq->queue,
- dma_unmap_addr(wq, mapping));
- kfree(wq->sq);
- cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, (1UL << wq->rq_size_log2));
- kfree(wq->rq);
- put_qpid(rdev_p, wq->qpid, uctx);
- return 0;
-}
-
-static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq)
-{
- struct t3_cqe cqe;
-
- pr_debug("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__,
- wq, cq, cq->sw_rptr, cq->sw_wptr);
- memset(&cqe, 0, sizeof(cqe));
- cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
- V_CQE_OPCODE(T3_SEND) |
- V_CQE_TYPE(0) |
- V_CQE_SWCQE(1) |
- V_CQE_QPID(wq->qpid) |
- V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr,
- cq->size_log2)));
- *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe;
- cq->sw_wptr++;
-}
-
-int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
-{
- u32 ptr;
- int flushed = 0;
-
- pr_debug("%s wq %p cq %p\n", __func__, wq, cq);
-
- /* flush RQ */
- pr_debug("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__,
- wq->rq_rptr, wq->rq_wptr, count);
- ptr = wq->rq_rptr + count;
- while (ptr++ != wq->rq_wptr) {
- insert_recv_cqe(wq, cq);
- flushed++;
- }
- return flushed;
-}
-
-static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
- struct t3_swsq *sqp)
-{
- struct t3_cqe cqe;
-
- pr_debug("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__,
- wq, cq, cq->sw_rptr, cq->sw_wptr);
- memset(&cqe, 0, sizeof(cqe));
- cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
- V_CQE_OPCODE(sqp->opcode) |
- V_CQE_TYPE(1) |
- V_CQE_SWCQE(1) |
- V_CQE_QPID(wq->qpid) |
- V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr,
- cq->size_log2)));
- cqe.u.scqe.wrid_hi = sqp->sq_wptr;
-
- *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe;
- cq->sw_wptr++;
-}
-
-int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
-{
- __u32 ptr = wq->sq_rptr + count;
- int flushed = 0;
- struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
-
- while (ptr != wq->sq_wptr) {
- sqp->signaled = 0;
- insert_sq_cqe(wq, cq, sqp);
- ptr++;
- sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
- flushed++;
- }
- return flushed;
-}
-
-/*
- * Move all CQEs from the HWCQ into the SWCQ.
- */
-void cxio_flush_hw_cq(struct t3_cq *cq)
-{
- struct t3_cqe *cqe, *swcqe;
-
- pr_debug("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid);
- cqe = cxio_next_hw_cqe(cq);
- while (cqe) {
- pr_debug("%s flushing hwcq rptr 0x%x to swcq wptr 0x%x\n",
- __func__, cq->rptr, cq->sw_wptr);
- swcqe = cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2);
- *swcqe = *cqe;
- swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
- cq->sw_wptr++;
- cq->rptr++;
- cqe = cxio_next_hw_cqe(cq);
- }
-}
-
-static int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq)
-{
- if (CQE_OPCODE(*cqe) == T3_TERMINATE)
- return 0;
-
- if ((CQE_OPCODE(*cqe) == T3_RDMA_WRITE) && RQ_TYPE(*cqe))
- return 0;
-
- if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe))
- return 0;
-
- if (CQE_SEND_OPCODE(*cqe) && RQ_TYPE(*cqe) &&
- Q_EMPTY(wq->rq_rptr, wq->rq_wptr))
- return 0;
-
- return 1;
-}
-
-void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
-{
- struct t3_cqe *cqe;
- u32 ptr;
-
- *count = 0;
- ptr = cq->sw_rptr;
- while (!Q_EMPTY(ptr, cq->sw_wptr)) {
- cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
- if ((SQ_TYPE(*cqe) ||
- ((CQE_OPCODE(*cqe) == T3_READ_RESP) && wq->oldest_read)) &&
- (CQE_QPID(*cqe) == wq->qpid))
- (*count)++;
- ptr++;
- }
- pr_debug("%s cq %p count %d\n", __func__, cq, *count);
-}
-
-void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
-{
- struct t3_cqe *cqe;
- u32 ptr;
-
- *count = 0;
- pr_debug("%s count zero %d\n", __func__, *count);
- ptr = cq->sw_rptr;
- while (!Q_EMPTY(ptr, cq->sw_wptr)) {
- cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
- if (RQ_TYPE(*cqe) && (CQE_OPCODE(*cqe) != T3_READ_RESP) &&
- (CQE_QPID(*cqe) == wq->qpid) && cqe_completes_wr(cqe, wq))
- (*count)++;
- ptr++;
- }
- pr_debug("%s cq %p count %d\n", __func__, cq, *count);
-}
-
-static int cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p)
-{
- struct rdma_cq_setup setup;
- setup.id = 0;
- setup.base_addr = 0; /* NULL address */
- setup.size = 1; /* enable the CQ */
- setup.credits = 0;
-
- /* force SGE to redirect to RspQ and interrupt */
- setup.credit_thres = 0;
- setup.ovfl_mode = 1;
- return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
-}
-
-static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
-{
- int err;
- u64 sge_cmd, ctx0, ctx1;
- u64 base_addr;
- struct t3_modify_qp_wr *wqe;
- struct sk_buff *skb;
-
- skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
- if (!skb) {
- pr_debug("%s alloc_skb failed\n", __func__);
- return -ENOMEM;
- }
- err = cxio_hal_init_ctrl_cq(rdev_p);
- if (err) {
- pr_debug("%s err %d initializing ctrl_cq\n", __func__, err);
- goto err;
- }
- rdev_p->ctrl_qp.workq = dma_alloc_coherent(
- &(rdev_p->rnic_info.pdev->dev),
- (1 << T3_CTRL_QP_SIZE_LOG2) *
- sizeof(union t3_wr),
- &(rdev_p->ctrl_qp.dma_addr),
- GFP_KERNEL);
- if (!rdev_p->ctrl_qp.workq) {
- pr_debug("%s dma_alloc_coherent failed\n", __func__);
- err = -ENOMEM;
- goto err;
- }
- dma_unmap_addr_set(&rdev_p->ctrl_qp, mapping,
- rdev_p->ctrl_qp.dma_addr);
- rdev_p->ctrl_qp.doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
- memset(rdev_p->ctrl_qp.workq, 0,
- (1 << T3_CTRL_QP_SIZE_LOG2) * sizeof(union t3_wr));
-
- mutex_init(&rdev_p->ctrl_qp.lock);
- init_waitqueue_head(&rdev_p->ctrl_qp.waitq);
-
- /* update HW Ctrl QP context */
- base_addr = rdev_p->ctrl_qp.dma_addr;
- base_addr >>= 12;
- ctx0 = (V_EC_SIZE((1 << T3_CTRL_QP_SIZE_LOG2)) |
- V_EC_BASE_LO((u32) base_addr & 0xffff));
- ctx0 <<= 32;
- ctx0 |= V_EC_CREDITS(FW_WR_NUM);
- base_addr >>= 16;
- ctx1 = (u32) base_addr;
- base_addr >>= 32;
- ctx1 |= ((u64) (V_EC_BASE_HI((u32) base_addr & 0xf) | V_EC_RESPQ(0) |
- V_EC_TYPE(0) | V_EC_GEN(1) |
- V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32;
- wqe = skb_put_zero(skb, sizeof(*wqe));
- build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0,
- T3_CTL_QP_TID, 7, T3_SOPEOP);
- wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
- sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3;
- wqe->sge_cmd = cpu_to_be64(sge_cmd);
- wqe->ctx1 = cpu_to_be64(ctx1);
- wqe->ctx0 = cpu_to_be64(ctx0);
- pr_debug("CtrlQP dma_addr 0x%llx workq %p size %d\n",
- (unsigned long long)rdev_p->ctrl_qp.dma_addr,
- rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2);
- skb->priority = CPL_PRIORITY_CONTROL;
- return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
-err:
- kfree_skb(skb);
- return err;
-}
-
-static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p)
-{
- dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
- (1UL << T3_CTRL_QP_SIZE_LOG2)
- * sizeof(union t3_wr), rdev_p->ctrl_qp.workq,
- dma_unmap_addr(&rdev_p->ctrl_qp, mapping));
- return cxio_hal_clear_qp_ctx(rdev_p, T3_CTRL_QP_ID);
-}
-
-/* write len bytes of data into addr (32B aligned address)
- * If data is NULL, clear len byte of memory to zero.
- * caller acquires the ctrl_qp lock before the call
- */
-static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
- u32 len, void *data)
-{
- u32 i, nr_wqe, copy_len;
- u8 *copy_data;
- u8 wr_len, utx_len; /* length in 8 byte flit */
- enum t3_wr_flags flag;
- __be64 *wqe;
- u64 utx_cmd;
- addr &= 0x7FFFFFF;
- nr_wqe = len % 96 ? len / 96 + 1 : len / 96; /* 96B max per WQE */
- pr_debug("%s wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x\n",
- __func__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len,
- nr_wqe, data, addr);
- utx_len = 3; /* in 32B unit */
- for (i = 0; i < nr_wqe; i++) {
- if (Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr,
- T3_CTRL_QP_SIZE_LOG2)) {
- pr_debug("%s ctrl_qp full wtpr 0x%0x rptr 0x%0x, wait for more space i %d\n",
- __func__,
- rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i);
- if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
- !Q_FULL(rdev_p->ctrl_qp.rptr,
- rdev_p->ctrl_qp.wptr,
- T3_CTRL_QP_SIZE_LOG2))) {
- pr_debug("%s ctrl_qp workq interrupted\n",
- __func__);
- return -ERESTARTSYS;
- }
- pr_debug("%s ctrl_qp wakeup, continue posting work request i %d\n",
- __func__, i);
- }
- wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
- (1 << T3_CTRL_QP_SIZE_LOG2)));
- flag = 0;
- if (i == (nr_wqe - 1)) {
- /* last WQE */
- flag = T3_COMPLETION_FLAG;
- if (len % 32)
- utx_len = len / 32 + 1;
- else
- utx_len = len / 32;
- }
-
- /*
- * Force a CQE to return the credit to the workq in case
- * we posted more than half the max QP size of WRs
- */
- if ((i != 0) &&
- (i % (((1 << T3_CTRL_QP_SIZE_LOG2)) >> 1) == 0)) {
- flag = T3_COMPLETION_FLAG;
- pr_debug("%s force completion at i %d\n", __func__, i);
- }
-
- /* build the utx mem command */
- wqe += (sizeof(struct t3_bypass_wr) >> 3);
- utx_cmd = (T3_UTX_MEM_WRITE << 28) | (addr + i * 3);
- utx_cmd <<= 32;
- utx_cmd |= (utx_len << 28) | ((utx_len << 2) + 1);
- *wqe = cpu_to_be64(utx_cmd);
- wqe++;
- copy_data = (u8 *) data + i * 96;
- copy_len = len > 96 ? 96 : len;
-
- /* clear memory content if data is NULL */
- if (data)
- memcpy(wqe, copy_data, copy_len);
- else
- memset(wqe, 0, copy_len);
- if (copy_len % 32)
- memset(((u8 *) wqe) + copy_len, 0,
- 32 - (copy_len % 32));
- wr_len = ((sizeof(struct t3_bypass_wr)) >> 3) + 1 +
- (utx_len << 2);
- wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
- (1 << T3_CTRL_QP_SIZE_LOG2)));
-
- /* wptr in the WRID[31:0] */
- ((union t3_wrid *)(wqe+1))->id0.low = rdev_p->ctrl_qp.wptr;
-
- /*
- * This must be the last write with a memory barrier
- * for the genbit
- */
- build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag,
- Q_GENBIT(rdev_p->ctrl_qp.wptr,
- T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID,
- wr_len, T3_SOPEOP);
- if (flag == T3_COMPLETION_FLAG)
- ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID);
- len -= 96;
- rdev_p->ctrl_qp.wptr++;
- }
- return 0;
-}
-
-/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl_size and pbl_addr
- * OUT: stag index
- * TBD: shared memory region support
- */
-static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
- u32 *stag, u8 stag_state, u32 pdid,
- enum tpt_mem_type type, enum tpt_mem_perm perm,
- u32 zbva, u64 to, u32 len, u8 page_size,
- u32 pbl_size, u32 pbl_addr)
-{
- int err;
- struct tpt_entry tpt;
- u32 stag_idx;
- u32 wptr;
-
- if (cxio_fatal_error(rdev_p))
- return -EIO;
-
- stag_state = stag_state > 0;
- stag_idx = (*stag) >> 8;
-
- if ((!reset_tpt_entry) && !(*stag != T3_STAG_UNSET)) {
- stag_idx = cxio_hal_get_stag(rdev_p->rscp);
- if (!stag_idx)
- return -ENOMEM;
- *stag = (stag_idx << 8) | ((*stag) & 0xFF);
- }
- pr_debug("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
- __func__, stag_state, type, pdid, stag_idx);
-
- mutex_lock(&rdev_p->ctrl_qp.lock);
-
- /* write TPT entry */
- if (reset_tpt_entry)
- memset(&tpt, 0, sizeof(tpt));
- else {
- tpt.valid_stag_pdid = cpu_to_be32(F_TPT_VALID |
- V_TPT_STAG_KEY((*stag) & M_TPT_STAG_KEY) |
- V_TPT_STAG_STATE(stag_state) |
- V_TPT_STAG_TYPE(type) | V_TPT_PDID(pdid));
- BUG_ON(page_size >= 28);
- tpt.flags_pagesize_qpid = cpu_to_be32(V_TPT_PERM(perm) |
- ((perm & TPT_MW_BIND) ? F_TPT_MW_BIND_ENABLE : 0) |
- V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
- V_TPT_PAGE_SIZE(page_size));
- tpt.rsvd_pbl_addr = cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
- tpt.len = cpu_to_be32(len);
- tpt.va_hi = cpu_to_be32((u32) (to >> 32));
- tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL));
- tpt.rsvd_bind_cnt_or_pstag = 0;
- tpt.rsvd_pbl_size = cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2));
- }
- err = cxio_hal_ctrl_qp_write_mem(rdev_p,
- stag_idx +
- (rdev_p->rnic_info.tpt_base >> 5),
- sizeof(tpt), &tpt);
-
- /* release the stag index to free pool */
- if (reset_tpt_entry)
- cxio_hal_put_stag(rdev_p->rscp, stag_idx);
-
- wptr = rdev_p->ctrl_qp.wptr;
- mutex_unlock(&rdev_p->ctrl_qp.lock);
- if (!err)
- if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
- SEQ32_GE(rdev_p->ctrl_qp.rptr,
- wptr)))
- return -ERESTARTSYS;
- return err;
-}
-
-int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
- u32 pbl_addr, u32 pbl_size)
-{
- u32 wptr;
- int err;
-
- pr_debug("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
- __func__, pbl_addr, rdev_p->rnic_info.pbl_base,
- pbl_size);
-
- mutex_lock(&rdev_p->ctrl_qp.lock);
- err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3,
- pbl);
- wptr = rdev_p->ctrl_qp.wptr;
- mutex_unlock(&rdev_p->ctrl_qp.lock);
- if (err)
- return err;
-
- if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
- SEQ32_GE(rdev_p->ctrl_qp.rptr,
- wptr)))
- return -ERESTARTSYS;
-
- return 0;
-}
-
-int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
- enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
- u8 page_size, u32 pbl_size, u32 pbl_addr)
-{
- *stag = T3_STAG_UNSET;
- return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
- zbva, to, len, page_size, pbl_size, pbl_addr);
-}
-
-int cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
- enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
- u8 page_size, u32 pbl_size, u32 pbl_addr)
-{
- return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
- zbva, to, len, page_size, pbl_size, pbl_addr);
-}
-
-int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size,
- u32 pbl_addr)
-{
- return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
- pbl_size, pbl_addr);
-}
-
-int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid)
-{
- *stag = T3_STAG_UNSET;
- return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0,
- 0, 0);
-}
-
-int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag)
-{
- return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
- 0, 0);
-}
-
-int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr)
-{
- *stag = T3_STAG_UNSET;
- return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR,
- 0, 0, 0ULL, 0, 0, pbl_size, pbl_addr);
-}
-
-int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
-{
- struct t3_rdma_init_wr *wqe;
- struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_ATOMIC);
- if (!skb)
- return -ENOMEM;
- pr_debug("%s rdev_p %p\n", __func__, rdev_p);
- wqe = __skb_put(skb, sizeof(*wqe));
- wqe->wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_INIT));
- wqe->wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(attr->tid) |
- V_FW_RIWR_LEN(sizeof(*wqe) >> 3));
- wqe->wrid.id1 = 0;
- wqe->qpid = cpu_to_be32(attr->qpid);
- wqe->pdid = cpu_to_be32(attr->pdid);
- wqe->scqid = cpu_to_be32(attr->scqid);
- wqe->rcqid = cpu_to_be32(attr->rcqid);
- wqe->rq_addr = cpu_to_be32(attr->rq_addr - rdev_p->rnic_info.rqt_base);
- wqe->rq_size = cpu_to_be32(attr->rq_size);
- wqe->mpaattrs = attr->mpaattrs;
- wqe->qpcaps = attr->qpcaps;
- wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss);
- wqe->rqe_count = cpu_to_be16(attr->rqe_count);
- wqe->flags_rtr_type = cpu_to_be16(attr->flags |
- V_RTR_TYPE(attr->rtr_type) |
- V_CHAN(attr->chan));
- wqe->ord = cpu_to_be32(attr->ord);
- wqe->ird = cpu_to_be32(attr->ird);
- wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
- wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size);
- wqe->irs = cpu_to_be32(attr->irs);
- skb->priority = 0; /* 0=>ToeQ; 1=>CtrlQ */
- return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
-}
-
-void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
-{
- cxio_ev_cb = ev_cb;
-}
-
-void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
-{
- cxio_ev_cb = NULL;
-}
-
-static int cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct sk_buff *skb)
-{
- static int cnt;
- struct cxio_rdev *rdev_p = NULL;
- struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data;
- pr_debug("%d: %s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x se %0x notify %0x cqbranch %0x creditth %0x\n",
- cnt, __func__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg),
- RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg),
- RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg),
- RSPQ_CREDIT_THRESH(rsp_msg));
- pr_debug("CQE: QPID 0x%0x genbit %0x type 0x%0x status 0x%0x opcode %d len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
- CQE_QPID(rsp_msg->cqe), CQE_GENBIT(rsp_msg->cqe),
- CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
- CQE_OPCODE(rsp_msg->cqe), CQE_LEN(rsp_msg->cqe),
- CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
- rdev_p = (struct cxio_rdev *)t3cdev_p->ulp;
- if (!rdev_p) {
- pr_debug("%s called by t3cdev %p with null ulp\n", __func__,
- t3cdev_p);
- return 0;
- }
- if (CQE_QPID(rsp_msg->cqe) == T3_CTRL_QP_ID) {
- rdev_p->ctrl_qp.rptr = CQE_WRID_LOW(rsp_msg->cqe) + 1;
- wake_up_interruptible(&rdev_p->ctrl_qp.waitq);
- dev_kfree_skb_irq(skb);
- } else if (CQE_QPID(rsp_msg->cqe) == 0xfff8)
- dev_kfree_skb_irq(skb);
- else if (cxio_ev_cb)
- (*cxio_ev_cb) (rdev_p, skb);
- else
- dev_kfree_skb_irq(skb);
- cnt++;
- return 0;
-}
-
-/* Caller takes care of locking if needed */
-int cxio_rdev_open(struct cxio_rdev *rdev_p)
-{
- struct net_device *netdev_p = NULL;
- int err = 0;
- if (strlen(rdev_p->dev_name)) {
- if (cxio_hal_find_rdev_by_name(rdev_p->dev_name)) {
- return -EBUSY;
- }
- netdev_p = dev_get_by_name(&init_net, rdev_p->dev_name);
- if (!netdev_p) {
- return -EINVAL;
- }
- dev_put(netdev_p);
- } else if (rdev_p->t3cdev_p) {
- if (cxio_hal_find_rdev_by_t3cdev(rdev_p->t3cdev_p)) {
- return -EBUSY;
- }
- netdev_p = rdev_p->t3cdev_p->lldev;
- strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name,
- T3_MAX_DEV_NAME_LEN);
- } else {
- pr_debug("%s t3cdev_p or dev_name must be set\n", __func__);
- return -EINVAL;
- }
-
- list_add_tail(&rdev_p->entry, &rdev_list);
-
- pr_debug("%s opening rnic dev %s\n", __func__, rdev_p->dev_name);
- memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp));
- if (!rdev_p->t3cdev_p)
- rdev_p->t3cdev_p = dev2t3cdev(netdev_p);
- rdev_p->t3cdev_p->ulp = (void *) rdev_p;
-
- err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_EMBEDDED_INFO,
- &(rdev_p->fw_info));
- if (err) {
- pr_err("%s t3cdev_p(%p)->ctl returned error %d\n",
- __func__, rdev_p->t3cdev_p, err);
- goto err1;
- }
- if (G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers) != CXIO_FW_MAJ) {
- pr_err("fatal firmware version mismatch: need version %u but adapter has version %u\n",
- CXIO_FW_MAJ,
- G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers));
- err = -EINVAL;
- goto err1;
- }
-
- err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS,
- &(rdev_p->rnic_info));
- if (err) {
- pr_err("%s t3cdev_p(%p)->ctl returned error %d\n",
- __func__, rdev_p->t3cdev_p, err);
- goto err1;
- }
- err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_PORTS,
- &(rdev_p->port_info));
- if (err) {
- pr_err("%s t3cdev_p(%p)->ctl returned error %d\n",
- __func__, rdev_p->t3cdev_p, err);
- goto err1;
- }
-
- /*
- * qpshift is the number of bits to shift the qpid left in order
- * to get the correct address of the doorbell for that qp.
- */
- cxio_init_ucontext(rdev_p, &rdev_p->uctx);
- rdev_p->qpshift = PAGE_SHIFT -
- ilog2(65536 >>
- ilog2(rdev_p->rnic_info.udbell_len >>
- PAGE_SHIFT));
- rdev_p->qpnr = rdev_p->rnic_info.udbell_len >> PAGE_SHIFT;
- rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1;
- pr_debug("%s rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x\n",
- __func__, rdev_p->dev_name, rdev_p->rnic_info.tpt_base,
- rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p),
- rdev_p->rnic_info.pbl_base,
- rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base,
- rdev_p->rnic_info.rqt_top);
- pr_debug("udbell_len 0x%0x udbell_physbase 0x%lx kdb_addr %p qpshift %lu qpnr %d qpmask 0x%x\n",
- rdev_p->rnic_info.udbell_len,
- rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr,
- rdev_p->qpshift, rdev_p->qpnr, rdev_p->qpmask);
-
- err = cxio_hal_init_ctrl_qp(rdev_p);
- if (err) {
- pr_err("%s error %d initializing ctrl_qp\n", __func__, err);
- goto err1;
- }
- err = cxio_hal_init_resource(rdev_p, cxio_num_stags(rdev_p), 0,
- 0, T3_MAX_NUM_QP, T3_MAX_NUM_CQ,
- T3_MAX_NUM_PD);
- if (err) {
- pr_err("%s error %d initializing hal resources\n",
- __func__, err);
- goto err2;
- }
- err = cxio_hal_pblpool_create(rdev_p);
- if (err) {
- pr_err("%s error %d initializing pbl mem pool\n",
- __func__, err);
- goto err3;
- }
- err = cxio_hal_rqtpool_create(rdev_p);
- if (err) {
- pr_err("%s error %d initializing rqt mem pool\n",
- __func__, err);
- goto err4;
- }
- return 0;
-err4:
- cxio_hal_pblpool_destroy(rdev_p);
-err3:
- cxio_hal_destroy_resource(rdev_p->rscp);
-err2:
- cxio_hal_destroy_ctrl_qp(rdev_p);
-err1:
- rdev_p->t3cdev_p->ulp = NULL;
- list_del(&rdev_p->entry);
- return err;
-}
-
-void cxio_rdev_close(struct cxio_rdev *rdev_p)
-{
- if (rdev_p) {
- cxio_hal_pblpool_destroy(rdev_p);
- cxio_hal_rqtpool_destroy(rdev_p);
- list_del(&rdev_p->entry);
- cxio_hal_destroy_ctrl_qp(rdev_p);
- cxio_hal_destroy_resource(rdev_p->rscp);
- rdev_p->t3cdev_p->ulp = NULL;
- }
-}
-
-int __init cxio_hal_init(void)
-{
- if (cxio_hal_init_rhdl_resource(T3_MAX_NUM_RI))
- return -ENOMEM;
- t3_register_cpl_handler(CPL_ASYNC_NOTIF, cxio_hal_ev_handler);
- return 0;
-}
-
-void __exit cxio_hal_exit(void)
-{
- struct cxio_rdev *rdev, *tmp;
-
- t3_register_cpl_handler(CPL_ASYNC_NOTIF, NULL);
- list_for_each_entry_safe(rdev, tmp, &rdev_list, entry)
- cxio_rdev_close(rdev);
- cxio_hal_destroy_rhdl_resource();
-}
-
-static void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq)
-{
- struct t3_swsq *sqp;
- __u32 ptr = wq->sq_rptr;
- int count = Q_COUNT(wq->sq_rptr, wq->sq_wptr);
-
- sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
- while (count--)
- if (!sqp->signaled) {
- ptr++;
- sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
- } else if (sqp->complete) {
-
- /*
- * Insert this completed cqe into the swcq.
- */
- pr_debug("%s moving cqe into swcq sq idx %ld cq idx %ld\n",
- __func__, Q_PTR2IDX(ptr, wq->sq_size_log2),
- Q_PTR2IDX(cq->sw_wptr, cq->size_log2));
- sqp->cqe.header |= htonl(V_CQE_SWCQE(1));
- *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2))
- = sqp->cqe;
- cq->sw_wptr++;
- sqp->signaled = 0;
- break;
- } else
- break;
-}
-
-static void create_read_req_cqe(struct t3_wq *wq, struct t3_cqe *hw_cqe,
- struct t3_cqe *read_cqe)
-{
- read_cqe->u.scqe.wrid_hi = wq->oldest_read->sq_wptr;
- read_cqe->len = wq->oldest_read->read_len;
- read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(*hw_cqe)) |
- V_CQE_SWCQE(SW_CQE(*hw_cqe)) |
- V_CQE_OPCODE(T3_READ_REQ) |
- V_CQE_TYPE(1));
-}
-
-/*
- * Advance wq->oldest_read to the next read wr in the SWSQ, or set it to NULL.
- */
-static void advance_oldest_read(struct t3_wq *wq)
-{
-
- u32 rptr = wq->oldest_read - wq->sq + 1;
- u32 wptr = Q_PTR2IDX(wq->sq_wptr, wq->sq_size_log2);
-
- while (Q_PTR2IDX(rptr, wq->sq_size_log2) != wptr) {
- wq->oldest_read = wq->sq + Q_PTR2IDX(rptr, wq->sq_size_log2);
-
- if (wq->oldest_read->opcode == T3_READ_REQ)
- return;
- rptr++;
- }
- wq->oldest_read = NULL;
-}
-
-/*
- * cxio_poll_cq
- *
- * Caller must:
- * check the validity of the first CQE,
- * supply the wq associated with the qpid.
- *
- * credit: cq credit to return to sge.
- * cqe_flushed: 1 iff the CQE is flushed.
- * cqe: copy of the polled CQE.
- *
- * return value:
- * 0 CQE returned,
- * -1 CQE skipped, try again.
- */
-int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
- u8 *cqe_flushed, u64 *cookie, u32 *credit)
-{
- int ret = 0;
- struct t3_cqe *hw_cqe, read_cqe;
-
- *cqe_flushed = 0;
- *credit = 0;
- hw_cqe = cxio_next_cqe(cq);
-
- pr_debug("%s CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
- __func__, CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe),
- CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe),
- CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe),
- CQE_WRID_LOW(*hw_cqe));
-
- /*
- * skip cqe's not affiliated with a QP.
- */
- if (wq == NULL) {
- ret = -1;
- goto skip_cqe;
- }
-
- /*
- * Gotta tweak READ completions:
- * 1) the cqe doesn't contain the sq_wptr from the wr.
- * 2) opcode not reflected from the wr.
- * 3) read_len not reflected from the wr.
- * 4) cq_type is RQ_TYPE not SQ_TYPE.
- */
- if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) {
-
- /*
- * If this is an unsolicited read response, then the read
- * was generated by the kernel driver as part of peer-2-peer
- * connection setup. So ignore the completion.
- */
- if (!wq->oldest_read) {
- if (CQE_STATUS(*hw_cqe))
- wq->error = 1;
- ret = -1;
- goto skip_cqe;
- }
-
- /*
- * Don't write to the HWCQ, so create a new read req CQE
- * in local memory.
- */
- create_read_req_cqe(wq, hw_cqe, &read_cqe);
- hw_cqe = &read_cqe;
- advance_oldest_read(wq);
- }
-
- /*
- * T3A: Discard TERMINATE CQEs.
- */
- if (CQE_OPCODE(*hw_cqe) == T3_TERMINATE) {
- ret = -1;
- wq->error = 1;
- goto skip_cqe;
- }
-
- if (CQE_STATUS(*hw_cqe) || wq->error) {
- *cqe_flushed = wq->error;
- wq->error = 1;
-
- /*
- * T3A inserts errors into the CQE. We cannot return
- * these as work completions.
- */
- /* incoming write failures */
- if ((CQE_OPCODE(*hw_cqe) == T3_RDMA_WRITE)
- && RQ_TYPE(*hw_cqe)) {
- ret = -1;
- goto skip_cqe;
- }
- /* incoming read request failures */
- if ((CQE_OPCODE(*hw_cqe) == T3_READ_RESP) && SQ_TYPE(*hw_cqe)) {
- ret = -1;
- goto skip_cqe;
- }
-
- /* incoming SEND with no receive posted failures */
- if (CQE_SEND_OPCODE(*hw_cqe) && RQ_TYPE(*hw_cqe) &&
- Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
- ret = -1;
- goto skip_cqe;
- }
- BUG_ON((*cqe_flushed == 0) && !SW_CQE(*hw_cqe));
- goto proc_cqe;
- }
-
- /*
- * RECV completion.
- */
- if (RQ_TYPE(*hw_cqe)) {
-
- /*
- * HW only validates 4 bits of MSN. So we must validate that
- * the MSN in the SEND is the next expected MSN. If its not,
- * then we complete this with TPT_ERR_MSN and mark the wq in
- * error.
- */
-
- if (Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
- wq->error = 1;
- ret = -1;
- goto skip_cqe;
- }
-
- if (unlikely((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) {
- wq->error = 1;
- hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN));
- goto proc_cqe;
- }
- goto proc_cqe;
- }
-
- /*
- * If we get here its a send completion.
- *
- * Handle out of order completion. These get stuffed
- * in the SW SQ. Then the SW SQ is walked to move any
- * now in-order completions into the SW CQ. This handles
- * 2 cases:
- * 1) reaping unsignaled WRs when the first subsequent
- * signaled WR is completed.
- * 2) out of order read completions.
- */
- if (!SW_CQE(*hw_cqe) && (CQE_WRID_SQ_WPTR(*hw_cqe) != wq->sq_rptr)) {
- struct t3_swsq *sqp;
-
- pr_debug("%s out of order completion going in swsq at idx %ld\n",
- __func__,
- Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe),
- wq->sq_size_log2));
- sqp = wq->sq +
- Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2);
- sqp->cqe = *hw_cqe;
- sqp->complete = 1;
- ret = -1;
- goto flush_wq;
- }
-
-proc_cqe:
- *cqe = *hw_cqe;
-
- /*
- * Reap the associated WR(s) that are freed up with this
- * completion.
- */
- if (SQ_TYPE(*hw_cqe)) {
- wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe);
- pr_debug("%s completing sq idx %ld\n", __func__,
- Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2));
- *cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id;
- wq->sq_rptr++;
- } else {
- pr_debug("%s completing rq idx %ld\n", __func__,
- Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
- *cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id;
- if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr)
- cxio_hal_pblpool_free(wq->rdev,
- wq->rq[Q_PTR2IDX(wq->rq_rptr,
- wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE);
- BUG_ON(Q_EMPTY(wq->rq_rptr, wq->rq_wptr));
- wq->rq_rptr++;
- }
-
-flush_wq:
- /*
- * Flush any completed cqes that are now in-order.
- */
- flush_completed_wrs(wq, cq);
-
-skip_cqe:
- if (SW_CQE(*hw_cqe)) {
- pr_debug("%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x\n",
- __func__, cq, cq->cqid, cq->sw_rptr);
- ++cq->sw_rptr;
- } else {
- pr_debug("%s cq %p cqid 0x%x skip hw cqe rptr 0x%x\n",
- __func__, cq, cq->cqid, cq->rptr);
- ++cq->rptr;
-
- /*
- * T3A: compute credits.
- */
- if (((cq->rptr - cq->wptr) > (1 << (cq->size_log2 - 1)))
- || ((cq->rptr - cq->wptr) >= 128)) {
- *credit = cq->rptr - cq->wptr;
- cq->wptr = cq->rptr;
- }
- }
- return ret;
-}
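The deleted cxio_hal.c above drives its work and completion queues with free-running read/write pointers (rptr/wptr) that are reduced to a ring index only at access time. The helpers it relies on live in cxio_wr.h, which is outside this hunk; the following is a reconstruction of their usual shape, offered as an assumption and renamed with an EX_ prefix so it reads as illustrative rather than as the driver's own definitions:

/* Illustrative only: ring-pointer helpers in the style of Q_PTR2IDX and friends. */
#define EX_Q_PTR2IDX(ptr, size_log2)	((ptr) & ((1UL << (size_log2)) - 1))
#define EX_Q_EMPTY(rptr, wptr)		((rptr) == (wptr))
#define EX_Q_COUNT(rptr, wptr)		((wptr) - (rptr))
#define EX_Q_FULL(rptr, wptr, size_log2) \
	(EX_Q_COUNT(rptr, wptr) >= (1UL << (size_log2)))

Because the pointers are never wrapped explicitly, routines such as cxio_flush_rq() and flush_completed_wrs() can walk from rptr to wptr with plain increments and let the mask select the slot.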
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
deleted file mode 100644
index c64e50b5a548..000000000000
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __CXIO_HAL_H__
-#define __CXIO_HAL_H__
-
-#include <linux/list.h>
-#include <linux/mutex.h>
-#include <linux/kfifo.h>
-
-#include "t3_cpl.h"
-#include "t3cdev.h"
-#include "cxgb3_ctl_defs.h"
-#include "cxio_wr.h"
-
-#define T3_CTRL_QP_ID FW_RI_SGEEC_START
-#define T3_CTL_QP_TID FW_RI_TID_START
-#define T3_CTRL_QP_SIZE_LOG2 8
-#define T3_CTRL_CQ_ID 0
-
-#define T3_MAX_NUM_RI (1<<15)
-#define T3_MAX_NUM_QP (1<<15)
-#define T3_MAX_NUM_CQ (1<<15)
-#define T3_MAX_NUM_PD (1<<15)
-#define T3_MAX_PBL_SIZE 256
-#define T3_MAX_RQ_SIZE 1024
-#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1)
-#define T3_MAX_CQ_DEPTH 65536
-#define T3_MAX_NUM_STAG (1<<15)
-#define T3_MAX_MR_SIZE 0x100000000ULL
-#define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */
-
-#define T3_STAG_UNSET 0xffffffff
-
-#define T3_MAX_DEV_NAME_LEN 32
-
-#define CXIO_FW_MAJ 7
-
-struct cxio_hal_ctrl_qp {
- u32 wptr;
- u32 rptr;
- struct mutex lock; /* for the wptr, can sleep */
- wait_queue_head_t waitq;/* wait for RspQ/CQE msg */
- union t3_wr *workq; /* the work request queue */
- dma_addr_t dma_addr; /* pci bus address of the workq */
- DEFINE_DMA_UNMAP_ADDR(mapping);
- void __iomem *doorbell;
-};
-
-struct cxio_hal_resource {
- struct kfifo tpt_fifo;
- spinlock_t tpt_fifo_lock;
- struct kfifo qpid_fifo;
- spinlock_t qpid_fifo_lock;
- struct kfifo cqid_fifo;
- spinlock_t cqid_fifo_lock;
- struct kfifo pdid_fifo;
- spinlock_t pdid_fifo_lock;
-};
-
-struct cxio_qpid_list {
- struct list_head entry;
- u32 qpid;
-};
-
-struct cxio_ucontext {
- struct list_head qpids;
- struct mutex lock;
-};
-
-struct cxio_rdev {
- char dev_name[T3_MAX_DEV_NAME_LEN];
- struct t3cdev *t3cdev_p;
- struct rdma_info rnic_info;
- struct adap_ports port_info;
- struct cxio_hal_resource *rscp;
- struct cxio_hal_ctrl_qp ctrl_qp;
- void *ulp;
- unsigned long qpshift;
- u32 qpnr;
- u32 qpmask;
- struct cxio_ucontext uctx;
- struct gen_pool *pbl_pool;
- struct gen_pool *rqt_pool;
- struct list_head entry;
- struct ch_embedded_info fw_info;
- u32 flags;
-#define CXIO_ERROR_FATAL 1
-};
-
-static inline int cxio_fatal_error(struct cxio_rdev *rdev_p)
-{
- return rdev_p->flags & CXIO_ERROR_FATAL;
-}
-
-static inline int cxio_num_stags(struct cxio_rdev *rdev_p)
-{
- return min((int)T3_MAX_NUM_STAG, (int)((rdev_p->rnic_info.tpt_top - rdev_p->rnic_info.tpt_base) >> 5));
-}
-
-typedef void (*cxio_hal_ev_callback_func_t) (struct cxio_rdev * rdev_p,
- struct sk_buff * skb);
-
-#define RSPQ_CQID(rsp) (be32_to_cpu(rsp->cq_ptrid) & 0xffff)
-#define RSPQ_CQPTR(rsp) ((be32_to_cpu(rsp->cq_ptrid) >> 16) & 0xffff)
-#define RSPQ_GENBIT(rsp) ((be32_to_cpu(rsp->flags) >> 16) & 1)
-#define RSPQ_OVERFLOW(rsp) ((be32_to_cpu(rsp->flags) >> 17) & 1)
-#define RSPQ_AN(rsp) ((be32_to_cpu(rsp->flags) >> 18) & 1)
-#define RSPQ_SE(rsp) ((be32_to_cpu(rsp->flags) >> 19) & 1)
-#define RSPQ_NOTIFY(rsp) ((be32_to_cpu(rsp->flags) >> 20) & 1)
-#define RSPQ_CQBRANCH(rsp) ((be32_to_cpu(rsp->flags) >> 21) & 1)
-#define RSPQ_CREDIT_THRESH(rsp) ((be32_to_cpu(rsp->flags) >> 22) & 1)
-
-struct respQ_msg_t {
- __be32 flags; /* flit 0 */
- __be32 cq_ptrid;
- __be64 rsvd; /* flit 1 */
- struct t3_cqe cqe; /* flits 2-3 */
-};
-
-enum t3_cq_opcode {
- CQ_ARM_AN = 0x2,
- CQ_ARM_SE = 0x6,
- CQ_FORCE_AN = 0x3,
- CQ_CREDIT_UPDATE = 0x7
-};
-
-int cxio_rdev_open(struct cxio_rdev *rdev);
-void cxio_rdev_close(struct cxio_rdev *rdev);
-int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq,
- enum t3_cq_opcode op, u32 credit);
-int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq, int kernel);
-int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
-int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
-void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
-void cxio_init_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
-int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq,
- struct cxio_ucontext *uctx);
-int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq,
- struct cxio_ucontext *uctx);
-int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode);
-int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
- u32 pbl_addr, u32 pbl_size);
-int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
- enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
- u8 page_size, u32 pbl_size, u32 pbl_addr);
-int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
- enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
- u8 page_size, u32 pbl_size, u32 pbl_addr);
-int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size,
- u32 pbl_addr);
-int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid);
-int cxio_allocate_stag(struct cxio_rdev *rdev, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr);
-int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag);
-int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr);
-void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
-void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
-u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp);
-void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid);
-int __init cxio_hal_init(void);
-void __exit cxio_hal_exit(void);
-int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count);
-int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count);
-void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
-void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
-void cxio_flush_hw_cq(struct t3_cq *cq);
-int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
- u8 *cqe_flushed, u64 *cookie, u32 *credit);
-int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb);
-
-#ifdef pr_fmt
-#undef pr_fmt
-#endif
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#endif
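The RSPQ_* accessors in the deleted cxio_hal.h take the CQ id from the low 16 bits of cq_ptrid and pull single status bits out of flags starting at bit 16, after converting both big-endian words. The following userspace model mirrors that bit layout; ntohl() stands in for be32_to_cpu() and the message contents are made up, so this is an illustration of the decoding, not driver code:

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>	/* ntohl() as a stand-in for be32_to_cpu() */

/* Same layout as struct respQ_msg_t above: cq_ptrid carries the CQ id in
 * its low 16 bits; flags packs GENBIT, OVERFLOW, AN, SE, NOTIFY, ... as
 * single bits starting at bit 16. */
struct respq_msg_model {
	uint32_t flags;		/* big-endian on the wire */
	uint32_t cq_ptrid;	/* big-endian on the wire */
};

static void decode(const struct respq_msg_model *rsp)
{
	uint32_t flags = ntohl(rsp->flags);
	uint32_t cq_ptrid = ntohl(rsp->cq_ptrid);

	printf("cqid=%u genbit=%u overflow=%u an=%u se=%u notify=%u\n",
	       cq_ptrid & 0xffff,
	       (flags >> 16) & 1, (flags >> 17) & 1, (flags >> 18) & 1,
	       (flags >> 19) & 1, (flags >> 20) & 1);
}

int main(void)
{
	/* Example message: CQ id 5, GENBIT and NOTIFY set (made-up values). */
	struct respq_msg_model rsp = {
		.flags    = htonl((1u << 16) | (1u << 20)),
		.cq_ptrid = htonl(5),
	};

	decode(&rsp);
	return 0;
}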
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
deleted file mode 100644
index c6e7bc4420b6..000000000000
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-/* Crude resource management */
-#include <linux/kernel.h>
-#include <linux/random.h>
-#include <linux/slab.h>
-#include <linux/kfifo.h>
-#include <linux/spinlock.h>
-#include <linux/errno.h>
-#include "cxio_resource.h"
-#include "cxio_hal.h"
-
-static struct kfifo rhdl_fifo;
-static spinlock_t rhdl_fifo_lock;
-
-#define RANDOM_SIZE 16
-
-static int __cxio_init_resource_fifo(struct kfifo *fifo,
- spinlock_t *fifo_lock,
- u32 nr, u32 skip_low,
- u32 skip_high,
- int random)
-{
- u32 i, j, entry = 0, idx;
- u32 random_bytes;
- u32 rarray[16];
- spin_lock_init(fifo_lock);
-
- if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL))
- return -ENOMEM;
-
- for (i = 0; i < skip_low + skip_high; i++)
- kfifo_in(fifo, (unsigned char *) &entry, sizeof(u32));
- if (random) {
- j = 0;
- random_bytes = prandom_u32();
- for (i = 0; i < RANDOM_SIZE; i++)
- rarray[i] = i + skip_low;
- for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) {
- if (j >= RANDOM_SIZE) {
- j = 0;
- random_bytes = prandom_u32();
- }
- idx = (random_bytes >> (j * 2)) & 0xF;
- kfifo_in(fifo,
- (unsigned char *) &rarray[idx],
- sizeof(u32));
- rarray[idx] = i;
- j++;
- }
- for (i = 0; i < RANDOM_SIZE; i++)
- kfifo_in(fifo,
- (unsigned char *) &rarray[i],
- sizeof(u32));
- } else
- for (i = skip_low; i < nr - skip_high; i++)
- kfifo_in(fifo, (unsigned char *) &i, sizeof(u32));
-
- for (i = 0; i < skip_low + skip_high; i++)
- if (kfifo_out_locked(fifo, (unsigned char *) &entry,
- sizeof(u32), fifo_lock) != sizeof(u32))
- break;
- return 0;
-}
-
-static int cxio_init_resource_fifo(struct kfifo *fifo, spinlock_t * fifo_lock,
- u32 nr, u32 skip_low, u32 skip_high)
-{
- return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
- skip_high, 0));
-}
-
-static int cxio_init_resource_fifo_random(struct kfifo *fifo,
- spinlock_t * fifo_lock,
- u32 nr, u32 skip_low, u32 skip_high)
-{
-
- return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
- skip_high, 1));
-}
-
-static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p)
-{
- u32 i;
-
- spin_lock_init(&rdev_p->rscp->qpid_fifo_lock);
-
- if (kfifo_alloc(&rdev_p->rscp->qpid_fifo, T3_MAX_NUM_QP * sizeof(u32),
- GFP_KERNEL))
- return -ENOMEM;
-
- for (i = 16; i < T3_MAX_NUM_QP; i++)
- if (!(i & rdev_p->qpmask))
- kfifo_in(&rdev_p->rscp->qpid_fifo,
- (unsigned char *) &i, sizeof(u32));
- return 0;
-}
-
-int cxio_hal_init_rhdl_resource(u32 nr_rhdl)
-{
- return cxio_init_resource_fifo(&rhdl_fifo, &rhdl_fifo_lock, nr_rhdl, 1,
- 0);
-}
-
-void cxio_hal_destroy_rhdl_resource(void)
-{
- kfifo_free(&rhdl_fifo);
-}
-
-/* nr_* must be power of 2 */
-int cxio_hal_init_resource(struct cxio_rdev *rdev_p,
- u32 nr_tpt, u32 nr_pbl,
- u32 nr_rqt, u32 nr_qpid, u32 nr_cqid, u32 nr_pdid)
-{
- int err = 0;
- struct cxio_hal_resource *rscp;
-
- rscp = kmalloc(sizeof(*rscp), GFP_KERNEL);
- if (!rscp)
- return -ENOMEM;
- rdev_p->rscp = rscp;
- err = cxio_init_resource_fifo_random(&rscp->tpt_fifo,
- &rscp->tpt_fifo_lock,
- nr_tpt, 1, 0);
- if (err)
- goto tpt_err;
- err = cxio_init_qpid_fifo(rdev_p);
- if (err)
- goto qpid_err;
- err = cxio_init_resource_fifo(&rscp->cqid_fifo, &rscp->cqid_fifo_lock,
- nr_cqid, 1, 0);
- if (err)
- goto cqid_err;
- err = cxio_init_resource_fifo(&rscp->pdid_fifo, &rscp->pdid_fifo_lock,
- nr_pdid, 1, 0);
- if (err)
- goto pdid_err;
- return 0;
-pdid_err:
- kfifo_free(&rscp->cqid_fifo);
-cqid_err:
- kfifo_free(&rscp->qpid_fifo);
-qpid_err:
- kfifo_free(&rscp->tpt_fifo);
-tpt_err:
- return -ENOMEM;
-}
-
-/*
- * returns 0 if no resource available
- */
-static u32 cxio_hal_get_resource(struct kfifo *fifo, spinlock_t * lock)
-{
- u32 entry;
- if (kfifo_out_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock))
- return entry;
- else
- return 0; /* fifo empty */
-}
-
-static void cxio_hal_put_resource(struct kfifo *fifo, spinlock_t * lock,
- u32 entry)
-{
- BUG_ON(
- kfifo_in_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)
- == 0);
-}
-
-u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp)
-{
- return cxio_hal_get_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock);
-}
-
-void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag)
-{
- cxio_hal_put_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock, stag);
-}
-
-u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
-{
- u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo,
- &rscp->qpid_fifo_lock);
- pr_debug("%s qpid 0x%x\n", __func__, qpid);
- return qpid;
-}
-
-void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid)
-{
- pr_debug("%s qpid 0x%x\n", __func__, qpid);
- cxio_hal_put_resource(&rscp->qpid_fifo, &rscp->qpid_fifo_lock, qpid);
-}
-
-u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp)
-{
- return cxio_hal_get_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock);
-}
-
-void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid)
-{
- cxio_hal_put_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, cqid);
-}
-
-u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp)
-{
- return cxio_hal_get_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock);
-}
-
-void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid)
-{
- cxio_hal_put_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, pdid);
-}
-
-void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
-{
- kfifo_free(&rscp->tpt_fifo);
- kfifo_free(&rscp->cqid_fifo);
- kfifo_free(&rscp->qpid_fifo);
- kfifo_free(&rscp->pdid_fifo);
- kfree(rscp);
-}
-
-/*
- * PBL Memory Manager. Uses Linux generic allocator.
- */
-
-#define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */
-
-u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size)
-{
- unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size);
- pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
- return (u32)addr;
-}
-
-void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
-{
- pr_debug("%s addr 0x%x size %d\n", __func__, addr, size);
- gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size);
-}
-
-int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p)
-{
- unsigned pbl_start, pbl_chunk;
-
- rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1);
- if (!rdev_p->pbl_pool)
- return -ENOMEM;
-
- pbl_start = rdev_p->rnic_info.pbl_base;
- pbl_chunk = rdev_p->rnic_info.pbl_top - pbl_start + 1;
-
- while (pbl_start < rdev_p->rnic_info.pbl_top) {
- pbl_chunk = min(rdev_p->rnic_info.pbl_top - pbl_start + 1,
- pbl_chunk);
- if (gen_pool_add(rdev_p->pbl_pool, pbl_start, pbl_chunk, -1)) {
- pr_debug("%s failed to add PBL chunk (%x/%x)\n",
- __func__, pbl_start, pbl_chunk);
- if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) {
- pr_warn("%s: Failed to add all PBL chunks (%x/%x)\n",
- __func__, pbl_start,
- rdev_p->rnic_info.pbl_top - pbl_start);
- return 0;
- }
- pbl_chunk >>= 1;
- } else {
- pr_debug("%s added PBL chunk (%x/%x)\n",
- __func__, pbl_start, pbl_chunk);
- pbl_start += pbl_chunk;
- }
- }
-
- return 0;
-}
-
-void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p)
-{
- gen_pool_destroy(rdev_p->pbl_pool);
-}
-
-/*
- * RQT Memory Manager. Uses Linux generic allocator.
- */
-
-#define MIN_RQT_SHIFT 10 /* 1KB == min RQT size (16 entries) */
-#define RQT_CHUNK 2*1024*1024
-
-u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size)
-{
- unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6);
- pr_debug("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6);
- return (u32)addr;
-}
-
-void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
-{
- pr_debug("%s addr 0x%x size %d\n", __func__, addr, size << 6);
- gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6);
-}
-
-int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p)
-{
- unsigned long i;
- rdev_p->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1);
- if (rdev_p->rqt_pool)
- for (i = rdev_p->rnic_info.rqt_base;
- i <= rdev_p->rnic_info.rqt_top - RQT_CHUNK + 1;
- i += RQT_CHUNK)
- gen_pool_add(rdev_p->rqt_pool, i, RQT_CHUNK, -1);
- return rdev_p->rqt_pool ? 0 : -ENOMEM;
-}
-
-void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p)
-{
- gen_pool_destroy(rdev_p->rqt_pool);
-}
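The deleted cxio_resource.c hands out STAG, QP, CQ and PD ids by seeding a kfifo with every valid id at init time, popping an id on allocation and pushing it back on free (optionally shuffling the first entries so stags are not strictly sequential), with id 0 reserved to mean "none available". A userspace sketch of that free-id queue idea, using a plain ring buffer in place of the kernel kfifo and made-up sizes:

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

/* Every valid id is queued up front; allocation pops the head, freeing
 * pushes the id back on the tail.  0 means "pool exhausted", which is why
 * the kernel code skips the low ids when seeding the fifo. */
struct id_pool {
	uint32_t *ring;
	uint32_t size, head, tail, count;
};

static int id_pool_init(struct id_pool *p, uint32_t nr, uint32_t skip_low)
{
	p->ring = malloc(nr * sizeof(*p->ring));
	if (!p->ring)
		return -1;
	p->size = nr;
	p->head = p->tail = p->count = 0;
	for (uint32_t i = skip_low; i < nr; i++) {	/* seed with valid ids */
		p->ring[p->tail++ % p->size] = i;
		p->count++;
	}
	return 0;
}

static uint32_t id_get(struct id_pool *p)
{
	if (!p->count)
		return 0;				/* pool exhausted */
	p->count--;
	return p->ring[p->head++ % p->size];
}

static void id_put(struct id_pool *p, uint32_t id)
{
	p->ring[p->tail++ % p->size] = id;
	p->count++;
}

int main(void)
{
	struct id_pool pool;

	if (id_pool_init(&pool, 8, 1))
		return 1;
	uint32_t a = id_get(&pool), b = id_get(&pool);
	printf("got %u and %u\n", a, b);	/* 1 and 2 */
	id_put(&pool, a);
	printf("next %u\n", id_get(&pool));	/* 3; id 1 comes back later */
	free(pool.ring);
	return 0;
}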
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.h b/drivers/infiniband/hw/cxgb3/cxio_resource.h
deleted file mode 100644
index a2703a3d882d..000000000000
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __CXIO_RESOURCE_H__
-#define __CXIO_RESOURCE_H__
-
-#include <linux/kernel.h>
-#include <linux/random.h>
-#include <linux/slab.h>
-#include <linux/kfifo.h>
-#include <linux/spinlock.h>
-#include <linux/errno.h>
-#include <linux/genalloc.h>
-#include "cxio_hal.h"
-
-extern int cxio_hal_init_rhdl_resource(u32 nr_rhdl);
-extern void cxio_hal_destroy_rhdl_resource(void);
-extern int cxio_hal_init_resource(struct cxio_rdev *rdev_p,
- u32 nr_tpt, u32 nr_pbl,
- u32 nr_rqt, u32 nr_qpid, u32 nr_cqid,
- u32 nr_pdid);
-extern u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp);
-extern void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag);
-extern u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp);
-extern void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid);
-extern u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp);
-extern void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid);
-extern void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp);
-
-#define PBL_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.pbl_base )
-extern int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p);
-extern void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p);
-extern u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size);
-extern void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size);
-
-#define RQT_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.rqt_base )
-extern int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p);
-extern void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p);
-extern u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size);
-extern void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size);
-#endif
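cxio_hal_pblpool_create(), declared above and defined in the deleted cxio_resource.c, seeds a gen_pool with the adapter's PBL address range by trying to add the whole remaining range as one chunk and halving the chunk whenever gen_pool_add() fails, giving up once chunks shrink to 1024 << MIN_PBL_SHIFT or below. A userspace sketch of that halving loop follows; add_range() is a hypothetical stand-in for gen_pool_add(), and the address range and failure threshold are invented so the retry path is exercised:

#include <stdio.h>

#define MIN_PBL_SHIFT 8

/* Hypothetical stand-in for gen_pool_add(): reject chunks above 1MB. */
static int add_range(unsigned long start, unsigned long len)
{
	(void)start;
	return len > (1ul << 20) ? -1 : 0;
}

int main(void)
{
	unsigned long pbl_base = 0x100000, pbl_top = 0x4fffff;	/* 4MB, made up */
	unsigned long pbl_start = pbl_base;
	unsigned long pbl_chunk = pbl_top - pbl_start + 1;

	while (pbl_start < pbl_top) {
		if (pbl_top - pbl_start + 1 < pbl_chunk)
			pbl_chunk = pbl_top - pbl_start + 1;
		if (add_range(pbl_start, pbl_chunk)) {
			if (pbl_chunk <= 1024ul << MIN_PBL_SHIFT) {
				printf("giving up at %#lx\n", pbl_start);
				return 0;
			}
			pbl_chunk >>= 1;	/* halve and retry */
		} else {
			printf("added %#lx/%#lx\n", pbl_start, pbl_chunk);
			pbl_start += pbl_chunk;
		}
	}
	return 0;
}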
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
deleted file mode 100644
index 53aa5c36247a..000000000000
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ /dev/null
@@ -1,802 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __CXIO_WR_H__
-#define __CXIO_WR_H__
-
-#include <asm/io.h>
-#include <linux/pci.h>
-#include <linux/timer.h>
-#include "firmware_exports.h"
-
-#define T3_MAX_SGE 4
-#define T3_MAX_INLINE 64
-#define T3_STAG0_PBL_SIZE (2 * T3_MAX_SGE << 3)
-#define T3_STAG0_MAX_PBE_LEN (128 * 1024 * 1024)
-#define T3_STAG0_PAGE_SHIFT 15
-
-#define Q_EMPTY(rptr,wptr) ((rptr)==(wptr))
-#define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \
- ((rptr)!=(wptr)) )
-#define Q_GENBIT(ptr,size_log2) (!(((ptr)>>size_log2)&0x1))
-#define Q_FREECNT(rptr,wptr,size_log2) ((1UL<<size_log2)-((wptr)-(rptr)))
-#define Q_COUNT(rptr,wptr) ((wptr)-(rptr))
-#define Q_PTR2IDX(ptr,size_log2) (ptr & ((1UL<<size_log2)-1))
-
-static inline void ring_doorbell(void __iomem *doorbell, u32 qpid)
-{
- writel(((1<<31) | qpid), doorbell);
-}
-
-#define SEQ32_GE(x,y) (!( (((u32) (x)) - ((u32) (y))) & 0x80000000 ))
-
-enum t3_wr_flags {
- T3_COMPLETION_FLAG = 0x01,
- T3_NOTIFY_FLAG = 0x02,
- T3_SOLICITED_EVENT_FLAG = 0x04,
- T3_READ_FENCE_FLAG = 0x08,
- T3_LOCAL_FENCE_FLAG = 0x10
-} __packed;
-
-enum t3_wr_opcode {
- T3_WR_BP = FW_WROPCODE_RI_BYPASS,
- T3_WR_SEND = FW_WROPCODE_RI_SEND,
- T3_WR_WRITE = FW_WROPCODE_RI_RDMA_WRITE,
- T3_WR_READ = FW_WROPCODE_RI_RDMA_READ,
- T3_WR_INV_STAG = FW_WROPCODE_RI_LOCAL_INV,
- T3_WR_BIND = FW_WROPCODE_RI_BIND_MW,
- T3_WR_RCV = FW_WROPCODE_RI_RECEIVE,
- T3_WR_INIT = FW_WROPCODE_RI_RDMA_INIT,
- T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP,
- T3_WR_FASTREG = FW_WROPCODE_RI_FASTREGISTER_MR
-} __packed;
-
-enum t3_rdma_opcode {
- T3_RDMA_WRITE, /* IETF RDMAP v1.0 ... */
- T3_READ_REQ,
- T3_READ_RESP,
- T3_SEND,
- T3_SEND_WITH_INV,
- T3_SEND_WITH_SE,
- T3_SEND_WITH_SE_INV,
- T3_TERMINATE,
- T3_RDMA_INIT, /* CHELSIO RI specific ... */
- T3_BIND_MW,
- T3_FAST_REGISTER,
- T3_LOCAL_INV,
- T3_QP_MOD,
- T3_BYPASS,
- T3_RDMA_READ_REQ_WITH_INV,
-} __packed;
-
-static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop)
-{
- switch (wrop) {
- case T3_WR_BP: return T3_BYPASS;
- case T3_WR_SEND: return T3_SEND;
- case T3_WR_WRITE: return T3_RDMA_WRITE;
- case T3_WR_READ: return T3_READ_REQ;
- case T3_WR_INV_STAG: return T3_LOCAL_INV;
- case T3_WR_BIND: return T3_BIND_MW;
- case T3_WR_INIT: return T3_RDMA_INIT;
- case T3_WR_QP_MOD: return T3_QP_MOD;
- case T3_WR_FASTREG: return T3_FAST_REGISTER;
- default: break;
- }
- return -1;
-}
-
-
-/* Work request id */
-union t3_wrid {
- struct {
- u32 hi;
- u32 low;
- } id0;
- u64 id1;
-};
-
-#define WRID(wrid) (wrid.id1)
-#define WRID_GEN(wrid) (wrid.id0.wr_gen)
-#define WRID_IDX(wrid) (wrid.id0.wr_idx)
-#define WRID_LO(wrid) (wrid.id0.wr_lo)
-
-struct fw_riwrh {
- __be32 op_seop_flags;
- __be32 gen_tid_len;
-};
-
-#define S_FW_RIWR_OP 24
-#define M_FW_RIWR_OP 0xff
-#define V_FW_RIWR_OP(x) ((x) << S_FW_RIWR_OP)
-#define G_FW_RIWR_OP(x) ((((x) >> S_FW_RIWR_OP)) & M_FW_RIWR_OP)
-
-#define S_FW_RIWR_SOPEOP 22
-#define M_FW_RIWR_SOPEOP 0x3
-#define V_FW_RIWR_SOPEOP(x) ((x) << S_FW_RIWR_SOPEOP)
-
-#define S_FW_RIWR_FLAGS 8
-#define M_FW_RIWR_FLAGS 0x3fffff
-#define V_FW_RIWR_FLAGS(x) ((x) << S_FW_RIWR_FLAGS)
-#define G_FW_RIWR_FLAGS(x) ((((x) >> S_FW_RIWR_FLAGS)) & M_FW_RIWR_FLAGS)
-
-#define S_FW_RIWR_TID 8
-#define V_FW_RIWR_TID(x) ((x) << S_FW_RIWR_TID)
-
-#define S_FW_RIWR_LEN 0
-#define V_FW_RIWR_LEN(x) ((x) << S_FW_RIWR_LEN)
-
-#define S_FW_RIWR_GEN 31
-#define V_FW_RIWR_GEN(x) ((x) << S_FW_RIWR_GEN)
-
-struct t3_sge {
- __be32 stag;
- __be32 len;
- __be64 to;
-};
-
-/* If num_sgle is zero, flit 5+ contains immediate data.*/
-struct t3_send_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
-
- u8 rdmaop; /* 2 */
- u8 reserved[3];
- __be32 rem_stag;
- __be32 plen; /* 3 */
- __be32 num_sgle;
- struct t3_sge sgl[T3_MAX_SGE]; /* 4+ */
-};
-
-#define T3_MAX_FASTREG_DEPTH 10
-#define T3_MAX_FASTREG_FRAG 10
-
-struct t3_fastreg_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
- __be32 stag; /* 2 */
- __be32 len;
- __be32 va_base_hi; /* 3 */
- __be32 va_base_lo_fbo;
- __be32 page_type_perms; /* 4 */
- __be32 reserved1;
- __be64 pbl_addrs[0]; /* 5+ */
-};
-
-/*
- * If a fastreg wr spans multiple wqes, then the 2nd fragment looks like this.
- */
-struct t3_pbl_frag {
- struct fw_riwrh wrh; /* 0 */
- __be64 pbl_addrs[14]; /* 1..14 */
-};
-
-#define S_FR_PAGE_COUNT 24
-#define M_FR_PAGE_COUNT 0xff
-#define V_FR_PAGE_COUNT(x) ((x) << S_FR_PAGE_COUNT)
-#define G_FR_PAGE_COUNT(x) ((((x) >> S_FR_PAGE_COUNT)) & M_FR_PAGE_COUNT)
-
-#define S_FR_PAGE_SIZE 16
-#define M_FR_PAGE_SIZE 0x1f
-#define V_FR_PAGE_SIZE(x) ((x) << S_FR_PAGE_SIZE)
-#define G_FR_PAGE_SIZE(x) ((((x) >> S_FR_PAGE_SIZE)) & M_FR_PAGE_SIZE)
-
-#define S_FR_TYPE 8
-#define M_FR_TYPE 0x1
-#define V_FR_TYPE(x) ((x) << S_FR_TYPE)
-#define G_FR_TYPE(x) ((((x) >> S_FR_TYPE)) & M_FR_TYPE)
-
-#define S_FR_PERMS 0
-#define M_FR_PERMS 0xff
-#define V_FR_PERMS(x) ((x) << S_FR_PERMS)
-#define G_FR_PERMS(x) ((((x) >> S_FR_PERMS)) & M_FR_PERMS)
-
-struct t3_local_inv_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
- __be32 stag; /* 2 */
- __be32 reserved;
-};
-
-struct t3_rdma_write_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
- u8 rdmaop; /* 2 */
- u8 reserved[3];
- __be32 stag_sink;
- __be64 to_sink; /* 3 */
- __be32 plen; /* 4 */
- __be32 num_sgle;
- struct t3_sge sgl[T3_MAX_SGE]; /* 5+ */
-};
-
-struct t3_rdma_read_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
- u8 rdmaop; /* 2 */
- u8 local_inv;
- u8 reserved[2];
- __be32 rem_stag;
- __be64 rem_to; /* 3 */
- __be32 local_stag; /* 4 */
- __be32 local_len;
- __be64 local_to; /* 5 */
-};
-
-struct t3_bind_mw_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
- u16 reserved; /* 2 */
- u8 type;
- u8 perms;
- __be32 mr_stag;
- __be32 mw_stag; /* 3 */
- __be32 mw_len;
- __be64 mw_va; /* 4 */
- __be32 mr_pbl_addr; /* 5 */
- u8 reserved2[3];
- u8 mr_pagesz;
-};
-
-struct t3_receive_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
- u8 pagesz[T3_MAX_SGE];
- __be32 num_sgle; /* 2 */
- struct t3_sge sgl[T3_MAX_SGE]; /* 3+ */
- __be32 pbl_addr[T3_MAX_SGE];
-};
-
-struct t3_bypass_wr {
- struct fw_riwrh wrh;
- union t3_wrid wrid; /* 1 */
-};
-
-struct t3_modify_qp_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
- __be32 flags; /* 2 */
- __be32 quiesce; /* 2 */
- __be32 max_ird; /* 3 */
- __be32 max_ord; /* 3 */
- __be64 sge_cmd; /* 4 */
- __be64 ctx1; /* 5 */
- __be64 ctx0; /* 6 */
-};
-
-enum t3_modify_qp_flags {
- MODQP_QUIESCE = 0x01,
- MODQP_MAX_IRD = 0x02,
- MODQP_MAX_ORD = 0x04,
- MODQP_WRITE_EC = 0x08,
- MODQP_READ_EC = 0x10,
-};
-
-
-enum t3_mpa_attrs {
- uP_RI_MPA_RX_MARKER_ENABLE = 0x1,
- uP_RI_MPA_TX_MARKER_ENABLE = 0x2,
- uP_RI_MPA_CRC_ENABLE = 0x4,
- uP_RI_MPA_IETF_ENABLE = 0x8
-} __packed;
-
-enum t3_qp_caps {
- uP_RI_QP_RDMA_READ_ENABLE = 0x01,
- uP_RI_QP_RDMA_WRITE_ENABLE = 0x02,
- uP_RI_QP_BIND_ENABLE = 0x04,
- uP_RI_QP_FAST_REGISTER_ENABLE = 0x08,
- uP_RI_QP_STAG0_ENABLE = 0x10
-} __packed;
-
-enum rdma_init_rtr_types {
- RTR_READ = 1,
- RTR_WRITE = 2,
- RTR_SEND = 3,
-};
-
-#define S_RTR_TYPE 2
-#define M_RTR_TYPE 0x3
-#define V_RTR_TYPE(x) ((x) << S_RTR_TYPE)
-#define G_RTR_TYPE(x) ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE)
-
-#define S_CHAN 4
-#define M_CHAN 0x3
-#define V_CHAN(x) ((x) << S_CHAN)
-#define G_CHAN(x) ((((x) >> S_CHAN)) & M_CHAN)
-
-struct t3_rdma_init_attr {
- u32 tid;
- u32 qpid;
- u32 pdid;
- u32 scqid;
- u32 rcqid;
- u32 rq_addr;
- u32 rq_size;
- enum t3_mpa_attrs mpaattrs;
- enum t3_qp_caps qpcaps;
- u16 tcp_emss;
- u32 ord;
- u32 ird;
- u64 qp_dma_addr;
- u32 qp_dma_size;
- enum rdma_init_rtr_types rtr_type;
- u16 flags;
- u16 rqe_count;
- u32 irs;
- u32 chan;
-};
-
-struct t3_rdma_init_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
- __be32 qpid; /* 2 */
- __be32 pdid;
- __be32 scqid; /* 3 */
- __be32 rcqid;
- __be32 rq_addr; /* 4 */
- __be32 rq_size;
- u8 mpaattrs; /* 5 */
- u8 qpcaps;
- __be16 ulpdu_size;
- __be16 flags_rtr_type;
- __be16 rqe_count;
- __be32 ord; /* 6 */
- __be32 ird;
- __be64 qp_dma_addr; /* 7 */
- __be32 qp_dma_size; /* 8 */
- __be32 irs;
-};
-
-struct t3_genbit {
- u64 flit[15];
- __be64 genbit;
-};
-
-struct t3_wq_in_err {
- u64 flit[13];
- u64 err;
-};
-
-enum rdma_init_wr_flags {
- MPA_INITIATOR = (1<<0),
- PRIV_QP = (1<<1),
-};
-
-union t3_wr {
- struct t3_send_wr send;
- struct t3_rdma_write_wr write;
- struct t3_rdma_read_wr read;
- struct t3_receive_wr recv;
- struct t3_fastreg_wr fastreg;
- struct t3_pbl_frag pbl_frag;
- struct t3_local_inv_wr local_inv;
- struct t3_bind_mw_wr bind;
- struct t3_bypass_wr bypass;
- struct t3_rdma_init_wr init;
- struct t3_modify_qp_wr qp_mod;
- struct t3_genbit genbit;
- struct t3_wq_in_err wq_in_err;
- __be64 flit[16];
-};
-
-#define T3_SQ_CQE_FLIT 13
-#define T3_SQ_COOKIE_FLIT 14
-
-#define T3_RQ_COOKIE_FLIT 13
-#define T3_RQ_CQE_FLIT 14
-
-static inline enum t3_wr_opcode fw_riwrh_opcode(struct fw_riwrh *wqe)
-{
- return G_FW_RIWR_OP(be32_to_cpu(wqe->op_seop_flags));
-}
-
-enum t3_wr_hdr_bits {
- T3_EOP = 1,
- T3_SOP = 2,
- T3_SOPEOP = T3_EOP|T3_SOP,
-};
-
-static inline void build_fw_riwrh(struct fw_riwrh *wqe, enum t3_wr_opcode op,
- enum t3_wr_flags flags, u8 genbit, u32 tid,
- u8 len, u8 sopeop)
-{
- wqe->op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(op) |
- V_FW_RIWR_SOPEOP(sopeop) |
- V_FW_RIWR_FLAGS(flags));
- wmb();
- wqe->gen_tid_len = cpu_to_be32(V_FW_RIWR_GEN(genbit) |
- V_FW_RIWR_TID(tid) |
- V_FW_RIWR_LEN(len));
- /* 2nd gen bit... */
- ((union t3_wr *)wqe)->genbit.genbit = cpu_to_be64(genbit);
-}
-
-/*
- * T3 ULP2_TX commands
- */
-enum t3_utx_mem_op {
- T3_UTX_MEM_READ = 2,
- T3_UTX_MEM_WRITE = 3
-};
-
-/* T3 MC7 RDMA TPT entry format */
-
-enum tpt_mem_type {
- TPT_NON_SHARED_MR = 0x0,
- TPT_SHARED_MR = 0x1,
- TPT_MW = 0x2,
- TPT_MW_RELAXED_PROTECTION = 0x3
-};
-
-enum tpt_addr_type {
- TPT_ZBTO = 0,
- TPT_VATO = 1
-};
-
-enum tpt_mem_perm {
- TPT_MW_BIND = 0x10,
- TPT_LOCAL_READ = 0x8,
- TPT_LOCAL_WRITE = 0x4,
- TPT_REMOTE_READ = 0x2,
- TPT_REMOTE_WRITE = 0x1
-};
-
-struct tpt_entry {
- __be32 valid_stag_pdid;
- __be32 flags_pagesize_qpid;
-
- __be32 rsvd_pbl_addr;
- __be32 len;
- __be32 va_hi;
- __be32 va_low_or_fbo;
-
- __be32 rsvd_bind_cnt_or_pstag;
- __be32 rsvd_pbl_size;
-};
-
-#define S_TPT_VALID 31
-#define V_TPT_VALID(x) ((x) << S_TPT_VALID)
-#define F_TPT_VALID V_TPT_VALID(1U)
-
-#define S_TPT_STAG_KEY 23
-#define M_TPT_STAG_KEY 0xFF
-#define V_TPT_STAG_KEY(x) ((x) << S_TPT_STAG_KEY)
-#define G_TPT_STAG_KEY(x) (((x) >> S_TPT_STAG_KEY) & M_TPT_STAG_KEY)
-
-#define S_TPT_STAG_STATE 22
-#define V_TPT_STAG_STATE(x) ((x) << S_TPT_STAG_STATE)
-#define F_TPT_STAG_STATE V_TPT_STAG_STATE(1U)
-
-#define S_TPT_STAG_TYPE 20
-#define M_TPT_STAG_TYPE 0x3
-#define V_TPT_STAG_TYPE(x) ((x) << S_TPT_STAG_TYPE)
-#define G_TPT_STAG_TYPE(x) (((x) >> S_TPT_STAG_TYPE) & M_TPT_STAG_TYPE)
-
-#define S_TPT_PDID 0
-#define M_TPT_PDID 0xFFFFF
-#define V_TPT_PDID(x) ((x) << S_TPT_PDID)
-#define G_TPT_PDID(x) (((x) >> S_TPT_PDID) & M_TPT_PDID)
-
-#define S_TPT_PERM 28
-#define M_TPT_PERM 0xF
-#define V_TPT_PERM(x) ((x) << S_TPT_PERM)
-#define G_TPT_PERM(x) (((x) >> S_TPT_PERM) & M_TPT_PERM)
-
-#define S_TPT_REM_INV_DIS 27
-#define V_TPT_REM_INV_DIS(x) ((x) << S_TPT_REM_INV_DIS)
-#define F_TPT_REM_INV_DIS V_TPT_REM_INV_DIS(1U)
-
-#define S_TPT_ADDR_TYPE 26
-#define V_TPT_ADDR_TYPE(x) ((x) << S_TPT_ADDR_TYPE)
-#define F_TPT_ADDR_TYPE V_TPT_ADDR_TYPE(1U)
-
-#define S_TPT_MW_BIND_ENABLE 25
-#define V_TPT_MW_BIND_ENABLE(x) ((x) << S_TPT_MW_BIND_ENABLE)
-#define F_TPT_MW_BIND_ENABLE V_TPT_MW_BIND_ENABLE(1U)
-
-#define S_TPT_PAGE_SIZE 20
-#define M_TPT_PAGE_SIZE 0x1F
-#define V_TPT_PAGE_SIZE(x) ((x) << S_TPT_PAGE_SIZE)
-#define G_TPT_PAGE_SIZE(x) (((x) >> S_TPT_PAGE_SIZE) & M_TPT_PAGE_SIZE)
-
-#define S_TPT_PBL_ADDR 0
-#define M_TPT_PBL_ADDR 0x1FFFFFFF
-#define V_TPT_PBL_ADDR(x) ((x) << S_TPT_PBL_ADDR)
-#define G_TPT_PBL_ADDR(x) (((x) >> S_TPT_PBL_ADDR) & M_TPT_PBL_ADDR)
-
-#define S_TPT_QPID 0
-#define M_TPT_QPID 0xFFFFF
-#define V_TPT_QPID(x) ((x) << S_TPT_QPID)
-#define G_TPT_QPID(x) (((x) >> S_TPT_QPID) & M_TPT_QPID)
-
-#define S_TPT_PSTAG 0
-#define M_TPT_PSTAG 0xFFFFFF
-#define V_TPT_PSTAG(x) ((x) << S_TPT_PSTAG)
-#define G_TPT_PSTAG(x) (((x) >> S_TPT_PSTAG) & M_TPT_PSTAG)
-
-#define S_TPT_PBL_SIZE 0
-#define M_TPT_PBL_SIZE 0xFFFFF
-#define V_TPT_PBL_SIZE(x) ((x) << S_TPT_PBL_SIZE)
-#define G_TPT_PBL_SIZE(x) (((x) >> S_TPT_PBL_SIZE) & M_TPT_PBL_SIZE)
-
-/*
- * CQE defs
- */
-struct t3_cqe {
- __be32 header;
- __be32 len;
- union {
- struct {
- __be32 stag;
- __be32 msn;
- } rcqe;
- struct {
- u32 wrid_hi;
- u32 wrid_low;
- } scqe;
- } u;
-};
-
-#define S_CQE_OOO 31
-#define M_CQE_OOO 0x1
-#define G_CQE_OOO(x) ((((x) >> S_CQE_OOO)) & M_CQE_OOO)
-#define V_CEQ_OOO(x) ((x)<<S_CQE_OOO)
-
-#define S_CQE_QPID 12
-#define M_CQE_QPID 0x7FFFF
-#define G_CQE_QPID(x) ((((x) >> S_CQE_QPID)) & M_CQE_QPID)
-#define V_CQE_QPID(x) ((x)<<S_CQE_QPID)
-
-#define S_CQE_SWCQE 11
-#define M_CQE_SWCQE 0x1
-#define G_CQE_SWCQE(x) ((((x) >> S_CQE_SWCQE)) & M_CQE_SWCQE)
-#define V_CQE_SWCQE(x) ((x)<<S_CQE_SWCQE)
-
-#define S_CQE_GENBIT 10
-#define M_CQE_GENBIT 0x1
-#define G_CQE_GENBIT(x) (((x) >> S_CQE_GENBIT) & M_CQE_GENBIT)
-#define V_CQE_GENBIT(x) ((x)<<S_CQE_GENBIT)
-
-#define S_CQE_STATUS 5
-#define M_CQE_STATUS 0x1F
-#define G_CQE_STATUS(x) ((((x) >> S_CQE_STATUS)) & M_CQE_STATUS)
-#define V_CQE_STATUS(x) ((x)<<S_CQE_STATUS)
-
-#define S_CQE_TYPE 4
-#define M_CQE_TYPE 0x1
-#define G_CQE_TYPE(x) ((((x) >> S_CQE_TYPE)) & M_CQE_TYPE)
-#define V_CQE_TYPE(x) ((x)<<S_CQE_TYPE)
-
-#define S_CQE_OPCODE 0
-#define M_CQE_OPCODE 0xF
-#define G_CQE_OPCODE(x) ((((x) >> S_CQE_OPCODE)) & M_CQE_OPCODE)
-#define V_CQE_OPCODE(x) ((x)<<S_CQE_OPCODE)
-
-#define SW_CQE(x) (G_CQE_SWCQE(be32_to_cpu((x).header)))
-#define CQE_OOO(x) (G_CQE_OOO(be32_to_cpu((x).header)))
-#define CQE_QPID(x) (G_CQE_QPID(be32_to_cpu((x).header)))
-#define CQE_GENBIT(x) (G_CQE_GENBIT(be32_to_cpu((x).header)))
-#define CQE_TYPE(x) (G_CQE_TYPE(be32_to_cpu((x).header)))
-#define SQ_TYPE(x) (CQE_TYPE((x)))
-#define RQ_TYPE(x) (!CQE_TYPE((x)))
-#define CQE_STATUS(x) (G_CQE_STATUS(be32_to_cpu((x).header)))
-#define CQE_OPCODE(x) (G_CQE_OPCODE(be32_to_cpu((x).header)))
-
-#define CQE_SEND_OPCODE(x)( \
- (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND) || \
- (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE) || \
- (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_INV) || \
- (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE_INV))
-
-#define CQE_LEN(x) (be32_to_cpu((x).len))
-
-/* used for RQ completion processing */
-#define CQE_WRID_STAG(x) (be32_to_cpu((x).u.rcqe.stag))
-#define CQE_WRID_MSN(x) (be32_to_cpu((x).u.rcqe.msn))
-
-/* used for SQ completion processing */
-#define CQE_WRID_SQ_WPTR(x) ((x).u.scqe.wrid_hi)
-#define CQE_WRID_WPTR(x) ((x).u.scqe.wrid_low)
-
-/* generic accessor macros */
-#define CQE_WRID_HI(x) ((x).u.scqe.wrid_hi)
-#define CQE_WRID_LOW(x) ((x).u.scqe.wrid_low)
-
-#define TPT_ERR_SUCCESS 0x0
-#define TPT_ERR_STAG 0x1 /* STAG invalid: either the */
- /* STAG is out of range, zero, */
- /* or STAG_key mismatch */
-#define TPT_ERR_PDID 0x2 /* PDID mismatch */
-#define TPT_ERR_QPID 0x3 /* QPID mismatch */
-#define TPT_ERR_ACCESS 0x4 /* Invalid access right */
-#define TPT_ERR_WRAP 0x5 /* Wrap error */
-#define TPT_ERR_BOUND 0x6 /* base and bounds violation */
-#define TPT_ERR_INVALIDATE_SHARED_MR 0x7 /* attempt to invalidate a */
- /* shared memory region */
-#define TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND 0x8 /* attempt to invalidate a */
- /* MR with a bound MW */
-#define TPT_ERR_ECC 0x9 /* ECC error detected */
-#define TPT_ERR_ECC_PSTAG 0xA /* ECC error detected when */
- /* reading PSTAG for a MW */
- /* Invalidate */
-#define TPT_ERR_PBL_ADDR_BOUND 0xB /* pbl addr out of bounds: */
- /* software error */
-#define TPT_ERR_SWFLUSH 0xC /* SW FLUSHED */
-#define TPT_ERR_CRC 0x10 /* CRC error */
-#define TPT_ERR_MARKER 0x11 /* Marker error */
-#define TPT_ERR_PDU_LEN_ERR 0x12 /* invalid PDU length */
-#define TPT_ERR_OUT_OF_RQE 0x13 /* out of RQE */
-#define TPT_ERR_DDP_VERSION 0x14 /* wrong DDP version */
-#define TPT_ERR_RDMA_VERSION 0x15 /* wrong RDMA version */
-#define TPT_ERR_OPCODE 0x16 /* invalid rdma opcode */
-#define TPT_ERR_DDP_QUEUE_NUM 0x17 /* invalid ddp queue number */
-#define TPT_ERR_MSN 0x18 /* MSN error */
-#define TPT_ERR_TBIT 0x19 /* tag bit not set correctly */
-#define TPT_ERR_MO 0x1A /* MO not 0 for TERMINATE */
- /* or READ_REQ */
-#define TPT_ERR_MSN_GAP 0x1B
-#define TPT_ERR_MSN_RANGE 0x1C
-#define TPT_ERR_IRD_OVERFLOW 0x1D
-#define TPT_ERR_RQE_ADDR_BOUND 0x1E /* RQE addr out of bounds: */
- /* software error */
-#define TPT_ERR_INTERNAL_ERR 0x1F /* internal error (opcode */
- /* mismatch) */
-
-struct t3_swsq {
- __u64 wr_id;
- struct t3_cqe cqe;
- __u32 sq_wptr;
- __be32 read_len;
- int opcode;
- int complete;
- int signaled;
-};
-
-struct t3_swrq {
- __u64 wr_id;
- __u32 pbl_addr;
-};
-
-/*
- * A T3 WQ implements both the SQ and RQ.
- */
-struct t3_wq {
- union t3_wr *queue; /* DMA accessible memory */
- dma_addr_t dma_addr; /* DMA address for HW */
- DEFINE_DMA_UNMAP_ADDR(mapping); /* unmap kruft */
- u32 error; /* 1 once we go to ERROR */
- u32 qpid;
- u32 wptr; /* idx to next available WR slot */
- u32 size_log2; /* total wq size */
- struct t3_swsq *sq; /* SW SQ */
- struct t3_swsq *oldest_read; /* tracks oldest pending read */
- u32 sq_wptr; /* sq_wptr - sq_rptr == count of */
- u32 sq_rptr; /* pending wrs */
- u32 sq_size_log2; /* sq size */
- struct t3_swrq *rq; /* SW RQ (holds consumer wr_ids) */
- u32 rq_wptr; /* rq_wptr - rq_rptr == count of */
- u32 rq_rptr; /* pending wrs */
- struct t3_swrq *rq_oldest_wr; /* oldest wr on the SW RQ */
- u32 rq_size_log2; /* rq size */
- u32 rq_addr; /* rq adapter address */
- void __iomem *doorbell; /* kernel db */
- u64 udb; /* user db if any */
- struct cxio_rdev *rdev;
-};
-
-struct t3_cq {
- u32 cqid;
- u32 rptr;
- u32 wptr;
- u32 size_log2;
- dma_addr_t dma_addr;
- DEFINE_DMA_UNMAP_ADDR(mapping);
- struct t3_cqe *queue;
- struct t3_cqe *sw_queue;
- u32 sw_rptr;
- u32 sw_wptr;
-};
-
-#define CQ_VLD_ENTRY(ptr,size_log2,cqe) (Q_GENBIT(ptr,size_log2) == \
- CQE_GENBIT(*cqe))
-
-struct t3_cq_status_page {
- u32 cq_err;
-};
-
-static inline int cxio_cq_in_error(struct t3_cq *cq)
-{
- return ((struct t3_cq_status_page *)
- &cq->queue[1 << cq->size_log2])->cq_err;
-}
-
-static inline void cxio_set_cq_in_error(struct t3_cq *cq)
-{
- ((struct t3_cq_status_page *)
- &cq->queue[1 << cq->size_log2])->cq_err = 1;
-}
-
-static inline void cxio_set_wq_in_error(struct t3_wq *wq)
-{
- wq->queue->wq_in_err.err |= 1;
-}
-
-static inline void cxio_disable_wq_db(struct t3_wq *wq)
-{
- wq->queue->wq_in_err.err |= 2;
-}
-
-static inline void cxio_enable_wq_db(struct t3_wq *wq)
-{
- wq->queue->wq_in_err.err &= ~2;
-}
-
-static inline int cxio_wq_db_enabled(struct t3_wq *wq)
-{
- return !(wq->queue->wq_in_err.err & 2);
-}
-
-static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq)
-{
- struct t3_cqe *cqe;
-
- cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2));
- if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe))
- return cqe;
- return NULL;
-}
-
-static inline struct t3_cqe *cxio_next_sw_cqe(struct t3_cq *cq)
-{
- struct t3_cqe *cqe;
-
- if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) {
- cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2));
- return cqe;
- }
- return NULL;
-}
-
-static inline struct t3_cqe *cxio_next_cqe(struct t3_cq *cq)
-{
- struct t3_cqe *cqe;
-
- if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) {
- cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2));
- return cqe;
- }
- cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2));
- if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe))
- return cqe;
- return NULL;
-}
-
-#endif
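The Q_* macros at the top of the deleted cxio_wr.h implement a power-of-two ring indexed by free-running 32-bit pointers: the pointer difference is the occupancy, the low size_log2 bits give the slot index, and bit size_log2 of a pointer is the generation bit that flips on every pass (used by CQ_VLD_ENTRY to validate hardware CQEs). A userspace model of those macros, with a made-up ring size:

#include <stdint.h>
#include <stdio.h>

#define SIZE_LOG2 3	/* ring of 8 entries, chosen only for the demo */

static uint32_t q_idx(uint32_t ptr)	{ return ptr & ((1u << SIZE_LOG2) - 1); }
static uint32_t q_count(uint32_t r, uint32_t w)	{ return w - r; }
static int q_full(uint32_t r, uint32_t w)
{
	return ((w - r) >> SIZE_LOG2) && (r != w);
}
static int q_genbit(uint32_t ptr)	{ return !((ptr >> SIZE_LOG2) & 1); }

int main(void)
{
	uint32_t rptr = 0, wptr = 0;

	for (int i = 0; i < 8; i++)	/* producer fills the ring */
		wptr++;
	printf("count=%u full=%d idx(wptr)=%u\n",
	       q_count(rptr, wptr), q_full(rptr, wptr), q_idx(wptr));
	rptr += 3;			/* consumer drains three entries */
	printf("count=%u full=%d genbit(rptr)=%d genbit(wptr)=%d\n",
	       q_count(rptr, wptr), q_full(rptr, wptr),
	       q_genbit(rptr), q_genbit(wptr));
	return 0;
}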
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
deleted file mode 100644
index 56a8ab6210cf..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ /dev/null
@@ -1,282 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-
-#include <rdma/ib_verbs.h>
-
-#include "cxgb3_offload.h"
-#include "iwch_provider.h"
-#include <rdma/cxgb3-abi.h>
-#include "iwch.h"
-#include "iwch_cm.h"
-
-#define DRV_VERSION "1.1"
-
-MODULE_AUTHOR("Boyd Faulkner, Steve Wise");
-MODULE_DESCRIPTION("Chelsio T3 RDMA Driver");
-MODULE_LICENSE("Dual BSD/GPL");
-
-static void open_rnic_dev(struct t3cdev *);
-static void close_rnic_dev(struct t3cdev *);
-static void iwch_event_handler(struct t3cdev *, u32, u32);
-
-struct cxgb3_client t3c_client = {
- .name = "iw_cxgb3",
- .add = open_rnic_dev,
- .remove = close_rnic_dev,
- .handlers = t3c_handlers,
- .redirect = iwch_ep_redirect,
- .event_handler = iwch_event_handler
-};
-
-static LIST_HEAD(dev_list);
-static DEFINE_MUTEX(dev_mutex);
-
-static void disable_dbs(struct iwch_dev *rnicp)
-{
- unsigned long index;
- struct iwch_qp *qhp;
-
- xa_lock_irq(&rnicp->qps);
- xa_for_each(&rnicp->qps, index, qhp)
- cxio_disable_wq_db(&qhp->wq);
- xa_unlock_irq(&rnicp->qps);
-}
-
-static void enable_dbs(struct iwch_dev *rnicp, int ring_db)
-{
- unsigned long index;
- struct iwch_qp *qhp;
-
- xa_lock_irq(&rnicp->qps);
- xa_for_each(&rnicp->qps, index, qhp) {
- if (ring_db)
- ring_doorbell(qhp->rhp->rdev.ctrl_qp.doorbell,
- qhp->wq.qpid);
- cxio_enable_wq_db(&qhp->wq);
- }
- xa_unlock_irq(&rnicp->qps);
-}
-
-static void iwch_db_drop_task(struct work_struct *work)
-{
- struct iwch_dev *rnicp = container_of(work, struct iwch_dev,
- db_drop_task.work);
- enable_dbs(rnicp, 1);
-}
-
-static void rnic_init(struct iwch_dev *rnicp)
-{
- pr_debug("%s iwch_dev %p\n", __func__, rnicp);
- xa_init_flags(&rnicp->cqs, XA_FLAGS_LOCK_IRQ);
- xa_init_flags(&rnicp->qps, XA_FLAGS_LOCK_IRQ);
- xa_init_flags(&rnicp->mrs, XA_FLAGS_LOCK_IRQ);
- INIT_DELAYED_WORK(&rnicp->db_drop_task, iwch_db_drop_task);
-
- rnicp->attr.max_qps = T3_MAX_NUM_QP - 32;
- rnicp->attr.max_wrs = T3_MAX_QP_DEPTH;
- rnicp->attr.max_sge_per_wr = T3_MAX_SGE;
- rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE;
- rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1;
- rnicp->attr.max_cqes_per_cq = T3_MAX_CQ_DEPTH;
- rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev);
- rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE;
- rnicp->attr.max_pds = T3_MAX_NUM_PD - 1;
- rnicp->attr.mem_pgsizes_bitmask = T3_PAGESIZE_MASK;
- rnicp->attr.max_mr_size = T3_MAX_MR_SIZE;
- rnicp->attr.can_resize_wq = 0;
- rnicp->attr.max_rdma_reads_per_qp = 8;
- rnicp->attr.max_rdma_read_resources =
- rnicp->attr.max_rdma_reads_per_qp * rnicp->attr.max_qps;
- rnicp->attr.max_rdma_read_qp_depth = 8; /* IRD */
- rnicp->attr.max_rdma_read_depth =
- rnicp->attr.max_rdma_read_qp_depth * rnicp->attr.max_qps;
- rnicp->attr.rq_overflow_handled = 0;
- rnicp->attr.can_modify_ird = 0;
- rnicp->attr.can_modify_ord = 0;
- rnicp->attr.max_mem_windows = rnicp->attr.max_mem_regs - 1;
- rnicp->attr.stag0_value = 1;
- rnicp->attr.zbva_support = 1;
- rnicp->attr.local_invalidate_fence = 1;
- rnicp->attr.cq_overflow_detection = 1;
- return;
-}
-
-static void open_rnic_dev(struct t3cdev *tdev)
-{
- struct iwch_dev *rnicp;
-
- pr_debug("%s t3cdev %p\n", __func__, tdev);
- pr_info_once("Chelsio T3 RDMA Driver - version %s\n", DRV_VERSION);
- rnicp = ib_alloc_device(iwch_dev, ibdev);
- if (!rnicp) {
- pr_err("Cannot allocate ib device\n");
- return;
- }
- rnicp->rdev.ulp = rnicp;
- rnicp->rdev.t3cdev_p = tdev;
-
- mutex_lock(&dev_mutex);
-
- if (cxio_rdev_open(&rnicp->rdev)) {
- mutex_unlock(&dev_mutex);
- pr_err("Unable to open CXIO rdev\n");
- ib_dealloc_device(&rnicp->ibdev);
- return;
- }
-
- rnic_init(rnicp);
-
- list_add_tail(&rnicp->entry, &dev_list);
- mutex_unlock(&dev_mutex);
-
- if (iwch_register_device(rnicp)) {
- pr_err("Unable to register device\n");
- close_rnic_dev(tdev);
- }
- pr_info("Initialized device %s\n",
- pci_name(rnicp->rdev.rnic_info.pdev));
- return;
-}
-
-static void close_rnic_dev(struct t3cdev *tdev)
-{
- struct iwch_dev *dev, *tmp;
- pr_debug("%s t3cdev %p\n", __func__, tdev);
- mutex_lock(&dev_mutex);
- list_for_each_entry_safe(dev, tmp, &dev_list, entry) {
- if (dev->rdev.t3cdev_p == tdev) {
- dev->rdev.flags = CXIO_ERROR_FATAL;
- synchronize_net();
- cancel_delayed_work_sync(&dev->db_drop_task);
- list_del(&dev->entry);
- iwch_unregister_device(dev);
- cxio_rdev_close(&dev->rdev);
- WARN_ON(!xa_empty(&dev->cqs));
- WARN_ON(!xa_empty(&dev->qps));
- WARN_ON(!xa_empty(&dev->mrs));
- ib_dealloc_device(&dev->ibdev);
- break;
- }
- }
- mutex_unlock(&dev_mutex);
-}
-
-static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id)
-{
- struct cxio_rdev *rdev = tdev->ulp;
- struct iwch_dev *rnicp;
- struct ib_event event;
- u32 portnum = port_id + 1;
- int dispatch = 0;
-
- if (!rdev)
- return;
- rnicp = rdev_to_iwch_dev(rdev);
- switch (evt) {
- case OFFLOAD_STATUS_DOWN: {
- rdev->flags = CXIO_ERROR_FATAL;
- synchronize_net();
- event.event = IB_EVENT_DEVICE_FATAL;
- dispatch = 1;
- break;
- }
- case OFFLOAD_PORT_DOWN: {
- event.event = IB_EVENT_PORT_ERR;
- dispatch = 1;
- break;
- }
- case OFFLOAD_PORT_UP: {
- event.event = IB_EVENT_PORT_ACTIVE;
- dispatch = 1;
- break;
- }
- case OFFLOAD_DB_FULL: {
- disable_dbs(rnicp);
- break;
- }
- case OFFLOAD_DB_EMPTY: {
- enable_dbs(rnicp, 1);
- break;
- }
- case OFFLOAD_DB_DROP: {
- unsigned long delay = 1000;
- unsigned short r;
-
- disable_dbs(rnicp);
- get_random_bytes(&r, 2);
- delay += r & 1023;
-
- /*
- * delay is between 1000-2023 usecs.
- */
- schedule_delayed_work(&rnicp->db_drop_task,
- usecs_to_jiffies(delay));
- break;
- }
- }
-
- if (dispatch) {
- event.device = &rnicp->ibdev;
- event.element.port_num = portnum;
- ib_dispatch_event(&event);
- }
-
- return;
-}
-
-static int __init iwch_init_module(void)
-{
- int err;
-
- err = cxio_hal_init();
- if (err)
- return err;
- err = iwch_cm_init();
- if (err)
- return err;
- cxio_register_ev_cb(iwch_ev_dispatch);
- cxgb3_register_client(&t3c_client);
- return 0;
-}
-
-static void __exit iwch_exit_module(void)
-{
- cxgb3_unregister_client(&t3c_client);
- cxio_unregister_ev_cb(iwch_ev_dispatch);
- iwch_cm_term();
- cxio_hal_exit();
-}
-
-module_init(iwch_init_module);
-module_exit(iwch_exit_module);
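In the deleted iwch.c, the OFFLOAD_DB_DROP case disables every QP's doorbell and schedules iwch_db_drop_task() to re-enable (and re-ring) them after 1000 us plus up to 1023 us of random jitter, so the queues do not all hit the doorbell FIFO again at the same instant. A tiny userspace sketch of that jittered delay, with rand() standing in for the kernel's get_random_bytes():

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* delay ends up in the 1000-2023 us window, as the original comment notes. */
int main(void)
{
	unsigned long delay = 1000;
	unsigned short r;

	srand((unsigned)time(NULL));
	r = (unsigned short)rand();
	delay += r & 1023;

	printf("re-enable doorbells after %lu us\n", delay);
	return 0;
}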
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h
deleted file mode 100644
index 310a937bffcf..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __IWCH_H__
-#define __IWCH_H__
-
-#include <linux/mutex.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/xarray.h>
-#include <linux/workqueue.h>
-
-#include <rdma/ib_verbs.h>
-
-#include "cxio_hal.h"
-#include "cxgb3_offload.h"
-
-struct iwch_pd;
-struct iwch_cq;
-struct iwch_qp;
-struct iwch_mr;
-
-struct iwch_rnic_attributes {
- u32 max_qps;
- u32 max_wrs; /* Max for any SQ/RQ */
- u32 max_sge_per_wr;
- u32 max_sge_per_rdma_write_wr; /* for RDMA Write WR */
- u32 max_cqs;
- u32 max_cqes_per_cq;
- u32 max_mem_regs;
- u32 max_phys_buf_entries; /* for phys buf list */
- u32 max_pds;
-
- /*
- * The memory page sizes supported by this RNIC.
- * Bit position i in bitmap indicates page of
- * size 2^i bytes. Phys block list mode unsupported.
- */
- u32 mem_pgsizes_bitmask;
- u64 max_mr_size;
- u8 can_resize_wq;
-
- /*
- * The maximum number of RDMA Reads that can be outstanding
- * per QP with this RNIC as the target.
- */
- u32 max_rdma_reads_per_qp;
-
- /*
- * The maximum number of resources used for RDMA Reads
- * by this RNIC with this RNIC as the target.
- */
- u32 max_rdma_read_resources;
-
- /*
- * The max depth per QP for initiation of RDMA Read
- * by this RNIC.
- */
- u32 max_rdma_read_qp_depth;
-
- /*
- * The maximum depth for initiation of RDMA Read
- * operations by this RNIC on all QPs
- */
- u32 max_rdma_read_depth;
- u8 rq_overflow_handled;
- u32 can_modify_ird;
- u32 can_modify_ord;
- u32 max_mem_windows;
- u32 stag0_value;
- u8 zbva_support;
- u8 local_invalidate_fence;
- u32 cq_overflow_detection;
-};
-
-struct iwch_dev {
- struct ib_device ibdev;
- struct cxio_rdev rdev;
- u32 device_cap_flags;
- struct iwch_rnic_attributes attr;
- struct xarray cqs;
- struct xarray qps;
- struct xarray mrs;
- struct list_head entry;
- struct delayed_work db_drop_task;
-};
-
-static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev)
-{
- return container_of(ibdev, struct iwch_dev, ibdev);
-}
-
-static inline struct iwch_dev *rdev_to_iwch_dev(struct cxio_rdev *rdev)
-{
- return container_of(rdev, struct iwch_dev, rdev);
-}
-
-static inline int t3b_device(const struct iwch_dev *rhp)
-{
- return rhp->rdev.t3cdev_p->type == T3B;
-}
-
-static inline int t3a_device(const struct iwch_dev *rhp)
-{
- return rhp->rdev.t3cdev_p->type == T3A;
-}
-
-static inline struct iwch_cq *get_chp(struct iwch_dev *rhp, u32 cqid)
-{
- return xa_load(&rhp->cqs, cqid);
-}
-
-static inline struct iwch_qp *get_qhp(struct iwch_dev *rhp, u32 qpid)
-{
- return xa_load(&rhp->qps, qpid);
-}
-
-static inline struct iwch_mr *get_mhp(struct iwch_dev *rhp, u32 mmid)
-{
- return xa_load(&rhp->mrs, mmid);
-}
-
-extern struct cxgb3_client t3c_client;
-extern cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
-extern void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb);
-
-#endif
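mem_pgsizes_bitmask in the deleted iwch.h is seeded from T3_PAGESIZE_MASK (0xffff000 in cxio_hal.h). Reading each set bit i as a supported page size of 2^i bytes, bits 12 through 27 give exactly the 4KB to 128MB range that mask's comment mentions. A short userspace decode of that mask, included only to make the bitmap convention concrete:

#include <stdio.h>

int main(void)
{
	unsigned long mask = 0xffff000;	/* T3_PAGESIZE_MASK */

	for (int i = 0; i < 32; i++)
		if (mask & (1ul << i))
			printf("supported page size: %lu KB\n", (1ul << i) >> 10);
	return 0;
}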
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
deleted file mode 100644
index 1c90c86fc8b8..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ /dev/null
@@ -1,2258 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/slab.h>
-#include <linux/workqueue.h>
-#include <linux/skbuff.h>
-#include <linux/timer.h>
-#include <linux/notifier.h>
-#include <linux/inetdevice.h>
-
-#include <net/neighbour.h>
-#include <net/netevent.h>
-#include <net/route.h>
-
-#include "tcb.h"
-#include "cxgb3_offload.h"
-#include "iwch.h"
-#include "iwch_provider.h"
-#include "iwch_cm.h"
-
-static char *states[] = {
- "idle",
- "listen",
- "connecting",
- "mpa_wait_req",
- "mpa_req_sent",
- "mpa_req_rcvd",
- "mpa_rep_sent",
- "fpdu_mode",
- "aborting",
- "closing",
- "moribund",
- "dead",
- NULL,
-};
-
-int peer2peer = 0;
-module_param(peer2peer, int, 0644);
-MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)");
-
-static int ep_timeout_secs = 60;
-module_param(ep_timeout_secs, int, 0644);
-MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
- "in seconds (default=60)");
-
-static int mpa_rev = 1;
-module_param(mpa_rev, int, 0644);
-MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
- "1 is spec compliant. (default=1)");
-
-static int markers_enabled = 0;
-module_param(markers_enabled, int, 0644);
-MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");
-
-static int crc_enabled = 1;
-module_param(crc_enabled, int, 0644);
-MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");
-
-static int rcv_win = 256 * 1024;
-module_param(rcv_win, int, 0644);
-MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256)");
-
-static int snd_win = 32 * 1024;
-module_param(snd_win, int, 0644);
-MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)");
-
-static unsigned int nocong = 0;
-module_param(nocong, uint, 0644);
-MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)");
-
-static unsigned int cong_flavor = 1;
-module_param(cong_flavor, uint, 0644);
-MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)");
-
-static struct workqueue_struct *workq;
-
-static struct sk_buff_head rxq;
-
-static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
-static void ep_timeout(struct timer_list *t);
-static void connect_reply_upcall(struct iwch_ep *ep, int status);
-
-static void start_ep_timer(struct iwch_ep *ep)
-{
- pr_debug("%s ep %p\n", __func__, ep);
- if (timer_pending(&ep->timer)) {
- pr_debug("%s stopped / restarted timer ep %p\n", __func__, ep);
- del_timer_sync(&ep->timer);
- } else
- get_ep(&ep->com);
- ep->timer.expires = jiffies + ep_timeout_secs * HZ;
- add_timer(&ep->timer);
-}
-
-static void stop_ep_timer(struct iwch_ep *ep)
-{
- pr_debug("%s ep %p\n", __func__, ep);
- if (!timer_pending(&ep->timer)) {
- WARN(1, "%s timer stopped when it's not running! ep %p state %u\n",
- __func__, ep, ep->com.state);
- return;
- }
- del_timer_sync(&ep->timer);
- put_ep(&ep->com);
-}
-
-static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_entry *l2e)
-{
- int error = 0;
- struct cxio_rdev *rdev;
-
- rdev = (struct cxio_rdev *)tdev->ulp;
- if (cxio_fatal_error(rdev)) {
- kfree_skb(skb);
- return -EIO;
- }
- error = l2t_send(tdev, skb, l2e);
- if (error < 0)
- kfree_skb(skb);
- return error < 0 ? error : 0;
-}
-
-int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
-{
- int error = 0;
- struct cxio_rdev *rdev;
-
- rdev = (struct cxio_rdev *)tdev->ulp;
- if (cxio_fatal_error(rdev)) {
- kfree_skb(skb);
- return -EIO;
- }
- error = cxgb3_ofld_send(tdev, skb);
- if (error < 0)
- kfree_skb(skb);
- return error < 0 ? error : 0;
-}
-
-static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
-{
- struct cpl_tid_release *req;
-
- skb = get_skb(skb, sizeof *req, GFP_KERNEL);
- if (!skb)
- return;
- req = skb_put(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
- skb->priority = CPL_PRIORITY_SETUP;
- iwch_cxgb3_ofld_send(tdev, skb);
- return;
-}
-
-int iwch_quiesce_tid(struct iwch_ep *ep)
-{
- struct cpl_set_tcb_field *req;
- struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
-
- if (!skb)
- return -ENOMEM;
- req = skb_put(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
- req->reply = 0;
- req->cpu_idx = 0;
- req->word = htons(W_TCB_RX_QUIESCE);
- req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
- req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE);
-
- skb->priority = CPL_PRIORITY_DATA;
- return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
-}
-
-int iwch_resume_tid(struct iwch_ep *ep)
-{
- struct cpl_set_tcb_field *req;
- struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
-
- if (!skb)
- return -ENOMEM;
- req = skb_put(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
- req->reply = 0;
- req->cpu_idx = 0;
- req->word = htons(W_TCB_RX_QUIESCE);
- req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
- req->val = 0;
-
- skb->priority = CPL_PRIORITY_DATA;
- return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
-}
-
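-/*
- * Derive the effective MSS from the negotiated TCP options, allowing for
- * timestamps and clamping to a 128-byte minimum.
- */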
-static void set_emss(struct iwch_ep *ep, u16 opt)
-{
- pr_debug("%s ep %p opt %u\n", __func__, ep, opt);
- ep->emss = T3C_DATA(ep->com.tdev)->mtus[G_TCPOPT_MSS(opt)] - 40;
- if (G_TCPOPT_TSTAMP(opt))
- ep->emss -= 12;
- if (ep->emss < 128)
- ep->emss = 128;
- pr_debug("emss=%d\n", ep->emss);
-}
-
-static enum iwch_ep_state state_read(struct iwch_ep_common *epc)
-{
- unsigned long flags;
- enum iwch_ep_state state;
-
- spin_lock_irqsave(&epc->lock, flags);
- state = epc->state;
- spin_unlock_irqrestore(&epc->lock, flags);
- return state;
-}
-
-static void __state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
-{
- epc->state = new;
-}
-
-static void state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&epc->lock, flags);
- pr_debug("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
- __state_set(epc, new);
- spin_unlock_irqrestore(&epc->lock, flags);
- return;
-}
-
-static void *alloc_ep(int size, gfp_t gfp)
-{
- struct iwch_ep_common *epc;
-
- epc = kzalloc(size, gfp);
- if (epc) {
- kref_init(&epc->kref);
- spin_lock_init(&epc->lock);
- init_waitqueue_head(&epc->waitq);
- }
- pr_debug("%s alloc ep %p\n", __func__, epc);
- return epc;
-}
-
-void __free_ep(struct kref *kref)
-{
- struct iwch_ep *ep;
- ep = container_of(container_of(kref, struct iwch_ep_common, kref),
- struct iwch_ep, com);
- pr_debug("%s ep %p state %s\n",
- __func__, ep, states[state_read(&ep->com)]);
- if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
- cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
- dst_release(ep->dst);
- l2t_release(ep->com.tdev, ep->l2t);
- }
- kfree(ep);
-}
-
-static void release_ep_resources(struct iwch_ep *ep)
-{
- pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
- set_bit(RELEASE_RESOURCES, &ep->com.flags);
- put_ep(&ep->com);
-}
-
-static int status2errno(int status)
-{
- switch (status) {
- case CPL_ERR_NONE:
- return 0;
- case CPL_ERR_CONN_RESET:
- return -ECONNRESET;
- case CPL_ERR_ARP_MISS:
- return -EHOSTUNREACH;
- case CPL_ERR_CONN_TIMEDOUT:
- return -ETIMEDOUT;
- case CPL_ERR_TCAM_FULL:
- return -ENOMEM;
- case CPL_ERR_CONN_EXIST:
- return -EADDRINUSE;
- default:
- return -EIO;
- }
-}
-
-/*
- * Try to reuse skbs already allocated...
- */
-static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
-{
- if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
- skb_trim(skb, 0);
- skb_get(skb);
- } else {
- skb = alloc_skb(len, gfp);
- }
- return skb;
-}
-
-static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip,
- __be32 peer_ip, __be16 local_port,
- __be16 peer_port, u8 tos)
-{
- struct rtable *rt;
- struct flowi4 fl4;
-
- rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
- peer_port, local_port, IPPROTO_TCP,
- tos, 0);
- if (IS_ERR(rt))
- return NULL;
- return rt;
-}
-
-static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu)
-{
- int i = 0;
-
- while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu)
- ++i;
- return i;
-}
-
-static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb)
-{
- pr_debug("%s t3cdev %p\n", __func__, dev);
- kfree_skb(skb);
-}
-
-/*
- * Handle an ARP failure for an active open.
- */
-static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
-{
- pr_err("ARP failure during connect\n");
- kfree_skb(skb);
-}
-
-/*
- * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant
- * and send it along.
- */
-static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
-{
- struct cpl_abort_req *req = cplhdr(skb);
-
- pr_debug("%s t3cdev %p\n", __func__, dev);
- req->cmd = CPL_ABORT_NO_RST;
- iwch_cxgb3_ofld_send(dev, skb);
-}
-
-static int send_halfclose(struct iwch_ep *ep, gfp_t gfp)
-{
- struct cpl_close_con_req *req;
- struct sk_buff *skb;
-
- pr_debug("%s ep %p\n", __func__, ep);
- skb = get_skb(NULL, sizeof(*req), gfp);
- if (!skb) {
- pr_err("%s - failed to alloc skb\n", __func__);
- return -ENOMEM;
- }
- skb->priority = CPL_PRIORITY_DATA;
- set_arp_failure_handler(skb, arp_failure_discard);
- req = skb_put(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
- req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid));
- return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-}
-
-static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
-{
- struct cpl_abort_req *req;
-
- pr_debug("%s ep %p\n", __func__, ep);
- skb = get_skb(skb, sizeof(*req), gfp);
- if (!skb) {
- pr_err("%s - failed to alloc skb\n", __func__);
- return -ENOMEM;
- }
- skb->priority = CPL_PRIORITY_DATA;
- set_arp_failure_handler(skb, abort_arp_failure);
- req = skb_put_zero(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
- req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
- req->cmd = CPL_ABORT_SEND_RST;
- return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-}
-
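-/*
- * Build and send the CPL_ACT_OPEN_REQ that starts an active TCP connection
- * for this endpoint.
- */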
-static int send_connect(struct iwch_ep *ep)
-{
- struct cpl_act_open_req *req;
- struct sk_buff *skb;
- u32 opt0h, opt0l, opt2;
- unsigned int mtu_idx;
- int wscale;
-
- pr_debug("%s ep %p\n", __func__, ep);
-
- skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
- if (!skb) {
- pr_err("%s - failed to alloc skb\n", __func__);
- return -ENOMEM;
- }
- mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
- wscale = compute_wscale(rcv_win);
- opt0h = V_NAGLE(0) |
- V_NO_CONG(nocong) |
- V_KEEP_ALIVE(1) |
- F_TCAM_BYPASS |
- V_WND_SCALE(wscale) |
- V_MSS_IDX(mtu_idx) |
- V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
- opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
- opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) |
- V_CONG_CONTROL_FLAVOR(cong_flavor);
- skb->priority = CPL_PRIORITY_SETUP;
- set_arp_failure_handler(skb, act_open_req_arp_failure);
-
- req = skb_put(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ep->atid));
- req->local_port = ep->com.local_addr.sin_port;
- req->peer_port = ep->com.remote_addr.sin_port;
- req->local_ip = ep->com.local_addr.sin_addr.s_addr;
- req->peer_ip = ep->com.remote_addr.sin_addr.s_addr;
- req->opt0h = htonl(opt0h);
- req->opt0l = htonl(opt0l);
- req->params = 0;
- req->opt2 = htonl(opt2);
- return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-}
-
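-/*
- * Send the MPA start request (plus any private data) as offloaded TX data
- * and arm the MPA timer.
- */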
-static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
-{
- int mpalen;
- struct tx_data_wr *req;
- struct mpa_message *mpa;
- int len;
-
- pr_debug("%s ep %p pd_len %d\n", __func__, ep, ep->plen);
-
- BUG_ON(skb_cloned(skb));
-
- mpalen = sizeof(*mpa) + ep->plen;
- if (skb->data + mpalen + sizeof(*req) > skb_end_pointer(skb)) {
- kfree_skb(skb);
- skb = alloc_skb(mpalen + sizeof(*req), GFP_KERNEL);
- if (!skb) {
- connect_reply_upcall(ep, -ENOMEM);
- return;
- }
- }
- skb_trim(skb, 0);
- skb_reserve(skb, sizeof(*req));
- skb_put(skb, mpalen);
- skb->priority = CPL_PRIORITY_DATA;
- mpa = (struct mpa_message *) skb->data;
- memset(mpa, 0, sizeof(*mpa));
- memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
- mpa->flags = (crc_enabled ? MPA_CRC : 0) |
- (markers_enabled ? MPA_MARKERS : 0);
- mpa->private_data_size = htons(ep->plen);
- mpa->revision = mpa_rev;
-
- if (ep->plen)
- memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);
-
- /*
- * Reference the mpa skb. This ensures the data area
- * will remain in memory until the hw acks the tx.
- * Function tx_ack() will deref it.
- */
- skb_get(skb);
- set_arp_failure_handler(skb, arp_failure_discard);
- skb_reset_transport_header(skb);
- len = skb->len;
- req = skb_push(skb, sizeof(*req));
- req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
- req->wr_lo = htonl(V_WR_TID(ep->hwtid));
- req->len = htonl(len);
- req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
- V_TX_SNDBUF(snd_win>>15));
- req->flags = htonl(F_TX_INIT);
- req->sndseq = htonl(ep->snd_seq);
- BUG_ON(ep->mpa_skb);
- ep->mpa_skb = skb;
- iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
- start_ep_timer(ep);
- state_set(&ep->com, MPA_REQ_SENT);
- return;
-}
-
-static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
-{
- int mpalen;
- struct tx_data_wr *req;
- struct mpa_message *mpa;
- struct sk_buff *skb;
-
- pr_debug("%s ep %p plen %d\n", __func__, ep, plen);
-
- mpalen = sizeof(*mpa) + plen;
-
- skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
- if (!skb) {
- pr_err("%s - cannot alloc skb!\n", __func__);
- return -ENOMEM;
- }
- skb_reserve(skb, sizeof(*req));
- mpa = skb_put(skb, mpalen);
- memset(mpa, 0, sizeof(*mpa));
- memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
- mpa->flags = MPA_REJECT;
- mpa->revision = mpa_rev;
- mpa->private_data_size = htons(plen);
- if (plen)
- memcpy(mpa->private_data, pdata, plen);
-
- /*
- * Reference the mpa skb again. This ensures the data area
- * will remain in memory until the hw acks the tx.
- * Function tx_ack() will deref it.
- */
- skb_get(skb);
- skb->priority = CPL_PRIORITY_DATA;
- set_arp_failure_handler(skb, arp_failure_discard);
- skb_reset_transport_header(skb);
- req = skb_push(skb, sizeof(*req));
- req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
- req->wr_lo = htonl(V_WR_TID(ep->hwtid));
- req->len = htonl(mpalen);
- req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
- V_TX_SNDBUF(snd_win>>15));
- req->flags = htonl(F_TX_INIT);
- req->sndseq = htonl(ep->snd_seq);
- BUG_ON(ep->mpa_skb);
- ep->mpa_skb = skb;
- return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-}
-
-static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
-{
- int mpalen;
- struct tx_data_wr *req;
- struct mpa_message *mpa;
- int len;
- struct sk_buff *skb;
-
- pr_debug("%s ep %p plen %d\n", __func__, ep, plen);
-
- mpalen = sizeof(*mpa) + plen;
-
- skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
- if (!skb) {
- pr_err("%s - cannot alloc skb!\n", __func__);
- return -ENOMEM;
- }
- skb->priority = CPL_PRIORITY_DATA;
- skb_reserve(skb, sizeof(*req));
- mpa = skb_put(skb, mpalen);
- memset(mpa, 0, sizeof(*mpa));
- memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
- mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
- (markers_enabled ? MPA_MARKERS : 0);
- mpa->revision = mpa_rev;
- mpa->private_data_size = htons(plen);
- if (plen)
- memcpy(mpa->private_data, pdata, plen);
-
- /*
- * Reference the mpa skb. This ensures the data area
- * will remain in memory until the hw acks the tx.
- * Function tx_ack() will deref it.
- */
- skb_get(skb);
- set_arp_failure_handler(skb, arp_failure_discard);
- skb_reset_transport_header(skb);
- len = skb->len;
- req = skb_push(skb, sizeof(*req));
- req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
- req->wr_lo = htonl(V_WR_TID(ep->hwtid));
- req->len = htonl(len);
- req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
- V_TX_SNDBUF(snd_win>>15));
- req->flags = htonl(F_TX_INIT);
- req->sndseq = htonl(ep->snd_seq);
- ep->mpa_skb = skb;
- state_set(&ep->com, MPA_REP_SENT);
- return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-}
-
-static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
- struct cpl_act_establish *req = cplhdr(skb);
- unsigned int tid = GET_TID(req);
-
- pr_debug("%s ep %p tid %d\n", __func__, ep, tid);
-
- dst_confirm(ep->dst);
-
- /* setup the hwtid for this connection */
- ep->hwtid = tid;
- cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid);
-
- ep->snd_seq = ntohl(req->snd_isn);
- ep->rcv_seq = ntohl(req->rcv_isn);
-
- set_emss(ep, ntohs(req->tcp_opt));
-
- /* dealloc the atid */
- cxgb3_free_atid(ep->com.tdev, ep->atid);
-
- /* start MPA negotiation */
- send_mpa_req(ep, skb);
-
- return 0;
-}
-
-static void abort_connection(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
-{
- pr_debug("%s ep %p\n", __FILE__, ep);
- state_set(&ep->com, ABORTING);
- send_abort(ep, skb, gfp);
-}
-
-static void close_complete_upcall(struct iwch_ep *ep)
-{
- struct iw_cm_event event;
-
- pr_debug("%s ep %p\n", __func__, ep);
- memset(&event, 0, sizeof(event));
- event.event = IW_CM_EVENT_CLOSE;
- if (ep->com.cm_id) {
- pr_debug("close complete delivered ep %p cm_id %p tid %d\n",
- ep, ep->com.cm_id, ep->hwtid);
- ep->com.cm_id->event_handler(ep->com.cm_id, &event);
- ep->com.cm_id->rem_ref(ep->com.cm_id);
- ep->com.cm_id = NULL;
- ep->com.qp = NULL;
- }
-}
-
-static void peer_close_upcall(struct iwch_ep *ep)
-{
- struct iw_cm_event event;
-
- pr_debug("%s ep %p\n", __func__, ep);
- memset(&event, 0, sizeof(event));
- event.event = IW_CM_EVENT_DISCONNECT;
- if (ep->com.cm_id) {
- pr_debug("peer close delivered ep %p cm_id %p tid %d\n",
- ep, ep->com.cm_id, ep->hwtid);
- ep->com.cm_id->event_handler(ep->com.cm_id, &event);
- }
-}
-
-static void peer_abort_upcall(struct iwch_ep *ep)
-{
- struct iw_cm_event event;
-
- pr_debug("%s ep %p\n", __func__, ep);
- memset(&event, 0, sizeof(event));
- event.event = IW_CM_EVENT_CLOSE;
- event.status = -ECONNRESET;
- if (ep->com.cm_id) {
- pr_debug("abort delivered ep %p cm_id %p tid %d\n", ep,
- ep->com.cm_id, ep->hwtid);
- ep->com.cm_id->event_handler(ep->com.cm_id, &event);
- ep->com.cm_id->rem_ref(ep->com.cm_id);
- ep->com.cm_id = NULL;
- ep->com.qp = NULL;
- }
-}
-
-static void connect_reply_upcall(struct iwch_ep *ep, int status)
-{
- struct iw_cm_event event;
-
- pr_debug("%s ep %p status %d\n", __func__, ep, status);
- memset(&event, 0, sizeof(event));
- event.event = IW_CM_EVENT_CONNECT_REPLY;
- event.status = status;
- memcpy(&event.local_addr, &ep->com.local_addr,
- sizeof(ep->com.local_addr));
- memcpy(&event.remote_addr, &ep->com.remote_addr,
- sizeof(ep->com.remote_addr));
-
- if ((status == 0) || (status == -ECONNREFUSED)) {
- event.private_data_len = ep->plen;
- event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
- }
- if (ep->com.cm_id) {
- pr_debug("%s ep %p tid %d status %d\n", __func__, ep,
- ep->hwtid, status);
- ep->com.cm_id->event_handler(ep->com.cm_id, &event);
- }
- if (status < 0) {
- ep->com.cm_id->rem_ref(ep->com.cm_id);
- ep->com.cm_id = NULL;
- ep->com.qp = NULL;
- }
-}
-
-static void connect_request_upcall(struct iwch_ep *ep)
-{
- struct iw_cm_event event;
-
- pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
- memset(&event, 0, sizeof(event));
- event.event = IW_CM_EVENT_CONNECT_REQUEST;
- memcpy(&event.local_addr, &ep->com.local_addr,
- sizeof(ep->com.local_addr));
- memcpy(&event.remote_addr, &ep->com.remote_addr,
- sizeof(ep->com.local_addr));
- event.private_data_len = ep->plen;
- event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
- event.provider_data = ep;
- /*
- * Until ird/ord negotiation via MPAv2 support is added, send max
- * supported values
- */
- event.ird = event.ord = 8;
- if (state_read(&ep->parent_ep->com) != DEAD) {
- get_ep(&ep->com);
- ep->parent_ep->com.cm_id->event_handler(
- ep->parent_ep->com.cm_id,
- &event);
- }
- put_ep(&ep->parent_ep->com);
- ep->parent_ep = NULL;
-}
-
-static void established_upcall(struct iwch_ep *ep)
-{
- struct iw_cm_event event;
-
- pr_debug("%s ep %p\n", __func__, ep);
- memset(&event, 0, sizeof(event));
- event.event = IW_CM_EVENT_ESTABLISHED;
- /*
- * Until ird/ord negotiation via MPAv2 support is added, send max
- * supported values
- */
- event.ird = event.ord = 8;
- if (ep->com.cm_id) {
- pr_debug("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
- ep->com.cm_id->event_handler(ep->com.cm_id, &event);
- }
-}
-
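-/*
- * Return RX credits to the hardware via CPL_RX_DATA_ACK, forcing an ACK.
- */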
-static int update_rx_credits(struct iwch_ep *ep, u32 credits)
-{
- struct cpl_rx_data_ack *req;
- struct sk_buff *skb;
-
- pr_debug("%s ep %p credits %u\n", __func__, ep, credits);
- skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
- if (!skb) {
- pr_err("update_rx_credits - cannot alloc skb!\n");
- return 0;
- }
-
- req = skb_put(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid));
- req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1));
- skb->priority = CPL_PRIORITY_ACK;
- iwch_cxgb3_ofld_send(ep->com.tdev, skb);
- return credits;
-}
-
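-/*
- * Process the peer's MPA start reply: validate the header, negotiate CRC
- * and marker use, and move the QP to RTS on success.
- */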
-static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
-{
- struct mpa_message *mpa;
- u16 plen;
- struct iwch_qp_attributes attrs;
- enum iwch_qp_attr_mask mask;
- int err;
-
- pr_debug("%s ep %p\n", __func__, ep);
-
- /*
- * Stop mpa timer. If it expired, then the state has
- * changed and we bail since ep_timeout already aborted
- * the connection.
- */
- stop_ep_timer(ep);
- if (state_read(&ep->com) != MPA_REQ_SENT)
- return;
-
- /*
- * If we get more than the supported amount of private data
- * then we must fail this connection.
- */
- if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
- err = -EINVAL;
- goto err;
- }
-
- /*
- * copy the new data into our accumulation buffer.
- */
- skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
- skb->len);
- ep->mpa_pkt_len += skb->len;
-
- /*
- * if we don't even have the mpa message, then bail.
- */
- if (ep->mpa_pkt_len < sizeof(*mpa))
- return;
- mpa = (struct mpa_message *) ep->mpa_pkt;
-
- /* Validate MPA header. */
- if (mpa->revision != mpa_rev) {
- err = -EPROTO;
- goto err;
- }
- if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
- err = -EPROTO;
- goto err;
- }
-
- plen = ntohs(mpa->private_data_size);
-
- /*
- * Fail if there's too much private data.
- */
- if (plen > MPA_MAX_PRIVATE_DATA) {
- err = -EPROTO;
- goto err;
- }
-
- /*
- * Fail if plen does not account for the packet size.
- */
- if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
- err = -EPROTO;
- goto err;
- }
-
- ep->plen = (u8) plen;
-
- /*
- * If we don't have all the pdata yet, then bail.
- * We'll continue processing when more data arrives.
- */
- if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
- return;
-
- if (mpa->flags & MPA_REJECT) {
- err = -ECONNREFUSED;
- goto err;
- }
-
- /*
- * If we get here we have accumulated the entire mpa
- * start reply message including private data. And
- * the MPA header is valid.
- */
- state_set(&ep->com, FPDU_MODE);
- ep->mpa_attr.initiator = 1;
- ep->mpa_attr.crc_enabled = ((mpa->flags & MPA_CRC) || crc_enabled) ? 1 : 0;
- ep->mpa_attr.recv_marker_enabled = markers_enabled;
- ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
- ep->mpa_attr.version = mpa_rev;
- pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n",
- __func__,
- ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
- ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
-
- attrs.mpa_attr = ep->mpa_attr;
- attrs.max_ird = ep->ird;
- attrs.max_ord = ep->ord;
- attrs.llp_stream_handle = ep;
- attrs.next_state = IWCH_QP_STATE_RTS;
-
- mask = IWCH_QP_ATTR_NEXT_STATE |
- IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR |
- IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD;
-
- /* bind QP and TID with INIT_WR */
- err = iwch_modify_qp(ep->com.qp->rhp,
- ep->com.qp, mask, &attrs, 1);
- if (err)
- goto err;
-
- if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0) {
- iwch_post_zb_read(ep);
- }
-
- goto out;
-err:
- abort_connection(ep, skb, GFP_KERNEL);
-out:
- connect_reply_upcall(ep, err);
- return;
-}
-
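-/*
- * Process the peer's MPA start request and, once it is fully received and
- * validated, deliver a connect request upcall to the listening ULP.
- */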
-static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
-{
- struct mpa_message *mpa;
- u16 plen;
-
- pr_debug("%s ep %p\n", __func__, ep);
-
- /*
- * Stop mpa timer. If it expired, then the state has
- * changed and we bail since ep_timeout already aborted
- * the connection.
- */
- stop_ep_timer(ep);
- if (state_read(&ep->com) != MPA_REQ_WAIT)
- return;
-
- /*
- * If we get more than the supported amount of private data
- * then we must fail this connection.
- */
- if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
- abort_connection(ep, skb, GFP_KERNEL);
- return;
- }
-
- pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
-
- /*
- * Copy the new data into our accumulation buffer.
- */
- skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
- skb->len);
- ep->mpa_pkt_len += skb->len;
-
- /*
- * If we don't even have the mpa message, then bail.
- * We'll continue processing when more data arrives.
- */
- if (ep->mpa_pkt_len < sizeof(*mpa))
- return;
- pr_debug("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
- mpa = (struct mpa_message *) ep->mpa_pkt;
-
- /*
- * Validate MPA Header.
- */
- if (mpa->revision != mpa_rev) {
- abort_connection(ep, skb, GFP_KERNEL);
- return;
- }
-
- if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
- abort_connection(ep, skb, GFP_KERNEL);
- return;
- }
-
- plen = ntohs(mpa->private_data_size);
-
- /*
- * Fail if there's too much private data.
- */
- if (plen > MPA_MAX_PRIVATE_DATA) {
- abort_connection(ep, skb, GFP_KERNEL);
- return;
- }
-
- /*
- * Fail if plen does not account for the packet size.
- */
- if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
- abort_connection(ep, skb, GFP_KERNEL);
- return;
- }
- ep->plen = (u8) plen;
-
- /*
- * If we don't have all the pdata yet, then bail.
- */
- if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
- return;
-
- /*
- * If we get here we have accumulated the entire mpa
- * start request message including private data.
- */
- ep->mpa_attr.initiator = 0;
- ep->mpa_attr.crc_enabled = ((mpa->flags & MPA_CRC) || crc_enabled) ? 1 : 0;
- ep->mpa_attr.recv_marker_enabled = markers_enabled;
- ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
- ep->mpa_attr.version = mpa_rev;
- pr_debug("%s - crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d\n",
- __func__,
- ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
- ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
-
- state_set(&ep->com, MPA_REQ_RCVD);
-
- /* drive upcall */
- connect_request_upcall(ep);
- return;
-}
-
-static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
- struct cpl_rx_data *hdr = cplhdr(skb);
- unsigned int dlen = ntohs(hdr->len);
-
- pr_debug("%s ep %p dlen %u\n", __func__, ep, dlen);
-
- skb_pull(skb, sizeof(*hdr));
- skb_trim(skb, dlen);
-
- ep->rcv_seq += dlen;
- BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen));
-
- switch (state_read(&ep->com)) {
- case MPA_REQ_SENT:
- process_mpa_reply(ep, skb);
- break;
- case MPA_REQ_WAIT:
- process_mpa_request(ep, skb);
- break;
- case MPA_REP_SENT:
- break;
- default:
- pr_err("%s Unexpected streaming data. ep %p state %d tid %d\n",
- __func__, ep, state_read(&ep->com), ep->hwtid);
-
- /*
- * The ep will timeout and inform the ULP of the failure.
- * See ep_timeout().
- */
- break;
- }
-
- /* update RX credits */
- update_rx_credits(ep, dlen);
-
- return CPL_RET_BUF_DONE;
-}
-
-/*
- * Upcall from the adapter indicating data has been transmitted.
- * For us it's just the single MPA request or reply. We can now free
- * the skb holding the mpa message.
- */
-static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
- struct cpl_wr_ack *hdr = cplhdr(skb);
- unsigned int credits = ntohs(hdr->credits);
- unsigned long flags;
- int post_zb = 0;
-
- pr_debug("%s ep %p credits %u\n", __func__, ep, credits);
-
- if (credits == 0) {
- pr_debug("%s 0 credit ack ep %p state %u\n",
- __func__, ep, state_read(&ep->com));
- return CPL_RET_BUF_DONE;
- }
-
- spin_lock_irqsave(&ep->com.lock, flags);
- BUG_ON(credits != 1);
- dst_confirm(ep->dst);
- if (!ep->mpa_skb) {
- pr_debug("%s rdma_init wr_ack ep %p state %u\n",
- __func__, ep, ep->com.state);
- if (ep->mpa_attr.initiator) {
- pr_debug("%s initiator ep %p state %u\n",
- __func__, ep, ep->com.state);
- if (peer2peer && ep->com.state == FPDU_MODE)
- post_zb = 1;
- } else {
- pr_debug("%s responder ep %p state %u\n",
- __func__, ep, ep->com.state);
- if (ep->com.state == MPA_REQ_RCVD) {
- ep->com.rpl_done = 1;
- wake_up(&ep->com.waitq);
- }
- }
- } else {
- pr_debug("%s lsm ack ep %p state %u freeing skb\n",
- __func__, ep, ep->com.state);
- kfree_skb(ep->mpa_skb);
- ep->mpa_skb = NULL;
- }
- spin_unlock_irqrestore(&ep->com.lock, flags);
- if (post_zb)
- iwch_post_zb_read(ep);
- return CPL_RET_BUF_DONE;
-}
-
-static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
- unsigned long flags;
- int release = 0;
-
- pr_debug("%s ep %p\n", __func__, ep);
- BUG_ON(!ep);
-
- /*
- * We get 2 abort replies from the HW. The first one must
- * be ignored except for scribbling that we need one more.
- */
- if (!test_and_set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) {
- return CPL_RET_BUF_DONE;
- }
-
- spin_lock_irqsave(&ep->com.lock, flags);
- switch (ep->com.state) {
- case ABORTING:
- close_complete_upcall(ep);
- __state_set(&ep->com, DEAD);
- release = 1;
- break;
- default:
- pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state);
- break;
- }
- spin_unlock_irqrestore(&ep->com.lock, flags);
-
- if (release)
- release_ep_resources(ep);
- return CPL_RET_BUF_DONE;
-}
-
-/*
- * Return whether a failed active open has allocated a TID
- */
-static inline int act_open_has_tid(int status)
-{
- return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
- status != CPL_ERR_ARP_MISS;
-}
-
-static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
- struct cpl_act_open_rpl *rpl = cplhdr(skb);
-
- pr_debug("%s ep %p status %u errno %d\n", __func__, ep, rpl->status,
- status2errno(rpl->status));
- connect_reply_upcall(ep, status2errno(rpl->status));
- state_set(&ep->com, DEAD);
- if (ep->com.tdev->type != T3A && act_open_has_tid(rpl->status))
- release_tid(ep->com.tdev, GET_TID(rpl), NULL);
- cxgb3_free_atid(ep->com.tdev, ep->atid);
- dst_release(ep->dst);
- l2t_release(ep->com.tdev, ep->l2t);
- put_ep(&ep->com);
- return CPL_RET_BUF_DONE;
-}
-
-static int listen_start(struct iwch_listen_ep *ep)
-{
- struct sk_buff *skb;
- struct cpl_pass_open_req *req;
-
- pr_debug("%s ep %p\n", __func__, ep);
- skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
- if (!skb) {
- pr_err("t3c_listen_start failed to alloc skb!\n");
- return -ENOMEM;
- }
-
- req = skb_put(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, ep->stid));
- req->local_port = ep->com.local_addr.sin_port;
- req->local_ip = ep->com.local_addr.sin_addr.s_addr;
- req->peer_port = 0;
- req->peer_ip = 0;
- req->peer_netmask = 0;
- req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS);
- req->opt0l = htonl(V_RCV_BUFSIZ(rcv_win>>10));
- req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK));
-
- skb->priority = 1;
- return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
-}
-
-static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_listen_ep *ep = ctx;
- struct cpl_pass_open_rpl *rpl = cplhdr(skb);
-
- pr_debug("%s ep %p status %d error %d\n", __func__, ep,
- rpl->status, status2errno(rpl->status));
- ep->com.rpl_err = status2errno(rpl->status);
- ep->com.rpl_done = 1;
- wake_up(&ep->com.waitq);
-
- return CPL_RET_BUF_DONE;
-}
-
-static int listen_stop(struct iwch_listen_ep *ep)
-{
- struct sk_buff *skb;
- struct cpl_close_listserv_req *req;
-
- pr_debug("%s ep %p\n", __func__, ep);
- skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
- if (!skb) {
- pr_err("%s - failed to alloc skb\n", __func__);
- return -ENOMEM;
- }
- req = skb_put(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- req->cpu_idx = 0;
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid));
- skb->priority = 1;
- return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
-}
-
-static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb,
- void *ctx)
-{
- struct iwch_listen_ep *ep = ctx;
- struct cpl_close_listserv_rpl *rpl = cplhdr(skb);
-
- pr_debug("%s ep %p\n", __func__, ep);
- ep->com.rpl_err = status2errno(rpl->status);
- ep->com.rpl_done = 1;
- wake_up(&ep->com.waitq);
- return CPL_RET_BUF_DONE;
-}
-
-static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb)
-{
- struct cpl_pass_accept_rpl *rpl;
- unsigned int mtu_idx;
- u32 opt0h, opt0l, opt2;
- int wscale;
-
- pr_debug("%s ep %p\n", __func__, ep);
- BUG_ON(skb_cloned(skb));
- skb_trim(skb, sizeof(*rpl));
- skb_get(skb);
- mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
- wscale = compute_wscale(rcv_win);
- opt0h = V_NAGLE(0) |
- V_NO_CONG(nocong) |
- V_KEEP_ALIVE(1) |
- F_TCAM_BYPASS |
- V_WND_SCALE(wscale) |
- V_MSS_IDX(mtu_idx) |
- V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
- opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
- opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) |
- V_CONG_CONTROL_FLAVOR(cong_flavor);
-
- rpl = cplhdr(skb);
- rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, ep->hwtid));
- rpl->peer_ip = peer_ip;
- rpl->opt0h = htonl(opt0h);
- rpl->opt0l_status = htonl(opt0l | CPL_PASS_OPEN_ACCEPT);
- rpl->opt2 = htonl(opt2);
- rpl->rsvd = rpl->opt2; /* workaround for HW bug */
- skb->priority = CPL_PRIORITY_SETUP;
- iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-
- return;
-}
-
-static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip,
- struct sk_buff *skb)
-{
- pr_debug("%s t3cdev %p tid %u peer_ip %x\n", __func__, tdev, hwtid,
- peer_ip);
- BUG_ON(skb_cloned(skb));
- skb_trim(skb, sizeof(struct cpl_tid_release));
- skb_get(skb);
-
- if (tdev->type != T3A)
- release_tid(tdev, hwtid, skb);
- else {
- struct cpl_pass_accept_rpl *rpl;
-
- rpl = cplhdr(skb);
- skb->priority = CPL_PRIORITY_SETUP;
- rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
- hwtid));
- rpl->peer_ip = peer_ip;
- rpl->opt0h = htonl(F_TCAM_BYPASS);
- rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
- rpl->opt2 = 0;
- rpl->rsvd = rpl->opt2;
- iwch_cxgb3_ofld_send(tdev, skb);
- }
-}
-
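-/*
- * Handle an incoming connection request on a listening endpoint: resolve
- * the route and L2T entry, allocate a child endpoint, and accept or reject
- * the request.
- */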
-static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *child_ep, *parent_ep = ctx;
- struct cpl_pass_accept_req *req = cplhdr(skb);
- unsigned int hwtid = GET_TID(req);
- struct dst_entry *dst;
- struct l2t_entry *l2t;
- struct rtable *rt;
- struct iff_mac tim;
-
- pr_debug("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid);
-
- if (state_read(&parent_ep->com) != LISTEN) {
- pr_err("%s - listening ep not in LISTEN\n", __func__);
- goto reject;
- }
-
- /*
- * Find the netdev for this connection request.
- */
- tim.mac_addr = req->dst_mac;
- tim.vlan_tag = ntohs(req->vlan_tag);
- if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
- pr_err("%s bad dst mac %pM\n", __func__, req->dst_mac);
- goto reject;
- }
-
- /* Find output route */
- rt = find_route(tdev,
- req->local_ip,
- req->peer_ip,
- req->local_port,
- req->peer_port, G_PASS_OPEN_TOS(ntohl(req->tos_tid)));
- if (!rt) {
- pr_err("%s - failed to find dst entry!\n", __func__);
- goto reject;
- }
- dst = &rt->dst;
- l2t = t3_l2t_get(tdev, dst, NULL, &req->peer_ip);
- if (!l2t) {
- pr_err("%s - failed to allocate l2t entry!\n", __func__);
- dst_release(dst);
- goto reject;
- }
- child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
- if (!child_ep) {
- pr_err("%s - failed to allocate ep entry!\n", __func__);
- l2t_release(tdev, l2t);
- dst_release(dst);
- goto reject;
- }
- state_set(&child_ep->com, CONNECTING);
- child_ep->com.tdev = tdev;
- child_ep->com.cm_id = NULL;
- child_ep->com.local_addr.sin_family = AF_INET;
- child_ep->com.local_addr.sin_port = req->local_port;
- child_ep->com.local_addr.sin_addr.s_addr = req->local_ip;
- child_ep->com.remote_addr.sin_family = AF_INET;
- child_ep->com.remote_addr.sin_port = req->peer_port;
- child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip;
- get_ep(&parent_ep->com);
- child_ep->parent_ep = parent_ep;
- child_ep->tos = G_PASS_OPEN_TOS(ntohl(req->tos_tid));
- child_ep->l2t = l2t;
- child_ep->dst = dst;
- child_ep->hwtid = hwtid;
- timer_setup(&child_ep->timer, ep_timeout, 0);
- cxgb3_insert_tid(tdev, &t3c_client, child_ep, hwtid);
- accept_cr(child_ep, req->peer_ip, skb);
- goto out;
-reject:
- reject_cr(tdev, hwtid, req->peer_ip, skb);
-out:
- return CPL_RET_BUF_DONE;
-}
-
-static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
- struct cpl_pass_establish *req = cplhdr(skb);
-
- pr_debug("%s ep %p\n", __func__, ep);
- ep->snd_seq = ntohl(req->snd_isn);
- ep->rcv_seq = ntohl(req->rcv_isn);
-
- set_emss(ep, ntohs(req->tcp_opt));
-
- dst_confirm(ep->dst);
- state_set(&ep->com, MPA_REQ_WAIT);
- start_ep_timer(ep);
-
- return CPL_RET_BUF_DONE;
-}
-
-static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
- struct iwch_qp_attributes attrs;
- unsigned long flags;
- int disconnect = 1;
- int release = 0;
-
- pr_debug("%s ep %p\n", __func__, ep);
- dst_confirm(ep->dst);
-
- spin_lock_irqsave(&ep->com.lock, flags);
- switch (ep->com.state) {
- case MPA_REQ_WAIT:
- __state_set(&ep->com, CLOSING);
- break;
- case MPA_REQ_SENT:
- __state_set(&ep->com, CLOSING);
- connect_reply_upcall(ep, -ECONNRESET);
- break;
- case MPA_REQ_RCVD:
-
- /*
- * We're gonna mark this puppy DEAD, but keep
- * the reference on it until the ULP accepts or
- * rejects the CR. Also wake up anyone waiting
- * in rdma connection migration (see iwch_accept_cr()).
- */
- __state_set(&ep->com, CLOSING);
- ep->com.rpl_done = 1;
- ep->com.rpl_err = -ECONNRESET;
- pr_debug("waking up ep %p\n", ep);
- wake_up(&ep->com.waitq);
- break;
- case MPA_REP_SENT:
- __state_set(&ep->com, CLOSING);
- ep->com.rpl_done = 1;
- ep->com.rpl_err = -ECONNRESET;
- pr_debug("waking up ep %p\n", ep);
- wake_up(&ep->com.waitq);
- break;
- case FPDU_MODE:
- start_ep_timer(ep);
- __state_set(&ep->com, CLOSING);
- attrs.next_state = IWCH_QP_STATE_CLOSING;
- iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
- IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
- peer_close_upcall(ep);
- break;
- case ABORTING:
- disconnect = 0;
- break;
- case CLOSING:
- __state_set(&ep->com, MORIBUND);
- disconnect = 0;
- break;
- case MORIBUND:
- stop_ep_timer(ep);
- if (ep->com.cm_id && ep->com.qp) {
- attrs.next_state = IWCH_QP_STATE_IDLE;
- iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
- IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
- }
- close_complete_upcall(ep);
- __state_set(&ep->com, DEAD);
- release = 1;
- disconnect = 0;
- break;
- case DEAD:
- disconnect = 0;
- break;
- default:
- BUG_ON(1);
- }
- spin_unlock_irqrestore(&ep->com.lock, flags);
- if (disconnect)
- iwch_ep_disconnect(ep, 0, GFP_KERNEL);
- if (release)
- release_ep_resources(ep);
- return CPL_RET_BUF_DONE;
-}
-
-/*
- * Returns whether an ABORT_REQ_RSS message is a negative advice.
- */
-static int is_neg_adv_abort(unsigned int status)
-{
- return status == CPL_ERR_RTX_NEG_ADVICE ||
- status == CPL_ERR_PERSIST_NEG_ADVICE;
-}
-
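-/*
- * Handle an abort from the peer: ignore negative advice, move the QP to
- * ERROR where appropriate, notify the ULP, and reply with CPL_ABORT_RPL.
- */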
-static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct cpl_abort_req_rss *req = cplhdr(skb);
- struct iwch_ep *ep = ctx;
- struct cpl_abort_rpl *rpl;
- struct sk_buff *rpl_skb;
- struct iwch_qp_attributes attrs;
- int ret;
- int release = 0;
- unsigned long flags;
-
- if (is_neg_adv_abort(req->status)) {
- pr_debug("%s neg_adv_abort ep %p tid %d\n", __func__, ep,
- ep->hwtid);
- t3_l2t_send_event(ep->com.tdev, ep->l2t);
- return CPL_RET_BUF_DONE;
- }
-
- /*
- * We get 2 peer aborts from the HW. The first one must
- * be ignored except for scribbling that we need one more.
- */
- if (!test_and_set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) {
- return CPL_RET_BUF_DONE;
- }
-
- spin_lock_irqsave(&ep->com.lock, flags);
- pr_debug("%s ep %p state %u\n", __func__, ep, ep->com.state);
- switch (ep->com.state) {
- case CONNECTING:
- break;
- case MPA_REQ_WAIT:
- stop_ep_timer(ep);
- break;
- case MPA_REQ_SENT:
- stop_ep_timer(ep);
- connect_reply_upcall(ep, -ECONNRESET);
- break;
- case MPA_REP_SENT:
- ep->com.rpl_done = 1;
- ep->com.rpl_err = -ECONNRESET;
- pr_debug("waking up ep %p\n", ep);
- wake_up(&ep->com.waitq);
- break;
- case MPA_REQ_RCVD:
-
- /*
- * We're gonna mark this puppy DEAD, but keep
- * the reference on it until the ULP accepts or
- * rejects the CR. Also wake up anyone waiting
- * in rdma connection migration (see iwch_accept_cr()).
- */
- ep->com.rpl_done = 1;
- ep->com.rpl_err = -ECONNRESET;
- pr_debug("waking up ep %p\n", ep);
- wake_up(&ep->com.waitq);
- break;
- case MORIBUND:
- case CLOSING:
- stop_ep_timer(ep);
- /*FALLTHROUGH*/
- case FPDU_MODE:
- if (ep->com.cm_id && ep->com.qp) {
- attrs.next_state = IWCH_QP_STATE_ERROR;
- ret = iwch_modify_qp(ep->com.qp->rhp,
- ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
- &attrs, 1);
- if (ret)
- pr_err("%s - qp <- error failed!\n", __func__);
- }
- peer_abort_upcall(ep);
- break;
- case ABORTING:
- break;
- case DEAD:
- pr_debug("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
- spin_unlock_irqrestore(&ep->com.lock, flags);
- return CPL_RET_BUF_DONE;
- default:
- BUG_ON(1);
- break;
- }
- dst_confirm(ep->dst);
- if (ep->com.state != ABORTING) {
- __state_set(&ep->com, DEAD);
- release = 1;
- }
- spin_unlock_irqrestore(&ep->com.lock, flags);
-
- rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
- if (!rpl_skb) {
- pr_err("%s - cannot allocate skb!\n", __func__);
- release = 1;
- goto out;
- }
- rpl_skb->priority = CPL_PRIORITY_DATA;
- rpl = skb_put(rpl_skb, sizeof(*rpl));
- rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
- rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
- OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
- rpl->cmd = CPL_ABORT_NO_RST;
- iwch_cxgb3_ofld_send(ep->com.tdev, rpl_skb);
-out:
- if (release)
- release_ep_resources(ep);
- return CPL_RET_BUF_DONE;
-}
-
-static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
- struct iwch_qp_attributes attrs;
- unsigned long flags;
- int release = 0;
-
- pr_debug("%s ep %p\n", __func__, ep);
- BUG_ON(!ep);
-
- /* The cm_id may be null if we failed to connect */
- spin_lock_irqsave(&ep->com.lock, flags);
- switch (ep->com.state) {
- case CLOSING:
- __state_set(&ep->com, MORIBUND);
- break;
- case MORIBUND:
- stop_ep_timer(ep);
- if ((ep->com.cm_id) && (ep->com.qp)) {
- attrs.next_state = IWCH_QP_STATE_IDLE;
- iwch_modify_qp(ep->com.qp->rhp,
- ep->com.qp,
- IWCH_QP_ATTR_NEXT_STATE,
- &attrs, 1);
- }
- close_complete_upcall(ep);
- __state_set(&ep->com, DEAD);
- release = 1;
- break;
- case ABORTING:
- case DEAD:
- break;
- default:
- BUG_ON(1);
- break;
- }
- spin_unlock_irqrestore(&ep->com.lock, flags);
- if (release)
- release_ep_resources(ep);
- return CPL_RET_BUF_DONE;
-}
-
-/*
- * T3A does 3 things when a TERM is received:
- * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet
- * 2) generate an async event on the QP with the TERMINATE opcode
- * 3) post a TERMINATE opcode cqe into the associated CQ.
- *
- * For (1), we save the message in the qp for the consumer to read later.
- * For (2), we move the QP into TERMINATE, post a QP event and disconnect.
- * For (3), we toss the CQE in cxio_poll_cq().
- *
- * terminate() handles case (1)...
- */
-static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
-
- if (state_read(&ep->com) != FPDU_MODE)
- return CPL_RET_BUF_DONE;
-
- pr_debug("%s ep %p\n", __func__, ep);
- skb_pull(skb, sizeof(struct cpl_rdma_terminate));
- pr_debug("%s saving %d bytes of term msg\n", __func__, skb->len);
- skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer,
- skb->len);
- ep->com.qp->attr.terminate_msg_len = skb->len;
- ep->com.qp->attr.is_terminate_local = 0;
- return CPL_RET_BUF_DONE;
-}
-
-static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct cpl_rdma_ec_status *rep = cplhdr(skb);
- struct iwch_ep *ep = ctx;
-
- pr_debug("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid,
- rep->status);
- if (rep->status) {
- struct iwch_qp_attributes attrs;
-
- pr_err("%s BAD CLOSE - Aborting tid %u\n",
- __func__, ep->hwtid);
- stop_ep_timer(ep);
- attrs.next_state = IWCH_QP_STATE_ERROR;
- iwch_modify_qp(ep->com.qp->rhp,
- ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
- &attrs, 1);
- abort_connection(ep, NULL, GFP_KERNEL);
- }
- return CPL_RET_BUF_DONE;
-}
-
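-/*
- * Endpoint timer expired: abort the connection and, for an active open,
- * report -ETIMEDOUT to the ULP.
- */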
-static void ep_timeout(struct timer_list *t)
-{
- struct iwch_ep *ep = from_timer(ep, t, timer);
- struct iwch_qp_attributes attrs;
- unsigned long flags;
- int abort = 1;
-
- spin_lock_irqsave(&ep->com.lock, flags);
- pr_debug("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
- ep->com.state);
- switch (ep->com.state) {
- case MPA_REQ_SENT:
- __state_set(&ep->com, ABORTING);
- connect_reply_upcall(ep, -ETIMEDOUT);
- break;
- case MPA_REQ_WAIT:
- __state_set(&ep->com, ABORTING);
- break;
- case CLOSING:
- case MORIBUND:
- if (ep->com.cm_id && ep->com.qp) {
- attrs.next_state = IWCH_QP_STATE_ERROR;
- iwch_modify_qp(ep->com.qp->rhp,
- ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
- &attrs, 1);
- }
- __state_set(&ep->com, ABORTING);
- break;
- default:
- WARN(1, "%s unexpected state ep %p state %u\n",
- __func__, ep, ep->com.state);
- abort = 0;
- }
- spin_unlock_irqrestore(&ep->com.lock, flags);
- if (abort)
- abort_connection(ep, NULL, GFP_ATOMIC);
- put_ep(&ep->com);
-}
-
-int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
-{
- struct iwch_ep *ep = to_ep(cm_id);
-
- pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
-
- if (state_read(&ep->com) == DEAD) {
- put_ep(&ep->com);
- return -ECONNRESET;
- }
- BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
- if (mpa_rev == 0)
- abort_connection(ep, NULL, GFP_KERNEL);
- else {
- send_mpa_reject(ep, pdata, pdata_len);
- iwch_ep_disconnect(ep, 0, GFP_KERNEL);
- }
- put_ep(&ep->com);
- return 0;
-}
-
-int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
-{
- int err;
- struct iwch_qp_attributes attrs;
- enum iwch_qp_attr_mask mask;
- struct iwch_ep *ep = to_ep(cm_id);
- struct iwch_dev *h = to_iwch_dev(cm_id->device);
- struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
-
- pr_debug("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
- if (state_read(&ep->com) == DEAD) {
- err = -ECONNRESET;
- goto err;
- }
-
- BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
- BUG_ON(!qp);
-
- if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
- (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
- abort_connection(ep, NULL, GFP_KERNEL);
- err = -EINVAL;
- goto err;
- }
-
- cm_id->add_ref(cm_id);
- ep->com.cm_id = cm_id;
- ep->com.qp = qp;
-
- ep->ird = conn_param->ird;
- ep->ord = conn_param->ord;
-
- if (peer2peer && ep->ird == 0)
- ep->ird = 1;
-
- pr_debug("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
-
- /* bind QP to EP and move to RTS */
- attrs.mpa_attr = ep->mpa_attr;
- attrs.max_ird = ep->ird;
- attrs.max_ord = ep->ord;
- attrs.llp_stream_handle = ep;
- attrs.next_state = IWCH_QP_STATE_RTS;
-
- /* bind QP and TID with INIT_WR */
- mask = IWCH_QP_ATTR_NEXT_STATE |
- IWCH_QP_ATTR_LLP_STREAM_HANDLE |
- IWCH_QP_ATTR_MPA_ATTR |
- IWCH_QP_ATTR_MAX_IRD |
- IWCH_QP_ATTR_MAX_ORD;
-
- err = iwch_modify_qp(ep->com.qp->rhp,
- ep->com.qp, mask, &attrs, 1);
- if (err)
- goto err1;
-
- /* if needed, wait for wr_ack */
- if (iwch_rqes_posted(qp)) {
- wait_event(ep->com.waitq, ep->com.rpl_done);
- err = ep->com.rpl_err;
- if (err)
- goto err1;
- }
-
- err = send_mpa_reply(ep, conn_param->private_data,
- conn_param->private_data_len);
- if (err)
- goto err1;
-
-
- state_set(&ep->com, FPDU_MODE);
- established_upcall(ep);
- put_ep(&ep->com);
- return 0;
-err1:
- ep->com.cm_id = NULL;
- ep->com.qp = NULL;
- cm_id->rem_ref(cm_id);
-err:
- put_ep(&ep->com);
- return err;
-}
-
-static int is_loopback_dst(struct iw_cm_id *cm_id)
-{
- struct net_device *dev;
- struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
-
- dev = ip_dev_find(&init_net, raddr->sin_addr.s_addr);
- if (!dev)
- return 0;
- dev_put(dev);
- return 1;
-}
-
-int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
-{
- struct iwch_dev *h = to_iwch_dev(cm_id->device);
- struct iwch_ep *ep;
- struct rtable *rt;
- int err = 0;
- struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
- struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
-
- if (cm_id->m_remote_addr.ss_family != PF_INET) {
- err = -ENOSYS;
- goto out;
- }
-
- if (is_loopback_dst(cm_id)) {
- err = -ENOSYS;
- goto out;
- }
-
- ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
- if (!ep) {
- pr_err("%s - cannot alloc ep\n", __func__);
- err = -ENOMEM;
- goto out;
- }
- timer_setup(&ep->timer, ep_timeout, 0);
- ep->plen = conn_param->private_data_len;
- if (ep->plen)
- memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
- conn_param->private_data, ep->plen);
- ep->ird = conn_param->ird;
- ep->ord = conn_param->ord;
-
- if (peer2peer && ep->ord == 0)
- ep->ord = 1;
-
- ep->com.tdev = h->rdev.t3cdev_p;
-
- cm_id->add_ref(cm_id);
- ep->com.cm_id = cm_id;
- ep->com.qp = get_qhp(h, conn_param->qpn);
- BUG_ON(!ep->com.qp);
- pr_debug("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
- ep->com.qp, cm_id);
-
- /*
- * Allocate an active TID to initiate a TCP connection.
- */
- ep->atid = cxgb3_alloc_atid(h->rdev.t3cdev_p, &t3c_client, ep);
- if (ep->atid == -1) {
- pr_err("%s - cannot alloc atid\n", __func__);
- err = -ENOMEM;
- goto fail2;
- }
-
- /* find a route */
- rt = find_route(h->rdev.t3cdev_p, laddr->sin_addr.s_addr,
- raddr->sin_addr.s_addr, laddr->sin_port,
- raddr->sin_port, IPTOS_LOWDELAY);
- if (!rt) {
- pr_err("%s - cannot find route\n", __func__);
- err = -EHOSTUNREACH;
- goto fail3;
- }
- ep->dst = &rt->dst;
- ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL,
- &raddr->sin_addr.s_addr);
- if (!ep->l2t) {
- pr_err("%s - cannot alloc l2e\n", __func__);
- err = -ENOMEM;
- goto fail4;
- }
-
- state_set(&ep->com, CONNECTING);
- ep->tos = IPTOS_LOWDELAY;
- memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
- sizeof(ep->com.local_addr));
- memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr,
- sizeof(ep->com.remote_addr));
-
- /* send connect request to rnic */
- err = send_connect(ep);
- if (!err)
- goto out;
-
- l2t_release(h->rdev.t3cdev_p, ep->l2t);
-fail4:
- dst_release(ep->dst);
-fail3:
- cxgb3_free_atid(ep->com.tdev, ep->atid);
-fail2:
- cm_id->rem_ref(cm_id);
- put_ep(&ep->com);
-out:
- return err;
-}
-
-int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
-{
- int err = 0;
- struct iwch_dev *h = to_iwch_dev(cm_id->device);
- struct iwch_listen_ep *ep;
-
-
- might_sleep();
-
- if (cm_id->m_local_addr.ss_family != PF_INET) {
- err = -ENOSYS;
- goto fail1;
- }
-
- ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
- if (!ep) {
- pr_err("%s - cannot alloc ep\n", __func__);
- err = -ENOMEM;
- goto fail1;
- }
- pr_debug("%s ep %p\n", __func__, ep);
- ep->com.tdev = h->rdev.t3cdev_p;
- cm_id->add_ref(cm_id);
- ep->com.cm_id = cm_id;
- ep->backlog = backlog;
- memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
- sizeof(ep->com.local_addr));
-
- /*
- * Allocate a server TID.
- */
- ep->stid = cxgb3_alloc_stid(h->rdev.t3cdev_p, &t3c_client, ep);
- if (ep->stid == -1) {
- pr_err("%s - cannot alloc atid\n", __func__);
- err = -ENOMEM;
- goto fail2;
- }
-
- state_set(&ep->com, LISTEN);
- err = listen_start(ep);
- if (err)
- goto fail3;
-
- /* wait for pass_open_rpl */
- wait_event(ep->com.waitq, ep->com.rpl_done);
- err = ep->com.rpl_err;
- if (!err) {
- cm_id->provider_data = ep;
- goto out;
- }
-fail3:
- cxgb3_free_stid(ep->com.tdev, ep->stid);
-fail2:
- cm_id->rem_ref(cm_id);
- put_ep(&ep->com);
-fail1:
-out:
- return err;
-}
-
-int iwch_destroy_listen(struct iw_cm_id *cm_id)
-{
- int err;
- struct iwch_listen_ep *ep = to_listen_ep(cm_id);
-
- pr_debug("%s ep %p\n", __func__, ep);
-
- might_sleep();
- state_set(&ep->com, DEAD);
- ep->com.rpl_done = 0;
- ep->com.rpl_err = 0;
- err = listen_stop(ep);
- if (err)
- goto done;
- wait_event(ep->com.waitq, ep->com.rpl_done);
- cxgb3_free_stid(ep->com.tdev, ep->stid);
-done:
- err = ep->com.rpl_err;
- cm_id->rem_ref(cm_id);
- put_ep(&ep->com);
- return err;
-}
-
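-/*
- * Initiate an orderly close or an abort of the connection, depending on
- * 'abrupt' and the current endpoint state.
- */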
-int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
-{
- int ret = 0;
- unsigned long flags;
- int close = 0;
- int fatal = 0;
- struct t3cdev *tdev;
- struct cxio_rdev *rdev;
-
- spin_lock_irqsave(&ep->com.lock, flags);
-
- pr_debug("%s ep %p state %s, abrupt %d\n", __func__, ep,
- states[ep->com.state], abrupt);
-
- tdev = (struct t3cdev *)ep->com.tdev;
- rdev = (struct cxio_rdev *)tdev->ulp;
- if (cxio_fatal_error(rdev)) {
- fatal = 1;
- close_complete_upcall(ep);
- ep->com.state = DEAD;
- }
- switch (ep->com.state) {
- case MPA_REQ_WAIT:
- case MPA_REQ_SENT:
- case MPA_REQ_RCVD:
- case MPA_REP_SENT:
- case FPDU_MODE:
- close = 1;
- if (abrupt)
- ep->com.state = ABORTING;
- else {
- ep->com.state = CLOSING;
- start_ep_timer(ep);
- }
- set_bit(CLOSE_SENT, &ep->com.flags);
- break;
- case CLOSING:
- if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
- close = 1;
- if (abrupt) {
- stop_ep_timer(ep);
- ep->com.state = ABORTING;
- } else
- ep->com.state = MORIBUND;
- }
- break;
- case MORIBUND:
- case ABORTING:
- case DEAD:
- pr_debug("%s ignoring disconnect ep %p state %u\n",
- __func__, ep, ep->com.state);
- break;
- default:
- BUG();
- break;
- }
-
- spin_unlock_irqrestore(&ep->com.lock, flags);
- if (close) {
- if (abrupt)
- ret = send_abort(ep, NULL, gfp);
- else
- ret = send_halfclose(ep, gfp);
- if (ret)
- fatal = 1;
- }
- if (fatal)
- release_ep_resources(ep);
- return ret;
-}
-
-int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
- struct l2t_entry *l2t)
-{
- struct iwch_ep *ep = ctx;
-
- if (ep->dst != old)
- return 0;
-
- pr_debug("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
- l2t);
- dst_hold(new);
- l2t_release(ep->com.tdev, ep->l2t);
- ep->l2t = l2t;
- dst_release(old);
- ep->dst = new;
- return 1;
-}
-
-/*
- * All the CM events are handled on a work queue to have a safe context.
- * These are the real handlers that are called from the work queue.
- */
-static const cxgb3_cpl_handler_func work_handlers[NUM_CPL_CMDS] = {
- [CPL_ACT_ESTABLISH] = act_establish,
- [CPL_ACT_OPEN_RPL] = act_open_rpl,
- [CPL_RX_DATA] = rx_data,
- [CPL_TX_DMA_ACK] = tx_ack,
- [CPL_ABORT_RPL_RSS] = abort_rpl,
- [CPL_ABORT_RPL] = abort_rpl,
- [CPL_PASS_OPEN_RPL] = pass_open_rpl,
- [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl,
- [CPL_PASS_ACCEPT_REQ] = pass_accept_req,
- [CPL_PASS_ESTABLISH] = pass_establish,
- [CPL_PEER_CLOSE] = peer_close,
- [CPL_ABORT_REQ_RSS] = peer_abort,
- [CPL_CLOSE_CON_RPL] = close_con_rpl,
- [CPL_RDMA_TERMINATE] = terminate,
- [CPL_RDMA_EC_STATUS] = ec_status,
-};
-
-static void process_work(struct work_struct *work)
-{
- struct sk_buff *skb = NULL;
- void *ep;
- struct t3cdev *tdev;
- int ret;
-
- while ((skb = skb_dequeue(&rxq))) {
- ep = *((void **) (skb->cb));
- tdev = *((struct t3cdev **) (skb->cb + sizeof(void *)));
- ret = work_handlers[G_OPCODE(ntohl((__force __be32)skb->csum))](tdev, skb, ep);
- if (ret & CPL_RET_BUF_DONE)
- kfree_skb(skb);
-
- /*
- * ep was referenced in sched(), and is freed here.
- */
- put_ep((struct iwch_ep_common *)ep);
- }
-}
-
-static DECLARE_WORK(skb_work, process_work);
-
-static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep_common *epc = ctx;
-
- get_ep(epc);
-
- /*
- * Save ctx and tdev in the skb->cb area.
- */
- *((void **) skb->cb) = ctx;
- *((struct t3cdev **) (skb->cb + sizeof(void *))) = tdev;
-
- /*
- * Queue the skb and schedule the worker thread.
- */
- skb_queue_tail(&rxq, skb);
- queue_work(workq, &skb_work);
- return 0;
-}
-
-static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
-
- if (rpl->status != CPL_ERR_NONE) {
- pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n",
- rpl->status, GET_TID(rpl));
- }
- return CPL_RET_BUF_DONE;
-}
-
-/*
- * All upcalls from the T3 Core go to sched() to schedule the
- * processing on a work queue.
- */
-cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS] = {
- [CPL_ACT_ESTABLISH] = sched,
- [CPL_ACT_OPEN_RPL] = sched,
- [CPL_RX_DATA] = sched,
- [CPL_TX_DMA_ACK] = sched,
- [CPL_ABORT_RPL_RSS] = sched,
- [CPL_ABORT_RPL] = sched,
- [CPL_PASS_OPEN_RPL] = sched,
- [CPL_CLOSE_LISTSRV_RPL] = sched,
- [CPL_PASS_ACCEPT_REQ] = sched,
- [CPL_PASS_ESTABLISH] = sched,
- [CPL_PEER_CLOSE] = sched,
- [CPL_CLOSE_CON_RPL] = sched,
- [CPL_ABORT_REQ_RSS] = sched,
- [CPL_RDMA_TERMINATE] = sched,
- [CPL_RDMA_EC_STATUS] = sched,
- [CPL_SET_TCB_RPL] = set_tcb_rpl,
-};
-
-int __init iwch_cm_init(void)
-{
- skb_queue_head_init(&rxq);
-
- workq = alloc_ordered_workqueue("iw_cxgb3", WQ_MEM_RECLAIM);
- if (!workq)
- return -ENOMEM;
-
- return 0;
-}
-
-void __exit iwch_cm_term(void)
-{
- flush_workqueue(workq);
- destroy_workqueue(workq);
-}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
deleted file mode 100644
index cc7fe644d260..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ /dev/null
@@ -1,233 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef _IWCH_CM_H_
-#define _IWCH_CM_H_
-
-#include <linux/inet.h>
-#include <linux/wait.h>
-#include <linux/spinlock.h>
-#include <linux/kref.h>
-
-#include <rdma/ib_verbs.h>
-#include <rdma/iw_cm.h>
-
-#include "cxgb3_offload.h"
-#include "iwch_provider.h"
-
-#define MPA_KEY_REQ "MPA ID Req Frame"
-#define MPA_KEY_REP "MPA ID Rep Frame"
-
-#define MPA_MAX_PRIVATE_DATA 256
-#define MPA_REV 0 /* XXX - amso1100 uses rev 0 ! */
-#define MPA_REJECT 0x20
-#define MPA_CRC 0x40
-#define MPA_MARKERS 0x80
-#define MPA_FLAGS_MASK 0xE0
-
-#define put_ep(ep) { \
- pr_debug("put_ep (via %s:%u) ep %p refcnt %d\n", \
- __func__, __LINE__, ep, kref_read(&((ep)->kref))); \
- WARN_ON(kref_read(&((ep)->kref)) < 1); \
- kref_put(&((ep)->kref), __free_ep); \
-}
-
-#define get_ep(ep) { \
- pr_debug("get_ep (via %s:%u) ep %p, refcnt %d\n", \
- __func__, __LINE__, ep, kref_read(&((ep)->kref))); \
- kref_get(&((ep)->kref)); \
-}
-
-struct mpa_message {
- u8 key[16];
- u8 flags;
- u8 revision;
- __be16 private_data_size;
- u8 private_data[0];
-};
-
-struct terminate_message {
- u8 layer_etype;
- u8 ecode;
- __be16 hdrct_rsvd;
- u8 len_hdrs[0];
-};
-
-#define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28)
-
-enum iwch_layers_types {
- LAYER_RDMAP = 0x00,
- LAYER_DDP = 0x10,
- LAYER_MPA = 0x20,
- RDMAP_LOCAL_CATA = 0x00,
- RDMAP_REMOTE_PROT = 0x01,
- RDMAP_REMOTE_OP = 0x02,
- DDP_LOCAL_CATA = 0x00,
- DDP_TAGGED_ERR = 0x01,
- DDP_UNTAGGED_ERR = 0x02,
- DDP_LLP = 0x03
-};
-
-enum iwch_rdma_ecodes {
- RDMAP_INV_STAG = 0x00,
- RDMAP_BASE_BOUNDS = 0x01,
- RDMAP_ACC_VIOL = 0x02,
- RDMAP_STAG_NOT_ASSOC = 0x03,
- RDMAP_TO_WRAP = 0x04,
- RDMAP_INV_VERS = 0x05,
- RDMAP_INV_OPCODE = 0x06,
- RDMAP_STREAM_CATA = 0x07,
- RDMAP_GLOBAL_CATA = 0x08,
- RDMAP_CANT_INV_STAG = 0x09,
- RDMAP_UNSPECIFIED = 0xff
-};
-
-enum iwch_ddp_ecodes {
- DDPT_INV_STAG = 0x00,
- DDPT_BASE_BOUNDS = 0x01,
- DDPT_STAG_NOT_ASSOC = 0x02,
- DDPT_TO_WRAP = 0x03,
- DDPT_INV_VERS = 0x04,
- DDPU_INV_QN = 0x01,
- DDPU_INV_MSN_NOBUF = 0x02,
- DDPU_INV_MSN_RANGE = 0x03,
- DDPU_INV_MO = 0x04,
- DDPU_MSG_TOOBIG = 0x05,
- DDPU_INV_VERS = 0x06
-};
-
-enum iwch_mpa_ecodes {
- MPA_CRC_ERR = 0x02,
- MPA_MARKER_ERR = 0x03
-};
-
-enum iwch_ep_state {
- IDLE = 0,
- LISTEN,
- CONNECTING,
- MPA_REQ_WAIT,
- MPA_REQ_SENT,
- MPA_REQ_RCVD,
- MPA_REP_SENT,
- FPDU_MODE,
- ABORTING,
- CLOSING,
- MORIBUND,
- DEAD,
-};
-
-enum iwch_ep_flags {
- PEER_ABORT_IN_PROGRESS = 0,
- ABORT_REQ_IN_PROGRESS = 1,
- RELEASE_RESOURCES = 2,
- CLOSE_SENT = 3,
-};
-
-struct iwch_ep_common {
- struct iw_cm_id *cm_id;
- struct iwch_qp *qp;
- struct t3cdev *tdev;
- enum iwch_ep_state state;
- struct kref kref;
- spinlock_t lock;
- struct sockaddr_in local_addr;
- struct sockaddr_in remote_addr;
- wait_queue_head_t waitq;
- int rpl_done;
- int rpl_err;
- unsigned long flags;
-};
-
-struct iwch_listen_ep {
- struct iwch_ep_common com;
- unsigned int stid;
- int backlog;
-};
-
-struct iwch_ep {
- struct iwch_ep_common com;
- struct iwch_ep *parent_ep;
- struct timer_list timer;
- unsigned int atid;
- u32 hwtid;
- u32 snd_seq;
- u32 rcv_seq;
- struct l2t_entry *l2t;
- struct dst_entry *dst;
- struct sk_buff *mpa_skb;
- struct iwch_mpa_attributes mpa_attr;
- unsigned int mpa_pkt_len;
- u8 mpa_pkt[sizeof(struct mpa_message) + MPA_MAX_PRIVATE_DATA];
- u8 tos;
- u16 emss;
- u16 plen;
- u32 ird;
- u32 ord;
-};
-
-static inline struct iwch_ep *to_ep(struct iw_cm_id *cm_id)
-{
- return cm_id->provider_data;
-}
-
-static inline struct iwch_listen_ep *to_listen_ep(struct iw_cm_id *cm_id)
-{
- return cm_id->provider_data;
-}
-
-static inline int compute_wscale(int win)
-{
- int wscale = 0;
-
- while (wscale < 14 && (65535<<wscale) < win)
- wscale++;
- return wscale;
-}
-
-/* CM prototypes */
-
-int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
-int iwch_create_listen(struct iw_cm_id *cm_id, int backlog);
-int iwch_destroy_listen(struct iw_cm_id *cm_id);
-int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
-int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
-int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp);
-int iwch_quiesce_tid(struct iwch_ep *ep);
-int iwch_resume_tid(struct iwch_ep *ep);
-void __free_ep(struct kref *kref);
-void iwch_rearp(struct iwch_ep *ep);
-int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, struct l2t_entry *l2t);
-
-int __init iwch_cm_init(void);
-void __exit iwch_cm_term(void);
-extern int peer2peer;
-
-#endif /* _IWCH_CM_H_ */
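
One helper worth noting in the header removed above is compute_wscale(), which picks the smallest TCP window-scale shift (capped at 14) such that the 16-bit advertised window, shifted left by that amount, still covers the requested receive window. The same calculation as a standalone userspace sketch with a few sample windows (the test harness is illustrative only):

#include <stdio.h>

/* Smallest shift in 0..14 such that 65535 << wscale covers the window. */
static int compute_wscale(int win)
{
        int wscale = 0;

        while (wscale < 14 && (65535 << wscale) < win)
                wscale++;
        return wscale;
}

int main(void)
{
        int windows[] = { 65535, 262144, 1048576, 4194304 };

        for (unsigned i = 0; i < sizeof(windows) / sizeof(windows[0]); i++)
                printf("win=%d -> wscale=%d\n",
                       windows[i], compute_wscale(windows[i]));
        return 0;
}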
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
deleted file mode 100644
index a098c0140580..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "iwch_provider.h"
-#include "iwch.h"
-
-static int __iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
- struct iwch_qp *qhp, struct ib_wc *wc)
-{
- struct t3_wq *wq = qhp ? &qhp->wq : NULL;
- struct t3_cqe cqe;
- u32 credit = 0;
- u8 cqe_flushed;
- u64 cookie;
- int ret = 1;
-
- ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie,
- &credit);
- if (t3a_device(chp->rhp) && credit) {
- pr_debug("%s updating %d cq credits on id %d\n", __func__,
- credit, chp->cq.cqid);
- cxio_hal_cq_op(&rhp->rdev, &chp->cq, CQ_CREDIT_UPDATE, credit);
- }
-
- if (ret) {
- ret = -EAGAIN;
- goto out;
- }
- ret = 1;
-
- wc->wr_id = cookie;
- wc->qp = qhp ? &qhp->ibqp : NULL;
- wc->vendor_err = CQE_STATUS(cqe);
- wc->wc_flags = 0;
-
- pr_debug("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x lo 0x%x cookie 0x%llx\n",
- __func__,
- CQE_QPID(cqe), CQE_TYPE(cqe),
- CQE_OPCODE(cqe), CQE_STATUS(cqe), CQE_WRID_HI(cqe),
- CQE_WRID_LOW(cqe), (unsigned long long)cookie);
-
- if (CQE_TYPE(cqe) == 0) {
- if (!CQE_STATUS(cqe))
- wc->byte_len = CQE_LEN(cqe);
- else
- wc->byte_len = 0;
- wc->opcode = IB_WC_RECV;
- if (CQE_OPCODE(cqe) == T3_SEND_WITH_INV ||
- CQE_OPCODE(cqe) == T3_SEND_WITH_SE_INV) {
- wc->ex.invalidate_rkey = CQE_WRID_STAG(cqe);
- wc->wc_flags |= IB_WC_WITH_INVALIDATE;
- }
- } else {
- switch (CQE_OPCODE(cqe)) {
- case T3_RDMA_WRITE:
- wc->opcode = IB_WC_RDMA_WRITE;
- break;
- case T3_READ_REQ:
- wc->opcode = IB_WC_RDMA_READ;
- wc->byte_len = CQE_LEN(cqe);
- break;
- case T3_SEND:
- case T3_SEND_WITH_SE:
- case T3_SEND_WITH_INV:
- case T3_SEND_WITH_SE_INV:
- wc->opcode = IB_WC_SEND;
- break;
- case T3_LOCAL_INV:
- wc->opcode = IB_WC_LOCAL_INV;
- break;
- case T3_FAST_REGISTER:
- wc->opcode = IB_WC_REG_MR;
- break;
- default:
- pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
- CQE_OPCODE(cqe), CQE_QPID(cqe));
- ret = -EINVAL;
- goto out;
- }
- }
-
- if (cqe_flushed)
- wc->status = IB_WC_WR_FLUSH_ERR;
- else {
-
- switch (CQE_STATUS(cqe)) {
- case TPT_ERR_SUCCESS:
- wc->status = IB_WC_SUCCESS;
- break;
- case TPT_ERR_STAG:
- wc->status = IB_WC_LOC_ACCESS_ERR;
- break;
- case TPT_ERR_PDID:
- wc->status = IB_WC_LOC_PROT_ERR;
- break;
- case TPT_ERR_QPID:
- case TPT_ERR_ACCESS:
- wc->status = IB_WC_LOC_ACCESS_ERR;
- break;
- case TPT_ERR_WRAP:
- wc->status = IB_WC_GENERAL_ERR;
- break;
- case TPT_ERR_BOUND:
- wc->status = IB_WC_LOC_LEN_ERR;
- break;
- case TPT_ERR_INVALIDATE_SHARED_MR:
- case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
- wc->status = IB_WC_MW_BIND_ERR;
- break;
- case TPT_ERR_CRC:
- case TPT_ERR_MARKER:
- case TPT_ERR_PDU_LEN_ERR:
- case TPT_ERR_OUT_OF_RQE:
- case TPT_ERR_DDP_VERSION:
- case TPT_ERR_RDMA_VERSION:
- case TPT_ERR_DDP_QUEUE_NUM:
- case TPT_ERR_MSN:
- case TPT_ERR_TBIT:
- case TPT_ERR_MO:
- case TPT_ERR_MSN_RANGE:
- case TPT_ERR_IRD_OVERFLOW:
- case TPT_ERR_OPCODE:
- wc->status = IB_WC_FATAL_ERR;
- break;
- case TPT_ERR_SWFLUSH:
- wc->status = IB_WC_WR_FLUSH_ERR;
- break;
- default:
- pr_err("Unexpected cqe_status 0x%x for QPID=0x%0x\n",
- CQE_STATUS(cqe), CQE_QPID(cqe));
- ret = -EINVAL;
- }
- }
-out:
- return ret;
-}
-
-/*
- * Get one cq entry from cxio and map it to openib.
- *
- * Returns:
- * 0 EMPTY;
- * 1 cqe returned
- * -EAGAIN caller must try again
- * any other -errno fatal error
- */
-static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
- struct ib_wc *wc)
-{
- struct iwch_qp *qhp;
- struct t3_cqe *rd_cqe;
- int ret;
-
- rd_cqe = cxio_next_cqe(&chp->cq);
-
- if (!rd_cqe)
- return 0;
-
- qhp = get_qhp(rhp, CQE_QPID(*rd_cqe));
- if (qhp) {
- spin_lock(&qhp->lock);
- ret = __iwch_poll_cq_one(rhp, chp, qhp, wc);
- spin_unlock(&qhp->lock);
- } else {
- ret = __iwch_poll_cq_one(rhp, chp, NULL, wc);
- }
- return ret;
-}
-
-int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
-{
- struct iwch_dev *rhp;
- struct iwch_cq *chp;
- unsigned long flags;
- int npolled;
- int err = 0;
-
- chp = to_iwch_cq(ibcq);
- rhp = chp->rhp;
-
- spin_lock_irqsave(&chp->lock, flags);
- for (npolled = 0; npolled < num_entries; ++npolled) {
-
- /*
- * Because T3 can post CQEs that are _not_ associated
- * with a WR, we might have to poll again after removing
- * one of these.
- */
- do {
- err = iwch_poll_cq_one(rhp, chp, wc + npolled);
- } while (err == -EAGAIN);
- if (err <= 0)
- break;
- }
- spin_unlock_irqrestore(&chp->lock, flags);
-
- if (err < 0)
- return err;
- else {
- return npolled;
- }
-}
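
A subtlety in iwch_poll_cq() above: T3 hardware can post CQEs that are not associated with any work request, so filling one slot of the caller's array may take several low-level polls, retrying while the inner poll reports -EAGAIN. The same loop shape, reduced to a self-contained userspace sketch in which poll_one() is only a stand-in for the driver's iwch_poll_cq_one():

#include <errno.h>
#include <stdio.h>

/*
 * Stand-in for one hardware poll: 1 = completion returned, 0 = queue
 * empty, -EAGAIN = an entry was consumed but carries nothing to report
 * (e.g. no associated WR), so the caller should poll again.
 */
static int poll_one(const int *queue, int len, int *pos, int *wc)
{
        if (*pos >= len)
                return 0;
        *wc = queue[(*pos)++];
        if (*wc < 0)
                return -EAGAIN;
        return 1;
}

int main(void)
{
        int queue[] = { 10, -1, 11, -1, -1, 12 };  /* -1 marks skip entries */
        int len = sizeof(queue) / sizeof(queue[0]);
        int pos = 0, wc, err, npolled;

        for (npolled = 0; npolled < 4; npolled++) {
                /* Retry the inner poll while it reports -EAGAIN. */
                do {
                        err = poll_one(queue, len, &pos, &wc);
                } while (err == -EAGAIN);
                if (err <= 0)
                        break;
                printf("completion %d: wr_id %d\n", npolled, wc);
        }
        printf("polled %d completions\n", npolled);
        return 0;
}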
diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c
deleted file mode 100644
index 9d356c1301c7..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_ev.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/gfp.h>
-#include <linux/mman.h>
-#include <net/sock.h>
-#include "iwch_provider.h"
-#include "iwch.h"
-#include "iwch_cm.h"
-#include "cxio_hal.h"
-#include "cxio_wr.h"
-
-static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
- struct respQ_msg_t *rsp_msg,
- enum ib_event_type ib_event,
- int send_term)
-{
- struct ib_event event;
- struct iwch_qp_attributes attrs;
- struct iwch_qp *qhp;
- unsigned long flag;
-
- xa_lock(&rnicp->qps);
- qhp = xa_load(&rnicp->qps, CQE_QPID(rsp_msg->cqe));
-
- if (!qhp) {
- pr_err("%s unaffiliated error 0x%x qpid 0x%x\n",
- __func__, CQE_STATUS(rsp_msg->cqe),
- CQE_QPID(rsp_msg->cqe));
- xa_unlock(&rnicp->qps);
- return;
- }
-
- if ((qhp->attr.state == IWCH_QP_STATE_ERROR) ||
- (qhp->attr.state == IWCH_QP_STATE_TERMINATE)) {
- pr_debug("%s AE received after RTS - qp state %d qpid 0x%x status 0x%x\n",
- __func__,
- qhp->attr.state, qhp->wq.qpid,
- CQE_STATUS(rsp_msg->cqe));
- xa_unlock(&rnicp->qps);
- return;
- }
-
- pr_err("%s - AE qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
- __func__,
- CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
- CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
- CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
-
- atomic_inc(&qhp->refcnt);
- xa_unlock(&rnicp->qps);
-
- if (qhp->attr.state == IWCH_QP_STATE_RTS) {
- attrs.next_state = IWCH_QP_STATE_TERMINATE;
- iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE,
- &attrs, 1);
- if (send_term)
- iwch_post_terminate(qhp, rsp_msg);
- }
-
- event.event = ib_event;
- event.device = chp->ibcq.device;
- if (ib_event == IB_EVENT_CQ_ERR)
- event.element.cq = &chp->ibcq;
- else
- event.element.qp = &qhp->ibqp;
-
- if (qhp->ibqp.event_handler)
- (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
-
- spin_lock_irqsave(&chp->comp_handler_lock, flag);
- (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
- spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
-
- if (atomic_dec_and_test(&qhp->refcnt))
- wake_up(&qhp->wait);
-}
-
-void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
-{
- struct iwch_dev *rnicp;
- struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data;
- struct iwch_cq *chp;
- struct iwch_qp *qhp;
- u32 cqid = RSPQ_CQID(rsp_msg);
- unsigned long flag;
-
- rnicp = (struct iwch_dev *) rdev_p->ulp;
- xa_lock(&rnicp->qps);
- chp = get_chp(rnicp, cqid);
- qhp = xa_load(&rnicp->qps, CQE_QPID(rsp_msg->cqe));
- if (!chp || !qhp) {
- pr_err("BAD AE cqid 0x%x qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
- cqid, CQE_QPID(rsp_msg->cqe),
- CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
- CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe),
- CQE_WRID_LOW(rsp_msg->cqe));
- xa_unlock(&rnicp->qps);
- goto out;
- }
- iwch_qp_add_ref(&qhp->ibqp);
- atomic_inc(&chp->refcnt);
- xa_unlock(&rnicp->qps);
-
- /*
- * 1) completion of our sending a TERMINATE.
- * 2) incoming TERMINATE message.
- */
- if ((CQE_OPCODE(rsp_msg->cqe) == T3_TERMINATE) &&
- (CQE_STATUS(rsp_msg->cqe) == 0)) {
- if (SQ_TYPE(rsp_msg->cqe)) {
- pr_debug("%s QPID 0x%x ep %p disconnecting\n",
- __func__, qhp->wq.qpid, qhp->ep);
- iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
- } else {
- pr_debug("%s post REQ_ERR AE QPID 0x%x\n", __func__,
- qhp->wq.qpid);
- post_qp_event(rnicp, chp, rsp_msg,
- IB_EVENT_QP_REQ_ERR, 0);
- iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
- }
- goto done;
- }
-
- /* Bad incoming Read request */
- if (SQ_TYPE(rsp_msg->cqe) &&
- (CQE_OPCODE(rsp_msg->cqe) == T3_READ_RESP)) {
- post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1);
- goto done;
- }
-
- /* Bad incoming write */
- if (RQ_TYPE(rsp_msg->cqe) &&
- (CQE_OPCODE(rsp_msg->cqe) == T3_RDMA_WRITE)) {
- post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1);
- goto done;
- }
-
- switch (CQE_STATUS(rsp_msg->cqe)) {
-
- /* Completion Events */
- case TPT_ERR_SUCCESS:
-
- /*
- * Confirm the destination entry if this is a RECV completion.
- */
- if (qhp->ep && SQ_TYPE(rsp_msg->cqe))
- dst_confirm(qhp->ep->dst);
- spin_lock_irqsave(&chp->comp_handler_lock, flag);
- (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
- spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
- break;
-
- case TPT_ERR_STAG:
- case TPT_ERR_PDID:
- case TPT_ERR_QPID:
- case TPT_ERR_ACCESS:
- case TPT_ERR_WRAP:
- case TPT_ERR_BOUND:
- case TPT_ERR_INVALIDATE_SHARED_MR:
- case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
- post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1);
- break;
-
- /* Device Fatal Errors */
- case TPT_ERR_ECC:
- case TPT_ERR_ECC_PSTAG:
- case TPT_ERR_INTERNAL_ERR:
- post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_DEVICE_FATAL, 1);
- break;
-
- /* QP Fatal Errors */
- case TPT_ERR_OUT_OF_RQE:
- case TPT_ERR_PBL_ADDR_BOUND:
- case TPT_ERR_CRC:
- case TPT_ERR_MARKER:
- case TPT_ERR_PDU_LEN_ERR:
- case TPT_ERR_DDP_VERSION:
- case TPT_ERR_RDMA_VERSION:
- case TPT_ERR_OPCODE:
- case TPT_ERR_DDP_QUEUE_NUM:
- case TPT_ERR_MSN:
- case TPT_ERR_TBIT:
- case TPT_ERR_MO:
- case TPT_ERR_MSN_GAP:
- case TPT_ERR_MSN_RANGE:
- case TPT_ERR_RQE_ADDR_BOUND:
- case TPT_ERR_IRD_OVERFLOW:
- post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1);
- break;
-
- default:
- pr_err("Unknown T3 status 0x%x QPID 0x%x\n",
- CQE_STATUS(rsp_msg->cqe), qhp->wq.qpid);
- post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1);
- break;
- }
-done:
- if (atomic_dec_and_test(&chp->refcnt))
- wake_up(&chp->wait);
- iwch_qp_rem_ref(&qhp->ibqp);
-out:
- dev_kfree_skb_irq(skb);
-}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
deleted file mode 100644
index ce0f2741821d..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/slab.h>
-#include <asm/byteorder.h>
-
-#include <rdma/iw_cm.h>
-#include <rdma/ib_verbs.h>
-
-#include "cxio_hal.h"
-#include "cxio_resource.h"
-#include "iwch.h"
-#include "iwch_provider.h"
-
-static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
-{
- u32 mmid;
-
- mhp->attr.state = 1;
- mhp->attr.stag = stag;
- mmid = stag >> 8;
- mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- pr_debug("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
- return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL);
-}
-
-int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
- struct iwch_mr *mhp, int shift)
-{
- u32 stag;
- int ret;
-
- if (cxio_register_phys_mem(&rhp->rdev,
- &stag, mhp->attr.pdid,
- mhp->attr.perms,
- mhp->attr.zbva,
- mhp->attr.va_fbo,
- mhp->attr.len,
- shift - 12,
- mhp->attr.pbl_size, mhp->attr.pbl_addr))
- return -ENOMEM;
-
- ret = iwch_finish_mem_reg(mhp, stag);
- if (ret)
- cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
- return ret;
-}
-
-int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
-{
- mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev,
- npages << 3);
-
- if (!mhp->attr.pbl_addr)
- return -ENOMEM;
-
- mhp->attr.pbl_size = npages;
-
- return 0;
-}
-
-void iwch_free_pbl(struct iwch_mr *mhp)
-{
- cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
- mhp->attr.pbl_size << 3);
-}
-
-int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset)
-{
- return cxio_write_pbl(&mhp->rhp->rdev, pages,
- mhp->attr.pbl_addr + (offset << 3), npages);
-}
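
A note on units in the PBL helpers removed above: each page-list entry is an 8-byte big-endian DMA address, so an allocation covering npages costs npages << 3 bytes from the PBL pool, and iwch_write_pbl() turns an entry offset into a byte offset with offset << 3. A small arithmetic sketch of that layout, with made-up region size, page shift, and pool base:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t region_len = 1 << 20;  /* hypothetical 1 MiB registration */
        int shift = 12;                 /* 4 KiB pages */
        uint32_t pbl_base = 0x20000;    /* hypothetical PBL pool offset */

        /* Page-list entries needed to cover the region, rounded up. */
        uint64_t npages = (region_len + (1ULL << shift) - 1) >> shift;

        printf("npages       = %llu\n", (unsigned long long)npages);
        printf("PBL bytes    = %llu\n", (unsigned long long)(npages << 3));
        /* Byte address of entry 128 within the allocated PBL. */
        printf("entry 128 at = 0x%llx\n",
               (unsigned long long)(pbl_base + (128 << 3)));
        return 0;
}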
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
deleted file mode 100644
index 3a481dfb1607..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ /dev/null
@@ -1,1402 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/device.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/list.h>
-#include <linux/sched/mm.h>
-#include <linux/spinlock.h>
-#include <linux/ethtool.h>
-#include <linux/rtnetlink.h>
-#include <linux/inetdevice.h>
-#include <linux/slab.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-
-#include <rdma/iw_cm.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_smi.h>
-#include <rdma/ib_umem.h>
-#include <rdma/ib_user_verbs.h>
-#include <rdma/uverbs_ioctl.h>
-
-#include "cxio_hal.h"
-#include "iwch.h"
-#include "iwch_provider.h"
-#include "iwch_cm.h"
-#include <rdma/cxgb3-abi.h>
-#include "common.h"
-
-static void iwch_dealloc_ucontext(struct ib_ucontext *context)
-{
- struct iwch_dev *rhp = to_iwch_dev(context->device);
- struct iwch_ucontext *ucontext = to_iwch_ucontext(context);
- struct iwch_mm_entry *mm, *tmp;
-
- pr_debug("%s context %p\n", __func__, context);
- list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
- kfree(mm);
- cxio_release_ucontext(&rhp->rdev, &ucontext->uctx);
-}
-
-static int iwch_alloc_ucontext(struct ib_ucontext *ucontext,
- struct ib_udata *udata)
-{
- struct ib_device *ibdev = ucontext->device;
- struct iwch_ucontext *context = to_iwch_ucontext(ucontext);
- struct iwch_dev *rhp = to_iwch_dev(ibdev);
-
- pr_debug("%s ibdev %p\n", __func__, ibdev);
- cxio_init_ucontext(&rhp->rdev, &context->uctx);
- INIT_LIST_HEAD(&context->mmaps);
- spin_lock_init(&context->mmap_lock);
- return 0;
-}
-
-static int iwch_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
-{
- struct iwch_cq *chp;
-
- pr_debug("%s ib_cq %p\n", __func__, ib_cq);
- chp = to_iwch_cq(ib_cq);
-
- xa_erase_irq(&chp->rhp->cqs, chp->cq.cqid);
- atomic_dec(&chp->refcnt);
- wait_event(chp->wait, !atomic_read(&chp->refcnt));
-
- cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
- kfree(chp);
- return 0;
-}
-
-static struct ib_cq *iwch_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
-{
- int entries = attr->cqe;
- struct iwch_dev *rhp;
- struct iwch_cq *chp;
- struct iwch_create_cq_resp uresp;
- struct iwch_create_cq_req ureq;
- static int warned;
- size_t resplen;
-
- pr_debug("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
- if (attr->flags)
- return ERR_PTR(-EINVAL);
-
- rhp = to_iwch_dev(ibdev);
- chp = kzalloc(sizeof(*chp), GFP_KERNEL);
- if (!chp)
- return ERR_PTR(-ENOMEM);
-
- if (udata) {
- if (!t3a_device(rhp)) {
- if (ib_copy_from_udata(&ureq, udata, sizeof (ureq))) {
- kfree(chp);
- return ERR_PTR(-EFAULT);
- }
- chp->user_rptr_addr = (u32 __user *)(unsigned long)ureq.user_rptr_addr;
- }
- }
-
- if (t3a_device(rhp)) {
-
- /*
- * T3A: Add some fluff to handle extra CQEs inserted
- * for various errors.
- * Additional CQE possibilities:
- * TERMINATE,
- * incoming RDMA WRITE Failures
- * incoming RDMA READ REQUEST FAILUREs
- * NOTE: We cannot ensure the CQ won't overflow.
- */
- entries += 16;
- }
- entries = roundup_pow_of_two(entries);
- chp->cq.size_log2 = ilog2(entries);
-
- if (cxio_create_cq(&rhp->rdev, &chp->cq, !udata)) {
- kfree(chp);
- return ERR_PTR(-ENOMEM);
- }
- chp->rhp = rhp;
- chp->ibcq.cqe = 1 << chp->cq.size_log2;
- spin_lock_init(&chp->lock);
- spin_lock_init(&chp->comp_handler_lock);
- atomic_set(&chp->refcnt, 1);
- init_waitqueue_head(&chp->wait);
- if (xa_store_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL)) {
- cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
- kfree(chp);
- return ERR_PTR(-ENOMEM);
- }
-
- if (udata) {
- struct iwch_mm_entry *mm;
- struct iwch_ucontext *ucontext = rdma_udata_to_drv_context(
- udata, struct iwch_ucontext, ibucontext);
-
- mm = kmalloc(sizeof *mm, GFP_KERNEL);
- if (!mm) {
- iwch_destroy_cq(&chp->ibcq, udata);
- return ERR_PTR(-ENOMEM);
- }
- uresp.cqid = chp->cq.cqid;
- uresp.size_log2 = chp->cq.size_log2;
- spin_lock(&ucontext->mmap_lock);
- uresp.key = ucontext->key;
- ucontext->key += PAGE_SIZE;
- spin_unlock(&ucontext->mmap_lock);
- mm->key = uresp.key;
- mm->addr = virt_to_phys(chp->cq.queue);
- if (udata->outlen < sizeof uresp) {
- if (!warned++)
- pr_warn("Warning - downlevel libcxgb3 (non-fatal)\n");
- mm->len = PAGE_ALIGN((1UL << uresp.size_log2) *
- sizeof(struct t3_cqe));
- resplen = sizeof(struct iwch_create_cq_resp_v0);
- } else {
- mm->len = PAGE_ALIGN(((1UL << uresp.size_log2) + 1) *
- sizeof(struct t3_cqe));
- uresp.memsize = mm->len;
- uresp.reserved = 0;
- resplen = sizeof uresp;
- }
- if (ib_copy_to_udata(udata, &uresp, resplen)) {
- kfree(mm);
- iwch_destroy_cq(&chp->ibcq, udata);
- return ERR_PTR(-EFAULT);
- }
- insert_mmap(ucontext, mm);
- }
- pr_debug("created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx\n",
- chp->cq.cqid, chp, (1 << chp->cq.size_log2),
- (unsigned long long)chp->cq.dma_addr);
- return &chp->ibcq;
-}
-
-static int iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
-{
-#ifdef notyet
- struct iwch_cq *chp = to_iwch_cq(cq);
- struct t3_cq oldcq, newcq;
- int ret;
-
- pr_debug("%s ib_cq %p cqe %d\n", __func__, cq, cqe);
-
- /* We don't downsize... */
- if (cqe <= cq->cqe)
- return 0;
-
- /* create new t3_cq with new size */
- cqe = roundup_pow_of_two(cqe+1);
- newcq.size_log2 = ilog2(cqe);
-
- /* Dont allow resize to less than the current wce count */
- if (cqe < Q_COUNT(chp->cq.rptr, chp->cq.wptr)) {
- return -ENOMEM;
- }
-
- /* Quiesce all QPs using this CQ */
- ret = iwch_quiesce_qps(chp);
- if (ret) {
- return ret;
- }
-
- ret = cxio_create_cq(&chp->rhp->rdev, &newcq);
- if (ret) {
- return ret;
- }
-
- /* copy CQEs */
- memcpy(newcq.queue, chp->cq.queue, (1 << chp->cq.size_log2) *
- sizeof(struct t3_cqe));
-
- /* old iwch_qp gets new t3_cq but keeps old cqid */
- oldcq = chp->cq;
- chp->cq = newcq;
- chp->cq.cqid = oldcq.cqid;
-
- /* resize new t3_cq to update the HW context */
- ret = cxio_resize_cq(&chp->rhp->rdev, &chp->cq);
- if (ret) {
- chp->cq = oldcq;
- return ret;
- }
- chp->ibcq.cqe = (1<<chp->cq.size_log2) - 1;
-
- /* destroy old t3_cq */
- oldcq.cqid = newcq.cqid;
- ret = cxio_destroy_cq(&chp->rhp->rdev, &oldcq);
- if (ret) {
- pr_err("%s - cxio_destroy_cq failed %d\n", __func__, ret);
- }
-
- /* add user hooks here */
-
- /* resume qps */
- ret = iwch_resume_qps(chp);
- return ret;
-#else
- return -ENOSYS;
-#endif
-}
-
-static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
-{
- struct iwch_dev *rhp;
- struct iwch_cq *chp;
- enum t3_cq_opcode cq_op;
- int err;
- unsigned long flag;
- u32 rptr;
-
- chp = to_iwch_cq(ibcq);
- rhp = chp->rhp;
- if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
- cq_op = CQ_ARM_SE;
- else
- cq_op = CQ_ARM_AN;
- if (chp->user_rptr_addr) {
- if (get_user(rptr, chp->user_rptr_addr))
- return -EFAULT;
- spin_lock_irqsave(&chp->lock, flag);
- chp->cq.rptr = rptr;
- } else
- spin_lock_irqsave(&chp->lock, flag);
- pr_debug("%s rptr 0x%x\n", __func__, chp->cq.rptr);
- err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0);
- spin_unlock_irqrestore(&chp->lock, flag);
- if (err < 0)
- pr_err("Error %d rearming CQID 0x%x\n", err, chp->cq.cqid);
- if (err > 0 && !(flags & IB_CQ_REPORT_MISSED_EVENTS))
- err = 0;
- return err;
-}
-
-static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
- int len = vma->vm_end - vma->vm_start;
- u32 key = vma->vm_pgoff << PAGE_SHIFT;
- struct cxio_rdev *rdev_p;
- int ret = 0;
- struct iwch_mm_entry *mm;
- struct iwch_ucontext *ucontext;
- u64 addr;
-
- pr_debug("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff,
- key, len);
-
- if (vma->vm_start & (PAGE_SIZE-1)) {
- return -EINVAL;
- }
-
- rdev_p = &(to_iwch_dev(context->device)->rdev);
- ucontext = to_iwch_ucontext(context);
-
- mm = remove_mmap(ucontext, key, len);
- if (!mm)
- return -EINVAL;
- addr = mm->addr;
- kfree(mm);
-
- if ((addr >= rdev_p->rnic_info.udbell_physbase) &&
- (addr < (rdev_p->rnic_info.udbell_physbase +
- rdev_p->rnic_info.udbell_len))) {
-
- /*
- * Map T3 DB register.
- */
- if (vma->vm_flags & VM_READ) {
- return -EPERM;
- }
-
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
- vma->vm_flags &= ~VM_MAYREAD;
- ret = io_remap_pfn_range(vma, vma->vm_start,
- addr >> PAGE_SHIFT,
- len, vma->vm_page_prot);
- } else {
-
- /*
- * Map WQ or CQ contig dma memory...
- */
- ret = remap_pfn_range(vma, vma->vm_start,
- addr >> PAGE_SHIFT,
- len, vma->vm_page_prot);
- }
-
- return ret;
-}
-
-static void iwch_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata)
-{
- struct iwch_dev *rhp;
- struct iwch_pd *php;
-
- php = to_iwch_pd(pd);
- rhp = php->rhp;
- pr_debug("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
- cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid);
-}
-
-static int iwch_allocate_pd(struct ib_pd *pd, struct ib_udata *udata)
-{
- struct iwch_pd *php = to_iwch_pd(pd);
- struct ib_device *ibdev = pd->device;
- u32 pdid;
- struct iwch_dev *rhp;
-
- pr_debug("%s ibdev %p\n", __func__, ibdev);
- rhp = (struct iwch_dev *) ibdev;
- pdid = cxio_hal_get_pdid(rhp->rdev.rscp);
- if (!pdid)
- return -EINVAL;
-
- php->pdid = pdid;
- php->rhp = rhp;
- if (udata) {
- struct iwch_alloc_pd_resp resp = {.pdid = php->pdid};
-
- if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
- iwch_deallocate_pd(&php->ibpd, udata);
- return -EFAULT;
- }
- }
- pr_debug("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
- return 0;
-}
-
-static int iwch_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
-{
- struct iwch_dev *rhp;
- struct iwch_mr *mhp;
- u32 mmid;
-
- pr_debug("%s ib_mr %p\n", __func__, ib_mr);
-
- mhp = to_iwch_mr(ib_mr);
- kfree(mhp->pages);
- rhp = mhp->rhp;
- mmid = mhp->attr.stag >> 8;
- cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
- iwch_free_pbl(mhp);
- xa_erase_irq(&rhp->mrs, mmid);
- if (mhp->kva)
- kfree((void *) (unsigned long) mhp->kva);
- if (mhp->umem)
- ib_umem_release(mhp->umem);
- pr_debug("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp);
- kfree(mhp);
- return 0;
-}
-
-static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
-{
- const u64 total_size = 0xffffffff;
- const u64 mask = (total_size + PAGE_SIZE - 1) & PAGE_MASK;
- struct iwch_pd *php = to_iwch_pd(pd);
- struct iwch_dev *rhp = php->rhp;
- struct iwch_mr *mhp;
- __be64 *page_list;
- int shift = 26, npages, ret, i;
-
- pr_debug("%s ib_pd %p\n", __func__, pd);
-
- /*
- * T3 only supports 32 bits of size.
- */
- if (sizeof(phys_addr_t) > 4) {
- pr_warn_once("Cannot support dma_mrs on this platform\n");
- return ERR_PTR(-ENOTSUPP);
- }
-
- mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
- if (!mhp)
- return ERR_PTR(-ENOMEM);
-
- mhp->rhp = rhp;
-
- npages = (total_size + (1ULL << shift) - 1) >> shift;
- if (!npages) {
- ret = -EINVAL;
- goto err;
- }
-
- page_list = kmalloc_array(npages, sizeof(u64), GFP_KERNEL);
- if (!page_list) {
- ret = -ENOMEM;
- goto err;
- }
-
- for (i = 0; i < npages; i++)
- page_list[i] = cpu_to_be64((u64)i << shift);
-
- pr_debug("%s mask 0x%llx shift %d len %lld pbl_size %d\n",
- __func__, mask, shift, total_size, npages);
-
- ret = iwch_alloc_pbl(mhp, npages);
- if (ret) {
- kfree(page_list);
- goto err_pbl;
- }
-
- ret = iwch_write_pbl(mhp, page_list, npages, 0);
- kfree(page_list);
- if (ret)
- goto err_pbl;
-
- mhp->attr.pdid = php->pdid;
- mhp->attr.zbva = 0;
-
- mhp->attr.perms = iwch_ib_to_tpt_access(acc);
- mhp->attr.va_fbo = 0;
- mhp->attr.page_size = shift - 12;
-
- mhp->attr.len = (u32) total_size;
- mhp->attr.pbl_size = npages;
- ret = iwch_register_mem(rhp, php, mhp, shift);
- if (ret)
- goto err_pbl;
-
- return &mhp->ibmr;
-
-err_pbl:
- iwch_free_pbl(mhp);
-
-err:
- kfree(mhp);
- return ERR_PTR(ret);
-}
-
-static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
- u64 virt, int acc, struct ib_udata *udata)
-{
- __be64 *pages;
- int shift, n, i;
- int err = 0;
- struct iwch_dev *rhp;
- struct iwch_pd *php;
- struct iwch_mr *mhp;
- struct iwch_reg_user_mr_resp uresp;
- struct sg_dma_page_iter sg_iter;
- pr_debug("%s ib_pd %p\n", __func__, pd);
-
- php = to_iwch_pd(pd);
- rhp = php->rhp;
- mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
- if (!mhp)
- return ERR_PTR(-ENOMEM);
-
- mhp->rhp = rhp;
-
- mhp->umem = ib_umem_get(udata, start, length, acc, 0);
- if (IS_ERR(mhp->umem)) {
- err = PTR_ERR(mhp->umem);
- kfree(mhp);
- return ERR_PTR(err);
- }
-
- shift = PAGE_SHIFT;
-
- n = ib_umem_num_pages(mhp->umem);
-
- err = iwch_alloc_pbl(mhp, n);
- if (err)
- goto err;
-
- pages = (__be64 *) __get_free_page(GFP_KERNEL);
- if (!pages) {
- err = -ENOMEM;
- goto err_pbl;
- }
-
- i = n = 0;
-
- for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) {
- pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter));
- if (i == PAGE_SIZE / sizeof *pages) {
- err = iwch_write_pbl(mhp, pages, i, n);
- if (err)
- goto pbl_done;
- n += i;
- i = 0;
- }
- }
-
- if (i)
- err = iwch_write_pbl(mhp, pages, i, n);
-
-pbl_done:
- free_page((unsigned long) pages);
- if (err)
- goto err_pbl;
-
- mhp->attr.pdid = php->pdid;
- mhp->attr.zbva = 0;
- mhp->attr.perms = iwch_ib_to_tpt_access(acc);
- mhp->attr.va_fbo = virt;
- mhp->attr.page_size = shift - 12;
- mhp->attr.len = (u32) length;
-
- err = iwch_register_mem(rhp, php, mhp, shift);
- if (err)
- goto err_pbl;
-
- if (udata && !t3a_device(rhp)) {
- uresp.pbl_addr = (mhp->attr.pbl_addr -
- rhp->rdev.rnic_info.pbl_base) >> 3;
- pr_debug("%s user resp pbl_addr 0x%x\n", __func__,
- uresp.pbl_addr);
-
- if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
- iwch_dereg_mr(&mhp->ibmr, udata);
- err = -EFAULT;
- goto err;
- }
- }
-
- return &mhp->ibmr;
-
-err_pbl:
- iwch_free_pbl(mhp);
-
-err:
- ib_umem_release(mhp->umem);
- kfree(mhp);
- return ERR_PTR(err);
-}
-
-static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata)
-{
- struct iwch_dev *rhp;
- struct iwch_pd *php;
- struct iwch_mw *mhp;
- u32 mmid;
- u32 stag = 0;
- int ret;
-
- if (type != IB_MW_TYPE_1)
- return ERR_PTR(-EINVAL);
-
- php = to_iwch_pd(pd);
- rhp = php->rhp;
- mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
- if (!mhp)
- return ERR_PTR(-ENOMEM);
- ret = cxio_allocate_window(&rhp->rdev, &stag, php->pdid);
- if (ret) {
- kfree(mhp);
- return ERR_PTR(ret);
- }
- mhp->rhp = rhp;
- mhp->attr.pdid = php->pdid;
- mhp->attr.type = TPT_MW;
- mhp->attr.stag = stag;
- mmid = (stag) >> 8;
- mhp->ibmw.rkey = stag;
- if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) {
- cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
- kfree(mhp);
- return ERR_PTR(-ENOMEM);
- }
- pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
- return &(mhp->ibmw);
-}
-
-static int iwch_dealloc_mw(struct ib_mw *mw)
-{
- struct iwch_dev *rhp;
- struct iwch_mw *mhp;
- u32 mmid;
-
- mhp = to_iwch_mw(mw);
- rhp = mhp->rhp;
- mmid = (mw->rkey) >> 8;
- cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
- xa_erase_irq(&rhp->mrs, mmid);
- pr_debug("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
- kfree(mhp);
- return 0;
-}
-
-static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
-{
- struct iwch_dev *rhp;
- struct iwch_pd *php;
- struct iwch_mr *mhp;
- u32 mmid;
- u32 stag = 0;
- int ret = -ENOMEM;
-
- if (mr_type != IB_MR_TYPE_MEM_REG ||
- max_num_sg > T3_MAX_FASTREG_DEPTH)
- return ERR_PTR(-EINVAL);
-
- php = to_iwch_pd(pd);
- rhp = php->rhp;
- mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
- if (!mhp)
- goto err;
-
- mhp->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
- if (!mhp->pages)
- goto pl_err;
-
- mhp->rhp = rhp;
- ret = iwch_alloc_pbl(mhp, max_num_sg);
- if (ret)
- goto err1;
- mhp->attr.pbl_size = max_num_sg;
- ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid,
- mhp->attr.pbl_size, mhp->attr.pbl_addr);
- if (ret)
- goto err2;
- mhp->attr.pdid = php->pdid;
- mhp->attr.type = TPT_NON_SHARED_MR;
- mhp->attr.stag = stag;
- mhp->attr.state = 1;
- mmid = (stag) >> 8;
- mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- ret = xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL);
- if (ret)
- goto err3;
-
- pr_debug("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
- return &(mhp->ibmr);
-err3:
- cxio_dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
-err2:
- iwch_free_pbl(mhp);
-err1:
- kfree(mhp->pages);
-pl_err:
- kfree(mhp);
-err:
- return ERR_PTR(ret);
-}
-
-static int iwch_set_page(struct ib_mr *ibmr, u64 addr)
-{
- struct iwch_mr *mhp = to_iwch_mr(ibmr);
-
- if (unlikely(mhp->npages == mhp->attr.pbl_size))
- return -ENOMEM;
-
- mhp->pages[mhp->npages++] = addr;
-
- return 0;
-}
-
-static int iwch_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
- int sg_nents, unsigned int *sg_offset)
-{
- struct iwch_mr *mhp = to_iwch_mr(ibmr);
-
- mhp->npages = 0;
-
- return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, iwch_set_page);
-}
-
-static int iwch_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
-{
- struct iwch_dev *rhp;
- struct iwch_qp *qhp;
- struct iwch_qp_attributes attrs;
- struct iwch_ucontext *ucontext;
-
- qhp = to_iwch_qp(ib_qp);
- rhp = qhp->rhp;
-
- attrs.next_state = IWCH_QP_STATE_ERROR;
- iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0);
- wait_event(qhp->wait, !qhp->ep);
-
- xa_erase_irq(&rhp->qps, qhp->wq.qpid);
-
- atomic_dec(&qhp->refcnt);
- wait_event(qhp->wait, !atomic_read(&qhp->refcnt));
-
- ucontext = rdma_udata_to_drv_context(udata, struct iwch_ucontext,
- ibucontext);
- cxio_destroy_qp(&rhp->rdev, &qhp->wq,
- ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
-
- pr_debug("%s ib_qp %p qpid 0x%0x qhp %p\n", __func__,
- ib_qp, qhp->wq.qpid, qhp);
- kfree(qhp);
- return 0;
-}
-
-static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *attrs,
- struct ib_udata *udata)
-{
- struct iwch_dev *rhp;
- struct iwch_qp *qhp;
- struct iwch_pd *php;
- struct iwch_cq *schp;
- struct iwch_cq *rchp;
- struct iwch_create_qp_resp uresp;
- int wqsize, sqsize, rqsize;
- struct iwch_ucontext *ucontext;
-
- pr_debug("%s ib_pd %p\n", __func__, pd);
- if (attrs->qp_type != IB_QPT_RC)
- return ERR_PTR(-EINVAL);
- php = to_iwch_pd(pd);
- rhp = php->rhp;
- schp = get_chp(rhp, ((struct iwch_cq *) attrs->send_cq)->cq.cqid);
- rchp = get_chp(rhp, ((struct iwch_cq *) attrs->recv_cq)->cq.cqid);
- if (!schp || !rchp)
- return ERR_PTR(-EINVAL);
-
- /* The RQT size must be # of entries + 1 rounded up to a power of two */
- rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr);
- if (rqsize == attrs->cap.max_recv_wr)
- rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr+1);
-
- /* T3 doesn't support RQT depth < 16 */
- if (rqsize < 16)
- rqsize = 16;
-
- if (rqsize > T3_MAX_RQ_SIZE)
- return ERR_PTR(-EINVAL);
-
- if (attrs->cap.max_inline_data > T3_MAX_INLINE)
- return ERR_PTR(-EINVAL);
-
- /*
- * NOTE: The SQ and total WQ sizes don't need to be
- * a power of two. However, all the code assumes
- * they are. EG: Q_FREECNT() and friends.
- */
- sqsize = roundup_pow_of_two(attrs->cap.max_send_wr);
- wqsize = roundup_pow_of_two(rqsize + sqsize);
-
- /*
- * Kernel users need more wq space for fastreg WRs which can take
- * 2 WR fragments.
- */
- ucontext = rdma_udata_to_drv_context(udata, struct iwch_ucontext,
- ibucontext);
- if (!ucontext && wqsize < (rqsize + (2 * sqsize)))
- wqsize = roundup_pow_of_two(rqsize +
- roundup_pow_of_two(attrs->cap.max_send_wr * 2));
- pr_debug("%s wqsize %d sqsize %d rqsize %d\n", __func__,
- wqsize, sqsize, rqsize);
- qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
- if (!qhp)
- return ERR_PTR(-ENOMEM);
- qhp->wq.size_log2 = ilog2(wqsize);
- qhp->wq.rq_size_log2 = ilog2(rqsize);
- qhp->wq.sq_size_log2 = ilog2(sqsize);
- if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq,
- ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) {
- kfree(qhp);
- return ERR_PTR(-ENOMEM);
- }
-
- attrs->cap.max_recv_wr = rqsize - 1;
- attrs->cap.max_send_wr = sqsize;
- attrs->cap.max_inline_data = T3_MAX_INLINE;
-
- qhp->rhp = rhp;
- qhp->attr.pd = php->pdid;
- qhp->attr.scq = ((struct iwch_cq *) attrs->send_cq)->cq.cqid;
- qhp->attr.rcq = ((struct iwch_cq *) attrs->recv_cq)->cq.cqid;
- qhp->attr.sq_num_entries = attrs->cap.max_send_wr;
- qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
- qhp->attr.sq_max_sges = attrs->cap.max_send_sge;
- qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge;
- qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
- qhp->attr.state = IWCH_QP_STATE_IDLE;
- qhp->attr.next_state = IWCH_QP_STATE_IDLE;
-
- /*
- * XXX - These don't get passed in from the openib user
- * at create time. The CM sets them via a QP modify.
- * Need to fix... I think the CM should
- */
- qhp->attr.enable_rdma_read = 1;
- qhp->attr.enable_rdma_write = 1;
- qhp->attr.enable_bind = 1;
- qhp->attr.max_ord = 1;
- qhp->attr.max_ird = 1;
-
- spin_lock_init(&qhp->lock);
- init_waitqueue_head(&qhp->wait);
- atomic_set(&qhp->refcnt, 1);
-
- if (xa_store_irq(&rhp->qps, qhp->wq.qpid, qhp, GFP_KERNEL)) {
- cxio_destroy_qp(&rhp->rdev, &qhp->wq,
- ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
- kfree(qhp);
- return ERR_PTR(-ENOMEM);
- }
-
- if (udata) {
-
- struct iwch_mm_entry *mm1, *mm2;
-
- mm1 = kmalloc(sizeof *mm1, GFP_KERNEL);
- if (!mm1) {
- iwch_destroy_qp(&qhp->ibqp, udata);
- return ERR_PTR(-ENOMEM);
- }
-
- mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
- if (!mm2) {
- kfree(mm1);
- iwch_destroy_qp(&qhp->ibqp, udata);
- return ERR_PTR(-ENOMEM);
- }
-
- uresp.qpid = qhp->wq.qpid;
- uresp.size_log2 = qhp->wq.size_log2;
- uresp.sq_size_log2 = qhp->wq.sq_size_log2;
- uresp.rq_size_log2 = qhp->wq.rq_size_log2;
- spin_lock(&ucontext->mmap_lock);
- uresp.key = ucontext->key;
- ucontext->key += PAGE_SIZE;
- uresp.db_key = ucontext->key;
- ucontext->key += PAGE_SIZE;
- spin_unlock(&ucontext->mmap_lock);
- if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
- kfree(mm1);
- kfree(mm2);
- iwch_destroy_qp(&qhp->ibqp, udata);
- return ERR_PTR(-EFAULT);
- }
- mm1->key = uresp.key;
- mm1->addr = virt_to_phys(qhp->wq.queue);
- mm1->len = PAGE_ALIGN(wqsize * sizeof (union t3_wr));
- insert_mmap(ucontext, mm1);
- mm2->key = uresp.db_key;
- mm2->addr = qhp->wq.udb & PAGE_MASK;
- mm2->len = PAGE_SIZE;
- insert_mmap(ucontext, mm2);
- }
- qhp->ibqp.qp_num = qhp->wq.qpid;
- pr_debug("%s sq_num_entries %d, rq_num_entries %d qpid 0x%0x qhp %p dma_addr 0x%llx size %d rq_addr 0x%x\n",
- __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
- qhp->wq.qpid, qhp, (unsigned long long)qhp->wq.dma_addr,
- 1 << qhp->wq.size_log2, qhp->wq.rq_addr);
- return &qhp->ibqp;
-}
-
-static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_udata *udata)
-{
- struct iwch_dev *rhp;
- struct iwch_qp *qhp;
- enum iwch_qp_attr_mask mask = 0;
- struct iwch_qp_attributes attrs;
-
- pr_debug("%s ib_qp %p\n", __func__, ibqp);
-
- /* iwarp does not support the RTR state */
- if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
- attr_mask &= ~IB_QP_STATE;
-
- /* Make sure we still have something left to do */
- if (!attr_mask)
- return 0;
-
- memset(&attrs, 0, sizeof attrs);
- qhp = to_iwch_qp(ibqp);
- rhp = qhp->rhp;
-
- attrs.next_state = iwch_convert_state(attr->qp_state);
- attrs.enable_rdma_read = (attr->qp_access_flags &
- IB_ACCESS_REMOTE_READ) ? 1 : 0;
- attrs.enable_rdma_write = (attr->qp_access_flags &
- IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
- attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0;
-
-
- mask |= (attr_mask & IB_QP_STATE) ? IWCH_QP_ATTR_NEXT_STATE : 0;
- mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ?
- (IWCH_QP_ATTR_ENABLE_RDMA_READ |
- IWCH_QP_ATTR_ENABLE_RDMA_WRITE |
- IWCH_QP_ATTR_ENABLE_RDMA_BIND) : 0;
-
- return iwch_modify_qp(rhp, qhp, mask, &attrs, 0);
-}
-
-void iwch_qp_add_ref(struct ib_qp *qp)
-{
- pr_debug("%s ib_qp %p\n", __func__, qp);
- atomic_inc(&(to_iwch_qp(qp)->refcnt));
-}
-
-void iwch_qp_rem_ref(struct ib_qp *qp)
-{
- pr_debug("%s ib_qp %p\n", __func__, qp);
- if (atomic_dec_and_test(&(to_iwch_qp(qp)->refcnt)))
- wake_up(&(to_iwch_qp(qp)->wait));
-}
-
-static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn)
-{
- pr_debug("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn);
- return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn);
-}
-
-
-static int iwch_query_pkey(struct ib_device *ibdev,
- u8 port, u16 index, u16 * pkey)
-{
- pr_debug("%s ibdev %p\n", __func__, ibdev);
- *pkey = 0;
- return 0;
-}
-
-static int iwch_query_gid(struct ib_device *ibdev, u8 port,
- int index, union ib_gid *gid)
-{
- struct iwch_dev *dev;
-
- pr_debug("%s ibdev %p, port %d, index %d, gid %p\n",
- __func__, ibdev, port, index, gid);
- dev = to_iwch_dev(ibdev);
- BUG_ON(port == 0 || port > 2);
- memset(&(gid->raw[0]), 0, sizeof(gid->raw));
- memcpy(&(gid->raw[0]), dev->rdev.port_info.lldevs[port-1]->dev_addr, 6);
- return 0;
-}
-
-static u64 fw_vers_string_to_u64(struct iwch_dev *iwch_dev)
-{
- struct ethtool_drvinfo info;
- struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
- char *cp, *next;
- unsigned fw_maj, fw_min, fw_mic;
-
- lldev->ethtool_ops->get_drvinfo(lldev, &info);
-
- next = info.fw_version + 1;
- cp = strsep(&next, ".");
- sscanf(cp, "%i", &fw_maj);
- cp = strsep(&next, ".");
- sscanf(cp, "%i", &fw_min);
- cp = strsep(&next, ".");
- sscanf(cp, "%i", &fw_mic);
-
- return (((u64)fw_maj & 0xffff) << 32) | ((fw_min & 0xffff) << 16) |
- (fw_mic & 0xffff);
-}
-
-static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
- struct ib_udata *uhw)
-{
-
- struct iwch_dev *dev;
-
- pr_debug("%s ibdev %p\n", __func__, ibdev);
-
- if (uhw->inlen || uhw->outlen)
- return -EINVAL;
-
- dev = to_iwch_dev(ibdev);
- memset(props, 0, sizeof *props);
- memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
- props->hw_ver = dev->rdev.t3cdev_p->type;
- props->fw_ver = fw_vers_string_to_u64(dev);
- props->device_cap_flags = dev->device_cap_flags;
- props->page_size_cap = dev->attr.mem_pgsizes_bitmask;
- props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor;
- props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device;
- props->max_mr_size = dev->attr.max_mr_size;
- props->max_qp = dev->attr.max_qps;
- props->max_qp_wr = dev->attr.max_wrs;
- props->max_send_sge = dev->attr.max_sge_per_wr;
- props->max_recv_sge = dev->attr.max_sge_per_wr;
- props->max_sge_rd = 1;
- props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp;
- props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp;
- props->max_cq = dev->attr.max_cqs;
- props->max_cqe = dev->attr.max_cqes_per_cq;
- props->max_mr = dev->attr.max_mem_regs;
- props->max_pd = dev->attr.max_pds;
- props->local_ca_ack_delay = 0;
- props->max_fast_reg_page_list_len = T3_MAX_FASTREG_DEPTH;
-
- return 0;
-}
-
-static int iwch_query_port(struct ib_device *ibdev,
- u8 port, struct ib_port_attr *props)
-{
- struct iwch_dev *dev;
- struct net_device *netdev;
- struct in_device *inetdev;
-
- pr_debug("%s ibdev %p\n", __func__, ibdev);
-
- dev = to_iwch_dev(ibdev);
- netdev = dev->rdev.port_info.lldevs[port-1];
-
- /* props being zeroed by the caller, avoid zeroing it here */
- props->max_mtu = IB_MTU_4096;
- props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
-
- if (!netif_carrier_ok(netdev))
- props->state = IB_PORT_DOWN;
- else {
- inetdev = in_dev_get(netdev);
- if (inetdev) {
- if (inetdev->ifa_list)
- props->state = IB_PORT_ACTIVE;
- else
- props->state = IB_PORT_INIT;
- in_dev_put(inetdev);
- } else
- props->state = IB_PORT_INIT;
- }
-
- props->port_cap_flags =
- IB_PORT_CM_SUP |
- IB_PORT_SNMP_TUNNEL_SUP |
- IB_PORT_REINIT_SUP |
- IB_PORT_DEVICE_MGMT_SUP |
- IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
- props->gid_tbl_len = 1;
- props->pkey_tbl_len = 1;
- props->active_width = 2;
- props->active_speed = IB_SPEED_DDR;
- props->max_msg_sz = -1;
-
- return 0;
-}
-
-static ssize_t hw_rev_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct iwch_dev *iwch_dev =
- rdma_device_to_drv_device(dev, struct iwch_dev, ibdev);
-
- pr_debug("%s dev 0x%p\n", __func__, dev);
- return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
-}
-static DEVICE_ATTR_RO(hw_rev);
-
-static ssize_t hca_type_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct iwch_dev *iwch_dev =
- rdma_device_to_drv_device(dev, struct iwch_dev, ibdev);
- struct ethtool_drvinfo info;
- struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
-
- pr_debug("%s dev 0x%p\n", __func__, dev);
- lldev->ethtool_ops->get_drvinfo(lldev, &info);
- return sprintf(buf, "%s\n", info.driver);
-}
-static DEVICE_ATTR_RO(hca_type);
-
-static ssize_t board_id_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct iwch_dev *iwch_dev =
- rdma_device_to_drv_device(dev, struct iwch_dev, ibdev);
-
- pr_debug("%s dev 0x%p\n", __func__, dev);
- return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor,
- iwch_dev->rdev.rnic_info.pdev->device);
-}
-static DEVICE_ATTR_RO(board_id);
-
-enum counters {
- IPINRECEIVES,
- IPINHDRERRORS,
- IPINADDRERRORS,
- IPINUNKNOWNPROTOS,
- IPINDISCARDS,
- IPINDELIVERS,
- IPOUTREQUESTS,
- IPOUTDISCARDS,
- IPOUTNOROUTES,
- IPREASMTIMEOUT,
- IPREASMREQDS,
- IPREASMOKS,
- IPREASMFAILS,
- TCPACTIVEOPENS,
- TCPPASSIVEOPENS,
- TCPATTEMPTFAILS,
- TCPESTABRESETS,
- TCPCURRESTAB,
- TCPINSEGS,
- TCPOUTSEGS,
- TCPRETRANSSEGS,
- TCPINERRS,
- TCPOUTRSTS,
- TCPRTOMIN,
- TCPRTOMAX,
- NR_COUNTERS
-};
-
-static const char * const names[] = {
- [IPINRECEIVES] = "ipInReceives",
- [IPINHDRERRORS] = "ipInHdrErrors",
- [IPINADDRERRORS] = "ipInAddrErrors",
- [IPINUNKNOWNPROTOS] = "ipInUnknownProtos",
- [IPINDISCARDS] = "ipInDiscards",
- [IPINDELIVERS] = "ipInDelivers",
- [IPOUTREQUESTS] = "ipOutRequests",
- [IPOUTDISCARDS] = "ipOutDiscards",
- [IPOUTNOROUTES] = "ipOutNoRoutes",
- [IPREASMTIMEOUT] = "ipReasmTimeout",
- [IPREASMREQDS] = "ipReasmReqds",
- [IPREASMOKS] = "ipReasmOKs",
- [IPREASMFAILS] = "ipReasmFails",
- [TCPACTIVEOPENS] = "tcpActiveOpens",
- [TCPPASSIVEOPENS] = "tcpPassiveOpens",
- [TCPATTEMPTFAILS] = "tcpAttemptFails",
- [TCPESTABRESETS] = "tcpEstabResets",
- [TCPCURRESTAB] = "tcpCurrEstab",
- [TCPINSEGS] = "tcpInSegs",
- [TCPOUTSEGS] = "tcpOutSegs",
- [TCPRETRANSSEGS] = "tcpRetransSegs",
- [TCPINERRS] = "tcpInErrs",
- [TCPOUTRSTS] = "tcpOutRsts",
- [TCPRTOMIN] = "tcpRtoMin",
- [TCPRTOMAX] = "tcpRtoMax",
-};
-
-static struct rdma_hw_stats *iwch_alloc_stats(struct ib_device *ibdev,
- u8 port_num)
-{
- BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS);
-
- /* Our driver only supports device level stats */
- if (port_num != 0)
- return NULL;
-
- return rdma_alloc_hw_stats_struct(names, NR_COUNTERS,
- RDMA_HW_STATS_DEFAULT_LIFESPAN);
-}
-
-static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats,
- u8 port, int index)
-{
- struct iwch_dev *dev;
- struct tp_mib_stats m;
- int ret;
-
- if (port != 0 || !stats)
- return -ENOSYS;
-
- pr_debug("%s ibdev %p\n", __func__, ibdev);
- dev = to_iwch_dev(ibdev);
- ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m);
- if (ret)
- return -ENOSYS;
-
- stats->value[IPINRECEIVES] = ((u64)m.ipInReceive_hi << 32) + m.ipInReceive_lo;
- stats->value[IPINHDRERRORS] = ((u64)m.ipInHdrErrors_hi << 32) + m.ipInHdrErrors_lo;
- stats->value[IPINADDRERRORS] = ((u64)m.ipInAddrErrors_hi << 32) + m.ipInAddrErrors_lo;
- stats->value[IPINUNKNOWNPROTOS] = ((u64)m.ipInUnknownProtos_hi << 32) + m.ipInUnknownProtos_lo;
- stats->value[IPINDISCARDS] = ((u64)m.ipInDiscards_hi << 32) + m.ipInDiscards_lo;
- stats->value[IPINDELIVERS] = ((u64)m.ipInDelivers_hi << 32) + m.ipInDelivers_lo;
- stats->value[IPOUTREQUESTS] = ((u64)m.ipOutRequests_hi << 32) + m.ipOutRequests_lo;
- stats->value[IPOUTDISCARDS] = ((u64)m.ipOutDiscards_hi << 32) + m.ipOutDiscards_lo;
- stats->value[IPOUTNOROUTES] = ((u64)m.ipOutNoRoutes_hi << 32) + m.ipOutNoRoutes_lo;
- stats->value[IPREASMTIMEOUT] = m.ipReasmTimeout;
- stats->value[IPREASMREQDS] = m.ipReasmReqds;
- stats->value[IPREASMOKS] = m.ipReasmOKs;
- stats->value[IPREASMFAILS] = m.ipReasmFails;
- stats->value[TCPACTIVEOPENS] = m.tcpActiveOpens;
- stats->value[TCPPASSIVEOPENS] = m.tcpPassiveOpens;
- stats->value[TCPATTEMPTFAILS] = m.tcpAttemptFails;
- stats->value[TCPESTABRESETS] = m.tcpEstabResets;
- stats->value[TCPCURRESTAB] = m.tcpOutRsts;
- stats->value[TCPINSEGS] = m.tcpCurrEstab;
- stats->value[TCPOUTSEGS] = ((u64)m.tcpInSegs_hi << 32) + m.tcpInSegs_lo;
- stats->value[TCPRETRANSSEGS] = ((u64)m.tcpOutSegs_hi << 32) + m.tcpOutSegs_lo;
- stats->value[TCPINERRS] = ((u64)m.tcpRetransSeg_hi << 32) + m.tcpRetransSeg_lo,
- stats->value[TCPOUTRSTS] = ((u64)m.tcpInErrs_hi << 32) + m.tcpInErrs_lo;
- stats->value[TCPRTOMIN] = m.tcpRtoMin;
- stats->value[TCPRTOMAX] = m.tcpRtoMax;
-
- return stats->num_counters;
-}
-
-static struct attribute *iwch_class_attributes[] = {
- &dev_attr_hw_rev.attr,
- &dev_attr_hca_type.attr,
- &dev_attr_board_id.attr,
- NULL
-};
-
-static const struct attribute_group iwch_attr_group = {
- .attrs = iwch_class_attributes,
-};
-
-static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num,
- struct ib_port_immutable *immutable)
-{
- struct ib_port_attr attr;
- int err;
-
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
-
- err = ib_query_port(ibdev, port_num, &attr);
- if (err)
- return err;
-
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
- immutable->gid_tbl_len = attr.gid_tbl_len;
-
- return 0;
-}
-
-static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str)
-{
- struct iwch_dev *iwch_dev = to_iwch_dev(ibdev);
- struct ethtool_drvinfo info;
- struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
-
- pr_debug("%s dev 0x%p\n", __func__, iwch_dev);
- lldev->ethtool_ops->get_drvinfo(lldev, &info);
- snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version);
-}
-
-static const struct ib_device_ops iwch_dev_ops = {
- .alloc_hw_stats = iwch_alloc_stats,
- .alloc_mr = iwch_alloc_mr,
- .alloc_mw = iwch_alloc_mw,
- .alloc_pd = iwch_allocate_pd,
- .alloc_ucontext = iwch_alloc_ucontext,
- .create_cq = iwch_create_cq,
- .create_qp = iwch_create_qp,
- .dealloc_mw = iwch_dealloc_mw,
- .dealloc_pd = iwch_deallocate_pd,
- .dealloc_ucontext = iwch_dealloc_ucontext,
- .dereg_mr = iwch_dereg_mr,
- .destroy_cq = iwch_destroy_cq,
- .destroy_qp = iwch_destroy_qp,
- .get_dev_fw_str = get_dev_fw_ver_str,
- .get_dma_mr = iwch_get_dma_mr,
- .get_hw_stats = iwch_get_mib,
- .get_port_immutable = iwch_port_immutable,
- .iw_accept = iwch_accept_cr,
- .iw_add_ref = iwch_qp_add_ref,
- .iw_connect = iwch_connect,
- .iw_create_listen = iwch_create_listen,
- .iw_destroy_listen = iwch_destroy_listen,
- .iw_get_qp = iwch_get_qp,
- .iw_reject = iwch_reject_cr,
- .iw_rem_ref = iwch_qp_rem_ref,
- .map_mr_sg = iwch_map_mr_sg,
- .mmap = iwch_mmap,
- .modify_qp = iwch_ib_modify_qp,
- .poll_cq = iwch_poll_cq,
- .post_recv = iwch_post_receive,
- .post_send = iwch_post_send,
- .query_device = iwch_query_device,
- .query_gid = iwch_query_gid,
- .query_pkey = iwch_query_pkey,
- .query_port = iwch_query_port,
- .reg_user_mr = iwch_reg_user_mr,
- .req_notify_cq = iwch_arm_cq,
- .resize_cq = iwch_resize_cq,
- INIT_RDMA_OBJ_SIZE(ib_pd, iwch_pd, ibpd),
- INIT_RDMA_OBJ_SIZE(ib_ucontext, iwch_ucontext, ibucontext),
-};
-
-int iwch_register_device(struct iwch_dev *dev)
-{
- pr_debug("%s iwch_dev %p\n", __func__, dev);
- memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
- memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
- dev->ibdev.owner = THIS_MODULE;
- dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
- IB_DEVICE_MEM_WINDOW |
- IB_DEVICE_MEM_MGT_EXTENSIONS;
-
- /* cxgb3 supports STag 0. */
- dev->ibdev.local_dma_lkey = 0;
-
- dev->ibdev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_POST_SEND) |
- (1ull << IB_USER_VERBS_CMD_POST_RECV);
- dev->ibdev.node_type = RDMA_NODE_RNIC;
- BUILD_BUG_ON(sizeof(IWCH_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);
- memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC));
- dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
- dev->ibdev.num_comp_vectors = 1;
- dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev;
- dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION;
-
- memcpy(dev->ibdev.iw_ifname, dev->rdev.t3cdev_p->lldev->name,
- sizeof(dev->ibdev.iw_ifname));
-
- dev->ibdev.driver_id = RDMA_DRIVER_CXGB3;
- rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group);
- ib_set_device_ops(&dev->ibdev, &iwch_dev_ops);
- return ib_register_device(&dev->ibdev, "cxgb3_%d");
-}
-
-void iwch_unregister_device(struct iwch_dev *dev)
-{
- pr_debug("%s iwch_dev %p\n", __func__, dev);
- ib_unregister_device(&dev->ibdev);
- return;
-}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
deleted file mode 100644
index 8adbe9658935..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ /dev/null
@@ -1,347 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __IWCH_PROVIDER_H__
-#define __IWCH_PROVIDER_H__
-
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <rdma/ib_verbs.h>
-#include <asm/types.h>
-#include "t3cdev.h"
-#include "iwch.h"
-#include "cxio_wr.h"
-#include "cxio_hal.h"
-
-struct iwch_pd {
- struct ib_pd ibpd;
- u32 pdid;
- struct iwch_dev *rhp;
-};
-
-static inline struct iwch_pd *to_iwch_pd(struct ib_pd *ibpd)
-{
- return container_of(ibpd, struct iwch_pd, ibpd);
-}
-
-struct tpt_attributes {
- u32 stag;
- u32 state:1;
- u32 type:2;
- u32 rsvd:1;
- enum tpt_mem_perm perms;
- u32 remote_invaliate_disable:1;
- u32 zbva:1;
- u32 mw_bind_enable:1;
- u32 page_size:5;
-
- u32 pdid;
- u32 qpid;
- u32 pbl_addr;
- u32 len;
- u64 va_fbo;
- u32 pbl_size;
-};
-
-struct iwch_mr {
- struct ib_mr ibmr;
- struct ib_umem *umem;
- struct iwch_dev *rhp;
- u64 kva;
- struct tpt_attributes attr;
- u64 *pages;
- u32 npages;
-};
-
-typedef struct iwch_mw iwch_mw_handle;
-
-static inline struct iwch_mr *to_iwch_mr(struct ib_mr *ibmr)
-{
- return container_of(ibmr, struct iwch_mr, ibmr);
-}
-
-struct iwch_mw {
- struct ib_mw ibmw;
- struct iwch_dev *rhp;
- u64 kva;
- struct tpt_attributes attr;
-};
-
-static inline struct iwch_mw *to_iwch_mw(struct ib_mw *ibmw)
-{
- return container_of(ibmw, struct iwch_mw, ibmw);
-}
-
-struct iwch_cq {
- struct ib_cq ibcq;
- struct iwch_dev *rhp;
- struct t3_cq cq;
- spinlock_t lock;
- spinlock_t comp_handler_lock;
- atomic_t refcnt;
- wait_queue_head_t wait;
- u32 __user *user_rptr_addr;
-};
-
-static inline struct iwch_cq *to_iwch_cq(struct ib_cq *ibcq)
-{
- return container_of(ibcq, struct iwch_cq, ibcq);
-}
-
-enum IWCH_QP_FLAGS {
- QP_QUIESCED = 0x01
-};
-
-struct iwch_mpa_attributes {
- u8 initiator;
- u8 recv_marker_enabled;
- u8 xmit_marker_enabled; /* iWARP: enable inbound Read Resp. */
- u8 crc_enabled;
- u8 version; /* 0 or 1 */
-};
-
-struct iwch_qp_attributes {
- u32 scq;
- u32 rcq;
- u32 sq_num_entries;
- u32 rq_num_entries;
- u32 sq_max_sges;
- u32 sq_max_sges_rdma_write;
- u32 rq_max_sges;
- u32 state;
- u8 enable_rdma_read;
- u8 enable_rdma_write; /* enable inbound Read Resp. */
- u8 enable_bind;
- u8 enable_mmid0_fastreg; /* Enable STAG0 + Fast-register */
- /*
- * Next QP state. If the current state is specified, only the
- * QP attributes will be modified.
- */
- u32 max_ord;
- u32 max_ird;
- u32 pd; /* IN */
- u32 next_state;
- char terminate_buffer[52];
- u32 terminate_msg_len;
- u8 is_terminate_local;
- struct iwch_mpa_attributes mpa_attr; /* IN-OUT */
- struct iwch_ep *llp_stream_handle;
- char *stream_msg_buf; /* Last stream msg. before Idle -> RTS */
- u32 stream_msg_buf_len; /* Only on Idle -> RTS */
-};
-
-struct iwch_qp {
- struct ib_qp ibqp;
- struct iwch_dev *rhp;
- struct iwch_ep *ep;
- struct iwch_qp_attributes attr;
- struct t3_wq wq;
- spinlock_t lock;
- atomic_t refcnt;
- wait_queue_head_t wait;
- enum IWCH_QP_FLAGS flags;
-};
-
-static inline int qp_quiesced(struct iwch_qp *qhp)
-{
- return qhp->flags & QP_QUIESCED;
-}
-
-static inline struct iwch_qp *to_iwch_qp(struct ib_qp *ibqp)
-{
- return container_of(ibqp, struct iwch_qp, ibqp);
-}
-
-void iwch_qp_add_ref(struct ib_qp *qp);
-void iwch_qp_rem_ref(struct ib_qp *qp);
-
-struct iwch_ucontext {
- struct ib_ucontext ibucontext;
- struct cxio_ucontext uctx;
- u32 key;
- spinlock_t mmap_lock;
- struct list_head mmaps;
-};
-
-static inline struct iwch_ucontext *to_iwch_ucontext(struct ib_ucontext *c)
-{
- return container_of(c, struct iwch_ucontext, ibucontext);
-}
-
-struct iwch_mm_entry {
- struct list_head entry;
- u64 addr;
- u32 key;
- unsigned len;
-};
-
-static inline struct iwch_mm_entry *remove_mmap(struct iwch_ucontext *ucontext,
- u32 key, unsigned len)
-{
- struct list_head *pos, *nxt;
- struct iwch_mm_entry *mm;
-
- spin_lock(&ucontext->mmap_lock);
- list_for_each_safe(pos, nxt, &ucontext->mmaps) {
-
- mm = list_entry(pos, struct iwch_mm_entry, entry);
- if (mm->key == key && mm->len == len) {
- list_del_init(&mm->entry);
- spin_unlock(&ucontext->mmap_lock);
- pr_debug("%s key 0x%x addr 0x%llx len %d\n",
- __func__, key,
- (unsigned long long)mm->addr, mm->len);
- return mm;
- }
- }
- spin_unlock(&ucontext->mmap_lock);
- return NULL;
-}
-
-static inline void insert_mmap(struct iwch_ucontext *ucontext,
- struct iwch_mm_entry *mm)
-{
- spin_lock(&ucontext->mmap_lock);
- pr_debug("%s key 0x%x addr 0x%llx len %d\n",
- __func__, mm->key, (unsigned long long)mm->addr, mm->len);
- list_add_tail(&mm->entry, &ucontext->mmaps);
- spin_unlock(&ucontext->mmap_lock);
-}
-
-enum iwch_qp_attr_mask {
- IWCH_QP_ATTR_NEXT_STATE = 1 << 0,
- IWCH_QP_ATTR_ENABLE_RDMA_READ = 1 << 7,
- IWCH_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8,
- IWCH_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9,
- IWCH_QP_ATTR_MAX_ORD = 1 << 11,
- IWCH_QP_ATTR_MAX_IRD = 1 << 12,
- IWCH_QP_ATTR_LLP_STREAM_HANDLE = 1 << 22,
- IWCH_QP_ATTR_STREAM_MSG_BUFFER = 1 << 23,
- IWCH_QP_ATTR_MPA_ATTR = 1 << 24,
- IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE = 1 << 25,
- IWCH_QP_ATTR_VALID_MODIFY = (IWCH_QP_ATTR_ENABLE_RDMA_READ |
- IWCH_QP_ATTR_ENABLE_RDMA_WRITE |
- IWCH_QP_ATTR_MAX_ORD |
- IWCH_QP_ATTR_MAX_IRD |
- IWCH_QP_ATTR_LLP_STREAM_HANDLE |
- IWCH_QP_ATTR_STREAM_MSG_BUFFER |
- IWCH_QP_ATTR_MPA_ATTR |
- IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE)
-};
-
-int iwch_modify_qp(struct iwch_dev *rhp,
- struct iwch_qp *qhp,
- enum iwch_qp_attr_mask mask,
- struct iwch_qp_attributes *attrs,
- int internal);
-
-enum iwch_qp_state {
- IWCH_QP_STATE_IDLE,
- IWCH_QP_STATE_RTS,
- IWCH_QP_STATE_ERROR,
- IWCH_QP_STATE_TERMINATE,
- IWCH_QP_STATE_CLOSING,
- IWCH_QP_STATE_TOT
-};
-
-static inline int iwch_convert_state(enum ib_qp_state ib_state)
-{
- switch (ib_state) {
- case IB_QPS_RESET:
- case IB_QPS_INIT:
- return IWCH_QP_STATE_IDLE;
- case IB_QPS_RTS:
- return IWCH_QP_STATE_RTS;
- case IB_QPS_SQD:
- return IWCH_QP_STATE_CLOSING;
- case IB_QPS_SQE:
- return IWCH_QP_STATE_TERMINATE;
- case IB_QPS_ERR:
- return IWCH_QP_STATE_ERROR;
- default:
- return -1;
- }
-}
-
-static inline u32 iwch_ib_to_tpt_access(int acc)
-{
- return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) |
- (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0) |
- (acc & IB_ACCESS_LOCAL_WRITE ? TPT_LOCAL_WRITE : 0) |
- (acc & IB_ACCESS_MW_BIND ? TPT_MW_BIND : 0) |
- TPT_LOCAL_READ;
-}
-
-static inline u32 iwch_ib_to_tpt_bind_access(int acc)
-{
- return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) |
- (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0);
-}
-
-enum iwch_mmid_state {
- IWCH_STAG_STATE_VALID,
- IWCH_STAG_STATE_INVALID
-};
-
-enum iwch_qp_query_flags {
- IWCH_QP_QUERY_CONTEXT_NONE = 0x0, /* No ctx; Only attrs */
- IWCH_QP_QUERY_CONTEXT_GET = 0x1, /* Get ctx + attrs */
- IWCH_QP_QUERY_CONTEXT_SUSPEND = 0x2, /* Not Supported */
-
- /*
- * Quiesce QP context; Consumer
- * will NOT replay outstanding WR
- */
- IWCH_QP_QUERY_CONTEXT_QUIESCE = 0x4,
- IWCH_QP_QUERY_CONTEXT_REMOVE = 0x8,
- IWCH_QP_QUERY_TEST_USERWRITE = 0x32 /* Test special */
-};
-
-u16 iwch_rqes_posted(struct iwch_qp *qhp);
-int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
- const struct ib_send_wr **bad_wr);
-int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
- const struct ib_recv_wr **bad_wr);
-int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
-int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
-int iwch_post_zb_read(struct iwch_ep *ep);
-int iwch_register_device(struct iwch_dev *dev);
-void iwch_unregister_device(struct iwch_dev *dev);
-void stop_read_rep_timer(struct iwch_qp *qhp);
-int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
- struct iwch_mr *mhp, int shift);
-int iwch_alloc_pbl(struct iwch_mr *mhp, int npages);
-void iwch_free_pbl(struct iwch_mr *mhp);
-int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset);
-
-#define IWCH_NODE_DESC "cxgb3 Chelsio Communications"
-
-#endif
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
deleted file mode 100644
index c649faad63f9..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ /dev/null
@@ -1,1082 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/sched.h>
-#include <linux/gfp.h>
-#include "iwch_provider.h"
-#include "iwch.h"
-#include "iwch_cm.h"
-#include "cxio_hal.h"
-#include "cxio_resource.h"
-
-#define NO_SUPPORT -1
-
-static int build_rdma_send(union t3_wr *wqe, const struct ib_send_wr *wr,
- u8 *flit_cnt)
-{
- int i;
- u32 plen;
-
- switch (wr->opcode) {
- case IB_WR_SEND:
- if (wr->send_flags & IB_SEND_SOLICITED)
- wqe->send.rdmaop = T3_SEND_WITH_SE;
- else
- wqe->send.rdmaop = T3_SEND;
- wqe->send.rem_stag = 0;
- break;
- case IB_WR_SEND_WITH_INV:
- if (wr->send_flags & IB_SEND_SOLICITED)
- wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
- else
- wqe->send.rdmaop = T3_SEND_WITH_INV;
- wqe->send.rem_stag = cpu_to_be32(wr->ex.invalidate_rkey);
- break;
- default:
- return -EINVAL;
- }
- if (wr->num_sge > T3_MAX_SGE)
- return -EINVAL;
- wqe->send.reserved[0] = 0;
- wqe->send.reserved[1] = 0;
- wqe->send.reserved[2] = 0;
- plen = 0;
- for (i = 0; i < wr->num_sge; i++) {
- if ((plen + wr->sg_list[i].length) < plen)
- return -EMSGSIZE;
-
- plen += wr->sg_list[i].length;
- wqe->send.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey);
- wqe->send.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
- wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
- }
- wqe->send.num_sgle = cpu_to_be32(wr->num_sge);
- *flit_cnt = 4 + ((wr->num_sge) << 1);
- wqe->send.plen = cpu_to_be32(plen);
- return 0;
-}
-
-static int build_rdma_write(union t3_wr *wqe, const struct ib_send_wr *wr,
- u8 *flit_cnt)
-{
- int i;
- u32 plen;
- if (wr->num_sge > T3_MAX_SGE)
- return -EINVAL;
- wqe->write.rdmaop = T3_RDMA_WRITE;
- wqe->write.reserved[0] = 0;
- wqe->write.reserved[1] = 0;
- wqe->write.reserved[2] = 0;
- wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
- wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
-
- if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
- plen = 4;
- wqe->write.sgl[0].stag = wr->ex.imm_data;
- wqe->write.sgl[0].len = cpu_to_be32(0);
- wqe->write.num_sgle = cpu_to_be32(0);
- *flit_cnt = 6;
- } else {
- plen = 0;
- for (i = 0; i < wr->num_sge; i++) {
- if ((plen + wr->sg_list[i].length) < plen) {
- return -EMSGSIZE;
- }
- plen += wr->sg_list[i].length;
- wqe->write.sgl[i].stag =
- cpu_to_be32(wr->sg_list[i].lkey);
- wqe->write.sgl[i].len =
- cpu_to_be32(wr->sg_list[i].length);
- wqe->write.sgl[i].to =
- cpu_to_be64(wr->sg_list[i].addr);
- }
- wqe->write.num_sgle = cpu_to_be32(wr->num_sge);
- *flit_cnt = 5 + ((wr->num_sge) << 1);
- }
- wqe->write.plen = cpu_to_be32(plen);
- return 0;
-}
-
-static int build_rdma_read(union t3_wr *wqe, const struct ib_send_wr *wr,
- u8 *flit_cnt)
-{
- if (wr->num_sge > 1)
- return -EINVAL;
- wqe->read.rdmaop = T3_READ_REQ;
- if (wr->opcode == IB_WR_RDMA_READ_WITH_INV)
- wqe->read.local_inv = 1;
- else
- wqe->read.local_inv = 0;
- wqe->read.reserved[0] = 0;
- wqe->read.reserved[1] = 0;
- wqe->read.rem_stag = cpu_to_be32(rdma_wr(wr)->rkey);
- wqe->read.rem_to = cpu_to_be64(rdma_wr(wr)->remote_addr);
- wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey);
- wqe->read.local_len = cpu_to_be32(wr->sg_list[0].length);
- wqe->read.local_to = cpu_to_be64(wr->sg_list[0].addr);
- *flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
- return 0;
-}
-
-static int build_memreg(union t3_wr *wqe, const struct ib_reg_wr *wr,
- u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq)
-{
- struct iwch_mr *mhp = to_iwch_mr(wr->mr);
- int i;
- __be64 *p;
-
- if (mhp->npages > T3_MAX_FASTREG_DEPTH)
- return -EINVAL;
- *wr_cnt = 1;
- wqe->fastreg.stag = cpu_to_be32(wr->key);
- wqe->fastreg.len = cpu_to_be32(mhp->ibmr.length);
- wqe->fastreg.va_base_hi = cpu_to_be32(mhp->ibmr.iova >> 32);
- wqe->fastreg.va_base_lo_fbo =
- cpu_to_be32(mhp->ibmr.iova & 0xffffffff);
- wqe->fastreg.page_type_perms = cpu_to_be32(
- V_FR_PAGE_COUNT(mhp->npages) |
- V_FR_PAGE_SIZE(ilog2(wr->mr->page_size) - 12) |
- V_FR_TYPE(TPT_VATO) |
- V_FR_PERMS(iwch_ib_to_tpt_access(wr->access)));
- p = &wqe->fastreg.pbl_addrs[0];
- for (i = 0; i < mhp->npages; i++, p++) {
-
- /* If we need a 2nd WR, then set it up */
- if (i == T3_MAX_FASTREG_FRAG) {
- *wr_cnt = 2;
- wqe = (union t3_wr *)(wq->queue +
- Q_PTR2IDX((wq->wptr+1), wq->size_log2));
- build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0,
- Q_GENBIT(wq->wptr + 1, wq->size_log2),
- 0, 1 + mhp->npages - T3_MAX_FASTREG_FRAG,
- T3_EOP);
-
- p = &wqe->pbl_frag.pbl_addrs[0];
- }
- *p = cpu_to_be64((u64)mhp->pages[i]);
- }
- *flit_cnt = 5 + mhp->npages;
- if (*flit_cnt > 15)
- *flit_cnt = 15;
- return 0;
-}
-
-static int build_inv_stag(union t3_wr *wqe, const struct ib_send_wr *wr,
- u8 *flit_cnt)
-{
- wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey);
- wqe->local_inv.reserved = 0;
- *flit_cnt = sizeof(struct t3_local_inv_wr) >> 3;
- return 0;
-}
-
-static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
- u32 num_sgle, u32 * pbl_addr, u8 * page_size)
-{
- int i;
- struct iwch_mr *mhp;
- u64 offset;
- for (i = 0; i < num_sgle; i++) {
-
- mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
- if (!mhp) {
- pr_debug("%s %d\n", __func__, __LINE__);
- return -EIO;
- }
- if (!mhp->attr.state) {
- pr_debug("%s %d\n", __func__, __LINE__);
- return -EIO;
- }
- if (mhp->attr.zbva) {
- pr_debug("%s %d\n", __func__, __LINE__);
- return -EIO;
- }
-
- if (sg_list[i].addr < mhp->attr.va_fbo) {
- pr_debug("%s %d\n", __func__, __LINE__);
- return -EINVAL;
- }
- if (sg_list[i].addr + ((u64) sg_list[i].length) <
- sg_list[i].addr) {
- pr_debug("%s %d\n", __func__, __LINE__);
- return -EINVAL;
- }
- if (sg_list[i].addr + ((u64) sg_list[i].length) >
- mhp->attr.va_fbo + ((u64) mhp->attr.len)) {
- pr_debug("%s %d\n", __func__, __LINE__);
- return -EINVAL;
- }
- offset = sg_list[i].addr - mhp->attr.va_fbo;
- offset += mhp->attr.va_fbo &
- ((1UL << (12 + mhp->attr.page_size)) - 1);
- pbl_addr[i] = ((mhp->attr.pbl_addr -
- rhp->rdev.rnic_info.pbl_base) >> 3) +
- (offset >> (12 + mhp->attr.page_size));
- page_size[i] = mhp->attr.page_size;
- }
- return 0;
-}
-
-static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe,
- const struct ib_recv_wr *wr)
-{
- int i, err = 0;
- u32 pbl_addr[T3_MAX_SGE];
- u8 page_size[T3_MAX_SGE];
-
- err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr,
- page_size);
- if (err)
- return err;
- wqe->recv.pagesz[0] = page_size[0];
- wqe->recv.pagesz[1] = page_size[1];
- wqe->recv.pagesz[2] = page_size[2];
- wqe->recv.pagesz[3] = page_size[3];
- wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
- for (i = 0; i < wr->num_sge; i++) {
- wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey);
- wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
-
- /* to in the WQE == the offset into the page */
- wqe->recv.sgl[i].to = cpu_to_be64(((u32)wr->sg_list[i].addr) &
- ((1UL << (12 + page_size[i])) - 1));
-
- /* pbl_addr is the adapter's address in the PBL */
- wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]);
- }
- for (; i < T3_MAX_SGE; i++) {
- wqe->recv.sgl[i].stag = 0;
- wqe->recv.sgl[i].len = 0;
- wqe->recv.sgl[i].to = 0;
- wqe->recv.pbl_addr[i] = 0;
- }
- qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
- qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
- qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
- qhp->wq.rq_size_log2)].pbl_addr = 0;
- return 0;
-}
-
-static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe,
- const struct ib_recv_wr *wr)
-{
- int i;
- u32 pbl_addr;
- u32 pbl_offset;
-
-
- /*
- * The T3 HW requires the PBL in the HW recv descriptor to reference
- * a PBL entry. So we allocate the max needed PBL memory here and pass
- * it to the uP in the recv WR. The uP will build the PBL and setup
- * the HW recv descriptor.
- */
- pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE);
- if (!pbl_addr)
- return -ENOMEM;
-
- /*
- * Compute the 8B aligned offset.
- */
- pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3;
-
- wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
-
- for (i = 0; i < wr->num_sge; i++) {
-
- /*
- * Use a 128MB page size. This and an imposed 128MB
- * sge length limit allow us to require only a 2-entry HW
- * PBL for each SGE. This restriction is acceptable since
- * it is not possible to allocate 128MB of contiguous
- * DMA coherent memory!
- */
- if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN)
- return -EINVAL;
- wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT;
-
- /*
- * T3 restricts a recv to all zero-stag or all non-zero-stag.
- */
- if (wr->sg_list[i].lkey != 0)
- return -EINVAL;
- wqe->recv.sgl[i].stag = 0;
- wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
- wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
- wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_offset);
- pbl_offset += 2;
- }
- for (; i < T3_MAX_SGE; i++) {
- wqe->recv.pagesz[i] = 0;
- wqe->recv.sgl[i].stag = 0;
- wqe->recv.sgl[i].len = 0;
- wqe->recv.sgl[i].to = 0;
- wqe->recv.pbl_addr[i] = 0;
- }
- qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
- qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
- qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
- qhp->wq.rq_size_log2)].pbl_addr = pbl_addr;
- return 0;
-}
-
-int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
- const struct ib_send_wr **bad_wr)
-{
- int err = 0;
- u8 uninitialized_var(t3_wr_flit_cnt);
- enum t3_wr_opcode t3_wr_opcode = 0;
- enum t3_wr_flags t3_wr_flags;
- struct iwch_qp *qhp;
- u32 idx;
- union t3_wr *wqe;
- u32 num_wrs;
- unsigned long flag;
- struct t3_swsq *sqp;
- int wr_cnt = 1;
-
- qhp = to_iwch_qp(ibqp);
- spin_lock_irqsave(&qhp->lock, flag);
- if (qhp->attr.state > IWCH_QP_STATE_RTS) {
- spin_unlock_irqrestore(&qhp->lock, flag);
- err = -EINVAL;
- goto out;
- }
- num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
- qhp->wq.sq_size_log2);
- if (num_wrs == 0) {
- spin_unlock_irqrestore(&qhp->lock, flag);
- err = -ENOMEM;
- goto out;
- }
- while (wr) {
- if (num_wrs == 0) {
- err = -ENOMEM;
- break;
- }
- idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
- wqe = (union t3_wr *) (qhp->wq.queue + idx);
- t3_wr_flags = 0;
- if (wr->send_flags & IB_SEND_SOLICITED)
- t3_wr_flags |= T3_SOLICITED_EVENT_FLAG;
- if (wr->send_flags & IB_SEND_SIGNALED)
- t3_wr_flags |= T3_COMPLETION_FLAG;
- sqp = qhp->wq.sq +
- Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
- switch (wr->opcode) {
- case IB_WR_SEND:
- case IB_WR_SEND_WITH_INV:
- if (wr->send_flags & IB_SEND_FENCE)
- t3_wr_flags |= T3_READ_FENCE_FLAG;
- t3_wr_opcode = T3_WR_SEND;
- err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
- break;
- case IB_WR_RDMA_WRITE:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- t3_wr_opcode = T3_WR_WRITE;
- err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
- break;
- case IB_WR_RDMA_READ:
- case IB_WR_RDMA_READ_WITH_INV:
- t3_wr_opcode = T3_WR_READ;
- t3_wr_flags = 0; /* T3 reads are always signaled */
- err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
- if (err)
- break;
- sqp->read_len = wqe->read.local_len;
- if (!qhp->wq.oldest_read)
- qhp->wq.oldest_read = sqp;
- break;
- case IB_WR_REG_MR:
- t3_wr_opcode = T3_WR_FASTREG;
- err = build_memreg(wqe, reg_wr(wr), &t3_wr_flit_cnt,
- &wr_cnt, &qhp->wq);
- break;
- case IB_WR_LOCAL_INV:
- if (wr->send_flags & IB_SEND_FENCE)
- t3_wr_flags |= T3_LOCAL_FENCE_FLAG;
- t3_wr_opcode = T3_WR_INV_STAG;
- err = build_inv_stag(wqe, wr, &t3_wr_flit_cnt);
- break;
- default:
- pr_debug("%s post of type=%d TBD!\n", __func__,
- wr->opcode);
- err = -EINVAL;
- }
- if (err)
- break;
- wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
- sqp->wr_id = wr->wr_id;
- sqp->opcode = wr2opcode(t3_wr_opcode);
- sqp->sq_wptr = qhp->wq.sq_wptr;
- sqp->complete = 0;
- sqp->signaled = (wr->send_flags & IB_SEND_SIGNALED);
-
- build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags,
- Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
- 0, t3_wr_flit_cnt,
- (wr_cnt == 1) ? T3_SOPEOP : T3_SOP);
- pr_debug("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n",
- __func__, (unsigned long long)wr->wr_id, idx,
- Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
- sqp->opcode);
- wr = wr->next;
- num_wrs--;
- qhp->wq.wptr += wr_cnt;
- ++(qhp->wq.sq_wptr);
- }
- spin_unlock_irqrestore(&qhp->lock, flag);
- if (cxio_wq_db_enabled(&qhp->wq))
- ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
-
-out:
- if (err)
- *bad_wr = wr;
- return err;
-}
-
-int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
- const struct ib_recv_wr **bad_wr)
-{
- int err = 0;
- struct iwch_qp *qhp;
- u32 idx;
- union t3_wr *wqe;
- u32 num_wrs;
- unsigned long flag;
-
- qhp = to_iwch_qp(ibqp);
- spin_lock_irqsave(&qhp->lock, flag);
- if (qhp->attr.state > IWCH_QP_STATE_RTS) {
- spin_unlock_irqrestore(&qhp->lock, flag);
- err = -EINVAL;
- goto out;
- }
- num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
- qhp->wq.rq_size_log2) - 1;
- if (!wr) {
- spin_unlock_irqrestore(&qhp->lock, flag);
- err = -ENOMEM;
- goto out;
- }
- while (wr) {
- if (wr->num_sge > T3_MAX_SGE) {
- err = -EINVAL;
- break;
- }
- idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
- wqe = (union t3_wr *) (qhp->wq.queue + idx);
- if (num_wrs)
- if (wr->sg_list[0].lkey)
- err = build_rdma_recv(qhp, wqe, wr);
- else
- err = build_zero_stag_recv(qhp, wqe, wr);
- else
- err = -ENOMEM;
-
- if (err)
- break;
-
- build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
- Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
- 0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP);
- pr_debug("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x wqe %p\n",
- __func__, (unsigned long long)wr->wr_id,
- idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
- ++(qhp->wq.rq_wptr);
- ++(qhp->wq.wptr);
- wr = wr->next;
- num_wrs--;
- }
- spin_unlock_irqrestore(&qhp->lock, flag);
- if (cxio_wq_db_enabled(&qhp->wq))
- ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
-
-out:
- if (err)
- *bad_wr = wr;
- return err;
-}
-
-static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
- u8 *layer_type, u8 *ecode)
-{
- int status = TPT_ERR_INTERNAL_ERR;
- int tagged = 0;
- int opcode = -1;
- int rqtype = 0;
- int send_inv = 0;
-
- if (rsp_msg) {
- status = CQE_STATUS(rsp_msg->cqe);
- opcode = CQE_OPCODE(rsp_msg->cqe);
- rqtype = RQ_TYPE(rsp_msg->cqe);
- send_inv = (opcode == T3_SEND_WITH_INV) ||
- (opcode == T3_SEND_WITH_SE_INV);
- tagged = (opcode == T3_RDMA_WRITE) ||
- (rqtype && (opcode == T3_READ_RESP));
- }
-
- switch (status) {
- case TPT_ERR_STAG:
- if (send_inv) {
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
- *ecode = RDMAP_CANT_INV_STAG;
- } else {
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
- *ecode = RDMAP_INV_STAG;
- }
- break;
- case TPT_ERR_PDID:
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
- if ((opcode == T3_SEND_WITH_INV) ||
- (opcode == T3_SEND_WITH_SE_INV))
- *ecode = RDMAP_CANT_INV_STAG;
- else
- *ecode = RDMAP_STAG_NOT_ASSOC;
- break;
- case TPT_ERR_QPID:
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
- *ecode = RDMAP_STAG_NOT_ASSOC;
- break;
- case TPT_ERR_ACCESS:
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
- *ecode = RDMAP_ACC_VIOL;
- break;
- case TPT_ERR_WRAP:
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
- *ecode = RDMAP_TO_WRAP;
- break;
- case TPT_ERR_BOUND:
- if (tagged) {
- *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
- *ecode = DDPT_BASE_BOUNDS;
- } else {
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
- *ecode = RDMAP_BASE_BOUNDS;
- }
- break;
- case TPT_ERR_INVALIDATE_SHARED_MR:
- case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
- *ecode = RDMAP_CANT_INV_STAG;
- break;
- case TPT_ERR_ECC:
- case TPT_ERR_ECC_PSTAG:
- case TPT_ERR_INTERNAL_ERR:
- *layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA;
- *ecode = 0;
- break;
- case TPT_ERR_OUT_OF_RQE:
- *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
- *ecode = DDPU_INV_MSN_NOBUF;
- break;
- case TPT_ERR_PBL_ADDR_BOUND:
- *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
- *ecode = DDPT_BASE_BOUNDS;
- break;
- case TPT_ERR_CRC:
- *layer_type = LAYER_MPA|DDP_LLP;
- *ecode = MPA_CRC_ERR;
- break;
- case TPT_ERR_MARKER:
- *layer_type = LAYER_MPA|DDP_LLP;
- *ecode = MPA_MARKER_ERR;
- break;
- case TPT_ERR_PDU_LEN_ERR:
- *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
- *ecode = DDPU_MSG_TOOBIG;
- break;
- case TPT_ERR_DDP_VERSION:
- if (tagged) {
- *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
- *ecode = DDPT_INV_VERS;
- } else {
- *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
- *ecode = DDPU_INV_VERS;
- }
- break;
- case TPT_ERR_RDMA_VERSION:
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
- *ecode = RDMAP_INV_VERS;
- break;
- case TPT_ERR_OPCODE:
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
- *ecode = RDMAP_INV_OPCODE;
- break;
- case TPT_ERR_DDP_QUEUE_NUM:
- *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
- *ecode = DDPU_INV_QN;
- break;
- case TPT_ERR_MSN:
- case TPT_ERR_MSN_GAP:
- case TPT_ERR_MSN_RANGE:
- case TPT_ERR_IRD_OVERFLOW:
- *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
- *ecode = DDPU_INV_MSN_RANGE;
- break;
- case TPT_ERR_TBIT:
- *layer_type = LAYER_DDP|DDP_LOCAL_CATA;
- *ecode = 0;
- break;
- case TPT_ERR_MO:
- *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
- *ecode = DDPU_INV_MO;
- break;
- default:
- *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
- *ecode = 0;
- break;
- }
-}
-
-int iwch_post_zb_read(struct iwch_ep *ep)
-{
- union t3_wr *wqe;
- struct sk_buff *skb;
- u8 flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
-
- pr_debug("%s enter\n", __func__);
- skb = alloc_skb(40, GFP_KERNEL);
- if (!skb) {
- pr_err("%s cannot send zb_read!!\n", __func__);
- return -ENOMEM;
- }
- wqe = skb_put_zero(skb, sizeof(struct t3_rdma_read_wr));
- wqe->read.rdmaop = T3_READ_REQ;
- wqe->read.reserved[0] = 0;
- wqe->read.reserved[1] = 0;
- wqe->read.rem_stag = cpu_to_be32(1);
- wqe->read.rem_to = cpu_to_be64(1);
- wqe->read.local_stag = cpu_to_be32(1);
- wqe->read.local_len = cpu_to_be32(0);
- wqe->read.local_to = cpu_to_be64(1);
- wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ));
- wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(ep->hwtid)|
- V_FW_RIWR_LEN(flit_cnt));
- skb->priority = CPL_PRIORITY_DATA;
- return iwch_cxgb3_ofld_send(ep->com.qp->rhp->rdev.t3cdev_p, skb);
-}
-
-/*
- * This posts a TERMINATE with layer=RDMA, type=catastrophic.
- */
-int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
-{
- union t3_wr *wqe;
- struct terminate_message *term;
- struct sk_buff *skb;
-
- pr_debug("%s %d\n", __func__, __LINE__);
- skb = alloc_skb(40, GFP_ATOMIC);
- if (!skb) {
- pr_err("%s cannot send TERMINATE!\n", __func__);
- return -ENOMEM;
- }
- wqe = skb_put_zero(skb, 40);
- wqe->send.rdmaop = T3_TERMINATE;
-
- /* immediate data length */
- wqe->send.plen = htonl(4);
-
- /* immediate data starts here. */
- term = (struct terminate_message *)wqe->send.sgl;
- build_term_codes(rsp_msg, &term->layer_etype, &term->ecode);
- wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_SEND) |
- V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG));
- wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid));
- skb->priority = CPL_PRIORITY_DATA;
- return iwch_cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
-}
-
-/*
- * Assumes qhp lock is held.
- */
-static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp,
- struct iwch_cq *schp)
- __releases(&qhp->lock)
- __acquires(&qhp->lock)
-{
- int count;
- int flushed;
-
- lockdep_assert_held(&qhp->lock);
-
- pr_debug("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
- /* take a ref on the qhp since we must release the lock */
- atomic_inc(&qhp->refcnt);
- spin_unlock(&qhp->lock);
-
- /* locking hierarchy: cq lock first, then qp lock. */
- spin_lock(&rchp->lock);
- spin_lock(&qhp->lock);
- cxio_flush_hw_cq(&rchp->cq);
- cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
- flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count);
- spin_unlock(&qhp->lock);
- spin_unlock(&rchp->lock);
- if (flushed) {
- spin_lock(&rchp->comp_handler_lock);
- (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
- spin_unlock(&rchp->comp_handler_lock);
- }
-
- /* locking hierarchy: cq lock first, then qp lock. */
- spin_lock(&schp->lock);
- spin_lock(&qhp->lock);
- cxio_flush_hw_cq(&schp->cq);
- cxio_count_scqes(&schp->cq, &qhp->wq, &count);
- flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count);
- spin_unlock(&qhp->lock);
- spin_unlock(&schp->lock);
- if (flushed) {
- spin_lock(&schp->comp_handler_lock);
- (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
- spin_unlock(&schp->comp_handler_lock);
- }
-
- /* deref */
- if (atomic_dec_and_test(&qhp->refcnt))
- wake_up(&qhp->wait);
-
- spin_lock(&qhp->lock);
-}
-
-static void flush_qp(struct iwch_qp *qhp)
-{
- struct iwch_cq *rchp, *schp;
-
- rchp = get_chp(qhp->rhp, qhp->attr.rcq);
- schp = get_chp(qhp->rhp, qhp->attr.scq);
-
- if (qhp->ibqp.uobject) {
- cxio_set_wq_in_error(&qhp->wq);
- cxio_set_cq_in_error(&rchp->cq);
- spin_lock(&rchp->comp_handler_lock);
- (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
- spin_unlock(&rchp->comp_handler_lock);
- if (schp != rchp) {
- cxio_set_cq_in_error(&schp->cq);
- spin_lock(&schp->comp_handler_lock);
- (*schp->ibcq.comp_handler)(&schp->ibcq,
- schp->ibcq.cq_context);
- spin_unlock(&schp->comp_handler_lock);
- }
- return;
- }
- __flush_qp(qhp, rchp, schp);
-}
-
-
-/*
- * Return count of RECV WRs posted
- */
-u16 iwch_rqes_posted(struct iwch_qp *qhp)
-{
- union t3_wr *wqe = qhp->wq.queue;
- u16 count = 0;
-
- while (count < USHRT_MAX && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) {
- count++;
- wqe++;
- }
- pr_debug("%s qhp %p count %u\n", __func__, qhp, count);
- return count;
-}
-
-static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
- enum iwch_qp_attr_mask mask,
- struct iwch_qp_attributes *attrs)
-{
- struct t3_rdma_init_attr init_attr;
- int ret;
-
- init_attr.tid = qhp->ep->hwtid;
- init_attr.qpid = qhp->wq.qpid;
- init_attr.pdid = qhp->attr.pd;
- init_attr.scqid = qhp->attr.scq;
- init_attr.rcqid = qhp->attr.rcq;
- init_attr.rq_addr = qhp->wq.rq_addr;
- init_attr.rq_size = 1 << qhp->wq.rq_size_log2;
- init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE |
- qhp->attr.mpa_attr.recv_marker_enabled |
- (qhp->attr.mpa_attr.xmit_marker_enabled << 1) |
- (qhp->attr.mpa_attr.crc_enabled << 2);
-
- init_attr.qpcaps = uP_RI_QP_RDMA_READ_ENABLE |
- uP_RI_QP_RDMA_WRITE_ENABLE |
- uP_RI_QP_BIND_ENABLE;
- if (!qhp->ibqp.uobject)
- init_attr.qpcaps |= uP_RI_QP_STAG0_ENABLE |
- uP_RI_QP_FAST_REGISTER_ENABLE;
-
- init_attr.tcp_emss = qhp->ep->emss;
- init_attr.ord = qhp->attr.max_ord;
- init_attr.ird = qhp->attr.max_ird;
- init_attr.qp_dma_addr = qhp->wq.dma_addr;
- init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
- init_attr.rqe_count = iwch_rqes_posted(qhp);
- init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
- init_attr.chan = qhp->ep->l2t->smt_idx;
- if (peer2peer) {
- init_attr.rtr_type = RTR_READ;
- if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator)
- init_attr.ord = 1;
- if (init_attr.ird == 0 && !qhp->attr.mpa_attr.initiator)
- init_attr.ird = 1;
- } else
- init_attr.rtr_type = 0;
- init_attr.irs = qhp->ep->rcv_seq;
- pr_debug("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d flags 0x%x qpcaps 0x%x\n",
- __func__,
- init_attr.rq_addr, init_attr.rq_size,
- init_attr.flags, init_attr.qpcaps);
- ret = cxio_rdma_init(&rhp->rdev, &init_attr);
- pr_debug("%s ret %d\n", __func__, ret);
- return ret;
-}
-
-int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
- enum iwch_qp_attr_mask mask,
- struct iwch_qp_attributes *attrs,
- int internal)
-{
- int ret = 0;
- struct iwch_qp_attributes newattr = qhp->attr;
- unsigned long flag;
- int disconnect = 0;
- int terminate = 0;
- int abort = 0;
- int free = 0;
- struct iwch_ep *ep = NULL;
-
- pr_debug("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __func__,
- qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state,
- (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
-
- spin_lock_irqsave(&qhp->lock, flag);
-
- /* Process attr changes if in IDLE */
- if (mask & IWCH_QP_ATTR_VALID_MODIFY) {
- if (qhp->attr.state != IWCH_QP_STATE_IDLE) {
- ret = -EIO;
- goto out;
- }
- if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ)
- newattr.enable_rdma_read = attrs->enable_rdma_read;
- if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE)
- newattr.enable_rdma_write = attrs->enable_rdma_write;
- if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND)
- newattr.enable_bind = attrs->enable_bind;
- if (mask & IWCH_QP_ATTR_MAX_ORD) {
- if (attrs->max_ord >
- rhp->attr.max_rdma_read_qp_depth) {
- ret = -EINVAL;
- goto out;
- }
- newattr.max_ord = attrs->max_ord;
- }
- if (mask & IWCH_QP_ATTR_MAX_IRD) {
- if (attrs->max_ird >
- rhp->attr.max_rdma_reads_per_qp) {
- ret = -EINVAL;
- goto out;
- }
- newattr.max_ird = attrs->max_ird;
- }
- qhp->attr = newattr;
- }
-
- if (!(mask & IWCH_QP_ATTR_NEXT_STATE))
- goto out;
- if (qhp->attr.state == attrs->next_state)
- goto out;
-
- switch (qhp->attr.state) {
- case IWCH_QP_STATE_IDLE:
- switch (attrs->next_state) {
- case IWCH_QP_STATE_RTS:
- if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) {
- ret = -EINVAL;
- goto out;
- }
- if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) {
- ret = -EINVAL;
- goto out;
- }
- qhp->attr.mpa_attr = attrs->mpa_attr;
- qhp->attr.llp_stream_handle = attrs->llp_stream_handle;
- qhp->ep = qhp->attr.llp_stream_handle;
- qhp->attr.state = IWCH_QP_STATE_RTS;
-
- /*
- * Ref the endpoint here and deref when we
- * disassociate the endpoint from the QP. This
- * happens in CLOSING->IDLE transition or *->ERROR
- * transition.
- */
- get_ep(&qhp->ep->com);
- spin_unlock_irqrestore(&qhp->lock, flag);
- ret = rdma_init(rhp, qhp, mask, attrs);
- spin_lock_irqsave(&qhp->lock, flag);
- if (ret)
- goto err;
- break;
- case IWCH_QP_STATE_ERROR:
- qhp->attr.state = IWCH_QP_STATE_ERROR;
- flush_qp(qhp);
- break;
- default:
- ret = -EINVAL;
- goto out;
- }
- break;
- case IWCH_QP_STATE_RTS:
- switch (attrs->next_state) {
- case IWCH_QP_STATE_CLOSING:
- BUG_ON(kref_read(&qhp->ep->com.kref) < 2);
- qhp->attr.state = IWCH_QP_STATE_CLOSING;
- if (!internal) {
- abort=0;
- disconnect = 1;
- ep = qhp->ep;
- get_ep(&ep->com);
- }
- break;
- case IWCH_QP_STATE_TERMINATE:
- qhp->attr.state = IWCH_QP_STATE_TERMINATE;
- if (qhp->ibqp.uobject)
- cxio_set_wq_in_error(&qhp->wq);
- if (!internal)
- terminate = 1;
- break;
- case IWCH_QP_STATE_ERROR:
- qhp->attr.state = IWCH_QP_STATE_ERROR;
- if (!internal) {
- abort=1;
- disconnect = 1;
- ep = qhp->ep;
- get_ep(&ep->com);
- }
- goto err;
- break;
- default:
- ret = -EINVAL;
- goto out;
- }
- break;
- case IWCH_QP_STATE_CLOSING:
- if (!internal) {
- ret = -EINVAL;
- goto out;
- }
- switch (attrs->next_state) {
- case IWCH_QP_STATE_IDLE:
- flush_qp(qhp);
- qhp->attr.state = IWCH_QP_STATE_IDLE;
- qhp->attr.llp_stream_handle = NULL;
- put_ep(&qhp->ep->com);
- qhp->ep = NULL;
- wake_up(&qhp->wait);
- break;
- case IWCH_QP_STATE_ERROR:
- goto err;
- default:
- ret = -EINVAL;
- goto err;
- }
- break;
- case IWCH_QP_STATE_ERROR:
- if (attrs->next_state != IWCH_QP_STATE_IDLE) {
- ret = -EINVAL;
- goto out;
- }
-
- if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) ||
- !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) {
- ret = -EINVAL;
- goto out;
- }
- qhp->attr.state = IWCH_QP_STATE_IDLE;
- break;
- case IWCH_QP_STATE_TERMINATE:
- if (!internal) {
- ret = -EINVAL;
- goto out;
- }
- goto err;
- break;
- default:
- pr_err("%s in a bad state %d\n", __func__, qhp->attr.state);
- ret = -EINVAL;
- goto err;
- break;
- }
- goto out;
-err:
- pr_debug("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep,
- qhp->wq.qpid);
-
- /* disassociate the LLP connection */
- qhp->attr.llp_stream_handle = NULL;
- ep = qhp->ep;
- qhp->ep = NULL;
- qhp->attr.state = IWCH_QP_STATE_ERROR;
- free=1;
- wake_up(&qhp->wait);
- BUG_ON(!ep);
- flush_qp(qhp);
-out:
- spin_unlock_irqrestore(&qhp->lock, flag);
-
- if (terminate)
- iwch_post_terminate(qhp, NULL);
-
- /*
- * If disconnect is 1, then we need to initiate a disconnect
- * on the EP. This can be a normal close (RTS->CLOSING) or
- * an abnormal close (RTS/CLOSING->ERROR).
- */
- if (disconnect) {
- iwch_ep_disconnect(ep, abort, GFP_KERNEL);
- put_ep(&ep->com);
- }
-
- /*
- * If free is 1, then we've disassociated the EP from the QP
- * and we need to dereference the EP.
- */
- if (free)
- put_ep(&ep->com);
-
- pr_debug("%s exit state %d\n", __func__, qhp->attr.state);
- return ret;
-}
diff --git a/drivers/infiniband/hw/cxgb3/tcb.h b/drivers/infiniband/hw/cxgb3/tcb.h
deleted file mode 100644
index c702dc199e18..000000000000
--- a/drivers/infiniband/hw/cxgb3/tcb.h
+++ /dev/null
@@ -1,632 +0,0 @@
-/*
- * Copyright (c) 2007 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef _TCB_DEFS_H
-#define _TCB_DEFS_H
-
-#define W_TCB_T_STATE 0
-#define S_TCB_T_STATE 0
-#define M_TCB_T_STATE 0xfULL
-#define V_TCB_T_STATE(x) ((x) << S_TCB_T_STATE)
-
-#define W_TCB_TIMER 0
-#define S_TCB_TIMER 4
-#define M_TCB_TIMER 0x1ULL
-#define V_TCB_TIMER(x) ((x) << S_TCB_TIMER)
-
-#define W_TCB_DACK_TIMER 0
-#define S_TCB_DACK_TIMER 5
-#define M_TCB_DACK_TIMER 0x1ULL
-#define V_TCB_DACK_TIMER(x) ((x) << S_TCB_DACK_TIMER)
-
-#define W_TCB_DEL_FLAG 0
-#define S_TCB_DEL_FLAG 6
-#define M_TCB_DEL_FLAG 0x1ULL
-#define V_TCB_DEL_FLAG(x) ((x) << S_TCB_DEL_FLAG)
-
-#define W_TCB_L2T_IX 0
-#define S_TCB_L2T_IX 7
-#define M_TCB_L2T_IX 0x7ffULL
-#define V_TCB_L2T_IX(x) ((x) << S_TCB_L2T_IX)
-
-#define W_TCB_SMAC_SEL 0
-#define S_TCB_SMAC_SEL 18
-#define M_TCB_SMAC_SEL 0x3ULL
-#define V_TCB_SMAC_SEL(x) ((x) << S_TCB_SMAC_SEL)
-
-#define W_TCB_TOS 0
-#define S_TCB_TOS 20
-#define M_TCB_TOS 0x3fULL
-#define V_TCB_TOS(x) ((x) << S_TCB_TOS)
-
-#define W_TCB_MAX_RT 0
-#define S_TCB_MAX_RT 26
-#define M_TCB_MAX_RT 0xfULL
-#define V_TCB_MAX_RT(x) ((x) << S_TCB_MAX_RT)
-
-#define W_TCB_T_RXTSHIFT 0
-#define S_TCB_T_RXTSHIFT 30
-#define M_TCB_T_RXTSHIFT 0xfULL
-#define V_TCB_T_RXTSHIFT(x) ((x) << S_TCB_T_RXTSHIFT)
-
-#define W_TCB_T_DUPACKS 1
-#define S_TCB_T_DUPACKS 2
-#define M_TCB_T_DUPACKS 0xfULL
-#define V_TCB_T_DUPACKS(x) ((x) << S_TCB_T_DUPACKS)
-
-#define W_TCB_T_MAXSEG 1
-#define S_TCB_T_MAXSEG 6
-#define M_TCB_T_MAXSEG 0xfULL
-#define V_TCB_T_MAXSEG(x) ((x) << S_TCB_T_MAXSEG)
-
-#define W_TCB_T_FLAGS1 1
-#define S_TCB_T_FLAGS1 10
-#define M_TCB_T_FLAGS1 0xffffffffULL
-#define V_TCB_T_FLAGS1(x) ((x) << S_TCB_T_FLAGS1)
-
-#define W_TCB_T_MIGRATION 1
-#define S_TCB_T_MIGRATION 20
-#define M_TCB_T_MIGRATION 0x1ULL
-#define V_TCB_T_MIGRATION(x) ((x) << S_TCB_T_MIGRATION)
-
-#define W_TCB_T_FLAGS2 2
-#define S_TCB_T_FLAGS2 10
-#define M_TCB_T_FLAGS2 0x7fULL
-#define V_TCB_T_FLAGS2(x) ((x) << S_TCB_T_FLAGS2)
-
-#define W_TCB_SND_SCALE 2
-#define S_TCB_SND_SCALE 17
-#define M_TCB_SND_SCALE 0xfULL
-#define V_TCB_SND_SCALE(x) ((x) << S_TCB_SND_SCALE)
-
-#define W_TCB_RCV_SCALE 2
-#define S_TCB_RCV_SCALE 21
-#define M_TCB_RCV_SCALE 0xfULL
-#define V_TCB_RCV_SCALE(x) ((x) << S_TCB_RCV_SCALE)
-
-#define W_TCB_SND_UNA_RAW 2
-#define S_TCB_SND_UNA_RAW 25
-#define M_TCB_SND_UNA_RAW 0x7ffffffULL
-#define V_TCB_SND_UNA_RAW(x) ((x) << S_TCB_SND_UNA_RAW)
-
-#define W_TCB_SND_NXT_RAW 3
-#define S_TCB_SND_NXT_RAW 20
-#define M_TCB_SND_NXT_RAW 0x7ffffffULL
-#define V_TCB_SND_NXT_RAW(x) ((x) << S_TCB_SND_NXT_RAW)
-
-#define W_TCB_RCV_NXT 4
-#define S_TCB_RCV_NXT 15
-#define M_TCB_RCV_NXT 0xffffffffULL
-#define V_TCB_RCV_NXT(x) ((x) << S_TCB_RCV_NXT)
-
-#define W_TCB_RCV_ADV 5
-#define S_TCB_RCV_ADV 15
-#define M_TCB_RCV_ADV 0xffffULL
-#define V_TCB_RCV_ADV(x) ((x) << S_TCB_RCV_ADV)
-
-#define W_TCB_SND_MAX_RAW 5
-#define S_TCB_SND_MAX_RAW 31
-#define M_TCB_SND_MAX_RAW 0x7ffffffULL
-#define V_TCB_SND_MAX_RAW(x) ((x) << S_TCB_SND_MAX_RAW)
-
-#define W_TCB_SND_CWND 6
-#define S_TCB_SND_CWND 26
-#define M_TCB_SND_CWND 0x7ffffffULL
-#define V_TCB_SND_CWND(x) ((x) << S_TCB_SND_CWND)
-
-#define W_TCB_SND_SSTHRESH 7
-#define S_TCB_SND_SSTHRESH 21
-#define M_TCB_SND_SSTHRESH 0x7ffffffULL
-#define V_TCB_SND_SSTHRESH(x) ((x) << S_TCB_SND_SSTHRESH)
-
-#define W_TCB_T_RTT_TS_RECENT_AGE 8
-#define S_TCB_T_RTT_TS_RECENT_AGE 16
-#define M_TCB_T_RTT_TS_RECENT_AGE 0xffffffffULL
-#define V_TCB_T_RTT_TS_RECENT_AGE(x) ((x) << S_TCB_T_RTT_TS_RECENT_AGE)
-
-#define W_TCB_T_RTSEQ_RECENT 9
-#define S_TCB_T_RTSEQ_RECENT 16
-#define M_TCB_T_RTSEQ_RECENT 0xffffffffULL
-#define V_TCB_T_RTSEQ_RECENT(x) ((x) << S_TCB_T_RTSEQ_RECENT)
-
-#define W_TCB_T_SRTT 10
-#define S_TCB_T_SRTT 16
-#define M_TCB_T_SRTT 0xffffULL
-#define V_TCB_T_SRTT(x) ((x) << S_TCB_T_SRTT)
-
-#define W_TCB_T_RTTVAR 11
-#define S_TCB_T_RTTVAR 0
-#define M_TCB_T_RTTVAR 0xffffULL
-#define V_TCB_T_RTTVAR(x) ((x) << S_TCB_T_RTTVAR)
-
-#define W_TCB_TS_LAST_ACK_SENT_RAW 11
-#define S_TCB_TS_LAST_ACK_SENT_RAW 16
-#define M_TCB_TS_LAST_ACK_SENT_RAW 0x7ffffffULL
-#define V_TCB_TS_LAST_ACK_SENT_RAW(x) ((x) << S_TCB_TS_LAST_ACK_SENT_RAW)
-
-#define W_TCB_DIP 12
-#define S_TCB_DIP 11
-#define M_TCB_DIP 0xffffffffULL
-#define V_TCB_DIP(x) ((x) << S_TCB_DIP)
-
-#define W_TCB_SIP 13
-#define S_TCB_SIP 11
-#define M_TCB_SIP 0xffffffffULL
-#define V_TCB_SIP(x) ((x) << S_TCB_SIP)
-
-#define W_TCB_DP 14
-#define S_TCB_DP 11
-#define M_TCB_DP 0xffffULL
-#define V_TCB_DP(x) ((x) << S_TCB_DP)
-
-#define W_TCB_SP 14
-#define S_TCB_SP 27
-#define M_TCB_SP 0xffffULL
-#define V_TCB_SP(x) ((x) << S_TCB_SP)
-
-#define W_TCB_TIMESTAMP 15
-#define S_TCB_TIMESTAMP 11
-#define M_TCB_TIMESTAMP 0xffffffffULL
-#define V_TCB_TIMESTAMP(x) ((x) << S_TCB_TIMESTAMP)
-
-#define W_TCB_TIMESTAMP_OFFSET 16
-#define S_TCB_TIMESTAMP_OFFSET 11
-#define M_TCB_TIMESTAMP_OFFSET 0xfULL
-#define V_TCB_TIMESTAMP_OFFSET(x) ((x) << S_TCB_TIMESTAMP_OFFSET)
-
-#define W_TCB_TX_MAX 16
-#define S_TCB_TX_MAX 15
-#define M_TCB_TX_MAX 0xffffffffULL
-#define V_TCB_TX_MAX(x) ((x) << S_TCB_TX_MAX)
-
-#define W_TCB_TX_HDR_PTR_RAW 17
-#define S_TCB_TX_HDR_PTR_RAW 15
-#define M_TCB_TX_HDR_PTR_RAW 0x1ffffULL
-#define V_TCB_TX_HDR_PTR_RAW(x) ((x) << S_TCB_TX_HDR_PTR_RAW)
-
-#define W_TCB_TX_LAST_PTR_RAW 18
-#define S_TCB_TX_LAST_PTR_RAW 0
-#define M_TCB_TX_LAST_PTR_RAW 0x1ffffULL
-#define V_TCB_TX_LAST_PTR_RAW(x) ((x) << S_TCB_TX_LAST_PTR_RAW)
-
-#define W_TCB_TX_COMPACT 18
-#define S_TCB_TX_COMPACT 17
-#define M_TCB_TX_COMPACT 0x1ULL
-#define V_TCB_TX_COMPACT(x) ((x) << S_TCB_TX_COMPACT)
-
-#define W_TCB_RX_COMPACT 18
-#define S_TCB_RX_COMPACT 18
-#define M_TCB_RX_COMPACT 0x1ULL
-#define V_TCB_RX_COMPACT(x) ((x) << S_TCB_RX_COMPACT)
-
-#define W_TCB_RCV_WND 18
-#define S_TCB_RCV_WND 19
-#define M_TCB_RCV_WND 0x7ffffffULL
-#define V_TCB_RCV_WND(x) ((x) << S_TCB_RCV_WND)
-
-#define W_TCB_RX_HDR_OFFSET 19
-#define S_TCB_RX_HDR_OFFSET 14
-#define M_TCB_RX_HDR_OFFSET 0x7ffffffULL
-#define V_TCB_RX_HDR_OFFSET(x) ((x) << S_TCB_RX_HDR_OFFSET)
-
-#define W_TCB_RX_FRAG0_START_IDX_RAW 20
-#define S_TCB_RX_FRAG0_START_IDX_RAW 9
-#define M_TCB_RX_FRAG0_START_IDX_RAW 0x7ffffffULL
-#define V_TCB_RX_FRAG0_START_IDX_RAW(x) ((x) << S_TCB_RX_FRAG0_START_IDX_RAW)
-
-#define W_TCB_RX_FRAG1_START_IDX_OFFSET 21
-#define S_TCB_RX_FRAG1_START_IDX_OFFSET 4
-#define M_TCB_RX_FRAG1_START_IDX_OFFSET 0x7ffffffULL
-#define V_TCB_RX_FRAG1_START_IDX_OFFSET(x) ((x) << S_TCB_RX_FRAG1_START_IDX_OFFSET)
-
-#define W_TCB_RX_FRAG0_LEN 21
-#define S_TCB_RX_FRAG0_LEN 31
-#define M_TCB_RX_FRAG0_LEN 0x7ffffffULL
-#define V_TCB_RX_FRAG0_LEN(x) ((x) << S_TCB_RX_FRAG0_LEN)
-
-#define W_TCB_RX_FRAG1_LEN 22
-#define S_TCB_RX_FRAG1_LEN 26
-#define M_TCB_RX_FRAG1_LEN 0x7ffffffULL
-#define V_TCB_RX_FRAG1_LEN(x) ((x) << S_TCB_RX_FRAG1_LEN)
-
-#define W_TCB_NEWRENO_RECOVER 23
-#define S_TCB_NEWRENO_RECOVER 21
-#define M_TCB_NEWRENO_RECOVER 0x7ffffffULL
-#define V_TCB_NEWRENO_RECOVER(x) ((x) << S_TCB_NEWRENO_RECOVER)
-
-#define W_TCB_PDU_HAVE_LEN 24
-#define S_TCB_PDU_HAVE_LEN 16
-#define M_TCB_PDU_HAVE_LEN 0x1ULL
-#define V_TCB_PDU_HAVE_LEN(x) ((x) << S_TCB_PDU_HAVE_LEN)
-
-#define W_TCB_PDU_LEN 24
-#define S_TCB_PDU_LEN 17
-#define M_TCB_PDU_LEN 0xffffULL
-#define V_TCB_PDU_LEN(x) ((x) << S_TCB_PDU_LEN)
-
-#define W_TCB_RX_QUIESCE 25
-#define S_TCB_RX_QUIESCE 1
-#define M_TCB_RX_QUIESCE 0x1ULL
-#define V_TCB_RX_QUIESCE(x) ((x) << S_TCB_RX_QUIESCE)
-
-#define W_TCB_RX_PTR_RAW 25
-#define S_TCB_RX_PTR_RAW 2
-#define M_TCB_RX_PTR_RAW 0x1ffffULL
-#define V_TCB_RX_PTR_RAW(x) ((x) << S_TCB_RX_PTR_RAW)
-
-#define W_TCB_CPU_NO 25
-#define S_TCB_CPU_NO 19
-#define M_TCB_CPU_NO 0x7fULL
-#define V_TCB_CPU_NO(x) ((x) << S_TCB_CPU_NO)
-
-#define W_TCB_ULP_TYPE 25
-#define S_TCB_ULP_TYPE 26
-#define M_TCB_ULP_TYPE 0xfULL
-#define V_TCB_ULP_TYPE(x) ((x) << S_TCB_ULP_TYPE)
-
-#define W_TCB_RX_FRAG1_PTR_RAW 25
-#define S_TCB_RX_FRAG1_PTR_RAW 30
-#define M_TCB_RX_FRAG1_PTR_RAW 0x1ffffULL
-#define V_TCB_RX_FRAG1_PTR_RAW(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW)
-
-#define W_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 26
-#define S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 15
-#define M_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 0x7ffffffULL
-#define V_TCB_RX_FRAG2_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW)
-
-#define W_TCB_RX_FRAG2_PTR_RAW 27
-#define S_TCB_RX_FRAG2_PTR_RAW 10
-#define M_TCB_RX_FRAG2_PTR_RAW 0x1ffffULL
-#define V_TCB_RX_FRAG2_PTR_RAW(x) ((x) << S_TCB_RX_FRAG2_PTR_RAW)
-
-#define W_TCB_RX_FRAG2_LEN_RAW 27
-#define S_TCB_RX_FRAG2_LEN_RAW 27
-#define M_TCB_RX_FRAG2_LEN_RAW 0x7ffffffULL
-#define V_TCB_RX_FRAG2_LEN_RAW(x) ((x) << S_TCB_RX_FRAG2_LEN_RAW)
-
-#define W_TCB_RX_FRAG3_PTR_RAW 28
-#define S_TCB_RX_FRAG3_PTR_RAW 22
-#define M_TCB_RX_FRAG3_PTR_RAW 0x1ffffULL
-#define V_TCB_RX_FRAG3_PTR_RAW(x) ((x) << S_TCB_RX_FRAG3_PTR_RAW)
-
-#define W_TCB_RX_FRAG3_LEN_RAW 29
-#define S_TCB_RX_FRAG3_LEN_RAW 7
-#define M_TCB_RX_FRAG3_LEN_RAW 0x7ffffffULL
-#define V_TCB_RX_FRAG3_LEN_RAW(x) ((x) << S_TCB_RX_FRAG3_LEN_RAW)
-
-#define W_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 30
-#define S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 2
-#define M_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 0x7ffffffULL
-#define V_TCB_RX_FRAG3_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW)
-
-#define W_TCB_PDU_HDR_LEN 30
-#define S_TCB_PDU_HDR_LEN 29
-#define M_TCB_PDU_HDR_LEN 0xffULL
-#define V_TCB_PDU_HDR_LEN(x) ((x) << S_TCB_PDU_HDR_LEN)
-
-#define W_TCB_SLUSH1 31
-#define S_TCB_SLUSH1 5
-#define M_TCB_SLUSH1 0x7ffffULL
-#define V_TCB_SLUSH1(x) ((x) << S_TCB_SLUSH1)
-
-#define W_TCB_ULP_RAW 31
-#define S_TCB_ULP_RAW 24
-#define M_TCB_ULP_RAW 0xffULL
-#define V_TCB_ULP_RAW(x) ((x) << S_TCB_ULP_RAW)
-
-#define W_TCB_DDP_RDMAP_VERSION 25
-#define S_TCB_DDP_RDMAP_VERSION 30
-#define M_TCB_DDP_RDMAP_VERSION 0x1ULL
-#define V_TCB_DDP_RDMAP_VERSION(x) ((x) << S_TCB_DDP_RDMAP_VERSION)
-
-#define W_TCB_MARKER_ENABLE_RX 25
-#define S_TCB_MARKER_ENABLE_RX 31
-#define M_TCB_MARKER_ENABLE_RX 0x1ULL
-#define V_TCB_MARKER_ENABLE_RX(x) ((x) << S_TCB_MARKER_ENABLE_RX)
-
-#define W_TCB_MARKER_ENABLE_TX 26
-#define S_TCB_MARKER_ENABLE_TX 0
-#define M_TCB_MARKER_ENABLE_TX 0x1ULL
-#define V_TCB_MARKER_ENABLE_TX(x) ((x) << S_TCB_MARKER_ENABLE_TX)
-
-#define W_TCB_CRC_ENABLE 26
-#define S_TCB_CRC_ENABLE 1
-#define M_TCB_CRC_ENABLE 0x1ULL
-#define V_TCB_CRC_ENABLE(x) ((x) << S_TCB_CRC_ENABLE)
-
-#define W_TCB_IRS_ULP 26
-#define S_TCB_IRS_ULP 2
-#define M_TCB_IRS_ULP 0x1ffULL
-#define V_TCB_IRS_ULP(x) ((x) << S_TCB_IRS_ULP)
-
-#define W_TCB_ISS_ULP 26
-#define S_TCB_ISS_ULP 11
-#define M_TCB_ISS_ULP 0x1ffULL
-#define V_TCB_ISS_ULP(x) ((x) << S_TCB_ISS_ULP)
-
-#define W_TCB_TX_PDU_LEN 26
-#define S_TCB_TX_PDU_LEN 20
-#define M_TCB_TX_PDU_LEN 0x3fffULL
-#define V_TCB_TX_PDU_LEN(x) ((x) << S_TCB_TX_PDU_LEN)
-
-#define W_TCB_TX_PDU_OUT 27
-#define S_TCB_TX_PDU_OUT 2
-#define M_TCB_TX_PDU_OUT 0x1ULL
-#define V_TCB_TX_PDU_OUT(x) ((x) << S_TCB_TX_PDU_OUT)
-
-#define W_TCB_CQ_IDX_SQ 27
-#define S_TCB_CQ_IDX_SQ 3
-#define M_TCB_CQ_IDX_SQ 0xffffULL
-#define V_TCB_CQ_IDX_SQ(x) ((x) << S_TCB_CQ_IDX_SQ)
-
-#define W_TCB_CQ_IDX_RQ 27
-#define S_TCB_CQ_IDX_RQ 19
-#define M_TCB_CQ_IDX_RQ 0xffffULL
-#define V_TCB_CQ_IDX_RQ(x) ((x) << S_TCB_CQ_IDX_RQ)
-
-#define W_TCB_QP_ID 28
-#define S_TCB_QP_ID 3
-#define M_TCB_QP_ID 0xffffULL
-#define V_TCB_QP_ID(x) ((x) << S_TCB_QP_ID)
-
-#define W_TCB_PD_ID 28
-#define S_TCB_PD_ID 19
-#define M_TCB_PD_ID 0xffffULL
-#define V_TCB_PD_ID(x) ((x) << S_TCB_PD_ID)
-
-#define W_TCB_STAG 29
-#define S_TCB_STAG 3
-#define M_TCB_STAG 0xffffffffULL
-#define V_TCB_STAG(x) ((x) << S_TCB_STAG)
-
-#define W_TCB_RQ_START 30
-#define S_TCB_RQ_START 3
-#define M_TCB_RQ_START 0x3ffffffULL
-#define V_TCB_RQ_START(x) ((x) << S_TCB_RQ_START)
-
-#define W_TCB_RQ_MSN 30
-#define S_TCB_RQ_MSN 29
-#define M_TCB_RQ_MSN 0x3ffULL
-#define V_TCB_RQ_MSN(x) ((x) << S_TCB_RQ_MSN)
-
-#define W_TCB_RQ_MAX_OFFSET 31
-#define S_TCB_RQ_MAX_OFFSET 7
-#define M_TCB_RQ_MAX_OFFSET 0xfULL
-#define V_TCB_RQ_MAX_OFFSET(x) ((x) << S_TCB_RQ_MAX_OFFSET)
-
-#define W_TCB_RQ_WRITE_PTR 31
-#define S_TCB_RQ_WRITE_PTR 11
-#define M_TCB_RQ_WRITE_PTR 0x3ffULL
-#define V_TCB_RQ_WRITE_PTR(x) ((x) << S_TCB_RQ_WRITE_PTR)
-
-#define W_TCB_INB_WRITE_PERM 31
-#define S_TCB_INB_WRITE_PERM 21
-#define M_TCB_INB_WRITE_PERM 0x1ULL
-#define V_TCB_INB_WRITE_PERM(x) ((x) << S_TCB_INB_WRITE_PERM)
-
-#define W_TCB_INB_READ_PERM 31
-#define S_TCB_INB_READ_PERM 22
-#define M_TCB_INB_READ_PERM 0x1ULL
-#define V_TCB_INB_READ_PERM(x) ((x) << S_TCB_INB_READ_PERM)
-
-#define W_TCB_ORD_L_BIT_VLD 31
-#define S_TCB_ORD_L_BIT_VLD 23
-#define M_TCB_ORD_L_BIT_VLD 0x1ULL
-#define V_TCB_ORD_L_BIT_VLD(x) ((x) << S_TCB_ORD_L_BIT_VLD)
-
-#define W_TCB_RDMAP_OPCODE 31
-#define S_TCB_RDMAP_OPCODE 24
-#define M_TCB_RDMAP_OPCODE 0xfULL
-#define V_TCB_RDMAP_OPCODE(x) ((x) << S_TCB_RDMAP_OPCODE)
-
-#define W_TCB_TX_FLUSH 31
-#define S_TCB_TX_FLUSH 28
-#define M_TCB_TX_FLUSH 0x1ULL
-#define V_TCB_TX_FLUSH(x) ((x) << S_TCB_TX_FLUSH)
-
-#define W_TCB_TX_OOS_RXMT 31
-#define S_TCB_TX_OOS_RXMT 29
-#define M_TCB_TX_OOS_RXMT 0x1ULL
-#define V_TCB_TX_OOS_RXMT(x) ((x) << S_TCB_TX_OOS_RXMT)
-
-#define W_TCB_TX_OOS_TXMT 31
-#define S_TCB_TX_OOS_TXMT 30
-#define M_TCB_TX_OOS_TXMT 0x1ULL
-#define V_TCB_TX_OOS_TXMT(x) ((x) << S_TCB_TX_OOS_TXMT)
-
-#define W_TCB_SLUSH_AUX2 31
-#define S_TCB_SLUSH_AUX2 31
-#define M_TCB_SLUSH_AUX2 0x1ULL
-#define V_TCB_SLUSH_AUX2(x) ((x) << S_TCB_SLUSH_AUX2)
-
-#define W_TCB_RX_FRAG1_PTR_RAW2 25
-#define S_TCB_RX_FRAG1_PTR_RAW2 30
-#define M_TCB_RX_FRAG1_PTR_RAW2 0x1ffffULL
-#define V_TCB_RX_FRAG1_PTR_RAW2(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW2)
-
-#define W_TCB_RX_DDP_FLAGS 26
-#define S_TCB_RX_DDP_FLAGS 15
-#define M_TCB_RX_DDP_FLAGS 0x3ffULL
-#define V_TCB_RX_DDP_FLAGS(x) ((x) << S_TCB_RX_DDP_FLAGS)
-
-#define W_TCB_SLUSH_AUX3 26
-#define S_TCB_SLUSH_AUX3 31
-#define M_TCB_SLUSH_AUX3 0x1ffULL
-#define V_TCB_SLUSH_AUX3(x) ((x) << S_TCB_SLUSH_AUX3)
-
-#define W_TCB_RX_DDP_BUF0_OFFSET 27
-#define S_TCB_RX_DDP_BUF0_OFFSET 8
-#define M_TCB_RX_DDP_BUF0_OFFSET 0x3fffffULL
-#define V_TCB_RX_DDP_BUF0_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF0_OFFSET)
-
-#define W_TCB_RX_DDP_BUF0_LEN 27
-#define S_TCB_RX_DDP_BUF0_LEN 30
-#define M_TCB_RX_DDP_BUF0_LEN 0x3fffffULL
-#define V_TCB_RX_DDP_BUF0_LEN(x) ((x) << S_TCB_RX_DDP_BUF0_LEN)
-
-#define W_TCB_RX_DDP_BUF1_OFFSET 28
-#define S_TCB_RX_DDP_BUF1_OFFSET 20
-#define M_TCB_RX_DDP_BUF1_OFFSET 0x3fffffULL
-#define V_TCB_RX_DDP_BUF1_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF1_OFFSET)
-
-#define W_TCB_RX_DDP_BUF1_LEN 29
-#define S_TCB_RX_DDP_BUF1_LEN 10
-#define M_TCB_RX_DDP_BUF1_LEN 0x3fffffULL
-#define V_TCB_RX_DDP_BUF1_LEN(x) ((x) << S_TCB_RX_DDP_BUF1_LEN)
-
-#define W_TCB_RX_DDP_BUF0_TAG 30
-#define S_TCB_RX_DDP_BUF0_TAG 0
-#define M_TCB_RX_DDP_BUF0_TAG 0xffffffffULL
-#define V_TCB_RX_DDP_BUF0_TAG(x) ((x) << S_TCB_RX_DDP_BUF0_TAG)
-
-#define W_TCB_RX_DDP_BUF1_TAG 31
-#define S_TCB_RX_DDP_BUF1_TAG 0
-#define M_TCB_RX_DDP_BUF1_TAG 0xffffffffULL
-#define V_TCB_RX_DDP_BUF1_TAG(x) ((x) << S_TCB_RX_DDP_BUF1_TAG)
-
-#define S_TF_DACK 10
-#define V_TF_DACK(x) ((x) << S_TF_DACK)
-
-#define S_TF_NAGLE 11
-#define V_TF_NAGLE(x) ((x) << S_TF_NAGLE)
-
-#define S_TF_RECV_SCALE 12
-#define V_TF_RECV_SCALE(x) ((x) << S_TF_RECV_SCALE)
-
-#define S_TF_RECV_TSTMP 13
-#define V_TF_RECV_TSTMP(x) ((x) << S_TF_RECV_TSTMP)
-
-#define S_TF_RECV_SACK 14
-#define V_TF_RECV_SACK(x) ((x) << S_TF_RECV_SACK)
-
-#define S_TF_TURBO 15
-#define V_TF_TURBO(x) ((x) << S_TF_TURBO)
-
-#define S_TF_KEEPALIVE 16
-#define V_TF_KEEPALIVE(x) ((x) << S_TF_KEEPALIVE)
-
-#define S_TF_TCAM_BYPASS 17
-#define V_TF_TCAM_BYPASS(x) ((x) << S_TF_TCAM_BYPASS)
-
-#define S_TF_CORE_FIN 18
-#define V_TF_CORE_FIN(x) ((x) << S_TF_CORE_FIN)
-
-#define S_TF_CORE_MORE 19
-#define V_TF_CORE_MORE(x) ((x) << S_TF_CORE_MORE)
-
-#define S_TF_MIGRATING 20
-#define V_TF_MIGRATING(x) ((x) << S_TF_MIGRATING)
-
-#define S_TF_ACTIVE_OPEN 21
-#define V_TF_ACTIVE_OPEN(x) ((x) << S_TF_ACTIVE_OPEN)
-
-#define S_TF_ASK_MODE 22
-#define V_TF_ASK_MODE(x) ((x) << S_TF_ASK_MODE)
-
-#define S_TF_NON_OFFLOAD 23
-#define V_TF_NON_OFFLOAD(x) ((x) << S_TF_NON_OFFLOAD)
-
-#define S_TF_MOD_SCHD 24
-#define V_TF_MOD_SCHD(x) ((x) << S_TF_MOD_SCHD)
-
-#define S_TF_MOD_SCHD_REASON0 25
-#define V_TF_MOD_SCHD_REASON0(x) ((x) << S_TF_MOD_SCHD_REASON0)
-
-#define S_TF_MOD_SCHD_REASON1 26
-#define V_TF_MOD_SCHD_REASON1(x) ((x) << S_TF_MOD_SCHD_REASON1)
-
-#define S_TF_MOD_SCHD_RX 27
-#define V_TF_MOD_SCHD_RX(x) ((x) << S_TF_MOD_SCHD_RX)
-
-#define S_TF_CORE_PUSH 28
-#define V_TF_CORE_PUSH(x) ((x) << S_TF_CORE_PUSH)
-
-#define S_TF_RCV_COALESCE_ENABLE 29
-#define V_TF_RCV_COALESCE_ENABLE(x) ((x) << S_TF_RCV_COALESCE_ENABLE)
-
-#define S_TF_RCV_COALESCE_PUSH 30
-#define V_TF_RCV_COALESCE_PUSH(x) ((x) << S_TF_RCV_COALESCE_PUSH)
-
-#define S_TF_RCV_COALESCE_LAST_PSH 31
-#define V_TF_RCV_COALESCE_LAST_PSH(x) ((x) << S_TF_RCV_COALESCE_LAST_PSH)
-
-#define S_TF_RCV_COALESCE_HEARTBEAT 32
-#define V_TF_RCV_COALESCE_HEARTBEAT(x) ((x) << S_TF_RCV_COALESCE_HEARTBEAT)
-
-#define S_TF_HALF_CLOSE 33
-#define V_TF_HALF_CLOSE(x) ((x) << S_TF_HALF_CLOSE)
-
-#define S_TF_DACK_MSS 34
-#define V_TF_DACK_MSS(x) ((x) << S_TF_DACK_MSS)
-
-#define S_TF_CCTRL_SEL0 35
-#define V_TF_CCTRL_SEL0(x) ((x) << S_TF_CCTRL_SEL0)
-
-#define S_TF_CCTRL_SEL1 36
-#define V_TF_CCTRL_SEL1(x) ((x) << S_TF_CCTRL_SEL1)
-
-#define S_TF_TCP_NEWRENO_FAST_RECOVERY 37
-#define V_TF_TCP_NEWRENO_FAST_RECOVERY(x) ((x) << S_TF_TCP_NEWRENO_FAST_RECOVERY)
-
-#define S_TF_TX_PACE_AUTO 38
-#define V_TF_TX_PACE_AUTO(x) ((x) << S_TF_TX_PACE_AUTO)
-
-#define S_TF_PEER_FIN_HELD 39
-#define V_TF_PEER_FIN_HELD(x) ((x) << S_TF_PEER_FIN_HELD)
-
-#define S_TF_CORE_URG 40
-#define V_TF_CORE_URG(x) ((x) << S_TF_CORE_URG)
-
-#define S_TF_RDMA_ERROR 41
-#define V_TF_RDMA_ERROR(x) ((x) << S_TF_RDMA_ERROR)
-
-#define S_TF_SSWS_DISABLED 42
-#define V_TF_SSWS_DISABLED(x) ((x) << S_TF_SSWS_DISABLED)
-
-#define S_TF_DUPACK_COUNT_ODD 43
-#define V_TF_DUPACK_COUNT_ODD(x) ((x) << S_TF_DUPACK_COUNT_ODD)
-
-#define S_TF_TX_CHANNEL 44
-#define V_TF_TX_CHANNEL(x) ((x) << S_TF_TX_CHANNEL)
-
-#define S_TF_RX_CHANNEL 45
-#define V_TF_RX_CHANNEL(x) ((x) << S_TF_RX_CHANNEL)
-
-#define S_TF_TX_PACE_FIXED 46
-#define V_TF_TX_PACE_FIXED(x) ((x) << S_TF_TX_PACE_FIXED)
-
-#define S_TF_RDMA_FLM_ERROR 47
-#define V_TF_RDMA_FLM_ERROR(x) ((x) << S_TF_RDMA_FLM_ERROR)
-
-#define S_TF_RX_FLOW_CONTROL_DISABLE 48
-#define V_TF_RX_FLOW_CONTROL_DISABLE(x) ((x) << S_TF_RX_FLOW_CONTROL_DISABLE)
-
-#endif /* _TCB_DEFS_H */
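
The tcb_defs.h entries deleted above follow a four-macro convention per TCB field: W_* gives the 32-bit word index inside the TCB, S_* the starting bit, M_* the field mask, and V_* positions a value for writing. Below is a minimal sketch of how such definitions are typically consumed when decoding a TCB snapshot; it is not part of the patch, the helper names are invented, and the assumption that the snapshot is already in host byte order is mine. The same idea reappears in the cm.c read_tcb_rpl() hunk further down, where the mask argument of t4_tcb_get_field32() is corrected from TCB_RQ_START_W to TCB_RQ_START_M.

/*
 * Minimal sketch, not part of the patch: generic field access over the
 * W_/S_/M_/V_ macro convention.  Assumes a host-order u32 TCB snapshot
 * and that word + 1 stays in range; helper names are invented.
 */
#include <linux/types.h>

static inline u64 tcb_field_get(const u32 *tcb, unsigned int word,
				unsigned int shift, u64 mask)
{
	/* a field may spill into the following word, so take both */
	u64 v = tcb[word] | ((u64)tcb[word + 1] << 32);

	return (v >> shift) & mask;
}

/* what every V_* macro above expands to */
static inline u64 tcb_field_val(u64 val, unsigned int shift)
{
	return val << shift;
}

/*
 * Example: tcb_field_get(tcb, W_TCB_RQ_START, S_TCB_RQ_START,
 *                        M_TCB_RQ_START) recovers the 26-bit RQ start
 * index held in word 30.
 */
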
diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig
index e0522a5d5a06..9e2b2c348afd 100644
--- a/drivers/infiniband/hw/cxgb4/Kconfig
+++ b/drivers/infiniband/hw/cxgb4/Kconfig
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-only
config INFINIBAND_CXGB4
tristate "Chelsio T4/T5 RDMA Driver"
depends on CHELSIO_T4 && INET
depends on INFINIBAND_ADDR_TRANS
select CHELSIO_LIB
select GENERIC_ALLOCATOR
- ---help---
+ help
This is an iWARP/RDMA driver for the Chelsio T4 and T5
1GbE, 10GbE adapters and T5 40GbE adapter.
diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile
index 31a87d90a40b..291d259d2319 100644
--- a/drivers/infiniband/hw/cxgb4/Makefile
+++ b/drivers/infiniband/hw/cxgb4/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb4
ccflags-y += -I $(srctree)/drivers/net/ethernet/chelsio/libcxgb
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 0f3b1193d5f8..b3b45c49077d 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -77,9 +77,9 @@ static int enable_ecn;
module_param(enable_ecn, int, 0644);
MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)");
-static int dack_mode = 1;
+static int dack_mode;
module_param(dack_mode, int, 0644);
-MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)");
+MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=0)");
uint c4iw_max_read_depth = 32;
module_param(c4iw_max_read_depth, int, 0644);
@@ -145,7 +145,7 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status);
static int sched(struct c4iw_dev *dev, struct sk_buff *skb);
static LIST_HEAD(timeout_list);
-static spinlock_t timeout_lock;
+static DEFINE_SPINLOCK(timeout_lock);
static void deref_cm_id(struct c4iw_ep_common *epc)
{
@@ -191,7 +191,7 @@ static void start_ep_timer(struct c4iw_ep *ep)
static int stop_ep_timer(struct c4iw_ep *ep)
{
pr_debug("ep %p stopping\n", ep);
- del_timer_sync(&ep->timer);
+ timer_delete_sync(&ep->timer);
if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
c4iw_put_ep(&ep->com);
return 0;
@@ -495,7 +495,6 @@ static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
release_ep_resources(ep);
- kfree_skb(skb);
return 0;
}
@@ -506,7 +505,6 @@ static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
c4iw_put_ep(&ep->parent_ep->com);
release_ep_resources(ep);
- kfree_skb(skb);
return 0;
}
@@ -736,7 +734,7 @@ static int send_connect(struct c4iw_ep *ep)
&ep->com.remote_addr;
int ret;
enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
- u32 isn = (prandom_u32() & ~7UL) - 1;
+ u32 isn = (get_random_u32() & ~7UL) - 1;
struct net_device *netdev;
u64 params;
@@ -953,7 +951,7 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
mpalen = sizeof(*mpa) + ep->plen;
if (mpa_rev_to_use == 2)
mpalen += sizeof(struct mpa_v2_conn_params);
- wrlen = roundup(mpalen + sizeof *req, 16);
+ wrlen = roundup(mpalen + sizeof(*req), 16);
skb = get_skb(skb, wrlen, GFP_KERNEL);
if (!skb) {
connect_reply_upcall(ep, -ENOMEM);
@@ -997,8 +995,9 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
}
if (mpa_rev_to_use == 2) {
- mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
- sizeof (struct mpa_v2_conn_params));
+ mpa->private_data_size =
+ htons(ntohs(mpa->private_data_size) +
+ sizeof(struct mpa_v2_conn_params));
pr_debug("initiator ird %u ord %u\n", ep->ird,
ep->ord);
mpa_v2_params.ird = htons((u16)ep->ird);
@@ -1057,7 +1056,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
mpalen = sizeof(*mpa) + plen;
if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
mpalen += sizeof(struct mpa_v2_conn_params);
- wrlen = roundup(mpalen + sizeof *req, 16);
+ wrlen = roundup(mpalen + sizeof(*req), 16);
skb = get_skb(NULL, wrlen, GFP_KERNEL);
if (!skb) {
@@ -1088,8 +1087,9 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
mpa->flags |= MPA_ENHANCED_RDMA_CONN;
- mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
- sizeof (struct mpa_v2_conn_params));
+ mpa->private_data_size =
+ htons(ntohs(mpa->private_data_size) +
+ sizeof(struct mpa_v2_conn_params));
mpa_v2_params.ird = htons(((u16)ep->ird) |
(peer2peer ? MPA_V2_PEER2PEER_MODEL :
0));
@@ -1136,7 +1136,7 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
mpalen = sizeof(*mpa) + plen;
if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
mpalen += sizeof(struct mpa_v2_conn_params);
- wrlen = roundup(mpalen + sizeof *req, 16);
+ wrlen = roundup(mpalen + sizeof(*req), 16);
skb = get_skb(NULL, wrlen, GFP_KERNEL);
if (!skb) {
@@ -1171,8 +1171,9 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
mpa->flags |= MPA_ENHANCED_RDMA_CONN;
- mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
- sizeof (struct mpa_v2_conn_params));
+ mpa->private_data_size =
+ htons(ntohs(mpa->private_data_size) +
+ sizeof(struct mpa_v2_conn_params));
mpa_v2_params.ird = htons((u16)ep->ird);
mpa_v2_params.ord = htons((u16)ep->ord);
if (peer2peer && (ep->mpa_attr.p2p_type !=
@@ -1221,6 +1222,8 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
int ret;
ep = lookup_atid(t, atid);
+ if (!ep)
+ return -EINVAL;
pr_debug("ep %p tid %u snd_isn %u rcv_isn %u\n", ep, tid,
be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
@@ -1964,6 +1967,9 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
int win;
skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
req = __skb_put_zero(skb, sizeof(*req));
req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR));
req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
@@ -2080,7 +2086,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
err = -ENOMEM;
if (n->dev->flags & IFF_LOOPBACK) {
if (iptype == 4)
- pdev = ip_dev_find(&init_net, *(__be32 *)peer_ip);
+ pdev = __ip_dev_find(&init_net, *(__be32 *)peer_ip, false);
else if (IS_ENABLED(CONFIG_IPV6))
for_each_netdev(&init_net, pdev) {
if (ipv6_chk_addr(&init_net,
@@ -2095,12 +2101,12 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
err = -ENODEV;
goto out;
}
+ if (is_vlan_dev(pdev))
+ pdev = vlan_dev_real_dev(pdev);
ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
n, pdev, rt_tos2priority(tos));
- if (!ep->l2t) {
- dev_put(pdev);
+ if (!ep->l2t)
goto out;
- }
ep->mtu = pdev->mtu;
ep->tx_chan = cxgb4_port_chan(pdev);
ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx;
@@ -2113,7 +2119,6 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
ep->rss_qid = cdev->rdev.lldi.rxq_ids[
cxgb4_port_idx(pdev) * step];
set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
- dev_put(pdev);
} else {
pdev = get_real_dev(n->dev);
ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
@@ -2275,6 +2280,9 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
int ret = 0;
ep = lookup_atid(t, atid);
+ if (!ep)
+ return -EINVAL;
+
la = (struct sockaddr_in *)&ep->com.local_addr;
ra = (struct sockaddr_in *)&ep->com.remote_addr;
la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
@@ -2421,20 +2429,6 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
pr_debug("ep %p tid %u\n", ep, ep->hwtid);
-
- skb_get(skb);
- rpl = cplhdr(skb);
- if (!is_t4(adapter_type)) {
- skb_trim(skb, roundup(sizeof(*rpl5), 16));
- rpl5 = (void *)rpl;
- INIT_TP_WR(rpl5, ep->hwtid);
- } else {
- skb_trim(skb, sizeof(*rpl));
- INIT_TP_WR(rpl, ep->hwtid);
- }
- OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
- ep->hwtid));
-
cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
enable_tcp_timestamps && req->tcpopt.tstamp,
(ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
@@ -2480,17 +2474,25 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
if (tcph->ece && tcph->cwr)
opt2 |= CCTRL_ECN_V(1);
}
- if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
- u32 isn = (prandom_u32() & ~7UL) - 1;
+
+ if (!is_t4(adapter_type)) {
+ u32 isn = (get_random_u32() & ~7UL) - 1;
+
+ skb = get_skb(skb, roundup(sizeof(*rpl5), 16), GFP_KERNEL);
+ rpl5 = __skb_put_zero(skb, roundup(sizeof(*rpl5), 16));
+ rpl = (void *)rpl5;
+ INIT_TP_WR_CPL(rpl5, CPL_PASS_ACCEPT_RPL, ep->hwtid);
opt2 |= T5_OPT_2_VALID_F;
opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
opt2 |= T5_ISS_F;
- rpl5 = (void *)rpl;
- memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16));
if (peer2peer)
isn += 4;
rpl5->iss = cpu_to_be32(isn);
pr_debug("iss %u\n", be32_to_cpu(rpl5->iss));
+ } else {
+ skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
+ rpl = __skb_put_zero(skb, sizeof(*rpl));
+ INIT_TP_WR_CPL(rpl, CPL_PASS_ACCEPT_RPL, ep->hwtid);
}
rpl->opt0 = cpu_to_be64(opt0);
@@ -2681,6 +2683,9 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
u16 tcp_opt = ntohs(req->tcp_opt);
ep = get_ep_from_tid(dev, tid);
+ if (!ep)
+ return 0;
+
pr_debug("ep %p tid %u\n", ep, ep->hwtid);
ep->snd_seq = be32_to_cpu(req->snd_isn);
ep->rcv_seq = be32_to_cpu(req->rcv_isn);
@@ -2884,14 +2889,13 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
case MORIBUND:
case CLOSING:
stop_ep_timer(ep);
- /*FALLTHROUGH*/
+ fallthrough;
case FPDU_MODE:
if (ep->com.qp && ep->com.qp->srq) {
srqidx = ABORT_RSS_SRQIDX_G(
be32_to_cpu(req->srqidx_status));
if (srqidx) {
- complete_cached_srq_buffers(ep,
- req->srqidx_status);
+ complete_cached_srq_buffers(ep, srqidx);
} else {
/* Hold ep ref until finish_peer_abort() */
c4iw_get_ep(&ep->com);
@@ -3035,6 +3039,10 @@ static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
}
+ /* As per draft-hilland-iwarp-verbs-v1.0, sec 6.2.3,
+ * when entering the TERM state the RNIC MUST initiate a CLOSE.
+ */
+ c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
c4iw_put_ep(&ep->com);
} else
pr_warn("TERM received tid %u no ep/qp\n", tid);
@@ -3230,17 +3238,22 @@ static int pick_local_ipaddrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
int found = 0;
struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
+ const struct in_ifaddr *ifa;
ind = in_dev_get(dev->rdev.lldi.ports[0]);
if (!ind)
return -EADDRNOTAVAIL;
- for_primary_ifa(ind) {
+ rcu_read_lock();
+ in_dev_for_each_ifa_rcu(ifa, ind) {
+ if (ifa->ifa_flags & IFA_F_SECONDARY)
+ continue;
laddr->sin_addr.s_addr = ifa->ifa_address;
raddr->sin_addr.s_addr = ifa->ifa_address;
found = 1;
break;
}
- endfor_ifa(ind);
+ rcu_read_unlock();
+
in_dev_put(ind);
return found ? 0 : -EADDRNOTAVAIL;
}
@@ -3273,7 +3286,7 @@ static int get_lladdr(struct net_device *dev, struct in6_addr *addr,
static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
{
- struct in6_addr uninitialized_var(addr);
+ struct in6_addr addr;
struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
@@ -3373,7 +3386,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) {
err = pick_local_ipaddrs(dev, cm_id);
if (err)
- goto fail2;
+ goto fail3;
}
/* find a route */
@@ -3395,7 +3408,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) {
err = pick_local_ip6addrs(dev, cm_id);
if (err)
- goto fail2;
+ goto fail3;
}
/* find a route */
@@ -3601,13 +3614,14 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id)
ep->com.local_addr.ss_family == AF_INET) {
err = cxgb4_remove_server_filter(
ep->com.dev->rdev.lldi.ports[0], ep->stid,
- ep->com.dev->rdev.lldi.rxq_ids[0], 0);
+ ep->com.dev->rdev.lldi.rxq_ids[0], false);
} else {
struct sockaddr_in6 *sin6;
c4iw_init_wr_wait(ep->com.wr_waitp);
err = cxgb4_remove_server(
ep->com.dev->rdev.lldi.ports[0], ep->stid,
- ep->com.dev->rdev.lldi.rxq_ids[0], 0);
+ ep->com.dev->rdev.lldi.rxq_ids[0],
+ ep->com.local_addr.ss_family == AF_INET6);
if (err)
goto done;
err = c4iw_wait_for_reply(&ep->com.dev->rdev, ep->com.wr_waitp,
@@ -3750,7 +3764,7 @@ static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
send_fw_act_open_req(ep, atid);
return;
}
- /* fall through */
+ fallthrough;
case FW_EADDRINUSE:
set_bit(ACT_RETRY_INUSE, &ep->com.history);
if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
@@ -3868,8 +3882,8 @@ static int read_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
return 0;
}
- ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_W,
- TCB_RQ_START_S);
+ ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_M,
+ TCB_RQ_START_S);
cleanup:
pr_debug("ep %p tid %u %016x\n", ep, ep->hwtid, ep->srqe_idx);
@@ -4140,6 +4154,10 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
if (neigh->dev->flags & IFF_LOOPBACK) {
pdev = ip_dev_find(&init_net, iph->daddr);
+ if (!pdev) {
+ pr_err("%s - failed to find device!\n", __func__);
+ goto free_dst;
+ }
e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
pdev, 0);
pi = (struct port_info *)netdev_priv(pdev);
@@ -4309,7 +4327,7 @@ static DECLARE_WORK(skb_work, process_work);
static void ep_timeout(struct timer_list *t)
{
- struct c4iw_ep *ep = from_timer(ep, t, timer);
+ struct c4iw_ep *ep = timer_container_of(ep, t, timer);
int kickit = 0;
spin_lock(&timeout_lock);
@@ -4442,7 +4460,6 @@ c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = {
int __init c4iw_cm_init(void)
{
- spin_lock_init(&timeout_lock);
skb_queue_head_init(&rxq);
workq = alloc_ordered_workqueue("iw_cxgb4", WQ_MEM_RECLAIM);
@@ -4455,6 +4472,5 @@ int __init c4iw_cm_init(void)
void c4iw_cm_term(void)
{
WARN_ON(!list_empty(&timeout_list));
- flush_workqueue(workq);
destroy_workqueue(workq);
}
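
The pick_local_ipaddrs() hunk in cm.c above drops the removed for_primary_ifa()/endfor_ifa() helpers in favour of an explicit RCU walk that skips secondary addresses. The sketch below reduces that shape to a standalone helper; it is not part of the patch and the function name is invented.

/*
 * Minimal sketch, not part of the patch: return the first primary IPv4
 * address of a netdev using in_dev_for_each_ifa_rcu().
 */
#include <linux/inetdevice.h>
#include <linux/rcupdate.h>

static __be32 first_primary_ipv4(struct net_device *dev)
{
	const struct in_ifaddr *ifa;
	struct in_device *ind;
	__be32 addr = 0;

	ind = in_dev_get(dev);
	if (!ind)
		return 0;

	rcu_read_lock();
	in_dev_for_each_ifa_rcu(ifa, ind) {
		if (ifa->ifa_flags & IFA_F_SECONDARY)
			continue;		/* primary addresses only */
		addr = ifa->ifa_address;
		break;
	}
	rcu_read_unlock();
	in_dev_put(ind);

	return addr;
}
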
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 52ce586621c6..14ced7b667fa 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -34,16 +34,15 @@
#include "iw_cxgb4.h"
-static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
- struct c4iw_dev_ucontext *uctx, struct sk_buff *skb,
- struct c4iw_wr_wait *wr_waitp)
+static void destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
+ struct c4iw_dev_ucontext *uctx, struct sk_buff *skb,
+ struct c4iw_wr_wait *wr_waitp)
{
struct fw_ri_res_wr *res_wr;
struct fw_ri_res *res;
int wr_len;
- int ret;
- wr_len = sizeof *res_wr + sizeof *res;
+ wr_len = sizeof(*res_wr) + sizeof(*res);
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
res_wr = __skb_put_zero(skb, wr_len);
@@ -59,14 +58,13 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
res->u.cq.iqid = cpu_to_be32(cq->cqid);
c4iw_init_wr_wait(wr_waitp);
- ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);
+ c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);
kfree(cq->sw_queue);
dma_free_coherent(&(rdev->lldi.pdev->dev),
cq->memsize, cq->queue,
dma_unmap_addr(cq, mapping));
c4iw_put_cqid(rdev, cq->cqid, uctx);
- return ret;
}
static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
@@ -104,7 +102,6 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
goto err3;
}
dma_unmap_addr_set(cq, mapping, cq->dma_addr);
- memset(cq->queue, 0, cq->memsize);
if (user && ucontext->is_32b_cqe) {
cq->qp_errp = &((struct t4_status_page *)
@@ -117,7 +114,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
}
/* build fw_ri_res_wr */
- wr_len = sizeof *res_wr + sizeof *res;
+ wr_len = sizeof(*res_wr) + sizeof(*res);
skb = alloc_skb(wr_len, GFP_KERNEL);
if (!skb) {
@@ -757,7 +754,7 @@ skip_cqe:
static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp,
struct ib_wc *wc, struct c4iw_srq *srq)
{
- struct t4_cqe uninitialized_var(cqe);
+ struct t4_cqe cqe;
struct t4_wq *wq = qhp ? &qhp->wq : NULL;
u32 credit = 0;
u8 cqe_flushed;
@@ -770,7 +767,7 @@ static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp,
goto out;
wc->wr_id = cookie;
- wc->qp = qhp ? &qhp->ibqp : NULL;
+ wc->qp = &qhp->ibqp;
wc->vendor_err = CQE_STATUS(&cqe);
wc->wc_flags = 0;
@@ -970,6 +967,12 @@ int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
return !err || err == -ENODATA ? npolled : err;
}
+void c4iw_cq_rem_ref(struct c4iw_cq *chp)
+{
+ if (refcount_dec_and_test(&chp->refcnt))
+ complete(&chp->cq_rel_comp);
+}
+
int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
struct c4iw_cq *chp;
@@ -979,8 +982,8 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
chp = to_c4iw_cq(ib_cq);
xa_erase_irq(&chp->rhp->cqs, chp->cq.cqid);
- atomic_dec(&chp->refcnt);
- wait_event(chp->wait, !atomic_read(&chp->refcnt));
+ c4iw_cq_rem_ref(chp);
+ wait_for_completion(&chp->cq_rel_comp);
ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
ibucontext);
@@ -988,18 +991,18 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx,
chp->destroy_skb, chp->wr_waitp);
c4iw_put_wr_wait(chp->wr_waitp);
- kfree(chp);
return 0;
}
-struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs)
{
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
int vector = attr->comp_vector;
- struct c4iw_dev *rhp;
- struct c4iw_cq *chp;
+ struct c4iw_dev *rhp = to_c4iw_dev(ibcq->device);
+ struct c4iw_cq *chp = to_c4iw_cq(ibcq);
struct c4iw_create_cq ucmd;
struct c4iw_create_cq_resp uresp;
int ret, wr_len;
@@ -1010,22 +1013,19 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
pr_debug("ib_dev %p entries %d\n", ibdev, entries);
if (attr->flags)
- return ERR_PTR(-EINVAL);
+ return -EOPNOTSUPP;
- rhp = to_c4iw_dev(ibdev);
+ if (entries < 1 || entries > ibdev->attrs.max_cqe)
+ return -EINVAL;
if (vector >= rhp->rdev.lldi.nciq)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (udata) {
if (udata->inlen < sizeof(ucmd))
ucontext->is_32b_cqe = 1;
}
- chp = kzalloc(sizeof(*chp), GFP_KERNEL);
- if (!chp)
- return ERR_PTR(-ENOMEM);
-
chp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
if (!chp->wr_waitp) {
ret = -ENOMEM;
@@ -1087,18 +1087,18 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
chp->ibcq.cqe = entries - 2;
spin_lock_init(&chp->lock);
spin_lock_init(&chp->comp_handler_lock);
- atomic_set(&chp->refcnt, 1);
- init_waitqueue_head(&chp->wait);
+ refcount_set(&chp->refcnt, 1);
+ init_completion(&chp->cq_rel_comp);
ret = xa_insert_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL);
if (ret)
goto err_destroy_cq;
if (ucontext) {
ret = -ENOMEM;
- mm = kmalloc(sizeof *mm, GFP_KERNEL);
+ mm = kmalloc(sizeof(*mm), GFP_KERNEL);
if (!mm)
goto err_remove_handle;
- mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
+ mm2 = kmalloc(sizeof(*mm2), GFP_KERNEL);
if (!mm2)
goto err_free_mm;
@@ -1126,19 +1126,26 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
goto err_free_mm2;
mm->key = uresp.key;
- mm->addr = virt_to_phys(chp->cq.queue);
+ mm->addr = 0;
+ mm->vaddr = chp->cq.queue;
+ mm->dma_addr = chp->cq.dma_addr;
mm->len = chp->cq.memsize;
+ insert_flag_to_mmap(&rhp->rdev, mm, mm->addr);
insert_mmap(ucontext, mm);
mm2->key = uresp.gts_key;
mm2->addr = chp->cq.bar2_pa;
mm2->len = PAGE_SIZE;
+ mm2->vaddr = NULL;
+ mm2->dma_addr = 0;
+ insert_flag_to_mmap(&rhp->rdev, mm2, mm2->addr);
insert_mmap(ucontext, mm2);
}
- pr_debug("cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n",
- chp->cq.cqid, chp, chp->cq.size,
- chp->cq.memsize, (unsigned long long)chp->cq.dma_addr);
- return &chp->ibcq;
+
+ pr_debug("cqid 0x%0x chp %p size %u memsize %zu, dma_addr %pad\n",
+ chp->cq.cqid, chp, chp->cq.size, chp->cq.memsize,
+ &chp->cq.dma_addr);
+ return 0;
err_free_mm2:
kfree(mm2);
err_free_mm:
@@ -1154,8 +1161,7 @@ err_free_skb:
err_free_wr_wait:
c4iw_put_wr_wait(chp->wr_waitp);
err_free_chp:
- kfree(chp);
- return ERR_PTR(ret);
+ return ret;
}
int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
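
The cq.c hunks above (together with the iw_cxgb4.h change further down) replace the CQ's atomic_t refcount plus wait_queue_head_t with a refcount_t plus a completion: destroy drops the creator's reference and then sleeps until the last in-flight user completes it, and c4iw_cq_rem_ref() is the single release path. A generic sketch of that teardown pattern follows; it is not part of the patch and the type and function names are stand-ins.

/*
 * Minimal sketch, not part of the patch: refcount_t + completion based
 * object teardown, as adopted for struct c4iw_cq above.
 */
#include <linux/refcount.h>
#include <linux/completion.h>

struct obj {
	refcount_t refcnt;
	struct completion rel_comp;
};

static void obj_init(struct obj *o)
{
	refcount_set(&o->refcnt, 1);		/* creator's reference */
	init_completion(&o->rel_comp);
}

static void obj_put(struct obj *o)
{
	if (refcount_dec_and_test(&o->refcnt))
		complete(&o->rel_comp);		/* wake the destroyer */
}

static void obj_destroy(struct obj *o)
{
	obj_put(o);				/* drop creator's reference */
	wait_for_completion(&o->rel_comp);	/* wait for event handlers */
	/* storage is core-allocated here, so no kfree() of the object */
}
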
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 4c0d925c5ff5..d892f55febe2 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -242,10 +242,13 @@ static void set_ep_sin6_addrs(struct c4iw_ep *ep,
}
}
-static int dump_qp(struct c4iw_qp *qp, struct c4iw_debugfs_data *qpd)
+static int dump_qp(unsigned long id, struct c4iw_qp *qp,
+ struct c4iw_debugfs_data *qpd)
{
int space;
int cc;
+ if (id != qp->wq.sq.qid)
+ return 0;
space = qpd->bufsize - qpd->pos - 1;
if (space == 0)
@@ -327,7 +330,7 @@ static int qp_open(struct inode *inode, struct file *file)
unsigned long index;
int count = 1;
- qpd = kmalloc(sizeof *qpd, GFP_KERNEL);
+ qpd = kmalloc(sizeof(*qpd), GFP_KERNEL);
if (!qpd)
return -ENOMEM;
@@ -350,7 +353,7 @@ static int qp_open(struct inode *inode, struct file *file)
xa_lock_irq(&qpd->devp->qps);
xa_for_each(&qpd->devp->qps, index, qp)
- dump_qp(qp, qpd);
+ dump_qp(index, qp, qpd);
xa_unlock_irq(&qpd->devp->qps);
qpd->buf[qpd->pos++] = 0;
@@ -421,7 +424,7 @@ static int stag_open(struct inode *inode, struct file *file)
int ret = 0;
int count = 1;
- stagd = kmalloc(sizeof *stagd, GFP_KERNEL);
+ stagd = kmalloc(sizeof(*stagd), GFP_KERNEL);
if (!stagd) {
ret = -ENOMEM;
goto out;
@@ -902,8 +905,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
return 0;
err_free_status_page_and_wr_log:
- if (c4iw_wr_log && rdev->wr_log)
- kfree(rdev->wr_log);
+ kfree(rdev->wr_log);
free_page((unsigned long)rdev->status_page);
destroy_ocqp_pool:
c4iw_ocqp_pool_destroy(rdev);
@@ -950,6 +952,7 @@ void c4iw_dealloc(struct uld_ctx *ctx)
static void c4iw_remove(struct uld_ctx *ctx)
{
pr_debug("c4iw_dev %p\n", ctx->dev);
+ debugfs_remove_recursive(ctx->dev->debugfs_root);
c4iw_unregister_device(ctx->dev);
c4iw_dealloc(ctx);
}
@@ -1075,7 +1078,7 @@ static void *c4iw_uld_add(const struct cxgb4_lld_info *infop)
pr_info("Chelsio T4/T5 RDMA Driver - version %s\n",
DRV_VERSION);
- ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx) {
ctx = ERR_PTR(-ENOMEM);
goto out;
@@ -1110,8 +1113,10 @@ static inline struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl,
* The math here assumes sizeof cpl_pass_accept_req >= sizeof
* cpl_rx_pkt.
*/
- skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req) +
- sizeof(struct rss_header) - pktshift, GFP_ATOMIC);
+ skb = alloc_skb(size_add(gl->tot_len,
+ sizeof(struct cpl_pass_accept_req) +
+ sizeof(struct rss_header)) - pktshift,
+ GFP_ATOMIC);
if (unlikely(!skb))
return NULL;
@@ -1223,9 +1228,8 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
if (!ctx->dev) {
ctx->dev = c4iw_alloc(&ctx->lldi);
if (IS_ERR(ctx->dev)) {
- pr_err("%s: initialization failed: %ld\n",
- pci_name(ctx->lldi.pdev),
- PTR_ERR(ctx->dev));
+ pr_err("%s: initialization failed: %pe\n",
+ pci_name(ctx->lldi.pdev), ctx->dev);
ctx->dev = NULL;
break;
}
@@ -1243,10 +1247,9 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
case CXGB4_STATE_START_RECOVERY:
pr_info("%s: Fatal Error\n", pci_name(ctx->lldi.pdev));
if (ctx->dev) {
- struct ib_event event;
+ struct ib_event event = {};
ctx->dev->rdev.flags |= T4_FATAL_ERROR;
- memset(&event, 0, sizeof event);
event.event = IB_EVENT_DEVICE_FATAL;
event.device = &ctx->dev->ibdev;
ib_dispatch_event(&event);
@@ -1559,7 +1562,6 @@ static void __exit c4iw_exit_module(void)
kfree(ctx);
}
mutex_unlock(&dev_mutex);
- flush_workqueue(reg_workq);
destroy_workqueue(reg_workq);
cxgb4_unregister_uld(CXGB4_ULD_RDMA);
c4iw_cm_term();
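
The copy_gl_to_skb_pkt() hunk in device.c wraps its length arithmetic in size_add() from <linux/overflow.h>, which saturates to SIZE_MAX on overflow so an oversized sum makes the allocation fail instead of wrapping around to a short buffer. A reduced sketch of the same idea, not part of the patch and with invented names and lengths:

/*
 * Minimal sketch, not part of the patch: overflow-safe length arithmetic
 * before an skb allocation.
 */
#include <linux/overflow.h>
#include <linux/skbuff.h>

static struct sk_buff *alloc_copy_buf(size_t payload_len, size_t hdr_room)
{
	/* size_add() returns SIZE_MAX on overflow => alloc_skb() fails */
	return alloc_skb(size_add(payload_len, hdr_room), GFP_ATOMIC);
}
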
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
index 4cd877bd2f56..34211a533d5c 100644
--- a/drivers/infiniband/hw/cxgb4/ev.c
+++ b/drivers/infiniband/hw/cxgb4/ev.c
@@ -151,7 +151,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
}
c4iw_qp_add_ref(&qhp->ibqp);
- atomic_inc(&chp->refcnt);
+ refcount_inc(&chp->refcnt);
xa_unlock_irq(&dev->qps);
/* Bad incoming write */
@@ -213,8 +213,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
break;
}
done:
- if (atomic_dec_and_test(&chp->refcnt))
- wake_up(&chp->wait);
+ c4iw_cq_rem_ref(chp);
c4iw_qp_rem_ref(&qhp->ibqp);
out:
return;
@@ -228,14 +227,13 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid)
xa_lock_irqsave(&dev->cqs, flag);
chp = xa_load(&dev->cqs, qid);
if (chp) {
- atomic_inc(&chp->refcnt);
+ refcount_inc(&chp->refcnt);
xa_unlock_irqrestore(&dev->cqs, flag);
t4_clear_cq_armed(&chp->cq);
spin_lock_irqsave(&chp->comp_handler_lock, flag);
(*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
- if (atomic_dec_and_test(&chp->refcnt))
- wake_up(&chp->wait);
+ c4iw_cq_rem_ref(chp);
} else {
pr_debug("unknown cqid 0x%x\n", qid);
xa_unlock_irqrestore(&dev->cqs, flag);
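
c4iw_ev_handler() above shows the usual xarray lookup-and-pin shape: take the xarray lock, look the object up, grab a reference before dropping the lock, and release it later through the helper that completes the destroyer. The generic form is sketched below; it is not part of the patch and uses placeholder names.

/*
 * Minimal sketch, not part of the patch: look up an object in an xarray
 * and pin it before releasing the lock.
 */
#include <linux/xarray.h>
#include <linux/refcount.h>

struct obj {
	refcount_t refcnt;
	/* ... */
};

static struct obj *obj_find_get(struct xarray *xa, unsigned long id)
{
	struct obj *o;
	unsigned long flags;

	xa_lock_irqsave(xa, flags);
	o = xa_load(xa, id);
	if (o)
		refcount_inc(&o->refcnt);	/* pin before unlocking */
	xa_unlock_irqrestore(xa, flags);

	return o;
}
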
diff --git a/drivers/infiniband/hw/cxgb4/id_table.c b/drivers/infiniband/hw/cxgb4/id_table.c
index 724d23297b35..e2188b335e76 100644
--- a/drivers/infiniband/hw/cxgb4/id_table.c
+++ b/drivers/infiniband/hw/cxgb4/id_table.c
@@ -54,12 +54,12 @@ u32 c4iw_id_alloc(struct c4iw_id_table *alloc)
if (obj < alloc->max) {
if (alloc->flags & C4IW_ID_TABLE_F_RANDOM)
- alloc->last += prandom_u32() % RANDOM_SKIP;
+ alloc->last += get_random_u32_below(RANDOM_SKIP);
else
alloc->last = obj + 1;
if (alloc->last >= alloc->max)
alloc->last = 0;
- set_bit(obj, alloc->table);
+ __set_bit(obj, alloc->table);
obj += alloc->start;
} else
obj = -1;
@@ -75,37 +75,32 @@ void c4iw_id_free(struct c4iw_id_table *alloc, u32 obj)
obj -= alloc->start;
spin_lock_irqsave(&alloc->lock, flags);
- clear_bit(obj, alloc->table);
+ __clear_bit(obj, alloc->table);
spin_unlock_irqrestore(&alloc->lock, flags);
}
int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num,
u32 reserved, u32 flags)
{
- int i;
-
alloc->start = start;
alloc->flags = flags;
if (flags & C4IW_ID_TABLE_F_RANDOM)
- alloc->last = prandom_u32() % RANDOM_SKIP;
+ alloc->last = get_random_u32_below(RANDOM_SKIP);
else
alloc->last = 0;
- alloc->max = num;
+ alloc->max = num;
spin_lock_init(&alloc->lock);
- alloc->table = kmalloc_array(BITS_TO_LONGS(num), sizeof(long),
- GFP_KERNEL);
+ alloc->table = bitmap_zalloc(num, GFP_KERNEL);
if (!alloc->table)
return -ENOMEM;
- bitmap_zero(alloc->table, num);
if (!(alloc->flags & C4IW_ID_TABLE_F_EMPTY))
- for (i = 0; i < reserved; ++i)
- set_bit(i, alloc->table);
+ bitmap_set(alloc->table, 0, reserved);
return 0;
}
void c4iw_id_table_free(struct c4iw_id_table *alloc)
{
- kfree(alloc->table);
+ bitmap_free(alloc->table);
}
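
The id_table.c hunks above move to the bitmap helpers: bitmap_zalloc()/bitmap_free() for the storage, bitmap_set() to reserve the initial range, and the non-atomic __set_bit()/__clear_bit() since every access already holds alloc->lock. A minimal allocator in the same shape is sketched below; it is not part of the patch and the names are invented.

/*
 * Minimal sketch, not part of the patch: a bitmap-backed ID pool whose
 * bit operations are serialised by a spinlock.
 */
#include <linux/bitmap.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>
#include <linux/errno.h>

struct id_pool {
	spinlock_t lock;
	unsigned long *table;
	u32 max;
};

static int id_pool_init(struct id_pool *p, u32 num, u32 reserved)
{
	spin_lock_init(&p->lock);
	p->max = num;
	p->table = bitmap_zalloc(num, GFP_KERNEL);
	if (!p->table)
		return -ENOMEM;
	bitmap_set(p->table, 0, reserved);	/* IDs [0, reserved) unusable */
	return 0;
}

static int id_pool_get(struct id_pool *p)
{
	unsigned int id;

	spin_lock_irq(&p->lock);
	id = find_first_zero_bit(p->table, p->max);
	if (id < p->max)
		__set_bit(id, p->table);	/* lock held: non-atomic is fine */
	spin_unlock_irq(&p->lock);

	return id < p->max ? id : -ENOSPC;
}
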
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 916ef982172e..e17c1252536b 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -314,7 +314,6 @@ enum db_state {
struct c4iw_dev {
struct ib_device ibdev;
struct c4iw_rdev rdev;
- u32 device_cap_flags;
struct xarray cqs;
struct xarray qps;
struct xarray mrs;
@@ -341,11 +340,6 @@ static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev)
return container_of(ibdev, struct c4iw_dev, ibdev);
}
-static inline struct c4iw_dev *rdev_to_c4iw_dev(struct c4iw_rdev *rdev)
-{
- return container_of(rdev, struct c4iw_dev, rdev);
-}
-
static inline struct c4iw_cq *get_chp(struct c4iw_dev *rhp, u32 cqid)
{
return xa_load(&rhp->cqs, cqid);
@@ -432,8 +426,8 @@ struct c4iw_cq {
struct t4_cq cq;
spinlock_t lock;
spinlock_t comp_handler_lock;
- atomic_t refcnt;
- wait_queue_head_t wait;
+ refcount_t refcnt;
+ struct completion cq_rel_comp;
struct c4iw_wr_wait *wr_waitp;
};
@@ -490,13 +484,13 @@ struct c4iw_qp {
struct t4_wq wq;
spinlock_t lock;
struct mutex mutex;
- struct kref kref;
wait_queue_head_t wait;
int sq_sig_all;
struct c4iw_srq *srq;
- struct work_struct free_work;
struct c4iw_ucontext *ucontext;
struct c4iw_wr_wait *wr_waitp;
+ struct completion qp_rel_comp;
+ refcount_t qp_refcnt;
};
static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp)
@@ -538,11 +532,21 @@ static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c)
return container_of(c, struct c4iw_ucontext, ibucontext);
}
+enum {
+ CXGB4_MMAP_BAR,
+ CXGB4_MMAP_BAR_WC,
+ CXGB4_MMAP_CONTIG,
+ CXGB4_MMAP_NON_CONTIG,
+};
+
struct c4iw_mm_entry {
struct list_head entry;
u64 addr;
u32 key;
+ void *vaddr;
+ dma_addr_t dma_addr;
unsigned len;
+ u8 mmap_flag;
};
static inline struct c4iw_mm_entry *remove_mmap(struct c4iw_ucontext *ucontext,
@@ -567,6 +571,32 @@ static inline struct c4iw_mm_entry *remove_mmap(struct c4iw_ucontext *ucontext,
return NULL;
}
+static inline void insert_flag_to_mmap(struct c4iw_rdev *rdev,
+ struct c4iw_mm_entry *mm, u64 addr)
+{
+ if (addr >= pci_resource_start(rdev->lldi.pdev, 0) &&
+ (addr < (pci_resource_start(rdev->lldi.pdev, 0) +
+ pci_resource_len(rdev->lldi.pdev, 0))))
+ mm->mmap_flag = CXGB4_MMAP_BAR;
+ else if (addr >= pci_resource_start(rdev->lldi.pdev, 2) &&
+ (addr < (pci_resource_start(rdev->lldi.pdev, 2) +
+ pci_resource_len(rdev->lldi.pdev, 2)))) {
+ if (addr >= rdev->oc_mw_pa) {
+ mm->mmap_flag = CXGB4_MMAP_BAR_WC;
+ } else {
+ if (is_t4(rdev->lldi.adapter_type))
+ mm->mmap_flag = CXGB4_MMAP_BAR;
+ else
+ mm->mmap_flag = CXGB4_MMAP_BAR_WC;
+ }
+ } else {
+ if (addr)
+ mm->mmap_flag = CXGB4_MMAP_CONTIG;
+ else
+ mm->mmap_flag = CXGB4_MMAP_NON_CONTIG;
+ }
+}
+
static inline void insert_mmap(struct c4iw_ucontext *ucontext,
struct c4iw_mm_entry *mm)
{
@@ -659,12 +689,6 @@ static inline u32 c4iw_ib_to_tpt_access(int a)
FW_RI_MEM_ACCESS_LOCAL_READ;
}
-static inline u32 c4iw_ib_to_tpt_bind_access(int acc)
-{
- return (acc & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) |
- (acc & IB_ACCESS_REMOTE_READ ? FW_RI_MEM_ACCESS_REM_READ : 0);
-}
-
enum c4iw_mmid_state {
C4IW_STAG_STATE_VALID,
C4IW_STAG_STATE_INVALID
@@ -707,7 +731,7 @@ struct mpa_message {
u8 flags;
u8 revision;
__be16 private_data_size;
- u8 private_data[0];
+ u8 private_data[];
};
struct mpa_v2_conn_params {
@@ -719,7 +743,7 @@ struct terminate_message {
u8 layer_etype;
u8 ecode;
__be16 hdrct_rsvd;
- u8 len_hdrs[0];
+ u8 len_hdrs[];
};
#define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28)
@@ -942,15 +966,12 @@ void c4iw_id_table_free(struct c4iw_id_table *alloc);
typedef int (*c4iw_handler_func)(struct c4iw_dev *dev, struct sk_buff *skb);
-int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
- struct l2t_entry *l2t);
void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qpid,
struct c4iw_dev_ucontext *uctx);
u32 c4iw_get_resource(struct c4iw_id_table *id_table);
void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry);
int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt,
u32 nr_pdid, u32 nr_srqt);
-int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev);
int c4iw_pblpool_create(struct c4iw_rdev *rdev);
int c4iw_rqtpool_create(struct c4iw_rdev *rdev);
int c4iw_ocqp_pool_create(struct c4iw_rdev *rdev);
@@ -958,7 +979,6 @@ void c4iw_pblpool_destroy(struct c4iw_rdev *rdev);
void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev);
void c4iw_ocqp_pool_destroy(struct c4iw_rdev *rdev);
void c4iw_destroy_resource(struct c4iw_resource *rscp);
-int c4iw_destroy_ctrl_qp(struct c4iw_rdev *rdev);
void c4iw_register_device(struct work_struct *work);
void c4iw_unregister_device(struct c4iw_dev *dev);
int __init c4iw_cm_init(void);
@@ -980,33 +1000,30 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
void c4iw_qp_add_ref(struct ib_qp *qp);
void c4iw_qp_rem_ref(struct ib_qp *qp);
struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
+ u32 max_num_sg);
int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
-int c4iw_dealloc_mw(struct ib_mw *mw);
void c4iw_dealloc(struct uld_ctx *ctx);
-struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata);
struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
u64 length, u64 virt, int acc,
+ struct ib_dmah *dmah,
struct ib_udata *udata);
struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc);
int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata);
int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
-struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+void c4iw_cq_rem_ref(struct c4iw_cq *chp);
+int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs);
int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask srq_attr_mask,
struct ib_udata *udata);
-void c4iw_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata);
+int c4iw_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata);
int c4iw_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *attrs,
struct ib_udata *udata);
int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata);
-struct ib_qp *c4iw_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *attrs,
- struct ib_udata *udata);
+int c4iw_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata);
int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
@@ -1024,8 +1041,6 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp);
int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count);
int c4iw_flush_sq(struct c4iw_qp *qhp);
int c4iw_ev_handler(struct c4iw_dev *rnicp, u32 qid);
-u16 c4iw_rqes_posted(struct c4iw_qp *qhp);
-int c4iw_post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe);
u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx);
void c4iw_put_cqid(struct c4iw_rdev *rdev, u32 qid,
struct c4iw_dev_ucontext *uctx);
@@ -1054,8 +1069,9 @@ int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp);
-typedef int c4iw_restrack_func(struct sk_buff *msg,
- struct rdma_restrack_entry *res);
-extern c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX];
+int c4iw_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr);
+int c4iw_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ibcq);
+int c4iw_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ibqp);
+int c4iw_fill_res_cm_id_entry(struct sk_buff *msg, struct rdma_cm_id *cm_id);
#endif
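
The mpa_message and terminate_message hunks above switch the trailing `[0]` arrays to C99 flexible array members, which pairs naturally with struct_size() from <linux/overflow.h> for allocation sizing. A made-up example of that pairing follows; it is not part of the patch and the structure and helper names are invented.

/*
 * Minimal sketch, not part of the patch: allocating a structure with a
 * flexible array member via struct_size().
 */
#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/types.h>

struct msg {
	__be16 payload_len;
	u8 payload[];			/* flexible array member */
};

static struct msg *msg_alloc(size_t n)
{
	struct msg *m;

	/* struct_size() == sizeof(*m) + n, with overflow checking */
	m = kzalloc(struct_size(m, payload, n), GFP_KERNEL);
	return m;
}
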
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 811c0c8c5b16..adeed7447e7b 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -130,8 +130,9 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
copy_len = len > C4IW_MAX_INLINE_SIZE ? C4IW_MAX_INLINE_SIZE :
len;
- wr_len = roundup(sizeof *req + sizeof *sc +
- roundup(copy_len, T4_ULPTX_MIN_IO), 16);
+ wr_len = roundup(sizeof(*req) + sizeof(*sc) +
+ roundup(copy_len, T4_ULPTX_MIN_IO),
+ 16);
if (!skb) {
skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
@@ -274,13 +275,17 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
struct sk_buff *skb, struct c4iw_wr_wait *wr_waitp)
{
int err;
- struct fw_ri_tpte tpt;
+ struct fw_ri_tpte *tpt;
u32 stag_idx;
static atomic_t key;
if (c4iw_fatal_error(rdev))
return -EIO;
+ tpt = kmalloc(sizeof(*tpt), GFP_KERNEL);
+ if (!tpt)
+ return -ENOMEM;
+
stag_state = stag_state > 0;
stag_idx = (*stag) >> 8;
@@ -290,6 +295,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
mutex_lock(&rdev->stats.lock);
rdev->stats.stag.fail++;
mutex_unlock(&rdev->stats.lock);
+ kfree(tpt);
return -ENOMEM;
}
mutex_lock(&rdev->stats.lock);
@@ -304,28 +310,28 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
/* write TPT entry */
if (reset_tpt_entry)
- memset(&tpt, 0, sizeof(tpt));
+ memset(tpt, 0, sizeof(*tpt));
else {
- tpt.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F |
+ tpt->valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F |
FW_RI_TPTE_STAGKEY_V((*stag & FW_RI_TPTE_STAGKEY_M)) |
FW_RI_TPTE_STAGSTATE_V(stag_state) |
FW_RI_TPTE_STAGTYPE_V(type) | FW_RI_TPTE_PDID_V(pdid));
- tpt.locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) |
+ tpt->locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) |
(bind_enabled ? FW_RI_TPTE_MWBINDEN_F : 0) |
FW_RI_TPTE_ADDRTYPE_V((zbva ? FW_RI_ZERO_BASED_TO :
FW_RI_VA_BASED_TO))|
FW_RI_TPTE_PS_V(page_size));
- tpt.nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32(
+ tpt->nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32(
FW_RI_TPTE_PBLADDR_V(PBL_OFF(rdev, pbl_addr)>>3));
- tpt.len_lo = cpu_to_be32((u32)(len & 0xffffffffUL));
- tpt.va_hi = cpu_to_be32((u32)(to >> 32));
- tpt.va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL));
- tpt.dca_mwbcnt_pstag = cpu_to_be32(0);
- tpt.len_hi = cpu_to_be32((u32)(len >> 32));
+ tpt->len_lo = cpu_to_be32((u32)(len & 0xffffffffUL));
+ tpt->va_hi = cpu_to_be32((u32)(to >> 32));
+ tpt->va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL));
+ tpt->dca_mwbcnt_pstag = cpu_to_be32(0);
+ tpt->len_hi = cpu_to_be32((u32)(len >> 32));
}
err = write_adapter_mem(rdev, stag_idx +
(rdev->lldi.vr->stag.start >> 5),
- sizeof(tpt), &tpt, skb, wr_waitp);
+ sizeof(*tpt), tpt, skb, wr_waitp);
if (reset_tpt_entry) {
c4iw_put_resource(&rdev->resource.tpt_table, stag_idx);
@@ -333,6 +339,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
rdev->stats.stag.cur -= 32;
mutex_unlock(&rdev->stats.lock);
}
+ kfree(tpt);
return err;
}
@@ -341,7 +348,7 @@ static int write_pbl(struct c4iw_rdev *rdev, __be64 *pbl,
{
int err;
- pr_debug("*pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
+ pr_debug("*pbl_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
pbl_addr, rdev->lldi.vr->pbl.start,
pbl_size);
@@ -358,22 +365,6 @@ static int dereg_mem(struct c4iw_rdev *rdev, u32 stag, u32 pbl_size,
pbl_size, pbl_addr, skb, wr_waitp);
}
-static int allocate_window(struct c4iw_rdev *rdev, u32 *stag, u32 pdid,
- struct c4iw_wr_wait *wr_waitp)
-{
- *stag = T4_STAG_UNSET;
- return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_MW, 0, 0, 0,
- 0UL, 0, 0, 0, 0, NULL, wr_waitp);
-}
-
-static int deallocate_window(struct c4iw_rdev *rdev, u32 stag,
- struct sk_buff *skb,
- struct c4iw_wr_wait *wr_waitp)
-{
- return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, 0,
- 0, skb, wr_waitp);
-}
-
static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid,
u32 pbl_size, u32 pbl_addr,
struct c4iw_wr_wait *wr_waitp)
@@ -392,7 +383,6 @@ static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag)
mmid = stag >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
mhp->ibmr.length = mhp->attr.len;
- mhp->ibmr.iova = mhp->attr.va_fbo;
mhp->ibmr.page_size = 1U << (mhp->attr.page_size + 12);
pr_debug("mmid 0x%x mhp %p\n", mmid, mhp);
return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL);
@@ -499,18 +489,22 @@ err_free_mhp:
}
struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
- u64 virt, int acc, struct ib_udata *udata)
+ u64 virt, int acc, struct ib_dmah *dmah,
+ struct ib_udata *udata)
{
__be64 *pages;
int shift, n, i;
int err = -ENOMEM;
- struct sg_dma_page_iter sg_iter;
+ struct ib_block_iter biter;
struct c4iw_dev *rhp;
struct c4iw_pd *php;
struct c4iw_mr *mhp;
pr_debug("ib_pd %p\n", pd);
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
if (length == ~0ULL)
return ERR_PTR(-EINVAL);
@@ -536,13 +530,13 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mhp->rhp = rhp;
- mhp->umem = ib_umem_get(udata, start, length, acc, 0);
+ mhp->umem = ib_umem_get(pd->device, start, length, acc);
if (IS_ERR(mhp->umem))
goto err_free_skb;
shift = PAGE_SHIFT;
- n = ib_umem_num_pages(mhp->umem);
+ n = ib_umem_num_dma_blocks(mhp->umem, 1 << shift);
err = alloc_pbl(mhp, n);
if (err)
goto err_umem_release;
@@ -555,8 +549,8 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
i = n = 0;
- for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) {
- pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter));
+ rdma_umem_for_each_dma_block(mhp->umem, &biter, 1 << shift) {
+ pages[i++] = cpu_to_be64(rdma_block_iter_dma_address(&biter));
if (i == PAGE_SIZE / sizeof(*pages)) {
err = write_pbl(&mhp->rhp->rdev, pages,
mhp->attr.pbl_addr + (n << 3), i,
@@ -605,86 +599,8 @@ err_free_mhp:
return ERR_PTR(err);
}
-struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata)
-{
- struct c4iw_dev *rhp;
- struct c4iw_pd *php;
- struct c4iw_mw *mhp;
- u32 mmid;
- u32 stag = 0;
- int ret;
-
- if (type != IB_MW_TYPE_1)
- return ERR_PTR(-EINVAL);
-
- php = to_c4iw_pd(pd);
- rhp = php->rhp;
- mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
- if (!mhp)
- return ERR_PTR(-ENOMEM);
-
- mhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
- if (!mhp->wr_waitp) {
- ret = -ENOMEM;
- goto free_mhp;
- }
-
- mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL);
- if (!mhp->dereg_skb) {
- ret = -ENOMEM;
- goto free_wr_wait;
- }
-
- ret = allocate_window(&rhp->rdev, &stag, php->pdid, mhp->wr_waitp);
- if (ret)
- goto free_skb;
- mhp->rhp = rhp;
- mhp->attr.pdid = php->pdid;
- mhp->attr.type = FW_RI_STAG_MW;
- mhp->attr.stag = stag;
- mmid = (stag) >> 8;
- mhp->ibmw.rkey = stag;
- if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) {
- ret = -ENOMEM;
- goto dealloc_win;
- }
- pr_debug("mmid 0x%x mhp %p stag 0x%x\n", mmid, mhp, stag);
- return &(mhp->ibmw);
-
-dealloc_win:
- deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb,
- mhp->wr_waitp);
-free_skb:
- kfree_skb(mhp->dereg_skb);
-free_wr_wait:
- c4iw_put_wr_wait(mhp->wr_waitp);
-free_mhp:
- kfree(mhp);
- return ERR_PTR(ret);
-}
-
-int c4iw_dealloc_mw(struct ib_mw *mw)
-{
- struct c4iw_dev *rhp;
- struct c4iw_mw *mhp;
- u32 mmid;
-
- mhp = to_c4iw_mw(mw);
- rhp = mhp->rhp;
- mmid = (mw->rkey) >> 8;
- xa_erase_irq(&rhp->mrs, mmid);
- deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb,
- mhp->wr_waitp);
- kfree_skb(mhp->dereg_skb);
- c4iw_put_wr_wait(mhp->wr_waitp);
- pr_debug("ib_mw %p mmid 0x%x ptr %p\n", mw, mmid, mhp);
- kfree(mhp);
- return 0;
-}
-
struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
struct c4iw_dev *rhp;
struct c4iw_pd *php;
@@ -807,8 +723,7 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
mhp->attr.pbl_size << 3);
if (mhp->kva)
kfree((void *) (unsigned long) mhp->kva);
- if (mhp->umem)
- ib_umem_release(mhp->umem);
+ ib_umem_release(mhp->umem);
pr_debug("mmid 0x%x ptr %p\n", mmid, mhp);
c4iw_put_wr_wait(mhp->wr_waitp);
kfree(mhp);
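
The c4iw_reg_user_mr() hunk above replaces the per-page sg iteration with rdma_umem_for_each_dma_block(), walking the pinned region in fixed-size DMA blocks and taking each block's DMA address directly. The sketch below isolates that loop for filling a page-list buffer; it is not part of the patch and the helper name is invented.

/*
 * Minimal sketch, not part of the patch: fill a PBL from an ib_umem in
 * PAGE_SIZE DMA blocks.  Assumes pages[] has room for every block.
 */
#include <rdma/ib_umem.h>
#include <linux/types.h>

static void fill_pbl(struct ib_umem *umem, __be64 *pages)
{
	struct ib_block_iter biter;
	int i = 0;

	rdma_umem_for_each_dma_block(umem, &biter, PAGE_SIZE)
		pages[i++] = cpu_to_be64(rdma_block_iter_dma_address(&biter));
}
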
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 74b795642fca..e059f92d90fd 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -41,6 +41,7 @@
#include <linux/ethtool.h>
#include <linux/rtnetlink.h>
#include <linux/inetdevice.h>
+#include <net/addrconf.h>
#include <linux/io.h>
#include <asm/irq.h>
@@ -112,6 +113,9 @@ static int c4iw_alloc_ucontext(struct ib_ucontext *ucontext,
mm->key = uresp.status_page_key;
mm->addr = virt_to_phys(rhp->rdev.status_page);
mm->len = PAGE_SIZE;
+ mm->vaddr = NULL;
+ mm->dma_addr = 0;
+ insert_flag_to_mmap(&rhp->rdev, mm, mm->addr);
insert_mmap(context, mm);
}
return 0;
@@ -130,6 +134,11 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
struct c4iw_mm_entry *mm;
struct c4iw_ucontext *ucontext;
u64 addr;
+ u8 mmap_flag;
+ size_t size;
+ void *vaddr;
+ unsigned long vm_pgoff;
+ dma_addr_t dma_addr;
pr_debug("pgoff 0x%lx key 0x%x len %d\n", vma->vm_pgoff,
key, len);
@@ -144,53 +153,44 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
if (!mm)
return -EINVAL;
addr = mm->addr;
+ vaddr = mm->vaddr;
+ dma_addr = mm->dma_addr;
+ size = mm->len;
+ mmap_flag = mm->mmap_flag;
kfree(mm);
- if ((addr >= pci_resource_start(rdev->lldi.pdev, 0)) &&
- (addr < (pci_resource_start(rdev->lldi.pdev, 0) +
- pci_resource_len(rdev->lldi.pdev, 0)))) {
-
- /*
- * MA_SYNC register...
- */
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ switch (mmap_flag) {
+ case CXGB4_MMAP_BAR:
+ ret = io_remap_pfn_range(vma, vma->vm_start, addr >> PAGE_SHIFT,
+ len,
+ pgprot_noncached(vma->vm_page_prot));
+ break;
+ case CXGB4_MMAP_BAR_WC:
ret = io_remap_pfn_range(vma, vma->vm_start,
addr >> PAGE_SHIFT,
- len, vma->vm_page_prot);
- } else if ((addr >= pci_resource_start(rdev->lldi.pdev, 2)) &&
- (addr < (pci_resource_start(rdev->lldi.pdev, 2) +
- pci_resource_len(rdev->lldi.pdev, 2)))) {
-
- /*
- * Map user DB or OCQP memory...
- */
- if (addr >= rdev->oc_mw_pa)
- vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot);
- else {
- if (!is_t4(rdev->lldi.adapter_type))
- vma->vm_page_prot =
- t4_pgprot_wc(vma->vm_page_prot);
- else
- vma->vm_page_prot =
- pgprot_noncached(vma->vm_page_prot);
- }
+ len, t4_pgprot_wc(vma->vm_page_prot));
+ break;
+ case CXGB4_MMAP_CONTIG:
ret = io_remap_pfn_range(vma, vma->vm_start,
addr >> PAGE_SHIFT,
len, vma->vm_page_prot);
- } else {
-
- /*
- * Map WQ or CQ contig dma memory...
- */
- ret = remap_pfn_range(vma, vma->vm_start,
- addr >> PAGE_SHIFT,
- len, vma->vm_page_prot);
+ break;
+ case CXGB4_MMAP_NON_CONTIG:
+ vm_pgoff = vma->vm_pgoff;
+ vma->vm_pgoff = 0;
+ ret = dma_mmap_coherent(&rdev->lldi.pdev->dev, vma,
+ vaddr, dma_addr, size);
+ vma->vm_pgoff = vm_pgoff;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
}
return ret;
}
-static void c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata)
+static int c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata)
{
struct c4iw_dev *rhp;
struct c4iw_pd *php;
@@ -202,6 +202,7 @@ static void c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata)
mutex_lock(&rhp->rdev.stats.lock);
rhp->rdev.stats.pd.cur--;
mutex_unlock(&rhp->rdev.stats.lock);
+ return 0;
}
static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_udata *udata)
@@ -236,20 +237,12 @@ static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_udata *udata)
return 0;
}
-static int c4iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
- u16 *pkey)
-{
- pr_debug("ibdev %p\n", ibdev);
- *pkey = 0;
- return 0;
-}
-
-static int c4iw_query_gid(struct ib_device *ibdev, u8 port, int index,
+static int c4iw_query_gid(struct ib_device *ibdev, u32 port, int index,
union ib_gid *gid)
{
struct c4iw_dev *dev;
- pr_debug("ibdev %p, port %d, index %d, gid %p\n",
+ pr_debug("ibdev %p, port %u, index %d, gid %p\n",
ibdev, port, index, gid);
if (!port)
return -EINVAL;
@@ -271,11 +264,14 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
return -EINVAL;
dev = to_c4iw_dev(ibdev);
- memset(props, 0, sizeof *props);
- memcpy(&props->sys_image_guid, dev->rdev.lldi.ports[0]->dev_addr, 6);
+ addrconf_addr_eui48((u8 *)&props->sys_image_guid,
+ dev->rdev.lldi.ports[0]->dev_addr);
props->hw_ver = CHELSIO_CHIP_RELEASE(dev->rdev.lldi.adapter_type);
props->fw_ver = dev->rdev.lldi.fw_vers;
- props->device_cap_flags = dev->device_cap_flags;
+ props->device_cap_flags = IB_DEVICE_MEM_WINDOW;
+ props->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
+ if (fastreg_support)
+ props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
props->page_size_cap = T4_PAGESIZE_MASK;
props->vendor_id = (u32)dev->rdev.lldi.pdev->vendor;
props->vendor_part_id = (u32)dev->rdev.lldi.pdev->device;
@@ -303,34 +299,13 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
return 0;
}
-static int c4iw_query_port(struct ib_device *ibdev, u8 port,
+static int c4iw_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
- struct c4iw_dev *dev;
- struct net_device *netdev;
- struct in_device *inetdev;
-
+ int ret = 0;
pr_debug("ibdev %p\n", ibdev);
-
- dev = to_c4iw_dev(ibdev);
- netdev = dev->rdev.lldi.ports[port-1];
- /* props being zeroed by the caller, avoid zeroing it here */
- props->max_mtu = IB_MTU_4096;
- props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
-
- if (!netif_carrier_ok(netdev))
- props->state = IB_PORT_DOWN;
- else {
- inetdev = in_dev_get(netdev);
- if (inetdev) {
- if (inetdev->ifa_list)
- props->state = IB_PORT_ACTIVE;
- else
- props->state = IB_PORT_INIT;
- in_dev_put(inetdev);
- } else
- props->state = IB_PORT_INIT;
- }
+ ret = ib_get_eth_speed(ibdev, port, &props->active_speed,
+ &props->active_width);
props->port_cap_flags =
IB_PORT_CM_SUP |
@@ -339,12 +314,9 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
IB_PORT_DEVICE_MGMT_SUP |
IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
props->gid_tbl_len = 1;
- props->pkey_tbl_len = 1;
- props->active_width = 2;
- props->active_speed = IB_SPEED_DDR;
props->max_msg_sz = -1;
- return 0;
+ return ret;
}
static ssize_t hw_rev_show(struct device *dev,
@@ -354,8 +326,9 @@ static ssize_t hw_rev_show(struct device *dev,
rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev);
pr_debug("dev 0x%p\n", dev);
- return sprintf(buf, "%d\n",
- CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
+ return sysfs_emit(
+ buf, "%d\n",
+ CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
}
static DEVICE_ATTR_RO(hw_rev);
@@ -369,7 +342,7 @@ static ssize_t hca_type_show(struct device *dev,
pr_debug("dev 0x%p\n", dev);
lldev->ethtool_ops->get_drvinfo(lldev, &info);
- return sprintf(buf, "%s\n", info.driver);
+ return sysfs_emit(buf, "%s\n", info.driver);
}
static DEVICE_ATTR_RO(hca_type);
@@ -380,8 +353,8 @@ static ssize_t board_id_show(struct device *dev, struct device_attribute *attr,
rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev);
pr_debug("dev 0x%p\n", dev);
- return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor,
- c4iw_dev->rdev.lldi.pdev->device);
+ return sysfs_emit(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor,
+ c4iw_dev->rdev.lldi.pdev->device);
}
static DEVICE_ATTR_RO(board_id);
@@ -397,32 +370,29 @@ enum counters {
NR_COUNTERS
};
-static const char * const names[] = {
- [IP4INSEGS] = "ip4InSegs",
- [IP4OUTSEGS] = "ip4OutSegs",
- [IP4RETRANSSEGS] = "ip4RetransSegs",
- [IP4OUTRSTS] = "ip4OutRsts",
- [IP6INSEGS] = "ip6InSegs",
- [IP6OUTSEGS] = "ip6OutSegs",
- [IP6RETRANSSEGS] = "ip6RetransSegs",
- [IP6OUTRSTS] = "ip6OutRsts"
+static const struct rdma_stat_desc cxgb4_descs[] = {
+ [IP4INSEGS].name = "ip4InSegs",
+ [IP4OUTSEGS].name = "ip4OutSegs",
+ [IP4RETRANSSEGS].name = "ip4RetransSegs",
+ [IP4OUTRSTS].name = "ip4OutRsts",
+ [IP6INSEGS].name = "ip6InSegs",
+ [IP6OUTSEGS].name = "ip6OutSegs",
+ [IP6RETRANSSEGS].name = "ip6RetransSegs",
+ [IP6OUTRSTS].name = "ip6OutRsts"
};
-static struct rdma_hw_stats *c4iw_alloc_stats(struct ib_device *ibdev,
- u8 port_num)
+static struct rdma_hw_stats *c4iw_alloc_device_stats(struct ib_device *ibdev)
{
- BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS);
-
- if (port_num != 0)
- return NULL;
+ BUILD_BUG_ON(ARRAY_SIZE(cxgb4_descs) != NR_COUNTERS);
- return rdma_alloc_hw_stats_struct(names, NR_COUNTERS,
+ /* FIXME: these look like port stats */
+ return rdma_alloc_hw_stats_struct(cxgb4_descs, NR_COUNTERS,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
static int c4iw_get_mib(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
- u8 port, int index)
+ u32 port, int index)
{
struct tp_tcp_stats v4, v6;
struct c4iw_dev *c4iw_dev = to_c4iw_dev(ibdev);
@@ -451,7 +421,7 @@ static const struct attribute_group c4iw_attr_group = {
.attrs = c4iw_class_attributes,
};
-static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
+static int c4iw_port_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable)
{
struct ib_port_attr attr;
@@ -463,7 +433,6 @@ static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
if (err)
return err;
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
return 0;
@@ -482,30 +451,29 @@ static void get_dev_fw_str(struct ib_device *dev, char *str)
FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers));
}
-static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res)
-{
- return (res->type < ARRAY_SIZE(c4iw_restrack_funcs) &&
- c4iw_restrack_funcs[res->type]) ?
- c4iw_restrack_funcs[res->type](msg, res) : 0;
-}
-
static const struct ib_device_ops c4iw_dev_ops = {
- .alloc_hw_stats = c4iw_alloc_stats,
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_CXGB4,
+ .uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION,
+
+ .alloc_hw_device_stats = c4iw_alloc_device_stats,
.alloc_mr = c4iw_alloc_mr,
- .alloc_mw = c4iw_alloc_mw,
.alloc_pd = c4iw_allocate_pd,
.alloc_ucontext = c4iw_alloc_ucontext,
.create_cq = c4iw_create_cq,
.create_qp = c4iw_create_qp,
.create_srq = c4iw_create_srq,
- .dealloc_mw = c4iw_dealloc_mw,
.dealloc_pd = c4iw_deallocate_pd,
.dealloc_ucontext = c4iw_dealloc_ucontext,
.dereg_mr = c4iw_dereg_mr,
.destroy_cq = c4iw_destroy_cq,
.destroy_qp = c4iw_destroy_qp,
.destroy_srq = c4iw_destroy_srq,
- .fill_res_entry = fill_res_entry,
+ .device_group = &c4iw_attr_group,
+ .fill_res_cq_entry = c4iw_fill_res_cq_entry,
+ .fill_res_cm_id_entry = c4iw_fill_res_cm_id_entry,
+ .fill_res_mr_entry = c4iw_fill_res_mr_entry,
+ .fill_res_qp_entry = c4iw_fill_res_qp_entry,
.get_dev_fw_str = get_dev_fw_str,
.get_dma_mr = c4iw_get_dma_mr,
.get_hw_stats = c4iw_get_mib,
@@ -528,12 +496,15 @@ static const struct ib_device_ops c4iw_dev_ops = {
.post_srq_recv = c4iw_post_srq_recv,
.query_device = c4iw_query_device,
.query_gid = c4iw_query_gid,
- .query_pkey = c4iw_query_pkey,
.query_port = c4iw_query_port,
.query_qp = c4iw_ib_query_qp,
.reg_user_mr = c4iw_reg_user_mr,
.req_notify_cq = c4iw_arm_cq,
+
+ INIT_RDMA_OBJ_SIZE(ib_cq, c4iw_cq, ibcq),
+ INIT_RDMA_OBJ_SIZE(ib_mw, c4iw_mw, ibmw),
INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_qp, c4iw_qp, ibqp),
INIT_RDMA_OBJ_SIZE(ib_srq, c4iw_srq, ibsrq),
INIT_RDMA_OBJ_SIZE(ib_ucontext, c4iw_ucontext, ibucontext),
};
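The INIT_RDMA_OBJ_SIZE() entries tell the RDMA core how large the driver's wrapper object is and where the core ib_* object is embedded, so the core can allocate the object itself and the driver can recover its wrapper with container_of(). A sketch of the layout this assumes, using a hypothetical demo_pd:

#include <linux/kernel.h>
#include <rdma/ib_verbs.h>

struct demo_pd {
	struct ib_pd ibpd;	/* the member named in INIT_RDMA_OBJ_SIZE() */
	u32 pdid;		/* driver-private state follows */
};

static inline struct demo_pd *to_demo_pd(struct ib_pd *ibpd)
{
	return container_of(ibpd, struct demo_pd, ibpd);
}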
@@ -559,53 +530,26 @@ void c4iw_register_device(struct work_struct *work)
struct c4iw_dev *dev = ctx->dev;
pr_debug("c4iw_dev %p\n", dev);
- memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
- memcpy(&dev->ibdev.node_guid, dev->rdev.lldi.ports[0]->dev_addr, 6);
- dev->ibdev.owner = THIS_MODULE;
- dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
- if (fastreg_support)
- dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+ addrconf_addr_eui48((u8 *)&dev->ibdev.node_guid,
+ dev->rdev.lldi.ports[0]->dev_addr);
dev->ibdev.local_dma_lkey = 0;
- dev->ibdev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_POST_SEND) |
- (1ull << IB_USER_VERBS_CMD_POST_RECV) |
- (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
dev->ibdev.node_type = RDMA_NODE_RNIC;
BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);
memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC));
dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports;
dev->ibdev.num_comp_vectors = dev->rdev.lldi.nciq;
dev->ibdev.dev.parent = &dev->rdev.lldi.pdev->dev;
- dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
memcpy(dev->ibdev.iw_ifname, dev->rdev.lldi.ports[0]->name,
sizeof(dev->ibdev.iw_ifname));
- rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group);
- dev->ibdev.driver_id = RDMA_DRIVER_CXGB4;
ib_set_device_ops(&dev->ibdev, &c4iw_dev_ops);
ret = set_netdevs(&dev->ibdev, &dev->rdev);
if (ret)
goto err_dealloc_ctx;
- ret = ib_register_device(&dev->ibdev, "cxgb4_%d");
+ dma_set_max_seg_size(&dev->rdev.lldi.pdev->dev, UINT_MAX);
+ ret = ib_register_device(&dev->ibdev, "cxgb4_%d",
+ &dev->rdev.lldi.pdev->dev);
if (ret)
goto err_dealloc_ctx;
return;
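addrconf_addr_eui48() derives the node GUID above from the port MAC by expanding the 6-byte address to an EUI-64: the first three octets, then 0xFF 0xFE, then the last three, with the universal/local bit flipped. A hand-rolled equivalent, shown only for illustration:

#include <linux/types.h>

static void demo_mac_to_eui64(u8 eui[8], const u8 mac[6])
{
	eui[0] = mac[0] ^ 0x02;		/* flip the universal/local bit */
	eui[1] = mac[1];
	eui[2] = mac[2];
	eui[3] = 0xFF;			/* EUI-64 filler octets */
	eui[4] = 0xFE;
	eui[5] = mac[3];
	eui[6] = mac[4];
	eui[7] = mac[5];
}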
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index e92b9544357a..955f061a55e9 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -274,7 +274,6 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
(unsigned long long)virt_to_phys(wq->sq.queue),
wq->rq.queue,
(unsigned long long)virt_to_phys(wq->rq.queue));
- memset(wq->rq.queue, 0, wq->rq.memsize);
dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);
}
@@ -296,6 +295,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
if (user && (!wq->sq.bar2_pa || (need_rq && !wq->rq.bar2_pa))) {
pr_warn("%s: sqid %u or rqid %u not in BAR2 range\n",
pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid);
+ ret = -EINVAL;
goto free_dma;
}
@@ -303,7 +303,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
wq->rq.msn = 1;
/* build fw_ri_res_wr */
- wr_len = sizeof *res_wr + 2 * sizeof *res;
+ wr_len = sizeof(*res_wr) + 2 * sizeof(*res);
if (need_rq)
wr_len += sizeof(*res);
skb = alloc_skb(wr_len, GFP_KERNEL);
@@ -439,7 +439,7 @@ static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
rem -= len;
}
}
- len = roundup(plen + sizeof *immdp, 16) - (plen + sizeof *immdp);
+ len = roundup(plen + sizeof(*immdp), 16) - (plen + sizeof(*immdp));
if (len)
memset(dstp, 0, len);
immdp->op = FW_RI_DATA_IMMD;
@@ -528,7 +528,7 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
T4_MAX_SEND_INLINE, &plen);
if (ret)
return ret;
- size = sizeof wqe->send + sizeof(struct fw_ri_immd) +
+ size = sizeof(wqe->send) + sizeof(struct fw_ri_immd) +
plen;
} else {
ret = build_isgl((__be64 *)sq->queue,
@@ -537,7 +537,7 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
wr->sg_list, wr->num_sge, &plen);
if (ret)
return ret;
- size = sizeof wqe->send + sizeof(struct fw_ri_isgl) +
+ size = sizeof(wqe->send) + sizeof(struct fw_ri_isgl) +
wr->num_sge * sizeof(struct fw_ri_sge);
}
} else {
@@ -545,7 +545,7 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
wqe->send.u.immd_src[0].r1 = 0;
wqe->send.u.immd_src[0].r2 = 0;
wqe->send.u.immd_src[0].immdlen = 0;
- size = sizeof wqe->send + sizeof(struct fw_ri_immd);
+ size = sizeof(wqe->send) + sizeof(struct fw_ri_immd);
plen = 0;
}
*len16 = DIV_ROUND_UP(size, 16);
@@ -579,7 +579,7 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
T4_MAX_WRITE_INLINE, &plen);
if (ret)
return ret;
- size = sizeof wqe->write + sizeof(struct fw_ri_immd) +
+ size = sizeof(wqe->write) + sizeof(struct fw_ri_immd) +
plen;
} else {
ret = build_isgl((__be64 *)sq->queue,
@@ -588,7 +588,7 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
wr->sg_list, wr->num_sge, &plen);
if (ret)
return ret;
- size = sizeof wqe->write + sizeof(struct fw_ri_isgl) +
+ size = sizeof(wqe->write) + sizeof(struct fw_ri_isgl) +
wr->num_sge * sizeof(struct fw_ri_sge);
}
} else {
@@ -596,7 +596,7 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
wqe->write.u.immd_src[0].r1 = 0;
wqe->write.u.immd_src[0].r2 = 0;
wqe->write.u.immd_src[0].immdlen = 0;
- size = sizeof wqe->write + sizeof(struct fw_ri_immd);
+ size = sizeof(wqe->write) + sizeof(struct fw_ri_immd);
plen = 0;
}
*len16 = DIV_ROUND_UP(size, 16);
@@ -683,7 +683,7 @@ static int build_rdma_read(union t4_wr *wqe, const struct ib_send_wr *wr,
}
wqe->read.r2 = 0;
wqe->read.r5 = 0;
- *len16 = DIV_ROUND_UP(sizeof wqe->read, 16);
+ *len16 = DIV_ROUND_UP(sizeof(wqe->read), 16);
return 0;
}
@@ -766,8 +766,8 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
&wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);
if (ret)
return ret;
- *len16 = DIV_ROUND_UP(sizeof wqe->recv +
- wr->num_sge * sizeof(struct fw_ri_sge), 16);
+ *len16 = DIV_ROUND_UP(
+ sizeof(wqe->recv) + wr->num_sge * sizeof(struct fw_ri_sge), 16);
return 0;
}
@@ -886,47 +886,21 @@ static int build_inv_stag(union t4_wr *wqe, const struct ib_send_wr *wr,
{
wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
wqe->inv.r2 = 0;
- *len16 = DIV_ROUND_UP(sizeof wqe->inv, 16);
+ *len16 = DIV_ROUND_UP(sizeof(wqe->inv), 16);
return 0;
}
-static void free_qp_work(struct work_struct *work)
-{
- struct c4iw_ucontext *ucontext;
- struct c4iw_qp *qhp;
- struct c4iw_dev *rhp;
-
- qhp = container_of(work, struct c4iw_qp, free_work);
- ucontext = qhp->ucontext;
- rhp = qhp->rhp;
-
- pr_debug("qhp %p ucontext %p\n", qhp, ucontext);
- destroy_qp(&rhp->rdev, &qhp->wq,
- ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !qhp->srq);
-
- c4iw_put_wr_wait(qhp->wr_waitp);
- kfree(qhp);
-}
-
-static void queue_qp_free(struct kref *kref)
-{
- struct c4iw_qp *qhp;
-
- qhp = container_of(kref, struct c4iw_qp, kref);
- pr_debug("qhp %p\n", qhp);
- queue_work(qhp->rhp->rdev.free_workq, &qhp->free_work);
-}
-
void c4iw_qp_add_ref(struct ib_qp *qp)
{
pr_debug("ib_qp %p\n", qp);
- kref_get(&to_c4iw_qp(qp)->kref);
+ refcount_inc(&to_c4iw_qp(qp)->qp_refcnt);
}
void c4iw_qp_rem_ref(struct ib_qp *qp)
{
pr_debug("ib_qp %p\n", qp);
- kref_put(&to_c4iw_qp(qp)->kref, queue_qp_free);
+ if (refcount_dec_and_test(&to_c4iw_qp(qp)->qp_refcnt))
+ complete(&to_c4iw_qp(qp)->qp_rel_comp);
}
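The kref-plus-workqueue teardown is replaced here by a refcount_t and a completion that the destroy path waits on. The core of that pattern, reduced to a hypothetical demo_obj:

#include <linux/completion.h>
#include <linux/refcount.h>

struct demo_obj {
	refcount_t refcnt;
	struct completion rel_comp;
};

static void demo_obj_init(struct demo_obj *obj)
{
	refcount_set(&obj->refcnt, 1);
	init_completion(&obj->rel_comp);
}

static void demo_obj_put(struct demo_obj *obj)
{
	if (refcount_dec_and_test(&obj->refcnt))
		complete(&obj->rel_comp);	/* last user signals release */
}

static void demo_obj_destroy(struct demo_obj *obj)
{
	demo_obj_put(obj);			/* drop the initial reference */
	wait_for_completion(&obj->rel_comp);	/* block until all users are gone */
	/* tear down hardware resources and free memory here */
}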
static void add_to_fc_list(struct list_head *head, struct list_head *entry)
@@ -1192,7 +1166,7 @@ int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
break;
}
fw_flags |= FW_RI_RDMA_WRITE_WITH_IMMEDIATE;
- /*FALLTHROUGH*/
+ fallthrough;
case IB_WR_RDMA_WRITE:
fw_opcode = FW_RI_RDMA_WRITE_WR;
swsqe->opcode = FW_RI_RDMA_WRITE;
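The fallthrough pseudo-keyword replaces the old FALLTHROUGH comment so -Wimplicit-fallthrough can verify that the case really is meant to fall through. A minimal illustration (hypothetical function):

#include <linux/compiler.h>

static int demo_classify(int op)
{
	int flags = 0;

	switch (op) {
	case 1:
		flags |= 0x1;	/* extra work for op 1 ... */
		fallthrough;	/* ... then deliberately share op 2's path */
	case 2:
		flags |= 0x2;
		break;
	default:
		break;
	}
	return flags;
}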
@@ -1606,7 +1580,7 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
wqe->u.terminate.type = FW_RI_TYPE_TERMINATE;
- wqe->u.terminate.immdlen = cpu_to_be32(sizeof *term);
+ wqe->u.terminate.immdlen = cpu_to_be32(sizeof(*term));
term = (struct terminate_message *)wqe->u.terminate.termmsg;
if (qhp->attr.layer_etype == (LAYER_MPA|DDP_LLP)) {
term->layer_etype = qhp->attr.layer_etype;
@@ -1625,6 +1599,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
int count;
int rq_flushed = 0, sq_flushed;
unsigned long flag;
+ struct ib_event ev;
pr_debug("qhp %p rchp %p schp %p\n", qhp, rchp, schp);
@@ -1633,6 +1608,13 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
if (schp != rchp)
spin_lock(&schp->lock);
spin_lock(&qhp->lock);
+ if (qhp->srq && qhp->attr.state == C4IW_QP_STATE_ERROR &&
+ qhp->ibqp.event_handler) {
+ ev.device = qhp->ibqp.device;
+ ev.element.qp = &qhp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ qhp->ibqp.event_handler(&ev, qhp->ibqp.qp_context);
+ }
if (qhp->wq.flushed) {
spin_unlock(&qhp->lock);
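The block added above raises IB_EVENT_QP_LAST_WQE_REACHED when an SRQ-attached QP is flushed in the error state, which is how a ULP learns that no further completions will arrive for that QP. A hypothetical consumer-side handler, for context:

#include <rdma/ib_verbs.h>

static void demo_qp_event_handler(struct ib_event *event, void *context)
{
	switch (event->event) {
	case IB_EVENT_QP_LAST_WQE_REACHED:
		/* safe to start releasing SRQ resources tied to this QP */
		break;
	default:
		break;
	}
}
/* Installed via ib_qp_init_attr.event_handler at QP creation time. */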
@@ -1751,16 +1733,15 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init)
{
pr_debug("p2p_type = %d\n", p2p_type);
- memset(&init->u, 0, sizeof init->u);
+ memset(&init->u, 0, sizeof(init->u));
switch (p2p_type) {
case FW_RI_INIT_P2PTYPE_RDMA_WRITE:
init->u.write.opcode = FW_RI_RDMA_WRITE_WR;
init->u.write.stag_sink = cpu_to_be32(1);
init->u.write.to_sink = cpu_to_be64(1);
init->u.write.u.immd_src[0].op = FW_RI_DATA_IMMD;
- init->u.write.len16 = DIV_ROUND_UP(sizeof init->u.write +
- sizeof(struct fw_ri_immd),
- 16);
+ init->u.write.len16 = DIV_ROUND_UP(
+ sizeof(init->u.write) + sizeof(struct fw_ri_immd), 16);
break;
case FW_RI_INIT_P2PTYPE_READ_REQ:
init->u.write.opcode = FW_RI_RDMA_READ_WR;
@@ -1768,7 +1749,7 @@ static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init)
init->u.read.to_src_lo = cpu_to_be32(1);
init->u.read.stag_sink = cpu_to_be32(1);
init->u.read.to_sink_lo = cpu_to_be32(1);
- init->u.read.len16 = DIV_ROUND_UP(sizeof init->u.read, 16);
+ init->u.read.len16 = DIV_ROUND_UP(sizeof(init->u.read), 16);
break;
}
}
@@ -1782,7 +1763,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
pr_debug("qhp %p qid 0x%x tid %u ird %u ord %u\n", qhp,
qhp->wq.sq.qid, qhp->ep->hwtid, qhp->ep->ird, qhp->ep->ord);
- skb = alloc_skb(sizeof *wqe, GFP_KERNEL);
+ skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
if (!skb) {
ret = -ENOMEM;
goto out;
@@ -1976,10 +1957,10 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
qhp->attr.layer_etype = attrs->layer_etype;
qhp->attr.ecode = attrs->ecode;
ep = qhp->ep;
- c4iw_get_ep(&ep->com);
- disconnect = 1;
if (!internal) {
+ c4iw_get_ep(&ep->com);
terminate = 1;
+ disconnect = 1;
} else {
terminate = qhp->attr.send_term;
ret = rdma_fini(rhp, qhp, ep);
@@ -1991,7 +1972,6 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
t4_set_wq_in_error(&qhp->wq, 0);
set_state(qhp, C4IW_QP_STATE_ERROR);
if (!internal) {
- abort = 1;
disconnect = 1;
ep = qhp->ep;
c4iw_get_ep(&qhp->ep->com);
@@ -2099,10 +2079,12 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
{
struct c4iw_dev *rhp;
struct c4iw_qp *qhp;
+ struct c4iw_ucontext *ucontext;
struct c4iw_qp_attributes attrs;
qhp = to_c4iw_qp(ib_qp);
rhp = qhp->rhp;
+ ucontext = qhp->ucontext;
attrs.next_state = C4IW_QP_STATE_ERROR;
if (qhp->attr.state == C4IW_QP_STATE_TERMINATE)
@@ -2120,15 +2102,24 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
c4iw_qp_rem_ref(ib_qp);
+ wait_for_completion(&qhp->qp_rel_comp);
+
pr_debug("ib_qp %p qpid 0x%0x\n", ib_qp, qhp->wq.sq.qid);
+ pr_debug("qhp %p ucontext %p\n", qhp, ucontext);
+
+ destroy_qp(&rhp->rdev, &qhp->wq,
+ ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !qhp->srq);
+
+ c4iw_put_wr_wait(qhp->wr_waitp);
return 0;
}
-struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
- struct ib_udata *udata)
+int c4iw_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata)
{
+ struct ib_pd *pd = qp->pd;
struct c4iw_dev *rhp;
- struct c4iw_qp *qhp;
+ struct c4iw_qp *qhp = to_c4iw_qp(qp);
struct c4iw_pd *php;
struct c4iw_cq *schp;
struct c4iw_cq *rchp;
@@ -2140,44 +2131,36 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm;
struct c4iw_mm_entry *rq_db_key_mm = NULL, *ma_sync_key_mm = NULL;
- pr_debug("ib_pd %p\n", pd);
-
- if (attrs->qp_type != IB_QPT_RC)
- return ERR_PTR(-EINVAL);
+ if (attrs->qp_type != IB_QPT_RC || attrs->create_flags)
+ return -EOPNOTSUPP;
php = to_c4iw_pd(pd);
rhp = php->rhp;
schp = get_chp(rhp, ((struct c4iw_cq *)attrs->send_cq)->cq.cqid);
rchp = get_chp(rhp, ((struct c4iw_cq *)attrs->recv_cq)->cq.cqid);
if (!schp || !rchp)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (!attrs->srq) {
if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size)
- return ERR_PTR(-E2BIG);
+ return -E2BIG;
rqsize = attrs->cap.max_recv_wr + 1;
if (rqsize < 8)
rqsize = 8;
}
if (attrs->cap.max_send_wr > rhp->rdev.hw_queue.t4_max_sq_size)
- return ERR_PTR(-E2BIG);
+ return -E2BIG;
sqsize = attrs->cap.max_send_wr + 1;
if (sqsize < 8)
sqsize = 8;
- qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
- if (!qhp)
- return ERR_PTR(-ENOMEM);
-
qhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
- if (!qhp->wr_waitp) {
- ret = -ENOMEM;
- goto err_free_qhp;
- }
+ if (!qhp->wr_waitp)
+ return -ENOMEM;
qhp->wq.sq.size = sqsize;
qhp->wq.sq.memsize =
@@ -2230,8 +2213,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
spin_lock_init(&qhp->lock);
mutex_init(&qhp->mutex);
init_waitqueue_head(&qhp->wait);
- kref_init(&qhp->kref);
- INIT_WORK(&qhp->free_work, free_qp_work);
+ init_completion(&qhp->qp_rel_comp);
+ refcount_set(&qhp->qp_refcnt, 1);
ret = xa_insert_irq(&rhp->qps, qhp->wq.sq.qid, qhp, GFP_KERNEL);
if (ret)
@@ -2302,28 +2285,43 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
ucontext->key += PAGE_SIZE;
}
spin_unlock(&ucontext->mmap_lock);
- ret = ib_copy_to_udata(udata, &uresp, sizeof uresp);
+ ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
if (ret)
goto err_free_ma_sync_key;
sq_key_mm->key = uresp.sq_key;
- sq_key_mm->addr = qhp->wq.sq.phys_addr;
+ sq_key_mm->addr = 0;
+ sq_key_mm->vaddr = qhp->wq.sq.queue;
+ sq_key_mm->dma_addr = qhp->wq.sq.dma_addr;
sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize);
+ insert_flag_to_mmap(&rhp->rdev, sq_key_mm, sq_key_mm->addr);
insert_mmap(ucontext, sq_key_mm);
if (!attrs->srq) {
rq_key_mm->key = uresp.rq_key;
- rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue);
+ rq_key_mm->addr = 0;
+ rq_key_mm->vaddr = qhp->wq.rq.queue;
+ rq_key_mm->dma_addr = qhp->wq.rq.dma_addr;
rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize);
+ insert_flag_to_mmap(&rhp->rdev, rq_key_mm,
+ rq_key_mm->addr);
insert_mmap(ucontext, rq_key_mm);
}
sq_db_key_mm->key = uresp.sq_db_gts_key;
sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa;
+ sq_db_key_mm->vaddr = NULL;
+ sq_db_key_mm->dma_addr = 0;
sq_db_key_mm->len = PAGE_SIZE;
+ insert_flag_to_mmap(&rhp->rdev, sq_db_key_mm,
+ sq_db_key_mm->addr);
insert_mmap(ucontext, sq_db_key_mm);
if (!attrs->srq) {
rq_db_key_mm->key = uresp.rq_db_gts_key;
rq_db_key_mm->addr =
(u64)(unsigned long)qhp->wq.rq.bar2_pa;
rq_db_key_mm->len = PAGE_SIZE;
+ rq_db_key_mm->vaddr = NULL;
+ rq_db_key_mm->dma_addr = 0;
+ insert_flag_to_mmap(&rhp->rdev, rq_db_key_mm,
+ rq_db_key_mm->addr);
insert_mmap(ucontext, rq_db_key_mm);
}
if (ma_sync_key_mm) {
@@ -2332,6 +2330,10 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
(pci_resource_start(rhp->rdev.lldi.pdev, 0) +
PCIE_MA_SYNC_A) & PAGE_MASK;
ma_sync_key_mm->len = PAGE_SIZE;
+ ma_sync_key_mm->vaddr = NULL;
+ ma_sync_key_mm->dma_addr = 0;
+ insert_flag_to_mmap(&rhp->rdev, ma_sync_key_mm,
+ ma_sync_key_mm->addr);
insert_mmap(ucontext, ma_sync_key_mm);
}
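Each user-mappable resource (queue memory, doorbell page, MA sync page) is published to userspace as a page-aligned key in the create response; the provider library later mmap()s the command fd at that key to map the resource. A hypothetical userspace-side sketch of that half of the contract:

#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>
#include <sys/types.h>

static void *demo_map_queue(int cmd_fd, uint64_t key, size_t len)
{
	/* the key doubles as the mmap offset; the kernel validates it */
	return mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
		    cmd_fd, (off_t)key);
}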
@@ -2355,7 +2357,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize,
attrs->cap.max_send_wr, qhp->wq.rq.qid, qhp->wq.rq.size,
qhp->wq.rq.memsize, attrs->cap.max_recv_wr);
- return &qhp->ibqp;
+ return 0;
err_free_ma_sync_key:
kfree(ma_sync_key_mm);
err_free_rq_db_key:
@@ -2375,9 +2377,7 @@ err_destroy_qp:
ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !attrs->srq);
err_free_wr_wait:
c4iw_put_wr_wait(qhp->wr_waitp);
-err_free_qhp:
- kfree(qhp);
- return ERR_PTR(ret);
+ return ret;
}
int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
@@ -2386,10 +2386,13 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct c4iw_dev *rhp;
struct c4iw_qp *qhp;
enum c4iw_qp_attr_mask mask = 0;
- struct c4iw_qp_attributes attrs;
+ struct c4iw_qp_attributes attrs = {};
pr_debug("ib_qp %p\n", ibqp);
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
/* iwarp does not support the RTR state */
if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
attr_mask &= ~IB_QP_STATE;
@@ -2398,7 +2401,6 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (!attr_mask)
return 0;
- memset(&attrs, 0, sizeof attrs);
qhp = to_c4iw_qp(ibqp);
rhp = qhp->rhp;
@@ -2482,15 +2484,16 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
{
struct c4iw_qp *qhp = to_c4iw_qp(ibqp);
- memset(attr, 0, sizeof *attr);
- memset(init_attr, 0, sizeof *init_attr);
+ memset(attr, 0, sizeof(*attr));
+ memset(init_attr, 0, sizeof(*init_attr));
attr->qp_state = to_ib_qp_state(qhp->attr.state);
+ attr->cur_qp_state = to_ib_qp_state(qhp->attr.state);
init_attr->cap.max_send_wr = qhp->attr.sq_num_entries;
init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries;
init_attr->cap.max_send_sge = qhp->attr.sq_max_sges;
- init_attr->cap.max_recv_sge = qhp->attr.sq_max_sges;
+ init_attr->cap.max_recv_sge = qhp->attr.rq_max_sges;
init_attr->cap.max_inline_data = T4_MAX_SEND_INLINE;
- init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0;
+ init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
return 0;
}
@@ -2697,6 +2700,9 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs,
int ret;
int wr_len;
+ if (attrs->srq_type != IB_SRQT_BASIC)
+ return -EOPNOTSUPP;
+
pr_debug("%s ib_pd %p\n", __func__, pd);
php = to_c4iw_pd(pd);
@@ -2754,15 +2760,11 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs,
if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6)
srq->flags = T4_SRQ_LIMIT_SUPPORT;
- ret = xa_insert_irq(&rhp->qps, srq->wq.qid, srq, GFP_KERNEL);
- if (ret)
- goto err_free_queue;
-
if (udata) {
srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL);
if (!srq_key_mm) {
ret = -ENOMEM;
- goto err_remove_handle;
+ goto err_free_queue;
}
srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL);
if (!srq_db_key_mm) {
@@ -2786,12 +2788,19 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs,
if (ret)
goto err_free_srq_db_key_mm;
srq_key_mm->key = uresp.srq_key;
- srq_key_mm->addr = virt_to_phys(srq->wq.queue);
+ srq_key_mm->addr = 0;
srq_key_mm->len = PAGE_ALIGN(srq->wq.memsize);
+ srq_key_mm->vaddr = srq->wq.queue;
+ srq_key_mm->dma_addr = srq->wq.dma_addr;
+ insert_flag_to_mmap(&rhp->rdev, srq_key_mm, srq_key_mm->addr);
insert_mmap(ucontext, srq_key_mm);
srq_db_key_mm->key = uresp.srq_db_gts_key;
srq_db_key_mm->addr = (u64)(unsigned long)srq->wq.bar2_pa;
srq_db_key_mm->len = PAGE_SIZE;
+ srq_db_key_mm->vaddr = NULL;
+ srq_db_key_mm->dma_addr = 0;
+ insert_flag_to_mmap(&rhp->rdev, srq_db_key_mm,
+ srq_db_key_mm->addr);
insert_mmap(ucontext, srq_db_key_mm);
}
@@ -2806,8 +2815,6 @@ err_free_srq_db_key_mm:
kfree(srq_db_key_mm);
err_free_srq_key_mm:
kfree(srq_key_mm);
-err_remove_handle:
- xa_erase_irq(&rhp->qps, srq->wq.qid);
err_free_queue:
free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
srq->wr_waitp);
@@ -2820,7 +2827,7 @@ err_free_wr_wait:
return ret;
}
-void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+int c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
struct c4iw_dev *rhp;
struct c4iw_srq *srq;
@@ -2830,12 +2837,11 @@ void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
rhp = srq->rhp;
pr_debug("%s id %d\n", __func__, srq->wq.qid);
-
- xa_erase_irq(&rhp->qps, srq->wq.qid);
ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
ibucontext);
free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
srq->wr_waitp);
c4iw_free_srq_idx(&rhp->rdev, srq->idx);
c4iw_put_wr_wait(srq->wr_waitp);
+ return 0;
}
diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c
index 57ed26b3cc21..e800e8e8bed5 100644
--- a/drivers/infiniband/hw/cxgb4/resource.c
+++ b/drivers/infiniband/hw/cxgb4/resource.c
@@ -126,7 +126,7 @@ u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
rdev->stats.qid.cur += rdev->qpmask + 1;
mutex_unlock(&rdev->stats.lock);
for (i = qid+1; i & rdev->qpmask; i++) {
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
goto out;
entry->qid = i;
@@ -137,13 +137,13 @@ u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
* now put the same ids on the qp list since they all
* map to the same db/gts page.
*/
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
goto out;
entry->qid = qid;
list_add_tail(&entry->entry, &uctx->qpids);
for (i = qid+1; i & rdev->qpmask; i++) {
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
goto out;
entry->qid = i;
@@ -165,7 +165,7 @@ void c4iw_put_cqid(struct c4iw_rdev *rdev, u32 qid,
{
struct c4iw_qid_list *entry;
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return;
pr_debug("qid 0x%x\n", qid);
@@ -200,7 +200,7 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
rdev->stats.qid.cur += rdev->qpmask + 1;
mutex_unlock(&rdev->stats.lock);
for (i = qid+1; i & rdev->qpmask; i++) {
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
goto out;
entry->qid = i;
@@ -211,13 +211,13 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
* now put the same ids on the cq list since they all
* map to the same db/gts page.
*/
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
goto out;
entry->qid = qid;
list_add_tail(&entry->entry, &uctx->cqids);
- for (i = qid; i & rdev->qpmask; i++) {
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ for (i = qid + 1; i & rdev->qpmask; i++) {
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
goto out;
entry->qid = i;
@@ -239,7 +239,7 @@ void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid,
{
struct c4iw_qid_list *entry;
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return;
pr_debug("qid 0x%x\n", qid);
diff --git a/drivers/infiniband/hw/cxgb4/restrack.c b/drivers/infiniband/hw/cxgb4/restrack.c
index f82d46ed969d..fd22c85d35f4 100644
--- a/drivers/infiniband/hw/cxgb4/restrack.c
+++ b/drivers/infiniband/hw/cxgb4/restrack.c
@@ -134,10 +134,8 @@ err:
return -EMSGSIZE;
}
-static int fill_res_qp_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+int c4iw_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ibqp)
{
- struct ib_qp *ibqp = container_of(res, struct ib_qp, res);
struct t4_swsqe *fsp = NULL, *lsp = NULL;
struct c4iw_qp *qhp = to_c4iw_qp(ibqp);
u16 first_sq_idx = 0, last_sq_idx = 0;
@@ -195,10 +193,9 @@ union union_ep {
struct c4iw_ep ep;
};
-static int fill_res_ep_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+int c4iw_fill_res_cm_id_entry(struct sk_buff *msg,
+ struct rdma_cm_id *cm_id)
{
- struct rdma_cm_id *cm_id = rdma_res_to_id(res);
struct nlattr *table_attr;
struct c4iw_ep_common *epcp;
struct c4iw_listen_ep *listen_ep = NULL;
@@ -212,7 +209,7 @@ static int fill_res_ep_entry(struct sk_buff *msg,
epcp = (struct c4iw_ep_common *)iw_cm_id->provider_data;
if (!epcp)
return 0;
- uep = kcalloc(1, sizeof(*uep), GFP_KERNEL);
+ uep = kzalloc(sizeof(*uep), GFP_KERNEL);
if (!uep)
return 0;
@@ -241,7 +238,7 @@ static int fill_res_ep_entry(struct sk_buff *msg,
if (rdma_nl_put_driver_u64_hex(msg, "history", epcp->history))
goto err_cancel_table;
- if (epcp->state == LISTEN) {
+ if (listen_ep) {
if (rdma_nl_put_driver_u32(msg, "stid", listen_ep->stid))
goto err_cancel_table;
if (rdma_nl_put_driver_u32(msg, "backlog", listen_ep->backlog))
@@ -372,10 +369,8 @@ err:
return -EMSGSIZE;
}
-static int fill_res_cq_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+int c4iw_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ibcq)
{
- struct ib_cq *ibcq = container_of(res, struct ib_cq, res);
struct c4iw_cq *chp = to_c4iw_cq(ibcq);
struct nlattr *table_attr;
struct t4_cqe hwcqes[2];
@@ -433,10 +428,8 @@ err:
return -EMSGSIZE;
}
-static int fill_res_mr_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+int c4iw_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr)
{
- struct ib_mr *ibmr = container_of(res, struct ib_mr, res);
struct c4iw_mr *mhp = to_c4iw_mr(ibmr);
struct c4iw_dev *dev = mhp->rhp;
u32 stag = mhp->attr.stag;
@@ -492,10 +485,3 @@ err_cancel_table:
err:
return -EMSGSIZE;
}
-
-c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX] = {
- [RDMA_RESTRACK_QP] = fill_res_qp_entry,
- [RDMA_RESTRACK_CM_ID] = fill_res_ep_entry,
- [RDMA_RESTRACK_CQ] = fill_res_cq_entry,
- [RDMA_RESTRACK_MR] = fill_res_mr_entry,
-};
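The single fill_res_entry() dispatcher is gone; each object type now gets its own ib_device_ops hook that dumps driver attributes as nested netlink entries. A minimal sketch of the shape such a hook takes (hypothetical demo_ names, dumping only the CQE count):

#include <linux/errno.h>
#include <net/netlink.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_netlink.h>
#include <rdma/restrack.h>

static int demo_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ibcq)
{
	struct nlattr *table_attr;

	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
	if (!table_attr)
		return -EMSGSIZE;

	if (rdma_nl_put_driver_u32(msg, "cqe", ibcq->cqe))
		goto err;

	nla_nest_end(msg, table_attr);
	return 0;
err:
	nla_nest_cancel(msg, table_attr);
	return -EMSGSIZE;
}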
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index b170817b2741..c3b0e2896475 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -487,11 +487,6 @@ static inline int t4_rq_empty(struct t4_wq *wq)
return wq->rq.in_use == 0;
}
-static inline int t4_rq_full(struct t4_wq *wq)
-{
- return wq->rq.in_use == (wq->rq.size - 1);
-}
-
static inline u32 t4_rq_avail(struct t4_wq *wq)
{
return wq->rq.size - 1 - wq->rq.in_use;
@@ -534,11 +529,6 @@ static inline int t4_sq_empty(struct t4_wq *wq)
return wq->sq.in_use == 0;
}
-static inline int t4_sq_full(struct t4_wq *wq)
-{
- return wq->sq.in_use == (wq->sq.size - 1);
-}
-
static inline u32 t4_sq_avail(struct t4_wq *wq)
{
return wq->sq.size - 1 - wq->sq.in_use;
@@ -679,11 +669,6 @@ static inline void t4_enable_wq_db(struct t4_wq *wq)
wq->rq.queue[wq->rq.size].status.db_off = 0;
}
-static inline int t4_wq_db_enabled(struct t4_wq *wq)
-{
- return !wq->rq.queue[wq->rq.size].status.db_off;
-}
-
enum t4_cq_flags {
CQ_ARMED = 1,
};
@@ -817,19 +802,6 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
return ret;
}
-static inline struct t4_cqe *t4_next_sw_cqe(struct t4_cq *cq)
-{
- if (cq->sw_in_use == cq->size) {
- pr_warn("%s cxgb4 sw cq overflow cqid %u\n",
- __func__, cq->cqid);
- cq->error = 1;
- return NULL;
- }
- if (cq->sw_in_use)
- return &cq->sw_queue[cq->sw_cidx];
- return NULL;
-}
-
static inline int t4_next_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
{
int ret = 0;
@@ -843,11 +815,6 @@ static inline int t4_next_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
return ret;
}
-static inline int t4_cq_in_error(struct t4_cq *cq)
-{
- return *cq->qp_errp;
-}
-
static inline void t4_set_cq_in_error(struct t4_cq *cq)
{
*cq->qp_errp = 1;
diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
index cbdb300a4794..1f79537fc8d1 100644
--- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
+++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
@@ -122,9 +122,7 @@ struct fw_ri_dsgl {
__be16 nsge;
__be32 len0;
__be64 addr0;
-#ifndef C99_NOT_SUPPORTED
- struct fw_ri_dsge_pair sge[0];
-#endif
+ struct fw_ri_dsge_pair sge[];
};
struct fw_ri_sge {
@@ -138,9 +136,7 @@ struct fw_ri_isgl {
__u8 r1;
__be16 nsge;
__be32 r2;
-#ifndef C99_NOT_SUPPORTED
- struct fw_ri_sge sge[0];
-#endif
+ struct fw_ri_sge sge[];
};
struct fw_ri_immd {
@@ -148,9 +144,7 @@ struct fw_ri_immd {
__u8 r1;
__be16 r2;
__be32 immdlen;
-#ifndef C99_NOT_SUPPORTED
- __u8 data[0];
-#endif
+ __u8 data[];
};
struct fw_ri_tpte {
@@ -320,9 +314,7 @@ struct fw_ri_res_wr {
__be32 op_nres;
__be32 len16_pkd;
__u64 cookie;
-#ifndef C99_NOT_SUPPORTED
- struct fw_ri_res res[0];
-#endif
+ struct fw_ri_res res[];
};
#define FW_RI_RES_WR_NRES_S 0
@@ -562,12 +554,10 @@ struct fw_ri_rdma_write_wr {
__be32 plen;
__be32 stag_sink;
__be64 to_sink;
-#ifndef C99_NOT_SUPPORTED
union {
- struct fw_ri_immd immd_src[0];
- struct fw_ri_isgl isgl_src[0];
+ DECLARE_FLEX_ARRAY(struct fw_ri_immd, immd_src);
+ DECLARE_FLEX_ARRAY(struct fw_ri_isgl, isgl_src);
} u;
-#endif
};
struct fw_ri_send_wr {
@@ -581,12 +571,10 @@ struct fw_ri_send_wr {
__be32 plen;
__be32 r3;
__be64 r4;
-#ifndef C99_NOT_SUPPORTED
union {
- struct fw_ri_immd immd_src[0];
- struct fw_ri_isgl isgl_src[0];
+ DECLARE_FLEX_ARRAY(struct fw_ri_immd, immd_src);
+ DECLARE_FLEX_ARRAY(struct fw_ri_isgl, isgl_src);
} u;
-#endif
};
#define FW_RI_SEND_WR_SENDOP_S 0
@@ -618,12 +606,10 @@ struct fw_ri_rdma_write_cmpl_wr {
struct fw_ri_isgl isgl_src;
} u_cmpl;
__be64 r3;
-#ifndef C99_NOT_SUPPORTED
union fw_ri_write {
- struct fw_ri_immd immd_src[0];
- struct fw_ri_isgl isgl_src[0];
+ DECLARE_FLEX_ARRAY(struct fw_ri_immd, immd_src);
+ DECLARE_FLEX_ARRAY(struct fw_ri_isgl, isgl_src);
} u;
-#endif
};
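A flexible array member cannot stand alone inside a union, so DECLARE_FLEX_ARRAY() (from <linux/stddef.h>) wraps each one in an anonymous struct while the old GNU-style zero-length arrays are dropped. A hypothetical example of the same construct:

#include <linux/stddef.h>
#include <linux/types.h>

struct demo_hdr {
	__be32 len;
	union {
		DECLARE_FLEX_ARRAY(u8, bytes);		/* raw payload view */
		DECLARE_FLEX_ARRAY(__be32, words);	/* word-sized view  */
	} u;
};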
struct fw_ri_rdma_read_wr {
diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h
index 9e3cc3239c13..96f9c3bc98b2 100644
--- a/drivers/infiniband/hw/efa/efa.h
+++ b/drivers/infiniband/hw/efa/efa.h
@@ -1,16 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_H_
#define _EFA_H_
#include <linux/bitops.h>
-#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
-#include <linux/sched.h>
#include <rdma/efa-abi.h>
#include <rdma/ib_verbs.h>
@@ -22,31 +20,27 @@
#define EFA_IRQNAME_SIZE 40
-/* 1 for AENQ + ADMIN */
-#define EFA_NUM_MSIX_VEC 1
#define EFA_MGMNT_MSIX_VEC_IDX 0
+#define EFA_COMP_EQS_VEC_BASE 1
struct efa_irq {
irq_handler_t handler;
void *data;
- int cpu;
+ u32 irqn;
u32 vector;
cpumask_t affinity_hint_mask;
char name[EFA_IRQNAME_SIZE];
};
-struct efa_sw_stats {
+/* Don't use anything other than atomic64 */
+struct efa_stats {
atomic64_t alloc_pd_err;
atomic64_t create_qp_err;
atomic64_t create_cq_err;
atomic64_t reg_mr_err;
atomic64_t alloc_ucontext_err;
atomic64_t create_ah_err;
-};
-
-/* Don't use anything other than atomic64 */
-struct efa_stats {
- struct efa_sw_stats sw_stats;
+ atomic64_t mmap_err;
atomic64_t keep_alive_rcvd;
};
@@ -62,19 +56,23 @@ struct efa_dev {
u64 mem_bar_len;
u64 db_bar_addr;
u64 db_bar_len;
- u8 addr[EFA_GID_SIZE];
- u32 mtu;
- int admin_msix_vector_idx;
+ u32 num_irq_vectors;
+ u32 admin_msix_vector_idx;
struct efa_irq admin_irq;
struct efa_stats stats;
+
+ /* Array of completion EQs */
+ struct efa_eq *eqs;
+ u32 neqs;
+
+ /* Only stores CQs with interrupts enabled */
+ struct xarray cqs_xa;
};
struct efa_ucontext {
struct ib_ucontext ibucontext;
- struct xarray mmap_xa;
- u32 mmap_xa_page;
u16 uarn;
};
@@ -83,9 +81,19 @@ struct efa_pd {
u16 pdn;
};
+struct efa_mr_interconnect_info {
+ u16 recv_ic_id;
+ u16 rdma_read_ic_id;
+ u16 rdma_recv_ic_id;
+ u8 recv_ic_id_valid : 1;
+ u8 rdma_read_ic_id_valid : 1;
+ u8 rdma_recv_ic_id_valid : 1;
+};
+
struct efa_mr {
struct ib_mr ibmr;
struct ib_umem *umem;
+ struct efa_mr_interconnect_info ic_info;
};
struct efa_cq {
@@ -93,8 +101,13 @@ struct efa_cq {
struct efa_ucontext *ucontext;
dma_addr_t dma_addr;
void *cpu_addr;
+ struct rdma_user_mmap_entry *mmap_entry;
+ struct rdma_user_mmap_entry *db_mmap_entry;
size_t size;
u16 cq_idx;
+ /* NULL when no interrupts requested */
+ struct efa_eq *eq;
+ struct ib_umem *umem;
};
struct efa_qp {
@@ -103,6 +116,13 @@ struct efa_qp {
void *rq_cpu_addr;
size_t rq_size;
enum ib_qp_state state;
+
+ /* Used for saving mmap_xa entries */
+ struct rdma_user_mmap_entry *sq_db_mmap_entry;
+ struct rdma_user_mmap_entry *llq_desc_mmap_entry;
+ struct rdma_user_mmap_entry *rq_db_mmap_entry;
+ struct rdma_user_mmap_entry *rq_mmap_entry;
+
u32 qp_handle;
u32 max_send_wr;
u32 max_recv_wr;
@@ -118,46 +138,61 @@ struct efa_ah {
u8 id[EFA_GID_SIZE];
};
+struct efa_eq {
+ struct efa_com_eq eeq;
+ struct efa_irq irq;
+};
+
int efa_query_device(struct ib_device *ibdev,
struct ib_device_attr *props,
struct ib_udata *udata);
-int efa_query_port(struct ib_device *ibdev, u8 port,
+int efa_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props);
int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr);
-int efa_query_gid(struct ib_device *ibdev, u8 port, int index,
+int efa_query_gid(struct ib_device *ibdev, u32 port, int index,
union ib_gid *gid);
-int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+int efa_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
u16 *pkey);
int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
-void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
-struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata);
+int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
-struct ib_cq *efa_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+int efa_create_cq_umem(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_umem *umem, struct uverbs_attr_bundle *attrs);
struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata);
+struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start,
+ u64 length, u64 virt_addr,
+ int fd, int access_flags,
+ struct ib_dmah *dmah,
+ struct uverbs_attr_bundle *attrs);
int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
-int efa_get_port_immutable(struct ib_device *ibdev, u8 port_num,
+int efa_get_port_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable);
int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata);
void efa_dealloc_ucontext(struct ib_ucontext *ibucontext);
int efa_mmap(struct ib_ucontext *ibucontext,
struct vm_area_struct *vma);
+void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
int efa_create_ah(struct ib_ah *ibah,
- struct rdma_ah_attr *ah_attr,
- u32 flags,
+ struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
-void efa_destroy_ah(struct ib_ah *ibah, u32 flags);
+int efa_destroy_ah(struct ib_ah *ibah, u32 flags);
int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_udata *udata);
enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
- u8 port_num);
+ u32 port_num);
+struct rdma_hw_stats *efa_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num);
+struct rdma_hw_stats *efa_alloc_hw_device_stats(struct ib_device *ibdev);
+int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port_num, int index);
#endif /* _EFA_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
index 2be0469d545f..57178dad5eb7 100644
--- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
+++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_ADMIN_CMDS_H_
@@ -28,7 +28,10 @@ enum efa_admin_aq_opcode {
EFA_ADMIN_DEALLOC_PD = 15,
EFA_ADMIN_ALLOC_UAR = 16,
EFA_ADMIN_DEALLOC_UAR = 17,
- EFA_ADMIN_MAX_OPCODE = 17,
+ EFA_ADMIN_CREATE_EQ = 18,
+ EFA_ADMIN_DESTROY_EQ = 19,
+ EFA_ADMIN_ALLOC_MR = 20,
+ EFA_ADMIN_MAX_OPCODE = 20,
};
enum efa_admin_aq_feature_id {
@@ -37,7 +40,8 @@ enum efa_admin_aq_feature_id {
EFA_ADMIN_NETWORK_ATTR = 3,
EFA_ADMIN_QUEUE_ATTR = 4,
EFA_ADMIN_HW_HINTS = 5,
- EFA_ADMIN_FEATURES_OPCODE_NUM = 8,
+ EFA_ADMIN_HOST_INFO = 6,
+ EFA_ADMIN_EVENT_QUEUE_ATTR = 7,
};
/* QP transport type */
@@ -61,6 +65,10 @@ enum efa_admin_qp_state {
enum efa_admin_get_stats_type {
EFA_ADMIN_GET_STATS_TYPE_BASIC = 0,
+ EFA_ADMIN_GET_STATS_TYPE_MESSAGES = 1,
+ EFA_ADMIN_GET_STATS_TYPE_RDMA_READ = 2,
+ EFA_ADMIN_GET_STATS_TYPE_RDMA_WRITE = 3,
+ EFA_ADMIN_GET_STATS_TYPE_NETWORK = 4,
};
enum efa_admin_get_stats_scope {
@@ -68,14 +76,6 @@ enum efa_admin_get_stats_scope {
EFA_ADMIN_GET_STATS_SCOPE_QUEUE = 1,
};
-enum efa_admin_modify_qp_mask_bits {
- EFA_ADMIN_QP_STATE_BIT = 0,
- EFA_ADMIN_CUR_QP_STATE_BIT = 1,
- EFA_ADMIN_QKEY_BIT = 2,
- EFA_ADMIN_SQ_PSN_BIT = 3,
- EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT = 4,
-};
-
/*
* QP allocation sizes, converted by fabric QueuePair (QP) create command
* from QP capabilities.
@@ -112,7 +112,10 @@ struct efa_admin_create_qp_cmd {
* virtual (IOVA returned by MR registration)
* 1 : rq_virt - If set, RQ ring base address is
* virtual (IOVA returned by MR registration)
- * 7:2 : reserved - MBZ
+ * 2 : unsolicited_write_recv - If set, work requests
+ * will not be consumed for incoming RDMA write with
+ * immediate
+ * 7:3 : reserved - MBZ
*/
u8 flags;
@@ -149,8 +152,11 @@ struct efa_admin_create_qp_cmd {
/* UAR number */
u16 uar;
+ /* Requested service level for the QP, 0 is the default SL */
+ u8 sl;
+
/* MBZ */
- u16 reserved;
+ u8 reserved;
/* MBZ */
u32 reserved2;
@@ -160,10 +166,16 @@ struct efa_admin_create_qp_resp {
/* Common Admin Queue completion descriptor */
struct efa_admin_acq_common_desc acq_common_desc;
- /* Opaque handle to be used for consequent operations on the QP */
+ /*
+ * Opaque handle to be used for consequent admin operations on the
+ * QP
+ */
u32 qp_handle;
- /* QP number in the given EFA virtual device */
+ /*
+ * QP number in the given EFA virtual device. Least-significant bits (as
+ * needed according to max_qp) carry unique QP ID
+ */
u16 qp_num;
/* MBZ */
@@ -193,8 +205,14 @@ struct efa_admin_modify_qp_cmd {
struct efa_admin_aq_common_desc aq_common_desc;
/*
- * Mask indicating which fields should be updated see enum
- * efa_admin_modify_qp_mask_bits
+ * Mask indicating which fields should be updated
+ * 0 : qp_state
+ * 1 : cur_qp_state
+ * 2 : qkey
+ * 3 : sq_psn
+ * 4 : sq_drained_async_notify
+ * 5 : rnr_retry
+ * 31:6 : reserved
*/
u32 modify_mask;
@@ -216,8 +234,8 @@ struct efa_admin_modify_qp_cmd {
/* Enable async notification when SQ is drained */
u8 sq_drained_async_notify;
- /* MBZ */
- u8 reserved1;
+ /* Number of RNR retries (valid only for SRD QPs) */
+ u8 rnr_retry;
/* MBZ */
u16 reserved2;
@@ -252,8 +270,8 @@ struct efa_admin_query_qp_resp {
/* Indicates that draining is in progress */
u8 sq_draining;
- /* MBZ */
- u8 reserved1;
+ /* Number of RNR retries (valid only for SRD QPs) */
+ u8 rnr_retry;
/* MBZ */
u16 reserved2;
@@ -286,6 +304,7 @@ struct efa_admin_create_ah_cmd {
/* PD number */
u16 pd;
+ /* MBZ */
u16 reserved;
};
@@ -296,6 +315,7 @@ struct efa_admin_create_ah_resp {
/* Target interface address handle (opaque) */
u16 ah;
+ /* MBZ */
u16 reserved;
};
@@ -362,12 +382,19 @@ struct efa_admin_reg_mr_cmd {
/*
* permissions
- * 0 : local_write_enable - Write permissions: value
- * of 1 needed for RQ buffers and for RDMA write
- * 7:1 : reserved1 - remote access flags, etc
+ * 0 : local_write_enable - Local write permissions:
+ * must be set for RQ buffers and buffers posted for
+ * RDMA Read requests
+ * 1 : remote_write_enable - Remote write
+ * permissions: must be set to enable RDMA write to
+ * the region
+ * 2 : remote_read_enable - Remote read permissions:
+ * must be set to enable RDMA read from the region
+ * 7:3 : reserved2 - MBZ
*/
u8 permissions;
+ /* MBZ */
u16 reserved16_w5;
/* number of pages in PBL (redundant, could be calculated) */
@@ -396,6 +423,32 @@ struct efa_admin_reg_mr_resp {
* memory region
*/
u32 r_key;
+
+ /*
+ * Mask indicating which fields have valid values
+ * 0 : recv_ic_id
+ * 1 : rdma_read_ic_id
+ * 2 : rdma_recv_ic_id
+ */
+ u8 validity;
+
+ /*
+ * Physical interconnect used by the device to reach the MR for receive
+ * operation
+ */
+ u8 recv_ic_id;
+
+ /*
+ * Physical interconnect used by the device to reach the MR for RDMA
+ * read operation
+ */
+ u8 rdma_read_ic_id;
+
+ /*
+ * Physical interconnect used by the device to reach the MR for RDMA
+ * write receive
+ */
+ u8 rdma_recv_ic_id;
};
struct efa_admin_dereg_mr_cmd {
@@ -411,32 +464,73 @@ struct efa_admin_dereg_mr_resp {
struct efa_admin_acq_common_desc acq_common_desc;
};
+/*
+ * Allocation of MemoryRegion, required for QP working with Virtual
+ * Addresses in kernel verbs semantics, ready for fast registration use.
+ */
+struct efa_admin_alloc_mr_cmd {
+ /* Common Admin Queue descriptor */
+ struct efa_admin_aq_common_desc aq_common_desc;
+
+ /* Protection Domain */
+ u16 pd;
+
+ /* MBZ */
+ u16 reserved1;
+
+ /* Maximum number of pages this MR supports. */
+ u32 max_pages;
+};
+
+struct efa_admin_alloc_mr_resp {
+ /* Common Admin Queue completion descriptor */
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ /*
+ * L_Key, to be used in conjunction with local buffer references in
+ * SQ and RQ WQE, or with virtual RQ/CQ rings
+ */
+ u32 l_key;
+
+ /*
+ * R_Key, to be used in RDMA messages to refer to remotely accessed
+ * memory region
+ */
+ u32 r_key;
+};
+
struct efa_admin_create_cq_cmd {
struct efa_admin_aq_common_desc aq_common_desc;
/*
- * 4:0 : reserved5
+ * 4:0 : reserved5 - MBZ
* 5 : interrupt_mode_enabled - if set, cq operates
- * in interrupt mode (i.e. CQ events and MSI-X are
- * generated), otherwise - polling
+ * in interrupt mode (i.e. CQ events and EQ elements
+ * are generated), otherwise - polling
* 6 : virt - If set, ring base address is virtual
* (IOVA returned by MR registration)
- * 7 : reserved6
+ * 7 : reserved6 - MBZ
*/
u8 cq_caps_1;
/*
* 4:0 : cq_entry_size_words - size of CQ entry in
* 32-bit words, valid values: 4, 8.
- * 7:5 : reserved7
+ * 5 : set_src_addr - If set, source address will be
+ * filled on RX completions from unknown senders.
+ * Requires 8 words CQ entry size.
+ * 7:6 : reserved7 - MBZ
*/
u8 cq_caps_2;
- /* completion queue depth in # of entries. must be power of 2 */
- u16 cq_depth;
+ /* Sub completion queue depth in # of entries. must be power of 2 */
+ u16 sub_cq_depth;
- /* msix vector assigned to this cq */
- u32 msix_vector_idx;
+ /* EQ number assigned to this cq */
+ u16 eqn;
+
+ /* MBZ */
+ u16 reserved;
/*
* CQ ring base address, virtual or physical depending on 'virt'
@@ -452,7 +546,7 @@ struct efa_admin_create_cq_cmd {
/*
* number of sub cqs - must be equal to sub_cqs_per_cq of queue
- * attributes.
+ * attributes.
*/
u16 num_sub_cqs;
@@ -465,8 +559,17 @@ struct efa_admin_create_cq_resp {
u16 cq_idx;
- /* actual cq depth in number of entries */
- u16 cq_actual_depth;
+ /* actual sub cq depth in number of entries */
+ u16 sub_cq_actual_depth;
+
+ /* CQ doorbell address, as offset to PCIe DB BAR */
+ u32 db_offset;
+
+ /*
+ * 0 : db_valid - If set, doorbell offset is valid.
+ * Always set when interrupts are requested.
+ */
+ u32 flags;
};
struct efa_admin_destroy_cq_cmd {
@@ -474,6 +577,7 @@ struct efa_admin_destroy_cq_cmd {
u16 cq_idx;
+ /* MBZ */
u16 reserved1;
};
@@ -514,21 +618,71 @@ struct efa_admin_basic_stats {
u64 rx_pkts;
u64 rx_drops;
+
+ u64 qkey_viol;
+};
+
+struct efa_admin_messages_stats {
+ u64 send_bytes;
+
+ u64 send_wrs;
+
+ u64 recv_bytes;
+
+ u64 recv_wrs;
+};
+
+struct efa_admin_rdma_read_stats {
+ u64 read_wrs;
+
+ u64 read_bytes;
+
+ u64 read_wr_err;
+
+ u64 read_resp_bytes;
+};
+
+struct efa_admin_rdma_write_stats {
+ u64 write_wrs;
+
+ u64 write_bytes;
+
+ u64 write_wr_err;
+
+ u64 write_recv_bytes;
+};
+
+struct efa_admin_network_stats {
+ u64 retrans_bytes;
+
+ u64 retrans_pkts;
+
+ u64 retrans_timeout_events;
+
+ u64 unresponsive_remote_events;
+
+ u64 impaired_remote_conn_events;
};
struct efa_admin_acq_get_stats_resp {
struct efa_admin_acq_common_desc acq_common_desc;
- struct efa_admin_basic_stats basic_stats;
+ union {
+ struct efa_admin_basic_stats basic_stats;
+
+ struct efa_admin_messages_stats messages_stats;
+
+ struct efa_admin_rdma_read_stats rdma_read_stats;
+
+ struct efa_admin_rdma_write_stats rdma_write_stats;
+
+ struct efa_admin_network_stats network_stats;
+ } u;
};
struct efa_admin_get_set_feature_common_desc {
- /*
- * 1:0 : select - 0x1 - current value; 0x3 - default
- * value
- * 7:3 : reserved3
- */
- u8 flags;
+ /* MBZ */
+ u8 reserved0;
/* as appears in efa_admin_aq_feature_id */
u8 feature_id;
@@ -553,38 +707,69 @@ struct efa_admin_feature_device_attr_desc {
/* Bar used for SQ and RQ doorbells */
u16 db_bar;
- /* Indicates how many bits are used physical address access */
+ /* Indicates how many bits are used on physical address access */
u8 phys_addr_width;
- /* Indicates how many bits are used virtual address access */
+ /* Indicates how many bits are used on virtual address access */
u8 virt_addr_width;
+
+ /*
+ * 0 : rdma_read - If set, RDMA Read is supported on
+ * TX queues
+ * 1 : rnr_retry - If set, RNR retry is supported on
+ * modify QP command
+ * 2 : data_polling_128 - If set, 128 bytes data
+ * polling is supported
+ * 3 : rdma_write - If set, RDMA Write is supported
+ * on TX queues
+ * 4 : unsolicited_write_recv - If set, unsolicited
+ * write with imm. receive is supported
+ * 31:5 : reserved - MBZ
+ */
+ u32 device_caps;
+
+ /* Max RDMA transfer size in bytes */
+ u32 max_rdma_size;
+
+ /* Unique global ID for an EFA device */
+ u64 guid;
+
+ /* The device maximum link speed in Gbit/sec */
+ u16 max_link_speed_gbps;
+
+ /* MBZ */
+ u16 reserved0;
+
+ /* MBZ */
+ u32 reserved1;
};
struct efa_admin_feature_queue_attr_desc {
/* The maximum number of queue pairs supported */
u32 max_qp;
+ /* Maximum number of WQEs per Send Queue */
u32 max_sq_depth;
- /* max send wr used in inline-buf */
+ /* Maximum size of data that can be sent inline in a Send WQE */
u32 inline_buf_size;
+ /* Maximum number of buffer descriptors per Recv Queue */
u32 max_rq_depth;
/* The maximum number of completion queues supported per VF */
u32 max_cq;
+ /* Maximum number of CQEs per Completion Queue */
u32 max_cq_depth;
/* Number of sub-CQs to be created for each CQ */
u16 sub_cqs_per_cq;
- u16 reserved;
+ /* Minimum number of WQEs per SQ */
+ u16 min_sq_depth;
- /*
- * Maximum number of SGEs (buffs) allowed for a single send work
- * queue element (WQE)
- */
+ /* Maximum number of SGEs (buffers) allowed for a single send WQE */
u16 max_wr_send_sges;
/* Maximum number of SGEs allowed for a single recv WQE */
@@ -604,6 +789,31 @@ struct efa_admin_feature_queue_attr_desc {
/* The maximum size of LLQ in bytes */
u32 max_llq_size;
+
+ /* Maximum number of SGEs for a single RDMA read/write WQE */
+ u16 max_wr_rdma_sges;
+
+ /*
+ * Maximum number of bytes that can be written to SQ between two
+ * consecutive doorbells (in units of 64B). Driver must ensure that only
+ * complete WQEs are written to queue before issuing a doorbell.
+ * Examples: max_tx_batch=16 and WQE size = 64B, means up to 16 WQEs can
+ * be written to SQ between two consecutive doorbells. max_tx_batch=11
+ * and WQE size = 128B, means up to 5 WQEs can be written to SQ between
+ * two consecutive doorbells. Zero means unlimited.
+ */
+ u16 max_tx_batch;
+};
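max_tx_batch is expressed in 64-byte units and only whole WQEs may be written between doorbells, so the usable batch is an integer division, matching the worked examples in the comment (16*64/64 = 16, 11*64/128 = 5). A hypothetical helper:

#include <linux/limits.h>
#include <linux/types.h>

static u32 demo_wqes_per_doorbell(u16 max_tx_batch, u32 wqe_size)
{
	if (!max_tx_batch)
		return U32_MAX;		/* zero means unlimited */
	return (max_tx_batch * 64u) / wqe_size;
}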
+
+struct efa_admin_event_queue_attr_desc {
+ /* The maximum number of event queues supported */
+ u32 max_eq;
+
+ /* Maximum number of EQEs per Event Queue */
+ u32 max_eq_depth;
+
+ /* Supported events bitmask */
+ u32 event_bitmask;
};
struct efa_admin_feature_aenq_desc {
@@ -618,6 +828,7 @@ struct efa_admin_feature_network_attr_desc {
/* Raw address data in network byte order */
u8 addr[16];
+ /* max packet payload size in bytes */
u32 mtu;
};
@@ -663,6 +874,8 @@ struct efa_admin_get_feature_resp {
struct efa_admin_feature_queue_attr_desc queue_attr;
+ struct efa_admin_event_queue_attr_desc event_queue_attr;
+
struct efa_admin_hw_hints hw_hints;
} u;
};
@@ -746,6 +959,60 @@ struct efa_admin_dealloc_uar_resp {
struct efa_admin_acq_common_desc acq_common_desc;
};
+struct efa_admin_create_eq_cmd {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+
+ /* Size of the EQ in entries, must be power of 2 */
+ u16 depth;
+
+ /* MSI-X table entry index */
+ u8 msix_vec;
+
+ /*
+ * 4:0 : entry_size_words - size of EQ entry in
+ * 32-bit words
+ * 7:5 : reserved - MBZ
+ */
+ u8 caps;
+
+ /* EQ ring base address */
+ struct efa_common_mem_addr ba;
+
+ /*
+ * Enabled events on this EQ
+ * 0 : completion_events - Enable completion events
+ * 31:1 : reserved - MBZ
+ */
+ u32 event_bitmask;
+
+ /* MBZ */
+ u32 reserved;
+};
+
+struct efa_admin_create_eq_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ /* EQ number */
+ u16 eqn;
+
+ /* MBZ */
+ u16 reserved;
+};
+
+struct efa_admin_destroy_eq_cmd {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+
+ /* EQ number */
+ u16 eqn;
+
+ /* MBZ */
+ u16 reserved;
+};
+
+struct efa_admin_destroy_eq_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+};
+
/* asynchronous event notification groups */
enum efa_admin_aenq_group {
EFA_ADMIN_FATAL_ERROR = 1,
@@ -755,12 +1022,6 @@ enum efa_admin_aenq_group {
EFA_ADMIN_AENQ_GROUPS_NUM = 5,
};
-enum efa_admin_aenq_notification_syndrom {
- EFA_ADMIN_SUSPEND = 0,
- EFA_ADMIN_RESUME = 1,
- EFA_ADMIN_UPDATE_HINTS = 2,
-};
-
struct efa_admin_mmio_req_read_less_resp {
u16 req_id;
@@ -770,25 +1031,110 @@ struct efa_admin_mmio_req_read_less_resp {
u32 reg_val;
};
+enum efa_admin_os_type {
+ EFA_ADMIN_OS_LINUX = 0,
+};
+
+struct efa_admin_host_info {
+ /* OS distribution string format */
+ u8 os_dist_str[128];
+
+ /* Defined in enum efa_admin_os_type */
+ u32 os_type;
+
+ /* Kernel version string format */
+ u8 kernel_ver_str[32];
+
+ /* Kernel version numeric format */
+ u32 kernel_ver;
+
+ /*
+ * 7:0 : driver_module_type
+ * 15:8 : driver_sub_minor
+ * 23:16 : driver_minor
+ * 31:24 : driver_major
+ */
+ u32 driver_ver;
+
+ /*
+ * Device's Bus, Device and Function
+ * 2:0 : function
+ * 7:3 : device
+ * 15:8 : bus
+ */
+ u16 bdf;
+
+ /*
+ * Spec version
+ * 7:0 : spec_minor
+ * 15:8 : spec_major
+ */
+ u16 spec_ver;
+
+ /*
+ * 0 : intree - Intree driver
+ * 1 : gdr - GPUDirect RDMA supported
+ * 31:2 : reserved2
+ */
+ u32 flags;
+};
+
/* create_qp_cmd */
#define EFA_ADMIN_CREATE_QP_CMD_SQ_VIRT_MASK BIT(0)
-#define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_SHIFT 1
#define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_MASK BIT(1)
+#define EFA_ADMIN_CREATE_QP_CMD_UNSOLICITED_WRITE_RECV_MASK BIT(2)
+
+/* modify_qp_cmd */
+#define EFA_ADMIN_MODIFY_QP_CMD_QP_STATE_MASK BIT(0)
+#define EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE_MASK BIT(1)
+#define EFA_ADMIN_MODIFY_QP_CMD_QKEY_MASK BIT(2)
+#define EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN_MASK BIT(3)
+#define EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY_MASK BIT(4)
+#define EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY_MASK BIT(5)
/* reg_mr_cmd */
#define EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK GENMASK(4, 0)
-#define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_SHIFT 7
#define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_MASK BIT(7)
#define EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK BIT(0)
+#define EFA_ADMIN_REG_MR_CMD_REMOTE_WRITE_ENABLE_MASK BIT(1)
+#define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_MASK BIT(2)
+
+/* reg_mr_resp */
+#define EFA_ADMIN_REG_MR_RESP_RECV_IC_ID_MASK BIT(0)
+#define EFA_ADMIN_REG_MR_RESP_RDMA_READ_IC_ID_MASK BIT(1)
+#define EFA_ADMIN_REG_MR_RESP_RDMA_RECV_IC_ID_MASK BIT(2)
/* create_cq_cmd */
-#define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_SHIFT 5
#define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5)
-#define EFA_ADMIN_CREATE_CQ_CMD_VIRT_SHIFT 6
#define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6)
#define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0)
-
-/* get_set_feature_common_desc */
-#define EFA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0)
+#define EFA_ADMIN_CREATE_CQ_CMD_SET_SRC_ADDR_MASK BIT(5)
+
+/* create_cq_resp */
+#define EFA_ADMIN_CREATE_CQ_RESP_DB_VALID_MASK BIT(0)
+
+/* feature_device_attr_desc */
+#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0)
+#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK BIT(1)
+#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_DATA_POLLING_128_MASK BIT(2)
+#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_WRITE_MASK BIT(3)
+#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_UNSOLICITED_WRITE_RECV_MASK BIT(4)
+
+/* create_eq_cmd */
+#define EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0)
+#define EFA_ADMIN_CREATE_EQ_CMD_COMPLETION_EVENTS_MASK BIT(0)
+
+/* host_info */
+#define EFA_ADMIN_HOST_INFO_DRIVER_MODULE_TYPE_MASK GENMASK(7, 0)
+#define EFA_ADMIN_HOST_INFO_DRIVER_SUB_MINOR_MASK GENMASK(15, 8)
+#define EFA_ADMIN_HOST_INFO_DRIVER_MINOR_MASK GENMASK(23, 16)
+#define EFA_ADMIN_HOST_INFO_DRIVER_MAJOR_MASK GENMASK(31, 24)
+#define EFA_ADMIN_HOST_INFO_FUNCTION_MASK GENMASK(2, 0)
+#define EFA_ADMIN_HOST_INFO_DEVICE_MASK GENMASK(7, 3)
+#define EFA_ADMIN_HOST_INFO_BUS_MASK GENMASK(15, 8)
+#define EFA_ADMIN_HOST_INFO_SPEC_MINOR_MASK GENMASK(7, 0)
+#define EFA_ADMIN_HOST_INFO_SPEC_MAJOR_MASK GENMASK(15, 8)
+#define EFA_ADMIN_HOST_INFO_INTREE_MASK BIT(0)
+#define EFA_ADMIN_HOST_INFO_GDR_MASK BIT(1)
#endif /* _EFA_ADMIN_CMDS_H_ */
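The field definitions above carry only a _MASK (no separate _SHIFT), which suits helpers that derive the shift from the mask, such as FIELD_PREP()/FIELD_GET() from <linux/bitfield.h> or the driver's own EFA_SET() wrapper seen further down. A hypothetical packing of the host-info bdf word:

#include <linux/bitfield.h>
#include <linux/types.h>

static u16 demo_pack_bdf(u8 bus, u8 device, u8 function)
{
	return FIELD_PREP(EFA_ADMIN_HOST_INFO_BUS_MASK, bus) |
	       FIELD_PREP(EFA_ADMIN_HOST_INFO_DEVICE_MASK, device) |
	       FIELD_PREP(EFA_ADMIN_HOST_INFO_FUNCTION_MASK, function);
}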
diff --git a/drivers/infiniband/hw/efa/efa_admin_defs.h b/drivers/infiniband/hw/efa/efa_admin_defs.h
index c8e0c8b905be..35700c93e639 100644
--- a/drivers/infiniband/hw/efa/efa_admin_defs.h
+++ b/drivers/infiniband/hw/efa/efa_admin_defs.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_ADMIN_H_
@@ -82,7 +82,7 @@ struct efa_admin_acq_common_desc {
/*
* indicates to the driver which AQ entry has been consumed by the
- * device and could be reused
+ * device and could be reused
*/
u16 sq_head_indx;
};
@@ -96,7 +96,7 @@ struct efa_admin_acq_entry {
struct efa_admin_aenq_common_desc {
u16 group;
- u16 syndrom;
+ u16 syndrome;
/*
* 0 : phase
@@ -118,12 +118,47 @@ struct efa_admin_aenq_entry {
u32 inline_data_w4[12];
};
+enum efa_admin_eqe_event_type {
+ EFA_ADMIN_EQE_EVENT_TYPE_COMPLETION = 0,
+};
+
+/* Completion event */
+struct efa_admin_comp_event {
+ /* CQ number */
+ u16 cqn;
+
+ /* MBZ */
+ u16 reserved;
+
+ /* MBZ */
+ u32 reserved2;
+};
+
+/* Event Queue Element */
+struct efa_admin_eqe {
+ /*
+ * 0 : phase
+ * 8:1 : event_type - Event type
+ * 31:9 : reserved - MBZ
+ */
+ u32 common;
+
+ /* MBZ */
+ u32 reserved;
+
+ union {
+ /* Event data */
+ u32 event_data[2];
+
+ /* Completion Event */
+ struct efa_admin_comp_event comp_event;
+ } u;
+};
+
/* aq_common_desc */
#define EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0)
#define EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0)
-#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_SHIFT 1
#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK BIT(1)
-#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_SHIFT 2
#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK BIT(2)
/* acq_common_desc */
@@ -133,4 +168,8 @@ struct efa_admin_aenq_entry {
/* aenq_common_desc */
#define EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0)
+/* eqe */
+#define EFA_ADMIN_EQE_PHASE_MASK BIT(0)
+#define EFA_ADMIN_EQE_EVENT_TYPE_MASK GENMASK(8, 1)
+
#endif /* _EFA_ADMIN_H_ */
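
Editorial note: the new efa_admin_eqe layout keeps the phase bit and the event type in the low bits of the common word, with the completion payload in the union. A minimal decode sketch, assuming the generic FIELD_GET() accessor from <linux/bitfield.h>; efa_example_decode_eqe() is illustrative and not part of the patch.

#include <linux/bitfield.h>
#include <linux/printk.h>

/* Hypothetical helper: decode one event queue entry using the masks above. */
static void efa_example_decode_eqe(const struct efa_admin_eqe *eqe)
{
	u8 phase = FIELD_GET(EFA_ADMIN_EQE_PHASE_MASK, eqe->common);
	u8 type = FIELD_GET(EFA_ADMIN_EQE_EVENT_TYPE_MASK, eqe->common);

	if (type == EFA_ADMIN_EQE_EVENT_TYPE_COMPLETION)
		pr_debug("EQE phase %d: completion on CQ %d\n", phase,
			 eqe->u.comp_event.cqn);
}
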
diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c
index a5c788741a04..0e979ca10d24 100644
--- a/drivers/infiniband/hw/efa/efa_com.c
+++ b/drivers/infiniband/hw/efa/efa_com.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
- * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#include "efa_com.h"
@@ -16,31 +16,13 @@
#define EFA_ASYNC_QUEUE_DEPTH 16
#define EFA_ADMIN_QUEUE_DEPTH 32
-#define MIN_EFA_VER\
- ((EFA_ADMIN_API_VERSION_MAJOR << EFA_REGS_VERSION_MAJOR_VERSION_SHIFT) | \
- (EFA_ADMIN_API_VERSION_MINOR & EFA_REGS_VERSION_MINOR_VERSION_MASK))
-
#define EFA_CTRL_MAJOR 0
#define EFA_CTRL_MINOR 0
#define EFA_CTRL_SUB_MINOR 1
-#define MIN_EFA_CTRL_VER \
- (((EFA_CTRL_MAJOR) << \
- (EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT)) | \
- ((EFA_CTRL_MINOR) << \
- (EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT)) | \
- (EFA_CTRL_SUB_MINOR))
-
-#define EFA_DMA_ADDR_TO_UINT32_LOW(x) ((u32)((u64)(x)))
-#define EFA_DMA_ADDR_TO_UINT32_HIGH(x) ((u32)(((u64)(x)) >> 32))
-
-#define EFA_REGS_ADMIN_INTR_MASK 1
-
enum efa_cmd_status {
EFA_CMD_SUBMITTED,
EFA_CMD_COMPLETED,
- /* Abort - canceled by the driver */
- EFA_CMD_ABORTED,
};
struct efa_comp_ctx {
@@ -48,8 +30,7 @@ struct efa_comp_ctx {
struct efa_admin_acq_entry *user_cqe;
u32 comp_size;
enum efa_cmd_status status;
- /* status from the device */
- u8 comp_status;
+ u16 cmd_id;
u8 cmd_opcode;
u8 occupied;
};
@@ -76,17 +57,25 @@ static const char *efa_com_cmd_str(u8 cmd)
EFA_CMD_STR_CASE(DEALLOC_PD);
EFA_CMD_STR_CASE(ALLOC_UAR);
EFA_CMD_STR_CASE(DEALLOC_UAR);
+ EFA_CMD_STR_CASE(CREATE_EQ);
+ EFA_CMD_STR_CASE(DESTROY_EQ);
default: return "unknown command opcode";
}
#undef EFA_CMD_STR_CASE
}
+void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low)
+{
+ *addr_low = lower_32_bits(addr);
+ *addr_high = upper_32_bits(addr);
+}
+
static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset)
{
struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
struct efa_admin_mmio_req_read_less_resp *read_resp;
unsigned long exp_time;
- u32 mmio_read_reg;
+ u32 mmio_read_reg = 0;
u32 err;
read_resp = mmio_read->read_resp;
@@ -96,10 +85,9 @@ static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset)
/* trash DMA req_id to identify when hardware is done */
read_resp->req_id = mmio_read->seq_num + 0x9aL;
- mmio_read_reg = (offset << EFA_REGS_MMIO_REG_READ_REG_OFF_SHIFT) &
- EFA_REGS_MMIO_REG_READ_REG_OFF_MASK;
- mmio_read_reg |= mmio_read->seq_num &
- EFA_REGS_MMIO_REG_READ_REQ_ID_MASK;
+ EFA_SET(&mmio_read_reg, EFA_REGS_MMIO_REG_READ_REG_OFF, offset);
+ EFA_SET(&mmio_read_reg, EFA_REGS_MMIO_REG_READ_REQ_ID,
+ mmio_read->seq_num);
writel(mmio_read_reg, edev->reg_bar + EFA_REGS_MMIO_REG_READ_OFF);
@@ -111,17 +99,19 @@ static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset)
} while (time_is_after_jiffies(exp_time));
if (read_resp->req_id != mmio_read->seq_num) {
- ibdev_err(edev->efa_dev,
- "Reading register timed out. expected: req id[%u] offset[%#x] actual: req id[%u] offset[%#x]\n",
- mmio_read->seq_num, offset, read_resp->req_id,
- read_resp->reg_off);
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Reading register timed out. expected: req id[%u] offset[%#x] actual: req id[%u] offset[%#x]\n",
+ mmio_read->seq_num, offset, read_resp->req_id,
+ read_resp->reg_off);
err = EFA_MMIO_READ_INVALID;
goto out;
}
if (read_resp->reg_off != offset) {
- ibdev_err(edev->efa_dev,
- "Reading register failed: wrong offset provided\n");
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Reading register failed: wrong offset provided\n");
err = EFA_MMIO_READ_INVALID;
goto out;
}
@@ -137,9 +127,9 @@ static int efa_com_admin_init_sq(struct efa_com_dev *edev)
struct efa_com_admin_queue *aq = &edev->aq;
struct efa_com_admin_sq *sq = &aq->sq;
u16 size = aq->depth * sizeof(*sq->entries);
+ u32 aq_caps = 0;
u32 addr_high;
u32 addr_low;
- u32 aq_caps;
sq->entries =
dma_alloc_coherent(aq->dmadev, size, &sq->dma_addr, GFP_KERNEL);
@@ -154,16 +144,15 @@ static int efa_com_admin_init_sq(struct efa_com_dev *edev)
sq->db_addr = (u32 __iomem *)(edev->reg_bar + EFA_REGS_AQ_PROD_DB_OFF);
- addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(sq->dma_addr);
- addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(sq->dma_addr);
+ addr_high = upper_32_bits(sq->dma_addr);
+ addr_low = lower_32_bits(sq->dma_addr);
writel(addr_low, edev->reg_bar + EFA_REGS_AQ_BASE_LO_OFF);
writel(addr_high, edev->reg_bar + EFA_REGS_AQ_BASE_HI_OFF);
- aq_caps = aq->depth & EFA_REGS_AQ_CAPS_AQ_DEPTH_MASK;
- aq_caps |= (sizeof(struct efa_admin_aq_entry) <<
- EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT) &
- EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK;
+ EFA_SET(&aq_caps, EFA_REGS_AQ_CAPS_AQ_DEPTH, aq->depth);
+ EFA_SET(&aq_caps, EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE,
+ sizeof(struct efa_admin_aq_entry));
writel(aq_caps, edev->reg_bar + EFA_REGS_AQ_CAPS_OFF);
@@ -175,9 +164,9 @@ static int efa_com_admin_init_cq(struct efa_com_dev *edev)
struct efa_com_admin_queue *aq = &edev->aq;
struct efa_com_admin_cq *cq = &aq->cq;
u16 size = aq->depth * sizeof(*cq->entries);
+ u32 acq_caps = 0;
u32 addr_high;
u32 addr_low;
- u32 acq_caps;
cq->entries =
dma_alloc_coherent(aq->dmadev, size, &cq->dma_addr, GFP_KERNEL);
@@ -189,19 +178,17 @@ static int efa_com_admin_init_cq(struct efa_com_dev *edev)
cq->cc = 0;
cq->phase = 1;
- addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(cq->dma_addr);
- addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(cq->dma_addr);
+ addr_high = upper_32_bits(cq->dma_addr);
+ addr_low = lower_32_bits(cq->dma_addr);
writel(addr_low, edev->reg_bar + EFA_REGS_ACQ_BASE_LO_OFF);
writel(addr_high, edev->reg_bar + EFA_REGS_ACQ_BASE_HI_OFF);
- acq_caps = aq->depth & EFA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK;
- acq_caps |= (sizeof(struct efa_admin_acq_entry) <<
- EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT) &
- EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK;
- acq_caps |= (aq->msix_vector_idx <<
- EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_SHIFT) &
- EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_MASK;
+ EFA_SET(&acq_caps, EFA_REGS_ACQ_CAPS_ACQ_DEPTH, aq->depth);
+ EFA_SET(&acq_caps, EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE,
+ sizeof(struct efa_admin_acq_entry));
+ EFA_SET(&acq_caps, EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR,
+ aq->msix_vector_idx);
writel(acq_caps, edev->reg_bar + EFA_REGS_ACQ_CAPS_OFF);
@@ -212,7 +199,8 @@ static int efa_com_admin_init_aenq(struct efa_com_dev *edev,
struct efa_aenq_handlers *aenq_handlers)
{
struct efa_com_aenq *aenq = &edev->aenq;
- u32 addr_low, addr_high, aenq_caps;
+ u32 addr_low, addr_high;
+ u32 aenq_caps = 0;
u16 size;
if (!aenq_handlers) {
@@ -231,19 +219,17 @@ static int efa_com_admin_init_aenq(struct efa_com_dev *edev,
aenq->cc = 0;
aenq->phase = 1;
- addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr);
- addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr);
+ addr_low = lower_32_bits(aenq->dma_addr);
+ addr_high = upper_32_bits(aenq->dma_addr);
writel(addr_low, edev->reg_bar + EFA_REGS_AENQ_BASE_LO_OFF);
writel(addr_high, edev->reg_bar + EFA_REGS_AENQ_BASE_HI_OFF);
- aenq_caps = aenq->depth & EFA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK;
- aenq_caps |= (sizeof(struct efa_admin_aenq_entry) <<
- EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) &
- EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK;
- aenq_caps |= (aenq->msix_vector_idx
- << EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_SHIFT) &
- EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_MASK;
+ EFA_SET(&aenq_caps, EFA_REGS_AENQ_CAPS_AENQ_DEPTH, aenq->depth);
+ EFA_SET(&aenq_caps, EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE,
+ sizeof(struct efa_admin_aenq_entry));
+ EFA_SET(&aenq_caps, EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR,
+ aenq->msix_vector_idx);
writel(aenq_caps, edev->reg_bar + EFA_REGS_AENQ_CAPS_OFF);
/*
@@ -280,36 +266,35 @@ static void efa_com_dealloc_ctx_id(struct efa_com_admin_queue *aq,
static inline void efa_com_put_comp_ctx(struct efa_com_admin_queue *aq,
struct efa_comp_ctx *comp_ctx)
{
- u16 comp_id = comp_ctx->user_cqe->acq_common_descriptor.command &
- EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK;
+ u16 cmd_id = EFA_GET(&comp_ctx->user_cqe->acq_common_descriptor.command,
+ EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID);
+ u16 ctx_id = cmd_id & (aq->depth - 1);
- ibdev_dbg(aq->efa_dev, "Putting completion command_id %d\n", comp_id);
+ ibdev_dbg(aq->efa_dev, "Put completion command_id %#x\n", cmd_id);
comp_ctx->occupied = 0;
- efa_com_dealloc_ctx_id(aq, comp_id);
+ efa_com_dealloc_ctx_id(aq, ctx_id);
}
static struct efa_comp_ctx *efa_com_get_comp_ctx(struct efa_com_admin_queue *aq,
- u16 command_id, bool capture)
+ u16 cmd_id, bool capture)
{
- if (command_id >= aq->depth) {
- ibdev_err(aq->efa_dev,
- "command id is larger than the queue size. cmd_id: %u queue size %d\n",
- command_id, aq->depth);
- return NULL;
- }
+ u16 ctx_id = cmd_id & (aq->depth - 1);
- if (aq->comp_ctx[command_id].occupied && capture) {
- ibdev_err(aq->efa_dev, "Completion context is occupied\n");
+ if (aq->comp_ctx[ctx_id].occupied && capture) {
+ ibdev_err_ratelimited(
+ aq->efa_dev,
+ "Completion context for command_id %#x is occupied\n",
+ cmd_id);
return NULL;
}
if (capture) {
- aq->comp_ctx[command_id].occupied = 1;
- ibdev_dbg(aq->efa_dev, "Taking completion ctxt command_id %d\n",
- command_id);
+ aq->comp_ctx[ctx_id].occupied = 1;
+ ibdev_dbg(aq->efa_dev,
+ "Take completion ctxt for command_id %#x\n", cmd_id);
}
- return &aq->comp_ctx[command_id];
+ return &aq->comp_ctx[ctx_id];
}
static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq,
@@ -318,8 +303,10 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu
struct efa_admin_acq_entry *comp,
size_t comp_size_in_bytes)
{
+ struct efa_admin_aq_entry *aqe;
struct efa_comp_ctx *comp_ctx;
u16 queue_size_mask;
+ u16 cmd_id;
u16 ctx_id;
u16 pi;
@@ -328,13 +315,16 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu
ctx_id = efa_com_alloc_ctx_id(aq);
- cmd->aq_common_descriptor.flags |= aq->sq.phase &
- EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK;
+ /* cmd_id LSBs are the ctx_id and MSBs are entropy bits from pc */
+ cmd_id = ctx_id & queue_size_mask;
+ cmd_id |= aq->sq.pc & ~queue_size_mask;
+ cmd_id &= EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK;
- cmd->aq_common_descriptor.command_id |= ctx_id &
- EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK;
+ cmd->aq_common_descriptor.command_id = cmd_id;
+ EFA_SET(&cmd->aq_common_descriptor.flags,
+ EFA_ADMIN_AQ_COMMON_DESC_PHASE, aq->sq.phase);
- comp_ctx = efa_com_get_comp_ctx(aq, ctx_id, true);
+ comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, true);
if (!comp_ctx) {
efa_com_dealloc_ctx_id(aq, ctx_id);
return ERR_PTR(-EINVAL);
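
Editorial note on the hunk above: the new command id encodes both the completion-context slot and part of the producer counter. Worked example: with an admin queue depth of 32 the queue_size_mask is 31, so ctx_id 5 and sq.pc 100 yield cmd_id (100 & ~31) | 5 = 101, and the completion path recovers the slot as 101 & 31 = 5 while the upper bits expose stale or mismatched completions. A hypothetical standalone helper expressing the same packing (efa_example_make_cmd_id() is illustrative only, not code from this series):

#include <linux/types.h>

static u16 efa_example_make_cmd_id(u16 ctx_id, u16 pc, u16 depth)
{
	u16 queue_size_mask = depth - 1;
	u16 cmd_id;

	cmd_id = ctx_id & queue_size_mask;	/* low bits: context slot */
	cmd_id |= pc & ~queue_size_mask;	/* high bits: producer counter */

	return cmd_id & EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK;
}
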
@@ -344,10 +334,13 @@ static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queu
comp_ctx->comp_size = comp_size_in_bytes;
comp_ctx->user_cqe = comp;
comp_ctx->cmd_opcode = cmd->aq_common_descriptor.opcode;
+ comp_ctx->cmd_id = cmd_id;
reinit_completion(&comp_ctx->wait_event);
- memcpy(&aq->sq.entries[pi], cmd, cmd_size_in_bytes);
+ aqe = &aq->sq.entries[pi];
+ memset(aqe, 0, sizeof(*aqe));
+ memcpy(aqe, cmd, cmd_size_in_bytes);
aq->sq.pc++;
atomic64_inc(&aq->stats.submitted_cmd);
@@ -401,7 +394,7 @@ static struct efa_comp_ctx *efa_com_submit_admin_cmd(struct efa_com_admin_queue
spin_lock(&aq->sq.lock);
if (!test_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state)) {
- ibdev_err(aq->efa_dev, "Admin queue is closed\n");
+ ibdev_err_ratelimited(aq->efa_dev, "Admin queue is closed\n");
spin_unlock(&aq->sq.lock);
return ERR_PTR(-ENODEV);
}
@@ -415,38 +408,39 @@ static struct efa_comp_ctx *efa_com_submit_admin_cmd(struct efa_com_admin_queue
return comp_ctx;
}
-static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq,
- struct efa_admin_acq_entry *cqe)
+static int efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq,
+ struct efa_admin_acq_entry *cqe)
{
struct efa_comp_ctx *comp_ctx;
u16 cmd_id;
- cmd_id = cqe->acq_common_descriptor.command &
- EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK;
+ cmd_id = EFA_GET(&cqe->acq_common_descriptor.command,
+ EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID);
comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, false);
- if (!comp_ctx) {
+ if (comp_ctx->status != EFA_CMD_SUBMITTED) {
ibdev_err(aq->efa_dev,
- "comp_ctx is NULL. Changing the admin queue running state\n");
- clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
- return;
+ "Received completion with unexpected command id[%d], sq producer: %d, sq consumer: %d, cq consumer: %d\n",
+ cmd_id, aq->sq.pc, aq->sq.cc, aq->cq.cc);
+ return -EINVAL;
}
comp_ctx->status = EFA_CMD_COMPLETED;
- comp_ctx->comp_status = cqe->acq_common_descriptor.status;
- if (comp_ctx->user_cqe)
- memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size);
+ memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size);
if (!test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state))
complete(&comp_ctx->wait_event);
+
+ return 0;
}
static void efa_com_handle_admin_completion(struct efa_com_admin_queue *aq)
{
struct efa_admin_acq_entry *cqe;
u16 queue_size_mask;
- u16 comp_num = 0;
+ u16 comp_cmds = 0;
u8 phase;
+ int err;
u16 ci;
queue_size_mask = aq->depth - 1;
@@ -464,10 +458,12 @@ static void efa_com_handle_admin_completion(struct efa_com_admin_queue *aq)
* phase bit was validated
*/
dma_rmb();
- efa_com_handle_single_admin_completion(aq, cqe);
+ err = efa_com_handle_single_admin_completion(aq, cqe);
+ if (!err)
+ comp_cmds++;
+ aq->cq.cc++;
ci++;
- comp_num++;
if (ci == aq->depth) {
ci = 0;
phase = !phase;
@@ -476,10 +472,9 @@ static void efa_com_handle_admin_completion(struct efa_com_admin_queue *aq)
cqe = &aq->cq.entries[ci];
}
- aq->cq.cc += comp_num;
aq->cq.phase = phase;
- aq->sq.cc += comp_num;
- atomic64_add(comp_num, &aq->stats.completed_cmd);
+ aq->sq.cc += comp_cmds;
+ atomic64_add(comp_cmds, &aq->stats.completed_cmd);
}
static int efa_com_comp_status_to_errno(u8 comp_status)
@@ -519,8 +514,9 @@ static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_c
break;
if (time_is_before_jiffies(timeout)) {
- ibdev_err(aq->efa_dev,
- "Wait for completion (polling) timeout\n");
+ ibdev_err_ratelimited(
+ aq->efa_dev,
+ "Wait for completion (polling) timeout\n");
/* EFA didn't have any completion */
atomic64_inc(&aq->stats.no_completion);
@@ -532,17 +528,7 @@ static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_c
msleep(aq->poll_interval);
}
- if (comp_ctx->status == EFA_CMD_ABORTED) {
- ibdev_err(aq->efa_dev, "Command was aborted\n");
- atomic64_inc(&aq->stats.aborted_cmd);
- err = -ENODEV;
- goto out;
- }
-
- WARN_ONCE(comp_ctx->status != EFA_CMD_COMPLETED,
- "Invalid completion status %d\n", comp_ctx->status);
-
- err = efa_com_comp_status_to_errno(comp_ctx->comp_status);
+ err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status);
out:
efa_com_put_comp_ctx(aq, comp_ctx);
return err;
@@ -571,24 +557,28 @@ static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *com
atomic64_inc(&aq->stats.no_completion);
if (comp_ctx->status == EFA_CMD_COMPLETED)
- ibdev_err(aq->efa_dev,
- "The device sent a completion but the driver didn't receive any MSI-X interrupt for admin cmd %s(%d) status %d (ctx: 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
- efa_com_cmd_str(comp_ctx->cmd_opcode),
- comp_ctx->cmd_opcode, comp_ctx->status,
- comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
+ ibdev_err_ratelimited(
+ aq->efa_dev,
+ "The device sent a completion but the driver didn't receive any MSI-X interrupt for admin cmd %s(%d) status %d (id: %d, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
+ efa_com_cmd_str(comp_ctx->cmd_opcode),
+ comp_ctx->cmd_opcode, comp_ctx->status,
+ comp_ctx->cmd_id, aq->sq.pc, aq->sq.cc,
+ aq->cq.cc);
else
- ibdev_err(aq->efa_dev,
- "The device didn't send any completion for admin cmd %s(%d) status %d (ctx 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
- efa_com_cmd_str(comp_ctx->cmd_opcode),
- comp_ctx->cmd_opcode, comp_ctx->status,
- comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc);
+ ibdev_err_ratelimited(
+ aq->efa_dev,
+ "The device didn't send any completion for admin cmd %s(%d) status %d (id: %d, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
+ efa_com_cmd_str(comp_ctx->cmd_opcode),
+ comp_ctx->cmd_opcode, comp_ctx->status,
+ comp_ctx->cmd_id, aq->sq.pc, aq->sq.cc,
+ aq->cq.cc);
clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
err = -ETIME;
goto out;
}
- err = efa_com_comp_status_to_errno(comp_ctx->comp_status);
+ err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status);
out:
efa_com_put_comp_ctx(aq, comp_ctx);
return err;
@@ -643,22 +633,27 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
cmd->aq_common_descriptor.opcode);
comp_ctx = efa_com_submit_admin_cmd(aq, cmd, cmd_size, comp, comp_size);
if (IS_ERR(comp_ctx)) {
- ibdev_err(aq->efa_dev,
- "Failed to submit command %s (opcode %u) err %ld\n",
- efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
- cmd->aq_common_descriptor.opcode, PTR_ERR(comp_ctx));
+ ibdev_err_ratelimited(
+ aq->efa_dev,
+ "Failed to submit command %s (opcode %u) err %pe\n",
+ efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
+ cmd->aq_common_descriptor.opcode, comp_ctx);
up(&aq->avail_cmds);
+ atomic64_inc(&aq->stats.cmd_err);
return PTR_ERR(comp_ctx);
}
err = efa_com_wait_and_process_admin_cq(comp_ctx, aq);
- if (err)
- ibdev_err(aq->efa_dev,
- "Failed to process command %s (opcode %u) comp_status %d err %d\n",
- efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
- cmd->aq_common_descriptor.opcode,
- comp_ctx->comp_status, err);
+ if (err) {
+ ibdev_err_ratelimited(
+ aq->efa_dev,
+ "Failed to process command %s (opcode %u) comp_status %d err %d\n",
+ efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
+ cmd->aq_common_descriptor.opcode,
+ comp_ctx->user_cqe->acq_common_descriptor.status, err);
+ atomic64_inc(&aq->stats.cmd_err);
+ }
up(&aq->avail_cmds);
@@ -666,66 +661,6 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
}
/**
- * efa_com_abort_admin_commands - Abort all the outstanding admin commands.
- * @edev: EFA communication layer struct
- *
- * This method aborts all the outstanding admin commands.
- * The caller should then call efa_com_wait_for_abort_completion to make sure
- * all the commands were completed.
- */
-static void efa_com_abort_admin_commands(struct efa_com_dev *edev)
-{
- struct efa_com_admin_queue *aq = &edev->aq;
- struct efa_comp_ctx *comp_ctx;
- unsigned long flags;
- u16 i;
-
- spin_lock(&aq->sq.lock);
- spin_lock_irqsave(&aq->cq.lock, flags);
- for (i = 0; i < aq->depth; i++) {
- comp_ctx = efa_com_get_comp_ctx(aq, i, false);
- if (!comp_ctx)
- break;
-
- comp_ctx->status = EFA_CMD_ABORTED;
-
- complete(&comp_ctx->wait_event);
- }
- spin_unlock_irqrestore(&aq->cq.lock, flags);
- spin_unlock(&aq->sq.lock);
-}
-
-/**
- * efa_com_wait_for_abort_completion - Wait for admin commands abort.
- * @edev: EFA communication layer struct
- *
- * This method wait until all the outstanding admin commands will be completed.
- */
-static void efa_com_wait_for_abort_completion(struct efa_com_dev *edev)
-{
- struct efa_com_admin_queue *aq = &edev->aq;
- int i;
-
- /* all mine */
- for (i = 0; i < aq->depth; i++)
- down(&aq->avail_cmds);
-
- /* let it go */
- for (i = 0; i < aq->depth; i++)
- up(&aq->avail_cmds);
-}
-
-static void efa_com_admin_flush(struct efa_com_dev *edev)
-{
- struct efa_com_admin_queue *aq = &edev->aq;
-
- clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
-
- efa_com_abort_admin_commands(edev);
- efa_com_wait_for_abort_completion(edev);
-}
-
-/**
* efa_com_admin_destroy - Destroy the admin and the async events queues.
* @edev: EFA communication layer struct
*/
@@ -737,7 +672,7 @@ void efa_com_admin_destroy(struct efa_com_dev *edev)
struct efa_com_admin_sq *sq = &aq->sq;
u16 size;
- efa_com_admin_flush(edev);
+ clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
devm_kfree(edev->dmadev, aq->comp_ctx_pool);
devm_kfree(edev->dmadev, aq->comp_ctx);
@@ -764,7 +699,7 @@ void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling)
u32 mask_value = 0;
if (polling)
- mask_value = EFA_REGS_ADMIN_INTR_MASK;
+ EFA_SET(&mask_value, EFA_REGS_INTR_MASK_EN, 1);
writel(mask_value, edev->reg_bar + EFA_REGS_INTR_MASK_OFF);
if (polling)
@@ -802,7 +737,7 @@ int efa_com_admin_init(struct efa_com_dev *edev,
int err;
dev_sts = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
- if (!(dev_sts & EFA_REGS_DEV_STS_READY_MASK)) {
+ if (!EFA_GET(&dev_sts, EFA_REGS_DEV_STS_READY)) {
ibdev_err(edev->efa_dev,
"Device isn't ready, abort com init %#x\n", dev_sts);
return -ENODEV;
@@ -837,8 +772,7 @@ int efa_com_admin_init(struct efa_com_dev *edev,
goto err_destroy_cq;
cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
- timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >>
- EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT;
+ timeout = EFA_GET(&cap, EFA_REGS_CAPS_ADMIN_CMD_TO);
if (timeout)
/* the resolution of timeout reg is 100ms */
aq->completion_timeout = timeout * 100000;
@@ -870,7 +804,7 @@ err_destroy_comp_ctxt:
* This method goes over the admin completion queue and wakes up
* all the pending threads that wait on the commands wait event.
*
- * @note: Should be called after MSI-X interrupt.
+ * Note: Should be called after MSI-X interrupt.
*/
void efa_com_admin_q_comp_intr_handler(struct efa_com_dev *edev)
{
@@ -999,7 +933,9 @@ void efa_com_mmio_reg_read_destroy(struct efa_com_dev *edev)
int efa_com_validate_version(struct efa_com_dev *edev)
{
+ u32 min_ctrl_ver = 0;
u32 ctrl_ver_masked;
+ u32 min_ver = 0;
u32 ctrl_ver;
u32 ver;
@@ -1012,33 +948,42 @@ int efa_com_validate_version(struct efa_com_dev *edev)
EFA_REGS_CONTROLLER_VERSION_OFF);
ibdev_dbg(edev->efa_dev, "efa device version: %d.%d\n",
- (ver & EFA_REGS_VERSION_MAJOR_VERSION_MASK) >>
- EFA_REGS_VERSION_MAJOR_VERSION_SHIFT,
- ver & EFA_REGS_VERSION_MINOR_VERSION_MASK);
-
- if (ver < MIN_EFA_VER) {
+ EFA_GET(&ver, EFA_REGS_VERSION_MAJOR_VERSION),
+ EFA_GET(&ver, EFA_REGS_VERSION_MINOR_VERSION));
+
+ EFA_SET(&min_ver, EFA_REGS_VERSION_MAJOR_VERSION,
+ EFA_ADMIN_API_VERSION_MAJOR);
+ EFA_SET(&min_ver, EFA_REGS_VERSION_MINOR_VERSION,
+ EFA_ADMIN_API_VERSION_MINOR);
+ if (ver < min_ver) {
ibdev_err(edev->efa_dev,
"EFA version is lower than the minimal version the driver supports\n");
return -EOPNOTSUPP;
}
- ibdev_dbg(edev->efa_dev,
- "efa controller version: %d.%d.%d implementation version %d\n",
- (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >>
- EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT,
- (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >>
- EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT,
- (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK),
- (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK) >>
- EFA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT);
+ ibdev_dbg(
+ edev->efa_dev,
+ "efa controller version: %d.%d.%d implementation version %d\n",
+ EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION),
+ EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION),
+ EFA_GET(&ctrl_ver,
+ EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION),
+ EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_IMPL_ID));
ctrl_ver_masked =
- (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) |
- (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) |
- (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK);
-
+ EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION) |
+ EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION) |
+ EFA_GET(&ctrl_ver,
+ EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION);
+
+ EFA_SET(&min_ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION,
+ EFA_CTRL_MAJOR);
+ EFA_SET(&min_ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION,
+ EFA_CTRL_MINOR);
+ EFA_SET(&min_ctrl_ver, EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION,
+ EFA_CTRL_SUB_MINOR);
/* Validate the ctrl version without the implementation ID */
- if (ctrl_ver_masked < MIN_EFA_CTRL_VER) {
+ if (ctrl_ver_masked < min_ctrl_ver) {
ibdev_err(edev->efa_dev,
"EFA ctrl version is lower than the minimal ctrl version the driver supports\n");
return -EOPNOTSUPP;
@@ -1061,8 +1006,7 @@ int efa_com_get_dma_width(struct efa_com_dev *edev)
u32 caps = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
int width;
- width = (caps & EFA_REGS_CAPS_DMA_ADDR_WIDTH_MASK) >>
- EFA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT;
+ width = EFA_GET(&caps, EFA_REGS_CAPS_DMA_ADDR_WIDTH);
ibdev_dbg(edev->efa_dev, "DMA width: %d\n", width);
@@ -1076,16 +1020,14 @@ int efa_com_get_dma_width(struct efa_com_dev *edev)
return width;
}
-static int wait_for_reset_state(struct efa_com_dev *edev, u32 timeout,
- u16 exp_state)
+static int wait_for_reset_state(struct efa_com_dev *edev, u32 timeout, int on)
{
u32 val, i;
for (i = 0; i < timeout; i++) {
val = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
- if ((val & EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK) ==
- exp_state)
+ if (EFA_GET(&val, EFA_REGS_DEV_STS_RESET_IN_PROGRESS) == on)
return 0;
ibdev_dbg(edev->efa_dev, "Reset indication val %d\n", val);
@@ -1105,36 +1047,34 @@ static int wait_for_reset_state(struct efa_com_dev *edev, u32 timeout,
int efa_com_dev_reset(struct efa_com_dev *edev,
enum efa_regs_reset_reason_types reset_reason)
{
- u32 stat, timeout, cap, reset_val;
+ u32 stat, timeout, cap;
+ u32 reset_val = 0;
int err;
stat = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
- if (!(stat & EFA_REGS_DEV_STS_READY_MASK)) {
+ if (!EFA_GET(&stat, EFA_REGS_DEV_STS_READY)) {
ibdev_err(edev->efa_dev,
"Device isn't ready, can't reset device\n");
return -EINVAL;
}
- timeout = (cap & EFA_REGS_CAPS_RESET_TIMEOUT_MASK) >>
- EFA_REGS_CAPS_RESET_TIMEOUT_SHIFT;
+ timeout = EFA_GET(&cap, EFA_REGS_CAPS_RESET_TIMEOUT);
if (!timeout) {
ibdev_err(edev->efa_dev, "Invalid timeout value\n");
return -EINVAL;
}
/* start reset */
- reset_val = EFA_REGS_DEV_CTL_DEV_RESET_MASK;
- reset_val |= (reset_reason << EFA_REGS_DEV_CTL_RESET_REASON_SHIFT) &
- EFA_REGS_DEV_CTL_RESET_REASON_MASK;
+ EFA_SET(&reset_val, EFA_REGS_DEV_CTL_DEV_RESET, 1);
+ EFA_SET(&reset_val, EFA_REGS_DEV_CTL_RESET_REASON, reset_reason);
writel(reset_val, edev->reg_bar + EFA_REGS_DEV_CTL_OFF);
/* reset clears the mmio readless address, restore it */
efa_com_mmio_reg_read_resp_addr_init(edev);
- err = wait_for_reset_state(edev, timeout,
- EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK);
+ err = wait_for_reset_state(edev, timeout, 1);
if (err) {
ibdev_err(edev->efa_dev, "Reset indication didn't turn on\n");
return err;
@@ -1148,8 +1088,7 @@ int efa_com_dev_reset(struct efa_com_dev *edev,
return err;
}
- timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >>
- EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT;
+ timeout = EFA_GET(&cap, EFA_REGS_CAPS_ADMIN_CMD_TO);
if (timeout)
/* the resolution of timeout reg is 100ms */
edev->aq.completion_timeout = timeout * 100000;
@@ -1158,3 +1097,159 @@ int efa_com_dev_reset(struct efa_com_dev *edev,
return 0;
}
+
+static int efa_com_create_eq(struct efa_com_dev *edev,
+ struct efa_com_create_eq_params *params,
+ struct efa_com_create_eq_result *result)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_create_eq_resp resp = {};
+ struct efa_admin_create_eq_cmd cmd = {};
+ int err;
+
+ cmd.aq_common_descriptor.opcode = EFA_ADMIN_CREATE_EQ;
+ EFA_SET(&cmd.caps, EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS,
+ params->entry_size_in_bytes / 4);
+ cmd.depth = params->depth;
+ cmd.event_bitmask = params->event_bitmask;
+ cmd.msix_vec = params->msix_vec;
+
+ efa_com_set_dma_addr(params->dma_addr, &cmd.ba.mem_addr_high,
+ &cmd.ba.mem_addr_low);
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct efa_admin_acq_entry *)&resp,
+ sizeof(resp));
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to create eq[%d]\n", err);
+ return err;
+ }
+
+ result->eqn = resp.eqn;
+
+ return 0;
+}
+
+static void efa_com_destroy_eq(struct efa_com_dev *edev,
+ struct efa_com_destroy_eq_params *params)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_destroy_eq_resp resp = {};
+ struct efa_admin_destroy_eq_cmd cmd = {};
+ int err;
+
+ cmd.aq_common_descriptor.opcode = EFA_ADMIN_DESTROY_EQ;
+ cmd.eqn = params->eqn;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct efa_admin_acq_entry *)&resp,
+ sizeof(resp));
+ if (err)
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to destroy EQ-%u [%d]\n", cmd.eqn,
+ err);
+}
+
+static void efa_com_arm_eq(struct efa_com_dev *edev, struct efa_com_eq *eeq)
+{
+ u32 val = 0;
+
+ EFA_SET(&val, EFA_REGS_EQ_DB_EQN, eeq->eqn);
+ EFA_SET(&val, EFA_REGS_EQ_DB_ARM, 1);
+
+ writel(val, edev->reg_bar + EFA_REGS_EQ_DB_OFF);
+}
+
+void efa_com_eq_comp_intr_handler(struct efa_com_dev *edev,
+ struct efa_com_eq *eeq)
+{
+ struct efa_admin_eqe *eqe;
+ u32 processed = 0;
+ u8 phase;
+ u32 ci;
+
+ ci = eeq->cc & (eeq->depth - 1);
+ phase = eeq->phase;
+ eqe = &eeq->eqes[ci];
+
+ /* Go over all the events */
+ while ((READ_ONCE(eqe->common) & EFA_ADMIN_EQE_PHASE_MASK) == phase) {
+ /*
+ * Do not read the rest of the completion entry before the
+ * phase bit was validated
+ */
+ dma_rmb();
+
+ eeq->cb(eeq, eqe);
+
+ /* Get next event entry */
+ ci++;
+ processed++;
+
+ if (ci == eeq->depth) {
+ ci = 0;
+ phase = !phase;
+ }
+
+ eqe = &eeq->eqes[ci];
+ }
+
+ eeq->cc += processed;
+ eeq->phase = phase;
+ efa_com_arm_eq(eeq->edev, eeq);
+}
+
+void efa_com_eq_destroy(struct efa_com_dev *edev, struct efa_com_eq *eeq)
+{
+ struct efa_com_destroy_eq_params params = {
+ .eqn = eeq->eqn,
+ };
+
+ efa_com_destroy_eq(edev, &params);
+ dma_free_coherent(edev->dmadev, eeq->depth * sizeof(*eeq->eqes),
+ eeq->eqes, eeq->dma_addr);
+}
+
+int efa_com_eq_init(struct efa_com_dev *edev, struct efa_com_eq *eeq,
+ efa_eqe_handler cb, u16 depth, u8 msix_vec)
+{
+ struct efa_com_create_eq_params params = {};
+ struct efa_com_create_eq_result result = {};
+ int err;
+
+ params.depth = depth;
+ params.entry_size_in_bytes = sizeof(*eeq->eqes);
+ EFA_SET(&params.event_bitmask,
+ EFA_ADMIN_CREATE_EQ_CMD_COMPLETION_EVENTS, 1);
+ params.msix_vec = msix_vec;
+
+ eeq->eqes = dma_alloc_coherent(edev->dmadev,
+ params.depth * sizeof(*eeq->eqes),
+ &params.dma_addr, GFP_KERNEL);
+ if (!eeq->eqes)
+ return -ENOMEM;
+
+ err = efa_com_create_eq(edev, &params, &result);
+ if (err)
+ goto err_free_coherent;
+
+ eeq->eqn = result.eqn;
+ eeq->edev = edev;
+ eeq->dma_addr = params.dma_addr;
+ eeq->phase = 1;
+ eeq->depth = params.depth;
+ eeq->cb = cb;
+ efa_com_arm_eq(edev, eeq);
+
+ return 0;
+
+err_free_coherent:
+ dma_free_coherent(edev->dmadev, params.depth * sizeof(*eeq->eqes),
+ eeq->eqes, params.dma_addr);
+ return err;
+}
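
Editorial note: the EQ support added above is self-contained: efa_com_eq_init() allocates the ring, issues CREATE_EQ and arms the doorbell, and efa_com_eq_comp_intr_handler() walks phase-valid entries before re-arming. A hypothetical caller sketch follows; the function names and the 1024-entry depth are illustrative assumptions, not taken from the patch.

#include "efa_com.h"

/* Hypothetical callback: dispatch eqe->u.comp_event.cqn to the matching CQ. */
static void efa_example_eqe_cb(struct efa_com_eq *eeq, struct efa_admin_eqe *eqe)
{
	/* CQ lookup and completion handling would go here. */
}

/* Hypothetical setup: one EQ bound to a given MSI-X vector. */
static int efa_example_setup_eq(struct efa_com_dev *edev, struct efa_com_eq *eeq,
				u8 msix_vec)
{
	return efa_com_eq_init(edev, eeq, efa_example_eqe_cb, 1024, msix_vec);
}
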
diff --git a/drivers/infiniband/hw/efa/efa_com.h b/drivers/infiniband/hw/efa/efa_com.h
index 84d96724a74b..4d9ca97e4296 100644
--- a/drivers/infiniband/hw/efa/efa_com.h
+++ b/drivers/infiniband/hw/efa/efa_com.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_COM_H_
@@ -45,9 +45,9 @@ struct efa_com_admin_sq {
/* Don't use anything other than atomic64 */
struct efa_com_stats_admin {
- atomic64_t aborted_cmd;
atomic64_t submitted_cmd;
atomic64_t completed_cmd;
+ atomic64_t cmd_err;
atomic64_t no_completion;
};
@@ -65,7 +65,7 @@ struct efa_com_admin_queue {
u16 depth;
struct efa_com_admin_cq cq;
struct efa_com_admin_sq sq;
- u16 msix_vector_idx;
+ u32 msix_vector_idx;
unsigned long state;
@@ -80,13 +80,16 @@ struct efa_com_admin_queue {
};
struct efa_aenq_handlers;
+struct efa_com_eq;
+typedef void (*efa_eqe_handler)(struct efa_com_eq *eeq,
+ struct efa_admin_eqe *eqe);
struct efa_com_aenq {
struct efa_admin_aenq_entry *entries;
struct efa_aenq_handlers *aenq_handlers;
dma_addr_t dma_addr;
u32 cc; /* consumer counter */
- u16 msix_vector_idx;
+ u32 msix_vector_idx;
u16 depth;
u8 phase;
};
@@ -112,6 +115,33 @@ struct efa_com_dev {
struct efa_com_mmio_read mmio_read;
};
+struct efa_com_eq {
+ struct efa_com_dev *edev;
+ struct efa_admin_eqe *eqes;
+ dma_addr_t dma_addr;
+ u32 cc; /* Consumer counter */
+ u16 eqn;
+ u16 depth;
+ u8 phase;
+ efa_eqe_handler cb;
+};
+
+struct efa_com_create_eq_params {
+ dma_addr_t dma_addr;
+ u32 event_bitmask;
+ u16 depth;
+ u8 entry_size_in_bytes;
+ u8 msix_vec;
+};
+
+struct efa_com_create_eq_result {
+ u16 eqn;
+};
+
+struct efa_com_destroy_eq_params {
+ u16 eqn;
+};
+
typedef void (*efa_aenq_handler)(void *data,
struct efa_admin_aenq_entry *aenq_e);
@@ -121,9 +151,13 @@ struct efa_aenq_handlers {
efa_aenq_handler unimplemented_handler;
};
+void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low);
int efa_com_admin_init(struct efa_com_dev *edev,
struct efa_aenq_handlers *aenq_handlers);
void efa_com_admin_destroy(struct efa_com_dev *edev);
+int efa_com_eq_init(struct efa_com_dev *edev, struct efa_com_eq *eeq,
+ efa_eqe_handler cb, u16 depth, u8 msix_vec);
+void efa_com_eq_destroy(struct efa_com_dev *edev, struct efa_com_eq *eeq);
int efa_com_dev_reset(struct efa_com_dev *edev,
enum efa_regs_reset_reason_types reset_reason);
void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling);
@@ -140,5 +174,7 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
struct efa_admin_acq_entry *comp,
size_t comp_size);
void efa_com_aenq_intr_handler(struct efa_com_dev *edev, void *data);
+void efa_com_eq_comp_intr_handler(struct efa_com_dev *edev,
+ struct efa_com_eq *eeq);
#endif /* _EFA_COM_H_ */
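
Editorial note: the header changes above expose the EQ entry points to the rest of the driver. A minimal sketch of how an MSI-X handler might hand off to the new completion-interrupt helper; efa_example_eq_irq() and its wiring are assumptions for illustration, not code from this series.

#include <linux/interrupt.h>
#include "efa_com.h"

/* Hypothetical MSI-X handler: the vector's dev_id is assumed to be the EQ. */
static irqreturn_t efa_example_eq_irq(int irq, void *data)
{
	struct efa_com_eq *eeq = data;

	efa_com_eq_comp_intr_handler(eeq->edev, eeq);

	return IRQ_HANDLED;
}
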
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c
index 14227725521c..9ead02800ac7 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.c
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.c
@@ -1,18 +1,11 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
- * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
-#include "efa.h"
#include "efa_com.h"
#include "efa_com_cmd.h"
-void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low)
-{
- *addr_low = lower_32_bits(addr);
- *addr_high = upper_32_bits(addr);
-}
-
int efa_com_create_qp(struct efa_com_dev *edev,
struct efa_com_create_qp_params *params,
struct efa_com_create_qp_result *res)
@@ -38,6 +31,10 @@ int efa_com_create_qp(struct efa_com_dev *edev,
create_qp_cmd.qp_alloc_size.recv_queue_depth =
params->rq_depth;
create_qp_cmd.uar = params->uarn;
+ create_qp_cmd.sl = params->sl;
+
+ if (params->unsolicited_write_recv)
+ EFA_SET(&create_qp_cmd.flags, EFA_ADMIN_CREATE_QP_CMD_UNSOLICITED_WRITE_RECV, 1);
err = efa_com_cmd_exec(aq,
(struct efa_admin_aq_entry *)&create_qp_cmd,
@@ -45,7 +42,8 @@ int efa_com_create_qp(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&cmd_completion,
sizeof(cmd_completion));
if (err) {
- ibdev_err(edev->efa_dev, "Failed to create qp [%d]\n", err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to create qp [%d]\n", err);
return err;
}
@@ -57,7 +55,7 @@ int efa_com_create_qp(struct efa_com_dev *edev,
res->send_sub_cq_idx = cmd_completion.send_sub_cq_idx;
res->recv_sub_cq_idx = cmd_completion.recv_sub_cq_idx;
- return err;
+ return 0;
}
int efa_com_modify_qp(struct efa_com_dev *edev,
@@ -76,6 +74,7 @@ int efa_com_modify_qp(struct efa_com_dev *edev,
cmd.qkey = params->qkey;
cmd.sq_psn = params->sq_psn;
cmd.sq_drained_async_notify = params->sq_drained_async_notify;
+ cmd.rnr_retry = params->rnr_retry;
err = efa_com_cmd_exec(aq,
(struct efa_admin_aq_entry *)&cmd,
@@ -83,9 +82,10 @@ int efa_com_modify_qp(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&resp,
sizeof(resp));
if (err) {
- ibdev_err(edev->efa_dev,
- "Failed to modify qp-%u modify_mask[%#x] [%d]\n",
- cmd.qp_handle, cmd.modify_mask, err);
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Failed to modify qp-%u modify_mask[%#x] [%d]\n",
+ cmd.qp_handle, cmd.modify_mask, err);
return err;
}
@@ -110,8 +110,9 @@ int efa_com_query_qp(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&resp,
sizeof(resp));
if (err) {
- ibdev_err(edev->efa_dev, "Failed to query qp-%u [%d]\n",
- cmd.qp_handle, err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to query qp-%u [%d]\n",
+ cmd.qp_handle, err);
return err;
}
@@ -119,6 +120,7 @@ int efa_com_query_qp(struct efa_com_dev *edev,
result->qkey = resp.qkey;
result->sq_draining = resp.sq_draining;
result->sq_psn = resp.sq_psn;
+ result->rnr_retry = resp.rnr_retry;
return 0;
}
@@ -139,9 +141,12 @@ int efa_com_destroy_qp(struct efa_com_dev *edev,
sizeof(qp_cmd),
(struct efa_admin_acq_entry *)&cmd_completion,
sizeof(cmd_completion));
- if (err)
- ibdev_err(edev->efa_dev, "Failed to destroy qp-%u [%d]\n",
- qp_cmd.qp_handle, err);
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to destroy qp-%u [%d]\n",
+ qp_cmd.qp_handle, err);
+ return err;
+ }
return 0;
}
@@ -150,18 +155,27 @@ int efa_com_create_cq(struct efa_com_dev *edev,
struct efa_com_create_cq_params *params,
struct efa_com_create_cq_result *result)
{
- struct efa_admin_create_cq_resp cmd_completion;
+ struct efa_admin_create_cq_resp cmd_completion = {};
struct efa_admin_create_cq_cmd create_cmd = {};
struct efa_com_admin_queue *aq = &edev->aq;
int err;
create_cmd.aq_common_desc.opcode = EFA_ADMIN_CREATE_CQ;
- create_cmd.cq_caps_2 = (params->entry_size_in_bytes / 4) &
- EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK;
- create_cmd.cq_depth = params->cq_depth;
+ EFA_SET(&create_cmd.cq_caps_2,
+ EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS,
+ params->entry_size_in_bytes / 4);
+ create_cmd.sub_cq_depth = params->sub_cq_depth;
create_cmd.num_sub_cqs = params->num_sub_cqs;
create_cmd.uar = params->uarn;
-
+ if (params->interrupt_mode_enabled) {
+ EFA_SET(&create_cmd.cq_caps_1,
+ EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED, 1);
+ create_cmd.eqn = params->eqn;
+ }
+ if (params->set_src_addr) {
+ EFA_SET(&create_cmd.cq_caps_2,
+ EFA_ADMIN_CREATE_CQ_CMD_SET_SRC_ADDR, 1);
+ }
efa_com_set_dma_addr(params->dma_addr,
&create_cmd.cq_ba.mem_addr_high,
&create_cmd.cq_ba.mem_addr_low);
@@ -172,14 +186,18 @@ int efa_com_create_cq(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&cmd_completion,
sizeof(cmd_completion));
if (err) {
- ibdev_err(edev->efa_dev, "Failed to create cq[%d]\n", err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to create cq[%d]\n", err);
return err;
}
result->cq_idx = cmd_completion.cq_idx;
- result->actual_depth = params->cq_depth;
+ result->actual_depth = params->sub_cq_depth;
+ result->db_off = cmd_completion.db_offset;
+ result->db_valid = EFA_GET(&cmd_completion.flags,
+ EFA_ADMIN_CREATE_CQ_RESP_DB_VALID);
- return err;
+ return 0;
}
int efa_com_destroy_cq(struct efa_com_dev *edev,
@@ -199,9 +217,12 @@ int efa_com_destroy_cq(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&destroy_resp,
sizeof(destroy_resp));
- if (err)
- ibdev_err(edev->efa_dev, "Failed to destroy CQ-%u [%d]\n",
- params->cq_idx, err);
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to destroy CQ-%u [%d]\n",
+ params->cq_idx, err);
+ return err;
+ }
return 0;
}
@@ -218,11 +239,10 @@ int efa_com_register_mr(struct efa_com_dev *edev,
mr_cmd.aq_common_desc.opcode = EFA_ADMIN_REG_MR;
mr_cmd.pd = params->pd;
mr_cmd.mr_length = params->mr_length_in_bytes;
- mr_cmd.flags |= params->page_shift &
- EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK;
+ EFA_SET(&mr_cmd.flags, EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT,
+ params->page_shift);
mr_cmd.iova = params->iova;
- mr_cmd.permissions |= params->permissions &
- EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK;
+ mr_cmd.permissions = params->permissions;
if (params->inline_pbl) {
memcpy(mr_cmd.pbl.inline_pbl_array,
@@ -234,11 +254,11 @@ int efa_com_register_mr(struct efa_com_dev *edev,
params->pbl.pbl.address.mem_addr_low;
mr_cmd.pbl.pbl.address.mem_addr_high =
params->pbl.pbl.address.mem_addr_high;
- mr_cmd.aq_common_desc.flags |=
- EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK;
+ EFA_SET(&mr_cmd.aq_common_desc.flags,
+ EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA, 1);
if (params->indirect)
- mr_cmd.aq_common_desc.flags |=
- EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+ EFA_SET(&mr_cmd.aq_common_desc.flags,
+ EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT, 1);
}
err = efa_com_cmd_exec(aq,
@@ -247,12 +267,22 @@ int efa_com_register_mr(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&cmd_completion,
sizeof(cmd_completion));
if (err) {
- ibdev_err(edev->efa_dev, "Failed to register mr [%d]\n", err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to register mr [%d]\n", err);
return err;
}
result->l_key = cmd_completion.l_key;
result->r_key = cmd_completion.r_key;
+ result->ic_info.recv_ic_id = cmd_completion.recv_ic_id;
+ result->ic_info.rdma_read_ic_id = cmd_completion.rdma_read_ic_id;
+ result->ic_info.rdma_recv_ic_id = cmd_completion.rdma_recv_ic_id;
+ result->ic_info.recv_ic_id_valid = EFA_GET(&cmd_completion.validity,
+ EFA_ADMIN_REG_MR_RESP_RECV_IC_ID);
+ result->ic_info.rdma_read_ic_id_valid = EFA_GET(&cmd_completion.validity,
+ EFA_ADMIN_REG_MR_RESP_RDMA_READ_IC_ID);
+ result->ic_info.rdma_recv_ic_id_valid = EFA_GET(&cmd_completion.validity,
+ EFA_ADMIN_REG_MR_RESP_RDMA_RECV_IC_ID);
return 0;
}
@@ -273,10 +303,12 @@ int efa_com_dereg_mr(struct efa_com_dev *edev,
sizeof(mr_cmd),
(struct efa_admin_acq_entry *)&cmd_completion,
sizeof(cmd_completion));
- if (err)
- ibdev_err(edev->efa_dev,
- "Failed to de-register mr(lkey-%u) [%d]\n",
- mr_cmd.l_key, err);
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to de-register mr(lkey-%u) [%d]\n",
+ mr_cmd.l_key, err);
+ return err;
+ }
return 0;
}
@@ -301,7 +333,9 @@ int efa_com_create_ah(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&cmd_completion,
sizeof(cmd_completion));
if (err) {
- ibdev_err(edev->efa_dev, "Failed to create ah [%d]\n", err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to create ah for %pI6 [%d]\n",
+ ah_cmd.dest_addr, err);
return err;
}
@@ -327,14 +361,17 @@ int efa_com_destroy_ah(struct efa_com_dev *edev,
sizeof(ah_cmd),
(struct efa_admin_acq_entry *)&cmd_completion,
sizeof(cmd_completion));
- if (err)
- ibdev_err(edev->efa_dev, "Failed to destroy ah-%d pd-%d [%d]\n",
- ah_cmd.ah, ah_cmd.pd, err);
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to destroy ah-%d pd-%d [%d]\n",
+ ah_cmd.ah, ah_cmd.pd, err);
+ return err;
+ }
return 0;
}
-static bool
+bool
efa_com_check_supported_feature_id(struct efa_com_dev *edev,
enum efa_admin_aq_feature_id feature_id)
{
@@ -359,8 +396,9 @@ static int efa_com_get_feature_ex(struct efa_com_dev *edev,
int err;
if (!efa_com_check_supported_feature_id(edev, feature_id)) {
- ibdev_err(edev->efa_dev, "Feature %d isn't supported\n",
- feature_id);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Feature %d isn't supported\n",
+ feature_id);
return -EOPNOTSUPP;
}
@@ -369,9 +407,8 @@ static int efa_com_get_feature_ex(struct efa_com_dev *edev,
get_cmd.aq_common_descriptor.opcode = EFA_ADMIN_GET_FEATURE;
if (control_buff_size)
- get_cmd.aq_common_descriptor.flags =
- EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
-
+ EFA_SET(&get_cmd.aq_common_descriptor.flags,
+ EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA, 1);
efa_com_set_dma_addr(control_buf_dma_addr,
&get_cmd.control_buffer.address.mem_addr_high,
@@ -387,10 +424,13 @@ static int efa_com_get_feature_ex(struct efa_com_dev *edev,
get_resp,
sizeof(*get_resp));
- if (err)
- ibdev_err(edev->efa_dev,
- "Failed to submit get_feature command %d [%d]\n",
- feature_id, err);
+ if (err) {
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Failed to submit get_feature command %d [%d]\n",
+ feature_id, err);
+ return err;
+ }
return 0;
}
@@ -402,27 +442,6 @@ static int efa_com_get_feature(struct efa_com_dev *edev,
return efa_com_get_feature_ex(edev, get_resp, feature_id, 0, 0);
}
-int efa_com_get_network_attr(struct efa_com_dev *edev,
- struct efa_com_get_network_attr_result *result)
-{
- struct efa_admin_get_feature_resp resp;
- int err;
-
- err = efa_com_get_feature(edev, &resp,
- EFA_ADMIN_NETWORK_ATTR);
- if (err) {
- ibdev_err(edev->efa_dev,
- "Failed to get network attributes %d\n", err);
- return err;
- }
-
- memcpy(result->addr, resp.u.network_attr.addr,
- sizeof(resp.u.network_attr.addr));
- result->mtu = resp.u.network_attr.mtu;
-
- return 0;
-}
-
int efa_com_get_device_attr(struct efa_com_dev *edev,
struct efa_com_get_device_attr_result *result)
{
@@ -431,8 +450,9 @@ int efa_com_get_device_attr(struct efa_com_dev *edev,
err = efa_com_get_feature(edev, &resp, EFA_ADMIN_DEVICE_ATTR);
if (err) {
- ibdev_err(edev->efa_dev, "Failed to get device attributes %d\n",
- err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to get device attributes %d\n",
+ err);
return err;
}
@@ -444,11 +464,16 @@ int efa_com_get_device_attr(struct efa_com_dev *edev,
result->phys_addr_width = resp.u.device_attr.phys_addr_width;
result->virt_addr_width = resp.u.device_attr.virt_addr_width;
result->db_bar = resp.u.device_attr.db_bar;
+ result->max_rdma_size = resp.u.device_attr.max_rdma_size;
+ result->device_caps = resp.u.device_attr.device_caps;
+ result->guid = resp.u.device_attr.guid;
+ result->max_link_speed_gbps = resp.u.device_attr.max_link_speed_gbps;
if (result->admin_api_version < 1) {
- ibdev_err(edev->efa_dev,
- "Failed to get device attr api version [%u < 1]\n",
- result->admin_api_version);
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Failed to get device attr api version [%u < 1]\n",
+ result->admin_api_version);
return -EINVAL;
}
@@ -456,8 +481,9 @@ int efa_com_get_device_attr(struct efa_com_dev *edev,
err = efa_com_get_feature(edev, &resp,
EFA_ADMIN_QUEUE_ATTR);
if (err) {
- ibdev_err(edev->efa_dev,
- "Failed to get network attributes %d\n", err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to get queue attributes %d\n",
+ err);
return err;
}
@@ -475,6 +501,38 @@ int efa_com_get_device_attr(struct efa_com_dev *edev,
result->max_ah = resp.u.queue_attr.max_ah;
result->max_llq_size = resp.u.queue_attr.max_llq_size;
result->sub_cqs_per_cq = resp.u.queue_attr.sub_cqs_per_cq;
+ result->max_wr_rdma_sge = resp.u.queue_attr.max_wr_rdma_sges;
+ result->max_tx_batch = resp.u.queue_attr.max_tx_batch;
+ result->min_sq_depth = resp.u.queue_attr.min_sq_depth;
+
+ err = efa_com_get_feature(edev, &resp, EFA_ADMIN_NETWORK_ATTR);
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to get network attributes %d\n",
+ err);
+ return err;
+ }
+
+ memcpy(result->addr, resp.u.network_attr.addr,
+ sizeof(resp.u.network_attr.addr));
+ result->mtu = resp.u.network_attr.mtu;
+
+ if (efa_com_check_supported_feature_id(edev,
+ EFA_ADMIN_EVENT_QUEUE_ATTR)) {
+ err = efa_com_get_feature(edev, &resp,
+ EFA_ADMIN_EVENT_QUEUE_ATTR);
+ if (err) {
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Failed to get event queue attributes %d\n",
+ err);
+ return err;
+ }
+
+ result->max_eq = resp.u.event_queue_attr.max_eq;
+ result->max_eq_depth = resp.u.event_queue_attr.max_eq_depth;
+ result->event_bitmask = resp.u.event_queue_attr.event_bitmask;
+ }
return 0;
}
@@ -487,7 +545,8 @@ int efa_com_get_hw_hints(struct efa_com_dev *edev,
err = efa_com_get_feature(edev, &resp, EFA_ADMIN_HW_HINTS);
if (err) {
- ibdev_err(edev->efa_dev, "Failed to get hw hints %d\n", err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to get hw hints %d\n", err);
return err;
}
@@ -499,19 +558,20 @@ int efa_com_get_hw_hints(struct efa_com_dev *edev,
return 0;
}
-static int efa_com_set_feature_ex(struct efa_com_dev *edev,
- struct efa_admin_set_feature_resp *set_resp,
- struct efa_admin_set_feature_cmd *set_cmd,
- enum efa_admin_aq_feature_id feature_id,
- dma_addr_t control_buf_dma_addr,
- u32 control_buff_size)
+int efa_com_set_feature_ex(struct efa_com_dev *edev,
+ struct efa_admin_set_feature_resp *set_resp,
+ struct efa_admin_set_feature_cmd *set_cmd,
+ enum efa_admin_aq_feature_id feature_id,
+ dma_addr_t control_buf_dma_addr,
+ u32 control_buff_size)
{
struct efa_com_admin_queue *aq;
int err;
if (!efa_com_check_supported_feature_id(edev, feature_id)) {
- ibdev_err(edev->efa_dev, "Feature %d isn't supported\n",
- feature_id);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Feature %d isn't supported\n",
+ feature_id);
return -EOPNOTSUPP;
}
@@ -519,8 +579,9 @@ static int efa_com_set_feature_ex(struct efa_com_dev *edev,
set_cmd->aq_common_descriptor.opcode = EFA_ADMIN_SET_FEATURE;
if (control_buff_size) {
- set_cmd->aq_common_descriptor.flags =
- EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+ set_cmd->aq_common_descriptor.flags = 0;
+ EFA_SET(&set_cmd->aq_common_descriptor.flags,
+ EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA, 1);
efa_com_set_dma_addr(control_buf_dma_addr,
&set_cmd->control_buffer.address.mem_addr_high,
&set_cmd->control_buffer.address.mem_addr_low);
@@ -534,10 +595,13 @@ static int efa_com_set_feature_ex(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)set_resp,
sizeof(*set_resp));
- if (err)
- ibdev_err(edev->efa_dev,
- "Failed to submit set_feature command %d error: %d\n",
- feature_id, err);
+ if (err) {
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Failed to submit set_feature command %d error: %d\n",
+ feature_id, err);
+ return err;
+ }
return 0;
}
@@ -562,8 +626,9 @@ int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups)
err = efa_com_get_feature(edev, &get_resp, EFA_ADMIN_AENQ_CONFIG);
if (err) {
- ibdev_err(edev->efa_dev, "Failed to get aenq attributes: %d\n",
- err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to get aenq attributes: %d\n",
+ err);
return err;
}
@@ -573,9 +638,10 @@ int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups)
get_resp.u.aenq.enabled_groups);
if ((get_resp.u.aenq.supported_groups & groups) != groups) {
- ibdev_err(edev->efa_dev,
- "Trying to set unsupported aenq groups[%#x] supported[%#x]\n",
- groups, get_resp.u.aenq.supported_groups);
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Trying to set unsupported aenq groups[%#x] supported[%#x]\n",
+ groups, get_resp.u.aenq.supported_groups);
return -EOPNOTSUPP;
}
@@ -583,8 +649,9 @@ int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups)
err = efa_com_set_feature(edev, &set_resp, &cmd,
EFA_ADMIN_AENQ_CONFIG);
if (err) {
- ibdev_err(edev->efa_dev, "Failed to set aenq attributes: %d\n",
- err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to set aenq attributes: %d\n",
+ err);
return err;
}
@@ -607,7 +674,8 @@ int efa_com_alloc_pd(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&resp,
sizeof(resp));
if (err) {
- ibdev_err(edev->efa_dev, "Failed to allocate pd[%d]\n", err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to allocate pd[%d]\n", err);
return err;
}
@@ -633,8 +701,9 @@ int efa_com_dealloc_pd(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&resp,
sizeof(resp));
if (err) {
- ibdev_err(edev->efa_dev, "Failed to deallocate pd-%u [%d]\n",
- cmd.pd, err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to deallocate pd-%u [%d]\n",
+ cmd.pd, err);
return err;
}
@@ -657,7 +726,8 @@ int efa_com_alloc_uar(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&resp,
sizeof(resp));
if (err) {
- ibdev_err(edev->efa_dev, "Failed to allocate uar[%d]\n", err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to allocate uar[%d]\n", err);
return err;
}
@@ -683,10 +753,86 @@ int efa_com_dealloc_uar(struct efa_com_dev *edev,
(struct efa_admin_acq_entry *)&resp,
sizeof(resp));
if (err) {
- ibdev_err(edev->efa_dev, "Failed to deallocate uar-%u [%d]\n",
- cmd.uar, err);
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to deallocate uar-%u [%d]\n",
+ cmd.uar, err);
return err;
}
return 0;
}
+
+int efa_com_get_stats(struct efa_com_dev *edev,
+ struct efa_com_get_stats_params *params,
+ union efa_com_get_stats_result *result)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_aq_get_stats_cmd cmd = {};
+ struct efa_admin_acq_get_stats_resp resp;
+ struct efa_admin_rdma_write_stats *rws;
+ struct efa_admin_rdma_read_stats *rrs;
+ struct efa_admin_messages_stats *ms;
+ struct efa_admin_network_stats *ns;
+ struct efa_admin_basic_stats *bs;
+ int err;
+
+ cmd.aq_common_descriptor.opcode = EFA_ADMIN_GET_STATS;
+ cmd.type = params->type;
+ cmd.scope = params->scope;
+ cmd.scope_modifier = params->scope_modifier;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct efa_admin_acq_entry *)&resp,
+ sizeof(resp));
+ if (err) {
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Failed to get stats type-%u scope-%u.%u [%d]\n",
+ cmd.type, cmd.scope, cmd.scope_modifier, err);
+ return err;
+ }
+
+ switch (cmd.type) {
+ case EFA_ADMIN_GET_STATS_TYPE_BASIC:
+ bs = &resp.u.basic_stats;
+ result->basic_stats.tx_bytes = bs->tx_bytes;
+ result->basic_stats.tx_pkts = bs->tx_pkts;
+ result->basic_stats.rx_bytes = bs->rx_bytes;
+ result->basic_stats.rx_pkts = bs->rx_pkts;
+ result->basic_stats.rx_drops = bs->rx_drops;
+ break;
+ case EFA_ADMIN_GET_STATS_TYPE_MESSAGES:
+ ms = &resp.u.messages_stats;
+ result->messages_stats.send_bytes = ms->send_bytes;
+ result->messages_stats.send_wrs = ms->send_wrs;
+ result->messages_stats.recv_bytes = ms->recv_bytes;
+ result->messages_stats.recv_wrs = ms->recv_wrs;
+ break;
+ case EFA_ADMIN_GET_STATS_TYPE_RDMA_READ:
+ rrs = &resp.u.rdma_read_stats;
+ result->rdma_read_stats.read_wrs = rrs->read_wrs;
+ result->rdma_read_stats.read_bytes = rrs->read_bytes;
+ result->rdma_read_stats.read_wr_err = rrs->read_wr_err;
+ result->rdma_read_stats.read_resp_bytes = rrs->read_resp_bytes;
+ break;
+ case EFA_ADMIN_GET_STATS_TYPE_RDMA_WRITE:
+ rws = &resp.u.rdma_write_stats;
+ result->rdma_write_stats.write_wrs = rws->write_wrs;
+ result->rdma_write_stats.write_bytes = rws->write_bytes;
+ result->rdma_write_stats.write_wr_err = rws->write_wr_err;
+ result->rdma_write_stats.write_recv_bytes = rws->write_recv_bytes;
+ break;
+ case EFA_ADMIN_GET_STATS_TYPE_NETWORK:
+ ns = &resp.u.network_stats;
+ result->network_stats.retrans_bytes = ns->retrans_bytes;
+ result->network_stats.retrans_pkts = ns->retrans_pkts;
+ result->network_stats.retrans_timeout_events = ns->retrans_timeout_events;
+ result->network_stats.unresponsive_remote_events = ns->unresponsive_remote_events;
+ result->network_stats.impaired_remote_conn_events = ns->impaired_remote_conn_events;
+ break;
+ }
+
+ return 0;
+}
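
Editorial note: efa_com_get_stats() above issues a single GET_STATS admin command and copies the type-specific counters out of the response union. A hypothetical caller reading the basic counters is sketched below; efa_example_read_basic_stats() is illustrative, and leaving scope and scope_modifier zeroed is assumed to select the whole-device scope.

#include "efa_com_cmd.h"

/* Hypothetical helper: fetch the device-wide TX packet counter. */
static int efa_example_read_basic_stats(struct efa_com_dev *edev, u64 *tx_pkts)
{
	struct efa_com_get_stats_params params = {
		.type = EFA_ADMIN_GET_STATS_TYPE_BASIC,
		/* scope/scope_modifier left 0: assumed whole-device scope */
	};
	union efa_com_get_stats_result result = {};
	int err;

	err = efa_com_get_stats(edev, &params, &result);
	if (err)
		return err;

	*tx_pkts = result.basic_stats.tx_pkts;

	return 0;
}
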
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h
index a1174380462c..3ac2686abba1 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.h
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_COM_CMD_H_
@@ -27,6 +27,8 @@ struct efa_com_create_qp_params {
u16 pd;
u16 uarn;
u8 qp_type;
+ u8 sl;
+ u8 unsolicited_write_recv : 1;
};
struct efa_com_create_qp_result {
@@ -47,6 +49,7 @@ struct efa_com_modify_qp_params {
u32 qkey;
u32 sq_psn;
u8 sq_drained_async_notify;
+ u8 rnr_retry;
};
struct efa_com_query_qp_params {
@@ -58,6 +61,7 @@ struct efa_com_query_qp_result {
u32 qkey;
u32 sq_draining;
u32 sq_psn;
+ u8 rnr_retry;
};
struct efa_com_destroy_qp_params {
@@ -68,10 +72,13 @@ struct efa_com_create_cq_params {
/* cq physical base address in OS memory */
dma_addr_t dma_addr;
/* completion queue depth in # of entries */
- u16 cq_depth;
+ u16 sub_cq_depth;
u16 num_sub_cqs;
u16 uarn;
+ u16 eqn;
u8 entry_size_in_bytes;
+ u8 interrupt_mode_enabled : 1;
+ u8 set_src_addr : 1;
};
struct efa_com_create_cq_result {
@@ -79,6 +86,8 @@ struct efa_com_create_cq_result {
u16 cq_idx;
/* actual cq depth in # of entries */
u16 actual_depth;
+ u32 db_off;
+ bool db_valid;
};
struct efa_com_destroy_cq_params {
@@ -100,14 +109,12 @@ struct efa_com_destroy_ah_params {
u16 pdn;
};
-struct efa_com_get_network_attr_result {
- u8 addr[EFA_GID_SIZE];
- u32 mtu;
-};
-
struct efa_com_get_device_attr_result {
+ u8 addr[EFA_GID_SIZE];
u64 page_size_cap;
u64 max_mr_pages;
+ u64 guid;
+ u32 mtu;
u32 fw_version;
u32 admin_api_version;
u32 device_version;
@@ -124,9 +131,18 @@ struct efa_com_get_device_attr_result {
u32 max_pd;
u32 max_ah;
u32 max_llq_size;
+ u32 max_rdma_size;
+ u32 device_caps;
+ u32 max_eq;
+ u32 max_eq_depth;
+ u32 event_bitmask; /* EQ events bitmask */
u16 sub_cqs_per_cq;
u16 max_sq_sge;
u16 max_rq_sge;
+ u16 max_wr_rdma_sge;
+ u16 max_tx_batch;
+ u16 min_sq_depth;
+ u16 max_link_speed_gbps;
u8 db_bar;
};
@@ -181,17 +197,21 @@ struct efa_com_reg_mr_params {
* address mapping
*/
u8 page_shift;
- /*
- * permissions
- * 0: local_write_enable - Write permissions: value of 1 needed
- * for RQ buffers and for RDMA write:1: reserved1 - remote
- * access flags, etc
- */
+ /* see permissions field of struct efa_admin_reg_mr_cmd */
u8 permissions;
u8 inline_pbl;
u8 indirect;
};
+struct efa_com_mr_interconnect_info {
+ u16 recv_ic_id;
+ u16 rdma_read_ic_id;
+ u16 rdma_recv_ic_id;
+ u8 recv_ic_id_valid : 1;
+ u8 rdma_read_ic_id_valid : 1;
+ u8 rdma_recv_ic_id_valid : 1;
+};
+
struct efa_com_reg_mr_result {
/*
* To be used in conjunction with local buffers references in SQ and
@@ -203,6 +223,7 @@ struct efa_com_reg_mr_result {
* accessed memory region
*/
u32 r_key;
+ struct efa_com_mr_interconnect_info ic_info;
};
struct efa_com_dereg_mr_params {
@@ -225,7 +246,59 @@ struct efa_com_dealloc_uar_params {
u16 uarn;
};
-void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low);
+struct efa_com_get_stats_params {
+ /* see enum efa_admin_get_stats_type */
+ u8 type;
+ /* see enum efa_admin_get_stats_scope */
+ u8 scope;
+ u16 scope_modifier;
+};
+
+struct efa_com_basic_stats {
+ u64 tx_bytes;
+ u64 tx_pkts;
+ u64 rx_bytes;
+ u64 rx_pkts;
+ u64 rx_drops;
+};
+
+struct efa_com_messages_stats {
+ u64 send_bytes;
+ u64 send_wrs;
+ u64 recv_bytes;
+ u64 recv_wrs;
+};
+
+struct efa_com_rdma_read_stats {
+ u64 read_wrs;
+ u64 read_bytes;
+ u64 read_wr_err;
+ u64 read_resp_bytes;
+};
+
+struct efa_com_rdma_write_stats {
+ u64 write_wrs;
+ u64 write_bytes;
+ u64 write_wr_err;
+ u64 write_recv_bytes;
+};
+
+struct efa_com_network_stats {
+ u64 retrans_bytes;
+ u64 retrans_pkts;
+ u64 retrans_timeout_events;
+ u64 unresponsive_remote_events;
+ u64 impaired_remote_conn_events;
+};
+
+union efa_com_get_stats_result {
+ struct efa_com_basic_stats basic_stats;
+ struct efa_com_messages_stats messages_stats;
+ struct efa_com_rdma_read_stats rdma_read_stats;
+ struct efa_com_rdma_write_stats rdma_write_stats;
+ struct efa_com_network_stats network_stats;
+};
+
int efa_com_create_qp(struct efa_com_dev *edev,
struct efa_com_create_qp_params *params,
struct efa_com_create_qp_result *res);
@@ -251,12 +324,19 @@ int efa_com_create_ah(struct efa_com_dev *edev,
struct efa_com_create_ah_result *result);
int efa_com_destroy_ah(struct efa_com_dev *edev,
struct efa_com_destroy_ah_params *params);
-int efa_com_get_network_attr(struct efa_com_dev *edev,
- struct efa_com_get_network_attr_result *result);
int efa_com_get_device_attr(struct efa_com_dev *edev,
struct efa_com_get_device_attr_result *result);
int efa_com_get_hw_hints(struct efa_com_dev *edev,
struct efa_com_get_hw_hints_result *result);
+bool
+efa_com_check_supported_feature_id(struct efa_com_dev *edev,
+ enum efa_admin_aq_feature_id feature_id);
+int efa_com_set_feature_ex(struct efa_com_dev *edev,
+ struct efa_admin_set_feature_resp *set_resp,
+ struct efa_admin_set_feature_cmd *set_cmd,
+ enum efa_admin_aq_feature_id feature_id,
+ dma_addr_t control_buf_dma_addr,
+ u32 control_buff_size);
int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups);
int efa_com_alloc_pd(struct efa_com_dev *edev,
struct efa_com_alloc_pd_result *result);
@@ -266,5 +346,8 @@ int efa_com_alloc_uar(struct efa_com_dev *edev,
struct efa_com_alloc_uar_result *result);
int efa_com_dealloc_uar(struct efa_com_dev *edev,
struct efa_com_dealloc_uar_params *params);
+int efa_com_get_stats(struct efa_com_dev *edev,
+ struct efa_com_get_stats_params *params,
+ union efa_com_get_stats_result *result);
#endif /* _EFA_COM_CMD_H_ */
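For orientation, a minimal caller sketch of the new stats interface. The type enumerator appears in the switch inside efa_com_get_stats() above; EFA_ADMIN_GET_STATS_SCOPE_ALL is assumed to come from the admin command definitions.

static int example_read_tx_pkts(struct efa_com_dev *edev, u64 *tx_pkts)
{
	struct efa_com_get_stats_params params = {
		.type = EFA_ADMIN_GET_STATS_TYPE_BASIC,
		/* assumed enumerator from the admin command definitions */
		.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL,
	};
	union efa_com_get_stats_result result;
	int err;

	err = efa_com_get_stats(edev, &params, &result);
	if (err)
		return err;

	*tx_pkts = result.basic_stats.tx_pkts;
	return 0;
}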
diff --git a/drivers/infiniband/hw/efa/efa_common_defs.h b/drivers/infiniband/hw/efa/efa_common_defs.h
index c559ec08898e..90af1c82c9c6 100644
--- a/drivers/infiniband/hw/efa/efa_common_defs.h
+++ b/drivers/infiniband/hw/efa/efa_common_defs.h
@@ -1,14 +1,25 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_COMMON_H_
#define _EFA_COMMON_H_
+#include <linux/bitfield.h>
+
#define EFA_COMMON_SPEC_VERSION_MAJOR 2
#define EFA_COMMON_SPEC_VERSION_MINOR 0
+#define EFA_GET(ptr, mask) FIELD_GET(mask##_MASK, *(ptr))
+
+#define EFA_SET(ptr, mask, value) \
+ ({ \
+ typeof(ptr) _ptr = ptr; \
+ *_ptr = (*_ptr & ~(mask##_MASK)) | \
+ FIELD_PREP(mask##_MASK, value); \
+ })
+
struct efa_common_mem_addr {
u32 mem_addr_low;
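The new accessors append the _MASK suffix themselves, so callers pass the bare field name. A minimal usage sketch with a hypothetical field (EXAMPLE_FIELD is not part of the driver):

/* hypothetical field occupying bits 5..2 */
#define EXAMPLE_FIELD_MASK GENMASK(5, 2)

static void example_bitfield_usage(void)
{
	u32 reg = 0;

	EFA_SET(&reg, EXAMPLE_FIELD, 0x3);	/* reg is now 0x0c */
	WARN_ON(EFA_GET(&reg, EXAMPLE_FIELD) != 0x3);
}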
diff --git a/drivers/infiniband/hw/efa/efa_io_defs.h b/drivers/infiniband/hw/efa/efa_io_defs.h
new file mode 100644
index 000000000000..a4c9fd33da38
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_io_defs.h
@@ -0,0 +1,391 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_IO_H_
+#define _EFA_IO_H_
+
+#define EFA_IO_TX_DESC_NUM_BUFS 2
+#define EFA_IO_TX_DESC_NUM_RDMA_BUFS 1
+#define EFA_IO_TX_DESC_INLINE_MAX_SIZE 32
+#define EFA_IO_TX_DESC_IMM_DATA_SIZE 4
+#define EFA_IO_TX_DESC_INLINE_PBL_SIZE 1
+
+enum efa_io_queue_type {
+ /* send queue (of a QP) */
+ EFA_IO_SEND_QUEUE = 1,
+ /* recv queue (of a QP) */
+ EFA_IO_RECV_QUEUE = 2,
+};
+
+enum efa_io_send_op_type {
+ /* send message */
+ EFA_IO_SEND = 0,
+ /* RDMA read */
+ EFA_IO_RDMA_READ = 1,
+ /* RDMA write */
+ EFA_IO_RDMA_WRITE = 2,
+ /* Fast MR registration */
+ EFA_IO_FAST_REG = 3,
+ /* Fast MR invalidation */
+ EFA_IO_FAST_INV = 4,
+};
+
+enum efa_io_comp_status {
+ /* Successful completion */
+ EFA_IO_COMP_STATUS_OK = 0,
+ /* Flushed during QP destroy */
+ EFA_IO_COMP_STATUS_FLUSHED = 1,
+ /* Internal QP error */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_QP_INTERNAL_ERROR = 2,
+ /* Unsupported operation */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_UNSUPPORTED_OP = 3,
+ /* Bad AH */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_AH = 4,
+ /* LKEY not registered or does not match IOVA */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_LKEY = 5,
+ /* Message too long */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_BAD_LENGTH = 6,
+ /* RKEY not registered or does not match remote IOVA */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_ADDRESS = 7,
+ /* Connection was reset by remote side */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_ABORT = 8,
+ /* Bad dest QP number (QP does not exist or is in error state) */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_DEST_QPN = 9,
+ /* Destination resource not ready (no WQEs posted on RQ) */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_RNR = 10,
+ /* Receiver SGL too short */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_LENGTH = 11,
+ /* Unexpected status returned by responder */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_STATUS = 12,
+ /* Unresponsive remote - was previously responsive */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_UNRESP_REMOTE = 13,
+ /* No valid AH at remote side (required for RDMA operations) */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_UNKNOWN_PEER = 14,
+ /* Unreachable remote - never received a response */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_UNREACH_REMOTE = 15,
+};
+
+enum efa_io_frwr_pbl_mode {
+ EFA_IO_FRWR_INLINE_PBL = 0,
+ EFA_IO_FRWR_DIRECT_PBL = 1,
+};
+
+struct efa_io_tx_meta_desc {
+ /* Verbs-generated Request ID */
+ u16 req_id;
+
+ /*
+ * control flags
+ * 3:0 : op_type - enum efa_io_send_op_type
+ * 4 : has_imm - immediate_data field carries valid
+ * data.
+ * 5 : inline_msg - inline mode - inline message data
+ * follows this descriptor (no buffer descriptors).
+ * Note that it is different from immediate data
+ * 6 : meta_extension - Extended metadata. MBZ
+ * 7 : meta_desc - Indicates metadata descriptor.
+ * Must be set.
+ */
+ u8 ctrl1;
+
+ /*
+ * control flags
+ * 0 : phase
+ * 1 : reserved25 - MBZ
+ * 2 : first - Indicates first descriptor in
+ * transaction. Must be set.
+ * 3 : last - Indicates last descriptor in
+ * transaction. Must be set.
+ * 4 : comp_req - Indicates whether completion should
+ * be posted, after packet is transmitted. Valid only
+ * for the first descriptor
+ * 7:5 : reserved29 - MBZ
+ */
+ u8 ctrl2;
+
+ u16 dest_qp_num;
+
+ /*
+ * If inline_msg bit is set, length of inline message in bytes,
+ * otherwise length of SGL (number of buffers).
+ */
+ u16 length;
+
+ /*
+ * immediate data: if has_imm is set, then this field is included within
+ * Tx message and reported in remote Rx completion.
+ */
+ u32 immediate_data;
+
+ u16 ah;
+
+ u16 reserved;
+
+ /* Queue key */
+ u32 qkey;
+
+ u8 reserved2[12];
+};
+
+/*
+ * Tx queue buffer descriptor, for any transport type. Preceded by metadata
+ * descriptor.
+ */
+struct efa_io_tx_buf_desc {
+ /* length in bytes */
+ u32 length;
+
+ /*
+ * 23:0 : lkey - local memory translation key
+ * 31:24 : reserved - MBZ
+ */
+ u32 lkey;
+
+ /* Buffer address bits[31:0] */
+ u32 buf_addr_lo;
+
+ /* Buffer address bits[63:32] */
+ u32 buf_addr_hi;
+};
+
+struct efa_io_remote_mem_addr {
+ /* length in bytes */
+ u32 length;
+
+ /* remote memory translation key */
+ u32 rkey;
+
+ /* Buffer address bits[31:0] */
+ u32 buf_addr_lo;
+
+ /* Buffer address bits[63:32] */
+ u32 buf_addr_hi;
+};
+
+struct efa_io_rdma_req {
+ /* Remote memory address */
+ struct efa_io_remote_mem_addr remote_mem;
+
+ /* Local memory address */
+ struct efa_io_tx_buf_desc local_mem[1];
+};
+
+struct efa_io_fast_mr_reg_req {
+ /* Updated local key of the MR after lkey/rkey increment */
+ u32 lkey;
+
+ /*
+ * permissions
+ * 0 : local_write_enable - Local write permissions:
+ * must be set for RQ buffers and buffers posted for
+ * RDMA Read requests
+ * 1 : remote_write_enable - Remote write
+ * permissions: must be set to enable RDMA write to
+ * the region
+ * 2 : remote_read_enable - Remote read permissions:
+ * must be set to enable RDMA read from the region
+ * 7:3 : reserved2 - MBZ
+ */
+ u8 permissions;
+
+ /*
+ * control flags
+ * 4:0 : phys_page_size_shift - page size is (1 <<
+ * phys_page_size_shift)
+ * 6:5 : pbl_mode - enum efa_io_frwr_pbl_mode
+ * 7 : reserved - MBZ
+ */
+ u8 flags;
+
+ /* MBZ */
+ u8 reserved[2];
+
+ /* IO Virtual Address associated with this MR */
+ u64 iova;
+
+ /* Memory region length, in bytes */
+ u64 mr_length;
+
+ /* Physical Buffer List, each element is page-aligned. */
+ union {
+ /*
+ * Inline array of physical page addresses (optimization
+ * for short region activation).
+ */
+ u64 inline_array[1];
+
+ /* points to PBL (Currently only direct) */
+ u64 dma_addr;
+ } pbl;
+};
+
+struct efa_io_fast_mr_inv_req {
+ /* Local key of the MR to invalidate */
+ u32 lkey;
+
+ /* MBZ */
+ u8 reserved[28];
+};
+
+/*
+ * Tx WQE, composed of tx meta descriptors followed by either tx buffer
+ * descriptors or inline data
+ */
+struct efa_io_tx_wqe {
+ /* TX meta */
+ struct efa_io_tx_meta_desc meta;
+
+ union {
+ /* Send buffer descriptors */
+ struct efa_io_tx_buf_desc sgl[2];
+
+ u8 inline_data[32];
+
+ /* RDMA local and remote memory addresses */
+ struct efa_io_rdma_req rdma_req;
+
+ /* Fast registration */
+ struct efa_io_fast_mr_reg_req reg_mr_req;
+
+ /* Fast invalidation */
+ struct efa_io_fast_mr_inv_req inv_mr_req;
+ } data;
+};
+
+/*
+ * Rx buffer descriptor; RX WQE is composed of one or more RX buffer
+ * descriptors.
+ */
+struct efa_io_rx_desc {
+ /* Buffer address bits[31:0] */
+ u32 buf_addr_lo;
+
+ /* Buffer Pointer[63:32] */
+ u32 buf_addr_hi;
+
+ /* Verbs-generated request id. */
+ u16 req_id;
+
+ /* Length in bytes. */
+ u16 length;
+
+ /*
+ * LKey and control flags
+ * 23:0 : lkey
+ * 29:24 : reserved - MBZ
+ * 30 : first - Indicates first descriptor in WQE
+ * 31 : last - Indicates last descriptor in WQE
+ */
+ u32 lkey_ctrl;
+};
+
+/* Common IO completion descriptor */
+struct efa_io_cdesc_common {
+ /*
+ * verbs-generated request ID, as provided in the completed tx or rx
+ * descriptor.
+ */
+ u16 req_id;
+
+ u8 status;
+
+ /*
+ * flags
+ * 0 : phase - Phase bit
+ * 2:1 : q_type - enum efa_io_queue_type: send/recv
+ * 3 : has_imm - indicates that immediate data is
+ * present - for RX completions only
+ * 6:4 : op_type - enum efa_io_send_op_type
+ * 7 : unsolicited - indicates that there is no
+ * matching request - for RDMA with imm. RX only
+ */
+ u8 flags;
+
+ /* local QP number */
+ u16 qp_num;
+};
+
+/* Tx completion descriptor */
+struct efa_io_tx_cdesc {
+ /* Common completion info */
+ struct efa_io_cdesc_common common;
+
+ /* MBZ */
+ u16 reserved16;
+};
+
+/* Rx Completion Descriptor */
+struct efa_io_rx_cdesc {
+ /* Common completion info */
+ struct efa_io_cdesc_common common;
+
+ /* Transferred length bits[15:0] */
+ u16 length;
+
+ /* Remote Address Handle FW index, 0xFFFF indicates invalid ah */
+ u16 ah;
+
+ u16 src_qp_num;
+
+ /* Immediate data */
+ u32 imm;
+};
+
+/* Rx Completion Descriptor RDMA write info */
+struct efa_io_rx_cdesc_rdma_write {
+ /* Transferred length bits[31:16] */
+ u16 length_hi;
+};
+
+/* Extended Rx Completion Descriptor */
+struct efa_io_rx_cdesc_ex {
+ /* Base RX completion info */
+ struct efa_io_rx_cdesc base;
+
+ union {
+ struct efa_io_rx_cdesc_rdma_write rdma_write;
+
+ /*
+ * Valid only in case of unknown AH (0xFFFF) and CQ
+ * set_src_addr is enabled.
+ */
+ u8 src_addr[16];
+ } u;
+};
+
+/* tx_meta_desc */
+#define EFA_IO_TX_META_DESC_OP_TYPE_MASK GENMASK(3, 0)
+#define EFA_IO_TX_META_DESC_HAS_IMM_MASK BIT(4)
+#define EFA_IO_TX_META_DESC_INLINE_MSG_MASK BIT(5)
+#define EFA_IO_TX_META_DESC_META_EXTENSION_MASK BIT(6)
+#define EFA_IO_TX_META_DESC_META_DESC_MASK BIT(7)
+#define EFA_IO_TX_META_DESC_PHASE_MASK BIT(0)
+#define EFA_IO_TX_META_DESC_FIRST_MASK BIT(2)
+#define EFA_IO_TX_META_DESC_LAST_MASK BIT(3)
+#define EFA_IO_TX_META_DESC_COMP_REQ_MASK BIT(4)
+
+/* tx_buf_desc */
+#define EFA_IO_TX_BUF_DESC_LKEY_MASK GENMASK(23, 0)
+
+/* fast_mr_reg_req */
+#define EFA_IO_FAST_MR_REG_REQ_LOCAL_WRITE_ENABLE_MASK BIT(0)
+#define EFA_IO_FAST_MR_REG_REQ_REMOTE_WRITE_ENABLE_MASK BIT(1)
+#define EFA_IO_FAST_MR_REG_REQ_REMOTE_READ_ENABLE_MASK BIT(2)
+#define EFA_IO_FAST_MR_REG_REQ_PHYS_PAGE_SIZE_SHIFT_MASK GENMASK(4, 0)
+#define EFA_IO_FAST_MR_REG_REQ_PBL_MODE_MASK GENMASK(6, 5)
+
+/* rx_desc */
+#define EFA_IO_RX_DESC_LKEY_MASK GENMASK(23, 0)
+#define EFA_IO_RX_DESC_FIRST_MASK BIT(30)
+#define EFA_IO_RX_DESC_LAST_MASK BIT(31)
+
+/* cdesc_common */
+#define EFA_IO_CDESC_COMMON_PHASE_MASK BIT(0)
+#define EFA_IO_CDESC_COMMON_Q_TYPE_MASK GENMASK(2, 1)
+#define EFA_IO_CDESC_COMMON_HAS_IMM_MASK BIT(3)
+#define EFA_IO_CDESC_COMMON_OP_TYPE_MASK GENMASK(6, 4)
+#define EFA_IO_CDESC_COMMON_UNSOLICITED_MASK BIT(7)
+
+#endif /* _EFA_IO_H_ */
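A short sketch of how the completion flag layout above is meant to be decoded with EFA_GET() from efa_common_defs.h; the helper name is illustrative only.

static bool example_cqe_is_recv_with_imm(struct efa_io_cdesc_common *cdesc)
{
	return EFA_GET(&cdesc->flags, EFA_IO_CDESC_COMMON_Q_TYPE) ==
		       EFA_IO_RECV_QUEUE &&
	       EFA_GET(&cdesc->flags, EFA_IO_CDESC_COMMON_HAS_IMM);
}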
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
index db974caf1eb1..6c415b9adb5f 100644
--- a/drivers/infiniband/hw/efa/efa_main.c
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -1,19 +1,28 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
- * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/utsname.h>
+#include <linux/version.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/uverbs_ioctl.h>
#include "efa.h"
-#define PCI_DEV_ID_EFA_VF 0xefa0
+#define PCI_DEV_ID_EFA0_VF 0xefa0
+#define PCI_DEV_ID_EFA1_VF 0xefa1
+#define PCI_DEV_ID_EFA2_VF 0xefa2
+#define PCI_DEV_ID_EFA3_VF 0xefa3
static const struct pci_device_id efa_pci_tbl[] = {
- { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA_VF) },
+ { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA0_VF) },
+ { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA1_VF) },
+ { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA2_VF) },
+ { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA3_VF) },
{ }
};
@@ -30,14 +39,7 @@ MODULE_DEVICE_TABLE(pci, efa_pci_tbl);
(BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
-static void efa_update_network_attr(struct efa_dev *dev,
- struct efa_com_get_network_attr_result *network_attr)
-{
- memcpy(dev->addr, network_attr->addr, sizeof(network_attr->addr));
- dev->mtu = network_attr->mtu;
-
- dev_dbg(&dev->pdev->dev, "Full address %pI6\n", dev->addr);
-}
+extern const struct uapi_definition efa_uapi_defs[];
/* This handler will be called for unknown event group or unimplemented handlers */
static void unimplemented_aenq_handler(void *data,
@@ -72,6 +74,47 @@ static void efa_release_bars(struct efa_dev *dev, int bars_mask)
pci_release_selected_regions(pdev, release_bars);
}
+static void efa_process_comp_eqe(struct efa_dev *dev, struct efa_admin_eqe *eqe)
+{
+ u16 cqn = eqe->u.comp_event.cqn;
+ struct efa_cq *cq;
+
+ /* Safe to load as we're in irq and removal calls synchronize_irq() */
+ cq = xa_load(&dev->cqs_xa, cqn);
+ if (unlikely(!cq)) {
+ ibdev_err_ratelimited(&dev->ibdev,
+ "Completion event on non-existent CQ[%u]",
+ cqn);
+ return;
+ }
+
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+}
+
+static void efa_process_eqe(struct efa_com_eq *eeq, struct efa_admin_eqe *eqe)
+{
+ struct efa_dev *dev = container_of(eeq->edev, struct efa_dev, edev);
+
+ if (likely(EFA_GET(&eqe->common, EFA_ADMIN_EQE_EVENT_TYPE) ==
+ EFA_ADMIN_EQE_EVENT_TYPE_COMPLETION))
+ efa_process_comp_eqe(dev, eqe);
+ else
+ ibdev_err_ratelimited(&dev->ibdev,
+ "Unknown event type received %lu",
+ EFA_GET(&eqe->common,
+ EFA_ADMIN_EQE_EVENT_TYPE));
+}
+
+static irqreturn_t efa_intr_msix_comp(int irq, void *data)
+{
+ struct efa_eq *eq = data;
+ struct efa_com_dev *edev = eq->eeq.edev;
+
+ efa_com_eq_comp_intr_handler(edev, &eq->eeq);
+
+ return IRQ_HANDLED;
+}
+
static irqreturn_t efa_intr_msix_mgmnt(int irq, void *data)
{
struct efa_dev *dev = data;
@@ -82,25 +125,40 @@ static irqreturn_t efa_intr_msix_mgmnt(int irq, void *data)
return IRQ_HANDLED;
}
-static int efa_request_mgmnt_irq(struct efa_dev *dev)
+static int efa_request_irq(struct efa_dev *dev, struct efa_irq *irq)
{
- struct efa_irq *irq;
int err;
- irq = &dev->admin_irq;
- err = request_irq(irq->vector, irq->handler, 0, irq->name,
- irq->data);
+ err = request_irq(irq->irqn, irq->handler, 0, irq->name, irq->data);
if (err) {
- dev_err(&dev->pdev->dev, "Failed to request admin irq (%d)\n",
- err);
+ dev_err(&dev->pdev->dev, "Failed to request irq %s (%d)\n",
+ irq->name, err);
return err;
}
- dev_dbg(&dev->pdev->dev, "Set affinity hint of mgmnt irq to %*pbl (irq vector: %d)\n",
- nr_cpumask_bits, &irq->affinity_hint_mask, irq->vector);
- irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
+ irq_set_affinity_hint(irq->irqn, &irq->affinity_hint_mask);
- return err;
+ return 0;
+}
+
+static void efa_setup_comp_irq(struct efa_dev *dev, struct efa_eq *eq, u32 vector)
+{
+ u32 cpu;
+
+ cpu = vector - EFA_COMP_EQS_VEC_BASE;
+ snprintf(eq->irq.name, EFA_IRQNAME_SIZE, "efa-comp%d@pci:%s", cpu,
+ pci_name(dev->pdev));
+ eq->irq.handler = efa_intr_msix_comp;
+ eq->irq.data = eq;
+ eq->irq.vector = vector;
+ eq->irq.irqn = pci_irq_vector(dev->pdev, vector);
+ cpumask_set_cpu(cpu, &eq->irq.affinity_hint_mask);
+}
+
+static void efa_free_irq(struct efa_dev *dev, struct efa_irq *irq)
+{
+ irq_set_affinity_hint(irq->irqn, NULL);
+ free_irq(irq->irqn, irq->data);
}
static void efa_setup_mgmnt_irq(struct efa_dev *dev)
@@ -111,47 +169,45 @@ static void efa_setup_mgmnt_irq(struct efa_dev *dev)
"efa-mgmnt@pci:%s", pci_name(dev->pdev));
dev->admin_irq.handler = efa_intr_msix_mgmnt;
dev->admin_irq.data = dev;
- dev->admin_irq.vector =
- pci_irq_vector(dev->pdev, dev->admin_msix_vector_idx);
+ dev->admin_irq.vector = dev->admin_msix_vector_idx;
+ dev->admin_irq.irqn = pci_irq_vector(dev->pdev,
+ dev->admin_msix_vector_idx);
cpu = cpumask_first(cpu_online_mask);
- dev->admin_irq.cpu = cpu;
cpumask_set_cpu(cpu,
&dev->admin_irq.affinity_hint_mask);
- dev_info(&dev->pdev->dev, "Setup irq:0x%p vector:%d name:%s\n",
- &dev->admin_irq,
- dev->admin_irq.vector,
+ dev_info(&dev->pdev->dev, "Setup irq:%d name:%s\n",
+ dev->admin_irq.irqn,
dev->admin_irq.name);
}
-static void efa_free_mgmnt_irq(struct efa_dev *dev)
-{
- struct efa_irq *irq;
-
- irq = &dev->admin_irq;
- irq_set_affinity_hint(irq->vector, NULL);
- free_irq(irq->vector, irq->data);
-}
-
static int efa_set_mgmnt_irq(struct efa_dev *dev)
{
efa_setup_mgmnt_irq(dev);
- return efa_request_mgmnt_irq(dev);
+ return efa_request_irq(dev, &dev->admin_irq);
}
static int efa_request_doorbell_bar(struct efa_dev *dev)
{
u8 db_bar_idx = dev->dev_attr.db_bar;
struct pci_dev *pdev = dev->pdev;
- int bars;
+ int pci_mem_bars;
+ int db_bar;
int err;
- if (!(BIT(db_bar_idx) & EFA_BASE_BAR_MASK)) {
- bars = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(db_bar_idx);
+ db_bar = BIT(db_bar_idx);
+ if (!(db_bar & EFA_BASE_BAR_MASK)) {
+ pci_mem_bars = pci_select_bars(pdev, IORESOURCE_MEM);
+ if (db_bar & ~pci_mem_bars) {
+ dev_err(&pdev->dev,
+ "Doorbells BAR unavailable. Requested %#x, available %#x\n",
+ db_bar, pci_mem_bars);
+ return -ENODEV;
+ }
- err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
+ err = pci_request_selected_regions(pdev, db_bar, DRV_MODULE_NAME);
if (err) {
- dev_err(&dev->pdev->dev,
+ dev_err(&pdev->dev,
"pci_request_selected_regions for bar %d failed %d\n",
db_bar_idx, err);
return err;
@@ -196,21 +252,140 @@ static void efa_stats_init(struct efa_dev *dev)
atomic64_set(s, 0);
}
+static void efa_set_host_info(struct efa_dev *dev)
+{
+ struct efa_admin_set_feature_resp resp = {};
+ struct efa_admin_set_feature_cmd cmd = {};
+ struct efa_admin_host_info *hinf;
+ u32 bufsz = sizeof(*hinf);
+ dma_addr_t hinf_dma;
+
+ if (!efa_com_check_supported_feature_id(&dev->edev,
+ EFA_ADMIN_HOST_INFO))
+ return;
+
+ /* Failures in host info set shall not disturb probe */
+ hinf = dma_alloc_coherent(&dev->pdev->dev, bufsz, &hinf_dma,
+ GFP_KERNEL);
+ if (!hinf)
+ return;
+
+ strscpy(hinf->os_dist_str, utsname()->release,
+ sizeof(hinf->os_dist_str));
+ hinf->os_type = EFA_ADMIN_OS_LINUX;
+ strscpy(hinf->kernel_ver_str, utsname()->version,
+ sizeof(hinf->kernel_ver_str));
+ hinf->kernel_ver = LINUX_VERSION_CODE;
+ EFA_SET(&hinf->driver_ver, EFA_ADMIN_HOST_INFO_DRIVER_MAJOR, 0);
+ EFA_SET(&hinf->driver_ver, EFA_ADMIN_HOST_INFO_DRIVER_MINOR, 0);
+ EFA_SET(&hinf->driver_ver, EFA_ADMIN_HOST_INFO_DRIVER_SUB_MINOR, 0);
+ EFA_SET(&hinf->driver_ver, EFA_ADMIN_HOST_INFO_DRIVER_MODULE_TYPE, 0);
+ EFA_SET(&hinf->bdf, EFA_ADMIN_HOST_INFO_BUS, dev->pdev->bus->number);
+ EFA_SET(&hinf->bdf, EFA_ADMIN_HOST_INFO_DEVICE,
+ PCI_SLOT(dev->pdev->devfn));
+ EFA_SET(&hinf->bdf, EFA_ADMIN_HOST_INFO_FUNCTION,
+ PCI_FUNC(dev->pdev->devfn));
+ EFA_SET(&hinf->spec_ver, EFA_ADMIN_HOST_INFO_SPEC_MAJOR,
+ EFA_COMMON_SPEC_VERSION_MAJOR);
+ EFA_SET(&hinf->spec_ver, EFA_ADMIN_HOST_INFO_SPEC_MINOR,
+ EFA_COMMON_SPEC_VERSION_MINOR);
+ EFA_SET(&hinf->flags, EFA_ADMIN_HOST_INFO_INTREE, 1);
+ EFA_SET(&hinf->flags, EFA_ADMIN_HOST_INFO_GDR, 0);
+
+ efa_com_set_feature_ex(&dev->edev, &resp, &cmd, EFA_ADMIN_HOST_INFO,
+ hinf_dma, bufsz);
+
+ dma_free_coherent(&dev->pdev->dev, bufsz, hinf, hinf_dma);
+}
+
+static void efa_destroy_eq(struct efa_dev *dev, struct efa_eq *eq)
+{
+ efa_com_eq_destroy(&dev->edev, &eq->eeq);
+ efa_free_irq(dev, &eq->irq);
+}
+
+static int efa_create_eq(struct efa_dev *dev, struct efa_eq *eq, u32 msix_vec)
+{
+ int err;
+
+ efa_setup_comp_irq(dev, eq, msix_vec);
+ err = efa_request_irq(dev, &eq->irq);
+ if (err)
+ return err;
+
+ err = efa_com_eq_init(&dev->edev, &eq->eeq, efa_process_eqe,
+ dev->dev_attr.max_eq_depth, msix_vec);
+ if (err)
+ goto err_free_comp_irq;
+
+ return 0;
+
+err_free_comp_irq:
+ efa_free_irq(dev, &eq->irq);
+ return err;
+}
+
+static int efa_create_eqs(struct efa_dev *dev)
+{
+ u32 neqs = dev->dev_attr.max_eq;
+ int err, i;
+
+ neqs = min_t(u32, neqs, dev->num_irq_vectors - EFA_COMP_EQS_VEC_BASE);
+ dev->neqs = neqs;
+ dev->eqs = kcalloc(neqs, sizeof(*dev->eqs), GFP_KERNEL);
+ if (!dev->eqs)
+ return -ENOMEM;
+
+ for (i = 0; i < neqs; i++) {
+ err = efa_create_eq(dev, &dev->eqs[i], i + EFA_COMP_EQS_VEC_BASE);
+ if (err)
+ goto err_destroy_eqs;
+ }
+
+ return 0;
+
+err_destroy_eqs:
+ for (i--; i >= 0; i--)
+ efa_destroy_eq(dev, &dev->eqs[i]);
+ kfree(dev->eqs);
+
+ return err;
+}
+
+static void efa_destroy_eqs(struct efa_dev *dev)
+{
+ int i;
+
+ for (i = 0; i < dev->neqs; i++)
+ efa_destroy_eq(dev, &dev->eqs[i]);
+
+ kfree(dev->eqs);
+}
+
static const struct ib_device_ops efa_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_EFA,
+ .uverbs_abi_ver = EFA_UVERBS_ABI_VERSION,
+
+ .alloc_hw_port_stats = efa_alloc_hw_port_stats,
+ .alloc_hw_device_stats = efa_alloc_hw_device_stats,
.alloc_pd = efa_alloc_pd,
.alloc_ucontext = efa_alloc_ucontext,
- .create_ah = efa_create_ah,
.create_cq = efa_create_cq,
+ .create_cq_umem = efa_create_cq_umem,
.create_qp = efa_create_qp,
+ .create_user_ah = efa_create_ah,
.dealloc_pd = efa_dealloc_pd,
.dealloc_ucontext = efa_dealloc_ucontext,
.dereg_mr = efa_dereg_mr,
.destroy_ah = efa_destroy_ah,
.destroy_cq = efa_destroy_cq,
.destroy_qp = efa_destroy_qp,
+ .get_hw_stats = efa_get_hw_stats,
.get_link_layer = efa_port_link_layer,
.get_port_immutable = efa_get_port_immutable,
.mmap = efa_mmap,
+ .mmap_free = efa_mmap_free,
.modify_qp = efa_modify_qp,
.query_device = efa_query_device,
.query_gid = efa_query_gid,
@@ -218,15 +393,17 @@ static const struct ib_device_ops efa_dev_ops = {
.query_port = efa_query_port,
.query_qp = efa_query_qp,
.reg_user_mr = efa_reg_mr,
+ .reg_user_mr_dmabuf = efa_reg_user_mr_dmabuf,
INIT_RDMA_OBJ_SIZE(ib_ah, efa_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, efa_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, efa_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_qp, efa_qp, ibqp),
INIT_RDMA_OBJ_SIZE(ib_ucontext, efa_ucontext, ibucontext),
};
static int efa_ib_device_add(struct efa_dev *dev)
{
- struct efa_com_get_network_attr_result network_attr;
struct efa_com_get_hw_hints_result hw_hints;
struct pci_dev *pdev = dev->pdev;
int err;
@@ -242,12 +419,6 @@ static int efa_ib_device_add(struct efa_dev *dev)
if (err)
return err;
- err = efa_com_get_network_attr(&dev->edev, &network_attr);
- if (err)
- goto err_release_doorbell_bar;
-
- efa_update_network_attr(dev, &network_attr);
-
err = efa_com_get_hw_hints(&dev->edev, &hw_hints);
if (err)
goto err_release_doorbell_bar;
@@ -259,45 +430,32 @@ static int efa_ib_device_add(struct efa_dev *dev)
if (err)
goto err_release_doorbell_bar;
- dev->ibdev.owner = THIS_MODULE;
+ err = efa_create_eqs(dev);
+ if (err)
+ goto err_release_doorbell_bar;
+
+ efa_set_host_info(dev);
+
dev->ibdev.node_type = RDMA_NODE_UNSPECIFIED;
+ dev->ibdev.node_guid = dev->dev_attr.guid;
dev->ibdev.phys_port_cnt = 1;
- dev->ibdev.num_comp_vectors = 1;
+ dev->ibdev.num_comp_vectors = dev->neqs ?: 1;
dev->ibdev.dev.parent = &pdev->dev;
- dev->ibdev.uverbs_abi_ver = EFA_UVERBS_ABI_VERSION;
-
- dev->ibdev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_AH);
-
- dev->ibdev.uverbs_ex_cmd_mask =
- (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE);
-
- dev->ibdev.driver_id = RDMA_DRIVER_EFA;
+
ib_set_device_ops(&dev->ibdev, &efa_dev_ops);
- err = ib_register_device(&dev->ibdev, "efa_%d");
+ dev->ibdev.driver_def = efa_uapi_defs;
+
+ err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev);
if (err)
- goto err_release_doorbell_bar;
+ goto err_destroy_eqs;
ibdev_info(&dev->ibdev, "IB device registered\n");
return 0;
+err_destroy_eqs:
+ efa_destroy_eqs(dev);
err_release_doorbell_bar:
efa_release_doorbell_bar(dev);
return err;
@@ -305,9 +463,9 @@ err_release_doorbell_bar:
static void efa_ib_device_remove(struct efa_dev *dev)
{
- efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_NORMAL);
ibdev_info(&dev->ibdev, "Unregister ib device\n");
ib_unregister_device(&dev->ibdev);
+ efa_destroy_eqs(dev);
efa_release_doorbell_bar(dev);
}
@@ -318,29 +476,30 @@ static void efa_disable_msix(struct efa_dev *dev)
static int efa_enable_msix(struct efa_dev *dev)
{
- int msix_vecs, irq_num;
-
- /* Reserve the max msix vectors we might need */
- msix_vecs = EFA_NUM_MSIX_VEC;
+ int max_vecs, num_vecs;
+
+ /*
+ * Reserve the max msix vectors we might need, one vector is reserved
+ * for admin.
+ */
+ max_vecs = min_t(int, pci_msix_vec_count(dev->pdev),
+ num_online_cpus() + 1);
dev_dbg(&dev->pdev->dev, "Trying to enable MSI-X, vectors %d\n",
- msix_vecs);
+ max_vecs);
dev->admin_msix_vector_idx = EFA_MGMNT_MSIX_VEC_IDX;
- irq_num = pci_alloc_irq_vectors(dev->pdev, msix_vecs,
- msix_vecs, PCI_IRQ_MSIX);
+ num_vecs = pci_alloc_irq_vectors(dev->pdev, 1,
+ max_vecs, PCI_IRQ_MSIX);
- if (irq_num < 0) {
- dev_err(&dev->pdev->dev, "Failed to enable MSI-X. irq_num %d\n",
- irq_num);
+ if (num_vecs < 0) {
+ dev_err(&dev->pdev->dev, "Failed to enable MSI-X. error %d\n",
+ num_vecs);
return -ENOSPC;
}
- if (irq_num != msix_vecs) {
- dev_err(&dev->pdev->dev,
- "Allocated %d MSI-X (out of %d requested)\n",
- irq_num, msix_vecs);
- return -ENOSPC;
- }
+ dev_dbg(&dev->pdev->dev, "Allocated %d MSI-X vectors\n", num_vecs);
+
+ dev->num_irq_vectors = num_vecs;
return 0;
}
@@ -364,20 +523,13 @@ static int efa_device_init(struct efa_com_dev *edev, struct pci_dev *pdev)
return err;
}
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width));
- if (err) {
- dev_err(&pdev->dev, "pci_set_dma_mask failed %d\n", err);
- return err;
- }
-
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width));
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(dma_width));
if (err) {
- dev_err(&pdev->dev,
- "err_pci_set_consistent_dma_mask failed %d\n",
- err);
+ dev_err(&pdev->dev, "dma_set_mask_and_coherent failed %d\n", err);
return err;
}
+ dma_set_max_seg_size(&pdev->dev, UINT_MAX);
return 0;
}
@@ -385,7 +537,7 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev)
{
struct efa_com_dev *edev;
struct efa_dev *dev;
- int bars;
+ int pci_mem_bars;
int err;
err = pci_enable_device_mem(pdev);
@@ -408,9 +560,16 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev)
edev->efa_dev = dev;
edev->dmadev = &pdev->dev;
dev->pdev = pdev;
+ xa_init(&dev->cqs_xa);
- bars = pci_select_bars(pdev, IORESOURCE_MEM) & EFA_BASE_BAR_MASK;
- err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
+ pci_mem_bars = pci_select_bars(pdev, IORESOURCE_MEM);
+ if (EFA_BASE_BAR_MASK & ~pci_mem_bars) {
+ dev_err(&pdev->dev, "BARs unavailable. Requested %#x, available %#x\n",
+ (int)EFA_BASE_BAR_MASK, pci_mem_bars);
+ err = -ENODEV;
+ goto err_ibdev_destroy;
+ }
+ err = pci_request_selected_regions(pdev, EFA_BASE_BAR_MASK, DRV_MODULE_NAME);
if (err) {
dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n",
err);
@@ -463,7 +622,7 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev)
return dev;
err_free_mgmnt_irq:
- efa_free_mgmnt_irq(dev);
+ efa_free_irq(dev, &dev->admin_irq);
err_disable_msix:
efa_disable_msix(dev);
err_reg_read_destroy:
@@ -479,18 +638,21 @@ err_disable_device:
return ERR_PTR(err);
}
-static void efa_remove_device(struct pci_dev *pdev)
+static void efa_remove_device(struct pci_dev *pdev,
+ enum efa_regs_reset_reason_types reset_reason)
{
struct efa_dev *dev = pci_get_drvdata(pdev);
struct efa_com_dev *edev;
edev = &dev->edev;
+ efa_com_dev_reset(edev, reset_reason);
efa_com_admin_destroy(edev);
- efa_free_mgmnt_irq(dev);
+ efa_free_irq(dev, &dev->admin_irq);
efa_disable_msix(dev);
efa_com_mmio_reg_read_destroy(edev);
devm_iounmap(&pdev->dev, edev->reg_bar);
efa_release_bars(dev, EFA_BASE_BAR_MASK);
+ xa_destroy(&dev->cqs_xa);
ib_dealloc_device(&dev->ibdev);
pci_disable_device(pdev);
}
@@ -511,7 +673,7 @@ static int efa_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
return 0;
err_remove_device:
- efa_remove_device(pdev);
+ efa_remove_device(pdev, EFA_REGS_RESET_INIT_ERR);
return err;
}
@@ -520,7 +682,17 @@ static void efa_remove(struct pci_dev *pdev)
struct efa_dev *dev = pci_get_drvdata(pdev);
efa_ib_device_remove(dev);
- efa_remove_device(pdev);
+ efa_remove_device(pdev, EFA_REGS_RESET_NORMAL);
+}
+
+static void efa_shutdown(struct pci_dev *pdev)
+{
+ struct efa_dev *dev = pci_get_drvdata(pdev);
+
+ efa_destroy_eqs(dev);
+ efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_SHUTDOWN);
+ efa_free_irq(dev, &dev->admin_irq);
+ efa_disable_msix(dev);
}
static struct pci_driver efa_pci_driver = {
@@ -528,6 +700,7 @@ static struct pci_driver efa_pci_driver = {
.id_table = efa_pci_tbl,
.probe = efa_probe,
.remove = efa_remove,
+ .shutdown = efa_shutdown,
};
module_pci_driver(efa_pci_driver);
diff --git a/drivers/infiniband/hw/efa/efa_regs_defs.h b/drivers/infiniband/hw/efa/efa_regs_defs.h
index bb9cad3d6a15..714ae6258800 100644
--- a/drivers/infiniband/hw/efa/efa_regs_defs.h
+++ b/drivers/infiniband/hw/efa/efa_regs_defs.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_REGS_H_
@@ -42,72 +42,60 @@ enum efa_regs_reset_reason_types {
#define EFA_REGS_MMIO_REG_READ_OFF 0x5c
#define EFA_REGS_MMIO_RESP_LO_OFF 0x60
#define EFA_REGS_MMIO_RESP_HI_OFF 0x64
+#define EFA_REGS_EQ_DB_OFF 0x68
/* version register */
#define EFA_REGS_VERSION_MINOR_VERSION_MASK 0xff
-#define EFA_REGS_VERSION_MAJOR_VERSION_SHIFT 8
#define EFA_REGS_VERSION_MAJOR_VERSION_MASK 0xff00
/* controller_version register */
#define EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK 0xff
-#define EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT 8
#define EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK 0xff00
-#define EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT 16
#define EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK 0xff0000
-#define EFA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT 24
#define EFA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK 0xff000000
/* caps register */
#define EFA_REGS_CAPS_CONTIGUOUS_QUEUE_REQUIRED_MASK 0x1
-#define EFA_REGS_CAPS_RESET_TIMEOUT_SHIFT 1
#define EFA_REGS_CAPS_RESET_TIMEOUT_MASK 0x3e
-#define EFA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT 8
#define EFA_REGS_CAPS_DMA_ADDR_WIDTH_MASK 0xff00
-#define EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT 16
#define EFA_REGS_CAPS_ADMIN_CMD_TO_MASK 0xf0000
/* aq_caps register */
#define EFA_REGS_AQ_CAPS_AQ_DEPTH_MASK 0xffff
-#define EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT 16
#define EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK 0xffff0000
/* acq_caps register */
#define EFA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK 0xffff
-#define EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT 16
#define EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK 0xff0000
-#define EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_SHIFT 24
#define EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_MASK 0xff000000
/* aenq_caps register */
#define EFA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK 0xffff
-#define EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT 16
#define EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK 0xff0000
-#define EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_SHIFT 24
#define EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_MASK 0xff000000
+/* intr_mask register */
+#define EFA_REGS_INTR_MASK_EN_MASK 0x1
+
/* dev_ctl register */
#define EFA_REGS_DEV_CTL_DEV_RESET_MASK 0x1
-#define EFA_REGS_DEV_CTL_AQ_RESTART_SHIFT 1
#define EFA_REGS_DEV_CTL_AQ_RESTART_MASK 0x2
-#define EFA_REGS_DEV_CTL_RESET_REASON_SHIFT 28
#define EFA_REGS_DEV_CTL_RESET_REASON_MASK 0xf0000000
/* dev_sts register */
#define EFA_REGS_DEV_STS_READY_MASK 0x1
-#define EFA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_SHIFT 1
#define EFA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_MASK 0x2
-#define EFA_REGS_DEV_STS_AQ_RESTART_FINISHED_SHIFT 2
#define EFA_REGS_DEV_STS_AQ_RESTART_FINISHED_MASK 0x4
-#define EFA_REGS_DEV_STS_RESET_IN_PROGRESS_SHIFT 3
#define EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK 0x8
-#define EFA_REGS_DEV_STS_RESET_FINISHED_SHIFT 4
#define EFA_REGS_DEV_STS_RESET_FINISHED_MASK 0x10
-#define EFA_REGS_DEV_STS_FATAL_ERROR_SHIFT 5
#define EFA_REGS_DEV_STS_FATAL_ERROR_MASK 0x20
/* mmio_reg_read register */
#define EFA_REGS_MMIO_REG_READ_REQ_ID_MASK 0xffff
-#define EFA_REGS_MMIO_REG_READ_REG_OFF_SHIFT 16
#define EFA_REGS_MMIO_REG_READ_REG_OFF_MASK 0xffff0000
+/* eq_db register */
+#define EFA_REGS_EQ_DB_EQN_MASK 0xffff
+#define EFA_REGS_EQ_DB_ARM_MASK 0x80000000
+
#endif /* _EFA_REGS_H_ */
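The new eq_db register packs the EQ number into the low 16 bits with an arm bit on top. A hedged sketch of the doorbell write, assuming the value is composed with EFA_SET() and written with a plain writel(); the actual helper in efa_com.c may differ.

static void example_arm_eq(u8 __iomem *reg_bar, u16 eqn)
{
	u32 val = 0;

	EFA_SET(&val, EFA_REGS_EQ_DB_EQN, eqn);
	EFA_SET(&val, EFA_REGS_EQ_DB_ARM, 1);
	writel(val, reg_bar + EFA_REGS_EQ_DB_OFF);
}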
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index 6d6886c9009f..22d3e25c3b9d 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -1,21 +1,24 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
- * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
+#include <linux/dma-buf.h>
+#include <linux/dma-resv.h>
#include <linux/vmalloc.h>
+#include <linux/log2.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_ioctl.h>
+#define UVERBS_MODULE_NAME efa_ib
+#include <rdma/uverbs_named_ioctl.h>
+#include <rdma/ib_user_ioctl_cmds.h>
#include "efa.h"
-
-#define EFA_MMAP_FLAG_SHIFT 56
-#define EFA_MMAP_PAGE_MASK GENMASK(EFA_MMAP_FLAG_SHIFT - 1, 0)
-#define EFA_MMAP_INVALID U64_MAX
+#include "efa_io_defs.h"
enum {
EFA_MMAP_DMA_PAGE = 0,
@@ -23,23 +26,71 @@ enum {
EFA_MMAP_IO_NC,
};
-#define EFA_AENQ_ENABLED_GROUPS \
- (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
- BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
-
-struct efa_mmap_entry {
- void *obj;
+struct efa_user_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
u64 address;
- u64 length;
- u32 mmap_page;
u8 mmap_flag;
};
-static inline u64 get_mmap_key(const struct efa_mmap_entry *efa)
-{
- return ((u64)efa->mmap_flag << EFA_MMAP_FLAG_SHIFT) |
- ((u64)efa->mmap_page << PAGE_SHIFT);
-}
+#define EFA_DEFINE_DEVICE_STATS(op) \
+ op(EFA_SUBMITTED_CMDS, "submitted_cmds") \
+ op(EFA_COMPLETED_CMDS, "completed_cmds") \
+ op(EFA_CMDS_ERR, "cmds_err") \
+ op(EFA_NO_COMPLETION_CMDS, "no_completion_cmds") \
+ op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \
+ op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \
+ op(EFA_CREATE_QP_ERR, "create_qp_err") \
+ op(EFA_CREATE_CQ_ERR, "create_cq_err") \
+ op(EFA_REG_MR_ERR, "reg_mr_err") \
+ op(EFA_ALLOC_UCONTEXT_ERR, "alloc_ucontext_err") \
+ op(EFA_CREATE_AH_ERR, "create_ah_err") \
+ op(EFA_MMAP_ERR, "mmap_err")
+
+#define EFA_DEFINE_PORT_STATS(op) \
+ op(EFA_TX_BYTES, "tx_bytes") \
+ op(EFA_TX_PKTS, "tx_pkts") \
+ op(EFA_RX_BYTES, "rx_bytes") \
+ op(EFA_RX_PKTS, "rx_pkts") \
+ op(EFA_RX_DROPS, "rx_drops") \
+ op(EFA_SEND_BYTES, "send_bytes") \
+ op(EFA_SEND_WRS, "send_wrs") \
+ op(EFA_RECV_BYTES, "recv_bytes") \
+ op(EFA_RECV_WRS, "recv_wrs") \
+ op(EFA_RDMA_READ_WRS, "rdma_read_wrs") \
+ op(EFA_RDMA_READ_BYTES, "rdma_read_bytes") \
+ op(EFA_RDMA_READ_WR_ERR, "rdma_read_wr_err") \
+ op(EFA_RDMA_READ_RESP_BYTES, "rdma_read_resp_bytes") \
+ op(EFA_RDMA_WRITE_WRS, "rdma_write_wrs") \
+ op(EFA_RDMA_WRITE_BYTES, "rdma_write_bytes") \
+ op(EFA_RDMA_WRITE_WR_ERR, "rdma_write_wr_err") \
+ op(EFA_RDMA_WRITE_RECV_BYTES, "rdma_write_recv_bytes") \
+ op(EFA_RETRANS_BYTES, "retrans_bytes") \
+ op(EFA_RETRANS_PKTS, "retrans_pkts") \
+ op(EFA_RETRANS_TIMEOUT_EVENS, "retrans_timeout_events") \
+ op(EFA_UNRESPONSIVE_REMOTE_EVENTS, "unresponsive_remote_events") \
+ op(EFA_IMPAIRED_REMOTE_CONN_EVENTS, "impaired_remote_conn_events") \
+
+#define EFA_STATS_ENUM(ename, name) ename,
+#define EFA_STATS_STR(ename, nam) \
+ [ename].name = nam,
+
+enum efa_hw_device_stats {
+ EFA_DEFINE_DEVICE_STATS(EFA_STATS_ENUM)
+};
+
+static const struct rdma_stat_desc efa_device_stats_descs[] = {
+ EFA_DEFINE_DEVICE_STATS(EFA_STATS_STR)
+};
+
+enum efa_hw_port_stats {
+ EFA_DEFINE_PORT_STATS(EFA_STATS_ENUM)
+};
+
+static const struct rdma_stat_desc efa_port_stats_descs[] = {
+ EFA_DEFINE_PORT_STATS(EFA_STATS_STR)
+};
+
+#define EFA_DEFAULT_LINK_SPEED_GBPS 100
#define EFA_CHUNK_PAYLOAD_SHIFT 12
#define EFA_CHUNK_PAYLOAD_SIZE BIT(EFA_CHUNK_PAYLOAD_SHIFT)
@@ -55,8 +106,6 @@ static inline u64 get_mmap_key(const struct efa_mmap_entry *efa)
#define EFA_CHUNK_USED_SIZE \
((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE)
-#define EFA_SUPPORTED_ACCESS_FLAGS IB_ACCESS_LOCAL_WRITE
-
struct pbl_chunk {
dma_addr_t dma_addr;
u64 *buf;
@@ -120,8 +169,15 @@ static inline struct efa_ah *to_eah(struct ib_ah *ibah)
return container_of(ibah, struct efa_ah, ibah);
}
-#define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \
- sizeof(((typeof(x) *)0)->fld) <= (sz))
+static inline struct efa_user_mmap_entry *
+to_emmap(struct rdma_user_mmap_entry *rdma_entry)
+{
+ return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry);
+}
+
+#define EFA_DEV_CAP(dev, cap) \
+ ((dev)->dev_attr.device_caps & \
+ EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_##cap##_MASK)
#define is_reserved_cleared(reserved) \
!memchr_inv(reserved, 0, sizeof(reserved))
@@ -145,93 +201,12 @@ static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr,
return addr;
}
-/*
- * This is only called when the ucontext is destroyed and there can be no
- * concurrent query via mmap or allocate on the xarray, thus we can be sure no
- * other thread is using the entry pointer. We also know that all the BAR
- * pages have either been zap'd or munmaped at this point. Normal pages are
- * refcounted and will be freed at the proper time.
- */
-static void mmap_entries_remove_free(struct efa_dev *dev,
- struct efa_ucontext *ucontext)
-{
- struct efa_mmap_entry *entry;
- unsigned long mmap_page;
-
- xa_for_each(&ucontext->mmap_xa, mmap_page, entry) {
- xa_erase(&ucontext->mmap_xa, mmap_page);
-
- ibdev_dbg(
- &dev->ibdev,
- "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
- entry->obj, get_mmap_key(entry), entry->address,
- entry->length);
- if (entry->mmap_flag == EFA_MMAP_DMA_PAGE)
- /* DMA mapping is already gone, now free the pages */
- free_pages_exact(phys_to_virt(entry->address),
- entry->length);
- kfree(entry);
- }
-}
-
-static struct efa_mmap_entry *mmap_entry_get(struct efa_dev *dev,
- struct efa_ucontext *ucontext,
- u64 key, u64 len)
-{
- struct efa_mmap_entry *entry;
- u64 mmap_page;
-
- mmap_page = (key & EFA_MMAP_PAGE_MASK) >> PAGE_SHIFT;
- if (mmap_page > U32_MAX)
- return NULL;
-
- entry = xa_load(&ucontext->mmap_xa, mmap_page);
- if (!entry || get_mmap_key(entry) != key || entry->length != len)
- return NULL;
-
- ibdev_dbg(&dev->ibdev,
- "mmap: obj[0x%p] key[%#llx] addr[%#llx] len[%#llx] removed\n",
- entry->obj, key, entry->address, entry->length);
-
- return entry;
-}
-
-/*
- * Note this locking scheme cannot support removal of entries, except during
- * ucontext destruction when the core code guarentees no concurrency.
- */
-static u64 mmap_entry_insert(struct efa_dev *dev, struct efa_ucontext *ucontext,
- void *obj, u64 address, u64 length, u8 mmap_flag)
+static void efa_free_mapped(struct efa_dev *dev, void *cpu_addr,
+ dma_addr_t dma_addr,
+ size_t size, enum dma_data_direction dir)
{
- struct efa_mmap_entry *entry;
- int err;
-
- entry = kmalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry)
- return EFA_MMAP_INVALID;
-
- entry->obj = obj;
- entry->address = address;
- entry->length = length;
- entry->mmap_flag = mmap_flag;
-
- xa_lock(&ucontext->mmap_xa);
- entry->mmap_page = ucontext->mmap_xa_page;
- ucontext->mmap_xa_page += DIV_ROUND_UP(length, PAGE_SIZE);
- err = __xa_insert(&ucontext->mmap_xa, entry->mmap_page, entry,
- GFP_KERNEL);
- xa_unlock(&ucontext->mmap_xa);
- if (err) {
- kfree(entry);
- return EFA_MMAP_INVALID;
- }
-
- ibdev_dbg(
- &dev->ibdev,
- "mmap: obj[0x%p] addr[%#llx], len[%#llx], key[%#llx] inserted\n",
- entry->obj, entry->address, entry->length, get_mmap_key(entry));
-
- return get_mmap_key(entry);
+ dma_unmap_single(&dev->pdev->dev, dma_addr, size, dir);
+ free_pages_exact(cpu_addr, size);
}
int efa_query_device(struct ib_device *ibdev,
@@ -268,12 +243,35 @@ int efa_query_device(struct ib_device *ibdev,
dev_attr->max_rq_depth);
props->max_send_sge = dev_attr->max_sq_sge;
props->max_recv_sge = dev_attr->max_rq_sge;
+ props->max_sge_rd = dev_attr->max_wr_rdma_sge;
+ props->max_pkeys = 1;
if (udata && udata->outlen) {
resp.max_sq_sge = dev_attr->max_sq_sge;
resp.max_rq_sge = dev_attr->max_rq_sge;
resp.max_sq_wr = dev_attr->max_sq_depth;
resp.max_rq_wr = dev_attr->max_rq_depth;
+ resp.max_rdma_size = dev_attr->max_rdma_size;
+
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_WITH_SGID;
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_WITH_EXT_MEM;
+ if (EFA_DEV_CAP(dev, RDMA_READ))
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ;
+
+ if (EFA_DEV_CAP(dev, RNR_RETRY))
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RNR_RETRY;
+
+ if (EFA_DEV_CAP(dev, DATA_POLLING_128))
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_DATA_POLLING_128;
+
+ if (EFA_DEV_CAP(dev, RDMA_WRITE))
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_WRITE;
+
+ if (EFA_DEV_CAP(dev, UNSOLICITED_WRITE_RECV))
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_UNSOLICITED_WRITE_RECV;
+
+ if (dev->neqs)
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS;
err = ib_copy_to_udata(udata, &resp,
min(sizeof(resp), udata->outlen));
@@ -287,22 +285,61 @@ int efa_query_device(struct ib_device *ibdev,
return 0;
}
-int efa_query_port(struct ib_device *ibdev, u8 port,
+static void efa_link_gbps_to_speed_and_width(u16 gbps,
+ enum ib_port_speed *speed,
+ enum ib_port_width *width)
+{
+ if (gbps >= 400) {
+ *width = IB_WIDTH_8X;
+ *speed = IB_SPEED_HDR;
+ } else if (gbps >= 200) {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_HDR;
+ } else if (gbps >= 120) {
+ *width = IB_WIDTH_12X;
+ *speed = IB_SPEED_FDR10;
+ } else if (gbps >= 100) {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_EDR;
+ } else if (gbps >= 60) {
+ *width = IB_WIDTH_12X;
+ *speed = IB_SPEED_DDR;
+ } else if (gbps >= 50) {
+ *width = IB_WIDTH_1X;
+ *speed = IB_SPEED_HDR;
+ } else if (gbps >= 40) {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_FDR10;
+ } else if (gbps >= 30) {
+ *width = IB_WIDTH_12X;
+ *speed = IB_SPEED_SDR;
+ } else {
+ *width = IB_WIDTH_1X;
+ *speed = IB_SPEED_EDR;
+ }
+}
+
+int efa_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
struct efa_dev *dev = to_edev(ibdev);
+ enum ib_port_speed link_speed;
+ enum ib_port_width link_width;
+ u16 link_gbps;
props->lmc = 1;
props->state = IB_PORT_ACTIVE;
- props->phys_state = 5;
+ props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
props->gid_tbl_len = 1;
props->pkey_tbl_len = 1;
- props->active_speed = IB_SPEED_EDR;
- props->active_width = IB_WIDTH_4X;
- props->max_mtu = ib_mtu_int_to_enum(dev->mtu);
- props->active_mtu = ib_mtu_int_to_enum(dev->mtu);
- props->max_msg_sz = dev->mtu;
+ link_gbps = dev->dev_attr.max_link_speed_gbps ?: EFA_DEFAULT_LINK_SPEED_GBPS;
+ efa_link_gbps_to_speed_and_width(link_gbps, &link_speed, &link_width);
+ props->active_speed = link_speed;
+ props->active_width = link_width;
+ props->max_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
+ props->active_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
+ props->max_msg_sz = dev->dev_attr.mtu;
props->max_vl_num = 1;
return 0;
@@ -320,7 +357,7 @@ int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
#define EFA_QUERY_QP_SUPP_MASK \
(IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \
- IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP)
+ IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP | IB_QP_RNR_RETRY)
if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) {
ibdev_dbg(&dev->ibdev,
@@ -342,6 +379,7 @@ int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
qp_attr->sq_psn = result.sq_psn;
qp_attr->sq_draining = result.sq_draining;
qp_attr->port_num = 1;
+ qp_attr->rnr_retry = result.rnr_retry;
qp_attr->cap.max_send_wr = qp->max_send_wr;
qp_attr->cap.max_recv_wr = qp->max_recv_wr;
@@ -358,17 +396,17 @@ int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
return 0;
}
-int efa_query_gid(struct ib_device *ibdev, u8 port, int index,
+int efa_query_gid(struct ib_device *ibdev, u32 port, int index,
union ib_gid *gid)
{
struct efa_dev *dev = to_edev(ibdev);
- memcpy(gid->raw, dev->addr, sizeof(dev->addr));
+ memcpy(gid->raw, dev->dev_attr.addr, sizeof(dev->dev_attr.addr));
return 0;
}
-int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+int efa_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
u16 *pkey)
{
if (index > 0)
@@ -427,23 +465,18 @@ int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
err_dealloc_pd:
efa_pd_dealloc(dev, result.pdn);
err_out:
- atomic64_inc(&dev->stats.sw_stats.alloc_pd_err);
+ atomic64_inc(&dev->stats.alloc_pd_err);
return err;
}
-void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct efa_dev *dev = to_edev(ibpd->device);
struct efa_pd *pd = to_epd(ibpd);
- if (udata->inlen &&
- !ib_is_udata_cleared(udata, 0, udata->inlen)) {
- ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
- return;
- }
-
ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn);
efa_pd_dealloc(dev, pd->pdn);
+ return 0;
}
static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle)
@@ -453,87 +486,130 @@ static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle)
return efa_com_destroy_qp(&dev->edev, &params);
}
+static void efa_qp_user_mmap_entries_remove(struct efa_qp *qp)
+{
+ rdma_user_mmap_entry_remove(qp->rq_mmap_entry);
+ rdma_user_mmap_entry_remove(qp->rq_db_mmap_entry);
+ rdma_user_mmap_entry_remove(qp->llq_desc_mmap_entry);
+ rdma_user_mmap_entry_remove(qp->sq_db_mmap_entry);
+}
+
int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
struct efa_dev *dev = to_edev(ibqp->pd->device);
struct efa_qp *qp = to_eqp(ibqp);
int err;
- if (udata->inlen &&
- !ib_is_udata_cleared(udata, 0, udata->inlen)) {
- ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
- return -EINVAL;
- }
-
ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num);
+
err = efa_destroy_qp_handle(dev, qp->qp_handle);
if (err)
return err;
+ efa_qp_user_mmap_entries_remove(qp);
+
if (qp->rq_cpu_addr) {
ibdev_dbg(&dev->ibdev,
"qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n",
qp->rq_cpu_addr, qp->rq_size,
&qp->rq_dma_addr);
- dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size,
- DMA_TO_DEVICE);
+ efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr,
+ qp->rq_size, DMA_TO_DEVICE);
}
- kfree(qp);
return 0;
}
+static struct rdma_user_mmap_entry*
+efa_user_mmap_entry_insert(struct ib_ucontext *ucontext,
+ u64 address, size_t length,
+ u8 mmap_flag, u64 *offset)
+{
+ struct efa_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ int err;
+
+ if (!entry)
+ return NULL;
+
+ entry->address = address;
+ entry->mmap_flag = mmap_flag;
+
+ err = rdma_user_mmap_entry_insert(ucontext, &entry->rdma_entry,
+ length);
+ if (err) {
+ kfree(entry);
+ return NULL;
+ }
+ *offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
+
+ return &entry->rdma_entry;
+}
+
static int qp_mmap_entries_setup(struct efa_qp *qp,
struct efa_dev *dev,
struct efa_ucontext *ucontext,
struct efa_com_create_qp_params *params,
struct efa_ibv_create_qp_resp *resp)
{
- /*
- * Once an entry is inserted it might be mmapped, hence cannot be
- * cleaned up until dealloc_ucontext.
- */
- resp->sq_db_mmap_key =
- mmap_entry_insert(dev, ucontext, qp,
- dev->db_bar_addr + resp->sq_db_offset,
- PAGE_SIZE, EFA_MMAP_IO_NC);
- if (resp->sq_db_mmap_key == EFA_MMAP_INVALID)
+ size_t length;
+ u64 address;
+
+ address = dev->db_bar_addr + resp->sq_db_offset;
+ qp->sq_db_mmap_entry =
+ efa_user_mmap_entry_insert(&ucontext->ibucontext,
+ address,
+ PAGE_SIZE, EFA_MMAP_IO_NC,
+ &resp->sq_db_mmap_key);
+ if (!qp->sq_db_mmap_entry)
return -ENOMEM;
resp->sq_db_offset &= ~PAGE_MASK;
- resp->llq_desc_mmap_key =
- mmap_entry_insert(dev, ucontext, qp,
- dev->mem_bar_addr + resp->llq_desc_offset,
- PAGE_ALIGN(params->sq_ring_size_in_bytes +
- (resp->llq_desc_offset & ~PAGE_MASK)),
- EFA_MMAP_IO_WC);
- if (resp->llq_desc_mmap_key == EFA_MMAP_INVALID)
- return -ENOMEM;
+ address = dev->mem_bar_addr + resp->llq_desc_offset;
+ length = PAGE_ALIGN(params->sq_ring_size_in_bytes +
+ offset_in_page(resp->llq_desc_offset));
+
+ qp->llq_desc_mmap_entry =
+ efa_user_mmap_entry_insert(&ucontext->ibucontext,
+ address, length,
+ EFA_MMAP_IO_WC,
+ &resp->llq_desc_mmap_key);
+ if (!qp->llq_desc_mmap_entry)
+ goto err_remove_mmap;
resp->llq_desc_offset &= ~PAGE_MASK;
if (qp->rq_size) {
- resp->rq_db_mmap_key =
- mmap_entry_insert(dev, ucontext, qp,
- dev->db_bar_addr + resp->rq_db_offset,
- PAGE_SIZE, EFA_MMAP_IO_NC);
- if (resp->rq_db_mmap_key == EFA_MMAP_INVALID)
- return -ENOMEM;
+ address = dev->db_bar_addr + resp->rq_db_offset;
+
+ qp->rq_db_mmap_entry =
+ efa_user_mmap_entry_insert(&ucontext->ibucontext,
+ address, PAGE_SIZE,
+ EFA_MMAP_IO_NC,
+ &resp->rq_db_mmap_key);
+ if (!qp->rq_db_mmap_entry)
+ goto err_remove_mmap;
resp->rq_db_offset &= ~PAGE_MASK;
- resp->rq_mmap_key =
- mmap_entry_insert(dev, ucontext, qp,
- virt_to_phys(qp->rq_cpu_addr),
- qp->rq_size, EFA_MMAP_DMA_PAGE);
- if (resp->rq_mmap_key == EFA_MMAP_INVALID)
- return -ENOMEM;
+ address = virt_to_phys(qp->rq_cpu_addr);
+ qp->rq_mmap_entry =
+ efa_user_mmap_entry_insert(&ucontext->ibucontext,
+ address, qp->rq_size,
+ EFA_MMAP_DMA_PAGE,
+ &resp->rq_mmap_key);
+ if (!qp->rq_mmap_entry)
+ goto err_remove_mmap;
resp->rq_mmap_size = qp->rq_size;
}
return 0;
+
+err_remove_mmap:
+ efa_qp_user_mmap_entries_remove(qp);
+
+ return -ENOMEM;
}
static int efa_qp_validate_cap(struct efa_dev *dev,
@@ -599,18 +675,17 @@ static int efa_qp_validate_attr(struct efa_dev *dev,
return 0;
}
-struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
{
struct efa_com_create_qp_params create_qp_params = {};
struct efa_com_create_qp_result create_qp_resp;
- struct efa_dev *dev = to_edev(ibpd->device);
+ struct efa_dev *dev = to_edev(ibqp->device);
struct efa_ibv_create_qp_resp resp = {};
struct efa_ibv_create_qp cmd = {};
- bool rq_entry_inserted = false;
+ struct efa_qp *qp = to_eqp(ibqp);
struct efa_ucontext *ucontext;
- struct efa_qp *qp;
+ u16 supported_efa_flags = 0;
int err;
ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext,
@@ -624,7 +699,7 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
if (err)
goto err_out;
- if (!field_avail(cmd, driver_qp_type, udata->inlen)) {
+ if (offsetofend(typeof(cmd), driver_qp_type) > udata->inlen) {
ibdev_dbg(&dev->ibdev,
"Incompatible ABI params, no input udata\n");
err = -EINVAL;
@@ -648,21 +723,25 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
goto err_out;
}
- if (cmd.comp_mask) {
+ if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_98)) {
ibdev_dbg(&dev->ibdev,
"Incompatible ABI params, unknown fields in udata\n");
err = -EINVAL;
goto err_out;
}
- qp = kzalloc(sizeof(*qp), GFP_KERNEL);
- if (!qp) {
- err = -ENOMEM;
+ if (EFA_DEV_CAP(dev, UNSOLICITED_WRITE_RECV))
+ supported_efa_flags |= EFA_CREATE_QP_WITH_UNSOLICITED_WRITE_RECV;
+
+ if (cmd.flags & ~supported_efa_flags) {
+ ibdev_dbg(&dev->ibdev, "Unsupported EFA QP create flags[%#x], supported[%#x]\n",
+ cmd.flags, supported_efa_flags);
+ err = -EOPNOTSUPP;
goto err_out;
}
create_qp_params.uarn = ucontext->uarn;
- create_qp_params.pd = to_epd(ibpd)->pdn;
+ create_qp_params.pd = to_epd(ibqp->pd)->pdn;
if (init_attr->qp_type == IB_QPT_UD) {
create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_UD;
@@ -673,7 +752,7 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
"Unsupported qp type %d driver qp type %d\n",
init_attr->qp_type, cmd.driver_qp_type);
err = -EOPNOTSUPP;
- goto err_free_qp;
+ goto err_out;
}
ibdev_dbg(&dev->ibdev, "Create QP: qp type %d driver qp type %#x\n",
@@ -691,7 +770,7 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
qp->rq_size, DMA_TO_DEVICE);
if (!qp->rq_cpu_addr) {
err = -ENOMEM;
- goto err_free_qp;
+ goto err_out;
}
ibdev_dbg(&dev->ibdev,
@@ -700,6 +779,11 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
create_qp_params.rq_base_addr = qp->rq_dma_addr;
}
+ create_qp_params.sl = cmd.sl;
+
+ if (cmd.flags & EFA_CREATE_QP_WITH_UNSOLICITED_WRITE_RECV)
+ create_qp_params.unsolicited_write_recv = true;
+
err = efa_com_create_qp(&dev->edev, &create_qp_params,
&create_qp_resp);
if (err)
@@ -716,10 +800,8 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
if (err)
goto err_destroy_qp;
- rq_entry_inserted = true;
qp->qp_handle = create_qp_resp.qp_handle;
qp->ibqp.qp_num = create_qp_resp.qp_num;
- qp->ibqp.qp_type = init_attr->qp_type;
qp->max_send_wr = init_attr->cap.max_send_wr;
qp->max_recv_wr = init_attr->cap.max_recv_wr;
qp->max_send_sge = init_attr->cap.max_send_sge;
@@ -733,28 +815,134 @@ struct ib_qp *efa_create_qp(struct ib_pd *ibpd,
ibdev_dbg(&dev->ibdev,
"Failed to copy udata for qp[%u]\n",
create_qp_resp.qp_num);
- goto err_destroy_qp;
+ goto err_remove_mmap_entries;
}
}
ibdev_dbg(&dev->ibdev, "Created qp[%d]\n", qp->ibqp.qp_num);
- return &qp->ibqp;
+ return 0;
+err_remove_mmap_entries:
+ efa_qp_user_mmap_entries_remove(qp);
err_destroy_qp:
efa_destroy_qp_handle(dev, create_qp_resp.qp_handle);
err_free_mapped:
- if (qp->rq_size) {
- dma_unmap_single(&dev->pdev->dev, qp->rq_dma_addr, qp->rq_size,
- DMA_TO_DEVICE);
- if (!rq_entry_inserted)
- free_pages_exact(qp->rq_cpu_addr, qp->rq_size);
- }
-err_free_qp:
- kfree(qp);
+ if (qp->rq_size)
+ efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr,
+ qp->rq_size, DMA_TO_DEVICE);
err_out:
- atomic64_inc(&dev->stats.sw_stats.create_qp_err);
- return ERR_PTR(err);
+ atomic64_inc(&dev->stats.create_qp_err);
+ return err;
+}
+
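+/*
+ * Per-transition validity and required/optional attribute masks for SRD
+ * (IB_QPT_DRIVER) QPs, consulted by efa_modify_srd_qp_is_ok() below.
+ */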
+static const struct {
+ int valid;
+ enum ib_qp_attr_mask req_param;
+ enum ib_qp_attr_mask opt_param;
+} srd_qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
+ [IB_QPS_RESET] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_INIT] = {
+ .valid = 1,
+ .req_param = IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_QKEY,
+ },
+ },
+ [IB_QPS_INIT] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_INIT] = {
+ .valid = 1,
+ .opt_param = IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_QKEY,
+ },
+ [IB_QPS_RTR] = {
+ .valid = 1,
+ .opt_param = IB_QP_PKEY_INDEX |
+ IB_QP_QKEY,
+ },
+ },
+ [IB_QPS_RTR] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_RTS] = {
+ .valid = 1,
+ .req_param = IB_QP_SQ_PSN,
+ .opt_param = IB_QP_CUR_STATE |
+ IB_QP_QKEY |
+ IB_QP_RNR_RETRY,
+
+ }
+ },
+ [IB_QPS_RTS] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_RTS] = {
+ .valid = 1,
+ .opt_param = IB_QP_CUR_STATE |
+ IB_QP_QKEY,
+ },
+ [IB_QPS_SQD] = {
+ .valid = 1,
+ .opt_param = IB_QP_EN_SQD_ASYNC_NOTIFY,
+ },
+ },
+ [IB_QPS_SQD] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_RTS] = {
+ .valid = 1,
+ .opt_param = IB_QP_CUR_STATE |
+ IB_QP_QKEY,
+ },
+ [IB_QPS_SQD] = {
+ .valid = 1,
+ .opt_param = IB_QP_PKEY_INDEX |
+ IB_QP_QKEY,
+ }
+ },
+ [IB_QPS_SQE] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_RTS] = {
+ .valid = 1,
+ .opt_param = IB_QP_CUR_STATE |
+ IB_QP_QKEY,
+ }
+ },
+ [IB_QPS_ERR] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ }
+};
+
+static bool efa_modify_srd_qp_is_ok(enum ib_qp_state cur_state,
+ enum ib_qp_state next_state,
+ enum ib_qp_attr_mask mask)
+{
+ enum ib_qp_attr_mask req_param, opt_param;
+
+ if (mask & IB_QP_CUR_STATE &&
+ cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
+ cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
+ return false;
+
+ if (!srd_qp_state_table[cur_state][next_state].valid)
+ return false;
+
+ req_param = srd_qp_state_table[cur_state][next_state].req_param;
+ opt_param = srd_qp_state_table[cur_state][next_state].opt_param;
+
+ if ((mask & req_param) != req_param)
+ return false;
+
+ if (mask & ~(req_param | opt_param | IB_QP_STATE))
+ return false;
+
+ return true;
}
static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp,
@@ -762,9 +950,12 @@ static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp,
enum ib_qp_state cur_state,
enum ib_qp_state new_state)
{
+ int err;
+
#define EFA_MODIFY_QP_SUPP_MASK \
(IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \
- IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN)
+ IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN | \
+ IB_QP_RNR_RETRY)
if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) {
ibdev_dbg(&dev->ibdev,
@@ -773,8 +964,14 @@ static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp,
return -EOPNOTSUPP;
}
- if (!ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD,
- qp_attr_mask)) {
+ if (qp->ibqp.qp_type == IB_QPT_DRIVER)
+ err = !efa_modify_srd_qp_is_ok(cur_state, new_state,
+ qp_attr_mask);
+ else
+ err = !ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD,
+ qp_attr_mask);
+
+ if (err) {
ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n");
return -EINVAL;
}
@@ -802,6 +999,9 @@ int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
enum ib_qp_state new_state;
int err;
+ if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
if (udata->inlen &&
!ib_is_udata_cleared(udata, 0, udata->inlen)) {
ibdev_dbg(&dev->ibdev,
@@ -821,28 +1021,36 @@ int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
params.qp_handle = qp->qp_handle;
if (qp_attr_mask & IB_QP_STATE) {
- params.modify_mask |= BIT(EFA_ADMIN_QP_STATE_BIT) |
- BIT(EFA_ADMIN_CUR_QP_STATE_BIT);
- params.cur_qp_state = qp_attr->cur_qp_state;
- params.qp_state = qp_attr->qp_state;
+ EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QP_STATE,
+ 1);
+ EFA_SET(&params.modify_mask,
+ EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE, 1);
+ params.cur_qp_state = cur_state;
+ params.qp_state = new_state;
}
if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
- params.modify_mask |=
- BIT(EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT);
+ EFA_SET(&params.modify_mask,
+ EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY, 1);
params.sq_drained_async_notify = qp_attr->en_sqd_async_notify;
}
if (qp_attr_mask & IB_QP_QKEY) {
- params.modify_mask |= BIT(EFA_ADMIN_QKEY_BIT);
+ EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QKEY, 1);
params.qkey = qp_attr->qkey;
}
if (qp_attr_mask & IB_QP_SQ_PSN) {
- params.modify_mask |= BIT(EFA_ADMIN_SQ_PSN_BIT);
+ EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN, 1);
params.sq_psn = qp_attr->sq_psn;
}
+ if (qp_attr_mask & IB_QP_RNR_RETRY) {
+ EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY,
+ 1);
+ params.rnr_retry = qp_attr->rnr_retry;
+ }
+
err = efa_com_modify_qp(&dev->edev, &params);
if (err)
return err;
@@ -859,61 +1067,92 @@ static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx)
return efa_com_destroy_cq(&dev->edev, &params);
}
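+/* Release the CQ doorbell and queue buffer mmap entries from the ucontext. */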
+static void efa_cq_user_mmap_entries_remove(struct efa_cq *cq)
+{
+ rdma_user_mmap_entry_remove(cq->db_mmap_entry);
+ rdma_user_mmap_entry_remove(cq->mmap_entry);
+}
+
int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct efa_dev *dev = to_edev(ibcq->device);
struct efa_cq *cq = to_ecq(ibcq);
- int err;
-
- if (udata->inlen &&
- !ib_is_udata_cleared(udata, 0, udata->inlen)) {
- ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
- return -EINVAL;
- }
ibdev_dbg(&dev->ibdev,
"Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n",
cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr);
- err = efa_destroy_cq_idx(dev, cq->cq_idx);
- if (err)
- return err;
-
- dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
- DMA_FROM_DEVICE);
+ efa_destroy_cq_idx(dev, cq->cq_idx);
+ efa_cq_user_mmap_entries_remove(cq);
+ if (cq->eq) {
+ xa_erase(&dev->cqs_xa, cq->cq_idx);
+ synchronize_irq(cq->eq->irq.irqn);
+ }
- kfree(cq);
+ if (cq->umem)
+ ib_umem_release(cq->umem);
+ else
+ efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, DMA_FROM_DEVICE);
return 0;
}
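+/* Map a completion vector index to its event queue. */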
+static struct efa_eq *efa_vec2eq(struct efa_dev *dev, int vec)
+{
+ return &dev->eqs[vec];
+}
+
static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
- struct efa_ibv_create_cq_resp *resp)
+ struct efa_ibv_create_cq_resp *resp,
+ bool db_valid)
{
resp->q_mmap_size = cq->size;
- resp->q_mmap_key = mmap_entry_insert(dev, cq->ucontext, cq,
- virt_to_phys(cq->cpu_addr),
- cq->size, EFA_MMAP_DMA_PAGE);
- if (resp->q_mmap_key == EFA_MMAP_INVALID)
+ cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext,
+ virt_to_phys(cq->cpu_addr),
+ cq->size, EFA_MMAP_DMA_PAGE,
+ &resp->q_mmap_key);
+ if (!cq->mmap_entry)
return -ENOMEM;
+ if (db_valid) {
+ cq->db_mmap_entry =
+ efa_user_mmap_entry_insert(&cq->ucontext->ibucontext,
+ dev->db_bar_addr + resp->db_off,
+ PAGE_SIZE, EFA_MMAP_IO_NC,
+ &resp->db_mmap_key);
+ if (!cq->db_mmap_entry) {
+ rdma_user_mmap_entry_remove(cq->mmap_entry);
+ return -ENOMEM;
+ }
+
+ resp->db_off &= ~PAGE_MASK;
+ resp->comp_mask |= EFA_CREATE_CQ_RESP_DB_OFF;
+ }
+
return 0;
}
-static struct ib_cq *do_create_cq(struct ib_device *ibdev, int entries,
- int vector, struct ib_ucontext *ibucontext,
- struct ib_udata *udata)
+int efa_create_cq_umem(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_umem *umem, struct uverbs_attr_bundle *attrs)
{
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct efa_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct efa_ucontext, ibucontext);
+ struct efa_com_create_cq_params params = {};
struct efa_ibv_create_cq_resp resp = {};
- struct efa_com_create_cq_params params;
struct efa_com_create_cq_result result;
+ struct ib_device *ibdev = ibcq->device;
struct efa_dev *dev = to_edev(ibdev);
struct efa_ibv_create_cq cmd = {};
- bool cq_entry_inserted = false;
- struct efa_cq *cq;
+ struct efa_cq *cq = to_ecq(ibcq);
+ int entries = attr->cqe;
+ bool set_src_addr;
int err;
ibdev_dbg(ibdev, "create_cq entries %d\n", entries);
+ if (attr->flags)
+ return -EOPNOTSUPP;
+
if (entries < 1 || entries > dev->dev_attr.max_cq_depth) {
ibdev_dbg(ibdev,
"cq: requested entries[%u] non-positive or greater than max[%u]\n",
@@ -922,7 +1161,7 @@ static struct ib_cq *do_create_cq(struct ib_device *ibdev, int entries,
goto err_out;
}
- if (!field_avail(cmd, num_sub_cqs, udata->inlen)) {
+ if (offsetofend(typeof(cmd), num_sub_cqs) > udata->inlen) {
ibdev_dbg(ibdev,
"Incompatible ABI params, no input udata\n");
err = -EINVAL;
@@ -945,14 +1184,17 @@ static struct ib_cq *do_create_cq(struct ib_device *ibdev, int entries,
goto err_out;
}
- if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_50)) {
+ if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_58)) {
ibdev_dbg(ibdev,
"Incompatible ABI params, unknown fields in udata\n");
err = -EINVAL;
goto err_out;
}
- if (!cmd.cq_entry_size) {
+ set_src_addr = !!(cmd.flags & EFA_CREATE_CQ_WITH_SGID);
+ if ((cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc_ex)) &&
+ (set_src_addr ||
+ cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc))) {
ibdev_dbg(ibdev,
"Invalid entry size [%u]\n", cmd.cq_entry_size);
err = -EINVAL;
@@ -967,43 +1209,73 @@ static struct ib_cq *do_create_cq(struct ib_device *ibdev, int entries,
goto err_out;
}
- cq = kzalloc(sizeof(*cq), GFP_KERNEL);
- if (!cq) {
- err = -ENOMEM;
- goto err_out;
- }
-
- cq->ucontext = to_eucontext(ibucontext);
+ cq->ucontext = ucontext;
cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs);
- cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size,
- DMA_FROM_DEVICE);
- if (!cq->cpu_addr) {
- err = -ENOMEM;
- goto err_free_cq;
+
+ if (umem) {
+ if (umem->length < cq->size) {
+ ibdev_dbg(&dev->ibdev, "External memory too small\n");
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ if (!ib_umem_is_contiguous(umem)) {
+ ibdev_dbg(&dev->ibdev, "Non contiguous CQ unsupported\n");
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ cq->cpu_addr = NULL;
+ cq->dma_addr = ib_umem_start_dma_addr(umem);
+ cq->umem = umem;
+ } else {
+ cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size,
+ DMA_FROM_DEVICE);
+ if (!cq->cpu_addr) {
+ err = -ENOMEM;
+ goto err_out;
+ }
}
params.uarn = cq->ucontext->uarn;
- params.cq_depth = entries;
+ params.sub_cq_depth = entries;
params.dma_addr = cq->dma_addr;
params.entry_size_in_bytes = cmd.cq_entry_size;
params.num_sub_cqs = cmd.num_sub_cqs;
+ params.set_src_addr = set_src_addr;
+ if (cmd.flags & EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL) {
+ cq->eq = efa_vec2eq(dev, attr->comp_vector);
+ params.eqn = cq->eq->eeq.eqn;
+ params.interrupt_mode_enabled = true;
+ }
+
err = efa_com_create_cq(&dev->edev, &params, &result);
if (err)
goto err_free_mapped;
+ resp.db_off = result.db_off;
resp.cq_idx = result.cq_idx;
cq->cq_idx = result.cq_idx;
cq->ibcq.cqe = result.actual_depth;
WARN_ON_ONCE(entries != result.actual_depth);
- err = cq_mmap_entries_setup(dev, cq, &resp);
+ if (!umem)
+ err = cq_mmap_entries_setup(dev, cq, &resp, result.db_valid);
+
if (err) {
- ibdev_dbg(ibdev,
- "Could not setup cq[%u] mmap entries\n", cq->cq_idx);
+ ibdev_dbg(ibdev, "Could not setup cq[%u] mmap entries\n",
+ cq->cq_idx);
goto err_destroy_cq;
}
- cq_entry_inserted = true;
+ if (cq->eq) {
+ err = xa_err(xa_store(&dev->cqs_xa, cq->cq_idx, cq, GFP_KERNEL));
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to store cq[%u] in xarray\n",
+ cq->cq_idx);
+ goto err_remove_mmap;
+ }
+ }
if (udata->outlen) {
err = ib_copy_to_udata(udata, &resp,
@@ -1011,40 +1283,35 @@ static struct ib_cq *do_create_cq(struct ib_device *ibdev, int entries,
if (err) {
ibdev_dbg(ibdev,
"Failed to copy udata for create_cq\n");
- goto err_destroy_cq;
+ goto err_xa_erase;
}
}
- ibdev_dbg(ibdev,
- "Created cq[%d], cq depth[%u]. dma[%pad] virt[0x%p]\n",
+ ibdev_dbg(ibdev, "Created cq[%d], cq depth[%u]. dma[%pad] virt[0x%p]\n",
cq->cq_idx, result.actual_depth, &cq->dma_addr, cq->cpu_addr);
- return &cq->ibcq;
+ return 0;
+err_xa_erase:
+ if (cq->eq)
+ xa_erase(&dev->cqs_xa, cq->cq_idx);
+err_remove_mmap:
+ efa_cq_user_mmap_entries_remove(cq);
err_destroy_cq:
efa_destroy_cq_idx(dev, cq->cq_idx);
err_free_mapped:
- dma_unmap_single(&dev->pdev->dev, cq->dma_addr, cq->size,
- DMA_FROM_DEVICE);
- if (!cq_entry_inserted)
- free_pages_exact(cq->cpu_addr, cq->size);
-err_free_cq:
- kfree(cq);
+ if (!umem)
+ efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
+ DMA_FROM_DEVICE);
err_out:
- atomic64_inc(&dev->stats.sw_stats.create_cq_err);
- return ERR_PTR(err);
+ atomic64_inc(&dev->stats.create_cq_err);
+ return err;
}
-struct ib_cq *efa_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs)
{
- struct efa_ucontext *ucontext = rdma_udata_to_drv_context(udata,
- struct efa_ucontext,
- ibucontext);
-
- return do_create_cq(ibdev, attr->cqe, attr->comp_vector,
- &ucontext->ibucontext, udata);
+ return efa_create_cq_umem(ibcq, attr, NULL, attrs);
}
static int umem_to_page_list(struct efa_dev *dev,
@@ -1054,21 +1321,14 @@ static int umem_to_page_list(struct efa_dev *dev,
u8 hp_shift)
{
u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT);
- struct sg_dma_page_iter sg_iter;
- unsigned int page_idx = 0;
+ struct ib_block_iter biter;
unsigned int hp_idx = 0;
ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n",
hp_cnt, pages_in_hp);
- for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
- if (page_idx % pages_in_hp == 0) {
- page_list[hp_idx] = sg_page_iter_dma_address(&sg_iter);
- hp_idx++;
- }
-
- page_idx++;
- }
+ rdma_umem_for_each_dma_block(umem, &biter, BIT(hp_shift))
+ page_list[hp_idx++] = rdma_block_iter_dma_address(&biter);
return 0;
}
@@ -1079,7 +1339,7 @@ static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt)
struct page *pg;
int i;
- sglist = kcalloc(page_cnt, sizeof(*sglist), GFP_KERNEL);
+ sglist = kmalloc_array(page_cnt, sizeof(*sglist), GFP_KERNEL);
if (!sglist)
return NULL;
sg_init_table(sglist, page_cnt);
@@ -1103,14 +1363,14 @@ err:
*/
static int pbl_chunk_list_create(struct efa_dev *dev, struct pbl_context *pbl)
{
- unsigned int entry, payloads_in_sg, chunk_list_size, chunk_idx, payload_idx;
struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
int page_cnt = pbl->phys.indirect.pbl_buf_size_in_pages;
struct scatterlist *pages_sgl = pbl->phys.indirect.sgl;
+ unsigned int chunk_list_size, chunk_idx, payload_idx;
int sg_dma_cnt = pbl->phys.indirect.sg_dma_cnt;
struct efa_com_ctrl_buff_info *ctrl_buf;
u64 *cur_chunk_buf, *prev_chunk_buf;
- struct scatterlist *sg;
+ struct ib_block_iter biter;
dma_addr_t dma_addr;
int i;
@@ -1144,18 +1404,15 @@ static int pbl_chunk_list_create(struct efa_dev *dev, struct pbl_context *pbl)
chunk_idx = 0;
payload_idx = 0;
cur_chunk_buf = chunk_list->chunks[0].buf;
- for_each_sg(pages_sgl, sg, sg_dma_cnt, entry) {
- payloads_in_sg = sg_dma_len(sg) >> EFA_CHUNK_PAYLOAD_SHIFT;
- for (i = 0; i < payloads_in_sg; i++) {
- cur_chunk_buf[payload_idx++] =
- (sg_dma_address(sg) & ~(EFA_CHUNK_PAYLOAD_SIZE - 1)) +
- (EFA_CHUNK_PAYLOAD_SIZE * i);
-
- if (payload_idx == EFA_PTRS_PER_CHUNK) {
- chunk_idx++;
- cur_chunk_buf = chunk_list->chunks[chunk_idx].buf;
- payload_idx = 0;
- }
+ rdma_for_each_block(pages_sgl, &biter, sg_dma_cnt,
+ EFA_CHUNK_PAYLOAD_SIZE) {
+ cur_chunk_buf[payload_idx++] =
+ rdma_block_iter_dma_address(&biter);
+
+ if (payload_idx == EFA_PTRS_PER_CHUNK) {
+ chunk_idx++;
+ cur_chunk_buf = chunk_list->chunks[chunk_idx].buf;
+ payload_idx = 0;
}
}
@@ -1246,7 +1503,7 @@ static int pbl_continuous_initialize(struct efa_dev *dev,
*/
static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl)
{
- u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, PAGE_SIZE);
+ u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, EFA_CHUNK_PAYLOAD_SIZE);
struct scatterlist *sgl;
int sg_dma_cnt, err;
@@ -1303,30 +1560,30 @@ static int pbl_create(struct efa_dev *dev,
int err;
pbl->pbl_buf_size_in_bytes = hp_cnt * EFA_CHUNK_PAYLOAD_PTR_SIZE;
- pbl->pbl_buf = kzalloc(pbl->pbl_buf_size_in_bytes,
- GFP_KERNEL | __GFP_NOWARN);
- if (pbl->pbl_buf) {
- pbl->physically_continuous = 1;
+ pbl->pbl_buf = kvzalloc(pbl->pbl_buf_size_in_bytes, GFP_KERNEL);
+ if (!pbl->pbl_buf)
+ return -ENOMEM;
+
+ if (is_vmalloc_addr(pbl->pbl_buf)) {
+ pbl->physically_continuous = 0;
err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
hp_shift);
if (err)
- goto err_continuous;
- err = pbl_continuous_initialize(dev, pbl);
+ goto err_free;
+
+ err = pbl_indirect_initialize(dev, pbl);
if (err)
- goto err_continuous;
+ goto err_free;
} else {
- pbl->physically_continuous = 0;
- pbl->pbl_buf = vzalloc(pbl->pbl_buf_size_in_bytes);
- if (!pbl->pbl_buf)
- return -ENOMEM;
-
+ pbl->physically_continuous = 1;
err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
hp_shift);
if (err)
- goto err_indirect;
- err = pbl_indirect_initialize(dev, pbl);
+ goto err_free;
+
+ err = pbl_continuous_initialize(dev, pbl);
if (err)
- goto err_indirect;
+ goto err_free;
}
ibdev_dbg(&dev->ibdev,
@@ -1335,24 +1592,20 @@ static int pbl_create(struct efa_dev *dev,
return 0;
-err_continuous:
- kfree(pbl->pbl_buf);
- return err;
-err_indirect:
- vfree(pbl->pbl_buf);
+err_free:
+ kvfree(pbl->pbl_buf);
return err;
}
static void pbl_destroy(struct efa_dev *dev, struct pbl_context *pbl)
{
- if (pbl->physically_continuous) {
+ if (pbl->physically_continuous)
dma_unmap_single(&dev->pdev->dev, pbl->phys.continuous.dma_addr,
pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
- kfree(pbl->pbl_buf);
- } else {
+ else
pbl_indirect_terminate(dev, pbl);
- vfree(pbl->pbl_buf);
- }
+
+ kvfree(pbl->pbl_buf);
}
static int efa_create_inline_pbl(struct efa_dev *dev, struct efa_mr *mr,
@@ -1406,149 +1659,231 @@ static int efa_create_pbl(struct efa_dev *dev,
return 0;
}
-static void efa_cont_pages(struct ib_umem *umem, u64 addr,
- unsigned long max_page_shift,
- int *count, u8 *shift, u32 *ncont)
-{
- struct scatterlist *sg;
- u64 base = ~0, p = 0;
- unsigned long tmp;
- unsigned long m;
- u64 len, pfn;
- int i = 0;
- int entry;
-
- addr = addr >> PAGE_SHIFT;
- tmp = (unsigned long)addr;
- m = find_first_bit(&tmp, BITS_PER_LONG);
- if (max_page_shift)
- m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m);
-
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- len = DIV_ROUND_UP(sg_dma_len(sg), PAGE_SIZE);
- pfn = sg_dma_address(sg) >> PAGE_SHIFT;
- if (base + p != pfn) {
- /*
- * If either the offset or the new
- * base are unaligned update m
- */
- tmp = (unsigned long)(pfn | p);
- if (!IS_ALIGNED(tmp, 1 << m))
- m = find_first_bit(&tmp, BITS_PER_LONG);
-
- base = pfn;
- p = 0;
- }
-
- p += len;
- i += len;
- }
-
- if (i) {
- m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);
- *ncont = DIV_ROUND_UP(i, (1 << m));
- } else {
- m = 0;
- *ncont = 0;
- }
-
- *shift = PAGE_SHIFT + m;
- *count = i;
-}
-
-struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
- u64 virt_addr, int access_flags,
- struct ib_udata *udata)
+static struct efa_mr *efa_alloc_mr(struct ib_pd *ibpd, int access_flags,
+ struct ib_udata *udata)
{
struct efa_dev *dev = to_edev(ibpd->device);
- struct efa_com_reg_mr_params params = {};
- struct efa_com_reg_mr_result result = {};
- unsigned long max_page_shift;
- struct pbl_context pbl;
+ int supp_access_flags;
struct efa_mr *mr;
- int inline_size;
- int npages;
- int err;
- if (udata->inlen &&
+ if (udata && udata->inlen &&
!ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) {
ibdev_dbg(&dev->ibdev,
"Incompatible ABI params, udata not cleared\n");
- err = -EINVAL;
- goto err_out;
+ return ERR_PTR(-EINVAL);
}
- if (access_flags & ~EFA_SUPPORTED_ACCESS_FLAGS) {
+ supp_access_flags =
+ IB_ACCESS_LOCAL_WRITE |
+ (EFA_DEV_CAP(dev, RDMA_READ) ? IB_ACCESS_REMOTE_READ : 0) |
+ (EFA_DEV_CAP(dev, RDMA_WRITE) ? IB_ACCESS_REMOTE_WRITE : 0);
+
+ access_flags &= ~IB_ACCESS_OPTIONAL;
+ if (access_flags & ~supp_access_flags) {
ibdev_dbg(&dev->ibdev,
"Unsupported access flags[%#x], supported[%#x]\n",
- access_flags, EFA_SUPPORTED_ACCESS_FLAGS);
- err = -EOPNOTSUPP;
- goto err_out;
+ access_flags, supp_access_flags);
+ return ERR_PTR(-EOPNOTSUPP);
}
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
- if (!mr) {
- err = -ENOMEM;
- goto err_out;
- }
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
- mr->umem = ib_umem_get(udata, start, length, access_flags, 0);
- if (IS_ERR(mr->umem)) {
- err = PTR_ERR(mr->umem);
- ibdev_dbg(&dev->ibdev,
- "Failed to pin and map user space memory[%d]\n", err);
- goto err_free;
- }
+ return mr;
+}
+
+static int efa_register_mr(struct ib_pd *ibpd, struct efa_mr *mr, u64 start,
+ u64 length, u64 virt_addr, int access_flags)
+{
+ struct efa_dev *dev = to_edev(ibpd->device);
+ struct efa_com_reg_mr_params params = {};
+ struct efa_com_reg_mr_result result = {};
+ struct pbl_context pbl;
+ unsigned int pg_sz;
+ int inline_size;
+ int err;
params.pd = to_epd(ibpd)->pdn;
params.iova = virt_addr;
params.mr_length_in_bytes = length;
- params.permissions = access_flags & 0x1;
- max_page_shift = fls64(dev->dev_attr.page_size_cap);
+ params.permissions = access_flags;
+
+ pg_sz = ib_umem_find_best_pgsz(mr->umem,
+ dev->dev_attr.page_size_cap,
+ virt_addr);
+ if (!pg_sz) {
+ ibdev_dbg(&dev->ibdev, "Failed to find a suitable page size in page_size_cap %#llx\n",
+ dev->dev_attr.page_size_cap);
+ return -EOPNOTSUPP;
+ }
+
+ params.page_shift = order_base_2(pg_sz);
+ params.page_num = ib_umem_num_dma_blocks(mr->umem, pg_sz);
- efa_cont_pages(mr->umem, start, max_page_shift, &npages,
- &params.page_shift, &params.page_num);
ibdev_dbg(&dev->ibdev,
- "start %#llx length %#llx npages %d params.page_shift %u params.page_num %u\n",
- start, length, npages, params.page_shift, params.page_num);
+ "start %#llx length %#llx params.page_shift %u params.page_num %u\n",
+ start, length, params.page_shift, params.page_num);
inline_size = ARRAY_SIZE(params.pbl.inline_pbl_array);
if (params.page_num <= inline_size) {
err = efa_create_inline_pbl(dev, mr, &params);
if (err)
- goto err_unmap;
+ return err;
err = efa_com_register_mr(&dev->edev, &params, &result);
if (err)
- goto err_unmap;
+ return err;
} else {
err = efa_create_pbl(dev, &pbl, mr, &params);
if (err)
- goto err_unmap;
+ return err;
err = efa_com_register_mr(&dev->edev, &params, &result);
pbl_destroy(dev, &pbl);
if (err)
- goto err_unmap;
+ return err;
}
mr->ibmr.lkey = result.l_key;
mr->ibmr.rkey = result.r_key;
mr->ibmr.length = length;
+ mr->ic_info.recv_ic_id = result.ic_info.recv_ic_id;
+ mr->ic_info.rdma_read_ic_id = result.ic_info.rdma_read_ic_id;
+ mr->ic_info.rdma_recv_ic_id = result.ic_info.rdma_recv_ic_id;
+ mr->ic_info.recv_ic_id_valid = result.ic_info.recv_ic_id_valid;
+ mr->ic_info.rdma_read_ic_id_valid = result.ic_info.rdma_read_ic_id_valid;
+ mr->ic_info.rdma_recv_ic_id_valid = result.ic_info.rdma_recv_ic_id_valid;
ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey);
+ return 0;
+}
+
+struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start,
+ u64 length, u64 virt_addr,
+ int fd, int access_flags,
+ struct ib_dmah *dmah,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct efa_dev *dev = to_edev(ibpd->device);
+ struct ib_umem_dmabuf *umem_dmabuf;
+ struct efa_mr *mr;
+ int err;
+
+ if (dmah) {
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ mr = efa_alloc_mr(ibpd, access_flags, &attrs->driver_udata);
+ if (IS_ERR(mr)) {
+ err = PTR_ERR(mr);
+ goto err_out;
+ }
+
+ umem_dmabuf = ib_umem_dmabuf_get_pinned(ibpd->device, start, length, fd,
+ access_flags);
+ if (IS_ERR(umem_dmabuf)) {
+ err = PTR_ERR(umem_dmabuf);
+ ibdev_dbg(&dev->ibdev, "Failed to get dmabuf umem[%pe]\n",
+ umem_dmabuf);
+ goto err_free;
+ }
+
+ mr->umem = &umem_dmabuf->umem;
+ err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags);
+ if (err)
+ goto err_release;
+
return &mr->ibmr;
-err_unmap:
+err_release:
ib_umem_release(mr->umem);
err_free:
kfree(mr);
err_out:
- atomic64_inc(&dev->stats.sw_stats.reg_mr_err);
+ atomic64_inc(&dev->stats.reg_mr_err);
return ERR_PTR(err);
}
+struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
+ struct ib_udata *udata)
+{
+ struct efa_dev *dev = to_edev(ibpd->device);
+ struct efa_mr *mr;
+ int err;
+
+ if (dmah) {
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ mr = efa_alloc_mr(ibpd, access_flags, udata);
+ if (IS_ERR(mr)) {
+ err = PTR_ERR(mr);
+ goto err_out;
+ }
+
+ mr->umem = ib_umem_get(ibpd->device, start, length, access_flags);
+ if (IS_ERR(mr->umem)) {
+ err = PTR_ERR(mr->umem);
+ ibdev_dbg(&dev->ibdev,
+ "Failed to pin and map user space memory[%pe]\n",
+ mr->umem);
+ goto err_free;
+ }
+
+ err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags);
+ if (err)
+ goto err_release;
+
+ return &mr->ibmr;
+
+err_release:
+ ib_umem_release(mr->umem);
+err_free:
+ kfree(mr);
+err_out:
+ atomic64_inc(&dev->stats.reg_mr_err);
+ return ERR_PTR(err);
+}
+
+static int UVERBS_HANDLER(EFA_IB_METHOD_MR_QUERY)(struct uverbs_attr_bundle *attrs)
+{
+ struct ib_mr *ibmr = uverbs_attr_get_obj(attrs, EFA_IB_ATTR_QUERY_MR_HANDLE);
+ struct efa_mr *mr = to_emr(ibmr);
+ u16 ic_id_validity = 0;
+ int ret;
+
+ ret = uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_RECV_IC_ID,
+ &mr->ic_info.recv_ic_id, sizeof(mr->ic_info.recv_ic_id));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_RDMA_READ_IC_ID,
+ &mr->ic_info.rdma_read_ic_id, sizeof(mr->ic_info.rdma_read_ic_id));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_RDMA_RECV_IC_ID,
+ &mr->ic_info.rdma_recv_ic_id, sizeof(mr->ic_info.rdma_recv_ic_id));
+ if (ret)
+ return ret;
+
+ if (mr->ic_info.recv_ic_id_valid)
+ ic_id_validity |= EFA_QUERY_MR_VALIDITY_RECV_IC_ID;
+ if (mr->ic_info.rdma_read_ic_id_valid)
+ ic_id_validity |= EFA_QUERY_MR_VALIDITY_RDMA_READ_IC_ID;
+ if (mr->ic_info.rdma_recv_ic_id_valid)
+ ic_id_validity |= EFA_QUERY_MR_VALIDITY_RDMA_RECV_IC_ID;
+
+ return uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_IC_ID_VALIDITY,
+ &ic_id_validity, sizeof(ic_id_validity));
+}
+
int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
struct efa_dev *dev = to_edev(ibmr->device);
@@ -1556,28 +1891,20 @@ int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
struct efa_mr *mr = to_emr(ibmr);
int err;
- if (udata->inlen &&
- !ib_is_udata_cleared(udata, 0, udata->inlen)) {
- ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
- return -EINVAL;
- }
-
ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey);
- if (mr->umem) {
- params.l_key = mr->ibmr.lkey;
- err = efa_com_dereg_mr(&dev->edev, &params);
- if (err)
- return err;
- ib_umem_release(mr->umem);
- }
+ params.l_key = mr->ibmr.lkey;
+ err = efa_com_dereg_mr(&dev->edev, &params);
+ if (err)
+ return err;
+ ib_umem_release(mr->umem);
kfree(mr);
return 0;
}
-int efa_get_port_immutable(struct ib_device *ibdev, u8 port_num,
+int efa_get_port_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable)
{
struct ib_port_attr attr;
@@ -1604,11 +1931,39 @@ static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn)
return efa_com_dealloc_uar(&dev->edev, &params);
}
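+/*
+ * Evaluates to NULL when the device lacks the attribute or userspace
+ * acknowledged it through comp_mask; otherwise evaluates to the attribute
+ * name, indicating a failed handshake.
+ */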
+#define EFA_CHECK_USER_COMP(_dev, _comp_mask, _attr, _mask, _attr_str) \
+ (_attr_str = (!(_dev)->dev_attr._attr || ((_comp_mask) & (_mask))) ? \
+ NULL : #_attr)
+
+static int efa_user_comp_handshake(const struct ib_ucontext *ibucontext,
+ const struct efa_ibv_alloc_ucontext_cmd *cmd)
+{
+ struct efa_dev *dev = to_edev(ibucontext->device);
+ char *attr_str;
+
+ if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, max_tx_batch,
+ EFA_ALLOC_UCONTEXT_CMD_COMP_TX_BATCH, attr_str))
+ goto err;
+
+ if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, min_sq_depth,
+ EFA_ALLOC_UCONTEXT_CMD_COMP_MIN_SQ_WR,
+ attr_str))
+ goto err;
+
+ return 0;
+
+err:
+ ibdev_dbg(&dev->ibdev, "Userspace handshake failed for %s attribute\n",
+ attr_str);
+ return -EOPNOTSUPP;
+}
+
int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
{
struct efa_ucontext *ucontext = to_eucontext(ibucontext);
struct efa_dev *dev = to_edev(ibucontext->device);
struct efa_ibv_alloc_ucontext_resp resp = {};
+ struct efa_ibv_alloc_ucontext_cmd cmd = {};
struct efa_com_alloc_uar_result result;
int err;
@@ -1617,32 +1972,43 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
* we will ack input fields in our response.
*/
+ err = ib_copy_from_udata(&cmd, udata,
+ min(sizeof(cmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(&dev->ibdev,
+ "Cannot copy udata for alloc_ucontext\n");
+ goto err_out;
+ }
+
+ err = efa_user_comp_handshake(ibucontext, &cmd);
+ if (err)
+ goto err_out;
+
err = efa_com_alloc_uar(&dev->edev, &result);
if (err)
goto err_out;
ucontext->uarn = result.uarn;
- xa_init(&ucontext->mmap_xa);
resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE;
resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH;
resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq;
resp.inline_buf_size = dev->dev_attr.inline_buf_size;
resp.max_llq_size = dev->dev_attr.max_llq_size;
+ resp.max_tx_batch = dev->dev_attr.max_tx_batch;
+ resp.min_sq_wr = dev->dev_attr.min_sq_depth;
- if (udata && udata->outlen) {
- err = ib_copy_to_udata(udata, &resp,
- min(sizeof(resp), udata->outlen));
- if (err)
- goto err_dealloc_uar;
- }
+ err = ib_copy_to_udata(udata, &resp,
+ min(sizeof(resp), udata->outlen));
+ if (err)
+ goto err_dealloc_uar;
return 0;
err_dealloc_uar:
efa_dealloc_uar(dev, result.uarn);
err_out:
- atomic64_inc(&dev->stats.sw_stats.alloc_ucontext_err);
+ atomic64_inc(&dev->stats.alloc_ucontext_err);
return err;
}
@@ -1651,38 +2017,53 @@ void efa_dealloc_ucontext(struct ib_ucontext *ibucontext)
struct efa_ucontext *ucontext = to_eucontext(ibucontext);
struct efa_dev *dev = to_edev(ibucontext->device);
- mmap_entries_remove_free(dev, ucontext);
efa_dealloc_uar(dev, ucontext->uarn);
}
+void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
+{
+ struct efa_user_mmap_entry *entry = to_emmap(rdma_entry);
+
+ kfree(entry);
+}
+
static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
- struct vm_area_struct *vma, u64 key, u64 length)
+ struct vm_area_struct *vma)
{
- struct efa_mmap_entry *entry;
+ struct rdma_user_mmap_entry *rdma_entry;
+ struct efa_user_mmap_entry *entry;
unsigned long va;
+ int err = 0;
u64 pfn;
- int err;
- entry = mmap_entry_get(dev, ucontext, key, length);
- if (!entry) {
- ibdev_dbg(&dev->ibdev, "key[%#llx] does not have valid entry\n",
- key);
+ rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma);
+ if (!rdma_entry) {
+ ibdev_dbg(&dev->ibdev,
+ "pgoff[%#lx] does not have valid entry\n",
+ vma->vm_pgoff);
+ atomic64_inc(&dev->stats.mmap_err);
return -EINVAL;
}
+ entry = to_emmap(rdma_entry);
ibdev_dbg(&dev->ibdev,
- "Mapping address[%#llx], length[%#llx], mmap_flag[%d]\n",
- entry->address, length, entry->mmap_flag);
+ "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
+ entry->address, rdma_entry->npages * PAGE_SIZE,
+ entry->mmap_flag);
pfn = entry->address >> PAGE_SHIFT;
switch (entry->mmap_flag) {
case EFA_MMAP_IO_NC:
- err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length,
- pgprot_noncached(vma->vm_page_prot));
+ err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
+ entry->rdma_entry.npages * PAGE_SIZE,
+ pgprot_noncached(vma->vm_page_prot),
+ rdma_entry);
break;
case EFA_MMAP_IO_WC:
- err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, length,
- pgprot_writecombine(vma->vm_page_prot));
+ err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
+ entry->rdma_entry.npages * PAGE_SIZE,
+ pgprot_writecombine(vma->vm_page_prot),
+ rdma_entry);
break;
case EFA_MMAP_DMA_PAGE:
for (va = vma->vm_start; va < vma->vm_end;
@@ -1696,12 +2077,16 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
err = -EINVAL;
}
- if (err)
+ if (err) {
ibdev_dbg(
&dev->ibdev,
- "Couldn't mmap address[%#llx] length[%#llx] mmap_flag[%d] err[%d]\n",
- entry->address, length, entry->mmap_flag, err);
+ "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
+ entry->address, rdma_entry->npages * PAGE_SIZE,
+ entry->mmap_flag, err);
+ atomic64_inc(&dev->stats.mmap_err);
+ }
+ rdma_user_mmap_entry_put(rdma_entry);
return err;
}
@@ -1710,27 +2095,13 @@ int efa_mmap(struct ib_ucontext *ibucontext,
{
struct efa_ucontext *ucontext = to_eucontext(ibucontext);
struct efa_dev *dev = to_edev(ibucontext->device);
- u64 length = vma->vm_end - vma->vm_start;
- u64 key = vma->vm_pgoff << PAGE_SHIFT;
+ size_t length = vma->vm_end - vma->vm_start;
ibdev_dbg(&dev->ibdev,
- "start %#lx, end %#lx, length = %#llx, key = %#llx\n",
- vma->vm_start, vma->vm_end, length, key);
-
- if (length % PAGE_SIZE != 0 || !(vma->vm_flags & VM_SHARED)) {
- ibdev_dbg(&dev->ibdev,
- "length[%#llx] is not page size aligned[%#lx] or VM_SHARED is not set [%#lx]\n",
- length, PAGE_SIZE, vma->vm_flags);
- return -EINVAL;
- }
+ "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
+ vma->vm_start, vma->vm_end, length, vma->vm_pgoff);
- if (vma->vm_flags & VM_EXEC) {
- ibdev_dbg(&dev->ibdev, "Mapping executable pages is not permitted\n");
- return -EPERM;
- }
- vma->vm_flags &= ~VM_MAYEXEC;
-
- return __efa_mmap(dev, ucontext, vma, key, length);
+ return __efa_mmap(dev, ucontext, vma);
}
static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
@@ -1744,10 +2115,10 @@ static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
}
int efa_create_ah(struct ib_ah *ibah,
- struct rdma_ah_attr *ah_attr,
- u32 flags,
+ struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata)
{
+ struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
struct efa_dev *dev = to_edev(ibah->device);
struct efa_com_create_ah_params params = {};
struct efa_ibv_create_ah_resp resp = {};
@@ -1755,7 +2126,7 @@ int efa_create_ah(struct ib_ah *ibah,
struct efa_ah *ah = to_eah(ibah);
int err;
- if (!(flags & RDMA_CREATE_AH_SLEEPABLE)) {
+ if (!(init_attr->flags & RDMA_CREATE_AH_SLEEPABLE)) {
ibdev_dbg(&dev->ibdev,
"Create address handle is not supported in atomic context\n");
err = -EOPNOTSUPP;
@@ -1797,11 +2168,11 @@ int efa_create_ah(struct ib_ah *ibah,
err_destroy_ah:
efa_ah_destroy(dev, ah);
err_out:
- atomic64_inc(&dev->stats.sw_stats.create_ah_err);
+ atomic64_inc(&dev->stats.create_ah_err);
return err;
}
-void efa_destroy_ah(struct ib_ah *ibah, u32 flags)
+int efa_destroy_ah(struct ib_ah *ibah, u32 flags)
{
struct efa_dev *dev = to_edev(ibah->pd->device);
struct efa_ah *ah = to_eah(ibah);
@@ -1811,15 +2182,167 @@ void efa_destroy_ah(struct ib_ah *ibah, u32 flags)
if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) {
ibdev_dbg(&dev->ibdev,
"Destroy address handle is not supported in atomic context\n");
- return;
+ return -EOPNOTSUPP;
}
efa_ah_destroy(dev, ah);
+ return 0;
+}
+
+struct rdma_hw_stats *efa_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num)
+{
+ return rdma_alloc_hw_stats_struct(efa_port_stats_descs,
+ ARRAY_SIZE(efa_port_stats_descs),
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+struct rdma_hw_stats *efa_alloc_hw_device_stats(struct ib_device *ibdev)
+{
+ return rdma_alloc_hw_stats_struct(efa_device_stats_descs,
+ ARRAY_SIZE(efa_device_stats_descs),
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int efa_fill_device_stats(struct efa_dev *dev,
+ struct rdma_hw_stats *stats)
+{
+ struct efa_com_stats_admin *as = &dev->edev.aq.stats;
+ struct efa_stats *s = &dev->stats;
+
+ stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd);
+ stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd);
+ stats->value[EFA_CMDS_ERR] = atomic64_read(&as->cmd_err);
+ stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion);
+
+ stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd);
+ stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->alloc_pd_err);
+ stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->create_qp_err);
+ stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->create_cq_err);
+ stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->reg_mr_err);
+ stats->value[EFA_ALLOC_UCONTEXT_ERR] =
+ atomic64_read(&s->alloc_ucontext_err);
+ stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->create_ah_err);
+ stats->value[EFA_MMAP_ERR] = atomic64_read(&s->mmap_err);
+
+ return ARRAY_SIZE(efa_device_stats_descs);
+}
+
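+/*
+ * Fill port counters from the device: basic, messages, RDMA read,
+ * RDMA write (when supported) and network statistics.
+ */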
+static int efa_fill_port_stats(struct efa_dev *dev, struct rdma_hw_stats *stats,
+ u32 port_num)
+{
+ struct efa_com_get_stats_params params = {};
+ union efa_com_get_stats_result result;
+ struct efa_com_rdma_write_stats *rws;
+ struct efa_com_rdma_read_stats *rrs;
+ struct efa_com_messages_stats *ms;
+ struct efa_com_network_stats *ns;
+ struct efa_com_basic_stats *bs;
+ int err;
+
+ params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL;
+ params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC;
+
+ err = efa_com_get_stats(&dev->edev, &params, &result);
+ if (err)
+ return err;
+
+ bs = &result.basic_stats;
+ stats->value[EFA_TX_BYTES] = bs->tx_bytes;
+ stats->value[EFA_TX_PKTS] = bs->tx_pkts;
+ stats->value[EFA_RX_BYTES] = bs->rx_bytes;
+ stats->value[EFA_RX_PKTS] = bs->rx_pkts;
+ stats->value[EFA_RX_DROPS] = bs->rx_drops;
+
+ params.type = EFA_ADMIN_GET_STATS_TYPE_MESSAGES;
+ err = efa_com_get_stats(&dev->edev, &params, &result);
+ if (err)
+ return err;
+
+ ms = &result.messages_stats;
+ stats->value[EFA_SEND_BYTES] = ms->send_bytes;
+ stats->value[EFA_SEND_WRS] = ms->send_wrs;
+ stats->value[EFA_RECV_BYTES] = ms->recv_bytes;
+ stats->value[EFA_RECV_WRS] = ms->recv_wrs;
+
+ params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_READ;
+ err = efa_com_get_stats(&dev->edev, &params, &result);
+ if (err)
+ return err;
+
+ rrs = &result.rdma_read_stats;
+ stats->value[EFA_RDMA_READ_WRS] = rrs->read_wrs;
+ stats->value[EFA_RDMA_READ_BYTES] = rrs->read_bytes;
+ stats->value[EFA_RDMA_READ_WR_ERR] = rrs->read_wr_err;
+ stats->value[EFA_RDMA_READ_RESP_BYTES] = rrs->read_resp_bytes;
+
+ if (EFA_DEV_CAP(dev, RDMA_WRITE)) {
+ params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_WRITE;
+ err = efa_com_get_stats(&dev->edev, &params, &result);
+ if (err)
+ return err;
+
+ rws = &result.rdma_write_stats;
+ stats->value[EFA_RDMA_WRITE_WRS] = rws->write_wrs;
+ stats->value[EFA_RDMA_WRITE_BYTES] = rws->write_bytes;
+ stats->value[EFA_RDMA_WRITE_WR_ERR] = rws->write_wr_err;
+ stats->value[EFA_RDMA_WRITE_RECV_BYTES] = rws->write_recv_bytes;
+ }
+
+ params.type = EFA_ADMIN_GET_STATS_TYPE_NETWORK;
+ err = efa_com_get_stats(&dev->edev, &params, &result);
+ if (err)
+ return err;
+
+ ns = &result.network_stats;
+ stats->value[EFA_RETRANS_BYTES] = ns->retrans_bytes;
+ stats->value[EFA_RETRANS_PKTS] = ns->retrans_pkts;
+ stats->value[EFA_RETRANS_TIMEOUT_EVENS] = ns->retrans_timeout_events;
+ stats->value[EFA_UNRESPONSIVE_REMOTE_EVENTS] = ns->unresponsive_remote_events;
+ stats->value[EFA_IMPAIRED_REMOTE_CONN_EVENTS] = ns->impaired_remote_conn_events;
+
+ return ARRAY_SIZE(efa_port_stats_descs);
+}
+
+int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port_num, int index)
+{
+ if (port_num)
+ return efa_fill_port_stats(to_edev(ibdev), stats, port_num);
+ else
+ return efa_fill_device_stats(to_edev(ibdev), stats);
}
enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
- u8 port_num)
+ u32 port_num)
{
return IB_LINK_LAYER_UNSPECIFIED;
}
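+/*
+ * EFA_IB_METHOD_MR_QUERY returns the MR's recv/RDMA read/RDMA recv IC IDs
+ * along with a validity mask.
+ */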
+DECLARE_UVERBS_NAMED_METHOD(EFA_IB_METHOD_MR_QUERY,
+ UVERBS_ATTR_IDR(EFA_IB_ATTR_QUERY_MR_HANDLE,
+ UVERBS_OBJECT_MR,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_IC_ID_VALIDITY,
+ UVERBS_ATTR_TYPE(u16),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_RECV_IC_ID,
+ UVERBS_ATTR_TYPE(u16),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_RDMA_READ_IC_ID,
+ UVERBS_ATTR_TYPE(u16),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_RDMA_RECV_IC_ID,
+ UVERBS_ATTR_TYPE(u16),
+ UA_MANDATORY));
+
+ADD_UVERBS_METHODS(efa_mr,
+ UVERBS_OBJECT_MR,
+ &UVERBS_METHOD(EFA_IB_METHOD_MR_QUERY));
+
+const struct uapi_definition efa_uapi_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_MR,
+ &efa_mr),
+ {},
+};
diff --git a/drivers/infiniband/hw/erdma/Kconfig b/drivers/infiniband/hw/erdma/Kconfig
new file mode 100644
index 000000000000..267fc1f3c42a
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/Kconfig
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config INFINIBAND_ERDMA
+ tristate "Alibaba Elastic RDMA Adapter (ERDMA) support"
+ depends on PCI_MSI && 64BIT
+ depends on INFINIBAND_ADDR_TRANS
+ depends on INFINIBAND_USER_ACCESS
+ help
+ This is an RDMA driver for the Alibaba Elastic RDMA Adapter (ERDMA),
+ which supports RDMA features in the Alibaba cloud environment.
+
+ To compile this driver as a module, choose M here. The module will be
+ called erdma.
diff --git a/drivers/infiniband/hw/erdma/Makefile b/drivers/infiniband/hw/erdma/Makefile
new file mode 100644
index 000000000000..51d2ef91905a
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_INFINIBAND_ERDMA) := erdma.o
+
+erdma-y := erdma_cm.o erdma_main.o erdma_cmdq.o erdma_cq.o erdma_verbs.o erdma_qp.o erdma_eq.o
diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h
new file mode 100644
index 000000000000..2a023b99f992
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma.h
@@ -0,0 +1,283 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+#ifndef __ERDMA_H__
+#define __ERDMA_H__
+
+#include <linux/bitfield.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/xarray.h>
+#include <rdma/ib_verbs.h>
+
+#include "erdma_hw.h"
+
+#define DRV_MODULE_NAME "erdma"
+#define ERDMA_NODE_DESC "Elastic RDMA Adapter stack"
+
+struct erdma_eq {
+ void *qbuf;
+ dma_addr_t qbuf_dma_addr;
+
+ spinlock_t lock;
+
+ u32 depth;
+
+ u16 ci;
+ u16 rsvd;
+
+ atomic64_t event_num;
+ atomic64_t notify_num;
+
+ void __iomem *db;
+ u64 *dbrec;
+ dma_addr_t dbrec_dma;
+};
+
+struct erdma_cmdq_sq {
+ void *qbuf;
+ dma_addr_t qbuf_dma_addr;
+
+ spinlock_t lock;
+
+ u32 depth;
+ u16 ci;
+ u16 pi;
+
+ u16 wqebb_cnt;
+
+ u64 *dbrec;
+ dma_addr_t dbrec_dma;
+};
+
+struct erdma_cmdq_cq {
+ void *qbuf;
+ dma_addr_t qbuf_dma_addr;
+
+ spinlock_t lock;
+
+ u32 depth;
+ u32 ci;
+ u32 cmdsn;
+
+ u64 *dbrec;
+ dma_addr_t dbrec_dma;
+
+ atomic64_t armed_num;
+};
+
+enum {
+ ERDMA_CMD_STATUS_INIT,
+ ERDMA_CMD_STATUS_ISSUED,
+ ERDMA_CMD_STATUS_FINISHED,
+ ERDMA_CMD_STATUS_TIMEOUT
+};
+
+struct erdma_comp_wait {
+ struct completion wait_event;
+ u32 cmd_status;
+ u32 ctx_id;
+ u16 sq_pi;
+ u8 comp_status;
+ u8 rsvd;
+ u32 comp_data[4];
+};
+
+enum {
+ ERDMA_CMDQ_STATE_OK_BIT = 0,
+ ERDMA_CMDQ_STATE_TIMEOUT_BIT = 1,
+ ERDMA_CMDQ_STATE_CTX_ERR_BIT = 2,
+};
+
+#define ERDMA_CMDQ_TIMEOUT_MS 15000
+#define ERDMA_REG_ACCESS_WAIT_MS 20
+#define ERDMA_WAIT_DEV_DONE_CNT 500
+
+struct erdma_cmdq {
+ unsigned long *comp_wait_bitmap;
+ struct erdma_comp_wait *wait_pool;
+ spinlock_t lock;
+
+ struct erdma_cmdq_sq sq;
+ struct erdma_cmdq_cq cq;
+ struct erdma_eq eq;
+
+ unsigned long state;
+
+ struct semaphore credits;
+ u16 max_outstandings;
+};
+
+#define COMPROMISE_CC ERDMA_CC_CUBIC
+enum erdma_cc_alg {
+ ERDMA_CC_NEWRENO = 0,
+ ERDMA_CC_CUBIC,
+ ERDMA_CC_HPCC_RTT,
+ ERDMA_CC_HPCC_ECN,
+ ERDMA_CC_HPCC_INT,
+ ERDMA_CC_METHODS_NUM
+};
+
+struct erdma_devattr {
+ u32 fw_version;
+
+ unsigned char peer_addr[ETH_ALEN];
+ unsigned long cap_flags;
+
+ int numa_node;
+ enum erdma_cc_alg cc;
+ u32 irq_num;
+
+ u32 max_qp;
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_ord;
+ u32 max_ird;
+
+ u32 max_send_sge;
+ u32 max_recv_sge;
+ u32 max_sge_rd;
+ u32 max_cq;
+ u32 max_cqe;
+ u64 max_mr_size;
+ u32 max_mr;
+ u32 max_pd;
+ u32 max_mw;
+ u32 max_gid;
+ u32 max_ah;
+ u32 local_dma_key;
+};
+
+#define ERDMA_IRQNAME_SIZE 50
+
+struct erdma_irq {
+ char name[ERDMA_IRQNAME_SIZE];
+ u32 msix_vector;
+ cpumask_t affinity_hint_mask;
+};
+
+struct erdma_eq_cb {
+ bool ready;
+ void *dev; /* All EQs use this field to get the erdma_dev struct */
+ struct erdma_irq irq;
+ struct erdma_eq eq;
+ struct tasklet_struct tasklet;
+};
+
+struct erdma_resource_cb {
+ unsigned long *bitmap;
+ spinlock_t lock;
+ u32 next_alloc_idx;
+ u32 max_cap;
+};
+
+enum {
+ ERDMA_RES_TYPE_PD = 0,
+ ERDMA_RES_TYPE_STAG_IDX = 1,
+ ERDMA_RES_TYPE_AH = 2,
+ ERDMA_RES_CNT = 3,
+};
+
+struct erdma_dev {
+ struct ib_device ibdev;
+ struct net_device *netdev;
+ struct pci_dev *pdev;
+ struct notifier_block netdev_nb;
+ struct workqueue_struct *reflush_wq;
+
+ resource_size_t func_bar_addr;
+ resource_size_t func_bar_len;
+ u8 __iomem *func_bar;
+
+ struct erdma_devattr attrs;
+ u32 mtu;
+
+ /* cmdq and aeq use the same msix vector */
+ struct erdma_irq comm_irq;
+ struct erdma_cmdq cmdq;
+ struct erdma_eq aeq;
+ struct erdma_eq_cb ceqs[ERDMA_NUM_MSIX_VEC - 1];
+
+ spinlock_t lock;
+ struct erdma_resource_cb res_cb[ERDMA_RES_CNT];
+ struct xarray qp_xa;
+ struct xarray cq_xa;
+
+ u32 next_alloc_qpn;
+ u32 next_alloc_cqn;
+
+ atomic_t num_ctx;
+ struct list_head cep_list;
+
+ struct dma_pool *db_pool;
+ struct dma_pool *resp_pool;
+ enum erdma_proto_type proto;
+};
+
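+/* Return entry @idx of a power-of-two depth queue with (1 << shift)-byte entries. */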
+static inline void *get_queue_entry(void *qbuf, u32 idx, u32 depth, u32 shift)
+{
+ idx &= (depth - 1);
+
+ return qbuf + (idx << shift);
+}
+
+static inline struct erdma_dev *to_edev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct erdma_dev, ibdev);
+}
+
+static inline u32 erdma_reg_read32(struct erdma_dev *dev, u32 reg)
+{
+ return readl(dev->func_bar + reg);
+}
+
+static inline u64 erdma_reg_read64(struct erdma_dev *dev, u32 reg)
+{
+ return readq(dev->func_bar + reg);
+}
+
+static inline void erdma_reg_write32(struct erdma_dev *dev, u32 reg, u32 value)
+{
+ writel(value, dev->func_bar + reg);
+}
+
+static inline void erdma_reg_write64(struct erdma_dev *dev, u32 reg, u64 value)
+{
+ writeq(value, dev->func_bar + reg);
+}
+
+static inline u32 erdma_reg_read32_filed(struct erdma_dev *dev, u32 reg,
+ u32 filed_mask)
+{
+ u32 val = erdma_reg_read32(dev, reg);
+
+ return FIELD_GET(filed_mask, val);
+}
+
+#define ERDMA_GET(val, name) FIELD_GET(ERDMA_CMD_##name##_MASK, val)
+
+int erdma_cmdq_init(struct erdma_dev *dev);
+void erdma_finish_cmdq_init(struct erdma_dev *dev);
+void erdma_cmdq_destroy(struct erdma_dev *dev);
+
+void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op);
+int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size,
+ u64 *resp0, u64 *resp1, bool sleepable);
+void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq);
+
+int erdma_ceqs_init(struct erdma_dev *dev);
+void erdma_ceqs_uninit(struct erdma_dev *dev);
+void notify_eq(struct erdma_eq *eq);
+void *get_next_valid_eqe(struct erdma_eq *eq);
+
+int erdma_aeq_init(struct erdma_dev *dev);
+int erdma_eq_common_init(struct erdma_dev *dev, struct erdma_eq *eq, u32 depth);
+void erdma_eq_destroy(struct erdma_dev *dev, struct erdma_eq *eq);
+
+void erdma_aeq_event_handler(struct erdma_dev *dev);
+void erdma_ceq_completion_handler(struct erdma_eq_cb *ceq_cb);
+
+#endif
diff --git a/drivers/infiniband/hw/erdma/erdma_cm.c b/drivers/infiniband/hw/erdma/erdma_cm.c
new file mode 100644
index 000000000000..ed21ba0037a4
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_cm.c
@@ -0,0 +1,1431 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Fredy Neeser */
+/* Greg Joyce <greg@opengridcomputing.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+/* Copyright (c) 2017, Open Grid Computing, Inc. */
+
+#include <linux/workqueue.h>
+#include <trace/events/sock.h>
+
+#include "erdma.h"
+#include "erdma_cm.h"
+#include "erdma_verbs.h"
+
+static struct workqueue_struct *erdma_cm_wq;
+
+static void erdma_cm_llp_state_change(struct sock *sk);
+static void erdma_cm_llp_data_ready(struct sock *sk);
+static void erdma_cm_llp_error_report(struct sock *sk);
+
+static void erdma_sk_assign_cm_upcalls(struct sock *sk)
+{
+ write_lock_bh(&sk->sk_callback_lock);
+ sk->sk_state_change = erdma_cm_llp_state_change;
+ sk->sk_data_ready = erdma_cm_llp_data_ready;
+ sk->sk_error_report = erdma_cm_llp_error_report;
+ write_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void erdma_sk_save_upcalls(struct sock *sk)
+{
+ struct erdma_cep *cep = sk_to_cep(sk);
+
+ write_lock_bh(&sk->sk_callback_lock);
+ cep->sk_state_change = sk->sk_state_change;
+ cep->sk_data_ready = sk->sk_data_ready;
+ cep->sk_error_report = sk->sk_error_report;
+ write_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void erdma_sk_restore_upcalls(struct sock *sk, struct erdma_cep *cep)
+{
+ sk->sk_state_change = cep->sk_state_change;
+ sk->sk_data_ready = cep->sk_data_ready;
+ sk->sk_error_report = cep->sk_error_report;
+ sk->sk_user_data = NULL;
+}
+
+static void erdma_socket_disassoc(struct socket *s)
+{
+ struct sock *sk = s->sk;
+ struct erdma_cep *cep;
+
+ if (sk) {
+ write_lock_bh(&sk->sk_callback_lock);
+ cep = sk_to_cep(sk);
+ if (cep) {
+ erdma_sk_restore_upcalls(sk, cep);
+ erdma_cep_put(cep);
+ } else {
+ WARN_ON_ONCE(1);
+ }
+ write_unlock_bh(&sk->sk_callback_lock);
+ } else {
+ WARN_ON_ONCE(1);
+ }
+}
+
+static void erdma_cep_socket_assoc(struct erdma_cep *cep, struct socket *s)
+{
+ cep->sock = s;
+ erdma_cep_get(cep);
+ s->sk->sk_user_data = cep;
+
+ erdma_sk_save_upcalls(s->sk);
+ erdma_sk_assign_cm_upcalls(s->sk);
+}
+
+static void erdma_disassoc_listen_cep(struct erdma_cep *cep)
+{
+ if (cep->listen_cep) {
+ erdma_cep_put(cep->listen_cep);
+ cep->listen_cep = NULL;
+ }
+}
+
+static struct erdma_cep *erdma_cep_alloc(struct erdma_dev *dev)
+{
+ struct erdma_cep *cep = kzalloc(sizeof(*cep), GFP_KERNEL);
+ unsigned long flags;
+
+ if (!cep)
+ return NULL;
+
+ INIT_LIST_HEAD(&cep->listenq);
+ INIT_LIST_HEAD(&cep->devq);
+ INIT_LIST_HEAD(&cep->work_freelist);
+
+ kref_init(&cep->ref);
+ cep->state = ERDMA_EPSTATE_IDLE;
+ init_waitqueue_head(&cep->waitq);
+ spin_lock_init(&cep->lock);
+ cep->dev = dev;
+
+ spin_lock_irqsave(&dev->lock, flags);
+ list_add_tail(&cep->devq, &dev->cep_list);
+ spin_unlock_irqrestore(&dev->lock, flags);
+
+ return cep;
+}
+
+static void erdma_cm_free_work(struct erdma_cep *cep)
+{
+ struct list_head *w, *tmp;
+ struct erdma_cm_work *work;
+
+ list_for_each_safe(w, tmp, &cep->work_freelist) {
+ work = list_entry(w, struct erdma_cm_work, list);
+ list_del(&work->list);
+ kfree(work);
+ }
+}
+
+static void erdma_cancel_mpatimer(struct erdma_cep *cep)
+{
+ spin_lock_bh(&cep->lock);
+ if (cep->mpa_timer) {
+ if (cancel_delayed_work(&cep->mpa_timer->work)) {
+ erdma_cep_put(cep);
+ kfree(cep->mpa_timer);
+ }
+ cep->mpa_timer = NULL;
+ }
+ spin_unlock_bh(&cep->lock);
+}
+
+static void erdma_put_work(struct erdma_cm_work *work)
+{
+ INIT_LIST_HEAD(&work->list);
+ spin_lock_bh(&work->cep->lock);
+ list_add(&work->list, &work->cep->work_freelist);
+ spin_unlock_bh(&work->cep->lock);
+}
+
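+/* Acquire exclusive use of the CEP, waiting until the current holder releases it. */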
+static void erdma_cep_set_inuse(struct erdma_cep *cep)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cep->lock, flags);
+ while (cep->in_use) {
+ spin_unlock_irqrestore(&cep->lock, flags);
+ wait_event_interruptible(cep->waitq, !cep->in_use);
+ if (signal_pending(current))
+ flush_signals(current);
+
+ spin_lock_irqsave(&cep->lock, flags);
+ }
+
+ cep->in_use = 1;
+ spin_unlock_irqrestore(&cep->lock, flags);
+}
+
+static void erdma_cep_set_free(struct erdma_cep *cep)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cep->lock, flags);
+ cep->in_use = 0;
+ spin_unlock_irqrestore(&cep->lock, flags);
+
+ wake_up(&cep->waitq);
+}
+
+static void __erdma_cep_dealloc(struct kref *ref)
+{
+ struct erdma_cep *cep = container_of(ref, struct erdma_cep, ref);
+ struct erdma_dev *dev = cep->dev;
+ unsigned long flags;
+
+ WARN_ON(cep->listen_cep);
+
+ kfree(cep->private_data);
+ kfree(cep->mpa.pdata);
+ spin_lock_bh(&cep->lock);
+ if (!list_empty(&cep->work_freelist))
+ erdma_cm_free_work(cep);
+ spin_unlock_bh(&cep->lock);
+
+ spin_lock_irqsave(&dev->lock, flags);
+ list_del(&cep->devq);
+ spin_unlock_irqrestore(&dev->lock, flags);
+ kfree(cep);
+}
+
+static struct erdma_cm_work *erdma_get_work(struct erdma_cep *cep)
+{
+ struct erdma_cm_work *work = NULL;
+
+ spin_lock_bh(&cep->lock);
+ if (!list_empty(&cep->work_freelist)) {
+ work = list_entry(cep->work_freelist.next, struct erdma_cm_work,
+ list);
+ list_del_init(&work->list);
+ }
+
+ spin_unlock_bh(&cep->lock);
+ return work;
+}
+
+static int erdma_cm_alloc_work(struct erdma_cep *cep, int num)
+{
+ struct erdma_cm_work *work;
+
+ while (num--) {
+ work = kmalloc(sizeof(*work), GFP_KERNEL);
+ if (!work) {
+ if (!(list_empty(&cep->work_freelist)))
+ erdma_cm_free_work(cep);
+ return -ENOMEM;
+ }
+ work->cep = cep;
+ INIT_LIST_HEAD(&work->list);
+ list_add(&work->list, &cep->work_freelist);
+ }
+
+ return 0;
+}
+
+static int erdma_cm_upcall(struct erdma_cep *cep, enum iw_cm_event_type reason,
+ int status)
+{
+ struct iw_cm_event event;
+ struct iw_cm_id *cm_id;
+
+ memset(&event, 0, sizeof(event));
+ event.status = status;
+ event.event = reason;
+
+ if (reason == IW_CM_EVENT_CONNECT_REQUEST) {
+ event.provider_data = cep;
+ cm_id = cep->listen_cep->cm_id;
+
+ event.ird = cep->dev->attrs.max_ird;
+ event.ord = cep->dev->attrs.max_ord;
+ } else {
+ cm_id = cep->cm_id;
+ }
+
+ if (reason == IW_CM_EVENT_CONNECT_REQUEST ||
+ reason == IW_CM_EVENT_CONNECT_REPLY) {
+ u16 pd_len = be16_to_cpu(cep->mpa.hdr.params.pd_len);
+
+ if (pd_len && cep->mpa.pdata) {
+ event.private_data_len = pd_len;
+ event.private_data = cep->mpa.pdata;
+ }
+
+ getname_local(cep->sock, &event.local_addr);
+ getname_peer(cep->sock, &event.remote_addr);
+ }
+
+ return cm_id->event_handler(cm_id, &event);
+}
+
+void erdma_qp_cm_drop(struct erdma_qp *qp)
+{
+ struct erdma_cep *cep = qp->cep;
+
+ if (!qp->cep)
+ return;
+
+ erdma_cep_set_inuse(cep);
+
+ /* already closed. */
+ if (cep->state == ERDMA_EPSTATE_CLOSED)
+ goto out;
+
+ if (cep->cm_id) {
+ switch (cep->state) {
+ case ERDMA_EPSTATE_AWAIT_MPAREP:
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
+ -EINVAL);
+ break;
+ case ERDMA_EPSTATE_RDMA_MODE:
+ erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
+ break;
+ case ERDMA_EPSTATE_IDLE:
+ case ERDMA_EPSTATE_LISTENING:
+ case ERDMA_EPSTATE_CONNECTING:
+ case ERDMA_EPSTATE_AWAIT_MPAREQ:
+ case ERDMA_EPSTATE_RECVD_MPAREQ:
+ case ERDMA_EPSTATE_CLOSED:
+ default:
+ break;
+ }
+ cep->cm_id->rem_ref(cep->cm_id);
+ cep->cm_id = NULL;
+ erdma_cep_put(cep);
+ }
+ cep->state = ERDMA_EPSTATE_CLOSED;
+
+ if (cep->sock) {
+ erdma_socket_disassoc(cep->sock);
+ sock_release(cep->sock);
+ cep->sock = NULL;
+ }
+
+ if (cep->qp) {
+ cep->qp = NULL;
+ erdma_qp_put(qp);
+ }
+out:
+ erdma_cep_set_free(cep);
+}
+
+void erdma_cep_put(struct erdma_cep *cep)
+{
+ WARN_ON(kref_read(&cep->ref) < 1);
+ kref_put(&cep->ref, __erdma_cep_dealloc);
+}
+
+void erdma_cep_get(struct erdma_cep *cep)
+{
+ kref_get(&cep->ref);
+}
+
+static int erdma_send_mpareqrep(struct erdma_cep *cep, const void *pdata,
+ u8 pd_len)
+{
+ struct socket *s = cep->sock;
+ struct mpa_rr *rr = &cep->mpa.hdr;
+ struct kvec iov[3];
+ struct msghdr msg;
+ int iovec_num = 0;
+ int ret;
+ int mpa_len;
+
+ memset(&msg, 0, sizeof(msg));
+
+ rr->params.pd_len = cpu_to_be16(pd_len);
+
+ iov[iovec_num].iov_base = rr;
+ iov[iovec_num].iov_len = sizeof(*rr);
+ iovec_num++;
+ mpa_len = sizeof(*rr);
+
+ iov[iovec_num].iov_base = &cep->mpa.ext_data;
+ iov[iovec_num].iov_len = sizeof(cep->mpa.ext_data);
+ iovec_num++;
+ mpa_len += sizeof(cep->mpa.ext_data);
+
+ if (pd_len) {
+ iov[iovec_num].iov_base = (char *)pdata;
+ iov[iovec_num].iov_len = pd_len;
+ mpa_len += pd_len;
+ iovec_num++;
+ }
+
+ ret = kernel_sendmsg(s, &msg, iov, iovec_num, mpa_len);
+
+ return ret < 0 ? ret : 0;
+}
+
+static inline int ksock_recv(struct socket *sock, char *buf, size_t size,
+ int flags)
+{
+ struct kvec iov = { buf, size };
+ struct msghdr msg = { .msg_name = NULL, .msg_flags = flags };
+
+ return kernel_recvmsg(sock, &msg, &iov, 1, size, flags);
+}
+
+static int __recv_mpa_hdr(struct erdma_cep *cep, int hdr_rcvd, char *hdr,
+ int hdr_size, int *rcvd_out)
+{
+ struct socket *s = cep->sock;
+ int rcvd;
+
+ *rcvd_out = 0;
+ if (hdr_rcvd < hdr_size) {
+ rcvd = ksock_recv(s, hdr + hdr_rcvd, hdr_size - hdr_rcvd,
+ MSG_DONTWAIT);
+ if (rcvd == -EAGAIN)
+ return -EAGAIN;
+
+ if (rcvd <= 0)
+ return -ECONNABORTED;
+
+ hdr_rcvd += rcvd;
+ *rcvd_out = rcvd;
+
+ if (hdr_rcvd < hdr_size)
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
+static void __mpa_rr_set_revision(__be16 *bits, u8 rev)
+{
+ *bits = (*bits & ~MPA_RR_MASK_REVISION) |
+ (cpu_to_be16(rev) & MPA_RR_MASK_REVISION);
+}
+
+static u8 __mpa_rr_revision(__be16 mpa_rr_bits)
+{
+ __be16 rev = mpa_rr_bits & MPA_RR_MASK_REVISION;
+
+ return (u8)be16_to_cpu(rev);
+}
+
+static void __mpa_ext_set_cc(__be32 *bits, u32 cc)
+{
+ *bits = (*bits & ~MPA_EXT_FLAG_CC) |
+ (cpu_to_be32(cc) & MPA_EXT_FLAG_CC);
+}
+
+static u8 __mpa_ext_cc(__be32 mpa_ext_bits)
+{
+ __be32 cc = mpa_ext_bits & MPA_EXT_FLAG_CC;
+
+ return (u8)be32_to_cpu(cc);
+}
+
+/*
+ * Receive MPA Request/Reply header.
+ *
+ * Returns 0 if the complete MPA Request/Reply header, including
+ * any private data, was received. Returns -EAGAIN if the header
+ * was only partially received, or a negative error code otherwise.
+ *
+ * Context: May be called in process context only
+ */
+static int erdma_recv_mpa_rr(struct erdma_cep *cep)
+{
+ struct mpa_rr *hdr = &cep->mpa.hdr;
+ struct socket *s = cep->sock;
+ u16 pd_len;
+ int rcvd, to_rcv, ret, pd_rcvd;
+
+ if (cep->mpa.bytes_rcvd < sizeof(struct mpa_rr)) {
+ ret = __recv_mpa_hdr(cep, cep->mpa.bytes_rcvd,
+ (char *)&cep->mpa.hdr,
+ sizeof(struct mpa_rr), &rcvd);
+ cep->mpa.bytes_rcvd += rcvd;
+ if (ret)
+ return ret;
+ }
+
+ if (be16_to_cpu(hdr->params.pd_len) > MPA_MAX_PRIVDATA ||
+ __mpa_rr_revision(hdr->params.bits) != MPA_REVISION_EXT_1)
+ return -EPROTO;
+
+ if (cep->mpa.bytes_rcvd - sizeof(struct mpa_rr) <
+ sizeof(struct erdma_mpa_ext)) {
+ ret = __recv_mpa_hdr(
+ cep, cep->mpa.bytes_rcvd - sizeof(struct mpa_rr),
+ (char *)&cep->mpa.ext_data,
+ sizeof(struct erdma_mpa_ext), &rcvd);
+ cep->mpa.bytes_rcvd += rcvd;
+ if (ret)
+ return ret;
+ }
+
+ pd_len = be16_to_cpu(hdr->params.pd_len);
+ pd_rcvd = cep->mpa.bytes_rcvd - sizeof(struct mpa_rr) -
+ sizeof(struct erdma_mpa_ext);
+ to_rcv = pd_len - pd_rcvd;
+
+ if (!to_rcv) {
+ /*
+ * We have received the whole MPA Request/Reply message.
+ * Check against peer protocol violation.
+ */
+ u32 word;
+
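+ /*
+ * Peek for trailing bytes: reading nothing here (-EAGAIN with zero
+ * bytes returned) means the peer sent exactly the announced length;
+ * any extra data is treated as a protocol violation.
+ */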
+ ret = __recv_mpa_hdr(cep, 0, (char *)&word, sizeof(word),
+ &rcvd);
+ if (ret == -EAGAIN && rcvd == 0)
+ return 0;
+
+ if (ret)
+ return ret;
+
+ return -EPROTO;
+ }
+
+ /*
+ * At this point, MPA header has been fully received, and pd_len != 0.
+ * So, begin to receive private data.
+ */
+ if (!cep->mpa.pdata) {
+ cep->mpa.pdata = kmalloc(pd_len + 4, GFP_KERNEL);
+ if (!cep->mpa.pdata)
+ return -ENOMEM;
+ }
+
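+ /*
+ * The buffer above has 4 spare bytes and the read below asks for up
+ * to 4 bytes beyond the announced private data length, so trailing
+ * data from the peer shows up as rcvd > to_rcv and is rejected as a
+ * protocol violation.
+ */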
+ rcvd = ksock_recv(s, cep->mpa.pdata + pd_rcvd, to_rcv + 4,
+ MSG_DONTWAIT);
+ if (rcvd < 0)
+ return rcvd;
+
+ if (rcvd > to_rcv)
+ return -EPROTO;
+
+ cep->mpa.bytes_rcvd += rcvd;
+
+ if (to_rcv == rcvd)
+ return 0;
+
+ return -EAGAIN;
+}
+
+/*
+ * erdma_proc_mpareq()
+ *
+ * Read an MPA Request from the socket and, on success, signal the new
+ * connection to the IWCM. Caller must hold the lock on the corresponding
+ * listening CEP.
+ */
+static int erdma_proc_mpareq(struct erdma_cep *cep)
+{
+ struct mpa_rr *req;
+ int ret;
+
+ ret = erdma_recv_mpa_rr(cep);
+ if (ret)
+ return ret;
+
+ req = &cep->mpa.hdr;
+
+ if (memcmp(req->key, MPA_KEY_REQ, MPA_KEY_SIZE))
+ return -EPROTO;
+
+ memcpy(req->key, MPA_KEY_REP, MPA_KEY_SIZE);
+
+ /* Markers and CRC are currently not supported. */
+ if (req->params.bits & MPA_RR_FLAG_MARKERS ||
+ req->params.bits & MPA_RR_FLAG_CRC)
+ goto reject_conn;
+
+ cep->state = ERDMA_EPSTATE_RECVD_MPAREQ;
+
+ /* Keep reference until IWCM accepts/rejects */
+ erdma_cep_get(cep);
+ ret = erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REQUEST, 0);
+ if (ret)
+ erdma_cep_put(cep);
+
+ return ret;
+
+reject_conn:
+ req->params.bits &= ~MPA_RR_FLAG_MARKERS;
+ req->params.bits |= MPA_RR_FLAG_REJECT;
+ req->params.bits &= ~MPA_RR_FLAG_CRC;
+
+ kfree(cep->mpa.pdata);
+ cep->mpa.pdata = NULL;
+ erdma_send_mpareqrep(cep, NULL, 0);
+
+ return -EOPNOTSUPP;
+}
+
+static int erdma_proc_mpareply(struct erdma_cep *cep)
+{
+ enum erdma_qpa_mask_iwarp to_modify_attrs = 0;
+ struct erdma_mod_qp_params_iwarp params;
+ struct erdma_qp *qp = cep->qp;
+ struct mpa_rr *rep;
+ int ret;
+
+ ret = erdma_recv_mpa_rr(cep);
+ if (ret)
+ goto out_err;
+
+ erdma_cancel_mpatimer(cep);
+
+ rep = &cep->mpa.hdr;
+
+ if (memcmp(rep->key, MPA_KEY_REP, MPA_KEY_SIZE)) {
+ ret = -EPROTO;
+ goto out_err;
+ }
+
+ if (rep->params.bits & MPA_RR_FLAG_REJECT) {
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNRESET);
+ return -ECONNRESET;
+ }
+
+ /* Markers and CRC are currently not supported. */
+ if ((rep->params.bits & MPA_RR_FLAG_MARKERS) ||
+ (rep->params.bits & MPA_RR_FLAG_CRC)) {
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED);
+ return -EINVAL;
+ }
+
+ memset(&params, 0, sizeof(params));
+ params.state = ERDMA_QPS_IWARP_RTS;
+ params.irq_size = cep->ird;
+ params.orq_size = cep->ord;
+
+ down_write(&qp->state_lock);
+ if (qp->attrs.iwarp.state > ERDMA_QPS_IWARP_RTR) {
+ ret = -EINVAL;
+ up_write(&qp->state_lock);
+ goto out_err;
+ }
+
+ to_modify_attrs = ERDMA_QPA_IWARP_STATE | ERDMA_QPA_IWARP_LLP_HANDLE |
+ ERDMA_QPA_IWARP_MPA | ERDMA_QPA_IWARP_IRD |
+ ERDMA_QPA_IWARP_ORD;
+
+ params.qp_type = ERDMA_QP_ACTIVE;
+ if (__mpa_ext_cc(cep->mpa.ext_data.bits) != qp->attrs.cc) {
+ to_modify_attrs |= ERDMA_QPA_IWARP_CC;
+ params.cc = COMPROMISE_CC;
+ }
+
+ ret = erdma_modify_qp_state_iwarp(qp, &params, to_modify_attrs);
+
+ up_write(&qp->state_lock);
+
+ if (!ret) {
+ ret = erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, 0);
+ if (!ret)
+ cep->state = ERDMA_EPSTATE_RDMA_MODE;
+
+ return 0;
+ }
+
+out_err:
+ if (ret != -EAGAIN)
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL);
+
+ return ret;
+}
+
+static void erdma_accept_newconn(struct erdma_cep *cep)
+{
+ struct socket *s = cep->sock;
+ struct socket *new_s = NULL;
+ struct erdma_cep *new_cep = NULL;
+ int ret = 0;
+
+ if (cep->state != ERDMA_EPSTATE_LISTENING)
+ goto error;
+
+ new_cep = erdma_cep_alloc(cep->dev);
+ if (!new_cep)
+ goto error;
+
+ /*
+ * 4: Allocate a sufficient number of work elements
+ * to allow concurrent handling of local + peer close
+ * events, MPA header processing + MPA timeout.
+ */
+ if (erdma_cm_alloc_work(new_cep, 4) != 0)
+ goto error;
+
+ /*
+ * Copy the saved socket callbacks from the listening CEP
+ * and associate the new socket with the new CEP.
+ */
+ new_cep->sk_state_change = cep->sk_state_change;
+ new_cep->sk_data_ready = cep->sk_data_ready;
+ new_cep->sk_error_report = cep->sk_error_report;
+
+ ret = kernel_accept(s, &new_s, O_NONBLOCK);
+ if (ret != 0)
+ goto error;
+
+ new_cep->sock = new_s;
+ erdma_cep_get(new_cep);
+ new_s->sk->sk_user_data = new_cep;
+
+ tcp_sock_set_nodelay(new_s->sk);
+ new_cep->state = ERDMA_EPSTATE_AWAIT_MPAREQ;
+
+ ret = erdma_cm_queue_work(new_cep, ERDMA_CM_WORK_MPATIMEOUT);
+ if (ret)
+ goto error;
+
+ new_cep->listen_cep = cep;
+ erdma_cep_get(cep);
+
+ if (atomic_read(&new_s->sk->sk_rmem_alloc)) {
+ /* MPA REQ already queued */
+ erdma_cep_set_inuse(new_cep);
+ ret = erdma_proc_mpareq(new_cep);
+ if (ret != -EAGAIN) {
+ erdma_cep_put(cep);
+ new_cep->listen_cep = NULL;
+ if (ret) {
+ erdma_cep_set_free(new_cep);
+ goto error;
+ }
+ }
+ erdma_cep_set_free(new_cep);
+ }
+ return;
+
+error:
+ if (new_cep) {
+ new_cep->state = ERDMA_EPSTATE_CLOSED;
+ erdma_cancel_mpatimer(new_cep);
+
+ erdma_cep_put(new_cep);
+ }
+
+ if (new_s) {
+ erdma_socket_disassoc(new_s);
+ sock_release(new_s);
+ }
+}
+
+static int erdma_newconn_connected(struct erdma_cep *cep)
+{
+ int ret = 0;
+
+ cep->mpa.hdr.params.bits = 0;
+ __mpa_rr_set_revision(&cep->mpa.hdr.params.bits, MPA_REVISION_EXT_1);
+
+ memcpy(cep->mpa.hdr.key, MPA_KEY_REQ, MPA_KEY_SIZE);
+ cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.iwarp.cookie);
+ __mpa_ext_set_cc(&cep->mpa.ext_data.bits, cep->qp->attrs.cc);
+
+ ret = erdma_send_mpareqrep(cep, cep->private_data, cep->pd_len);
+ cep->state = ERDMA_EPSTATE_AWAIT_MPAREP;
+ cep->mpa.hdr.params.pd_len = 0;
+
+ if (ret >= 0)
+ ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_MPATIMEOUT);
+
+ return ret;
+}
+
+static void erdma_cm_work_handler(struct work_struct *w)
+{
+ struct erdma_cm_work *work;
+ struct erdma_cep *cep;
+ int release_cep = 0, ret = 0;
+
+ work = container_of(w, struct erdma_cm_work, work.work);
+ cep = work->cep;
+
+ erdma_cep_set_inuse(cep);
+
+ switch (work->type) {
+ case ERDMA_CM_WORK_CONNECTED:
+ erdma_cancel_mpatimer(cep);
+ if (cep->state == ERDMA_EPSTATE_CONNECTING) {
+ ret = erdma_newconn_connected(cep);
+ if (ret) {
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
+ -EIO);
+ release_cep = 1;
+ }
+ }
+ break;
+ case ERDMA_CM_WORK_CONNECTTIMEOUT:
+ if (cep->state == ERDMA_EPSTATE_CONNECTING) {
+ cep->mpa_timer = NULL;
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
+ -ETIMEDOUT);
+ release_cep = 1;
+ }
+ break;
+ case ERDMA_CM_WORK_ACCEPT:
+ erdma_accept_newconn(cep);
+ break;
+ case ERDMA_CM_WORK_READ_MPAHDR:
+ if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
+ if (cep->listen_cep) {
+ erdma_cep_set_inuse(cep->listen_cep);
+
+ if (cep->listen_cep->state ==
+ ERDMA_EPSTATE_LISTENING)
+ ret = erdma_proc_mpareq(cep);
+ else
+ ret = -EFAULT;
+
+ erdma_cep_set_free(cep->listen_cep);
+
+ if (ret != -EAGAIN) {
+ erdma_cep_put(cep->listen_cep);
+ cep->listen_cep = NULL;
+ if (ret)
+ erdma_cep_put(cep);
+ }
+ }
+ } else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
+ ret = erdma_proc_mpareply(cep);
+ }
+
+ if (ret && ret != -EAGAIN)
+ release_cep = 1;
+ break;
+ case ERDMA_CM_WORK_CLOSE_LLP:
+ if (cep->cm_id)
+ erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
+ release_cep = 1;
+ break;
+ case ERDMA_CM_WORK_PEER_CLOSE:
+ if (cep->cm_id) {
+ if (cep->state == ERDMA_EPSTATE_CONNECTING ||
+ cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
+ /*
+ * MPA reply not yet received, but the connection was dropped.
+ */
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
+ -ECONNRESET);
+ } else if (cep->state == ERDMA_EPSTATE_RDMA_MODE) {
+ /*
+ * NOTE: IW_CM_EVENT_DISCONNECT is given just
+ * to transition IWCM into CLOSING.
+ */
+ erdma_cm_upcall(cep, IW_CM_EVENT_DISCONNECT, 0);
+ erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
+ }
+ } else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
+ /* Socket close before MPA request received. */
+ erdma_disassoc_listen_cep(cep);
+ erdma_cep_put(cep);
+ }
+ release_cep = 1;
+ break;
+ case ERDMA_CM_WORK_MPATIMEOUT:
+ cep->mpa_timer = NULL;
+ if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
+ /*
+ * MPA request timed out:
+ * Hide any partially received private data and signal
+ * timeout
+ */
+ cep->mpa.hdr.params.pd_len = 0;
+
+ if (cep->cm_id)
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
+ -ETIMEDOUT);
+ release_cep = 1;
+ } else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
+ /* No MPA req received after peer TCP stream setup. */
+ erdma_disassoc_listen_cep(cep);
+
+ erdma_cep_put(cep);
+ release_cep = 1;
+ }
+ break;
+ default:
+ WARN(1, "Undefined CM work type: %d\n", work->type);
+ }
+
+ if (release_cep) {
+ erdma_cancel_mpatimer(cep);
+ cep->state = ERDMA_EPSTATE_CLOSED;
+ if (cep->qp) {
+ struct erdma_qp *qp = cep->qp;
+ /*
+ * Serialize a potential race with application
+ * closing the QP and calling erdma_qp_cm_drop()
+ */
+ erdma_qp_get(qp);
+ erdma_cep_set_free(cep);
+
+ erdma_qp_llp_close(qp);
+ erdma_qp_put(qp);
+
+ erdma_cep_set_inuse(cep);
+ cep->qp = NULL;
+ erdma_qp_put(qp);
+ }
+
+ if (cep->sock) {
+ erdma_socket_disassoc(cep->sock);
+ sock_release(cep->sock);
+ cep->sock = NULL;
+ }
+
+ if (cep->cm_id) {
+ cep->cm_id->rem_ref(cep->cm_id);
+ cep->cm_id = NULL;
+ if (cep->state != ERDMA_EPSTATE_LISTENING)
+ erdma_cep_put(cep);
+ }
+ }
+ erdma_cep_set_free(cep);
+ erdma_put_work(work);
+ erdma_cep_put(cep);
+}
+
+int erdma_cm_queue_work(struct erdma_cep *cep, enum erdma_work_type type)
+{
+ struct erdma_cm_work *work = erdma_get_work(cep);
+ unsigned long delay = 0;
+
+ if (!work)
+ return -ENOMEM;
+
+ work->type = type;
+ work->cep = cep;
+
+ erdma_cep_get(cep);
+
+ INIT_DELAYED_WORK(&work->work, erdma_cm_work_handler);
+
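+ /*
+ * Timeout work items double as the MPA/connect timer; remember the
+ * work in cep->mpa_timer so it can be cancelled once the awaited
+ * event arrives.
+ */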
+ if (type == ERDMA_CM_WORK_MPATIMEOUT) {
+ cep->mpa_timer = work;
+
+ if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP)
+ delay = MPAREP_TIMEOUT;
+ else
+ delay = MPAREQ_TIMEOUT;
+ } else if (type == ERDMA_CM_WORK_CONNECTTIMEOUT) {
+ cep->mpa_timer = work;
+
+ delay = CONNECT_TIMEOUT;
+ }
+
+ queue_delayed_work(erdma_cm_wq, &work->work, delay);
+
+ return 0;
+}
+
+static void erdma_cm_llp_data_ready(struct sock *sk)
+{
+ struct erdma_cep *cep;
+
+ trace_sk_data_ready(sk);
+
+ read_lock(&sk->sk_callback_lock);
+
+ cep = sk_to_cep(sk);
+ if (!cep)
+ goto out;
+
+ if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ ||
+ cep->state == ERDMA_EPSTATE_AWAIT_MPAREP)
+ erdma_cm_queue_work(cep, ERDMA_CM_WORK_READ_MPAHDR);
+
+out:
+ read_unlock(&sk->sk_callback_lock);
+}
+
+static void erdma_cm_llp_error_report(struct sock *sk)
+{
+ struct erdma_cep *cep = sk_to_cep(sk);
+
+ if (cep)
+ cep->sk_error_report(sk);
+}
+
+static void erdma_cm_llp_state_change(struct sock *sk)
+{
+ struct erdma_cep *cep;
+ void (*orig_state_change)(struct sock *sk);
+
+ read_lock(&sk->sk_callback_lock);
+
+ cep = sk_to_cep(sk);
+ if (!cep) {
+ read_unlock(&sk->sk_callback_lock);
+ return;
+ }
+ orig_state_change = cep->sk_state_change;
+
+ switch (sk->sk_state) {
+ case TCP_ESTABLISHED:
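+ /*
+ * Either an active connect completed (CEP in CONNECTING state)
+ * or a new connection arrived on a listening socket.
+ */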
+ if (cep->state == ERDMA_EPSTATE_CONNECTING)
+ erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTED);
+ else
+ erdma_cm_queue_work(cep, ERDMA_CM_WORK_ACCEPT);
+ break;
+ case TCP_CLOSE:
+ case TCP_CLOSE_WAIT:
+ if (cep->state != ERDMA_EPSTATE_LISTENING)
+ erdma_cm_queue_work(cep, ERDMA_CM_WORK_PEER_CLOSE);
+ break;
+ default:
+ break;
+ }
+ read_unlock(&sk->sk_callback_lock);
+ orig_state_change(sk);
+}
+
+static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr,
+ int laddrlen, struct sockaddr *raddr,
+ int raddrlen, int flags)
+{
+ int ret;
+
+ sock_set_reuseaddr(s->sk);
+ ret = s->ops->bind(s, (struct sockaddr_unsized *)laddr, laddrlen);
+ if (ret)
+ return ret;
+ ret = s->ops->connect(s, (struct sockaddr_unsized *)raddr, raddrlen, flags);
+ return ret < 0 ? ret : 0;
+}
+
+int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params)
+{
+ struct erdma_dev *dev = to_edev(id->device);
+ struct erdma_qp *qp;
+ struct erdma_cep *cep = NULL;
+ struct socket *s = NULL;
+ struct sockaddr *laddr = (struct sockaddr *)&id->m_local_addr;
+ struct sockaddr *raddr = (struct sockaddr *)&id->m_remote_addr;
+ u16 pd_len = params->private_data_len;
+ int ret;
+
+ if (pd_len > MPA_MAX_PRIVDATA)
+ return -EINVAL;
+
+ if (params->ird > dev->attrs.max_ird ||
+ params->ord > dev->attrs.max_ord)
+ return -EINVAL;
+
+ if (laddr->sa_family != AF_INET || raddr->sa_family != AF_INET)
+ return -EAFNOSUPPORT;
+
+ qp = find_qp_by_qpn(dev, params->qpn);
+ if (!qp)
+ return -ENOENT;
+ erdma_qp_get(qp);
+
+ ret = sock_create(AF_INET, SOCK_STREAM, IPPROTO_TCP, &s);
+ if (ret < 0)
+ goto error_put_qp;
+
+ cep = erdma_cep_alloc(dev);
+ if (!cep) {
+ ret = -ENOMEM;
+ goto error_release_sock;
+ }
+
+ erdma_cep_set_inuse(cep);
+
+ /* Associate QP with CEP */
+ erdma_cep_get(cep);
+ qp->cep = cep;
+ cep->qp = qp;
+
+ /* Associate cm_id with CEP */
+ id->add_ref(id);
+ cep->cm_id = id;
+
+ /*
+ * 6: Allocate a sufficient number of work elements
+ * to allow concurrent handling of local + peer close
+ * events, MPA header processing + MPA timeout, connected event
+ * and connect timeout.
+ */
+ ret = erdma_cm_alloc_work(cep, 6);
+ if (ret != 0) {
+ ret = -ENOMEM;
+ goto error_release_cep;
+ }
+
+ cep->ird = params->ird;
+ cep->ord = params->ord;
+ cep->state = ERDMA_EPSTATE_CONNECTING;
+
+ erdma_cep_socket_assoc(cep, s);
+
+ if (pd_len) {
+ cep->pd_len = pd_len;
+ cep->private_data = kmalloc(pd_len, GFP_KERNEL);
+ if (!cep->private_data) {
+ ret = -ENOMEM;
+ goto error_disassoc;
+ }
+
+ memcpy(cep->private_data, params->private_data,
+ params->private_data_len);
+ }
+
+ ret = kernel_bindconnect(s, laddr, sizeof(*laddr), raddr,
+ sizeof(*raddr), O_NONBLOCK);
+ if (ret != -EINPROGRESS && ret != 0) {
+ goto error_disassoc;
+ } else if (ret == 0) {
+ ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTED);
+ if (ret)
+ goto error_disassoc;
+ } else {
+ ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTTIMEOUT);
+ if (ret)
+ goto error_disassoc;
+ }
+
+ erdma_cep_set_free(cep);
+ return 0;
+
+error_disassoc:
+ kfree(cep->private_data);
+ cep->private_data = NULL;
+ cep->pd_len = 0;
+
+ erdma_socket_disassoc(s);
+
+error_release_cep:
+ /* disassoc with cm_id */
+ cep->cm_id = NULL;
+ id->rem_ref(id);
+
+ /* disassoc with qp */
+ qp->cep = NULL;
+ erdma_cep_put(cep);
+ cep->qp = NULL;
+
+ cep->state = ERDMA_EPSTATE_CLOSED;
+
+ erdma_cep_set_free(cep);
+
+ /* release the cep. */
+ erdma_cep_put(cep);
+
+error_release_sock:
+ if (s)
+ sock_release(s);
+error_put_qp:
+ erdma_qp_put(qp);
+
+ return ret;
+}
+
+int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
+{
+ struct erdma_cep *cep = (struct erdma_cep *)id->provider_data;
+ struct erdma_mod_qp_params_iwarp mod_qp_params;
+ enum erdma_qpa_mask_iwarp to_modify_attrs = 0;
+ struct erdma_dev *dev = to_edev(id->device);
+ struct erdma_qp *qp;
+ int ret;
+
+ erdma_cep_set_inuse(cep);
+ erdma_cep_put(cep);
+
+ /* Free lingering inbound private data */
+ if (cep->mpa.hdr.params.pd_len) {
+ cep->mpa.hdr.params.pd_len = 0;
+ kfree(cep->mpa.pdata);
+ cep->mpa.pdata = NULL;
+ }
+ erdma_cancel_mpatimer(cep);
+
+ if (cep->state != ERDMA_EPSTATE_RECVD_MPAREQ) {
+ erdma_cep_set_free(cep);
+ erdma_cep_put(cep);
+
+ return -ECONNRESET;
+ }
+
+ qp = find_qp_by_qpn(dev, params->qpn);
+ if (!qp)
+ return -ENOENT;
+ erdma_qp_get(qp);
+
+ down_write(&qp->state_lock);
+ if (qp->attrs.iwarp.state > ERDMA_QPS_IWARP_RTR) {
+ ret = -EINVAL;
+ up_write(&qp->state_lock);
+ goto error;
+ }
+
+ if (params->ord > dev->attrs.max_ord ||
+ params->ird > dev->attrs.max_ord) {
+ ret = -EINVAL;
+ up_write(&qp->state_lock);
+ goto error;
+ }
+
+ if (params->private_data_len > MPA_MAX_PRIVDATA) {
+ ret = -EINVAL;
+ up_write(&qp->state_lock);
+ goto error;
+ }
+
+ cep->ird = params->ird;
+ cep->ord = params->ord;
+
+ cep->cm_id = id;
+ id->add_ref(id);
+
+ memset(&mod_qp_params, 0, sizeof(mod_qp_params));
+
+ mod_qp_params.irq_size = params->ird;
+ mod_qp_params.orq_size = params->ord;
+ mod_qp_params.state = ERDMA_QPS_IWARP_RTS;
+
+ /* Associate QP with CEP */
+ erdma_cep_get(cep);
+ qp->cep = cep;
+ cep->qp = qp;
+
+ cep->state = ERDMA_EPSTATE_RDMA_MODE;
+
+ mod_qp_params.qp_type = ERDMA_QP_PASSIVE;
+ mod_qp_params.pd_len = params->private_data_len;
+
+ to_modify_attrs = ERDMA_QPA_IWARP_STATE | ERDMA_QPA_IWARP_ORD |
+ ERDMA_QPA_IWARP_LLP_HANDLE | ERDMA_QPA_IWARP_IRD |
+ ERDMA_QPA_IWARP_MPA;
+
+ if (qp->attrs.cc != __mpa_ext_cc(cep->mpa.ext_data.bits)) {
+ to_modify_attrs |= ERDMA_QPA_IWARP_CC;
+ mod_qp_params.cc = COMPROMISE_CC;
+ }
+
+ /* move to rts */
+ ret = erdma_modify_qp_state_iwarp(qp, &mod_qp_params, to_modify_attrs);
+
+ up_write(&qp->state_lock);
+
+ if (ret)
+ goto error;
+
+ cep->mpa.ext_data.bits = 0;
+ __mpa_ext_set_cc(&cep->mpa.ext_data.bits, qp->attrs.cc);
+ cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.iwarp.cookie);
+
+ ret = erdma_send_mpareqrep(cep, params->private_data,
+ params->private_data_len);
+ if (!ret) {
+ ret = erdma_cm_upcall(cep, IW_CM_EVENT_ESTABLISHED, 0);
+ if (ret)
+ goto error;
+
+ erdma_cep_set_free(cep);
+
+ return 0;
+ }
+
+error:
+ erdma_socket_disassoc(cep->sock);
+ sock_release(cep->sock);
+ cep->sock = NULL;
+
+ cep->state = ERDMA_EPSTATE_CLOSED;
+
+ if (cep->cm_id) {
+ cep->cm_id->rem_ref(id);
+ cep->cm_id = NULL;
+ }
+
+ if (qp->cep) {
+ erdma_cep_put(cep);
+ qp->cep = NULL;
+ }
+
+ cep->qp = NULL;
+ erdma_qp_put(qp);
+
+ erdma_cep_set_free(cep);
+ erdma_cep_put(cep);
+
+ return ret;
+}
+
+int erdma_reject(struct iw_cm_id *id, const void *pdata, u8 plen)
+{
+ struct erdma_cep *cep = (struct erdma_cep *)id->provider_data;
+
+ erdma_cep_set_inuse(cep);
+ erdma_cep_put(cep);
+
+ erdma_cancel_mpatimer(cep);
+
+ if (cep->state != ERDMA_EPSTATE_RECVD_MPAREQ) {
+ erdma_cep_set_free(cep);
+ erdma_cep_put(cep);
+
+ return -ECONNRESET;
+ }
+
+ if (__mpa_rr_revision(cep->mpa.hdr.params.bits) == MPA_REVISION_EXT_1) {
+ cep->mpa.hdr.params.bits |= MPA_RR_FLAG_REJECT; /* reject */
+ erdma_send_mpareqrep(cep, pdata, plen);
+ }
+
+ erdma_socket_disassoc(cep->sock);
+ sock_release(cep->sock);
+ cep->sock = NULL;
+
+ cep->state = ERDMA_EPSTATE_CLOSED;
+
+ erdma_cep_set_free(cep);
+ erdma_cep_put(cep);
+
+ return 0;
+}
+
+int erdma_create_listen(struct iw_cm_id *id, int backlog)
+{
+ struct socket *s;
+ struct erdma_cep *cep = NULL;
+ int ret = 0;
+ struct erdma_dev *dev = to_edev(id->device);
+ int addr_family = id->local_addr.ss_family;
+ struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);
+
+ if (addr_family != AF_INET)
+ return -EAFNOSUPPORT;
+
+ ret = sock_create(addr_family, SOCK_STREAM, IPPROTO_TCP, &s);
+ if (ret < 0)
+ return ret;
+
+ sock_set_reuseaddr(s->sk);
+
+ /* For wildcard addr, limit binding to current device only */
+ if (ipv4_is_zeronet(laddr->sin_addr.s_addr))
+ s->sk->sk_bound_dev_if = dev->netdev->ifindex;
+
+ ret = s->ops->bind(s, (struct sockaddr_unsized *)laddr,
+ sizeof(struct sockaddr_in));
+ if (ret)
+ goto error;
+
+ cep = erdma_cep_alloc(dev);
+ if (!cep) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ erdma_cep_socket_assoc(cep, s);
+
+ ret = erdma_cm_alloc_work(cep, backlog);
+ if (ret)
+ goto error;
+
+ ret = s->ops->listen(s, backlog);
+ if (ret)
+ goto error;
+
+ cep->cm_id = id;
+ id->add_ref(id);
+
+ if (!id->provider_data) {
+ id->provider_data =
+ kmalloc(sizeof(struct list_head), GFP_KERNEL);
+ if (!id->provider_data) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ INIT_LIST_HEAD((struct list_head *)id->provider_data);
+ }
+
+ list_add_tail(&cep->listenq, (struct list_head *)id->provider_data);
+ cep->state = ERDMA_EPSTATE_LISTENING;
+
+ return 0;
+
+error:
+ if (cep) {
+ erdma_cep_set_inuse(cep);
+
+ if (cep->cm_id) {
+ cep->cm_id->rem_ref(cep->cm_id);
+ cep->cm_id = NULL;
+ }
+ cep->sock = NULL;
+ erdma_socket_disassoc(s);
+ cep->state = ERDMA_EPSTATE_CLOSED;
+
+ erdma_cep_set_free(cep);
+ erdma_cep_put(cep);
+ }
+ sock_release(s);
+
+ return ret;
+}
+
+static void erdma_drop_listeners(struct iw_cm_id *id)
+{
+ struct list_head *p, *tmp;
+ /*
+ * In case of a wildcard rdma_listen on a multi-homed device,
+ * a listener's IWCM id is associated with more than one listening CEP.
+ */
+ list_for_each_safe(p, tmp, (struct list_head *)id->provider_data) {
+ struct erdma_cep *cep =
+ list_entry(p, struct erdma_cep, listenq);
+
+ list_del(p);
+
+ erdma_cep_set_inuse(cep);
+
+ if (cep->cm_id) {
+ cep->cm_id->rem_ref(cep->cm_id);
+ cep->cm_id = NULL;
+ }
+ if (cep->sock) {
+ erdma_socket_disassoc(cep->sock);
+ sock_release(cep->sock);
+ cep->sock = NULL;
+ }
+ cep->state = ERDMA_EPSTATE_CLOSED;
+ erdma_cep_set_free(cep);
+ erdma_cep_put(cep);
+ }
+}
+
+int erdma_destroy_listen(struct iw_cm_id *id)
+{
+ if (!id->provider_data)
+ return 0;
+
+ erdma_drop_listeners(id);
+ kfree(id->provider_data);
+ id->provider_data = NULL;
+
+ return 0;
+}
+
+int erdma_cm_init(void)
+{
+ erdma_cm_wq = create_singlethread_workqueue("erdma_cm_wq");
+ if (!erdma_cm_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void erdma_cm_exit(void)
+{
+ if (erdma_cm_wq)
+ destroy_workqueue(erdma_cm_wq);
+}
diff --git a/drivers/infiniband/hw/erdma/erdma_cm.h b/drivers/infiniband/hw/erdma/erdma_cm.h
new file mode 100644
index 000000000000..a26d80770188
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_cm.h
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Greg Joyce <greg@opengridcomputing.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+/* Copyright (c) 2017, Open Grid Computing, Inc. */
+
+#ifndef __ERDMA_CM_H__
+#define __ERDMA_CM_H__
+
+#include <linux/tcp.h>
+#include <net/sock.h>
+#include <rdma/iw_cm.h>
+
+/* iWarp MPA protocol defs */
+#define MPA_REVISION_EXT_1 129
+#define MPA_MAX_PRIVDATA RDMA_MAX_PRIVATE_DATA
+#define MPA_KEY_REQ "MPA ID Req Frame"
+#define MPA_KEY_REP "MPA ID Rep Frame"
+#define MPA_KEY_SIZE 16
+#define MPA_DEFAULT_HDR_LEN 28
+
+struct mpa_rr_params {
+ __be16 bits;
+ __be16 pd_len;
+};
+
+/*
+ * MPA request/response Hdr bits & fields
+ */
+enum {
+ MPA_RR_FLAG_MARKERS = cpu_to_be16(0x8000),
+ MPA_RR_FLAG_CRC = cpu_to_be16(0x4000),
+ MPA_RR_FLAG_REJECT = cpu_to_be16(0x2000),
+ MPA_RR_RESERVED = cpu_to_be16(0x1f00),
+ MPA_RR_MASK_REVISION = cpu_to_be16(0x00ff)
+};
+
+/*
+ * MPA request/reply header
+ */
+struct mpa_rr {
+ u8 key[16];
+ struct mpa_rr_params params;
+};
+
+struct erdma_mpa_ext {
+ __be32 cookie;
+ __be32 bits;
+};
+
+enum {
+ MPA_EXT_FLAG_CC = cpu_to_be32(0x0000000f),
+};
+
+struct erdma_mpa_info {
+ struct mpa_rr hdr; /* peer MPA header, kept in network byte order */
+ struct erdma_mpa_ext ext_data;
+ char *pdata;
+ int bytes_rcvd;
+};
+
+struct erdma_sk_upcalls {
+ void (*sk_state_change)(struct sock *sk);
+ void (*sk_data_ready)(struct sock *sk, int bytes);
+ void (*sk_error_report)(struct sock *sk);
+};
+
+struct erdma_dev;
+
+enum erdma_cep_state {
+ ERDMA_EPSTATE_IDLE = 1,
+ ERDMA_EPSTATE_LISTENING,
+ ERDMA_EPSTATE_CONNECTING,
+ ERDMA_EPSTATE_AWAIT_MPAREQ,
+ ERDMA_EPSTATE_RECVD_MPAREQ,
+ ERDMA_EPSTATE_AWAIT_MPAREP,
+ ERDMA_EPSTATE_RDMA_MODE,
+ ERDMA_EPSTATE_CLOSED
+};
+
+struct erdma_cep {
+ struct iw_cm_id *cm_id;
+ struct erdma_dev *dev;
+ struct list_head devq;
+ spinlock_t lock;
+ struct kref ref;
+ int in_use;
+ wait_queue_head_t waitq;
+ enum erdma_cep_state state;
+
+ struct list_head listenq;
+ struct erdma_cep *listen_cep;
+
+ struct erdma_qp *qp;
+ struct socket *sock;
+
+ struct erdma_cm_work *mpa_timer;
+ struct list_head work_freelist;
+
+ struct erdma_mpa_info mpa;
+ int ord;
+ int ird;
+
+ int pd_len;
+ /* hold user's private data. */
+ void *private_data;
+
+ /* Saved upcalls of the attached socket */
+ void (*sk_state_change)(struct sock *sk);
+ void (*sk_data_ready)(struct sock *sk);
+ void (*sk_error_report)(struct sock *sk);
+};
+
+#define MPAREQ_TIMEOUT (HZ * 20)
+#define MPAREP_TIMEOUT (HZ * 10)
+#define CONNECT_TIMEOUT (HZ * 10)
+
+enum erdma_work_type {
+ ERDMA_CM_WORK_ACCEPT = 1,
+ ERDMA_CM_WORK_READ_MPAHDR,
+ ERDMA_CM_WORK_CLOSE_LLP, /* close socket */
+ ERDMA_CM_WORK_PEER_CLOSE, /* socket indicated peer close */
+ ERDMA_CM_WORK_MPATIMEOUT,
+ ERDMA_CM_WORK_CONNECTED,
+ ERDMA_CM_WORK_CONNECTTIMEOUT
+};
+
+struct erdma_cm_work {
+ struct delayed_work work;
+ struct list_head list;
+ enum erdma_work_type type;
+ struct erdma_cep *cep;
+};
+
+#define to_sockaddr_in(a) (*(struct sockaddr_in *)(&(a)))
+
+static inline int getname_peer(struct socket *s, struct sockaddr_storage *a)
+{
+ return s->ops->getname(s, (struct sockaddr *)a, 1);
+}
+
+static inline int getname_local(struct socket *s, struct sockaddr_storage *a)
+{
+ return s->ops->getname(s, (struct sockaddr *)a, 0);
+}
+
+int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *param);
+int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *param);
+int erdma_reject(struct iw_cm_id *id, const void *pdata, u8 plen);
+int erdma_create_listen(struct iw_cm_id *id, int backlog);
+int erdma_destroy_listen(struct iw_cm_id *id);
+
+void erdma_cep_get(struct erdma_cep *cep);
+void erdma_cep_put(struct erdma_cep *cep);
+int erdma_cm_queue_work(struct erdma_cep *cep, enum erdma_work_type type);
+
+int erdma_cm_init(void);
+void erdma_cm_exit(void);
+
+#define sk_to_cep(sk) ((struct erdma_cep *)((sk)->sk_user_data))
+
+#endif
diff --git a/drivers/infiniband/hw/erdma/erdma_cmdq.c b/drivers/infiniband/hw/erdma/erdma_cmdq.c
new file mode 100644
index 000000000000..b867aefe83b2
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_cmdq.c
@@ -0,0 +1,452 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+#include "erdma.h"
+
+static void arm_cmdq_cq(struct erdma_cmdq *cmdq)
+{
+ struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
+ u64 db_data = FIELD_PREP(ERDMA_CQDB_CI_MASK, cmdq->cq.ci) |
+ FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) |
+ FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cmdq->cq.cmdsn) |
+ FIELD_PREP(ERDMA_CQDB_IDX_MASK, cmdq->cq.cmdsn);
+
+ *cmdq->cq.dbrec = db_data;
+ writeq(db_data, dev->func_bar + ERDMA_CMDQ_CQDB_REG);
+
+ atomic64_inc(&cmdq->cq.armed_num);
+}
+
+static void kick_cmdq_db(struct erdma_cmdq *cmdq)
+{
+ struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
+ u64 db_data = FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi);
+
+ *cmdq->sq.dbrec = db_data;
+ writeq(db_data, dev->func_bar + ERDMA_CMDQ_SQDB_REG);
+}
+
+static struct erdma_comp_wait *get_comp_wait(struct erdma_cmdq *cmdq)
+{
+ int comp_idx;
+
+ spin_lock(&cmdq->lock);
+ comp_idx = find_first_zero_bit(cmdq->comp_wait_bitmap,
+ cmdq->max_outstandings);
+ if (comp_idx == cmdq->max_outstandings) {
+ spin_unlock(&cmdq->lock);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ __set_bit(comp_idx, cmdq->comp_wait_bitmap);
+ spin_unlock(&cmdq->lock);
+
+ return &cmdq->wait_pool[comp_idx];
+}
+
+static void put_comp_wait(struct erdma_cmdq *cmdq,
+ struct erdma_comp_wait *comp_wait)
+{
+ int used;
+
+ cmdq->wait_pool[comp_wait->ctx_id].cmd_status = ERDMA_CMD_STATUS_INIT;
+ spin_lock(&cmdq->lock);
+ used = __test_and_clear_bit(comp_wait->ctx_id, cmdq->comp_wait_bitmap);
+ spin_unlock(&cmdq->lock);
+
+ WARN_ON(!used);
+}
+
+static int erdma_cmdq_wait_res_init(struct erdma_dev *dev,
+ struct erdma_cmdq *cmdq)
+{
+ int i;
+
+ cmdq->wait_pool =
+ devm_kcalloc(&dev->pdev->dev, cmdq->max_outstandings,
+ sizeof(struct erdma_comp_wait), GFP_KERNEL);
+ if (!cmdq->wait_pool)
+ return -ENOMEM;
+
+ spin_lock_init(&cmdq->lock);
+ cmdq->comp_wait_bitmap = devm_bitmap_zalloc(
+ &dev->pdev->dev, cmdq->max_outstandings, GFP_KERNEL);
+ if (!cmdq->comp_wait_bitmap)
+ return -ENOMEM;
+
+ for (i = 0; i < cmdq->max_outstandings; i++) {
+ init_completion(&cmdq->wait_pool[i].wait_event);
+ cmdq->wait_pool[i].ctx_id = i;
+ }
+
+ return 0;
+}
+
+static int erdma_cmdq_sq_init(struct erdma_dev *dev)
+{
+ struct erdma_cmdq *cmdq = &dev->cmdq;
+ struct erdma_cmdq_sq *sq = &cmdq->sq;
+
+ sq->wqebb_cnt = SQEBB_COUNT(ERDMA_CMDQ_SQE_SIZE);
+ sq->depth = cmdq->max_outstandings * sq->wqebb_cnt;
+
+ sq->qbuf = dma_alloc_coherent(&dev->pdev->dev, sq->depth << SQEBB_SHIFT,
+ &sq->qbuf_dma_addr, GFP_KERNEL);
+ if (!sq->qbuf)
+ return -ENOMEM;
+
+ sq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &sq->dbrec_dma);
+ if (!sq->dbrec)
+ goto err_out;
+
+ spin_lock_init(&sq->lock);
+
+ erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_H_REG,
+ upper_32_bits(sq->qbuf_dma_addr));
+ erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_L_REG,
+ lower_32_bits(sq->qbuf_dma_addr));
+ erdma_reg_write32(dev, ERDMA_REGS_CMDQ_DEPTH_REG, sq->depth);
+ erdma_reg_write64(dev, ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG, sq->dbrec_dma);
+
+ return 0;
+
+err_out:
+ dma_free_coherent(&dev->pdev->dev, sq->depth << SQEBB_SHIFT,
+ sq->qbuf, sq->qbuf_dma_addr);
+
+ return -ENOMEM;
+}
+
+static int erdma_cmdq_cq_init(struct erdma_dev *dev)
+{
+ struct erdma_cmdq *cmdq = &dev->cmdq;
+ struct erdma_cmdq_cq *cq = &cmdq->cq;
+
+ cq->depth = cmdq->sq.depth;
+ cq->qbuf = dma_alloc_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT,
+ &cq->qbuf_dma_addr, GFP_KERNEL);
+ if (!cq->qbuf)
+ return -ENOMEM;
+
+ spin_lock_init(&cq->lock);
+
+ cq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &cq->dbrec_dma);
+ if (!cq->dbrec)
+ goto err_out;
+
+ atomic64_set(&cq->armed_num, 0);
+
+ erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_H_REG,
+ upper_32_bits(cq->qbuf_dma_addr));
+ erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_L_REG,
+ lower_32_bits(cq->qbuf_dma_addr));
+ erdma_reg_write64(dev, ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG, cq->dbrec_dma);
+
+ return 0;
+
+err_out:
+ dma_free_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT, cq->qbuf,
+ cq->qbuf_dma_addr);
+
+ return -ENOMEM;
+}
+
+static int erdma_cmdq_eq_init(struct erdma_dev *dev)
+{
+ struct erdma_cmdq *cmdq = &dev->cmdq;
+ struct erdma_eq *eq = &cmdq->eq;
+ int ret;
+
+ ret = erdma_eq_common_init(dev, eq, cmdq->max_outstandings);
+ if (ret)
+ return ret;
+
+ eq->db = dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG;
+
+ erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_H_REG,
+ upper_32_bits(eq->qbuf_dma_addr));
+ erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_L_REG,
+ lower_32_bits(eq->qbuf_dma_addr));
+ erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_DEPTH_REG, eq->depth);
+ erdma_reg_write64(dev, ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG, eq->dbrec_dma);
+
+ return 0;
+}
+
+int erdma_cmdq_init(struct erdma_dev *dev)
+{
+ struct erdma_cmdq *cmdq = &dev->cmdq;
+ int err;
+
+ cmdq->max_outstandings = ERDMA_CMDQ_MAX_OUTSTANDING;
+
+ sema_init(&cmdq->credits, cmdq->max_outstandings);
+
+ err = erdma_cmdq_wait_res_init(dev, cmdq);
+ if (err)
+ return err;
+
+ err = erdma_cmdq_sq_init(dev);
+ if (err)
+ return err;
+
+ err = erdma_cmdq_cq_init(dev);
+ if (err)
+ goto err_destroy_sq;
+
+ err = erdma_cmdq_eq_init(dev);
+ if (err)
+ goto err_destroy_cq;
+
+ set_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
+
+ return 0;
+
+err_destroy_cq:
+ dma_free_coherent(&dev->pdev->dev, cmdq->cq.depth << CQE_SHIFT,
+ cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);
+
+ dma_pool_free(dev->db_pool, cmdq->cq.dbrec, cmdq->cq.dbrec_dma);
+
+err_destroy_sq:
+ dma_free_coherent(&dev->pdev->dev, cmdq->sq.depth << SQEBB_SHIFT,
+ cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);
+
+ dma_pool_free(dev->db_pool, cmdq->sq.dbrec, cmdq->sq.dbrec_dma);
+
+ return err;
+}
+
+void erdma_finish_cmdq_init(struct erdma_dev *dev)
+{
+ arm_cmdq_cq(&dev->cmdq);
+}
+
+void erdma_cmdq_destroy(struct erdma_dev *dev)
+{
+ struct erdma_cmdq *cmdq = &dev->cmdq;
+
+ clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
+
+ erdma_eq_destroy(dev, &cmdq->eq);
+
+ dma_free_coherent(&dev->pdev->dev, cmdq->sq.depth << SQEBB_SHIFT,
+ cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);
+
+ dma_pool_free(dev->db_pool, cmdq->sq.dbrec, cmdq->sq.dbrec_dma);
+
+ dma_free_coherent(&dev->pdev->dev, cmdq->cq.depth << CQE_SHIFT,
+ cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);
+
+ dma_pool_free(dev->db_pool, cmdq->cq.dbrec, cmdq->cq.dbrec_dma);
+}
+
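+/*
+ * A command CQE is valid when its owner bit differs from the wrap parity
+ * of the consumer index (ci & depth); the parity flips on every wrap of
+ * the queue.
+ */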
+static void *get_next_valid_cmdq_cqe(struct erdma_cmdq *cmdq)
+{
+ __be32 *cqe = get_queue_entry(cmdq->cq.qbuf, cmdq->cq.ci,
+ cmdq->cq.depth, CQE_SHIFT);
+ u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
+ be32_to_cpu(READ_ONCE(*cqe)));
+
+ return owner ^ !!(cmdq->cq.ci & cmdq->cq.depth) ? cqe : NULL;
+}
+
+static void push_cmdq_sqe(struct erdma_cmdq *cmdq, u64 *req, size_t req_len,
+ struct erdma_comp_wait *comp_wait)
+{
+ __le64 *wqe;
+ u64 hdr = *req;
+
+ comp_wait->cmd_status = ERDMA_CMD_STATUS_ISSUED;
+ reinit_completion(&comp_wait->wait_event);
+ comp_wait->sq_pi = cmdq->sq.pi;
+
+ wqe = get_queue_entry(cmdq->sq.qbuf, cmdq->sq.pi, cmdq->sq.depth,
+ SQEBB_SHIFT);
+ memcpy(wqe, req, req_len);
+
+ cmdq->sq.pi += cmdq->sq.wqebb_cnt;
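+ /*
+ * Stamp the SQE header with the updated producer index and the
+ * wait-pool slot id; the completion path reads the cookie back from
+ * the SQE to find the matching waiter.
+ */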
+ hdr |= FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi) |
+ FIELD_PREP(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK,
+ comp_wait->ctx_id) |
+ FIELD_PREP(ERDMA_CMD_HDR_WQEBB_CNT_MASK, cmdq->sq.wqebb_cnt - 1);
+ *wqe = cpu_to_le64(hdr);
+
+ kick_cmdq_db(cmdq);
+}
+
+static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq)
+{
+ struct erdma_comp_wait *comp_wait;
+ u32 hdr0, sqe_idx;
+ __be32 *cqe;
+ u16 ctx_id;
+ u64 *sqe;
+
+ cqe = get_next_valid_cmdq_cqe(cmdq);
+ if (!cqe)
+ return -EAGAIN;
+
+ cmdq->cq.ci++;
+
+ dma_rmb();
+ hdr0 = be32_to_cpu(*cqe);
+ sqe_idx = be32_to_cpu(*(cqe + 1));
+
+ sqe = get_queue_entry(cmdq->sq.qbuf, sqe_idx, cmdq->sq.depth,
+ SQEBB_SHIFT);
+ ctx_id = FIELD_GET(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK, *sqe);
+ comp_wait = &cmdq->wait_pool[ctx_id];
+ if (comp_wait->cmd_status != ERDMA_CMD_STATUS_ISSUED)
+ return -EIO;
+
+ comp_wait->cmd_status = ERDMA_CMD_STATUS_FINISHED;
+ comp_wait->comp_status = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, hdr0);
+ cmdq->sq.ci += cmdq->sq.wqebb_cnt;
+ /* Copy the 16B of completion data after the CQE header out to the waiter. */
+ be32_to_cpu_array(comp_wait->comp_data, cqe + 2, 4);
+
+ complete(&comp_wait->wait_event);
+
+ return 0;
+}
+
+static void erdma_polling_cmd_completions(struct erdma_cmdq *cmdq)
+{
+ unsigned long flags;
+ u16 comp_num;
+
+ spin_lock_irqsave(&cmdq->cq.lock, flags);
+
+ /*
+ * At most max_outstandings completions can be pending at any one
+ * time, so bound the polling loop by that count.
+ */
+ for (comp_num = 0; comp_num < cmdq->max_outstandings; comp_num++)
+ if (erdma_poll_single_cmd_completion(cmdq))
+ break;
+
+ spin_unlock_irqrestore(&cmdq->cq.lock, flags);
+}
+
+void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq)
+{
+ int got_event = 0;
+
+ if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state))
+ return;
+
+ while (get_next_valid_eqe(&cmdq->eq)) {
+ cmdq->eq.ci++;
+ got_event++;
+ }
+
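+ /*
+ * New EQ events imply pending command completions: drain them and
+ * re-arm the command CQ with an updated command sequence number.
+ */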
+ if (got_event) {
+ cmdq->cq.cmdsn++;
+ erdma_polling_cmd_completions(cmdq);
+ arm_cmdq_cq(cmdq);
+ }
+
+ notify_eq(&cmdq->eq);
+}
+
+static int erdma_poll_cmd_completion(struct erdma_comp_wait *comp_ctx,
+ struct erdma_cmdq *cmdq, u32 timeout)
+{
+ unsigned long comp_timeout = jiffies + msecs_to_jiffies(timeout);
+
+ while (1) {
+ erdma_polling_cmd_completions(cmdq);
+ if (comp_ctx->cmd_status != ERDMA_CMD_STATUS_ISSUED)
+ break;
+
+ if (time_is_before_jiffies(comp_timeout))
+ return -ETIME;
+
+ udelay(20);
+ }
+
+ return 0;
+}
+
+static int erdma_wait_cmd_completion(struct erdma_comp_wait *comp_ctx,
+ struct erdma_cmdq *cmdq, u32 timeout)
+{
+ unsigned long flags = 0;
+
+ wait_for_completion_timeout(&comp_ctx->wait_event,
+ msecs_to_jiffies(timeout));
+
+ if (unlikely(comp_ctx->cmd_status != ERDMA_CMD_STATUS_FINISHED)) {
+ spin_lock_irqsave(&cmdq->cq.lock, flags);
+ comp_ctx->cmd_status = ERDMA_CMD_STATUS_TIMEOUT;
+ spin_unlock_irqrestore(&cmdq->cq.lock, flags);
+ return -ETIME;
+ }
+
+ return 0;
+}
+
+void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op)
+{
+ *hdr = FIELD_PREP(ERDMA_CMD_HDR_SUB_MOD_MASK, mod) |
+ FIELD_PREP(ERDMA_CMD_HDR_OPCODE_MASK, op);
+}
+
+int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size,
+ u64 *resp0, u64 *resp1, bool sleepable)
+{
+ struct erdma_comp_wait *comp_wait;
+ int ret;
+
+ if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state))
+ return -ENODEV;
+
+ if (!sleepable) {
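+ /* Cannot sleep here; busy-wait until a credit becomes available. */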
+ while (down_trylock(&cmdq->credits))
+ ;
+ } else {
+ down(&cmdq->credits);
+ }
+
+ comp_wait = get_comp_wait(cmdq);
+ if (IS_ERR(comp_wait)) {
+ clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
+ set_bit(ERDMA_CMDQ_STATE_CTX_ERR_BIT, &cmdq->state);
+ up(&cmdq->credits);
+ return PTR_ERR(comp_wait);
+ }
+
+ spin_lock(&cmdq->sq.lock);
+ push_cmdq_sqe(cmdq, req, req_size, comp_wait);
+ spin_unlock(&cmdq->sq.lock);
+
+ if (sleepable)
+ ret = erdma_wait_cmd_completion(comp_wait, cmdq,
+ ERDMA_CMDQ_TIMEOUT_MS);
+ else
+ ret = erdma_poll_cmd_completion(comp_wait, cmdq,
+ ERDMA_CMDQ_TIMEOUT_MS);
+
+ if (ret) {
+ set_bit(ERDMA_CMDQ_STATE_TIMEOUT_BIT, &cmdq->state);
+ clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
+ goto out;
+ }
+
+ if (comp_wait->comp_status)
+ ret = -EIO;
+
+ if (resp0 && resp1) {
+ *resp0 = *((u64 *)&comp_wait->comp_data[0]);
+ *resp1 = *((u64 *)&comp_wait->comp_data[2]);
+ }
+ put_comp_wait(cmdq, comp_wait);
+
+out:
+ up(&cmdq->credits);
+
+ return ret;
+}
diff --git a/drivers/infiniband/hw/erdma/erdma_cq.c b/drivers/infiniband/hw/erdma/erdma_cq.c
new file mode 100644
index 000000000000..1f456327e63c
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_cq.c
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+#include "erdma_verbs.h"
+
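+/*
+ * A CQE is valid when its owner bit differs from the consumer-index wrap
+ * parity (ci & depth), the same scheme used by the command queue.
+ */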
+static void *get_next_valid_cqe(struct erdma_cq *cq)
+{
+ __be32 *cqe = get_queue_entry(cq->kern_cq.qbuf, cq->kern_cq.ci,
+ cq->depth, CQE_SHIFT);
+ u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
+ be32_to_cpu(READ_ONCE(*cqe)));
+
+ return owner ^ !!(cq->kern_cq.ci & cq->depth) ? cqe : NULL;
+}
+
+static void notify_cq(struct erdma_cq *cq, u8 solicited)
+{
+ u64 db_data =
+ FIELD_PREP(ERDMA_CQDB_IDX_MASK, (cq->kern_cq.notify_cnt)) |
+ FIELD_PREP(ERDMA_CQDB_CQN_MASK, cq->cqn) |
+ FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) |
+ FIELD_PREP(ERDMA_CQDB_SOL_MASK, solicited) |
+ FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cq->kern_cq.cmdsn) |
+ FIELD_PREP(ERDMA_CQDB_CI_MASK, cq->kern_cq.ci);
+
+ *cq->kern_cq.dbrec = db_data;
+ writeq(db_data, cq->kern_cq.db);
+}
+
+int erdma_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+ struct erdma_cq *cq = to_ecq(ibcq);
+ unsigned long irq_flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&cq->kern_cq.lock, irq_flags);
+
+ notify_cq(cq, (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
+
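+ /*
+ * With IB_CQ_REPORT_MISSED_EVENTS, tell the caller whether a CQE is
+ * already pending so it can poll right away instead of waiting for
+ * the next interrupt.
+ */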
+ if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && get_next_valid_cqe(cq))
+ ret = 1;
+
+ cq->kern_cq.notify_cnt++;
+
+ spin_unlock_irqrestore(&cq->kern_cq.lock, irq_flags);
+
+ return ret;
+}
+
+static const enum ib_wc_opcode wc_mapping_table[ERDMA_NUM_OPCODES] = {
+ [ERDMA_OP_WRITE] = IB_WC_RDMA_WRITE,
+ [ERDMA_OP_READ] = IB_WC_RDMA_READ,
+ [ERDMA_OP_SEND] = IB_WC_SEND,
+ [ERDMA_OP_SEND_WITH_IMM] = IB_WC_SEND,
+ [ERDMA_OP_RECEIVE] = IB_WC_RECV,
+ [ERDMA_OP_RECV_IMM] = IB_WC_RECV_RDMA_WITH_IMM,
+ [ERDMA_OP_RECV_INV] = IB_WC_RECV,
+ [ERDMA_OP_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
+ [ERDMA_OP_RSP_SEND_IMM] = IB_WC_RECV,
+ [ERDMA_OP_SEND_WITH_INV] = IB_WC_SEND,
+ [ERDMA_OP_REG_MR] = IB_WC_REG_MR,
+ [ERDMA_OP_LOCAL_INV] = IB_WC_LOCAL_INV,
+ [ERDMA_OP_READ_WITH_INV] = IB_WC_RDMA_READ,
+ [ERDMA_OP_ATOMIC_CAS] = IB_WC_COMP_SWAP,
+ [ERDMA_OP_ATOMIC_FAA] = IB_WC_FETCH_ADD,
+};
+
+static const struct {
+ enum erdma_wc_status erdma;
+ enum ib_wc_status base;
+ enum erdma_vendor_err vendor;
+} map_cqe_status[ERDMA_NUM_WC_STATUS] = {
+ { ERDMA_WC_SUCCESS, IB_WC_SUCCESS, ERDMA_WC_VENDOR_NO_ERR },
+ { ERDMA_WC_GENERAL_ERR, IB_WC_GENERAL_ERR, ERDMA_WC_VENDOR_NO_ERR },
+ { ERDMA_WC_RECV_WQE_FORMAT_ERR, IB_WC_GENERAL_ERR,
+ ERDMA_WC_VENDOR_INVALID_RQE },
+ { ERDMA_WC_RECV_STAG_INVALID_ERR, IB_WC_REM_ACCESS_ERR,
+ ERDMA_WC_VENDOR_RQE_INVALID_STAG },
+ { ERDMA_WC_RECV_ADDR_VIOLATION_ERR, IB_WC_REM_ACCESS_ERR,
+ ERDMA_WC_VENDOR_RQE_ADDR_VIOLATION },
+ { ERDMA_WC_RECV_RIGHT_VIOLATION_ERR, IB_WC_REM_ACCESS_ERR,
+ ERDMA_WC_VENDOR_RQE_ACCESS_RIGHT_ERR },
+ { ERDMA_WC_RECV_PDID_ERR, IB_WC_REM_ACCESS_ERR,
+ ERDMA_WC_VENDOR_RQE_INVALID_PD },
+ { ERDMA_WC_RECV_WARRPING_ERR, IB_WC_REM_ACCESS_ERR,
+ ERDMA_WC_VENDOR_RQE_WRAP_ERR },
+ { ERDMA_WC_SEND_WQE_FORMAT_ERR, IB_WC_LOC_QP_OP_ERR,
+ ERDMA_WC_VENDOR_INVALID_SQE },
+ { ERDMA_WC_SEND_WQE_ORD_EXCEED, IB_WC_GENERAL_ERR,
+ ERDMA_WC_VENDOR_ZERO_ORD },
+ { ERDMA_WC_SEND_STAG_INVALID_ERR, IB_WC_LOC_ACCESS_ERR,
+ ERDMA_WC_VENDOR_SQE_INVALID_STAG },
+ { ERDMA_WC_SEND_ADDR_VIOLATION_ERR, IB_WC_LOC_ACCESS_ERR,
+ ERDMA_WC_VENDOR_SQE_ADDR_VIOLATION },
+ { ERDMA_WC_SEND_RIGHT_VIOLATION_ERR, IB_WC_LOC_ACCESS_ERR,
+ ERDMA_WC_VENDOR_SQE_ACCESS_ERR },
+ { ERDMA_WC_SEND_PDID_ERR, IB_WC_LOC_ACCESS_ERR,
+ ERDMA_WC_VENDOR_SQE_INVALID_PD },
+ { ERDMA_WC_SEND_WARRPING_ERR, IB_WC_LOC_ACCESS_ERR,
+ ERDMA_WC_VENDOR_SQE_WARP_ERR },
+ { ERDMA_WC_FLUSH_ERR, IB_WC_WR_FLUSH_ERR, ERDMA_WC_VENDOR_NO_ERR },
+ { ERDMA_WC_RETRY_EXC_ERR, IB_WC_RETRY_EXC_ERR, ERDMA_WC_VENDOR_NO_ERR },
+};
+
+static void erdma_process_ud_cqe(struct erdma_cqe *cqe, struct ib_wc *wc)
+{
+ u32 ud_info;
+
+ wc->wc_flags |= (IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE);
+ ud_info = be32_to_cpu(cqe->ud.info);
+ wc->network_hdr_type = FIELD_GET(ERDMA_CQE_NTYPE_MASK, ud_info);
+ if (wc->network_hdr_type == ERDMA_NETWORK_TYPE_IPV4)
+ wc->network_hdr_type = RDMA_NETWORK_IPV4;
+ else
+ wc->network_hdr_type = RDMA_NETWORK_IPV6;
+ wc->src_qp = FIELD_GET(ERDMA_CQE_SQPN_MASK, ud_info);
+ wc->sl = FIELD_GET(ERDMA_CQE_SL_MASK, ud_info);
+ wc->pkey_index = 0;
+}
+
+#define ERDMA_POLLCQ_NO_QP 1
+
+static int erdma_poll_one_cqe(struct erdma_cq *cq, struct ib_wc *wc)
+{
+ struct erdma_dev *dev = to_edev(cq->ibcq.device);
+ u8 opcode, syndrome, qtype;
+ struct erdma_kqp *kern_qp;
+ struct erdma_cqe *cqe;
+ struct erdma_qp *qp;
+ u16 wqe_idx, depth;
+ u32 qpn, cqe_hdr;
+ u64 *id_table;
+ u64 *wqe_hdr;
+
+ cqe = get_next_valid_cqe(cq);
+ if (!cqe)
+ return -EAGAIN;
+
+ cq->kern_cq.ci++;
+
+ /* cqbuf should be ready when we poll */
+ dma_rmb();
+
+ qpn = be32_to_cpu(cqe->qpn);
+ wqe_idx = be32_to_cpu(cqe->qe_idx);
+ cqe_hdr = be32_to_cpu(cqe->hdr);
+
+ qp = find_qp_by_qpn(dev, qpn);
+ if (!qp)
+ return ERDMA_POLLCQ_NO_QP;
+
+ kern_qp = &qp->kern_qp;
+
+ qtype = FIELD_GET(ERDMA_CQE_HDR_QTYPE_MASK, cqe_hdr);
+ syndrome = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, cqe_hdr);
+ opcode = FIELD_GET(ERDMA_CQE_HDR_OPCODE_MASK, cqe_hdr);
+
+ if (qtype == ERDMA_CQE_QTYPE_SQ) {
+ id_table = kern_qp->swr_tbl;
+ depth = qp->attrs.sq_size;
+ wqe_hdr = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx,
+ qp->attrs.sq_size, SQEBB_SHIFT);
+ kern_qp->sq_ci =
+ FIELD_GET(ERDMA_SQE_HDR_WQEBB_CNT_MASK, *wqe_hdr) +
+ wqe_idx + 1;
+ } else {
+ id_table = kern_qp->rwr_tbl;
+ depth = qp->attrs.rq_size;
+ }
+ wc->wr_id = id_table[wqe_idx & (depth - 1)];
+ wc->byte_len = be32_to_cpu(cqe->size);
+
+ wc->wc_flags = 0;
+
+ wc->opcode = wc_mapping_table[opcode];
+ if (opcode == ERDMA_OP_RECV_IMM || opcode == ERDMA_OP_RSP_SEND_IMM) {
+ wc->ex.imm_data = cpu_to_be32(le32_to_cpu(cqe->imm_data));
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ } else if (opcode == ERDMA_OP_RECV_INV) {
+ wc->ex.invalidate_rkey = be32_to_cpu(cqe->inv_rkey);
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+ }
+
+ if (erdma_device_rocev2(dev) &&
+ (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_GSI))
+ erdma_process_ud_cqe(cqe, wc);
+
+ if (syndrome >= ERDMA_NUM_WC_STATUS)
+ syndrome = ERDMA_WC_GENERAL_ERR;
+
+ wc->status = map_cqe_status[syndrome].base;
+ wc->vendor_err = map_cqe_status[syndrome].vendor;
+ wc->qp = &qp->ibqp;
+
+ return 0;
+}
+
+int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+ struct erdma_cq *cq = to_ecq(ibcq);
+ unsigned long flags;
+ int npolled, ret;
+
+ spin_lock_irqsave(&cq->kern_cq.lock, flags);
+
+ for (npolled = 0; npolled < num_entries;) {
+ ret = erdma_poll_one_cqe(cq, wc + npolled);
+
+ if (ret == -EAGAIN) /* no new CQEs received. */
+ break;
+ else if (ret) /* ignore invalid CQEs. */
+ continue;
+
+ npolled++;
+ }
+
+ spin_unlock_irqrestore(&cq->kern_cq.lock, flags);
+
+ return npolled;
+}
+
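+/*
+ * Purge pending CQEs of the given QP: count the CQEs that are currently
+ * valid, then walk them from newest to oldest, shifting CQEs of other QPs
+ * up to fill the slots freed by the purged entries (keeping the owner bit
+ * of the destination slot intact), and finally advance the consumer index
+ * past the freed slots.
+ */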
+void erdma_remove_cqes_of_qp(struct ib_cq *ibcq, u32 qpn)
+{
+ struct erdma_cq *cq = to_ecq(ibcq);
+ struct erdma_cqe *cqe, *dst_cqe;
+ u32 prev_cq_ci, cur_cq_ci;
+ u32 ncqe = 0, nqp_cqe = 0;
+ unsigned long flags;
+ u8 owner;
+
+ spin_lock_irqsave(&cq->kern_cq.lock, flags);
+
+ prev_cq_ci = cq->kern_cq.ci;
+
+ while (ncqe < cq->depth && (cqe = get_next_valid_cqe(cq)) != NULL) {
+ ++cq->kern_cq.ci;
+ ++ncqe;
+ }
+
+ while (ncqe > 0) {
+ cur_cq_ci = prev_cq_ci + ncqe - 1;
+ cqe = get_queue_entry(cq->kern_cq.qbuf, cur_cq_ci, cq->depth,
+ CQE_SHIFT);
+
+ if (be32_to_cpu(cqe->qpn) == qpn) {
+ ++nqp_cqe;
+ } else if (nqp_cqe) {
+ dst_cqe = get_queue_entry(cq->kern_cq.qbuf,
+ cur_cq_ci + nqp_cqe,
+ cq->depth, CQE_SHIFT);
+ owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
+ be32_to_cpu(dst_cqe->hdr));
+ cqe->hdr = cpu_to_be32(
+ (be32_to_cpu(cqe->hdr) &
+ ~ERDMA_CQE_HDR_OWNER_MASK) |
+ FIELD_PREP(ERDMA_CQE_HDR_OWNER_MASK, owner));
+ memcpy(dst_cqe, cqe, sizeof(*cqe));
+ }
+
+ --ncqe;
+ }
+
+ cq->kern_cq.ci = prev_cq_ci + nqp_cqe;
+ spin_unlock_irqrestore(&cq->kern_cq.lock, flags);
+}
diff --git a/drivers/infiniband/hw/erdma/erdma_eq.c b/drivers/infiniband/hw/erdma/erdma_eq.c
new file mode 100644
index 000000000000..6486234a2360
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_eq.c
@@ -0,0 +1,326 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+#include "erdma_verbs.h"
+
+#define MAX_POLL_CHUNK_SIZE 16
+
+void notify_eq(struct erdma_eq *eq)
+{
+ u64 db_data = FIELD_PREP(ERDMA_EQDB_CI_MASK, eq->ci) |
+ FIELD_PREP(ERDMA_EQDB_ARM_MASK, 1);
+
+ *eq->dbrec = db_data;
+ writeq(db_data, eq->db);
+
+ atomic64_inc(&eq->notify_num);
+}
+
+void *get_next_valid_eqe(struct erdma_eq *eq)
+{
+ u64 *eqe = get_queue_entry(eq->qbuf, eq->ci, eq->depth, EQE_SHIFT);
+ u32 owner = FIELD_GET(ERDMA_CEQE_HDR_O_MASK, READ_ONCE(*eqe));
+
+ return owner ^ !!(eq->ci & eq->depth) ? eqe : NULL;
+}
+
+void erdma_aeq_event_handler(struct erdma_dev *dev)
+{
+ struct erdma_aeqe *aeqe;
+ u32 cqn, qpn;
+ struct erdma_qp *qp;
+ struct erdma_cq *cq;
+ struct ib_event event;
+ u32 poll_cnt = 0;
+
+ memset(&event, 0, sizeof(event));
+
+ while (poll_cnt < MAX_POLL_CHUNK_SIZE) {
+ aeqe = get_next_valid_eqe(&dev->aeq);
+ if (!aeqe)
+ break;
+
+ dma_rmb();
+
+ dev->aeq.ci++;
+ atomic64_inc(&dev->aeq.event_num);
+ poll_cnt++;
+
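+ /*
+ * CQ errors are reported against the CQ; all other async events
+ * are mapped to a fatal event on the affected QP.
+ */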
+ if (FIELD_GET(ERDMA_AEQE_HDR_TYPE_MASK,
+ le32_to_cpu(aeqe->hdr)) == ERDMA_AE_TYPE_CQ_ERR) {
+ cqn = le32_to_cpu(aeqe->event_data0);
+ cq = find_cq_by_cqn(dev, cqn);
+ if (!cq)
+ continue;
+
+ event.device = cq->ibcq.device;
+ event.element.cq = &cq->ibcq;
+ event.event = IB_EVENT_CQ_ERR;
+ if (cq->ibcq.event_handler)
+ cq->ibcq.event_handler(&event,
+ cq->ibcq.cq_context);
+ } else {
+ qpn = le32_to_cpu(aeqe->event_data0);
+ qp = find_qp_by_qpn(dev, qpn);
+ if (!qp)
+ continue;
+
+ event.device = qp->ibqp.device;
+ event.element.qp = &qp->ibqp;
+ event.event = IB_EVENT_QP_FATAL;
+ if (qp->ibqp.event_handler)
+ qp->ibqp.event_handler(&event,
+ qp->ibqp.qp_context);
+ }
+ }
+
+ notify_eq(&dev->aeq);
+}
+
+int erdma_eq_common_init(struct erdma_dev *dev, struct erdma_eq *eq, u32 depth)
+{
+ u32 buf_size = depth << EQE_SHIFT;
+
+ eq->qbuf = dma_alloc_coherent(&dev->pdev->dev, buf_size,
+ &eq->qbuf_dma_addr, GFP_KERNEL);
+ if (!eq->qbuf)
+ return -ENOMEM;
+
+ eq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &eq->dbrec_dma);
+ if (!eq->dbrec)
+ goto err_free_qbuf;
+
+ spin_lock_init(&eq->lock);
+ atomic64_set(&eq->event_num, 0);
+ atomic64_set(&eq->notify_num, 0);
+ eq->ci = 0;
+ eq->depth = depth;
+
+ return 0;
+
+err_free_qbuf:
+ dma_free_coherent(&dev->pdev->dev, buf_size, eq->qbuf,
+ eq->qbuf_dma_addr);
+
+ return -ENOMEM;
+}
+
+void erdma_eq_destroy(struct erdma_dev *dev, struct erdma_eq *eq)
+{
+ dma_pool_free(dev->db_pool, eq->dbrec, eq->dbrec_dma);
+ dma_free_coherent(&dev->pdev->dev, eq->depth << EQE_SHIFT, eq->qbuf,
+ eq->qbuf_dma_addr);
+}
+
+int erdma_aeq_init(struct erdma_dev *dev)
+{
+ struct erdma_eq *eq = &dev->aeq;
+ int ret;
+
+ ret = erdma_eq_common_init(dev, &dev->aeq, ERDMA_DEFAULT_EQ_DEPTH);
+ if (ret)
+ return ret;
+
+ eq->db = dev->func_bar + ERDMA_REGS_AEQ_DB_REG;
+
+ erdma_reg_write32(dev, ERDMA_REGS_AEQ_ADDR_H_REG,
+ upper_32_bits(eq->qbuf_dma_addr));
+ erdma_reg_write32(dev, ERDMA_REGS_AEQ_ADDR_L_REG,
+ lower_32_bits(eq->qbuf_dma_addr));
+ erdma_reg_write32(dev, ERDMA_REGS_AEQ_DEPTH_REG, eq->depth);
+ erdma_reg_write64(dev, ERDMA_AEQ_DB_HOST_ADDR_REG, eq->dbrec_dma);
+
+ return 0;
+}
+
+void erdma_ceq_completion_handler(struct erdma_eq_cb *ceq_cb)
+{
+ struct erdma_dev *dev = ceq_cb->dev;
+ struct erdma_cq *cq;
+ u32 poll_cnt = 0;
+ u64 *ceqe;
+ int cqn;
+
+ if (!ceq_cb->ready)
+ return;
+
+ while (poll_cnt < MAX_POLL_CHUNK_SIZE) {
+ ceqe = get_next_valid_eqe(&ceq_cb->eq);
+ if (!ceqe)
+ break;
+
+ dma_rmb();
+ ceq_cb->eq.ci++;
+ poll_cnt++;
+ cqn = FIELD_GET(ERDMA_CEQE_HDR_CQN_MASK, READ_ONCE(*ceqe));
+
+ cq = find_cq_by_cqn(dev, cqn);
+ if (!cq)
+ continue;
+
+ if (rdma_is_kernel_res(&cq->ibcq.res))
+ cq->kern_cq.cmdsn++;
+
+ if (cq->ibcq.comp_handler)
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+ }
+
+ notify_eq(&ceq_cb->eq);
+}
+
+static irqreturn_t erdma_intr_ceq_handler(int irq, void *data)
+{
+ struct erdma_eq_cb *ceq_cb = data;
+
+ tasklet_schedule(&ceq_cb->tasklet);
+
+ return IRQ_HANDLED;
+}
+
+static void erdma_intr_ceq_task(unsigned long data)
+{
+ erdma_ceq_completion_handler((struct erdma_eq_cb *)data);
+}
+
+static int erdma_set_ceq_irq(struct erdma_dev *dev, u16 ceqn)
+{
+ struct erdma_eq_cb *eqc = &dev->ceqs[ceqn];
+ int err;
+
+ snprintf(eqc->irq.name, ERDMA_IRQNAME_SIZE, "erdma-ceq%u@pci:%s", ceqn,
+ pci_name(dev->pdev));
+ eqc->irq.msix_vector = pci_irq_vector(dev->pdev, ceqn + 1);
+
+ tasklet_init(&dev->ceqs[ceqn].tasklet, erdma_intr_ceq_task,
+ (unsigned long)&dev->ceqs[ceqn]);
+
+ cpumask_set_cpu(cpumask_local_spread(ceqn + 1, dev->attrs.numa_node),
+ &eqc->irq.affinity_hint_mask);
+
+ err = request_irq(eqc->irq.msix_vector, erdma_intr_ceq_handler, 0,
+ eqc->irq.name, eqc);
+ if (err) {
+ dev_err(&dev->pdev->dev, "failed to request_irq(%d)\n", err);
+ return err;
+ }
+
+ irq_set_affinity_hint(eqc->irq.msix_vector,
+ &eqc->irq.affinity_hint_mask);
+
+ return 0;
+}
+
+static void erdma_free_ceq_irq(struct erdma_dev *dev, u16 ceqn)
+{
+ struct erdma_eq_cb *eqc = &dev->ceqs[ceqn];
+
+ irq_set_affinity_hint(eqc->irq.msix_vector, NULL);
+ free_irq(eqc->irq.msix_vector, eqc);
+}
+
+static int create_eq_cmd(struct erdma_dev *dev, u32 eqn, struct erdma_eq *eq)
+{
+ struct erdma_cmdq_create_eq_req req;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
+ CMDQ_OPCODE_CREATE_EQ);
+ req.eqn = eqn;
+ req.depth = ilog2(eq->depth);
+ req.qbuf_addr = eq->qbuf_dma_addr;
+ req.qtype = ERDMA_EQ_TYPE_CEQ;
+ /* Vector index is the same as EQN. */
+ req.vector_idx = eqn;
+ req.db_dma_addr_l = lower_32_bits(eq->dbrec_dma);
+ req.db_dma_addr_h = upper_32_bits(eq->dbrec_dma);
+
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ false);
+}
+
+static int erdma_ceq_init_one(struct erdma_dev *dev, u16 ceqn)
+{
+ struct erdma_eq *eq = &dev->ceqs[ceqn].eq;
+ int ret;
+
+ ret = erdma_eq_common_init(dev, eq, ERDMA_DEFAULT_EQ_DEPTH);
+ if (ret)
+ return ret;
+
+ eq->db = dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG +
+ (ceqn + 1) * ERDMA_DB_SIZE;
+ dev->ceqs[ceqn].dev = dev;
+ dev->ceqs[ceqn].ready = true;
+
+ /* CEQ indexed from 1, 0 rsvd for CMDQ-EQ. */
+ ret = create_eq_cmd(dev, ceqn + 1, eq);
+ if (ret) {
+ erdma_eq_destroy(dev, eq);
+ dev->ceqs[ceqn].ready = false;
+ }
+
+ return ret;
+}
+
+static void erdma_ceq_uninit_one(struct erdma_dev *dev, u16 ceqn)
+{
+ struct erdma_eq *eq = &dev->ceqs[ceqn].eq;
+ struct erdma_cmdq_destroy_eq_req req;
+ int err;
+
+ dev->ceqs[ceqn].ready = 0;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
+ CMDQ_OPCODE_DESTROY_EQ);
+ /* CEQ indexed from 1, 0 rsvd for CMDQ-EQ. */
+ req.eqn = ceqn + 1;
+ req.qtype = ERDMA_EQ_TYPE_CEQ;
+ req.vector_idx = ceqn + 1;
+
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ false);
+ if (err)
+ return;
+
+ erdma_eq_destroy(dev, eq);
+}
+
+int erdma_ceqs_init(struct erdma_dev *dev)
+{
+ u32 i, j;
+ int err;
+
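+ /*
+ * Vector 0 is reserved for the command queue EQ, so at most
+ * irq_num - 1 completion EQs are created.
+ */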
+ for (i = 0; i < dev->attrs.irq_num - 1; i++) {
+ err = erdma_ceq_init_one(dev, i);
+ if (err)
+ goto out_err;
+
+ err = erdma_set_ceq_irq(dev, i);
+ if (err) {
+ erdma_ceq_uninit_one(dev, i);
+ goto out_err;
+ }
+ }
+
+ return 0;
+
+out_err:
+ for (j = 0; j < i; j++) {
+ erdma_free_ceq_irq(dev, j);
+ erdma_ceq_uninit_one(dev, j);
+ }
+
+ return err;
+}
+
+void erdma_ceqs_uninit(struct erdma_dev *dev)
+{
+ u32 i;
+
+ for (i = 0; i < dev->attrs.irq_num - 1; i++) {
+ erdma_free_ceq_irq(dev, i);
+ erdma_ceq_uninit_one(dev, i);
+ }
+}
diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h
new file mode 100644
index 000000000000..ea4db53901a4
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_hw.h
@@ -0,0 +1,753 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+#ifndef __ERDMA_HW_H__
+#define __ERDMA_HW_H__
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/if_ether.h>
+
+/* PCIe device related definition. */
+#define ERDMA_PCI_WIDTH 64
+#define ERDMA_FUNC_BAR 0
+#define ERDMA_MISX_BAR 2
+
+#define ERDMA_BAR_MASK (BIT(ERDMA_FUNC_BAR) | BIT(ERDMA_MISX_BAR))
+
+/* MSI-X related. */
+#define ERDMA_NUM_MSIX_VEC 32U
+#define ERDMA_MSIX_VECTOR_CMDQ 0
+
+/* RoCEv2 related */
+#define ERDMA_ROCEV2_GID_SIZE 16
+#define ERDMA_MAX_PKEYS 1
+#define ERDMA_DEFAULT_PKEY 0xFFFF
+
+/* erdma device protocol type */
+enum erdma_proto_type {
+ ERDMA_PROTO_IWARP = 0,
+ ERDMA_PROTO_ROCEV2 = 1,
+ ERDMA_PROTO_COUNT = 2,
+};
+
+/* PCIe Bar0 Registers. */
+#define ERDMA_REGS_VERSION_REG 0x0
+#define ERDMA_REGS_DEV_PROTO_REG 0xC
+#define ERDMA_REGS_DEV_CTRL_REG 0x10
+#define ERDMA_REGS_DEV_ST_REG 0x14
+#define ERDMA_REGS_NETDEV_MAC_L_REG 0x18
+#define ERDMA_REGS_NETDEV_MAC_H_REG 0x1C
+#define ERDMA_REGS_CMDQ_SQ_ADDR_L_REG 0x20
+#define ERDMA_REGS_CMDQ_SQ_ADDR_H_REG 0x24
+#define ERDMA_REGS_CMDQ_CQ_ADDR_L_REG 0x28
+#define ERDMA_REGS_CMDQ_CQ_ADDR_H_REG 0x2C
+#define ERDMA_REGS_CMDQ_DEPTH_REG 0x30
+#define ERDMA_REGS_CMDQ_EQ_DEPTH_REG 0x34
+#define ERDMA_REGS_CMDQ_EQ_ADDR_L_REG 0x38
+#define ERDMA_REGS_CMDQ_EQ_ADDR_H_REG 0x3C
+#define ERDMA_REGS_AEQ_ADDR_L_REG 0x40
+#define ERDMA_REGS_AEQ_ADDR_H_REG 0x44
+#define ERDMA_REGS_AEQ_DEPTH_REG 0x48
+#define ERDMA_REGS_GRP_NUM_REG 0x4c
+#define ERDMA_REGS_AEQ_DB_REG 0x50
+#define ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG 0x60
+#define ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG 0x68
+#define ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG 0x70
+#define ERDMA_AEQ_DB_HOST_ADDR_REG 0x78
+#define ERDMA_REGS_STATS_TSO_IN_PKTS_REG 0x80
+#define ERDMA_REGS_STATS_TSO_OUT_PKTS_REG 0x88
+#define ERDMA_REGS_STATS_TSO_OUT_BYTES_REG 0x90
+#define ERDMA_REGS_STATS_TX_DROP_PKTS_REG 0x98
+#define ERDMA_REGS_STATS_TX_BPS_METER_DROP_PKTS_REG 0xa0
+#define ERDMA_REGS_STATS_TX_PPS_METER_DROP_PKTS_REG 0xa8
+#define ERDMA_REGS_STATS_RX_PKTS_REG 0xc0
+#define ERDMA_REGS_STATS_RX_BYTES_REG 0xc8
+#define ERDMA_REGS_STATS_RX_DROP_PKTS_REG 0xd0
+#define ERDMA_REGS_STATS_RX_BPS_METER_DROP_PKTS_REG 0xd8
+#define ERDMA_REGS_STATS_RX_PPS_METER_DROP_PKTS_REG 0xe0
+#define ERDMA_REGS_CEQ_DB_BASE_REG 0x100
+#define ERDMA_CMDQ_SQDB_REG 0x200
+#define ERDMA_CMDQ_CQDB_REG 0x300
+
+/* DEV_CTRL_REG details. */
+#define ERDMA_REG_DEV_CTRL_RESET_MASK 0x00000001
+#define ERDMA_REG_DEV_CTRL_INIT_MASK 0x00000002
+
+/* DEV_ST_REG details. */
+#define ERDMA_REG_DEV_ST_RESET_DONE_MASK 0x00000001U
+#define ERDMA_REG_DEV_ST_INIT_DONE_MASK 0x00000002U
+
+/* eRDMA PCIe DBs definition. */
+#define ERDMA_BAR_DB_SPACE_BASE 4096
+
+#define ERDMA_BAR_SQDB_SPACE_OFFSET ERDMA_BAR_DB_SPACE_BASE
+#define ERDMA_BAR_SQDB_SPACE_SIZE (384 * 1024)
+
+#define ERDMA_BAR_RQDB_SPACE_OFFSET \
+ (ERDMA_BAR_SQDB_SPACE_OFFSET + ERDMA_BAR_SQDB_SPACE_SIZE)
+#define ERDMA_BAR_RQDB_SPACE_SIZE (96 * 1024)
+
+#define ERDMA_BAR_CQDB_SPACE_OFFSET \
+ (ERDMA_BAR_RQDB_SPACE_OFFSET + ERDMA_BAR_RQDB_SPACE_SIZE)
+
+#define ERDMA_SDB_SHARED_PAGE_INDEX 95
+
+/* Doorbell related. */
+#define ERDMA_DB_SIZE 8
+
+#define ERDMA_CQDB_IDX_MASK GENMASK_ULL(63, 56)
+#define ERDMA_CQDB_CQN_MASK GENMASK_ULL(55, 32)
+#define ERDMA_CQDB_ARM_MASK BIT_ULL(31)
+#define ERDMA_CQDB_SOL_MASK BIT_ULL(30)
+#define ERDMA_CQDB_CMDSN_MASK GENMASK_ULL(29, 28)
+#define ERDMA_CQDB_CI_MASK GENMASK_ULL(23, 0)
+
+#define ERDMA_EQDB_ARM_MASK BIT(31)
+#define ERDMA_EQDB_CI_MASK GENMASK_ULL(23, 0)
+
+#define ERDMA_PAGE_SIZE_SUPPORT 0x7FFFF000
+
+/* Hardware page size definition */
+#define ERDMA_HW_PAGE_SHIFT 12
+#define ERDMA_HW_PAGE_SIZE 4096
+
+/* WQE related. */
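+/*
+ * SQ WQEs are built from 32-byte building blocks (SQEBBs); a single WQE
+ * may span up to ERDMA_MAX_WQEBB_PER_SQE of them. SQEBB_ALIGN() rounds a
+ * WQE size up to a whole number of SQEBBs and SQEBB_COUNT() converts it
+ * to a block count.
+ */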
+#define EQE_SIZE 16
+#define EQE_SHIFT 4
+#define RQE_SIZE 32
+#define RQE_SHIFT 5
+#define CQE_SIZE 32
+#define CQE_SHIFT 5
+#define SQEBB_SIZE 32
+#define SQEBB_SHIFT 5
+#define SQEBB_MASK (~(SQEBB_SIZE - 1))
+#define SQEBB_ALIGN(size) ((size + SQEBB_SIZE - 1) & SQEBB_MASK)
+#define SQEBB_COUNT(size) (SQEBB_ALIGN(size) >> SQEBB_SHIFT)
+
+#define ERDMA_MAX_SQE_SIZE 128
+#define ERDMA_MAX_WQEBB_PER_SQE 4
+
+/* CMDQ related. */
+#define ERDMA_CMDQ_MAX_OUTSTANDING 128
+#define ERDMA_CMDQ_SQE_SIZE 128
+
+/* cmdq sub module definition. */
+enum CMDQ_WQE_SUB_MOD {
+ CMDQ_SUBMOD_RDMA = 0,
+ CMDQ_SUBMOD_COMMON = 1
+};
+
+enum CMDQ_RDMA_OPCODE {
+ CMDQ_OPCODE_QUERY_DEVICE = 0,
+ CMDQ_OPCODE_CREATE_QP = 1,
+ CMDQ_OPCODE_DESTROY_QP = 2,
+ CMDQ_OPCODE_MODIFY_QP = 3,
+ CMDQ_OPCODE_CREATE_CQ = 4,
+ CMDQ_OPCODE_DESTROY_CQ = 5,
+ CMDQ_OPCODE_REFLUSH = 6,
+ CMDQ_OPCODE_REG_MR = 8,
+ CMDQ_OPCODE_DEREG_MR = 9,
+ CMDQ_OPCODE_SET_GID = 14,
+ CMDQ_OPCODE_CREATE_AH = 15,
+ CMDQ_OPCODE_DESTROY_AH = 16,
+ CMDQ_OPCODE_QUERY_QP = 17,
+};
+
+enum CMDQ_COMMON_OPCODE {
+ CMDQ_OPCODE_CREATE_EQ = 0,
+ CMDQ_OPCODE_DESTROY_EQ = 1,
+ CMDQ_OPCODE_QUERY_FW_INFO = 2,
+ CMDQ_OPCODE_CONF_MTU = 3,
+ CMDQ_OPCODE_GET_STATS = 4,
+ CMDQ_OPCODE_CONF_DEVICE = 5,
+ CMDQ_OPCODE_ALLOC_DB = 8,
+ CMDQ_OPCODE_FREE_DB = 9,
+};
+
+/* cmdq-SQE HDR */
+#define ERDMA_CMD_HDR_WQEBB_CNT_MASK GENMASK_ULL(54, 52)
+#define ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK GENMASK_ULL(47, 32)
+#define ERDMA_CMD_HDR_SUB_MOD_MASK GENMASK_ULL(25, 24)
+#define ERDMA_CMD_HDR_OPCODE_MASK GENMASK_ULL(23, 16)
+#define ERDMA_CMD_HDR_WQEBB_INDEX_MASK GENMASK_ULL(15, 0)
+
+struct erdma_cmdq_destroy_cq_req {
+ u64 hdr;
+ u32 cqn;
+};
+
+#define ERDMA_EQ_TYPE_AEQ 0
+#define ERDMA_EQ_TYPE_CEQ 1
+
+struct erdma_cmdq_create_eq_req {
+ u64 hdr;
+ u64 qbuf_addr;
+ u8 vector_idx;
+ u8 eqn;
+ u8 depth;
+ u8 qtype;
+ u32 db_dma_addr_l;
+ u32 db_dma_addr_h;
+};
+
+struct erdma_cmdq_destroy_eq_req {
+ u64 hdr;
+ u64 rsvd0;
+ u8 vector_idx;
+ u8 eqn;
+ u8 rsvd1;
+ u8 qtype;
+};
+
+/* config device cfg */
+#define ERDMA_CMD_CONFIG_DEVICE_PS_EN_MASK BIT(31)
+#define ERDMA_CMD_CONFIG_DEVICE_PGSHIFT_MASK GENMASK(4, 0)
+
+struct erdma_cmdq_config_device_req {
+ u64 hdr;
+ u32 cfg;
+ u32 rsvd[5];
+};
+
+struct erdma_cmdq_config_mtu_req {
+ u64 hdr;
+ u32 mtu;
+};
+
+/* ext db requests(alloc and free) cfg */
+#define ERDMA_CMD_EXT_DB_CQ_EN_MASK BIT(2)
+#define ERDMA_CMD_EXT_DB_RQ_EN_MASK BIT(1)
+#define ERDMA_CMD_EXT_DB_SQ_EN_MASK BIT(0)
+
+struct erdma_cmdq_ext_db_req {
+ u64 hdr;
+ u32 cfg;
+ u16 rdb_off;
+ u16 sdb_off;
+ u16 rsvd0;
+ u16 cdb_off;
+ u32 rsvd1[3];
+};
+
+/* alloc db response qword 0 definition */
+#define ERDMA_CMD_ALLOC_DB_RESP_RDB_MASK GENMASK_ULL(63, 48)
+#define ERDMA_CMD_ALLOC_DB_RESP_CDB_MASK GENMASK_ULL(47, 32)
+#define ERDMA_CMD_ALLOC_DB_RESP_SDB_MASK GENMASK_ULL(15, 0)
+
+/* create_cq cfg0 */
+#define ERDMA_CMD_CREATE_CQ_DEPTH_MASK GENMASK(31, 24)
+#define ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK GENMASK(23, 20)
+#define ERDMA_CMD_CREATE_CQ_CQN_MASK GENMASK(19, 0)
+
+/* create_cq cfg1 */
+#define ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK GENMASK(31, 16)
+#define ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK BIT(15)
+#define ERDMA_CMD_CREATE_CQ_MTT_DB_CFG_MASK BIT(11)
+#define ERDMA_CMD_CREATE_CQ_EQN_MASK GENMASK(9, 0)
+
+/* create_cq cfg2 */
+#define ERDMA_CMD_CREATE_CQ_DB_CFG_MASK GENMASK(15, 0)
+
+struct erdma_cmdq_create_cq_req {
+ u64 hdr;
+ u32 cfg0;
+ u32 qbuf_addr_l;
+ u32 qbuf_addr_h;
+ u32 cfg1;
+ u64 cq_dbrec_dma;
+ u32 first_page_offset;
+ u32 cfg2;
+};
+
+/* regmr/deregmr cfg0 */
+#define ERDMA_CMD_MR_VALID_MASK BIT(31)
+#define ERDMA_CMD_MR_VERSION_MASK GENMASK(30, 28)
+#define ERDMA_CMD_MR_KEY_MASK GENMASK(27, 20)
+#define ERDMA_CMD_MR_MPT_IDX_MASK GENMASK(19, 0)
+
+/* regmr cfg1 */
+#define ERDMA_CMD_REGMR_PD_MASK GENMASK(31, 12)
+#define ERDMA_CMD_REGMR_TYPE_MASK GENMASK(7, 6)
+#define ERDMA_CMD_REGMR_RIGHT_MASK GENMASK(5, 1)
+
+/* regmr cfg2 */
+#define ERDMA_CMD_REGMR_PAGESIZE_MASK GENMASK(31, 27)
+#define ERDMA_CMD_REGMR_MTT_PAGESIZE_MASK GENMASK(26, 24)
+#define ERDMA_CMD_REGMR_MTT_LEVEL_MASK GENMASK(21, 20)
+#define ERDMA_CMD_REGMR_MTT_CNT_MASK GENMASK(19, 0)
+
+struct erdma_cmdq_reg_mr_req {
+ u64 hdr;
+ u32 cfg0;
+ u32 cfg1;
+ u64 start_va;
+ u32 size;
+ u32 cfg2;
+ union {
+ u64 phy_addr[4];
+ struct {
+ u64 rsvd;
+ u32 size_h;
+ u32 mtt_cnt_h;
+ };
+ };
+};
+
+struct erdma_cmdq_dereg_mr_req {
+ u64 hdr;
+ u32 cfg;
+};
+
+/* create_av cfg0 */
+#define ERDMA_CMD_CREATE_AV_FL_MASK GENMASK(19, 0)
+#define ERDMA_CMD_CREATE_AV_NTYPE_MASK BIT(20)
+
+struct erdma_av_cfg {
+ u32 cfg0;
+ u8 traffic_class;
+ u8 hop_limit;
+ u8 sl;
+ u8 rsvd;
+ u16 udp_sport;
+ u16 sgid_index;
+ u8 dmac[ETH_ALEN];
+ u8 padding[2];
+ u8 dgid[ERDMA_ROCEV2_GID_SIZE];
+};
+
+struct erdma_cmdq_create_ah_req {
+ u64 hdr;
+ u32 pdn;
+ u32 ahn;
+ struct erdma_av_cfg av_cfg;
+};
+
+struct erdma_cmdq_destroy_ah_req {
+ u64 hdr;
+ u32 pdn;
+ u32 ahn;
+};
+
+/* modify qp cfg */
+#define ERDMA_CMD_MODIFY_QP_STATE_MASK GENMASK(31, 24)
+#define ERDMA_CMD_MODIFY_QP_CC_MASK GENMASK(23, 20)
+#define ERDMA_CMD_MODIFY_QP_QPN_MASK GENMASK(19, 0)
+
+struct erdma_cmdq_modify_qp_req {
+ u64 hdr;
+ u32 cfg;
+ u32 cookie;
+ __be32 dip;
+ __be32 sip;
+ __be16 sport;
+ __be16 dport;
+ u32 send_nxt;
+ u32 recv_nxt;
+};
+
+/* modify qp cfg1 for roce device */
+#define ERDMA_CMD_MODIFY_QP_DQPN_MASK GENMASK(19, 0)
+
+struct erdma_cmdq_mod_qp_req_rocev2 {
+ u64 hdr;
+ u32 cfg0;
+ u32 cfg1;
+ u32 attr_mask;
+ u32 qkey;
+ u32 rq_psn;
+ u32 sq_psn;
+ struct erdma_av_cfg av_cfg;
+};
+
+/* query qp response mask */
+#define ERDMA_CMD_QUERY_QP_RESP_SQ_PSN_MASK GENMASK_ULL(23, 0)
+#define ERDMA_CMD_QUERY_QP_RESP_RQ_PSN_MASK GENMASK_ULL(47, 24)
+#define ERDMA_CMD_QUERY_QP_RESP_QP_STATE_MASK GENMASK_ULL(55, 48)
+#define ERDMA_CMD_QUERY_QP_RESP_SQ_DRAINING_MASK GENMASK_ULL(56, 56)
+
+struct erdma_cmdq_query_qp_req_rocev2 {
+ u64 hdr;
+ u32 qpn;
+};
+
+enum erdma_qp_type {
+ ERDMA_QPT_RC = 0,
+ ERDMA_QPT_UD = 1,
+};
+
+/* create qp cfg0 */
+#define ERDMA_CMD_CREATE_QP_SQ_DEPTH_MASK GENMASK(31, 20)
+#define ERDMA_CMD_CREATE_QP_QPN_MASK GENMASK(19, 0)
+
+/* create qp cfg1 */
+#define ERDMA_CMD_CREATE_QP_RQ_DEPTH_MASK GENMASK(31, 20)
+#define ERDMA_CMD_CREATE_QP_PD_MASK GENMASK(19, 0)
+
+/* create qp cfg2 */
+#define ERDMA_CMD_CREATE_QP_TYPE_MASK GENMASK(3, 0)
+
+/* create qp cqn_mtt_cfg */
+#define ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK GENMASK(31, 28)
+#define ERDMA_CMD_CREATE_QP_DB_CFG_MASK BIT(25)
+#define ERDMA_CMD_CREATE_QP_CQN_MASK GENMASK(23, 0)
+
+/* create qp mtt_cfg */
+#define ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK GENMASK(31, 12)
+#define ERDMA_CMD_CREATE_QP_MTT_CNT_MASK GENMASK(11, 1)
+#define ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK BIT(0)
+
+/* create qp db cfg */
+#define ERDMA_CMD_CREATE_QP_SQDB_CFG_MASK GENMASK(31, 16)
+#define ERDMA_CMD_CREATE_QP_RQDB_CFG_MASK GENMASK(15, 0)
+
+#define ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK GENMASK_ULL(31, 0)
+
+struct erdma_cmdq_create_qp_req {
+ u64 hdr;
+ u32 cfg0;
+ u32 cfg1;
+ u32 sq_cqn_mtt_cfg;
+ u32 rq_cqn_mtt_cfg;
+ u64 sq_buf_addr;
+ u64 rq_buf_addr;
+ u32 sq_mtt_cfg;
+ u32 rq_mtt_cfg;
+ u64 sq_dbrec_dma;
+ u64 rq_dbrec_dma;
+
+ u64 sq_mtt_entry[3];
+ u64 rq_mtt_entry[3];
+
+ u32 db_cfg;
+ u32 cfg2;
+};
+
+struct erdma_cmdq_destroy_qp_req {
+ u64 hdr;
+ u32 qpn;
+};
+
+struct erdma_cmdq_reflush_req {
+ u64 hdr;
+ u32 qpn;
+ u32 sq_pi;
+ u32 rq_pi;
+};
+
+#define ERDMA_HW_RESP_SIZE 256
+
+struct erdma_cmdq_query_req {
+ u64 hdr;
+ u32 rsvd;
+ u32 index;
+
+ u64 target_addr;
+ u32 target_length;
+};
+
+#define ERDMA_HW_RESP_MAGIC 0x5566
+
+struct erdma_cmdq_query_resp_hdr {
+ u16 magic;
+ u8 ver;
+ u8 length;
+
+ u32 index;
+ u32 rsvd[2];
+};
+
+struct erdma_cmdq_query_stats_resp {
+ struct erdma_cmdq_query_resp_hdr hdr;
+
+ u64 tx_req_cnt;
+ u64 tx_packets_cnt;
+ u64 tx_bytes_cnt;
+ u64 tx_drop_packets_cnt;
+ u64 tx_bps_meter_drop_packets_cnt;
+ u64 tx_pps_meter_drop_packets_cnt;
+ u64 rx_packets_cnt;
+ u64 rx_bytes_cnt;
+ u64 rx_drop_packets_cnt;
+ u64 rx_bps_meter_drop_packets_cnt;
+ u64 rx_pps_meter_drop_packets_cnt;
+};
+
+enum erdma_network_type {
+ ERDMA_NETWORK_TYPE_IPV4 = 0,
+ ERDMA_NETWORK_TYPE_IPV6 = 1,
+};
+
+enum erdma_set_gid_op {
+ ERDMA_SET_GID_OP_ADD = 0,
+ ERDMA_SET_GID_OP_DEL = 1,
+};
+
+/* set gid cfg */
+#define ERDMA_CMD_SET_GID_SGID_IDX_MASK GENMASK(15, 0)
+#define ERDMA_CMD_SET_GID_NTYPE_MASK BIT(16)
+#define ERDMA_CMD_SET_GID_OP_MASK BIT(31)
+
+struct erdma_cmdq_set_gid_req {
+ u64 hdr;
+ u32 cfg;
+ u8 gid[ERDMA_ROCEV2_GID_SIZE];
+};
+
+/* cap qword 0 definition */
+#define ERDMA_CMD_DEV_CAP_MAX_GID_MASK GENMASK_ULL(51, 48)
+#define ERDMA_CMD_DEV_CAP_MAX_CQE_MASK GENMASK_ULL(47, 40)
+#define ERDMA_CMD_DEV_CAP_FLAGS_MASK GENMASK_ULL(31, 24)
+#define ERDMA_CMD_DEV_CAP_MAX_RECV_WR_MASK GENMASK_ULL(23, 16)
+#define ERDMA_CMD_DEV_CAP_MAX_AH_MASK GENMASK_ULL(15, 8)
+#define ERDMA_CMD_DEV_CAP_MAX_MR_SIZE_MASK GENMASK_ULL(7, 0)
+
+/* cap qword 1 definition */
+#define ERDMA_CMD_DEV_CAP_DMA_LOCAL_KEY_MASK GENMASK_ULL(63, 32)
+#define ERDMA_CMD_DEV_CAP_DEFAULT_CC_MASK GENMASK_ULL(31, 28)
+#define ERDMA_CMD_DEV_CAP_QBLOCK_MASK GENMASK_ULL(27, 16)
+#define ERDMA_CMD_DEV_CAP_MAX_MW_MASK GENMASK_ULL(7, 0)
+
+#define ERDMA_NQP_PER_QBLOCK 1024
+
+enum {
+ ERDMA_DEV_CAP_FLAGS_ATOMIC = 1 << 7,
+ ERDMA_DEV_CAP_FLAGS_MTT_VA = 1 << 5,
+ ERDMA_DEV_CAP_FLAGS_EXTEND_DB = 1 << 3,
+};
+
+#define ERDMA_CMD_INFO0_FW_VER_MASK GENMASK_ULL(31, 0)
+
+/* CQE hdr */
+#define ERDMA_CQE_HDR_OWNER_MASK BIT(31)
+#define ERDMA_CQE_HDR_OPCODE_MASK GENMASK(23, 16)
+#define ERDMA_CQE_HDR_QTYPE_MASK GENMASK(15, 8)
+#define ERDMA_CQE_HDR_SYNDROME_MASK GENMASK(7, 0)
+
+#define ERDMA_CQE_QTYPE_SQ 0
+#define ERDMA_CQE_QTYPE_RQ 1
+#define ERDMA_CQE_QTYPE_CMDQ 2
+
+#define ERDMA_CQE_NTYPE_MASK BIT(31)
+#define ERDMA_CQE_SL_MASK GENMASK(27, 20)
+#define ERDMA_CQE_SQPN_MASK GENMASK(19, 0)
+
+struct erdma_cqe {
+ __be32 hdr;
+ __be32 qe_idx;
+ __be32 qpn;
+ union {
+ __le32 imm_data;
+ __be32 inv_rkey;
+ };
+ __be32 size;
+ union {
+ struct {
+ __be32 rsvd[3];
+ } rc;
+
+ struct {
+ __be32 rsvd[2];
+ __be32 info;
+ } ud;
+ };
+};
+
+struct erdma_sge {
+ __aligned_le64 addr;
+ __le32 length;
+ __le32 key;
+};
+
+/* Receive Queue Element */
+struct erdma_rqe {
+ __le16 qe_idx;
+ __le16 rsvd0;
+ __le32 qpn;
+ __le32 rsvd1;
+ __le32 rsvd2;
+ __le64 to;
+ __le32 length;
+ __le32 stag;
+};
+
+/* SQE */
+#define ERDMA_SQE_HDR_SGL_LEN_MASK GENMASK_ULL(63, 56)
+#define ERDMA_SQE_HDR_WQEBB_CNT_MASK GENMASK_ULL(54, 52)
+#define ERDMA_SQE_HDR_QPN_MASK GENMASK_ULL(51, 32)
+#define ERDMA_SQE_HDR_OPCODE_MASK GENMASK_ULL(31, 27)
+#define ERDMA_SQE_HDR_DWQE_MASK BIT_ULL(26)
+#define ERDMA_SQE_HDR_INLINE_MASK BIT_ULL(25)
+#define ERDMA_SQE_HDR_FENCE_MASK BIT_ULL(24)
+#define ERDMA_SQE_HDR_SE_MASK BIT_ULL(23)
+#define ERDMA_SQE_HDR_CE_MASK BIT_ULL(22)
+#define ERDMA_SQE_HDR_WQEBB_INDEX_MASK GENMASK_ULL(15, 0)
+
+/* REG MR attrs */
+#define ERDMA_SQE_MR_ACCESS_MASK GENMASK(5, 1)
+#define ERDMA_SQE_MR_MTT_TYPE_MASK GENMASK(7, 6)
+#define ERDMA_SQE_MR_MTT_CNT_MASK GENMASK(31, 12)
+
+struct erdma_write_sqe {
+ __le64 hdr;
+ __be32 imm_data;
+ __le32 length;
+
+ __le32 sink_stag;
+ __le32 sink_to_l;
+ __le32 sink_to_h;
+
+ __le32 rsvd;
+
+ struct erdma_sge sgl[];
+};
+
+struct erdma_send_sqe_rc {
+ __le64 hdr;
+ union {
+ __be32 imm_data;
+ __le32 invalid_stag;
+ };
+
+ __le32 length;
+ struct erdma_sge sgl[];
+};
+
+struct erdma_send_sqe_ud {
+ __le64 hdr;
+ __be32 imm_data;
+ __le32 length;
+ __le32 qkey;
+ __le32 dst_qpn;
+ __le32 ahn;
+ __le32 rsvd;
+ struct erdma_sge sgl[];
+};
+
+struct erdma_readreq_sqe {
+ __le64 hdr;
+ __le32 invalid_stag;
+ __le32 length;
+ __le32 sink_stag;
+ __le32 sink_to_l;
+ __le32 sink_to_h;
+ __le32 rsvd;
+};
+
+struct erdma_atomic_sqe {
+ __le64 hdr;
+ __le64 rsvd;
+ __le64 fetchadd_swap_data;
+ __le64 cmp_data;
+
+ struct erdma_sge remote;
+ struct erdma_sge sgl;
+};
+
+struct erdma_reg_mr_sqe {
+ __le64 hdr;
+ __le64 addr;
+ __le32 length;
+ __le32 stag;
+ __le32 attrs;
+ __le32 rsvd;
+};
+
+/* EQ related. */
+#define ERDMA_DEFAULT_EQ_DEPTH 4096
+
+/* ceqe */
+#define ERDMA_CEQE_HDR_DB_MASK BIT_ULL(63)
+#define ERDMA_CEQE_HDR_PI_MASK GENMASK_ULL(55, 32)
+#define ERDMA_CEQE_HDR_O_MASK BIT_ULL(31)
+#define ERDMA_CEQE_HDR_CQN_MASK GENMASK_ULL(19, 0)
+
+/* aeqe */
+#define ERDMA_AEQE_HDR_O_MASK BIT(31)
+#define ERDMA_AEQE_HDR_TYPE_MASK GENMASK(23, 16)
+#define ERDMA_AEQE_HDR_SUBTYPE_MASK GENMASK(7, 0)
+
+#define ERDMA_AE_TYPE_QP_FATAL_EVENT 0
+#define ERDMA_AE_TYPE_QP_ERQ_ERR_EVENT 1
+#define ERDMA_AE_TYPE_ACC_ERR_EVENT 2
+#define ERDMA_AE_TYPE_CQ_ERR 3
+#define ERDMA_AE_TYPE_OTHER_ERROR 4
+
+struct erdma_aeqe {
+ __le32 hdr;
+ __le32 event_data0;
+ __le32 event_data1;
+ __le32 rsvd;
+};
+
+enum erdma_opcode {
+ ERDMA_OP_WRITE = 0,
+ ERDMA_OP_READ = 1,
+ ERDMA_OP_SEND = 2,
+ ERDMA_OP_SEND_WITH_IMM = 3,
+
+ ERDMA_OP_RECEIVE = 4,
+ ERDMA_OP_RECV_IMM = 5,
+ ERDMA_OP_RECV_INV = 6,
+
+ ERDMA_OP_RSVD0 = 7,
+ ERDMA_OP_RSVD1 = 8,
+ ERDMA_OP_WRITE_WITH_IMM = 9,
+
+ ERDMA_OP_RSVD2 = 10,
+ ERDMA_OP_RSVD3 = 11,
+
+ ERDMA_OP_RSP_SEND_IMM = 12,
+ ERDMA_OP_SEND_WITH_INV = 13,
+
+ ERDMA_OP_REG_MR = 14,
+ ERDMA_OP_LOCAL_INV = 15,
+ ERDMA_OP_READ_WITH_INV = 16,
+ ERDMA_OP_ATOMIC_CAS = 17,
+ ERDMA_OP_ATOMIC_FAA = 18,
+ ERDMA_NUM_OPCODES = 19,
+ ERDMA_OP_INVALID = ERDMA_NUM_OPCODES + 1
+};
+
+enum erdma_wc_status {
+ ERDMA_WC_SUCCESS = 0,
+ ERDMA_WC_GENERAL_ERR = 1,
+ ERDMA_WC_RECV_WQE_FORMAT_ERR = 2,
+ ERDMA_WC_RECV_STAG_INVALID_ERR = 3,
+ ERDMA_WC_RECV_ADDR_VIOLATION_ERR = 4,
+ ERDMA_WC_RECV_RIGHT_VIOLATION_ERR = 5,
+ ERDMA_WC_RECV_PDID_ERR = 6,
+ ERDMA_WC_RECV_WARRPING_ERR = 7,
+ ERDMA_WC_SEND_WQE_FORMAT_ERR = 8,
+ ERDMA_WC_SEND_WQE_ORD_EXCEED = 9,
+ ERDMA_WC_SEND_STAG_INVALID_ERR = 10,
+ ERDMA_WC_SEND_ADDR_VIOLATION_ERR = 11,
+ ERDMA_WC_SEND_RIGHT_VIOLATION_ERR = 12,
+ ERDMA_WC_SEND_PDID_ERR = 13,
+ ERDMA_WC_SEND_WARRPING_ERR = 14,
+ ERDMA_WC_FLUSH_ERR = 15,
+ ERDMA_WC_RETRY_EXC_ERR = 16,
+ ERDMA_NUM_WC_STATUS
+};
+
+enum erdma_vendor_err {
+ ERDMA_WC_VENDOR_NO_ERR = 0,
+ ERDMA_WC_VENDOR_INVALID_RQE = 1,
+ ERDMA_WC_VENDOR_RQE_INVALID_STAG = 2,
+ ERDMA_WC_VENDOR_RQE_ADDR_VIOLATION = 3,
+ ERDMA_WC_VENDOR_RQE_ACCESS_RIGHT_ERR = 4,
+ ERDMA_WC_VENDOR_RQE_INVALID_PD = 5,
+ ERDMA_WC_VENDOR_RQE_WRAP_ERR = 6,
+ ERDMA_WC_VENDOR_INVALID_SQE = 0x20,
+ ERDMA_WC_VENDOR_ZERO_ORD = 0x21,
+ ERDMA_WC_VENDOR_SQE_INVALID_STAG = 0x30,
+ ERDMA_WC_VENDOR_SQE_ADDR_VIOLATION = 0x31,
+ ERDMA_WC_VENDOR_SQE_ACCESS_ERR = 0x32,
+ ERDMA_WC_VENDOR_SQE_INVALID_PD = 0x33,
+ ERDMA_WC_VENDOR_SQE_WARP_ERR = 0x34
+};
+
+#endif
diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c
new file mode 100644
index 000000000000..f35b30235018
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_main.c
@@ -0,0 +1,684 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+#include <linux/module.h>
+#include <net/addrconf.h>
+#include <rdma/erdma-abi.h>
+
+#include "erdma.h"
+#include "erdma_cm.h"
+#include "erdma_verbs.h"
+
+MODULE_AUTHOR("Cheng Xu <chengyou@linux.alibaba.com>");
+MODULE_DESCRIPTION("Alibaba elasticRDMA adapter driver");
+MODULE_LICENSE("Dual BSD/GPL");
+
+static int erdma_netdev_event(struct notifier_block *nb, unsigned long event,
+ void *arg)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(arg);
+ struct erdma_dev *dev = container_of(nb, struct erdma_dev, netdev_nb);
+
+ if (dev->netdev == NULL || dev->netdev != netdev)
+ goto done;
+
+ switch (event) {
+ case NETDEV_CHANGEMTU:
+ if (dev->mtu != netdev->mtu) {
+ erdma_set_mtu(dev, netdev->mtu);
+ dev->mtu = netdev->mtu;
+ }
+ break;
+ case NETDEV_REGISTER:
+ case NETDEV_UNREGISTER:
+ case NETDEV_CHANGEADDR:
+ case NETDEV_GOING_DOWN:
+ case NETDEV_CHANGE:
+ default:
+ break;
+ }
+
+done:
+ return NOTIFY_OK;
+}
+
+static int erdma_enum_and_get_netdev(struct erdma_dev *dev)
+{
+ struct net_device *netdev;
+ int ret = -EPROBE_DEFER;
+
+ /* Already bound to a net_device, so nothing to do. */
+ if (dev->netdev)
+ return 0;
+
+ rtnl_lock();
+ for_each_netdev(&init_net, netdev) {
+ /*
+ * In erdma, the paired netdev and ibdev must share the same MAC
+ * address, which erdma reads from its PCIe BAR registers. Since
+ * erdma cannot obtain a reference to the paired netdev directly,
+ * walk the netdev list here to find it.
+ */
+ if (ether_addr_equal_unaligned(netdev->perm_addr,
+ dev->attrs.peer_addr)) {
+ ret = ib_device_set_netdev(&dev->ibdev, netdev, 1);
+ if (ret) {
+ rtnl_unlock();
+ ibdev_warn(&dev->ibdev,
+ "failed (%d) to link netdev", ret);
+ return ret;
+ }
+
+ dev->netdev = netdev;
+ break;
+ }
+ }
+
+ rtnl_unlock();
+
+ return ret;
+}
+
+static int erdma_device_register(struct erdma_dev *dev)
+{
+ struct ib_device *ibdev = &dev->ibdev;
+ int ret;
+
+ ret = erdma_enum_and_get_netdev(dev);
+ if (ret)
+ return ret;
+
+ dev->mtu = dev->netdev->mtu;
+ addrconf_addr_eui48((u8 *)&ibdev->node_guid, dev->netdev->dev_addr);
+
+ ret = ib_register_device(ibdev, "erdma_%d", &dev->pdev->dev);
+ if (ret) {
+ dev_err(&dev->pdev->dev,
+ "ib_register_device failed: ret = %d\n", ret);
+ return ret;
+ }
+
+ dev->netdev_nb.notifier_call = erdma_netdev_event;
+ ret = register_netdevice_notifier(&dev->netdev_nb);
+ if (ret) {
+ ibdev_err(&dev->ibdev, "failed to register notifier.\n");
+ ib_unregister_device(ibdev);
+ }
+
+ return ret;
+}
+
+static irqreturn_t erdma_comm_irq_handler(int irq, void *data)
+{
+ struct erdma_dev *dev = data;
+
+ erdma_cmdq_completion_handler(&dev->cmdq);
+ erdma_aeq_event_handler(dev);
+
+ return IRQ_HANDLED;
+}
+
+static int erdma_request_vectors(struct erdma_dev *dev)
+{
+ int expect_irq_num = min(num_possible_cpus() + 1, ERDMA_NUM_MSIX_VEC);
+ int ret;
+
+ ret = pci_alloc_irq_vectors(dev->pdev, 1, expect_irq_num, PCI_IRQ_MSIX);
+ if (ret < 0) {
+ dev_err(&dev->pdev->dev, "request irq vectors failed(%d)\n",
+ ret);
+ return ret;
+ }
+ dev->attrs.irq_num = ret;
+
+ return 0;
+}
+
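+/*
+ * MSI-X vector 0 is shared by the command queue and the async event
+ * queue; its affinity is hinted towards the CPUs local to the device's
+ * PCI bus.
+ */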
+static int erdma_comm_irq_init(struct erdma_dev *dev)
+{
+ snprintf(dev->comm_irq.name, ERDMA_IRQNAME_SIZE, "erdma-common@pci:%s",
+ pci_name(dev->pdev));
+ dev->comm_irq.msix_vector =
+ pci_irq_vector(dev->pdev, ERDMA_MSIX_VECTOR_CMDQ);
+
+ cpumask_set_cpu(cpumask_first(cpumask_of_pcibus(dev->pdev->bus)),
+ &dev->comm_irq.affinity_hint_mask);
+ irq_set_affinity_hint(dev->comm_irq.msix_vector,
+ &dev->comm_irq.affinity_hint_mask);
+
+ return request_irq(dev->comm_irq.msix_vector, erdma_comm_irq_handler, 0,
+ dev->comm_irq.name, dev);
+}
+
+static void erdma_comm_irq_uninit(struct erdma_dev *dev)
+{
+ irq_set_affinity_hint(dev->comm_irq.msix_vector, NULL);
+ free_irq(dev->comm_irq.msix_vector, dev);
+}
+
+static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev)
+{
+ int ret;
+
+ dev->proto = erdma_reg_read32(dev, ERDMA_REGS_DEV_PROTO_REG);
+
+ dev->resp_pool = dma_pool_create("erdma_resp_pool", &pdev->dev,
+ ERDMA_HW_RESP_SIZE, ERDMA_HW_RESP_SIZE,
+ 0);
+ if (!dev->resp_pool)
+ return -ENOMEM;
+
+ dev->db_pool = dma_pool_create("erdma_db_pool", &pdev->dev,
+ ERDMA_DB_SIZE, ERDMA_DB_SIZE, 0);
+ if (!dev->db_pool) {
+ ret = -ENOMEM;
+ goto destroy_resp_pool;
+ }
+
+ ret = dma_set_mask_and_coherent(&pdev->dev,
+ DMA_BIT_MASK(ERDMA_PCI_WIDTH));
+ if (ret)
+ goto destroy_db_pool;
+
+ dma_set_max_seg_size(&pdev->dev, UINT_MAX);
+
+ return 0;
+
+destroy_db_pool:
+ dma_pool_destroy(dev->db_pool);
+
+destroy_resp_pool:
+ dma_pool_destroy(dev->resp_pool);
+
+ return ret;
+}
+
+static void erdma_device_uninit(struct erdma_dev *dev)
+{
+ dma_pool_destroy(dev->db_pool);
+ dma_pool_destroy(dev->resp_pool);
+}
+
+static void erdma_hw_reset(struct erdma_dev *dev)
+{
+ u32 ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_RESET_MASK, 1);
+
+ erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);
+}
+
+static int erdma_wait_hw_init_done(struct erdma_dev *dev)
+{
+ int i;
+
+ erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG,
+ FIELD_PREP(ERDMA_REG_DEV_CTRL_INIT_MASK, 1));
+
+ for (i = 0; i < ERDMA_WAIT_DEV_DONE_CNT; i++) {
+ if (erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG,
+ ERDMA_REG_DEV_ST_INIT_DONE_MASK))
+ break;
+
+ msleep(ERDMA_REG_ACCESS_WAIT_MS);
+ }
+
+ if (i == ERDMA_WAIT_DEV_DONE_CNT) {
+ dev_err(&dev->pdev->dev, "wait init done failed.\n");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+static const struct pci_device_id erdma_pci_tbl[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_ALIBABA, 0x107f) },
+ {}
+};
+
+static int erdma_probe_dev(struct pci_dev *pdev)
+{
+ struct erdma_dev *dev;
+ int bars, err;
+ u32 version;
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "pci_enable_device failed(%d)\n", err);
+ return err;
+ }
+
+ pci_set_master(pdev);
+
+ dev = ib_alloc_device(erdma_dev, ibdev);
+ if (!dev) {
+ dev_err(&pdev->dev, "ib_alloc_device failed\n");
+ err = -ENOMEM;
+ goto err_disable_device;
+ }
+
+ pci_set_drvdata(pdev, dev);
+ dev->pdev = pdev;
+ dev->attrs.numa_node = dev_to_node(&pdev->dev);
+
+ bars = pci_select_bars(pdev, IORESOURCE_MEM);
+ err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
+ if (bars != ERDMA_BAR_MASK || err) {
+ err = err ? err : -EINVAL;
+ goto err_ib_device_release;
+ }
+
+ dev->func_bar_addr = pci_resource_start(pdev, ERDMA_FUNC_BAR);
+ dev->func_bar_len = pci_resource_len(pdev, ERDMA_FUNC_BAR);
+
+ dev->func_bar =
+ devm_ioremap(&pdev->dev, dev->func_bar_addr, dev->func_bar_len);
+ if (!dev->func_bar) {
+ dev_err(&pdev->dev, "devm_ioremap failed.\n");
+ err = -EFAULT;
+ goto err_release_bars;
+ }
+
+ version = erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG);
+ if (version == 0) {
+ /* A zero version register indicates a non-functional PCI function. */
+ err = -ENODEV;
+ goto err_iounmap_func_bar;
+ }
+
+ err = erdma_device_init(dev, pdev);
+ if (err)
+ goto err_iounmap_func_bar;
+
+ err = erdma_request_vectors(dev);
+ if (err)
+ goto err_uninit_device;
+
+ err = erdma_comm_irq_init(dev);
+ if (err)
+ goto err_free_vectors;
+
+ err = erdma_aeq_init(dev);
+ if (err)
+ goto err_uninit_comm_irq;
+
+ err = erdma_cmdq_init(dev);
+ if (err)
+ goto err_uninit_aeq;
+
+ err = erdma_wait_hw_init_done(dev);
+ if (err)
+ goto err_uninit_cmdq;
+
+ err = erdma_ceqs_init(dev);
+ if (err)
+ goto err_reset_hw;
+
+ erdma_finish_cmdq_init(dev);
+
+ return 0;
+
+err_reset_hw:
+ erdma_hw_reset(dev);
+
+err_uninit_cmdq:
+ erdma_cmdq_destroy(dev);
+
+err_uninit_aeq:
+ erdma_eq_destroy(dev, &dev->aeq);
+
+err_uninit_comm_irq:
+ erdma_comm_irq_uninit(dev);
+
+err_free_vectors:
+ pci_free_irq_vectors(dev->pdev);
+
+err_uninit_device:
+ erdma_device_uninit(dev);
+
+err_iounmap_func_bar:
+ devm_iounmap(&pdev->dev, dev->func_bar);
+
+err_release_bars:
+ pci_release_selected_regions(pdev, bars);
+
+err_ib_device_release:
+ ib_dealloc_device(&dev->ibdev);
+
+err_disable_device:
+ pci_disable_device(pdev);
+
+ return err;
+}
+
+static void erdma_remove_dev(struct pci_dev *pdev)
+{
+ struct erdma_dev *dev = pci_get_drvdata(pdev);
+
+ erdma_ceqs_uninit(dev);
+ erdma_hw_reset(dev);
+ erdma_cmdq_destroy(dev);
+ erdma_eq_destroy(dev, &dev->aeq);
+ erdma_comm_irq_uninit(dev);
+ pci_free_irq_vectors(dev->pdev);
+ erdma_device_uninit(dev);
+
+ devm_iounmap(&pdev->dev, dev->func_bar);
+ pci_release_selected_regions(pdev, ERDMA_BAR_MASK);
+
+ ib_dealloc_device(&dev->ibdev);
+
+ pci_disable_device(pdev);
+}
+
+#define ERDMA_GET_CAP(name, cap) FIELD_GET(ERDMA_CMD_DEV_CAP_##name##_MASK, cap)
+
+static int erdma_dev_attrs_init(struct erdma_dev *dev)
+{
+ int err;
+ u64 req_hdr, cap0, cap1;
+
+ erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_QUERY_DEVICE);
+
+ err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
+ &cap1, true);
+ if (err)
+ return err;
+
+ dev->attrs.max_cqe = 1 << ERDMA_GET_CAP(MAX_CQE, cap0);
+ dev->attrs.max_mr_size = 1ULL << ERDMA_GET_CAP(MAX_MR_SIZE, cap0);
+ dev->attrs.max_mw = 1 << ERDMA_GET_CAP(MAX_MW, cap1);
+ dev->attrs.max_recv_wr = 1 << ERDMA_GET_CAP(MAX_RECV_WR, cap0);
+ dev->attrs.max_gid = 1 << ERDMA_GET_CAP(MAX_GID, cap0);
+ dev->attrs.max_ah = 1 << ERDMA_GET_CAP(MAX_AH, cap0);
+ dev->attrs.local_dma_key = ERDMA_GET_CAP(DMA_LOCAL_KEY, cap1);
+ dev->attrs.cc = ERDMA_GET_CAP(DEFAULT_CC, cap1);
+ dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1);
+ dev->attrs.max_mr = dev->attrs.max_qp << 1;
+ dev->attrs.max_cq = dev->attrs.max_qp << 1;
+ dev->attrs.cap_flags = ERDMA_GET_CAP(FLAGS, cap0);
+
+ dev->attrs.max_send_wr = ERDMA_MAX_SEND_WR;
+ dev->attrs.max_ord = ERDMA_MAX_ORD;
+ dev->attrs.max_ird = ERDMA_MAX_IRD;
+ dev->attrs.max_send_sge = ERDMA_MAX_SEND_SGE;
+ dev->attrs.max_recv_sge = ERDMA_MAX_RECV_SGE;
+ dev->attrs.max_sge_rd = ERDMA_MAX_SGE_RD;
+ dev->attrs.max_pd = ERDMA_MAX_PD;
+
+ dev->res_cb[ERDMA_RES_TYPE_PD].max_cap = ERDMA_MAX_PD;
+ dev->res_cb[ERDMA_RES_TYPE_STAG_IDX].max_cap = dev->attrs.max_mr;
+ dev->res_cb[ERDMA_RES_TYPE_AH].max_cap = dev->attrs.max_ah;
+
+ erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_COMMON,
+ CMDQ_OPCODE_QUERY_FW_INFO);
+
+ err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
+ &cap1, true);
+ if (!err)
+ dev->attrs.fw_version =
+ FIELD_GET(ERDMA_CMD_INFO0_FW_VER_MASK, cap0);
+
+ return err;
+}
+
+static int erdma_device_config(struct erdma_dev *dev)
+{
+ struct erdma_cmdq_config_device_req req = {};
+
+ if (!(dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_EXTEND_DB))
+ return 0;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
+ CMDQ_OPCODE_CONF_DEVICE);
+
+ req.cfg = FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PGSHIFT_MASK, PAGE_SHIFT) |
+ FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PS_EN_MASK, 1);
+
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+}
+
+static int erdma_res_cb_init(struct erdma_dev *dev)
+{
+ int i, j;
+
+ for (i = 0; i < ERDMA_RES_CNT; i++) {
+ dev->res_cb[i].next_alloc_idx = 1;
+ spin_lock_init(&dev->res_cb[i].lock);
+ dev->res_cb[i].bitmap =
+ bitmap_zalloc(dev->res_cb[i].max_cap, GFP_KERNEL);
+ if (!dev->res_cb[i].bitmap)
+ goto err;
+ }
+
+ return 0;
+
+err:
+ for (j = 0; j < i; j++)
+ bitmap_free(dev->res_cb[j].bitmap);
+
+ return -ENOMEM;
+}
+
+static void erdma_res_cb_free(struct erdma_dev *dev)
+{
+ int i;
+
+ for (i = 0; i < ERDMA_RES_CNT; i++)
+ bitmap_free(dev->res_cb[i].bitmap);
+}
+
+static const struct ib_device_ops erdma_device_ops_rocev2 = {
+ .get_link_layer = erdma_get_link_layer,
+ .add_gid = erdma_add_gid,
+ .del_gid = erdma_del_gid,
+ .query_pkey = erdma_query_pkey,
+ .create_ah = erdma_create_ah,
+ .destroy_ah = erdma_destroy_ah,
+ .query_ah = erdma_query_ah,
+
+ INIT_RDMA_OBJ_SIZE(ib_ah, erdma_ah, ibah),
+};
+
+static const struct ib_device_ops erdma_device_ops_iwarp = {
+ .iw_accept = erdma_accept,
+ .iw_add_ref = erdma_qp_get_ref,
+ .iw_connect = erdma_connect,
+ .iw_create_listen = erdma_create_listen,
+ .iw_destroy_listen = erdma_destroy_listen,
+ .iw_get_qp = erdma_get_ibqp,
+ .iw_reject = erdma_reject,
+ .iw_rem_ref = erdma_qp_put_ref,
+};
+
+static const struct ib_device_ops erdma_device_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_ERDMA,
+ .uverbs_abi_ver = ERDMA_ABI_VERSION,
+
+ .alloc_hw_port_stats = erdma_alloc_hw_port_stats,
+ .alloc_mr = erdma_ib_alloc_mr,
+ .alloc_pd = erdma_alloc_pd,
+ .alloc_ucontext = erdma_alloc_ucontext,
+ .create_cq = erdma_create_cq,
+ .create_qp = erdma_create_qp,
+ .dealloc_pd = erdma_dealloc_pd,
+ .dealloc_ucontext = erdma_dealloc_ucontext,
+ .dereg_mr = erdma_dereg_mr,
+ .destroy_cq = erdma_destroy_cq,
+ .destroy_qp = erdma_destroy_qp,
+ .disassociate_ucontext = erdma_disassociate_ucontext,
+ .get_dma_mr = erdma_get_dma_mr,
+ .get_hw_stats = erdma_get_hw_stats,
+ .get_port_immutable = erdma_get_port_immutable,
+ .map_mr_sg = erdma_map_mr_sg,
+ .mmap = erdma_mmap,
+ .mmap_free = erdma_mmap_free,
+ .post_recv = erdma_post_recv,
+ .post_send = erdma_post_send,
+ .poll_cq = erdma_poll_cq,
+ .query_device = erdma_query_device,
+ .query_gid = erdma_query_gid,
+ .query_port = erdma_query_port,
+ .query_qp = erdma_query_qp,
+ .req_notify_cq = erdma_req_notify_cq,
+ .reg_user_mr = erdma_reg_user_mr,
+ .modify_qp = erdma_modify_qp,
+
+ INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq),
+ INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, erdma_ucontext, ibucontext),
+ INIT_RDMA_OBJ_SIZE(ib_qp, erdma_qp, ibqp),
+};
+
+static int erdma_ib_device_add(struct pci_dev *pdev)
+{
+ struct erdma_dev *dev = pci_get_drvdata(pdev);
+ struct ib_device *ibdev = &dev->ibdev;
+ u64 mac;
+ int ret;
+
+ ret = erdma_dev_attrs_init(dev);
+ if (ret)
+ return ret;
+
+ ret = erdma_device_config(dev);
+ if (ret)
+ return ret;
+
+ if (erdma_device_iwarp(dev)) {
+ ibdev->node_type = RDMA_NODE_RNIC;
+ ib_set_device_ops(ibdev, &erdma_device_ops_iwarp);
+ } else {
+ ibdev->node_type = RDMA_NODE_IB_CA;
+ ib_set_device_ops(ibdev, &erdma_device_ops_rocev2);
+ }
+
+ memcpy(ibdev->node_desc, ERDMA_NODE_DESC, sizeof(ERDMA_NODE_DESC));
+
+ /*
+ * Current model (one-to-one device association):
+ * One ERDMA device per net_device or, equivalently,
+ * per physical port.
+ */
+ ibdev->phys_port_cnt = 1;
+ ibdev->num_comp_vectors = dev->attrs.irq_num - 1;
+
+ ib_set_device_ops(ibdev, &erdma_device_ops);
+
+ INIT_LIST_HEAD(&dev->cep_list);
+
+ spin_lock_init(&dev->lock);
+ xa_init_flags(&dev->qp_xa, XA_FLAGS_ALLOC1);
+ xa_init_flags(&dev->cq_xa, XA_FLAGS_ALLOC1);
+ dev->next_alloc_cqn = 1;
+ dev->next_alloc_qpn = 1;
+
+ ret = erdma_res_cb_init(dev);
+ if (ret)
+ return ret;
+
+ atomic_set(&dev->num_ctx, 0);
+
+ mac = erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_L_REG);
+ mac |= (u64)erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_H_REG) << 32;
+
+ u64_to_ether_addr(mac, dev->attrs.peer_addr);
+
+ dev->reflush_wq = alloc_workqueue("erdma-reflush-wq", WQ_UNBOUND,
+ WQ_UNBOUND_MAX_ACTIVE);
+ if (!dev->reflush_wq) {
+ ret = -ENOMEM;
+ goto err_alloc_workqueue;
+ }
+
+ ret = erdma_device_register(dev);
+ if (ret)
+ goto err_register;
+
+ return 0;
+
+err_register:
+ destroy_workqueue(dev->reflush_wq);
+err_alloc_workqueue:
+ xa_destroy(&dev->qp_xa);
+ xa_destroy(&dev->cq_xa);
+
+ erdma_res_cb_free(dev);
+
+ return ret;
+}
+
+static void erdma_ib_device_remove(struct pci_dev *pdev)
+{
+ struct erdma_dev *dev = pci_get_drvdata(pdev);
+
+ unregister_netdevice_notifier(&dev->netdev_nb);
+ ib_unregister_device(&dev->ibdev);
+
+ destroy_workqueue(dev->reflush_wq);
+ erdma_res_cb_free(dev);
+ xa_destroy(&dev->qp_xa);
+ xa_destroy(&dev->cq_xa);
+}
+
+static int erdma_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ int ret;
+
+ ret = erdma_probe_dev(pdev);
+ if (ret)
+ return ret;
+
+ ret = erdma_ib_device_add(pdev);
+ if (ret) {
+ erdma_remove_dev(pdev);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void erdma_remove(struct pci_dev *pdev)
+{
+ erdma_ib_device_remove(pdev);
+ erdma_remove_dev(pdev);
+}
+
+static struct pci_driver erdma_pci_driver = {
+ .name = DRV_MODULE_NAME,
+ .id_table = erdma_pci_tbl,
+ .probe = erdma_probe,
+ .remove = erdma_remove
+};
+
+MODULE_DEVICE_TABLE(pci, erdma_pci_tbl);
+
+static __init int erdma_init_module(void)
+{
+ int ret;
+
+ ret = erdma_cm_init();
+ if (ret)
+ return ret;
+
+ ret = pci_register_driver(&erdma_pci_driver);
+ if (ret)
+ erdma_cm_exit();
+
+ return ret;
+}
+
+static void __exit erdma_exit_module(void)
+{
+ pci_unregister_driver(&erdma_pci_driver);
+
+ erdma_cm_exit();
+}
+
+module_init(erdma_init_module);
+module_exit(erdma_exit_module);
diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c
new file mode 100644
index 000000000000..25f6c49aec77
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_qp.c
@@ -0,0 +1,757 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2021, Alibaba Group */
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+
+#include "erdma_cm.h"
+#include "erdma_verbs.h"
+
+void erdma_qp_llp_close(struct erdma_qp *qp)
+{
+ struct erdma_mod_qp_params_iwarp params;
+
+ down_write(&qp->state_lock);
+
+ switch (qp->attrs.iwarp.state) {
+ case ERDMA_QPS_IWARP_RTS:
+ case ERDMA_QPS_IWARP_RTR:
+ case ERDMA_QPS_IWARP_IDLE:
+ case ERDMA_QPS_IWARP_TERMINATE:
+ params.state = ERDMA_QPS_IWARP_CLOSING;
+ erdma_modify_qp_state_iwarp(qp, &params, ERDMA_QPA_IWARP_STATE);
+ break;
+ case ERDMA_QPS_IWARP_CLOSING:
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE;
+ break;
+ default:
+ break;
+ }
+
+ if (qp->cep) {
+ erdma_cep_put(qp->cep);
+ qp->cep = NULL;
+ }
+
+ up_write(&qp->state_lock);
+}
+
+struct ib_qp *erdma_get_ibqp(struct ib_device *ibdev, int id)
+{
+ struct erdma_qp *qp = find_qp_by_qpn(to_edev(ibdev), id);
+
+ if (qp)
+ return &qp->ibqp;
+
+ return NULL;
+}
+
+static int
+erdma_modify_qp_state_to_rts(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_iwarp *params,
+ enum erdma_qpa_mask_iwarp mask)
+{
+ int ret;
+ struct erdma_dev *dev = qp->dev;
+ struct erdma_cmdq_modify_qp_req req;
+ struct tcp_sock *tp;
+ struct erdma_cep *cep = qp->cep;
+ struct sockaddr_storage local_addr, remote_addr;
+
+ if (!(mask & ERDMA_QPA_IWARP_LLP_HANDLE))
+ return -EINVAL;
+
+ if (!(mask & ERDMA_QPA_IWARP_MPA))
+ return -EINVAL;
+
+ if (!(mask & ERDMA_QPA_IWARP_CC))
+ params->cc = qp->attrs.cc;
+
+ ret = getname_local(cep->sock, &local_addr);
+ if (ret < 0)
+ return ret;
+
+ ret = getname_peer(cep->sock, &remote_addr);
+ if (ret < 0)
+ return ret;
+
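+ /*
+ * Hand the established TCP connection over to the hardware: the
+ * command carries the 4-tuple, the MPA cookie and the current
+ * send/receive sequence numbers.
+ */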
+ tp = tcp_sk(qp->cep->sock->sk);
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_MODIFY_QP);
+
+ req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, params->state) |
+ FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, params->cc) |
+ FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));
+
+ req.cookie = be32_to_cpu(cep->mpa.ext_data.cookie);
+ req.dip = to_sockaddr_in(remote_addr).sin_addr.s_addr;
+ req.sip = to_sockaddr_in(local_addr).sin_addr.s_addr;
+ req.dport = to_sockaddr_in(remote_addr).sin_port;
+ req.sport = to_sockaddr_in(local_addr).sin_port;
+
+ req.send_nxt = tp->snd_nxt;
+ /* Reserve TCP sequence space for the MPA response on the passive (server) side. */
+ if (params->qp_type == ERDMA_QP_PASSIVE)
+ req.send_nxt += MPA_DEFAULT_HDR_LEN + params->pd_len;
+ req.recv_nxt = tp->rcv_nxt;
+
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+ if (ret)
+ return ret;
+
+ if (mask & ERDMA_QPA_IWARP_IRD)
+ qp->attrs.irq_size = params->irq_size;
+
+ if (mask & ERDMA_QPA_IWARP_ORD)
+ qp->attrs.orq_size = params->orq_size;
+
+ if (mask & ERDMA_QPA_IWARP_CC)
+ qp->attrs.cc = params->cc;
+
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_RTS;
+
+ return 0;
+}
+
+static int
+erdma_modify_qp_state_to_stop(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_iwarp *params,
+ enum erdma_qpa_mask_iwarp mask)
+{
+ struct erdma_dev *dev = qp->dev;
+ struct erdma_cmdq_modify_qp_req req;
+ int ret;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_MODIFY_QP);
+
+ req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, params->state) |
+ FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));
+
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+ if (ret)
+ return ret;
+
+ qp->attrs.iwarp.state = params->state;
+
+ return 0;
+}
+
+int erdma_modify_qp_state_iwarp(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_iwarp *params,
+ int mask)
+{
+ bool need_reflush = false;
+ int drop_conn, ret = 0;
+
+ if (!mask)
+ return 0;
+
+ if (!(mask & ERDMA_QPA_IWARP_STATE))
+ return 0;
+
+ switch (qp->attrs.iwarp.state) {
+ case ERDMA_QPS_IWARP_IDLE:
+ case ERDMA_QPS_IWARP_RTR:
+ if (params->state == ERDMA_QPS_IWARP_RTS) {
+ ret = erdma_modify_qp_state_to_rts(qp, params, mask);
+ } else if (params->state == ERDMA_QPS_IWARP_ERROR) {
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR;
+ need_reflush = true;
+ if (qp->cep) {
+ erdma_cep_put(qp->cep);
+ qp->cep = NULL;
+ }
+ ret = erdma_modify_qp_state_to_stop(qp, params, mask);
+ }
+ break;
+ case ERDMA_QPS_IWARP_RTS:
+ drop_conn = 0;
+
+ if (params->state == ERDMA_QPS_IWARP_CLOSING ||
+ params->state == ERDMA_QPS_IWARP_TERMINATE ||
+ params->state == ERDMA_QPS_IWARP_ERROR) {
+ ret = erdma_modify_qp_state_to_stop(qp, params, mask);
+ drop_conn = 1;
+ need_reflush = true;
+ }
+
+ if (drop_conn)
+ erdma_qp_cm_drop(qp);
+
+ break;
+ case ERDMA_QPS_IWARP_TERMINATE:
+ if (params->state == ERDMA_QPS_IWARP_ERROR)
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR;
+ break;
+ case ERDMA_QPS_IWARP_CLOSING:
+ if (params->state == ERDMA_QPS_IWARP_IDLE) {
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE;
+ } else if (params->state == ERDMA_QPS_IWARP_ERROR) {
+ ret = erdma_modify_qp_state_to_stop(qp, params, mask);
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR;
+ } else if (params->state != ERDMA_QPS_IWARP_CLOSING) {
+ return -ECONNABORTED;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (need_reflush && !ret && rdma_is_kernel_res(&qp->ibqp.res)) {
+ qp->flags |= ERDMA_QP_IN_FLUSHING;
+ mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
+ usecs_to_jiffies(100));
+ }
+
+ return ret;
+}
+
+static int modify_qp_cmd_rocev2(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_rocev2 *params,
+ enum erdma_qpa_mask_rocev2 attr_mask)
+{
+ struct erdma_cmdq_mod_qp_req_rocev2 req;
+
+ memset(&req, 0, sizeof(req));
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_MODIFY_QP);
+
+ req.cfg0 = FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_STATE)
+ req.cfg0 |= FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK,
+ params->state);
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_DST_QPN)
+ req.cfg1 = FIELD_PREP(ERDMA_CMD_MODIFY_QP_DQPN_MASK,
+ params->dst_qpn);
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_QKEY)
+ req.qkey = params->qkey;
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_AV)
+ erdma_set_av_cfg(&req.av_cfg, &params->av);
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_SQ_PSN)
+ req.sq_psn = params->sq_psn;
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_RQ_PSN)
+ req.rq_psn = params->rq_psn;
+
+ req.attr_mask = attr_mask;
+
+ return erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL,
+ NULL, true);
+}
+
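+/*
+ * Bring a kernel QP back to its initial state: rewind the SQ/RQ producer
+ * and consumer indices, clear the work-request tables and queue buffers,
+ * and drop any CQEs already generated for this QP.
+ */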
+static void erdma_reset_qp(struct erdma_qp *qp)
+{
+ qp->kern_qp.sq_pi = 0;
+ qp->kern_qp.sq_ci = 0;
+ qp->kern_qp.rq_pi = 0;
+ qp->kern_qp.rq_ci = 0;
+ memset(qp->kern_qp.swr_tbl, 0, qp->attrs.sq_size * sizeof(u64));
+ memset(qp->kern_qp.rwr_tbl, 0, qp->attrs.rq_size * sizeof(u64));
+ memset(qp->kern_qp.sq_buf, 0, qp->attrs.sq_size << SQEBB_SHIFT);
+ memset(qp->kern_qp.rq_buf, 0, qp->attrs.rq_size << RQE_SHIFT);
+ erdma_remove_cqes_of_qp(&qp->scq->ibcq, QP_ID(qp));
+ if (qp->rcq != qp->scq)
+ erdma_remove_cqes_of_qp(&qp->rcq->ibcq, QP_ID(qp));
+}
+
+int erdma_modify_qp_state_rocev2(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_rocev2 *params,
+ int attr_mask)
+{
+ struct erdma_dev *dev = to_edev(qp->ibqp.device);
+ int ret;
+
+ ret = modify_qp_cmd_rocev2(qp, params, attr_mask);
+ if (ret)
+ return ret;
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_STATE)
+ qp->attrs.rocev2.state = params->state;
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_QKEY)
+ qp->attrs.rocev2.qkey = params->qkey;
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_DST_QPN)
+ qp->attrs.rocev2.dst_qpn = params->dst_qpn;
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_AV)
+ memcpy(&qp->attrs.rocev2.av, &params->av,
+ sizeof(struct erdma_av));
+
+ if (rdma_is_kernel_res(&qp->ibqp.res) &&
+ params->state == ERDMA_QPS_ROCEV2_RESET)
+ erdma_reset_qp(qp);
+
+ if (rdma_is_kernel_res(&qp->ibqp.res) &&
+ params->state == ERDMA_QPS_ROCEV2_ERROR) {
+ qp->flags |= ERDMA_QP_IN_FLUSHING;
+ mod_delayed_work(dev->reflush_wq, &qp->reflush_dwork,
+ usecs_to_jiffies(100));
+ }
+
+ return 0;
+}
+
+static void erdma_qp_safe_free(struct kref *ref)
+{
+ struct erdma_qp *qp = container_of(ref, struct erdma_qp, ref);
+
+ complete(&qp->safe_free);
+}
+
+void erdma_qp_put(struct erdma_qp *qp)
+{
+ WARN_ON(kref_read(&qp->ref) < 1);
+ kref_put(&qp->ref, erdma_qp_safe_free);
+}
+
+void erdma_qp_get(struct erdma_qp *qp)
+{
+ kref_get(&qp->ref);
+}
+
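+/*
+ * Copy inline send data from the WR's SG list directly into the SQE,
+ * spilling over into the following SQEBBs (wrapping around the SQ ring)
+ * as needed. Returns the number of bytes copied, or -EINVAL if the
+ * payload exceeds ERDMA_MAX_INLINE.
+ */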
+static int fill_inline_data(struct erdma_qp *qp,
+ const struct ib_send_wr *send_wr, u16 wqe_idx,
+ u32 sgl_offset, __le32 *length_field)
+{
+ u32 remain_size, copy_size, data_off, bytes = 0;
+ char *data;
+ int i = 0;
+
+ wqe_idx += (sgl_offset >> SQEBB_SHIFT);
+ sgl_offset &= (SQEBB_SIZE - 1);
+ data = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx, qp->attrs.sq_size,
+ SQEBB_SHIFT);
+
+ while (i < send_wr->num_sge) {
+ bytes += send_wr->sg_list[i].length;
+ if (bytes > (int)ERDMA_MAX_INLINE)
+ return -EINVAL;
+
+ remain_size = send_wr->sg_list[i].length;
+ data_off = 0;
+
+ while (1) {
+ copy_size = min(remain_size, SQEBB_SIZE - sgl_offset);
+
+ memcpy(data + sgl_offset,
+ (void *)(uintptr_t)send_wr->sg_list[i].addr +
+ data_off,
+ copy_size);
+ remain_size -= copy_size;
+ data_off += copy_size;
+ sgl_offset += copy_size;
+ wqe_idx += (sgl_offset >> SQEBB_SHIFT);
+ sgl_offset &= (SQEBB_SIZE - 1);
+
+ data = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx,
+ qp->attrs.sq_size, SQEBB_SHIFT);
+ if (!remain_size)
+ break;
+ }
+
+ i++;
+ }
+ *length_field = cpu_to_le32(bytes);
+
+ return bytes;
+}
+
+static int fill_sgl(struct erdma_qp *qp, const struct ib_send_wr *send_wr,
+ u16 wqe_idx, u32 sgl_offset, __le32 *length_field)
+{
+ int i = 0;
+ u32 bytes = 0;
+ char *sgl;
+
+ if (send_wr->num_sge > qp->dev->attrs.max_send_sge)
+ return -EINVAL;
+
+ if (sgl_offset & 0xF)
+ return -EINVAL;
+
+ while (i < send_wr->num_sge) {
+ wqe_idx += (sgl_offset >> SQEBB_SHIFT);
+ sgl_offset &= (SQEBB_SIZE - 1);
+ sgl = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx,
+ qp->attrs.sq_size, SQEBB_SHIFT);
+
+ bytes += send_wr->sg_list[i].length;
+ memcpy(sgl + sgl_offset, &send_wr->sg_list[i],
+ sizeof(struct ib_sge));
+
+ sgl_offset += sizeof(struct ib_sge);
+ i++;
+ }
+
+ *length_field = cpu_to_le32(bytes);
+ return 0;
+}
+
+static void init_send_sqe_rc(struct erdma_qp *qp, struct erdma_send_sqe_rc *sqe,
+ const struct ib_send_wr *wr, u32 *hw_op)
+{
+ u32 op = ERDMA_OP_SEND;
+
+ if (wr->opcode == IB_WR_SEND_WITH_IMM) {
+ op = ERDMA_OP_SEND_WITH_IMM;
+ sqe->imm_data = wr->ex.imm_data;
+ } else if (wr->opcode == IB_WR_SEND_WITH_INV) {
+ op = ERDMA_OP_SEND_WITH_INV;
+ sqe->invalid_stag = cpu_to_le32(wr->ex.invalidate_rkey);
+ }
+
+ *hw_op = op;
+}
+
+static void init_send_sqe_ud(struct erdma_qp *qp, struct erdma_send_sqe_ud *sqe,
+ const struct ib_send_wr *wr, u32 *hw_op)
+{
+ const struct ib_ud_wr *uwr = ud_wr(wr);
+ struct erdma_ah *ah = to_eah(uwr->ah);
+ u32 op = ERDMA_OP_SEND;
+
+ if (wr->opcode == IB_WR_SEND_WITH_IMM) {
+ op = ERDMA_OP_SEND_WITH_IMM;
+ sqe->imm_data = wr->ex.imm_data;
+ }
+
+ *hw_op = op;
+
+ sqe->ahn = cpu_to_le32(ah->ahn);
+ sqe->dst_qpn = cpu_to_le32(uwr->remote_qpn);
+ /*
+ * A QKEY with the high-order bit set is a controlled QKEY; use the
+ * QKEY from the QP context instead of the one in the WR.
+ */
+ if (uwr->remote_qkey & 0x80000000)
+ sqe->qkey = cpu_to_le32(qp->attrs.rocev2.qkey);
+ else
+ sqe->qkey = cpu_to_le32(uwr->remote_qkey);
+}
+
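+/*
+ * Build one send WQE at the current producer index. The 64-bit WQE
+ * header is written last, after the opcode-specific payload and the SGL
+ * (or inline data) are in place, and the producer index is advanced by
+ * the number of SQEBBs consumed.
+ */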
+static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
+ const struct ib_send_wr *send_wr)
+{
+ u32 wqe_size, wqebb_cnt, hw_op, flags, sgl_offset;
+ u32 idx = *pi & (qp->attrs.sq_size - 1);
+ enum ib_wr_opcode op = send_wr->opcode;
+ struct erdma_send_sqe_rc *rc_send_sqe;
+ struct erdma_send_sqe_ud *ud_send_sqe;
+ struct erdma_atomic_sqe *atomic_sqe;
+ struct erdma_readreq_sqe *read_sqe;
+ struct erdma_reg_mr_sqe *regmr_sge;
+ struct erdma_write_sqe *write_sqe;
+ struct ib_rdma_wr *rdma_wr;
+ struct erdma_sge *sge;
+ __le32 *length_field;
+ struct erdma_mr *mr;
+ u64 wqe_hdr, *entry;
+ u32 attrs;
+ int ret;
+
+ if (qp->ibqp.qp_type != IB_QPT_RC && send_wr->opcode != IB_WR_SEND &&
+ send_wr->opcode != IB_WR_SEND_WITH_IMM)
+ return -EINVAL;
+
+ entry = get_queue_entry(qp->kern_qp.sq_buf, idx, qp->attrs.sq_size,
+ SQEBB_SHIFT);
+
+ /* Clear the SQE header section. */
+ *entry = 0;
+
+ qp->kern_qp.swr_tbl[idx] = send_wr->wr_id;
+ flags = send_wr->send_flags;
+ wqe_hdr = FIELD_PREP(
+ ERDMA_SQE_HDR_CE_MASK,
+ ((flags & IB_SEND_SIGNALED) || qp->kern_qp.sig_all) ? 1 : 0);
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SE_MASK,
+ flags & IB_SEND_SOLICITED ? 1 : 0);
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_FENCE_MASK,
+ flags & IB_SEND_FENCE ? 1 : 0);
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_INLINE_MASK,
+ flags & IB_SEND_INLINE ? 1 : 0);
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, QP_ID(qp));
+
+ switch (op) {
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ hw_op = ERDMA_OP_WRITE;
+ if (op == IB_WR_RDMA_WRITE_WITH_IMM)
+ hw_op = ERDMA_OP_WRITE_WITH_IMM;
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
+ rdma_wr = container_of(send_wr, struct ib_rdma_wr, wr);
+ write_sqe = (struct erdma_write_sqe *)entry;
+
+ write_sqe->imm_data = send_wr->ex.imm_data;
+ write_sqe->sink_stag = cpu_to_le32(rdma_wr->rkey);
+ write_sqe->sink_to_h =
+ cpu_to_le32(upper_32_bits(rdma_wr->remote_addr));
+ write_sqe->sink_to_l =
+ cpu_to_le32(lower_32_bits(rdma_wr->remote_addr));
+
+ length_field = &write_sqe->length;
+ wqe_size = sizeof(struct erdma_write_sqe);
+ sgl_offset = wqe_size;
+ break;
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_READ_WITH_INV:
+ read_sqe = (struct erdma_readreq_sqe *)entry;
+ if (unlikely(send_wr->num_sge != 1))
+ return -EINVAL;
+ hw_op = ERDMA_OP_READ;
+ if (op == IB_WR_RDMA_READ_WITH_INV) {
+ hw_op = ERDMA_OP_READ_WITH_INV;
+ read_sqe->invalid_stag =
+ cpu_to_le32(send_wr->ex.invalidate_rkey);
+ }
+
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
+ rdma_wr = container_of(send_wr, struct ib_rdma_wr, wr);
+ read_sqe->length = cpu_to_le32(send_wr->sg_list[0].length);
+ read_sqe->sink_stag = cpu_to_le32(send_wr->sg_list[0].lkey);
+ read_sqe->sink_to_l =
+ cpu_to_le32(lower_32_bits(send_wr->sg_list[0].addr));
+ read_sqe->sink_to_h =
+ cpu_to_le32(upper_32_bits(send_wr->sg_list[0].addr));
+
+ sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
+ qp->attrs.sq_size, SQEBB_SHIFT);
+ sge->addr = cpu_to_le64(rdma_wr->remote_addr);
+ sge->key = cpu_to_le32(rdma_wr->rkey);
+ sge->length = cpu_to_le32(send_wr->sg_list[0].length);
+ wqe_size = sizeof(struct erdma_readreq_sqe) +
+ send_wr->num_sge * sizeof(struct ib_sge);
+
+ goto out;
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ case IB_WR_SEND_WITH_INV:
+ if (qp->ibqp.qp_type == IB_QPT_RC) {
+ rc_send_sqe = (struct erdma_send_sqe_rc *)entry;
+ init_send_sqe_rc(qp, rc_send_sqe, send_wr, &hw_op);
+ length_field = &rc_send_sqe->length;
+ wqe_size = sizeof(struct erdma_send_sqe_rc);
+ } else {
+ ud_send_sqe = (struct erdma_send_sqe_ud *)entry;
+ init_send_sqe_ud(qp, ud_send_sqe, send_wr, &hw_op);
+ length_field = &ud_send_sqe->length;
+ wqe_size = sizeof(struct erdma_send_sqe_ud);
+ }
+
+ sgl_offset = wqe_size;
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
+ break;
+ case IB_WR_REG_MR:
+ wqe_hdr |=
+ FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, ERDMA_OP_REG_MR);
+ regmr_sge = (struct erdma_reg_mr_sqe *)entry;
+ mr = to_emr(reg_wr(send_wr)->mr);
+
+ mr->access = ERDMA_MR_ACC_LR |
+ to_erdma_access_flags(reg_wr(send_wr)->access);
+ regmr_sge->addr = cpu_to_le64(mr->ibmr.iova);
+ regmr_sge->length = cpu_to_le32(mr->ibmr.length);
+ regmr_sge->stag = cpu_to_le32(reg_wr(send_wr)->key);
+ attrs = FIELD_PREP(ERDMA_SQE_MR_ACCESS_MASK, mr->access) |
+ FIELD_PREP(ERDMA_SQE_MR_MTT_CNT_MASK,
+ mr->mem.mtt_nents);
+
+ if (mr->mem.mtt_nents <= ERDMA_MAX_INLINE_MTT_ENTRIES) {
+ attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 0);
+ /* Inline the MTT entries into the SQE to speed up registration. */
+ memcpy(get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
+ qp->attrs.sq_size, SQEBB_SHIFT),
+ mr->mem.mtt->buf, MTT_SIZE(mr->mem.mtt_nents));
+ wqe_size = sizeof(struct erdma_reg_mr_sqe) +
+ MTT_SIZE(mr->mem.mtt_nents);
+ } else {
+ attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 1);
+ wqe_size = sizeof(struct erdma_reg_mr_sqe);
+ }
+
+ regmr_sge->attrs = cpu_to_le32(attrs);
+ goto out;
+ case IB_WR_LOCAL_INV:
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
+ ERDMA_OP_LOCAL_INV);
+ regmr_sge = (struct erdma_reg_mr_sqe *)entry;
+ regmr_sge->stag = cpu_to_le32(send_wr->ex.invalidate_rkey);
+ wqe_size = sizeof(struct erdma_reg_mr_sqe);
+ goto out;
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ atomic_sqe = (struct erdma_atomic_sqe *)entry;
+ if (op == IB_WR_ATOMIC_CMP_AND_SWP) {
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
+ ERDMA_OP_ATOMIC_CAS);
+ atomic_sqe->fetchadd_swap_data =
+ cpu_to_le64(atomic_wr(send_wr)->swap);
+ atomic_sqe->cmp_data =
+ cpu_to_le64(atomic_wr(send_wr)->compare_add);
+ } else {
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
+ ERDMA_OP_ATOMIC_FAA);
+ atomic_sqe->fetchadd_swap_data =
+ cpu_to_le64(atomic_wr(send_wr)->compare_add);
+ }
+
+ sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
+ qp->attrs.sq_size, SQEBB_SHIFT);
+ sge->addr = cpu_to_le64(atomic_wr(send_wr)->remote_addr);
+ sge->key = cpu_to_le32(atomic_wr(send_wr)->rkey);
+ sge++;
+
+ sge->addr = cpu_to_le64(send_wr->sg_list[0].addr);
+ sge->key = cpu_to_le32(send_wr->sg_list[0].lkey);
+ sge->length = cpu_to_le32(send_wr->sg_list[0].length);
+
+ wqe_size = sizeof(*atomic_sqe);
+ goto out;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (flags & IB_SEND_INLINE) {
+ ret = fill_inline_data(qp, send_wr, idx, sgl_offset,
+ length_field);
+ if (ret < 0)
+ return -EINVAL;
+ wqe_size += ret;
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK, ret);
+ } else {
+ ret = fill_sgl(qp, send_wr, idx, sgl_offset, length_field);
+ if (ret)
+ return -EINVAL;
+ wqe_size += send_wr->num_sge * sizeof(struct ib_sge);
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK,
+ send_wr->num_sge);
+ }
+
+out:
+ wqebb_cnt = SQEBB_COUNT(wqe_size);
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_CNT_MASK, wqebb_cnt - 1);
+ *pi += wqebb_cnt;
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, *pi);
+
+ *entry = wqe_hdr;
+
+ return 0;
+}
+
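+/*
+ * Ring the SQ doorbell: mirror the latest doorbell value in the host
+ * doorbell record, then write it to the device's MMIO doorbell register.
+ */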
+static void kick_sq_db(struct erdma_qp *qp, u16 pi)
+{
+ u64 db_data = FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, QP_ID(qp)) |
+ FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, pi);
+
+ *(u64 *)qp->kern_qp.sq_dbrec = db_data;
+ writeq(db_data, qp->kern_qp.hw_sq_db);
+}
+
+int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
+ const struct ib_send_wr **bad_send_wr)
+{
+ struct erdma_qp *qp = to_eqp(ibqp);
+ int ret = 0;
+ const struct ib_send_wr *wr = send_wr;
+ unsigned long flags;
+ u16 sq_pi;
+
+ if (!send_wr)
+ return -EINVAL;
+
+ spin_lock_irqsave(&qp->lock, flags);
+ sq_pi = qp->kern_qp.sq_pi;
+
+ while (wr) {
+ if ((u16)(sq_pi - qp->kern_qp.sq_ci) >= qp->attrs.sq_size) {
+ ret = -ENOMEM;
+ *bad_send_wr = send_wr;
+ break;
+ }
+
+ ret = erdma_push_one_sqe(qp, &sq_pi, wr);
+ if (ret) {
+ *bad_send_wr = wr;
+ break;
+ }
+ qp->kern_qp.sq_pi = sq_pi;
+ kick_sq_db(qp, sq_pi);
+
+ wr = wr->next;
+ }
+ spin_unlock_irqrestore(&qp->lock, flags);
+
+ if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING))
+ mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
+ usecs_to_jiffies(100));
+
+ return ret;
+}
+
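+/*
+ * Post a single receive WR: at most one SGE is supported per RQE, and
+ * the first eight bytes of the RQE double as the RQ doorbell value.
+ */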
+static int erdma_post_recv_one(struct erdma_qp *qp,
+ const struct ib_recv_wr *recv_wr)
+{
+ struct erdma_rqe *rqe =
+ get_queue_entry(qp->kern_qp.rq_buf, qp->kern_qp.rq_pi,
+ qp->attrs.rq_size, RQE_SHIFT);
+
+ rqe->qe_idx = cpu_to_le16(qp->kern_qp.rq_pi + 1);
+ rqe->qpn = cpu_to_le32(QP_ID(qp));
+
+ if (recv_wr->num_sge == 0) {
+ rqe->length = 0;
+ } else if (recv_wr->num_sge == 1) {
+ rqe->stag = cpu_to_le32(recv_wr->sg_list[0].lkey);
+ rqe->to = cpu_to_le64(recv_wr->sg_list[0].addr);
+ rqe->length = cpu_to_le32(recv_wr->sg_list[0].length);
+ } else {
+ return -EINVAL;
+ }
+
+ *(u64 *)qp->kern_qp.rq_dbrec = *(u64 *)rqe;
+ writeq(*(u64 *)rqe, qp->kern_qp.hw_rq_db);
+
+ qp->kern_qp.rwr_tbl[qp->kern_qp.rq_pi & (qp->attrs.rq_size - 1)] =
+ recv_wr->wr_id;
+ qp->kern_qp.rq_pi++;
+
+ return 0;
+}
+
+int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
+ const struct ib_recv_wr **bad_recv_wr)
+{
+ const struct ib_recv_wr *wr = recv_wr;
+ struct erdma_qp *qp = to_eqp(ibqp);
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&qp->lock, flags);
+
+ while (wr) {
+ ret = erdma_post_recv_one(qp, wr);
+ if (ret) {
+ *bad_recv_wr = wr;
+ break;
+ }
+ wr = wr->next;
+ }
+
+ spin_unlock_irqrestore(&qp->lock, flags);
+
+ if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING))
+ mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
+ usecs_to_jiffies(100));
+
+ return ret;
+}
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
new file mode 100644
index 000000000000..109a3f3de911
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
@@ -0,0 +1,2300 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+
+/* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. */
+
+#include <linux/vmalloc.h>
+#include <net/addrconf.h>
+#include <rdma/erdma-abi.h>
+#include <rdma/ib_umem.h>
+#include <rdma/uverbs_ioctl.h>
+
+#include "erdma.h"
+#include "erdma_cm.h"
+#include "erdma_verbs.h"
+
+static void assemble_qbuf_mtt_for_cmd(struct erdma_mem *mem, u32 *cfg,
+ u64 *addr0, u64 *addr1)
+{
+ struct erdma_mtt *mtt = mem->mtt;
+
+ if (mem->mtt_nents > ERDMA_MAX_INLINE_MTT_ENTRIES) {
+ *addr0 = mtt->buf_dma;
+ *cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK,
+ ERDMA_MR_MTT_1LEVEL);
+ } else {
+ *addr0 = mtt->buf[0];
+ memcpy(addr1, mtt->buf + 1, MTT_SIZE(mem->mtt_nents - 1));
+ *cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK,
+ ERDMA_MR_MTT_0LEVEL);
+ }
+}
+
+static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp)
+{
+ struct erdma_dev *dev = to_edev(qp->ibqp.device);
+ struct erdma_pd *pd = to_epd(qp->ibqp.pd);
+ struct erdma_cmdq_create_qp_req req;
+ struct erdma_uqp *user_qp;
+ u64 resp0, resp1;
+ int err;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_CREATE_QP);
+
+ req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_QP_SQ_DEPTH_MASK,
+ ilog2(qp->attrs.sq_size)) |
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_QPN_MASK, QP_ID(qp));
+ req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_QP_RQ_DEPTH_MASK,
+ ilog2(qp->attrs.rq_size)) |
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_PD_MASK, pd->pdn);
+
+ if (qp->ibqp.qp_type == IB_QPT_RC)
+ req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_QP_TYPE_MASK,
+ ERDMA_QPT_RC);
+ else
+ req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_QP_TYPE_MASK,
+ ERDMA_QPT_UD);
+
+ if (rdma_is_kernel_res(&qp->ibqp.res)) {
+ u32 pgsz_range = ilog2(SZ_1M) - ERDMA_HW_PAGE_SHIFT;
+
+ req.sq_cqn_mtt_cfg =
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
+ pgsz_range) |
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);
+ req.rq_cqn_mtt_cfg =
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
+ pgsz_range) |
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);
+
+ req.sq_mtt_cfg =
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK, 0) |
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, 1) |
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK,
+ ERDMA_MR_MTT_0LEVEL);
+ req.rq_mtt_cfg = req.sq_mtt_cfg;
+
+ req.rq_buf_addr = qp->kern_qp.rq_buf_dma_addr;
+ req.sq_buf_addr = qp->kern_qp.sq_buf_dma_addr;
+ req.sq_dbrec_dma = qp->kern_qp.sq_dbrec_dma;
+ req.rq_dbrec_dma = qp->kern_qp.rq_dbrec_dma;
+ } else {
+ user_qp = &qp->user_qp;
+ req.sq_cqn_mtt_cfg = FIELD_PREP(
+ ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
+ ilog2(user_qp->sq_mem.page_size) - ERDMA_HW_PAGE_SHIFT);
+ req.sq_cqn_mtt_cfg |=
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);
+
+ req.rq_cqn_mtt_cfg = FIELD_PREP(
+ ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
+ ilog2(user_qp->rq_mem.page_size) - ERDMA_HW_PAGE_SHIFT);
+ req.rq_cqn_mtt_cfg |=
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);
+
+ req.sq_mtt_cfg = user_qp->sq_mem.page_offset;
+ req.sq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
+ user_qp->sq_mem.mtt_nents);
+
+ req.rq_mtt_cfg = user_qp->rq_mem.page_offset;
+ req.rq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
+ user_qp->rq_mem.mtt_nents);
+
+ assemble_qbuf_mtt_for_cmd(&user_qp->sq_mem, &req.sq_mtt_cfg,
+ &req.sq_buf_addr, req.sq_mtt_entry);
+ assemble_qbuf_mtt_for_cmd(&user_qp->rq_mem, &req.rq_mtt_cfg,
+ &req.rq_buf_addr, req.rq_mtt_entry);
+
+ req.sq_dbrec_dma = user_qp->sq_dbrec_dma;
+ req.rq_dbrec_dma = user_qp->rq_dbrec_dma;
+
+ if (uctx->ext_db.enable) {
+ req.sq_cqn_mtt_cfg |=
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_DB_CFG_MASK, 1);
+ req.db_cfg =
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_SQDB_CFG_MASK,
+ uctx->ext_db.sdb_off) |
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_RQDB_CFG_MASK,
+ uctx->ext_db.rdb_off);
+ }
+ }
+
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0, &resp1,
+ true);
+ if (!err && erdma_device_iwarp(dev))
+ qp->attrs.iwarp.cookie =
+ FIELD_GET(ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK, resp0);
+
+ return err;
+}
+
+static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
+{
+ struct erdma_pd *pd = to_epd(mr->ibmr.pd);
+ u32 mtt_level = ERDMA_MR_MTT_0LEVEL;
+ struct erdma_cmdq_reg_mr_req req;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_REG_MR);
+
+ if (mr->type == ERDMA_MR_TYPE_FRMR ||
+ mr->mem.page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES) {
+ if (mr->mem.mtt->continuous) {
+ req.phy_addr[0] = mr->mem.mtt->buf_dma;
+ mtt_level = ERDMA_MR_MTT_1LEVEL;
+ } else {
+ req.phy_addr[0] = mr->mem.mtt->dma_addrs[0];
+ mtt_level = mr->mem.mtt->level;
+ }
+ } else if (mr->type != ERDMA_MR_TYPE_DMA) {
+ memcpy(req.phy_addr, mr->mem.mtt->buf,
+ MTT_SIZE(mr->mem.page_cnt));
+ }
+
+ req.cfg0 = FIELD_PREP(ERDMA_CMD_MR_VALID_MASK, mr->valid) |
+ FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, mr->ibmr.lkey & 0xFF) |
+ FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, mr->ibmr.lkey >> 8);
+ req.cfg1 = FIELD_PREP(ERDMA_CMD_REGMR_PD_MASK, pd->pdn) |
+ FIELD_PREP(ERDMA_CMD_REGMR_TYPE_MASK, mr->type) |
+ FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access);
+ req.cfg2 = FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK,
+ ilog2(mr->mem.page_size)) |
+ FIELD_PREP(ERDMA_CMD_REGMR_MTT_LEVEL_MASK, mtt_level) |
+ FIELD_PREP(ERDMA_CMD_REGMR_MTT_CNT_MASK, mr->mem.page_cnt);
+
+ if (mr->type == ERDMA_MR_TYPE_DMA)
+ goto post_cmd;
+
+ if (mr->type == ERDMA_MR_TYPE_NORMAL) {
+ req.start_va = mr->mem.va;
+ req.size = mr->mem.len;
+ }
+
+ if (!mr->mem.mtt->continuous && mr->mem.mtt->level > 1) {
+ req.cfg0 |= FIELD_PREP(ERDMA_CMD_MR_VERSION_MASK, 1);
+ req.cfg2 |= FIELD_PREP(ERDMA_CMD_REGMR_MTT_PAGESIZE_MASK,
+ PAGE_SHIFT - ERDMA_HW_PAGE_SHIFT);
+ req.size_h = upper_32_bits(mr->mem.len);
+ req.mtt_cnt_h = mr->mem.page_cnt >> 20;
+ }
+
+post_cmd:
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+}
+
+static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq)
+{
+ struct erdma_dev *dev = to_edev(cq->ibcq.device);
+ struct erdma_cmdq_create_cq_req req;
+ struct erdma_mem *mem;
+ u32 page_size;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_CREATE_CQ);
+
+ req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_CQN_MASK, cq->cqn) |
+ FIELD_PREP(ERDMA_CMD_CREATE_CQ_DEPTH_MASK, ilog2(cq->depth));
+ req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_EQN_MASK, cq->assoc_eqn);
+
+ if (rdma_is_kernel_res(&cq->ibcq.res)) {
+ page_size = SZ_32M;
+ req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
+ ilog2(page_size) - ERDMA_HW_PAGE_SHIFT);
+ req.qbuf_addr_l = lower_32_bits(cq->kern_cq.qbuf_dma_addr);
+ req.qbuf_addr_h = upper_32_bits(cq->kern_cq.qbuf_dma_addr);
+
+ req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, 1) |
+ FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK,
+ ERDMA_MR_MTT_0LEVEL);
+
+ req.first_page_offset = 0;
+ req.cq_dbrec_dma = cq->kern_cq.dbrec_dma;
+ } else {
+ mem = &cq->user_cq.qbuf_mem;
+ req.cfg0 |=
+ FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
+ ilog2(mem->page_size) - ERDMA_HW_PAGE_SHIFT);
+ if (mem->mtt_nents == 1) {
+ req.qbuf_addr_l = lower_32_bits(mem->mtt->buf[0]);
+ req.qbuf_addr_h = upper_32_bits(mem->mtt->buf[0]);
+ req.cfg1 |=
+ FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK,
+ ERDMA_MR_MTT_0LEVEL);
+ } else {
+ req.qbuf_addr_l = lower_32_bits(mem->mtt->buf_dma);
+ req.qbuf_addr_h = upper_32_bits(mem->mtt->buf_dma);
+ req.cfg1 |=
+ FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK,
+ ERDMA_MR_MTT_1LEVEL);
+ }
+ req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK,
+ mem->mtt_nents);
+
+ req.first_page_offset = mem->page_offset;
+ req.cq_dbrec_dma = cq->user_cq.dbrec_dma;
+
+ if (uctx->ext_db.enable) {
+ req.cfg1 |= FIELD_PREP(
+ ERDMA_CMD_CREATE_CQ_MTT_DB_CFG_MASK, 1);
+ req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_DB_CFG_MASK,
+ uctx->ext_db.cdb_off);
+ }
+ }
+
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+}
+
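+/*
+ * Allocate an index from a bitmap-backed resource pool in round-robin
+ * fashion: search from the previous allocation point, wrap around once,
+ * and return -ENOSPC if the pool is exhausted.
+ */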
+static int erdma_alloc_idx(struct erdma_resource_cb *res_cb)
+{
+ int idx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&res_cb->lock, flags);
+ idx = find_next_zero_bit(res_cb->bitmap, res_cb->max_cap,
+ res_cb->next_alloc_idx);
+ if (idx == res_cb->max_cap) {
+ idx = find_first_zero_bit(res_cb->bitmap, res_cb->max_cap);
+ if (idx == res_cb->max_cap) {
+ res_cb->next_alloc_idx = 1;
+ spin_unlock_irqrestore(&res_cb->lock, flags);
+ return -ENOSPC;
+ }
+ }
+
+ set_bit(idx, res_cb->bitmap);
+ res_cb->next_alloc_idx = idx + 1;
+ spin_unlock_irqrestore(&res_cb->lock, flags);
+
+ return idx;
+}
+
+static inline void erdma_free_idx(struct erdma_resource_cb *res_cb, u32 idx)
+{
+ unsigned long flags;
+ u32 used;
+
+ spin_lock_irqsave(&res_cb->lock, flags);
+ used = __test_and_clear_bit(idx, res_cb->bitmap);
+ spin_unlock_irqrestore(&res_cb->lock, flags);
+ WARN_ON(!used);
+}
+
+static struct rdma_user_mmap_entry *
+erdma_user_mmap_entry_insert(struct erdma_ucontext *uctx, void *address,
+ u32 size, u8 mmap_flag, u64 *mmap_offset)
+{
+ struct erdma_user_mmap_entry *entry =
+ kzalloc(sizeof(*entry), GFP_KERNEL);
+ int ret;
+
+ if (!entry)
+ return NULL;
+
+ entry->address = (u64)address;
+ entry->mmap_flag = mmap_flag;
+
+ size = PAGE_ALIGN(size);
+
+ ret = rdma_user_mmap_entry_insert(&uctx->ibucontext, &entry->rdma_entry,
+ size);
+ if (ret) {
+ kfree(entry);
+ return NULL;
+ }
+
+ *mmap_offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
+
+ return &entry->rdma_entry;
+}
+
+int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
+ struct ib_udata *unused)
+{
+ struct erdma_dev *dev = to_edev(ibdev);
+
+ memset(attr, 0, sizeof(*attr));
+
+ attr->max_mr_size = dev->attrs.max_mr_size;
+ attr->vendor_id = PCI_VENDOR_ID_ALIBABA;
+ attr->vendor_part_id = dev->pdev->device;
+ attr->hw_ver = dev->pdev->revision;
+ attr->max_qp = dev->attrs.max_qp - 1;
+ attr->max_qp_wr = min(dev->attrs.max_send_wr, dev->attrs.max_recv_wr);
+ attr->max_qp_rd_atom = dev->attrs.max_ord;
+ attr->max_qp_init_rd_atom = dev->attrs.max_ird;
+ attr->max_res_rd_atom = dev->attrs.max_qp * dev->attrs.max_ird;
+ attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS;
+ attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
+ ibdev->local_dma_lkey = dev->attrs.local_dma_key;
+ attr->max_send_sge = dev->attrs.max_send_sge;
+ attr->max_recv_sge = dev->attrs.max_recv_sge;
+ attr->max_sge_rd = dev->attrs.max_sge_rd;
+ attr->max_cq = dev->attrs.max_cq - 1;
+ attr->max_cqe = dev->attrs.max_cqe;
+ attr->max_mr = dev->attrs.max_mr;
+ attr->max_pd = dev->attrs.max_pd;
+ attr->max_mw = dev->attrs.max_mw;
+ attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA;
+ attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT;
+
+ if (erdma_device_rocev2(dev)) {
+ attr->max_pkeys = ERDMA_MAX_PKEYS;
+ attr->max_ah = dev->attrs.max_ah;
+ }
+
+ if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_ATOMIC)
+ attr->atomic_cap = IB_ATOMIC_GLOB;
+
+ attr->fw_ver = dev->attrs.fw_version;
+
+ if (dev->netdev)
+ addrconf_addr_eui48((u8 *)&attr->sys_image_guid,
+ dev->netdev->dev_addr);
+
+ return 0;
+}
+
+int erdma_query_gid(struct ib_device *ibdev, u32 port, int idx,
+ union ib_gid *gid)
+{
+ struct erdma_dev *dev = to_edev(ibdev);
+
+ memset(gid, 0, sizeof(*gid));
+ ether_addr_copy(gid->raw, dev->attrs.peer_addr);
+
+ return 0;
+}
+
+int erdma_query_port(struct ib_device *ibdev, u32 port,
+ struct ib_port_attr *attr)
+{
+ struct erdma_dev *dev = to_edev(ibdev);
+ struct net_device *ndev = dev->netdev;
+
+ memset(attr, 0, sizeof(*attr));
+
+ if (erdma_device_iwarp(dev)) {
+ attr->gid_tbl_len = 1;
+ } else {
+ attr->gid_tbl_len = dev->attrs.max_gid;
+ attr->ip_gids = true;
+ attr->pkey_tbl_len = ERDMA_MAX_PKEYS;
+ }
+
+ attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
+ attr->max_msg_sz = -1;
+
+ if (!ndev)
+ goto out;
+
+ ib_get_eth_speed(ibdev, port, &attr->active_speed, &attr->active_width);
+ attr->max_mtu = ib_mtu_int_to_enum(ndev->mtu);
+ attr->active_mtu = ib_mtu_int_to_enum(ndev->mtu);
+ attr->state = ib_get_curr_port_state(ndev);
+
+out:
+ if (attr->state == IB_PORT_ACTIVE)
+ attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
+ else
+ attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
+
+ return 0;
+}
+
+int erdma_get_port_immutable(struct ib_device *ibdev, u32 port,
+ struct ib_port_immutable *port_immutable)
+{
+ struct erdma_dev *dev = to_edev(ibdev);
+
+ if (erdma_device_iwarp(dev)) {
+ port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+ port_immutable->gid_tbl_len = 1;
+ } else {
+ port_immutable->core_cap_flags =
+ RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ port_immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ port_immutable->gid_tbl_len = dev->attrs.max_gid;
+ port_immutable->pkey_tbl_len = ERDMA_MAX_PKEYS;
+ }
+
+ return 0;
+}
+
+int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct erdma_pd *pd = to_epd(ibpd);
+ struct erdma_dev *dev = to_edev(ibpd->device);
+ int pdn;
+
+ pdn = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_PD]);
+ if (pdn < 0)
+ return pdn;
+
+ pd->pdn = pdn;
+
+ return 0;
+}
+
+int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct erdma_pd *pd = to_epd(ibpd);
+ struct erdma_dev *dev = to_edev(ibpd->device);
+
+ erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_PD], pd->pdn);
+
+ return 0;
+}
+
+static void erdma_flush_worker(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct erdma_qp *qp =
+ container_of(dwork, struct erdma_qp, reflush_dwork);
+ struct erdma_cmdq_reflush_req req;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_REFLUSH);
+ req.qpn = QP_ID(qp);
+ req.sq_pi = qp->kern_qp.sq_pi;
+ req.rq_pi = qp->kern_qp.rq_pi;
+ erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+}
+
+static int erdma_qp_validate_cap(struct erdma_dev *dev,
+ struct ib_qp_init_attr *attrs)
+{
+ if ((attrs->cap.max_send_wr > dev->attrs.max_send_wr) ||
+ (attrs->cap.max_recv_wr > dev->attrs.max_recv_wr) ||
+ (attrs->cap.max_send_sge > dev->attrs.max_send_sge) ||
+ (attrs->cap.max_recv_sge > dev->attrs.max_recv_sge) ||
+ (attrs->cap.max_inline_data > ERDMA_MAX_INLINE) ||
+ !attrs->cap.max_send_wr || !attrs->cap.max_recv_wr) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int erdma_qp_validate_attr(struct erdma_dev *dev,
+ struct ib_qp_init_attr *attrs)
+{
+ if (erdma_device_iwarp(dev) && attrs->qp_type != IB_QPT_RC)
+ return -EOPNOTSUPP;
+
+ if (erdma_device_rocev2(dev) && attrs->qp_type != IB_QPT_RC &&
+ attrs->qp_type != IB_QPT_UD && attrs->qp_type != IB_QPT_GSI)
+ return -EOPNOTSUPP;
+
+ if (attrs->srq)
+ return -EOPNOTSUPP;
+
+ if (!attrs->send_cq || !attrs->recv_cq)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static void free_kernel_qp(struct erdma_qp *qp)
+{
+ struct erdma_dev *dev = qp->dev;
+
+ vfree(qp->kern_qp.swr_tbl);
+ vfree(qp->kern_qp.rwr_tbl);
+
+ if (qp->kern_qp.sq_buf)
+ dma_free_coherent(&dev->pdev->dev,
+ qp->attrs.sq_size << SQEBB_SHIFT,
+ qp->kern_qp.sq_buf,
+ qp->kern_qp.sq_buf_dma_addr);
+
+ if (qp->kern_qp.sq_dbrec)
+ dma_pool_free(dev->db_pool, qp->kern_qp.sq_dbrec,
+ qp->kern_qp.sq_dbrec_dma);
+
+ if (qp->kern_qp.rq_buf)
+ dma_free_coherent(&dev->pdev->dev,
+ qp->attrs.rq_size << RQE_SHIFT,
+ qp->kern_qp.rq_buf,
+ qp->kern_qp.rq_buf_dma_addr);
+
+ if (qp->kern_qp.rq_dbrec)
+ dma_pool_free(dev->db_pool, qp->kern_qp.rq_dbrec,
+ qp->kern_qp.rq_dbrec_dma);
+}
+
+static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp,
+ struct ib_qp_init_attr *attrs)
+{
+ struct erdma_kqp *kqp = &qp->kern_qp;
+ int size;
+
+ if (attrs->sq_sig_type == IB_SIGNAL_ALL_WR)
+ kqp->sig_all = 1;
+
+ kqp->sq_pi = 0;
+ kqp->sq_ci = 0;
+ kqp->rq_pi = 0;
+ kqp->rq_ci = 0;
+ kqp->hw_sq_db =
+ dev->func_bar + (ERDMA_SDB_SHARED_PAGE_INDEX << PAGE_SHIFT);
+ kqp->hw_rq_db = dev->func_bar + ERDMA_BAR_RQDB_SPACE_OFFSET;
+
+ kqp->swr_tbl = vmalloc_array(qp->attrs.sq_size, sizeof(u64));
+ kqp->rwr_tbl = vmalloc_array(qp->attrs.rq_size, sizeof(u64));
+ if (!kqp->swr_tbl || !kqp->rwr_tbl)
+ goto err_out;
+
+ size = qp->attrs.sq_size << SQEBB_SHIFT;
+ kqp->sq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
+ &kqp->sq_buf_dma_addr, GFP_KERNEL);
+ if (!kqp->sq_buf)
+ goto err_out;
+
+ kqp->sq_dbrec =
+ dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &kqp->sq_dbrec_dma);
+ if (!kqp->sq_dbrec)
+ goto err_out;
+
+ size = qp->attrs.rq_size << RQE_SHIFT;
+ kqp->rq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
+ &kqp->rq_buf_dma_addr, GFP_KERNEL);
+ if (!kqp->rq_buf)
+ goto err_out;
+
+ kqp->rq_dbrec =
+ dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &kqp->rq_dbrec_dma);
+ if (!kqp->rq_dbrec)
+ goto err_out;
+
+ return 0;
+
+err_out:
+ free_kernel_qp(qp);
+ return -ENOMEM;
+}
+
+static void erdma_fill_bottom_mtt(struct erdma_dev *dev, struct erdma_mem *mem)
+{
+ struct erdma_mtt *mtt = mem->mtt;
+ struct ib_block_iter biter;
+ u32 idx = 0;
+
+ while (mtt->low_level)
+ mtt = mtt->low_level;
+
+ rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size)
+ mtt->buf[idx++] = rdma_block_iter_dma_address(&biter);
+}
+
+static struct erdma_mtt *erdma_create_cont_mtt(struct erdma_dev *dev,
+ size_t size)
+{
+ struct erdma_mtt *mtt;
+
+ mtt = kzalloc(sizeof(*mtt), GFP_KERNEL);
+ if (!mtt)
+ return ERR_PTR(-ENOMEM);
+
+ mtt->size = size;
+ mtt->buf = kzalloc(mtt->size, GFP_KERNEL);
+ if (!mtt->buf)
+ goto err_free_mtt;
+
+ mtt->continuous = true;
+ mtt->buf_dma = dma_map_single(&dev->pdev->dev, mtt->buf, mtt->size,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&dev->pdev->dev, mtt->buf_dma))
+ goto err_free_mtt_buf;
+
+ return mtt;
+
+err_free_mtt_buf:
+ kfree(mtt->buf);
+
+err_free_mtt:
+ kfree(mtt);
+
+ return ERR_PTR(-ENOMEM);
+}
+
+static void erdma_unmap_page_list(struct erdma_dev *dev, dma_addr_t *pg_dma,
+ u32 npages)
+{
+ u32 i;
+
+ for (i = 0; i < npages; i++)
+ dma_unmap_page(&dev->pdev->dev, pg_dma[i], PAGE_SIZE,
+ DMA_TO_DEVICE);
+}
+
+static void erdma_destroy_mtt_buf_dma_addrs(struct erdma_dev *dev,
+ struct erdma_mtt *mtt)
+{
+ erdma_unmap_page_list(dev, mtt->dma_addrs, mtt->npages);
+ vfree(mtt->dma_addrs);
+}
+
+static void erdma_destroy_scatter_mtt(struct erdma_dev *dev,
+ struct erdma_mtt *mtt)
+{
+ erdma_destroy_mtt_buf_dma_addrs(dev, mtt);
+ vfree(mtt->buf);
+ kfree(mtt);
+}
+
+static void erdma_init_middle_mtt(struct erdma_mtt *mtt,
+ struct erdma_mtt *low_mtt)
+{
+ dma_addr_t *pg_addr = mtt->buf;
+ u32 i;
+
+ for (i = 0; i < low_mtt->npages; i++)
+ pg_addr[i] = low_mtt->dma_addrs[i];
+}
+
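+/*
+ * DMA-map every page backing a vmalloc'ed buffer. Returns the number of
+ * mapped pages, or 0 on failure after unmapping any pages already mapped.
+ */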
+static u32 vmalloc_to_dma_addrs(struct erdma_dev *dev, dma_addr_t **dma_addrs,
+ void *buf, u64 len)
+{
+ dma_addr_t *pg_dma;
+ struct page *pg;
+ u32 npages, i;
+ void *addr;
+
+ npages = (PAGE_ALIGN((u64)buf + len) - PAGE_ALIGN_DOWN((u64)buf)) >>
+ PAGE_SHIFT;
+ pg_dma = vcalloc(npages, sizeof(*pg_dma));
+ if (!pg_dma)
+ return 0;
+
+ addr = buf;
+ for (i = 0; i < npages; i++) {
+ pg = vmalloc_to_page(addr);
+ if (!pg)
+ goto err;
+
+ pg_dma[i] = dma_map_page(&dev->pdev->dev, pg, 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&dev->pdev->dev, pg_dma[i]))
+ goto err;
+
+ addr += PAGE_SIZE;
+ }
+
+ *dma_addrs = pg_dma;
+
+ return npages;
+err:
+ erdma_unmap_page_list(dev, pg_dma, i);
+ vfree(pg_dma);
+
+ return 0;
+}
+
+static int erdma_create_mtt_buf_dma_addrs(struct erdma_dev *dev,
+ struct erdma_mtt *mtt)
+{
+ dma_addr_t *addrs;
+ u32 npages;
+
+ /* Fail if buf is not page aligned. */
+ if ((uintptr_t)mtt->buf & ~PAGE_MASK)
+ return -EINVAL;
+
+ npages = vmalloc_to_dma_addrs(dev, &addrs, mtt->buf, mtt->size);
+ if (!npages)
+ return -ENOMEM;
+
+ mtt->dma_addrs = addrs;
+ mtt->npages = npages;
+
+ return 0;
+}
+
+static struct erdma_mtt *erdma_create_scatter_mtt(struct erdma_dev *dev,
+ size_t size)
+{
+ struct erdma_mtt *mtt;
+ int ret = -ENOMEM;
+
+ mtt = kzalloc(sizeof(*mtt), GFP_KERNEL);
+ if (!mtt)
+ return ERR_PTR(-ENOMEM);
+
+ mtt->size = ALIGN(size, PAGE_SIZE);
+ mtt->buf = vzalloc(mtt->size);
+ mtt->continuous = false;
+ if (!mtt->buf)
+ goto err_free_mtt;
+
+ ret = erdma_create_mtt_buf_dma_addrs(dev, mtt);
+ if (ret)
+ goto err_free_mtt_buf;
+
+ ibdev_dbg(&dev->ibdev, "create scatter mtt, size:%lu, npages:%u\n",
+ mtt->size, mtt->npages);
+
+ return mtt;
+
+err_free_mtt_buf:
+ vfree(mtt->buf);
+
+err_free_mtt:
+ kfree(mtt);
+
+ return ERR_PTR(ret);
+}
+
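+/*
+ * Build the MTT for a queue buffer or memory region. A continuous MTT is a
+ * single DMA-mapped buffer; otherwise a scatter MTT is built for the bottom
+ * level and additional levels are stacked on top of it until the top level
+ * fits in a single page (at most three levels).
+ */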
+static struct erdma_mtt *erdma_create_mtt(struct erdma_dev *dev, size_t size,
+ bool force_continuous)
+{
+ struct erdma_mtt *mtt, *tmp_mtt;
+ int ret, level = 0;
+
+ ibdev_dbg(&dev->ibdev, "create_mtt, size:%lu, force cont:%d\n", size,
+ force_continuous);
+
+ if (!(dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_MTT_VA))
+ force_continuous = true;
+
+ if (force_continuous)
+ return erdma_create_cont_mtt(dev, size);
+
+ mtt = erdma_create_scatter_mtt(dev, size);
+ if (IS_ERR(mtt))
+ return mtt;
+ level = 1;
+
+ /* Build upper MTT levels until the top level fits in a single page. */
+ while (mtt->npages != 1 && level <= 3) {
+ tmp_mtt = erdma_create_scatter_mtt(dev, MTT_SIZE(mtt->npages));
+ if (IS_ERR(tmp_mtt)) {
+ ret = PTR_ERR(tmp_mtt);
+ goto err_free_mtt;
+ }
+ erdma_init_middle_mtt(tmp_mtt, mtt);
+ tmp_mtt->low_level = mtt;
+ mtt = tmp_mtt;
+ level++;
+ }
+
+ if (level > 3) {
+ ret = -ENOMEM;
+ goto err_free_mtt;
+ }
+
+ mtt->level = level;
+ ibdev_dbg(&dev->ibdev, "top mtt: level:%d, dma_addr 0x%llx\n",
+ mtt->level, mtt->dma_addrs[0]);
+
+ return mtt;
+err_free_mtt:
+ while (mtt) {
+ tmp_mtt = mtt->low_level;
+ erdma_destroy_scatter_mtt(dev, mtt);
+ mtt = tmp_mtt;
+ }
+
+ return ERR_PTR(ret);
+}
+
+static void erdma_destroy_mtt(struct erdma_dev *dev, struct erdma_mtt *mtt)
+{
+ struct erdma_mtt *tmp_mtt;
+
+ if (mtt->continuous) {
+ dma_unmap_single(&dev->pdev->dev, mtt->buf_dma, mtt->size,
+ DMA_TO_DEVICE);
+ kfree(mtt->buf);
+ kfree(mtt);
+ } else {
+ while (mtt) {
+ tmp_mtt = mtt->low_level;
+ erdma_destroy_scatter_mtt(dev, mtt);
+ mtt = tmp_mtt;
+ }
+ }
+}
+
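+/*
+ * Pin the user buffer, pick the best supported page size, and build the MTT
+ * describing it for the hardware.
+ */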
+static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem,
+ u64 start, u64 len, int access, u64 virt,
+ unsigned long req_page_size, bool force_continuous)
+{
+ int ret = 0;
+
+ mem->umem = ib_umem_get(&dev->ibdev, start, len, access);
+ if (IS_ERR(mem->umem)) {
+ ret = PTR_ERR(mem->umem);
+ mem->umem = NULL;
+ return ret;
+ }
+
+ mem->va = virt;
+ mem->len = len;
+ mem->page_size = ib_umem_find_best_pgsz(mem->umem, req_page_size, virt);
+ mem->page_offset = start & (mem->page_size - 1);
+ mem->mtt_nents = ib_umem_num_dma_blocks(mem->umem, mem->page_size);
+ mem->page_cnt = mem->mtt_nents;
+ mem->mtt = erdma_create_mtt(dev, MTT_SIZE(mem->page_cnt),
+ force_continuous);
+ if (IS_ERR(mem->mtt)) {
+ ret = PTR_ERR(mem->mtt);
+ goto error_ret;
+ }
+
+ erdma_fill_bottom_mtt(dev, mem);
+
+ return 0;
+
+error_ret:
+ if (mem->umem) {
+ ib_umem_release(mem->umem);
+ mem->umem = NULL;
+ }
+
+ return ret;
+}
+
+static void put_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem)
+{
+ if (mem->mtt)
+ erdma_destroy_mtt(dev, mem->mtt);
+
+ if (mem->umem) {
+ ib_umem_release(mem->umem);
+ mem->umem = NULL;
+ }
+}
+
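+/*
+ * Doorbell records live in user memory and are pinned one page at a time.
+ * Pinned pages are tracked per ucontext and refcounted, so records that
+ * share a page reuse the same umem.
+ */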
+static int erdma_map_user_dbrecords(struct erdma_ucontext *ctx,
+ u64 dbrecords_va,
+ struct erdma_user_dbrecords_page **dbr_page,
+ dma_addr_t *dma_addr)
+{
+ struct erdma_user_dbrecords_page *page = NULL;
+ int rv = 0;
+
+ mutex_lock(&ctx->dbrecords_page_mutex);
+
+ list_for_each_entry(page, &ctx->dbrecords_page_list, list)
+ if (page->va == (dbrecords_va & PAGE_MASK))
+ goto found;
+
+ page = kmalloc(sizeof(*page), GFP_KERNEL);
+ if (!page) {
+ rv = -ENOMEM;
+ goto out;
+ }
+
+ page->va = (dbrecords_va & PAGE_MASK);
+ page->refcnt = 0;
+
+ page->umem = ib_umem_get(ctx->ibucontext.device,
+ dbrecords_va & PAGE_MASK, PAGE_SIZE, 0);
+ if (IS_ERR(page->umem)) {
+ rv = PTR_ERR(page->umem);
+ kfree(page);
+ goto out;
+ }
+
+ list_add(&page->list, &ctx->dbrecords_page_list);
+
+found:
+ *dma_addr = sg_dma_address(page->umem->sgt_append.sgt.sgl) +
+ (dbrecords_va & ~PAGE_MASK);
+ *dbr_page = page;
+ page->refcnt++;
+
+out:
+ mutex_unlock(&ctx->dbrecords_page_mutex);
+ return rv;
+}
+
+static void
+erdma_unmap_user_dbrecords(struct erdma_ucontext *ctx,
+ struct erdma_user_dbrecords_page **dbr_page)
+{
+ if (!ctx || !(*dbr_page))
+ return;
+
+ mutex_lock(&ctx->dbrecords_page_mutex);
+ if (--(*dbr_page)->refcnt == 0) {
+ list_del(&(*dbr_page)->list);
+ ib_umem_release((*dbr_page)->umem);
+ kfree(*dbr_page);
+ }
+
+ *dbr_page = NULL;
+ mutex_unlock(&ctx->dbrecords_page_mutex);
+}
+
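+/*
+ * The user-provided queue buffer holds the SQ first and the RQ after it,
+ * with the RQ starting at the next hardware-page-aligned offset. The SQ and
+ * RQ doorbell records are adjacent in the doorbell record page.
+ */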
+static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx,
+ u64 va, u32 len, u64 dbrec_va)
+{
+ dma_addr_t dbrec_dma;
+ u32 rq_offset;
+ int ret;
+
+ if (len < (ALIGN(qp->attrs.sq_size * SQEBB_SIZE, ERDMA_HW_PAGE_SIZE) +
+ qp->attrs.rq_size * RQE_SIZE))
+ return -EINVAL;
+
+ ret = get_mtt_entries(qp->dev, &qp->user_qp.sq_mem, va,
+ qp->attrs.sq_size << SQEBB_SHIFT, 0, va,
+ (SZ_1M - SZ_4K), true);
+ if (ret)
+ return ret;
+
+ rq_offset = ALIGN(qp->attrs.sq_size << SQEBB_SHIFT, ERDMA_HW_PAGE_SIZE);
+ qp->user_qp.rq_offset = rq_offset;
+
+ ret = get_mtt_entries(qp->dev, &qp->user_qp.rq_mem, va + rq_offset,
+ qp->attrs.rq_size << RQE_SHIFT, 0, va + rq_offset,
+ (SZ_1M - SZ_4K), true);
+ if (ret)
+ goto put_sq_mtt;
+
+ ret = erdma_map_user_dbrecords(uctx, dbrec_va,
+ &qp->user_qp.user_dbr_page,
+ &dbrec_dma);
+ if (ret)
+ goto put_rq_mtt;
+
+ qp->user_qp.sq_dbrec_dma = dbrec_dma;
+ qp->user_qp.rq_dbrec_dma = dbrec_dma + ERDMA_DB_SIZE;
+
+ return 0;
+
+put_rq_mtt:
+ put_mtt_entries(qp->dev, &qp->user_qp.rq_mem);
+
+put_sq_mtt:
+ put_mtt_entries(qp->dev, &qp->user_qp.sq_mem);
+
+ return ret;
+}
+
+static void free_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx)
+{
+ put_mtt_entries(qp->dev, &qp->user_qp.sq_mem);
+ put_mtt_entries(qp->dev, &qp->user_qp.rq_mem);
+ erdma_unmap_user_dbrecords(uctx, &qp->user_qp.user_dbr_page);
+}
+
+int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata)
+{
+ struct erdma_qp *qp = to_eqp(ibqp);
+ struct erdma_dev *dev = to_edev(ibqp->device);
+ struct erdma_ucontext *uctx = rdma_udata_to_drv_context(
+ udata, struct erdma_ucontext, ibucontext);
+ struct erdma_ureq_create_qp ureq;
+ struct erdma_uresp_create_qp uresp;
+ void *old_entry;
+ int ret = 0;
+
+ ret = erdma_qp_validate_cap(dev, attrs);
+ if (ret)
+ goto err_out;
+
+ ret = erdma_qp_validate_attr(dev, attrs);
+ if (ret)
+ goto err_out;
+
+ qp->scq = to_ecq(attrs->send_cq);
+ qp->rcq = to_ecq(attrs->recv_cq);
+ qp->dev = dev;
+ qp->attrs.cc = dev->attrs.cc;
+
+ init_rwsem(&qp->state_lock);
+ kref_init(&qp->ref);
+ init_completion(&qp->safe_free);
+
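+ /*
+ * The GSI QP always uses QP number 1; other QPs get their QPN allocated
+ * cyclically from the QP xarray.
+ */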
+ if (qp->ibqp.qp_type == IB_QPT_GSI) {
+ old_entry = xa_store(&dev->qp_xa, 1, qp, GFP_KERNEL);
+ if (xa_is_err(old_entry))
+ ret = xa_err(old_entry);
+ else
+ qp->ibqp.qp_num = 1;
+ } else {
+ ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp,
+ XA_LIMIT(1, dev->attrs.max_qp - 1),
+ &dev->next_alloc_qpn, GFP_KERNEL);
+ }
+
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto err_out;
+ }
+
+ qp->attrs.sq_size = roundup_pow_of_two(attrs->cap.max_send_wr *
+ ERDMA_MAX_WQEBB_PER_SQE);
+ qp->attrs.rq_size = roundup_pow_of_two(attrs->cap.max_recv_wr);
+
+ if (uctx) {
+ ret = ib_copy_from_udata(&ureq, udata,
+ min(sizeof(ureq), udata->inlen));
+ if (ret)
+ goto err_out_xa;
+
+ ret = init_user_qp(qp, uctx, ureq.qbuf_va, ureq.qbuf_len,
+ ureq.db_record_va);
+ if (ret)
+ goto err_out_xa;
+
+ memset(&uresp, 0, sizeof(uresp));
+
+ uresp.num_sqe = qp->attrs.sq_size;
+ uresp.num_rqe = qp->attrs.rq_size;
+ uresp.qp_id = QP_ID(qp);
+ uresp.rq_offset = qp->user_qp.rq_offset;
+
+ ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (ret)
+ goto err_out_cmd;
+ } else {
+ ret = init_kernel_qp(dev, qp, attrs);
+ if (ret)
+ goto err_out_xa;
+ }
+
+ qp->attrs.max_send_sge = attrs->cap.max_send_sge;
+ qp->attrs.max_recv_sge = attrs->cap.max_recv_sge;
+
+ if (erdma_device_iwarp(qp->dev))
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE;
+ else
+ qp->attrs.rocev2.state = ERDMA_QPS_ROCEV2_RESET;
+
+ INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker);
+
+ ret = create_qp_cmd(uctx, qp);
+ if (ret)
+ goto err_out_cmd;
+
+ spin_lock_init(&qp->lock);
+
+ return 0;
+
+err_out_cmd:
+ if (uctx)
+ free_user_qp(qp, uctx);
+ else
+ free_kernel_qp(qp);
+err_out_xa:
+ xa_erase(&dev->qp_xa, QP_ID(qp));
+err_out:
+ return ret;
+}
+
+static int erdma_create_stag(struct erdma_dev *dev, u32 *stag)
+{
+ int stag_idx;
+
+ stag_idx = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX]);
+ if (stag_idx < 0)
+ return stag_idx;
+
+ /* For now, we always leave the key field zero. */
+ *stag = (stag_idx << 8);
+
+ return 0;
+}
+
+struct ib_mr *erdma_get_dma_mr(struct ib_pd *ibpd, int acc)
+{
+ struct erdma_dev *dev = to_edev(ibpd->device);
+ struct erdma_mr *mr;
+ u32 stag;
+ int ret;
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ ret = erdma_create_stag(dev, &stag);
+ if (ret)
+ goto out_free;
+
+ mr->type = ERDMA_MR_TYPE_DMA;
+
+ mr->ibmr.lkey = stag;
+ mr->ibmr.rkey = stag;
+ mr->ibmr.pd = ibpd;
+ mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(acc);
+ ret = regmr_cmd(dev, mr);
+ if (ret)
+ goto out_remove_stag;
+
+ return &mr->ibmr;
+
+out_remove_stag:
+ erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
+ mr->ibmr.lkey >> 8);
+
+out_free:
+ kfree(mr);
+
+ return ERR_PTR(ret);
+}
+
+struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
+ u32 max_num_sg)
+{
+ struct erdma_mr *mr;
+ struct erdma_dev *dev = to_edev(ibpd->device);
+ int ret;
+ u32 stag;
+
+ if (mr_type != IB_MR_TYPE_MEM_REG)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (max_num_sg > ERDMA_MR_MAX_MTT_CNT)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ ret = erdma_create_stag(dev, &stag);
+ if (ret)
+ goto out_free;
+
+ mr->type = ERDMA_MR_TYPE_FRMR;
+
+ mr->ibmr.lkey = stag;
+ mr->ibmr.rkey = stag;
+ mr->ibmr.pd = ibpd;
+ /* Access rights are updated later, when the FRMR is actually used. */
+ mr->access = ERDMA_MR_ACC_LR | ERDMA_MR_ACC_LW | ERDMA_MR_ACC_RR |
+ ERDMA_MR_ACC_RW;
+
+ mr->mem.page_size = PAGE_SIZE; /* update it later. */
+ mr->mem.page_cnt = max_num_sg;
+ mr->mem.mtt = erdma_create_mtt(dev, MTT_SIZE(max_num_sg), true);
+ if (IS_ERR(mr->mem.mtt)) {
+ ret = PTR_ERR(mr->mem.mtt);
+ goto out_remove_stag;
+ }
+
+ ret = regmr_cmd(dev, mr);
+ if (ret)
+ goto out_destroy_mtt;
+
+ return &mr->ibmr;
+
+out_destroy_mtt:
+ erdma_destroy_mtt(dev, mr->mem.mtt);
+
+out_remove_stag:
+ erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
+ mr->ibmr.lkey >> 8);
+
+out_free:
+ kfree(mr);
+
+ return ERR_PTR(ret);
+}
+
+static int erdma_set_page(struct ib_mr *ibmr, u64 addr)
+{
+ struct erdma_mr *mr = to_emr(ibmr);
+
+ if (mr->mem.mtt_nents >= mr->mem.page_cnt)
+ return -1;
+
+ mr->mem.mtt->buf[mr->mem.mtt_nents] = addr;
+ mr->mem.mtt_nents++;
+
+ return 0;
+}
+
+int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset)
+{
+ struct erdma_mr *mr = to_emr(ibmr);
+ int num;
+
+ mr->mem.mtt_nents = 0;
+
+ num = ib_sg_to_pages(&mr->ibmr, sg, sg_nents, sg_offset,
+ erdma_set_page);
+
+ return num;
+}
+
+struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
+ u64 virt, int access, struct ib_dmah *dmah,
+ struct ib_udata *udata)
+{
+ struct erdma_mr *mr = NULL;
+ struct erdma_dev *dev = to_edev(ibpd->device);
+ u32 stag;
+ int ret;
+
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (!len || len > dev->attrs.max_mr_size)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ ret = get_mtt_entries(dev, &mr->mem, start, len, access, virt,
+ SZ_2G - SZ_4K, false);
+ if (ret)
+ goto err_out_free;
+
+ ret = erdma_create_stag(dev, &stag);
+ if (ret)
+ goto err_out_put_mtt;
+
+ mr->ibmr.lkey = mr->ibmr.rkey = stag;
+ mr->ibmr.pd = ibpd;
+ mr->mem.va = virt;
+ mr->mem.len = len;
+ mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(access);
+ mr->valid = 1;
+ mr->type = ERDMA_MR_TYPE_NORMAL;
+
+ ret = regmr_cmd(dev, mr);
+ if (ret)
+ goto err_out_mr;
+
+ return &mr->ibmr;
+
+err_out_mr:
+ erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
+ mr->ibmr.lkey >> 8);
+
+err_out_put_mtt:
+ put_mtt_entries(dev, &mr->mem);
+
+err_out_free:
+ kfree(mr);
+
+ return ERR_PTR(ret);
+}
+
+int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+{
+ struct erdma_mr *mr;
+ struct erdma_dev *dev = to_edev(ibmr->device);
+ struct erdma_cmdq_dereg_mr_req req;
+ int ret;
+
+ mr = to_emr(ibmr);
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_DEREG_MR);
+
+ req.cfg = FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, ibmr->lkey >> 8) |
+ FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, ibmr->lkey & 0xFF);
+
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+ if (ret)
+ return ret;
+
+ erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX], ibmr->lkey >> 8);
+
+ put_mtt_entries(dev, &mr->mem);
+
+ kfree(mr);
+ return 0;
+}
+
+int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+{
+ struct erdma_cq *cq = to_ecq(ibcq);
+ struct erdma_dev *dev = to_edev(ibcq->device);
+ struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
+ udata, struct erdma_ucontext, ibucontext);
+ int err;
+ struct erdma_cmdq_destroy_cq_req req;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_DESTROY_CQ);
+ req.cqn = cq->cqn;
+
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+ if (err)
+ return err;
+
+ if (rdma_is_kernel_res(&cq->ibcq.res)) {
+ dma_free_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT,
+ cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
+ dma_pool_free(dev->db_pool, cq->kern_cq.dbrec,
+ cq->kern_cq.dbrec_dma);
+ } else {
+ erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
+ put_mtt_entries(dev, &cq->user_cq.qbuf_mem);
+ }
+
+ xa_erase(&dev->cq_xa, cq->cqn);
+
+ return 0;
+}
+
+int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
+{
+ struct erdma_qp *qp = to_eqp(ibqp);
+ struct erdma_dev *dev = to_edev(ibqp->device);
+ struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
+ udata, struct erdma_ucontext, ibucontext);
+ struct erdma_cmdq_destroy_qp_req req;
+ union erdma_mod_qp_params params;
+ int err;
+
+ down_write(&qp->state_lock);
+ if (erdma_device_iwarp(dev)) {
+ params.iwarp.state = ERDMA_QPS_IWARP_ERROR;
+ erdma_modify_qp_state_iwarp(qp, &params.iwarp,
+ ERDMA_QPA_IWARP_STATE);
+ } else {
+ params.rocev2.state = ERDMA_QPS_ROCEV2_ERROR;
+ erdma_modify_qp_state_rocev2(qp, &params.rocev2,
+ ERDMA_QPA_ROCEV2_STATE);
+ }
+ up_write(&qp->state_lock);
+
+ cancel_delayed_work_sync(&qp->reflush_dwork);
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_DESTROY_QP);
+ req.qpn = QP_ID(qp);
+
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+ if (err)
+ return err;
+
+ erdma_qp_put(qp);
+ wait_for_completion(&qp->safe_free);
+
+ if (rdma_is_kernel_res(&qp->ibqp.res)) {
+ free_kernel_qp(qp);
+ } else {
+ put_mtt_entries(dev, &qp->user_qp.sq_mem);
+ put_mtt_entries(dev, &qp->user_qp.rq_mem);
+ erdma_unmap_user_dbrecords(ctx, &qp->user_qp.user_dbr_page);
+ }
+
+ if (qp->cep)
+ erdma_cep_put(qp->cep);
+ xa_erase(&dev->qp_xa, QP_ID(qp));
+
+ return 0;
+}
+
+void erdma_qp_get_ref(struct ib_qp *ibqp)
+{
+ erdma_qp_get(to_eqp(ibqp));
+}
+
+void erdma_qp_put_ref(struct ib_qp *ibqp)
+{
+ erdma_qp_put(to_eqp(ibqp));
+}
+
+int erdma_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma)
+{
+ struct rdma_user_mmap_entry *rdma_entry;
+ struct erdma_user_mmap_entry *entry;
+ pgprot_t prot;
+ int err;
+
+ rdma_entry = rdma_user_mmap_entry_get(ctx, vma);
+ if (!rdma_entry)
+ return -EINVAL;
+
+ entry = to_emmap(rdma_entry);
+
+ switch (entry->mmap_flag) {
+ case ERDMA_MMAP_IO_NC:
+ /* map doorbell. */
+ prot = pgprot_device(vma->vm_page_prot);
+ break;
+ default:
+ err = -EINVAL;
+ goto put_entry;
+ }
+
+ err = rdma_user_mmap_io(ctx, vma, PFN_DOWN(entry->address), PAGE_SIZE,
+ prot, rdma_entry);
+
+put_entry:
+ rdma_user_mmap_entry_put(rdma_entry);
+ return err;
+}
+
+void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
+{
+ struct erdma_user_mmap_entry *entry = to_emmap(rdma_entry);
+
+ kfree(entry);
+}
+
+static int alloc_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx,
+ bool ext_db_en)
+{
+ struct erdma_cmdq_ext_db_req req = {};
+ u64 val0, val1;
+ int ret;
+
+ /*
+ * CAP_SYS_RAWIO is required if the hardware does not support the
+ * extended doorbell mechanism.
+ */
+ if (!ext_db_en && !capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ if (!ext_db_en) {
+ ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET;
+ ctx->rdb = dev->func_bar_addr + ERDMA_BAR_RQDB_SPACE_OFFSET;
+ ctx->cdb = dev->func_bar_addr + ERDMA_BAR_CQDB_SPACE_OFFSET;
+ return 0;
+ }
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
+ CMDQ_OPCODE_ALLOC_DB);
+
+ req.cfg = FIELD_PREP(ERDMA_CMD_EXT_DB_CQ_EN_MASK, 1) |
+ FIELD_PREP(ERDMA_CMD_EXT_DB_RQ_EN_MASK, 1) |
+ FIELD_PREP(ERDMA_CMD_EXT_DB_SQ_EN_MASK, 1);
+
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &val0, &val1,
+ true);
+ if (ret)
+ return ret;
+
+ ctx->ext_db.enable = true;
+ ctx->ext_db.sdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_SDB);
+ ctx->ext_db.rdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_RDB);
+ ctx->ext_db.cdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_CDB);
+
+ ctx->sdb = dev->func_bar_addr + (ctx->ext_db.sdb_off << PAGE_SHIFT);
+ ctx->rdb = dev->func_bar_addr + (ctx->ext_db.rdb_off << PAGE_SHIFT);
+ ctx->cdb = dev->func_bar_addr + (ctx->ext_db.cdb_off << PAGE_SHIFT);
+
+ return 0;
+}
+
+static void free_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx)
+{
+ struct erdma_cmdq_ext_db_req req = {};
+ int ret;
+
+ if (!ctx->ext_db.enable)
+ return;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
+ CMDQ_OPCODE_FREE_DB);
+
+ req.cfg = FIELD_PREP(ERDMA_CMD_EXT_DB_CQ_EN_MASK, 1) |
+ FIELD_PREP(ERDMA_CMD_EXT_DB_RQ_EN_MASK, 1) |
+ FIELD_PREP(ERDMA_CMD_EXT_DB_SQ_EN_MASK, 1);
+
+ req.sdb_off = ctx->ext_db.sdb_off;
+ req.rdb_off = ctx->ext_db.rdb_off;
+ req.cdb_off = ctx->ext_db.cdb_off;
+
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+ if (ret)
+ ibdev_err_ratelimited(&dev->ibdev,
+ "free db resources failed %d", ret);
+}
+
+static void erdma_uctx_user_mmap_entries_remove(struct erdma_ucontext *uctx)
+{
+ rdma_user_mmap_entry_remove(uctx->sq_db_mmap_entry);
+ rdma_user_mmap_entry_remove(uctx->rq_db_mmap_entry);
+ rdma_user_mmap_entry_remove(uctx->cq_db_mmap_entry);
+}
+
+int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata)
+{
+ struct erdma_ucontext *ctx = to_ectx(ibctx);
+ struct erdma_dev *dev = to_edev(ibctx->device);
+ int ret;
+ struct erdma_uresp_alloc_ctx uresp = {};
+
+ if (atomic_inc_return(&dev->num_ctx) > ERDMA_MAX_CONTEXT) {
+ ret = -ENOMEM;
+ goto err_out;
+ }
+
+ if (udata->outlen < sizeof(uresp)) {
+ ret = -EINVAL;
+ goto err_out;
+ }
+
+ INIT_LIST_HEAD(&ctx->dbrecords_page_list);
+ mutex_init(&ctx->dbrecords_page_mutex);
+
+ ret = alloc_db_resources(dev, ctx,
+ !!(dev->attrs.cap_flags &
+ ERDMA_DEV_CAP_FLAGS_EXTEND_DB));
+ if (ret)
+ goto err_out;
+
+ ctx->sq_db_mmap_entry = erdma_user_mmap_entry_insert(
+ ctx, (void *)ctx->sdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.sdb);
+ if (!ctx->sq_db_mmap_entry) {
+ ret = -ENOMEM;
+ goto err_free_ext_db;
+ }
+
+ ctx->rq_db_mmap_entry = erdma_user_mmap_entry_insert(
+ ctx, (void *)ctx->rdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.rdb);
+ if (!ctx->rq_db_mmap_entry) {
+ ret = -EINVAL;
+ goto err_put_mmap_entries;
+ }
+
+ ctx->cq_db_mmap_entry = erdma_user_mmap_entry_insert(
+ ctx, (void *)ctx->cdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.cdb);
+ if (!ctx->cq_db_mmap_entry) {
+ ret = -EINVAL;
+ goto err_put_mmap_entries;
+ }
+
+ uresp.dev_id = dev->pdev->device;
+
+ ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (ret)
+ goto err_put_mmap_entries;
+
+ return 0;
+
+err_put_mmap_entries:
+ erdma_uctx_user_mmap_entries_remove(ctx);
+
+err_free_ext_db:
+ free_db_resources(dev, ctx);
+
+err_out:
+ atomic_dec(&dev->num_ctx);
+ return ret;
+}
+
+void erdma_dealloc_ucontext(struct ib_ucontext *ibctx)
+{
+ struct erdma_dev *dev = to_edev(ibctx->device);
+ struct erdma_ucontext *ctx = to_ectx(ibctx);
+
+ erdma_uctx_user_mmap_entries_remove(ctx);
+ free_db_resources(dev, ctx);
+ atomic_dec(&dev->num_ctx);
+}
+
+static void erdma_attr_to_av(const struct rdma_ah_attr *ah_attr,
+ struct erdma_av *av, u16 sport)
+{
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+
+ av->port = rdma_ah_get_port_num(ah_attr);
+ av->sgid_index = grh->sgid_index;
+ av->hop_limit = grh->hop_limit;
+ av->traffic_class = grh->traffic_class;
+ av->sl = rdma_ah_get_sl(ah_attr);
+
+ av->flow_label = grh->flow_label;
+ av->udp_sport = sport;
+
+ ether_addr_copy(av->dmac, ah_attr->roce.dmac);
+ memcpy(av->dgid, grh->dgid.raw, ERDMA_ROCEV2_GID_SIZE);
+
+ if (ipv6_addr_v4mapped((struct in6_addr *)&grh->dgid))
+ av->ntype = ERDMA_NETWORK_TYPE_IPV4;
+ else
+ av->ntype = ERDMA_NETWORK_TYPE_IPV6;
+}
+
+static void erdma_av_to_attr(struct erdma_av *av, struct rdma_ah_attr *ah_attr)
+{
+ ah_attr->type = RDMA_AH_ATTR_TYPE_ROCE;
+
+ rdma_ah_set_sl(ah_attr, av->sl);
+ rdma_ah_set_port_num(ah_attr, av->port);
+ rdma_ah_set_ah_flags(ah_attr, IB_AH_GRH);
+
+ rdma_ah_set_grh(ah_attr, NULL, av->flow_label, av->sgid_index,
+ av->hop_limit, av->traffic_class);
+ rdma_ah_set_dgid_raw(ah_attr, av->dgid);
+}
+
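+/*
+ * Translation tables between IB verbs QP states and the device-specific
+ * iWARP and RoCEv2 QP states.
+ */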
+static int ib_qps_to_erdma_qps[ERDMA_PROTO_COUNT][IB_QPS_ERR + 1] = {
+ [ERDMA_PROTO_IWARP] = {
+ [IB_QPS_RESET] = ERDMA_QPS_IWARP_IDLE,
+ [IB_QPS_INIT] = ERDMA_QPS_IWARP_IDLE,
+ [IB_QPS_RTR] = ERDMA_QPS_IWARP_RTR,
+ [IB_QPS_RTS] = ERDMA_QPS_IWARP_RTS,
+ [IB_QPS_SQD] = ERDMA_QPS_IWARP_CLOSING,
+ [IB_QPS_SQE] = ERDMA_QPS_IWARP_TERMINATE,
+ [IB_QPS_ERR] = ERDMA_QPS_IWARP_ERROR,
+ },
+ [ERDMA_PROTO_ROCEV2] = {
+ [IB_QPS_RESET] = ERDMA_QPS_ROCEV2_RESET,
+ [IB_QPS_INIT] = ERDMA_QPS_ROCEV2_INIT,
+ [IB_QPS_RTR] = ERDMA_QPS_ROCEV2_RTR,
+ [IB_QPS_RTS] = ERDMA_QPS_ROCEV2_RTS,
+ [IB_QPS_SQD] = ERDMA_QPS_ROCEV2_SQD,
+ [IB_QPS_SQE] = ERDMA_QPS_ROCEV2_SQE,
+ [IB_QPS_ERR] = ERDMA_QPS_ROCEV2_ERROR,
+ },
+};
+
+static int erdma_qps_to_ib_qps[ERDMA_PROTO_COUNT][ERDMA_QPS_ROCEV2_COUNT] = {
+ [ERDMA_PROTO_IWARP] = {
+ [ERDMA_QPS_IWARP_IDLE] = IB_QPS_INIT,
+ [ERDMA_QPS_IWARP_RTR] = IB_QPS_RTR,
+ [ERDMA_QPS_IWARP_RTS] = IB_QPS_RTS,
+ [ERDMA_QPS_IWARP_CLOSING] = IB_QPS_ERR,
+ [ERDMA_QPS_IWARP_TERMINATE] = IB_QPS_ERR,
+ [ERDMA_QPS_IWARP_ERROR] = IB_QPS_ERR,
+ },
+ [ERDMA_PROTO_ROCEV2] = {
+ [ERDMA_QPS_ROCEV2_RESET] = IB_QPS_RESET,
+ [ERDMA_QPS_ROCEV2_INIT] = IB_QPS_INIT,
+ [ERDMA_QPS_ROCEV2_RTR] = IB_QPS_RTR,
+ [ERDMA_QPS_ROCEV2_RTS] = IB_QPS_RTS,
+ [ERDMA_QPS_ROCEV2_SQD] = IB_QPS_SQD,
+ [ERDMA_QPS_ROCEV2_SQE] = IB_QPS_SQE,
+ [ERDMA_QPS_ROCEV2_ERROR] = IB_QPS_ERR,
+ },
+};
+
+static inline enum erdma_qps_iwarp ib_to_iwarp_qps(enum ib_qp_state state)
+{
+ return ib_qps_to_erdma_qps[ERDMA_PROTO_IWARP][state];
+}
+
+static inline enum erdma_qps_rocev2 ib_to_rocev2_qps(enum ib_qp_state state)
+{
+ return ib_qps_to_erdma_qps[ERDMA_PROTO_ROCEV2][state];
+}
+
+static inline enum ib_qp_state iwarp_to_ib_qps(enum erdma_qps_iwarp state)
+{
+ return erdma_qps_to_ib_qps[ERDMA_PROTO_IWARP][state];
+}
+
+static inline enum ib_qp_state rocev2_to_ib_qps(enum erdma_qps_rocev2 state)
+{
+ return erdma_qps_to_ib_qps[ERDMA_PROTO_ROCEV2][state];
+}
+
+static int erdma_check_qp_attrs(struct erdma_qp *qp, struct ib_qp_attr *attr,
+ int attr_mask)
+{
+ enum ib_qp_state cur_state, nxt_state;
+ struct erdma_dev *dev = qp->dev;
+ int ret = -EINVAL;
+
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ if ((attr_mask & IB_QP_PORT) &&
+ !rdma_is_port_valid(&dev->ibdev, attr->port_num))
+ goto out;
+
+ if (erdma_device_rocev2(dev)) {
+ cur_state = (attr_mask & IB_QP_CUR_STATE) ?
+ attr->cur_qp_state :
+ rocev2_to_ib_qps(qp->attrs.rocev2.state);
+
+ nxt_state = (attr_mask & IB_QP_STATE) ? attr->qp_state :
+ cur_state;
+
+ if (!ib_modify_qp_is_ok(cur_state, nxt_state, qp->ibqp.qp_type,
+ attr_mask))
+ goto out;
+
+ if ((attr_mask & IB_QP_AV) &&
+ erdma_check_gid_attr(
+ rdma_ah_read_grh(&attr->ah_attr)->sgid_attr))
+ goto out;
+
+ if ((attr_mask & IB_QP_PKEY_INDEX) &&
+ attr->pkey_index >= ERDMA_MAX_PKEYS)
+ goto out;
+ }
+
+ return 0;
+
+out:
+ return ret;
+}
+
+static void erdma_init_mod_qp_params_rocev2(
+ struct erdma_qp *qp, struct erdma_mod_qp_params_rocev2 *params,
+ int *erdma_attr_mask, struct ib_qp_attr *attr, int ib_attr_mask)
+{
+ enum erdma_qpa_mask_rocev2 to_modify_attrs = 0;
+ enum erdma_qps_rocev2 cur_state, nxt_state;
+ u16 udp_sport;
+
+ if (ib_attr_mask & IB_QP_CUR_STATE)
+ cur_state = ib_to_rocev2_qps(attr->cur_qp_state);
+ else
+ cur_state = qp->attrs.rocev2.state;
+
+ if (ib_attr_mask & IB_QP_STATE)
+ nxt_state = ib_to_rocev2_qps(attr->qp_state);
+ else
+ nxt_state = cur_state;
+
+ to_modify_attrs |= ERDMA_QPA_ROCEV2_STATE;
+ params->state = nxt_state;
+
+ if (ib_attr_mask & IB_QP_QKEY) {
+ to_modify_attrs |= ERDMA_QPA_ROCEV2_QKEY;
+ params->qkey = attr->qkey;
+ }
+
+ if (ib_attr_mask & IB_QP_SQ_PSN) {
+ to_modify_attrs |= ERDMA_QPA_ROCEV2_SQ_PSN;
+ params->sq_psn = attr->sq_psn;
+ }
+
+ if (ib_attr_mask & IB_QP_RQ_PSN) {
+ to_modify_attrs |= ERDMA_QPA_ROCEV2_RQ_PSN;
+ params->rq_psn = attr->rq_psn;
+ }
+
+ if (ib_attr_mask & IB_QP_DEST_QPN) {
+ to_modify_attrs |= ERDMA_QPA_ROCEV2_DST_QPN;
+ params->dst_qpn = attr->dest_qp_num;
+ }
+
+ if (ib_attr_mask & IB_QP_AV) {
+ to_modify_attrs |= ERDMA_QPA_ROCEV2_AV;
+ udp_sport = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
+ QP_ID(qp), params->dst_qpn);
+ erdma_attr_to_av(&attr->ah_attr, &params->av, udp_sport);
+ }
+
+ *erdma_attr_mask = to_modify_attrs;
+}
+
+int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+ struct ib_udata *udata)
+{
+ struct erdma_qp *qp = to_eqp(ibqp);
+ union erdma_mod_qp_params params;
+ int ret = 0, erdma_attr_mask = 0;
+
+ down_write(&qp->state_lock);
+
+ ret = erdma_check_qp_attrs(qp, attr, attr_mask);
+ if (ret)
+ goto out;
+
+ if (erdma_device_iwarp(qp->dev)) {
+ if (attr_mask & IB_QP_STATE) {
+ erdma_attr_mask |= ERDMA_QPA_IWARP_STATE;
+ params.iwarp.state = ib_to_iwarp_qps(attr->qp_state);
+ }
+
+ ret = erdma_modify_qp_state_iwarp(qp, &params.iwarp,
+ erdma_attr_mask);
+ } else {
+ erdma_init_mod_qp_params_rocev2(
+ qp, &params.rocev2, &erdma_attr_mask, attr, attr_mask);
+
+ ret = erdma_modify_qp_state_rocev2(qp, &params.rocev2,
+ erdma_attr_mask);
+ }
+
+out:
+ up_write(&qp->state_lock);
+ return ret;
+}
+
+static enum ib_qp_state query_qp_state(struct erdma_qp *qp)
+{
+ if (erdma_device_iwarp(qp->dev))
+ return iwarp_to_ib_qps(qp->attrs.iwarp.state);
+ else
+ return rocev2_to_ib_qps(qp->attrs.rocev2.state);
+}
+
+int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+ struct erdma_cmdq_query_qp_req_rocev2 req;
+ struct erdma_dev *dev;
+ struct erdma_qp *qp;
+ u64 resp0, resp1;
+ int ret;
+
+ if (ibqp && qp_attr && qp_init_attr) {
+ qp = to_eqp(ibqp);
+ dev = to_edev(ibqp->device);
+ } else {
+ return -EINVAL;
+ }
+
+ qp_attr->cap.max_inline_data = ERDMA_MAX_INLINE;
+ qp_init_attr->cap.max_inline_data = ERDMA_MAX_INLINE;
+
+ qp_attr->cap.max_send_wr = qp->attrs.sq_size;
+ qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
+ qp_attr->cap.max_send_sge = qp->attrs.max_send_sge;
+ qp_attr->cap.max_recv_sge = qp->attrs.max_recv_sge;
+
+ qp_attr->path_mtu = ib_mtu_int_to_enum(dev->netdev->mtu);
+ qp_attr->max_rd_atomic = qp->attrs.irq_size;
+ qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;
+
+ qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ;
+
+ qp_init_attr->cap = qp_attr->cap;
+
+ if (erdma_device_rocev2(dev)) {
+ /* Query the hardware for the PSNs, QP state and SQ draining status. */
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_QUERY_QP);
+ req.qpn = QP_ID(qp);
+
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0,
+ &resp1, true);
+ if (ret)
+ return ret;
+
+ qp_attr->sq_psn =
+ FIELD_GET(ERDMA_CMD_QUERY_QP_RESP_SQ_PSN_MASK, resp0);
+ qp_attr->rq_psn =
+ FIELD_GET(ERDMA_CMD_QUERY_QP_RESP_RQ_PSN_MASK, resp0);
+ qp_attr->qp_state = rocev2_to_ib_qps(FIELD_GET(
+ ERDMA_CMD_QUERY_QP_RESP_QP_STATE_MASK, resp0));
+ qp_attr->cur_qp_state = qp_attr->qp_state;
+ qp_attr->sq_draining = FIELD_GET(
+ ERDMA_CMD_QUERY_QP_RESP_SQ_DRAINING_MASK, resp0);
+
+ qp_attr->pkey_index = 0;
+ qp_attr->dest_qp_num = qp->attrs.rocev2.dst_qpn;
+
+ if (qp->ibqp.qp_type == IB_QPT_RC)
+ erdma_av_to_attr(&qp->attrs.rocev2.av,
+ &qp_attr->ah_attr);
+ } else {
+ qp_attr->qp_state = query_qp_state(qp);
+ qp_attr->cur_qp_state = qp_attr->qp_state;
+ }
+
+ return 0;
+}
+
+static int erdma_init_user_cq(struct erdma_ucontext *ctx, struct erdma_cq *cq,
+ struct erdma_ureq_create_cq *ureq)
+{
+ int ret;
+ struct erdma_dev *dev = to_edev(cq->ibcq.device);
+
+ ret = get_mtt_entries(dev, &cq->user_cq.qbuf_mem, ureq->qbuf_va,
+ ureq->qbuf_len, 0, ureq->qbuf_va, SZ_64M - SZ_4K,
+ true);
+ if (ret)
+ return ret;
+
+ ret = erdma_map_user_dbrecords(ctx, ureq->db_record_va,
+ &cq->user_cq.user_dbr_page,
+ &cq->user_cq.dbrec_dma);
+ if (ret)
+ put_mtt_entries(dev, &cq->user_cq.qbuf_mem);
+
+ return ret;
+}
+
+static int erdma_init_kernel_cq(struct erdma_cq *cq)
+{
+ struct erdma_dev *dev = to_edev(cq->ibcq.device);
+
+ cq->kern_cq.qbuf =
+ dma_alloc_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT,
+ &cq->kern_cq.qbuf_dma_addr, GFP_KERNEL);
+ if (!cq->kern_cq.qbuf)
+ return -ENOMEM;
+
+ cq->kern_cq.dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL,
+ &cq->kern_cq.dbrec_dma);
+ if (!cq->kern_cq.dbrec)
+ goto err_out;
+
+ spin_lock_init(&cq->kern_cq.lock);
+ /* Use the default CQ doorbell address. */
+ cq->kern_cq.db = dev->func_bar + ERDMA_BAR_CQDB_SPACE_OFFSET;
+
+ return 0;
+
+err_out:
+ dma_free_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT,
+ cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
+
+ return -ENOMEM;
+}
+
+int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct erdma_cq *cq = to_ecq(ibcq);
+ struct erdma_dev *dev = to_edev(ibcq->device);
+ unsigned int depth = attr->cqe;
+ int ret;
+ struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
+ udata, struct erdma_ucontext, ibucontext);
+
+ if (depth > dev->attrs.max_cqe)
+ return -EINVAL;
+
+ depth = roundup_pow_of_two(depth);
+ cq->ibcq.cqe = depth;
+ cq->depth = depth;
+ cq->assoc_eqn = attr->comp_vector + 1;
+
+ ret = xa_alloc_cyclic(&dev->cq_xa, &cq->cqn, cq,
+ XA_LIMIT(1, dev->attrs.max_cq - 1),
+ &dev->next_alloc_cqn, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+
+ if (!rdma_is_kernel_res(&ibcq->res)) {
+ struct erdma_ureq_create_cq ureq;
+ struct erdma_uresp_create_cq uresp;
+
+ ret = ib_copy_from_udata(&ureq, udata,
+ min(udata->inlen, sizeof(ureq)));
+ if (ret)
+ goto err_out_xa;
+
+ ret = erdma_init_user_cq(ctx, cq, &ureq);
+ if (ret)
+ goto err_out_xa;
+
+ uresp.cq_id = cq->cqn;
+ uresp.num_cqe = depth;
+
+ ret = ib_copy_to_udata(udata, &uresp,
+ min(sizeof(uresp), udata->outlen));
+ if (ret)
+ goto err_free_res;
+ } else {
+ ret = erdma_init_kernel_cq(cq);
+ if (ret)
+ goto err_out_xa;
+ }
+
+ ret = create_cq_cmd(ctx, cq);
+ if (ret)
+ goto err_free_res;
+
+ return 0;
+
+err_free_res:
+ if (!rdma_is_kernel_res(&ibcq->res)) {
+ erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
+ put_mtt_entries(dev, &cq->user_cq.qbuf_mem);
+ } else {
+ dma_free_coherent(&dev->pdev->dev, depth << CQE_SHIFT,
+ cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
+ dma_pool_free(dev->db_pool, cq->kern_cq.dbrec,
+ cq->kern_cq.dbrec_dma);
+ }
+
+err_out_xa:
+ xa_erase(&dev->cq_xa, cq->cqn);
+
+ return ret;
+}
+
+void erdma_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+}
+
+void erdma_set_mtu(struct erdma_dev *dev, u32 mtu)
+{
+ struct erdma_cmdq_config_mtu_req req;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
+ CMDQ_OPCODE_CONF_MTU);
+ req.mtu = mtu;
+
+ erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, true);
+}
+
+void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason)
+{
+ struct ib_event event;
+
+ event.device = &dev->ibdev;
+ event.element.port_num = 1;
+ event.event = reason;
+
+ ib_dispatch_event(&event);
+}
+
+enum counters {
+ ERDMA_STATS_TX_REQS_CNT,
+ ERDMA_STATS_TX_PACKETS_CNT,
+ ERDMA_STATS_TX_BYTES_CNT,
+ ERDMA_STATS_TX_DISABLE_DROP_CNT,
+ ERDMA_STATS_TX_BPS_METER_DROP_CNT,
+ ERDMA_STATS_TX_PPS_METER_DROP_CNT,
+
+ ERDMA_STATS_RX_PACKETS_CNT,
+ ERDMA_STATS_RX_BYTES_CNT,
+ ERDMA_STATS_RX_DISABLE_DROP_CNT,
+ ERDMA_STATS_RX_BPS_METER_DROP_CNT,
+ ERDMA_STATS_RX_PPS_METER_DROP_CNT,
+
+ ERDMA_STATS_MAX
+};
+
+static const struct rdma_stat_desc erdma_descs[] = {
+ [ERDMA_STATS_TX_REQS_CNT].name = "tx_reqs_cnt",
+ [ERDMA_STATS_TX_PACKETS_CNT].name = "tx_packets_cnt",
+ [ERDMA_STATS_TX_BYTES_CNT].name = "tx_bytes_cnt",
+ [ERDMA_STATS_TX_DISABLE_DROP_CNT].name = "tx_disable_drop_cnt",
+ [ERDMA_STATS_TX_BPS_METER_DROP_CNT].name = "tx_bps_limit_drop_cnt",
+ [ERDMA_STATS_TX_PPS_METER_DROP_CNT].name = "tx_pps_limit_drop_cnt",
+ [ERDMA_STATS_RX_PACKETS_CNT].name = "rx_packets_cnt",
+ [ERDMA_STATS_RX_BYTES_CNT].name = "rx_bytes_cnt",
+ [ERDMA_STATS_RX_DISABLE_DROP_CNT].name = "rx_disable_drop_cnt",
+ [ERDMA_STATS_RX_BPS_METER_DROP_CNT].name = "rx_bps_limit_drop_cnt",
+ [ERDMA_STATS_RX_PPS_METER_DROP_CNT].name = "rx_pps_limit_drop_cnt",
+};
+
+struct rdma_hw_stats *erdma_alloc_hw_port_stats(struct ib_device *device,
+ u32 port_num)
+{
+ return rdma_alloc_hw_stats_struct(erdma_descs, ERDMA_STATS_MAX,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
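+/*
+ * Fetch the hardware counters with a GET_STATS command: the device DMAs the
+ * counters into a response buffer, whose magic is validated before the
+ * values are copied into the rdma_hw_stats array.
+ */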
+static int erdma_query_hw_stats(struct erdma_dev *dev,
+ struct rdma_hw_stats *stats)
+{
+ struct erdma_cmdq_query_stats_resp *resp;
+ struct erdma_cmdq_query_req req;
+ dma_addr_t dma_addr;
+ int err;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
+ CMDQ_OPCODE_GET_STATS);
+
+ resp = dma_pool_zalloc(dev->resp_pool, GFP_KERNEL, &dma_addr);
+ if (!resp)
+ return -ENOMEM;
+
+ req.target_addr = dma_addr;
+ req.target_length = ERDMA_HW_RESP_SIZE;
+
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+ if (err)
+ goto out;
+
+ if (resp->hdr.magic != ERDMA_HW_RESP_MAGIC) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ memcpy(&stats->value[0], &resp->tx_req_cnt,
+ sizeof(u64) * stats->num_counters);
+
+out:
+ dma_pool_free(dev->resp_pool, resp, dma_addr);
+
+ return err;
+}
+
+int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port, int index)
+{
+ struct erdma_dev *dev = to_edev(ibdev);
+ int ret;
+
+ if (port == 0)
+ return 0;
+
+ ret = erdma_query_hw_stats(dev, stats);
+ if (ret)
+ return ret;
+
+ return stats->num_counters;
+}
+
+enum rdma_link_layer erdma_get_link_layer(struct ib_device *ibdev, u32 port_num)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+static int erdma_set_gid(struct erdma_dev *dev, u8 op, u32 idx,
+ const union ib_gid *gid)
+{
+ struct erdma_cmdq_set_gid_req req;
+ u8 ntype;
+
+ req.cfg = FIELD_PREP(ERDMA_CMD_SET_GID_SGID_IDX_MASK, idx) |
+ FIELD_PREP(ERDMA_CMD_SET_GID_OP_MASK, op);
+
+ if (op == ERDMA_SET_GID_OP_ADD) {
+ if (ipv6_addr_v4mapped((struct in6_addr *)gid))
+ ntype = ERDMA_NETWORK_TYPE_IPV4;
+ else
+ ntype = ERDMA_NETWORK_TYPE_IPV6;
+
+ req.cfg |= FIELD_PREP(ERDMA_CMD_SET_GID_NTYPE_MASK, ntype);
+
+ memcpy(&req.gid, gid, ERDMA_ROCEV2_GID_SIZE);
+ }
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_SET_GID);
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+}
+
+int erdma_add_gid(const struct ib_gid_attr *attr, void **context)
+{
+ struct erdma_dev *dev = to_edev(attr->device);
+ int ret;
+
+ ret = erdma_check_gid_attr(attr);
+ if (ret)
+ return ret;
+
+ return erdma_set_gid(dev, ERDMA_SET_GID_OP_ADD, attr->index,
+ &attr->gid);
+}
+
+int erdma_del_gid(const struct ib_gid_attr *attr, void **context)
+{
+ return erdma_set_gid(to_edev(attr->device), ERDMA_SET_GID_OP_DEL,
+ attr->index, NULL);
+}
+
+int erdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey)
+{
+ if (index >= ERDMA_MAX_PKEYS)
+ return -EINVAL;
+
+ *pkey = ERDMA_DEFAULT_PKEY;
+ return 0;
+}
+
+void erdma_set_av_cfg(struct erdma_av_cfg *av_cfg, struct erdma_av *av)
+{
+ av_cfg->cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_AV_FL_MASK, av->flow_label) |
+ FIELD_PREP(ERDMA_CMD_CREATE_AV_NTYPE_MASK, av->ntype);
+
+ av_cfg->traffic_class = av->traffic_class;
+ av_cfg->hop_limit = av->hop_limit;
+ av_cfg->sl = av->sl;
+
+ av_cfg->udp_sport = av->udp_sport;
+ av_cfg->sgid_index = av->sgid_index;
+
+ ether_addr_copy(av_cfg->dmac, av->dmac);
+ memcpy(av_cfg->dgid, av->dgid, ERDMA_ROCEV2_GID_SIZE);
+}
+
+int erdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ const struct ib_global_route *grh =
+ rdma_ah_read_grh(init_attr->ah_attr);
+ struct erdma_dev *dev = to_edev(ibah->device);
+ struct erdma_pd *pd = to_epd(ibah->pd);
+ struct erdma_ah *ah = to_eah(ibah);
+ struct erdma_cmdq_create_ah_req req;
+ u32 udp_sport;
+ int ret;
+
+ ret = erdma_check_gid_attr(grh->sgid_attr);
+ if (ret)
+ return ret;
+
+ ret = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_AH]);
+ if (ret < 0)
+ return ret;
+
+ ah->ahn = ret;
+
+ if (grh->flow_label)
+ udp_sport = rdma_flow_label_to_udp_sport(grh->flow_label);
+ else
+ udp_sport =
+ IB_ROCE_UDP_ENCAP_VALID_PORT_MIN + (ah->ahn & 0x3FFF);
+
+ erdma_attr_to_av(init_attr->ah_attr, &ah->av, udp_sport);
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_CREATE_AH);
+
+ req.pdn = pd->pdn;
+ req.ahn = ah->ahn;
+ erdma_set_av_cfg(&req.av_cfg, &ah->av);
+
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ init_attr->flags & RDMA_CREATE_AH_SLEEPABLE);
+ if (ret) {
+ erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_AH], ah->ahn);
+ return ret;
+ }
+
+ return 0;
+}
+
+int erdma_destroy_ah(struct ib_ah *ibah, u32 flags)
+{
+ struct erdma_dev *dev = to_edev(ibah->device);
+ struct erdma_pd *pd = to_epd(ibah->pd);
+ struct erdma_ah *ah = to_eah(ibah);
+ struct erdma_cmdq_destroy_ah_req req;
+ int ret;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_DESTROY_AH);
+
+ req.pdn = pd->pdn;
+ req.ahn = ah->ahn;
+
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ flags & RDMA_DESTROY_AH_SLEEPABLE);
+ if (ret)
+ return ret;
+
+ erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_AH], ah->ahn);
+
+ return 0;
+}
+
+int erdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
+{
+ struct erdma_ah *ah = to_eah(ibah);
+
+ memset(ah_attr, 0, sizeof(*ah_attr));
+ erdma_av_to_attr(&ah->av, ah_attr);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h
new file mode 100644
index 000000000000..7d8d3fe501d5
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.h
@@ -0,0 +1,491 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+#ifndef __ERDMA_VERBS_H__
+#define __ERDMA_VERBS_H__
+
+#include "erdma.h"
+
+/* RDMA Capability. */
+#define ERDMA_MAX_PD (128 * 1024)
+#define ERDMA_MAX_SEND_WR 8192
+#define ERDMA_MAX_ORD 128
+#define ERDMA_MAX_IRD 128
+#define ERDMA_MAX_SGE_RD 1
+#define ERDMA_MAX_CONTEXT (128 * 1024)
+#define ERDMA_MAX_SEND_SGE 6
+#define ERDMA_MAX_RECV_SGE 1
+#define ERDMA_MAX_INLINE (sizeof(struct erdma_sge) * (ERDMA_MAX_SEND_SGE))
+#define ERDMA_MAX_FRMR_PA 512
+
+enum {
+ ERDMA_MMAP_IO_NC = 0, /* no cache */
+};
+
+struct erdma_user_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
+ u64 address;
+ u8 mmap_flag;
+};
+
+struct erdma_ext_db_info {
+ bool enable;
+ u16 sdb_off;
+ u16 rdb_off;
+ u16 cdb_off;
+};
+
+struct erdma_ucontext {
+ struct ib_ucontext ibucontext;
+
+ struct erdma_ext_db_info ext_db;
+
+ u64 sdb;
+ u64 rdb;
+ u64 cdb;
+
+ struct rdma_user_mmap_entry *sq_db_mmap_entry;
+ struct rdma_user_mmap_entry *rq_db_mmap_entry;
+ struct rdma_user_mmap_entry *cq_db_mmap_entry;
+
+ /* doorbell records */
+ struct list_head dbrecords_page_list;
+ struct mutex dbrecords_page_mutex;
+};
+
+struct erdma_pd {
+ struct ib_pd ibpd;
+ u32 pdn;
+};
+
+/*
+ * MemoryRegion definition.
+ */
+#define ERDMA_MAX_INLINE_MTT_ENTRIES 4
+#define MTT_SIZE(mtt_cnt) ((mtt_cnt) << 3) /* each MTT entry takes 8 bytes. */
+#define ERDMA_MR_MAX_MTT_CNT 524288
+#define ERDMA_MTT_ENTRY_SIZE 8
+
+#define ERDMA_MR_TYPE_NORMAL 0
+#define ERDMA_MR_TYPE_FRMR 1
+#define ERDMA_MR_TYPE_DMA 2
+
+#define ERDMA_MR_MTT_0LEVEL 0
+#define ERDMA_MR_MTT_1LEVEL 1
+
+#define ERDMA_MR_ACC_RA BIT(0)
+#define ERDMA_MR_ACC_LR BIT(1)
+#define ERDMA_MR_ACC_LW BIT(2)
+#define ERDMA_MR_ACC_RR BIT(3)
+#define ERDMA_MR_ACC_RW BIT(4)
+
+static inline u8 to_erdma_access_flags(int access)
+{
+ return (access & IB_ACCESS_REMOTE_READ ? ERDMA_MR_ACC_RR : 0) |
+ (access & IB_ACCESS_LOCAL_WRITE ? ERDMA_MR_ACC_LW : 0) |
+ (access & IB_ACCESS_REMOTE_WRITE ? ERDMA_MR_ACC_RW : 0) |
+ (access & IB_ACCESS_REMOTE_ATOMIC ? ERDMA_MR_ACC_RA : 0);
+}
+
+/* Hierarchical storage structure for MTT entries */
+struct erdma_mtt {
+ u64 *buf;
+ size_t size;
+
+ bool continuous;
+ union {
+ dma_addr_t buf_dma;
+ struct {
+ dma_addr_t *dma_addrs;
+ u32 npages;
+ u32 level;
+ };
+ };
+
+ struct erdma_mtt *low_level;
+};
+
+struct erdma_mem {
+ struct ib_umem *umem;
+ struct erdma_mtt *mtt;
+
+ u32 page_size;
+ u32 page_offset;
+ u32 page_cnt;
+ u32 mtt_nents;
+
+ u64 va;
+ u64 len;
+};
+
+struct erdma_mr {
+ struct ib_mr ibmr;
+ struct erdma_mem mem;
+ u8 type;
+ u8 access;
+ u8 valid;
+};
+
+struct erdma_user_dbrecords_page {
+ struct list_head list;
+ struct ib_umem *umem;
+ u64 va;
+ int refcnt;
+};
+
+struct erdma_av {
+ u8 port;
+ u8 hop_limit;
+ u8 traffic_class;
+ u8 sl;
+ u8 sgid_index;
+ u16 udp_sport;
+ u32 flow_label;
+ u8 dmac[ETH_ALEN];
+ u8 dgid[ERDMA_ROCEV2_GID_SIZE];
+ enum erdma_network_type ntype;
+};
+
+struct erdma_ah {
+ struct ib_ah ibah;
+ struct erdma_av av;
+ u32 ahn;
+};
+
+struct erdma_uqp {
+ struct erdma_mem sq_mem;
+ struct erdma_mem rq_mem;
+
+ dma_addr_t sq_dbrec_dma;
+ dma_addr_t rq_dbrec_dma;
+
+ struct erdma_user_dbrecords_page *user_dbr_page;
+
+ u32 rq_offset;
+};
+
+struct erdma_kqp {
+ u16 sq_pi;
+ u16 sq_ci;
+
+ u16 rq_pi;
+ u16 rq_ci;
+
+ u64 *swr_tbl;
+ u64 *rwr_tbl;
+
+ void __iomem *hw_sq_db;
+ void __iomem *hw_rq_db;
+
+ void *sq_buf;
+ dma_addr_t sq_buf_dma_addr;
+
+ void *rq_buf;
+ dma_addr_t rq_buf_dma_addr;
+
+ void *sq_dbrec;
+ void *rq_dbrec;
+
+ dma_addr_t sq_dbrec_dma;
+ dma_addr_t rq_dbrec_dma;
+
+ u8 sig_all;
+};
+
+enum erdma_qps_iwarp {
+ ERDMA_QPS_IWARP_IDLE = 0,
+ ERDMA_QPS_IWARP_RTR = 1,
+ ERDMA_QPS_IWARP_RTS = 2,
+ ERDMA_QPS_IWARP_CLOSING = 3,
+ ERDMA_QPS_IWARP_TERMINATE = 4,
+ ERDMA_QPS_IWARP_ERROR = 5,
+ ERDMA_QPS_IWARP_UNDEF = 6,
+ ERDMA_QPS_IWARP_COUNT = 7,
+};
+
+enum erdma_qpa_mask_iwarp {
+ ERDMA_QPA_IWARP_STATE = (1 << 0),
+ ERDMA_QPA_IWARP_LLP_HANDLE = (1 << 2),
+ ERDMA_QPA_IWARP_ORD = (1 << 3),
+ ERDMA_QPA_IWARP_IRD = (1 << 4),
+ ERDMA_QPA_IWARP_SQ_SIZE = (1 << 5),
+ ERDMA_QPA_IWARP_RQ_SIZE = (1 << 6),
+ ERDMA_QPA_IWARP_MPA = (1 << 7),
+ ERDMA_QPA_IWARP_CC = (1 << 8),
+};
+
+enum erdma_qps_rocev2 {
+ ERDMA_QPS_ROCEV2_RESET = 0,
+ ERDMA_QPS_ROCEV2_INIT = 1,
+ ERDMA_QPS_ROCEV2_RTR = 2,
+ ERDMA_QPS_ROCEV2_RTS = 3,
+ ERDMA_QPS_ROCEV2_SQD = 4,
+ ERDMA_QPS_ROCEV2_SQE = 5,
+ ERDMA_QPS_ROCEV2_ERROR = 6,
+ ERDMA_QPS_ROCEV2_COUNT = 7,
+};
+
+enum erdma_qpa_mask_rocev2 {
+ ERDMA_QPA_ROCEV2_STATE = (1 << 0),
+ ERDMA_QPA_ROCEV2_QKEY = (1 << 1),
+ ERDMA_QPA_ROCEV2_AV = (1 << 2),
+ ERDMA_QPA_ROCEV2_SQ_PSN = (1 << 3),
+ ERDMA_QPA_ROCEV2_RQ_PSN = (1 << 4),
+ ERDMA_QPA_ROCEV2_DST_QPN = (1 << 5),
+};
+
+enum erdma_qp_flags {
+ ERDMA_QP_IN_FLUSHING = (1 << 0),
+};
+
+#define ERDMA_QP_ACTIVE 0
+#define ERDMA_QP_PASSIVE 1
+
+struct erdma_mod_qp_params_iwarp {
+ enum erdma_qps_iwarp state;
+ enum erdma_cc_alg cc;
+ u8 qp_type;
+ u8 pd_len;
+ u32 irq_size;
+ u32 orq_size;
+};
+
+struct erdma_qp_attrs_iwarp {
+ enum erdma_qps_iwarp state;
+ u32 cookie;
+};
+
+struct erdma_mod_qp_params_rocev2 {
+ enum erdma_qps_rocev2 state;
+ u32 qkey;
+ u32 sq_psn;
+ u32 rq_psn;
+ u32 dst_qpn;
+ struct erdma_av av;
+};
+
+union erdma_mod_qp_params {
+ struct erdma_mod_qp_params_iwarp iwarp;
+ struct erdma_mod_qp_params_rocev2 rocev2;
+};
+
+struct erdma_qp_attrs_rocev2 {
+ enum erdma_qps_rocev2 state;
+ u32 qkey;
+ u32 dst_qpn;
+ struct erdma_av av;
+};
+
+struct erdma_qp_attrs {
+ enum erdma_cc_alg cc; /* Congestion control algorithm */
+ u32 sq_size;
+ u32 rq_size;
+ u32 orq_size;
+ u32 irq_size;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+ union {
+ struct erdma_qp_attrs_iwarp iwarp;
+ struct erdma_qp_attrs_rocev2 rocev2;
+ };
+};
+
+struct erdma_qp {
+ struct ib_qp ibqp;
+ struct kref ref;
+ struct completion safe_free;
+ struct erdma_dev *dev;
+ struct erdma_cep *cep;
+ struct rw_semaphore state_lock;
+
+ unsigned long flags;
+ struct delayed_work reflush_dwork;
+
+ union {
+ struct erdma_kqp kern_qp;
+ struct erdma_uqp user_qp;
+ };
+
+ struct erdma_cq *scq;
+ struct erdma_cq *rcq;
+
+ struct erdma_qp_attrs attrs;
+ spinlock_t lock;
+};
+
+struct erdma_kcq_info {
+ void *qbuf;
+ dma_addr_t qbuf_dma_addr;
+ u32 ci;
+ u32 cmdsn;
+ u32 notify_cnt;
+
+ spinlock_t lock;
+ u8 __iomem *db;
+ u64 *dbrec;
+ dma_addr_t dbrec_dma;
+};
+
+struct erdma_ucq_info {
+ struct erdma_mem qbuf_mem;
+ struct erdma_user_dbrecords_page *user_dbr_page;
+ dma_addr_t dbrec_dma;
+};
+
+struct erdma_cq {
+ struct ib_cq ibcq;
+ u32 cqn;
+
+ u32 depth;
+ u32 assoc_eqn;
+
+ union {
+ struct erdma_kcq_info kern_cq;
+ struct erdma_ucq_info user_cq;
+ };
+};
+
+#define QP_ID(qp) ((qp)->ibqp.qp_num)
+
+static inline struct erdma_qp *find_qp_by_qpn(struct erdma_dev *dev, int id)
+{
+ return (struct erdma_qp *)xa_load(&dev->qp_xa, id);
+}
+
+static inline struct erdma_cq *find_cq_by_cqn(struct erdma_dev *dev, int id)
+{
+ return (struct erdma_cq *)xa_load(&dev->cq_xa, id);
+}
+
+void erdma_qp_get(struct erdma_qp *qp);
+void erdma_qp_put(struct erdma_qp *qp);
+int erdma_modify_qp_state_iwarp(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_iwarp *params,
+ int mask);
+int erdma_modify_qp_state_rocev2(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_rocev2 *params,
+ int attr_mask);
+void erdma_qp_llp_close(struct erdma_qp *qp);
+void erdma_qp_cm_drop(struct erdma_qp *qp);
+
+static inline bool erdma_device_iwarp(struct erdma_dev *dev)
+{
+ return dev->proto == ERDMA_PROTO_IWARP;
+}
+
+static inline bool erdma_device_rocev2(struct erdma_dev *dev)
+{
+ return dev->proto == ERDMA_PROTO_ROCEV2;
+}
+
+static inline struct erdma_ucontext *to_ectx(struct ib_ucontext *ibctx)
+{
+ return container_of(ibctx, struct erdma_ucontext, ibucontext);
+}
+
+static inline struct erdma_pd *to_epd(struct ib_pd *pd)
+{
+ return container_of(pd, struct erdma_pd, ibpd);
+}
+
+static inline struct erdma_mr *to_emr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct erdma_mr, ibmr);
+}
+
+static inline struct erdma_qp *to_eqp(struct ib_qp *qp)
+{
+ return container_of(qp, struct erdma_qp, ibqp);
+}
+
+static inline struct erdma_cq *to_ecq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct erdma_cq, ibcq);
+}
+
+static inline struct erdma_ah *to_eah(struct ib_ah *ibah)
+{
+ return container_of(ibah, struct erdma_ah, ibah);
+}
+
+static inline int erdma_check_gid_attr(const struct ib_gid_attr *attr)
+{
+ u8 ntype = rdma_gid_attr_network_type(attr);
+
+ if (ntype != RDMA_NETWORK_IPV4 && ntype != RDMA_NETWORK_IPV6)
+ return -EINVAL;
+
+ return 0;
+}
+
+static inline struct erdma_user_mmap_entry *
+to_emmap(struct rdma_user_mmap_entry *ibmmap)
+{
+ return container_of(ibmmap, struct erdma_user_mmap_entry, rdma_entry);
+}
+
+int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *data);
+void erdma_dealloc_ucontext(struct ib_ucontext *ibctx);
+int erdma_query_device(struct ib_device *dev, struct ib_device_attr *attr,
+ struct ib_udata *data);
+int erdma_get_port_immutable(struct ib_device *dev, u32 port,
+ struct ib_port_immutable *ib_port_immutable);
+int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+int erdma_query_port(struct ib_device *dev, u32 port,
+ struct ib_port_attr *attr);
+int erdma_query_gid(struct ib_device *dev, u32 port, int idx,
+ union ib_gid *gid);
+int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *data);
+int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
+ struct ib_udata *data);
+int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask,
+ struct ib_qp_init_attr *init_attr);
+int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask,
+ struct ib_udata *data);
+int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
+int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+void erdma_disassociate_ucontext(struct ib_ucontext *ibcontext);
+int erdma_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
+ u64 virt, int access, struct ib_dmah *dmah,
+ struct ib_udata *udata);
+struct ib_mr *erdma_get_dma_mr(struct ib_pd *ibpd, int rights);
+int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *data);
+int erdma_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma);
+void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
+void erdma_qp_get_ref(struct ib_qp *ibqp);
+void erdma_qp_put_ref(struct ib_qp *ibqp);
+struct ib_qp *erdma_get_ibqp(struct ib_device *dev, int id);
+int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
+ const struct ib_send_wr **bad_send_wr);
+int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
+ const struct ib_recv_wr **bad_recv_wr);
+int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+void erdma_remove_cqes_of_qp(struct ib_cq *ibcq, u32 qpn);
+struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
+ u32 max_num_sg);
+int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset);
+void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason);
+void erdma_set_mtu(struct erdma_dev *dev, u32 mtu);
+struct rdma_hw_stats *erdma_alloc_hw_port_stats(struct ib_device *device,
+ u32 port_num);
+int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port, int index);
+enum rdma_link_layer erdma_get_link_layer(struct ib_device *ibdev,
+ u32 port_num);
+int erdma_add_gid(const struct ib_gid_attr *attr, void **context);
+int erdma_del_gid(const struct ib_gid_attr *attr, void **context);
+int erdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey);
+void erdma_set_av_cfg(struct erdma_av_cfg *av_cfg, struct erdma_av *av);
+int erdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
+int erdma_destroy_ah(struct ib_ah *ibah, u32 flags);
+int erdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
+
+#endif
diff --git a/drivers/infiniband/hw/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig
index 7b146b67a80f..14b92e12bf29 100644
--- a/drivers/infiniband/hw/hfi1/Kconfig
+++ b/drivers/infiniband/hw/hfi1/Kconfig
@@ -1,22 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0-only
config INFINIBAND_HFI1
- tristate "Intel OPA Gen1 support"
- depends on X86_64 && INFINIBAND_RDMAVT && I2C
+ tristate "Cornelis OPX Gen1 support"
+ depends on X86_64 && INFINIBAND_RDMAVT && I2C && !UML
select MMU_NOTIFIER
select CRC32
select I2C_ALGOBIT
- ---help---
- This is a low-level driver for Intel OPA Gen1 adapter.
+ help
+ This is a low-level driver for the Cornelis OPX Gen1 adapter.
config HFI1_DEBUG_SDMA_ORDER
bool "HFI1 SDMA Order debug"
depends on INFINIBAND_HFI1
default n
- ---help---
+ help
This is a debug flag to test for out of order
sdma completions for unit testing
config SDMA_VERBOSITY
bool "Config SDMA Verbosity"
depends on INFINIBAND_HFI1
default n
- ---help---
+ help
This is a configuration flag to enable verbose
SDMA debug
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index 4044a8c8dbf4..5d977f363684 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
hfi1-y := \
affinity.o \
+ aspm.o \
chip.o \
device.o \
driver.o \
@@ -21,11 +22,16 @@ hfi1-y := \
init.o \
intr.o \
iowait.o \
+ ipoib_main.o \
+ ipoib_rx.o \
+ ipoib_tx.o \
mad.o \
mmu_rb.o \
msix.o \
+ netdev_rx.o \
opfn.o \
pcie.o \
+ pin_system.o \
pio.o \
pio_copy.o \
platform.o \
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 4fe662c3bbc1..ee7fedc67b86 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -1,52 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*/
+
#include <linux/topology.h>
#include <linux/cpumask.h>
-#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/numa.h>
@@ -64,6 +22,7 @@ struct hfi1_affinity_node_list node_affinity = {
static const char * const irq_type_names[] = {
"SDMA",
"RCVCTXT",
+ "NETDEVCTXT",
"GENERAL",
"OTHER",
};
@@ -133,9 +92,7 @@ static void cpu_mask_set_put(struct cpu_mask_set *set, int cpu)
/* Initialize non-HT cpu cores mask */
void init_real_cpu_mask(void)
{
- int possible, curr_cpu, i, ht;
-
- cpumask_clear(&node_affinity.real_cpu_mask);
+ int possible, curr_cpu, ht;
/* Start with cpu online mask as the real cpu mask */
cpumask_copy(&node_affinity.real_cpu_mask, cpu_online_mask);
@@ -151,17 +108,10 @@ void init_real_cpu_mask(void)
* "real" cores. Assumes that HT cores are not enumerated in
* succession (except in the single core case).
*/
- curr_cpu = cpumask_first(&node_affinity.real_cpu_mask);
- for (i = 0; i < possible / ht; i++)
- curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
- /*
- * Step 2. Remove the remaining HT siblings. Use cpumask_next() to
- * skip any gaps.
- */
- for (; i < possible; i++) {
- cpumask_clear_cpu(curr_cpu, &node_affinity.real_cpu_mask);
- curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
- }
+ curr_cpu = cpumask_nth(possible / ht, &node_affinity.real_cpu_mask) + 1;
+
+ /* Step 2. Remove the remaining HT siblings. */
+ cpumask_clear_cpus(&node_affinity.real_cpu_mask, curr_cpu, nr_cpu_ids - curr_cpu);
}
int node_affinity_init(void)
@@ -218,6 +168,8 @@ out:
for (node = 0; node < node_affinity.num_possible_nodes; node++)
hfi1_per_node_cntr[node] = 1;
+ pci_dev_put(dev);
+
return 0;
}
@@ -269,11 +221,9 @@ static void node_affinity_add_tail(struct hfi1_affinity_node *entry)
/* It must be called with node_affinity.lock held */
static struct hfi1_affinity_node *node_affinity_lookup(int node)
{
- struct list_head *pos;
struct hfi1_affinity_node *entry;
- list_for_each(pos, &node_affinity.list) {
- entry = list_entry(pos, struct hfi1_affinity_node, list);
+ list_for_each_entry(entry, &node_affinity.list, list) {
if (entry->node == node)
return entry;
}
@@ -387,9 +337,10 @@ static int _dev_comp_vect_cpu_get(struct hfi1_devdata *dd,
&entry->def_intr.used);
/* If there are non-interrupt CPUs available, use them first */
- if (!cpumask_empty(non_intr_cpus))
- cpu = cpumask_first(non_intr_cpus);
- else /* Otherwise, use interrupt CPUs */
+ cpu = cpumask_first(non_intr_cpus);
+
+ /* Otherwise, use interrupt CPUs */
+ if (cpu >= nr_cpu_ids)
cpu = cpumask_first(available_cpus);
if (cpu >= nr_cpu_ids) { /* empty */
@@ -479,6 +430,8 @@ static int _dev_comp_vect_mappings_create(struct hfi1_devdata *dd,
rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), i, cpu);
}
+ free_cpumask_var(available_cpus);
+ free_cpumask_var(non_intr_cpus);
return 0;
fail:
@@ -629,22 +582,11 @@ static void _dev_comp_vect_cpu_mask_clean_up(struct hfi1_devdata *dd,
*/
int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
{
- int node = pcibus_to_node(dd->pcidev->bus);
struct hfi1_affinity_node *entry;
const struct cpumask *local_mask;
int curr_cpu, possible, i, ret;
bool new_entry = false;
- /*
- * If the BIOS does not have the NUMA node information set, select
- * NUMA 0 so we get consistent performance.
- */
- if (node < 0) {
- dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n");
- node = 0;
- }
- dd->node = node;
-
local_mask = cpumask_of_node(dd->node);
if (cpumask_first(local_mask) >= nr_cpu_ids)
local_mask = topology_core_cpumask(0);
@@ -657,7 +599,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
* create an entry in the global affinity structure and initialize it.
*/
if (!entry) {
- entry = node_affinity_allocate(node);
+ entry = node_affinity_allocate(dd->node);
if (!entry) {
dd_dev_err(dd,
"Unable to allocate global affinity node\n");
@@ -716,7 +658,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
* engines, use the same CPU cores as general/control
* context.
*/
- if (cpumask_weight(&entry->def_intr.mask) == 0)
+ if (cpumask_empty(&entry->def_intr.mask))
cpumask_copy(&entry->def_intr.mask,
&entry->general_intr_mask);
}
@@ -736,7 +678,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
* vectors, use the same CPU core as the general/control
* context.
*/
- if (cpumask_weight(&entry->comp_vect_mask) == 0)
+ if (cpumask_empty(&entry->comp_vect_mask))
cpumask_copy(&entry->comp_vect_mask,
&entry->general_intr_mask);
}
@@ -748,6 +690,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
if (new_entry)
node_affinity_add_tail(entry);
+ dd->affinity_entry = entry;
mutex_unlock(&node_affinity.lock);
return 0;
@@ -763,10 +706,9 @@ void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd)
{
struct hfi1_affinity_node *entry;
- if (dd->node < 0)
- return;
-
mutex_lock(&node_affinity.lock);
+ if (!dd->affinity_entry)
+ goto unlock;
entry = node_affinity_lookup(dd->node);
if (!entry)
goto unlock;
@@ -777,8 +719,8 @@ void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd)
*/
_dev_comp_vect_cpu_mask_clean_up(dd, entry);
unlock:
+ dd->affinity_entry = NULL;
mutex_unlock(&node_affinity.lock);
- dd->node = NUMA_NO_NODE;
}
/*
@@ -913,6 +855,11 @@ static int get_irq_affinity(struct hfi1_devdata *dd,
set = &entry->rcv_intr;
scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
break;
+ case IRQ_NETDEVCTXT:
+ rcd = (struct hfi1_ctxtdata *)msix->arg;
+ set = &entry->def_intr;
+ scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
+ break;
default:
dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type);
return -EINVAL;
@@ -965,7 +912,6 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
struct hfi1_msix_entry *msix)
{
struct cpu_mask_set *set = NULL;
- struct hfi1_ctxtdata *rcd;
struct hfi1_affinity_node *entry;
mutex_lock(&node_affinity.lock);
@@ -979,12 +925,17 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
case IRQ_GENERAL:
/* Don't do accounting for general contexts */
break;
- case IRQ_RCVCTXT:
- rcd = (struct hfi1_ctxtdata *)msix->arg;
+ case IRQ_RCVCTXT: {
+ struct hfi1_ctxtdata *rcd = msix->arg;
+
/* Don't do accounting for control contexts */
if (rcd->ctxt != HFI1_CTRL_CTXT)
set = &entry->rcv_intr;
break;
+ }
+ case IRQ_NETDEVCTXT:
+ set = &entry->def_intr;
+ break;
default:
mutex_unlock(&node_affinity.lock);
return;
@@ -1004,32 +955,23 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
static void find_hw_thread_mask(uint hw_thread_no, cpumask_var_t hw_thread_mask,
struct hfi1_affinity_node_list *affinity)
{
- int possible, curr_cpu, i;
- uint num_cores_per_socket = node_affinity.num_online_cpus /
- affinity->num_core_siblings /
- node_affinity.num_online_nodes;
+ int curr_cpu;
+ uint num_cores;
cpumask_copy(hw_thread_mask, &affinity->proc.mask);
- if (affinity->num_core_siblings > 0) {
- /* Removing other siblings not needed for now */
- possible = cpumask_weight(hw_thread_mask);
- curr_cpu = cpumask_first(hw_thread_mask);
- for (i = 0;
- i < num_cores_per_socket * node_affinity.num_online_nodes;
- i++)
- curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
-
- for (; i < possible; i++) {
- cpumask_clear_cpu(curr_cpu, hw_thread_mask);
- curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
- }
- /* Identifying correct HW threads within physical cores */
- cpumask_shift_left(hw_thread_mask, hw_thread_mask,
- num_cores_per_socket *
- node_affinity.num_online_nodes *
- hw_thread_no);
- }
+ if (affinity->num_core_siblings == 0)
+ return;
+
+ num_cores = rounddown(node_affinity.num_online_cpus / affinity->num_core_siblings,
+ node_affinity.num_online_nodes);
+
+ /* Removing other siblings not needed for now */
+ curr_cpu = cpumask_nth(num_cores * node_affinity.num_online_nodes, hw_thread_mask) + 1;
+ cpumask_clear_cpus(hw_thread_mask, curr_cpu, nr_cpu_ids - curr_cpu);
+
+ /* Identifying correct HW threads within physical cores */
+ cpumask_shift_left(hw_thread_mask, hw_thread_mask, num_cores * hw_thread_no);
}
int hfi1_get_proc_affinity(int node)
@@ -1038,7 +980,7 @@ int hfi1_get_proc_affinity(int node)
struct hfi1_affinity_node *entry;
cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
const struct cpumask *node_mask,
- *proc_mask = &current->cpus_allowed;
+ *proc_mask = current->cpus_ptr;
struct hfi1_affinity_node_list *affinity = &node_affinity;
struct cpu_mask_set *set = &affinity->proc;
@@ -1046,7 +988,7 @@ int hfi1_get_proc_affinity(int node)
* check whether process/context affinity has already
* been set
*/
- if (cpumask_weight(proc_mask) == 1) {
+ if (current->nr_cpus_allowed == 1) {
hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
current->pid, current->comm,
cpumask_pr_args(proc_mask));
@@ -1057,7 +999,7 @@ int hfi1_get_proc_affinity(int node)
cpu = cpumask_first(proc_mask);
cpumask_set_cpu(cpu, &set->used);
goto done;
- } else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) {
+ } else if (current->nr_cpus_allowed < cpumask_weight(&set->mask)) {
hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
current->pid, current->comm,
cpumask_pr_args(proc_mask));
@@ -1128,22 +1070,19 @@ int hfi1_get_proc_affinity(int node)
* If HT cores are enabled, identify which HW threads within the
* physical cores should be used.
*/
- if (affinity->num_core_siblings > 0) {
- for (i = 0; i < affinity->num_core_siblings; i++) {
- find_hw_thread_mask(i, hw_thread_mask, affinity);
+ for (i = 0; i < affinity->num_core_siblings; i++) {
+ find_hw_thread_mask(i, hw_thread_mask, affinity);
- /*
- * If there's at least one available core for this HW
- * thread number, stop looking for a core.
- *
- * diff will always be not empty at least once in this
- * loop as the used mask gets reset when
- * (set->mask == set->used) before this loop.
- */
- cpumask_andnot(diff, hw_thread_mask, &set->used);
- if (!cpumask_empty(diff))
- break;
- }
+ /*
+ * If there's at least one available core for this HW
+ * thread number, stop looking for a core.
+ *
+ * diff will always be not empty at least once in this
+ * loop as the used mask gets reset when
+ * (set->mask == set->used) before this loop.
+ */
+ if (cpumask_andnot(diff, hw_thread_mask, &set->used))
+ break;
}
hfi1_cdbg(PROC, "Same available HW thread on all physical CPUs: %*pbl",
cpumask_pr_args(hw_thread_mask));
@@ -1174,8 +1113,7 @@ int hfi1_get_proc_affinity(int node)
* used for process assignments using the same method as
* the preferred NUMA node.
*/
- cpumask_andnot(diff, available_mask, intrs_mask);
- if (!cpumask_empty(diff))
+ if (cpumask_andnot(diff, available_mask, intrs_mask))
cpumask_copy(available_mask, diff);
/* If we don't have CPUs on the preferred node, use other NUMA nodes */
@@ -1191,8 +1129,7 @@ int hfi1_get_proc_affinity(int node)
* At first, we don't want to place processes on the same
* CPUs as interrupt handlers.
*/
- cpumask_andnot(diff, available_mask, intrs_mask);
- if (!cpumask_empty(diff))
+ if (cpumask_andnot(diff, available_mask, intrs_mask))
cpumask_copy(available_mask, diff);
}
hfi1_cdbg(PROC, "Possible CPUs for process: %*pbl",
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index 6a7e6ea4e426..ffdd0d571c7a 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*/
+
#ifndef _HFI1_AFFINITY_H
#define _HFI1_AFFINITY_H
@@ -52,6 +11,7 @@
enum irq_type {
IRQ_SDMA,
IRQ_RCVCTXT,
+ IRQ_NETDEVCTXT,
IRQ_GENERAL,
IRQ_OTHER
};
diff --git a/drivers/infiniband/hw/hfi1/aspm.c b/drivers/infiniband/hw/hfi1/aspm.c
new file mode 100644
index 000000000000..79990d09522b
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/aspm.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2019 Intel Corporation.
+ *
+ */
+
+#include "aspm.h"
+
+/* Time after which the timer interrupt will re-enable ASPM */
+#define ASPM_TIMER_MS 1000
+/* Time for which interrupts are ignored after a timer has been scheduled */
+#define ASPM_RESCHED_TIMER_MS (ASPM_TIMER_MS / 2)
+/* Two interrupts within this time trigger ASPM disable */
+#define ASPM_TRIGGER_MS 1
+#define ASPM_TRIGGER_NS (ASPM_TRIGGER_MS * 1000 * 1000ull)
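+/*
+ * PCI_EXP_LNKCAP_ASPMS is bits 11:10 of the Link Capabilities register;
+ * after the shift, bit 1 of the field indicates L1 support.
+ */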
+#define ASPM_L1_SUPPORTED(reg) \
+ ((((reg) & PCI_EXP_LNKCAP_ASPMS) >> 10) & 0x2)
+
+uint aspm_mode = ASPM_MODE_DISABLED;
+module_param_named(aspm, aspm_mode, uint, 0444);
+MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
+
+static bool aspm_hw_l1_supported(struct hfi1_devdata *dd)
+{
+ struct pci_dev *parent = dd->pcidev->bus->self;
+ u32 up, dn;
+
+ /*
+ * If the driver does not have access to the upstream component,
+ * it cannot support ASPM L1 at all.
+ */
+ if (!parent)
+ return false;
+
+ pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &dn);
+ dn = ASPM_L1_SUPPORTED(dn);
+
+ pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &up);
+ up = ASPM_L1_SUPPORTED(up);
+
+ /* ASPM works on A-step but is reported as not supported */
+ return (!!dn || is_ax(dd)) && !!up;
+}
+
+/* Set L1 entrance latency for slower entry to L1 */
+static void aspm_hw_set_l1_ent_latency(struct hfi1_devdata *dd)
+{
+ u32 l1_ent_lat = 0x4u;
+ u32 reg32;
+
+ pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, &reg32);
+ reg32 &= ~PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SMASK;
+ reg32 |= l1_ent_lat << PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SHIFT;
+ pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, reg32);
+}
+
+static void aspm_hw_enable_l1(struct hfi1_devdata *dd)
+{
+ struct pci_dev *parent = dd->pcidev->bus->self;
+
+ /*
+ * If the driver does not have access to the upstream component,
+ * it cannot support ASPM L1 at all.
+ */
+ if (!parent)
+ return;
+
+ /* Enable ASPM L1 first in upstream component and then downstream */
+ pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_ASPMC,
+ PCI_EXP_LNKCTL_ASPM_L1);
+ pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_ASPMC,
+ PCI_EXP_LNKCTL_ASPM_L1);
+}
+
+void aspm_hw_disable_l1(struct hfi1_devdata *dd)
+{
+ struct pci_dev *parent = dd->pcidev->bus->self;
+
+ /* Disable ASPM L1 first in downstream component and then upstream */
+ pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_ASPMC, 0x0);
+ if (parent)
+ pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_ASPMC, 0x0);
+}
+
+static void aspm_enable(struct hfi1_devdata *dd)
+{
+ if (dd->aspm_enabled || aspm_mode == ASPM_MODE_DISABLED ||
+ !dd->aspm_supported)
+ return;
+
+ aspm_hw_enable_l1(dd);
+ dd->aspm_enabled = true;
+}
+
+static void aspm_disable(struct hfi1_devdata *dd)
+{
+ if (!dd->aspm_enabled || aspm_mode == ASPM_MODE_ENABLED)
+ return;
+
+ aspm_hw_disable_l1(dd);
+ dd->aspm_enabled = false;
+}
+
+static void aspm_disable_inc(struct hfi1_devdata *dd)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->aspm_lock, flags);
+ aspm_disable(dd);
+ atomic_inc(&dd->aspm_disabled_cnt);
+ spin_unlock_irqrestore(&dd->aspm_lock, flags);
+}
+
+static void aspm_enable_dec(struct hfi1_devdata *dd)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->aspm_lock, flags);
+ if (atomic_dec_and_test(&dd->aspm_disabled_cnt))
+ aspm_enable(dd);
+ spin_unlock_irqrestore(&dd->aspm_lock, flags);
+}
+
+/* ASPM processing for each receive context interrupt */
+void __aspm_ctx_disable(struct hfi1_ctxtdata *rcd)
+{
+ bool restart_timer;
+ bool close_interrupts;
+ unsigned long flags;
+ ktime_t now, prev;
+
+ spin_lock_irqsave(&rcd->aspm_lock, flags);
+ /* PSM contexts are open */
+ if (!rcd->aspm_intr_enable)
+ goto unlock;
+
+ prev = rcd->aspm_ts_last_intr;
+ now = ktime_get();
+ rcd->aspm_ts_last_intr = now;
+
+ /* An interrupt pair close together in time */
+ close_interrupts = ktime_to_ns(ktime_sub(now, prev)) < ASPM_TRIGGER_NS;
+
+ /* Don't push out our timer till this much time has elapsed */
+ restart_timer = ktime_to_ns(ktime_sub(now, rcd->aspm_ts_timer_sched)) >
+ ASPM_RESCHED_TIMER_MS * NSEC_PER_MSEC;
+ restart_timer = restart_timer && close_interrupts;
+
+ /* Disable ASPM and schedule timer */
+ if (rcd->aspm_enabled && close_interrupts) {
+ aspm_disable_inc(rcd->dd);
+ rcd->aspm_enabled = false;
+ restart_timer = true;
+ }
+
+ if (restart_timer) {
+ mod_timer(&rcd->aspm_timer,
+ jiffies + msecs_to_jiffies(ASPM_TIMER_MS));
+ rcd->aspm_ts_timer_sched = now;
+ }
+unlock:
+ spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+}
+
+/* Timer function for re-enabling ASPM in the absence of interrupt activity */
+static void aspm_ctx_timer_function(struct timer_list *t)
+{
+ struct hfi1_ctxtdata *rcd = timer_container_of(rcd, t, aspm_timer);
+ unsigned long flags;
+
+ spin_lock_irqsave(&rcd->aspm_lock, flags);
+ aspm_enable_dec(rcd->dd);
+ rcd->aspm_enabled = true;
+ spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+}
+
+/*
+ * Disable interrupt processing for verbs contexts when PSM or VNIC contexts
+ * are open.
+ */
+void aspm_disable_all(struct hfi1_devdata *dd)
+{
+ struct hfi1_ctxtdata *rcd;
+ unsigned long flags;
+ u16 i;
+
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
+ rcd = hfi1_rcd_get_by_index(dd, i);
+ if (rcd) {
+ timer_delete_sync(&rcd->aspm_timer);
+ spin_lock_irqsave(&rcd->aspm_lock, flags);
+ rcd->aspm_intr_enable = false;
+ spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+ hfi1_rcd_put(rcd);
+ }
+ }
+
+ aspm_disable(dd);
+ atomic_set(&dd->aspm_disabled_cnt, 0);
+}
+
+/* Re-enable interrupt processing for verbs contexts */
+void aspm_enable_all(struct hfi1_devdata *dd)
+{
+ struct hfi1_ctxtdata *rcd;
+ unsigned long flags;
+ u16 i;
+
+ aspm_enable(dd);
+
+ if (aspm_mode != ASPM_MODE_DYNAMIC)
+ return;
+
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
+ rcd = hfi1_rcd_get_by_index(dd, i);
+ if (rcd) {
+ spin_lock_irqsave(&rcd->aspm_lock, flags);
+ rcd->aspm_intr_enable = true;
+ rcd->aspm_enabled = true;
+ spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+ hfi1_rcd_put(rcd);
+ }
+ }
+}
+
+static void aspm_ctx_init(struct hfi1_ctxtdata *rcd)
+{
+ spin_lock_init(&rcd->aspm_lock);
+ timer_setup(&rcd->aspm_timer, aspm_ctx_timer_function, 0);
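+ /*
+ * Only the contexts below first_dyn_alloc_ctxt (the kernel receive
+ * contexts) take part in dynamic ASPM handling.
+ */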
+ rcd->aspm_intr_supported = rcd->dd->aspm_supported &&
+ aspm_mode == ASPM_MODE_DYNAMIC &&
+ rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt;
+}
+
+void aspm_init(struct hfi1_devdata *dd)
+{
+ struct hfi1_ctxtdata *rcd;
+ u16 i;
+
+ spin_lock_init(&dd->aspm_lock);
+ dd->aspm_supported = aspm_hw_l1_supported(dd);
+
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
+ rcd = hfi1_rcd_get_by_index(dd, i);
+ if (rcd)
+ aspm_ctx_init(rcd);
+ hfi1_rcd_put(rcd);
+ }
+
+ /* Start with ASPM disabled */
+ aspm_hw_set_l1_ent_latency(dd);
+ dd->aspm_enabled = false;
+ aspm_hw_disable_l1(dd);
+
+ /* Now turn on ASPM if configured */
+ aspm_enable_all(dd);
+}
+
+void aspm_exit(struct hfi1_devdata *dd)
+{
+ aspm_disable_all(dd);
+
+ /* Turn on ASPM on exit to conserve power */
+ aspm_enable(dd);
+}
+
diff --git a/drivers/infiniband/hw/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h
index e8133870ee87..c8d92dc13daa 100644
--- a/drivers/infiniband/hw/hfi1/aspm.h
+++ b/drivers/infiniband/hw/hfi1/aspm.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015-2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+
#ifndef _ASPM_H
#define _ASPM_H
@@ -57,266 +16,20 @@ enum aspm_mode {
ASPM_MODE_DYNAMIC = 2, /* ASPM enabled/disabled dynamically */
};
-/* Time after which the timer interrupt will re-enable ASPM */
-#define ASPM_TIMER_MS 1000
-/* Time for which interrupts are ignored after a timer has been scheduled */
-#define ASPM_RESCHED_TIMER_MS (ASPM_TIMER_MS / 2)
-/* Two interrupts within this time trigger ASPM disable */
-#define ASPM_TRIGGER_MS 1
-#define ASPM_TRIGGER_NS (ASPM_TRIGGER_MS * 1000 * 1000ull)
-#define ASPM_L1_SUPPORTED(reg) \
- (((reg & PCI_EXP_LNKCAP_ASPMS) >> 10) & 0x2)
-
-static inline bool aspm_hw_l1_supported(struct hfi1_devdata *dd)
-{
- struct pci_dev *parent = dd->pcidev->bus->self;
- u32 up, dn;
-
- /*
- * If the driver does not have access to the upstream component,
- * it cannot support ASPM L1 at all.
- */
- if (!parent)
- return false;
-
- pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &dn);
- dn = ASPM_L1_SUPPORTED(dn);
-
- pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &up);
- up = ASPM_L1_SUPPORTED(up);
-
- /* ASPM works on A-step but is reported as not supported */
- return (!!dn || is_ax(dd)) && !!up;
-}
-
-/* Set L1 entrance latency for slower entry to L1 */
-static inline void aspm_hw_set_l1_ent_latency(struct hfi1_devdata *dd)
-{
- u32 l1_ent_lat = 0x4u;
- u32 reg32;
-
- pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, &reg32);
- reg32 &= ~PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SMASK;
- reg32 |= l1_ent_lat << PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SHIFT;
- pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, reg32);
-}
-
-static inline void aspm_hw_enable_l1(struct hfi1_devdata *dd)
-{
- struct pci_dev *parent = dd->pcidev->bus->self;
-
- /*
- * If the driver does not have access to the upstream component,
- * it cannot support ASPM L1 at all.
- */
- if (!parent)
- return;
-
- /* Enable ASPM L1 first in upstream component and then downstream */
- pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
- PCI_EXP_LNKCTL_ASPMC,
- PCI_EXP_LNKCTL_ASPM_L1);
- pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL,
- PCI_EXP_LNKCTL_ASPMC,
- PCI_EXP_LNKCTL_ASPM_L1);
-}
-
-static inline void aspm_hw_disable_l1(struct hfi1_devdata *dd)
-{
- struct pci_dev *parent = dd->pcidev->bus->self;
-
- /* Disable ASPM L1 first in downstream component and then upstream */
- pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL,
- PCI_EXP_LNKCTL_ASPMC, 0x0);
- if (parent)
- pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
- PCI_EXP_LNKCTL_ASPMC, 0x0);
-}
-
-static inline void aspm_enable(struct hfi1_devdata *dd)
-{
- if (dd->aspm_enabled || aspm_mode == ASPM_MODE_DISABLED ||
- !dd->aspm_supported)
- return;
-
- aspm_hw_enable_l1(dd);
- dd->aspm_enabled = true;
-}
-
-static inline void aspm_disable(struct hfi1_devdata *dd)
-{
- if (!dd->aspm_enabled || aspm_mode == ASPM_MODE_ENABLED)
- return;
-
- aspm_hw_disable_l1(dd);
- dd->aspm_enabled = false;
-}
-
-static inline void aspm_disable_inc(struct hfi1_devdata *dd)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&dd->aspm_lock, flags);
- aspm_disable(dd);
- atomic_inc(&dd->aspm_disabled_cnt);
- spin_unlock_irqrestore(&dd->aspm_lock, flags);
-}
-
-static inline void aspm_enable_dec(struct hfi1_devdata *dd)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&dd->aspm_lock, flags);
- if (atomic_dec_and_test(&dd->aspm_disabled_cnt))
- aspm_enable(dd);
- spin_unlock_irqrestore(&dd->aspm_lock, flags);
-}
+void aspm_init(struct hfi1_devdata *dd);
+void aspm_exit(struct hfi1_devdata *dd);
+void aspm_hw_disable_l1(struct hfi1_devdata *dd);
+void __aspm_ctx_disable(struct hfi1_ctxtdata *rcd);
+void aspm_disable_all(struct hfi1_devdata *dd);
+void aspm_enable_all(struct hfi1_devdata *dd);
-/* ASPM processing for each receive context interrupt */
static inline void aspm_ctx_disable(struct hfi1_ctxtdata *rcd)
{
- bool restart_timer;
- bool close_interrupts;
- unsigned long flags;
- ktime_t now, prev;
-
/* Quickest exit for minimum impact */
- if (!rcd->aspm_intr_supported)
+ if (likely(!rcd->aspm_intr_supported))
return;
- spin_lock_irqsave(&rcd->aspm_lock, flags);
- /* PSM contexts are open */
- if (!rcd->aspm_intr_enable)
- goto unlock;
-
- prev = rcd->aspm_ts_last_intr;
- now = ktime_get();
- rcd->aspm_ts_last_intr = now;
-
- /* An interrupt pair close together in time */
- close_interrupts = ktime_to_ns(ktime_sub(now, prev)) < ASPM_TRIGGER_NS;
-
- /* Don't push out our timer till this much time has elapsed */
- restart_timer = ktime_to_ns(ktime_sub(now, rcd->aspm_ts_timer_sched)) >
- ASPM_RESCHED_TIMER_MS * NSEC_PER_MSEC;
- restart_timer = restart_timer && close_interrupts;
-
- /* Disable ASPM and schedule timer */
- if (rcd->aspm_enabled && close_interrupts) {
- aspm_disable_inc(rcd->dd);
- rcd->aspm_enabled = false;
- restart_timer = true;
- }
-
- if (restart_timer) {
- mod_timer(&rcd->aspm_timer,
- jiffies + msecs_to_jiffies(ASPM_TIMER_MS));
- rcd->aspm_ts_timer_sched = now;
- }
-unlock:
- spin_unlock_irqrestore(&rcd->aspm_lock, flags);
-}
-
-/* Timer function for re-enabling ASPM in the absence of interrupt activity */
-static inline void aspm_ctx_timer_function(struct timer_list *t)
-{
- struct hfi1_ctxtdata *rcd = from_timer(rcd, t, aspm_timer);
- unsigned long flags;
-
- spin_lock_irqsave(&rcd->aspm_lock, flags);
- aspm_enable_dec(rcd->dd);
- rcd->aspm_enabled = true;
- spin_unlock_irqrestore(&rcd->aspm_lock, flags);
-}
-
-/*
- * Disable interrupt processing for verbs contexts when PSM or VNIC contexts
- * are open.
- */
-static inline void aspm_disable_all(struct hfi1_devdata *dd)
-{
- struct hfi1_ctxtdata *rcd;
- unsigned long flags;
- u16 i;
-
- for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
- rcd = hfi1_rcd_get_by_index(dd, i);
- if (rcd) {
- del_timer_sync(&rcd->aspm_timer);
- spin_lock_irqsave(&rcd->aspm_lock, flags);
- rcd->aspm_intr_enable = false;
- spin_unlock_irqrestore(&rcd->aspm_lock, flags);
- hfi1_rcd_put(rcd);
- }
- }
-
- aspm_disable(dd);
- atomic_set(&dd->aspm_disabled_cnt, 0);
-}
-
-/* Re-enable interrupt processing for verbs contexts */
-static inline void aspm_enable_all(struct hfi1_devdata *dd)
-{
- struct hfi1_ctxtdata *rcd;
- unsigned long flags;
- u16 i;
-
- aspm_enable(dd);
-
- if (aspm_mode != ASPM_MODE_DYNAMIC)
- return;
-
- for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
- rcd = hfi1_rcd_get_by_index(dd, i);
- if (rcd) {
- spin_lock_irqsave(&rcd->aspm_lock, flags);
- rcd->aspm_intr_enable = true;
- rcd->aspm_enabled = true;
- spin_unlock_irqrestore(&rcd->aspm_lock, flags);
- hfi1_rcd_put(rcd);
- }
- }
-}
-
-static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd)
-{
- spin_lock_init(&rcd->aspm_lock);
- timer_setup(&rcd->aspm_timer, aspm_ctx_timer_function, 0);
- rcd->aspm_intr_supported = rcd->dd->aspm_supported &&
- aspm_mode == ASPM_MODE_DYNAMIC &&
- rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt;
-}
-
-static inline void aspm_init(struct hfi1_devdata *dd)
-{
- struct hfi1_ctxtdata *rcd;
- u16 i;
-
- spin_lock_init(&dd->aspm_lock);
- dd->aspm_supported = aspm_hw_l1_supported(dd);
-
- for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
- rcd = hfi1_rcd_get_by_index(dd, i);
- if (rcd)
- aspm_ctx_init(rcd);
- hfi1_rcd_put(rcd);
- }
-
- /* Start with ASPM disabled */
- aspm_hw_set_l1_ent_latency(dd);
- dd->aspm_enabled = false;
- aspm_hw_disable_l1(dd);
-
- /* Now turn on ASPM if configured */
- aspm_enable_all(dd);
-}
-
-static inline void aspm_exit(struct hfi1_devdata *dd)
-{
- aspm_disable_all(dd);
-
- /* Turn on ASPM on exit to conserve power */
- aspm_enable(dd);
+ __aspm_ctx_disable(rcd);
}
#endif /* _ASPM_H */
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 310105d4e3de..0781ab756d44 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1,48 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2020 Intel Corporation.
+ * Copyright(c) 2021 Cornelis Networks.
*/
/*
@@ -66,10 +25,7 @@
#include "affinity.h"
#include "debugfs.h"
#include "fault.h"
-
-uint kdeth_qp;
-module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO);
-MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix");
+#include "netdev.h"
uint num_vls = HFI1_MAX_VLS_SUPPORTED;
module_param(num_vls, uint, S_IRUGO);
@@ -128,13 +84,15 @@ struct flag_table {
/*
* RSM instance allocation
- * 0 - Verbs
- * 1 - User Fecn Handling
- * 2 - Vnic
+ * 0 - User Fecn Handling
+ * 1 - Vnic
+ * 2 - AIP
+ * 3 - Verbs
*/
-#define RSM_INS_VERBS 0
-#define RSM_INS_FECN 1
-#define RSM_INS_VNIC 2
+#define RSM_INS_FECN 0
+#define RSM_INS_VNIC 1
+#define RSM_INS_AIP 2
+#define RSM_INS_VERBS 3
/* Bit offset into the GUID which carries HFI id information */
#define GUID_HFI_INDEX_SHIFT 39
@@ -175,6 +133,25 @@ struct flag_table {
/* QPN[m+n:1] QW 1, OFFSET 1 */
#define QPN_SELECT_OFFSET ((1ull << QW_SHIFT) | (1ull))
+/* RSM fields for AIP */
+/* LRH.BTH above is reused for this rule */
+
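+/*
+ * As with the other RSM rules in this file, a match/select offset packs
+ * the header quadword index into the upper bits: (QW << QW_SHIFT) | bit.
+ */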
+/* BTH.DESTQP: QW 1, OFFSET 16 for match */
+#define BTH_DESTQP_QW 1ull
+#define BTH_DESTQP_BIT_OFFSET 16ull
+#define BTH_DESTQP_OFFSET(off) ((BTH_DESTQP_QW << QW_SHIFT) | (off))
+#define BTH_DESTQP_MATCH_OFFSET BTH_DESTQP_OFFSET(BTH_DESTQP_BIT_OFFSET)
+#define BTH_DESTQP_MASK 0xFFull
+#define BTH_DESTQP_VALUE 0x81ull
+
+/* DETH.SQPN: QW 3 Offset 56 for select */
+/* We use the 8 most significant source QPN bits as entropy for AIP */
+#define DETH_AIP_SQPN_QW 3ull
+#define DETH_AIP_SQPN_BIT_OFFSET 56ull
+#define DETH_AIP_SQPN_OFFSET(off) ((DETH_AIP_SQPN_QW << QW_SHIFT) | (off))
+#define DETH_AIP_SQPN_SELECT_OFFSET \
+ DETH_AIP_SQPN_OFFSET(DETH_AIP_SQPN_BIT_OFFSET)
+
/* RSM fields for Vnic */
/* L2_TYPE: QW 0, OFFSET 61 - for match */
#define L2_TYPE_QW 0ull
@@ -274,7 +251,7 @@ struct flag_table {
/*
* CCE Error flags.
*/
-static struct flag_table cce_err_status_flags[] = {
+static const struct flag_table cce_err_status_flags[] = {
/* 0*/ FLAG_ENTRY0("CceCsrParityErr",
CCE_ERR_STATUS_CCE_CSR_PARITY_ERR_SMASK),
/* 1*/ FLAG_ENTRY0("CceCsrReadBadAddrErr",
@@ -364,7 +341,7 @@ static struct flag_table cce_err_status_flags[] = {
* Misc Error flags
*/
#define MES(text) MISC_ERR_STATUS_MISC_##text##_ERR_SMASK
-static struct flag_table misc_err_status_flags[] = {
+static const struct flag_table misc_err_status_flags[] = {
/* 0*/ FLAG_ENTRY0("CSR_PARITY", MES(CSR_PARITY)),
/* 1*/ FLAG_ENTRY0("CSR_READ_BAD_ADDR", MES(CSR_READ_BAD_ADDR)),
/* 2*/ FLAG_ENTRY0("CSR_WRITE_BAD_ADDR", MES(CSR_WRITE_BAD_ADDR)),
@@ -383,7 +360,7 @@ static struct flag_table misc_err_status_flags[] = {
/*
* TXE PIO Error flags and consequences
*/
-static struct flag_table pio_err_status_flags[] = {
+static const struct flag_table pio_err_status_flags[] = {
/* 0*/ FLAG_ENTRY("PioWriteBadCtxt",
SEC_WRITE_DROPPED,
SEND_PIO_ERR_STATUS_PIO_WRITE_BAD_CTXT_ERR_SMASK),
@@ -525,7 +502,7 @@ static struct flag_table pio_err_status_flags[] = {
/*
* TXE SDMA Error flags
*/
-static struct flag_table sdma_err_status_flags[] = {
+static const struct flag_table sdma_err_status_flags[] = {
/* 0*/ FLAG_ENTRY0("SDmaRpyTagErr",
SEND_DMA_ERR_STATUS_SDMA_RPY_TAG_ERR_SMASK),
/* 1*/ FLAG_ENTRY0("SDmaCsrParityErr",
@@ -553,7 +530,7 @@ static struct flag_table sdma_err_status_flags[] = {
* TXE Egress Error flags
*/
#define SEES(text) SEND_EGRESS_ERR_STATUS_##text##_ERR_SMASK
-static struct flag_table egress_err_status_flags[] = {
+static const struct flag_table egress_err_status_flags[] = {
/* 0*/ FLAG_ENTRY0("TxPktIntegrityMemCorErr", SEES(TX_PKT_INTEGRITY_MEM_COR)),
/* 1*/ FLAG_ENTRY0("TxPktIntegrityMemUncErr", SEES(TX_PKT_INTEGRITY_MEM_UNC)),
/* 2 reserved */
@@ -654,7 +631,7 @@ static struct flag_table egress_err_status_flags[] = {
* TXE Egress Error Info flags
*/
#define SEEI(text) SEND_EGRESS_ERR_INFO_##text##_ERR_SMASK
-static struct flag_table egress_err_info_flags[] = {
+static const struct flag_table egress_err_info_flags[] = {
/* 0*/ FLAG_ENTRY0("Reserved", 0ull),
/* 1*/ FLAG_ENTRY0("VLErr", SEEI(VL)),
/* 2*/ FLAG_ENTRY0("JobKeyErr", SEEI(JOB_KEY)),
@@ -703,7 +680,7 @@ static struct flag_table egress_err_info_flags[] = {
* TXE Send error flags
*/
#define SES(name) SEND_ERR_STATUS_SEND_##name##_ERR_SMASK
-static struct flag_table send_err_status_flags[] = {
+static const struct flag_table send_err_status_flags[] = {
/* 0*/ FLAG_ENTRY0("SendCsrParityErr", SES(CSR_PARITY)),
/* 1*/ FLAG_ENTRY0("SendCsrReadBadAddrErr", SES(CSR_READ_BAD_ADDR)),
/* 2*/ FLAG_ENTRY0("SendCsrWriteBadAddrErr", SES(CSR_WRITE_BAD_ADDR))
@@ -712,7 +689,7 @@ static struct flag_table send_err_status_flags[] = {
/*
* TXE Send Context Error flags and consequences
*/
-static struct flag_table sc_err_status_flags[] = {
+static const struct flag_table sc_err_status_flags[] = {
/* 0*/ FLAG_ENTRY("InconsistentSop",
SEC_PACKET_DROPPED | SEC_SC_HALTED,
SEND_CTXT_ERR_STATUS_PIO_INCONSISTENT_SOP_ERR_SMASK),
@@ -735,7 +712,7 @@ static struct flag_table sc_err_status_flags[] = {
* RXE Receive Error flags
*/
#define RXES(name) RCV_ERR_STATUS_RX_##name##_ERR_SMASK
-static struct flag_table rxe_err_status_flags[] = {
+static const struct flag_table rxe_err_status_flags[] = {
/* 0*/ FLAG_ENTRY0("RxDmaCsrCorErr", RXES(DMA_CSR_COR)),
/* 1*/ FLAG_ENTRY0("RxDcIntfParityErr", RXES(DC_INTF_PARITY)),
/* 2*/ FLAG_ENTRY0("RxRcvHdrUncErr", RXES(RCV_HDR_UNC)),
@@ -870,7 +847,7 @@ static struct flag_table rxe_err_status_flags[] = {
* DCC Error Flags
*/
#define DCCE(name) DCC_ERR_FLG_##name##_SMASK
-static struct flag_table dcc_err_flags[] = {
+static const struct flag_table dcc_err_flags[] = {
FLAG_ENTRY0("bad_l2_err", DCCE(BAD_L2_ERR)),
FLAG_ENTRY0("bad_sc_err", DCCE(BAD_SC_ERR)),
FLAG_ENTRY0("bad_mid_tail_err", DCCE(BAD_MID_TAIL_ERR)),
@@ -923,7 +900,7 @@ static struct flag_table dcc_err_flags[] = {
* LCB error flags
*/
#define LCBE(name) DC_LCB_ERR_FLG_##name##_SMASK
-static struct flag_table lcb_err_flags[] = {
+static const struct flag_table lcb_err_flags[] = {
/* 0*/ FLAG_ENTRY0("CSR_PARITY_ERR", LCBE(CSR_PARITY_ERR)),
/* 1*/ FLAG_ENTRY0("INVALID_CSR_ADDR", LCBE(INVALID_CSR_ADDR)),
/* 2*/ FLAG_ENTRY0("RST_FOR_FAILED_DESKEW", LCBE(RST_FOR_FAILED_DESKEW)),
@@ -966,7 +943,7 @@ static struct flag_table lcb_err_flags[] = {
* DC8051 Error Flags
*/
#define D8E(name) DC_DC8051_ERR_FLG_##name##_SMASK
-static struct flag_table dc8051_err_flags[] = {
+static const struct flag_table dc8051_err_flags[] = {
FLAG_ENTRY0("SET_BY_8051", D8E(SET_BY_8051)),
FLAG_ENTRY0("LOST_8051_HEART_BEAT", D8E(LOST_8051_HEART_BEAT)),
FLAG_ENTRY0("CRAM_MBE", D8E(CRAM_MBE)),
@@ -985,7 +962,7 @@ static struct flag_table dc8051_err_flags[] = {
*
* Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.ERROR field.
*/
-static struct flag_table dc8051_info_err_flags[] = {
+static const struct flag_table dc8051_info_err_flags[] = {
FLAG_ENTRY0("Spico ROM check failed", SPICO_ROM_FAILED),
FLAG_ENTRY0("Unknown frame received", UNKNOWN_FRAME),
FLAG_ENTRY0("Target BER not met", TARGET_BER_NOT_MET),
@@ -1009,7 +986,7 @@ static struct flag_table dc8051_info_err_flags[] = {
*
* Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG field.
*/
-static struct flag_table dc8051_info_host_msg_flags[] = {
+static const struct flag_table dc8051_info_host_msg_flags[] = {
FLAG_ENTRY0("Host request done", 0x0001),
FLAG_ENTRY0("BC PWR_MGM message", 0x0002),
FLAG_ENTRY0("BC SMA message", 0x0004),
@@ -1079,7 +1056,7 @@ static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr);
static void handle_temp_err(struct hfi1_devdata *dd);
static void dc_shutdown(struct hfi1_devdata *dd);
static void dc_start(struct hfi1_devdata *dd);
-static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
+static int qos_rmt_entries(unsigned int n_krcv_queues, unsigned int *mp,
unsigned int *np);
static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd);
static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms);
@@ -1304,9 +1281,9 @@ CNTR_ELEM(#name, \
access_ibp_##cntr)
/**
- * hfi_addr_from_offset - return addr for readq/writeq
- * @dd - the dd device
- * @offset - the offset of the CSR within bar0
+ * hfi1_addr_from_offset - return addr for readq/writeq
+ * @dd: the dd device
+ * @offset: the offset of the CSR within bar0
*
* This routine selects the appropriate base address
* based on the indicated offset.
@@ -1322,8 +1299,8 @@ static inline void __iomem *hfi1_addr_from_offset(
/**
* read_csr - read CSR at the indicated offset
- * @dd - the dd device
- * @offset - the offset of the CSR within bar0
+ * @dd: the dd device
+ * @offset: the offset of the CSR within bar0
*
* Return: the value read or all FF's if there
* is no mapping
@@ -1337,9 +1314,9 @@ u64 read_csr(const struct hfi1_devdata *dd, u32 offset)
/**
* write_csr - write CSR at the indicated offset
- * @dd - the dd device
- * @offset - the offset of the CSR within bar0
- * @value - value to write
+ * @dd: the dd device
+ * @offset: the offset of the CSR within bar0
+ * @value: value to write
*/
void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value)
{
@@ -1355,8 +1332,8 @@ void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value)
/**
 * get_csr_addr - return the iomem address for offset
- * @dd - the dd device
- * @offset - the offset of the CSR within bar0
+ * @dd: the dd device
+ * @offset: the offset of the CSR within bar0
*
* Return: The iomem address to use in subsequent
* writeq/readq operations.
@@ -1484,7 +1461,8 @@ static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context,
ret = write_lcb_csr(dd, csr, data);
if (ret) {
- dd_dev_err(dd, "Could not acquire LCB for counter 0x%x", csr);
+ if (!(dd->flags & HFI1_SHUTDOWN))
+ dd_dev_err(dd, "Could not acquire LCB for counter 0x%x", csr);
return 0;
}
@@ -1685,6 +1663,14 @@ static u64 access_sw_pio_drain(const struct cntr_entry *entry,
return dd->verbs_dev.n_piodrain;
}
+static u64 access_sw_ctx0_seq_drop(const struct cntr_entry *entry,
+ void *context, int vl, int mode, u64 data)
+{
+ struct hfi1_devdata *dd = context;
+
+ return dd->ctx0_seq_drop;
+}
+
static u64 access_sw_vtx_wait(const struct cntr_entry *entry,
void *context, int vl, int mode, u64 data)
{
@@ -4101,10 +4087,12 @@ def_access_ibp_counter(rc_dupreq);
def_access_ibp_counter(rdma_seq);
def_access_ibp_counter(unaligned);
def_access_ibp_counter(seq_naks);
+def_access_ibp_counter(rc_crwaits);
static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
[C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH),
[C_RX_LEN_ERR] = RXE32_DEV_CNTR_ELEM(RxLenErr, RCV_LENGTH_ERR_CNT, CNTR_SYNTH),
+[C_RX_SHORT_ERR] = RXE32_DEV_CNTR_ELEM(RxShrErr, RCV_SHORT_ERR_CNT, CNTR_SYNTH),
[C_RX_ICRC_ERR] = RXE32_DEV_CNTR_ELEM(RxICrcErr, RCV_ICRC_ERR_CNT, CNTR_SYNTH),
[C_RX_EBP] = RXE32_DEV_CNTR_ELEM(RxEbpCnt, RCV_EBP_CNT, CNTR_SYNTH),
[C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT,
@@ -4248,6 +4236,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
access_sw_cpu_intr),
[C_SW_CPU_RCV_LIM] = CNTR_ELEM("RcvLimit", 0, 0, CNTR_NORMAL,
access_sw_cpu_rcv_limit),
+[C_SW_CTX0_SEQ_DROP] = CNTR_ELEM("SeqDrop0", 0, 0, CNTR_NORMAL,
+ access_sw_ctx0_seq_drop),
[C_SW_VTX_WAIT] = CNTR_ELEM("vTxWait", 0, 0, CNTR_NORMAL,
access_sw_vtx_wait),
[C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL,
@@ -5119,6 +5109,7 @@ static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = {
[C_SW_IBP_RDMA_SEQ] = SW_IBP_CNTR(RdmaSeq, rdma_seq),
[C_SW_IBP_UNALIGNED] = SW_IBP_CNTR(Unaligned, unaligned),
[C_SW_IBP_SEQ_NAK] = SW_IBP_CNTR(SeqNak, seq_naks),
+[C_SW_IBP_RC_CRWAITS] = SW_IBP_CNTR(RcCrWait, rc_crwaits),
[C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL,
access_sw_cpu_rc_acks),
[C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL,
@@ -5284,7 +5275,7 @@ done:
* the buffer. End in '*' if the buffer is too short.
*/
static char *flag_string(char *buf, int buf_len, u64 flags,
- struct flag_table *table, int table_size)
+ const struct flag_table *table, int table_size)
{
char extra[32];
char *p = buf;
@@ -5343,7 +5334,7 @@ static const char * const cce_misc_names[] = {
static char *is_misc_err_name(char *buf, size_t bsize, unsigned int source)
{
if (source < ARRAY_SIZE(cce_misc_names))
- strncpy(buf, cce_misc_names[source], bsize);
+ strscpy_pad(buf, cce_misc_names[source], bsize);
else
snprintf(buf, bsize, "Reserved%u",
source + IS_GENERAL_ERR_START);
@@ -5383,7 +5374,7 @@ static const char * const various_names[] = {
static char *is_various_name(char *buf, size_t bsize, unsigned int source)
{
if (source < ARRAY_SIZE(various_names))
- strncpy(buf, various_names[source], bsize);
+ strscpy_pad(buf, various_names[source], bsize);
else
snprintf(buf, bsize, "Reserved%u", source + IS_VARIOUS_START);
return buf;
@@ -5557,7 +5548,7 @@ static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
#define RCVERR_CHECK_TIME 10
static void update_rcverr_timer(struct timer_list *t)
{
- struct hfi1_devdata *dd = from_timer(dd, t, rcverr_timer);
+ struct hfi1_devdata *dd = timer_container_of(dd, t, rcverr_timer);
struct hfi1_pportdata *ppd = dd->pport;
u32 cur_ovfl_cnt = read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
@@ -5585,7 +5576,7 @@ static int init_rcverr(struct hfi1_devdata *dd)
static void free_rcverr(struct hfi1_devdata *dd)
{
if (dd->rcverr_timer.function)
- del_timer_sync(&dd->rcverr_timer);
+ timer_delete_sync(&dd->rcverr_timer);
}
static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
@@ -6170,7 +6161,7 @@ static int request_host_lcb_access(struct hfi1_devdata *dd)
ret = do_8051_command(dd, HCMD_MISC,
(u64)HCMD_MISC_REQUEST_LCB_ACCESS <<
LOAD_DATA_FIELD_ID_SHIFT, NULL);
- if (ret != HCMD_SUCCESS) {
+ if (ret != HCMD_SUCCESS && !(dd->flags & HFI1_SHUTDOWN)) {
dd_dev_err(dd, "%s: command failed with error %d\n",
__func__, ret);
}
@@ -6251,7 +6242,8 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
if (dd->lcb_access_count == 0) {
ret = request_host_lcb_access(dd);
if (ret) {
- dd_dev_err(dd,
+ if (!(dd->flags & HFI1_SHUTDOWN))
+ dd_dev_err(dd,
"%s: unable to acquire LCB access, err %d\n",
__func__, ret);
goto done;
@@ -6860,7 +6852,7 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
}
rcvmask = HFI1_RCVCTRL_CTXT_ENB;
/* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */
- rcvmask |= rcd->rcvhdrtail_kvaddr ?
+ rcvmask |= hfi1_rcvhdrtail_kvaddr(rcd) ?
HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
hfi1_rcvctrl(dd, rcvmask, rcd);
hfi1_rcd_put(rcd);
@@ -7286,11 +7278,11 @@ static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width)
case 1: return OPA_LINK_WIDTH_1X;
case 2: return OPA_LINK_WIDTH_2X;
case 3: return OPA_LINK_WIDTH_3X;
+ case 4: return OPA_LINK_WIDTH_4X;
default:
dd_dev_info(dd, "%s: invalid width %d, using 4\n",
__func__, width);
- /* fall through */
- case 4: return OPA_LINK_WIDTH_4X;
+ return OPA_LINK_WIDTH_4X;
}
}
@@ -7345,12 +7337,13 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
case 0:
dd->pport[0].link_speed_active = OPA_LINK_SPEED_12_5G;
break;
+ case 1:
+ dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
+ break;
default:
dd_dev_err(dd,
"%s: unexpected max rate %d, using 25Gb\n",
__func__, (int)max_rate);
- /* fall through */
- case 1:
dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
break;
}
@@ -8284,7 +8277,7 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
}
/**
- * gerneral_interrupt() - General interrupt handler
+ * general_interrupt - General interrupt handler
* @irq: MSIx IRQ vector
* @data: hfi1 devdata
*
@@ -8392,20 +8385,107 @@ void force_recv_intr(struct hfi1_ctxtdata *rcd)
static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
{
u32 tail;
- int present;
- if (!rcd->rcvhdrtail_kvaddr)
- present = (rcd->seq_cnt ==
- rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
- else /* is RDMA rtail */
- present = (rcd->head != get_rcvhdrtail(rcd));
-
- if (present)
+ if (hfi1_packet_present(rcd))
return 1;
 /* fall back to a CSR read, correct independent of DMA_RTAIL */
tail = (u32)read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
- return rcd->head != tail;
+ return hfi1_rcd_head(rcd) != tail;
+}
+
+/*
+ * Common code for receive contexts interrupt handlers.
+ * Update traces, increment kernel IRQ counter and
+ * setup ASPM when needed.
+ */
+static void receive_interrupt_common(struct hfi1_ctxtdata *rcd)
+{
+ struct hfi1_devdata *dd = rcd->dd;
+
+ trace_hfi1_receive_interrupt(dd, rcd);
+ this_cpu_inc(*dd->int_counter);
+ aspm_ctx_disable(rcd);
+}
+
+/*
+ * __hfi1_rcd_eoi_intr() - Make HW issue receive interrupt
+ * when there are packets present in the queue. When calling
+ * with interrupts enabled please use hfi1_rcd_eoi_intr.
+ *
+ * @rcd: valid receive context
+ */
+static void __hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd)
+{
+ if (!rcd->rcvhdrq)
+ return;
+ clear_recv_intr(rcd);
+ if (check_packet_present(rcd))
+ force_recv_intr(rcd);
+}
+
+/**
+ * hfi1_rcd_eoi_intr() - End of Interrupt processing action
+ *
+ * @rcd: Ptr to hfi1_ctxtdata of receive context
+ *
+ * Hold IRQs so we can safely clear the interrupt and
+ * recheck for a packet that may have arrived after the previous
+ * check and the interrupt clear. If a packet arrived, force another
+ * interrupt. This routine can be called at the end of receive packet
+ * processing in interrupt service routines, interrupt service thread
+ * and softirqs
+ */
+static void hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __hfi1_rcd_eoi_intr(rcd);
+ local_irq_restore(flags);
+}
+
+/**
+ * hfi1_netdev_rx_napi - napi poll function to move eoi inline
+ * @napi: pointer to napi object
+ * @budget: netdev budget
+ */
+int hfi1_netdev_rx_napi(struct napi_struct *napi, int budget)
+{
+ struct hfi1_netdev_rxq *rxq = container_of(napi,
+ struct hfi1_netdev_rxq, napi);
+ struct hfi1_ctxtdata *rcd = rxq->rcd;
+ int work_done = 0;
+
+ work_done = rcd->do_interrupt(rcd, budget);
+
+ if (work_done < budget) {
+ napi_complete_done(napi, work_done);
+ hfi1_rcd_eoi_intr(rcd);
+ }
+
+ return work_done;
+}
+
+/* Receive packet napi handler for netdevs VNIC and AIP */
+irqreturn_t receive_context_interrupt_napi(int irq, void *data)
+{
+ struct hfi1_ctxtdata *rcd = data;
+
+ receive_interrupt_common(rcd);
+
+ if (likely(rcd->napi)) {
+ if (likely(napi_schedule_prep(rcd->napi)))
+ __napi_schedule_irqoff(rcd->napi);
+ else
+ __hfi1_rcd_eoi_intr(rcd);
+ } else {
+ WARN_ONCE(1, "Napi IRQ handler without napi set up ctxt=%d\n",
+ rcd->ctxt);
+ __hfi1_rcd_eoi_intr(rcd);
+ }
+
+ return IRQ_HANDLED;
}
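The two functions above pair up: receive_context_interrupt_napi() is the hard IRQ handler that schedules NAPI, and hfi1_netdev_rx_napi() is the poll routine that drains the context and re-arms the interrupt. A rough, illustrative sketch of how such a pairing could be registered is shown below; the function name hfi1_netdev_rxq_setup_sketch(), the irq/name parameters, and the NAPI weight are assumptions made for this sketch (the real driver performs this registration in its MSI-X and netdev setup paths, and the netif_napi_add() signature differs across kernel versions).

#include <linux/interrupt.h>
#include <linux/netdevice.h>

/* Illustrative sketch only -- not the driver's actual registration path. */
static int hfi1_netdev_rxq_setup_sketch(struct net_device *dev,
					struct hfi1_netdev_rxq *rxq,
					int irq, const char *name)
{
	int ret;

	/* attach the poll routine shown above to this rx queue */
	netif_napi_add(dev, &rxq->napi, hfi1_netdev_rx_napi, 64);

	/* route the receive context interrupt to the napi-aware handler */
	ret = request_irq(irq, receive_context_interrupt_napi, 0, name,
			  rxq->rcd);
	if (ret) {
		netif_napi_del(&rxq->napi);
		return ret;
	}

	/* let the hard IRQ handler find the napi instance */
	rxq->rcd->napi = &rxq->napi;
	napi_enable(&rxq->napi);
	return 0;
}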
/*
@@ -8419,13 +8499,9 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
irqreturn_t receive_context_interrupt(int irq, void *data)
{
struct hfi1_ctxtdata *rcd = data;
- struct hfi1_devdata *dd = rcd->dd;
int disposition;
- int present;
- trace_hfi1_receive_interrupt(dd, rcd);
- this_cpu_inc(*dd->int_counter);
- aspm_ctx_disable(rcd);
+ receive_interrupt_common(rcd);
/* receive interrupt remains blocked while processing packets */
disposition = rcd->do_interrupt(rcd, 0);
@@ -8438,17 +8514,7 @@ irqreturn_t receive_context_interrupt(int irq, void *data)
if (disposition == RCV_PKT_LIMIT)
return IRQ_WAKE_THREAD;
- /*
- * The packet processor detected no more packets. Clear the receive
- * interrupt and recheck for a packet packet that may have arrived
- * after the previous check and interrupt clear. If a packet arrived,
- * force another interrupt.
- */
- clear_recv_intr(rcd);
- present = check_packet_present(rcd);
- if (present)
- force_recv_intr(rcd);
-
+ __hfi1_rcd_eoi_intr(rcd);
return IRQ_HANDLED;
}
@@ -8459,24 +8525,11 @@ irqreturn_t receive_context_interrupt(int irq, void *data)
irqreturn_t receive_context_thread(int irq, void *data)
{
struct hfi1_ctxtdata *rcd = data;
- int present;
/* receive interrupt is still blocked from the IRQ handler */
(void)rcd->do_interrupt(rcd, 1);
- /*
- * The packet processor will only return if it detected no more
- * packets. Hold IRQs here so we can safely clear the interrupt and
- * recheck for a packet that may have arrived after the previous
- * check and the interrupt clear. If a packet arrived, force another
- * interrupt.
- */
- local_irq_disable();
- clear_recv_intr(rcd);
- present = check_packet_present(rcd);
- if (present)
- force_recv_intr(rcd);
- local_irq_enable();
+ hfi1_rcd_eoi_intr(rcd);
return IRQ_HANDLED;
}
@@ -8702,7 +8755,7 @@ static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
/*
* When writing a LCB CSR, out_data contains the full value to
- * to be written, while in_data contains the relative LCB
+ * be written, while in_data contains the relative LCB
* address in 7:0. Do the work here, rather than the caller,
 * of distributing the write data to where it needs to go:
*
@@ -9850,6 +9903,7 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
/* disable the port */
clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
+ cancel_work_sync(&ppd->freeze_work);
}
static inline int init_cpu_counters(struct hfi1_devdata *dd)
@@ -10046,12 +10100,12 @@ u32 lrh_max_header_bytes(struct hfi1_devdata *dd)
* the first kernel context would have been allocated by now so
* we are guaranteed a valid value.
*/
- return (dd->rcd[0]->rcvhdrqentsize - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2;
+ return (get_hdrqentsize(dd->rcd[0]) - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2;
}
/*
* Set Send Length
- * @ppd - per port data
+ * @ppd: per port data
*
* Set the MTU by limiting how many DWs may be sent. The SendLenCheck*
* registers compare against LRH.PktLen, so use the max bytes included
@@ -10091,7 +10145,7 @@ static void set_send_length(struct hfi1_pportdata *ppd)
thres = min(sc_percent_to_threshold(dd->vld[i].sc, 50),
sc_mtu_to_threshold(dd->vld[i].sc,
dd->vld[i].mtu,
- dd->rcd[0]->rcvhdrqentsize));
+ get_hdrqentsize(dd->rcd[0])));
for (j = 0; j < INIT_SC_PER_VL; j++)
sc_set_cr_threshold(
pio_select_send_context_vl(dd, j, i),
@@ -11818,7 +11872,7 @@ u32 hdrqempty(struct hfi1_ctxtdata *rcd)
head = (read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD)
& RCV_HDR_HEAD_HEAD_SMASK) >> RCV_HDR_HEAD_HEAD_SHIFT;
- if (rcd->rcvhdrtail_kvaddr)
+ if (hfi1_rcvhdrtail_kvaddr(rcd))
tail = get_rcvhdrtail(rcd);
else
tail = read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
@@ -11862,6 +11916,84 @@ static u32 encoded_size(u32 size)
return 0x1; /* if invalid, go with the minimum size */
}
+/**
+ * encode_rcv_header_entry_size - return chip specific encoding for size
+ * @size: size in dwords
+ *
+ * Convert a receive header entry size to the encoding used in the CSR.
+ *
+ * Return a zero if the given size is invalid, otherwise the encoding.
+ */
+u8 encode_rcv_header_entry_size(u8 size)
+{
+ /* there are only 3 valid receive header entry sizes */
+ if (size == 2)
+ return 1;
+ if (size == 16)
+ return 2;
+ if (size == 32)
+ return 4;
+ return 0; /* invalid */
+}
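For clarity, the mapping above is 2 dwords -> 1, 16 dwords -> 2, and 32 dwords -> 4. A hypothetical inverse helper, not part of the driver and shown only to make that mapping explicit, would be:

/* Illustrative only: decode the CSR encoding back into dwords. */
static u8 decode_rcv_header_entry_size(u8 encoded)
{
	if (encoded == 1)
		return 2;
	if (encoded == 2)
		return 16;
	if (encoded == 4)
		return 32;
	return 0;	/* invalid encoding */
}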
+
+/**
+ * hfi1_validate_rcvhdrcnt - validate hdrcnt
+ * @dd: the device data
+ * @thecnt: the header count
+ */
+int hfi1_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt)
+{
+ if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
+ dd_dev_err(dd, "Receive header queue count too small\n");
+ return -EINVAL;
+ }
+
+ if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
+ dd_dev_err(dd,
+ "Receive header queue count cannot be greater than %u\n",
+ HFI1_MAX_HDRQ_EGRBUF_CNT);
+ return -EINVAL;
+ }
+
+ if (thecnt % HDRQ_INCREMENT) {
+ dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n",
+ thecnt, HDRQ_INCREMENT);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * set_hdrq_regs - set header queue registers for context
+ * @dd: the device data
+ * @ctxt: the context
+ * @entsize: the dword entry size
+ * @hdrcnt: the number of header entries
+ */
+void set_hdrq_regs(struct hfi1_devdata *dd, u8 ctxt, u8 entsize, u16 hdrcnt)
+{
+ u64 reg;
+
+ reg = (((u64)hdrcnt >> HDRQ_SIZE_SHIFT) & RCV_HDR_CNT_CNT_MASK) <<
+ RCV_HDR_CNT_CNT_SHIFT;
+ write_kctxt_csr(dd, ctxt, RCV_HDR_CNT, reg);
+ reg = ((u64)encode_rcv_header_entry_size(entsize) &
+ RCV_HDR_ENT_SIZE_ENT_SIZE_MASK) <<
+ RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT;
+ write_kctxt_csr(dd, ctxt, RCV_HDR_ENT_SIZE, reg);
+ reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK) <<
+ RCV_HDR_SIZE_HDR_SIZE_SHIFT;
+ write_kctxt_csr(dd, ctxt, RCV_HDR_SIZE, reg);
+
+ /*
+ * Program dummy tail address for every receive context
+ * before enabling any receive context
+ */
+ write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
+ dd->rcvhdrtail_dummy_dma);
+}
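The three helpers above are intended to be used together when a receive context's header queue is programmed. A minimal sketch follows, assuming example values for the entry size and header count; the real driver derives both from module parameters and per-context configuration.

/* Illustrative only: combine the helpers above for one context. */
static int program_hdrq_sketch(struct hfi1_devdata *dd, u8 ctxt)
{
	u8 entsize = 32;	/* dwords per header entry (example value) */
	u16 hdrcnt = 2048;	/* number of header entries (example value) */

	if (!encode_rcv_header_entry_size(entsize))
		return -EINVAL;		/* only 2, 16 and 32 are valid */
	if (hfi1_validate_rcvhdrcnt(dd, hdrcnt))
		return -EINVAL;		/* out of range or bad multiple */

	set_hdrq_regs(dd, ctxt, entsize, hdrcnt);
	return 0;
}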
+
void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
struct hfi1_ctxtdata *rcd)
{
@@ -11883,13 +12015,13 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
/* reset the tail and hdr addresses, and sequence count */
write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
rcd->rcvhdrq_dma);
- if (rcd->rcvhdrtail_kvaddr)
+ if (hfi1_rcvhdrtail_kvaddr(rcd))
write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
rcd->rcvhdrqtailaddr_dma);
- rcd->seq_cnt = 1;
+ hfi1_set_seq_cnt(rcd, 1);
/* reset the cached receive header queue head value */
- rcd->head = 0;
+ hfi1_set_rcd_head(rcd, 0);
/*
* Zero the receive header queue so we don't get false
@@ -11969,7 +12101,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
IS_RCVAVAIL_START + rcd->ctxt, false);
rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
}
- if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr)
+ if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && hfi1_rcvhdrtail_kvaddr(rcd))
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
/* See comment on RcvCtxtCtrl.TailUpd above */
@@ -12005,7 +12137,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt,
IS_RCVURGENT_START + rcd->ctxt, false);
- hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
+ hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx", ctxt, rcvctrl);
write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcvctrl);
/* work around sticky RcvCtxtStatus.BlockedRHQFull */
@@ -12075,10 +12207,10 @@ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp)
hfi1_cdbg(CNTR, "reading %s", entry->name);
if (entry->flags & CNTR_DISABLED) {
/* Nothing */
- hfi1_cdbg(CNTR, "\tDisabled\n");
+ hfi1_cdbg(CNTR, "\tDisabled");
} else {
if (entry->flags & CNTR_VL) {
- hfi1_cdbg(CNTR, "\tPer VL\n");
+ hfi1_cdbg(CNTR, "\tPer VL");
for (j = 0; j < C_VL_COUNT; j++) {
val = entry->rw_cntr(entry,
dd, j,
@@ -12086,21 +12218,21 @@ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp)
0);
hfi1_cdbg(
CNTR,
- "\t\tRead 0x%llx for %d\n",
+ "\t\tRead 0x%llx for %d",
val, j);
dd->cntrs[entry->offset + j] =
val;
}
} else if (entry->flags & CNTR_SDMA) {
hfi1_cdbg(CNTR,
- "\t Per SDMA Engine\n");
+ "\t Per SDMA Engine");
for (j = 0; j < chip_sdma_engines(dd);
j++) {
val =
entry->rw_cntr(entry, dd, j,
CNTR_MODE_R, 0);
hfi1_cdbg(CNTR,
- "\t\tRead 0x%llx for %d\n",
+ "\t\tRead 0x%llx for %d",
val, j);
dd->cntrs[entry->offset + j] =
val;
@@ -12141,7 +12273,7 @@ u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp)
hfi1_cdbg(CNTR, "reading %s", entry->name);
if (entry->flags & CNTR_DISABLED) {
/* Nothing */
- hfi1_cdbg(CNTR, "\tDisabled\n");
+ hfi1_cdbg(CNTR, "\tDisabled");
continue;
}
@@ -12176,7 +12308,8 @@ static void free_cntrs(struct hfi1_devdata *dd)
int i;
if (dd->synth_stats_timer.function)
- del_timer_sync(&dd->synth_stats_timer);
+ timer_delete_sync(&dd->synth_stats_timer);
+ cancel_work_sync(&dd->update_cntr_work);
ppd = (struct hfi1_pportdata *)(dd + 1);
for (i = 0; i < dd->num_pports; i++, ppd++) {
kfree(ppd->cntrs);
@@ -12383,7 +12516,7 @@ static void do_update_synth_timer(struct work_struct *work)
hfi1_cdbg(
CNTR,
- "[%d] curr tx=0x%llx rx=0x%llx :: last tx=0x%llx rx=0x%llx\n",
+ "[%d] curr tx=0x%llx rx=0x%llx :: last tx=0x%llx rx=0x%llx",
dd->unit, cur_tx, cur_rx, dd->last_tx, dd->last_rx);
if ((cur_tx < dd->last_tx) || (cur_rx < dd->last_rx)) {
@@ -12397,7 +12530,7 @@ static void do_update_synth_timer(struct work_struct *work)
} else {
total_flits = (cur_tx - dd->last_tx) + (cur_rx - dd->last_rx);
hfi1_cdbg(CNTR,
- "[%d] total flits 0x%llx limit 0x%llx\n", dd->unit,
+ "[%d] total flits 0x%llx limit 0x%llx", dd->unit,
total_flits, (u64)CNTR_32BIT_MAX);
if (total_flits >= CNTR_32BIT_MAX) {
hfi1_cdbg(CNTR, "[%d] 32bit limit hit, updating",
@@ -12454,7 +12587,7 @@ static void do_update_synth_timer(struct work_struct *work)
static void update_synth_timer(struct timer_list *t)
{
- struct hfi1_devdata *dd = from_timer(dd, t, synth_stats_timer);
+ struct hfi1_devdata *dd = timer_container_of(dd, t, synth_stats_timer);
queue_work(dd->update_cntr_wq, &dd->update_cntr_work);
mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
@@ -12710,11 +12843,6 @@ bail:
static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
{
switch (chip_lstate) {
- default:
- dd_dev_err(dd,
- "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
- chip_lstate);
- /* fall through */
case LSTATE_DOWN:
return IB_PORT_DOWN;
case LSTATE_INIT:
@@ -12723,6 +12851,11 @@ static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
return IB_PORT_ARMED;
case LSTATE_ACTIVE:
return IB_PORT_ACTIVE;
+ default:
+ dd_dev_err(dd,
+ "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
+ chip_lstate);
+ return IB_PORT_DOWN;
}
}
@@ -12730,10 +12863,6 @@ u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate)
{
/* look at the HFI meta-states only */
switch (chip_pstate & 0xf0) {
- default:
- dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
- chip_pstate);
- /* fall through */
case PLS_DISABLED:
return IB_PORTPHYSSTATE_DISABLED;
case PLS_OFFLINE:
@@ -12746,25 +12875,13 @@ u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate)
return IB_PORTPHYSSTATE_LINKUP;
case PLS_PHYTEST:
return IB_PORTPHYSSTATE_PHY_TEST;
+ default:
+ dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
+ chip_pstate);
+ return IB_PORTPHYSSTATE_DISABLED;
}
}
-/* return the OPA port logical state name */
-const char *opa_lstate_name(u32 lstate)
-{
- static const char * const port_logical_names[] = {
- "PORT_NOP",
- "PORT_DOWN",
- "PORT_INIT",
- "PORT_ARMED",
- "PORT_ACTIVE",
- "PORT_ACTIVE_DEFER",
- };
- if (lstate < ARRAY_SIZE(port_logical_names))
- return port_logical_names[lstate];
- return "unknown";
-}
-
/* return the OPA port physical state name */
const char *opa_pstate_name(u32 pstate)
{
@@ -12823,8 +12940,6 @@ static void update_statusp(struct hfi1_pportdata *ppd, u32 state)
break;
}
}
- dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n",
- opa_lstate_name(state), state);
}
/**
@@ -13052,15 +13167,16 @@ static void read_mod_write(struct hfi1_devdata *dd, u16 src, u64 bits,
{
u64 reg;
u16 idx = src / BITS_PER_REGISTER;
+ unsigned long flags;
- spin_lock(&dd->irq_src_lock);
+ spin_lock_irqsave(&dd->irq_src_lock, flags);
reg = read_csr(dd, CCE_INT_MASK + (8 * idx));
if (set)
reg |= bits;
else
reg &= ~bits;
write_csr(dd, CCE_INT_MASK + (8 * idx), reg);
- spin_unlock(&dd->irq_src_lock);
+ spin_unlock_irqrestore(&dd->irq_src_lock, flags);
}
/**
@@ -13101,7 +13217,7 @@ int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set)
/*
* Clear all interrupt sources on the chip.
*/
-void clear_all_interrupts(struct hfi1_devdata *dd)
+static void clear_all_interrupts(struct hfi1_devdata *dd)
{
int i;
@@ -13223,17 +13339,15 @@ static int set_up_interrupts(struct hfi1_devdata *dd)
* in array of contexts
* freectxts - number of free user contexts
* num_send_contexts - number of PIO send contexts being used
- * num_vnic_contexts - number of contexts reserved for VNIC
+ * num_netdev_contexts - number of contexts reserved for netdev
*/
static int set_up_context_variables(struct hfi1_devdata *dd)
{
unsigned long num_kernel_contexts;
- u16 num_vnic_contexts = HFI1_NUM_VNIC_CTXT;
- int total_contexts;
+ u16 num_netdev_contexts;
int ret;
unsigned ngroups;
int rmt_count;
- int user_rmt_reduced;
u32 n_usr_ctxts;
u32 send_contexts = chip_send_contexts(dd);
u32 rcv_contexts = chip_rcv_contexts(dd);
@@ -13266,13 +13380,6 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
num_kernel_contexts = send_contexts - num_vls - 1;
}
- /* Accommodate VNIC contexts if possible */
- if ((num_kernel_contexts + num_vnic_contexts) > rcv_contexts) {
- dd_dev_err(dd, "No receive contexts available for VNIC\n");
- num_vnic_contexts = 0;
- }
- total_contexts = num_kernel_contexts + num_vnic_contexts;
-
/*
* User contexts:
* - default to 1 user context per real (non-HT) CPU core if
@@ -13285,55 +13392,64 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
/*
* Adjust the counts given a global max.
*/
- if (total_contexts + n_usr_ctxts > rcv_contexts) {
+ if (num_kernel_contexts + n_usr_ctxts > rcv_contexts) {
dd_dev_err(dd,
- "Reducing # user receive contexts to: %d, from %u\n",
- rcv_contexts - total_contexts,
+ "Reducing # user receive contexts to: %u, from %u\n",
+ (u32)(rcv_contexts - num_kernel_contexts),
n_usr_ctxts);
/* recalculate */
- n_usr_ctxts = rcv_contexts - total_contexts;
+ n_usr_ctxts = rcv_contexts - num_kernel_contexts;
}
+ num_netdev_contexts =
+ hfi1_num_netdev_contexts(dd, rcv_contexts -
+ (num_kernel_contexts + n_usr_ctxts),
+ &node_affinity.real_cpu_mask);
/*
- * The RMT entries are currently allocated as shown below:
- * 1. QOS (0 to 128 entries);
- * 2. FECN (num_kernel_context - 1 + num_user_contexts +
- * num_vnic_contexts);
- * 3. VNIC (num_vnic_contexts).
- * It should be noted that FECN oversubscribe num_vnic_contexts
- * entries of RMT because both VNIC and PSM could allocate any receive
- * context between dd->first_dyn_alloc_text and dd->num_rcv_contexts,
- * and PSM FECN must reserve an RMT entry for each possible PSM receive
- * context.
+ * RMT entries are allocated as follows:
+ * 1. QOS (0 to 128 entries)
+ * 2. FECN (num_kernel_contexts - 1 [a] + num_user_contexts +
+ * num_netdev_contexts [b])
+ * 3. netdev (NUM_NETDEV_MAP_ENTRIES)
+ *
+ * Notes:
+ * [a] Kernel contexts (except control) are included in FECN if kernel
+ * TID_RDMA is active.
+ * [b] Netdev and user contexts are randomly allocated from the same
+ * context pool, so FECN must cover all contexts in the pool.
*/
- rmt_count = qos_rmt_entries(dd, NULL, NULL) + (num_vnic_contexts * 2);
- if (HFI1_CAP_IS_KSET(TID_RDMA))
- rmt_count += num_kernel_contexts - 1;
- if (rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) {
- user_rmt_reduced = NUM_MAP_ENTRIES - rmt_count;
- dd_dev_err(dd,
- "RMT size is reducing the number of user receive contexts from %u to %d\n",
- n_usr_ctxts,
- user_rmt_reduced);
- /* recalculate */
- n_usr_ctxts = user_rmt_reduced;
+ rmt_count = qos_rmt_entries(num_kernel_contexts - 1, NULL, NULL)
+ + (HFI1_CAP_IS_KSET(TID_RDMA) ? num_kernel_contexts - 1
+ : 0)
+ + n_usr_ctxts
+ + num_netdev_contexts
+ + NUM_NETDEV_MAP_ENTRIES;
+ if (rmt_count > NUM_MAP_ENTRIES) {
+ int over = rmt_count - NUM_MAP_ENTRIES;
+ /* try to squish user contexts, minimum of 1 */
+ if (over >= n_usr_ctxts) {
+ dd_dev_err(dd, "RMT overflow: reduce the requested number of contexts\n");
+ return -EINVAL;
+ }
+ dd_dev_err(dd, "RMT overflow: reducing # user contexts from %u to %u\n",
+ n_usr_ctxts, n_usr_ctxts - over);
+ n_usr_ctxts -= over;
}
- total_contexts += n_usr_ctxts;
-
- /* the first N are kernel contexts, the rest are user/vnic contexts */
- dd->num_rcv_contexts = total_contexts;
+ /* the first N are kernel contexts, the rest are user/netdev contexts */
+ dd->num_rcv_contexts =
+ num_kernel_contexts + n_usr_ctxts + num_netdev_contexts;
dd->n_krcv_queues = num_kernel_contexts;
dd->first_dyn_alloc_ctxt = num_kernel_contexts;
- dd->num_vnic_contexts = num_vnic_contexts;
+ dd->num_netdev_contexts = num_netdev_contexts;
dd->num_user_contexts = n_usr_ctxts;
dd->freectxts = n_usr_ctxts;
dd_dev_info(dd,
- "rcv contexts: chip %d, used %d (kernel %d, vnic %u, user %u)\n",
+ "rcv contexts: chip %d, used %d (kernel %d, netdev %u, user %u)\n",
rcv_contexts,
(int)dd->num_rcv_contexts,
(int)dd->n_krcv_queues,
- dd->num_vnic_contexts,
+ dd->num_netdev_contexts,
dd->num_user_contexts);
/*
@@ -14012,29 +14128,33 @@ static void init_early_variables(struct hfi1_devdata *dd)
static void init_kdeth_qp(struct hfi1_devdata *dd)
{
- /* user changed the KDETH_QP */
- if (kdeth_qp != 0 && kdeth_qp >= 0xff) {
- /* out of range or illegal value */
- dd_dev_err(dd, "Invalid KDETH queue pair prefix, ignoring");
- kdeth_qp = 0;
- }
- if (kdeth_qp == 0) /* not set, or failed range check */
- kdeth_qp = DEFAULT_KDETH_QP;
-
write_csr(dd, SEND_BTH_QP,
- (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK) <<
+ (RVT_KDETH_QP_PREFIX & SEND_BTH_QP_KDETH_QP_MASK) <<
SEND_BTH_QP_KDETH_QP_SHIFT);
write_csr(dd, RCV_BTH_QP,
- (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK) <<
+ (RVT_KDETH_QP_PREFIX & RCV_BTH_QP_KDETH_QP_MASK) <<
RCV_BTH_QP_KDETH_QP_SHIFT);
}
/**
- * init_qpmap_table
- * @dd - device data
- * @first_ctxt - first context
- * @last_ctxt - first context
+ * hfi1_get_qp_map - get qp map
+ * @dd: device data
+ * @idx: index to read
+ */
+u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx)
+{
+ u64 reg = read_csr(dd, RCV_QP_MAP_TABLE + (idx / 8) * 8);
+
+ reg >>= (idx % 8) * 8;
+ return reg;
+}
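A small, hypothetical debugging loop shows how the accessor could be used; the 256-entry table size is an assumption taken from the qpn[8:1] indexing described for init_qpmap_table() just below.

/* Illustrative only: dump the QP mapping table via the accessor above. */
static void dump_qp_map_sketch(struct hfi1_devdata *dd)
{
	int i;

	for (i = 0; i < 256; i++)
		dd_dev_info(dd, "qpmap[%d] = %u\n", i,
			    hfi1_get_qp_map(dd, i));
}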
+
+/**
+ * init_qpmap_table - init qp map
+ * @dd: device data
+ * @first_ctxt: first context
+ * @last_ctxt: last context
*
 * This routine sets the qpn mapping table that
* is indexed by qpn[8:1].
@@ -14129,6 +14249,12 @@ static void complete_rsm_map_table(struct hfi1_devdata *dd,
}
}
+/* Return true if a receive side mapping rule is set at rule_index */
+static bool has_rsm_rule(struct hfi1_devdata *dd, u8 rule_index)
+{
+ return read_csr(dd, RCV_RSM_CFG + (8 * rule_index)) != 0;
+}
+
/*
* Add a receive side mapping rule.
*/
@@ -14164,15 +14290,15 @@ static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index)
}
/* return the number of RSM map table entries that will be used for QOS */
-static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
+static int qos_rmt_entries(unsigned int n_krcv_queues, unsigned int *mp,
unsigned int *np)
{
int i;
unsigned int m, n;
- u8 max_by_vl = 0;
+ uint max_by_vl = 0;
/* is QOS active at all? */
- if (dd->n_krcv_queues <= MIN_KERNEL_KCTXTS ||
+ if (n_krcv_queues < MIN_KERNEL_KCTXTS ||
num_vls == 1 ||
krcvqsset <= 1)
goto no_qos;
@@ -14209,8 +14335,8 @@ no_qos:
/**
* init_qos - init RX qos
- * @dd - device data
- * @rmt - RSM map table
+ * @dd: device data
+ * @rmt: RSM map table
*
* This routine initializes Rule 0 and the RSM map table to implement
* quality of service (qos).
@@ -14230,7 +14356,7 @@ static void init_qos(struct hfi1_devdata *dd, struct rsm_map_table *rmt)
if (!rmt)
goto bail;
- rmt_entries = qos_rmt_entries(dd, &m, &n);
+ rmt_entries = qos_rmt_entries(dd->n_krcv_queues - 1, &m, &n);
if (rmt_entries == 0)
goto bail;
qpns_per_vl = 1 << m;
@@ -14240,7 +14366,7 @@ static void init_qos(struct hfi1_devdata *dd, struct rsm_map_table *rmt)
if (rmt->used + rmt_entries >= NUM_MAP_ENTRIES)
goto bail;
- /* add qos entries to the the RSM map table */
+ /* add qos entries to the RSM map table */
for (i = 0, ctxt = FIRST_KERNEL_KCTXT; i < num_vls; i++) {
unsigned tctxt;
@@ -14365,80 +14491,141 @@ static void init_fecn_handling(struct hfi1_devdata *dd,
rmt->used += total_cnt;
}
-/* Initialize RSM for VNIC */
-void hfi1_init_vnic_rsm(struct hfi1_devdata *dd)
+static inline bool hfi1_is_rmt_full(int start, int spare)
+{
+ return (start + spare) > NUM_MAP_ENTRIES;
+}
+
+static bool hfi1_netdev_update_rmt(struct hfi1_devdata *dd)
{
u8 i, j;
u8 ctx_id = 0;
u64 reg;
u32 regoff;
- struct rsm_rule_data rrd;
+ int rmt_start = hfi1_netdev_get_free_rmt_idx(dd);
+ int ctxt_count = hfi1_netdev_ctxt_count(dd);
- if (hfi1_vnic_is_rsm_full(dd, NUM_VNIC_MAP_ENTRIES)) {
- dd_dev_err(dd, "Vnic RSM disabled, rmt entries used = %d\n",
- dd->vnic.rmt_start);
- return;
+ /* We already have contexts mapped in RMT */
+ if (has_rsm_rule(dd, RSM_INS_VNIC) || has_rsm_rule(dd, RSM_INS_AIP)) {
+ dd_dev_info(dd, "Contexts are already mapped in RMT\n");
+ return true;
}
- dev_dbg(&(dd)->pcidev->dev, "Vnic rsm start = %d, end %d\n",
- dd->vnic.rmt_start,
- dd->vnic.rmt_start + NUM_VNIC_MAP_ENTRIES);
+ if (hfi1_is_rmt_full(rmt_start, NUM_NETDEV_MAP_ENTRIES)) {
+ dd_dev_err(dd, "Not enough RMT entries used = %d\n",
+ rmt_start);
+ return false;
+ }
+
+ dev_dbg(&(dd)->pcidev->dev, "RMT start = %d, end %d\n",
+ rmt_start,
+ rmt_start + NUM_NETDEV_MAP_ENTRIES);
/* Update RSM mapping table, 32 regs, 256 entries - 1 ctx per byte */
- regoff = RCV_RSM_MAP_TABLE + (dd->vnic.rmt_start / 8) * 8;
+ regoff = RCV_RSM_MAP_TABLE + (rmt_start / 8) * 8;
reg = read_csr(dd, regoff);
- for (i = 0; i < NUM_VNIC_MAP_ENTRIES; i++) {
- /* Update map register with vnic context */
- j = (dd->vnic.rmt_start + i) % 8;
+ for (i = 0; i < NUM_NETDEV_MAP_ENTRIES; i++) {
+ /* Update map register with netdev context */
+ j = (rmt_start + i) % 8;
reg &= ~(0xffllu << (j * 8));
- reg |= (u64)dd->vnic.ctxt[ctx_id++]->ctxt << (j * 8);
- /* Wrap up vnic ctx index */
- ctx_id %= dd->vnic.num_ctxt;
+ reg |= (u64)hfi1_netdev_get_ctxt(dd, ctx_id++)->ctxt << (j * 8);
+ /* Wrap up netdev ctx index */
+ ctx_id %= ctxt_count;
/* Write back map register */
- if (j == 7 || ((i + 1) == NUM_VNIC_MAP_ENTRIES)) {
+ if (j == 7 || ((i + 1) == NUM_NETDEV_MAP_ENTRIES)) {
dev_dbg(&(dd)->pcidev->dev,
- "Vnic rsm map reg[%d] =0x%llx\n",
+ "RMT[%d] =0x%llx\n",
regoff - RCV_RSM_MAP_TABLE, reg);
write_csr(dd, regoff, reg);
regoff += 8;
- if (i < (NUM_VNIC_MAP_ENTRIES - 1))
+ if (i < (NUM_NETDEV_MAP_ENTRIES - 1))
reg = read_csr(dd, regoff);
}
}
- /* Add rule for vnic */
- rrd.offset = dd->vnic.rmt_start;
- rrd.pkt_type = 4;
- /* Match 16B packets */
- rrd.field1_off = L2_TYPE_MATCH_OFFSET;
- rrd.mask1 = L2_TYPE_MASK;
- rrd.value1 = L2_16B_VALUE;
- /* Match ETH L4 packets */
- rrd.field2_off = L4_TYPE_MATCH_OFFSET;
- rrd.mask2 = L4_16B_TYPE_MASK;
- rrd.value2 = L4_16B_ETH_VALUE;
- /* Calc context from veswid and entropy */
- rrd.index1_off = L4_16B_HDR_VESWID_OFFSET;
- rrd.index1_width = ilog2(NUM_VNIC_MAP_ENTRIES);
- rrd.index2_off = L2_16B_ENTROPY_OFFSET;
- rrd.index2_width = ilog2(NUM_VNIC_MAP_ENTRIES);
- add_rsm_rule(dd, RSM_INS_VNIC, &rrd);
-
- /* Enable RSM if not already enabled */
+ return true;
+}
+
+static void hfi1_enable_rsm_rule(struct hfi1_devdata *dd,
+ int rule, struct rsm_rule_data *rrd)
+{
+ if (!hfi1_netdev_update_rmt(dd)) {
+ dd_dev_err(dd, "Failed to update RMT for RSM%d rule\n", rule);
+ return;
+ }
+
+ add_rsm_rule(dd, rule, rrd);
add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
}
+void hfi1_init_aip_rsm(struct hfi1_devdata *dd)
+{
+ /*
+ * go through with the initialisation only if this rule actually doesn't
+ * exist yet
+ */
+ if (atomic_fetch_inc(&dd->ipoib_rsm_usr_num) == 0) {
+ int rmt_start = hfi1_netdev_get_free_rmt_idx(dd);
+ struct rsm_rule_data rrd = {
+ .offset = rmt_start,
+ .pkt_type = IB_PACKET_TYPE,
+ .field1_off = LRH_BTH_MATCH_OFFSET,
+ .mask1 = LRH_BTH_MASK,
+ .value1 = LRH_BTH_VALUE,
+ .field2_off = BTH_DESTQP_MATCH_OFFSET,
+ .mask2 = BTH_DESTQP_MASK,
+ .value2 = BTH_DESTQP_VALUE,
+ .index1_off = DETH_AIP_SQPN_SELECT_OFFSET +
+ ilog2(NUM_NETDEV_MAP_ENTRIES),
+ .index1_width = ilog2(NUM_NETDEV_MAP_ENTRIES),
+ .index2_off = DETH_AIP_SQPN_SELECT_OFFSET,
+ .index2_width = ilog2(NUM_NETDEV_MAP_ENTRIES)
+ };
+
+ hfi1_enable_rsm_rule(dd, RSM_INS_AIP, &rrd);
+ }
+}
+
+/* Initialize RSM for VNIC */
+void hfi1_init_vnic_rsm(struct hfi1_devdata *dd)
+{
+ int rmt_start = hfi1_netdev_get_free_rmt_idx(dd);
+ struct rsm_rule_data rrd = {
+ /* Add rule for vnic */
+ .offset = rmt_start,
+ .pkt_type = 4,
+ /* Match 16B packets */
+ .field1_off = L2_TYPE_MATCH_OFFSET,
+ .mask1 = L2_TYPE_MASK,
+ .value1 = L2_16B_VALUE,
+ /* Match ETH L4 packets */
+ .field2_off = L4_TYPE_MATCH_OFFSET,
+ .mask2 = L4_16B_TYPE_MASK,
+ .value2 = L4_16B_ETH_VALUE,
+ /* Calc context from veswid and entropy */
+ .index1_off = L4_16B_HDR_VESWID_OFFSET,
+ .index1_width = ilog2(NUM_NETDEV_MAP_ENTRIES),
+ .index2_off = L2_16B_ENTROPY_OFFSET,
+ .index2_width = ilog2(NUM_NETDEV_MAP_ENTRIES)
+ };
+
+ hfi1_enable_rsm_rule(dd, RSM_INS_VNIC, &rrd);
+}
+
void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd)
{
clear_rsm_rule(dd, RSM_INS_VNIC);
+}
- /* Disable RSM if used only by vnic */
- if (dd->vnic.rmt_start == 0)
- clear_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
+void hfi1_deinit_aip_rsm(struct hfi1_devdata *dd)
+{
+ /* only actually clear the rule if it's the last user asking to do so */
+ if (atomic_fetch_add_unless(&dd->ipoib_rsm_usr_num, -1, 0) == 1)
+ clear_rsm_rule(dd, RSM_INS_AIP);
}
-static void init_rxe(struct hfi1_devdata *dd)
+static int init_rxe(struct hfi1_devdata *dd)
{
struct rsm_map_table *rmt;
u64 val;
@@ -14447,12 +14634,15 @@ static void init_rxe(struct hfi1_devdata *dd)
write_csr(dd, RCV_ERR_MASK, ~0ull);
rmt = alloc_rsm_map_table(dd);
+ if (!rmt)
+ return -ENOMEM;
+
/* set up QOS, including the QPN map table */
init_qos(dd, rmt);
init_fecn_handling(dd, rmt);
complete_rsm_map_table(dd, rmt);
- /* record number of used rsm map entries for vnic */
- dd->vnic.rmt_start = rmt->used;
+ /* record number of used rsm map entries for netdev */
+ hfi1_netdev_set_free_rmt_idx(dd, rmt->used);
kfree(rmt);
/*
@@ -14473,6 +14663,7 @@ static void init_rxe(struct hfi1_devdata *dd)
val |= ((4ull & RCV_BYPASS_HDR_SIZE_MASK) <<
RCV_BYPASS_HDR_SIZE_SHIFT);
write_csr(dd, RCV_BYPASS, val);
+ return 0;
}
static void init_other(struct hfi1_devdata *dd)
@@ -14654,7 +14845,7 @@ int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt)
}
/*
- * Start doing the clean up the the chip. Our clean up happens in multiple
+ * Start doing the clean up of the chip. Our clean up happens in multiple
* stages and this is just the first.
*/
void hfi1_start_cleanup(struct hfi1_devdata *dd)
@@ -14721,7 +14912,7 @@ static int obtain_boardname(struct hfi1_devdata *dd)
{
/* generic board description */
const char generic[] =
- "Intel Omni-Path Host Fabric Interface Adapter 100 Series";
+ "Cornelis Omni-Path Host Fabric Interface Adapter 100 Series";
unsigned long size;
int ret;
@@ -14783,8 +14974,7 @@ err_exit:
/**
* hfi1_init_dd() - Initialize most of the dd structure.
- * @dev: the pci_dev for hfi1_ib device
- * @ent: pci_device_id struct for this dev
+ * @dd: the dd device
*
* This is global, and is called directly at init to set up the
* chip-specific function pointers for later use.
@@ -15005,12 +15195,20 @@ int hfi1_init_dd(struct hfi1_devdata *dd)
(dd->revision >> CCE_REVISION_SW_SHIFT)
& CCE_REVISION_SW_MASK);
+ /* alloc VNIC/AIP rx data */
+ ret = hfi1_alloc_rx(dd);
+ if (ret)
+ goto bail_cleanup;
+
ret = set_up_context_variables(dd);
if (ret)
goto bail_cleanup;
/* set initial RXE CSRs */
- init_rxe(dd);
+ ret = init_rxe(dd);
+ if (ret)
+ goto bail_cleanup;
+
/* set initial TXE CSRs */
init_txe(dd);
/* set initial non-RXE, non-TXE CSRs */
@@ -15090,7 +15288,7 @@ int hfi1_init_dd(struct hfi1_devdata *dd)
init_completion(&dd->user_comp);
	/* The user refcount starts with one to indicate an active device */
- atomic_set(&dd->user_refcount, 1);
+ refcount_set(&dd->user_refcount, 1);
goto bail;
@@ -15102,6 +15300,7 @@ bail_clear_intr:
hfi1_comp_vectors_clean_up(dd);
msix_clean_up_interrupts(dd);
bail_cleanup:
+ hfi1_free_rx(dd);
hfi1_pcie_ddcleanup(dd);
bail_free:
hfi1_free_devdata(dd);
@@ -15130,10 +15329,11 @@ static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,
/**
* create_pbc - build a pbc for transmission
+ * @ppd: info of physical Hfi port
* @flags: special case flags or-ed in built pbc
- * @srate: static rate
+ * @srate_mbs: static rate
* @vl: vl
- * @dwlen: dword length (header words + data words + pbc words)
+ * @dw_len: dword length (header words + data words + pbc words)
*
* Create a PBC with the given flags, rate, VL, and length.
*
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 4e6c3556ec48..6992f6d40255 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -1,52 +1,10 @@
-#ifndef _CHIP_H
-#define _CHIP_H
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*/
+#ifndef _CHIP_H
+#define _CHIP_H
/*
 * This file contains all of the defines that are specific to the HFI chip
*/
@@ -358,6 +316,8 @@
#define MAX_EAGER_BUFFER (256 * 1024)
#define MAX_EAGER_BUFFER_TOTAL (64 * (1 << 20)) /* max per ctxt 64MB */
#define MAX_EXPECTED_BUFFER (2048 * 1024)
+#define HFI1_MIN_HDRQ_EGRBUF_CNT 32
+#define HFI1_MAX_HDRQ_EGRBUF_CNT 16352
/*
* Receive expected base and count and eager base and count increment -
@@ -699,6 +659,10 @@ static inline u32 chip_rcv_array_count(struct hfi1_devdata *dd)
return read_csr(dd, RCV_ARRAY_CNT);
}
+u8 encode_rcv_header_entry_size(u8 size);
+int hfi1_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt);
+void set_hdrq_regs(struct hfi1_devdata *dd, u8 ctxt, u8 entsize, u16 hdrcnt);
+
u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
u32 dw_len);
@@ -807,7 +771,6 @@ int is_bx(struct hfi1_devdata *dd);
bool is_urg_masked(struct hfi1_ctxtdata *rcd);
u32 read_physical_state(struct hfi1_devdata *dd);
u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate);
-const char *opa_lstate_name(u32 lstate);
const char *opa_pstate_name(u32 pstate);
u32 driver_pstate(struct hfi1_pportdata *ppd);
u32 driver_lstate(struct hfi1_pportdata *ppd);
@@ -816,11 +779,6 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok);
int release_lcb_access(struct hfi1_devdata *dd, int sleep_ok);
#define LCB_START DC_LCB_CSRS
#define LCB_END DC_8051_CSRS /* next block is 8051 */
-static inline int is_lcb_offset(u32 offset)
-{
- return (offset >= LCB_START && offset < LCB_END);
-}
-
extern uint num_vls;
extern uint disable_integrity;
@@ -859,6 +817,7 @@ static inline int idx_from_vl(int vl)
enum {
C_RCV_OVF = 0,
C_RX_LEN_ERR,
+ C_RX_SHORT_ERR,
C_RX_ICRC_ERR,
C_RX_EBP,
C_RX_TID_FULL,
@@ -926,6 +885,7 @@ enum {
C_DC_PG_STS_TX_MBE_CNT,
C_SW_CPU_INTR,
C_SW_CPU_RCV_LIM,
+ C_SW_CTX0_SEQ_DROP,
C_SW_VTX_WAIT,
C_SW_PIO_WAIT,
C_SW_PIO_DRAIN,
@@ -1245,6 +1205,7 @@ enum {
C_SW_IBP_RDMA_SEQ,
C_SW_IBP_UNALIGNED,
C_SW_IBP_SEQ_NAK,
+ C_SW_IBP_RC_CRWAITS,
C_SW_CPU_RC_ACKS,
C_SW_CPU_RC_QACKS,
C_SW_CPU_RC_DELAYED_COMP,
@@ -1438,13 +1399,16 @@ irqreturn_t general_interrupt(int irq, void *data);
irqreturn_t sdma_interrupt(int irq, void *data);
irqreturn_t receive_context_interrupt(int irq, void *data);
irqreturn_t receive_context_thread(int irq, void *data);
+irqreturn_t receive_context_interrupt_napi(int irq, void *data);
int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set);
void init_qsfp_int(struct hfi1_devdata *dd);
-void clear_all_interrupts(struct hfi1_devdata *dd);
void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr);
void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr);
void reset_interrupts(struct hfi1_devdata *dd);
+u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx);
+void hfi1_init_aip_rsm(struct hfi1_devdata *dd);
+void hfi1_deinit_aip_rsm(struct hfi1_devdata *dd);
/*
* Interrupt source table.
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index ab3589d17aee..d79e25d20fb8 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -1,53 +1,11 @@
-#ifndef DEF_CHIP_REG
-#define DEF_CHIP_REG
-
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+#ifndef DEF_CHIP_REG
+#define DEF_CHIP_REG
+
#define CORE 0x000000000000
#define CCE (CORE + 0x000000000000)
#define ASIC (CORE + 0x000000400000)
@@ -381,6 +339,7 @@
#define DC_LCB_STS_LINK_TRANSFER_ACTIVE (DC_LCB_CSRS + 0x000000000468)
#define DC_LCB_STS_ROUND_TRIP_LTP_CNT (DC_LCB_CSRS + 0x0000000004B0)
#define RCV_LENGTH_ERR_CNT 0
+#define RCV_SHORT_ERR_CNT 2
#define RCV_ICRC_ERR_CNT 6
#define RCV_EBP_CNT 9
#define RCV_BUF_OVFL_CNT 10
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index d47da7b0438f..8abc902b96f3 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*/
#ifndef _COMMON_H
@@ -72,13 +30,6 @@
* compilation unit
*/
-/*
- * If a packet's QP[23:16] bits match this value, then it is
- * a PSM packet and the hardware will expect a KDETH header
- * following the BTH.
- */
-#define DEFAULT_KDETH_QP 0x80
-
/* driver/hw feature set bitmask */
#define HFI1_CAP_USER_SHIFT 24
#define HFI1_CAP_MASK ((1UL << HFI1_CAP_USER_SHIFT) - 1)
@@ -149,7 +100,8 @@
HFI1_CAP_NO_INTEGRITY | \
HFI1_CAP_PKEY_CHECK | \
HFI1_CAP_TID_RDMA | \
- HFI1_CAP_OPFN) << \
+ HFI1_CAP_OPFN | \
+ HFI1_CAP_AIP) << \
HFI1_CAP_USER_SHIFT)
/*
* Set of capabilities that need to be enabled for kernel context in
@@ -166,6 +118,7 @@
HFI1_CAP_PKEY_CHECK | \
HFI1_CAP_MULTI_PKT_EGR | \
HFI1_CAP_EXTENDED_PSN | \
+ HFI1_CAP_AIP | \
((HFI1_CAP_HDRSUPP | \
HFI1_CAP_MULTI_PKT_EGR | \
HFI1_CAP_STATIC_RATE_CTRL | \
@@ -184,61 +137,6 @@
#define HFI1_USER_SWVERSION ((HFI1_USER_SWMAJOR << HFI1_SWMAJOR_SHIFT) | \
HFI1_USER_SWMINOR)
-#ifndef HFI1_KERN_TYPE
-#define HFI1_KERN_TYPE 0
-#endif
-
-/*
- * Similarly, this is the kernel version going back to the user. It's
- * slightly different, in that we want to tell if the driver was built as
- * part of a Intel release, or from the driver from openfabrics.org,
- * kernel.org, or a standard distribution, for support reasons.
- * The high bit is 0 for non-Intel and 1 for Intel-built/supplied.
- *
- * It's returned by the driver to the user code during initialization in the
- * spi_sw_version field of hfi1_base_info, so the user code can in turn
- * check for compatibility with the kernel.
-*/
-#define HFI1_KERN_SWVERSION ((HFI1_KERN_TYPE << 31) | HFI1_USER_SWVERSION)
-
-/*
- * Define the driver version number. This is something that refers only
- * to the driver itself, not the software interfaces it supports.
- */
-#ifndef HFI1_DRIVER_VERSION_BASE
-#define HFI1_DRIVER_VERSION_BASE "0.9-294"
-#endif
-
-/* create the final driver version string */
-#ifdef HFI1_IDSTR
-#define HFI1_DRIVER_VERSION HFI1_DRIVER_VERSION_BASE " " HFI1_IDSTR
-#else
-#define HFI1_DRIVER_VERSION HFI1_DRIVER_VERSION_BASE
-#endif
-
-/*
- * Diagnostics can send a packet by writing the following
- * struct to the diag packet special file.
- *
- * This allows a custom PBC qword, so that special modes and deliberate
- * changes to CRCs can be used.
- */
-#define _DIAG_PKT_VERS 1
-struct diag_pkt {
- __u16 version; /* structure version */
- __u16 unit; /* which device */
- __u16 sw_index; /* send sw index to use */
- __u16 len; /* data length, in bytes */
- __u16 port; /* port number */
- __u16 unused;
- __u32 flags; /* call flags */
- __u64 data; /* user data pointer */
- __u64 pbc; /* PBC for the packet */
-};
-
-/* diag_pkt flags */
-#define F_DIAGPKT_WAIT 0x1 /* wait until packet is sent */
-
/*
* The next set of defines are for packet headers, and chip register
* and memory bits that are visible to and/or used by user-mode software.
@@ -323,6 +221,9 @@ struct diag_pkt {
/* RHF receive type error - bypass packet errors */
#define RHF_RTE_BYPASS_NO_ERR 0x0
+/* MAX RcvSEQ */
+#define RHF_MAX_SEQ 13
+
/* IB - LRH header constants */
#define HFI1_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */
#define HFI1_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index 15efb4a380b2..ac37ab7f8995 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -1,54 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015-2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/kernel.h>
#include <linux/export.h>
-#include <linux/module.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ratelimit.h>
@@ -64,34 +22,6 @@
static struct dentry *hfi1_dbg_root;
-/* wrappers to enforce srcu in seq file */
-ssize_t hfi1_seq_read(struct file *file, char __user *buf, size_t size,
- loff_t *ppos)
-{
- struct dentry *d = file->f_path.dentry;
- ssize_t r;
-
- r = debugfs_file_get(d);
- if (unlikely(r))
- return r;
- r = seq_read(file, buf, size, ppos);
- debugfs_file_put(d);
- return r;
-}
-
-loff_t hfi1_seq_lseek(struct file *file, loff_t offset, int whence)
-{
- struct dentry *d = file->f_path.dentry;
- loff_t r;
-
- r = debugfs_file_get(d);
- if (unlikely(r))
- return r;
- r = seq_lseek(file, offset, whence);
- debugfs_file_put(d);
- return r;
-}
-
#define private2dd(file) (file_inode(file)->i_private)
#define private2ppd(file) (file_inode(file)->i_private)
@@ -379,7 +309,7 @@ static void *_rcds_seq_next(struct seq_file *s, void *v, loff_t *pos)
struct hfi1_devdata *dd = dd_from_dev(ibd);
++*pos;
- if (!dd->rcd || *pos >= dd->n_krcv_queues)
+ if (!dd->rcd || *pos >= dd->num_rcv_contexts)
return NULL;
return pos;
}
@@ -985,18 +915,10 @@ static ssize_t qsfp2_debugfs_read(struct file *file, char __user *buf,
static int __i2c_debugfs_open(struct inode *in, struct file *fp, u32 target)
{
struct hfi1_pportdata *ppd;
- int ret;
-
- if (!try_module_get(THIS_MODULE))
- return -ENODEV;
ppd = private2ppd(fp);
- ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0);
- if (ret) /* failed - release the module */
- module_put(THIS_MODULE);
-
- return ret;
+ return acquire_chip_resource(ppd->dd, i2c_target(target), 0);
}
static int i2c1_debugfs_open(struct inode *in, struct file *fp)
@@ -1016,7 +938,6 @@ static int __i2c_debugfs_release(struct inode *in, struct file *fp, u32 target)
ppd = private2ppd(fp);
release_chip_resource(ppd->dd, i2c_target(target));
- module_put(THIS_MODULE);
return 0;
}
@@ -1034,18 +955,10 @@ static int i2c2_debugfs_release(struct inode *in, struct file *fp)
static int __qsfp_debugfs_open(struct inode *in, struct file *fp, u32 target)
{
struct hfi1_pportdata *ppd;
- int ret;
-
- if (!try_module_get(THIS_MODULE))
- return -ENODEV;
ppd = private2ppd(fp);
- ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0);
- if (ret) /* failed - release the module */
- module_put(THIS_MODULE);
-
- return ret;
+ return acquire_chip_resource(ppd->dd, i2c_target(target), 0);
}
static int qsfp1_debugfs_open(struct inode *in, struct file *fp)
@@ -1065,7 +978,6 @@ static int __qsfp_debugfs_release(struct inode *in, struct file *fp, u32 target)
ppd = private2ppd(fp);
release_chip_resource(ppd->dd, i2c_target(target));
- module_put(THIS_MODULE);
return 0;
}
@@ -1155,6 +1067,7 @@ static int exprom_wp_debugfs_release(struct inode *in, struct file *fp)
{ \
.name = nm, \
.ops = { \
+ .owner = THIS_MODULE, \
.read = readroutine, \
.write = writeroutine, \
.llseek = generic_file_llseek, \
@@ -1165,6 +1078,7 @@ static int exprom_wp_debugfs_release(struct inode *in, struct file *fp)
{ \
.name = nm, \
.ops = { \
+ .owner = THIS_MODULE, \
.read = readf, \
.write = writef, \
.llseek = generic_file_llseek, \
@@ -1374,7 +1288,7 @@ static void _driver_stats_seq_stop(struct seq_file *s, void *v)
{
}
-static u64 hfi1_sps_ints(void)
+static void hfi1_sps_show_ints(struct seq_file *s)
{
unsigned long index, flags;
struct hfi1_devdata *dd;
@@ -1385,24 +1299,19 @@ static u64 hfi1_sps_ints(void)
sps_ints += get_all_cpu_total(dd->int_counter);
}
xa_unlock_irqrestore(&hfi1_dev_table, flags);
- return sps_ints;
+ seq_write(s, &sps_ints, sizeof(u64));
}
static int _driver_stats_seq_show(struct seq_file *s, void *v)
{
loff_t *spos = v;
- char *buffer;
u64 *stats = (u64 *)&hfi1_stats;
- size_t sz = seq_get_buf(s, &buffer);
- if (sz < sizeof(u64))
- return SEQ_SKIP;
/* special case for interrupts */
if (*spos == 0)
- *(u64 *)buffer = hfi1_sps_ints();
+ hfi1_sps_show_ints(s);
else
- *(u64 *)buffer = stats[*spos];
- seq_commit(s, sizeof(u64));
+ seq_write(s, stats + *spos, sizeof(u64));
return 0;
}
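
Editor's note: the stats hunk above replaces the seq_get_buf()/seq_commit() pairing with seq_write(), which handles buffer sizing internally. A hedged, self-contained sketch of that pattern (the show routine and the counter are hypothetical, not from this patch):

#include <linux/seq_file.h>
#include <linux/types.h>

/* Hypothetical: emit one u64 per iterator position as raw little-endian bytes. */
static int example_stats_seq_show(struct seq_file *s, void *v)
{
	loff_t *spos = v;
	u64 value = *spos;	/* stand-in for a real counter lookup */

	/* seq_write() copies the bytes and tracks overflow for us. */
	seq_write(s, &value, sizeof(value));
	return 0;
}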
diff --git a/drivers/infiniband/hw/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h
index 57e582caa5eb..65b48839abc6 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.h
+++ b/drivers/infiniband/hw/hfi1/debugfs.h
@@ -1,52 +1,11 @@
-#ifndef _HFI1_DEBUGFS_H
-#define _HFI1_DEBUGFS_H
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016, 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+#ifndef _HFI1_DEBUGFS_H
+#define _HFI1_DEBUGFS_H
+
struct hfi1_ibdev;
#define DEBUGFS_SEQ_FILE_OPS(name) \
@@ -74,16 +33,11 @@ static int _##name##_open(struct inode *inode, struct file *s) \
static const struct file_operations _##name##_file_ops = { \
.owner = THIS_MODULE, \
.open = _##name##_open, \
- .read = hfi1_seq_read, \
- .llseek = hfi1_seq_lseek, \
+ .read = seq_read, \
+ .llseek = seq_lseek, \
.release = seq_release \
}
-
-ssize_t hfi1_seq_read(struct file *file, char __user *buf, size_t size,
- loff_t *ppos);
-loff_t hfi1_seq_lseek(struct file *file, loff_t offset, int whence);
-
#ifdef CONFIG_DEBUG_FS
void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd);
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd);
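
Editor's note: with .owner = THIS_MODULE set in the file_operations, the plain seq_read()/seq_lseek() helpers are safe to use directly, which is why the hfi1_seq_* wrappers are dropped above. A minimal sketch of the resulting pattern, with hypothetical names and a single_open() based show routine rather than the driver's seq_operations:

#include <linux/fs.h>
#include <linux/module.h>
#include <linux/seq_file.h>

/* Hypothetical single-record show routine. */
static int example_show(struct seq_file *s, void *unused)
{
	seq_puts(s, "hello from debugfs\n");
	return 0;
}

static int example_open(struct inode *inode, struct file *file)
{
	return single_open(file, example_show, inode->i_private);
}

static const struct file_operations example_fops = {
	.owner	 = THIS_MODULE,		/* pins the module while the file is open */
	.open	 = example_open,
	.read	 = seq_read,		/* no per-driver srcu wrapper needed */
	.llseek	 = seq_lseek,
	.release = single_release,
};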
diff --git a/drivers/infiniband/hw/hfi1/device.c b/drivers/infiniband/hw/hfi1/device.c
index bbb6069dec2a..a98a4175e53b 100644
--- a/drivers/infiniband/hw/hfi1/device.c
+++ b/drivers/infiniband/hw/hfi1/device.c
@@ -1,60 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#include <linux/cdev.h>
-#include <linux/module.h>
#include <linux/device.h>
#include <linux/fs.h>
#include "hfi.h"
#include "device.h"
-static struct class *class;
-static struct class *user_class;
+static char *hfi1_devnode(const struct device *dev, umode_t *mode)
+{
+ if (mode)
+ *mode = 0600;
+ return kasprintf(GFP_KERNEL, "%s", dev_name(dev));
+}
+
+static const struct class class = {
+ .name = "hfi1",
+ .devnode = hfi1_devnode,
+};
+
+static char *hfi1_user_devnode(const struct device *dev, umode_t *mode)
+{
+ if (mode)
+ *mode = 0666;
+ return kasprintf(GFP_KERNEL, "%s", dev_name(dev));
+}
+
+static const struct class user_class = {
+ .name = "hfi1_user",
+ .devnode = hfi1_user_devnode,
+};
static dev_t hfi1_dev;
int hfi1_cdev_init(int minor, const char *name,
@@ -80,15 +58,15 @@ int hfi1_cdev_init(int minor, const char *name,
}
if (user_accessible)
- device = device_create(user_class, NULL, dev, NULL, "%s", name);
+ device = device_create(&user_class, NULL, dev, NULL, "%s", name);
else
- device = device_create(class, NULL, dev, NULL, "%s", name);
+ device = device_create(&class, NULL, dev, NULL, "%s", name);
if (IS_ERR(device)) {
ret = PTR_ERR(device);
+ pr_err("Could not create device for minor %d, %s (err %pe)\n",
+ minor, name, device);
device = NULL;
- pr_err("Could not create device for minor %d, %s (err %d)\n",
- minor, name, -ret);
cdev_del(cdev);
}
done:
@@ -115,26 +93,6 @@ const char *class_name(void)
return hfi1_class_name;
}
-static char *hfi1_devnode(struct device *dev, umode_t *mode)
-{
- if (mode)
- *mode = 0600;
- return kasprintf(GFP_KERNEL, "%s", dev_name(dev));
-}
-
-static const char *hfi1_class_name_user = "hfi1_user";
-static const char *class_name_user(void)
-{
- return hfi1_class_name_user;
-}
-
-static char *hfi1_user_devnode(struct device *dev, umode_t *mode)
-{
- if (mode)
- *mode = 0666;
- return kasprintf(GFP_KERNEL, "%s", dev_name(dev));
-}
-
int __init dev_init(void)
{
int ret;
@@ -145,27 +103,21 @@ int __init dev_init(void)
goto done;
}
- class = class_create(THIS_MODULE, class_name());
- if (IS_ERR(class)) {
- ret = PTR_ERR(class);
+ ret = class_register(&class);
+ if (ret) {
pr_err("Could not create device class (err %d)\n", -ret);
unregister_chrdev_region(hfi1_dev, HFI1_NMINORS);
goto done;
}
- class->devnode = hfi1_devnode;
- user_class = class_create(THIS_MODULE, class_name_user());
- if (IS_ERR(user_class)) {
- ret = PTR_ERR(user_class);
+ ret = class_register(&user_class);
+ if (ret) {
pr_err("Could not create device class for user accessible files (err %d)\n",
-ret);
- class_destroy(class);
- class = NULL;
- user_class = NULL;
+ class_unregister(&class);
unregister_chrdev_region(hfi1_dev, HFI1_NMINORS);
goto done;
}
- user_class->devnode = hfi1_user_devnode;
done:
return ret;
@@ -173,11 +125,8 @@ done:
void dev_cleanup(void)
{
- class_destroy(class);
- class = NULL;
-
- class_destroy(user_class);
- user_class = NULL;
+ class_unregister(&class);
+ class_unregister(&user_class);
unregister_chrdev_region(hfi1_dev, HFI1_NMINORS);
}
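
Editor's note: device.c now embeds the two classes as const objects and registers them, instead of class_create() followed by poking ->devnode. A generic sketch of the same pattern for a hypothetical driver (names assumed, not part of this patch):

#include <linux/device.h>
#include <linux/module.h>
#include <linux/slab.h>

static char *example_devnode(const struct device *dev, umode_t *mode)
{
	if (mode)
		*mode = 0600;		/* root-only device node */
	return kasprintf(GFP_KERNEL, "%s", dev_name(dev));
}

static const struct class example_class = {
	.name	 = "example",
	.devnode = example_devnode,
};

static int __init example_init(void)
{
	/* class_register() on a const class replaces class_create() + ->devnode */
	return class_register(&example_class);
}

static void __exit example_exit(void)
{
	class_unregister(&example_class);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("Dual BSD/GPL");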
diff --git a/drivers/infiniband/hw/hfi1/device.h b/drivers/infiniband/hw/hfi1/device.h
index c3ec19cb0ac9..a91bea426ba5 100644
--- a/drivers/infiniband/hw/hfi1/device.h
+++ b/drivers/infiniband/hw/hfi1/device.h
@@ -1,52 +1,11 @@
-#ifndef _HFI1_DEVICE_H
-#define _HFI1_DEVICE_H
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+#ifndef _HFI1_DEVICE_H
+#define _HFI1_DEVICE_H
+
int hfi1_cdev_init(int minor, const char *name,
const struct file_operations *fops,
struct cdev *cdev, struct device **devp,
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 01aa1f132f55..06487e20f723 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -1,48 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015-2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015-2020 Intel Corporation.
+ * Copyright(c) 2021 Cornelis Networks.
*/
#include <linux/spinlock.h>
@@ -54,6 +13,7 @@
#include <linux/module.h>
#include <linux/prefetch.h>
#include <rdma/ib_verbs.h>
+#include <linux/etherdevice.h>
#include "hfi.h"
#include "trace.h"
@@ -63,15 +23,12 @@
#include "vnic.h"
#include "fault.h"
+#include "ipoib.h"
+#include "netdev.h"
+
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
-/*
- * The size has to be longer than this string, so we can append
- * board/chip information to it in the initialization code.
- */
-const char ib_hfi1_version[] = HFI1_DRIVER_VERSION "\n";
-
DEFINE_MUTEX(hfi1_mutex); /* general driver use */
unsigned int hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU;
@@ -94,7 +51,7 @@ module_param_cb(cap_mask, &cap_ops, &hfi1_cap_mask, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(cap_mask, "Bit mask of enabled/disabled HW features");
MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("Intel Omni-Path Architecture driver");
+MODULE_DESCRIPTION("Cornelis Omni-Path Express driver");
/*
* MAX_PKT_RCV is the max # of packets processed per receive interrupt.
@@ -155,7 +112,7 @@ static int hfi1_caps_get(char *buffer, const struct kernel_param *kp)
cap_mask &= ~HFI1_CAP_LOCKED_SMASK;
cap_mask |= ((cap_mask & HFI1_CAP_K2U) << HFI1_CAP_USER_SHIFT);
- return scnprintf(buffer, PAGE_SIZE, "0x%lx", cap_mask);
+ return sysfs_emit(buffer, "0x%lx\n", cap_mask);
}
struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi)
@@ -411,14 +368,14 @@ drop:
static inline void init_packet(struct hfi1_ctxtdata *rcd,
struct hfi1_packet *packet)
{
- packet->rsize = rcd->rcvhdrqentsize; /* words */
- packet->maxcnt = rcd->rcvhdrq_cnt * packet->rsize; /* words */
+ packet->rsize = get_hdrqentsize(rcd); /* words */
+ packet->maxcnt = get_hdrq_cnt(rcd) * packet->rsize; /* words */
packet->rcd = rcd;
packet->updegr = 0;
packet->etail = -1;
packet->rhf_addr = get_rhf_addr(rcd);
packet->rhf = rhf_to_cpu(packet->rhf_addr);
- packet->rhqoff = rcd->head;
+ packet->rhqoff = hfi1_rcd_head(rcd);
packet->numpkt = 0;
}
@@ -551,22 +508,22 @@ static inline void init_ps_mdata(struct ps_mdata *mdata,
mdata->maxcnt = packet->maxcnt;
mdata->ps_head = packet->rhqoff;
- if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
+ if (get_dma_rtail_setting(rcd)) {
mdata->ps_tail = get_rcvhdrtail(rcd);
if (rcd->ctxt == HFI1_CTRL_CTXT)
- mdata->ps_seq = rcd->seq_cnt;
+ mdata->ps_seq = hfi1_seq_cnt(rcd);
else
mdata->ps_seq = 0; /* not used with DMA_RTAIL */
} else {
mdata->ps_tail = 0; /* used only with DMA_RTAIL*/
- mdata->ps_seq = rcd->seq_cnt;
+ mdata->ps_seq = hfi1_seq_cnt(rcd);
}
}
static inline int ps_done(struct ps_mdata *mdata, u64 rhf,
struct hfi1_ctxtdata *rcd)
{
- if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
+ if (get_dma_rtail_setting(rcd))
return mdata->ps_head == mdata->ps_tail;
return mdata->ps_seq != rhf_rcv_seq(rhf);
}
@@ -592,11 +549,9 @@ static inline void update_ps_mdata(struct ps_mdata *mdata,
mdata->ps_head = 0;
/* Control context must do seq counting */
- if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ||
- (rcd->ctxt == HFI1_CTRL_CTXT)) {
- if (++mdata->ps_seq > 13)
- mdata->ps_seq = 1;
- }
+ if (!get_dma_rtail_setting(rcd) ||
+ rcd->ctxt == HFI1_CTRL_CTXT)
+ mdata->ps_seq = hfi1_seq_incr_wrap(mdata->ps_seq);
}
/*
@@ -734,6 +689,7 @@ static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
{
int ret;
+ packet->rcd->dd->ctx0_seq_drop++;
/* Set up for the next packet */
packet->rhqoff += packet->rsize;
if (packet->rhqoff >= packet->maxcnt)
@@ -749,6 +705,39 @@ static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
return ret;
}
+static void process_rcv_packet_napi(struct hfi1_packet *packet)
+{
+ packet->etype = rhf_rcv_type(packet->rhf);
+
+ /* total length */
+ packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */
+ /* retrieve eager buffer details */
+ packet->etail = rhf_egr_index(packet->rhf);
+ packet->ebuf = get_egrbuf(packet->rcd, packet->rhf,
+ &packet->updegr);
+ /*
+ * Prefetch the contents of the eager buffer. It is
+ * OK to send a negative length to prefetch_range().
+ * The +2 is the size of the RHF.
+ */
+ prefetch_range(packet->ebuf,
+ packet->tlen - ((packet->rcd->rcvhdrqentsize -
+ (rhf_hdrq_offset(packet->rhf)
+ + 2)) * 4));
+
+ packet->rcd->rhf_rcv_function_map[packet->etype](packet);
+ packet->numpkt++;
+
+ /* Set up for the next packet */
+ packet->rhqoff += packet->rsize;
+ if (packet->rhqoff >= packet->maxcnt)
+ packet->rhqoff = 0;
+
+ packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
+ packet->rcd->rhf_offset;
+ packet->rhf = rhf_to_cpu(packet->rhf_addr);
+}
+
static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
{
int ret;
@@ -769,7 +758,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
* The +2 is the size of the RHF.
*/
prefetch_range(packet->ebuf,
- packet->tlen - ((packet->rcd->rcvhdrqentsize -
+ packet->tlen - ((get_hdrqentsize(packet->rcd) -
(rhf_hdrq_offset(packet->rhf)
+ 2)) * 4));
}
@@ -823,22 +812,50 @@ static inline void finish_packet(struct hfi1_packet *packet)
* The only thing we need to do is a final update and call for an
* interrupt
*/
- update_usrhead(packet->rcd, packet->rcd->head, packet->updegr,
+ update_usrhead(packet->rcd, hfi1_rcd_head(packet->rcd), packet->updegr,
packet->etail, rcv_intr_dynamic, packet->numpkt);
}
/*
+ * handle_receive_interrupt_napi_fp - receive a packet
+ * @rcd: the context
+ * @budget: polling budget
+ *
+ * Called from interrupt handler for receive interrupt.
+ * This is the fast path interrupt handler
+ * when executing napi soft irq environment.
+ */
+int handle_receive_interrupt_napi_fp(struct hfi1_ctxtdata *rcd, int budget)
+{
+ struct hfi1_packet packet;
+
+ init_packet(rcd, &packet);
+ if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf)))
+ goto bail;
+
+ while (packet.numpkt < budget) {
+ process_rcv_packet_napi(&packet);
+ if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
+ break;
+
+ process_rcv_update(0, &packet);
+ }
+ hfi1_set_rcd_head(rcd, packet.rhqoff);
+bail:
+ finish_packet(&packet);
+ return packet.numpkt;
+}
+
+/*
* Handle receive interrupts when using the no dma rtail option.
*/
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
{
- u32 seq;
int last = RCV_PKT_OK;
struct hfi1_packet packet;
init_packet(rcd, &packet);
- seq = rhf_rcv_seq(packet.rhf);
- if (seq != rcd->seq_cnt) {
+ if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf))) {
last = RCV_PKT_DONE;
goto bail;
}
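
Editor's note: the fast-path handler added above is written to be driven from a NAPI poll callback, consuming at most @budget packets and returning the count. A hedged sketch of how such a poll routine typically uses that return value (all names here are hypothetical, not from this patch):

#include <linux/netdevice.h>

struct example_rxq {
	struct napi_struct napi;
	void *rcd;			/* per-context receive state */
};

/* Stub standing in for a handler like handle_receive_interrupt_napi_fp(). */
static int example_rx_handler(void *rcd, int budget)
{
	return 0;			/* packets processed, always <= budget */
}

static int example_napi_poll(struct napi_struct *napi, int budget)
{
	struct example_rxq *rxq = container_of(napi, struct example_rxq, napi);
	int work_done = example_rx_handler(rxq->rcd, budget);

	/* Only re-arm interrupts once the budget was not exhausted. */
	if (work_done < budget)
		napi_complete_done(napi, work_done);

	return work_done;
}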
@@ -847,15 +864,12 @@ int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
while (last == RCV_PKT_OK) {
last = process_rcv_packet(&packet, thread);
- seq = rhf_rcv_seq(packet.rhf);
- if (++rcd->seq_cnt > 13)
- rcd->seq_cnt = 1;
- if (seq != rcd->seq_cnt)
+ if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
last = RCV_PKT_DONE;
process_rcv_update(last, &packet);
}
process_rcv_qp_work(&packet);
- rcd->head = packet.rhqoff;
+ hfi1_set_rcd_head(rcd, packet.rhqoff);
bail:
finish_packet(&packet);
return last;
@@ -884,15 +898,14 @@ int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread)
process_rcv_update(last, &packet);
}
process_rcv_qp_work(&packet);
- rcd->head = packet.rhqoff;
+ hfi1_set_rcd_head(rcd, packet.rhqoff);
bail:
finish_packet(&packet);
return last;
}
-static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt)
+static void set_all_fastpath(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
{
- struct hfi1_ctxtdata *rcd;
u16 i;
/*
@@ -900,50 +913,17 @@ static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt)
* interrupt handler only for that context. Otherwise, switch
* interrupt handler for all statically allocated kernel contexts.
*/
- if (ctxt >= dd->first_dyn_alloc_ctxt) {
- rcd = hfi1_rcd_get_by_index_safe(dd, ctxt);
- if (rcd) {
- rcd->do_interrupt =
- &handle_receive_interrupt_nodma_rtail;
- hfi1_rcd_put(rcd);
- }
- return;
- }
-
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) {
- rcd = hfi1_rcd_get_by_index(dd, i);
- if (rcd)
- rcd->do_interrupt =
- &handle_receive_interrupt_nodma_rtail;
+ if (rcd->ctxt >= dd->first_dyn_alloc_ctxt && !rcd->is_vnic) {
+ hfi1_rcd_get(rcd);
+ hfi1_set_fast(rcd);
hfi1_rcd_put(rcd);
- }
-}
-
-static inline void set_dma_rtail(struct hfi1_devdata *dd, u16 ctxt)
-{
- struct hfi1_ctxtdata *rcd;
- u16 i;
-
- /*
- * For dynamically allocated kernel contexts (like vnic) switch
- * interrupt handler only for that context. Otherwise, switch
- * interrupt handler for all statically allocated kernel contexts.
- */
- if (ctxt >= dd->first_dyn_alloc_ctxt) {
- rcd = hfi1_rcd_get_by_index_safe(dd, ctxt);
- if (rcd) {
- rcd->do_interrupt =
- &handle_receive_interrupt_dma_rtail;
- hfi1_rcd_put(rcd);
- }
return;
}
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_dyn_alloc_ctxt; i++) {
+ for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) {
rcd = hfi1_rcd_get_by_index(dd, i);
- if (rcd)
- rcd->do_interrupt =
- &handle_receive_interrupt_dma_rtail;
+ if (rcd && (i < dd->first_dyn_alloc_ctxt || rcd->is_vnic))
+ hfi1_set_fast(rcd);
hfi1_rcd_put(rcd);
}
}
@@ -959,17 +939,14 @@ void set_all_slowpath(struct hfi1_devdata *dd)
if (!rcd)
continue;
if (i < dd->first_dyn_alloc_ctxt || rcd->is_vnic)
- rcd->do_interrupt = &handle_receive_interrupt;
+ rcd->do_interrupt = rcd->slow_handler;
hfi1_rcd_put(rcd);
}
}
-static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
- struct hfi1_packet *packet,
- struct hfi1_devdata *dd)
+static bool __set_armed_to_active(struct hfi1_packet *packet)
{
- struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
u8 etype = rhf_rcv_type(packet->rhf);
u8 sc = SC15_PACKET;
@@ -984,19 +961,34 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
sc = hfi1_16B_get_sc(hdr);
}
if (sc != SC15_PACKET) {
- int hwstate = driver_lstate(rcd->ppd);
+ int hwstate = driver_lstate(packet->rcd->ppd);
+ struct work_struct *lsaw =
+ &packet->rcd->ppd->linkstate_active_work;
if (hwstate != IB_PORT_ACTIVE) {
- dd_dev_info(dd,
+ dd_dev_info(packet->rcd->dd,
"Unexpected link state %s\n",
- opa_lstate_name(hwstate));
- return 0;
+ ib_port_state_to_str(hwstate));
+ return false;
}
- queue_work(rcd->ppd->link_wq, lsaw);
- return 1;
+ queue_work(packet->rcd->ppd->link_wq, lsaw);
+ return true;
}
- return 0;
+ return false;
+}
+
+/**
+ * set_armed_to_active - the fast path for armed to active
+ * @packet: the packet structure
+ *
+ * Return true if packet processing needs to bail.
+ */
+static bool set_armed_to_active(struct hfi1_packet *packet)
+{
+ if (likely(packet->rcd->ppd->host_link_state != HLS_UP_ARMED))
+ return false;
+ return __set_armed_to_active(packet);
}
/*
@@ -1014,15 +1006,15 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
struct hfi1_packet packet;
int skip_pkt = 0;
+ if (!rcd->rcvhdrq)
+ return RCV_PKT_OK;
/* Control context will always use the slow path interrupt handler */
needset = (rcd->ctxt == HFI1_CTRL_CTXT) ? 0 : 1;
init_packet(rcd, &packet);
- if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
- u32 seq = rhf_rcv_seq(packet.rhf);
-
- if (seq != rcd->seq_cnt) {
+ if (!get_dma_rtail_setting(rcd)) {
+ if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf))) {
last = RCV_PKT_DONE;
goto bail;
}
@@ -1039,22 +1031,15 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
* Control context can potentially receive an invalid
* rhf. Drop such packets.
*/
- if (rcd->ctxt == HFI1_CTRL_CTXT) {
- u32 seq = rhf_rcv_seq(packet.rhf);
-
- if (seq != rcd->seq_cnt)
+ if (rcd->ctxt == HFI1_CTRL_CTXT)
+ if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf)))
skip_pkt = 1;
- }
}
prescan_rxq(rcd, &packet);
while (last == RCV_PKT_OK) {
- if (unlikely(dd->do_drop &&
- atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) ==
- DROP_PACKET_ON)) {
- dd->do_drop = 0;
-
+ if (hfi1_need_drop(dd)) {
/* On to the next packet */
packet.rhqoff += packet.rsize;
packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
@@ -1066,26 +1051,14 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
last = skip_rcv_packet(&packet, thread);
skip_pkt = 0;
} else {
- /* Auto activate link on non-SC15 packet receive */
- if (unlikely(rcd->ppd->host_link_state ==
- HLS_UP_ARMED) &&
- set_armed_to_active(rcd, &packet, dd))
+ if (set_armed_to_active(&packet))
goto bail;
last = process_rcv_packet(&packet, thread);
}
- if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
- u32 seq = rhf_rcv_seq(packet.rhf);
-
- if (++rcd->seq_cnt > 13)
- rcd->seq_cnt = 1;
- if (seq != rcd->seq_cnt)
+ if (!get_dma_rtail_setting(rcd)) {
+ if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
last = RCV_PKT_DONE;
- if (needset) {
- dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n");
- set_nodma_rtail(dd, rcd->ctxt);
- needset = 0;
- }
} else {
if (packet.rhqoff == hdrqtail)
last = RCV_PKT_DONE;
@@ -1094,27 +1067,24 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
* rhf. Drop such packets.
*/
if (rcd->ctxt == HFI1_CTRL_CTXT) {
- u32 seq = rhf_rcv_seq(packet.rhf);
+ bool lseq;
- if (++rcd->seq_cnt > 13)
- rcd->seq_cnt = 1;
- if (!last && (seq != rcd->seq_cnt))
+ lseq = hfi1_seq_incr(rcd,
+ rhf_rcv_seq(packet.rhf));
+ if (!last && lseq)
skip_pkt = 1;
}
-
- if (needset) {
- dd_dev_info(dd,
- "Switching to DMA_RTAIL\n");
- set_dma_rtail(dd, rcd->ctxt);
- needset = 0;
- }
}
+ if (needset) {
+ needset = false;
+ set_all_fastpath(dd, rcd);
+ }
process_rcv_update(last, &packet);
}
process_rcv_qp_work(&packet);
- rcd->head = packet.rhqoff;
+ hfi1_set_rcd_head(rcd, packet.rhqoff);
bail:
/*
@@ -1126,6 +1096,63 @@ bail:
}
/*
+ * handle_receive_interrupt_napi_sp - receive a packet
+ * @rcd: the context
+ * @budget: polling budget
+ *
+ * Called from interrupt handler for errors or receive interrupt.
+ * This is the slow path interrupt handler
+ * when executing napi soft irq environment.
+ */
+int handle_receive_interrupt_napi_sp(struct hfi1_ctxtdata *rcd, int budget)
+{
+ struct hfi1_devdata *dd = rcd->dd;
+ int last = RCV_PKT_OK;
+ bool needset = true;
+ struct hfi1_packet packet;
+
+ init_packet(rcd, &packet);
+ if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf)))
+ goto bail;
+
+ while (last != RCV_PKT_DONE && packet.numpkt < budget) {
+ if (hfi1_need_drop(dd)) {
+ /* On to the next packet */
+ packet.rhqoff += packet.rsize;
+ packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
+ packet.rhqoff +
+ rcd->rhf_offset;
+ packet.rhf = rhf_to_cpu(packet.rhf_addr);
+
+ } else {
+ if (set_armed_to_active(&packet))
+ goto bail;
+ process_rcv_packet_napi(&packet);
+ }
+
+ if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
+ last = RCV_PKT_DONE;
+
+ if (needset) {
+ needset = false;
+ set_all_fastpath(dd, rcd);
+ }
+
+ process_rcv_update(last, &packet);
+ }
+
+ hfi1_set_rcd_head(rcd, packet.rhqoff);
+
+bail:
+ /*
+ * Always write head at end, and setup rcv interrupt, even
+ * if no packets were processed.
+ */
+ finish_packet(&packet);
+ return packet.numpkt;
+}
+
+/*
* We may discover in the interrupt that the hardware link state has
* changed from ARMED to ACTIVE (due to the arrival of a non-SC15 packet),
* and we need to update the driver's notion of the link state. We cannot
@@ -1276,7 +1303,7 @@ void shutdown_led_override(struct hfi1_pportdata *ppd)
*/
smp_rmb();
if (atomic_read(&ppd->led_override_timer_active)) {
- del_timer_sync(&ppd->led_override_timer);
+ timer_delete_sync(&ppd->led_override_timer);
atomic_set(&ppd->led_override_timer_active, 0);
/* Ensure the atomic_set is visible to all CPUs */
smp_wmb();
@@ -1288,7 +1315,8 @@ void shutdown_led_override(struct hfi1_pportdata *ppd)
static void run_led_override(struct timer_list *t)
{
- struct hfi1_pportdata *ppd = from_timer(ppd, t, led_override_timer);
+ struct hfi1_pportdata *ppd = timer_container_of(ppd, t,
+ led_override_timer);
struct hfi1_devdata *dd = ppd->dd;
unsigned long timeout;
int phase_idx;
@@ -1570,7 +1598,7 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet)
return 0;
drop:
- hfi1_cdbg(PKT, "%s: packet dropped\n", __func__);
+ hfi1_cdbg(PKT, "%s: packet dropped", __func__);
ibp->rvp.n_pkt_drops++;
return -EINVAL;
}
@@ -1602,59 +1630,114 @@ void handle_eflags(struct hfi1_packet *packet)
show_eflags_errs(packet);
}
+static void hfi1_ipoib_ib_rcv(struct hfi1_packet *packet)
+{
+ struct hfi1_ibport *ibp;
+ struct net_device *netdev;
+ struct hfi1_ctxtdata *rcd = packet->rcd;
+ struct napi_struct *napi = rcd->napi;
+ struct sk_buff *skb;
+ struct hfi1_netdev_rxq *rxq = container_of(napi,
+ struct hfi1_netdev_rxq, napi);
+ u32 extra_bytes;
+ u32 tlen, qpnum;
+ bool do_work, do_cnp;
+
+ trace_hfi1_rcvhdr(packet);
+
+ hfi1_setup_ib_header(packet);
+
+ packet->ohdr = &((struct ib_header *)packet->hdr)->u.oth;
+ packet->grh = NULL;
+
+ if (unlikely(rhf_err_flags(packet->rhf))) {
+ handle_eflags(packet);
+ return;
+ }
+
+ qpnum = ib_bth_get_qpn(packet->ohdr);
+ netdev = hfi1_netdev_get_data(rcd->dd, qpnum);
+ if (!netdev)
+ goto drop_no_nd;
+
+ trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
+ trace_ctxt_rsm_hist(rcd->ctxt);
+
+ /* handle congestion notifications */
+ do_work = hfi1_may_ecn(packet);
+ if (unlikely(do_work)) {
+ do_cnp = (packet->opcode != IB_OPCODE_CNP);
+ (void)hfi1_process_ecn_slowpath(hfi1_ipoib_priv(netdev)->qp,
+ packet, do_cnp);
+ }
+
+ /*
+ * We have split point after last byte of DETH
+ * lets strip padding and CRC and ICRC.
+ * tlen is whole packet len so we need to
+ * subtract header size as well.
+ */
+ tlen = packet->tlen;
+ extra_bytes = ib_bth_get_pad(packet->ohdr) + (SIZE_OF_CRC << 2) +
+ packet->hlen;
+ if (unlikely(tlen < extra_bytes))
+ goto drop;
+
+ tlen -= extra_bytes;
+
+ skb = hfi1_ipoib_prepare_skb(rxq, tlen, packet->ebuf);
+ if (unlikely(!skb))
+ goto drop;
+
+ dev_sw_netstats_rx_add(netdev, skb->len);
+
+ skb->dev = netdev;
+ skb->pkt_type = PACKET_HOST;
+ netif_receive_skb(skb);
+
+ return;
+
+drop:
+ ++netdev->stats.rx_dropped;
+drop_no_nd:
+ ibp = rcd_to_iport(packet->rcd);
+ ++ibp->rvp.n_pkt_drops;
+}
+
/*
* The following functions are called by the interrupt handler. They are type
* specific handlers for each packet type.
*/
-static int process_receive_ib(struct hfi1_packet *packet)
+static void process_receive_ib(struct hfi1_packet *packet)
{
if (hfi1_setup_9B_packet(packet))
- return RHF_RCV_CONTINUE;
+ return;
if (unlikely(hfi1_dbg_should_fault_rx(packet)))
- return RHF_RCV_CONTINUE;
+ return;
trace_hfi1_rcvhdr(packet);
if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
- return RHF_RCV_CONTINUE;
+ return;
}
hfi1_ib_rcv(packet);
- return RHF_RCV_CONTINUE;
}
-static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet)
-{
- /* Packet received in VNIC context via RSM */
- if (packet->rcd->is_vnic)
- return true;
-
- if ((hfi1_16B_get_l2(packet->ebuf) == OPA_16B_L2_TYPE) &&
- (hfi1_16B_get_l4(packet->ebuf) == OPA_16B_L4_ETHR))
- return true;
-
- return false;
-}
-
-static int process_receive_bypass(struct hfi1_packet *packet)
+static void process_receive_bypass(struct hfi1_packet *packet)
{
struct hfi1_devdata *dd = packet->rcd->dd;
- if (hfi1_is_vnic_packet(packet)) {
- hfi1_vnic_bypass_rcv(packet);
- return RHF_RCV_CONTINUE;
- }
-
if (hfi1_setup_bypass_packet(packet))
- return RHF_RCV_CONTINUE;
+ return;
trace_hfi1_rcvhdr(packet);
if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
- return RHF_RCV_CONTINUE;
+ return;
}
if (hfi1_16B_get_l2(packet->hdr) == 0x2) {
@@ -1677,17 +1760,16 @@ static int process_receive_bypass(struct hfi1_packet *packet)
(OPA_EI_STATUS_SMASK | BAD_L2_ERR);
}
}
- return RHF_RCV_CONTINUE;
}
-static int process_receive_error(struct hfi1_packet *packet)
+static void process_receive_error(struct hfi1_packet *packet)
{
/* KHdrHCRCErr -- KDETH packet with a bad HCRC */
if (unlikely(
hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
(rhf_rcv_type_err(packet->rhf) == RHF_RCV_TYPE_ERROR ||
packet->rhf & RHF_DC_ERR)))
- return RHF_RCV_CONTINUE;
+ return;
hfi1_setup_ib_header(packet);
handle_eflags(packet);
@@ -1695,32 +1777,29 @@ static int process_receive_error(struct hfi1_packet *packet)
if (unlikely(rhf_err_flags(packet->rhf)))
dd_dev_err(packet->rcd->dd,
"Unhandled error packet received. Dropping.\n");
-
- return RHF_RCV_CONTINUE;
}
-static int kdeth_process_expected(struct hfi1_packet *packet)
+static void kdeth_process_expected(struct hfi1_packet *packet)
{
hfi1_setup_9B_packet(packet);
if (unlikely(hfi1_dbg_should_fault_rx(packet)))
- return RHF_RCV_CONTINUE;
+ return;
if (unlikely(rhf_err_flags(packet->rhf))) {
struct hfi1_ctxtdata *rcd = packet->rcd;
if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet))
- return RHF_RCV_CONTINUE;
+ return;
}
hfi1_kdeth_expected_rcv(packet);
- return RHF_RCV_CONTINUE;
}
-static int kdeth_process_eager(struct hfi1_packet *packet)
+static void kdeth_process_eager(struct hfi1_packet *packet)
{
hfi1_setup_9B_packet(packet);
if (unlikely(hfi1_dbg_should_fault_rx(packet)))
- return RHF_RCV_CONTINUE;
+ return;
trace_hfi1_rcvhdr(packet);
if (unlikely(rhf_err_flags(packet->rhf))) {
@@ -1728,37 +1807,41 @@ static int kdeth_process_eager(struct hfi1_packet *packet)
show_eflags_errs(packet);
if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet))
- return RHF_RCV_CONTINUE;
+ return;
}
hfi1_kdeth_eager_rcv(packet);
- return RHF_RCV_CONTINUE;
}
-static int process_receive_invalid(struct hfi1_packet *packet)
+static void process_receive_invalid(struct hfi1_packet *packet)
{
dd_dev_err(packet->rcd->dd, "Invalid packet type %d. Dropping\n",
rhf_rcv_type(packet->rhf));
- return RHF_RCV_CONTINUE;
}
+#define HFI1_RCVHDR_DUMP_MAX 5
+
void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd)
{
struct hfi1_packet packet;
struct ps_mdata mdata;
+ int i;
- seq_printf(s, "Rcd %u: RcvHdr cnt %u entsize %u %s head %llu tail %llu\n",
- rcd->ctxt, rcd->rcvhdrq_cnt, rcd->rcvhdrqentsize,
- HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ?
+ seq_printf(s, "Rcd %u: RcvHdr cnt %u entsize %u %s ctrl 0x%08llx status 0x%08llx, head %llu tail %llu sw head %u\n",
+ rcd->ctxt, get_hdrq_cnt(rcd), get_hdrqentsize(rcd),
+ get_dma_rtail_setting(rcd) ?
"dma_rtail" : "nodma_rtail",
+ read_kctxt_csr(rcd->dd, rcd->ctxt, RCV_CTXT_CTRL),
+ read_kctxt_csr(rcd->dd, rcd->ctxt, RCV_CTXT_STATUS),
read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD) &
RCV_HDR_HEAD_HEAD_MASK,
- read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL));
+ read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL),
+ rcd->head);
init_packet(rcd, &packet);
init_ps_mdata(&mdata, &packet);
- while (1) {
+ for (i = 0; i < HFI1_RCVHDR_DUMP_MAX; i++) {
__le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
rcd->rhf_offset;
struct ib_header *hdr;
@@ -1810,3 +1893,14 @@ const rhf_rcv_function_ptr normal_rhf_rcv_functions[] = {
[RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
[RHF_RCV_TYPE_INVALID7] = process_receive_invalid,
};
+
+const rhf_rcv_function_ptr netdev_rhf_rcv_functions[] = {
+ [RHF_RCV_TYPE_EXPECTED] = process_receive_invalid,
+ [RHF_RCV_TYPE_EAGER] = process_receive_invalid,
+ [RHF_RCV_TYPE_IB] = hfi1_ipoib_ib_rcv,
+ [RHF_RCV_TYPE_ERROR] = process_receive_error,
+ [RHF_RCV_TYPE_BYPASS] = hfi1_vnic_bypass_rcv,
+ [RHF_RCV_TYPE_INVALID5] = process_receive_invalid,
+ [RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
+ [RHF_RCV_TYPE_INVALID7] = process_receive_invalid,
+};
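
Editor's note: the per-context rhf_rcv_function_map and the new netdev_rhf_rcv_functions table above both use the same dispatch-by-receive-type idea, with unsupported slots pointing at an "invalid" handler. A self-contained sketch of the pattern, with hypothetical types and names:

#include <linux/types.h>

enum example_rcv_type {
	EXAMPLE_RCV_EXPECTED,
	EXAMPLE_RCV_EAGER,
	EXAMPLE_RCV_IB,
	EXAMPLE_RCV_ERROR,
	EXAMPLE_RCV_MAX,
};

struct example_packet {
	enum example_rcv_type etype;
};

typedef void (*example_rcv_fn)(struct example_packet *pkt);

static void example_rcv_ib(struct example_packet *pkt) { }
static void example_rcv_invalid(struct example_packet *pkt) { }

/* One handler per receive type; unsupported slots drop the packet. */
static const example_rcv_fn example_rcv_functions[EXAMPLE_RCV_MAX] = {
	[EXAMPLE_RCV_EXPECTED]	= example_rcv_invalid,
	[EXAMPLE_RCV_EAGER]	= example_rcv_invalid,
	[EXAMPLE_RCV_IB]	= example_rcv_ib,
	[EXAMPLE_RCV_ERROR]	= example_rcv_invalid,
};

static void example_dispatch(struct example_packet *pkt)
{
	example_rcv_functions[pkt->etype](pkt);
}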
diff --git a/drivers/infiniband/hw/hfi1/efivar.c b/drivers/infiniband/hw/hfi1/efivar.c
index d106d23016ba..9ed05e10020e 100644
--- a/drivers/infiniband/hw/hfi1/efivar.c
+++ b/drivers/infiniband/hw/hfi1/efivar.c
@@ -1,51 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
-#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/string_helpers.h>
+
#include "efivar.h"
/* GUID for HFI1 variables in EFI */
@@ -78,7 +38,7 @@ static int read_efi_var(const char *name, unsigned long *size,
*size = 0;
*return_data = NULL;
- if (!efi_enabled(EFI_RUNTIME_SERVICES))
+ if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE))
return -EOPNOTSUPP;
uni_name = kcalloc(strlen(name) + 1, sizeof(efi_char16_t), GFP_KERNEL);
@@ -112,7 +72,7 @@ static int read_efi_var(const char *name, unsigned long *size,
* is in the EFIVAR_FS code and may not be compiled in.
* However, even that is insufficient since it does not cover
* EFI_BUFFER_TOO_SMALL which could be an important return.
- * For now, just split out succces or not found.
+ * For now, just split out success or not found.
*/
ret = status == EFI_SUCCESS ? 0 :
status == EFI_NOT_FOUND ? -ENOENT :
@@ -152,9 +112,8 @@ int read_hfi1_efi_var(struct hfi1_devdata *dd, const char *kind,
unsigned long *size, void **return_data)
{
char prefix_name[64];
- char name[64];
+ char name[128];
int result;
- int i;
/* create a common prefix */
snprintf(prefix_name, sizeof(prefix_name), "%04x:%02x:%02x.%x",
@@ -170,10 +129,7 @@ int read_hfi1_efi_var(struct hfi1_devdata *dd, const char *kind,
* variable.
*/
if (result) {
- /* Converting to uppercase */
- for (i = 0; prefix_name[i]; i++)
- if (isalpha(prefix_name[i]))
- prefix_name[i] = toupper(prefix_name[i]);
+ string_upper(prefix_name, prefix_name);
snprintf(name, sizeof(name), "%s-%s", prefix_name, kind);
result = read_efi_var(name, size, return_data);
}
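
Editor's note: the hunk above swaps an open-coded toupper() loop for string_upper() from <linux/string_helpers.h>, which uppercases in place when source and destination are the same buffer. A rough sketch of how the PCI-derived EFI variable name is assembled (helper name and signature are hypothetical):

#include <linux/kernel.h>
#include <linux/string_helpers.h>
#include <linux/types.h>

/* Hypothetical: build "DDDD:BB:DD.F-kind" the way read_hfi1_efi_var() does. */
static void example_build_var_name(char *name, size_t len, const char *kind,
				   u16 domain, u8 bus, u8 dev, u8 fn)
{
	char prefix[64];

	snprintf(prefix, sizeof(prefix), "%04x:%02x:%02x.%x",
		 domain, bus, dev, fn);
	string_upper(prefix, prefix);	/* in-place uppercase, as in the patch */
	snprintf(name, len, "%s-%s", prefix, kind);
}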
diff --git a/drivers/infiniband/hw/hfi1/efivar.h b/drivers/infiniband/hw/hfi1/efivar.h
index 94e9e70de568..882240929a4b 100644
--- a/drivers/infiniband/hw/hfi1/efivar.h
+++ b/drivers/infiniband/hw/hfi1/efivar.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+
#ifndef _HFI1_EFIVAR_H
#define _HFI1_EFIVAR_H
diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c
index 1613af1c58d9..f93a160d8d05 100644
--- a/drivers/infiniband/hw/hfi1/eprom.c
+++ b/drivers/infiniband/hw/hfi1/eprom.c
@@ -1,49 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+
#include <linux/delay.h>
#include "hfi.h"
#include "common.h"
diff --git a/drivers/infiniband/hw/hfi1/eprom.h b/drivers/infiniband/hw/hfi1/eprom.h
index e774184f1643..51648d1afcf1 100644
--- a/drivers/infiniband/hw/hfi1/eprom.h
+++ b/drivers/infiniband/hw/hfi1/eprom.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
struct hfi1_devdata;
diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c
index e9d5cc8b771a..879a66edbded 100644
--- a/drivers/infiniband/hw/hfi1/exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/exp_rcv.c
@@ -1,56 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#include "exp_rcv.h"
#include "trace.h"
/**
- * exp_tid_group_init - initialize exp_tid_set
- * @set - the set
+ * hfi1_exp_tid_set_init - initialize exp_tid_set
+ * @set: the set
*/
static void hfi1_exp_tid_set_init(struct exp_tid_set *set)
{
@@ -60,7 +18,7 @@ static void hfi1_exp_tid_set_init(struct exp_tid_set *set)
/**
* hfi1_exp_tid_group_init - initialize rcd expected receive
- * @rcd - the rcd
+ * @rcd: the rcd
*/
void hfi1_exp_tid_group_init(struct hfi1_ctxtdata *rcd)
{
@@ -70,8 +28,8 @@ void hfi1_exp_tid_group_init(struct hfi1_ctxtdata *rcd)
}
/**
- * alloc_ctxt_rcv_groups - initialize expected receive groups
- * @rcd - the context to add the groupings to
+ * hfi1_alloc_ctxt_rcv_groups - initialize expected receive groups
+ * @rcd: the context to add the groupings to
*/
int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
{
@@ -100,8 +58,8 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
}
/**
- * free_ctxt_rcv_groups - free expected receive groups
- * @rcd - the context to free
+ * hfi1_free_ctxt_rcv_groups - free expected receive groups
+ * @rcd: the context to free
*
* The routine dismantles the expect receive linked
* list and clears any tids associated with the receive
diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.h b/drivers/infiniband/hw/hfi1/exp_rcv.h
index f25362015095..141413d9fbc7 100644
--- a/drivers/infiniband/hw/hfi1/exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/exp_rcv.h
@@ -1,52 +1,10 @@
-#ifndef _HFI1_EXP_RCV_H
-#define _HFI1_EXP_RCV_H
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+#ifndef _HFI1_EXP_RCV_H
+#define _HFI1_EXP_RCV_H
#include "hfi.h"
#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list))
@@ -175,12 +133,13 @@ static inline struct tid_group *tid_group_pop(struct exp_tid_set *set)
return grp;
}
-static inline u32 rcventry2tidinfo(u32 rcventry)
+static inline u32 create_tid(u32 rcventry, u32 npages)
{
u32 pair = rcventry & ~0x1;
return EXP_TID_SET(IDX, pair >> 1) |
- EXP_TID_SET(CTRL, 1 << (rcventry - pair));
+ EXP_TID_SET(CTRL, 1 << (rcventry - pair)) |
+ EXP_TID_SET(LEN, npages);
}
/**
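
The create_tid() rework above folds the page count (LEN) into the same TID-info word that already carries the pair index and control bits. A minimal sketch of that shift-and-or packing follows; the DEMO_* field positions are invented for illustration and are not the hardware EXP_TID_* layout.

#include <linux/types.h>

#define DEMO_TID_IDX_SHIFT   0
#define DEMO_TID_CTRL_SHIFT 10
#define DEMO_TID_LEN_SHIFT  12

static inline u32 demo_pack_tid(u32 rcventry, u32 npages)
{
	u32 pair = rcventry & ~0x1u;		 /* even/odd entries share a pair */
	u32 ctrl = 1u << (rcventry - pair);	 /* 1 for the even entry, 2 for the odd */

	return ((pair >> 1) << DEMO_TID_IDX_SHIFT) |
	       (ctrl << DEMO_TID_CTRL_SHIFT) |
	       (npages << DEMO_TID_LEN_SHIFT);
}
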
diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c
index 3fd3315d0fb0..a45cbffd52c7 100644
--- a/drivers/infiniband/hw/hfi1/fault.c
+++ b/drivers/infiniband/hw/hfi1/fault.c
@@ -1,53 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/types.h>
#include <linux/bitmap.h>
@@ -141,18 +99,18 @@ static ssize_t fault_opcodes_write(struct file *file, const char __user *buf,
if (!data)
return -ENOMEM;
copy = min(len, datalen - 1);
- if (copy_from_user(data, buf, copy))
- return -EFAULT;
+ if (copy_from_user(data, buf, copy)) {
+ ret = -EFAULT;
+ goto free_data;
+ }
- ret = debugfs_file_get(file->f_path.dentry);
- if (unlikely(ret))
- return ret;
ptr = data;
token = ptr;
for (ptr = data; *ptr; ptr = end + 1, token = ptr) {
char *dash;
unsigned long range_start, range_end, i;
bool remove = false;
+ unsigned long bound = 1U << BITS_PER_BYTE;
end = strchr(ptr, ',');
if (end)
@@ -178,6 +136,10 @@ static ssize_t fault_opcodes_write(struct file *file, const char __user *buf,
BITS_PER_BYTE);
break;
}
+ /* Check the inputs */
+ if (range_start >= bound || range_end >= bound)
+ break;
+
for (i = range_start; i <= range_end; i++) {
if (remove)
clear_bit(i, fault->opcodes);
@@ -189,7 +151,7 @@ static ssize_t fault_opcodes_write(struct file *file, const char __user *buf,
}
ret = len;
- debugfs_file_put(file->f_path.dentry);
+free_data:
kfree(data);
return ret;
}
@@ -207,23 +169,19 @@ static ssize_t fault_opcodes_read(struct file *file, char __user *buf,
data = kcalloc(datalen, sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
- ret = debugfs_file_get(file->f_path.dentry);
- if (unlikely(ret))
- return ret;
bit = find_first_bit(fault->opcodes, bitsize);
while (bit < bitsize) {
zero = find_next_zero_bit(fault->opcodes, bitsize, bit);
if (zero - 1 != bit)
- size += snprintf(data + size,
+ size += scnprintf(data + size,
datalen - size - 1,
"0x%lx-0x%lx,", bit, zero - 1);
else
- size += snprintf(data + size,
+ size += scnprintf(data + size,
datalen - size - 1, "0x%lx,",
bit);
bit = find_next_bit(fault->opcodes, bitsize, zero);
}
- debugfs_file_put(file->f_path.dentry);
data[size - 1] = '\n';
data[size] = '\0';
ret = simple_read_from_buffer(buf, len, pos, data, size);
@@ -236,7 +194,6 @@ static const struct file_operations __fault_opcodes_fops = {
.open = fault_opcodes_open,
.read = fault_opcodes_read,
.write = fault_opcodes_write,
- .llseek = no_llseek
};
void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd)
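
Two idioms carry the fault.c hunks above: a single exit label so the scratch buffer is freed on every path (the old code leaked it when copy_from_user() failed), and scnprintf(), which returns the number of bytes actually stored, unlike snprintf(), whose return value can push the running offset past the end of the buffer. A minimal sketch under those assumptions; only the kernel helpers named here are real API, the surrounding function is illustrative.

#include <linux/slab.h>
#include <linux/uaccess.h>

static ssize_t demo_write(const char __user *buf, size_t len)
{
	char *data;
	ssize_t ret = len;

	data = kzalloc(len + 1, GFP_KERNEL);
	if (!data)
		return -ENOMEM;
	if (copy_from_user(data, buf, len)) {
		ret = -EFAULT;
		goto free_data;		/* don't leak the allocation */
	}
	/* ... parse data ... */
free_data:
	kfree(data);
	return ret;
}
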
diff --git a/drivers/infiniband/hw/hfi1/fault.h b/drivers/infiniband/hw/hfi1/fault.h
index a83382700a7c..51adafe240d7 100644
--- a/drivers/infiniband/hw/hfi1/fault.h
+++ b/drivers/infiniband/hw/hfi1/fault.h
@@ -1,51 +1,11 @@
-#ifndef _HFI1_FAULT_H
-#define _HFI1_FAULT_H
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+
+#ifndef _HFI1_FAULT_H
+#define _HFI1_FAULT_H
+
#include <linux/fault-inject.h>
#include <linux/dcache.h>
#include <linux/bitops.h>
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index f9a7e9d29c8b..503abec709c9 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -1,49 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015-2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2020 Cornelis Networks, Inc.
+ * Copyright(c) 2015-2020 Intel Corporation.
*/
+
#include <linux/poll.h>
#include <linux/cdev.h>
#include <linux/vmalloc.h>
@@ -193,30 +153,28 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
if (!((dd->flags & HFI1_PRESENT) && dd->kregbase1))
return -EINVAL;
- if (!atomic_inc_not_zero(&dd->user_refcount))
+ if (!refcount_inc_not_zero(&dd->user_refcount))
return -ENXIO;
/* The real work is performed later in assign_ctxt() */
fd = kzalloc(sizeof(*fd), GFP_KERNEL);
- if (fd) {
- fd->rec_cpu_num = -1; /* no cpu affinity by default */
- fd->mm = current->mm;
- mmgrab(fd->mm);
- fd->dd = dd;
- kobject_get(&fd->dd->kobj);
- fp->private_data = fd;
- } else {
- fp->private_data = NULL;
-
- if (atomic_dec_and_test(&dd->user_refcount))
- complete(&dd->user_comp);
-
- return -ENOMEM;
- }
-
+ if (!fd || init_srcu_struct(&fd->pq_srcu))
+ goto nomem;
+ spin_lock_init(&fd->pq_rcu_lock);
+ spin_lock_init(&fd->tid_lock);
+ spin_lock_init(&fd->invalid_lock);
+ fd->rec_cpu_num = -1; /* no cpu affinity by default */
+ fd->dd = dd;
+ fp->private_data = fd;
return 0;
+nomem:
+ kfree(fd);
+ fp->private_data = NULL;
+ if (refcount_dec_and_test(&dd->user_refcount))
+ complete(&dd->user_comp);
+ return -ENOMEM;
}
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
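
The atomic_t to refcount_t conversion above keeps the open/close pairing intact: open succeeds only while the count is non-zero, and the last dropper signals the completion that device removal waits on. A short sketch of that pattern; the refcount and completion calls are real kernel API, the demo_dev type and function names are illustrative.

#include <linux/refcount.h>
#include <linux/completion.h>

struct demo_dev {
	refcount_t user_refcount;
	struct completion user_comp;
};

static int demo_open(struct demo_dev *dd)
{
	if (!refcount_inc_not_zero(&dd->user_refcount))
		return -ENXIO;			/* device is already going away */
	return 0;
}

static void demo_close(struct demo_dev *dd)
{
	if (refcount_dec_and_test(&dd->user_refcount))
		complete(&dd->user_comp);	/* last user: let removal proceed */
}
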
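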
@@ -301,28 +259,37 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
{
struct hfi1_filedata *fd = kiocb->ki_filp->private_data;
- struct hfi1_user_sdma_pkt_q *pq = fd->pq;
+ struct hfi1_user_sdma_pkt_q *pq;
struct hfi1_user_sdma_comp_q *cq = fd->cq;
int done = 0, reqs = 0;
unsigned long dim = from->nr_segs;
+ int idx;
- if (!cq || !pq)
- return -EIO;
-
- if (!iter_is_iovec(from) || !dim)
+ if (!HFI1_CAP_IS_KSET(SDMA))
+ return -EINVAL;
+ if (!user_backed_iter(from))
return -EINVAL;
+ idx = srcu_read_lock(&fd->pq_srcu);
+ pq = srcu_dereference(fd->pq, &fd->pq_srcu);
+ if (!cq || !pq) {
+ srcu_read_unlock(&fd->pq_srcu, idx);
+ return -EIO;
+ }
trace_hfi1_sdma_request(fd->dd, fd->uctxt->ctxt, fd->subctxt, dim);
- if (atomic_read(&pq->n_reqs) == pq->n_max_reqs)
+ if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) {
+ srcu_read_unlock(&fd->pq_srcu, idx);
return -ENOSPC;
+ }
while (dim) {
+ const struct iovec *iov = iter_iov(from);
int ret;
unsigned long count = 0;
ret = hfi1_user_sdma_process_request(
- fd, (struct iovec *)(from->iov + done),
+ fd, (struct iovec *)(iov + done),
dim, &count);
if (ret) {
reqs = ret;
@@ -333,9 +300,21 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
reqs++;
}
+ srcu_read_unlock(&fd->pq_srcu, idx);
return reqs;
}
+static inline void mmap_cdbg(u16 ctxt, u8 subctxt, u8 type, u8 mapio, u8 vmf,
+ u64 memaddr, void *memvirt, dma_addr_t memdma,
+ ssize_t memlen, struct vm_area_struct *vma)
+{
+ hfi1_cdbg(PROC,
+ "%u:%u type:%u io/vf/dma:%d/%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx",
+ ctxt, subctxt, type, mapio, vmf, !!memdma,
+ memaddr ?: (u64)memvirt, memlen,
+ vma->vm_end - vma->vm_start, vma->vm_flags);
+}
+
static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
{
struct hfi1_filedata *fd = fp->private_data;
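
The hfi1_write_iter() change above samples fd->pq under SRCU so a concurrent teardown cannot free the packet queue while it is in use. A minimal read-side sketch; the srcu_* calls are real API, the demo types are illustrative.

#include <linux/srcu.h>

struct demo_pq;

struct demo_fd {
	struct srcu_struct pq_srcu;
	struct demo_pq __rcu *pq;
};

static int demo_use_pq(struct demo_fd *fd)
{
	struct demo_pq *pq;
	int idx, ret = 0;

	idx = srcu_read_lock(&fd->pq_srcu);
	pq = srcu_dereference(fd->pq, &fd->pq_srcu);
	if (!pq)
		ret = -EIO;
	/* ... use pq; the updater waits in synchronize_srcu() before freeing ... */
	srcu_read_unlock(&fd->pq_srcu, idx);
	return ret;
}
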
@@ -345,6 +324,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
u64 token = vma->vm_pgoff << PAGE_SHIFT,
memaddr = 0;
void *memvirt = NULL;
+ dma_addr_t memdma = 0;
u8 subctxt, mapio = 0, vmf = 0, type;
ssize_t memlen = 0;
int ret = 0;
@@ -364,6 +344,11 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
goto done;
}
+ /*
+ * vm_pgoff is used as a buffer selector cookie. Always mmap from
+ * the beginning.
+ */
+ vma->vm_pgoff = 0;
flags = vma->vm_flags;
switch (type) {
@@ -385,7 +370,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
mapio = 1;
break;
- case PIO_CRED:
+ case PIO_CRED: {
+ u64 cr_page_offset;
if (flags & VM_WRITE) {
ret = -EPERM;
goto done;
@@ -395,10 +381,11 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
* second or third page allocated for credit returns (if number
* of enabled contexts > 64 and 128 respectively).
*/
- memvirt = dd->cr_base[uctxt->numa_id].va;
- memaddr = virt_to_phys(memvirt) +
- (((u64)uctxt->sc->hw_free -
- (u64)dd->cr_base[uctxt->numa_id].va) & PAGE_MASK);
+ cr_page_offset = ((u64)uctxt->sc->hw_free -
+ (u64)dd->cr_base[uctxt->numa_id].va) &
+ PAGE_MASK;
+ memvirt = dd->cr_base[uctxt->numa_id].va + cr_page_offset;
+ memdma = dd->cr_base[uctxt->numa_id].dma + cr_page_offset;
memlen = PAGE_SIZE;
flags &= ~VM_MAYWRITE;
flags |= VM_DONTCOPY | VM_DONTEXPAND;
@@ -408,14 +395,16 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
* memory been flagged as non-cached?
*/
/* vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); */
- mapio = 1;
break;
+ }
case RCV_HDRQ:
memlen = rcvhdrq_size(uctxt);
memvirt = uctxt->rcvhdrq;
+ memdma = uctxt->rcvhdrq_dma;
break;
case RCV_EGRBUF: {
- unsigned long addr;
+ unsigned long vm_start_save;
+ unsigned long vm_end_save;
int i;
/*
* The RcvEgr buffer need to be handled differently
@@ -433,25 +422,35 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
ret = -EPERM;
goto done;
}
- vma->vm_flags &= ~VM_MAYWRITE;
- addr = vma->vm_start;
+ vm_flags_clear(vma, VM_MAYWRITE);
+ /*
+ * Mmap multiple separate allocations into a single vma. From
+ * here, dma_mmap_coherent() calls dma_direct_mmap(), which
+ * requires the mmap to exactly fill the vma starting at
+ * vma_start. Adjust the vma start and end for each eager
+ * buffer segment mapped. Restore the originals when done.
+ */
+ vm_start_save = vma->vm_start;
+ vm_end_save = vma->vm_end;
+ vma->vm_end = vma->vm_start;
for (i = 0 ; i < uctxt->egrbufs.numbufs; i++) {
memlen = uctxt->egrbufs.buffers[i].len;
memvirt = uctxt->egrbufs.buffers[i].addr;
- ret = remap_pfn_range(
- vma, addr,
- /*
- * virt_to_pfn() does the same, but
- * it's not available on x86_64
- * when CONFIG_MMU is enabled.
- */
- PFN_DOWN(__pa(memvirt)),
- memlen,
- vma->vm_page_prot);
- if (ret < 0)
+ memdma = uctxt->egrbufs.buffers[i].dma;
+ vma->vm_end += memlen;
+ mmap_cdbg(ctxt, subctxt, type, mapio, vmf, memaddr,
+ memvirt, memdma, memlen, vma);
+ ret = dma_mmap_coherent(&dd->pcidev->dev, vma,
+ memvirt, memdma, memlen);
+ if (ret < 0) {
+ vma->vm_start = vm_start_save;
+ vma->vm_end = vm_end_save;
goto done;
- addr += memlen;
+ }
+ vma->vm_start += memlen;
}
+ vma->vm_start = vm_start_save;
+ vma->vm_end = vm_end_save;
ret = 0;
goto done;
}
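
The eager-buffer hunk above maps several coherent allocations into one VMA. Because dma_mmap_coherent() expects to fill the whole VMA starting at vm_start, the window is temporarily shrunk to each buffer and the true bounds restored afterwards. A sketch of that loop, with an illustrative buffer-array layout:

#include <linux/dma-mapping.h>
#include <linux/mm.h>

static int demo_map_bufs(struct device *dev, struct vm_area_struct *vma,
			 void **cpu_addrs, dma_addr_t *dma_addrs,
			 size_t *lens, int nbufs)
{
	unsigned long start = vma->vm_start, end = vma->vm_end;
	int i, ret = 0;

	vma->vm_end = vma->vm_start;
	for (i = 0; i < nbufs; i++) {
		vma->vm_end += lens[i];		/* window = exactly this buffer */
		ret = dma_mmap_coherent(dev, vma, cpu_addrs[i],
					dma_addrs[i], lens[i]);
		if (ret < 0)
			break;
		vma->vm_start += lens[i];
	}
	vma->vm_start = start;			/* restore the real VMA bounds */
	vma->vm_end = end;
	return ret;
}
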
@@ -505,12 +504,13 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
ret = -EINVAL;
goto done;
}
- if ((flags & VM_WRITE) || !uctxt->rcvhdrtail_kvaddr) {
+ if ((flags & VM_WRITE) || !hfi1_rcvhdrtail_kvaddr(uctxt)) {
ret = -EPERM;
goto done;
}
memlen = PAGE_SIZE;
- memvirt = (void *)uctxt->rcvhdrtail_kvaddr;
+ memvirt = (void *)hfi1_rcvhdrtail_kvaddr(uctxt);
+ memdma = uctxt->rcvhdrqtailaddr_dma;
flags &= ~VM_MAYWRITE;
break;
case SUBCTXT_UREGS:
@@ -558,15 +558,16 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
goto done;
}
- vma->vm_flags = flags;
- hfi1_cdbg(PROC,
- "%u:%u type:%u io/vf:%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx\n",
- ctxt, subctxt, type, mapio, vmf, memaddr, memlen,
- vma->vm_end - vma->vm_start, vma->vm_flags);
+ vm_flags_reset(vma, flags);
+ mmap_cdbg(ctxt, subctxt, type, mapio, vmf, memaddr, memvirt, memdma,
+ memlen, vma);
if (vmf) {
vma->vm_pgoff = PFN_DOWN(memaddr);
vma->vm_ops = &vm_ops;
ret = 0;
+ } else if (memdma) {
+ ret = dma_mmap_coherent(&dd->pcidev->dev, vma,
+ memvirt, memdma, memlen);
} else if (mapio) {
ret = io_remap_pfn_range(vma, vma->vm_start,
PFN_DOWN(memaddr),
@@ -701,12 +702,11 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
deallocate_ctxt(uctxt);
done:
- mmdrop(fdata->mm);
- kobject_put(&dd->kobj);
- if (atomic_dec_and_test(&dd->user_refcount))
+ if (refcount_dec_and_test(&dd->user_refcount))
complete(&dd->user_comp);
+ cleanup_srcu_struct(&fdata->pq_srcu);
kfree(fdata);
return 0;
}
@@ -728,7 +728,7 @@ static u64 kvirt_to_phys(void *addr)
}
/**
- * complete_subctxt
+ * complete_subctxt - complete sub-context info
* @fd: valid filedata pointer
*
* Sub-context info can only be set up after the base context
@@ -833,7 +833,7 @@ static int assign_ctxt(struct hfi1_filedata *fd, unsigned long arg, u32 len)
}
/**
- * match_ctxt
+ * match_ctxt - match context
* @fd: valid filedata pointer
* @uinfo: user info to compare base context with
* @uctxt: context to compare uinfo to.
@@ -890,7 +890,7 @@ static int match_ctxt(struct hfi1_filedata *fd,
}
/**
- * find_sub_ctxt
+ * find_sub_ctxt - find sub-context
* @fd: valid filedata pointer
* @uinfo: matching info to use to find a possible context to share.
*
@@ -975,7 +975,7 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
ret = -ENOMEM;
goto ctxdata_free;
}
- hfi1_cdbg(PROC, "allocated send context %u(%u)\n", uctxt->sc->sw_index,
+ hfi1_cdbg(PROC, "allocated send context %u(%u)", uctxt->sc->sw_index,
uctxt->sc->hw_context);
ret = sc_enable(uctxt->sc);
if (ret)
@@ -996,7 +996,7 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
uctxt->userversion = uinfo->userversion;
uctxt->flags = hfi1_cap_mask; /* save current flag state */
init_waitqueue_head(&uctxt->wait);
- strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm));
+ strscpy(uctxt->comm, current->comm, sizeof(uctxt->comm));
memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid));
uctxt->jkey = generate_jkey(current_uid());
hfi1_stats.sps_ctxts++;
@@ -1090,7 +1090,7 @@ static void user_init(struct hfi1_ctxtdata *uctxt)
* don't have to wait to be sure the DMA update has happened
* (chip resets head/tail to 0 on transition to enable).
*/
- if (uctxt->rcvhdrtail_kvaddr)
+ if (hfi1_rcvhdrtail_kvaddr(uctxt))
clear_rcvhdrtail(uctxt);
/* Setup J_KEY before enabling the context */
@@ -1138,7 +1138,7 @@ static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
HFI1_CAP_UGET_MASK(uctxt->flags, MASK) |
HFI1_CAP_KGET_MASK(uctxt->flags, K2U);
/* adjust flag if this fd is not able to cache */
- if (!fd->handler)
+ if (!fd->use_mn)
cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */
cinfo.num_active = hfi1_count_active_units();
@@ -1154,8 +1154,8 @@ static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
cinfo.send_ctxt = uctxt->sc->hw_context;
cinfo.egrtids = uctxt->egrbufs.alloced;
- cinfo.rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
- cinfo.rcvhdrq_entsize = uctxt->rcvhdrqentsize << 2;
+ cinfo.rcvhdrq_cnt = get_hdrq_cnt(uctxt);
+ cinfo.rcvhdrq_entsize = get_hdrqentsize(uctxt) << 2;
cinfo.sdma_ring_size = fd->cq->nentries;
cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size;
@@ -1210,8 +1210,10 @@ static int setup_base_ctxt(struct hfi1_filedata *fd,
goto done;
ret = init_user_ctxt(fd, uctxt);
- if (ret)
+ if (ret) {
+ hfi1_free_ctxt_rcv_groups(uctxt);
goto done;
+ }
user_init(uctxt);
@@ -1253,8 +1255,8 @@ static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
memset(&binfo, 0, sizeof(binfo));
binfo.hw_version = dd->revision;
- binfo.sw_version = HFI1_KERN_SWVERSION;
- binfo.bthqp = kdeth_qp;
+ binfo.sw_version = HFI1_USER_SWVERSION;
+ binfo.bthqp = RVT_KDETH_QP_PREFIX;
binfo.jkey = uctxt->jkey;
/*
* If more than 64 contexts are enabled the allocated credit
@@ -1347,12 +1349,15 @@ static int user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned long arg,
addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
sizeof(tinfo.tidcnt)))
- return -EFAULT;
+ ret = -EFAULT;
addr = arg + offsetof(struct hfi1_tid_info, length);
- if (copy_to_user((void __user *)addr, &tinfo.length,
+ if (!ret && copy_to_user((void __user *)addr, &tinfo.length,
sizeof(tinfo.length)))
ret = -EFAULT;
+
+ if (ret)
+ hfi1_user_exp_rcv_invalid(fd, &tinfo);
}
return ret;
@@ -1514,7 +1519,7 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit)
* manage_rcvq - manage a context's receive queue
* @uctxt: the context
* @subctxt: the sub-context
- * @start_stop: action to carry out
+ * @arg: start/stop action to carry out
*
* start_stop == 0 disables receive on the context, for use in queue
* overflow conditions. start_stop==1 re-enables, to be used to
@@ -1543,7 +1548,7 @@ static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
* always resets it's tail register back to 0 on a
* transition from disabled to enabled.
*/
- if (uctxt->rcvhdrtail_kvaddr)
+ if (hfi1_rcvhdrtail_kvaddr(uctxt))
clear_rcvhdrtail(uctxt);
rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB;
} else {
@@ -1684,7 +1689,7 @@ static int user_add(struct hfi1_devdata *dd)
snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit);
ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops,
&dd->user_cdev, &dd->user_device,
- true, &dd->kobj);
+ true, &dd->verbs_dev.rdi.ibdev.dev.kobj);
if (ret)
user_remove(dd);
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index 2b57ba70ddd6..3c228aeaaf81 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -1,53 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015 - 2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#include <linux/firmware.h>
#include <linux/mutex.h>
-#include <linux/module.h>
#include <linux/delay.h>
#include <linux/crc32.h>
@@ -1157,7 +1114,7 @@ static void turn_off_spicos(struct hfi1_devdata *dd, int flags)
* Reset all of the fabric serdes for this HFI in preparation to take the
* link to Polling.
*
- * To do a reset, we need to write to to the serdes registers. Unfortunately,
+ * To do a reset, we need to write to the serdes registers. Unfortunately,
* the fabric serdes download to the other HFI on the ASIC will have turned
* off the firmware validation on this HFI. This means we can't write to the
* registers to reset the serdes. Work around this by performing a complete
@@ -1786,6 +1743,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
if (!dd->platform_config.data) {
dd_dev_err(dd, "%s: Missing config file\n", __func__);
+ ret = -EINVAL;
goto bail;
}
ptr = (u32 *)dd->platform_config.data;
@@ -1794,6 +1752,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
ptr++;
if (magic_num != PLATFORM_CONFIG_MAGIC_NUM) {
dd_dev_err(dd, "%s: Bad config file\n", __func__);
+ ret = -EINVAL;
goto bail;
}
@@ -1817,6 +1776,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
if (file_length > dd->platform_config.size) {
dd_dev_info(dd, "%s:File claims to be larger than read size\n",
__func__);
+ ret = -EINVAL;
goto bail;
} else if (file_length < dd->platform_config.size) {
dd_dev_info(dd,
@@ -1837,6 +1797,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
dd_dev_err(dd, "%s: Failed validation at offset %ld\n",
__func__, (ptr - (u32 *)
dd->platform_config.data));
+ ret = -EINVAL;
goto bail;
}
@@ -1868,11 +1829,8 @@ int parse_platform_config(struct hfi1_devdata *dd)
2;
break;
case PLATFORM_CONFIG_RX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_TX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
- /* fall through */
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
pcfgcache->config_tables[table_type].num_table =
table_length_dwords;
@@ -1883,6 +1841,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
__func__, table_type,
(ptr - (u32 *)
dd->platform_config.data));
+ ret = -EINVAL;
goto bail; /* We don't trust this file now */
}
pcfgcache->config_tables[table_type].table = ptr;
@@ -1890,15 +1849,10 @@ int parse_platform_config(struct hfi1_devdata *dd)
/* metadata table */
switch (table_type) {
case PLATFORM_CONFIG_SYSTEM_TABLE:
- /* fall through */
case PLATFORM_CONFIG_PORT_TABLE:
- /* fall through */
case PLATFORM_CONFIG_RX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_TX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
- /* fall through */
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
break;
default:
@@ -1907,6 +1861,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
__func__, table_type,
(ptr -
(u32 *)dd->platform_config.data));
+ ret = -EINVAL;
goto bail; /* We don't trust this file now */
}
pcfgcache->config_tables[table_type].table_metadata =
@@ -1924,6 +1879,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
dd_dev_err(dd, "%s: Failed CRC check at offset %ld\n",
__func__, (ptr -
(u32 *)dd->platform_config.data));
+ ret = -EINVAL;
goto bail;
}
/* Jump the CRC DWORD */
@@ -2027,15 +1983,10 @@ static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table,
switch (table) {
case PLATFORM_CONFIG_SYSTEM_TABLE:
- /* fall through */
case PLATFORM_CONFIG_PORT_TABLE:
- /* fall through */
case PLATFORM_CONFIG_RX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_TX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
- /* fall through */
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
if (field && field < platform_config_table_limits[table])
src_ptr =
@@ -2138,11 +2089,8 @@ int get_platform_config_field(struct hfi1_devdata *dd,
pcfgcache->config_tables[table_type].table;
break;
case PLATFORM_CONFIG_RX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_TX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
- /* fall through */
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
src_ptr = pcfgcache->config_tables[table_type].table;
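
The parse_platform_config() hunks above set ret to -EINVAL on each failure path because the function funnels every error through a shared "bail" label; without the assignment it would fall out with ret still 0 and report success on a bad image. A compact sketch of the pattern:

#include <linux/errno.h>

static int demo_parse(const void *blob)
{
	int ret = 0;

	if (!blob) {
		ret = -EINVAL;		/* without this, bail would return 0 */
		goto bail;
	}
	/* ... validate magic number, tables, CRC ... */
bail:
	/* ... free any parse caches ... */
	return ret;
}
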
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index b458c218842b..cb630551cf1a 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1,52 +1,13 @@
-#ifndef _HFI1_KERNEL_H
-#define _HFI1_KERNEL_H
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2015-2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2020-2023 Cornelis Networks, Inc.
+ * Copyright(c) 2015-2020 Intel Corporation.
*/
+#ifndef _HFI1_KERNEL_H
+#define _HFI1_KERNEL_H
+
+#include <linux/refcount.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
@@ -68,7 +29,6 @@
#include <rdma/ib_hdrs.h>
#include <rdma/opa_addr.h>
#include <linux/rhashtable.h>
-#include <linux/netdevice.h>
#include <rdma/rdma_vt.h>
#include "chip_registers.h"
@@ -197,7 +157,9 @@ struct exp_tid_set {
u32 count;
};
-typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
+struct hfi1_ctxtdata;
+typedef int (*intr_handler)(struct hfi1_ctxtdata *rcd, int data);
+typedef void (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
struct tid_queue {
struct list_head queue_head;
@@ -226,7 +188,13 @@ struct hfi1_ctxtdata {
* be valid. Worst case is we process an extra interrupt and up to 64
* packets with the wrong interrupt handler.
*/
- int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded);
+ intr_handler do_interrupt;
+ /** fast handler after autoactive */
+ intr_handler fast_handler;
+ /** slow handler */
+ intr_handler slow_handler;
+ /* napi pointer associated with netdev */
+ struct napi_struct *napi;
/* verbs rx_stats per rcd */
struct hfi1_opcode_stats_perctx *opstats;
/* clear interrupt mask */
@@ -377,11 +345,11 @@ struct hfi1_packet {
u32 rhqoff;
u32 dlid;
u32 slid;
+ int numpkt;
u16 tlen;
s16 etail;
u16 pkey;
u8 hlen;
- u8 numpkt;
u8 rsize;
u8 updegr;
u8 etype;
@@ -539,6 +507,37 @@ static inline void hfi1_16B_set_qpn(struct opa_16b_mgmt *mgmt,
mgmt->src_qpn = cpu_to_be32(src_qp & OPA_16B_MGMT_QPN_MASK);
}
+/**
+ * hfi1_get_rc_ohdr - get extended header
+ * @opah: the opa header
+ */
+static inline struct ib_other_headers *
+hfi1_get_rc_ohdr(struct hfi1_opa_header *opah)
+{
+ struct ib_other_headers *ohdr;
+ struct ib_header *hdr = NULL;
+ struct hfi1_16b_header *hdr_16b = NULL;
+
+ /* Find out where the BTH is */
+ if (opah->hdr_type == HFI1_PKT_TYPE_9B) {
+ hdr = &opah->ibh;
+ if (ib_get_lnh(hdr) == HFI1_LRH_BTH)
+ ohdr = &hdr->u.oth;
+ else
+ ohdr = &hdr->u.l.oth;
+ } else {
+ u8 l4;
+
+ hdr_16b = &opah->opah;
+ l4 = hfi1_16B_get_l4(hdr_16b);
+ if (l4 == OPA_16B_L4_IB_LOCAL)
+ ohdr = &hdr_16b->u.oth;
+ else
+ ohdr = &hdr_16b->u.l.oth;
+ }
+ return ohdr;
+}
+
struct rvt_sge_state;
/*
@@ -677,12 +676,6 @@ static inline void incr_cntr64(u64 *cntr)
(*cntr)++;
}
-static inline void incr_cntr32(u32 *cntr)
-{
- if (*cntr < (u32)-1LL)
- (*cntr)++;
-}
-
#define MAX_NAME_SIZE 64
struct hfi1_msix_entry {
enum irq_type type;
@@ -739,10 +732,6 @@ struct hfi1_pportdata {
struct hfi1_ibport ibport_data;
struct hfi1_devdata *dd;
- struct kobject pport_cc_kobj;
- struct kobject sc2vl_kobj;
- struct kobject sl2sc_kobj;
- struct kobject vl2mtu_kobj;
/* PHY support */
struct qsfp_data qsfp_info;
@@ -824,7 +813,7 @@ struct hfi1_pportdata {
u8 rx_pol_inv;
u8 hw_pidx; /* physical port index */
- u8 port; /* IB port number and index into dd->pports - 1 */
+ u32 port; /* IB port number and index into dd->pports - 1 */
/* type of neighbor node */
u8 neighbor_type;
u8 neighbor_normal;
@@ -948,7 +937,7 @@ typedef void (*hfi1_make_req)(struct rvt_qp *qp,
struct hfi1_pkt_state *ps,
struct rvt_swqe *wqe);
extern const rhf_rcv_function_ptr normal_rhf_rcv_functions[];
-
+extern const rhf_rcv_function_ptr netdev_rhf_rcv_functions[];
/* return values for the RHF receive functions */
#define RHF_RCV_CONTINUE 0 /* keep going */
@@ -1008,23 +997,10 @@ struct hfi1_asic_data {
#define NUM_MAP_ENTRIES 256
#define NUM_MAP_REGS 32
-/*
- * Number of VNIC contexts used. Ensure it is less than or equal to
- * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE).
- */
-#define HFI1_NUM_VNIC_CTXT 8
-
-/* Number of VNIC RSM entries */
-#define NUM_VNIC_MAP_ENTRIES 8
-
/* Virtual NIC information */
struct hfi1_vnic_data {
- struct hfi1_ctxtdata *ctxt[HFI1_NUM_VNIC_CTXT];
struct kmem_cache *txreq_cache;
- struct xarray vesws;
u8 num_vports;
- u8 rmt_start;
- u8 num_ctxt;
};
struct hfi1_vnic_vport_info;
@@ -1039,6 +1015,7 @@ struct sdma_vl_map;
#define SERIAL_MAX 16 /* length of the serial number */
typedef int (*send_routine)(struct rvt_qp *, struct hfi1_pkt_state *, u64);
+struct hfi1_netdev_rx;
struct hfi1_devdata {
struct hfi1_ibdev verbs_dev; /* must be first */
/* pointers to related structs for this device */
@@ -1122,14 +1099,16 @@ struct hfi1_devdata {
char *boardname; /* human readable board info */
+ u64 ctx0_seq_drop;
+
/* reset value */
u64 z_int_counter;
u64 z_rcv_limit;
u64 z_send_schedule;
u64 __percpu *send_schedule;
- /* number of reserved contexts for VNIC usage */
- u16 num_vnic_contexts;
+ /* number of reserved contexts for netdev usage */
+ u16 num_netdev_contexts;
/* number of receive contexts in use by the driver */
u32 num_rcv_contexts;
/* number of pio send contexts in use by the driver */
@@ -1279,7 +1258,7 @@ struct hfi1_devdata {
struct err_info_constraint err_info_xmit_constraint;
atomic_t drop_packet;
- u8 do_drop;
+ bool do_drop;
u8 err_info_uncorrectable;
u8 err_info_fmconfig;
@@ -1365,7 +1344,7 @@ struct hfi1_devdata {
/* Number of verbs contexts which have disabled ASPM */
atomic_t aspm_disabled_cnt;
/* Keeps track of user space clients */
- atomic_t user_refcount;
+ refcount_t user_refcount;
/* Used to wait for outstanding user space clients before dev removal */
struct completion user_comp;
@@ -1374,18 +1353,17 @@ struct hfi1_devdata {
bool aspm_enabled; /* ASPM state: enabled/disabled */
struct rhashtable *sdma_rht;
- struct kobject kobj;
-
/* vnic data */
struct hfi1_vnic_data vnic;
/* Lock to protect IRQ SRC register access */
spinlock_t irq_src_lock;
-};
+ int vnic_num_vports;
+ struct hfi1_netdev_rx *netdev_rx;
+ struct hfi1_affinity_node *affinity_entry;
-static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare)
-{
- return (dd->vnic.rmt_start + spare) > NUM_MAP_ENTRIES;
-}
+ /* Keeps track of IPoIB RSM rule users */
+ atomic_t ipoib_rsm_usr_num;
+};
/* 8051 firmware version helper */
#define dc8051_ver(a, b, c) ((a) << 16 | (b) << 8 | (c))
@@ -1400,20 +1378,21 @@ static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare)
#define PT_INVALID 3
struct tid_rb_node;
-struct mmu_rb_node;
-struct mmu_rb_handler;
/* Private data for file operations */
struct hfi1_filedata {
+ struct srcu_struct pq_srcu;
struct hfi1_devdata *dd;
struct hfi1_ctxtdata *uctxt;
struct hfi1_user_sdma_comp_q *cq;
- struct hfi1_user_sdma_pkt_q *pq;
+ /* update side lock for SRCU */
+ spinlock_t pq_rcu_lock;
+ struct hfi1_user_sdma_pkt_q __rcu *pq;
u16 subctxt;
/* for cpu affinity; -1 if none */
int rec_cpu_num;
u32 tid_n_pinned;
- struct mmu_rb_handler *handler;
+ bool use_mn;
struct tid_rb_node **entry_to_rb;
spinlock_t tid_lock; /* protect tid_[limit,used] counters */
u32 tid_limit;
@@ -1422,7 +1401,6 @@ struct hfi1_filedata {
u32 invalid_tid_idx;
/* protect invalid_tids array and invalid_tid_idx */
spinlock_t invalid_lock;
- struct mm_struct *mm;
};
extern struct xarray hfi1_dev_table;
@@ -1450,7 +1428,7 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
struct hfi1_ctxtdata **rcd);
void hfi1_free_ctxt(struct hfi1_ctxtdata *rcd);
void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
- struct hfi1_devdata *dd, u8 hw_pidx, u8 port);
+ struct hfi1_devdata *dd, u8 hw_pidx, u32 port);
void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd);
int hfi1_rcd_put(struct hfi1_ctxtdata *rcd);
int hfi1_rcd_get(struct hfi1_ctxtdata *rcd);
@@ -1460,6 +1438,8 @@ struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt);
int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread);
+int handle_receive_interrupt_napi_fp(struct hfi1_ctxtdata *rcd, int budget);
+int handle_receive_interrupt_napi_sp(struct hfi1_ctxtdata *rcd, int budget);
void set_all_slowpath(struct hfi1_devdata *dd);
extern const struct pci_device_id hfi1_pci_tbl[];
@@ -1476,12 +1456,148 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp,
#define RCV_PKT_LIMIT 0x1 /* stop, hit limit, start thread */
#define RCV_PKT_DONE 0x2 /* stop, no more packets detected */
+/**
+ * hfi1_rcd_head - return the rcd head
+ * @rcd: the context
+ */
+static inline u32 hfi1_rcd_head(struct hfi1_ctxtdata *rcd)
+{
+ return rcd->head;
+}
+
+/**
+ * hfi1_set_rcd_head - set the rcd head
+ * @rcd: the context
+ * @head: the new head
+ */
+static inline void hfi1_set_rcd_head(struct hfi1_ctxtdata *rcd, u32 head)
+{
+ rcd->head = head;
+}
+
/* calculate the current RHF address */
static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd)
{
return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->rhf_offset;
}
+/* return DMA_RTAIL configuration */
+static inline bool get_dma_rtail_setting(struct hfi1_ctxtdata *rcd)
+{
+ return !!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL);
+}
+
+/**
+ * hfi1_seq_incr_wrap - wrapping increment for sequence
+ * @seq: the current sequence number
+ *
+ * Returns: the incremented seq
+ */
+static inline u8 hfi1_seq_incr_wrap(u8 seq)
+{
+ if (++seq > RHF_MAX_SEQ)
+ seq = 1;
+ return seq;
+}
+
+/**
+ * hfi1_seq_cnt - return seq_cnt member
+ * @rcd: the receive context
+ *
+ * Return seq_cnt member
+ */
+static inline u8 hfi1_seq_cnt(struct hfi1_ctxtdata *rcd)
+{
+ return rcd->seq_cnt;
+}
+
+/**
+ * hfi1_set_seq_cnt - set seq_cnt member
+ * @rcd: the receive context
+ * @cnt: the new count
+ *
+ * Set the seq_cnt member to cnt
+ */
+static inline void hfi1_set_seq_cnt(struct hfi1_ctxtdata *rcd, u8 cnt)
+{
+ rcd->seq_cnt = cnt;
+}
+
+/**
+ * last_rcv_seq - check whether this was the last packet
+ * @rcd: the receive context
+ * @seq: the sequence value to compare
+ *
+ * Return true if this was the last packet
+ */
+static inline bool last_rcv_seq(struct hfi1_ctxtdata *rcd, u32 seq)
+{
+ return seq != rcd->seq_cnt;
+}
+
+/**
+ * hfi1_seq_incr - increment context sequence number
+ * @rcd: the receive context
+ * @seq: the current sequence number
+ *
+ * Returns: true if this was the last packet
+ */
+static inline bool hfi1_seq_incr(struct hfi1_ctxtdata *rcd, u32 seq)
+{
+ rcd->seq_cnt = hfi1_seq_incr_wrap(rcd->seq_cnt);
+ return last_rcv_seq(rcd, seq);
+}
+
+/**
+ * get_hdrqentsize - return hdrq entry size
+ * @rcd: the receive context
+ */
+static inline u8 get_hdrqentsize(struct hfi1_ctxtdata *rcd)
+{
+ return rcd->rcvhdrqentsize;
+}
+
+/**
+ * get_hdrq_cnt - return hdrq count
+ * @rcd: the receive context
+ */
+static inline u16 get_hdrq_cnt(struct hfi1_ctxtdata *rcd)
+{
+ return rcd->rcvhdrq_cnt;
+}
+
+/**
+ * hfi1_is_slowpath - check if this context is slow path
+ * @rcd: the receive context
+ */
+static inline bool hfi1_is_slowpath(struct hfi1_ctxtdata *rcd)
+{
+ return rcd->do_interrupt == rcd->slow_handler;
+}
+
+/**
+ * hfi1_is_fastpath - check if this context is fast path
+ * @rcd: the receive context
+ */
+static inline bool hfi1_is_fastpath(struct hfi1_ctxtdata *rcd)
+{
+ if (rcd->ctxt == HFI1_CTRL_CTXT)
+ return false;
+
+ return rcd->do_interrupt == rcd->fast_handler;
+}
+
+/**
+ * hfi1_set_fast - change to the fast handler
+ * @rcd: the receive context
+ */
+static inline void hfi1_set_fast(struct hfi1_ctxtdata *rcd)
+{
+ if (unlikely(!rcd))
+ return;
+ if (unlikely(!hfi1_is_fastpath(rcd)))
+ rcd->do_interrupt = rcd->fast_handler;
+}
+
int hfi1_reset_device(int);
void receive_interrupt_work(struct work_struct *work);
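
The sequence helpers added above track the RHF sequence number that the hardware stamps on each receive header: the expected value wraps from its maximum back to 1 (never 0), and a mismatch between the observed and expected value means no further packets are queued. A small sketch; the maximum used here is illustrative.

#include <linux/types.h>

#define DEMO_RHF_MAX_SEQ 13

static inline u8 demo_seq_incr_wrap(u8 seq)
{
	if (++seq > DEMO_RHF_MAX_SEQ)
		seq = 1;		/* wrap to 1, the sequence never uses 0 */
	return seq;
}

/* true when the observed sequence says this was the last packet */
static inline bool demo_last_seq(u8 expected, u8 seq)
{
	return seq != expected;
}
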
@@ -1602,7 +1718,7 @@ static inline void pause_for_credit_return(struct hfi1_devdata *dd)
}
/**
- * sc_to_vlt() reverse lookup sc to vl
+ * sc_to_vlt() - reverse lookup sc to vl
* @dd - devdata
* @sc5 - 5 bit sc
*/
@@ -1808,10 +1924,10 @@ static inline struct hfi1_ibdev *dev_from_rdi(struct rvt_dev_info *rdi)
return container_of(rdi, struct hfi1_ibdev, rdi);
}
-static inline struct hfi1_ibport *to_iport(struct ib_device *ibdev, u8 port)
+static inline struct hfi1_ibport *to_iport(struct ib_device *ibdev, u32 port)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
- unsigned pidx = port - 1; /* IB number port from 1, hdw from 0 */
+ u32 pidx = port - 1; /* IB number port from 1, hdw from 0 */
WARN_ON(pidx >= dd->num_pports);
return &dd->pport[pidx].ibport_data;
@@ -1984,9 +2100,21 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr,
void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
size_t npages, bool dirty);
+/**
+ * hfi1_rcvhdrtail_kvaddr - return tail kvaddr
+ * @rcd: the receive context
+ */
+static inline __le64 *hfi1_rcvhdrtail_kvaddr(const struct hfi1_ctxtdata *rcd)
+{
+ return (__le64 *)rcd->rcvhdrtail_kvaddr;
+}
+
static inline void clear_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
{
- *((u64 *)rcd->rcvhdrtail_kvaddr) = 0ULL;
+ u64 *kv = (u64 *)hfi1_rcvhdrtail_kvaddr(rcd);
+
+ if (kv)
+ *kv = 0ULL;
}
static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
@@ -1995,7 +2123,17 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
* volatile because it's a DMA target from the chip, routine is
* inlined, and don't want register caching or reordering.
*/
- return (u32)le64_to_cpu(*rcd->rcvhdrtail_kvaddr);
+ return (u32)le64_to_cpu(*hfi1_rcvhdrtail_kvaddr(rcd));
+}
+
+static inline bool hfi1_packet_present(struct hfi1_ctxtdata *rcd)
+{
+ if (likely(!rcd->rcvhdrtail_kvaddr)) {
+ u32 seq = rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd)));
+
+ return !last_rcv_seq(rcd, seq);
+ }
+ return hfi1_rcd_head(rcd) != get_rcvhdrtail(rcd);
}
/*
@@ -2004,12 +2142,11 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
extern const char ib_hfi1_version[];
extern const struct attribute_group ib_hfi1_attr_group;
+extern const struct attribute_group *hfi1_attr_port_groups[];
int hfi1_device_create(struct hfi1_devdata *dd);
void hfi1_device_remove(struct hfi1_devdata *dd);
-int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
- struct kobject *kobj);
int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd);
void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd);
/* Hook for sysfs read of QSFP */
@@ -2052,7 +2189,6 @@ extern int num_user_contexts;
extern unsigned long n_krcvqs;
extern uint krcvqs[];
extern int krcvqsset;
-extern uint kdeth_qp;
extern uint loopback;
extern uint quick_linkup;
extern uint rcv_intr_timeout;
@@ -2203,20 +2339,6 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
dev_err(&(dd)->pcidev->dev, "%s: port %u: " fmt, \
rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), (port), ##__VA_ARGS__)
-/*
- * this is used for formatting hw error messages...
- */
-struct hfi1_hwerror_msgs {
- u64 mask;
- const char *msg;
- size_t sz;
-};
-
-/* in intr.c... */
-void hfi1_format_hwerrors(u64 hwerrs,
- const struct hfi1_hwerror_msgs *hwerrmsgs,
- size_t nhwerrmsgs, char *msg, size_t lmsg);
-
#define USER_OPCODE_CHECK_VAL 0xC0
#define USER_OPCODE_CHECK_MASK 0xC0
#define OPCODE_CHECK_VAL_DISABLED 0x0
@@ -2267,10 +2389,29 @@ static inline bool is_integrated(struct hfi1_devdata *dd)
return dd->pcidev->device == PCI_DEVICE_ID_INTEL1;
}
+/**
+ * hfi1_need_drop - detect need for drop
+ * @dd: the device
+ *
+ * In some cases, the first packet needs to be dropped.
+ *
+ * Return true if the current packet needs to be dropped and false otherwise.
+ */
+static inline bool hfi1_need_drop(struct hfi1_devdata *dd)
+{
+ if (unlikely(dd->do_drop &&
+ atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) ==
+ DROP_PACKET_ON)) {
+ dd->do_drop = false;
+ return true;
+ }
+ return false;
+}
+
int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp);
#define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev))
-#define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev))
+#define DD_DEV_ASSIGN(dd) __assign_str(dev)
static inline void hfi1_update_ah_attr(struct ib_device *ibdev,
struct rdma_ah_attr *attr)
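
The hfi1_need_drop() helper above implements a one-shot drop: atomic_xchg() reads the arm flag and disarms it in a single step, so at most one packet is dropped per arming even with several receive contexts running concurrently. A minimal sketch with illustrative flag values:

#include <linux/atomic.h>
#include <linux/types.h>

#define DEMO_DROP_OFF 0
#define DEMO_DROP_ON  1

static bool demo_need_drop(atomic_t *drop_armed, bool *do_drop)
{
	if (*do_drop &&
	    atomic_xchg(drop_armed, DEMO_DROP_OFF) == DEMO_DROP_ON) {
		*do_drop = false;	/* consume the arming exactly once */
		return true;
	}
	return false;
}
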
@@ -2404,7 +2545,7 @@ static inline bool hfi1_get_hdr_type(u32 lid, struct rdma_ah_attr *attr)
HFI1_PKT_TYPE_16B : HFI1_PKT_TYPE_9B;
/*
- * Return a 16B header type if either the the destination
+ * Return a 16B header type if either the destination
* or source lid is extended.
*/
if (hfi1_get_packet_type(rdma_ah_get_dlid(attr)) == HFI1_PKT_TYPE_16B)
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 71cb9525c074..e4aef102dac0 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1,48 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2020 Intel Corporation.
+ * Copyright(c) 2021 Cornelis Networks.
*/
#include <linux/pci.h>
@@ -69,6 +28,7 @@
#include "affinity.h"
#include "vnic.h"
#include "exp_rcv.h"
+#include "netdev.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -78,8 +38,6 @@
*/
#define HFI1_MIN_USER_CTXT_BUFCNT 7
-#define HFI1_MIN_HDRQ_EGRBUF_CNT 2
-#define HFI1_MAX_HDRQ_EGRBUF_CNT 16352
#define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */
#define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */
@@ -122,8 +80,6 @@ unsigned int user_credit_return_threshold = 33; /* default is 33% */
module_param(user_credit_return_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user send contexts, return when unreturned credits passes this many blocks (in percent of allocated blocks, 0 is off)");
-static inline u64 encode_rcv_header_entry_size(u16 size);
-
DEFINE_XARRAY_FLAGS(hfi1_dev_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
static int hfi1_create_kctxt(struct hfi1_devdata *dd,
@@ -154,7 +110,11 @@ static int hfi1_create_kctxt(struct hfi1_devdata *dd,
/* Control context must use DMA_RTAIL */
if (rcd->ctxt == HFI1_CTRL_CTXT)
rcd->flags |= HFI1_CAP_DMA_RTAIL;
- rcd->seq_cnt = 1;
+ rcd->fast_handler = get_dma_rtail_setting(rcd) ?
+ handle_receive_interrupt_dma_rtail :
+ handle_receive_interrupt_nodma_rtail;
+
+ hfi1_set_seq_cnt(rcd, 1);
rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node);
if (!rcd->sc) {
@@ -310,7 +270,7 @@ struct hfi1_ctxtdata *hfi1_rcd_get_by_index_safe(struct hfi1_devdata *dd,
}
/**
- * hfi1_rcd_get_by_index
+ * hfi1_rcd_get_by_index - get by index
* @dd: pointer to a valid devdata structure
 * @ctxt: the index of a possible rcd
*
@@ -373,13 +333,16 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
rcd->numa_id = numa;
rcd->rcv_array_groups = dd->rcv_entries.ngroups;
rcd->rhf_rcv_function_map = normal_rhf_rcv_functions;
+ rcd->slow_handler = handle_receive_interrupt;
+ rcd->do_interrupt = rcd->slow_handler;
+ rcd->msix_intr = CCE_NUM_MSIX_VECTORS;
mutex_init(&rcd->exp_mutex);
spin_lock_init(&rcd->exp_lock);
INIT_LIST_HEAD(&rcd->flow_queue.queue_head);
INIT_LIST_HEAD(&rcd->rarr_queue.queue_head);
- hfi1_cdbg(PROC, "setting up context %u\n", rcd->ctxt);
+ hfi1_cdbg(PROC, "setting up context %u", rcd->ctxt);
/*
* Calculate the context's RcvArray entry starting point.
@@ -437,7 +400,7 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
rcd->egrbufs.count = MAX_EAGER_ENTRIES;
}
hfi1_cdbg(PROC,
- "ctxt%u: max Eager buffer RcvArray entries: %u\n",
+ "ctxt%u: max Eager buffer RcvArray entries: %u",
rcd->ctxt, rcd->egrbufs.count);
/*
@@ -469,7 +432,7 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
if (rcd->egrbufs.size < hfi1_max_mtu) {
rcd->egrbufs.size = __roundup_pow_of_two(hfi1_max_mtu);
hfi1_cdbg(PROC,
- "ctxt%u: eager bufs size too small. Adjusting to %u\n",
+ "ctxt%u: eager bufs size too small. Adjusting to %u",
rcd->ctxt, rcd->egrbufs.size);
}
rcd->egrbufs.rcvtid_size = HFI1_MAX_EAGER_BUFFER_SIZE;
@@ -496,12 +459,12 @@ bail:
}
/**
- * hfi1_free_ctxt
+ * hfi1_free_ctxt - free context
* @rcd: pointer to an initialized rcd data structure
*
* This wrapper is the free function that matches hfi1_create_ctxtdata().
* When a context is done being used (kernel or user), this function is called
- * for the "final" put to match the kref init from hf1i_create_ctxtdata().
+ * for the "final" put to match the kref init from hfi1_create_ctxtdata().
* Other users of the context do a get/put sequence to make sure that the
* structure isn't removed while in use.
*/
@@ -511,23 +474,6 @@ void hfi1_free_ctxt(struct hfi1_ctxtdata *rcd)
}
/*
- * Convert a receive header entry size that to the encoding used in the CSR.
- *
- * Return a zero if the given size is invalid.
- */
-static inline u64 encode_rcv_header_entry_size(u16 size)
-{
- /* there are only 3 valid receive header entry sizes */
- if (size == 2)
- return 1;
- if (size == 16)
- return 2;
- else if (size == 32)
- return 4;
- return 0; /* invalid */
-}
-
-/*
* Select the largest ccti value over all SLs to determine the intra-
* packet gap for the link.
*
@@ -543,7 +489,7 @@ void set_link_ipg(struct hfi1_pportdata *ppd)
u16 shift, mult;
u64 src;
u32 current_egress_rate; /* Mbits /sec */
- u32 max_pkt_time;
+ u64 max_pkt_time;
/*
* max_pkt_time is the maximum packet egress time in units
* of the fabric clock period 1/(805 MHz).
@@ -641,7 +587,7 @@ static enum hrtimer_restart cca_timer_fn(struct hrtimer *t)
* Common code for initializing the physical port structure.
*/
void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
- struct hfi1_devdata *dd, u8 hw_pidx, u8 port)
+ struct hfi1_devdata *dd, u8 hw_pidx, u32 port)
{
int i;
uint default_pkey_idx;
@@ -664,12 +610,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
ppd->pkeys[default_pkey_idx] = DEFAULT_P_KEY;
ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
-
- if (loopback) {
- dd_dev_err(dd, "Faking data partition 0x8001 in idx %u\n",
- !default_pkey_idx);
- ppd->pkeys[!default_pkey_idx] = 0x8001;
- }
+ ppd->pkeys[0] = 0x8001;
INIT_WORK(&ppd->link_vc_work, handle_verify_cap);
INIT_WORK(&ppd->link_up_work, handle_link_up);
@@ -694,12 +635,11 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
spin_lock_init(&ppd->cca_timer_lock);
for (i = 0; i < OPA_MAX_SLS; i++) {
- hrtimer_init(&ppd->cca_timer[i].hrtimer, CLOCK_MONOTONIC,
- HRTIMER_MODE_REL);
ppd->cca_timer[i].ppd = ppd;
ppd->cca_timer[i].sl = i;
ppd->cca_timer[i].ccti = 0;
- ppd->cca_timer[i].hrtimer.function = cca_timer_fn;
+ hrtimer_setup(&ppd->cca_timer[i].hrtimer, cca_timer_fn, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
}
ppd->cc_max_table_entries = IB_CC_TABLE_CAP_DEFAULT;
@@ -805,8 +745,8 @@ static int create_workqueues(struct hfi1_devdata *dd)
ppd->hfi1_wq =
alloc_workqueue(
"hfi%d_%d",
- WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE |
- WQ_MEM_RECLAIM,
+ WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM |
+ WQ_PERCPU,
HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES,
dd->unit, pidx);
if (!ppd->hfi1_wq)
@@ -845,6 +785,29 @@ wq_error:
}
/**
+ * destroy_workqueues - destroy per port workqueues
+ * @dd: the hfi1_ib device
+ */
+static void destroy_workqueues(struct hfi1_devdata *dd)
+{
+ int pidx;
+ struct hfi1_pportdata *ppd;
+
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ ppd = dd->pport + pidx;
+
+ if (ppd->hfi1_wq) {
+ destroy_workqueue(ppd->hfi1_wq);
+ ppd->hfi1_wq = NULL;
+ }
+ if (ppd->link_wq) {
+ destroy_workqueue(ppd->link_wq);
+ ppd->link_wq = NULL;
+ }
+ }
+}
+
+/**
* enable_general_intr() - Enable the IRQs that will be handled by the
* general interrupt handler.
* @dd: valid devdata
@@ -892,10 +855,10 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
if (is_ax(dd)) {
atomic_set(&dd->drop_packet, DROP_PACKET_ON);
- dd->do_drop = 1;
+ dd->do_drop = true;
} else {
atomic_set(&dd->drop_packet, DROP_PACKET_OFF);
- dd->do_drop = 0;
+ dd->do_drop = false;
}
/* make sure the link is not "up" */
@@ -911,18 +874,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
if (ret)
goto done;
- /* allocate dummy tail memory for all receive contexts */
- dd->rcvhdrtail_dummy_kvaddr = dma_alloc_coherent(&dd->pcidev->dev,
- sizeof(u64),
- &dd->rcvhdrtail_dummy_dma,
- GFP_KERNEL);
-
- if (!dd->rcvhdrtail_dummy_kvaddr) {
- dd_dev_err(dd, "cannot allocate dummy tail memory\n");
- ret = -ENOMEM;
- goto done;
- }
-
/* dd->rcd can be NULL if early initialization failed */
for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) {
/*
@@ -935,8 +886,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
if (!rcd)
continue;
- rcd->do_interrupt = &handle_receive_interrupt;
-
lastfail = hfi1_create_rcvhdrq(dd, rcd);
if (!lastfail)
lastfail = hfi1_setup_eagerbufs(rcd);
@@ -1036,7 +985,7 @@ static void stop_timers(struct hfi1_devdata *dd)
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
if (ppd->led_override_timer.function) {
- del_timer_sync(&ppd->led_override_timer);
+ timer_delete_sync(&ppd->led_override_timer);
atomic_set(&ppd->led_override_timer_active, 0);
}
}
@@ -1077,7 +1026,6 @@ static void shutdown_device(struct hfi1_devdata *dd)
msix_clean_up_interrupts(dd);
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
- ppd = dd->pport + pidx;
for (i = 0; i < dd->num_rcv_contexts; i++) {
rcd = hfi1_rcd_get_by_index(dd, i);
hfi1_rcvctrl(dd, HFI1_RCVCTRL_TAILUPD_DIS |
@@ -1117,15 +1065,10 @@ static void shutdown_device(struct hfi1_devdata *dd)
* We can't count on interrupts since we are stopping.
*/
hfi1_quiet_serdes(ppd);
-
- if (ppd->hfi1_wq) {
- destroy_workqueue(ppd->hfi1_wq);
- ppd->hfi1_wq = NULL;
- }
- if (ppd->link_wq) {
- destroy_workqueue(ppd->link_wq);
- ppd->link_wq = NULL;
- }
+ if (ppd->hfi1_wq)
+ flush_workqueue(ppd->hfi1_wq);
+ if (ppd->link_wq)
+ flush_workqueue(ppd->link_wq);
}
sdma_exit(dd);
}
@@ -1149,9 +1092,9 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
dma_free_coherent(&dd->pcidev->dev, rcvhdrq_size(rcd),
rcd->rcvhdrq, rcd->rcvhdrq_dma);
rcd->rcvhdrq = NULL;
- if (rcd->rcvhdrtail_kvaddr) {
+ if (hfi1_rcvhdrtail_kvaddr(rcd)) {
dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
- (void *)rcd->rcvhdrtail_kvaddr,
+ (void *)hfi1_rcvhdrtail_kvaddr(rcd),
rcd->rcvhdrqtailaddr_dma);
rcd->rcvhdrtail_kvaddr = NULL;
}
@@ -1162,7 +1105,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
rcd->egrbufs.rcvtids = NULL;
for (e = 0; e < rcd->egrbufs.alloced; e++) {
- if (rcd->egrbufs.buffers[e].dma)
+ if (rcd->egrbufs.buffers[e].addr)
dma_free_coherent(&dd->pcidev->dev,
rcd->egrbufs.buffers[e].len,
rcd->egrbufs.buffers[e].addr,
@@ -1214,13 +1157,13 @@ static void finalize_asic_data(struct hfi1_devdata *dd,
}
/**
- * hfi1_clean_devdata - cleans up per-unit data structure
+ * hfi1_free_devdata - cleans up and frees per-unit data structure
* @dd: pointer to a valid devdata structure
*
- * It cleans up all data structures set up by
+ * It cleans up and frees all data structures set up by
* by hfi1_alloc_devdata().
*/
-static void hfi1_clean_devdata(struct hfi1_devdata *dd)
+void hfi1_free_devdata(struct hfi1_devdata *dd)
{
struct hfi1_asic_data *ad;
unsigned long flags;
@@ -1243,27 +1186,15 @@ static void hfi1_clean_devdata(struct hfi1_devdata *dd)
dd->tx_opstats = NULL;
kfree(dd->comp_vect);
dd->comp_vect = NULL;
+ if (dd->rcvhdrtail_dummy_kvaddr)
+ dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
+ (void *)dd->rcvhdrtail_dummy_kvaddr,
+ dd->rcvhdrtail_dummy_dma);
+ dd->rcvhdrtail_dummy_kvaddr = NULL;
sdma_clean(dd, dd->num_sdma);
rvt_dealloc_device(&dd->verbs_dev.rdi);
}
-static void __hfi1_free_devdata(struct kobject *kobj)
-{
- struct hfi1_devdata *dd =
- container_of(kobj, struct hfi1_devdata, kobj);
-
- hfi1_clean_devdata(dd);
-}
-
-static struct kobj_type hfi1_devdata_type = {
- .release = __hfi1_free_devdata,
-};
-
-void hfi1_free_devdata(struct hfi1_devdata *dd)
-{
- kobject_put(&dd->kobj);
-}
-
/**
* hfi1_alloc_devdata - Allocate our primary per-unit data structure.
* @pdev: Valid PCI device
@@ -1290,7 +1221,6 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
dd->pport = (struct hfi1_pportdata *)(dd + 1);
dd->pcidev = pdev;
pci_set_drvdata(pdev, dd);
- dd->node = NUMA_NO_NODE;
ret = xa_alloc_irq(&hfi1_dev_table, &dd->unit, dd, xa_limit_32b,
GFP_KERNEL);
@@ -1300,6 +1230,15 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
goto bail;
}
rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit);
+ /*
+ * If the BIOS does not have the NUMA node information set, select
+ * NUMA 0 so we get consistent performance.
+ */
+ dd->node = pcibus_to_node(pdev->bus);
+ if (dd->node == NUMA_NO_NODE) {
+ dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n");
+ dd->node = 0;
+ }
/*
* Initialize all locks for the device. This needs to be as early as
@@ -1349,11 +1288,20 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
goto bail;
}
- kobject_init(&dd->kobj, &hfi1_devdata_type);
+ /* allocate dummy tail memory for all receive contexts */
+ dd->rcvhdrtail_dummy_kvaddr =
+ dma_alloc_coherent(&dd->pcidev->dev, sizeof(u64),
+ &dd->rcvhdrtail_dummy_dma, GFP_KERNEL);
+ if (!dd->rcvhdrtail_dummy_kvaddr) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ atomic_set(&dd->ipoib_rsm_usr_num, 0);
return dd;
bail:
- hfi1_clean_devdata(dd);
+ hfi1_free_devdata(dd);
return ERR_PTR(ret);
}
@@ -1394,7 +1342,7 @@ static void remove_one(struct pci_dev *);
static int init_one(struct pci_dev *, const struct pci_device_id *);
static void shutdown_one(struct pci_dev *);
-#define DRIVER_LOAD_MSG "Intel " DRIVER_NAME " loaded: "
+#define DRIVER_LOAD_MSG "Cornelis " DRIVER_NAME " loaded: "
#define PFX DRIVER_NAME ": "
const struct pci_device_id hfi1_pci_tbl[] = {
@@ -1489,7 +1437,6 @@ static int __init hfi1_mod_init(void)
goto bail_dev;
}
- hfi1_compute_tid_rdma_flow_wt();
/*
* These must be called before the driver is registered with
* the PCI subsystem.
@@ -1557,13 +1504,6 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
free_credit_return(dd);
- if (dd->rcvhdrtail_dummy_kvaddr) {
- dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
- (void *)dd->rcvhdrtail_dummy_kvaddr,
- dd->rcvhdrtail_dummy_dma);
- dd->rcvhdrtail_dummy_kvaddr = NULL;
- }
-
/*
* Free any resources still in use (usually just kernel contexts)
* at unload; we do for ctxtcnt, because that's what we allocate.
@@ -1612,29 +1552,6 @@ static void postinit_cleanup(struct hfi1_devdata *dd)
hfi1_free_devdata(dd);
}
-static int init_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt)
-{
- if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
- dd_dev_err(dd, "Receive header queue count too small\n");
- return -EINVAL;
- }
-
- if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
- dd_dev_err(dd,
- "Receive header queue count cannot be greater than %u\n",
- HFI1_MAX_HDRQ_EGRBUF_CNT);
- return -EINVAL;
- }
-
- if (thecnt % HDRQ_INCREMENT) {
- dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n",
- thecnt, HDRQ_INCREMENT);
- return -EINVAL;
- }
-
- return 0;
-}
-
static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
int ret = 0, j, pidx, initfail;
@@ -1662,7 +1579,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
}
/* Validate some global module parameters */
- ret = init_validate_rcvhdrcnt(dd, rcvhdrcnt);
+ ret = hfi1_validate_rcvhdrcnt(dd, rcvhdrcnt);
if (ret)
goto bail;
@@ -1721,9 +1638,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* do the generic initialization */
initfail = hfi1_init(dd, 0);
- /* setup vnic */
- hfi1_vnic_setup(dd);
-
ret = hfi1_register_ib_device(dd);
/*
@@ -1762,7 +1676,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
hfi1_device_remove(dd);
if (!ret)
hfi1_unregister_ib_device(dd);
- hfi1_vnic_cleanup(dd);
postinit_cleanup(dd);
if (initfail)
ret = initfail;
@@ -1785,7 +1698,7 @@ static void wait_for_clients(struct hfi1_devdata *dd)
* Remove the device init value and complete the device if there is
* no clients or wait for active clients to finish.
*/
- if (atomic_dec_and_test(&dd->user_refcount))
+ if (refcount_dec_and_test(&dd->user_refcount))
complete(&dd->user_comp);
wait_for_completion(&dd->user_comp);
@@ -1807,14 +1720,15 @@ static void remove_one(struct pci_dev *pdev)
/* unregister from IB core */
hfi1_unregister_ib_device(dd);
- /* cleanup vnic */
- hfi1_vnic_cleanup(dd);
+ /* free netdev data */
+ hfi1_free_rx(dd);
/*
* Disable the IB link, disable interrupts on the device,
* clear dma engines, etc.
*/
shutdown_device(dd);
+ destroy_workqueues(dd);
stop_timers(dd);
@@ -1843,20 +1757,13 @@ static void shutdown_one(struct pci_dev *pdev)
int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
{
unsigned amt;
- u64 reg;
if (!rcd->rcvhdrq) {
- gfp_t gfp_flags;
-
amt = rcvhdrq_size(rcd);
- if (rcd->ctxt < dd->first_dyn_alloc_ctxt || rcd->is_vnic)
- gfp_flags = GFP_KERNEL;
- else
- gfp_flags = GFP_USER;
rcd->rcvhdrq = dma_alloc_coherent(&dd->pcidev->dev, amt,
&rcd->rcvhdrq_dma,
- gfp_flags | __GFP_COMP);
+ GFP_KERNEL);
if (!rcd->rcvhdrq) {
dd_dev_err(dd,
@@ -1870,35 +1777,14 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(&dd->pcidev->dev,
PAGE_SIZE,
&rcd->rcvhdrqtailaddr_dma,
- gfp_flags);
+ GFP_KERNEL);
if (!rcd->rcvhdrtail_kvaddr)
goto bail_free;
}
}
- /*
- * These values are per-context:
- * RcvHdrCnt
- * RcvHdrEntSize
- * RcvHdrSize
- */
- reg = ((u64)(rcd->rcvhdrq_cnt >> HDRQ_SIZE_SHIFT)
- & RCV_HDR_CNT_CNT_MASK)
- << RCV_HDR_CNT_CNT_SHIFT;
- write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_CNT, reg);
- reg = (encode_rcv_header_entry_size(rcd->rcvhdrqentsize)
- & RCV_HDR_ENT_SIZE_ENT_SIZE_MASK)
- << RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT;
- write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_ENT_SIZE, reg);
- reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK)
- << RCV_HDR_SIZE_HDR_SIZE_SHIFT;
- write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_SIZE, reg);
- /*
- * Program dummy tail address for every receive context
- * before enabling any receive context
- */
- write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_TAIL_ADDR,
- dd->rcvhdrtail_dummy_dma);
+ set_hdrq_regs(rcd->dd, rcd->ctxt, rcd->rcvhdrqentsize,
+ rcd->rcvhdrq_cnt);
return 0;
@@ -1914,7 +1800,8 @@ bail:
}
/**
- * allocate eager buffers, both kernel and user contexts.
+ * hfi1_setup_eagerbufs - allocate eager buffers, both kernel and user
+ * contexts.
* @rcd: the context we are setting up.
*
 * Allocate the eager TID buffers and program them into the chip.
@@ -1926,20 +1813,11 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
{
struct hfi1_devdata *dd = rcd->dd;
u32 max_entries, egrtop, alloced_bytes = 0;
- gfp_t gfp_flags;
u16 order, idx = 0;
int ret = 0;
u16 round_mtu = roundup_pow_of_two(hfi1_max_mtu);
/*
- * GFP_USER, but without GFP_FS, so buffer cache can be
- * coalesced (we hope); otherwise, even at order 4,
- * heavy filesystem activity makes these fail, and we can
- * use compound pages.
- */
- gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
-
- /*
* The minimum size of the eager buffers is a groups of MTU-sized
* buffers.
* The global eager_buffer_size parameter is checked against the
@@ -1969,7 +1847,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
dma_alloc_coherent(&dd->pcidev->dev,
rcd->egrbufs.rcvtid_size,
&rcd->egrbufs.buffers[idx].dma,
- gfp_flags);
+ GFP_KERNEL);
if (rcd->egrbufs.buffers[idx].addr) {
rcd->egrbufs.buffers[idx].len =
rcd->egrbufs.rcvtid_size;
@@ -2040,7 +1918,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
rcd->egrbufs.size = alloced_bytes;
hfi1_cdbg(PROC,
- "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %uKB\n",
+ "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %uKB",
rcd->ctxt, rcd->egrbufs.alloced,
rcd->egrbufs.rcvtid_size / 1024, rcd->egrbufs.size / 1024);
@@ -2063,13 +1941,13 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
rcd->expected_count = MAX_TID_PAIR_ENTRIES * 2;
rcd->expected_base = rcd->eager_base + egrtop;
- hfi1_cdbg(PROC, "ctxt%u: eager:%u, exp:%u, egrbase:%u, expbase:%u\n",
+ hfi1_cdbg(PROC, "ctxt%u: eager:%u, exp:%u, egrbase:%u, expbase:%u",
rcd->ctxt, rcd->egrbufs.alloced, rcd->expected_count,
rcd->eager_base, rcd->expected_base);
if (!hfi1_rcvbuf_validate(rcd->egrbufs.rcvtid_size, PT_EAGER, &order)) {
hfi1_cdbg(PROC,
- "ctxt%u: current Eager buffer size is invalid %u\n",
+ "ctxt%u: current Eager buffer size is invalid %u",
rcd->ctxt, rcd->egrbufs.rcvtid_size);
ret = -EINVAL;
goto bail_rcvegrbuf_phys;
diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c
index 387305b768e9..d8dd1a599631 100644
--- a/drivers/infiniband/hw/hfi1/intr.c
+++ b/drivers/infiniband/hw/hfi1/intr.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#include <linux/pci.h>
@@ -89,37 +47,6 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
hfi1_event_pkey_change(ppd->dd, ppd->port);
}
-/**
- * format_hwmsg - format a single hwerror message
- * @msg message buffer
- * @msgl length of message buffer
- * @hwmsg message to add to message buffer
- */
-static void format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
-{
- strlcat(msg, "[", msgl);
- strlcat(msg, hwmsg, msgl);
- strlcat(msg, "]", msgl);
-}
-
-/**
- * hfi1_format_hwerrors - format hardware error messages for display
- * @hwerrs hardware errors bit vector
- * @hwerrmsgs hardware error descriptions
- * @nhwerrmsgs number of hwerrmsgs
- * @msg message buffer
- * @msgl message buffer length
- */
-void hfi1_format_hwerrors(u64 hwerrs, const struct hfi1_hwerror_msgs *hwerrmsgs,
- size_t nhwerrmsgs, char *msg, size_t msgl)
-{
- int i;
-
- for (i = 0; i < nhwerrmsgs; i++)
- if (hwerrs & hwerrmsgs[i].mask)
- format_hwmsg(msg, msgl, hwerrmsgs[i].msg);
-}
-
static void signal_ib_event(struct hfi1_pportdata *ppd, enum ib_event_type ev)
{
struct ib_event event;
diff --git a/drivers/infiniband/hw/hfi1/iowait.c b/drivers/infiniband/hw/hfi1/iowait.c
index adb4a1ba921b..111489802614 100644
--- a/drivers/infiniband/hw/hfi1/iowait.c
+++ b/drivers/infiniband/hw/hfi1/iowait.c
@@ -26,7 +26,7 @@ inline void iowait_clear_flag(struct iowait *wait, u32 flag)
clear_bit(flag, &wait->flags);
}
-/**
+/*
* iowait_init() - initialize wait structure
* @wait: wait struct to initialize
* @tx_limit: limit for overflow queuing
@@ -81,12 +81,14 @@ void iowait_init(struct iowait *wait, u32 tx_limit,
void iowait_cancel_work(struct iowait *w)
{
cancel_work_sync(&iowait_get_ib_work(w)->iowork);
- cancel_work_sync(&iowait_get_tid_work(w)->iowork);
+ /* Make sure that the iowork for TID RDMA is used */
+ if (iowait_get_tid_work(w)->iowork.func)
+ cancel_work_sync(&iowait_get_tid_work(w)->iowork);
}
/**
* iowait_set_work_flag - set work flag based on leg
- * @w - the iowait work struct
+ * @w: the iowait work struct
*/
int iowait_set_work_flag(struct iowait_work *w)
{
diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h
index 07847cb72169..7259f4f55700 100644
--- a/drivers/infiniband/hw/hfi1/iowait.h
+++ b/drivers/infiniband/hw/hfi1/iowait.h
@@ -1,52 +1,11 @@
-#ifndef _HFI1_IOWAIT_H
-#define _HFI1_IOWAIT_H
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+#ifndef _HFI1_IOWAIT_H
+#define _HFI1_IOWAIT_H
+
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
@@ -133,7 +92,7 @@ struct iowait_work {
*
* The lock field is used by waiters to record
* the seqlock_t that guards the list head.
- * Waiters explicity know that, but the destroy
+ * Waiters explicitly know that, but the destroy
* code that unwaits QPs does not.
*/
struct iowait {
@@ -321,7 +280,7 @@ static inline void iowait_drain_wakeup(struct iowait *wait)
/**
* iowait_get_txhead() - get packet off of iowait list
*
- * @wait iowait_work struture
+ * @wait: iowait_work structure
*/
static inline struct sdma_txreq *iowait_get_txhead(struct iowait_work *wait)
{
@@ -399,7 +358,7 @@ static inline void iowait_get_priority(struct iowait *w)
* @wait_head: the wait queue
*
* This function is called to insert an iowait struct into a
- * wait queue after a resource (eg, sdma decriptor or pio
+ * wait queue after a resource (eg, sdma descriptor or pio
* buffer) is run out.
*/
static inline void iowait_queue(bool pkts_sent, struct iowait *w,
diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h
new file mode 100644
index 000000000000..aec60d4888eb
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/ipoib.h
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for IPOIB functionality
+ */
+
+#ifndef HFI1_IPOIB_H
+#define HFI1_IPOIB_H
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/atomic.h>
+#include <linux/netdevice.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+#include <linux/list.h>
+#include <linux/if_infiniband.h>
+
+#include "hfi.h"
+#include "iowait.h"
+#include "netdev.h"
+
+#include <rdma/ib_verbs.h>
+
+#define HFI1_IPOIB_ENTROPY_SHIFT 24
+
+#define HFI1_IPOIB_TXREQ_NAME_LEN 32
+
+#define HFI1_IPOIB_PSEUDO_LEN 20
+#define HFI1_IPOIB_ENCAP_LEN 4
+
+struct hfi1_ipoib_dev_priv;
+
+union hfi1_ipoib_flow {
+ u16 as_int;
+ struct {
+ u8 tx_queue;
+ u8 sc5;
+ } __attribute__((__packed__));
+};
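
A minimal user-space sketch (illustrative only, not part of this patch; the names are made up) of why the flow is kept in a two-byte union: the tx path can detect a change in either tx_queue or sc5 with a single 16-bit compare of as_int, which is exactly how the send code decides whether to flush the tx list.

/* flow_key_demo.c - standalone sketch mirroring union hfi1_ipoib_flow */
#include <stdint.h>
#include <stdio.h>

union flow_key {
	uint16_t as_int;
	struct {
		uint8_t tx_queue;
		uint8_t sc5;
	} __attribute__((__packed__));
};

int main(void)
{
	union flow_key cur = { .as_int = 0xffff };	/* "no flow yet" sentinel */
	union flow_key next = { 0 };

	next.tx_queue = 3;
	next.sc5 = 5;

	/* One 16-bit compare covers both fields at once. */
	if (cur.as_int != next.as_int) {
		printf("flow changed: tx_queue=%u sc5=%u\n",
		       next.tx_queue, next.sc5);
		cur = next;
	}
	return 0;
}
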
+
+/**
+ * struct ipoib_txreq - IPOIB transmit descriptor
+ * @txreq: sdma transmit request
+ * @sdma_hdr: 9b ib headers
+ * @sdma_status: status returned by sdma engine
+ * @complete: non-zero implies complete
+ * @priv: ipoib netdev private data
+ * @txq: txq on which skb was output
+ * @skb: skb to send
+ */
+struct ipoib_txreq {
+ struct sdma_txreq txreq;
+ struct hfi1_sdma_header *sdma_hdr;
+ int sdma_status;
+ int complete;
+ struct hfi1_ipoib_dev_priv *priv;
+ struct hfi1_ipoib_txq *txq;
+ struct sk_buff *skb;
+};
+
+/**
+ * struct hfi1_ipoib_circ_buf - List of items to be processed
+ * @items: ring of items each a power of two size
+ * @max_items: max items + 1 that the ring can contain
+ * @shift: log2 of size for getting txreq
+ * @sent_txreqs: count of txreqs posted to sdma
+ * @tail: ring tail
+ * @stops: count of stops of queue
+ * @ring_full: ring has been filled
+ * @no_desc: descriptor shortage seen
+ * @complete_txreqs: count of txreqs completed by sdma
+ * @head: ring head
+ */
+struct hfi1_ipoib_circ_buf {
+ void *items;
+ u32 max_items;
+ u32 shift;
+ /* consumer cache line */
+ u64 ____cacheline_aligned_in_smp sent_txreqs;
+ u32 avail;
+ u32 tail;
+ atomic_t stops;
+ atomic_t ring_full;
+ atomic_t no_desc;
+ /* producer cache line */
+ u64 ____cacheline_aligned_in_smp complete_txreqs;
+ u32 head;
+};
+
+/**
+ * struct hfi1_ipoib_txq - IPOIB per Tx queue information
+ * @priv: private pointer
+ * @sde: sdma engine
+ * @tx_list: tx request list
+ * @sent_txreqs: count of txreqs posted to sdma
+ * @flow: tracks when list needs to be flushed for a flow change
+ * @q_idx: ipoib Tx queue index
+ * @pkts_sent: indicator packets have been sent from this queue
+ * @wait: iowait structure
+ * @napi: pointer to tx napi interface
+ * @tx_ring: ring of ipoib txreqs to be reaped by napi callback
+ */
+struct hfi1_ipoib_txq {
+ struct napi_struct napi;
+ struct hfi1_ipoib_dev_priv *priv;
+ struct sdma_engine *sde;
+ struct list_head tx_list;
+ union hfi1_ipoib_flow flow;
+ u8 q_idx;
+ bool pkts_sent;
+ struct iowait wait;
+
+ struct hfi1_ipoib_circ_buf ____cacheline_aligned_in_smp tx_ring;
+};
+
+struct hfi1_ipoib_dev_priv {
+ struct hfi1_devdata *dd;
+ struct net_device *netdev;
+ struct ib_device *device;
+ struct hfi1_ipoib_txq *txqs;
+ const struct net_device_ops *netdev_ops;
+ struct rvt_qp *qp;
+ u32 qkey;
+ u16 pkey;
+ u16 pkey_index;
+ u8 port_num;
+};
+
+/* hfi1 ipoib rdma netdev's private data structure */
+struct hfi1_ipoib_rdma_netdev {
+ struct rdma_netdev rn; /* keep this first */
+ /* followed by device private data */
+ struct hfi1_ipoib_dev_priv dev_priv;
+};
+
+static inline struct hfi1_ipoib_dev_priv *
+hfi1_ipoib_priv(const struct net_device *dev)
+{
+ return &((struct hfi1_ipoib_rdma_netdev *)netdev_priv(dev))->dev_priv;
+}
+
+int hfi1_ipoib_send(struct net_device *dev,
+ struct sk_buff *skb,
+ struct ib_ah *address,
+ u32 dqpn);
+
+int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv);
+void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv);
+
+int hfi1_ipoib_rxq_init(struct net_device *dev);
+void hfi1_ipoib_rxq_deinit(struct net_device *dev);
+
+void hfi1_ipoib_napi_tx_enable(struct net_device *dev);
+void hfi1_ipoib_napi_tx_disable(struct net_device *dev);
+
+struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq,
+ int size, void *data);
+
+int hfi1_ipoib_rn_get_params(struct ib_device *device,
+ u32 port_num,
+ enum rdma_netdev_t type,
+ struct rdma_netdev_alloc_params *params);
+
+void hfi1_ipoib_tx_timeout(struct net_device *dev, unsigned int q);
+
+#endif /* HFI1_IPOIB_H */
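
The hfi1_ipoib_rdma_netdev wrapper above relies on struct rdma_netdev sitting first, so the netdev's private area can be reinterpreted as the wrapper and the hfi1-specific data recovered from it, as hfi1_ipoib_priv() does. A standalone sketch of that layout trick, with stand-in struct names (nothing here is the real rdma_netdev API):

/* priv_layout_demo.c - sketch of the "rn must be first" layout */
#include <stdio.h>
#include <stdlib.h>

struct rdma_netdev_stub {		/* stands in for struct rdma_netdev */
	int port_num;
};

struct dev_priv {			/* stands in for hfi1_ipoib_dev_priv */
	int qkey;
};

struct rdma_netdev_wrapper {		/* mirrors hfi1_ipoib_rdma_netdev */
	struct rdma_netdev_stub rn;	/* keep this first */
	struct dev_priv dev_priv;	/* followed by device private data */
};

/* Emulates hfi1_ipoib_priv(): the opaque private area is really the wrapper. */
static struct dev_priv *get_priv(void *netdev_priv_area)
{
	return &((struct rdma_netdev_wrapper *)netdev_priv_area)->dev_priv;
}

int main(void)
{
	void *area = calloc(1, sizeof(struct rdma_netdev_wrapper));

	get_priv(area)->qkey = 0x1234;
	printf("qkey=0x%x\n", get_priv(area)->qkey);
	free(area);
	return 0;
}
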
diff --git a/drivers/infiniband/hw/hfi1/ipoib_main.c b/drivers/infiniband/hw/hfi1/ipoib_main.c
new file mode 100644
index 000000000000..7c9d5203002b
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/ipoib_main.c
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for ipoib functionality
+ */
+
+#include "ipoib.h"
+#include "hfi.h"
+
+static u32 qpn_from_mac(const u8 *mac_arr)
+{
+ return (u32)mac_arr[1] << 16 | mac_arr[2] << 8 | mac_arr[3];
+}
+
+static int hfi1_ipoib_dev_init(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ int ret;
+
+ ret = priv->netdev_ops->ndo_init(dev);
+ if (ret)
+ return ret;
+
+ ret = hfi1_netdev_add_data(priv->dd,
+ qpn_from_mac(priv->netdev->dev_addr),
+ dev);
+ if (ret < 0) {
+ priv->netdev_ops->ndo_uninit(dev);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void hfi1_ipoib_dev_uninit(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+
+ hfi1_netdev_remove_data(priv->dd, qpn_from_mac(priv->netdev->dev_addr));
+
+ priv->netdev_ops->ndo_uninit(dev);
+}
+
+static int hfi1_ipoib_dev_open(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ int ret;
+
+ ret = priv->netdev_ops->ndo_open(dev);
+ if (!ret) {
+ struct hfi1_ibport *ibp = to_iport(priv->device,
+ priv->port_num);
+ struct rvt_qp *qp;
+ u32 qpn = qpn_from_mac(priv->netdev->dev_addr);
+
+ rcu_read_lock();
+ qp = rvt_lookup_qpn(ib_to_rvt(priv->device), &ibp->rvp, qpn);
+ if (!qp) {
+ rcu_read_unlock();
+ priv->netdev_ops->ndo_stop(dev);
+ return -EINVAL;
+ }
+ rvt_get_qp(qp);
+ priv->qp = qp;
+ rcu_read_unlock();
+
+ hfi1_netdev_enable_queues(priv->dd);
+ hfi1_ipoib_napi_tx_enable(dev);
+ }
+
+ return ret;
+}
+
+static int hfi1_ipoib_dev_stop(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+
+ if (!priv->qp)
+ return 0;
+
+ hfi1_ipoib_napi_tx_disable(dev);
+ hfi1_netdev_disable_queues(priv->dd);
+
+ rvt_put_qp(priv->qp);
+ priv->qp = NULL;
+
+ return priv->netdev_ops->ndo_stop(dev);
+}
+
+static const struct net_device_ops hfi1_ipoib_netdev_ops = {
+ .ndo_init = hfi1_ipoib_dev_init,
+ .ndo_uninit = hfi1_ipoib_dev_uninit,
+ .ndo_open = hfi1_ipoib_dev_open,
+ .ndo_stop = hfi1_ipoib_dev_stop,
+};
+
+static int hfi1_ipoib_mcast_attach(struct net_device *dev,
+ struct ib_device *device,
+ union ib_gid *mgid,
+ u16 mlid,
+ int set_qkey,
+ u32 qkey)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ u32 qpn = (u32)qpn_from_mac(priv->netdev->dev_addr);
+ struct hfi1_ibport *ibp = to_iport(priv->device, priv->port_num);
+ struct rvt_qp *qp;
+ int ret = -EINVAL;
+
+ rcu_read_lock();
+
+ qp = rvt_lookup_qpn(ib_to_rvt(priv->device), &ibp->rvp, qpn);
+ if (qp) {
+ rvt_get_qp(qp);
+ rcu_read_unlock();
+ if (set_qkey)
+ priv->qkey = qkey;
+
+ /* attach QP to multicast group */
+ ret = ib_attach_mcast(&qp->ibqp, mgid, mlid);
+ rvt_put_qp(qp);
+ } else {
+ rcu_read_unlock();
+ }
+
+ return ret;
+}
+
+static int hfi1_ipoib_mcast_detach(struct net_device *dev,
+ struct ib_device *device,
+ union ib_gid *mgid,
+ u16 mlid)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ u32 qpn = (u32)qpn_from_mac(priv->netdev->dev_addr);
+ struct hfi1_ibport *ibp = to_iport(priv->device, priv->port_num);
+ struct rvt_qp *qp;
+ int ret = -EINVAL;
+
+ rcu_read_lock();
+
+ qp = rvt_lookup_qpn(ib_to_rvt(priv->device), &ibp->rvp, qpn);
+ if (qp) {
+ rvt_get_qp(qp);
+ rcu_read_unlock();
+ ret = ib_detach_mcast(&qp->ibqp, mgid, mlid);
+ rvt_put_qp(qp);
+ } else {
+ rcu_read_unlock();
+ }
+ return ret;
+}
+
+static void hfi1_ipoib_netdev_dtor(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+
+ hfi1_ipoib_txreq_deinit(priv);
+ hfi1_ipoib_rxq_deinit(priv->netdev);
+}
+
+static void hfi1_ipoib_set_id(struct net_device *dev, int id)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+
+ priv->pkey_index = (u16)id;
+ ib_query_pkey(priv->device,
+ priv->port_num,
+ priv->pkey_index,
+ &priv->pkey);
+}
+
+static int hfi1_ipoib_setup_rn(struct ib_device *device,
+ u32 port_num,
+ struct net_device *netdev,
+ void *param)
+{
+ struct hfi1_devdata *dd = dd_from_ibdev(device);
+ struct rdma_netdev *rn = netdev_priv(netdev);
+ struct hfi1_ipoib_dev_priv *priv;
+ int rc;
+
+ rn->send = hfi1_ipoib_send;
+ rn->tx_timeout = hfi1_ipoib_tx_timeout;
+ rn->attach_mcast = hfi1_ipoib_mcast_attach;
+ rn->detach_mcast = hfi1_ipoib_mcast_detach;
+ rn->set_id = hfi1_ipoib_set_id;
+ rn->hca = device;
+ rn->port_num = port_num;
+ rn->mtu = netdev->mtu;
+
+ priv = hfi1_ipoib_priv(netdev);
+ priv->dd = dd;
+ priv->netdev = netdev;
+ priv->device = device;
+ priv->port_num = port_num;
+ priv->netdev_ops = netdev->netdev_ops;
+
+ ib_query_pkey(device, port_num, priv->pkey_index, &priv->pkey);
+
+ rc = hfi1_ipoib_txreq_init(priv);
+ if (rc) {
+ dd_dev_err(dd, "IPoIB netdev TX init - failed(%d)\n", rc);
+ return rc;
+ }
+
+ rc = hfi1_ipoib_rxq_init(netdev);
+ if (rc) {
+ dd_dev_err(dd, "IPoIB netdev RX init - failed(%d)\n", rc);
+ hfi1_ipoib_txreq_deinit(priv);
+ return rc;
+ }
+
+ netdev->netdev_ops = &hfi1_ipoib_netdev_ops;
+
+ netdev->priv_destructor = hfi1_ipoib_netdev_dtor;
+ netdev->needs_free_netdev = true;
+ netdev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
+
+ return 0;
+}
+
+int hfi1_ipoib_rn_get_params(struct ib_device *device,
+ u32 port_num,
+ enum rdma_netdev_t type,
+ struct rdma_netdev_alloc_params *params)
+{
+ struct hfi1_devdata *dd = dd_from_ibdev(device);
+
+ if (type != RDMA_NETDEV_IPOIB)
+ return -EOPNOTSUPP;
+
+ if (!HFI1_CAP_IS_KSET(AIP) || !dd->num_netdev_contexts)
+ return -EOPNOTSUPP;
+
+ if (!port_num || port_num > dd->num_pports)
+ return -EINVAL;
+
+ params->sizeof_priv = sizeof(struct hfi1_ipoib_rdma_netdev);
+ params->txqs = dd->num_sdma;
+ params->rxqs = dd->num_netdev_contexts;
+ params->param = NULL;
+ params->initialize_rdma_netdev = hfi1_ipoib_setup_rn;
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/hfi1/ipoib_rx.c b/drivers/infiniband/hw/hfi1/ipoib_rx.c
new file mode 100644
index 000000000000..629691a572ef
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/ipoib_rx.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+#include "netdev.h"
+#include "ipoib.h"
+
+#define HFI1_IPOIB_SKB_PAD ((NET_SKB_PAD) + (NET_IP_ALIGN))
+
+static void copy_ipoib_buf(struct sk_buff *skb, void *data, int size)
+{
+ skb_checksum_none_assert(skb);
+ skb->protocol = *((__be16 *)data);
+
+ skb_put_data(skb, data, size);
+ skb->mac_header = HFI1_IPOIB_PSEUDO_LEN;
+ skb_pull(skb, HFI1_IPOIB_ENCAP_LEN);
+}
+
+static struct sk_buff *prepare_frag_skb(struct napi_struct *napi, int size)
+{
+ struct sk_buff *skb;
+ int skb_size = SKB_DATA_ALIGN(size + HFI1_IPOIB_SKB_PAD);
+ void *frag;
+
+ skb_size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ skb_size = SKB_DATA_ALIGN(skb_size);
+ frag = napi_alloc_frag(skb_size);
+
+ if (unlikely(!frag))
+ return napi_alloc_skb(napi, size);
+
+ skb = build_skb(frag, skb_size);
+
+ if (unlikely(!skb)) {
+ skb_free_frag(frag);
+ return NULL;
+ }
+
+ skb_reserve(skb, HFI1_IPOIB_SKB_PAD);
+ return skb;
+}
+
+struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq,
+ int size, void *data)
+{
+ struct napi_struct *napi = &rxq->napi;
+ int skb_size = size + HFI1_IPOIB_ENCAP_LEN;
+ struct sk_buff *skb;
+
+ /*
+	 * For smaller (4k + skb overhead) allocations we use the napi
+	 * cache. Otherwise we try the napi frag cache.
+ */
+ if (size <= SKB_WITH_OVERHEAD(PAGE_SIZE))
+ skb = napi_alloc_skb(napi, skb_size);
+ else
+ skb = prepare_frag_skb(napi, skb_size);
+
+ if (unlikely(!skb))
+ return NULL;
+
+ copy_ipoib_buf(skb, data, size);
+
+ return skb;
+}
+
+int hfi1_ipoib_rxq_init(struct net_device *netdev)
+{
+ struct hfi1_ipoib_dev_priv *ipoib_priv = hfi1_ipoib_priv(netdev);
+ struct hfi1_devdata *dd = ipoib_priv->dd;
+ int ret;
+
+ ret = hfi1_netdev_rx_init(dd);
+ if (ret)
+ return ret;
+
+ hfi1_init_aip_rsm(dd);
+
+ return ret;
+}
+
+void hfi1_ipoib_rxq_deinit(struct net_device *netdev)
+{
+ struct hfi1_ipoib_dev_priv *ipoib_priv = hfi1_ipoib_priv(netdev);
+ struct hfi1_devdata *dd = ipoib_priv->dd;
+
+ hfi1_deinit_aip_rsm(dd);
+ hfi1_netdev_rx_destroy(dd);
+}
diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c
new file mode 100644
index 000000000000..8b9cc55db59d
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c
@@ -0,0 +1,868 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for IPOIB SDMA functionality
+ */
+
+#include <linux/log2.h>
+#include <linux/circ_buf.h>
+
+#include "sdma.h"
+#include "verbs.h"
+#include "trace_ibhdrs.h"
+#include "ipoib.h"
+#include "trace_tx.h"
+
+/* Add a convenience helper */
+#define CIRC_ADD(val, add, size) (((val) + (add)) & ((size) - 1))
+#define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size)
+#define CIRC_PREV(val, size) CIRC_ADD(val, -1, size)
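
A standalone sketch (illustrative only, not part of this patch) of what these helpers do: because the ring size is a power of two, masking with (size - 1) wraps both forward and backward index steps without a modulo or a branch.

/* circ_demo.c - power-of-two ring index helpers */
#include <stdio.h>

#define CIRC_ADD(val, add, size)  (((val) + (add)) & ((size) - 1))
#define CIRC_NEXT(val, size)      CIRC_ADD(val, 1, size)
#define CIRC_PREV(val, size)      CIRC_ADD(val, -1, size)

int main(void)
{
	unsigned int size = 8;		/* must be a power of two */
	unsigned int idx = 7;

	/* Masking with (size - 1) wraps 7 -> 0 and 0 -> 7. */
	printf("next of %u is %u\n", idx, CIRC_NEXT(idx, size));
	printf("prev of %u is %u\n", 0u, CIRC_PREV(0u, size));
	return 0;
}
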
+
+struct ipoib_txparms {
+ struct hfi1_devdata *dd;
+ struct rdma_ah_attr *ah_attr;
+ struct hfi1_ibport *ibp;
+ struct hfi1_ipoib_txq *txq;
+ union hfi1_ipoib_flow flow;
+ u32 dqpn;
+ u8 hdr_dwords;
+ u8 entropy;
+};
+
+static struct ipoib_txreq *
+hfi1_txreq_from_idx(struct hfi1_ipoib_circ_buf *r, u32 idx)
+{
+ return (struct ipoib_txreq *)(r->items + (idx << r->shift));
+}
+
+static u32 hfi1_ipoib_txreqs(const u64 sent, const u64 completed)
+{
+ return sent - completed;
+}
+
+static u64 hfi1_ipoib_used(struct hfi1_ipoib_txq *txq)
+{
+ return hfi1_ipoib_txreqs(txq->tx_ring.sent_txreqs,
+ txq->tx_ring.complete_txreqs);
+}
+
+static void hfi1_ipoib_stop_txq(struct hfi1_ipoib_txq *txq)
+{
+ trace_hfi1_txq_stop(txq);
+ if (atomic_inc_return(&txq->tx_ring.stops) == 1)
+ netif_stop_subqueue(txq->priv->netdev, txq->q_idx);
+}
+
+static void hfi1_ipoib_wake_txq(struct hfi1_ipoib_txq *txq)
+{
+ trace_hfi1_txq_wake(txq);
+ if (atomic_dec_and_test(&txq->tx_ring.stops))
+ netif_wake_subqueue(txq->priv->netdev, txq->q_idx);
+}
+
+static uint hfi1_ipoib_ring_hwat(struct hfi1_ipoib_txq *txq)
+{
+ return min_t(uint, txq->priv->netdev->tx_queue_len,
+ txq->tx_ring.max_items - 1);
+}
+
+static uint hfi1_ipoib_ring_lwat(struct hfi1_ipoib_txq *txq)
+{
+ return min_t(uint, txq->priv->netdev->tx_queue_len,
+ txq->tx_ring.max_items) >> 1;
+}
+
+static void hfi1_ipoib_check_queue_depth(struct hfi1_ipoib_txq *txq)
+{
+ ++txq->tx_ring.sent_txreqs;
+ if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq) &&
+ !atomic_xchg(&txq->tx_ring.ring_full, 1)) {
+ trace_hfi1_txq_full(txq);
+ hfi1_ipoib_stop_txq(txq);
+ }
+}
+
+static void hfi1_ipoib_check_queue_stopped(struct hfi1_ipoib_txq *txq)
+{
+ struct net_device *dev = txq->priv->netdev;
+
+ /* If shutting down just return as queue state is irrelevant */
+ if (unlikely(dev->reg_state != NETREG_REGISTERED))
+ return;
+
+ /*
+ * When the queue has been drained to less than half full it will be
+ * restarted.
+ * The size of the txreq ring is fixed at initialization.
+ * The tx queue len can be adjusted upward while the interface is
+ * running.
+ * The tx queue len can be large enough to overflow the txreq_ring.
+ * Use the minimum of the current tx_queue_len or the rings max txreqs
+ * to protect against ring overflow.
+ */
+ if (hfi1_ipoib_used(txq) < hfi1_ipoib_ring_lwat(txq) &&
+ atomic_xchg(&txq->tx_ring.ring_full, 0)) {
+ trace_hfi1_txq_xmit_unstopped(txq);
+ hfi1_ipoib_wake_txq(txq);
+ }
+}
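
The stop/wake pair above forms a hysteresis: the subqueue is stopped once the in-flight txreq count reaches the high watermark and is only woken again after it drains below the low watermark (roughly half the ring). A standalone sketch of that behaviour with made-up watermark values (illustrative only, not part of this patch):

/* watermark_demo.c - stop/wake hysteresis sketch */
#include <stdbool.h>
#include <stdio.h>

#define HWAT 6	/* stop the queue at or above this many in-flight txreqs */
#define LWAT 4	/* wake it again once in-flight drops below this */

int main(void)
{
	unsigned int used = 0;
	bool stopped = false;
	int i;

	for (i = 0; i < 8; i++) {		/* post 8 sends */
		used++;
		if (used >= HWAT && !stopped) {
			stopped = true;
			printf("stop queue at used=%u\n", used);
		}
	}
	while (used) {				/* complete them */
		used--;
		if (used < LWAT && stopped) {
			stopped = false;
			printf("wake queue at used=%u\n", used);
		}
	}
	return 0;
}
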
+
+static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget)
+{
+ struct hfi1_ipoib_dev_priv *priv = tx->txq->priv;
+
+ if (likely(!tx->sdma_status)) {
+ dev_sw_netstats_tx_add(priv->netdev, 1, tx->skb->len);
+ } else {
+ ++priv->netdev->stats.tx_errors;
+ dd_dev_warn(priv->dd,
+ "%s: Status = 0x%x pbc 0x%llx txq = %d sde = %d\n",
+ __func__, tx->sdma_status,
+ le64_to_cpu(tx->sdma_hdr->pbc), tx->txq->q_idx,
+ tx->txq->sde->this_idx);
+ }
+
+ napi_consume_skb(tx->skb, budget);
+ tx->skb = NULL;
+ sdma_txclean(priv->dd, &tx->txreq);
+}
+
+static void hfi1_ipoib_drain_tx_ring(struct hfi1_ipoib_txq *txq)
+{
+ struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
+ int i;
+ struct ipoib_txreq *tx;
+
+ for (i = 0; i < tx_ring->max_items; i++) {
+ tx = hfi1_txreq_from_idx(tx_ring, i);
+ tx->complete = 0;
+ dev_kfree_skb_any(tx->skb);
+ tx->skb = NULL;
+ sdma_txclean(txq->priv->dd, &tx->txreq);
+ }
+ tx_ring->head = 0;
+ tx_ring->tail = 0;
+ tx_ring->complete_txreqs = 0;
+ tx_ring->sent_txreqs = 0;
+ tx_ring->avail = hfi1_ipoib_ring_hwat(txq);
+}
+
+static int hfi1_ipoib_poll_tx_ring(struct napi_struct *napi, int budget)
+{
+ struct hfi1_ipoib_txq *txq =
+ container_of(napi, struct hfi1_ipoib_txq, napi);
+ struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
+ u32 head = tx_ring->head;
+ u32 max_tx = tx_ring->max_items;
+ int work_done;
+ struct ipoib_txreq *tx = hfi1_txreq_from_idx(tx_ring, head);
+
+ trace_hfi1_txq_poll(txq);
+ for (work_done = 0; work_done < budget; work_done++) {
+ /* See hfi1_ipoib_sdma_complete() */
+ if (!smp_load_acquire(&tx->complete))
+ break;
+ tx->complete = 0;
+ trace_hfi1_tx_produce(tx, head);
+ hfi1_ipoib_free_tx(tx, budget);
+ head = CIRC_NEXT(head, max_tx);
+ tx = hfi1_txreq_from_idx(tx_ring, head);
+ }
+ tx_ring->complete_txreqs += work_done;
+
+ /* Finished freeing tx items so store the head value. */
+ smp_store_release(&tx_ring->head, head);
+
+ hfi1_ipoib_check_queue_stopped(txq);
+
+ if (work_done < budget)
+ napi_complete_done(napi, work_done);
+
+ return work_done;
+}
+
+static void hfi1_ipoib_sdma_complete(struct sdma_txreq *txreq, int status)
+{
+ struct ipoib_txreq *tx = container_of(txreq, struct ipoib_txreq, txreq);
+
+ trace_hfi1_txq_complete(tx->txq);
+ tx->sdma_status = status;
+ /* see hfi1_ipoib_poll_tx_ring */
+ smp_store_release(&tx->complete, 1);
+ napi_schedule_irqoff(&tx->txq->napi);
+}
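
hfi1_ipoib_sdma_complete() publishes tx->complete with smp_store_release() and hfi1_ipoib_poll_tx_ring() reads it with smp_load_acquire(), so the status written before the publish is guaranteed to be visible before the poller frees the txreq. A user-space sketch of the same release/acquire handshake using C11 atomics (illustrative only, not kernel code; build with -pthread):

/* acq_rel_demo.c - release/acquire completion handshake sketch */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static int sdma_status;			/* payload, written before publish */
static atomic_int complete;		/* plays the role of tx->complete */

static void *completer(void *arg)
{
	sdma_status = -1;			/* e.g. an error code from the engine */
	atomic_store_explicit(&complete, 1,
			      memory_order_release);	/* publish */
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, completer, NULL);

	/* Poll side: only read the payload after the acquire load sees 1. */
	while (!atomic_load_explicit(&complete, memory_order_acquire))
		;
	printf("status=%d\n", sdma_status);

	pthread_join(t, NULL);
	return 0;
}
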
+
+static int hfi1_ipoib_build_ulp_payload(struct ipoib_txreq *tx,
+ struct ipoib_txparms *txp)
+{
+ struct hfi1_devdata *dd = txp->dd;
+ struct sdma_txreq *txreq = &tx->txreq;
+ struct sk_buff *skb = tx->skb;
+ int ret = 0;
+ int i;
+
+ if (skb_headlen(skb)) {
+ ret = sdma_txadd_kvaddr(dd, txreq, skb->data, skb_headlen(skb));
+ if (unlikely(ret))
+ return ret;
+ }
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ ret = sdma_txadd_page(dd,
+ txreq,
+ skb_frag_page(frag),
+ skb_frag_off(frag),
+ skb_frag_size(frag),
+ NULL, NULL, NULL);
+ if (unlikely(ret))
+ break;
+ }
+
+ return ret;
+}
+
+static int hfi1_ipoib_build_tx_desc(struct ipoib_txreq *tx,
+ struct ipoib_txparms *txp)
+{
+ struct hfi1_devdata *dd = txp->dd;
+ struct sdma_txreq *txreq = &tx->txreq;
+ struct hfi1_sdma_header *sdma_hdr = tx->sdma_hdr;
+ u16 pkt_bytes =
+ sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2) + tx->skb->len;
+ int ret;
+
+ ret = sdma_txinit(txreq, 0, pkt_bytes, hfi1_ipoib_sdma_complete);
+ if (unlikely(ret))
+ return ret;
+
+ /* add pbc + headers */
+ ret = sdma_txadd_kvaddr(dd,
+ txreq,
+ sdma_hdr,
+ sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2));
+ if (unlikely(ret))
+ return ret;
+
+ /* add the ulp payload */
+ return hfi1_ipoib_build_ulp_payload(tx, txp);
+}
+
+static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx,
+ struct ipoib_txparms *txp)
+{
+ struct hfi1_ipoib_dev_priv *priv = tx->txq->priv;
+ struct hfi1_sdma_header *sdma_hdr = tx->sdma_hdr;
+ struct sk_buff *skb = tx->skb;
+ struct hfi1_pportdata *ppd = ppd_from_ibp(txp->ibp);
+ struct rdma_ah_attr *ah_attr = txp->ah_attr;
+ struct ib_other_headers *ohdr;
+ struct ib_grh *grh;
+ u16 dwords;
+ u16 slid;
+ u16 dlid;
+ u16 lrh0;
+ u32 bth0;
+ u32 sqpn = (u32)(priv->netdev->dev_addr[1] << 16 |
+ priv->netdev->dev_addr[2] << 8 |
+ priv->netdev->dev_addr[3]);
+ u16 payload_dwords;
+ u8 pad_cnt;
+
+ pad_cnt = -skb->len & 3;
+
+ /* Includes ICRC */
+ payload_dwords = ((skb->len + pad_cnt) >> 2) + SIZE_OF_CRC;
+
+ /* header size in dwords LRH+BTH+DETH = (8+12+8)/4. */
+ txp->hdr_dwords = 7;
+
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
+ grh = &sdma_hdr->hdr.ibh.u.l.grh;
+ txp->hdr_dwords +=
+ hfi1_make_grh(txp->ibp,
+ grh,
+ rdma_ah_read_grh(ah_attr),
+ txp->hdr_dwords - LRH_9B_DWORDS,
+ payload_dwords);
+ lrh0 = HFI1_LRH_GRH;
+ ohdr = &sdma_hdr->hdr.ibh.u.l.oth;
+ } else {
+ lrh0 = HFI1_LRH_BTH;
+ ohdr = &sdma_hdr->hdr.ibh.u.oth;
+ }
+
+ lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4;
+ lrh0 |= (txp->flow.sc5 & 0xf) << 12;
+
+ dlid = opa_get_lid(rdma_ah_get_dlid(ah_attr), 9B);
+ if (dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
+ slid = be16_to_cpu(IB_LID_PERMISSIVE);
+ } else {
+ u16 lid = (u16)ppd->lid;
+
+ if (lid) {
+ lid |= rdma_ah_get_path_bits(ah_attr) &
+ ((1 << ppd->lmc) - 1);
+ slid = lid;
+ } else {
+ slid = be16_to_cpu(IB_LID_PERMISSIVE);
+ }
+ }
+
+ /* Includes ICRC */
+ dwords = txp->hdr_dwords + payload_dwords;
+
+ /* Build the lrh */
+ sdma_hdr->hdr.hdr_type = HFI1_PKT_TYPE_9B;
+ hfi1_make_ib_hdr(&sdma_hdr->hdr.ibh, lrh0, dwords, dlid, slid);
+
+ /* Build the bth */
+ bth0 = (IB_OPCODE_UD_SEND_ONLY << 24) | (pad_cnt << 20) | priv->pkey;
+
+ ohdr->bth[0] = cpu_to_be32(bth0);
+ ohdr->bth[1] = cpu_to_be32(txp->dqpn);
+ ohdr->bth[2] = cpu_to_be32(mask_psn((u32)txp->txq->tx_ring.sent_txreqs));
+
+ /* Build the deth */
+ ohdr->u.ud.deth[0] = cpu_to_be32(priv->qkey);
+ ohdr->u.ud.deth[1] = cpu_to_be32((txp->entropy <<
+ HFI1_IPOIB_ENTROPY_SHIFT) | sqpn);
+
+ /* Construct the pbc. */
+ sdma_hdr->pbc =
+ cpu_to_le64(create_pbc(ppd,
+ ib_is_sc5(txp->flow.sc5) <<
+ PBC_DC_INFO_SHIFT,
+ 0,
+ sc_to_vlt(priv->dd, txp->flow.sc5),
+ dwords - SIZE_OF_CRC +
+ (sizeof(sdma_hdr->pbc) >> 2)));
+}
+
+static struct ipoib_txreq *hfi1_ipoib_send_dma_common(struct net_device *dev,
+ struct sk_buff *skb,
+ struct ipoib_txparms *txp)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ struct hfi1_ipoib_txq *txq = txp->txq;
+ struct ipoib_txreq *tx;
+ struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
+ u32 tail = tx_ring->tail;
+ int ret;
+
+ if (unlikely(!tx_ring->avail)) {
+ u32 head;
+
+ if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq))
+ /* This shouldn't happen with a stopped queue */
+ return ERR_PTR(-ENOMEM);
+ /* See hfi1_ipoib_poll_tx_ring() */
+ head = smp_load_acquire(&tx_ring->head);
+ tx_ring->avail =
+ min_t(u32, hfi1_ipoib_ring_hwat(txq),
+ CIRC_CNT(head, tail, tx_ring->max_items));
+ } else {
+ tx_ring->avail--;
+ }
+ tx = hfi1_txreq_from_idx(tx_ring, tail);
+ trace_hfi1_txq_alloc_tx(txq);
+
+ /* so that we can test if the sdma descriptors are there */
+ tx->txreq.num_desc = 0;
+ tx->txq = txq;
+ tx->skb = skb;
+ INIT_LIST_HEAD(&tx->txreq.list);
+
+ hfi1_ipoib_build_ib_tx_headers(tx, txp);
+
+ ret = hfi1_ipoib_build_tx_desc(tx, txp);
+ if (likely(!ret)) {
+ if (txq->flow.as_int != txp->flow.as_int) {
+ txq->flow.tx_queue = txp->flow.tx_queue;
+ txq->flow.sc5 = txp->flow.sc5;
+ txq->sde =
+ sdma_select_engine_sc(priv->dd,
+ txp->flow.tx_queue,
+ txp->flow.sc5);
+ trace_hfi1_flow_switch(txq);
+ }
+
+ return tx;
+ }
+
+ sdma_txclean(priv->dd, &tx->txreq);
+
+ return ERR_PTR(ret);
+}
+
+static int hfi1_ipoib_submit_tx_list(struct net_device *dev,
+ struct hfi1_ipoib_txq *txq)
+{
+ int ret;
+ u16 count_out;
+
+ ret = sdma_send_txlist(txq->sde,
+ iowait_get_ib_work(&txq->wait),
+ &txq->tx_list,
+ &count_out);
+ if (likely(!ret) || ret == -EBUSY || ret == -ECOMM)
+ return ret;
+
+ dd_dev_warn(txq->priv->dd, "cannot send skb tx list, err %d.\n", ret);
+
+ return ret;
+}
+
+static int hfi1_ipoib_flush_tx_list(struct net_device *dev,
+ struct hfi1_ipoib_txq *txq)
+{
+ int ret = 0;
+
+ if (!list_empty(&txq->tx_list)) {
+ /* Flush the current list */
+ ret = hfi1_ipoib_submit_tx_list(dev, txq);
+
+ if (unlikely(ret))
+ if (ret != -EBUSY)
+ ++dev->stats.tx_carrier_errors;
+ }
+
+ return ret;
+}
+
+static int hfi1_ipoib_submit_tx(struct hfi1_ipoib_txq *txq,
+ struct ipoib_txreq *tx)
+{
+ int ret;
+
+ ret = sdma_send_txreq(txq->sde,
+ iowait_get_ib_work(&txq->wait),
+ &tx->txreq,
+ txq->pkts_sent);
+ if (likely(!ret)) {
+ txq->pkts_sent = true;
+ iowait_starve_clear(txq->pkts_sent, &txq->wait);
+ }
+
+ return ret;
+}
+
+static int hfi1_ipoib_send_dma_single(struct net_device *dev,
+ struct sk_buff *skb,
+ struct ipoib_txparms *txp)
+{
+ struct hfi1_ipoib_txq *txq = txp->txq;
+ struct hfi1_ipoib_circ_buf *tx_ring;
+ struct ipoib_txreq *tx;
+ int ret;
+
+ tx = hfi1_ipoib_send_dma_common(dev, skb, txp);
+ if (IS_ERR(tx)) {
+ int ret = PTR_ERR(tx);
+
+ dev_kfree_skb_any(skb);
+
+ if (ret == -ENOMEM)
+ ++dev->stats.tx_errors;
+ else
+ ++dev->stats.tx_carrier_errors;
+
+ return NETDEV_TX_OK;
+ }
+
+ tx_ring = &txq->tx_ring;
+ trace_hfi1_tx_consume(tx, tx_ring->tail);
+ /* consume tx */
+ smp_store_release(&tx_ring->tail, CIRC_NEXT(tx_ring->tail, tx_ring->max_items));
+ ret = hfi1_ipoib_submit_tx(txq, tx);
+ if (likely(!ret)) {
+tx_ok:
+ trace_sdma_output_ibhdr(txq->priv->dd,
+ &tx->sdma_hdr->hdr,
+ ib_is_sc5(txp->flow.sc5));
+ hfi1_ipoib_check_queue_depth(txq);
+ return NETDEV_TX_OK;
+ }
+
+ txq->pkts_sent = false;
+
+ if (ret == -EBUSY || ret == -ECOMM)
+ goto tx_ok;
+
+ /* mark complete and kick napi tx */
+ smp_store_release(&tx->complete, 1);
+ napi_schedule(&tx->txq->napi);
+
+ ++dev->stats.tx_carrier_errors;
+
+ return NETDEV_TX_OK;
+}
+
+static int hfi1_ipoib_send_dma_list(struct net_device *dev,
+ struct sk_buff *skb,
+ struct ipoib_txparms *txp)
+{
+ struct hfi1_ipoib_txq *txq = txp->txq;
+ struct hfi1_ipoib_circ_buf *tx_ring;
+ struct ipoib_txreq *tx;
+
+	/* Has the flow changed? */
+ if (txq->flow.as_int != txp->flow.as_int) {
+ int ret;
+
+ trace_hfi1_flow_flush(txq);
+ ret = hfi1_ipoib_flush_tx_list(dev, txq);
+ if (unlikely(ret)) {
+ if (ret == -EBUSY)
+ ++dev->stats.tx_dropped;
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
+ }
+ tx = hfi1_ipoib_send_dma_common(dev, skb, txp);
+ if (IS_ERR(tx)) {
+ int ret = PTR_ERR(tx);
+
+ dev_kfree_skb_any(skb);
+
+ if (ret == -ENOMEM)
+ ++dev->stats.tx_errors;
+ else
+ ++dev->stats.tx_carrier_errors;
+
+ return NETDEV_TX_OK;
+ }
+
+ tx_ring = &txq->tx_ring;
+ trace_hfi1_tx_consume(tx, tx_ring->tail);
+ /* consume tx */
+ smp_store_release(&tx_ring->tail, CIRC_NEXT(tx_ring->tail, tx_ring->max_items));
+ list_add_tail(&tx->txreq.list, &txq->tx_list);
+
+ hfi1_ipoib_check_queue_depth(txq);
+
+ trace_sdma_output_ibhdr(txq->priv->dd,
+ &tx->sdma_hdr->hdr,
+ ib_is_sc5(txp->flow.sc5));
+
+ if (!netdev_xmit_more())
+ (void)hfi1_ipoib_flush_tx_list(dev, txq);
+
+ return NETDEV_TX_OK;
+}
+
+static u8 hfi1_ipoib_calc_entropy(struct sk_buff *skb)
+{
+ if (skb_transport_header_was_set(skb)) {
+ u8 *hdr = (u8 *)skb_transport_header(skb);
+
+ return (hdr[0] ^ hdr[1] ^ hdr[2] ^ hdr[3]);
+ }
+
+ return (u8)skb_get_queue_mapping(skb);
+}
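
The entropy byte is simply the XOR of the first four transport-header bytes (the TCP/UDP port fields when a transport header is present), which later seeds SDMA engine selection so distinct flows spread across engines. A standalone sketch with a hypothetical UDP header (illustrative only, not part of this patch):

/* entropy_demo.c - 4-byte XOR entropy sketch */
#include <stdint.h>
#include <stdio.h>

static uint8_t calc_entropy(const uint8_t *hdr)
{
	return hdr[0] ^ hdr[1] ^ hdr[2] ^ hdr[3];
}

int main(void)
{
	/* source port 0x1234, dest port 0x14e9 */
	uint8_t udp_hdr[8] = { 0x12, 0x34, 0x14, 0xe9, 0, 0, 0, 0 };

	printf("entropy=0x%02x\n", calc_entropy(udp_hdr));
	return 0;
}
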
+
+int hfi1_ipoib_send(struct net_device *dev,
+ struct sk_buff *skb,
+ struct ib_ah *address,
+ u32 dqpn)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ struct ipoib_txparms txp;
+ struct rdma_netdev *rn = netdev_priv(dev);
+
+ if (unlikely(skb->len > rn->mtu + HFI1_IPOIB_ENCAP_LEN)) {
+ dd_dev_warn(priv->dd, "packet len %d (> %d) too long to send, dropping\n",
+ skb->len,
+ rn->mtu + HFI1_IPOIB_ENCAP_LEN);
+ ++dev->stats.tx_dropped;
+ ++dev->stats.tx_errors;
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
+
+ txp.dd = priv->dd;
+ txp.ah_attr = &ibah_to_rvtah(address)->attr;
+ txp.ibp = to_iport(priv->device, priv->port_num);
+ txp.txq = &priv->txqs[skb_get_queue_mapping(skb)];
+ txp.dqpn = dqpn;
+ txp.flow.sc5 = txp.ibp->sl_to_sc[rdma_ah_get_sl(txp.ah_attr)];
+ txp.flow.tx_queue = (u8)skb_get_queue_mapping(skb);
+ txp.entropy = hfi1_ipoib_calc_entropy(skb);
+
+ if (netdev_xmit_more() || !list_empty(&txp.txq->tx_list))
+ return hfi1_ipoib_send_dma_list(dev, skb, &txp);
+
+ return hfi1_ipoib_send_dma_single(dev, skb, &txp);
+}
+
+/*
+ * hfi1_ipoib_sdma_sleep - ipoib sdma sleep function
+ *
+ * This function gets called from sdma_send_txreq() when there are not enough
+ * sdma descriptors available to send the packet. It adds the Tx queue's wait
+ * structure to the sdma engine's dmawait list to be woken up when descriptors
+ * become available.
+ */
+static int hfi1_ipoib_sdma_sleep(struct sdma_engine *sde,
+ struct iowait_work *wait,
+ struct sdma_txreq *txreq,
+ uint seq,
+ bool pkts_sent)
+{
+ struct hfi1_ipoib_txq *txq =
+ container_of(wait->iow, struct hfi1_ipoib_txq, wait);
+
+ write_seqlock(&sde->waitlock);
+
+ if (likely(txq->priv->netdev->reg_state == NETREG_REGISTERED)) {
+ if (sdma_progress(sde, seq, txreq)) {
+ write_sequnlock(&sde->waitlock);
+ return -EAGAIN;
+ }
+
+ if (list_empty(&txreq->list))
+ /* came from non-list submit */
+ list_add_tail(&txreq->list, &txq->tx_list);
+ if (list_empty(&txq->wait.list)) {
+ struct hfi1_ibport *ibp = &sde->ppd->ibport_data;
+
+ if (!atomic_xchg(&txq->tx_ring.no_desc, 1)) {
+ trace_hfi1_txq_queued(txq);
+ hfi1_ipoib_stop_txq(txq);
+ }
+ ibp->rvp.n_dmawait++;
+ iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
+ }
+
+ write_sequnlock(&sde->waitlock);
+ return -EBUSY;
+ }
+
+ write_sequnlock(&sde->waitlock);
+ return -EINVAL;
+}
+
+/*
+ * hfi1_ipoib_sdma_wakeup - ipoib sdma wakeup function
+ *
+ * This function gets called when SDMA descriptors become available and the Tx
+ * queue's wait structure was previously added to the sdma engine's dmawait list.
+ */
+static void hfi1_ipoib_sdma_wakeup(struct iowait *wait, int reason)
+{
+ struct hfi1_ipoib_txq *txq =
+ container_of(wait, struct hfi1_ipoib_txq, wait);
+
+ trace_hfi1_txq_wakeup(txq);
+ if (likely(txq->priv->netdev->reg_state == NETREG_REGISTERED))
+ iowait_schedule(wait, system_highpri_wq, WORK_CPU_UNBOUND);
+}
+
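+/*
+ * Deferred work scheduled from hfi1_ipoib_sdma_wakeup(): flush the pending
+ * tx_list and, on success, clear no_desc and restart the stopped netdev
+ * queue.
+ */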
+static void hfi1_ipoib_flush_txq(struct work_struct *work)
+{
+ struct iowait_work *ioww =
+ container_of(work, struct iowait_work, iowork);
+ struct iowait *wait = iowait_ioww_to_iow(ioww);
+ struct hfi1_ipoib_txq *txq =
+ container_of(wait, struct hfi1_ipoib_txq, wait);
+ struct net_device *dev = txq->priv->netdev;
+
+ if (likely(dev->reg_state == NETREG_REGISTERED) &&
+ likely(!hfi1_ipoib_flush_tx_list(dev, txq)))
+ if (atomic_xchg(&txq->tx_ring.no_desc, 0))
+ hfi1_ipoib_wake_txq(txq);
+}
+
+int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
+{
+ struct net_device *dev = priv->netdev;
+ u32 tx_ring_size, tx_item_size;
+ struct hfi1_ipoib_circ_buf *tx_ring;
+ int i, j;
+
+ /*
+ * The ring holds one entry less than tx_ring_size, so round tx_queue_len + 1
+ * up to the next power of 2 in order to hold at least tx_queue_len entries.
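+ * e.g. a tx_queue_len of 1000 gives roundup_pow_of_two(1001) = 1024 slots.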
+ */
+ tx_ring_size = roundup_pow_of_two(dev->tx_queue_len + 1);
+ tx_item_size = roundup_pow_of_two(sizeof(struct ipoib_txreq));
+
+ priv->txqs = kcalloc_node(dev->num_tx_queues,
+ sizeof(struct hfi1_ipoib_txq),
+ GFP_KERNEL,
+ priv->dd->node);
+ if (!priv->txqs)
+ return -ENOMEM;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+ struct ipoib_txreq *tx;
+
+ tx_ring = &txq->tx_ring;
+ iowait_init(&txq->wait,
+ 0,
+ hfi1_ipoib_flush_txq,
+ NULL,
+ hfi1_ipoib_sdma_sleep,
+ hfi1_ipoib_sdma_wakeup,
+ NULL,
+ NULL);
+ txq->priv = priv;
+ txq->sde = NULL;
+ INIT_LIST_HEAD(&txq->tx_list);
+ atomic_set(&txq->tx_ring.stops, 0);
+ atomic_set(&txq->tx_ring.ring_full, 0);
+ atomic_set(&txq->tx_ring.no_desc, 0);
+ txq->q_idx = i;
+ txq->flow.tx_queue = 0xff;
+ txq->flow.sc5 = 0xff;
+ txq->pkts_sent = false;
+
+ netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
+ priv->dd->node);
+
+ txq->tx_ring.items =
+ kvzalloc_node(array_size(tx_ring_size, tx_item_size),
+ GFP_KERNEL, priv->dd->node);
+ if (!txq->tx_ring.items)
+ goto free_txqs;
+
+ txq->tx_ring.max_items = tx_ring_size;
+ txq->tx_ring.shift = ilog2(tx_item_size);
+ txq->tx_ring.avail = hfi1_ipoib_ring_hwat(txq);
+ tx_ring = &txq->tx_ring;
+ for (j = 0; j < tx_ring_size; j++) {
+ hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr =
+ kzalloc_node(sizeof(*tx->sdma_hdr),
+ GFP_KERNEL, priv->dd->node);
+ if (!hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr)
+ goto free_txqs;
+ }
+
+ netif_napi_add_tx(dev, &txq->napi, hfi1_ipoib_poll_tx_ring);
+ }
+
+ return 0;
+
+free_txqs:
+ for (i--; i >= 0; i--) {
+ struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+
+ netif_napi_del(&txq->napi);
+ tx_ring = &txq->tx_ring;
+ for (j = 0; j < tx_ring_size; j++)
+ kfree(hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr);
+ kvfree(tx_ring->items);
+ }
+
+ kfree(priv->txqs);
+ priv->txqs = NULL;
+ return -ENOMEM;
+}
+
+static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq)
+{
+ struct sdma_txreq *txreq;
+ struct sdma_txreq *txreq_tmp;
+
+ list_for_each_entry_safe(txreq, txreq_tmp, &txq->tx_list, list) {
+ struct ipoib_txreq *tx =
+ container_of(txreq, struct ipoib_txreq, txreq);
+
+ list_del(&txreq->list);
+ sdma_txclean(txq->priv->dd, &tx->txreq);
+ dev_kfree_skb_any(tx->skb);
+ tx->skb = NULL;
+ txq->tx_ring.complete_txreqs++;
+ }
+
+ if (hfi1_ipoib_used(txq))
+ dd_dev_warn(txq->priv->dd,
+ "txq %d not empty found %u requests\n",
+ txq->q_idx,
+ hfi1_ipoib_txreqs(txq->tx_ring.sent_txreqs,
+ txq->tx_ring.complete_txreqs));
+}
+
+void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv)
+{
+ int i, j;
+
+ for (i = 0; i < priv->netdev->num_tx_queues; i++) {
+ struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+ struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
+
+ iowait_cancel_work(&txq->wait);
+ iowait_sdma_drain(&txq->wait);
+ hfi1_ipoib_drain_tx_list(txq);
+ netif_napi_del(&txq->napi);
+ hfi1_ipoib_drain_tx_ring(txq);
+ for (j = 0; j < tx_ring->max_items; j++)
+ kfree(hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr);
+ kvfree(tx_ring->items);
+ }
+
+ kfree(priv->txqs);
+ priv->txqs = NULL;
+}
+
+void hfi1_ipoib_napi_tx_enable(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+
+ napi_enable(&txq->napi);
+ }
+}
+
+void hfi1_ipoib_napi_tx_disable(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+
+ napi_disable(&txq->napi);
+ hfi1_ipoib_drain_tx_ring(txq);
+ }
+}
+
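+/* Transmit timeout handler: dump the txq state to aid stall debugging. */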
+void hfi1_ipoib_tx_timeout(struct net_device *dev, unsigned int q)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ struct hfi1_ipoib_txq *txq = &priv->txqs[q];
+
+ dd_dev_info(priv->dd, "timeout txq %p q %u stopped %u stops %d no_desc %d ring_full %d\n",
+ txq, q,
+ __netif_subqueue_stopped(dev, txq->q_idx),
+ atomic_read(&txq->tx_ring.stops),
+ atomic_read(&txq->tx_ring.no_desc),
+ atomic_read(&txq->tx_ring.ring_full));
+ dd_dev_info(priv->dd, "sde %p engine %u\n",
+ txq->sde,
+ txq->sde ? txq->sde->this_idx : 0);
+ dd_dev_info(priv->dd, "flow %x\n", txq->flow.as_int);
+ dd_dev_info(priv->dd, "sent %llu completed %llu used %llu\n",
+ txq->tx_ring.sent_txreqs, txq->tx_ring.complete_txreqs,
+ hfi1_ipoib_used(txq));
+ dd_dev_info(priv->dd, "tx_queue_len %u max_items %u\n",
+ dev->tx_queue_len, txq->tx_ring.max_items);
+ dd_dev_info(priv->dd, "head %u tail %u\n",
+ txq->tx_ring.head, txq->tx_ring.tail);
+ dd_dev_info(priv->dd, "wait queued %u\n",
+ !list_empty(&txq->wait.list));
+ dd_dev_info(priv->dd, "tx_list empty %u\n",
+ list_empty(&txq->tx_list));
+}
+
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 4228393e6c4c..961fa07116f0 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015-2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#include <linux/net.h>
@@ -108,7 +66,7 @@ static u16 hfi1_lookup_pkey_value(struct hfi1_ibport *ibp, int pkey_idx)
return 0;
}
-void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port)
+void hfi1_event_pkey_change(struct hfi1_devdata *dd, u32 port)
{
struct ib_event event;
@@ -297,7 +255,7 @@ static struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u32 dlid)
struct rvt_qp *qp0;
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct hfi1_devdata *dd = dd_from_ppd(ppd);
- u8 port_num = ppd->port;
+ u32 port_num = ppd->port;
memset(&attr, 0, sizeof(attr));
attr.type = rdma_ah_find_type(&dd->verbs_dev.rdi.ibdev, port_num);
@@ -411,7 +369,7 @@ static void send_trap(struct hfi1_ibport *ibp, struct trap_node *trap)
void hfi1_handle_trap_timer(struct timer_list *t)
{
- struct hfi1_ibport *ibp = from_timer(ibp, t, rvp.trap_timer);
+ struct hfi1_ibport *ibp = timer_container_of(ibp, t, rvp.trap_timer);
struct trap_node *trap = NULL;
unsigned long flags;
int i;
@@ -515,7 +473,7 @@ static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
/*
* Send a Port Capability Mask Changed trap (ch. 14.3.11).
*/
-void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
+void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u32 port_num)
{
struct trap_node *trap;
struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
@@ -581,7 +539,7 @@ void hfi1_node_desc_chg(struct hfi1_ibport *ibp)
static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,
u8 *data, struct ib_device *ibdev,
- u8 port, u32 *resp_len, u32 max_len)
+ u32 port, u32 *resp_len, u32 max_len)
{
struct opa_node_description *nd;
@@ -601,12 +559,12 @@ static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,
}
static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len)
{
struct opa_node_info *ni;
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
- unsigned pidx = port - 1; /* IB number port from 1, hw from 0 */
+ u32 pidx = port - 1; /* IB number port from 1, hw from 0 */
ni = (struct opa_node_info *)data;
@@ -641,11 +599,11 @@ static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
}
static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev,
- u8 port)
+ u32 port)
{
struct ib_node_info *nip = (struct ib_node_info *)&smp->data;
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
- unsigned pidx = port - 1; /* IB number port from 1, hw from 0 */
+ u32 pidx = port - 1; /* IB number port from 1, hw from 0 */
/* GUID 0 is illegal */
if (smp->attr_mod || pidx >= dd->num_pports ||
@@ -721,7 +679,7 @@ static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
/* Bad mkey not a violation below level 2 */
if (ibp->rvp.mkeyprot < 2)
break;
- /* fall through */
+ fallthrough;
case IB_MGMT_METHOD_SET:
case IB_MGMT_METHOD_TRAP_REPRESS:
if (ibp->rvp.mkey_violations != 0xFFFF)
@@ -794,7 +752,7 @@ void read_ltp_rtt(struct hfi1_devdata *dd)
}
static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len)
{
int i;
@@ -1009,7 +967,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
* @port: the IB port number
* @pkeys: the pkey table is placed here
*/
-static int get_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
+static int get_pkeys(struct hfi1_devdata *dd, u32 port, u16 *pkeys)
{
struct hfi1_pportdata *ppd = dd->pport + port - 1;
@@ -1019,7 +977,7 @@ static int get_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
}
static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
@@ -1202,8 +1160,8 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
if (ret == HFI_TRANSITION_DISALLOWED ||
ret == HFI_TRANSITION_UNDEFINED) {
pr_warn("invalid logical state transition %s -> %s\n",
- opa_lstate_name(logical_old),
- opa_lstate_name(logical_new));
+ ib_port_state_to_str(logical_old),
+ ib_port_state_to_str(logical_new));
return ret;
}
@@ -1272,7 +1230,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
case IB_PORT_NOP:
if (phys_state == IB_PORTPHYSSTATE_NOP)
break;
- /* FALLTHROUGH */
+ fallthrough;
case IB_PORT_DOWN:
if (phys_state == IB_PORTPHYSSTATE_NOP) {
link_state = HLS_DN_DOWNDEF;
@@ -1341,7 +1299,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
return 0;
}
-/**
+/*
* subn_set_opa_portinfo - set port information
* @smp: the incoming SM packet
* @ibdev: the infiniband device
@@ -1349,7 +1307,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
*
*/
static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len, int local_mad)
{
struct opa_port_info *pi = (struct opa_port_info *)data;
@@ -1667,7 +1625,7 @@ get_only:
* @port: the IB port number
* @pkeys: the PKEY table
*/
-static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
+static int set_pkeys(struct hfi1_devdata *dd, u32 port, u16 *pkeys)
{
struct hfi1_pportdata *ppd;
int i;
@@ -1718,7 +1676,7 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
}
static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
@@ -1732,7 +1690,7 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
u32 size = 0;
if (n_blocks_sent == 0) {
- pr_warn("OPA Get PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
+ pr_warn("OPA Get PKey AM Invalid : P = %u; B = 0x%x; N = 0x%x\n",
port, start_block, n_blocks_sent);
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
@@ -1825,7 +1783,7 @@ static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
}
static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len)
{
struct hfi1_ibport *ibp = to_iport(ibdev, port);
@@ -1848,7 +1806,7 @@ static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len)
{
struct hfi1_ibport *ibp = to_iport(ibdev, port);
@@ -1877,7 +1835,7 @@ static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len)
{
struct hfi1_ibport *ibp = to_iport(ibdev, port);
@@ -1900,7 +1858,7 @@ static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len)
{
struct hfi1_ibport *ibp = to_iport(ibdev, port);
@@ -1921,7 +1879,7 @@ static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len)
{
u32 n_blocks = OPA_AM_NBLK(am);
@@ -1943,7 +1901,7 @@ static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len)
{
u32 n_blocks = OPA_AM_NBLK(am);
@@ -1985,7 +1943,7 @@ static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len)
{
u32 n_blocks = OPA_AM_NPORT(am);
@@ -2010,7 +1968,7 @@ static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len)
{
u32 n_blocks = OPA_AM_NPORT(am);
@@ -2042,7 +2000,7 @@ static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len)
{
u32 nports = OPA_AM_NPORT(am);
@@ -2084,7 +2042,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len, int local_mad)
{
u32 nports = OPA_AM_NPORT(am);
@@ -2132,7 +2090,7 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
@@ -2184,7 +2142,7 @@ static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port, u32 *resp_len,
+ struct ib_device *ibdev, u32 port, u32 *resp_len,
u32 max_len)
{
u32 num_ports = OPA_AM_NPORT(am);
@@ -2208,7 +2166,7 @@ static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port, u32 *resp_len,
+ struct ib_device *ibdev, u32 port, u32 *resp_len,
u32 max_len)
{
u32 num_ports = OPA_AM_NPORT(am);
@@ -2232,7 +2190,7 @@ static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len)
{
struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
@@ -2274,7 +2232,7 @@ static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len)
{
struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
@@ -2300,7 +2258,6 @@ static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
* can be changed from the default values
*/
case OPA_VLARB_PREEMPT_ELEMENTS:
- /* FALLTHROUGH */
case OPA_VLARB_PREEMPT_MATRIX:
smp->status |= IB_SMP_UNSUP_METH_ATTR;
break;
@@ -2326,7 +2283,7 @@ struct opa_port_status_req {
__be32 vl_select_mask;
};
-#define VL_MASK_ALL 0x000080ff
+#define VL_MASK_ALL 0x00000000000080ffUL
struct opa_port_status_rsp {
__u8 port_num;
@@ -2381,7 +2338,7 @@ struct opa_port_status_rsp {
__be64 port_vl_rcv_bubble;
__be64 port_vl_mark_fecn;
__be64 port_vl_xmit_discards;
- } vls[0]; /* real array size defined by # bits set in vl_select_mask */
+ } vls[]; /* real array size defined by # bits set in vl_select_mask */
};
enum counter_selects {
@@ -2423,7 +2380,7 @@ struct opa_aggregate {
__be16 attr_id;
__be16 err_reqlength; /* 1 bit, 8 res, 7 bit */
__be32 attr_mod;
- u8 data[0];
+ u8 data[];
};
#define MSK_LLI 0x000000f0
@@ -2480,9 +2437,9 @@ struct opa_port_data_counters_msg {
__be64 port_vl_xmit_wait_data;
__be64 port_vl_rcv_bubble;
__be64 port_vl_mark_fecn;
- } vls[0];
+ } vls[];
/* array size defined by #bits set in vl_select_mask*/
- } port[1]; /* array size defined by #ports in attribute modifier */
+ } port;
};
struct opa_port_error_counters64_msg {
@@ -2513,9 +2470,9 @@ struct opa_port_error_counters64_msg {
u8 reserved3[7];
struct _vls_ectrs {
__be64 port_vl_xmit_discards;
- } vls[0];
+ } vls[];
/* array size defined by #bits set in vl_select_mask */
- } port[1]; /* array size defined by #ports in attribute modifier */
+ } port;
};
struct opa_port_error_info_msg {
@@ -2586,7 +2543,7 @@ struct opa_port_error_info_msg {
u8 error_info;
} __packed fm_config_ei;
__u32 reserved9;
- } port[1]; /* actual array size defined by #ports in attr modifier */
+ } port;
};
/* opa_port_error_info_msg error_info_select_mask bit definitions */
@@ -2625,15 +2582,14 @@ static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp,
}
static void a0_portstatus(struct hfi1_pportdata *ppd,
- struct opa_port_status_rsp *rsp, u32 vl_select_mask)
+ struct opa_port_status_rsp *rsp)
{
if (!is_bx(ppd->dd)) {
unsigned long vl;
u64 sum_vl_xmit_wait = 0;
- u32 vl_all_mask = VL_MASK_ALL;
+ unsigned long vl_all_mask = VL_MASK_ALL;
- for_each_set_bit(vl, (unsigned long *)&(vl_all_mask),
- 8 * sizeof(vl_all_mask)) {
+ for_each_set_bit(vl, &vl_all_mask, BITS_PER_LONG) {
u64 tmp = sum_vl_xmit_wait +
read_port_cntr(ppd, C_TX_WAIT_VL,
idx_from_vl(vl));
@@ -2724,18 +2680,18 @@ u64 get_xmit_wait_counters(struct hfi1_pportdata *ppd,
static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
struct ib_device *ibdev,
- u8 port, u32 *resp_len)
+ u32 port, u32 *resp_len)
{
struct opa_port_status_req *req =
(struct opa_port_status_req *)pmp->data;
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct opa_port_status_rsp *rsp;
- u32 vl_select_mask = be32_to_cpu(req->vl_select_mask);
+ unsigned long vl_select_mask = be32_to_cpu(req->vl_select_mask);
unsigned long vl;
size_t response_data_size;
u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
- u8 port_num = req->port_num;
- u8 num_vls = hweight32(vl_select_mask);
+ u32 port_num = req->port_num;
+ u8 num_vls = hweight64(vl_select_mask);
struct _vls_pctrs *vlinfo;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
@@ -2744,8 +2700,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
u16 link_width;
u16 link_speed;
- response_data_size = sizeof(struct opa_port_status_rsp) +
- num_vls * sizeof(struct _vls_pctrs);
+ response_data_size = struct_size(rsp, vls, num_vls);
if (response_data_size > sizeof(pmp->data)) {
pmp->mad_hdr.status |= OPA_PM_STATUS_REQUEST_TOO_LARGE;
return reply((struct ib_mad_hdr *)pmp);
@@ -2771,7 +2726,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
hfi1_read_link_quality(dd, &rsp->link_quality_indicator);
- rsp->vl_select_mask = cpu_to_be32(vl_select_mask);
+ rsp->vl_select_mask = cpu_to_be32((u32)vl_select_mask);
rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
CNTR_INVALID_VL));
rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
@@ -2842,8 +2797,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
* So in the for_each_set_bit() loop below, we don't need
* any additional checks for vl.
*/
- for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
- 8 * sizeof(vl_select_mask)) {
+ for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
memset(vlinfo, 0, sizeof(*vlinfo));
tmp = read_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl));
@@ -2884,7 +2838,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
vfi++;
}
- a0_portstatus(ppd, rsp, vl_select_mask);
+ a0_portstatus(ppd, rsp);
if (resp_len)
*resp_len += response_data_size;
@@ -2892,7 +2846,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
return reply((struct ib_mad_hdr *)pmp);
}
-static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port,
+static u64 get_error_counter_summary(struct ib_device *ibdev, u32 port,
u8 res_lli, u8 res_ler)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
@@ -2931,16 +2885,14 @@ static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port,
return error_counter_summary;
}
-static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp,
- u32 vl_select_mask)
+static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp)
{
if (!is_bx(ppd->dd)) {
unsigned long vl;
u64 sum_vl_xmit_wait = 0;
- u32 vl_all_mask = VL_MASK_ALL;
+ unsigned long vl_all_mask = VL_MASK_ALL;
- for_each_set_bit(vl, (unsigned long *)&(vl_all_mask),
- 8 * sizeof(vl_all_mask)) {
+ for_each_set_bit(vl, &vl_all_mask, BITS_PER_LONG) {
u64 tmp = sum_vl_xmit_wait +
read_port_cntr(ppd, C_TX_WAIT_VL,
idx_from_vl(vl));
@@ -2979,7 +2931,7 @@ static void pma_get_opa_port_dctrs(struct ib_device *ibdev,
static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
struct ib_device *ibdev,
- u8 port, u32 *resp_len)
+ u32 port, u32 *resp_len)
{
struct opa_port_data_counters_msg *req =
(struct opa_port_data_counters_msg *)pmp->data;
@@ -2993,9 +2945,9 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
u8 lq, num_vls;
u8 res_lli, res_ler;
u64 port_mask;
- u8 port_num;
+ u32 port_num;
unsigned long vl;
- u32 vl_select_mask;
+ unsigned long vl_select_mask;
int vfi;
u16 link_width;
u16 link_speed;
@@ -3014,8 +2966,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
}
/* Sanity check */
- response_data_size = sizeof(struct opa_port_data_counters_msg) +
- num_vls * sizeof(struct _vls_dctrs);
+ response_data_size = struct_size(req, port.vls, num_vls);
if (response_data_size > sizeof(pmp->data)) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -3035,7 +2986,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
return reply((struct ib_mad_hdr *)pmp);
}
- rsp = &req->port[0];
+ rsp = &req->port;
memset(rsp, 0, sizeof(*rsp));
rsp->port_number = port;
@@ -3073,8 +3024,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
* So in the for_each_set_bit() loop below, we don't need
* any additional checks for vl.
*/
- for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
- 8 * sizeof(req->vl_select_mask)) {
+ for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
memset(vlinfo, 0, sizeof(*vlinfo));
rsp->vls[vfi].port_vl_xmit_data =
@@ -3122,7 +3072,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
vfi++;
}
- a0_datacounters(ppd, rsp, vl_select_mask);
+ a0_datacounters(ppd, rsp);
if (resp_len)
*resp_len += response_data_size;
@@ -3131,7 +3081,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
}
static int pma_get_ib_portcounters_ext(struct ib_pma_mad *pmp,
- struct ib_device *ibdev, u8 port)
+ struct ib_device *ibdev, u32 port)
{
struct ib_pma_portcounters_ext *p = (struct ib_pma_portcounters_ext *)
pmp->data;
@@ -3159,7 +3109,7 @@ bail:
}
static void pma_get_opa_port_ectrs(struct ib_device *ibdev,
- struct _port_ectrs *rsp, u8 port)
+ struct _port_ectrs *rsp, u32 port)
{
u64 tmp, tmp2;
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
@@ -3202,11 +3152,11 @@ static void pma_get_opa_port_ectrs(struct ib_device *ibdev,
static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
struct ib_device *ibdev,
- u8 port, u32 *resp_len)
+ u32 port, u32 *resp_len)
{
size_t response_data_size;
struct _port_ectrs *rsp;
- u8 port_num;
+ u32 port_num;
struct opa_port_error_counters64_msg *req;
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
u32 num_ports;
@@ -3217,7 +3167,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
struct _vls_ectrs *vlinfo;
unsigned long vl;
u64 port_mask, tmp;
- u32 vl_select_mask;
+ unsigned long vl_select_mask;
int vfi;
req = (struct opa_port_error_counters64_msg *)pmp->data;
@@ -3232,8 +3182,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
return reply((struct ib_mad_hdr *)pmp);
}
- response_data_size = sizeof(struct opa_port_error_counters64_msg) +
- num_vls * sizeof(struct _vls_ectrs);
+ response_data_size = struct_size(req, port.vls, num_vls);
if (response_data_size > sizeof(pmp->data)) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -3252,7 +3201,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
return reply((struct ib_mad_hdr *)pmp);
}
- rsp = &req->port[0];
+ rsp = &req->port;
ibp = to_iport(ibdev, port_num);
ppd = ppd_from_ibp(ibp);
@@ -3276,8 +3225,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
vlinfo = &rsp->vls[0];
vfi = 0;
vl_select_mask = be32_to_cpu(req->vl_select_mask);
- for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
- 8 * sizeof(req->vl_select_mask)) {
+ for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
memset(vlinfo, 0, sizeof(*vlinfo));
rsp->vls[vfi].port_vl_xmit_discards =
cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
@@ -3293,7 +3241,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
}
static int pma_get_ib_portcounters(struct ib_pma_mad *pmp,
- struct ib_device *ibdev, u8 port)
+ struct ib_device *ibdev, u32 port)
{
struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
pmp->data;
@@ -3379,7 +3327,7 @@ bail:
static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
struct ib_device *ibdev,
- u8 port, u32 *resp_len)
+ u32 port, u32 *resp_len)
{
size_t response_data_size;
struct _port_ei *rsp;
@@ -3387,12 +3335,12 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
u64 port_mask;
u32 num_ports;
- u8 port_num;
+ u32 port_num;
u8 num_pslm;
u64 reg;
req = (struct opa_port_error_info_msg *)pmp->data;
- rsp = &req->port[0];
+ rsp = &req->port;
num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
@@ -3478,7 +3426,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
struct ib_device *ibdev,
- u8 port, u32 *resp_len)
+ u32 port, u32 *resp_len)
{
struct opa_clear_port_status *req =
(struct opa_clear_port_status *)pmp->data;
@@ -3488,7 +3436,7 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
u64 portn = be64_to_cpu(req->port_select_mask[3]);
u32 counter_select = be32_to_cpu(req->counter_select_mask);
- u32 vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */
+ unsigned long vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */
unsigned long vl;
if ((nports != 1) || (portn != 1 << port)) {
@@ -3582,8 +3530,7 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
if (counter_select & CS_UNCORRECTABLE_ERRORS)
write_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL, 0);
- for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
- 8 * sizeof(vl_select_mask)) {
+ for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
if (counter_select & CS_PORT_XMIT_DATA)
write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0);
@@ -3631,19 +3578,19 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
struct ib_device *ibdev,
- u8 port, u32 *resp_len)
+ u32 port, u32 *resp_len)
{
struct _port_ei *rsp;
struct opa_port_error_info_msg *req;
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
u64 port_mask;
u32 num_ports;
- u8 port_num;
+ u32 port_num;
u8 num_pslm;
u32 error_info_select;
req = (struct opa_port_error_info_msg *)pmp->data;
- rsp = &req->port[0];
+ rsp = &req->port;
num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
@@ -3713,7 +3660,7 @@ struct opa_congestion_info_attr {
} __packed;
static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len)
{
struct opa_congestion_info_attr *p =
@@ -3738,7 +3685,7 @@ static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
u8 *data, struct ib_device *ibdev,
- u8 port, u32 *resp_len, u32 max_len)
+ u32 port, u32 *resp_len, u32 max_len)
{
int i;
struct opa_congestion_setting_attr *p =
@@ -3830,7 +3777,7 @@ static void apply_cc_state(struct hfi1_pportdata *ppd)
}
static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len)
{
struct opa_congestion_setting_attr *p =
@@ -3871,7 +3818,7 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
u8 *data, struct ib_device *ibdev,
- u8 port, u32 *resp_len, u32 max_len)
+ u32 port, u32 *resp_len, u32 max_len)
{
struct hfi1_ibport *ibp = to_iport(ibdev, port);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
@@ -3936,7 +3883,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
}
static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len)
{
struct ib_cc_table_attr *cc_table_attr =
@@ -3988,7 +3935,7 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len)
{
struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data;
@@ -4047,7 +3994,7 @@ struct opa_led_info {
#define OPA_LED_MASK BIT(OPA_LED_SHIFT)
static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
@@ -4077,7 +4024,7 @@ static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
@@ -4100,7 +4047,7 @@ static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
}
static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
- u8 *data, struct ib_device *ibdev, u8 port,
+ u8 *data, struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len)
{
int ret;
@@ -4180,7 +4127,7 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
if (ibp->rvp.port_cap_flags & IB_PORT_SM)
return IB_MAD_RESULT_SUCCESS;
- /* FALLTHROUGH */
+ fallthrough;
default:
smp->status |= IB_SMP_UNSUP_METH_ATTR;
ret = reply((struct ib_mad_hdr *)smp);
@@ -4190,7 +4137,7 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
}
static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
- u8 *data, struct ib_device *ibdev, u8 port,
+ u8 *data, struct ib_device *ibdev, u32 port,
u32 *resp_len, u32 max_len, int local_mad)
{
int ret;
@@ -4250,7 +4197,7 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
if (ibp->rvp.port_cap_flags & IB_PORT_SM)
return IB_MAD_RESULT_SUCCESS;
- /* FALLTHROUGH */
+ fallthrough;
default:
smp->status |= IB_SMP_UNSUP_METH_ATTR;
ret = reply((struct ib_mad_hdr *)smp);
@@ -4265,7 +4212,7 @@ static inline void set_aggr_error(struct opa_aggregate *ag)
}
static int subn_get_opa_aggregate(struct opa_smp *smp,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len)
{
int i;
@@ -4314,7 +4261,7 @@ static int subn_get_opa_aggregate(struct opa_smp *smp,
}
static int subn_set_opa_aggregate(struct opa_smp *smp,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len, int local_mad)
{
int i;
@@ -4520,7 +4467,7 @@ static int hfi1_pkey_validation_pma(struct hfi1_ibport *ibp,
}
static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
- u8 port, const struct opa_mad *in_mad,
+ u32 port, const struct opa_mad *in_mad,
struct opa_mad *out_mad,
u32 *resp_len, int local_mad)
{
@@ -4625,7 +4572,7 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
}
static int process_subn(struct ib_device *ibdev, int mad_flags,
- u8 port, const struct ib_mad *in_mad,
+ u32 port, const struct ib_mad *in_mad,
struct ib_mad *out_mad)
{
struct ib_smp *smp = (struct ib_smp *)out_mad;
@@ -4683,7 +4630,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
return ret;
}
-static int process_perf(struct ib_device *ibdev, u8 port,
+static int process_perf(struct ib_device *ibdev, u32 port,
const struct ib_mad *in_mad,
struct ib_mad *out_mad)
{
@@ -4745,7 +4692,7 @@ static int process_perf(struct ib_device *ibdev, u8 port,
return ret;
}
-static int process_perf_opa(struct ib_device *ibdev, u8 port,
+static int process_perf_opa(struct ib_device *ibdev, u32 port,
const struct opa_mad *in_mad,
struct opa_mad *out_mad, u32 *resp_len)
{
@@ -4827,7 +4774,7 @@ static int process_perf_opa(struct ib_device *ibdev, u8 port,
}
static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
- u8 port, const struct ib_wc *in_wc,
+ u32 port, const struct ib_wc *in_wc,
const struct ib_grh *in_grh,
const struct opa_mad *in_mad,
struct opa_mad *out_mad, size_t *out_mad_size,
@@ -4880,7 +4827,7 @@ bail:
return ret;
}
-static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port,
+static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u32 port,
const struct ib_wc *in_wc,
const struct ib_grh *in_grh,
const struct ib_mad *in_mad,
@@ -4913,6 +4860,8 @@ static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port,
* @in_grh: the global route header for this packet
* @in_mad: the incoming MAD
* @out_mad: any outgoing MAD reply
+ * @out_mad_size: size of the outgoing MAD reply
+ * @out_mad_pkey_index: used to pass back the partition key (pkey) index
*
* Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
* interested in processing.
@@ -4923,18 +4872,13 @@ static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port,
*
* This is called by the ib_mad module.
*/
-int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
+int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u32 port,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in_mad, size_t in_mad_size,
- struct ib_mad_hdr *out_mad, size_t *out_mad_size,
- u16 *out_mad_pkey_index)
+ const struct ib_mad *in_mad, struct ib_mad *out_mad,
+ size_t *out_mad_size, u16 *out_mad_pkey_index)
{
- switch (in_mad->base_version) {
+ switch (in_mad->mad_hdr.base_version) {
case OPA_MGMT_BASE_VERSION:
- if (unlikely(in_mad_size != sizeof(struct opa_mad))) {
- dev_err(ibdev->dev.parent, "invalid in_mad_size\n");
- return IB_MAD_RESULT_FAILURE;
- }
return hfi1_process_opa_mad(ibdev, mad_flags, port,
in_wc, in_grh,
(struct opa_mad *)in_mad,
@@ -4942,10 +4886,8 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
out_mad_size,
out_mad_pkey_index);
case IB_MGMT_BASE_VERSION:
- return hfi1_process_ib_mad(ibdev, mad_flags, port,
- in_wc, in_grh,
- (const struct ib_mad *)in_mad,
- (struct ib_mad *)out_mad);
+ return hfi1_process_ib_mad(ibdev, mad_flags, port, in_wc,
+ in_grh, in_mad, out_mad);
default:
break;
}
diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h
index 2f48e6953629..d6dde762921a 100644
--- a/drivers/infiniband/hw/hfi1/mad.h
+++ b/drivers/infiniband/hw/hfi1/mad.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015 - 2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+
#ifndef _HFI1_MAD_H
#define _HFI1_MAD_H
@@ -165,7 +124,6 @@ struct opa_mad_notice_attr {
} __packed ntc_2048;
};
- u8 class_data[0];
};
#define IB_VLARB_LOWPRI_0_31 1
@@ -436,7 +394,7 @@ struct sc2vlnt {
COUNTER_MASK(1, 3) | \
COUNTER_MASK(1, 4))
-void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port);
+void hfi1_event_pkey_change(struct hfi1_devdata *dd, u32 port);
void hfi1_handle_trap_timer(struct timer_list *t);
u16 tx_link_width(u16 link_width);
u64 get_xmit_wait_counters(struct hfi1_pportdata *ppd, u16 link_width,
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index 14d2a90964c3..67a5c410fb5e 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -1,78 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
+ * Copyright(c) 2020 Cornelis Networks, Inc.
* Copyright(c) 2016 - 2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/mmu_notifier.h>
#include <linux/interval_tree_generic.h>
+#include <linux/sched/mm.h>
#include "mmu_rb.h"
#include "trace.h"
-struct mmu_rb_handler {
- struct mmu_notifier mn;
- struct rb_root_cached root;
- void *ops_arg;
- spinlock_t lock; /* protect the RB tree */
- struct mmu_rb_ops *ops;
- struct mm_struct *mm;
- struct list_head lru_list;
- struct work_struct del_work;
- struct list_head del_list;
- struct workqueue_struct *wq;
-};
-
static unsigned long mmu_node_start(struct mmu_rb_node *);
static unsigned long mmu_node_last(struct mmu_rb_node *);
static int mmu_notifier_range_start(struct mmu_notifier *,
const struct mmu_notifier_range *);
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
unsigned long, unsigned long);
-static void do_remove(struct mmu_rb_handler *handler,
- struct list_head *del_list);
+static void release_immediate(struct kref *refcount);
static void handle_remove(struct work_struct *work);
static const struct mmu_notifier_ops mn_opts = {
@@ -92,37 +39,39 @@ static unsigned long mmu_node_last(struct mmu_rb_node *node)
return PAGE_ALIGN(node->addr + node->len) - 1;
}
-int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm,
- struct mmu_rb_ops *ops,
+int hfi1_mmu_rb_register(void *ops_arg,
+ const struct mmu_rb_ops *ops,
struct workqueue_struct *wq,
struct mmu_rb_handler **handler)
{
- struct mmu_rb_handler *handlr;
+ struct mmu_rb_handler *h;
+ void *free_ptr;
int ret;
- handlr = kmalloc(sizeof(*handlr), GFP_KERNEL);
- if (!handlr)
+ free_ptr = kzalloc(sizeof(*h) + cache_line_size() - 1, GFP_KERNEL);
+ if (!free_ptr)
return -ENOMEM;
- handlr->root = RB_ROOT_CACHED;
- handlr->ops = ops;
- handlr->ops_arg = ops_arg;
- INIT_HLIST_NODE(&handlr->mn.hlist);
- spin_lock_init(&handlr->lock);
- handlr->mn.ops = &mn_opts;
- handlr->mm = mm;
- INIT_WORK(&handlr->del_work, handle_remove);
- INIT_LIST_HEAD(&handlr->del_list);
- INIT_LIST_HEAD(&handlr->lru_list);
- handlr->wq = wq;
-
- ret = mmu_notifier_register(&handlr->mn, handlr->mm);
+ h = PTR_ALIGN(free_ptr, cache_line_size());
+ h->root = RB_ROOT_CACHED;
+ h->ops = ops;
+ h->ops_arg = ops_arg;
+ INIT_HLIST_NODE(&h->mn.hlist);
+ spin_lock_init(&h->lock);
+ h->mn.ops = &mn_opts;
+ INIT_WORK(&h->del_work, handle_remove);
+ INIT_LIST_HEAD(&h->del_list);
+ INIT_LIST_HEAD(&h->lru_list);
+ h->wq = wq;
+ h->free_ptr = free_ptr;
+
+ ret = mmu_notifier_register(&h->mn, current->mm);
if (ret) {
- kfree(handlr);
+ kfree(free_ptr);
return ret;
}
- *handler = handlr;
+ *handler = h;
return 0;
}
@@ -133,8 +82,11 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
unsigned long flags;
struct list_head del_list;
+ /* Prevent freeing of mm until we are completely finished. */
+ mmgrab(handler->mn.mm);
+
/* Unregister first so we don't get any more notifications. */
- mmu_notifier_unregister(&handler->mn, handler->mm);
+ mmu_notifier_unregister(&handler->mn, handler->mn.mm);
/*
* Make sure the wq delete handler is finished running. It will not
@@ -153,9 +105,16 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
}
spin_unlock_irqrestore(&handler->lock, flags);
- do_remove(handler, &del_list);
+ while (!list_empty(&del_list)) {
+ rbnode = list_first_entry(&del_list, struct mmu_rb_node, list);
+ list_del(&rbnode->list);
+ kref_put(&rbnode->refcount, release_immediate);
+ }
+
+ /* Now the mm may be freed. */
+ mmdrop(handler->mn.mm);
- kfree(handler);
+ kfree(handler->free_ptr);
}
int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
@@ -165,27 +124,39 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
unsigned long flags;
int ret = 0;
- trace_hfi1_mmu_rb_insert(mnode->addr, mnode->len);
+ trace_hfi1_mmu_rb_insert(mnode);
+
+ if (current->mm != handler->mn.mm)
+ return -EPERM;
+
spin_lock_irqsave(&handler->lock, flags);
node = __mmu_rb_search(handler, mnode->addr, mnode->len);
if (node) {
- ret = -EINVAL;
+ ret = -EEXIST;
goto unlock;
}
__mmu_int_rb_insert(mnode, &handler->root);
- list_add(&mnode->list, &handler->lru_list);
-
- ret = handler->ops->insert(handler->ops_arg, mnode);
- if (ret) {
- __mmu_int_rb_remove(mnode, &handler->root);
- list_del(&mnode->list); /* remove from LRU list */
- }
+ list_add_tail(&mnode->list, &handler->lru_list);
+ mnode->handler = handler;
unlock:
spin_unlock_irqrestore(&handler->lock, flags);
return ret;
}
/* Caller must hold handler lock */
+struct mmu_rb_node *hfi1_mmu_rb_get_first(struct mmu_rb_handler *handler,
+ unsigned long addr, unsigned long len)
+{
+ struct mmu_rb_node *node;
+
+ trace_hfi1_mmu_rb_search(addr, len);
+ node = __mmu_int_rb_iter_first(&handler->root, addr, (addr + len) - 1);
+ if (node)
+ list_move_tail(&node->list, &handler->lru_list);
+ return node;
+}
+
+/* Caller must hold handler lock */
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
unsigned long addr,
unsigned long len)
@@ -209,27 +180,47 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
return node;
}
-bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler,
- unsigned long addr, unsigned long len,
- struct mmu_rb_node **rb_node)
+/*
+ * Must NOT call while holding mnode->handler->lock.
+ * mnode->handler->ops->remove() may sleep and mnode->handler->lock is a
+ * spinlock.
+ */
+static void release_immediate(struct kref *refcount)
{
- struct mmu_rb_node *node;
+ struct mmu_rb_node *mnode =
+ container_of(refcount, struct mmu_rb_node, refcount);
+ trace_hfi1_mmu_release_node(mnode);
+ mnode->handler->ops->remove(mnode->handler->ops_arg, mnode);
+}
+
+/* Caller must hold mnode->handler->lock */
+static void release_nolock(struct kref *refcount)
+{
+ struct mmu_rb_node *mnode =
+ container_of(refcount, struct mmu_rb_node, refcount);
+ list_move(&mnode->list, &mnode->handler->del_list);
+ queue_work(mnode->handler->wq, &mnode->handler->del_work);
+}
+
+/*
+ * struct mmu_rb_node->refcount kref_put() callback.
+ * Adds mmu_rb_node to mmu_rb_node->handler->del_list and queues
+ * handler->del_work on handler->wq.
+ * Does not remove mmu_rb_node from handler->lru_list or handler->root.
+ * Acquires mmu_rb_node->handler->lock; do not call while already holding
+ * handler->lock.
+ */
+void hfi1_mmu_rb_release(struct kref *refcount)
+{
+ struct mmu_rb_node *mnode =
+ container_of(refcount, struct mmu_rb_node, refcount);
+ struct mmu_rb_handler *handler = mnode->handler;
unsigned long flags;
- bool ret = false;
spin_lock_irqsave(&handler->lock, flags);
- node = __mmu_rb_search(handler, addr, len);
- if (node) {
- if (node->addr == addr && node->len == len)
- goto unlock;
- __mmu_int_rb_remove(node, &handler->root);
- list_del(&node->list); /* remove from LRU list */
- ret = true;
- }
-unlock:
+ list_move(&mnode->list, &mnode->handler->del_list);
spin_unlock_irqrestore(&handler->lock, flags);
- *rb_node = node;
- return ret;
+ queue_work(handler->wq, &handler->del_work);
}
void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
@@ -239,11 +230,17 @@ void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
unsigned long flags;
bool stop = false;
+ if (current->mm != handler->mn.mm)
+ return;
+
INIT_LIST_HEAD(&del_list);
spin_lock_irqsave(&handler->lock, flags);
- list_for_each_entry_safe_reverse(rbnode, ptr, &handler->lru_list,
- list) {
+ list_for_each_entry_safe(rbnode, ptr, &handler->lru_list, list) {
+ /* refcount == 1 implies mmu_rb_handler has only rbnode ref */
+ if (kref_read(&rbnode->refcount) > 1)
+ continue;
+
if (handler->ops->evict(handler->ops_arg, rbnode, evict_arg,
&stop)) {
__mmu_int_rb_remove(rbnode, &handler->root);
@@ -255,33 +252,12 @@ void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
}
spin_unlock_irqrestore(&handler->lock, flags);
- while (!list_empty(&del_list)) {
- rbnode = list_first_entry(&del_list, struct mmu_rb_node, list);
- list_del(&rbnode->list);
- handler->ops->remove(handler->ops_arg, rbnode);
+ list_for_each_entry_safe(rbnode, ptr, &del_list, list) {
+ trace_hfi1_mmu_rb_evict(rbnode);
+ kref_put(&rbnode->refcount, release_immediate);
}
}
-/*
- * It is up to the caller to ensure that this function does not race with the
- * mmu invalidate notifier which may be calling the users remove callback on
- * 'node'.
- */
-void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
- struct mmu_rb_node *node)
-{
- unsigned long flags;
-
- /* Validity of handler and node pointers has been checked by caller. */
- trace_hfi1_mmu_rb_remove(node->addr, node->len);
- spin_lock_irqsave(&handler->lock, flags);
- __mmu_int_rb_remove(node, &handler->root);
- list_del(&node->list); /* remove from LRU list */
- spin_unlock_irqrestore(&handler->lock, flags);
-
- handler->ops->remove(handler->ops_arg, node);
-}
-
static int mmu_notifier_range_start(struct mmu_notifier *mn,
const struct mmu_notifier_range *range)
{
@@ -290,7 +266,6 @@ static int mmu_notifier_range_start(struct mmu_notifier *mn,
struct rb_root_cached *root = &handler->root;
struct mmu_rb_node *node, *ptr = NULL;
unsigned long flags;
- bool added = false;
spin_lock_irqsave(&handler->lock, flags);
for (node = __mmu_int_rb_iter_first(root, range->start, range->end-1);
@@ -298,42 +273,20 @@ static int mmu_notifier_range_start(struct mmu_notifier *mn,
/* Guard against node removal. */
ptr = __mmu_int_rb_iter_next(node, range->start,
range->end - 1);
- trace_hfi1_mmu_mem_invalidate(node->addr, node->len);
- if (handler->ops->invalidate(handler->ops_arg, node)) {
- __mmu_int_rb_remove(node, root);
- /* move from LRU list to delete list */
- list_move(&node->list, &handler->del_list);
- added = true;
- }
+ trace_hfi1_mmu_mem_invalidate(node);
+ /* Remove from rb tree and lru_list. */
+ __mmu_int_rb_remove(node, root);
+ list_del_init(&node->list);
+ kref_put(&node->refcount, release_nolock);
}
spin_unlock_irqrestore(&handler->lock, flags);
- if (added)
- queue_work(handler->wq, &handler->del_work);
-
return 0;
}
/*
- * Call the remove function for the given handler and the list. This
- * is expected to be called with a delete list extracted from handler.
- * The caller should not be holding the handler lock.
- */
-static void do_remove(struct mmu_rb_handler *handler,
- struct list_head *del_list)
-{
- struct mmu_rb_node *node;
-
- while (!list_empty(del_list)) {
- node = list_first_entry(del_list, struct mmu_rb_node, list);
- list_del(&node->list);
- handler->ops->remove(handler->ops_arg, node);
- }
-}
-
-/*
* Work queue function to remove all nodes that have been queued up to
- * be removed. The key feature is that mm->mmap_sem is not being held
+ * be removed. The key feature is that mm->mmap_lock is not being held
* and the remove callback can sleep while taking it, if needed.
*/
static void handle_remove(struct work_struct *work)
@@ -343,11 +296,17 @@ static void handle_remove(struct work_struct *work)
del_work);
struct list_head del_list;
unsigned long flags;
+ struct mmu_rb_node *node;
/* remove anything that is queued to get removed */
spin_lock_irqsave(&handler->lock, flags);
list_replace_init(&handler->del_list, &del_list);
spin_unlock_irqrestore(&handler->lock, flags);
- do_remove(handler, &del_list);
+ while (!list_empty(&del_list)) {
+ node = list_first_entry(&del_list, struct mmu_rb_node, list);
+ list_del(&node->list);
+ trace_hfi1_mmu_release_node(node);
+ handler->ops->remove(handler->ops_arg, node);
+ }
}
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h
index f04cec1e99d1..3fa50dd64db6 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.h
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.h
@@ -1,49 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
+ * Copyright(c) 2020 Cornelis Networks, Inc.
* Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+
#ifndef _HFI1_MMU_RB_H
#define _HFI1_MMU_RB_H
@@ -54,35 +14,54 @@ struct mmu_rb_node {
unsigned long len;
unsigned long __last;
struct rb_node node;
+ struct mmu_rb_handler *handler;
struct list_head list;
+ struct kref refcount;
};
-/*
- * NOTE: filter, insert, invalidate, and evict must not sleep. Only remove is
- * allowed to sleep.
- */
+/* filter and evict must not sleep. Only remove is allowed to sleep. */
struct mmu_rb_ops {
bool (*filter)(struct mmu_rb_node *node, unsigned long addr,
unsigned long len);
- int (*insert)(void *ops_arg, struct mmu_rb_node *mnode);
void (*remove)(void *ops_arg, struct mmu_rb_node *mnode);
- int (*invalidate)(void *ops_arg, struct mmu_rb_node *node);
int (*evict)(void *ops_arg, struct mmu_rb_node *mnode,
void *evict_arg, bool *stop);
};
-int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm,
- struct mmu_rb_ops *ops,
+struct mmu_rb_handler {
+ /*
+ * struct mmu_notifier is 56 bytes, and spinlock_t is 4 bytes, so
+ * they fit together in one cache line. mn is relatively rarely
+ * accessed, so co-locating the spinlock with it achieves much of
+ * the cacheline contention reduction of giving the spinlock its own
+ * cacheline without the overhead of doing so.
+ */
+ struct mmu_notifier mn;
+ spinlock_t lock; /* protect the RB tree */
+
+ /* Begin on a new cacheline boundary here */
+ struct rb_root_cached root ____cacheline_aligned_in_smp;
+ void *ops_arg;
+ const struct mmu_rb_ops *ops;
+ struct list_head lru_list;
+ struct work_struct del_work;
+ struct list_head del_list;
+ struct workqueue_struct *wq;
+ void *free_ptr;
+};
+
+int hfi1_mmu_rb_register(void *ops_arg,
+ const struct mmu_rb_ops *ops,
struct workqueue_struct *wq,
struct mmu_rb_handler **handler);
void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler);
int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
struct mmu_rb_node *mnode);
+void hfi1_mmu_rb_release(struct kref *refcount);
+
void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg);
-void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
- struct mmu_rb_node *mnode);
-bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler,
- unsigned long addr, unsigned long len,
- struct mmu_rb_node **rb_node);
+struct mmu_rb_node *hfi1_mmu_rb_get_first(struct mmu_rb_handler *handler,
+ unsigned long addr,
+ unsigned long len);
#endif /* _HFI1_MMU_RB_H */
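With hfi1_mmu_rb_remove() and hfi1_mmu_rb_remove_unless_exact() gone, callers now look a node up with hfi1_mmu_rb_get_first() under the handler lock and pin it with the node's kref for as long as they use it. A minimal sketch of that pattern (it mirrors find_system_node() in pin_system.c later in this series; the wrapper name is illustrative):

static struct mmu_rb_node *lookup_and_hold(struct mmu_rb_handler *handler,
					   unsigned long addr, unsigned long len)
{
	struct mmu_rb_node *node;
	unsigned long flags;

	spin_lock_irqsave(&handler->lock, flags);
	node = hfi1_mmu_rb_get_first(handler, addr, len);
	if (node)
		/* "safety" kref: keeps the node alive across an invalidate */
		kref_get(&node->refcount);
	spin_unlock_irqrestore(&handler->lock, flags);

	return node;
}

The matching kref_put(&node->refcount, hfi1_mmu_rb_release) drops the hold once the node has been handed off to its consumer.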
diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c
index d920b165d696..77d2ece9a9cb 100644
--- a/drivers/infiniband/hw/hfi1/msix.c
+++ b/drivers/infiniband/hw/hfi1/msix.c
@@ -1,54 +1,12 @@
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
- * Copyright(c) 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2018 - 2020 Intel Corporation.
*/
#include "hfi.h"
#include "affinity.h"
#include "sdma.h"
+#include "netdev.h"
/**
* msix_initialize() - Calculate, request and configure MSIx IRQs
@@ -69,7 +27,7 @@ int msix_initialize(struct hfi1_devdata *dd)
* one for each VNIC context
* ...any new IRQs should be added here.
*/
- total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts;
+ total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_netdev_contexts;
if (total >= CCE_NUM_MSIX_VECTORS)
return -EINVAL;
@@ -102,8 +60,8 @@ int msix_initialize(struct hfi1_devdata *dd)
* @arg: context information for the IRQ
* @handler: IRQ handler
* @thread: IRQ thread handler (could be NULL)
- * @idx: zero base idx if multiple devices are needed
* @type: affinity IRQ type
+ * @name: IRQ name
*
* Allocate an MSIx vector if available, and then create the appropriate
* metadata needed to keep track of the pci IRQ request.
@@ -115,13 +73,11 @@ int msix_initialize(struct hfi1_devdata *dd)
*/
static int msix_request_irq(struct hfi1_devdata *dd, void *arg,
irq_handler_t handler, irq_handler_t thread,
- u32 idx, enum irq_type type)
+ enum irq_type type, const char *name)
{
unsigned long nr;
int irq;
int ret;
- const char *err_info;
- char name[MAX_NAME_SIZE];
struct hfi1_msix_entry *me;
/* Allocate an MSIx vector */
@@ -135,43 +91,15 @@ static int msix_request_irq(struct hfi1_devdata *dd, void *arg,
if (nr == dd->msix_info.max_requested)
return -ENOSPC;
- /* Specific verification and determine the name */
- switch (type) {
- case IRQ_GENERAL:
- /* general interrupt must be MSIx vector 0 */
- if (nr) {
- spin_lock(&dd->msix_info.msix_lock);
- __clear_bit(nr, dd->msix_info.in_use_msix);
- spin_unlock(&dd->msix_info.msix_lock);
- dd_dev_err(dd, "Invalid index %lu for GENERAL IRQ\n",
- nr);
- return -EINVAL;
- }
- snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit);
- err_info = "general";
- break;
- case IRQ_SDMA:
- snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d",
- dd->unit, idx);
- err_info = "sdma";
- break;
- case IRQ_RCVCTXT:
- snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d",
- dd->unit, idx);
- err_info = "receive context";
- break;
- case IRQ_OTHER:
- default:
+ if (type < IRQ_SDMA || type >= IRQ_OTHER)
return -EINVAL;
- }
- name[sizeof(name) - 1] = 0;
irq = pci_irq_vector(dd->pcidev, nr);
ret = pci_request_irq(dd->pcidev, nr, handler, thread, arg, name);
if (ret) {
dd_dev_err(dd,
- "%s: request for IRQ %d failed, MSIx %d, err %d\n",
- err_info, irq, idx, ret);
+ "%s: request for IRQ %d failed, MSIx %lx, err %d\n",
+ name, irq, nr, ret);
spin_lock(&dd->msix_info.msix_lock);
__clear_bit(nr, dd->msix_info.in_use_msix);
spin_unlock(&dd->msix_info.msix_lock);
@@ -190,22 +118,19 @@ static int msix_request_irq(struct hfi1_devdata *dd, void *arg,
/* This is a request, so a failure is not fatal */
ret = hfi1_get_irq_affinity(dd, me);
if (ret)
- dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
+ dd_dev_err(dd, "%s: unable to pin IRQ %d\n", name, ret);
return nr;
}
-/**
- * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs
- * @rcd: valid rcd context
- *
- */
-int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
+static int msix_request_rcd_irq_common(struct hfi1_ctxtdata *rcd,
+ irq_handler_t handler,
+ irq_handler_t thread,
+ const char *name)
{
- int nr;
-
- nr = msix_request_irq(rcd->dd, rcd, receive_context_interrupt,
- receive_context_thread, rcd->ctxt, IRQ_RCVCTXT);
+ int nr = msix_request_irq(rcd->dd, rcd, handler, thread,
+ rcd->is_vnic ? IRQ_NETDEVCTXT : IRQ_RCVCTXT,
+ name);
if (nr < 0)
return nr;
@@ -222,16 +147,50 @@ int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
}
/**
- * msix_request_smda_ira() - Helper for getting SDMA IRQ resources
+ * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs
+ * @rcd: valid rcd context
+ *
+ */
+int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
+{
+ char name[MAX_NAME_SIZE];
+
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d",
+ rcd->dd->unit, rcd->ctxt);
+
+ return msix_request_rcd_irq_common(rcd, receive_context_interrupt,
+ receive_context_thread, name);
+}
+
+/**
+ * msix_netdev_request_rcd_irq - Helper function for RCVAVAIL IRQs
+ * for netdev context
+ * @rcd: valid netdev context
+ */
+int msix_netdev_request_rcd_irq(struct hfi1_ctxtdata *rcd)
+{
+ char name[MAX_NAME_SIZE];
+
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d nd kctxt%d",
+ rcd->dd->unit, rcd->ctxt);
+ return msix_request_rcd_irq_common(rcd, receive_context_interrupt_napi,
+ NULL, name);
+}
+
+/**
+ * msix_request_sdma_irq - Helper for getting SDMA IRQ resources
* @sde: valid sdma engine
*
*/
int msix_request_sdma_irq(struct sdma_engine *sde)
{
int nr;
+ char name[MAX_NAME_SIZE];
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d",
+ sde->dd->unit, sde->this_idx);
nr = msix_request_irq(sde->dd, sde, sdma_interrupt, NULL,
- sde->this_idx, IRQ_SDMA);
+ IRQ_SDMA, name);
if (nr < 0)
return nr;
sde->msix_intr = nr;
@@ -241,7 +200,33 @@ int msix_request_sdma_irq(struct sdma_engine *sde)
}
/**
- * enable_sdma_src() - Helper to enable SDMA IRQ srcs
+ * msix_request_general_irq - Helper for getting general IRQ
+ * resources
+ * @dd: valid device data
+ */
+int msix_request_general_irq(struct hfi1_devdata *dd)
+{
+ int nr;
+ char name[MAX_NAME_SIZE];
+
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit);
+ nr = msix_request_irq(dd, dd, general_interrupt, NULL, IRQ_GENERAL,
+ name);
+ if (nr < 0)
+ return nr;
+
+ /* general interrupt must be MSIx vector 0 */
+ if (nr) {
+ msix_free_irq(dd, (u8)nr);
+ dd_dev_err(dd, "Invalid index %d for GENERAL IRQ\n", nr);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * enable_sdma_srcs - Helper to enable SDMA IRQ srcs
* @dd: valid devdata structure
* @i: index of SDMA engine
*/
@@ -265,10 +250,9 @@ static void enable_sdma_srcs(struct hfi1_devdata *dd, int i)
int msix_request_irqs(struct hfi1_devdata *dd)
{
int i;
- int ret;
+ int ret = msix_request_general_irq(dd);
- ret = msix_request_irq(dd, dd, general_interrupt, NULL, 0, IRQ_GENERAL);
- if (ret < 0)
+ if (ret)
return ret;
for (i = 0; i < dd->num_sdma; i++) {
@@ -322,7 +306,7 @@ void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr)
}
/**
- * hfi1_clean_up_msix_interrupts() - Free all MSIx IRQ resources
+ * msix_clean_up_interrupts - Free all MSIx IRQ resources
* @dd: valid device data data structure
*
* Free the MSIx and associated PCI resources, if they have been allocated.
@@ -345,15 +329,16 @@ void msix_clean_up_interrupts(struct hfi1_devdata *dd)
}
/**
- * msix_vnic_syncrhonize_irq() - Vnic IRQ synchronize
+ * msix_netdev_synchronize_irq - netdev IRQ synchronize
* @dd: valid devdata
*/
-void msix_vnic_synchronize_irq(struct hfi1_devdata *dd)
+void msix_netdev_synchronize_irq(struct hfi1_devdata *dd)
{
int i;
+ int ctxt_count = hfi1_netdev_ctxt_count(dd);
- for (i = 0; i < dd->vnic.num_ctxt; i++) {
- struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
+ for (i = 0; i < ctxt_count; i++) {
+ struct hfi1_ctxtdata *rcd = hfi1_netdev_get_ctxt(dd, i);
struct hfi1_msix_entry *me;
me = &dd->msix_info.msix_entries[rcd->msix_intr];
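The new range check in msix_request_irq() (type < IRQ_SDMA || type >= IRQ_OTHER) relies on the ordering of enum irq_type: every requestable source must sit between IRQ_SDMA and the IRQ_OTHER sentinel. A sketch of the assumed layout follows; the real definition lives in the driver's affinity header, and the relative order of the middle entries is an assumption:

/* Assumed layout -- only the bounds matter for the range check above. */
enum irq_type {
	IRQ_SDMA = 0,	/* lowest requestable source */
	IRQ_RCVCTXT,
	IRQ_NETDEVCTXT,	/* added for the netdev receive contexts */
	IRQ_GENERAL,
	IRQ_OTHER	/* sentinel: rejected by msix_request_irq() */
};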
diff --git a/drivers/infiniband/hw/hfi1/msix.h b/drivers/infiniband/hw/hfi1/msix.h
index a514881632a4..9530ccb0a2ce 100644
--- a/drivers/infiniband/hw/hfi1/msix.h
+++ b/drivers/infiniband/hw/hfi1/msix.h
@@ -1,50 +1,8 @@
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
- * Copyright(c) 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2018 - 2020 Intel Corporation.
*/
+
#ifndef _HFI1_MSIX_H
#define _HFI1_MSIX_H
@@ -54,11 +12,13 @@
int msix_initialize(struct hfi1_devdata *dd);
int msix_request_irqs(struct hfi1_devdata *dd);
void msix_clean_up_interrupts(struct hfi1_devdata *dd);
+int msix_request_general_irq(struct hfi1_devdata *dd);
int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd);
int msix_request_sdma_irq(struct sdma_engine *sde);
void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr);
-/* VNIC interface */
-void msix_vnic_synchronize_irq(struct hfi1_devdata *dd);
+/* Netdev interface */
+void msix_netdev_synchronize_irq(struct hfi1_devdata *dd);
+int msix_netdev_request_rcd_irq(struct hfi1_ctxtdata *rcd);
#endif
diff --git a/drivers/infiniband/hw/hfi1/netdev.h b/drivers/infiniband/hw/hfi1/netdev.h
new file mode 100644
index 000000000000..07c8f77c9181
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/netdev.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+#ifndef HFI1_NETDEV_H
+#define HFI1_NETDEV_H
+
+#include "hfi.h"
+
+#include <linux/netdevice.h>
+#include <linux/xarray.h>
+
+/**
+ * struct hfi1_netdev_rxq - Receive Queue for HFI
+ * Both IPoIB and VNIC netdevices work on top of this rx abstraction.
+ * @napi: napi object
+ * @rx: ptr to netdev_rx
+ * @rcd: ptr to receive context data
+ */
+struct hfi1_netdev_rxq {
+ struct napi_struct napi;
+ struct hfi1_netdev_rx *rx;
+ struct hfi1_ctxtdata *rcd;
+};
+
+/*
+ * Number of netdev contexts used. Ensure it is less than or equal to
+ * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE).
+ */
+#define HFI1_MAX_NETDEV_CTXTS 8
+
+/* Number of NETDEV RSM entries */
+#define NUM_NETDEV_MAP_ENTRIES HFI1_MAX_NETDEV_CTXTS
+
+/**
+ * struct hfi1_netdev_rx: data required to setup and run HFI netdev.
+ * @rx_napi: the dummy netdevice to support "polling" the receive contexts
+ * @dd: hfi1_devdata
+ * @rxq: pointer to dummy netdev receive queues.
+ * @num_rx_q: number of receive queues
+ * @rmt_start: first free index in the RMT array
+ * @dev_tbl: netdev table of unique identifiers for VNIC and IPoIB VLANs.
+ * @enabled: atomic counter of netdevs enabling receive queues.
+ * When 0 NAPI will be disabled.
+ * @netdevs: atomic counter of netdevs using dummy netdev.
+ * When 0 receive queues will be freed.
+ */
+struct hfi1_netdev_rx {
+ struct net_device *rx_napi;
+ struct hfi1_devdata *dd;
+ struct hfi1_netdev_rxq *rxq;
+ int num_rx_q;
+ int rmt_start;
+ struct xarray dev_tbl;
+ /* count of enabled napi polls */
+ atomic_t enabled;
+ /* count of netdevs on top */
+ atomic_t netdevs;
+};
+
+static inline
+int hfi1_netdev_ctxt_count(struct hfi1_devdata *dd)
+{
+ return dd->netdev_rx->num_rx_q;
+}
+
+static inline
+struct hfi1_ctxtdata *hfi1_netdev_get_ctxt(struct hfi1_devdata *dd, int ctxt)
+{
+ return dd->netdev_rx->rxq[ctxt].rcd;
+}
+
+static inline
+int hfi1_netdev_get_free_rmt_idx(struct hfi1_devdata *dd)
+{
+ return dd->netdev_rx->rmt_start;
+}
+
+static inline
+void hfi1_netdev_set_free_rmt_idx(struct hfi1_devdata *dd, int rmt_idx)
+{
+ dd->netdev_rx->rmt_start = rmt_idx;
+}
+
+u32 hfi1_num_netdev_contexts(struct hfi1_devdata *dd, u32 available_contexts,
+ struct cpumask *cpu_mask);
+
+void hfi1_netdev_enable_queues(struct hfi1_devdata *dd);
+void hfi1_netdev_disable_queues(struct hfi1_devdata *dd);
+int hfi1_netdev_rx_init(struct hfi1_devdata *dd);
+int hfi1_netdev_rx_destroy(struct hfi1_devdata *dd);
+int hfi1_alloc_rx(struct hfi1_devdata *dd);
+void hfi1_free_rx(struct hfi1_devdata *dd);
+int hfi1_netdev_add_data(struct hfi1_devdata *dd, int id, void *data);
+void *hfi1_netdev_remove_data(struct hfi1_devdata *dd, int id);
+void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id);
+void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id);
+
+/* chip.c */
+int hfi1_netdev_rx_napi(struct napi_struct *napi, int budget);
+
+#endif /* HFI1_NETDEV_H */
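The dev_tbl xarray backs the id-keyed registration API declared above. A hedged sketch of how a client (VNIC or IPoIB) might use it; the struct and function names on the client side are hypothetical:

struct my_client_priv;	/* hypothetical per-VLAN client data */

static int my_client_register(struct hfi1_devdata *dd, int vesw_id,
			      struct my_client_priv *priv)
{
	/* Fails if vesw_id is already registered. */
	return hfi1_netdev_add_data(dd, vesw_id, priv);
}

static struct my_client_priv *my_client_lookup(struct hfi1_devdata *dd,
					       int vesw_id)
{
	/* Fast lookup on the receive path. */
	return hfi1_netdev_get_data(dd, vesw_id);
}

static void my_client_unregister(struct hfi1_devdata *dd, int vesw_id)
{
	hfi1_netdev_remove_data(dd, vesw_id);
}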
diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c
new file mode 100644
index 000000000000..8608044203bb
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/netdev_rx.c
@@ -0,0 +1,487 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for netdev RX functionality
+ */
+
+#include "sdma.h"
+#include "verbs.h"
+#include "netdev.h"
+#include "hfi.h"
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <rdma/ib_verbs.h>
+
+static int hfi1_netdev_setup_ctxt(struct hfi1_netdev_rx *rx,
+ struct hfi1_ctxtdata *uctxt)
+{
+ unsigned int rcvctrl_ops;
+ struct hfi1_devdata *dd = rx->dd;
+ int ret;
+
+ uctxt->rhf_rcv_function_map = netdev_rhf_rcv_functions;
+ uctxt->do_interrupt = &handle_receive_interrupt_napi_sp;
+
+ /* Now allocate the RcvHdr queue and eager buffers. */
+ ret = hfi1_create_rcvhdrq(dd, uctxt);
+ if (ret)
+ goto done;
+
+ ret = hfi1_setup_eagerbufs(uctxt);
+ if (ret)
+ goto done;
+
+ clear_rcvhdrtail(uctxt);
+
+ rcvctrl_ops = HFI1_RCVCTRL_CTXT_DIS;
+ rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_DIS;
+
+ if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
+ rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
+ rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
+ rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
+ rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
+
+ hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
+done:
+ return ret;
+}
+
+static int hfi1_netdev_allocate_ctxt(struct hfi1_devdata *dd,
+ struct hfi1_ctxtdata **ctxt)
+{
+ struct hfi1_ctxtdata *uctxt;
+ int ret;
+
+ if (dd->flags & HFI1_FROZEN)
+ return -EIO;
+
+ ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt);
+ if (ret < 0) {
+ dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
+ return -ENOMEM;
+ }
+
+ uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
+ HFI1_CAP_KGET(NODROP_RHQ_FULL) |
+ HFI1_CAP_KGET(NODROP_EGR_FULL) |
+ HFI1_CAP_KGET(DMA_RTAIL);
+ /* Netdev contexts are always NO_RDMA_RTAIL */
+ uctxt->fast_handler = handle_receive_interrupt_napi_fp;
+ uctxt->slow_handler = handle_receive_interrupt_napi_sp;
+ hfi1_set_seq_cnt(uctxt, 1);
+ uctxt->is_vnic = true;
+
+ hfi1_stats.sps_ctxts++;
+
+ dd_dev_info(dd, "created netdev context %d\n", uctxt->ctxt);
+ *ctxt = uctxt;
+
+ return 0;
+}
+
+static void hfi1_netdev_deallocate_ctxt(struct hfi1_devdata *dd,
+ struct hfi1_ctxtdata *uctxt)
+{
+ flush_wc();
+
+ /*
+ * Disable receive context and interrupt available, reset all
+ * RcvCtxtCtrl bits to default values.
+ */
+ hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
+ HFI1_RCVCTRL_TIDFLOW_DIS |
+ HFI1_RCVCTRL_INTRAVAIL_DIS |
+ HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
+ HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
+ HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
+
+ if (uctxt->msix_intr != CCE_NUM_MSIX_VECTORS)
+ msix_free_irq(dd, uctxt->msix_intr);
+
+ uctxt->msix_intr = CCE_NUM_MSIX_VECTORS;
+ uctxt->event_flags = 0;
+
+ hfi1_clear_tids(uctxt);
+ hfi1_clear_ctxt_pkey(dd, uctxt);
+
+ hfi1_stats.sps_ctxts--;
+
+ hfi1_free_ctxt(uctxt);
+}
+
+static int hfi1_netdev_allot_ctxt(struct hfi1_netdev_rx *rx,
+ struct hfi1_ctxtdata **ctxt)
+{
+ int rc;
+ struct hfi1_devdata *dd = rx->dd;
+
+ rc = hfi1_netdev_allocate_ctxt(dd, ctxt);
+ if (rc) {
+ dd_dev_err(dd, "netdev ctxt alloc failed %d\n", rc);
+ return rc;
+ }
+
+ rc = hfi1_netdev_setup_ctxt(rx, *ctxt);
+ if (rc) {
+ dd_dev_err(dd, "netdev ctxt setup failed %d\n", rc);
+ hfi1_netdev_deallocate_ctxt(dd, *ctxt);
+ *ctxt = NULL;
+ }
+
+ return rc;
+}
+
+/**
+ * hfi1_num_netdev_contexts - Count of netdev recv contexts to use.
+ * @dd: device on which to allocate netdev contexts
+ * @available_contexts: count of available receive contexts
+ * @cpu_mask: mask of possible cpus to include for contexts
+ *
+ * Return: count of physical cores on a node or the remaining available recv
+ * contexts for netdev recv context usage up to the maximum of
+ * HFI1_MAX_NETDEV_CTXTS.
+ * A value of 0 can be returned when acceleration is explicitly turned off,
+ * a memory allocation error occurs or when there are no available contexts.
+ *
+ */
+u32 hfi1_num_netdev_contexts(struct hfi1_devdata *dd, u32 available_contexts,
+ struct cpumask *cpu_mask)
+{
+ cpumask_var_t node_cpu_mask;
+ unsigned int available_cpus;
+
+ if (!HFI1_CAP_IS_KSET(AIP))
+ return 0;
+
+ /* Always give user contexts priority over netdev contexts */
+ if (available_contexts == 0) {
+ dd_dev_info(dd, "No receive contexts available for netdevs.\n");
+ return 0;
+ }
+
+ if (!zalloc_cpumask_var(&node_cpu_mask, GFP_KERNEL)) {
+ dd_dev_err(dd, "Unable to allocate cpu_mask for netdevs.\n");
+ return 0;
+ }
+
+ cpumask_and(node_cpu_mask, cpu_mask, cpumask_of_node(dd->node));
+
+ available_cpus = cpumask_weight(node_cpu_mask);
+
+ free_cpumask_var(node_cpu_mask);
+
+ return min3(available_cpus, available_contexts,
+ (u32)HFI1_MAX_NETDEV_CTXTS);
+}
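Worked example (illustrative numbers): with 16 CPUs of cpu_mask on the device's NUMA node and available_contexts = 10, the function returns min3(16, 10, 8) = 8, i.e. HFI1_MAX_NETDEV_CTXTS is the limiting factor.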
+
+static int hfi1_netdev_rxq_init(struct hfi1_netdev_rx *rx)
+{
+ int i;
+ int rc;
+ struct hfi1_devdata *dd = rx->dd;
+ struct net_device *dev = rx->rx_napi;
+
+ rx->num_rx_q = dd->num_netdev_contexts;
+ rx->rxq = kcalloc_node(rx->num_rx_q, sizeof(*rx->rxq),
+ GFP_KERNEL, dd->node);
+
+ if (!rx->rxq) {
+ dd_dev_err(dd, "Unable to allocate netdev queue data\n");
+ return (-ENOMEM);
+ }
+
+ for (i = 0; i < rx->num_rx_q; i++) {
+ struct hfi1_netdev_rxq *rxq = &rx->rxq[i];
+
+ rc = hfi1_netdev_allot_ctxt(rx, &rxq->rcd);
+ if (rc)
+ goto bail_context_irq_failure;
+
+ hfi1_rcd_get(rxq->rcd);
+ rxq->rx = rx;
+ rxq->rcd->napi = &rxq->napi;
+ dd_dev_info(dd, "Setting rcv queue %d napi to context %d\n",
+ i, rxq->rcd->ctxt);
+ /*
+ * Disable BUSY_POLL on this NAPI as this is not supported
+ * right now.
+ */
+ set_bit(NAPI_STATE_NO_BUSY_POLL, &rxq->napi.state);
+ netif_napi_add(dev, &rxq->napi, hfi1_netdev_rx_napi);
+ rc = msix_netdev_request_rcd_irq(rxq->rcd);
+ if (rc)
+ goto bail_context_irq_failure;
+ }
+
+ return 0;
+
+bail_context_irq_failure:
+ dd_dev_err(dd, "Unable to allot receive context\n");
+ for (; i >= 0; i--) {
+ struct hfi1_netdev_rxq *rxq = &rx->rxq[i];
+
+ if (rxq->rcd) {
+ hfi1_netdev_deallocate_ctxt(dd, rxq->rcd);
+ hfi1_rcd_put(rxq->rcd);
+ rxq->rcd = NULL;
+ }
+ }
+ kfree(rx->rxq);
+ rx->rxq = NULL;
+
+ return rc;
+}
+
+static void hfi1_netdev_rxq_deinit(struct hfi1_netdev_rx *rx)
+{
+ int i;
+ struct hfi1_devdata *dd = rx->dd;
+
+ for (i = 0; i < rx->num_rx_q; i++) {
+ struct hfi1_netdev_rxq *rxq = &rx->rxq[i];
+
+ netif_napi_del(&rxq->napi);
+ hfi1_netdev_deallocate_ctxt(dd, rxq->rcd);
+ hfi1_rcd_put(rxq->rcd);
+ rxq->rcd = NULL;
+ }
+
+ kfree(rx->rxq);
+ rx->rxq = NULL;
+ rx->num_rx_q = 0;
+}
+
+static void enable_queues(struct hfi1_netdev_rx *rx)
+{
+ int i;
+
+ for (i = 0; i < rx->num_rx_q; i++) {
+ struct hfi1_netdev_rxq *rxq = &rx->rxq[i];
+
+ dd_dev_info(rx->dd, "enabling queue %d on context %d\n", i,
+ rxq->rcd->ctxt);
+ napi_enable(&rxq->napi);
+ hfi1_rcvctrl(rx->dd,
+ HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB,
+ rxq->rcd);
+ }
+}
+
+static void disable_queues(struct hfi1_netdev_rx *rx)
+{
+ int i;
+
+ msix_netdev_synchronize_irq(rx->dd);
+
+ for (i = 0; i < rx->num_rx_q; i++) {
+ struct hfi1_netdev_rxq *rxq = &rx->rxq[i];
+
+ dd_dev_info(rx->dd, "disabling queue %d on context %d\n", i,
+ rxq->rcd->ctxt);
+
+ /* wait for napi if it was scheduled */
+ hfi1_rcvctrl(rx->dd,
+ HFI1_RCVCTRL_CTXT_DIS | HFI1_RCVCTRL_INTRAVAIL_DIS,
+ rxq->rcd);
+ napi_synchronize(&rxq->napi);
+ napi_disable(&rxq->napi);
+ }
+}
+
+/**
+ * hfi1_netdev_rx_init - Increments the netdevs counter. When called for the
+ * first time, it allocates the receive queue data and calls netif_napi_add
+ * for each queue.
+ *
+ * @dd: hfi1 dev data
+ */
+int hfi1_netdev_rx_init(struct hfi1_devdata *dd)
+{
+ struct hfi1_netdev_rx *rx = dd->netdev_rx;
+ int res;
+
+ if (atomic_fetch_inc(&rx->netdevs))
+ return 0;
+
+ mutex_lock(&hfi1_mutex);
+ res = hfi1_netdev_rxq_init(rx);
+ mutex_unlock(&hfi1_mutex);
+ return res;
+}
+
+/**
+ * hfi1_netdev_rx_destroy - Decrements the netdevs counter; when it reaches 0,
+ * napi is deleted and the receive queue memory is freed.
+ *
+ * @dd: hfi1 dev data
+ */
+int hfi1_netdev_rx_destroy(struct hfi1_devdata *dd)
+{
+ struct hfi1_netdev_rx *rx = dd->netdev_rx;
+
+ /* destroy the RX queues only if it is the last netdev going away */
+ if (atomic_fetch_add_unless(&rx->netdevs, -1, 0) == 1) {
+ mutex_lock(&hfi1_mutex);
+ hfi1_netdev_rxq_deinit(rx);
+ mutex_unlock(&hfi1_mutex);
+ }
+
+ return 0;
+}
+
+/**
+ * hfi1_alloc_rx - Allocates the rx support structure
+ * @dd: hfi1 dev data
+ *
+ * Allocate the rx structure to support gathering the receive
+ * resources and the dummy netdev.
+ *
+ * Updates dd struct pointer upon success.
+ *
+ * Return: 0 on success, or a negative error value on failure
+ *
+ */
+int hfi1_alloc_rx(struct hfi1_devdata *dd)
+{
+ struct hfi1_netdev_rx *rx;
+
+ dd_dev_info(dd, "allocating rx size %ld\n", sizeof(*rx));
+ rx = kzalloc_node(sizeof(*rx), GFP_KERNEL, dd->node);
+
+ if (!rx)
+ return -ENOMEM;
+ rx->dd = dd;
+ rx->rx_napi = alloc_netdev_dummy(0);
+ if (!rx->rx_napi) {
+ kfree(rx);
+ return -ENOMEM;
+ }
+
+ xa_init(&rx->dev_tbl);
+ atomic_set(&rx->enabled, 0);
+ atomic_set(&rx->netdevs, 0);
+ dd->netdev_rx = rx;
+
+ return 0;
+}
+
+void hfi1_free_rx(struct hfi1_devdata *dd)
+{
+ if (dd->netdev_rx) {
+ dd_dev_info(dd, "hfi1 rx freed\n");
+ free_netdev(dd->netdev_rx->rx_napi);
+ kfree(dd->netdev_rx);
+ dd->netdev_rx = NULL;
+ }
+}
+
+/**
+ * hfi1_netdev_enable_queues - This is napi enable function.
+ * It enables napi objects associated with queues.
+ * When at least one device has called it it increments atomic counter.
+ * Disable function decrements counter and when it is 0,
+ * calls napi_disable for every queue.
+ *
+ * @dd: hfi1 dev data
+ */
+void hfi1_netdev_enable_queues(struct hfi1_devdata *dd)
+{
+ struct hfi1_netdev_rx *rx;
+
+ if (!dd->netdev_rx)
+ return;
+
+ rx = dd->netdev_rx;
+ if (atomic_fetch_inc(&rx->enabled))
+ return;
+
+ mutex_lock(&hfi1_mutex);
+ enable_queues(rx);
+ mutex_unlock(&hfi1_mutex);
+}
+
+void hfi1_netdev_disable_queues(struct hfi1_devdata *dd)
+{
+ struct hfi1_netdev_rx *rx;
+
+ if (!dd->netdev_rx)
+ return;
+
+ rx = dd->netdev_rx;
+ if (atomic_dec_if_positive(&rx->enabled))
+ return;
+
+ mutex_lock(&hfi1_mutex);
+ disable_queues(rx);
+ mutex_unlock(&hfi1_mutex);
+}
+
+/**
+ * hfi1_netdev_add_data - Registers data with a unique identifier
+ * to be requested later. This is needed for the VNIC and IPoIB VLAN
+ * implementations.
+ * Concurrent updates are serialized by the dev_tbl xarray's internal lock.
+ *
+ * @dd: hfi1 dev data
+ * @id: requested integer id up to INT_MAX
+ * @data: data to be associated with index
+ */
+int hfi1_netdev_add_data(struct hfi1_devdata *dd, int id, void *data)
+{
+ struct hfi1_netdev_rx *rx = dd->netdev_rx;
+
+ return xa_insert(&rx->dev_tbl, id, data, GFP_NOWAIT);
+}
+
+/**
+ * hfi1_netdev_remove_data - Removes data with a previously given id.
+ * Returns the pointer to the removed entry.
+ *
+ * @dd: hfi1 dev data
+ * @id: requested integer id up to INT_MAX
+ */
+void *hfi1_netdev_remove_data(struct hfi1_devdata *dd, int id)
+{
+ struct hfi1_netdev_rx *rx = dd->netdev_rx;
+
+ return xa_erase(&rx->dev_tbl, id);
+}
+
+/**
+ * hfi1_netdev_get_data - Gets data with given id
+ *
+ * @dd: hfi1 dev data
+ * @id: requested integer id up to INT_MAX
+ */
+void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id)
+{
+ struct hfi1_netdev_rx *rx = dd->netdev_rx;
+
+ return xa_load(&rx->dev_tbl, id);
+}
+
+/**
+ * hfi1_netdev_get_first_data - Gets the first entry with an id greater
+ * than or equal to *start_id.
+ *
+ * @dd: hfi1 dev data
+ * @start_id: requested integer id up to INT_MAX
+ */
+void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id)
+{
+ struct hfi1_netdev_rx *rx = dd->netdev_rx;
+ unsigned long index = *start_id;
+ void *ret;
+
+ ret = xa_find(&rx->dev_tbl, &index, UINT_MAX, XA_PRESENT);
+ *start_id = (int)index;
+ return ret;
+}
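Taken together, hfi1_netdev_rx_init()/hfi1_netdev_rx_destroy() and hfi1_netdev_enable_queues()/hfi1_netdev_disable_queues() give first-caller/last-caller semantics. A hedged sketch of how a client netdev is expected to bracket its use of the shared receive queues; the client-side function names are hypothetical:

static int my_netdev_open(struct hfi1_devdata *dd)
{
	int ret;

	ret = hfi1_netdev_rx_init(dd);	/* first caller allocates the queues */
	if (ret)
		return ret;

	hfi1_netdev_enable_queues(dd);	/* first enabler turns NAPI on */
	return 0;
}

static void my_netdev_close(struct hfi1_devdata *dd)
{
	hfi1_netdev_disable_queues(dd);	/* last disabler turns NAPI off */
	hfi1_netdev_rx_destroy(dd);	/* last user frees the queues */
}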
diff --git a/drivers/infiniband/hw/hfi1/opa_compat.h b/drivers/infiniband/hw/hfi1/opa_compat.h
index 774215b95df5..49f2da677b03 100644
--- a/drivers/infiniband/hw/hfi1/opa_compat.h
+++ b/drivers/infiniband/hw/hfi1/opa_compat.h
@@ -1,52 +1,10 @@
-#ifndef _LINUX_H
-#define _LINUX_H
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+#ifndef _LINUX_H
+#define _LINUX_H
/*
* This header file is for OPA-specific definitions which are
* required by the HFI driver, and which aren't yet in the Linux
diff --git a/drivers/infiniband/hw/hfi1/opfn.c b/drivers/infiniband/hw/hfi1/opfn.c
index 370a5a8eaa71..6e0e3458d202 100644
--- a/drivers/infiniband/hw/hfi1/opfn.c
+++ b/drivers/infiniband/hw/hfi1/opfn.c
@@ -305,8 +305,8 @@ void opfn_trigger_conn_request(struct rvt_qp *qp, u32 bth1)
int opfn_init(void)
{
opfn_wq = alloc_workqueue("hfi_opfn",
- WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE |
- WQ_MEM_RECLAIM,
+ WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM |
+ WQ_PERCPU,
HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES);
if (!opfn_wq)
return -ENOMEM;
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index c96d193bb236..7133964749f8 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -1,55 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2019 Intel Corporation.
*/
+#include <linux/bitfield.h>
#include <linux/pci.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/vmalloc.h>
-#include <linux/aer.h>
#include <linux/module.h>
#include "hfi.h"
@@ -92,29 +50,21 @@ int hfi1_pcie_init(struct hfi1_devdata *dd)
goto bail;
}
- ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (ret) {
/*
* If the 64 bit setup fails, try 32 bit. Some systems
* do not setup 64 bit maps on systems with 2GB or less
* memory installed.
*/
- ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
if (ret) {
dd_dev_err(dd, "Unable to set DMA mask: %d\n", ret);
goto bail;
}
- ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- } else {
- ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
- }
- if (ret) {
- dd_dev_err(dd, "Unable to set DMA consistent mask: %d\n", ret);
- goto bail;
}
pci_set_master(pdev);
- (void)pci_enable_pcie_error_reporting(pdev);
return 0;
bail:
@@ -161,7 +111,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
return -EINVAL;
}
- dd->kregbase1 = ioremap_nocache(addr, RCV_ARRAY);
+ dd->kregbase1 = ioremap(addr, RCV_ARRAY);
if (!dd->kregbase1) {
dd_dev_err(dd, "UC mapping of kregbase1 failed\n");
return -ENOMEM;
@@ -179,7 +129,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
dd_dev_info(dd, "RcvArray count: %u\n", rcv_array_count);
dd->base2_start = RCV_ARRAY + rcv_array_count * 8;
- dd->kregbase2 = ioremap_nocache(
+ dd->kregbase2 = ioremap(
addr + dd->base2_start,
TXE_PIO_SEND - dd->base2_start);
if (!dd->kregbase2) {
@@ -261,12 +211,6 @@ static u32 extract_speed(u16 linkstat)
return speed;
}
-/* return the PCIe link speed from the given link status */
-static u32 extract_width(u16 linkstat)
-{
- return (linkstat & PCI_EXP_LNKSTA_NLW) >> PCI_EXP_LNKSTA_NLW_SHIFT;
-}
-
/* read the link status and set dd->{lbus_width,lbus_speed,lbus_info} */
static void update_lbus_info(struct hfi1_devdata *dd)
{
@@ -279,7 +223,7 @@ static void update_lbus_info(struct hfi1_devdata *dd)
return;
}
- dd->lbus_width = extract_width(linkstat);
+ dd->lbus_width = FIELD_GET(PCI_EXP_LNKSTA_NLW, linkstat);
dd->lbus_speed = extract_speed(linkstat);
snprintf(dd->lbus_info, sizeof(dd->lbus_info),
"PCIe,%uMHz,x%u", dd->lbus_speed, dd->lbus_width);
@@ -306,7 +250,7 @@ int pcie_speeds(struct hfi1_devdata *dd)
ret = pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap);
if (ret) {
dd_dev_err(dd, "Unable to read from PCI config\n");
- return ret;
+ return pcibios_err_to_errno(ret);
}
if ((linkcap & PCI_EXP_LNKCAP_SLS) != PCI_EXP_LNKCAP_SLS_8_0GB) {
@@ -319,7 +263,9 @@ int pcie_speeds(struct hfi1_devdata *dd)
/*
* bus->max_bus_speed is set from the bridge's linkcap Max Link Speed
*/
- if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) {
+ if (parent &&
+ (dd->pcidev->bus->max_bus_speed == PCIE_SPEED_2_5GT ||
+ dd->pcidev->bus->max_bus_speed == PCIE_SPEED_5_0GT)) {
dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n");
dd->link_gen3_capable = 0;
}
@@ -332,10 +278,14 @@ int pcie_speeds(struct hfi1_devdata *dd)
return 0;
}
-/* restore command and BARs after a reset has wiped them out */
+/*
+ * Restore command and BARs after a reset has wiped them out
+ *
+ * Returns 0 on success, otherwise a negative error value
+ */
int restore_pci_variables(struct hfi1_devdata *dd)
{
- int ret = 0;
+ int ret;
ret = pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command);
if (ret)
@@ -384,13 +334,17 @@ int restore_pci_variables(struct hfi1_devdata *dd)
error:
dd_dev_err(dd, "Unable to write to PCI config\n");
- return ret;
+ return pcibios_err_to_errno(ret);
}
-/* Save BARs and command to rewrite after device reset */
+/*
+ * Save BARs and command to rewrite after device reset
+ *
+ * Returns 0 on success, otherwise a negative error value
+ */
int save_pci_variables(struct hfi1_devdata *dd)
{
- int ret = 0;
+ int ret;
ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
&dd->pcibar0);
@@ -439,7 +393,7 @@ int save_pci_variables(struct hfi1_devdata *dd)
error:
dd_dev_err(dd, "Unable to read from PCI config\n");
- return ret;
+ return pcibios_err_to_errno(ret);
}
/*
@@ -450,10 +404,6 @@ static int hfi1_pcie_caps;
module_param_named(pcie_caps, hfi1_pcie_caps, int, 0444);
MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
-uint aspm_mode = ASPM_MODE_DISABLED;
-module_param_named(aspm, aspm_mode, uint, 0444);
-MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
-
/**
* tune_pcie_caps() - Code to adjust PCIe capabilities.
* @dd: Valid device data structure
@@ -1257,14 +1207,11 @@ retry:
(u32)lnkctl2);
/* only write to parent if target is not as high as ours */
if ((lnkctl2 & PCI_EXP_LNKCTL2_TLS) < target_vector) {
- lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS;
- lnkctl2 |= target_vector;
- dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
- (u32)lnkctl2);
- ret = pcie_capability_write_word(parent,
- PCI_EXP_LNKCTL2, lnkctl2);
+ ret = pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_TLS,
+ target_vector);
if (ret) {
- dd_dev_err(dd, "Unable to write to PCI config\n");
+ dd_dev_err(dd, "Unable to change parent PCI target speed\n");
return_error = 1;
goto done;
}
@@ -1273,22 +1220,11 @@ retry:
}
dd_dev_info(dd, "%s: setting target link speed\n", __func__);
- ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL2, &lnkctl2);
- if (ret) {
- dd_dev_err(dd, "Unable to read from PCI config\n");
- return_error = 1;
- goto done;
- }
-
- dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__,
- (u32)lnkctl2);
- lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS;
- lnkctl2 |= target_vector;
- dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
- (u32)lnkctl2);
- ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL2, lnkctl2);
+ ret = pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_TLS,
+ target_vector);
if (ret) {
- dd_dev_err(dd, "Unable to write to PCI config\n");
+ dd_dev_err(dd, "Unable to change device PCI target speed\n");
return_error = 1;
goto done;
}
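The open-coded extract_width() is replaced by FIELD_GET() from <linux/bitfield.h>. The two forms are equivalent, assuming PCI_EXP_LNKSTA_NLW is the 0x03f0 negotiated-link-width mask with a shift of 4; the register value below is just an example:

u16 linkstat = 0x1042;	/* example value only */
u32 width_old = (linkstat & PCI_EXP_LNKSTA_NLW) >> PCI_EXP_LNKSTA_NLW_SHIFT;
u32 width_new = FIELD_GET(PCI_EXP_LNKSTA_NLW, linkstat);
/* width_old == width_new == 4 (a x4 link) for this example value */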
diff --git a/drivers/infiniband/hw/hfi1/pin_system.c b/drivers/infiniband/hw/hfi1/pin_system.c
new file mode 100644
index 000000000000..cce56134519b
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/pin_system.c
@@ -0,0 +1,474 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+/*
+ * Copyright(c) 2023 - Cornelis Networks, Inc.
+ */
+
+#include <linux/types.h>
+
+#include "hfi.h"
+#include "common.h"
+#include "device.h"
+#include "pinning.h"
+#include "mmu_rb.h"
+#include "user_sdma.h"
+#include "trace.h"
+
+struct sdma_mmu_node {
+ struct mmu_rb_node rb;
+ struct hfi1_user_sdma_pkt_q *pq;
+ struct page **pages;
+ unsigned int npages;
+};
+
+static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
+ unsigned long len);
+static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode, void *arg2,
+ bool *stop);
+static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode);
+
+static const struct mmu_rb_ops sdma_rb_ops = {
+ .filter = sdma_rb_filter,
+ .evict = sdma_rb_evict,
+ .remove = sdma_rb_remove,
+};
+
+int hfi1_init_system_pinning(struct hfi1_user_sdma_pkt_q *pq)
+{
+ struct hfi1_devdata *dd = pq->dd;
+ int ret;
+
+ ret = hfi1_mmu_rb_register(pq, &sdma_rb_ops, dd->pport->hfi1_wq,
+ &pq->handler);
+ if (ret)
+ dd_dev_err(dd,
+ "[%u:%u] Failed to register system memory DMA support with MMU: %d\n",
+ pq->ctxt, pq->subctxt, ret);
+ return ret;
+}
+
+void hfi1_free_system_pinning(struct hfi1_user_sdma_pkt_q *pq)
+{
+ if (pq->handler)
+ hfi1_mmu_rb_unregister(pq->handler);
+}
+
+static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages)
+{
+ struct evict_data evict_data;
+
+ evict_data.cleared = 0;
+ evict_data.target = npages;
+ hfi1_mmu_rb_evict(pq->handler, &evict_data);
+ return evict_data.cleared;
+}
+
+static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
+ unsigned int start, unsigned int npages)
+{
+ hfi1_release_user_pages(mm, pages + start, npages, false);
+ kfree(pages);
+}
+
+static inline struct mm_struct *mm_from_sdma_node(struct sdma_mmu_node *node)
+{
+ return node->rb.handler->mn.mm;
+}
+
+static void free_system_node(struct sdma_mmu_node *node)
+{
+ if (node->npages) {
+ unpin_vector_pages(mm_from_sdma_node(node), node->pages, 0,
+ node->npages);
+ atomic_sub(node->npages, &node->pq->n_locked);
+ }
+ kfree(node);
+}
+
+/*
+ * kref_get()'s an additional kref on the returned rb_node to prevent rb_node
+ * from being released until after rb_node is assigned to an SDMA descriptor
+ * (struct sdma_desc) under add_system_iovec_to_sdma_packet(), even if the
+ * virtual address range for rb_node is invalidated between now and then.
+ */
+static struct sdma_mmu_node *find_system_node(struct mmu_rb_handler *handler,
+ unsigned long start,
+ unsigned long end)
+{
+ struct mmu_rb_node *rb_node;
+ unsigned long flags;
+
+ spin_lock_irqsave(&handler->lock, flags);
+ rb_node = hfi1_mmu_rb_get_first(handler, start, (end - start));
+ if (!rb_node) {
+ spin_unlock_irqrestore(&handler->lock, flags);
+ return NULL;
+ }
+
+ /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */
+ kref_get(&rb_node->refcount);
+ spin_unlock_irqrestore(&handler->lock, flags);
+
+ return container_of(rb_node, struct sdma_mmu_node, rb);
+}
+
+static int pin_system_pages(struct user_sdma_request *req,
+ uintptr_t start_address, size_t length,
+ struct sdma_mmu_node *node, int npages)
+{
+ struct hfi1_user_sdma_pkt_q *pq = req->pq;
+ int pinned, cleared;
+ struct page **pages;
+
+ pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
+
+retry:
+ if (!hfi1_can_pin_pages(pq->dd, current->mm, atomic_read(&pq->n_locked),
+ npages)) {
+ SDMA_DBG(req, "Evicting: nlocked %u npages %u",
+ atomic_read(&pq->n_locked), npages);
+ cleared = sdma_cache_evict(pq, npages);
+ if (cleared >= npages)
+ goto retry;
+ }
+
+ SDMA_DBG(req, "Acquire user pages start_address %lx node->npages %u npages %u",
+ start_address, node->npages, npages);
+ pinned = hfi1_acquire_user_pages(current->mm, start_address, npages, 0,
+ pages);
+
+ if (pinned < 0) {
+ kfree(pages);
+ SDMA_DBG(req, "pinned %d", pinned);
+ return pinned;
+ }
+ if (pinned != npages) {
+ unpin_vector_pages(current->mm, pages, node->npages, pinned);
+ SDMA_DBG(req, "npages %u pinned %d", npages, pinned);
+ return -EFAULT;
+ }
+ node->rb.addr = start_address;
+ node->rb.len = length;
+ node->pages = pages;
+ node->npages = npages;
+ atomic_add(pinned, &pq->n_locked);
+ SDMA_DBG(req, "done. pinned %d", pinned);
+ return 0;
+}
+
+/*
+ * kref refcount on *node_p will be 2 on successful addition: one kref from
+ * kref_init() for mmu_rb_handler and one kref to prevent *node_p from being
+ * released until after *node_p is assigned to an SDMA descriptor (struct
+ * sdma_desc) under add_system_iovec_to_sdma_packet(), even if the virtual
+ * address range for *node_p is invalidated between now and then.
+ */
+static int add_system_pinning(struct user_sdma_request *req,
+ struct sdma_mmu_node **node_p,
+ unsigned long start, unsigned long len)
+
+{
+ struct hfi1_user_sdma_pkt_q *pq = req->pq;
+ struct sdma_mmu_node *node;
+ int ret;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ /* First kref "moves" to mmu_rb_handler */
+ kref_init(&node->rb.refcount);
+
+ /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */
+ kref_get(&node->rb.refcount);
+
+ node->pq = pq;
+ ret = pin_system_pages(req, start, len, node, PFN_DOWN(len));
+ if (ret == 0) {
+ ret = hfi1_mmu_rb_insert(pq->handler, &node->rb);
+ if (ret)
+ free_system_node(node);
+ else
+ *node_p = node;
+
+ return ret;
+ }
+
+ kfree(node);
+ return ret;
+}
+
+static int get_system_cache_entry(struct user_sdma_request *req,
+ struct sdma_mmu_node **node_p,
+ size_t req_start, size_t req_len)
+{
+ struct hfi1_user_sdma_pkt_q *pq = req->pq;
+ u64 start = ALIGN_DOWN(req_start, PAGE_SIZE);
+ u64 end = PFN_ALIGN(req_start + req_len);
+ int ret;
+
+ if ((end - start) == 0) {
+ SDMA_DBG(req,
+ "Request for empty cache entry req_start %lx req_len %lx start %llx end %llx",
+ req_start, req_len, start, end);
+ return -EINVAL;
+ }
+
+ SDMA_DBG(req, "req_start %lx req_len %lu", req_start, req_len);
+
+ while (1) {
+ struct sdma_mmu_node *node =
+ find_system_node(pq->handler, start, end);
+ u64 prepend_len = 0;
+
+ SDMA_DBG(req, "node %p start %llx end %llu", node, start, end);
+ if (!node) {
+ ret = add_system_pinning(req, node_p, start,
+ end - start);
+ if (ret == -EEXIST) {
+ /*
+ * Another execution context has inserted a
+ * conficting entry first.
+ */
+ continue;
+ }
+ return ret;
+ }
+
+ if (node->rb.addr <= start) {
+ /*
+ * This entry covers at least part of the region. If it doesn't extend
+ * to the end, then this will be called again for the next segment.
+ */
+ *node_p = node;
+ return 0;
+ }
+
+ SDMA_DBG(req, "prepend: node->rb.addr %lx, node->rb.refcount %d",
+ node->rb.addr, kref_read(&node->rb.refcount));
+ prepend_len = node->rb.addr - start;
+
+ /*
+ * This node will not be returned, instead a new node
+ * will be. So release the reference.
+ */
+ kref_put(&node->rb.refcount, hfi1_mmu_rb_release);
+
+ /* Prepend a node to cover the beginning of the allocation */
+ ret = add_system_pinning(req, node_p, start, prepend_len);
+ if (ret == -EEXIST) {
+ /* Another execution context has inserted a conflicting entry first. */
+ continue;
+ }
+ return ret;
+ }
+}
+
+static void sdma_mmu_rb_node_get(void *ctx)
+{
+ struct mmu_rb_node *node = ctx;
+
+ kref_get(&node->refcount);
+}
+
+static void sdma_mmu_rb_node_put(void *ctx)
+{
+ struct sdma_mmu_node *node = ctx;
+
+ kref_put(&node->rb.refcount, hfi1_mmu_rb_release);
+}
+
+static int add_mapping_to_sdma_packet(struct user_sdma_request *req,
+ struct user_sdma_txreq *tx,
+ struct sdma_mmu_node *cache_entry,
+ size_t start,
+ size_t from_this_cache_entry)
+{
+ struct hfi1_user_sdma_pkt_q *pq = req->pq;
+ unsigned int page_offset;
+ unsigned int from_this_page;
+ size_t page_index;
+ void *ctx;
+ int ret;
+
+ /*
+ * Because the cache may be more fragmented than the memory that is being accessed,
+ * it's not strictly necessary to have a descriptor per cache entry.
+ */
+
+ while (from_this_cache_entry) {
+ page_index = PFN_DOWN(start - cache_entry->rb.addr);
+
+ if (page_index >= cache_entry->npages) {
+ SDMA_DBG(req,
+ "Request for page_index %zu >= cache_entry->npages %u",
+ page_index, cache_entry->npages);
+ return -EINVAL;
+ }
+
+ page_offset = start - ALIGN_DOWN(start, PAGE_SIZE);
+ from_this_page = PAGE_SIZE - page_offset;
+
+ if (from_this_page < from_this_cache_entry) {
+ ctx = NULL;
+ } else {
+ /*
+ * In the case they are equal the next line has no practical effect,
+ * but it's better to do a register to register copy than a conditional
+ * branch.
+ */
+ from_this_page = from_this_cache_entry;
+ ctx = cache_entry;
+ }
+
+ ret = sdma_txadd_page(pq->dd, &tx->txreq,
+ cache_entry->pages[page_index],
+ page_offset, from_this_page,
+ ctx,
+ sdma_mmu_rb_node_get,
+ sdma_mmu_rb_node_put);
+ if (ret) {
+ /*
+ * When there's a failure, the entire request is freed by
+ * user_sdma_send_pkts().
+ */
+ SDMA_DBG(req,
+ "sdma_txadd_page failed %d page_index %lu page_offset %u from_this_page %u",
+ ret, page_index, page_offset, from_this_page);
+ return ret;
+ }
+ start += from_this_page;
+ from_this_cache_entry -= from_this_page;
+ }
+ return 0;
+}
+
+static int add_system_iovec_to_sdma_packet(struct user_sdma_request *req,
+ struct user_sdma_txreq *tx,
+ struct user_sdma_iovec *iovec,
+ size_t from_this_iovec)
+{
+ while (from_this_iovec > 0) {
+ struct sdma_mmu_node *cache_entry;
+ size_t from_this_cache_entry;
+ size_t start;
+ int ret;
+
+ start = (uintptr_t)iovec->iov.iov_base + iovec->offset;
+ ret = get_system_cache_entry(req, &cache_entry, start,
+ from_this_iovec);
+ if (ret) {
+ SDMA_DBG(req, "pin system segment failed %d", ret);
+ return ret;
+ }
+
+ from_this_cache_entry = cache_entry->rb.len - (start - cache_entry->rb.addr);
+ if (from_this_cache_entry > from_this_iovec)
+ from_this_cache_entry = from_this_iovec;
+
+ ret = add_mapping_to_sdma_packet(req, tx, cache_entry, start,
+ from_this_cache_entry);
+
+ /*
+ * Done adding cache_entry to zero or more sdma_desc. Can
+ * kref_put() the "safety" kref taken under
+ * get_system_cache_entry().
+ */
+ kref_put(&cache_entry->rb.refcount, hfi1_mmu_rb_release);
+
+ if (ret) {
+ SDMA_DBG(req, "add system segment failed %d", ret);
+ return ret;
+ }
+
+ iovec->offset += from_this_cache_entry;
+ from_this_iovec -= from_this_cache_entry;
+ }
+
+ return 0;
+}
+
+/*
+ * Add up to pkt_data_remaining bytes to the txreq, starting at the current
+ * offset in the given iovec entry and continuing until all data has been added
+ * to the iovec or the iovec entry type changes.
+ *
+ * On success, prior to returning, adjust pkt_data_remaining, req->iov_idx, and
+ * the offset value in req->iov[req->iov_idx] to reflect the data that has been
+ * consumed.
+ */
+int hfi1_add_pages_to_sdma_packet(struct user_sdma_request *req,
+ struct user_sdma_txreq *tx,
+ struct user_sdma_iovec *iovec,
+ u32 *pkt_data_remaining)
+{
+ size_t remaining_to_add = *pkt_data_remaining;
+ /*
+ * Walk through iovec entries, ensure the associated pages
+ * are pinned and mapped, add data to the packet until no more
+ * data remains to be added or the iovec entry type changes.
+ */
+ while (remaining_to_add > 0) {
+ struct user_sdma_iovec *cur_iovec;
+ size_t from_this_iovec;
+ int ret;
+
+ cur_iovec = iovec;
+ from_this_iovec = iovec->iov.iov_len - iovec->offset;
+
+ if (from_this_iovec > remaining_to_add) {
+ from_this_iovec = remaining_to_add;
+ } else {
+ /* The current iovec entry will be consumed by this pass. */
+ req->iov_idx++;
+ iovec++;
+ }
+
+ ret = add_system_iovec_to_sdma_packet(req, tx, cur_iovec,
+ from_this_iovec);
+ if (ret)
+ return ret;
+
+ remaining_to_add -= from_this_iovec;
+ }
+ *pkt_data_remaining = remaining_to_add;
+
+ return 0;
+}
+
+static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
+ unsigned long len)
+{
+ return (bool)(node->addr == addr);
+}
+
+/*
+ * Return 1 to remove the node from the rb tree and call the remove op.
+ *
+ * Called with the rb tree lock held.
+ */
+static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
+ void *evict_arg, bool *stop)
+{
+ struct sdma_mmu_node *node =
+ container_of(mnode, struct sdma_mmu_node, rb);
+ struct evict_data *evict_data = evict_arg;
+
+ /* this node will be evicted, add its pages to our count */
+ evict_data->cleared += node->npages;
+
+ /* have enough pages been cleared? */
+ if (evict_data->cleared >= evict_data->target)
+ *stop = true;
+
+ return 1; /* remove this node */
+}
+
+static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode)
+{
+ struct sdma_mmu_node *node =
+ container_of(mnode, struct sdma_mmu_node, rb);
+
+ free_system_node(node);
+}
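
The per-page splitting in add_mapping_to_sdma_packet() above is easier to see outside the diff. A minimal sketch, assuming PAGE_SIZE is a power of two; the helper name split_span_by_page() is invented for illustration and is not part of the patch:

#include <linux/align.h>
#include <linux/mm.h>

/*
 * How many bytes of the current cache entry can one page-sized SDMA
 * descriptor cover, starting at virtual address "start"? Mirrors the
 * page_offset/from_this_page arithmetic in add_mapping_to_sdma_packet().
 */
static unsigned long split_span_by_page(unsigned long start,
					unsigned long from_this_cache_entry)
{
	unsigned long page_offset = start - ALIGN_DOWN(start, PAGE_SIZE);
	unsigned long from_this_page = PAGE_SIZE - page_offset;

	/* never cover more than what remains in this cache entry */
	if (from_this_page > from_this_cache_entry)
		from_this_page = from_this_cache_entry;
	return from_this_page;
}

Each loop iteration then advances start and shrinks from_this_cache_entry by the returned value, which is exactly what the code above does after sdma_txadd_page() succeeds.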
diff --git a/drivers/infiniband/hw/hfi1/pinning.h b/drivers/infiniband/hw/hfi1/pinning.h
new file mode 100644
index 000000000000..a814a3aa9654
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/pinning.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/*
+ * Copyright(c) 2023 Cornelis Networks, Inc.
+ */
+#ifndef _HFI1_PINNING_H
+#define _HFI1_PINNING_H
+
+struct hfi1_user_sdma_pkt_q;
+struct user_sdma_request;
+struct user_sdma_txreq;
+struct user_sdma_iovec;
+
+int hfi1_init_system_pinning(struct hfi1_user_sdma_pkt_q *pq);
+void hfi1_free_system_pinning(struct hfi1_user_sdma_pkt_q *pq);
+int hfi1_add_pages_to_sdma_packet(struct user_sdma_request *req,
+ struct user_sdma_txreq *tx,
+ struct user_sdma_iovec *iovec,
+ u32 *pkt_data_remaining);
+
+#endif /* _HFI1_PINNING_H */
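
A rough caller-side sketch of the interface declared above, assuming the usual user_sdma packet-building flow; example_fill_packet() and its locals are illustrative only:

/* Ask the pinning layer to supply one packet's payload from an iovec. */
static int example_fill_packet(struct user_sdma_request *req,
			       struct user_sdma_txreq *tx,
			       struct user_sdma_iovec *iovec,
			       u32 payload_bytes)
{
	u32 remaining = payload_bytes;
	int ret;

	/*
	 * hfi1_add_pages_to_sdma_packet() walks consecutive iovec entries
	 * itself, so one call is enough; on success the whole payload has
	 * been added and "remaining" is zero.
	 */
	ret = hfi1_add_pages_to_sdma_packet(req, tx, iovec, &remaining);
	if (ret)
		return ret;
	return 0;
}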
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 16ba9d52e1b9..764286da2ce8 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015-2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#include <linux/delay.h>
@@ -86,7 +44,7 @@ void pio_send_control(struct hfi1_devdata *dd, int op)
switch (op) {
case PSC_GLOBAL_ENABLE:
reg |= SEND_CTRL_SEND_ENABLE_SMASK;
- /* Fall through */
+ fallthrough;
case PSC_DATA_VL_ENABLE:
mask = 0;
for (i = 0; i < ARRAY_SIZE(dd->vld); i++)
@@ -862,7 +820,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
}
hfi1_cdbg(PIO,
- "Send context %u(%u) %s group %u credits %u credit_ctrl 0x%llx threshold %u\n",
+ "Send context %u(%u) %s group %u credits %u credit_ctrl 0x%llx threshold %u",
sw_index,
hw_context,
sc_type_name(type),
@@ -920,6 +878,7 @@ void sc_disable(struct send_context *sc)
{
u64 reg;
struct pio_buf *pbuf;
+ LIST_HEAD(wake_list);
if (!sc)
return;
@@ -952,6 +911,23 @@ void sc_disable(struct send_context *sc)
}
}
spin_unlock(&sc->release_lock);
+
+ write_seqlock(&sc->waitlock);
+ list_splice_init(&sc->piowait, &wake_list);
+ write_sequnlock(&sc->waitlock);
+ while (!list_empty(&wake_list)) {
+ struct iowait *wait;
+ struct rvt_qp *qp;
+ struct hfi1_qp_priv *priv;
+
+ wait = list_first_entry(&wake_list, struct iowait, list);
+ qp = iowait_to_qp(wait);
+ priv = qp->priv;
+ list_del_init(&priv->s_iowait.list);
+ priv->s_iowait.lock = NULL;
+ hfi1_qp_wakeup(qp, RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
+ }
+
spin_unlock_irq(&sc->alloc_lock);
}
@@ -977,7 +953,7 @@ static bool is_sc_halted(struct hfi1_devdata *dd, u32 hw_context)
}
/**
- * sc_wait_for_packet_egress
+ * sc_wait_for_packet_egress - wait for packet
* @sc: valid send context
* @pause: wait for credit return
*
@@ -1385,16 +1361,6 @@ void sc_flush(struct send_context *sc)
sc_wait_for_packet_egress(sc, 1);
}
-/* drop all packets on the context, no waiting until they are sent */
-void sc_drop(struct send_context *sc)
-{
- if (!sc)
- return;
-
- dd_dev_info(sc->dd, "%s: context %u(%u) - not implemented\n",
- __func__, sc->sw_index, sc->hw_context);
-}
-
/*
* Start the software reaction to a context halt or SPC freeze:
* - mark the context as halted or frozen
@@ -1427,7 +1393,8 @@ void sc_stop(struct send_context *sc, int flag)
* @cb: optional callback to call when the buffer is finished sending
* @arg: argument for cb
*
- * Return a pointer to a PIO buffer if successful, NULL if not enough room.
+ * Return a pointer to a PIO buffer, NULL if not enough room, -ECOMM
+ * when link is down.
*/
struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len,
pio_release_cb cb, void *arg)
@@ -1443,7 +1410,7 @@ struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len,
spin_lock_irqsave(&sc->alloc_lock, flags);
if (!(sc->flags & SCF_ENABLED)) {
spin_unlock_irqrestore(&sc->alloc_lock, flags);
- goto done;
+ return ERR_PTR(-ECOMM);
}
retry:
@@ -1577,9 +1544,8 @@ void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint)
else
sc_del_credit_return_intr(sc);
trace_hfi1_wantpiointr(sc, needint, sc->credit_ctrl);
- if (needint) {
+ if (needint)
sc_return_credits(sc);
- }
}
/**
@@ -1917,9 +1883,7 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts)
vl_scontexts[i] = sc_per_vl + (extra > 0 ? 1 : 0);
}
/* build new map */
- newmap = kzalloc(sizeof(*newmap) +
- roundup_pow_of_two(num_vls) *
- sizeof(struct pio_map_elem *),
+ newmap = kzalloc(struct_size(newmap, map, roundup_pow_of_two(num_vls)),
GFP_KERNEL);
if (!newmap)
goto bail;
@@ -1934,9 +1898,8 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts)
int sz = roundup_pow_of_two(vl_scontexts[i]);
/* only allocate once */
- newmap->map[i] = kzalloc(sizeof(*newmap->map[i]) +
- sz * sizeof(struct
- send_context *),
+ newmap->map[i] = kzalloc(struct_size(newmap->map[i],
+ ksc, sz),
GFP_KERNEL);
if (!newmap->map[i])
goto bail;
@@ -2113,7 +2076,7 @@ int init_credit_return(struct hfi1_devdata *dd)
"Unable to allocate credit return DMA range for NUMA %d\n",
i);
ret = -ENOMEM;
- goto done;
+ goto free_cr_base;
}
}
set_dev_node(&dd->pcidev->dev, dd->node);
@@ -2121,6 +2084,10 @@ int init_credit_return(struct hfi1_devdata *dd)
ret = 0;
done:
return ret;
+
+free_cr_base:
+ free_credit_return(dd);
+ goto done;
}
void free_credit_return(struct hfi1_devdata *dd)
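
The init_credit_return() change above converts a bare "goto done" into an unwind path so buffers already allocated for earlier NUMA nodes are released when a later allocation fails. A minimal sketch of that pattern, with invented names:

#include <linux/slab.h>

#define EXAMPLE_BUF_SIZE 4096
#define EXAMPLE_MAX_NODES 4

struct example_dev {
	void *buf[EXAMPLE_MAX_NODES];
};

static int example_alloc_per_node(struct example_dev *dev, int num_nodes)
{
	int i;

	for (i = 0; i < num_nodes; i++) {
		dev->buf[i] = kzalloc(EXAMPLE_BUF_SIZE, GFP_KERNEL);
		if (!dev->buf[i])
			goto unwind;	/* free only what was already allocated */
	}
	return 0;

unwind:
	while (--i >= 0) {
		kfree(dev->buf[i]);
		dev->buf[i] = NULL;
	}
	return -ENOMEM;
}

In the driver the unwind is delegated to free_credit_return(), which copes with a partially populated array, so the fix only needs the new label and a call to it.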
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index c9a58b642bdd..ab0f9a3a8d12 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -1,52 +1,10 @@
-#ifndef _PIO_H
-#define _PIO_H
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015-2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+#ifndef _PIO_H
+#define _PIO_H
/* send context types */
#define SC_KERNEL 0
#define SC_VL15 1
@@ -243,7 +201,7 @@ struct sc_config_sizes {
*/
struct pio_map_elem {
u32 mask;
- struct send_context *ksc[0];
+ struct send_context *ksc[];
};
/*
@@ -263,7 +221,7 @@ struct pio_vl_map {
u32 mask;
u8 actual_vls;
u8 vls;
- struct pio_map_elem *map[0];
+ struct pio_map_elem *map[];
};
int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls,
@@ -279,7 +237,6 @@ int init_credit_return(struct hfi1_devdata *dd);
void free_credit_return(struct hfi1_devdata *dd);
int init_sc_pools_and_sizes(struct hfi1_devdata *dd);
int init_send_contexts(struct hfi1_devdata *dd);
-int init_credit_return(struct hfi1_devdata *dd);
int init_pervl_scs(struct hfi1_devdata *dd);
struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
uint hdrqentsize, int numa);
@@ -289,12 +246,10 @@ void sc_disable(struct send_context *sc);
int sc_restart(struct send_context *sc);
void sc_return_credits(struct send_context *sc);
void sc_flush(struct send_context *sc);
-void sc_drop(struct send_context *sc);
void sc_stop(struct send_context *sc, int bit);
struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len,
pio_release_cb cb, void *arg);
void sc_release_update(struct send_context *sc);
-void sc_return_credits(struct send_context *sc);
void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context);
void sc_add_credit_return_intr(struct send_context *sc);
void sc_del_credit_return_intr(struct send_context *sc);
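
The ksc[0] and map[0] members above become C99 flexible array members, and the matching pio.c hunks switch their allocations to struct_size(), which checks the size arithmetic for overflow. A small illustration of the pairing, using generic names rather than the driver's:

#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/types.h>

struct send_context;

struct example_map {
	u32 mask;
	struct send_context *ksc[];	/* was "ksc[0]" before the conversion */
};

static struct example_map *example_map_alloc(size_t nelems)
{
	struct example_map *map;

	/*
	 * struct_size(map, ksc, nelems) evaluates to
	 * sizeof(*map) + nelems * sizeof(map->ksc[0]) with overflow checking,
	 * replacing the open-coded sizeof arithmetic removed from pio.c.
	 */
	map = kzalloc(struct_size(map, ksc, nelems), GFP_KERNEL);
	if (map)
		map->mask = nelems - 1;
	return map;
}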
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index 03024cec78dd..80fee812a930 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#include "hfi.h"
@@ -55,6 +13,7 @@
/**
* pio_copy - copy data block to MMIO space
+ * @dd: hfi1 dev data
* @pbuf: a number of blocks allocated within a PIO send context
* @pbc: PBC to send
* @from: source, must be 8 byte aligned
@@ -191,30 +150,29 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n)
switch (n) {
case 7:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 6:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 5:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 4:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 3:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 2:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 1:
*dest++ = *src++;
- /* fall through */
}
}
/*
- * Read nbytes from "from" and and place them in the low bytes
+ * Read nbytes from "from" and place them in the low bytes
* of pbuf->carry. Other bytes are left as-is. Any previous
* value in pbuf->carry is lost.
*
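
The jcopy() hunk above is one of many in this series that replace "/* fall through */" comments with the fallthrough pseudo-keyword, which -Wimplicit-fallthrough can verify. A small standalone illustration, not taken from the driver:

#include <linux/compiler_attributes.h>

static int example_accumulate(int op, int val)
{
	int acc = 0;

	switch (op) {
	case 2:
		acc += val;
		fallthrough;	/* a statement the compiler can check */
	case 1:
		acc += val;
		break;
	default:
		break;
	}
	return acc;
}

Note that the final "case 1:" in jcopy() loses its annotation entirely: falling out of the last case into the end of the switch needs no marker.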
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index cbf7faa5038c..7bd0e9b6cb50 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#include <linux/firmware.h>
@@ -634,7 +592,7 @@ static void apply_tx_lanes(struct hfi1_pportdata *ppd, u8 field_id,
u32 config_data, const char *message)
{
u8 i;
- int ret = HCMD_SUCCESS;
+ int ret;
for (i = 0; i < 4; i++) {
ret = load_8051_config(ppd->dd, field_id, i, config_data);
@@ -668,8 +626,8 @@ static u8 aoc_low_power_setting(struct hfi1_pportdata *ppd)
/* active optical cables only */
switch ((cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4) {
- case 0x0 ... 0x9: /* fallthrough */
- case 0xC: /* fallthrough */
+ case 0x0 ... 0x9: fallthrough;
+ case 0xC: fallthrough;
case 0xE:
/* active AOC */
power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]);
@@ -899,8 +857,8 @@ static int tune_qsfp(struct hfi1_pportdata *ppd,
*ptr_tuning_method = OPA_PASSIVE_TUNING;
break;
- case 0x0 ... 0x9: /* fallthrough */
- case 0xC: /* fallthrough */
+ case 0x0 ... 0x9: fallthrough;
+ case 0xC: fallthrough;
case 0xE:
ret = tune_active_qsfp(ppd, ptr_tx_preset, ptr_rx_preset,
ptr_total_atten);
@@ -909,7 +867,7 @@ static int tune_qsfp(struct hfi1_pportdata *ppd,
*ptr_tuning_method = OPA_ACTIVE_TUNING;
break;
- case 0xD: /* fallthrough */
+ case 0xD: fallthrough;
case 0xF:
default:
dd_dev_warn(ppd->dd, "%s: Unknown/unsupported cable\n",
diff --git a/drivers/infiniband/hw/hfi1/platform.h b/drivers/infiniband/hw/hfi1/platform.h
index eed0aa9124fa..0631f9bf3a89 100644
--- a/drivers/infiniband/hw/hfi1/platform.h
+++ b/drivers/infiniband/hw/hfi1/platform.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+
#ifndef __PLATFORM_H
#define __PLATFORM_H
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 4e0e9fc0a777..f3d8c0c193ac 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*/
#include <linux/err.h>
@@ -186,31 +144,17 @@ static void flush_iowait(struct rvt_qp *qp)
write_sequnlock_irqrestore(lock, flags);
}
-static inline int opa_mtu_enum_to_int(int mtu)
-{
- switch (mtu) {
- case OPA_MTU_8192: return 8192;
- case OPA_MTU_10240: return 10240;
- default: return -1;
- }
-}
-
-/**
+/*
* This function is what we would push to the core layer if we wanted to be a
* "first class citizen". Instead we hide this here and rely on Verbs ULPs
* to blindly pass the MTU enum value from the PathRecord to us.
*/
static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
{
- int val;
-
/* Constraining 10KB packets to 8KB packets */
if (mtu == (enum ib_mtu)OPA_MTU_10240)
- mtu = OPA_MTU_8192;
- val = opa_mtu_enum_to_int((int)mtu);
- if (val > 0)
- return val;
- return ib_mtu_enum_to_int(mtu);
+ mtu = (enum ib_mtu)OPA_MTU_8192;
+ return opa_mtu_enum_to_int((enum opa_mtu)mtu);
}
int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
@@ -303,9 +247,9 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
/**
* hfi1_setup_wqe - set up the wqe
- * @qp - The qp
- * @wqe - The built wqe
- * @call_send - Determine if the send should be posted or scheduled.
+ * @qp: The qp
+ * @wqe: The built wqe
+ * @call_send: Determine if the send should be posted or scheduled.
*
* Perform setup of the wqe. This is called
* prior to inserting the wqe into the ring but after
@@ -326,7 +270,7 @@ int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
hfi1_setup_tid_rdma_wqe(qp, wqe);
- /* fall through */
+ fallthrough;
case IB_QPT_UC:
if (wqe->length > 0x80000000U)
return -EINVAL;
@@ -348,11 +292,12 @@ int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
break;
case IB_QPT_GSI:
case IB_QPT_UD:
- ah = ibah_to_rvtah(wqe->ud_wr.ah);
+ ah = rvt_get_swqe_ah(wqe);
if (wqe->length > (1 << ah->log_pmtu))
return -EINVAL;
if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf)
return -EINVAL;
+ break;
default:
break;
}
@@ -381,7 +326,10 @@ bool _hfi1_schedule_send(struct rvt_qp *qp)
struct hfi1_ibport *ibp =
to_iport(qp->ibqp.device, qp->port_num);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+ struct hfi1_devdata *dd = ppd->dd;
+
+ if (dd->flags & HFI1_SHUTDOWN)
+ return true;
return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
priv->s_sde ?
@@ -605,7 +553,7 @@ struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5)
return sde;
}
-/*
+/**
* qp_to_send_context - map a qp to a send context
* @qp: the QP
* @sc5: the 5 bit sc
@@ -702,8 +650,8 @@ void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter)
sde ? sde->this_idx : 0,
send_context,
send_context ? send_context->sw_index : 0,
- ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->head,
- ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail,
+ ib_cq_head(qp->ibqp.send_cq),
+ ib_cq_tail(qp->ibqp.send_cq),
qp->pid,
qp->s_state,
qp->s_ack_state,
@@ -922,8 +870,8 @@ void notify_error_qp(struct rvt_qp *qp)
/**
* hfi1_qp_iter_cb - callback for iterator
- * @qp - the qp
- * @v - the sl in low bits of v
+ * @qp: the qp
+ * @v: the sl in low bits of v
*
* This is called from the iterator callback to work
* on an individual qp.
diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index b670321365d3..870ff1a6e5c4 100644
--- a/drivers/infiniband/hw/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
@@ -1,52 +1,10 @@
-#ifndef _QP_H
-#define _QP_H
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+#ifndef _QP_H
+#define _QP_H
#include <linux/hash.h>
#include <rdma/rdmavt_qp.h>
#include "verbs.h"
@@ -113,20 +71,6 @@ static inline void clear_ahg(struct rvt_qp *qp)
}
/**
- * hfi1_create_qp - create a queue pair for a device
- * @ibpd: the protection domain who's device we create the queue pair for
- * @init_attr: the attributes of the queue pair
- * @udata: user data for libibverbs.so
- *
- * Returns the queue pair on success, otherwise returns an errno.
- *
- * Called by the ib_create_qp() core verbs function.
- */
-struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata);
-
-/**
* hfi1_qp_wakeup - wake up on the indicated event
* @qp: the QP
* @flag: flag the qp on which the qp is stalled
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index b5966991d647..3b7842a7f634 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#include <linux/delay.h>
@@ -231,7 +189,7 @@ static int i2c_bus_write(struct hfi1_devdata *dd, struct hfi1_i2c_bus *i2c,
break;
case 2:
offset_bytes[1] = (offset >> 8) & 0xff;
- /* fall through */
+ fallthrough;
case 1:
num_msgs = 2;
offset_bytes[0] = offset & 0xff;
@@ -242,7 +200,7 @@ static int i2c_bus_write(struct hfi1_devdata *dd, struct hfi1_i2c_bus *i2c,
msgs[0].buf = offset_bytes;
msgs[1].addr = slave_addr;
- msgs[1].flags = I2C_M_NOSTART,
+ msgs[1].flags = I2C_M_NOSTART;
msgs[1].len = len;
msgs[1].buf = data;
break;
@@ -279,7 +237,7 @@ static int i2c_bus_read(struct hfi1_devdata *dd, struct hfi1_i2c_bus *bus,
break;
case 2:
offset_bytes[1] = (offset >> 8) & 0xff;
- /* fall through */
+ fallthrough;
case 1:
num_msgs = 2;
offset_bytes[0] = offset & 0xff;
@@ -290,7 +248,7 @@ static int i2c_bus_read(struct hfi1_devdata *dd, struct hfi1_i2c_bus *bus,
msgs[0].buf = offset_bytes;
msgs[1].addr = slave_addr;
- msgs[1].flags = I2C_M_RD,
+ msgs[1].flags = I2C_M_RD;
msgs[1].len = len;
msgs[1].buf = data;
break;
@@ -447,26 +405,6 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
}
/*
- * Perform a stand-alone single QSFP write. Acquire the resource, do the
- * write, then release the resource.
- */
-int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
- int len)
-{
- struct hfi1_devdata *dd = ppd->dd;
- u32 resource = qsfp_resource(dd);
- int ret;
-
- ret = acquire_chip_resource(dd, resource, QSFP_WAIT);
- if (ret)
- return ret;
- ret = qsfp_write(ppd, target, addr, bp, len);
- release_chip_resource(dd, resource);
-
- return ret;
-}
-
-/*
* Access page n, offset m of QSFP memory as defined by SFF 8636
* by reading @addr = ((256 * n) + m)
*
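
The two one-character fixes above replace a trailing comma with a semicolon in the msgs[1].flags assignments. The comma operator made the old lines compile as a single chained statement with the same effect, but it read like a stray initializer. A condensed sketch of the intended form, with an invented helper:

#include <linux/i2c.h>

static void example_fill_msg(struct i2c_msg *msg, u16 slave_addr,
			     u8 *data, u16 len)
{
	msg->addr  = slave_addr;
	msg->flags = I2C_M_NOSTART;	/* previously ended with "," */
	msg->len   = len;
	msg->buf   = data;
}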
diff --git a/drivers/infiniband/hw/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h
index 36cf52359848..5c59d53fcb63 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.h
+++ b/drivers/infiniband/hw/hfi1/qsfp.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
/* QSFP support common definitions, for hfi driver */
@@ -237,8 +195,6 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len);
int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len);
-int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
- int len);
int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len);
struct hfi1_asic_data;
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index a922edcf23d6..b36242c9d42c 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#include <linux/io.h>
@@ -141,7 +99,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
case OP(RDMA_READ_RESPONSE_ONLY):
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
release_rdma_sge_mr(e);
- /* FALLTHROUGH */
+ fallthrough;
case OP(ATOMIC_ACKNOWLEDGE):
/*
* We can increment the tail pointer now that the last
@@ -160,7 +118,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
qp->s_acked_ack_queue = next;
qp->s_tail_ack_queue = next;
trace_hfi1_rsp_make_rc_ack(qp, e->psn);
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_ONLY):
case OP(ACKNOWLEDGE):
/* Check for no next entry in the queue. */
@@ -267,7 +225,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
case OP(RDMA_READ_RESPONSE_FIRST):
qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_READ_RESPONSE_MIDDLE):
ps->s_txreq->ss = &qp->s_ack_rdma_sge;
ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr;
@@ -421,6 +379,7 @@ bail:
/**
* hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
* @qp: a pointer to the QP
+ * @ps: the current packet state
*
* Assumes s_lock is held.
*
@@ -595,11 +554,8 @@ check_s_state:
case IB_WR_SEND_WITH_IMM:
case IB_WR_SEND_WITH_INV:
/* If no credit, return. */
- if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
- rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
- qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
+ if (!rvt_rc_credit_avail(qp, wqe))
goto bail;
- }
if (len > pmtu) {
qp->s_state = OP(SEND_FIRST);
len = pmtu;
@@ -632,11 +588,8 @@ check_s_state:
goto no_flow_control;
case IB_WR_RDMA_WRITE_WITH_IMM:
/* If no credit, return. */
- if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
- rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
- qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
+ if (!rvt_rc_credit_avail(qp, wqe))
goto bail;
- }
no_flow_control:
put_ib_reth_vaddr(
wqe->rdma_wr.remote_addr,
@@ -887,8 +840,7 @@ no_flow_control:
goto bail;
}
qp->s_num_rd_atomic++;
-
- /* FALLTHROUGH */
+ fallthrough;
case IB_WR_OPFN:
if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
@@ -952,10 +904,10 @@ no_flow_control:
* See restart_rc().
*/
qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_FIRST):
qp->s_state = OP(SEND_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_MIDDLE):
bth2 = mask_psn(qp->s_psn++);
ss = &qp->s_sge;
@@ -997,10 +949,10 @@ no_flow_control:
* See restart_rc().
*/
qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_WRITE_FIRST):
qp->s_state = OP(RDMA_WRITE_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_WRITE_MIDDLE):
bth2 = mask_psn(qp->s_psn++);
ss = &qp->s_sge;
@@ -1382,9 +1334,8 @@ static const hfi1_make_rc_ack hfi1_make_rc_ack_tbl[2] = {
[HFI1_PKT_TYPE_16B] = &hfi1_make_rc_ack_16B
};
-/**
+/*
* hfi1_send_rc_ack - Construct an ACK packet and send it
- * @qp: a pointer to the QP
*
* This is called from hfi1_rc_rcv() and handle_receive_interrupt().
* Note that RDMA reads and atomics are handled in the
@@ -1432,7 +1383,7 @@ void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn)
pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps,
sc_to_vlt(ppd->dd, sc5), plen);
pbuf = sc_buffer_alloc(rcd->sc, plen, NULL, NULL);
- if (!pbuf) {
+ if (IS_ERR_OR_NULL(pbuf)) {
/*
* We have no room to send at the moment. Pass
* responsibility for sending the ACK to the send engine
@@ -1483,6 +1434,11 @@ static void update_num_rd_atomic(struct rvt_qp *qp, u32 psn,
req->ack_pending = cur_seg - req->comp_seg;
priv->pending_tid_r_segs += req->ack_pending;
qp->s_num_rd_atomic += req->ack_pending;
+ trace_hfi1_tid_req_update_num_rd_atomic(qp, 0,
+ wqe->wr.opcode,
+ wqe->psn,
+ wqe->lpsn,
+ req);
} else {
priv->pending_tid_r_segs += req->total_segs;
qp->s_num_rd_atomic += req->total_segs;
@@ -1701,6 +1657,36 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
}
}
+/**
+ * hfi1_rc_verbs_aborted - handle abort status
+ * @qp: the QP
+ * @opah: the opa header
+ *
+ * This code modifies both the ACK bit in BTH[2]
+ * and the s_flags to go into send one mode.
+ *
+ * This serves to throttle the send engine to only
+ * send a single packet in the likely case that
+ * a link has gone down.
+ */
+void hfi1_rc_verbs_aborted(struct rvt_qp *qp, struct hfi1_opa_header *opah)
+{
+ struct ib_other_headers *ohdr = hfi1_get_rc_ohdr(opah);
+ u8 opcode = ib_bth_get_opcode(ohdr);
+ u32 psn;
+
+ /* ignore responses */
+ if ((opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
+ opcode <= OP(ATOMIC_ACKNOWLEDGE)) ||
+ opcode == TID_OP(READ_RESP) ||
+ opcode == TID_OP(WRITE_RESP))
+ return;
+
+ psn = ib_bth_get_psn(ohdr) | IB_BTH_REQ_ACK;
+ ohdr->bth[2] = cpu_to_be32(psn);
+ qp->s_flags |= RVT_S_SEND_ONE;
+}
+
/*
* This should be called with the QP s_lock held and interrupts disabled.
*/
@@ -1709,8 +1695,6 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
struct ib_other_headers *ohdr;
struct hfi1_qp_priv *priv = qp->priv;
struct rvt_swqe *wqe;
- struct ib_header *hdr = NULL;
- struct hfi1_16b_header *hdr_16b = NULL;
u32 opcode, head, tail;
u32 psn;
struct tid_rdma_request *req;
@@ -1719,24 +1703,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
if (!(ib_rvt_state_ops[qp->state] & RVT_SEND_OR_FLUSH_OR_RECV_OK))
return;
- /* Find out where the BTH is */
- if (priv->hdr_type == HFI1_PKT_TYPE_9B) {
- hdr = &opah->ibh;
- if (ib_get_lnh(hdr) == HFI1_LRH_BTH)
- ohdr = &hdr->u.oth;
- else
- ohdr = &hdr->u.l.oth;
- } else {
- u8 l4;
-
- hdr_16b = &opah->opah;
- l4 = hfi1_16B_get_l4(hdr_16b);
- if (l4 == OPA_16B_L4_IB_LOCAL)
- ohdr = &hdr_16b->u.oth;
- else
- ohdr = &hdr_16b->u.l.oth;
- }
-
+ ohdr = hfi1_get_rc_ohdr(opah);
opcode = ib_bth_get_opcode(ohdr);
if ((opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
opcode <= OP(ATOMIC_ACKNOWLEDGE)) ||
@@ -1819,23 +1786,13 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
}
while (qp->s_last != qp->s_acked) {
- u32 s_last;
-
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 &&
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
break;
trdma_clean_swqe(qp, wqe);
- rvt_qp_wqe_unreserve(qp, wqe);
- s_last = qp->s_last;
- trace_hfi1_qp_send_completion(qp, wqe, s_last);
- if (++s_last >= qp->s_size)
- s_last = 0;
- qp->s_last = s_last;
- /* see post_send() */
- barrier();
- rvt_put_qp_swqe(qp, wqe);
- rvt_qp_swqe_complete(qp,
+ trace_hfi1_qp_send_completion(qp, wqe, qp->s_last);
+ rvt_qp_complete_swqe(qp,
wqe,
ib_hfi1_wc_opcode[wqe->wr.opcode],
IB_WC_SUCCESS);
@@ -1879,19 +1836,9 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
trace_hfi1_rc_completion(qp, wqe->lpsn);
if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 ||
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
- u32 s_last;
-
trdma_clean_swqe(qp, wqe);
- rvt_put_qp_swqe(qp, wqe);
- rvt_qp_wqe_unreserve(qp, wqe);
- s_last = qp->s_last;
- trace_hfi1_qp_send_completion(qp, wqe, s_last);
- if (++s_last >= qp->s_size)
- s_last = 0;
- qp->s_last = s_last;
- /* see post_send() */
- barrier();
- rvt_qp_swqe_complete(qp,
+ trace_hfi1_qp_send_completion(qp, wqe, qp->s_last);
+ rvt_qp_complete_swqe(qp,
wqe,
ib_hfi1_wc_opcode[wqe->wr.opcode],
IB_WC_SUCCESS);
@@ -2003,7 +1950,7 @@ static void update_qp_retry_state(struct rvt_qp *qp, u32 psn, u32 spsn,
}
}
-/**
+/*
* do_rc_ack - process an incoming RC ACK
* @qp: the QP the ACK came in on
* @psn: the packet sequence number of the ACK
@@ -2219,15 +2166,15 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
if (qp->s_flags & RVT_S_WAIT_RNR)
goto bail_stop;
rdi = ib_to_rvt(qp->ibqp.device);
- if (qp->s_rnr_retry == 0 &&
- !((rdi->post_parms[wqe->wr.opcode].flags &
- RVT_OPERATION_IGN_RNR_CNT) &&
- qp->s_rnr_retry_cnt == 0)) {
- status = IB_WC_RNR_RETRY_EXC_ERR;
- goto class_b;
+ if (!(rdi->post_parms[wqe->wr.opcode].flags &
+ RVT_OPERATION_IGN_RNR_CNT)) {
+ if (qp->s_rnr_retry == 0) {
+ status = IB_WC_RNR_RETRY_EXC_ERR;
+ goto class_b;
+ }
+ if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
+ qp->s_rnr_retry--;
}
- if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
- qp->s_rnr_retry--;
/*
* The last valid PSN is the previous PSN. For TID RDMA WRITE
@@ -2552,6 +2499,7 @@ static inline void rc_cancel_ack(struct rvt_qp *qp)
* @opcode: the opcode for this packet
* @psn: the packet sequence number for this packet
* @diff: the difference between the PSN and the expected PSN
+ * @rcd: the receive context
*
* This is called from hfi1_rc_rcv() to process an unexpected
* incoming RC packet for the given QP.
@@ -2609,7 +2557,7 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
* to be sent before sending this one.
*/
e = NULL;
- old_req = 1;
+ old_req = true;
ibp->rvp.n_rc_dupreq++;
spin_lock_irqsave(&qp->s_lock, flags);
@@ -2911,7 +2859,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
if (!ret)
goto rnr_nak;
qp->r_rcv_len = 0;
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_MIDDLE):
case OP(RDMA_WRITE_MIDDLE):
send_middle:
@@ -2951,7 +2899,7 @@ send_middle:
goto no_immediate_data;
if (opcode == OP(SEND_ONLY_WITH_INVALIDATE))
goto send_last_inv;
- /* FALLTHROUGH -- for SEND_ONLY_WITH_IMMEDIATE */
+ fallthrough; /* for SEND_ONLY_WITH_IMMEDIATE */
case OP(SEND_LAST_WITH_IMMEDIATE):
send_last_imm:
wc.ex.imm_data = ohdr->u.imm_data;
@@ -2967,7 +2915,7 @@ send_last_inv:
goto send_last;
case OP(RDMA_WRITE_LAST):
copy_last = rvt_is_user_qp(qp);
- /* fall through */
+ fallthrough;
case OP(SEND_LAST):
no_immediate_data:
wc.wc_flags = 0;
@@ -3015,13 +2963,12 @@ send_last:
wc.dlid_path_bits = 0;
wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
- ib_bth_is_solicited(ohdr));
+ rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr));
break;
case OP(RDMA_WRITE_ONLY):
copy_last = rvt_is_user_qp(qp);
- /* fall through */
+ fallthrough;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
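
Because sc_buffer_alloc() can now return ERR_PTR(-ECOMM) when the link is down (see the pio.c hunk earlier in this series), callers such as hfi1_send_rc_ack() test the result with IS_ERR_OR_NULL() instead of a plain NULL check. A sketch of the caller-side pattern; pio_send_example() and its -EBUSY mapping are invented:

#include <linux/err.h>
#include "pio.h"

static int pio_send_example(struct send_context *sc, u32 plen)
{
	struct pio_buf *pbuf;

	pbuf = sc_buffer_alloc(sc, plen, NULL, NULL);
	if (IS_ERR_OR_NULL(pbuf)) {
		/* NULL: out of credits for now; ERR_PTR(-ECOMM): link down */
		return pbuf ? PTR_ERR(pbuf) : -EBUSY;
	}

	/* ... build the PBC and payload, then commit the buffer ... */
	return 0;
}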
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index 23ac6057b211..aafa4e03b179 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#include <linux/spinlock.h>
@@ -260,6 +218,7 @@ static inline void hfi1_make_ruc_bth(struct rvt_qp *qp,
* @qp: the queue pair
* @ohdr: a pointer to the destination header memory
* @bth0: bth0 passed in from the RC/UC builder
+ * @bth1: bth1 passed in from the RC/UC builder
* @bth2: bth2 passed in from the RC/UC builder
* @middle: non zero indicates that ahg "could" be used
* @ps: the current packet state
@@ -348,6 +307,7 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
* @qp: the queue pair
* @ohdr: a pointer to the destination header memory
* @bth0: bth0 passed in from the RC/UC builder
+ * @bth1: bth1 passed in from the RC/UC builder
* @bth2: bth2 passed in from the RC/UC builder
* @middle: non zero indicates that ahg "could" be used
* @ps: the current packet state
@@ -455,11 +415,10 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
/**
* hfi1_schedule_send_yield - test for a yield required for QP
* send engine
- * @timeout: Final time for timeout slice for jiffies
* @qp: a pointer to QP
* @ps: a pointer to a structure with commonly lookup values for
- * the the send engine progress
- * @tid - true if it is the tid leg
+ * the send engine progress
+ * @tid: true if it is the tid leg
*
* This routine checks if the time slice for the QP has expired
* for RC QPs, if so an additional work entry is queued. At this
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index b0110728f541..5cfa4f8fbf3d 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#include <linux/spinlock.h>
@@ -65,6 +23,7 @@
#define SDMA_DESCQ_CNT 2048
#define SDMA_DESC_INTR 64
#define INVALID_TAIL 0xffff
+#define SDMA_PAD max_t(size_t, MAX_16B_PADDING, sizeof(u32))
static uint sdma_descq_cnt = SDMA_DESCQ_CNT;
module_param(sdma_descq_cnt, uint, S_IRUGO);
@@ -231,11 +190,11 @@ static const struct sdma_set_state_action sdma_action_table[] = {
static void sdma_complete(struct kref *);
static void sdma_finalput(struct sdma_state *);
static void sdma_get(struct sdma_state *);
-static void sdma_hw_clean_up_task(unsigned long);
+static void sdma_hw_clean_up_task(struct tasklet_struct *);
static void sdma_put(struct sdma_state *);
static void sdma_set_state(struct sdma_engine *, enum sdma_states);
static void sdma_start_hw_clean_up(struct sdma_engine *);
-static void sdma_sw_clean_up_task(unsigned long);
+static void sdma_sw_clean_up_task(struct tasklet_struct *);
static void sdma_sendctrl(struct sdma_engine *, unsigned);
static void init_sdma_regs(struct sdma_engine *, u32, uint);
static void sdma_process_event(
@@ -405,19 +364,33 @@ static void sdma_flush(struct sdma_engine *sde)
struct sdma_txreq *txp, *txp_next;
LIST_HEAD(flushlist);
unsigned long flags;
+ uint seq;
/* flush from head to tail */
sdma_flush_descq(sde);
spin_lock_irqsave(&sde->flushlist_lock, flags);
/* copy flush list */
- list_for_each_entry_safe(txp, txp_next, &sde->flushlist, list) {
- list_del_init(&txp->list);
- list_add_tail(&txp->list, &flushlist);
- }
+ list_splice_init(&sde->flushlist, &flushlist);
spin_unlock_irqrestore(&sde->flushlist_lock, flags);
/* flush from flush list */
list_for_each_entry_safe(txp, txp_next, &flushlist, list)
complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
+ /* wakeup QPs orphaned on the dmawait list */
+ do {
+ struct iowait *w, *nw;
+
+ seq = read_seqbegin(&sde->waitlock);
+ if (!list_empty(&sde->dmawait)) {
+ write_seqlock(&sde->waitlock);
+ list_for_each_entry_safe(w, nw, &sde->dmawait, list) {
+ if (w->wakeup) {
+ w->wakeup(w, SDMA_AVAIL_REASON);
+ list_del_init(&w->list);
+ }
+ }
+ write_sequnlock(&sde->waitlock);
+ }
+ } while (read_seqretry(&sde->waitlock, seq));
}
/*
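The wakeup loop added above peeks at sde->dmawait under the seqlock read API and only takes the write side of sde->waitlock when the list is non-empty. A minimal sketch of that check-then-lock pattern, using hypothetical types (struct waiter, wait_lock) rather than this driver's:

#include <linux/seqlock.h>
#include <linux/list.h>

struct waiter {
    struct list_head list;
};

static DEFINE_SEQLOCK(wait_lock);
static LIST_HEAD(waiters);

static void wake_all_waiters(void)
{
    unsigned int seq;

    do {
        /* lockless peek; read_seqretry() reports whether a writer raced us */
        seq = read_seqbegin(&wait_lock);
        if (!list_empty(&waiters)) {
            struct waiter *w, *nw;

            /* take the write side only when there is actual work */
            write_seqlock(&wait_lock);
            list_for_each_entry_safe(w, nw, &waiters, list)
                list_del_init(&w->list);
            write_sequnlock(&wait_lock);
        }
    } while (read_seqretry(&wait_lock, seq));
}

As in the hunk, a pass that took the write lock bumps the sequence count, so the loop runs once more and only exits after a clean read of an empty list.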
@@ -494,7 +467,8 @@ static void sdma_err_progress_check_schedule(struct sdma_engine *sde)
static void sdma_err_progress_check(struct timer_list *t)
{
unsigned index;
- struct sdma_engine *sde = from_timer(sde, t, err_progress_check_timer);
+ struct sdma_engine *sde = timer_container_of(sde, t,
+ err_progress_check_timer);
dd_dev_err(sde->dd, "SDE progress check event\n");
for (index = 0; index < sde->dd->num_sdma; index++) {
@@ -530,9 +504,10 @@ static void sdma_err_progress_check(struct timer_list *t)
schedule_work(&sde->err_halt_worker);
}
-static void sdma_hw_clean_up_task(unsigned long opaque)
+static void sdma_hw_clean_up_task(struct tasklet_struct *t)
{
- struct sdma_engine *sde = (struct sdma_engine *)opaque;
+ struct sdma_engine *sde = from_tasklet(sde, t,
+ sdma_hw_clean_up_task);
u64 statuscsr;
while (1) {
@@ -589,9 +564,9 @@ static void sdma_flush_descq(struct sdma_engine *sde)
sdma_desc_avail(sde, sdma_descq_freecnt(sde));
}
-static void sdma_sw_clean_up_task(unsigned long opaque)
+static void sdma_sw_clean_up_task(struct tasklet_struct *t)
{
- struct sdma_engine *sde = (struct sdma_engine *)opaque;
+ struct sdma_engine *sde = from_tasklet(sde, t, sdma_sw_clean_up_task);
unsigned long flags;
spin_lock_irqsave(&sde->tail_lock, flags);
@@ -818,7 +793,7 @@ struct sdma_engine *sdma_select_engine_sc(
struct sdma_rht_map_elem {
u32 mask;
u8 ctr;
- struct sdma_engine *sde[0];
+ struct sdma_engine *sde[];
};
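The sde[0] member above becomes a C99 flexible array member. Allocations for such a struct are typically sized with struct_size(); a small sketch under that assumption (map_elem and alloc_map_elem are illustrative stand-ins, not from this file):

#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/types.h>

struct sdma_engine;                     /* opaque here */

struct map_elem {                       /* stand-in for sdma_rht_map_elem */
    u32 mask;
    u8 ctr;
    struct sdma_engine *sde[];          /* flexible array member, formerly sde[0] */
};

static struct map_elem *alloc_map_elem(unsigned int nengines, gfp_t gfp)
{
    struct map_elem *elem;

    /* struct_size() = sizeof(*elem) + nengines * sizeof(elem->sde[0]), overflow-checked */
    elem = kzalloc(struct_size(elem, sde, nengines), gfp);
    return elem;
}

Unlike the GNU [0] extension, the standard [] form is understood by static checkers and the kernel's bounds-checking helpers.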
struct sdma_rht_node {
@@ -833,7 +808,7 @@ static const struct rhashtable_params sdma_rht_params = {
.nelem_hint = NR_CPUS_HINT,
.head_offset = offsetof(struct sdma_rht_node, node),
.key_offset = offsetof(struct sdma_rht_node, cpu_id),
- .key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id),
+ .key_len = sizeof_field(struct sdma_rht_node, cpu_id),
.max_size = NR_CPUS,
.min_size = 8,
.automatic_shrinking = true,
@@ -855,20 +830,19 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
{
struct sdma_rht_node *rht_node;
struct sdma_engine *sde = NULL;
- const struct cpumask *current_mask = &current->cpus_allowed;
unsigned long cpu_id;
/*
* To ensure that always the same sdma engine(s) will be
* selected make sure the process is pinned to this CPU only.
*/
- if (cpumask_weight(current_mask) != 1)
+ if (current->nr_cpus_allowed != 1)
goto out;
- cpu_id = smp_processor_id();
rcu_read_lock();
- rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu_id,
- sdma_rht_params);
+ cpu_id = smp_processor_id();
+ rht_node = rhashtable_lookup(dd->sdma_rht, &cpu_id,
+ sdma_rht_params);
if (rht_node && rht_node->map[vl]) {
struct sdma_rht_map_elem *map = rht_node->map[vl];
@@ -1016,7 +990,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
}
/* Clean up old mappings */
- for_each_cpu(cpu, cpu_online_mask) {
+ for_each_online_cpu(cpu) {
struct sdma_rht_node *rht_node;
/* Don't cleanup sdes that are set in the new mask */
@@ -1270,7 +1244,7 @@ bail:
}
/**
- * sdma_clean() Clean up allocated memory
+ * sdma_clean - Clean up allocated memory
* @dd: struct hfi1_devdata
* @num_engines: num sdma engines
*
@@ -1283,7 +1257,7 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
struct sdma_engine *sde;
if (dd->sdma_pad_dma) {
- dma_free_coherent(&dd->pcidev->dev, 4,
+ dma_free_coherent(&dd->pcidev->dev, SDMA_PAD,
(void *)dd->sdma_pad_dma,
dd->sdma_pad_phys);
dd->sdma_pad_dma = NULL;
@@ -1315,11 +1289,13 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
kvfree(sde->tx_ring);
sde->tx_ring = NULL;
}
- spin_lock_irq(&dd->sde_map_lock);
- sdma_map_free(rcu_access_pointer(dd->sdma_map));
- RCU_INIT_POINTER(dd->sdma_map, NULL);
- spin_unlock_irq(&dd->sde_map_lock);
- synchronize_rcu();
+ if (rcu_access_pointer(dd->sdma_map)) {
+ spin_lock_irq(&dd->sde_map_lock);
+ sdma_map_free(rcu_access_pointer(dd->sdma_map));
+ RCU_INIT_POINTER(dd->sdma_map, NULL);
+ spin_unlock_irq(&dd->sde_map_lock);
+ synchronize_rcu();
+ }
kfree(dd->per_sdma);
dd->per_sdma = NULL;
@@ -1440,11 +1416,10 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
sde->tail_csr =
get_kctxt_csr_addr(dd, this_idx, SD(TAIL));
- tasklet_init(&sde->sdma_hw_clean_up_task, sdma_hw_clean_up_task,
- (unsigned long)sde);
-
- tasklet_init(&sde->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
- (unsigned long)sde);
+ tasklet_setup(&sde->sdma_hw_clean_up_task,
+ sdma_hw_clean_up_task);
+ tasklet_setup(&sde->sdma_sw_clean_up_task,
+ sdma_sw_clean_up_task);
INIT_WORK(&sde->err_halt_worker, sdma_err_halt_wait);
INIT_WORK(&sde->flush_worker, sdma_field_flush);
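The two changes above (callback signatures earlier in the file, tasklet_setup() here) together move the clean-up tasklets from the old "unsigned long data" calling convention to the type-safe one. A compact sketch of the complete pattern, with illustrative names only (my_engine, my_cleanup_task):

#include <linux/interrupt.h>
#include <linux/printk.h>

struct my_engine {                      /* illustrative container */
    int id;
    struct tasklet_struct cleanup_task; /* embedded tasklet */
};

/* new-style callback: receives the tasklet, recovers the container */
static void my_cleanup_task(struct tasklet_struct *t)
{
    struct my_engine *eng = from_tasklet(eng, t, cleanup_task);

    pr_info("cleaning up engine %d\n", eng->id);
}

static void my_engine_init(struct my_engine *eng)
{
    /* replaces tasklet_init(&eng->cleanup_task, fn, (unsigned long)eng) */
    tasklet_setup(&eng->cleanup_task, my_cleanup_task);
}

tasklet_setup() records the callback and marks the tasklet as using the new convention; from_tasklet() is container_of() specialised for the embedded tasklet field.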
@@ -1478,7 +1453,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
}
/* Allocate memory for pad */
- dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, sizeof(u32),
+ dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, SDMA_PAD,
&dd->sdma_pad_phys, GFP_KERNEL);
if (!dd->sdma_pad_dma) {
dd_dev_err(dd, "failed to allocate SendDMA pad memory\n");
@@ -1513,8 +1488,11 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
}
ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params);
- if (ret < 0)
+ if (ret < 0) {
+ kfree(tmp_sdma_rht);
goto bail;
+ }
+
dd->sdma_rht = tmp_sdma_rht;
dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
@@ -1544,24 +1522,6 @@ void sdma_all_running(struct hfi1_devdata *dd)
}
/**
- * sdma_all_idle() - called when the link goes down
- * @dd: hfi1_devdata
- *
- * This routine moves all engines to the idle state.
- */
-void sdma_all_idle(struct hfi1_devdata *dd)
-{
- struct sdma_engine *sde;
- unsigned int i;
-
- /* idle all engines */
- for (i = 0; i < dd->num_sdma; ++i) {
- sde = &dd->per_sdma[i];
- sdma_process_event(sde, sdma_event_e70_go_idle);
- }
-}
-
-/**
* sdma_start() - called to kick off state processing for all engines
* @dd: hfi1_devdata
*
@@ -1598,7 +1558,7 @@ void sdma_exit(struct hfi1_devdata *dd)
sde->this_idx);
sdma_process_event(sde, sdma_event_e00_go_hw_down);
- del_timer_sync(&sde->err_progress_check_timer);
+ timer_delete_sync(&sde->err_progress_check_timer);
/*
* This waits for the state machine to exit so it is not
@@ -1618,20 +1578,18 @@ static inline void sdma_unmap_desc(
{
switch (sdma_mapping_type(descp)) {
case SDMA_MAP_SINGLE:
- dma_unmap_single(
- &dd->pcidev->dev,
- sdma_mapping_addr(descp),
- sdma_mapping_len(descp),
- DMA_TO_DEVICE);
+ dma_unmap_single(&dd->pcidev->dev, sdma_mapping_addr(descp),
+ sdma_mapping_len(descp), DMA_TO_DEVICE);
break;
case SDMA_MAP_PAGE:
- dma_unmap_page(
- &dd->pcidev->dev,
- sdma_mapping_addr(descp),
- sdma_mapping_len(descp),
- DMA_TO_DEVICE);
+ dma_unmap_page(&dd->pcidev->dev, sdma_mapping_addr(descp),
+ sdma_mapping_len(descp), DMA_TO_DEVICE);
break;
}
+
+ if (descp->pinning_ctx && descp->ctx_put)
+ descp->ctx_put(descp->pinning_ctx);
+ descp->pinning_ctx = NULL;
}
/*
@@ -1723,7 +1681,7 @@ retry:
sane = (hwhead == swhead);
if (unlikely(!sane)) {
- dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%hu swhd=%hu swtl=%hu cnt=%hu\n",
+ dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%u swhd=%u swtl=%u cnt=%u\n",
sde->this_idx,
use_dmahead ? "dma" : "kreg",
hwhead, swhead, swtail, cnt);
@@ -1843,7 +1801,7 @@ retry:
/*
* The SDMA idle interrupt is not guaranteed to be ordered with respect
- * to updates to the the dma_head location in host memory. The head
+ * to updates to the dma_head location in host memory. The head
* value read might not be fully up to date. If there are pending
* descriptors and the SDMA idle interrupt fired then read from the
* CSR SDMA head instead to get the latest value from the hardware.
@@ -2413,7 +2371,7 @@ unlock_noconn:
list_add_tail(&tx->list, &sde->flushlist);
spin_unlock(&sde->flushlist_lock);
iowait_inc_wait_count(wait, tx->num_desc);
- schedule_work(&sde->flush_worker);
+ queue_work_on(sde->cpu, system_highpri_wq, &sde->flush_worker);
ret = -ECOMM;
goto unlock;
nodesc:
@@ -2431,11 +2389,11 @@ nodesc:
* @sde: sdma engine to use
* @wait: SE wait structure to use when full (may be NULL)
* @tx_list: list of sdma_txreqs to submit
- * @count: pointer to a u16 which, after return will contain the total number of
- * sdma_txreqs removed from the tx_list. This will include sdma_txreqs
- * whose SDMA descriptors are submitted to the ring and the sdma_txreqs
- * which are added to SDMA engine flush list if the SDMA engine state is
- * not running.
+ * @count_out: pointer to a u16 which, after return will contain the total number of
+ * sdma_txreqs removed from the tx_list. This will include sdma_txreqs
+ * whose SDMA descriptors are submitted to the ring and the sdma_txreqs
+ * which are added to SDMA engine flush list if the SDMA engine state is
+ * not running.
*
* The call submits the list into the ring.
*
@@ -2511,7 +2469,7 @@ unlock_noconn:
iowait_inc_wait_count(wait, tx->num_desc);
}
spin_unlock(&sde->flushlist_lock);
- schedule_work(&sde->flush_worker);
+ queue_work_on(sde->cpu, system_highpri_wq, &sde->flush_worker);
ret = -ECOMM;
goto update_tail;
nodesc:
@@ -2567,7 +2525,7 @@ static void __sdma_process_event(struct sdma_engine *sde,
* 7220, e.g.
*/
ss->go_s99_running = 1;
- /* fall through -- and start dma engine */
+ fallthrough; /* and start dma engine */
case sdma_event_e10_go_hw_start:
/* This reference means the state machine is started */
sdma_get(&sde->state);
@@ -2709,7 +2667,6 @@ static void __sdma_process_event(struct sdma_engine *sde,
case sdma_event_e70_go_idle:
break;
case sdma_event_e85_link_down:
- /* fall through */
case sdma_event_e80_hw_freeze:
sdma_set_state(sde, sdma_state_s80_hw_freeze);
atomic_dec(&sde->dd->sdma_unfreeze_count);
@@ -2990,7 +2947,7 @@ static void __sdma_process_event(struct sdma_engine *sde,
case sdma_event_e60_hw_halted:
need_progress = 1;
sdma_err_progress_check_schedule(sde);
- /* fall through */
+ fallthrough;
case sdma_event_e90_sw_halted:
/*
* SW initiated halt does not perform engines
@@ -3004,7 +2961,7 @@ static void __sdma_process_event(struct sdma_engine *sde,
break;
case sdma_event_e85_link_down:
ss->go_s99_running = 0;
- /* fall through */
+ fallthrough;
case sdma_event_e80_hw_freeze:
sdma_set_state(sde, sdma_state_s80_hw_freeze);
atomic_dec(&sde->dd->sdma_unfreeze_count);
@@ -3039,6 +2996,7 @@ static void __sdma_process_event(struct sdma_engine *sde,
static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
{
int i;
+ struct sdma_desc *descp;
/* Handle last descriptor */
if (unlikely((tx->num_desc == (MAX_DESC - 1)))) {
@@ -3059,12 +3017,10 @@ static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
if (unlikely(tx->num_desc == MAX_DESC))
goto enomem;
- tx->descp = kmalloc_array(
- MAX_DESC,
- sizeof(struct sdma_desc),
- GFP_ATOMIC);
- if (!tx->descp)
+ descp = kmalloc_array(MAX_DESC, sizeof(struct sdma_desc), GFP_ATOMIC);
+ if (!descp)
goto enomem;
+ tx->descp = descp;
/* reserve last descriptor for coalescing */
tx->desc_limit = MAX_DESC - 1;
@@ -3114,7 +3070,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
}
if (type == SDMA_MAP_PAGE) {
- kvaddr = kmap(page);
+ kvaddr = kmap_local_page(page);
kvaddr += offset;
} else if (WARN_ON(!kvaddr)) {
__sdma_txclean(dd, tx);
@@ -3124,7 +3080,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
memcpy(tx->coalesce_buf + tx->coalesce_idx, kvaddr, len);
tx->coalesce_idx += len;
if (type == SDMA_MAP_PAGE)
- kunmap(page);
+ kunmap_local(kvaddr);
/* If there is more data, return */
if (tx->tlen - tx->coalesce_idx)
@@ -3154,7 +3110,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
/* Add descriptor for coalesce buffer */
tx->desc_limit = MAX_DESC;
return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, tx,
- addr, tx->tlen);
+ addr, tx->tlen, NULL, NULL, NULL);
}
return 1;
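In the coalesce path above, kmap()/kunmap() are replaced by kmap_local_page()/kunmap_local(); note that kunmap_local() takes the mapped address rather than the page. A minimal sketch of that usage (copy_from_page is a hypothetical helper):

#include <linux/highmem.h>
#include <linux/string.h>

static void copy_from_page(void *dst, struct page *page,
                           unsigned long offset, size_t len)
{
    void *kvaddr = kmap_local_page(page);   /* CPU-local mapping */

    memcpy(dst, kvaddr + offset, len);
    kunmap_local(kvaddr);                   /* pass the address, not the page */
}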
@@ -3185,7 +3141,6 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
{
int rval = 0;
- tx->num_desc++;
if ((unlikely(tx->num_desc == tx->desc_limit))) {
rval = _extend_sdma_tx_descs(dd, tx);
if (rval) {
@@ -3193,12 +3148,15 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
return rval;
}
}
+
/* finish the one just added */
make_tx_sdma_desc(
tx,
SDMA_MAP_NONE,
dd->sdma_pad_phys,
- sizeof(u32) - (tx->packet_len & (sizeof(u32) - 1)));
+ sizeof(u32) - (tx->packet_len & (sizeof(u32) - 1)),
+ NULL, NULL, NULL);
+ tx->num_desc++;
_sdma_close_tx(dd, tx);
return rval;
}
@@ -3235,7 +3193,7 @@ void _sdma_txreq_ahgadd(
tx->num_desc++;
tx->descs[2].qw[0] = 0;
tx->descs[2].qw[1] = 0;
- /* FALLTHROUGH */
+ fallthrough;
case SDMA_AHG_APPLY_UPDATE2:
tx->num_desc++;
tx->descs[1].qw[0] = 0;
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 1e2e40f79cb2..91dfd5d0c419 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -1,52 +1,11 @@
-#ifndef _HFI1_SDMA_H
-#define _HFI1_SDMA_H
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+#ifndef _HFI1_SDMA_H
+#define _HFI1_SDMA_H
+
#include <linux/types.h>
#include <linux/list.h>
#include <asm/byteorder.h>
@@ -414,7 +373,6 @@ void sdma_start(struct hfi1_devdata *dd);
void sdma_exit(struct hfi1_devdata *dd);
void sdma_clean(struct hfi1_devdata *dd, size_t num_engines);
void sdma_all_running(struct hfi1_devdata *dd);
-void sdma_all_idle(struct hfi1_devdata *dd);
void sdma_freeze_notify(struct hfi1_devdata *dd, int go_idle);
void sdma_freeze(struct hfi1_devdata *dd);
void sdma_unfreeze(struct hfi1_devdata *dd);
@@ -636,7 +594,10 @@ static inline void make_tx_sdma_desc(
struct sdma_txreq *tx,
int type,
dma_addr_t addr,
- size_t len)
+ size_t len,
+ void *pinning_ctx,
+ void (*ctx_get)(void *),
+ void (*ctx_put)(void *))
{
struct sdma_desc *desc = &tx->descp[tx->num_desc];
@@ -653,6 +614,11 @@ static inline void make_tx_sdma_desc(
<< SDMA_DESC0_PHY_ADDR_SHIFT) |
(((u64)len & SDMA_DESC0_BYTE_COUNT_MASK)
<< SDMA_DESC0_BYTE_COUNT_SHIFT);
+
+ desc->pinning_ctx = pinning_ctx;
+ desc->ctx_put = ctx_put;
+ if (pinning_ctx && ctx_get)
+ ctx_get(pinning_ctx);
}
/* helper to extend txreq */
@@ -672,14 +638,13 @@ static inline void sdma_txclean(struct hfi1_devdata *dd, struct sdma_txreq *tx)
static inline void _sdma_close_tx(struct hfi1_devdata *dd,
struct sdma_txreq *tx)
{
- tx->descp[tx->num_desc].qw[0] |=
- SDMA_DESC0_LAST_DESC_FLAG;
- tx->descp[tx->num_desc].qw[1] |=
- dd->default_desc1;
+ u16 last_desc = tx->num_desc - 1;
+
+ tx->descp[last_desc].qw[0] |= SDMA_DESC0_LAST_DESC_FLAG;
+ tx->descp[last_desc].qw[1] |= dd->default_desc1;
if (tx->flags & SDMA_TXREQ_F_URGENT)
- tx->descp[tx->num_desc].qw[1] |=
- (SDMA_DESC1_HEAD_TO_HOST_FLAG |
- SDMA_DESC1_INT_REQ_FLAG);
+ tx->descp[last_desc].qw[1] |= (SDMA_DESC1_HEAD_TO_HOST_FLAG |
+ SDMA_DESC1_INT_REQ_FLAG);
}
static inline int _sdma_txadd_daddr(
@@ -687,15 +652,20 @@ static inline int _sdma_txadd_daddr(
int type,
struct sdma_txreq *tx,
dma_addr_t addr,
- u16 len)
+ u16 len,
+ void *pinning_ctx,
+ void (*ctx_get)(void *),
+ void (*ctx_put)(void *))
{
int rval = 0;
make_tx_sdma_desc(
tx,
type,
- addr, len);
+ addr, len,
+ pinning_ctx, ctx_get, ctx_put);
WARN_ON(len > tx->tlen);
+ tx->num_desc++;
tx->tlen -= len;
/* special cases for last */
if (!tx->tlen) {
@@ -707,7 +677,6 @@ static inline int _sdma_txadd_daddr(
_sdma_close_tx(dd, tx);
}
}
- tx->num_desc++;
return rval;
}
@@ -718,6 +687,14 @@ static inline int _sdma_txadd_daddr(
* @page: page to map
* @offset: offset within the page
* @len: length in bytes
+ * @pinning_ctx: context to be stored on struct sdma_desc .pinning_ctx. Not
+ * added if coalesce buffer is used. E.g. pointer to pinned-page
+ * cache entry for the sdma_desc.
+ * @ctx_get: optional function to take reference to @pinning_ctx. Not called if
+ * @pinning_ctx is NULL.
+ * @ctx_put: optional function to release reference to @pinning_ctx after
+ * sdma_desc completes. May be called in interrupt context so must
+ * not sleep. Not called if @pinning_ctx is NULL.
*
* This is used to add a page/offset/length descriptor.
*
@@ -732,7 +709,10 @@ static inline int sdma_txadd_page(
struct sdma_txreq *tx,
struct page *page,
unsigned long offset,
- u16 len)
+ u16 len,
+ void *pinning_ctx,
+ void (*ctx_get)(void *),
+ void (*ctx_put)(void *))
{
dma_addr_t addr;
int rval;
@@ -756,8 +736,8 @@ static inline int sdma_txadd_page(
return -ENOSPC;
}
- return _sdma_txadd_daddr(
- dd, SDMA_MAP_PAGE, tx, addr, len);
+ return _sdma_txadd_daddr(dd, SDMA_MAP_PAGE, tx, addr, len,
+ pinning_ctx, ctx_get, ctx_put);
}
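The kernel-doc just above documents the new @pinning_ctx/@ctx_get/@ctx_put arguments to sdma_txadd_page(). A hedged caller-side sketch with a hypothetical kref-counted cache entry (pin_cache_entry and its helpers are illustrative, not from this driver; the driver's sdma.h is assumed included):

#include <linux/kref.h>
#include <linux/slab.h>
/* plus this driver's sdma.h for sdma_txadd_page() */

struct pin_cache_entry {                /* hypothetical pinned-page cache entry */
    struct kref ref;
    /* pages, npages, ... */
};

static void pin_cache_get(void *ctx)
{
    kref_get(&((struct pin_cache_entry *)ctx)->ref);
}

static void pin_cache_free(struct kref *ref)
{
    kfree(container_of(ref, struct pin_cache_entry, ref));
}

/* may run from interrupt context when the descriptor is unmapped; must not sleep */
static void pin_cache_put(void *ctx)
{
    kref_put(&((struct pin_cache_entry *)ctx)->ref, pin_cache_free);
}

/* each descriptor built this way holds a reference until sdma_unmap_desc() drops it */
static int add_pinned_page(struct hfi1_devdata *dd, struct sdma_txreq *tx,
                           struct pin_cache_entry *entry, struct page *page,
                           unsigned long offset, u16 len)
{
    return sdma_txadd_page(dd, tx, page, offset, len,
                           entry, pin_cache_get, pin_cache_put);
}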
/**
@@ -791,7 +771,8 @@ static inline int sdma_txadd_daddr(
return rval;
}
- return _sdma_txadd_daddr(dd, SDMA_MAP_NONE, tx, addr, len);
+ return _sdma_txadd_daddr(dd, SDMA_MAP_NONE, tx, addr, len,
+ NULL, NULL, NULL);
}
/**
@@ -837,8 +818,8 @@ static inline int sdma_txadd_kvaddr(
return -ENOSPC;
}
- return _sdma_txadd_daddr(
- dd, SDMA_MAP_SINGLE, tx, addr, len);
+ return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, tx, addr, len,
+ NULL, NULL, NULL);
}
struct iowait_work;
@@ -907,24 +888,6 @@ static inline unsigned sdma_progress(struct sdma_engine *sde, unsigned seq,
return 0;
}
-/**
- * sdma_iowait_schedule() - initialize wait structure
- * @sde: sdma_engine to schedule
- * @wait: wait struct to schedule
- *
- * This function initializes the iowait
- * structure embedded in the QP or PQ.
- *
- */
-static inline void sdma_iowait_schedule(
- struct sdma_engine *sde,
- struct iowait *wait)
-{
- struct hfi1_pportdata *ppd = sde->dd->pport;
-
- iowait_schedule(wait, ppd->hfi1_wq, sde->cpu);
-}
-
/* for use by interrupt handling */
void sdma_engine_error(struct sdma_engine *sde, u64 status);
void sdma_engine_interrupt(struct sdma_engine *sde, u64 status);
@@ -1002,7 +965,7 @@ void sdma_engine_interrupt(struct sdma_engine *sde, u64 status);
*/
struct sdma_map_elem {
u32 mask;
- struct sdma_engine *sde[0];
+ struct sdma_engine *sde[];
};
/**
@@ -1024,7 +987,7 @@ struct sdma_vl_map {
u32 mask;
u8 actual_vls;
u8 vls;
- struct sdma_map_elem *map[0];
+ struct sdma_map_elem *map[];
};
int sdma_map_init(
@@ -1089,5 +1052,4 @@ u16 sdma_get_descq_cnt(void);
extern uint mod_num_sdma;
void sdma_update_lmc(struct hfi1_devdata *dd, u64 mask, u32 lid);
-
#endif
diff --git a/drivers/infiniband/hw/hfi1/sdma_txreq.h b/drivers/infiniband/hw/hfi1/sdma_txreq.h
index 514a4784566b..5782166d984c 100644
--- a/drivers/infiniband/hw/hfi1/sdma_txreq.h
+++ b/drivers/infiniband/hw/hfi1/sdma_txreq.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#ifndef HFI1_SDMA_TXREQ_H
@@ -61,6 +19,9 @@
struct sdma_desc {
/* private: don't use directly */
u64 qw[2];
+ void *pinning_ctx;
+ /* Release reference to @pinning_ctx. May be called in interrupt context. Must not sleep. */
+ void (*ctx_put)(void *ctx);
};
/**
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 90f62c4bddba..372cfd13dc61 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -1,55 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015-2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+
#include <linux/ctype.h>
+#include <rdma/ib_sysfs.h>
#include "hfi.h"
#include "mad.h"
#include "trace.h"
+static struct hfi1_pportdata *hfi1_get_pportdata_kobj(struct kobject *kobj)
+{
+ u32 port_num;
+ struct ib_device *ibdev = ib_port_sysfs_get_ibdev_kobj(kobj, &port_num);
+ struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+
+ return &dd->pport[port_num - 1];
+}
+
/*
* Start of per-port congestion control structures and support code
*/
@@ -57,13 +26,12 @@
/*
* Congestion control table size followed by table entries
*/
-static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr,
+static ssize_t cc_table_bin_read(struct file *filp, struct kobject *kobj,
+ const struct bin_attribute *bin_attr,
char *buf, loff_t pos, size_t count)
{
int ret;
- struct hfi1_pportdata *ppd =
- container_of(kobj, struct hfi1_pportdata, pport_cc_kobj);
+ struct hfi1_pportdata *ppd = hfi1_get_pportdata_kobj(kobj);
struct cc_state *cc_state;
ret = ppd->total_cct_entry * sizeof(struct ib_cc_table_entry_shadow)
@@ -89,30 +57,19 @@ static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj,
return count;
}
-
-static void port_release(struct kobject *kobj)
-{
- /* nothing to do since memory is freed by hfi1_free_devdata() */
-}
-
-static const struct bin_attribute cc_table_bin_attr = {
- .attr = {.name = "cc_table_bin", .mode = 0444},
- .read = read_cc_table_bin,
- .size = PAGE_SIZE,
-};
+static const BIN_ATTR_RO(cc_table_bin, PAGE_SIZE);
/*
* Congestion settings: port control, control map and an array of 16
* entries for the congestion entries - increase, timer, event log
* trigger threshold and the minimum injection rate delay.
*/
-static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr,
+static ssize_t cc_setting_bin_read(struct file *filp, struct kobject *kobj,
+ const struct bin_attribute *bin_attr,
char *buf, loff_t pos, size_t count)
{
+ struct hfi1_pportdata *ppd = hfi1_get_pportdata_kobj(kobj);
int ret;
- struct hfi1_pportdata *ppd =
- container_of(kobj, struct hfi1_pportdata, pport_cc_kobj);
struct cc_state *cc_state;
ret = sizeof(struct opa_congestion_setting_attr_shadow);
@@ -136,27 +93,30 @@ static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj,
return count;
}
+static const BIN_ATTR_RO(cc_setting_bin, PAGE_SIZE);
-static const struct bin_attribute cc_setting_bin_attr = {
- .attr = {.name = "cc_settings_bin", .mode = 0444},
- .read = read_cc_setting_bin,
- .size = PAGE_SIZE,
-};
-
-struct hfi1_port_attr {
- struct attribute attr;
- ssize_t (*show)(struct hfi1_pportdata *, char *);
- ssize_t (*store)(struct hfi1_pportdata *, const char *, size_t);
+static const struct bin_attribute *const port_cc_bin_attributes[] = {
+ &bin_attr_cc_setting_bin,
+ &bin_attr_cc_table_bin,
+ NULL
};
-static ssize_t cc_prescan_show(struct hfi1_pportdata *ppd, char *buf)
+static ssize_t cc_prescan_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf)
{
- return sprintf(buf, "%s\n", ppd->cc_prescan ? "on" : "off");
+ struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+ struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
+
+ return sysfs_emit(buf, "%s\n", ppd->cc_prescan ? "on" : "off");
}
-static ssize_t cc_prescan_store(struct hfi1_pportdata *ppd, const char *buf,
+static ssize_t cc_prescan_store(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, const char *buf,
size_t count)
{
+ struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+ struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
+
if (!memcmp(buf, "on", 2))
ppd->cc_prescan = true;
else if (!memcmp(buf, "off", 3))
@@ -164,60 +124,41 @@ static ssize_t cc_prescan_store(struct hfi1_pportdata *ppd, const char *buf,
return count;
}
+static IB_PORT_ATTR_ADMIN_RW(cc_prescan);
-static struct hfi1_port_attr cc_prescan_attr =
- __ATTR(cc_prescan, 0600, cc_prescan_show, cc_prescan_store);
-
-static ssize_t cc_attr_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct hfi1_port_attr *port_attr =
- container_of(attr, struct hfi1_port_attr, attr);
- struct hfi1_pportdata *ppd =
- container_of(kobj, struct hfi1_pportdata, pport_cc_kobj);
-
- return port_attr->show(ppd, buf);
-}
-
-static ssize_t cc_attr_store(struct kobject *kobj, struct attribute *attr,
- const char *buf, size_t count)
-{
- struct hfi1_port_attr *port_attr =
- container_of(attr, struct hfi1_port_attr, attr);
- struct hfi1_pportdata *ppd =
- container_of(kobj, struct hfi1_pportdata, pport_cc_kobj);
-
- return port_attr->store(ppd, buf, count);
-}
-
-static const struct sysfs_ops port_cc_sysfs_ops = {
- .show = cc_attr_show,
- .store = cc_attr_store
-};
-
-static struct attribute *port_cc_default_attributes[] = {
- &cc_prescan_attr.attr,
+static struct attribute *port_cc_attributes[] = {
+ &ib_port_attr_cc_prescan.attr,
NULL
};
-static struct kobj_type port_cc_ktype = {
- .release = port_release,
- .sysfs_ops = &port_cc_sysfs_ops,
- .default_attrs = port_cc_default_attributes
+static const struct attribute_group port_cc_group = {
+ .name = "CCMgtA",
+ .attrs = port_cc_attributes,
+ .bin_attrs = port_cc_bin_attributes,
};
/* Start sc2vl */
-#define HFI1_SC2VL_ATTR(N) \
- static struct hfi1_sc2vl_attr hfi1_sc2vl_attr_##N = { \
- .attr = { .name = __stringify(N), .mode = 0444 }, \
- .sc = N \
- }
-
struct hfi1_sc2vl_attr {
- struct attribute attr;
+ struct ib_port_attribute attr;
int sc;
};
+static ssize_t sc2vl_attr_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf)
+{
+ struct hfi1_sc2vl_attr *sattr =
+ container_of(attr, struct hfi1_sc2vl_attr, attr);
+ struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+
+ return sysfs_emit(buf, "%u\n", *((u8 *)dd->sc2vl + sattr->sc));
+}
+
+#define HFI1_SC2VL_ATTR(N) \
+ static struct hfi1_sc2vl_attr hfi1_sc2vl_attr_##N = { \
+ .attr = __ATTR(N, 0444, sc2vl_attr_show, NULL), \
+ .sc = N, \
+ }
+
HFI1_SC2VL_ATTR(0);
HFI1_SC2VL_ATTR(1);
HFI1_SC2VL_ATTR(2);
@@ -251,78 +192,70 @@ HFI1_SC2VL_ATTR(29);
HFI1_SC2VL_ATTR(30);
HFI1_SC2VL_ATTR(31);
-static struct attribute *sc2vl_default_attributes[] = {
- &hfi1_sc2vl_attr_0.attr,
- &hfi1_sc2vl_attr_1.attr,
- &hfi1_sc2vl_attr_2.attr,
- &hfi1_sc2vl_attr_3.attr,
- &hfi1_sc2vl_attr_4.attr,
- &hfi1_sc2vl_attr_5.attr,
- &hfi1_sc2vl_attr_6.attr,
- &hfi1_sc2vl_attr_7.attr,
- &hfi1_sc2vl_attr_8.attr,
- &hfi1_sc2vl_attr_9.attr,
- &hfi1_sc2vl_attr_10.attr,
- &hfi1_sc2vl_attr_11.attr,
- &hfi1_sc2vl_attr_12.attr,
- &hfi1_sc2vl_attr_13.attr,
- &hfi1_sc2vl_attr_14.attr,
- &hfi1_sc2vl_attr_15.attr,
- &hfi1_sc2vl_attr_16.attr,
- &hfi1_sc2vl_attr_17.attr,
- &hfi1_sc2vl_attr_18.attr,
- &hfi1_sc2vl_attr_19.attr,
- &hfi1_sc2vl_attr_20.attr,
- &hfi1_sc2vl_attr_21.attr,
- &hfi1_sc2vl_attr_22.attr,
- &hfi1_sc2vl_attr_23.attr,
- &hfi1_sc2vl_attr_24.attr,
- &hfi1_sc2vl_attr_25.attr,
- &hfi1_sc2vl_attr_26.attr,
- &hfi1_sc2vl_attr_27.attr,
- &hfi1_sc2vl_attr_28.attr,
- &hfi1_sc2vl_attr_29.attr,
- &hfi1_sc2vl_attr_30.attr,
- &hfi1_sc2vl_attr_31.attr,
+static struct attribute *port_sc2vl_attributes[] = {
+ &hfi1_sc2vl_attr_0.attr.attr,
+ &hfi1_sc2vl_attr_1.attr.attr,
+ &hfi1_sc2vl_attr_2.attr.attr,
+ &hfi1_sc2vl_attr_3.attr.attr,
+ &hfi1_sc2vl_attr_4.attr.attr,
+ &hfi1_sc2vl_attr_5.attr.attr,
+ &hfi1_sc2vl_attr_6.attr.attr,
+ &hfi1_sc2vl_attr_7.attr.attr,
+ &hfi1_sc2vl_attr_8.attr.attr,
+ &hfi1_sc2vl_attr_9.attr.attr,
+ &hfi1_sc2vl_attr_10.attr.attr,
+ &hfi1_sc2vl_attr_11.attr.attr,
+ &hfi1_sc2vl_attr_12.attr.attr,
+ &hfi1_sc2vl_attr_13.attr.attr,
+ &hfi1_sc2vl_attr_14.attr.attr,
+ &hfi1_sc2vl_attr_15.attr.attr,
+ &hfi1_sc2vl_attr_16.attr.attr,
+ &hfi1_sc2vl_attr_17.attr.attr,
+ &hfi1_sc2vl_attr_18.attr.attr,
+ &hfi1_sc2vl_attr_19.attr.attr,
+ &hfi1_sc2vl_attr_20.attr.attr,
+ &hfi1_sc2vl_attr_21.attr.attr,
+ &hfi1_sc2vl_attr_22.attr.attr,
+ &hfi1_sc2vl_attr_23.attr.attr,
+ &hfi1_sc2vl_attr_24.attr.attr,
+ &hfi1_sc2vl_attr_25.attr.attr,
+ &hfi1_sc2vl_attr_26.attr.attr,
+ &hfi1_sc2vl_attr_27.attr.attr,
+ &hfi1_sc2vl_attr_28.attr.attr,
+ &hfi1_sc2vl_attr_29.attr.attr,
+ &hfi1_sc2vl_attr_30.attr.attr,
+ &hfi1_sc2vl_attr_31.attr.attr,
NULL
};
-static ssize_t sc2vl_attr_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct hfi1_sc2vl_attr *sattr =
- container_of(attr, struct hfi1_sc2vl_attr, attr);
- struct hfi1_pportdata *ppd =
- container_of(kobj, struct hfi1_pportdata, sc2vl_kobj);
- struct hfi1_devdata *dd = ppd->dd;
-
- return sprintf(buf, "%u\n", *((u8 *)dd->sc2vl + sattr->sc));
-}
-
-static const struct sysfs_ops hfi1_sc2vl_ops = {
- .show = sc2vl_attr_show,
-};
-
-static struct kobj_type hfi1_sc2vl_ktype = {
- .release = port_release,
- .sysfs_ops = &hfi1_sc2vl_ops,
- .default_attrs = sc2vl_default_attributes
+static const struct attribute_group port_sc2vl_group = {
+ .name = "sc2vl",
+ .attrs = port_sc2vl_attributes,
};
-
/* End sc2vl */
/* Start sl2sc */
-#define HFI1_SL2SC_ATTR(N) \
- static struct hfi1_sl2sc_attr hfi1_sl2sc_attr_##N = { \
- .attr = { .name = __stringify(N), .mode = 0444 }, \
- .sl = N \
- }
-
struct hfi1_sl2sc_attr {
- struct attribute attr;
+ struct ib_port_attribute attr;
int sl;
};
+static ssize_t sl2sc_attr_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf)
+{
+ struct hfi1_sl2sc_attr *sattr =
+ container_of(attr, struct hfi1_sl2sc_attr, attr);
+ struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+ struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
+
+ return sysfs_emit(buf, "%u\n", ibp->sl_to_sc[sattr->sl]);
+}
+
+#define HFI1_SL2SC_ATTR(N) \
+ static struct hfi1_sl2sc_attr hfi1_sl2sc_attr_##N = { \
+ .attr = __ATTR(N, 0444, sl2sc_attr_show, NULL), .sl = N \
+ }
+
HFI1_SL2SC_ATTR(0);
HFI1_SL2SC_ATTR(1);
HFI1_SL2SC_ATTR(2);
@@ -356,79 +289,72 @@ HFI1_SL2SC_ATTR(29);
HFI1_SL2SC_ATTR(30);
HFI1_SL2SC_ATTR(31);
-static struct attribute *sl2sc_default_attributes[] = {
- &hfi1_sl2sc_attr_0.attr,
- &hfi1_sl2sc_attr_1.attr,
- &hfi1_sl2sc_attr_2.attr,
- &hfi1_sl2sc_attr_3.attr,
- &hfi1_sl2sc_attr_4.attr,
- &hfi1_sl2sc_attr_5.attr,
- &hfi1_sl2sc_attr_6.attr,
- &hfi1_sl2sc_attr_7.attr,
- &hfi1_sl2sc_attr_8.attr,
- &hfi1_sl2sc_attr_9.attr,
- &hfi1_sl2sc_attr_10.attr,
- &hfi1_sl2sc_attr_11.attr,
- &hfi1_sl2sc_attr_12.attr,
- &hfi1_sl2sc_attr_13.attr,
- &hfi1_sl2sc_attr_14.attr,
- &hfi1_sl2sc_attr_15.attr,
- &hfi1_sl2sc_attr_16.attr,
- &hfi1_sl2sc_attr_17.attr,
- &hfi1_sl2sc_attr_18.attr,
- &hfi1_sl2sc_attr_19.attr,
- &hfi1_sl2sc_attr_20.attr,
- &hfi1_sl2sc_attr_21.attr,
- &hfi1_sl2sc_attr_22.attr,
- &hfi1_sl2sc_attr_23.attr,
- &hfi1_sl2sc_attr_24.attr,
- &hfi1_sl2sc_attr_25.attr,
- &hfi1_sl2sc_attr_26.attr,
- &hfi1_sl2sc_attr_27.attr,
- &hfi1_sl2sc_attr_28.attr,
- &hfi1_sl2sc_attr_29.attr,
- &hfi1_sl2sc_attr_30.attr,
- &hfi1_sl2sc_attr_31.attr,
+static struct attribute *port_sl2sc_attributes[] = {
+ &hfi1_sl2sc_attr_0.attr.attr,
+ &hfi1_sl2sc_attr_1.attr.attr,
+ &hfi1_sl2sc_attr_2.attr.attr,
+ &hfi1_sl2sc_attr_3.attr.attr,
+ &hfi1_sl2sc_attr_4.attr.attr,
+ &hfi1_sl2sc_attr_5.attr.attr,
+ &hfi1_sl2sc_attr_6.attr.attr,
+ &hfi1_sl2sc_attr_7.attr.attr,
+ &hfi1_sl2sc_attr_8.attr.attr,
+ &hfi1_sl2sc_attr_9.attr.attr,
+ &hfi1_sl2sc_attr_10.attr.attr,
+ &hfi1_sl2sc_attr_11.attr.attr,
+ &hfi1_sl2sc_attr_12.attr.attr,
+ &hfi1_sl2sc_attr_13.attr.attr,
+ &hfi1_sl2sc_attr_14.attr.attr,
+ &hfi1_sl2sc_attr_15.attr.attr,
+ &hfi1_sl2sc_attr_16.attr.attr,
+ &hfi1_sl2sc_attr_17.attr.attr,
+ &hfi1_sl2sc_attr_18.attr.attr,
+ &hfi1_sl2sc_attr_19.attr.attr,
+ &hfi1_sl2sc_attr_20.attr.attr,
+ &hfi1_sl2sc_attr_21.attr.attr,
+ &hfi1_sl2sc_attr_22.attr.attr,
+ &hfi1_sl2sc_attr_23.attr.attr,
+ &hfi1_sl2sc_attr_24.attr.attr,
+ &hfi1_sl2sc_attr_25.attr.attr,
+ &hfi1_sl2sc_attr_26.attr.attr,
+ &hfi1_sl2sc_attr_27.attr.attr,
+ &hfi1_sl2sc_attr_28.attr.attr,
+ &hfi1_sl2sc_attr_29.attr.attr,
+ &hfi1_sl2sc_attr_30.attr.attr,
+ &hfi1_sl2sc_attr_31.attr.attr,
NULL
};
-static ssize_t sl2sc_attr_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct hfi1_sl2sc_attr *sattr =
- container_of(attr, struct hfi1_sl2sc_attr, attr);
- struct hfi1_pportdata *ppd =
- container_of(kobj, struct hfi1_pportdata, sl2sc_kobj);
- struct hfi1_ibport *ibp = &ppd->ibport_data;
-
- return sprintf(buf, "%u\n", ibp->sl_to_sc[sattr->sl]);
-}
-
-static const struct sysfs_ops hfi1_sl2sc_ops = {
- .show = sl2sc_attr_show,
-};
-
-static struct kobj_type hfi1_sl2sc_ktype = {
- .release = port_release,
- .sysfs_ops = &hfi1_sl2sc_ops,
- .default_attrs = sl2sc_default_attributes
+static const struct attribute_group port_sl2sc_group = {
+ .name = "sl2sc",
+ .attrs = port_sl2sc_attributes,
};
/* End sl2sc */
/* Start vl2mtu */
-#define HFI1_VL2MTU_ATTR(N) \
- static struct hfi1_vl2mtu_attr hfi1_vl2mtu_attr_##N = { \
- .attr = { .name = __stringify(N), .mode = 0444 }, \
- .vl = N \
- }
-
struct hfi1_vl2mtu_attr {
- struct attribute attr;
+ struct ib_port_attribute attr;
int vl;
};
+static ssize_t vl2mtu_attr_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf)
+{
+ struct hfi1_vl2mtu_attr *vlattr =
+ container_of(attr, struct hfi1_vl2mtu_attr, attr);
+ struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+
+ return sysfs_emit(buf, "%u\n", dd->vld[vlattr->vl].mtu);
+}
+
+#define HFI1_VL2MTU_ATTR(N) \
+ static struct hfi1_vl2mtu_attr hfi1_vl2mtu_attr_##N = { \
+ .attr = __ATTR(N, 0444, vl2mtu_attr_show, NULL), \
+ .vl = N, \
+ }
+
HFI1_VL2MTU_ATTR(0);
HFI1_VL2MTU_ATTR(1);
HFI1_VL2MTU_ATTR(2);
@@ -446,46 +372,29 @@ HFI1_VL2MTU_ATTR(13);
HFI1_VL2MTU_ATTR(14);
HFI1_VL2MTU_ATTR(15);
-static struct attribute *vl2mtu_default_attributes[] = {
- &hfi1_vl2mtu_attr_0.attr,
- &hfi1_vl2mtu_attr_1.attr,
- &hfi1_vl2mtu_attr_2.attr,
- &hfi1_vl2mtu_attr_3.attr,
- &hfi1_vl2mtu_attr_4.attr,
- &hfi1_vl2mtu_attr_5.attr,
- &hfi1_vl2mtu_attr_6.attr,
- &hfi1_vl2mtu_attr_7.attr,
- &hfi1_vl2mtu_attr_8.attr,
- &hfi1_vl2mtu_attr_9.attr,
- &hfi1_vl2mtu_attr_10.attr,
- &hfi1_vl2mtu_attr_11.attr,
- &hfi1_vl2mtu_attr_12.attr,
- &hfi1_vl2mtu_attr_13.attr,
- &hfi1_vl2mtu_attr_14.attr,
- &hfi1_vl2mtu_attr_15.attr,
+static struct attribute *port_vl2mtu_attributes[] = {
+ &hfi1_vl2mtu_attr_0.attr.attr,
+ &hfi1_vl2mtu_attr_1.attr.attr,
+ &hfi1_vl2mtu_attr_2.attr.attr,
+ &hfi1_vl2mtu_attr_3.attr.attr,
+ &hfi1_vl2mtu_attr_4.attr.attr,
+ &hfi1_vl2mtu_attr_5.attr.attr,
+ &hfi1_vl2mtu_attr_6.attr.attr,
+ &hfi1_vl2mtu_attr_7.attr.attr,
+ &hfi1_vl2mtu_attr_8.attr.attr,
+ &hfi1_vl2mtu_attr_9.attr.attr,
+ &hfi1_vl2mtu_attr_10.attr.attr,
+ &hfi1_vl2mtu_attr_11.attr.attr,
+ &hfi1_vl2mtu_attr_12.attr.attr,
+ &hfi1_vl2mtu_attr_13.attr.attr,
+ &hfi1_vl2mtu_attr_14.attr.attr,
+ &hfi1_vl2mtu_attr_15.attr.attr,
NULL
};
-static ssize_t vl2mtu_attr_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct hfi1_vl2mtu_attr *vlattr =
- container_of(attr, struct hfi1_vl2mtu_attr, attr);
- struct hfi1_pportdata *ppd =
- container_of(kobj, struct hfi1_pportdata, vl2mtu_kobj);
- struct hfi1_devdata *dd = ppd->dd;
-
- return sprintf(buf, "%u\n", dd->vld[vlattr->vl].mtu);
-}
-
-static const struct sysfs_ops hfi1_vl2mtu_ops = {
- .show = vl2mtu_attr_show,
-};
-
-static struct kobj_type hfi1_vl2mtu_ktype = {
- .release = port_release,
- .sysfs_ops = &hfi1_vl2mtu_ops,
- .default_attrs = vl2mtu_default_attributes
+static const struct attribute_group port_vl2mtu_group = {
+ .name = "vl2mtu",
+ .attrs = port_vl2mtu_attributes,
};
/* end of per-port file structures and support code */
@@ -500,7 +409,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
struct hfi1_ibdev *dev =
rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
- return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
+ return sysfs_emit(buf, "%x\n", dd_from_dev(dev)->minrev);
}
static DEVICE_ATTR_RO(hw_rev);
@@ -510,13 +419,11 @@ static ssize_t board_id_show(struct device *device,
struct hfi1_ibdev *dev =
rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
- int ret;
if (!dd->boardname)
- ret = -EINVAL;
- else
- ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname);
- return ret;
+ return -EINVAL;
+
+ return sysfs_emit(buf, "%s\n", dd->boardname);
}
static DEVICE_ATTR_RO(board_id);
@@ -528,7 +435,7 @@ static ssize_t boardversion_show(struct device *device,
struct hfi1_devdata *dd = dd_from_dev(dev);
/* The string printed here is already newline-terminated. */
- return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion);
+ return sysfs_emit(buf, "%s", dd->boardversion);
}
static DEVICE_ATTR_RO(boardversion);
@@ -545,9 +452,9 @@ static ssize_t nctxts_show(struct device *device,
* and a receive context, so returning the smaller of the two counts
* give a more accurate picture of total contexts available.
*/
- return scnprintf(buf, PAGE_SIZE, "%u\n",
- min(dd->num_user_contexts,
- (u32)dd->sc_sizes[SC_USER].count));
+ return sysfs_emit(buf, "%u\n",
+ min(dd->num_user_contexts,
+ (u32)dd->sc_sizes[SC_USER].count));
}
static DEVICE_ATTR_RO(nctxts);
@@ -559,7 +466,7 @@ static ssize_t nfreectxts_show(struct device *device,
struct hfi1_devdata *dd = dd_from_dev(dev);
/* Return the number of free user ports (contexts) available. */
- return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts);
+ return sysfs_emit(buf, "%u\n", dd->freectxts);
}
static DEVICE_ATTR_RO(nfreectxts);
@@ -570,7 +477,8 @@ static ssize_t serial_show(struct device *device,
rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
- return scnprintf(buf, PAGE_SIZE, "%s", dd->serial);
+ /* dd->serial is already newline terminated in chip.c */
+ return sysfs_emit(buf, "%s", dd->serial);
}
static DEVICE_ATTR_RO(serial);
@@ -598,9 +506,8 @@ static DEVICE_ATTR_WO(chip_reset);
* Convert the reported temperature from an integer (reported in
* units of 0.25C) to a floating point number.
*/
-#define temp2str(temp, buf, size, idx) \
- scnprintf((buf) + (idx), (size) - (idx), "%u.%02u ", \
- ((temp) >> 2), ((temp) & 0x3) * 25)
+#define temp_d(t) ((t) >> 2)
+#define temp_f(t) (((t)&0x3) * 25u)
/*
* Dump tempsense values, in decimal, to ease shell-scripts.
@@ -615,19 +522,17 @@ static ssize_t tempsense_show(struct device *device,
int ret;
ret = hfi1_tempsense_rd(dd, &temp);
- if (!ret) {
- int idx = 0;
-
- idx += temp2str(temp.curr, buf, PAGE_SIZE, idx);
- idx += temp2str(temp.lo_lim, buf, PAGE_SIZE, idx);
- idx += temp2str(temp.hi_lim, buf, PAGE_SIZE, idx);
- idx += temp2str(temp.crit_lim, buf, PAGE_SIZE, idx);
- idx += scnprintf(buf + idx, PAGE_SIZE - idx,
- "%u %u %u\n", temp.triggers & 0x1,
- temp.triggers & 0x2, temp.triggers & 0x4);
- ret = idx;
- }
- return ret;
+ if (ret)
+ return ret;
+
+ return sysfs_emit(buf, "%u.%02u %u.%02u %u.%02u %u.%02u %u %u %u\n",
+ temp_d(temp.curr), temp_f(temp.curr),
+ temp_d(temp.lo_lim), temp_f(temp.lo_lim),
+ temp_d(temp.hi_lim), temp_f(temp.hi_lim),
+ temp_d(temp.crit_lim), temp_f(temp.crit_lim),
+ temp.triggers & 0x1,
+ temp.triggers & 0x2,
+ temp.triggers & 0x4);
}
static DEVICE_ATTR_RO(tempsense);
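Throughout this file the scnprintf(..., PAGE_SIZE, ...) calls become sysfs_emit(), which assumes the page-sized sysfs buffer and warns if misused, so the manual size bookkeeping goes away. A minimal device-attribute sketch of the convention (the attribute name "example" is illustrative):

#include <linux/device.h>
#include <linux/sysfs.h>

/* DEVICE_ATTR_RO(example) expects a show routine named example_show */
static ssize_t example_show(struct device *device,
                            struct device_attribute *attr, char *buf)
{
    return sysfs_emit(buf, "%u\n", 42u);
}
static DEVICE_ATTR_RO(example);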
@@ -653,98 +558,13 @@ const struct attribute_group ib_hfi1_attr_group = {
.attrs = hfi1_attributes,
};
-int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
- struct kobject *kobj)
-{
- struct hfi1_pportdata *ppd;
- struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
- int ret;
-
- if (!port_num || port_num > dd->num_pports) {
- dd_dev_err(dd,
- "Skipping infiniband class with invalid port %u\n",
- port_num);
- return -ENODEV;
- }
- ppd = &dd->pport[port_num - 1];
-
- ret = kobject_init_and_add(&ppd->sc2vl_kobj, &hfi1_sc2vl_ktype, kobj,
- "sc2vl");
- if (ret) {
- dd_dev_err(dd,
- "Skipping sc2vl sysfs info, (err %d) port %u\n",
- ret, port_num);
- goto bail;
- }
- kobject_uevent(&ppd->sc2vl_kobj, KOBJ_ADD);
-
- ret = kobject_init_and_add(&ppd->sl2sc_kobj, &hfi1_sl2sc_ktype, kobj,
- "sl2sc");
- if (ret) {
- dd_dev_err(dd,
- "Skipping sl2sc sysfs info, (err %d) port %u\n",
- ret, port_num);
- goto bail_sc2vl;
- }
- kobject_uevent(&ppd->sl2sc_kobj, KOBJ_ADD);
-
- ret = kobject_init_and_add(&ppd->vl2mtu_kobj, &hfi1_vl2mtu_ktype, kobj,
- "vl2mtu");
- if (ret) {
- dd_dev_err(dd,
- "Skipping vl2mtu sysfs info, (err %d) port %u\n",
- ret, port_num);
- goto bail_sl2sc;
- }
- kobject_uevent(&ppd->vl2mtu_kobj, KOBJ_ADD);
-
- ret = kobject_init_and_add(&ppd->pport_cc_kobj, &port_cc_ktype,
- kobj, "CCMgtA");
- if (ret) {
- dd_dev_err(dd,
- "Skipping Congestion Control sysfs info, (err %d) port %u\n",
- ret, port_num);
- goto bail_vl2mtu;
- }
-
- kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD);
-
- ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, &cc_setting_bin_attr);
- if (ret) {
- dd_dev_err(dd,
- "Skipping Congestion Control setting sysfs info, (err %d) port %u\n",
- ret, port_num);
- goto bail_cc;
- }
-
- ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, &cc_table_bin_attr);
- if (ret) {
- dd_dev_err(dd,
- "Skipping Congestion Control table sysfs info, (err %d) port %u\n",
- ret, port_num);
- goto bail_cc_entry_bin;
- }
-
- dd_dev_info(dd,
- "Congestion Control Agent enabled for port %d\n",
- port_num);
-
- return 0;
-
-bail_cc_entry_bin:
- sysfs_remove_bin_file(&ppd->pport_cc_kobj,
- &cc_setting_bin_attr);
-bail_cc:
- kobject_put(&ppd->pport_cc_kobj);
-bail_vl2mtu:
- kobject_put(&ppd->vl2mtu_kobj);
-bail_sl2sc:
- kobject_put(&ppd->sl2sc_kobj);
-bail_sc2vl:
- kobject_put(&ppd->sc2vl_kobj);
-bail:
- return ret;
-}
+const struct attribute_group *hfi1_attr_port_groups[] = {
+ &port_cc_group,
+ &port_sc2vl_group,
+ &port_sl2sc_group,
+ &port_vl2mtu_group,
+ NULL,
+};
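With the hand-rolled kobjects removed, the per-port files are plain attribute groups; hfi1_attr_port_groups[] is presumably wired up through the driver's ib_device_ops elsewhere in this series (not shown in this hunk), after which the RDMA core creates and removes the files itself. A rough sketch of that wiring, under the assumption that ib_device_ops exposes device_group/port_groups pointers as in the rdma core of this era:

/* sketch only: ops field names assumed, not confirmed by this hunk */
static const struct ib_device_ops hfi1_dev_ops_sketch = {
    /* ... existing ops ... */
    .device_group = &ib_hfi1_attr_group,
    .port_groups = hfi1_attr_port_groups,
};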
struct sde_attribute {
struct attribute attr;
@@ -814,7 +634,7 @@ static ssize_t sde_show_vl(struct sdma_engine *sde, char *buf)
if (vl < 0)
return vl;
- return snprintf(buf, PAGE_SIZE, "%d\n", vl);
+ return sysfs_emit(buf, "%d\n", vl);
}
static SDE_ATTR(cpu_list, S_IWUSR | S_IRUGO,
@@ -853,8 +673,13 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
return 0;
bail:
- for (i = 0; i < dd->num_sdma; i++)
- kobject_del(&dd->per_sdma[i].kobj);
+ /*
+ * The function kobject_put() will call kobject_del() if the kobject
+ * has been added successfully. The sysfs files created under the
+ * kobject directory will also be removed during the process.
+ */
+ for (; i >= 0; i--)
+ kobject_put(&dd->per_sdma[i].kobj);
return ret;
}
@@ -864,19 +689,9 @@ bail:
*/
void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd)
{
- struct hfi1_pportdata *ppd;
int i;
- for (i = 0; i < dd->num_pports; i++) {
- ppd = &dd->pport[i];
-
- sysfs_remove_bin_file(&ppd->pport_cc_kobj,
- &cc_setting_bin_attr);
- sysfs_remove_bin_file(&ppd->pport_cc_kobj,
- &cc_table_bin_attr);
- kobject_put(&ppd->pport_cc_kobj);
- kobject_put(&ppd->vl2mtu_kobj);
- kobject_put(&ppd->sl2sc_kobj);
- kobject_put(&ppd->sc2vl_kobj);
- }
+ /* Unwind operations in hfi1_verbs_register_sysfs() */
+ for (i = 0; i < dd->num_sdma; i++)
+ kobject_put(&dd->per_sdma[i].kobj);
}
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index 6fb93032fbef..eafd2f157e32 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
- * Copyright(c) 2018 Intel Corporation.
+ * Copyright(c) 2018 - 2020 Intel Corporation.
*
*/
@@ -107,8 +107,6 @@ static u32 mask_generation(u32 a)
* C - Capcode
*/
-static u32 tid_rdma_flow_wt;
-
static void tid_rdma_trigger_resume(struct work_struct *work);
static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req);
static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
@@ -136,6 +134,26 @@ static void update_r_next_psn_fecn(struct hfi1_packet *packet,
struct tid_rdma_flow *flow,
bool fecn);
+static void validate_r_tid_ack(struct hfi1_qp_priv *priv)
+{
+ if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
+ priv->r_tid_ack = priv->r_tid_tail;
+}
+
+static void tid_rdma_schedule_ack(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ priv->s_flags |= RVT_S_ACK_PENDING;
+ hfi1_schedule_tid_send(qp);
+}
+
+static void tid_rdma_trigger_ack(struct rvt_qp *qp)
+{
+ validate_r_tid_ack(qp->priv);
+ tid_rdma_schedule_ack(qp);
+}
+
static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
{
return
@@ -176,7 +194,7 @@ void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p)
{
struct hfi1_qp_priv *priv = qp->priv;
- p->qp = (kdeth_qp << 16) | priv->rcd->ctxt;
+ p->qp = (RVT_KDETH_QP_PREFIX << 16) | priv->rcd->ctxt;
p->max_len = TID_RDMA_MAX_SEGMENT_SIZE;
p->jkey = priv->rcd->jkey;
p->max_read = TID_RDMA_MAX_READ_SEGS_PER_REQ;
@@ -291,12 +309,13 @@ int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit)
/**
* qp_to_rcd - determine the receive context used by a qp
- * @qp - the qp
+ * @rdi: rvt dev struct
+ * @qp: the qp
*
* This routine returns the receive context associated
* with a a qp's qpn.
*
- * Returns the context.
+ * Return: the context.
*/
static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi,
struct rvt_qp *qp)
@@ -312,9 +331,7 @@ static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi,
if (qp->ibqp.qp_num == 0)
ctxt = 0;
else
- ctxt = ((qp->ibqp.qp_num >> dd->qos_shift) %
- (dd->n_krcv_queues - 1)) + 1;
-
+ ctxt = hfi1_get_qp_map(dd, qp->ibqp.qp_num >> dd->qos_shift);
return dd->rcd[ctxt];
}
@@ -468,6 +485,7 @@ static struct rvt_qp *first_qp(struct hfi1_ctxtdata *rcd,
/**
* kernel_tid_waiters - determine rcd wait
* @rcd: the receive context
+ * @queue: the queue to operate on
* @qp: the head of the qp being processed
*
* This routine will return false IFF
@@ -477,7 +495,7 @@ static struct rvt_qp *first_qp(struct hfi1_ctxtdata *rcd,
* Must hold the qp s_lock and the exp_lock.
*
* Return:
- * false if either of the conditions below are statisfied:
+ * false if either of the conditions below are satisfied:
* 1. The list is empty or
* 2. The indicated qp is at the head of the list and the
* HFI1_S_WAIT_TID_SPACE bit is set in qp->s_flags.
@@ -501,7 +519,9 @@ static bool kernel_tid_waiters(struct hfi1_ctxtdata *rcd,
/**
* dequeue_tid_waiter - dequeue the qp from the list
- * @qp - the qp to remove the wait list
+ * @rcd: the receive context
+ * @queue: the queue to operate on
+ * @qp: the qp to remove the wait list
*
* This routine removes the indicated qp from the
* wait list if it is there.
@@ -533,6 +553,7 @@ static void dequeue_tid_waiter(struct hfi1_ctxtdata *rcd,
/**
* queue_qp_for_tid_wait - suspend QP on tid space
* @rcd: the receive context
+ * @queue: the queue to operate on
* @qp: the qp
*
* The qp is inserted at the tail of the rcd
@@ -577,14 +598,14 @@ static void __trigger_tid_waiter(struct rvt_qp *qp)
/**
* tid_rdma_schedule_tid_wakeup - schedule wakeup for a qp
- * @qp - the qp
+ * @qp: the qp
*
* trigger a schedule or a waiting qp in a deadlock
* safe manner. The qp reference is held prior
* to this call via first_qp().
*
* If the qp trigger was already scheduled (!rval)
- * the the reference is dropped, otherwise the resume
+ * the reference is dropped, otherwise the resume
* or the destroy cancel will dispatch the reference.
*/
static void tid_rdma_schedule_tid_wakeup(struct rvt_qp *qp)
@@ -614,7 +635,7 @@ static void tid_rdma_schedule_tid_wakeup(struct rvt_qp *qp)
/**
* tid_rdma_trigger_resume - field a trigger work request
- * @work - the work item
+ * @work: the work item
*
* Complete the off qp trigger processing by directly
* calling the progress routine.
@@ -638,7 +659,7 @@ static void tid_rdma_trigger_resume(struct work_struct *work)
rvt_put_qp(qp);
}
-/**
+/*
* tid_rdma_flush_wait - unwind any tid space wait
*
* This is called when resetting a qp to
@@ -677,8 +698,8 @@ void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp)
/* Flow functions */
/**
* kern_reserve_flow - allocate a hardware flow
- * @rcd - the context to use for allocation
- * @last - the index of the preferred flow. Use RXE_NUM_TID_FLOWS to
+ * @rcd: the context to use for allocation
+ * @last: the index of the preferred flow. Use RXE_NUM_TID_FLOWS to
* signify "don't care".
*
* Use a bit mask based allocation to reserve a hardware
@@ -689,7 +710,7 @@ void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp)
* The exp_lock must be held.
*
* Return:
- * On success: a value postive value between 0 and RXE_NUM_TID_FLOWS - 1
+ * On success: a positive value between 0 and RXE_NUM_TID_FLOWS - 1
* On failure: -EAGAIN
*/
static int kern_reserve_flow(struct hfi1_ctxtdata *rcd, int last)
@@ -829,7 +850,7 @@ void hfi1_kern_init_ctxt_generations(struct hfi1_ctxtdata *rcd)
int i;
for (i = 0; i < RXE_NUM_TID_FLOWS; i++) {
- rcd->flows[i].generation = mask_generation(prandom_u32());
+ rcd->flows[i].generation = mask_generation(get_random_u32());
kern_set_hw_flow(rcd, KERN_GENERATION_RESERVED, i);
}
}
@@ -844,9 +865,10 @@ static u8 trdma_pset_order(struct tid_rdma_pageset *s)
/**
* tid_rdma_find_phys_blocks_4k - get groups base on mr info
- * @npages - number of pages
- * @pages - pointer to an array of page structs
- * @list - page set array to return
+ * @flow: overall info for a TID RDMA segment
+ * @pages: pointer to an array of page structs
+ * @npages: number of pages
+ * @list: page set array to return
*
* This routine returns the number of groups associated with
* the current sge information. This implementation is based
@@ -933,10 +955,10 @@ static u32 tid_rdma_find_phys_blocks_4k(struct tid_rdma_flow *flow,
/**
* tid_flush_pages - dump out pages into pagesets
- * @list - list of pagesets
- * @idx - pointer to current page index
- * @pages - number of pages to dump
- * @sets - current number of pagesset
+ * @list: list of pagesets
+ * @idx: pointer to current page index
+ * @pages: number of pages to dump
+ * @sets: current number of pagesets
*
* This routine flushes out accumulated pages.
*
@@ -974,9 +996,10 @@ static u32 tid_flush_pages(struct tid_rdma_pageset *list,
/**
* tid_rdma_find_phys_blocks_8k - get groups based on mr info
- * @pages - pointer to an array of page structs
- * @npages - number of pages
- * @list - page set array to return
+ * @flow: overall info for a TID RDMA segment
+ * @pages: pointer to an array of page structs
+ * @npages: number of pages
+ * @list: page set array to return
*
* This routine parses an array of pages to compute pagesets
* in an 8k compatible way.
@@ -984,7 +1007,7 @@ static u32 tid_flush_pages(struct tid_rdma_pageset *list,
* Pages are tested two at a time: i and i + 1 for contiguity,
* and i - 1 and i for contiguity.
*
- * If any condition is false, any accumlated pages are flushed and
+ * If any condition is false, any accumulated pages are flushed and
* v0, v1 are emitted as separate PAGE_SIZE pagesets
*
* Otherwise, the current 8k is totaled for a future flush.
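Concretely, the pairwise test amounts to a physical-contiguity check of the following shape (a sketch, not the exact loop body; the accumulated-page count name is an assumption):

	/* v0 = pages[i], v1 = pages[i + 1] */
	if (!v1 || page_to_phys(v1) != page_to_phys(v0) + PAGE_SIZE) {
		/* not a usable 8k pair: flush what has accumulated so far ... */
		sets = tid_flush_pages(list, &idx, accumulated, sets);
		/* ... then emit v0 (and v1, if present) as PAGE_SIZE pagesets */
	}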
@@ -1048,7 +1071,7 @@ static u32 tid_rdma_find_phys_blocks_8k(struct tid_rdma_flow *flow,
return sets;
}
-/**
+/*
* Find pages for one segment of a sge array represented by @ss. The function
* does not check the sge; the sge must have been checked for alignment with a
* prior call to hfi1_kern_trdma_ok. Other sge checking is done as part of
@@ -1092,7 +1115,7 @@ static u32 kern_find_pages(struct tid_rdma_flow *flow,
}
flow->length = flow->req->seg_len - length;
- *last = req->isge == ss->num_sge ? false : true;
+ *last = req->isge != ss->num_sge;
return i;
}
@@ -1411,7 +1434,7 @@ static void kern_program_rcvarray(struct tid_rdma_flow *flow)
* (5) computes a tidarray with formatted TID entries which can be sent
* to the sender
* (6) Reserves and programs HW flows.
- * (7) It also manages queing the QP when TID/flow resources are not
+ * (7) It also manages queueing the QP when TID/flow resources are not
* available.
*
* @req points to struct tid_rdma_request of which the segments are a part. The
@@ -1581,8 +1604,8 @@ void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req)
}
/**
- * hfi1_kern_exp_rcv_free_flows - free priviously allocated flow information
- * @req - the tid rdma request to be cleaned
+ * hfi1_kern_exp_rcv_free_flows - free previously allocated flow information
+ * @req: the tid rdma request to be cleaned
*/
static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req)
{
@@ -1622,6 +1645,7 @@ static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
flows[i].req = req;
flows[i].npagesets = 0;
flows[i].pagesets[0].mapped = 0;
+ flows[i].resync_npkts = 0;
}
req->flows = flows;
return 0;
@@ -1675,34 +1699,6 @@ static struct tid_rdma_flow *find_flow_ib(struct tid_rdma_request *req,
return NULL;
}
-static struct tid_rdma_flow *
-__find_flow_ranged(struct tid_rdma_request *req, u16 head, u16 tail,
- u32 psn, u16 *fidx)
-{
- for ( ; CIRC_CNT(head, tail, MAX_FLOWS);
- tail = CIRC_NEXT(tail, MAX_FLOWS)) {
- struct tid_rdma_flow *flow = &req->flows[tail];
- u32 spsn, lpsn;
-
- spsn = full_flow_psn(flow, flow->flow_state.spsn);
- lpsn = full_flow_psn(flow, flow->flow_state.lpsn);
-
- if (cmp_psn(psn, spsn) >= 0 && cmp_psn(psn, lpsn) <= 0) {
- if (fidx)
- *fidx = tail;
- return flow;
- }
- }
- return NULL;
-}
-
-static struct tid_rdma_flow *find_flow(struct tid_rdma_request *req,
- u32 psn, u16 *fidx)
-{
- return __find_flow_ranged(req, req->setup_head, req->clear_tail, psn,
- fidx);
-}
-
/* TID RDMA READ functions */
u32 hfi1_build_tid_rdma_read_packet(struct rvt_swqe *wqe,
struct ib_other_headers *ohdr, u32 *bth1,
@@ -2026,7 +2022,6 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet,
trace_hfi1_tid_req_rcv_err(qp, 0, e->opcode, e->psn, e->lpsn, req);
if (e->opcode == TID_OP(READ_REQ)) {
struct ib_reth *reth;
- u32 offset;
u32 len;
u32 rkey;
u64 vaddr;
@@ -2038,7 +2033,6 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet,
* The requester always restarts from the start of the original
* request.
*/
- offset = delta_psn(psn, e->psn) * qp->pmtu;
len = be32_to_cpu(reth->length);
if (psn != e->psn || len != req->total_len)
goto unlock;
@@ -2061,7 +2055,7 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet,
* req->clear_tail is advanced). However, when an earlier
* request is received, this request will not be complete any
* more (qp->s_tail_ack_queue is moved back, see below).
- * Consequently, we need to update the TID flow info everytime
+ * Consequently, we need to update the TID flow info every time
* a duplicate request is received.
*/
bth0 = be32_to_cpu(ohdr->bth[0]);
@@ -2225,7 +2219,7 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet)
/*
* 1. Verify TID RDMA READ REQ as per IB_OPCODE_RC_RDMA_READ
* (see hfi1_rc_rcv())
- * 2. Put TID RDMA READ REQ into the response queueu (s_ack_queue)
+ * 2. Put TID RDMA READ REQ into the response queue (s_ack_queue)
* - Setup struct tid_rdma_req with request info
* - Initialize struct tid_rdma_flow info;
* - Copy TID entries;
@@ -2445,7 +2439,7 @@ find_tid_request(struct rvt_qp *qp, u32 psn, enum ib_wr_opcode opcode)
void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet)
{
- /* HANDLER FOR TID RDMA READ RESPONSE packet (Requestor side */
+ /* HANDLER FOR TID RDMA READ RESPONSE packet (Requester side) */
/*
* 1. Find matching SWQE
@@ -2605,18 +2599,9 @@ void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp)
hfi1_kern_clear_hw_flow(priv->rcd, qp);
}
-static bool tid_rdma_tid_err(struct hfi1_ctxtdata *rcd,
- struct hfi1_packet *packet, u8 rcv_type,
- u8 opcode)
+static bool tid_rdma_tid_err(struct hfi1_packet *packet, u8 rcv_type)
{
struct rvt_qp *qp = packet->qp;
- struct hfi1_qp_priv *qpriv = qp->priv;
- u32 ipsn;
- struct ib_other_headers *ohdr = packet->ohdr;
- struct rvt_ack_entry *e;
- struct tid_rdma_request *req;
- struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
- u32 i;
if (rcv_type >= RHF_RCV_TYPE_IB)
goto done;
@@ -2633,41 +2618,9 @@ static bool tid_rdma_tid_err(struct hfi1_ctxtdata *rcd,
if (rcv_type == RHF_RCV_TYPE_EAGER) {
hfi1_restart_rc(qp, qp->s_last_psn + 1, 1);
hfi1_schedule_send(qp);
- goto done_unlock;
}
- /*
- * For TID READ response, error out QP after freeing the tid
- * resources.
- */
- if (opcode == TID_OP(READ_RESP)) {
- ipsn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_psn));
- if (cmp_psn(ipsn, qp->s_last_psn) > 0 &&
- cmp_psn(ipsn, qp->s_psn) < 0) {
- hfi1_kern_read_tid_flow_free(qp);
- spin_unlock(&qp->s_lock);
- rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
- goto done;
- }
- goto done_unlock;
- }
-
- /*
- * Error out the qp for TID RDMA WRITE
- */
- hfi1_kern_clear_hw_flow(qpriv->rcd, qp);
- for (i = 0; i < rvt_max_atomic(rdi); i++) {
- e = &qp->s_ack_queue[i];
- if (e->opcode == TID_OP(WRITE_REQ)) {
- req = ack_to_tid_req(e);
- hfi1_kern_exp_rcv_clear_all(req);
- }
- }
- spin_unlock(&qp->s_lock);
- rvt_rc_error(qp, IB_WC_LOC_LEN_ERR);
- goto done;
-
-done_unlock:
+ /* Since no payload is delivered, just drop the packet */
spin_unlock(&qp->s_lock);
done:
return true;
@@ -2718,12 +2671,15 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd,
u32 fpsn;
lockdep_assert_held(&qp->r_lock);
+ trace_hfi1_rsp_read_kdeth_eflags(qp, ibpsn);
+ trace_hfi1_sender_read_kdeth_eflags(qp);
+ trace_hfi1_tid_read_sender_kdeth_eflags(qp, 0);
+ spin_lock(&qp->s_lock);
/* If the psn is out of valid range, drop the packet */
if (cmp_psn(ibpsn, qp->s_last_psn) < 0 ||
cmp_psn(ibpsn, qp->s_psn) > 0)
- return ret;
+ goto s_unlock;
- spin_lock(&qp->s_lock);
/*
* Note that NAKs implicitly ACK outstanding SEND and RDMA write
* requests and implicitly NAK RDMA read and atomic requests issued
@@ -2771,14 +2727,19 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd,
wqe = do_rc_completion(qp, wqe, ibp);
if (qp->s_acked == qp->s_tail)
- break;
+ goto s_unlock;
}
+ if (qp->s_acked == qp->s_tail)
+ goto s_unlock;
+
/* Handle the eflags for the request */
if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
goto s_unlock;
req = wqe_to_tid_req(wqe);
+ trace_hfi1_tid_req_read_kdeth_eflags(qp, 0, wqe->wr.opcode, wqe->psn,
+ wqe->lpsn, req);
switch (rcv_type) {
case RHF_RCV_TYPE_EXPECTED:
switch (rte) {
@@ -2792,28 +2753,14 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd,
* to prevent continuous Flow Sequence errors for any
* packets that could be still in the fabric.
*/
- flow = find_flow(req, psn, NULL);
- if (!flow) {
- /*
- * We can't find the IB PSN matching the
- * received KDETH PSN. The only thing we can
- * do at this point is report the error to
- * the QP.
- */
- hfi1_kern_read_tid_flow_free(qp);
- spin_unlock(&qp->s_lock);
- rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
- return ret;
- }
+ flow = &req->flows[req->clear_tail];
+ trace_hfi1_tid_flow_read_kdeth_eflags(qp,
+ req->clear_tail,
+ flow);
if (priv->s_flags & HFI1_R_TID_SW_PSN) {
diff = cmp_psn(psn,
flow->flow_state.r_next_psn);
if (diff > 0) {
- if (!(qp->r_flags & RVT_R_RDMAR_SEQ))
- restart_tid_rdma_read_req(rcd,
- qp,
- wqe);
-
/* Drop the packet. */
goto s_unlock;
} else if (diff < 0) {
@@ -2886,6 +2833,7 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd,
default:
break;
}
+ break;
default:
break;
}
@@ -2965,7 +2913,7 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
if (lnh == HFI1_LRH_GRH)
goto r_unlock;
- if (tid_rdma_tid_err(rcd, packet, rcv_type, opcode))
+ if (tid_rdma_tid_err(packet, rcv_type))
goto r_unlock;
}
@@ -2985,8 +2933,15 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
*/
spin_lock(&qp->s_lock);
qpriv = qp->priv;
+ if (qpriv->r_tid_tail == HFI1_QP_WQE_INVALID ||
+ qpriv->r_tid_tail == qpriv->r_tid_head)
+ goto unlock;
e = &qp->s_ack_queue[qpriv->r_tid_tail];
+ if (e->opcode != TID_OP(WRITE_REQ))
+ goto unlock;
req = ack_to_tid_req(e);
+ if (req->comp_seg == req->cur_seg)
+ goto unlock;
flow = &req->flows[req->clear_tail];
trace_hfi1_eflags_err_write(qp, rcv_type, rte, psn);
trace_hfi1_rsp_handle_kdeth_eflags(qp, psn);
@@ -3058,6 +3013,7 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
default:
break;
}
+ break;
default:
break;
}
@@ -3076,10 +3032,7 @@ nak_psn:
qpriv->s_nak_state = IB_NAK_PSN_ERROR;
/* We are NAK'ing the next expected PSN */
qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn);
- qpriv->s_flags |= RVT_S_ACK_PENDING;
- if (qpriv->r_tid_ack == HFI1_QP_WQE_INVALID)
- qpriv->r_tid_ack = qpriv->r_tid_tail;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_trigger_ack(qp);
}
goto unlock;
}
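The new tid_rdma_trigger_ack() helper consolidates the pattern removed above (and at the other call sites touched in this patch); its presumed shape, inferred from the open-coded lines it replaces:

	priv->s_flags |= RVT_S_ACK_PENDING;
	if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
		priv->r_tid_ack = priv->r_tid_tail;	/* i.e. validate_r_tid_ack(priv) */
	hfi1_schedule_tid_send(qp);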
@@ -3271,11 +3224,13 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
switch (prev->wr.opcode) {
case IB_WR_TID_RDMA_WRITE:
req = wqe_to_tid_req(prev);
if (req->ack_seg != req->total_segs)
goto interlock;
+ break;
default:
break;
}
@@ -3283,7 +3238,7 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
case IB_WR_RDMA_READ:
if (prev->wr.opcode != IB_WR_TID_RDMA_WRITE)
break;
- /* fall through */
+ fallthrough;
case IB_WR_TID_RDMA_READ:
switch (prev->wr.opcode) {
case IB_WR_RDMA_READ:
@@ -3294,9 +3249,11 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
req = wqe_to_tid_req(prev);
if (req->ack_seg != req->total_segs)
goto interlock;
+ break;
default:
break;
}
+ break;
default:
break;
}
@@ -3442,18 +3399,17 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32);
}
-void hfi1_compute_tid_rdma_flow_wt(void)
+static u32 hfi1_compute_tid_rdma_flow_wt(struct rvt_qp *qp)
{
/*
* Heuristic for computing the RNR timeout when waiting on the flow
* queue. Rather than a computationally expensive exact estimate of when
* a flow will be available, we assume that if a QP is at position N in
* the flow queue, it has to wait approximately (N + 1) * (number of
- * segments between two sync points), assuming PMTU of 4K. The rationale
- * for this is that flows are released and recycled at each sync point.
+ * segments between two sync points). The rationale for this is that
+ * flows are released and recycled at each sync point.
*/
- tid_rdma_flow_wt = MAX_TID_FLOW_PSN * enum_to_mtu(OPA_MTU_4096) /
- TID_RDMA_MAX_SEGMENT_SIZE;
+ return (MAX_TID_FLOW_PSN * qp->pmtu) >> TID_RDMA_SEGMENT_SHIFT;
}
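As a worked example of the per-PMTU scaling (values illustrative): with a 4096-byte PMTU and 256 KiB segments (TID_RDMA_SEGMENT_SHIFT = 18), the expression reduces to

	MAX_TID_FLOW_PSN * 4096 / 262144 = MAX_TID_FLOW_PSN / 64

segments of wait per queue position; position_in_queue() (declared just below) then multiplies this by the QP's place in rcd->flow_queue, as the later hunk in this patch shows.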
static u32 position_in_queue(struct hfi1_qp_priv *qpriv,
@@ -3486,7 +3442,7 @@ static u32 hfi1_compute_tid_rnr_timeout(struct rvt_qp *qp, u32 to_seg)
return 0;
}
-/**
+/*
* Central place for resource allocation at TID write responder,
* is called from write_req and write_data interrupt handlers as
* well as the send thread when a queued QP is scheduled for
@@ -3576,7 +3532,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) {
ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp);
if (ret) {
- to_seg = tid_rdma_flow_wt *
+ to_seg = hfi1_compute_tid_rdma_flow_wt(qp) *
position_in_queue(qpriv,
&rcd->flow_queue);
break;
@@ -3597,7 +3553,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
/*
* If overtaking req->acked_tail, send an RNR NAK. Because the
* QP is not queued in this case, and the issue can only be
- * caused due a delay in scheduling the second leg which we
+ * caused by a delay in scheduling the second leg which we
* cannot estimate, we use a rather arbitrary RNR timeout of
* (MAX_FLOWS / 2) segments
*/
@@ -3605,8 +3561,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
MAX_FLOWS)) {
ret = -EAGAIN;
to_seg = MAX_FLOWS >> 1;
- qpriv->s_flags |= RVT_S_ACK_PENDING;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_trigger_ack(qp);
break;
}
@@ -3694,7 +3649,7 @@ void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet)
* 1. Verify TID RDMA WRITE REQ as per IB_OPCODE_RC_RDMA_WRITE_FIRST
* (see hfi1_rc_rcv())
* - Don't allow 0-length requests.
- * 2. Put TID RDMA WRITE REQ into the response queueu (s_ack_queue)
+ * 2. Put TID RDMA WRITE REQ into the response queue (s_ack_queue)
* - Setup struct tid_rdma_req with request info
* - Prepare struct tid_rdma_flow array?
* 3. Set the qp->s_ack_state as state diagram in design doc.
@@ -4010,7 +3965,7 @@ static int hfi1_stop_tid_reap_timer(struct rvt_qp *qp)
lockdep_assert_held(&qp->s_lock);
if (qpriv->s_flags & HFI1_R_TID_RSC_TIMER) {
- rval = del_timer(&qpriv->s_tid_timer);
+ rval = timer_delete(&qpriv->s_tid_timer);
qpriv->s_flags &= ~HFI1_R_TID_RSC_TIMER;
}
return rval;
@@ -4020,13 +3975,13 @@ void hfi1_del_tid_reap_timer(struct rvt_qp *qp)
{
struct hfi1_qp_priv *qpriv = qp->priv;
- del_timer_sync(&qpriv->s_tid_timer);
+ timer_delete_sync(&qpriv->s_tid_timer);
qpriv->s_flags &= ~HFI1_R_TID_RSC_TIMER;
}
static void hfi1_tid_timeout(struct timer_list *t)
{
- struct hfi1_qp_priv *qpriv = from_timer(qpriv, t, s_tid_timer);
+ struct hfi1_qp_priv *qpriv = timer_container_of(qpriv, t, s_tid_timer);
struct rvt_qp *qp = qpriv->owner;
struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
unsigned long flags;
@@ -4071,7 +4026,7 @@ unlock_r_lock:
void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet)
{
- /* HANDLER FOR TID RDMA WRITE RESPONSE packet (Requestor side */
+ /* HANDLER FOR TID RDMA WRITE RESPONSE packet (Requester side) */
/*
* 1. Find matching SWQE
@@ -4406,8 +4361,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn,
req);
trace_hfi1_tid_write_rsp_rcv_data(qp);
- if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
- priv->r_tid_ack = priv->r_tid_tail;
+ validate_r_tid_ack(priv);
if (opcode == TID_OP(WRITE_DATA_LAST)) {
release_rdma_sge_mr(e);
@@ -4446,8 +4400,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
}
done:
- priv->s_flags |= RVT_S_ACK_PENDING;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_schedule_ack(qp);
exit:
priv->r_next_psn_kdeth = flow->flow_state.r_next_psn;
if (fecn)
@@ -4459,10 +4412,7 @@ send_nak:
if (!priv->s_nak_state) {
priv->s_nak_state = IB_NAK_PSN_ERROR;
priv->s_nak_psn = flow->flow_state.r_next_psn;
- priv->s_flags |= RVT_S_ACK_PENDING;
- if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
- priv->r_tid_ack = priv->r_tid_tail;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_trigger_ack(qp);
}
goto done;
}
@@ -4552,7 +4502,7 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet)
struct rvt_swqe *wqe;
struct tid_rdma_request *req;
struct tid_rdma_flow *flow;
- u32 aeth, psn, req_psn, ack_psn, fspsn, resync_psn, ack_kpsn;
+ u32 aeth, psn, req_psn, ack_psn, flpsn, resync_psn, ack_kpsn;
unsigned long flags;
u16 fidx;
@@ -4581,6 +4531,9 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet)
ack_kpsn--;
}
+ if (unlikely(qp->s_acked == qp->s_tail))
+ goto ack_op_err;
+
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
@@ -4593,7 +4546,8 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet)
trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow);
/* Drop stale ACK/NAK */
- if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.spsn)) < 0)
+ if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.spsn)) < 0 ||
+ cmp_psn(req_psn, flow->flow_state.resp_ib_psn) < 0)
goto ack_op_err;
while (cmp_psn(ack_kpsn,
@@ -4692,6 +4646,15 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet)
*/
fpsn = full_flow_psn(flow, flow->flow_state.spsn);
req->r_ack_psn = psn;
+ /*
+ * If resync_psn points to the last flow PSN for a
+ * segment and the new segment (likely from a new
+ * request) starts with a new generation number, we
+ * need to adjust resync_psn accordingly.
+ */
+ if (flow->flow_state.generation !=
+ (resync_psn >> HFI1_KDETH_BTH_SEQ_SHIFT))
+ resync_psn = mask_psn(fpsn - 1);
flow->resync_npkts +=
delta_psn(mask_psn(resync_psn + 1), fpsn);
/*
@@ -4755,8 +4718,12 @@ done:
switch ((aeth >> IB_AETH_CREDIT_SHIFT) &
IB_AETH_CREDIT_MASK) {
case 0: /* PSN sequence error */
+ if (!req->flows)
+ break;
flow = &req->flows[req->acked_tail];
- fspsn = full_flow_psn(flow, flow->flow_state.spsn);
+ flpsn = full_flow_psn(flow, flow->flow_state.lpsn);
+ if (cmp_psn(psn, flpsn) > 0)
+ break;
trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail,
flow);
req->r_ack_psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
@@ -4814,7 +4781,7 @@ static int hfi1_stop_tid_retry_timer(struct rvt_qp *qp)
lockdep_assert_held(&qp->s_lock);
if (priv->s_flags & HFI1_S_TID_RETRY_TIMER) {
- rval = del_timer(&priv->s_tid_retry_timer);
+ rval = timer_delete(&priv->s_tid_retry_timer);
priv->s_flags &= ~HFI1_S_TID_RETRY_TIMER;
}
return rval;
@@ -4824,13 +4791,14 @@ void hfi1_del_tid_retry_timer(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
- del_timer_sync(&priv->s_tid_retry_timer);
+ timer_delete_sync(&priv->s_tid_retry_timer);
priv->s_flags &= ~HFI1_S_TID_RETRY_TIMER;
}
static void hfi1_tid_retry_timeout(struct timer_list *t)
{
- struct hfi1_qp_priv *priv = from_timer(priv, t, s_tid_retry_timer);
+ struct hfi1_qp_priv *priv = timer_container_of(priv, t,
+ s_tid_retry_timer);
struct rvt_qp *qp = priv->owner;
struct rvt_swqe *wqe;
unsigned long flags;
@@ -5002,8 +4970,7 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet)
qpriv->resync = true;
/* RESYNC request always gets a TID RDMA ACK. */
qpriv->s_nak_state = 0;
- qpriv->s_flags |= RVT_S_ACK_PENDING;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_trigger_ack(qp);
bail:
if (fecn)
qp->s_flags |= RVT_S_ECN;
@@ -5114,7 +5081,7 @@ int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
if (priv->s_state == TID_OP(WRITE_REQ))
hfi1_tid_rdma_restart_req(qp, wqe, &bth2);
priv->s_state = TID_OP(WRITE_DATA);
- /* fall through */
+ fallthrough;
case TID_OP(WRITE_DATA):
/*
@@ -5208,7 +5175,7 @@ bail_no_tx:
priv->s_flags &= ~RVT_S_BUSY;
/*
* If we didn't get a txreq, the QP will be woken up later to try
- * again, set the flags to the the wake up which work item to wake
+ * again, set the flags to indicate which work item to wake
* up.
* (A better algorithm should be found to do this and generalize the
* sleep/wakeup flags.)
@@ -5453,7 +5420,10 @@ static bool _hfi1_schedule_tid_send(struct rvt_qp *qp)
struct hfi1_ibport *ibp =
to_iport(qp->ibqp.device, qp->port_num);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+ struct hfi1_devdata *dd = ppd->dd;
+
+ if ((dd->flags & HFI1_SHUTDOWN))
+ return true;
return iowait_tid_schedule(&priv->s_iowait, ppd->hfi1_wq,
priv->s_sde ?
@@ -5471,8 +5441,9 @@ static bool _hfi1_schedule_tid_send(struct rvt_qp *qp)
* the two state machines can step on each other with respect to the
* RVT_S_BUSY flag.
* Therefore, a modified test is used.
- * @return true if the second leg is scheduled;
- * false if the second leg is not scheduled.
+ *
+ * Return: %true if the second leg is scheduled;
+ * %false if the second leg is not scheduled.
*/
bool hfi1_schedule_tid_send(struct rvt_qp *qp)
{
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h
index 1c536185261e..6e82df2190b7 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.h
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.h
@@ -17,6 +17,7 @@
#define TID_RDMA_MIN_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
#define TID_RDMA_MAX_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
#define TID_RDMA_MAX_PAGES (BIT(18) >> PAGE_SHIFT)
+#define TID_RDMA_SEGMENT_SHIFT 18
/*
* Bit definitions for priv->s_flags.
@@ -274,8 +275,6 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
struct ib_other_headers *ohdr,
u32 *bth1, u32 *bth2, u32 *len);
-void hfi1_compute_tid_rdma_flow_wt(void);
-
void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet);
u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index 9a3d236bcc88..10290ebf76b2 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -1,52 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*/
#define CREATE_TRACE_POINTS
#include "trace.h"
#include "exp_rcv.h"
+#include "ipoib.h"
static u8 __get_ib_hdr_len(struct ib_header *hdr)
{
@@ -126,6 +85,7 @@ const char *hfi1_trace_get_packet_l2_str(u8 l2)
#define RETH_PRN "reth vaddr:0x%.16llx rkey:0x%.8x dlen:0x%.8x"
#define AETH_PRN "aeth syn:0x%.2x %s msn:0x%.8x"
#define DETH_PRN "deth qkey:0x%.8x sqpn:0x%.6x"
+#define DETH_ENTROPY_PRN "deth qkey:0x%.8x sqpn:0x%.6x entropy:0x%.2x"
#define IETH_PRN "ieth rkey:0x%.8x"
#define ATOMICACKETH_PRN "origdata:%llx"
#define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx"
@@ -187,6 +147,11 @@ void hfi1_trace_parse_16b_bth(struct ib_other_headers *ohdr,
*qpn = ib_bth_get_qpn(ohdr);
}
+static u16 ib_get_len(const struct ib_header *hdr)
+{
+ return be16_to_cpu(hdr->lrh[2]);
+}
+
void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5,
u8 *lnh, u8 *lver, u8 *sl, u8 *sc,
u16 *len, u32 *dlid, u32 *slid)
@@ -444,6 +409,12 @@ const char *parse_everbs_hdrs(
break;
/* deth */
case OP(UD, SEND_ONLY):
+ trace_seq_printf(p, DETH_ENTROPY_PRN,
+ be32_to_cpu(eh->ud.deth[0]),
+ be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK,
+ be32_to_cpu(eh->ud.deth[1]) >>
+ HFI1_IPOIB_ENTROPY_SHIFT);
+ break;
case OP(UD, SEND_ONLY_WITH_IMMEDIATE):
trace_seq_printf(p, DETH_PRN,
be32_to_cpu(eh->ud.deth[0]),
@@ -512,6 +483,38 @@ u16 hfi1_trace_get_tid_idx(u32 ent)
return EXP_TID_GET(ent, IDX);
}
+struct hfi1_ctxt_hist {
+ atomic_t count;
+ atomic_t data[255];
+};
+
+static struct hfi1_ctxt_hist hist = {
+ .count = ATOMIC_INIT(0)
+};
+
+const char *hfi1_trace_print_rsm_hist(struct trace_seq *p, unsigned int ctxt)
+{
+ int i, len = ARRAY_SIZE(hist.data);
+ const char *ret = trace_seq_buffer_ptr(p);
+ unsigned long packet_count = atomic_fetch_inc(&hist.count);
+
+ trace_seq_printf(p, "packet[%lu]", packet_count);
+ for (i = 0; i < len; ++i) {
+ unsigned long val;
+ atomic_t *count = &hist.data[i];
+
+ if (ctxt == i)
+ val = atomic_fetch_inc(count);
+ else
+ val = atomic_read(count);
+
+ if (val)
+ trace_seq_printf(p, "(%d:%lu)", i, val);
+ }
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
__hfi1_trace_fn(AFFINITY);
__hfi1_trace_fn(PKT);
__hfi1_trace_fn(PROC);
diff --git a/drivers/infiniband/hw/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h
index 1ce551864118..bb3cc006bacd 100644
--- a/drivers/infiniband/hw/hfi1/trace.h
+++ b/drivers/infiniband/hw/hfi1/trace.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype }
diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h
index e00c8a7d559c..76c41bd79071 100644
--- a/drivers/infiniband/hw/hfi1/trace_ctxts.h
+++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
-* Copyright(c) 2015, 2016 Intel Corporation.
-*
-* This file is provided under a dual BSD/GPLv2 license. When using or
-* redistributing this file, you may do so under either license.
-*
-* GPL LICENSE SUMMARY
-*
-* This program is free software; you can redistribute it and/or modify
-* it under the terms of version 2 of the GNU General Public License as
-* published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful, but
-* WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-* General Public License for more details.
-*
-* BSD LICENSE
-*
-* Redistribution and use in source and binary forms, with or without
-* modification, are permitted provided that the following conditions
-* are met:
-*
-* - Redistributions of source code must retain the above copyright
-* notice, this list of conditions and the following disclaimer.
-* - Redistributions in binary form must reproduce the above copyright
-* notice, this list of conditions and the following disclaimer in
-* the documentation and/or other materials provided with the
-* distribution.
-* - Neither the name of Intel Corporation nor the names of its
-* contributors may be used to endorse or promote products derived
-* from this software without specific prior written permission.
-*
-* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*
+* Copyright(c) 2015 - 2020 Intel Corporation.
*/
+
#if !defined(__HFI1_TRACE_CTXTS_H) || defined(TRACE_HEADER_MULTI_READ)
#define __HFI1_TRACE_CTXTS_H
@@ -80,7 +39,7 @@ TRACE_EVENT(hfi1_uctxtdata,
__entry->credits = uctxt->sc->credits;
__entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free);
__entry->piobase = uctxt->sc->base_addr;
- __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
+ __entry->rcvhdrq_cnt = get_hdrq_cnt(uctxt);
__entry->rcvhdrq_dma = uctxt->rcvhdrq_dma;
__entry->eager_cnt = uctxt->egrbufs.alloced;
__entry->rcvegr_dma = uctxt->egrbufs.rcvtids[0].dma;
@@ -138,6 +97,15 @@ TRACE_EVENT(hfi1_ctxt_info,
)
);
+const char *hfi1_trace_print_rsm_hist(struct trace_seq *p, unsigned int ctxt);
+TRACE_EVENT(ctxt_rsm_hist,
+ TP_PROTO(unsigned int ctxt),
+ TP_ARGS(ctxt),
+ TP_STRUCT__entry(__field(unsigned int, ctxt)),
+ TP_fast_assign(__entry->ctxt = ctxt;),
+ TP_printk("%s", hfi1_trace_print_rsm_hist(p, __entry->ctxt))
+);
+
#endif /* __HFI1_TRACE_CTXTS_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h
index de7a87392b8d..58304b91380f 100644
--- a/drivers/infiniband/hw/hfi1/trace_dbg.h
+++ b/drivers/infiniband/hw/hfi1/trace_dbg.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015 - 2018 Intel Corporation.
-*
-* This file is provided under a dual BSD/GPLv2 license. When using or
-* redistributing this file, you may do so under either license.
-*
-* GPL LICENSE SUMMARY
-*
-* This program is free software; you can redistribute it and/or modify
-* it under the terms of version 2 of the GNU General Public License as
-* published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful, but
-* WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-* General Public License for more details.
-*
-* BSD LICENSE
-*
-* Redistribution and use in source and binary forms, with or without
-* modification, are permitted provided that the following conditions
-* are met:
-*
-* - Redistributions of source code must retain the above copyright
-* notice, this list of conditions and the following disclaimer.
-* - Redistributions in binary form must reproduce the above copyright
-* notice, this list of conditions and the following disclaimer in
-* the documentation and/or other materials provided with the
-* distribution.
-* - Neither the name of Intel Corporation nor the names of its
-* contributors may be used to endorse or promote products derived
-* from this software without specific prior written permission.
-*
-* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*
*/
+
#if !defined(__HFI1_TRACE_EXTRA_H) || defined(TRACE_HEADER_MULTI_READ)
#define __HFI1_TRACE_EXTRA_H
@@ -63,24 +22,27 @@
#define MAX_MSG_LEN 512
+#pragma GCC diagnostic push
+#ifndef __clang__
+#pragma GCC diagnostic ignored "-Wsuggest-attribute=format"
+#endif
+
DECLARE_EVENT_CLASS(hfi1_trace_template,
TP_PROTO(const char *function, struct va_format *vaf),
TP_ARGS(function, vaf),
TP_STRUCT__entry(__string(function, function)
- __dynamic_array(char, msg, MAX_MSG_LEN)
+ __vstring(msg, vaf->fmt, vaf->va)
),
- TP_fast_assign(__assign_str(function, function);
- WARN_ON_ONCE(vsnprintf
- (__get_dynamic_array(msg),
- MAX_MSG_LEN, vaf->fmt,
- *vaf->va) >=
- MAX_MSG_LEN);
+ TP_fast_assign(__assign_str(function);
+ __assign_vstr(msg, vaf->fmt, vaf->va);
),
TP_printk("(%s) %s",
__get_str(function),
__get_str(msg))
);
+#pragma GCC diagnostic pop
+
/*
* It may be nice to macroize the __hfi1_trace but the va_* stuff requires an
* actual function to work and can not be in a macro.
diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
index d1372cc66de6..b21356abc9ec 100644
--- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
+++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015 - 2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+
#if !defined(__HFI1_TRACE_IBHDRS_H) || defined(TRACE_HEADER_MULTI_READ)
#define __HFI1_TRACE_IBHDRS_H
@@ -79,6 +38,8 @@ __print_symbolic(opcode, \
ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \
ib_opcode_name(RC_COMPARE_SWAP), \
ib_opcode_name(RC_FETCH_ADD), \
+ ib_opcode_name(RC_SEND_LAST_WITH_INVALIDATE), \
+ ib_opcode_name(RC_SEND_ONLY_WITH_INVALIDATE), \
ib_opcode_name(TID_RDMA_WRITE_REQ), \
ib_opcode_name(TID_RDMA_WRITE_RESP), \
ib_opcode_name(TID_RDMA_WRITE_DATA), \
diff --git a/drivers/infiniband/hw/hfi1/trace_misc.h b/drivers/infiniband/hw/hfi1/trace_misc.h
index 8db2253523ff..8dc46b6891df 100644
--- a/drivers/infiniband/hw/hfi1/trace_misc.h
+++ b/drivers/infiniband/hw/hfi1/trace_misc.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
-*
-* This file is provided under a dual BSD/GPLv2 license. When using or
-* redistributing this file, you may do so under either license.
-*
-* GPL LICENSE SUMMARY
-*
-* This program is free software; you can redistribute it and/or modify
-* it under the terms of version 2 of the GNU General Public License as
-* published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful, but
-* WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-* General Public License for more details.
-*
-* BSD LICENSE
-*
-* Redistribution and use in source and binary forms, with or without
-* modification, are permitted provided that the following conditions
-* are met:
-*
-* - Redistributions of source code must retain the above copyright
-* notice, this list of conditions and the following disclaimer.
-* - Redistributions in binary form must reproduce the above copyright
-* notice, this list of conditions and the following disclaimer in
-* the documentation and/or other materials provided with the
-* distribution.
-* - Neither the name of Intel Corporation nor the names of its
-* contributors may be used to endorse or promote products derived
-* from this software without specific prior written permission.
-*
-* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*
*/
+
#if !defined(__HFI1_TRACE_MISC_H) || defined(TRACE_HEADER_MULTI_READ)
#define __HFI1_TRACE_MISC_H
@@ -63,7 +22,7 @@ TRACE_EVENT(hfi1_interrupt,
__array(char, buf, 64)
__field(int, src)
),
- TP_fast_assign(DD_DEV_ASSIGN(dd)
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
is_entry->is_name(__entry->buf, 64,
src - is_entry->start);
__entry->src = src;
@@ -100,7 +59,7 @@ TRACE_EVENT(hfi1_fault_opcode,
__field(u32, qpn)
__field(u8, opcode)
),
- TP_fast_assign(DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ TP_fast_assign(DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->qpn = qp->ibqp.qp_num;
__entry->opcode = opcode;
),
diff --git a/drivers/infiniband/hw/hfi1/trace_mmu.h b/drivers/infiniband/hw/hfi1/trace_mmu.h
index 3b7abbc382c2..5a9dfd85e7f5 100644
--- a/drivers/infiniband/hw/hfi1/trace_mmu.h
+++ b/drivers/infiniband/hw/hfi1/trace_mmu.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+
#if !defined(__HFI1_TRACE_MMU_H) || defined(TRACE_HEADER_MULTI_READ)
#define __HFI1_TRACE_MMU_H
@@ -56,35 +15,53 @@
#define TRACE_SYSTEM hfi1_mmu
DECLARE_EVENT_CLASS(hfi1_mmu_rb_template,
- TP_PROTO(unsigned long addr, unsigned long len),
- TP_ARGS(addr, len),
+ TP_PROTO(struct mmu_rb_node *node),
+ TP_ARGS(node),
TP_STRUCT__entry(__field(unsigned long, addr)
__field(unsigned long, len)
+ __field(unsigned int, refcount)
),
- TP_fast_assign(__entry->addr = addr;
- __entry->len = len;
+ TP_fast_assign(__entry->addr = node->addr;
+ __entry->len = node->len;
+ __entry->refcount = kref_read(&node->refcount);
),
- TP_printk("MMU node addr 0x%lx, len %lu",
+ TP_printk("MMU node addr 0x%lx, len %lu, refcount %u",
__entry->addr,
- __entry->len
+ __entry->len,
+ __entry->refcount
)
);
DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_insert,
- TP_PROTO(unsigned long addr, unsigned long len),
- TP_ARGS(addr, len));
-
-DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_search,
- TP_PROTO(unsigned long addr, unsigned long len),
- TP_ARGS(addr, len));
+ TP_PROTO(struct mmu_rb_node *node),
+ TP_ARGS(node));
-DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_remove,
- TP_PROTO(unsigned long addr, unsigned long len),
- TP_ARGS(addr, len));
+TRACE_EVENT(hfi1_mmu_rb_search,
+ TP_PROTO(unsigned long addr, unsigned long len),
+ TP_ARGS(addr, len),
+ TP_STRUCT__entry(__field(unsigned long, addr)
+ __field(unsigned long, len)
+ ),
+ TP_fast_assign(__entry->addr = addr;
+ __entry->len = len;
+ ),
+ TP_printk("MMU node addr 0x%lx, len %lu",
+ __entry->addr,
+ __entry->len
+ )
+);
DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_mem_invalidate,
- TP_PROTO(unsigned long addr, unsigned long len),
- TP_ARGS(addr, len));
+ TP_PROTO(struct mmu_rb_node *node),
+ TP_ARGS(node));
+
+DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_evict,
+ TP_PROTO(struct mmu_rb_node *node),
+ TP_ARGS(node));
+
+DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_release_node,
+ TP_PROTO(struct mmu_rb_node *node),
+ TP_ARGS(node));
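With the event class now keyed on the node rather than on (addr, len), call sites pass the node itself; illustratively (local variable name hypothetical):

	trace_hfi1_mmu_rb_insert(mnode);	/* previously: trace_hfi1_mmu_rb_insert(mnode->addr, mnode->len) */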
#endif /* __HFI1_TRACE_RC_H */
diff --git a/drivers/infiniband/hw/hfi1/trace_rc.h b/drivers/infiniband/hw/hfi1/trace_rc.h
index 1ebca37862e0..fa254f9b9c42 100644
--- a/drivers/infiniband/hw/hfi1/trace_rc.h
+++ b/drivers/infiniband/hw/hfi1/trace_rc.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016, 2017 Intel Corporation.
-*
-* This file is provided under a dual BSD/GPLv2 license. When using or
-* redistributing this file, you may do so under either license.
-*
-* GPL LICENSE SUMMARY
-*
-* This program is free software; you can redistribute it and/or modify
-* it under the terms of version 2 of the GNU General Public License as
-* published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful, but
-* WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-* General Public License for more details.
-*
-* BSD LICENSE
-*
-* Redistribution and use in source and binary forms, with or without
-* modification, are permitted provided that the following conditions
-* are met:
-*
-* - Redistributions of source code must retain the above copyright
-* notice, this list of conditions and the following disclaimer.
-* - Redistributions in binary form must reproduce the above copyright
-* notice, this list of conditions and the following disclaimer in
-* the documentation and/or other materials provided with the
-* distribution.
-* - Neither the name of Intel Corporation nor the names of its
-* contributors may be used to endorse or promote products derived
-* from this software without specific prior written permission.
-*
-* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*
*/
+
#if !defined(__HFI1_TRACE_RC_H) || defined(TRACE_HEADER_MULTI_READ)
#define __HFI1_TRACE_RC_H
@@ -70,7 +29,7 @@ DECLARE_EVENT_CLASS(hfi1_rc_template,
__field(u32, r_psn)
),
TP_fast_assign(
- DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->qpn = qp->ibqp.qp_num;
__entry->s_flags = qp->s_flags;
__entry->psn = psn;
@@ -130,7 +89,7 @@ DECLARE_EVENT_CLASS(/* rc_ack */
__field(u32, lpsn)
),
TP_fast_assign(/* assign */
- DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->qpn = qp->ibqp.qp_num;
__entry->aeth = aeth;
__entry->psn = psn;
diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h
index 3cec960e9674..8d5e12fe88a5 100644
--- a/drivers/infiniband/hw/hfi1/trace_rx.h
+++ b/drivers/infiniband/hw/hfi1/trace_rx.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+
#if !defined(__HFI1_TRACE_RX_H) || defined(TRACE_HEADER_MULTI_READ)
#define __HFI1_TRACE_RX_H
@@ -106,19 +65,8 @@ TRACE_EVENT(hfi1_receive_interrupt,
),
TP_fast_assign(DD_DEV_ASSIGN(dd);
__entry->ctxt = rcd->ctxt;
- if (rcd->do_interrupt ==
- &handle_receive_interrupt) {
- __entry->slow_path = 1;
- __entry->dma_rtail = 0xFF;
- } else if (rcd->do_interrupt ==
- &handle_receive_interrupt_dma_rtail){
- __entry->dma_rtail = 1;
- __entry->slow_path = 0;
- } else if (rcd->do_interrupt ==
- &handle_receive_interrupt_nodma_rtail) {
- __entry->dma_rtail = 0;
- __entry->slow_path = 0;
- }
+ __entry->slow_path = hfi1_is_slowpath(rcd);
+ __entry->dma_rtail = get_dma_rtail_setting(rcd);
),
TP_printk("[%s] ctxt %d SlowPath: %d DmaRtail: %d",
__get_str(dev),
@@ -142,7 +90,7 @@ TRACE_EVENT(hfi1_mmu_invalidate,
TP_fast_assign(
__entry->ctxt = ctxt;
__entry->subctxt = subctxt;
- __assign_str(type, type);
+ __assign_str(type);
__entry->start = start;
__entry->end = end;
),
diff --git a/drivers/infiniband/hw/hfi1/trace_tid.h b/drivers/infiniband/hw/hfi1/trace_tid.h
index 4388b594ed1b..e358f5b885fa 100644
--- a/drivers/infiniband/hw/hfi1/trace_tid.h
+++ b/drivers/infiniband/hw/hfi1/trace_tid.h
@@ -138,10 +138,10 @@ TRACE_EVENT(/* put_tid */
TP_ARGS(dd, index, type, pa, order),
TP_STRUCT__entry(/* entry */
DD_DEV_ENTRY(dd)
- __field(unsigned long, pa);
- __field(u32, index);
- __field(u32, type);
- __field(u16, order);
+ __field(unsigned long, pa)
+ __field(u32, index)
+ __field(u32, type)
+ __field(u16, order)
),
TP_fast_assign(/* assign */
DD_DEV_ASSIGN(dd);
@@ -358,7 +358,7 @@ DECLARE_EVENT_CLASS(/* msg */
),
TP_fast_assign(/* assign */
__entry->qpn = qp ? qp->ibqp.qp_num : 0;
- __assign_str(msg, msg);
+ __assign_str(msg);
__entry->more = more;
),
TP_printk(/* print */
@@ -627,6 +627,12 @@ DEFINE_EVENT(/* event */
TP_ARGS(qp, index, flow)
);
+DEFINE_EVENT(/* event */
+ hfi1_tid_flow_template, hfi1_tid_flow_read_kdeth_eflags,
+ TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
+ TP_ARGS(qp, index, flow)
+);
+
DECLARE_EVENT_CLASS(/* tid_node */
hfi1_tid_node_template,
TP_PROTO(struct rvt_qp *qp, const char *msg, u32 index, u32 base,
@@ -645,7 +651,7 @@ DECLARE_EVENT_CLASS(/* tid_node */
TP_fast_assign(/* assign */
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->qpn = qp->ibqp.qp_num;
- __assign_str(msg, msg);
+ __assign_str(msg);
__entry->index = index;
__entry->base = base;
__entry->map = map;
@@ -851,6 +857,12 @@ DEFINE_EVENT(/* event */
TP_ARGS(qp, psn)
);
+DEFINE_EVENT(/* event */
+ hfi1_responder_info_template, hfi1_rsp_read_kdeth_eflags,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn)
+);
+
DECLARE_EVENT_CLASS(/* sender_info */
hfi1_sender_info_template,
TP_PROTO(struct rvt_qp *qp),
@@ -874,7 +886,7 @@ DECLARE_EVENT_CLASS(/* sender_info */
__field(u8, s_retry)
),
TP_fast_assign(/* assign */
- DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->qpn = qp->ibqp.qp_num;
__entry->state = qp->state;
__entry->s_cur = qp->s_cur;
@@ -955,6 +967,12 @@ DEFINE_EVENT(/* event */
TP_ARGS(qp)
);
+DEFINE_EVENT(/* event */
+ hfi1_sender_info_template, hfi1_sender_read_kdeth_eflags,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp)
+);
+
DECLARE_EVENT_CLASS(/* tid_read_sender */
hfi1_tid_read_sender_template,
TP_PROTO(struct rvt_qp *qp, char newreq),
@@ -1015,6 +1033,12 @@ DEFINE_EVENT(/* event */
TP_ARGS(qp, newreq)
);
+DEFINE_EVENT(/* event */
+ hfi1_tid_read_sender_template, hfi1_tid_read_sender_kdeth_eflags,
+ TP_PROTO(struct rvt_qp *qp, char newreq),
+ TP_ARGS(qp, newreq)
+);
+
DECLARE_EVENT_CLASS(/* tid_rdma_request */
hfi1_tid_rdma_request_template,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
@@ -1216,6 +1240,13 @@ DEFINE_EVENT(/* event */
);
DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_read_kdeth_eflags,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DEFINE_EVENT(/* event */
hfi1_tid_rdma_request_template, hfi1_tid_req_make_rc_ack_write,
TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
struct tid_rdma_request *req),
@@ -1229,6 +1260,13 @@ DEFINE_EVENT(/* event */
TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
);
+DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_update_num_rd_atomic,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
DECLARE_EVENT_CLASS(/* rc_rcv_err */
hfi1_rc_rcv_err_template,
TP_PROTO(struct rvt_qp *qp, u32 opcode, u32 psn, int diff),
@@ -1247,7 +1285,7 @@ DECLARE_EVENT_CLASS(/* rc_rcv_err */
__field(int, diff)
),
TP_fast_assign(/* assign */
- DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->qpn = qp->ibqp.qp_num;
__entry->s_flags = qp->s_flags;
__entry->state = qp->state;
@@ -1536,7 +1574,7 @@ DECLARE_EVENT_CLASS(/* tid_ack */
__field(u32, resync_psn)
),
TP_fast_assign(/* assign */
- DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->qpn = qp->ibqp.qp_num;
__entry->aeth = aeth;
__entry->psn = psn;
diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h
index 09eb0c9ada00..c0ba6b0a2c4e 100644
--- a/drivers/infiniband/hw/hfi1/trace_tx.h
+++ b/drivers/infiniband/hw/hfi1/trace_tx.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015 - 2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#if !defined(__HFI1_TRACE_TX_H) || defined(TRACE_HEADER_MULTI_READ)
#define __HFI1_TRACE_TX_H
@@ -53,6 +11,8 @@
#include "hfi.h"
#include "mad.h"
#include "sdma.h"
+#include "ipoib.h"
+#include "user_sdma.h"
const char *parse_sdma_flags(struct trace_seq *p, u64 desc0, u64 desc1);
@@ -118,7 +78,7 @@ DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template,
__field(unsigned long, iow_flags)
),
TP_fast_assign(
- DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->flags = flags;
__entry->qpn = qp->ibqp.qp_num;
__entry->s_flags = qp->s_flags;
@@ -588,7 +548,7 @@ TRACE_EVENT(hfi1_sdma_user_reqinfo,
TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 *i),
TP_ARGS(dd, ctxt, subctxt, i),
TP_STRUCT__entry(
- DD_DEV_ENTRY(dd);
+ DD_DEV_ENTRY(dd)
__field(u16, ctxt)
__field(u8, subctxt)
__field(u8, ver_opcode)
@@ -653,6 +613,80 @@ TRACE_EVENT(hfi1_sdma_user_completion,
__entry->code)
);
+TRACE_EVENT(hfi1_usdma_defer,
+ TP_PROTO(struct hfi1_user_sdma_pkt_q *pq,
+ struct sdma_engine *sde,
+ struct iowait *wait),
+ TP_ARGS(pq, sde, wait),
+ TP_STRUCT__entry(DD_DEV_ENTRY(pq->dd)
+ __field(struct hfi1_user_sdma_pkt_q *, pq)
+ __field(struct sdma_engine *, sde)
+ __field(struct iowait *, wait)
+ __field(int, engine)
+ __field(int, empty)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(pq->dd);
+ __entry->pq = pq;
+ __entry->sde = sde;
+ __entry->wait = wait;
+ __entry->engine = sde->this_idx;
+ __entry->empty = list_empty(&__entry->wait->list);
+ ),
+ TP_printk("[%s] pq %llx sde %llx wait %llx engine %d empty %d",
+ __get_str(dev),
+ (unsigned long long)__entry->pq,
+ (unsigned long long)__entry->sde,
+ (unsigned long long)__entry->wait,
+ __entry->engine,
+ __entry->empty
+ )
+);
+
+TRACE_EVENT(hfi1_usdma_activate,
+ TP_PROTO(struct hfi1_user_sdma_pkt_q *pq,
+ struct iowait *wait,
+ int reason),
+ TP_ARGS(pq, wait, reason),
+ TP_STRUCT__entry(DD_DEV_ENTRY(pq->dd)
+ __field(struct hfi1_user_sdma_pkt_q *, pq)
+ __field(struct iowait *, wait)
+ __field(int, reason)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(pq->dd);
+ __entry->pq = pq;
+ __entry->wait = wait;
+ __entry->reason = reason;
+ ),
+ TP_printk("[%s] pq %llx wait %llx reason %d",
+ __get_str(dev),
+ (unsigned long long)__entry->pq,
+ (unsigned long long)__entry->wait,
+ __entry->reason
+ )
+);
+
+TRACE_EVENT(hfi1_usdma_we,
+ TP_PROTO(struct hfi1_user_sdma_pkt_q *pq,
+ int we_ret),
+ TP_ARGS(pq, we_ret),
+ TP_STRUCT__entry(DD_DEV_ENTRY(pq->dd)
+ __field(struct hfi1_user_sdma_pkt_q *, pq)
+ __field(int, state)
+ __field(int, we_ret)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(pq->dd);
+ __entry->pq = pq;
+ __entry->state = pq->state;
+ __entry->we_ret = we_ret;
+ ),
+ TP_printk("[%s] pq %llx state %d we_ret %d",
+ __get_str(dev),
+ (unsigned long long)__entry->pq,
+ __entry->state,
+ __entry->we_ret
+ )
+);
+
const char *print_u32_array(struct trace_seq *, u32 *, int);
#define __print_u32_hex(arr, len) print_u32_array(p, arr, len)
@@ -706,8 +740,8 @@ TRACE_EVENT(hfi1_sdma_state,
__string(newstate, nstate)
),
TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
- __assign_str(curstate, cstate);
- __assign_str(newstate, nstate);
+ __assign_str(curstate);
+ __assign_str(newstate);
),
TP_printk("[%s] current state %s new state %s",
__get_str(dev),
@@ -792,7 +826,7 @@ TRACE_EVENT(
__field(int, send_flags)
),
TP_fast_assign(
- DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->wqe = wqe;
__entry->wr_id = wqe->wr.wr_id;
__entry->qpn = qp->ibqp.qp_num;
@@ -828,7 +862,7 @@ DECLARE_EVENT_CLASS(
__field(bool, flag)
),
TP_fast_assign(
- DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->qpn = qp->ibqp.qp_num;
__entry->flag = flag;
),
@@ -858,6 +892,170 @@ DEFINE_EVENT(
TP_ARGS(qp, flag)
);
+DECLARE_EVENT_CLASS(/* AIP */
+ hfi1_ipoib_txq_template,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(txq->priv->dd)
+ __field(struct hfi1_ipoib_txq *, txq)
+ __field(struct sdma_engine *, sde)
+ __field(ulong, head)
+ __field(ulong, tail)
+ __field(uint, used)
+ __field(uint, flow)
+ __field(int, stops)
+ __field(int, no_desc)
+ __field(u8, idx)
+ __field(u8, stopped)
+ ),
+ TP_fast_assign(/* assign */
+ DD_DEV_ASSIGN(txq->priv->dd);
+ __entry->txq = txq;
+ __entry->sde = txq->sde;
+ __entry->head = txq->tx_ring.head;
+ __entry->tail = txq->tx_ring.tail;
+ __entry->idx = txq->q_idx;
+ __entry->used =
+ txq->tx_ring.sent_txreqs -
+ txq->tx_ring.complete_txreqs;
+ __entry->flow = txq->flow.as_int;
+ __entry->stops = atomic_read(&txq->tx_ring.stops);
+ __entry->no_desc = atomic_read(&txq->tx_ring.no_desc);
+ __entry->stopped =
+ __netif_subqueue_stopped(txq->priv->netdev, txq->q_idx);
+ ),
+ TP_printk(/* print */
+ "[%s] txq %llx idx %u sde %llx:%u cpu %d head %lx tail %lx flow %x used %u stops %d no_desc %d stopped %u",
+ __get_str(dev),
+ (unsigned long long)__entry->txq,
+ __entry->idx,
+ (unsigned long long)__entry->sde,
+ __entry->sde ? __entry->sde->this_idx : 0,
+ __entry->sde ? __entry->sde->cpu : 0,
+ __entry->head,
+ __entry->tail,
+ __entry->flow,
+ __entry->used,
+ __entry->stops,
+ __entry->no_desc,
+ __entry->stopped
+ )
+);
+
+DEFINE_EVENT(/* queue stop */
+ hfi1_ipoib_txq_template, hfi1_txq_stop,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
+DEFINE_EVENT(/* queue wake */
+ hfi1_ipoib_txq_template, hfi1_txq_wake,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
+DEFINE_EVENT(/* flow flush */
+ hfi1_ipoib_txq_template, hfi1_flow_flush,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
+DEFINE_EVENT(/* flow switch */
+ hfi1_ipoib_txq_template, hfi1_flow_switch,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
+DEFINE_EVENT(/* wakeup */
+ hfi1_ipoib_txq_template, hfi1_txq_wakeup,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
+DEFINE_EVENT(/* full */
+ hfi1_ipoib_txq_template, hfi1_txq_full,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
+DEFINE_EVENT(/* queued */
+ hfi1_ipoib_txq_template, hfi1_txq_queued,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
+DEFINE_EVENT(/* xmit_stopped */
+ hfi1_ipoib_txq_template, hfi1_txq_xmit_stopped,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
+DEFINE_EVENT(/* xmit_unstopped */
+ hfi1_ipoib_txq_template, hfi1_txq_xmit_unstopped,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
+DECLARE_EVENT_CLASS(/* AIP */
+ hfi1_ipoib_tx_template,
+ TP_PROTO(struct ipoib_txreq *tx, u32 idx),
+ TP_ARGS(tx, idx),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(tx->txq->priv->dd)
+ __field(struct ipoib_txreq *, tx)
+ __field(struct hfi1_ipoib_txq *, txq)
+ __field(struct sk_buff *, skb)
+ __field(ulong, idx)
+ ),
+ TP_fast_assign(/* assign */
+ DD_DEV_ASSIGN(tx->txq->priv->dd);
+ __entry->tx = tx;
+ __entry->skb = tx->skb;
+ __entry->txq = tx->txq;
+ __entry->idx = idx;
+ ),
+ TP_printk(/* print */
+ "[%s] tx %llx txq %llx,%u skb %llx idx %lu",
+ __get_str(dev),
+ (unsigned long long)__entry->tx,
+ (unsigned long long)__entry->txq,
+ __entry->txq ? __entry->txq->q_idx : 0,
+ (unsigned long long)__entry->skb,
+ __entry->idx
+ )
+);
+
+DEFINE_EVENT(/* produce */
+ hfi1_ipoib_tx_template, hfi1_tx_produce,
+ TP_PROTO(struct ipoib_txreq *tx, u32 idx),
+ TP_ARGS(tx, idx)
+);
+
+DEFINE_EVENT(/* consume */
+ hfi1_ipoib_tx_template, hfi1_tx_consume,
+ TP_PROTO(struct ipoib_txreq *tx, u32 idx),
+ TP_ARGS(tx, idx)
+);
+
+DEFINE_EVENT(/* alloc_tx */
+ hfi1_ipoib_txq_template, hfi1_txq_alloc_tx,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
+DEFINE_EVENT(/* poll */
+ hfi1_ipoib_txq_template, hfi1_txq_poll,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
+DEFINE_EVENT(/* complete */
+ hfi1_ipoib_txq_template, hfi1_txq_complete,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
#endif /* __HFI1_TRACE_TX_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 4ed4fcfabd6c..33d2c2a218e2 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#include "hfi.h"
@@ -55,6 +13,7 @@
/**
* hfi1_make_uc_req - construct a request packet (SEND, RDMA write)
* @qp: a pointer to the QP
+ * @ps: the current packet state
*
* Assume s_lock is held.
*
@@ -216,7 +175,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
case OP(SEND_FIRST):
qp->s_state = OP(SEND_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_MIDDLE):
len = qp->s_len;
if (len > pmtu) {
@@ -241,7 +200,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
case OP(RDMA_WRITE_FIRST):
qp->s_state = OP(RDMA_WRITE_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_WRITE_MIDDLE):
len = qp->s_len;
if (len > pmtu) {
@@ -291,12 +250,7 @@ bail_no_tx:
/**
* hfi1_uc_rcv - handle an incoming UC packet
- * @ibp: the port the packet came in on
- * @hdr: the header of the packet
- * @rcv_flags: flags relevant to rcv processing
- * @data: the packet data
- * @tlen: the length of the packet
- * @qp: the QP for this packet.
+ * @packet: the packet structure
*
* This is called from qp_rcv() to process an incoming UC packet
* for the given QP.
@@ -414,7 +368,7 @@ send_first:
goto no_immediate_data;
else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
goto send_last_imm;
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_MIDDLE):
/* Check for invalid length PMTU or posted rwqe len. */
/*
@@ -476,8 +430,7 @@ last_imm:
wc.dlid_path_bits = 0;
wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
- ib_bth_is_solicited(ohdr));
+ rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr));
break;
case OP(RDMA_WRITE_FIRST):
@@ -516,7 +469,7 @@ rdma_first:
wc.ex.imm_data = ohdr->u.rc.imm_data;
goto rdma_last_imm;
}
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_WRITE_MIDDLE):
/* Check for invalid length PMTU or posted rwqe len. */
if (unlikely(tlen != (hdrsize + pmtu + 4)))
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index f88ad425664a..89d1bae8f824 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2019 Intel Corporation.
*/
#include <linux/net.h>
@@ -87,7 +45,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
rcu_read_lock();
qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
- swqe->ud_wr.remote_qpn);
+ rvt_get_swqe_remote_qpn(swqe));
if (!qp) {
ibp->rvp.n_pkt_drops++;
rcu_read_unlock();
@@ -105,7 +63,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
goto drop;
}
- ah_attr = &ibah_to_rvtah(swqe->ud_wr.ah)->attr;
+ ah_attr = rvt_get_swqe_ah_attr(swqe);
ppd = ppd_from_ibp(ibp);
if (qp->ibqp.qp_num > 1) {
@@ -135,8 +93,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (qp->ibqp.qp_num) {
u32 qkey;
- qkey = (int)swqe->ud_wr.remote_qkey < 0 ?
- sqp->qkey : swqe->ud_wr.remote_qkey;
+ qkey = (int)rvt_get_swqe_remote_qkey(swqe) < 0 ?
+ sqp->qkey : rvt_get_swqe_remote_qkey(swqe);
if (unlikely(qkey != qp->qkey))
goto drop; /* silently drop per IBTA spec */
}
@@ -240,7 +198,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) {
if (sqp->ibqp.qp_type == IB_QPT_GSI ||
sqp->ibqp.qp_type == IB_QPT_SMI)
- wc.pkey_index = swqe->ud_wr.pkey_index;
+ wc.pkey_index = rvt_get_swqe_pkey_index(swqe);
else
wc.pkey_index = sqp->s_pkey_index;
} else {
@@ -255,8 +213,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
wc.dlid_path_bits = rdma_ah_get_dlid(ah_attr) & ((1 << ppd->lmc) - 1);
wc.port_num = qp->port_num;
/* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
- swqe->wr.send_flags & IB_SEND_SOLICITED);
+ rvt_recv_cq(qp, &wc, swqe->wr.send_flags & IB_SEND_SOLICITED);
ibp->rvp.n_loop_pkts++;
bail_unlock:
spin_unlock_irqrestore(&qp->r_lock, flags);
@@ -283,20 +240,21 @@ static void hfi1_make_bth_deth(struct rvt_qp *qp, struct rvt_swqe *wqe,
bth0 |= IB_BTH_SOLICITED;
bth0 |= extra_bytes << 20;
if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI)
- *pkey = hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index);
+ *pkey = hfi1_get_pkey(ibp, rvt_get_swqe_pkey_index(wqe));
else
*pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
if (!bypass)
bth0 |= *pkey;
ohdr->bth[0] = cpu_to_be32(bth0);
- ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn);
+ ohdr->bth[1] = cpu_to_be32(rvt_get_swqe_remote_qpn(wqe));
ohdr->bth[2] = cpu_to_be32(mask_psn(wqe->psn));
/*
* Qkeys with the high order bit set mean use the
* qkey from the QP context instead of the WR (see 10.2.5).
*/
- ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ?
- qp->qkey : wqe->ud_wr.remote_qkey);
+ ohdr->u.ud.deth[0] =
+ cpu_to_be32((int)rvt_get_swqe_remote_qkey(wqe) < 0 ? qp->qkey :
+ rvt_get_swqe_remote_qkey(wqe));
ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
}
@@ -316,7 +274,7 @@ void hfi1_make_ud_req_9B(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
ibp = to_iport(qp->ibqp.device, qp->port_num);
ppd = ppd_from_ibp(ibp);
- ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
+ ah_attr = rvt_get_swqe_ah_attr(wqe);
extra_bytes = -wqe->length & 3;
nwords = ((wqe->length + extra_bytes) >> 2) + SIZE_OF_CRC;
@@ -380,7 +338,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
struct hfi1_pportdata *ppd;
struct hfi1_ibport *ibp;
u32 dlid, slid, nwords, extra_bytes;
- u32 dest_qp = wqe->ud_wr.remote_qpn;
+ u32 dest_qp = rvt_get_swqe_remote_qpn(wqe);
u32 src_qp = qp->ibqp.qp_num;
u16 len, pkey;
u8 l4, sc5;
@@ -388,7 +346,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
ibp = to_iport(qp->ibqp.device, qp->port_num);
ppd = ppd_from_ibp(ibp);
- ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
+ ah_attr = rvt_get_swqe_ah_attr(wqe);
/*
* Build 16B Management Packet if either the destination
@@ -450,7 +408,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (is_mgmt) {
l4 = OPA_16B_L4_FM;
- pkey = hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index);
+ pkey = hfi1_get_pkey(ibp, rvt_get_swqe_pkey_index(wqe));
hfi1_16B_set_qpn(&ps->s_txreq->phdr.hdr.opah.u.mgmt,
dest_qp, src_qp);
} else {
@@ -468,6 +426,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
/**
* hfi1_make_ud_req - construct a UD request packet
* @qp: the QP
+ * @ps: the current packet state
*
* Assume s_lock is held.
*
@@ -515,7 +474,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
/* Construct the header. */
ibp = to_iport(qp->ibqp.device, qp->port_num);
ppd = ppd_from_ibp(ibp);
- ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
+ ah_attr = rvt_get_swqe_ah_attr(wqe);
priv->hdr_type = hfi1_get_hdr_type(ppd->lid, ah_attr);
if ((!hfi1_check_mcast(rdma_ah_get_dlid(ah_attr))) ||
(rdma_ah_get_dlid(ah_attr) == be32_to_cpu(OPA_LID_PERMISSIVE))) {
@@ -683,7 +642,7 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
if (ctxt) {
pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL);
- if (pbuf) {
+ if (!IS_ERR_OR_NULL(pbuf)) {
trace_pio_output_ibhdr(ppd->dd, &hdr, sc5);
ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc,
&hdr, hwords);
@@ -738,7 +697,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
if (ctxt) {
pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL);
- if (pbuf) {
+ if (!IS_ERR_OR_NULL(pbuf)) {
trace_pio_output_ibhdr(ppd->dd, &hdr, sc5);
ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc,
&hdr, hwords);
@@ -840,12 +799,7 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
/**
* hfi1_ud_rcv - receive an incoming UD packet
- * @ibp: the port the packet came in on
- * @hdr: the packet header
- * @rcv_flags: flags relevant to rcv processing
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP the packet came on
+ * @packet: the packet structure
*
* This is called from qp_rcv() to process an incoming UD packet
* for the given QP.
@@ -1061,7 +1015,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1);
wc.port_num = qp->port_num;
/* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, solicited);
+ rvt_recv_cq(qp, &wc, solicited);
return;
drop:
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 0cd71ce7cc71..62b4f16dab27 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -1,48 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
+ * Copyright(c) 2020 Cornelis Networks, Inc.
* Copyright(c) 2015-2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#include <asm/page.h>
#include <linux/string.h>
@@ -59,24 +18,28 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
struct tid_user_buf *tbuf,
u32 rcventry, struct tid_group *grp,
u16 pageidx, unsigned int npages);
-static int tid_rb_insert(void *arg, struct mmu_rb_node *node);
static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
struct tid_rb_node *tnode);
-static void tid_rb_remove(void *arg, struct mmu_rb_node *node);
-static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
+static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq);
+static bool tid_cover_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq);
static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *,
- struct tid_group *grp,
- unsigned int start, u16 count,
+ struct tid_group *grp, u16 count,
u32 *tidlist, unsigned int *tididx,
unsigned int *pmapped);
-static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
- struct tid_group **grp);
+static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo);
+static void __clear_tid_node(struct hfi1_filedata *fd,
+ struct tid_rb_node *node);
static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
-static struct mmu_rb_ops tid_rb_ops = {
- .insert = tid_rb_insert,
- .remove = tid_rb_remove,
- .invalidate = tid_rb_invalidate
+static const struct mmu_interval_notifier_ops tid_mn_ops = {
+ .invalidate = tid_rb_invalidate,
+};
+static const struct mmu_interval_notifier_ops tid_cover_ops = {
+ .invalidate = tid_cover_invalidate,
};
/*
@@ -87,14 +50,10 @@ static struct mmu_rb_ops tid_rb_ops = {
int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
struct hfi1_ctxtdata *uctxt)
{
- struct hfi1_devdata *dd = uctxt->dd;
int ret = 0;
- spin_lock_init(&fd->tid_lock);
- spin_lock_init(&fd->invalid_lock);
-
fd->entry_to_rb = kcalloc(uctxt->expected_count,
- sizeof(struct rb_node *),
+ sizeof(*fd->entry_to_rb),
GFP_KERNEL);
if (!fd->entry_to_rb)
return -ENOMEM;
@@ -109,20 +68,7 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
fd->entry_to_rb = NULL;
return -ENOMEM;
}
-
- /*
- * Register MMU notifier callbacks. If the registration
- * fails, continue without TID caching for this context.
- */
- ret = hfi1_mmu_rb_register(fd, fd->mm, &tid_rb_ops,
- dd->pport->hfi1_wq,
- &fd->handler);
- if (ret) {
- dd_dev_info(dd,
- "Failed MMU notifier registration %d\n",
- ret);
- ret = 0;
- }
+ fd->use_mn = true;
}
/*
@@ -139,7 +85,7 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
* init.
*/
spin_lock(&fd->tid_lock);
- if (uctxt->subctxt_cnt && fd->handler) {
+ if (uctxt->subctxt_cnt && fd->use_mn) {
u16 remainder;
fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt;
@@ -158,18 +104,12 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
- /*
- * The notifier would have been removed when the process'es mm
- * was freed.
- */
- if (fd->handler) {
- hfi1_mmu_rb_unregister(fd->handler);
- } else {
- if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
- unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
- if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
- unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
- }
+ mutex_lock(&uctxt->exp_mutex);
+ if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
+ unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
+ if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
+ unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
+ mutex_unlock(&uctxt->exp_mutex);
kfree(fd->invalid_tids);
fd->invalid_tids = NULL;
@@ -178,12 +118,12 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
fd->entry_to_rb = NULL;
}
-/**
+/*
* Release pinned receive buffer pages.
*
- * @mapped - true if the pages have been DMA mapped. false otherwise.
- * @idx - Index of the first page to unpin.
- * @npages - No of pages to unpin.
+ * @mapped: true if the pages have been DMA mapped. false otherwise.
+ * @idx: Index of the first page to unpin.
+ * @npages: No of pages to unpin.
*
* If the pages have been DMA mapped (indicated by mapped parameter), their
* info will be passed via a struct tid_rb_node. If they haven't been mapped,
@@ -198,46 +138,37 @@ static void unpin_rcv_pages(struct hfi1_filedata *fd,
{
struct page **pages;
struct hfi1_devdata *dd = fd->uctxt->dd;
+ struct mm_struct *mm;
if (mapped) {
- pci_unmap_single(dd->pcidev, node->dma_addr,
- node->mmu.len, PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&dd->pcidev->dev, node->dma_addr,
+ node->npages * PAGE_SIZE, DMA_FROM_DEVICE);
pages = &node->pages[idx];
+ mm = mm_from_tid_node(node);
} else {
pages = &tidbuf->pages[idx];
+ mm = current->mm;
}
- hfi1_release_user_pages(fd->mm, pages, npages, mapped);
+ hfi1_release_user_pages(mm, pages, npages, mapped);
fd->tid_n_pinned -= npages;
}
-/**
+/*
* Pin receive buffer pages.
*/
static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf)
{
int pinned;
- unsigned int npages;
+ unsigned int npages = tidbuf->npages;
unsigned long vaddr = tidbuf->vaddr;
struct page **pages = NULL;
struct hfi1_devdata *dd = fd->uctxt->dd;
- /* Get the number of pages the user buffer spans */
- npages = num_user_pages(vaddr, tidbuf->length);
- if (!npages)
- return -EINVAL;
-
if (npages > fd->uctxt->expected_count) {
dd_dev_err(dd, "Expected buffer too big\n");
return -EINVAL;
}
- /* Verify that access is OK for the user buffer */
- if (!access_ok((void __user *)vaddr,
- npages * PAGE_SIZE)) {
- dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n",
- (void *)vaddr, npages);
- return -EFAULT;
- }
/* Allocate the array of struct page pointers needed for pinning */
pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
if (!pages)
@@ -248,18 +179,17 @@ static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf)
* pages, accept the amount pinned so far and program only that.
* User space knows how to deal with partially programmed buffers.
*/
- if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) {
+ if (!hfi1_can_pin_pages(dd, current->mm, fd->tid_n_pinned, npages)) {
kfree(pages);
return -ENOMEM;
}
- pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages);
+ pinned = hfi1_acquire_user_pages(current->mm, vaddr, npages, true, pages);
if (pinned <= 0) {
kfree(pages);
return pinned;
}
tidbuf->pages = pages;
- tidbuf->npages = npages;
fd->tid_n_pinned += pinned;
return pinned;
}
@@ -319,54 +249,70 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
int ret = 0, need_group = 0, pinned;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
- unsigned int ngroups, pageidx = 0, pageset_count,
+ unsigned int ngroups, pageset_count,
tididx = 0, mapped, mapped_pages = 0;
u32 *tidlist = NULL;
struct tid_user_buf *tidbuf;
+ unsigned long mmu_seq = 0;
+
+ if (!PAGE_ALIGNED(tinfo->vaddr))
+ return -EINVAL;
+ if (tinfo->length == 0)
+ return -EINVAL;
tidbuf = kzalloc(sizeof(*tidbuf), GFP_KERNEL);
if (!tidbuf)
return -ENOMEM;
+ mutex_init(&tidbuf->cover_mutex);
tidbuf->vaddr = tinfo->vaddr;
tidbuf->length = tinfo->length;
+ tidbuf->npages = num_user_pages(tidbuf->vaddr, tidbuf->length);
tidbuf->psets = kcalloc(uctxt->expected_count, sizeof(*tidbuf->psets),
GFP_KERNEL);
if (!tidbuf->psets) {
- kfree(tidbuf);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail_release_mem;
+ }
+
+ if (fd->use_mn) {
+ ret = mmu_interval_notifier_insert(
+ &tidbuf->notifier, current->mm,
+ tidbuf->vaddr, tidbuf->npages * PAGE_SIZE,
+ &tid_cover_ops);
+ if (ret)
+ goto fail_release_mem;
+ mmu_seq = mmu_interval_read_begin(&tidbuf->notifier);
}
pinned = pin_rcv_pages(fd, tidbuf);
if (pinned <= 0) {
- kfree(tidbuf->psets);
- kfree(tidbuf);
- return pinned;
+ ret = (pinned < 0) ? pinned : -ENOSPC;
+ goto fail_unpin;
}
/* Find sets of physically contiguous pages */
tidbuf->n_psets = find_phys_blocks(tidbuf, pinned);
- /*
- * We don't need to access this under a lock since tid_used is per
- * process and the same process cannot be in hfi1_user_exp_rcv_clear()
- * and hfi1_user_exp_rcv_setup() at the same time.
- */
+ /* Reserve the number of expected tids to be used. */
spin_lock(&fd->tid_lock);
if (fd->tid_used + tidbuf->n_psets > fd->tid_limit)
pageset_count = fd->tid_limit - fd->tid_used;
else
pageset_count = tidbuf->n_psets;
+ fd->tid_used += pageset_count;
spin_unlock(&fd->tid_lock);
- if (!pageset_count)
- goto bail;
+ if (!pageset_count) {
+ ret = -ENOSPC;
+ goto fail_unreserve;
+ }
ngroups = pageset_count / dd->rcv_entries.group_size;
tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL);
if (!tidlist) {
ret = -ENOMEM;
- goto nomem;
+ goto fail_unreserve;
}
tididx = 0;
@@ -385,7 +331,7 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
tid_group_pop(&uctxt->tid_group_list);
ret = program_rcvarray(fd, tidbuf, grp,
- pageidx, dd->rcv_entries.group_size,
+ dd->rcv_entries.group_size,
tidlist, &tididx, &mapped);
/*
* If there was a failure to program the RcvArray
@@ -401,11 +347,10 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
tid_group_add_tail(grp, &uctxt->tid_full_list);
ngroups--;
- pageidx += ret;
mapped_pages += mapped;
}
- while (pageidx < pageset_count) {
+ while (tididx < pageset_count) {
struct tid_group *grp, *ptr;
/*
* If we don't have any partially used tid groups, check
@@ -427,11 +372,11 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
*/
list_for_each_entry_safe(grp, ptr, &uctxt->tid_used_list.list,
list) {
- unsigned use = min_t(unsigned, pageset_count - pageidx,
+ unsigned use = min_t(unsigned, pageset_count - tididx,
grp->size - grp->used);
ret = program_rcvarray(fd, tidbuf, grp,
- pageidx, use, tidlist,
+ use, tidlist,
&tididx, &mapped);
if (ret < 0) {
hfi1_cdbg(TID,
@@ -443,11 +388,10 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
tid_group_move(grp,
&uctxt->tid_used_list,
&uctxt->tid_full_list);
- pageidx += ret;
mapped_pages += mapped;
need_group = 0;
/* Check if we are done so we break out early */
- if (pageidx >= pageset_count)
+ if (tididx >= pageset_count)
break;
} else if (WARN_ON(ret == 0)) {
/*
@@ -462,43 +406,78 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
}
unlock:
mutex_unlock(&uctxt->exp_mutex);
-nomem:
hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx,
mapped_pages, ret);
- if (tididx) {
- spin_lock(&fd->tid_lock);
- fd->tid_used += tididx;
- spin_unlock(&fd->tid_lock);
- tinfo->tidcnt = tididx;
- tinfo->length = mapped_pages * PAGE_SIZE;
-
- if (copy_to_user(u64_to_user_ptr(tinfo->tidlist),
- tidlist, sizeof(tidlist[0]) * tididx)) {
- /*
- * On failure to copy to the user level, we need to undo
- * everything done so far so we don't leak resources.
- */
- tinfo->tidlist = (unsigned long)&tidlist;
- hfi1_user_exp_rcv_clear(fd, tinfo);
- tinfo->tidlist = 0;
- ret = -EFAULT;
- goto bail;
+
+ /* fail if nothing was programmed, set error if none provided */
+ if (tididx == 0) {
+ if (ret >= 0)
+ ret = -ENOSPC;
+ goto fail_unreserve;
+ }
+
+ /* adjust reserved tid_used to actual count */
+ spin_lock(&fd->tid_lock);
+ fd->tid_used -= pageset_count - tididx;
+ spin_unlock(&fd->tid_lock);
+
+ /* unpin all pages not covered by a TID */
+ unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, pinned - mapped_pages,
+ false);
+
+ if (fd->use_mn) {
+ /* check for an invalidate during setup */
+ bool fail = false;
+
+ mutex_lock(&tidbuf->cover_mutex);
+ fail = mmu_interval_read_retry(&tidbuf->notifier, mmu_seq);
+ mutex_unlock(&tidbuf->cover_mutex);
+
+ if (fail) {
+ ret = -EBUSY;
+ goto fail_unprogram;
}
}
- /*
- * If not everything was mapped (due to insufficient RcvArray entries,
- * for example), unpin all unmapped pages so we can pin them nex time.
- */
- if (mapped_pages != pinned)
- unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages,
- (pinned - mapped_pages), false);
-bail:
+ tinfo->tidcnt = tididx;
+ tinfo->length = mapped_pages * PAGE_SIZE;
+
+ if (copy_to_user(u64_to_user_ptr(tinfo->tidlist),
+ tidlist, sizeof(tidlist[0]) * tididx)) {
+ ret = -EFAULT;
+ goto fail_unprogram;
+ }
+
+ if (fd->use_mn)
+ mmu_interval_notifier_remove(&tidbuf->notifier);
+ kfree(tidbuf->pages);
kfree(tidbuf->psets);
+ kfree(tidbuf);
kfree(tidlist);
+ return 0;
+
+fail_unprogram:
+ /* unprogram, unmap, and unpin all allocated TIDs */
+ tinfo->tidlist = (unsigned long)tidlist;
+ hfi1_user_exp_rcv_clear(fd, tinfo);
+ tinfo->tidlist = 0;
+ pinned = 0; /* nothing left to unpin */
+ pageset_count = 0; /* nothing left reserved */
+fail_unreserve:
+ spin_lock(&fd->tid_lock);
+ fd->tid_used -= pageset_count;
+ spin_unlock(&fd->tid_lock);
+fail_unpin:
+ if (fd->use_mn)
+ mmu_interval_notifier_remove(&tidbuf->notifier);
+ if (pinned > 0)
+ unpin_rcv_pages(fd, tidbuf, NULL, 0, pinned, false);
+fail_release_mem:
kfree(tidbuf->pages);
+ kfree(tidbuf->psets);
kfree(tidbuf);
- return ret > 0 ? 0 : ret;
+ kfree(tidlist);
+ return ret;
}
int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
@@ -512,14 +491,14 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
if (unlikely(tinfo->tidcnt > fd->tid_used))
return -EINVAL;
- tidinfo = memdup_user(u64_to_user_ptr(tinfo->tidlist),
- sizeof(tidinfo[0]) * tinfo->tidcnt);
+ tidinfo = memdup_array_user(u64_to_user_ptr(tinfo->tidlist),
+ tinfo->tidcnt, sizeof(tidinfo[0]));
if (IS_ERR(tidinfo))
return PTR_ERR(tidinfo);
mutex_lock(&uctxt->exp_mutex);
for (tididx = 0; tididx < tinfo->tidcnt; tididx++) {
- ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL);
+ ret = unprogram_rcvarray(fd, tidinfo[tididx]);
if (ret) {
hfi1_cdbg(TID, "Failed to unprogram rcv array %d",
ret);
@@ -656,7 +635,6 @@ static u32 find_phys_blocks(struct tid_user_buf *tidbuf, unsigned int npages)
* struct tid_pageset holding information on physically contiguous
* chunks from the user buffer), and other fields.
* @grp: RcvArray group
- * @start: starting index into sets array
* @count: number of struct tid_pageset's to program
* @tidlist: the array of u32 elements when the information about the
* programmed RcvArray entries is to be encoded.
@@ -676,14 +654,14 @@ static u32 find_phys_blocks(struct tid_user_buf *tidbuf, unsigned int npages)
* number of RcvArray entries programmed.
*/
static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *tbuf,
- struct tid_group *grp,
- unsigned int start, u16 count,
+ struct tid_group *grp, u16 count,
u32 *tidlist, unsigned int *tididx,
unsigned int *pmapped)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
u16 idx;
+ unsigned int start = *tididx;
u32 tidinfo = 0, rcventry, useidx = 0;
int mapped = 0;
@@ -728,8 +706,7 @@ static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *tbuf,
return ret;
mapped += npages;
- tidinfo = rcventry2tidinfo(rcventry - uctxt->expected_base) |
- EXP_TID_SET(LEN, npages);
+ tidinfo = create_tid(rcventry - uctxt->expected_base, npages);
tidlist[(*tididx)++] = tidinfo;
grp->used++;
grp->map |= 1 << useidx++;
@@ -759,14 +736,12 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
* Allocate the node first so we can handle a potential
* failure before we've programmed anything.
*/
- node = kzalloc(sizeof(*node) + (sizeof(struct page *) * npages),
- GFP_KERNEL);
+ node = kzalloc(struct_size(node, pages, npages), GFP_KERNEL);
if (!node)
return -ENOMEM;
- phys = pci_map_single(dd->pcidev,
- __va(page_to_phys(pages[0])),
- npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
+ phys = dma_map_single(&dd->pcidev->dev, __va(page_to_phys(pages[0])),
+ npages * PAGE_SIZE, DMA_FROM_DEVICE);
if (dma_mapping_error(&dd->pcidev->dev, phys)) {
dd_dev_err(dd, "Failed to DMA map Exp Rcv pages 0x%llx\n",
phys);
@@ -774,86 +749,100 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
return -EFAULT;
}
- node->mmu.addr = tbuf->vaddr + (pageidx * PAGE_SIZE);
- node->mmu.len = npages * PAGE_SIZE;
+ node->fdata = fd;
+ mutex_init(&node->invalidate_mutex);
node->phys = page_to_phys(pages[0]);
node->npages = npages;
node->rcventry = rcventry;
node->dma_addr = phys;
node->grp = grp;
node->freed = false;
- memcpy(node->pages, pages, sizeof(struct page *) * npages);
+ memcpy(node->pages, pages, flex_array_size(node, pages, npages));
- if (!fd->handler)
- ret = tid_rb_insert(fd, &node->mmu);
- else
- ret = hfi1_mmu_rb_insert(fd->handler, &node->mmu);
-
- if (ret) {
- hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
- node->rcventry, node->mmu.addr, node->phys, ret);
- pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
- PCI_DMA_FROMDEVICE);
- kfree(node);
- return -EFAULT;
+ if (fd->use_mn) {
+ ret = mmu_interval_notifier_insert(
+ &node->notifier, current->mm,
+ tbuf->vaddr + (pageidx * PAGE_SIZE), npages * PAGE_SIZE,
+ &tid_mn_ops);
+ if (ret)
+ goto out_unmap;
}
+ fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node;
+
hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1);
trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages,
- node->mmu.addr, node->phys, phys);
+ node->notifier.interval_tree.start, node->phys,
+ phys);
return 0;
+
+out_unmap:
+ hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
+ node->rcventry, node->notifier.interval_tree.start,
+ node->phys, ret);
+ dma_unmap_single(&dd->pcidev->dev, phys, npages * PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ kfree(node);
+ return -EFAULT;
}
-static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
- struct tid_group **grp)
+static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
struct tid_rb_node *node;
- u8 tidctrl = EXP_TID_GET(tidinfo, CTRL);
+ u32 tidctrl = EXP_TID_GET(tidinfo, CTRL);
u32 tididx = EXP_TID_GET(tidinfo, IDX) << 1, rcventry;
- if (tididx >= uctxt->expected_count) {
- dd_dev_err(dd, "Invalid RcvArray entry (%u) index for ctxt %u\n",
- tididx, uctxt->ctxt);
- return -EINVAL;
- }
-
- if (tidctrl == 0x3)
+ if (tidctrl == 0x3 || tidctrl == 0x0)
return -EINVAL;
rcventry = tididx + (tidctrl - 1);
+ if (rcventry >= uctxt->expected_count) {
+ dd_dev_err(dd, "Invalid RcvArray entry (%u) index for ctxt %u\n",
+ rcventry, uctxt->ctxt);
+ return -EINVAL;
+ }
+
node = fd->entry_to_rb[rcventry];
if (!node || node->rcventry != (uctxt->expected_base + rcventry))
return -EBADF;
- if (grp)
- *grp = node->grp;
-
- if (!fd->handler)
- cacheless_tid_rb_remove(fd, node);
- else
- hfi1_mmu_rb_remove(fd->handler, &node->mmu);
+ if (fd->use_mn)
+ mmu_interval_notifier_remove(&node->notifier);
+ cacheless_tid_rb_remove(fd, node);
return 0;
}
-static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
+static void __clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
+ mutex_lock(&node->invalidate_mutex);
+ if (node->freed)
+ goto done;
+ node->freed = true;
+
trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
- node->npages, node->mmu.addr, node->phys,
+ node->npages,
+ node->notifier.interval_tree.start, node->phys,
node->dma_addr);
- /*
- * Make sure device has seen the write before we unpin the
- * pages.
- */
+ /* Make sure device has seen the write before pages are unpinned */
hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0);
unpin_rcv_pages(fd, NULL, node, 0, node->npages, true);
+done:
+ mutex_unlock(&node->invalidate_mutex);
+}
+
+static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
+{
+ struct hfi1_ctxtdata *uctxt = fd->uctxt;
+
+ __clear_tid_node(fd, node);
node->grp->used--;
node->grp->map &= ~(1 << (node->rcventry - node->grp->base));
@@ -891,39 +880,43 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
if (!node || node->rcventry != rcventry)
continue;
+ if (fd->use_mn)
+ mmu_interval_notifier_remove(
+ &node->notifier);
cacheless_tid_rb_remove(fd, node);
}
}
}
}
-/*
- * Always return 0 from this function. A non-zero return indicates that the
- * remove operation will be called and that memory should be unpinned.
- * However, the driver cannot unpin out from under PSM. Instead, retain the
- * memory (by returning 0) and inform PSM that the memory is going away. PSM
- * will call back later when it has removed the memory from its list.
- */
-static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
+static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
- struct hfi1_filedata *fdata = arg;
- struct hfi1_ctxtdata *uctxt = fdata->uctxt;
struct tid_rb_node *node =
- container_of(mnode, struct tid_rb_node, mmu);
+ container_of(mni, struct tid_rb_node, notifier);
+ struct hfi1_filedata *fdata = node->fdata;
+ struct hfi1_ctxtdata *uctxt = fdata->uctxt;
if (node->freed)
- return 0;
+ return true;
+
+ /* take action only if unmapping */
+ if (range->event != MMU_NOTIFY_UNMAP)
+ return true;
- trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr,
+ trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt,
+ node->notifier.interval_tree.start,
node->rcventry, node->npages, node->dma_addr);
- node->freed = true;
+
+ /* clear the hardware rcvarray entry */
+ __clear_tid_node(fdata, node);
spin_lock(&fdata->invalid_lock);
if (fdata->invalid_tid_idx < uctxt->expected_count) {
fdata->invalid_tids[fdata->invalid_tid_idx] =
- rcventry2tidinfo(node->rcventry - uctxt->expected_base);
- fdata->invalid_tids[fdata->invalid_tid_idx] |=
- EXP_TID_SET(LEN, node->npages);
+ create_tid(node->rcventry - uctxt->expected_base,
+ node->npages);
if (!fdata->invalid_tid_idx) {
unsigned long *ev;
@@ -943,18 +936,24 @@ static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
fdata->invalid_tid_idx++;
}
spin_unlock(&fdata->invalid_lock);
- return 0;
+ return true;
}
-static int tid_rb_insert(void *arg, struct mmu_rb_node *node)
+static bool tid_cover_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
- struct hfi1_filedata *fdata = arg;
- struct tid_rb_node *tnode =
- container_of(node, struct tid_rb_node, mmu);
- u32 base = fdata->uctxt->expected_base;
+ struct tid_user_buf *tidbuf =
+ container_of(mni, struct tid_user_buf, notifier);
+
+ /* take action only if unmapping */
+ if (range->event == MMU_NOTIFY_UNMAP) {
+ mutex_lock(&tidbuf->cover_mutex);
+ mmu_interval_set_seq(mni, cur_seq);
+ mutex_unlock(&tidbuf->cover_mutex);
+ }
- fdata->entry_to_rb[tnode->rcventry - base] = tnode;
- return 0;
+ return true;
}
static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
@@ -965,12 +964,3 @@ static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
fdata->entry_to_rb[tnode->rcventry - base] = NULL;
clear_tid_node(fdata, tnode);
}
-
-static void tid_rb_remove(void *arg, struct mmu_rb_node *node)
-{
- struct hfi1_filedata *fdata = arg;
- struct tid_rb_node *tnode =
- container_of(node, struct tid_rb_node, mmu);
-
- cacheless_tid_rb_remove(fdata, tnode);
-}
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
index 43b105de1d54..055726f7c139 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
@@ -1,52 +1,12 @@
-#ifndef _HFI1_USER_EXP_RCV_H
-#define _HFI1_USER_EXP_RCV_H
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
+ * Copyright(c) 2020 - Cornelis Networks, Inc.
* Copyright(c) 2015 - 2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+#ifndef _HFI1_USER_EXP_RCV_H
+#define _HFI1_USER_EXP_RCV_H
+
#include "hfi.h"
#include "exp_rcv.h"
@@ -56,6 +16,8 @@ struct tid_pageset {
};
struct tid_user_buf {
+ struct mmu_interval_notifier notifier;
+ struct mutex cover_mutex;
unsigned long vaddr;
unsigned long length;
unsigned int npages;
@@ -65,14 +27,16 @@ struct tid_user_buf {
};
struct tid_rb_node {
- struct mmu_rb_node mmu;
+ struct mmu_interval_notifier notifier;
+ struct hfi1_filedata *fdata;
+ struct mutex invalidate_mutex; /* covers hw removal */
unsigned long phys;
struct tid_group *grp;
u32 rcventry;
dma_addr_t dma_addr;
bool freed;
unsigned int npages;
- struct page *pages[0];
+ struct page *pages[] __counted_by(npages);
};
static inline int num_user_pages(unsigned long addr,
@@ -94,4 +58,9 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd,
struct hfi1_tid_info *tinfo);
+static inline struct mm_struct *mm_from_tid_node(struct tid_rb_node *node)
+{
+ return node->notifier.mm;
+}
+
#endif /* _HFI1_USER_EXP_RCV_H */
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 02eee8eff1db..c77913a7920f 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015-2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#include <linux/mm.h>
@@ -71,33 +29,52 @@ MODULE_PARM_DESC(cache_size, "Send and receive side cache size limit (in MB)");
bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
u32 nlocked, u32 npages)
{
- unsigned long ulimit = rlimit(RLIMIT_MEMLOCK), pinned, cache_limit,
- size = (cache_size * (1UL << 20)); /* convert to bytes */
- unsigned int usr_ctxts =
- dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt;
- bool can_lock = capable(CAP_IPC_LOCK);
+ unsigned long ulimit_pages;
+ unsigned long cache_limit_pages;
+ unsigned int usr_ctxts;
/*
- * Calculate per-cache size. The calculation below uses only a quarter
- * of the available per-context limit. This leaves space for other
- * pinning. Should we worry about shared ctxts?
+ * Perform RLIMIT_MEMLOCK based checks unless CAP_IPC_LOCK is present.
*/
- cache_limit = (ulimit / usr_ctxts) / 4;
-
- /* If ulimit isn't set to "unlimited" and is smaller than cache_size. */
- if (ulimit != (-1UL) && size > cache_limit)
- size = cache_limit;
-
- /* Convert to number of pages */
- size = DIV_ROUND_UP(size, PAGE_SIZE);
-
- pinned = atomic64_read(&mm->pinned_vm);
+ if (!capable(CAP_IPC_LOCK)) {
+ ulimit_pages =
+ DIV_ROUND_DOWN_ULL(rlimit(RLIMIT_MEMLOCK), PAGE_SIZE);
+
+ /*
+ * Pinning these pages would exceed this process's locked memory
+ * limit.
+ */
+ if (atomic64_read(&mm->pinned_vm) + npages > ulimit_pages)
+ return false;
+
+ /*
+ * Only allow 1/4 of the user's RLIMIT_MEMLOCK to be used for HFI
+ * caches. This fraction is then equally distributed among all
+ * existing user contexts. Note that if RLIMIT_MEMLOCK is
+ * 'unlimited' (-1), the value of this limit will be > 2^42 pages
+ * (2^64 / 2^12 / 2^8 / 2^2).
+ *
+ * The effectiveness of this check may be reduced if I/O occurs on
+ * some user contexts before all user contexts are created. This
+ * check assumes that this process is the only one using this
+ * context (e.g., the corresponding fd was not passed to another
+ * process for concurrent access) as there is no per-context,
+ * per-process tracking of pinned pages. It also assumes that each
+ * user context has only one cache to limit.
+ */
+ usr_ctxts = dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt;
+ if (nlocked + npages > (ulimit_pages / usr_ctxts / 4))
+ return false;
+ }
- /* First, check the absolute limit against all pinned pages. */
- if (pinned + npages >= ulimit && !can_lock)
+ /*
+ * Pinning these pages would exceed the size limit for this cache.
+ */
+ cache_limit_pages = cache_size * (1024 * 1024) / PAGE_SIZE;
+ if (nlocked + npages > cache_limit_pages)
return false;
- return ((nlocked + npages) <= size) || can_lock;
+ return true;
}
int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t npages,
@@ -106,7 +83,7 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np
int ret;
unsigned int gup_flags = FOLL_LONGTERM | (writable ? FOLL_WRITE : 0);
- ret = get_user_pages_fast(vaddr, npages, gup_flags, pages);
+ ret = pin_user_pages_fast(vaddr, npages, gup_flags, pages);
if (ret < 0)
return ret;
@@ -118,13 +95,7 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np
void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
size_t npages, bool dirty)
{
- size_t i;
-
- for (i = 0; i < npages; i++) {
- if (dirty)
- set_page_dirty_lock(p[i]);
- put_page(p[i]);
- }
+ unpin_user_pages_dirty_lock(p, npages, dirty);
if (mm) { /* during close after signal, mm can be NULL */
atomic64_sub(npages, &mm->pinned_vm);
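The rewritten hfi1_can_pin_pages() above splits the old single expression into three explicit checks: the absolute RLIMIT_MEMLOCK limit, a quarter of that limit divided across user contexts, and the cache_size module parameter. The following standalone sketch reproduces that accounting with plain C stand-ins for the kernel helpers (rlimit(), DIV_ROUND_DOWN_ULL(), atomic64_read()); the constants and function name are illustrative only.

/*
 * Standalone sketch of the page-pinning accounting. An "unlimited"
 * RLIMIT_MEMLOCK (-1) simply yields an enormous page count, as noted
 * in the kernel comment above.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define CACHE_SIZE_MB	256UL		/* stand-in for the cache_size module parameter */

static bool can_pin_pages(uint64_t memlock_limit_bytes, bool cap_ipc_lock,
			  uint64_t pinned_vm, unsigned int usr_ctxts,
			  uint32_t nlocked, uint32_t npages)
{
	if (!cap_ipc_lock) {
		uint64_t ulimit_pages = memlock_limit_bytes / PAGE_SIZE;

		/* absolute per-process locked-memory limit */
		if (pinned_vm + npages > ulimit_pages)
			return false;

		/* 1/4 of the limit, split equally across user contexts */
		if (nlocked + npages > ulimit_pages / usr_ctxts / 4)
			return false;
	}

	/* per-cache size limit derived from the cache_size parameter */
	if (nlocked + npages > CACHE_SIZE_MB * 1024 * 1024 / PAGE_SIZE)
		return false;

	return true;
}

int main(void)
{
	/* 64 MiB RLIMIT_MEMLOCK, 4 user contexts, nothing pinned yet */
	printf("%d\n", can_pin_pages(64UL << 20, false, 0, 4, 0, 1024));
	/* same limits, but the request exceeds the per-context quarter share */
	printf("%d\n", can_pin_pages(64UL << 20, false, 0, 4, 0, 8192));
	return 0;
}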
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 8bfbc6d7ea34..9b1aece1b080 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -1,49 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
+ * Copyright(c) 2020 - 2023 Cornelis Networks, Inc.
* Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/device.h>
@@ -64,7 +24,6 @@
#include "hfi.h"
#include "sdma.h"
-#include "mmu_rb.h"
#include "user_sdma.h"
#include "verbs.h" /* for the headers */
#include "common.h" /* for struct hfi1_tid_info */
@@ -79,11 +38,7 @@ static unsigned initial_pkt_count = 8;
static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts);
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status);
static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq);
-static void user_sdma_free_request(struct user_sdma_request *req, bool unpin);
-static int pin_vector_pages(struct user_sdma_request *req,
- struct user_sdma_iovec *iovec);
-static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
- unsigned start, unsigned npages);
+static void user_sdma_free_request(struct user_sdma_request *req);
static int check_header_template(struct user_sdma_request *req,
struct hfi1_pkt_header *hdr, u32 lrhlen,
u32 datalen);
@@ -105,21 +60,6 @@ static int defer_packet_queue(
uint seq,
bool pkts_sent);
static void activate_packet_queue(struct iowait *wait, int reason);
-static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
- unsigned long len);
-static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode);
-static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
- void *arg2, bool *stop);
-static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode);
-static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode);
-
-static struct mmu_rb_ops sdma_rb_ops = {
- .filter = sdma_rb_filter,
- .insert = sdma_rb_insert,
- .evict = sdma_rb_evict,
- .remove = sdma_rb_remove,
- .invalidate = sdma_rb_invalidate
-};
static int defer_packet_queue(
struct sdma_engine *sde,
@@ -130,27 +70,26 @@ static int defer_packet_queue(
{
struct hfi1_user_sdma_pkt_q *pq =
container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy);
- struct user_sdma_txreq *tx =
- container_of(txreq, struct user_sdma_txreq, txreq);
- if (sdma_progress(sde, seq, txreq)) {
- if (tx->busycount++ < MAX_DEFER_RETRY_COUNT)
- goto eagain;
- }
+ write_seqlock(&sde->waitlock);
+ trace_hfi1_usdma_defer(pq, sde, &pq->busy);
+ if (sdma_progress(sde, seq, txreq))
+ goto eagain;
/*
* We are assuming that if the list is enqueued somewhere, it
* is to the dmawait list since that is the only place where
* it is supposed to be enqueued.
*/
xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
- write_seqlock(&sde->waitlock);
if (list_empty(&pq->busy.list)) {
+ pq->busy.lock = &sde->waitlock;
iowait_get_priority(&pq->busy);
iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
}
write_sequnlock(&sde->waitlock);
return -EBUSY;
eagain:
+ write_sequnlock(&sde->waitlock);
return -EAGAIN;
}
@@ -158,6 +97,8 @@ static void activate_packet_queue(struct iowait *wait, int reason)
{
struct hfi1_user_sdma_pkt_q *pq =
container_of(wait, struct hfi1_user_sdma_pkt_q, busy);
+
+ trace_hfi1_usdma_activate(pq, wait, reason);
xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
wake_up(&wait->wait_dma);
};
@@ -182,7 +123,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
pq = kzalloc(sizeof(*pq), GFP_KERNEL);
if (!pq)
return -ENOMEM;
-
pq->dd = dd;
pq->ctxt = uctxt->ctxt;
pq->subctxt = fd->subctxt;
@@ -190,7 +130,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
atomic_set(&pq->n_reqs, 0);
init_waitqueue_head(&pq->wait);
atomic_set(&pq->n_locked, 0);
- pq->mm = fd->mm;
iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
activate_packet_queue, NULL, NULL);
@@ -202,9 +141,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
if (!pq->reqs)
goto pq_reqs_nomem;
- pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size),
- sizeof(*pq->req_in_use),
- GFP_KERNEL);
+ pq->req_in_use = bitmap_zalloc(hfi1_sdma_comp_ring_size, GFP_KERNEL);
if (!pq->req_in_use)
goto pq_reqs_no_in_use;
@@ -232,14 +169,11 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
cq->nentries = hfi1_sdma_comp_ring_size;
- ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, dd->pport->hfi1_wq,
- &pq->handler);
- if (ret) {
- dd_dev_err(dd, "Failed to register with MMU %d", ret);
+ ret = hfi1_init_system_pinning(pq);
+ if (ret)
goto pq_mmu_fail;
- }
- fd->pq = pq;
+ rcu_assign_pointer(fd->pq, pq);
fd->cq = cq;
return 0;
@@ -251,7 +185,7 @@ cq_comps_nomem:
cq_nomem:
kmem_cache_destroy(pq->txreq_cache);
pq_txreq_nomem:
- kfree(pq->req_in_use);
+ bitmap_free(pq->req_in_use);
pq_reqs_no_in_use:
kfree(pq->reqs);
pq_reqs_nomem:
@@ -260,6 +194,21 @@ pq_reqs_nomem:
return ret;
}
+static void flush_pq_iowait(struct hfi1_user_sdma_pkt_q *pq)
+{
+ unsigned long flags;
+ seqlock_t *lock = pq->busy.lock;
+
+ if (!lock)
+ return;
+ write_seqlock_irqsave(lock, flags);
+ if (!list_empty(&pq->busy.list)) {
+ list_del_init(&pq->busy.list);
+ pq->busy.lock = NULL;
+ }
+ write_sequnlock_irqrestore(lock, flags);
+}
+
int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
struct hfi1_ctxtdata *uctxt)
{
@@ -267,20 +216,27 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt);
- pq = fd->pq;
+ spin_lock(&fd->pq_rcu_lock);
+ pq = srcu_dereference_check(fd->pq, &fd->pq_srcu,
+ lockdep_is_held(&fd->pq_rcu_lock));
if (pq) {
- if (pq->handler)
- hfi1_mmu_rb_unregister(pq->handler);
+ rcu_assign_pointer(fd->pq, NULL);
+ spin_unlock(&fd->pq_rcu_lock);
+ synchronize_srcu(&fd->pq_srcu);
+ /* at this point there can be no more new requests */
iowait_sdma_drain(&pq->busy);
/* Wait until all requests have been freed. */
wait_event_interruptible(
pq->wait,
!atomic_read(&pq->n_reqs));
kfree(pq->reqs);
- kfree(pq->req_in_use);
+ hfi1_free_system_pinning(pq);
+ bitmap_free(pq->req_in_use);
kmem_cache_destroy(pq->txreq_cache);
+ flush_pq_iowait(pq);
kfree(pq);
- fd->pq = NULL;
+ } else {
+ spin_unlock(&fd->pq_rcu_lock);
}
if (fd->cq) {
vfree(fd->cq->comps);
@@ -324,7 +280,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
{
int ret = 0, i;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
- struct hfi1_user_sdma_pkt_q *pq = fd->pq;
+ struct hfi1_user_sdma_pkt_q *pq =
+ srcu_dereference(fd->pq, &fd->pq_srcu);
struct hfi1_user_sdma_comp_q *cq = fd->cq;
struct hfi1_devdata *dd = pq->dd;
unsigned long idx = 0;
@@ -429,6 +386,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
ret = -EINVAL;
goto free_req;
}
+
/* Copy the header from the user buffer */
ret = copy_from_user(&req->hdr, iovec[idx].iov_base + sizeof(info),
sizeof(req->hdr));
@@ -503,9 +461,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
memcpy(&req->iovs[i].iov,
iovec + idx++,
sizeof(req->iovs[i].iov));
- ret = pin_vector_pages(req, &req->iovs[i]);
- if (ret) {
- req->data_iovs = i;
+ if (req->iovs[i].iov.iov_len == 0) {
+ ret = -EINVAL;
goto free_req;
}
req->data_len += req->iovs[i].iov.iov_len;
@@ -537,12 +494,12 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
* equal to the pkt count. However, there is no way to
* tell at this point.
*/
- tmp = memdup_user(iovec[idx].iov_base,
- ntids * sizeof(*req->tids));
+ tmp = memdup_array_user(iovec[idx].iov_base,
+ ntids, sizeof(*req->tids));
if (IS_ERR(tmp)) {
ret = PTR_ERR(tmp);
- SDMA_DBG(req, "Failed to copy %d TIDs (%d)",
- ntids, ret);
+ SDMA_DBG(req, "Failed to copy %d TIDs (%pe)", ntids,
+ tmp);
goto free_req;
}
req->tids = tmp;
@@ -567,10 +524,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
pq->state = SDMA_PKT_Q_ACTIVE;
- /* Send the first N packets in the request to buy us some time */
- ret = user_sdma_send_pkts(req, pcount);
- if (unlikely(ret < 0 && ret != -EBUSY))
- goto free_req;
/*
* This is a somewhat blocking send implementation.
@@ -581,13 +534,18 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
while (req->seqsubmitted != req->info.npkts) {
ret = user_sdma_send_pkts(req, pcount);
if (ret < 0) {
+ int we_ret;
+
if (ret != -EBUSY)
goto free_req;
- wait_event_interruptible_timeout(
+ we_ret = wait_event_interruptible_timeout(
pq->busy.wait_dma,
- (pq->state == SDMA_PKT_Q_ACTIVE),
+ pq->state == SDMA_PKT_Q_ACTIVE,
msecs_to_jiffies(
SDMA_IOWAIT_TIMEOUT));
+ trace_hfi1_usdma_we(pq, we_ret);
+ if (we_ret <= 0)
+ flush_pq_iowait(pq);
}
}
*count += idx;
@@ -602,7 +560,7 @@ free_req:
if (req->seqsubmitted)
wait_event(pq->busy.wait_dma,
(req->seqcomp == req->seqsubmitted - 1));
- user_sdma_free_request(req, true);
+ user_sdma_free_request(req);
pq_update(pq);
set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
}
@@ -714,48 +672,6 @@ static int user_sdma_txadd_ahg(struct user_sdma_request *req,
return ret;
}
-static int user_sdma_txadd(struct user_sdma_request *req,
- struct user_sdma_txreq *tx,
- struct user_sdma_iovec *iovec, u32 datalen,
- u32 *queued_ptr, u32 *data_sent_ptr,
- u64 *iov_offset_ptr)
-{
- int ret;
- unsigned int pageidx, len;
- unsigned long base, offset;
- u64 iov_offset = *iov_offset_ptr;
- u32 queued = *queued_ptr, data_sent = *data_sent_ptr;
- struct hfi1_user_sdma_pkt_q *pq = req->pq;
-
- base = (unsigned long)iovec->iov.iov_base;
- offset = offset_in_page(base + iovec->offset + iov_offset);
- pageidx = (((iovec->offset + iov_offset + base) - (base & PAGE_MASK)) >>
- PAGE_SHIFT);
- len = offset + req->info.fragsize > PAGE_SIZE ?
- PAGE_SIZE - offset : req->info.fragsize;
- len = min((datalen - queued), len);
- ret = sdma_txadd_page(pq->dd, &tx->txreq, iovec->pages[pageidx],
- offset, len);
- if (ret) {
- SDMA_DBG(req, "SDMA txreq add page failed %d\n", ret);
- return ret;
- }
- iov_offset += len;
- queued += len;
- data_sent += len;
- if (unlikely(queued < datalen && pageidx == iovec->npages &&
- req->iov_idx < req->data_iovs - 1)) {
- iovec->offset += iov_offset;
- iovec = &req->iovs[++req->iov_idx];
- iov_offset = 0;
- }
-
- *queued_ptr = queued;
- *data_sent_ptr = data_sent;
- *iov_offset_ptr = iov_offset;
- return ret;
-}
-
static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts)
{
int ret = 0;
@@ -787,8 +703,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts)
maxpkts = req->info.npkts - req->seqnum;
while (npkts < maxpkts) {
- u32 datalen = 0, queued = 0, data_sent = 0;
- u64 iov_offset = 0;
+ u32 datalen = 0;
/*
* Check whether any of the completions have come back
@@ -804,7 +719,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts)
tx->flags = 0;
tx->req = req;
- tx->busycount = 0;
INIT_LIST_HEAD(&tx->list);
/*
@@ -882,27 +796,17 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts)
goto free_txreq;
}
- /*
- * If the request contains any data vectors, add up to
- * fragsize bytes to the descriptor.
- */
- while (queued < datalen &&
- (req->sent + data_sent) < req->data_len) {
- ret = user_sdma_txadd(req, tx, iovec, datalen,
- &queued, &data_sent, &iov_offset);
- if (ret)
- goto free_txreq;
- }
- /*
- * The txreq was submitted successfully so we can update
- * the counters.
- */
req->koffset += datalen;
if (req_opcode(req->info.ctrl) == EXPECTED)
req->tidoffset += datalen;
- req->sent += data_sent;
- if (req->data_len)
- iovec->offset += iov_offset;
+ req->sent += datalen;
+ while (datalen) {
+ ret = hfi1_add_pages_to_sdma_packet(req, tx, iovec,
+ &datalen);
+ if (ret)
+ goto free_txreq;
+ iovec = &req->iovs[req->iov_idx];
+ }
list_add_tail(&tx->txreq.list, &req->txps);
/*
* It is important to increment this here as it is used to
@@ -936,135 +840,6 @@ free_tx:
return ret;
}
-static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages)
-{
- struct evict_data evict_data;
-
- evict_data.cleared = 0;
- evict_data.target = npages;
- hfi1_mmu_rb_evict(pq->handler, &evict_data);
- return evict_data.cleared;
-}
-
-static int pin_sdma_pages(struct user_sdma_request *req,
- struct user_sdma_iovec *iovec,
- struct sdma_mmu_node *node,
- int npages)
-{
- int pinned, cleared;
- struct page **pages;
- struct hfi1_user_sdma_pkt_q *pq = req->pq;
-
- pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
- if (!pages)
- return -ENOMEM;
- memcpy(pages, node->pages, node->npages * sizeof(*pages));
-
- npages -= node->npages;
-retry:
- if (!hfi1_can_pin_pages(pq->dd, pq->mm,
- atomic_read(&pq->n_locked), npages)) {
- cleared = sdma_cache_evict(pq, npages);
- if (cleared >= npages)
- goto retry;
- }
- pinned = hfi1_acquire_user_pages(pq->mm,
- ((unsigned long)iovec->iov.iov_base +
- (node->npages * PAGE_SIZE)), npages, 0,
- pages + node->npages);
- if (pinned < 0) {
- kfree(pages);
- return pinned;
- }
- if (pinned != npages) {
- unpin_vector_pages(pq->mm, pages, node->npages, pinned);
- return -EFAULT;
- }
- kfree(node->pages);
- node->rb.len = iovec->iov.iov_len;
- node->pages = pages;
- atomic_add(pinned, &pq->n_locked);
- return pinned;
-}
-
-static void unpin_sdma_pages(struct sdma_mmu_node *node)
-{
- if (node->npages) {
- unpin_vector_pages(node->pq->mm, node->pages, 0, node->npages);
- atomic_sub(node->npages, &node->pq->n_locked);
- }
-}
-
-static int pin_vector_pages(struct user_sdma_request *req,
- struct user_sdma_iovec *iovec)
-{
- int ret = 0, pinned, npages;
- struct hfi1_user_sdma_pkt_q *pq = req->pq;
- struct sdma_mmu_node *node = NULL;
- struct mmu_rb_node *rb_node;
- struct iovec *iov;
- bool extracted;
-
- extracted =
- hfi1_mmu_rb_remove_unless_exact(pq->handler,
- (unsigned long)
- iovec->iov.iov_base,
- iovec->iov.iov_len, &rb_node);
- if (rb_node) {
- node = container_of(rb_node, struct sdma_mmu_node, rb);
- if (!extracted) {
- atomic_inc(&node->refcount);
- iovec->pages = node->pages;
- iovec->npages = node->npages;
- iovec->node = node;
- return 0;
- }
- }
-
- if (!node) {
- node = kzalloc(sizeof(*node), GFP_KERNEL);
- if (!node)
- return -ENOMEM;
-
- node->rb.addr = (unsigned long)iovec->iov.iov_base;
- node->pq = pq;
- atomic_set(&node->refcount, 0);
- }
-
- iov = &iovec->iov;
- npages = num_user_pages((unsigned long)iov->iov_base, iov->iov_len);
- if (node->npages < npages) {
- pinned = pin_sdma_pages(req, iovec, node, npages);
- if (pinned < 0) {
- ret = pinned;
- goto bail;
- }
- node->npages += pinned;
- npages = node->npages;
- }
- iovec->pages = node->pages;
- iovec->npages = npages;
- iovec->node = node;
-
- ret = hfi1_mmu_rb_insert(req->pq->handler, &node->rb);
- if (ret) {
- iovec->node = NULL;
- goto bail;
- }
- return 0;
-bail:
- unpin_sdma_pages(node);
- kfree(node);
- return ret;
-}
-
-static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
- unsigned start, unsigned npages)
-{
- hfi1_release_user_pages(mm, pages + start, npages, false);
- kfree(pages);
-}
-
static int check_header_template(struct user_sdma_request *req,
struct hfi1_pkt_header *hdr, u32 lrhlen,
u32 datalen)
@@ -1406,7 +1181,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
if (req->seqcomp != req->info.npkts - 1)
return;
- user_sdma_free_request(req, false);
+ user_sdma_free_request(req);
set_comp_state(pq, cq, req->info.comp_idx, state, status);
pq_update(pq);
}
@@ -1417,10 +1192,8 @@ static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
wake_up(&pq->wait);
}
-static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
+static void user_sdma_free_request(struct user_sdma_request *req)
{
- int i;
-
if (!list_empty(&req->txps)) {
struct sdma_txreq *t, *p;
@@ -1433,21 +1206,6 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
}
}
- for (i = 0; i < req->data_iovs; i++) {
- struct sdma_mmu_node *node = req->iovs[i].node;
-
- if (!node)
- continue;
-
- req->iovs[i].node = NULL;
-
- if (unpin)
- hfi1_mmu_rb_remove(req->pq->handler,
- &node->rb);
- else
- atomic_dec(&node->refcount);
- }
-
kfree(req->tids);
clear_bit(req->info.comp_idx, req->pq->req_in_use);
}
@@ -1464,63 +1222,3 @@ static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
idx, state, ret);
}
-
-static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
- unsigned long len)
-{
- return (bool)(node->addr == addr);
-}
-
-static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode)
-{
- struct sdma_mmu_node *node =
- container_of(mnode, struct sdma_mmu_node, rb);
-
- atomic_inc(&node->refcount);
- return 0;
-}
-
-/*
- * Return 1 to remove the node from the rb tree and call the remove op.
- *
- * Called with the rb tree lock held.
- */
-static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
- void *evict_arg, bool *stop)
-{
- struct sdma_mmu_node *node =
- container_of(mnode, struct sdma_mmu_node, rb);
- struct evict_data *evict_data = evict_arg;
-
- /* is this node still being used? */
- if (atomic_read(&node->refcount))
- return 0; /* keep this node */
-
- /* this node will be evicted, add its pages to our count */
- evict_data->cleared += node->npages;
-
- /* have enough pages been cleared? */
- if (evict_data->cleared >= evict_data->target)
- *stop = true;
-
- return 1; /* remove this node */
-}
-
-static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode)
-{
- struct sdma_mmu_node *node =
- container_of(mnode, struct sdma_mmu_node, rb);
-
- unpin_sdma_pages(node);
- kfree(node);
-}
-
-static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
-{
- struct sdma_mmu_node *node =
- container_of(mnode, struct sdma_mmu_node, rb);
-
- if (!atomic_read(&node->refcount))
- return 1;
- return 0;
-}
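The fd->pq handling above moves from a plain pointer to an SRCU-protected one: hfi1_user_sdma_process_request() dereferences it under fd->pq_srcu, while hfi1_user_sdma_free_queues() unpublishes it and waits in synchronize_srcu() before tearing the queue down. The kernel-style sketch below shows the same publish/read/teardown pattern in isolation; the struct and function names are illustrative and not part of the driver, and it assumes pq_srcu and pq_rcu_lock were set up with init_srcu_struct() and spin_lock_init().

/*
 * Sketch of the SRCU pattern: readers dereference the pointer under
 * srcu_read_lock(), and teardown NULLs the pointer, then waits in
 * synchronize_srcu() so no reader can still hold a stale pq.
 */
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/srcu.h>

struct pkt_q_sketch {
	int nr_reqs;
};

struct filedata_sketch {
	struct srcu_struct pq_srcu;
	spinlock_t pq_rcu_lock;			/* serializes writers */
	struct pkt_q_sketch __rcu *pq;
};

/* reader side: cheap, never blocks the writer */
static int pq_reader(struct filedata_sketch *fd)
{
	struct pkt_q_sketch *pq;
	int idx, n = -1;

	idx = srcu_read_lock(&fd->pq_srcu);
	pq = srcu_dereference(fd->pq, &fd->pq_srcu);
	if (pq)
		n = pq->nr_reqs;
	srcu_read_unlock(&fd->pq_srcu, idx);
	return n;
}

/* teardown side: unpublish, then wait for readers before freeing */
static void pq_teardown(struct filedata_sketch *fd)
{
	struct pkt_q_sketch *pq;

	spin_lock(&fd->pq_rcu_lock);
	pq = rcu_dereference_protected(fd->pq,
				       lockdep_is_held(&fd->pq_rcu_lock));
	rcu_assign_pointer(fd->pq, NULL);
	spin_unlock(&fd->pq_rcu_lock);

	synchronize_srcu(&fd->pq_srcu);	/* all readers are done with pq */
	kfree(pq);
}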
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index 14dfd757dafd..8735524e3a9a 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -1,57 +1,20 @@
-#ifndef _HFI1_USER_SDMA_H
-#define _HFI1_USER_SDMA_H
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
+ * Copyright(c) 2023 - Cornelis Networks, Inc.
* Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+#ifndef _HFI1_USER_SDMA_H
+#define _HFI1_USER_SDMA_H
+
#include <linux/device.h>
#include <linux/wait.h>
#include "common.h"
#include "iowait.h"
#include "user_exp_rcv.h"
+#include "mmu_rb.h"
+#include "pinning.h"
+#include "sdma.h"
/* The maximum number of Data io vectors per message/request */
#define MAX_VECTORS_PER_REQ 8
@@ -110,12 +73,6 @@ enum pkt_q_sdma_state {
SDMA_PKT_Q_DEFERRED,
};
-/*
- * Maximum retry attempts to submit a TX request
- * before putting the process to sleep.
- */
-#define MAX_DEFER_RETRY_COUNT 1
-
#define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */
#define SDMA_DBG(req, fmt, ...) \
@@ -139,7 +96,6 @@ struct hfi1_user_sdma_pkt_q {
unsigned long unpinned;
struct mmu_rb_handler *handler;
atomic_t n_locked;
- struct mm_struct *mm;
};
struct hfi1_user_sdma_comp_q {
@@ -147,27 +103,14 @@ struct hfi1_user_sdma_comp_q {
struct hfi1_sdma_comp_entry *comps;
};
-struct sdma_mmu_node {
- struct mmu_rb_node rb;
- struct hfi1_user_sdma_pkt_q *pq;
- atomic_t refcount;
- struct page **pages;
- unsigned int npages;
-};
-
struct user_sdma_iovec {
struct list_head list;
struct iovec iov;
- /* number of pages in this vector */
- unsigned int npages;
- /* array of pinned pages for this vector */
- struct page **pages;
/*
* offset into the virtual address space of the vector at
* which we last left off.
*/
u64 offset;
- struct sdma_mmu_node *node;
};
/* evict operation argument */
@@ -245,7 +188,6 @@ struct user_sdma_txreq {
struct list_head list;
struct user_sdma_request *req;
u16 flags;
- unsigned int busycount;
u16 seqnum;
};
@@ -256,5 +198,4 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
struct iovec *iovec, unsigned long dim,
unsigned long *count);
-
#endif /* _HFI1_USER_SDMA_H */
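user_sdma.c above also switches the req_in_use allocation from an open-coded kcalloc() of longs to bitmap_zalloc()/bitmap_free(). A small kernel-style sketch of that bitmap lifecycle follows; the ring size and helper names are illustrative, and slot reservation via test_and_set_bit()/clear_bit() is an assumption about usage rather than a copy of the driver's code.

#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/slab.h>

#define COMP_RING_SIZE 128	/* illustrative completion-ring size */

static unsigned long *alloc_req_bitmap(void)
{
	/* one bit per completion-ring slot, zeroed */
	return bitmap_zalloc(COMP_RING_SIZE, GFP_KERNEL);
}

static int claim_slot(unsigned long *req_in_use, unsigned int idx)
{
	if (idx >= COMP_RING_SIZE)
		return -EINVAL;
	/* nonzero return means the slot was already taken */
	if (test_and_set_bit(idx, req_in_use))
		return -EBUSY;
	return 0;
}

static void release_slot(unsigned long *req_in_use, unsigned int idx)
{
	clear_bit(idx, req_in_use);
}

static void free_req_bitmap(unsigned long *req_in_use)
{
	bitmap_free(req_in_use);
}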
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 1eb4105b2d22..3cbbfccdd8cd 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*/
#include <rdma/ib_mad.h>
@@ -54,6 +12,7 @@
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <rdma/opa_addr.h>
+#include <linux/nospec.h>
#include "hfi.h"
#include "common.h"
@@ -65,6 +24,7 @@
#include "vnic.h"
#include "fault.h"
#include "affinity.h"
+#include "ipoib.h"
static unsigned int hfi1_lkey_table_size = 16;
module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
@@ -146,9 +106,6 @@ static int pio_wait(struct rvt_qp *qp,
/* Length of buffer to create verbs txreq cache name */
#define TXREQ_NAME_LEN 24
-/* 16B trailing buffer */
-static const u8 trail_buf[MAX_16B_PADDING];
-
static uint wss_threshold = 80;
module_param(wss_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy");
@@ -517,10 +474,11 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet,
opa_get_lid(packet->dlid, 9B));
if (!mcast)
goto drop;
+ rcu_read_lock();
list_for_each_entry_rcu(p, &mcast->qp_list, list) {
packet->qp = p->qp;
if (hfi1_do_pkey_check(packet))
- goto drop;
+ goto unlock_drop;
spin_lock_irqsave(&packet->qp->r_lock, flags);
packet_handler = qp_ok(packet);
if (likely(packet_handler))
@@ -529,6 +487,7 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet,
ibp->rvp.n_pkt_drops++;
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
}
+ rcu_read_unlock();
/*
* Notify rvt_multicast_detach() if it is waiting for us
* to finish.
@@ -595,7 +554,7 @@ void hfi1_16B_rcv(struct hfi1_packet *packet)
*/
static void mem_timer(struct timer_list *t)
{
- struct hfi1_ibdev *dev = from_timer(dev, t, mem_timer);
+ struct hfi1_ibdev *dev = timer_container_of(dev, t, mem_timer);
struct list_head *list = &dev->memwait;
struct rvt_qp *qp = NULL;
struct iowait *wait;
@@ -638,6 +597,8 @@ static void verbs_sdma_complete(
struct hfi1_opa_header *hdr;
hdr = &tx->phdr.hdr;
+ if (unlikely(status == SDMA_TXREQ_S_ABORTED))
+ hfi1_rc_verbs_aborted(qp, hdr);
hfi1_rc_send_complete(qp, hdr);
}
spin_unlock(&qp->s_lock);
@@ -726,7 +687,7 @@ bail_txadd:
/**
* update_tx_opstats - record stats by opcode
- * @qp; the qp
+ * @qp: the qp
* @ps: transmit packet state
* @plen: the plen in dwords
*
@@ -817,8 +778,8 @@ static int build_verbs_tx_desc(
/* add icrc, lt byte, and padding to flit */
if (extra_bytes)
- ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq,
- (void *)trail_buf, extra_bytes);
+ ret = sdma_txadd_daddr(sde->dd, &tx->txreq, sde->dd->sdma_pad_phys,
+ extra_bytes);
bail_txadd:
return ret;
@@ -871,16 +832,17 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
else
pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
- if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
- pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
pbc = create_pbc(ppd,
pbc,
qp->srate_mbps,
vl,
plen);
- /* Update HCRC based on packet opcode */
- pbc = update_hcrc(ps->opcode, pbc);
+ if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
+ pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
+ else
+ /* Update HCRC based on packet opcode */
+ pbc = update_hcrc(ps->opcode, pbc);
}
tx->wqe = qp->s_wqe;
ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc);
@@ -1027,20 +989,20 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
else
pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
+ pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
- pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
-
- /* Update HCRC based on packet opcode */
- pbc = update_hcrc(ps->opcode, pbc);
+ else
+ /* Update HCRC based on packet opcode */
+ pbc = update_hcrc(ps->opcode, pbc);
}
if (cb)
iowait_pio_inc(&priv->s_iowait);
pbuf = sc_buffer_alloc(sc, plen, cb, qp);
- if (unlikely(!pbuf)) {
+ if (IS_ERR_OR_NULL(pbuf)) {
if (cb)
verbs_pio_complete(qp, 0);
- if (ppd->host_link_state != HLS_UP_ACTIVE) {
+ if (IS_ERR(pbuf)) {
/*
* If we have filled the PIO buffers to capacity and are
* not in an active state this request is not going to
@@ -1085,7 +1047,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
}
/* add icrc, lt byte, and padding to flit */
if (extra_bytes)
- seg_pio_copy_mid(pbuf, trail_buf, extra_bytes);
+ seg_pio_copy_mid(pbuf, ppd->dd->sdma_pad_dma,
+ extra_bytes);
seg_pio_copy_end(pbuf);
}
@@ -1095,15 +1058,15 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
&ps->s_txreq->phdr.hdr, ib_is_sc5(sc5));
pio_bail:
+ spin_lock_irqsave(&qp->s_lock, flags);
if (qp->s_wqe) {
- spin_lock_irqsave(&qp->s_lock, flags);
rvt_send_complete(qp, qp->s_wqe, wc_status);
- spin_unlock_irqrestore(&qp->s_lock, flags);
} else if (qp->ibqp.qp_type == IB_QPT_RC) {
- spin_lock_irqsave(&qp->s_lock, flags);
+ if (unlikely(wc_status == IB_WC_GENERAL_ERR))
+ hfi1_rc_verbs_aborted(qp, &ps->s_txreq->phdr.hdr);
hfi1_rc_send_complete(qp, &ps->s_txreq->phdr.hdr);
- spin_unlock_irqrestore(&qp->s_lock, flags);
}
+ spin_unlock_irqrestore(&qp->s_lock, flags);
ret = 0;
@@ -1140,7 +1103,7 @@ static inline int egress_pkey_matches_entry(u16 pkey, u16 ent)
* egress_pkey_check - check P_KEY of a packet
* @ppd: Physical IB port data
* @slid: SLID for packet
- * @bkey: PKEY for header
+ * @pkey: PKEY for header
* @sc5: SC for packet
* @s_pkey_index: It will be used for look up optimization for kernel contexts
* only. If it is negative value, then it means user contexts is calling this
@@ -1201,7 +1164,7 @@ bad:
return 1;
}
-/**
+/*
* get_send_routine - choose an egress routine
*
* Choose an egress routine based on QP type
@@ -1337,8 +1300,8 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE |
- IB_DEVICE_MEM_MGT_EXTENSIONS |
- IB_DEVICE_RDMA_NETDEV_OPA_VNIC;
+ IB_DEVICE_MEM_MGT_EXTENSIONS;
+ rdi->dparms.props.kernel_cap_flags = IBK_RDMA_NETDEV_OPA;
rdi->dparms.props.page_size_cap = PAGE_SIZE;
rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
rdi->dparms.props.vendor_part_id = dd->pcidev->device;
@@ -1356,9 +1319,6 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
rdi->dparms.props.max_cq = hfi1_max_cqs;
rdi->dparms.props.max_ah = hfi1_max_ahs;
rdi->dparms.props.max_cqe = hfi1_max_cqes;
- rdi->dparms.props.max_mr = rdi->lkey_table.max;
- rdi->dparms.props.max_fmr = rdi->lkey_table.max;
- rdi->dparms.props.max_map_per_fmr = 32767;
rdi->dparms.props.max_pd = hfi1_max_pds;
rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC;
rdi->dparms.props.max_qp_init_rd_atom = 255;
@@ -1405,7 +1365,7 @@ static inline u16 opa_width_to_ib(u16 in)
}
}
-static int query_port(struct rvt_dev_info *rdi, u8 port_num,
+static int query_port(struct rvt_dev_info *rdi, u32 port_num,
struct ib_port_attr *props)
{
struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
@@ -1422,7 +1382,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
props->gid_tbl_len = HFI1_GUIDS_PER_PORT;
props->active_width = (u8)opa_width_to_ib(ppd->link_width_active);
/* see rate_show() in ib core/sysfs.c */
- props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active);
+ props->active_speed = opa_speed_to_ib(ppd->link_speed_active);
props->max_vl_num = ppd->vls_supported;
/* Once we are a "first class" citizen and have added the OPA MTUs to
@@ -1437,6 +1397,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
4096 : hfi1_max_mtu), IB_MTU_4096);
props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
mtu_to_enum(ppd->ibmtu, IB_MTU_4096);
+ props->phys_mtu = hfi1_max_mtu;
return 0;
}
@@ -1481,17 +1442,15 @@ bail:
return ret;
}
-static int shut_down_port(struct rvt_dev_info *rdi, u8 port_num)
+static int shut_down_port(struct rvt_dev_info *rdi, u32 port_num)
{
struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
- int ret;
set_link_down_reason(ppd, OPA_LINKDOWN_REASON_UNKNOWN, 0,
OPA_LINKDOWN_REASON_UNKNOWN);
- ret = set_link_state(ppd, HLS_DN_DOWNDEF);
- return ret;
+ return set_link_state(ppd, HLS_DN_DOWNDEF);
}
static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
@@ -1536,6 +1495,7 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr)
sl = rdma_ah_get_sl(ah_attr);
if (sl >= ARRAY_SIZE(ibp->sl_to_sc))
return -EINVAL;
+ sl = array_index_nospec(sl, ARRAY_SIZE(ibp->sl_to_sc));
sc5 = ibp->sl_to_sc[sl];
if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf)
@@ -1638,26 +1598,23 @@ static const char * const driver_cntr_names[] = {
"DRIVER_EgrHdrFull"
};
-static DEFINE_MUTEX(cntr_names_lock); /* protects the *_cntr_names bufers */
-static const char **dev_cntr_names;
-static const char **port_cntr_names;
+static struct rdma_stat_desc *dev_cntr_descs;
+static struct rdma_stat_desc *port_cntr_descs;
int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names);
static int num_dev_cntrs;
static int num_port_cntrs;
-static int cntr_names_initialized;
/*
* Convert a list of names separated by '\n' into an array of NULL terminated
* strings. Optionally some entries can be reserved in the array to hold extra
* external strings.
*/
-static int init_cntr_names(const char *names_in,
- const size_t names_len,
- int num_extra_names,
- int *num_cntrs,
- const char ***cntr_names)
+static int init_cntr_names(const char *names_in, const size_t names_len,
+ int num_extra_names, int *num_cntrs,
+ struct rdma_stat_desc **cntr_descs)
{
- char *names_out, *p, **q;
+ struct rdma_stat_desc *names_out;
+ char *p;
int i, n;
n = 0;
@@ -1665,77 +1622,65 @@ static int init_cntr_names(const char *names_in,
if (names_in[i] == '\n')
n++;
- names_out = kmalloc((n + num_extra_names) * sizeof(char *) + names_len,
+ names_out = kzalloc((n + num_extra_names) * sizeof(*names_out)
+ + names_len,
GFP_KERNEL);
if (!names_out) {
*num_cntrs = 0;
- *cntr_names = NULL;
+ *cntr_descs = NULL;
return -ENOMEM;
}
- p = names_out + (n + num_extra_names) * sizeof(char *);
+ p = (char *)&names_out[n + num_extra_names];
memcpy(p, names_in, names_len);
- q = (char **)names_out;
for (i = 0; i < n; i++) {
- q[i] = p;
+ names_out[i].name = p;
p = strchr(p, '\n');
*p++ = '\0';
}
*num_cntrs = n;
- *cntr_names = (const char **)names_out;
+ *cntr_descs = names_out;
return 0;
}
-static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
- u8 port_num)
+static struct rdma_hw_stats *hfi1_alloc_hw_device_stats(struct ib_device *ibdev)
{
- int i, err;
-
- mutex_lock(&cntr_names_lock);
- if (!cntr_names_initialized) {
+ if (!dev_cntr_descs) {
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+ int i, err;
- err = init_cntr_names(dd->cntrnames,
- dd->cntrnameslen,
+ err = init_cntr_names(dd->cntrnames, dd->cntrnameslen,
num_driver_cntrs,
- &num_dev_cntrs,
- &dev_cntr_names);
- if (err) {
- mutex_unlock(&cntr_names_lock);
+ &num_dev_cntrs, &dev_cntr_descs);
+ if (err)
return NULL;
- }
for (i = 0; i < num_driver_cntrs; i++)
- dev_cntr_names[num_dev_cntrs + i] =
- driver_cntr_names[i];
+ dev_cntr_descs[num_dev_cntrs + i].name =
+ driver_cntr_names[i];
+ }
+ return rdma_alloc_hw_stats_struct(dev_cntr_descs,
+ num_dev_cntrs + num_driver_cntrs,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static struct rdma_hw_stats *hfi_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num)
+{
+ if (!port_cntr_descs) {
+ struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+ int err;
- err = init_cntr_names(dd->portcntrnames,
- dd->portcntrnameslen,
+ err = init_cntr_names(dd->portcntrnames, dd->portcntrnameslen,
0,
- &num_port_cntrs,
- &port_cntr_names);
- if (err) {
- kfree(dev_cntr_names);
- dev_cntr_names = NULL;
- mutex_unlock(&cntr_names_lock);
+ &num_port_cntrs, &port_cntr_descs);
+ if (err)
return NULL;
- }
- cntr_names_initialized = 1;
}
- mutex_unlock(&cntr_names_lock);
-
- if (!port_num)
- return rdma_alloc_hw_stats_struct(
- dev_cntr_names,
- num_dev_cntrs + num_driver_cntrs,
- RDMA_HW_STATS_DEFAULT_LIFESPAN);
- else
- return rdma_alloc_hw_stats_struct(
- port_cntr_names,
- num_port_cntrs,
- RDMA_HW_STATS_DEFAULT_LIFESPAN);
+ return rdma_alloc_hw_stats_struct(port_cntr_descs, num_port_cntrs,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
static u64 hfi1_sps_ints(void)
@@ -1753,7 +1698,7 @@ static u64 hfi1_sps_ints(void)
}
static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
- u8 port, int index)
+ u32 port, int index)
{
u64 *values;
int count;
@@ -1779,14 +1724,20 @@ static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
}
static const struct ib_device_ops hfi1_dev_ops = {
- .alloc_hw_stats = alloc_hw_stats,
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_HFI1,
+
+ .alloc_hw_device_stats = hfi1_alloc_hw_device_stats,
+ .alloc_hw_port_stats = hfi_alloc_hw_port_stats,
.alloc_rdma_netdev = hfi1_vnic_alloc_rn,
+ .device_group = &ib_hfi1_attr_group,
.get_dev_fw_str = hfi1_get_dev_fw_str,
.get_hw_stats = get_hw_stats,
- .init_port = hfi1_create_port_files,
.modify_device = modify_device,
+ .port_groups = hfi1_attr_port_groups,
/* keep process mad in the driver */
.process_mad = hfi1_process_mad,
+ .rdma_netdev_get_params = hfi1_ipoib_rn_get_params,
};
/**
@@ -1829,13 +1780,12 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
*/
if (!ib_hfi1_sys_image_guid)
ib_hfi1_sys_image_guid = ibdev->node_guid;
- ibdev->owner = THIS_MODULE;
ibdev->phys_port_cnt = dd->num_pports;
ibdev->dev.parent = &dd->pcidev->dev;
ib_set_device_ops(ibdev, &hfi1_dev_ops);
- strlcpy(ibdev->node_desc, init_utsname()->nodename,
+ strscpy(ibdev->node_desc, init_utsname()->nodename,
sizeof(ibdev->node_desc));
/*
@@ -1858,9 +1808,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.dparms.qpn_start = 0;
dd->verbs_dev.rdi.dparms.qpn_inc = 1;
dd->verbs_dev.rdi.dparms.qos_shift = dd->qos_shift;
- dd->verbs_dev.rdi.dparms.qpn_res_start = kdeth_qp << 16;
- dd->verbs_dev.rdi.dparms.qpn_res_end =
- dd->verbs_dev.rdi.dparms.qpn_res_start + 65535;
+ dd->verbs_dev.rdi.dparms.qpn_res_start = RVT_KDETH_QP_BASE;
+ dd->verbs_dev.rdi.dparms.qpn_res_end = RVT_AIP_QP_MAX;
dd->verbs_dev.rdi.dparms.max_rdma_atomic = HFI1_MAX_RDMA_ATOMIC;
dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK;
dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT;
@@ -1920,10 +1869,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
i,
ppd->pkeys);
- rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev,
- &ib_hfi1_attr_group);
-
- ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_HFI1);
+ ret = rvt_register_device(&dd->verbs_dev.rdi);
if (ret)
goto err_verbs_txreq;
@@ -1954,16 +1900,13 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
if (!list_empty(&dev->memwait))
dd_dev_err(dd, "memwait list not empty!\n");
- del_timer_sync(&dev->mem_timer);
+ timer_delete_sync(&dev->mem_timer);
verbs_txreq_exit(dev);
- mutex_lock(&cntr_names_lock);
- kfree(dev_cntr_names);
- kfree(port_cntr_names);
- dev_cntr_names = NULL;
- port_cntr_names = NULL;
- cntr_names_initialized = 0;
- mutex_unlock(&cntr_names_lock);
+ kfree(dev_cntr_descs);
+ kfree(port_cntr_descs);
+ dev_cntr_descs = NULL;
+ port_cntr_descs = NULL;
}
void hfi1_cnp_rcv(struct hfi1_packet *packet)
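The counter rework above replaces the two `const char **` name arrays with `struct rdma_stat_desc` arrays whose .name fields point into a single allocation that also holds a copy of the '\n'-separated name blob. Below is a userspace sketch of that layout, using a local stand-in for rdma_stat_desc; the function and struct names are illustrative only.

/*
 * One calloc() holds both the descriptor array and a copy of the name
 * blob; each descriptor's .name points into that copy, with '\n'
 * replaced by '\0'.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct stat_desc {
	const char *name;	/* stand-in for rdma_stat_desc.name */
};

static int init_names(const char *names_in, size_t names_len,
		      int num_extra, int *num, struct stat_desc **descs)
{
	struct stat_desc *out;
	char *p;
	int i, n = 0;

	for (i = 0; i < (int)names_len; i++)
		if (names_in[i] == '\n')
			n++;

	/* descriptor array followed by the string data, zero-initialized */
	out = calloc(1, (n + num_extra) * sizeof(*out) + names_len + 1);
	if (!out) {
		*num = 0;
		*descs = NULL;
		return -1;
	}
	p = (char *)&out[n + num_extra];
	memcpy(p, names_in, names_len);
	for (i = 0; i < n; i++) {
		out[i].name = p;
		p = strchr(p, '\n');
		*p++ = '\0';
	}
	*num = n;
	*descs = out;
	return 0;
}

int main(void)
{
	const char blob[] = "rx_pkts\ntx_pkts\nrx_bytes\n";
	struct stat_desc *descs;
	int n, i;

	if (init_names(blob, strlen(blob), 0, &n, &descs))
		return 1;
	for (i = 0; i < n; i++)
		printf("%d: %s\n", i, descs[i].name);
	free(descs);
	return 0;
}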
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 7ecb8ed4a1d9..070e4f0babe8 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#ifndef HFI1_VERBS_H
@@ -107,9 +65,9 @@ enum {
HFI1_HAS_GRH = (1 << 0),
};
-#define LRH_16B_BYTES (FIELD_SIZEOF(struct hfi1_16b_header, lrh))
+#define LRH_16B_BYTES (sizeof_field(struct hfi1_16b_header, lrh))
#define LRH_16B_DWORDS (LRH_16B_BYTES / sizeof(u32))
-#define LRH_9B_BYTES (FIELD_SIZEOF(struct ib_header, lrh))
+#define LRH_9B_BYTES (sizeof_field(struct ib_header, lrh))
#define LRH_9B_DWORDS (LRH_9B_BYTES / sizeof(u32))
/* 24Bits for qpn, upper 8Bits reserved */
@@ -325,14 +283,13 @@ static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait)
*/
void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl,
u32 qp1, u32 qp2, u32 lid1, u32 lid2);
-void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num);
+void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u32 port_num);
void hfi1_sys_guid_chg(struct hfi1_ibport *ibp);
void hfi1_node_desc_chg(struct hfi1_ibport *ibp);
-int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
+int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u32 port,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in_mad, size_t in_mad_size,
- struct ib_mad_hdr *out_mad, size_t *out_mad_size,
- u16 *out_mad_pkey_index);
+ const struct ib_mad *in_mad, struct ib_mad *out_mad,
+ size_t *out_mad_size, u16 *out_mad_pkey_index);
/*
* The PSN_MASK and PSN_SHIFT allow for
@@ -416,6 +373,7 @@ void hfi1_rc_hdrerr(
u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
+void hfi1_rc_verbs_aborted(struct rvt_qp *qp, struct hfi1_opa_header *opah);
void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah);
void hfi1_ud_rcv(struct hfi1_packet *packet);
@@ -433,9 +391,6 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait);
int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
bool *call_send);
-extern const u32 rc_only_opcode;
-extern const u32 uc_only_opcode;
-
int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet);
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c
index c4ab2d5b4502..822f0d05bac8 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.c
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2016 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#include "hfi.h"
@@ -100,7 +58,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
struct hfi1_qp_priv *priv;
- tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC);
+ tx = kmem_cache_alloc(dev->verbs_txreq_cache, VERBS_TXREQ_GFP);
if (tx)
goto out;
priv = qp->priv;
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index b002e96eb335..56353c7676d0 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2016 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#ifndef HFI1_VERBS_TXREQ_H
@@ -72,6 +30,7 @@ struct hfi1_ibdev;
struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
struct rvt_qp *qp);
+#define VERBS_TXREQ_GFP (GFP_ATOMIC | __GFP_NOWARN)
static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
struct rvt_qp *qp)
__must_hold(&qp->slock)
@@ -79,7 +38,7 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
struct verbs_txreq *tx;
struct hfi1_qp_priv *priv = qp->priv;
- tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC);
+ tx = kmem_cache_alloc(dev->verbs_txreq_cache, VERBS_TXREQ_GFP);
if (unlikely(!tx)) {
/* call slow path to get the lock */
tx = __get_txreq(dev, qp);
@@ -90,7 +49,7 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
tx->mr = NULL;
tx->sde = priv->s_sde;
tx->psc = priv->s_sendcontext;
- /* so that we can test if the sdma decriptors are there */
+ /* so that we can test if the sdma descriptors are there */
tx->txreq.num_desc = 0;
/* Set the header type */
tx->phdr.hdr.hdr_type = priv->hdr_type;
@@ -98,11 +57,6 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
return tx;
}
-static inline struct sdma_txreq *get_sdma_txreq(struct verbs_txreq *tx)
-{
- return &tx->txreq;
-}
-
static inline struct verbs_txreq *get_waiting_verbs_txreq(struct iowait_work *w)
{
struct sdma_txreq *stx;
diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h
index 5ae781514e32..bbafeb5fc0ec 100644
--- a/drivers/infiniband/hw/hfi1/vnic.h
+++ b/drivers/infiniband/hw/hfi1/vnic.h
@@ -1,52 +1,10 @@
-#ifndef _HFI1_VNIC_H
-#define _HFI1_VNIC_H
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2017 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2017 - 2020 Intel Corporation.
*/
+#ifndef _HFI1_VNIC_H
+#define _HFI1_VNIC_H
#include <rdma/opa_vnic.h>
#include "hfi.h"
#include "sdma.h"
@@ -69,6 +27,7 @@
#define HFI1_VNIC_SC_SHIFT 4
#define HFI1_VNIC_MAX_QUEUE 16
+#define HFI1_NUM_VNIC_CTXT 8
/**
* struct hfi1_vnic_sdma - VNIC per Tx ring SDMA information
@@ -104,7 +63,6 @@ struct hfi1_vnic_rx_queue {
struct hfi1_vnic_vport_info *vinfo;
struct net_device *netdev;
struct napi_struct napi;
- struct sk_buff_head skbq;
};
/**
@@ -146,7 +104,6 @@ struct hfi1_vnic_vport_info {
/* vnic hfi1 internal functions */
void hfi1_vnic_setup(struct hfi1_devdata *dd);
-void hfi1_vnic_cleanup(struct hfi1_devdata *dd);
int hfi1_vnic_txreq_init(struct hfi1_devdata *dd);
void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd);
@@ -157,7 +114,7 @@ bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo,
/* vnic rdma netdev operations */
struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
enum rdma_netdev_t type,
const char *name,
unsigned char name_assign_type,
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index b49e60e8397d..16a4c297a897 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2017 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2017 - 2020 Intel Corporation.
*/
/*
@@ -53,6 +11,7 @@
#include <linux/if_vlan.h>
#include "vnic.h"
+#include "netdev.h"
#define HFI_TX_TIMEOUT_MS 1000
@@ -62,114 +21,6 @@
static DEFINE_SPINLOCK(vport_cntr_lock);
-static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
-{
- unsigned int rcvctrl_ops = 0;
- int ret;
-
- uctxt->do_interrupt = &handle_receive_interrupt;
-
- /* Now allocate the RcvHdr queue and eager buffers. */
- ret = hfi1_create_rcvhdrq(dd, uctxt);
- if (ret)
- goto done;
-
- ret = hfi1_setup_eagerbufs(uctxt);
- if (ret)
- goto done;
-
- if (uctxt->rcvhdrtail_kvaddr)
- clear_rcvhdrtail(uctxt);
-
- rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
- rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB;
-
- if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
- rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
- if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
- rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
- if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
- rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
- if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
- rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
-
- hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
-done:
- return ret;
-}
-
-static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
- struct hfi1_ctxtdata **vnic_ctxt)
-{
- struct hfi1_ctxtdata *uctxt;
- int ret;
-
- if (dd->flags & HFI1_FROZEN)
- return -EIO;
-
- ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt);
- if (ret < 0) {
- dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
- return -ENOMEM;
- }
-
- uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
- HFI1_CAP_KGET(NODROP_RHQ_FULL) |
- HFI1_CAP_KGET(NODROP_EGR_FULL) |
- HFI1_CAP_KGET(DMA_RTAIL);
- uctxt->seq_cnt = 1;
- uctxt->is_vnic = true;
-
- msix_request_rcd_irq(uctxt);
-
- hfi1_stats.sps_ctxts++;
- dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
- *vnic_ctxt = uctxt;
-
- return 0;
-}
-
-static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
- struct hfi1_ctxtdata *uctxt)
-{
- dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
- flush_wc();
-
- /*
- * Disable receive context and interrupt available, reset all
- * RcvCtxtCtrl bits to default values.
- */
- hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
- HFI1_RCVCTRL_TIDFLOW_DIS |
- HFI1_RCVCTRL_INTRAVAIL_DIS |
- HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
- HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
- HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
-
- /* msix_intr will always be > 0, only clean up if this is true */
- if (uctxt->msix_intr)
- msix_free_irq(dd, uctxt->msix_intr);
-
- uctxt->event_flags = 0;
-
- hfi1_clear_tids(uctxt);
- hfi1_clear_ctxt_pkey(dd, uctxt);
-
- hfi1_stats.sps_ctxts--;
-
- hfi1_free_ctxt(uctxt);
-}
-
-void hfi1_vnic_setup(struct hfi1_devdata *dd)
-{
- xa_init(&dd->vnic.vesws);
-}
-
-void hfi1_vnic_cleanup(struct hfi1_devdata *dd)
-{
- WARN_ON(!xa_empty(&dd->vnic.vesws));
-}
-
#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do { \
u64 *src64, *dst64; \
for (src64 = &qstats->x_grp.unicast, \
@@ -179,6 +30,9 @@ void hfi1_vnic_cleanup(struct hfi1_devdata *dd)
} \
} while (0)
+#define VNIC_MASK (0xFF)
+#define VNIC_ID(val) ((1ull << 24) | ((val) & VNIC_MASK))
+
/* hfi1_vnic_update_stats - update statistics */
static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
struct opa_vnic_stats *stats)
@@ -454,71 +308,25 @@ static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
return rc;
}
-static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq)
+static struct hfi1_vnic_vport_info *get_vnic_port(struct hfi1_devdata *dd,
+ int vesw_id)
{
- unsigned char *pad_info;
- struct sk_buff *skb;
+ int vnic_id = VNIC_ID(vesw_id);
- skb = skb_dequeue(&rxq->skbq);
- if (unlikely(!skb))
- return NULL;
-
- /* remove tail padding and icrc */
- pad_info = skb->data + skb->len - 1;
- skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
- ((*pad_info) & 0x7)));
-
- return skb;
+ return hfi1_netdev_get_data(dd, vnic_id);
}
-/* hfi1_vnic_handle_rx - handle skb receive */
-static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq,
- int *work_done, int work_to_do)
+static struct hfi1_vnic_vport_info *get_first_vnic_port(struct hfi1_devdata *dd)
{
- struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
- struct sk_buff *skb;
- int rc;
-
- while (1) {
- if (*work_done >= work_to_do)
- break;
-
- skb = hfi1_vnic_get_skb(rxq);
- if (unlikely(!skb))
- break;
-
- rc = hfi1_vnic_decap_skb(rxq, skb);
- /* update rx counters */
- hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
- if (unlikely(rc)) {
- dev_kfree_skb_any(skb);
- continue;
- }
-
- skb_checksum_none_assert(skb);
- skb->protocol = eth_type_trans(skb, rxq->netdev);
-
- napi_gro_receive(&rxq->napi, skb);
- (*work_done)++;
- }
-}
-
-/* hfi1_vnic_napi - napi receive polling callback function */
-static int hfi1_vnic_napi(struct napi_struct *napi, int budget)
-{
- struct hfi1_vnic_rx_queue *rxq = container_of(napi,
- struct hfi1_vnic_rx_queue, napi);
- struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
- int work_done = 0;
+ struct hfi1_vnic_vport_info *vinfo;
+ int next_id = VNIC_ID(0);
- v_dbg("napi %d budget %d\n", rxq->idx, budget);
- hfi1_vnic_handle_rx(rxq, &work_done, budget);
+ vinfo = hfi1_netdev_get_first_data(dd, &next_id);
- v_dbg("napi %d work_done %d\n", rxq->idx, work_done);
- if (work_done < budget)
- napi_complete(napi);
+ if (next_id > VNIC_ID(VNIC_MASK))
+ return NULL;
- return work_done;
+ return vinfo;
}
void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
@@ -527,13 +335,14 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
struct hfi1_vnic_vport_info *vinfo = NULL;
struct hfi1_vnic_rx_queue *rxq;
struct sk_buff *skb;
- int l4_type, vesw_id = -1;
+ int l4_type, vesw_id = -1, rc;
u8 q_idx;
+ unsigned char *pad_info;
l4_type = hfi1_16B_get_l4(packet->ebuf);
if (likely(l4_type == OPA_16B_L4_ETHR)) {
vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
- vinfo = xa_load(&dd->vnic.vesws, vesw_id);
+ vinfo = get_vnic_port(dd, vesw_id);
/*
* In case of invalid vesw id, count the error on
@@ -541,10 +350,8 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
*/
if (unlikely(!vinfo)) {
struct hfi1_vnic_vport_info *vinfo_tmp;
- unsigned long index = 0;
- vinfo_tmp = xa_find(&dd->vnic.vesws, &index, ULONG_MAX,
- XA_PRESENT);
+ vinfo_tmp = get_first_vnic_port(dd);
if (vinfo_tmp) {
spin_lock(&vport_cntr_lock);
vinfo_tmp->stats[0].netstats.rx_nohandler++;
@@ -563,12 +370,6 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
rxq = &vinfo->rxq[q_idx];
if (unlikely(!netif_oper_up(vinfo->netdev))) {
vinfo->stats[q_idx].rx_drop_state++;
- skb_queue_purge(&rxq->skbq);
- return;
- }
-
- if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) {
- vinfo->stats[q_idx].netstats.rx_fifo_errors++;
return;
}
@@ -580,62 +381,65 @@ void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
memcpy(skb->data, packet->ebuf, packet->tlen);
skb_put(skb, packet->tlen);
- skb_queue_tail(&rxq->skbq, skb);
- if (napi_schedule_prep(&rxq->napi)) {
- v_dbg("napi %d scheduling\n", q_idx);
- __napi_schedule(&rxq->napi);
+ pad_info = skb->data + skb->len - 1;
+ skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
+ ((*pad_info) & 0x7)));
+
+ rc = hfi1_vnic_decap_skb(rxq, skb);
+
+ /* update rx counters */
+ hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
+ if (unlikely(rc)) {
+ dev_kfree_skb_any(skb);
+ return;
}
+
+ skb_checksum_none_assert(skb);
+ skb->protocol = eth_type_trans(skb, rxq->netdev);
+
+ napi_gro_receive(&rxq->napi, skb);
}
static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
{
struct hfi1_devdata *dd = vinfo->dd;
struct net_device *netdev = vinfo->netdev;
- int i, rc;
+ int rc;
/* ensure virtual eth switch id is valid */
if (!vinfo->vesw_id)
return -EINVAL;
- rc = xa_insert(&dd->vnic.vesws, vinfo->vesw_id, vinfo, GFP_KERNEL);
+ rc = hfi1_netdev_add_data(dd, VNIC_ID(vinfo->vesw_id), vinfo);
if (rc < 0)
return rc;
- for (i = 0; i < vinfo->num_rx_q; i++) {
- struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
-
- skb_queue_head_init(&rxq->skbq);
- napi_enable(&rxq->napi);
- }
+ rc = hfi1_netdev_rx_init(dd);
+ if (rc)
+ goto err_remove;
netif_carrier_on(netdev);
netif_tx_start_all_queues(netdev);
set_bit(HFI1_VNIC_UP, &vinfo->flags);
return 0;
+
+err_remove:
+ hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id));
+ return rc;
}
static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
{
struct hfi1_devdata *dd = vinfo->dd;
- u8 i;
clear_bit(HFI1_VNIC_UP, &vinfo->flags);
netif_carrier_off(vinfo->netdev);
netif_tx_disable(vinfo->netdev);
- xa_erase(&dd->vnic.vesws, vinfo->vesw_id);
-
- /* ensure irqs see the change */
- msix_vnic_synchronize_irq(dd);
+ hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id));
- /* remove unread skbs */
- for (i = 0; i < vinfo->num_rx_q; i++) {
- struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
-
- napi_disable(&rxq->napi);
- skb_queue_purge(&rxq->skbq);
- }
+ hfi1_netdev_rx_destroy(dd);
}
static int hfi1_netdev_open(struct net_device *netdev)
@@ -660,70 +464,31 @@ static int hfi1_netdev_close(struct net_device *netdev)
return 0;
}
-static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd,
- struct hfi1_ctxtdata **vnic_ctxt)
-{
- int rc;
-
- rc = allocate_vnic_ctxt(dd, vnic_ctxt);
- if (rc) {
- dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc);
- return rc;
- }
-
- rc = setup_vnic_ctxt(dd, *vnic_ctxt);
- if (rc) {
- dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc);
- deallocate_vnic_ctxt(dd, *vnic_ctxt);
- *vnic_ctxt = NULL;
- }
-
- return rc;
-}
-
static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
{
struct hfi1_devdata *dd = vinfo->dd;
- int i, rc = 0;
+ int rc = 0;
mutex_lock(&hfi1_mutex);
- if (!dd->vnic.num_vports) {
+ if (!dd->vnic_num_vports) {
rc = hfi1_vnic_txreq_init(dd);
if (rc)
goto txreq_fail;
}
- for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
- rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
- if (rc)
- break;
- hfi1_rcd_get(dd->vnic.ctxt[i]);
- dd->vnic.ctxt[i]->vnic_q_idx = i;
- }
-
- if (i < vinfo->num_rx_q) {
- /*
- * If required amount of contexts is not
- * allocated successfully then remaining contexts
- * are released.
- */
- while (i-- > dd->vnic.num_ctxt) {
- deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
- hfi1_rcd_put(dd->vnic.ctxt[i]);
- dd->vnic.ctxt[i] = NULL;
- }
+ rc = hfi1_netdev_rx_init(dd);
+ if (rc) {
+ dd_dev_err(dd, "Unable to initialize netdev contexts\n");
goto alloc_fail;
}
- if (dd->vnic.num_ctxt != i) {
- dd->vnic.num_ctxt = i;
- hfi1_init_vnic_rsm(dd);
- }
+ hfi1_init_vnic_rsm(dd);
- dd->vnic.num_vports++;
+ dd->vnic_num_vports++;
hfi1_vnic_sdma_init(vinfo);
+
alloc_fail:
- if (!dd->vnic.num_vports)
+ if (!dd->vnic_num_vports)
hfi1_vnic_txreq_deinit(dd);
txreq_fail:
mutex_unlock(&hfi1_mutex);
@@ -733,20 +498,14 @@ txreq_fail:
static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
{
struct hfi1_devdata *dd = vinfo->dd;
- int i;
mutex_lock(&hfi1_mutex);
- if (--dd->vnic.num_vports == 0) {
- for (i = 0; i < dd->vnic.num_ctxt; i++) {
- deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
- hfi1_rcd_put(dd->vnic.ctxt[i]);
- dd->vnic.ctxt[i] = NULL;
- }
+ if (--dd->vnic_num_vports == 0) {
hfi1_deinit_vnic_rsm(dd);
- dd->vnic.num_ctxt = 0;
hfi1_vnic_txreq_deinit(dd);
}
mutex_unlock(&hfi1_mutex);
+ hfi1_netdev_rx_destroy(dd);
}
static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
@@ -792,7 +551,7 @@ static void hfi1_vnic_free_rn(struct net_device *netdev)
}
struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
enum rdma_netdev_t type,
const char *name,
unsigned char name_assign_type,
@@ -804,7 +563,7 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
struct rdma_netdev *rn;
int i, size, rc;
- if (!dd->num_vnic_contexts)
+ if (!dd->num_netdev_contexts)
return ERR_PTR(-ENOMEM);
if (!port_num || (port_num > dd->num_pports))
@@ -815,15 +574,16 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
- dd->num_sdma, dd->num_vnic_contexts);
+ chip_sdma_engines(dd),
+ dd->num_netdev_contexts);
if (!netdev)
return ERR_PTR(-ENOMEM);
rn = netdev_priv(netdev);
vinfo = opa_vnic_dev_priv(netdev);
vinfo->dd = dd;
- vinfo->num_tx_q = dd->num_sdma;
- vinfo->num_rx_q = dd->num_vnic_contexts;
+ vinfo->num_tx_q = chip_sdma_engines(dd);
+ vinfo->num_rx_q = dd->num_netdev_contexts;
vinfo->netdev = netdev;
rn->free_rdma_netdev = hfi1_vnic_free_rn;
rn->set_id = hfi1_vnic_set_vesw_id;
@@ -841,7 +601,6 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
rxq->idx = i;
rxq->vinfo = vinfo;
rxq->netdev = netdev;
- netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64);
}
rc = hfi1_vnic_init(vinfo);
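Taken together, the vnic_main.c hunks above move VNIC receive handling onto the shared hfi1 netdev context table: the per-device vesws xarray, the private skb queue, and the VNIC-specific NAPI poller are removed, and vports are keyed by VNIC_ID(), which sets bit 24 so vesw ids occupy their own slice of the shared table (iteration past VNIC_ID(VNIC_MASK) is therefore known to have left that slice). A condensed sketch of the resulting lifecycle, using only interfaces that appear in the hunks above, with locking and error handling trimmed:

	/* bring-up: publish the vport under its tagged vesw id */
	static int vport_publish(struct hfi1_devdata *dd,
				 struct hfi1_vnic_vport_info *vinfo)
	{
		return hfi1_netdev_add_data(dd, VNIC_ID(vinfo->vesw_id), vinfo);
	}

	/* receive: resolve the vesw id carried in the 16B header */
	static struct hfi1_vnic_vport_info *vport_lookup(struct hfi1_devdata *dd,
							 int vesw_id)
	{
		return hfi1_netdev_get_data(dd, VNIC_ID(vesw_id));
	}

	/* tear-down: unpublish before destroying the rx contexts */
	static void vport_unpublish(struct hfi1_devdata *dd,
				    struct hfi1_vnic_vport_info *vinfo)
	{
		hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id));
	}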
diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c
index af1b1ffcb38e..6caf01ba0bca 100644
--- a/drivers/infiniband/hw/hfi1/vnic_sdma.c
+++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2017 - 2018 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
/*
@@ -102,14 +60,15 @@ static noinline int build_vnic_ulp_payload(struct sdma_engine *sde,
goto bail_txadd;
for (i = 0; i < skb_shinfo(tx->skb)->nr_frags; i++) {
- struct skb_frag_struct *frag = &skb_shinfo(tx->skb)->frags[i];
+ skb_frag_t *frag = &skb_shinfo(tx->skb)->frags[i];
/* combine physically continuous fragments later? */
ret = sdma_txadd_page(sde->dd,
&tx->txreq,
skb_frag_page(frag),
- frag->page_offset,
- skb_frag_size(frag));
+ skb_frag_off(frag),
+ skb_frag_size(frag),
+ NULL, NULL, NULL);
if (unlikely(ret))
goto bail_txadd;
}
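The vnic_sdma.c hunk above tracks two core-kernel API changes rather than anything hfi1-specific: skb fragments are now the opaque skb_frag_t, read through skb_frag_page()/skb_frag_off()/skb_frag_size() instead of the removed page_offset field, and sdma_txadd_page() takes three additional arguments (unused by this caller, hence the NULLs). A minimal, driver-independent sketch of walking fragments with the accessor API:

	#include <linux/skbuff.h>

	/* Illustrative only: dump each page fragment of an skb. */
	static void walk_skb_frags(const struct sk_buff *skb)
	{
		int i;

		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

			/* accessors replace direct frag->page_offset access */
			pr_debug("frag %d: page %p off %u len %u\n", i,
				 skb_frag_page(frag), skb_frag_off(frag),
				 skb_frag_size(frag));
		}
	}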
diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig
index fddb5fdf92de..44cdb706fe27 100644
--- a/drivers/infiniband/hw/hns/Kconfig
+++ b/drivers/infiniband/hw/hns/Kconfig
@@ -1,31 +1,11 @@
-config INFINIBAND_HNS
- tristate "HNS RoCE Driver"
- depends on NET_VENDOR_HISILICON
- depends on ARM64 || (COMPILE_TEST && 64BIT)
- ---help---
- This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine
- is used in Hisilicon Hip06 and more further ICT SoC based on
- platform device.
-
- To compile this driver as a module, choose M here: the module
- will be called hns-roce.
-
-config INFINIBAND_HNS_HIP06
- tristate "Hisilicon Hip06 Family RoCE support"
- depends on INFINIBAND_HNS && HNS && HNS_DSAF && HNS_ENET
- ---help---
- RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip06 and
- Hip07 SoC. These RoCE engines are platform devices.
-
- To compile this driver as a module, choose M here: the module
- will be called hns-roce-hw-v1.
-
+# SPDX-License-Identifier: GPL-2.0-only
config INFINIBAND_HNS_HIP08
tristate "Hisilicon Hip08 Family RoCE support"
- depends on INFINIBAND_HNS && PCI && HNS3
- ---help---
+ depends on ARM64 || (COMPILE_TEST && 64BIT)
+ depends on PCI && HNS3
+ help
RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip08 SoC.
The RoCE engine is a PCI device.
- To compile this driver as a module, choose M here: the module
- will be called hns-roce-hw-v2.
+ To compile this driver, choose M here. This module will be called
+ hns-roce-hw-v2.
diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile
index eee5205f936f..d07ef02c5231 100644
--- a/drivers/infiniband/hw/hns/Makefile
+++ b/drivers/infiniband/hw/hns/Makefile
@@ -1,14 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0-only
#
# Makefile for the Hisilicon RoCE drivers.
#
ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
+ccflags-y += -I $(srctree)/drivers/net/ethernet/hisilicon/hns3/hns3pf
+ccflags-y += -I $(srctree)/drivers/net/ethernet/hisilicon/hns3/hns3_common
+ccflags-y += -I $(src)
-obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o
-hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
+hns-roce-hw-v2-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \
- hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o
-obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o
-hns-roce-hw-v1-objs := hns_roce_hw_v1.o
+ hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o \
+ hns_roce_debugfs.o hns_roce_hw_v2.o hns_roce_bond.o
+
obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o
-hns-roce-hw-v2-objs := hns_roce_hw_v2.o hns_roce_hw_v2_dfx.o
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index cdd2ac24fc2a..0c1c32d23c88 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -30,59 +30,98 @@
* SOFTWARE.
*/
-#include <linux/platform_device.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include "hns_roce_device.h"
+#include "hns_roce_hw_v2.h"
-#define HNS_ROCE_PORT_NUM_SHIFT 24
-#define HNS_ROCE_VLAN_SL_BIT_MASK 7
-#define HNS_ROCE_VLAN_SL_SHIFT 13
+static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr)
+{
+ u32 fl = ah_attr->grh.flow_label;
+ u16 sport;
+
+ if (!fl)
+ sport = get_random_u32_inclusive(IB_ROCE_UDP_ENCAP_VALID_PORT_MIN,
+ IB_ROCE_UDP_ENCAP_VALID_PORT_MAX);
+ else
+ sport = rdma_flow_label_to_udp_sport(fl);
+
+ return sport;
+}
-int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr,
- u32 flags, struct ib_udata *udata)
+int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
{
+ struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device);
- const struct ib_gid_attr *gid_attr;
- struct device *dev = hr_dev->dev;
+ struct hns_roce_ib_create_ah_resp resp = {};
struct hns_roce_ah *ah = to_hr_ah(ibah);
- u16 vlan_tag = 0xffff;
- const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
- bool vlan_en = false;
+ u8 tclass = get_tclass(grh);
+ u8 priority = 0;
+ u8 tc_mode = 0;
int ret;
- gid_attr = ah_attr->grh.sgid_attr;
- ret = rdma_read_gid_l2_fields(gid_attr, &vlan_tag, NULL);
- if (ret)
- return ret;
-
- /* Get mac address */
- memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);
-
- if (vlan_tag < VLAN_CFI_MASK) {
- vlan_en = true;
- vlan_tag |= (rdma_ah_get_sl(ah_attr) &
- HNS_ROCE_VLAN_SL_BIT_MASK) <<
- HNS_ROCE_VLAN_SL_SHIFT;
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 && udata) {
+ ret = -EOPNOTSUPP;
+ goto err_out;
}
- ah->av.port_pd = cpu_to_le32(to_hr_pd(ibah->pd)->pdn |
- (rdma_ah_get_port_num(ah_attr) <<
- HNS_ROCE_PORT_NUM_SHIFT));
+ ah->av.port = rdma_ah_get_port_num(ah_attr);
ah->av.gid_index = grh->sgid_index;
- ah->av.vlan = cpu_to_le16(vlan_tag);
- ah->av.vlan_en = vlan_en;
- dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index,
- ah->av.vlan);
if (rdma_ah_get_static_rate(ah_attr))
ah->av.stat_rate = IB_RATE_10_GBPS;
+ ah->av.hop_limit = grh->hop_limit;
+ ah->av.flowlabel = grh->flow_label;
+ ah->av.udp_sport = get_ah_udp_sport(ah_attr);
+ ah->av.tclass = tclass;
+
+ ret = hr_dev->hw->get_dscp(hr_dev, tclass, &tc_mode, &priority);
+ if (ret == -EOPNOTSUPP)
+ ret = 0;
+
+ if (ret && grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ goto err_out;
+
+ if (tc_mode == HNAE3_TC_MAP_MODE_DSCP &&
+ grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ ah->av.sl = priority;
+ else
+ ah->av.sl = rdma_ah_get_sl(ah_attr);
+
+ if (!check_sl_valid(hr_dev, ah->av.sl)) {
+ ret = -EINVAL;
+ goto err_out;
+ }
+
memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE);
- ah->av.sl_tclass_flowlabel = cpu_to_le32(rdma_ah_get_sl(ah_attr) <<
- HNS_ROCE_SL_SHIFT);
+ memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);
- return 0;
+ /* HIP08 needs to record vlan info in Address Vector */
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ ret = rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr,
+ &ah->av.vlan_id, NULL);
+ if (ret)
+ goto err_out;
+
+ ah->av.vlan_en = ah->av.vlan_id < VLAN_N_VID;
+ }
+
+ if (udata) {
+ resp.priority = ah->av.sl;
+ resp.tc_mode = tc_mode;
+ memcpy(resp.dmac, ah_attr->roce.dmac, ETH_ALEN);
+ ret = ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)));
+ }
+
+err_out:
+ if (ret)
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_AH_CREATE_ERR_CNT]);
+
+ return ret;
}
int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
@@ -91,23 +130,12 @@ int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
memset(ah_attr, 0, sizeof(*ah_attr));
- rdma_ah_set_sl(ah_attr, (le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
- HNS_ROCE_SL_SHIFT));
- rdma_ah_set_port_num(ah_attr, (le32_to_cpu(ah->av.port_pd) >>
- HNS_ROCE_PORT_NUM_SHIFT));
+ rdma_ah_set_sl(ah_attr, ah->av.sl);
+ rdma_ah_set_port_num(ah_attr, ah->av.port);
rdma_ah_set_static_rate(ah_attr, ah->av.stat_rate);
- rdma_ah_set_grh(ah_attr, NULL,
- (le32_to_cpu(ah->av.sl_tclass_flowlabel) &
- HNS_ROCE_FLOW_LABEL_MASK), ah->av.gid_index,
- ah->av.hop_limit,
- (le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
- HNS_ROCE_TCLASS_SHIFT));
+ rdma_ah_set_grh(ah_attr, NULL, ah->av.flowlabel,
+ ah->av.gid_index, ah->av.hop_limit, ah->av.tclass);
rdma_ah_set_dgid_raw(ah_attr, ah->av.dgid);
return 0;
}
-
-void hns_roce_destroy_ah(struct ib_ah *ah, u32 flags)
-{
- return;
-}
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index dac058d3df53..6ee911f6885b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -31,220 +31,157 @@
* SOFTWARE.
*/
-#include <linux/platform_device.h>
#include <linux/vmalloc.h>
+#include <rdma/ib_umem.h>
#include "hns_roce_device.h"
-int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj)
+void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf)
{
- int ret = 0;
-
- spin_lock(&bitmap->lock);
- *obj = find_next_zero_bit(bitmap->table, bitmap->max, bitmap->last);
- if (*obj >= bitmap->max) {
- bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
- & bitmap->mask;
- *obj = find_first_zero_bit(bitmap->table, bitmap->max);
- }
+ struct hns_roce_buf_list *trunks;
+ u32 i;
- if (*obj < bitmap->max) {
- set_bit(*obj, bitmap->table);
- bitmap->last = (*obj + 1);
- if (bitmap->last == bitmap->max)
- bitmap->last = 0;
- *obj |= bitmap->top;
- } else {
- ret = -1;
- }
+ if (!buf)
+ return;
- spin_unlock(&bitmap->lock);
+ trunks = buf->trunk_list;
+ if (trunks) {
+ buf->trunk_list = NULL;
+ for (i = 0; i < buf->ntrunks; i++)
+ dma_free_coherent(hr_dev->dev, 1 << buf->trunk_shift,
+ trunks[i].buf, trunks[i].map);
- return ret;
-}
+ kfree(trunks);
+ }
-void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj,
- int rr)
-{
- hns_roce_bitmap_free_range(bitmap, obj, 1, rr);
+ kfree(buf);
}
-EXPORT_SYMBOL_GPL(hns_roce_bitmap_free);
-int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt,
- int align, unsigned long *obj)
+/*
+ * Allocate the dma buffer for storing ROCEE table entries
+ *
+ * @size: required size
+ * @page_shift: the unit size in a continuous dma address range
+ * @flags: HNS_ROCE_BUF_ flags to control the allocation flow.
+ */
+struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size,
+ u32 page_shift, u32 flags)
{
- int ret = 0;
- int i;
-
- if (likely(cnt == 1 && align == 1))
- return hns_roce_bitmap_alloc(bitmap, obj);
-
- spin_lock(&bitmap->lock);
+ u32 trunk_size, page_size, alloced_size;
+ struct hns_roce_buf_list *trunks;
+ struct hns_roce_buf *buf;
+ gfp_t gfp_flags;
+ u32 ntrunk, i;
+
+ /* The minimum shift of the page accessed by hw is HNS_HW_PAGE_SHIFT */
+ if (WARN_ON(page_shift < HNS_HW_PAGE_SHIFT))
+ return ERR_PTR(-EINVAL);
+
+ gfp_flags = (flags & HNS_ROCE_BUF_NOSLEEP) ? GFP_ATOMIC : GFP_KERNEL;
+ buf = kzalloc(sizeof(*buf), gfp_flags);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+
+ buf->page_shift = page_shift;
+ page_size = 1 << buf->page_shift;
+
+ /* Calc the trunk size and num by required size and page_shift */
+ if (flags & HNS_ROCE_BUF_DIRECT) {
+ buf->trunk_shift = order_base_2(ALIGN(size, PAGE_SIZE));
+ ntrunk = 1;
+ } else {
+ buf->trunk_shift = order_base_2(ALIGN(page_size, PAGE_SIZE));
+ ntrunk = DIV_ROUND_UP(size, 1 << buf->trunk_shift);
+ }
- *obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max,
- bitmap->last, cnt, align - 1);
- if (*obj >= bitmap->max) {
- bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
- & bitmap->mask;
- *obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max, 0,
- cnt, align - 1);
+ trunks = kcalloc(ntrunk, sizeof(*trunks), gfp_flags);
+ if (!trunks) {
+ kfree(buf);
+ return ERR_PTR(-ENOMEM);
}
- if (*obj < bitmap->max) {
- for (i = 0; i < cnt; i++)
- set_bit(*obj + i, bitmap->table);
+ trunk_size = 1 << buf->trunk_shift;
+ alloced_size = 0;
+ for (i = 0; i < ntrunk; i++) {
+ trunks[i].buf = dma_alloc_coherent(hr_dev->dev, trunk_size,
+ &trunks[i].map, gfp_flags);
+ if (!trunks[i].buf)
+ break;
- if (*obj == bitmap->last) {
- bitmap->last = (*obj + cnt);
- if (bitmap->last >= bitmap->max)
- bitmap->last = 0;
- }
- *obj |= bitmap->top;
- } else {
- ret = -1;
+ alloced_size += trunk_size;
}
- spin_unlock(&bitmap->lock);
-
- return ret;
-}
+ buf->ntrunks = i;
-void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap,
- unsigned long obj, int cnt,
- int rr)
-{
- int i;
+ /* In nofail mode, it's only failed when the alloced size is 0 */
+ if ((flags & HNS_ROCE_BUF_NOFAIL) ? i == 0 : i != ntrunk) {
+ for (i = 0; i < buf->ntrunks; i++)
+ dma_free_coherent(hr_dev->dev, trunk_size,
+ trunks[i].buf, trunks[i].map);
- obj &= bitmap->max + bitmap->reserved_top - 1;
+ kfree(trunks);
+ kfree(buf);
+ return ERR_PTR(-ENOMEM);
+ }
- spin_lock(&bitmap->lock);
- for (i = 0; i < cnt; i++)
- clear_bit(obj + i, bitmap->table);
+ buf->npages = DIV_ROUND_UP(alloced_size, page_size);
+ buf->trunk_list = trunks;
- if (!rr)
- bitmap->last = min(bitmap->last, obj);
- bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
- & bitmap->mask;
- spin_unlock(&bitmap->lock);
+ return buf;
}
-int hns_roce_bitmap_init(struct hns_roce_bitmap *bitmap, u32 num, u32 mask,
- u32 reserved_bot, u32 reserved_top)
+int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
+ int buf_cnt, struct hns_roce_buf *buf,
+ unsigned int page_shift)
{
- u32 i;
+ unsigned int offset, max_size;
+ int total = 0;
+ int i;
- if (num != roundup_pow_of_two(num))
+ if (page_shift > buf->trunk_shift) {
+ dev_err(hr_dev->dev, "failed to check kmem buf shift %u > %u\n",
+ page_shift, buf->trunk_shift);
return -EINVAL;
+ }
- bitmap->last = 0;
- bitmap->top = 0;
- bitmap->max = num - reserved_top;
- bitmap->mask = mask;
- bitmap->reserved_top = reserved_top;
- spin_lock_init(&bitmap->lock);
- bitmap->table = kcalloc(BITS_TO_LONGS(bitmap->max), sizeof(long),
- GFP_KERNEL);
- if (!bitmap->table)
- return -ENOMEM;
-
- for (i = 0; i < reserved_bot; ++i)
- set_bit(i, bitmap->table);
-
- return 0;
-}
-
-void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap)
-{
- kfree(bitmap->table);
-}
-
-void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
- struct hns_roce_buf *buf)
-{
- int i;
- struct device *dev = hr_dev->dev;
-
- if (buf->nbufs == 1) {
- dma_free_coherent(dev, size, buf->direct.buf, buf->direct.map);
- } else {
- for (i = 0; i < buf->nbufs; ++i)
- if (buf->page_list[i].buf)
- dma_free_coherent(dev, 1 << buf->page_shift,
- buf->page_list[i].buf,
- buf->page_list[i].map);
- kfree(buf->page_list);
+ offset = 0;
+ max_size = buf->ntrunks << buf->trunk_shift;
+ for (i = 0; i < buf_cnt && offset < max_size; i++) {
+ bufs[total++] = hns_roce_buf_dma_addr(buf, offset);
+ offset += (1 << page_shift);
}
+
+ return total;
}
-EXPORT_SYMBOL_GPL(hns_roce_buf_free);
-int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
- struct hns_roce_buf *buf, u32 page_shift)
+int hns_roce_get_umem_bufs(dma_addr_t *bufs, int buf_cnt, struct ib_umem *umem,
+ unsigned int page_shift)
{
- int i = 0;
- dma_addr_t t;
- struct device *dev = hr_dev->dev;
- u32 page_size = 1 << page_shift;
- u32 order;
-
- /* SQ/RQ buf lease than one page, SQ + RQ = 8K */
- if (size <= max_direct) {
- buf->nbufs = 1;
- /* Npages calculated by page_size */
- order = get_order(size);
- if (order <= page_shift - PAGE_SHIFT)
- order = 0;
- else
- order -= page_shift - PAGE_SHIFT;
- buf->npages = 1 << order;
- buf->page_shift = page_shift;
- /* MTT PA must be recorded in 4k alignment, t is 4k aligned */
- buf->direct.buf = dma_alloc_coherent(dev, size, &t,
- GFP_KERNEL);
- if (!buf->direct.buf)
- return -ENOMEM;
-
- buf->direct.map = t;
-
- while (t & ((1 << buf->page_shift) - 1)) {
- --buf->page_shift;
- buf->npages *= 2;
- }
- } else {
- buf->nbufs = (size + page_size - 1) / page_size;
- buf->npages = buf->nbufs;
- buf->page_shift = page_shift;
- buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list),
- GFP_KERNEL);
-
- if (!buf->page_list)
- return -ENOMEM;
-
- for (i = 0; i < buf->nbufs; ++i) {
- buf->page_list[i].buf = dma_alloc_coherent(dev,
- page_size,
- &t,
- GFP_KERNEL);
-
- if (!buf->page_list[i].buf)
- goto err_free;
-
- buf->page_list[i].map = t;
- }
+ struct ib_block_iter biter;
+ int total = 0;
+
+ /* convert system page cnt to hw page cnt */
+ rdma_umem_for_each_dma_block(umem, &biter, 1 << page_shift) {
+ bufs[total++] = rdma_block_iter_dma_address(&biter);
+ if (total >= buf_cnt)
+ goto done;
}
- return 0;
-
-err_free:
- hns_roce_buf_free(hr_dev, size, buf);
- return -ENOMEM;
+done:
+ return total;
}
void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev)
{
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
- hns_roce_cleanup_srq_table(hr_dev);
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
+ ida_destroy(&hr_dev->xrcd_ida.ida);
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
+ ida_destroy(&hr_dev->srq_table.srq_ida.ida);
+ xa_destroy(&hr_dev->srq_table.xa);
+ }
hns_roce_cleanup_qp_table(hr_dev);
hns_roce_cleanup_cq_table(hr_dev);
- hns_roce_cleanup_mr_table(hr_dev);
- hns_roce_cleanup_pd_table(hr_dev);
- hns_roce_cleanup_uar_table(hr_dev);
+ ida_destroy(&hr_dev->mr_table.mtpt_ida.ida);
+ ida_destroy(&hr_dev->pd_ida.ida);
+ ida_destroy(&hr_dev->uar_ida.ida);
}
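The rewritten allocator above drops the old bitmap helpers and the direct/page_list split in hns_roce_buf: a buffer is now a list of equally sized DMA-coherent trunks (a single trunk for HNS_ROCE_BUF_DIRECT, otherwise as many as the requested size needs), and callers harvest hardware-page DMA addresses from it with hns_roce_get_kmem_bufs(). A minimal usage sketch built only from the signatures introduced in this hunk; the size and page count are arbitrary example values:

	/* Illustrative only: allocate a queue buffer, collect its HW page
	 * addresses, then release it. hr_dev is assumed valid.
	 */
	static int example_queue_buf(struct hns_roce_dev *hr_dev)
	{
		const u32 size = 64 * 1024;	/* arbitrary */
		struct hns_roce_buf *buf;
		dma_addr_t pages[16];
		int npages;

		buf = hns_roce_buf_alloc(hr_dev, size, HNS_HW_PAGE_SHIFT, 0);
		if (IS_ERR(buf))
			return PTR_ERR(buf);

		npages = hns_roce_get_kmem_bufs(hr_dev, pages, ARRAY_SIZE(pages),
						buf, HNS_HW_PAGE_SHIFT);
		if (npages <= 0) {
			hns_roce_buf_free(hr_dev, buf);
			return npages ? npages : -ENOMEM;
		}

		/* ... program pages[0..npages-1] into the hardware ... */

		hns_roce_buf_free(hr_dev, buf);
		return 0;
	}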
diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.c b/drivers/infiniband/hw/hns/hns_roce_bond.c
new file mode 100644
index 000000000000..cc85f3ce1f3e
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_bond.c
@@ -0,0 +1,1012 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) 2025 Hisilicon Limited.
+ */
+
+#include <net/lag.h>
+#include <net/bonding.h>
+#include "hns_roce_device.h"
+#include "hns_roce_hw_v2.h"
+#include "hns_roce_bond.h"
+
+static DEFINE_XARRAY(roce_bond_xa);
+
+static struct hns_roce_dev *hns_roce_get_hrdev_by_netdev(struct net_device *net_dev)
+{
+ struct ib_device *ibdev =
+ ib_device_get_by_netdev(net_dev, RDMA_DRIVER_HNS);
+
+ if (!ibdev)
+ return NULL;
+
+ return container_of(ibdev, struct hns_roce_dev, ib_dev);
+}
+
+static struct net_device *get_upper_dev_from_ndev(struct net_device *net_dev)
+{
+ struct net_device *upper_dev;
+
+ rcu_read_lock();
+ upper_dev = netdev_master_upper_dev_get_rcu(net_dev);
+ dev_hold(upper_dev);
+ rcu_read_unlock();
+
+ return upper_dev;
+}
+
+static int get_netdev_bond_slave_id(struct net_device *net_dev,
+ struct hns_roce_bond_group *bond_grp)
+{
+ int i;
+
+ for (i = 0; i < ROCE_BOND_FUNC_MAX; i++)
+ if (net_dev == bond_grp->bond_func_info[i].net_dev)
+ return i;
+
+ return -ENOENT;
+}
+
+struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev,
+ u8 bus_num)
+{
+ struct hns_roce_die_info *die_info = xa_load(&roce_bond_xa, bus_num);
+ struct hns_roce_bond_group *bond_grp;
+ struct net_device *upper_dev = NULL;
+ int i;
+
+ if (!die_info)
+ return NULL;
+
+ for (i = 0; i < ROCE_BOND_NUM_MAX; i++) {
+ bond_grp = die_info->bgrps[i];
+ if (!bond_grp)
+ continue;
+ if (get_netdev_bond_slave_id(net_dev, bond_grp) >= 0)
+ return bond_grp;
+ if (bond_grp->upper_dev) {
+ upper_dev = get_upper_dev_from_ndev(net_dev);
+ if (bond_grp->upper_dev == upper_dev) {
+ dev_put(upper_dev);
+ return bond_grp;
+ }
+ dev_put(upper_dev);
+ }
+ }
+
+ return NULL;
+}
+
+static int hns_roce_set_bond_netdev(struct hns_roce_bond_group *bond_grp,
+ struct hns_roce_dev *hr_dev)
+{
+ struct net_device *active_dev;
+ struct net_device *old_dev;
+ int i, ret = 0;
+
+ if (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
+ rcu_read_lock();
+ active_dev =
+ bond_option_active_slave_get_rcu(netdev_priv(bond_grp->upper_dev));
+ rcu_read_unlock();
+ } else {
+ for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
+ active_dev = bond_grp->bond_func_info[i].net_dev;
+ if (active_dev &&
+ ib_get_curr_port_state(active_dev) == IB_PORT_ACTIVE)
+ break;
+ }
+ }
+
+ if (!active_dev || i == ROCE_BOND_FUNC_MAX)
+ active_dev = get_hr_netdev(hr_dev, 0);
+
+ old_dev = ib_device_get_netdev(&hr_dev->ib_dev, 1);
+ if (old_dev == active_dev)
+ goto out;
+
+ ret = ib_device_set_netdev(&hr_dev->ib_dev, active_dev, 1);
+ if (ret) {
+ dev_err(hr_dev->dev, "failed to set netdev for bond.\n");
+ goto out;
+ }
+
+ if (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
+ if (old_dev)
+ roce_del_all_netdev_gids(&hr_dev->ib_dev, 1, old_dev);
+ rdma_roce_rescan_port(&hr_dev->ib_dev, 1);
+ }
+out:
+ dev_put(old_dev);
+ return ret;
+}
+
+bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev)
+{
+ struct net_device *net_dev = get_hr_netdev(hr_dev, 0);
+ struct hns_roce_bond_group *bond_grp;
+ u8 bus_num = get_hr_bus_num(hr_dev);
+
+ bond_grp = hns_roce_get_bond_grp(net_dev, bus_num);
+ if (bond_grp && bond_grp->bond_state != HNS_ROCE_BOND_NOT_BONDED &&
+ bond_grp->bond_state != HNS_ROCE_BOND_NOT_ATTACHED)
+ return true;
+
+ return false;
+}
+
+static void hns_roce_bond_get_active_slave(struct hns_roce_bond_group *bond_grp)
+{
+ struct net_device *net_dev;
+ u32 active_slave_map = 0;
+ u8 active_slave_num = 0;
+ bool active;
+ u8 i;
+
+ for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
+ net_dev = bond_grp->bond_func_info[i].net_dev;
+ if (!net_dev || !(bond_grp->slave_map & (1U << i)))
+ continue;
+
+ active = (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) ?
+ net_lag_port_dev_txable(net_dev) :
+ (ib_get_curr_port_state(net_dev) == IB_PORT_ACTIVE);
+ if (active) {
+ active_slave_num++;
+ active_slave_map |= (1U << i);
+ }
+ }
+
+ bond_grp->active_slave_num = active_slave_num;
+ bond_grp->active_slave_map = active_slave_map;
+}
+
+static int hns_roce_recover_bond(struct hns_roce_bond_group *bond_grp,
+ struct hns_roce_dev *hr_dev)
+{
+ bond_grp->main_hr_dev = hr_dev;
+ hns_roce_bond_get_active_slave(bond_grp);
+
+ return hns_roce_cmd_bond(bond_grp, HNS_ROCE_SET_BOND);
+}
+
+static void hns_roce_slave_uninit(struct hns_roce_bond_group *bond_grp,
+ u8 func_idx)
+{
+ struct hnae3_handle *handle;
+
+ handle = bond_grp->bond_func_info[func_idx].handle;
+ if (handle->priv)
+ hns_roce_bond_uninit_client(bond_grp, func_idx);
+}
+
+static struct hns_roce_dev
+ *hns_roce_slave_init(struct hns_roce_bond_group *bond_grp,
+ u8 func_idx, bool need_switch);
+
+static int switch_main_dev(struct hns_roce_bond_group *bond_grp,
+ u8 main_func_idx)
+{
+ struct hns_roce_dev *hr_dev;
+ struct net_device *net_dev;
+ u8 i;
+
+ bond_grp->main_hr_dev = NULL;
+ hns_roce_bond_uninit_client(bond_grp, main_func_idx);
+
+ for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
+ net_dev = bond_grp->bond_func_info[i].net_dev;
+ if ((bond_grp->slave_map & (1U << i)) && net_dev) {
+ /* In case this slave is still being registered as
+ * a non-bonded PF, uninit it first and then re-init
+ * it as the main device.
+ */
+ hns_roce_slave_uninit(bond_grp, i);
+ hr_dev = hns_roce_slave_init(bond_grp, i, false);
+ if (hr_dev) {
+ bond_grp->main_hr_dev = hr_dev;
+ break;
+ }
+ }
+ }
+
+ if (!bond_grp->main_hr_dev)
+ return -ENODEV;
+
+ return 0;
+}
+
+static struct hns_roce_dev
+ *hns_roce_slave_init(struct hns_roce_bond_group *bond_grp,
+ u8 func_idx, bool need_switch)
+{
+ struct hns_roce_dev *hr_dev = NULL;
+ struct hnae3_handle *handle;
+ u8 main_func_idx;
+ int ret;
+
+ if (need_switch) {
+ main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn);
+ if (func_idx == main_func_idx) {
+ ret = switch_main_dev(bond_grp, main_func_idx);
+ if (ret == -ENODEV)
+ return NULL;
+ }
+ }
+
+ handle = bond_grp->bond_func_info[func_idx].handle;
+ if (handle) {
+ if (handle->priv)
+ return handle->priv;
+ /* Prevent this device from being initialized as a bond device */
+ if (need_switch)
+ bond_grp->bond_func_info[func_idx].net_dev = NULL;
+ hr_dev = hns_roce_bond_init_client(bond_grp, func_idx);
+ if (!hr_dev)
+ BOND_ERR_LOG("failed to init slave %u.\n", func_idx);
+ }
+
+ return hr_dev;
+}
+
+static struct hns_roce_die_info *alloc_die_info(int bus_num)
+{
+ struct hns_roce_die_info *die_info;
+ int ret;
+
+ die_info = kzalloc(sizeof(*die_info), GFP_KERNEL);
+ if (!die_info)
+ return NULL;
+
+ ret = xa_err(xa_store(&roce_bond_xa, bus_num, die_info, GFP_KERNEL));
+ if (ret) {
+ kfree(die_info);
+ return NULL;
+ }
+
+ mutex_init(&die_info->die_mutex);
+
+ return die_info;
+}
+
+static void dealloc_die_info(struct hns_roce_die_info *die_info, u8 bus_num)
+{
+ mutex_destroy(&die_info->die_mutex);
+ xa_erase(&roce_bond_xa, bus_num);
+ kfree(die_info);
+}
+
+static int alloc_bond_id(struct hns_roce_bond_group *bond_grp)
+{
+ u8 bus_num = bond_grp->bus_num;
+ struct hns_roce_die_info *die_info = xa_load(&roce_bond_xa, bus_num);
+ int i;
+
+ if (!die_info) {
+ die_info = alloc_die_info(bus_num);
+ if (!die_info)
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < ROCE_BOND_NUM_MAX; i++) {
+ if (die_info->bond_id_mask & BOND_ID(i))
+ continue;
+
+ die_info->bond_id_mask |= BOND_ID(i);
+ die_info->bgrps[i] = bond_grp;
+ bond_grp->bond_id = i;
+
+ return 0;
+ }
+
+ return -ENOSPC;
+}
+
+static int remove_bond_id(int bus_num, u8 bond_id)
+{
+ struct hns_roce_die_info *die_info = xa_load(&roce_bond_xa, bus_num);
+
+ if (bond_id >= ROCE_BOND_NUM_MAX)
+ return -EINVAL;
+
+ if (!die_info)
+ return -ENODEV;
+
+ die_info->bond_id_mask &= ~BOND_ID(bond_id);
+ die_info->bgrps[bond_id] = NULL;
+ if (!die_info->bond_id_mask)
+ dealloc_die_info(die_info, bus_num);
+
+ return 0;
+}
+
+static void hns_roce_set_bond(struct hns_roce_bond_group *bond_grp)
+{
+ struct hns_roce_dev *hr_dev;
+ int ret;
+ int i;
+
+ for (i = ROCE_BOND_FUNC_MAX - 1; i >= 0; i--) {
+ if (bond_grp->slave_map & (1 << i))
+ hns_roce_slave_uninit(bond_grp, i);
+ }
+
+ mutex_lock(&bond_grp->bond_mutex);
+ bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED;
+ mutex_unlock(&bond_grp->bond_mutex);
+ bond_grp->main_hr_dev = NULL;
+
+ for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
+ if (bond_grp->slave_map & (1 << i)) {
+ hr_dev = hns_roce_slave_init(bond_grp, i, false);
+ if (hr_dev) {
+ bond_grp->main_hr_dev = hr_dev;
+ break;
+ }
+ }
+ }
+
+ if (!bond_grp->main_hr_dev) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ hns_roce_bond_get_active_slave(bond_grp);
+
+ ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_SET_BOND);
+
+out:
+ if (ret) {
+ BOND_ERR_LOG("failed to set RoCE bond, ret = %d.\n", ret);
+ hns_roce_cleanup_bond(bond_grp);
+ } else {
+ ibdev_info(&bond_grp->main_hr_dev->ib_dev,
+ "RoCE set bond finished!\n");
+ }
+}
+
+static void hns_roce_clear_bond(struct hns_roce_bond_group *bond_grp)
+{
+ u8 main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn);
+ struct hns_roce_dev *hr_dev;
+ u8 i;
+
+ if (bond_grp->bond_state == HNS_ROCE_BOND_NOT_BONDED)
+ goto out;
+
+ bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED;
+ bond_grp->main_hr_dev = NULL;
+
+ hns_roce_slave_uninit(bond_grp, main_func_idx);
+
+ for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
+ hr_dev = hns_roce_slave_init(bond_grp, i, false);
+ if (hr_dev)
+ bond_grp->main_hr_dev = hr_dev;
+ }
+
+out:
+ hns_roce_cleanup_bond(bond_grp);
+}
+
+static void hns_roce_slave_changestate(struct hns_roce_bond_group *bond_grp)
+{
+ int ret;
+
+ hns_roce_bond_get_active_slave(bond_grp);
+
+ ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND);
+
+ mutex_lock(&bond_grp->bond_mutex);
+ if (bond_grp->bond_state == HNS_ROCE_BOND_SLAVE_CHANGESTATE)
+ bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED;
+ mutex_unlock(&bond_grp->bond_mutex);
+
+ if (ret)
+ ibdev_err(&bond_grp->main_hr_dev->ib_dev,
+ "failed to change RoCE bond slave state, ret = %d.\n",
+ ret);
+ else
+ ibdev_info(&bond_grp->main_hr_dev->ib_dev,
+ "RoCE slave changestate finished!\n");
+}
+
+static void hns_roce_slave_change_num(struct hns_roce_bond_group *bond_grp)
+{
+ int ret;
+ u8 i;
+
+ for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
+ if (bond_grp->slave_map & (1U << i)) {
+ if (i == PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn))
+ continue;
+ hns_roce_slave_uninit(bond_grp, i);
+ } else {
+ hns_roce_slave_init(bond_grp, i, true);
+ if (!bond_grp->main_hr_dev) {
+ ret = -ENODEV;
+ goto out;
+ }
+ bond_grp->bond_func_info[i].net_dev = NULL;
+ bond_grp->bond_func_info[i].handle = NULL;
+ }
+ }
+
+ hns_roce_bond_get_active_slave(bond_grp);
+
+ ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND);
+
+out:
+ if (ret) {
+ BOND_ERR_LOG("failed to change RoCE bond slave num, ret = %d.\n", ret);
+ hns_roce_cleanup_bond(bond_grp);
+ } else {
+ mutex_lock(&bond_grp->bond_mutex);
+ if (bond_grp->bond_state == HNS_ROCE_BOND_SLAVE_CHANGE_NUM)
+ bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED;
+ mutex_unlock(&bond_grp->bond_mutex);
+ ibdev_info(&bond_grp->main_hr_dev->ib_dev,
+ "RoCE slave change num finished!\n");
+ }
+}
+
+static void hns_roce_bond_info_update_nolock(struct hns_roce_bond_group *bond_grp,
+ struct net_device *upper_dev)
+{
+ struct hns_roce_v2_priv *priv;
+ struct hns_roce_dev *hr_dev;
+ struct net_device *net_dev;
+ int func_idx;
+
+ bond_grp->slave_map = 0;
+ rcu_read_lock();
+ for_each_netdev_in_bond_rcu(upper_dev, net_dev) {
+ func_idx = get_netdev_bond_slave_id(net_dev, bond_grp);
+ if (func_idx < 0) {
+ hr_dev = hns_roce_get_hrdev_by_netdev(net_dev);
+ if (!hr_dev)
+ continue;
+ func_idx = PCI_FUNC(hr_dev->pci_dev->devfn);
+ if (!bond_grp->bond_func_info[func_idx].net_dev) {
+ priv = hr_dev->priv;
+ bond_grp->bond_func_info[func_idx].net_dev =
+ net_dev;
+ bond_grp->bond_func_info[func_idx].handle =
+ priv->handle;
+ }
+ ib_device_put(&hr_dev->ib_dev);
+ }
+
+ bond_grp->slave_map |= (1 << func_idx);
+ }
+ rcu_read_unlock();
+}
+
+static bool is_dev_bond_supported(struct hns_roce_bond_group *bond_grp,
+ struct net_device *net_dev)
+{
+ struct hns_roce_dev *hr_dev = hns_roce_get_hrdev_by_netdev(net_dev);
+ bool ret = true;
+
+ if (!hr_dev) {
+ if (bond_grp &&
+ get_netdev_bond_slave_id(net_dev, bond_grp) >= 0)
+ return true;
+ else
+ return false;
+ }
+
+ if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND)) {
+ ret = false;
+ goto out;
+ }
+
+ if (hr_dev->is_vf || pci_num_vf(hr_dev->pci_dev) > 0) {
+ ret = false;
+ goto out;
+ }
+
+ if (bond_grp->bus_num != get_hr_bus_num(hr_dev))
+ ret = false;
+
+out:
+ ib_device_put(&hr_dev->ib_dev);
+ return ret;
+}
+
+static bool check_slave_support(struct hns_roce_bond_group *bond_grp,
+ struct net_device *upper_dev)
+{
+ struct net_device *net_dev;
+ u8 slave_num = 0;
+
+ rcu_read_lock();
+ for_each_netdev_in_bond_rcu(upper_dev, net_dev) {
+ if (is_dev_bond_supported(bond_grp, net_dev)) {
+ slave_num++;
+ continue;
+ }
+ rcu_read_unlock();
+ return false;
+ }
+ rcu_read_unlock();
+
+ return (slave_num > 1 && slave_num <= ROCE_BOND_FUNC_MAX);
+}
+
+static void hns_roce_bond_work(struct work_struct *work)
+{
+ struct delayed_work *delayed_work = to_delayed_work(work);
+ struct hns_roce_bond_group *bond_grp =
+ container_of(delayed_work, struct hns_roce_bond_group,
+ bond_work);
+ enum hns_roce_bond_state bond_state;
+ bool bond_ready;
+
+ mutex_lock(&bond_grp->bond_mutex);
+ bond_ready = check_slave_support(bond_grp, bond_grp->upper_dev);
+ hns_roce_bond_info_update_nolock(bond_grp, bond_grp->upper_dev);
+ bond_state = bond_grp->bond_state;
+ bond_grp->bond_ready = bond_ready;
+ mutex_unlock(&bond_grp->bond_mutex);
+
+ ibdev_info(&bond_grp->main_hr_dev->ib_dev,
+ "bond work: bond_ready - %d, bond_state - %d.\n",
+ bond_ready, bond_state);
+
+ if (!bond_ready) {
+ hns_roce_clear_bond(bond_grp);
+ return;
+ }
+
+ switch (bond_state) {
+ case HNS_ROCE_BOND_NOT_BONDED:
+ hns_roce_set_bond(bond_grp);
+ /* In set_bond flow, we don't need to set bond netdev here as
+ * it has been done when bond_grp->main_hr_dev is registered.
+ */
+ return;
+ case HNS_ROCE_BOND_SLAVE_CHANGESTATE:
+ hns_roce_slave_changestate(bond_grp);
+ break;
+ case HNS_ROCE_BOND_SLAVE_CHANGE_NUM:
+ hns_roce_slave_change_num(bond_grp);
+ break;
+ default:
+ return;
+ }
+ hns_roce_set_bond_netdev(bond_grp, bond_grp->main_hr_dev);
+}
+
+static void hns_roce_attach_bond_grp(struct hns_roce_bond_group *bond_grp,
+ struct hns_roce_dev *hr_dev,
+ struct net_device *upper_dev)
+{
+ bond_grp->upper_dev = upper_dev;
+ bond_grp->main_hr_dev = hr_dev;
+ bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED;
+ bond_grp->bond_ready = false;
+}
+
+static void hns_roce_detach_bond_grp(struct hns_roce_bond_group *bond_grp)
+{
+ mutex_lock(&bond_grp->bond_mutex);
+
+ cancel_delayed_work(&bond_grp->bond_work);
+ bond_grp->upper_dev = NULL;
+ bond_grp->main_hr_dev = NULL;
+ bond_grp->bond_ready = false;
+ bond_grp->bond_state = HNS_ROCE_BOND_NOT_ATTACHED;
+ bond_grp->slave_map = 0;
+ memset(bond_grp->bond_func_info, 0, sizeof(bond_grp->bond_func_info));
+
+ mutex_unlock(&bond_grp->bond_mutex);
+}
+
+void hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp)
+{
+ int ret;
+
+ ret = bond_grp->main_hr_dev ?
+ hns_roce_cmd_bond(bond_grp, HNS_ROCE_CLEAR_BOND) : -EIO;
+ if (ret)
+ BOND_ERR_LOG("failed to clear RoCE bond, ret = %d.\n", ret);
+ else
+ ibdev_info(&bond_grp->main_hr_dev->ib_dev,
+ "RoCE clear bond finished!\n");
+
+ hns_roce_detach_bond_grp(bond_grp);
+}
+
+static bool lowerstate_event_filter(struct hns_roce_bond_group *bond_grp,
+ struct net_device *net_dev)
+{
+ struct hns_roce_bond_group *bond_grp_tmp;
+
+ bond_grp_tmp = hns_roce_get_bond_grp(net_dev, bond_grp->bus_num);
+ return bond_grp_tmp == bond_grp;
+}
+
+static void lowerstate_event_setting(struct hns_roce_bond_group *bond_grp,
+ struct netdev_notifier_changelowerstate_info *info)
+{
+ mutex_lock(&bond_grp->bond_mutex);
+
+ if (bond_grp->bond_ready &&
+ bond_grp->bond_state == HNS_ROCE_BOND_IS_BONDED)
+ bond_grp->bond_state = HNS_ROCE_BOND_SLAVE_CHANGESTATE;
+
+ mutex_unlock(&bond_grp->bond_mutex);
+}
+
+static bool hns_roce_bond_lowerstate_event(struct hns_roce_bond_group *bond_grp,
+ struct netdev_notifier_changelowerstate_info *info)
+{
+ struct net_device *net_dev =
+ netdev_notifier_info_to_dev((struct netdev_notifier_info *)info);
+
+ if (!netif_is_lag_port(net_dev))
+ return false;
+
+ if (!lowerstate_event_filter(bond_grp, net_dev))
+ return false;
+
+ lowerstate_event_setting(bond_grp, info);
+
+ return true;
+}
+
+static bool is_bond_setting_supported(struct netdev_lag_upper_info *bond_info)
+{
+ if (!bond_info)
+ return false;
+
+ if (bond_info->tx_type != NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
+ bond_info->tx_type != NETDEV_LAG_TX_TYPE_HASH)
+ return false;
+
+ if (bond_info->tx_type == NETDEV_LAG_TX_TYPE_HASH &&
+ bond_info->hash_type > NETDEV_LAG_HASH_L23)
+ return false;
+
+ return true;
+}
+
+static void upper_event_setting(struct hns_roce_bond_group *bond_grp,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct netdev_lag_upper_info *bond_upper_info = NULL;
+ bool slave_inc = info->linking;
+
+ if (slave_inc)
+ bond_upper_info = info->upper_info;
+
+ if (bond_upper_info) {
+ bond_grp->tx_type = bond_upper_info->tx_type;
+ bond_grp->hash_type = bond_upper_info->hash_type;
+ }
+}
+
+static bool check_unlinking_bond_support(struct hns_roce_bond_group *bond_grp)
+{
+ struct net_device *net_dev;
+ u8 slave_num = 0;
+
+ rcu_read_lock();
+ for_each_netdev_in_bond_rcu(bond_grp->upper_dev, net_dev) {
+ if (get_netdev_bond_slave_id(net_dev, bond_grp) >= 0)
+ slave_num++;
+ }
+ rcu_read_unlock();
+
+ return (slave_num > 1);
+}
+
+static bool check_linking_bond_support(struct netdev_lag_upper_info *bond_info,
+ struct hns_roce_bond_group *bond_grp,
+ struct net_device *upper_dev)
+{
+ if (!is_bond_setting_supported(bond_info))
+ return false;
+
+ return check_slave_support(bond_grp, upper_dev);
+}
+
+static enum bond_support_type
+ check_bond_support(struct hns_roce_bond_group *bond_grp,
+ struct net_device *upper_dev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ bool bond_grp_exist = false;
+ bool support;
+
+ if (upper_dev == bond_grp->upper_dev)
+ bond_grp_exist = true;
+
+ if (!info->linking && !bond_grp_exist)
+ return BOND_NOT_SUPPORT;
+
+ if (info->linking)
+ support = check_linking_bond_support(info->upper_info, bond_grp,
+ upper_dev);
+ else
+ support = check_unlinking_bond_support(bond_grp);
+
+ if (support)
+ return BOND_SUPPORT;
+
+ return bond_grp_exist ? BOND_EXISTING_NOT_SUPPORT : BOND_NOT_SUPPORT;
+}
+
+static bool upper_event_filter(struct netdev_notifier_changeupper_info *info,
+ struct hns_roce_bond_group *bond_grp,
+ struct net_device *net_dev)
+{
+ struct net_device *upper_dev = info->upper_dev;
+ struct hns_roce_bond_group *bond_grp_tmp;
+ struct hns_roce_dev *hr_dev;
+ bool ret = true;
+ u8 bus_num;
+
+ if (!info->linking ||
+ bond_grp->bond_state != HNS_ROCE_BOND_NOT_ATTACHED)
+ return bond_grp->upper_dev == upper_dev;
+
+ hr_dev = hns_roce_get_hrdev_by_netdev(net_dev);
+ if (!hr_dev)
+ return false;
+
+ bus_num = get_hr_bus_num(hr_dev);
+ if (bond_grp->bus_num != bus_num) {
+ ret = false;
+ goto out;
+ }
+
+ bond_grp_tmp = hns_roce_get_bond_grp(net_dev, bus_num);
+ if (bond_grp_tmp && bond_grp_tmp != bond_grp)
+ ret = false;
+out:
+ ib_device_put(&hr_dev->ib_dev);
+ return ret;
+}
+
+static bool hns_roce_bond_upper_event(struct hns_roce_bond_group *bond_grp,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct net_device *net_dev =
+ netdev_notifier_info_to_dev((struct netdev_notifier_info *)info);
+ struct net_device *upper_dev = info->upper_dev;
+ enum bond_support_type support = BOND_SUPPORT;
+ struct hns_roce_dev *hr_dev;
+ int slave_id;
+
+ if (!upper_dev || !netif_is_lag_master(upper_dev))
+ return false;
+
+ if (!upper_event_filter(info, bond_grp, net_dev))
+ return false;
+
+ mutex_lock(&bond_grp->bond_mutex);
+ support = check_bond_support(bond_grp, upper_dev, info);
+ if (support == BOND_NOT_SUPPORT) {
+ mutex_unlock(&bond_grp->bond_mutex);
+ return false;
+ }
+
+ if (bond_grp->bond_state == HNS_ROCE_BOND_NOT_ATTACHED) {
+ hr_dev = hns_roce_get_hrdev_by_netdev(net_dev);
+ if (!hr_dev) {
+ mutex_unlock(&bond_grp->bond_mutex);
+ return false;
+ }
+ hns_roce_attach_bond_grp(bond_grp, hr_dev, upper_dev);
+ ib_device_put(&hr_dev->ib_dev);
+ }
+
+ /* If the netdev is being unregistered, the RoCE
+ * instance shouldn't be initialized.
+ */
+ if (net_dev->reg_state >= NETREG_UNREGISTERING) {
+ slave_id = get_netdev_bond_slave_id(net_dev, bond_grp);
+ if (slave_id >= 0) {
+ bond_grp->bond_func_info[slave_id].net_dev = NULL;
+ bond_grp->bond_func_info[slave_id].handle = NULL;
+ }
+ }
+
+ if (support == BOND_SUPPORT) {
+ bond_grp->bond_ready = true;
+ if (bond_grp->bond_state != HNS_ROCE_BOND_NOT_BONDED)
+ bond_grp->bond_state = HNS_ROCE_BOND_SLAVE_CHANGE_NUM;
+ }
+ mutex_unlock(&bond_grp->bond_mutex);
+ if (support == BOND_SUPPORT)
+ upper_event_setting(bond_grp, info);
+
+ return true;
+}
+
+static int hns_roce_bond_event(struct notifier_block *self,
+ unsigned long event, void *ptr)
+{
+ struct hns_roce_bond_group *bond_grp =
+ container_of(self, struct hns_roce_bond_group, bond_nb);
+ bool changed = false;
+
+ if (event == NETDEV_CHANGEUPPER)
+ changed = hns_roce_bond_upper_event(bond_grp, ptr);
+ if (event == NETDEV_CHANGELOWERSTATE)
+ changed = hns_roce_bond_lowerstate_event(bond_grp, ptr);
+
+ if (changed)
+ schedule_delayed_work(&bond_grp->bond_work, HZ);
+
+ return NOTIFY_DONE;
+}
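
The dispatch above recovers the owning bond group from the embedded notifier_block via container_of() and coalesces bursts of netdev events by (re)scheduling the group's delayed work one second (HZ jiffies) out. A minimal sketch of the same per-group notifier pattern follows; foo_group and foo_event are hypothetical names, not part of this patch, and the usual kernel headers (linux/netdevice.h, linux/workqueue.h) are assumed.

/* Illustrative sketch only: foo_group/foo_event are hypothetical and the
 * pattern simply mirrors hns_roce_bond_event() above.
 */
struct foo_group {
	struct notifier_block nb;
	struct delayed_work work;
};

static int foo_event(struct notifier_block *nb, unsigned long event, void *ptr)
{
	/* one notifier_block per group, so the group is recovered directly */
	struct foo_group *grp = container_of(nb, struct foo_group, nb);

	if (event == NETDEV_CHANGEUPPER || event == NETDEV_CHANGELOWERSTATE)
		/* coalesce bursts of netdev events into one deferred pass */
		schedule_delayed_work(&grp->work, HZ);

	return NOTIFY_DONE;
}
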
+
+int hns_roce_alloc_bond_grp(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_bond_group *bgrps[ROCE_BOND_NUM_MAX];
+ struct hns_roce_bond_group *bond_grp;
+ u8 bus_num = get_hr_bus_num(hr_dev);
+ int ret;
+ int i;
+
+ if (xa_load(&roce_bond_xa, bus_num))
+ return 0;
+
+ for (i = 0; i < ROCE_BOND_NUM_MAX; i++) {
+ bond_grp = kvzalloc(sizeof(*bond_grp), GFP_KERNEL);
+ if (!bond_grp) {
+ ret = -ENOMEM;
+ goto mem_err;
+ }
+
+ mutex_init(&bond_grp->bond_mutex);
+ INIT_DELAYED_WORK(&bond_grp->bond_work, hns_roce_bond_work);
+
+ bond_grp->bond_ready = false;
+ bond_grp->bond_state = HNS_ROCE_BOND_NOT_ATTACHED;
+ bond_grp->bus_num = bus_num;
+
+ ret = alloc_bond_id(bond_grp);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to alloc bond ID, ret = %d.\n", ret);
+ goto alloc_id_err;
+ }
+
+ bond_grp->bond_nb.notifier_call = hns_roce_bond_event;
+ ret = register_netdevice_notifier(&bond_grp->bond_nb);
+ if (ret) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to register bond nb, ret = %d.\n", ret);
+ goto register_nb_err;
+ }
+ bgrps[i] = bond_grp;
+ }
+
+ return 0;
+
+register_nb_err:
+ remove_bond_id(bond_grp->bus_num, bond_grp->bond_id);
+alloc_id_err:
+ mutex_destroy(&bond_grp->bond_mutex);
+ kvfree(bond_grp);
+mem_err:
+ for (i--; i >= 0; i--) {
+ unregister_netdevice_notifier(&bgrps[i]->bond_nb);
+ cancel_delayed_work_sync(&bgrps[i]->bond_work);
+ remove_bond_id(bgrps[i]->bus_num, bgrps[i]->bond_id);
+ mutex_destroy(&bgrps[i]->bond_mutex);
+ kvfree(bgrps[i]);
+ }
+ return ret;
+}
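
The error path above is a standard goto ladder: the labels tear down the partially initialized slot in reverse order of construction, and the loop under mem_err then rolls back every slot that was fully set up before it. A hypothetical walk-through of a failure while registering the notifier for slot i = 1:

/* Hypothetical walk-through: register_netdevice_notifier() fails for i = 1.
 *
 *   register_nb_err:  remove_bond_id() for the half-built slot 1
 *   alloc_id_err:     mutex_destroy() and kvfree() for slot 1
 *   mem_err loop:     i drops to 0; slot 0 (fully set up) gets its notifier
 *                     unregistered, its work cancelled, its bond ID removed,
 *                     its mutex destroyed and its memory freed
 *
 * The loop never touches slot i itself, since that slot was only partially
 * initialized when the error occurred.
 */
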
+
+void hns_roce_dealloc_bond_grp(void)
+{
+ struct hns_roce_bond_group *bond_grp;
+ struct hns_roce_die_info *die_info;
+ unsigned long id;
+ int i;
+
+ xa_for_each(&roce_bond_xa, id, die_info) {
+ for (i = 0; i < ROCE_BOND_NUM_MAX; i++) {
+ bond_grp = die_info->bgrps[i];
+ if (!bond_grp)
+ continue;
+ unregister_netdevice_notifier(&bond_grp->bond_nb);
+ cancel_delayed_work_sync(&bond_grp->bond_work);
+ remove_bond_id(bond_grp->bus_num, bond_grp->bond_id);
+ mutex_destroy(&bond_grp->bond_mutex);
+ kvfree(bond_grp);
+ }
+ }
+}
+
+int hns_roce_bond_init(struct hns_roce_dev *hr_dev)
+{
+ struct net_device *net_dev = get_hr_netdev(hr_dev, 0);
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_bond_group *bond_grp;
+ u8 bus_num = get_hr_bus_num(hr_dev);
+ int ret;
+
+ bond_grp = hns_roce_get_bond_grp(net_dev, bus_num);
+
+ if (priv->handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT) {
+ ret = hns_roce_recover_bond(bond_grp, hr_dev);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to recover RoCE bond, ret = %d.\n", ret);
+ return ret;
+ }
+ }
+
+ return hns_roce_set_bond_netdev(bond_grp, hr_dev);
+}
+
+void hns_roce_bond_suspend(struct hnae3_handle *handle)
+{
+ u8 bus_num = handle->pdev->bus->number;
+ struct hns_roce_bond_group *bond_grp;
+ struct hns_roce_die_info *die_info;
+ int i;
+
+ die_info = xa_load(&roce_bond_xa, bus_num);
+ if (!die_info)
+ return;
+
+ mutex_lock(&die_info->die_mutex);
+
+ /*
+ * Avoid duplicate processing when this function is called
+ * multiple times.
+ */
+ if (die_info->suspend_cnt)
+ goto out;
+
+ for (i = 0; i < ROCE_BOND_NUM_MAX; i++) {
+ bond_grp = die_info->bgrps[i];
+ if (!bond_grp)
+ continue;
+ unregister_netdevice_notifier(&bond_grp->bond_nb);
+ cancel_delayed_work_sync(&bond_grp->bond_work);
+ }
+
+out:
+ die_info->suspend_cnt++;
+ mutex_unlock(&die_info->die_mutex);
+}
+
+void hns_roce_bond_resume(struct hnae3_handle *handle)
+{
+ u8 bus_num = handle->pdev->bus->number;
+ struct hns_roce_bond_group *bond_grp;
+ struct hns_roce_die_info *die_info;
+ int i, ret;
+
+ die_info = xa_load(&roce_bond_xa, bus_num);
+ if (!die_info)
+ return;
+
+ mutex_lock(&die_info->die_mutex);
+
+ die_info->suspend_cnt--;
+ if (die_info->suspend_cnt)
+ goto out;
+
+ for (i = 0; i < ROCE_BOND_NUM_MAX; i++) {
+ bond_grp = die_info->bgrps[i];
+ if (!bond_grp)
+ continue;
+ ret = register_netdevice_notifier(&bond_grp->bond_nb);
+ if (ret)
+ dev_err(&handle->pdev->dev,
+ "failed to resume bond notifier(bus_num = %u, id = %u), ret = %d.\n",
+ bus_num, bond_grp->bond_id, ret);
+ }
+
+out:
+ mutex_unlock(&die_info->die_mutex);
+}
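
suspend_cnt makes the suspend/resume pair nest: only the first suspend tears the notifiers down and only the matching final resume restores them, with both paths serialized by die_mutex. A hypothetical call sequence:

/* Hypothetical call sequence showing the nesting:
 *
 *   hns_roce_bond_suspend(h);   suspend_cnt 0 -> 1, notifiers unregistered,
 *                               pending bond work cancelled
 *   hns_roce_bond_suspend(h);   suspend_cnt 1 -> 2, nothing else happens
 *   hns_roce_bond_resume(h);    suspend_cnt 2 -> 1, nothing else happens
 *   hns_roce_bond_resume(h);    suspend_cnt 1 -> 0, notifiers re-registered
 */
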
diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.h b/drivers/infiniband/hw/hns/hns_roce_bond.h
new file mode 100644
index 000000000000..98c295d78ca1
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_bond.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (c) 2025 Hisilicon Limited.
+ */
+
+#ifndef _HNS_ROCE_BOND_H
+#define _HNS_ROCE_BOND_H
+
+#include <linux/netdevice.h>
+#include <net/bonding.h>
+
+#define ROCE_BOND_FUNC_MAX 4
+#define ROCE_BOND_NUM_MAX 2
+
+#define BOND_ID(id) BIT(id)
+
+#define BOND_ERR_LOG(fmt, ...) \
+ pr_err("HNS RoCE Bonding: " fmt, ##__VA_ARGS__)
+
+enum {
+ BOND_MODE_1,
+ BOND_MODE_2_4,
+};
+
+enum hns_roce_bond_hashtype {
+ BOND_HASH_L2,
+ BOND_HASH_L34,
+ BOND_HASH_L23,
+};
+
+enum bond_support_type {
+ BOND_NOT_SUPPORT,
+ /*
+ * bond_grp already exists, but in the current
+ * conditions it's no longer supported
+ */
+ BOND_EXISTING_NOT_SUPPORT,
+ BOND_SUPPORT,
+};
+
+enum hns_roce_bond_state {
+ HNS_ROCE_BOND_NOT_ATTACHED,
+ HNS_ROCE_BOND_NOT_BONDED,
+ HNS_ROCE_BOND_IS_BONDED,
+ HNS_ROCE_BOND_SLAVE_CHANGE_NUM,
+ HNS_ROCE_BOND_SLAVE_CHANGESTATE,
+};
+
+enum hns_roce_bond_cmd_type {
+ HNS_ROCE_SET_BOND,
+ HNS_ROCE_CHANGE_BOND,
+ HNS_ROCE_CLEAR_BOND,
+};
+
+struct hns_roce_func_info {
+ struct net_device *net_dev;
+ struct hnae3_handle *handle;
+};
+
+struct hns_roce_bond_group {
+ struct net_device *upper_dev;
+ struct hns_roce_dev *main_hr_dev;
+ u8 active_slave_num;
+ u32 slave_map;
+ u32 active_slave_map;
+ u8 bond_id;
+ u8 bus_num;
+ struct hns_roce_func_info bond_func_info[ROCE_BOND_FUNC_MAX];
+ bool bond_ready;
+ enum hns_roce_bond_state bond_state;
+ enum netdev_lag_tx_type tx_type;
+ enum netdev_lag_hash hash_type;
+ struct mutex bond_mutex;
+ struct notifier_block bond_nb;
+ struct delayed_work bond_work;
+};
+
+struct hns_roce_die_info {
+ u8 bond_id_mask;
+ struct hns_roce_bond_group *bgrps[ROCE_BOND_NUM_MAX];
+ struct mutex die_mutex;
+ u8 suspend_cnt;
+};
+
+struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev,
+ u8 bus_num);
+int hns_roce_alloc_bond_grp(struct hns_roce_dev *hr_dev);
+void hns_roce_dealloc_bond_grp(void);
+void hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp);
+bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev);
+int hns_roce_bond_init(struct hns_roce_dev *hr_dev);
+void hns_roce_bond_suspend(struct hnae3_handle *handle);
+void hns_roce_bond_resume(struct hnae3_handle *handle);
+
+#endif
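
bond_id_mask in struct hns_roce_die_info is a bitmap of allocated bond IDs, one BOND_ID(id) = BIT(id) bit per group. The allocator itself (alloc_bond_id()/remove_bond_id()) is not shown in this excerpt; a plausible mask-based sketch, purely for illustration, could look like this:

/* Hypothetical sketch of a BOND_ID() mask allocator; the real
 * alloc_bond_id()/remove_bond_id() are not part of this excerpt.
 */
static int sketch_alloc_bond_id(struct hns_roce_die_info *die_info)
{
	u8 id;

	for (id = 0; id < ROCE_BOND_NUM_MAX; id++) {
		if (!(die_info->bond_id_mask & BOND_ID(id))) {
			die_info->bond_id_mask |= BOND_ID(id);
			return id;
		}
	}

	return -ENOSPC;
}

static void sketch_remove_bond_id(struct hns_roce_die_info *die_info, u8 id)
{
	die_info->bond_id_mask &= ~BOND_ID(id);
}
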
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c
index 2acf946d02e5..873e8a69a1b9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c
@@ -31,60 +31,57 @@
*/
#include <linux/dmapool.h>
-#include <linux/platform_device.h>
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
-#define CMD_POLL_TOKEN 0xffff
-#define CMD_MAX_NUM 32
-#define CMD_TOKEN_MASK 0x1f
+#define CMD_POLL_TOKEN 0xffff
+#define CMD_MAX_NUM 32
-static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, u64 in_param,
- u64 out_param, u32 in_modifier,
- u8 op_modifier, u16 op, u16 token,
- int event)
+static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
{
- struct hns_roce_cmdq *cmd = &hr_dev->cmd;
int ret;
- mutex_lock(&cmd->hcr_mutex);
- ret = hr_dev->hw->post_mbox(hr_dev, in_param, out_param, in_modifier,
- op_modifier, op, token, event);
- mutex_unlock(&cmd->hcr_mutex);
+ ret = hr_dev->hw->post_mbox(hr_dev, mbox_msg);
+ if (ret)
+ return ret;
+
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MBX_POSTED_CNT]);
- return ret;
+ return 0;
}
/* this should be called with "poll_sem" held */
-static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param,
- u64 out_param, unsigned long in_modifier,
- u8 op_modifier, u16 op,
- unsigned long timeout)
+static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
{
- struct device *dev = hr_dev->dev;
int ret;
- ret = hns_roce_cmd_mbox_post_hw(hr_dev, in_param, out_param,
- in_modifier, op_modifier, op,
- CMD_POLL_TOKEN, 0);
+ ret = hns_roce_cmd_mbox_post_hw(hr_dev, mbox_msg);
if (ret) {
- dev_err(dev, "[cmd_poll]hns_roce_cmd_mbox_post_hw failed\n");
+ dev_err_ratelimited(hr_dev->dev,
+ "failed to post mailbox 0x%x in poll mode, ret = %d.\n",
+ mbox_msg->cmd, ret);
return ret;
}
- return hr_dev->hw->chk_mbox(hr_dev, timeout);
+ ret = hr_dev->hw->poll_mbox_done(hr_dev);
+ if (ret)
+ return ret;
+
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MBX_POLLED_CNT]);
+
+ return 0;
}
-static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param,
- u64 out_param, unsigned long in_modifier,
- u8 op_modifier, u16 op, unsigned long timeout)
+static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
{
int ret;
down(&hr_dev->cmd.poll_sem);
- ret = __hns_roce_cmd_mbox_poll(hr_dev, in_param, out_param, in_modifier,
- op_modifier, op, timeout);
+ ret = __hns_roce_cmd_mbox_poll(hr_dev, mbox_msg);
up(&hr_dev->cmd.poll_sem);
return ret;
@@ -93,23 +90,24 @@ static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param,
void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status,
u64 out_param)
{
- struct hns_roce_cmd_context
- *context = &hr_dev->cmd.context[token & hr_dev->cmd.token_mask];
+ struct hns_roce_cmd_context *context =
+ &hr_dev->cmd.context[token % hr_dev->cmd.max_cmds];
- if (token != context->token)
+ if (unlikely(token != context->token)) {
+ dev_err_ratelimited(hr_dev->dev,
+ "[cmd] invalid ae token 0x%x, context token is 0x%x.\n",
+ token, context->token);
return;
+ }
context->result = (status == HNS_ROCE_CMD_SUCCESS) ? 0 : (-EIO);
context->out_param = out_param;
complete(&context->done);
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MBX_EVENT_CNT]);
}
-EXPORT_SYMBOL_GPL(hns_roce_cmd_event);
-/* this should be called with "use_events" */
-static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param,
- u64 out_param, unsigned long in_modifier,
- u8 op_modifier, u16 op,
- unsigned long timeout)
+static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
{
struct hns_roce_cmdq *cmd = &hr_dev->cmd;
struct hns_roce_cmd_context *context;
@@ -117,105 +115,91 @@ static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param,
int ret;
spin_lock(&cmd->context_lock);
- WARN_ON(cmd->free_head < 0);
- context = &cmd->context[cmd->free_head];
- context->token += cmd->token_mask + 1;
- cmd->free_head = context->next;
+
+ do {
+ context = &cmd->context[cmd->free_head];
+ cmd->free_head = context->next;
+ } while (context->busy);
+
+ context->busy = 1;
+ context->token += cmd->max_cmds;
+
spin_unlock(&cmd->context_lock);
- init_completion(&context->done);
+ reinit_completion(&context->done);
- ret = hns_roce_cmd_mbox_post_hw(hr_dev, in_param, out_param,
- in_modifier, op_modifier, op,
- context->token, 1);
- if (ret)
+ mbox_msg->token = context->token;
+ ret = hns_roce_cmd_mbox_post_hw(hr_dev, mbox_msg);
+ if (ret) {
+ dev_err_ratelimited(dev,
+ "failed to post mailbox 0x%x in event mode, ret = %d.\n",
+ mbox_msg->cmd, ret);
goto out;
+ }
- /*
- * It is timeout when wait_for_completion_timeout return 0
- * The return value is the time limit set in advance
- * how many seconds showing
- */
if (!wait_for_completion_timeout(&context->done,
- msecs_to_jiffies(timeout))) {
- dev_err(dev, "[cmd]wait_for_completion_timeout timeout\n");
+ msecs_to_jiffies(HNS_ROCE_CMD_TIMEOUT_MSECS))) {
+ dev_err_ratelimited(dev, "[cmd] token 0x%x mailbox 0x%x timeout.\n",
+ context->token, mbox_msg->cmd);
ret = -EBUSY;
goto out;
}
ret = context->result;
- if (ret) {
- dev_err(dev, "[cmd]event mod cmd process error!err=%d\n", ret);
- goto out;
- }
+ if (ret)
+ dev_err_ratelimited(dev, "[cmd] token 0x%x mailbox 0x%x error %d.\n",
+ context->token, mbox_msg->cmd, ret);
out:
- spin_lock(&cmd->context_lock);
- context->next = cmd->free_head;
- cmd->free_head = context - cmd->context;
- spin_unlock(&cmd->context_lock);
-
+ context->busy = 0;
return ret;
}
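
The context ring above gives every mailbox slot a token that always maps back to its slot: token % max_cmds selects the context (the same index hns_roce_cmd_event() uses), while bumping the token by max_cmds on each reuse lets a late completion for an earlier use of the slot be detected and dropped. A short worked example, assuming max_cmds is 32 (CMD_MAX_NUM):

/* Worked example, assuming max_cmds is 32 (CMD_MAX_NUM):
 *
 *   slot 5, first use:   token = 5        ->  5 % 32 == 5
 *   slot 5, second use:  token = 5 + 32   -> 37 % 32 == 5
 *   slot 5, third use:   token = 5 + 64   -> 69 % 32 == 5
 *
 * A completion that still carries token 5 after the slot has moved on to
 * token 37 fails the "token != context->token" check in hns_roce_cmd_event()
 * and is logged and dropped instead of completing the wrong request.
 */
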
-static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param,
- u64 out_param, unsigned long in_modifier,
- u8 op_modifier, u16 op, unsigned long timeout)
+static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
{
- int ret = 0;
+ int ret;
down(&hr_dev->cmd.event_sem);
- ret = __hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param,
- in_modifier, op_modifier, op, timeout);
+ ret = __hns_roce_cmd_mbox_wait(hr_dev, mbox_msg);
up(&hr_dev->cmd.event_sem);
return ret;
}
int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param,
- unsigned long in_modifier, u8 op_modifier, u16 op,
- unsigned long timeout)
+ u8 cmd, unsigned long tag)
{
- int ret;
+ struct hns_roce_mbox_msg mbox_msg = {};
+ bool is_busy;
- if (hr_dev->hw->rst_prc_mbox) {
- ret = hr_dev->hw->rst_prc_mbox(hr_dev);
- if (ret == CMD_RST_PRC_SUCCESS)
- return 0;
- else if (ret == CMD_RST_PRC_EBUSY)
- return -EBUSY;
- }
+ if (hr_dev->hw->chk_mbox_avail)
+ if (!hr_dev->hw->chk_mbox_avail(hr_dev, &is_busy))
+ return is_busy ? -EBUSY : 0;
- if (hr_dev->cmd.use_events)
- ret = hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param,
- in_modifier, op_modifier, op,
- timeout);
- else
- ret = hns_roce_cmd_mbox_poll(hr_dev, in_param, out_param,
- in_modifier, op_modifier, op,
- timeout);
+ mbox_msg.in_param = in_param;
+ mbox_msg.out_param = out_param;
+ mbox_msg.cmd = cmd;
+ mbox_msg.tag = tag;
- if (ret == CMD_RST_PRC_EBUSY)
- return -EBUSY;
+ if (hr_dev->cmd.use_events) {
+ mbox_msg.event_en = 1;
- if (ret && (hr_dev->hw->rst_prc_mbox &&
- hr_dev->hw->rst_prc_mbox(hr_dev) == CMD_RST_PRC_SUCCESS))
- return 0;
+ return hns_roce_cmd_mbox_wait(hr_dev, &mbox_msg);
+ } else {
+ mbox_msg.event_en = 0;
+ mbox_msg.token = CMD_POLL_TOKEN;
- return ret;
+ return hns_roce_cmd_mbox_poll(hr_dev, &mbox_msg);
+ }
}
-EXPORT_SYMBOL_GPL(hns_roce_cmd_mbox);
int hns_roce_cmd_init(struct hns_roce_dev *hr_dev)
{
- struct device *dev = hr_dev->dev;
-
- mutex_init(&hr_dev->cmd.hcr_mutex);
sema_init(&hr_dev->cmd.poll_sem, 1);
hr_dev->cmd.use_events = 0;
- hr_dev->cmd.toggle = 1;
hr_dev->cmd.max_cmds = CMD_MAX_NUM;
- hr_dev->cmd.pool = dma_pool_create("hns_roce_cmd", dev,
+ hr_dev->cmd.pool = dma_pool_create("hns_roce_cmd", hr_dev->dev,
HNS_ROCE_MAILBOX_SIZE,
HNS_ROCE_MAILBOX_SIZE, 0);
if (!hr_dev->cmd.pool)
@@ -234,47 +218,39 @@ int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev)
struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd;
int i;
- hr_cmd->context = kmalloc_array(hr_cmd->max_cmds,
- sizeof(*hr_cmd->context),
- GFP_KERNEL);
- if (!hr_cmd->context)
+ hr_cmd->context =
+ kcalloc(hr_cmd->max_cmds, sizeof(*hr_cmd->context), GFP_KERNEL);
+ if (!hr_cmd->context) {
+ hr_dev->cmd_mod = 0;
return -ENOMEM;
+ }
for (i = 0; i < hr_cmd->max_cmds; ++i) {
hr_cmd->context[i].token = i;
hr_cmd->context[i].next = i + 1;
+ init_completion(&hr_cmd->context[i].done);
}
-
- hr_cmd->context[hr_cmd->max_cmds - 1].next = -1;
+ hr_cmd->context[hr_cmd->max_cmds - 1].next = 0;
hr_cmd->free_head = 0;
sema_init(&hr_cmd->event_sem, hr_cmd->max_cmds);
spin_lock_init(&hr_cmd->context_lock);
- hr_cmd->token_mask = CMD_TOKEN_MASK;
hr_cmd->use_events = 1;
- down(&hr_cmd->poll_sem);
-
return 0;
}
void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev)
{
struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd;
- int i;
-
- hr_cmd->use_events = 0;
-
- for (i = 0; i < hr_cmd->max_cmds; ++i)
- down(&hr_cmd->event_sem);
kfree(hr_cmd->context);
- up(&hr_cmd->poll_sem);
+ hr_cmd->use_events = 0;
}
-struct hns_roce_cmd_mailbox
- *hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev)
+struct hns_roce_cmd_mailbox *
+hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev)
{
struct hns_roce_cmd_mailbox *mailbox;
@@ -282,8 +258,8 @@ struct hns_roce_cmd_mailbox
if (!mailbox)
return ERR_PTR(-ENOMEM);
- mailbox->buf = dma_pool_alloc(hr_dev->cmd.pool, GFP_KERNEL,
- &mailbox->dma);
+ mailbox->buf =
+ dma_pool_alloc(hr_dev->cmd.pool, GFP_KERNEL, &mailbox->dma);
if (!mailbox->buf) {
kfree(mailbox);
return ERR_PTR(-ENOMEM);
@@ -291,7 +267,6 @@ struct hns_roce_cmd_mailbox
return mailbox;
}
-EXPORT_SYMBOL_GPL(hns_roce_alloc_cmd_mailbox);
void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev,
struct hns_roce_cmd_mailbox *mailbox)
@@ -302,4 +277,15 @@ void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev,
dma_pool_free(hr_dev->cmd.pool, mailbox->buf, mailbox->dma);
kfree(mailbox);
}
-EXPORT_SYMBOL_GPL(hns_roce_free_cmd_mailbox);
+
+int hns_roce_create_hw_ctx(struct hns_roce_dev *dev,
+ struct hns_roce_cmd_mailbox *mailbox,
+ u8 cmd, unsigned long idx)
+{
+ return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cmd, idx);
+}
+
+int hns_roce_destroy_hw_ctx(struct hns_roce_dev *dev, u8 cmd, unsigned long idx)
+{
+ return hns_roce_cmd_mbox(dev, 0, 0, cmd, idx);
+}
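
These two wrappers capture the common "fill a DMA mailbox, then ask hardware to install or destroy a context" sequence. The caller-side shape, mirroring hns_roce_create_cqc() later in this patch, is roughly the sketch below; error logging is trimmed and the context-filling step is only indicated by a comment.

/* Caller-side sketch of the mailbox pattern (error logging trimmed);
 * the context-filling step depends on the hardware-specific callback.
 */
static int sketch_install_cqc(struct hns_roce_dev *hr_dev, unsigned long cqn)
{
	struct hns_roce_cmd_mailbox *mailbox;
	int ret;

	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
	if (IS_ERR(mailbox))
		return PTR_ERR(mailbox);

	/* fill mailbox->buf with the context image here */

	ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_CQC,
				     cqn);
	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

	return ret;
}
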
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h
index 2b6ac646ca9a..11dbbabebdc9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.h
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h
@@ -108,6 +108,9 @@ enum {
HNS_ROCE_CMD_QUERY_CEQC = 0x92,
HNS_ROCE_CMD_DESTROY_CEQC = 0x93,
+ /* SCC CTX commands */
+ HNS_ROCE_CMD_QUERY_SCCC = 0xa2,
+
/* SCC CTX BT commands */
HNS_ROCE_CMD_READ_SCCC_BT0 = 0xa4,
HNS_ROCE_CMD_WRITE_SCCC_BT0 = 0xa5,
@@ -115,12 +118,12 @@ enum {
enum {
/* TPT commands */
- HNS_ROCE_CMD_SW2HW_MPT = 0xd,
- HNS_ROCE_CMD_HW2SW_MPT = 0xf,
+ HNS_ROCE_CMD_CREATE_MPT = 0xd,
+ HNS_ROCE_CMD_DESTROY_MPT = 0xf,
/* CQ commands */
- HNS_ROCE_CMD_SW2HW_CQ = 0x16,
- HNS_ROCE_CMD_HW2SW_CQ = 0x17,
+ HNS_ROCE_CMD_CREATE_CQC = 0x16,
+ HNS_ROCE_CMD_DESTROY_CQC = 0x17,
/* QP/EE commands */
HNS_ROCE_CMD_RST2INIT_QP = 0x19,
@@ -129,23 +132,27 @@ enum {
HNS_ROCE_CMD_RTS2RTS_QP = 0x1c,
HNS_ROCE_CMD_2ERR_QP = 0x1e,
HNS_ROCE_CMD_RTS2SQD_QP = 0x1f,
- HNS_ROCE_CMD_SQD2SQD_QP = 0x38,
HNS_ROCE_CMD_SQD2RTS_QP = 0x20,
HNS_ROCE_CMD_2RST_QP = 0x21,
HNS_ROCE_CMD_QUERY_QP = 0x22,
- HNS_ROCE_CMD_SW2HW_SRQ = 0x70,
+ HNS_ROCE_CMD_SQD2SQD_QP = 0x38,
+ HNS_ROCE_CMD_CREATE_SRQ = 0x70,
HNS_ROCE_CMD_MODIFY_SRQC = 0x72,
HNS_ROCE_CMD_QUERY_SRQC = 0x73,
- HNS_ROCE_CMD_HW2SW_SRQ = 0x74,
+ HNS_ROCE_CMD_DESTROY_SRQ = 0x74,
};
int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param,
- unsigned long in_modifier, u8 op_modifier, u16 op,
- unsigned long timeout);
+ u8 cmd, unsigned long tag);
-struct hns_roce_cmd_mailbox
- *hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev);
+struct hns_roce_cmd_mailbox *
+hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev);
void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev,
struct hns_roce_cmd_mailbox *mailbox);
+int hns_roce_create_hw_ctx(struct hns_roce_dev *dev,
+ struct hns_roce_cmd_mailbox *mailbox,
+ u8 cmd, unsigned long idx);
+int hns_roce_destroy_hw_ctx(struct hns_roce_dev *dev, u8 cmd,
+ unsigned long idx);
#endif /* _HNS_ROCE_CMD_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
index 8e95a1aa1b4f..465d1f914b6c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_common.h
+++ b/drivers/infiniband/hw/hns/hns_roce_common.h
@@ -32,232 +32,77 @@
#ifndef _HNS_ROCE_COMMON_H
#define _HNS_ROCE_COMMON_H
-
-#ifndef assert
-#define assert(cond)
-#endif
+#include <linux/bitfield.h>
#define roce_write(dev, reg, val) writel((val), (dev)->reg_base + (reg))
#define roce_read(dev, reg) readl((dev)->reg_base + (reg))
#define roce_raw_write(value, addr) \
__raw_writel((__force u32)cpu_to_le32(value), (addr))
-#define roce_get_field(origin, mask, shift) \
- (((le32_to_cpu(origin)) & (mask)) >> (shift))
+#define roce_get_field(origin, mask, shift) \
+ ((le32_to_cpu(origin) & (mask)) >> (u32)(shift))
#define roce_get_bit(origin, shift) \
roce_get_field((origin), (1ul << (shift)), (shift))
-#define roce_set_field(origin, mask, shift, val) \
- do { \
- (origin) &= ~cpu_to_le32(mask); \
- (origin) |= cpu_to_le32(((u32)(val) << (shift)) & (mask)); \
+#define roce_set_field(origin, mask, shift, val) \
+ do { \
+ (origin) &= ~cpu_to_le32(mask); \
+ (origin) |= \
+ cpu_to_le32(((u32)(val) << (u32)(shift)) & (mask)); \
} while (0)
-#define roce_set_bit(origin, shift, val) \
+#define roce_set_bit(origin, shift, val) \
roce_set_field((origin), (1ul << (shift)), (shift), (val))
-#define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3
-#define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4
-
-#define ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S 5
-
-#define ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S 6
-
-#define ROCEE_GLB_CFG_ROCEE_PORT_ST_S 10
-#define ROCEE_GLB_CFG_ROCEE_PORT_ST_M \
- (((1UL << 6) - 1) << ROCEE_GLB_CFG_ROCEE_PORT_ST_S)
-
-#define ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S 16
-
-#define ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S 0
-#define ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M \
- (((1UL << 24) - 1) << ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S)
-
-#define ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S 24
-#define ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M \
- (((1UL << 4) - 1) << ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S)
-
-#define ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S 0
-#define ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M \
- (((1UL << 24) - 1) << ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S)
-
-#define ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S 24
-#define ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M \
- (((1UL << 4) - 1) << ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S)
-
-#define ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S 0
-#define ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M \
- (((1UL << 16) - 1) << ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S)
-
-#define ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S 16
-#define ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M \
- (((1UL << 16) - 1) << ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S)
-
-#define ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S 0
-#define ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M \
- (((1UL << 16) - 1) << ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S)
-
-#define ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S 16
-#define ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M \
- (((1UL << 16) - 1) << ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S)
-
-#define ROCEE_RAQ_WL_ROCEE_RAQ_WL_S 0
-#define ROCEE_RAQ_WL_ROCEE_RAQ_WL_M \
- (((1UL << 8) - 1) << ROCEE_RAQ_WL_ROCEE_RAQ_WL_S)
-
-#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S 0
-#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_M \
- (((1UL << 15) - 1) << \
- ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S)
-
-#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S 16
-#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_M \
- (((1UL << 4) - 1) << \
- ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S)
-
-#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_EN_S 20
-
-#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE 21
-
-#define ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S 0
-#define ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M \
- (((1UL << 5) - 1) << ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S)
-
-#define ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S 5
-#define ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M \
- (((1UL << 5) - 1) << ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S)
-
-#define ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S 0
-#define ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M \
- (((1UL << 5) - 1) << ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S)
-
-#define ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S 5
-#define ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M \
- (((1UL << 5) - 1) << ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S)
-
-#define ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S 0
-#define ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M \
- (((1UL << 5) - 1) << ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S)
-
-#define ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S 8
-#define ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M \
- (((1UL << 5) - 1) << ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S)
-
-#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S 0
-#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M \
- (((1UL << 19) - 1) << ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S)
-
-#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_S 19
-
-#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S 20
-#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M \
- (((1UL << 2) - 1) << ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S)
-
-#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S 22
-#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M \
- (((1UL << 5) - 1) << ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S)
-
-#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S 31
-
-#define ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S 0
-#define ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M \
- (((1UL << 3) - 1) << ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S)
-
-#define ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S 0
-#define ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M \
- (((1UL << 15) - 1) << ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S)
-
-#define ROCEE_MB6_ROCEE_MB_CMD_S 0
-#define ROCEE_MB6_ROCEE_MB_CMD_M \
- (((1UL << 8) - 1) << ROCEE_MB6_ROCEE_MB_CMD_S)
-
-#define ROCEE_MB6_ROCEE_MB_CMD_MDF_S 8
-#define ROCEE_MB6_ROCEE_MB_CMD_MDF_M \
- (((1UL << 4) - 1) << ROCEE_MB6_ROCEE_MB_CMD_MDF_S)
-
-#define ROCEE_MB6_ROCEE_MB_EVENT_S 14
-
-#define ROCEE_MB6_ROCEE_MB_HW_RUN_S 15
-
-#define ROCEE_MB6_ROCEE_MB_TOKEN_S 16
-#define ROCEE_MB6_ROCEE_MB_TOKEN_M \
- (((1UL << 16) - 1) << ROCEE_MB6_ROCEE_MB_TOKEN_S)
-
-#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S 0
-#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_M \
- (((1UL << 24) - 1) << ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S)
-
-#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S 24
-#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_M \
- (((1UL << 4) - 1) << ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S)
-
-#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S 28
-#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M \
- (((1UL << 3) - 1) << ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S)
-
-#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S 31
-
-#define ROCEE_SMAC_H_ROCEE_SMAC_H_S 0
-#define ROCEE_SMAC_H_ROCEE_SMAC_H_M \
- (((1UL << 16) - 1) << ROCEE_SMAC_H_ROCEE_SMAC_H_S)
-
-#define ROCEE_SMAC_H_ROCEE_PORT_MTU_S 16
-#define ROCEE_SMAC_H_ROCEE_PORT_MTU_M \
- (((1UL << 4) - 1) << ROCEE_SMAC_H_ROCEE_PORT_MTU_S)
-
-#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S 0
-#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M \
- (((1UL << 2) - 1) << ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S)
-
-#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S 8
-#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M \
- (((1UL << 4) - 1) << ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S)
-
-#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQ_ALM_OVF_INT_ST_S 17
-
-#define ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S 0
-#define ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M \
- (((1UL << 5) - 1) << ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S)
-
-#define ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S 16
-#define ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M \
- (((1UL << 16) - 1) << ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S)
-
-#define ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S 0
-#define ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M \
- (((1UL << 16) - 1) << ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S)
+#define FIELD_LOC(field_type, field_h, field_l) field_type, field_h, field_l
-#define ROCEE_CAEP_CEQC_SHIFT_CAEP_CEQ_ALM_OVF_INT_ST_S 16
-#define ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S 1
-#define ROCEE_CAEP_CEQ_ALM_OVF_CAEP_CEQ_ALM_OVF_S 0
+#define _hr_reg_enable(ptr, field_type, field_h, field_l) \
+ ({ \
+ const field_type *_ptr = ptr; \
+ *((__le32 *)_ptr + (field_h) / 32) |= cpu_to_le32( \
+ BIT((field_l) % 32) + \
+ BUILD_BUG_ON_ZERO((field_h) != (field_l))); \
+ })
-#define ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S 0
-#define ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S 1
+#define hr_reg_enable(ptr, field) _hr_reg_enable(ptr, field)
-#define ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S 0
+#define _hr_reg_clear(ptr, field_type, field_h, field_l) \
+ ({ \
+ const field_type *_ptr = ptr; \
+ BUILD_BUG_ON(((field_h) / 32) != ((field_l) / 32)); \
+ *((__le32 *)_ptr + (field_h) / 32) &= \
+ ~cpu_to_le32(GENMASK((field_h) % 32, (field_l) % 32)); \
+ })
-#define ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S 0
-#define ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M \
- (((1UL << 28) - 1) << ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S)
+#define hr_reg_clear(ptr, field) _hr_reg_clear(ptr, field)
-#define ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S 0
-#define ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M \
- (((1UL << 28) - 1) << ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)
+#define _hr_reg_write_bool(ptr, field_type, field_h, field_l, val) \
+ ({ \
+ (val) ? _hr_reg_enable(ptr, field_type, field_h, field_l) : \
+ _hr_reg_clear(ptr, field_type, field_h, field_l); \
+ })
-#define ROCEE_SDB_INV_CNT_SDB_INV_CNT_S 0
-#define ROCEE_SDB_INV_CNT_SDB_INV_CNT_M \
- (((1UL << 16) - 1) << ROCEE_SDB_INV_CNT_SDB_INV_CNT_S)
+#define hr_reg_write_bool(ptr, field, val) _hr_reg_write_bool(ptr, field, val)
-#define ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S 0
-#define ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M \
- (((1UL << 16) - 1) << ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S)
+#define _hr_reg_write(ptr, field_type, field_h, field_l, val) \
+ ({ \
+ _hr_reg_clear(ptr, field_type, field_h, field_l); \
+ *((__le32 *)ptr + (field_h) / 32) |= cpu_to_le32(FIELD_PREP( \
+ GENMASK((field_h) % 32, (field_l) % 32), val)); \
+ })
-#define ROCEE_SDB_CNT_CMP_BITS 16
+#define hr_reg_write(ptr, field, val) _hr_reg_write(ptr, field, val)
-#define ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S 20
+#define _hr_reg_read(ptr, field_type, field_h, field_l) \
+ ({ \
+ const field_type *_ptr = ptr; \
+ BUILD_BUG_ON(((field_h) / 32) != ((field_l) / 32)); \
+ FIELD_GET(GENMASK((field_h) % 32, (field_l) % 32), \
+ le32_to_cpu(*((__le32 *)_ptr + (field_h) / 32))); \
+ })
-#define ROCEE_CNT_CLR_CE_CNT_CLR_CE_S 0
+#define hr_reg_read(ptr, field) _hr_reg_read(ptr, field)
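
Each field is declared once as a (containing type, high bit, low bit) triple through FIELD_LOC(); the hr_reg_* helpers then pick the right __le32 word (high / 32) and build the mask from the bit positions modulo 32, with BUILD_BUG_ON() rejecting fields that straddle a word boundary. A usage sketch follows; struct example_context and EXAMPLE_STATE are invented for illustration and are not definitions from this patch.

/* Usage sketch; struct example_context and EXAMPLE_STATE are invented for
 * illustration and are not definitions from this patch.
 */
struct example_context {
	__le32 data[4];
};

/* bits 65..64 of the context, i.e. bits 1..0 of data[2] */
#define EXAMPLE_STATE FIELD_LOC(struct example_context, 65, 64)

static void example_set_state(struct example_context *ctx, u32 state)
{
	hr_reg_write(ctx, EXAMPLE_STATE, state); /* mask, then OR into data[2] */
}

static u32 example_get_state(struct example_context *ctx)
{
	return hr_reg_read(ctx, EXAMPLE_STATE);  /* FIELD_GET from data[2] */
}
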
/*************ROCEE_REG DEFINITION****************/
#define ROCEE_VENDOR_ID_REG 0x0
@@ -334,8 +179,8 @@
#define ROCEE_TX_CMQ_BASEADDR_L_REG 0x07000
#define ROCEE_TX_CMQ_BASEADDR_H_REG 0x07004
#define ROCEE_TX_CMQ_DEPTH_REG 0x07008
-#define ROCEE_TX_CMQ_TAIL_REG 0x07010
-#define ROCEE_TX_CMQ_HEAD_REG 0x07014
+#define ROCEE_TX_CMQ_PI_REG 0x07010
+#define ROCEE_TX_CMQ_CI_REG 0x07014
#define ROCEE_RX_CMQ_BASEADDR_L_REG 0x07018
#define ROCEE_RX_CMQ_BASEADDR_H_REG 0x0701c
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 9caf35061721..6aa82fe9dd3d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -30,507 +30,565 @@
* SOFTWARE.
*/
-#include <linux/platform_device.h>
+#include <linux/pci.h>
#include <rdma/ib_umem.h>
#include <rdma/uverbs_ioctl.h>
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"
-#include <rdma/hns-abi.h>
#include "hns_roce_common.h"
-static void hns_roce_ib_cq_comp(struct hns_roce_cq *hr_cq)
+void hns_roce_put_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx)
{
- struct ib_cq *ibcq = &hr_cq->ib_cq;
+ struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device);
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+
+ if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09)
+ return;
- ibcq->comp_handler(ibcq, ibcq->cq_context);
+ mutex_lock(&cq_table->bank_mutex);
+ cq_table->ctx_num[uctx->cq_bank_id]--;
+ mutex_unlock(&cq_table->bank_mutex);
}
-static void hns_roce_ib_cq_event(struct hns_roce_cq *hr_cq,
- enum hns_roce_event event_type)
+void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx)
{
- struct hns_roce_dev *hr_dev;
- struct ib_event event;
- struct ib_cq *ibcq;
-
- ibcq = &hr_cq->ib_cq;
- hr_dev = to_hr_dev(ibcq->device);
+ struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device);
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ u32 least_load = cq_table->ctx_num[0];
+ u8 bankid = 0;
+ u8 i;
- if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID &&
- event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR &&
- event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) {
- dev_err(hr_dev->dev,
- "hns_roce_ib: Unexpected event type 0x%x on CQ %06lx\n",
- event_type, hr_cq->cqn);
+ if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09)
return;
+
+ mutex_lock(&cq_table->bank_mutex);
+ for (i = 1; i < HNS_ROCE_CQ_BANK_NUM; i++) {
+ if (cq_table->ctx_num[i] < least_load) {
+ least_load = cq_table->ctx_num[i];
+ bankid = i;
+ }
}
+ cq_table->ctx_num[bankid]++;
+ mutex_unlock(&cq_table->bank_mutex);
- if (ibcq->event_handler) {
- event.device = ibcq->device;
- event.event = IB_EVENT_CQ_ERR;
- event.element.cq = ibcq;
- ibcq->event_handler(&event, ibcq->cq_context);
+ uctx->cq_bank_id = bankid;
+}
+
+static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank)
+{
+ u32 least_load = bank[0].inuse;
+ u8 bankid = 0;
+ u32 bankcnt;
+ u8 i;
+
+ for (i = 1; i < HNS_ROCE_CQ_BANK_NUM; i++) {
+ bankcnt = bank[i].inuse;
+ if (bankcnt < least_load) {
+ least_load = bankcnt;
+ bankid = i;
+ }
}
+
+ return bankid;
}
-static int hns_roce_sw2hw_cq(struct hns_roce_dev *dev,
- struct hns_roce_cmd_mailbox *mailbox,
- unsigned long cq_num)
+static u8 select_cq_bankid(struct hns_roce_dev *hr_dev,
+ struct hns_roce_bank *bank, struct ib_udata *udata)
{
- return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cq_num, 0,
- HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIMEOUT_MSECS);
+ struct hns_roce_ucontext *uctx = udata ?
+ rdma_udata_to_drv_context(udata, struct hns_roce_ucontext,
+ ibucontext) : NULL;
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return uctx ? uctx->cq_bank_id : 0;
+
+ return get_least_load_bankid_for_cq(bank);
}
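
Two selection policies coexist here: on earlier hardware every CQ independently picks the least-loaded bank by bank[i].inuse, while on HIP09 and later the bank is fixed per user context via cq_table->ctx_num[] and kernel CQs fall back to bank 0, as illustrated below.

/* Selection policy, illustrated:
 *
 *   pre-HIP09: every CQ picks the bank with the fewest CQs (bank[i].inuse),
 *              so one process's CQs may spread over all the banks.
 *   HIP09+:    the bank is chosen once per user context from
 *              cq_table->ctx_num[] and every CQ of that context shares it;
 *              kernel CQs (no udata) fall back to bank 0.
 *
 * Example: with ctx_num = {3, 1, 2, 2}, a new HIP09 user context is assigned
 * bank 1 and ctx_num becomes {3, 2, 2, 2}.
 */
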
-static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent,
- struct hns_roce_mtt *hr_mtt,
- struct hns_roce_uar *hr_uar,
- struct hns_roce_cq *hr_cq, int vector)
+static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
+ struct ib_udata *udata)
{
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ struct hns_roce_bank *bank;
+ u8 bankid;
+ int id;
+
+ mutex_lock(&cq_table->bank_mutex);
+ bankid = select_cq_bankid(hr_dev, cq_table->bank, udata);
+ bank = &cq_table->bank[bankid];
+
+ id = ida_alloc_range(&bank->ida, bank->min, bank->max, GFP_KERNEL);
+ if (id < 0) {
+ mutex_unlock(&cq_table->bank_mutex);
+ return id;
+ }
+
+ /* the lower 2 bits are the bank ID */
+ hr_cq->cqn = (id << CQ_BANKID_SHIFT) | bankid;
+ bank->inuse++;
+ mutex_unlock(&cq_table->bank_mutex);
+
+ return 0;
+}
+
+static inline u8 get_cq_bankid(unsigned long cqn)
+{
+ /* The lower 2 bits of CQN are used to hash to different banks */
+ return (u8)(cqn & GENMASK(1, 0));
+}
+
+static void free_cqn(struct hns_roce_dev *hr_dev, unsigned long cqn)
+{
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ struct hns_roce_bank *bank;
+
+ bank = &cq_table->bank[get_cq_bankid(cqn)];
+
+ ida_free(&bank->ida, cqn >> CQ_BANKID_SHIFT);
+
+ mutex_lock(&cq_table->bank_mutex);
+ bank->inuse--;
+ mutex_unlock(&cq_table->bank_mutex);
+}
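
Allocation and free stay consistent because the CQN itself records the bank: the low two bits carry the bank ID and the rest is the per-bank IDA value. A worked example, assuming CQ_BANKID_SHIFT is 2 (consistent with the two-bit mask in get_cq_bankid()):

/* Worked example, assuming CQ_BANKID_SHIFT is 2 (matching the two-bit mask
 * in get_cq_bankid()):
 *
 *   bankid = 3, IDA id = 10  ->  cqn = (10 << 2) | 3 = 43
 *   get_cq_bankid(43)        ->  43 & GENMASK(1, 0) = 3
 *   IDA index freed          ->  43 >> 2 = 10
 */
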
+
+static int hns_roce_create_cqc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cq *hr_cq,
+ u64 *mtts, dma_addr_t dma_handle)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_cmd_mailbox *mailbox;
- struct hns_roce_hem_table *mtt_table;
- struct hns_roce_cq_table *cq_table;
- struct device *dev = hr_dev->dev;
- dma_addr_t dma_handle;
- u64 *mtts;
int ret;
- cq_table = &hr_dev->cq_table;
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox)) {
+ ibdev_err(ibdev, "failed to alloc mailbox for CQC.\n");
+ return PTR_ERR(mailbox);
+ }
- /* Get the physical address of cq buf */
- if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
- mtt_table = &hr_dev->mr_table.mtt_cqe_table;
- else
- mtt_table = &hr_dev->mr_table.mtt_table;
+ hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle);
- mtts = hns_roce_table_find(hr_dev, mtt_table,
- hr_mtt->first_seg, &dma_handle);
- if (!mtts) {
- dev_err(dev, "CQ alloc.Failed to find cq buf addr.\n");
- return -EINVAL;
- }
+ ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_CQC,
+ hr_cq->cqn);
+ if (ret)
+ ibdev_err(ibdev,
+ "failed to send create cmd for CQ(0x%lx), ret = %d.\n",
+ hr_cq->cqn, ret);
- if (vector >= hr_dev->caps.num_comp_vectors) {
- dev_err(dev, "CQ alloc.Invalid vector.\n");
- return -EINVAL;
- }
- hr_cq->vector = vector;
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return ret;
+}
- ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn);
- if (ret == -1) {
- dev_err(dev, "CQ alloc.Failed to alloc index.\n");
- return -ENOMEM;
+static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
+{
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u64 mtts[MTT_MIN_COUNT] = {};
+ int ret;
+
+ ret = hns_roce_mtr_find(hr_dev, &hr_cq->mtr, 0, mtts, ARRAY_SIZE(mtts));
+ if (ret) {
+ ibdev_err(ibdev, "failed to find CQ mtr, ret = %d.\n", ret);
+ return ret;
}
/* Get CQC memory HEM (Hardware Entry Memory) table */
ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn);
if (ret) {
- dev_err(dev, "CQ alloc.Failed to get context mem.\n");
- goto err_out;
+ ibdev_err(ibdev, "failed to get CQ(0x%lx) context, ret = %d.\n",
+ hr_cq->cqn, ret);
+ return ret;
}
- ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL));
+ ret = xa_err(xa_store_irq(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL));
if (ret) {
- dev_err(dev, "CQ alloc failed xa_store.\n");
+ ibdev_err(ibdev, "failed to xa_store CQ, ret = %d.\n", ret);
goto err_put;
}
- /* Allocate mailbox memory */
- mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox)) {
- ret = PTR_ERR(mailbox);
- goto err_xa;
- }
-
- hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle,
- nent, vector);
-
- /* Send mailbox to hw */
- ret = hns_roce_sw2hw_cq(hr_dev, mailbox, hr_cq->cqn);
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
- if (ret) {
- dev_err(dev, "CQ alloc.Failed to cmd mailbox.\n");
+ ret = hns_roce_create_cqc(hr_dev, hr_cq, mtts,
+ hns_roce_get_mtr_ba(&hr_cq->mtr));
+ if (ret)
goto err_xa;
- }
-
- hr_cq->cons_index = 0;
- hr_cq->arm_sn = 1;
- hr_cq->uar = hr_uar;
-
- atomic_set(&hr_cq->refcount, 1);
- init_completion(&hr_cq->free);
return 0;
err_xa:
- xa_erase(&cq_table->array, hr_cq->cqn);
-
+ xa_erase_irq(&cq_table->array, hr_cq->cqn);
err_put:
hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
-err_out:
- hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR);
return ret;
}
-static int hns_roce_hw2sw_cq(struct hns_roce_dev *dev,
- struct hns_roce_cmd_mailbox *mailbox,
- unsigned long cq_num)
-{
- return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, cq_num,
- mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_CQ,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
-}
-
-void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
+static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
{
struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
struct device *dev = hr_dev->dev;
int ret;
- ret = hns_roce_hw2sw_cq(hr_dev, NULL, hr_cq->cqn);
+ ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_CQC,
+ hr_cq->cqn);
if (ret)
- dev_err(dev, "HW2SW_CQ failed (%d) for CQN %06lx\n", ret,
- hr_cq->cqn);
+ dev_err_ratelimited(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n",
+ ret, hr_cq->cqn);
- xa_erase(&cq_table->array, hr_cq->cqn);
+ xa_erase_irq(&cq_table->array, hr_cq->cqn);
/* Wait for any in-flight interrupt handler to finish */
synchronize_irq(hr_dev->eq_table.eq[hr_cq->vector].irq);
/* wait until all outstanding interrupt processing has completed */
- if (atomic_dec_and_test(&hr_cq->refcount))
+ if (refcount_dec_and_test(&hr_cq->refcount))
complete(&hr_cq->free);
wait_for_completion(&hr_cq->free);
hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
- hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR);
}
-EXPORT_SYMBOL_GPL(hns_roce_free_cq);
-static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev,
- struct ib_udata *udata,
- struct hns_roce_cq_buf *buf,
- struct ib_umem **umem, u64 buf_addr, int cqe)
+static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
+ struct ib_udata *udata, unsigned long addr)
{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_buf_attr buf_attr = {};
int ret;
- u32 page_shift;
- u32 npages;
-
- *umem = ib_umem_get(udata, buf_addr, cqe * hr_dev->caps.cq_entry_sz,
- IB_ACCESS_LOCAL_WRITE, 1);
- if (IS_ERR(*umem))
- return PTR_ERR(*umem);
-
- if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
- buf->hr_mtt.mtt_type = MTT_TYPE_CQE;
- else
- buf->hr_mtt.mtt_type = MTT_TYPE_WQE;
-
- if (hr_dev->caps.cqe_buf_pg_sz) {
- npages = (ib_umem_page_count(*umem) +
- (1 << hr_dev->caps.cqe_buf_pg_sz) - 1) /
- (1 << hr_dev->caps.cqe_buf_pg_sz);
- page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz;
- ret = hns_roce_mtt_init(hr_dev, npages, page_shift,
- &buf->hr_mtt);
- } else {
- ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(*umem),
- (*umem)->page_shift,
- &buf->hr_mtt);
- }
- if (ret)
- goto err_buf;
- ret = hns_roce_ib_umem_write_mtt(hr_dev, &buf->hr_mtt, *umem);
- if (ret)
- goto err_mtt;
-
- return 0;
+ buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + PAGE_SHIFT;
+ buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size;
+ buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num;
+ buf_attr.region_count = 1;
-err_mtt:
- hns_roce_mtt_cleanup(hr_dev, &buf->hr_mtt);
+ ret = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr,
+ hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT,
+ udata, addr);
+ if (ret)
+ ibdev_err(ibdev, "failed to alloc CQ mtr, ret = %d.\n", ret);
-err_buf:
- ib_umem_release(*umem);
return ret;
}
-static int hns_roce_ib_alloc_cq_buf(struct hns_roce_dev *hr_dev,
- struct hns_roce_cq_buf *buf, u32 nent)
+static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
{
- int ret;
- u32 page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz;
-
- ret = hns_roce_buf_alloc(hr_dev, nent * hr_dev->caps.cq_entry_sz,
- (1 << page_shift) * 2, &buf->hr_buf,
- page_shift);
- if (ret)
- goto out;
-
- if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
- buf->hr_mtt.mtt_type = MTT_TYPE_CQE;
- else
- buf->hr_mtt.mtt_type = MTT_TYPE_WQE;
+ hns_roce_mtr_destroy(hr_dev, &hr_cq->mtr);
+}
- ret = hns_roce_mtt_init(hr_dev, buf->hr_buf.npages,
- buf->hr_buf.page_shift, &buf->hr_mtt);
- if (ret)
- goto err_buf;
+static int alloc_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
+ struct ib_udata *udata, unsigned long addr,
+ struct hns_roce_ib_create_cq_resp *resp)
+{
+ bool has_db = hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB;
+ struct hns_roce_ucontext *uctx;
+ int err;
- ret = hns_roce_buf_write_mtt(hr_dev, &buf->hr_mtt, &buf->hr_buf);
- if (ret)
- goto err_mtt;
+ if (udata) {
+ if (has_db &&
+ udata->outlen >= offsetofend(typeof(*resp), cap_flags)) {
+ uctx = rdma_udata_to_drv_context(udata,
+ struct hns_roce_ucontext, ibucontext);
+ err = hns_roce_db_map_user(uctx, addr, &hr_cq->db);
+ if (err)
+ return err;
+ hr_cq->flags |= HNS_ROCE_CQ_FLAG_RECORD_DB;
+ resp->cap_flags |= HNS_ROCE_CQ_FLAG_RECORD_DB;
+ }
+ } else {
+ if (has_db) {
+ err = hns_roce_alloc_db(hr_dev, &hr_cq->db, 1);
+ if (err)
+ return err;
+ hr_cq->set_ci_db = hr_cq->db.db_record;
+ *hr_cq->set_ci_db = 0;
+ hr_cq->flags |= HNS_ROCE_CQ_FLAG_RECORD_DB;
+ }
+ hr_cq->db_reg = hr_dev->reg_base + hr_dev->odb_offset +
+ DB_REG_OFFSET * hr_dev->priv_uar.index;
+ }
return 0;
+}
-err_mtt:
- hns_roce_mtt_cleanup(hr_dev, &buf->hr_mtt);
+static void free_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
+ struct ib_udata *udata)
+{
+ struct hns_roce_ucontext *uctx;
-err_buf:
- hns_roce_buf_free(hr_dev, nent * hr_dev->caps.cq_entry_sz,
- &buf->hr_buf);
-out:
- return ret;
+ if (!(hr_cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB))
+ return;
+
+ hr_cq->flags &= ~HNS_ROCE_CQ_FLAG_RECORD_DB;
+ if (udata) {
+ uctx = rdma_udata_to_drv_context(udata,
+ struct hns_roce_ucontext,
+ ibucontext);
+ hns_roce_db_unmap_user(uctx, &hr_cq->db);
+ } else {
+ hns_roce_free_db(hr_dev, &hr_cq->db);
+ }
}
-static void hns_roce_ib_free_cq_buf(struct hns_roce_dev *hr_dev,
- struct hns_roce_cq_buf *buf, int cqe)
+static int verify_cq_create_attr(struct hns_roce_dev *hr_dev,
+ const struct ib_cq_init_attr *attr)
{
- hns_roce_buf_free(hr_dev, (cqe + 1) * hr_dev->caps.cq_entry_sz,
- &buf->hr_buf);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+
+ if (!attr->cqe || attr->cqe > hr_dev->caps.max_cqes) {
+ ibdev_err(ibdev, "failed to check CQ count %u, max = %u.\n",
+ attr->cqe, hr_dev->caps.max_cqes);
+ return -EINVAL;
+ }
+
+ if (attr->comp_vector >= hr_dev->caps.num_comp_vectors) {
+ ibdev_err(ibdev, "failed to check CQ vector = %u, max = %d.\n",
+ attr->comp_vector, hr_dev->caps.num_comp_vectors);
+ return -EINVAL;
+ }
+
+ return 0;
}
-struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+static int get_cq_ucmd(struct hns_roce_cq *hr_cq, struct ib_udata *udata,
+ struct hns_roce_ib_create_cq *ucmd)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
- struct device *dev = hr_dev->dev;
- struct hns_roce_ib_create_cq ucmd;
- struct hns_roce_ib_create_cq_resp resp = {};
- struct hns_roce_cq *hr_cq = NULL;
- struct hns_roce_uar *uar = NULL;
- int vector = attr->comp_vector;
- int cq_entries = attr->cqe;
+ struct ib_device *ibdev = hr_cq->ib_cq.device;
int ret;
- struct hns_roce_ucontext *context = rdma_udata_to_drv_context(
- udata, struct hns_roce_ucontext, ibucontext);
- if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) {
- dev_err(dev, "Creat CQ failed. entries=%d, max=%d\n",
- cq_entries, hr_dev->caps.max_cqes);
- return ERR_PTR(-EINVAL);
+ ret = ib_copy_from_udata(ucmd, udata, min(udata->inlen, sizeof(*ucmd)));
+ if (ret) {
+ ibdev_err(ibdev, "failed to copy CQ udata, ret = %d.\n", ret);
+ return ret;
}
- hr_cq = kzalloc(sizeof(*hr_cq), GFP_KERNEL);
- if (!hr_cq)
- return ERR_PTR(-ENOMEM);
+ return 0;
+}
+
+static void set_cq_param(struct hns_roce_cq *hr_cq, u32 cq_entries, int vector,
+ struct hns_roce_ib_create_cq *ucmd)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
- if (hr_dev->caps.min_cqes)
- cq_entries = max(cq_entries, hr_dev->caps.min_cqes);
+ cq_entries = max(cq_entries, hr_dev->caps.min_cqes);
+ cq_entries = roundup_pow_of_two(cq_entries);
+ hr_cq->ib_cq.cqe = cq_entries - 1; /* used as cqe index */
+ hr_cq->cq_depth = cq_entries;
+ hr_cq->vector = vector;
- cq_entries = roundup_pow_of_two((unsigned int)cq_entries);
- hr_cq->ib_cq.cqe = cq_entries - 1;
spin_lock_init(&hr_cq->lock);
+ INIT_LIST_HEAD(&hr_cq->sq_list);
+ INIT_LIST_HEAD(&hr_cq->rq_list);
+}
- if (udata) {
- if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
- dev_err(dev, "Failed to copy_from_udata.\n");
- ret = -EFAULT;
- goto err_cq;
- }
+static int set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata,
+ struct hns_roce_ib_create_cq *ucmd)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
- /* Get user space address, write it into mtt table */
- ret = hns_roce_ib_get_cq_umem(hr_dev, udata, &hr_cq->hr_buf,
- &hr_cq->umem, ucmd.buf_addr,
- cq_entries);
- if (ret) {
- dev_err(dev, "Failed to get_cq_umem.\n");
- goto err_cq;
- }
+ if (!udata) {
+ hr_cq->cqe_size = hr_dev->caps.cqe_sz;
+ return 0;
+ }
- if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
- (udata->outlen >= sizeof(resp))) {
- ret = hns_roce_db_map_user(context, udata, ucmd.db_addr,
- &hr_cq->db);
- if (ret) {
- dev_err(dev, "cq record doorbell map failed!\n");
- goto err_mtt;
- }
- hr_cq->db_en = 1;
- resp.cap_flags |= HNS_ROCE_SUPPORT_CQ_RECORD_DB;
+ if (udata->inlen >= offsetofend(typeof(*ucmd), cqe_size)) {
+ if (ucmd->cqe_size != HNS_ROCE_V2_CQE_SIZE &&
+ ucmd->cqe_size != HNS_ROCE_V3_CQE_SIZE) {
+ ibdev_err(&hr_dev->ib_dev,
+ "invalid cqe size %u.\n", ucmd->cqe_size);
+ return -EINVAL;
}
- /* Get user space parameters */
- uar = &context->uar;
+ hr_cq->cqe_size = ucmd->cqe_size;
} else {
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) {
- ret = hns_roce_alloc_db(hr_dev, &hr_cq->db, 1);
- if (ret)
- goto err_cq;
+ hr_cq->cqe_size = HNS_ROCE_V2_CQE_SIZE;
+ }
- hr_cq->set_ci_db = hr_cq->db.db_record;
- *hr_cq->set_ci_db = 0;
- hr_cq->db_en = 1;
- }
+ return 0;
+}
- /* Init mmt table and write buff address to mtt table */
- ret = hns_roce_ib_alloc_cq_buf(hr_dev, &hr_cq->hr_buf,
- cq_entries);
- if (ret) {
- dev_err(dev, "Failed to alloc_cq_buf.\n");
- goto err_db;
- }
+int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct hns_roce_ib_create_cq_resp resp = {};
+ struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_ib_create_cq ucmd = {};
+ int ret;
+
+ if (attr->flags) {
+ ret = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ ret = verify_cq_create_attr(hr_dev, attr);
+ if (ret)
+ goto err_out;
+
+ if (udata) {
+ ret = get_cq_ucmd(hr_cq, udata, &ucmd);
+ if (ret)
+ goto err_out;
+ }
- uar = &hr_dev->priv_uar;
- hr_cq->cq_db_l = hr_dev->reg_base + hr_dev->odb_offset +
- DB_REG_OFFSET * uar->index;
+ set_cq_param(hr_cq, attr->cqe, attr->comp_vector, &ucmd);
+
+ ret = set_cqe_size(hr_cq, udata, &ucmd);
+ if (ret)
+ goto err_out;
+
+ ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc CQ buf, ret = %d.\n", ret);
+ goto err_out;
}
- /* Allocate cq index, fill cq_context */
- ret = hns_roce_cq_alloc(hr_dev, cq_entries, &hr_cq->hr_buf.hr_mtt, uar,
- hr_cq, vector);
+ ret = alloc_cq_db(hr_dev, hr_cq, udata, ucmd.db_addr, &resp);
if (ret) {
- dev_err(dev, "Creat CQ .Failed to cq_alloc.\n");
- goto err_dbmap;
+ ibdev_err(ibdev, "failed to alloc CQ db, ret = %d.\n", ret);
+ goto err_cq_buf;
}
- /*
- * For the QP created by kernel space, tptr value should be initialized
- * to zero; For the QP created by user space, it will cause synchronous
- * problems if tptr is set to zero here, so we initialze it in user
- * space.
- */
- if (!udata && hr_cq->tptr_addr)
- *hr_cq->tptr_addr = 0;
-
- /* Get created cq handler and carry out event */
- hr_cq->comp = hns_roce_ib_cq_comp;
- hr_cq->event = hns_roce_ib_cq_event;
- hr_cq->cq_depth = cq_entries;
+ ret = alloc_cqn(hr_dev, hr_cq, udata);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc CQN, ret = %d.\n", ret);
+ goto err_cq_db;
+ }
+
+ ret = alloc_cqc(hr_dev, hr_cq);
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to alloc CQ context, ret = %d.\n", ret);
+ goto err_cqn;
+ }
if (udata) {
resp.cqn = hr_cq->cqn;
- ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ ret = ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)));
if (ret)
goto err_cqc;
}
- return &hr_cq->ib_cq;
+ hr_cq->cons_index = 0;
+ hr_cq->arm_sn = 1;
+ refcount_set(&hr_cq->refcount, 1);
+ init_completion(&hr_cq->free);
+
+ return 0;
err_cqc:
- hns_roce_free_cq(hr_dev, hr_cq);
-
-err_dbmap:
- if (udata && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
- (udata->outlen >= sizeof(resp)))
- hns_roce_db_unmap_user(context, &hr_cq->db);
-
-err_mtt:
- hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
- if (udata)
- ib_umem_release(hr_cq->umem);
- else
- hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf,
- hr_cq->ib_cq.cqe);
-
-err_db:
- if (!udata && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB))
- hns_roce_free_db(hr_dev, &hr_cq->db);
+ free_cqc(hr_dev, hr_cq);
+err_cqn:
+ free_cqn(hr_dev, hr_cq->cqn);
+err_cq_db:
+ free_cq_db(hr_dev, hr_cq, udata);
+err_cq_buf:
+ free_cq_buf(hr_dev, hr_cq);
+err_out:
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CQ_CREATE_ERR_CNT]);
-err_cq:
- kfree(hr_cq);
- return ERR_PTR(ret);
+ return ret;
}
-EXPORT_SYMBOL_GPL(hns_roce_ib_create_cq);
-int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
- int ret = 0;
-
- if (hr_dev->hw->destroy_cq) {
- ret = hr_dev->hw->destroy_cq(ib_cq, udata);
- } else {
- hns_roce_free_cq(hr_dev, hr_cq);
- hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
-
- if (udata) {
- ib_umem_release(hr_cq->umem);
-
- if (hr_cq->db_en == 1)
- hns_roce_db_unmap_user(
- rdma_udata_to_drv_context(
- udata,
- struct hns_roce_ucontext,
- ibucontext),
- &hr_cq->db);
- } else {
- /* Free the buff of stored cq */
- hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf,
- ib_cq->cqe);
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)
- hns_roce_free_db(hr_dev, &hr_cq->db);
- }
- kfree(hr_cq);
- }
+ free_cqc(hr_dev, hr_cq);
+ free_cqn(hr_dev, hr_cq->cqn);
+ free_cq_db(hr_dev, hr_cq, udata);
+ free_cq_buf(hr_dev, hr_cq);
- return ret;
+ return 0;
}
-EXPORT_SYMBOL_GPL(hns_roce_ib_destroy_cq);
void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn)
{
- struct device *dev = hr_dev->dev;
- struct hns_roce_cq *cq;
+ struct hns_roce_cq *hr_cq;
+ struct ib_cq *ibcq;
- cq = xa_load(&hr_dev->cq_table.array, cqn & (hr_dev->caps.num_cqs - 1));
- if (!cq) {
- dev_warn(dev, "Completion event for bogus CQ 0x%08x\n", cqn);
+ hr_cq = xa_load(&hr_dev->cq_table.array,
+ cqn & (hr_dev->caps.num_cqs - 1));
+ if (!hr_cq) {
+ dev_warn(hr_dev->dev, "completion event for bogus CQ 0x%06x\n",
+ cqn);
return;
}
- ++cq->arm_sn;
- cq->comp(cq);
+ ++hr_cq->arm_sn;
+ ibcq = &hr_cq->ib_cq;
+ if (ibcq->comp_handler)
+ ibcq->comp_handler(ibcq, ibcq->cq_context);
}
-EXPORT_SYMBOL_GPL(hns_roce_cq_completion);
void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type)
{
- struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
struct device *dev = hr_dev->dev;
- struct hns_roce_cq *cq;
+ struct hns_roce_cq *hr_cq;
+ struct ib_event event;
+ struct ib_cq *ibcq;
- cq = xa_load(&cq_table->array, cqn & (hr_dev->caps.num_cqs - 1));
- if (cq)
- atomic_inc(&cq->refcount);
+ if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID &&
+ event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR &&
+ event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) {
+ dev_err(dev, "unexpected event type 0x%x on CQ 0x%06x\n",
+ event_type, cqn);
+ return;
+ }
- if (!cq) {
- dev_warn(dev, "Async event for bogus CQ %08x\n", cqn);
+ xa_lock(&hr_dev->cq_table.array);
+ hr_cq = xa_load(&hr_dev->cq_table.array,
+ cqn & (hr_dev->caps.num_cqs - 1));
+ if (hr_cq)
+ refcount_inc(&hr_cq->refcount);
+ xa_unlock(&hr_dev->cq_table.array);
+ if (!hr_cq) {
+ dev_warn(dev, "async event for bogus CQ 0x%06x\n", cqn);
return;
}
- cq->event(cq, (enum hns_roce_event)event_type);
+ ibcq = &hr_cq->ib_cq;
+ if (ibcq->event_handler) {
+ event.device = ibcq->device;
+ event.element.cq = ibcq;
+ event.event = IB_EVENT_CQ_ERR;
+ ibcq->event_handler(&event, ibcq->cq_context);
+ }
- if (atomic_dec_and_test(&cq->refcount))
- complete(&cq->free);
+ if (refcount_dec_and_test(&hr_cq->refcount))
+ complete(&hr_cq->free);
}
-EXPORT_SYMBOL_GPL(hns_roce_cq_event);
-int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev)
+void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev)
{
struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ unsigned int reserved_from_bot;
+ unsigned int i;
+ mutex_init(&cq_table->bank_mutex);
xa_init(&cq_table->array);
- return hns_roce_bitmap_init(&cq_table->bitmap, hr_dev->caps.num_cqs,
- hr_dev->caps.num_cqs - 1,
- hr_dev->caps.reserved_cqs, 0);
+ reserved_from_bot = hr_dev->caps.reserved_cqs;
+
+ for (i = 0; i < reserved_from_bot; i++) {
+ cq_table->bank[get_cq_bankid(i)].inuse++;
+ cq_table->bank[get_cq_bankid(i)].min++;
+ }
+
+ for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++) {
+ ida_init(&cq_table->bank[i].ida);
+ cq_table->bank[i].max = hr_dev->caps.num_cqs /
+ HNS_ROCE_CQ_BANK_NUM - 1;
+ }
}
void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev)
{
- hns_roce_bitmap_cleanup(&hr_dev->cq_table.bitmap);
+ int i;
+
+ for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++)
+ ida_destroy(&hr_dev->cq_table.bank[i].ida);
+ xa_destroy(&hr_dev->cq_table.array);
+ mutex_destroy(&hr_dev->cq_table.bank_mutex);
}
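The reworked hns_roce_init_cq_table() above drops the single CQ bitmap in favour of HNS_ROCE_CQ_BANK_NUM per-bank ID allocators, with the reserved CQNs distributed round-robin across the banks. A minimal user-space sketch of that distribution, assuming get_cq_bankid() simply takes the low bits of the CQN (consistent with CQ_BANKID_MASK = GENMASK(1, 0) in hns_roce_device.h); the capability numbers here are made up:

#include <stdio.h>

#define CQ_BANK_NUM	4
#define CQ_BANKID_MASK	0x3

struct bank { unsigned int inuse, min, max; };

static unsigned int get_cq_bankid(unsigned int cqn)
{
	return cqn & CQ_BANKID_MASK;
}

int main(void)
{
	struct bank bank[CQ_BANK_NUM] = { { 0 } };
	unsigned int num_cqs = 64, reserved_cqs = 6;	/* hypothetical caps */
	unsigned int i;

	/* reserved CQNs 0..5 land in banks 0, 1, 2, 3, 0, 1 */
	for (i = 0; i < reserved_cqs; i++) {
		bank[get_cq_bankid(i)].inuse++;
		bank[get_cq_bankid(i)].min++;
	}

	for (i = 0; i < CQ_BANK_NUM; i++) {
		bank[i].max = num_cqs / CQ_BANK_NUM - 1;
		printf("bank %u: min=%u max=%u inuse=%u\n",
		       i, bank[i].min, bank[i].max, bank[i].inuse);
	}
	return 0;
}

Each in-kernel bank then hands out IDs from its own ida within [min, max], so new CQNs stay spread across the four banks.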
diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c
index 0c6c1fe87705..5c4c0480832b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_db.c
+++ b/drivers/infiniband/hw/hns/hns_roce_db.c
@@ -4,21 +4,21 @@
* Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*/
-#include <linux/platform_device.h>
#include <rdma/ib_umem.h>
#include "hns_roce_device.h"
-int hns_roce_db_map_user(struct hns_roce_ucontext *context,
- struct ib_udata *udata, unsigned long virt,
+int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
struct hns_roce_db *db)
{
+ unsigned long page_addr = virt & PAGE_MASK;
struct hns_roce_user_db_page *page;
+ unsigned int offset;
int ret = 0;
mutex_lock(&context->page_mutex);
list_for_each_entry(page, &context->page_list, list)
- if (page->user_virt == (virt & PAGE_MASK))
+ if (page->user_virt == page_addr)
goto found;
page = kmalloc(sizeof(*page), GFP_KERNEL);
@@ -28,8 +28,9 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context,
}
refcount_set(&page->refcount, 1);
- page->user_virt = (virt & PAGE_MASK);
- page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0);
+ page->user_virt = page_addr;
+ page->umem = ib_umem_get(context->ibucontext.device, page_addr,
+ PAGE_SIZE, 0);
if (IS_ERR(page->umem)) {
ret = PTR_ERR(page->umem);
kfree(page);
@@ -39,10 +40,9 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context,
list_add(&page->list, &context->page_list);
found:
- db->dma = sg_dma_address(page->umem->sg_head.sgl) +
- (virt & ~PAGE_MASK);
- page->umem->sg_head.sgl->offset = virt & ~PAGE_MASK;
- db->virt_addr = sg_virt(page->umem->sg_head.sgl);
+ offset = virt - page_addr;
+ db->dma = sg_dma_address(page->umem->sgt_append.sgt.sgl) + offset;
+ db->virt_addr = sg_virt(page->umem->sgt_append.sgt.sgl) + offset;
db->u.user_page = page;
refcount_inc(&page->refcount);
@@ -51,7 +51,6 @@ out:
return ret;
}
-EXPORT_SYMBOL(hns_roce_db_map_user);
void hns_roce_db_unmap_user(struct hns_roce_ucontext *context,
struct hns_roce_db *db)
@@ -67,7 +66,6 @@ void hns_roce_db_unmap_user(struct hns_roce_ucontext *context,
mutex_unlock(&context->page_mutex);
}
-EXPORT_SYMBOL(hns_roce_db_unmap_user);
static struct hns_roce_db_pgdir *hns_roce_alloc_db_pgdir(
struct device *dma_device)
@@ -78,7 +76,8 @@ static struct hns_roce_db_pgdir *hns_roce_alloc_db_pgdir(
if (!pgdir)
return NULL;
- bitmap_fill(pgdir->order1, HNS_ROCE_DB_PER_PAGE / 2);
+ bitmap_fill(pgdir->order1,
+ HNS_ROCE_DB_PER_PAGE / HNS_ROCE_DB_TYPE_COUNT);
pgdir->bits[0] = pgdir->order0;
pgdir->bits[1] = pgdir->order1;
pgdir->page = dma_alloc_coherent(dma_device, PAGE_SIZE,
@@ -94,8 +93,8 @@ static struct hns_roce_db_pgdir *hns_roce_alloc_db_pgdir(
static int hns_roce_alloc_db_from_pgdir(struct hns_roce_db_pgdir *pgdir,
struct hns_roce_db *db, int order)
{
- int o;
- int i;
+ unsigned long o;
+ unsigned long i;
for (o = order; o <= 1; ++o) {
i = find_first_bit(pgdir->bits[o], HNS_ROCE_DB_PER_PAGE >> o);
@@ -116,7 +115,7 @@ found:
db->u.pgdir = pgdir;
db->index = i;
db->db_record = pgdir->page + db->index;
- db->dma = pgdir->db_dma + db->index * 4;
+ db->dma = pgdir->db_dma + db->index * HNS_ROCE_DB_UNIT_SIZE;
db->order = order;
return 0;
@@ -150,12 +149,11 @@ out:
return ret;
}
-EXPORT_SYMBOL_GPL(hns_roce_alloc_db);
void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db)
{
- int o;
- int i;
+ unsigned long o;
+ unsigned long i;
mutex_lock(&hr_dev->pgdir_mutex);
@@ -170,7 +168,8 @@ void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db)
i >>= o;
set_bit(i, db->u.pgdir->bits[o]);
- if (bitmap_full(db->u.pgdir->order1, HNS_ROCE_DB_PER_PAGE / 2)) {
+ if (bitmap_full(db->u.pgdir->order1,
+ HNS_ROCE_DB_PER_PAGE / HNS_ROCE_DB_TYPE_COUNT)) {
dma_free_coherent(hr_dev->dev, PAGE_SIZE, db->u.pgdir->page,
db->u.pgdir->db_dma);
list_del(&db->u.pgdir->list);
@@ -179,4 +178,3 @@ void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db)
mutex_unlock(&hr_dev->pgdir_mutex);
}
-EXPORT_SYMBOL_GPL(hns_roce_free_db);
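hns_roce_alloc_db_from_pgdir() and hns_roce_free_db() above manage each doorbell page with two bitmaps: order 1 tracks free pairs of HNS_ROCE_DB_UNIT_SIZE records, order 0 tracks single records left over after a pair has been split. A simplified user-space model of that split/merge behaviour (the in-kernel code uses find_first_bit()/set_bit() on DECLARE_BITMAP arrays; the page is shrunk to 16 slots here for readability):

#include <stdio.h>
#include <stdbool.h>

#define DB_PER_PAGE 16			/* made-up, small page */

static bool order0[DB_PER_PAGE];	/* free single slots */
static bool order1[DB_PER_PAGE / 2];	/* free pairs */

static int alloc_db(int order)
{
	int o, i;

	for (o = order; o <= 1; o++) {
		int n = DB_PER_PAGE >> o;

		for (i = 0; i < n; i++) {
			if ((o ? order1 : order0)[i])
				goto found;
		}
	}
	return -1;

found:
	(o ? order1 : order0)[i] = false;

	i <<= o;			/* index of the first 4-byte record */
	if (o > order)			/* split a pair: free the second half */
		order0[i + 1] = true;

	return i;
}

static void free_db(int index, int order)
{
	/* mirror of hns_roce_free_db(): try to merge back into a pair */
	if (order == 0 && order0[index ^ 1]) {
		order0[index ^ 1] = false;
		order = 1;
	}
	(order ? order1 : order0)[index >> order] = true;
}

int main(void)
{
	int a, b;

	for (int i = 0; i < DB_PER_PAGE / 2; i++)
		order1[i] = true;	/* a fresh page holds only free pairs */

	a = alloc_db(0);		/* splits pair 0: gets slot 0, frees slot 1 */
	b = alloc_db(0);		/* reuses slot 1 from order 0 */
	printf("a=%d b=%d\n", a, b);
	free_db(b, 0);
	free_db(a, 0);			/* merges back into a free pair */
	return 0;
}

This is a sketch only; the real allocator also walks a list of doorbell page directories and frees a page back to the DMA pool once its order-1 bitmap is full again, as shown in the hunk above.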
diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.c b/drivers/infiniband/hw/hns/hns_roce_debugfs.c
new file mode 100644
index 000000000000..b869cdc54118
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) 2023 Hisilicon Limited.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+
+#include "hns_roce_device.h"
+
+static struct dentry *hns_roce_dbgfs_root;
+
+static int hns_debugfs_seqfile_open(struct inode *inode, struct file *f)
+{
+ struct hns_debugfs_seqfile *seqfile = inode->i_private;
+
+ return single_open(f, seqfile->read, seqfile->data);
+}
+
+static const struct file_operations hns_debugfs_seqfile_fops = {
+ .owner = THIS_MODULE,
+ .open = hns_debugfs_seqfile_open,
+ .release = single_release,
+ .read = seq_read,
+ .llseek = seq_lseek
+};
+
+static void init_debugfs_seqfile(struct hns_debugfs_seqfile *seq,
+ const char *name, struct dentry *parent,
+ int (*read_fn)(struct seq_file *, void *),
+ void *data)
+{
+ debugfs_create_file(name, 0400, parent, seq, &hns_debugfs_seqfile_fops);
+
+ seq->read = read_fn;
+ seq->data = data;
+}
+
+static const char * const sw_stat_info[] = {
+ [HNS_ROCE_DFX_AEQE_CNT] = "aeqe",
+ [HNS_ROCE_DFX_CEQE_CNT] = "ceqe",
+ [HNS_ROCE_DFX_CMDS_CNT] = "cmds",
+ [HNS_ROCE_DFX_CMDS_ERR_CNT] = "cmds_err",
+ [HNS_ROCE_DFX_MBX_POSTED_CNT] = "posted_mbx",
+ [HNS_ROCE_DFX_MBX_POLLED_CNT] = "polled_mbx",
+ [HNS_ROCE_DFX_MBX_EVENT_CNT] = "mbx_event",
+ [HNS_ROCE_DFX_QP_CREATE_ERR_CNT] = "qp_create_err",
+ [HNS_ROCE_DFX_QP_MODIFY_ERR_CNT] = "qp_modify_err",
+ [HNS_ROCE_DFX_CQ_CREATE_ERR_CNT] = "cq_create_err",
+ [HNS_ROCE_DFX_CQ_MODIFY_ERR_CNT] = "cq_modify_err",
+ [HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT] = "srq_create_err",
+ [HNS_ROCE_DFX_SRQ_MODIFY_ERR_CNT] = "srq_modify_err",
+ [HNS_ROCE_DFX_XRCD_ALLOC_ERR_CNT] = "xrcd_alloc_err",
+ [HNS_ROCE_DFX_MR_REG_ERR_CNT] = "mr_reg_err",
+ [HNS_ROCE_DFX_MR_REREG_ERR_CNT] = "mr_rereg_err",
+ [HNS_ROCE_DFX_AH_CREATE_ERR_CNT] = "ah_create_err",
+ [HNS_ROCE_DFX_MMAP_ERR_CNT] = "mmap_err",
+ [HNS_ROCE_DFX_UCTX_ALLOC_ERR_CNT] = "uctx_alloc_err",
+};
+
+static int sw_stat_debugfs_show(struct seq_file *file, void *offset)
+{
+ struct hns_roce_dev *hr_dev = file->private;
+ int i;
+
+ for (i = 0; i < HNS_ROCE_DFX_CNT_TOTAL; i++)
+ seq_printf(file, "%-20s --- %lld\n", sw_stat_info[i],
+ atomic64_read(&hr_dev->dfx_cnt[i]));
+
+ return 0;
+}
+
+static void create_sw_stat_debugfs(struct hns_roce_dev *hr_dev,
+ struct dentry *parent)
+{
+ struct hns_sw_stat_debugfs *dbgfs = &hr_dev->dbgfs.sw_stat_root;
+
+ dbgfs->root = debugfs_create_dir("sw_stat", parent);
+
+ init_debugfs_seqfile(&dbgfs->sw_stat, "sw_stat", dbgfs->root,
+ sw_stat_debugfs_show, hr_dev);
+}
+
+/* debugfs for device */
+void hns_roce_register_debugfs(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_dev_debugfs *dbgfs = &hr_dev->dbgfs;
+
+ dbgfs->root = debugfs_create_dir(pci_name(hr_dev->pci_dev),
+ hns_roce_dbgfs_root);
+
+ create_sw_stat_debugfs(hr_dev, dbgfs->root);
+}
+
+void hns_roce_unregister_debugfs(struct hns_roce_dev *hr_dev)
+{
+ debugfs_remove_recursive(hr_dev->dbgfs.root);
+}
+
+/* debugfs for hns module */
+void hns_roce_init_debugfs(void)
+{
+ hns_roce_dbgfs_root = debugfs_create_dir("hns_roce", NULL);
+}
+
+void hns_roce_cleanup_debugfs(void)
+{
+ debugfs_remove_recursive(hns_roce_dbgfs_root);
+ hns_roce_dbgfs_root = NULL;
+}
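The new hns_roce_debugfs.c wires a seq_file show callback behind debugfs_create_file() by stashing a struct hns_debugfs_seqfile in inode->i_private. For the common case where only a single show routine and no extra callback pointer are needed, the kernel already provides DEFINE_SHOW_ATTRIBUTE(); a minimal, hypothetical module using the same pattern (names such as demo_show and demo_counter are made up):

#include <linux/atomic.h>
#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/seq_file.h>

static struct dentry *demo_dir;
static atomic64_t demo_counter = ATOMIC64_INIT(0);

static int demo_show(struct seq_file *m, void *unused)
{
	seq_printf(m, "%-20s --- %lld\n", "demo_counter",
		   atomic64_read(&demo_counter));
	return 0;
}
DEFINE_SHOW_ATTRIBUTE(demo);

static int __init demo_init(void)
{
	demo_dir = debugfs_create_dir("demo", NULL);
	debugfs_create_file("stats", 0400, demo_dir, NULL, &demo_fops);
	return 0;
}

static void __exit demo_exit(void)
{
	debugfs_remove_recursive(demo_dir);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

The hns driver rolls its own file_operations instead because init_debugfs_seqfile() has to carry both a read callback and a per-file data pointer through i_private.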
diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.h b/drivers/infiniband/hw/hns/hns_roce_debugfs.h
new file mode 100644
index 000000000000..98e87bd3161e
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (c) 2023 Hisilicon Limited.
+ */
+
+#ifndef __HNS_ROCE_DEBUGFS_H
+#define __HNS_ROCE_DEBUGFS_H
+
+/* debugfs seqfile */
+struct hns_debugfs_seqfile {
+ int (*read)(struct seq_file *seq, void *data);
+ void *data;
+};
+
+struct hns_sw_stat_debugfs {
+ struct dentry *root;
+ struct hns_debugfs_seqfile sw_stat;
+};
+
+/* Debugfs for device */
+struct hns_roce_dev_debugfs {
+ struct dentry *root;
+ struct hns_sw_stat_debugfs sw_stat_root;
+};
+
+struct hns_roce_dev;
+
+void hns_roce_init_debugfs(void);
+void hns_roce_cleanup_debugfs(void);
+void hns_roce_register_debugfs(struct hns_roce_dev *hr_dev);
+void hns_roce_unregister_debugfs(struct hns_roce_dev *hr_dev);
+
+#endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 563cf39df6d5..318f18cf37aa 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -33,104 +33,86 @@
#ifndef _HNS_ROCE_DEVICE_H
#define _HNS_ROCE_DEVICE_H
+#include <linux/pci.h>
#include <rdma/ib_verbs.h>
+#include <rdma/hns-abi.h>
+#include "hns_roce_debugfs.h"
-#define DRV_NAME "hns_roce"
+#define PCI_REVISION_ID_HIP08 0x21
+#define PCI_REVISION_ID_HIP09 0x30
-#define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6')
-
-#define MAC_ADDR_OCTET_NUM 6
#define HNS_ROCE_MAX_MSG_LEN 0x80000000
-#define HNS_ROCE_ALOGN_UP(a, b) ((((a) + (b) - 1) / (b)) * (b))
-
#define HNS_ROCE_IB_MIN_SQ_STRIDE 6
-#define HNS_ROCE_BA_SIZE (32 * 4096)
+#define BA_BYTE_LEN 8
-/* Hardware specification only for v1 engine */
#define HNS_ROCE_MIN_CQE_NUM 0x40
-#define HNS_ROCE_MIN_WQE_NUM 0x20
-
-/* Hardware specification only for v1 engine */
-#define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7
-#define HNS_ROCE_MAX_MTPT_PBL_NUM 0x100000
-
-#define HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS 20
-#define HNS_ROCE_MAX_FREE_CQ_WAIT_CNT \
- (5000 / HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS)
-#define HNS_ROCE_CQE_WCMD_EMPTY_BIT 0x2
-#define HNS_ROCE_MIN_CQE_CNT 16
+#define HNS_ROCE_MIN_SRQ_WQE_NUM 1
#define HNS_ROCE_MAX_IRQ_NUM 128
+#define HNS_ROCE_SGE_IN_WQE 2
+#define HNS_ROCE_SGE_SHIFT 4
+
#define EQ_ENABLE 1
#define EQ_DISABLE 0
#define HNS_ROCE_CEQ 0
#define HNS_ROCE_AEQ 1
-#define HNS_ROCE_CEQ_ENTRY_SIZE 0x4
-#define HNS_ROCE_AEQ_ENTRY_SIZE 0x10
+#define HNS_ROCE_CEQE_SIZE 0x4
+#define HNS_ROCE_AEQE_SIZE 0x10
+
+#define HNS_ROCE_V3_EQE_SIZE 0x40
-/* 4G/4K = 1M */
-#define HNS_ROCE_SL_SHIFT 28
-#define HNS_ROCE_TCLASS_SHIFT 20
-#define HNS_ROCE_FLOW_LABEL_MASK 0xfffff
+#define HNS_ROCE_V2_CQE_SIZE 32
+#define HNS_ROCE_V3_CQE_SIZE 64
+
+#define HNS_ROCE_V2_QPC_SZ 256
+#define HNS_ROCE_V3_QPC_SZ 512
#define HNS_ROCE_MAX_PORTS 6
-#define HNS_ROCE_MAX_GID_NUM 16
#define HNS_ROCE_GID_SIZE 16
+#define HNS_ROCE_SGE_SIZE 16
+#define HNS_ROCE_DWQE_SIZE 65536
#define HNS_ROCE_HOP_NUM_0 0xff
-#define BITMAP_NO_RR 0
-#define BITMAP_RR 1
-
#define MR_TYPE_MR 0x00
#define MR_TYPE_FRMR 0x01
#define MR_TYPE_DMA 0x03
#define HNS_ROCE_FRMR_MAX_PA 512
+#define HNS_ROCE_FRMR_ALIGN_SIZE 128
#define PKEY_ID 0xffff
-#define GUID_LEN 8
#define NODE_DESC_SIZE 64
#define DB_REG_OFFSET 0x1000
-#define SERV_TYPE_RC 0
-#define SERV_TYPE_RD 1
-#define SERV_TYPE_UC 2
-#define SERV_TYPE_UD 3
-
/* Configure to HW for PAGE_SIZE larger than 4KB */
#define PG_SHIFT_OFFSET (PAGE_SHIFT - 12)
-#define PAGES_SHIFT_8 8
-#define PAGES_SHIFT_16 16
-#define PAGES_SHIFT_24 24
-#define PAGES_SHIFT_32 32
+#define ATOMIC_WR_LEN 8
#define HNS_ROCE_IDX_QUE_ENTRY_SZ 4
#define SRQ_DB_REG 0x230
-enum {
- HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0,
- HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1,
-};
+#define HNS_ROCE_QP_BANK_NUM 8
+#define HNS_ROCE_CQ_BANK_NUM 4
-enum {
- HNS_ROCE_SUPPORT_CQ_RECORD_DB = 1 << 0,
-};
+#define CQ_BANKID_SHIFT 2
+#define CQ_BANKID_MASK GENMASK(1, 0)
+
+#define HNS_ROCE_MAX_CQ_COUNT 0xFFFF
+#define HNS_ROCE_MAX_CQ_PERIOD 0xFFFF
-enum hns_roce_qp_state {
- HNS_ROCE_QP_STATE_RST,
- HNS_ROCE_QP_STATE_INIT,
- HNS_ROCE_QP_STATE_RTR,
- HNS_ROCE_QP_STATE_RTS,
- HNS_ROCE_QP_STATE_SQD,
- HNS_ROCE_QP_STATE_ERR,
- HNS_ROCE_QP_NUM_STATE,
+enum {
+ SERV_TYPE_RC,
+ SERV_TYPE_UC,
+ SERV_TYPE_RD,
+ SERV_TYPE_UD,
+ SERV_TYPE_XRC = 5,
};
enum hns_roce_event {
@@ -151,67 +133,33 @@ enum hns_roce_event {
/* 0x10 and 0x11 are unused in the current application case */
HNS_ROCE_EVENT_TYPE_DB_OVERFLOW = 0x12,
HNS_ROCE_EVENT_TYPE_MB = 0x13,
- HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW = 0x14,
HNS_ROCE_EVENT_TYPE_FLR = 0x15,
-};
-
-/* Local Work Queue Catastrophic Error,SUBTYPE 0x5 */
-enum {
- HNS_ROCE_LWQCE_QPC_ERROR = 1,
- HNS_ROCE_LWQCE_MTU_ERROR = 2,
- HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR = 3,
- HNS_ROCE_LWQCE_WQE_ADDR_ERROR = 4,
- HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR = 5,
- HNS_ROCE_LWQCE_SL_ERROR = 6,
- HNS_ROCE_LWQCE_PORT_ERROR = 7,
-};
-
-/* Local Access Violation Work Queue Error,SUBTYPE 0x7 */
-enum {
- HNS_ROCE_LAVWQE_R_KEY_VIOLATION = 1,
- HNS_ROCE_LAVWQE_LENGTH_ERROR = 2,
- HNS_ROCE_LAVWQE_VA_ERROR = 3,
- HNS_ROCE_LAVWQE_PD_ERROR = 4,
- HNS_ROCE_LAVWQE_RW_ACC_ERROR = 5,
- HNS_ROCE_LAVWQE_KEY_STATE_ERROR = 6,
- HNS_ROCE_LAVWQE_MR_OPERATION_ERROR = 7,
-};
-
-/* DOORBELL overflow subtype */
-enum {
- HNS_ROCE_DB_SUBTYPE_SDB_OVF = 1,
- HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF = 2,
- HNS_ROCE_DB_SUBTYPE_ODB_OVF = 3,
- HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF = 4,
- HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP = 5,
- HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP = 6,
-};
-
-enum {
- /* RQ&SRQ related operations */
- HNS_ROCE_OPCODE_SEND_DATA_RECEIVE = 0x06,
- HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE = 0x07,
+ HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION = 0x16,
+ HNS_ROCE_EVENT_TYPE_INVALID_XRCETH = 0x17,
};
enum {
HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0),
HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1),
HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2),
- HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3),
- HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4),
+ HNS_ROCE_CAP_FLAG_CQ_RECORD_DB = BIT(3),
+ HNS_ROCE_CAP_FLAG_QP_RECORD_DB = BIT(4),
HNS_ROCE_CAP_FLAG_SRQ = BIT(5),
+ HNS_ROCE_CAP_FLAG_XRC = BIT(6),
HNS_ROCE_CAP_FLAG_MW = BIT(7),
HNS_ROCE_CAP_FLAG_FRMR = BIT(8),
HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL = BIT(9),
HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10),
+ HNS_ROCE_CAP_FLAG_DIRECT_WQE = BIT(12),
+ HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14),
+ HNS_ROCE_CAP_FLAG_STASH = BIT(17),
+ HNS_ROCE_CAP_FLAG_CQE_INLINE = BIT(19),
+ HNS_ROCE_CAP_FLAG_BOND = BIT(21),
+ HNS_ROCE_CAP_FLAG_SRQ_RECORD_DB = BIT(22),
};
-enum hns_roce_mtt_type {
- MTT_TYPE_WQE,
- MTT_TYPE_CQE,
- MTT_TYPE_SRQWQE,
- MTT_TYPE_IDX
-};
+#define HNS_ROCE_DB_TYPE_COUNT 2
+#define HNS_ROCE_DB_UNIT_SIZE 4
enum {
HNS_ROCE_DB_PER_PAGE = PAGE_SIZE / 4
@@ -231,26 +179,22 @@ enum hns_roce_instance_state {
HNS_ROCE_STATE_INIT,
HNS_ROCE_STATE_INITED,
HNS_ROCE_STATE_UNINIT,
+ HNS_ROCE_STATE_BOND_UNINIT,
};
enum {
HNS_ROCE_RST_DIRECT_RETURN = 0,
};
-enum {
- CMD_RST_PRC_OTHERS,
- CMD_RST_PRC_SUCCESS,
- CMD_RST_PRC_EBUSY,
-};
-
#define HNS_ROCE_CMD_SUCCESS 1
-#define HNS_ROCE_PORT_DOWN 0
-#define HNS_ROCE_PORT_UP 1
-
-#define HNS_ROCE_MTT_ENTRY_PER_SEG 8
+#define HNS_ROCE_MAX_HOP_NUM 3
+/* The minimum page size is 4K for hardware */
+#define HNS_HW_PAGE_SHIFT 12
+#define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT)
-#define PAGE_ADDR_SHIFT 12
+#define HNS_HW_MAX_PAGE_SHIFT 27
+#define HNS_HW_MAX_PAGE_SIZE (1 << HNS_HW_MAX_PAGE_SHIFT)
struct hns_roce_uar {
u64 pfn;
@@ -258,11 +202,25 @@ struct hns_roce_uar {
unsigned long logic_idx;
};
+enum hns_roce_mmap_type {
+ HNS_ROCE_MMAP_TYPE_DB = 1,
+ HNS_ROCE_MMAP_TYPE_DWQE,
+};
+
+struct hns_user_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
+ enum hns_roce_mmap_type mmap_type;
+ u64 address;
+};
+
struct hns_roce_ucontext {
struct ib_ucontext ibucontext;
struct hns_roce_uar uar;
struct list_head page_list;
struct mutex page_mutex;
+ struct hns_user_mmap_entry *db_mmap_entry;
+ u32 config;
+ u8 cq_bank_id;
};
struct hns_roce_pd {
@@ -270,6 +228,11 @@ struct hns_roce_pd {
unsigned long pdn;
};
+struct hns_roce_xrcd {
+ struct ib_xrcd ibxrcd;
+ u32 xrcdn;
+};
+
struct hns_roce_bitmap {
/* Bitmap Traversal last a bit which is 1 */
unsigned long last;
@@ -281,20 +244,10 @@ struct hns_roce_bitmap {
unsigned long *table;
};
-/* Order bitmap length -- bit num compute formula: 1 << (max_order - order) */
-/* Order = 0: bitmap is biggest, order = max bitmap is least (only a bit) */
-/* Every bit repesent to a partner free/used status in bitmap */
-/*
- * Initial, bits of other bitmap are all 0 except that a bit of max_order is 1
- * Bit = 1 represent to idle and available; bit = 0: not available
- */
-struct hns_roce_buddy {
- /* Members point to every order level bitmap */
- unsigned long **bits;
- /* Represent to avail bits of the order level bitmap */
- u32 *num_free;
- int max_order;
- spinlock_t lock;
+struct hns_roce_ida {
+ struct ida ida;
+ u32 min; /* Lowest ID to allocate. */
+ u32 max; /* Highest ID to allocate. */
};
/* For Hardware Entry Memory */
@@ -303,12 +256,9 @@ struct hns_roce_hem_table {
u32 type;
/* HEM array element num */
unsigned long num_hem;
- /* HEM entry record obj total num */
- unsigned long num_obj;
- /*Single obj size */
+ /* Single obj size */
unsigned long obj_size;
unsigned long table_chunk_size;
- int lowmem;
struct mutex mutex;
struct hns_roce_hem **hem;
u64 **bt_l1;
@@ -317,84 +267,97 @@ struct hns_roce_hem_table {
dma_addr_t *bt_l0_dma_addr;
};
-struct hns_roce_mtt {
- unsigned long first_seg;
- int order;
- int page_shift;
- enum hns_roce_mtt_type mtt_type;
+struct hns_roce_buf_region {
+ u32 offset; /* page offset */
+ u32 count; /* page count */
+ int hopnum; /* addressing hop num */
+};
+
+#define HNS_ROCE_MAX_BT_REGION 3
+#define HNS_ROCE_MAX_BT_LEVEL 3
+struct hns_roce_hem_list {
+ struct list_head root_bt;
+ /* link all bt dma mem by hop config */
+ struct list_head mid_bt[HNS_ROCE_MAX_BT_REGION][HNS_ROCE_MAX_BT_LEVEL];
+ struct list_head btm_bt; /* link all bottom bt in @mid_bt */
+ dma_addr_t root_ba; /* pointer to the root ba table */
+};
+
+enum mtr_type {
+ MTR_DEFAULT = 0,
+ MTR_PBL,
+};
+
+struct hns_roce_buf_attr {
+ struct {
+ size_t size; /* region size */
+ int hopnum; /* multi-hop addressing hop num */
+ } region[HNS_ROCE_MAX_BT_REGION];
+ unsigned int region_count; /* valid region count */
+ unsigned int page_shift; /* buffer page shift */
+ unsigned int user_access; /* umem access flag */
+ u64 iova;
+ enum mtr_type type;
+ bool mtt_only; /* only alloc buffer-required MTT memory */
+ bool adaptive; /* adaptive for page_shift and hopnum */
};
-struct hns_roce_mw {
- struct ib_mw ibmw;
- u32 pdn;
- u32 rkey;
- int enabled; /* MW's active status */
- u32 pbl_hop_num;
- u32 pbl_ba_pg_sz;
- u32 pbl_buf_pg_sz;
+struct hns_roce_hem_cfg {
+ dma_addr_t root_ba; /* root BA table's address */
+ bool is_direct; /* addressing without BA table */
+ unsigned int ba_pg_shift; /* BA table page shift */
+ unsigned int buf_pg_shift; /* buffer page shift */
+ unsigned int buf_pg_count; /* buffer page count */
+ struct hns_roce_buf_region region[HNS_ROCE_MAX_BT_REGION];
+ unsigned int region_count;
};
-/* Only support 4K page size for mr register */
-#define MR_SIZE_4K 0
+/* memory translate region */
+struct hns_roce_mtr {
+ struct hns_roce_hem_list hem_list; /* multi-hop addressing resource */
+ struct ib_umem *umem; /* user space buffer */
+ struct hns_roce_buf *kmem; /* kernel space buffer */
+ struct hns_roce_hem_cfg hem_cfg; /* config for hardware addressing */
+};
struct hns_roce_mr {
struct ib_mr ibmr;
- struct ib_umem *umem;
- u64 iova; /* MR's virtual orignal addr */
+ u64 iova; /* MR's virtual original addr */
u64 size; /* Address range of MR */
u32 key; /* Key of MR */
u32 pd; /* PD num of MR */
- u32 access;/* Access permission of MR */
- u32 npages;
+ u32 access; /* Access permission of MR */
int enabled; /* MR's active status */
- int type; /* MR's register type */
- u64 *pbl_buf;/* MR's PBL space */
- dma_addr_t pbl_dma_addr; /* MR's PBL space PA */
- u32 pbl_size;/* PA number in the PBL */
- u64 pbl_ba;/* page table address */
- u32 l0_chunk_last_num;/* L0 last number */
- u32 l1_chunk_last_num;/* L1 last number */
- u64 **pbl_bt_l2;/* PBL BT L2 */
- u64 **pbl_bt_l1;/* PBL BT L1 */
- u64 *pbl_bt_l0;/* PBL BT L0 */
- dma_addr_t *pbl_l2_dma_addr;/* PBL BT L2 dma addr */
- dma_addr_t *pbl_l1_dma_addr;/* PBL BT L1 dma addr */
- dma_addr_t pbl_l0_dma_addr;/* PBL BT L0 dma addr */
- u32 pbl_ba_pg_sz;/* BT chunk page size */
- u32 pbl_buf_pg_sz;/* buf chunk page size */
- u32 pbl_hop_num;/* multi-hop number */
+ int type; /* MR's register type */
+ u32 pbl_hop_num; /* multi-hop number */
+ struct hns_roce_mtr pbl_mtr;
+ u32 npages;
+ dma_addr_t *page_list;
};
struct hns_roce_mr_table {
- struct hns_roce_bitmap mtpt_bitmap;
- struct hns_roce_buddy mtt_buddy;
- struct hns_roce_hem_table mtt_table;
+ struct hns_roce_ida mtpt_ida;
struct hns_roce_hem_table mtpt_table;
- struct hns_roce_buddy mtt_cqe_buddy;
- struct hns_roce_hem_table mtt_cqe_table;
- struct hns_roce_buddy mtt_srqwqe_buddy;
- struct hns_roce_hem_table mtt_srqwqe_table;
- struct hns_roce_buddy mtt_idx_buddy;
- struct hns_roce_hem_table mtt_idx_table;
};
struct hns_roce_wq {
u64 *wrid; /* Work request ID */
spinlock_t lock;
- int wqe_cnt; /* WQE num */
- u32 max_post;
- int max_gs;
- int offset;
- int wqe_shift;/* WQE size */
+ u32 wqe_cnt; /* WQE num */
+ u32 max_gs;
+ u32 rsv_sge;
+ u32 offset;
+ u32 wqe_shift; /* WQE size */
u32 head;
u32 tail;
- void __iomem *db_reg_l;
+ void __iomem *db_reg;
+ u32 ext_sge_cnt;
};
struct hns_roce_sge {
- int sge_cnt; /* SGE num */
- int offset;
- int sge_shift;/* SGE size */
+ unsigned int sge_cnt; /* SGE num */
+ u32 offset;
+ u32 sge_shift; /* SGE size */
};
struct hns_roce_buf_list {
@@ -402,19 +365,34 @@ struct hns_roce_buf_list {
dma_addr_t map;
};
+/*
+ * %HNS_ROCE_BUF_DIRECT indicates that all memory must be in a contiguous
+ * dma address range.
+ *
+ * %HNS_ROCE_BUF_NOSLEEP indicates that the caller cannot sleep.
+ *
+ * %HNS_ROCE_BUF_NOFAIL indicates that the allocation fails only when the
+ * allocated size is zero, even if the allocated size is smaller than the
+ * required size.
+ */
+enum {
+ HNS_ROCE_BUF_DIRECT = BIT(0),
+ HNS_ROCE_BUF_NOSLEEP = BIT(1),
+ HNS_ROCE_BUF_NOFAIL = BIT(2),
+};
+
struct hns_roce_buf {
- struct hns_roce_buf_list direct;
- struct hns_roce_buf_list *page_list;
- int nbufs;
+ struct hns_roce_buf_list *trunk_list;
+ u32 ntrunks;
u32 npages;
- int page_shift;
+ unsigned int trunk_shift;
+ unsigned int page_shift;
};
struct hns_roce_db_pgdir {
struct list_head list;
DECLARE_BITMAP(order0, HNS_ROCE_DB_PER_PAGE);
- DECLARE_BITMAP(order1, HNS_ROCE_DB_PER_PAGE / 2);
- unsigned long *bits[2];
+ DECLARE_BITMAP(order1, HNS_ROCE_DB_PER_PAGE / HNS_ROCE_DB_TYPE_COUNT);
+ unsigned long *bits[HNS_ROCE_DB_TYPE_COUNT];
u32 *page;
dma_addr_t db_dma;
};
@@ -434,110 +412,115 @@ struct hns_roce_db {
} u;
dma_addr_t dma;
void *virt_addr;
- int index;
- int order;
-};
-
-struct hns_roce_cq_buf {
- struct hns_roce_buf hr_buf;
- struct hns_roce_mtt hr_mtt;
+ unsigned long index;
+ unsigned long order;
};
struct hns_roce_cq {
struct ib_cq ib_cq;
- struct hns_roce_cq_buf hr_buf;
+ struct hns_roce_mtr mtr;
struct hns_roce_db db;
- u8 db_en;
+ u32 flags;
spinlock_t lock;
- struct ib_umem *umem;
- void (*comp)(struct hns_roce_cq *cq);
- void (*event)(struct hns_roce_cq *cq, enum hns_roce_event event_type);
-
- struct hns_roce_uar *uar;
u32 cq_depth;
u32 cons_index;
u32 *set_ci_db;
- void __iomem *cq_db_l;
- u16 *tptr_addr;
+ void __iomem *db_reg;
int arm_sn;
+ int cqe_size;
unsigned long cqn;
u32 vector;
- atomic_t refcount;
+ refcount_t refcount;
struct completion free;
+ struct list_head sq_list; /* all qps on this send cq */
+ struct list_head rq_list; /* all qps on this recv cq */
+ int is_armed; /* cq is armed */
+ struct list_head node; /* all armed cqs are on a list */
};
struct hns_roce_idx_que {
- struct hns_roce_buf idx_buf;
- int entry_sz;
- u32 buf_size;
- struct ib_umem *umem;
- struct hns_roce_mtt mtt;
- u64 *bitmap;
+ struct hns_roce_mtr mtr;
+ u32 entry_shift;
+ unsigned long *bitmap;
+ u32 head;
+ u32 tail;
};
struct hns_roce_srq {
struct ib_srq ibsrq;
- void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event);
unsigned long srqn;
- int max;
+ u32 wqe_cnt;
int max_gs;
- int wqe_shift;
- void __iomem *db_reg_l;
+ u32 rsv_sge;
+ u32 wqe_shift;
+ u32 cqn;
+ u32 xrcdn;
+ void __iomem *db_reg;
- atomic_t refcount;
+ refcount_t refcount;
struct completion free;
- struct hns_roce_buf buf;
+ struct hns_roce_mtr buf_mtr;
+
u64 *wrid;
- struct ib_umem *umem;
- struct hns_roce_mtt mtt;
struct hns_roce_idx_que idx_que;
spinlock_t lock;
- int head;
- int tail;
- u16 wqe_ctr;
struct mutex mutex;
+ void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event);
+ struct hns_roce_db rdb;
+ u32 cap_flags;
};
struct hns_roce_uar_table {
struct hns_roce_bitmap bitmap;
};
+struct hns_roce_bank {
+ struct ida ida;
+ u32 inuse; /* Number of IDs allocated */
+ u32 min; /* Lowest ID to allocate. */
+ u32 max; /* Highest ID to allocate. */
+ u32 next; /* Next ID to allocate. */
+};
+
struct hns_roce_qp_table {
- struct hns_roce_bitmap bitmap;
struct hns_roce_hem_table qp_table;
struct hns_roce_hem_table irrl_table;
struct hns_roce_hem_table trrl_table;
struct hns_roce_hem_table sccc_table;
struct mutex scc_mutex;
+ struct hns_roce_bank bank[HNS_ROCE_QP_BANK_NUM];
+ struct mutex bank_mutex;
+ struct xarray dip_xa;
};
struct hns_roce_cq_table {
- struct hns_roce_bitmap bitmap;
struct xarray array;
struct hns_roce_hem_table table;
+ struct hns_roce_bank bank[HNS_ROCE_CQ_BANK_NUM];
+ struct mutex bank_mutex;
+ u32 ctx_num[HNS_ROCE_CQ_BANK_NUM];
};
struct hns_roce_srq_table {
- struct hns_roce_bitmap bitmap;
+ struct hns_roce_ida srq_ida;
struct xarray xa;
struct hns_roce_hem_table table;
};
-struct hns_roce_raq_table {
- struct hns_roce_buf_list *e_raq_buf;
-};
-
struct hns_roce_av {
- __le32 port_pd;
- u8 gid_index;
- u8 stat_rate;
- u8 hop_limit;
- __le32 sl_tclass_flowlabel;
- u8 dgid[HNS_ROCE_GID_SIZE];
- u8 mac[6];
- __le16 vlan;
- bool vlan_en;
+ u8 port;
+ u8 gid_index;
+ u8 stat_rate;
+ u8 hop_limit;
+ u32 flowlabel;
+ u16 udp_sport;
+ u8 sl;
+ u8 tclass;
+ u8 dgid[HNS_ROCE_GID_SIZE];
+ u8 mac[ETH_ALEN];
+ u16 vlan_id;
+ u8 vlan_en;
};
struct hns_roce_ah {
@@ -551,11 +534,16 @@ struct hns_roce_cmd_context {
int next;
u64 out_param;
u16 token;
+ u16 busy;
+};
+
+enum hns_roce_cmdq_state {
+ HNS_ROCE_CMDQ_STATE_NORMAL,
+ HNS_ROCE_CMDQ_STATE_FATAL_ERR,
};
struct hns_roce_cmdq {
struct dma_pool *pool;
- struct mutex hcr_mutex;
struct semaphore poll_sem;
/*
* Event mode: cmd register mutex protection,
@@ -567,18 +555,13 @@ struct hns_roce_cmdq {
int free_head;
struct hns_roce_cmd_context *context;
/*
- * Result of get integer part
- * which max_comds compute according a power of 2
- */
- u16 token_mask;
- /*
* Process whether use event mode, init default non-zero
* After the event queue of cmd event ready,
* can switch into event mode
* close device, switch into poll mode(non event mode)
*/
u8 use_events;
- u8 toggle;
+ enum hns_roce_cmdq_state state;
};
struct hns_roce_cmd_mailbox {
@@ -586,40 +569,48 @@ struct hns_roce_cmd_mailbox {
dma_addr_t dma;
};
+struct hns_roce_mbox_msg {
+ u64 in_param;
+ u64 out_param;
+ u8 cmd;
+ u32 tag;
+ u16 token;
+ u8 event_en;
+};
+
struct hns_roce_dev;
-struct hns_roce_rinl_sge {
- void *addr;
- u32 len;
+enum {
+ HNS_ROCE_FLUSH_FLAG = 0,
+ HNS_ROCE_STOP_FLUSH_FLAG = 1,
};
-struct hns_roce_rinl_wqe {
- struct hns_roce_rinl_sge *sg_list;
- u32 sge_cnt;
+struct hns_roce_work {
+ struct hns_roce_dev *hr_dev;
+ struct work_struct work;
+ int event_type;
+ int sub_type;
+ u32 queue_num;
};
-struct hns_roce_rinl_buf {
- struct hns_roce_rinl_wqe *wqe_list;
- u32 wqe_cnt;
+enum hns_roce_cong_type {
+ CONG_TYPE_DCQCN,
+ CONG_TYPE_LDCP,
+ CONG_TYPE_HC3,
+ CONG_TYPE_DIP,
};
struct hns_roce_qp {
struct ib_qp ibqp;
- struct hns_roce_buf hr_buf;
struct hns_roce_wq rq;
struct hns_roce_db rdb;
struct hns_roce_db sdb;
- u8 rdb_en;
- u8 sdb_en;
- u32 doorbell_qpn;
- __le32 sq_signal_bits;
- u32 sq_next_wqe;
- int sq_max_wqes_per_wr;
- int sq_spare_wqes;
+ unsigned long en_flags;
+ enum ib_sig_type sq_signal_bits;
struct hns_roce_wq sq;
- struct ib_umem *umem;
- struct hns_roce_mtt mtt;
+ struct hns_roce_mtr mtr;
+
u32 buff_size;
struct mutex mutex;
u8 port;
@@ -627,25 +618,36 @@ struct hns_roce_qp {
u8 sl;
u8 resp_depth;
u8 state;
- u32 access_flags;
u32 atomic_rd_en;
- u32 pkey_index;
u32 qkey;
void (*event)(struct hns_roce_qp *qp,
enum hns_roce_event event_type);
unsigned long qpn;
- atomic_t refcount;
+ u32 xrcdn;
+
+ refcount_t refcount;
struct completion free;
struct hns_roce_sge sge;
u32 next_sge;
-
- struct hns_roce_rinl_buf rq_inl_buf;
-};
-
-struct hns_roce_sqp {
- struct hns_roce_qp hr_qp;
+ enum ib_mtu path_mtu;
+ u32 max_inline_data;
+ u8 free_mr_en;
+
+ /* 0: flush needed, 1: unneeded */
+ unsigned long flush_flag;
+ struct hns_roce_work flush_work;
+ struct list_head node; /* all qps are on a list */
+ struct list_head rq_node; /* all recv qps are on a list */
+ struct list_head sq_node; /* all send qps are on a list */
+ struct hns_user_mmap_entry *dwqe_mmap_entry;
+ u32 config;
+ enum hns_roce_cong_type cong_type;
+ u8 tc_mode;
+ u8 priority;
+ spinlock_t flush_lock;
+ struct hns_roce_dip *dip;
};
struct hns_roce_ib_iboe {
@@ -655,41 +657,24 @@ struct hns_roce_ib_iboe {
u8 phy_port[HNS_ROCE_MAX_PORTS];
};
-enum {
- HNS_ROCE_EQ_STAT_INVALID = 0,
- HNS_ROCE_EQ_STAT_VALID = 2,
-};
-
struct hns_roce_ceqe {
- u32 comp;
+ __le32 comp;
+ __le32 rsv[15];
};
+#define CEQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_ceqe, h, l)
+
+#define CEQE_CQN CEQE_FIELD_LOC(23, 0)
+#define CEQE_OWNER CEQE_FIELD_LOC(31, 31)
+
struct hns_roce_aeqe {
__le32 asyn;
union {
struct {
- __le32 qp;
- u32 rsv0;
- u32 rsv1;
- } qp_event;
-
- struct {
- __le32 srq;
- u32 rsv0;
- u32 rsv1;
- } srq_event;
-
- struct {
- __le32 cq;
+ __le32 num;
u32 rsv0;
u32 rsv1;
- } cq_event;
-
- struct {
- __le32 ceqe;
- u32 rsv0;
- u32 rsv1;
- } ce_event;
+ } queue_event;
struct {
__le64 out_param;
@@ -698,48 +683,41 @@ struct hns_roce_aeqe {
u8 rsv0;
} __packed cmd;
} event;
+ __le32 rsv[12];
};
+#define AEQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_aeqe, h, l)
+
+#define AEQE_EVENT_TYPE AEQE_FIELD_LOC(7, 0)
+#define AEQE_SUB_TYPE AEQE_FIELD_LOC(15, 8)
+#define AEQE_OWNER AEQE_FIELD_LOC(31, 31)
+#define AEQE_EVENT_QUEUE_NUM AEQE_FIELD_LOC(55, 32)
+
struct hns_roce_eq {
struct hns_roce_dev *hr_dev;
- void __iomem *doorbell;
+ void __iomem *db_reg;
- int type_flag;/* Aeq:1 ceq:0 */
+ int type_flag; /* Aeq:1 ceq:0 */
int eqn;
u32 entries;
- int log_entries;
int eqe_size;
int irq;
- int log_page_size;
- int cons_index;
- struct hns_roce_buf_list *buf_list;
+ u32 cons_index;
int over_ignore;
int coalesce;
int arm_st;
- u64 eqe_ba;
- int eqe_ba_pg_sz;
- int eqe_buf_pg_sz;
int hop_num;
- u64 *bt_l0; /* Base address table for L0 */
- u64 **bt_l1; /* Base address table for L1 */
- u64 **buf;
- dma_addr_t l0_dma;
- dma_addr_t *l1_dma;
- dma_addr_t *buf_dma;
- u32 l0_last_num; /* L0 last chunk num */
- u32 l1_last_num; /* L1 last chunk num */
- int eq_max_cnt;
- int eq_period;
+ struct hns_roce_mtr mtr;
+ u16 eq_max_cnt;
+ u32 eq_period;
int shift;
- dma_addr_t cur_eqe_ba;
- dma_addr_t nxt_eqe_ba;
int event_type;
int sub_type;
+ struct work_struct work;
};
struct hns_roce_eq_table {
struct hns_roce_eq *eq;
- void __iomem **eqc_base; /* only for hw v1 */
};
struct hns_roce_caps {
@@ -750,54 +728,50 @@ struct hns_roce_caps {
int local_ca_ack_delay;
int num_uars;
u32 phy_num_uars;
- u32 max_sq_sg; /* 2 */
- u32 max_sq_inline; /* 32 */
- u32 max_rq_sg; /* 2 */
- u32 max_extend_sg;
- int num_qps; /* 256k */
- int reserved_qps;
- int num_qpc_timer;
- int num_cqc_timer;
- u32 max_srq_sg;
- int num_srqs;
- u32 max_wqes; /* 16k */
- u32 max_srqs;
+ u32 max_sq_sg;
+ u32 max_sq_inline;
+ u32 max_rq_sg;
+ u32 rsv0;
+ u32 num_qps;
+ u32 reserved_qps;
+ u32 num_srqs;
+ u32 max_wqes;
u32 max_srq_wrs;
u32 max_srq_sges;
- u32 max_sq_desc_sz; /* 64 */
- u32 max_rq_desc_sz; /* 64 */
- u32 max_srq_desc_sz;
+ u32 max_sq_desc_sz;
+ u32 max_rq_desc_sz;
+ u32 rsv2;
int max_qp_init_rdma;
int max_qp_dest_rdma;
- int num_cqs;
- int max_cqes;
- int min_cqes;
+ u32 num_cqs;
+ u32 max_cqes;
+ u32 min_cqes;
u32 min_wqes;
- int reserved_cqs;
- int reserved_srqs;
- u32 max_srqwqes;
- int num_aeq_vectors; /* 1 */
+ u32 reserved_cqs;
+ u32 reserved_srqs;
+ int num_aeq_vectors;
int num_comp_vectors;
int num_other_vectors;
- int num_mtpts;
- u32 num_mtt_segs;
- u32 num_cqe_segs;
+ u32 num_mtpts;
+ u32 rsv1;
u32 num_srqwqe_segs;
u32 num_idx_segs;
int reserved_mrws;
int reserved_uars;
int num_pds;
int reserved_pds;
+ u32 num_xrcds;
+ u32 reserved_xrcds;
u32 mtt_entry_sz;
- u32 cq_entry_sz;
+ u32 cqe_sz;
u32 page_size_cap;
u32 reserved_lkey;
int mtpt_entry_sz;
- int qpc_entry_sz;
+ int qpc_sz;
int irrl_entry_sz;
int trrl_entry_sz;
int cqc_entry_sz;
- int sccc_entry_sz;
+ int sccc_sz;
int qpc_timer_entry_sz;
int cqc_timer_entry_sz;
int srqc_entry_sz;
@@ -807,6 +781,8 @@ struct hns_roce_caps {
u32 pbl_hop_num;
int aeqe_depth;
int ceqe_depth;
+ u32 aeqe_size;
+ u32 ceqe_size;
enum ib_mtu max_mtu;
u32 qpc_bt_num;
u32 qpc_timer_bt_num;
@@ -814,7 +790,11 @@ struct hns_roce_caps {
u32 cqc_bt_num;
u32 cqc_timer_bt_num;
u32 mpt_bt_num;
+ u32 eqc_bt_num;
+ u32 smac_bt_num;
+ u32 sgid_bt_num;
u32 sccc_bt_num;
+ u32 gmv_bt_num;
u32 qpc_ba_pg_sz;
u32 qpc_buf_pg_sz;
u32 qpc_hop_num;
@@ -830,6 +810,9 @@ struct hns_roce_caps {
u32 mtt_ba_pg_sz;
u32 mtt_buf_pg_sz;
u32 mtt_hop_num;
+ u32 wqe_sq_hop_num;
+ u32 wqe_sge_hop_num;
+ u32 wqe_rq_hop_num;
u32 sccc_ba_pg_sz;
u32 sccc_buf_pg_sz;
u32 sccc_hop_num;
@@ -839,7 +822,7 @@ struct hns_roce_caps {
u32 cqc_timer_ba_pg_sz;
u32 cqc_timer_buf_pg_sz;
u32 cqc_timer_hop_num;
- u32 cqe_ba_pg_sz;
+ u32 cqe_ba_pg_sz; /* page_size = 4K*(2^cqe_ba_pg_sz) */
u32 cqe_buf_pg_sz;
u32 cqe_hop_num;
u32 srqwqe_ba_pg_sz;
@@ -851,118 +834,156 @@ struct hns_roce_caps {
u32 eqe_ba_pg_sz;
u32 eqe_buf_pg_sz;
u32 eqe_hop_num;
+ u32 gmv_entry_num;
+ u32 gmv_entry_sz;
+ u32 gmv_ba_pg_sz;
+ u32 gmv_buf_pg_sz;
+ u32 gmv_hop_num;
u32 sl_num;
- u32 tsq_buf_pg_sz;
- u32 tpq_buf_pg_sz;
- u32 chunk_sz; /* chunk size in non multihop mode*/
+ u32 llm_buf_pg_sz;
+ u32 chunk_sz; /* chunk size in non multihop mode */
u64 flags;
+ u16 default_ceq_max_cnt;
+ u16 default_ceq_period;
+ u16 default_aeq_max_cnt;
+ u16 default_aeq_period;
+ u16 default_aeq_arm_st;
+ u16 default_ceq_arm_st;
+ u8 cong_cap;
+ enum hns_roce_cong_type default_cong_type;
+ u32 max_ack_req_msg_len;
};
-struct hns_roce_work {
- struct hns_roce_dev *hr_dev;
- struct work_struct work;
- u32 qpn;
- u32 cqn;
- int event_type;
- int sub_type;
+enum hns_roce_device_state {
+ HNS_ROCE_DEVICE_STATE_INITED,
+ HNS_ROCE_DEVICE_STATE_RST_DOWN,
+ HNS_ROCE_DEVICE_STATE_UNINIT,
+};
+
+enum hns_roce_hw_pkt_stat_index {
+ HNS_ROCE_HW_RX_RC_PKT_CNT,
+ HNS_ROCE_HW_RX_UC_PKT_CNT,
+ HNS_ROCE_HW_RX_UD_PKT_CNT,
+ HNS_ROCE_HW_RX_XRC_PKT_CNT,
+ HNS_ROCE_HW_RX_PKT_CNT,
+ HNS_ROCE_HW_RX_ERR_PKT_CNT,
+ HNS_ROCE_HW_RX_CNP_PKT_CNT,
+ HNS_ROCE_HW_TX_RC_PKT_CNT,
+ HNS_ROCE_HW_TX_UC_PKT_CNT,
+ HNS_ROCE_HW_TX_UD_PKT_CNT,
+ HNS_ROCE_HW_TX_XRC_PKT_CNT,
+ HNS_ROCE_HW_TX_PKT_CNT,
+ HNS_ROCE_HW_TX_ERR_PKT_CNT,
+ HNS_ROCE_HW_TX_CNP_PKT_CNT,
+ HNS_ROCE_HW_TRP_GET_MPT_ERR_PKT_CNT,
+ HNS_ROCE_HW_TRP_GET_IRRL_ERR_PKT_CNT,
+ HNS_ROCE_HW_ECN_DB_CNT,
+ HNS_ROCE_HW_RX_BUF_CNT,
+ HNS_ROCE_HW_TRP_RX_SOF_CNT,
+ HNS_ROCE_HW_CQ_CQE_CNT,
+ HNS_ROCE_HW_CQ_POE_CNT,
+ HNS_ROCE_HW_CQ_NOTIFY_CNT,
+ HNS_ROCE_HW_CNT_TOTAL
};
-struct hns_roce_dfx_hw {
- int (*query_cqc_info)(struct hns_roce_dev *hr_dev, u32 cqn,
- int *buffer);
+enum hns_roce_sw_dfx_stat_index {
+ HNS_ROCE_DFX_AEQE_CNT,
+ HNS_ROCE_DFX_CEQE_CNT,
+ HNS_ROCE_DFX_CMDS_CNT,
+ HNS_ROCE_DFX_CMDS_ERR_CNT,
+ HNS_ROCE_DFX_MBX_POSTED_CNT,
+ HNS_ROCE_DFX_MBX_POLLED_CNT,
+ HNS_ROCE_DFX_MBX_EVENT_CNT,
+ HNS_ROCE_DFX_QP_CREATE_ERR_CNT,
+ HNS_ROCE_DFX_QP_MODIFY_ERR_CNT,
+ HNS_ROCE_DFX_CQ_CREATE_ERR_CNT,
+ HNS_ROCE_DFX_CQ_MODIFY_ERR_CNT,
+ HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT,
+ HNS_ROCE_DFX_SRQ_MODIFY_ERR_CNT,
+ HNS_ROCE_DFX_XRCD_ALLOC_ERR_CNT,
+ HNS_ROCE_DFX_MR_REG_ERR_CNT,
+ HNS_ROCE_DFX_MR_REREG_ERR_CNT,
+ HNS_ROCE_DFX_AH_CREATE_ERR_CNT,
+ HNS_ROCE_DFX_MMAP_ERR_CNT,
+ HNS_ROCE_DFX_UCTX_ALLOC_ERR_CNT,
+ HNS_ROCE_DFX_CNT_TOTAL
};
struct hns_roce_hw {
- int (*reset)(struct hns_roce_dev *hr_dev, bool enable);
int (*cmq_init)(struct hns_roce_dev *hr_dev);
void (*cmq_exit)(struct hns_roce_dev *hr_dev);
int (*hw_profile)(struct hns_roce_dev *hr_dev);
int (*hw_init)(struct hns_roce_dev *hr_dev);
void (*hw_exit)(struct hns_roce_dev *hr_dev);
- int (*post_mbox)(struct hns_roce_dev *hr_dev, u64 in_param,
- u64 out_param, u32 in_modifier, u8 op_modifier, u16 op,
- u16 token, int event);
- int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned long timeout);
- int (*rst_prc_mbox)(struct hns_roce_dev *hr_dev);
- int (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index,
+ int (*post_mbox)(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg);
+ int (*poll_mbox_done)(struct hns_roce_dev *hr_dev);
+ bool (*chk_mbox_avail)(struct hns_roce_dev *hr_dev, bool *is_busy);
+ int (*set_gid)(struct hns_roce_dev *hr_dev, int gid_index,
const union ib_gid *gid, const struct ib_gid_attr *attr);
- int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr);
- void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port,
- enum ib_mtu mtu);
- int (*write_mtpt)(void *mb_buf, struct hns_roce_mr *mr,
- unsigned long mtpt_idx);
+ int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port,
+ const u8 *addr);
+ int (*write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf,
+ struct hns_roce_mr *mr);
int (*rereg_write_mtpt)(struct hns_roce_dev *hr_dev,
- struct hns_roce_mr *mr, int flags, u32 pdn,
- int mr_access_flags, u64 iova, u64 size,
+ struct hns_roce_mr *mr, int flags,
void *mb_buf);
int (*frmr_write_mtpt)(void *mb_buf, struct hns_roce_mr *mr);
- int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw);
void (*write_cqc)(struct hns_roce_dev *hr_dev,
struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts,
- dma_addr_t dma_handle, int nent, u32 vector);
+ dma_addr_t dma_handle);
int (*set_hem)(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table, int obj, int step_idx);
+ struct hns_roce_hem_table *table, int obj, u32 step_idx);
int (*clear_hem)(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, int obj,
- int step_idx);
- int (*query_qp)(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
- int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
+ u32 step_idx);
int (*modify_qp)(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
int attr_mask, enum ib_qp_state cur_state,
- enum ib_qp_state new_state);
- int (*destroy_qp)(struct ib_qp *ibqp, struct ib_udata *udata);
+ enum ib_qp_state new_state, struct ib_udata *udata);
int (*qp_flow_control_init)(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp);
- int (*post_send)(struct ib_qp *ibqp, const struct ib_send_wr *wr,
- const struct ib_send_wr **bad_wr);
- int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
- const struct ib_recv_wr **bad_recv_wr);
- int (*req_notify_cq)(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
- int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
- int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
- struct ib_udata *udata);
- int (*destroy_cq)(struct ib_cq *ibcq, struct ib_udata *udata);
- int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+ void (*dereg_mr)(struct hns_roce_dev *hr_dev);
int (*init_eq)(struct hns_roce_dev *hr_dev);
void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
- void (*write_srqc)(struct hns_roce_dev *hr_dev,
- struct hns_roce_srq *srq, u32 pdn, u16 xrcd, u32 cqn,
- void *mb_buf, u64 *mtts_wqe, u64 *mtts_idx,
- dma_addr_t dma_handle_wqe,
- dma_addr_t dma_handle_idx);
- int (*modify_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr,
- enum ib_srq_attr_mask srq_attr_mask,
- struct ib_udata *udata);
- int (*query_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
- int (*post_srq_recv)(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
- const struct ib_recv_wr **bad_wr);
+ int (*write_srqc)(struct hns_roce_srq *srq, void *mb_buf);
+ int (*query_cqc)(struct hns_roce_dev *hr_dev, u32 cqn, void *buffer);
+ int (*query_qpc)(struct hns_roce_dev *hr_dev, u32 qpn, void *buffer);
+ int (*query_mpt)(struct hns_roce_dev *hr_dev, u32 key, void *buffer);
+ int (*query_srqc)(struct hns_roce_dev *hr_dev, u32 srqn, void *buffer);
+ int (*query_sccc)(struct hns_roce_dev *hr_dev, u32 qpn, void *buffer);
+ int (*query_hw_counter)(struct hns_roce_dev *hr_dev,
+ u64 *stats, u32 port, int *hw_counters);
+ int (*get_dscp)(struct hns_roce_dev *hr_dev, u8 dscp,
+ u8 *tc_mode, u8 *priority);
const struct ib_device_ops *hns_roce_dev_ops;
const struct ib_device_ops *hns_roce_dev_srq_ops;
};
struct hns_roce_dev {
struct ib_device ib_dev;
- struct platform_device *pdev;
struct pci_dev *pci_dev;
struct device *dev;
struct hns_roce_uar priv_uar;
const char *irq_names[HNS_ROCE_MAX_IRQ_NUM];
spinlock_t sm_lock;
- spinlock_t bt_cmd_lock;
bool active;
bool is_reset;
bool dis_db;
unsigned long reset_cnt;
struct hns_roce_ib_iboe iboe;
+ enum hns_roce_device_state state;
+ struct list_head qp_list; /* list of all qps on this dev */
+ spinlock_t qp_list_lock; /* protect qp_list */
struct list_head pgdir_list;
struct mutex pgdir_mutex;
int irq[HNS_ROCE_MAX_IRQ_NUM];
u8 __iomem *reg_base;
+ void __iomem *mem_base;
struct hns_roce_caps caps;
struct xarray qp_table_xa;
- unsigned char dev_addr[HNS_ROCE_MAX_PORTS][MAC_ADDR_OCTET_NUM];
+ unsigned char dev_addr[HNS_ROCE_MAX_PORTS][ETH_ALEN];
u64 sys_image_guid;
u32 vendor_id;
u32 vendor_part_id;
@@ -970,8 +991,9 @@ struct hns_roce_dev {
void __iomem *priv_addr;
struct hns_roce_cmdq cmd;
- struct hns_roce_bitmap pd_bitmap;
- struct hns_roce_uar_table uar_table;
+ struct hns_roce_ida pd_ida;
+ struct hns_roce_ida xrcd_ida;
+ struct hns_roce_ida uar_ida;
struct hns_roce_mr_table mr_table;
struct hns_roce_cq_table cq_table;
struct hns_roce_srq_table srq_table;
@@ -979,19 +1001,47 @@ struct hns_roce_dev {
struct hns_roce_eq_table eq_table;
struct hns_roce_hem_table qpc_timer_table;
struct hns_roce_hem_table cqc_timer_table;
+ /* GMV is the memory area that the driver allocates for the hardware
+ * to store SGID, SMAC and VLAN information.
+ */
+ struct hns_roce_hem_table gmv_table;
int cmd_mod;
int loop_idc;
u32 sdb_offset;
u32 odb_offset;
- dma_addr_t tptr_dma_addr; /*only for hw v1*/
- u32 tptr_size; /*only for hw v1*/
const struct hns_roce_hw *hw;
void *priv;
struct workqueue_struct *irq_workq;
- const struct hns_roce_dfx_hw *dfx;
+ struct work_struct ecc_work;
+ u32 func_num;
+ u32 is_vf;
+ u32 cong_algo_tmpl_id;
+ u64 dwqe_page;
+ struct hns_roce_dev_debugfs dbgfs;
+ atomic64_t *dfx_cnt;
};
+enum hns_roce_trace_type {
+ TRACE_SQ,
+ TRACE_RQ,
+ TRACE_SRQ,
+};
+
+static inline const char *trace_type_to_str(enum hns_roce_trace_type type)
+{
+ switch (type) {
+ case TRACE_SQ:
+ return "SQ";
+ case TRACE_RQ:
+ return "RQ";
+ case TRACE_SRQ:
+ return "SRQ";
+ default:
+ return "UNKNOWN";
+ }
+}
+
static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev)
{
return container_of(ib_dev, struct hns_roce_dev, ib_dev);
@@ -1008,6 +1058,11 @@ static inline struct hns_roce_pd *to_hr_pd(struct ib_pd *ibpd)
return container_of(ibpd, struct hns_roce_pd, ibpd);
}
+static inline struct hns_roce_xrcd *to_hr_xrcd(struct ib_xrcd *ibxrcd)
+{
+ return container_of(ibxrcd, struct hns_roce_xrcd, ibxrcd);
+}
+
static inline struct hns_roce_ah *to_hr_ah(struct ib_ah *ibah)
{
return container_of(ibah, struct hns_roce_ah, ibah);
@@ -1018,11 +1073,6 @@ static inline struct hns_roce_mr *to_hr_mr(struct ib_mr *ibmr)
return container_of(ibmr, struct hns_roce_mr, ibmr);
}
-static inline struct hns_roce_mw *to_hr_mw(struct ib_mw *ibmw)
-{
- return container_of(ibmw, struct hns_roce_mw, ibmw);
-}
-
static inline struct hns_roce_qp *to_hr_qp(struct ib_qp *ibqp)
{
return container_of(ibqp, struct hns_roce_qp, ibqp);
@@ -1038,37 +1088,101 @@ static inline struct hns_roce_srq *to_hr_srq(struct ib_srq *ibsrq)
return container_of(ibsrq, struct hns_roce_srq, ibsrq);
}
-static inline struct hns_roce_sqp *hr_to_hr_sqp(struct hns_roce_qp *hr_qp)
+static inline struct hns_user_mmap_entry *
+to_hns_mmap(struct rdma_user_mmap_entry *rdma_entry)
{
- return container_of(hr_qp, struct hns_roce_sqp, hr_qp);
+ return container_of(rdma_entry, struct hns_user_mmap_entry, rdma_entry);
}
static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest)
{
- __raw_writeq(*(u64 *) val, dest);
+ writeq(*(u64 *)val, dest);
}
static inline struct hns_roce_qp
*__hns_roce_qp_lookup(struct hns_roce_dev *hr_dev, u32 qpn)
{
- return xa_load(&hr_dev->qp_table_xa, qpn & (hr_dev->caps.num_qps - 1));
+ return xa_load(&hr_dev->qp_table_xa, qpn);
}
-static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset)
+static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf,
+ unsigned int offset)
{
- u32 page_size = 1 << buf->page_shift;
+ return (char *)(buf->trunk_list[offset >> buf->trunk_shift].buf) +
+ (offset & ((1 << buf->trunk_shift) - 1));
+}
- if (buf->nbufs == 1)
- return (char *)(buf->direct.buf) + offset;
- else
- return (char *)(buf->page_list[offset >> buf->page_shift].buf) +
- (offset & (page_size - 1));
+static inline dma_addr_t hns_roce_buf_dma_addr(struct hns_roce_buf *buf,
+ unsigned int offset)
+{
+ return buf->trunk_list[offset >> buf->trunk_shift].map +
+ (offset & ((1 << buf->trunk_shift) - 1));
+}
+
+static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, u32 idx)
+{
+ return hns_roce_buf_dma_addr(buf, idx << buf->page_shift);
}
-int hns_roce_init_uar_table(struct hns_roce_dev *dev);
+#define hr_hw_page_align(x) ALIGN(x, 1 << HNS_HW_PAGE_SHIFT)
+
+static inline u64 to_hr_hw_page_addr(u64 addr)
+{
+ return addr >> HNS_HW_PAGE_SHIFT;
+}
+
+static inline u32 to_hr_hw_page_shift(u32 page_shift)
+{
+ return page_shift - HNS_HW_PAGE_SHIFT;
+}
+
+static inline u32 to_hr_hem_hopnum(u32 hopnum, u32 count)
+{
+ if (count > 0)
+ return hopnum == HNS_ROCE_HOP_NUM_0 ? 0 : hopnum;
+
+ return 0;
+}
+
+static inline u32 to_hr_hem_entries_size(u32 count, u32 buf_shift)
+{
+ return hr_hw_page_align(count << buf_shift);
+}
+
+static inline u32 to_hr_hem_entries_count(u32 count, u32 buf_shift)
+{
+ return hr_hw_page_align(count << buf_shift) >> buf_shift;
+}
+
+static inline u32 to_hr_hem_entries_shift(u32 count, u32 buf_shift)
+{
+ if (!count)
+ return 0;
+
+ return ilog2(to_hr_hem_entries_count(count, buf_shift));
+}
+
+#define DSCP_SHIFT 2
+
+static inline u8 get_tclass(const struct ib_global_route *grh)
+{
+ return grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP ?
+ grh->traffic_class >> DSCP_SHIFT : grh->traffic_class;
+}
+
+static inline struct net_device *get_hr_netdev(struct hns_roce_dev *hr_dev,
+ u8 port)
+{
+ return hr_dev->iboe.netdevs[port];
+}
+
+static inline u8 get_hr_bus_num(struct hns_roce_dev *hr_dev)
+{
+ return hr_dev->pci_dev->bus->number;
+}
+
+void hns_roce_init_uar_table(struct hns_roce_dev *dev);
int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar);
-void hns_roce_uar_free(struct hns_roce_dev *dev, struct hns_roce_uar *uar);
-void hns_roce_cleanup_uar_table(struct hns_roce_dev *dev);
int hns_roce_cmd_init(struct hns_roce_dev *hr_dev);
void hns_roce_cmd_cleanup(struct hns_roce_dev *hr_dev);
@@ -1077,116 +1191,107 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status,
int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev);
void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev);
-int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift,
- struct hns_roce_mtt *mtt);
-void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt);
-int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt, struct hns_roce_buf *buf);
-
-int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev);
-int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev);
-int hns_roce_init_eq_table(struct hns_roce_dev *hr_dev);
-int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev);
+/* hns roce hw needs the current block and next block addr from mtt */
+#define MTT_MIN_COUNT 2
+static inline dma_addr_t hns_roce_get_mtr_ba(struct hns_roce_mtr *mtr)
+{
+ return mtr->hem_cfg.root_ba;
+}
+
+int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ u32 offset, u64 *mtt_buf, int mtt_max);
+int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *buf_attr,
+ unsigned int page_shift, struct ib_udata *udata,
+ unsigned long user_addr);
+void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr);
+int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ dma_addr_t *pages, unsigned int page_cnt);
+
+void hns_roce_init_pd_table(struct hns_roce_dev *hr_dev);
+void hns_roce_init_mr_table(struct hns_roce_dev *hr_dev);
+void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev);
-int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev);
+void hns_roce_init_srq_table(struct hns_roce_dev *hr_dev);
+void hns_roce_init_xrcd_table(struct hns_roce_dev *hr_dev);
-void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev);
-void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev);
-void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev);
-void hns_roce_cleanup_srq_table(struct hns_roce_dev *hr_dev);
-
-int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj);
-void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj,
- int rr);
-int hns_roce_bitmap_init(struct hns_roce_bitmap *bitmap, u32 num, u32 mask,
- u32 reserved_bot, u32 resetrved_top);
-void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap);
+
void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev);
-int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt,
- int align, unsigned long *obj);
-void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap,
- unsigned long obj, int cnt,
- int rr);
-
-int hns_roce_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr,
- u32 flags, struct ib_udata *udata);
+
+int hns_roce_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
-void hns_roce_destroy_ah(struct ib_ah *ah, u32 flags);
+static inline int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags)
+{
+ return 0;
+}
int hns_roce_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-void hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc);
struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata);
-int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length,
- u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
- struct ib_udata *udata);
+struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start,
+ u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_pd *pd,
+ struct ib_udata *udata);
struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
+ u32 max_num_sg);
int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
-int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
- struct hns_roce_cmd_mailbox *mailbox,
- unsigned long mpt_index);
unsigned long key_to_hw_index(u32 key);
-struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type,
- struct ib_udata *udata);
-int hns_roce_dealloc_mw(struct ib_mw *ibmw);
+void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf);
+struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size,
+ u32 page_shift, u32 flags);
-void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
- struct hns_roce_buf *buf);
-int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
- struct hns_roce_buf *buf, u32 page_shift);
-
-int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt, struct ib_umem *umem);
+int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
+ int buf_cnt, struct hns_roce_buf *buf,
+ unsigned int page_shift);
+int hns_roce_get_umem_bufs(dma_addr_t *bufs,
+ int buf_cnt, struct ib_umem *umem,
+ unsigned int page_shift);
int hns_roce_create_srq(struct ib_srq *srq,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata);
-int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr,
- enum ib_srq_attr_mask srq_attr_mask,
- struct ib_udata *udata);
-void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
+int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
+
+int hns_roce_alloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata);
+int hns_roce_dealloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata);
-struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata);
+int hns_roce_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
-void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n);
-void *get_send_wqe(struct hns_roce_qp *hr_qp, int n);
-void *get_send_extend_sge(struct hns_roce_qp *hr_qp, int n);
-bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq,
+void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp);
+void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, unsigned int n);
+void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, unsigned int n);
+void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n);
+bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq,
struct ib_cq *ib_cq);
-enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state);
void hns_roce_lock_cqs(struct hns_roce_cq *send_cq,
struct hns_roce_cq *recv_cq);
void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq,
struct hns_roce_cq *recv_cq);
void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp);
-void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp);
-void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn,
- int cnt);
+void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+ struct ib_udata *udata);
__be32 send_ieth(const struct ib_send_wr *wr);
int to_hr_qp_type(int qp_type);
-struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
-
-int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
-void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq);
+int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs);
-int hns_roce_db_map_user(struct hns_roce_ucontext *context,
- struct ib_udata *udata, unsigned long virt,
+int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
+int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
struct hns_roce_db *db);
void hns_roce_db_unmap_user(struct hns_roce_ucontext *context,
struct hns_roce_db *db);
@@ -1196,12 +1301,27 @@ void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db);
void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn);
void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
+void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp);
void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type);
+void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn);
void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type);
-int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index);
+void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev);
int hns_roce_init(struct hns_roce_dev *hr_dev);
-void hns_roce_exit(struct hns_roce_dev *hr_dev);
+void hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup);
+int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq);
+int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq);
+int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp);
+int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp);
+int hns_roce_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr);
+int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr);
+int hns_roce_fill_res_srq_entry(struct sk_buff *msg, struct ib_srq *ib_srq);
+int hns_roce_fill_res_srq_entry_raw(struct sk_buff *msg, struct ib_srq *ib_srq);
+struct hns_user_mmap_entry *
+hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address,
+ size_t length,
+ enum hns_roce_mmap_type mmap_type);
+bool check_sl_valid(struct hns_roce_dev *hr_dev, u8 sl);
+void hns_roce_put_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx);
+void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx);
-int hns_roce_fill_res_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res);
#endif /* _HNS_ROCE_DEVICE_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index 8e29dbb5b5fb..3d479c63b117 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -31,51 +31,77 @@
* SOFTWARE.
*/
-#include <linux/platform_device.h>
#include "hns_roce_device.h"
#include "hns_roce_hem.h"
#include "hns_roce_common.h"
-#define DMA_ADDR_T_SHIFT 12
-#define BT_BA_SHIFT 32
+#define HEM_INDEX_BUF BIT(0)
+#define HEM_INDEX_L0 BIT(1)
+#define HEM_INDEX_L1 BIT(2)
+struct hns_roce_hem_index {
+ u64 buf;
+ u64 l0;
+ u64 l1;
+ u32 inited; /* indicates which indexes are valid */
+};
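A minimal sketch of how the new index->inited bitmask is used by alloc_mhop_hem()/free_mhop_hem() later in this patch: each HEM_INDEX_* bit records that the corresponding level was allocated, so the cleanup path frees only what was set. Illustrative fragment only, not part of the diff; variable names follow free_mhop_hem().

        /* allocation path records what it actually allocated */
        index->inited |= HEM_INDEX_L0;  /* L0 BA table allocated */
        index->inited |= HEM_INDEX_BUF; /* buffer chunk allocated */

        /* cleanup path frees only the recorded levels */
        if (index->inited & HEM_INDEX_BUF)
                hns_roce_free_hem(hr_dev, table->hem[index->buf]);
        if (index->inited & HEM_INDEX_L1)       /* not set above, so skipped */
                dma_free_coherent(dev, bt_size, table->bt_l1[index->l1],
                                  table->bt_l1_dma_addr[index->l1]);
        if (index->inited & HEM_INDEX_L0)
                dma_free_coherent(dev, bt_size, table->bt_l0[index->l0],
                                  table->bt_l0_dma_addr[index->l0]);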
bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type)
{
- if ((hr_dev->caps.qpc_hop_num && type == HEM_TYPE_QPC) ||
- (hr_dev->caps.mpt_hop_num && type == HEM_TYPE_MTPT) ||
- (hr_dev->caps.cqc_hop_num && type == HEM_TYPE_CQC) ||
- (hr_dev->caps.srqc_hop_num && type == HEM_TYPE_SRQC) ||
- (hr_dev->caps.sccc_hop_num && type == HEM_TYPE_SCCC) ||
- (hr_dev->caps.qpc_timer_hop_num && type == HEM_TYPE_QPC_TIMER) ||
- (hr_dev->caps.cqc_timer_hop_num && type == HEM_TYPE_CQC_TIMER) ||
- (hr_dev->caps.cqe_hop_num && type == HEM_TYPE_CQE) ||
- (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT) ||
- (hr_dev->caps.srqwqe_hop_num && type == HEM_TYPE_SRQWQE) ||
- (hr_dev->caps.idx_hop_num && type == HEM_TYPE_IDX))
- return true;
-
- return false;
+ int hop_num = 0;
+
+ switch (type) {
+ case HEM_TYPE_QPC:
+ hop_num = hr_dev->caps.qpc_hop_num;
+ break;
+ case HEM_TYPE_MTPT:
+ hop_num = hr_dev->caps.mpt_hop_num;
+ break;
+ case HEM_TYPE_CQC:
+ hop_num = hr_dev->caps.cqc_hop_num;
+ break;
+ case HEM_TYPE_SRQC:
+ hop_num = hr_dev->caps.srqc_hop_num;
+ break;
+ case HEM_TYPE_SCCC:
+ hop_num = hr_dev->caps.sccc_hop_num;
+ break;
+ case HEM_TYPE_QPC_TIMER:
+ hop_num = hr_dev->caps.qpc_timer_hop_num;
+ break;
+ case HEM_TYPE_CQC_TIMER:
+ hop_num = hr_dev->caps.cqc_timer_hop_num;
+ break;
+ case HEM_TYPE_GMV:
+ hop_num = hr_dev->caps.gmv_hop_num;
+ break;
+ default:
+ return false;
+ }
+
+ return hop_num;
}
-EXPORT_SYMBOL_GPL(hns_roce_check_whether_mhop);
-static bool hns_roce_check_hem_null(struct hns_roce_hem **hem, u64 start_idx,
- u32 bt_chunk_num)
+static bool hns_roce_check_hem_null(struct hns_roce_hem **hem, u64 hem_idx,
+ u32 bt_chunk_num, u64 hem_max_num)
{
- int i;
+ u64 start_idx = round_down(hem_idx, bt_chunk_num);
+ u64 check_max_num = start_idx + bt_chunk_num;
+ u64 i;
- for (i = 0; i < bt_chunk_num; i++)
- if (hem[start_idx + i])
+ for (i = start_idx; (i < check_max_num) && (i < hem_max_num); i++)
+ if (i != hem_idx && hem[i])
return false;
return true;
}
-static bool hns_roce_check_bt_null(u64 **bt, u64 start_idx, u32 bt_chunk_num)
+static bool hns_roce_check_bt_null(u64 **bt, u64 ba_idx, u32 bt_chunk_num)
{
+ u64 start_idx = round_down(ba_idx, bt_chunk_num);
int i;
for (i = 0; i < bt_chunk_num; i++)
- if (bt[start_idx + i])
+ if (i != ba_idx && bt[start_idx + i])
return false;
return true;
@@ -93,17 +119,13 @@ static int hns_roce_get_bt_num(u32 table_type, u32 hop_num)
return 0;
}
-int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table, unsigned long *obj,
- struct hns_roce_hem_mhop *mhop)
+static int get_hem_table_config(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_mhop *mhop,
+ u32 type)
{
struct device *dev = hr_dev->dev;
- u32 chunk_ba_num;
- u32 table_idx;
- u32 bt_num;
- u32 chunk_size;
- switch (table->type) {
+ switch (type) {
case HEM_TYPE_QPC:
mhop->buf_chunk_size = 1 << (hr_dev->caps.qpc_buf_pg_sz
+ PAGE_SHIFT);
@@ -160,44 +182,36 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
mhop->ba_l0_num = hr_dev->caps.srqc_bt_num;
mhop->hop_num = hr_dev->caps.srqc_hop_num;
break;
- case HEM_TYPE_MTT:
- mhop->buf_chunk_size = 1 << (hr_dev->caps.mtt_buf_pg_sz
- + PAGE_SHIFT);
- mhop->bt_chunk_size = 1 << (hr_dev->caps.mtt_ba_pg_sz
- + PAGE_SHIFT);
- mhop->ba_l0_num = mhop->bt_chunk_size / 8;
- mhop->hop_num = hr_dev->caps.mtt_hop_num;
- break;
- case HEM_TYPE_CQE:
- mhop->buf_chunk_size = 1 << (hr_dev->caps.cqe_buf_pg_sz
- + PAGE_SHIFT);
- mhop->bt_chunk_size = 1 << (hr_dev->caps.cqe_ba_pg_sz
- + PAGE_SHIFT);
- mhop->ba_l0_num = mhop->bt_chunk_size / 8;
- mhop->hop_num = hr_dev->caps.cqe_hop_num;
- break;
- case HEM_TYPE_SRQWQE:
- mhop->buf_chunk_size = 1 << (hr_dev->caps.srqwqe_buf_pg_sz
- + PAGE_SHIFT);
- mhop->bt_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz
- + PAGE_SHIFT);
- mhop->ba_l0_num = mhop->bt_chunk_size / 8;
- mhop->hop_num = hr_dev->caps.srqwqe_hop_num;
- break;
- case HEM_TYPE_IDX:
- mhop->buf_chunk_size = 1 << (hr_dev->caps.idx_buf_pg_sz
- + PAGE_SHIFT);
- mhop->bt_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz
- + PAGE_SHIFT);
- mhop->ba_l0_num = mhop->bt_chunk_size / 8;
- mhop->hop_num = hr_dev->caps.idx_hop_num;
+ case HEM_TYPE_GMV:
+ mhop->buf_chunk_size = 1 << (hr_dev->caps.gmv_buf_pg_sz +
+ PAGE_SHIFT);
+ mhop->bt_chunk_size = 1 << (hr_dev->caps.gmv_ba_pg_sz +
+ PAGE_SHIFT);
+ mhop->ba_l0_num = hr_dev->caps.gmv_bt_num;
+ mhop->hop_num = hr_dev->caps.gmv_hop_num;
break;
default:
- dev_err(dev, "Table %d not support multi-hop addressing!\n",
- table->type);
+ dev_err(dev, "table %u not support multi-hop addressing!\n",
+ type);
return -EINVAL;
}
+ return 0;
+}
+
+int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, unsigned long *obj,
+ struct hns_roce_hem_mhop *mhop)
+{
+ struct device *dev = hr_dev->dev;
+ u32 chunk_ba_num;
+ u32 chunk_size;
+ u32 table_idx;
+ u32 bt_num;
+
+ if (get_hem_table_config(hr_dev, mhop, table->type))
+ return -EINVAL;
+
if (!obj)
return 0;
@@ -206,11 +220,10 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
* MTT/CQE alloc hem for bt pages.
*/
bt_num = hns_roce_get_bt_num(table->type, mhop->hop_num);
- chunk_ba_num = mhop->bt_chunk_size / 8;
+ chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN;
chunk_size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size :
mhop->bt_chunk_size;
- table_idx = (*obj & (table->num_obj - 1)) /
- (chunk_size / table->obj_size);
+ table_idx = *obj / (chunk_size / table->obj_size);
switch (bt_num) {
case 3:
mhop->l2_idx = table_idx & (chunk_ba_num - 1);
@@ -225,8 +238,8 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
mhop->l0_idx = table_idx;
break;
default:
- dev_err(dev, "Table %d not support hop_num = %d!\n",
- table->type, mhop->hop_num);
+ dev_err(dev, "table %u not support hop_num = %u!\n",
+ table->type, mhop->hop_num);
return -EINVAL;
}
if (mhop->l0_idx >= mhop->ba_l0_num)
@@ -234,343 +247,275 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
return 0;
}
-EXPORT_SYMBOL_GPL(hns_roce_calc_hem_mhop);
static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev,
- int npages,
- unsigned long hem_alloc_size,
- gfp_t gfp_mask)
+ unsigned long hem_alloc_size)
{
- struct hns_roce_hem_chunk *chunk = NULL;
struct hns_roce_hem *hem;
- struct scatterlist *mem;
int order;
void *buf;
- WARN_ON(gfp_mask & __GFP_HIGHMEM);
+ order = get_order(hem_alloc_size);
+ if (PAGE_SIZE << order != hem_alloc_size) {
+ dev_err(hr_dev->dev, "invalid hem_alloc_size: %lu!\n",
+ hem_alloc_size);
+ return NULL;
+ }
- hem = kmalloc(sizeof(*hem),
- gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
+ hem = kmalloc(sizeof(*hem), GFP_KERNEL);
if (!hem)
return NULL;
- hem->refcount = 0;
- INIT_LIST_HEAD(&hem->chunk_list);
-
- order = get_order(hem_alloc_size);
-
- while (npages > 0) {
- if (!chunk) {
- chunk = kmalloc(sizeof(*chunk),
- gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
- if (!chunk)
- goto fail;
-
- sg_init_table(chunk->mem, HNS_ROCE_HEM_CHUNK_LEN);
- chunk->npages = 0;
- chunk->nsg = 0;
- memset(chunk->buf, 0, sizeof(chunk->buf));
- list_add_tail(&chunk->list, &hem->chunk_list);
- }
-
- while (1 << order > npages)
- --order;
+ buf = dma_alloc_coherent(hr_dev->dev, hem_alloc_size,
+ &hem->dma, GFP_KERNEL);
+ if (!buf)
+ goto fail;
- /*
- * Alloc memory one time. If failed, don't alloc small block
- * memory, directly return fail.
- */
- mem = &chunk->mem[chunk->npages];
- buf = dma_alloc_coherent(hr_dev->dev, PAGE_SIZE << order,
- &sg_dma_address(mem), gfp_mask);
- if (!buf)
- goto fail;
-
- chunk->buf[chunk->npages] = buf;
- sg_dma_len(mem) = PAGE_SIZE << order;
-
- ++chunk->npages;
- ++chunk->nsg;
- npages -= 1 << order;
- }
+ hem->buf = buf;
+ hem->size = hem_alloc_size;
return hem;
fail:
- hns_roce_free_hem(hr_dev, hem);
+ kfree(hem);
return NULL;
}
void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem)
{
- struct hns_roce_hem_chunk *chunk, *tmp;
- int i;
-
if (!hem)
return;
- list_for_each_entry_safe(chunk, tmp, &hem->chunk_list, list) {
- for (i = 0; i < chunk->npages; ++i)
- dma_free_coherent(hr_dev->dev,
- sg_dma_len(&chunk->mem[i]),
- chunk->buf[i],
- sg_dma_address(&chunk->mem[i]));
- kfree(chunk);
- }
+ dma_free_coherent(hr_dev->dev, hem->size, hem->buf, hem->dma);
kfree(hem);
}
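With the scatterlist chunk list removed, hns_roce_alloc_hem()/hns_roce_free_hem() above now manage a single coherent DMA buffer per HEM, and the get_order() guard means hem_alloc_size must be an exact power-of-two number of pages. A quick check of that guard, assuming 4 KiB pages (numbers are illustrative):

        /* hem_alloc_size = 128 KiB: get_order() = 5, PAGE_SIZE << 5 == 128 KiB -> accepted
         * hem_alloc_size =  96 KiB: get_order() = 5, PAGE_SIZE << 5 == 128 KiB -> rejected
         */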
-static int hns_roce_set_hem(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table, unsigned long obj)
-{
- spinlock_t *lock = &hr_dev->bt_cmd_lock;
- struct device *dev = hr_dev->dev;
- unsigned long end = 0;
- unsigned long flags;
- struct hns_roce_hem_iter iter;
- void __iomem *bt_cmd;
- u32 bt_cmd_h_val = 0;
- u32 bt_cmd_val[2];
- u32 bt_cmd_l = 0;
- u64 bt_ba = 0;
- int ret = 0;
-
- /* Find the HEM(Hardware Entry Memory) entry */
- unsigned long i = (obj & (table->num_obj - 1)) /
- (table->table_chunk_size / table->obj_size);
-
- switch (table->type) {
- case HEM_TYPE_QPC:
- roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_QPC);
- break;
- case HEM_TYPE_MTPT:
- roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S,
- HEM_TYPE_MTPT);
- break;
- case HEM_TYPE_CQC:
- roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_CQC);
- break;
- case HEM_TYPE_SRQC:
- roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S,
- HEM_TYPE_SRQC);
- break;
- default:
- return ret;
- }
- roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj);
- roce_set_bit(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0);
- roce_set_bit(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1);
-
- /* Currently iter only a chunk */
- for (hns_roce_hem_first(table->hem[i], &iter);
- !hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) {
- bt_ba = hns_roce_hem_addr(&iter) >> DMA_ADDR_T_SHIFT;
-
- spin_lock_irqsave(lock, flags);
-
- bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG;
-
- end = msecs_to_jiffies(HW_SYNC_TIMEOUT_MSECS) + jiffies;
- while (1) {
- if (readl(bt_cmd) >> BT_CMD_SYNC_SHIFT) {
- if (!(time_before(jiffies, end))) {
- dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n");
- spin_unlock_irqrestore(lock, flags);
- return -EBUSY;
- }
- } else {
- break;
- }
- mdelay(HW_SYNC_SLEEP_TIME_INTERVAL);
- }
-
- bt_cmd_l = (u32)bt_ba;
- roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S,
- bt_ba >> BT_BA_SHIFT);
-
- bt_cmd_val[0] = bt_cmd_l;
- bt_cmd_val[1] = bt_cmd_h_val;
- hns_roce_write64_k(bt_cmd_val,
- hr_dev->reg_base + ROCEE_BT_CMD_L_REG);
- spin_unlock_irqrestore(lock, flags);
- }
-
- return ret;
-}
-
-static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table,
- unsigned long obj)
+static int calc_hem_config(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, unsigned long obj,
+ struct hns_roce_hem_mhop *mhop,
+ struct hns_roce_hem_index *index)
{
struct device *dev = hr_dev->dev;
- struct hns_roce_hem_mhop mhop;
- struct hns_roce_hem_iter iter;
- u32 buf_chunk_size;
- u32 bt_chunk_size;
+ unsigned long mhop_obj = obj;
+ u32 l0_idx, l1_idx, l2_idx;
u32 chunk_ba_num;
- u32 hop_num;
- u32 size;
u32 bt_num;
- u64 hem_idx;
- u64 bt_l1_idx = 0;
- u64 bt_l0_idx = 0;
- u64 bt_ba;
- unsigned long mhop_obj = obj;
- int bt_l1_allocated = 0;
- int bt_l0_allocated = 0;
- int step_idx;
int ret;
- ret = hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop);
+ ret = hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, mhop);
if (ret)
return ret;
- buf_chunk_size = mhop.buf_chunk_size;
- bt_chunk_size = mhop.bt_chunk_size;
- hop_num = mhop.hop_num;
- chunk_ba_num = bt_chunk_size / 8;
-
- bt_num = hns_roce_get_bt_num(table->type, hop_num);
+ l0_idx = mhop->l0_idx;
+ l1_idx = mhop->l1_idx;
+ l2_idx = mhop->l2_idx;
+ chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN;
+ bt_num = hns_roce_get_bt_num(table->type, mhop->hop_num);
switch (bt_num) {
case 3:
- hem_idx = mhop.l0_idx * chunk_ba_num * chunk_ba_num +
- mhop.l1_idx * chunk_ba_num + mhop.l2_idx;
- bt_l1_idx = mhop.l0_idx * chunk_ba_num + mhop.l1_idx;
- bt_l0_idx = mhop.l0_idx;
+ index->l1 = l0_idx * chunk_ba_num + l1_idx;
+ index->l0 = l0_idx;
+ index->buf = l0_idx * chunk_ba_num * chunk_ba_num +
+ l1_idx * chunk_ba_num + l2_idx;
break;
case 2:
- hem_idx = mhop.l0_idx * chunk_ba_num + mhop.l1_idx;
- bt_l0_idx = mhop.l0_idx;
+ index->l0 = l0_idx;
+ index->buf = l0_idx * chunk_ba_num + l1_idx;
break;
case 1:
- hem_idx = mhop.l0_idx;
+ index->buf = l0_idx;
break;
default:
- dev_err(dev, "Table %d not support hop_num = %d!\n",
- table->type, hop_num);
+ dev_err(dev, "table %u not support mhop.hop_num = %u!\n",
+ table->type, mhop->hop_num);
return -EINVAL;
}
- mutex_lock(&table->mutex);
+ if (unlikely(index->buf >= table->num_hem)) {
+ dev_err(dev, "table %u exceed hem limt idx %llu, max %lu!\n",
+ table->type, index->buf, table->num_hem);
+ return -EINVAL;
+ }
- if (table->hem[hem_idx]) {
- ++table->hem[hem_idx]->refcount;
- goto out;
+ return 0;
+}
+
+static void free_mhop_hem(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table,
+ struct hns_roce_hem_mhop *mhop,
+ struct hns_roce_hem_index *index)
+{
+ u32 bt_size = mhop->bt_chunk_size;
+ struct device *dev = hr_dev->dev;
+
+ if (index->inited & HEM_INDEX_BUF) {
+ hns_roce_free_hem(hr_dev, table->hem[index->buf]);
+ table->hem[index->buf] = NULL;
}
+ if (index->inited & HEM_INDEX_L1) {
+ dma_free_coherent(dev, bt_size, table->bt_l1[index->l1],
+ table->bt_l1_dma_addr[index->l1]);
+ table->bt_l1[index->l1] = NULL;
+ }
+
+ if (index->inited & HEM_INDEX_L0) {
+ dma_free_coherent(dev, bt_size, table->bt_l0[index->l0],
+ table->bt_l0_dma_addr[index->l0]);
+ table->bt_l0[index->l0] = NULL;
+ }
+}
+
+static int alloc_mhop_hem(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table,
+ struct hns_roce_hem_mhop *mhop,
+ struct hns_roce_hem_index *index)
+{
+ u32 bt_size = mhop->bt_chunk_size;
+ struct device *dev = hr_dev->dev;
+ u64 bt_ba;
+ u32 size;
+ int ret;
+
/* alloc L1 BA's chunk */
- if ((check_whether_bt_num_3(table->type, hop_num) ||
- check_whether_bt_num_2(table->type, hop_num)) &&
- !table->bt_l0[bt_l0_idx]) {
- table->bt_l0[bt_l0_idx] = dma_alloc_coherent(dev, bt_chunk_size,
- &(table->bt_l0_dma_addr[bt_l0_idx]),
+ if ((check_whether_bt_num_3(table->type, mhop->hop_num) ||
+ check_whether_bt_num_2(table->type, mhop->hop_num)) &&
+ !table->bt_l0[index->l0]) {
+ table->bt_l0[index->l0] = dma_alloc_coherent(dev, bt_size,
+ &table->bt_l0_dma_addr[index->l0],
GFP_KERNEL);
- if (!table->bt_l0[bt_l0_idx]) {
+ if (!table->bt_l0[index->l0]) {
ret = -ENOMEM;
goto out;
}
- bt_l0_allocated = 1;
-
- /* set base address to hardware */
- if (table->type < HEM_TYPE_MTT) {
- step_idx = 0;
- if (hr_dev->hw->set_hem(hr_dev, table, obj, step_idx)) {
- ret = -ENODEV;
- dev_err(dev, "set HEM base address to HW failed!\n");
- goto err_dma_alloc_l1;
- }
- }
+ index->inited |= HEM_INDEX_L0;
}
/* alloc L2 BA's chunk */
- if (check_whether_bt_num_3(table->type, hop_num) &&
- !table->bt_l1[bt_l1_idx]) {
- table->bt_l1[bt_l1_idx] = dma_alloc_coherent(dev, bt_chunk_size,
- &(table->bt_l1_dma_addr[bt_l1_idx]),
+ if (check_whether_bt_num_3(table->type, mhop->hop_num) &&
+ !table->bt_l1[index->l1]) {
+ table->bt_l1[index->l1] = dma_alloc_coherent(dev, bt_size,
+ &table->bt_l1_dma_addr[index->l1],
GFP_KERNEL);
- if (!table->bt_l1[bt_l1_idx]) {
+ if (!table->bt_l1[index->l1]) {
ret = -ENOMEM;
- goto err_dma_alloc_l1;
- }
- bt_l1_allocated = 1;
- *(table->bt_l0[bt_l0_idx] + mhop.l1_idx) =
- table->bt_l1_dma_addr[bt_l1_idx];
-
- /* set base address to hardware */
- step_idx = 1;
- if (hr_dev->hw->set_hem(hr_dev, table, obj, step_idx)) {
- ret = -ENODEV;
- dev_err(dev, "set HEM base address to HW failed!\n");
- goto err_alloc_hem_buf;
+ goto err_alloc_hem;
}
+ index->inited |= HEM_INDEX_L1;
+ *(table->bt_l0[index->l0] + mhop->l1_idx) =
+ table->bt_l1_dma_addr[index->l1];
}
/*
* alloc buffer space chunk for QPC/MTPT/CQC/SRQC/SCCC.
* alloc bt space chunk for MTT/CQE.
*/
- size = table->type < HEM_TYPE_MTT ? buf_chunk_size : bt_chunk_size;
- table->hem[hem_idx] = hns_roce_alloc_hem(hr_dev,
- size >> PAGE_SHIFT,
- size,
- (table->lowmem ? GFP_KERNEL :
- GFP_HIGHUSER) | __GFP_NOWARN);
- if (!table->hem[hem_idx]) {
+ size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size : bt_size;
+ table->hem[index->buf] = hns_roce_alloc_hem(hr_dev, size);
+ if (!table->hem[index->buf]) {
ret = -ENOMEM;
- goto err_alloc_hem_buf;
+ goto err_alloc_hem;
}
- hns_roce_hem_first(table->hem[hem_idx], &iter);
- bt_ba = hns_roce_hem_addr(&iter);
+ index->inited |= HEM_INDEX_BUF;
+ bt_ba = table->hem[index->buf]->dma;
if (table->type < HEM_TYPE_MTT) {
- if (hop_num == 2) {
- *(table->bt_l1[bt_l1_idx] + mhop.l2_idx) = bt_ba;
- step_idx = 2;
- } else if (hop_num == 1) {
- *(table->bt_l0[bt_l0_idx] + mhop.l1_idx) = bt_ba;
- step_idx = 1;
- } else if (hop_num == HNS_ROCE_HOP_NUM_0) {
- step_idx = 0;
- } else {
- ret = -EINVAL;
- goto err_dma_alloc_l1;
+ if (mhop->hop_num == 2)
+ *(table->bt_l1[index->l1] + mhop->l2_idx) = bt_ba;
+ else if (mhop->hop_num == 1)
+ *(table->bt_l0[index->l0] + mhop->l1_idx) = bt_ba;
+ } else if (mhop->hop_num == 2) {
+ *(table->bt_l0[index->l0] + mhop->l1_idx) = bt_ba;
+ }
+
+ return 0;
+err_alloc_hem:
+ free_mhop_hem(hr_dev, table, mhop, index);
+out:
+ return ret;
+}
+
+static int set_mhop_hem(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, unsigned long obj,
+ struct hns_roce_hem_mhop *mhop,
+ struct hns_roce_hem_index *index)
+{
+ struct device *dev = hr_dev->dev;
+ u32 step_idx;
+ int ret = 0;
+
+ if (index->inited & HEM_INDEX_L0) {
+ ret = hr_dev->hw->set_hem(hr_dev, table, obj, 0);
+ if (ret) {
+ dev_err(dev, "set HEM step 0 failed!\n");
+ goto out;
}
+ }
- /* set HEM base address to hardware */
- if (hr_dev->hw->set_hem(hr_dev, table, obj, step_idx)) {
- ret = -ENODEV;
- dev_err(dev, "set HEM base address to HW failed!\n");
- goto err_alloc_hem_buf;
+ if (index->inited & HEM_INDEX_L1) {
+ ret = hr_dev->hw->set_hem(hr_dev, table, obj, 1);
+ if (ret) {
+ dev_err(dev, "set HEM step 1 failed!\n");
+ goto out;
}
- } else if (hop_num == 2) {
- *(table->bt_l0[bt_l0_idx] + mhop.l1_idx) = bt_ba;
}
- ++table->hem[hem_idx]->refcount;
- goto out;
+ if (index->inited & HEM_INDEX_BUF) {
+ if (mhop->hop_num == HNS_ROCE_HOP_NUM_0)
+ step_idx = 0;
+ else
+ step_idx = mhop->hop_num;
+ ret = hr_dev->hw->set_hem(hr_dev, table, obj, step_idx);
+ if (ret)
+ dev_err(dev, "set HEM step last failed!\n");
+ }
+out:
+ return ret;
+}
+
+static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table,
+ unsigned long obj)
+{
+ struct hns_roce_hem_index index = {};
+ struct hns_roce_hem_mhop mhop = {};
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ ret = calc_hem_config(hr_dev, table, obj, &mhop, &index);
+ if (ret) {
+ dev_err(dev, "calc hem config failed!\n");
+ return ret;
+ }
+
+ mutex_lock(&table->mutex);
+ if (table->hem[index.buf]) {
+ refcount_inc(&table->hem[index.buf]->refcount);
+ goto out;
+ }
-err_alloc_hem_buf:
- if (bt_l1_allocated) {
- dma_free_coherent(dev, bt_chunk_size, table->bt_l1[bt_l1_idx],
- table->bt_l1_dma_addr[bt_l1_idx]);
- table->bt_l1[bt_l1_idx] = NULL;
+ ret = alloc_mhop_hem(hr_dev, table, &mhop, &index);
+ if (ret) {
+ dev_err(dev, "alloc mhop hem failed!\n");
+ goto out;
}
-err_dma_alloc_l1:
- if (bt_l0_allocated) {
- dma_free_coherent(dev, bt_chunk_size, table->bt_l0[bt_l0_idx],
- table->bt_l0_dma_addr[bt_l0_idx]);
- table->bt_l0[bt_l0_idx] = NULL;
+ /* set HEM base address to hardware */
+ if (table->type < HEM_TYPE_MTT) {
+ ret = set_mhop_hem(hr_dev, table, obj, &mhop, &index);
+ if (ret) {
+ dev_err(dev, "set HEM address to HW failed!\n");
+ goto err_alloc;
+ }
}
+ refcount_set(&table->hem[index.buf]->refcount, 1);
+ goto out;
+
+err_alloc:
+ free_mhop_hem(hr_dev, table, &mhop, &index);
out:
mutex_unlock(&table->mutex);
return ret;
@@ -580,158 +525,121 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, unsigned long obj)
{
struct device *dev = hr_dev->dev;
- int ret = 0;
unsigned long i;
+ int ret = 0;
if (hns_roce_check_whether_mhop(hr_dev, table->type))
return hns_roce_table_mhop_get(hr_dev, table, obj);
- i = (obj & (table->num_obj - 1)) / (table->table_chunk_size /
- table->obj_size);
+ i = obj / (table->table_chunk_size / table->obj_size);
mutex_lock(&table->mutex);
if (table->hem[i]) {
- ++table->hem[i]->refcount;
+ refcount_inc(&table->hem[i]->refcount);
goto out;
}
- table->hem[i] = hns_roce_alloc_hem(hr_dev,
- table->table_chunk_size >> PAGE_SHIFT,
- table->table_chunk_size,
- (table->lowmem ? GFP_KERNEL :
- GFP_HIGHUSER) | __GFP_NOWARN);
+ table->hem[i] = hns_roce_alloc_hem(hr_dev, table->table_chunk_size);
if (!table->hem[i]) {
ret = -ENOMEM;
goto out;
}
/* Set HEM base address(128K/page, pa) to Hardware */
- if (hns_roce_set_hem(hr_dev, table, obj)) {
+ ret = hr_dev->hw->set_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT);
+ if (ret) {
hns_roce_free_hem(hr_dev, table->hem[i]);
table->hem[i] = NULL;
- ret = -ENODEV;
- dev_err(dev, "set HEM base address to HW failed.\n");
+ dev_err(dev, "set HEM base address to HW failed, ret = %d.\n",
+ ret);
goto out;
}
- ++table->hem[i]->refcount;
+ refcount_set(&table->hem[i]->refcount, 1);
out:
mutex_unlock(&table->mutex);
return ret;
}
-EXPORT_SYMBOL_GPL(hns_roce_table_get);
-static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table,
- unsigned long obj,
- int check_refcount)
+static void clear_mhop_hem(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, unsigned long obj,
+ struct hns_roce_hem_mhop *mhop,
+ struct hns_roce_hem_index *index)
{
struct device *dev = hr_dev->dev;
- struct hns_roce_hem_mhop mhop;
- unsigned long mhop_obj = obj;
- u32 bt_chunk_size;
+ u32 hop_num = mhop->hop_num;
u32 chunk_ba_num;
- u32 hop_num;
- u32 start_idx;
- u32 bt_num;
- u64 hem_idx;
- u64 bt_l1_idx = 0;
+ u32 step_idx;
int ret;
- ret = hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop);
- if (ret)
- return;
+ index->inited = HEM_INDEX_BUF;
+ chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN;
+ if (check_whether_bt_num_2(table->type, hop_num)) {
+ if (hns_roce_check_hem_null(table->hem, index->buf,
+ chunk_ba_num, table->num_hem))
+ index->inited |= HEM_INDEX_L0;
+ } else if (check_whether_bt_num_3(table->type, hop_num)) {
+ if (hns_roce_check_hem_null(table->hem, index->buf,
+ chunk_ba_num, table->num_hem)) {
+ index->inited |= HEM_INDEX_L1;
+ if (hns_roce_check_bt_null(table->bt_l1, index->l1,
+ chunk_ba_num))
+ index->inited |= HEM_INDEX_L0;
+ }
+ }
- bt_chunk_size = mhop.bt_chunk_size;
- hop_num = mhop.hop_num;
- chunk_ba_num = bt_chunk_size / 8;
+ if (table->type < HEM_TYPE_MTT) {
+ if (hop_num == HNS_ROCE_HOP_NUM_0)
+ step_idx = 0;
+ else
+ step_idx = hop_num;
- bt_num = hns_roce_get_bt_num(table->type, hop_num);
- switch (bt_num) {
- case 3:
- hem_idx = mhop.l0_idx * chunk_ba_num * chunk_ba_num +
- mhop.l1_idx * chunk_ba_num + mhop.l2_idx;
- bt_l1_idx = mhop.l0_idx * chunk_ba_num + mhop.l1_idx;
- break;
- case 2:
- hem_idx = mhop.l0_idx * chunk_ba_num + mhop.l1_idx;
- break;
- case 1:
- hem_idx = mhop.l0_idx;
- break;
- default:
- dev_err(dev, "Table %d not support hop_num = %d!\n",
- table->type, hop_num);
- return;
+ ret = hr_dev->hw->clear_hem(hr_dev, table, obj, step_idx);
+ if (ret)
+ dev_warn(dev, "failed to clear hop%u HEM, ret = %d.\n",
+ hop_num, ret);
+
+ if (index->inited & HEM_INDEX_L1) {
+ ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 1);
+ if (ret)
+ dev_warn(dev, "failed to clear HEM step 1, ret = %d.\n",
+ ret);
+ }
+
+ if (index->inited & HEM_INDEX_L0) {
+ ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 0);
+ if (ret)
+ dev_warn(dev, "failed to clear HEM step 0, ret = %d.\n",
+ ret);
+ }
}
+}
- mutex_lock(&table->mutex);
+static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table,
+ unsigned long obj,
+ int check_refcount)
+{
+ struct hns_roce_hem_index index = {};
+ struct hns_roce_hem_mhop mhop = {};
+ struct device *dev = hr_dev->dev;
+ int ret;
- if (check_refcount && (--table->hem[hem_idx]->refcount > 0)) {
- mutex_unlock(&table->mutex);
+ ret = calc_hem_config(hr_dev, table, obj, &mhop, &index);
+ if (ret) {
+ dev_err(dev, "calc hem config failed!\n");
return;
}
- if (table->type < HEM_TYPE_MTT && hop_num == 1) {
- if (hr_dev->hw->clear_hem(hr_dev, table, obj, 1))
- dev_warn(dev, "Clear HEM base address failed.\n");
- } else if (table->type < HEM_TYPE_MTT && hop_num == 2) {
- if (hr_dev->hw->clear_hem(hr_dev, table, obj, 2))
- dev_warn(dev, "Clear HEM base address failed.\n");
- } else if (table->type < HEM_TYPE_MTT &&
- hop_num == HNS_ROCE_HOP_NUM_0) {
- if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0))
- dev_warn(dev, "Clear HEM base address failed.\n");
- }
+ if (!check_refcount)
+ mutex_lock(&table->mutex);
+ else if (!refcount_dec_and_mutex_lock(&table->hem[index.buf]->refcount,
+ &table->mutex))
+ return;
- /*
- * free buffer space chunk for QPC/MTPT/CQC/SRQC/SCCC.
- * free bt space chunk for MTT/CQE.
- */
- hns_roce_free_hem(hr_dev, table->hem[hem_idx]);
- table->hem[hem_idx] = NULL;
-
- if (check_whether_bt_num_2(table->type, hop_num)) {
- start_idx = mhop.l0_idx * chunk_ba_num;
- if (hns_roce_check_hem_null(table->hem, start_idx,
- chunk_ba_num)) {
- if (table->type < HEM_TYPE_MTT &&
- hr_dev->hw->clear_hem(hr_dev, table, obj, 0))
- dev_warn(dev, "Clear HEM base address failed.\n");
-
- dma_free_coherent(dev, bt_chunk_size,
- table->bt_l0[mhop.l0_idx],
- table->bt_l0_dma_addr[mhop.l0_idx]);
- table->bt_l0[mhop.l0_idx] = NULL;
- }
- } else if (check_whether_bt_num_3(table->type, hop_num)) {
- start_idx = mhop.l0_idx * chunk_ba_num * chunk_ba_num +
- mhop.l1_idx * chunk_ba_num;
- if (hns_roce_check_hem_null(table->hem, start_idx,
- chunk_ba_num)) {
- if (hr_dev->hw->clear_hem(hr_dev, table, obj, 1))
- dev_warn(dev, "Clear HEM base address failed.\n");
-
- dma_free_coherent(dev, bt_chunk_size,
- table->bt_l1[bt_l1_idx],
- table->bt_l1_dma_addr[bt_l1_idx]);
- table->bt_l1[bt_l1_idx] = NULL;
-
- start_idx = mhop.l0_idx * chunk_ba_num;
- if (hns_roce_check_bt_null(table->bt_l1, start_idx,
- chunk_ba_num)) {
- if (hr_dev->hw->clear_hem(hr_dev, table, obj,
- 0))
- dev_warn(dev, "Clear HEM base address failed.\n");
-
- dma_free_coherent(dev, bt_chunk_size,
- table->bt_l0[mhop.l0_idx],
- table->bt_l0_dma_addr[mhop.l0_idx]);
- table->bt_l0[mhop.l0_idx] = NULL;
- }
- }
- }
+ clear_mhop_hem(hr_dev, table, obj, &mhop, &index);
+ free_mhop_hem(hr_dev, table, &mhop, &index);
mutex_unlock(&table->mutex);
}
@@ -741,72 +649,67 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev,
{
struct device *dev = hr_dev->dev;
unsigned long i;
+ int ret;
if (hns_roce_check_whether_mhop(hr_dev, table->type)) {
hns_roce_table_mhop_put(hr_dev, table, obj, 1);
return;
}
- i = (obj & (table->num_obj - 1)) /
- (table->table_chunk_size / table->obj_size);
+ i = obj / (table->table_chunk_size / table->obj_size);
- mutex_lock(&table->mutex);
+ if (!refcount_dec_and_mutex_lock(&table->hem[i]->refcount,
+ &table->mutex))
+ return;
- if (--table->hem[i]->refcount == 0) {
- /* Clear HEM base address */
- if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0))
- dev_warn(dev, "Clear HEM base address failed.\n");
+ ret = hr_dev->hw->clear_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT);
+ if (ret)
+ dev_warn_ratelimited(dev, "failed to clear HEM base address, ret = %d.\n",
+ ret);
- hns_roce_free_hem(hr_dev, table->hem[i]);
- table->hem[i] = NULL;
- }
+ hns_roce_free_hem(hr_dev, table->hem[i]);
+ table->hem[i] = NULL;
mutex_unlock(&table->mutex);
}
-EXPORT_SYMBOL_GPL(hns_roce_table_put);
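The refcount_t conversion above means the first hns_roce_table_get() does refcount_set(..., 1), later callers only refcount_inc(), and hns_roce_table_put() takes the table mutex only when the last reference drops. A minimal usage sketch, assuming hr_dev, table and obj are already set up; ctx_entry and dma_handle are illustrative local names, not taken from the patch:

        ret = hns_roce_table_get(hr_dev, table, obj);   /* allocate HEM or bump refcount */
        if (ret)
                return ret;

        ctx_entry = hns_roce_table_find(hr_dev, table, obj, &dma_handle);
        if (!ctx_entry) {
                hns_roce_table_put(hr_dev, table, obj); /* drop the reference we took */
                return -ENOMEM;
        }

        /* ... use ctx_entry / dma_handle ... */

        hns_roce_table_put(hr_dev, table, obj);         /* final put frees and clears HW */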
void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table,
unsigned long obj, dma_addr_t *dma_handle)
{
- struct hns_roce_hem_chunk *chunk;
struct hns_roce_hem_mhop mhop;
struct hns_roce_hem *hem;
- void *addr = NULL;
unsigned long mhop_obj = obj;
unsigned long obj_per_chunk;
unsigned long idx_offset;
int offset, dma_offset;
- int length;
- int i, j;
+ void *addr = NULL;
u32 hem_idx = 0;
-
- if (!table->lowmem)
- return NULL;
+ int i, j;
mutex_lock(&table->mutex);
if (!hns_roce_check_whether_mhop(hr_dev, table->type)) {
obj_per_chunk = table->table_chunk_size / table->obj_size;
- hem = table->hem[(obj & (table->num_obj - 1)) / obj_per_chunk];
- idx_offset = (obj & (table->num_obj - 1)) % obj_per_chunk;
+ hem = table->hem[obj / obj_per_chunk];
+ idx_offset = obj % obj_per_chunk;
dma_offset = offset = idx_offset * table->obj_size;
} else {
u32 seg_size = 64; /* 8 bytes per BA and 8 BA per segment */
- hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop);
+ if (hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop))
+ goto out;
/* mtt mhop */
i = mhop.l0_idx;
j = mhop.l1_idx;
if (mhop.hop_num == 2)
- hem_idx = i * (mhop.bt_chunk_size / 8) + j;
+ hem_idx = i * (mhop.bt_chunk_size / BA_BYTE_LEN) + j;
else if (mhop.hop_num == 1 ||
mhop.hop_num == HNS_ROCE_HOP_NUM_0)
hem_idx = i;
hem = table->hem[hem_idx];
- dma_offset = offset = (obj & (table->num_obj - 1)) * seg_size %
- mhop.bt_chunk_size;
+ dma_offset = offset = obj * seg_size % mhop.bt_chunk_size;
if (mhop.hop_num == 2)
dma_offset = offset = 0;
}
@@ -814,192 +717,49 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
if (!hem)
goto out;
- list_for_each_entry(chunk, &hem->chunk_list, list) {
- for (i = 0; i < chunk->npages; ++i) {
- length = sg_dma_len(&chunk->mem[i]);
- if (dma_handle && dma_offset >= 0) {
- if (length > (u32)dma_offset)
- *dma_handle = sg_dma_address(
- &chunk->mem[i]) + dma_offset;
- dma_offset -= length;
- }
-
- if (length > (u32)offset) {
- addr = chunk->buf[i] + offset;
- goto out;
- }
- offset -= length;
- }
- }
+ *dma_handle = hem->dma + dma_offset;
+ addr = hem->buf + offset;
out:
mutex_unlock(&table->mutex);
return addr;
}
-EXPORT_SYMBOL_GPL(hns_roce_table_find);
-
-int hns_roce_table_get_range(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table,
- unsigned long start, unsigned long end)
-{
- struct hns_roce_hem_mhop mhop;
- unsigned long inc = table->table_chunk_size / table->obj_size;
- unsigned long i;
- int ret;
-
- if (hns_roce_check_whether_mhop(hr_dev, table->type)) {
- hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop);
- inc = mhop.bt_chunk_size / table->obj_size;
- }
-
- /* Allocate MTT entry memory according to chunk(128K) */
- for (i = start; i <= end; i += inc) {
- ret = hns_roce_table_get(hr_dev, table, i);
- if (ret)
- goto fail;
- }
-
- return 0;
-
-fail:
- while (i > start) {
- i -= inc;
- hns_roce_table_put(hr_dev, table, i);
- }
- return ret;
-}
-
-void hns_roce_table_put_range(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table,
- unsigned long start, unsigned long end)
-{
- struct hns_roce_hem_mhop mhop;
- unsigned long inc = table->table_chunk_size / table->obj_size;
- unsigned long i;
-
- if (hns_roce_check_whether_mhop(hr_dev, table->type)) {
- hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop);
- inc = mhop.bt_chunk_size / table->obj_size;
- }
-
- for (i = start; i <= end; i += inc)
- hns_roce_table_put(hr_dev, table, i);
-}
int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, u32 type,
- unsigned long obj_size, unsigned long nobj,
- int use_lowmem)
+ unsigned long obj_size, unsigned long nobj)
{
- struct device *dev = hr_dev->dev;
unsigned long obj_per_chunk;
unsigned long num_hem;
if (!hns_roce_check_whether_mhop(hr_dev, type)) {
table->table_chunk_size = hr_dev->caps.chunk_sz;
obj_per_chunk = table->table_chunk_size / obj_size;
- num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk;
+ num_hem = DIV_ROUND_UP(nobj, obj_per_chunk);
table->hem = kcalloc(num_hem, sizeof(*table->hem), GFP_KERNEL);
if (!table->hem)
return -ENOMEM;
} else {
+ struct hns_roce_hem_mhop mhop = {};
unsigned long buf_chunk_size;
unsigned long bt_chunk_size;
unsigned long bt_chunk_num;
- unsigned long num_bt_l0 = 0;
+ unsigned long num_bt_l0;
u32 hop_num;
- switch (type) {
- case HEM_TYPE_QPC:
- buf_chunk_size = 1 << (hr_dev->caps.qpc_buf_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = 1 << (hr_dev->caps.qpc_ba_pg_sz
- + PAGE_SHIFT);
- num_bt_l0 = hr_dev->caps.qpc_bt_num;
- hop_num = hr_dev->caps.qpc_hop_num;
- break;
- case HEM_TYPE_MTPT:
- buf_chunk_size = 1 << (hr_dev->caps.mpt_buf_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = 1 << (hr_dev->caps.mpt_ba_pg_sz
- + PAGE_SHIFT);
- num_bt_l0 = hr_dev->caps.mpt_bt_num;
- hop_num = hr_dev->caps.mpt_hop_num;
- break;
- case HEM_TYPE_CQC:
- buf_chunk_size = 1 << (hr_dev->caps.cqc_buf_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = 1 << (hr_dev->caps.cqc_ba_pg_sz
- + PAGE_SHIFT);
- num_bt_l0 = hr_dev->caps.cqc_bt_num;
- hop_num = hr_dev->caps.cqc_hop_num;
- break;
- case HEM_TYPE_SCCC:
- buf_chunk_size = 1 << (hr_dev->caps.sccc_buf_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = 1 << (hr_dev->caps.sccc_ba_pg_sz
- + PAGE_SHIFT);
- num_bt_l0 = hr_dev->caps.sccc_bt_num;
- hop_num = hr_dev->caps.sccc_hop_num;
- break;
- case HEM_TYPE_QPC_TIMER:
- buf_chunk_size = 1 << (hr_dev->caps.qpc_timer_buf_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = 1 << (hr_dev->caps.qpc_timer_ba_pg_sz
- + PAGE_SHIFT);
- num_bt_l0 = hr_dev->caps.qpc_timer_bt_num;
- hop_num = hr_dev->caps.qpc_timer_hop_num;
- break;
- case HEM_TYPE_CQC_TIMER:
- buf_chunk_size = 1 << (hr_dev->caps.cqc_timer_buf_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = 1 << (hr_dev->caps.cqc_timer_ba_pg_sz
- + PAGE_SHIFT);
- num_bt_l0 = hr_dev->caps.cqc_timer_bt_num;
- hop_num = hr_dev->caps.cqc_timer_hop_num;
- break;
- case HEM_TYPE_SRQC:
- buf_chunk_size = 1 << (hr_dev->caps.srqc_buf_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = 1 << (hr_dev->caps.srqc_ba_pg_sz
- + PAGE_SHIFT);
- num_bt_l0 = hr_dev->caps.srqc_bt_num;
- hop_num = hr_dev->caps.srqc_hop_num;
- break;
- case HEM_TYPE_MTT:
- buf_chunk_size = 1 << (hr_dev->caps.mtt_ba_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = buf_chunk_size;
- hop_num = hr_dev->caps.mtt_hop_num;
- break;
- case HEM_TYPE_CQE:
- buf_chunk_size = 1 << (hr_dev->caps.cqe_ba_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = buf_chunk_size;
- hop_num = hr_dev->caps.cqe_hop_num;
- break;
- case HEM_TYPE_SRQWQE:
- buf_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = buf_chunk_size;
- hop_num = hr_dev->caps.srqwqe_hop_num;
- break;
- case HEM_TYPE_IDX:
- buf_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz
- + PAGE_SHIFT);
- bt_chunk_size = buf_chunk_size;
- hop_num = hr_dev->caps.idx_hop_num;
- break;
- default:
- dev_err(dev,
- "Table %d not support to init hem table here!\n",
- type);
+ if (get_hem_table_config(hr_dev, &mhop, type))
return -EINVAL;
- }
+
+ buf_chunk_size = mhop.buf_chunk_size;
+ bt_chunk_size = mhop.bt_chunk_size;
+ num_bt_l0 = mhop.ba_l0_num;
+ hop_num = mhop.hop_num;
+
obj_per_chunk = buf_chunk_size / obj_size;
- num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk;
- bt_chunk_num = bt_chunk_size / 8;
+ num_hem = DIV_ROUND_UP(nobj, obj_per_chunk);
+ bt_chunk_num = bt_chunk_size / BA_BYTE_LEN;
+
if (type >= HEM_TYPE_MTT)
num_bt_l0 = bt_chunk_num;
@@ -1011,8 +771,7 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
if (check_whether_bt_num_3(type, hop_num)) {
unsigned long num_bt_l1;
- num_bt_l1 = (num_hem + bt_chunk_num - 1) /
- bt_chunk_num;
+ num_bt_l1 = DIV_ROUND_UP(num_hem, bt_chunk_num);
table->bt_l1 = kcalloc(num_bt_l1,
sizeof(*table->bt_l1),
GFP_KERNEL);
@@ -1044,9 +803,7 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
table->type = type;
table->num_hem = num_hem;
- table->num_obj = nobj;
table->obj_size = obj_size;
- table->lowmem = use_lowmem;
mutex_init(&table->mutex);
return 0;
@@ -1076,10 +833,11 @@ static void hns_roce_cleanup_mhop_hem_table(struct hns_roce_dev *hr_dev,
{
struct hns_roce_hem_mhop mhop;
u32 buf_chunk_size;
- int i;
u64 obj;
+ int i;
- hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop);
+ if (hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop))
+ return;
buf_chunk_size = table->type < HEM_TYPE_MTT ? mhop.buf_chunk_size :
mhop.bt_chunk_size;
@@ -1106,33 +864,33 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
{
struct device *dev = hr_dev->dev;
unsigned long i;
+ int obj;
+ int ret;
if (hns_roce_check_whether_mhop(hr_dev, table->type)) {
hns_roce_cleanup_mhop_hem_table(hr_dev, table);
+ mutex_destroy(&table->mutex);
return;
}
for (i = 0; i < table->num_hem; ++i)
if (table->hem[i]) {
- if (hr_dev->hw->clear_hem(hr_dev, table,
- i * table->table_chunk_size / table->obj_size, 0))
- dev_err(dev, "Clear HEM base address failed.\n");
+ obj = i * table->table_chunk_size / table->obj_size;
+ ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 0);
+ if (ret)
+ dev_err(dev, "clear HEM base address failed, ret = %d.\n",
+ ret);
hns_roce_free_hem(hr_dev, table->hem[i]);
}
+ mutex_destroy(&table->mutex);
kfree(table->hem);
}
void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev)
{
- if ((hr_dev->caps.num_idx_segs))
- hns_roce_cleanup_hem_table(hr_dev,
- &hr_dev->mr_table.mtt_idx_table);
- if (hr_dev->caps.num_srqwqe_segs)
- hns_roce_cleanup_hem_table(hr_dev,
- &hr_dev->mr_table.mtt_srqwqe_table);
- if (hr_dev->caps.srqc_entry_sz)
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->srq_table.table);
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table);
@@ -1142,17 +900,568 @@ void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev)
if (hr_dev->caps.cqc_timer_entry_sz)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->cqc_timer_table);
- if (hr_dev->caps.sccc_entry_sz)
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->qp_table.sccc_table);
if (hr_dev->caps.trrl_entry_sz)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->qp_table.trrl_table);
+
+ if (hr_dev->caps.gmv_entry_sz)
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->gmv_table);
+
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table);
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table);
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table);
- if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
- hns_roce_cleanup_hem_table(hr_dev,
- &hr_dev->mr_table.mtt_cqe_table);
- hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table);
+}
+
+struct hns_roce_hem_item {
+ struct list_head list; /* link all hems in the same bt level */
+ struct list_head sibling; /* link all hems in last hop for mtt */
+ void *addr;
+ dma_addr_t dma_addr;
+ size_t count; /* max number of ba entries */
+ int start; /* start buf offset in this hem */
+ int end; /* end buf offset in this hem */
+ bool exist_bt;
+};
+
+/* All HEM items are linked in a tree structure */
+struct hns_roce_hem_head {
+ struct list_head branch[HNS_ROCE_MAX_BT_REGION];
+ struct list_head root;
+ struct list_head leaf;
+};
+
+static struct hns_roce_hem_item *
+hem_list_alloc_item(struct hns_roce_dev *hr_dev, int start, int end, int count,
+ bool exist_bt)
+{
+ struct hns_roce_hem_item *hem;
+
+ hem = kzalloc(sizeof(*hem), GFP_KERNEL);
+ if (!hem)
+ return NULL;
+
+ if (exist_bt) {
+ hem->addr = dma_alloc_coherent(hr_dev->dev, count * BA_BYTE_LEN,
+ &hem->dma_addr, GFP_KERNEL);
+ if (!hem->addr) {
+ kfree(hem);
+ return NULL;
+ }
+ }
+
+ hem->exist_bt = exist_bt;
+ hem->count = count;
+ hem->start = start;
+ hem->end = end;
+ INIT_LIST_HEAD(&hem->list);
+ INIT_LIST_HEAD(&hem->sibling);
+
+ return hem;
+}
+
+static void hem_list_free_item(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_item *hem)
+{
+ if (hem->exist_bt)
+ dma_free_coherent(hr_dev->dev, hem->count * BA_BYTE_LEN,
+ hem->addr, hem->dma_addr);
+ kfree(hem);
+}
+
+static void hem_list_free_all(struct hns_roce_dev *hr_dev,
+ struct list_head *head)
+{
+ struct hns_roce_hem_item *hem, *temp_hem;
+
+ list_for_each_entry_safe(hem, temp_hem, head, list) {
+ list_del(&hem->list);
+ hem_list_free_item(hr_dev, hem);
+ }
+}
+
+static void hem_list_link_bt(void *base_addr, u64 table_addr)
+{
+ *(u64 *)(base_addr) = table_addr;
+}
+
+/* assign L0 table address to hem from root bt */
+static void hem_list_assign_bt(struct hns_roce_hem_item *hem, void *cpu_addr,
+ u64 phy_addr)
+{
+ hem->addr = cpu_addr;
+ hem->dma_addr = (dma_addr_t)phy_addr;
+}
+
+static inline bool hem_list_page_is_in_range(struct hns_roce_hem_item *hem,
+ int offset)
+{
+ return (hem->start <= offset && offset <= hem->end);
+}
+
+static struct hns_roce_hem_item *hem_list_search_item(struct list_head *ba_list,
+ int page_offset)
+{
+ struct hns_roce_hem_item *hem, *temp_hem;
+ struct hns_roce_hem_item *found = NULL;
+
+ list_for_each_entry_safe(hem, temp_hem, ba_list, list) {
+ if (hem_list_page_is_in_range(hem, page_offset)) {
+ found = hem;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static bool hem_list_is_bottom_bt(int hopnum, int bt_level)
+{
+ /*
+ * hopnum base address table levels
+ * 0 L0(buf)
+ * 1 L0 -> buf
+ * 2 L0 -> L1 -> buf
+ * 3 L0 -> L1 -> L2 -> buf
+ */
+ return bt_level >= (hopnum ? hopnum - 1 : hopnum);
+}
+
+/*
+ * calc base address entries num
+ * @hopnum: num of multihop addressing
+ * @bt_level: base address table level
+ * @unit: ba entries per bt page
+ */
+static u64 hem_list_calc_ba_range(int hopnum, int bt_level, int unit)
+{
+ u64 step;
+ int max;
+ int i;
+
+ if (hopnum <= bt_level)
+ return 0;
+ /*
+ * hopnum bt_level range
+ * 1 0 unit
+ * ------------
+ * 2 0 unit * unit
+ * 2 1 unit
+ * ------------
+ * 3 0 unit * unit * unit
+ * 3 1 unit * unit
+ * 3 2 unit
+ */
+ step = 1;
+ max = hopnum - bt_level;
+ for (i = 0; i < max; i++)
+ step = step * unit;
+
+ return step;
+}
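Worked numbers for hem_list_calc_ba_range(), assuming a 4 KiB bt page and 8-byte BAs so unit = 4096 / BA_BYTE_LEN = 512 (values are illustrative):

        /* hopnum = 1, bt_level = 0 -> 512                 (one L0 bt covers 512 buffer pages)
         * hopnum = 2, bt_level = 1 -> 512                 (one L1 bt covers 512 buffer pages)
         * hopnum = 2, bt_level = 0 -> 512 * 512 = 262144
         * hopnum <= bt_level       -> 0                   (no table at or below this level)
         */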
+
+/*
+ * calc the root ba entries which could cover all regions
+ * @regions: buf region array
+ * @region_cnt: array size of @regions
+ * @unit: ba entries per bt page
+ */
+int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions,
+ int region_cnt, int unit)
+{
+ struct hns_roce_buf_region *r;
+ int total = 0;
+ u64 step;
+ int i;
+
+ for (i = 0; i < region_cnt; i++) {
+ r = (struct hns_roce_buf_region *)&regions[i];
+ /* when r->hopnum = 0, the region should not occupy root_ba. */
+ if (!r->hopnum)
+ continue;
+
+ if (r->hopnum > 1) {
+ step = hem_list_calc_ba_range(r->hopnum, 1, unit);
+ if (step > 0)
+ total += (r->count + step - 1) / step;
+ } else {
+ total += r->count;
+ }
+ }
+
+ return total;
+}
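Worked example for hns_roce_hem_list_calc_root_ba() with the same unit = 512; the region sizes are made up for illustration:

        /* regions[0]: hopnum = 2, count = 1000 -> DIV_ROUND_UP(1000, 512) = 2 root entries
         * regions[1]: hopnum = 1, count = 3    -> 3 root entries (buffers linked directly)
         * regions[2]: hopnum = 0, count = 8    -> 0 root entries (skips the root bt)
         * total root BA entries = 2 + 3 + 0 = 5
         */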
+
+static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev,
+ const struct hns_roce_buf_region *r, int unit,
+ int offset, struct list_head *mid_bt,
+ struct list_head *btm_bt)
+{
+ struct hns_roce_hem_item *hem_ptrs[HNS_ROCE_MAX_BT_LEVEL] = { NULL };
+ struct list_head temp_list[HNS_ROCE_MAX_BT_LEVEL];
+ struct hns_roce_hem_item *cur, *pre;
+ const int hopnum = r->hopnum;
+ int start_aligned;
+ int distance;
+ int ret = 0;
+ int max_ofs;
+ int level;
+ u64 step;
+ int end;
+
+ if (hopnum <= 1)
+ return 0;
+
+ if (hopnum > HNS_ROCE_MAX_BT_LEVEL) {
+ dev_err(hr_dev->dev, "invalid hopnum %d!\n", hopnum);
+ return -EINVAL;
+ }
+
+ if (offset < r->offset) {
+ dev_err(hr_dev->dev, "invalid offset %d, min %u!\n",
+ offset, r->offset);
+ return -EINVAL;
+ }
+
+ distance = offset - r->offset;
+ max_ofs = r->offset + r->count - 1;
+ for (level = 0; level < hopnum; level++)
+ INIT_LIST_HEAD(&temp_list[level]);
+
+ /* config L1 bt to last bt and link them to corresponding parent */
+ for (level = 1; level < hopnum; level++) {
+ if (!hem_list_is_bottom_bt(hopnum, level)) {
+ cur = hem_list_search_item(&mid_bt[level], offset);
+ if (cur) {
+ hem_ptrs[level] = cur;
+ continue;
+ }
+ }
+
+ step = hem_list_calc_ba_range(hopnum, level, unit);
+ if (step < 1) {
+ ret = -EINVAL;
+ goto err_exit;
+ }
+
+ start_aligned = (distance / step) * step + r->offset;
+ end = min_t(u64, start_aligned + step - 1, max_ofs);
+ cur = hem_list_alloc_item(hr_dev, start_aligned, end, unit,
+ true);
+ if (!cur) {
+ ret = -ENOMEM;
+ goto err_exit;
+ }
+ hem_ptrs[level] = cur;
+ list_add(&cur->list, &temp_list[level]);
+ if (hem_list_is_bottom_bt(hopnum, level))
+ list_add(&cur->sibling, &temp_list[0]);
+
+ /* link bt to parent bt */
+ if (level > 1) {
+ pre = hem_ptrs[level - 1];
+ step = (cur->start - pre->start) / step * BA_BYTE_LEN;
+ hem_list_link_bt(pre->addr + step, cur->dma_addr);
+ }
+ }
+
+ list_splice(&temp_list[0], btm_bt);
+ for (level = 1; level < hopnum; level++)
+ list_splice(&temp_list[level], &mid_bt[level]);
+
+ return 0;
+
+err_exit:
+ for (level = 1; level < hopnum; level++)
+ hem_list_free_all(hr_dev, &temp_list[level]);
+
+ return ret;
+}
+
+static struct hns_roce_hem_item *
+alloc_root_hem(struct hns_roce_dev *hr_dev, int unit, int *max_ba_num,
+ const struct hns_roce_buf_region *regions, int region_cnt)
+{
+ const struct hns_roce_buf_region *r;
+ struct hns_roce_hem_item *hem;
+ int ba_num;
+ int offset;
+
+ ba_num = hns_roce_hem_list_calc_root_ba(regions, region_cnt, unit);
+ if (ba_num < 1)
+ return ERR_PTR(-ENOMEM);
+
+ if (ba_num > unit)
+ return ERR_PTR(-ENOBUFS);
+
+ offset = regions[0].offset;
+ /* point to the last region */
+ r = &regions[region_cnt - 1];
+ hem = hem_list_alloc_item(hr_dev, offset, r->offset + r->count - 1,
+ ba_num, true);
+ if (!hem)
+ return ERR_PTR(-ENOMEM);
+
+ *max_ba_num = ba_num;
+
+ return hem;
+}
+
+static int alloc_fake_root_bt(struct hns_roce_dev *hr_dev, void *cpu_base,
+ u64 phy_base, const struct hns_roce_buf_region *r,
+ struct list_head *branch_head,
+ struct list_head *leaf_head)
+{
+ struct hns_roce_hem_item *hem;
+
+ /* This is on the has_mtt branch. If r->hopnum
+ * is 0, there is no root_ba to reuse for the
+ * region's fake hem, so a dma_alloc request is
+ * necessary here.
+ */
+ hem = hem_list_alloc_item(hr_dev, r->offset, r->offset + r->count - 1,
+ r->count, !r->hopnum);
+ if (!hem)
+ return -ENOMEM;
+
+ /* The root_ba can be reused only when r->hopnum > 0. */
+ if (r->hopnum)
+ hem_list_assign_bt(hem, cpu_base, phy_base);
+ list_add(&hem->list, branch_head);
+ list_add(&hem->sibling, leaf_head);
+
+ /* If r->hopnum == 0, 0 is returned,
+ * so that the root_bt entry is not occupied.
+ */
+ return r->hopnum ? r->count : 0;
+}
+
+static int setup_middle_bt(struct hns_roce_dev *hr_dev, void *cpu_base,
+ int unit, const struct hns_roce_buf_region *r,
+ const struct list_head *branch_head)
+{
+ struct hns_roce_hem_item *hem, *temp_hem;
+ int total = 0;
+ int offset;
+ u64 step;
+
+ step = hem_list_calc_ba_range(r->hopnum, 1, unit);
+ if (step < 1)
+ return -EINVAL;
+
+ /* if a mid bt exists, link L1 to L0 */
+ list_for_each_entry_safe(hem, temp_hem, branch_head, list) {
+ offset = (hem->start - r->offset) / step * BA_BYTE_LEN;
+ hem_list_link_bt(cpu_base + offset, hem->dma_addr);
+ total++;
+ }
+
+ return total;
+}
+
+static int
+setup_root_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list,
+ int unit, int max_ba_num, struct hns_roce_hem_head *head,
+ const struct hns_roce_buf_region *regions, int region_cnt)
+{
+ const struct hns_roce_buf_region *r;
+ struct hns_roce_hem_item *root_hem;
+ void *cpu_base;
+ u64 phy_base;
+ int i, total;
+ int ret;
+
+ root_hem = list_first_entry(&head->root,
+ struct hns_roce_hem_item, list);
+ if (!root_hem)
+ return -ENOMEM;
+
+ total = 0;
+ for (i = 0; i < region_cnt && total <= max_ba_num; i++) {
+ r = &regions[i];
+ if (!r->count)
+ continue;
+
+ /* all regions' mid[x][0] share the root_bt's trunk */
+ cpu_base = root_hem->addr + total * BA_BYTE_LEN;
+ phy_base = root_hem->dma_addr + total * BA_BYTE_LEN;
+
+ /* if hopnum is 0 or 1, cut a new fake hem from the root bt,
+ * whose address is shared with all regions.
+ */
+ if (hem_list_is_bottom_bt(r->hopnum, 0))
+ ret = alloc_fake_root_bt(hr_dev, cpu_base, phy_base, r,
+ &head->branch[i], &head->leaf);
+ else
+ ret = setup_middle_bt(hr_dev, cpu_base, unit, r,
+ &hem_list->mid_bt[i][1]);
+
+ if (ret < 0)
+ return ret;
+
+ total += ret;
+ }
+
+ list_splice(&head->leaf, &hem_list->btm_bt);
+ list_splice(&head->root, &hem_list->root_bt);
+ for (i = 0; i < region_cnt; i++)
+ list_splice(&head->branch[i], &hem_list->mid_bt[i][0]);
+
+ return 0;
+}
+
+static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list, int unit,
+ const struct hns_roce_buf_region *regions,
+ int region_cnt)
+{
+ struct hns_roce_hem_item *root_hem;
+ struct hns_roce_hem_head head;
+ int max_ba_num;
+ int ret;
+ int i;
+
+ root_hem = hem_list_search_item(&hem_list->root_bt, regions[0].offset);
+ if (root_hem)
+ return 0;
+
+ max_ba_num = 0;
+ root_hem = alloc_root_hem(hr_dev, unit, &max_ba_num, regions,
+ region_cnt);
+ if (IS_ERR(root_hem))
+ return PTR_ERR(root_hem);
+
+ /* List head for storing all allocated HEM items */
+ INIT_LIST_HEAD(&head.root);
+ INIT_LIST_HEAD(&head.leaf);
+ for (i = 0; i < region_cnt; i++)
+ INIT_LIST_HEAD(&head.branch[i]);
+
+ hem_list->root_ba = root_hem->dma_addr;
+ list_add(&root_hem->list, &head.root);
+ ret = setup_root_hem(hr_dev, hem_list, unit, max_ba_num, &head, regions,
+ region_cnt);
+ if (ret) {
+ for (i = 0; i < region_cnt; i++)
+ hem_list_free_all(hr_dev, &head.branch[i]);
+
+ hem_list_free_all(hr_dev, &head.root);
+ }
+
+ return ret;
+}
+
+/* This is the number of bottom bt pages for a 100G MR on a 4K-page OS,
+ * assuming the bt page size is not expanded by cal_best_bt_pg_sz()
+ */
+#define RESCHED_LOOP_CNT_THRESHOLD_ON_4K 12800
+
+/* construct the base address table and link them by address hop config */
+int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list,
+ const struct hns_roce_buf_region *regions,
+ int region_cnt, unsigned int bt_pg_shift)
+{
+ const struct hns_roce_buf_region *r;
+ int ofs, end;
+ int loop;
+ int unit;
+ int ret;
+ int i;
+
+ if (region_cnt > HNS_ROCE_MAX_BT_REGION) {
+ dev_err(hr_dev->dev, "invalid region region_cnt %d!\n",
+ region_cnt);
+ return -EINVAL;
+ }
+
+ unit = (1 << bt_pg_shift) / BA_BYTE_LEN;
+ for (i = 0; i < region_cnt; i++) {
+ r = &regions[i];
+ if (!r->count)
+ continue;
+
+ end = r->offset + r->count;
+ for (ofs = r->offset, loop = 1; ofs < end; ofs += unit, loop++) {
+ if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K))
+ cond_resched();
+
+ ret = hem_list_alloc_mid_bt(hr_dev, r, unit, ofs,
+ hem_list->mid_bt[i],
+ &hem_list->btm_bt);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "alloc hem trunk fail ret = %d!\n", ret);
+ goto err_alloc;
+ }
+ }
+ }
+
+ ret = hem_list_alloc_root_bt(hr_dev, hem_list, unit, regions,
+ region_cnt);
+ if (ret)
+ dev_err(hr_dev->dev, "alloc hem root fail ret = %d!\n", ret);
+ else
+ return 0;
+
+err_alloc:
+ hns_roce_hem_list_release(hr_dev, hem_list);
+
+ return ret;
+}
+
+void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list)
+{
+ int i, j;
+
+ for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++)
+ for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++)
+ hem_list_free_all(hr_dev, &hem_list->mid_bt[i][j]);
+
+ hem_list_free_all(hr_dev, &hem_list->root_bt);
+ INIT_LIST_HEAD(&hem_list->btm_bt);
+ hem_list->root_ba = 0;
+}
+
+void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list)
+{
+ int i, j;
+
+ INIT_LIST_HEAD(&hem_list->root_bt);
+ INIT_LIST_HEAD(&hem_list->btm_bt);
+ for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++)
+ for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++)
+ INIT_LIST_HEAD(&hem_list->mid_bt[i][j]);
+}
+
+void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list,
+ int offset, int *mtt_cnt)
+{
+ struct list_head *head = &hem_list->btm_bt;
+ struct hns_roce_hem_item *hem, *temp_hem;
+ void *cpu_base = NULL;
+ int loop = 1;
+ int nr = 0;
+
+ list_for_each_entry_safe(hem, temp_hem, head, sibling) {
+ if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K))
+ cond_resched();
+ loop++;
+
+ if (hem_list_page_is_in_range(hem, offset)) {
+ nr = offset - hem->start;
+ cpu_base = hem->addr + nr * BA_BYTE_LEN;
+ nr = hem->end + 1 - offset;
+ break;
+ }
+ }
+
+ if (mtt_cnt)
+ *mtt_cnt = nr;
+
+ return cpu_base;
}
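The hem_list interface introduced here (declared in hns_roce_hem.h below) is driven in four steps. A minimal call-order sketch, assuming the caller has already filled a regions[] array; local names are illustrative:

        hns_roce_hem_list_init(&hem_list);

        ret = hns_roce_hem_list_request(hr_dev, &hem_list, regions,
                                        region_cnt, bt_pg_shift);
        if (ret)
                return ret;

        /* BA slots backing buffer page 'offset'; mtt_cnt gets the contiguous count */
        ba_table = hns_roce_hem_list_find_mtt(hr_dev, &hem_list, offset, &mtt_cnt);

        hns_roce_hem_list_release(hr_dev, &hem_list);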
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h
index d9d668992e49..9c415b2541af 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.h
@@ -34,9 +34,7 @@
#ifndef _HNS_ROCE_HEM_H
#define _HNS_ROCE_HEM_H
-#define HW_SYNC_TIMEOUT_MSECS 500
-#define HW_SYNC_SLEEP_TIME_INTERVAL 20
-#define BT_CMD_SYNC_SHIFT 31
+#define HEM_HOP_STEP_DIRECT 0xff
enum {
/* MAP HEM(Hardware Entry Memory) */
@@ -47,6 +45,7 @@ enum {
HEM_TYPE_SCCC,
HEM_TYPE_QPC_TIMER,
HEM_TYPE_CQC_TIMER,
+ HEM_TYPE_GMV,
/* UNMAP HEM */
HEM_TYPE_MTT,
@@ -57,44 +56,23 @@ enum {
HEM_TYPE_TRRL,
};
-#define HNS_ROCE_HEM_CHUNK_LEN \
- ((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \
- (sizeof(struct scatterlist)))
-
#define check_whether_bt_num_3(type, hop_num) \
- (type < HEM_TYPE_MTT && hop_num == 2)
+ ((type) < HEM_TYPE_MTT && (hop_num) == 2)
#define check_whether_bt_num_2(type, hop_num) \
- ((type < HEM_TYPE_MTT && hop_num == 1) || \
- (type >= HEM_TYPE_MTT && hop_num == 2))
+ (((type) < HEM_TYPE_MTT && (hop_num) == 1) || \
+ ((type) >= HEM_TYPE_MTT && (hop_num) == 2))
#define check_whether_bt_num_1(type, hop_num) \
- ((type < HEM_TYPE_MTT && hop_num == HNS_ROCE_HOP_NUM_0) || \
- (type >= HEM_TYPE_MTT && hop_num == 1) || \
- (type >= HEM_TYPE_MTT && hop_num == HNS_ROCE_HOP_NUM_0))
-
-enum {
- HNS_ROCE_HEM_PAGE_SHIFT = 12,
- HNS_ROCE_HEM_PAGE_SIZE = 1 << HNS_ROCE_HEM_PAGE_SHIFT,
-};
-
-struct hns_roce_hem_chunk {
- struct list_head list;
- int npages;
- int nsg;
- struct scatterlist mem[HNS_ROCE_HEM_CHUNK_LEN];
- void *buf[HNS_ROCE_HEM_CHUNK_LEN];
-};
+ (((type) < HEM_TYPE_MTT && (hop_num) == HNS_ROCE_HOP_NUM_0) || \
+ ((type) >= HEM_TYPE_MTT && (hop_num) == 1) || \
+ ((type) >= HEM_TYPE_MTT && (hop_num) == HNS_ROCE_HOP_NUM_0))
struct hns_roce_hem {
- struct list_head chunk_list;
- int refcount;
-};
-
-struct hns_roce_hem_iter {
- struct hns_roce_hem *hem;
- struct hns_roce_hem_chunk *chunk;
- int page_idx;
+ void *buf;
+ dma_addr_t dma;
+ unsigned long size;
+ refcount_t refcount;
};
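
struct hns_roce_hem is reduced here from a chain of scatterlist chunks to a single physically contiguous, DMA-coherent buffer with a refcount. A minimal sketch of how an object of this shape could be allocated and dropped with the coherent DMA API; the example_* helpers are illustrative rather than the driver's functions, and hr_dev->dev is assumed to be the backing struct device as used elsewhere in this series (needs <linux/dma-mapping.h>, <linux/refcount.h> and <linux/slab.h>):

static struct hns_roce_hem *example_hem_alloc(struct hns_roce_dev *hr_dev,
					      unsigned long size)
{
	struct hns_roce_hem *hem;

	hem = kzalloc(sizeof(*hem), GFP_KERNEL);
	if (!hem)
		return NULL;

	hem->size = size;
	hem->buf = dma_alloc_coherent(hr_dev->dev, size, &hem->dma,
				      GFP_KERNEL);
	if (!hem->buf) {
		kfree(hem);
		return NULL;
	}

	refcount_set(&hem->refcount, 1);
	return hem;
}

static void example_hem_free(struct hns_roce_dev *hr_dev,
			     struct hns_roce_hem *hem)
{
	/* Free the coherent buffer only when the last reference drops. */
	if (refcount_dec_and_test(&hem->refcount)) {
		dma_free_coherent(hr_dev->dev, hem->size, hem->buf, hem->dma);
		kfree(hem);
	}
}
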
struct hns_roce_hem_mhop {
@@ -102,9 +80,9 @@ struct hns_roce_hem_mhop {
u32 buf_chunk_size;
u32 bt_chunk_size;
u32 ba_l0_num;
- u32 l0_idx;/* level 0 base address table index */
- u32 l1_idx;/* level 1 base address table index */
- u32 l2_idx;/* level 2 base address table index */
+ u32 l0_idx; /* level 0 base address table index */
+ u32 l1_idx; /* level 1 base address table index */
+ u32 l2_idx; /* level 2 base address table index */
};
void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem);
@@ -115,16 +93,9 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev,
void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, unsigned long obj,
dma_addr_t *dma_handle);
-int hns_roce_table_get_range(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table,
- unsigned long start, unsigned long end);
-void hns_roce_table_put_range(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table,
- unsigned long start, unsigned long end);
int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, u32 type,
- unsigned long obj_size, unsigned long nobj,
- int use_lowmem);
+ unsigned long obj_size, unsigned long nobj);
void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table);
void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev);
@@ -133,38 +104,17 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_mhop *mhop);
bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type);
-static inline void hns_roce_hem_first(struct hns_roce_hem *hem,
- struct hns_roce_hem_iter *iter)
-{
- iter->hem = hem;
- iter->chunk = list_empty(&hem->chunk_list) ? NULL :
- list_entry(hem->chunk_list.next,
- struct hns_roce_hem_chunk, list);
- iter->page_idx = 0;
-}
-
-static inline int hns_roce_hem_last(struct hns_roce_hem_iter *iter)
-{
- return !iter->chunk;
-}
-
-static inline void hns_roce_hem_next(struct hns_roce_hem_iter *iter)
-{
- if (++iter->page_idx >= iter->chunk->nsg) {
- if (iter->chunk->list.next == &iter->hem->chunk_list) {
- iter->chunk = NULL;
- return;
- }
-
- iter->chunk = list_entry(iter->chunk->list.next,
- struct hns_roce_hem_chunk, list);
- iter->page_idx = 0;
- }
-}
-
-static inline dma_addr_t hns_roce_hem_addr(struct hns_roce_hem_iter *iter)
-{
- return sg_dma_address(&iter->chunk->mem[iter->page_idx]);
-}
-
-#endif /*_HNS_ROCE_HEM_H*/
+void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list);
+int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions,
+ int region_cnt, int unit);
+int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list,
+ const struct hns_roce_buf_region *regions,
+ int region_cnt, unsigned int bt_pg_shift);
+void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list);
+void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list,
+ int offset, int *mtt_cnt);
+
+#endif /* _HNS_ROCE_HEM_H */
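
Taken together, the new declarations form a small lifecycle: initialize the list heads, request the multi-hop base-address tables for a set of buffer regions, look up MTT slots while the tables are live, then release everything. A reduced sketch of that ordering, under the assumption that the caller has already described its buffer as regions[] (error handling trimmed to the minimum):

static int example_hem_list_setup(struct hns_roce_dev *hr_dev,
				  struct hns_roce_hem_list *hem_list,
				  const struct hns_roce_buf_region *regions,
				  int region_cnt, unsigned int bt_pg_shift)
{
	int ret;

	hns_roce_hem_list_init(hem_list);

	ret = hns_roce_hem_list_request(hr_dev, hem_list, regions,
					region_cnt, bt_pg_shift);
	if (ret)
		return ret;

	/* ... use hns_roce_hem_list_find_mtt() to fill BA/MTT entries ... */

	hns_roce_hem_list_release(hr_dev, hem_list);
	return 0;
}
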
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
deleted file mode 100644
index 4c5d0f160c10..000000000000
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ /dev/null
@@ -1,4734 +0,0 @@
-/*
- * Copyright (c) 2016 Hisilicon Limited.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/platform_device.h>
-#include <linux/acpi.h>
-#include <linux/etherdevice.h>
-#include <linux/interrupt.h>
-#include <linux/of.h>
-#include <linux/of_platform.h>
-#include <rdma/ib_umem.h>
-#include "hns_roce_common.h"
-#include "hns_roce_device.h"
-#include "hns_roce_cmd.h"
-#include "hns_roce_hem.h"
-#include "hns_roce_hw_v1.h"
-
-static void set_data_seg(struct hns_roce_wqe_data_seg *dseg, struct ib_sge *sg)
-{
- dseg->lkey = cpu_to_le32(sg->lkey);
- dseg->addr = cpu_to_le64(sg->addr);
- dseg->len = cpu_to_le32(sg->length);
-}
-
-static void set_raddr_seg(struct hns_roce_wqe_raddr_seg *rseg, u64 remote_addr,
- u32 rkey)
-{
- rseg->raddr = cpu_to_le64(remote_addr);
- rseg->rkey = cpu_to_le32(rkey);
- rseg->len = 0;
-}
-
-static int hns_roce_v1_post_send(struct ib_qp *ibqp,
- const struct ib_send_wr *wr,
- const struct ib_send_wr **bad_wr)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah);
- struct hns_roce_ud_send_wqe *ud_sq_wqe = NULL;
- struct hns_roce_wqe_ctrl_seg *ctrl = NULL;
- struct hns_roce_wqe_data_seg *dseg = NULL;
- struct hns_roce_qp *qp = to_hr_qp(ibqp);
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_sq_db sq_db;
- int ps_opcode = 0, i = 0;
- unsigned long flags = 0;
- void *wqe = NULL;
- u32 doorbell[2];
- int nreq = 0;
- u32 ind = 0;
- int ret = 0;
- u8 *smac;
- int loopback;
-
- if (unlikely(ibqp->qp_type != IB_QPT_GSI &&
- ibqp->qp_type != IB_QPT_RC)) {
- dev_err(dev, "un-supported QP type\n");
- *bad_wr = NULL;
- return -EOPNOTSUPP;
- }
-
- spin_lock_irqsave(&qp->sq.lock, flags);
- ind = qp->sq_next_wqe;
- for (nreq = 0; wr; ++nreq, wr = wr->next) {
- if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
- ret = -ENOMEM;
- *bad_wr = wr;
- goto out;
- }
-
- if (unlikely(wr->num_sge > qp->sq.max_gs)) {
- dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n",
- wr->num_sge, qp->sq.max_gs);
- ret = -EINVAL;
- *bad_wr = wr;
- goto out;
- }
-
- wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
- qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] =
- wr->wr_id;
-
- /* Corresponding to the RC and RD type wqe process separately */
- if (ibqp->qp_type == IB_QPT_GSI) {
- ud_sq_wqe = wqe;
- roce_set_field(ud_sq_wqe->dmac_h,
- UD_SEND_WQE_U32_4_DMAC_0_M,
- UD_SEND_WQE_U32_4_DMAC_0_S,
- ah->av.mac[0]);
- roce_set_field(ud_sq_wqe->dmac_h,
- UD_SEND_WQE_U32_4_DMAC_1_M,
- UD_SEND_WQE_U32_4_DMAC_1_S,
- ah->av.mac[1]);
- roce_set_field(ud_sq_wqe->dmac_h,
- UD_SEND_WQE_U32_4_DMAC_2_M,
- UD_SEND_WQE_U32_4_DMAC_2_S,
- ah->av.mac[2]);
- roce_set_field(ud_sq_wqe->dmac_h,
- UD_SEND_WQE_U32_4_DMAC_3_M,
- UD_SEND_WQE_U32_4_DMAC_3_S,
- ah->av.mac[3]);
-
- roce_set_field(ud_sq_wqe->u32_8,
- UD_SEND_WQE_U32_8_DMAC_4_M,
- UD_SEND_WQE_U32_8_DMAC_4_S,
- ah->av.mac[4]);
- roce_set_field(ud_sq_wqe->u32_8,
- UD_SEND_WQE_U32_8_DMAC_5_M,
- UD_SEND_WQE_U32_8_DMAC_5_S,
- ah->av.mac[5]);
-
- smac = (u8 *)hr_dev->dev_addr[qp->port];
- loopback = ether_addr_equal_unaligned(ah->av.mac,
- smac) ? 1 : 0;
- roce_set_bit(ud_sq_wqe->u32_8,
- UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S,
- loopback);
-
- roce_set_field(ud_sq_wqe->u32_8,
- UD_SEND_WQE_U32_8_OPERATION_TYPE_M,
- UD_SEND_WQE_U32_8_OPERATION_TYPE_S,
- HNS_ROCE_WQE_OPCODE_SEND);
- roce_set_field(ud_sq_wqe->u32_8,
- UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_M,
- UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_S,
- 2);
- roce_set_bit(ud_sq_wqe->u32_8,
- UD_SEND_WQE_U32_8_SEND_GL_ROUTING_HDR_FLAG_S,
- 1);
-
- ud_sq_wqe->u32_8 |= (wr->send_flags & IB_SEND_SIGNALED ?
- cpu_to_le32(HNS_ROCE_WQE_CQ_NOTIFY) : 0) |
- (wr->send_flags & IB_SEND_SOLICITED ?
- cpu_to_le32(HNS_ROCE_WQE_SE) : 0) |
- ((wr->opcode == IB_WR_SEND_WITH_IMM) ?
- cpu_to_le32(HNS_ROCE_WQE_IMM) : 0);
-
- roce_set_field(ud_sq_wqe->u32_16,
- UD_SEND_WQE_U32_16_DEST_QP_M,
- UD_SEND_WQE_U32_16_DEST_QP_S,
- ud_wr(wr)->remote_qpn);
- roce_set_field(ud_sq_wqe->u32_16,
- UD_SEND_WQE_U32_16_MAX_STATIC_RATE_M,
- UD_SEND_WQE_U32_16_MAX_STATIC_RATE_S,
- ah->av.stat_rate);
-
- roce_set_field(ud_sq_wqe->u32_36,
- UD_SEND_WQE_U32_36_FLOW_LABEL_M,
- UD_SEND_WQE_U32_36_FLOW_LABEL_S,
- ah->av.sl_tclass_flowlabel &
- HNS_ROCE_FLOW_LABEL_MASK);
- roce_set_field(ud_sq_wqe->u32_36,
- UD_SEND_WQE_U32_36_PRIORITY_M,
- UD_SEND_WQE_U32_36_PRIORITY_S,
- le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
- HNS_ROCE_SL_SHIFT);
- roce_set_field(ud_sq_wqe->u32_36,
- UD_SEND_WQE_U32_36_SGID_INDEX_M,
- UD_SEND_WQE_U32_36_SGID_INDEX_S,
- hns_get_gid_index(hr_dev, qp->phy_port,
- ah->av.gid_index));
-
- roce_set_field(ud_sq_wqe->u32_40,
- UD_SEND_WQE_U32_40_HOP_LIMIT_M,
- UD_SEND_WQE_U32_40_HOP_LIMIT_S,
- ah->av.hop_limit);
- roce_set_field(ud_sq_wqe->u32_40,
- UD_SEND_WQE_U32_40_TRAFFIC_CLASS_M,
- UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S,
- ah->av.sl_tclass_flowlabel >>
- HNS_ROCE_TCLASS_SHIFT);
-
- memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN);
-
- ud_sq_wqe->va0_l =
- cpu_to_le32((u32)wr->sg_list[0].addr);
- ud_sq_wqe->va0_h =
- cpu_to_le32((wr->sg_list[0].addr) >> 32);
- ud_sq_wqe->l_key0 =
- cpu_to_le32(wr->sg_list[0].lkey);
-
- ud_sq_wqe->va1_l =
- cpu_to_le32((u32)wr->sg_list[1].addr);
- ud_sq_wqe->va1_h =
- cpu_to_le32((wr->sg_list[1].addr) >> 32);
- ud_sq_wqe->l_key1 =
- cpu_to_le32(wr->sg_list[1].lkey);
- ind++;
- } else if (ibqp->qp_type == IB_QPT_RC) {
- u32 tmp_len = 0;
-
- ctrl = wqe;
- memset(ctrl, 0, sizeof(struct hns_roce_wqe_ctrl_seg));
- for (i = 0; i < wr->num_sge; i++)
- tmp_len += wr->sg_list[i].length;
-
- ctrl->msg_length =
- cpu_to_le32(le32_to_cpu(ctrl->msg_length) + tmp_len);
-
- ctrl->sgl_pa_h = 0;
- ctrl->flag = 0;
-
- switch (wr->opcode) {
- case IB_WR_SEND_WITH_IMM:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- ctrl->imm_data = wr->ex.imm_data;
- break;
- case IB_WR_SEND_WITH_INV:
- ctrl->inv_key =
- cpu_to_le32(wr->ex.invalidate_rkey);
- break;
- default:
- ctrl->imm_data = 0;
- break;
- }
-
- /*Ctrl field, ctrl set type: sig, solic, imm, fence */
- /* SO wait for conforming application scenarios */
- ctrl->flag |= (wr->send_flags & IB_SEND_SIGNALED ?
- cpu_to_le32(HNS_ROCE_WQE_CQ_NOTIFY) : 0) |
- (wr->send_flags & IB_SEND_SOLICITED ?
- cpu_to_le32(HNS_ROCE_WQE_SE) : 0) |
- ((wr->opcode == IB_WR_SEND_WITH_IMM ||
- wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) ?
- cpu_to_le32(HNS_ROCE_WQE_IMM) : 0) |
- (wr->send_flags & IB_SEND_FENCE ?
- (cpu_to_le32(HNS_ROCE_WQE_FENCE)) : 0);
-
- wqe += sizeof(struct hns_roce_wqe_ctrl_seg);
-
- switch (wr->opcode) {
- case IB_WR_RDMA_READ:
- ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_READ;
- set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
- rdma_wr(wr)->rkey);
- break;
- case IB_WR_RDMA_WRITE:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_WRITE;
- set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
- rdma_wr(wr)->rkey);
- break;
- case IB_WR_SEND:
- case IB_WR_SEND_WITH_INV:
- case IB_WR_SEND_WITH_IMM:
- ps_opcode = HNS_ROCE_WQE_OPCODE_SEND;
- break;
- case IB_WR_LOCAL_INV:
- break;
- case IB_WR_ATOMIC_CMP_AND_SWP:
- case IB_WR_ATOMIC_FETCH_AND_ADD:
- case IB_WR_LSO:
- default:
- ps_opcode = HNS_ROCE_WQE_OPCODE_MASK;
- break;
- }
- ctrl->flag |= cpu_to_le32(ps_opcode);
- wqe += sizeof(struct hns_roce_wqe_raddr_seg);
-
- dseg = wqe;
- if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) {
- if (le32_to_cpu(ctrl->msg_length) >
- hr_dev->caps.max_sq_inline) {
- ret = -EINVAL;
- *bad_wr = wr;
- dev_err(dev, "inline len(1-%d)=%d, illegal",
- ctrl->msg_length,
- hr_dev->caps.max_sq_inline);
- goto out;
- }
- for (i = 0; i < wr->num_sge; i++) {
- memcpy(wqe, ((void *) (uintptr_t)
- wr->sg_list[i].addr),
- wr->sg_list[i].length);
- wqe += wr->sg_list[i].length;
- }
- ctrl->flag |= cpu_to_le32(HNS_ROCE_WQE_INLINE);
- } else {
- /*sqe num is two */
- for (i = 0; i < wr->num_sge; i++)
- set_data_seg(dseg + i, wr->sg_list + i);
-
- ctrl->flag |= cpu_to_le32(wr->num_sge <<
- HNS_ROCE_WQE_SGE_NUM_BIT);
- }
- ind++;
- }
- }
-
-out:
- /* Set DB return */
- if (likely(nreq)) {
- qp->sq.head += nreq;
- /* Memory barrier */
- wmb();
-
- sq_db.u32_4 = 0;
- sq_db.u32_8 = 0;
- roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SQ_HEAD_M,
- SQ_DOORBELL_U32_4_SQ_HEAD_S,
- (qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1)));
- roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SL_M,
- SQ_DOORBELL_U32_4_SL_S, qp->sl);
- roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_PORT_M,
- SQ_DOORBELL_U32_4_PORT_S, qp->phy_port);
- roce_set_field(sq_db.u32_8, SQ_DOORBELL_U32_8_QPN_M,
- SQ_DOORBELL_U32_8_QPN_S, qp->doorbell_qpn);
- roce_set_bit(sq_db.u32_8, SQ_DOORBELL_HW_SYNC_S, 1);
-
- doorbell[0] = le32_to_cpu(sq_db.u32_4);
- doorbell[1] = le32_to_cpu(sq_db.u32_8);
-
- hns_roce_write64_k((__le32 *)doorbell, qp->sq.db_reg_l);
- qp->sq_next_wqe = ind;
- }
-
- spin_unlock_irqrestore(&qp->sq.lock, flags);
-
- return ret;
-}
-
-static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
- const struct ib_recv_wr *wr,
- const struct ib_recv_wr **bad_wr)
-{
- int ret = 0;
- int nreq = 0;
- int ind = 0;
- int i = 0;
- u32 reg_val;
- unsigned long flags = 0;
- struct hns_roce_rq_wqe_ctrl *ctrl = NULL;
- struct hns_roce_wqe_data_seg *scat = NULL;
- struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_rq_db rq_db;
- uint32_t doorbell[2] = {0};
-
- spin_lock_irqsave(&hr_qp->rq.lock, flags);
- ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1);
-
- for (nreq = 0; wr; ++nreq, wr = wr->next) {
- if (hns_roce_wq_overflow(&hr_qp->rq, nreq,
- hr_qp->ibqp.recv_cq)) {
- ret = -ENOMEM;
- *bad_wr = wr;
- goto out;
- }
-
- if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) {
- dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n",
- wr->num_sge, hr_qp->rq.max_gs);
- ret = -EINVAL;
- *bad_wr = wr;
- goto out;
- }
-
- ctrl = get_recv_wqe(hr_qp, ind);
-
- roce_set_field(ctrl->rwqe_byte_12,
- RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_M,
- RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_S,
- wr->num_sge);
-
- scat = (struct hns_roce_wqe_data_seg *)(ctrl + 1);
-
- for (i = 0; i < wr->num_sge; i++)
- set_data_seg(scat + i, wr->sg_list + i);
-
- hr_qp->rq.wrid[ind] = wr->wr_id;
-
- ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1);
- }
-
-out:
- if (likely(nreq)) {
- hr_qp->rq.head += nreq;
- /* Memory barrier */
- wmb();
-
- if (ibqp->qp_type == IB_QPT_GSI) {
- __le32 tmp;
-
- /* SW update GSI rq header */
- reg_val = roce_read(to_hr_dev(ibqp->device),
- ROCEE_QP1C_CFG3_0_REG +
- QP1C_CFGN_OFFSET * hr_qp->phy_port);
- tmp = cpu_to_le32(reg_val);
- roce_set_field(tmp,
- ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M,
- ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S,
- hr_qp->rq.head);
- reg_val = le32_to_cpu(tmp);
- roce_write(to_hr_dev(ibqp->device),
- ROCEE_QP1C_CFG3_0_REG +
- QP1C_CFGN_OFFSET * hr_qp->phy_port, reg_val);
- } else {
- rq_db.u32_4 = 0;
- rq_db.u32_8 = 0;
-
- roce_set_field(rq_db.u32_4, RQ_DOORBELL_U32_4_RQ_HEAD_M,
- RQ_DOORBELL_U32_4_RQ_HEAD_S,
- hr_qp->rq.head);
- roce_set_field(rq_db.u32_8, RQ_DOORBELL_U32_8_QPN_M,
- RQ_DOORBELL_U32_8_QPN_S, hr_qp->qpn);
- roce_set_field(rq_db.u32_8, RQ_DOORBELL_U32_8_CMD_M,
- RQ_DOORBELL_U32_8_CMD_S, 1);
- roce_set_bit(rq_db.u32_8, RQ_DOORBELL_U32_8_HW_SYNC_S,
- 1);
-
- doorbell[0] = le32_to_cpu(rq_db.u32_4);
- doorbell[1] = le32_to_cpu(rq_db.u32_8);
-
- hns_roce_write64_k((__le32 *)doorbell,
- hr_qp->rq.db_reg_l);
- }
- }
- spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
-
- return ret;
-}
-
-static void hns_roce_set_db_event_mode(struct hns_roce_dev *hr_dev,
- int sdb_mode, int odb_mode)
-{
- __le32 tmp;
- u32 val;
-
- val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
- tmp = cpu_to_le32(val);
- roce_set_bit(tmp, ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S, sdb_mode);
- roce_set_bit(tmp, ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S, odb_mode);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
-}
-
-static void hns_roce_set_db_ext_mode(struct hns_roce_dev *hr_dev, u32 sdb_mode,
- u32 odb_mode)
-{
- __le32 tmp;
- u32 val;
-
- /* Configure SDB/ODB extend mode */
- val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
- tmp = cpu_to_le32(val);
- roce_set_bit(tmp, ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S, sdb_mode);
- roce_set_bit(tmp, ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S, odb_mode);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
-}
-
-static void hns_roce_set_sdb(struct hns_roce_dev *hr_dev, u32 sdb_alept,
- u32 sdb_alful)
-{
- __le32 tmp;
- u32 val;
-
- /* Configure SDB */
- val = roce_read(hr_dev, ROCEE_DB_SQ_WL_REG);
- tmp = cpu_to_le32(val);
- roce_set_field(tmp, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M,
- ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S, sdb_alful);
- roce_set_field(tmp, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M,
- ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S, sdb_alept);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_DB_SQ_WL_REG, val);
-}
-
-static void hns_roce_set_odb(struct hns_roce_dev *hr_dev, u32 odb_alept,
- u32 odb_alful)
-{
- __le32 tmp;
- u32 val;
-
- /* Configure ODB */
- val = roce_read(hr_dev, ROCEE_DB_OTHERS_WL_REG);
- tmp = cpu_to_le32(val);
- roce_set_field(tmp, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M,
- ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S, odb_alful);
- roce_set_field(tmp, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M,
- ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S, odb_alept);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_DB_OTHERS_WL_REG, val);
-}
-
-static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept,
- u32 ext_sdb_alful)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_v1_priv *priv;
- struct hns_roce_db_table *db;
- dma_addr_t sdb_dma_addr;
- __le32 tmp;
- u32 val;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->priv;
- db = &priv->db_table;
-
- /* Configure extend SDB threshold */
- roce_write(hr_dev, ROCEE_EXT_DB_SQ_WL_EMPTY_REG, ext_sdb_alept);
- roce_write(hr_dev, ROCEE_EXT_DB_SQ_WL_REG, ext_sdb_alful);
-
- /* Configure extend SDB base addr */
- sdb_dma_addr = db->ext_db->sdb_buf_list->map;
- roce_write(hr_dev, ROCEE_EXT_DB_SQ_REG, (u32)(sdb_dma_addr >> 12));
-
- /* Configure extend SDB depth */
- val = roce_read(hr_dev, ROCEE_EXT_DB_SQ_H_REG);
- tmp = cpu_to_le32(val);
- roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M,
- ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S,
- db->ext_db->esdb_dep);
- /*
- * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of
- * using 4K page, and shift more 32 because of
- * caculating the high 32 bit value evaluated to hardware.
- */
- roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M,
- ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S, sdb_dma_addr >> 44);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_EXT_DB_SQ_H_REG, val);
-
- dev_dbg(dev, "ext SDB depth: 0x%x\n", db->ext_db->esdb_dep);
- dev_dbg(dev, "ext SDB threshold: epmty: 0x%x, ful: 0x%x\n",
- ext_sdb_alept, ext_sdb_alful);
-}
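
The "44 = 32 + 12" comment in the removed hns_roce_set_sdb_ext() above is simply splitting a 4K-aligned DMA address into two register fields: the low register gets the page frame number (addr >> 12, truncated to 32 bits) and the BA_H field gets the bits above that (addr >> 44, i.e. 12 + 32). A worked example with a made-up address:

/*
 * sdb_dma_addr = 0x0000123456789000 (4K aligned)
 *   addr >> 12        = 0x123456789  -> low register, (u32) = 0x23456789
 *   addr >> (12 + 32) = 0x1          -> EXT_DB_SQ_BA_H field
 * Hardware recombines the two fields into PFN 0x123456789,
 * i.e. the original address 0x123456789000.
 */
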
-
-static void hns_roce_set_odb_ext(struct hns_roce_dev *hr_dev, u32 ext_odb_alept,
- u32 ext_odb_alful)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_v1_priv *priv;
- struct hns_roce_db_table *db;
- dma_addr_t odb_dma_addr;
- __le32 tmp;
- u32 val;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->priv;
- db = &priv->db_table;
-
- /* Configure extend ODB threshold */
- roce_write(hr_dev, ROCEE_EXT_DB_OTHERS_WL_EMPTY_REG, ext_odb_alept);
- roce_write(hr_dev, ROCEE_EXT_DB_OTHERS_WL_REG, ext_odb_alful);
-
- /* Configure extend ODB base addr */
- odb_dma_addr = db->ext_db->odb_buf_list->map;
- roce_write(hr_dev, ROCEE_EXT_DB_OTH_REG, (u32)(odb_dma_addr >> 12));
-
- /* Configure extend ODB depth */
- val = roce_read(hr_dev, ROCEE_EXT_DB_OTH_H_REG);
- tmp = cpu_to_le32(val);
- roce_set_field(tmp, ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M,
- ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S,
- db->ext_db->eodb_dep);
- roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M,
- ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S,
- db->ext_db->eodb_dep);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_EXT_DB_OTH_H_REG, val);
-
- dev_dbg(dev, "ext ODB depth: 0x%x\n", db->ext_db->eodb_dep);
- dev_dbg(dev, "ext ODB threshold: empty: 0x%x, ful: 0x%x\n",
- ext_odb_alept, ext_odb_alful);
-}
-
-static int hns_roce_db_ext_init(struct hns_roce_dev *hr_dev, u32 sdb_ext_mod,
- u32 odb_ext_mod)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_v1_priv *priv;
- struct hns_roce_db_table *db;
- dma_addr_t sdb_dma_addr;
- dma_addr_t odb_dma_addr;
- int ret = 0;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->priv;
- db = &priv->db_table;
-
- db->ext_db = kmalloc(sizeof(*db->ext_db), GFP_KERNEL);
- if (!db->ext_db)
- return -ENOMEM;
-
- if (sdb_ext_mod) {
- db->ext_db->sdb_buf_list = kmalloc(
- sizeof(*db->ext_db->sdb_buf_list), GFP_KERNEL);
- if (!db->ext_db->sdb_buf_list) {
- ret = -ENOMEM;
- goto ext_sdb_buf_fail_out;
- }
-
- db->ext_db->sdb_buf_list->buf = dma_alloc_coherent(dev,
- HNS_ROCE_V1_EXT_SDB_SIZE,
- &sdb_dma_addr, GFP_KERNEL);
- if (!db->ext_db->sdb_buf_list->buf) {
- ret = -ENOMEM;
- goto alloc_sq_db_buf_fail;
- }
- db->ext_db->sdb_buf_list->map = sdb_dma_addr;
-
- db->ext_db->esdb_dep = ilog2(HNS_ROCE_V1_EXT_SDB_DEPTH);
- hns_roce_set_sdb_ext(hr_dev, HNS_ROCE_V1_EXT_SDB_ALEPT,
- HNS_ROCE_V1_EXT_SDB_ALFUL);
- } else
- hns_roce_set_sdb(hr_dev, HNS_ROCE_V1_SDB_ALEPT,
- HNS_ROCE_V1_SDB_ALFUL);
-
- if (odb_ext_mod) {
- db->ext_db->odb_buf_list = kmalloc(
- sizeof(*db->ext_db->odb_buf_list), GFP_KERNEL);
- if (!db->ext_db->odb_buf_list) {
- ret = -ENOMEM;
- goto ext_odb_buf_fail_out;
- }
-
- db->ext_db->odb_buf_list->buf = dma_alloc_coherent(dev,
- HNS_ROCE_V1_EXT_ODB_SIZE,
- &odb_dma_addr, GFP_KERNEL);
- if (!db->ext_db->odb_buf_list->buf) {
- ret = -ENOMEM;
- goto alloc_otr_db_buf_fail;
- }
- db->ext_db->odb_buf_list->map = odb_dma_addr;
-
- db->ext_db->eodb_dep = ilog2(HNS_ROCE_V1_EXT_ODB_DEPTH);
- hns_roce_set_odb_ext(hr_dev, HNS_ROCE_V1_EXT_ODB_ALEPT,
- HNS_ROCE_V1_EXT_ODB_ALFUL);
- } else
- hns_roce_set_odb(hr_dev, HNS_ROCE_V1_ODB_ALEPT,
- HNS_ROCE_V1_ODB_ALFUL);
-
- hns_roce_set_db_ext_mode(hr_dev, sdb_ext_mod, odb_ext_mod);
-
- return 0;
-
-alloc_otr_db_buf_fail:
- kfree(db->ext_db->odb_buf_list);
-
-ext_odb_buf_fail_out:
- if (sdb_ext_mod) {
- dma_free_coherent(dev, HNS_ROCE_V1_EXT_SDB_SIZE,
- db->ext_db->sdb_buf_list->buf,
- db->ext_db->sdb_buf_list->map);
- }
-
-alloc_sq_db_buf_fail:
- if (sdb_ext_mod)
- kfree(db->ext_db->sdb_buf_list);
-
-ext_sdb_buf_fail_out:
- kfree(db->ext_db);
- return ret;
-}
-
-static struct hns_roce_qp *hns_roce_v1_create_lp_qp(struct hns_roce_dev *hr_dev,
- struct ib_pd *pd)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct ib_qp_init_attr init_attr;
- struct ib_qp *qp;
-
- memset(&init_attr, 0, sizeof(struct ib_qp_init_attr));
- init_attr.qp_type = IB_QPT_RC;
- init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
- init_attr.cap.max_recv_wr = HNS_ROCE_MIN_WQE_NUM;
- init_attr.cap.max_send_wr = HNS_ROCE_MIN_WQE_NUM;
-
- qp = hns_roce_create_qp(pd, &init_attr, NULL);
- if (IS_ERR(qp)) {
- dev_err(dev, "Create loop qp for mr free failed!");
- return NULL;
- }
-
- return to_hr_qp(qp);
-}
-
-static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_caps *caps = &hr_dev->caps;
- struct device *dev = &hr_dev->pdev->dev;
- struct ib_cq_init_attr cq_init_attr;
- struct hns_roce_free_mr *free_mr;
- struct ib_qp_attr attr = { 0 };
- struct hns_roce_v1_priv *priv;
- struct hns_roce_qp *hr_qp;
- struct ib_device *ibdev;
- struct ib_cq *cq;
- struct ib_pd *pd;
- union ib_gid dgid;
- u64 subnet_prefix;
- int attr_mask = 0;
- int ret = -ENOMEM;
- int i, j;
- u8 queue_en[HNS_ROCE_V1_RESV_QP] = { 0 };
- u8 phy_port;
- u8 port = 0;
- u8 sl;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->priv;
- free_mr = &priv->free_mr;
-
- /* Reserved cq for loop qp */
- cq_init_attr.cqe = HNS_ROCE_MIN_WQE_NUM * 2;
- cq_init_attr.comp_vector = 0;
- cq = hns_roce_ib_create_cq(&hr_dev->ib_dev, &cq_init_attr, NULL);
- if (IS_ERR(cq)) {
- dev_err(dev, "Create cq for reserved loop qp failed!");
- return -ENOMEM;
- }
- free_mr->mr_free_cq = to_hr_cq(cq);
- free_mr->mr_free_cq->ib_cq.device = &hr_dev->ib_dev;
- free_mr->mr_free_cq->ib_cq.uobject = NULL;
- free_mr->mr_free_cq->ib_cq.comp_handler = NULL;
- free_mr->mr_free_cq->ib_cq.event_handler = NULL;
- free_mr->mr_free_cq->ib_cq.cq_context = NULL;
- atomic_set(&free_mr->mr_free_cq->ib_cq.usecnt, 0);
-
- ibdev = &hr_dev->ib_dev;
- pd = rdma_zalloc_drv_obj(ibdev, ib_pd);
- if (!pd)
- goto alloc_mem_failed;
-
- pd->device = ibdev;
- ret = hns_roce_alloc_pd(pd, NULL);
- if (ret)
- goto alloc_pd_failed;
-
- free_mr->mr_free_pd = to_hr_pd(pd);
- free_mr->mr_free_pd->ibpd.device = &hr_dev->ib_dev;
- free_mr->mr_free_pd->ibpd.uobject = NULL;
- free_mr->mr_free_pd->ibpd.__internal_mr = NULL;
- atomic_set(&free_mr->mr_free_pd->ibpd.usecnt, 0);
-
- attr.qp_access_flags = IB_ACCESS_REMOTE_WRITE;
- attr.pkey_index = 0;
- attr.min_rnr_timer = 0;
- /* Disable read ability */
- attr.max_dest_rd_atomic = 0;
- attr.max_rd_atomic = 0;
- /* Use arbitrary values as rq_psn and sq_psn */
- attr.rq_psn = 0x0808;
- attr.sq_psn = 0x0808;
- attr.retry_cnt = 7;
- attr.rnr_retry = 7;
- attr.timeout = 0x12;
- attr.path_mtu = IB_MTU_256;
- attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
- rdma_ah_set_grh(&attr.ah_attr, NULL, 0, 0, 1, 0);
- rdma_ah_set_static_rate(&attr.ah_attr, 3);
-
- subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
- for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
- phy_port = (i >= HNS_ROCE_MAX_PORTS) ? (i - 2) :
- (i % HNS_ROCE_MAX_PORTS);
- sl = i / HNS_ROCE_MAX_PORTS;
-
- for (j = 0; j < caps->num_ports; j++) {
- if (hr_dev->iboe.phy_port[j] == phy_port) {
- queue_en[i] = 1;
- port = j;
- break;
- }
- }
-
- if (!queue_en[i])
- continue;
-
- free_mr->mr_free_qp[i] = hns_roce_v1_create_lp_qp(hr_dev, pd);
- if (!free_mr->mr_free_qp[i]) {
- dev_err(dev, "Create loop qp failed!\n");
- ret = -ENOMEM;
- goto create_lp_qp_failed;
- }
- hr_qp = free_mr->mr_free_qp[i];
-
- hr_qp->port = port;
- hr_qp->phy_port = phy_port;
- hr_qp->ibqp.qp_type = IB_QPT_RC;
- hr_qp->ibqp.device = &hr_dev->ib_dev;
- hr_qp->ibqp.uobject = NULL;
- atomic_set(&hr_qp->ibqp.usecnt, 0);
- hr_qp->ibqp.pd = pd;
- hr_qp->ibqp.recv_cq = cq;
- hr_qp->ibqp.send_cq = cq;
-
- rdma_ah_set_port_num(&attr.ah_attr, port + 1);
- rdma_ah_set_sl(&attr.ah_attr, sl);
- attr.port_num = port + 1;
-
- attr.dest_qp_num = hr_qp->qpn;
- memcpy(rdma_ah_retrieve_dmac(&attr.ah_attr),
- hr_dev->dev_addr[port],
- MAC_ADDR_OCTET_NUM);
-
- memcpy(&dgid.raw, &subnet_prefix, sizeof(u64));
- memcpy(&dgid.raw[8], hr_dev->dev_addr[port], 3);
- memcpy(&dgid.raw[13], hr_dev->dev_addr[port] + 3, 3);
- dgid.raw[11] = 0xff;
- dgid.raw[12] = 0xfe;
- dgid.raw[8] ^= 2;
- rdma_ah_set_dgid_raw(&attr.ah_attr, dgid.raw);
-
- ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask,
- IB_QPS_RESET, IB_QPS_INIT);
- if (ret) {
- dev_err(dev, "modify qp failed(%d)!\n", ret);
- goto create_lp_qp_failed;
- }
-
- ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, IB_QP_DEST_QPN,
- IB_QPS_INIT, IB_QPS_RTR);
- if (ret) {
- dev_err(dev, "modify qp failed(%d)!\n", ret);
- goto create_lp_qp_failed;
- }
-
- ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask,
- IB_QPS_RTR, IB_QPS_RTS);
- if (ret) {
- dev_err(dev, "modify qp failed(%d)!\n", ret);
- goto create_lp_qp_failed;
- }
- }
-
- return 0;
-
-create_lp_qp_failed:
- for (i -= 1; i >= 0; i--) {
- hr_qp = free_mr->mr_free_qp[i];
- if (hns_roce_v1_destroy_qp(&hr_qp->ibqp, NULL))
- dev_err(dev, "Destroy qp %d for mr free failed!\n", i);
- }
-
- hns_roce_dealloc_pd(pd, NULL);
-
-alloc_pd_failed:
- kfree(pd);
-
-alloc_mem_failed:
- if (hns_roce_ib_destroy_cq(cq, NULL))
- dev_err(dev, "Destroy cq for create_lp_qp failed!\n");
-
- return ret;
-}
-
-static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_free_mr *free_mr;
- struct hns_roce_v1_priv *priv;
- struct hns_roce_qp *hr_qp;
- int ret;
- int i;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->priv;
- free_mr = &priv->free_mr;
-
- for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
- hr_qp = free_mr->mr_free_qp[i];
- if (!hr_qp)
- continue;
-
- ret = hns_roce_v1_destroy_qp(&hr_qp->ibqp, NULL);
- if (ret)
- dev_err(dev, "Destroy qp %d for mr free failed(%d)!\n",
- i, ret);
- }
-
- ret = hns_roce_ib_destroy_cq(&free_mr->mr_free_cq->ib_cq, NULL);
- if (ret)
- dev_err(dev, "Destroy cq for mr_free failed(%d)!\n", ret);
-
- hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd, NULL);
-}
-
-static int hns_roce_db_init(struct hns_roce_dev *hr_dev)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_v1_priv *priv;
- struct hns_roce_db_table *db;
- u32 sdb_ext_mod;
- u32 odb_ext_mod;
- u32 sdb_evt_mod;
- u32 odb_evt_mod;
- int ret = 0;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->priv;
- db = &priv->db_table;
-
- memset(db, 0, sizeof(*db));
-
- /* Default DB mode */
- sdb_ext_mod = HNS_ROCE_SDB_EXTEND_MODE;
- odb_ext_mod = HNS_ROCE_ODB_EXTEND_MODE;
- sdb_evt_mod = HNS_ROCE_SDB_NORMAL_MODE;
- odb_evt_mod = HNS_ROCE_ODB_POLL_MODE;
-
- db->sdb_ext_mod = sdb_ext_mod;
- db->odb_ext_mod = odb_ext_mod;
-
- /* Init extend DB */
- ret = hns_roce_db_ext_init(hr_dev, sdb_ext_mod, odb_ext_mod);
- if (ret) {
- dev_err(dev, "Failed in extend DB configuration.\n");
- return ret;
- }
-
- hns_roce_set_db_event_mode(hr_dev, sdb_evt_mod, odb_evt_mod);
-
- return 0;
-}
-
-static void hns_roce_v1_recreate_lp_qp_work_fn(struct work_struct *work)
-{
- struct hns_roce_recreate_lp_qp_work *lp_qp_work;
- struct hns_roce_dev *hr_dev;
-
- lp_qp_work = container_of(work, struct hns_roce_recreate_lp_qp_work,
- work);
- hr_dev = to_hr_dev(lp_qp_work->ib_dev);
-
- hns_roce_v1_release_lp_qp(hr_dev);
-
- if (hns_roce_v1_rsv_lp_qp(hr_dev))
- dev_err(&hr_dev->pdev->dev, "create reserver qp failed\n");
-
- if (lp_qp_work->comp_flag)
- complete(lp_qp_work->comp);
-
- kfree(lp_qp_work);
-}
-
-static int hns_roce_v1_recreate_lp_qp(struct hns_roce_dev *hr_dev)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_recreate_lp_qp_work *lp_qp_work;
- struct hns_roce_free_mr *free_mr;
- struct hns_roce_v1_priv *priv;
- struct completion comp;
- unsigned long end =
- msecs_to_jiffies(HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS) + jiffies;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->priv;
- free_mr = &priv->free_mr;
-
- lp_qp_work = kzalloc(sizeof(struct hns_roce_recreate_lp_qp_work),
- GFP_KERNEL);
- if (!lp_qp_work)
- return -ENOMEM;
-
- INIT_WORK(&(lp_qp_work->work), hns_roce_v1_recreate_lp_qp_work_fn);
-
- lp_qp_work->ib_dev = &(hr_dev->ib_dev);
- lp_qp_work->comp = &comp;
- lp_qp_work->comp_flag = 1;
-
- init_completion(lp_qp_work->comp);
-
- queue_work(free_mr->free_mr_wq, &(lp_qp_work->work));
-
- while (time_before_eq(jiffies, end)) {
- if (try_wait_for_completion(&comp))
- return 0;
- msleep(HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE);
- }
-
- lp_qp_work->comp_flag = 0;
- if (try_wait_for_completion(&comp))
- return 0;
-
- dev_warn(dev, "recreate lp qp failed 20s timeout and return failed!\n");
- return -ETIMEDOUT;
-}
-
-static int hns_roce_v1_send_lp_wqe(struct hns_roce_qp *hr_qp)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
- struct device *dev = &hr_dev->pdev->dev;
- struct ib_send_wr send_wr;
- const struct ib_send_wr *bad_wr;
- int ret;
-
- memset(&send_wr, 0, sizeof(send_wr));
- send_wr.next = NULL;
- send_wr.num_sge = 0;
- send_wr.send_flags = 0;
- send_wr.sg_list = NULL;
- send_wr.wr_id = (unsigned long long)&send_wr;
- send_wr.opcode = IB_WR_RDMA_WRITE;
-
- ret = hns_roce_v1_post_send(&hr_qp->ibqp, &send_wr, &bad_wr);
- if (ret) {
- dev_err(dev, "Post write wqe for mr free failed(%d)!", ret);
- return ret;
- }
-
- return 0;
-}
-
-static void hns_roce_v1_mr_free_work_fn(struct work_struct *work)
-{
- struct hns_roce_mr_free_work *mr_work;
- struct ib_wc wc[HNS_ROCE_V1_RESV_QP];
- struct hns_roce_free_mr *free_mr;
- struct hns_roce_cq *mr_free_cq;
- struct hns_roce_v1_priv *priv;
- struct hns_roce_dev *hr_dev;
- struct hns_roce_mr *hr_mr;
- struct hns_roce_qp *hr_qp;
- struct device *dev;
- unsigned long end =
- msecs_to_jiffies(HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS) + jiffies;
- int i;
- int ret;
- int ne = 0;
-
- mr_work = container_of(work, struct hns_roce_mr_free_work, work);
- hr_mr = (struct hns_roce_mr *)mr_work->mr;
- hr_dev = to_hr_dev(mr_work->ib_dev);
- dev = &hr_dev->pdev->dev;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->priv;
- free_mr = &priv->free_mr;
- mr_free_cq = free_mr->mr_free_cq;
-
- for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
- hr_qp = free_mr->mr_free_qp[i];
- if (!hr_qp)
- continue;
- ne++;
-
- ret = hns_roce_v1_send_lp_wqe(hr_qp);
- if (ret) {
- dev_err(dev,
- "Send wqe (qp:0x%lx) for mr free failed(%d)!\n",
- hr_qp->qpn, ret);
- goto free_work;
- }
- }
-
- if (!ne) {
- dev_err(dev, "Reserved loop qp is absent!\n");
- goto free_work;
- }
-
- do {
- ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc);
- if (ret < 0 && hr_qp) {
- dev_err(dev,
- "(qp:0x%lx) starts, Poll cqe failed(%d) for mr 0x%x free! Remain %d cqe\n",
- hr_qp->qpn, ret, hr_mr->key, ne);
- goto free_work;
- }
- ne -= ret;
- usleep_range(HNS_ROCE_V1_FREE_MR_WAIT_VALUE * 1000,
- (1 + HNS_ROCE_V1_FREE_MR_WAIT_VALUE) * 1000);
- } while (ne && time_before_eq(jiffies, end));
-
- if (ne != 0)
- dev_err(dev,
- "Poll cqe for mr 0x%x free timeout! Remain %d cqe\n",
- hr_mr->key, ne);
-
-free_work:
- if (mr_work->comp_flag)
- complete(mr_work->comp);
- kfree(mr_work);
-}
-
-static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev,
- struct hns_roce_mr *mr, struct ib_udata *udata)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_mr_free_work *mr_work;
- struct hns_roce_free_mr *free_mr;
- struct hns_roce_v1_priv *priv;
- struct completion comp;
- unsigned long end =
- msecs_to_jiffies(HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS) + jiffies;
- unsigned long start = jiffies;
- int npages;
- int ret = 0;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->priv;
- free_mr = &priv->free_mr;
-
- if (mr->enabled) {
- if (hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key)
- & (hr_dev->caps.num_mtpts - 1)))
- dev_warn(dev, "HW2SW_MPT failed!\n");
- }
-
- mr_work = kzalloc(sizeof(*mr_work), GFP_KERNEL);
- if (!mr_work) {
- ret = -ENOMEM;
- goto free_mr;
- }
-
- INIT_WORK(&(mr_work->work), hns_roce_v1_mr_free_work_fn);
-
- mr_work->ib_dev = &(hr_dev->ib_dev);
- mr_work->comp = &comp;
- mr_work->comp_flag = 1;
- mr_work->mr = (void *)mr;
- init_completion(mr_work->comp);
-
- queue_work(free_mr->free_mr_wq, &(mr_work->work));
-
- while (time_before_eq(jiffies, end)) {
- if (try_wait_for_completion(&comp))
- goto free_mr;
- msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE);
- }
-
- mr_work->comp_flag = 0;
- if (try_wait_for_completion(&comp))
- goto free_mr;
-
- dev_warn(dev, "Free mr work 0x%x over 50s and failed!\n", mr->key);
- ret = -ETIMEDOUT;
-
-free_mr:
- dev_dbg(dev, "Free mr 0x%x use 0x%x us.\n",
- mr->key, jiffies_to_usecs(jiffies) - jiffies_to_usecs(start));
-
- if (mr->size != ~0ULL) {
- npages = ib_umem_page_count(mr->umem);
- dma_free_coherent(dev, npages * 8, mr->pbl_buf,
- mr->pbl_dma_addr);
- }
-
- hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
- key_to_hw_index(mr->key), 0);
-
- if (mr->umem)
- ib_umem_release(mr->umem);
-
- kfree(mr);
-
- return ret;
-}
-
-static void hns_roce_db_free(struct hns_roce_dev *hr_dev)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_v1_priv *priv;
- struct hns_roce_db_table *db;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->priv;
- db = &priv->db_table;
-
- if (db->sdb_ext_mod) {
- dma_free_coherent(dev, HNS_ROCE_V1_EXT_SDB_SIZE,
- db->ext_db->sdb_buf_list->buf,
- db->ext_db->sdb_buf_list->map);
- kfree(db->ext_db->sdb_buf_list);
- }
-
- if (db->odb_ext_mod) {
- dma_free_coherent(dev, HNS_ROCE_V1_EXT_ODB_SIZE,
- db->ext_db->odb_buf_list->buf,
- db->ext_db->odb_buf_list->map);
- kfree(db->ext_db->odb_buf_list);
- }
-
- kfree(db->ext_db);
-}
-
-static int hns_roce_raq_init(struct hns_roce_dev *hr_dev)
-{
- int ret;
- u32 val;
- __le32 tmp;
- int raq_shift = 0;
- dma_addr_t addr;
- struct hns_roce_v1_priv *priv;
- struct hns_roce_raq_table *raq;
- struct device *dev = &hr_dev->pdev->dev;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->priv;
- raq = &priv->raq_table;
-
- raq->e_raq_buf = kzalloc(sizeof(*(raq->e_raq_buf)), GFP_KERNEL);
- if (!raq->e_raq_buf)
- return -ENOMEM;
-
- raq->e_raq_buf->buf = dma_alloc_coherent(dev, HNS_ROCE_V1_RAQ_SIZE,
- &addr, GFP_KERNEL);
- if (!raq->e_raq_buf->buf) {
- ret = -ENOMEM;
- goto err_dma_alloc_raq;
- }
- raq->e_raq_buf->map = addr;
-
- /* Configure raq extended address. 48bit 4K align*/
- roce_write(hr_dev, ROCEE_EXT_RAQ_REG, raq->e_raq_buf->map >> 12);
-
- /* Configure raq_shift */
- raq_shift = ilog2(HNS_ROCE_V1_RAQ_SIZE / HNS_ROCE_V1_RAQ_ENTRY);
- val = roce_read(hr_dev, ROCEE_EXT_RAQ_H_REG);
- tmp = cpu_to_le32(val);
- roce_set_field(tmp, ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M,
- ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S, raq_shift);
- /*
- * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of
- * using 4K page, and shift more 32 because of
- * caculating the high 32 bit value evaluated to hardware.
- */
- roce_set_field(tmp, ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M,
- ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S,
- raq->e_raq_buf->map >> 44);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_EXT_RAQ_H_REG, val);
- dev_dbg(dev, "Configure raq_shift 0x%x.\n", val);
-
- /* Configure raq threshold */
- val = roce_read(hr_dev, ROCEE_RAQ_WL_REG);
- tmp = cpu_to_le32(val);
- roce_set_field(tmp, ROCEE_RAQ_WL_ROCEE_RAQ_WL_M,
- ROCEE_RAQ_WL_ROCEE_RAQ_WL_S,
- HNS_ROCE_V1_EXT_RAQ_WF);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_RAQ_WL_REG, val);
- dev_dbg(dev, "Configure raq_wl 0x%x.\n", val);
-
- /* Enable extend raq */
- val = roce_read(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG);
- tmp = cpu_to_le32(val);
- roce_set_field(tmp,
- ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_M,
- ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S,
- POL_TIME_INTERVAL_VAL);
- roce_set_bit(tmp, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE, 1);
- roce_set_field(tmp,
- ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_M,
- ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S,
- 2);
- roce_set_bit(tmp,
- ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_EN_S, 1);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG, val);
- dev_dbg(dev, "Configure WrmsPolTimeInterval 0x%x.\n", val);
-
- /* Enable raq drop */
- val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
- tmp = cpu_to_le32(val);
- roce_set_bit(tmp, ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S, 1);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
- dev_dbg(dev, "Configure GlbCfg = 0x%x.\n", val);
-
- return 0;
-
-err_dma_alloc_raq:
- kfree(raq->e_raq_buf);
- return ret;
-}
-
-static void hns_roce_raq_free(struct hns_roce_dev *hr_dev)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_v1_priv *priv;
- struct hns_roce_raq_table *raq;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->priv;
- raq = &priv->raq_table;
-
- dma_free_coherent(dev, HNS_ROCE_V1_RAQ_SIZE, raq->e_raq_buf->buf,
- raq->e_raq_buf->map);
- kfree(raq->e_raq_buf);
-}
-
-static void hns_roce_port_enable(struct hns_roce_dev *hr_dev, int enable_flag)
-{
- __le32 tmp;
- u32 val;
-
- if (enable_flag) {
- val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
- /* Open all ports */
- tmp = cpu_to_le32(val);
- roce_set_field(tmp, ROCEE_GLB_CFG_ROCEE_PORT_ST_M,
- ROCEE_GLB_CFG_ROCEE_PORT_ST_S,
- ALL_PORT_VAL_OPEN);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
- } else {
- val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
- /* Close all ports */
- tmp = cpu_to_le32(val);
- roce_set_field(tmp, ROCEE_GLB_CFG_ROCEE_PORT_ST_M,
- ROCEE_GLB_CFG_ROCEE_PORT_ST_S, 0x0);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
- }
-}
-
-static int hns_roce_bt_init(struct hns_roce_dev *hr_dev)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_v1_priv *priv;
- int ret;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->priv;
-
- priv->bt_table.qpc_buf.buf = dma_alloc_coherent(dev,
- HNS_ROCE_BT_RSV_BUF_SIZE, &priv->bt_table.qpc_buf.map,
- GFP_KERNEL);
- if (!priv->bt_table.qpc_buf.buf)
- return -ENOMEM;
-
- priv->bt_table.mtpt_buf.buf = dma_alloc_coherent(dev,
- HNS_ROCE_BT_RSV_BUF_SIZE, &priv->bt_table.mtpt_buf.map,
- GFP_KERNEL);
- if (!priv->bt_table.mtpt_buf.buf) {
- ret = -ENOMEM;
- goto err_failed_alloc_mtpt_buf;
- }
-
- priv->bt_table.cqc_buf.buf = dma_alloc_coherent(dev,
- HNS_ROCE_BT_RSV_BUF_SIZE, &priv->bt_table.cqc_buf.map,
- GFP_KERNEL);
- if (!priv->bt_table.cqc_buf.buf) {
- ret = -ENOMEM;
- goto err_failed_alloc_cqc_buf;
- }
-
- return 0;
-
-err_failed_alloc_cqc_buf:
- dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE,
- priv->bt_table.mtpt_buf.buf, priv->bt_table.mtpt_buf.map);
-
-err_failed_alloc_mtpt_buf:
- dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE,
- priv->bt_table.qpc_buf.buf, priv->bt_table.qpc_buf.map);
-
- return ret;
-}
-
-static void hns_roce_bt_free(struct hns_roce_dev *hr_dev)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_v1_priv *priv;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->priv;
-
- dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE,
- priv->bt_table.cqc_buf.buf, priv->bt_table.cqc_buf.map);
-
- dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE,
- priv->bt_table.mtpt_buf.buf, priv->bt_table.mtpt_buf.map);
-
- dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE,
- priv->bt_table.qpc_buf.buf, priv->bt_table.qpc_buf.map);
-}
-
-static int hns_roce_tptr_init(struct hns_roce_dev *hr_dev)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_buf_list *tptr_buf;
- struct hns_roce_v1_priv *priv;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->priv;
- tptr_buf = &priv->tptr_table.tptr_buf;
-
- /*
- * This buffer will be used for CQ's tptr(tail pointer), also
- * named ci(customer index). Every CQ will use 2 bytes to save
- * cqe ci in hip06. Hardware will read this area to get new ci
- * when the queue is almost full.
- */
- tptr_buf->buf = dma_alloc_coherent(dev, HNS_ROCE_V1_TPTR_BUF_SIZE,
- &tptr_buf->map, GFP_KERNEL);
- if (!tptr_buf->buf)
- return -ENOMEM;
-
- hr_dev->tptr_dma_addr = tptr_buf->map;
- hr_dev->tptr_size = HNS_ROCE_V1_TPTR_BUF_SIZE;
-
- return 0;
-}
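
The comment in the removed hns_roce_tptr_init() describes one shared coherent buffer in which each hip06 CQ owns a 2-byte tail-pointer (consumer-index) slot that the hardware polls when the queue is nearly full. A hedged sketch of how a CQ's slot would be addressed within that buffer; the 2-byte slot size is taken from the comment above, and example_cq_tptr_slot() is illustrative rather than the driver's helper:

/* Illustrative: locate CQ number "cqn"'s 2-byte tail-pointer slot. */
static u16 *example_cq_tptr_slot(struct hns_roce_buf_list *tptr_buf,
				 unsigned long cqn)
{
	return (u16 *)(tptr_buf->buf + cqn * sizeof(u16));
}

/* The matching DMA address handed to hardware would then be:      */
/*   dma_addr_t slot_dma = tptr_buf->map + cqn * sizeof(u16);      */
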
-
-static void hns_roce_tptr_free(struct hns_roce_dev *hr_dev)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_buf_list *tptr_buf;
- struct hns_roce_v1_priv *priv;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->priv;
- tptr_buf = &priv->tptr_table.tptr_buf;
-
- dma_free_coherent(dev, HNS_ROCE_V1_TPTR_BUF_SIZE,
- tptr_buf->buf, tptr_buf->map);
-}
-
-static int hns_roce_free_mr_init(struct hns_roce_dev *hr_dev)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_free_mr *free_mr;
- struct hns_roce_v1_priv *priv;
- int ret = 0;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->priv;
- free_mr = &priv->free_mr;
-
- free_mr->free_mr_wq = create_singlethread_workqueue("hns_roce_free_mr");
- if (!free_mr->free_mr_wq) {
- dev_err(dev, "Create free mr workqueue failed!\n");
- return -ENOMEM;
- }
-
- ret = hns_roce_v1_rsv_lp_qp(hr_dev);
- if (ret) {
- dev_err(dev, "Reserved loop qp failed(%d)!\n", ret);
- flush_workqueue(free_mr->free_mr_wq);
- destroy_workqueue(free_mr->free_mr_wq);
- }
-
- return ret;
-}
-
-static void hns_roce_free_mr_free(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_free_mr *free_mr;
- struct hns_roce_v1_priv *priv;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->priv;
- free_mr = &priv->free_mr;
-
- flush_workqueue(free_mr->free_mr_wq);
- destroy_workqueue(free_mr->free_mr_wq);
-
- hns_roce_v1_release_lp_qp(hr_dev);
-}
-
-/**
- * hns_roce_v1_reset - reset RoCE
- * @hr_dev: RoCE device struct pointer
- * @enable: true -- drop reset, false -- reset
- * return 0 - success , negative --fail
- */
-static int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset)
-{
- struct device_node *dsaf_node;
- struct device *dev = &hr_dev->pdev->dev;
- struct device_node *np = dev->of_node;
- struct fwnode_handle *fwnode;
- int ret;
-
- /* check if this is DT/ACPI case */
- if (dev_of_node(dev)) {
- dsaf_node = of_parse_phandle(np, "dsaf-handle", 0);
- if (!dsaf_node) {
- dev_err(dev, "could not find dsaf-handle\n");
- return -EINVAL;
- }
- fwnode = &dsaf_node->fwnode;
- } else if (is_acpi_device_node(dev->fwnode)) {
- struct fwnode_reference_args args;
-
- ret = acpi_node_get_property_reference(dev->fwnode,
- "dsaf-handle", 0, &args);
- if (ret) {
- dev_err(dev, "could not find dsaf-handle\n");
- return ret;
- }
- fwnode = args.fwnode;
- } else {
- dev_err(dev, "cannot read data from DT or ACPI\n");
- return -ENXIO;
- }
-
- ret = hns_dsaf_roce_reset(fwnode, false);
- if (ret)
- return ret;
-
- if (dereset) {
- msleep(SLEEP_TIME_INTERVAL);
- ret = hns_dsaf_roce_reset(fwnode, true);
- }
-
- return ret;
-}
-
-static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
-{
- int i = 0;
- struct hns_roce_caps *caps = &hr_dev->caps;
-
- hr_dev->vendor_id = roce_read(hr_dev, ROCEE_VENDOR_ID_REG);
- hr_dev->vendor_part_id = roce_read(hr_dev, ROCEE_VENDOR_PART_ID_REG);
- hr_dev->sys_image_guid = roce_read(hr_dev, ROCEE_SYS_IMAGE_GUID_L_REG) |
- ((u64)roce_read(hr_dev,
- ROCEE_SYS_IMAGE_GUID_H_REG) << 32);
- hr_dev->hw_rev = HNS_ROCE_HW_VER1;
-
- caps->num_qps = HNS_ROCE_V1_MAX_QP_NUM;
- caps->max_wqes = HNS_ROCE_V1_MAX_WQE_NUM;
- caps->min_wqes = HNS_ROCE_MIN_WQE_NUM;
- caps->num_cqs = HNS_ROCE_V1_MAX_CQ_NUM;
- caps->min_cqes = HNS_ROCE_MIN_CQE_NUM;
- caps->max_cqes = HNS_ROCE_V1_MAX_CQE_NUM;
- caps->max_sq_sg = HNS_ROCE_V1_SG_NUM;
- caps->max_rq_sg = HNS_ROCE_V1_SG_NUM;
- caps->max_sq_inline = HNS_ROCE_V1_INLINE_SIZE;
- caps->num_uars = HNS_ROCE_V1_UAR_NUM;
- caps->phy_num_uars = HNS_ROCE_V1_PHY_UAR_NUM;
- caps->num_aeq_vectors = HNS_ROCE_V1_AEQE_VEC_NUM;
- caps->num_comp_vectors = HNS_ROCE_V1_COMP_VEC_NUM;
- caps->num_other_vectors = HNS_ROCE_V1_ABNORMAL_VEC_NUM;
- caps->num_mtpts = HNS_ROCE_V1_MAX_MTPT_NUM;
- caps->num_mtt_segs = HNS_ROCE_V1_MAX_MTT_SEGS;
- caps->num_pds = HNS_ROCE_V1_MAX_PD_NUM;
- caps->max_qp_init_rdma = HNS_ROCE_V1_MAX_QP_INIT_RDMA;
- caps->max_qp_dest_rdma = HNS_ROCE_V1_MAX_QP_DEST_RDMA;
- caps->max_sq_desc_sz = HNS_ROCE_V1_MAX_SQ_DESC_SZ;
- caps->max_rq_desc_sz = HNS_ROCE_V1_MAX_RQ_DESC_SZ;
- caps->qpc_entry_sz = HNS_ROCE_V1_QPC_ENTRY_SIZE;
- caps->irrl_entry_sz = HNS_ROCE_V1_IRRL_ENTRY_SIZE;
- caps->cqc_entry_sz = HNS_ROCE_V1_CQC_ENTRY_SIZE;
- caps->mtpt_entry_sz = HNS_ROCE_V1_MTPT_ENTRY_SIZE;
- caps->mtt_entry_sz = HNS_ROCE_V1_MTT_ENTRY_SIZE;
- caps->cq_entry_sz = HNS_ROCE_V1_CQE_ENTRY_SIZE;
- caps->page_size_cap = HNS_ROCE_V1_PAGE_SIZE_SUPPORT;
- caps->reserved_lkey = 0;
- caps->reserved_pds = 0;
- caps->reserved_mrws = 1;
- caps->reserved_uars = 0;
- caps->reserved_cqs = 0;
- caps->chunk_sz = HNS_ROCE_V1_TABLE_CHUNK_SIZE;
-
- for (i = 0; i < caps->num_ports; i++)
- caps->pkey_table_len[i] = 1;
-
- for (i = 0; i < caps->num_ports; i++) {
- /* Six ports shared 16 GID in v1 engine */
- if (i >= (HNS_ROCE_V1_GID_NUM % caps->num_ports))
- caps->gid_table_len[i] = HNS_ROCE_V1_GID_NUM /
- caps->num_ports;
- else
- caps->gid_table_len[i] = HNS_ROCE_V1_GID_NUM /
- caps->num_ports + 1;
- }
-
- caps->ceqe_depth = HNS_ROCE_V1_COMP_EQE_NUM;
- caps->aeqe_depth = HNS_ROCE_V1_ASYNC_EQE_NUM;
- caps->local_ca_ack_delay = roce_read(hr_dev, ROCEE_ACK_DELAY_REG);
- caps->max_mtu = IB_MTU_2048;
-
- return 0;
-}
-
-static int hns_roce_v1_init(struct hns_roce_dev *hr_dev)
-{
- int ret;
- u32 val;
- __le32 tmp;
- struct device *dev = &hr_dev->pdev->dev;
-
- /* DMAE user config */
- val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG1_REG);
- tmp = cpu_to_le32(val);
- roce_set_field(tmp, ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M,
- ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S, 0xf);
- roce_set_field(tmp, ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M,
- ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S,
- 1 << PAGES_SHIFT_16);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_DMAE_USER_CFG1_REG, val);
-
- val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG2_REG);
- tmp = cpu_to_le32(val);
- roce_set_field(tmp, ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M,
- ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S, 0xf);
- roce_set_field(tmp, ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M,
- ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S,
- 1 << PAGES_SHIFT_16);
-
- ret = hns_roce_db_init(hr_dev);
- if (ret) {
- dev_err(dev, "doorbell init failed!\n");
- return ret;
- }
-
- ret = hns_roce_raq_init(hr_dev);
- if (ret) {
- dev_err(dev, "raq init failed!\n");
- goto error_failed_raq_init;
- }
-
- ret = hns_roce_bt_init(hr_dev);
- if (ret) {
- dev_err(dev, "bt init failed!\n");
- goto error_failed_bt_init;
- }
-
- ret = hns_roce_tptr_init(hr_dev);
- if (ret) {
- dev_err(dev, "tptr init failed!\n");
- goto error_failed_tptr_init;
- }
-
- ret = hns_roce_free_mr_init(hr_dev);
- if (ret) {
- dev_err(dev, "free mr init failed!\n");
- goto error_failed_free_mr_init;
- }
-
- hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_UP);
-
- return 0;
-
-error_failed_free_mr_init:
- hns_roce_tptr_free(hr_dev);
-
-error_failed_tptr_init:
- hns_roce_bt_free(hr_dev);
-
-error_failed_bt_init:
- hns_roce_raq_free(hr_dev);
-
-error_failed_raq_init:
- hns_roce_db_free(hr_dev);
- return ret;
-}
-
-static void hns_roce_v1_exit(struct hns_roce_dev *hr_dev)
-{
- hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN);
- hns_roce_free_mr_free(hr_dev);
- hns_roce_tptr_free(hr_dev);
- hns_roce_bt_free(hr_dev);
- hns_roce_raq_free(hr_dev);
- hns_roce_db_free(hr_dev);
-}
-
-static int hns_roce_v1_cmd_pending(struct hns_roce_dev *hr_dev)
-{
- u32 status = readl(hr_dev->reg_base + ROCEE_MB6_REG);
-
- return (!!(status & (1 << HCR_GO_BIT)));
-}
-
-static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param,
- u64 out_param, u32 in_modifier, u8 op_modifier,
- u16 op, u16 token, int event)
-{
- u32 __iomem *hcr = (u32 __iomem *)(hr_dev->reg_base + ROCEE_MB1_REG);
- unsigned long end;
- u32 val = 0;
- __le32 tmp;
-
- end = msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS) + jiffies;
- while (hns_roce_v1_cmd_pending(hr_dev)) {
- if (time_after(jiffies, end)) {
- dev_err(hr_dev->dev, "jiffies=%d end=%d\n",
- (int)jiffies, (int)end);
- return -EAGAIN;
- }
- cond_resched();
- }
-
- tmp = cpu_to_le32(val);
- roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_CMD_M, ROCEE_MB6_ROCEE_MB_CMD_S,
- op);
- roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_CMD_MDF_M,
- ROCEE_MB6_ROCEE_MB_CMD_MDF_S, op_modifier);
- roce_set_bit(tmp, ROCEE_MB6_ROCEE_MB_EVENT_S, event);
- roce_set_bit(tmp, ROCEE_MB6_ROCEE_MB_HW_RUN_S, 1);
- roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_TOKEN_M,
- ROCEE_MB6_ROCEE_MB_TOKEN_S, token);
-
- val = le32_to_cpu(tmp);
- writeq(in_param, hcr + 0);
- writeq(out_param, hcr + 2);
- writel(in_modifier, hcr + 4);
- /* Memory barrier */
- wmb();
-
- writel(val, hcr + 5);
-
- return 0;
-}
-
-static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev,
- unsigned long timeout)
-{
- u8 __iomem *hcr = hr_dev->reg_base + ROCEE_MB1_REG;
- unsigned long end = 0;
- u32 status = 0;
-
- end = msecs_to_jiffies(timeout) + jiffies;
- while (hns_roce_v1_cmd_pending(hr_dev) && time_before(jiffies, end))
- cond_resched();
-
- if (hns_roce_v1_cmd_pending(hr_dev)) {
- dev_err(hr_dev->dev, "[cmd_poll]hw run cmd TIMEDOUT!\n");
- return -ETIMEDOUT;
- }
-
- status = le32_to_cpu((__force __le32)
- __raw_readl(hcr + HCR_STATUS_OFFSET));
- if ((status & STATUS_MASK) != 0x1) {
- dev_err(hr_dev->dev, "mailbox status 0x%x!\n", status);
- return -EBUSY;
- }
-
- return 0;
-}
-
-static int hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port,
- int gid_index, const union ib_gid *gid,
- const struct ib_gid_attr *attr)
-{
- u32 *p = NULL;
- u8 gid_idx = 0;
-
- gid_idx = hns_get_gid_index(hr_dev, port, gid_index);
-
- p = (u32 *)&gid->raw[0];
- roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_L_0_REG +
- (HNS_ROCE_V1_GID_NUM * gid_idx));
-
- p = (u32 *)&gid->raw[4];
- roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_ML_0_REG +
- (HNS_ROCE_V1_GID_NUM * gid_idx));
-
- p = (u32 *)&gid->raw[8];
- roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_MH_0_REG +
- (HNS_ROCE_V1_GID_NUM * gid_idx));
-
- p = (u32 *)&gid->raw[0xc];
- roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_H_0_REG +
- (HNS_ROCE_V1_GID_NUM * gid_idx));
-
- return 0;
-}
-
-static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
- u8 *addr)
-{
- u32 reg_smac_l;
- u16 reg_smac_h;
- __le32 tmp;
- u16 *p_h;
- u32 *p;
- u32 val;
-
- /*
- * When mac changed, loopback may fail
- * because of smac not equal to dmac.
- * We Need to release and create reserved qp again.
- */
- if (hr_dev->hw->dereg_mr) {
- int ret;
-
- ret = hns_roce_v1_recreate_lp_qp(hr_dev);
- if (ret && ret != -ETIMEDOUT)
- return ret;
- }
-
- p = (u32 *)(&addr[0]);
- reg_smac_l = *p;
- roce_raw_write(reg_smac_l, hr_dev->reg_base + ROCEE_SMAC_L_0_REG +
- PHY_PORT_OFFSET * phy_port);
-
- val = roce_read(hr_dev,
- ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET);
- tmp = cpu_to_le32(val);
- p_h = (u16 *)(&addr[4]);
- reg_smac_h = *p_h;
- roce_set_field(tmp, ROCEE_SMAC_H_ROCEE_SMAC_H_M,
- ROCEE_SMAC_H_ROCEE_SMAC_H_S, reg_smac_h);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET,
- val);
-
- return 0;
-}
-
-static void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port,
- enum ib_mtu mtu)
-{
- __le32 tmp;
- u32 val;
-
- val = roce_read(hr_dev,
- ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET);
- tmp = cpu_to_le32(val);
- roce_set_field(tmp, ROCEE_SMAC_H_ROCEE_PORT_MTU_M,
- ROCEE_SMAC_H_ROCEE_PORT_MTU_S, mtu);
- val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET,
- val);
-}
-
-static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
- unsigned long mtpt_idx)
-{
- struct hns_roce_v1_mpt_entry *mpt_entry;
- struct sg_dma_page_iter sg_iter;
- u64 *pages;
- int i;
-
- /* MPT filled into mailbox buf */
- mpt_entry = (struct hns_roce_v1_mpt_entry *)mb_buf;
- memset(mpt_entry, 0, sizeof(*mpt_entry));
-
- roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_KEY_STATE_M,
- MPT_BYTE_4_KEY_STATE_S, KEY_VALID);
- roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_KEY_M,
- MPT_BYTE_4_KEY_S, mr->key);
- roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_PAGE_SIZE_M,
- MPT_BYTE_4_PAGE_SIZE_S, MR_SIZE_4K);
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_MW_TYPE_S, 0);
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_MW_BIND_ENABLE_S,
- (mr->access & IB_ACCESS_MW_BIND ? 1 : 0));
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_OWN_S, 0);
- roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_MEMORY_LOCATION_TYPE_M,
- MPT_BYTE_4_MEMORY_LOCATION_TYPE_S, mr->type);
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_ATOMIC_S, 0);
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_LOCAL_WRITE_S,
- (mr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0));
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_WRITE_S,
- (mr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0));
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_READ_S,
- (mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0));
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_INVAL_ENABLE_S,
- 0);
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_ADDRESS_TYPE_S, 0);
-
- roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M,
- MPT_BYTE_12_PBL_ADDR_H_S, 0);
- roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_MW_BIND_COUNTER_M,
- MPT_BYTE_12_MW_BIND_COUNTER_S, 0);
-
- mpt_entry->virt_addr_l = cpu_to_le32((u32)mr->iova);
- mpt_entry->virt_addr_h = cpu_to_le32((u32)(mr->iova >> 32));
- mpt_entry->length = cpu_to_le32((u32)mr->size);
-
- roce_set_field(mpt_entry->mpt_byte_28, MPT_BYTE_28_PD_M,
- MPT_BYTE_28_PD_S, mr->pd);
- roce_set_field(mpt_entry->mpt_byte_28, MPT_BYTE_28_L_KEY_IDX_L_M,
- MPT_BYTE_28_L_KEY_IDX_L_S, mtpt_idx);
- roce_set_field(mpt_entry->mpt_byte_64, MPT_BYTE_64_L_KEY_IDX_H_M,
- MPT_BYTE_64_L_KEY_IDX_H_S, mtpt_idx >> MTPT_IDX_SHIFT);
-
- /* DMA memory register */
- if (mr->type == MR_TYPE_DMA)
- return 0;
-
- pages = (u64 *) __get_free_page(GFP_KERNEL);
- if (!pages)
- return -ENOMEM;
-
- i = 0;
- for_each_sg_dma_page(mr->umem->sg_head.sgl, &sg_iter, mr->umem->nmap, 0) {
- pages[i] = ((u64)sg_page_iter_dma_address(&sg_iter)) >> 12;
-
- /* Only the first 7 pages are recorded directly in the MTPT entry */
- if (i >= HNS_ROCE_MAX_INNER_MTPT_NUM)
- break;
- i++;
- }
-
- /* Register user mr */
- for (i = 0; i < HNS_ROCE_MAX_INNER_MTPT_NUM; i++) {
- switch (i) {
- case 0:
- mpt_entry->pa0_l = cpu_to_le32((u32)(pages[i]));
- roce_set_field(mpt_entry->mpt_byte_36,
- MPT_BYTE_36_PA0_H_M,
- MPT_BYTE_36_PA0_H_S,
- (u32)(pages[i] >> PAGES_SHIFT_32));
- break;
- case 1:
- roce_set_field(mpt_entry->mpt_byte_36,
- MPT_BYTE_36_PA1_L_M,
- MPT_BYTE_36_PA1_L_S, (u32)(pages[i]));
- roce_set_field(mpt_entry->mpt_byte_40,
- MPT_BYTE_40_PA1_H_M,
- MPT_BYTE_40_PA1_H_S,
- (u32)(pages[i] >> PAGES_SHIFT_24));
- break;
- case 2:
- roce_set_field(mpt_entry->mpt_byte_40,
- MPT_BYTE_40_PA2_L_M,
- MPT_BYTE_40_PA2_L_S, (u32)(pages[i]));
- roce_set_field(mpt_entry->mpt_byte_44,
- MPT_BYTE_44_PA2_H_M,
- MPT_BYTE_44_PA2_H_S,
- (u32)(pages[i] >> PAGES_SHIFT_16));
- break;
- case 3:
- roce_set_field(mpt_entry->mpt_byte_44,
- MPT_BYTE_44_PA3_L_M,
- MPT_BYTE_44_PA3_L_S, (u32)(pages[i]));
- roce_set_field(mpt_entry->mpt_byte_48,
- MPT_BYTE_48_PA3_H_M,
- MPT_BYTE_48_PA3_H_S,
- (u32)(pages[i] >> PAGES_SHIFT_8));
- break;
- case 4:
- mpt_entry->pa4_l = cpu_to_le32((u32)(pages[i]));
- roce_set_field(mpt_entry->mpt_byte_56,
- MPT_BYTE_56_PA4_H_M,
- MPT_BYTE_56_PA4_H_S,
- (u32)(pages[i] >> PAGES_SHIFT_32));
- break;
- case 5:
- roce_set_field(mpt_entry->mpt_byte_56,
- MPT_BYTE_56_PA5_L_M,
- MPT_BYTE_56_PA5_L_S, (u32)(pages[i]));
- roce_set_field(mpt_entry->mpt_byte_60,
- MPT_BYTE_60_PA5_H_M,
- MPT_BYTE_60_PA5_H_S,
- (u32)(pages[i] >> PAGES_SHIFT_24));
- break;
- case 6:
- roce_set_field(mpt_entry->mpt_byte_60,
- MPT_BYTE_60_PA6_L_M,
- MPT_BYTE_60_PA6_L_S, (u32)(pages[i]));
- roce_set_field(mpt_entry->mpt_byte_64,
- MPT_BYTE_64_PA6_H_M,
- MPT_BYTE_64_PA6_H_S,
- (u32)(pages[i] >> PAGES_SHIFT_16));
- break;
- default:
- break;
- }
- }
-
- free_page((unsigned long) pages);
-
- mpt_entry->pbl_addr_l = cpu_to_le32((u32)(mr->pbl_dma_addr));
-
- roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M,
- MPT_BYTE_12_PBL_ADDR_H_S,
- ((u32)(mr->pbl_dma_addr >> 32)));
-
- return 0;
-}
-
-static void *get_cqe(struct hns_roce_cq *hr_cq, int n)
-{
- return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf,
- n * HNS_ROCE_V1_CQE_ENTRY_SIZE);
-}
-
-static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n)
-{
- struct hns_roce_cqe *hr_cqe = get_cqe(hr_cq, n & hr_cq->ib_cq.cqe);
-
- /* Get the CQE whose owner bit is the inverse of the wrap bit of cons_idx */
- return (roce_get_bit(hr_cqe->cqe_byte_4, CQE_BYTE_4_OWNER_S) ^
- !!(n & (hr_cq->ib_cq.cqe + 1))) ? hr_cqe : NULL;
-}
-
-static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *hr_cq)
-{
- return get_sw_cqe(hr_cq, hr_cq->cons_index);
-}
-
-static void hns_roce_v1_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index)
-{
- __le32 doorbell[2];
-
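- /* doorbell[0]: new consumer index (masked to 2 * cq_depth);
- * doorbell[1]: hardware sync bit, doorbell command and CQN.
- */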
- doorbell[0] = cpu_to_le32(cons_index & ((hr_cq->cq_depth << 1) - 1));
- doorbell[1] = 0;
- roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1);
- roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M,
- ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3);
- roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_M,
- ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S, 0);
- roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_M,
- ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S, hr_cq->cqn);
-
- hns_roce_write64_k(doorbell, hr_cq->cq_db_l);
-}
-
-static void __hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
- struct hns_roce_srq *srq)
-{
- struct hns_roce_cqe *cqe, *dest;
- u32 prod_index;
- int nfreed = 0;
- u8 owner_bit;
-
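- /* Find the index one past the last CQE currently owned by software,
- * scanning at most one full ring of entries.
- */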
- for (prod_index = hr_cq->cons_index; get_sw_cqe(hr_cq, prod_index);
- ++prod_index) {
- if (prod_index == hr_cq->cons_index + hr_cq->ib_cq.cqe)
- break;
- }
-
- /*
- * Now backwards through the CQ, removing CQ entries
- * that match our QP by overwriting them with next entries.
- */
- while ((int) --prod_index - (int) hr_cq->cons_index >= 0) {
- cqe = get_cqe(hr_cq, prod_index & hr_cq->ib_cq.cqe);
- if ((roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
- CQE_BYTE_16_LOCAL_QPN_S) &
- HNS_ROCE_CQE_QPN_MASK) == qpn) {
- /* The v1 engine does not support SRQ */
- ++nfreed;
- } else if (nfreed) {
- dest = get_cqe(hr_cq, (prod_index + nfreed) &
- hr_cq->ib_cq.cqe);
- owner_bit = roce_get_bit(dest->cqe_byte_4,
- CQE_BYTE_4_OWNER_S);
- memcpy(dest, cqe, sizeof(*cqe));
- roce_set_bit(dest->cqe_byte_4, CQE_BYTE_4_OWNER_S,
- owner_bit);
- }
- }
-
- if (nfreed) {
- hr_cq->cons_index += nfreed;
- /*
- * Make sure update of buffer contents is done before
- * updating consumer index.
- */
- wmb();
-
- hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index);
- }
-}
-
-static void hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
- struct hns_roce_srq *srq)
-{
- spin_lock_irq(&hr_cq->lock);
- __hns_roce_v1_cq_clean(hr_cq, qpn, srq);
- spin_unlock_irq(&hr_cq->lock);
-}
-
-static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
- struct hns_roce_cq *hr_cq, void *mb_buf,
- u64 *mtts, dma_addr_t dma_handle, int nent,
- u32 vector)
-{
- struct hns_roce_cq_context *cq_context = NULL;
- struct hns_roce_buf_list *tptr_buf;
- struct hns_roce_v1_priv *priv;
- dma_addr_t tptr_dma_addr;
- int offset;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->priv;
- tptr_buf = &priv->tptr_table.tptr_buf;
-
- cq_context = mb_buf;
- memset(cq_context, 0, sizeof(*cq_context));
-
- /* Get the tptr for this CQ. */
- offset = hr_cq->cqn * HNS_ROCE_V1_TPTR_ENTRY_SIZE;
- tptr_dma_addr = tptr_buf->map + offset;
- hr_cq->tptr_addr = (u16 *)(tptr_buf->buf + offset);
-
- /* Fill in the cq_context fields */
- roce_set_field(cq_context->cqc_byte_4,
- CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_M,
- CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S, CQ_STATE_VALID);
- roce_set_field(cq_context->cqc_byte_4, CQ_CONTEXT_CQC_BYTE_4_CQN_M,
- CQ_CONTEXT_CQC_BYTE_4_CQN_S, hr_cq->cqn);
-
- cq_context->cq_bt_l = cpu_to_le32((u32)dma_handle);
-
- roce_set_field(cq_context->cqc_byte_12,
- CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_M,
- CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_S,
- ((u64)dma_handle >> 32));
- roce_set_field(cq_context->cqc_byte_12,
- CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_M,
- CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S,
- ilog2((unsigned int)nent));
- roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CEQN_M,
- CQ_CONTEXT_CQC_BYTE_12_CEQN_S, vector);
-
- cq_context->cur_cqe_ba0_l = cpu_to_le32((u32)(mtts[0]));
-
- roce_set_field(cq_context->cqc_byte_20,
- CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_M,
- CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S, (mtts[0]) >> 32);
- /* Maintained by the hardware, so just initialize to 0 */
- roce_set_field(cq_context->cqc_byte_20,
- CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_M,
- CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_S, 0);
- /**
- * 44 = 32 + 12: the address given to hardware is shifted by 12
- * because 4K pages are used, and by a further 32 to extract the
- * high 32 bits written to hardware.
- */
- roce_set_field(cq_context->cqc_byte_20,
- CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M,
- CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S,
- tptr_dma_addr >> 44);
-
- cq_context->cqe_tptr_addr_l = cpu_to_le32((u32)(tptr_dma_addr >> 12));
-
- roce_set_field(cq_context->cqc_byte_32,
- CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M,
- CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_S, 0);
- roce_set_bit(cq_context->cqc_byte_32,
- CQ_CONTEXT_CQC_BYTE_32_SE_FLAG_S, 0);
- roce_set_bit(cq_context->cqc_byte_32,
- CQ_CONTEXT_CQC_BYTE_32_CE_FLAG_S, 0);
- roce_set_bit(cq_context->cqc_byte_32,
- CQ_CONTEXT_CQC_BYTE_32_NOTIFICATION_FLAG_S, 0);
- roce_set_bit(cq_context->cqc_byte_32,
- CQ_CQNTEXT_CQC_BYTE_32_TYPE_OF_COMPLETION_NOTIFICATION_S,
- 0);
- /* The initial value of cq's ci is 0 */
- roce_set_field(cq_context->cqc_byte_32,
- CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M,
- CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0);
-}
-
-static int hns_roce_v1_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
-{
- return -EOPNOTSUPP;
-}
-
-static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq,
- enum ib_cq_notify_flags flags)
-{
- struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
- u32 notification_flag;
- __le32 doorbell[2];
-
- notification_flag = (flags & IB_CQ_SOLICITED_MASK) ==
- IB_CQ_SOLICITED ? CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL;
- /*
- * flags == IB_CQ_NEXT_COMP: notify on the next completion
- * flags == IB_CQ_SOLICITED: notify only on solicited completions
- */
- doorbell[0] =
- cpu_to_le32(hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1));
- roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1);
- roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M,
- ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3);
- roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_M,
- ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S, 1);
- roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_M,
- ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S,
- hr_cq->cqn | notification_flag);
-
- hns_roce_write64_k(doorbell, hr_cq->cq_db_l);
-
- return 0;
-}
-
-static int hns_roce_v1_poll_one(struct hns_roce_cq *hr_cq,
- struct hns_roce_qp **cur_qp, struct ib_wc *wc)
-{
- int qpn;
- int is_send;
- u16 wqe_ctr;
- u32 status;
- u32 opcode;
- struct hns_roce_cqe *cqe;
- struct hns_roce_qp *hr_qp;
- struct hns_roce_wq *wq;
- struct hns_roce_wqe_ctrl_seg *sq_wqe;
- struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
- struct device *dev = &hr_dev->pdev->dev;
-
- /* Find the CQE at the current consumer index */
- cqe = next_cqe_sw(hr_cq);
- if (!cqe)
- return -EAGAIN;
-
- ++hr_cq->cons_index;
- /* Memory barrier */
- rmb();
- /* 0->SQ, 1->RQ */
- is_send = !(roce_get_bit(cqe->cqe_byte_4, CQE_BYTE_4_SQ_RQ_FLAG_S));
-
- /* For QP0/QP1 the local_qpn in the CQE is <= 1, so recompute the real qpn from the port */
- if (roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
- CQE_BYTE_16_LOCAL_QPN_S) <= 1) {
- qpn = roce_get_field(cqe->cqe_byte_20, CQE_BYTE_20_PORT_NUM_M,
- CQE_BYTE_20_PORT_NUM_S) +
- roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
- CQE_BYTE_16_LOCAL_QPN_S) *
- HNS_ROCE_MAX_PORTS;
- } else {
- qpn = roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
- CQE_BYTE_16_LOCAL_QPN_S);
- }
-
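- /* Look up and cache the QP for this CQE if it differs from the one
- * handled by the previous iteration.
- */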
- if (!*cur_qp || (qpn & HNS_ROCE_CQE_QPN_MASK) != (*cur_qp)->qpn) {
- hr_qp = __hns_roce_qp_lookup(hr_dev, qpn);
- if (unlikely(!hr_qp)) {
- dev_err(dev, "CQ %06lx with entry for unknown QPN %06x\n",
- hr_cq->cqn, (qpn & HNS_ROCE_CQE_QPN_MASK));
- return -EINVAL;
- }
-
- *cur_qp = hr_qp;
- }
-
- wc->qp = &(*cur_qp)->ibqp;
- wc->vendor_err = 0;
-
- status = roce_get_field(cqe->cqe_byte_4,
- CQE_BYTE_4_STATUS_OF_THE_OPERATION_M,
- CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) &
- HNS_ROCE_CQE_STATUS_MASK;
- switch (status) {
- case HNS_ROCE_CQE_SUCCESS:
- wc->status = IB_WC_SUCCESS;
- break;
- case HNS_ROCE_CQE_SYNDROME_LOCAL_LENGTH_ERR:
- wc->status = IB_WC_LOC_LEN_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_LOCAL_QP_OP_ERR:
- wc->status = IB_WC_LOC_QP_OP_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_LOCAL_PROT_ERR:
- wc->status = IB_WC_LOC_PROT_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_WR_FLUSH_ERR:
- wc->status = IB_WC_WR_FLUSH_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_MEM_MANAGE_OPERATE_ERR:
- wc->status = IB_WC_MW_BIND_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_BAD_RESP_ERR:
- wc->status = IB_WC_BAD_RESP_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_LOCAL_ACCESS_ERR:
- wc->status = IB_WC_LOC_ACCESS_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
- wc->status = IB_WC_REM_INV_REQ_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_REMOTE_ACCESS_ERR:
- wc->status = IB_WC_REM_ACCESS_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_REMOTE_OP_ERR:
- wc->status = IB_WC_REM_OP_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
- wc->status = IB_WC_RETRY_EXC_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
- wc->status = IB_WC_RNR_RETRY_EXC_ERR;
- break;
- default:
- wc->status = IB_WC_GENERAL_ERR;
- break;
- }
-
- /* On a CQE error status, return immediately */
- if (wc->status != IB_WC_SUCCESS)
- return 0;
-
- if (is_send) {
- /* The CQE corresponds to the SQ */
- sq_wqe = get_send_wqe(*cur_qp, roce_get_field(cqe->cqe_byte_4,
- CQE_BYTE_4_WQE_INDEX_M,
- CQE_BYTE_4_WQE_INDEX_S)&
- ((*cur_qp)->sq.wqe_cnt-1));
- switch (le32_to_cpu(sq_wqe->flag) & HNS_ROCE_WQE_OPCODE_MASK) {
- case HNS_ROCE_WQE_OPCODE_SEND:
- wc->opcode = IB_WC_SEND;
- break;
- case HNS_ROCE_WQE_OPCODE_RDMA_READ:
- wc->opcode = IB_WC_RDMA_READ;
- wc->byte_len = le32_to_cpu(cqe->byte_cnt);
- break;
- case HNS_ROCE_WQE_OPCODE_RDMA_WRITE:
- wc->opcode = IB_WC_RDMA_WRITE;
- break;
- case HNS_ROCE_WQE_OPCODE_LOCAL_INV:
- wc->opcode = IB_WC_LOCAL_INV;
- break;
- case HNS_ROCE_WQE_OPCODE_UD_SEND:
- wc->opcode = IB_WC_SEND;
- break;
- default:
- wc->status = IB_WC_GENERAL_ERR;
- break;
- }
- wc->wc_flags = (le32_to_cpu(sq_wqe->flag) & HNS_ROCE_WQE_IMM ?
- IB_WC_WITH_IMM : 0);
-
- wq = &(*cur_qp)->sq;
- if ((*cur_qp)->sq_signal_bits) {
- /*
- * If sq_signal_bits is set, first advance the tail
- * pointer to the WQE that this CQE corresponds to.
- */
- wqe_ctr = (u16)roce_get_field(cqe->cqe_byte_4,
- CQE_BYTE_4_WQE_INDEX_M,
- CQE_BYTE_4_WQE_INDEX_S);
- wq->tail += (wqe_ctr - (u16)wq->tail) &
- (wq->wqe_cnt - 1);
- }
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
- ++wq->tail;
- } else {
- /* The CQE corresponds to the RQ */
- wc->byte_len = le32_to_cpu(cqe->byte_cnt);
- opcode = roce_get_field(cqe->cqe_byte_4,
- CQE_BYTE_4_OPERATION_TYPE_M,
- CQE_BYTE_4_OPERATION_TYPE_S) &
- HNS_ROCE_CQE_OPCODE_MASK;
- switch (opcode) {
- case HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE:
- wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
- wc->wc_flags = IB_WC_WITH_IMM;
- wc->ex.imm_data =
- cpu_to_be32(le32_to_cpu(cqe->immediate_data));
- break;
- case HNS_ROCE_OPCODE_SEND_DATA_RECEIVE:
- if (roce_get_bit(cqe->cqe_byte_4,
- CQE_BYTE_4_IMM_INDICATOR_S)) {
- wc->opcode = IB_WC_RECV;
- wc->wc_flags = IB_WC_WITH_IMM;
- wc->ex.imm_data = cpu_to_be32(
- le32_to_cpu(cqe->immediate_data));
- } else {
- wc->opcode = IB_WC_RECV;
- wc->wc_flags = 0;
- }
- break;
- default:
- wc->status = IB_WC_GENERAL_ERR;
- break;
- }
-
- /* Update tail pointer, record wr_id */
- wq = &(*cur_qp)->rq;
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
- ++wq->tail;
- wc->sl = (u8)roce_get_field(cqe->cqe_byte_20, CQE_BYTE_20_SL_M,
- CQE_BYTE_20_SL_S);
- wc->src_qp = (u8)roce_get_field(cqe->cqe_byte_20,
- CQE_BYTE_20_REMOTE_QPN_M,
- CQE_BYTE_20_REMOTE_QPN_S);
- wc->wc_flags |= (roce_get_bit(cqe->cqe_byte_20,
- CQE_BYTE_20_GRH_PRESENT_S) ?
- IB_WC_GRH : 0);
- wc->pkey_index = (u16)roce_get_field(cqe->cqe_byte_28,
- CQE_BYTE_28_P_KEY_IDX_M,
- CQE_BYTE_28_P_KEY_IDX_S);
- }
-
- return 0;
-}
-
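-/* Poll up to num_entries completions; afterwards publish the consumer index
- * through the tptr area and ring the CQ doorbell.
- */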
-int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
-{
- struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
- struct hns_roce_qp *cur_qp = NULL;
- unsigned long flags;
- int npolled;
- int ret = 0;
-
- spin_lock_irqsave(&hr_cq->lock, flags);
-
- for (npolled = 0; npolled < num_entries; ++npolled) {
- ret = hns_roce_v1_poll_one(hr_cq, &cur_qp, wc + npolled);
- if (ret)
- break;
- }
-
- if (npolled) {
- *hr_cq->tptr_addr = hr_cq->cons_index &
- ((hr_cq->cq_depth << 1) - 1);
-
- /* Memory barrier */
- wmb();
- hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index);
- }
-
- spin_unlock_irqrestore(&hr_cq->lock, flags);
-
- if (ret == 0 || ret == -EAGAIN)
- return npolled;
- else
- return ret;
-}
-
-static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table, int obj,
- int step_idx)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_v1_priv *priv;
- unsigned long end = 0, flags = 0;
- __le32 bt_cmd_val[2] = {0};
- void __iomem *bt_cmd;
- u64 bt_ba = 0;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->priv;
-
- switch (table->type) {
- case HEM_TYPE_QPC:
- roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_QPC);
- bt_ba = priv->bt_table.qpc_buf.map >> 12;
- break;
- case HEM_TYPE_MTPT:
- roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_MTPT);
- bt_ba = priv->bt_table.mtpt_buf.map >> 12;
- break;
- case HEM_TYPE_CQC:
- roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_CQC);
- bt_ba = priv->bt_table.cqc_buf.map >> 12;
- break;
- case HEM_TYPE_SRQC:
- dev_dbg(dev, "HEM_TYPE_SRQC not supported.\n");
- return -EINVAL;
- default:
- return 0;
- }
- roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj);
- roce_set_bit(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0);
- roce_set_bit(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1);
-
- spin_lock_irqsave(&hr_dev->bt_cmd_lock, flags);
-
- bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG;
-
- end = msecs_to_jiffies(HW_SYNC_TIMEOUT_MSECS) + jiffies;
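- /* Wait for the hardware to consume any previous BT command (HW_SYNC
- * bit clear), giving up after HW_SYNC_TIMEOUT_MSECS.
- */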
- while (1) {
- if (readl(bt_cmd) >> BT_CMD_SYNC_SHIFT) {
- if (!(time_before(jiffies, end))) {
- dev_err(dev, "Write bt_cmd err, hw_sync is not zero.\n");
- spin_unlock_irqrestore(&hr_dev->bt_cmd_lock,
- flags);
- return -EBUSY;
- }
- } else {
- break;
- }
- msleep(HW_SYNC_SLEEP_TIME_INTERVAL);
- }
-
- bt_cmd_val[0] = (__le32)bt_ba;
- roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, bt_ba >> 32);
- hns_roce_write64_k(bt_cmd_val, hr_dev->reg_base + ROCEE_BT_CMD_L_REG);
-
- spin_unlock_irqrestore(&hr_dev->bt_cmd_lock, flags);
-
- return 0;
-}
-
-static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt,
- enum hns_roce_qp_state cur_state,
- enum hns_roce_qp_state new_state,
- struct hns_roce_qp_context *context,
- struct hns_roce_qp *hr_qp)
-{
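- /* op[cur_state][new_state] selects the mailbox command for a QP state
- * transition; zero entries mark unsupported transitions.
- */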
- static const u16
- op[HNS_ROCE_QP_NUM_STATE][HNS_ROCE_QP_NUM_STATE] = {
- [HNS_ROCE_QP_STATE_RST] = {
- [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP,
- [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP,
- [HNS_ROCE_QP_STATE_INIT] = HNS_ROCE_CMD_RST2INIT_QP,
- },
- [HNS_ROCE_QP_STATE_INIT] = {
- [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP,
- [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP,
- /* Note: In v1 engine, HW doesn't support INIT2INIT,
- * so the RST2INIT cmd is used instead of INIT2INIT.
- */
- [HNS_ROCE_QP_STATE_INIT] = HNS_ROCE_CMD_RST2INIT_QP,
- [HNS_ROCE_QP_STATE_RTR] = HNS_ROCE_CMD_INIT2RTR_QP,
- },
- [HNS_ROCE_QP_STATE_RTR] = {
- [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP,
- [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP,
- [HNS_ROCE_QP_STATE_RTS] = HNS_ROCE_CMD_RTR2RTS_QP,
- },
- [HNS_ROCE_QP_STATE_RTS] = {
- [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP,
- [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP,
- [HNS_ROCE_QP_STATE_RTS] = HNS_ROCE_CMD_RTS2RTS_QP,
- [HNS_ROCE_QP_STATE_SQD] = HNS_ROCE_CMD_RTS2SQD_QP,
- },
- [HNS_ROCE_QP_STATE_SQD] = {
- [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP,
- [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP,
- [HNS_ROCE_QP_STATE_RTS] = HNS_ROCE_CMD_SQD2RTS_QP,
- [HNS_ROCE_QP_STATE_SQD] = HNS_ROCE_CMD_SQD2SQD_QP,
- },
- [HNS_ROCE_QP_STATE_ERR] = {
- [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP,
- [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP,
- }
- };
-
- struct hns_roce_cmd_mailbox *mailbox;
- struct device *dev = &hr_dev->pdev->dev;
- int ret = 0;
-
- if (cur_state >= HNS_ROCE_QP_NUM_STATE ||
- new_state >= HNS_ROCE_QP_NUM_STATE ||
- !op[cur_state][new_state]) {
- dev_err(dev, "[modify_qp] state transition %d to %d not supported\n",
- cur_state, new_state);
- return -EINVAL;
- }
-
- if (op[cur_state][new_state] == HNS_ROCE_CMD_2RST_QP)
- return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2,
- HNS_ROCE_CMD_2RST_QP,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
-
- if (op[cur_state][new_state] == HNS_ROCE_CMD_2ERR_QP)
- return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2,
- HNS_ROCE_CMD_2ERR_QP,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
-
- mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
-
- memcpy(mailbox->buf, context, sizeof(*context));
-
- ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0,
- op[cur_state][new_state],
- HNS_ROCE_CMD_TIMEOUT_MSECS);
-
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
- return ret;
-}
-
-static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
- int attr_mask, enum ib_qp_state cur_state,
- enum ib_qp_state new_state)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct hns_roce_sqp_context *context;
- struct device *dev = &hr_dev->pdev->dev;
- dma_addr_t dma_handle = 0;
- u32 __iomem *addr;
- int rq_pa_start;
- __le32 tmp;
- u32 reg_val;
- u64 *mtts;
-
- context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return -ENOMEM;
-
- /* Search QP buf's MTTs */
- mtts = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_table,
- hr_qp->mtt.first_seg, &dma_handle);
- if (!mtts) {
- dev_err(dev, "qp buf pa find failed\n");
- goto out;
- }
-
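- /* Only the RESET->INIT transition builds a full QP1C context and
- * writes it to the QP1C registers; all transitions update the QP1C
- * state field below.
- */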
- if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
- roce_set_field(context->qp1c_bytes_4,
- QP1C_BYTES_4_SQ_WQE_SHIFT_M,
- QP1C_BYTES_4_SQ_WQE_SHIFT_S,
- ilog2((unsigned int)hr_qp->sq.wqe_cnt));
- roce_set_field(context->qp1c_bytes_4,
- QP1C_BYTES_4_RQ_WQE_SHIFT_M,
- QP1C_BYTES_4_RQ_WQE_SHIFT_S,
- ilog2((unsigned int)hr_qp->rq.wqe_cnt));
- roce_set_field(context->qp1c_bytes_4, QP1C_BYTES_4_PD_M,
- QP1C_BYTES_4_PD_S, to_hr_pd(ibqp->pd)->pdn);
-
- context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle));
- roce_set_field(context->qp1c_bytes_12,
- QP1C_BYTES_12_SQ_RQ_BT_H_M,
- QP1C_BYTES_12_SQ_RQ_BT_H_S,
- ((u32)(dma_handle >> 32)));
-
- roce_set_field(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_HEAD_M,
- QP1C_BYTES_16_RQ_HEAD_S, hr_qp->rq.head);
- roce_set_field(context->qp1c_bytes_16, QP1C_BYTES_16_PORT_NUM_M,
- QP1C_BYTES_16_PORT_NUM_S, hr_qp->phy_port);
- roce_set_bit(context->qp1c_bytes_16,
- QP1C_BYTES_16_SIGNALING_TYPE_S,
- le32_to_cpu(hr_qp->sq_signal_bits));
- roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_BA_FLG_S,
- 1);
- roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SQ_BA_FLG_S,
- 1);
- roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_QP1_ERR_S,
- 0);
-
- roce_set_field(context->qp1c_bytes_20, QP1C_BYTES_20_SQ_HEAD_M,
- QP1C_BYTES_20_SQ_HEAD_S, hr_qp->sq.head);
- roce_set_field(context->qp1c_bytes_20, QP1C_BYTES_20_PKEY_IDX_M,
- QP1C_BYTES_20_PKEY_IDX_S, attr->pkey_index);
-
- rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE;
- context->cur_rq_wqe_ba_l =
- cpu_to_le32((u32)(mtts[rq_pa_start]));
-
- roce_set_field(context->qp1c_bytes_28,
- QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M,
- QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S,
- (mtts[rq_pa_start]) >> 32);
- roce_set_field(context->qp1c_bytes_28,
- QP1C_BYTES_28_RQ_CUR_IDX_M,
- QP1C_BYTES_28_RQ_CUR_IDX_S, 0);
-
- roce_set_field(context->qp1c_bytes_32,
- QP1C_BYTES_32_RX_CQ_NUM_M,
- QP1C_BYTES_32_RX_CQ_NUM_S,
- to_hr_cq(ibqp->recv_cq)->cqn);
- roce_set_field(context->qp1c_bytes_32,
- QP1C_BYTES_32_TX_CQ_NUM_M,
- QP1C_BYTES_32_TX_CQ_NUM_S,
- to_hr_cq(ibqp->send_cq)->cqn);
-
- context->cur_sq_wqe_ba_l = cpu_to_le32((u32)mtts[0]);
-
- roce_set_field(context->qp1c_bytes_40,
- QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M,
- QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S,
- (mtts[0]) >> 32);
- roce_set_field(context->qp1c_bytes_40,
- QP1C_BYTES_40_SQ_CUR_IDX_M,
- QP1C_BYTES_40_SQ_CUR_IDX_S, 0);
-
- /* Copy context to QP1C register */
- addr = (u32 __iomem *)(hr_dev->reg_base +
- ROCEE_QP1C_CFG0_0_REG +
- hr_qp->phy_port * sizeof(*context));
-
- writel(le32_to_cpu(context->qp1c_bytes_4), addr);
- writel(le32_to_cpu(context->sq_rq_bt_l), addr + 1);
- writel(le32_to_cpu(context->qp1c_bytes_12), addr + 2);
- writel(le32_to_cpu(context->qp1c_bytes_16), addr + 3);
- writel(le32_to_cpu(context->qp1c_bytes_20), addr + 4);
- writel(le32_to_cpu(context->cur_rq_wqe_ba_l), addr + 5);
- writel(le32_to_cpu(context->qp1c_bytes_28), addr + 6);
- writel(le32_to_cpu(context->qp1c_bytes_32), addr + 7);
- writel(le32_to_cpu(context->cur_sq_wqe_ba_l), addr + 8);
- writel(le32_to_cpu(context->qp1c_bytes_40), addr + 9);
- }
-
- /* Modify QP1C status */
- reg_val = roce_read(hr_dev, ROCEE_QP1C_CFG0_0_REG +
- hr_qp->phy_port * sizeof(*context));
- tmp = cpu_to_le32(reg_val);
- roce_set_field(tmp, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M,
- ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S, new_state);
- reg_val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_QP1C_CFG0_0_REG +
- hr_qp->phy_port * sizeof(*context), reg_val);
-
- hr_qp->state = new_state;
- if (new_state == IB_QPS_RESET) {
- hns_roce_v1_cq_clean(to_hr_cq(ibqp->recv_cq), hr_qp->qpn,
- ibqp->srq ? to_hr_srq(ibqp->srq) : NULL);
- if (ibqp->send_cq != ibqp->recv_cq)
- hns_roce_v1_cq_clean(to_hr_cq(ibqp->send_cq),
- hr_qp->qpn, NULL);
-
- hr_qp->rq.head = 0;
- hr_qp->rq.tail = 0;
- hr_qp->sq.head = 0;
- hr_qp->sq.tail = 0;
- hr_qp->sq_next_wqe = 0;
- }
-
- kfree(context);
- return 0;
-
-out:
- kfree(context);
- return -EINVAL;
-}
-
-static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
- int attr_mask, enum ib_qp_state cur_state,
- enum ib_qp_state new_state)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_qp_context *context;
- const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
- dma_addr_t dma_handle_2 = 0;
- dma_addr_t dma_handle = 0;
- __le32 doorbell[2] = {0};
- int rq_pa_start = 0;
- u64 *mtts_2 = NULL;
- int ret = -EINVAL;
- u64 *mtts = NULL;
- int port;
- u8 port_num;
- u8 *dmac;
- u8 *smac;
-
- context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return -ENOMEM;
-
- /* Search qp buf's mtts */
- mtts = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_table,
- hr_qp->mtt.first_seg, &dma_handle);
- if (mtts == NULL) {
- dev_err(dev, "qp buf pa find failed\n");
- goto out;
- }
-
- /* Search IRRL's mtts */
- mtts_2 = hns_roce_table_find(hr_dev, &hr_dev->qp_table.irrl_table,
- hr_qp->qpn, &dma_handle_2);
- if (mtts_2 == NULL) {
- dev_err(dev, "qp irrl_table find failed\n");
- goto out;
- }
-
- /*
- * Reset to init
- * Mandatory param:
- * IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS
- * Optional param: NA
- */
- if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
- roce_set_field(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M,
- QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S,
- to_hr_qp_type(hr_qp->ibqp.qp_type));
-
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_ENABLE_FPMR_S, 0);
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S,
- !!(attr->qp_access_flags & IB_ACCESS_REMOTE_READ));
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S,
- !!(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
- );
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_ATOMIC_OPERATION_ENABLE_S,
- !!(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)
- );
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMAR_USE_S, 1);
- roce_set_field(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_M,
- QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S,
- ilog2((unsigned int)hr_qp->sq.wqe_cnt));
- roce_set_field(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_M,
- QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S,
- ilog2((unsigned int)hr_qp->rq.wqe_cnt));
- roce_set_field(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTES_4_PD_M,
- QP_CONTEXT_QPC_BYTES_4_PD_S,
- to_hr_pd(ibqp->pd)->pdn);
- hr_qp->access_flags = attr->qp_access_flags;
- roce_set_field(context->qpc_bytes_8,
- QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_M,
- QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S,
- to_hr_cq(ibqp->send_cq)->cqn);
- roce_set_field(context->qpc_bytes_8,
- QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_M,
- QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S,
- to_hr_cq(ibqp->recv_cq)->cqn);
-
- if (ibqp->srq)
- roce_set_field(context->qpc_bytes_12,
- QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_M,
- QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S,
- to_hr_srq(ibqp->srq)->srqn);
-
- roce_set_field(context->qpc_bytes_12,
- QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M,
- QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S,
- attr->pkey_index);
- hr_qp->pkey_index = attr->pkey_index;
- roce_set_field(context->qpc_bytes_16,
- QP_CONTEXT_QPC_BYTES_16_QP_NUM_M,
- QP_CONTEXT_QPC_BYTES_16_QP_NUM_S, hr_qp->qpn);
-
- } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
- roce_set_field(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M,
- QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S,
- to_hr_qp_type(hr_qp->ibqp.qp_type));
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_ENABLE_FPMR_S, 0);
- if (attr_mask & IB_QP_ACCESS_FLAGS) {
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S,
- !!(attr->qp_access_flags &
- IB_ACCESS_REMOTE_READ));
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S,
- !!(attr->qp_access_flags &
- IB_ACCESS_REMOTE_WRITE));
- } else {
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S,
- !!(hr_qp->access_flags &
- IB_ACCESS_REMOTE_READ));
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S,
- !!(hr_qp->access_flags &
- IB_ACCESS_REMOTE_WRITE));
- }
-
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMAR_USE_S, 1);
- roce_set_field(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_M,
- QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S,
- ilog2((unsigned int)hr_qp->sq.wqe_cnt));
- roce_set_field(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_M,
- QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S,
- ilog2((unsigned int)hr_qp->rq.wqe_cnt));
- roce_set_field(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTES_4_PD_M,
- QP_CONTEXT_QPC_BYTES_4_PD_S,
- to_hr_pd(ibqp->pd)->pdn);
-
- roce_set_field(context->qpc_bytes_8,
- QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_M,
- QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S,
- to_hr_cq(ibqp->send_cq)->cqn);
- roce_set_field(context->qpc_bytes_8,
- QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_M,
- QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S,
- to_hr_cq(ibqp->recv_cq)->cqn);
-
- if (ibqp->srq)
- roce_set_field(context->qpc_bytes_12,
- QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_M,
- QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S,
- to_hr_srq(ibqp->srq)->srqn);
- if (attr_mask & IB_QP_PKEY_INDEX)
- roce_set_field(context->qpc_bytes_12,
- QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M,
- QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S,
- attr->pkey_index);
- else
- roce_set_field(context->qpc_bytes_12,
- QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M,
- QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S,
- hr_qp->pkey_index);
-
- roce_set_field(context->qpc_bytes_16,
- QP_CONTEXT_QPC_BYTES_16_QP_NUM_M,
- QP_CONTEXT_QPC_BYTES_16_QP_NUM_S, hr_qp->qpn);
- } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
- if ((attr_mask & IB_QP_ALT_PATH) ||
- (attr_mask & IB_QP_ACCESS_FLAGS) ||
- (attr_mask & IB_QP_PKEY_INDEX) ||
- (attr_mask & IB_QP_QKEY)) {
- dev_err(dev, "INIT2RTR attr_mask error\n");
- goto out;
- }
-
- dmac = (u8 *)attr->ah_attr.roce.dmac;
-
- context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle));
- roce_set_field(context->qpc_bytes_24,
- QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M,
- QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S,
- ((u32)(dma_handle >> 32)));
- roce_set_bit(context->qpc_bytes_24,
- QP_CONTEXT_QPC_BYTE_24_REMOTE_ENABLE_E2E_CREDITS_S,
- 1);
- roce_set_field(context->qpc_bytes_24,
- QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M,
- QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S,
- attr->min_rnr_timer);
- context->irrl_ba_l = cpu_to_le32((u32)(dma_handle_2));
- roce_set_field(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M,
- QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S,
- ((u32)(dma_handle_2 >> 32)) &
- QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M);
- roce_set_field(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTES_32_MIG_STATE_M,
- QP_CONTEXT_QPC_BYTES_32_MIG_STATE_S, 0);
- roce_set_bit(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTE_32_LOCAL_ENABLE_E2E_CREDITS_S,
- 1);
- roce_set_bit(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S,
- le32_to_cpu(hr_qp->sq_signal_bits));
-
- port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) :
- hr_qp->port;
- smac = (u8 *)hr_dev->dev_addr[port];
- /* when dmac equals smac or loop_idc is 1, it should loopback */
- if (ether_addr_equal_unaligned(dmac, smac) ||
- hr_dev->loop_idc == 0x1)
- roce_set_bit(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1);
-
- roce_set_bit(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S,
- rdma_ah_get_ah_flags(&attr->ah_attr));
- roce_set_field(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_M,
- QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S,
- ilog2((unsigned int)attr->max_dest_rd_atomic));
-
- if (attr_mask & IB_QP_DEST_QPN)
- roce_set_field(context->qpc_bytes_36,
- QP_CONTEXT_QPC_BYTES_36_DEST_QP_M,
- QP_CONTEXT_QPC_BYTES_36_DEST_QP_S,
- attr->dest_qp_num);
-
- /* Configure GID index */
- port_num = rdma_ah_get_port_num(&attr->ah_attr);
- roce_set_field(context->qpc_bytes_36,
- QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M,
- QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S,
- hns_get_gid_index(hr_dev,
- port_num - 1,
- grh->sgid_index));
-
- memcpy(&(context->dmac_l), dmac, 4);
-
- roce_set_field(context->qpc_bytes_44,
- QP_CONTEXT_QPC_BYTES_44_DMAC_H_M,
- QP_CONTEXT_QPC_BYTES_44_DMAC_H_S,
- *((u16 *)(&dmac[4])));
- roce_set_field(context->qpc_bytes_44,
- QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_M,
- QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_S,
- rdma_ah_get_static_rate(&attr->ah_attr));
- roce_set_field(context->qpc_bytes_44,
- QP_CONTEXT_QPC_BYTES_44_HOPLMT_M,
- QP_CONTEXT_QPC_BYTES_44_HOPLMT_S,
- grh->hop_limit);
-
- roce_set_field(context->qpc_bytes_48,
- QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M,
- QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S,
- grh->flow_label);
- roce_set_field(context->qpc_bytes_48,
- QP_CONTEXT_QPC_BYTES_48_TCLASS_M,
- QP_CONTEXT_QPC_BYTES_48_TCLASS_S,
- grh->traffic_class);
- roce_set_field(context->qpc_bytes_48,
- QP_CONTEXT_QPC_BYTES_48_MTU_M,
- QP_CONTEXT_QPC_BYTES_48_MTU_S, attr->path_mtu);
-
- memcpy(context->dgid, grh->dgid.raw,
- sizeof(grh->dgid.raw));
-
- dev_dbg(dev, "dmac:%x :%lx\n", context->dmac_l,
- roce_get_field(context->qpc_bytes_44,
- QP_CONTEXT_QPC_BYTES_44_DMAC_H_M,
- QP_CONTEXT_QPC_BYTES_44_DMAC_H_S));
-
- roce_set_field(context->qpc_bytes_68,
- QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_M,
- QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_S,
- hr_qp->rq.head);
- roce_set_field(context->qpc_bytes_68,
- QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_M,
- QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S, 0);
-
- rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE;
- context->cur_rq_wqe_ba_l =
- cpu_to_le32((u32)(mtts[rq_pa_start]));
-
- roce_set_field(context->qpc_bytes_76,
- QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M,
- QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S,
- mtts[rq_pa_start] >> 32);
- roce_set_field(context->qpc_bytes_76,
- QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_M,
- QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S, 0);
-
- context->rx_rnr_time = 0;
-
- roce_set_field(context->qpc_bytes_84,
- QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_M,
- QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_S,
- attr->rq_psn - 1);
- roce_set_field(context->qpc_bytes_84,
- QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_M,
- QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_S, 0);
-
- roce_set_field(context->qpc_bytes_88,
- QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_M,
- QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S,
- attr->rq_psn);
- roce_set_bit(context->qpc_bytes_88,
- QP_CONTEXT_QPC_BYTES_88_RX_REQ_PSN_ERR_FLAG_S, 0);
- roce_set_bit(context->qpc_bytes_88,
- QP_CONTEXT_QPC_BYTES_88_RX_LAST_OPCODE_FLG_S, 0);
- roce_set_field(context->qpc_bytes_88,
- QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_M,
- QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_S,
- 0);
- roce_set_field(context->qpc_bytes_88,
- QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_M,
- QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_S,
- 0);
-
- context->dma_length = 0;
- context->r_key = 0;
- context->va_l = 0;
- context->va_h = 0;
-
- roce_set_field(context->qpc_bytes_108,
- QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_M,
- QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_S, 0);
- roce_set_bit(context->qpc_bytes_108,
- QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_FLG_S, 0);
- roce_set_bit(context->qpc_bytes_108,
- QP_CONTEXT_QPC_BYTES_108_TRRL_TDB_PSN_FLG_S, 0);
-
- roce_set_field(context->qpc_bytes_112,
- QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_M,
- QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_S, 0);
- roce_set_field(context->qpc_bytes_112,
- QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_M,
- QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_S, 0);
-
- /* For chip resp ack */
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M,
- QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S,
- hr_qp->phy_port);
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_SL_M,
- QP_CONTEXT_QPC_BYTES_156_SL_S,
- rdma_ah_get_sl(&attr->ah_attr));
- hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
- } else if (cur_state == IB_QPS_RTR &&
- new_state == IB_QPS_RTS) {
- /* Optional parameters are not supported here; reject them */
- if ((attr_mask & IB_QP_ALT_PATH) ||
- (attr_mask & IB_QP_ACCESS_FLAGS) ||
- (attr_mask & IB_QP_QKEY) ||
- (attr_mask & IB_QP_PATH_MIG_STATE) ||
- (attr_mask & IB_QP_CUR_STATE) ||
- (attr_mask & IB_QP_MIN_RNR_TIMER)) {
- dev_err(dev, "RTR2RTS attr_mask error\n");
- goto out;
- }
-
- context->rx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0]));
-
- roce_set_field(context->qpc_bytes_120,
- QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M,
- QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S,
- (mtts[0]) >> 32);
-
- roce_set_field(context->qpc_bytes_124,
- QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_M,
- QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_S, 0);
- roce_set_field(context->qpc_bytes_124,
- QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_M,
- QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_S, 0);
-
- roce_set_field(context->qpc_bytes_128,
- QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_M,
- QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_S,
- attr->sq_psn);
- roce_set_bit(context->qpc_bytes_128,
- QP_CONTEXT_QPC_BYTES_128_RX_ACK_PSN_ERR_FLG_S, 0);
- roce_set_field(context->qpc_bytes_128,
- QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_M,
- QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_S,
- 0);
- roce_set_bit(context->qpc_bytes_128,
- QP_CONTEXT_QPC_BYTES_128_IRRL_PSN_VLD_FLG_S, 0);
-
- roce_set_field(context->qpc_bytes_132,
- QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_M,
- QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_S, 0);
- roce_set_field(context->qpc_bytes_132,
- QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_M,
- QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_S, 0);
-
- roce_set_field(context->qpc_bytes_136,
- QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_M,
- QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_S,
- attr->sq_psn);
- roce_set_field(context->qpc_bytes_136,
- QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_M,
- QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_S,
- attr->sq_psn);
-
- roce_set_field(context->qpc_bytes_140,
- QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_M,
- QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_S,
- (attr->sq_psn >> SQ_PSN_SHIFT));
- roce_set_field(context->qpc_bytes_140,
- QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_M,
- QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_S, 0);
- roce_set_bit(context->qpc_bytes_140,
- QP_CONTEXT_QPC_BYTES_140_RNR_RETRY_FLG_S, 0);
-
- roce_set_field(context->qpc_bytes_148,
- QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_M,
- QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_S, 0);
- roce_set_field(context->qpc_bytes_148,
- QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M,
- QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S,
- attr->retry_cnt);
- roce_set_field(context->qpc_bytes_148,
- QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_M,
- QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_S,
- attr->rnr_retry);
- roce_set_field(context->qpc_bytes_148,
- QP_CONTEXT_QPC_BYTES_148_LSN_M,
- QP_CONTEXT_QPC_BYTES_148_LSN_S, 0x100);
-
- context->rnr_retry = 0;
-
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_M,
- QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_S,
- attr->retry_cnt);
- if (attr->timeout < 0x12) {
- dev_info(dev, "ack timeout value (0x%x) must be at least 0x12.\n",
- attr->timeout);
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M,
- QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S,
- 0x12);
- } else {
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M,
- QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S,
- attr->timeout);
- }
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_M,
- QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_S,
- attr->rnr_retry);
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M,
- QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S,
- hr_qp->phy_port);
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_SL_M,
- QP_CONTEXT_QPC_BYTES_156_SL_S,
- rdma_ah_get_sl(&attr->ah_attr));
- hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M,
- QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S,
- ilog2((unsigned int)attr->max_rd_atomic));
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_M,
- QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_S, 0);
- context->pkt_use_len = 0;
-
- roce_set_field(context->qpc_bytes_164,
- QP_CONTEXT_QPC_BYTES_164_SQ_PSN_M,
- QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S, attr->sq_psn);
- roce_set_field(context->qpc_bytes_164,
- QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_M,
- QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_S, 0);
-
- roce_set_field(context->qpc_bytes_168,
- QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_M,
- QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_S,
- attr->sq_psn);
- roce_set_field(context->qpc_bytes_168,
- QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_M,
- QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_S, 0);
- roce_set_field(context->qpc_bytes_168,
- QP_CONTEXT_QPC_BYTES_168_DB_TYPE_M,
- QP_CONTEXT_QPC_BYTES_168_DB_TYPE_S, 0);
- roce_set_bit(context->qpc_bytes_168,
- QP_CONTEXT_QPC_BYTES_168_MSG_LP_IND_S, 0);
- roce_set_bit(context->qpc_bytes_168,
- QP_CONTEXT_QPC_BYTES_168_CSDB_LP_IND_S, 0);
- roce_set_bit(context->qpc_bytes_168,
- QP_CONTEXT_QPC_BYTES_168_QP_ERR_FLG_S, 0);
- context->sge_use_len = 0;
-
- roce_set_field(context->qpc_bytes_176,
- QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_M,
- QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_S, 0);
- roce_set_field(context->qpc_bytes_176,
- QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_M,
- QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_S,
- 0);
- roce_set_field(context->qpc_bytes_180,
- QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_M,
- QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_S, 0);
- roce_set_field(context->qpc_bytes_180,
- QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M,
- QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S, 0);
-
- context->tx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0]));
-
- roce_set_field(context->qpc_bytes_188,
- QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M,
- QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S,
- (mtts[0]) >> 32);
- roce_set_bit(context->qpc_bytes_188,
- QP_CONTEXT_QPC_BYTES_188_PKT_RETRY_FLG_S, 0);
- roce_set_field(context->qpc_bytes_188,
- QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_M,
- QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S,
- 0);
- } else if (!((cur_state == IB_QPS_INIT && new_state == IB_QPS_RESET) ||
- (cur_state == IB_QPS_INIT && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_RTR && new_state == IB_QPS_RESET) ||
- (cur_state == IB_QPS_RTR && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_RTS && new_state == IB_QPS_RESET) ||
- (cur_state == IB_QPS_RTS && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_ERR && new_state == IB_QPS_RESET) ||
- (cur_state == IB_QPS_ERR && new_state == IB_QPS_ERR))) {
- dev_err(dev, "unsupported QP state transition\n");
- goto out;
- }
-
- /* Every state transition must update the QP state field */
- roce_set_field(context->qpc_bytes_144,
- QP_CONTEXT_QPC_BYTES_144_QP_STATE_M,
- QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, new_state);
-
- /* SW pass context to HW */
- ret = hns_roce_v1_qp_modify(hr_dev, &hr_qp->mtt,
- to_hns_roce_state(cur_state),
- to_hns_roce_state(new_state), context,
- hr_qp);
- if (ret) {
- dev_err(dev, "hns_roce_qp_modify failed\n");
- goto out;
- }
-
- /*
- * Since rst2init is used instead of init2init, the hardware
- * must be made to reload the RQ head from the doorbell again.
- */
- if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
- /* Memory barrier */
- wmb();
-
- roce_set_field(doorbell[0], RQ_DOORBELL_U32_4_RQ_HEAD_M,
- RQ_DOORBELL_U32_4_RQ_HEAD_S, hr_qp->rq.head);
- roce_set_field(doorbell[1], RQ_DOORBELL_U32_8_QPN_M,
- RQ_DOORBELL_U32_8_QPN_S, hr_qp->qpn);
- roce_set_field(doorbell[1], RQ_DOORBELL_U32_8_CMD_M,
- RQ_DOORBELL_U32_8_CMD_S, 1);
- roce_set_bit(doorbell[1], RQ_DOORBELL_U32_8_HW_SYNC_S, 1);
-
- if (ibqp->uobject) {
- hr_qp->rq.db_reg_l = hr_dev->reg_base +
- hr_dev->odb_offset +
- DB_REG_OFFSET * hr_dev->priv_uar.index;
- }
-
- hns_roce_write64_k(doorbell, hr_qp->rq.db_reg_l);
- }
-
- hr_qp->state = new_state;
-
- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
- hr_qp->resp_depth = attr->max_dest_rd_atomic;
- if (attr_mask & IB_QP_PORT) {
- hr_qp->port = attr->port_num - 1;
- hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
- }
-
- if (new_state == IB_QPS_RESET && !ibqp->uobject) {
- hns_roce_v1_cq_clean(to_hr_cq(ibqp->recv_cq), hr_qp->qpn,
- ibqp->srq ? to_hr_srq(ibqp->srq) : NULL);
- if (ibqp->send_cq != ibqp->recv_cq)
- hns_roce_v1_cq_clean(to_hr_cq(ibqp->send_cq),
- hr_qp->qpn, NULL);
-
- hr_qp->rq.head = 0;
- hr_qp->rq.tail = 0;
- hr_qp->sq.head = 0;
- hr_qp->sq.tail = 0;
- hr_qp->sq_next_wqe = 0;
- }
-out:
- kfree(context);
- return ret;
-}
-
-static int hns_roce_v1_modify_qp(struct ib_qp *ibqp,
- const struct ib_qp_attr *attr, int attr_mask,
- enum ib_qp_state cur_state,
- enum ib_qp_state new_state)
-{
-
- if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
- return hns_roce_v1_m_sqp(ibqp, attr, attr_mask, cur_state,
- new_state);
- else
- return hns_roce_v1_m_qp(ibqp, attr, attr_mask, cur_state,
- new_state);
-}
-
-static enum ib_qp_state to_ib_qp_state(enum hns_roce_qp_state state)
-{
- switch (state) {
- case HNS_ROCE_QP_STATE_RST:
- return IB_QPS_RESET;
- case HNS_ROCE_QP_STATE_INIT:
- return IB_QPS_INIT;
- case HNS_ROCE_QP_STATE_RTR:
- return IB_QPS_RTR;
- case HNS_ROCE_QP_STATE_RTS:
- return IB_QPS_RTS;
- case HNS_ROCE_QP_STATE_SQD:
- return IB_QPS_SQD;
- case HNS_ROCE_QP_STATE_ERR:
- return IB_QPS_ERR;
- default:
- return IB_QPS_ERR;
- }
-}
-
-static int hns_roce_v1_query_qpc(struct hns_roce_dev *hr_dev,
- struct hns_roce_qp *hr_qp,
- struct hns_roce_qp_context *hr_context)
-{
- struct hns_roce_cmd_mailbox *mailbox;
- int ret;
-
- mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
-
- ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, hr_qp->qpn, 0,
- HNS_ROCE_CMD_QUERY_QP,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
- if (!ret)
- memcpy(hr_context, mailbox->buf, sizeof(*hr_context));
- else
- dev_err(&hr_dev->pdev->dev, "QUERY QP cmd process error\n");
-
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
-
- return ret;
-}
-
-static int hns_roce_v1_q_sqp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
- int qp_attr_mask,
- struct ib_qp_init_attr *qp_init_attr)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct hns_roce_sqp_context context;
- u32 addr;
-
- mutex_lock(&hr_qp->mutex);
-
- if (hr_qp->state == IB_QPS_RESET) {
- qp_attr->qp_state = IB_QPS_RESET;
- goto done;
- }
-
- addr = ROCEE_QP1C_CFG0_0_REG +
- hr_qp->port * sizeof(struct hns_roce_sqp_context);
- context.qp1c_bytes_4 = cpu_to_le32(roce_read(hr_dev, addr));
- context.sq_rq_bt_l = cpu_to_le32(roce_read(hr_dev, addr + 1));
- context.qp1c_bytes_12 = cpu_to_le32(roce_read(hr_dev, addr + 2));
- context.qp1c_bytes_16 = cpu_to_le32(roce_read(hr_dev, addr + 3));
- context.qp1c_bytes_20 = cpu_to_le32(roce_read(hr_dev, addr + 4));
- context.cur_rq_wqe_ba_l = cpu_to_le32(roce_read(hr_dev, addr + 5));
- context.qp1c_bytes_28 = cpu_to_le32(roce_read(hr_dev, addr + 6));
- context.qp1c_bytes_32 = cpu_to_le32(roce_read(hr_dev, addr + 7));
- context.cur_sq_wqe_ba_l = cpu_to_le32(roce_read(hr_dev, addr + 8));
- context.qp1c_bytes_40 = cpu_to_le32(roce_read(hr_dev, addr + 9));
-
- hr_qp->state = roce_get_field(context.qp1c_bytes_4,
- QP1C_BYTES_4_QP_STATE_M,
- QP1C_BYTES_4_QP_STATE_S);
- qp_attr->qp_state = hr_qp->state;
- qp_attr->path_mtu = IB_MTU_256;
- qp_attr->path_mig_state = IB_MIG_ARMED;
- qp_attr->qkey = QKEY_VAL;
- qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
- qp_attr->rq_psn = 0;
- qp_attr->sq_psn = 0;
- qp_attr->dest_qp_num = 1;
- qp_attr->qp_access_flags = 6;
-
- qp_attr->pkey_index = roce_get_field(context.qp1c_bytes_20,
- QP1C_BYTES_20_PKEY_IDX_M,
- QP1C_BYTES_20_PKEY_IDX_S);
- qp_attr->port_num = hr_qp->port + 1;
- qp_attr->sq_draining = 0;
- qp_attr->max_rd_atomic = 0;
- qp_attr->max_dest_rd_atomic = 0;
- qp_attr->min_rnr_timer = 0;
- qp_attr->timeout = 0;
- qp_attr->retry_cnt = 0;
- qp_attr->rnr_retry = 0;
- qp_attr->alt_timeout = 0;
-
-done:
- qp_attr->cur_qp_state = qp_attr->qp_state;
- qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt;
- qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs;
- qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
- qp_attr->cap.max_send_sge = hr_qp->sq.max_gs;
- qp_attr->cap.max_inline_data = 0;
- qp_init_attr->cap = qp_attr->cap;
- qp_init_attr->create_flags = 0;
-
- mutex_unlock(&hr_qp->mutex);
-
- return 0;
-}
-
-static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
- int qp_attr_mask,
- struct ib_qp_init_attr *qp_init_attr)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_qp_context *context;
- int tmp_qp_state = 0;
- int ret = 0;
- int state;
-
- context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return -ENOMEM;
-
- memset(qp_attr, 0, sizeof(*qp_attr));
- memset(qp_init_attr, 0, sizeof(*qp_init_attr));
-
- mutex_lock(&hr_qp->mutex);
-
- if (hr_qp->state == IB_QPS_RESET) {
- qp_attr->qp_state = IB_QPS_RESET;
- goto done;
- }
-
- ret = hns_roce_v1_query_qpc(hr_dev, hr_qp, context);
- if (ret) {
- dev_err(dev, "query qpc error\n");
- ret = -EINVAL;
- goto out;
- }
-
- state = roce_get_field(context->qpc_bytes_144,
- QP_CONTEXT_QPC_BYTES_144_QP_STATE_M,
- QP_CONTEXT_QPC_BYTES_144_QP_STATE_S);
- tmp_qp_state = (int)to_ib_qp_state((enum hns_roce_qp_state)state);
- if (tmp_qp_state == -1) {
- dev_err(dev, "to_ib_qp_state error\n");
- ret = -EINVAL;
- goto out;
- }
- hr_qp->state = (u8)tmp_qp_state;
- qp_attr->qp_state = (enum ib_qp_state)hr_qp->state;
- qp_attr->path_mtu = (enum ib_mtu)roce_get_field(context->qpc_bytes_48,
- QP_CONTEXT_QPC_BYTES_48_MTU_M,
- QP_CONTEXT_QPC_BYTES_48_MTU_S);
- qp_attr->path_mig_state = IB_MIG_ARMED;
- qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
- if (hr_qp->ibqp.qp_type == IB_QPT_UD)
- qp_attr->qkey = QKEY_VAL;
-
- qp_attr->rq_psn = roce_get_field(context->qpc_bytes_88,
- QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_M,
- QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S);
- qp_attr->sq_psn = (u32)roce_get_field(context->qpc_bytes_164,
- QP_CONTEXT_QPC_BYTES_164_SQ_PSN_M,
- QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S);
- qp_attr->dest_qp_num = (u8)roce_get_field(context->qpc_bytes_36,
- QP_CONTEXT_QPC_BYTES_36_DEST_QP_M,
- QP_CONTEXT_QPC_BYTES_36_DEST_QP_S);
- qp_attr->qp_access_flags = ((roce_get_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S)) << 2) |
- ((roce_get_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S)) << 1) |
- ((roce_get_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_ATOMIC_OPERATION_ENABLE_S)) << 3);
-
- if (hr_qp->ibqp.qp_type == IB_QPT_RC ||
- hr_qp->ibqp.qp_type == IB_QPT_UC) {
- struct ib_global_route *grh =
- rdma_ah_retrieve_grh(&qp_attr->ah_attr);
-
- rdma_ah_set_sl(&qp_attr->ah_attr,
- roce_get_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_SL_M,
- QP_CONTEXT_QPC_BYTES_156_SL_S));
- rdma_ah_set_ah_flags(&qp_attr->ah_attr, IB_AH_GRH);
- grh->flow_label =
- roce_get_field(context->qpc_bytes_48,
- QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M,
- QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S);
- grh->sgid_index =
- roce_get_field(context->qpc_bytes_36,
- QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M,
- QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S);
- grh->hop_limit =
- roce_get_field(context->qpc_bytes_44,
- QP_CONTEXT_QPC_BYTES_44_HOPLMT_M,
- QP_CONTEXT_QPC_BYTES_44_HOPLMT_S);
- grh->traffic_class =
- roce_get_field(context->qpc_bytes_48,
- QP_CONTEXT_QPC_BYTES_48_TCLASS_M,
- QP_CONTEXT_QPC_BYTES_48_TCLASS_S);
-
- memcpy(grh->dgid.raw, context->dgid,
- sizeof(grh->dgid.raw));
- }
-
- qp_attr->pkey_index = roce_get_field(context->qpc_bytes_12,
- QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M,
- QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S);
- qp_attr->port_num = hr_qp->port + 1;
- qp_attr->sq_draining = 0;
- qp_attr->max_rd_atomic = 1 << roce_get_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M,
- QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S);
- qp_attr->max_dest_rd_atomic = 1 << roce_get_field(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_M,
- QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S);
- qp_attr->min_rnr_timer = (u8)(roce_get_field(context->qpc_bytes_24,
- QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M,
- QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S));
- qp_attr->timeout = (u8)(roce_get_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M,
- QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S));
- qp_attr->retry_cnt = roce_get_field(context->qpc_bytes_148,
- QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M,
- QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S);
- qp_attr->rnr_retry = (u8)context->rnr_retry;
-
-done:
- qp_attr->cur_qp_state = qp_attr->qp_state;
- qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt;
- qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs;
-
- if (!ibqp->uobject) {
- qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
- qp_attr->cap.max_send_sge = hr_qp->sq.max_gs;
- } else {
- qp_attr->cap.max_send_wr = 0;
- qp_attr->cap.max_send_sge = 0;
- }
-
- qp_init_attr->cap = qp_attr->cap;
-
-out:
- mutex_unlock(&hr_qp->mutex);
- kfree(context);
- return ret;
-}
-
-static int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
- int qp_attr_mask,
- struct ib_qp_init_attr *qp_init_attr)
-{
- struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
-
- return hr_qp->doorbell_qpn <= 1 ?
- hns_roce_v1_q_sqp(ibqp, qp_attr, qp_attr_mask, qp_init_attr) :
- hns_roce_v1_q_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr);
-}
-
-int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct hns_roce_cq *send_cq, *recv_cq;
- int ret;
-
- ret = hns_roce_v1_modify_qp(ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET);
- if (ret)
- return ret;
-
- send_cq = to_hr_cq(hr_qp->ibqp.send_cq);
- recv_cq = to_hr_cq(hr_qp->ibqp.recv_cq);
-
- hns_roce_lock_cqs(send_cq, recv_cq);
- if (!udata) {
- __hns_roce_v1_cq_clean(recv_cq, hr_qp->qpn, hr_qp->ibqp.srq ?
- to_hr_srq(hr_qp->ibqp.srq) : NULL);
- if (send_cq != recv_cq)
- __hns_roce_v1_cq_clean(send_cq, hr_qp->qpn, NULL);
- }
- hns_roce_unlock_cqs(send_cq, recv_cq);
-
- hns_roce_qp_remove(hr_dev, hr_qp);
- hns_roce_qp_free(hr_dev, hr_qp);
-
- /* RC QP, release QPN */
- if (hr_qp->ibqp.qp_type == IB_QPT_RC)
- hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1);
-
- hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
-
- if (udata)
- ib_umem_release(hr_qp->umem);
- else {
- kfree(hr_qp->sq.wrid);
- kfree(hr_qp->rq.wrid);
-
- hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
- }
-
- if (hr_qp->ibqp.qp_type == IB_QPT_RC)
- kfree(hr_qp);
- else
- kfree(hr_to_hr_sqp(hr_qp));
- return 0;
-}
-
-static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device);
- struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
- struct device *dev = &hr_dev->pdev->dev;
- u32 cqe_cnt_ori;
- u32 cqe_cnt_cur;
- u32 cq_buf_size;
- int wait_time = 0;
- int ret = 0;
-
- hns_roce_free_cq(hr_dev, hr_cq);
-
- /*
- * Before freeing the CQ buffer, we must ensure that all outstanding CQEs
- * have been written, by checking the CQE counter.
- */
- cqe_cnt_ori = roce_read(hr_dev, ROCEE_SCAEP_WR_CQE_CNT);
- while (1) {
- if (roce_read(hr_dev, ROCEE_CAEP_CQE_WCMD_EMPTY) &
- HNS_ROCE_CQE_WCMD_EMPTY_BIT)
- break;
-
- cqe_cnt_cur = roce_read(hr_dev, ROCEE_SCAEP_WR_CQE_CNT);
- if ((cqe_cnt_cur - cqe_cnt_ori) >= HNS_ROCE_MIN_CQE_CNT)
- break;
-
- msleep(HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS);
- if (wait_time > HNS_ROCE_MAX_FREE_CQ_WAIT_CNT) {
- dev_warn(dev, "Destroy cq 0x%lx timeout!\n",
- hr_cq->cqn);
- ret = -ETIMEDOUT;
- break;
- }
- wait_time++;
- }
-
- hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
-
- if (ibcq->uobject)
- ib_umem_release(hr_cq->umem);
- else {
- /* Free the buffer backing the CQ */
- cq_buf_size = (ibcq->cqe + 1) * hr_dev->caps.cq_entry_sz;
- hns_roce_buf_free(hr_dev, cq_buf_size, &hr_cq->hr_buf.hr_buf);
- }
-
- kfree(hr_cq);
-
- return ret;
-}
-
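-/* Ring the EQ doorbell with the new consumer index; req_not sets the
- * notification-request bit.
- */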
-static void set_eq_cons_index_v1(struct hns_roce_eq *eq, int req_not)
-{
- roce_raw_write((eq->cons_index & HNS_ROCE_V1_CONS_IDX_M) |
- (req_not << eq->log_entries), eq->doorbell);
-}
-
-static void hns_roce_v1_wq_catas_err_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe, int qpn)
-{
- struct device *dev = &hr_dev->pdev->dev;
-
- dev_warn(dev, "Local Work Queue Catastrophic Error.\n");
- switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M,
- HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) {
- case HNS_ROCE_LWQCE_QPC_ERROR:
- dev_warn(dev, "QP %d, QPC error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_MTU_ERROR:
- dev_warn(dev, "QP %d, MTU error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
- dev_warn(dev, "QP %d, WQE BA addr error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
- dev_warn(dev, "QP %d, WQE addr error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
- dev_warn(dev, "QP %d, WQE shift error\n", qpn);
- break;
- case HNS_ROCE_LWQCE_SL_ERROR:
- dev_warn(dev, "QP %d, SL error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_PORT_ERROR:
- dev_warn(dev, "QP %d, port error.\n", qpn);
- break;
- default:
- break;
- }
-}
-
-static void hns_roce_v1_local_wq_access_err_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe,
- int qpn)
-{
- struct device *dev = &hr_dev->pdev->dev;
-
- dev_warn(dev, "Local Access Violation Work Queue Error.\n");
- switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M,
- HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) {
- case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
- dev_warn(dev, "QP %d, R_key violation.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_LENGTH_ERROR:
- dev_warn(dev, "QP %d, length error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_VA_ERROR:
- dev_warn(dev, "QP %d, VA error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_PD_ERROR:
- dev_err(dev, "QP %d, PD error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
- dev_warn(dev, "QP %d, rw acc error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
- dev_warn(dev, "QP %d, key state error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
- dev_warn(dev, "QP %d, MR operation error.\n", qpn);
- break;
- default:
- break;
- }
-}
-
-static void hns_roce_v1_qp_err_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe,
- int event_type)
-{
- struct device *dev = &hr_dev->pdev->dev;
- int phy_port;
- int qpn;
-
- qpn = roce_get_field(aeqe->event.qp_event.qp,
- HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M,
- HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S);
- phy_port = roce_get_field(aeqe->event.qp_event.qp,
- HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M,
- HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S);
- if (qpn <= 1)
- qpn = HNS_ROCE_MAX_PORTS * qpn + phy_port;
-
- switch (event_type) {
- case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
- dev_warn(dev, "Invalid Req Local Work Queue Error.\n"
- "QP %d, phy_port %d.\n", qpn, phy_port);
- break;
- case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
- hns_roce_v1_wq_catas_err_handle(hr_dev, aeqe, qpn);
- break;
- case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
- hns_roce_v1_local_wq_access_err_handle(hr_dev, aeqe, qpn);
- break;
- default:
- break;
- }
-
- hns_roce_qp_event(hr_dev, qpn, event_type);
-}
-
-static void hns_roce_v1_cq_err_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe,
- int event_type)
-{
- struct device *dev = &hr_dev->pdev->dev;
- u32 cqn;
-
- cqn = roce_get_field(aeqe->event.cq_event.cq,
- HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M,
- HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S);
-
- switch (event_type) {
- case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
- dev_warn(dev, "CQ 0x%x access err.\n", cqn);
- break;
- case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
- dev_warn(dev, "CQ 0x%x overflow\n", cqn);
- break;
- case HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID:
- dev_warn(dev, "CQ 0x%x ID invalid.\n", cqn);
- break;
- default:
- break;
- }
-
- hns_roce_cq_event(hr_dev, cqn, event_type);
-}
-
-static void hns_roce_v1_db_overflow_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe)
-{
- struct device *dev = &hr_dev->pdev->dev;
-
- switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M,
- HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) {
- case HNS_ROCE_DB_SUBTYPE_SDB_OVF:
- dev_warn(dev, "SDB overflow.\n");
- break;
- case HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF:
- dev_warn(dev, "SDB almost overflow.\n");
- break;
- case HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP:
- dev_warn(dev, "SDB almost empty.\n");
- break;
- case HNS_ROCE_DB_SUBTYPE_ODB_OVF:
- dev_warn(dev, "ODB overflow.\n");
- break;
- case HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF:
- dev_warn(dev, "ODB almost overflow.\n");
- break;
- case HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP:
- dev_warn(dev, "SDB almost empty.\n");
- break;
- default:
- break;
- }
-}
-
-static struct hns_roce_aeqe *get_aeqe_v1(struct hns_roce_eq *eq, u32 entry)
-{
- unsigned long off = (entry & (eq->entries - 1)) *
- HNS_ROCE_AEQ_ENTRY_SIZE;
-
- return (struct hns_roce_aeqe *)((u8 *)
- (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) +
- off % HNS_ROCE_BA_SIZE);
-}
-
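-/*
- * An AEQE belongs to software when its owner bit differs from the wrap
- * parity of the consumer index (cons_index counts up to twice the queue
- * depth before wrapping), so the XOR below returns the next valid entry
- * or NULL when the queue is empty.
- */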
-static struct hns_roce_aeqe *next_aeqe_sw_v1(struct hns_roce_eq *eq)
-{
- struct hns_roce_aeqe *aeqe = get_aeqe_v1(eq, eq->cons_index);
-
- return (roce_get_bit(aeqe->asyn, HNS_ROCE_AEQE_U32_4_OWNER_S) ^
- !!(eq->cons_index & eq->entries)) ? aeqe : NULL;
-}
-
-static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_aeqe *aeqe;
- int aeqes_found = 0;
- int event_type;
-
- while ((aeqe = next_aeqe_sw_v1(eq))) {
-
- /* Make sure we read the AEQ entry after we have checked the
- * ownership bit
- */
- dma_rmb();
-
- dev_dbg(dev, "aeqe = %p, aeqe->asyn.event_type = 0x%lx\n", aeqe,
- roce_get_field(aeqe->asyn,
- HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M,
- HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S));
- event_type = roce_get_field(aeqe->asyn,
- HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M,
- HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S);
- switch (event_type) {
- case HNS_ROCE_EVENT_TYPE_PATH_MIG:
- dev_warn(dev, "PATH MIG not supported\n");
- break;
- case HNS_ROCE_EVENT_TYPE_COMM_EST:
- dev_warn(dev, "COMMUNICATION established\n");
- break;
- case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
- dev_warn(dev, "SQ DRAINED not supported\n");
- break;
- case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
- dev_warn(dev, "PATH MIG failed\n");
- break;
- case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
- case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
- case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
- hns_roce_v1_qp_err_handle(hr_dev, aeqe, event_type);
- break;
- case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
- case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
- case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
- dev_warn(dev, "SRQ not support!\n");
- break;
- case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
- case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
- case HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID:
- hns_roce_v1_cq_err_handle(hr_dev, aeqe, event_type);
- break;
- case HNS_ROCE_EVENT_TYPE_PORT_CHANGE:
- dev_warn(dev, "port change.\n");
- break;
- case HNS_ROCE_EVENT_TYPE_MB:
- hns_roce_cmd_event(hr_dev,
- le16_to_cpu(aeqe->event.cmd.token),
- aeqe->event.cmd.status,
- le64_to_cpu(aeqe->event.cmd.out_param
- ));
- break;
- case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
- hns_roce_v1_db_overflow_handle(hr_dev, aeqe);
- break;
- case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW:
- dev_warn(dev, "CEQ 0x%lx overflow.\n",
- roce_get_field(aeqe->event.ce_event.ceqe,
- HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M,
- HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S));
- break;
- default:
- dev_warn(dev, "Unhandled event %d on EQ %d at idx %u.\n",
- event_type, eq->eqn, eq->cons_index);
- break;
- }
-
- eq->cons_index++;
- aeqes_found = 1;
-
- if (eq->cons_index > 2 * hr_dev->caps.aeqe_depth - 1) {
- dev_warn(dev, "cons_index overflow, set back to 0.\n");
- eq->cons_index = 0;
- }
- }
-
- set_eq_cons_index_v1(eq, 0);
-
- return aeqes_found;
-}
-
-static struct hns_roce_ceqe *get_ceqe_v1(struct hns_roce_eq *eq, u32 entry)
-{
- unsigned long off = (entry & (eq->entries - 1)) *
- HNS_ROCE_CEQ_ENTRY_SIZE;
-
- return (struct hns_roce_ceqe *)((u8 *)
- (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) +
- off % HNS_ROCE_BA_SIZE);
-}
-
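-/* CEQEs use the same owner-bit/wrap-parity validity check as AEQEs above. */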
-static struct hns_roce_ceqe *next_ceqe_sw_v1(struct hns_roce_eq *eq)
-{
- struct hns_roce_ceqe *ceqe = get_ceqe_v1(eq, eq->cons_index);
-
- return (!!(roce_get_bit(ceqe->comp,
- HNS_ROCE_CEQE_CEQE_COMP_OWNER_S))) ^
- (!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
-}
-
-static int hns_roce_v1_ceq_int(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
-{
- struct hns_roce_ceqe *ceqe;
- int ceqes_found = 0;
- u32 cqn;
-
- while ((ceqe = next_ceqe_sw_v1(eq))) {
-
- /* Make sure we read CEQ entry after we have checked the
- * ownership bit
- */
- dma_rmb();
-
- cqn = roce_get_field(ceqe->comp,
- HNS_ROCE_CEQE_CEQE_COMP_CQN_M,
- HNS_ROCE_CEQE_CEQE_COMP_CQN_S);
- hns_roce_cq_completion(hr_dev, cqn);
-
- ++eq->cons_index;
- ceqes_found = 1;
-
- if (eq->cons_index > 2 * hr_dev->caps.ceqe_depth - 1) {
- dev_warn(&eq->hr_dev->pdev->dev,
- "cons_index overflow, set back to 0.\n");
- eq->cons_index = 0;
- }
- }
-
- set_eq_cons_index_v1(eq, 0);
-
- return ceqes_found;
-}
-
-static irqreturn_t hns_roce_v1_msix_interrupt_eq(int irq, void *eq_ptr)
-{
- struct hns_roce_eq *eq = eq_ptr;
- struct hns_roce_dev *hr_dev = eq->hr_dev;
- int int_work = 0;
-
- if (eq->type_flag == HNS_ROCE_CEQ)
- /* CEQ irq routine; the CEQ is a pulse irq and needs no clearing */
- int_work = hns_roce_v1_ceq_int(hr_dev, eq);
- else
- /* AEQ irq routine; the AEQ is a pulse irq and needs no clearing */
- int_work = hns_roce_v1_aeq_int(hr_dev, eq);
-
- return IRQ_RETVAL(int_work);
-}
-
-static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id)
-{
- struct hns_roce_dev *hr_dev = dev_id;
- struct device *dev = &hr_dev->pdev->dev;
- int int_work = 0;
- u32 caepaemask_val;
- u32 cealmovf_val;
- u32 caepaest_val;
- u32 aeshift_val;
- u32 ceshift_val;
- u32 cemask_val;
- __le32 tmp;
- int i;
-
- /*
- * Abnormal interrupt:
- * AEQ overflow, ECC multi-bit error and CEQ overflow must be cleared
- * explicitly: mask the irq, clear the int state, then unmask it again
- */
- aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG);
- tmp = cpu_to_le32(aeshift_val);
-
- /* AEQE overflow */
- if (roce_get_bit(tmp,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQ_ALM_OVF_INT_ST_S) == 1) {
- dev_warn(dev, "AEQ overflow!\n");
-
- /* Set mask */
- caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
- tmp = cpu_to_le32(caepaemask_val);
- roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
- HNS_ROCE_INT_MASK_ENABLE);
- caepaemask_val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val);
-
- /* Clear int state (INT_WC: write 1 to clear) */
- caepaest_val = roce_read(hr_dev, ROCEE_CAEP_AE_ST_REG);
- tmp = cpu_to_le32(caepaest_val);
- roce_set_bit(tmp, ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S, 1);
- caepaest_val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_CAEP_AE_ST_REG, caepaest_val);
-
- /* Clear mask */
- caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
- tmp = cpu_to_le32(caepaemask_val);
- roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
- HNS_ROCE_INT_MASK_DISABLE);
- caepaemask_val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val);
- }
-
- /* CEQ almost overflow */
- for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) {
- ceshift_val = roce_read(hr_dev, ROCEE_CAEP_CEQC_SHIFT_0_REG +
- i * CEQ_REG_OFFSET);
- tmp = cpu_to_le32(ceshift_val);
-
- if (roce_get_bit(tmp,
- ROCEE_CAEP_CEQC_SHIFT_CAEP_CEQ_ALM_OVF_INT_ST_S) == 1) {
- dev_warn(dev, "CEQ[%d] almost overflow!\n", i);
- int_work++;
-
- /* Set mask */
- cemask_val = roce_read(hr_dev,
- ROCEE_CAEP_CE_IRQ_MASK_0_REG +
- i * CEQ_REG_OFFSET);
- tmp = cpu_to_le32(cemask_val);
- roce_set_bit(tmp,
- ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S,
- HNS_ROCE_INT_MASK_ENABLE);
- cemask_val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
- i * CEQ_REG_OFFSET, cemask_val);
-
- /* Clear int state (INT_WC: write 1 to clear) */
- cealmovf_val = roce_read(hr_dev,
- ROCEE_CAEP_CEQ_ALM_OVF_0_REG +
- i * CEQ_REG_OFFSET);
- tmp = cpu_to_le32(cealmovf_val);
- roce_set_bit(tmp,
- ROCEE_CAEP_CEQ_ALM_OVF_CAEP_CEQ_ALM_OVF_S,
- 1);
- cealmovf_val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_CAEP_CEQ_ALM_OVF_0_REG +
- i * CEQ_REG_OFFSET, cealmovf_val);
-
- /* Clear mask */
- cemask_val = roce_read(hr_dev,
- ROCEE_CAEP_CE_IRQ_MASK_0_REG +
- i * CEQ_REG_OFFSET);
- tmp = cpu_to_le32(cemask_val);
- roce_set_bit(tmp,
- ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S,
- HNS_ROCE_INT_MASK_DISABLE);
- cemask_val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
- i * CEQ_REG_OFFSET, cemask_val);
- }
- }
-
- /* ECC multi-bit error alarm */
- dev_warn(dev, "ECC UCERR ALARM: 0x%x, 0x%x, 0x%x\n",
- roce_read(hr_dev, ROCEE_ECC_UCERR_ALM0_REG),
- roce_read(hr_dev, ROCEE_ECC_UCERR_ALM1_REG),
- roce_read(hr_dev, ROCEE_ECC_UCERR_ALM2_REG));
-
- dev_warn(dev, "ECC CERR ALARM: 0x%x, 0x%x, 0x%x\n",
- roce_read(hr_dev, ROCEE_ECC_CERR_ALM0_REG),
- roce_read(hr_dev, ROCEE_ECC_CERR_ALM1_REG),
- roce_read(hr_dev, ROCEE_ECC_CERR_ALM2_REG));
-
- return IRQ_RETVAL(int_work);
-}
-
-static void hns_roce_v1_int_mask_enable(struct hns_roce_dev *hr_dev)
-{
- u32 aemask_val;
- int masken = 0;
- __le32 tmp;
- int i;
-
- /* AEQ INT */
- aemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
- tmp = cpu_to_le32(aemask_val);
- roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
- masken);
- roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S, masken);
- aemask_val = le32_to_cpu(tmp);
- roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, aemask_val);
-
- /* CEQ INT */
- for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) {
- /* IRQ mask */
- roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
- i * CEQ_REG_OFFSET, masken);
- }
-}
-
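-/* Free the per-BA coherent DMA chunks that back an event queue. */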
-static void hns_roce_v1_free_eq(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
-{
- int npages = (PAGE_ALIGN(eq->eqe_size * eq->entries) +
- HNS_ROCE_BA_SIZE - 1) / HNS_ROCE_BA_SIZE;
- int i;
-
- if (!eq->buf_list)
- return;
-
- for (i = 0; i < npages; ++i)
- dma_free_coherent(&hr_dev->pdev->dev, HNS_ROCE_BA_SIZE,
- eq->buf_list[i].buf, eq->buf_list[i].map);
-
- kfree(eq->buf_list);
-}
-
-static void hns_roce_v1_enable_eq(struct hns_roce_dev *hr_dev, int eq_num,
- int enable_flag)
-{
- void __iomem *eqc = hr_dev->eq_table.eqc_base[eq_num];
- __le32 tmp;
- u32 val;
-
- val = readl(eqc);
- tmp = cpu_to_le32(val);
-
- if (enable_flag)
- roce_set_field(tmp,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
- HNS_ROCE_EQ_STAT_VALID);
- else
- roce_set_field(tmp,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
- HNS_ROCE_EQ_STAT_INVALID);
-
- val = le32_to_cpu(tmp);
- writel(val, eqc);
-}
-
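-/*
- * Program one EQ context. Allocate the EQE buffer, then write the four EQC
- * words in turn: state and shift, the low bits of the buffer base address,
- * the current index together with the high address bits, and finally the
- * consumer index.
- */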
-static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
-{
- void __iomem *eqc = hr_dev->eq_table.eqc_base[eq->eqn];
- struct device *dev = &hr_dev->pdev->dev;
- dma_addr_t tmp_dma_addr;
- u32 eqconsindx_val = 0;
- u32 eqcuridx_val = 0;
- u32 eqshift_val = 0;
- __le32 tmp2 = 0;
- __le32 tmp1 = 0;
- __le32 tmp = 0;
- int num_bas;
- int ret;
- int i;
-
- num_bas = (PAGE_ALIGN(eq->entries * eq->eqe_size) +
- HNS_ROCE_BA_SIZE - 1) / HNS_ROCE_BA_SIZE;
-
- if ((eq->entries * eq->eqe_size) > HNS_ROCE_BA_SIZE) {
- dev_err(dev, "[error]eq buf %d gt ba size(%d) need bas=%d\n",
- (eq->entries * eq->eqe_size), HNS_ROCE_BA_SIZE,
- num_bas);
- return -EINVAL;
- }
-
- eq->buf_list = kcalloc(num_bas, sizeof(*eq->buf_list), GFP_KERNEL);
- if (!eq->buf_list)
- return -ENOMEM;
-
- for (i = 0; i < num_bas; ++i) {
- eq->buf_list[i].buf = dma_alloc_coherent(dev, HNS_ROCE_BA_SIZE,
- &tmp_dma_addr,
- GFP_KERNEL);
- if (!eq->buf_list[i].buf) {
- ret = -ENOMEM;
- goto err_out_free_pages;
- }
-
- eq->buf_list[i].map = tmp_dma_addr;
- memset(eq->buf_list[i].buf, 0, HNS_ROCE_BA_SIZE);
- }
- eq->cons_index = 0;
- roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
- HNS_ROCE_EQ_STAT_INVALID);
- roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S,
- eq->log_entries);
- eqshift_val = le32_to_cpu(tmp);
- writel(eqshift_val, eqc);
-
- /* Configure eq extended address 12~44bit */
- writel((u32)(eq->buf_list[0].map >> 12), eqc + 4);
-
- /*
- * Configure eq extended address 45~49 bit.
- * 44 = 32 + 12: the address is shifted right by 12 because the hardware
- * uses 4K pages, and by a further 32 to obtain the high 32-bit part
- * that is written to the register.
- */
- roce_set_field(tmp1, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M,
- ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S,
- eq->buf_list[0].map >> 44);
- roce_set_field(tmp1, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M,
- ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S, 0);
- eqcuridx_val = le32_to_cpu(tmp1);
- writel(eqcuridx_val, eqc + 8);
-
- /* Configure eq consumer index */
- roce_set_field(tmp2, ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M,
- ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S, 0);
- eqconsindx_val = le32_to_cpu(tmp2);
- writel(eqconsindx_val, eqc + 0xc);
-
- return 0;
-
-err_out_free_pages:
- for (i -= 1; i >= 0; i--)
- dma_free_coherent(dev, HNS_ROCE_BA_SIZE, eq->buf_list[i].buf,
- eq->buf_list[i].map);
-
- kfree(eq->buf_list);
- return ret;
-}
-
-static int hns_roce_v1_init_eq_table(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_eq *eq;
- int irq_num;
- int eq_num;
- int ret;
- int i, j;
-
- eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
- irq_num = eq_num + hr_dev->caps.num_other_vectors;
-
- eq_table->eq = kcalloc(eq_num, sizeof(*eq_table->eq), GFP_KERNEL);
- if (!eq_table->eq)
- return -ENOMEM;
-
- eq_table->eqc_base = kcalloc(eq_num, sizeof(*eq_table->eqc_base),
- GFP_KERNEL);
- if (!eq_table->eqc_base) {
- ret = -ENOMEM;
- goto err_eqc_base_alloc_fail;
- }
-
- for (i = 0; i < eq_num; i++) {
- eq = &eq_table->eq[i];
- eq->hr_dev = hr_dev;
- eq->eqn = i;
- eq->irq = hr_dev->irq[i];
- eq->log_page_size = PAGE_SHIFT;
-
- if (i < hr_dev->caps.num_comp_vectors) {
- /* CEQ */
- eq_table->eqc_base[i] = hr_dev->reg_base +
- ROCEE_CAEP_CEQC_SHIFT_0_REG +
- CEQ_REG_OFFSET * i;
- eq->type_flag = HNS_ROCE_CEQ;
- eq->doorbell = hr_dev->reg_base +
- ROCEE_CAEP_CEQC_CONS_IDX_0_REG +
- CEQ_REG_OFFSET * i;
- eq->entries = hr_dev->caps.ceqe_depth;
- eq->log_entries = ilog2(eq->entries);
- eq->eqe_size = HNS_ROCE_CEQ_ENTRY_SIZE;
- } else {
- /* AEQ */
- eq_table->eqc_base[i] = hr_dev->reg_base +
- ROCEE_CAEP_AEQC_AEQE_SHIFT_REG;
- eq->type_flag = HNS_ROCE_AEQ;
- eq->doorbell = hr_dev->reg_base +
- ROCEE_CAEP_AEQE_CONS_IDX_REG;
- eq->entries = hr_dev->caps.aeqe_depth;
- eq->log_entries = ilog2(eq->entries);
- eq->eqe_size = HNS_ROCE_AEQ_ENTRY_SIZE;
- }
- }
-
- /* Disable irq */
- hns_roce_v1_int_mask_enable(hr_dev);
-
- /* Configure ce int interval */
- roce_write(hr_dev, ROCEE_CAEP_CE_INTERVAL_CFG_REG,
- HNS_ROCE_CEQ_DEFAULT_INTERVAL);
-
- /* Configure ce int burst num */
- roce_write(hr_dev, ROCEE_CAEP_CE_BURST_NUM_CFG_REG,
- HNS_ROCE_CEQ_DEFAULT_BURST_NUM);
-
- for (i = 0; i < eq_num; i++) {
- ret = hns_roce_v1_create_eq(hr_dev, &eq_table->eq[i]);
- if (ret) {
- dev_err(dev, "eq create failed\n");
- goto err_create_eq_fail;
- }
- }
-
- for (j = 0; j < irq_num; j++) {
- if (j < eq_num)
- ret = request_irq(hr_dev->irq[j],
- hns_roce_v1_msix_interrupt_eq, 0,
- hr_dev->irq_names[j],
- &eq_table->eq[j]);
- else
- ret = request_irq(hr_dev->irq[j],
- hns_roce_v1_msix_interrupt_abn, 0,
- hr_dev->irq_names[j], hr_dev);
-
- if (ret) {
- dev_err(dev, "request irq error!\n");
- goto err_request_irq_fail;
- }
- }
-
- for (i = 0; i < eq_num; i++)
- hns_roce_v1_enable_eq(hr_dev, i, EQ_ENABLE);
-
- return 0;
-
-err_request_irq_fail:
- for (j -= 1; j >= 0; j--)
- free_irq(hr_dev->irq[j], &eq_table->eq[j]);
-
-err_create_eq_fail:
- for (i -= 1; i >= 0; i--)
- hns_roce_v1_free_eq(hr_dev, &eq_table->eq[i]);
-
- kfree(eq_table->eqc_base);
-
-err_eqc_base_alloc_fail:
- kfree(eq_table->eq);
-
- return ret;
-}
-
-static void hns_roce_v1_cleanup_eq_table(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
- int irq_num;
- int eq_num;
- int i;
-
- eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
- irq_num = eq_num + hr_dev->caps.num_other_vectors;
- for (i = 0; i < eq_num; i++) {
- /* Disable EQ */
- hns_roce_v1_enable_eq(hr_dev, i, EQ_DISABLE);
-
- free_irq(hr_dev->irq[i], &eq_table->eq[i]);
-
- hns_roce_v1_free_eq(hr_dev, &eq_table->eq[i]);
- }
- for (i = eq_num; i < irq_num; i++)
- free_irq(hr_dev->irq[i], hr_dev);
-
- kfree(eq_table->eqc_base);
- kfree(eq_table->eq);
-}
-
-static const struct ib_device_ops hns_roce_v1_dev_ops = {
- .destroy_qp = hns_roce_v1_destroy_qp,
- .modify_cq = hns_roce_v1_modify_cq,
- .poll_cq = hns_roce_v1_poll_cq,
- .post_recv = hns_roce_v1_post_recv,
- .post_send = hns_roce_v1_post_send,
- .query_qp = hns_roce_v1_query_qp,
- .req_notify_cq = hns_roce_v1_req_notify_cq,
-};
-
-static const struct hns_roce_hw hns_roce_hw_v1 = {
- .reset = hns_roce_v1_reset,
- .hw_profile = hns_roce_v1_profile,
- .hw_init = hns_roce_v1_init,
- .hw_exit = hns_roce_v1_exit,
- .post_mbox = hns_roce_v1_post_mbox,
- .chk_mbox = hns_roce_v1_chk_mbox,
- .set_gid = hns_roce_v1_set_gid,
- .set_mac = hns_roce_v1_set_mac,
- .set_mtu = hns_roce_v1_set_mtu,
- .write_mtpt = hns_roce_v1_write_mtpt,
- .write_cqc = hns_roce_v1_write_cqc,
- .modify_cq = hns_roce_v1_modify_cq,
- .clear_hem = hns_roce_v1_clear_hem,
- .modify_qp = hns_roce_v1_modify_qp,
- .query_qp = hns_roce_v1_query_qp,
- .destroy_qp = hns_roce_v1_destroy_qp,
- .post_send = hns_roce_v1_post_send,
- .post_recv = hns_roce_v1_post_recv,
- .req_notify_cq = hns_roce_v1_req_notify_cq,
- .poll_cq = hns_roce_v1_poll_cq,
- .dereg_mr = hns_roce_v1_dereg_mr,
- .destroy_cq = hns_roce_v1_destroy_cq,
- .init_eq = hns_roce_v1_init_eq_table,
- .cleanup_eq = hns_roce_v1_cleanup_eq_table,
- .hns_roce_dev_ops = &hns_roce_v1_dev_ops,
-};
-
-static const struct of_device_id hns_roce_of_match[] = {
- { .compatible = "hisilicon,hns-roce-v1", .data = &hns_roce_hw_v1, },
- {},
-};
-MODULE_DEVICE_TABLE(of, hns_roce_of_match);
-
-static const struct acpi_device_id hns_roce_acpi_match[] = {
- { "HISI00D1", (kernel_ulong_t)&hns_roce_hw_v1 },
- {},
-};
-MODULE_DEVICE_TABLE(acpi, hns_roce_acpi_match);
-
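-/* bus_find_device() match callback: compare a device's fwnode pointer. */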
-static int hns_roce_node_match(struct device *dev, void *fwnode)
-{
- return dev->fwnode == fwnode;
-}
-
-static struct
-platform_device *hns_roce_find_pdev(struct fwnode_handle *fwnode)
-{
- struct device *dev;
-
- /* get the 'device' corresponding to the matching 'fwnode' */
- dev = bus_find_device(&platform_bus_type, NULL,
- fwnode, hns_roce_node_match);
- /* get the platform device */
- return dev ? to_platform_device(dev) : NULL;
-}
-
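-/*
- * Gather the per-device configuration: hardware ops from the DT/ACPI match
- * tables, the mapped register base, the node GUID, the netdev bound to each
- * RoCE port via "eth-handle", and the interrupt names and numbers.
- */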
-static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct platform_device *pdev = NULL;
- struct net_device *netdev = NULL;
- struct device_node *net_node;
- struct resource *res;
- int port_cnt = 0;
- u8 phy_port;
- int ret;
- int i;
-
- /* check if we are compatible with the underlying SoC */
- if (dev_of_node(dev)) {
- const struct of_device_id *of_id;
-
- of_id = of_match_node(hns_roce_of_match, dev->of_node);
- if (!of_id) {
- dev_err(dev, "device is not compatible!\n");
- return -ENXIO;
- }
- hr_dev->hw = (const struct hns_roce_hw *)of_id->data;
- if (!hr_dev->hw) {
- dev_err(dev, "couldn't get H/W specific DT data!\n");
- return -ENXIO;
- }
- } else if (is_acpi_device_node(dev->fwnode)) {
- const struct acpi_device_id *acpi_id;
-
- acpi_id = acpi_match_device(hns_roce_acpi_match, dev);
- if (!acpi_id) {
- dev_err(dev, "device is not compatible!\n");
- return -ENXIO;
- }
- hr_dev->hw = (const struct hns_roce_hw *) acpi_id->driver_data;
- if (!hr_dev->hw) {
- dev_err(dev, "couldn't get H/W specific ACPI data!\n");
- return -ENXIO;
- }
- } else {
- dev_err(dev, "can't read compatibility data from DT or ACPI\n");
- return -ENXIO;
- }
-
- /* get the mapped register base address */
- res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0);
- hr_dev->reg_base = devm_ioremap_resource(dev, res);
- if (IS_ERR(hr_dev->reg_base))
- return PTR_ERR(hr_dev->reg_base);
-
- /* read the node_guid of IB device from the DT or ACPI */
- ret = device_property_read_u8_array(dev, "node-guid",
- (u8 *)&hr_dev->ib_dev.node_guid,
- GUID_LEN);
- if (ret) {
- dev_err(dev, "couldn't get node_guid from DT or ACPI!\n");
- return ret;
- }
-
- /* get the RoCE associated ethernet ports or netdevices */
- for (i = 0; i < HNS_ROCE_MAX_PORTS; i++) {
- if (dev_of_node(dev)) {
- net_node = of_parse_phandle(dev->of_node, "eth-handle",
- i);
- if (!net_node)
- continue;
- pdev = of_find_device_by_node(net_node);
- } else if (is_acpi_device_node(dev->fwnode)) {
- struct fwnode_reference_args args;
-
- ret = acpi_node_get_property_reference(dev->fwnode,
- "eth-handle",
- i, &args);
- if (ret)
- continue;
- pdev = hns_roce_find_pdev(args.fwnode);
- } else {
- dev_err(dev, "cannot read data from DT or ACPI\n");
- return -ENXIO;
- }
-
- if (pdev) {
- netdev = platform_get_drvdata(pdev);
- phy_port = (u8)i;
- if (netdev) {
- hr_dev->iboe.netdevs[port_cnt] = netdev;
- hr_dev->iboe.phy_port[port_cnt] = phy_port;
- } else {
- dev_err(dev, "no netdev found with pdev %s\n",
- pdev->name);
- return -ENODEV;
- }
- port_cnt++;
- }
- }
-
- if (port_cnt == 0) {
- dev_err(dev, "unable to get eth-handle for available ports!\n");
- return -EINVAL;
- }
-
- hr_dev->caps.num_ports = port_cnt;
-
- /* cmd issue mode: 0 is poll, 1 is event */
- hr_dev->cmd_mod = 1;
- hr_dev->loop_idc = 0;
- hr_dev->sdb_offset = ROCEE_DB_SQ_L_0_REG;
- hr_dev->odb_offset = ROCEE_DB_OTHERS_L_0_REG;
-
- /* read the interrupt names from the DT or ACPI */
- ret = device_property_read_string_array(dev, "interrupt-names",
- hr_dev->irq_names,
- HNS_ROCE_V1_MAX_IRQ_NUM);
- if (ret < 0) {
- dev_err(dev, "couldn't get interrupt names from DT or ACPI!\n");
- return ret;
- }
-
- /* fetch the interrupt numbers */
- for (i = 0; i < HNS_ROCE_V1_MAX_IRQ_NUM; i++) {
- hr_dev->irq[i] = platform_get_irq(hr_dev->pdev, i);
- if (hr_dev->irq[i] <= 0) {
- dev_err(dev, "platform get of irq[=%d] failed!\n", i);
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
-/**
- * hns_roce_probe - RoCE driver entry point
- * @pdev: pointer to platform device
- *
- * Return: 0 on success, or a negative error code on failure.
- */
-static int hns_roce_probe(struct platform_device *pdev)
-{
- int ret;
- struct hns_roce_dev *hr_dev;
- struct device *dev = &pdev->dev;
-
- hr_dev = ib_alloc_device(hns_roce_dev, ib_dev);
- if (!hr_dev)
- return -ENOMEM;
-
- hr_dev->priv = kzalloc(sizeof(struct hns_roce_v1_priv), GFP_KERNEL);
- if (!hr_dev->priv) {
- ret = -ENOMEM;
- goto error_failed_kzalloc;
- }
-
- hr_dev->pdev = pdev;
- hr_dev->dev = dev;
- platform_set_drvdata(pdev, hr_dev);
-
- if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64ULL)) &&
- dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32ULL))) {
- dev_err(dev, "Not usable DMA addressing mode\n");
- ret = -EIO;
- goto error_failed_get_cfg;
- }
-
- ret = hns_roce_get_cfg(hr_dev);
- if (ret) {
- dev_err(dev, "Get Configuration failed!\n");
- goto error_failed_get_cfg;
- }
-
- ret = hns_roce_init(hr_dev);
- if (ret) {
- dev_err(dev, "RoCE engine init failed!\n");
- goto error_failed_get_cfg;
- }
-
- return 0;
-
-error_failed_get_cfg:
- kfree(hr_dev->priv);
-
-error_failed_kzalloc:
- ib_dealloc_device(&hr_dev->ib_dev);
-
- return ret;
-}
-
-/**
- * hns_roce_remove - remove RoCE device
- * @pdev: pointer to platform device
- */
-static int hns_roce_remove(struct platform_device *pdev)
-{
- struct hns_roce_dev *hr_dev = platform_get_drvdata(pdev);
-
- hns_roce_exit(hr_dev);
- kfree(hr_dev->priv);
- ib_dealloc_device(&hr_dev->ib_dev);
-
- return 0;
-}
-
-static struct platform_driver hns_roce_driver = {
- .probe = hns_roce_probe,
- .remove = hns_roce_remove,
- .driver = {
- .name = DRV_NAME,
- .of_match_table = hns_roce_of_match,
- .acpi_match_table = ACPI_PTR(hns_roce_acpi_match),
- },
-};
-
-module_platform_driver(hns_roce_driver);
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_AUTHOR("Wei Hu <xavier.huwei@huawei.com>");
-MODULE_AUTHOR("Nenglong Zhao <zhaonenglong@hisilicon.com>");
-MODULE_AUTHOR("Lijun Ou <oulijun@huawei.com>");
-MODULE_DESCRIPTION("Hisilicon Hip06 Family RoCE Driver");
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
deleted file mode 100644
index 52307b2c7100..000000000000
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ /dev/null
@@ -1,1099 +0,0 @@
-/*
- * Copyright (c) 2016 Hisilicon Limited.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _HNS_ROCE_HW_V1_H
-#define _HNS_ROCE_HW_V1_H
-
-#define CQ_STATE_VALID 2
-
-#define HNS_ROCE_V1_MAX_PD_NUM 0x8000
-#define HNS_ROCE_V1_MAX_CQ_NUM 0x10000
-#define HNS_ROCE_V1_MAX_CQE_NUM 0x8000
-
-#define HNS_ROCE_V1_MAX_QP_NUM 0x40000
-#define HNS_ROCE_V1_MAX_WQE_NUM 0x4000
-
-#define HNS_ROCE_V1_MAX_MTPT_NUM 0x80000
-
-#define HNS_ROCE_V1_MAX_MTT_SEGS 0x100000
-
-#define HNS_ROCE_V1_MAX_QP_INIT_RDMA 128
-#define HNS_ROCE_V1_MAX_QP_DEST_RDMA 128
-
-#define HNS_ROCE_V1_MAX_SQ_DESC_SZ 64
-#define HNS_ROCE_V1_MAX_RQ_DESC_SZ 64
-#define HNS_ROCE_V1_SG_NUM 2
-#define HNS_ROCE_V1_INLINE_SIZE 32
-
-#define HNS_ROCE_V1_UAR_NUM 256
-#define HNS_ROCE_V1_PHY_UAR_NUM 8
-
-#define HNS_ROCE_V1_GID_NUM 16
-#define HNS_ROCE_V1_RESV_QP 8
-
-#define HNS_ROCE_V1_MAX_IRQ_NUM 34
-#define HNS_ROCE_V1_COMP_VEC_NUM 32
-#define HNS_ROCE_V1_AEQE_VEC_NUM 1
-#define HNS_ROCE_V1_ABNORMAL_VEC_NUM 1
-
-#define HNS_ROCE_V1_COMP_EQE_NUM 0x8000
-#define HNS_ROCE_V1_ASYNC_EQE_NUM 0x400
-
-#define HNS_ROCE_V1_QPC_ENTRY_SIZE 256
-#define HNS_ROCE_V1_IRRL_ENTRY_SIZE 8
-#define HNS_ROCE_V1_CQC_ENTRY_SIZE 64
-#define HNS_ROCE_V1_MTPT_ENTRY_SIZE 64
-#define HNS_ROCE_V1_MTT_ENTRY_SIZE 64
-
-#define HNS_ROCE_V1_CQE_ENTRY_SIZE 32
-#define HNS_ROCE_V1_PAGE_SIZE_SUPPORT 0xFFFFF000
-
-#define HNS_ROCE_V1_TABLE_CHUNK_SIZE (1 << 17)
-
-#define HNS_ROCE_V1_EXT_RAQ_WF 8
-#define HNS_ROCE_V1_RAQ_ENTRY 64
-#define HNS_ROCE_V1_RAQ_DEPTH 32768
-#define HNS_ROCE_V1_RAQ_SIZE (HNS_ROCE_V1_RAQ_ENTRY * HNS_ROCE_V1_RAQ_DEPTH)
-
-#define HNS_ROCE_V1_SDB_DEPTH 0x400
-#define HNS_ROCE_V1_ODB_DEPTH 0x400
-
-#define HNS_ROCE_V1_DB_RSVD 0x80
-
-#define HNS_ROCE_V1_SDB_ALEPT HNS_ROCE_V1_DB_RSVD
-#define HNS_ROCE_V1_SDB_ALFUL (HNS_ROCE_V1_SDB_DEPTH - HNS_ROCE_V1_DB_RSVD)
-#define HNS_ROCE_V1_ODB_ALEPT HNS_ROCE_V1_DB_RSVD
-#define HNS_ROCE_V1_ODB_ALFUL (HNS_ROCE_V1_ODB_DEPTH - HNS_ROCE_V1_DB_RSVD)
-
-#define HNS_ROCE_V1_EXT_SDB_DEPTH 0x4000
-#define HNS_ROCE_V1_EXT_ODB_DEPTH 0x4000
-#define HNS_ROCE_V1_EXT_SDB_ENTRY 16
-#define HNS_ROCE_V1_EXT_ODB_ENTRY 16
-#define HNS_ROCE_V1_EXT_SDB_SIZE \
- (HNS_ROCE_V1_EXT_SDB_DEPTH * HNS_ROCE_V1_EXT_SDB_ENTRY)
-#define HNS_ROCE_V1_EXT_ODB_SIZE \
- (HNS_ROCE_V1_EXT_ODB_DEPTH * HNS_ROCE_V1_EXT_ODB_ENTRY)
-
-#define HNS_ROCE_V1_EXT_SDB_ALEPT HNS_ROCE_V1_DB_RSVD
-#define HNS_ROCE_V1_EXT_SDB_ALFUL \
- (HNS_ROCE_V1_EXT_SDB_DEPTH - HNS_ROCE_V1_DB_RSVD)
-#define HNS_ROCE_V1_EXT_ODB_ALEPT HNS_ROCE_V1_DB_RSVD
-#define HNS_ROCE_V1_EXT_ODB_ALFUL \
- (HNS_ROCE_V1_EXT_ODB_DEPTH - HNS_ROCE_V1_DB_RSVD)
-
-#define HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS 50000
-#define HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS 10000
-#define HNS_ROCE_V1_FREE_MR_WAIT_VALUE 5
-#define HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE 20
-
-#define HNS_ROCE_BT_RSV_BUF_SIZE (1 << 17)
-
-#define HNS_ROCE_V1_TPTR_ENTRY_SIZE 2
-#define HNS_ROCE_V1_TPTR_BUF_SIZE \
- (HNS_ROCE_V1_TPTR_ENTRY_SIZE * HNS_ROCE_V1_MAX_CQ_NUM)
-
-#define HNS_ROCE_ODB_POLL_MODE 0
-
-#define HNS_ROCE_SDB_NORMAL_MODE 0
-#define HNS_ROCE_SDB_EXTEND_MODE 1
-
-#define HNS_ROCE_ODB_EXTEND_MODE 1
-
-#define KEY_VALID 0x02
-
-#define HNS_ROCE_CQE_QPN_MASK 0x3ffff
-#define HNS_ROCE_CQE_STATUS_MASK 0x1f
-#define HNS_ROCE_CQE_OPCODE_MASK 0xf
-
-#define HNS_ROCE_CQE_SUCCESS 0x00
-#define HNS_ROCE_CQE_SYNDROME_LOCAL_LENGTH_ERR 0x01
-#define HNS_ROCE_CQE_SYNDROME_LOCAL_QP_OP_ERR 0x02
-#define HNS_ROCE_CQE_SYNDROME_LOCAL_PROT_ERR 0x03
-#define HNS_ROCE_CQE_SYNDROME_WR_FLUSH_ERR 0x04
-#define HNS_ROCE_CQE_SYNDROME_MEM_MANAGE_OPERATE_ERR 0x05
-#define HNS_ROCE_CQE_SYNDROME_BAD_RESP_ERR 0x06
-#define HNS_ROCE_CQE_SYNDROME_LOCAL_ACCESS_ERR 0x07
-#define HNS_ROCE_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR 0x08
-#define HNS_ROCE_CQE_SYNDROME_REMOTE_ACCESS_ERR 0x09
-#define HNS_ROCE_CQE_SYNDROME_REMOTE_OP_ERR 0x0a
-#define HNS_ROCE_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR 0x0b
-#define HNS_ROCE_CQE_SYNDROME_RNR_RETRY_EXC_ERR 0x0c
-
-#define QP1C_CFGN_OFFSET 0x28
-#define PHY_PORT_OFFSET 0x8
-#define MTPT_IDX_SHIFT 16
-#define ALL_PORT_VAL_OPEN 0x3f
-#define POL_TIME_INTERVAL_VAL 0x80
-#define SLEEP_TIME_INTERVAL 20
-#define SQ_PSN_SHIFT 8
-#define QKEY_VAL 0x80010000
-#define SDB_INV_CNT_OFFSET 8
-
-#define HNS_ROCE_CEQ_DEFAULT_INTERVAL 0x10
-#define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x10
-
-#define HNS_ROCE_INT_MASK_DISABLE 0
-#define HNS_ROCE_INT_MASK_ENABLE 1
-
-#define CEQ_REG_OFFSET 0x18
-
-#define HNS_ROCE_CEQE_CEQE_COMP_OWNER_S 0
-
-#define HNS_ROCE_V1_CONS_IDX_M GENMASK(15, 0)
-
-#define HNS_ROCE_CEQE_CEQE_COMP_CQN_S 16
-#define HNS_ROCE_CEQE_CEQE_COMP_CQN_M GENMASK(31, 16)
-
-#define HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S 16
-#define HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M GENMASK(23, 16)
-
-#define HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S 24
-#define HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M GENMASK(30, 24)
-
-#define HNS_ROCE_AEQE_U32_4_OWNER_S 31
-
-#define HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S 0
-#define HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M GENMASK(23, 0)
-
-#define HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S 25
-#define HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M GENMASK(27, 25)
-
-#define HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S 0
-#define HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M GENMASK(15, 0)
-
-#define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S 0
-#define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M GENMASK(4, 0)
-
-struct hns_roce_cq_context {
- __le32 cqc_byte_4;
- __le32 cq_bt_l;
- __le32 cqc_byte_12;
- __le32 cur_cqe_ba0_l;
- __le32 cqc_byte_20;
- __le32 cqe_tptr_addr_l;
- __le32 cur_cqe_ba1_l;
- __le32 cqc_byte_32;
-};
-
-#define CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S 0
-#define CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_M \
- (((1UL << 2) - 1) << CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S)
-
-#define CQ_CONTEXT_CQC_BYTE_4_CQN_S 16
-#define CQ_CONTEXT_CQC_BYTE_4_CQN_M \
- (((1UL << 16) - 1) << CQ_CONTEXT_CQC_BYTE_4_CQN_S)
-
-#define CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_S 0
-#define CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_M \
- (((1UL << 17) - 1) << CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_S)
-
-#define CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S 20
-#define CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_M \
- (((1UL << 4) - 1) << CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S)
-
-#define CQ_CONTEXT_CQC_BYTE_12_CEQN_S 24
-#define CQ_CONTEXT_CQC_BYTE_12_CEQN_M \
- (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_12_CEQN_S)
-
-#define CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S 0
-#define CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_M \
- (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S)
-
-#define CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_S 16
-#define CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_M \
- (((1UL << 16) - 1) << CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_S)
-
-#define CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S 8
-#define CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M \
- (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S)
-
-#define CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_S 0
-#define CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M \
- (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_S)
-
-#define CQ_CONTEXT_CQC_BYTE_32_SE_FLAG_S 9
-
-#define CQ_CONTEXT_CQC_BYTE_32_CE_FLAG_S 8
-#define CQ_CONTEXT_CQC_BYTE_32_NOTIFICATION_FLAG_S 14
-#define CQ_CQNTEXT_CQC_BYTE_32_TYPE_OF_COMPLETION_NOTIFICATION_S 15
-
-#define CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S 16
-#define CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M \
- (((1UL << 16) - 1) << CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S)
-
-struct hns_roce_cqe {
- __le32 cqe_byte_4;
- union {
- __le32 r_key;
- __le32 immediate_data;
- };
- __le32 byte_cnt;
- __le32 cqe_byte_16;
- __le32 cqe_byte_20;
- __le32 s_mac_l;
- __le32 cqe_byte_28;
- __le32 reserved;
-};
-
-#define CQE_BYTE_4_OWNER_S 7
-#define CQE_BYTE_4_SQ_RQ_FLAG_S 14
-
-#define CQE_BYTE_4_STATUS_OF_THE_OPERATION_S 8
-#define CQE_BYTE_4_STATUS_OF_THE_OPERATION_M \
- (((1UL << 5) - 1) << CQE_BYTE_4_STATUS_OF_THE_OPERATION_S)
-
-#define CQE_BYTE_4_WQE_INDEX_S 16
-#define CQE_BYTE_4_WQE_INDEX_M (((1UL << 14) - 1) << CQE_BYTE_4_WQE_INDEX_S)
-
-#define CQE_BYTE_4_OPERATION_TYPE_S 0
-#define CQE_BYTE_4_OPERATION_TYPE_M \
- (((1UL << 4) - 1) << CQE_BYTE_4_OPERATION_TYPE_S)
-
-#define CQE_BYTE_4_IMM_INDICATOR_S 15
-
-#define CQE_BYTE_16_LOCAL_QPN_S 0
-#define CQE_BYTE_16_LOCAL_QPN_M (((1UL << 24) - 1) << CQE_BYTE_16_LOCAL_QPN_S)
-
-#define CQE_BYTE_20_PORT_NUM_S 26
-#define CQE_BYTE_20_PORT_NUM_M (((1UL << 3) - 1) << CQE_BYTE_20_PORT_NUM_S)
-
-#define CQE_BYTE_20_SL_S 24
-#define CQE_BYTE_20_SL_M (((1UL << 2) - 1) << CQE_BYTE_20_SL_S)
-
-#define CQE_BYTE_20_REMOTE_QPN_S 0
-#define CQE_BYTE_20_REMOTE_QPN_M \
- (((1UL << 24) - 1) << CQE_BYTE_20_REMOTE_QPN_S)
-
-#define CQE_BYTE_20_GRH_PRESENT_S 29
-
-#define CQE_BYTE_28_P_KEY_IDX_S 16
-#define CQE_BYTE_28_P_KEY_IDX_M (((1UL << 16) - 1) << CQE_BYTE_28_P_KEY_IDX_S)
-
-#define CQ_DB_REQ_NOT_SOL 0
-#define CQ_DB_REQ_NOT (1 << 16)
-
-struct hns_roce_v1_mpt_entry {
- __le32 mpt_byte_4;
- __le32 pbl_addr_l;
- __le32 mpt_byte_12;
- __le32 virt_addr_l;
- __le32 virt_addr_h;
- __le32 length;
- __le32 mpt_byte_28;
- __le32 pa0_l;
- __le32 mpt_byte_36;
- __le32 mpt_byte_40;
- __le32 mpt_byte_44;
- __le32 mpt_byte_48;
- __le32 pa4_l;
- __le32 mpt_byte_56;
- __le32 mpt_byte_60;
- __le32 mpt_byte_64;
-};
-
-#define MPT_BYTE_4_KEY_STATE_S 0
-#define MPT_BYTE_4_KEY_STATE_M (((1UL << 2) - 1) << MPT_BYTE_4_KEY_STATE_S)
-
-#define MPT_BYTE_4_KEY_S 8
-#define MPT_BYTE_4_KEY_M (((1UL << 8) - 1) << MPT_BYTE_4_KEY_S)
-
-#define MPT_BYTE_4_PAGE_SIZE_S 16
-#define MPT_BYTE_4_PAGE_SIZE_M (((1UL << 2) - 1) << MPT_BYTE_4_PAGE_SIZE_S)
-
-#define MPT_BYTE_4_MW_TYPE_S 20
-
-#define MPT_BYTE_4_MW_BIND_ENABLE_S 21
-
-#define MPT_BYTE_4_OWN_S 22
-
-#define MPT_BYTE_4_MEMORY_LOCATION_TYPE_S 24
-#define MPT_BYTE_4_MEMORY_LOCATION_TYPE_M \
- (((1UL << 2) - 1) << MPT_BYTE_4_MEMORY_LOCATION_TYPE_S)
-
-#define MPT_BYTE_4_REMOTE_ATOMIC_S 26
-#define MPT_BYTE_4_LOCAL_WRITE_S 27
-#define MPT_BYTE_4_REMOTE_WRITE_S 28
-#define MPT_BYTE_4_REMOTE_READ_S 29
-#define MPT_BYTE_4_REMOTE_INVAL_ENABLE_S 30
-#define MPT_BYTE_4_ADDRESS_TYPE_S 31
-
-#define MPT_BYTE_12_PBL_ADDR_H_S 0
-#define MPT_BYTE_12_PBL_ADDR_H_M \
- (((1UL << 17) - 1) << MPT_BYTE_12_PBL_ADDR_H_S)
-
-#define MPT_BYTE_12_MW_BIND_COUNTER_S 17
-#define MPT_BYTE_12_MW_BIND_COUNTER_M \
- (((1UL << 15) - 1) << MPT_BYTE_12_MW_BIND_COUNTER_S)
-
-#define MPT_BYTE_28_PD_S 0
-#define MPT_BYTE_28_PD_M (((1UL << 16) - 1) << MPT_BYTE_28_PD_S)
-
-#define MPT_BYTE_28_L_KEY_IDX_L_S 16
-#define MPT_BYTE_28_L_KEY_IDX_L_M \
- (((1UL << 16) - 1) << MPT_BYTE_28_L_KEY_IDX_L_S)
-
-#define MPT_BYTE_36_PA0_H_S 0
-#define MPT_BYTE_36_PA0_H_M (((1UL << 5) - 1) << MPT_BYTE_36_PA0_H_S)
-
-#define MPT_BYTE_36_PA1_L_S 8
-#define MPT_BYTE_36_PA1_L_M (((1UL << 24) - 1) << MPT_BYTE_36_PA1_L_S)
-
-#define MPT_BYTE_40_PA1_H_S 0
-#define MPT_BYTE_40_PA1_H_M (((1UL << 13) - 1) << MPT_BYTE_40_PA1_H_S)
-
-#define MPT_BYTE_40_PA2_L_S 16
-#define MPT_BYTE_40_PA2_L_M (((1UL << 16) - 1) << MPT_BYTE_40_PA2_L_S)
-
-#define MPT_BYTE_44_PA2_H_S 0
-#define MPT_BYTE_44_PA2_H_M (((1UL << 21) - 1) << MPT_BYTE_44_PA2_H_S)
-
-#define MPT_BYTE_44_PA3_L_S 24
-#define MPT_BYTE_44_PA3_L_M (((1UL << 8) - 1) << MPT_BYTE_44_PA3_L_S)
-
-#define MPT_BYTE_48_PA3_H_S 0
-#define MPT_BYTE_48_PA3_H_M (((1UL << 29) - 1) << MPT_BYTE_48_PA3_H_S)
-
-#define MPT_BYTE_56_PA4_H_S 0
-#define MPT_BYTE_56_PA4_H_M (((1UL << 5) - 1) << MPT_BYTE_56_PA4_H_S)
-
-#define MPT_BYTE_56_PA5_L_S 8
-#define MPT_BYTE_56_PA5_L_M (((1UL << 24) - 1) << MPT_BYTE_56_PA5_L_S)
-
-#define MPT_BYTE_60_PA5_H_S 0
-#define MPT_BYTE_60_PA5_H_M (((1UL << 13) - 1) << MPT_BYTE_60_PA5_H_S)
-
-#define MPT_BYTE_60_PA6_L_S 16
-#define MPT_BYTE_60_PA6_L_M (((1UL << 16) - 1) << MPT_BYTE_60_PA6_L_S)
-
-#define MPT_BYTE_64_PA6_H_S 0
-#define MPT_BYTE_64_PA6_H_M (((1UL << 21) - 1) << MPT_BYTE_64_PA6_H_S)
-
-#define MPT_BYTE_64_L_KEY_IDX_H_S 24
-#define MPT_BYTE_64_L_KEY_IDX_H_M \
- (((1UL << 8) - 1) << MPT_BYTE_64_L_KEY_IDX_H_S)
-
-struct hns_roce_wqe_ctrl_seg {
- __le32 sgl_pa_h;
- __le32 flag;
- union {
- __be32 imm_data;
- __le32 inv_key;
- };
- __le32 msg_length;
-};
-
-struct hns_roce_wqe_data_seg {
- __le64 addr;
- __le32 lkey;
- __le32 len;
-};
-
-struct hns_roce_wqe_raddr_seg {
- __le32 rkey;
- __le32 len; /* reserved */
- __le64 raddr;
-};
-
-struct hns_roce_rq_wqe_ctrl {
- __le32 rwqe_byte_4;
- __le32 rocee_sgl_ba_l;
- __le32 rwqe_byte_12;
- __le32 reserved[5];
-};
-
-#define RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_S 16
-#define RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_M \
- (((1UL << 6) - 1) << RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_S)
-
-#define HNS_ROCE_QP_DESTROY_TIMEOUT_MSECS 10000
-
-#define GID_LEN 16
-
-struct hns_roce_ud_send_wqe {
- __le32 dmac_h;
- __le32 u32_8;
- __le32 immediate_data;
-
- __le32 u32_16;
- union {
- unsigned char dgid[GID_LEN];
- struct {
- __le32 u32_20;
- __le32 u32_24;
- __le32 u32_28;
- __le32 u32_32;
- };
- };
-
- __le32 u32_36;
- __le32 u32_40;
-
- __le32 va0_l;
- __le32 va0_h;
- __le32 l_key0;
-
- __le32 va1_l;
- __le32 va1_h;
- __le32 l_key1;
-};
-
-#define UD_SEND_WQE_U32_4_DMAC_0_S 0
-#define UD_SEND_WQE_U32_4_DMAC_0_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_0_S)
-
-#define UD_SEND_WQE_U32_4_DMAC_1_S 8
-#define UD_SEND_WQE_U32_4_DMAC_1_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_1_S)
-
-#define UD_SEND_WQE_U32_4_DMAC_2_S 16
-#define UD_SEND_WQE_U32_4_DMAC_2_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_2_S)
-
-#define UD_SEND_WQE_U32_4_DMAC_3_S 24
-#define UD_SEND_WQE_U32_4_DMAC_3_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_3_S)
-
-#define UD_SEND_WQE_U32_8_DMAC_4_S 0
-#define UD_SEND_WQE_U32_8_DMAC_4_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_8_DMAC_4_S)
-
-#define UD_SEND_WQE_U32_8_DMAC_5_S 8
-#define UD_SEND_WQE_U32_8_DMAC_5_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_8_DMAC_5_S)
-
-#define UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S 22
-
-#define UD_SEND_WQE_U32_8_OPERATION_TYPE_S 16
-#define UD_SEND_WQE_U32_8_OPERATION_TYPE_M \
- (((1UL << 4) - 1) << UD_SEND_WQE_U32_8_OPERATION_TYPE_S)
-
-#define UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_S 24
-#define UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_M \
- (((1UL << 6) - 1) << UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_S)
-
-#define UD_SEND_WQE_U32_8_SEND_GL_ROUTING_HDR_FLAG_S 31
-
-#define UD_SEND_WQE_U32_16_DEST_QP_S 0
-#define UD_SEND_WQE_U32_16_DEST_QP_M \
- (((1UL << 24) - 1) << UD_SEND_WQE_U32_16_DEST_QP_S)
-
-#define UD_SEND_WQE_U32_16_MAX_STATIC_RATE_S 24
-#define UD_SEND_WQE_U32_16_MAX_STATIC_RATE_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_16_MAX_STATIC_RATE_S)
-
-#define UD_SEND_WQE_U32_36_FLOW_LABEL_S 0
-#define UD_SEND_WQE_U32_36_FLOW_LABEL_M \
- (((1UL << 20) - 1) << UD_SEND_WQE_U32_36_FLOW_LABEL_S)
-
-#define UD_SEND_WQE_U32_36_PRIORITY_S 20
-#define UD_SEND_WQE_U32_36_PRIORITY_M \
- (((1UL << 4) - 1) << UD_SEND_WQE_U32_36_PRIORITY_S)
-
-#define UD_SEND_WQE_U32_36_SGID_INDEX_S 24
-#define UD_SEND_WQE_U32_36_SGID_INDEX_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_36_SGID_INDEX_S)
-
-#define UD_SEND_WQE_U32_40_HOP_LIMIT_S 0
-#define UD_SEND_WQE_U32_40_HOP_LIMIT_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_40_HOP_LIMIT_S)
-
-#define UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S 8
-#define UD_SEND_WQE_U32_40_TRAFFIC_CLASS_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S)
-
-struct hns_roce_sqp_context {
- __le32 qp1c_bytes_4;
- __le32 sq_rq_bt_l;
- __le32 qp1c_bytes_12;
- __le32 qp1c_bytes_16;
- __le32 qp1c_bytes_20;
- __le32 cur_rq_wqe_ba_l;
- __le32 qp1c_bytes_28;
- __le32 qp1c_bytes_32;
- __le32 cur_sq_wqe_ba_l;
- __le32 qp1c_bytes_40;
-};
-
-#define QP1C_BYTES_4_QP_STATE_S 0
-#define QP1C_BYTES_4_QP_STATE_M \
- (((1UL << 3) - 1) << QP1C_BYTES_4_QP_STATE_S)
-
-#define QP1C_BYTES_4_SQ_WQE_SHIFT_S 8
-#define QP1C_BYTES_4_SQ_WQE_SHIFT_M \
- (((1UL << 4) - 1) << QP1C_BYTES_4_SQ_WQE_SHIFT_S)
-
-#define QP1C_BYTES_4_RQ_WQE_SHIFT_S 12
-#define QP1C_BYTES_4_RQ_WQE_SHIFT_M \
- (((1UL << 4) - 1) << QP1C_BYTES_4_RQ_WQE_SHIFT_S)
-
-#define QP1C_BYTES_4_PD_S 16
-#define QP1C_BYTES_4_PD_M (((1UL << 16) - 1) << QP1C_BYTES_4_PD_S)
-
-#define QP1C_BYTES_12_SQ_RQ_BT_H_S 0
-#define QP1C_BYTES_12_SQ_RQ_BT_H_M \
- (((1UL << 17) - 1) << QP1C_BYTES_12_SQ_RQ_BT_H_S)
-
-#define QP1C_BYTES_16_RQ_HEAD_S 0
-#define QP1C_BYTES_16_RQ_HEAD_M (((1UL << 15) - 1) << QP1C_BYTES_16_RQ_HEAD_S)
-
-#define QP1C_BYTES_16_PORT_NUM_S 16
-#define QP1C_BYTES_16_PORT_NUM_M \
- (((1UL << 3) - 1) << QP1C_BYTES_16_PORT_NUM_S)
-
-#define QP1C_BYTES_16_SIGNALING_TYPE_S 27
-#define QP1C_BYTES_16_LOCAL_ENABLE_E2E_CREDIT_S 28
-#define QP1C_BYTES_16_RQ_BA_FLG_S 29
-#define QP1C_BYTES_16_SQ_BA_FLG_S 30
-#define QP1C_BYTES_16_QP1_ERR_S 31
-
-#define QP1C_BYTES_20_SQ_HEAD_S 0
-#define QP1C_BYTES_20_SQ_HEAD_M (((1UL << 15) - 1) << QP1C_BYTES_20_SQ_HEAD_S)
-
-#define QP1C_BYTES_20_PKEY_IDX_S 16
-#define QP1C_BYTES_20_PKEY_IDX_M \
- (((1UL << 16) - 1) << QP1C_BYTES_20_PKEY_IDX_S)
-
-#define QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S 0
-#define QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M \
- (((1UL << 5) - 1) << QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S)
-
-#define QP1C_BYTES_28_RQ_CUR_IDX_S 16
-#define QP1C_BYTES_28_RQ_CUR_IDX_M \
- (((1UL << 15) - 1) << QP1C_BYTES_28_RQ_CUR_IDX_S)
-
-#define QP1C_BYTES_32_TX_CQ_NUM_S 0
-#define QP1C_BYTES_32_TX_CQ_NUM_M \
- (((1UL << 16) - 1) << QP1C_BYTES_32_TX_CQ_NUM_S)
-
-#define QP1C_BYTES_32_RX_CQ_NUM_S 16
-#define QP1C_BYTES_32_RX_CQ_NUM_M \
- (((1UL << 16) - 1) << QP1C_BYTES_32_RX_CQ_NUM_S)
-
-#define QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S 0
-#define QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M \
- (((1UL << 5) - 1) << QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S)
-
-#define QP1C_BYTES_40_SQ_CUR_IDX_S 16
-#define QP1C_BYTES_40_SQ_CUR_IDX_M \
- (((1UL << 15) - 1) << QP1C_BYTES_40_SQ_CUR_IDX_S)
-
-#define HNS_ROCE_WQE_INLINE (1UL<<31)
-#define HNS_ROCE_WQE_SE (1UL<<30)
-
-#define HNS_ROCE_WQE_SGE_NUM_BIT 24
-#define HNS_ROCE_WQE_IMM (1UL<<23)
-#define HNS_ROCE_WQE_FENCE (1UL<<21)
-#define HNS_ROCE_WQE_CQ_NOTIFY (1UL<<20)
-
-#define HNS_ROCE_WQE_OPCODE_SEND (0<<16)
-#define HNS_ROCE_WQE_OPCODE_RDMA_READ (1<<16)
-#define HNS_ROCE_WQE_OPCODE_RDMA_WRITE (2<<16)
-#define HNS_ROCE_WQE_OPCODE_LOCAL_INV (4<<16)
-#define HNS_ROCE_WQE_OPCODE_UD_SEND (7<<16)
-#define HNS_ROCE_WQE_OPCODE_MASK (15<<16)
-
-struct hns_roce_qp_context {
- __le32 qpc_bytes_4;
- __le32 qpc_bytes_8;
- __le32 qpc_bytes_12;
- __le32 qpc_bytes_16;
- __le32 sq_rq_bt_l;
- __le32 qpc_bytes_24;
- __le32 irrl_ba_l;
- __le32 qpc_bytes_32;
- __le32 qpc_bytes_36;
- __le32 dmac_l;
- __le32 qpc_bytes_44;
- __le32 qpc_bytes_48;
- u8 dgid[16];
- __le32 qpc_bytes_68;
- __le32 cur_rq_wqe_ba_l;
- __le32 qpc_bytes_76;
- __le32 rx_rnr_time;
- __le32 qpc_bytes_84;
- __le32 qpc_bytes_88;
- union {
- __le32 rx_sge_len;
- __le32 dma_length;
- };
- union {
- __le32 rx_sge_num;
- __le32 rx_send_pktn;
- __le32 r_key;
- };
- __le32 va_l;
- __le32 va_h;
- __le32 qpc_bytes_108;
- __le32 qpc_bytes_112;
- __le32 rx_cur_sq_wqe_ba_l;
- __le32 qpc_bytes_120;
- __le32 qpc_bytes_124;
- __le32 qpc_bytes_128;
- __le32 qpc_bytes_132;
- __le32 qpc_bytes_136;
- __le32 qpc_bytes_140;
- __le32 qpc_bytes_144;
- __le32 qpc_bytes_148;
- union {
- __le32 rnr_retry;
- __le32 ack_time;
- };
- __le32 qpc_bytes_156;
- __le32 pkt_use_len;
- __le32 qpc_bytes_164;
- __le32 qpc_bytes_168;
- union {
- __le32 sge_use_len;
- __le32 pa_use_len;
- };
- __le32 qpc_bytes_176;
- __le32 qpc_bytes_180;
- __le32 tx_cur_sq_wqe_ba_l;
- __le32 qpc_bytes_188;
- __le32 rvd21;
-};
-
-#define QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S 0
-#define QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M \
- (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S)
-
-#define QP_CONTEXT_QPC_BYTE_4_ENABLE_FPMR_S 3
-#define QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S 4
-#define QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S 5
-#define QP_CONTEXT_QPC_BYTE_4_ATOMIC_OPERATION_ENABLE_S 6
-#define QP_CONTEXT_QPC_BYTE_4_RDMAR_USE_S 7
-
-#define QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S 8
-#define QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_M \
- (((1UL << 4) - 1) << QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S)
-
-#define QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S 12
-#define QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_M \
- (((1UL << 4) - 1) << QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S)
-
-#define QP_CONTEXT_QPC_BYTES_4_PD_S 16
-#define QP_CONTEXT_QPC_BYTES_4_PD_M \
- (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_4_PD_S)
-
-#define QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S 0
-#define QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_M \
- (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S)
-
-#define QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S 16
-#define QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_M \
- (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S)
-
-#define QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S 0
-#define QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_M \
- (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S)
-
-#define QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S 16
-#define QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M \
- (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S)
-
-#define QP_CONTEXT_QPC_BYTES_16_QP_NUM_S 0
-#define QP_CONTEXT_QPC_BYTES_16_QP_NUM_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_16_QP_NUM_S)
-
-#define QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S 0
-#define QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M \
- (((1UL << 17) - 1) << QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S)
-
-#define QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S 18
-#define QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M \
- (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S)
-
-#define QP_CONTEXT_QPC_BYTE_24_REMOTE_ENABLE_E2E_CREDITS_S 23
-
-#define QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S 0
-#define QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M \
- (((1UL << 17) - 1) << QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S)
-
-#define QP_CONTEXT_QPC_BYTES_32_MIG_STATE_S 18
-#define QP_CONTEXT_QPC_BYTES_32_MIG_STATE_M \
- (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_32_MIG_STATE_S)
-
-#define QP_CONTEXT_QPC_BYTE_32_LOCAL_ENABLE_E2E_CREDITS_S 20
-#define QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S 21
-#define QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S 22
-#define QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S 23
-
-#define QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S 24
-#define QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S)
-
-#define QP_CONTEXT_QPC_BYTES_36_DEST_QP_S 0
-#define QP_CONTEXT_QPC_BYTES_36_DEST_QP_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_36_DEST_QP_S)
-
-#define QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S 24
-#define QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S)
-
-#define QP_CONTEXT_QPC_BYTES_44_DMAC_H_S 0
-#define QP_CONTEXT_QPC_BYTES_44_DMAC_H_M \
- (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_44_DMAC_H_S)
-
-#define QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_S 16
-#define QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_S)
-
-#define QP_CONTEXT_QPC_BYTES_44_HOPLMT_S 24
-#define QP_CONTEXT_QPC_BYTES_44_HOPLMT_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_44_HOPLMT_S)
-
-#define QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S 0
-#define QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M \
- (((1UL << 20) - 1) << QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S)
-
-#define QP_CONTEXT_QPC_BYTES_48_TCLASS_S 20
-#define QP_CONTEXT_QPC_BYTES_48_TCLASS_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_48_TCLASS_S)
-
-#define QP_CONTEXT_QPC_BYTES_48_MTU_S 28
-#define QP_CONTEXT_QPC_BYTES_48_MTU_M \
- (((1UL << 4) - 1) << QP_CONTEXT_QPC_BYTES_48_MTU_S)
-
-#define QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_S 0
-#define QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_S)
-
-#define QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S 16
-#define QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S)
-
-#define QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S 0
-#define QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M \
- (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S)
-
-#define QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S 8
-#define QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_S 0
-#define QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_S 24
-#define QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_S)
-
-#define QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S 0
-#define QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_88_RX_REQ_PSN_ERR_FLAG_S 24
-#define QP_CONTEXT_QPC_BYTES_88_RX_LAST_OPCODE_FLG_S 25
-
-#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_S 26
-#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_M \
- (((1UL << 2) - 1) << \
- QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_S)
-
-#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_S 29
-#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_M \
- (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_S)
-
-#define QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_S 0
-#define QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_FLG_S 24
-#define QP_CONTEXT_QPC_BYTES_108_TRRL_TDB_PSN_FLG_S 25
-
-#define QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_S 0
-#define QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_S 24
-#define QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_S)
-
-#define QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S 0
-#define QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M \
- (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S)
-
-#define QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_S 0
-#define QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_S 16
-#define QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_S)
-
-#define QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_S 0
-#define QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_128_RX_ACK_PSN_ERR_FLG_S 24
-
-#define QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_S 25
-#define QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_M \
- (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_S)
-
-#define QP_CONTEXT_QPC_BYTES_128_IRRL_PSN_VLD_FLG_S 27
-
-#define QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_S 0
-#define QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_S 24
-#define QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_S)
-
-#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_S 0
-#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_S 24
-#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_S)
-
-#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_S 0
-#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_M \
- (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_S)
-
-#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_S 16
-#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_140_RNR_RETRY_FLG_S 31
-
-#define QP_CONTEXT_QPC_BYTES_144_QP_STATE_S 0
-#define QP_CONTEXT_QPC_BYTES_144_QP_STATE_M \
- (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_144_QP_STATE_S)
-
-#define QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_S 0
-#define QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_M \
- (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_S)
-
-#define QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S 2
-#define QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M \
- (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S)
-
-#define QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_S 5
-#define QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_M \
- (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_S)
-
-#define QP_CONTEXT_QPC_BYTES_148_LSN_S 8
-#define QP_CONTEXT_QPC_BYTES_148_LSN_M \
- (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_148_LSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_S 0
-#define QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_M \
- (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_S)
-
-#define QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S 3
-#define QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M \
- (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S)
-
-#define QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_S 8
-#define QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_M \
- (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_S)
-
-#define QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S 11
-#define QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M \
- (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S)
-
-#define QP_CONTEXT_QPC_BYTES_156_SL_S 14
-#define QP_CONTEXT_QPC_BYTES_156_SL_M \
- (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_156_SL_S)
-
-#define QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S 16
-#define QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S)
-
-#define QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_S 24
-#define QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_M \
- (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_S)
-
-#define QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S 0
-#define QP_CONTEXT_QPC_BYTES_164_SQ_PSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_S 24
-#define QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_S)
-
-#define QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_S 0
-#define QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_S 24
-#define QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_M \
- (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_S)
-
-#define QP_CONTEXT_QPC_BYTES_168_DB_TYPE_S 26
-#define QP_CONTEXT_QPC_BYTES_168_DB_TYPE_M \
- (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_168_DB_TYPE_S)
-
-#define QP_CONTEXT_QPC_BYTES_168_MSG_LP_IND_S 28
-#define QP_CONTEXT_QPC_BYTES_168_CSDB_LP_IND_S 29
-#define QP_CONTEXT_QPC_BYTES_168_QP_ERR_FLG_S 30
-
-#define QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_S 0
-#define QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_S)
-
-#define QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_S 16
-#define QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_S)
-
-#define QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S 0
-#define QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S)
-
-#define QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_S 16
-#define QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_S)
-
-#define QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S 0
-#define QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M \
- (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S)
-
-#define QP_CONTEXT_QPC_BYTES_188_PKT_RETRY_FLG_S 8
-
-#define QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S 16
-#define QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S)
-
-#define STATUS_MASK 0xff
-#define GO_BIT_TIMEOUT_MSECS 10000
-#define HCR_STATUS_OFFSET 0x18
-#define HCR_GO_BIT 15
-
-struct hns_roce_rq_db {
- __le32 u32_4;
- __le32 u32_8;
-};
-
-#define RQ_DOORBELL_U32_4_RQ_HEAD_S 0
-#define RQ_DOORBELL_U32_4_RQ_HEAD_M \
- (((1UL << 15) - 1) << RQ_DOORBELL_U32_4_RQ_HEAD_S)
-
-#define RQ_DOORBELL_U32_8_QPN_S 0
-#define RQ_DOORBELL_U32_8_QPN_M (((1UL << 24) - 1) << RQ_DOORBELL_U32_8_QPN_S)
-
-#define RQ_DOORBELL_U32_8_CMD_S 28
-#define RQ_DOORBELL_U32_8_CMD_M (((1UL << 3) - 1) << RQ_DOORBELL_U32_8_CMD_S)
-
-#define RQ_DOORBELL_U32_8_HW_SYNC_S 31
-
-struct hns_roce_sq_db {
- __le32 u32_4;
- __le32 u32_8;
-};
-
-#define SQ_DOORBELL_U32_4_SQ_HEAD_S 0
-#define SQ_DOORBELL_U32_4_SQ_HEAD_M \
- (((1UL << 15) - 1) << SQ_DOORBELL_U32_4_SQ_HEAD_S)
-
-#define SQ_DOORBELL_U32_4_SL_S 16
-#define SQ_DOORBELL_U32_4_SL_M \
- (((1UL << 2) - 1) << SQ_DOORBELL_U32_4_SL_S)
-
-#define SQ_DOORBELL_U32_4_PORT_S 18
-#define SQ_DOORBELL_U32_4_PORT_M (((1UL << 3) - 1) << SQ_DOORBELL_U32_4_PORT_S)
-
-#define SQ_DOORBELL_U32_8_QPN_S 0
-#define SQ_DOORBELL_U32_8_QPN_M (((1UL << 24) - 1) << SQ_DOORBELL_U32_8_QPN_S)
-
-#define SQ_DOORBELL_HW_SYNC_S 31
-
-struct hns_roce_ext_db {
- int esdb_dep;
- int eodb_dep;
- struct hns_roce_buf_list *sdb_buf_list;
- struct hns_roce_buf_list *odb_buf_list;
-};
-
-struct hns_roce_db_table {
- int sdb_ext_mod;
- int odb_ext_mod;
- struct hns_roce_ext_db *ext_db;
-};
-
-struct hns_roce_bt_table {
- struct hns_roce_buf_list qpc_buf;
- struct hns_roce_buf_list mtpt_buf;
- struct hns_roce_buf_list cqc_buf;
-};
-
-struct hns_roce_tptr_table {
- struct hns_roce_buf_list tptr_buf;
-};
-
-struct hns_roce_qp_work {
- struct work_struct work;
- struct ib_device *ib_dev;
- struct hns_roce_qp *qp;
- u32 db_wait_stage;
- u32 sdb_issue_ptr;
- u32 sdb_inv_cnt;
- u32 sche_cnt;
-};
-
-struct hns_roce_mr_free_work {
- struct work_struct work;
- struct ib_device *ib_dev;
- struct completion *comp;
- int comp_flag;
- void *mr;
-};
-
-struct hns_roce_recreate_lp_qp_work {
- struct work_struct work;
- struct ib_device *ib_dev;
- struct completion *comp;
- int comp_flag;
-};
-
-struct hns_roce_free_mr {
- struct workqueue_struct *free_mr_wq;
- struct hns_roce_qp *mr_free_qp[HNS_ROCE_V1_RESV_QP];
- struct hns_roce_cq *mr_free_cq;
- struct hns_roce_pd *mr_free_pd;
-};
-
-struct hns_roce_v1_priv {
- struct hns_roce_db_table db_table;
- struct hns_roce_raq_table raq_table;
- struct hns_roce_bt_table bt_table;
- struct hns_roce_tptr_table tptr_table;
- struct hns_roce_free_mr free_mr;
-};
-
-int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset);
-int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
-int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
-
-#endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index b5392cb5b20f..2d6ae89e525b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -33,244 +33,688 @@
#include <linux/acpi.h>
#include <linux/etherdevice.h>
#include <linux/interrupt.h>
+#include <linux/iopoll.h>
#include <linux/kernel.h>
#include <linux/types.h>
+#include <linux/workqueue.h>
#include <net/addrconf.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_umem.h>
#include <rdma/uverbs_ioctl.h>
-#include "hnae3.h"
+#include "hclge_main.h"
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"
#include "hns_roce_hw_v2.h"
+#include "hns_roce_bond.h"
-static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
- struct ib_sge *sg)
+#define CREATE_TRACE_POINTS
+#include "hns_roce_trace.h"
+
+enum {
+ CMD_RST_PRC_OTHERS,
+ CMD_RST_PRC_SUCCESS,
+ CMD_RST_PRC_EBUSY,
+};
+
+enum ecc_resource_type {
+ ECC_RESOURCE_QPC,
+ ECC_RESOURCE_CQC,
+ ECC_RESOURCE_MPT,
+ ECC_RESOURCE_SRQC,
+ ECC_RESOURCE_GMV,
+ ECC_RESOURCE_QPC_TIMER,
+ ECC_RESOURCE_CQC_TIMER,
+ ECC_RESOURCE_SCCC,
+ ECC_RESOURCE_COUNT,
+};
+
+static const struct {
+ const char *name;
+ u8 read_bt0_op;
+ u8 write_bt0_op;
+} fmea_ram_res[] = {
+ { "ECC_RESOURCE_QPC",
+ HNS_ROCE_CMD_READ_QPC_BT0, HNS_ROCE_CMD_WRITE_QPC_BT0 },
+ { "ECC_RESOURCE_CQC",
+ HNS_ROCE_CMD_READ_CQC_BT0, HNS_ROCE_CMD_WRITE_CQC_BT0 },
+ { "ECC_RESOURCE_MPT",
+ HNS_ROCE_CMD_READ_MPT_BT0, HNS_ROCE_CMD_WRITE_MPT_BT0 },
+ { "ECC_RESOURCE_SRQC",
+ HNS_ROCE_CMD_READ_SRQC_BT0, HNS_ROCE_CMD_WRITE_SRQC_BT0 },
+ /* ECC_RESOURCE_GMV is handled by cmdq, not mailbox */
+ { "ECC_RESOURCE_GMV",
+ 0, 0 },
+ { "ECC_RESOURCE_QPC_TIMER",
+ HNS_ROCE_CMD_READ_QPC_TIMER_BT0, HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0 },
+ { "ECC_RESOURCE_CQC_TIMER",
+ HNS_ROCE_CMD_READ_CQC_TIMER_BT0, HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0 },
+ { "ECC_RESOURCE_SCCC",
+ HNS_ROCE_CMD_READ_SCCC_BT0, HNS_ROCE_CMD_WRITE_SCCC_BT0 },
+};
+
+static inline void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
+ struct ib_sge *sg)
{
dseg->lkey = cpu_to_le32(sg->lkey);
dseg->addr = cpu_to_le64(sg->addr);
dseg->len = cpu_to_le32(sg->length);
}
+/*
+ * mapped-value = 1 + real-value
+ * The hns wr opcode real values start from 0. In order to distinguish between
+ * initialized and uninitialized map values, we add 1 to the real value when
+ * defining the mapping, so that a valid entry can be identified by checking
+ * whether the mapped value is greater than 0.
+ */
+#define HR_OPC_MAP(ib_key, hr_key) \
+ [IB_WR_ ## ib_key] = 1 + HNS_ROCE_V2_WQE_OP_ ## hr_key
+
+static const u32 hns_roce_op_code[] = {
+ HR_OPC_MAP(RDMA_WRITE, RDMA_WRITE),
+ HR_OPC_MAP(RDMA_WRITE_WITH_IMM, RDMA_WRITE_WITH_IMM),
+ HR_OPC_MAP(SEND, SEND),
+ HR_OPC_MAP(SEND_WITH_IMM, SEND_WITH_IMM),
+ HR_OPC_MAP(RDMA_READ, RDMA_READ),
+ HR_OPC_MAP(ATOMIC_CMP_AND_SWP, ATOM_CMP_AND_SWAP),
+ HR_OPC_MAP(ATOMIC_FETCH_AND_ADD, ATOM_FETCH_AND_ADD),
+ HR_OPC_MAP(SEND_WITH_INV, SEND_WITH_INV),
+ HR_OPC_MAP(MASKED_ATOMIC_CMP_AND_SWP, ATOM_MSK_CMP_AND_SWAP),
+ HR_OPC_MAP(MASKED_ATOMIC_FETCH_AND_ADD, ATOM_MSK_FETCH_AND_ADD),
+ HR_OPC_MAP(REG_MR, FAST_REG_PMR),
+};
+
+static u32 to_hr_opcode(u32 ib_opcode)
+{
+ if (ib_opcode >= ARRAY_SIZE(hns_roce_op_code))
+ return HNS_ROCE_V2_WQE_OP_MASK;
+
+ return hns_roce_op_code[ib_opcode] ? hns_roce_op_code[ib_opcode] - 1 :
+ HNS_ROCE_V2_WQE_OP_MASK;
+}
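+
+/*
+ * For example, with the 1-based table above a mapped verb resolves to its
+ * hardware opcode, while a verb that has no HR_OPC_MAP() entry here (its
+ * slot stays 0) falls back to HNS_ROCE_V2_WQE_OP_MASK:
+ *
+ *	to_hr_opcode(IB_WR_SEND)      -> HNS_ROCE_V2_WQE_OP_SEND
+ *	to_hr_opcode(IB_WR_LOCAL_INV) -> HNS_ROCE_V2_WQE_OP_MASK (unmapped here)
+ */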
+
static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
- struct hns_roce_wqe_frmr_seg *fseg,
const struct ib_reg_wr *wr)
{
+ struct hns_roce_wqe_frmr_seg *fseg =
+ (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe);
struct hns_roce_mr *mr = to_hr_mr(wr->mr);
+ u64 pbl_ba;
/* use ib_access_flags */
- roce_set_bit(rc_sq_wqe->byte_4,
- V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S,
- wr->access & IB_ACCESS_MW_BIND ? 1 : 0);
- roce_set_bit(rc_sq_wqe->byte_4,
- V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S,
- wr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
- roce_set_bit(rc_sq_wqe->byte_4,
- V2_RC_FRMR_WQE_BYTE_4_RR_S,
- wr->access & IB_ACCESS_REMOTE_READ ? 1 : 0);
- roce_set_bit(rc_sq_wqe->byte_4,
- V2_RC_FRMR_WQE_BYTE_4_RW_S,
- wr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0);
- roce_set_bit(rc_sq_wqe->byte_4,
- V2_RC_FRMR_WQE_BYTE_4_LW_S,
- wr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0);
+ hr_reg_write_bool(fseg, FRMR_BIND_EN, 0);
+ hr_reg_write_bool(fseg, FRMR_ATOMIC,
+ wr->access & IB_ACCESS_REMOTE_ATOMIC);
+ hr_reg_write_bool(fseg, FRMR_RR, wr->access & IB_ACCESS_REMOTE_READ);
+ hr_reg_write_bool(fseg, FRMR_RW, wr->access & IB_ACCESS_REMOTE_WRITE);
+ hr_reg_write_bool(fseg, FRMR_LW, wr->access & IB_ACCESS_LOCAL_WRITE);
/* Data structure reuse may lead to confusion */
- rc_sq_wqe->msg_len = cpu_to_le32(mr->pbl_ba & 0xffffffff);
- rc_sq_wqe->inv_key = cpu_to_le32(mr->pbl_ba >> 32);
+ pbl_ba = mr->pbl_mtr.hem_cfg.root_ba;
+ rc_sq_wqe->msg_len = cpu_to_le32(lower_32_bits(pbl_ba));
+ rc_sq_wqe->inv_key = cpu_to_le32(upper_32_bits(pbl_ba));
rc_sq_wqe->byte_16 = cpu_to_le32(wr->mr->length & 0xffffffff);
rc_sq_wqe->byte_20 = cpu_to_le32(wr->mr->length >> 32);
rc_sq_wqe->rkey = cpu_to_le32(wr->key);
rc_sq_wqe->va = cpu_to_le64(wr->mr->iova);
- fseg->pbl_size = cpu_to_le32(mr->pbl_size);
- roce_set_field(fseg->mode_buf_pg_sz,
- V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M,
- V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S,
- mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
- roce_set_bit(fseg->mode_buf_pg_sz,
- V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0);
+ hr_reg_write(fseg, FRMR_PBL_SIZE, mr->npages);
+ hr_reg_write(fseg, FRMR_PBL_BUF_PG_SZ,
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
+ hr_reg_clear(fseg, FRMR_BLK_MODE);
+ hr_reg_clear(fseg, FRMR_BLOCK_SIZE);
+ hr_reg_clear(fseg, FRMR_ZBVA);
}
-static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg,
- const struct ib_atomic_wr *wr)
+static void set_atomic_seg(const struct ib_send_wr *wr,
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+ unsigned int valid_num_sge)
{
- if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
- aseg->fetchadd_swap_data = cpu_to_le64(wr->swap);
- aseg->cmp_data = cpu_to_le64(wr->compare_add);
+ struct hns_roce_v2_wqe_data_seg *dseg =
+ (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe);
+ struct hns_roce_wqe_atomic_seg *aseg =
+ (void *)dseg + sizeof(struct hns_roce_v2_wqe_data_seg);
+
+ set_data_seg_v2(dseg, wr->sg_list);
+
+ if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ aseg->fetchadd_swap_data = cpu_to_le64(atomic_wr(wr)->swap);
+ aseg->cmp_data = cpu_to_le64(atomic_wr(wr)->compare_add);
} else {
- aseg->fetchadd_swap_data = cpu_to_le64(wr->compare_add);
- aseg->cmp_data = 0;
+ aseg->fetchadd_swap_data =
+ cpu_to_le64(atomic_wr(wr)->compare_add);
+ aseg->cmp_data = 0;
}
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge);
}
-static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
- unsigned int *sge_ind)
+static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
+ const struct ib_send_wr *wr,
+ unsigned int *sge_idx, u32 msg_len)
{
- struct hns_roce_v2_wqe_data_seg *dseg;
- struct ib_sge *sg;
- int num_in_wqe = 0;
- int extend_sge_num;
- int fi_sge_num;
- int se_sge_num;
- int shift;
- int i;
+ struct ib_device *ibdev = &(to_hr_dev(qp->ibqp.device))->ib_dev;
+ unsigned int left_len_in_pg;
+ unsigned int idx = *sge_idx;
+ unsigned int i = 0;
+ unsigned int len;
+ void *addr;
+ void *dseg;
+
+ if (msg_len > qp->sq.ext_sge_cnt * HNS_ROCE_SGE_SIZE) {
+ ibdev_err(ibdev,
+ "no enough extended sge space for inline data.\n");
+ return -EINVAL;
+ }
- if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC)
- num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE;
- extend_sge_num = wr->num_sge - num_in_wqe;
- sg = wr->sg_list + num_in_wqe;
- shift = qp->hr_buf.page_shift;
+ dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1));
+ left_len_in_pg = hr_hw_page_align((uintptr_t)dseg) - (uintptr_t)dseg;
+ len = wr->sg_list[0].length;
+ addr = (void *)(unsigned long)(wr->sg_list[0].addr);
- /*
- * Check whether wr->num_sge sges are in the same page. If not, we
- * should calculate how many sges in the first page and the second
- * page.
+ /* When copying data to the extended sge space, the remaining length in the
+ * page may not be long enough for the current user's sge. So the data should
+ * be split into several parts: one in the first page, and the others in the
+ * subsequent pages.
*/
- dseg = get_send_extend_sge(qp, (*sge_ind) & (qp->sge.sge_cnt - 1));
- fi_sge_num = (round_up((uintptr_t)dseg, 1 << shift) -
- (uintptr_t)dseg) /
- sizeof(struct hns_roce_v2_wqe_data_seg);
- if (extend_sge_num > fi_sge_num) {
- se_sge_num = extend_sge_num - fi_sge_num;
- for (i = 0; i < fi_sge_num; i++) {
- set_data_seg_v2(dseg++, sg + i);
- (*sge_ind)++;
+ while (1) {
+ if (len <= left_len_in_pg) {
+ memcpy(dseg, addr, len);
+
+ idx += len / HNS_ROCE_SGE_SIZE;
+
+ i++;
+ if (i >= wr->num_sge)
+ break;
+
+ left_len_in_pg -= len;
+ len = wr->sg_list[i].length;
+ addr = (void *)(unsigned long)(wr->sg_list[i].addr);
+ dseg += len;
+ } else {
+ memcpy(dseg, addr, left_len_in_pg);
+
+ len -= left_len_in_pg;
+ addr += left_len_in_pg;
+ idx += left_len_in_pg / HNS_ROCE_SGE_SIZE;
+ dseg = hns_roce_get_extend_sge(qp,
+ idx & (qp->sge.sge_cnt - 1));
+ left_len_in_pg = 1 << HNS_HW_PAGE_SHIFT;
}
- dseg = get_send_extend_sge(qp,
- (*sge_ind) & (qp->sge.sge_cnt - 1));
- for (i = 0; i < se_sge_num; i++) {
- set_data_seg_v2(dseg++, sg + fi_sge_num + i);
- (*sge_ind)++;
+ }
+
+ *sge_idx = idx;
+
+ return 0;
+}
+
+static void set_extend_sge(struct hns_roce_qp *qp, struct ib_sge *sge,
+ unsigned int *sge_ind, unsigned int cnt)
+{
+ struct hns_roce_v2_wqe_data_seg *dseg;
+ unsigned int idx = *sge_ind;
+
+ while (cnt > 0) {
+ dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1));
+ if (likely(sge->length)) {
+ set_data_seg_v2(dseg, sge);
+ idx++;
+ cnt--;
}
- } else {
- for (i = 0; i < extend_sge_num; i++) {
- set_data_seg_v2(dseg++, sg + i);
- (*sge_ind)++;
+ sge++;
+ }
+
+ *sge_ind = idx;
+}
+
+static bool check_inl_data_len(struct hns_roce_qp *qp, unsigned int len)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device);
+ int mtu = ib_mtu_enum_to_int(qp->path_mtu);
+
+ if (mtu < 0 || len > qp->max_inline_data || len > mtu) {
+ ibdev_err(&hr_dev->ib_dev,
+ "invalid length of data, data len = %u, max inline len = %u, path mtu = %d.\n",
+ len, qp->max_inline_data, mtu);
+ return false;
+ }
+
+ return true;
+}
+
+static int set_rc_inl(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+ unsigned int *sge_idx)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device);
+ u32 msg_len = le32_to_cpu(rc_sq_wqe->msg_len);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ unsigned int curr_idx = *sge_idx;
+ void *dseg = rc_sq_wqe;
+ unsigned int i;
+ int ret;
+
+ if (unlikely(wr->opcode == IB_WR_RDMA_READ)) {
+ ibdev_err(ibdev, "invalid inline parameters!\n");
+ return -EINVAL;
+ }
+
+ if (!check_inl_data_len(qp, msg_len))
+ return -EINVAL;
+
+ dseg += sizeof(struct hns_roce_v2_rc_send_wqe);
+
+ if (msg_len <= HNS_ROCE_V2_MAX_RC_INL_INN_SZ) {
+ hr_reg_clear(rc_sq_wqe, RC_SEND_WQE_INL_TYPE);
+
+ for (i = 0; i < wr->num_sge; i++) {
+ memcpy(dseg, ((void *)wr->sg_list[i].addr),
+ wr->sg_list[i].length);
+ dseg += wr->sg_list[i].length;
}
+ } else {
+ hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_INL_TYPE);
+
+ ret = fill_ext_sge_inl_data(qp, wr, &curr_idx, msg_len);
+ if (ret)
+ return ret;
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, curr_idx - *sge_idx);
}
+
+ *sge_idx = curr_idx;
+
+ return 0;
}
static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
- void *wqe, unsigned int *sge_ind,
- const struct ib_send_wr **bad_wr)
+ unsigned int *sge_ind,
+ unsigned int valid_num_sge)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct hns_roce_v2_wqe_data_seg *dseg = wqe;
+ struct hns_roce_v2_wqe_data_seg *dseg =
+ (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe);
struct hns_roce_qp *qp = to_hr_qp(ibqp);
+ int j = 0;
int i;
- if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) {
- if (le32_to_cpu(rc_sq_wqe->msg_len) >
- hr_dev->caps.max_sq_inline) {
- *bad_wr = wr;
- dev_err(hr_dev->dev, "inline len(1-%d)=%d, illegal",
- rc_sq_wqe->msg_len, hr_dev->caps.max_sq_inline);
- return -EINVAL;
- }
-
- if (wr->opcode == IB_WR_RDMA_READ) {
- *bad_wr = wr;
- dev_err(hr_dev->dev, "Not support inline data!\n");
- return -EINVAL;
- }
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_INLINE,
+ !!(wr->send_flags & IB_SEND_INLINE));
+ if (wr->send_flags & IB_SEND_INLINE)
+ return set_rc_inl(qp, wr, rc_sq_wqe, sge_ind);
+ if (valid_num_sge <= HNS_ROCE_SGE_IN_WQE) {
for (i = 0; i < wr->num_sge; i++) {
- memcpy(wqe, ((void *)wr->sg_list[i].addr),
- wr->sg_list[i].length);
- wqe += wr->sg_list[i].length;
+ if (likely(wr->sg_list[i].length)) {
+ set_data_seg_v2(dseg, wr->sg_list + i);
+ dseg++;
+ }
}
-
- roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S,
- 1);
} else {
- if (wr->num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) {
- for (i = 0; i < wr->num_sge; i++) {
- if (likely(wr->sg_list[i].length)) {
- set_data_seg_v2(dseg, wr->sg_list + i);
- dseg++;
- }
- }
- } else {
- roce_set_field(rc_sq_wqe->byte_20,
- V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
- V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
- (*sge_ind) & (qp->sge.sge_cnt - 1));
-
- for (i = 0; i < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) {
- if (likely(wr->sg_list[i].length)) {
- set_data_seg_v2(dseg, wr->sg_list + i);
- dseg++;
- }
+ for (i = 0; i < wr->num_sge && j < HNS_ROCE_SGE_IN_WQE; i++) {
+ if (likely(wr->sg_list[i].length)) {
+ set_data_seg_v2(dseg, wr->sg_list + i);
+ dseg++;
+ j++;
}
+ }
+
+ set_extend_sge(qp, wr->sg_list + i, sge_ind,
+ valid_num_sge - HNS_ROCE_SGE_IN_WQE);
+ }
- set_extend_sge(qp, wr, sge_ind);
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge);
+
+ return 0;
+}
+
+static int check_send_valid(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp)
+{
+ if (unlikely(hr_qp->state == IB_QPS_RESET ||
+ hr_qp->state == IB_QPS_INIT ||
+ hr_qp->state == IB_QPS_RTR))
+ return -EINVAL;
+ else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN))
+ return -EIO;
+
+ return 0;
+}
+
+static unsigned int calc_wr_sge_num(const struct ib_send_wr *wr,
+ unsigned int *sge_len)
+{
+ unsigned int valid_num = 0;
+ unsigned int len = 0;
+ int i;
+
+ for (i = 0; i < wr->num_sge; i++) {
+ if (likely(wr->sg_list[i].length)) {
+ len += wr->sg_list[i].length;
+ valid_num++;
}
+ }
- roce_set_field(rc_sq_wqe->byte_16,
- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, wr->num_sge);
+ *sge_len = len;
+ return valid_num;
+}
+
+static __le32 get_immtdata(const struct ib_send_wr *wr)
+{
+ switch (wr->opcode) {
+ case IB_WR_SEND_WITH_IMM:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ return cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
+ default:
+ return 0;
}
+}
+
+static int set_ud_opcode(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
+ const struct ib_send_wr *wr)
+{
+ u32 ib_op = wr->opcode;
+
+ if (ib_op != IB_WR_SEND && ib_op != IB_WR_SEND_WITH_IMM)
+ return -EINVAL;
+
+ ud_sq_wqe->immtdata = get_immtdata(wr);
+
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_OPCODE, to_hr_opcode(ib_op));
return 0;
}
-static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
- const struct ib_qp_attr *attr,
- int attr_mask, enum ib_qp_state cur_state,
- enum ib_qp_state new_state);
+static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
+ struct hns_roce_ah *ah)
+{
+ struct ib_device *ib_dev = ah->ibah.device;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
+
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_UDPSPN, ah->av.udp_sport);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_HOPLIMIT, ah->av.hop_limit);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_TCLASS, ah->av.tclass);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_FLOW_LABEL, ah->av.flowlabel);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SL, ah->av.sl);
+
+ ud_sq_wqe->sgid_index = ah->av.gid_index;
+
+ memcpy(ud_sq_wqe->dmac, ah->av.mac, ETH_ALEN);
+ memcpy(ud_sq_wqe->dgid, ah->av.dgid, GID_LEN_V2);
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return 0;
+
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_VLAN_EN, ah->av.vlan_en);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_VLAN, ah->av.vlan_id);
+
+ return 0;
+}
+
+static inline int set_ud_wqe(struct hns_roce_qp *qp,
+ const struct ib_send_wr *wr,
+ void *wqe, unsigned int *sge_idx,
+ unsigned int owner_bit)
+{
+ struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah);
+ struct hns_roce_v2_ud_send_wqe *ud_sq_wqe = wqe;
+ unsigned int curr_idx = *sge_idx;
+ unsigned int valid_num_sge;
+ u32 msg_len = 0;
+ int ret;
+
+ valid_num_sge = calc_wr_sge_num(wr, &msg_len);
+
+ ret = set_ud_opcode(ud_sq_wqe, wr);
+ if (WARN_ON_ONCE(ret))
+ return ret;
+
+ ud_sq_wqe->msg_len = cpu_to_le32(msg_len);
+
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_CQE,
+ !!(wr->send_flags & IB_SEND_SIGNALED));
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SE,
+ !!(wr->send_flags & IB_SEND_SOLICITED));
+
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_PD, to_hr_pd(qp->ibqp.pd)->pdn);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SGE_NUM, valid_num_sge);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_MSG_START_SGE_IDX,
+ curr_idx & (qp->sge.sge_cnt - 1));
+
+ ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ?
+ qp->qkey : ud_wr(wr)->remote_qkey);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_DQPN, ud_wr(wr)->remote_qpn);
+
+ ret = fill_ud_av(ud_sq_wqe, ah);
+ if (ret)
+ return ret;
+
+ qp->sl = to_hr_ah(ud_wr(wr)->ah)->av.sl;
+
+ set_extend_sge(qp, wr->sg_list, &curr_idx, valid_num_sge);
+
+ /*
+ * The pipeline can sequentially post all valid WQEs into WQ buffer,
+ * including new WQEs waiting for the doorbell to update the PI again.
+ * Therefore, the owner bit of WQE MUST be updated after all fields
+ * and extSGEs have been written into DDR instead of cache.
+ */
+ if (qp->en_flags & HNS_ROCE_QP_CAP_OWNER_DB)
+ dma_wmb();
+
+ *sge_idx = curr_idx;
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_OWNER, owner_bit);
+
+ return 0;
+}
+
+static int set_rc_opcode(struct hns_roce_dev *hr_dev,
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+ const struct ib_send_wr *wr)
+{
+ u32 ib_op = wr->opcode;
+ int ret = 0;
+
+ rc_sq_wqe->immtdata = get_immtdata(wr);
+
+ switch (ib_op) {
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey);
+ rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr);
+ break;
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ break;
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ rc_sq_wqe->rkey = cpu_to_le32(atomic_wr(wr)->rkey);
+ rc_sq_wqe->va = cpu_to_le64(atomic_wr(wr)->remote_addr);
+ break;
+ case IB_WR_REG_MR:
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ set_frmr_seg(rc_sq_wqe, reg_wr(wr));
+ else
+ ret = -EOPNOTSUPP;
+ break;
+ case IB_WR_SEND_WITH_INV:
+ rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ if (unlikely(ret))
+ return ret;
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_OPCODE, to_hr_opcode(ib_op));
+
+ return ret;
+}
+
+static inline int set_rc_wqe(struct hns_roce_qp *qp,
+ const struct ib_send_wr *wr,
+ void *wqe, unsigned int *sge_idx,
+ unsigned int owner_bit)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device);
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe;
+ unsigned int curr_idx = *sge_idx;
+ unsigned int valid_num_sge;
+ u32 msg_len = 0;
+ int ret;
+
+ valid_num_sge = calc_wr_sge_num(wr, &msg_len);
+
+ rc_sq_wqe->msg_len = cpu_to_le32(msg_len);
+
+ ret = set_rc_opcode(hr_dev, rc_sq_wqe, wr);
+ if (WARN_ON_ONCE(ret))
+ return ret;
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SO,
+ (wr->send_flags & IB_SEND_FENCE) ? 1 : 0);
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SE,
+ (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0);
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_CQE,
+ (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_MSG_START_SGE_IDX,
+ curr_idx & (qp->sge.sge_cnt - 1));
+
+ if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
+ if (msg_len != ATOMIC_WR_LEN)
+ return -EINVAL;
+ set_atomic_seg(wr, rc_sq_wqe, valid_num_sge);
+ } else if (wr->opcode != IB_WR_REG_MR) {
+ ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe,
+ &curr_idx, valid_num_sge);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * The pipeline can sequentially post all valid WQEs into WQ buffer,
+ * including new WQEs waiting for the doorbell to update the PI again.
+ * Therefore, the owner bit of WQE MUST be updated after all fields
+ * and extSGEs have been written into DDR instead of cache.
+ */
+ if (qp->en_flags & HNS_ROCE_QP_CAP_OWNER_DB)
+ dma_wmb();
+
+ *sge_idx = curr_idx;
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_OWNER, owner_bit);
+
+ return ret;
+}
+
+static inline void update_sq_db(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *qp)
+{
+ if (unlikely(qp->state == IB_QPS_ERR)) {
+ flush_cqe(hr_dev, qp);
+ } else {
+ struct hns_roce_v2_db sq_db = {};
+
+ hr_reg_write(&sq_db, DB_TAG, qp->qpn);
+ hr_reg_write(&sq_db, DB_CMD, HNS_ROCE_V2_SQ_DB);
+ hr_reg_write(&sq_db, DB_PI, qp->sq.head);
+ hr_reg_write(&sq_db, DB_SL, qp->sl);
+
+ hns_roce_write64(hr_dev, (__le32 *)&sq_db, qp->sq.db_reg);
+ }
+}
+
+static inline void update_rq_db(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *qp)
+{
+ if (unlikely(qp->state == IB_QPS_ERR)) {
+ flush_cqe(hr_dev, qp);
+ } else {
+ if (likely(qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)) {
+ *qp->rdb.db_record =
+ qp->rq.head & V2_DB_PRODUCER_IDX_M;
+ } else {
+ struct hns_roce_v2_db rq_db = {};
+
+ hr_reg_write(&rq_db, DB_TAG, qp->qpn);
+ hr_reg_write(&rq_db, DB_CMD, HNS_ROCE_V2_RQ_DB);
+ hr_reg_write(&rq_db, DB_PI, qp->rq.head);
+
+ hns_roce_write64(hr_dev, (__le32 *)&rq_db,
+ qp->rq.db_reg);
+ }
+ }
+}
+
+static void hns_roce_write512(struct hns_roce_dev *hr_dev, u64 *val,
+ u64 __iomem *dest)
+{
+#define HNS_ROCE_WRITE_TIMES 8
+ struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+ int i;
+
+ if (!hr_dev->dis_db && !ops->get_hw_reset_stat(handle))
+ for (i = 0; i < HNS_ROCE_WRITE_TIMES; i++)
+ writeq_relaxed(*(val + i), dest + i);
+}
+
+static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
+ void *wqe)
+{
+#define HNS_ROCE_SL_SHIFT 2
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe;
+
+ if (unlikely(qp->state == IB_QPS_ERR)) {
+ flush_cqe(hr_dev, qp);
+ return;
+ }
+ /* All kinds of DirectWQE have the same header field layout */
+ hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_FLAG);
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_L, qp->sl);
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_H,
+ qp->sl >> HNS_ROCE_SL_SHIFT);
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_WQE_INDEX, qp->sq.head);
+
+ hns_roce_write512(hr_dev, wqe, qp->sq.db_reg);
+}
static int hns_roce_v2_post_send(struct ib_qp *ibqp,
const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah);
- struct hns_roce_v2_ud_send_wqe *ud_sq_wqe;
- struct hns_roce_v2_rc_send_wqe *rc_sq_wqe;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_qp *qp = to_hr_qp(ibqp);
- struct hns_roce_wqe_frmr_seg *fseg;
- struct device *dev = hr_dev->dev;
- struct hns_roce_v2_db sq_db;
- struct ib_qp_attr attr;
- unsigned int sge_ind = 0;
+ unsigned long flags = 0;
unsigned int owner_bit;
- unsigned long flags;
- unsigned int ind;
+ unsigned int sge_idx;
+ unsigned int wqe_idx;
void *wqe = NULL;
- bool loopback;
- int attr_mask;
- u32 tmp_len;
- int ret = 0;
- u32 hr_op;
- u8 *smac;
- int nreq;
- int i;
+ u32 nreq;
+ int ret;
- if (unlikely(ibqp->qp_type != IB_QPT_RC &&
- ibqp->qp_type != IB_QPT_GSI &&
- ibqp->qp_type != IB_QPT_UD)) {
- dev_err(dev, "Not supported QP(0x%x)type!\n", ibqp->qp_type);
- *bad_wr = wr;
- return -EOPNOTSUPP;
- }
+ spin_lock_irqsave(&qp->sq.lock, flags);
- if (unlikely(qp->state == IB_QPS_RESET || qp->state == IB_QPS_INIT ||
- qp->state == IB_QPS_RTR)) {
- dev_err(dev, "Post WQE fail, QP state %d err!\n", qp->state);
+ ret = check_send_valid(hr_dev, qp);
+ if (unlikely(ret)) {
*bad_wr = wr;
- return -EINVAL;
+ nreq = 0;
+ goto out;
}
- spin_lock_irqsave(&qp->sq.lock, flags);
- ind = qp->sq_next_wqe;
- sge_ind = qp->next_sge;
+ sge_idx = qp->next_sge;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
@@ -279,333 +723,48 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
goto out;
}
+ wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1);
+
if (unlikely(wr->num_sge > qp->sq.max_gs)) {
- dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n",
- wr->num_sge, qp->sq.max_gs);
+ ibdev_err(ibdev, "num_sge = %d > qp->sq.max_gs = %u.\n",
+ wr->num_sge, qp->sq.max_gs);
ret = -EINVAL;
*bad_wr = wr;
goto out;
}
- wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
- qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] =
- wr->wr_id;
-
+ wqe = hns_roce_get_send_wqe(qp, wqe_idx);
+ qp->sq.wrid[wqe_idx] = wr->wr_id;
owner_bit =
~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1);
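 /*
 * The low bit of owner_bit toggles each time sq.head wraps around the WQE
 * ring, so hardware can distinguish newly posted WQEs from stale ones. For
 * instance, with wqe_cnt = 8, heads 0..7 yield an owner bit of 1, heads
 * 8..15 yield 0, and so on.
 */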
- tmp_len = 0;
- /* Corresponding to the QP type, wqe process separately */
- if (ibqp->qp_type == IB_QPT_GSI) {
- ud_sq_wqe = wqe;
- memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe));
-
- roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_0_M,
- V2_UD_SEND_WQE_DMAC_0_S, ah->av.mac[0]);
- roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_1_M,
- V2_UD_SEND_WQE_DMAC_1_S, ah->av.mac[1]);
- roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_2_M,
- V2_UD_SEND_WQE_DMAC_2_S, ah->av.mac[2]);
- roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_3_M,
- V2_UD_SEND_WQE_DMAC_3_S, ah->av.mac[3]);
- roce_set_field(ud_sq_wqe->byte_48,
- V2_UD_SEND_WQE_BYTE_48_DMAC_4_M,
- V2_UD_SEND_WQE_BYTE_48_DMAC_4_S,
- ah->av.mac[4]);
- roce_set_field(ud_sq_wqe->byte_48,
- V2_UD_SEND_WQE_BYTE_48_DMAC_5_M,
- V2_UD_SEND_WQE_BYTE_48_DMAC_5_S,
- ah->av.mac[5]);
-
- /* MAC loopback */
- smac = (u8 *)hr_dev->dev_addr[qp->port];
- loopback = ether_addr_equal_unaligned(ah->av.mac,
- smac) ? 1 : 0;
-
- roce_set_bit(ud_sq_wqe->byte_40,
- V2_UD_SEND_WQE_BYTE_40_LBI_S, loopback);
-
- roce_set_field(ud_sq_wqe->byte_4,
- V2_UD_SEND_WQE_BYTE_4_OPCODE_M,
- V2_UD_SEND_WQE_BYTE_4_OPCODE_S,
- HNS_ROCE_V2_WQE_OP_SEND);
-
- for (i = 0; i < wr->num_sge; i++)
- tmp_len += wr->sg_list[i].length;
-
- ud_sq_wqe->msg_len =
- cpu_to_le32(le32_to_cpu(ud_sq_wqe->msg_len) + tmp_len);
-
- switch (wr->opcode) {
- case IB_WR_SEND_WITH_IMM:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- ud_sq_wqe->immtdata =
- cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
- break;
- default:
- ud_sq_wqe->immtdata = 0;
- break;
- }
-
- /* Set sig attr */
- roce_set_bit(ud_sq_wqe->byte_4,
- V2_UD_SEND_WQE_BYTE_4_CQE_S,
- (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
-
- /* Set se attr */
- roce_set_bit(ud_sq_wqe->byte_4,
- V2_UD_SEND_WQE_BYTE_4_SE_S,
- (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0);
-
- roce_set_bit(ud_sq_wqe->byte_4,
- V2_UD_SEND_WQE_BYTE_4_OWNER_S, owner_bit);
-
- roce_set_field(ud_sq_wqe->byte_16,
- V2_UD_SEND_WQE_BYTE_16_PD_M,
- V2_UD_SEND_WQE_BYTE_16_PD_S,
- to_hr_pd(ibqp->pd)->pdn);
-
- roce_set_field(ud_sq_wqe->byte_16,
- V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M,
- V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S,
- wr->num_sge);
-
- roce_set_field(ud_sq_wqe->byte_20,
- V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
- V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
- sge_ind & (qp->sge.sge_cnt - 1));
-
- roce_set_field(ud_sq_wqe->byte_24,
- V2_UD_SEND_WQE_BYTE_24_UDPSPN_M,
- V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, 0);
- ud_sq_wqe->qkey =
- cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ?
- qp->qkey : ud_wr(wr)->remote_qkey);
- roce_set_field(ud_sq_wqe->byte_32,
- V2_UD_SEND_WQE_BYTE_32_DQPN_M,
- V2_UD_SEND_WQE_BYTE_32_DQPN_S,
- ud_wr(wr)->remote_qpn);
-
- roce_set_field(ud_sq_wqe->byte_36,
- V2_UD_SEND_WQE_BYTE_36_VLAN_M,
- V2_UD_SEND_WQE_BYTE_36_VLAN_S,
- le16_to_cpu(ah->av.vlan));
- roce_set_field(ud_sq_wqe->byte_36,
- V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M,
- V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S,
- ah->av.hop_limit);
- roce_set_field(ud_sq_wqe->byte_36,
- V2_UD_SEND_WQE_BYTE_36_TCLASS_M,
- V2_UD_SEND_WQE_BYTE_36_TCLASS_S,
- ah->av.sl_tclass_flowlabel >>
- HNS_ROCE_TCLASS_SHIFT);
- roce_set_field(ud_sq_wqe->byte_40,
- V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M,
- V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S,
- ah->av.sl_tclass_flowlabel &
- HNS_ROCE_FLOW_LABEL_MASK);
- roce_set_field(ud_sq_wqe->byte_40,
- V2_UD_SEND_WQE_BYTE_40_SL_M,
- V2_UD_SEND_WQE_BYTE_40_SL_S,
- le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
- HNS_ROCE_SL_SHIFT);
- roce_set_field(ud_sq_wqe->byte_40,
- V2_UD_SEND_WQE_BYTE_40_PORTN_M,
- V2_UD_SEND_WQE_BYTE_40_PORTN_S,
- qp->port);
-
- roce_set_bit(ud_sq_wqe->byte_40,
- V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S,
- ah->av.vlan_en ? 1 : 0);
- roce_set_field(ud_sq_wqe->byte_48,
- V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M,
- V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S,
- hns_get_gid_index(hr_dev, qp->phy_port,
- ah->av.gid_index));
-
- memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0],
- GID_LEN_V2);
-
- set_extend_sge(qp, wr, &sge_ind);
- ind++;
- } else if (ibqp->qp_type == IB_QPT_RC) {
- rc_sq_wqe = wqe;
- memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe));
- for (i = 0; i < wr->num_sge; i++)
- tmp_len += wr->sg_list[i].length;
-
- rc_sq_wqe->msg_len =
- cpu_to_le32(le32_to_cpu(rc_sq_wqe->msg_len) + tmp_len);
-
- switch (wr->opcode) {
- case IB_WR_SEND_WITH_IMM:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- rc_sq_wqe->immtdata =
- cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
- break;
- case IB_WR_SEND_WITH_INV:
- rc_sq_wqe->inv_key =
- cpu_to_le32(wr->ex.invalidate_rkey);
- break;
- default:
- rc_sq_wqe->immtdata = 0;
- break;
- }
+ /* RC and UD share the same DirectWQE field layout */
+ ((struct hns_roce_v2_rc_send_wqe *)wqe)->byte_4 = 0;
- roce_set_bit(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_FENCE_S,
- (wr->send_flags & IB_SEND_FENCE) ? 1 : 0);
-
- roce_set_bit(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_SE_S,
- (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0);
-
- roce_set_bit(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_CQE_S,
- (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
-
- roce_set_bit(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit);
-
- wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
- switch (wr->opcode) {
- case IB_WR_RDMA_READ:
- hr_op = HNS_ROCE_V2_WQE_OP_RDMA_READ;
- rc_sq_wqe->rkey =
- cpu_to_le32(rdma_wr(wr)->rkey);
- rc_sq_wqe->va =
- cpu_to_le64(rdma_wr(wr)->remote_addr);
- break;
- case IB_WR_RDMA_WRITE:
- hr_op = HNS_ROCE_V2_WQE_OP_RDMA_WRITE;
- rc_sq_wqe->rkey =
- cpu_to_le32(rdma_wr(wr)->rkey);
- rc_sq_wqe->va =
- cpu_to_le64(rdma_wr(wr)->remote_addr);
- break;
- case IB_WR_RDMA_WRITE_WITH_IMM:
- hr_op = HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM;
- rc_sq_wqe->rkey =
- cpu_to_le32(rdma_wr(wr)->rkey);
- rc_sq_wqe->va =
- cpu_to_le64(rdma_wr(wr)->remote_addr);
- break;
- case IB_WR_SEND:
- hr_op = HNS_ROCE_V2_WQE_OP_SEND;
- break;
- case IB_WR_SEND_WITH_INV:
- hr_op = HNS_ROCE_V2_WQE_OP_SEND_WITH_INV;
- break;
- case IB_WR_SEND_WITH_IMM:
- hr_op = HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM;
- break;
- case IB_WR_LOCAL_INV:
- hr_op = HNS_ROCE_V2_WQE_OP_LOCAL_INV;
- roce_set_bit(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
- rc_sq_wqe->inv_key =
- cpu_to_le32(wr->ex.invalidate_rkey);
- break;
- case IB_WR_REG_MR:
- hr_op = HNS_ROCE_V2_WQE_OP_FAST_REG_PMR;
- fseg = wqe;
- set_frmr_seg(rc_sq_wqe, fseg, reg_wr(wr));
- break;
- case IB_WR_ATOMIC_CMP_AND_SWP:
- hr_op = HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP;
- rc_sq_wqe->rkey =
- cpu_to_le32(atomic_wr(wr)->rkey);
- rc_sq_wqe->va =
- cpu_to_le64(atomic_wr(wr)->remote_addr);
- break;
- case IB_WR_ATOMIC_FETCH_AND_ADD:
- hr_op = HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD;
- rc_sq_wqe->rkey =
- cpu_to_le32(atomic_wr(wr)->rkey);
- rc_sq_wqe->va =
- cpu_to_le64(atomic_wr(wr)->remote_addr);
- break;
- case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
- hr_op =
- HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP;
- break;
- case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
- hr_op =
- HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD;
- break;
- default:
- hr_op = HNS_ROCE_V2_WQE_OP_MASK;
- break;
- }
-
- roce_set_field(rc_sq_wqe->byte_4,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
- V2_RC_SEND_WQE_BYTE_4_OPCODE_S, hr_op);
-
- if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
- wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
- struct hns_roce_v2_wqe_data_seg *dseg;
-
- dseg = wqe;
- set_data_seg_v2(dseg, wr->sg_list);
- wqe += sizeof(struct hns_roce_v2_wqe_data_seg);
- set_atomic_seg(wqe, atomic_wr(wr));
- roce_set_field(rc_sq_wqe->byte_16,
- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
- V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
- wr->num_sge);
- } else if (wr->opcode != IB_WR_REG_MR) {
- ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe,
- wqe, &sge_ind, bad_wr);
- if (ret)
- goto out;
- }
+ /* Process the WQE separately according to the QP type */
+ if (ibqp->qp_type == IB_QPT_RC)
+ ret = set_rc_wqe(qp, wr, wqe, &sge_idx, owner_bit);
+ else
+ ret = set_ud_wqe(qp, wr, wqe, &sge_idx, owner_bit);
- ind++;
- } else {
- dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type);
- spin_unlock_irqrestore(&qp->sq.lock, flags);
+ trace_hns_sq_wqe(qp->qpn, wqe_idx, wqe, 1 << qp->sq.wqe_shift,
+ wr->wr_id, TRACE_SQ);
+ if (unlikely(ret)) {
*bad_wr = wr;
- return -EOPNOTSUPP;
+ goto out;
}
}
out:
if (likely(nreq)) {
qp->sq.head += nreq;
- /* Memory barrier */
- wmb();
-
- sq_db.byte_4 = 0;
- sq_db.parameter = 0;
-
- roce_set_field(sq_db.byte_4, V2_DB_BYTE_4_TAG_M,
- V2_DB_BYTE_4_TAG_S, qp->doorbell_qpn);
- roce_set_field(sq_db.byte_4, V2_DB_BYTE_4_CMD_M,
- V2_DB_BYTE_4_CMD_S, HNS_ROCE_V2_SQ_DB);
- roce_set_field(sq_db.parameter, V2_DB_PARAMETER_IDX_M,
- V2_DB_PARAMETER_IDX_S,
- qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1));
- roce_set_field(sq_db.parameter, V2_DB_PARAMETER_SL_M,
- V2_DB_PARAMETER_SL_S, qp->sl);
-
- hns_roce_write64(hr_dev, (__le32 *)&sq_db, qp->sq.db_reg_l);
-
- qp->sq_next_wqe = ind;
- qp->next_sge = sge_ind;
-
- if (qp->state == IB_QPS_ERR) {
- attr_mask = IB_QP_STATE;
- attr.qp_state = IB_QPS_ERR;
-
- ret = hns_roce_v2_modify_qp(&qp->ibqp, &attr, attr_mask,
- qp->state, IB_QPS_ERR);
- if (ret) {
- spin_unlock_irqrestore(&qp->sq.lock, flags);
- *bad_wr = wr;
- return ret;
- }
- }
+ qp->next_sge = sge_idx;
+
+ if (nreq == 1 && !ret &&
+ (qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE))
+ write_dwqe(hr_dev, qp, wqe);
+ else
+ update_sq_db(hr_dev, qp);
}
spin_unlock_irqrestore(&qp->sq.lock, flags);
@@ -613,108 +772,251 @@ out:
return ret;
}
+static int check_recv_valid(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp)
+{
+ if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN))
+ return -EIO;
+
+ if (hr_qp->state == IB_QPS_RESET)
+ return -EINVAL;
+
+ return 0;
+}
+
+static void fill_recv_sge_to_wqe(const struct ib_recv_wr *wr, void *wqe,
+ u32 max_sge, bool rsv)
+{
+ struct hns_roce_v2_wqe_data_seg *dseg = wqe;
+ u32 i, cnt;
+
+ for (i = 0, cnt = 0; i < wr->num_sge; i++) {
+ /* Skip zero-length sge */
+ if (!wr->sg_list[i].length)
+ continue;
+ set_data_seg_v2(dseg + cnt, wr->sg_list + i);
+ cnt++;
+ }
+
+ /* Fill a reserved sge to make hw stop reading remaining segments */
+ if (rsv) {
+ dseg[cnt].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
+ dseg[cnt].addr = 0;
+ dseg[cnt].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH);
+ } else {
+ /* Clear remaining segments to make ROCEE ignore sges */
+ if (cnt < max_sge)
+ memset(dseg + cnt, 0,
+ (max_sge - cnt) * HNS_ROCE_SGE_SIZE);
+ }
+}
+
+static void fill_rq_wqe(struct hns_roce_qp *hr_qp, const struct ib_recv_wr *wr,
+ u32 wqe_idx, u32 max_sge)
+{
+ void *wqe = NULL;
+
+ wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx);
+ fill_recv_sge_to_wqe(wr, wqe, max_sge, hr_qp->rq.rsv_sge);
+
+ trace_hns_rq_wqe(hr_qp->qpn, wqe_idx, wqe, 1 << hr_qp->rq.wqe_shift,
+ wr->wr_id, TRACE_RQ);
+}
+
static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct hns_roce_v2_wqe_data_seg *dseg;
- struct hns_roce_rinl_sge *sge_list;
- struct device *dev = hr_dev->dev;
- struct ib_qp_attr attr;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u32 wqe_idx, nreq, max_sge;
unsigned long flags;
- void *wqe = NULL;
- int attr_mask;
- int ret = 0;
- int nreq;
- int ind;
- int i;
+ int ret;
spin_lock_irqsave(&hr_qp->rq.lock, flags);
- ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1);
- if (hr_qp->state == IB_QPS_RESET) {
- spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
+ ret = check_recv_valid(hr_dev, hr_qp);
+ if (unlikely(ret)) {
*bad_wr = wr;
- return -EINVAL;
+ nreq = 0;
+ goto out;
}
+ max_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
- if (hns_roce_wq_overflow(&hr_qp->rq, nreq,
- hr_qp->ibqp.recv_cq)) {
+ if (unlikely(hns_roce_wq_overflow(&hr_qp->rq, nreq,
+ hr_qp->ibqp.recv_cq))) {
ret = -ENOMEM;
*bad_wr = wr;
goto out;
}
- if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) {
- dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n",
- wr->num_sge, hr_qp->rq.max_gs);
+ if (unlikely(wr->num_sge > max_sge)) {
+ ibdev_err(ibdev, "num_sge = %d >= max_sge = %u.\n",
+ wr->num_sge, max_sge);
ret = -EINVAL;
*bad_wr = wr;
goto out;
}
- wqe = get_recv_wqe(hr_qp, ind);
- dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
- for (i = 0; i < wr->num_sge; i++) {
- if (!wr->sg_list[i].length)
- continue;
- set_data_seg_v2(dseg, wr->sg_list + i);
- dseg++;
- }
+ wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);
+ fill_rq_wqe(hr_qp, wr, wqe_idx, max_sge);
+ hr_qp->rq.wrid[wqe_idx] = wr->wr_id;
+ }
+
+out:
+ if (likely(nreq)) {
+ hr_qp->rq.head += nreq;
+
+ update_rq_db(hr_dev, hr_qp);
+ }
+ spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
+
+ return ret;
+}
+
+static void *get_srq_wqe_buf(struct hns_roce_srq *srq, u32 n)
+{
+ return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift);
+}
+
+static void *get_idx_buf(struct hns_roce_idx_que *idx_que, u32 n)
+{
+ return hns_roce_buf_offset(idx_que->mtr.kmem,
+ n << idx_que->entry_shift);
+}
+
+static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, u32 wqe_index)
+{
+ /* always called with interrupts disabled. */
+ spin_lock(&srq->lock);
- if (i < hr_qp->rq.max_gs) {
- dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
- dseg->addr = 0;
+ bitmap_clear(srq->idx_que.bitmap, wqe_index, 1);
+ srq->idx_que.tail++;
+
+ spin_unlock(&srq->lock);
+}
+
+static int hns_roce_srqwq_overflow(struct hns_roce_srq *srq)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+
+ return idx_que->head - idx_que->tail >= srq->wqe_cnt;
+}
+
+static int check_post_srq_valid(struct hns_roce_srq *srq, u32 max_sge,
+ const struct ib_recv_wr *wr)
+{
+ struct ib_device *ib_dev = srq->ibsrq.device;
+
+ if (unlikely(wr->num_sge > max_sge)) {
+ ibdev_err(ib_dev,
+ "failed to check sge, wr->num_sge = %d, max_sge = %u.\n",
+ wr->num_sge, max_sge);
+ return -EINVAL;
+ }
+
+ if (unlikely(hns_roce_srqwq_overflow(srq))) {
+ ibdev_err(ib_dev,
+ "failed to check srqwq status, srqwq is full.\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int get_srq_wqe_idx(struct hns_roce_srq *srq, u32 *wqe_idx)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ u32 pos;
+
+ pos = find_first_zero_bit(idx_que->bitmap, srq->wqe_cnt);
+ if (unlikely(pos == srq->wqe_cnt))
+ return -ENOSPC;
+
+ bitmap_set(idx_que->bitmap, pos, 1);
+ *wqe_idx = pos;
+ return 0;
+}
+
+static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ unsigned int head;
+ __le32 *buf;
+
+ head = idx_que->head & (srq->wqe_cnt - 1);
+
+ buf = get_idx_buf(idx_que, head);
+ *buf = cpu_to_le32(wqe_idx);
+
+ idx_que->head++;
+}
+
+static void update_srq_db(struct hns_roce_srq *srq)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
+ struct hns_roce_v2_db db = {};
+
+ hr_reg_write(&db, DB_TAG, srq->srqn);
+ hr_reg_write(&db, DB_CMD, HNS_ROCE_V2_SRQ_DB);
+ hr_reg_write(&db, DB_PI, srq->idx_que.head);
+
+ hns_roce_write64(hr_dev, (__le32 *)&db, srq->db_reg);
+}
+
+static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
+ const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+ unsigned long flags;
+ int ret = 0;
+ u32 max_sge;
+ u32 wqe_idx;
+ void *wqe;
+ u32 nreq;
+
+ spin_lock_irqsave(&srq->lock, flags);
+
+ max_sge = srq->max_gs - srq->rsv_sge;
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ ret = check_post_srq_valid(srq, max_sge, wr);
+ if (ret) {
+ *bad_wr = wr;
+ break;
}
- /* rq support inline data */
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
- sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list;
- hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt =
- (u32)wr->num_sge;
- for (i = 0; i < wr->num_sge; i++) {
- sge_list[i].addr =
- (void *)(u64)wr->sg_list[i].addr;
- sge_list[i].len = wr->sg_list[i].length;
- }
+ ret = get_srq_wqe_idx(srq, &wqe_idx);
+ if (unlikely(ret)) {
+ *bad_wr = wr;
+ break;
}
- hr_qp->rq.wrid[ind] = wr->wr_id;
+ wqe = get_srq_wqe_buf(srq, wqe_idx);
+ fill_recv_sge_to_wqe(wr, wqe, max_sge, srq->rsv_sge);
+ fill_wqe_idx(srq, wqe_idx);
+ srq->wrid[wqe_idx] = wr->wr_id;
- ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1);
+ trace_hns_srq_wqe(srq->srqn, wqe_idx, wqe, 1 << srq->wqe_shift,
+ wr->wr_id, TRACE_SRQ);
}
-out:
if (likely(nreq)) {
- hr_qp->rq.head += nreq;
- /* Memory barrier */
- wmb();
-
- *hr_qp->rdb.db_record = hr_qp->rq.head & 0xffff;
-
- if (hr_qp->state == IB_QPS_ERR) {
- attr_mask = IB_QP_STATE;
- attr.qp_state = IB_QPS_ERR;
-
- ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, &attr,
- attr_mask, hr_qp->state,
- IB_QPS_ERR);
- if (ret) {
- spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
- *bad_wr = wr;
- return ret;
- }
- }
+ if (srq->cap_flags & HNS_ROCE_SRQ_CAP_RECORD_DB)
+ *srq->rdb.db_record = srq->idx_que.head &
+ V2_DB_PRODUCER_IDX_M;
+ else
+ update_srq_db(srq);
}
- spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
+
+ spin_unlock_irqrestore(&srq->lock, flags);
return ret;
}
-static int hns_roce_v2_cmd_hw_reseted(struct hns_roce_dev *hr_dev,
+static u32 hns_roce_v2_cmd_hw_reseted(struct hns_roce_dev *hr_dev,
unsigned long instance_stage,
unsigned long reset_stage)
{
@@ -737,13 +1039,18 @@ static int hns_roce_v2_cmd_hw_reseted(struct hns_roce_dev *hr_dev,
return CMD_RST_PRC_SUCCESS;
}
-static int hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev,
+static u32 hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev,
unsigned long instance_stage,
unsigned long reset_stage)
{
- struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
+#define HW_RESET_TIMEOUT_US 1000000
+#define HW_RESET_SLEEP_US 1000
+
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
struct hnae3_handle *handle = priv->handle;
const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+ unsigned long val;
+ int ret;
/* When hardware reset is detected, we should stop sending mailbox&cmq&
* doorbell to hardware. If now in .init_instance() function, we should
@@ -755,7 +1062,11 @@ static int hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev,
* again.
*/
hr_dev->dis_db = true;
- if (!ops->get_hw_reset_stat(handle))
+
+ ret = read_poll_timeout(ops->ae_dev_reset_cnt, val,
+ val > hr_dev->reset_cnt, HW_RESET_SLEEP_US,
+ HW_RESET_TIMEOUT_US, false, handle);
+ if (!ret)
hr_dev->is_reset = true;
if (!hr_dev->is_reset || reset_stage == HNS_ROCE_STATE_RST_INIT ||
@@ -765,9 +1076,9 @@ static int hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev,
return CMD_RST_PRC_SUCCESS;
}
-static int hns_roce_v2_cmd_sw_resetting(struct hns_roce_dev *hr_dev)
+static u32 hns_roce_v2_cmd_sw_resetting(struct hns_roce_dev *hr_dev)
{
- struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
struct hnae3_handle *handle = priv->handle;
const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
@@ -782,20 +1093,16 @@ static int hns_roce_v2_cmd_sw_resetting(struct hns_roce_dev *hr_dev)
return CMD_RST_PRC_EBUSY;
}
-static int hns_roce_v2_rst_process_cmd(struct hns_roce_dev *hr_dev)
+static u32 check_aedev_reset_status(struct hns_roce_dev *hr_dev,
+ struct hnae3_handle *handle)
{
- struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
- struct hnae3_handle *handle = priv->handle;
const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
- unsigned long instance_stage; /* the current instance stage */
- unsigned long reset_stage; /* the current reset stage */
+ unsigned long instance_stage; /* the current instance stage */
+ unsigned long reset_stage; /* the current reset stage */
unsigned long reset_cnt;
bool sw_resetting;
bool hw_resetting;
- if (hr_dev->is_reset)
- return CMD_RST_PRC_SUCCESS;
-
/* Get information about reset from NIC driver or RoCE driver itself,
* the meaning of the following variables from NIC driver are described
* as below:
@@ -806,28 +1113,53 @@ static int hns_roce_v2_rst_process_cmd(struct hns_roce_dev *hr_dev)
instance_stage = handle->rinfo.instance_state;
reset_stage = handle->rinfo.reset_state;
reset_cnt = ops->ae_dev_reset_cnt(handle);
- hw_resetting = ops->get_hw_reset_stat(handle);
- sw_resetting = ops->ae_dev_resetting(handle);
-
if (reset_cnt != hr_dev->reset_cnt)
return hns_roce_v2_cmd_hw_reseted(hr_dev, instance_stage,
reset_stage);
- else if (hw_resetting)
+
+ hw_resetting = ops->get_cmdq_stat(handle);
+ if (hw_resetting)
return hns_roce_v2_cmd_hw_resetting(hr_dev, instance_stage,
reset_stage);
- else if (sw_resetting && instance_stage == HNS_ROCE_STATE_INIT)
+
+ sw_resetting = ops->ae_dev_resetting(handle);
+ if (sw_resetting && instance_stage == HNS_ROCE_STATE_INIT)
return hns_roce_v2_cmd_sw_resetting(hr_dev);
- return 0;
+ return CMD_RST_PRC_OTHERS;
}
-static int hns_roce_cmq_space(struct hns_roce_v2_cmq_ring *ring)
+static bool check_device_is_in_reset(struct hns_roce_dev *hr_dev)
{
- int ntu = ring->next_to_use;
- int ntc = ring->next_to_clean;
- int used = (ntu - ntc + ring->desc_num) % ring->desc_num;
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+
+ if (hr_dev->reset_cnt != ops->ae_dev_reset_cnt(handle))
+ return true;
+
+ if (ops->get_hw_reset_stat(handle))
+ return true;
+
+ if (ops->ae_dev_resetting(handle))
+ return true;
+
+ return false;
+}
+
+static bool v2_chk_mbox_is_avail(struct hns_roce_dev *hr_dev, bool *busy)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ u32 status;
+
+ if (hr_dev->is_reset)
+ status = CMD_RST_PRC_SUCCESS;
+ else
+ status = check_aedev_reset_status(hr_dev, priv->handle);
+
+ *busy = (status == CMD_RST_PRC_EBUSY);
- return ring->desc_num - used - 1;
+ return status == CMD_RST_PRC_OTHERS;
}
static int hns_roce_alloc_cmq_desc(struct hns_roce_dev *hr_dev,
@@ -835,124 +1167,71 @@ static int hns_roce_alloc_cmq_desc(struct hns_roce_dev *hr_dev,
{
int size = ring->desc_num * sizeof(struct hns_roce_cmq_desc);
- ring->desc = kzalloc(size, GFP_KERNEL);
+ ring->desc = dma_alloc_coherent(hr_dev->dev, size,
+ &ring->desc_dma_addr, GFP_KERNEL);
if (!ring->desc)
return -ENOMEM;
- ring->desc_dma_addr = dma_map_single(hr_dev->dev, ring->desc, size,
- DMA_BIDIRECTIONAL);
- if (dma_mapping_error(hr_dev->dev, ring->desc_dma_addr)) {
- ring->desc_dma_addr = 0;
- kfree(ring->desc);
- ring->desc = NULL;
- return -ENOMEM;
- }
-
return 0;
}
static void hns_roce_free_cmq_desc(struct hns_roce_dev *hr_dev,
struct hns_roce_v2_cmq_ring *ring)
{
- dma_unmap_single(hr_dev->dev, ring->desc_dma_addr,
- ring->desc_num * sizeof(struct hns_roce_cmq_desc),
- DMA_BIDIRECTIONAL);
+ dma_free_coherent(hr_dev->dev,
+ ring->desc_num * sizeof(struct hns_roce_cmq_desc),
+ ring->desc, ring->desc_dma_addr);
ring->desc_dma_addr = 0;
- kfree(ring->desc);
}
-static int hns_roce_init_cmq_ring(struct hns_roce_dev *hr_dev, bool ring_type)
+static int init_csq(struct hns_roce_dev *hr_dev,
+ struct hns_roce_v2_cmq_ring *csq)
{
- struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
- struct hns_roce_v2_cmq_ring *ring = (ring_type == TYPE_CSQ) ?
- &priv->cmq.csq : &priv->cmq.crq;
+ dma_addr_t dma;
+ int ret;
- ring->flag = ring_type;
- ring->next_to_clean = 0;
- ring->next_to_use = 0;
+ csq->desc_num = CMD_CSQ_DESC_NUM;
+ spin_lock_init(&csq->lock);
+ csq->flag = TYPE_CSQ;
+ csq->head = 0;
- return hns_roce_alloc_cmq_desc(hr_dev, ring);
-}
+ ret = hns_roce_alloc_cmq_desc(hr_dev, csq);
+ if (ret)
+ return ret;
-static void hns_roce_cmq_init_regs(struct hns_roce_dev *hr_dev, bool ring_type)
-{
- struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
- struct hns_roce_v2_cmq_ring *ring = (ring_type == TYPE_CSQ) ?
- &priv->cmq.csq : &priv->cmq.crq;
- dma_addr_t dma = ring->desc_dma_addr;
-
- if (ring_type == TYPE_CSQ) {
- roce_write(hr_dev, ROCEE_TX_CMQ_BASEADDR_L_REG, (u32)dma);
- roce_write(hr_dev, ROCEE_TX_CMQ_BASEADDR_H_REG,
- upper_32_bits(dma));
- roce_write(hr_dev, ROCEE_TX_CMQ_DEPTH_REG,
- (ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S) |
- HNS_ROCE_CMQ_ENABLE);
- roce_write(hr_dev, ROCEE_TX_CMQ_HEAD_REG, 0);
- roce_write(hr_dev, ROCEE_TX_CMQ_TAIL_REG, 0);
- } else {
- roce_write(hr_dev, ROCEE_RX_CMQ_BASEADDR_L_REG, (u32)dma);
- roce_write(hr_dev, ROCEE_RX_CMQ_BASEADDR_H_REG,
- upper_32_bits(dma));
- roce_write(hr_dev, ROCEE_RX_CMQ_DEPTH_REG,
- (ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S) |
- HNS_ROCE_CMQ_ENABLE);
- roce_write(hr_dev, ROCEE_RX_CMQ_HEAD_REG, 0);
- roce_write(hr_dev, ROCEE_RX_CMQ_TAIL_REG, 0);
- }
+ dma = csq->desc_dma_addr;
+ roce_write(hr_dev, ROCEE_TX_CMQ_BASEADDR_L_REG, lower_32_bits(dma));
+ roce_write(hr_dev, ROCEE_TX_CMQ_BASEADDR_H_REG, upper_32_bits(dma));
+ roce_write(hr_dev, ROCEE_TX_CMQ_DEPTH_REG,
+ (u32)csq->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S);
+
+ /* Make sure to write CI first and then PI */
+ roce_write(hr_dev, ROCEE_TX_CMQ_CI_REG, 0);
+ roce_write(hr_dev, ROCEE_TX_CMQ_PI_REG, 0);
+
+ return 0;
}
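As a side note on the new init_csq(): the 64-bit CSQ base address is programmed through a pair of 32-bit registers. A minimal standalone sketch of that split (not part of this patch; the register names, stub write function and address value are made up):

#include <stdint.h>
#include <stdio.h>

/* Stand-in for roce_write(); just records the register access. */
static void reg_write(const char *reg, unsigned int val)
{
	printf("%s <- 0x%08x\n", reg, val);
}

int main(void)
{
	uint64_t dma = 0x0000004512345000ULL;	/* example DMA base address */

	/* lower_32_bits() / upper_32_bits() in the driver */
	reg_write("TX_CMQ_BASEADDR_L", (unsigned int)(dma & 0xffffffffULL));
	reg_write("TX_CMQ_BASEADDR_H", (unsigned int)(dma >> 32));
	return 0;
}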
static int hns_roce_v2_cmq_init(struct hns_roce_dev *hr_dev)
{
- struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
int ret;
- /* Setup the queue entries for command queue */
- priv->cmq.csq.desc_num = CMD_CSQ_DESC_NUM;
- priv->cmq.crq.desc_num = CMD_CRQ_DESC_NUM;
-
- /* Setup the lock for command queue */
- spin_lock_init(&priv->cmq.csq.lock);
- spin_lock_init(&priv->cmq.crq.lock);
-
- /* Setup Tx write back timeout */
priv->cmq.tx_timeout = HNS_ROCE_CMQ_TX_TIMEOUT;
- /* Init CSQ */
- ret = hns_roce_init_cmq_ring(hr_dev, TYPE_CSQ);
- if (ret) {
- dev_err(hr_dev->dev, "Init CSQ error, ret = %d.\n", ret);
- return ret;
- }
-
- /* Init CRQ */
- ret = hns_roce_init_cmq_ring(hr_dev, TYPE_CRQ);
- if (ret) {
- dev_err(hr_dev->dev, "Init CRQ error, ret = %d.\n", ret);
- goto err_crq;
- }
-
- /* Init CSQ REG */
- hns_roce_cmq_init_regs(hr_dev, TYPE_CSQ);
-
- /* Init CRQ REG */
- hns_roce_cmq_init_regs(hr_dev, TYPE_CRQ);
-
- return 0;
-
-err_crq:
- hns_roce_free_cmq_desc(hr_dev, &priv->cmq.csq);
+ ret = init_csq(hr_dev, &priv->cmq.csq);
+ if (ret)
+ dev_err(hr_dev->dev, "failed to init CSQ, ret = %d.\n", ret);
return ret;
}
static void hns_roce_v2_cmq_exit(struct hns_roce_dev *hr_dev)
{
- struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
hns_roce_free_cmq_desc(hr_dev, &priv->cmq.csq);
- hns_roce_free_cmq_desc(hr_dev, &priv->cmq.crq);
}
static void hns_roce_cmq_setup_basic_desc(struct hns_roce_cmq_desc *desc,
@@ -961,8 +1240,7 @@ static void hns_roce_cmq_setup_basic_desc(struct hns_roce_cmq_desc *desc,
{
memset((void *)desc, 0, sizeof(struct hns_roce_cmq_desc));
desc->opcode = cpu_to_le16(opcode);
- desc->flag =
- cpu_to_le16(HNS_ROCE_CMD_FLAG_NO_INTR | HNS_ROCE_CMD_FLAG_IN);
+ desc->flag = cpu_to_le16(HNS_ROCE_CMD_FLAG_IN);
if (is_read)
desc->flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_WR);
else
@@ -971,119 +1249,168 @@ static void hns_roce_cmq_setup_basic_desc(struct hns_roce_cmq_desc *desc,
static int hns_roce_cmq_csq_done(struct hns_roce_dev *hr_dev)
{
- struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
- u32 head = roce_read(hr_dev, ROCEE_TX_CMQ_HEAD_REG);
+ u32 tail = roce_read(hr_dev, ROCEE_TX_CMQ_CI_REG);
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
- return head == priv->cmq.csq.next_to_use;
+ return tail == priv->cmq.csq.head;
}
-static int hns_roce_cmq_csq_clean(struct hns_roce_dev *hr_dev)
+static void update_cmdq_status(struct hns_roce_dev *hr_dev)
{
- struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
- struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
- struct hns_roce_cmq_desc *desc;
- u16 ntc = csq->next_to_clean;
- u32 head;
- int clean = 0;
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
- desc = &csq->desc[ntc];
- head = roce_read(hr_dev, ROCEE_TX_CMQ_HEAD_REG);
- while (head != ntc) {
- memset(desc, 0, sizeof(*desc));
- ntc++;
- if (ntc == csq->desc_num)
- ntc = 0;
- desc = &csq->desc[ntc];
- clean++;
- }
- csq->next_to_clean = ntc;
+ if (handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT ||
+ handle->rinfo.instance_state == HNS_ROCE_STATE_INIT)
+ hr_dev->cmd.state = HNS_ROCE_CMDQ_STATE_FATAL_ERR;
+}
- return clean;
+static int hns_roce_cmd_err_convert_errno(u16 desc_ret)
+{
+ struct hns_roce_cmd_errcode errcode_table[] = {
+ {CMD_EXEC_SUCCESS, 0},
+ {CMD_NO_AUTH, -EPERM},
+ {CMD_NOT_EXIST, -EOPNOTSUPP},
+ {CMD_CRQ_FULL, -EXFULL},
+ {CMD_NEXT_ERR, -ENOSR},
+ {CMD_NOT_EXEC, -ENOTBLK},
+ {CMD_PARA_ERR, -EINVAL},
+ {CMD_RESULT_ERR, -ERANGE},
+ {CMD_TIMEOUT, -ETIME},
+ {CMD_HILINK_ERR, -ENOLINK},
+ {CMD_INFO_ILLEGAL, -ENXIO},
+ {CMD_INVALID, -EBADR},
+ };
+ u16 i;
+
+ for (i = 0; i < ARRAY_SIZE(errcode_table); i++)
+ if (desc_ret == errcode_table[i].return_status)
+ return errcode_table[i].errno;
+ return -EIO;
}
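The error-conversion helper above is a plain table scan with an -EIO fallback. A small userspace sketch of the same lookup pattern (not part of this patch; the status values are invented, not the hardware encoding):

#include <errno.h>
#include <stddef.h>
#include <stdio.h>

struct errcode_map {
	unsigned short status;	/* firmware return status */
	int err;		/* errno to report */
};

static int status_to_errno(unsigned short status)
{
	static const struct errcode_map table[] = {
		{ 0, 0 },		/* success */
		{ 1, -EPERM },		/* illustrative: "no authority" */
		{ 2, -EOPNOTSUPP },	/* illustrative: "not supported" */
	};
	size_t i;

	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (table[i].status == status)
			return table[i].err;

	return -EIO;	/* unknown status falls back to -EIO */
}

int main(void)
{
	printf("%d %d\n", status_to_errno(2), status_to_errno(99));
	return 0;
}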
-static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
- struct hns_roce_cmq_desc *desc, int num)
+static u32 hns_roce_cmdq_tx_timeout(u16 opcode, u32 tx_timeout)
+{
+ static const struct hns_roce_cmdq_tx_timeout_map cmdq_tx_timeout[] = {
+ {HNS_ROCE_OPC_POST_MB, HNS_ROCE_OPC_POST_MB_TIMEOUT},
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cmdq_tx_timeout); i++)
+ if (cmdq_tx_timeout[i].opcode == opcode)
+ return cmdq_tx_timeout[i].tx_timeout;
+
+ return tx_timeout;
+}
+
+static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u32 tx_timeout)
{
- struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
- struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
- struct hns_roce_cmq_desc *desc_to_use;
- bool complete = false;
u32 timeout = 0;
- int handle = 0;
- u16 desc_ret;
- int ret = 0;
- int ntc;
- spin_lock_bh(&csq->lock);
+ do {
+ if (hns_roce_cmq_csq_done(hr_dev))
+ break;
+ udelay(1);
+ } while (++timeout < tx_timeout);
+}
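hns_roce_wait_csq_done() is a bounded busy-poll: check for completion, delay one microsecond, repeat up to tx_timeout times, and let the caller re-check afterwards. A standalone model of that loop (not part of this patch; the completion predicate below is a stub, not the driver's hns_roce_cmq_csq_done()):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static int polls_left = 3;	/* pretend completion arrives on the third poll */

/* Stand-in for hns_roce_cmq_csq_done(): hardware CI caught up with SW head. */
static bool csq_done(void)
{
	return --polls_left <= 0;
}

static bool wait_csq_done(uint32_t tx_timeout)
{
	uint32_t timeout = 0;

	do {
		if (csq_done())
			return true;
		/* udelay(1) in the driver */
	} while (++timeout < tx_timeout);

	return false;	/* the driver instead lets the caller re-check */
}

int main(void)
{
	printf("completed: %d\n", wait_csq_done(100));
	return 0;
}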
- if (num > hns_roce_cmq_space(csq)) {
- spin_unlock_bh(&csq->lock);
- return -EBUSY;
- }
+static int __hns_roce_cmq_send_one(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmq_desc *desc,
+ int num, u32 tx_timeout)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
+ u16 desc_ret;
+ u32 tail;
+ int ret;
+ int i;
- /*
- * Record the location of desc in the cmq for this time
- * which will be use for hardware to write back
- */
- ntc = csq->next_to_use;
+ tail = csq->head;
+
+ for (i = 0; i < num; i++) {
+ trace_hns_cmdq_req(hr_dev, &desc[i]);
- while (handle < num) {
- desc_to_use = &csq->desc[csq->next_to_use];
- *desc_to_use = desc[handle];
- dev_dbg(hr_dev->dev, "set cmq desc:\n");
- csq->next_to_use++;
- if (csq->next_to_use == csq->desc_num)
- csq->next_to_use = 0;
- handle++;
+ csq->desc[csq->head++] = desc[i];
+ if (csq->head == csq->desc_num)
+ csq->head = 0;
}
/* Write to hardware */
- roce_write(hr_dev, ROCEE_TX_CMQ_TAIL_REG, csq->next_to_use);
+ roce_write(hr_dev, ROCEE_TX_CMQ_PI_REG, csq->head);
- /*
- * If the command is sync, wait for the firmware to write back,
- * if multi descriptors to be sent, use the first one to check
- */
- if ((desc->flag) & HNS_ROCE_CMD_FLAG_NO_INTR) {
- do {
- if (hns_roce_cmq_csq_done(hr_dev))
- break;
- udelay(1);
- timeout++;
- } while (timeout < priv->cmq.tx_timeout);
- }
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_CNT]);
+ hns_roce_wait_csq_done(hr_dev, tx_timeout);
if (hns_roce_cmq_csq_done(hr_dev)) {
- complete = true;
- handle = 0;
- while (handle < num) {
- /* get the result of hardware write back */
- desc_to_use = &csq->desc[ntc];
- desc[handle] = *desc_to_use;
- dev_dbg(hr_dev->dev, "Get cmq desc:\n");
- desc_ret = desc[handle].retval;
- if (desc_ret == CMD_EXEC_SUCCESS)
- ret = 0;
- else
- ret = -EIO;
- priv->cmq.last_status = desc_ret;
- ntc++;
- handle++;
- if (ntc == csq->desc_num)
- ntc = 0;
+ ret = 0;
+ for (i = 0; i < num; i++) {
+ trace_hns_cmdq_resp(hr_dev, &csq->desc[tail]);
+
+ /* check the result of hardware write back */
+ desc_ret = le16_to_cpu(csq->desc[tail++].retval);
+ if (tail == csq->desc_num)
+ tail = 0;
+ if (likely(desc_ret == CMD_EXEC_SUCCESS))
+ continue;
+
+ ret = hns_roce_cmd_err_convert_errno(desc_ret);
}
- }
+ } else {
+ /* FW/HW reset or incorrect number of desc */
+ tail = roce_read(hr_dev, ROCEE_TX_CMQ_CI_REG);
+ dev_warn(hr_dev->dev, "CMDQ move tail from %u to %u.\n",
+ csq->head, tail);
+ csq->head = tail;
+
+ update_cmdq_status(hr_dev);
- if (!complete)
ret = -EAGAIN;
+ }
+
+ if (ret)
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_ERR_CNT]);
+
+ return ret;
+}
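The send path above treats the CSQ as a ring: descriptors are copied in at "head" with wrap-around, and results are read back starting from the remembered "tail". A compilable sketch of just that index arithmetic (not part of this patch; the ring size and starting index are illustrative):

#include <stdio.h>

#define DESC_NUM 8	/* ring size; CMD_CSQ_DESC_NUM in the driver */

int main(void)
{
	unsigned int head = 6;		/* producer index before this command */
	unsigned int tail = head;	/* kept so results can be read back */
	int num = 4;			/* descriptors in this command */
	int i;

	for (i = 0; i < num; i++) {
		printf("post desc[%d] into slot %u\n", i, head);
		if (++head == DESC_NUM)	/* wrap at the end of the ring */
			head = 0;
	}

	/* after completion, results are read starting from the saved tail */
	for (i = 0; i < num; i++) {
		printf("read result %d from slot %u\n", i, tail);
		if (++tail == DESC_NUM)
			tail = 0;
	}
	return 0;
}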
+
+static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmq_desc *desc, int num)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
+ u16 opcode = le16_to_cpu(desc->opcode);
+ u32 tx_timeout = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout);
+ u8 try_cnt = HNS_ROCE_OPC_POST_MB_TRY_CNT;
+ u32 rsv_tail;
+ int ret;
+ int i;
+
+ while (try_cnt) {
+ try_cnt--;
+
+ spin_lock_bh(&csq->lock);
+ rsv_tail = csq->head;
+ ret = __hns_roce_cmq_send_one(hr_dev, desc, num, tx_timeout);
+ if (opcode == HNS_ROCE_OPC_POST_MB && ret == -ETIME &&
+ try_cnt) {
+ spin_unlock_bh(&csq->lock);
+ mdelay(HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC);
+ continue;
+ }
- /* clean the command send queue */
- handle = hns_roce_cmq_csq_clean(hr_dev);
- if (handle != num)
- dev_warn(hr_dev->dev, "Cleaned %d, need to clean %d\n",
- handle, num);
+ for (i = 0; i < num; i++) {
+ desc[i] = csq->desc[rsv_tail++];
+ if (rsv_tail == csq->desc_num)
+ rsv_tail = 0;
+ }
+ spin_unlock_bh(&csq->lock);
+ break;
+ }
- spin_unlock_bh(&csq->lock);
+ if (ret)
+ dev_err_ratelimited(hr_dev->dev,
+ "Cmdq IO error, opcode = 0x%x, return = %d.\n",
+ opcode, ret);
return ret;
}
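__hns_roce_cmq_send() retries only mailbox posts (HNS_ROCE_OPC_POST_MB) that fail with -ETIME, up to a fixed try count with a delay between attempts. A simplified standalone model of that retry loop (not part of this patch; the try count, stubs, and simulated failures are placeholders):

#include <errno.h>
#include <stdio.h>

#define TRY_CNT 8	/* stands in for HNS_ROCE_OPC_POST_MB_TRY_CNT */

static int attempts;

static int send_once(void)	/* stands in for __hns_roce_cmq_send_one() */
{
	return ++attempts < 3 ? -ETIME : 0;	/* time out twice, then succeed */
}

static void wait_gap(void)	/* mdelay(...RETRY_GAP_MSEC) in the driver */
{
}

int main(void)
{
	int try_cnt = TRY_CNT;
	int ret = -ETIME;

	while (try_cnt--) {
		ret = send_once();
		if (ret == -ETIME && try_cnt) {
			wait_gap();
			continue;	/* only timeouts are retried */
		}
		break;			/* success or a non-retryable error */
	}

	printf("attempts = %d, ret = %d\n", attempts, ret);
	return 0;
}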
@@ -1091,27 +1418,112 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
struct hns_roce_cmq_desc *desc, int num)
{
- int retval;
+ bool busy;
int ret;
- ret = hns_roce_v2_rst_process_cmd(hr_dev);
- if (ret == CMD_RST_PRC_SUCCESS)
- return 0;
- if (ret == CMD_RST_PRC_EBUSY)
- return ret;
+ if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR)
+ return -EIO;
+
+ if (!v2_chk_mbox_is_avail(hr_dev, &busy))
+ return busy ? -EBUSY : 0;
ret = __hns_roce_cmq_send(hr_dev, desc, num);
if (ret) {
- retval = hns_roce_v2_rst_process_cmd(hr_dev);
- if (retval == CMD_RST_PRC_SUCCESS)
- return 0;
- else if (retval == CMD_RST_PRC_EBUSY)
- return retval;
+ if (!v2_chk_mbox_is_avail(hr_dev, &busy))
+ return busy ? -EBUSY : 0;
}
return ret;
}
+static enum hns_roce_opcode_type
+ get_bond_opcode(enum hns_roce_bond_cmd_type bond_type)
+{
+ switch (bond_type) {
+ case HNS_ROCE_SET_BOND:
+ return HNS_ROCE_OPC_SET_BOND_INFO;
+ case HNS_ROCE_CHANGE_BOND:
+ return HNS_ROCE_OPC_CHANGE_ACTIVE_PORT;
+ case HNS_ROCE_CLEAR_BOND:
+ return HNS_ROCE_OPC_CLEAR_BOND_INFO;
+ default:
+ WARN(true, "Invalid bond type %d!\n", bond_type);
+ return HNS_ROCE_OPC_SET_BOND_INFO;
+ }
+}
+
+static enum hns_roce_bond_hashtype
+ get_bond_hashtype(enum netdev_lag_hash netdev_hashtype)
+{
+ switch (netdev_hashtype) {
+ case NETDEV_LAG_HASH_L2:
+ return BOND_HASH_L2;
+ case NETDEV_LAG_HASH_L34:
+ return BOND_HASH_L34;
+ case NETDEV_LAG_HASH_L23:
+ return BOND_HASH_L23;
+ default:
+ WARN(true, "Invalid hash type %d!\n", netdev_hashtype);
+ return BOND_HASH_L2;
+ }
+}
+
+int hns_roce_cmd_bond(struct hns_roce_bond_group *bond_grp,
+ enum hns_roce_bond_cmd_type bond_type)
+{
+ enum hns_roce_opcode_type opcode = get_bond_opcode(bond_type);
+ struct hns_roce_bond_info *slave_info;
+ struct hns_roce_cmq_desc desc = {};
+ int ret;
+
+ slave_info = (struct hns_roce_bond_info *)desc.data;
+ hns_roce_cmq_setup_basic_desc(&desc, opcode, false);
+
+ slave_info->bond_id = cpu_to_le32(bond_grp->bond_id);
+ if (bond_type == HNS_ROCE_CLEAR_BOND)
+ goto out;
+
+ if (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
+ slave_info->bond_mode = cpu_to_le32(BOND_MODE_1);
+ if (bond_grp->active_slave_num != 1)
+ ibdev_warn(&bond_grp->main_hr_dev->ib_dev,
+ "active slave cnt(%u) in Mode 1 is invalid.\n",
+ bond_grp->active_slave_num);
+ } else {
+ slave_info->bond_mode = cpu_to_le32(BOND_MODE_2_4);
+ slave_info->hash_policy =
+ cpu_to_le32(get_bond_hashtype(bond_grp->hash_type));
+ }
+
+ slave_info->active_slave_cnt = cpu_to_le32(bond_grp->active_slave_num);
+ slave_info->active_slave_mask = cpu_to_le32(bond_grp->active_slave_map);
+ slave_info->slave_mask = cpu_to_le32(bond_grp->slave_map);
+
+out:
+ ret = hns_roce_cmq_send(bond_grp->main_hr_dev, &desc, 1);
+ if (ret)
+ ibdev_err(&bond_grp->main_hr_dev->ib_dev,
+ "cmq bond type(%d) failed, ret = %d.\n",
+ bond_type, ret);
+
+ return ret;
+}
+
+static int config_hem_ba_to_hw(struct hns_roce_dev *hr_dev,
+ dma_addr_t base_addr, u8 cmd, unsigned long tag)
+{
+ struct hns_roce_cmd_mailbox *mbox;
+ int ret;
+
+ mbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mbox))
+ return PTR_ERR(mbox);
+
+ ret = hns_roce_cmd_mbox(hr_dev, base_addr, mbox->dma, cmd, tag);
+ hns_roce_free_cmd_mailbox(hr_dev, mbox);
+ return ret;
+}
+
static int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev)
{
struct hns_roce_query_version *resp;
@@ -1124,12 +1536,198 @@ static int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev)
return ret;
resp = (struct hns_roce_query_version *)desc.data;
- hr_dev->hw_rev = le32_to_cpu(resp->rocee_hw_version);
+ hr_dev->hw_rev = le16_to_cpu(resp->rocee_hw_version);
hr_dev->vendor_id = hr_dev->pci_dev->vendor;
return 0;
}
+static void func_clr_hw_resetting_state(struct hns_roce_dev *hr_dev,
+ struct hnae3_handle *handle)
+{
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+ unsigned long end;
+
+ hr_dev->dis_db = true;
+
+ dev_warn(hr_dev->dev,
+ "func clear is pending, device in resetting state.\n");
+ end = HNS_ROCE_V2_HW_RST_TIMEOUT;
+ while (end) {
+ if (!ops->get_hw_reset_stat(handle)) {
+ hr_dev->is_reset = true;
+ dev_info(hr_dev->dev,
+ "func clear success after reset.\n");
+ return;
+ }
+ msleep(HNS_ROCE_V2_HW_RST_COMPLETION_WAIT);
+ end -= HNS_ROCE_V2_HW_RST_COMPLETION_WAIT;
+ }
+
+ dev_warn(hr_dev->dev, "func clear failed.\n");
+}
+
+static void func_clr_sw_resetting_state(struct hns_roce_dev *hr_dev,
+ struct hnae3_handle *handle)
+{
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+ unsigned long end;
+
+ hr_dev->dis_db = true;
+
+ dev_warn(hr_dev->dev,
+ "func clear is pending, device in resetting state.\n");
+ end = HNS_ROCE_V2_HW_RST_TIMEOUT;
+ while (end) {
+ if (ops->ae_dev_reset_cnt(handle) !=
+ hr_dev->reset_cnt) {
+ hr_dev->is_reset = true;
+ dev_info(hr_dev->dev,
+ "func clear success after sw reset\n");
+ return;
+ }
+ msleep(HNS_ROCE_V2_HW_RST_COMPLETION_WAIT);
+ end -= HNS_ROCE_V2_HW_RST_COMPLETION_WAIT;
+ }
+
+ dev_warn(hr_dev->dev, "func clear failed because of unfinished sw reset\n");
+}
+
+static void hns_roce_func_clr_rst_proc(struct hns_roce_dev *hr_dev, int retval,
+ int flag)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+
+ if (ops->ae_dev_reset_cnt(handle) != hr_dev->reset_cnt) {
+ hr_dev->dis_db = true;
+ hr_dev->is_reset = true;
+ dev_info(hr_dev->dev, "func clear success after reset.\n");
+ return;
+ }
+
+ if (ops->get_hw_reset_stat(handle)) {
+ func_clr_hw_resetting_state(hr_dev, handle);
+ return;
+ }
+
+ if (ops->ae_dev_resetting(handle) &&
+ handle->rinfo.instance_state == HNS_ROCE_STATE_INIT) {
+ func_clr_sw_resetting_state(hr_dev, handle);
+ return;
+ }
+
+ if (retval && !flag)
+ dev_warn(hr_dev->dev,
+ "func clear read failed, ret = %d.\n", retval);
+
+ dev_warn(hr_dev->dev, "func clear failed.\n");
+}
+
+static void __hns_roce_function_clear(struct hns_roce_dev *hr_dev, int vf_id)
+{
+ bool fclr_write_fail_flag = false;
+ struct hns_roce_func_clear *resp;
+ struct hns_roce_cmq_desc desc;
+ unsigned long end;
+ int ret = 0;
+
+ if (check_device_is_in_reset(hr_dev))
+ goto out;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_FUNC_CLEAR, false);
+ resp = (struct hns_roce_func_clear *)desc.data;
+ resp->rst_funcid_en = cpu_to_le32(vf_id);
+
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret) {
+ fclr_write_fail_flag = true;
+ dev_err(hr_dev->dev, "func clear write failed, ret = %d.\n",
+ ret);
+ goto out;
+ }
+
+ msleep(HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL);
+ end = HNS_ROCE_V2_FUNC_CLEAR_TIMEOUT_MSECS;
+ while (end) {
+ if (check_device_is_in_reset(hr_dev))
+ goto out;
+ msleep(HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT);
+ end -= HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_FUNC_CLEAR,
+ true);
+
+ resp->rst_funcid_en = cpu_to_le32(vf_id);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ continue;
+
+ if (hr_reg_read(resp, FUNC_CLEAR_RST_FUN_DONE)) {
+ if (vf_id == 0)
+ hr_dev->is_reset = true;
+ return;
+ }
+ }
+
+out:
+ hns_roce_func_clr_rst_proc(hr_dev, ret, fclr_write_fail_flag);
+}
+
+static int hns_roce_free_vf_resource(struct hns_roce_dev *hr_dev, int vf_id)
+{
+ enum hns_roce_opcode_type opcode = HNS_ROCE_OPC_ALLOC_VF_RES;
+ struct hns_roce_cmq_desc desc[2];
+ struct hns_roce_cmq_req *req_a;
+
+ req_a = (struct hns_roce_cmq_req *)desc[0].data;
+ hns_roce_cmq_setup_basic_desc(&desc[0], opcode, false);
+ desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ hns_roce_cmq_setup_basic_desc(&desc[1], opcode, false);
+ hr_reg_write(req_a, FUNC_RES_A_VF_ID, vf_id);
+
+ return hns_roce_cmq_send(hr_dev, desc, 2);
+}
+
+static void hns_roce_function_clear(struct hns_roce_dev *hr_dev)
+{
+ int ret;
+ int i;
+
+ if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR)
+ return;
+
+ for (i = hr_dev->func_num - 1; i >= 0; i--) {
+ __hns_roce_function_clear(hr_dev, i);
+
+ if (i == 0)
+ continue;
+
+ ret = hns_roce_free_vf_resource(hr_dev, i);
+ if (ret)
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to free vf resource, vf_id = %d, ret = %d.\n",
+ i, ret);
+ }
+}
+
+static int hns_roce_clear_extdb_list_info(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmq_desc desc;
+ int ret;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CLEAR_EXTDB_LIST_INFO,
+ false);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to clear extended doorbell info, ret = %d.\n",
+ ret);
+
+ return ret;
+}
+
static int hns_roce_query_fw_ver(struct hns_roce_dev *hr_dev)
{
struct hns_roce_query_fw_info *resp;
@@ -1147,707 +1745,1279 @@ static int hns_roce_query_fw_ver(struct hns_roce_dev *hr_dev)
return 0;
}
+static int hns_roce_query_func_info(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmq_desc desc;
+ int ret;
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ hr_dev->func_num = 1;
+ return 0;
+ }
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_FUNC_INFO,
+ true);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret) {
+ hr_dev->func_num = 1;
+ return ret;
+ }
+
+ hr_dev->func_num = le32_to_cpu(desc.func_info.own_func_num);
+ hr_dev->cong_algo_tmpl_id = le32_to_cpu(desc.func_info.own_mac_id);
+
+ return 0;
+}
+
+static int hns_roce_hw_v2_query_counter(struct hns_roce_dev *hr_dev,
+ u64 *stats, u32 port, int *num_counters)
+{
+#define CNT_PER_DESC 3
+ struct hns_roce_cmq_desc *desc;
+ int bd_idx, cnt_idx;
+ __le64 *cnt_data;
+ int desc_num;
+ int ret;
+ int i;
+
+ if (port > hr_dev->caps.num_ports)
+ return -EINVAL;
+
+ desc_num = DIV_ROUND_UP(HNS_ROCE_HW_CNT_TOTAL, CNT_PER_DESC);
+ desc = kcalloc(desc_num, sizeof(*desc), GFP_KERNEL);
+ if (!desc)
+ return -ENOMEM;
+
+ for (i = 0; i < desc_num; i++) {
+ hns_roce_cmq_setup_basic_desc(&desc[i],
+ HNS_ROCE_OPC_QUERY_COUNTER, true);
+ if (i != desc_num - 1)
+ desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ }
+
+ ret = hns_roce_cmq_send(hr_dev, desc, desc_num);
+ if (ret) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to get counter, ret = %d.\n", ret);
+ goto err_out;
+ }
+
+ for (i = 0; i < HNS_ROCE_HW_CNT_TOTAL && i < *num_counters; i++) {
+ bd_idx = i / CNT_PER_DESC;
+ if (bd_idx != HNS_ROCE_HW_CNT_TOTAL / CNT_PER_DESC &&
+ !(desc[bd_idx].flag & cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT)))
+ break;
+
+ cnt_data = (__le64 *)&desc[bd_idx].data[0];
+ cnt_idx = i % CNT_PER_DESC;
+ stats[i] = le64_to_cpu(cnt_data[cnt_idx]);
+ }
+ *num_counters = i;
+
+err_out:
+ kfree(desc);
+ return ret;
+}
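Counters come back packed CNT_PER_DESC (3) 64-bit values per descriptor, so the copy loop above derives a descriptor index and a slot index from the flat counter index. A runnable sketch of that unpacking (not part of this patch; the totals and data are made up):

#include <stdint.h>
#include <stdio.h>

#define CNT_PER_DESC	3	/* counters packed per descriptor */
#define CNT_TOTAL	8	/* illustrative; HNS_ROCE_HW_CNT_TOTAL in the driver */
#define DESC_NUM	((CNT_TOTAL + CNT_PER_DESC - 1) / CNT_PER_DESC)

int main(void)
{
	/* each descriptor's data area modelled as CNT_PER_DESC 64-bit slots */
	uint64_t desc_data[DESC_NUM][CNT_PER_DESC] = {
		{ 10, 11, 12 }, { 13, 14, 15 }, { 16, 17, 0 },
	};
	uint64_t stats[CNT_TOTAL];
	int i;

	for (i = 0; i < CNT_TOTAL; i++) {
		int bd_idx = i / CNT_PER_DESC;	/* which descriptor */
		int cnt_idx = i % CNT_PER_DESC;	/* which slot inside it */

		stats[i] = desc_data[bd_idx][cnt_idx];
		printf("stats[%d] = %llu\n", i, (unsigned long long)stats[i]);
	}
	return 0;
}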
+
static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev)
{
- struct hns_roce_cfg_global_param *req;
struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ u32 clock_cycles_of_1us;
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GLOBAL_PARAM,
false);
- req = (struct hns_roce_cfg_global_param *)desc.data;
- memset(req, 0, sizeof(*req));
- roce_set_field(req->time_cfg_udp_port,
- CFG_GLOBAL_PARAM_DATA_0_ROCEE_TIME_1US_CFG_M,
- CFG_GLOBAL_PARAM_DATA_0_ROCEE_TIME_1US_CFG_S, 0x3e8);
- roce_set_field(req->time_cfg_udp_port,
- CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_M,
- CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_S, 0x12b7);
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ clock_cycles_of_1us = HNS_ROCE_1NS_CFG;
+ else
+ clock_cycles_of_1us = HNS_ROCE_1US_CFG;
+
+ hr_reg_write(req, CFG_GLOBAL_PARAM_1US_CYCLES, clock_cycles_of_1us);
+ hr_reg_write(req, CFG_GLOBAL_PARAM_UDP_PORT, ROCE_V2_UDP_DPORT);
return hns_roce_cmq_send(hr_dev, &desc, 1);
}
-static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
+static int load_func_res_caps(struct hns_roce_dev *hr_dev, bool is_vf)
{
struct hns_roce_cmq_desc desc[2];
- struct hns_roce_pf_res_a *req_a;
- struct hns_roce_pf_res_b *req_b;
+ struct hns_roce_cmq_req *r_a = (struct hns_roce_cmq_req *)desc[0].data;
+ struct hns_roce_cmq_req *r_b = (struct hns_roce_cmq_req *)desc[1].data;
+ struct hns_roce_caps *caps = &hr_dev->caps;
+ enum hns_roce_opcode_type opcode;
+ u32 func_num;
int ret;
- int i;
-
- for (i = 0; i < 2; i++) {
- hns_roce_cmq_setup_basic_desc(&desc[i],
- HNS_ROCE_OPC_QUERY_PF_RES, true);
- if (i == 0)
- desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
- else
- desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ if (is_vf) {
+ opcode = HNS_ROCE_OPC_QUERY_VF_RES;
+ func_num = 1;
+ } else {
+ opcode = HNS_ROCE_OPC_QUERY_PF_RES;
+ func_num = hr_dev->func_num;
}
+ hns_roce_cmq_setup_basic_desc(&desc[0], opcode, true);
+ desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ hns_roce_cmq_setup_basic_desc(&desc[1], opcode, true);
+
ret = hns_roce_cmq_send(hr_dev, desc, 2);
if (ret)
return ret;
- req_a = (struct hns_roce_pf_res_a *)desc[0].data;
- req_b = (struct hns_roce_pf_res_b *)desc[1].data;
-
- hr_dev->caps.qpc_bt_num = roce_get_field(req_a->qpc_bt_idx_num,
- PF_RES_DATA_1_PF_QPC_BT_NUM_M,
- PF_RES_DATA_1_PF_QPC_BT_NUM_S);
- hr_dev->caps.srqc_bt_num = roce_get_field(req_a->srqc_bt_idx_num,
- PF_RES_DATA_2_PF_SRQC_BT_NUM_M,
- PF_RES_DATA_2_PF_SRQC_BT_NUM_S);
- hr_dev->caps.cqc_bt_num = roce_get_field(req_a->cqc_bt_idx_num,
- PF_RES_DATA_3_PF_CQC_BT_NUM_M,
- PF_RES_DATA_3_PF_CQC_BT_NUM_S);
- hr_dev->caps.mpt_bt_num = roce_get_field(req_a->mpt_bt_idx_num,
- PF_RES_DATA_4_PF_MPT_BT_NUM_M,
- PF_RES_DATA_4_PF_MPT_BT_NUM_S);
-
- hr_dev->caps.sl_num = roce_get_field(req_b->qid_idx_sl_num,
- PF_RES_DATA_3_PF_SL_NUM_M,
- PF_RES_DATA_3_PF_SL_NUM_S);
- hr_dev->caps.sccc_bt_num = roce_get_field(req_b->sccc_bt_idx_num,
- PF_RES_DATA_4_PF_SCCC_BT_NUM_M,
- PF_RES_DATA_4_PF_SCCC_BT_NUM_S);
+ caps->qpc_bt_num = hr_reg_read(r_a, FUNC_RES_A_QPC_BT_NUM) / func_num;
+ caps->srqc_bt_num = hr_reg_read(r_a, FUNC_RES_A_SRQC_BT_NUM) / func_num;
+ caps->cqc_bt_num = hr_reg_read(r_a, FUNC_RES_A_CQC_BT_NUM) / func_num;
+ caps->mpt_bt_num = hr_reg_read(r_a, FUNC_RES_A_MPT_BT_NUM) / func_num;
+ caps->eqc_bt_num = hr_reg_read(r_a, FUNC_RES_A_EQC_BT_NUM) / func_num;
+ caps->smac_bt_num = hr_reg_read(r_b, FUNC_RES_B_SMAC_NUM) / func_num;
+ caps->sgid_bt_num = hr_reg_read(r_b, FUNC_RES_B_SGID_NUM) / func_num;
+ caps->sccc_bt_num = hr_reg_read(r_b, FUNC_RES_B_SCCC_BT_NUM) / func_num;
+
+ if (is_vf) {
+ caps->sl_num = hr_reg_read(r_b, FUNC_RES_V_QID_NUM) / func_num;
+ caps->gmv_bt_num = hr_reg_read(r_b, FUNC_RES_V_GMV_BT_NUM) /
+ func_num;
+ } else {
+ caps->sl_num = hr_reg_read(r_b, FUNC_RES_B_QID_NUM) / func_num;
+ caps->gmv_bt_num = hr_reg_read(r_b, FUNC_RES_B_GMV_BT_NUM) /
+ func_num;
+ }
return 0;
}
-static int hns_roce_query_pf_timer_resource(struct hns_roce_dev *hr_dev)
+static int load_pf_timer_res_caps(struct hns_roce_dev *hr_dev)
{
- struct hns_roce_pf_timer_res_a *req_a;
- struct hns_roce_cmq_desc desc[2];
- int ret, i;
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ struct hns_roce_caps *caps = &hr_dev->caps;
+ int ret;
- for (i = 0; i < 2; i++) {
- hns_roce_cmq_setup_basic_desc(&desc[i],
- HNS_ROCE_OPC_QUERY_PF_TIMER_RES,
- true);
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_PF_TIMER_RES,
+ true);
- if (i == 0)
- desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
- else
- desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ return ret;
+
+ caps->qpc_timer_bt_num = hr_reg_read(req, PF_TIMER_RES_QPC_ITEM_NUM);
+ caps->cqc_timer_bt_num = hr_reg_read(req, PF_TIMER_RES_CQC_ITEM_NUM);
+
+ return 0;
+}
+
+static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ ret = load_func_res_caps(hr_dev, false);
+ if (ret) {
+ dev_err(dev, "failed to load pf res caps, ret = %d.\n", ret);
+ return ret;
}
- ret = hns_roce_cmq_send(hr_dev, desc, 2);
+ ret = load_pf_timer_res_caps(hr_dev);
if (ret)
- return ret;
+ dev_err(dev, "failed to load pf timer resource, ret = %d.\n",
+ ret);
- req_a = (struct hns_roce_pf_timer_res_a *)desc[0].data;
+ return ret;
+}
- hr_dev->caps.qpc_timer_bt_num =
- roce_get_field(req_a->qpc_timer_bt_idx_num,
- PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_M,
- PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_S);
- hr_dev->caps.cqc_timer_bt_num =
- roce_get_field(req_a->cqc_timer_bt_idx_num,
- PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_M,
- PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_S);
+static int hns_roce_query_vf_resource(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
- return 0;
+ ret = load_func_res_caps(hr_dev, true);
+ if (ret)
+ dev_err(dev, "failed to load vf res caps, ret = %d.\n", ret);
+
+ return ret;
}
-static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
- int vf_id)
+static int __hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
+ u32 vf_id)
{
- struct hns_roce_cmq_desc desc;
struct hns_roce_vf_switch *swt;
+ struct hns_roce_cmq_desc desc;
int ret;
swt = (struct hns_roce_vf_switch *)desc.data;
hns_roce_cmq_setup_basic_desc(&desc, HNS_SWITCH_PARAMETER_CFG, true);
- swt->rocee_sel |= cpu_to_le16(HNS_ICL_SWITCH_CMD_ROCEE_SEL);
- roce_set_field(swt->fun_id,
- VF_SWITCH_DATA_FUN_ID_VF_ID_M,
- VF_SWITCH_DATA_FUN_ID_VF_ID_S,
- vf_id);
+ swt->rocee_sel |= cpu_to_le32(HNS_ICL_SWITCH_CMD_ROCEE_SEL);
+ hr_reg_write(swt, VF_SWITCH_VF_ID, vf_id);
ret = hns_roce_cmq_send(hr_dev, &desc, 1);
if (ret)
return ret;
- desc.flag =
- cpu_to_le16(HNS_ROCE_CMD_FLAG_NO_INTR | HNS_ROCE_CMD_FLAG_IN);
+
+ desc.flag = cpu_to_le16(HNS_ROCE_CMD_FLAG_IN);
desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR);
- roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LPBK_S, 1);
- roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S, 1);
- roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S, 1);
+ hr_reg_enable(swt, VF_SWITCH_ALW_LPBK);
+ hr_reg_clear(swt, VF_SWITCH_ALW_LCL_LPBK);
+ hr_reg_enable(swt, VF_SWITCH_ALW_DST_OVRD);
return hns_roce_cmq_send(hr_dev, &desc, 1);
}
-static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev)
+static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev)
+{
+ u32 vf_id;
+ int ret;
+
+ for (vf_id = 0; vf_id < hr_dev->func_num; vf_id++) {
+ ret = __hns_roce_set_vf_switch_param(hr_dev, vf_id);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static int config_vf_hem_resource(struct hns_roce_dev *hr_dev, int vf_id)
{
struct hns_roce_cmq_desc desc[2];
- struct hns_roce_vf_res_a *req_a;
- struct hns_roce_vf_res_b *req_b;
- int i;
+ struct hns_roce_cmq_req *r_a = (struct hns_roce_cmq_req *)desc[0].data;
+ struct hns_roce_cmq_req *r_b = (struct hns_roce_cmq_req *)desc[1].data;
+ enum hns_roce_opcode_type opcode = HNS_ROCE_OPC_ALLOC_VF_RES;
+ struct hns_roce_caps *caps = &hr_dev->caps;
- req_a = (struct hns_roce_vf_res_a *)desc[0].data;
- req_b = (struct hns_roce_vf_res_b *)desc[1].data;
- memset(req_a, 0, sizeof(*req_a));
- memset(req_b, 0, sizeof(*req_b));
- for (i = 0; i < 2; i++) {
- hns_roce_cmq_setup_basic_desc(&desc[i],
- HNS_ROCE_OPC_ALLOC_VF_RES, false);
+ hns_roce_cmq_setup_basic_desc(&desc[0], opcode, false);
+ desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ hns_roce_cmq_setup_basic_desc(&desc[1], opcode, false);
+
+ hr_reg_write(r_a, FUNC_RES_A_VF_ID, vf_id);
+
+ hr_reg_write(r_a, FUNC_RES_A_QPC_BT_NUM, caps->qpc_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_QPC_BT_IDX, vf_id * caps->qpc_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_SRQC_BT_NUM, caps->srqc_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_SRQC_BT_IDX, vf_id * caps->srqc_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_CQC_BT_NUM, caps->cqc_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_CQC_BT_IDX, vf_id * caps->cqc_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_MPT_BT_NUM, caps->mpt_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_MPT_BT_IDX, vf_id * caps->mpt_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_EQC_BT_NUM, caps->eqc_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_EQC_BT_IDX, vf_id * caps->eqc_bt_num);
+ hr_reg_write(r_b, FUNC_RES_V_QID_NUM, caps->sl_num);
+ hr_reg_write(r_b, FUNC_RES_B_QID_IDX, vf_id * caps->sl_num);
+ hr_reg_write(r_b, FUNC_RES_B_SCCC_BT_NUM, caps->sccc_bt_num);
+ hr_reg_write(r_b, FUNC_RES_B_SCCC_BT_IDX, vf_id * caps->sccc_bt_num);
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+ hr_reg_write(r_b, FUNC_RES_V_GMV_BT_NUM, caps->gmv_bt_num);
+ hr_reg_write(r_b, FUNC_RES_B_GMV_BT_IDX,
+ vf_id * caps->gmv_bt_num);
+ } else {
+ hr_reg_write(r_b, FUNC_RES_B_SGID_NUM, caps->sgid_bt_num);
+ hr_reg_write(r_b, FUNC_RES_B_SGID_IDX,
+ vf_id * caps->sgid_bt_num);
+ hr_reg_write(r_b, FUNC_RES_B_SMAC_NUM, caps->smac_bt_num);
+ hr_reg_write(r_b, FUNC_RES_B_SMAC_IDX,
+ vf_id * caps->smac_bt_num);
+ }
- if (i == 0)
- desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
- else
- desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
-
- if (i == 0) {
- roce_set_field(req_a->vf_qpc_bt_idx_num,
- VF_RES_A_DATA_1_VF_QPC_BT_IDX_M,
- VF_RES_A_DATA_1_VF_QPC_BT_IDX_S, 0);
- roce_set_field(req_a->vf_qpc_bt_idx_num,
- VF_RES_A_DATA_1_VF_QPC_BT_NUM_M,
- VF_RES_A_DATA_1_VF_QPC_BT_NUM_S,
- HNS_ROCE_VF_QPC_BT_NUM);
-
- roce_set_field(req_a->vf_srqc_bt_idx_num,
- VF_RES_A_DATA_2_VF_SRQC_BT_IDX_M,
- VF_RES_A_DATA_2_VF_SRQC_BT_IDX_S, 0);
- roce_set_field(req_a->vf_srqc_bt_idx_num,
- VF_RES_A_DATA_2_VF_SRQC_BT_NUM_M,
- VF_RES_A_DATA_2_VF_SRQC_BT_NUM_S,
- HNS_ROCE_VF_SRQC_BT_NUM);
-
- roce_set_field(req_a->vf_cqc_bt_idx_num,
- VF_RES_A_DATA_3_VF_CQC_BT_IDX_M,
- VF_RES_A_DATA_3_VF_CQC_BT_IDX_S, 0);
- roce_set_field(req_a->vf_cqc_bt_idx_num,
- VF_RES_A_DATA_3_VF_CQC_BT_NUM_M,
- VF_RES_A_DATA_3_VF_CQC_BT_NUM_S,
- HNS_ROCE_VF_CQC_BT_NUM);
-
- roce_set_field(req_a->vf_mpt_bt_idx_num,
- VF_RES_A_DATA_4_VF_MPT_BT_IDX_M,
- VF_RES_A_DATA_4_VF_MPT_BT_IDX_S, 0);
- roce_set_field(req_a->vf_mpt_bt_idx_num,
- VF_RES_A_DATA_4_VF_MPT_BT_NUM_M,
- VF_RES_A_DATA_4_VF_MPT_BT_NUM_S,
- HNS_ROCE_VF_MPT_BT_NUM);
-
- roce_set_field(req_a->vf_eqc_bt_idx_num,
- VF_RES_A_DATA_5_VF_EQC_IDX_M,
- VF_RES_A_DATA_5_VF_EQC_IDX_S, 0);
- roce_set_field(req_a->vf_eqc_bt_idx_num,
- VF_RES_A_DATA_5_VF_EQC_NUM_M,
- VF_RES_A_DATA_5_VF_EQC_NUM_S,
- HNS_ROCE_VF_EQC_NUM);
- } else {
- roce_set_field(req_b->vf_smac_idx_num,
- VF_RES_B_DATA_1_VF_SMAC_IDX_M,
- VF_RES_B_DATA_1_VF_SMAC_IDX_S, 0);
- roce_set_field(req_b->vf_smac_idx_num,
- VF_RES_B_DATA_1_VF_SMAC_NUM_M,
- VF_RES_B_DATA_1_VF_SMAC_NUM_S,
- HNS_ROCE_VF_SMAC_NUM);
-
- roce_set_field(req_b->vf_sgid_idx_num,
- VF_RES_B_DATA_2_VF_SGID_IDX_M,
- VF_RES_B_DATA_2_VF_SGID_IDX_S, 0);
- roce_set_field(req_b->vf_sgid_idx_num,
- VF_RES_B_DATA_2_VF_SGID_NUM_M,
- VF_RES_B_DATA_2_VF_SGID_NUM_S,
- HNS_ROCE_VF_SGID_NUM);
-
- roce_set_field(req_b->vf_qid_idx_sl_num,
- VF_RES_B_DATA_3_VF_QID_IDX_M,
- VF_RES_B_DATA_3_VF_QID_IDX_S, 0);
- roce_set_field(req_b->vf_qid_idx_sl_num,
- VF_RES_B_DATA_3_VF_SL_NUM_M,
- VF_RES_B_DATA_3_VF_SL_NUM_S,
- HNS_ROCE_VF_SL_NUM);
-
- roce_set_field(req_b->vf_sccc_idx_num,
- VF_RES_B_DATA_4_VF_SCCC_BT_IDX_M,
- VF_RES_B_DATA_4_VF_SCCC_BT_IDX_S, 0);
- roce_set_field(req_b->vf_sccc_idx_num,
- VF_RES_B_DATA_4_VF_SCCC_BT_NUM_M,
- VF_RES_B_DATA_4_VF_SCCC_BT_NUM_S,
- HNS_ROCE_VF_SCCC_BT_NUM);
+ return hns_roce_cmq_send(hr_dev, desc, 2);
+}
+
+static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev)
+{
+ u32 func_num = max_t(u32, 1, hr_dev->func_num);
+ u32 vf_id;
+ int ret;
+
+ for (vf_id = 0; vf_id < func_num; vf_id++) {
+ ret = config_vf_hem_resource(hr_dev, vf_id);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to config vf-%u hem res, ret = %d.\n",
+ vf_id, ret);
+ return ret;
}
}
- return hns_roce_cmq_send(hr_dev, desc, 2);
+ return 0;
}
static int hns_roce_v2_set_bt(struct hns_roce_dev *hr_dev)
{
- u8 srqc_hop_num = hr_dev->caps.srqc_hop_num;
- u8 qpc_hop_num = hr_dev->caps.qpc_hop_num;
- u8 cqc_hop_num = hr_dev->caps.cqc_hop_num;
- u8 mpt_hop_num = hr_dev->caps.mpt_hop_num;
- u8 sccc_hop_num = hr_dev->caps.sccc_hop_num;
- struct hns_roce_cfg_bt_attr *req;
struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ struct hns_roce_caps *caps = &hr_dev->caps;
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_BT_ATTR, false);
- req = (struct hns_roce_cfg_bt_attr *)desc.data;
- memset(req, 0, sizeof(*req));
-
- roce_set_field(req->vf_qpc_cfg, CFG_BT_ATTR_DATA_0_VF_QPC_BA_PGSZ_M,
- CFG_BT_ATTR_DATA_0_VF_QPC_BA_PGSZ_S,
- hr_dev->caps.qpc_ba_pg_sz + PG_SHIFT_OFFSET);
- roce_set_field(req->vf_qpc_cfg, CFG_BT_ATTR_DATA_0_VF_QPC_BUF_PGSZ_M,
- CFG_BT_ATTR_DATA_0_VF_QPC_BUF_PGSZ_S,
- hr_dev->caps.qpc_buf_pg_sz + PG_SHIFT_OFFSET);
- roce_set_field(req->vf_qpc_cfg, CFG_BT_ATTR_DATA_0_VF_QPC_HOPNUM_M,
- CFG_BT_ATTR_DATA_0_VF_QPC_HOPNUM_S,
- qpc_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : qpc_hop_num);
-
- roce_set_field(req->vf_srqc_cfg, CFG_BT_ATTR_DATA_1_VF_SRQC_BA_PGSZ_M,
- CFG_BT_ATTR_DATA_1_VF_SRQC_BA_PGSZ_S,
- hr_dev->caps.srqc_ba_pg_sz + PG_SHIFT_OFFSET);
- roce_set_field(req->vf_srqc_cfg, CFG_BT_ATTR_DATA_1_VF_SRQC_BUF_PGSZ_M,
- CFG_BT_ATTR_DATA_1_VF_SRQC_BUF_PGSZ_S,
- hr_dev->caps.srqc_buf_pg_sz + PG_SHIFT_OFFSET);
- roce_set_field(req->vf_srqc_cfg, CFG_BT_ATTR_DATA_1_VF_SRQC_HOPNUM_M,
- CFG_BT_ATTR_DATA_1_VF_SRQC_HOPNUM_S,
- srqc_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : srqc_hop_num);
-
- roce_set_field(req->vf_cqc_cfg, CFG_BT_ATTR_DATA_2_VF_CQC_BA_PGSZ_M,
- CFG_BT_ATTR_DATA_2_VF_CQC_BA_PGSZ_S,
- hr_dev->caps.cqc_ba_pg_sz + PG_SHIFT_OFFSET);
- roce_set_field(req->vf_cqc_cfg, CFG_BT_ATTR_DATA_2_VF_CQC_BUF_PGSZ_M,
- CFG_BT_ATTR_DATA_2_VF_CQC_BUF_PGSZ_S,
- hr_dev->caps.cqc_buf_pg_sz + PG_SHIFT_OFFSET);
- roce_set_field(req->vf_cqc_cfg, CFG_BT_ATTR_DATA_2_VF_CQC_HOPNUM_M,
- CFG_BT_ATTR_DATA_2_VF_CQC_HOPNUM_S,
- cqc_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : cqc_hop_num);
-
- roce_set_field(req->vf_mpt_cfg, CFG_BT_ATTR_DATA_3_VF_MPT_BA_PGSZ_M,
- CFG_BT_ATTR_DATA_3_VF_MPT_BA_PGSZ_S,
- hr_dev->caps.mpt_ba_pg_sz + PG_SHIFT_OFFSET);
- roce_set_field(req->vf_mpt_cfg, CFG_BT_ATTR_DATA_3_VF_MPT_BUF_PGSZ_M,
- CFG_BT_ATTR_DATA_3_VF_MPT_BUF_PGSZ_S,
- hr_dev->caps.mpt_buf_pg_sz + PG_SHIFT_OFFSET);
- roce_set_field(req->vf_mpt_cfg, CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_M,
- CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_S,
- mpt_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : mpt_hop_num);
-
- roce_set_field(req->vf_sccc_cfg,
- CFG_BT_ATTR_DATA_4_VF_SCCC_BA_PGSZ_M,
- CFG_BT_ATTR_DATA_4_VF_SCCC_BA_PGSZ_S,
- hr_dev->caps.sccc_ba_pg_sz + PG_SHIFT_OFFSET);
- roce_set_field(req->vf_sccc_cfg,
- CFG_BT_ATTR_DATA_4_VF_SCCC_BUF_PGSZ_M,
- CFG_BT_ATTR_DATA_4_VF_SCCC_BUF_PGSZ_S,
- hr_dev->caps.sccc_buf_pg_sz + PG_SHIFT_OFFSET);
- roce_set_field(req->vf_sccc_cfg,
- CFG_BT_ATTR_DATA_4_VF_SCCC_HOPNUM_M,
- CFG_BT_ATTR_DATA_4_VF_SCCC_HOPNUM_S,
- sccc_hop_num ==
- HNS_ROCE_HOP_NUM_0 ? 0 : sccc_hop_num);
+
+ hr_reg_write(req, CFG_BT_ATTR_QPC_BA_PGSZ,
+ caps->qpc_ba_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_QPC_BUF_PGSZ,
+ caps->qpc_buf_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_QPC_HOPNUM,
+ to_hr_hem_hopnum(caps->qpc_hop_num, caps->num_qps));
+
+ hr_reg_write(req, CFG_BT_ATTR_SRQC_BA_PGSZ,
+ caps->srqc_ba_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_SRQC_BUF_PGSZ,
+ caps->srqc_buf_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_SRQC_HOPNUM,
+ to_hr_hem_hopnum(caps->srqc_hop_num, caps->num_srqs));
+
+ hr_reg_write(req, CFG_BT_ATTR_CQC_BA_PGSZ,
+ caps->cqc_ba_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_CQC_BUF_PGSZ,
+ caps->cqc_buf_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_CQC_HOPNUM,
+ to_hr_hem_hopnum(caps->cqc_hop_num, caps->num_cqs));
+
+ hr_reg_write(req, CFG_BT_ATTR_MPT_BA_PGSZ,
+ caps->mpt_ba_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_MPT_BUF_PGSZ,
+ caps->mpt_buf_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_MPT_HOPNUM,
+ to_hr_hem_hopnum(caps->mpt_hop_num, caps->num_mtpts));
+
+ hr_reg_write(req, CFG_BT_ATTR_SCCC_BA_PGSZ,
+ caps->sccc_ba_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_SCCC_BUF_PGSZ,
+ caps->sccc_buf_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_SCCC_HOPNUM,
+ to_hr_hem_hopnum(caps->sccc_hop_num, caps->num_qps));
return hns_roce_cmq_send(hr_dev, &desc, 1);
}
-static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
+static void calc_pg_sz(u32 obj_num, u32 obj_size, u32 hop_num, u32 ctx_bt_num,
+ u32 *buf_page_size, u32 *bt_page_size, u32 hem_type)
+{
+ u64 obj_per_chunk;
+ u64 bt_chunk_size = PAGE_SIZE;
+ u64 buf_chunk_size = PAGE_SIZE;
+ u64 obj_per_chunk_default = buf_chunk_size / obj_size;
+
+ *buf_page_size = 0;
+ *bt_page_size = 0;
+
+ switch (hop_num) {
+ case 3:
+ obj_per_chunk = ctx_bt_num * (bt_chunk_size / BA_BYTE_LEN) *
+ (bt_chunk_size / BA_BYTE_LEN) *
+ (bt_chunk_size / BA_BYTE_LEN) *
+ obj_per_chunk_default;
+ break;
+ case 2:
+ obj_per_chunk = ctx_bt_num * (bt_chunk_size / BA_BYTE_LEN) *
+ (bt_chunk_size / BA_BYTE_LEN) *
+ obj_per_chunk_default;
+ break;
+ case 1:
+ obj_per_chunk = ctx_bt_num * (bt_chunk_size / BA_BYTE_LEN) *
+ obj_per_chunk_default;
+ break;
+ case HNS_ROCE_HOP_NUM_0:
+ obj_per_chunk = ctx_bt_num * obj_per_chunk_default;
+ break;
+ default:
+ pr_err("table %u not support hop_num = %u!\n", hem_type,
+ hop_num);
+ return;
+ }
+
+ if (hem_type >= HEM_TYPE_MTT)
+ *bt_page_size = ilog2(DIV_ROUND_UP(obj_num, obj_per_chunk));
+ else
+ *buf_page_size = ilog2(DIV_ROUND_UP(obj_num, obj_per_chunk));
+}
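calc_pg_sz() sizes the extra page shift (applied to either the buffer or BT page size) from how many objects one chunk of context BT entries can reach: ctx_bt_num entries fan out by (chunk / BA_BYTE_LEN) per hop, each leaf chunk holds chunk / obj_size objects, and the shift is ilog2 of the chunks needed to cover obj_num. A worked example with illustrative numbers (not part of this patch; real values come from the queried caps):

#include <stdint.h>
#include <stdio.h>

#define BA_BYTE_LEN 8	/* bytes per base-address entry */

int main(void)
{
	uint64_t chunk = 4096;		/* PAGE_SIZE */
	uint64_t obj_size = 512;	/* e.g. one context entry */
	uint64_t obj_num = 8 << 20;	/* objects the table must cover */
	uint64_t ctx_bt_num = 256;	/* BT entries owned by this function */
	int hop_num = 1;
	int i;

	/* objects reachable through one chunk of BT entries */
	uint64_t per_chunk = ctx_bt_num * (chunk / obj_size);

	for (i = 0; i < hop_num; i++)
		per_chunk *= chunk / BA_BYTE_LEN;	/* each hop fans out */

	uint64_t chunks_needed = (obj_num + per_chunk - 1) / per_chunk;

	/* ilog2(DIV_ROUND_UP(obj_num, obj_per_chunk)) in the driver */
	unsigned int shift = 0;
	while (chunks_needed >> (shift + 1))
		shift++;

	printf("objects per chunk: %llu, chunks needed: %llu, page shift: %u\n",
	       (unsigned long long)per_chunk,
	       (unsigned long long)chunks_needed, shift);
	return 0;
}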
+
+static void set_hem_page_size(struct hns_roce_dev *hr_dev)
{
struct hns_roce_caps *caps = &hr_dev->caps;
+
+ /* EQ */
+ caps->eqe_ba_pg_sz = 0;
+ caps->eqe_buf_pg_sz = 0;
+
+ /* Link Table */
+ caps->llm_buf_pg_sz = 0;
+
+ /* MR */
+ caps->mpt_ba_pg_sz = 0;
+ caps->mpt_buf_pg_sz = 0;
+ caps->pbl_ba_pg_sz = HNS_ROCE_BA_PG_SZ_SUPPORTED_16K;
+ caps->pbl_buf_pg_sz = 0;
+ calc_pg_sz(caps->num_mtpts, caps->mtpt_entry_sz, caps->mpt_hop_num,
+ caps->mpt_bt_num, &caps->mpt_buf_pg_sz, &caps->mpt_ba_pg_sz,
+ HEM_TYPE_MTPT);
+
+ /* QP */
+ caps->qpc_ba_pg_sz = 0;
+ caps->qpc_buf_pg_sz = 0;
+ caps->qpc_timer_ba_pg_sz = 0;
+ caps->qpc_timer_buf_pg_sz = 0;
+ caps->sccc_ba_pg_sz = 0;
+ caps->sccc_buf_pg_sz = 0;
+ caps->mtt_ba_pg_sz = 0;
+ caps->mtt_buf_pg_sz = 0;
+ calc_pg_sz(caps->num_qps, caps->qpc_sz, caps->qpc_hop_num,
+ caps->qpc_bt_num, &caps->qpc_buf_pg_sz, &caps->qpc_ba_pg_sz,
+ HEM_TYPE_QPC);
+
+ if (caps->flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL)
+ calc_pg_sz(caps->num_qps, caps->sccc_sz, caps->sccc_hop_num,
+ caps->sccc_bt_num, &caps->sccc_buf_pg_sz,
+ &caps->sccc_ba_pg_sz, HEM_TYPE_SCCC);
+
+ /* CQ */
+ caps->cqc_ba_pg_sz = 0;
+ caps->cqc_buf_pg_sz = 0;
+ caps->cqc_timer_ba_pg_sz = 0;
+ caps->cqc_timer_buf_pg_sz = 0;
+ caps->cqe_ba_pg_sz = HNS_ROCE_BA_PG_SZ_SUPPORTED_256K;
+ caps->cqe_buf_pg_sz = 0;
+ calc_pg_sz(caps->num_cqs, caps->cqc_entry_sz, caps->cqc_hop_num,
+ caps->cqc_bt_num, &caps->cqc_buf_pg_sz, &caps->cqc_ba_pg_sz,
+ HEM_TYPE_CQC);
+ calc_pg_sz(caps->max_cqes, caps->cqe_sz, caps->cqe_hop_num,
+ 1, &caps->cqe_buf_pg_sz, &caps->cqe_ba_pg_sz, HEM_TYPE_CQE);
+
+ /* SRQ */
+ if (caps->flags & HNS_ROCE_CAP_FLAG_SRQ) {
+ caps->srqc_ba_pg_sz = 0;
+ caps->srqc_buf_pg_sz = 0;
+ caps->srqwqe_ba_pg_sz = 0;
+ caps->srqwqe_buf_pg_sz = 0;
+ caps->idx_ba_pg_sz = 0;
+ caps->idx_buf_pg_sz = 0;
+ calc_pg_sz(caps->num_srqs, caps->srqc_entry_sz,
+ caps->srqc_hop_num, caps->srqc_bt_num,
+ &caps->srqc_buf_pg_sz, &caps->srqc_ba_pg_sz,
+ HEM_TYPE_SRQC);
+ calc_pg_sz(caps->num_srqwqe_segs, caps->mtt_entry_sz,
+ caps->srqwqe_hop_num, 1, &caps->srqwqe_buf_pg_sz,
+ &caps->srqwqe_ba_pg_sz, HEM_TYPE_SRQWQE);
+ calc_pg_sz(caps->num_idx_segs, caps->idx_entry_sz,
+ caps->idx_hop_num, 1, &caps->idx_buf_pg_sz,
+ &caps->idx_ba_pg_sz, HEM_TYPE_IDX);
+ }
+
+ /* GMV */
+ caps->gmv_ba_pg_sz = 0;
+ caps->gmv_buf_pg_sz = 0;
+}
+
+/* Apply all loaded caps before setting to hardware */
+static void apply_func_caps(struct hns_roce_dev *hr_dev)
+{
+#define MAX_GID_TBL_LEN 256
+ struct hns_roce_caps *caps = &hr_dev->caps;
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+
+ /* The following configurations do not need to be queried from firmware. */

+ caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ;
+ caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ;
+ caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ;
+
+ caps->pbl_hop_num = HNS_ROCE_PBL_HOP_NUM;
+ caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
+ caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
+
+ caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS;
+ caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS;
+
+ if (!caps->num_comp_vectors)
+ caps->num_comp_vectors =
+ min_t(u32, caps->eqc_bt_num - HNS_ROCE_V2_AEQE_VEC_NUM,
+ (u32)priv->handle->rinfo.num_vectors -
+ (HNS_ROCE_V2_AEQE_VEC_NUM + HNS_ROCE_V2_ABNORMAL_VEC_NUM));
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+ caps->eqe_hop_num = HNS_ROCE_V3_EQE_HOP_NUM;
+ caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE;
+ caps->aeqe_size = HNS_ROCE_V3_EQE_SIZE;
+
+ /* The following configurations will be overwritten */
+ caps->qpc_sz = HNS_ROCE_V3_QPC_SZ;
+ caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE;
+ caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ;
+
+ /* The following configurations are not queried from firmware */
+ caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ;
+
+ caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0;
+
+ /* It's meaningless to support an excessively large gid_table_len,
+ * as the type of sgid_index in the kernel's struct ib_global_route
+ * and in userspace's struct ibv_global_route is u8/uint8_t (0-255).
+ */
+ caps->gid_table_len[0] = min_t(u32, MAX_GID_TBL_LEN,
+ caps->gmv_bt_num *
+ (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz));
+
+ caps->gmv_entry_num = caps->gmv_bt_num * (HNS_HW_PAGE_SIZE /
+ caps->gmv_entry_sz);
+ } else {
+ u32 func_num = max_t(u32, 1, hr_dev->func_num);
+
+ caps->eqe_hop_num = HNS_ROCE_V2_EQE_HOP_NUM;
+ caps->ceqe_size = HNS_ROCE_CEQE_SIZE;
+ caps->aeqe_size = HNS_ROCE_AEQE_SIZE;
+ caps->gid_table_len[0] /= func_num;
+ }
+
+ if (hr_dev->is_vf) {
+ caps->default_aeq_arm_st = 0x3;
+ caps->default_ceq_arm_st = 0x3;
+ caps->default_ceq_max_cnt = 0x1;
+ caps->default_ceq_period = 0x10;
+ caps->default_aeq_max_cnt = 0x1;
+ caps->default_aeq_period = 0x10;
+ }
+
+ set_hem_page_size(hr_dev);
+}
+
+static int hns_roce_query_caps(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmq_desc desc[HNS_ROCE_QUERY_PF_CAPS_CMD_NUM] = {};
+ struct hns_roce_caps *caps = &hr_dev->caps;
+ struct hns_roce_query_pf_caps_a *resp_a;
+ struct hns_roce_query_pf_caps_b *resp_b;
+ struct hns_roce_query_pf_caps_c *resp_c;
+ struct hns_roce_query_pf_caps_d *resp_d;
+ struct hns_roce_query_pf_caps_e *resp_e;
+ struct hns_roce_query_pf_caps_f *resp_f;
+ enum hns_roce_opcode_type cmd;
+ int ctx_hop_num;
+ int pbl_hop_num;
+ int cmd_num;
int ret;
+ int i;
- ret = hns_roce_cmq_query_hw_info(hr_dev);
+ cmd = hr_dev->is_vf ? HNS_ROCE_OPC_QUERY_VF_CAPS_NUM :
+ HNS_ROCE_OPC_QUERY_PF_CAPS_NUM;
+ cmd_num = hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 ?
+ HNS_ROCE_QUERY_PF_CAPS_CMD_NUM_HIP08 :
+ HNS_ROCE_QUERY_PF_CAPS_CMD_NUM;
+
+ for (i = 0; i < cmd_num - 1; i++) {
+ hns_roce_cmq_setup_basic_desc(&desc[i], cmd, true);
+ desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ }
+
+ hns_roce_cmq_setup_basic_desc(&desc[cmd_num - 1], cmd, true);
+ desc[cmd_num - 1].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+
+ ret = hns_roce_cmq_send(hr_dev, desc, cmd_num);
+ if (ret)
+ return ret;
+
+ resp_a = (struct hns_roce_query_pf_caps_a *)desc[0].data;
+ resp_b = (struct hns_roce_query_pf_caps_b *)desc[1].data;
+ resp_c = (struct hns_roce_query_pf_caps_c *)desc[2].data;
+ resp_d = (struct hns_roce_query_pf_caps_d *)desc[3].data;
+ resp_e = (struct hns_roce_query_pf_caps_e *)desc[4].data;
+ resp_f = (struct hns_roce_query_pf_caps_f *)desc[5].data;
+
+ caps->local_ca_ack_delay = resp_a->local_ca_ack_delay;
+ caps->max_sq_sg = le16_to_cpu(resp_a->max_sq_sg);
+ caps->max_sq_inline = le16_to_cpu(resp_a->max_sq_inline);
+ caps->max_rq_sg = le16_to_cpu(resp_a->max_rq_sg);
+ caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg);
+ caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges);
+ caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges);
+ caps->num_aeq_vectors = resp_a->num_aeq_vectors;
+ caps->num_other_vectors = resp_a->num_other_vectors;
+ caps->max_sq_desc_sz = resp_a->max_sq_desc_sz;
+ caps->max_rq_desc_sz = resp_a->max_rq_desc_sz;
+
+ caps->mtpt_entry_sz = resp_b->mtpt_entry_sz;
+ caps->irrl_entry_sz = resp_b->irrl_entry_sz;
+ caps->trrl_entry_sz = resp_b->trrl_entry_sz;
+ caps->cqc_entry_sz = resp_b->cqc_entry_sz;
+ caps->srqc_entry_sz = resp_b->srqc_entry_sz;
+ caps->idx_entry_sz = resp_b->idx_entry_sz;
+ caps->sccc_sz = resp_b->sccc_sz;
+ caps->max_mtu = resp_b->max_mtu;
+ caps->min_cqes = resp_b->min_cqes;
+ caps->min_wqes = resp_b->min_wqes;
+ caps->page_size_cap = le32_to_cpu(resp_b->page_size_cap);
+ caps->pkey_table_len[0] = resp_b->pkey_table_len;
+ caps->phy_num_uars = resp_b->phy_num_uars;
+ ctx_hop_num = resp_b->ctx_hop_num;
+ pbl_hop_num = resp_b->pbl_hop_num;
+
+ caps->num_pds = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_PDS);
+
+ caps->flags = hr_reg_read(resp_c, PF_CAPS_C_CAP_FLAGS);
+ caps->flags |= le16_to_cpu(resp_d->cap_flags_ex) <<
+ HNS_ROCE_CAP_FLAGS_EX_SHIFT;
+
+ if (hr_dev->is_vf)
+ caps->flags &= ~HNS_ROCE_CAP_FLAG_BOND;
+
+ caps->num_cqs = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_CQS);
+ caps->gid_table_len[0] = hr_reg_read(resp_c, PF_CAPS_C_MAX_GID);
+ caps->max_cqes = 1 << hr_reg_read(resp_c, PF_CAPS_C_CQ_DEPTH);
+ caps->num_xrcds = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_XRCDS);
+ caps->num_mtpts = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_MRWS);
+ caps->num_qps = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_QPS);
+ caps->max_qp_init_rdma = hr_reg_read(resp_c, PF_CAPS_C_MAX_ORD);
+ caps->max_qp_dest_rdma = caps->max_qp_init_rdma;
+ caps->max_wqes = 1 << le16_to_cpu(resp_c->sq_depth);
+
+ caps->num_srqs = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_SRQS);
+ caps->cong_cap = hr_reg_read(resp_d, PF_CAPS_D_CONG_CAP);
+ caps->max_srq_wrs = 1 << le16_to_cpu(resp_d->srq_depth);
+ caps->ceqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_CEQ_DEPTH);
+ caps->num_comp_vectors = hr_reg_read(resp_d, PF_CAPS_D_NUM_CEQS);
+ caps->aeqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_AEQ_DEPTH);
+ caps->default_cong_type = hr_reg_read(resp_d, PF_CAPS_D_DEFAULT_ALG);
+ caps->reserved_pds = hr_reg_read(resp_d, PF_CAPS_D_RSV_PDS);
+ caps->num_uars = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_UARS);
+ caps->reserved_qps = hr_reg_read(resp_d, PF_CAPS_D_RSV_QPS);
+ caps->reserved_uars = hr_reg_read(resp_d, PF_CAPS_D_RSV_UARS);
+
+ caps->reserved_mrws = hr_reg_read(resp_e, PF_CAPS_E_RSV_MRWS);
+ caps->chunk_sz = 1 << hr_reg_read(resp_e, PF_CAPS_E_CHUNK_SIZE_SHIFT);
+ caps->reserved_cqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_CQS);
+ caps->reserved_xrcds = hr_reg_read(resp_e, PF_CAPS_E_RSV_XRCDS);
+ caps->reserved_srqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_SRQS);
+ caps->reserved_lkey = hr_reg_read(resp_e, PF_CAPS_E_RSV_LKEYS);
+
+ caps->max_ack_req_msg_len = le32_to_cpu(resp_f->max_ack_req_msg_len);
+
+ caps->qpc_hop_num = ctx_hop_num;
+ caps->sccc_hop_num = ctx_hop_num;
+ caps->srqc_hop_num = ctx_hop_num;
+ caps->cqc_hop_num = ctx_hop_num;
+ caps->mpt_hop_num = ctx_hop_num;
+ caps->mtt_hop_num = pbl_hop_num;
+ caps->cqe_hop_num = pbl_hop_num;
+ caps->srqwqe_hop_num = pbl_hop_num;
+ caps->idx_hop_num = pbl_hop_num;
+ caps->wqe_sq_hop_num = hr_reg_read(resp_d, PF_CAPS_D_SQWQE_HOP_NUM);
+ caps->wqe_sge_hop_num = hr_reg_read(resp_d, PF_CAPS_D_EX_SGE_HOP_NUM);
+ caps->wqe_rq_hop_num = hr_reg_read(resp_d, PF_CAPS_D_RQWQE_HOP_NUM);
+
+ if (!(caps->page_size_cap & PAGE_SIZE))
+ caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED;
+
+ if (!hr_dev->is_vf) {
+ caps->cqe_sz = resp_a->cqe_sz;
+ caps->qpc_sz = le16_to_cpu(resp_b->qpc_sz);
+ caps->default_aeq_arm_st =
+ hr_reg_read(resp_d, PF_CAPS_D_AEQ_ARM_ST);
+ caps->default_ceq_arm_st =
+ hr_reg_read(resp_d, PF_CAPS_D_CEQ_ARM_ST);
+ caps->default_ceq_max_cnt = le16_to_cpu(resp_e->ceq_max_cnt);
+ caps->default_ceq_period = le16_to_cpu(resp_e->ceq_period);
+ caps->default_aeq_max_cnt = le16_to_cpu(resp_e->aeq_max_cnt);
+ caps->default_aeq_period = le16_to_cpu(resp_e->aeq_period);
+ }
+
+ return 0;
+}
+
+static int config_hem_entry_size(struct hns_roce_dev *hr_dev, u32 type, u32 val)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_ENTRY_SIZE,
+ false);
+
+ hr_reg_write(req, CFG_HEM_ENTRY_SIZE_TYPE, type);
+ hr_reg_write(req, CFG_HEM_ENTRY_SIZE_VALUE, val);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static int hns_roce_config_entry_size(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_caps *caps = &hr_dev->caps;
+ int ret;
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ return 0;
+
+ ret = config_hem_entry_size(hr_dev, HNS_ROCE_CFG_QPC_SIZE,
+ caps->qpc_sz);
if (ret) {
- dev_err(hr_dev->dev, "Query hardware version fail, ret = %d.\n",
- ret);
+ dev_err(hr_dev->dev, "failed to cfg qpc sz, ret = %d.\n", ret);
return ret;
}
- ret = hns_roce_query_fw_ver(hr_dev);
+ ret = config_hem_entry_size(hr_dev, HNS_ROCE_CFG_SCCC_SIZE,
+ caps->sccc_sz);
+ if (ret)
+ dev_err(hr_dev->dev, "failed to cfg sccc sz, ret = %d.\n", ret);
+
+ return ret;
+}
+
+static int hns_roce_v2_vf_profile(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ hr_dev->func_num = 1;
+
+ ret = hns_roce_query_caps(hr_dev);
if (ret) {
- dev_err(hr_dev->dev, "Query firmware version fail, ret = %d.\n",
- ret);
+ dev_err(dev, "failed to query VF caps, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = hns_roce_query_vf_resource(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to query VF resource, ret = %d.\n", ret);
+ return ret;
+ }
+
+ apply_func_caps(hr_dev);
+
+ ret = hns_roce_v2_set_bt(hr_dev);
+ if (ret)
+ dev_err(dev, "failed to config VF BA table, ret = %d.\n", ret);
+
+ return ret;
+}
+
+static int hns_roce_v2_pf_profile(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ ret = hns_roce_query_func_info(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to query func info, ret = %d.\n", ret);
return ret;
}
ret = hns_roce_config_global_param(hr_dev);
if (ret) {
- dev_err(hr_dev->dev, "Configure global param fail, ret = %d.\n",
- ret);
+ dev_err(dev, "failed to config global param, ret = %d.\n", ret);
return ret;
}
- /* Get pf resource owned by every pf */
- ret = hns_roce_query_pf_resource(hr_dev);
+ ret = hns_roce_set_vf_switch_param(hr_dev);
if (ret) {
- dev_err(hr_dev->dev, "Query pf resource fail, ret = %d.\n",
- ret);
+ dev_err(dev, "failed to set switch param, ret = %d.\n", ret);
return ret;
}
- if (hr_dev->pci_dev->revision == 0x21) {
- ret = hns_roce_query_pf_timer_resource(hr_dev);
- if (ret) {
- dev_err(hr_dev->dev,
- "Query pf timer resource fail, ret = %d.\n",
- ret);
- return ret;
- }
+ ret = hns_roce_query_caps(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to query PF caps, ret = %d.\n", ret);
+ return ret;
}
+ ret = hns_roce_query_pf_resource(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to query pf resource, ret = %d.\n", ret);
+ return ret;
+ }
+
+ apply_func_caps(hr_dev);
+
ret = hns_roce_alloc_vf_resource(hr_dev);
if (ret) {
- dev_err(hr_dev->dev, "Allocate vf resource fail, ret = %d.\n",
- ret);
+ dev_err(dev, "failed to alloc vf resource, ret = %d.\n", ret);
return ret;
}
- if (hr_dev->pci_dev->revision == 0x21) {
- ret = hns_roce_set_vf_switch_param(hr_dev, 0);
- if (ret) {
- dev_err(hr_dev->dev,
- "Set function switch param fail, ret = %d.\n",
- ret);
- return ret;
- }
+ ret = hns_roce_v2_set_bt(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to config BA table, ret = %d.\n", ret);
+ return ret;
+ }
+
+ /* Configure the size of QPC, SCCC, etc. */
+ return hns_roce_config_entry_size(hr_dev);
+}
+
+static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ ret = hns_roce_cmq_query_hw_info(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to query hardware info, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = hns_roce_query_fw_ver(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to query firmware info, ret = %d.\n", ret);
+ return ret;
}
hr_dev->vendor_part_id = hr_dev->pci_dev->device;
hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid);
- caps->num_qps = HNS_ROCE_V2_MAX_QP_NUM;
- caps->max_wqes = HNS_ROCE_V2_MAX_WQE_NUM;
- caps->num_cqs = HNS_ROCE_V2_MAX_CQ_NUM;
- caps->num_srqs = HNS_ROCE_V2_MAX_SRQ_NUM;
- caps->min_cqes = HNS_ROCE_MIN_CQE_NUM;
- caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM;
- caps->max_srqwqes = HNS_ROCE_V2_MAX_SRQWQE_NUM;
- caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM;
- caps->max_extend_sg = HNS_ROCE_V2_MAX_EXTEND_SGE_NUM;
- caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM;
- caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE;
- caps->max_srq_sg = HNS_ROCE_V2_MAX_SRQ_SGE_NUM;
- caps->num_uars = HNS_ROCE_V2_UAR_NUM;
- caps->phy_num_uars = HNS_ROCE_V2_PHY_UAR_NUM;
- caps->num_aeq_vectors = HNS_ROCE_V2_AEQE_VEC_NUM;
- caps->num_comp_vectors = HNS_ROCE_V2_COMP_VEC_NUM;
- caps->num_other_vectors = HNS_ROCE_V2_ABNORMAL_VEC_NUM;
- caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM;
- caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS;
- caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS;
- caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS;
- caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS;
- caps->num_pds = HNS_ROCE_V2_MAX_PD_NUM;
- caps->max_qp_init_rdma = HNS_ROCE_V2_MAX_QP_INIT_RDMA;
- caps->max_qp_dest_rdma = HNS_ROCE_V2_MAX_QP_DEST_RDMA;
- caps->max_sq_desc_sz = HNS_ROCE_V2_MAX_SQ_DESC_SZ;
- caps->max_rq_desc_sz = HNS_ROCE_V2_MAX_RQ_DESC_SZ;
- caps->max_srq_desc_sz = HNS_ROCE_V2_MAX_SRQ_DESC_SZ;
- caps->qpc_entry_sz = HNS_ROCE_V2_QPC_ENTRY_SZ;
- caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ;
- caps->trrl_entry_sz = HNS_ROCE_V2_TRRL_ENTRY_SZ;
- caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ;
- caps->srqc_entry_sz = HNS_ROCE_V2_SRQC_ENTRY_SZ;
- caps->mtpt_entry_sz = HNS_ROCE_V2_MTPT_ENTRY_SZ;
- caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ;
- caps->idx_entry_sz = 4;
- caps->cq_entry_sz = HNS_ROCE_V2_CQE_ENTRY_SIZE;
- caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED;
- caps->reserved_lkey = 0;
- caps->reserved_pds = 0;
- caps->reserved_mrws = 1;
- caps->reserved_uars = 0;
- caps->reserved_cqs = 0;
- caps->reserved_srqs = 0;
- caps->reserved_qps = HNS_ROCE_V2_RSV_QPS;
-
- caps->qpc_ba_pg_sz = 0;
- caps->qpc_buf_pg_sz = 0;
- caps->qpc_hop_num = HNS_ROCE_CONTEXT_HOP_NUM;
- caps->srqc_ba_pg_sz = 0;
- caps->srqc_buf_pg_sz = 0;
- caps->srqc_hop_num = HNS_ROCE_CONTEXT_HOP_NUM;
- caps->cqc_ba_pg_sz = 0;
- caps->cqc_buf_pg_sz = 0;
- caps->cqc_hop_num = HNS_ROCE_CONTEXT_HOP_NUM;
- caps->mpt_ba_pg_sz = 0;
- caps->mpt_buf_pg_sz = 0;
- caps->mpt_hop_num = HNS_ROCE_CONTEXT_HOP_NUM;
- caps->pbl_ba_pg_sz = 2;
- caps->pbl_buf_pg_sz = 0;
- caps->pbl_hop_num = HNS_ROCE_PBL_HOP_NUM;
- caps->mtt_ba_pg_sz = 0;
- caps->mtt_buf_pg_sz = 0;
- caps->mtt_hop_num = HNS_ROCE_MTT_HOP_NUM;
- caps->cqe_ba_pg_sz = 0;
- caps->cqe_buf_pg_sz = 0;
- caps->cqe_hop_num = HNS_ROCE_CQE_HOP_NUM;
- caps->srqwqe_ba_pg_sz = 0;
- caps->srqwqe_buf_pg_sz = 0;
- caps->srqwqe_hop_num = HNS_ROCE_SRQWQE_HOP_NUM;
- caps->idx_ba_pg_sz = 0;
- caps->idx_buf_pg_sz = 0;
- caps->idx_hop_num = HNS_ROCE_IDX_HOP_NUM;
- caps->eqe_ba_pg_sz = 0;
- caps->eqe_buf_pg_sz = 0;
- caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM;
- caps->tsq_buf_pg_sz = 0;
- caps->chunk_sz = HNS_ROCE_V2_TABLE_CHUNK_SIZE;
-
- caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR |
- HNS_ROCE_CAP_FLAG_ROCE_V1_V2 |
- HNS_ROCE_CAP_FLAG_RQ_INLINE |
- HNS_ROCE_CAP_FLAG_RECORD_DB |
- HNS_ROCE_CAP_FLAG_SQ_RECORD_DB;
-
- if (hr_dev->pci_dev->revision == 0x21)
- caps->flags |= HNS_ROCE_CAP_FLAG_MW |
- HNS_ROCE_CAP_FLAG_FRMR;
-
- caps->pkey_table_len[0] = 1;
- caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
- caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM;
- caps->aeqe_depth = HNS_ROCE_V2_ASYNC_EQE_NUM;
- caps->local_ca_ack_delay = 0;
- caps->max_mtu = IB_MTU_4096;
-
- caps->max_srqs = HNS_ROCE_V2_MAX_SRQ;
- caps->max_srq_wrs = HNS_ROCE_V2_MAX_SRQ_WR;
- caps->max_srq_sges = HNS_ROCE_V2_MAX_SRQ_SGE;
-
- if (hr_dev->pci_dev->revision == 0x21) {
- caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC |
- HNS_ROCE_CAP_FLAG_SRQ |
- HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL;
-
- caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM;
- caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ;
- caps->qpc_timer_ba_pg_sz = 0;
- caps->qpc_timer_buf_pg_sz = 0;
- caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
- caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM;
- caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ;
- caps->cqc_timer_ba_pg_sz = 0;
- caps->cqc_timer_buf_pg_sz = 0;
- caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
-
- caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ;
- caps->sccc_ba_pg_sz = 0;
- caps->sccc_buf_pg_sz = 0;
- caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM;
+ if (hr_dev->is_vf)
+ return hns_roce_v2_vf_profile(hr_dev);
+ else
+ return hns_roce_v2_pf_profile(hr_dev);
+}
+
+static void config_llm_table(struct hns_roce_buf *data_buf, void *cfg_buf)
+{
+ u32 i, next_ptr, page_num;
+ __le64 *entry = cfg_buf;
+ dma_addr_t addr;
+ u64 val;
+
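+ /*
+ * Each 64-bit entry records one page's DMA address together with the
+ * index of the next page; the next pointer of the last entry wraps
+ * back to 0.
+ */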
+ page_num = data_buf->npages;
+ for (i = 0; i < page_num; i++) {
+ addr = hns_roce_buf_page(data_buf, i);
+ if (i == (page_num - 1))
+ next_ptr = 0;
+ else
+ next_ptr = i + 1;
+
+ val = HNS_ROCE_EXT_LLM_ENTRY(addr, (u64)next_ptr);
+ entry[i] = cpu_to_le64(val);
}
+}
- ret = hns_roce_v2_set_bt(hr_dev);
+static int set_llm_cfg_to_hw(struct hns_roce_dev *hr_dev,
+ struct hns_roce_link_table *table)
+{
+ struct hns_roce_cmq_desc desc[2];
+ struct hns_roce_cmq_req *r_a = (struct hns_roce_cmq_req *)desc[0].data;
+ struct hns_roce_cmq_req *r_b = (struct hns_roce_cmq_req *)desc[1].data;
+ struct hns_roce_buf *buf = table->buf;
+ enum hns_roce_opcode_type opcode;
+ dma_addr_t addr;
+
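+ /*
+ * The configuration is sent as two chained CMQ descriptors: desc[0]
+ * carries the base address, depth and head info, desc[1] carries the
+ * tail info.
+ */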
+ opcode = HNS_ROCE_OPC_CFG_EXT_LLM;
+ hns_roce_cmq_setup_basic_desc(&desc[0], opcode, false);
+ desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ hns_roce_cmq_setup_basic_desc(&desc[1], opcode, false);
+
+ hr_reg_write(r_a, CFG_LLM_A_BA_L, lower_32_bits(table->table.map));
+ hr_reg_write(r_a, CFG_LLM_A_BA_H, upper_32_bits(table->table.map));
+ hr_reg_write(r_a, CFG_LLM_A_DEPTH, buf->npages);
+ hr_reg_write(r_a, CFG_LLM_A_PGSZ, to_hr_hw_page_shift(buf->page_shift));
+ hr_reg_enable(r_a, CFG_LLM_A_INIT_EN);
+
+ addr = to_hr_hw_page_addr(hns_roce_buf_page(buf, 0));
+ hr_reg_write(r_a, CFG_LLM_A_HEAD_BA_L, lower_32_bits(addr));
+ hr_reg_write(r_a, CFG_LLM_A_HEAD_BA_H, upper_32_bits(addr));
+ hr_reg_write(r_a, CFG_LLM_A_HEAD_NXTPTR, 1);
+ hr_reg_write(r_a, CFG_LLM_A_HEAD_PTR, 0);
+
+ addr = to_hr_hw_page_addr(hns_roce_buf_page(buf, buf->npages - 1));
+ hr_reg_write(r_b, CFG_LLM_B_TAIL_BA_L, lower_32_bits(addr));
+ hr_reg_write(r_b, CFG_LLM_B_TAIL_BA_H, upper_32_bits(addr));
+ hr_reg_write(r_b, CFG_LLM_B_TAIL_PTR, buf->npages - 1);
+
+ return hns_roce_cmq_send(hr_dev, desc, 2);
+}
+
+static struct hns_roce_link_table *
+alloc_link_table_buf(struct hns_roce_dev *hr_dev)
+{
+ u16 total_sl = hr_dev->caps.sl_num * hr_dev->func_num;
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_link_table *link_tbl;
+ u32 pg_shift, size, min_size;
+
+ link_tbl = &priv->ext_llm;
+ pg_shift = hr_dev->caps.llm_buf_pg_sz + PAGE_SHIFT;
+ size = hr_dev->caps.num_qps * hr_dev->func_num *
+ HNS_ROCE_V2_EXT_LLM_ENTRY_SZ;
+ min_size = HNS_ROCE_EXT_LLM_MIN_PAGES(total_sl) << pg_shift;
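+ /* One entry per QP per function, but never below the per-SL minimum. */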
+
+ /* Alloc data table */
+ size = max(size, min_size);
+ link_tbl->buf = hns_roce_buf_alloc(hr_dev, size, pg_shift, 0);
+ if (IS_ERR(link_tbl->buf))
+ return ERR_PTR(-ENOMEM);
+
+ /* Alloc config table */
+ size = link_tbl->buf->npages * sizeof(u64);
+ link_tbl->table.buf = dma_alloc_coherent(hr_dev->dev, size,
+ &link_tbl->table.map,
+ GFP_KERNEL);
+ if (!link_tbl->table.buf) {
+ hns_roce_buf_free(hr_dev, link_tbl->buf);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return link_tbl;
+}
+
+static void free_link_table_buf(struct hns_roce_dev *hr_dev,
+ struct hns_roce_link_table *tbl)
+{
+ if (tbl->buf) {
+ u32 size = tbl->buf->npages * sizeof(u64);
+
+ dma_free_coherent(hr_dev->dev, size, tbl->table.buf,
+ tbl->table.map);
+ }
+
+ hns_roce_buf_free(hr_dev, tbl->buf);
+}
+
+static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_link_table *link_tbl;
+ int ret;
+
+ link_tbl = alloc_link_table_buf(hr_dev);
+ if (IS_ERR(link_tbl))
+ return -ENOMEM;
+
+ if (WARN_ON(link_tbl->buf->npages > HNS_ROCE_V2_EXT_LLM_MAX_DEPTH)) {
+ ret = -EINVAL;
+ goto err_alloc;
+ }
+
+ config_llm_table(link_tbl->buf, link_tbl->table.buf);
+ ret = set_llm_cfg_to_hw(hr_dev, link_tbl);
if (ret)
- dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n",
- ret);
+ goto err_alloc;
+ return 0;
+
+err_alloc:
+ free_link_table_buf(hr_dev, link_tbl);
return ret;
}
-static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev,
- enum hns_roce_link_table_type type)
+static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev)
{
- struct hns_roce_cmq_desc desc[2];
- struct hns_roce_cfg_llm_a *req_a =
- (struct hns_roce_cfg_llm_a *)desc[0].data;
- struct hns_roce_cfg_llm_b *req_b =
- (struct hns_roce_cfg_llm_b *)desc[1].data;
struct hns_roce_v2_priv *priv = hr_dev->priv;
- struct hns_roce_link_table *link_tbl;
- struct hns_roce_link_table_entry *entry;
- enum hns_roce_opcode_type opcode;
- u32 page_num;
- int i;
- switch (type) {
- case TSQ_LINK_TABLE:
- link_tbl = &priv->tsq;
- opcode = HNS_ROCE_OPC_CFG_EXT_LLM;
- break;
- case TPQ_LINK_TABLE:
- link_tbl = &priv->tpq;
- opcode = HNS_ROCE_OPC_CFG_TMOUT_LLM;
- break;
- default:
- return -EINVAL;
+ free_link_table_buf(hr_dev, &priv->ext_llm);
+}
+
+static void free_dip_entry(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_dip *hr_dip;
+ unsigned long idx;
+
+ xa_lock(&hr_dev->qp_table.dip_xa);
+
+ xa_for_each(&hr_dev->qp_table.dip_xa, idx, hr_dip) {
+ __xa_erase(&hr_dev->qp_table.dip_xa, hr_dip->dip_idx);
+ kfree(hr_dip);
}
- page_num = link_tbl->npages;
- entry = link_tbl->table.buf;
- memset(req_a, 0, sizeof(*req_a));
- memset(req_b, 0, sizeof(*req_b));
+ xa_unlock(&hr_dev->qp_table.dip_xa);
+}
- for (i = 0; i < 2; i++) {
- hns_roce_cmq_setup_basic_desc(&desc[i], opcode, false);
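+/*
+ * The free mr flow keeps a reserved PD, CQ and loopback RC QPs so that
+ * hns_roce_v2_dereg_mr() can post RDMA writes to itself on HIP08 and poll
+ * them to completion.
+ */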
+static struct ib_pd *free_mr_init_pd(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_pd *hr_pd;
+ struct ib_pd *pd;
+
+ hr_pd = kzalloc(sizeof(*hr_pd), GFP_KERNEL);
+ if (!hr_pd)
+ return NULL;
+ pd = &hr_pd->ibpd;
+ pd->device = ibdev;
+
+ if (hns_roce_alloc_pd(pd, NULL)) {
+ ibdev_err(ibdev, "failed to create pd for free mr.\n");
+ kfree(hr_pd);
+ return NULL;
+ }
+ free_mr->rsv_pd = to_hr_pd(pd);
+ free_mr->rsv_pd->ibpd.device = &hr_dev->ib_dev;
+ free_mr->rsv_pd->ibpd.uobject = NULL;
+ free_mr->rsv_pd->ibpd.__internal_mr = NULL;
+ atomic_set(&free_mr->rsv_pd->ibpd.usecnt, 0);
+
+ return pd;
+}
- if (i == 0)
- desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
- else
- desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
-
- if (i == 0) {
- req_a->base_addr_l = link_tbl->table.map & 0xffffffff;
- req_a->base_addr_h = (link_tbl->table.map >> 32) &
- 0xffffffff;
- roce_set_field(req_a->depth_pgsz_init_en,
- CFG_LLM_QUE_DEPTH_M,
- CFG_LLM_QUE_DEPTH_S,
- link_tbl->npages);
- roce_set_field(req_a->depth_pgsz_init_en,
- CFG_LLM_QUE_PGSZ_M,
- CFG_LLM_QUE_PGSZ_S,
- link_tbl->pg_sz);
- req_a->head_ba_l = entry[0].blk_ba0;
- req_a->head_ba_h_nxtptr = entry[0].blk_ba1_nxt_ptr;
- roce_set_field(req_a->head_ptr,
- CFG_LLM_HEAD_PTR_M,
- CFG_LLM_HEAD_PTR_S, 0);
- } else {
- req_b->tail_ba_l = entry[page_num - 1].blk_ba0;
- roce_set_field(req_b->tail_ba_h,
- CFG_LLM_TAIL_BA_H_M,
- CFG_LLM_TAIL_BA_H_S,
- entry[page_num - 1].blk_ba1_nxt_ptr &
- HNS_ROCE_LINK_TABLE_BA1_M);
- roce_set_field(req_b->tail_ptr,
- CFG_LLM_TAIL_PTR_M,
- CFG_LLM_TAIL_PTR_S,
- (entry[page_num - 2].blk_ba1_nxt_ptr &
- HNS_ROCE_LINK_TABLE_NXT_PTR_M) >>
- HNS_ROCE_LINK_TABLE_NXT_PTR_S);
+static struct ib_cq *free_mr_init_cq(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct ib_cq_init_attr cq_init_attr = {};
+ struct hns_roce_cq *hr_cq;
+ struct ib_cq *cq;
+
+ cq_init_attr.cqe = HNS_ROCE_FREE_MR_USED_CQE_NUM;
+
+ hr_cq = kzalloc(sizeof(*hr_cq), GFP_KERNEL);
+ if (!hr_cq)
+ return NULL;
+
+ cq = &hr_cq->ib_cq;
+ cq->device = ibdev;
+
+ if (hns_roce_create_cq(cq, &cq_init_attr, NULL)) {
+ ibdev_err(ibdev, "failed to create cq for free mr.\n");
+ kfree(hr_cq);
+ return NULL;
+ }
+ free_mr->rsv_cq = to_hr_cq(cq);
+ free_mr->rsv_cq->ib_cq.device = &hr_dev->ib_dev;
+ free_mr->rsv_cq->ib_cq.uobject = NULL;
+ free_mr->rsv_cq->ib_cq.comp_handler = NULL;
+ free_mr->rsv_cq->ib_cq.event_handler = NULL;
+ free_mr->rsv_cq->ib_cq.cq_context = NULL;
+ atomic_set(&free_mr->rsv_cq->ib_cq.usecnt, 0);
+
+ return cq;
+}
+
+static int free_mr_init_qp(struct hns_roce_dev *hr_dev, struct ib_cq *cq,
+ struct ib_qp_init_attr *init_attr, int i)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_qp *hr_qp;
+ struct ib_qp *qp;
+ int ret;
+
+ hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
+ if (!hr_qp)
+ return -ENOMEM;
+
+ qp = &hr_qp->ibqp;
+ qp->device = ibdev;
+
+ ret = hns_roce_create_qp(qp, init_attr, NULL);
+ if (ret) {
+ ibdev_err(ibdev, "failed to create qp for free mr.\n");
+ kfree(hr_qp);
+ return ret;
+ }
+
+ free_mr->rsv_qp[i] = hr_qp;
+ free_mr->rsv_qp[i]->ibqp.recv_cq = cq;
+ free_mr->rsv_qp[i]->ibqp.send_cq = cq;
+
+ return 0;
+}
+
+static void free_mr_exit(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_qp *qp;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
+ if (free_mr->rsv_qp[i]) {
+ qp = &free_mr->rsv_qp[i]->ibqp;
+ hns_roce_v2_destroy_qp(qp, NULL);
+ kfree(free_mr->rsv_qp[i]);
+ free_mr->rsv_qp[i] = NULL;
}
}
- roce_set_field(req_a->depth_pgsz_init_en,
- CFG_LLM_INIT_EN_M, CFG_LLM_INIT_EN_S, 1);
- return hns_roce_cmq_send(hr_dev, desc, 2);
+ if (free_mr->rsv_cq) {
+ hns_roce_destroy_cq(&free_mr->rsv_cq->ib_cq, NULL);
+ kfree(free_mr->rsv_cq);
+ free_mr->rsv_cq = NULL;
+ }
+
+ if (free_mr->rsv_pd) {
+ hns_roce_dealloc_pd(&free_mr->rsv_pd->ibpd, NULL);
+ kfree(free_mr->rsv_pd);
+ free_mr->rsv_pd = NULL;
+ }
+
+ mutex_destroy(&free_mr->mutex);
}
-static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev,
- enum hns_roce_link_table_type type)
+static int free_mr_alloc_res(struct hns_roce_dev *hr_dev)
{
struct hns_roce_v2_priv *priv = hr_dev->priv;
- struct hns_roce_link_table *link_tbl;
- struct hns_roce_link_table_entry *entry;
- struct device *dev = hr_dev->dev;
- u32 buf_chk_sz;
- dma_addr_t t;
- int func_num = 1;
- int pg_num_a;
- int pg_num_b;
- int pg_num;
- int size;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_qp_init_attr qp_init_attr = {};
+ struct ib_pd *pd;
+ struct ib_cq *cq;
+ int ret;
int i;
- switch (type) {
- case TSQ_LINK_TABLE:
- link_tbl = &priv->tsq;
- buf_chk_sz = 1 << (hr_dev->caps.tsq_buf_pg_sz + PAGE_SHIFT);
- pg_num_a = hr_dev->caps.num_qps * 8 / buf_chk_sz;
- pg_num_b = hr_dev->caps.sl_num * 4 + 2;
- break;
- case TPQ_LINK_TABLE:
- link_tbl = &priv->tpq;
- buf_chk_sz = 1 << (hr_dev->caps.tpq_buf_pg_sz + PAGE_SHIFT);
- pg_num_a = hr_dev->caps.num_cqs * 4 / buf_chk_sz;
- pg_num_b = 2 * 4 * func_num + 2;
- break;
- default:
- return -EINVAL;
+ pd = free_mr_init_pd(hr_dev);
+ if (!pd)
+ return -ENOMEM;
+
+ cq = free_mr_init_cq(hr_dev);
+ if (!cq) {
+ ret = -ENOMEM;
+ goto create_failed_cq;
}
- pg_num = max(pg_num_a, pg_num_b);
- size = pg_num * sizeof(struct hns_roce_link_table_entry);
+ qp_init_attr.qp_type = IB_QPT_RC;
+ qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+ qp_init_attr.send_cq = cq;
+ qp_init_attr.recv_cq = cq;
+ for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
+ qp_init_attr.cap.max_send_wr = HNS_ROCE_FREE_MR_USED_SQWQE_NUM;
+ qp_init_attr.cap.max_send_sge = HNS_ROCE_FREE_MR_USED_SQSGE_NUM;
+ qp_init_attr.cap.max_recv_wr = HNS_ROCE_FREE_MR_USED_RQWQE_NUM;
+ qp_init_attr.cap.max_recv_sge = HNS_ROCE_FREE_MR_USED_RQSGE_NUM;
- link_tbl->table.buf = dma_alloc_coherent(dev, size,
- &link_tbl->table.map,
- GFP_KERNEL);
- if (!link_tbl->table.buf)
- goto out;
+ ret = free_mr_init_qp(hr_dev, cq, &qp_init_attr, i);
+ if (ret)
+ goto create_failed_qp;
+ }
- link_tbl->pg_list = kcalloc(pg_num, sizeof(*link_tbl->pg_list),
- GFP_KERNEL);
- if (!link_tbl->pg_list)
- goto err_kcalloc_failed;
+ return 0;
- entry = link_tbl->table.buf;
- for (i = 0; i < pg_num; ++i) {
- link_tbl->pg_list[i].buf = dma_alloc_coherent(dev, buf_chk_sz,
- &t, GFP_KERNEL);
- if (!link_tbl->pg_list[i].buf)
- goto err_alloc_buf_failed;
+create_failed_qp:
+ for (i--; i >= 0; i--) {
+ hns_roce_v2_destroy_qp(&free_mr->rsv_qp[i]->ibqp, NULL);
+ kfree(free_mr->rsv_qp[i]);
+ }
+ hns_roce_destroy_cq(cq, NULL);
+ kfree(cq);
- link_tbl->pg_list[i].map = t;
- memset(link_tbl->pg_list[i].buf, 0, buf_chk_sz);
+create_failed_cq:
+ hns_roce_dealloc_pd(pd, NULL);
+ kfree(pd);
- entry[i].blk_ba0 = (t >> 12) & 0xffffffff;
- roce_set_field(entry[i].blk_ba1_nxt_ptr,
- HNS_ROCE_LINK_TABLE_BA1_M,
- HNS_ROCE_LINK_TABLE_BA1_S,
- t >> 44);
+ return ret;
+}
- if (i < (pg_num - 1))
- roce_set_field(entry[i].blk_ba1_nxt_ptr,
- HNS_ROCE_LINK_TABLE_NXT_PTR_M,
- HNS_ROCE_LINK_TABLE_NXT_PTR_S,
- i + 1);
+static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev,
+ struct ib_qp_attr *attr, int sl_num)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_qp *hr_qp;
+ int loopback;
+ int mask;
+ int ret;
+
+ hr_qp = to_hr_qp(&free_mr->rsv_qp[sl_num]->ibqp);
+ hr_qp->free_mr_en = 1;
+ hr_qp->ibqp.device = ibdev;
+ hr_qp->ibqp.qp_type = IB_QPT_RC;
+
+ mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS;
+ attr->qp_state = IB_QPS_INIT;
+ attr->port_num = 1;
+ attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
+ ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT,
+ IB_QPS_INIT, NULL);
+ if (ret) {
+ ibdev_err_ratelimited(ibdev, "failed to modify qp to init, ret = %d.\n",
+ ret);
+ return ret;
}
- link_tbl->npages = pg_num;
- link_tbl->pg_sz = buf_chk_sz;
- return hns_roce_config_link_table(hr_dev, type);
+ loopback = hr_dev->loop_idc;
+ /* Set qpc lbi = 1 to indicate loopback IO */
+ hr_dev->loop_idc = 1;
-err_alloc_buf_failed:
- for (i -= 1; i >= 0; i--)
- dma_free_coherent(dev, buf_chk_sz,
- link_tbl->pg_list[i].buf,
- link_tbl->pg_list[i].map);
- kfree(link_tbl->pg_list);
+ mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN |
+ IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER;
+ attr->qp_state = IB_QPS_RTR;
+ attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
+ attr->path_mtu = IB_MTU_256;
+ attr->dest_qp_num = hr_qp->qpn;
+ attr->rq_psn = HNS_ROCE_FREE_MR_USED_PSN;
-err_kcalloc_failed:
- dma_free_coherent(dev, size, link_tbl->table.buf,
- link_tbl->table.map);
+ rdma_ah_set_sl(&attr->ah_attr, (u8)sl_num);
-out:
- return -ENOMEM;
+ ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT,
+ IB_QPS_RTR, NULL);
+ hr_dev->loop_idc = loopback;
+ if (ret) {
+ ibdev_err(ibdev, "failed to modify qp to rtr, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ mask = IB_QP_STATE | IB_QP_SQ_PSN | IB_QP_RETRY_CNT | IB_QP_TIMEOUT |
+ IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC;
+ attr->qp_state = IB_QPS_RTS;
+ attr->sq_psn = HNS_ROCE_FREE_MR_USED_PSN;
+ attr->retry_cnt = HNS_ROCE_FREE_MR_USED_QP_RETRY_CNT;
+ attr->timeout = HNS_ROCE_FREE_MR_USED_QP_TIMEOUT;
+ ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_RTR,
+ IB_QPS_RTS, NULL);
+ if (ret)
+ ibdev_err(ibdev, "failed to modify qp to rts, ret = %d.\n",
+ ret);
+
+ return ret;
}
-static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev,
- struct hns_roce_link_table *link_tbl)
+static int free_mr_modify_qp(struct hns_roce_dev *hr_dev)
{
- struct device *dev = hr_dev->dev;
- int size;
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_qp_attr attr = {};
+ int ret;
int i;
- size = link_tbl->npages * sizeof(struct hns_roce_link_table_entry);
+ rdma_ah_set_grh(&attr.ah_attr, NULL, 0, 0, 1, 0);
+ rdma_ah_set_static_rate(&attr.ah_attr, 3);
+ rdma_ah_set_port_num(&attr.ah_attr, 1);
- for (i = 0; i < link_tbl->npages; ++i)
- if (link_tbl->pg_list[i].buf)
- dma_free_coherent(dev, link_tbl->pg_sz,
- link_tbl->pg_list[i].buf,
- link_tbl->pg_list[i].map);
- kfree(link_tbl->pg_list);
+ for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
+ ret = free_mr_modify_rsv_qp(hr_dev, &attr, i);
+ if (ret)
+ return ret;
+ }
- dma_free_coherent(dev, size, link_tbl->table.buf,
- link_tbl->table.map);
+ return 0;
}
-static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
+static int free_mr_init(struct hns_roce_dev *hr_dev)
{
struct hns_roce_v2_priv *priv = hr_dev->priv;
- int qpc_count, cqc_count;
- int ret, i;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ int ret;
+
+ mutex_init(&free_mr->mutex);
- /* TSQ includes SQ doorbell and ack doorbell */
- ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE);
+ ret = free_mr_alloc_res(hr_dev);
if (ret) {
- dev_err(hr_dev->dev, "TSQ init failed, ret = %d.\n", ret);
+ mutex_destroy(&free_mr->mutex);
return ret;
}
- ret = hns_roce_init_link_table(hr_dev, TPQ_LINK_TABLE);
- if (ret) {
- dev_err(hr_dev->dev, "TPQ init failed, ret = %d.\n", ret);
- goto err_tpq_init_failed;
+ ret = free_mr_modify_qp(hr_dev);
+ if (ret)
+ goto err_modify_qp;
+
+ return 0;
+
+err_modify_qp:
+ free_mr_exit(hr_dev);
+
+ return ret;
+}
+
+static int get_hem_table(struct hns_roce_dev *hr_dev)
+{
+ unsigned int qpc_count;
+ unsigned int cqc_count;
+ unsigned int gmv_count;
+ int ret;
+ int i;
+
+ /* Alloc memory for source address table buffer space chunk */
+ for (gmv_count = 0; gmv_count < hr_dev->caps.gmv_entry_num;
+ gmv_count++) {
+ ret = hns_roce_table_get(hr_dev, &hr_dev->gmv_table, gmv_count);
+ if (ret)
+ goto err_gmv_failed;
}
- /* Alloc memory for QPC Timer buffer space chunk*/
+ if (hr_dev->is_vf)
+ return 0;
+
+ /* Alloc memory for QPC Timer buffer space chunk */
for (qpc_count = 0; qpc_count < hr_dev->caps.qpc_timer_bt_num;
qpc_count++) {
ret = hns_roce_table_get(hr_dev, &hr_dev->qpc_timer_table,
@@ -1858,7 +3028,7 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
}
}
- /* Alloc memory for CQC Timer buffer space chunk*/
+ /* Alloc memory for CQC Timer buffer space chunk */
for (cqc_count = 0; cqc_count < hr_dev->caps.cqc_timer_bt_num;
cqc_count++) {
ret = hns_roce_table_get(hr_dev, &hr_dev->cqc_timer_table,
@@ -1879,187 +3049,305 @@ err_qpc_timer_failed:
for (i = 0; i < qpc_count; i++)
hns_roce_table_put(hr_dev, &hr_dev->qpc_timer_table, i);
- hns_roce_free_link_table(hr_dev, &priv->tpq);
-
-err_tpq_init_failed:
- hns_roce_free_link_table(hr_dev, &priv->tsq);
+err_gmv_failed:
+ for (i = 0; i < gmv_count; i++)
+ hns_roce_table_put(hr_dev, &hr_dev->gmv_table, i);
return ret;
}
-static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
+static void put_hem_table(struct hns_roce_dev *hr_dev)
{
- struct hns_roce_v2_priv *priv = hr_dev->priv;
+ int i;
+
+ for (i = 0; i < hr_dev->caps.gmv_entry_num; i++)
+ hns_roce_table_put(hr_dev, &hr_dev->gmv_table, i);
- hns_roce_free_link_table(hr_dev, &priv->tpq);
- hns_roce_free_link_table(hr_dev, &priv->tsq);
+ if (hr_dev->is_vf)
+ return;
+
+ for (i = 0; i < hr_dev->caps.qpc_timer_bt_num; i++)
+ hns_roce_table_put(hr_dev, &hr_dev->qpc_timer_table, i);
+
+ for (i = 0; i < hr_dev->caps.cqc_timer_bt_num; i++)
+ hns_roce_table_put(hr_dev, &hr_dev->cqc_timer_table, i);
}
-static int hns_roce_query_mbox_status(struct hns_roce_dev *hr_dev)
+static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
{
- struct hns_roce_cmq_desc desc;
- struct hns_roce_mbox_status *mb_st =
- (struct hns_roce_mbox_status *)desc.data;
- enum hns_roce_cmd_return_status status;
+ int ret;
- hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST, true);
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ ret = free_mr_init(hr_dev);
+ if (ret) {
+ dev_err(hr_dev->dev, "failed to init free mr!\n");
+ return ret;
+ }
+ }
- status = hns_roce_cmq_send(hr_dev, &desc, 1);
- if (status)
- return status;
+ /* The hns ROCEE requires the extdb info to be cleared before use */
+ ret = hns_roce_clear_extdb_list_info(hr_dev);
+ if (ret)
+ goto err_clear_extdb_failed;
- return cpu_to_le32(mb_st->mb_status_hw_run);
-}
+ ret = get_hem_table(hr_dev);
+ if (ret)
+ goto err_get_hem_table_failed;
-static int hns_roce_v2_cmd_pending(struct hns_roce_dev *hr_dev)
-{
- u32 status = hns_roce_query_mbox_status(hr_dev);
+ if (hr_dev->is_vf)
+ return 0;
- return status >> HNS_ROCE_HW_RUN_BIT_SHIFT;
+ ret = hns_roce_init_link_table(hr_dev);
+ if (ret) {
+ dev_err(hr_dev->dev, "failed to init llm, ret = %d.\n", ret);
+ goto err_llm_init_failed;
+ }
+
+ return 0;
+
+err_llm_init_failed:
+ put_hem_table(hr_dev);
+err_get_hem_table_failed:
+ hns_roce_function_clear(hr_dev);
+err_clear_extdb_failed:
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ free_mr_exit(hr_dev);
+
+ return ret;
}
-static int hns_roce_v2_cmd_complete(struct hns_roce_dev *hr_dev)
+static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
{
- u32 status = hns_roce_query_mbox_status(hr_dev);
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ free_mr_exit(hr_dev);
+
+ hns_roce_function_clear(hr_dev);
- return status & HNS_ROCE_HW_MB_STATUS_MASK;
+ if (!hr_dev->is_vf)
+ hns_roce_free_link_table(hr_dev);
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ free_dip_entry(hr_dev);
}
-static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev, u64 in_param,
- u64 out_param, u32 in_modifier, u8 op_modifier,
- u16 op, u16 token, int event)
+static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
{
struct hns_roce_cmq_desc desc;
struct hns_roce_post_mbox *mb = (struct hns_roce_post_mbox *)desc.data;
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_POST_MB, false);
- mb->in_param_l = cpu_to_le64(in_param);
- mb->in_param_h = cpu_to_le64(in_param) >> 32;
- mb->out_param_l = cpu_to_le64(out_param);
- mb->out_param_h = cpu_to_le64(out_param) >> 32;
- mb->cmd_tag = cpu_to_le32(in_modifier << 8 | op);
- mb->token_event_en = cpu_to_le32(event << 16 | token);
+ mb->in_param_l = cpu_to_le32(mbox_msg->in_param);
+ mb->in_param_h = cpu_to_le32(mbox_msg->in_param >> 32);
+ mb->out_param_l = cpu_to_le32(mbox_msg->out_param);
+ mb->out_param_h = cpu_to_le32(mbox_msg->out_param >> 32);
+ mb->cmd_tag = cpu_to_le32(mbox_msg->tag << 8 | mbox_msg->cmd);
+ mb->token_event_en = cpu_to_le32(mbox_msg->event_en << 16 |
+ mbox_msg->token);
return hns_roce_cmq_send(hr_dev, &desc, 1);
}
-static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param,
- u64 out_param, u32 in_modifier, u8 op_modifier,
- u16 op, u16 token, int event)
+static int v2_wait_mbox_complete(struct hns_roce_dev *hr_dev, u32 timeout,
+ u8 *complete_status)
{
- struct device *dev = hr_dev->dev;
+ struct hns_roce_mbox_status *mb_st;
+ struct hns_roce_cmq_desc desc;
unsigned long end;
- int ret;
+ int ret = -EBUSY;
+ u32 status;
+ bool busy;
+
+ mb_st = (struct hns_roce_mbox_status *)desc.data;
+ end = msecs_to_jiffies(timeout) + jiffies;
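+ /*
+ * Poll the mailbox status over the CMQ until the HW_RUN bit clears,
+ * the mailbox becomes unavailable, or the timeout expires.
+ */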
+ while (v2_chk_mbox_is_avail(hr_dev, &busy)) {
+ if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR)
+ return -EIO;
+
+ status = 0;
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST,
+ true);
+ ret = __hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (!ret) {
+ status = le32_to_cpu(mb_st->mb_status_hw_run);
+ /* No pending message exists in ROCEE mbox. */
+ if (!(status & MB_ST_HW_RUN_M))
+ break;
+ } else if (!v2_chk_mbox_is_avail(hr_dev, &busy)) {
+ break;
+ }
- end = msecs_to_jiffies(HNS_ROCE_V2_GO_BIT_TIMEOUT_MSECS) + jiffies;
- while (hns_roce_v2_cmd_pending(hr_dev)) {
if (time_after(jiffies, end)) {
- dev_dbg(dev, "jiffies=%d end=%d\n", (int)jiffies,
- (int)end);
- return -EAGAIN;
+ dev_err_ratelimited(hr_dev->dev,
+ "failed to wait mbox status 0x%x\n",
+ status);
+ return -ETIMEDOUT;
}
+
cond_resched();
+ ret = -EBUSY;
}
- ret = hns_roce_mbox_post(hr_dev, in_param, out_param, in_modifier,
- op_modifier, op, token, event);
- if (ret)
- dev_err(dev, "Post mailbox fail(%d)\n", ret);
+ if (!ret) {
+ *complete_status = (u8)(status & MB_ST_COMPLETE_M);
+ } else if (!v2_chk_mbox_is_avail(hr_dev, &busy)) {
+ /* Ignore all errors if the mbox is unavailable. */
+ ret = 0;
+ *complete_status = MB_ST_COMPLETE_M;
+ }
return ret;
}
-static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev,
- unsigned long timeout)
+static int v2_post_mbox(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
{
- struct device *dev = hr_dev->dev;
- unsigned long end = 0;
- u32 status;
-
- end = msecs_to_jiffies(timeout) + jiffies;
- while (hns_roce_v2_cmd_pending(hr_dev) && time_before(jiffies, end))
- cond_resched();
+ u8 status = 0;
+ int ret;
- if (hns_roce_v2_cmd_pending(hr_dev)) {
- dev_err(dev, "[cmd_poll]hw run cmd TIMEDOUT!\n");
- return -ETIMEDOUT;
+ /* Wait for the mbox to be idle */
+ ret = v2_wait_mbox_complete(hr_dev, HNS_ROCE_V2_GO_BIT_TIMEOUT_MSECS,
+ &status);
+ if (unlikely(ret)) {
+ dev_err_ratelimited(hr_dev->dev,
+ "failed to check post mbox status = 0x%x, ret = %d.\n",
+ status, ret);
+ return ret;
}
- status = hns_roce_v2_cmd_complete(hr_dev);
- if (status != 0x1) {
- if (status == CMD_RST_PRC_EBUSY)
- return status;
+ /* Post new message to mbox */
+ ret = hns_roce_mbox_post(hr_dev, mbox_msg);
+ if (ret)
+ dev_err_ratelimited(hr_dev->dev,
+ "failed to post mailbox, ret = %d.\n", ret);
+
+ return ret;
+}
+
+static int v2_poll_mbox_done(struct hns_roce_dev *hr_dev)
+{
+ u8 status = 0;
+ int ret;
- dev_err(dev, "mailbox status 0x%x!\n", status);
- return -EBUSY;
+ ret = v2_wait_mbox_complete(hr_dev, HNS_ROCE_CMD_TIMEOUT_MSECS,
+ &status);
+ if (!ret) {
+ if (status != MB_ST_COMPLETE_SUCC)
+ return -EBUSY;
+ } else {
+ dev_err_ratelimited(hr_dev->dev,
+ "failed to check mbox status = 0x%x, ret = %d.\n",
+ status, ret);
}
- return 0;
+ return ret;
}
-static int hns_roce_config_sgid_table(struct hns_roce_dev *hr_dev,
- int gid_index, const union ib_gid *gid,
- enum hns_roce_sgid_type sgid_type)
+static void copy_gid(void *dest, const union ib_gid *gid)
+{
+#define GID_SIZE 4
+ const union ib_gid *src = gid;
+ __le32 (*p)[GID_SIZE] = dest;
+ int i;
+
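+ /* A NULL gid clears the entry by copying the zero GID (zgid) instead. */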
+ if (!gid)
+ src = &zgid;
+
+ for (i = 0; i < GID_SIZE; i++)
+ (*p)[i] = cpu_to_le32(*(u32 *)&src->raw[i * sizeof(u32)]);
+}
+
+static int config_sgid_table(struct hns_roce_dev *hr_dev,
+ int gid_index, const union ib_gid *gid,
+ enum hns_roce_sgid_type sgid_type)
{
struct hns_roce_cmq_desc desc;
struct hns_roce_cfg_sgid_tb *sgid_tb =
(struct hns_roce_cfg_sgid_tb *)desc.data;
- u32 *p;
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false);
- roce_set_field(sgid_tb->table_idx_rsv,
- CFG_SGID_TB_TABLE_IDX_M,
- CFG_SGID_TB_TABLE_IDX_S, gid_index);
- roce_set_field(sgid_tb->vf_sgid_type_rsv,
- CFG_SGID_TB_VF_SGID_TYPE_M,
- CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type);
+ hr_reg_write(sgid_tb, CFG_SGID_TB_TABLE_IDX, gid_index);
+ hr_reg_write(sgid_tb, CFG_SGID_TB_VF_SGID_TYPE, sgid_type);
- p = (u32 *)&gid->raw[0];
- sgid_tb->vf_sgid_l = cpu_to_le32(*p);
+ copy_gid(&sgid_tb->vf_sgid_l, gid);
- p = (u32 *)&gid->raw[4];
- sgid_tb->vf_sgid_ml = cpu_to_le32(*p);
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
- p = (u32 *)&gid->raw[8];
- sgid_tb->vf_sgid_mh = cpu_to_le32(*p);
+static int config_gmv_table(struct hns_roce_dev *hr_dev,
+ int gid_index, const union ib_gid *gid,
+ enum hns_roce_sgid_type sgid_type,
+ const struct ib_gid_attr *attr)
+{
+ struct hns_roce_cmq_desc desc[2];
+ struct hns_roce_cfg_gmv_tb_a *tb_a =
+ (struct hns_roce_cfg_gmv_tb_a *)desc[0].data;
+ struct hns_roce_cfg_gmv_tb_b *tb_b =
+ (struct hns_roce_cfg_gmv_tb_b *)desc[1].data;
- p = (u32 *)&gid->raw[0xc];
- sgid_tb->vf_sgid_h = cpu_to_le32(*p);
+ u16 vlan_id = VLAN_CFI_MASK;
+ u8 mac[ETH_ALEN] = {};
+ int ret;
- return hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (gid) {
+ ret = rdma_read_gid_l2_fields(attr, &vlan_id, mac);
+ if (ret)
+ return ret;
+ }
+
+ hns_roce_cmq_setup_basic_desc(&desc[0], HNS_ROCE_OPC_CFG_GMV_TBL, false);
+ desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+
+ hns_roce_cmq_setup_basic_desc(&desc[1], HNS_ROCE_OPC_CFG_GMV_TBL, false);
+
+ copy_gid(&tb_a->vf_sgid_l, gid);
+
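+ /*
+ * vlan_id stays at VLAN_CFI_MASK when no valid VLAN is present, which
+ * keeps VF_VLAN_EN cleared below.
+ */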
+ hr_reg_write(tb_a, GMV_TB_A_VF_SGID_TYPE, sgid_type);
+ hr_reg_write(tb_a, GMV_TB_A_VF_VLAN_EN, vlan_id < VLAN_CFI_MASK);
+ hr_reg_write(tb_a, GMV_TB_A_VF_VLAN_ID, vlan_id);
+
+ tb_b->vf_smac_l = cpu_to_le32(*(u32 *)mac);
+
+ hr_reg_write(tb_b, GMV_TB_B_SMAC_H, *(u16 *)&mac[4]);
+ hr_reg_write(tb_b, GMV_TB_B_SGID_IDX, gid_index);
+
+ return hns_roce_cmq_send(hr_dev, desc, 2);
}
-static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port,
- int gid_index, const union ib_gid *gid,
+static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, int gid_index,
+ const union ib_gid *gid,
const struct ib_gid_attr *attr)
{
enum hns_roce_sgid_type sgid_type = GID_TYPE_FLAG_ROCE_V1;
int ret;
- if (!gid || !attr)
- return -EINVAL;
-
- if (attr->gid_type == IB_GID_TYPE_ROCE)
- sgid_type = GID_TYPE_FLAG_ROCE_V1;
-
- if (attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
- if (ipv6_addr_v4mapped((void *)gid))
- sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV4;
- else
- sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6;
+ if (gid) {
+ if (attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
+ if (ipv6_addr_v4mapped((void *)gid))
+ sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV4;
+ else
+ sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6;
+ } else if (attr->gid_type == IB_GID_TYPE_ROCE) {
+ sgid_type = GID_TYPE_FLAG_ROCE_V1;
+ }
}
- ret = hns_roce_config_sgid_table(hr_dev, gid_index, gid, sgid_type);
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ ret = config_gmv_table(hr_dev, gid_index, gid, sgid_type, attr);
+ else
+ ret = config_sgid_table(hr_dev, gid_index, gid, sgid_type);
+
if (ret)
- dev_err(hr_dev->dev, "Configure sgid table failed(%d)!\n", ret);
+ ibdev_err(&hr_dev->ib_dev, "failed to set gid, ret = %d!\n",
+ ret);
return ret;
}
static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
- u8 *addr)
+ const u8 *addr)
{
struct hns_roce_cmq_desc desc;
struct hns_roce_cfg_smac_tb *smac_tb =
@@ -2072,103 +3360,71 @@ static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
reg_smac_l = *(u32 *)(&addr[0]);
reg_smac_h = *(u16 *)(&addr[4]);
- memset(smac_tb, 0, sizeof(*smac_tb));
- roce_set_field(smac_tb->tb_idx_rsv,
- CFG_SMAC_TB_IDX_M,
- CFG_SMAC_TB_IDX_S, phy_port);
- roce_set_field(smac_tb->vf_smac_h_rsv,
- CFG_SMAC_TB_VF_SMAC_H_M,
- CFG_SMAC_TB_VF_SMAC_H_S, reg_smac_h);
- smac_tb->vf_smac_l = reg_smac_l;
+ hr_reg_write(smac_tb, CFG_SMAC_TB_IDX, phy_port);
+ hr_reg_write(smac_tb, CFG_SMAC_TB_VF_SMAC_H, reg_smac_h);
+ smac_tb->vf_smac_l = cpu_to_le32(reg_smac_l);
return hns_roce_cmq_send(hr_dev, &desc, 1);
}
-static int set_mtpt_pbl(struct hns_roce_v2_mpt_entry *mpt_entry,
+static int set_mtpt_pbl(struct hns_roce_dev *hr_dev,
+ struct hns_roce_v2_mpt_entry *mpt_entry,
struct hns_roce_mr *mr)
{
- struct sg_dma_page_iter sg_iter;
- u64 page_addr;
- u64 *pages;
+ u64 pages[HNS_ROCE_V2_MAX_INNER_MTPT_NUM] = { 0 };
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ dma_addr_t pbl_ba;
+ int ret;
int i;
- mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size);
- mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3));
- roce_set_field(mpt_entry->byte_48_mode_ba,
- V2_MPT_BYTE_48_PBL_BA_H_M, V2_MPT_BYTE_48_PBL_BA_H_S,
- upper_32_bits(mr->pbl_ba >> 3));
+ ret = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages,
+ min_t(int, ARRAY_SIZE(pages), mr->npages));
+ if (ret) {
+ ibdev_err(ibdev, "failed to find PBL mtr, ret = %d.\n", ret);
+ return ret;
+ }
+
+ /* Shift page addresses down to the hardware address access unit */
+ for (i = 0; i < ARRAY_SIZE(pages); i++)
+ pages[i] >>= MPT_PBL_BUF_ADDR_S;
- pages = (u64 *)__get_free_page(GFP_KERNEL);
- if (!pages)
- return -ENOMEM;
+ pbl_ba = hns_roce_get_mtr_ba(&mr->pbl_mtr);
- i = 0;
- for_each_sg_dma_page(mr->umem->sg_head.sgl, &sg_iter, mr->umem->nmap, 0) {
- page_addr = sg_page_iter_dma_address(&sg_iter);
- pages[i] = page_addr >> 6;
+ mpt_entry->pbl_size = cpu_to_le32(mr->npages);
+ mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> MPT_PBL_BA_ADDR_S);
+ hr_reg_write(mpt_entry, MPT_PBL_BA_H,
+ upper_32_bits(pbl_ba >> MPT_PBL_BA_ADDR_S));
- /* Record the first 2 entry directly to MTPT table */
- if (i >= HNS_ROCE_V2_MAX_INNER_MTPT_NUM - 1)
- goto found;
- i++;
- }
-found:
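+ /* The first two PBL page addresses are recorded directly in the MPT entry. */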
mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0]));
- roce_set_field(mpt_entry->byte_56_pa0_h, V2_MPT_BYTE_56_PA0_H_M,
- V2_MPT_BYTE_56_PA0_H_S, upper_32_bits(pages[0]));
+ hr_reg_write(mpt_entry, MPT_PA0_H, upper_32_bits(pages[0]));
mpt_entry->pa1_l = cpu_to_le32(lower_32_bits(pages[1]));
- roce_set_field(mpt_entry->byte_64_buf_pa1, V2_MPT_BYTE_64_PA1_H_M,
- V2_MPT_BYTE_64_PA1_H_S, upper_32_bits(pages[1]));
- roce_set_field(mpt_entry->byte_64_buf_pa1,
- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
- mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
-
- free_page((unsigned long)pages);
+ hr_reg_write(mpt_entry, MPT_PA1_H, upper_32_bits(pages[1]));
+ hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ,
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
return 0;
}
-static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
- unsigned long mtpt_idx)
+static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev,
+ void *mb_buf, struct hns_roce_mr *mr)
{
struct hns_roce_v2_mpt_entry *mpt_entry;
- int ret;
mpt_entry = mb_buf;
memset(mpt_entry, 0, sizeof(*mpt_entry));
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
- V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID);
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
- V2_MPT_BYTE_4_PBL_HOP_NUM_S, mr->pbl_hop_num ==
- HNS_ROCE_HOP_NUM_0 ? 0 : mr->pbl_hop_num);
- roce_set_field(mpt_entry->byte_4_pd_hop_st,
- V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
- V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
- mr->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
- V2_MPT_BYTE_4_PD_S, mr->pd);
-
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 0);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 0);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S,
- (mr->access & IB_ACCESS_MW_BIND ? 1 : 0));
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S,
- mr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S,
- (mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0));
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S,
- (mr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0));
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S,
- (mr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0));
-
- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S,
- mr->type == MR_TYPE_MR ? 0 : 1);
- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_INNER_PA_VLD_S,
- 1);
+ hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID);
+ hr_reg_write(mpt_entry, MPT_PD, mr->pd);
+
+ hr_reg_write_bool(mpt_entry, MPT_ATOMIC_EN,
+ mr->access & IB_ACCESS_REMOTE_ATOMIC);
+ hr_reg_write_bool(mpt_entry, MPT_RR_EN,
+ mr->access & IB_ACCESS_REMOTE_READ);
+ hr_reg_write_bool(mpt_entry, MPT_RW_EN,
+ mr->access & IB_ACCESS_REMOTE_WRITE);
+ hr_reg_write_bool(mpt_entry, MPT_LW_EN,
+ mr->access & IB_ACCESS_LOCAL_WRITE);
mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size));
mpt_entry->len_h = cpu_to_le32(upper_32_bits(mr->size));
@@ -2176,56 +3432,51 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
mpt_entry->va_l = cpu_to_le32(lower_32_bits(mr->iova));
mpt_entry->va_h = cpu_to_le32(upper_32_bits(mr->iova));
+ if (mr->type != MR_TYPE_MR)
+ hr_reg_enable(mpt_entry, MPT_PA);
+
if (mr->type == MR_TYPE_DMA)
return 0;
- ret = set_mtpt_pbl(mpt_entry, mr);
+ if (mr->pbl_hop_num != HNS_ROCE_HOP_NUM_0)
+ hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, mr->pbl_hop_num);
- return ret;
+ hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ,
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
+ hr_reg_enable(mpt_entry, MPT_INNER_PA_VLD);
+
+ return set_mtpt_pbl(hr_dev, mpt_entry, mr);
}
static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
struct hns_roce_mr *mr, int flags,
- u32 pdn, int mr_access_flags, u64 iova,
- u64 size, void *mb_buf)
+ void *mb_buf)
{
struct hns_roce_v2_mpt_entry *mpt_entry = mb_buf;
+ u32 mr_access_flags = mr->access;
int ret = 0;
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
- V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID);
-
- if (flags & IB_MR_REREG_PD) {
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
- V2_MPT_BYTE_4_PD_S, pdn);
- mr->pd = pdn;
- }
+ hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID);
+ hr_reg_write(mpt_entry, MPT_PD, mr->pd);
if (flags & IB_MR_REREG_ACCESS) {
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en,
- V2_MPT_BYTE_8_BIND_EN_S,
- (mr_access_flags & IB_ACCESS_MW_BIND ? 1 : 0));
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en,
- V2_MPT_BYTE_8_ATOMIC_EN_S,
+ hr_reg_write(mpt_entry, MPT_ATOMIC_EN,
mr_access_flags & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S,
+ hr_reg_write(mpt_entry, MPT_RR_EN,
mr_access_flags & IB_ACCESS_REMOTE_READ ? 1 : 0);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S,
+ hr_reg_write(mpt_entry, MPT_RW_EN,
mr_access_flags & IB_ACCESS_REMOTE_WRITE ? 1 : 0);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S,
+ hr_reg_write(mpt_entry, MPT_LW_EN,
mr_access_flags & IB_ACCESS_LOCAL_WRITE ? 1 : 0);
}
if (flags & IB_MR_REREG_TRANS) {
- mpt_entry->va_l = cpu_to_le32(lower_32_bits(iova));
- mpt_entry->va_h = cpu_to_le32(upper_32_bits(iova));
- mpt_entry->len_l = cpu_to_le32(lower_32_bits(size));
- mpt_entry->len_h = cpu_to_le32(upper_32_bits(size));
+ mpt_entry->va_l = cpu_to_le32(lower_32_bits(mr->iova));
+ mpt_entry->va_h = cpu_to_le32(upper_32_bits(mr->iova));
+ mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size));
+ mpt_entry->len_h = cpu_to_le32(upper_32_bits(mr->size));
- mr->iova = iova;
- mr->size = size;
-
- ret = set_mtpt_pbl(mpt_entry, mr);
+ ret = set_mtpt_pbl(hr_dev, mpt_entry, mr);
}
return ret;
@@ -2233,135 +3484,165 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr)
{
+ dma_addr_t pbl_ba = hns_roce_get_mtr_ba(&mr->pbl_mtr);
struct hns_roce_v2_mpt_entry *mpt_entry;
mpt_entry = mb_buf;
memset(mpt_entry, 0, sizeof(*mpt_entry));
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
- V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
- V2_MPT_BYTE_4_PBL_HOP_NUM_S, 1);
- roce_set_field(mpt_entry->byte_4_pd_hop_st,
- V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
- V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
- mr->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
- V2_MPT_BYTE_4_PD_S, mr->pd);
-
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 1);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
-
- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_FRE_S, 1);
- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 0);
- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
-
- mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size);
-
- mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3));
- roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M,
- V2_MPT_BYTE_48_PBL_BA_H_S,
- upper_32_bits(mr->pbl_ba >> 3));
-
- roce_set_field(mpt_entry->byte_64_buf_pa1,
- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
- mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_FREE);
+ hr_reg_write(mpt_entry, MPT_PD, mr->pd);
+
+ hr_reg_enable(mpt_entry, MPT_RA_EN);
+ hr_reg_enable(mpt_entry, MPT_R_INV_EN);
+
+ hr_reg_enable(mpt_entry, MPT_FRE);
+ hr_reg_enable(mpt_entry, MPT_BPD);
+ hr_reg_clear(mpt_entry, MPT_PA);
+
+ hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, 1);
+ hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ,
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
+ hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ,
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
+
+ mpt_entry->pbl_size = cpu_to_le32(mr->npages);
+
+ mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(pbl_ba >>
+ MPT_PBL_BA_ADDR_S));
+ hr_reg_write(mpt_entry, MPT_PBL_BA_H,
+ upper_32_bits(pbl_ba >> MPT_PBL_BA_ADDR_S));
return 0;
}
-static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
+static int free_mr_post_send_lp_wqe(struct hns_roce_qp *hr_qp)
{
- struct hns_roce_v2_mpt_entry *mpt_entry;
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ const struct ib_send_wr *bad_wr;
+ struct ib_rdma_wr rdma_wr = {};
+ struct ib_send_wr *send_wr;
+ int ret;
- mpt_entry = mb_buf;
- memset(mpt_entry, 0, sizeof(*mpt_entry));
+ send_wr = &rdma_wr.wr;
+ send_wr->opcode = IB_WR_RDMA_WRITE;
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
- V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
- V2_MPT_BYTE_4_PD_S, mw->pdn);
- roce_set_field(mpt_entry->byte_4_pd_hop_st,
- V2_MPT_BYTE_4_PBL_HOP_NUM_M,
- V2_MPT_BYTE_4_PBL_HOP_NUM_S,
- mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ?
- 0 : mw->pbl_hop_num);
- roce_set_field(mpt_entry->byte_4_pd_hop_st,
- V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
- V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
- mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
-
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
- roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
-
- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 1);
- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
- roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BQP_S,
- mw->ibmw.type == IB_MW_TYPE_1 ? 0 : 1);
-
- roce_set_field(mpt_entry->byte_64_buf_pa1,
- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
- mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
-
- mpt_entry->lkey = cpu_to_le32(mw->rkey);
+ ret = hns_roce_v2_post_send(&hr_qp->ibqp, send_wr, &bad_wr);
+ if (ret) {
+ ibdev_err_ratelimited(ibdev, "failed to post wqe for free mr, ret = %d.\n",
+ ret);
+ return ret;
+ }
return 0;
}
-static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n)
-{
- return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf,
- n * HNS_ROCE_V2_CQE_ENTRY_SIZE);
-}
+static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries,
+ struct ib_wc *wc);
-static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n)
+static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev)
{
- struct hns_roce_v2_cqe *cqe = get_cqe_v2(hr_cq, n & hr_cq->ib_cq.cqe);
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_wc wc[ARRAY_SIZE(free_mr->rsv_qp)];
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_qp *hr_qp;
+ unsigned long end;
+ int cqe_cnt = 0;
+ int npolled;
+ int ret;
+ int i;
- /* Get cqe when Owner bit is Conversely with the MSB of cons_idx */
- return (roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_OWNER_S) ^
- !!(n & (hr_cq->ib_cq.cqe + 1))) ? cqe : NULL;
+ /*
+ * If device initialization is not complete, or the device is being
+ * uninstalled, there is no need to run the free mr flow.
+ */
+ if (priv->handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT ||
+ priv->handle->rinfo.instance_state == HNS_ROCE_STATE_INIT ||
+ hr_dev->state == HNS_ROCE_DEVICE_STATE_UNINIT)
+ return;
+
+ mutex_lock(&free_mr->mutex);
+
+ for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
+ hr_qp = free_mr->rsv_qp[i];
+
+ ret = free_mr_post_send_lp_wqe(hr_qp);
+ if (ret) {
+ ibdev_err_ratelimited(ibdev,
+ "failed to send wqe (qp:0x%lx) for free mr, ret = %d.\n",
+ hr_qp->qpn, ret);
+ break;
+ }
+
+ cqe_cnt++;
+ }
+
+ end = msecs_to_jiffies(HNS_ROCE_V2_FREE_MR_TIMEOUT) + jiffies;
+ while (cqe_cnt) {
+ npolled = hns_roce_v2_poll_cq(&free_mr->rsv_cq->ib_cq, cqe_cnt, wc);
+ if (npolled < 0) {
+ ibdev_err_ratelimited(ibdev,
+ "failed to poll cqe for free mr, remain %d cqe.\n",
+ cqe_cnt);
+ goto out;
+ }
+
+ if (time_after(jiffies, end)) {
+ ibdev_err_ratelimited(ibdev,
+ "failed to poll cqe for free mr and timeout, remain %d cqe.\n",
+ cqe_cnt);
+ goto out;
+ }
+ cqe_cnt -= npolled;
+ }
+
+out:
+ mutex_unlock(&free_mr->mutex);
}
-static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq)
+static void hns_roce_v2_dereg_mr(struct hns_roce_dev *hr_dev)
{
- return get_sw_cqe_v2(hr_cq, hr_cq->cons_index);
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ free_mr_send_cmd_to_hw(hr_dev);
}
-static void *get_srq_wqe(struct hns_roce_srq *srq, int n)
+static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n)
{
- return hns_roce_buf_offset(&srq->buf, n << srq->wqe_shift);
+ return hns_roce_buf_offset(hr_cq->mtr.kmem, n * hr_cq->cqe_size);
}
-static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index)
+static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, unsigned int n)
{
- u32 bitmap_num;
- int bit_num;
-
- /* always called with interrupts disabled. */
- spin_lock(&srq->lock);
-
- bitmap_num = wqe_index / (sizeof(u64) * 8);
- bit_num = wqe_index % (sizeof(u64) * 8);
- srq->idx_que.bitmap[bitmap_num] |= (1ULL << bit_num);
- srq->tail++;
+ struct hns_roce_v2_cqe *cqe = get_cqe_v2(hr_cq, n & hr_cq->ib_cq.cqe);
- spin_unlock(&srq->lock);
+ /* Get cqe when the Owner bit is the inverse of the MSB of cons_idx */
+ return (hr_reg_read(cqe, CQE_OWNER) ^ !!(n & hr_cq->cq_depth)) ? cqe :
+ NULL;
}
-static void hns_roce_v2_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index)
+static inline void update_cq_db(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cq *hr_cq)
{
- *hr_cq->set_ci_db = cons_index & 0xffffff;
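+ /*
+ * CQs with a record doorbell update the consumer index in the
+ * memory-resident doorbell record; otherwise ring the hardware CQ
+ * doorbell register.
+ */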
+ if (likely(hr_cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB)) {
+ *hr_cq->set_ci_db = hr_cq->cons_index & V2_CQ_DB_CONS_IDX_M;
+ } else {
+ struct hns_roce_v2_db cq_db = {};
+
+ hr_reg_write(&cq_db, DB_TAG, hr_cq->cqn);
+ hr_reg_write(&cq_db, DB_CMD, HNS_ROCE_V2_CQ_DB);
+ hr_reg_write(&cq_db, DB_CQ_CI, hr_cq->cons_index);
+ hr_reg_write(&cq_db, DB_CQ_CMD_SN, 1);
+
+ hns_roce_write64(hr_dev, (__le32 *)&cq_db, hr_cq->db_reg);
+ }
}
static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
struct hns_roce_srq *srq)
{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
struct hns_roce_v2_cqe *cqe, *dest;
u32 prod_index;
int nfreed = 0;
@@ -2370,7 +3651,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
for (prod_index = hr_cq->cons_index; get_sw_cqe_v2(hr_cq, prod_index);
++prod_index) {
- if (prod_index == hr_cq->cons_index + hr_cq->ib_cq.cqe)
+ if (prod_index > hr_cq->cons_index + hr_cq->ib_cq.cqe)
break;
}
@@ -2380,36 +3661,24 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
*/
while ((int) --prod_index - (int) hr_cq->cons_index >= 0) {
cqe = get_cqe_v2(hr_cq, prod_index & hr_cq->ib_cq.cqe);
- if ((roce_get_field(cqe->byte_16, V2_CQE_BYTE_16_LCL_QPN_M,
- V2_CQE_BYTE_16_LCL_QPN_S) &
- HNS_ROCE_V2_CQE_QPN_MASK) == qpn) {
- if (srq &&
- roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_S_R_S)) {
- wqe_index = roce_get_field(cqe->byte_4,
- V2_CQE_BYTE_4_WQE_INDX_M,
- V2_CQE_BYTE_4_WQE_INDX_S);
+ if (hr_reg_read(cqe, CQE_LCL_QPN) == qpn) {
+ if (srq && hr_reg_read(cqe, CQE_S_R)) {
+ wqe_index = hr_reg_read(cqe, CQE_WQE_IDX);
hns_roce_free_srq_wqe(srq, wqe_index);
}
++nfreed;
} else if (nfreed) {
dest = get_cqe_v2(hr_cq, (prod_index + nfreed) &
hr_cq->ib_cq.cqe);
- owner_bit = roce_get_bit(dest->byte_4,
- V2_CQE_BYTE_4_OWNER_S);
- memcpy(dest, cqe, sizeof(*cqe));
- roce_set_bit(dest->byte_4, V2_CQE_BYTE_4_OWNER_S,
- owner_bit);
+ owner_bit = hr_reg_read(dest, CQE_OWNER);
+ memcpy(dest, cqe, hr_cq->cqe_size);
+ hr_reg_write(dest, CQE_OWNER, owner_bit);
}
}
if (nfreed) {
hr_cq->cons_index += nfreed;
- /*
- * Make sure update of buffer contents is done before
- * updating consumer index.
- */
- wmb();
- hns_roce_v2_cq_set_ci(hr_cq, hr_cq->cons_index);
+ update_cq_db(hr_dev, hr_cq);
}
}
@@ -2423,77 +3692,51 @@ static void hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
struct hns_roce_cq *hr_cq, void *mb_buf,
- u64 *mtts, dma_addr_t dma_handle, int nent,
- u32 vector)
+ u64 *mtts, dma_addr_t dma_handle)
{
struct hns_roce_v2_cq_context *cq_context;
cq_context = mb_buf;
memset(cq_context, 0, sizeof(*cq_context));
- roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_CQ_ST_M,
- V2_CQC_BYTE_4_CQ_ST_S, V2_CQ_STATE_VALID);
- roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_ARM_ST_M,
- V2_CQC_BYTE_4_ARM_ST_S, REG_NXT_CEQE);
- roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_SHIFT_M,
- V2_CQC_BYTE_4_SHIFT_S, ilog2((unsigned int)nent));
- roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_CEQN_M,
- V2_CQC_BYTE_4_CEQN_S, vector);
- cq_context->byte_4_pg_ceqn = cpu_to_le32(cq_context->byte_4_pg_ceqn);
-
- roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQN_M,
- V2_CQC_BYTE_8_CQN_S, hr_cq->cqn);
-
- cq_context->cqe_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT);
- cq_context->cqe_cur_blk_addr =
- cpu_to_le32(cq_context->cqe_cur_blk_addr);
-
- roce_set_field(cq_context->byte_16_hop_addr,
- V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_M,
- V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_S,
- cpu_to_le32((mtts[0]) >> (32 + PAGE_ADDR_SHIFT)));
- roce_set_field(cq_context->byte_16_hop_addr,
- V2_CQC_BYTE_16_CQE_HOP_NUM_M,
- V2_CQC_BYTE_16_CQE_HOP_NUM_S, hr_dev->caps.cqe_hop_num ==
- HNS_ROCE_HOP_NUM_0 ? 0 : hr_dev->caps.cqe_hop_num);
-
- cq_context->cqe_nxt_blk_addr = (u32)(mtts[1] >> PAGE_ADDR_SHIFT);
- roce_set_field(cq_context->byte_24_pgsz_addr,
- V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_M,
- V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_S,
- cpu_to_le32((mtts[1]) >> (32 + PAGE_ADDR_SHIFT)));
- roce_set_field(cq_context->byte_24_pgsz_addr,
- V2_CQC_BYTE_24_CQE_BA_PG_SZ_M,
- V2_CQC_BYTE_24_CQE_BA_PG_SZ_S,
- hr_dev->caps.cqe_ba_pg_sz + PG_SHIFT_OFFSET);
- roce_set_field(cq_context->byte_24_pgsz_addr,
- V2_CQC_BYTE_24_CQE_BUF_PG_SZ_M,
- V2_CQC_BYTE_24_CQE_BUF_PG_SZ_S,
- hr_dev->caps.cqe_buf_pg_sz + PG_SHIFT_OFFSET);
-
- cq_context->cqe_ba = (u32)(dma_handle >> 3);
-
- roce_set_field(cq_context->byte_40_cqe_ba, V2_CQC_BYTE_40_CQE_BA_M,
- V2_CQC_BYTE_40_CQE_BA_S, (dma_handle >> (32 + 3)));
-
- if (hr_cq->db_en)
- roce_set_bit(cq_context->byte_44_db_record,
- V2_CQC_BYTE_44_DB_RECORD_EN_S, 1);
-
- roce_set_field(cq_context->byte_44_db_record,
- V2_CQC_BYTE_44_DB_RECORD_ADDR_M,
- V2_CQC_BYTE_44_DB_RECORD_ADDR_S,
- ((u32)hr_cq->db.dma) >> 1);
- cq_context->db_record_addr = hr_cq->db.dma >> 32;
-
- roce_set_field(cq_context->byte_56_cqe_period_maxcnt,
- V2_CQC_BYTE_56_CQ_MAX_CNT_M,
- V2_CQC_BYTE_56_CQ_MAX_CNT_S,
- HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM);
- roce_set_field(cq_context->byte_56_cqe_period_maxcnt,
- V2_CQC_BYTE_56_CQ_PERIOD_M,
- V2_CQC_BYTE_56_CQ_PERIOD_S,
- HNS_ROCE_V2_CQ_DEFAULT_INTERVAL);
+ hr_reg_write(cq_context, CQC_CQ_ST, V2_CQ_STATE_VALID);
+ hr_reg_write(cq_context, CQC_ARM_ST, NO_ARMED);
+ hr_reg_write(cq_context, CQC_SHIFT, ilog2(hr_cq->cq_depth));
+ hr_reg_write(cq_context, CQC_CEQN, hr_cq->vector);
+ hr_reg_write(cq_context, CQC_CQN, hr_cq->cqn);
+
+ if (hr_cq->cqe_size == HNS_ROCE_V3_CQE_SIZE)
+ hr_reg_write(cq_context, CQC_CQE_SIZE, CQE_SIZE_64B);
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH)
+ hr_reg_enable(cq_context, CQC_STASH);
+
+ hr_reg_write(cq_context, CQC_CQE_CUR_BLK_ADDR_L,
+ to_hr_hw_page_addr(mtts[0]));
+ hr_reg_write(cq_context, CQC_CQE_CUR_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(mtts[0])));
+ hr_reg_write(cq_context, CQC_CQE_HOP_NUM, hr_dev->caps.cqe_hop_num ==
+ HNS_ROCE_HOP_NUM_0 ? 0 : hr_dev->caps.cqe_hop_num);
+ hr_reg_write(cq_context, CQC_CQE_NEX_BLK_ADDR_L,
+ to_hr_hw_page_addr(mtts[1]));
+ hr_reg_write(cq_context, CQC_CQE_NEX_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(mtts[1])));
+ hr_reg_write(cq_context, CQC_CQE_BAR_PG_SZ,
+ to_hr_hw_page_shift(hr_cq->mtr.hem_cfg.ba_pg_shift));
+ hr_reg_write(cq_context, CQC_CQE_BUF_PG_SZ,
+ to_hr_hw_page_shift(hr_cq->mtr.hem_cfg.buf_pg_shift));
+ hr_reg_write(cq_context, CQC_CQE_BA_L, dma_handle >> CQC_CQE_BA_L_S);
+ hr_reg_write(cq_context, CQC_CQE_BA_H, dma_handle >> CQC_CQE_BA_H_S);
+ hr_reg_write_bool(cq_context, CQC_DB_RECORD_EN,
+ hr_cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB);
+ hr_reg_write(cq_context, CQC_CQE_DB_RECORD_ADDR_L,
+ ((u32)hr_cq->db.dma) >> 1);
+ hr_reg_write(cq_context, CQC_CQE_DB_RECORD_ADDR_H,
+ hr_cq->db.dma >> CQC_CQE_DB_RECORD_ADDR_H_S);
+ hr_reg_write(cq_context, CQC_CQ_MAX_CNT,
+ HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM);
+ hr_reg_write(cq_context, CQC_CQ_PERIOD,
+ HNS_ROCE_V2_CQ_DEFAULT_INTERVAL);
}
static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq,
@@ -2501,348 +3744,354 @@ static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq,
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device);
struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
- u32 notification_flag;
- u32 doorbell[2];
+ struct hns_roce_v2_db cq_db = {};
+ u32 notify_flag;
- doorbell[0] = 0;
- doorbell[1] = 0;
-
- notification_flag = (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
- V2_CQ_DB_REQ_NOT : V2_CQ_DB_REQ_NOT_SOL;
/*
- * flags = 0; Notification Flag = 1, next
- * flags = 1; Notification Flag = 0, solocited
+ * flags = 0, then notify_flag : next
+ * flags = 1, then notify_flag : solicited
*/
- roce_set_field(doorbell[0], V2_CQ_DB_BYTE_4_TAG_M, V2_DB_BYTE_4_TAG_S,
- hr_cq->cqn);
- roce_set_field(doorbell[0], V2_CQ_DB_BYTE_4_CMD_M, V2_DB_BYTE_4_CMD_S,
- HNS_ROCE_V2_CQ_DB_NTR);
- roce_set_field(doorbell[1], V2_CQ_DB_PARAMETER_CONS_IDX_M,
- V2_CQ_DB_PARAMETER_CONS_IDX_S,
- hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1));
- roce_set_field(doorbell[1], V2_CQ_DB_PARAMETER_CMD_SN_M,
- V2_CQ_DB_PARAMETER_CMD_SN_S, hr_cq->arm_sn & 0x3);
- roce_set_bit(doorbell[1], V2_CQ_DB_PARAMETER_NOTIFY_S,
- notification_flag);
-
- hns_roce_write64(hr_dev, doorbell, hr_cq->cq_db_l);
+ notify_flag = (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
+ V2_CQ_DB_REQ_NOT : V2_CQ_DB_REQ_NOT_SOL;
+
+ hr_reg_write(&cq_db, DB_TAG, hr_cq->cqn);
+ hr_reg_write(&cq_db, DB_CMD, HNS_ROCE_V2_CQ_DB_NOTIFY);
+ hr_reg_write(&cq_db, DB_CQ_CI, hr_cq->cons_index);
+ hr_reg_write(&cq_db, DB_CQ_CMD_SN, hr_cq->arm_sn);
+ hr_reg_write(&cq_db, DB_CQ_NOTIFY, notify_flag);
+
+ hns_roce_write64(hr_dev, (__le32 *)&cq_db, hr_cq->db_reg);
return 0;
}
-static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe,
- struct hns_roce_qp **cur_qp,
- struct ib_wc *wc)
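+/*
+ * Generate IB_WC_WR_FLUSH_ERR completions in software for work requests
+ * that are still outstanding on the given work queue.
+ */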
+static int sw_comp(struct hns_roce_qp *hr_qp, struct hns_roce_wq *wq,
+ int num_entries, struct ib_wc *wc)
{
- struct hns_roce_rinl_sge *sge_list;
- u32 wr_num, wr_cnt, sge_num;
- u32 sge_cnt, data_len, size;
- void *wqe_buf;
+ unsigned int left;
+ int npolled = 0;
- wr_num = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_WQE_INDX_M,
- V2_CQE_BYTE_4_WQE_INDX_S) & 0xffff;
- wr_cnt = wr_num & ((*cur_qp)->rq.wqe_cnt - 1);
+ left = wq->head - wq->tail;
+ if (left == 0)
+ return 0;
- sge_list = (*cur_qp)->rq_inl_buf.wqe_list[wr_cnt].sg_list;
- sge_num = (*cur_qp)->rq_inl_buf.wqe_list[wr_cnt].sge_cnt;
- wqe_buf = get_recv_wqe(*cur_qp, wr_cnt);
- data_len = wc->byte_len;
+ left = min_t(unsigned int, (unsigned int)num_entries, left);
+ while (npolled < left) {
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ wc->vendor_err = 0;
+ wc->qp = &hr_qp->ibqp;
+
+ wq->tail++;
+ wc++;
+ npolled++;
+ }
+
+ return npolled;
+}
- for (sge_cnt = 0; (sge_cnt < sge_num) && (data_len); sge_cnt++) {
- size = min(sge_list[sge_cnt].len, data_len);
- memcpy((void *)sge_list[sge_cnt].addr, wqe_buf, size);
+static int hns_roce_v2_sw_poll_cq(struct hns_roce_cq *hr_cq, int num_entries,
+ struct ib_wc *wc)
+{
+ struct hns_roce_qp *hr_qp;
+ int npolled = 0;
- data_len -= size;
- wqe_buf += size;
+ list_for_each_entry(hr_qp, &hr_cq->sq_list, sq_node) {
+ npolled += sw_comp(hr_qp, &hr_qp->sq,
+ num_entries - npolled, wc + npolled);
+ if (npolled >= num_entries)
+ goto out;
}
- if (data_len) {
- wc->status = IB_WC_LOC_LEN_ERR;
- return -EAGAIN;
+ list_for_each_entry(hr_qp, &hr_cq->rq_list, rq_node) {
+ npolled += sw_comp(hr_qp, &hr_qp->rq,
+ num_entries - npolled, wc + npolled);
+ if (npolled >= num_entries)
+ goto out;
}
- return 0;
+out:
+ return npolled;
}
-static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
- struct hns_roce_qp **cur_qp, struct ib_wc *wc)
+static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
+ struct hns_roce_cq *cq, struct hns_roce_v2_cqe *cqe,
+ struct ib_wc *wc)
{
- struct hns_roce_srq *srq = NULL;
- struct hns_roce_dev *hr_dev;
- struct hns_roce_v2_cqe *cqe;
- struct hns_roce_qp *hr_qp;
- struct hns_roce_wq *wq;
- struct ib_qp_attr attr;
- int attr_mask;
- int is_send;
- u16 wqe_ctr;
- u32 opcode;
- u32 status;
- int qpn;
- int ret;
+ static const struct {
+ u32 cqe_status;
+ enum ib_wc_status wc_status;
+ } map[] = {
+ { HNS_ROCE_CQE_V2_SUCCESS, IB_WC_SUCCESS },
+ { HNS_ROCE_CQE_V2_LOCAL_LENGTH_ERR, IB_WC_LOC_LEN_ERR },
+ { HNS_ROCE_CQE_V2_LOCAL_QP_OP_ERR, IB_WC_LOC_QP_OP_ERR },
+ { HNS_ROCE_CQE_V2_LOCAL_PROT_ERR, IB_WC_LOC_PROT_ERR },
+ { HNS_ROCE_CQE_V2_WR_FLUSH_ERR, IB_WC_WR_FLUSH_ERR },
+ { HNS_ROCE_CQE_V2_MW_BIND_ERR, IB_WC_MW_BIND_ERR },
+ { HNS_ROCE_CQE_V2_BAD_RESP_ERR, IB_WC_BAD_RESP_ERR },
+ { HNS_ROCE_CQE_V2_LOCAL_ACCESS_ERR, IB_WC_LOC_ACCESS_ERR },
+ { HNS_ROCE_CQE_V2_REMOTE_INVAL_REQ_ERR, IB_WC_REM_INV_REQ_ERR },
+ { HNS_ROCE_CQE_V2_REMOTE_ACCESS_ERR, IB_WC_REM_ACCESS_ERR },
+ { HNS_ROCE_CQE_V2_REMOTE_OP_ERR, IB_WC_REM_OP_ERR },
+ { HNS_ROCE_CQE_V2_TRANSPORT_RETRY_EXC_ERR,
+ IB_WC_RETRY_EXC_ERR },
+ { HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR, IB_WC_RNR_RETRY_EXC_ERR },
+ { HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR, IB_WC_REM_ABORT_ERR },
+ { HNS_ROCE_CQE_V2_GENERAL_ERR, IB_WC_GENERAL_ERR}
+ };
+
+ u32 cqe_status = hr_reg_read(cqe, CQE_STATUS);
+ int i;
- /* Find cqe according to consumer index */
- cqe = next_cqe_sw_v2(hr_cq);
- if (!cqe)
- return -EAGAIN;
+ wc->status = IB_WC_GENERAL_ERR;
+ for (i = 0; i < ARRAY_SIZE(map); i++)
+ if (cqe_status == map[i].cqe_status) {
+ wc->status = map[i].wc_status;
+ break;
+ }
- ++hr_cq->cons_index;
- /* Memory barrier */
- rmb();
+ if (likely(wc->status == IB_WC_SUCCESS ||
+ wc->status == IB_WC_WR_FLUSH_ERR))
+ return;
+
+ ibdev_err_ratelimited(&hr_dev->ib_dev, "error cqe status 0x%x:\n",
+ cqe_status);
+ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 16, 4, cqe,
+ cq->cqe_size, false);
+ wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS);
- /* 0->SQ, 1->RQ */
- is_send = !roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_S_R_S);
+ /*
+ * For hns ROCEE, GENERAL_ERR is an error type that is not defined in
+ * the standard protocol; the driver must ignore it and need not set
+ * the QP to an error state.
+ */
+ if (cqe_status == HNS_ROCE_CQE_V2_GENERAL_ERR)
+ return;
+
+ flush_cqe(hr_dev, qp);
+}
+
+static int get_cur_qp(struct hns_roce_cq *hr_cq, struct hns_roce_v2_cqe *cqe,
+ struct hns_roce_qp **cur_qp)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
+ struct hns_roce_qp *hr_qp = *cur_qp;
+ u32 qpn;
- qpn = roce_get_field(cqe->byte_16, V2_CQE_BYTE_16_LCL_QPN_M,
- V2_CQE_BYTE_16_LCL_QPN_S);
+ qpn = hr_reg_read(cqe, CQE_LCL_QPN);
- if (!*cur_qp || (qpn & HNS_ROCE_V2_CQE_QPN_MASK) != (*cur_qp)->qpn) {
- hr_dev = to_hr_dev(hr_cq->ib_cq.device);
+ if (!hr_qp || qpn != hr_qp->qpn) {
hr_qp = __hns_roce_qp_lookup(hr_dev, qpn);
if (unlikely(!hr_qp)) {
- dev_err(hr_dev->dev, "CQ %06lx with entry for unknown QPN %06x\n",
- hr_cq->cqn, (qpn & HNS_ROCE_V2_CQE_QPN_MASK));
+ ibdev_err(&hr_dev->ib_dev,
+ "CQ %06lx with entry for unknown QPN %06x\n",
+ hr_cq->cqn, qpn);
return -EINVAL;
}
*cur_qp = hr_qp;
}
- wc->qp = &(*cur_qp)->ibqp;
- wc->vendor_err = 0;
+ return 0;
+}
- if (is_send) {
- wq = &(*cur_qp)->sq;
- if ((*cur_qp)->sq_signal_bits) {
- /*
- * If sg_signal_bit is 1,
- * firstly tail pointer updated to wqe
- * which current cqe correspond to
- */
- wqe_ctr = (u16)roce_get_field(cqe->byte_4,
- V2_CQE_BYTE_4_WQE_INDX_M,
- V2_CQE_BYTE_4_WQE_INDX_S);
- wq->tail += (wqe_ctr - (u16)wq->tail) &
- (wq->wqe_cnt - 1);
- }
+/*
+ * mapped-value = 1 + real-value
+ * The ib wc opcode's real value starts from 0. In order to distinguish
+ * between initialized and uninitialized map entries, we add 1 to the actual
+ * value when defining the mapping, so that validity can be identified by
+ * checking whether the mapped value is greater than 0.
+ */
+#define HR_WC_OP_MAP(hr_key, ib_key) \
+ [HNS_ROCE_V2_WQE_OP_ ## hr_key] = 1 + IB_WC_ ## ib_key
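+/*
+ * For example, HR_WC_OP_MAP(SEND, SEND) expands to
+ * [HNS_ROCE_V2_WQE_OP_SEND] = 1 + IB_WC_SEND, so an entry that is still 0
+ * has no mapping and the to_ib_wc_*_op() helpers return -EINVAL for it.
+ */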
+
+static const u32 wc_send_op_map[] = {
+ HR_WC_OP_MAP(SEND, SEND),
+ HR_WC_OP_MAP(SEND_WITH_INV, SEND),
+ HR_WC_OP_MAP(SEND_WITH_IMM, SEND),
+ HR_WC_OP_MAP(RDMA_READ, RDMA_READ),
+ HR_WC_OP_MAP(RDMA_WRITE, RDMA_WRITE),
+ HR_WC_OP_MAP(RDMA_WRITE_WITH_IMM, RDMA_WRITE),
+ HR_WC_OP_MAP(ATOM_CMP_AND_SWAP, COMP_SWAP),
+ HR_WC_OP_MAP(ATOM_FETCH_AND_ADD, FETCH_ADD),
+ HR_WC_OP_MAP(ATOM_MSK_CMP_AND_SWAP, MASKED_COMP_SWAP),
+ HR_WC_OP_MAP(ATOM_MSK_FETCH_AND_ADD, MASKED_FETCH_ADD),
+ HR_WC_OP_MAP(FAST_REG_PMR, REG_MR),
+};
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
- ++wq->tail;
- } else if ((*cur_qp)->ibqp.srq) {
- srq = to_hr_srq((*cur_qp)->ibqp.srq);
- wqe_ctr = le16_to_cpu(roce_get_field(cqe->byte_4,
- V2_CQE_BYTE_4_WQE_INDX_M,
- V2_CQE_BYTE_4_WQE_INDX_S));
- wc->wr_id = srq->wrid[wqe_ctr];
- hns_roce_free_srq_wqe(srq, wqe_ctr);
- } else {
- /* Update tail pointer, record wr_id */
- wq = &(*cur_qp)->rq;
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
- ++wq->tail;
- }
+static int to_ib_wc_send_op(u32 hr_opcode)
+{
+ if (hr_opcode >= ARRAY_SIZE(wc_send_op_map))
+ return -EINVAL;
- status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M,
- V2_CQE_BYTE_4_STATUS_S);
- switch (status & HNS_ROCE_V2_CQE_STATUS_MASK) {
- case HNS_ROCE_CQE_V2_SUCCESS:
- wc->status = IB_WC_SUCCESS;
- break;
- case HNS_ROCE_CQE_V2_LOCAL_LENGTH_ERR:
- wc->status = IB_WC_LOC_LEN_ERR;
- break;
- case HNS_ROCE_CQE_V2_LOCAL_QP_OP_ERR:
- wc->status = IB_WC_LOC_QP_OP_ERR;
- break;
- case HNS_ROCE_CQE_V2_LOCAL_PROT_ERR:
- wc->status = IB_WC_LOC_PROT_ERR;
- break;
- case HNS_ROCE_CQE_V2_WR_FLUSH_ERR:
- wc->status = IB_WC_WR_FLUSH_ERR;
- break;
- case HNS_ROCE_CQE_V2_MW_BIND_ERR:
- wc->status = IB_WC_MW_BIND_ERR;
- break;
- case HNS_ROCE_CQE_V2_BAD_RESP_ERR:
- wc->status = IB_WC_BAD_RESP_ERR;
- break;
- case HNS_ROCE_CQE_V2_LOCAL_ACCESS_ERR:
- wc->status = IB_WC_LOC_ACCESS_ERR;
- break;
- case HNS_ROCE_CQE_V2_REMOTE_INVAL_REQ_ERR:
- wc->status = IB_WC_REM_INV_REQ_ERR;
+ return wc_send_op_map[hr_opcode] ? wc_send_op_map[hr_opcode] - 1 :
+ -EINVAL;
+}
+
+static const u32 wc_recv_op_map[] = {
+ HR_WC_OP_MAP(RDMA_WRITE_WITH_IMM, WITH_IMM),
+ HR_WC_OP_MAP(SEND, RECV),
+ HR_WC_OP_MAP(SEND_WITH_IMM, WITH_IMM),
+ HR_WC_OP_MAP(SEND_WITH_INV, RECV),
+};
+
+static int to_ib_wc_recv_op(u32 hr_opcode)
+{
+ if (hr_opcode >= ARRAY_SIZE(wc_recv_op_map))
+ return -EINVAL;
+
+ return wc_recv_op_map[hr_opcode] ? wc_recv_op_map[hr_opcode] - 1 :
+ -EINVAL;
+}
+
+static void fill_send_wc(struct ib_wc *wc, struct hns_roce_v2_cqe *cqe)
+{
+ u32 hr_opcode;
+ int ib_opcode;
+
+ wc->wc_flags = 0;
+
+ hr_opcode = hr_reg_read(cqe, CQE_OPCODE);
+ switch (hr_opcode) {
+ case HNS_ROCE_V2_WQE_OP_RDMA_READ:
+ wc->byte_len = le32_to_cpu(cqe->byte_cnt);
break;
- case HNS_ROCE_CQE_V2_REMOTE_ACCESS_ERR:
- wc->status = IB_WC_REM_ACCESS_ERR;
+ case HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM:
+ case HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM:
+ wc->wc_flags |= IB_WC_WITH_IMM;
break;
- case HNS_ROCE_CQE_V2_REMOTE_OP_ERR:
- wc->status = IB_WC_REM_OP_ERR;
+ case HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP:
+ case HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD:
+ case HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP:
+ case HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD:
+ wc->byte_len = 8;
break;
- case HNS_ROCE_CQE_V2_TRANSPORT_RETRY_EXC_ERR:
- wc->status = IB_WC_RETRY_EXC_ERR;
+ default:
break;
- case HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR:
- wc->status = IB_WC_RNR_RETRY_EXC_ERR;
+ }
+
+ ib_opcode = to_ib_wc_send_op(hr_opcode);
+ if (ib_opcode < 0)
+ wc->status = IB_WC_GENERAL_ERR;
+ else
+ wc->opcode = ib_opcode;
+}
+
+static int fill_recv_wc(struct ib_wc *wc, struct hns_roce_v2_cqe *cqe)
+{
+ u32 hr_opcode;
+ int ib_opcode;
+
+ wc->byte_len = le32_to_cpu(cqe->byte_cnt);
+
+ hr_opcode = hr_reg_read(cqe, CQE_OPCODE);
+ switch (hr_opcode) {
+ case HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM:
+ case HNS_ROCE_V2_OPCODE_SEND_WITH_IMM:
+ wc->wc_flags = IB_WC_WITH_IMM;
+ wc->ex.imm_data = cpu_to_be32(le32_to_cpu(cqe->immtdata));
break;
- case HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR:
- wc->status = IB_WC_REM_ABORT_ERR;
+ case HNS_ROCE_V2_OPCODE_SEND_WITH_INV:
+ wc->wc_flags = IB_WC_WITH_INVALIDATE;
+ wc->ex.invalidate_rkey = le32_to_cpu(cqe->rkey);
break;
default:
- wc->status = IB_WC_GENERAL_ERR;
- break;
+ wc->wc_flags = 0;
}
- /* flush cqe if wc status is error, excluding flush error */
- if ((wc->status != IB_WC_SUCCESS) &&
- (wc->status != IB_WC_WR_FLUSH_ERR)) {
- attr_mask = IB_QP_STATE;
- attr.qp_state = IB_QPS_ERR;
- return hns_roce_v2_modify_qp(&(*cur_qp)->ibqp,
- &attr, attr_mask,
- (*cur_qp)->state, IB_QPS_ERR);
+ ib_opcode = to_ib_wc_recv_op(hr_opcode);
+ if (ib_opcode < 0)
+ wc->status = IB_WC_GENERAL_ERR;
+ else
+ wc->opcode = ib_opcode;
+
+ wc->sl = hr_reg_read(cqe, CQE_SL);
+ wc->src_qp = hr_reg_read(cqe, CQE_RMT_QPN);
+ wc->slid = 0;
+ wc->wc_flags |= hr_reg_read(cqe, CQE_GRH) ? IB_WC_GRH : 0;
+ wc->port_num = hr_reg_read(cqe, CQE_PORTN);
+ wc->pkey_index = 0;
+
+ if (hr_reg_read(cqe, CQE_VID_VLD)) {
+ wc->vlan_id = hr_reg_read(cqe, CQE_VID);
+ wc->wc_flags |= IB_WC_WITH_VLAN;
+ } else {
+ wc->vlan_id = 0xffff;
}
- if (wc->status == IB_WC_WR_FLUSH_ERR)
- return 0;
+ wc->network_hdr_type = hr_reg_read(cqe, CQE_PORT_TYPE);
+
+ return 0;
+}
+
+static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
+ struct hns_roce_qp **cur_qp, struct ib_wc *wc)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
+ struct hns_roce_qp *qp = *cur_qp;
+ struct hns_roce_srq *srq = NULL;
+ struct hns_roce_v2_cqe *cqe;
+ struct hns_roce_wq *wq;
+ int is_send;
+ u16 wqe_idx;
+ int ret;
+ cqe = get_sw_cqe_v2(hr_cq, hr_cq->cons_index);
+ if (!cqe)
+ return -EAGAIN;
+
+ ++hr_cq->cons_index;
+ /* Memory barrier */
+ rmb();
+
+ ret = get_cur_qp(hr_cq, cqe, &qp);
+ if (ret)
+ return ret;
+
+ wc->qp = &qp->ibqp;
+ wc->vendor_err = 0;
+
+ wqe_idx = hr_reg_read(cqe, CQE_WQE_IDX);
+
+ is_send = !hr_reg_read(cqe, CQE_S_R);
if (is_send) {
- wc->wc_flags = 0;
- /* SQ corresponding to CQE */
- switch (roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_OPCODE_M,
- V2_CQE_BYTE_4_OPCODE_S) & 0x1f) {
- case HNS_ROCE_SQ_OPCODE_SEND:
- wc->opcode = IB_WC_SEND;
- break;
- case HNS_ROCE_SQ_OPCODE_SEND_WITH_INV:
- wc->opcode = IB_WC_SEND;
- break;
- case HNS_ROCE_SQ_OPCODE_SEND_WITH_IMM:
- wc->opcode = IB_WC_SEND;
- wc->wc_flags |= IB_WC_WITH_IMM;
- break;
- case HNS_ROCE_SQ_OPCODE_RDMA_READ:
- wc->opcode = IB_WC_RDMA_READ;
- wc->byte_len = le32_to_cpu(cqe->byte_cnt);
- break;
- case HNS_ROCE_SQ_OPCODE_RDMA_WRITE:
- wc->opcode = IB_WC_RDMA_WRITE;
- break;
- case HNS_ROCE_SQ_OPCODE_RDMA_WRITE_WITH_IMM:
- wc->opcode = IB_WC_RDMA_WRITE;
- wc->wc_flags |= IB_WC_WITH_IMM;
- break;
- case HNS_ROCE_SQ_OPCODE_LOCAL_INV:
- wc->opcode = IB_WC_LOCAL_INV;
- wc->wc_flags |= IB_WC_WITH_INVALIDATE;
- break;
- case HNS_ROCE_SQ_OPCODE_ATOMIC_COMP_AND_SWAP:
- wc->opcode = IB_WC_COMP_SWAP;
- wc->byte_len = 8;
- break;
- case HNS_ROCE_SQ_OPCODE_ATOMIC_FETCH_AND_ADD:
- wc->opcode = IB_WC_FETCH_ADD;
- wc->byte_len = 8;
- break;
- case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_COMP_AND_SWAP:
- wc->opcode = IB_WC_MASKED_COMP_SWAP;
- wc->byte_len = 8;
- break;
- case HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_FETCH_AND_ADD:
- wc->opcode = IB_WC_MASKED_FETCH_ADD;
- wc->byte_len = 8;
- break;
- case HNS_ROCE_SQ_OPCODE_FAST_REG_WR:
- wc->opcode = IB_WC_REG_MR;
- break;
- case HNS_ROCE_SQ_OPCODE_BIND_MW:
- wc->opcode = IB_WC_REG_MR;
- break;
- default:
- wc->status = IB_WC_GENERAL_ERR;
- break;
- }
- } else {
- /* RQ correspond to CQE */
- wc->byte_len = le32_to_cpu(cqe->byte_cnt);
+ wq = &qp->sq;
- opcode = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_OPCODE_M,
- V2_CQE_BYTE_4_OPCODE_S);
- switch (opcode & 0x1f) {
- case HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM:
- wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
- wc->wc_flags = IB_WC_WITH_IMM;
- wc->ex.imm_data =
- cpu_to_be32(le32_to_cpu(cqe->immtdata));
- break;
- case HNS_ROCE_V2_OPCODE_SEND:
- wc->opcode = IB_WC_RECV;
- wc->wc_flags = 0;
- break;
- case HNS_ROCE_V2_OPCODE_SEND_WITH_IMM:
- wc->opcode = IB_WC_RECV;
- wc->wc_flags = IB_WC_WITH_IMM;
- wc->ex.imm_data =
- cpu_to_be32(le32_to_cpu(cqe->immtdata));
- break;
- case HNS_ROCE_V2_OPCODE_SEND_WITH_INV:
- wc->opcode = IB_WC_RECV;
- wc->wc_flags = IB_WC_WITH_INVALIDATE;
- wc->ex.invalidate_rkey = le32_to_cpu(cqe->rkey);
- break;
- default:
- wc->status = IB_WC_GENERAL_ERR;
- break;
- }
+ /* If sq_signal_bits is set, the tail pointer is updated to
+ * the WQE corresponding to the current CQE.
+ */
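+ /*
+ * For example, with wqe_cnt = 64, tail = 10 and a CQE reporting
+ * wqe_idx = 14, the update below advances the tail by
+ * (14 - 10) & 63 = 4, skipping the unsignalled WQEs in between.
+ */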
+ if (qp->sq_signal_bits)
+ wq->tail += (wqe_idx - (u16)wq->tail) &
+ (wq->wqe_cnt - 1);
- if ((wc->qp->qp_type == IB_QPT_RC ||
- wc->qp->qp_type == IB_QPT_UC) &&
- (opcode == HNS_ROCE_V2_OPCODE_SEND ||
- opcode == HNS_ROCE_V2_OPCODE_SEND_WITH_IMM ||
- opcode == HNS_ROCE_V2_OPCODE_SEND_WITH_INV) &&
- (roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_RQ_INLINE_S))) {
- ret = hns_roce_handle_recv_inl_wqe(cqe, cur_qp, wc);
- if (ret)
- return -EAGAIN;
- }
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ ++wq->tail;
- wc->sl = (u8)roce_get_field(cqe->byte_32, V2_CQE_BYTE_32_SL_M,
- V2_CQE_BYTE_32_SL_S);
- wc->src_qp = (u8)roce_get_field(cqe->byte_32,
- V2_CQE_BYTE_32_RMT_QPN_M,
- V2_CQE_BYTE_32_RMT_QPN_S);
- wc->slid = 0;
- wc->wc_flags |= (roce_get_bit(cqe->byte_32,
- V2_CQE_BYTE_32_GRH_S) ?
- IB_WC_GRH : 0);
- wc->port_num = roce_get_field(cqe->byte_32,
- V2_CQE_BYTE_32_PORTN_M, V2_CQE_BYTE_32_PORTN_S);
- wc->pkey_index = 0;
- memcpy(wc->smac, cqe->smac, 4);
- wc->smac[4] = roce_get_field(cqe->byte_28,
- V2_CQE_BYTE_28_SMAC_4_M,
- V2_CQE_BYTE_28_SMAC_4_S);
- wc->smac[5] = roce_get_field(cqe->byte_28,
- V2_CQE_BYTE_28_SMAC_5_M,
- V2_CQE_BYTE_28_SMAC_5_S);
- if (roce_get_bit(cqe->byte_28, V2_CQE_BYTE_28_VID_VLD_S)) {
- wc->vlan_id = (u16)roce_get_field(cqe->byte_28,
- V2_CQE_BYTE_28_VID_M,
- V2_CQE_BYTE_28_VID_S);
+ fill_send_wc(wc, cqe);
+ } else {
+ if (qp->ibqp.srq) {
+ srq = to_hr_srq(qp->ibqp.srq);
+ wc->wr_id = srq->wrid[wqe_idx];
+ hns_roce_free_srq_wqe(srq, wqe_idx);
} else {
- wc->vlan_id = 0xffff;
+ wq = &qp->rq;
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ ++wq->tail;
}
- wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
- wc->network_hdr_type = roce_get_field(cqe->byte_28,
- V2_CQE_BYTE_28_PORT_TYPE_M,
- V2_CQE_BYTE_28_PORT_TYPE_S);
+ ret = fill_recv_wc(wc, cqe);
}
- return 0;
+ get_cqe_status(hr_dev, qp, hr_cq, cqe, wc);
+ if (unlikely(wc->status != IB_WC_SUCCESS))
+ return 0;
+
+ return ret;
}
static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries,
struct ib_wc *wc)
{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device);
struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
struct hns_roce_qp *cur_qp = NULL;
unsigned long flags;
@@ -2850,29 +4099,109 @@ static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries,
spin_lock_irqsave(&hr_cq->lock, flags);
+ /*
+ * When the device starts to reset, the state is RST_DOWN. At this time,
+ * there may still be some valid CQEs in the hardware that have not been
+ * polled, so it is not allowed to switch to software polling mode
+ * immediately. Once the state changes to UNINIT, no CQEs remain in the
+ * hardware and it is safe to switch to software mode.
+ */
+ if (hr_dev->state == HNS_ROCE_DEVICE_STATE_UNINIT) {
+ npolled = hns_roce_v2_sw_poll_cq(hr_cq, num_entries, wc);
+ goto out;
+ }
+
for (npolled = 0; npolled < num_entries; ++npolled) {
if (hns_roce_v2_poll_one(hr_cq, &cur_qp, wc + npolled))
break;
}
- if (npolled) {
- /* Memory barrier */
- wmb();
- hns_roce_v2_cq_set_ci(hr_cq, hr_cq->cons_index);
- }
+ if (npolled)
+ update_cq_db(hr_dev, hr_cq);
+out:
spin_unlock_irqrestore(&hr_cq->lock, flags);
return npolled;
}
+static int get_op_for_set_hem(struct hns_roce_dev *hr_dev, u32 type,
+ u32 step_idx, u8 *mbox_cmd)
+{
+ u8 cmd;
+
+ switch (type) {
+ case HEM_TYPE_QPC:
+ cmd = HNS_ROCE_CMD_WRITE_QPC_BT0;
+ break;
+ case HEM_TYPE_MTPT:
+ cmd = HNS_ROCE_CMD_WRITE_MPT_BT0;
+ break;
+ case HEM_TYPE_CQC:
+ cmd = HNS_ROCE_CMD_WRITE_CQC_BT0;
+ break;
+ case HEM_TYPE_SRQC:
+ cmd = HNS_ROCE_CMD_WRITE_SRQC_BT0;
+ break;
+ case HEM_TYPE_SCCC:
+ cmd = HNS_ROCE_CMD_WRITE_SCCC_BT0;
+ break;
+ case HEM_TYPE_QPC_TIMER:
+ cmd = HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0;
+ break;
+ case HEM_TYPE_CQC_TIMER:
+ cmd = HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0;
+ break;
+ default:
+ dev_warn(hr_dev->dev, "failed to check hem type %u.\n", type);
+ return -EINVAL;
+ }
+
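+ /*
+ * The WRITE_*_BT0 commands are assumed to lie consecutively before
+ * their BT1/BT2 counterparts, so adding step_idx selects the mailbox
+ * command for the requested base address table level.
+ */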
+ *mbox_cmd = cmd + step_idx;
+
+ return 0;
+}
+
+static int config_gmv_ba_to_hw(struct hns_roce_dev *hr_dev, unsigned long obj,
+ dma_addr_t base_addr)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ u32 idx = obj / (HNS_HW_PAGE_SIZE / hr_dev->caps.gmv_entry_sz);
+ u64 addr = to_hr_hw_page_addr(base_addr);
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GMV_BT, false);
+
+ hr_reg_write(req, CFG_GMV_BT_BA_L, lower_32_bits(addr));
+ hr_reg_write(req, CFG_GMV_BT_BA_H, upper_32_bits(addr));
+ hr_reg_write(req, CFG_GMV_BT_IDX, idx);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static int set_hem_to_hw(struct hns_roce_dev *hr_dev, int obj,
+ dma_addr_t base_addr, u32 hem_type, u32 step_idx)
+{
+ int ret;
+ u8 cmd;
+
+ if (unlikely(hem_type == HEM_TYPE_GMV))
+ return config_gmv_ba_to_hw(hr_dev, obj, base_addr);
+
+ if (unlikely(hem_type == HEM_TYPE_SCCC && step_idx))
+ return 0;
+
+ ret = get_op_for_set_hem(hr_dev, hem_type, step_idx, &cmd);
+ if (ret < 0)
+ return ret;
+
+ return config_hem_ba_to_hw(hr_dev, base_addr, cmd, obj);
+}
+
static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, int obj,
- int step_idx)
+ u32 step_idx)
{
- struct device *dev = hr_dev->dev;
- struct hns_roce_cmd_mailbox *mailbox;
- struct hns_roce_hem_iter iter;
struct hns_roce_hem_mhop mhop;
struct hns_roce_hem *hem;
unsigned long mhop_obj = obj;
@@ -2883,7 +4212,6 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
u64 bt_ba = 0;
u32 chunk_ba_num;
u32 hop_num;
- u16 op = 0xff;
if (!hns_roce_check_whether_mhop(hr_dev, table->type))
return 0;
@@ -2905,145 +4233,93 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
hem_idx = i;
}
- switch (table->type) {
- case HEM_TYPE_QPC:
- op = HNS_ROCE_CMD_WRITE_QPC_BT0;
- break;
- case HEM_TYPE_MTPT:
- op = HNS_ROCE_CMD_WRITE_MPT_BT0;
- break;
- case HEM_TYPE_CQC:
- op = HNS_ROCE_CMD_WRITE_CQC_BT0;
- break;
- case HEM_TYPE_SRQC:
- op = HNS_ROCE_CMD_WRITE_SRQC_BT0;
- break;
- case HEM_TYPE_SCCC:
- op = HNS_ROCE_CMD_WRITE_SCCC_BT0;
- break;
- case HEM_TYPE_QPC_TIMER:
- op = HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0;
- break;
- case HEM_TYPE_CQC_TIMER:
- op = HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0;
- break;
- default:
- dev_warn(dev, "Table %d not to be written by mailbox!\n",
- table->type);
- return 0;
- }
-
- if (table->type == HEM_TYPE_SCCC && step_idx)
- return 0;
-
- op += step_idx;
-
- mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
-
if (table->type == HEM_TYPE_SCCC)
obj = mhop.l0_idx;
if (check_whether_last_step(hop_num, step_idx)) {
hem = table->hem[hem_idx];
- for (hns_roce_hem_first(hem, &iter);
- !hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) {
- bt_ba = hns_roce_hem_addr(&iter);
-
- /* configure the ba, tag, and op */
- ret = hns_roce_cmd_mbox(hr_dev, bt_ba, mailbox->dma,
- obj, 0, op,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
- }
+
+ ret = set_hem_to_hw(hr_dev, obj, hem->dma, table->type, step_idx);
} else {
if (step_idx == 0)
bt_ba = table->bt_l0_dma_addr[i];
else if (step_idx == 1 && hop_num == 2)
bt_ba = table->bt_l1_dma_addr[l1_idx];
- /* configure the ba, tag, and op */
- ret = hns_roce_cmd_mbox(hr_dev, bt_ba, mailbox->dma, obj,
- 0, op, HNS_ROCE_CMD_TIMEOUT_MSECS);
+ ret = set_hem_to_hw(hr_dev, obj, bt_ba, table->type, step_idx);
}
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
return ret;
}
static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table, int obj,
- int step_idx)
+ struct hns_roce_hem_table *table,
+ int tag, u32 step_idx)
{
- struct device *dev = hr_dev->dev;
struct hns_roce_cmd_mailbox *mailbox;
- int ret = 0;
- u16 op = 0xff;
+ struct device *dev = hr_dev->dev;
+ u8 cmd = 0xff;
+ int ret;
if (!hns_roce_check_whether_mhop(hr_dev, table->type))
return 0;
switch (table->type) {
case HEM_TYPE_QPC:
- op = HNS_ROCE_CMD_DESTROY_QPC_BT0;
+ cmd = HNS_ROCE_CMD_DESTROY_QPC_BT0;
break;
case HEM_TYPE_MTPT:
- op = HNS_ROCE_CMD_DESTROY_MPT_BT0;
+ cmd = HNS_ROCE_CMD_DESTROY_MPT_BT0;
break;
case HEM_TYPE_CQC:
- op = HNS_ROCE_CMD_DESTROY_CQC_BT0;
+ cmd = HNS_ROCE_CMD_DESTROY_CQC_BT0;
+ break;
+ case HEM_TYPE_SRQC:
+ cmd = HNS_ROCE_CMD_DESTROY_SRQC_BT0;
break;
case HEM_TYPE_SCCC:
case HEM_TYPE_QPC_TIMER:
case HEM_TYPE_CQC_TIMER:
- break;
- case HEM_TYPE_SRQC:
- op = HNS_ROCE_CMD_DESTROY_SRQC_BT0;
- break;
+ case HEM_TYPE_GMV:
+ return 0;
default:
- dev_warn(dev, "Table %d not to be destroyed by mailbox!\n",
+ dev_warn(dev, "table %u not to be destroyed by mailbox!\n",
table->type);
return 0;
}
- if (table->type == HEM_TYPE_SCCC ||
- table->type == HEM_TYPE_QPC_TIMER ||
- table->type == HEM_TYPE_CQC_TIMER)
- return 0;
-
- op += step_idx;
+ cmd += step_idx;
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
- /* configure the tag and op */
- ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, obj, 0, op,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, cmd, tag);
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
return ret;
}
static int hns_roce_v2_qp_modify(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt,
- enum ib_qp_state cur_state,
- enum ib_qp_state new_state,
struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask,
struct hns_roce_qp *hr_qp)
{
struct hns_roce_cmd_mailbox *mailbox;
+ int qpc_size;
int ret;
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
- memcpy(mailbox->buf, context, sizeof(*context) * 2);
+ /* The qpc size of HIP08 is only 256B, which is half that of HIP09 */
+ qpc_size = hr_dev->caps.qpc_sz;
+ memcpy(mailbox->buf, context, qpc_size);
+ memcpy(mailbox->buf + qpc_size, qpc_mask, qpc_size);
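+ /*
+ * The mailbox buffer thus carries the context followed by its mask,
+ * each caps.qpc_sz bytes, so the same layout works for both QPC sizes.
+ */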
- ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0,
- HNS_ROCE_CMD_MODIFY_QPC,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
+ ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0,
+ HNS_ROCE_CMD_MODIFY_QPC, hr_qp->qpn);
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
@@ -3067,24 +4343,46 @@ static void set_access_flags(struct hns_roce_qp *hr_qp,
if (!dest_rd_atomic)
access_flags &= IB_ACCESS_REMOTE_WRITE;
- roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
- !!(access_flags & IB_ACCESS_REMOTE_READ));
- roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, 0);
+ hr_reg_write_bool(context, QPC_RRE,
+ access_flags & IB_ACCESS_REMOTE_READ);
+ hr_reg_clear(qpc_mask, QPC_RRE);
- roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S,
- !!(access_flags & IB_ACCESS_REMOTE_WRITE));
- roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S, 0);
+ hr_reg_write_bool(context, QPC_RWE,
+ access_flags & IB_ACCESS_REMOTE_WRITE);
+ hr_reg_clear(qpc_mask, QPC_RWE);
- roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
- !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
- roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0);
+ hr_reg_write_bool(context, QPC_ATE,
+ access_flags & IB_ACCESS_REMOTE_ATOMIC);
+ hr_reg_clear(qpc_mask, QPC_ATE);
+ hr_reg_write_bool(context, QPC_EXT_ATE,
+ access_flags & IB_ACCESS_REMOTE_ATOMIC);
+ hr_reg_clear(qpc_mask, QPC_EXT_ATE);
+}
+
+static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
+ struct hns_roce_v2_qp_context *context)
+{
+ hr_reg_write(context, QPC_SGE_SHIFT,
+ to_hr_hem_entries_shift(hr_qp->sge.sge_cnt,
+ hr_qp->sge.sge_shift));
+
+ hr_reg_write(context, QPC_SQ_SHIFT, ilog2(hr_qp->sq.wqe_cnt));
+
+ hr_reg_write(context, QPC_RQ_SHIFT, ilog2(hr_qp->rq.wqe_cnt));
+}
+
+static inline int get_cqn(struct ib_cq *ib_cq)
+{
+ return ib_cq ? to_hr_cq(ib_cq)->cqn : 0;
+}
+
+static inline int get_pdn(struct ib_pd *ib_pd)
+{
+ return ib_pd ? to_hr_pd(ib_pd)->pdn : 0;
}
static void modify_qp_reset_to_init(struct ib_qp *ibqp,
- const struct ib_qp_attr *attr,
- int attr_mask,
- struct hns_roce_v2_qp_context *context,
- struct hns_roce_v2_qp_context *qpc_mask)
+ struct hns_roce_v2_qp_context *context)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
@@ -3095,1149 +4393,975 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
* we should set all bits of the relevant fields in context mask to
* 0 at the same time, else set them to 0x1.
*/
- roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M,
- V2_QPC_BYTE_4_TST_S, to_hr_qp_type(hr_qp->ibqp.qp_type));
- roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M,
- V2_QPC_BYTE_4_TST_S, 0);
-
- if (ibqp->qp_type == IB_QPT_GSI)
- roce_set_field(context->byte_4_sqpn_tst,
- V2_QPC_BYTE_4_SGE_SHIFT_M,
- V2_QPC_BYTE_4_SGE_SHIFT_S,
- ilog2((unsigned int)hr_qp->sge.sge_cnt));
- else
- roce_set_field(context->byte_4_sqpn_tst,
- V2_QPC_BYTE_4_SGE_SHIFT_M,
- V2_QPC_BYTE_4_SGE_SHIFT_S,
- hr_qp->sq.max_gs > 2 ?
- ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0);
-
- roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M,
- V2_QPC_BYTE_4_SGE_SHIFT_S, 0);
-
- roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M,
- V2_QPC_BYTE_4_SQPN_S, hr_qp->qpn);
- roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M,
- V2_QPC_BYTE_4_SQPN_S, 0);
-
- roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
- V2_QPC_BYTE_16_PD_S, to_hr_pd(ibqp->pd)->pdn);
- roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
- V2_QPC_BYTE_16_PD_S, 0);
-
- roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQWS_M,
- V2_QPC_BYTE_20_RQWS_S, ilog2(hr_qp->rq.max_gs));
- roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQWS_M,
- V2_QPC_BYTE_20_RQWS_S, 0);
-
- roce_set_field(context->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S,
- ilog2((unsigned int)hr_qp->sq.wqe_cnt));
- roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, 0);
-
- roce_set_field(context->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
- (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
- hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 :
- ilog2((unsigned int)hr_qp->rq.wqe_cnt));
- roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0);
+ hr_reg_write(context, QPC_TST, to_hr_qp_type(ibqp->qp_type));
+
+ hr_reg_write(context, QPC_PD, get_pdn(ibqp->pd));
+
+ hr_reg_write(context, QPC_RQWS, ilog2(hr_qp->rq.max_gs));
+
+ set_qpc_wqe_cnt(hr_qp, context);
 /* If there is no VLAN, the VLAN ID must be set to 0xFFF */
- roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M,
- V2_QPC_BYTE_24_VLAN_ID_S, 0xfff);
- roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M,
- V2_QPC_BYTE_24_VLAN_ID_S, 0);
+ hr_reg_write(context, QPC_VLAN_ID, 0xfff);
+
+ if (ibqp->qp_type == IB_QPT_XRC_TGT) {
+ context->qkey_xrcd = cpu_to_le32(hr_qp->xrcdn);
+
+ hr_reg_enable(context, QPC_XRC_QP_TYPE);
+ }
+
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
+ hr_reg_enable(context, QPC_RQ_RECORD_EN);
+
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_OWNER_DB)
+ hr_reg_enable(context, QPC_OWNER_MODE);
+
+ hr_reg_write(context, QPC_RQ_DB_RECORD_ADDR_L,
+ lower_32_bits(hr_qp->rdb.dma) >> 1);
+ hr_reg_write(context, QPC_RQ_DB_RECORD_ADDR_H,
+ upper_32_bits(hr_qp->rdb.dma));
+
+ hr_reg_write(context, QPC_RX_CQN, get_cqn(ibqp->recv_cq));
- /*
- * Set some fields in context to zero, Because the default values
- * of all fields in context are zero, we need not set them to 0 again.
- * but we should set the relevant fields of context mask to 0.
- */
- roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_SQ_TX_ERR_S, 0);
- roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_SQ_RX_ERR_S, 0);
- roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_TX_ERR_S, 0);
- roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_RX_ERR_S, 0);
-
- roce_set_field(qpc_mask->byte_60_qpst_tempid, V2_QPC_BYTE_60_TEMPID_M,
- V2_QPC_BYTE_60_TEMPID_S, 0);
-
- roce_set_field(qpc_mask->byte_60_qpst_tempid,
- V2_QPC_BYTE_60_SCC_TOKEN_M, V2_QPC_BYTE_60_SCC_TOKEN_S,
- 0);
- roce_set_bit(qpc_mask->byte_60_qpst_tempid,
- V2_QPC_BYTE_60_SQ_DB_DOING_S, 0);
- roce_set_bit(qpc_mask->byte_60_qpst_tempid,
- V2_QPC_BYTE_60_RQ_DB_DOING_S, 0);
- roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CNP_TX_FLAG_S, 0);
- roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CE_FLAG_S, 0);
-
- if (hr_qp->rdb_en) {
- roce_set_bit(context->byte_68_rq_db,
- V2_QPC_BYTE_68_RQ_RECORD_EN_S, 1);
- roce_set_bit(qpc_mask->byte_68_rq_db,
- V2_QPC_BYTE_68_RQ_RECORD_EN_S, 0);
- }
-
- roce_set_field(context->byte_68_rq_db,
- V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_M,
- V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_S,
- ((u32)hr_qp->rdb.dma) >> 1);
- roce_set_field(qpc_mask->byte_68_rq_db,
- V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_M,
- V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_S, 0);
- context->rq_db_record_addr = hr_qp->rdb.dma >> 32;
- qpc_mask->rq_db_record_addr = 0;
-
- roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S,
- (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) ? 1 : 0);
- roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, 0);
-
- roce_set_field(context->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M,
- V2_QPC_BYTE_80_RX_CQN_S, to_hr_cq(ibqp->recv_cq)->cqn);
- roce_set_field(qpc_mask->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M,
- V2_QPC_BYTE_80_RX_CQN_S, 0);
if (ibqp->srq) {
- roce_set_field(context->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_SRQN_M, V2_QPC_BYTE_76_SRQN_S,
- to_hr_srq(ibqp->srq)->srqn);
- roce_set_field(qpc_mask->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_SRQN_M, V2_QPC_BYTE_76_SRQN_S, 0);
- roce_set_bit(context->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_SRQ_EN_S, 1);
- roce_set_bit(qpc_mask->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_SRQ_EN_S, 0);
- }
-
- roce_set_field(qpc_mask->byte_84_rq_ci_pi,
- V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
- V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0);
- roce_set_field(qpc_mask->byte_84_rq_ci_pi,
- V2_QPC_BYTE_84_RQ_CONSUMER_IDX_M,
- V2_QPC_BYTE_84_RQ_CONSUMER_IDX_S, 0);
-
- roce_set_field(qpc_mask->byte_92_srq_info, V2_QPC_BYTE_92_SRQ_INFO_M,
- V2_QPC_BYTE_92_SRQ_INFO_S, 0);
-
- roce_set_field(qpc_mask->byte_96_rx_reqmsn, V2_QPC_BYTE_96_RX_REQ_MSN_M,
- V2_QPC_BYTE_96_RX_REQ_MSN_S, 0);
-
- roce_set_field(qpc_mask->byte_104_rq_sge,
- V2_QPC_BYTE_104_RQ_CUR_WQE_SGE_NUM_M,
- V2_QPC_BYTE_104_RQ_CUR_WQE_SGE_NUM_S, 0);
-
- roce_set_bit(qpc_mask->byte_108_rx_reqepsn,
- V2_QPC_BYTE_108_RX_REQ_PSN_ERR_S, 0);
- roce_set_field(qpc_mask->byte_108_rx_reqepsn,
- V2_QPC_BYTE_108_RX_REQ_LAST_OPTYPE_M,
- V2_QPC_BYTE_108_RX_REQ_LAST_OPTYPE_S, 0);
- roce_set_bit(qpc_mask->byte_108_rx_reqepsn,
- V2_QPC_BYTE_108_RX_REQ_RNR_S, 0);
+ hr_reg_enable(context, QPC_SRQ_EN);
+ hr_reg_write(context, QPC_SRQN, to_hr_srq(ibqp->srq)->srqn);
+ }
- qpc_mask->rq_rnr_timer = 0;
- qpc_mask->rx_msg_len = 0;
- qpc_mask->rx_rkey_pkt_info = 0;
- qpc_mask->rx_va = 0;
-
- roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_HEAD_MAX_M,
- V2_QPC_BYTE_132_TRRL_HEAD_MAX_S, 0);
- roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_TAIL_MAX_M,
- V2_QPC_BYTE_132_TRRL_TAIL_MAX_S, 0);
-
- roce_set_bit(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RQ_RTY_WAIT_DO_S,
- 0);
- roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M,
- V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S, 0);
- roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_TAIL_M,
- V2_QPC_BYTE_140_RAQ_TRRL_TAIL_S, 0);
-
- roce_set_field(qpc_mask->byte_144_raq,
- V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M,
- V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S, 0);
- roce_set_field(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RAQ_CREDIT_M,
- V2_QPC_BYTE_144_RAQ_CREDIT_S, 0);
- roce_set_bit(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RESP_RTY_FLG_S, 0);
-
- roce_set_field(qpc_mask->byte_148_raq, V2_QPC_BYTE_148_RQ_MSN_M,
- V2_QPC_BYTE_148_RQ_MSN_S, 0);
- roce_set_field(qpc_mask->byte_148_raq, V2_QPC_BYTE_148_RAQ_SYNDROME_M,
- V2_QPC_BYTE_148_RAQ_SYNDROME_S, 0);
-
- roce_set_field(qpc_mask->byte_152_raq, V2_QPC_BYTE_152_RAQ_PSN_M,
- V2_QPC_BYTE_152_RAQ_PSN_S, 0);
- roce_set_field(qpc_mask->byte_152_raq,
- V2_QPC_BYTE_152_RAQ_TRRL_RTY_HEAD_M,
- V2_QPC_BYTE_152_RAQ_TRRL_RTY_HEAD_S, 0);
-
- roce_set_field(qpc_mask->byte_156_raq, V2_QPC_BYTE_156_RAQ_USE_PKTN_M,
- V2_QPC_BYTE_156_RAQ_USE_PKTN_S, 0);
-
- roce_set_field(qpc_mask->byte_160_sq_ci_pi,
- V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
- V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0);
- roce_set_field(qpc_mask->byte_160_sq_ci_pi,
- V2_QPC_BYTE_160_SQ_CONSUMER_IDX_M,
- V2_QPC_BYTE_160_SQ_CONSUMER_IDX_S, 0);
-
- roce_set_bit(qpc_mask->byte_168_irrl_idx,
- V2_QPC_BYTE_168_POLL_DB_WAIT_DO_S, 0);
- roce_set_bit(qpc_mask->byte_168_irrl_idx,
- V2_QPC_BYTE_168_SCC_TOKEN_FORBID_SQ_DEQ_S, 0);
- roce_set_bit(qpc_mask->byte_168_irrl_idx,
- V2_QPC_BYTE_168_WAIT_ACK_TIMEOUT_S, 0);
- roce_set_bit(qpc_mask->byte_168_irrl_idx,
- V2_QPC_BYTE_168_MSG_RTY_LP_FLG_S, 0);
- roce_set_bit(qpc_mask->byte_168_irrl_idx,
- V2_QPC_BYTE_168_SQ_INVLD_FLG_S, 0);
- roce_set_field(qpc_mask->byte_168_irrl_idx,
- V2_QPC_BYTE_168_IRRL_IDX_LSB_M,
- V2_QPC_BYTE_168_IRRL_IDX_LSB_S, 0);
-
- roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_ACK_REQ_FREQ_M,
- V2_QPC_BYTE_172_ACK_REQ_FREQ_S, 4);
- roce_set_field(qpc_mask->byte_172_sq_psn,
- V2_QPC_BYTE_172_ACK_REQ_FREQ_M,
- V2_QPC_BYTE_172_ACK_REQ_FREQ_S, 0);
-
- roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_MSG_RNR_FLG_S,
- 0);
-
- roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1);
- roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 0);
-
- roce_set_field(qpc_mask->byte_176_msg_pktn,
- V2_QPC_BYTE_176_MSG_USE_PKTN_M,
- V2_QPC_BYTE_176_MSG_USE_PKTN_S, 0);
- roce_set_field(qpc_mask->byte_176_msg_pktn,
- V2_QPC_BYTE_176_IRRL_HEAD_PRE_M,
- V2_QPC_BYTE_176_IRRL_HEAD_PRE_S, 0);
-
- roce_set_field(qpc_mask->byte_184_irrl_idx,
- V2_QPC_BYTE_184_IRRL_IDX_MSB_M,
- V2_QPC_BYTE_184_IRRL_IDX_MSB_S, 0);
-
- qpc_mask->cur_sge_offset = 0;
-
- roce_set_field(qpc_mask->byte_192_ext_sge,
- V2_QPC_BYTE_192_CUR_SGE_IDX_M,
- V2_QPC_BYTE_192_CUR_SGE_IDX_S, 0);
- roce_set_field(qpc_mask->byte_192_ext_sge,
- V2_QPC_BYTE_192_EXT_SGE_NUM_LEFT_M,
- V2_QPC_BYTE_192_EXT_SGE_NUM_LEFT_S, 0);
-
- roce_set_field(qpc_mask->byte_196_sq_psn, V2_QPC_BYTE_196_IRRL_HEAD_M,
- V2_QPC_BYTE_196_IRRL_HEAD_S, 0);
-
- roce_set_field(qpc_mask->byte_200_sq_max, V2_QPC_BYTE_200_SQ_MAX_IDX_M,
- V2_QPC_BYTE_200_SQ_MAX_IDX_S, 0);
- roce_set_field(qpc_mask->byte_200_sq_max,
- V2_QPC_BYTE_200_LCL_OPERATED_CNT_M,
- V2_QPC_BYTE_200_LCL_OPERATED_CNT_S, 0);
-
- roce_set_bit(qpc_mask->byte_208_irrl, V2_QPC_BYTE_208_PKT_RNR_FLG_S, 0);
- roce_set_bit(qpc_mask->byte_208_irrl, V2_QPC_BYTE_208_PKT_RTY_FLG_S, 0);
-
- roce_set_field(qpc_mask->byte_212_lsn, V2_QPC_BYTE_212_CHECK_FLG_M,
- V2_QPC_BYTE_212_CHECK_FLG_S, 0);
-
- qpc_mask->sq_timer = 0;
-
- roce_set_field(qpc_mask->byte_220_retry_psn_msn,
- V2_QPC_BYTE_220_RETRY_MSG_MSN_M,
- V2_QPC_BYTE_220_RETRY_MSG_MSN_S, 0);
- roce_set_field(qpc_mask->byte_232_irrl_sge,
- V2_QPC_BYTE_232_IRRL_SGE_IDX_M,
- V2_QPC_BYTE_232_IRRL_SGE_IDX_S, 0);
-
- roce_set_bit(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_SO_LP_VLD_S,
- 0);
- roce_set_bit(qpc_mask->byte_232_irrl_sge,
- V2_QPC_BYTE_232_FENCE_LP_VLD_S, 0);
- roce_set_bit(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_IRRL_LP_VLD_S,
- 0);
-
- qpc_mask->irrl_cur_sge_offset = 0;
-
- roce_set_field(qpc_mask->byte_240_irrl_tail,
- V2_QPC_BYTE_240_IRRL_TAIL_REAL_M,
- V2_QPC_BYTE_240_IRRL_TAIL_REAL_S, 0);
- roce_set_field(qpc_mask->byte_240_irrl_tail,
- V2_QPC_BYTE_240_IRRL_TAIL_RD_M,
- V2_QPC_BYTE_240_IRRL_TAIL_RD_S, 0);
- roce_set_field(qpc_mask->byte_240_irrl_tail,
- V2_QPC_BYTE_240_RX_ACK_MSN_M,
- V2_QPC_BYTE_240_RX_ACK_MSN_S, 0);
-
- roce_set_field(qpc_mask->byte_248_ack_psn, V2_QPC_BYTE_248_IRRL_PSN_M,
- V2_QPC_BYTE_248_IRRL_PSN_S, 0);
- roce_set_bit(qpc_mask->byte_248_ack_psn, V2_QPC_BYTE_248_ACK_PSN_ERR_S,
- 0);
- roce_set_field(qpc_mask->byte_248_ack_psn,
- V2_QPC_BYTE_248_ACK_LAST_OPTYPE_M,
- V2_QPC_BYTE_248_ACK_LAST_OPTYPE_S, 0);
- roce_set_bit(qpc_mask->byte_248_ack_psn, V2_QPC_BYTE_248_IRRL_PSN_VLD_S,
- 0);
- roce_set_bit(qpc_mask->byte_248_ack_psn,
- V2_QPC_BYTE_248_RNR_RETRY_FLAG_S, 0);
- roce_set_bit(qpc_mask->byte_248_ack_psn, V2_QPC_BYTE_248_CQ_ERR_IND_S,
- 0);
-
- hr_qp->access_flags = attr->qp_access_flags;
- roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M,
- V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->send_cq)->cqn);
- roce_set_field(qpc_mask->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M,
- V2_QPC_BYTE_252_TX_CQN_S, 0);
-
- roce_set_field(qpc_mask->byte_252_err_txcqn, V2_QPC_BYTE_252_ERR_TYPE_M,
- V2_QPC_BYTE_252_ERR_TYPE_S, 0);
-
- roce_set_field(qpc_mask->byte_256_sqflush_rqcqe,
- V2_QPC_BYTE_256_RQ_CQE_IDX_M,
- V2_QPC_BYTE_256_RQ_CQE_IDX_S, 0);
- roce_set_field(qpc_mask->byte_256_sqflush_rqcqe,
- V2_QPC_BYTE_256_SQ_FLUSH_IDX_M,
- V2_QPC_BYTE_256_SQ_FLUSH_IDX_S, 0);
+ hr_reg_enable(context, QPC_FRE);
+
+ hr_reg_write(context, QPC_TX_CQN, get_cqn(ibqp->send_cq));
+
+ if (hr_dev->caps.qpc_sz < HNS_ROCE_V3_QPC_SZ)
+ return;
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH)
+ hr_reg_enable(&context->ext, QPCEX_STASH);
}
static void modify_qp_init_to_init(struct ib_qp *ibqp,
- const struct ib_qp_attr *attr, int attr_mask,
struct hns_roce_v2_qp_context *context,
struct hns_roce_v2_qp_context *qpc_mask)
{
- struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
-
/*
* In v2 engine, software passes the context and the context mask to
* hardware when modifying a qp. If software needs to modify some fields
* in the context, it should set all bits of the relevant fields in the
* context mask to 0 at the same time; otherwise they are set to 0x1.
*/
- roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M,
- V2_QPC_BYTE_4_TST_S, to_hr_qp_type(hr_qp->ibqp.qp_type));
- roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M,
- V2_QPC_BYTE_4_TST_S, 0);
-
- if (ibqp->qp_type == IB_QPT_GSI)
- roce_set_field(context->byte_4_sqpn_tst,
- V2_QPC_BYTE_4_SGE_SHIFT_M,
- V2_QPC_BYTE_4_SGE_SHIFT_S,
- ilog2((unsigned int)hr_qp->sge.sge_cnt));
- else
- roce_set_field(context->byte_4_sqpn_tst,
- V2_QPC_BYTE_4_SGE_SHIFT_M,
- V2_QPC_BYTE_4_SGE_SHIFT_S, hr_qp->sq.max_gs > 2 ?
- ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0);
-
- roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M,
- V2_QPC_BYTE_4_SGE_SHIFT_S, 0);
-
- if (attr_mask & IB_QP_ACCESS_FLAGS) {
- roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
- !!(attr->qp_access_flags & IB_ACCESS_REMOTE_READ));
- roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
- 0);
-
- roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S,
- !!(attr->qp_access_flags &
- IB_ACCESS_REMOTE_WRITE));
- roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S,
- 0);
-
- roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
- !!(attr->qp_access_flags &
- IB_ACCESS_REMOTE_ATOMIC));
- roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
- 0);
- } else {
- roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
- !!(hr_qp->access_flags & IB_ACCESS_REMOTE_READ));
- roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S,
- 0);
-
- roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S,
- !!(hr_qp->access_flags & IB_ACCESS_REMOTE_WRITE));
- roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_RWE_S,
- 0);
-
- roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
- !!(hr_qp->access_flags & IB_ACCESS_REMOTE_ATOMIC));
- roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S,
- 0);
- }
-
- roce_set_field(context->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S,
- ilog2((unsigned int)hr_qp->sq.wqe_cnt));
- roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, 0);
-
- roce_set_field(context->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
- (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
- hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 :
- ilog2((unsigned int)hr_qp->rq.wqe_cnt));
- roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0);
-
- roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
- V2_QPC_BYTE_16_PD_S, to_hr_pd(ibqp->pd)->pdn);
- roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M,
- V2_QPC_BYTE_16_PD_S, 0);
-
- roce_set_field(context->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M,
- V2_QPC_BYTE_80_RX_CQN_S, to_hr_cq(ibqp->recv_cq)->cqn);
- roce_set_field(qpc_mask->byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_RX_CQN_M,
- V2_QPC_BYTE_80_RX_CQN_S, 0);
-
- roce_set_field(context->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M,
- V2_QPC_BYTE_252_TX_CQN_S, to_hr_cq(ibqp->send_cq)->cqn);
- roce_set_field(qpc_mask->byte_252_err_txcqn, V2_QPC_BYTE_252_TX_CQN_M,
- V2_QPC_BYTE_252_TX_CQN_S, 0);
+ hr_reg_write(context, QPC_TST, to_hr_qp_type(ibqp->qp_type));
+ hr_reg_clear(qpc_mask, QPC_TST);
+
+ hr_reg_write(context, QPC_PD, get_pdn(ibqp->pd));
+ hr_reg_clear(qpc_mask, QPC_PD);
+
+ hr_reg_write(context, QPC_RX_CQN, get_cqn(ibqp->recv_cq));
+ hr_reg_clear(qpc_mask, QPC_RX_CQN);
+
+ hr_reg_write(context, QPC_TX_CQN, get_cqn(ibqp->send_cq));
+ hr_reg_clear(qpc_mask, QPC_TX_CQN);
if (ibqp->srq) {
- roce_set_bit(context->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_SRQ_EN_S, 1);
- roce_set_bit(qpc_mask->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_SRQ_EN_S, 0);
- roce_set_field(context->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_SRQN_M, V2_QPC_BYTE_76_SRQN_S,
- to_hr_srq(ibqp->srq)->srqn);
- roce_set_field(qpc_mask->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_SRQN_M, V2_QPC_BYTE_76_SRQN_S, 0);
- }
-
- roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M,
- V2_QPC_BYTE_4_SQPN_S, hr_qp->qpn);
- roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M,
- V2_QPC_BYTE_4_SQPN_S, 0);
+ hr_reg_enable(context, QPC_SRQ_EN);
+ hr_reg_clear(qpc_mask, QPC_SRQ_EN);
+ hr_reg_write(context, QPC_SRQN, to_hr_srq(ibqp->srq)->srqn);
+ hr_reg_clear(qpc_mask, QPC_SRQN);
+ }
+}
- if (attr_mask & IB_QP_DEST_QPN) {
- roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M,
- V2_QPC_BYTE_56_DQPN_S, hr_qp->qpn);
- roce_set_field(qpc_mask->byte_56_dqpn_err,
- V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0);
+static int config_qp_rq_buf(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ u64 mtts[MTT_MIN_COUNT] = { 0 };
+ u64 wqe_sge_ba;
+ int ret;
+
+ /* Search qp buf's mtts */
+ ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, mtts,
+ MTT_MIN_COUNT);
+ if (hr_qp->rq.wqe_cnt && ret) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to find QP(0x%lx) RQ WQE buf, ret = %d.\n",
+ hr_qp->qpn, ret);
+ return ret;
}
+
+ wqe_sge_ba = hns_roce_get_mtr_ba(&hr_qp->mtr);
+
+ context->wqe_sge_ba = cpu_to_le32(wqe_sge_ba >> 3);
+ qpc_mask->wqe_sge_ba = 0;
+
+ /*
+ * In v2 engine, software passes the context and the context mask to
+ * hardware when modifying a qp. If software needs to modify some fields
+ * in the context, it should set all bits of the relevant fields in the
+ * context mask to 0 at the same time; otherwise they are set to 0x1.
+ */
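+ /*
+ * Each hr_reg_write() on the context below is therefore paired with an
+ * hr_reg_clear() on the same field of qpc_mask, which tells the
+ * hardware to apply the new value for that field.
+ */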
+ hr_reg_write(context, QPC_WQE_SGE_BA_H, wqe_sge_ba >> (32 + 3));
+ hr_reg_clear(qpc_mask, QPC_WQE_SGE_BA_H);
+
+ hr_reg_write(context, QPC_SQ_HOP_NUM,
+ to_hr_hem_hopnum(hr_dev->caps.wqe_sq_hop_num,
+ hr_qp->sq.wqe_cnt));
+ hr_reg_clear(qpc_mask, QPC_SQ_HOP_NUM);
+
+ hr_reg_write(context, QPC_SGE_HOP_NUM,
+ to_hr_hem_hopnum(hr_dev->caps.wqe_sge_hop_num,
+ hr_qp->sge.sge_cnt));
+ hr_reg_clear(qpc_mask, QPC_SGE_HOP_NUM);
+
+ hr_reg_write(context, QPC_RQ_HOP_NUM,
+ to_hr_hem_hopnum(hr_dev->caps.wqe_rq_hop_num,
+ hr_qp->rq.wqe_cnt));
+
+ hr_reg_clear(qpc_mask, QPC_RQ_HOP_NUM);
+
+ hr_reg_write(context, QPC_WQE_SGE_BA_PG_SZ,
+ to_hr_hw_page_shift(hr_qp->mtr.hem_cfg.ba_pg_shift));
+ hr_reg_clear(qpc_mask, QPC_WQE_SGE_BA_PG_SZ);
+
+ hr_reg_write(context, QPC_WQE_SGE_BUF_PG_SZ,
+ to_hr_hw_page_shift(hr_qp->mtr.hem_cfg.buf_pg_shift));
+ hr_reg_clear(qpc_mask, QPC_WQE_SGE_BUF_PG_SZ);
+
+ context->rq_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0]));
+ qpc_mask->rq_cur_blk_addr = 0;
+
+ hr_reg_write(context, QPC_RQ_CUR_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(mtts[0])));
+ hr_reg_clear(qpc_mask, QPC_RQ_CUR_BLK_ADDR_H);
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ context->rq_nxt_blk_addr =
+ cpu_to_le32(to_hr_hw_page_addr(mtts[1]));
+ qpc_mask->rq_nxt_blk_addr = 0;
+ hr_reg_write(context, QPC_RQ_NXT_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(mtts[1])));
+ hr_reg_clear(qpc_mask, QPC_RQ_NXT_BLK_ADDR_H);
+ }
+
+ return 0;
+}
+
+static int config_qp_sq_buf(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u64 sge_cur_blk = 0;
+ u64 sq_cur_blk = 0;
+ int ret;
+
+ /* search qp buf's mtts */
+ ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->sq.offset,
+ &sq_cur_blk, 1);
+ if (ret) {
+ ibdev_err(ibdev, "failed to find QP(0x%lx) SQ WQE buf, ret = %d.\n",
+ hr_qp->qpn, ret);
+ return ret;
+ }
+ if (hr_qp->sge.sge_cnt > 0) {
+ ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr,
+ hr_qp->sge.offset, &sge_cur_blk, 1);
+ if (ret) {
+ ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf, ret = %d.\n",
+ hr_qp->qpn, ret);
+ return ret;
+ }
+ }
+
+ /*
+ * In v2 engine, software passes the context and the context mask to
+ * hardware when modifying a qp. If software needs to modify some fields
+ * in the context, it should set all bits of the relevant fields in the
+ * context mask to 0 at the same time; otherwise they are set to 0x1.
+ */
+ hr_reg_write(context, QPC_SQ_CUR_BLK_ADDR_L,
+ lower_32_bits(to_hr_hw_page_addr(sq_cur_blk)));
+ hr_reg_write(context, QPC_SQ_CUR_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(sq_cur_blk)));
+ hr_reg_clear(qpc_mask, QPC_SQ_CUR_BLK_ADDR_L);
+ hr_reg_clear(qpc_mask, QPC_SQ_CUR_BLK_ADDR_H);
+
+ hr_reg_write(context, QPC_SQ_CUR_SGE_BLK_ADDR_L,
+ lower_32_bits(to_hr_hw_page_addr(sge_cur_blk)));
+ hr_reg_write(context, QPC_SQ_CUR_SGE_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(sge_cur_blk)));
+ hr_reg_clear(qpc_mask, QPC_SQ_CUR_SGE_BLK_ADDR_L);
+ hr_reg_clear(qpc_mask, QPC_SQ_CUR_SGE_BLK_ADDR_H);
+
+ hr_reg_write(context, QPC_RX_SQ_CUR_BLK_ADDR_L,
+ lower_32_bits(to_hr_hw_page_addr(sq_cur_blk)));
+ hr_reg_write(context, QPC_RX_SQ_CUR_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(sq_cur_blk)));
+ hr_reg_clear(qpc_mask, QPC_RX_SQ_CUR_BLK_ADDR_L);
+ hr_reg_clear(qpc_mask, QPC_RX_SQ_CUR_BLK_ADDR_H);
+
+ return 0;
+}
+
+static inline enum ib_mtu get_mtu(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr)
+{
+ if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD)
+ return IB_MTU_4096;
+
+ return attr->path_mtu;
}
static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
struct hns_roce_v2_qp_context *context,
- struct hns_roce_v2_qp_context *qpc_mask)
+ struct hns_roce_v2_qp_context *qpc_mask,
+ struct ib_udata *udata)
{
- const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
+ struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata,
+ struct hns_roce_ucontext, ibucontext);
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct device *dev = hr_dev->dev;
- dma_addr_t dma_handle_3;
- dma_addr_t dma_handle_2;
- dma_addr_t dma_handle;
- u32 page_size;
- u8 port_num;
- u64 *mtts_3;
- u64 *mtts_2;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ dma_addr_t trrl_ba;
+ dma_addr_t irrl_ba;
+ enum ib_mtu ib_mtu;
+ u8 ack_req_freq;
+ const u8 *smac;
+ int lp_msg_len;
+ u8 lp_pktn_ini;
u64 *mtts;
u8 *dmac;
- u8 *smac;
- int port;
+ u32 port;
+ int mtu;
+ int ret;
- /* Search qp buf's mtts */
- mtts = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_table,
- hr_qp->mtt.first_seg, &dma_handle);
- if (!mtts) {
- dev_err(dev, "qp buf pa find failed\n");
- return -EINVAL;
+ ret = config_qp_rq_buf(hr_dev, hr_qp, context, qpc_mask);
+ if (ret) {
+ ibdev_err(ibdev, "failed to config rq buf, ret = %d.\n", ret);
+ return ret;
}
/* Search IRRL's mtts */
- mtts_2 = hns_roce_table_find(hr_dev, &hr_dev->qp_table.irrl_table,
- hr_qp->qpn, &dma_handle_2);
- if (!mtts_2) {
- dev_err(dev, "qp irrl_table find failed\n");
+ mtts = hns_roce_table_find(hr_dev, &hr_dev->qp_table.irrl_table,
+ hr_qp->qpn, &irrl_ba);
+ if (!mtts) {
+ ibdev_err(ibdev, "failed to find qp irrl_table.\n");
return -EINVAL;
}
/* Search TRRL's mtts */
- mtts_3 = hns_roce_table_find(hr_dev, &hr_dev->qp_table.trrl_table,
- hr_qp->qpn, &dma_handle_3);
- if (!mtts_3) {
- dev_err(dev, "qp trrl_table find failed\n");
+ mtts = hns_roce_table_find(hr_dev, &hr_dev->qp_table.trrl_table,
+ hr_qp->qpn, &trrl_ba);
+ if (!mtts) {
+ ibdev_err(ibdev, "failed to find qp trrl_table.\n");
return -EINVAL;
}
if (attr_mask & IB_QP_ALT_PATH) {
- dev_err(dev, "INIT2RTR attr_mask (0x%x) error\n", attr_mask);
+ ibdev_err(ibdev, "INIT2RTR attr_mask (0x%x) error.\n",
+ attr_mask);
return -EINVAL;
}
- dmac = (u8 *)attr->ah_attr.roce.dmac;
- context->wqe_sge_ba = (u32)(dma_handle >> 3);
- qpc_mask->wqe_sge_ba = 0;
-
- /*
- * In v2 engine, software pass context and context mask to hardware
- * when modifying qp. If software need modify some fields in context,
- * we should set all bits of the relevant fields in context mask to
- * 0 at the same time, else set them to 0x1.
- */
- roce_set_field(context->byte_12_sq_hop, V2_QPC_BYTE_12_WQE_SGE_BA_M,
- V2_QPC_BYTE_12_WQE_SGE_BA_S, dma_handle >> (32 + 3));
- roce_set_field(qpc_mask->byte_12_sq_hop, V2_QPC_BYTE_12_WQE_SGE_BA_M,
- V2_QPC_BYTE_12_WQE_SGE_BA_S, 0);
-
- roce_set_field(context->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M,
- V2_QPC_BYTE_12_SQ_HOP_NUM_S,
- hr_dev->caps.mtt_hop_num == HNS_ROCE_HOP_NUM_0 ?
- 0 : hr_dev->caps.mtt_hop_num);
- roce_set_field(qpc_mask->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M,
- V2_QPC_BYTE_12_SQ_HOP_NUM_S, 0);
-
- roce_set_field(context->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SGE_HOP_NUM_M,
- V2_QPC_BYTE_20_SGE_HOP_NUM_S,
- ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > 2) ?
- hr_dev->caps.mtt_hop_num : 0);
- roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SGE_HOP_NUM_M,
- V2_QPC_BYTE_20_SGE_HOP_NUM_S, 0);
-
- roce_set_field(context->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_RQ_HOP_NUM_M,
- V2_QPC_BYTE_20_RQ_HOP_NUM_S,
- hr_dev->caps.mtt_hop_num == HNS_ROCE_HOP_NUM_0 ?
- 0 : hr_dev->caps.mtt_hop_num);
- roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_RQ_HOP_NUM_M,
- V2_QPC_BYTE_20_RQ_HOP_NUM_S, 0);
-
- roce_set_field(context->byte_16_buf_ba_pg_sz,
- V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M,
- V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S,
- hr_dev->caps.mtt_ba_pg_sz + PG_SHIFT_OFFSET);
- roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz,
- V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M,
- V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S, 0);
-
- roce_set_field(context->byte_16_buf_ba_pg_sz,
- V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M,
- V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S,
- hr_dev->caps.mtt_buf_pg_sz + PG_SHIFT_OFFSET);
- roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz,
- V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M,
- V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, 0);
-
- page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
- context->rq_cur_blk_addr = (u32)(mtts[hr_qp->rq.offset / page_size]
- >> PAGE_ADDR_SHIFT);
- qpc_mask->rq_cur_blk_addr = 0;
-
- roce_set_field(context->byte_92_srq_info,
- V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_M,
- V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_S,
- mtts[hr_qp->rq.offset / page_size]
- >> (32 + PAGE_ADDR_SHIFT));
- roce_set_field(qpc_mask->byte_92_srq_info,
- V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_M,
- V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_S, 0);
-
- context->rq_nxt_blk_addr = (u32)(mtts[hr_qp->rq.offset / page_size + 1]
- >> PAGE_ADDR_SHIFT);
- qpc_mask->rq_nxt_blk_addr = 0;
-
- roce_set_field(context->byte_104_rq_sge,
- V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_M,
- V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_S,
- mtts[hr_qp->rq.offset / page_size + 1]
- >> (32 + PAGE_ADDR_SHIFT));
- roce_set_field(qpc_mask->byte_104_rq_sge,
- V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_M,
- V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_S, 0);
-
- roce_set_field(context->byte_132_trrl, V2_QPC_BYTE_132_TRRL_BA_M,
- V2_QPC_BYTE_132_TRRL_BA_S, dma_handle_3 >> 4);
- roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_BA_M,
- V2_QPC_BYTE_132_TRRL_BA_S, 0);
- context->trrl_ba = (u32)(dma_handle_3 >> (16 + 4));
+ hr_reg_write(context, QPC_TRRL_BA_L, trrl_ba >> QPC_TRRL_BA_L_S);
+ hr_reg_clear(qpc_mask, QPC_TRRL_BA_L);
+ context->trrl_ba = cpu_to_le32(trrl_ba >> QPC_TRRL_BA_M_S);
qpc_mask->trrl_ba = 0;
- roce_set_field(context->byte_140_raq, V2_QPC_BYTE_140_TRRL_BA_M,
- V2_QPC_BYTE_140_TRRL_BA_S,
- (u32)(dma_handle_3 >> (32 + 16 + 4)));
- roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_TRRL_BA_M,
- V2_QPC_BYTE_140_TRRL_BA_S, 0);
+ hr_reg_write(context, QPC_TRRL_BA_H, trrl_ba >> QPC_TRRL_BA_H_S);
+ hr_reg_clear(qpc_mask, QPC_TRRL_BA_H);
- context->irrl_ba = (u32)(dma_handle_2 >> 6);
+ context->irrl_ba = cpu_to_le32(irrl_ba >> QPC_IRRL_BA_L_S);
qpc_mask->irrl_ba = 0;
- roce_set_field(context->byte_208_irrl, V2_QPC_BYTE_208_IRRL_BA_M,
- V2_QPC_BYTE_208_IRRL_BA_S,
- dma_handle_2 >> (32 + 6));
- roce_set_field(qpc_mask->byte_208_irrl, V2_QPC_BYTE_208_IRRL_BA_M,
- V2_QPC_BYTE_208_IRRL_BA_S, 0);
+ hr_reg_write(context, QPC_IRRL_BA_H, irrl_ba >> QPC_IRRL_BA_H_S);
+ hr_reg_clear(qpc_mask, QPC_IRRL_BA_H);
- roce_set_bit(context->byte_208_irrl, V2_QPC_BYTE_208_RMT_E2E_S, 1);
- roce_set_bit(qpc_mask->byte_208_irrl, V2_QPC_BYTE_208_RMT_E2E_S, 0);
+ hr_reg_enable(context, QPC_RMT_E2E);
+ hr_reg_clear(qpc_mask, QPC_RMT_E2E);
- roce_set_bit(context->byte_252_err_txcqn, V2_QPC_BYTE_252_SIG_TYPE_S,
- hr_qp->sq_signal_bits);
- roce_set_bit(qpc_mask->byte_252_err_txcqn, V2_QPC_BYTE_252_SIG_TYPE_S,
- 0);
+ hr_reg_write(context, QPC_SIG_TYPE, hr_qp->sq_signal_bits);
+ hr_reg_clear(qpc_mask, QPC_SIG_TYPE);
port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) : hr_qp->port;
- smac = (u8 *)hr_dev->dev_addr[port];
+ smac = (const u8 *)hr_dev->dev_addr[port];
+ dmac = (u8 *)attr->ah_attr.roce.dmac;
 /* when dmac equals smac or loop_idc is 1, the packet should be looped back */
if (ether_addr_equal_unaligned(dmac, smac) ||
hr_dev->loop_idc == 0x1) {
- roce_set_bit(context->byte_28_at_fl, V2_QPC_BYTE_28_LBI_S, 1);
- roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_LBI_S, 0);
+ hr_reg_write(context, QPC_LBI, hr_dev->loop_idc);
+ hr_reg_clear(qpc_mask, QPC_LBI);
}
if (attr_mask & IB_QP_DEST_QPN) {
- roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M,
- V2_QPC_BYTE_56_DQPN_S, attr->dest_qp_num);
- roce_set_field(qpc_mask->byte_56_dqpn_err,
- V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0);
- }
-
- /* Configure GID index */
- port_num = rdma_ah_get_port_num(&attr->ah_attr);
- roce_set_field(context->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SGID_IDX_M,
- V2_QPC_BYTE_20_SGID_IDX_S,
- hns_get_gid_index(hr_dev, port_num - 1,
- grh->sgid_index));
- roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SGID_IDX_M,
- V2_QPC_BYTE_20_SGID_IDX_S, 0);
- memcpy(&(context->dmac), dmac, 4);
- roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_DMAC_M,
- V2_QPC_BYTE_52_DMAC_S, *((u16 *)(&dmac[4])));
+ hr_reg_write(context, QPC_DQPN, attr->dest_qp_num);
+ hr_reg_clear(qpc_mask, QPC_DQPN);
+ }
+
+ memcpy(&context->dmac, dmac, sizeof(u32));
+ hr_reg_write(context, QPC_DMAC_H, *((u16 *)(&dmac[4])));
qpc_mask->dmac = 0;
- roce_set_field(qpc_mask->byte_52_udpspn_dmac, V2_QPC_BYTE_52_DMAC_M,
- V2_QPC_BYTE_52_DMAC_S, 0);
+ hr_reg_clear(qpc_mask, QPC_DMAC_H);
- roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M,
- V2_QPC_BYTE_56_LP_PKTN_INI_S, 4);
- roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M,
- V2_QPC_BYTE_56_LP_PKTN_INI_S, 0);
+ ib_mtu = get_mtu(ibqp, attr);
+ hr_qp->path_mtu = ib_mtu;
- if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD)
- roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
- V2_QPC_BYTE_24_MTU_S, IB_MTU_4096);
- else if (attr_mask & IB_QP_PATH_MTU)
- roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
- V2_QPC_BYTE_24_MTU_S, attr->path_mtu);
-
- roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
- V2_QPC_BYTE_24_MTU_S, 0);
-
- roce_set_field(context->byte_84_rq_ci_pi,
- V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
- V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, hr_qp->rq.head);
- roce_set_field(qpc_mask->byte_84_rq_ci_pi,
- V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
- V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0);
-
- roce_set_field(qpc_mask->byte_84_rq_ci_pi,
- V2_QPC_BYTE_84_RQ_CONSUMER_IDX_M,
- V2_QPC_BYTE_84_RQ_CONSUMER_IDX_S, 0);
- roce_set_bit(qpc_mask->byte_108_rx_reqepsn,
- V2_QPC_BYTE_108_RX_REQ_PSN_ERR_S, 0);
- roce_set_field(qpc_mask->byte_96_rx_reqmsn, V2_QPC_BYTE_96_RX_REQ_MSN_M,
- V2_QPC_BYTE_96_RX_REQ_MSN_S, 0);
- roce_set_field(qpc_mask->byte_108_rx_reqepsn,
- V2_QPC_BYTE_108_RX_REQ_LAST_OPTYPE_M,
- V2_QPC_BYTE_108_RX_REQ_LAST_OPTYPE_S, 0);
+ mtu = ib_mtu_enum_to_int(ib_mtu);
+ if (WARN_ON(mtu <= 0))
+ return -EINVAL;
+#define MIN_LP_MSG_LEN 1024
+ /* mtu * (2 ^ lp_pktn_ini) should be at least MIN_LP_MSG_LEN (1024) bytes */
+ lp_msg_len = max(mtu, MIN_LP_MSG_LEN);
+ lp_pktn_ini = ilog2(lp_msg_len / mtu);
+
+ if (attr_mask & IB_QP_PATH_MTU) {
+ hr_reg_write(context, QPC_MTU, ib_mtu);
+ hr_reg_clear(qpc_mask, QPC_MTU);
+ }
+
+ hr_reg_write(context, QPC_LP_PKTN_INI, lp_pktn_ini);
+ hr_reg_clear(qpc_mask, QPC_LP_PKTN_INI);
+
+ /*
+ * There are several constraints for ACK_REQ_FREQ:
+ * 1. mtu * (2 ^ ACK_REQ_FREQ) should not be too large, otherwise
+ * it may cause some unexpected retries when sending large
+ * payloads.
+ * 2. ACK_REQ_FREQ should be larger than or equal to LP_PKTN_INI.
+ * 3. ACK_REQ_FREQ must be equal to LP_PKTN_INI when using the LDCP
+ * or HC3 congestion control algorithms.
+ */
+ if (hr_qp->cong_type == CONG_TYPE_LDCP ||
+ hr_qp->cong_type == CONG_TYPE_HC3 ||
+ hr_dev->caps.max_ack_req_msg_len < lp_msg_len)
+ ack_req_freq = lp_pktn_ini;
+ else
+ ack_req_freq = ilog2(hr_dev->caps.max_ack_req_msg_len / mtu);
+ hr_reg_write(context, QPC_ACK_REQ_FREQ, ack_req_freq);
+ hr_reg_clear(qpc_mask, QPC_ACK_REQ_FREQ);
+
+ hr_reg_clear(qpc_mask, QPC_RX_REQ_PSN_ERR);
+ hr_reg_clear(qpc_mask, QPC_RX_REQ_MSN);
+ hr_reg_clear(qpc_mask, QPC_RX_REQ_LAST_OPTYPE);
context->rq_rnr_timer = 0;
qpc_mask->rq_rnr_timer = 0;
- roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_HEAD_MAX_M,
- V2_QPC_BYTE_132_TRRL_HEAD_MAX_S, 0);
- roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_TAIL_MAX_M,
- V2_QPC_BYTE_132_TRRL_TAIL_MAX_S, 0);
+ hr_reg_clear(qpc_mask, QPC_TRRL_HEAD_MAX);
+ hr_reg_clear(qpc_mask, QPC_TRRL_TAIL_MAX);
+
+#define MAX_LP_SGEN 3
+ /* the ROCEE sends 2^lp_sgen_ini segments at a time */
+ hr_reg_write(context, QPC_LP_SGEN_INI, MAX_LP_SGEN);
+ hr_reg_clear(qpc_mask, QPC_LP_SGEN_INI);
- roce_set_field(context->byte_168_irrl_idx,
- V2_QPC_BYTE_168_LP_SGEN_INI_M,
- V2_QPC_BYTE_168_LP_SGEN_INI_S, 3);
- roce_set_field(qpc_mask->byte_168_irrl_idx,
- V2_QPC_BYTE_168_LP_SGEN_INI_M,
- V2_QPC_BYTE_168_LP_SGEN_INI_S, 0);
+ if (udata && ibqp->qp_type == IB_QPT_RC &&
+ (uctx->config & HNS_ROCE_RQ_INLINE_FLAGS)) {
+ hr_reg_write_bool(context, QPC_RQIE,
+ hr_dev->caps.flags &
+ HNS_ROCE_CAP_FLAG_RQ_INLINE);
+ hr_reg_clear(qpc_mask, QPC_RQIE);
+ }
+
+ if (udata &&
+ (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_XRC_TGT) &&
+ (uctx->config & HNS_ROCE_CQE_INLINE_FLAGS)) {
+ hr_reg_write_bool(context, QPC_CQEIE,
+ hr_dev->caps.flags &
+ HNS_ROCE_CAP_FLAG_CQE_INLINE);
+ hr_reg_clear(qpc_mask, QPC_CQEIE);
+
+ hr_reg_write(context, QPC_CQEIS, 0);
+ hr_reg_clear(qpc_mask, QPC_CQEIS);
+ }
return 0;
}
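As an aside (not part of the patch), the LP_PKTN_INI / ACK_REQ_FREQ arithmetic above can be modelled in a few lines of standalone C. The 64 KB max_ack_req_msg_len below is an assumed stand-in for hr_dev->caps.max_ack_req_msg_len, and the LDCP/HC3 special cases are omitted; only the relationship between the values is meant to mirror the driver logic.

#include <stdio.h>

static unsigned int ilog2_u32(unsigned int v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	const unsigned int min_lp_msg_len = 1024;        /* MIN_LP_MSG_LEN */
	const unsigned int max_ack_req_msg_len = 65536;  /* assumed cap */
	const unsigned int mtus[] = { 256, 512, 1024, 4096 };
	unsigned int i;

	for (i = 0; i < sizeof(mtus) / sizeof(mtus[0]); i++) {
		unsigned int mtu = mtus[i];
		/* mtu * 2^lp_pktn_ini must reach at least 1024 bytes */
		unsigned int lp_msg_len = mtu > min_lp_msg_len ? mtu : min_lp_msg_len;
		unsigned int lp_pktn_ini = ilog2_u32(lp_msg_len / mtu);
		/* constraint 2: ack_req_freq >= lp_pktn_ini */
		unsigned int ack_req_freq = ilog2_u32(max_ack_req_msg_len / mtu);

		if (ack_req_freq < lp_pktn_ini)
			ack_req_freq = lp_pktn_ini;

		printf("mtu=%-4u lp_pktn_ini=%u ack_req_freq=%u\n",
		       mtu, lp_pktn_ini, ack_req_freq);
	}
	return 0;
}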
-static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
- const struct ib_qp_attr *attr, int attr_mask,
+static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, int attr_mask,
struct hns_roce_v2_qp_context *context,
struct hns_roce_v2_qp_context *qpc_mask)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct device *dev = hr_dev->dev;
- dma_addr_t dma_handle;
- u32 page_size;
- u64 *mtts;
-
- /* Search qp buf's mtts */
- mtts = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_table,
- hr_qp->mtt.first_seg, &dma_handle);
- if (!mtts) {
- dev_err(dev, "qp buf pa find failed\n");
- return -EINVAL;
- }
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int ret;
/* Alternate path and path migration are not supported */
- if ((attr_mask & IB_QP_ALT_PATH) ||
- (attr_mask & IB_QP_PATH_MIG_STATE)) {
- dev_err(dev, "RTR2RTS attr_mask (0x%x)error\n", attr_mask);
+ if (attr_mask & (IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE)) {
+ ibdev_err(ibdev, "RTR2RTS attr_mask (0x%x)error\n", attr_mask);
return -EINVAL;
}
- /*
- * In v2 engine, software pass context and context mask to hardware
- * when modifying qp. If software need modify some fields in context,
- * we should set all bits of the relevant fields in context mask to
- * 0 at the same time, else set them to 0x1.
- */
- context->sq_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT);
- roce_set_field(context->byte_168_irrl_idx,
- V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M,
- V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S,
- mtts[0] >> (32 + PAGE_ADDR_SHIFT));
- qpc_mask->sq_cur_blk_addr = 0;
- roce_set_field(qpc_mask->byte_168_irrl_idx,
- V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M,
- V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S, 0);
-
- page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
- context->sq_cur_sge_blk_addr =
- ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > 2) ?
- ((u32)(mtts[hr_qp->sge.offset / page_size]
- >> PAGE_ADDR_SHIFT)) : 0;
- roce_set_field(context->byte_184_irrl_idx,
- V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M,
- V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S,
- ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > 2) ?
- (mtts[hr_qp->sge.offset / page_size] >>
- (32 + PAGE_ADDR_SHIFT)) : 0);
- qpc_mask->sq_cur_sge_blk_addr = 0;
- roce_set_field(qpc_mask->byte_184_irrl_idx,
- V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M,
- V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S, 0);
-
- context->rx_sq_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT);
- roce_set_field(context->byte_232_irrl_sge,
- V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_M,
- V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_S,
- mtts[0] >> (32 + PAGE_ADDR_SHIFT));
- qpc_mask->rx_sq_cur_blk_addr = 0;
- roce_set_field(qpc_mask->byte_232_irrl_sge,
- V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_M,
- V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_S, 0);
+ ret = config_qp_sq_buf(hr_dev, hr_qp, context, qpc_mask);
+ if (ret) {
+ ibdev_err(ibdev, "failed to config sq buf, ret = %d.\n", ret);
+ return ret;
+ }
/*
 * Set some fields in context to zero. Because the default values
 * of all fields in context are zero, we need not set them to 0 again,
 * but we should set the relevant fields of the context mask to 0.
*/
- roce_set_field(qpc_mask->byte_232_irrl_sge,
- V2_QPC_BYTE_232_IRRL_SGE_IDX_M,
- V2_QPC_BYTE_232_IRRL_SGE_IDX_S, 0);
+ hr_reg_clear(qpc_mask, QPC_IRRL_SGE_IDX);
+
+ hr_reg_clear(qpc_mask, QPC_RX_ACK_MSN);
+
+ hr_reg_clear(qpc_mask, QPC_ACK_LAST_OPTYPE);
+ hr_reg_clear(qpc_mask, QPC_IRRL_PSN_VLD);
+ hr_reg_clear(qpc_mask, QPC_IRRL_PSN);
+
+ hr_reg_clear(qpc_mask, QPC_IRRL_TAIL_REAL);
+
+ hr_reg_clear(qpc_mask, QPC_RETRY_MSG_MSN);
+
+ hr_reg_clear(qpc_mask, QPC_RNR_RETRY_FLAG);
- roce_set_field(qpc_mask->byte_240_irrl_tail,
- V2_QPC_BYTE_240_RX_ACK_MSN_M,
- V2_QPC_BYTE_240_RX_ACK_MSN_S, 0);
+ hr_reg_clear(qpc_mask, QPC_CHECK_FLG);
- roce_set_field(qpc_mask->byte_248_ack_psn,
- V2_QPC_BYTE_248_ACK_LAST_OPTYPE_M,
- V2_QPC_BYTE_248_ACK_LAST_OPTYPE_S, 0);
- roce_set_bit(qpc_mask->byte_248_ack_psn,
- V2_QPC_BYTE_248_IRRL_PSN_VLD_S, 0);
- roce_set_field(qpc_mask->byte_248_ack_psn,
- V2_QPC_BYTE_248_IRRL_PSN_M,
- V2_QPC_BYTE_248_IRRL_PSN_S, 0);
+ hr_reg_clear(qpc_mask, QPC_V2_IRRL_HEAD);
- roce_set_field(qpc_mask->byte_240_irrl_tail,
- V2_QPC_BYTE_240_IRRL_TAIL_REAL_M,
- V2_QPC_BYTE_240_IRRL_TAIL_REAL_S, 0);
+ return 0;
+}
- roce_set_field(qpc_mask->byte_220_retry_psn_msn,
- V2_QPC_BYTE_220_RETRY_MSG_MSN_M,
- V2_QPC_BYTE_220_RETRY_MSG_MSN_S, 0);
+static int alloc_dip_entry(struct xarray *dip_xa, u32 qpn)
+{
+ struct hns_roce_dip *hr_dip;
+ int ret;
- roce_set_bit(qpc_mask->byte_248_ack_psn,
- V2_QPC_BYTE_248_RNR_RETRY_FLAG_S, 0);
+ hr_dip = xa_load(dip_xa, qpn);
+ if (hr_dip)
+ return 0;
- roce_set_field(qpc_mask->byte_212_lsn, V2_QPC_BYTE_212_CHECK_FLG_M,
- V2_QPC_BYTE_212_CHECK_FLG_S, 0);
+ hr_dip = kzalloc(sizeof(*hr_dip), GFP_KERNEL);
+ if (!hr_dip)
+ return -ENOMEM;
- roce_set_field(context->byte_212_lsn, V2_QPC_BYTE_212_LSN_M,
- V2_QPC_BYTE_212_LSN_S, 0x100);
- roce_set_field(qpc_mask->byte_212_lsn, V2_QPC_BYTE_212_LSN_M,
- V2_QPC_BYTE_212_LSN_S, 0);
+ ret = xa_err(xa_store(dip_xa, qpn, hr_dip, GFP_KERNEL));
+ if (ret)
+ kfree(hr_dip);
- roce_set_field(qpc_mask->byte_196_sq_psn, V2_QPC_BYTE_196_IRRL_HEAD_M,
- V2_QPC_BYTE_196_IRRL_HEAD_S, 0);
+ return ret;
+}
+
+static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
+ u32 *dip_idx)
+{
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct xarray *dip_xa = &hr_dev->qp_table.dip_xa;
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct hns_roce_dip *hr_dip;
+ unsigned long idx;
+ int ret = 0;
+
+ ret = alloc_dip_entry(dip_xa, ibqp->qp_num);
+ if (ret)
+ return ret;
+
+ xa_lock(dip_xa);
+
+ xa_for_each(dip_xa, idx, hr_dip) {
+ if (hr_dip->qp_cnt &&
+ !memcmp(grh->dgid.raw, hr_dip->dgid, GID_LEN_V2)) {
+ *dip_idx = hr_dip->dip_idx;
+ hr_dip->qp_cnt++;
+ hr_qp->dip = hr_dip;
+ goto out;
+ }
+ }
+
+ /* If no dgid is found, a new dip and a mapping between dgid and
+ * dip_idx will be created.
+ */
+ xa_for_each(dip_xa, idx, hr_dip) {
+ if (hr_dip->qp_cnt)
+ continue;
+
+ *dip_idx = idx;
+ memcpy(hr_dip->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
+ hr_dip->dip_idx = idx;
+ hr_dip->qp_cnt++;
+ hr_qp->dip = hr_dip;
+ break;
+ }
+
+ /* This should never happen. */
+ if (WARN_ON_ONCE(!hr_qp->dip))
+ ret = -ENOSPC;
+
+out:
+ xa_unlock(dip_xa);
+ return ret;
+}
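For readers unfamiliar with the DIP reuse scheme, here is a simplified userspace model of the get_dip_ctx_idx() logic: reuse an in-use slot whose destination GID matches, otherwise claim the first free slot. A fixed-size array stands in for the driver's xarray, and every name in this sketch is illustrative rather than taken from the driver.

#include <stdio.h>
#include <string.h>

#define GID_LEN   16
#define DIP_SLOTS 8

struct dip_slot {
	unsigned char dgid[GID_LEN];
	unsigned int qp_cnt;	/* 0 means the slot is free */
};

static struct dip_slot dip_table[DIP_SLOTS];

/* Return the dip index for @dgid, reusing an existing slot when possible. */
static int get_dip_idx(const unsigned char *dgid, unsigned int *dip_idx)
{
	unsigned int i;

	/* first pass: an in-use slot with the same destination GID */
	for (i = 0; i < DIP_SLOTS; i++) {
		if (dip_table[i].qp_cnt &&
		    !memcmp(dip_table[i].dgid, dgid, GID_LEN)) {
			dip_table[i].qp_cnt++;
			*dip_idx = i;
			return 0;
		}
	}

	/* second pass: claim the first free slot for this GID */
	for (i = 0; i < DIP_SLOTS; i++) {
		if (!dip_table[i].qp_cnt) {
			memcpy(dip_table[i].dgid, dgid, GID_LEN);
			dip_table[i].qp_cnt = 1;
			*dip_idx = i;
			return 0;
		}
	}

	return -1;	/* table exhausted */
}

int main(void)
{
	unsigned char gid_a[GID_LEN] = { 0xfe, 0x80, 1 };
	unsigned char gid_b[GID_LEN] = { 0xfe, 0x80, 2 };
	unsigned int idx;

	get_dip_idx(gid_a, &idx);
	printf("gid_a -> dip %u\n", idx);
	get_dip_idx(gid_b, &idx);
	printf("gid_b -> dip %u\n", idx);
	get_dip_idx(gid_a, &idx);
	printf("gid_a again -> dip %u (reused)\n", idx);
	return 0;
}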
+
+enum {
+ CONG_DCQCN,
+ CONG_WINDOW,
+};
+
+enum {
+ UNSUPPORT_CONG_LEVEL,
+ SUPPORT_CONG_LEVEL,
+};
+
+enum {
+ CONG_LDCP,
+ CONG_HC3,
+};
+
+enum {
+ DIP_INVALID,
+ DIP_VALID,
+};
+
+enum {
+ WND_LIMIT,
+ WND_UNLIMIT,
+};
+
+static int check_cong_type(struct ib_qp *ibqp,
+ struct hns_roce_congestion_algorithm *cong_alg)
+{
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+
+ /* different congestion types match different configurations */
+ switch (hr_qp->cong_type) {
+ case CONG_TYPE_DCQCN:
+ cong_alg->alg_sel = CONG_DCQCN;
+ cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL;
+ cong_alg->dip_vld = DIP_INVALID;
+ cong_alg->wnd_mode_sel = WND_LIMIT;
+ break;
+ case CONG_TYPE_LDCP:
+ cong_alg->alg_sel = CONG_WINDOW;
+ cong_alg->alg_sub_sel = CONG_LDCP;
+ cong_alg->dip_vld = DIP_INVALID;
+ cong_alg->wnd_mode_sel = WND_UNLIMIT;
+ break;
+ case CONG_TYPE_HC3:
+ cong_alg->alg_sel = CONG_WINDOW;
+ cong_alg->alg_sub_sel = CONG_HC3;
+ cong_alg->dip_vld = DIP_INVALID;
+ cong_alg->wnd_mode_sel = WND_LIMIT;
+ break;
+ case CONG_TYPE_DIP:
+ cong_alg->alg_sel = CONG_DCQCN;
+ cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL;
+ cong_alg->dip_vld = DIP_VALID;
+ cong_alg->wnd_mode_sel = WND_LIMIT;
+ break;
+ default:
+ hr_qp->cong_type = CONG_TYPE_DCQCN;
+ cong_alg->alg_sel = CONG_DCQCN;
+ cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL;
+ cong_alg->dip_vld = DIP_INVALID;
+ cong_alg->wnd_mode_sel = WND_LIMIT;
+ break;
+ }
return 0;
}
-static inline bool hns_roce_v2_check_qp_stat(enum ib_qp_state cur_state,
- enum ib_qp_state new_state)
+static int fill_cong_field(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
{
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
+ struct hns_roce_congestion_algorithm cong_field;
+ struct ib_device *ibdev = ibqp->device;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ u32 dip_idx = 0;
+ int ret;
- if ((cur_state != IB_QPS_RESET &&
- (new_state == IB_QPS_ERR || new_state == IB_QPS_RESET)) ||
- ((cur_state == IB_QPS_RTS || cur_state == IB_QPS_SQD) &&
- (new_state == IB_QPS_RTS || new_state == IB_QPS_SQD)) ||
- (cur_state == IB_QPS_SQE && new_state == IB_QPS_RTS))
- return true;
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 ||
+ grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE)
+ return 0;
- return false;
+ ret = check_cong_type(ibqp, &cong_field);
+ if (ret)
+ return ret;
+ hr_reg_write(context, QPC_CONG_ALGO_TMPL_ID, hr_dev->cong_algo_tmpl_id +
+ hr_qp->cong_type * HNS_ROCE_CONG_SIZE);
+ hr_reg_clear(qpc_mask, QPC_CONG_ALGO_TMPL_ID);
+ hr_reg_write(&context->ext, QPCEX_CONG_ALG_SEL, cong_field.alg_sel);
+ hr_reg_clear(&qpc_mask->ext, QPCEX_CONG_ALG_SEL);
+ hr_reg_write(&context->ext, QPCEX_CONG_ALG_SUB_SEL,
+ cong_field.alg_sub_sel);
+ hr_reg_clear(&qpc_mask->ext, QPCEX_CONG_ALG_SUB_SEL);
+ hr_reg_write(&context->ext, QPCEX_DIP_CTX_IDX_VLD, cong_field.dip_vld);
+ hr_reg_clear(&qpc_mask->ext, QPCEX_DIP_CTX_IDX_VLD);
+ hr_reg_write(&context->ext, QPCEX_SQ_RQ_NOT_FORBID_EN,
+ cong_field.wnd_mode_sel);
+ hr_reg_clear(&qpc_mask->ext, QPCEX_SQ_RQ_NOT_FORBID_EN);
+
+ /* if dip is disabled, there is no need to set dip idx */
+ if (cong_field.dip_vld == 0)
+ return 0;
+
+ ret = get_dip_ctx_idx(ibqp, attr, &dip_idx);
+ if (ret) {
+ ibdev_err(ibdev, "failed to fill cong field, ret = %d.\n", ret);
+ return ret;
+ }
+
+ hr_reg_write(&context->ext, QPCEX_DIP_CTX_IDX, dip_idx);
+ hr_reg_write(&qpc_mask->ext, QPCEX_DIP_CTX_IDX, 0);
+
+ return 0;
}
-static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
- const struct ib_qp_attr *attr,
- int attr_mask, enum ib_qp_state cur_state,
- enum ib_qp_state new_state)
+static int hns_roce_hw_v2_get_dscp(struct hns_roce_dev *hr_dev, u8 dscp,
+ u8 *tc_mode, u8 *priority)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+
+ if (!ops->get_dscp_prio)
+ return -EOPNOTSUPP;
+
+ return ops->get_dscp_prio(handle, dscp, tc_mode, priority);
+}
+
+bool check_sl_valid(struct hns_roce_dev *hr_dev, u8 sl)
+{
+ u32 max_sl;
+
+ max_sl = min_t(u32, MAX_SERVICE_LEVEL, hr_dev->caps.sl_num - 1);
+ if (unlikely(sl > max_sl)) {
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to set SL(%u). Shouldn't be larger than %u.\n",
+ sl, max_sl);
+ return false;
+ }
+
+ return true;
+}
+
+static int hns_roce_set_sl(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
{
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct hns_roce_v2_qp_context *context;
- struct hns_roce_v2_qp_context *qpc_mask;
- struct device *dev = hr_dev->dev;
- int ret = -EINVAL;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int ret;
- context = kcalloc(2, sizeof(*context), GFP_ATOMIC);
- if (!context)
- return -ENOMEM;
+ ret = hns_roce_hw_v2_get_dscp(hr_dev, get_tclass(&attr->ah_attr.grh),
+ &hr_qp->tc_mode, &hr_qp->priority);
+ if (ret && ret != -EOPNOTSUPP &&
+ grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
+ ibdev_err_ratelimited(ibdev,
+ "failed to get dscp, ret = %d.\n", ret);
+ return ret;
+ }
+
+ if (hr_qp->tc_mode == HNAE3_TC_MAP_MODE_DSCP &&
+ grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ hr_qp->sl = hr_qp->priority;
+ else
+ hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
+
+ if (!check_sl_valid(hr_dev, hr_qp->sl))
+ return -EINVAL;
+
+ hr_reg_write(context, QPC_SL, hr_qp->sl);
+ hr_reg_clear(qpc_mask, QPC_SL);
+
+ return 0;
+}
+
+static int hns_roce_v2_set_path(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ const struct ib_gid_attr *gid_attr = NULL;
+ u8 sl = rdma_ah_get_sl(&attr->ah_attr);
+ int is_roce_protocol;
+ u16 vlan_id = 0xffff;
+ bool is_udp = false;
+ u8 ib_port;
+ u8 hr_port;
+ int ret;
- qpc_mask = context + 1;
/*
- * In v2 engine, software pass context and context mask to hardware
- * when modifying qp. If software need modify some fields in context,
- * we should set all bits of the relevant fields in context mask to
- * 0 at the same time, else set them to 0x1.
+ * If free_mr_en of the qp is set, it means that this qp comes from
+ * the free mr and will perform the loopback operation.
+ * In the loopback scenario, only the sl needs to be set.
*/
- memset(qpc_mask, 0xff, sizeof(*qpc_mask));
+ if (hr_qp->free_mr_en) {
+ if (!check_sl_valid(hr_dev, sl))
+ return -EINVAL;
+ hr_reg_write(context, QPC_SL, sl);
+ hr_reg_clear(qpc_mask, QPC_SL);
+ hr_qp->sl = sl;
+ return 0;
+ }
+
+ ib_port = (attr_mask & IB_QP_PORT) ? attr->port_num : hr_qp->port + 1;
+ hr_port = ib_port - 1;
+ is_roce_protocol = rdma_cap_eth_ah(&hr_dev->ib_dev, ib_port) &&
+ rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH;
+
+ if (is_roce_protocol) {
+ gid_attr = attr->ah_attr.grh.sgid_attr;
+ ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL);
+ if (ret)
+ return ret;
+
+ is_udp = (gid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP);
+ }
+
+ /* Only HIP08 needs to set the vlan_en bits in QPC */
+ if (vlan_id < VLAN_N_VID &&
+ hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ hr_reg_enable(context, QPC_RQ_VLAN_EN);
+ hr_reg_clear(qpc_mask, QPC_RQ_VLAN_EN);
+ hr_reg_enable(context, QPC_SQ_VLAN_EN);
+ hr_reg_clear(qpc_mask, QPC_SQ_VLAN_EN);
+ }
+
+ hr_reg_write(context, QPC_VLAN_ID, vlan_id);
+ hr_reg_clear(qpc_mask, QPC_VLAN_ID);
+
+ if (grh->sgid_index >= hr_dev->caps.gid_table_len[hr_port]) {
+ ibdev_err(ibdev, "sgid_index(%u) too large. max is %d\n",
+ grh->sgid_index, hr_dev->caps.gid_table_len[hr_port]);
+ return -EINVAL;
+ }
+
+ if (attr->ah_attr.type != RDMA_AH_ATTR_TYPE_ROCE) {
+ ibdev_err(ibdev, "ah attr is not RDMA roce type\n");
+ return -EINVAL;
+ }
+
+ hr_reg_write(context, QPC_UDPSPN,
+ is_udp ? rdma_get_udp_sport(grh->flow_label, ibqp->qp_num,
+ attr->dest_qp_num) :
+ 0);
+
+ hr_reg_clear(qpc_mask, QPC_UDPSPN);
+
+ hr_reg_write(context, QPC_GMV_IDX, grh->sgid_index);
+
+ hr_reg_clear(qpc_mask, QPC_GMV_IDX);
+
+ hr_reg_write(context, QPC_HOPLIMIT, grh->hop_limit);
+ hr_reg_clear(qpc_mask, QPC_HOPLIMIT);
+
+ ret = fill_cong_field(ibqp, attr, context, qpc_mask);
+ if (ret)
+ return ret;
+
+ hr_reg_write(context, QPC_TC, get_tclass(&attr->ah_attr.grh));
+ hr_reg_clear(qpc_mask, QPC_TC);
+
+ hr_reg_write(context, QPC_FL, grh->flow_label);
+ hr_reg_clear(qpc_mask, QPC_FL);
+ memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
+ memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw));
+
+ return hns_roce_set_sl(ibqp, attr, context, qpc_mask);
+}
+
+static bool check_qp_state(enum ib_qp_state cur_state,
+ enum ib_qp_state new_state)
+{
+ static const bool sm[][IB_QPS_ERR + 1] = {
+ [IB_QPS_RESET] = { [IB_QPS_RESET] = true,
+ [IB_QPS_INIT] = true },
+ [IB_QPS_INIT] = { [IB_QPS_RESET] = true,
+ [IB_QPS_INIT] = true,
+ [IB_QPS_RTR] = true,
+ [IB_QPS_ERR] = true },
+ [IB_QPS_RTR] = { [IB_QPS_RESET] = true,
+ [IB_QPS_RTS] = true,
+ [IB_QPS_ERR] = true },
+ [IB_QPS_RTS] = { [IB_QPS_RESET] = true,
+ [IB_QPS_RTS] = true,
+ [IB_QPS_ERR] = true },
+ [IB_QPS_SQD] = {},
+ [IB_QPS_SQE] = {},
+ [IB_QPS_ERR] = { [IB_QPS_RESET] = true,
+ [IB_QPS_ERR] = true }
+ };
+
+ return sm[cur_state][new_state];
+}
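The table above replaces the old hns_roce_v2_check_qp_stat() chain of conditionals with a designated-initializer lookup: only transitions explicitly marked true are allowed. A tiny standalone model of the same idiom, using an invented three-state machine rather than the IB QP states, looks like this:

#include <stdbool.h>
#include <stdio.h>

enum state { S_RESET, S_INIT, S_READY, S_MAX };

static bool allowed(enum state cur, enum state new)
{
	static const bool sm[S_MAX][S_MAX] = {
		[S_RESET] = { [S_RESET] = true, [S_INIT] = true },
		[S_INIT]  = { [S_RESET] = true, [S_READY] = true },
		[S_READY] = { [S_RESET] = true },
	};

	return sm[cur][new];
}

int main(void)
{
	printf("INIT->READY: %d\n", allowed(S_INIT, S_READY));	/* 1 */
	printf("READY->INIT: %d\n", allowed(S_READY, S_INIT));	/* 0 */
	return 0;
}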
+
+static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask,
+ enum ib_qp_state cur_state,
+ enum ib_qp_state new_state,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ int ret = 0;
+
+ if (!check_qp_state(cur_state, new_state))
+ return -EINVAL;
+
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
- memset(qpc_mask, 0, sizeof(*qpc_mask));
- modify_qp_reset_to_init(ibqp, attr, attr_mask, context,
- qpc_mask);
+ memset(qpc_mask, 0, hr_dev->caps.qpc_sz);
+ modify_qp_reset_to_init(ibqp, context);
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
- modify_qp_init_to_init(ibqp, attr, attr_mask, context,
- qpc_mask);
+ modify_qp_init_to_init(ibqp, context, qpc_mask);
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
ret = modify_qp_init_to_rtr(ibqp, attr, attr_mask, context,
- qpc_mask);
- if (ret)
- goto out;
+ qpc_mask, udata);
} else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) {
- ret = modify_qp_rtr_to_rts(ibqp, attr, attr_mask, context,
- qpc_mask);
- if (ret)
- goto out;
- } else if (hns_roce_v2_check_qp_stat(cur_state, new_state)) {
- /* Nothing */
- ;
- } else {
- dev_err(dev, "Illegal state for QP!\n");
- ret = -EINVAL;
- goto out;
+ ret = modify_qp_rtr_to_rts(ibqp, attr_mask, context, qpc_mask);
}
- /* When QP state is err, SQ and RQ WQE should be flushed */
- if (new_state == IB_QPS_ERR) {
- roce_set_field(context->byte_160_sq_ci_pi,
- V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
- V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S,
- hr_qp->sq.head);
- roce_set_field(qpc_mask->byte_160_sq_ci_pi,
- V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
- V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0);
-
- if (!ibqp->srq) {
- roce_set_field(context->byte_84_rq_ci_pi,
- V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
- V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S,
- hr_qp->rq.head);
- roce_set_field(qpc_mask->byte_84_rq_ci_pi,
- V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
- V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0);
- }
- }
+ return ret;
+}
- if (attr_mask & IB_QP_AV) {
- const struct ib_global_route *grh =
- rdma_ah_read_grh(&attr->ah_attr);
- const struct ib_gid_attr *gid_attr = NULL;
- int is_roce_protocol;
- u16 vlan = 0xffff;
- u8 ib_port;
- u8 hr_port;
-
- ib_port = (attr_mask & IB_QP_PORT) ? attr->port_num :
- hr_qp->port + 1;
- hr_port = ib_port - 1;
- is_roce_protocol = rdma_cap_eth_ah(&hr_dev->ib_dev, ib_port) &&
- rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH;
-
- if (is_roce_protocol) {
- gid_attr = attr->ah_attr.grh.sgid_attr;
- ret = rdma_read_gid_l2_fields(gid_attr, &vlan, NULL);
- if (ret)
- goto out;
+static bool check_qp_timeout_cfg_range(struct hns_roce_dev *hr_dev, u8 *timeout)
+{
+#define QP_ACK_TIMEOUT_MAX_HIP08 20
+#define QP_ACK_TIMEOUT_MAX 31
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ if (*timeout > QP_ACK_TIMEOUT_MAX_HIP08) {
+ ibdev_warn(&hr_dev->ib_dev,
+ "local ACK timeout shall be 0 to 20.\n");
+ return false;
}
-
- if (vlan < VLAN_CFI_MASK) {
- roce_set_bit(context->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1);
- roce_set_bit(qpc_mask->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_RQ_VLAN_EN_S, 0);
- roce_set_bit(context->byte_168_irrl_idx,
- V2_QPC_BYTE_168_SQ_VLAN_EN_S, 1);
- roce_set_bit(qpc_mask->byte_168_irrl_idx,
- V2_QPC_BYTE_168_SQ_VLAN_EN_S, 0);
+ *timeout += HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08;
+ } else if (hr_dev->pci_dev->revision > PCI_REVISION_ID_HIP08) {
+ if (*timeout > QP_ACK_TIMEOUT_MAX) {
+ ibdev_warn(&hr_dev->ib_dev,
+ "local ACK timeout shall be 0 to 31.\n");
+ return false;
}
+ }
- roce_set_field(context->byte_24_mtu_tc,
- V2_QPC_BYTE_24_VLAN_ID_M,
- V2_QPC_BYTE_24_VLAN_ID_S, vlan);
- roce_set_field(qpc_mask->byte_24_mtu_tc,
- V2_QPC_BYTE_24_VLAN_ID_M,
- V2_QPC_BYTE_24_VLAN_ID_S, 0);
-
- if (grh->sgid_index >= hr_dev->caps.gid_table_len[hr_port]) {
- dev_err(hr_dev->dev,
- "sgid_index(%u) too large. max is %d\n",
- grh->sgid_index,
- hr_dev->caps.gid_table_len[hr_port]);
- ret = -EINVAL;
- goto out;
- }
+ return true;
+}
- if (attr->ah_attr.type != RDMA_AH_ATTR_TYPE_ROCE) {
- dev_err(hr_dev->dev, "ah attr is not RDMA roce type\n");
- ret = -EINVAL;
- goto out;
- }
+static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ int ret = 0;
+ u8 timeout;
- roce_set_field(context->byte_52_udpspn_dmac,
- V2_QPC_BYTE_52_UDPSPN_M, V2_QPC_BYTE_52_UDPSPN_S,
- (gid_attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) ?
- 0 : 0x12b7);
-
- roce_set_field(qpc_mask->byte_52_udpspn_dmac,
- V2_QPC_BYTE_52_UDPSPN_M,
- V2_QPC_BYTE_52_UDPSPN_S, 0);
-
- roce_set_field(context->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SGID_IDX_M,
- V2_QPC_BYTE_20_SGID_IDX_S, grh->sgid_index);
-
- roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SGID_IDX_M,
- V2_QPC_BYTE_20_SGID_IDX_S, 0);
-
- roce_set_field(context->byte_24_mtu_tc,
- V2_QPC_BYTE_24_HOP_LIMIT_M,
- V2_QPC_BYTE_24_HOP_LIMIT_S, grh->hop_limit);
- roce_set_field(qpc_mask->byte_24_mtu_tc,
- V2_QPC_BYTE_24_HOP_LIMIT_M,
- V2_QPC_BYTE_24_HOP_LIMIT_S, 0);
-
- if (hr_dev->pci_dev->revision == 0x21 &&
- gid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
- roce_set_field(context->byte_24_mtu_tc,
- V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S,
- grh->traffic_class >> 2);
- else
- roce_set_field(context->byte_24_mtu_tc,
- V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S,
- grh->traffic_class);
- roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
- V2_QPC_BYTE_24_TC_S, 0);
- roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
- V2_QPC_BYTE_28_FL_S, grh->flow_label);
- roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
- V2_QPC_BYTE_28_FL_S, 0);
- memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
- memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw));
- roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
- V2_QPC_BYTE_28_SL_S,
- rdma_ah_get_sl(&attr->ah_attr));
- roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
- V2_QPC_BYTE_28_SL_S, 0);
- hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
+ if (attr_mask & IB_QP_AV) {
+ ret = hns_roce_v2_set_path(ibqp, attr, attr_mask, context,
+ qpc_mask);
+ if (ret)
+ return ret;
}
if (attr_mask & IB_QP_TIMEOUT) {
- if (attr->timeout < 31) {
- roce_set_field(context->byte_28_at_fl,
- V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S,
- attr->timeout);
- roce_set_field(qpc_mask->byte_28_at_fl,
- V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S,
- 0);
- } else {
- dev_warn(dev, "Local ACK timeout shall be 0 to 30.\n");
+ timeout = attr->timeout;
+ if (check_qp_timeout_cfg_range(hr_dev, &timeout)) {
+ hr_reg_write(context, QPC_AT, timeout);
+ hr_reg_clear(qpc_mask, QPC_AT);
}
}
if (attr_mask & IB_QP_RETRY_CNT) {
- roce_set_field(context->byte_212_lsn,
- V2_QPC_BYTE_212_RETRY_NUM_INIT_M,
- V2_QPC_BYTE_212_RETRY_NUM_INIT_S,
- attr->retry_cnt);
- roce_set_field(qpc_mask->byte_212_lsn,
- V2_QPC_BYTE_212_RETRY_NUM_INIT_M,
- V2_QPC_BYTE_212_RETRY_NUM_INIT_S, 0);
-
- roce_set_field(context->byte_212_lsn,
- V2_QPC_BYTE_212_RETRY_CNT_M,
- V2_QPC_BYTE_212_RETRY_CNT_S,
- attr->retry_cnt);
- roce_set_field(qpc_mask->byte_212_lsn,
- V2_QPC_BYTE_212_RETRY_CNT_M,
- V2_QPC_BYTE_212_RETRY_CNT_S, 0);
+ hr_reg_write(context, QPC_RETRY_NUM_INIT, attr->retry_cnt);
+ hr_reg_clear(qpc_mask, QPC_RETRY_NUM_INIT);
+
+ hr_reg_write(context, QPC_RETRY_CNT, attr->retry_cnt);
+ hr_reg_clear(qpc_mask, QPC_RETRY_CNT);
}
if (attr_mask & IB_QP_RNR_RETRY) {
- roce_set_field(context->byte_244_rnr_rxack,
- V2_QPC_BYTE_244_RNR_NUM_INIT_M,
- V2_QPC_BYTE_244_RNR_NUM_INIT_S, attr->rnr_retry);
- roce_set_field(qpc_mask->byte_244_rnr_rxack,
- V2_QPC_BYTE_244_RNR_NUM_INIT_M,
- V2_QPC_BYTE_244_RNR_NUM_INIT_S, 0);
+ hr_reg_write(context, QPC_RNR_NUM_INIT, attr->rnr_retry);
+ hr_reg_clear(qpc_mask, QPC_RNR_NUM_INIT);
- roce_set_field(context->byte_244_rnr_rxack,
- V2_QPC_BYTE_244_RNR_CNT_M,
- V2_QPC_BYTE_244_RNR_CNT_S, attr->rnr_retry);
- roce_set_field(qpc_mask->byte_244_rnr_rxack,
- V2_QPC_BYTE_244_RNR_CNT_M,
- V2_QPC_BYTE_244_RNR_CNT_S, 0);
+ hr_reg_write(context, QPC_RNR_CNT, attr->rnr_retry);
+ hr_reg_clear(qpc_mask, QPC_RNR_CNT);
}
if (attr_mask & IB_QP_SQ_PSN) {
- roce_set_field(context->byte_172_sq_psn,
- V2_QPC_BYTE_172_SQ_CUR_PSN_M,
- V2_QPC_BYTE_172_SQ_CUR_PSN_S, attr->sq_psn);
- roce_set_field(qpc_mask->byte_172_sq_psn,
- V2_QPC_BYTE_172_SQ_CUR_PSN_M,
- V2_QPC_BYTE_172_SQ_CUR_PSN_S, 0);
-
- roce_set_field(context->byte_196_sq_psn,
- V2_QPC_BYTE_196_SQ_MAX_PSN_M,
- V2_QPC_BYTE_196_SQ_MAX_PSN_S, attr->sq_psn);
- roce_set_field(qpc_mask->byte_196_sq_psn,
- V2_QPC_BYTE_196_SQ_MAX_PSN_M,
- V2_QPC_BYTE_196_SQ_MAX_PSN_S, 0);
-
- roce_set_field(context->byte_220_retry_psn_msn,
- V2_QPC_BYTE_220_RETRY_MSG_PSN_M,
- V2_QPC_BYTE_220_RETRY_MSG_PSN_S, attr->sq_psn);
- roce_set_field(qpc_mask->byte_220_retry_psn_msn,
- V2_QPC_BYTE_220_RETRY_MSG_PSN_M,
- V2_QPC_BYTE_220_RETRY_MSG_PSN_S, 0);
-
- roce_set_field(context->byte_224_retry_msg,
- V2_QPC_BYTE_224_RETRY_MSG_PSN_M,
- V2_QPC_BYTE_224_RETRY_MSG_PSN_S,
- attr->sq_psn >> 16);
- roce_set_field(qpc_mask->byte_224_retry_msg,
- V2_QPC_BYTE_224_RETRY_MSG_PSN_M,
- V2_QPC_BYTE_224_RETRY_MSG_PSN_S, 0);
-
- roce_set_field(context->byte_224_retry_msg,
- V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_M,
- V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_S,
- attr->sq_psn);
- roce_set_field(qpc_mask->byte_224_retry_msg,
- V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_M,
- V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_S, 0);
-
- roce_set_field(context->byte_244_rnr_rxack,
- V2_QPC_BYTE_244_RX_ACK_EPSN_M,
- V2_QPC_BYTE_244_RX_ACK_EPSN_S, attr->sq_psn);
- roce_set_field(qpc_mask->byte_244_rnr_rxack,
- V2_QPC_BYTE_244_RX_ACK_EPSN_M,
- V2_QPC_BYTE_244_RX_ACK_EPSN_S, 0);
+ hr_reg_write(context, QPC_SQ_CUR_PSN, attr->sq_psn);
+ hr_reg_clear(qpc_mask, QPC_SQ_CUR_PSN);
+
+ hr_reg_write(context, QPC_SQ_MAX_PSN, attr->sq_psn);
+ hr_reg_clear(qpc_mask, QPC_SQ_MAX_PSN);
+
+ hr_reg_write(context, QPC_RETRY_MSG_PSN_L, attr->sq_psn);
+ hr_reg_clear(qpc_mask, QPC_RETRY_MSG_PSN_L);
+
+ hr_reg_write(context, QPC_RETRY_MSG_PSN_H,
+ attr->sq_psn >> RETRY_MSG_PSN_SHIFT);
+ hr_reg_clear(qpc_mask, QPC_RETRY_MSG_PSN_H);
+
+ hr_reg_write(context, QPC_RETRY_MSG_FPKT_PSN, attr->sq_psn);
+ hr_reg_clear(qpc_mask, QPC_RETRY_MSG_FPKT_PSN);
+
+ hr_reg_write(context, QPC_RX_ACK_EPSN, attr->sq_psn);
+ hr_reg_clear(qpc_mask, QPC_RX_ACK_EPSN);
}
if ((attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) &&
attr->max_dest_rd_atomic) {
- roce_set_field(context->byte_140_raq, V2_QPC_BYTE_140_RR_MAX_M,
- V2_QPC_BYTE_140_RR_MAX_S,
- fls(attr->max_dest_rd_atomic - 1));
- roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RR_MAX_M,
- V2_QPC_BYTE_140_RR_MAX_S, 0);
+ hr_reg_write(context, QPC_RR_MAX,
+ fls(attr->max_dest_rd_atomic - 1));
+ hr_reg_clear(qpc_mask, QPC_RR_MAX);
}
if ((attr_mask & IB_QP_MAX_QP_RD_ATOMIC) && attr->max_rd_atomic) {
- roce_set_field(context->byte_208_irrl, V2_QPC_BYTE_208_SR_MAX_M,
- V2_QPC_BYTE_208_SR_MAX_S,
- fls(attr->max_rd_atomic - 1));
- roce_set_field(qpc_mask->byte_208_irrl,
- V2_QPC_BYTE_208_SR_MAX_M,
- V2_QPC_BYTE_208_SR_MAX_S, 0);
+ hr_reg_write(context, QPC_SR_MAX, fls(attr->max_rd_atomic - 1));
+ hr_reg_clear(qpc_mask, QPC_SR_MAX);
}
if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask);
if (attr_mask & IB_QP_MIN_RNR_TIMER) {
- roce_set_field(context->byte_80_rnr_rx_cqn,
- V2_QPC_BYTE_80_MIN_RNR_TIME_M,
- V2_QPC_BYTE_80_MIN_RNR_TIME_S,
- attr->min_rnr_timer);
- roce_set_field(qpc_mask->byte_80_rnr_rx_cqn,
- V2_QPC_BYTE_80_MIN_RNR_TIME_M,
- V2_QPC_BYTE_80_MIN_RNR_TIME_S, 0);
+ hr_reg_write(context, QPC_MIN_RNR_TIME,
+ hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 ?
+ HNS_ROCE_RNR_TIMER_10NS : attr->min_rnr_timer);
+ hr_reg_clear(qpc_mask, QPC_MIN_RNR_TIME);
}
- /* RC&UC required attr */
if (attr_mask & IB_QP_RQ_PSN) {
- roce_set_field(context->byte_108_rx_reqepsn,
- V2_QPC_BYTE_108_RX_REQ_EPSN_M,
- V2_QPC_BYTE_108_RX_REQ_EPSN_S, attr->rq_psn);
- roce_set_field(qpc_mask->byte_108_rx_reqepsn,
- V2_QPC_BYTE_108_RX_REQ_EPSN_M,
- V2_QPC_BYTE_108_RX_REQ_EPSN_S, 0);
+ hr_reg_write(context, QPC_RX_REQ_EPSN, attr->rq_psn);
+ hr_reg_clear(qpc_mask, QPC_RX_REQ_EPSN);
- roce_set_field(context->byte_152_raq, V2_QPC_BYTE_152_RAQ_PSN_M,
- V2_QPC_BYTE_152_RAQ_PSN_S, attr->rq_psn - 1);
- roce_set_field(qpc_mask->byte_152_raq,
- V2_QPC_BYTE_152_RAQ_PSN_M,
- V2_QPC_BYTE_152_RAQ_PSN_S, 0);
+ hr_reg_write(context, QPC_RAQ_PSN, attr->rq_psn - 1);
+ hr_reg_clear(qpc_mask, QPC_RAQ_PSN);
}
if (attr_mask & IB_QP_QKEY) {
- context->qkey_xrcd = attr->qkey;
+ context->qkey_xrcd = cpu_to_le32(attr->qkey);
qpc_mask->qkey_xrcd = 0;
hr_qp->qkey = attr->qkey;
}
- roce_set_bit(context->byte_108_rx_reqepsn, V2_QPC_BYTE_108_INV_CREDIT_S,
- ibqp->srq ? 1 : 0);
- roce_set_bit(qpc_mask->byte_108_rx_reqepsn,
- V2_QPC_BYTE_108_INV_CREDIT_S, 0);
-
- /* Every status migrate must change state */
- roce_set_field(context->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M,
- V2_QPC_BYTE_60_QP_ST_S, new_state);
- roce_set_field(qpc_mask->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M,
- V2_QPC_BYTE_60_QP_ST_S, 0);
-
- /* SW pass context to HW */
- ret = hns_roce_v2_qp_modify(hr_dev, &hr_qp->mtt, cur_state, new_state,
- context, hr_qp);
- if (ret) {
- dev_err(dev, "hns_roce_qp_modify failed(%d)\n", ret);
- goto out;
- }
+ return ret;
+}
- hr_qp->state = new_state;
+static void hns_roce_v2_record_opt_fields(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
if (attr_mask & IB_QP_ACCESS_FLAGS)
hr_qp->atomic_rd_en = attr->qp_access_flags;
@@ -4248,47 +5372,149 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
hr_qp->port = attr->port_num - 1;
hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
}
+}
+
+static void clear_qp(struct hns_roce_qp *hr_qp)
+{
+ struct ib_qp *ibqp = &hr_qp->ibqp;
- if (new_state == IB_QPS_RESET && !ibqp->uobject) {
- hns_roce_v2_cq_clean(to_hr_cq(ibqp->recv_cq), hr_qp->qpn,
- ibqp->srq ? to_hr_srq(ibqp->srq) : NULL);
- if (ibqp->send_cq != ibqp->recv_cq)
- hns_roce_v2_cq_clean(to_hr_cq(ibqp->send_cq),
- hr_qp->qpn, NULL);
+ if (ibqp->send_cq)
+ hns_roce_v2_cq_clean(to_hr_cq(ibqp->send_cq),
+ hr_qp->qpn, NULL);
- hr_qp->rq.head = 0;
- hr_qp->rq.tail = 0;
- hr_qp->sq.head = 0;
- hr_qp->sq.tail = 0;
- hr_qp->sq_next_wqe = 0;
- hr_qp->next_sge = 0;
- if (hr_qp->rq.wqe_cnt)
- *hr_qp->rdb.db_record = 0;
+ if (ibqp->recv_cq && ibqp->recv_cq != ibqp->send_cq)
+ hns_roce_v2_cq_clean(to_hr_cq(ibqp->recv_cq),
+ hr_qp->qpn, ibqp->srq ?
+ to_hr_srq(ibqp->srq) : NULL);
+
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
+ *hr_qp->rdb.db_record = 0;
+
+ hr_qp->rq.head = 0;
+ hr_qp->rq.tail = 0;
+ hr_qp->sq.head = 0;
+ hr_qp->sq.tail = 0;
+ hr_qp->next_sge = 0;
+}
+
+static void v2_set_flushed_fields(struct ib_qp *ibqp,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ unsigned long sq_flag = 0;
+ unsigned long rq_flag = 0;
+
+ if (ibqp->qp_type == IB_QPT_XRC_TGT)
+ return;
+
+ spin_lock_irqsave(&hr_qp->sq.lock, sq_flag);
+ trace_hns_sq_flush_cqe(hr_qp->qpn, hr_qp->sq.head, TRACE_SQ);
+ hr_reg_write(context, QPC_SQ_PRODUCER_IDX, hr_qp->sq.head);
+ hr_reg_clear(qpc_mask, QPC_SQ_PRODUCER_IDX);
+ hr_qp->state = IB_QPS_ERR;
+ spin_unlock_irqrestore(&hr_qp->sq.lock, sq_flag);
+
+ if (ibqp->srq || ibqp->qp_type == IB_QPT_XRC_INI) /* no RQ */
+ return;
+
+ spin_lock_irqsave(&hr_qp->rq.lock, rq_flag);
+ trace_hns_rq_flush_cqe(hr_qp->qpn, hr_qp->rq.head, TRACE_RQ);
+ hr_reg_write(context, QPC_RQ_PRODUCER_IDX, hr_qp->rq.head);
+ hr_reg_clear(qpc_mask, QPC_RQ_PRODUCER_IDX);
+ spin_unlock_irqrestore(&hr_qp->rq.lock, rq_flag);
+}
+
+static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask, enum ib_qp_state cur_state,
+ enum ib_qp_state new_state, struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct hns_roce_v2_qp_context *context;
+ struct hns_roce_v2_qp_context *qpc_mask;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int ret = -ENOMEM;
+
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
+ /*
+ * In the v2 engine, software passes a context and a context mask to the
+ * hardware when modifying a qp. If software needs to modify some fields in
+ * the context, we should set all bits of the relevant fields in the context
+ * mask to 0 at the same time, else set them to 0x1.
+ */
+ context = kvzalloc(sizeof(*context), GFP_KERNEL);
+ qpc_mask = kvzalloc(sizeof(*qpc_mask), GFP_KERNEL);
+ if (!context || !qpc_mask)
+ goto out;
+
+ memset(qpc_mask, 0xff, hr_dev->caps.qpc_sz);
+
+ ret = hns_roce_v2_set_abs_fields(ibqp, attr, attr_mask, cur_state,
+ new_state, context, qpc_mask, udata);
+ if (ret)
+ goto out;
+
+ /* When the QP state is err, SQ and RQ WQEs should be flushed */
+ if (new_state == IB_QPS_ERR)
+ v2_set_flushed_fields(ibqp, context, qpc_mask);
+
+ /* Configure the optional fields */
+ ret = hns_roce_v2_set_opt_fields(ibqp, attr, attr_mask, context,
+ qpc_mask);
+ if (ret)
+ goto out;
+
+ hr_reg_write_bool(context, QPC_INV_CREDIT,
+ to_hr_qp_type(hr_qp->ibqp.qp_type) == SERV_TYPE_XRC ||
+ ibqp->srq);
+ hr_reg_clear(qpc_mask, QPC_INV_CREDIT);
+
+ /* Every state migration must change the QP state */
+ hr_reg_write(context, QPC_QP_ST, new_state);
+ hr_reg_clear(qpc_mask, QPC_QP_ST);
+
+ /* SW passes the context to HW */
+ ret = hns_roce_v2_qp_modify(hr_dev, context, qpc_mask, hr_qp);
+ if (ret) {
+ ibdev_err_ratelimited(ibdev, "failed to modify QP, ret = %d.\n", ret);
+ goto out;
}
+ hr_qp->state = new_state;
+
+ hns_roce_v2_record_opt_fields(ibqp, attr, attr_mask);
+
+ if (new_state == IB_QPS_RESET && !ibqp->uobject)
+ clear_qp(hr_qp);
+
out:
- kfree(context);
+ kvfree(qpc_mask);
+ kvfree(context);
return ret;
}
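A simplified model of the context/mask protocol this function relies on may help: the mask starts as all ones, and clearing a field's bits is what tells the hardware to take that field from the new context. The 32-bit layout and the apply() step below are invented for illustration and do not correspond to the real QPC format.

#include <stdint.h>
#include <stdio.h>

#define FIELD_MTU   0x000000ffu
#define FIELD_STATE 0x0000ff00u

/* bits still set in the mask keep their old "hardware" value */
static void apply(uint32_t *hw, uint32_t ctx, uint32_t mask)
{
	*hw = (*hw & mask) | (ctx & ~mask);
}

int main(void)
{
	uint32_t hw = 0x00000305;	/* FIELD_STATE = 3, FIELD_MTU = 5 */
	uint32_t ctx = 0, mask = 0xffffffffu;

	/* "hr_reg_write" the MTU field and "hr_reg_clear" it in the mask */
	ctx |= 2 & FIELD_MTU;
	mask &= ~FIELD_MTU;

	apply(&hw, ctx, mask);
	printf("hw = 0x%08x\n", hw);	/* state untouched, mtu now 2 */
	return 0;
}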
-static inline enum ib_qp_state to_ib_qp_st(enum hns_roce_v2_qp_state state)
+static int to_ib_qp_st(enum hns_roce_v2_qp_state state)
{
- switch (state) {
- case HNS_ROCE_QP_ST_RST: return IB_QPS_RESET;
- case HNS_ROCE_QP_ST_INIT: return IB_QPS_INIT;
- case HNS_ROCE_QP_ST_RTR: return IB_QPS_RTR;
- case HNS_ROCE_QP_ST_RTS: return IB_QPS_RTS;
- case HNS_ROCE_QP_ST_SQ_DRAINING:
- case HNS_ROCE_QP_ST_SQD: return IB_QPS_SQD;
- case HNS_ROCE_QP_ST_SQER: return IB_QPS_SQE;
- case HNS_ROCE_QP_ST_ERR: return IB_QPS_ERR;
- default: return -1;
- }
+ static const enum ib_qp_state map[] = {
+ [HNS_ROCE_QP_ST_RST] = IB_QPS_RESET,
+ [HNS_ROCE_QP_ST_INIT] = IB_QPS_INIT,
+ [HNS_ROCE_QP_ST_RTR] = IB_QPS_RTR,
+ [HNS_ROCE_QP_ST_RTS] = IB_QPS_RTS,
+ [HNS_ROCE_QP_ST_SQD] = IB_QPS_SQD,
+ [HNS_ROCE_QP_ST_SQER] = IB_QPS_SQE,
+ [HNS_ROCE_QP_ST_ERR] = IB_QPS_ERR,
+ [HNS_ROCE_QP_ST_SQ_DRAINING] = IB_QPS_SQD
+ };
+
+ return (state < ARRAY_SIZE(map)) ? map[state] : -1;
}
-static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev,
- struct hns_roce_qp *hr_qp,
- struct hns_roce_v2_qp_context *hr_context)
+static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev, u32 qpn,
+ void *buffer)
{
struct hns_roce_cmd_mailbox *mailbox;
int ret;
@@ -4297,37 +5523,90 @@ static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev,
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
- ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, hr_qp->qpn, 0,
- HNS_ROCE_CMD_QUERY_QPC,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
- if (ret) {
- dev_err(hr_dev->dev, "QUERY QP cmd process error\n");
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_QPC,
+ qpn);
+ if (ret)
goto out;
- }
- memcpy(hr_context, mailbox->buf, sizeof(*hr_context));
+ memcpy(buffer, mailbox->buf, hr_dev->caps.qpc_sz);
out:
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
return ret;
}
+static int hns_roce_v2_query_srqc(struct hns_roce_dev *hr_dev, u32 srqn,
+ void *buffer)
+{
+ struct hns_roce_srq_context *context;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ context = mailbox->buf;
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_SRQC,
+ srqn);
+ if (ret)
+ goto out;
+
+ memcpy(buffer, context, sizeof(*context));
+
+out:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
+static int hns_roce_v2_query_sccc(struct hns_roce_dev *hr_dev, u32 sccn,
+ void *buffer)
+{
+ struct hns_roce_v2_scc_context *context;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_SCCC,
+ sccn);
+ if (ret)
+ goto out;
+
+ context = mailbox->buf;
+ memcpy(buffer, context, sizeof(*context));
+
+out:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
+static u8 get_qp_timeout_attr(struct hns_roce_dev *hr_dev,
+ struct hns_roce_v2_qp_context *context)
+{
+ u8 timeout;
+
+ timeout = (u8)hr_reg_read(context, QPC_AT);
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ timeout -= HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08;
+
+ return timeout;
+}
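The HIP08 offset handling amounts to a round trip: modify_qp adds the offset before writing QPC_AT (see check_qp_timeout_cfg_range()), and query_qp subtracts it again here. A standalone sketch of that round trip, with 10 assumed as a stand-in for HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08:

#include <stdio.h>

#define ACK_TIMEOUT_OFS_HIP08 10	/* assumed example value */
#define ACK_TIMEOUT_MAX_HIP08 20

/* user timeout -> value written to the QPC on HIP08 */
static int timeout_to_qpc(unsigned int timeout, unsigned int *qpc_at)
{
	if (timeout > ACK_TIMEOUT_MAX_HIP08)
		return -1;		/* rejected, field left unchanged */
	*qpc_at = timeout + ACK_TIMEOUT_OFS_HIP08;
	return 0;
}

/* QPC value -> timeout reported back by query_qp on HIP08 */
static unsigned int qpc_to_timeout(unsigned int qpc_at)
{
	return qpc_at - ACK_TIMEOUT_OFS_HIP08;
}

int main(void)
{
	unsigned int at;

	if (!timeout_to_qpc(14, &at))
		printf("set 14 -> QPC_AT %u -> query %u\n", at, qpc_to_timeout(at));
	return 0;
}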
+
static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct hns_roce_v2_qp_context *context;
- struct device *dev = hr_dev->dev;
+ struct hns_roce_v2_qp_context context = {};
+ struct ib_device *ibdev = &hr_dev->ib_dev;
int tmp_qp_state;
int state;
int ret;
- context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return -ENOMEM;
-
memset(qp_attr, 0, sizeof(*qp_attr));
memset(qp_init_attr, 0, sizeof(*qp_init_attr));
@@ -4339,221 +5618,204 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
goto done;
}
- ret = hns_roce_v2_query_qpc(hr_dev, hr_qp, context);
+ ret = hns_roce_v2_query_qpc(hr_dev, hr_qp->qpn, &context);
if (ret) {
- dev_err(dev, "query qpc error\n");
+ ibdev_err_ratelimited(ibdev,
+ "failed to query QPC, ret = %d.\n",
+ ret);
ret = -EINVAL;
goto out;
}
- state = roce_get_field(context->byte_60_qpst_tempid,
- V2_QPC_BYTE_60_QP_ST_M, V2_QPC_BYTE_60_QP_ST_S);
+ state = hr_reg_read(&context, QPC_QP_ST);
tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state);
if (tmp_qp_state == -1) {
- dev_err(dev, "Illegal ib_qp_state\n");
+ ibdev_err_ratelimited(ibdev, "Illegal ib_qp_state\n");
ret = -EINVAL;
goto out;
}
hr_qp->state = (u8)tmp_qp_state;
qp_attr->qp_state = (enum ib_qp_state)hr_qp->state;
- qp_attr->path_mtu = (enum ib_mtu)roce_get_field(context->byte_24_mtu_tc,
- V2_QPC_BYTE_24_MTU_M,
- V2_QPC_BYTE_24_MTU_S);
+ qp_attr->path_mtu = (enum ib_mtu)hr_reg_read(&context, QPC_MTU);
qp_attr->path_mig_state = IB_MIG_ARMED;
- qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
+ qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
if (hr_qp->ibqp.qp_type == IB_QPT_UD)
- qp_attr->qkey = V2_QKEY_VAL;
-
- qp_attr->rq_psn = roce_get_field(context->byte_108_rx_reqepsn,
- V2_QPC_BYTE_108_RX_REQ_EPSN_M,
- V2_QPC_BYTE_108_RX_REQ_EPSN_S);
- qp_attr->sq_psn = (u32)roce_get_field(context->byte_172_sq_psn,
- V2_QPC_BYTE_172_SQ_CUR_PSN_M,
- V2_QPC_BYTE_172_SQ_CUR_PSN_S);
- qp_attr->dest_qp_num = (u8)roce_get_field(context->byte_56_dqpn_err,
- V2_QPC_BYTE_56_DQPN_M,
- V2_QPC_BYTE_56_DQPN_S);
- qp_attr->qp_access_flags = ((roce_get_bit(context->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_RRE_S)) << 2) |
- ((roce_get_bit(context->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_RWE_S)) << 1) |
- ((roce_get_bit(context->byte_76_srqn_op_en,
- V2_QPC_BYTE_76_ATE_S)) << 3);
+ qp_attr->qkey = le32_to_cpu(context.qkey_xrcd);
+
+ qp_attr->rq_psn = hr_reg_read(&context, QPC_RX_REQ_EPSN);
+ qp_attr->sq_psn = (u32)hr_reg_read(&context, QPC_SQ_CUR_PSN);
+ qp_attr->dest_qp_num = hr_reg_read(&context, QPC_DQPN);
+ qp_attr->qp_access_flags =
+ ((hr_reg_read(&context, QPC_RRE)) << V2_QP_RRE_S) |
+ ((hr_reg_read(&context, QPC_RWE)) << V2_QP_RWE_S) |
+ ((hr_reg_read(&context, QPC_ATE)) << V2_QP_ATE_S);
+
if (hr_qp->ibqp.qp_type == IB_QPT_RC ||
- hr_qp->ibqp.qp_type == IB_QPT_UC) {
+ hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+ hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT) {
struct ib_global_route *grh =
- rdma_ah_retrieve_grh(&qp_attr->ah_attr);
+ rdma_ah_retrieve_grh(&qp_attr->ah_attr);
rdma_ah_set_sl(&qp_attr->ah_attr,
- roce_get_field(context->byte_28_at_fl,
- V2_QPC_BYTE_28_SL_M,
- V2_QPC_BYTE_28_SL_S));
- grh->flow_label = roce_get_field(context->byte_28_at_fl,
- V2_QPC_BYTE_28_FL_M,
- V2_QPC_BYTE_28_FL_S);
- grh->sgid_index = roce_get_field(context->byte_20_smac_sgid_idx,
- V2_QPC_BYTE_20_SGID_IDX_M,
- V2_QPC_BYTE_20_SGID_IDX_S);
- grh->hop_limit = roce_get_field(context->byte_24_mtu_tc,
- V2_QPC_BYTE_24_HOP_LIMIT_M,
- V2_QPC_BYTE_24_HOP_LIMIT_S);
- grh->traffic_class = roce_get_field(context->byte_24_mtu_tc,
- V2_QPC_BYTE_24_TC_M,
- V2_QPC_BYTE_24_TC_S);
-
- memcpy(grh->dgid.raw, context->dgid, sizeof(grh->dgid.raw));
+ hr_reg_read(&context, QPC_SL));
+ rdma_ah_set_port_num(&qp_attr->ah_attr, hr_qp->port + 1);
+ rdma_ah_set_ah_flags(&qp_attr->ah_attr, IB_AH_GRH);
+ grh->flow_label = hr_reg_read(&context, QPC_FL);
+ grh->sgid_index = hr_reg_read(&context, QPC_GMV_IDX);
+ grh->hop_limit = hr_reg_read(&context, QPC_HOPLIMIT);
+ grh->traffic_class = hr_reg_read(&context, QPC_TC);
+
+ memcpy(grh->dgid.raw, context.dgid, sizeof(grh->dgid.raw));
}
qp_attr->port_num = hr_qp->port + 1;
qp_attr->sq_draining = 0;
- qp_attr->max_rd_atomic = 1 << roce_get_field(context->byte_208_irrl,
- V2_QPC_BYTE_208_SR_MAX_M,
- V2_QPC_BYTE_208_SR_MAX_S);
- qp_attr->max_dest_rd_atomic = 1 << roce_get_field(context->byte_140_raq,
- V2_QPC_BYTE_140_RR_MAX_M,
- V2_QPC_BYTE_140_RR_MAX_S);
- qp_attr->min_rnr_timer = (u8)roce_get_field(context->byte_80_rnr_rx_cqn,
- V2_QPC_BYTE_80_MIN_RNR_TIME_M,
- V2_QPC_BYTE_80_MIN_RNR_TIME_S);
- qp_attr->timeout = (u8)roce_get_field(context->byte_28_at_fl,
- V2_QPC_BYTE_28_AT_M,
- V2_QPC_BYTE_28_AT_S);
- qp_attr->retry_cnt = roce_get_field(context->byte_212_lsn,
- V2_QPC_BYTE_212_RETRY_CNT_M,
- V2_QPC_BYTE_212_RETRY_CNT_S);
- qp_attr->rnr_retry = context->rq_rnr_timer;
+ qp_attr->max_rd_atomic = 1 << hr_reg_read(&context, QPC_SR_MAX);
+ qp_attr->max_dest_rd_atomic = 1 << hr_reg_read(&context, QPC_RR_MAX);
+
+ qp_attr->min_rnr_timer = (u8)hr_reg_read(&context, QPC_MIN_RNR_TIME);
+ qp_attr->timeout = get_qp_timeout_attr(hr_dev, &context);
+ qp_attr->retry_cnt = hr_reg_read(&context, QPC_RETRY_NUM_INIT);
+ qp_attr->rnr_retry = hr_reg_read(&context, QPC_RNR_NUM_INIT);
done:
qp_attr->cur_qp_state = qp_attr->qp_state;
qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt;
- qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs;
+ qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
+ qp_attr->cap.max_inline_data = hr_qp->max_inline_data;
- if (!ibqp->uobject) {
- qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
- qp_attr->cap.max_send_sge = hr_qp->sq.max_gs;
- } else {
- qp_attr->cap.max_send_wr = 0;
- qp_attr->cap.max_send_sge = 0;
- }
+ qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
+ qp_attr->cap.max_send_sge = hr_qp->sq.max_gs;
+ qp_init_attr->qp_context = ibqp->qp_context;
+ qp_init_attr->qp_type = ibqp->qp_type;
+ qp_init_attr->recv_cq = ibqp->recv_cq;
+ qp_init_attr->send_cq = ibqp->send_cq;
+ qp_init_attr->srq = ibqp->srq;
qp_init_attr->cap = qp_attr->cap;
+ qp_init_attr->sq_sig_type = hr_qp->sq_signal_bits;
out:
mutex_unlock(&hr_qp->mutex);
- kfree(context);
return ret;
}
+static inline int modify_qp_is_ok(struct hns_roce_qp *hr_qp)
+{
+ return ((hr_qp->ibqp.qp_type == IB_QPT_RC ||
+ hr_qp->ibqp.qp_type == IB_QPT_UD ||
+ hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+ hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT) &&
+ hr_qp->state != IB_QPS_RESET);
+}
+
static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp,
struct ib_udata *udata)
{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_cq *send_cq, *recv_cq;
- struct device *dev = hr_dev->dev;
- int ret;
+ unsigned long flags;
+ int ret = 0;
- if (hr_qp->ibqp.qp_type == IB_QPT_RC && hr_qp->state != IB_QPS_RESET) {
+ if (modify_qp_is_ok(hr_qp)) {
/* Modify qp to reset before destroying qp */
ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0,
- hr_qp->state, IB_QPS_RESET);
- if (ret) {
- dev_err(dev, "modify QP %06lx to ERR failed.\n",
- hr_qp->qpn);
- return ret;
- }
+ hr_qp->state, IB_QPS_RESET, udata);
+ if (ret)
+ ibdev_err_ratelimited(ibdev,
+ "failed to modify QP to RST, ret = %d.\n",
+ ret);
}
- send_cq = to_hr_cq(hr_qp->ibqp.send_cq);
- recv_cq = to_hr_cq(hr_qp->ibqp.recv_cq);
+ send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL;
+ recv_cq = hr_qp->ibqp.recv_cq ? to_hr_cq(hr_qp->ibqp.recv_cq) : NULL;
+ spin_lock_irqsave(&hr_dev->qp_list_lock, flags);
hns_roce_lock_cqs(send_cq, recv_cq);
if (!udata) {
- __hns_roce_v2_cq_clean(recv_cq, hr_qp->qpn, hr_qp->ibqp.srq ?
- to_hr_srq(hr_qp->ibqp.srq) : NULL);
- if (send_cq != recv_cq)
+ if (recv_cq)
+ __hns_roce_v2_cq_clean(recv_cq, hr_qp->qpn,
+ (hr_qp->ibqp.srq ?
+ to_hr_srq(hr_qp->ibqp.srq) :
+ NULL));
+
+ if (send_cq && send_cq != recv_cq)
__hns_roce_v2_cq_clean(send_cq, hr_qp->qpn, NULL);
}
hns_roce_qp_remove(hr_dev, hr_qp);
hns_roce_unlock_cqs(send_cq, recv_cq);
+ spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
- hns_roce_qp_free(hr_dev, hr_qp);
-
- /* Not special_QP, free their QPN */
- if ((hr_qp->ibqp.qp_type == IB_QPT_RC) ||
- (hr_qp->ibqp.qp_type == IB_QPT_UC) ||
- (hr_qp->ibqp.qp_type == IB_QPT_UD))
- hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1);
-
- hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
+ return ret;
+}
- if (udata) {
- struct hns_roce_ucontext *context =
- rdma_udata_to_drv_context(
- udata,
- struct hns_roce_ucontext,
- ibucontext);
+static void put_dip_ctx_idx(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp)
+{
+ struct hns_roce_dip *hr_dip = hr_qp->dip;
- if (hr_qp->sq.wqe_cnt && (hr_qp->sdb_en == 1))
- hns_roce_db_unmap_user(context, &hr_qp->sdb);
+ if (!hr_dip)
+ return;
- if (hr_qp->rq.wqe_cnt && (hr_qp->rdb_en == 1))
- hns_roce_db_unmap_user(context, &hr_qp->rdb);
- ib_umem_release(hr_qp->umem);
- } else {
- kfree(hr_qp->sq.wrid);
- kfree(hr_qp->rq.wrid);
- hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
- if (hr_qp->rq.wqe_cnt)
- hns_roce_free_db(hr_dev, &hr_qp->rdb);
- }
+ xa_lock(&hr_dev->qp_table.dip_xa);
- if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
- hr_qp->rq.wqe_cnt) {
- kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
- kfree(hr_qp->rq_inl_buf.wqe_list);
- }
+ hr_dip->qp_cnt--;
+ if (!hr_dip->qp_cnt)
+ memset(hr_dip->dgid, 0, GID_LEN_V2);
- return 0;
+ xa_unlock(&hr_dev->qp_table.dip_xa);
}
-static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
+int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ unsigned long flags;
int ret;
+ /* Make sure flush_cqe() is completed */
+ spin_lock_irqsave(&hr_qp->flush_lock, flags);
+ set_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag);
+ spin_unlock_irqrestore(&hr_qp->flush_lock, flags);
+ flush_work(&hr_qp->flush_work.work);
+
+ if (hr_qp->cong_type == CONG_TYPE_DIP)
+ put_dip_ctx_idx(hr_dev, hr_qp);
+
ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata);
- if (ret) {
- dev_err(hr_dev->dev, "Destroy qp failed(%d)\n", ret);
- return ret;
- }
+ if (ret)
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n",
+ hr_qp->qpn, ret);
- if (hr_qp->ibqp.qp_type == IB_QPT_GSI)
- kfree(hr_to_hr_sqp(hr_qp));
- else
- kfree(hr_qp);
+ hns_roce_qp_destroy(hr_dev, hr_qp, udata);
return 0;
}
static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
- struct hns_roce_qp *hr_qp)
+ struct hns_roce_qp *hr_qp)
{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_sccc_clr_done *resp;
struct hns_roce_sccc_clr *clr;
struct hns_roce_cmq_desc desc;
int ret, i;
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return 0;
+
mutex_lock(&hr_dev->qp_table.scc_mutex);
/* set scc ctx clear done flag */
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_RESET_SCCC, false);
ret = hns_roce_cmq_send(hr_dev, &desc, 1);
if (ret) {
- dev_err(hr_dev->dev, "Reset SCC ctx failed(%d)\n", ret);
+ ibdev_err(ibdev, "failed to reset SCC ctx, ret = %d.\n", ret);
goto out;
}
@@ -4563,7 +5825,7 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
clr->qpn = cpu_to_le32(hr_qp->qpn);
ret = hns_roce_cmq_send(hr_dev, &desc, 1);
if (ret) {
- dev_err(hr_dev->dev, "Clear SCC ctx failed(%d)\n", ret);
+ ibdev_err(ibdev, "failed to clear SCC ctx, ret = %d.\n", ret);
goto out;
}
@@ -4574,7 +5836,8 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
HNS_ROCE_OPC_QUERY_SCCC, true);
ret = hns_roce_cmq_send(hr_dev, &desc, 1);
if (ret) {
- dev_err(hr_dev->dev, "Query clr cmq failed(%d)\n", ret);
+ ibdev_err(ibdev, "failed to query clr cmq, ret = %d\n",
+ ret);
goto out;
}
@@ -4584,7 +5847,7 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
msleep(20);
}
- dev_err(hr_dev->dev, "Query SCC clr done flag overtime.\n");
+ ibdev_err(ibdev, "query SCC clr done flag overtime.\n");
ret = -ETIMEDOUT;
out:
@@ -4592,6 +5855,197 @@ out:
return ret;
}
+#define DMA_IDX_SHIFT 3
+#define DMA_WQE_SHIFT 3
+
+static int hns_roce_v2_write_srqc_index_queue(struct hns_roce_srq *srq,
+ struct hns_roce_srq_context *ctx)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ struct ib_device *ibdev = srq->ibsrq.device;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
+ u64 mtts_idx[MTT_MIN_COUNT] = {};
+ dma_addr_t dma_handle_idx;
+ int ret;
+
+ /* Get physical address of idx que buf */
+ ret = hns_roce_mtr_find(hr_dev, &idx_que->mtr, 0, mtts_idx,
+ ARRAY_SIZE(mtts_idx));
+ if (ret) {
+ ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ dma_handle_idx = hns_roce_get_mtr_ba(&idx_que->mtr);
+
+ hr_reg_write(ctx, SRQC_IDX_HOP_NUM,
+ to_hr_hem_hopnum(hr_dev->caps.idx_hop_num, srq->wqe_cnt));
+
+ hr_reg_write(ctx, SRQC_IDX_BT_BA_L, dma_handle_idx >> DMA_IDX_SHIFT);
+ hr_reg_write(ctx, SRQC_IDX_BT_BA_H,
+ upper_32_bits(dma_handle_idx >> DMA_IDX_SHIFT));
+
+ hr_reg_write(ctx, SRQC_IDX_BA_PG_SZ,
+ to_hr_hw_page_shift(idx_que->mtr.hem_cfg.ba_pg_shift));
+ hr_reg_write(ctx, SRQC_IDX_BUF_PG_SZ,
+ to_hr_hw_page_shift(idx_que->mtr.hem_cfg.buf_pg_shift));
+
+ hr_reg_write(ctx, SRQC_IDX_CUR_BLK_ADDR_L,
+ to_hr_hw_page_addr(mtts_idx[0]));
+ hr_reg_write(ctx, SRQC_IDX_CUR_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(mtts_idx[0])));
+
+ hr_reg_write(ctx, SRQC_IDX_NXT_BLK_ADDR_L,
+ to_hr_hw_page_addr(mtts_idx[1]));
+ hr_reg_write(ctx, SRQC_IDX_NXT_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(mtts_idx[1])));
+
+ return 0;
+}
+
+static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf)
+{
+ struct ib_device *ibdev = srq->ibsrq.device;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
+ struct hns_roce_srq_context *ctx = mb_buf;
+ u64 mtts_wqe[MTT_MIN_COUNT] = {};
+ dma_addr_t dma_handle_wqe;
+ int ret;
+
+ memset(ctx, 0, sizeof(*ctx));
+
+ /* Get the physical address of srq buf */
+ ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe,
+ ARRAY_SIZE(mtts_wqe));
+ if (ret) {
+ ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ dma_handle_wqe = hns_roce_get_mtr_ba(&srq->buf_mtr);
+
+ hr_reg_write(ctx, SRQC_SRQ_ST, 1);
+ hr_reg_write_bool(ctx, SRQC_SRQ_TYPE,
+ srq->ibsrq.srq_type == IB_SRQT_XRC);
+ hr_reg_write(ctx, SRQC_PD, to_hr_pd(srq->ibsrq.pd)->pdn);
+ hr_reg_write(ctx, SRQC_SRQN, srq->srqn);
+ hr_reg_write(ctx, SRQC_XRCD, srq->xrcdn);
+ hr_reg_write(ctx, SRQC_XRC_CQN, srq->cqn);
+ hr_reg_write(ctx, SRQC_SHIFT, ilog2(srq->wqe_cnt));
+ hr_reg_write(ctx, SRQC_RQWS,
+ srq->max_gs <= 0 ? 0 : fls(srq->max_gs - 1));
+
+ hr_reg_write(ctx, SRQC_WQE_HOP_NUM,
+ to_hr_hem_hopnum(hr_dev->caps.srqwqe_hop_num,
+ srq->wqe_cnt));
+
+ hr_reg_write(ctx, SRQC_WQE_BT_BA_L, dma_handle_wqe >> DMA_WQE_SHIFT);
+ hr_reg_write(ctx, SRQC_WQE_BT_BA_H,
+ upper_32_bits(dma_handle_wqe >> DMA_WQE_SHIFT));
+
+ hr_reg_write(ctx, SRQC_WQE_BA_PG_SZ,
+ to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift));
+ hr_reg_write(ctx, SRQC_WQE_BUF_PG_SZ,
+ to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift));
+
+ if (srq->cap_flags & HNS_ROCE_SRQ_CAP_RECORD_DB) {
+ hr_reg_enable(ctx, SRQC_DB_RECORD_EN);
+ hr_reg_write(ctx, SRQC_DB_RECORD_ADDR_L,
+ lower_32_bits(srq->rdb.dma) >> 1);
+ hr_reg_write(ctx, SRQC_DB_RECORD_ADDR_H,
+ upper_32_bits(srq->rdb.dma));
+ }
+
+ return hns_roce_v2_write_srqc_index_queue(srq, ctx);
+}
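The base-address fields above all follow the same shift-and-split pattern: the DMA address is right-shifted by DMA_WQE_SHIFT/DMA_IDX_SHIFT (3, i.e. expressed in 8-byte units) and the result is spread across low/high register fields. The sketch below splits into plain 32-bit halves for illustration; the real SRQC fields have hardware-defined widths.

#include <stdint.h>
#include <stdio.h>

#define DMA_SHIFT 3

static void split_ba(uint64_t dma, uint32_t *lo, uint32_t *hi)
{
	uint64_t ba = dma >> DMA_SHIFT;	/* 8-byte aligned base address */

	*lo = (uint32_t)ba;
	*hi = (uint32_t)(ba >> 32);
}

int main(void)
{
	uint32_t lo, hi;

	split_ba(0x123456789abcdef8ULL, &lo, &hi);
	printf("BT_BA_L=0x%08x BT_BA_H=0x%08x\n", lo, hi);
	return 0;
}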
+
+static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
+ struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+ struct hns_roce_srq_context *srq_context;
+ struct hns_roce_srq_context *srqc_mask;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret = 0;
+
+ /* Resizing SRQs is not supported yet */
+ if (srq_attr_mask & IB_SRQ_MAX_WR) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ if (srq_attr_mask & IB_SRQ_LIMIT) {
+ if (srq_attr->srq_limit > srq->wqe_cnt) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox)) {
+ ret = PTR_ERR(mailbox);
+ goto out;
+ }
+
+ srq_context = mailbox->buf;
+ srqc_mask = (struct hns_roce_srq_context *)mailbox->buf + 1;
+
+ memset(srqc_mask, 0xff, sizeof(*srqc_mask));
+
+ hr_reg_write(srq_context, SRQC_LIMIT_WL, srq_attr->srq_limit);
+ hr_reg_clear(srqc_mask, SRQC_LIMIT_WL);
+
+ ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0,
+ HNS_ROCE_CMD_MODIFY_SRQC, srq->srqn);
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ if (ret)
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to handle cmd of modifying SRQ, ret = %d.\n",
+ ret);
+ }
+
+out:
+ if (ret)
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_SRQ_MODIFY_ERR_CNT]);
+
+ return ret;
+}
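The SRQ limit update above uses the usual HIP08 mailbox convention: the buffer carries the new context followed by a same-sized mask initialised to all ones, and clearing a field in the mask is what tells firmware to apply the matching field from the context. The fragment below is a toy, self-contained picture of that convention only; the two-word structure and field position are invented for the demo and do not match the real hns_roce_srq_context layout.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Toy two-word context; the real mailbox buffer is laid out as
 * [ srq_context | srq_context mask ]. */
struct toy_ctx {
	uint32_t word[2];
};

int main(void)
{
	struct toy_ctx ctx, mask;

	memset(&ctx, 0, sizeof(ctx));
	memset(&mask, 0xff, sizeof(mask));	/* all ones: change nothing */

	ctx.word[0] = 16;			/* new limit watermark */
	mask.word[0] &= ~0xffffu;		/* cleared bits: apply field */

	printf("ctx[0]=%u mask=0x%x,0x%x\n", (unsigned int)ctx.word[0],
	       (unsigned int)mask.word[0], (unsigned int)mask.word[1]);
	return 0;
}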
+
+static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+ struct hns_roce_srq_context *srq_context;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ srq_context = mailbox->buf;
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma,
+ HNS_ROCE_CMD_QUERY_SRQC, srq->srqn);
+ if (ret) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to process cmd of querying SRQ, ret = %d.\n",
+ ret);
+ goto out;
+ }
+
+ attr->srq_limit = hr_reg_read(srq_context, SRQC_LIMIT_WL);
+ attr->max_wr = srq->wqe_cnt;
+ attr->max_sge = srq->max_gs - srq->rsv_sge;
+
+out:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
struct hns_roce_dev *hr_dev = to_hr_dev(cq->device);
@@ -4602,137 +6056,208 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
int ret;
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
+ ret = PTR_ERR_OR_ZERO(mailbox);
+ if (ret)
+ goto err_out;
cq_context = mailbox->buf;
cqc_mask = (struct hns_roce_v2_cq_context *)mailbox->buf + 1;
memset(cqc_mask, 0xff, sizeof(*cqc_mask));
- roce_set_field(cq_context->byte_56_cqe_period_maxcnt,
- V2_CQC_BYTE_56_CQ_MAX_CNT_M, V2_CQC_BYTE_56_CQ_MAX_CNT_S,
- cq_count);
- roce_set_field(cqc_mask->byte_56_cqe_period_maxcnt,
- V2_CQC_BYTE_56_CQ_MAX_CNT_M, V2_CQC_BYTE_56_CQ_MAX_CNT_S,
- 0);
- roce_set_field(cq_context->byte_56_cqe_period_maxcnt,
- V2_CQC_BYTE_56_CQ_PERIOD_M, V2_CQC_BYTE_56_CQ_PERIOD_S,
- cq_period);
- roce_set_field(cqc_mask->byte_56_cqe_period_maxcnt,
- V2_CQC_BYTE_56_CQ_PERIOD_M, V2_CQC_BYTE_56_CQ_PERIOD_S,
- 0);
-
- ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_cq->cqn, 1,
- HNS_ROCE_CMD_MODIFY_CQC,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
+ hr_reg_write(cq_context, CQC_CQ_MAX_CNT, cq_count);
+ hr_reg_clear(cqc_mask, CQC_CQ_MAX_CNT);
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ if (cq_period * HNS_ROCE_CLOCK_ADJUST > USHRT_MAX) {
+ dev_info(hr_dev->dev,
+ "cq_period(%u) reached the upper limit, adjusted to 65.\n",
+ cq_period);
+ cq_period = HNS_ROCE_MAX_CQ_PERIOD_HIP08;
+ }
+ cq_period *= HNS_ROCE_CLOCK_ADJUST;
+ }
+ hr_reg_write(cq_context, CQC_CQ_PERIOD, cq_period);
+ hr_reg_clear(cqc_mask, CQC_CQ_PERIOD);
+
+ ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0,
+ HNS_ROCE_CMD_MODIFY_CQC, hr_cq->cqn);
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
if (ret)
- dev_err(hr_dev->dev, "MODIFY CQ Failed to cmd mailbox.\n");
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to process cmd when modifying CQ, ret = %d.\n",
+ ret);
+
+err_out:
+ if (ret)
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CQ_MODIFY_ERR_CNT]);
return ret;
}
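On HIP08 the moderation period is scaled by a clock-adjust factor before it is written, and clamped first so the scaled value still fits the CQC_CQ_PERIOD field. The numbers in the sketch below are hypothetical stand-ins for HNS_ROCE_CLOCK_ADJUST and HNS_ROCE_MAX_CQ_PERIOD_HIP08, chosen only so the standalone example runs.

#include <limits.h>
#include <stdio.h>

#define CLOCK_ADJUST		1000	/* demo stand-in */
#define MAX_CQ_PERIOD_HIP08	65	/* demo stand-in */

static unsigned int scale_cq_period(unsigned int cq_period)
{
	/* Clamp before scaling so the result never overflows the
	 * 16-bit period field, exactly as the branch above does. */
	if (cq_period * CLOCK_ADJUST > USHRT_MAX)
		cq_period = MAX_CQ_PERIOD_HIP08;
	return cq_period * CLOCK_ADJUST;
}

int main(void)
{
	unsigned int in[] = { 10, 65, 66, 500 };
	unsigned int i;

	for (i = 0; i < sizeof(in) / sizeof(in[0]); i++)
		printf("in=%u out=%u\n", in[i], scale_cq_period(in[i]));
	return 0;
}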
-static void hns_roce_set_qps_to_err(struct hns_roce_dev *hr_dev, u32 qpn)
+static int hns_roce_v2_query_cqc(struct hns_roce_dev *hr_dev, u32 cqn,
+ void *buffer)
{
- struct hns_roce_qp *hr_qp;
- struct ib_qp_attr attr;
- int attr_mask;
+ struct hns_roce_v2_cq_context *context;
+ struct hns_roce_cmd_mailbox *mailbox;
int ret;
- hr_qp = __hns_roce_qp_lookup(hr_dev, qpn);
- if (!hr_qp) {
- dev_warn(hr_dev->dev, "no hr_qp can be found!\n");
- return;
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ context = mailbox->buf;
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma,
+ HNS_ROCE_CMD_QUERY_CQC, cqn);
+ if (ret) {
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to process cmd when querying CQ, ret = %d.\n",
+ ret);
+ goto err_mailbox;
}
- if (hr_qp->ibqp.uobject) {
- if (hr_qp->sdb_en == 1) {
- hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr);
- if (hr_qp->rdb_en == 1)
- hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr);
- } else {
- dev_warn(hr_dev->dev, "flush cqe is unsupported in userspace!\n");
- return;
- }
+ memcpy(buffer, context, sizeof(*context));
+
+err_mailbox:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return ret;
+}
+
+static int hns_roce_v2_query_mpt(struct hns_roce_dev *hr_dev, u32 key,
+ void *buffer)
+{
+ struct hns_roce_v2_mpt_entry *context;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ context = mailbox->buf;
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_MPT,
+ key_to_hw_index(key));
+ if (ret) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to process cmd when querying MPT, ret = %d.\n",
+ ret);
+ goto err_mailbox;
}
- attr_mask = IB_QP_STATE;
- attr.qp_state = IB_QPS_ERR;
- ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, &attr, attr_mask,
- hr_qp->state, IB_QPS_ERR);
- if (ret)
- dev_err(hr_dev->dev, "failed to modify qp %d to err state.\n",
- qpn);
+ memcpy(buffer, context, sizeof(*context));
+
+err_mailbox:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return ret;
}
-static void hns_roce_irq_work_handle(struct work_struct *work)
+static void dump_aeqe_log(struct hns_roce_work *irq_work)
{
- struct hns_roce_work *irq_work =
- container_of(work, struct hns_roce_work, work);
- struct device *dev = irq_work->hr_dev->dev;
- u32 qpn = irq_work->qpn;
- u32 cqn = irq_work->cqn;
+ struct hns_roce_dev *hr_dev = irq_work->hr_dev;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
switch (irq_work->event_type) {
case HNS_ROCE_EVENT_TYPE_PATH_MIG:
- dev_info(dev, "Path migrated succeeded.\n");
+ ibdev_info(ibdev, "path migrated succeeded.\n");
break;
case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
- dev_warn(dev, "Path migration failed.\n");
+ ibdev_warn(ibdev, "path migration failed.\n");
break;
case HNS_ROCE_EVENT_TYPE_COMM_EST:
- dev_info(dev, "Communication established.\n");
break;
case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
- dev_warn(dev, "Send queue drained.\n");
+ ibdev_dbg(ibdev, "send queue drained.\n");
break;
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
- dev_err(dev, "Local work queue 0x%x catas error, sub_type:%d\n",
- qpn, irq_work->sub_type);
- hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
+ ibdev_err(ibdev, "local work queue 0x%x catast error, sub_event type is: %d\n",
+ irq_work->queue_num, irq_work->sub_type);
break;
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
- dev_err(dev, "Invalid request local work queue 0x%x error.\n",
- qpn);
- hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
+ ibdev_err(ibdev, "invalid request local work queue 0x%x error.\n",
+ irq_work->queue_num);
break;
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
- dev_err(dev, "Local access violation work queue 0x%x error, sub_type:%d\n",
- qpn, irq_work->sub_type);
- hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
+ ibdev_err(ibdev, "local access violation work queue 0x%x error, sub_event type is: %d\n",
+ irq_work->queue_num, irq_work->sub_type);
break;
case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
- dev_warn(dev, "SRQ limit reach.\n");
+ ibdev_dbg(ibdev, "SRQ limit reach.\n");
break;
case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
- dev_warn(dev, "SRQ last wqe reach.\n");
+ ibdev_dbg(ibdev, "SRQ last wqe reach.\n");
break;
case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
- dev_err(dev, "SRQ catas error.\n");
+ ibdev_err(ibdev, "SRQ catas error.\n");
break;
case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
- dev_err(dev, "CQ 0x%x access err.\n", cqn);
+ ibdev_err(ibdev, "CQ 0x%x access err.\n", irq_work->queue_num);
break;
case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
- dev_warn(dev, "CQ 0x%x overflow\n", cqn);
+ ibdev_warn(ibdev, "CQ 0x%x overflow\n", irq_work->queue_num);
break;
case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
- dev_warn(dev, "DB overflow.\n");
+ ibdev_warn(ibdev, "DB overflow.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_MB:
break;
case HNS_ROCE_EVENT_TYPE_FLR:
- dev_warn(dev, "Function level reset.\n");
+ ibdev_warn(ibdev, "function level reset.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
+ ibdev_err(ibdev, "xrc domain violation error.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
+ ibdev_err(ibdev, "invalid xrceth error.\n");
+ break;
+ default:
+ ibdev_info(ibdev, "Undefined event %d.\n",
+ irq_work->event_type);
+ break;
+ }
+}
+
+static void hns_roce_irq_work_handle(struct work_struct *work)
+{
+ struct hns_roce_work *irq_work =
+ container_of(work, struct hns_roce_work, work);
+ struct hns_roce_dev *hr_dev = irq_work->hr_dev;
+ int event_type = irq_work->event_type;
+ u32 queue_num = irq_work->queue_num;
+
+ switch (event_type) {
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG:
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
+ case HNS_ROCE_EVENT_TYPE_COMM_EST:
+ case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
+ case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
+ case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
+ case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
+ case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
+ hns_roce_qp_event(hr_dev, queue_num, event_type);
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
+ case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+ hns_roce_srq_event(hr_dev, queue_num, event_type);
+ break;
+ case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
+ hns_roce_cq_event(hr_dev, queue_num, event_type);
break;
default:
break;
}
+ dump_aeqe_log(irq_work);
+
kfree(irq_work);
}
static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq,
- u32 qpn, u32 cqn)
+ struct hns_roce_eq *eq, u32 queue_num)
{
struct hns_roce_work *irq_work;
@@ -4740,144 +6265,77 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
if (!irq_work)
return;
- INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle);
+ INIT_WORK(&irq_work->work, hns_roce_irq_work_handle);
irq_work->hr_dev = hr_dev;
- irq_work->qpn = qpn;
- irq_work->cqn = cqn;
irq_work->event_type = eq->event_type;
irq_work->sub_type = eq->sub_type;
- queue_work(hr_dev->irq_workq, &(irq_work->work));
+ irq_work->queue_num = queue_num;
+ queue_work(hr_dev->irq_workq, &irq_work->work);
}
-static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
+static void update_eq_db(struct hns_roce_eq *eq)
{
struct hns_roce_dev *hr_dev = eq->hr_dev;
- u32 doorbell[2];
-
- doorbell[0] = 0;
- doorbell[1] = 0;
+ struct hns_roce_v2_db eq_db = {};
if (eq->type_flag == HNS_ROCE_AEQ) {
- roce_set_field(doorbell[0], HNS_ROCE_V2_EQ_DB_CMD_M,
- HNS_ROCE_V2_EQ_DB_CMD_S,
- eq->arm_st == HNS_ROCE_V2_EQ_ALWAYS_ARMED ?
- HNS_ROCE_EQ_DB_CMD_AEQ :
- HNS_ROCE_EQ_DB_CMD_AEQ_ARMED);
+ hr_reg_write(&eq_db, EQ_DB_CMD,
+ eq->arm_st == HNS_ROCE_V2_EQ_ALWAYS_ARMED ?
+ HNS_ROCE_EQ_DB_CMD_AEQ :
+ HNS_ROCE_EQ_DB_CMD_AEQ_ARMED);
} else {
- roce_set_field(doorbell[0], HNS_ROCE_V2_EQ_DB_TAG_M,
- HNS_ROCE_V2_EQ_DB_TAG_S, eq->eqn);
+ hr_reg_write(&eq_db, EQ_DB_TAG, eq->eqn);
- roce_set_field(doorbell[0], HNS_ROCE_V2_EQ_DB_CMD_M,
- HNS_ROCE_V2_EQ_DB_CMD_S,
- eq->arm_st == HNS_ROCE_V2_EQ_ALWAYS_ARMED ?
- HNS_ROCE_EQ_DB_CMD_CEQ :
- HNS_ROCE_EQ_DB_CMD_CEQ_ARMED);
+ hr_reg_write(&eq_db, EQ_DB_CMD,
+ eq->arm_st == HNS_ROCE_V2_EQ_ALWAYS_ARMED ?
+ HNS_ROCE_EQ_DB_CMD_CEQ :
+ HNS_ROCE_EQ_DB_CMD_CEQ_ARMED);
}
- roce_set_field(doorbell[1], HNS_ROCE_V2_EQ_DB_PARA_M,
- HNS_ROCE_V2_EQ_DB_PARA_S,
- (eq->cons_index & HNS_ROCE_V2_CONS_IDX_M));
-
- hns_roce_write64(hr_dev, doorbell, eq->doorbell);
-}
-
-static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry)
-{
- u32 buf_chk_sz;
- unsigned long off;
+ hr_reg_write(&eq_db, EQ_DB_CI, eq->cons_index);
- buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
- off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE;
-
- return (struct hns_roce_aeqe *)((char *)(eq->buf_list->buf) +
- off % buf_chk_sz);
-}
-
-static struct hns_roce_aeqe *mhop_get_aeqe(struct hns_roce_eq *eq, u32 entry)
-{
- u32 buf_chk_sz;
- unsigned long off;
-
- buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
-
- off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE;
-
- if (eq->hop_num == HNS_ROCE_HOP_NUM_0)
- return (struct hns_roce_aeqe *)((u8 *)(eq->bt_l0) +
- off % buf_chk_sz);
- else
- return (struct hns_roce_aeqe *)((u8 *)
- (eq->buf[off / buf_chk_sz]) + off % buf_chk_sz);
+ hns_roce_write64(hr_dev, (__le32 *)&eq_db, eq->db_reg);
}
static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
{
struct hns_roce_aeqe *aeqe;
- if (!eq->hop_num)
- aeqe = get_aeqe_v2(eq, eq->cons_index);
- else
- aeqe = mhop_get_aeqe(eq, eq->cons_index);
+ aeqe = hns_roce_buf_offset(eq->mtr.kmem,
+ (eq->cons_index & (eq->entries - 1)) *
+ eq->eqe_size);
- return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^
+ return (hr_reg_read(aeqe, AEQE_OWNER) ^
!!(eq->cons_index & eq->entries)) ? aeqe : NULL;
}
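next_aeqe_sw_v2() decides whether the entry at the consumer index is new by XOR-ing its owner bit with the 'pass' bit of the index: bit log2(entries) of cons_index flips every time the ring wraps, so an entry is valid exactly when its owner bit disagrees with that parity. The standalone sketch below replays the check on a tiny ring; the owner polarity is inferred from the expression above and the ring size is an arbitrary demo value.

#include <stdio.h>

#define ENTRIES 8	/* demo ring size; must be a power of two */

/* Same test as (hr_reg_read(aeqe, AEQE_OWNER) ^
 * !!(eq->cons_index & eq->entries)). */
static int eqe_is_new(unsigned int owner, unsigned int cons_index)
{
	return owner ^ !!(cons_index & ENTRIES);
}

int main(void)
{
	/* First pass (pass bit 0): entries written with owner = 1 match. */
	printf("pass 0, owner 1 -> %d\n", eqe_is_new(1, 3));
	/* Second pass (pass bit 1): stale owner = 1 entries no longer do. */
	printf("pass 1, owner 1 -> %d\n", eqe_is_new(1, ENTRIES + 3));
	/* Second pass: freshly rewritten owner = 0 entries match again. */
	printf("pass 1, owner 0 -> %d\n", eqe_is_new(0, ENTRIES + 3));
	return 0;
}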
-static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
+static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq)
{
- struct device *dev = hr_dev->dev;
- struct hns_roce_aeqe *aeqe;
- int aeqe_found = 0;
+ struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq);
+ irqreturn_t aeqe_found = IRQ_NONE;
+ int num_aeqes = 0;
int event_type;
+ u32 queue_num;
int sub_type;
- u32 srqn;
- u32 qpn;
- u32 cqn;
-
- while ((aeqe = next_aeqe_sw_v2(eq))) {
+ while (aeqe && num_aeqes < HNS_AEQ_POLLING_BUDGET) {
/* Make sure we read AEQ entry after we have checked the
* ownership bit
*/
dma_rmb();
- event_type = roce_get_field(aeqe->asyn,
- HNS_ROCE_V2_AEQE_EVENT_TYPE_M,
- HNS_ROCE_V2_AEQE_EVENT_TYPE_S);
- sub_type = roce_get_field(aeqe->asyn,
- HNS_ROCE_V2_AEQE_SUB_TYPE_M,
- HNS_ROCE_V2_AEQE_SUB_TYPE_S);
- qpn = roce_get_field(aeqe->event.qp_event.qp,
- HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
- HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
- cqn = roce_get_field(aeqe->event.cq_event.cq,
- HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
- HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
- srqn = roce_get_field(aeqe->event.srq_event.srq,
- HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
- HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
+ event_type = hr_reg_read(aeqe, AEQE_EVENT_TYPE);
+ sub_type = hr_reg_read(aeqe, AEQE_SUB_TYPE);
+ queue_num = hr_reg_read(aeqe, AEQE_EVENT_QUEUE_NUM);
switch (event_type) {
- case HNS_ROCE_EVENT_TYPE_PATH_MIG:
- case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
- case HNS_ROCE_EVENT_TYPE_COMM_EST:
- case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
- case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
- hns_roce_qp_event(hr_dev, qpn, event_type);
- break;
- case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
- case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
- hns_roce_srq_event(hr_dev, srqn, event_type);
- break;
- case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
- case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
- hns_roce_cq_event(hr_dev, cqn, event_type);
- break;
- case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
+ case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
+ case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
+ hns_roce_flush_cqe(hr_dev, queue_num);
break;
case HNS_ROCE_EVENT_TYPE_MB:
hns_roce_cmd_event(hr_dev,
@@ -4885,681 +6343,397 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
aeqe->event.cmd.status,
le64_to_cpu(aeqe->event.cmd.out_param));
break;
- case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW:
- break;
- case HNS_ROCE_EVENT_TYPE_FLR:
- break;
default:
- dev_err(dev, "Unhandled event %d on EQ %d at idx %u.\n",
- event_type, eq->eqn, eq->cons_index);
break;
}
eq->event_type = event_type;
eq->sub_type = sub_type;
++eq->cons_index;
- aeqe_found = 1;
+ aeqe_found = IRQ_HANDLED;
+ trace_hns_ae_info(event_type, aeqe, eq->eqe_size);
- if (eq->cons_index > (2 * eq->entries - 1)) {
- dev_warn(dev, "cons_index overflow, set back to 0.\n");
- eq->cons_index = 0;
- }
- hns_roce_v2_init_irq_work(hr_dev, eq, qpn, cqn);
- }
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_AEQE_CNT]);
- set_eq_cons_index_v2(eq);
- return aeqe_found;
-}
-
-static struct hns_roce_ceqe *get_ceqe_v2(struct hns_roce_eq *eq, u32 entry)
-{
- u32 buf_chk_sz;
- unsigned long off;
+ hns_roce_v2_init_irq_work(hr_dev, eq, queue_num);
- buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
- off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE;
-
- return (struct hns_roce_ceqe *)((char *)(eq->buf_list->buf) +
- off % buf_chk_sz);
-}
-
-static struct hns_roce_ceqe *mhop_get_ceqe(struct hns_roce_eq *eq, u32 entry)
-{
- u32 buf_chk_sz;
- unsigned long off;
-
- buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
+ aeqe = next_aeqe_sw_v2(eq);
+ ++num_aeqes;
+ }
- off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE;
+ update_eq_db(eq);
- if (eq->hop_num == HNS_ROCE_HOP_NUM_0)
- return (struct hns_roce_ceqe *)((u8 *)(eq->bt_l0) +
- off % buf_chk_sz);
- else
- return (struct hns_roce_ceqe *)((u8 *)(eq->buf[off /
- buf_chk_sz]) + off % buf_chk_sz);
+ return IRQ_RETVAL(aeqe_found);
}
static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
{
struct hns_roce_ceqe *ceqe;
- if (!eq->hop_num)
- ceqe = get_ceqe_v2(eq, eq->cons_index);
- else
- ceqe = mhop_get_ceqe(eq, eq->cons_index);
+ ceqe = hns_roce_buf_offset(eq->mtr.kmem,
+ (eq->cons_index & (eq->entries - 1)) *
+ eq->eqe_size);
- return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^
- (!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
+ return (hr_reg_read(ceqe, CEQE_OWNER) ^
+ !!(eq->cons_index & eq->entries)) ? ceqe : NULL;
}
-static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
+static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_eq *eq)
{
- struct device *dev = hr_dev->dev;
- struct hns_roce_ceqe *ceqe;
- int ceqe_found = 0;
- u32 cqn;
+ queue_work(system_bh_wq, &eq->work);
- while ((ceqe = next_ceqe_sw_v2(eq))) {
-
- /* Make sure we read CEQ entry after we have checked the
- * ownership bit
- */
- dma_rmb();
-
- cqn = roce_get_field(ceqe->comp,
- HNS_ROCE_V2_CEQE_COMP_CQN_M,
- HNS_ROCE_V2_CEQE_COMP_CQN_S);
-
- hns_roce_cq_completion(hr_dev, cqn);
-
- ++eq->cons_index;
- ceqe_found = 1;
-
- if (eq->cons_index > (2 * eq->entries - 1)) {
- dev_warn(dev, "cons_index overflow, set back to 0.\n");
- eq->cons_index = 0;
- }
- }
-
- set_eq_cons_index_v2(eq);
-
- return ceqe_found;
+ return IRQ_HANDLED;
}
static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr)
{
struct hns_roce_eq *eq = eq_ptr;
struct hns_roce_dev *hr_dev = eq->hr_dev;
- int int_work = 0;
+ irqreturn_t int_work;
if (eq->type_flag == HNS_ROCE_CEQ)
/* Completion event interrupt */
- int_work = hns_roce_v2_ceq_int(hr_dev, eq);
+ int_work = hns_roce_v2_ceq_int(eq);
else
- /* Asychronous event interrupt */
+ /* Asynchronous event interrupt */
int_work = hns_roce_v2_aeq_int(hr_dev, eq);
return IRQ_RETVAL(int_work);
}
-static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id)
+static irqreturn_t abnormal_interrupt_basic(struct hns_roce_dev *hr_dev,
+ u32 int_st)
{
- struct hns_roce_dev *hr_dev = dev_id;
- struct device *dev = hr_dev->dev;
- int int_work = 0;
- u32 int_st;
+ struct pci_dev *pdev = hr_dev->pci_dev;
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
+ const struct hnae3_ae_ops *ops = ae_dev->ops;
+ enum hnae3_reset_type reset_type;
+ irqreturn_t int_work = IRQ_NONE;
u32 int_en;
- /* Abnormal interrupt */
- int_st = roce_read(hr_dev, ROCEE_VF_ABN_INT_ST_REG);
int_en = roce_read(hr_dev, ROCEE_VF_ABN_INT_EN_REG);
- if (roce_get_bit(int_st, HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S)) {
- struct pci_dev *pdev = hr_dev->pci_dev;
- struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
- const struct hnae3_ae_ops *ops = ae_dev->ops;
+ if (int_st & BIT(HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S)) {
+ dev_err(hr_dev->dev, "AEQ overflow!\n");
- dev_err(dev, "AEQ overflow!\n");
+ roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG,
+ 1 << HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S);
- roce_set_bit(int_st, HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S, 1);
- roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st);
+ reset_type = hr_dev->is_vf ?
+ HNAE3_VF_FUNC_RESET : HNAE3_FUNC_RESET;
/* Set reset level for reset_event() */
if (ops->set_default_reset_request)
- ops->set_default_reset_request(ae_dev,
- HNAE3_FUNC_RESET);
+ ops->set_default_reset_request(ae_dev, reset_type);
if (ops->reset_event)
ops->reset_event(pdev, NULL);
- roce_set_bit(int_en, HNS_ROCE_V2_VF_ABN_INT_EN_S, 1);
+ int_en |= 1 << HNS_ROCE_V2_VF_ABN_INT_EN_S;
roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);
- int_work = 1;
- } else if (roce_get_bit(int_st, HNS_ROCE_V2_VF_INT_ST_BUS_ERR_S)) {
- dev_err(dev, "BUS ERR!\n");
+ int_work = IRQ_HANDLED;
+ } else {
+ dev_err(hr_dev->dev, "there is no basic abn irq found.\n");
+ }
- roce_set_bit(int_st, HNS_ROCE_V2_VF_INT_ST_BUS_ERR_S, 1);
- roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st);
+ return IRQ_RETVAL(int_work);
+}
- roce_set_bit(int_en, HNS_ROCE_V2_VF_ABN_INT_EN_S, 1);
- roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);
+static int fmea_ram_ecc_query(struct hns_roce_dev *hr_dev,
+ struct fmea_ram_ecc *ecc_info)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ int ret;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_QUERY_RAM_ECC, true);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ return ret;
- int_work = 1;
- } else if (roce_get_bit(int_st, HNS_ROCE_V2_VF_INT_ST_OTHER_ERR_S)) {
- dev_err(dev, "OTHER ERR!\n");
+ ecc_info->is_ecc_err = hr_reg_read(req, QUERY_RAM_ECC_1BIT_ERR);
+ ecc_info->res_type = hr_reg_read(req, QUERY_RAM_ECC_RES_TYPE);
+ ecc_info->index = hr_reg_read(req, QUERY_RAM_ECC_TAG);
- roce_set_bit(int_st, HNS_ROCE_V2_VF_INT_ST_OTHER_ERR_S, 1);
- roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st);
+ return 0;
+}
- roce_set_bit(int_en, HNS_ROCE_V2_VF_ABN_INT_EN_S, 1);
- roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);
+static int fmea_recover_gmv(struct hns_roce_dev *hr_dev, u32 idx)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ u32 addr_upper;
+ u32 addr_low;
+ int ret;
- int_work = 1;
- } else
- dev_err(dev, "There is no abnormal irq found!\n");
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GMV_BT, true);
+ hr_reg_write(req, CFG_GMV_BT_IDX, idx);
- return IRQ_RETVAL(int_work);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to execute cmd to read gmv, ret = %d.\n", ret);
+ return ret;
+ }
+
+ addr_low = hr_reg_read(req, CFG_GMV_BT_BA_L);
+ addr_upper = hr_reg_read(req, CFG_GMV_BT_BA_H);
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GMV_BT, false);
+ hr_reg_write(req, CFG_GMV_BT_BA_L, addr_low);
+ hr_reg_write(req, CFG_GMV_BT_BA_H, addr_upper);
+ hr_reg_write(req, CFG_GMV_BT_IDX, idx);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
}
-static void hns_roce_v2_int_mask_enable(struct hns_roce_dev *hr_dev,
- int eq_num, int enable_flag)
+static u64 fmea_get_ram_res_addr(u32 res_type, __le64 *data)
{
- int i;
+ if (res_type == ECC_RESOURCE_QPC_TIMER ||
+ res_type == ECC_RESOURCE_CQC_TIMER ||
+ res_type == ECC_RESOURCE_SCCC)
+ return le64_to_cpu(*data);
- if (enable_flag == EQ_ENABLE) {
- for (i = 0; i < eq_num; i++)
- roce_write(hr_dev, ROCEE_VF_EVENT_INT_EN_REG +
- i * EQ_REG_OFFSET,
- HNS_ROCE_V2_VF_EVENT_INT_EN_M);
+ return le64_to_cpu(*data) << HNS_HW_PAGE_SHIFT;
+}
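fmea_get_ram_res_addr() above distinguishes resources whose BT0 entry already holds a byte address (the timer and SCC contexts) from those that hold a hardware page number and therefore need a left shift by the page shift. A small self-contained sketch of the same branching follows; the RES_* identifiers and the 12-bit page shift are placeholders for the driver's ECC_RESOURCE_* enum and HNS_HW_PAGE_SHIFT.

#include <stdint.h>
#include <stdio.h>

enum { RES_QPC_TIMER, RES_CQC_TIMER, RES_SCCC, RES_OTHER };	/* demo */
#define HW_PAGE_SHIFT 12					/* demo */

static uint64_t res_addr(int res_type, uint64_t raw)
{
	/* Timer and SCC contexts store the address directly; every
	 * other resource stores a hardware page index. */
	if (res_type == RES_QPC_TIMER || res_type == RES_CQC_TIMER ||
	    res_type == RES_SCCC)
		return raw;
	return raw << HW_PAGE_SHIFT;
}

int main(void)
{
	printf("timer: 0x%llx\n",
	       (unsigned long long)res_addr(RES_QPC_TIMER, 0x1000));
	printf("other: 0x%llx\n",
	       (unsigned long long)res_addr(RES_OTHER, 0x1000));
	return 0;
}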
- roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG,
- HNS_ROCE_V2_VF_ABN_INT_EN_M);
- roce_write(hr_dev, ROCEE_VF_ABN_INT_CFG_REG,
- HNS_ROCE_V2_VF_ABN_INT_CFG_M);
- } else {
- for (i = 0; i < eq_num; i++)
- roce_write(hr_dev, ROCEE_VF_EVENT_INT_EN_REG +
- i * EQ_REG_OFFSET,
- HNS_ROCE_V2_VF_EVENT_INT_EN_M & 0x0);
+static int fmea_recover_others(struct hns_roce_dev *hr_dev, u32 res_type,
+ u32 index)
+{
+ u8 write_bt0_op = fmea_ram_res[res_type].write_bt0_op;
+ u8 read_bt0_op = fmea_ram_res[res_type].read_bt0_op;
+ struct hns_roce_cmd_mailbox *mailbox;
+ u64 addr;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
- roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG,
- HNS_ROCE_V2_VF_ABN_INT_EN_M & 0x0);
- roce_write(hr_dev, ROCEE_VF_ABN_INT_CFG_REG,
- HNS_ROCE_V2_VF_ABN_INT_CFG_M & 0x0);
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, read_bt0_op, index);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to execute cmd to read fmea ram, ret = %d.\n",
+ ret);
+ goto out;
}
+
+ addr = fmea_get_ram_res_addr(res_type, mailbox->buf);
+
+ ret = hns_roce_cmd_mbox(hr_dev, addr, 0, write_bt0_op, index);
+ if (ret)
+ dev_err(hr_dev->dev,
+ "failed to execute cmd to write fmea ram, ret = %d.\n",
+ ret);
+
+out:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
}
-static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, int eqn)
+static void fmea_ram_ecc_recover(struct hns_roce_dev *hr_dev,
+ struct fmea_ram_ecc *ecc_info)
{
- struct device *dev = hr_dev->dev;
+ u32 res_type = ecc_info->res_type;
+ u32 index = ecc_info->index;
int ret;
- if (eqn < hr_dev->caps.num_comp_vectors)
- ret = hns_roce_cmd_mbox(hr_dev, 0, 0, eqn & HNS_ROCE_V2_EQN_M,
- 0, HNS_ROCE_CMD_DESTROY_CEQC,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
+ BUILD_BUG_ON(ARRAY_SIZE(fmea_ram_res) != ECC_RESOURCE_COUNT);
+
+ if (res_type >= ECC_RESOURCE_COUNT) {
+ dev_err(hr_dev->dev, "unsupported fmea ram ecc type %u.\n",
+ res_type);
+ return;
+ }
+
+ if (res_type == ECC_RESOURCE_GMV)
+ ret = fmea_recover_gmv(hr_dev, index);
else
- ret = hns_roce_cmd_mbox(hr_dev, 0, 0, eqn & HNS_ROCE_V2_EQN_M,
- 0, HNS_ROCE_CMD_DESTROY_AEQC,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
+ ret = fmea_recover_others(hr_dev, res_type, index);
if (ret)
- dev_err(dev, "[mailbox cmd] destroy eqc(%d) failed.\n", eqn);
+ dev_err(hr_dev->dev,
+ "failed to recover %s, index = %u, ret = %d.\n",
+ fmea_ram_res[res_type].name, index, ret);
}
-static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
+static void fmea_ram_ecc_work(struct work_struct *ecc_work)
{
- struct device *dev = hr_dev->dev;
- u64 idx;
- u64 size;
- u32 buf_chk_sz;
- u32 bt_chk_sz;
- u32 mhop_num;
- int eqe_alloc;
- int i = 0;
- int j = 0;
-
- mhop_num = hr_dev->caps.eqe_hop_num;
- buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
- bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
+ struct hns_roce_dev *hr_dev =
+ container_of(ecc_work, struct hns_roce_dev, ecc_work);
+ struct fmea_ram_ecc ecc_info = {};
- /* hop_num = 0 */
- if (mhop_num == HNS_ROCE_HOP_NUM_0) {
- dma_free_coherent(dev, (unsigned int)(eq->entries *
- eq->eqe_size), eq->bt_l0, eq->l0_dma);
+ if (fmea_ram_ecc_query(hr_dev, &ecc_info)) {
+ dev_err(hr_dev->dev, "failed to query fmea ram ecc.\n");
return;
}
- /* hop_num = 1 or hop = 2 */
- dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
- if (mhop_num == 1) {
- for (i = 0; i < eq->l0_last_num; i++) {
- if (i == eq->l0_last_num - 1) {
- eqe_alloc = i * (buf_chk_sz / eq->eqe_size);
- size = (eq->entries - eqe_alloc) * eq->eqe_size;
- dma_free_coherent(dev, size, eq->buf[i],
- eq->buf_dma[i]);
- break;
- }
- dma_free_coherent(dev, buf_chk_sz, eq->buf[i],
- eq->buf_dma[i]);
- }
- } else if (mhop_num == 2) {
- for (i = 0; i < eq->l0_last_num; i++) {
- dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
- eq->l1_dma[i]);
-
- for (j = 0; j < bt_chk_sz / 8; j++) {
- idx = i * (bt_chk_sz / 8) + j;
- if ((i == eq->l0_last_num - 1)
- && j == eq->l1_last_num - 1) {
- eqe_alloc = (buf_chk_sz / eq->eqe_size)
- * idx;
- size = (eq->entries - eqe_alloc)
- * eq->eqe_size;
- dma_free_coherent(dev, size,
- eq->buf[idx],
- eq->buf_dma[idx]);
- break;
- }
- dma_free_coherent(dev, buf_chk_sz, eq->buf[idx],
- eq->buf_dma[idx]);
- }
- }
+ if (!ecc_info.is_ecc_err) {
+ dev_err(hr_dev->dev, "there is no fmea ram ecc err found.\n");
+ return;
}
- kfree(eq->buf_dma);
- kfree(eq->buf);
- kfree(eq->l1_dma);
- kfree(eq->bt_l1);
- eq->buf_dma = NULL;
- eq->buf = NULL;
- eq->l1_dma = NULL;
- eq->bt_l1 = NULL;
+
+ fmea_ram_ecc_recover(hr_dev, &ecc_info);
}
-static void hns_roce_v2_free_eq(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
+static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id)
{
- u32 buf_chk_sz;
+ struct hns_roce_dev *hr_dev = dev_id;
+ irqreturn_t int_work = IRQ_NONE;
+ u32 int_st;
- buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT);
+ int_st = roce_read(hr_dev, ROCEE_VF_ABN_INT_ST_REG);
- if (hr_dev->caps.eqe_hop_num) {
- hns_roce_mhop_free_eq(hr_dev, eq);
- return;
+ if (int_st) {
+ int_work = abnormal_interrupt_basic(hr_dev, int_st);
+ } else if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+ queue_work(hr_dev->irq_workq, &hr_dev->ecc_work);
+ int_work = IRQ_HANDLED;
+ } else {
+ dev_err(hr_dev->dev, "there is no abnormal irq found.\n");
}
- if (eq->buf_list)
- dma_free_coherent(hr_dev->dev, buf_chk_sz,
- eq->buf_list->buf, eq->buf_list->map);
+ return IRQ_RETVAL(int_work);
}
-static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq,
- void *mb_buf)
+static void hns_roce_v2_int_mask_enable(struct hns_roce_dev *hr_dev,
+ int eq_num, u32 enable_flag)
{
- struct hns_roce_eq_context *eqc;
+ int i;
- eqc = mb_buf;
- memset(eqc, 0, sizeof(struct hns_roce_eq_context));
+ for (i = 0; i < eq_num; i++)
+ roce_write(hr_dev, ROCEE_VF_EVENT_INT_EN_REG +
+ i * EQ_REG_OFFSET, enable_flag);
- /* init eqc */
- eq->doorbell = hr_dev->reg_base + ROCEE_VF_EQ_DB_CFG0_REG;
- eq->hop_num = hr_dev->caps.eqe_hop_num;
- eq->cons_index = 0;
- eq->over_ignore = HNS_ROCE_V2_EQ_OVER_IGNORE_0;
- eq->coalesce = HNS_ROCE_V2_EQ_COALESCE_0;
- eq->arm_st = HNS_ROCE_V2_EQ_ALWAYS_ARMED;
- eq->eqe_ba_pg_sz = hr_dev->caps.eqe_ba_pg_sz;
- eq->eqe_buf_pg_sz = hr_dev->caps.eqe_buf_pg_sz;
- eq->shift = ilog2((unsigned int)eq->entries);
+ roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, enable_flag);
+ roce_write(hr_dev, ROCEE_VF_ABN_INT_CFG_REG, enable_flag);
+}
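The rewritten mask helper programs one event-interrupt enable register per EQ at a fixed stride from a base offset, then writes the abnormal-interrupt registers once with the same flag. The loop below is a toy reproduction of that addressing pattern only; the base address and stride are invented stand-ins for ROCEE_VF_EVENT_INT_EN_REG and EQ_REG_OFFSET.

#include <stdio.h>

#define EVENT_INT_EN_BASE	0x1000	/* demo stand-in */
#define EQ_REG_STRIDE		0x4	/* demo stand-in */

int main(void)
{
	unsigned int eq_num = 4;
	unsigned int enable_flag = 1;	/* e.g. EQ_ENABLE */
	unsigned int i;

	/* One write per EQ at base + i * stride, all with the same
	 * enable/disable value, mirroring the loop above. */
	for (i = 0; i < eq_num; i++)
		printf("write 0x%x to reg 0x%x\n", enable_flag,
		       EVENT_INT_EN_BASE + i * EQ_REG_STRIDE);
	return 0;
}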
- if (!eq->hop_num)
- eq->eqe_ba = eq->buf_list->map;
- else
- eq->eqe_ba = eq->l0_dma;
-
- /* set eqc state */
- roce_set_field(eqc->byte_4,
- HNS_ROCE_EQC_EQ_ST_M,
- HNS_ROCE_EQC_EQ_ST_S,
- HNS_ROCE_V2_EQ_STATE_VALID);
-
- /* set eqe hop num */
- roce_set_field(eqc->byte_4,
- HNS_ROCE_EQC_HOP_NUM_M,
- HNS_ROCE_EQC_HOP_NUM_S, eq->hop_num);
-
- /* set eqc over_ignore */
- roce_set_field(eqc->byte_4,
- HNS_ROCE_EQC_OVER_IGNORE_M,
- HNS_ROCE_EQC_OVER_IGNORE_S, eq->over_ignore);
-
- /* set eqc coalesce */
- roce_set_field(eqc->byte_4,
- HNS_ROCE_EQC_COALESCE_M,
- HNS_ROCE_EQC_COALESCE_S, eq->coalesce);
-
- /* set eqc arm_state */
- roce_set_field(eqc->byte_4,
- HNS_ROCE_EQC_ARM_ST_M,
- HNS_ROCE_EQC_ARM_ST_S, eq->arm_st);
-
- /* set eqn */
- roce_set_field(eqc->byte_4,
- HNS_ROCE_EQC_EQN_M,
- HNS_ROCE_EQC_EQN_S, eq->eqn);
-
- /* set eqe_cnt */
- roce_set_field(eqc->byte_4,
- HNS_ROCE_EQC_EQE_CNT_M,
- HNS_ROCE_EQC_EQE_CNT_S,
- HNS_ROCE_EQ_INIT_EQE_CNT);
-
- /* set eqe_ba_pg_sz */
- roce_set_field(eqc->byte_8,
- HNS_ROCE_EQC_BA_PG_SZ_M,
- HNS_ROCE_EQC_BA_PG_SZ_S,
- eq->eqe_ba_pg_sz + PG_SHIFT_OFFSET);
-
- /* set eqe_buf_pg_sz */
- roce_set_field(eqc->byte_8,
- HNS_ROCE_EQC_BUF_PG_SZ_M,
- HNS_ROCE_EQC_BUF_PG_SZ_S,
- eq->eqe_buf_pg_sz + PG_SHIFT_OFFSET);
-
- /* set eq_producer_idx */
- roce_set_field(eqc->byte_8,
- HNS_ROCE_EQC_PROD_INDX_M,
- HNS_ROCE_EQC_PROD_INDX_S,
- HNS_ROCE_EQ_INIT_PROD_IDX);
-
- /* set eq_max_cnt */
- roce_set_field(eqc->byte_12,
- HNS_ROCE_EQC_MAX_CNT_M,
- HNS_ROCE_EQC_MAX_CNT_S, eq->eq_max_cnt);
-
- /* set eq_period */
- roce_set_field(eqc->byte_12,
- HNS_ROCE_EQC_PERIOD_M,
- HNS_ROCE_EQC_PERIOD_S, eq->eq_period);
-
- /* set eqe_report_timer */
- roce_set_field(eqc->eqe_report_timer,
- HNS_ROCE_EQC_REPORT_TIMER_M,
- HNS_ROCE_EQC_REPORT_TIMER_S,
- HNS_ROCE_EQ_INIT_REPORT_TIMER);
-
- /* set eqe_ba [34:3] */
- roce_set_field(eqc->eqe_ba0,
- HNS_ROCE_EQC_EQE_BA_L_M,
- HNS_ROCE_EQC_EQE_BA_L_S, eq->eqe_ba >> 3);
-
- /* set eqe_ba [64:35] */
- roce_set_field(eqc->eqe_ba1,
- HNS_ROCE_EQC_EQE_BA_H_M,
- HNS_ROCE_EQC_EQE_BA_H_S, eq->eqe_ba >> 35);
-
- /* set eq shift */
- roce_set_field(eqc->byte_28,
- HNS_ROCE_EQC_SHIFT_M,
- HNS_ROCE_EQC_SHIFT_S, eq->shift);
-
- /* set eq MSI_IDX */
- roce_set_field(eqc->byte_28,
- HNS_ROCE_EQC_MSI_INDX_M,
- HNS_ROCE_EQC_MSI_INDX_S,
- HNS_ROCE_EQ_INIT_MSI_IDX);
-
- /* set cur_eqe_ba [27:12] */
- roce_set_field(eqc->byte_28,
- HNS_ROCE_EQC_CUR_EQE_BA_L_M,
- HNS_ROCE_EQC_CUR_EQE_BA_L_S, eq->cur_eqe_ba >> 12);
-
- /* set cur_eqe_ba [59:28] */
- roce_set_field(eqc->byte_32,
- HNS_ROCE_EQC_CUR_EQE_BA_M_M,
- HNS_ROCE_EQC_CUR_EQE_BA_M_S, eq->cur_eqe_ba >> 28);
-
- /* set cur_eqe_ba [63:60] */
- roce_set_field(eqc->byte_36,
- HNS_ROCE_EQC_CUR_EQE_BA_H_M,
- HNS_ROCE_EQC_CUR_EQE_BA_H_S, eq->cur_eqe_ba >> 60);
-
- /* set eq consumer idx */
- roce_set_field(eqc->byte_36,
- HNS_ROCE_EQC_CONS_INDX_M,
- HNS_ROCE_EQC_CONS_INDX_S,
- HNS_ROCE_EQ_INIT_CONS_IDX);
-
- /* set nex_eqe_ba[43:12] */
- roce_set_field(eqc->nxt_eqe_ba0,
- HNS_ROCE_EQC_NXT_EQE_BA_L_M,
- HNS_ROCE_EQC_NXT_EQE_BA_L_S, eq->nxt_eqe_ba >> 12);
-
- /* set nex_eqe_ba[63:44] */
- roce_set_field(eqc->nxt_eqe_ba1,
- HNS_ROCE_EQC_NXT_EQE_BA_H_M,
- HNS_ROCE_EQC_NXT_EQE_BA_H_S, eq->nxt_eqe_ba >> 44);
-}
-
-static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
+static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
{
- struct device *dev = hr_dev->dev;
- int eq_alloc_done = 0;
- int eq_buf_cnt = 0;
- int eqe_alloc;
- u32 buf_chk_sz;
- u32 bt_chk_sz;
- u32 mhop_num;
- u64 size;
- u64 idx;
- int ba_num;
- int bt_num;
- int record_i;
- int record_j;
- int i = 0;
- int j = 0;
+ hns_roce_mtr_destroy(hr_dev, &eq->mtr);
+}
- mhop_num = hr_dev->caps.eqe_hop_num;
- buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
- bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
+static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq)
+{
+ struct device *dev = hr_dev->dev;
+ int eqn = eq->eqn;
+ int ret;
+ u8 cmd;
- ba_num = (PAGE_ALIGN(eq->entries * eq->eqe_size) + buf_chk_sz - 1)
- / buf_chk_sz;
- bt_num = (ba_num + bt_chk_sz / 8 - 1) / (bt_chk_sz / 8);
+ if (eqn < hr_dev->caps.num_comp_vectors)
+ cmd = HNS_ROCE_CMD_DESTROY_CEQC;
+ else
+ cmd = HNS_ROCE_CMD_DESTROY_AEQC;
- /* hop_num = 0 */
- if (mhop_num == HNS_ROCE_HOP_NUM_0) {
- if (eq->entries > buf_chk_sz / eq->eqe_size) {
- dev_err(dev, "eq entries %d is larger than buf_pg_sz!",
- eq->entries);
- return -EINVAL;
- }
- eq->bt_l0 = dma_alloc_coherent(dev, eq->entries * eq->eqe_size,
- &(eq->l0_dma), GFP_KERNEL);
- if (!eq->bt_l0)
- return -ENOMEM;
+ ret = hns_roce_destroy_hw_ctx(hr_dev, cmd, eqn & HNS_ROCE_V2_EQN_M);
+ if (ret)
+ dev_err(dev, "[mailbox cmd] destroy eqc(%d) failed.\n", eqn);
- eq->cur_eqe_ba = eq->l0_dma;
- eq->nxt_eqe_ba = 0;
+ free_eq_buf(hr_dev, eq);
+}
- memset(eq->bt_l0, 0, eq->entries * eq->eqe_size);
+static void init_eq_config(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
+{
+ eq->db_reg = hr_dev->reg_base + ROCEE_VF_EQ_DB_CFG0_REG;
+ eq->cons_index = 0;
+ eq->over_ignore = HNS_ROCE_V2_EQ_OVER_IGNORE_0;
+ eq->coalesce = HNS_ROCE_V2_EQ_COALESCE_0;
+ eq->arm_st = HNS_ROCE_V2_EQ_ALWAYS_ARMED;
+ eq->shift = ilog2((unsigned int)eq->entries);
+}
- return 0;
- }
+static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
+ void *mb_buf)
+{
+ u64 eqe_ba[MTT_MIN_COUNT] = { 0 };
+ struct hns_roce_eq_context *eqc;
+ u64 bt_ba = 0;
+ int ret;
- eq->buf_dma = kcalloc(ba_num, sizeof(*eq->buf_dma), GFP_KERNEL);
- if (!eq->buf_dma)
- return -ENOMEM;
- eq->buf = kcalloc(ba_num, sizeof(*eq->buf), GFP_KERNEL);
- if (!eq->buf)
- goto err_kcalloc_buf;
-
- if (mhop_num == 2) {
- eq->l1_dma = kcalloc(bt_num, sizeof(*eq->l1_dma), GFP_KERNEL);
- if (!eq->l1_dma)
- goto err_kcalloc_l1_dma;
-
- eq->bt_l1 = kcalloc(bt_num, sizeof(*eq->bt_l1), GFP_KERNEL);
- if (!eq->bt_l1)
- goto err_kcalloc_bt_l1;
- }
-
- /* alloc L0 BT */
- eq->bt_l0 = dma_alloc_coherent(dev, bt_chk_sz, &eq->l0_dma, GFP_KERNEL);
- if (!eq->bt_l0)
- goto err_dma_alloc_l0;
-
- if (mhop_num == 1) {
- if (ba_num > (bt_chk_sz / 8))
- dev_err(dev, "ba_num %d is too large for 1 hop\n",
- ba_num);
-
- /* alloc buf */
- for (i = 0; i < bt_chk_sz / 8; i++) {
- if (eq_buf_cnt + 1 < ba_num) {
- size = buf_chk_sz;
- } else {
- eqe_alloc = i * (buf_chk_sz / eq->eqe_size);
- size = (eq->entries - eqe_alloc) * eq->eqe_size;
- }
- eq->buf[i] = dma_alloc_coherent(dev, size,
- &(eq->buf_dma[i]),
- GFP_KERNEL);
- if (!eq->buf[i])
- goto err_dma_alloc_buf;
+ eqc = mb_buf;
+ memset(eqc, 0, sizeof(struct hns_roce_eq_context));
- *(eq->bt_l0 + i) = eq->buf_dma[i];
+ init_eq_config(hr_dev, eq);
- eq_buf_cnt++;
- if (eq_buf_cnt >= ba_num)
- break;
- }
- eq->cur_eqe_ba = eq->buf_dma[0];
- eq->nxt_eqe_ba = eq->buf_dma[1];
-
- } else if (mhop_num == 2) {
- /* alloc L1 BT and buf */
- for (i = 0; i < bt_chk_sz / 8; i++) {
- eq->bt_l1[i] = dma_alloc_coherent(dev, bt_chk_sz,
- &(eq->l1_dma[i]),
- GFP_KERNEL);
- if (!eq->bt_l1[i])
- goto err_dma_alloc_l1;
- *(eq->bt_l0 + i) = eq->l1_dma[i];
-
- for (j = 0; j < bt_chk_sz / 8; j++) {
- idx = i * bt_chk_sz / 8 + j;
- if (eq_buf_cnt + 1 < ba_num) {
- size = buf_chk_sz;
- } else {
- eqe_alloc = (buf_chk_sz / eq->eqe_size)
- * idx;
- size = (eq->entries - eqe_alloc)
- * eq->eqe_size;
- }
- eq->buf[idx] = dma_alloc_coherent(dev, size,
- &(eq->buf_dma[idx]),
- GFP_KERNEL);
- if (!eq->buf[idx])
- goto err_dma_alloc_buf;
-
- *(eq->bt_l1[i] + j) = eq->buf_dma[idx];
-
- eq_buf_cnt++;
- if (eq_buf_cnt >= ba_num) {
- eq_alloc_done = 1;
- break;
- }
- }
-
- if (eq_alloc_done)
- break;
- }
- eq->cur_eqe_ba = eq->buf_dma[0];
- eq->nxt_eqe_ba = eq->buf_dma[1];
+ /* if not multi-hop, eqe buffer only use one trunk */
+ ret = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, eqe_ba,
+ ARRAY_SIZE(eqe_ba));
+ if (ret) {
+ dev_err(hr_dev->dev, "failed to find EQE mtr, ret = %d\n", ret);
+ return ret;
}
- eq->l0_last_num = i + 1;
- if (mhop_num == 2)
- eq->l1_last_num = j + 1;
+ bt_ba = hns_roce_get_mtr_ba(&eq->mtr);
+
+ hr_reg_write(eqc, EQC_EQ_ST, HNS_ROCE_V2_EQ_STATE_VALID);
+ hr_reg_write(eqc, EQC_EQE_HOP_NUM, eq->hop_num);
+ hr_reg_write(eqc, EQC_OVER_IGNORE, eq->over_ignore);
+ hr_reg_write(eqc, EQC_COALESCE, eq->coalesce);
+ hr_reg_write(eqc, EQC_ARM_ST, eq->arm_st);
+ hr_reg_write(eqc, EQC_EQN, eq->eqn);
+ hr_reg_write(eqc, EQC_EQE_CNT, HNS_ROCE_EQ_INIT_EQE_CNT);
+ hr_reg_write(eqc, EQC_EQE_BA_PG_SZ,
+ to_hr_hw_page_shift(eq->mtr.hem_cfg.ba_pg_shift));
+ hr_reg_write(eqc, EQC_EQE_BUF_PG_SZ,
+ to_hr_hw_page_shift(eq->mtr.hem_cfg.buf_pg_shift));
+ hr_reg_write(eqc, EQC_EQ_PROD_INDX, HNS_ROCE_EQ_INIT_PROD_IDX);
+ hr_reg_write(eqc, EQC_EQ_MAX_CNT, eq->eq_max_cnt);
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ if (eq->eq_period * HNS_ROCE_CLOCK_ADJUST > USHRT_MAX) {
+ dev_info(hr_dev->dev, "eq_period(%u) reached the upper limit, adjusted to 65.\n",
+ eq->eq_period);
+ eq->eq_period = HNS_ROCE_MAX_EQ_PERIOD;
+ }
+ eq->eq_period *= HNS_ROCE_CLOCK_ADJUST;
+ }
+
+ hr_reg_write(eqc, EQC_EQ_PERIOD, eq->eq_period);
+ hr_reg_write(eqc, EQC_EQE_REPORT_TIMER, HNS_ROCE_EQ_INIT_REPORT_TIMER);
+ hr_reg_write(eqc, EQC_EQE_BA_L, bt_ba >> 3);
+ hr_reg_write(eqc, EQC_EQE_BA_H, bt_ba >> 35);
+ hr_reg_write(eqc, EQC_SHIFT, eq->shift);
+ hr_reg_write(eqc, EQC_MSI_INDX, HNS_ROCE_EQ_INIT_MSI_IDX);
+ hr_reg_write(eqc, EQC_CUR_EQE_BA_L, eqe_ba[0] >> 12);
+ hr_reg_write(eqc, EQC_CUR_EQE_BA_M, eqe_ba[0] >> 28);
+ hr_reg_write(eqc, EQC_CUR_EQE_BA_H, eqe_ba[0] >> 60);
+ hr_reg_write(eqc, EQC_EQ_CONS_INDX, HNS_ROCE_EQ_INIT_CONS_IDX);
+ hr_reg_write(eqc, EQC_NEX_EQE_BA_L, eqe_ba[1] >> 12);
+ hr_reg_write(eqc, EQC_NEX_EQE_BA_H, eqe_ba[1] >> 44);
+ hr_reg_write(eqc, EQC_EQE_SIZE, eq->eqe_size == HNS_ROCE_V3_EQE_SIZE);
return 0;
+}
-err_dma_alloc_l1:
- dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
- eq->bt_l0 = NULL;
- eq->l0_dma = 0;
- for (i -= 1; i >= 0; i--) {
- dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
- eq->l1_dma[i]);
-
- for (j = 0; j < bt_chk_sz / 8; j++) {
- idx = i * bt_chk_sz / 8 + j;
- dma_free_coherent(dev, buf_chk_sz, eq->buf[idx],
- eq->buf_dma[idx]);
- }
- }
- goto err_dma_alloc_l0;
-
-err_dma_alloc_buf:
- dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma);
- eq->bt_l0 = NULL;
- eq->l0_dma = 0;
-
- if (mhop_num == 1)
- for (i -= 1; i >= 0; i--)
- dma_free_coherent(dev, buf_chk_sz, eq->buf[i],
- eq->buf_dma[i]);
- else if (mhop_num == 2) {
- record_i = i;
- record_j = j;
- for (; i >= 0; i--) {
- dma_free_coherent(dev, bt_chk_sz, eq->bt_l1[i],
- eq->l1_dma[i]);
-
- for (j = 0; j < bt_chk_sz / 8; j++) {
- if (i == record_i && j >= record_j)
- break;
-
- idx = i * bt_chk_sz / 8 + j;
- dma_free_coherent(dev, buf_chk_sz,
- eq->buf[idx],
- eq->buf_dma[idx]);
- }
- }
- }
-
-err_dma_alloc_l0:
- kfree(eq->bt_l1);
- eq->bt_l1 = NULL;
+static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
+{
+ struct hns_roce_buf_attr buf_attr = {};
+ int err;
-err_kcalloc_bt_l1:
- kfree(eq->l1_dma);
- eq->l1_dma = NULL;
+ if (hr_dev->caps.eqe_hop_num == HNS_ROCE_HOP_NUM_0)
+ eq->hop_num = 0;
+ else
+ eq->hop_num = hr_dev->caps.eqe_hop_num;
-err_kcalloc_l1_dma:
- kfree(eq->buf);
- eq->buf = NULL;
+ buf_attr.page_shift = hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT;
+ buf_attr.region[0].size = eq->entries * eq->eqe_size;
+ buf_attr.region[0].hopnum = eq->hop_num;
+ buf_attr.region_count = 1;
-err_kcalloc_buf:
- kfree(eq->buf_dma);
- eq->buf_dma = NULL;
+ err = hns_roce_mtr_create(hr_dev, &eq->mtr, &buf_attr,
+ hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT, NULL,
+ 0);
+ if (err)
+ dev_err(hr_dev->dev, "failed to alloc EQE mtr, err %d\n", err);
- return -ENOMEM;
+ return err;
}
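alloc_eq_buf() describes the whole queue as a single MTR region of entries * eqe_size bytes mapped at a page shift of eqe_buf_pg_sz + PAGE_SHIFT. The arithmetic below shows how many buffer pages such a region needs for two representative configurations; the depths, EQE sizes and the 4 KiB base page are assumptions made for the demo.

#include <stdio.h>

#define DEMO_PAGE_SHIFT 12	/* assume 4 KiB base pages */

static unsigned long eq_pages(unsigned long entries, unsigned long eqe_size,
			      unsigned int buf_pg_sz)
{
	unsigned long page = 1UL << (buf_pg_sz + DEMO_PAGE_SHIFT);
	unsigned long bytes = entries * eqe_size;

	return (bytes + page - 1) / page;	/* round up to whole pages */
}

int main(void)
{
	/* e.g. 4096 four-byte CEQEs vs. 4096 sixty-four-byte AEQEs. */
	printf("ceq: %lu page(s)\n", eq_pages(4096, 4, 0));
	printf("aeq: %lu page(s)\n", eq_pages(4096, 64, 0));
	return 0;
}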
static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq,
- unsigned int eq_cmd)
+ struct hns_roce_eq *eq, u8 eq_cmd)
{
- struct device *dev = hr_dev->dev;
struct hns_roce_cmd_mailbox *mailbox;
- u32 buf_chk_sz = 0;
int ret;
/* Allocate mailbox memory */
@@ -5567,38 +6741,17 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
- if (!hr_dev->caps.eqe_hop_num) {
- buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
-
- eq->buf_list = kzalloc(sizeof(struct hns_roce_buf_list),
- GFP_KERNEL);
- if (!eq->buf_list) {
- ret = -ENOMEM;
- goto free_cmd_mbox;
- }
-
- eq->buf_list->buf = dma_alloc_coherent(dev, buf_chk_sz,
- &(eq->buf_list->map),
- GFP_KERNEL);
- if (!eq->buf_list->buf) {
- ret = -ENOMEM;
- goto err_alloc_buf;
- }
-
- } else {
- ret = hns_roce_mhop_alloc_eq(hr_dev, eq);
- if (ret) {
- ret = -ENOMEM;
- goto free_cmd_mbox;
- }
- }
+ ret = alloc_eq_buf(hr_dev, eq);
+ if (ret)
+ goto free_cmd_mbox;
- hns_roce_config_eqc(hr_dev, eq, mailbox->buf);
+ ret = config_eqc(hr_dev, eq, mailbox->buf);
+ if (ret)
+ goto err_cmd_mbox;
- ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, eq->eqn, 0,
- eq_cmd, HNS_ROCE_CMD_TIMEOUT_MSECS);
+ ret = hns_roce_create_hw_ctx(hr_dev, mailbox, eq_cmd, eq->eqn);
if (ret) {
- dev_err(dev, "[mailbox cmd] create eqc failed.\n");
+ dev_err(hr_dev->dev, "[mailbox cmd] create eqc failed.\n");
goto err_cmd_mbox;
}
@@ -5607,16 +6760,7 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
return 0;
err_cmd_mbox:
- if (!hr_dev->caps.eqe_hop_num)
- dma_free_coherent(dev, buf_chk_sz, eq->buf_list->buf,
- eq->buf_list->map);
- else {
- hns_roce_mhop_free_eq(hr_dev, eq);
- goto free_cmd_mbox;
- }
-
-err_alloc_buf:
- kfree(eq->buf_list);
+ free_eq_buf(hr_dev, eq);
free_cmd_mbox:
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
@@ -5624,144 +6768,112 @@ free_cmd_mbox:
return ret;
}
-static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
+static void hns_roce_ceq_work(struct work_struct *work)
{
- struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
- struct device *dev = hr_dev->dev;
- struct hns_roce_eq *eq;
- unsigned int eq_cmd;
- int irq_num;
- int eq_num;
- int other_num;
- int comp_num;
- int aeq_num;
- int i, j, k;
- int ret;
+ struct hns_roce_eq *eq = from_work(eq, work, work);
+ struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq);
+ struct hns_roce_dev *hr_dev = eq->hr_dev;
+ int ceqe_num = 0;
+ u32 cqn;
- other_num = hr_dev->caps.num_other_vectors;
- comp_num = hr_dev->caps.num_comp_vectors;
- aeq_num = hr_dev->caps.num_aeq_vectors;
+ while (ceqe && ceqe_num < hr_dev->caps.ceqe_depth) {
+ /* Make sure we read CEQ entry after we have checked the
+ * ownership bit
+ */
+ dma_rmb();
- eq_num = comp_num + aeq_num;
- irq_num = eq_num + other_num;
+ cqn = hr_reg_read(ceqe, CEQE_CQN);
- eq_table->eq = kcalloc(eq_num, sizeof(*eq_table->eq), GFP_KERNEL);
- if (!eq_table->eq)
- return -ENOMEM;
+ hns_roce_cq_completion(hr_dev, cqn);
+
+ ++eq->cons_index;
+ ++ceqe_num;
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CEQE_CNT]);
+
+ ceqe = next_ceqe_sw_v2(eq);
+ }
+
+ update_eq_db(eq);
+}
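hns_roce_ceq_work() drains completion entries in a batch bounded by the queue depth and only then publishes the new consumer index with a single doorbell write. The sketch below shows that batch-then-publish shape on a plain array; the budget, ring contents and printf "doorbell" are stand-ins, since the real work item walks hardware-owned memory.

#include <stdio.h>

#define BUDGET 4	/* demo batch limit */

static void publish_ci(unsigned int ci)
{
	/* Stand-in for update_eq_db(): one write after the whole batch. */
	printf("doorbell: consumer index = %u\n", ci);
}

int main(void)
{
	int ring[] = { 11, 12, 13, 14, 15, 16 };	/* pending CQNs */
	unsigned int avail = 6;
	unsigned int ci = 0;

	while (ci < avail && ci < BUDGET) {
		printf("complete CQ %d\n", ring[ci]);	/* per-CQE work */
		ci++;
	}
	publish_ci(ci);		/* leftovers are handled on the next run */
	return 0;
}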
+
+static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num,
+ int comp_num, int aeq_num, int other_num)
+{
+ struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
+ int i, j;
+ int ret;
for (i = 0; i < irq_num; i++) {
hr_dev->irq_names[i] = kzalloc(HNS_ROCE_INT_NAME_LEN,
GFP_KERNEL);
if (!hr_dev->irq_names[i]) {
ret = -ENOMEM;
- goto err_failed_kzalloc;
+ goto err_kzalloc_failed;
}
}
- /* create eq */
- for (j = 0; j < eq_num; j++) {
- eq = &eq_table->eq[j];
- eq->hr_dev = hr_dev;
- eq->eqn = j;
- if (j < comp_num) {
- /* CEQ */
- eq_cmd = HNS_ROCE_CMD_CREATE_CEQC;
- eq->type_flag = HNS_ROCE_CEQ;
- eq->entries = hr_dev->caps.ceqe_depth;
- eq->eqe_size = HNS_ROCE_CEQ_ENTRY_SIZE;
- eq->irq = hr_dev->irq[j + other_num + aeq_num];
- eq->eq_max_cnt = HNS_ROCE_CEQ_DEFAULT_BURST_NUM;
- eq->eq_period = HNS_ROCE_CEQ_DEFAULT_INTERVAL;
- } else {
- /* AEQ */
- eq_cmd = HNS_ROCE_CMD_CREATE_AEQC;
- eq->type_flag = HNS_ROCE_AEQ;
- eq->entries = hr_dev->caps.aeqe_depth;
- eq->eqe_size = HNS_ROCE_AEQ_ENTRY_SIZE;
- eq->irq = hr_dev->irq[j - comp_num + other_num];
- eq->eq_max_cnt = HNS_ROCE_AEQ_DEFAULT_BURST_NUM;
- eq->eq_period = HNS_ROCE_AEQ_DEFAULT_INTERVAL;
- }
+ /* irq contains: abnormal + AEQ + CEQ */
+ for (j = 0; j < other_num; j++)
+ snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN,
+ "hns-%s-abn-%d", pci_name(hr_dev->pci_dev), j);
- ret = hns_roce_v2_create_eq(hr_dev, eq, eq_cmd);
- if (ret) {
- dev_err(dev, "eq create failed.\n");
- goto err_create_eq_fail;
- }
- }
+ for (j = other_num; j < (other_num + aeq_num); j++)
+ snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN,
+ "hns-%s-aeq-%d", pci_name(hr_dev->pci_dev), j - other_num);
- /* enable irq */
- hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_ENABLE);
+ for (j = (other_num + aeq_num); j < irq_num; j++)
+ snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN,
+ "hns-%s-ceq-%d", pci_name(hr_dev->pci_dev),
+ j - other_num - aeq_num);
- /* irq contains: abnormal + AEQ + CEQ*/
- for (k = 0; k < irq_num; k++)
- if (k < other_num)
- snprintf((char *)hr_dev->irq_names[k],
- HNS_ROCE_INT_NAME_LEN, "hns-abn-%d", k);
- else if (k < (other_num + aeq_num))
- snprintf((char *)hr_dev->irq_names[k],
- HNS_ROCE_INT_NAME_LEN, "hns-aeq-%d",
- k - other_num);
- else
- snprintf((char *)hr_dev->irq_names[k],
- HNS_ROCE_INT_NAME_LEN, "hns-ceq-%d",
- k - other_num - aeq_num);
-
- for (k = 0; k < irq_num; k++) {
- if (k < other_num)
- ret = request_irq(hr_dev->irq[k],
+ for (j = 0; j < irq_num; j++) {
+ if (j < other_num) {
+ ret = request_irq(hr_dev->irq[j],
hns_roce_v2_msix_interrupt_abn,
- 0, hr_dev->irq_names[k], hr_dev);
-
- else if (k < (other_num + comp_num))
- ret = request_irq(eq_table->eq[k - other_num].irq,
+ 0, hr_dev->irq_names[j], hr_dev);
+ } else if (j < (other_num + comp_num)) {
+ INIT_WORK(&eq_table->eq[j - other_num].work,
+ hns_roce_ceq_work);
+ ret = request_irq(eq_table->eq[j - other_num].irq,
hns_roce_v2_msix_interrupt_eq,
- 0, hr_dev->irq_names[k + aeq_num],
- &eq_table->eq[k - other_num]);
- else
- ret = request_irq(eq_table->eq[k - other_num].irq,
+ 0, hr_dev->irq_names[j + aeq_num],
+ &eq_table->eq[j - other_num]);
+ } else {
+ ret = request_irq(eq_table->eq[j - other_num].irq,
hns_roce_v2_msix_interrupt_eq,
- 0, hr_dev->irq_names[k - comp_num],
- &eq_table->eq[k - other_num]);
- if (ret) {
- dev_err(dev, "Request irq error!\n");
- goto err_request_irq_fail;
+ 0, hr_dev->irq_names[j - comp_num],
+ &eq_table->eq[j - other_num]);
}
- }
- hr_dev->irq_workq =
- create_singlethread_workqueue("hns_roce_irq_workqueue");
- if (!hr_dev->irq_workq) {
- dev_err(dev, "Create irq workqueue failed!\n");
- ret = -ENOMEM;
- goto err_request_irq_fail;
+ if (ret) {
+ dev_err(hr_dev->dev, "request irq error!\n");
+ goto err_request_failed;
+ }
}
return 0;
-err_request_irq_fail:
- for (k -= 1; k >= 0; k--)
- if (k < other_num)
- free_irq(hr_dev->irq[k], hr_dev);
- else
- free_irq(eq_table->eq[k - other_num].irq,
- &eq_table->eq[k - other_num]);
-
-err_create_eq_fail:
- for (j -= 1; j >= 0; j--)
- hns_roce_v2_free_eq(hr_dev, &eq_table->eq[j]);
+err_request_failed:
+ for (j -= 1; j >= 0; j--) {
+ if (j < other_num) {
+ free_irq(hr_dev->irq[j], hr_dev);
+ continue;
+ }
+ free_irq(eq_table->eq[j - other_num].irq,
+ &eq_table->eq[j - other_num]);
+ if (j < other_num + comp_num)
+ cancel_work_sync(&eq_table->eq[j - other_num].work);
+ }
-err_failed_kzalloc:
+err_kzalloc_failed:
for (i -= 1; i >= 0; i--)
kfree(hr_dev->irq_names[i]);
- kfree(eq_table->eq);
return ret;
}
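__hns_roce_request_irq() lays the interrupt names out as [abnormal | AEQ | CEQ] and derives each suffix from the vector's position in that layout. The helper below reproduces just the naming arithmetic so the index ranges are easy to follow; the vector counts and the PCI device name are made up for the demo.

#include <stdio.h>

#define NAME_LEN 32

int main(void)
{
	const char *pci = "0000:7d:00.0";	/* demo PCI name */
	int other_num = 1, aeq_num = 1, comp_num = 4;
	int irq_num = other_num + aeq_num + comp_num;
	char name[NAME_LEN];
	int j;

	for (j = 0; j < irq_num; j++) {
		if (j < other_num)
			snprintf(name, NAME_LEN, "hns-%s-abn-%d", pci, j);
		else if (j < other_num + aeq_num)
			snprintf(name, NAME_LEN, "hns-%s-aeq-%d", pci,
				 j - other_num);
		else
			snprintf(name, NAME_LEN, "hns-%s-ceq-%d", pci,
				 j - other_num - aeq_num);
		printf("irq %d -> %s\n", j, name);
	}
	return 0;
}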
-static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
+static void __hns_roce_free_irq(struct hns_roce_dev *hr_dev)
{
- struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
int irq_num;
int eq_num;
int i;
@@ -5769,312 +6881,131 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
irq_num = eq_num + hr_dev->caps.num_other_vectors;
- /* Disable irq */
- hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_DISABLE);
-
for (i = 0; i < hr_dev->caps.num_other_vectors; i++)
free_irq(hr_dev->irq[i], hr_dev);
for (i = 0; i < eq_num; i++) {
- hns_roce_v2_destroy_eqc(hr_dev, i);
-
- free_irq(eq_table->eq[i].irq, &eq_table->eq[i]);
-
- hns_roce_v2_free_eq(hr_dev, &eq_table->eq[i]);
+ free_irq(hr_dev->eq_table.eq[i].irq, &hr_dev->eq_table.eq[i]);
+ if (i < hr_dev->caps.num_comp_vectors)
+ cancel_work_sync(&hr_dev->eq_table.eq[i].work);
}
for (i = 0; i < irq_num; i++)
kfree(hr_dev->irq_names[i]);
-
- kfree(eq_table->eq);
-
- flush_workqueue(hr_dev->irq_workq);
- destroy_workqueue(hr_dev->irq_workq);
-}
-
-static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
- struct hns_roce_srq *srq, u32 pdn, u16 xrcd,
- u32 cqn, void *mb_buf, u64 *mtts_wqe,
- u64 *mtts_idx, dma_addr_t dma_handle_wqe,
- dma_addr_t dma_handle_idx)
-{
- struct hns_roce_srq_context *srq_context;
-
- srq_context = mb_buf;
- memset(srq_context, 0, sizeof(*srq_context));
-
- roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M,
- SRQC_BYTE_4_SRQ_ST_S, 1);
-
- roce_set_field(srq_context->byte_4_srqn_srqst,
- SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M,
- SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S,
- (hr_dev->caps.srqwqe_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
- hr_dev->caps.srqwqe_hop_num));
- roce_set_field(srq_context->byte_4_srqn_srqst,
- SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S,
- ilog2(srq->max));
-
- roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M,
- SRQC_BYTE_4_SRQN_S, srq->srqn);
-
- roce_set_field(srq_context->byte_8_limit_wl, SRQC_BYTE_8_SRQ_LIMIT_WL_M,
- SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0);
-
- roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M,
- SRQC_BYTE_12_SRQ_XRCD_S, xrcd);
-
- srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3));
-
- roce_set_field(srq_context->byte_24_wqe_bt_ba,
- SRQC_BYTE_24_SRQ_WQE_BT_BA_M,
- SRQC_BYTE_24_SRQ_WQE_BT_BA_S,
- cpu_to_le32(dma_handle_wqe >> 35));
-
- roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M,
- SRQC_BYTE_28_PD_S, pdn);
- roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M,
- SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 :
- fls(srq->max_gs - 1));
-
- srq_context->idx_bt_ba = (u32)(dma_handle_idx >> 3);
- srq_context->idx_bt_ba = cpu_to_le32(srq_context->idx_bt_ba);
- roce_set_field(srq_context->rsv_idx_bt_ba,
- SRQC_BYTE_36_SRQ_IDX_BT_BA_M,
- SRQC_BYTE_36_SRQ_IDX_BT_BA_S,
- cpu_to_le32(dma_handle_idx >> 35));
-
- srq_context->idx_cur_blk_addr = (u32)(mtts_idx[0] >> PAGE_ADDR_SHIFT);
- srq_context->idx_cur_blk_addr =
- cpu_to_le32(srq_context->idx_cur_blk_addr);
- roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
- SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M,
- SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S,
- cpu_to_le32((mtts_idx[0]) >> (32 + PAGE_ADDR_SHIFT)));
- roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
- SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M,
- SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S,
- hr_dev->caps.idx_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
- hr_dev->caps.idx_hop_num);
-
- roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
- SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M,
- SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S,
- hr_dev->caps.idx_ba_pg_sz);
- roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
- SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M,
- SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S,
- hr_dev->caps.idx_buf_pg_sz);
-
- srq_context->idx_nxt_blk_addr = (u32)(mtts_idx[1] >> PAGE_ADDR_SHIFT);
- srq_context->idx_nxt_blk_addr =
- cpu_to_le32(srq_context->idx_nxt_blk_addr);
- roce_set_field(srq_context->rsv_idxnxtblkaddr,
- SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M,
- SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S,
- cpu_to_le32((mtts_idx[1]) >> (32 + PAGE_ADDR_SHIFT)));
- roce_set_field(srq_context->byte_56_xrc_cqn,
- SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S,
- cqn);
- roce_set_field(srq_context->byte_56_xrc_cqn,
- SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M,
- SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S,
- hr_dev->caps.srqwqe_ba_pg_sz + PG_SHIFT_OFFSET);
- roce_set_field(srq_context->byte_56_xrc_cqn,
- SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M,
- SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S,
- hr_dev->caps.srqwqe_buf_pg_sz + PG_SHIFT_OFFSET);
-
- roce_set_bit(srq_context->db_record_addr_record_en,
- SRQC_BYTE_60_SRQ_RECORD_EN_S, 0);
}
-static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
- struct ib_srq_attr *srq_attr,
- enum ib_srq_attr_mask srq_attr_mask,
- struct ib_udata *udata)
+static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
- struct hns_roce_srq *srq = to_hr_srq(ibsrq);
- struct hns_roce_srq_context *srq_context;
- struct hns_roce_srq_context *srqc_mask;
- struct hns_roce_cmd_mailbox *mailbox;
+ struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
+ struct device *dev = hr_dev->dev;
+ struct hns_roce_eq *eq;
+ int other_num;
+ int comp_num;
+ int aeq_num;
+ int irq_num;
+ int eq_num;
+ u8 eq_cmd;
int ret;
+ int i;
- if (srq_attr_mask & IB_SRQ_LIMIT) {
- if (srq_attr->srq_limit >= srq->max)
- return -EINVAL;
+ if (hr_dev->caps.aeqe_depth < HNS_AEQ_POLLING_BUDGET)
+ return -EINVAL;
- mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
+ other_num = hr_dev->caps.num_other_vectors;
+ comp_num = hr_dev->caps.num_comp_vectors;
+ aeq_num = hr_dev->caps.num_aeq_vectors;
- srq_context = mailbox->buf;
- srqc_mask = (struct hns_roce_srq_context *)mailbox->buf + 1;
+ eq_num = comp_num + aeq_num;
+ irq_num = eq_num + other_num;
- memset(srqc_mask, 0xff, sizeof(*srqc_mask));
+ eq_table->eq = kcalloc(eq_num, sizeof(*eq_table->eq), GFP_KERNEL);
+ if (!eq_table->eq)
+ return -ENOMEM;
- roce_set_field(srq_context->byte_8_limit_wl,
- SRQC_BYTE_8_SRQ_LIMIT_WL_M,
- SRQC_BYTE_8_SRQ_LIMIT_WL_S, srq_attr->srq_limit);
- roce_set_field(srqc_mask->byte_8_limit_wl,
- SRQC_BYTE_8_SRQ_LIMIT_WL_M,
- SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0);
+ /* create eq */
+ for (i = 0; i < eq_num; i++) {
+ eq = &eq_table->eq[i];
+ eq->hr_dev = hr_dev;
+ eq->eqn = i;
+ if (i < comp_num) {
+ /* CEQ */
+ eq_cmd = HNS_ROCE_CMD_CREATE_CEQC;
+ eq->type_flag = HNS_ROCE_CEQ;
+ eq->entries = hr_dev->caps.ceqe_depth;
+ eq->eqe_size = hr_dev->caps.ceqe_size;
+ eq->irq = hr_dev->irq[i + other_num + aeq_num];
+ eq->eq_max_cnt = HNS_ROCE_CEQ_DEFAULT_BURST_NUM;
+ eq->eq_period = HNS_ROCE_CEQ_DEFAULT_INTERVAL;
+ } else {
+ /* AEQ */
+ eq_cmd = HNS_ROCE_CMD_CREATE_AEQC;
+ eq->type_flag = HNS_ROCE_AEQ;
+ eq->entries = hr_dev->caps.aeqe_depth;
+ eq->eqe_size = hr_dev->caps.aeqe_size;
+ eq->irq = hr_dev->irq[i - comp_num + other_num];
+ eq->eq_max_cnt = HNS_ROCE_AEQ_DEFAULT_BURST_NUM;
+ eq->eq_period = HNS_ROCE_AEQ_DEFAULT_INTERVAL;
+ }
- ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, srq->srqn, 0,
- HNS_ROCE_CMD_MODIFY_SRQC,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ ret = hns_roce_v2_create_eq(hr_dev, eq, eq_cmd);
if (ret) {
- dev_err(hr_dev->dev,
- "MODIFY SRQ Failed to cmd mailbox.\n");
- return ret;
+ dev_err(dev, "failed to create eq.\n");
+ goto err_create_eq_fail;
}
}
- return 0;
-}
+ INIT_WORK(&hr_dev->ecc_work, fmea_ram_ecc_work);
-static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
- struct hns_roce_srq *srq = to_hr_srq(ibsrq);
- struct hns_roce_srq_context *srq_context;
- struct hns_roce_cmd_mailbox *mailbox;
- int limit_wl;
- int ret;
-
- mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
+ hr_dev->irq_workq = alloc_ordered_workqueue("hns_roce_irq_workq", 0);
+ if (!hr_dev->irq_workq) {
+ dev_err(dev, "failed to create irq workqueue.\n");
+ ret = -ENOMEM;
+ goto err_create_eq_fail;
+ }
- srq_context = mailbox->buf;
- ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, srq->srqn, 0,
- HNS_ROCE_CMD_QUERY_SRQC,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
+ ret = __hns_roce_request_irq(hr_dev, irq_num, comp_num, aeq_num,
+ other_num);
if (ret) {
- dev_err(hr_dev->dev, "QUERY SRQ cmd process error\n");
- goto out;
+ dev_err(dev, "failed to request irq.\n");
+ goto err_request_irq_fail;
}
- limit_wl = roce_get_field(srq_context->byte_8_limit_wl,
- SRQC_BYTE_8_SRQ_LIMIT_WL_M,
- SRQC_BYTE_8_SRQ_LIMIT_WL_S);
-
- attr->srq_limit = limit_wl;
- attr->max_wr = srq->max - 1;
- attr->max_sge = srq->max_gs;
-
- memcpy(srq_context, mailbox->buf, sizeof(*srq_context));
-
-out:
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
- return ret;
-}
-
-static int find_empty_entry(struct hns_roce_idx_que *idx_que)
-{
- int bit_num;
- int i;
+ /* enable irq */
+ hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_ENABLE);
- /* bitmap[i] is set zero if all bits are allocated */
- for (i = 0; idx_que->bitmap[i] == 0; ++i)
- ;
- bit_num = ffs(idx_que->bitmap[i]);
- idx_que->bitmap[i] &= ~(1ULL << (bit_num - 1));
+ return 0;
- return i * sizeof(u64) * 8 + (bit_num - 1);
-}
+err_request_irq_fail:
+ destroy_workqueue(hr_dev->irq_workq);
-static void fill_idx_queue(struct hns_roce_idx_que *idx_que,
- int cur_idx, int wqe_idx)
-{
- unsigned int *addr;
+err_create_eq_fail:
+ for (i -= 1; i >= 0; i--)
+ hns_roce_v2_destroy_eqc(hr_dev, &eq_table->eq[i]);
+ kfree(eq_table->eq);
- addr = (unsigned int *)hns_roce_buf_offset(&idx_que->idx_buf,
- cur_idx * idx_que->entry_sz);
- *addr = wqe_idx;
+ return ret;
}
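/*
 * Editor's sketch (not part of the patch): the two irq-index expressions in
 * hns_roce_v2_init_eq_table() imply that hr_dev->irq[] is laid out as
 * [other vectors][AEQ vectors][CEQ vectors]. A minimal helper showing that
 * mapping, with a hypothetical name:
 */
static inline int eq_irq_index(int eqn, int comp_num, int aeq_num,
			       int other_num)
{
	/* CEQs occupy eqn 0..comp_num-1; AEQs follow them. */
	if (eqn < comp_num)
		return eqn + other_num + aeq_num;

	return (eqn - comp_num) + other_num;
}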
-static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
- const struct ib_recv_wr *wr,
- const struct ib_recv_wr **bad_wr)
+static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
- struct hns_roce_srq *srq = to_hr_srq(ibsrq);
- struct hns_roce_v2_wqe_data_seg *dseg;
- struct hns_roce_v2_db srq_db;
- unsigned long flags;
- int ret = 0;
- int wqe_idx;
- void *wqe;
- int nreq;
- int ind;
+ struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
+ int eq_num;
int i;
- spin_lock_irqsave(&srq->lock, flags);
-
- ind = srq->head & (srq->max - 1);
-
- for (nreq = 0; wr; ++nreq, wr = wr->next) {
- if (unlikely(wr->num_sge > srq->max_gs)) {
- ret = -EINVAL;
- *bad_wr = wr;
- break;
- }
-
- if (unlikely(srq->head == srq->tail)) {
- ret = -ENOMEM;
- *bad_wr = wr;
- break;
- }
-
- wqe_idx = find_empty_entry(&srq->idx_que);
- fill_idx_queue(&srq->idx_que, ind, wqe_idx);
- wqe = get_srq_wqe(srq, wqe_idx);
- dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
-
- for (i = 0; i < wr->num_sge; ++i) {
- dseg[i].len = cpu_to_le32(wr->sg_list[i].length);
- dseg[i].lkey = cpu_to_le32(wr->sg_list[i].lkey);
- dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr);
- }
-
- if (i < srq->max_gs) {
- dseg->len = 0;
- dseg->lkey = cpu_to_le32(0x100);
- dseg->addr = 0;
- }
-
- srq->wrid[wqe_idx] = wr->wr_id;
- ind = (ind + 1) & (srq->max - 1);
- }
-
- if (likely(nreq)) {
- srq->head += nreq;
-
- /*
- * Make sure that descriptors are written before
- * doorbell record.
- */
- wmb();
-
- srq_db.byte_4 = HNS_ROCE_V2_SRQ_DB << 24 | srq->srqn;
- srq_db.parameter = srq->head;
+ eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
- hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l);
+ /* Disable irq */
+ hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_DISABLE);
- }
+ __hns_roce_free_irq(hr_dev);
+ destroy_workqueue(hr_dev->irq_workq);
- spin_unlock_irqrestore(&srq->lock, flags);
+ for (i = 0; i < eq_num; i++)
+ hns_roce_v2_destroy_eqc(hr_dev, &eq_table->eq[i]);
- return ret;
+ kfree(eq_table->eq);
}
-static const struct hns_roce_dfx_hw hns_roce_dfx_hw_v2 = {
- .query_cqc_info = hns_roce_v2_query_cqc_info,
-};
-
static const struct ib_device_ops hns_roce_v2_dev_ops = {
.destroy_qp = hns_roce_v2_destroy_qp,
.modify_cq = hns_roce_v2_modify_cq,
@@ -6097,33 +7028,30 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
.hw_profile = hns_roce_v2_profile,
.hw_init = hns_roce_v2_init,
.hw_exit = hns_roce_v2_exit,
- .post_mbox = hns_roce_v2_post_mbox,
- .chk_mbox = hns_roce_v2_chk_mbox,
- .rst_prc_mbox = hns_roce_v2_rst_process_cmd,
+ .post_mbox = v2_post_mbox,
+ .poll_mbox_done = v2_poll_mbox_done,
+ .chk_mbox_avail = v2_chk_mbox_is_avail,
.set_gid = hns_roce_v2_set_gid,
.set_mac = hns_roce_v2_set_mac,
.write_mtpt = hns_roce_v2_write_mtpt,
.rereg_write_mtpt = hns_roce_v2_rereg_write_mtpt,
.frmr_write_mtpt = hns_roce_v2_frmr_write_mtpt,
- .mw_write_mtpt = hns_roce_v2_mw_write_mtpt,
.write_cqc = hns_roce_v2_write_cqc,
.set_hem = hns_roce_v2_set_hem,
.clear_hem = hns_roce_v2_clear_hem,
.modify_qp = hns_roce_v2_modify_qp,
- .query_qp = hns_roce_v2_query_qp,
- .destroy_qp = hns_roce_v2_destroy_qp,
+ .dereg_mr = hns_roce_v2_dereg_mr,
.qp_flow_control_init = hns_roce_v2_qp_flow_control_init,
- .modify_cq = hns_roce_v2_modify_cq,
- .post_send = hns_roce_v2_post_send,
- .post_recv = hns_roce_v2_post_recv,
- .req_notify_cq = hns_roce_v2_req_notify_cq,
- .poll_cq = hns_roce_v2_poll_cq,
.init_eq = hns_roce_v2_init_eq_table,
.cleanup_eq = hns_roce_v2_cleanup_eq_table,
.write_srqc = hns_roce_v2_write_srqc,
- .modify_srq = hns_roce_v2_modify_srq,
- .query_srq = hns_roce_v2_query_srq,
- .post_srq_recv = hns_roce_v2_post_srq_recv,
+ .query_cqc = hns_roce_v2_query_cqc,
+ .query_qpc = hns_roce_v2_query_qpc,
+ .query_mpt = hns_roce_v2_query_mpt,
+ .query_srqc = hns_roce_v2_query_srqc,
+ .query_sccc = hns_roce_v2_query_sccc,
+ .query_hw_counter = hns_roce_hw_v2_query_counter,
+ .get_dscp = hns_roce_hw_v2_get_dscp,
.hns_roce_dev_ops = &hns_roce_v2_dev_ops,
.hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops,
};
@@ -6134,25 +7062,33 @@ static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = {
{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0},
{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0},
{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_200G_RDMA), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_RDMA_DCB_PFC_VF),
+ HNAE3_DEV_SUPPORT_ROCE_DCB_BITS},
/* required last entry */
{0, }
};
MODULE_DEVICE_TABLE(pci, hns_roce_hw_v2_pci_tbl);
-static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
+static void hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
struct hnae3_handle *handle)
{
struct hns_roce_v2_priv *priv = hr_dev->priv;
+ const struct pci_device_id *id;
int i;
+ hr_dev->pci_dev = handle->pdev;
+ id = pci_match_id(hns_roce_hw_v2_pci_tbl, hr_dev->pci_dev);
+ hr_dev->is_vf = id->driver_data;
+ hr_dev->dev = &handle->pdev->dev;
hr_dev->hw = &hns_roce_hw_v2;
- hr_dev->dfx = &hns_roce_dfx_hw_v2;
hr_dev->sdb_offset = ROCEE_DB_SQ_L_0_REG;
hr_dev->odb_offset = hr_dev->sdb_offset;
/* Get info from NIC driver. */
hr_dev->reg_base = handle->rinfo.roce_io_base;
+ hr_dev->mem_base = handle->rinfo.roce_mem_base;
hr_dev->caps.num_ports = 1;
hr_dev->iboe.netdevs[0] = handle->rinfo.netdev;
hr_dev->iboe.phy_port[0] = 0;
@@ -6160,7 +7096,7 @@ static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
addrconf_addr_eui48((u8 *)&hr_dev->ib_dev.node_guid,
hr_dev->iboe.netdevs[0]->dev_addr);
- for (i = 0; i < HNS_ROCE_V2_MAX_IRQ_NUM; i++)
+ for (i = 0; i < handle->rinfo.num_vectors; i++)
hr_dev->irq[i] = pci_irq_vector(handle->pdev,
i + handle->rinfo.base_vector);
@@ -6170,8 +7106,6 @@ static int hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
hr_dev->reset_cnt = handle->ae_algo->ops->ae_dev_reset_cnt(handle);
priv->handle = handle;
-
- return 0;
}
static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
@@ -6189,26 +7123,19 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
goto error_failed_kzalloc;
}
- hr_dev->pci_dev = handle->pdev;
- hr_dev->dev = &handle->pdev->dev;
-
- ret = hns_roce_hw_v2_get_cfg(hr_dev, handle);
- if (ret) {
- dev_err(hr_dev->dev, "Get Configuration failed!\n");
- goto error_failed_get_cfg;
- }
+ hns_roce_hw_v2_get_cfg(hr_dev, handle);
ret = hns_roce_init(hr_dev);
if (ret) {
dev_err(hr_dev->dev, "RoCE Engine init failed!\n");
- goto error_failed_get_cfg;
+ goto error_failed_roce_init;
}
handle->priv = hr_dev;
return 0;
-error_failed_get_cfg:
+error_failed_roce_init:
kfree(hr_dev->priv);
error_failed_kzalloc:
@@ -6218,15 +7145,19 @@ error_failed_kzalloc:
}
static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
- bool reset)
+ bool reset, bool bond_cleanup)
{
- struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv;
+ struct hns_roce_dev *hr_dev = handle->priv;
if (!hr_dev)
return;
handle->priv = NULL;
- hns_roce_exit(hr_dev);
+
+ hr_dev->state = HNS_ROCE_DEVICE_STATE_UNINIT;
+ hns_roce_handle_device_err(hr_dev);
+
+ hns_roce_exit(hr_dev, bond_cleanup);
kfree(hr_dev->priv);
ib_dealloc_device(&hr_dev->ib_dev);
}
@@ -6249,6 +7180,9 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
if (!id)
return 0;
+ if (id->driver_data && handle->pdev->revision == PCI_REVISION_ID_HIP08)
+ return 0;
+
ret = __hns_roce_hw_v2_init_instance(handle);
if (ret) {
handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
@@ -6262,7 +7196,6 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
handle->rinfo.instance_state = HNS_ROCE_STATE_INITED;
-
return 0;
reset_chk_err:
@@ -6275,19 +7208,61 @@ reset_chk_err:
static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
bool reset)
{
+ /* Suspend bond to avoid concurrency */
+ hns_roce_bond_suspend(handle);
+
if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED)
- return;
+ goto out;
handle->rinfo.instance_state = HNS_ROCE_STATE_UNINIT;
- __hns_roce_hw_v2_uninit_instance(handle, reset);
+ __hns_roce_hw_v2_uninit_instance(handle, reset, true);
+
+ handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
+
+out:
+ hns_roce_bond_resume(handle);
+}
+
+struct hns_roce_dev
+ *hns_roce_bond_init_client(struct hns_roce_bond_group *bond_grp,
+ int func_idx)
+{
+ struct hnae3_handle *handle;
+ int ret;
+
+ handle = bond_grp->bond_func_info[func_idx].handle;
+ if (!handle || !handle->client)
+ return NULL;
+
+ ret = hns_roce_hw_v2_init_instance(handle);
+ if (ret)
+ return NULL;
+
+ return handle->priv;
+}
+
+void hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp,
+ int func_idx)
+{
+ struct hnae3_handle *handle = bond_grp->bond_func_info[func_idx].handle;
+
+ if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED)
+ return;
+
+ handle->rinfo.instance_state = HNS_ROCE_STATE_BOND_UNINIT;
+
+ __hns_roce_hw_v2_uninit_instance(handle, false, false);
handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
}
+
static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
{
struct hns_roce_dev *hr_dev;
- struct ib_event event;
+
+ /* Suspend bond to avoid concurrency */
+ hns_roce_bond_suspend(handle);
if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) {
set_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state);
@@ -6297,17 +7272,16 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
handle->rinfo.reset_state = HNS_ROCE_STATE_RST_DOWN;
clear_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state);
- hr_dev = (struct hns_roce_dev *)handle->priv;
+ hr_dev = handle->priv;
if (!hr_dev)
return 0;
hr_dev->active = false;
hr_dev->dis_db = true;
- event.event = IB_EVENT_DEVICE_FATAL;
- event.device = &hr_dev->ib_dev;
- event.element.port_num = 1;
- ib_dispatch_event(&event);
+ rdma_user_mmap_disassociate(&hr_dev->ib_dev);
+
+ hr_dev->state = HNS_ROCE_DEVICE_STATE_RST_DOWN;
return 0;
}
@@ -6320,6 +7294,7 @@ static int hns_roce_hw_v2_reset_notify_init(struct hnae3_handle *handle)
if (test_and_clear_bit(HNS_ROCE_RST_DIRECT_RETURN,
&handle->rinfo.state)) {
handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INITED;
+ hns_roce_bond_resume(handle);
return 0;
}
@@ -6336,9 +7311,10 @@ static int hns_roce_hw_v2_reset_notify_init(struct hnae3_handle *handle)
dev_err(dev, "In reset process RoCE reinit failed %d.\n", ret);
} else {
handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INITED;
- dev_info(dev, "Reset done, RoCE client reinit finished.\n");
+ dev_info(dev, "reset done, RoCE client reinit finished.\n");
}
+ hns_roce_bond_resume(handle);
return ret;
}
@@ -6349,8 +7325,8 @@ static int hns_roce_hw_v2_reset_notify_uninit(struct hnae3_handle *handle)
handle->rinfo.reset_state = HNS_ROCE_STATE_RST_UNINIT;
dev_info(&handle->pdev->dev, "In reset process RoCE client uninit.\n");
- msleep(100);
- __hns_roce_hw_v2_uninit_instance(handle, false);
+ msleep(HNS_ROCE_V2_HW_RST_UNINT_DELAY);
+ __hns_roce_hw_v2_uninit_instance(handle, false, false);
return 0;
}
@@ -6377,9 +7353,30 @@ static int hns_roce_hw_v2_reset_notify(struct hnae3_handle *handle,
return ret;
}
+static void hns_roce_hw_v2_link_status_change(struct hnae3_handle *handle,
+ bool linkup)
+{
+ struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv;
+ struct net_device *netdev = handle->rinfo.netdev;
+
+ if (linkup || !hr_dev)
+ return;
+
+	/* For a bond device, the link status depends on the upper netdev,
+	 * and the upper netdev's link status depends on all of the slave
+	 * netdevs, not just this one. So a bond device cannot get a
+	 * correct link status from this path.
+	 */
+ if (hns_roce_get_bond_grp(netdev, get_hr_bus_num(hr_dev)))
+ return;
+
+ ib_dispatch_port_state_event(&hr_dev->ib_dev, netdev);
+}
+
static const struct hnae3_client_ops hns_roce_hw_v2_ops = {
.init_instance = hns_roce_hw_v2_init_instance,
.uninit_instance = hns_roce_hw_v2_uninit_instance,
+ .link_status_change = hns_roce_hw_v2_link_status_change,
.reset_notify = hns_roce_hw_v2_reset_notify,
};
@@ -6391,12 +7388,15 @@ static struct hnae3_client hns_roce_hw_v2_client = {
static int __init hns_roce_hw_v2_init(void)
{
+ hns_roce_init_debugfs();
return hnae3_register_client(&hns_roce_hw_v2_client);
}
static void __exit hns_roce_hw_v2_exit(void)
{
+ hns_roce_dealloc_bond_grp();
hnae3_unregister_client(&hns_roce_hw_v2_client);
+ hns_roce_cleanup_debugfs();
}
module_init(hns_roce_hw_v2_init);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index edfdbe2ce0db..285fe0875fac 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -34,69 +34,38 @@
#define _HNS_ROCE_HW_V2_H
#include <linux/bitops.h>
+#include "hnae3.h"
+#include "hns_roce_bond.h"
-#define HNS_ROCE_VF_QPC_BT_NUM 256
-#define HNS_ROCE_VF_SCCC_BT_NUM 64
-#define HNS_ROCE_VF_SRQC_BT_NUM 64
-#define HNS_ROCE_VF_CQC_BT_NUM 64
-#define HNS_ROCE_VF_MPT_BT_NUM 64
-#define HNS_ROCE_VF_EQC_NUM 64
-#define HNS_ROCE_VF_SMAC_NUM 32
-#define HNS_ROCE_VF_SGID_NUM 32
-#define HNS_ROCE_VF_SL_NUM 8
-
-#define HNS_ROCE_V2_MAX_QP_NUM 0x100000
-#define HNS_ROCE_V2_MAX_QPC_TIMER_NUM 0x200
-#define HNS_ROCE_V2_MAX_WQE_NUM 0x8000
-#define HNS_ROCE_V2_MAX_SRQ 0x100000
-#define HNS_ROCE_V2_MAX_SRQ_WR 0x8000
-#define HNS_ROCE_V2_MAX_SRQ_SGE 0x100
-#define HNS_ROCE_V2_MAX_CQ_NUM 0x100000
-#define HNS_ROCE_V2_MAX_CQC_TIMER_NUM 0x100
-#define HNS_ROCE_V2_MAX_SRQ_NUM 0x100000
-#define HNS_ROCE_V2_MAX_CQE_NUM 0x10000
-#define HNS_ROCE_V2_MAX_SRQWQE_NUM 0x8000
-#define HNS_ROCE_V2_MAX_RQ_SGE_NUM 0x100
-#define HNS_ROCE_V2_MAX_SQ_SGE_NUM 0xff
-#define HNS_ROCE_V2_MAX_SRQ_SGE_NUM 0x100
-#define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000
-#define HNS_ROCE_V2_MAX_SQ_INLINE 0x20
-#define HNS_ROCE_V2_UAR_NUM 256
-#define HNS_ROCE_V2_PHY_UAR_NUM 1
-#define HNS_ROCE_V2_MAX_IRQ_NUM 65
-#define HNS_ROCE_V2_COMP_VEC_NUM 63
+#define HNS_ROCE_V2_MAX_RC_INL_INN_SZ 32
+#define HNS_ROCE_V2_MTT_ENTRY_SZ 64
#define HNS_ROCE_V2_AEQE_VEC_NUM 1
#define HNS_ROCE_V2_ABNORMAL_VEC_NUM 1
-#define HNS_ROCE_V2_MAX_MTPT_NUM 0x100000
-#define HNS_ROCE_V2_MAX_MTT_SEGS 0x1000000
-#define HNS_ROCE_V2_MAX_CQE_SEGS 0x1000000
#define HNS_ROCE_V2_MAX_SRQWQE_SEGS 0x1000000
#define HNS_ROCE_V2_MAX_IDX_SEGS 0x1000000
-#define HNS_ROCE_V2_MAX_PD_NUM 0x1000000
-#define HNS_ROCE_V2_MAX_QP_INIT_RDMA 128
-#define HNS_ROCE_V2_MAX_QP_DEST_RDMA 128
-#define HNS_ROCE_V2_MAX_SQ_DESC_SZ 64
-#define HNS_ROCE_V2_MAX_RQ_DESC_SZ 16
-#define HNS_ROCE_V2_MAX_SRQ_DESC_SZ 64
-#define HNS_ROCE_V2_QPC_ENTRY_SZ 256
-#define HNS_ROCE_V2_IRRL_ENTRY_SZ 64
-#define HNS_ROCE_V2_TRRL_ENTRY_SZ 48
-#define HNS_ROCE_V2_CQC_ENTRY_SZ 64
-#define HNS_ROCE_V2_SRQC_ENTRY_SZ 64
-#define HNS_ROCE_V2_MTPT_ENTRY_SZ 64
-#define HNS_ROCE_V2_MTT_ENTRY_SZ 64
-#define HNS_ROCE_V2_CQE_ENTRY_SIZE 32
-#define HNS_ROCE_V2_SCCC_ENTRY_SZ 32
-#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ 4096
-#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ 4096
-#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000
+#define HNS_ROCE_V2_MAX_XRCD_NUM 0x1000000
+
+#define HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08 10
+
+#define HNS_ROCE_V3_SCCC_SZ 64
+#define HNS_ROCE_V3_GMV_ENTRY_SZ 32
+
+#define HNS_ROCE_V2_EXT_LLM_ENTRY_SZ 8
+#define HNS_ROCE_V2_EXT_LLM_MAX_DEPTH 4096
+
+#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE
+#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE
+#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFF000
#define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2
-#define HNS_ROCE_INVALID_LKEY 0x100
+#define HNS_ROCE_INVALID_LKEY 0x0
+#define HNS_ROCE_INVALID_SGE_LENGTH 0x80000000
#define HNS_ROCE_CMQ_TX_TIMEOUT 30000
-#define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2
#define HNS_ROCE_V2_RSV_QPS 8
-#define HNS_ROCE_V2_HW_RST_TIMEOUT 1000
+#define HNS_ROCE_V2_HW_RST_TIMEOUT 1000
+#define HNS_ROCE_V2_HW_RST_UNINT_DELAY 100
+
+#define HNS_ROCE_V2_HW_RST_COMPLETION_WAIT 20
#define HNS_ROCE_CONTEXT_HOP_NUM 1
#define HNS_ROCE_SCCC_HOP_NUM 1
@@ -104,33 +73,39 @@
#define HNS_ROCE_CQE_HOP_NUM 1
#define HNS_ROCE_SRQWQE_HOP_NUM 1
#define HNS_ROCE_PBL_HOP_NUM 2
-#define HNS_ROCE_EQE_HOP_NUM 2
#define HNS_ROCE_IDX_HOP_NUM 1
+#define HNS_ROCE_SQWQE_HOP_NUM 2
+#define HNS_ROCE_EXT_SGE_HOP_NUM 1
+#define HNS_ROCE_RQWQE_HOP_NUM 2
-#define HNS_ROCE_V2_GID_INDEX_NUM 256
+#define HNS_ROCE_V2_EQE_HOP_NUM 2
+#define HNS_ROCE_V3_EQE_HOP_NUM 1
+
+#define HNS_ROCE_BA_PG_SZ_SUPPORTED_256K 6
+#define HNS_ROCE_BA_PG_SZ_SUPPORTED_16K 2
+#define HNS_ROCE_V2_GID_INDEX_NUM 16
#define HNS_ROCE_V2_TABLE_CHUNK_SIZE (1 << 18)
-#define HNS_ROCE_CMD_FLAG_IN_VALID_SHIFT 0
-#define HNS_ROCE_CMD_FLAG_OUT_VALID_SHIFT 1
-#define HNS_ROCE_CMD_FLAG_NEXT_SHIFT 2
-#define HNS_ROCE_CMD_FLAG_WR_OR_RD_SHIFT 3
-#define HNS_ROCE_CMD_FLAG_NO_INTR_SHIFT 4
-#define HNS_ROCE_CMD_FLAG_ERR_INTR_SHIFT 5
+/* The budget must be smaller than aeqe_depth to guarantee that the CI is
+ * updated before all the entries in the EQ have been polled.
+ */
+#define HNS_AEQ_POLLING_BUDGET 64
-#define HNS_ROCE_CMD_FLAG_IN BIT(HNS_ROCE_CMD_FLAG_IN_VALID_SHIFT)
-#define HNS_ROCE_CMD_FLAG_OUT BIT(HNS_ROCE_CMD_FLAG_OUT_VALID_SHIFT)
-#define HNS_ROCE_CMD_FLAG_NEXT BIT(HNS_ROCE_CMD_FLAG_NEXT_SHIFT)
-#define HNS_ROCE_CMD_FLAG_WR BIT(HNS_ROCE_CMD_FLAG_WR_OR_RD_SHIFT)
-#define HNS_ROCE_CMD_FLAG_NO_INTR BIT(HNS_ROCE_CMD_FLAG_NO_INTR_SHIFT)
-#define HNS_ROCE_CMD_FLAG_ERR_INTR BIT(HNS_ROCE_CMD_FLAG_ERR_INTR_SHIFT)
+enum {
+ HNS_ROCE_CMD_FLAG_IN = BIT(0),
+ HNS_ROCE_CMD_FLAG_OUT = BIT(1),
+ HNS_ROCE_CMD_FLAG_NEXT = BIT(2),
+ HNS_ROCE_CMD_FLAG_WR = BIT(3),
+ HNS_ROCE_CMD_FLAG_ERR_INTR = BIT(5),
+};
#define HNS_ROCE_CMQ_DESC_NUM_S 3
-#define HNS_ROCE_CMQ_EN_B 16
-#define HNS_ROCE_CMQ_ENABLE BIT(HNS_ROCE_CMQ_EN_B)
#define HNS_ROCE_CMQ_SCC_CLR_DONE_CNT 5
+#define HNS_ROCE_CONG_SIZE 64
+
#define check_whether_last_step(hop_num, step_idx) \
((step_idx == 0 && hop_num == HNS_ROCE_HOP_NUM_0) || \
(step_idx == 1 && hop_num == 1) || \
@@ -141,12 +116,29 @@
#define CMD_CSQ_DESC_NUM 1024
#define CMD_CRQ_DESC_NUM 1024
+/* Parameters used when freeing MRs */
+#define HNS_ROCE_FREE_MR_USED_CQE_NUM 128
+#define HNS_ROCE_FREE_MR_USED_QP_NUM 0x8
+#define HNS_ROCE_FREE_MR_USED_PSN 0x0808
+#define HNS_ROCE_FREE_MR_USED_QP_RETRY_CNT 0x7
+#define HNS_ROCE_FREE_MR_USED_QP_TIMEOUT 0x12
+#define HNS_ROCE_FREE_MR_USED_SQWQE_NUM 128
+#define HNS_ROCE_FREE_MR_USED_SQSGE_NUM 0x2
+#define HNS_ROCE_FREE_MR_USED_RQWQE_NUM 128
+#define HNS_ROCE_FREE_MR_USED_RQSGE_NUM 0x2
+#define HNS_ROCE_V2_FREE_MR_TIMEOUT 4500
+
enum {
NO_ARMED = 0x0,
REG_NXT_CEQE = 0x2,
REG_NXT_SE_CEQE = 0x3
};
+enum {
+ CQE_SIZE_32B = 0x0,
+ CQE_SIZE_64B = 0x1
+};
+
#define V2_CQ_DB_REQ_NOT_SOL 0
#define V2_CQ_DB_REQ_NOT 1
@@ -155,8 +147,6 @@ enum {
#define GID_LEN_V2 16
-#define HNS_ROCE_V2_CQE_QPN_MASK 0x3ffff
-
enum {
HNS_ROCE_V2_WQE_OP_SEND = 0x0,
HNS_ROCE_V2_WQE_OP_SEND_WITH_INV = 0x1,
@@ -169,28 +159,11 @@ enum {
HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP = 0x8,
HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD = 0x9,
HNS_ROCE_V2_WQE_OP_FAST_REG_PMR = 0xa,
- HNS_ROCE_V2_WQE_OP_LOCAL_INV = 0xb,
- HNS_ROCE_V2_WQE_OP_BIND_MW_TYPE = 0xc,
+ HNS_ROCE_V2_WQE_OP_BIND_MW = 0xc,
HNS_ROCE_V2_WQE_OP_MASK = 0x1f,
};
enum {
- HNS_ROCE_SQ_OPCODE_SEND = 0x0,
- HNS_ROCE_SQ_OPCODE_SEND_WITH_INV = 0x1,
- HNS_ROCE_SQ_OPCODE_SEND_WITH_IMM = 0x2,
- HNS_ROCE_SQ_OPCODE_RDMA_WRITE = 0x3,
- HNS_ROCE_SQ_OPCODE_RDMA_WRITE_WITH_IMM = 0x4,
- HNS_ROCE_SQ_OPCODE_RDMA_READ = 0x5,
- HNS_ROCE_SQ_OPCODE_ATOMIC_COMP_AND_SWAP = 0x6,
- HNS_ROCE_SQ_OPCODE_ATOMIC_FETCH_AND_ADD = 0x7,
- HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_COMP_AND_SWAP = 0x8,
- HNS_ROCE_SQ_OPCODE_ATOMIC_MASK_FETCH_AND_ADD = 0x9,
- HNS_ROCE_SQ_OPCODE_FAST_REG_WR = 0xa,
- HNS_ROCE_SQ_OPCODE_LOCAL_INV = 0xb,
- HNS_ROCE_SQ_OPCODE_BIND_MW = 0xc,
-};
-
-enum {
/* rq operations */
HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM = 0x0,
HNS_ROCE_V2_OPCODE_SEND = 0x1,
@@ -199,11 +172,11 @@ enum {
};
enum {
- HNS_ROCE_V2_SQ_DB = 0x0,
- HNS_ROCE_V2_RQ_DB = 0x1,
- HNS_ROCE_V2_SRQ_DB = 0x2,
- HNS_ROCE_V2_CQ_DB_PTR = 0x3,
- HNS_ROCE_V2_CQ_DB_NTR = 0x4,
+ HNS_ROCE_V2_SQ_DB,
+ HNS_ROCE_V2_RQ_DB,
+ HNS_ROCE_V2_SRQ_DB,
+ HNS_ROCE_V2_CQ_DB,
+ HNS_ROCE_V2_CQ_DB_NOTIFY
};
enum {
@@ -221,6 +194,7 @@ enum {
HNS_ROCE_CQE_V2_TRANSPORT_RETRY_EXC_ERR = 0x15,
HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR = 0x16,
HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR = 0x22,
+ HNS_ROCE_CQE_V2_GENERAL_ERR = 0x23,
HNS_ROCE_V2_CQE_STATUS_MASK = 0xff,
};
@@ -231,20 +205,41 @@ enum hns_roce_opcode_type {
HNS_ROCE_OPC_QUERY_HW_VER = 0x8000,
HNS_ROCE_OPC_CFG_GLOBAL_PARAM = 0x8001,
HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004,
+ HNS_ROCE_OPC_QUERY_COUNTER = 0x8206,
HNS_ROCE_OPC_QUERY_PF_RES = 0x8400,
HNS_ROCE_OPC_ALLOC_VF_RES = 0x8401,
HNS_ROCE_OPC_CFG_EXT_LLM = 0x8403,
- HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404,
HNS_ROCE_OPC_QUERY_PF_TIMER_RES = 0x8406,
+ HNS_ROCE_OPC_QUERY_FUNC_INFO = 0x8407,
+ HNS_ROCE_OPC_QUERY_PF_CAPS_NUM = 0x8408,
+ HNS_ROCE_OPC_CFG_ENTRY_SIZE = 0x8409,
+ HNS_ROCE_OPC_QUERY_VF_CAPS_NUM = 0x8410,
HNS_ROCE_OPC_CFG_SGID_TB = 0x8500,
HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501,
HNS_ROCE_OPC_POST_MB = 0x8504,
HNS_ROCE_OPC_QUERY_MB_ST = 0x8505,
HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506,
+ HNS_ROCE_OPC_FUNC_CLEAR = 0x8508,
HNS_ROCE_OPC_CLR_SCCC = 0x8509,
HNS_ROCE_OPC_QUERY_SCCC = 0x850a,
HNS_ROCE_OPC_RESET_SCCC = 0x850b,
+ HNS_ROCE_OPC_CLEAR_EXTDB_LIST_INFO = 0x850d,
+ HNS_ROCE_OPC_QUERY_VF_RES = 0x850e,
+ HNS_ROCE_OPC_CFG_GMV_TBL = 0x850f,
+ HNS_ROCE_OPC_CFG_GMV_BT = 0x8510,
+ HNS_ROCE_QUERY_RAM_ECC = 0x8513,
HNS_SWITCH_PARAMETER_CFG = 0x1033,
+ HNS_ROCE_OPC_SET_BOND_INFO = 0x8601,
+ HNS_ROCE_OPC_CLEAR_BOND_INFO = 0x8602,
+ HNS_ROCE_OPC_CHANGE_ACTIVE_PORT = 0x8603,
+};
+
+#define HNS_ROCE_OPC_POST_MB_TIMEOUT 35000
+#define HNS_ROCE_OPC_POST_MB_TRY_CNT 8
+#define HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC 5
+struct hns_roce_cmdq_tx_timeout_map {
+ u16 opcode;
+ u32 tx_timeout;
};
enum {
@@ -253,10 +248,25 @@ enum {
};
enum hns_roce_cmd_return_status {
- CMD_EXEC_SUCCESS = 0,
- CMD_NO_AUTH = 1,
- CMD_NOT_EXEC = 2,
- CMD_QUEUE_FULL = 3,
+ CMD_EXEC_SUCCESS,
+ CMD_NO_AUTH,
+ CMD_NOT_EXIST,
+ CMD_CRQ_FULL,
+ CMD_NEXT_ERR,
+ CMD_NOT_EXEC,
+ CMD_PARA_ERR,
+ CMD_RESULT_ERR,
+ CMD_TIMEOUT,
+ CMD_HILINK_ERR,
+ CMD_INFO_ILLEGAL,
+ CMD_INVALID,
+ CMD_ROH_CHECK_FAIL,
+ CMD_OTHER_ERR = 0xff
+};
+
+struct hns_roce_cmd_errcode {
+ enum hns_roce_cmd_return_status return_status;
+ int errno;
};
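/*
 * Editor's sketch (assumption, not the driver's actual mapping): a table of
 * struct hns_roce_cmd_errcode entries is the natural way to translate the
 * firmware return status above into a kernel errno. The table contents and
 * the helper name below are illustrative only.
 */
static const struct hns_roce_cmd_errcode cmd_errcode_sketch[] = {
	{ CMD_EXEC_SUCCESS, 0 },
	{ CMD_NO_AUTH, -EPERM },
	{ CMD_NOT_EXIST, -EOPNOTSUPP },
	{ CMD_TIMEOUT, -ETIME },
};

static int cmd_status_to_errno(enum hns_roce_cmd_return_status status)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(cmd_errcode_sketch); i++)
		if (cmd_errcode_sketch[i].return_status == status)
			return cmd_errcode_sketch[i].errno;

	return -EIO;	/* unrecognized status */
}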
enum hns_roce_sgid_type {
@@ -266,181 +276,114 @@ enum hns_roce_sgid_type {
};
struct hns_roce_v2_cq_context {
- __le32 byte_4_pg_ceqn;
- __le32 byte_8_cqn;
- __le32 cqe_cur_blk_addr;
- __le32 byte_16_hop_addr;
- __le32 cqe_nxt_blk_addr;
- __le32 byte_24_pgsz_addr;
- __le32 byte_28_cq_pi;
- __le32 byte_32_cq_ci;
- __le32 cqe_ba;
- __le32 byte_40_cqe_ba;
- __le32 byte_44_db_record;
- __le32 db_record_addr;
- __le32 byte_52_cqe_cnt;
- __le32 byte_56_cqe_period_maxcnt;
- __le32 cqe_report_timer;
- __le32 byte_64_se_cqe_idx;
+ __le32 byte_4_pg_ceqn;
+ __le32 byte_8_cqn;
+ __le32 cqe_cur_blk_addr;
+ __le32 byte_16_hop_addr;
+ __le32 cqe_nxt_blk_addr;
+ __le32 byte_24_pgsz_addr;
+ __le32 byte_28_cq_pi;
+ __le32 byte_32_cq_ci;
+ __le32 cqe_ba;
+ __le32 byte_40_cqe_ba;
+ __le32 byte_44_db_record;
+ __le32 db_record_addr;
+ __le32 byte_52_cqe_cnt;
+ __le32 byte_56_cqe_period_maxcnt;
+ __le32 cqe_report_timer;
+ __le32 byte_64_se_cqe_idx;
};
-#define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0
-#define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0
-
-#define V2_CQC_BYTE_4_CQ_ST_S 0
-#define V2_CQC_BYTE_4_CQ_ST_M GENMASK(1, 0)
-
-#define V2_CQC_BYTE_4_POLL_S 2
-#define V2_CQC_BYTE_4_SE_S 3
+#define CQC_CQE_BA_L_S 3
+#define CQC_CQE_BA_H_S (32 + CQC_CQE_BA_L_S)
+#define CQC_CQE_DB_RECORD_ADDR_H_S 32
-#define V2_CQC_BYTE_4_OVER_IGNORE_S 4
-
-#define V2_CQC_BYTE_4_COALESCE_S 5
-
-#define V2_CQC_BYTE_4_ARM_ST_S 6
-#define V2_CQC_BYTE_4_ARM_ST_M GENMASK(7, 6)
-
-#define V2_CQC_BYTE_4_SHIFT_S 8
-#define V2_CQC_BYTE_4_SHIFT_M GENMASK(12, 8)
-
-#define V2_CQC_BYTE_4_CMD_SN_S 13
-#define V2_CQC_BYTE_4_CMD_SN_M GENMASK(14, 13)
-
-#define V2_CQC_BYTE_4_CEQN_S 15
-#define V2_CQC_BYTE_4_CEQN_M GENMASK(23, 15)
-
-#define V2_CQC_BYTE_4_PAGE_OFFSET_S 24
-#define V2_CQC_BYTE_4_PAGE_OFFSET_M GENMASK(31, 24)
-
-#define V2_CQC_BYTE_8_CQN_S 0
-#define V2_CQC_BYTE_8_CQN_M GENMASK(23, 0)
-
-#define V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_S 0
-#define V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_M GENMASK(19, 0)
-
-#define V2_CQC_BYTE_16_CQE_HOP_NUM_S 30
-#define V2_CQC_BYTE_16_CQE_HOP_NUM_M GENMASK(31, 30)
-
-#define V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_S 0
-#define V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_M GENMASK(19, 0)
-
-#define V2_CQC_BYTE_24_CQE_BA_PG_SZ_S 24
-#define V2_CQC_BYTE_24_CQE_BA_PG_SZ_M GENMASK(27, 24)
-
-#define V2_CQC_BYTE_24_CQE_BUF_PG_SZ_S 28
-#define V2_CQC_BYTE_24_CQE_BUF_PG_SZ_M GENMASK(31, 28)
-
-#define V2_CQC_BYTE_28_CQ_PRODUCER_IDX_S 0
-#define V2_CQC_BYTE_28_CQ_PRODUCER_IDX_M GENMASK(23, 0)
-
-#define V2_CQC_BYTE_32_CQ_CONSUMER_IDX_S 0
-#define V2_CQC_BYTE_32_CQ_CONSUMER_IDX_M GENMASK(23, 0)
-
-#define V2_CQC_BYTE_40_CQE_BA_S 0
-#define V2_CQC_BYTE_40_CQE_BA_M GENMASK(28, 0)
-
-#define V2_CQC_BYTE_44_DB_RECORD_EN_S 0
-
-#define V2_CQC_BYTE_44_DB_RECORD_ADDR_S 1
-#define V2_CQC_BYTE_44_DB_RECORD_ADDR_M GENMASK(31, 1)
-
-#define V2_CQC_BYTE_52_CQE_CNT_S 0
-#define V2_CQC_BYTE_52_CQE_CNT_M GENMASK(23, 0)
-
-#define V2_CQC_BYTE_56_CQ_MAX_CNT_S 0
-#define V2_CQC_BYTE_56_CQ_MAX_CNT_M GENMASK(15, 0)
-
-#define V2_CQC_BYTE_56_CQ_PERIOD_S 16
-#define V2_CQC_BYTE_56_CQ_PERIOD_M GENMASK(31, 16)
+#define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0
+#define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0
-#define V2_CQC_BYTE_64_SE_CQE_IDX_S 0
-#define V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0)
+#define CQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cq_context, h, l)
+
+#define CQC_CQ_ST CQC_FIELD_LOC(1, 0)
+#define CQC_POLL CQC_FIELD_LOC(2, 2)
+#define CQC_SE CQC_FIELD_LOC(3, 3)
+#define CQC_OVER_IGNORE CQC_FIELD_LOC(4, 4)
+#define CQC_ARM_ST CQC_FIELD_LOC(7, 6)
+#define CQC_SHIFT CQC_FIELD_LOC(12, 8)
+#define CQC_CMD_SN CQC_FIELD_LOC(14, 13)
+#define CQC_CEQN CQC_FIELD_LOC(23, 15)
+#define CQC_CQN CQC_FIELD_LOC(55, 32)
+#define CQC_POE_EN CQC_FIELD_LOC(56, 56)
+#define CQC_POE_NUM CQC_FIELD_LOC(58, 57)
+#define CQC_CQE_SIZE CQC_FIELD_LOC(60, 59)
+#define CQC_CQ_CNT_MODE CQC_FIELD_LOC(61, 61)
+#define CQC_STASH CQC_FIELD_LOC(63, 63)
+#define CQC_CQE_CUR_BLK_ADDR_L CQC_FIELD_LOC(95, 64)
+#define CQC_CQE_CUR_BLK_ADDR_H CQC_FIELD_LOC(115, 96)
+#define CQC_POE_QID CQC_FIELD_LOC(125, 116)
+#define CQC_CQE_HOP_NUM CQC_FIELD_LOC(127, 126)
+#define CQC_CQE_NEX_BLK_ADDR_L CQC_FIELD_LOC(159, 128)
+#define CQC_CQE_NEX_BLK_ADDR_H CQC_FIELD_LOC(179, 160)
+#define CQC_CQE_BAR_PG_SZ CQC_FIELD_LOC(187, 184)
+#define CQC_CQE_BUF_PG_SZ CQC_FIELD_LOC(191, 188)
+#define CQC_CQ_PRODUCER_IDX CQC_FIELD_LOC(215, 192)
+#define CQC_CQ_CONSUMER_IDX CQC_FIELD_LOC(247, 224)
+#define CQC_CQE_BA_L CQC_FIELD_LOC(287, 256)
+#define CQC_CQE_BA_H CQC_FIELD_LOC(316, 288)
+#define CQC_POE_QID_H_0 CQC_FIELD_LOC(319, 317)
+#define CQC_DB_RECORD_EN CQC_FIELD_LOC(320, 320)
+#define CQC_CQE_DB_RECORD_ADDR_L CQC_FIELD_LOC(351, 321)
+#define CQC_CQE_DB_RECORD_ADDR_H CQC_FIELD_LOC(383, 352)
+#define CQC_CQE_CNT CQC_FIELD_LOC(407, 384)
+#define CQC_CQ_MAX_CNT CQC_FIELD_LOC(431, 416)
+#define CQC_CQ_PERIOD CQC_FIELD_LOC(447, 432)
+#define CQC_CQE_REPORT_TIMER CQC_FIELD_LOC(471, 448)
+#define CQC_WR_CQE_IDX CQC_FIELD_LOC(479, 472)
+#define CQC_SE_CQE_IDX CQC_FIELD_LOC(503, 480)
+#define CQC_POE_QID_H_1 CQC_FIELD_LOC(511, 511)
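/*
 * Editor's note: each CQC_* definition above locates a field by its absolute
 * high/low bit positions within the whole context structure. A hedged usage
 * sketch, assuming the hr_reg_write() helper that the driver pairs with
 * FIELD_LOC() in hns_roce_common.h; the field values written are illustrative:
 */
static void cqc_fill_sketch(struct hns_roce_v2_cq_context *ctx, u32 cqn,
			    u32 ceqn)
{
	hr_reg_write(ctx, CQC_CQN, cqn);		/* bits 55..32 */
	hr_reg_write(ctx, CQC_CEQN, ceqn);		/* bits 23..15 */
	hr_reg_write(ctx, CQC_CQE_SIZE, CQE_SIZE_32B);	/* bits 60..59 */
}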
struct hns_roce_srq_context {
- __le32 byte_4_srqn_srqst;
- __le32 byte_8_limit_wl;
- __le32 byte_12_xrcd;
- __le32 byte_16_pi_ci;
- __le32 wqe_bt_ba;
- __le32 byte_24_wqe_bt_ba;
- __le32 byte_28_rqws_pd;
- __le32 idx_bt_ba;
- __le32 rsv_idx_bt_ba;
- __le32 idx_cur_blk_addr;
- __le32 byte_44_idxbufpgsz_addr;
- __le32 idx_nxt_blk_addr;
- __le32 rsv_idxnxtblkaddr;
- __le32 byte_56_xrc_cqn;
- __le32 db_record_addr_record_en;
- __le32 db_record_addr;
+ __le32 data[16];
};
-#define SRQC_BYTE_4_SRQ_ST_S 0
-#define SRQC_BYTE_4_SRQ_ST_M GENMASK(1, 0)
-
-#define SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S 2
-#define SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M GENMASK(3, 2)
-
-#define SRQC_BYTE_4_SRQ_SHIFT_S 4
-#define SRQC_BYTE_4_SRQ_SHIFT_M GENMASK(7, 4)
-
-#define SRQC_BYTE_4_SRQN_S 8
-#define SRQC_BYTE_4_SRQN_M GENMASK(31, 8)
-
-#define SRQC_BYTE_8_SRQ_LIMIT_WL_S 0
-#define SRQC_BYTE_8_SRQ_LIMIT_WL_M GENMASK(15, 0)
-
-#define SRQC_BYTE_12_SRQ_XRCD_S 0
-#define SRQC_BYTE_12_SRQ_XRCD_M GENMASK(23, 0)
-
-#define SRQC_BYTE_16_SRQ_PRODUCER_IDX_S 0
-#define SRQC_BYTE_16_SRQ_PRODUCER_IDX_M GENMASK(15, 0)
+#define SRQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_srq_context, h, l)
+
+#define SRQC_SRQ_ST SRQC_FIELD_LOC(1, 0)
+#define SRQC_WQE_HOP_NUM SRQC_FIELD_LOC(3, 2)
+#define SRQC_SHIFT SRQC_FIELD_LOC(7, 4)
+#define SRQC_SRQN SRQC_FIELD_LOC(31, 8)
+#define SRQC_LIMIT_WL SRQC_FIELD_LOC(47, 32)
+#define SRQC_RSV0 SRQC_FIELD_LOC(63, 48)
+#define SRQC_XRCD SRQC_FIELD_LOC(87, 64)
+#define SRQC_RSV1 SRQC_FIELD_LOC(95, 88)
+#define SRQC_PRODUCER_IDX SRQC_FIELD_LOC(111, 96)
+#define SRQC_CONSUMER_IDX SRQC_FIELD_LOC(127, 112)
+#define SRQC_WQE_BT_BA_L SRQC_FIELD_LOC(159, 128)
+#define SRQC_WQE_BT_BA_H SRQC_FIELD_LOC(188, 160)
+#define SRQC_RSV2 SRQC_FIELD_LOC(190, 189)
+#define SRQC_SRQ_TYPE SRQC_FIELD_LOC(191, 191)
+#define SRQC_PD SRQC_FIELD_LOC(215, 192)
+#define SRQC_RQWS SRQC_FIELD_LOC(219, 216)
+#define SRQC_RSV3 SRQC_FIELD_LOC(223, 220)
+#define SRQC_IDX_BT_BA_L SRQC_FIELD_LOC(255, 224)
+#define SRQC_IDX_BT_BA_H SRQC_FIELD_LOC(284, 256)
+#define SRQC_RSV4 SRQC_FIELD_LOC(287, 285)
+#define SRQC_IDX_CUR_BLK_ADDR_L SRQC_FIELD_LOC(319, 288)
+#define SRQC_IDX_CUR_BLK_ADDR_H SRQC_FIELD_LOC(339, 320)
+#define SRQC_RSV5 SRQC_FIELD_LOC(341, 340)
+#define SRQC_IDX_HOP_NUM SRQC_FIELD_LOC(343, 342)
+#define SRQC_IDX_BA_PG_SZ SRQC_FIELD_LOC(347, 344)
+#define SRQC_IDX_BUF_PG_SZ SRQC_FIELD_LOC(351, 348)
+#define SRQC_IDX_NXT_BLK_ADDR_L SRQC_FIELD_LOC(383, 352)
+#define SRQC_IDX_NXT_BLK_ADDR_H SRQC_FIELD_LOC(403, 384)
+#define SRQC_RSV6 SRQC_FIELD_LOC(415, 404)
+#define SRQC_XRC_CQN SRQC_FIELD_LOC(439, 416)
+#define SRQC_WQE_BA_PG_SZ SRQC_FIELD_LOC(443, 440)
+#define SRQC_WQE_BUF_PG_SZ SRQC_FIELD_LOC(447, 444)
+#define SRQC_DB_RECORD_EN SRQC_FIELD_LOC(448, 448)
+#define SRQC_DB_RECORD_ADDR_L SRQC_FIELD_LOC(479, 449)
+#define SRQC_DB_RECORD_ADDR_H SRQC_FIELD_LOC(511, 480)
-#define SRQC_BYTE_16_SRQ_CONSUMER_IDX_S 0
-#define SRQC_BYTE_16_SRQ_CONSUMER_IDX_M GENMASK(31, 16)
-
-#define SRQC_BYTE_24_SRQ_WQE_BT_BA_S 0
-#define SRQC_BYTE_24_SRQ_WQE_BT_BA_M GENMASK(28, 0)
-
-#define SRQC_BYTE_28_PD_S 0
-#define SRQC_BYTE_28_PD_M GENMASK(23, 0)
-
-#define SRQC_BYTE_28_RQWS_S 24
-#define SRQC_BYTE_28_RQWS_M GENMASK(27, 24)
-
-#define SRQC_BYTE_36_SRQ_IDX_BT_BA_S 0
-#define SRQC_BYTE_36_SRQ_IDX_BT_BA_M GENMASK(28, 0)
-
-#define SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S 0
-#define SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M GENMASK(19, 0)
-
-#define SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S 22
-#define SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M GENMASK(23, 22)
-
-#define SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S 24
-#define SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M GENMASK(27, 24)
-
-#define SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S 28
-#define SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M GENMASK(31, 28)
-
-#define SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S 0
-#define SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M GENMASK(19, 0)
-
-#define SRQC_BYTE_56_SRQ_XRC_CQN_S 0
-#define SRQC_BYTE_56_SRQ_XRC_CQN_M GENMASK(23, 0)
-
-#define SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S 24
-#define SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M GENMASK(27, 24)
-
-#define SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S 28
-#define SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M GENMASK(31, 28)
-
-#define SRQC_BYTE_60_SRQ_RECORD_EN_S 0
-
-#define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_S 1
-#define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_M GENMASK(31, 1)
-
-enum{
+enum {
V2_MPT_ST_VALID = 0x1,
V2_MPT_ST_FREE = 0x2,
};
@@ -450,441 +393,296 @@ enum hns_roce_v2_qp_state {
HNS_ROCE_QP_ST_INIT,
HNS_ROCE_QP_ST_RTR,
HNS_ROCE_QP_ST_RTS,
- HNS_ROCE_QP_ST_SQER,
HNS_ROCE_QP_ST_SQD,
+ HNS_ROCE_QP_ST_SQER,
HNS_ROCE_QP_ST_ERR,
HNS_ROCE_QP_ST_SQ_DRAINING,
HNS_ROCE_QP_NUM_ST
};
-struct hns_roce_v2_qp_context {
- __le32 byte_4_sqpn_tst;
- __le32 wqe_sge_ba;
- __le32 byte_12_sq_hop;
- __le32 byte_16_buf_ba_pg_sz;
- __le32 byte_20_smac_sgid_idx;
- __le32 byte_24_mtu_tc;
- __le32 byte_28_at_fl;
- u8 dgid[GID_LEN_V2];
- __le32 dmac;
- __le32 byte_52_udpspn_dmac;
- __le32 byte_56_dqpn_err;
- __le32 byte_60_qpst_tempid;
- __le32 qkey_xrcd;
- __le32 byte_68_rq_db;
- __le32 rq_db_record_addr;
- __le32 byte_76_srqn_op_en;
- __le32 byte_80_rnr_rx_cqn;
- __le32 byte_84_rq_ci_pi;
- __le32 rq_cur_blk_addr;
- __le32 byte_92_srq_info;
- __le32 byte_96_rx_reqmsn;
- __le32 rq_nxt_blk_addr;
- __le32 byte_104_rq_sge;
- __le32 byte_108_rx_reqepsn;
- __le32 rq_rnr_timer;
- __le32 rx_msg_len;
- __le32 rx_rkey_pkt_info;
- __le64 rx_va;
- __le32 byte_132_trrl;
- __le32 trrl_ba;
- __le32 byte_140_raq;
- __le32 byte_144_raq;
- __le32 byte_148_raq;
- __le32 byte_152_raq;
- __le32 byte_156_raq;
- __le32 byte_160_sq_ci_pi;
- __le32 sq_cur_blk_addr;
- __le32 byte_168_irrl_idx;
- __le32 byte_172_sq_psn;
- __le32 byte_176_msg_pktn;
- __le32 sq_cur_sge_blk_addr;
- __le32 byte_184_irrl_idx;
- __le32 cur_sge_offset;
- __le32 byte_192_ext_sge;
- __le32 byte_196_sq_psn;
- __le32 byte_200_sq_max;
- __le32 irrl_ba;
- __le32 byte_208_irrl;
- __le32 byte_212_lsn;
- __le32 sq_timer;
- __le32 byte_220_retry_psn_msn;
- __le32 byte_224_retry_msg;
- __le32 rx_sq_cur_blk_addr;
- __le32 byte_232_irrl_sge;
- __le32 irrl_cur_sge_offset;
- __le32 byte_240_irrl_tail;
- __le32 byte_244_rnr_rxack;
- __le32 byte_248_ack_psn;
- __le32 byte_252_err_txcqn;
- __le32 byte_256_sqflush_rqcqe;
+struct hns_roce_v2_qp_context_ex {
+ __le32 data[64];
};
-#define V2_QPC_BYTE_4_TST_S 0
-#define V2_QPC_BYTE_4_TST_M GENMASK(2, 0)
-
-#define V2_QPC_BYTE_4_SGE_SHIFT_S 3
-#define V2_QPC_BYTE_4_SGE_SHIFT_M GENMASK(7, 3)
-
-#define V2_QPC_BYTE_4_SQPN_S 8
-#define V2_QPC_BYTE_4_SQPN_M GENMASK(31, 8)
-
-#define V2_QPC_BYTE_12_WQE_SGE_BA_S 0
-#define V2_QPC_BYTE_12_WQE_SGE_BA_M GENMASK(28, 0)
-
-#define V2_QPC_BYTE_12_SQ_HOP_NUM_S 29
-#define V2_QPC_BYTE_12_SQ_HOP_NUM_M GENMASK(30, 29)
-
-#define V2_QPC_BYTE_12_RSVD_LKEY_EN_S 31
-
-#define V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S 0
-#define V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M GENMASK(3, 0)
-
-#define V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S 4
-#define V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M GENMASK(7, 4)
-
-#define V2_QPC_BYTE_16_PD_S 8
-#define V2_QPC_BYTE_16_PD_M GENMASK(31, 8)
-
-#define V2_QPC_BYTE_20_RQ_HOP_NUM_S 0
-#define V2_QPC_BYTE_20_RQ_HOP_NUM_M GENMASK(1, 0)
-
-#define V2_QPC_BYTE_20_SGE_HOP_NUM_S 2
-#define V2_QPC_BYTE_20_SGE_HOP_NUM_M GENMASK(3, 2)
-
-#define V2_QPC_BYTE_20_RQWS_S 4
-#define V2_QPC_BYTE_20_RQWS_M GENMASK(7, 4)
-
-#define V2_QPC_BYTE_20_SQ_SHIFT_S 8
-#define V2_QPC_BYTE_20_SQ_SHIFT_M GENMASK(11, 8)
-
-#define V2_QPC_BYTE_20_RQ_SHIFT_S 12
-#define V2_QPC_BYTE_20_RQ_SHIFT_M GENMASK(15, 12)
-
-#define V2_QPC_BYTE_20_SGID_IDX_S 16
-#define V2_QPC_BYTE_20_SGID_IDX_M GENMASK(23, 16)
-
-#define V2_QPC_BYTE_20_SMAC_IDX_S 24
-#define V2_QPC_BYTE_20_SMAC_IDX_M GENMASK(31, 24)
-
-#define V2_QPC_BYTE_24_HOP_LIMIT_S 0
-#define V2_QPC_BYTE_24_HOP_LIMIT_M GENMASK(7, 0)
-
-#define V2_QPC_BYTE_24_TC_S 8
-#define V2_QPC_BYTE_24_TC_M GENMASK(15, 8)
-
-#define V2_QPC_BYTE_24_VLAN_ID_S 16
-#define V2_QPC_BYTE_24_VLAN_ID_M GENMASK(27, 16)
-
-#define V2_QPC_BYTE_24_MTU_S 28
-#define V2_QPC_BYTE_24_MTU_M GENMASK(31, 28)
-
-#define V2_QPC_BYTE_28_FL_S 0
-#define V2_QPC_BYTE_28_FL_M GENMASK(19, 0)
-
-#define V2_QPC_BYTE_28_SL_S 20
-#define V2_QPC_BYTE_28_SL_M GENMASK(23, 20)
-
-#define V2_QPC_BYTE_28_CNP_TX_FLAG_S 24
-
-#define V2_QPC_BYTE_28_CE_FLAG_S 25
-
-#define V2_QPC_BYTE_28_LBI_S 26
-
-#define V2_QPC_BYTE_28_AT_S 27
-#define V2_QPC_BYTE_28_AT_M GENMASK(31, 27)
-
-#define V2_QPC_BYTE_52_DMAC_S 0
-#define V2_QPC_BYTE_52_DMAC_M GENMASK(15, 0)
-
-#define V2_QPC_BYTE_52_UDPSPN_S 16
-#define V2_QPC_BYTE_52_UDPSPN_M GENMASK(31, 16)
-
-#define V2_QPC_BYTE_56_DQPN_S 0
-#define V2_QPC_BYTE_56_DQPN_M GENMASK(23, 0)
-
-#define V2_QPC_BYTE_56_SQ_TX_ERR_S 24
-#define V2_QPC_BYTE_56_SQ_RX_ERR_S 25
-#define V2_QPC_BYTE_56_RQ_TX_ERR_S 26
-#define V2_QPC_BYTE_56_RQ_RX_ERR_S 27
-
-#define V2_QPC_BYTE_56_LP_PKTN_INI_S 28
-#define V2_QPC_BYTE_56_LP_PKTN_INI_M GENMASK(31, 28)
-
-#define V2_QPC_BYTE_60_TEMPID_S 0
-#define V2_QPC_BYTE_60_TEMPID_M GENMASK(7, 0)
-
-#define V2_QPC_BYTE_60_SCC_TOKEN_S 8
-#define V2_QPC_BYTE_60_SCC_TOKEN_M GENMASK(26, 8)
-
-#define V2_QPC_BYTE_60_SQ_DB_DOING_S 27
-
-#define V2_QPC_BYTE_60_RQ_DB_DOING_S 28
-
-#define V2_QPC_BYTE_60_QP_ST_S 29
-#define V2_QPC_BYTE_60_QP_ST_M GENMASK(31, 29)
-
-#define V2_QPC_BYTE_68_RQ_RECORD_EN_S 0
-
-#define V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_S 1
-#define V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_M GENMASK(31, 1)
-
-#define V2_QPC_BYTE_76_SRQN_S 0
-#define V2_QPC_BYTE_76_SRQN_M GENMASK(23, 0)
-
-#define V2_QPC_BYTE_76_SRQ_EN_S 24
-
-#define V2_QPC_BYTE_76_RRE_S 25
-
-#define V2_QPC_BYTE_76_RWE_S 26
-
-#define V2_QPC_BYTE_76_ATE_S 27
-
-#define V2_QPC_BYTE_76_RQIE_S 28
-
-#define V2_QPC_BYTE_76_RQ_VLAN_EN_S 30
-#define V2_QPC_BYTE_80_RX_CQN_S 0
-#define V2_QPC_BYTE_80_RX_CQN_M GENMASK(23, 0)
-
-#define V2_QPC_BYTE_80_MIN_RNR_TIME_S 27
-#define V2_QPC_BYTE_80_MIN_RNR_TIME_M GENMASK(31, 27)
-
-#define V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S 0
-#define V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M GENMASK(15, 0)
-
-#define V2_QPC_BYTE_84_RQ_CONSUMER_IDX_S 16
-#define V2_QPC_BYTE_84_RQ_CONSUMER_IDX_M GENMASK(31, 16)
-
-#define V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_S 0
-#define V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_M GENMASK(19, 0)
-
-#define V2_QPC_BYTE_92_SRQ_INFO_S 20
-#define V2_QPC_BYTE_92_SRQ_INFO_M GENMASK(31, 20)
-
-#define V2_QPC_BYTE_96_RX_REQ_MSN_S 0
-#define V2_QPC_BYTE_96_RX_REQ_MSN_M GENMASK(23, 0)
-
-#define V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_S 0
-#define V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_M GENMASK(19, 0)
-
-#define V2_QPC_BYTE_104_RQ_CUR_WQE_SGE_NUM_S 24
-#define V2_QPC_BYTE_104_RQ_CUR_WQE_SGE_NUM_M GENMASK(31, 24)
-
-#define V2_QPC_BYTE_108_INV_CREDIT_S 0
-
-#define V2_QPC_BYTE_108_RX_REQ_PSN_ERR_S 3
-
-#define V2_QPC_BYTE_108_RX_REQ_LAST_OPTYPE_S 4
-#define V2_QPC_BYTE_108_RX_REQ_LAST_OPTYPE_M GENMASK(6, 4)
-
-#define V2_QPC_BYTE_108_RX_REQ_RNR_S 7
-
-#define V2_QPC_BYTE_108_RX_REQ_EPSN_S 8
-#define V2_QPC_BYTE_108_RX_REQ_EPSN_M GENMASK(31, 8)
-
-#define V2_QPC_BYTE_132_TRRL_HEAD_MAX_S 0
-#define V2_QPC_BYTE_132_TRRL_HEAD_MAX_M GENMASK(7, 0)
-
-#define V2_QPC_BYTE_132_TRRL_TAIL_MAX_S 8
-#define V2_QPC_BYTE_132_TRRL_TAIL_MAX_M GENMASK(15, 8)
-
-#define V2_QPC_BYTE_132_TRRL_BA_S 16
-#define V2_QPC_BYTE_132_TRRL_BA_M GENMASK(31, 16)
-
-#define V2_QPC_BYTE_140_TRRL_BA_S 0
-#define V2_QPC_BYTE_140_TRRL_BA_M GENMASK(11, 0)
-
-#define V2_QPC_BYTE_140_RR_MAX_S 12
-#define V2_QPC_BYTE_140_RR_MAX_M GENMASK(14, 12)
-
-#define V2_QPC_BYTE_140_RQ_RTY_WAIT_DO_S 15
-
-#define V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S 16
-#define V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M GENMASK(23, 16)
-
-#define V2_QPC_BYTE_140_RAQ_TRRL_TAIL_S 24
-#define V2_QPC_BYTE_140_RAQ_TRRL_TAIL_M GENMASK(31, 24)
-
-#define V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S 0
-#define V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M GENMASK(23, 0)
-
-#define V2_QPC_BYTE_144_RAQ_CREDIT_S 25
-#define V2_QPC_BYTE_144_RAQ_CREDIT_M GENMASK(29, 25)
-
-#define V2_QPC_BYTE_144_RESP_RTY_FLG_S 31
-
-#define V2_QPC_BYTE_148_RQ_MSN_S 0
-#define V2_QPC_BYTE_148_RQ_MSN_M GENMASK(23, 0)
-
-#define V2_QPC_BYTE_148_RAQ_SYNDROME_S 24
-#define V2_QPC_BYTE_148_RAQ_SYNDROME_M GENMASK(31, 24)
-
-#define V2_QPC_BYTE_152_RAQ_PSN_S 0
-#define V2_QPC_BYTE_152_RAQ_PSN_M GENMASK(23, 0)
-
-#define V2_QPC_BYTE_152_RAQ_TRRL_RTY_HEAD_S 24
-#define V2_QPC_BYTE_152_RAQ_TRRL_RTY_HEAD_M GENMASK(31, 24)
-
-#define V2_QPC_BYTE_156_RAQ_USE_PKTN_S 0
-#define V2_QPC_BYTE_156_RAQ_USE_PKTN_M GENMASK(23, 0)
-
-#define V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S 0
-#define V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M GENMASK(15, 0)
-
-#define V2_QPC_BYTE_160_SQ_CONSUMER_IDX_S 16
-#define V2_QPC_BYTE_160_SQ_CONSUMER_IDX_M GENMASK(31, 16)
-
-#define V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S 0
-#define V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M GENMASK(19, 0)
-
-#define V2_QPC_BYTE_168_MSG_RTY_LP_FLG_S 20
-
-#define V2_QPC_BYTE_168_SQ_INVLD_FLG_S 21
-
-#define V2_QPC_BYTE_168_LP_SGEN_INI_S 22
-#define V2_QPC_BYTE_168_LP_SGEN_INI_M GENMASK(23, 22)
-
-#define V2_QPC_BYTE_168_SQ_VLAN_EN_S 24
-#define V2_QPC_BYTE_168_POLL_DB_WAIT_DO_S 25
-#define V2_QPC_BYTE_168_SCC_TOKEN_FORBID_SQ_DEQ_S 26
-#define V2_QPC_BYTE_168_WAIT_ACK_TIMEOUT_S 27
-#define V2_QPC_BYTE_168_IRRL_IDX_LSB_S 28
-#define V2_QPC_BYTE_168_IRRL_IDX_LSB_M GENMASK(31, 28)
-
-#define V2_QPC_BYTE_172_ACK_REQ_FREQ_S 0
-#define V2_QPC_BYTE_172_ACK_REQ_FREQ_M GENMASK(5, 0)
-
-#define V2_QPC_BYTE_172_MSG_RNR_FLG_S 6
-
-#define V2_QPC_BYTE_172_FRE_S 7
-
-#define V2_QPC_BYTE_172_SQ_CUR_PSN_S 8
-#define V2_QPC_BYTE_172_SQ_CUR_PSN_M GENMASK(31, 8)
-
-#define V2_QPC_BYTE_176_MSG_USE_PKTN_S 0
-#define V2_QPC_BYTE_176_MSG_USE_PKTN_M GENMASK(23, 0)
-
-#define V2_QPC_BYTE_176_IRRL_HEAD_PRE_S 24
-#define V2_QPC_BYTE_176_IRRL_HEAD_PRE_M GENMASK(31, 24)
-
-#define V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S 0
-#define V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M GENMASK(19, 0)
-
-#define V2_QPC_BYTE_184_IRRL_IDX_MSB_S 20
-#define V2_QPC_BYTE_184_IRRL_IDX_MSB_M GENMASK(31, 20)
-
-#define V2_QPC_BYTE_192_CUR_SGE_IDX_S 0
-#define V2_QPC_BYTE_192_CUR_SGE_IDX_M GENMASK(23, 0)
-
-#define V2_QPC_BYTE_192_EXT_SGE_NUM_LEFT_S 24
-#define V2_QPC_BYTE_192_EXT_SGE_NUM_LEFT_M GENMASK(31, 24)
-
-#define V2_QPC_BYTE_196_IRRL_HEAD_S 0
-#define V2_QPC_BYTE_196_IRRL_HEAD_M GENMASK(7, 0)
-
-#define V2_QPC_BYTE_196_SQ_MAX_PSN_S 8
-#define V2_QPC_BYTE_196_SQ_MAX_PSN_M GENMASK(31, 8)
-
-#define V2_QPC_BYTE_200_SQ_MAX_IDX_S 0
-#define V2_QPC_BYTE_200_SQ_MAX_IDX_M GENMASK(15, 0)
-
-#define V2_QPC_BYTE_200_LCL_OPERATED_CNT_S 16
-#define V2_QPC_BYTE_200_LCL_OPERATED_CNT_M GENMASK(31, 16)
-
-#define V2_QPC_BYTE_208_IRRL_BA_S 0
-#define V2_QPC_BYTE_208_IRRL_BA_M GENMASK(25, 0)
-
-#define V2_QPC_BYTE_208_PKT_RNR_FLG_S 26
-
-#define V2_QPC_BYTE_208_PKT_RTY_FLG_S 27
-
-#define V2_QPC_BYTE_208_RMT_E2E_S 28
-
-#define V2_QPC_BYTE_208_SR_MAX_S 29
-#define V2_QPC_BYTE_208_SR_MAX_M GENMASK(31, 29)
-
-#define V2_QPC_BYTE_212_LSN_S 0
-#define V2_QPC_BYTE_212_LSN_M GENMASK(23, 0)
-
-#define V2_QPC_BYTE_212_RETRY_NUM_INIT_S 24
-#define V2_QPC_BYTE_212_RETRY_NUM_INIT_M GENMASK(26, 24)
-
-#define V2_QPC_BYTE_212_CHECK_FLG_S 27
-#define V2_QPC_BYTE_212_CHECK_FLG_M GENMASK(28, 27)
-
-#define V2_QPC_BYTE_212_RETRY_CNT_S 29
-#define V2_QPC_BYTE_212_RETRY_CNT_M GENMASK(31, 29)
-
-#define V2_QPC_BYTE_220_RETRY_MSG_MSN_S 0
-#define V2_QPC_BYTE_220_RETRY_MSG_MSN_M GENMASK(15, 0)
-
-#define V2_QPC_BYTE_220_RETRY_MSG_PSN_S 16
-#define V2_QPC_BYTE_220_RETRY_MSG_PSN_M GENMASK(31, 16)
-
-#define V2_QPC_BYTE_224_RETRY_MSG_PSN_S 0
-#define V2_QPC_BYTE_224_RETRY_MSG_PSN_M GENMASK(7, 0)
-
-#define V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_S 8
-#define V2_QPC_BYTE_224_RETRY_MSG_FPKT_PSN_M GENMASK(31, 8)
-
-#define V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_S 0
-#define V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_M GENMASK(19, 0)
-
-#define V2_QPC_BYTE_232_IRRL_SGE_IDX_S 20
-#define V2_QPC_BYTE_232_IRRL_SGE_IDX_M GENMASK(28, 20)
-
-#define V2_QPC_BYTE_232_SO_LP_VLD_S 29
-#define V2_QPC_BYTE_232_FENCE_LP_VLD_S 30
-#define V2_QPC_BYTE_232_IRRL_LP_VLD_S 31
-
-#define V2_QPC_BYTE_240_IRRL_TAIL_REAL_S 0
-#define V2_QPC_BYTE_240_IRRL_TAIL_REAL_M GENMASK(7, 0)
-
-#define V2_QPC_BYTE_240_IRRL_TAIL_RD_S 8
-#define V2_QPC_BYTE_240_IRRL_TAIL_RD_M GENMASK(15, 8)
-
-#define V2_QPC_BYTE_240_RX_ACK_MSN_S 16
-#define V2_QPC_BYTE_240_RX_ACK_MSN_M GENMASK(31, 16)
-
-#define V2_QPC_BYTE_244_RX_ACK_EPSN_S 0
-#define V2_QPC_BYTE_244_RX_ACK_EPSN_M GENMASK(23, 0)
-
-#define V2_QPC_BYTE_244_RNR_NUM_INIT_S 24
-#define V2_QPC_BYTE_244_RNR_NUM_INIT_M GENMASK(26, 24)
-
-#define V2_QPC_BYTE_244_RNR_CNT_S 27
-#define V2_QPC_BYTE_244_RNR_CNT_M GENMASK(29, 27)
-
-#define V2_QPC_BYTE_244_LCL_OP_FLG_S 30
-#define V2_QPC_BYTE_244_IRRL_RD_FLG_S 31
-
-#define V2_QPC_BYTE_248_IRRL_PSN_S 0
-#define V2_QPC_BYTE_248_IRRL_PSN_M GENMASK(23, 0)
-
-#define V2_QPC_BYTE_248_ACK_PSN_ERR_S 24
-
-#define V2_QPC_BYTE_248_ACK_LAST_OPTYPE_S 25
-#define V2_QPC_BYTE_248_ACK_LAST_OPTYPE_M GENMASK(26, 25)
-
-#define V2_QPC_BYTE_248_IRRL_PSN_VLD_S 27
-
-#define V2_QPC_BYTE_248_RNR_RETRY_FLAG_S 28
-
-#define V2_QPC_BYTE_248_CQ_ERR_IND_S 31
-
-#define V2_QPC_BYTE_252_TX_CQN_S 0
-#define V2_QPC_BYTE_252_TX_CQN_M GENMASK(23, 0)
-
-#define V2_QPC_BYTE_252_SIG_TYPE_S 24
-
-#define V2_QPC_BYTE_252_ERR_TYPE_S 25
-#define V2_QPC_BYTE_252_ERR_TYPE_M GENMASK(31, 25)
+struct hns_roce_v2_qp_context {
+ __le32 byte_4_sqpn_tst;
+ __le32 wqe_sge_ba;
+ __le32 byte_12_sq_hop;
+ __le32 byte_16_buf_ba_pg_sz;
+ __le32 byte_20_smac_sgid_idx;
+ __le32 byte_24_mtu_tc;
+ __le32 byte_28_at_fl;
+ u8 dgid[GID_LEN_V2];
+ __le32 dmac;
+ __le32 byte_52_udpspn_dmac;
+ __le32 byte_56_dqpn_err;
+ __le32 byte_60_qpst_tempid;
+ __le32 qkey_xrcd;
+ __le32 byte_68_rq_db;
+ __le32 rq_db_record_addr;
+ __le32 byte_76_srqn_op_en;
+ __le32 byte_80_rnr_rx_cqn;
+ __le32 byte_84_rq_ci_pi;
+ __le32 rq_cur_blk_addr;
+ __le32 byte_92_srq_info;
+ __le32 byte_96_rx_reqmsn;
+ __le32 rq_nxt_blk_addr;
+ __le32 byte_104_rq_sge;
+ __le32 byte_108_rx_reqepsn;
+ __le32 rq_rnr_timer;
+ __le32 rx_msg_len;
+ __le32 rx_rkey_pkt_info;
+ __le64 rx_va;
+ __le32 byte_132_trrl;
+ __le32 trrl_ba;
+ __le32 byte_140_raq;
+ __le32 byte_144_raq;
+ __le32 byte_148_raq;
+ __le32 byte_152_raq;
+ __le32 byte_156_raq;
+ __le32 byte_160_sq_ci_pi;
+ __le32 sq_cur_blk_addr;
+ __le32 byte_168_irrl_idx;
+ __le32 byte_172_sq_psn;
+ __le32 byte_176_msg_pktn;
+ __le32 sq_cur_sge_blk_addr;
+ __le32 byte_184_irrl_idx;
+ __le32 cur_sge_offset;
+ __le32 byte_192_ext_sge;
+ __le32 byte_196_sq_psn;
+ __le32 byte_200_sq_max;
+ __le32 irrl_ba;
+ __le32 byte_208_irrl;
+ __le32 byte_212_lsn;
+ __le32 sq_timer;
+ __le32 byte_220_retry_psn_msn;
+ __le32 byte_224_retry_msg;
+ __le32 rx_sq_cur_blk_addr;
+ __le32 byte_232_irrl_sge;
+ __le32 irrl_cur_sge_offset;
+ __le32 byte_240_irrl_tail;
+ __le32 byte_244_rnr_rxack;
+ __le32 byte_248_ack_psn;
+ __le32 byte_252_err_txcqn;
+ __le32 byte_256_sqflush_rqcqe;
+
+ struct hns_roce_v2_qp_context_ex ext;
+};
-#define V2_QPC_BYTE_256_RQ_CQE_IDX_S 0
-#define V2_QPC_BYTE_256_RQ_CQE_IDX_M GENMASK(15, 0)
+#define QPC_TRRL_BA_L_S 4
+#define QPC_TRRL_BA_M_S (16 + QPC_TRRL_BA_L_S)
+#define QPC_TRRL_BA_H_S (32 + QPC_TRRL_BA_M_S)
+#define QPC_IRRL_BA_L_S 6
+#define QPC_IRRL_BA_H_S (32 + QPC_IRRL_BA_L_S)
+
+#define QPC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_qp_context, h, l)
+
+#define QPC_TST QPC_FIELD_LOC(2, 0)
+#define QPC_SGE_SHIFT QPC_FIELD_LOC(7, 3)
+#define QPC_CNP_TIMER QPC_FIELD_LOC(31, 8)
+#define QPC_WQE_SGE_BA_L QPC_FIELD_LOC(63, 32)
+#define QPC_WQE_SGE_BA_H QPC_FIELD_LOC(92, 64)
+#define QPC_SQ_HOP_NUM QPC_FIELD_LOC(94, 93)
+#define QPC_CIRE_EN QPC_FIELD_LOC(95, 95)
+#define QPC_WQE_SGE_BA_PG_SZ QPC_FIELD_LOC(99, 96)
+#define QPC_WQE_SGE_BUF_PG_SZ QPC_FIELD_LOC(103, 100)
+#define QPC_PD QPC_FIELD_LOC(127, 104)
+#define QPC_RQ_HOP_NUM QPC_FIELD_LOC(129, 128)
+#define QPC_SGE_HOP_NUM QPC_FIELD_LOC(131, 130)
+#define QPC_RQWS QPC_FIELD_LOC(135, 132)
+#define QPC_SQ_SHIFT QPC_FIELD_LOC(139, 136)
+#define QPC_RQ_SHIFT QPC_FIELD_LOC(143, 140)
+#define QPC_GMV_IDX QPC_FIELD_LOC(159, 144)
+#define QPC_HOPLIMIT QPC_FIELD_LOC(167, 160)
+#define QPC_TC QPC_FIELD_LOC(175, 168)
+#define QPC_VLAN_ID QPC_FIELD_LOC(187, 176)
+#define QPC_MTU QPC_FIELD_LOC(191, 188)
+#define QPC_FL QPC_FIELD_LOC(211, 192)
+#define QPC_SL QPC_FIELD_LOC(215, 212)
+#define QPC_CNP_TX_FLAG QPC_FIELD_LOC(216, 216)
+#define QPC_CE_FLAG QPC_FIELD_LOC(217, 217)
+#define QPC_LBI QPC_FIELD_LOC(218, 218)
+#define QPC_AT QPC_FIELD_LOC(223, 219)
+#define QPC_DGID QPC_FIELD_LOC(351, 224)
+#define QPC_DMAC_L QPC_FIELD_LOC(383, 352)
+#define QPC_DMAC_H QPC_FIELD_LOC(399, 384)
+#define QPC_UDPSPN QPC_FIELD_LOC(415, 400)
+#define QPC_DQPN QPC_FIELD_LOC(439, 416)
+#define QPC_SQ_TX_ERR QPC_FIELD_LOC(440, 440)
+#define QPC_SQ_RX_ERR QPC_FIELD_LOC(441, 441)
+#define QPC_RQ_TX_ERR QPC_FIELD_LOC(442, 442)
+#define QPC_RQ_RX_ERR QPC_FIELD_LOC(443, 443)
+#define QPC_LP_PKTN_INI QPC_FIELD_LOC(447, 444)
+#define QPC_CONG_ALGO_TMPL_ID QPC_FIELD_LOC(455, 448)
+#define QPC_SCC_TOKEN QPC_FIELD_LOC(474, 456)
+#define QPC_SQ_DB_DOING QPC_FIELD_LOC(475, 475)
+#define QPC_RQ_DB_DOING QPC_FIELD_LOC(476, 476)
+#define QPC_QP_ST QPC_FIELD_LOC(479, 477)
+#define QPC_QKEY_XRCD QPC_FIELD_LOC(511, 480)
+#define QPC_RQ_RECORD_EN QPC_FIELD_LOC(512, 512)
+#define QPC_RQ_DB_RECORD_ADDR_L QPC_FIELD_LOC(543, 513)
+#define QPC_RQ_DB_RECORD_ADDR_H QPC_FIELD_LOC(575, 544)
+#define QPC_SRQN QPC_FIELD_LOC(599, 576)
+#define QPC_SRQ_EN QPC_FIELD_LOC(600, 600)
+#define QPC_RRE QPC_FIELD_LOC(601, 601)
+#define QPC_RWE QPC_FIELD_LOC(602, 602)
+#define QPC_ATE QPC_FIELD_LOC(603, 603)
+#define QPC_RQIE QPC_FIELD_LOC(604, 604)
+#define QPC_EXT_ATE QPC_FIELD_LOC(605, 605)
+#define QPC_RQ_VLAN_EN QPC_FIELD_LOC(606, 606)
+#define QPC_RQ_RTY_TX_ERR QPC_FIELD_LOC(607, 607)
+#define QPC_RX_CQN QPC_FIELD_LOC(631, 608)
+#define QPC_XRC_QP_TYPE QPC_FIELD_LOC(632, 632)
+#define QPC_CQEIE QPC_FIELD_LOC(633, 633)
+#define QPC_CQEIS QPC_FIELD_LOC(634, 634)
+#define QPC_MIN_RNR_TIME QPC_FIELD_LOC(639, 635)
+#define QPC_RQ_PRODUCER_IDX QPC_FIELD_LOC(655, 640)
+#define QPC_RQ_CONSUMER_IDX QPC_FIELD_LOC(671, 656)
+#define QPC_RQ_CUR_BLK_ADDR_L QPC_FIELD_LOC(703, 672)
+#define QPC_RQ_CUR_BLK_ADDR_H QPC_FIELD_LOC(723, 704)
+#define QPC_SRQ_INFO QPC_FIELD_LOC(735, 724)
+#define QPC_RX_REQ_MSN QPC_FIELD_LOC(759, 736)
+#define QPC_REDUCE_CODE QPC_FIELD_LOC(766, 760)
+#define QPC_RX_XRC_PKT_CQE_FLG QPC_FIELD_LOC(767, 767)
+#define QPC_RQ_NXT_BLK_ADDR_L QPC_FIELD_LOC(799, 768)
+#define QPC_RQ_NXT_BLK_ADDR_H QPC_FIELD_LOC(819, 800)
+#define QPC_REDUCE_EN QPC_FIELD_LOC(820, 820)
+#define QPC_FLUSH_EN QPC_FIELD_LOC(821, 821)
+#define QPC_AW_EN QPC_FIELD_LOC(822, 822)
+#define QPC_WN_EN QPC_FIELD_LOC(823, 823)
+#define QPC_RQ_CUR_WQE_SGE_NUM QPC_FIELD_LOC(831, 824)
+#define QPC_INV_CREDIT QPC_FIELD_LOC(832, 832)
+#define QPC_LAST_WRITE_TYPE QPC_FIELD_LOC(834, 833)
+#define QPC_RX_REQ_PSN_ERR QPC_FIELD_LOC(835, 835)
+#define QPC_RX_REQ_LAST_OPTYPE QPC_FIELD_LOC(838, 836)
+#define QPC_RX_REQ_RNR QPC_FIELD_LOC(839, 839)
+#define QPC_RX_REQ_EPSN QPC_FIELD_LOC(863, 840)
+#define QPC_RQ_RNR_TIMER QPC_FIELD_LOC(895, 864)
+#define QPC_RX_MSG_LEN QPC_FIELD_LOC(927, 896)
+#define QPC_RX_RKEY_PKT_INFO QPC_FIELD_LOC(959, 928)
+#define QPC_RX_VA QPC_FIELD_LOC(1023, 960)
+#define QPC_TRRL_HEAD_MAX QPC_FIELD_LOC(1031, 1024)
+#define QPC_TRRL_TAIL_MAX QPC_FIELD_LOC(1039, 1032)
+#define QPC_TRRL_BA_L QPC_FIELD_LOC(1055, 1040)
+#define QPC_TRRL_BA_M QPC_FIELD_LOC(1087, 1056)
+#define QPC_TRRL_BA_H QPC_FIELD_LOC(1099, 1088)
+#define QPC_RR_MAX QPC_FIELD_LOC(1102, 1100)
+#define QPC_RQ_RTY_WAIT_DO QPC_FIELD_LOC(1103, 1103)
+#define QPC_RAQ_TRRL_HEAD QPC_FIELD_LOC(1111, 1104)
+#define QPC_RAQ_TRRL_TAIL QPC_FIELD_LOC(1119, 1112)
+#define QPC_RAQ_RTY_INI_PSN QPC_FIELD_LOC(1143, 1120)
+#define QPC_CIRE_SLV_RQ_EN QPC_FIELD_LOC(1144, 1144)
+#define QPC_RAQ_CREDIT QPC_FIELD_LOC(1149, 1145)
+#define QPC_RQ_DB_IN_EXT QPC_FIELD_LOC(1150, 1150)
+#define QPC_RESP_RTY_FLG QPC_FIELD_LOC(1151, 1151)
+#define QPC_RAQ_MSN QPC_FIELD_LOC(1175, 1152)
+#define QPC_RAQ_SYNDROME QPC_FIELD_LOC(1183, 1176)
+#define QPC_RAQ_PSN QPC_FIELD_LOC(1207, 1184)
+#define QPC_RAQ_TRRL_RTY_HEAD QPC_FIELD_LOC(1215, 1208)
+#define QPC_RAQ_USE_PKTN QPC_FIELD_LOC(1239, 1216)
+#define QPC_RQ_SCC_TOKEN QPC_FIELD_LOC(1245, 1240)
+#define QPC_RVD10 QPC_FIELD_LOC(1247, 1246)
+#define QPC_SQ_PRODUCER_IDX QPC_FIELD_LOC(1263, 1248)
+#define QPC_SQ_CONSUMER_IDX QPC_FIELD_LOC(1279, 1264)
+#define QPC_SQ_CUR_BLK_ADDR_L QPC_FIELD_LOC(1311, 1280)
+#define QPC_SQ_CUR_BLK_ADDR_H QPC_FIELD_LOC(1331, 1312)
+#define QPC_MSG_RTY_LP_FLG QPC_FIELD_LOC(1332, 1332)
+#define QPC_SQ_INVLD_FLG QPC_FIELD_LOC(1333, 1333)
+#define QPC_LP_SGEN_INI QPC_FIELD_LOC(1335, 1334)
+#define QPC_SQ_VLAN_EN QPC_FIELD_LOC(1336, 1336)
+#define QPC_POLL_DB_WAIT_DO QPC_FIELD_LOC(1337, 1337)
+#define QPC_SCC_TOKEN_FORBID_SQ_DEQ QPC_FIELD_LOC(1338, 1338)
+#define QPC_WAIT_ACK_TIMEOUT QPC_FIELD_LOC(1339, 1339)
+#define QPC_IRRL_IDX_LSB QPC_FIELD_LOC(1343, 1340)
+#define QPC_ACK_REQ_FREQ QPC_FIELD_LOC(1349, 1344)
+#define QPC_MSG_RNR_FLG QPC_FIELD_LOC(1350, 1350)
+#define QPC_FRE QPC_FIELD_LOC(1351, 1351)
+#define QPC_SQ_CUR_PSN QPC_FIELD_LOC(1375, 1352)
+#define QPC_MSG_USE_PKTN QPC_FIELD_LOC(1399, 1376)
+#define QPC_IRRL_HEAD_PRE QPC_FIELD_LOC(1407, 1400)
+#define QPC_SQ_CUR_SGE_BLK_ADDR_L QPC_FIELD_LOC(1439, 1408)
+#define QPC_SQ_CUR_SGE_BLK_ADDR_H QPC_FIELD_LOC(1459, 1440)
+#define QPC_IRRL_IDX_MSB QPC_FIELD_LOC(1471, 1460)
+#define QPC_CUR_SGE_OFFSET QPC_FIELD_LOC(1503, 1472)
+#define QPC_CUR_SGE_IDX QPC_FIELD_LOC(1527, 1504)
+#define QPC_EXT_SGE_NUM_LEFT QPC_FIELD_LOC(1535, 1528)
+#define QPC_OWNER_MODE QPC_FIELD_LOC(1536, 1536)
+#define QPC_CIRE_SLV_SQ_EN QPC_FIELD_LOC(1537, 1537)
+#define QPC_CIRE_DOING QPC_FIELD_LOC(1538, 1538)
+#define QPC_CIRE_RESULT QPC_FIELD_LOC(1539, 1539)
+#define QPC_OWNER_DB_WAIT_DO QPC_FIELD_LOC(1540, 1540)
+#define QPC_SQ_WQE_INVLD QPC_FIELD_LOC(1541, 1541)
+#define QPC_DCA_MODE QPC_FIELD_LOC(1542, 1542)
+#define QPC_RTY_OWNER_NOCHK QPC_FIELD_LOC(1543, 1543)
+#define QPC_V2_IRRL_HEAD QPC_FIELD_LOC(1543, 1536)
+#define QPC_SQ_MAX_PSN QPC_FIELD_LOC(1567, 1544)
+#define QPC_SQ_MAX_IDX QPC_FIELD_LOC(1583, 1568)
+#define QPC_LCL_OPERATED_CNT QPC_FIELD_LOC(1599, 1584)
+#define QPC_IRRL_BA_L QPC_FIELD_LOC(1631, 1600)
+#define QPC_IRRL_BA_H QPC_FIELD_LOC(1657, 1632)
+#define QPC_PKT_RNR_FLG QPC_FIELD_LOC(1658, 1658)
+#define QPC_PKT_RTY_FLG QPC_FIELD_LOC(1659, 1659)
+#define QPC_RMT_E2E QPC_FIELD_LOC(1660, 1660)
+#define QPC_SR_MAX QPC_FIELD_LOC(1663, 1661)
+#define QPC_LSN QPC_FIELD_LOC(1687, 1664)
+#define QPC_RETRY_NUM_INIT QPC_FIELD_LOC(1690, 1688)
+#define QPC_CHECK_FLG QPC_FIELD_LOC(1692, 1691)
+#define QPC_RETRY_CNT QPC_FIELD_LOC(1695, 1693)
+#define QPC_SQ_TIMER QPC_FIELD_LOC(1727, 1696)
+#define QPC_RETRY_MSG_MSN QPC_FIELD_LOC(1743, 1728)
+#define QPC_RETRY_MSG_PSN_L QPC_FIELD_LOC(1759, 1744)
+#define QPC_RETRY_MSG_PSN_H QPC_FIELD_LOC(1767, 1760)
+#define QPC_RETRY_MSG_FPKT_PSN QPC_FIELD_LOC(1791, 1768)
+#define QPC_RX_SQ_CUR_BLK_ADDR_L QPC_FIELD_LOC(1823, 1792)
+#define QPC_RX_SQ_CUR_BLK_ADDR_H QPC_FIELD_LOC(1843, 1824)
+#define QPC_IRRL_SGE_IDX QPC_FIELD_LOC(1851, 1844)
+#define QPC_LSAN_EN QPC_FIELD_LOC(1852, 1852)
+#define QPC_SO_LP_VLD QPC_FIELD_LOC(1853, 1853)
+#define QPC_FENCE_LP_VLD QPC_FIELD_LOC(1854, 1854)
+#define QPC_IRRL_LP_VLD QPC_FIELD_LOC(1855, 1855)
+#define QPC_IRRL_CUR_SGE_OFFSET QPC_FIELD_LOC(1887, 1856)
+#define QPC_IRRL_TAIL_REAL QPC_FIELD_LOC(1895, 1888)
+#define QPC_IRRL_TAIL_RD QPC_FIELD_LOC(1903, 1896)
+#define QPC_RX_ACK_MSN QPC_FIELD_LOC(1919, 1904)
+#define QPC_RX_ACK_EPSN QPC_FIELD_LOC(1943, 1920)
+#define QPC_RNR_NUM_INIT QPC_FIELD_LOC(1946, 1944)
+#define QPC_RNR_CNT QPC_FIELD_LOC(1949, 1947)
+#define QPC_LCL_OP_FLG QPC_FIELD_LOC(1950, 1950)
+#define QPC_IRRL_RD_FLG QPC_FIELD_LOC(1951, 1951)
+#define QPC_IRRL_PSN QPC_FIELD_LOC(1975, 1952)
+#define QPC_ACK_PSN_ERR QPC_FIELD_LOC(1976, 1976)
+#define QPC_ACK_LAST_OPTYPE QPC_FIELD_LOC(1978, 1977)
+#define QPC_IRRL_PSN_VLD QPC_FIELD_LOC(1979, 1979)
+#define QPC_RNR_RETRY_FLAG QPC_FIELD_LOC(1980, 1980)
+#define QPC_SQ_RTY_TX_ERR QPC_FIELD_LOC(1981, 1981)
+#define QPC_LAST_IND QPC_FIELD_LOC(1982, 1982)
+#define QPC_CQ_ERR_IND QPC_FIELD_LOC(1983, 1983)
+#define QPC_TX_CQN QPC_FIELD_LOC(2007, 1984)
+#define QPC_SIG_TYPE QPC_FIELD_LOC(2008, 2008)
+#define QPC_ERR_TYPE QPC_FIELD_LOC(2015, 2009)
+#define QPC_RQ_CQE_IDX QPC_FIELD_LOC(2031, 2016)
+#define QPC_SQ_FLUSH_IDX QPC_FIELD_LOC(2047, 2032)
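The QPC_* locators above replace the old V2_QPC_BYTE_*_S/_M shift-and-mask
pairs with a single (high bit, low bit) pair per field. A minimal sketch of
how such locators are consumed, assuming the hr_reg_write()/hr_reg_read()
helpers from hns_roce_common.h and the hns_roce_v2_qp_context structure they
index into (neither shown in this hunk):

/* Illustrative sketch, not part of this patch. */
static void qpc_field_example(struct hns_roce_v2_qp_context *ctx, u32 tx_cqn)
{
	u32 rx_epsn;

	hr_reg_write(ctx, QPC_TX_CQN, tx_cqn);		/* bits 2007..1984 */
	rx_epsn = hr_reg_read(ctx, QPC_RX_REQ_EPSN);	/* bits 863..840 */
	(void)rx_epsn;
}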
+
+#define RETRY_MSG_PSN_SHIFT 16
+
+#define QPCEX_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_qp_context_ex, h, l)
+
+#define QPCEX_CONG_ALG_SEL QPCEX_FIELD_LOC(0, 0)
+#define QPCEX_CONG_ALG_SUB_SEL QPCEX_FIELD_LOC(1, 1)
+#define QPCEX_DIP_CTX_IDX_VLD QPCEX_FIELD_LOC(2, 2)
+#define QPCEX_DIP_CTX_IDX QPCEX_FIELD_LOC(22, 3)
+#define QPCEX_SQ_RQ_NOT_FORBID_EN QPCEX_FIELD_LOC(23, 23)
+#define QPCEX_STASH QPCEX_FIELD_LOC(82, 82)
+
+#define SCC_CONTEXT_SIZE 16
+
+struct hns_roce_v2_scc_context {
+ __le32 data[SCC_CONTEXT_SIZE];
+};
-#define V2_QPC_BYTE_256_SQ_FLUSH_IDX_S 16
-#define V2_QPC_BYTE_256_SQ_FLUSH_IDX_M GENMASK(31, 16)
+#define V2_QP_RWE_S 1 /* rdma write enable */
+#define V2_QP_RRE_S 2 /* rdma read enable */
+#define V2_QP_ATE_S 3 /* rdma atomic enable */
struct hns_roce_v2_cqe {
__le32 byte_4;
@@ -898,58 +696,34 @@ struct hns_roce_v2_cqe {
u8 smac[4];
__le32 byte_28;
__le32 byte_32;
+ __le32 rsv[8];
};
-#define V2_CQE_BYTE_4_OPCODE_S 0
-#define V2_CQE_BYTE_4_OPCODE_M GENMASK(4, 0)
-
-#define V2_CQE_BYTE_4_RQ_INLINE_S 5
-
-#define V2_CQE_BYTE_4_S_R_S 6
-
-#define V2_CQE_BYTE_4_OWNER_S 7
-
-#define V2_CQE_BYTE_4_STATUS_S 8
-#define V2_CQE_BYTE_4_STATUS_M GENMASK(15, 8)
-
-#define V2_CQE_BYTE_4_WQE_INDX_S 16
-#define V2_CQE_BYTE_4_WQE_INDX_M GENMASK(31, 16)
-
-#define V2_CQE_BYTE_12_XRC_SRQN_S 0
-#define V2_CQE_BYTE_12_XRC_SRQN_M GENMASK(23, 0)
-
-#define V2_CQE_BYTE_16_LCL_QPN_S 0
-#define V2_CQE_BYTE_16_LCL_QPN_M GENMASK(23, 0)
-
-#define V2_CQE_BYTE_16_SUB_STATUS_S 24
-#define V2_CQE_BYTE_16_SUB_STATUS_M GENMASK(31, 24)
-
-#define V2_CQE_BYTE_28_SMAC_4_S 0
-#define V2_CQE_BYTE_28_SMAC_4_M GENMASK(7, 0)
-
-#define V2_CQE_BYTE_28_SMAC_5_S 8
-#define V2_CQE_BYTE_28_SMAC_5_M GENMASK(15, 8)
-
-#define V2_CQE_BYTE_28_PORT_TYPE_S 16
-#define V2_CQE_BYTE_28_PORT_TYPE_M GENMASK(17, 16)
-
-#define V2_CQE_BYTE_28_VID_S 18
-#define V2_CQE_BYTE_28_VID_M GENMASK(29, 18)
-
-#define V2_CQE_BYTE_28_VID_VLD_S 30
-
-#define V2_CQE_BYTE_32_RMT_QPN_S 0
-#define V2_CQE_BYTE_32_RMT_QPN_M GENMASK(23, 0)
-
-#define V2_CQE_BYTE_32_SL_S 24
-#define V2_CQE_BYTE_32_SL_M GENMASK(26, 24)
-
-#define V2_CQE_BYTE_32_PORTN_S 27
-#define V2_CQE_BYTE_32_PORTN_M GENMASK(29, 27)
-
-#define V2_CQE_BYTE_32_GRH_S 30
-
-#define V2_CQE_BYTE_32_LPK_S 31
+#define CQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cqe, h, l)
+
+#define CQE_OPCODE CQE_FIELD_LOC(4, 0)
+#define CQE_RQ_INLINE CQE_FIELD_LOC(5, 5)
+#define CQE_S_R CQE_FIELD_LOC(6, 6)
+#define CQE_OWNER CQE_FIELD_LOC(7, 7)
+#define CQE_STATUS CQE_FIELD_LOC(15, 8)
+#define CQE_WQE_IDX CQE_FIELD_LOC(31, 16)
+#define CQE_RKEY_IMMTDATA CQE_FIELD_LOC(63, 32)
+#define CQE_XRC_SRQN CQE_FIELD_LOC(87, 64)
+#define CQE_RSV0 CQE_FIELD_LOC(95, 88)
+#define CQE_LCL_QPN CQE_FIELD_LOC(119, 96)
+#define CQE_SUB_STATUS CQE_FIELD_LOC(127, 120)
+#define CQE_BYTE_CNT CQE_FIELD_LOC(159, 128)
+#define CQE_SMAC CQE_FIELD_LOC(207, 160)
+#define CQE_PORT_TYPE CQE_FIELD_LOC(209, 208)
+#define CQE_VID CQE_FIELD_LOC(221, 210)
+#define CQE_VID_VLD CQE_FIELD_LOC(222, 222)
+#define CQE_RSV2 CQE_FIELD_LOC(223, 223)
+#define CQE_RMT_QPN CQE_FIELD_LOC(247, 224)
+#define CQE_SL CQE_FIELD_LOC(250, 248)
+#define CQE_PORTN CQE_FIELD_LOC(253, 251)
+#define CQE_GRH CQE_FIELD_LOC(254, 254)
+#define CQE_LPK CQE_FIELD_LOC(255, 255)
+#define CQE_RSV3 CQE_FIELD_LOC(511, 256)
struct hns_roce_v2_mpt_entry {
__le32 byte_4_pd_hop_st;
@@ -970,6 +744,52 @@ struct hns_roce_v2_mpt_entry {
__le32 byte_64_buf_pa1;
};
+#define MPT_PBL_BUF_ADDR_S 6
+#define MPT_PBL_BA_ADDR_S 3
+
+#define MPT_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_mpt_entry, h, l)
+
+#define MPT_ST MPT_FIELD_LOC(1, 0)
+#define MPT_PBL_HOP_NUM MPT_FIELD_LOC(3, 2)
+#define MPT_PBL_BA_PG_SZ MPT_FIELD_LOC(7, 4)
+#define MPT_PD MPT_FIELD_LOC(31, 8)
+#define MPT_RA_EN MPT_FIELD_LOC(32, 32)
+#define MPT_R_INV_EN MPT_FIELD_LOC(33, 33)
+#define MPT_L_INV_EN MPT_FIELD_LOC(34, 34)
+#define MPT_BIND_EN MPT_FIELD_LOC(35, 35)
+#define MPT_ATOMIC_EN MPT_FIELD_LOC(36, 36)
+#define MPT_RR_EN MPT_FIELD_LOC(37, 37)
+#define MPT_RW_EN MPT_FIELD_LOC(38, 38)
+#define MPT_LW_EN MPT_FIELD_LOC(39, 39)
+#define MPT_MW_CNT MPT_FIELD_LOC(63, 40)
+#define MPT_FRE MPT_FIELD_LOC(64, 64)
+#define MPT_PA MPT_FIELD_LOC(65, 65)
+#define MPT_ZBVA MPT_FIELD_LOC(66, 66)
+#define MPT_SHARE MPT_FIELD_LOC(67, 67)
+#define MPT_MR_MW MPT_FIELD_LOC(68, 68)
+#define MPT_BPD MPT_FIELD_LOC(69, 69)
+#define MPT_BQP MPT_FIELD_LOC(70, 70)
+#define MPT_INNER_PA_VLD MPT_FIELD_LOC(71, 71)
+#define MPT_MW_BIND_QPN MPT_FIELD_LOC(95, 72)
+#define MPT_BOUND_LKEY MPT_FIELD_LOC(127, 96)
+#define MPT_LEN_L MPT_FIELD_LOC(159, 128)
+#define MPT_LEN_H MPT_FIELD_LOC(191, 160)
+#define MPT_LKEY MPT_FIELD_LOC(223, 192)
+#define MPT_VA MPT_FIELD_LOC(287, 224)
+#define MPT_PBL_SIZE MPT_FIELD_LOC(319, 288)
+#define MPT_PBL_BA_L MPT_FIELD_LOC(351, 320)
+#define MPT_PBL_BA_H MPT_FIELD_LOC(380, 352)
+#define MPT_BLK_MODE MPT_FIELD_LOC(381, 381)
+#define MPT_RSV0 MPT_FIELD_LOC(383, 382)
+#define MPT_PA0_L MPT_FIELD_LOC(415, 384)
+#define MPT_PA0_H MPT_FIELD_LOC(441, 416)
+#define MPT_BOUND_VA MPT_FIELD_LOC(447, 442)
+#define MPT_PA1_L MPT_FIELD_LOC(479, 448)
+#define MPT_PA1_H MPT_FIELD_LOC(505, 480)
+#define MPT_PERSIST_EN MPT_FIELD_LOC(506, 506)
+#define MPT_RSV2 MPT_FIELD_LOC(507, 507)
+#define MPT_PBL_BUF_PG_SZ MPT_FIELD_LOC(511, 508)
+
#define V2_MPT_BYTE_4_MPT_ST_S 0
#define V2_MPT_BYTE_4_MPT_ST_M GENMASK(1, 0)
@@ -998,24 +818,16 @@ struct hns_roce_v2_mpt_entry {
#define V2_MPT_BYTE_8_LW_EN_S 7
-#define V2_MPT_BYTE_8_MW_CNT_S 8
-#define V2_MPT_BYTE_8_MW_CNT_M GENMASK(31, 8)
-
#define V2_MPT_BYTE_12_FRE_S 0
#define V2_MPT_BYTE_12_PA_S 1
-#define V2_MPT_BYTE_12_MR_MW_S 4
-
#define V2_MPT_BYTE_12_BPD_S 5
#define V2_MPT_BYTE_12_BQP_S 6
#define V2_MPT_BYTE_12_INNER_PA_VLD_S 7
-#define V2_MPT_BYTE_12_MW_BIND_QPN_S 8
-#define V2_MPT_BYTE_12_MW_BIND_QPN_M GENMASK(31, 8)
-
#define V2_MPT_BYTE_48_PBL_BA_H_S 0
#define V2_MPT_BYTE_48_PBL_BA_H_M GENMASK(28, 0)
@@ -1030,36 +842,29 @@ struct hns_roce_v2_mpt_entry {
#define V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S 28
#define V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M GENMASK(31, 28)
-#define V2_DB_BYTE_4_TAG_S 0
-#define V2_DB_BYTE_4_TAG_M GENMASK(23, 0)
-
-#define V2_DB_BYTE_4_CMD_S 24
-#define V2_DB_BYTE_4_CMD_M GENMASK(27, 24)
-
-#define V2_DB_PARAMETER_IDX_S 0
-#define V2_DB_PARAMETER_IDX_M GENMASK(15, 0)
-
-#define V2_DB_PARAMETER_SL_S 16
-#define V2_DB_PARAMETER_SL_M GENMASK(18, 16)
-
-struct hns_roce_v2_cq_db {
- __le32 byte_4;
- __le32 parameter;
+struct hns_roce_v2_db {
+ __le32 data[2];
};
-#define V2_CQ_DB_BYTE_4_TAG_S 0
-#define V2_CQ_DB_BYTE_4_TAG_M GENMASK(23, 0)
+#define DB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_db, h, l)
-#define V2_CQ_DB_BYTE_4_CMD_S 24
-#define V2_CQ_DB_BYTE_4_CMD_M GENMASK(27, 24)
+#define DB_TAG DB_FIELD_LOC(23, 0)
+#define DB_CMD DB_FIELD_LOC(27, 24)
+#define DB_FLAG DB_FIELD_LOC(31, 31)
+#define DB_PI DB_FIELD_LOC(47, 32)
+#define DB_SL DB_FIELD_LOC(50, 48)
+#define DB_CQ_CI DB_FIELD_LOC(55, 32)
+#define DB_CQ_NOTIFY DB_FIELD_LOC(56, 56)
+#define DB_CQ_CMD_SN DB_FIELD_LOC(58, 57)
+#define EQ_DB_TAG DB_FIELD_LOC(7, 0)
+#define EQ_DB_CMD DB_FIELD_LOC(17, 16)
+#define EQ_DB_CI DB_FIELD_LOC(55, 32)
-#define V2_CQ_DB_PARAMETER_CONS_IDX_S 0
-#define V2_CQ_DB_PARAMETER_CONS_IDX_M GENMASK(23, 0)
+#define V2_DB_PRODUCER_IDX_S 0
+#define V2_DB_PRODUCER_IDX_M GENMASK(15, 0)
-#define V2_CQ_DB_PARAMETER_CMD_SN_S 25
-#define V2_CQ_DB_PARAMETER_CMD_SN_M GENMASK(26, 25)
-
-#define V2_CQ_DB_PARAMETER_NOTIFY_S 24
+#define V2_CQ_DB_CONS_IDX_S 0
+#define V2_CQ_DB_CONS_IDX_M GENMASK(23, 0)
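With the doorbell reduced to two __le32 words, SQ, CQ and EQ doorbells all
share the DB_* locators above. A hedged sketch of composing and ringing a CQ
doorbell, assuming the hr_reg_write() helper from hns_roce_common.h and the
hns_roce_write64() routine declared near the end of this header; the command
value and register address are passed in to keep the sketch self-contained:

/* Illustrative sketch, not part of this patch. */
static void cq_db_example(struct hns_roce_dev *hr_dev, void __iomem *db_reg,
			  u32 cqn, u32 cons_idx, u32 cmd)
{
	struct hns_roce_v2_db db = {};

	hr_reg_write(&db, DB_TAG, cqn);
	hr_reg_write(&db, DB_CMD, cmd);
	hr_reg_write(&db, DB_CQ_CI, cons_idx);
	hr_reg_write(&db, DB_CQ_CMD_SN, 1);	/* arm sequence number, value illustrative */

	hns_roce_write64(hr_dev, (__le32 *)&db, db_reg);
}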
struct hns_roce_v2_ud_send_wqe {
__le32 byte_4;
@@ -1072,80 +877,30 @@ struct hns_roce_v2_ud_send_wqe {
__le32 byte_32;
__le32 byte_36;
__le32 byte_40;
- __le32 dmac;
- __le32 byte_48;
+ u8 dmac[ETH_ALEN];
+ u8 sgid_index;
+ u8 smac_index;
u8 dgid[GID_LEN_V2];
-
};
-#define V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0
-#define V2_UD_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0)
-
-#define V2_UD_SEND_WQE_BYTE_4_OWNER_S 7
-
-#define V2_UD_SEND_WQE_BYTE_4_CQE_S 8
-
-#define V2_UD_SEND_WQE_BYTE_4_SE_S 11
-
-#define V2_UD_SEND_WQE_BYTE_16_PD_S 0
-#define V2_UD_SEND_WQE_BYTE_16_PD_M GENMASK(23, 0)
-
-#define V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S 24
-#define V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M GENMASK(31, 24)
-
-#define V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
-#define V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0)
-
-#define V2_UD_SEND_WQE_BYTE_24_UDPSPN_S 16
-#define V2_UD_SEND_WQE_BYTE_24_UDPSPN_M GENMASK(31, 16)
-
-#define V2_UD_SEND_WQE_BYTE_32_DQPN_S 0
-#define V2_UD_SEND_WQE_BYTE_32_DQPN_M GENMASK(23, 0)
-
-#define V2_UD_SEND_WQE_BYTE_36_VLAN_S 0
-#define V2_UD_SEND_WQE_BYTE_36_VLAN_M GENMASK(15, 0)
-
-#define V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S 16
-#define V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M GENMASK(23, 16)
-
-#define V2_UD_SEND_WQE_BYTE_36_TCLASS_S 24
-#define V2_UD_SEND_WQE_BYTE_36_TCLASS_M GENMASK(31, 24)
-
-#define V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S 0
-#define V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M GENMASK(19, 0)
-
-#define V2_UD_SEND_WQE_BYTE_40_SL_S 20
-#define V2_UD_SEND_WQE_BYTE_40_SL_M GENMASK(23, 20)
-
-#define V2_UD_SEND_WQE_BYTE_40_PORTN_S 24
-#define V2_UD_SEND_WQE_BYTE_40_PORTN_M GENMASK(26, 24)
-
-#define V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S 30
-#define V2_UD_SEND_WQE_BYTE_40_LBI_S 31
-
-#define V2_UD_SEND_WQE_DMAC_0_S 0
-#define V2_UD_SEND_WQE_DMAC_0_M GENMASK(7, 0)
-
-#define V2_UD_SEND_WQE_DMAC_1_S 8
-#define V2_UD_SEND_WQE_DMAC_1_M GENMASK(15, 8)
-
-#define V2_UD_SEND_WQE_DMAC_2_S 16
-#define V2_UD_SEND_WQE_DMAC_2_M GENMASK(23, 16)
-
-#define V2_UD_SEND_WQE_DMAC_3_S 24
-#define V2_UD_SEND_WQE_DMAC_3_M GENMASK(31, 24)
-
-#define V2_UD_SEND_WQE_BYTE_48_DMAC_4_S 0
-#define V2_UD_SEND_WQE_BYTE_48_DMAC_4_M GENMASK(7, 0)
-
-#define V2_UD_SEND_WQE_BYTE_48_DMAC_5_S 8
-#define V2_UD_SEND_WQE_BYTE_48_DMAC_5_M GENMASK(15, 8)
-
-#define V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S 16
-#define V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M GENMASK(23, 16)
-
-#define V2_UD_SEND_WQE_BYTE_48_SMAC_INDX_S 24
-#define V2_UD_SEND_WQE_BYTE_48_SMAC_INDX_M GENMASK(31, 24)
+#define UD_SEND_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_ud_send_wqe, h, l)
+
+#define UD_SEND_WQE_OPCODE UD_SEND_WQE_FIELD_LOC(4, 0)
+#define UD_SEND_WQE_OWNER UD_SEND_WQE_FIELD_LOC(7, 7)
+#define UD_SEND_WQE_CQE UD_SEND_WQE_FIELD_LOC(8, 8)
+#define UD_SEND_WQE_SE UD_SEND_WQE_FIELD_LOC(11, 11)
+#define UD_SEND_WQE_PD UD_SEND_WQE_FIELD_LOC(119, 96)
+#define UD_SEND_WQE_SGE_NUM UD_SEND_WQE_FIELD_LOC(127, 120)
+#define UD_SEND_WQE_MSG_START_SGE_IDX UD_SEND_WQE_FIELD_LOC(151, 128)
+#define UD_SEND_WQE_UDPSPN UD_SEND_WQE_FIELD_LOC(191, 176)
+#define UD_SEND_WQE_DQPN UD_SEND_WQE_FIELD_LOC(247, 224)
+#define UD_SEND_WQE_VLAN UD_SEND_WQE_FIELD_LOC(271, 256)
+#define UD_SEND_WQE_HOPLIMIT UD_SEND_WQE_FIELD_LOC(279, 272)
+#define UD_SEND_WQE_TCLASS UD_SEND_WQE_FIELD_LOC(287, 280)
+#define UD_SEND_WQE_FLOW_LABEL UD_SEND_WQE_FIELD_LOC(307, 288)
+#define UD_SEND_WQE_SL UD_SEND_WQE_FIELD_LOC(311, 308)
+#define UD_SEND_WQE_VLAN_EN UD_SEND_WQE_FIELD_LOC(318, 318)
+#define UD_SEND_WQE_LBI UD_SEND_WQE_FIELD_LOC(319, 319)
struct hns_roce_v2_rc_send_wqe {
__le32 byte_4;
@@ -1160,49 +915,41 @@ struct hns_roce_v2_rc_send_wqe {
__le64 va;
};
-#define V2_RC_SEND_WQE_BYTE_4_OPCODE_S 0
-#define V2_RC_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0)
-
-#define V2_RC_SEND_WQE_BYTE_4_OWNER_S 7
-
-#define V2_RC_SEND_WQE_BYTE_4_CQE_S 8
-
-#define V2_RC_SEND_WQE_BYTE_4_FENCE_S 9
-
-#define V2_RC_SEND_WQE_BYTE_4_SO_S 10
-
-#define V2_RC_SEND_WQE_BYTE_4_SE_S 11
-
-#define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12
-
-#define V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S 19
-
-#define V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S 20
-
-#define V2_RC_FRMR_WQE_BYTE_4_RR_S 21
-
-#define V2_RC_FRMR_WQE_BYTE_4_RW_S 22
-
-#define V2_RC_FRMR_WQE_BYTE_4_LW_S 23
-
-#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0
-#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0)
-
-#define V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S 24
-#define V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M GENMASK(31, 24)
-
-#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
-#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0)
+#define RC_SEND_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_rc_send_wqe, h, l)
+
+#define RC_SEND_WQE_OPCODE RC_SEND_WQE_FIELD_LOC(4, 0)
+#define RC_SEND_WQE_DB_SL_L RC_SEND_WQE_FIELD_LOC(6, 5)
+#define RC_SEND_WQE_DB_SL_H RC_SEND_WQE_FIELD_LOC(14, 13)
+#define RC_SEND_WQE_OWNER RC_SEND_WQE_FIELD_LOC(7, 7)
+#define RC_SEND_WQE_CQE RC_SEND_WQE_FIELD_LOC(8, 8)
+#define RC_SEND_WQE_FENCE RC_SEND_WQE_FIELD_LOC(9, 9)
+#define RC_SEND_WQE_SO RC_SEND_WQE_FIELD_LOC(10, 10)
+#define RC_SEND_WQE_SE RC_SEND_WQE_FIELD_LOC(11, 11)
+#define RC_SEND_WQE_INLINE RC_SEND_WQE_FIELD_LOC(12, 12)
+#define RC_SEND_WQE_WQE_INDEX RC_SEND_WQE_FIELD_LOC(30, 15)
+#define RC_SEND_WQE_FLAG RC_SEND_WQE_FIELD_LOC(31, 31)
+#define RC_SEND_WQE_XRC_SRQN RC_SEND_WQE_FIELD_LOC(119, 96)
+#define RC_SEND_WQE_SGE_NUM RC_SEND_WQE_FIELD_LOC(127, 120)
+#define RC_SEND_WQE_MSG_START_SGE_IDX RC_SEND_WQE_FIELD_LOC(151, 128)
+#define RC_SEND_WQE_INL_TYPE RC_SEND_WQE_FIELD_LOC(159, 159)
struct hns_roce_wqe_frmr_seg {
__le32 pbl_size;
- __le32 mode_buf_pg_sz;
+ __le32 byte_40;
};
-#define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S 4
-#define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M GENMASK(7, 4)
+#define FRMR_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_wqe_frmr_seg, h, l)
-#define V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S 8
+#define FRMR_PBL_SIZE FRMR_WQE_FIELD_LOC(31, 0)
+#define FRMR_BLOCK_SIZE FRMR_WQE_FIELD_LOC(35, 32)
+#define FRMR_PBL_BUF_PG_SZ FRMR_WQE_FIELD_LOC(39, 36)
+#define FRMR_BLK_MODE FRMR_WQE_FIELD_LOC(40, 40)
+#define FRMR_ZBVA FRMR_WQE_FIELD_LOC(41, 41)
+#define FRMR_BIND_EN FRMR_WQE_FIELD_LOC(42, 42)
+#define FRMR_ATOMIC FRMR_WQE_FIELD_LOC(43, 43)
+#define FRMR_RR FRMR_WQE_FIELD_LOC(44, 44)
+#define FRMR_RW FRMR_WQE_FIELD_LOC(45, 45)
+#define FRMR_LW FRMR_WQE_FIELD_LOC(46, 46)
struct hns_roce_v2_wqe_data_seg {
__le32 len;
@@ -1210,11 +957,6 @@ struct hns_roce_v2_wqe_data_seg {
__le64 addr;
};
-struct hns_roce_v2_db {
- __le32 byte_4;
- __le32 parameter;
-};
-
struct hns_roce_query_version {
__le16 rocee_vendor_id;
__le16 rocee_hw_version;
@@ -1226,216 +968,75 @@ struct hns_roce_query_fw_info {
__le32 rsv[5];
};
-struct hns_roce_cfg_llm_a {
- __le32 base_addr_l;
- __le32 base_addr_h;
- __le32 depth_pgsz_init_en;
- __le32 head_ba_l;
- __le32 head_ba_h_nxtptr;
- __le32 head_ptr;
+struct hns_roce_func_clear {
+ __le32 rst_funcid_en;
+ __le32 func_done;
+ __le32 rsv[4];
};
-#define CFG_LLM_QUE_DEPTH_S 0
-#define CFG_LLM_QUE_DEPTH_M GENMASK(12, 0)
-
-#define CFG_LLM_QUE_PGSZ_S 16
-#define CFG_LLM_QUE_PGSZ_M GENMASK(19, 16)
-
-#define CFG_LLM_INIT_EN_S 20
-#define CFG_LLM_INIT_EN_M GENMASK(20, 20)
-
-#define CFG_LLM_HEAD_PTR_S 0
-#define CFG_LLM_HEAD_PTR_M GENMASK(11, 0)
-
-struct hns_roce_cfg_llm_b {
- __le32 tail_ba_l;
- __le32 tail_ba_h;
- __le32 tail_ptr;
- __le32 rsv[3];
-};
-
-#define CFG_LLM_TAIL_BA_H_S 0
-#define CFG_LLM_TAIL_BA_H_M GENMASK(19, 0)
-
-#define CFG_LLM_TAIL_PTR_S 0
-#define CFG_LLM_TAIL_PTR_M GENMASK(11, 0)
-
-struct hns_roce_cfg_global_param {
- __le32 time_cfg_udp_port;
- __le32 rsv[5];
-};
-
-#define CFG_GLOBAL_PARAM_DATA_0_ROCEE_TIME_1US_CFG_S 0
-#define CFG_GLOBAL_PARAM_DATA_0_ROCEE_TIME_1US_CFG_M GENMASK(9, 0)
-
-#define CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_S 16
-#define CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_M GENMASK(31, 16)
-
-struct hns_roce_pf_res_a {
- __le32 rsv;
- __le32 qpc_bt_idx_num;
- __le32 srqc_bt_idx_num;
- __le32 cqc_bt_idx_num;
- __le32 mpt_bt_idx_num;
- __le32 eqc_bt_idx_num;
-};
-
-#define PF_RES_DATA_1_PF_QPC_BT_IDX_S 0
-#define PF_RES_DATA_1_PF_QPC_BT_IDX_M GENMASK(10, 0)
-
-#define PF_RES_DATA_1_PF_QPC_BT_NUM_S 16
-#define PF_RES_DATA_1_PF_QPC_BT_NUM_M GENMASK(27, 16)
-
-#define PF_RES_DATA_2_PF_SRQC_BT_IDX_S 0
-#define PF_RES_DATA_2_PF_SRQC_BT_IDX_M GENMASK(8, 0)
-
-#define PF_RES_DATA_2_PF_SRQC_BT_NUM_S 16
-#define PF_RES_DATA_2_PF_SRQC_BT_NUM_M GENMASK(25, 16)
-
-#define PF_RES_DATA_3_PF_CQC_BT_IDX_S 0
-#define PF_RES_DATA_3_PF_CQC_BT_IDX_M GENMASK(8, 0)
-
-#define PF_RES_DATA_3_PF_CQC_BT_NUM_S 16
-#define PF_RES_DATA_3_PF_CQC_BT_NUM_M GENMASK(25, 16)
-
-#define PF_RES_DATA_4_PF_MPT_BT_IDX_S 0
-#define PF_RES_DATA_4_PF_MPT_BT_IDX_M GENMASK(8, 0)
-
-#define PF_RES_DATA_4_PF_MPT_BT_NUM_S 16
-#define PF_RES_DATA_4_PF_MPT_BT_NUM_M GENMASK(25, 16)
-
-#define PF_RES_DATA_5_PF_EQC_BT_IDX_S 0
-#define PF_RES_DATA_5_PF_EQC_BT_IDX_M GENMASK(8, 0)
-
-#define PF_RES_DATA_5_PF_EQC_BT_NUM_S 16
-#define PF_RES_DATA_5_PF_EQC_BT_NUM_M GENMASK(25, 16)
-
-struct hns_roce_pf_res_b {
- __le32 rsv0;
- __le32 smac_idx_num;
- __le32 sgid_idx_num;
- __le32 qid_idx_sl_num;
- __le32 sccc_bt_idx_num;
- __le32 rsv;
-};
-
-#define PF_RES_DATA_1_PF_SMAC_IDX_S 0
-#define PF_RES_DATA_1_PF_SMAC_IDX_M GENMASK(7, 0)
-
-#define PF_RES_DATA_1_PF_SMAC_NUM_S 8
-#define PF_RES_DATA_1_PF_SMAC_NUM_M GENMASK(16, 8)
-
-#define PF_RES_DATA_2_PF_SGID_IDX_S 0
-#define PF_RES_DATA_2_PF_SGID_IDX_M GENMASK(7, 0)
-
-#define PF_RES_DATA_2_PF_SGID_NUM_S 8
-#define PF_RES_DATA_2_PF_SGID_NUM_M GENMASK(16, 8)
-
-#define PF_RES_DATA_3_PF_QID_IDX_S 0
-#define PF_RES_DATA_3_PF_QID_IDX_M GENMASK(9, 0)
-
-#define PF_RES_DATA_3_PF_SL_NUM_S 16
-#define PF_RES_DATA_3_PF_SL_NUM_M GENMASK(26, 16)
-
-#define PF_RES_DATA_4_PF_SCCC_BT_IDX_S 0
-#define PF_RES_DATA_4_PF_SCCC_BT_IDX_M GENMASK(8, 0)
-
-#define PF_RES_DATA_4_PF_SCCC_BT_NUM_S 9
-#define PF_RES_DATA_4_PF_SCCC_BT_NUM_M GENMASK(17, 9)
-
-struct hns_roce_pf_timer_res_a {
- __le32 rsv0;
- __le32 qpc_timer_bt_idx_num;
- __le32 cqc_timer_bt_idx_num;
- __le32 rsv[3];
-};
-
-#define PF_RES_DATA_1_PF_QPC_TIMER_BT_IDX_S 0
-#define PF_RES_DATA_1_PF_QPC_TIMER_BT_IDX_M GENMASK(11, 0)
-
-#define PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_S 16
-#define PF_RES_DATA_1_PF_QPC_TIMER_BT_NUM_M GENMASK(28, 16)
-
-#define PF_RES_DATA_2_PF_CQC_TIMER_BT_IDX_S 0
-#define PF_RES_DATA_2_PF_CQC_TIMER_BT_IDX_M GENMASK(10, 0)
-
-#define PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_S 16
-#define PF_RES_DATA_2_PF_CQC_TIMER_BT_NUM_M GENMASK(27, 16)
-
-struct hns_roce_vf_res_a {
- __le32 vf_id;
- __le32 vf_qpc_bt_idx_num;
- __le32 vf_srqc_bt_idx_num;
- __le32 vf_cqc_bt_idx_num;
- __le32 vf_mpt_bt_idx_num;
- __le32 vf_eqc_bt_idx_num;
-};
-
-#define VF_RES_A_DATA_1_VF_QPC_BT_IDX_S 0
-#define VF_RES_A_DATA_1_VF_QPC_BT_IDX_M GENMASK(10, 0)
-
-#define VF_RES_A_DATA_1_VF_QPC_BT_NUM_S 16
-#define VF_RES_A_DATA_1_VF_QPC_BT_NUM_M GENMASK(27, 16)
-
-#define VF_RES_A_DATA_2_VF_SRQC_BT_IDX_S 0
-#define VF_RES_A_DATA_2_VF_SRQC_BT_IDX_M GENMASK(8, 0)
-
-#define VF_RES_A_DATA_2_VF_SRQC_BT_NUM_S 16
-#define VF_RES_A_DATA_2_VF_SRQC_BT_NUM_M GENMASK(25, 16)
-
-#define VF_RES_A_DATA_3_VF_CQC_BT_IDX_S 0
-#define VF_RES_A_DATA_3_VF_CQC_BT_IDX_M GENMASK(8, 0)
-
-#define VF_RES_A_DATA_3_VF_CQC_BT_NUM_S 16
-#define VF_RES_A_DATA_3_VF_CQC_BT_NUM_M GENMASK(25, 16)
-
-#define VF_RES_A_DATA_4_VF_MPT_BT_IDX_S 0
-#define VF_RES_A_DATA_4_VF_MPT_BT_IDX_M GENMASK(8, 0)
-
-#define VF_RES_A_DATA_4_VF_MPT_BT_NUM_S 16
-#define VF_RES_A_DATA_4_VF_MPT_BT_NUM_M GENMASK(25, 16)
-
-#define VF_RES_A_DATA_5_VF_EQC_IDX_S 0
-#define VF_RES_A_DATA_5_VF_EQC_IDX_M GENMASK(8, 0)
-
-#define VF_RES_A_DATA_5_VF_EQC_NUM_S 16
-#define VF_RES_A_DATA_5_VF_EQC_NUM_M GENMASK(25, 16)
-
-struct hns_roce_vf_res_b {
- __le32 rsv0;
- __le32 vf_smac_idx_num;
- __le32 vf_sgid_idx_num;
- __le32 vf_qid_idx_sl_num;
- __le32 vf_sccc_idx_num;
- __le32 rsv1;
-};
+#define FUNC_CLEAR_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_func_clear, h, l)
-#define VF_RES_B_DATA_0_VF_ID_S 0
-#define VF_RES_B_DATA_0_VF_ID_M GENMASK(7, 0)
+#define FUNC_CLEAR_RST_FUN_DONE FUNC_CLEAR_FIELD_LOC(32, 32)
-#define VF_RES_B_DATA_1_VF_SMAC_IDX_S 0
-#define VF_RES_B_DATA_1_VF_SMAC_IDX_M GENMASK(7, 0)
-
-#define VF_RES_B_DATA_1_VF_SMAC_NUM_S 8
-#define VF_RES_B_DATA_1_VF_SMAC_NUM_M GENMASK(16, 8)
-
-#define VF_RES_B_DATA_2_VF_SGID_IDX_S 0
-#define VF_RES_B_DATA_2_VF_SGID_IDX_M GENMASK(7, 0)
-
-#define VF_RES_B_DATA_2_VF_SGID_NUM_S 8
-#define VF_RES_B_DATA_2_VF_SGID_NUM_M GENMASK(16, 8)
-
-#define VF_RES_B_DATA_3_VF_QID_IDX_S 0
-#define VF_RES_B_DATA_3_VF_QID_IDX_M GENMASK(9, 0)
-
-#define VF_RES_B_DATA_3_VF_SL_NUM_S 16
-#define VF_RES_B_DATA_3_VF_SL_NUM_M GENMASK(19, 16)
-
-#define VF_RES_B_DATA_4_VF_SCCC_BT_IDX_S 0
-#define VF_RES_B_DATA_4_VF_SCCC_BT_IDX_M GENMASK(8, 0)
+/* Each physical function manages up to 248 virtual functions, and each
+ * function takes up to 100ms to execute the clear operation. If an abnormal
+ * reset occurs, the clear is executed at most twice, so the total wait can be
+ * up to 249 * 2 * 100ms.
+ */
+#define HNS_ROCE_V2_FUNC_CLEAR_TIMEOUT_MSECS (249 * 2 * 100)
+#define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL 40
+#define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT 20
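Put differently, the driver is expected to poll FUNC_CLEAR_RST_FUN_DONE every
HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL milliseconds until the 249 * 2 *
100 ms (about 49.8 s) budget is spent. A hedged sketch of that polling shape;
read_func_clear_done() stands in for the actual function-clear CMQ query and
is hypothetical:

/* Illustrative sketch, not part of this patch. */
static bool wait_func_clear_done(struct hns_roce_dev *hr_dev, int vf_id)
{
	int timeout = HNS_ROCE_V2_FUNC_CLEAR_TIMEOUT_MSECS;

	while (timeout > 0) {
		msleep(HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL);
		timeout -= HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL;
		if (read_func_clear_done(hr_dev, vf_id))
			return true;
	}

	return false;
}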
+
+#define CFG_LLM_A_BA_L CMQ_REQ_FIELD_LOC(31, 0)
+#define CFG_LLM_A_BA_H CMQ_REQ_FIELD_LOC(63, 32)
+#define CFG_LLM_A_DEPTH CMQ_REQ_FIELD_LOC(76, 64)
+#define CFG_LLM_A_PGSZ CMQ_REQ_FIELD_LOC(83, 80)
+#define CFG_LLM_A_INIT_EN CMQ_REQ_FIELD_LOC(84, 84)
+#define CFG_LLM_A_HEAD_BA_L CMQ_REQ_FIELD_LOC(127, 96)
+#define CFG_LLM_A_HEAD_BA_H CMQ_REQ_FIELD_LOC(147, 128)
+#define CFG_LLM_A_HEAD_NXTPTR CMQ_REQ_FIELD_LOC(159, 148)
+#define CFG_LLM_A_HEAD_PTR CMQ_REQ_FIELD_LOC(171, 160)
+#define CFG_LLM_B_TAIL_BA_L CMQ_REQ_FIELD_LOC(31, 0)
+#define CFG_LLM_B_TAIL_BA_H CMQ_REQ_FIELD_LOC(63, 32)
+#define CFG_LLM_B_TAIL_PTR CMQ_REQ_FIELD_LOC(75, 64)
+
+/* Fields of HNS_ROCE_OPC_CFG_GLOBAL_PARAM */
+#define CFG_GLOBAL_PARAM_1US_CYCLES CMQ_REQ_FIELD_LOC(9, 0)
+#define CFG_GLOBAL_PARAM_UDP_PORT CMQ_REQ_FIELD_LOC(31, 16)
-#define VF_RES_B_DATA_4_VF_SCCC_BT_NUM_S 9
-#define VF_RES_B_DATA_4_VF_SCCC_BT_NUM_M GENMASK(17, 9)
+/*
+ * Fields of HNS_ROCE_OPC_QUERY_PF_RES, HNS_ROCE_OPC_QUERY_VF_RES
+ * and HNS_ROCE_OPC_ALLOC_VF_RES
+ */
+#define FUNC_RES_A_VF_ID CMQ_REQ_FIELD_LOC(7, 0)
+#define FUNC_RES_A_QPC_BT_IDX CMQ_REQ_FIELD_LOC(42, 32)
+#define FUNC_RES_A_QPC_BT_NUM CMQ_REQ_FIELD_LOC(59, 48)
+#define FUNC_RES_A_SRQC_BT_IDX CMQ_REQ_FIELD_LOC(72, 64)
+#define FUNC_RES_A_SRQC_BT_NUM CMQ_REQ_FIELD_LOC(89, 80)
+#define FUNC_RES_A_CQC_BT_IDX CMQ_REQ_FIELD_LOC(104, 96)
+#define FUNC_RES_A_CQC_BT_NUM CMQ_REQ_FIELD_LOC(121, 112)
+#define FUNC_RES_A_MPT_BT_IDX CMQ_REQ_FIELD_LOC(136, 128)
+#define FUNC_RES_A_MPT_BT_NUM CMQ_REQ_FIELD_LOC(153, 144)
+#define FUNC_RES_A_EQC_BT_IDX CMQ_REQ_FIELD_LOC(168, 160)
+#define FUNC_RES_A_EQC_BT_NUM CMQ_REQ_FIELD_LOC(185, 176)
+#define FUNC_RES_B_SMAC_IDX CMQ_REQ_FIELD_LOC(39, 32)
+#define FUNC_RES_B_SMAC_NUM CMQ_REQ_FIELD_LOC(48, 40)
+#define FUNC_RES_B_SGID_IDX CMQ_REQ_FIELD_LOC(71, 64)
+#define FUNC_RES_B_SGID_NUM CMQ_REQ_FIELD_LOC(80, 72)
+#define FUNC_RES_B_QID_IDX CMQ_REQ_FIELD_LOC(105, 96)
+#define FUNC_RES_B_QID_NUM CMQ_REQ_FIELD_LOC(122, 112)
+#define FUNC_RES_V_QID_NUM CMQ_REQ_FIELD_LOC(115, 112)
+
+#define FUNC_RES_B_SCCC_BT_IDX CMQ_REQ_FIELD_LOC(136, 128)
+#define FUNC_RES_B_SCCC_BT_NUM CMQ_REQ_FIELD_LOC(145, 137)
+#define FUNC_RES_B_GMV_BT_IDX CMQ_REQ_FIELD_LOC(167, 160)
+#define FUNC_RES_B_GMV_BT_NUM CMQ_REQ_FIELD_LOC(176, 168)
+#define FUNC_RES_V_GMV_BT_NUM CMQ_REQ_FIELD_LOC(184, 176)
+
+/* Fields of HNS_ROCE_OPC_QUERY_PF_TIMER_RES */
+#define PF_TIMER_RES_QPC_ITEM_IDX CMQ_REQ_FIELD_LOC(43, 32)
+#define PF_TIMER_RES_QPC_ITEM_NUM CMQ_REQ_FIELD_LOC(60, 48)
+#define PF_TIMER_RES_CQC_ITEM_IDX CMQ_REQ_FIELD_LOC(74, 64)
+#define PF_TIMER_RES_CQC_ITEM_NUM CMQ_REQ_FIELD_LOC(91, 80)
struct hns_roce_vf_switch {
__le32 rocee_sel;
@@ -1446,12 +1047,12 @@ struct hns_roce_vf_switch {
__le32 resv3;
};
-#define VF_SWITCH_DATA_FUN_ID_VF_ID_S 3
-#define VF_SWITCH_DATA_FUN_ID_VF_ID_M GENMASK(10, 3)
+#define VF_SWITCH_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_vf_switch, h, l)
-#define VF_SWITCH_DATA_CFG_ALW_LPBK_S 1
-#define VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S 2
-#define VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S 3
+#define VF_SWITCH_VF_ID VF_SWITCH_FIELD_LOC(42, 35)
+#define VF_SWITCH_ALW_LPBK VF_SWITCH_FIELD_LOC(65, 65)
+#define VF_SWITCH_ALW_LCL_LPBK VF_SWITCH_FIELD_LOC(66, 66)
+#define VF_SWITCH_ALW_DST_OVRD VF_SWITCH_FIELD_LOC(67, 67)
struct hns_roce_post_mbox {
__le32 in_param_l;
@@ -1467,59 +1068,48 @@ struct hns_roce_mbox_status {
__le32 rsv[5];
};
-struct hns_roce_cfg_bt_attr {
- __le32 vf_qpc_cfg;
- __le32 vf_srqc_cfg;
- __le32 vf_cqc_cfg;
- __le32 vf_mpt_cfg;
- __le32 vf_sccc_cfg;
- __le32 rsv;
+#define HNS_ROCE_V2_GO_BIT_TIMEOUT_MSECS 10000
+
+#define MB_ST_HW_RUN_M BIT(31)
+#define MB_ST_COMPLETE_M GENMASK(7, 0)
+
+#define MB_ST_COMPLETE_SUCC 1
+
+/* Fields of HNS_ROCE_OPC_CFG_BT_ATTR */
+#define CFG_BT_ATTR_QPC_BA_PGSZ CMQ_REQ_FIELD_LOC(3, 0)
+#define CFG_BT_ATTR_QPC_BUF_PGSZ CMQ_REQ_FIELD_LOC(7, 4)
+#define CFG_BT_ATTR_QPC_HOPNUM CMQ_REQ_FIELD_LOC(9, 8)
+#define CFG_BT_ATTR_SRQC_BA_PGSZ CMQ_REQ_FIELD_LOC(35, 32)
+#define CFG_BT_ATTR_SRQC_BUF_PGSZ CMQ_REQ_FIELD_LOC(39, 36)
+#define CFG_BT_ATTR_SRQC_HOPNUM CMQ_REQ_FIELD_LOC(41, 40)
+#define CFG_BT_ATTR_CQC_BA_PGSZ CMQ_REQ_FIELD_LOC(67, 64)
+#define CFG_BT_ATTR_CQC_BUF_PGSZ CMQ_REQ_FIELD_LOC(71, 68)
+#define CFG_BT_ATTR_CQC_HOPNUM CMQ_REQ_FIELD_LOC(73, 72)
+#define CFG_BT_ATTR_MPT_BA_PGSZ CMQ_REQ_FIELD_LOC(99, 96)
+#define CFG_BT_ATTR_MPT_BUF_PGSZ CMQ_REQ_FIELD_LOC(103, 100)
+#define CFG_BT_ATTR_MPT_HOPNUM CMQ_REQ_FIELD_LOC(105, 104)
+#define CFG_BT_ATTR_SCCC_BA_PGSZ CMQ_REQ_FIELD_LOC(131, 128)
+#define CFG_BT_ATTR_SCCC_BUF_PGSZ CMQ_REQ_FIELD_LOC(135, 132)
+#define CFG_BT_ATTR_SCCC_HOPNUM CMQ_REQ_FIELD_LOC(137, 136)
+
+/* Fields of HNS_ROCE_OPC_CFG_ENTRY_SIZE */
+#define CFG_HEM_ENTRY_SIZE_TYPE CMQ_REQ_FIELD_LOC(31, 0)
+enum {
+ HNS_ROCE_CFG_QPC_SIZE = BIT(0),
+ HNS_ROCE_CFG_SCCC_SIZE = BIT(1),
};
-#define CFG_BT_ATTR_DATA_0_VF_QPC_BA_PGSZ_S 0
-#define CFG_BT_ATTR_DATA_0_VF_QPC_BA_PGSZ_M GENMASK(3, 0)
-
-#define CFG_BT_ATTR_DATA_0_VF_QPC_BUF_PGSZ_S 4
-#define CFG_BT_ATTR_DATA_0_VF_QPC_BUF_PGSZ_M GENMASK(7, 4)
-
-#define CFG_BT_ATTR_DATA_0_VF_QPC_HOPNUM_S 8
-#define CFG_BT_ATTR_DATA_0_VF_QPC_HOPNUM_M GENMASK(9, 8)
-
-#define CFG_BT_ATTR_DATA_1_VF_SRQC_BA_PGSZ_S 0
-#define CFG_BT_ATTR_DATA_1_VF_SRQC_BA_PGSZ_M GENMASK(3, 0)
-
-#define CFG_BT_ATTR_DATA_1_VF_SRQC_BUF_PGSZ_S 4
-#define CFG_BT_ATTR_DATA_1_VF_SRQC_BUF_PGSZ_M GENMASK(7, 4)
+#define CFG_HEM_ENTRY_SIZE_VALUE CMQ_REQ_FIELD_LOC(191, 160)
-#define CFG_BT_ATTR_DATA_1_VF_SRQC_HOPNUM_S 8
-#define CFG_BT_ATTR_DATA_1_VF_SRQC_HOPNUM_M GENMASK(9, 8)
+/* Fields of HNS_ROCE_OPC_CFG_GMV_BT */
+#define CFG_GMV_BT_BA_L CMQ_REQ_FIELD_LOC(31, 0)
+#define CFG_GMV_BT_BA_H CMQ_REQ_FIELD_LOC(51, 32)
+#define CFG_GMV_BT_IDX CMQ_REQ_FIELD_LOC(95, 64)
-#define CFG_BT_ATTR_DATA_2_VF_CQC_BA_PGSZ_S 0
-#define CFG_BT_ATTR_DATA_2_VF_CQC_BA_PGSZ_M GENMASK(3, 0)
-
-#define CFG_BT_ATTR_DATA_2_VF_CQC_BUF_PGSZ_S 4
-#define CFG_BT_ATTR_DATA_2_VF_CQC_BUF_PGSZ_M GENMASK(7, 4)
-
-#define CFG_BT_ATTR_DATA_2_VF_CQC_HOPNUM_S 8
-#define CFG_BT_ATTR_DATA_2_VF_CQC_HOPNUM_M GENMASK(9, 8)
-
-#define CFG_BT_ATTR_DATA_3_VF_MPT_BA_PGSZ_S 0
-#define CFG_BT_ATTR_DATA_3_VF_MPT_BA_PGSZ_M GENMASK(3, 0)
-
-#define CFG_BT_ATTR_DATA_3_VF_MPT_BUF_PGSZ_S 4
-#define CFG_BT_ATTR_DATA_3_VF_MPT_BUF_PGSZ_M GENMASK(7, 4)
-
-#define CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_S 8
-#define CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_M GENMASK(9, 8)
-
-#define CFG_BT_ATTR_DATA_4_VF_SCCC_BA_PGSZ_S 0
-#define CFG_BT_ATTR_DATA_4_VF_SCCC_BA_PGSZ_M GENMASK(3, 0)
-
-#define CFG_BT_ATTR_DATA_4_VF_SCCC_BUF_PGSZ_S 4
-#define CFG_BT_ATTR_DATA_4_VF_SCCC_BUF_PGSZ_M GENMASK(7, 4)
-
-#define CFG_BT_ATTR_DATA_4_VF_SCCC_HOPNUM_S 8
-#define CFG_BT_ATTR_DATA_4_VF_SCCC_HOPNUM_M GENMASK(9, 8)
+/* Fields of HNS_ROCE_QUERY_RAM_ECC */
+#define QUERY_RAM_ECC_1BIT_ERR CMQ_REQ_FIELD_LOC(31, 0)
+#define QUERY_RAM_ECC_RES_TYPE CMQ_REQ_FIELD_LOC(63, 32)
+#define QUERY_RAM_ECC_TAG CMQ_REQ_FIELD_LOC(95, 64)
struct hns_roce_cfg_sgid_tb {
__le32 table_idx_rsv;
@@ -1529,11 +1119,11 @@ struct hns_roce_cfg_sgid_tb {
__le32 vf_sgid_h;
__le32 vf_sgid_type_rsv;
};
-#define CFG_SGID_TB_TABLE_IDX_S 0
-#define CFG_SGID_TB_TABLE_IDX_M GENMASK(7, 0)
-#define CFG_SGID_TB_VF_SGID_TYPE_S 0
-#define CFG_SGID_TB_VF_SGID_TYPE_M GENMASK(1, 0)
+#define SGID_TB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_sgid_tb, h, l)
+
+#define CFG_SGID_TB_TABLE_IDX SGID_TB_FIELD_LOC(7, 0)
+#define CFG_SGID_TB_VF_SGID_TYPE SGID_TB_FIELD_LOC(161, 160)
struct hns_roce_cfg_smac_tb {
__le32 tb_idx_rsv;
@@ -1541,90 +1131,245 @@ struct hns_roce_cfg_smac_tb {
__le32 vf_smac_h_rsv;
__le32 rsv[3];
};
-#define CFG_SMAC_TB_IDX_S 0
-#define CFG_SMAC_TB_IDX_M GENMASK(7, 0)
-#define CFG_SMAC_TB_VF_SMAC_H_S 0
-#define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0)
+#define SMAC_TB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_smac_tb, h, l)
+
+#define CFG_SMAC_TB_IDX SMAC_TB_FIELD_LOC(7, 0)
+#define CFG_SMAC_TB_VF_SMAC_H SMAC_TB_FIELD_LOC(79, 64)
+
+struct hns_roce_cfg_gmv_tb_a {
+ __le32 vf_sgid_l;
+ __le32 vf_sgid_ml;
+ __le32 vf_sgid_mh;
+ __le32 vf_sgid_h;
+ __le32 vf_sgid_type_vlan;
+ __le32 resv;
+};
+
+#define GMV_TB_A_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_gmv_tb_a, h, l)
+
+#define GMV_TB_A_VF_SGID_TYPE GMV_TB_A_FIELD_LOC(129, 128)
+#define GMV_TB_A_VF_VLAN_EN GMV_TB_A_FIELD_LOC(130, 130)
+#define GMV_TB_A_VF_VLAN_ID GMV_TB_A_FIELD_LOC(155, 144)
+
+struct hns_roce_cfg_gmv_tb_b {
+ __le32 vf_smac_l;
+ __le32 vf_smac_h;
+ __le32 table_idx_rsv;
+ __le32 resv[3];
+};
+
+#define GMV_TB_B_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_gmv_tb_b, h, l)
+
+#define GMV_TB_B_SMAC_H GMV_TB_B_FIELD_LOC(47, 32)
+#define GMV_TB_B_SGID_IDX GMV_TB_B_FIELD_LOC(71, 64)
+
+#define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM_HIP08 5
+#define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 6
+struct hns_roce_query_pf_caps_a {
+ u8 number_ports;
+ u8 local_ca_ack_delay;
+ __le16 max_sq_sg;
+ __le16 max_sq_inline;
+ __le16 max_rq_sg;
+ __le32 rsv0;
+ __le16 num_qpc_timer;
+ __le16 num_cqc_timer;
+ __le16 max_srq_sges;
+ u8 num_aeq_vectors;
+ u8 num_other_vectors;
+ u8 max_sq_desc_sz;
+ u8 max_rq_desc_sz;
+ u8 rsv1;
+ u8 cqe_sz;
+};
+
+struct hns_roce_query_pf_caps_b {
+ u8 mtpt_entry_sz;
+ u8 irrl_entry_sz;
+ u8 trrl_entry_sz;
+ u8 cqc_entry_sz;
+ u8 srqc_entry_sz;
+ u8 idx_entry_sz;
+ u8 sccc_sz;
+ u8 max_mtu;
+ __le16 qpc_sz;
+ __le16 qpc_timer_entry_sz;
+ __le16 cqc_timer_entry_sz;
+ u8 min_cqes;
+ u8 min_wqes;
+ __le32 page_size_cap;
+ u8 pkey_table_len;
+ u8 phy_num_uars;
+ u8 ctx_hop_num;
+ u8 pbl_hop_num;
+};
+
+struct hns_roce_query_pf_caps_c {
+ __le32 cap_flags_num_pds;
+ __le32 max_gid_num_cqs;
+ __le32 cq_depth;
+ __le32 num_mrws;
+ __le32 ord_num_qps;
+ __le16 sq_depth;
+ __le16 rq_depth;
+};
+
+#define PF_CAPS_C_FIELD_LOC(h, l) \
+ FIELD_LOC(struct hns_roce_query_pf_caps_c, h, l)
+
+#define PF_CAPS_C_NUM_PDS PF_CAPS_C_FIELD_LOC(19, 0)
+#define PF_CAPS_C_CAP_FLAGS PF_CAPS_C_FIELD_LOC(31, 20)
+#define PF_CAPS_C_NUM_CQS PF_CAPS_C_FIELD_LOC(51, 32)
+#define PF_CAPS_C_MAX_GID PF_CAPS_C_FIELD_LOC(60, 52)
+#define PF_CAPS_C_CQ_DEPTH PF_CAPS_C_FIELD_LOC(86, 64)
+#define PF_CAPS_C_NUM_XRCDS PF_CAPS_C_FIELD_LOC(91, 87)
+#define PF_CAPS_C_NUM_MRWS PF_CAPS_C_FIELD_LOC(115, 96)
+#define PF_CAPS_C_NUM_QPS PF_CAPS_C_FIELD_LOC(147, 128)
+#define PF_CAPS_C_MAX_ORD PF_CAPS_C_FIELD_LOC(155, 148)
+
+struct hns_roce_query_pf_caps_d {
+ __le32 wq_hop_num_max_srqs;
+ __le16 srq_depth;
+ __le16 cap_flags_ex;
+ __le32 num_ceqs_ceq_depth;
+ __le32 arm_st_aeq_depth;
+ __le32 num_uars_rsv_pds;
+ __le32 rsv_uars_rsv_qps;
+};
+
+#define PF_CAPS_D_FIELD_LOC(h, l) \
+ FIELD_LOC(struct hns_roce_query_pf_caps_d, h, l)
+
+#define PF_CAPS_D_NUM_SRQS PF_CAPS_D_FIELD_LOC(19, 0)
+#define PF_CAPS_D_RQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(21, 20)
+#define PF_CAPS_D_EX_SGE_HOP_NUM PF_CAPS_D_FIELD_LOC(23, 22)
+#define PF_CAPS_D_SQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(25, 24)
+#define PF_CAPS_D_CONG_CAP PF_CAPS_D_FIELD_LOC(29, 26)
+#define PF_CAPS_D_CEQ_DEPTH PF_CAPS_D_FIELD_LOC(85, 64)
+#define PF_CAPS_D_NUM_CEQS PF_CAPS_D_FIELD_LOC(95, 86)
+#define PF_CAPS_D_AEQ_DEPTH PF_CAPS_D_FIELD_LOC(117, 96)
+#define PF_CAPS_D_AEQ_ARM_ST PF_CAPS_D_FIELD_LOC(119, 118)
+#define PF_CAPS_D_CEQ_ARM_ST PF_CAPS_D_FIELD_LOC(121, 120)
+#define PF_CAPS_D_DEFAULT_ALG PF_CAPS_D_FIELD_LOC(127, 122)
+#define PF_CAPS_D_RSV_PDS PF_CAPS_D_FIELD_LOC(147, 128)
+#define PF_CAPS_D_NUM_UARS PF_CAPS_D_FIELD_LOC(155, 148)
+#define PF_CAPS_D_RSV_QPS PF_CAPS_D_FIELD_LOC(179, 160)
+#define PF_CAPS_D_RSV_UARS PF_CAPS_D_FIELD_LOC(187, 180)
+
+#define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12
+
+struct hns_roce_congestion_algorithm {
+ u8 alg_sel;
+ u8 alg_sub_sel;
+ u8 dip_vld;
+ u8 wnd_mode_sel;
+};
+
+struct hns_roce_query_pf_caps_e {
+ __le32 chunk_size_shift_rsv_mrws;
+ __le32 rsv_cqs;
+ __le32 rsv_srqs;
+ __le32 rsv_lkey;
+ __le16 ceq_max_cnt;
+ __le16 ceq_period;
+ __le16 aeq_max_cnt;
+ __le16 aeq_period;
+};
+
+struct hns_roce_query_pf_caps_f {
+ __le32 max_ack_req_msg_len;
+ __le32 rsv[5];
+};
+
+#define PF_CAPS_E_FIELD_LOC(h, l) \
+ FIELD_LOC(struct hns_roce_query_pf_caps_e, h, l)
+
+#define PF_CAPS_E_RSV_MRWS PF_CAPS_E_FIELD_LOC(19, 0)
+#define PF_CAPS_E_CHUNK_SIZE_SHIFT PF_CAPS_E_FIELD_LOC(31, 20)
+#define PF_CAPS_E_RSV_CQS PF_CAPS_E_FIELD_LOC(51, 32)
+#define PF_CAPS_E_RSV_XRCDS PF_CAPS_E_FIELD_LOC(63, 52)
+#define PF_CAPS_E_RSV_SRQS PF_CAPS_E_FIELD_LOC(83, 64)
+#define PF_CAPS_E_RSV_LKEYS PF_CAPS_E_FIELD_LOC(115, 96)
+
+struct hns_roce_cmq_req {
+ __le32 data[6];
+};
+
+#define CMQ_REQ_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cmq_req, h, l)
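The CFG_LLM_*, CFG_GLOBAL_PARAM_*, FUNC_RES_*, CFG_BT_ATTR_* and related
command fields earlier in this header all resolve to bit ranges inside this
24-byte request, so a command is built by treating the descriptor's data area
as a struct hns_roce_cmq_req. A minimal sketch, assuming the CMQ helpers
local to hns_roce_hw_v2.c (hns_roce_cmq_setup_basic_desc() and
hns_roce_cmq_send()) and an illustrative 1 us cycle count:

/* Illustrative sketch, not part of this patch. */
static int cfg_global_param_example(struct hns_roce_dev *hr_dev, u16 udp_port)
{
	struct hns_roce_cmq_desc desc;
	struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;

	hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GLOBAL_PARAM,
				      false);
	hr_reg_write(req, CFG_GLOBAL_PARAM_1US_CYCLES, 0x3e8);
	hr_reg_write(req, CFG_GLOBAL_PARAM_UDP_PORT, udp_port);

	return hns_roce_cmq_send(hr_dev, &desc, 1);
}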
struct hns_roce_cmq_desc {
__le16 opcode;
__le16 flag;
__le16 retval;
__le16 rsv;
- __le32 data[6];
+ union {
+ __le32 data[6];
+ struct {
+ __le32 own_func_num;
+ __le32 own_mac_id;
+ __le32 rsv[4];
+ } func_info;
+ };
};
-#define HNS_ROCE_V2_GO_BIT_TIMEOUT_MSECS 10000
-
-#define HNS_ROCE_HW_RUN_BIT_SHIFT 31
-#define HNS_ROCE_HW_MB_STATUS_MASK 0xFF
-
struct hns_roce_v2_cmq_ring {
dma_addr_t desc_dma_addr;
struct hns_roce_cmq_desc *desc;
u32 head;
- u32 tail;
-
u16 buf_size;
u16 desc_num;
- int next_to_use;
- int next_to_clean;
u8 flag;
spinlock_t lock; /* command queue lock */
};
struct hns_roce_v2_cmq {
struct hns_roce_v2_cmq_ring csq;
- struct hns_roce_v2_cmq_ring crq;
u16 tx_timeout;
- u16 last_status;
-};
-
-enum hns_roce_link_table_type {
- TSQ_LINK_TABLE,
- TPQ_LINK_TABLE,
};
struct hns_roce_link_table {
struct hns_roce_buf_list table;
- struct hns_roce_buf_list *pg_list;
- u32 npages;
- u32 pg_sz;
+ struct hns_roce_buf *buf;
};
-struct hns_roce_link_table_entry {
- u32 blk_ba0;
- u32 blk_ba1_nxt_ptr;
-};
-#define HNS_ROCE_LINK_TABLE_BA1_S 0
-#define HNS_ROCE_LINK_TABLE_BA1_M GENMASK(19, 0)
+#define HNS_ROCE_EXT_LLM_ENTRY(addr, id) (((id) << (64 - 12)) | ((addr) >> 12))
+#define HNS_ROCE_EXT_LLM_MIN_PAGES(que_num) ((que_num) * 4 + 2)
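As a worked example of the entry encoding (illustrative, with made-up values):
for a 4 KiB-aligned buffer page at addr = 0x12345000 and queue id = 3,
HNS_ROCE_EXT_LLM_ENTRY(addr, id) gives (3ULL << 52) | (0x12345000 >> 12) =
0x0030000000012345, i.e. the 12-bit id occupies bits 63..52 and the page
frame number the low bits, while HNS_ROCE_EXT_LLM_MIN_PAGES(n) reserves four
link-list pages per queue plus two spare pages.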
-#define HNS_ROCE_LINK_TABLE_NXT_PTR_S 20
-#define HNS_ROCE_LINK_TABLE_NXT_PTR_M GENMASK(31, 20)
+struct hns_roce_v2_free_mr {
+ struct hns_roce_qp *rsv_qp[HNS_ROCE_FREE_MR_USED_QP_NUM];
+ struct hns_roce_cq *rsv_cq;
+ struct hns_roce_pd *rsv_pd;
+ struct mutex mutex;
+};
struct hns_roce_v2_priv {
struct hnae3_handle *handle;
struct hns_roce_v2_cmq cmq;
- struct hns_roce_link_table tsq;
- struct hns_roce_link_table tpq;
+ struct hns_roce_link_table ext_llm;
+ struct hns_roce_v2_free_mr free_mr;
};
-struct hns_roce_eq_context {
- __le32 byte_4;
- __le32 byte_8;
- __le32 byte_12;
- __le32 eqe_report_timer;
- __le32 eqe_ba0;
- __le32 eqe_ba1;
- __le32 byte_28;
- __le32 byte_32;
- __le32 byte_36;
- __le32 nxt_eqe_ba0;
- __le32 nxt_eqe_ba1;
- __le32 rsv[5];
+struct hns_roce_dip {
+ u8 dgid[GID_LEN_V2];
+ u32 dip_idx;
+ u32 qp_cnt;
};
+struct fmea_ram_ecc {
+ u32 is_ecc_err;
+ u32 res_type;
+ u32 index;
+};
+
+/* Only for the RNR timeout issue of HIP08 */
+#define HNS_ROCE_CLOCK_ADJUST 1000
+#define HNS_ROCE_MAX_CQ_PERIOD_HIP08 65
+#define HNS_ROCE_MAX_EQ_PERIOD 65
+#define HNS_ROCE_RNR_TIMER_10NS 1
+#define HNS_ROCE_1US_CFG 999
+#define HNS_ROCE_1NS_CFG 0
+
#define HNS_ROCE_AEQ_DEFAULT_BURST_NUM 0x0
#define HNS_ROCE_AEQ_DEFAULT_INTERVAL 0x0
#define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x0
@@ -1652,15 +1397,10 @@ struct hns_roce_eq_context {
#define HNS_ROCE_EQ_INIT_CONS_IDX 0
#define HNS_ROCE_EQ_INIT_NXT_EQE_BA 0
-#define HNS_ROCE_V2_CEQ_CEQE_OWNER_S 31
-#define HNS_ROCE_V2_AEQ_AEQE_OWNER_S 31
-
#define HNS_ROCE_V2_COMP_EQE_NUM 0x1000
#define HNS_ROCE_V2_ASYNC_EQE_NUM 0x1000
#define HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S 0
-#define HNS_ROCE_V2_VF_INT_ST_BUS_ERR_S 1
-#define HNS_ROCE_V2_VF_INT_ST_OTHER_ERR_S 2
#define HNS_ROCE_EQ_DB_CMD_AEQ 0x0
#define HNS_ROCE_EQ_DB_CMD_AEQ_ARMED 0x1
@@ -1675,114 +1415,44 @@ struct hns_roce_eq_context {
#define HNS_ROCE_INT_NAME_LEN 32
#define HNS_ROCE_V2_EQN_M GENMASK(23, 0)
-#define HNS_ROCE_V2_CONS_IDX_M GENMASK(23, 0)
-
#define HNS_ROCE_V2_VF_ABN_INT_EN_S 0
#define HNS_ROCE_V2_VF_ABN_INT_EN_M GENMASK(0, 0)
#define HNS_ROCE_V2_VF_ABN_INT_ST_M GENMASK(2, 0)
#define HNS_ROCE_V2_VF_ABN_INT_CFG_M GENMASK(2, 0)
#define HNS_ROCE_V2_VF_EVENT_INT_EN_M GENMASK(0, 0)
-/* WORD0 */
-#define HNS_ROCE_EQC_EQ_ST_S 0
-#define HNS_ROCE_EQC_EQ_ST_M GENMASK(1, 0)
-
-#define HNS_ROCE_EQC_HOP_NUM_S 2
-#define HNS_ROCE_EQC_HOP_NUM_M GENMASK(3, 2)
-
-#define HNS_ROCE_EQC_OVER_IGNORE_S 4
-#define HNS_ROCE_EQC_OVER_IGNORE_M GENMASK(4, 4)
-
-#define HNS_ROCE_EQC_COALESCE_S 5
-#define HNS_ROCE_EQC_COALESCE_M GENMASK(5, 5)
-
-#define HNS_ROCE_EQC_ARM_ST_S 6
-#define HNS_ROCE_EQC_ARM_ST_M GENMASK(7, 6)
-
-#define HNS_ROCE_EQC_EQN_S 8
-#define HNS_ROCE_EQC_EQN_M GENMASK(15, 8)
-
-#define HNS_ROCE_EQC_EQE_CNT_S 16
-#define HNS_ROCE_EQC_EQE_CNT_M GENMASK(31, 16)
-
-/* WORD1 */
-#define HNS_ROCE_EQC_BA_PG_SZ_S 0
-#define HNS_ROCE_EQC_BA_PG_SZ_M GENMASK(3, 0)
-
-#define HNS_ROCE_EQC_BUF_PG_SZ_S 4
-#define HNS_ROCE_EQC_BUF_PG_SZ_M GENMASK(7, 4)
-
-#define HNS_ROCE_EQC_PROD_INDX_S 8
-#define HNS_ROCE_EQC_PROD_INDX_M GENMASK(31, 8)
-
-/* WORD2 */
-#define HNS_ROCE_EQC_MAX_CNT_S 0
-#define HNS_ROCE_EQC_MAX_CNT_M GENMASK(15, 0)
-
-#define HNS_ROCE_EQC_PERIOD_S 16
-#define HNS_ROCE_EQC_PERIOD_M GENMASK(31, 16)
-
-/* WORD3 */
-#define HNS_ROCE_EQC_REPORT_TIMER_S 0
-#define HNS_ROCE_EQC_REPORT_TIMER_M GENMASK(31, 0)
-
-/* WORD4 */
-#define HNS_ROCE_EQC_EQE_BA_L_S 0
-#define HNS_ROCE_EQC_EQE_BA_L_M GENMASK(31, 0)
-
-/* WORD5 */
-#define HNS_ROCE_EQC_EQE_BA_H_S 0
-#define HNS_ROCE_EQC_EQE_BA_H_M GENMASK(28, 0)
-
-/* WORD6 */
-#define HNS_ROCE_EQC_SHIFT_S 0
-#define HNS_ROCE_EQC_SHIFT_M GENMASK(7, 0)
-
-#define HNS_ROCE_EQC_MSI_INDX_S 8
-#define HNS_ROCE_EQC_MSI_INDX_M GENMASK(15, 8)
-
-#define HNS_ROCE_EQC_CUR_EQE_BA_L_S 16
-#define HNS_ROCE_EQC_CUR_EQE_BA_L_M GENMASK(31, 16)
-
-/* WORD7 */
-#define HNS_ROCE_EQC_CUR_EQE_BA_M_S 0
-#define HNS_ROCE_EQC_CUR_EQE_BA_M_M GENMASK(31, 0)
-
-/* WORD8 */
-#define HNS_ROCE_EQC_CUR_EQE_BA_H_S 0
-#define HNS_ROCE_EQC_CUR_EQE_BA_H_M GENMASK(3, 0)
-
-#define HNS_ROCE_EQC_CONS_INDX_S 8
-#define HNS_ROCE_EQC_CONS_INDX_M GENMASK(31, 8)
-
-/* WORD9 */
-#define HNS_ROCE_EQC_NXT_EQE_BA_L_S 0
-#define HNS_ROCE_EQC_NXT_EQE_BA_L_M GENMASK(31, 0)
-
-/* WORD10 */
-#define HNS_ROCE_EQC_NXT_EQE_BA_H_S 0
-#define HNS_ROCE_EQC_NXT_EQE_BA_H_M GENMASK(19, 0)
-
-#define HNS_ROCE_V2_CEQE_COMP_CQN_S 0
-#define HNS_ROCE_V2_CEQE_COMP_CQN_M GENMASK(23, 0)
-
-#define HNS_ROCE_V2_AEQE_EVENT_TYPE_S 0
-#define HNS_ROCE_V2_AEQE_EVENT_TYPE_M GENMASK(7, 0)
-
-#define HNS_ROCE_V2_AEQE_SUB_TYPE_S 8
-#define HNS_ROCE_V2_AEQE_SUB_TYPE_M GENMASK(15, 8)
-
-#define HNS_ROCE_V2_EQ_DB_CMD_S 16
-#define HNS_ROCE_V2_EQ_DB_CMD_M GENMASK(17, 16)
-
-#define HNS_ROCE_V2_EQ_DB_TAG_S 0
-#define HNS_ROCE_V2_EQ_DB_TAG_M GENMASK(7, 0)
-
-#define HNS_ROCE_V2_EQ_DB_PARA_S 0
-#define HNS_ROCE_V2_EQ_DB_PARA_M GENMASK(23, 0)
+struct hns_roce_eq_context {
+ __le32 data[16];
+};
-#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0
-#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0)
+#define EQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_eq_context, h, l)
+
+#define EQC_EQ_ST EQC_FIELD_LOC(1, 0)
+#define EQC_EQE_HOP_NUM EQC_FIELD_LOC(3, 2)
+#define EQC_OVER_IGNORE EQC_FIELD_LOC(4, 4)
+#define EQC_COALESCE EQC_FIELD_LOC(5, 5)
+#define EQC_ARM_ST EQC_FIELD_LOC(7, 6)
+#define EQC_EQN EQC_FIELD_LOC(15, 8)
+#define EQC_EQE_CNT EQC_FIELD_LOC(31, 16)
+#define EQC_EQE_BA_PG_SZ EQC_FIELD_LOC(35, 32)
+#define EQC_EQE_BUF_PG_SZ EQC_FIELD_LOC(39, 36)
+#define EQC_EQ_PROD_INDX EQC_FIELD_LOC(63, 40)
+#define EQC_EQ_MAX_CNT EQC_FIELD_LOC(79, 64)
+#define EQC_EQ_PERIOD EQC_FIELD_LOC(95, 80)
+#define EQC_EQE_REPORT_TIMER EQC_FIELD_LOC(127, 96)
+#define EQC_EQE_BA_L EQC_FIELD_LOC(159, 128)
+#define EQC_EQE_BA_H EQC_FIELD_LOC(188, 160)
+#define EQC_SHIFT EQC_FIELD_LOC(199, 192)
+#define EQC_MSI_INDX EQC_FIELD_LOC(207, 200)
+#define EQC_CUR_EQE_BA_L EQC_FIELD_LOC(223, 208)
+#define EQC_CUR_EQE_BA_M EQC_FIELD_LOC(255, 224)
+#define EQC_CUR_EQE_BA_H EQC_FIELD_LOC(259, 256)
+#define EQC_EQ_CONS_INDX EQC_FIELD_LOC(287, 264)
+#define EQC_NEX_EQE_BA_L EQC_FIELD_LOC(319, 288)
+#define EQC_NEX_EQE_BA_H EQC_FIELD_LOC(339, 320)
+#define EQC_EQE_SIZE EQC_FIELD_LOC(341, 340)
+
+#define MAX_SERVICE_LEVEL 0x7
struct hns_roce_wqe_atomic_seg {
__le64 fetchadd_swap_data;
@@ -1799,13 +1469,28 @@ struct hns_roce_sccc_clr_done {
__le32 rsv[5];
};
-int hns_roce_v2_query_cqc_info(struct hns_roce_dev *hr_dev, u32 cqn,
- int *buffer);
+struct hns_roce_bond_info {
+ __le32 bond_id;
+ __le32 bond_mode;
+ __le32 active_slave_cnt;
+ __le32 active_slave_mask;
+ __le32 slave_mask;
+ __le32 hash_policy;
+};
+
+struct hns_roce_dev
+ *hns_roce_bond_init_client(struct hns_roce_bond_group *bond_grp,
+ int func_idx);
+void hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp,
+ int func_idx);
+int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
+int hns_roce_cmd_bond(struct hns_roce_bond_group *bond_grp,
+ enum hns_roce_bond_cmd_type bond_type);
static inline void hns_roce_write64(struct hns_roce_dev *hr_dev, __le32 val[2],
void __iomem *dest)
{
- struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
struct hnae3_handle *handle = priv->handle;
const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c
deleted file mode 100644
index 5a97b5a0b7be..000000000000
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c
+++ /dev/null
@@ -1,35 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
-// Copyright (c) 2019 Hisilicon Limited.
-
-#include "hnae3.h"
-#include "hns_roce_device.h"
-#include "hns_roce_cmd.h"
-#include "hns_roce_hw_v2.h"
-
-int hns_roce_v2_query_cqc_info(struct hns_roce_dev *hr_dev, u32 cqn,
- int *buffer)
-{
- struct hns_roce_v2_cq_context *cq_context;
- struct hns_roce_cmd_mailbox *mailbox;
- int ret;
-
- mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
-
- cq_context = mailbox->buf;
- ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, cqn, 0,
- HNS_ROCE_CMD_QUERY_CQC,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
- if (ret) {
- dev_err(hr_dev->dev, "QUERY cqc cmd process error\n");
- goto err_mailbox;
- }
-
- memcpy(buffer, cq_context, sizeof(*cq_context));
-
-err_mailbox:
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
-
- return ret;
-}
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 8da5f18bf820..2f4864ab7d4e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -31,7 +31,6 @@
* SOFTWARE.
*/
#include <linux/acpi.h>
-#include <linux/of_platform.h>
#include <linux/module.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
@@ -39,35 +38,23 @@
#include <rdma/ib_cache.h>
#include "hns_roce_common.h"
#include "hns_roce_device.h"
-#include <rdma/hns-abi.h>
#include "hns_roce_hem.h"
+#include "hns_roce_hw_v2.h"
+#include "hns_roce_bond.h"
-/**
- * hns_get_gid_index - Get gid index.
- * @hr_dev: pointer to structure hns_roce_dev.
- * @port: port, value range: 0 ~ MAX
- * @gid_index: gid_index, value range: 0 ~ MAX
- * Description:
- * N ports shared gids, allocation method as follow:
- * GID[0][0], GID[1][0],.....GID[N - 1][0],
- * GID[0][0], GID[1][0],.....GID[N - 1][0],
- * And so on
- */
-int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index)
-{
- return gid_index * hr_dev->caps.num_ports + port;
-}
-EXPORT_SYMBOL_GPL(hns_get_gid_index);
-
-static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr)
+static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u32 port,
+ const u8 *addr)
{
u8 phy_port;
- u32 i = 0;
+ u32 i;
- if (!memcmp(hr_dev->dev_addr[port], addr, MAC_ADDR_OCTET_NUM))
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
return 0;
- for (i = 0; i < MAC_ADDR_OCTET_NUM; i++)
+ if (!memcmp(hr_dev->dev_addr[port], addr, ETH_ALEN))
+ return 0;
+
+ for (i = 0; i < ETH_ALEN; i++)
hr_dev->dev_addr[port][i] = addr[i];
phy_port = hr_dev->iboe.phy_port[port];
@@ -77,18 +64,13 @@ static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr)
static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context)
{
struct hns_roce_dev *hr_dev = to_hr_dev(attr->device);
- u8 port = attr->port_num - 1;
- unsigned long flags;
+ u32 port = attr->port_num - 1;
int ret;
if (port >= hr_dev->caps.num_ports)
return -EINVAL;
- spin_lock_irqsave(&hr_dev->iboe.lock, flags);
-
- ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &attr->gid, attr);
-
- spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
+ ret = hr_dev->hw->set_gid(hr_dev, attr->index, &attr->gid, attr);
return ret;
}
@@ -96,47 +78,86 @@ static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context)
static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context)
{
struct hns_roce_dev *hr_dev = to_hr_dev(attr->device);
- struct ib_gid_attr zattr = { };
- u8 port = attr->port_num - 1;
- unsigned long flags;
+ u32 port = attr->port_num - 1;
int ret;
if (port >= hr_dev->caps.num_ports)
return -EINVAL;
- spin_lock_irqsave(&hr_dev->iboe.lock, flags);
+ ret = hr_dev->hw->set_gid(hr_dev, attr->index, NULL, NULL);
- ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &zgid, &zattr);
+ return ret;
+}
- spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
+static int hns_roce_get_port_state(struct hns_roce_dev *hr_dev, u32 port_num,
+ enum ib_port_state *state)
+{
+ struct hns_roce_bond_group *bond_grp;
+ u8 bus_num = get_hr_bus_num(hr_dev);
+ struct net_device *net_dev;
- return ret;
+ net_dev = ib_device_get_netdev(&hr_dev->ib_dev, port_num);
+ if (!net_dev)
+ return -ENODEV;
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) {
+ bond_grp = hns_roce_get_bond_grp(net_dev, bus_num);
+ if (bond_grp) {
+ *state = ib_get_curr_port_state(bond_grp->upper_dev);
+ goto out;
+ }
+ }
+
+ *state = ib_get_curr_port_state(net_dev);
+out:
+ dev_put(net_dev);
+ return 0;
}
-static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port,
- unsigned long event)
+static int handle_en_event(struct net_device *netdev,
+ struct hns_roce_dev *hr_dev,
+ u32 port, unsigned long event)
{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
struct device *dev = hr_dev->dev;
- struct net_device *netdev;
+ enum ib_port_state curr_state;
+ struct ib_event ibevent;
int ret = 0;
- netdev = hr_dev->iboe.netdevs[port];
if (!netdev) {
- dev_err(dev, "port(%d) can't find netdev\n", port);
+ dev_err(dev, "can't find netdev on port(%u)!\n", port);
return -ENODEV;
}
switch (event) {
- case NETDEV_UP:
- case NETDEV_CHANGE:
case NETDEV_REGISTER:
case NETDEV_CHANGEADDR:
ret = hns_roce_set_mac(hr_dev, port, netdev->dev_addr);
break;
+ case NETDEV_UP:
+ case NETDEV_CHANGE:
+ ret = hns_roce_set_mac(hr_dev, port, netdev->dev_addr);
+ if (ret)
+ return ret;
+ fallthrough;
case NETDEV_DOWN:
- /*
- * In v1 engine, only support all ports closed together.
- */
+ if (!netif_is_lag_master(netdev))
+ break;
+ curr_state = ib_get_curr_port_state(netdev);
+
+ write_lock_irq(&ibdev->cache_lock);
+ if (ibdev->port_data[port].cache.last_port_state == curr_state) {
+ write_unlock_irq(&ibdev->cache_lock);
+ return 0;
+ }
+ ibdev->port_data[port].cache.last_port_state = curr_state;
+ write_unlock_irq(&ibdev->cache_lock);
+
+ ibevent.event = (curr_state == IB_PORT_DOWN) ?
+ IB_EVENT_PORT_ERR : IB_EVENT_PORT_ACTIVE;
+ ibevent.device = ibdev;
+ ibevent.element.port_num = port + 1;
+ ib_dispatch_event(&ibevent);
break;
default:
dev_dbg(dev, "NETDEV event = 0x%x!\n", (u32)(event));
@@ -150,17 +171,25 @@ static int hns_roce_netdev_event(struct notifier_block *self,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct hns_roce_bond_group *bond_grp;
struct hns_roce_ib_iboe *iboe = NULL;
struct hns_roce_dev *hr_dev = NULL;
- u8 port = 0;
- int ret = 0;
+ struct net_device *upper = NULL;
+ int ret;
+ u32 port;
hr_dev = container_of(self, struct hns_roce_dev, iboe.nb);
iboe = &hr_dev->iboe;
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) {
+ bond_grp = hns_roce_get_bond_grp(get_hr_netdev(hr_dev, 0),
+ get_hr_bus_num(hr_dev));
+ upper = bond_grp ? bond_grp->upper_dev : NULL;
+ }
for (port = 0; port < hr_dev->caps.num_ports; port++) {
- if (dev == iboe->netdevs[port]) {
- ret = handle_en_event(hr_dev, port, event);
+ if ((!upper && dev == iboe->netdevs[port]) ||
+ (upper && dev == upper)) {
+ ret = handle_en_event(dev, hr_dev, port, event);
if (ret)
return NOTIFY_DONE;
break;
@@ -172,15 +201,13 @@ static int hns_roce_netdev_event(struct notifier_block *self,
static int hns_roce_setup_mtu_mac(struct hns_roce_dev *hr_dev)
{
+ struct net_device *net_dev;
int ret;
u8 i;
for (i = 0; i < hr_dev->caps.num_ports; i++) {
- if (hr_dev->hw->set_mtu)
- hr_dev->hw->set_mtu(hr_dev, hr_dev->iboe.phy_port[i],
- hr_dev->caps.max_mtu);
- ret = hns_roce_set_mac(hr_dev, i,
- hr_dev->iboe.netdevs[i]->dev_addr);
+ net_dev = get_hr_netdev(hr_dev, i);
+ ret = hns_roce_set_mac(hr_dev, i, net_dev->dev_addr);
if (ret)
return ret;
}
@@ -209,7 +236,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
IB_DEVICE_RC_RNR_NAK_GEN;
props->max_send_sge = hr_dev->caps.max_sq_sg;
props->max_recv_sge = hr_dev->caps.max_rq_sg;
- props->max_sge_rd = 1;
+ props->max_sge_rd = hr_dev->caps.max_sq_sg;
props->max_cq = hr_dev->caps.num_cqs;
props->max_cqe = hr_dev->caps.max_cqes;
props->max_mr = hr_dev->caps.num_mtpts;
@@ -220,31 +247,39 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
props->max_pkeys = 1;
props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
+ props->max_ah = INT_MAX;
+ props->cq_caps.max_cq_moderation_period = HNS_ROCE_MAX_CQ_PERIOD;
+ props->cq_caps.max_cq_moderation_count = HNS_ROCE_MAX_CQ_COUNT;
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ props->cq_caps.max_cq_moderation_period = HNS_ROCE_MAX_CQ_PERIOD_HIP08;
+
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
- props->max_srq = hr_dev->caps.max_srqs;
+ props->max_srq = hr_dev->caps.num_srqs;
props->max_srq_wr = hr_dev->caps.max_srq_wrs;
props->max_srq_sge = hr_dev->caps.max_srq_sges;
}
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) {
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR &&
+ hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
props->max_fast_reg_page_list_len = HNS_ROCE_FRMR_MAX_PA;
}
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
+ props->device_cap_flags |= IB_DEVICE_XRC;
+
return 0;
}
-static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num,
+static int hns_roce_query_port(struct ib_device *ib_dev, u32 port_num,
struct ib_port_attr *props)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
- struct device *dev = hr_dev->dev;
struct net_device *net_dev;
- unsigned long flags;
enum ib_mtu mtu;
- u8 port;
+ u32 port;
+ int ret;
- assert(port_num > 0);
port = port_num - 1;
/* props being zeroed by the caller, avoid zeroing it here */
@@ -256,38 +291,46 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num,
IB_PORT_BOOT_MGMT_SUP;
props->max_msg_sz = HNS_ROCE_MAX_MSG_LEN;
props->pkey_tbl_len = 1;
- props->active_width = IB_WIDTH_4X;
- props->active_speed = 1;
-
- spin_lock_irqsave(&hr_dev->iboe.lock, flags);
+ ret = ib_get_eth_speed(ib_dev, port_num, &props->active_speed,
+ &props->active_width);
+ if (ret)
+ ibdev_warn(ib_dev, "failed to get speed, ret = %d.\n", ret);
- net_dev = hr_dev->iboe.netdevs[port];
+ net_dev = ib_device_get_netdev(ib_dev, port_num);
if (!net_dev) {
- spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
- dev_err(dev, "find netdev %d failed!\r\n", port);
+ ibdev_err(ib_dev, "find netdev %u failed!\n", port);
return -EINVAL;
}
mtu = iboe_get_mtu(net_dev->mtu);
props->active_mtu = mtu ? min(props->max_mtu, mtu) : IB_MTU_256;
- props->state = (netif_running(net_dev) && netif_carrier_ok(net_dev)) ?
- IB_PORT_ACTIVE : IB_PORT_DOWN;
- props->phys_state = (props->state == IB_PORT_ACTIVE) ? 5 : 3;
- spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
+ dev_put(net_dev);
+
+ ret = hns_roce_get_port_state(hr_dev, port_num, &props->state);
+ if (ret) {
+ ibdev_err(ib_dev, "failed to get port state.\n");
+ return ret;
+ }
+ props->phys_state = props->state == IB_PORT_ACTIVE ?
+ IB_PORT_PHYS_STATE_LINK_UP :
+ IB_PORT_PHYS_STATE_DISABLED;
return 0;
}
static enum rdma_link_layer hns_roce_get_link_layer(struct ib_device *device,
- u8 port_num)
+ u32 port_num)
{
return IB_LINK_LAYER_ETHERNET;
}
-static int hns_roce_query_pkey(struct ib_device *ib_dev, u8 port, u16 index,
+static int hns_roce_query_pkey(struct ib_device *ib_dev, u32 port, u16 index,
u16 *pkey)
{
+ if (index > 0)
+ return -EINVAL;
+
*pkey = PKEY_ID;
return 0;
@@ -310,85 +353,217 @@ static int hns_roce_modify_device(struct ib_device *ib_dev, int mask,
return 0;
}
-static int hns_roce_modify_port(struct ib_device *ib_dev, u8 port_num, int mask,
- struct ib_port_modify *props)
+struct hns_user_mmap_entry *
+hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address,
+ size_t length,
+ enum hns_roce_mmap_type mmap_type)
+{
+ struct hns_user_mmap_entry *entry;
+ int ret;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return NULL;
+
+ entry->address = address;
+ entry->mmap_type = mmap_type;
+
+ switch (mmap_type) {
+ /* pgoff 0 must be used by DB for compatibility */
+ case HNS_ROCE_MMAP_TYPE_DB:
+ ret = rdma_user_mmap_entry_insert_exact(
+ ucontext, &entry->rdma_entry, length, 0);
+ break;
+ case HNS_ROCE_MMAP_TYPE_DWQE:
+ ret = rdma_user_mmap_entry_insert_range(
+ ucontext, &entry->rdma_entry, length, 1,
+ U32_MAX);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ if (ret) {
+ kfree(entry);
+ return NULL;
+ }
+
+ return entry;
+}
+
+static void hns_roce_dealloc_uar_entry(struct hns_roce_ucontext *context)
{
+ if (context->db_mmap_entry)
+ rdma_user_mmap_entry_remove(
+ &context->db_mmap_entry->rdma_entry);
+}
+
+static int hns_roce_alloc_uar_entry(struct ib_ucontext *uctx)
+{
+ struct hns_roce_ucontext *context = to_hr_ucontext(uctx);
+ u64 address;
+
+ address = context->uar.pfn << PAGE_SHIFT;
+ context->db_mmap_entry = hns_roce_user_mmap_entry_insert(
+ uctx, address, PAGE_SIZE, HNS_ROCE_MMAP_TYPE_DB);
+ if (!context->db_mmap_entry)
+ return -ENOMEM;
+
return 0;
}
static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
struct ib_udata *udata)
{
- int ret = 0;
struct hns_roce_ucontext *context = to_hr_ucontext(uctx);
- struct hns_roce_ib_alloc_ucontext_resp resp = {};
struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
+ struct hns_roce_ib_alloc_ucontext_resp resp = {};
+ struct hns_roce_ib_alloc_ucontext ucmd = {};
+ int ret = -EAGAIN;
if (!hr_dev->active)
- return -EAGAIN;
+ goto error_out;
resp.qp_tab_size = hr_dev->caps.num_qps;
+ resp.srq_tab_size = hr_dev->caps.num_srqs;
+
+ ret = ib_copy_from_udata(&ucmd, udata,
+ min(udata->inlen, sizeof(ucmd)));
+ if (ret)
+ goto error_out;
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ context->config = ucmd.config & HNS_ROCE_EXSGE_FLAGS;
+
+ if (context->config & HNS_ROCE_EXSGE_FLAGS) {
+ resp.config |= HNS_ROCE_RSP_EXSGE_FLAGS;
+ resp.max_inline_data = hr_dev->caps.max_sq_inline;
+ }
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
+ context->config |= ucmd.config & HNS_ROCE_RQ_INLINE_FLAGS;
+ if (context->config & HNS_ROCE_RQ_INLINE_FLAGS)
+ resp.config |= HNS_ROCE_RSP_RQ_INLINE_FLAGS;
+ }
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQE_INLINE) {
+ context->config |= ucmd.config & HNS_ROCE_CQE_INLINE_FLAGS;
+ if (context->config & HNS_ROCE_CQE_INLINE_FLAGS)
+ resp.config |= HNS_ROCE_RSP_CQE_INLINE_FLAGS;
+ }
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ resp.congest_type = hr_dev->caps.cong_cap;
ret = hns_roce_uar_alloc(hr_dev, &context->uar);
if (ret)
- goto error_fail_uar_alloc;
+ goto error_out;
+
+ ret = hns_roce_alloc_uar_entry(uctx);
+ if (ret)
+ goto error_fail_uar_entry;
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) {
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
+ hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) {
INIT_LIST_HEAD(&context->page_list);
mutex_init(&context->page_mutex);
}
- ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ resp.cqe_size = hr_dev->caps.cqe_sz;
+
+ ret = ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)));
if (ret)
goto error_fail_copy_to_udata;
+ hns_roce_get_cq_bankid_for_uctx(context);
+
return 0;
error_fail_copy_to_udata:
- hns_roce_uar_free(hr_dev, &context->uar);
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
+ hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
+ mutex_destroy(&context->page_mutex);
+ hns_roce_dealloc_uar_entry(context);
+
+error_fail_uar_entry:
+ ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx);
+
+error_out:
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_UCTX_ALLOC_ERR_CNT]);
-error_fail_uar_alloc:
return ret;
}
static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibcontext->device);
+
+ hns_roce_put_cq_bankid_for_uctx(context);
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
+ hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
+ mutex_destroy(&context->page_mutex);
- hns_roce_uar_free(to_hr_dev(ibcontext->device), &context->uar);
+ hns_roce_dealloc_uar_entry(context);
+
+ ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx);
}
-static int hns_roce_mmap(struct ib_ucontext *context,
- struct vm_area_struct *vma)
+static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(context->device);
+ struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
+ struct rdma_user_mmap_entry *rdma_entry;
+ struct hns_user_mmap_entry *entry;
+ phys_addr_t pfn;
+ pgprot_t prot;
+ int ret;
- switch (vma->vm_pgoff) {
- case 0:
- return rdma_user_mmap_io(context, vma,
- to_hr_ucontext(context)->uar.pfn,
- PAGE_SIZE,
- pgprot_noncached(vma->vm_page_prot));
+ if (hr_dev->dis_db) {
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);
+ return -EPERM;
+ }
- /* vm_pgoff: 1 -- TPTR */
- case 1:
- if (!hr_dev->tptr_dma_addr || !hr_dev->tptr_size)
- return -EINVAL;
- /*
- * FIXME: using io_remap_pfn_range on the dma address returned
- * by dma_alloc_coherent is totally wrong.
- */
- return rdma_user_mmap_io(context, vma,
- hr_dev->tptr_dma_addr >> PAGE_SHIFT,
- hr_dev->tptr_size,
- vma->vm_page_prot);
+ rdma_entry = rdma_user_mmap_entry_get_pgoff(uctx, vma->vm_pgoff);
+ if (!rdma_entry) {
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);
+ return -EINVAL;
+ }
+
+ entry = to_hns_mmap(rdma_entry);
+ pfn = entry->address >> PAGE_SHIFT;
+ switch (entry->mmap_type) {
+ case HNS_ROCE_MMAP_TYPE_DB:
+ case HNS_ROCE_MMAP_TYPE_DWQE:
+ prot = pgprot_device(vma->vm_page_prot);
+ break;
default:
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
+
+ ret = rdma_user_mmap_io(uctx, vma, pfn, rdma_entry->npages * PAGE_SIZE,
+ prot, rdma_entry);
+
+out:
+ rdma_user_mmap_entry_put(rdma_entry);
+ if (ret)
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);
+
+ return ret;
+}
+
+static void hns_roce_free_mmap(struct rdma_user_mmap_entry *rdma_entry)
+{
+ struct hns_user_mmap_entry *entry = to_hns_mmap(rdma_entry);
+
+ kfree(entry);
}
-static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
+static int hns_roce_port_immutable(struct ib_device *ib_dev, u32 port_num,
struct ib_port_immutable *immutable)
{
struct ib_port_attr attr;
@@ -413,9 +588,121 @@ static void hns_roce_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
}
-static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
+static void hns_roce_get_fw_ver(struct ib_device *device, char *str)
{
+ u64 fw_ver = to_hr_dev(device)->caps.fw_ver;
+ unsigned int major, minor, sub_minor;
+
+ major = upper_32_bits(fw_ver);
+ minor = high_16_bits(lower_32_bits(fw_ver));
+ sub_minor = low_16_bits(fw_ver);
+
+ snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u.%04u", major, minor,
+ sub_minor);
+}
+
+#define HNS_ROCE_HW_CNT(ename, cname) \
+ [HNS_ROCE_HW_##ename##_CNT].name = cname
+
+static const struct rdma_stat_desc hns_roce_port_stats_descs[] = {
+ HNS_ROCE_HW_CNT(RX_RC_PKT, "rx_rc_pkt"),
+ HNS_ROCE_HW_CNT(RX_UC_PKT, "rx_uc_pkt"),
+ HNS_ROCE_HW_CNT(RX_UD_PKT, "rx_ud_pkt"),
+ HNS_ROCE_HW_CNT(RX_XRC_PKT, "rx_xrc_pkt"),
+ HNS_ROCE_HW_CNT(RX_PKT, "rx_pkt"),
+ HNS_ROCE_HW_CNT(RX_ERR_PKT, "rx_err_pkt"),
+ HNS_ROCE_HW_CNT(RX_CNP_PKT, "rx_cnp_pkt"),
+ HNS_ROCE_HW_CNT(TX_RC_PKT, "tx_rc_pkt"),
+ HNS_ROCE_HW_CNT(TX_UC_PKT, "tx_uc_pkt"),
+ HNS_ROCE_HW_CNT(TX_UD_PKT, "tx_ud_pkt"),
+ HNS_ROCE_HW_CNT(TX_XRC_PKT, "tx_xrc_pkt"),
+ HNS_ROCE_HW_CNT(TX_PKT, "tx_pkt"),
+ HNS_ROCE_HW_CNT(TX_ERR_PKT, "tx_err_pkt"),
+ HNS_ROCE_HW_CNT(TX_CNP_PKT, "tx_cnp_pkt"),
+ HNS_ROCE_HW_CNT(TRP_GET_MPT_ERR_PKT, "trp_get_mpt_err_pkt"),
+ HNS_ROCE_HW_CNT(TRP_GET_IRRL_ERR_PKT, "trp_get_irrl_err_pkt"),
+ HNS_ROCE_HW_CNT(ECN_DB, "ecn_doorbell"),
+ HNS_ROCE_HW_CNT(RX_BUF, "rx_buffer"),
+ HNS_ROCE_HW_CNT(TRP_RX_SOF, "trp_rx_sof"),
+ HNS_ROCE_HW_CNT(CQ_CQE, "cq_cqe"),
+ HNS_ROCE_HW_CNT(CQ_POE, "cq_poe"),
+ HNS_ROCE_HW_CNT(CQ_NOTIFY, "cq_notify"),
+};
+
+static struct rdma_hw_stats *hns_roce_alloc_hw_port_stats(
+ struct ib_device *device, u32 port_num)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(device);
+
+ if (port_num > hr_dev->caps.num_ports) {
+ ibdev_err(device, "invalid port num.\n");
+ return NULL;
+ }
+
+ return rdma_alloc_hw_stats_struct(hns_roce_port_stats_descs,
+ ARRAY_SIZE(hns_roce_port_stats_descs),
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int hns_roce_get_hw_stats(struct ib_device *device,
+ struct rdma_hw_stats *stats,
+ u32 port, int index)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(device);
+ int num_counters = HNS_ROCE_HW_CNT_TOTAL;
+ int ret;
+
+ if (port == 0)
+ return 0;
+
+ if (port > hr_dev->caps.num_ports)
+ return -EINVAL;
+
+ ret = hr_dev->hw->query_hw_counter(hr_dev, stats->value, port,
+ &num_counters);
+ if (ret) {
+ ibdev_err(device, "failed to query hw counter, ret = %d\n",
+ ret);
+ return ret;
+ }
+
+ return num_counters;
+}
+
+static void
+ hns_roce_unregister_bond_cleanup(struct hns_roce_dev *hr_dev,
+ struct hns_roce_bond_group *bond_grp)
+{
+ struct net_device *net_dev;
+ int i;
+
+ /* To avoid losing the other slave devices when main_hr_dev
+ * is unregistered, re-initialize the remaining slaves before
+ * cleaning up the bond resources.
+ */
+ bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED;
+ for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
+ net_dev = bond_grp->bond_func_info[i].net_dev;
+ if (net_dev && net_dev != get_hr_netdev(hr_dev, 0))
+ hns_roce_bond_init_client(bond_grp, i);
+ }
+
+ hns_roce_cleanup_bond(bond_grp);
+}
+
+static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev,
+ bool bond_cleanup)
+{
+ struct net_device *net_dev = get_hr_netdev(hr_dev, 0);
struct hns_roce_ib_iboe *iboe = &hr_dev->iboe;
+ struct hns_roce_bond_group *bond_grp;
+ u8 bus_num = get_hr_bus_num(hr_dev);
+
+ if (bond_cleanup && hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) {
+ bond_grp = hns_roce_get_bond_grp(net_dev, bus_num);
+ if (bond_grp)
+ hns_roce_unregister_bond_cleanup(hr_dev, bond_grp);
+ }
hr_dev->active = false;
unregister_netdevice_notifier(&iboe->nb);
@@ -423,26 +710,32 @@ static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
}
static const struct ib_device_ops hns_roce_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_HNS,
+ .uverbs_abi_ver = 1,
+ .uverbs_no_driver_id_binding = 1,
+
+ .get_dev_fw_str = hns_roce_get_fw_ver,
.add_gid = hns_roce_add_gid,
.alloc_pd = hns_roce_alloc_pd,
.alloc_ucontext = hns_roce_alloc_ucontext,
.create_ah = hns_roce_create_ah,
- .create_cq = hns_roce_ib_create_cq,
+ .create_user_ah = hns_roce_create_ah,
+ .create_cq = hns_roce_create_cq,
.create_qp = hns_roce_create_qp,
.dealloc_pd = hns_roce_dealloc_pd,
.dealloc_ucontext = hns_roce_dealloc_ucontext,
.del_gid = hns_roce_del_gid,
.dereg_mr = hns_roce_dereg_mr,
.destroy_ah = hns_roce_destroy_ah,
- .destroy_cq = hns_roce_ib_destroy_cq,
+ .destroy_cq = hns_roce_destroy_cq,
.disassociate_ucontext = hns_roce_disassociate_ucontext,
- .fill_res_entry = hns_roce_fill_res_entry,
.get_dma_mr = hns_roce_get_dma_mr,
.get_link_layer = hns_roce_get_link_layer,
.get_port_immutable = hns_roce_port_immutable,
.mmap = hns_roce_mmap,
+ .mmap_free = hns_roce_free_mmap,
.modify_device = hns_roce_modify_device,
- .modify_port = hns_roce_modify_port,
.modify_qp = hns_roce_modify_qp,
.query_ah = hns_roce_query_ah,
.query_device = hns_roce_query_device,
@@ -451,17 +744,19 @@ static const struct ib_device_ops hns_roce_dev_ops = {
.reg_user_mr = hns_roce_reg_user_mr,
INIT_RDMA_OBJ_SIZE(ib_ah, hns_roce_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, hns_roce_cq, ib_cq),
INIT_RDMA_OBJ_SIZE(ib_pd, hns_roce_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_qp, hns_roce_qp, ibqp),
INIT_RDMA_OBJ_SIZE(ib_ucontext, hns_roce_ucontext, ibucontext),
};
-static const struct ib_device_ops hns_roce_dev_mr_ops = {
- .rereg_user_mr = hns_roce_rereg_user_mr,
+static const struct ib_device_ops hns_roce_dev_hw_stats_ops = {
+ .alloc_hw_port_stats = hns_roce_alloc_hw_port_stats,
+ .get_hw_stats = hns_roce_get_hw_stats,
};
-static const struct ib_device_ops hns_roce_dev_mw_ops = {
- .alloc_mw = hns_roce_alloc_mw,
- .dealloc_mw = hns_roce_dealloc_mw,
+static const struct ib_device_ops hns_roce_dev_mr_ops = {
+ .rereg_user_mr = hns_roce_rereg_user_mr,
};
static const struct ib_device_ops hns_roce_dev_frmr_ops = {
@@ -476,88 +771,98 @@ static const struct ib_device_ops hns_roce_dev_srq_ops = {
INIT_RDMA_OBJ_SIZE(ib_srq, hns_roce_srq, ibsrq),
};
+static const struct ib_device_ops hns_roce_dev_xrcd_ops = {
+ .alloc_xrcd = hns_roce_alloc_xrcd,
+ .dealloc_xrcd = hns_roce_dealloc_xrcd,
+
+ INIT_RDMA_OBJ_SIZE(ib_xrcd, hns_roce_xrcd, ibxrcd),
+};
+
+static const struct ib_device_ops hns_roce_dev_restrack_ops = {
+ .fill_res_cq_entry = hns_roce_fill_res_cq_entry,
+ .fill_res_cq_entry_raw = hns_roce_fill_res_cq_entry_raw,
+ .fill_res_qp_entry = hns_roce_fill_res_qp_entry,
+ .fill_res_qp_entry_raw = hns_roce_fill_res_qp_entry_raw,
+ .fill_res_mr_entry = hns_roce_fill_res_mr_entry,
+ .fill_res_mr_entry_raw = hns_roce_fill_res_mr_entry_raw,
+ .fill_res_srq_entry = hns_roce_fill_res_srq_entry,
+ .fill_res_srq_entry_raw = hns_roce_fill_res_srq_entry_raw,
+};
+
static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
{
- int ret;
struct hns_roce_ib_iboe *iboe = NULL;
- struct ib_device *ib_dev = NULL;
struct device *dev = hr_dev->dev;
+ struct ib_device *ib_dev = NULL;
+ struct net_device *net_dev;
unsigned int i;
+ int ret;
iboe = &hr_dev->iboe;
spin_lock_init(&iboe->lock);
ib_dev = &hr_dev->ib_dev;
- ib_dev->owner = THIS_MODULE;
- ib_dev->node_type = RDMA_NODE_IB_CA;
- ib_dev->dev.parent = dev;
-
- ib_dev->phys_port_cnt = hr_dev->caps.num_ports;
- ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey;
- ib_dev->num_comp_vectors = hr_dev->caps.num_comp_vectors;
- ib_dev->uverbs_abi_ver = 1;
- ib_dev->uverbs_cmd_mask =
- (1ULL << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ULL << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ULL << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ULL << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ULL << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ULL << IB_USER_VERBS_CMD_REG_MR) |
- (1ULL << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ULL << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ULL << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ULL << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ULL << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ULL << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ULL << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ULL << IB_USER_VERBS_CMD_DESTROY_QP);
-
- ib_dev->uverbs_ex_cmd_mask |=
- (1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
-
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) {
- ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR);
- ib_set_device_ops(ib_dev, &hns_roce_dev_mr_ops);
- }
+ ib_dev->node_type = RDMA_NODE_IB_CA;
+ ib_dev->dev.parent = dev;
- /* MW */
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) {
- ib_dev->uverbs_cmd_mask |=
- (1ULL << IB_USER_VERBS_CMD_ALLOC_MW) |
- (1ULL << IB_USER_VERBS_CMD_DEALLOC_MW);
- ib_set_device_ops(ib_dev, &hns_roce_dev_mw_ops);
- }
+ ib_dev->phys_port_cnt = hr_dev->caps.num_ports;
+ ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey;
+ ib_dev->num_comp_vectors = hr_dev->caps.num_comp_vectors;
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR)
+ ib_set_device_ops(ib_dev, &hns_roce_dev_mr_ops);
- /* FRMR */
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR)
ib_set_device_ops(ib_dev, &hns_roce_dev_frmr_ops);
- /* SRQ */
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
- ib_dev->uverbs_cmd_mask |=
- (1ULL << IB_USER_VERBS_CMD_CREATE_SRQ) |
- (1ULL << IB_USER_VERBS_CMD_MODIFY_SRQ) |
- (1ULL << IB_USER_VERBS_CMD_QUERY_SRQ) |
- (1ULL << IB_USER_VERBS_CMD_DESTROY_SRQ) |
- (1ULL << IB_USER_VERBS_CMD_POST_SRQ_RECV);
ib_set_device_ops(ib_dev, &hns_roce_dev_srq_ops);
ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_srq_ops);
}
- ib_dev->driver_id = RDMA_DRIVER_HNS;
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
+ ib_set_device_ops(ib_dev, &hns_roce_dev_xrcd_ops);
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09 &&
+ !hr_dev->is_vf)
+ ib_set_device_ops(ib_dev, &hns_roce_dev_hw_stats_ops);
+
ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops);
ib_set_device_ops(ib_dev, &hns_roce_dev_ops);
- for (i = 0; i < hr_dev->caps.num_ports; i++) {
- if (!hr_dev->iboe.netdevs[i])
- continue;
+ ib_set_device_ops(ib_dev, &hns_roce_dev_restrack_ops);
- ret = ib_device_set_netdev(ib_dev, hr_dev->iboe.netdevs[i],
- i + 1);
- if (ret)
+ dma_set_max_seg_size(dev, SZ_2G);
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) {
+ ret = hns_roce_alloc_bond_grp(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to alloc bond_grp for bus %u, ret = %d\n",
+ get_hr_bus_num(hr_dev), ret);
+ return ret;
+ }
+ }
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND &&
+ hns_roce_bond_is_active(hr_dev)) {
+ ret = hns_roce_bond_init(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to init bond!\n");
return ret;
+ }
+ ret = ib_register_device(ib_dev, "hns_bond_%d", dev);
+ } else {
+ for (i = 0; i < hr_dev->caps.num_ports; i++) {
+ net_dev = get_hr_netdev(hr_dev, i);
+ if (!net_dev)
+ continue;
+
+ ret = ib_device_set_netdev(ib_dev, net_dev, i + 1);
+ if (ret)
+ return ret;
+ }
+ ret = ib_register_device(ib_dev, "hns_%d", dev);
}
- ret = ib_register_device(ib_dev, "hns_%d");
if (ret) {
dev_err(dev, "ib_register_device failed!\n");
return ret;
@@ -587,41 +892,22 @@ error_failed_setup_mtu_mac:
static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
{
- int ret;
struct device *dev = hr_dev->dev;
-
- ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtt_table,
- HEM_TYPE_MTT, hr_dev->caps.mtt_entry_sz,
- hr_dev->caps.num_mtt_segs, 1);
- if (ret) {
- dev_err(dev, "Failed to init MTT context memory, aborting.\n");
- return ret;
- }
-
- if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) {
- ret = hns_roce_init_hem_table(hr_dev,
- &hr_dev->mr_table.mtt_cqe_table,
- HEM_TYPE_CQE, hr_dev->caps.mtt_entry_sz,
- hr_dev->caps.num_cqe_segs, 1);
- if (ret) {
- dev_err(dev, "Failed to init MTT CQE context memory, aborting.\n");
- goto err_unmap_cqe;
- }
- }
+ int ret;
ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table,
HEM_TYPE_MTPT, hr_dev->caps.mtpt_entry_sz,
- hr_dev->caps.num_mtpts, 1);
+ hr_dev->caps.num_mtpts);
if (ret) {
- dev_err(dev, "Failed to init MTPT context memory, aborting.\n");
- goto err_unmap_mtt;
+ dev_err(dev, "failed to init MTPT context memory, aborting.\n");
+ return ret;
}
ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.qp_table,
- HEM_TYPE_QPC, hr_dev->caps.qpc_entry_sz,
- hr_dev->caps.num_qps, 1);
+ HEM_TYPE_QPC, hr_dev->caps.qpc_sz,
+ hr_dev->caps.num_qps);
if (ret) {
- dev_err(dev, "Failed to init QP context memory, aborting.\n");
+ dev_err(dev, "failed to init QP context memory, aborting.\n");
goto err_unmap_dmpt;
}
@@ -629,9 +915,9 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
HEM_TYPE_IRRL,
hr_dev->caps.irrl_entry_sz *
hr_dev->caps.max_qp_init_rdma,
- hr_dev->caps.num_qps, 1);
+ hr_dev->caps.num_qps);
if (ret) {
- dev_err(dev, "Failed to init irrl_table memory, aborting.\n");
+ dev_err(dev, "failed to init irrl_table memory, aborting.\n");
goto err_unmap_qp;
}
@@ -641,123 +927,100 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
HEM_TYPE_TRRL,
hr_dev->caps.trrl_entry_sz *
hr_dev->caps.max_qp_dest_rdma,
- hr_dev->caps.num_qps, 1);
+ hr_dev->caps.num_qps);
if (ret) {
dev_err(dev,
- "Failed to init trrl_table memory, aborting.\n");
+ "failed to init trrl_table memory, aborting.\n");
goto err_unmap_irrl;
}
}
ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cq_table.table,
HEM_TYPE_CQC, hr_dev->caps.cqc_entry_sz,
- hr_dev->caps.num_cqs, 1);
+ hr_dev->caps.num_cqs);
if (ret) {
- dev_err(dev, "Failed to init CQ context memory, aborting.\n");
+ dev_err(dev, "failed to init CQ context memory, aborting.\n");
goto err_unmap_trrl;
}
- if (hr_dev->caps.srqc_entry_sz) {
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
ret = hns_roce_init_hem_table(hr_dev, &hr_dev->srq_table.table,
HEM_TYPE_SRQC,
hr_dev->caps.srqc_entry_sz,
- hr_dev->caps.num_srqs, 1);
+ hr_dev->caps.num_srqs);
if (ret) {
dev_err(dev,
- "Failed to init SRQ context memory, aborting.\n");
+ "failed to init SRQ context memory, aborting.\n");
goto err_unmap_cq;
}
}
- if (hr_dev->caps.num_srqwqe_segs) {
- ret = hns_roce_init_hem_table(hr_dev,
- &hr_dev->mr_table.mtt_srqwqe_table,
- HEM_TYPE_SRQWQE,
- hr_dev->caps.mtt_entry_sz,
- hr_dev->caps.num_srqwqe_segs, 1);
- if (ret) {
- dev_err(dev,
- "Failed to init MTT srqwqe memory, aborting.\n");
- goto err_unmap_srq;
- }
- }
-
- if (hr_dev->caps.num_idx_segs) {
- ret = hns_roce_init_hem_table(hr_dev,
- &hr_dev->mr_table.mtt_idx_table,
- HEM_TYPE_IDX,
- hr_dev->caps.idx_entry_sz,
- hr_dev->caps.num_idx_segs, 1);
- if (ret) {
- dev_err(dev,
- "Failed to init MTT idx memory, aborting.\n");
- goto err_unmap_srqwqe;
- }
- }
-
- if (hr_dev->caps.sccc_entry_sz) {
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) {
ret = hns_roce_init_hem_table(hr_dev,
&hr_dev->qp_table.sccc_table,
HEM_TYPE_SCCC,
- hr_dev->caps.sccc_entry_sz,
- hr_dev->caps.num_qps, 1);
+ hr_dev->caps.sccc_sz,
+ hr_dev->caps.num_qps);
if (ret) {
dev_err(dev,
- "Failed to init SCC context memory, aborting.\n");
- goto err_unmap_idx;
+ "failed to init SCC context memory, aborting.\n");
+ goto err_unmap_srq;
}
}
if (hr_dev->caps.qpc_timer_entry_sz) {
- ret = hns_roce_init_hem_table(hr_dev,
- &hr_dev->qpc_timer_table,
+ ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qpc_timer_table,
HEM_TYPE_QPC_TIMER,
hr_dev->caps.qpc_timer_entry_sz,
- hr_dev->caps.num_qpc_timer, 1);
+ hr_dev->caps.qpc_timer_bt_num);
if (ret) {
dev_err(dev,
- "Failed to init QPC timer memory, aborting.\n");
+ "failed to init QPC timer memory, aborting.\n");
goto err_unmap_ctx;
}
}
if (hr_dev->caps.cqc_timer_entry_sz) {
- ret = hns_roce_init_hem_table(hr_dev,
- &hr_dev->cqc_timer_table,
+ ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cqc_timer_table,
HEM_TYPE_CQC_TIMER,
hr_dev->caps.cqc_timer_entry_sz,
- hr_dev->caps.num_cqc_timer, 1);
+ hr_dev->caps.cqc_timer_bt_num);
if (ret) {
dev_err(dev,
- "Failed to init CQC timer memory, aborting.\n");
+ "failed to init CQC timer memory, aborting.\n");
goto err_unmap_qpc_timer;
}
}
+ if (hr_dev->caps.gmv_entry_sz) {
+ ret = hns_roce_init_hem_table(hr_dev, &hr_dev->gmv_table,
+ HEM_TYPE_GMV,
+ hr_dev->caps.gmv_entry_sz,
+ hr_dev->caps.gmv_entry_num);
+ if (ret) {
+ dev_err(dev,
+ "failed to init gmv table memory, ret = %d\n",
+ ret);
+ goto err_unmap_cqc_timer;
+ }
+ }
+
return 0;
+err_unmap_cqc_timer:
+ if (hr_dev->caps.cqc_timer_entry_sz)
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cqc_timer_table);
+
err_unmap_qpc_timer:
if (hr_dev->caps.qpc_timer_entry_sz)
- hns_roce_cleanup_hem_table(hr_dev,
- &hr_dev->qpc_timer_table);
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qpc_timer_table);
err_unmap_ctx:
- if (hr_dev->caps.sccc_entry_sz)
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL)
hns_roce_cleanup_hem_table(hr_dev,
&hr_dev->qp_table.sccc_table);
-
-err_unmap_idx:
- if (hr_dev->caps.num_idx_segs)
- hns_roce_cleanup_hem_table(hr_dev,
- &hr_dev->mr_table.mtt_idx_table);
-
-err_unmap_srqwqe:
- if (hr_dev->caps.num_srqwqe_segs)
- hns_roce_cleanup_hem_table(hr_dev,
- &hr_dev->mr_table.mtt_srqwqe_table);
-
err_unmap_srq:
- if (hr_dev->caps.srqc_entry_sz)
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table);
err_unmap_cq:
@@ -777,17 +1040,15 @@ err_unmap_qp:
err_unmap_dmpt:
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table);
-err_unmap_mtt:
- if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
- hns_roce_cleanup_hem_table(hr_dev,
- &hr_dev->mr_table.mtt_cqe_table);
-
-err_unmap_cqe:
- hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table);
-
return ret;
}
+static void hns_roce_teardown_hca(struct hns_roce_dev *hr_dev)
+{
+ hns_roce_cleanup_bitmap(hr_dev);
+ mutex_destroy(&hr_dev->pgdir_mutex);
+}
+
/**
* hns_roce_setup_hca - setup host channel adapter
* @hr_dev: pointer to hns roce device
@@ -795,110 +1056,133 @@ err_unmap_cqe:
*/
static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
{
- int ret;
struct device *dev = hr_dev->dev;
+ int ret;
spin_lock_init(&hr_dev->sm_lock);
- spin_lock_init(&hr_dev->bt_cmd_lock);
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) {
- INIT_LIST_HEAD(&hr_dev->pgdir_list);
- mutex_init(&hr_dev->pgdir_mutex);
- }
+ INIT_LIST_HEAD(&hr_dev->qp_list);
+ spin_lock_init(&hr_dev->qp_list_lock);
- ret = hns_roce_init_uar_table(hr_dev);
- if (ret) {
- dev_err(dev, "Failed to initialize uar table. aborting\n");
- return ret;
- }
+ INIT_LIST_HEAD(&hr_dev->pgdir_list);
+ mutex_init(&hr_dev->pgdir_mutex);
+
+ hns_roce_init_uar_table(hr_dev);
ret = hns_roce_uar_alloc(hr_dev, &hr_dev->priv_uar);
if (ret) {
- dev_err(dev, "Failed to allocate priv_uar.\n");
+ dev_err(dev, "failed to allocate priv_uar.\n");
goto err_uar_table_free;
}
- ret = hns_roce_init_pd_table(hr_dev);
+ ret = hns_roce_init_qp_table(hr_dev);
if (ret) {
- dev_err(dev, "Failed to init protected domain table.\n");
- goto err_uar_alloc_free;
+ dev_err(dev, "failed to init qp_table.\n");
+ goto err_uar_table_free;
}
- ret = hns_roce_init_mr_table(hr_dev);
- if (ret) {
- dev_err(dev, "Failed to init memory region table.\n");
- goto err_pd_table_free;
- }
+ hns_roce_init_pd_table(hr_dev);
- ret = hns_roce_init_cq_table(hr_dev);
- if (ret) {
- dev_err(dev, "Failed to init completion queue table.\n");
- goto err_mr_table_free;
- }
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
+ hns_roce_init_xrcd_table(hr_dev);
- ret = hns_roce_init_qp_table(hr_dev);
- if (ret) {
- dev_err(dev, "Failed to init queue pair table.\n");
- goto err_cq_table_free;
- }
+ hns_roce_init_mr_table(hr_dev);
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
- ret = hns_roce_init_srq_table(hr_dev);
- if (ret) {
- dev_err(dev,
- "Failed to init share receive queue table.\n");
- goto err_qp_table_free;
+ hns_roce_init_cq_table(hr_dev);
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
+ hns_roce_init_srq_table(hr_dev);
+
+ return 0;
+
+err_uar_table_free:
+ ida_destroy(&hr_dev->uar_ida.ida);
+ mutex_destroy(&hr_dev->pgdir_mutex);
+
+ return ret;
+}
+
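+/* Collect each CQ that has a completion handler onto cq_list exactly once. */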
+static void check_and_get_armed_cq(struct list_head *cq_list, struct ib_cq *cq)
+{
+ struct hns_roce_cq *hr_cq = to_hr_cq(cq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&hr_cq->lock, flags);
+ if (cq->comp_handler) {
+ if (!hr_cq->is_armed) {
+ hr_cq->is_armed = 1;
+ list_add_tail(&hr_cq->node, cq_list);
}
}
+ spin_unlock_irqrestore(&hr_cq->lock, flags);
+}
- return 0;
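+/* On a device error, raise completion events for CQs whose QPs still
+ * have posted work, so consumers are not left waiting forever.
+ */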
+void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_qp *hr_qp;
+ struct hns_roce_cq *hr_cq;
+ struct list_head cq_list;
+ unsigned long flags_qp;
+ unsigned long flags;
-err_qp_table_free:
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
- hns_roce_cleanup_qp_table(hr_dev);
+ INIT_LIST_HEAD(&cq_list);
-err_cq_table_free:
- hns_roce_cleanup_cq_table(hr_dev);
+ spin_lock_irqsave(&hr_dev->qp_list_lock, flags);
+ list_for_each_entry(hr_qp, &hr_dev->qp_list, node) {
+ spin_lock_irqsave(&hr_qp->sq.lock, flags_qp);
+ if (hr_qp->sq.tail != hr_qp->sq.head)
+ check_and_get_armed_cq(&cq_list, hr_qp->ibqp.send_cq);
+ spin_unlock_irqrestore(&hr_qp->sq.lock, flags_qp);
-err_mr_table_free:
- hns_roce_cleanup_mr_table(hr_dev);
+ spin_lock_irqsave(&hr_qp->rq.lock, flags_qp);
+ if ((!hr_qp->ibqp.srq) && (hr_qp->rq.tail != hr_qp->rq.head))
+ check_and_get_armed_cq(&cq_list, hr_qp->ibqp.recv_cq);
+ spin_unlock_irqrestore(&hr_qp->rq.lock, flags_qp);
+ }
-err_pd_table_free:
- hns_roce_cleanup_pd_table(hr_dev);
+ list_for_each_entry(hr_cq, &cq_list, node)
+ hns_roce_cq_completion(hr_dev, hr_cq->cqn);
-err_uar_alloc_free:
- hns_roce_uar_free(hr_dev, &hr_dev->priv_uar);
+ spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
+}
-err_uar_table_free:
- hns_roce_cleanup_uar_table(hr_dev);
- return ret;
+static int hns_roce_alloc_dfx_cnt(struct hns_roce_dev *hr_dev)
+{
+ hr_dev->dfx_cnt = kvcalloc(HNS_ROCE_DFX_CNT_TOTAL, sizeof(atomic64_t),
+ GFP_KERNEL);
+ if (!hr_dev->dfx_cnt)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void hns_roce_dealloc_dfx_cnt(struct hns_roce_dev *hr_dev)
+{
+ kvfree(hr_dev->dfx_cnt);
}
int hns_roce_init(struct hns_roce_dev *hr_dev)
{
- int ret;
struct device *dev = hr_dev->dev;
+ int ret;
- if (hr_dev->hw->reset) {
- ret = hr_dev->hw->reset(hr_dev, true);
- if (ret) {
- dev_err(dev, "Reset RoCE engine failed!\n");
- return ret;
- }
- }
hr_dev->is_reset = false;
+ ret = hns_roce_alloc_dfx_cnt(hr_dev);
+ if (ret)
+ return ret;
+
if (hr_dev->hw->cmq_init) {
ret = hr_dev->hw->cmq_init(hr_dev);
if (ret) {
- dev_err(dev, "Init RoCE Command Queue failed!\n");
- goto error_failed_cmq_init;
+ dev_err(dev, "init RoCE Command Queue failed!\n");
+ goto error_failed_alloc_dfx_cnt;
}
}
ret = hr_dev->hw->hw_profile(hr_dev);
if (ret) {
- dev_err(dev, "Get RoCE engine profile failed!\n");
+ dev_err(dev, "get RoCE engine profile failed!\n");
goto error_failed_cmd_init;
}
@@ -908,6 +1192,7 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
goto error_failed_cmd_init;
}
+ /* EQ depends on poll mode, event mode depends on EQ */
ret = hr_dev->hw->init_eq(hr_dev);
if (ret) {
dev_err(dev, "eq init failed!\n");
@@ -916,10 +1201,9 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
if (hr_dev->cmd_mod) {
ret = hns_roce_cmd_use_events(hr_dev);
- if (ret) {
- dev_err(dev, "Switch to event-driven cmd failed!\n");
- goto error_failed_use_event;
- }
+ if (ret)
+ dev_warn(dev,
+ "Cmd event mode failed, set back to poll!\n");
}
ret = hns_roce_init_hem(hr_dev);
@@ -946,6 +1230,8 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
if (ret)
goto error_failed_register_device;
+ hns_roce_register_debugfs(hr_dev);
+
return 0;
error_failed_register_device:
@@ -953,7 +1239,7 @@ error_failed_register_device:
hr_dev->hw->hw_exit(hr_dev);
error_failed_engine_init:
- hns_roce_cleanup_bitmap(hr_dev);
+ hns_roce_teardown_hca(hr_dev);
error_failed_setup_hca:
hns_roce_cleanup_hem(hr_dev);
@@ -961,8 +1247,6 @@ error_failed_setup_hca:
error_failed_init_hem:
if (hr_dev->cmd_mod)
hns_roce_cmd_use_polling(hr_dev);
-
-error_failed_use_event:
hr_dev->hw->cleanup_eq(hr_dev);
error_failed_eq_table:
@@ -972,23 +1256,20 @@ error_failed_cmd_init:
if (hr_dev->hw->cmq_exit)
hr_dev->hw->cmq_exit(hr_dev);
-error_failed_cmq_init:
- if (hr_dev->hw->reset) {
- if (hr_dev->hw->reset(hr_dev, false))
- dev_err(dev, "Dereset RoCE engine failed!\n");
- }
+error_failed_alloc_dfx_cnt:
+ hns_roce_dealloc_dfx_cnt(hr_dev);
return ret;
}
-EXPORT_SYMBOL_GPL(hns_roce_init);
-void hns_roce_exit(struct hns_roce_dev *hr_dev)
+void hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup)
{
- hns_roce_unregister_device(hr_dev);
+ hns_roce_unregister_debugfs(hr_dev);
+ hns_roce_unregister_device(hr_dev, bond_cleanup);
if (hr_dev->hw->hw_exit)
hr_dev->hw->hw_exit(hr_dev);
- hns_roce_cleanup_bitmap(hr_dev);
+ hns_roce_teardown_hca(hr_dev);
hns_roce_cleanup_hem(hr_dev);
if (hr_dev->cmd_mod)
@@ -998,10 +1279,8 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev)
hns_roce_cmd_cleanup(hr_dev);
if (hr_dev->hw->cmq_exit)
hr_dev->hw->cmq_exit(hr_dev);
- if (hr_dev->hw->reset)
- hr_dev->hw->reset(hr_dev, false);
+ hns_roce_dealloc_dfx_cnt(hr_dev);
}
-EXPORT_SYMBOL_GPL(hns_roce_exit);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Wei Hu <xavier.huwei@huawei.com>");
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 6110ec408626..31cb8699e198 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -31,1290 +31,361 @@
* SOFTWARE.
*/
-#include <linux/platform_device.h>
#include <linux/vmalloc.h>
+#include <linux/count_zeros.h>
#include <rdma/ib_umem.h>
+#include <linux/math.h>
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"
+#include "hns_roce_trace.h"
-static u32 hw_index_to_key(unsigned long ind)
+static u32 hw_index_to_key(int ind)
{
- return (u32)(ind >> 24) | (ind << 8);
+ return ((u32)ind >> 24) | ((u32)ind << 8);
}
unsigned long key_to_hw_index(u32 key)
{
return (key << 24) | (key >> 8);
}
-EXPORT_SYMBOL_GPL(key_to_hw_index);
-static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev,
- struct hns_roce_cmd_mailbox *mailbox,
- unsigned long mpt_index)
+static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
- return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0,
- HNS_ROCE_CMD_SW2HW_MPT,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
-}
-
-int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
- struct hns_roce_cmd_mailbox *mailbox,
- unsigned long mpt_index)
-{
- return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0,
- mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
-}
-EXPORT_SYMBOL_GPL(hns_roce_hw2sw_mpt);
-
-static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order,
- unsigned long *seg)
-{
- int o;
- u32 m;
-
- spin_lock(&buddy->lock);
-
- for (o = order; o <= buddy->max_order; ++o) {
- if (buddy->num_free[o]) {
- m = 1 << (buddy->max_order - o);
- *seg = find_first_bit(buddy->bits[o], m);
- if (*seg < m)
- goto found;
- }
- }
- spin_unlock(&buddy->lock);
- return -1;
-
- found:
- clear_bit(*seg, buddy->bits[o]);
- --buddy->num_free[o];
-
- while (o > order) {
- --o;
- *seg <<= 1;
- set_bit(*seg ^ 1, buddy->bits[o]);
- ++buddy->num_free[o];
- }
-
- spin_unlock(&buddy->lock);
-
- *seg <<= order;
- return 0;
-}
-
-static void hns_roce_buddy_free(struct hns_roce_buddy *buddy, unsigned long seg,
- int order)
-{
- seg >>= order;
+ struct hns_roce_ida *mtpt_ida = &hr_dev->mr_table.mtpt_ida;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int err;
+ int id;
- spin_lock(&buddy->lock);
-
- while (test_bit(seg ^ 1, buddy->bits[order])) {
- clear_bit(seg ^ 1, buddy->bits[order]);
- --buddy->num_free[order];
- seg >>= 1;
- ++order;
- }
-
- set_bit(seg, buddy->bits[order]);
- ++buddy->num_free[order];
-
- spin_unlock(&buddy->lock);
-}
-
-static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order)
-{
- int i, s;
-
- buddy->max_order = max_order;
- spin_lock_init(&buddy->lock);
- buddy->bits = kcalloc(buddy->max_order + 1,
- sizeof(*buddy->bits),
- GFP_KERNEL);
- buddy->num_free = kcalloc(buddy->max_order + 1,
- sizeof(*buddy->num_free),
- GFP_KERNEL);
- if (!buddy->bits || !buddy->num_free)
- goto err_out;
-
- for (i = 0; i <= buddy->max_order; ++i) {
- s = BITS_TO_LONGS(1 << (buddy->max_order - i));
- buddy->bits[i] = kcalloc(s, sizeof(long), GFP_KERNEL |
- __GFP_NOWARN);
- if (!buddy->bits[i]) {
- buddy->bits[i] = vzalloc(array_size(s, sizeof(long)));
- if (!buddy->bits[i])
- goto err_out_free;
- }
- }
-
- set_bit(0, buddy->bits[buddy->max_order]);
- buddy->num_free[buddy->max_order] = 1;
-
- return 0;
-
-err_out_free:
- for (i = 0; i <= buddy->max_order; ++i)
- kvfree(buddy->bits[i]);
-
-err_out:
- kfree(buddy->bits);
- kfree(buddy->num_free);
- return -ENOMEM;
-}
-
-static void hns_roce_buddy_cleanup(struct hns_roce_buddy *buddy)
-{
- int i;
-
- for (i = 0; i <= buddy->max_order; ++i)
- kvfree(buddy->bits[i]);
-
- kfree(buddy->bits);
- kfree(buddy->num_free);
-}
-
-static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order,
- unsigned long *seg, u32 mtt_type)
-{
- struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
- struct hns_roce_hem_table *table;
- struct hns_roce_buddy *buddy;
- int ret;
-
- switch (mtt_type) {
- case MTT_TYPE_WQE:
- buddy = &mr_table->mtt_buddy;
- table = &mr_table->mtt_table;
- break;
- case MTT_TYPE_CQE:
- buddy = &mr_table->mtt_cqe_buddy;
- table = &mr_table->mtt_cqe_table;
- break;
- case MTT_TYPE_SRQWQE:
- buddy = &mr_table->mtt_srqwqe_buddy;
- table = &mr_table->mtt_srqwqe_table;
- break;
- case MTT_TYPE_IDX:
- buddy = &mr_table->mtt_idx_buddy;
- table = &mr_table->mtt_idx_table;
- break;
- default:
- dev_err(hr_dev->dev, "Unsupport MTT table type: %d\n",
- mtt_type);
- return -EINVAL;
+ /* Allocate a key for mr from mr_table */
+ id = ida_alloc_range(&mtpt_ida->ida, mtpt_ida->min, mtpt_ida->max,
+ GFP_KERNEL);
+ if (id < 0) {
+ ibdev_err(ibdev, "failed to alloc id for MR key, id(%d)\n", id);
+ return -ENOMEM;
}
- ret = hns_roce_buddy_alloc(buddy, order, seg);
- if (ret == -1)
- return -1;
+ mr->key = hw_index_to_key(id); /* MR key */
- if (hns_roce_table_get_range(hr_dev, table, *seg,
- *seg + (1 << order) - 1)) {
- hns_roce_buddy_free(buddy, *seg, order);
- return -1;
+ err = hns_roce_table_get(hr_dev, &hr_dev->mr_table.mtpt_table,
+ (unsigned long)id);
+ if (err) {
+ ibdev_err(ibdev, "failed to alloc mtpt, ret = %d.\n", err);
+ goto err_free_bitmap;
}
return 0;
+err_free_bitmap:
+ ida_free(&mtpt_ida->ida, id);
+ return err;
}
-int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift,
- struct hns_roce_mtt *mtt)
+static void free_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
- int ret;
- int i;
-
- /* Page num is zero, correspond to DMA memory register */
- if (!npages) {
- mtt->order = -1;
- mtt->page_shift = HNS_ROCE_HEM_PAGE_SHIFT;
- return 0;
- }
-
- /* Note: if page_shift is zero, FAST memory register */
- mtt->page_shift = page_shift;
-
- /* Compute MTT entry necessary */
- for (mtt->order = 0, i = HNS_ROCE_MTT_ENTRY_PER_SEG; i < npages;
- i <<= 1)
- ++mtt->order;
-
- /* Allocate MTT entry */
- ret = hns_roce_alloc_mtt_range(hr_dev, mtt->order, &mtt->first_seg,
- mtt->mtt_type);
- if (ret == -1)
- return -ENOMEM;
-
- return 0;
-}
-
-void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt)
-{
- struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
-
- if (mtt->order < 0)
- return;
-
- switch (mtt->mtt_type) {
- case MTT_TYPE_WQE:
- hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg,
- mtt->order);
- hns_roce_table_put_range(hr_dev, &mr_table->mtt_table,
- mtt->first_seg,
- mtt->first_seg + (1 << mtt->order) - 1);
- break;
- case MTT_TYPE_CQE:
- hns_roce_buddy_free(&mr_table->mtt_cqe_buddy, mtt->first_seg,
- mtt->order);
- hns_roce_table_put_range(hr_dev, &mr_table->mtt_cqe_table,
- mtt->first_seg,
- mtt->first_seg + (1 << mtt->order) - 1);
- break;
- case MTT_TYPE_SRQWQE:
- hns_roce_buddy_free(&mr_table->mtt_srqwqe_buddy, mtt->first_seg,
- mtt->order);
- hns_roce_table_put_range(hr_dev, &mr_table->mtt_srqwqe_table,
- mtt->first_seg,
- mtt->first_seg + (1 << mtt->order) - 1);
- break;
- case MTT_TYPE_IDX:
- hns_roce_buddy_free(&mr_table->mtt_idx_buddy, mtt->first_seg,
- mtt->order);
- hns_roce_table_put_range(hr_dev, &mr_table->mtt_idx_table,
- mtt->first_seg,
- mtt->first_seg + (1 << mtt->order) - 1);
- break;
- default:
- dev_err(hr_dev->dev,
- "Unsupport mtt type %d, clean mtt failed\n",
- mtt->mtt_type);
- break;
- }
-}
-EXPORT_SYMBOL_GPL(hns_roce_mtt_cleanup);
-
-static void hns_roce_loop_free(struct hns_roce_dev *hr_dev,
- struct hns_roce_mr *mr, int err_loop_index,
- int loop_i, int loop_j)
-{
- struct device *dev = hr_dev->dev;
- u32 mhop_num;
- u32 pbl_bt_sz;
- u64 bt_idx;
- int i, j;
-
- pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
- mhop_num = hr_dev->caps.pbl_hop_num;
-
- i = loop_i;
- if (mhop_num == 3 && err_loop_index == 2) {
- for (; i >= 0; i--) {
- dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
- mr->pbl_l1_dma_addr[i]);
-
- for (j = 0; j < pbl_bt_sz / 8; j++) {
- if (i == loop_i && j >= loop_j)
- break;
-
- bt_idx = i * pbl_bt_sz / 8 + j;
- dma_free_coherent(dev, pbl_bt_sz,
- mr->pbl_bt_l2[bt_idx],
- mr->pbl_l2_dma_addr[bt_idx]);
- }
- }
- } else if (mhop_num == 3 && err_loop_index == 1) {
- for (i -= 1; i >= 0; i--) {
- dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
- mr->pbl_l1_dma_addr[i]);
-
- for (j = 0; j < pbl_bt_sz / 8; j++) {
- bt_idx = i * pbl_bt_sz / 8 + j;
- dma_free_coherent(dev, pbl_bt_sz,
- mr->pbl_bt_l2[bt_idx],
- mr->pbl_l2_dma_addr[bt_idx]);
- }
- }
- } else if (mhop_num == 2 && err_loop_index == 1) {
- for (i -= 1; i >= 0; i--)
- dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
- mr->pbl_l1_dma_addr[i]);
- } else {
- dev_warn(dev, "not support: mhop_num=%d, err_loop_index=%d.",
- mhop_num, err_loop_index);
- return;
- }
+ unsigned long obj = key_to_hw_index(mr->key);
- dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0, mr->pbl_l0_dma_addr);
- mr->pbl_bt_l0 = NULL;
- mr->pbl_l0_dma_addr = 0;
+ hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, obj);
+ ida_free(&hr_dev->mr_table.mtpt_ida.ida, (int)obj);
}
-/* PBL multi hop addressing */
-static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
- struct hns_roce_mr *mr)
+static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
+ struct ib_udata *udata, u64 start)
{
- struct device *dev = hr_dev->dev;
- int mr_alloc_done = 0;
- int npages_allocated;
- int i = 0, j = 0;
- u32 pbl_bt_sz;
- u32 mhop_num;
- u64 pbl_last_bt_num;
- u64 pbl_bt_cnt = 0;
- u64 bt_idx;
- u64 size;
-
- mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num);
- pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
- pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
-
- if (mhop_num == HNS_ROCE_HOP_NUM_0)
- return 0;
-
- /* hop_num = 1 */
- if (mhop_num == 1) {
- if (npages > pbl_bt_sz / 8) {
- dev_err(dev, "npages %d is larger than buf_pg_sz!",
- npages);
- return -EINVAL;
- }
- mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
- &(mr->pbl_dma_addr),
- GFP_KERNEL);
- if (!mr->pbl_buf)
- return -ENOMEM;
-
- mr->pbl_size = npages;
- mr->pbl_ba = mr->pbl_dma_addr;
- mr->pbl_hop_num = mhop_num;
- mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
- mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
- return 0;
- }
-
- mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8,
- sizeof(*mr->pbl_l1_dma_addr),
- GFP_KERNEL);
- if (!mr->pbl_l1_dma_addr)
- return -ENOMEM;
-
- mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1),
- GFP_KERNEL);
- if (!mr->pbl_bt_l1)
- goto err_kcalloc_bt_l1;
-
- if (mhop_num == 3) {
- mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num,
- sizeof(*mr->pbl_l2_dma_addr),
- GFP_KERNEL);
- if (!mr->pbl_l2_dma_addr)
- goto err_kcalloc_l2_dma;
-
- mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num,
- sizeof(*mr->pbl_bt_l2),
- GFP_KERNEL);
- if (!mr->pbl_bt_l2)
- goto err_kcalloc_bt_l2;
- }
-
- /* alloc L0 BT */
- mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz,
- &(mr->pbl_l0_dma_addr),
- GFP_KERNEL);
- if (!mr->pbl_bt_l0)
- goto err_dma_alloc_l0;
-
- if (mhop_num == 2) {
- /* alloc L1 BT */
- for (i = 0; i < pbl_bt_sz / 8; i++) {
- if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
- size = pbl_bt_sz;
- } else {
- npages_allocated = i * (pbl_bt_sz / 8);
- size = (npages - npages_allocated) * 8;
- }
- mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size,
- &(mr->pbl_l1_dma_addr[i]),
- GFP_KERNEL);
- if (!mr->pbl_bt_l1[i]) {
- hns_roce_loop_free(hr_dev, mr, 1, i, 0);
- goto err_dma_alloc_l0;
- }
-
- *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
-
- pbl_bt_cnt++;
- if (pbl_bt_cnt >= pbl_last_bt_num)
- break;
- }
- } else if (mhop_num == 3) {
- /* alloc L1, L2 BT */
- for (i = 0; i < pbl_bt_sz / 8; i++) {
- mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz,
- &(mr->pbl_l1_dma_addr[i]),
- GFP_KERNEL);
- if (!mr->pbl_bt_l1[i]) {
- hns_roce_loop_free(hr_dev, mr, 1, i, 0);
- goto err_dma_alloc_l0;
- }
-
- *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i];
-
- for (j = 0; j < pbl_bt_sz / 8; j++) {
- bt_idx = i * pbl_bt_sz / 8 + j;
-
- if (pbl_bt_cnt + 1 < pbl_last_bt_num) {
- size = pbl_bt_sz;
- } else {
- npages_allocated = bt_idx *
- (pbl_bt_sz / 8);
- size = (npages - npages_allocated) * 8;
- }
- mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent(
- dev, size,
- &(mr->pbl_l2_dma_addr[bt_idx]),
- GFP_KERNEL);
- if (!mr->pbl_bt_l2[bt_idx]) {
- hns_roce_loop_free(hr_dev, mr, 2, i, j);
- goto err_dma_alloc_l0;
- }
-
- *(mr->pbl_bt_l1[i] + j) =
- mr->pbl_l2_dma_addr[bt_idx];
-
- pbl_bt_cnt++;
- if (pbl_bt_cnt >= pbl_last_bt_num) {
- mr_alloc_done = 1;
- break;
- }
- }
-
- if (mr_alloc_done)
- break;
- }
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ bool is_fast = mr->type == MR_TYPE_FRMR;
+ struct hns_roce_buf_attr buf_attr = {};
+ int err;
+
+ mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num;
+ buf_attr.page_shift = is_fast ? PAGE_SHIFT :
+ hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT;
+ buf_attr.region[0].size = mr->size;
+ buf_attr.region[0].hopnum = mr->pbl_hop_num;
+ buf_attr.region_count = 1;
+ buf_attr.user_access = mr->access;
+ /* a fast MR's buffer is allocated before mapping, not at creation */
+ buf_attr.mtt_only = is_fast;
+ buf_attr.iova = mr->iova;
+ /* page size and hop num are fixed for a fast MR */
+ buf_attr.adaptive = !is_fast;
+ buf_attr.type = MTR_PBL;
+
+ err = hns_roce_mtr_create(hr_dev, &mr->pbl_mtr, &buf_attr,
+ hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT,
+ udata, start);
+ if (err) {
+ ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err);
+ return err;
}
- mr->l0_chunk_last_num = i + 1;
- if (mhop_num == 3)
- mr->l1_chunk_last_num = j + 1;
-
- mr->pbl_size = npages;
- mr->pbl_ba = mr->pbl_l0_dma_addr;
- mr->pbl_hop_num = hr_dev->caps.pbl_hop_num;
- mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
- mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
-
- return 0;
-
-err_dma_alloc_l0:
- kfree(mr->pbl_bt_l2);
- mr->pbl_bt_l2 = NULL;
-
-err_kcalloc_bt_l2:
- kfree(mr->pbl_l2_dma_addr);
- mr->pbl_l2_dma_addr = NULL;
-
-err_kcalloc_l2_dma:
- kfree(mr->pbl_bt_l1);
- mr->pbl_bt_l1 = NULL;
-
-err_kcalloc_bt_l1:
- kfree(mr->pbl_l1_dma_addr);
- mr->pbl_l1_dma_addr = NULL;
+ mr->npages = mr->pbl_mtr.hem_cfg.buf_pg_count;
+ mr->pbl_hop_num = buf_attr.region[0].hopnum;
- return -ENOMEM;
+ return err;
}
-static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
- u64 size, u32 access, int npages,
- struct hns_roce_mr *mr)
+static void free_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
- struct device *dev = hr_dev->dev;
- unsigned long index = 0;
- int ret = 0;
-
- /* Allocate a key for mr from mr_table */
- ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
- if (ret == -1)
- return -ENOMEM;
-
- mr->iova = iova; /* MR va starting addr */
- mr->size = size; /* MR addr range */
- mr->pd = pd; /* MR num */
- mr->access = access; /* MR access permit */
- mr->enabled = 0; /* MR active status */
- mr->key = hw_index_to_key(index); /* MR key */
-
- if (size == ~0ull) {
- mr->pbl_buf = NULL;
- mr->pbl_dma_addr = 0;
- /* PBL multi-hop addressing parameters */
- mr->pbl_bt_l2 = NULL;
- mr->pbl_bt_l1 = NULL;
- mr->pbl_bt_l0 = NULL;
- mr->pbl_l2_dma_addr = NULL;
- mr->pbl_l1_dma_addr = NULL;
- mr->pbl_l0_dma_addr = 0;
- } else {
- if (!hr_dev->caps.pbl_hop_num) {
- mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
- &(mr->pbl_dma_addr),
- GFP_KERNEL);
- if (!mr->pbl_buf)
- return -ENOMEM;
- } else {
- ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
- }
- }
-
- return ret;
+ hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr);
}
-static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev,
- struct hns_roce_mr *mr)
+static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
- struct device *dev = hr_dev->dev;
- int npages_allocated;
- int npages;
- int i, j;
- u32 pbl_bt_sz;
- u32 mhop_num;
- u64 bt_idx;
-
- npages = mr->pbl_size;
- pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
- mhop_num = (mr->type == MR_TYPE_FRMR) ? 1 : hr_dev->caps.pbl_hop_num;
-
- if (mhop_num == HNS_ROCE_HOP_NUM_0)
- return;
-
- /* hop_num = 1 */
- if (mhop_num == 1) {
- dma_free_coherent(dev, (unsigned int)(npages * 8),
- mr->pbl_buf, mr->pbl_dma_addr);
- return;
- }
-
- dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l0,
- mr->pbl_l0_dma_addr);
-
- if (mhop_num == 2) {
- for (i = 0; i < mr->l0_chunk_last_num; i++) {
- if (i == mr->l0_chunk_last_num - 1) {
- npages_allocated = i * (pbl_bt_sz / 8);
-
- dma_free_coherent(dev,
- (npages - npages_allocated) * 8,
- mr->pbl_bt_l1[i],
- mr->pbl_l1_dma_addr[i]);
-
- break;
- }
-
- dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
- mr->pbl_l1_dma_addr[i]);
- }
- } else if (mhop_num == 3) {
- for (i = 0; i < mr->l0_chunk_last_num; i++) {
- dma_free_coherent(dev, pbl_bt_sz, mr->pbl_bt_l1[i],
- mr->pbl_l1_dma_addr[i]);
-
- for (j = 0; j < pbl_bt_sz / 8; j++) {
- bt_idx = i * (pbl_bt_sz / 8) + j;
-
- if ((i == mr->l0_chunk_last_num - 1)
- && j == mr->l1_chunk_last_num - 1) {
- npages_allocated = bt_idx *
- (pbl_bt_sz / 8);
-
- dma_free_coherent(dev,
- (npages - npages_allocated) * 8,
- mr->pbl_bt_l2[bt_idx],
- mr->pbl_l2_dma_addr[bt_idx]);
-
- break;
- }
-
- dma_free_coherent(dev, pbl_bt_sz,
- mr->pbl_bt_l2[bt_idx],
- mr->pbl_l2_dma_addr[bt_idx]);
- }
- }
- }
-
- kfree(mr->pbl_bt_l1);
- kfree(mr->pbl_l1_dma_addr);
- mr->pbl_bt_l1 = NULL;
- mr->pbl_l1_dma_addr = NULL;
- if (mhop_num == 3) {
- kfree(mr->pbl_bt_l2);
- kfree(mr->pbl_l2_dma_addr);
- mr->pbl_bt_l2 = NULL;
- mr->pbl_l2_dma_addr = NULL;
- }
-}
-
-static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
- struct hns_roce_mr *mr)
-{
- struct device *dev = hr_dev->dev;
- int npages = 0;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
int ret;
if (mr->enabled) {
- ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key)
- & (hr_dev->caps.num_mtpts - 1));
+ ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT,
+ key_to_hw_index(mr->key) &
+ (hr_dev->caps.num_mtpts - 1));
if (ret)
- dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret);
- }
-
- if (mr->size != ~0ULL) {
- if (mr->type == MR_TYPE_MR)
- npages = ib_umem_page_count(mr->umem);
-
- if (!hr_dev->caps.pbl_hop_num)
- dma_free_coherent(dev, (unsigned int)(npages * 8),
- mr->pbl_buf, mr->pbl_dma_addr);
- else
- hns_roce_mhop_free(hr_dev, mr);
+ ibdev_warn_ratelimited(ibdev, "failed to destroy mpt, ret = %d.\n",
+ ret);
}
- if (mr->enabled)
- hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
- key_to_hw_index(mr->key));
-
- hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
- key_to_hw_index(mr->key), BITMAP_NO_RR);
+ free_mr_pbl(hr_dev, mr);
+ free_mr_key(hr_dev, mr);
}
static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
struct hns_roce_mr *mr)
{
- int ret;
unsigned long mtpt_idx = key_to_hw_index(mr->key);
- struct device *dev = hr_dev->dev;
struct hns_roce_cmd_mailbox *mailbox;
- struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
-
- /* Prepare HEM entry memory */
- ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
- if (ret)
- return ret;
+ struct device *dev = hr_dev->dev;
+ int ret;
/* Allocate mailbox memory */
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox)) {
- ret = PTR_ERR(mailbox);
- goto err_table;
- }
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+ trace_hns_mr(mr);
if (mr->type != MR_TYPE_FRMR)
- ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
+ ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr);
else
ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr);
if (ret) {
- dev_err(dev, "Write mtpt fail!\n");
+ dev_err(dev, "failed to write mtpt, ret = %d.\n", ret);
goto err_page;
}
- ret = hns_roce_sw2hw_mpt(hr_dev, mailbox,
- mtpt_idx & (hr_dev->caps.num_mtpts - 1));
+ ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT,
+ mtpt_idx & (hr_dev->caps.num_mtpts - 1));
if (ret) {
- dev_err(dev, "SW2HW_MPT failed (%d)\n", ret);
+ dev_err(dev, "failed to create mpt, ret = %d.\n", ret);
goto err_page;
}
mr->enabled = 1;
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
-
- return 0;
err_page:
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
-err_table:
- hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
return ret;
}
-static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt, u32 start_index,
- u32 npages, u64 *page_list)
-{
- struct hns_roce_hem_table *table;
- dma_addr_t dma_handle;
- __le64 *mtts;
- u32 bt_page_size;
- u32 i;
-
- switch (mtt->mtt_type) {
- case MTT_TYPE_WQE:
- table = &hr_dev->mr_table.mtt_table;
- bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
- break;
- case MTT_TYPE_CQE:
- table = &hr_dev->mr_table.mtt_cqe_table;
- bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
- break;
- case MTT_TYPE_SRQWQE:
- table = &hr_dev->mr_table.mtt_srqwqe_table;
- bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT);
- break;
- case MTT_TYPE_IDX:
- table = &hr_dev->mr_table.mtt_idx_table;
- bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT);
- break;
- default:
- return -EINVAL;
- }
-
- /* All MTTs must fit in the same page */
- if (start_index / (bt_page_size / sizeof(u64)) !=
- (start_index + npages - 1) / (bt_page_size / sizeof(u64)))
- return -EINVAL;
-
- if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1))
- return -EINVAL;
-
- mtts = hns_roce_table_find(hr_dev, table,
- mtt->first_seg +
- start_index / HNS_ROCE_MTT_ENTRY_PER_SEG,
- &dma_handle);
- if (!mtts)
- return -ENOMEM;
-
- /* Save page addr, low 12 bits : 0 */
- for (i = 0; i < npages; ++i) {
- if (!hr_dev->caps.mtt_hop_num)
- mtts[i] = cpu_to_le64(page_list[i] >> PAGE_ADDR_SHIFT);
- else
- mtts[i] = cpu_to_le64(page_list[i]);
- }
-
- return 0;
-}
-
-static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt, u32 start_index,
- u32 npages, u64 *page_list)
+void hns_roce_init_mr_table(struct hns_roce_dev *hr_dev)
{
- int chunk;
- int ret;
- u32 bt_page_size;
-
- if (mtt->order < 0)
- return -EINVAL;
-
- switch (mtt->mtt_type) {
- case MTT_TYPE_WQE:
- bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
- break;
- case MTT_TYPE_CQE:
- bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
- break;
- case MTT_TYPE_SRQWQE:
- bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT);
- break;
- case MTT_TYPE_IDX:
- bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT);
- break;
- default:
- dev_err(hr_dev->dev,
- "Unsupport mtt type %d, write mtt failed\n",
- mtt->mtt_type);
- return -EINVAL;
- }
+ struct hns_roce_ida *mtpt_ida = &hr_dev->mr_table.mtpt_ida;
- while (npages > 0) {
- chunk = min_t(int, bt_page_size / sizeof(u64), npages);
-
- ret = hns_roce_write_mtt_chunk(hr_dev, mtt, start_index, chunk,
- page_list);
- if (ret)
- return ret;
-
- npages -= chunk;
- start_index += chunk;
- page_list += chunk;
- }
-
- return 0;
-}
-
-int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt, struct hns_roce_buf *buf)
-{
- u64 *page_list;
- int ret;
- u32 i;
-
- page_list = kmalloc_array(buf->npages, sizeof(*page_list), GFP_KERNEL);
- if (!page_list)
- return -ENOMEM;
-
- for (i = 0; i < buf->npages; ++i) {
- if (buf->nbufs == 1)
- page_list[i] = buf->direct.map + (i << buf->page_shift);
- else
- page_list[i] = buf->page_list[i].map;
-
- }
- ret = hns_roce_write_mtt(hr_dev, mtt, 0, buf->npages, page_list);
-
- kfree(page_list);
-
- return ret;
-}
-
-int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
- int ret;
-
- ret = hns_roce_bitmap_init(&mr_table->mtpt_bitmap,
- hr_dev->caps.num_mtpts,
- hr_dev->caps.num_mtpts - 1,
- hr_dev->caps.reserved_mrws, 0);
- if (ret)
- return ret;
-
- ret = hns_roce_buddy_init(&mr_table->mtt_buddy,
- ilog2(hr_dev->caps.num_mtt_segs));
- if (ret)
- goto err_buddy;
-
- if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) {
- ret = hns_roce_buddy_init(&mr_table->mtt_cqe_buddy,
- ilog2(hr_dev->caps.num_cqe_segs));
- if (ret)
- goto err_buddy_cqe;
- }
-
- if (hr_dev->caps.num_srqwqe_segs) {
- ret = hns_roce_buddy_init(&mr_table->mtt_srqwqe_buddy,
- ilog2(hr_dev->caps.num_srqwqe_segs));
- if (ret)
- goto err_buddy_srqwqe;
- }
-
- if (hr_dev->caps.num_idx_segs) {
- ret = hns_roce_buddy_init(&mr_table->mtt_idx_buddy,
- ilog2(hr_dev->caps.num_idx_segs));
- if (ret)
- goto err_buddy_idx;
- }
-
- return 0;
-
-err_buddy_idx:
- if (hr_dev->caps.num_srqwqe_segs)
- hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy);
-
-err_buddy_srqwqe:
- if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
- hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
-
-err_buddy_cqe:
- hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
-
-err_buddy:
- hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
- return ret;
-}
-
-void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
-
- if (hr_dev->caps.num_idx_segs)
- hns_roce_buddy_cleanup(&mr_table->mtt_idx_buddy);
- if (hr_dev->caps.num_srqwqe_segs)
- hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy);
- hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
- if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
- hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
- hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
+ ida_init(&mtpt_ida->ida);
+ mtpt_ida->max = hr_dev->caps.num_mtpts - 1;
+ mtpt_ida->min = hr_dev->caps.reserved_mrws;
}
struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
{
+ struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
struct hns_roce_mr *mr;
int ret;
- mr = kmalloc(sizeof(*mr), GFP_KERNEL);
- if (mr == NULL)
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
return ERR_PTR(-ENOMEM);
mr->type = MR_TYPE_DMA;
+ mr->pd = to_hr_pd(pd)->pdn;
+ mr->access = acc;
/* Allocate memory region key */
- ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0,
- ~0ULL, acc, 0, mr);
+ hns_roce_hem_list_init(&mr->pbl_mtr.hem_list);
+ ret = alloc_mr_key(hr_dev, mr);
if (ret)
goto err_free;
- ret = hns_roce_mr_enable(to_hr_dev(pd->device), mr);
+ ret = hns_roce_mr_enable(hr_dev, mr);
if (ret)
goto err_mr;
mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
- mr->umem = NULL;
return &mr->ibmr;
-
err_mr:
- hns_roce_mr_free(to_hr_dev(pd->device), mr);
+ free_mr_key(hr_dev, mr);
err_free:
kfree(mr);
return ERR_PTR(ret);
}
-int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt, struct ib_umem *umem)
-{
- struct device *dev = hr_dev->dev;
- struct sg_dma_page_iter sg_iter;
- unsigned int order;
- int npage = 0;
- int ret = 0;
- int i;
- u64 page_addr;
- u64 *pages;
- u32 bt_page_size;
- u32 n;
-
- switch (mtt->mtt_type) {
- case MTT_TYPE_WQE:
- order = hr_dev->caps.mtt_ba_pg_sz;
- break;
- case MTT_TYPE_CQE:
- order = hr_dev->caps.cqe_ba_pg_sz;
- break;
- case MTT_TYPE_SRQWQE:
- order = hr_dev->caps.srqwqe_ba_pg_sz;
- break;
- case MTT_TYPE_IDX:
- order = hr_dev->caps.idx_ba_pg_sz;
- break;
- default:
- dev_err(dev, "Unsupport mtt type %d, write mtt failed\n",
- mtt->mtt_type);
- return -EINVAL;
- }
-
- bt_page_size = 1 << (order + PAGE_SHIFT);
-
- pages = (u64 *) __get_free_pages(GFP_KERNEL, order);
- if (!pages)
- return -ENOMEM;
-
- i = n = 0;
-
- for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
- page_addr = sg_page_iter_dma_address(&sg_iter);
- if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) {
- if (page_addr & ((1 << mtt->page_shift) - 1)) {
- dev_err(dev,
- "page_addr 0x%llx is not page_shift %d alignment!\n",
- page_addr, mtt->page_shift);
- ret = -EINVAL;
- goto out;
- }
- pages[i++] = page_addr;
- }
- npage++;
- if (i == bt_page_size / sizeof(u64)) {
- ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages);
- if (ret)
- goto out;
- n += i;
- i = 0;
- }
- }
-
- if (i)
- ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages);
-
-out:
- free_pages((unsigned long) pages, order);
- return ret;
-}
-
-static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev,
- struct hns_roce_mr *mr,
- struct ib_umem *umem)
-{
- struct sg_dma_page_iter sg_iter;
- int i = 0, j = 0;
- u64 page_addr;
- u32 pbl_bt_sz;
-
- if (hr_dev->caps.pbl_hop_num == HNS_ROCE_HOP_NUM_0)
- return 0;
-
- pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
- for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
- page_addr = sg_page_iter_dma_address(&sg_iter);
- if (!hr_dev->caps.pbl_hop_num) {
- mr->pbl_buf[i++] = page_addr >> 12;
- } else if (hr_dev->caps.pbl_hop_num == 1) {
- mr->pbl_buf[i++] = page_addr;
- } else {
- if (hr_dev->caps.pbl_hop_num == 2)
- mr->pbl_bt_l1[i][j] = page_addr;
- else if (hr_dev->caps.pbl_hop_num == 3)
- mr->pbl_bt_l2[i][j] = page_addr;
-
- j++;
- if (j >= (pbl_bt_sz / 8)) {
- i++;
- j = 0;
- }
- }
- }
-
- /* Memory barrier */
- mb();
-
- return 0;
-}
-
struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
- struct device *dev = hr_dev->dev;
struct hns_roce_mr *mr;
- int bt_size;
int ret;
- int n;
- int i;
-
- mr = kmalloc(sizeof(*mr), GFP_KERNEL);
- if (!mr)
- return ERR_PTR(-ENOMEM);
- mr->umem = ib_umem_get(udata, start, length, access_flags, 0);
- if (IS_ERR(mr->umem)) {
- ret = PTR_ERR(mr->umem);
- goto err_free;
+ if (dmah) {
+ ret = -EOPNOTSUPP;
+ goto err_out;
}
- n = ib_umem_page_count(mr->umem);
-
- if (!hr_dev->caps.pbl_hop_num) {
- if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) {
- dev_err(dev,
- " MR len %lld err. MR is limited to 4G at most!\n",
- length);
- ret = -EINVAL;
- goto err_umem;
- }
- } else {
- u64 pbl_size = 1;
-
- bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) / 8;
- for (i = 0; i < hr_dev->caps.pbl_hop_num; i++)
- pbl_size *= bt_size;
- if (n > pbl_size) {
- dev_err(dev,
- " MR len %lld err. MR page num is limited to %lld!\n",
- length, pbl_size);
- ret = -EINVAL;
- goto err_umem;
- }
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr) {
+ ret = -ENOMEM;
+ goto err_out;
}
+ mr->iova = virt_addr;
+ mr->size = length;
+ mr->pd = to_hr_pd(pd)->pdn;
+ mr->access = access_flags;
mr->type = MR_TYPE_MR;
- ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length,
- access_flags, n, mr);
+ ret = alloc_mr_key(hr_dev, mr);
if (ret)
- goto err_umem;
+ goto err_alloc_mr;
- ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
+ ret = alloc_mr_pbl(hr_dev, mr, udata, start);
if (ret)
- goto err_mr;
+ goto err_alloc_key;
ret = hns_roce_mr_enable(hr_dev, mr);
if (ret)
- goto err_mr;
+ goto err_alloc_pbl;
mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
return &mr->ibmr;
-err_mr:
- hns_roce_mr_free(hr_dev, mr);
-
-err_umem:
- ib_umem_release(mr->umem);
-
-err_free:
+err_alloc_pbl:
+ free_mr_pbl(hr_dev, mr);
+err_alloc_key:
+ free_mr_key(hr_dev, mr);
+err_alloc_mr:
kfree(mr);
+err_out:
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MR_REG_ERR_CNT]);
+
return ERR_PTR(ret);
}
-int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
- u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
- struct ib_udata *udata)
+struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start,
+ u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_pd *pd,
+ struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
+ struct ib_device *ib_dev = &hr_dev->ib_dev;
struct hns_roce_mr *mr = to_hr_mr(ibmr);
struct hns_roce_cmd_mailbox *mailbox;
- struct device *dev = hr_dev->dev;
unsigned long mtpt_idx;
- u32 pdn = 0;
- int npages;
int ret;
- if (!mr->enabled)
- return -EINVAL;
+ if (!mr->enabled) {
+ ret = -EINVAL;
+ goto err_out;
+ }
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
+ ret = PTR_ERR_OR_ZERO(mailbox);
+ if (ret)
+ goto err_out;
mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1);
- ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, mtpt_idx, 0,
- HNS_ROCE_CMD_QUERY_MPT,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
+
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_MPT,
+ mtpt_idx);
if (ret)
goto free_cmd_mbox;
- ret = hns_roce_hw2sw_mpt(hr_dev, NULL, mtpt_idx);
+ ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT,
+ mtpt_idx);
if (ret)
- dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret);
+ ibdev_warn(ib_dev, "failed to destroy MPT, ret = %d.\n", ret);
mr->enabled = 0;
+ mr->iova = virt_addr;
+ mr->size = length;
if (flags & IB_MR_REREG_PD)
- pdn = to_hr_pd(pd)->pdn;
+ mr->pd = to_hr_pd(pd)->pdn;
- if (flags & IB_MR_REREG_TRANS) {
- if (mr->size != ~0ULL) {
- npages = ib_umem_page_count(mr->umem);
-
- if (hr_dev->caps.pbl_hop_num)
- hns_roce_mhop_free(hr_dev, mr);
- else
- dma_free_coherent(dev, npages * 8, mr->pbl_buf,
- mr->pbl_dma_addr);
- }
- ib_umem_release(mr->umem);
+ if (flags & IB_MR_REREG_ACCESS)
+ mr->access = mr_access_flags;
- mr->umem =
- ib_umem_get(udata, start, length, mr_access_flags, 0);
- if (IS_ERR(mr->umem)) {
- ret = PTR_ERR(mr->umem);
- mr->umem = NULL;
+ if (flags & IB_MR_REREG_TRANS) {
+ free_mr_pbl(hr_dev, mr);
+ ret = alloc_mr_pbl(hr_dev, mr, udata, start);
+ if (ret) {
+ ibdev_err(ib_dev, "failed to alloc mr PBL, ret = %d.\n",
+ ret);
goto free_cmd_mbox;
}
- npages = ib_umem_page_count(mr->umem);
-
- if (hr_dev->caps.pbl_hop_num) {
- ret = hns_roce_mhop_alloc(hr_dev, npages, mr);
- if (ret)
- goto release_umem;
- } else {
- mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
- &(mr->pbl_dma_addr),
- GFP_KERNEL);
- if (!mr->pbl_buf) {
- ret = -ENOMEM;
- goto release_umem;
- }
- }
}
- ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn,
- mr_access_flags, virt_addr,
- length, mailbox->buf);
+ ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, mailbox->buf);
if (ret) {
- if (flags & IB_MR_REREG_TRANS)
- goto release_umem;
- else
- goto free_cmd_mbox;
- }
-
- if (flags & IB_MR_REREG_TRANS) {
- ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem);
- if (ret) {
- if (mr->size != ~0ULL) {
- npages = ib_umem_page_count(mr->umem);
-
- if (hr_dev->caps.pbl_hop_num)
- hns_roce_mhop_free(hr_dev, mr);
- else
- dma_free_coherent(dev, npages * 8,
- mr->pbl_buf,
- mr->pbl_dma_addr);
- }
-
- goto release_umem;
- }
+ ibdev_err(ib_dev, "failed to write mtpt, ret = %d.\n", ret);
+ goto free_cmd_mbox;
}
- ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, mtpt_idx);
+ ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT,
+ mtpt_idx);
if (ret) {
- dev_err(dev, "SW2HW_MPT failed (%d)\n", ret);
- goto release_umem;
+ ibdev_err(ib_dev, "failed to create MPT, ret = %d.\n", ret);
+ goto free_cmd_mbox;
}
mr->enabled = 1;
- if (flags & IB_MR_REREG_ACCESS)
- mr->access = mr_access_flags;
-
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
-
- return 0;
-
-release_umem:
- ib_umem_release(mr->umem);
free_cmd_mbox:
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
- return ret;
+err_out:
+ if (ret) {
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MR_REREG_ERR_CNT]);
+ return ERR_PTR(ret);
+ }
+
+ return NULL;
}
int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
struct hns_roce_mr *mr = to_hr_mr(ibmr);
- int ret = 0;
-
- if (hr_dev->hw->dereg_mr) {
- ret = hr_dev->hw->dereg_mr(hr_dev, mr, udata);
- } else {
- hns_roce_mr_free(hr_dev, mr);
- if (mr->umem)
- ib_umem_release(mr->umem);
+ if (hr_dev->hw->dereg_mr)
+ hr_dev->hw->dereg_mr(hr_dev);
- kfree(mr);
- }
+ hns_roce_mr_free(hr_dev, mr);
+ kfree(mr);
- return ret;
+ return 0;
}
struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
struct device *dev = hr_dev->dev;
struct hns_roce_mr *mr;
- u64 length;
- u32 page_size;
int ret;
- page_size = 1 << (hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT);
- length = max_num_sg * page_size;
-
if (mr_type != IB_MR_TYPE_MEM_REG)
return ERR_PTR(-EINVAL);
@@ -1329,25 +400,31 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
return ERR_PTR(-ENOMEM);
mr->type = MR_TYPE_FRMR;
+ mr->pd = to_hr_pd(pd)->pdn;
+ mr->size = max_num_sg * (1 << PAGE_SHIFT);
/* Allocate memory region key */
- ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, 0, length,
- 0, max_num_sg, mr);
+ ret = alloc_mr_key(hr_dev, mr);
if (ret)
goto err_free;
+ ret = alloc_mr_pbl(hr_dev, mr, NULL, 0);
+ if (ret)
+ goto err_key;
+
ret = hns_roce_mr_enable(hr_dev, mr);
if (ret)
- goto err_mr;
+ goto err_pbl;
mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
- mr->umem = NULL;
+ mr->ibmr.length = mr->size;
return &mr->ibmr;
-err_mr:
- hns_roce_mr_free(to_hr_dev(pd->device), mr);
-
+err_pbl:
+ free_mr_pbl(hr_dev, mr);
+err_key:
+ free_mr_key(hr_dev, mr);
err_free:
kfree(mr);
return ERR_PTR(ret);
@@ -1357,137 +434,671 @@ static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
{
struct hns_roce_mr *mr = to_hr_mr(ibmr);
- mr->pbl_buf[mr->npages++] = cpu_to_le64(addr);
+ if (likely(mr->npages < mr->pbl_mtr.hem_cfg.buf_pg_count)) {
+ mr->page_list[mr->npages++] = addr;
+ return 0;
+ }
- return 0;
+ return -ENOBUFS;
}
int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
- unsigned int *sg_offset)
+ unsigned int *sg_offset_p)
{
+ unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_mr *mr = to_hr_mr(ibmr);
+ struct hns_roce_mtr *mtr = &mr->pbl_mtr;
+ int ret, sg_num = 0;
+
+ if (!IS_ALIGNED(sg_offset, HNS_ROCE_FRMR_ALIGN_SIZE) ||
+ ibmr->page_size < HNS_HW_PAGE_SIZE ||
+ ibmr->page_size > HNS_HW_MAX_PAGE_SIZE)
+ return sg_num;
mr->npages = 0;
+ mr->page_list = kvcalloc(mr->pbl_mtr.hem_cfg.buf_pg_count,
+ sizeof(dma_addr_t), GFP_KERNEL);
+ if (!mr->page_list)
+ return sg_num;
+
+ sg_num = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset_p, hns_roce_set_page);
+ if (sg_num < 1) {
+ ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n",
+ mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, sg_num);
+ goto err_page_list;
+ }
+
+ mtr->hem_cfg.region[0].offset = 0;
+ mtr->hem_cfg.region[0].count = mr->npages;
+ mtr->hem_cfg.region[0].hopnum = mr->pbl_hop_num;
+ mtr->hem_cfg.region_count = 1;
+ ret = hns_roce_mtr_map(hr_dev, mtr, mr->page_list, mr->npages);
+ if (ret) {
+ ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret);
+ sg_num = 0;
+ } else {
+ mr->pbl_mtr.hem_cfg.buf_pg_shift = (u32)ilog2(ibmr->page_size);
+ }
+
+err_page_list:
+ kvfree(mr->page_list);
+ mr->page_list = NULL;
- return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
+ return sg_num;
}
-static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
- struct hns_roce_mw *mw)
+static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_region *region, dma_addr_t *pages,
+ int max_count)
{
- struct device *dev = hr_dev->dev;
- int ret;
+ int count, npage;
+ int offset, end;
+ __le64 *mtts;
+ u64 addr;
+ int i;
- if (mw->enabled) {
- ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mw->rkey)
- & (hr_dev->caps.num_mtpts - 1));
- if (ret)
- dev_warn(dev, "MW HW2SW_MPT failed (%d)\n", ret);
+ offset = region->offset;
+ end = offset + region->count;
+ npage = 0;
+ while (offset < end && npage < max_count) {
+ count = 0;
+ mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
+ offset, &count);
+ if (!mtts)
+ return -ENOBUFS;
+
+ for (i = 0; i < count && npage < max_count; i++) {
+ addr = pages[npage];
+
+ mtts[i] = cpu_to_le64(addr);
+ npage++;
+ }
+ offset += count;
+ }
+
+ return npage;
+}
+
+static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr)
+{
+ int i;
+
+ for (i = 0; i < attr->region_count; i++)
+ if (attr->region[i].hopnum != HNS_ROCE_HOP_NUM_0 &&
+ attr->region[i].hopnum > 0)
+ return true;
+
+ /* Because the mtr has only one root base address, a hopnum of 0 means the
+ * root base address equals the first buffer address, so all allocated
+ * memory must lie in a continuous space accessed in direct mode.
+ */
+ return false;
+}
- hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
- key_to_hw_index(mw->rkey));
+static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr)
+{
+ size_t size = 0;
+ int i;
+
+ for (i = 0; i < attr->region_count; i++)
+ size += attr->region[i].size;
+
+ return size;
+}
+
+/*
+ * Check that the given pages lie in a continuous address space.
+ * Returns 0 on success, or the index of the first discontinuous page.
+ */
+static inline int mtr_check_direct_pages(dma_addr_t *pages, int page_count,
+ unsigned int page_shift)
+{
+ size_t page_size = 1 << page_shift;
+ int i;
+
+ for (i = 1; i < page_count; i++)
+ if (pages[i] - pages[i - 1] != page_size)
+ return i;
+
+ return 0;
+}
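/*
 * Editor's note (illustration only): mtr_check_direct_pages() above accepts a
 * buffer as "direct" only if every DMA page follows the previous one by
 * exactly one page size. For example, with page_shift = 12 (4K pages):
 *
 *   pages[] = { 0x10000, 0x11000, 0x12000 }  ->  returns 0 (contiguous)
 *   pages[] = { 0x10000, 0x11000, 0x20000 }  ->  returns 2 (first bad index)
 *
 * The non-zero return value is the index of the offending page, which is how
 * mtr_map_bufs() further below reports the failure.
 */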
+
+static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
+{
+ /* release user buffers */
+ if (mtr->umem) {
+ ib_umem_release(mtr->umem);
+ mtr->umem = NULL;
}
- hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
- key_to_hw_index(mw->rkey), BITMAP_NO_RR);
+ /* release kernel buffers */
+ if (mtr->kmem) {
+ hns_roce_buf_free(hr_dev, mtr->kmem);
+ mtr->kmem = NULL;
+ }
}
-static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev,
- struct hns_roce_mw *mw)
+static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *buf_attr,
+ struct ib_udata *udata, unsigned long user_addr)
{
- struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
- struct hns_roce_cmd_mailbox *mailbox;
- struct device *dev = hr_dev->dev;
- unsigned long mtpt_idx = key_to_hw_index(mw->rkey);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ size_t total_size;
+
+ total_size = mtr_bufs_size(buf_attr);
+
+ if (udata) {
+ mtr->kmem = NULL;
+ mtr->umem = ib_umem_get(ibdev, user_addr, total_size,
+ buf_attr->user_access);
+ if (IS_ERR(mtr->umem)) {
+ ibdev_err(ibdev, "failed to get umem, ret = %pe.\n",
+ mtr->umem);
+ return -ENOMEM;
+ }
+ } else {
+ mtr->umem = NULL;
+ mtr->kmem = hns_roce_buf_alloc(hr_dev, total_size,
+ buf_attr->page_shift,
+ !mtr_has_mtt(buf_attr) ?
+ HNS_ROCE_BUF_DIRECT : 0);
+ if (IS_ERR(mtr->kmem)) {
+ ibdev_err(ibdev, "failed to alloc kmem, ret = %pe.\n",
+ mtr->kmem);
+ return PTR_ERR(mtr->kmem);
+ }
+ }
+
+ return 0;
+}
+
+static int cal_mtr_pg_cnt(struct hns_roce_mtr *mtr)
+{
+ struct hns_roce_buf_region *region;
+ int page_cnt = 0;
+ int i;
+
+ for (i = 0; i < mtr->hem_cfg.region_count; i++) {
+ region = &mtr->hem_cfg.region[i];
+ page_cnt += region->count;
+ }
+
+ return page_cnt;
+}
+
+static bool need_split_huge_page(struct hns_roce_mtr *mtr)
+{
+ /* When HEM buffer uses 0-level addressing, the page size is
+ * equal to the whole buffer size. If the current MTR has multiple
+ * regions, we split the buffer into small pages (4K, required by hns
+ * ROCEE). These pages will be used in multiple regions.
+ */
+ return mtr->hem_cfg.is_direct && mtr->hem_cfg.region_count > 1;
+}
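/*
 * Editor's note (worked example, illustrative numbers): when
 * need_split_huge_page() is true, mtr_init_buf_cfg() below programs a single
 * "huge" buffer page covering the whole buffer. For a 24K direct buffer and
 * 4K hardware pages (assuming HNS_HW_PAGE_SHIFT is 12):
 *
 *   buf_pg_shift = HNS_HW_PAGE_SHIFT + order_base_2(DIV_ROUND_UP(24K, 4K))
 *                = 12 + order_base_2(6) = 15        (one 32K "page")
 *
 * while the regions still index the buffer in 4K units on top of the single
 * contiguous block (see hns_roce_get_direct_addr_mtt() below).
 */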
+
+static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int page_count = cal_mtr_pg_cnt(mtr);
+ unsigned int page_shift;
+ dma_addr_t *pages;
+ int npage;
int ret;
- /* prepare HEM entry memory */
- ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
+ page_shift = need_split_huge_page(mtr) ? HNS_HW_PAGE_SHIFT :
+ mtr->hem_cfg.buf_pg_shift;
+ /* alloc a tmp array to store buffer's dma address */
+ pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
+
+ if (mtr->umem)
+ npage = hns_roce_get_umem_bufs(pages, page_count,
+ mtr->umem, page_shift);
+ else
+ npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count,
+ mtr->kmem, page_shift);
+
+ if (npage != page_count) {
+ ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", npage,
+ page_count);
+ ret = -ENOBUFS;
+ goto err_alloc_list;
+ }
+
+ if (need_split_huge_page(mtr) && npage > 1) {
+ ret = mtr_check_direct_pages(pages, npage, page_shift);
+ if (ret) {
+ ibdev_err(ibdev, "failed to check %s page: %d / %d.\n",
+ mtr->umem ? "umtr" : "kmtr", ret, npage);
+ ret = -ENOBUFS;
+ goto err_alloc_list;
+ }
+ }
+
+ ret = hns_roce_mtr_map(hr_dev, mtr, pages, page_count);
if (ret)
- return ret;
+ ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret);
- mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox)) {
- ret = PTR_ERR(mailbox);
- goto err_table;
+err_alloc_list:
+ kvfree(pages);
+
+ return ret;
+}
+
+int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ dma_addr_t *pages, unsigned int page_cnt)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_buf_region *r;
+ unsigned int i, mapped_cnt;
+ int ret = 0;
+
+ /*
+ * Only use the first page address as the root ba when hopnum is 0,
+ * because the addresses of all pages are consecutive in this case.
+ */
+ if (mtr->hem_cfg.is_direct) {
+ mtr->hem_cfg.root_ba = pages[0];
+ return 0;
}
- ret = hr_dev->hw->mw_write_mtpt(mailbox->buf, mw);
- if (ret) {
- dev_err(dev, "MW write mtpt fail!\n");
- goto err_page;
+ for (i = 0, mapped_cnt = 0; i < mtr->hem_cfg.region_count &&
+ mapped_cnt < page_cnt; i++) {
+ r = &mtr->hem_cfg.region[i];
+
+ if (r->offset + r->count > page_cnt) {
+ ret = -EINVAL;
+ ibdev_err(ibdev,
+ "failed to check mtr%u count %u + %u > %u.\n",
+ i, r->offset, r->count, page_cnt);
+ return ret;
+ }
+
+ ret = mtr_map_region(hr_dev, mtr, r, &pages[r->offset],
+ page_cnt - mapped_cnt);
+ if (ret < 0) {
+ ibdev_err(ibdev,
+ "failed to map mtr%u offset %u, ret = %d.\n",
+ i, r->offset, ret);
+ return ret;
+ }
+ mapped_cnt += ret;
+ ret = 0;
}
- ret = hns_roce_sw2hw_mpt(hr_dev, mailbox,
- mtpt_idx & (hr_dev->caps.num_mtpts - 1));
- if (ret) {
- dev_err(dev, "MW sw2hw_mpt failed (%d)\n", ret);
- goto err_page;
+ if (mapped_cnt < page_cnt) {
+ ret = -ENOBUFS;
+ ibdev_err(ibdev, "failed to map mtr pages count: %u < %u.\n",
+ mapped_cnt, page_cnt);
}
- mw->enabled = 1;
+ return ret;
+}
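/*
 * Editor's sketch (not part of the patch): a typical single-region caller,
 * such as the FRMR path in hns_roce_map_mr_sg() above, drives this helper
 * roughly as follows (the real path also sets region[0].hopnum). The helper
 * name is hypothetical.
 */
static int example_map_one_region(struct hns_roce_dev *hr_dev,
				  struct hns_roce_mtr *mtr,
				  dma_addr_t *page_list, unsigned int npages)
{
	/* describe one region covering all pages, then hand it to the MTT */
	mtr->hem_cfg.region[0].offset = 0;
	mtr->hem_cfg.region[0].count = npages;
	mtr->hem_cfg.region_count = 1;

	return hns_roce_mtr_map(hr_dev, mtr, page_list, npages);
}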
+
+static int hns_roce_get_direct_addr_mtt(struct hns_roce_hem_cfg *cfg,
+ u32 start_index, u64 *mtt_buf,
+ int mtt_cnt)
+{
+ int mtt_count;
+ int total = 0;
+ u32 npage;
+ u64 addr;
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ if (mtt_cnt > cfg->region_count)
+ return -EINVAL;
+
+ for (mtt_count = 0; mtt_count < cfg->region_count && total < mtt_cnt;
+ mtt_count++) {
+ npage = cfg->region[mtt_count].offset;
+ if (npage < start_index)
+ continue;
+
+ addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT);
+ mtt_buf[total] = addr;
+
+ total++;
+ }
+
+ if (!total)
+ return -ENOENT;
return 0;
+}
-err_page:
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+static int hns_roce_get_mhop_mtt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr, u32 start_index,
+ u64 *mtt_buf, int mtt_cnt)
+{
+ int left = mtt_cnt;
+ int total = 0;
+ int mtt_count;
+ __le64 *mtts;
+ u32 npage;
+
+ while (left > 0) {
+ mtt_count = 0;
+ mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
+ start_index + total,
+ &mtt_count);
+ if (!mtts || !mtt_count)
+ break;
+
+ npage = min(mtt_count, left);
+ left -= npage;
+ for (mtt_count = 0; mtt_count < npage; mtt_count++)
+ mtt_buf[total++] = le64_to_cpu(mtts[mtt_count]);
+ }
-err_table:
- hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
+ if (!total)
+ return -ENOENT;
- return ret;
+ return 0;
}
-struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
- struct ib_udata *udata)
+int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ u32 offset, u64 *mtt_buf, int mtt_max)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ib_pd->device);
- struct hns_roce_mw *mw;
- unsigned long index = 0;
+ struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
+ u32 start_index;
int ret;
- mw = kmalloc(sizeof(*mw), GFP_KERNEL);
- if (!mw)
- return ERR_PTR(-ENOMEM);
+ if (!mtt_buf || mtt_max < 1)
+ return -EINVAL;
- /* Allocate a key for mw from bitmap */
- ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
- if (ret)
- goto err_bitmap;
+ /* no mtt memory in direct mode, so just return the buffer address */
+ if (cfg->is_direct) {
+ start_index = offset >> HNS_HW_PAGE_SHIFT;
+ ret = hns_roce_get_direct_addr_mtt(cfg, start_index,
+ mtt_buf, mtt_max);
+ } else {
+ start_index = offset >> cfg->buf_pg_shift;
+ ret = hns_roce_get_mhop_mtt(hr_dev, mtr, start_index,
+ mtt_buf, mtt_max);
+ }
+ return ret;
+}
- mw->rkey = hw_index_to_key(index);
+static int get_best_page_shift(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *buf_attr)
+{
+ unsigned int page_sz;
- mw->ibmw.rkey = mw->rkey;
- mw->ibmw.type = type;
- mw->pdn = to_hr_pd(ib_pd)->pdn;
- mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
- mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
- mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
+ if (!buf_attr->adaptive || buf_attr->type != MTR_PBL || !mtr->umem)
+ return 0;
- ret = hns_roce_mw_enable(hr_dev, mw);
- if (ret)
- goto err_mw;
+ page_sz = ib_umem_find_best_pgsz(mtr->umem,
+ hr_dev->caps.page_size_cap,
+ buf_attr->iova);
+ if (!page_sz)
+ return -EINVAL;
- return &mw->ibmw;
+ buf_attr->page_shift = order_base_2(page_sz);
+ return 0;
+}
+
+static int get_best_hop_num(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *buf_attr,
+ unsigned int ba_pg_shift)
+{
+#define INVALID_HOPNUM -1
+#define MIN_BA_CNT 1
+ size_t buf_pg_sz = 1 << buf_attr->page_shift;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ size_t ba_pg_sz = 1 << ba_pg_shift;
+ int hop_num = INVALID_HOPNUM;
+ size_t unit = MIN_BA_CNT;
+ size_t ba_cnt;
+ int j;
+
+ if (!buf_attr->adaptive || buf_attr->type != MTR_PBL)
+ return 0;
-err_mw:
- hns_roce_mw_free(hr_dev, mw);
+ /* Calculate the number of buf pages; each buf page needs a BA */
+ if (mtr->umem)
+ ba_cnt = ib_umem_num_dma_blocks(mtr->umem, buf_pg_sz);
+ else
+ ba_cnt = DIV_ROUND_UP(buf_attr->region[0].size, buf_pg_sz);
-err_bitmap:
- kfree(mw);
+ for (j = 0; j <= HNS_ROCE_MAX_HOP_NUM; j++) {
+ if (ba_cnt <= unit) {
+ hop_num = j;
+ break;
+ }
+ /* Number of BAs can be represented at per hop */
+ unit *= ba_pg_sz / BA_BYTE_LEN;
+ }
- return ERR_PTR(ret);
+ if (hop_num < 0) {
+ ibdev_err(ibdev,
+ "failed to calculate a valid hopnum.\n");
+ return -EINVAL;
+ }
+
+ buf_attr->region[0].hopnum = hop_num;
+
+ return 0;
+}
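/*
 * Editor's note (worked example, illustrative numbers): with a 4K BA page
 * and assuming BA_BYTE_LEN is 8, each BA table holds 4096 / 8 = 512 entries,
 * so the loop above selects:
 *
 *   ba_cnt <= 1      ->  hop_num = 0   (no BA table needed)
 *   ba_cnt <= 512    ->  hop_num = 1
 *   ba_cnt <= 512^2  ->  hop_num = 2
 *   ba_cnt <= 512^3  ->  hop_num = 3
 *
 * i.e. the smallest hop count whose BA tree can reference every buffer page.
 */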
+
+static bool is_buf_attr_valid(struct hns_roce_dev *hr_dev,
+ struct hns_roce_buf_attr *attr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+
+ if (attr->region_count > ARRAY_SIZE(attr->region) ||
+ attr->region_count < 1 || attr->page_shift < HNS_HW_PAGE_SHIFT) {
+ ibdev_err(ibdev,
+ "invalid buf attr, region count %u, page shift %u.\n",
+ attr->region_count, attr->page_shift);
+ return false;
+ }
+
+ return true;
}
-int hns_roce_dealloc_mw(struct ib_mw *ibmw)
+static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *attr)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
- struct hns_roce_mw *mw = to_hr_mw(ibmw);
+ struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
+ struct hns_roce_buf_region *r;
+ size_t buf_pg_sz;
+ size_t buf_size;
+ int page_cnt, i;
+ u64 pgoff = 0;
+
+ if (!is_buf_attr_valid(hr_dev, attr))
+ return -EINVAL;
+
+ /* If mtt is disabled, all pages must be within a continuous range */
+ cfg->is_direct = !mtr_has_mtt(attr);
+ cfg->region_count = attr->region_count;
+ buf_size = mtr_bufs_size(attr);
+ if (need_split_huge_page(mtr)) {
+ buf_pg_sz = HNS_HW_PAGE_SIZE;
+ cfg->buf_pg_count = 1;
+ /* The ROCEE requires the page size to be 4K * 2 ^ N. */
+ cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT +
+ order_base_2(DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE));
+ } else {
+ buf_pg_sz = 1 << attr->page_shift;
+ cfg->buf_pg_count = mtr->umem ?
+ ib_umem_num_dma_blocks(mtr->umem, buf_pg_sz) :
+ DIV_ROUND_UP(buf_size, buf_pg_sz);
+ cfg->buf_pg_shift = attr->page_shift;
+ pgoff = mtr->umem ? mtr->umem->address & ~PAGE_MASK : 0;
+ }
- hns_roce_mw_free(hr_dev, mw);
- kfree(mw);
+ /* Convert the buffer size to a page index and page count for each region;
+ * the buffer's offset needs to be added to the first region.
+ */
+ for (page_cnt = 0, i = 0; i < attr->region_count; i++) {
+ r = &cfg->region[i];
+ r->offset = page_cnt;
+ buf_size = hr_hw_page_align(attr->region[i].size + pgoff);
+ if (attr->type == MTR_PBL && mtr->umem)
+ r->count = ib_umem_num_dma_blocks(mtr->umem, buf_pg_sz);
+ else
+ r->count = DIV_ROUND_UP(buf_size, buf_pg_sz);
+
+ pgoff = 0;
+ page_cnt += r->count;
+ r->hopnum = to_hr_hem_hopnum(attr->region[i].hopnum, r->count);
+ }
+
+ return 0;
+}
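/*
 * Editor's note (worked example, illustrative numbers): for a kernel QP
 * buffer with three regions of 16K (SQ), 4K (extended SGE) and 8K (RQ) and a
 * 4K buffer page, the loop above lays the regions out back to back in page
 * units:
 *
 *   region[0]: offset = 0, count = 4    (SQ)
 *   region[1]: offset = 4, count = 1    (ext SGE)
 *   region[2]: offset = 5, count = 2    (RQ)
 *
 * so buf_pg_count = 7 and each region's offset is also its first MTT index.
 */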
+
+static u64 cal_pages_per_l1ba(unsigned int ba_per_bt, unsigned int hopnum)
+{
+ return int_pow(ba_per_bt, hopnum - 1);
+}
+
+static unsigned int cal_best_bt_pg_sz(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr,
+ unsigned int pg_shift)
+{
+ unsigned long cap = hr_dev->caps.page_size_cap;
+ struct hns_roce_buf_region *re;
+ unsigned int pgs_per_l1ba;
+ unsigned int ba_per_bt;
+ unsigned int ba_num;
+ int i;
+
+ for_each_set_bit_from(pg_shift, &cap, sizeof(cap) * BITS_PER_BYTE) {
+ if (!(BIT(pg_shift) & cap))
+ continue;
+
+ ba_per_bt = BIT(pg_shift) / BA_BYTE_LEN;
+ ba_num = 0;
+ for (i = 0; i < mtr->hem_cfg.region_count; i++) {
+ re = &mtr->hem_cfg.region[i];
+ if (re->hopnum == 0)
+ continue;
+
+ pgs_per_l1ba = cal_pages_per_l1ba(ba_per_bt, re->hopnum);
+ ba_num += DIV_ROUND_UP(re->count, pgs_per_l1ba);
+ }
+
+ if (ba_num <= ba_per_bt)
+ return pg_shift;
+ }
+
+ return 0;
+}
+
+static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ unsigned int ba_page_shift)
+{
+ struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
+ int ret;
+
+ hns_roce_hem_list_init(&mtr->hem_list);
+ if (!cfg->is_direct) {
+ ba_page_shift = cal_best_bt_pg_sz(hr_dev, mtr, ba_page_shift);
+ if (!ba_page_shift)
+ return -ERANGE;
+
+ ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
+ cfg->region, cfg->region_count,
+ ba_page_shift);
+ if (ret)
+ return ret;
+ cfg->root_ba = mtr->hem_list.root_ba;
+ cfg->ba_pg_shift = ba_page_shift;
+ } else {
+ cfg->ba_pg_shift = cfg->buf_pg_shift;
+ }
+
+ return 0;
+}
+
+static void mtr_free_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
+{
+ hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
+}
+
+/**
+ * hns_roce_mtr_create - Create hns memory translate region.
+ *
+ * @hr_dev: RoCE device struct pointer
+ * @mtr: memory translate region
+ * @buf_attr: buffer attribute for creating mtr
+ * @ba_page_shift: page shift for multi-hop base address table
+ * @udata: user space context, if it's NULL, means kernel space
+ * @user_addr: userspace virtual address to start at
+ */
+int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *buf_attr,
+ unsigned int ba_page_shift, struct ib_udata *udata,
+ unsigned long user_addr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int ret;
+
+ trace_hns_buf_attr(buf_attr);
+ /* The caller has its own buffer list and invokes the hns_roce_mtr_map()
+ * to finish the MTT configuration.
+ */
+ if (buf_attr->mtt_only) {
+ mtr->umem = NULL;
+ mtr->kmem = NULL;
+ } else {
+ ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, udata, user_addr);
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to alloc mtr bufs, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = get_best_page_shift(hr_dev, mtr, buf_attr);
+ if (ret)
+ goto err_init_buf;
+
+ ret = get_best_hop_num(hr_dev, mtr, buf_attr, ba_page_shift);
+ if (ret)
+ goto err_init_buf;
+ }
+
+ ret = mtr_init_buf_cfg(hr_dev, mtr, buf_attr);
+ if (ret)
+ goto err_init_buf;
+
+ ret = mtr_alloc_mtt(hr_dev, mtr, ba_page_shift);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc mtr mtt, ret = %d.\n", ret);
+ goto err_init_buf;
+ }
+
+ if (buf_attr->mtt_only)
+ return 0;
+
+ /* Write buffer's dma address to MTT */
+ ret = mtr_map_bufs(hr_dev, mtr);
+ if (ret) {
+ ibdev_err(ibdev, "failed to map mtr bufs, ret = %d.\n", ret);
+ goto err_alloc_mtt;
+ }
return 0;
+
+err_alloc_mtt:
+ mtr_free_mtt(hr_dev, mtr);
+err_init_buf:
+ mtr_free_bufs(hr_dev, mtr);
+
+ return ret;
+}
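/*
 * Editor's sketch (not part of the patch): a minimal kernel-space caller of
 * hns_roce_mtr_create() for a single-region buffer. The size, hop number and
 * helper name are hypothetical; real callers (QP/CQ/MR code) fill in their
 * own region layout.
 */
static int example_create_mtr(struct hns_roce_dev *hr_dev,
			      struct hns_roce_mtr *mtr, size_t size)
{
	struct hns_roce_buf_attr buf_attr = {};

	buf_attr.page_shift = HNS_HW_PAGE_SHIFT;
	buf_attr.region[0].size = size;
	buf_attr.region[0].hopnum = 1;
	buf_attr.region_count = 1;

	/* kernel buffer: udata == NULL, so user_addr is unused */
	return hns_roce_mtr_create(hr_dev, mtr, &buf_attr,
				   HNS_HW_PAGE_SHIFT, NULL, 0);
}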
+
+void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
+{
+ /* release multi-hop addressing resource */
+ hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
+
+ /* free buffers */
+ mtr_free_bufs(hr_dev, mtr);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index 813401384d78..225c3e328e0e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -30,76 +30,70 @@
* SOFTWARE.
*/
-#include <linux/platform_device.h>
-#include <linux/pci.h>
-#include <uapi/rdma/hns-abi.h>
#include "hns_roce_device.h"
-static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn)
+void hns_roce_init_pd_table(struct hns_roce_dev *hr_dev)
{
- return hns_roce_bitmap_alloc(&hr_dev->pd_bitmap, pdn) ? -ENOMEM : 0;
-}
-
-static void hns_roce_pd_free(struct hns_roce_dev *hr_dev, unsigned long pdn)
-{
- hns_roce_bitmap_free(&hr_dev->pd_bitmap, pdn, BITMAP_NO_RR);
-}
-
-int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev)
-{
- return hns_roce_bitmap_init(&hr_dev->pd_bitmap, hr_dev->caps.num_pds,
- hr_dev->caps.num_pds - 1,
- hr_dev->caps.reserved_pds, 0);
-}
+ struct hns_roce_ida *pd_ida = &hr_dev->pd_ida;
-void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev)
-{
- hns_roce_bitmap_cleanup(&hr_dev->pd_bitmap);
+ ida_init(&pd_ida->ida);
+ pd_ida->max = hr_dev->caps.num_pds - 1;
+ pd_ida->min = hr_dev->caps.reserved_pds;
}
int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct ib_device *ib_dev = ibpd->device;
struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
- struct device *dev = hr_dev->dev;
+ struct hns_roce_ida *pd_ida = &hr_dev->pd_ida;
struct hns_roce_pd *pd = to_hr_pd(ibpd);
- int ret;
+ int ret = 0;
+ int id;
- ret = hns_roce_pd_alloc(to_hr_dev(ib_dev), &pd->pdn);
- if (ret) {
- dev_err(dev, "[alloc_pd]hns_roce_pd_alloc failed!\n");
- return ret;
+ id = ida_alloc_range(&pd_ida->ida, pd_ida->min, pd_ida->max,
+ GFP_KERNEL);
+ if (id < 0) {
+ ibdev_err(ib_dev, "failed to alloc pd, id = %d.\n", id);
+ return -ENOMEM;
}
+ pd->pdn = (unsigned long)id;
if (udata) {
- struct hns_roce_ib_alloc_pd_resp uresp = {.pdn = pd->pdn};
+ struct hns_roce_ib_alloc_pd_resp resp = {.pdn = pd->pdn};
- if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
- hns_roce_pd_free(to_hr_dev(ib_dev), pd->pdn);
- dev_err(dev, "[alloc_pd]ib_copy_to_udata failed!\n");
- return -EFAULT;
+ ret = ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)));
+ if (ret) {
+ ida_free(&pd_ida->ida, id);
+ ibdev_err(ib_dev, "failed to copy to udata, ret = %d\n", ret);
}
}
- return 0;
+ return ret;
}
-EXPORT_SYMBOL_GPL(hns_roce_alloc_pd);
-void hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
- hns_roce_pd_free(to_hr_dev(pd->device), to_hr_pd(pd)->pdn);
+ struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+
+ ida_free(&hr_dev->pd_ida.ida, (int)to_hr_pd(pd)->pdn);
+
+ return 0;
}
-EXPORT_SYMBOL_GPL(hns_roce_dealloc_pd);
int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
{
- struct resource *res;
- int ret = 0;
+ struct hns_roce_ida *uar_ida = &hr_dev->uar_ida;
+ int id;
/* Using bitmap to manage UAR index */
- ret = hns_roce_bitmap_alloc(&hr_dev->uar_table.bitmap, &uar->logic_idx);
- if (ret == -1)
+ id = ida_alloc_range(&uar_ida->ida, uar_ida->min, uar_ida->max,
+ GFP_KERNEL);
+ if (id < 0) {
+ ibdev_err(&hr_dev->ib_dev, "failed to alloc uar id(%d).\n", id);
return -ENOMEM;
+ }
+ uar->logic_idx = (unsigned long)id;
if (uar->logic_idx > 0 && hr_dev->caps.phy_num_uars > 1)
uar->index = (uar->logic_idx - 1) %
@@ -107,36 +101,73 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
else
uar->index = 0;
- if (!dev_is_pci(hr_dev->dev)) {
- res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0);
- if (!res) {
- dev_err(&hr_dev->pdev->dev, "memory resource not found!\n");
- return -EINVAL;
- }
- uar->pfn = ((res->start) >> PAGE_SHIFT) + uar->index;
- } else {
- uar->pfn = ((pci_resource_start(hr_dev->pci_dev, 2))
- >> PAGE_SHIFT);
+ uar->pfn = ((pci_resource_start(hr_dev->pci_dev, 2)) >> PAGE_SHIFT);
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DIRECT_WQE)
+ hr_dev->dwqe_page = pci_resource_start(hr_dev->pci_dev, 4);
+
+ return 0;
+}
+
+void hns_roce_init_uar_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_ida *uar_ida = &hr_dev->uar_ida;
+
+ ida_init(&uar_ida->ida);
+ uar_ida->max = hr_dev->caps.num_uars - 1;
+ uar_ida->min = hr_dev->caps.reserved_uars;
+}
+
+static int hns_roce_xrcd_alloc(struct hns_roce_dev *hr_dev, u32 *xrcdn)
+{
+ struct hns_roce_ida *xrcd_ida = &hr_dev->xrcd_ida;
+ int id;
+
+ id = ida_alloc_range(&xrcd_ida->ida, xrcd_ida->min, xrcd_ida->max,
+ GFP_KERNEL);
+ if (id < 0) {
+ ibdev_err(&hr_dev->ib_dev, "failed to alloc xrcdn(%d).\n", id);
+ return -ENOMEM;
}
+ *xrcdn = (u32)id;
return 0;
}
-void hns_roce_uar_free(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
+void hns_roce_init_xrcd_table(struct hns_roce_dev *hr_dev)
{
- hns_roce_bitmap_free(&hr_dev->uar_table.bitmap, uar->logic_idx,
- BITMAP_NO_RR);
+ struct hns_roce_ida *xrcd_ida = &hr_dev->xrcd_ida;
+
+ ida_init(&xrcd_ida->ida);
+ xrcd_ida->max = hr_dev->caps.num_xrcds - 1;
+ xrcd_ida->min = hr_dev->caps.reserved_xrcds;
}
-int hns_roce_init_uar_table(struct hns_roce_dev *hr_dev)
+int hns_roce_alloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata)
{
- return hns_roce_bitmap_init(&hr_dev->uar_table.bitmap,
- hr_dev->caps.num_uars,
- hr_dev->caps.num_uars - 1,
- hr_dev->caps.reserved_uars, 0);
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_xrcd->device);
+ struct hns_roce_xrcd *xrcd = to_hr_xrcd(ib_xrcd);
+ int ret;
+
+ if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)) {
+ ret = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ ret = hns_roce_xrcd_alloc(hr_dev, &xrcd->xrcdn);
+
+err_out:
+ if (ret)
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_XRCD_ALLOC_ERR_CNT]);
+
+ return ret;
}
-void hns_roce_cleanup_uar_table(struct hns_roce_dev *hr_dev)
+int hns_roce_dealloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata)
{
- hns_roce_bitmap_cleanup(&hr_dev->uar_table.bitmap);
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_xrcd->device);
+ u32 xrcdn = to_hr_xrcd(ib_xrcd)->xrcdn;
+
+ ida_free(&hr_dev->xrcd_ida.ida, (int)xrcdn);
+
+ return 0;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 8db2817a249e..d1640c5fbaab 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -31,46 +31,127 @@
* SOFTWARE.
*/
-#include <linux/pci.h>
-#include <linux/platform_device.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_umem.h>
#include <rdma/uverbs_ioctl.h>
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_hem.h"
-#include <rdma/hns-abi.h>
-#define SQP_NUM (2 * HNS_ROCE_MAX_PORTS)
-
-void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
+static struct hns_roce_qp *hns_roce_qp_lookup(struct hns_roce_dev *hr_dev,
+ u32 qpn)
{
struct device *dev = hr_dev->dev;
struct hns_roce_qp *qp;
+ unsigned long flags;
- xa_lock(&hr_dev->qp_table_xa);
+ xa_lock_irqsave(&hr_dev->qp_table_xa, flags);
qp = __hns_roce_qp_lookup(hr_dev, qpn);
if (qp)
- atomic_inc(&qp->refcount);
- xa_unlock(&hr_dev->qp_table_xa);
+ refcount_inc(&qp->refcount);
+ xa_unlock_irqrestore(&hr_dev->qp_table_xa, flags);
+
+ if (!qp)
+ dev_warn(dev, "async event for bogus QP %08x\n", qpn);
+
+ return qp;
+}
+
+static void flush_work_handle(struct work_struct *work)
+{
+ struct hns_roce_work *flush_work = container_of(work,
+ struct hns_roce_work, work);
+ struct hns_roce_qp *hr_qp = container_of(flush_work,
+ struct hns_roce_qp, flush_work);
+ struct device *dev = flush_work->hr_dev->dev;
+ struct ib_qp_attr attr;
+ int attr_mask;
+ int ret;
- if (!qp) {
- dev_warn(dev, "Async event for bogus QP %08x\n", qpn);
+ attr_mask = IB_QP_STATE;
+ attr.qp_state = IB_QPS_ERR;
+
+ if (test_and_clear_bit(HNS_ROCE_FLUSH_FLAG, &hr_qp->flush_flag)) {
+ ret = hns_roce_modify_qp(&hr_qp->ibqp, &attr, attr_mask, NULL);
+ if (ret)
+ dev_err(dev, "modify QP to error state failed(%d) during CQE flush\n",
+ ret);
+ }
+
+ /*
+ * make sure we signal the QP destroy path that the flush has completed
+ * so that it can safely proceed and destroy the QP
+ */
+ if (refcount_dec_and_test(&hr_qp->refcount))
+ complete(&hr_qp->free);
+}
+
+void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
+{
+ struct hns_roce_work *flush_work = &hr_qp->flush_work;
+ unsigned long flags;
+
+ spin_lock_irqsave(&hr_qp->flush_lock, flags);
+ /* Exit directly after destroy_qp() */
+ if (test_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag)) {
+ spin_unlock_irqrestore(&hr_qp->flush_lock, flags);
return;
}
+ refcount_inc(&hr_qp->refcount);
+ queue_work(hr_dev->irq_workq, &flush_work->work);
+ spin_unlock_irqrestore(&hr_qp->flush_lock, flags);
+}
+
+void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp)
+{
+ /*
+ * Hip08 hardware cannot flush the WQEs in SQ/RQ if the QP state
+ * gets into errored mode. Hence, as a workaround to this
+ * hardware limitation, the driver needs to assist in flushing. But
+ * the flushing operation uses a mailbox to convey the QP state to
+ * the hardware, which can sleep due to the mutex protection
+ * around the mailbox calls. Hence, use the deferred flush for
+ * now.
+ */
+ if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG, &qp->flush_flag))
+ init_flush_work(dev, qp);
+}
+
+void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
+{
+ struct hns_roce_qp *qp;
+
+ qp = hns_roce_qp_lookup(hr_dev, qpn);
+ if (!qp)
+ return;
+
qp->event(qp, (enum hns_roce_event)event_type);
- if (atomic_dec_and_test(&qp->refcount))
+ if (refcount_dec_and_test(&qp->refcount))
+ complete(&qp->free);
+}
+
+void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn)
+{
+ struct hns_roce_qp *qp;
+
+ qp = hns_roce_qp_lookup(hr_dev, qpn);
+ if (!qp)
+ return;
+
+ qp->state = IB_QPS_ERR;
+ flush_cqe(hr_dev, qp);
+
+ if (refcount_dec_and_test(&qp->refcount))
complete(&qp->free);
}
-EXPORT_SYMBOL_GPL(hns_roce_qp_event);
static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
enum hns_roce_event type)
{
- struct ib_event event;
struct ib_qp *ibqp = &hr_qp->ibqp;
+ struct ib_event event;
if (ibqp->event_handler) {
event.device = ibqp->device;
@@ -98,6 +179,8 @@ static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
event.event = IB_EVENT_QP_REQ_ERR;
break;
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
+ case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
event.event = IB_EVENT_QP_ACCESS_ERR;
break;
default:
@@ -109,82 +192,156 @@ static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
}
}
-static int hns_roce_reserve_range_qp(struct hns_roce_dev *hr_dev, int cnt,
- int align, unsigned long *base)
+static u8 get_affinity_cq_bank(u8 qp_bank)
+{
+ return (qp_bank >> 1) & CQ_BANKID_MASK;
+}
+
+static u8 get_least_load_bankid_for_qp(struct ib_qp_init_attr *init_attr,
+ struct hns_roce_bank *bank)
+{
+#define INVALID_LOAD_QPNUM 0xFFFFFFFF
+ struct ib_cq *scq = init_attr->send_cq;
+ u32 least_load = INVALID_LOAD_QPNUM;
+ unsigned long cqn = 0;
+ u8 bankid = 0;
+ u32 bankcnt;
+ u8 i;
+
+ if (scq)
+ cqn = to_hr_cq(scq)->cqn;
+
+ for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) {
+ if (scq && (get_affinity_cq_bank(i) != (cqn & CQ_BANKID_MASK)))
+ continue;
+
+ bankcnt = bank[i].inuse;
+ if (bankcnt < least_load) {
+ least_load = bankcnt;
+ bankid = i;
+ }
+ }
+
+ return bankid;
+}
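/*
 * Editor's note (illustration only, assuming CQ_BANKID_MASK is 0x3): QP bank
 * i has affinity to CQ bank (i >> 1) & CQ_BANKID_MASK, so a send CQ whose
 * cqn falls in CQ bank 2 restricts the search above to QP banks 4 and 5
 * ((4 >> 1) & 3 == (5 >> 1) & 3 == 2); among the remaining candidates the
 * bank with the fewest in-use QPs wins.
 */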
+
+static int alloc_qpn_with_bankid(struct hns_roce_bank *bank, u8 bankid,
+ unsigned long *qpn)
+{
+ int id;
+
+ id = ida_alloc_range(&bank->ida, bank->next, bank->max, GFP_KERNEL);
+ if (id < 0) {
+ id = ida_alloc_range(&bank->ida, bank->min, bank->max,
+ GFP_KERNEL);
+ if (id < 0)
+ return id;
+ }
+
+ /* the QPN should keep increasing until the max value is reached. */
+ bank->next = (id + 1) > bank->max ? bank->min : id + 1;
+
+ /* the lower 3 bits are the bankid */
+ *qpn = (id << 3) | bankid;
+
+ return 0;
+}
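/*
 * Editor's note (illustration only): the QPN is the IDA id shifted up past
 * the 3 bank bits, with the bank id in the low bits. For id = 5 in bank 3:
 *
 *   qpn = (5 << 3) | 3 = 0x2b
 *
 * free_qpn() below recovers both halves with qpn & GENMASK(2, 0) == 3 and
 * qpn / HNS_ROCE_QP_BANK_NUM == 5 (assuming HNS_ROCE_QP_BANK_NUM is 8).
 */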
+static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+ struct ib_qp_init_attr *init_attr)
{
struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
+ unsigned long num = 0;
+ u8 bankid;
+ int ret;
- return hns_roce_bitmap_alloc_range(&qp_table->bitmap, cnt, align,
- base) ?
- -ENOMEM :
- 0;
-}
-
-enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state)
-{
- switch (state) {
- case IB_QPS_RESET:
- return HNS_ROCE_QP_STATE_RST;
- case IB_QPS_INIT:
- return HNS_ROCE_QP_STATE_INIT;
- case IB_QPS_RTR:
- return HNS_ROCE_QP_STATE_RTR;
- case IB_QPS_RTS:
- return HNS_ROCE_QP_STATE_RTS;
- case IB_QPS_SQD:
- return HNS_ROCE_QP_STATE_SQD;
- case IB_QPS_ERR:
- return HNS_ROCE_QP_STATE_ERR;
- default:
- return HNS_ROCE_QP_NUM_STATE;
+ if (hr_qp->ibqp.qp_type == IB_QPT_GSI) {
+ num = 1;
+ } else {
+ mutex_lock(&qp_table->bank_mutex);
+ bankid = get_least_load_bankid_for_qp(init_attr, qp_table->bank);
+
+ ret = alloc_qpn_with_bankid(&qp_table->bank[bankid], bankid,
+ &num);
+ if (ret) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to alloc QPN, ret = %d\n", ret);
+ mutex_unlock(&qp_table->bank_mutex);
+ return ret;
+ }
+
+ qp_table->bank[bankid].inuse++;
+ mutex_unlock(&qp_table->bank_mutex);
}
+
+ hr_qp->qpn = num;
+
+ return 0;
}
-EXPORT_SYMBOL_GPL(to_hns_roce_state);
-static int hns_roce_gsi_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn,
- struct hns_roce_qp *hr_qp)
+static void add_qp_to_list(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct ib_cq *send_cq, struct ib_cq *recv_cq)
+{
+ struct hns_roce_cq *hr_send_cq, *hr_recv_cq;
+ unsigned long flags;
+
+ hr_send_cq = send_cq ? to_hr_cq(send_cq) : NULL;
+ hr_recv_cq = recv_cq ? to_hr_cq(recv_cq) : NULL;
+
+ spin_lock_irqsave(&hr_dev->qp_list_lock, flags);
+ hns_roce_lock_cqs(hr_send_cq, hr_recv_cq);
+
+ list_add_tail(&hr_qp->node, &hr_dev->qp_list);
+ if (hr_send_cq)
+ list_add_tail(&hr_qp->sq_node, &hr_send_cq->sq_list);
+ if (hr_recv_cq)
+ list_add_tail(&hr_qp->rq_node, &hr_recv_cq->rq_list);
+
+ hns_roce_unlock_cqs(hr_send_cq, hr_recv_cq);
+ spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
+}
+
+static int hns_roce_qp_store(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct ib_qp_init_attr *init_attr)
{
struct xarray *xa = &hr_dev->qp_table_xa;
int ret;
- if (!qpn)
+ if (!hr_qp->qpn)
return -EINVAL;
- hr_qp->qpn = qpn;
- atomic_set(&hr_qp->refcount, 1);
- init_completion(&hr_qp->free);
-
- ret = xa_err(xa_store_irq(xa, hr_qp->qpn & (hr_dev->caps.num_qps - 1),
- hr_qp, GFP_KERNEL));
+ ret = xa_err(xa_store_irq(xa, hr_qp->qpn, hr_qp, GFP_KERNEL));
if (ret)
- dev_err(hr_dev->dev, "QPC xa_store failed\n");
+ dev_err(hr_dev->dev, "failed to xa store for QPC\n");
+ else
+ /* add QP to device's QP list for softwc */
+ add_qp_to_list(hr_dev, hr_qp, init_attr->send_cq,
+ init_attr->recv_cq);
return ret;
}
-static int hns_roce_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn,
- struct hns_roce_qp *hr_qp)
+static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
{
struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
struct device *dev = hr_dev->dev;
int ret;
- if (!qpn)
+ if (!hr_qp->qpn)
return -EINVAL;
- hr_qp->qpn = qpn;
-
/* Alloc memory for QPC */
ret = hns_roce_table_get(hr_dev, &qp_table->qp_table, hr_qp->qpn);
if (ret) {
- dev_err(dev, "QPC table get failed\n");
+ dev_err(dev, "failed to get QPC table\n");
goto err_out;
}
/* Alloc memory for IRRL */
ret = hns_roce_table_get(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
if (ret) {
- dev_err(dev, "IRRL table get failed\n");
+ dev_err(dev, "failed to get IRRL table\n");
goto err_put_qp;
}
@@ -193,32 +350,23 @@ static int hns_roce_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn,
ret = hns_roce_table_get(hr_dev, &qp_table->trrl_table,
hr_qp->qpn);
if (ret) {
- dev_err(dev, "TRRL table get failed\n");
+ dev_err(dev, "failed to get TRRL table\n");
goto err_put_irrl;
}
}
- if (hr_dev->caps.sccc_entry_sz) {
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) {
/* Alloc memory for SCC CTX */
ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table,
hr_qp->qpn);
if (ret) {
- dev_err(dev, "SCC CTX table get failed\n");
+ dev_err(dev, "failed to get SCC CTX table\n");
goto err_put_trrl;
}
}
- ret = hns_roce_gsi_qp_alloc(hr_dev, qpn, hr_qp);
- if (ret)
- goto err_put_sccc;
-
return 0;
-err_put_sccc:
- if (hr_dev->caps.sccc_entry_sz)
- hns_roce_table_put(hr_dev, &qp_table->sccc_table,
- hr_qp->qpn);
-
err_put_trrl:
if (hr_dev->caps.trrl_entry_sz)
hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn);
@@ -233,61 +381,92 @@ err_out:
return ret;
}
+static void qp_user_mmap_entry_remove(struct hns_roce_qp *hr_qp)
+{
+ rdma_user_mmap_entry_remove(&hr_qp->dwqe_mmap_entry->rdma_entry);
+}
+
void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
{
struct xarray *xa = &hr_dev->qp_table_xa;
unsigned long flags;
+ list_del(&hr_qp->node);
+
+ if (hr_qp->ibqp.qp_type != IB_QPT_XRC_TGT)
+ list_del(&hr_qp->sq_node);
+
+ if (hr_qp->ibqp.qp_type != IB_QPT_XRC_INI &&
+ hr_qp->ibqp.qp_type != IB_QPT_XRC_TGT)
+ list_del(&hr_qp->rq_node);
+
xa_lock_irqsave(xa, flags);
- __xa_erase(xa, hr_qp->qpn & (hr_dev->caps.num_qps - 1));
+ __xa_erase(xa, hr_qp->qpn);
xa_unlock_irqrestore(xa, flags);
}
-EXPORT_SYMBOL_GPL(hns_roce_qp_remove);
-void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
+static void free_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
{
struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
- if (atomic_dec_and_test(&hr_qp->refcount))
- complete(&hr_qp->free);
- wait_for_completion(&hr_qp->free);
+ if (hr_dev->caps.trrl_entry_sz)
+ hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn);
+ hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
+}
- if ((hr_qp->ibqp.qp_type) != IB_QPT_GSI) {
- if (hr_dev->caps.trrl_entry_sz)
- hns_roce_table_put(hr_dev, &qp_table->trrl_table,
- hr_qp->qpn);
- hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
- hns_roce_table_put(hr_dev, &qp_table->qp_table, hr_qp->qpn);
- }
+static inline u8 get_qp_bankid(unsigned long qpn)
+{
+ /* The lower 3 bits of QPN are used to hash to different banks */
+ return (u8)(qpn & GENMASK(2, 0));
}
-EXPORT_SYMBOL_GPL(hns_roce_qp_free);
-void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn,
- int cnt)
+static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
{
- struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
+ u8 bankid;
+
+ if (hr_qp->ibqp.qp_type == IB_QPT_GSI)
+ return;
- if (base_qpn < SQP_NUM)
+ if (hr_qp->qpn < hr_dev->caps.reserved_qps)
return;
- hns_roce_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt, BITMAP_RR);
+ bankid = get_qp_bankid(hr_qp->qpn);
+
+ ida_free(&hr_dev->qp_table.bank[bankid].ida,
+ hr_qp->qpn / HNS_ROCE_QP_BANK_NUM);
+
+ mutex_lock(&hr_dev->qp_table.bank_mutex);
+ hr_dev->qp_table.bank[bankid].inuse--;
+ mutex_unlock(&hr_dev->qp_table.bank_mutex);
}
-EXPORT_SYMBOL_GPL(hns_roce_release_range_qp);
-static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev,
- struct ib_qp_cap *cap, bool is_user, int has_rq,
- struct hns_roce_qp *hr_qp)
+static u32 proc_rq_sge(struct hns_roce_dev *dev, struct hns_roce_qp *hr_qp,
+ bool user)
{
- struct device *dev = hr_dev->dev;
- u32 max_cnt;
+ u32 max_sge = dev->caps.max_rq_sg;
+
+ if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return max_sge;
+
+ /* Reserve SGEs only for HIP08 in kernel; The userspace driver will
+ * calculate number of max_sge with reserved SGEs when allocating wqe
+ * buf, so there is no need to do this again in kernel. But the number
+ * may exceed the capacity of SGEs recorded in the firmware, so the
+ * kernel driver should just adapt the value accordingly.
+ */
+ if (user)
+ max_sge = roundup_pow_of_two(max_sge + 1);
+ else
+ hr_qp->rq.rsv_sge = 1;
- /* Check the validity of QP support capacity */
- if (cap->max_recv_wr > hr_dev->caps.max_wqes ||
- cap->max_recv_sge > hr_dev->caps.max_rq_sg) {
- dev_err(dev, "RQ WR or sge error!max_recv_wr=%d max_recv_sge=%d\n",
- cap->max_recv_wr, cap->max_recv_sge);
- return -EINVAL;
- }
+ return max_sge;
+}
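/*
 * Editor's note (illustration only): on HIP08 the kernel driver reserves one
 * SGE internally (rsv_sge = 1) and leaves max_sge untouched, while a
 * userspace caller is given roundup_pow_of_two(max_rq_sg + 1) because the
 * userspace driver accounts for the reserved SGE itself; HIP09 and later
 * simply report max_rq_sg unchanged.
 */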
+
+static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
+ struct hns_roce_qp *hr_qp, int has_rq, bool user)
+{
+ u32 max_sge = proc_rq_sge(hr_dev, hr_qp, user);
+ u32 cnt;
/* If srq exist, set zero for relative number of rq */
if (!has_rq) {
@@ -295,217 +474,274 @@ static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev,
hr_qp->rq.max_gs = 0;
cap->max_recv_wr = 0;
cap->max_recv_sge = 0;
- } else {
- if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) {
- dev_err(dev, "user space no need config max_recv_wr max_recv_sge\n");
- return -EINVAL;
- }
-
- if (hr_dev->caps.min_wqes)
- max_cnt = max(cap->max_recv_wr, hr_dev->caps.min_wqes);
- else
- max_cnt = cap->max_recv_wr;
- hr_qp->rq.wqe_cnt = roundup_pow_of_two(max_cnt);
+ return 0;
+ }
- if ((u32)hr_qp->rq.wqe_cnt > hr_dev->caps.max_wqes) {
- dev_err(dev, "while setting rq size, rq.wqe_cnt too large\n");
- return -EINVAL;
- }
+ /* Check the validity of QP support capacity */
+ if (!cap->max_recv_wr || cap->max_recv_wr > hr_dev->caps.max_wqes ||
+ cap->max_recv_sge > max_sge) {
+ ibdev_err(&hr_dev->ib_dev,
+ "RQ config error, depth = %u, sge = %u\n",
+ cap->max_recv_wr, cap->max_recv_sge);
+ return -EINVAL;
+ }
- max_cnt = max(1U, cap->max_recv_sge);
- hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt);
- if (hr_dev->caps.max_rq_sg <= 2)
- hr_qp->rq.wqe_shift =
- ilog2(hr_dev->caps.max_rq_desc_sz);
- else
- hr_qp->rq.wqe_shift =
- ilog2(hr_dev->caps.max_rq_desc_sz
- * hr_qp->rq.max_gs);
+ cnt = roundup_pow_of_two(max(cap->max_recv_wr, hr_dev->caps.min_wqes));
+ if (cnt > hr_dev->caps.max_wqes) {
+ ibdev_err(&hr_dev->ib_dev, "rq depth %u too large\n",
+ cap->max_recv_wr);
+ return -EINVAL;
}
- cap->max_recv_wr = hr_qp->rq.max_post = hr_qp->rq.wqe_cnt;
- cap->max_recv_sge = hr_qp->rq.max_gs;
+ hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge) +
+ hr_qp->rq.rsv_sge);
+
+ hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz *
+ hr_qp->rq.max_gs);
+
+ hr_qp->rq.wqe_cnt = cnt;
+
+ cap->max_recv_wr = cnt;
+ cap->max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
return 0;
}
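/*
 * Editor's note (worked example, illustrative numbers): requesting
 * max_recv_wr = 100 and max_recv_sge = 3 on a device with min_wqes = 64 and
 * rsv_sge = 1 yields
 *
 *   wqe_cnt = roundup_pow_of_two(max(100, 64)) = 128
 *   max_gs  = roundup_pow_of_two(3 + 1)        = 4
 *
 * and the caps handed back are max_recv_wr = 128, max_recv_sge = 4 - 1 = 3.
 */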
-static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
- struct ib_qp_cap *cap,
- struct hns_roce_qp *hr_qp,
- struct hns_roce_ib_create_qp *ucmd)
+static u32 get_max_inline_data(struct hns_roce_dev *hr_dev,
+ struct ib_qp_cap *cap)
+{
+ if (cap->max_inline_data) {
+ cap->max_inline_data = roundup_pow_of_two(cap->max_inline_data);
+ return min(cap->max_inline_data,
+ hr_dev->caps.max_sq_inline);
+ }
+
+ return 0;
+}
+
+static void update_inline_data(struct hns_roce_qp *hr_qp,
+ struct ib_qp_cap *cap)
+{
+ u32 sge_num = hr_qp->sq.ext_sge_cnt;
+
+ if (hr_qp->config & HNS_ROCE_EXSGE_FLAGS) {
+ if (!(hr_qp->ibqp.qp_type == IB_QPT_GSI ||
+ hr_qp->ibqp.qp_type == IB_QPT_UD))
+ sge_num = max((u32)HNS_ROCE_SGE_IN_WQE, sge_num);
+
+ cap->max_inline_data = max(cap->max_inline_data,
+ sge_num * HNS_ROCE_SGE_SIZE);
+ }
+
+ hr_qp->max_inline_data = cap->max_inline_data;
+}
+
+static u32 get_sge_num_from_max_send_sge(bool is_ud_or_gsi,
+ u32 max_send_sge)
+{
+ unsigned int std_sge_num;
+ unsigned int min_sge;
+
+ std_sge_num = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE;
+ min_sge = is_ud_or_gsi ? 1 : 0;
+ return max_send_sge > std_sge_num ? (max_send_sge - std_sge_num) :
+ min_sge;
+}
+
+static unsigned int get_sge_num_from_max_inl_data(bool is_ud_or_gsi,
+ u32 max_inline_data)
+{
+ unsigned int inline_sge;
+
+ if (!max_inline_data)
+ return 0;
+
+ /*
+ * If max_inline_data is less than
+ * HNS_ROCE_SGE_IN_WQE * HNS_ROCE_SGE_SIZE,
+ * there is no need to extend the SGEs except in UD mode.
+ */
+ inline_sge = roundup_pow_of_two(max_inline_data) / HNS_ROCE_SGE_SIZE;
+ if (!is_ud_or_gsi && inline_sge <= HNS_ROCE_SGE_IN_WQE)
+ inline_sge = 0;
+
+ return inline_sge;
+}
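/*
 * Editor's note (worked example, assuming HNS_ROCE_SGE_SIZE is 16 and
 * HNS_ROCE_SGE_IN_WQE is 2): for an RC QP with max_inline_data = 32,
 * inline_sge = roundup_pow_of_two(32) / 16 = 2, which fits in the standard
 * in-WQE SGEs, so the function returns 0; with max_inline_data = 256 it
 * returns 256 / 16 = 16 extended SGEs.
 */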
+
+static void set_ext_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt,
+ struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap)
+{
+ bool is_ud_or_gsi = (hr_qp->ibqp.qp_type == IB_QPT_GSI ||
+ hr_qp->ibqp.qp_type == IB_QPT_UD);
+ unsigned int std_sge_num;
+ u32 inline_ext_sge = 0;
+ u32 ext_wqe_sge_cnt;
+ u32 total_sge_cnt;
+
+ cap->max_inline_data = get_max_inline_data(hr_dev, cap);
+
+ hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT;
+ std_sge_num = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE;
+ ext_wqe_sge_cnt = get_sge_num_from_max_send_sge(is_ud_or_gsi,
+ cap->max_send_sge);
+
+ if (hr_qp->config & HNS_ROCE_EXSGE_FLAGS) {
+ inline_ext_sge = max(ext_wqe_sge_cnt,
+ get_sge_num_from_max_inl_data(is_ud_or_gsi,
+ cap->max_inline_data));
+ hr_qp->sq.ext_sge_cnt = inline_ext_sge ?
+ roundup_pow_of_two(inline_ext_sge) : 0;
+
+ hr_qp->sq.max_gs = max(1U, (hr_qp->sq.ext_sge_cnt + std_sge_num));
+ hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg);
+
+ ext_wqe_sge_cnt = hr_qp->sq.ext_sge_cnt;
+ } else {
+ hr_qp->sq.max_gs = max(1U, cap->max_send_sge);
+ hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg);
+ hr_qp->sq.ext_sge_cnt = hr_qp->sq.max_gs;
+ }
+
+ /* If the number of extended SGEs is not zero, they MUST occupy at
+ * least HNS_HW_PAGE_SIZE of space.
+ */
+ if (ext_wqe_sge_cnt) {
+ total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * ext_wqe_sge_cnt);
+ hr_qp->sge.sge_cnt = max(total_sge_cnt,
+ (u32)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE);
+ }
+
+ update_inline_data(hr_qp, cap);
+}
+
+static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev,
+ struct ib_qp_cap *cap,
+ struct hns_roce_ib_create_qp *ucmd)
{
u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz);
u8 max_sq_stride = ilog2(roundup_sq_stride);
- u32 ex_sge_num;
- u32 page_size;
- u32 max_cnt;
/* Sanity check SQ size before proceeding */
- if ((u32)(1 << ucmd->log_sq_bb_count) > hr_dev->caps.max_wqes ||
- ucmd->log_sq_stride > max_sq_stride ||
- ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) {
- dev_err(hr_dev->dev, "check SQ size error!\n");
+ if (ucmd->log_sq_stride > max_sq_stride ||
+ ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) {
+ ibdev_err(&hr_dev->ib_dev, "failed to check SQ stride size.\n");
return -EINVAL;
}
if (cap->max_send_sge > hr_dev->caps.max_sq_sg) {
- dev_err(hr_dev->dev, "SQ sge error! max_send_sge=%d\n",
- cap->max_send_sge);
+ ibdev_err(&hr_dev->ib_dev, "failed to check SQ SGE size %u.\n",
+ cap->max_send_sge);
return -EINVAL;
}
- hr_qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count;
- hr_qp->sq.wqe_shift = ucmd->log_sq_stride;
+ return 0;
+}
- max_cnt = max(1U, cap->max_send_sge);
- if (hr_dev->caps.max_sq_sg <= 2)
- hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt);
- else
- hr_qp->sq.max_gs = max_cnt;
+static int set_user_sq_size(struct hns_roce_dev *hr_dev,
+ struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp,
+ struct hns_roce_ib_create_qp *ucmd)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u32 cnt = 0;
+ int ret;
- if (hr_qp->sq.max_gs > 2)
- hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
- (hr_qp->sq.max_gs - 2));
+ if (check_shl_overflow(1, ucmd->log_sq_bb_count, &cnt) ||
+ cnt > hr_dev->caps.max_wqes)
+ return -EINVAL;
- if ((hr_qp->sq.max_gs > 2) && (hr_dev->pci_dev->revision == 0x20)) {
- if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
- dev_err(hr_dev->dev,
- "The extended sge cnt error! sge_cnt=%d\n",
- hr_qp->sge.sge_cnt);
- return -EINVAL;
- }
+ ret = check_sq_size_with_integrity(hr_dev, cap, ucmd);
+ if (ret) {
+ ibdev_err(ibdev, "failed to check user SQ size, ret = %d.\n",
+ ret);
+ return ret;
}
- hr_qp->sge.sge_shift = 4;
- ex_sge_num = hr_qp->sge.sge_cnt;
+ set_ext_sge_param(hr_dev, cnt, hr_qp, cap);
- /* Get buf size, SQ and RQ are aligned to page_szie */
- if (hr_dev->caps.max_sq_sg <= 2) {
- hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt <<
- hr_qp->rq.wqe_shift), PAGE_SIZE) +
- HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt <<
- hr_qp->sq.wqe_shift), PAGE_SIZE);
-
- hr_qp->sq.offset = 0;
- hr_qp->rq.offset = HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt <<
- hr_qp->sq.wqe_shift), PAGE_SIZE);
- } else {
- page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
- hr_qp->sge.sge_cnt =
- max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num);
- hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt <<
- hr_qp->rq.wqe_shift), page_size) +
- HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt <<
- hr_qp->sge.sge_shift), page_size) +
- HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt <<
- hr_qp->sq.wqe_shift), page_size);
-
- hr_qp->sq.offset = 0;
- if (ex_sge_num) {
- hr_qp->sge.offset = HNS_ROCE_ALOGN_UP(
- (hr_qp->sq.wqe_cnt <<
- hr_qp->sq.wqe_shift),
- page_size);
- hr_qp->rq.offset = hr_qp->sge.offset +
- HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt <<
- hr_qp->sge.sge_shift),
- page_size);
- } else {
- hr_qp->rq.offset = HNS_ROCE_ALOGN_UP(
- (hr_qp->sq.wqe_cnt <<
- hr_qp->sq.wqe_shift),
- page_size);
- }
- }
+ hr_qp->sq.wqe_shift = ucmd->log_sq_stride;
+ hr_qp->sq.wqe_cnt = cnt;
return 0;
}
-static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
- struct ib_qp_cap *cap,
- struct hns_roce_qp *hr_qp)
+static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct hns_roce_buf_attr *buf_attr)
{
- struct device *dev = hr_dev->dev;
- u32 page_size;
- u32 max_cnt;
- int size;
-
- if (cap->max_send_wr > hr_dev->caps.max_wqes ||
- cap->max_send_sge > hr_dev->caps.max_sq_sg ||
- cap->max_inline_data > hr_dev->caps.max_sq_inline) {
- dev_err(dev, "SQ WR or sge or inline data error!\n");
- return -EINVAL;
+ int buf_size;
+ int idx = 0;
+
+ hr_qp->buff_size = 0;
+
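+ /* The WQE buffer is laid out as three consecutive regions: SQ WQEs,
+ * extended SGEs and RQ WQEs. Each non-empty region is described to the
+ * MTR with its own size and hop number.
+ */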
+ /* SQ WQE */
+ hr_qp->sq.offset = 0;
+ buf_size = to_hr_hem_entries_size(hr_qp->sq.wqe_cnt,
+ hr_qp->sq.wqe_shift);
+ if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
+ buf_attr->region[idx].size = buf_size;
+ buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sq_hop_num;
+ idx++;
+ hr_qp->buff_size += buf_size;
}
- hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz);
- hr_qp->sq_max_wqes_per_wr = 1;
- hr_qp->sq_spare_wqes = 0;
+ /* extend SGE WQE in SQ */
+ hr_qp->sge.offset = hr_qp->buff_size;
+ buf_size = to_hr_hem_entries_size(hr_qp->sge.sge_cnt,
+ hr_qp->sge.sge_shift);
+ if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
+ buf_attr->region[idx].size = buf_size;
+ buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sge_hop_num;
+ idx++;
+ hr_qp->buff_size += buf_size;
+ }
- if (hr_dev->caps.min_wqes)
- max_cnt = max(cap->max_send_wr, hr_dev->caps.min_wqes);
- else
- max_cnt = cap->max_send_wr;
+ /* RQ WQE */
+ hr_qp->rq.offset = hr_qp->buff_size;
+ buf_size = to_hr_hem_entries_size(hr_qp->rq.wqe_cnt,
+ hr_qp->rq.wqe_shift);
+ if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
+ buf_attr->region[idx].size = buf_size;
+ buf_attr->region[idx].hopnum = hr_dev->caps.wqe_rq_hop_num;
+ idx++;
+ hr_qp->buff_size += buf_size;
+ }
- hr_qp->sq.wqe_cnt = roundup_pow_of_two(max_cnt);
- if ((u32)hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) {
- dev_err(dev, "while setting kernel sq size, sq.wqe_cnt too large\n");
+ if (hr_qp->buff_size < 1)
return -EINVAL;
- }
- /* Get data_seg numbers */
- max_cnt = max(1U, cap->max_send_sge);
- if (hr_dev->caps.max_sq_sg <= 2)
- hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt);
- else
- hr_qp->sq.max_gs = max_cnt;
+ buf_attr->page_shift = HNS_HW_PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
+ buf_attr->region_count = idx;
- if (hr_qp->sq.max_gs > 2) {
- hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
- (hr_qp->sq.max_gs - 2));
- hr_qp->sge.sge_shift = 4;
- }
+ return 0;
+}
- /* ud sqwqe's sge use extend sge */
- if (hr_dev->caps.max_sq_sg > 2 && hr_qp->ibqp.qp_type == IB_QPT_GSI) {
- hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
- hr_qp->sq.max_gs);
- hr_qp->sge.sge_shift = 4;
- }
+static int set_kernel_sq_size(struct hns_roce_dev *hr_dev,
+ struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u32 cnt;
- if ((hr_qp->sq.max_gs > 2) && hr_dev->pci_dev->revision == 0x20) {
- if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
- dev_err(dev, "The extended sge cnt error! sge_cnt=%d\n",
- hr_qp->sge.sge_cnt);
- return -EINVAL;
- }
+ if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes ||
+ cap->max_send_sge > hr_dev->caps.max_sq_sg) {
+ ibdev_err(ibdev, "failed to check SQ WR or SGE num.\n");
+ return -EINVAL;
}
- /* Get buf size, SQ and RQ are aligned to PAGE_SIZE */
- page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
- hr_qp->sq.offset = 0;
- size = HNS_ROCE_ALOGN_UP(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift,
- page_size);
-
- if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) {
- hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift),
- (u32)hr_qp->sge.sge_cnt);
- hr_qp->sge.offset = size;
- size += HNS_ROCE_ALOGN_UP(hr_qp->sge.sge_cnt <<
- hr_qp->sge.sge_shift, page_size);
+ cnt = roundup_pow_of_two(max(cap->max_send_wr, hr_dev->caps.min_wqes));
+ if (cnt > hr_dev->caps.max_wqes) {
+ ibdev_err(ibdev, "failed to check WQE num, WQE num = %u.\n",
+ cnt);
+ return -EINVAL;
}
- hr_qp->rq.offset = size;
- size += HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift),
- page_size);
- hr_qp->buff_size = size;
+ hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz);
+ hr_qp->sq.wqe_cnt = cnt;
- /* Get wr and sge number which send */
- cap->max_send_wr = hr_qp->sq.max_post = hr_qp->sq.wqe_cnt;
- cap->max_send_sge = hr_qp->sq.max_gs;
+ set_ext_sge_param(hr_dev, cnt, hr_qp, cap);
- /* We don't support inline sends for kernel QPs (yet) */
- cap->max_inline_data = 0;
+ /* Sync the actual kernel QP parameters back to the caller's configuration */
+ cap->max_send_wr = cnt;
return 0;
}
@@ -528,522 +764,718 @@ static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr)
return 1;
}
-static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
- struct ib_pd *ib_pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata, unsigned long sqpn,
- struct hns_roce_qp *hr_qp)
+static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata, unsigned long addr)
{
- struct device *dev = hr_dev->dev;
- struct hns_roce_ib_create_qp ucmd;
- struct hns_roce_ib_create_qp_resp resp = {};
- struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(
- udata, struct hns_roce_ucontext, ibucontext);
- unsigned long qpn = 0;
- int ret = 0;
- u32 page_shift;
- u32 npages;
- int i;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_buf_attr buf_attr = {};
+ int ret;
- mutex_init(&hr_qp->mutex);
- spin_lock_init(&hr_qp->sq.lock);
- spin_lock_init(&hr_qp->rq.lock);
+ ret = set_wqe_buf_attr(hr_dev, hr_qp, &buf_attr);
+ if (ret) {
+ ibdev_err(ibdev, "failed to split WQE buf, ret = %d.\n", ret);
+ goto err_inline;
+ }
+ ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, &buf_attr,
+ PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz,
+ udata, addr);
+ if (ret) {
+ ibdev_err(ibdev, "failed to create WQE mtr, ret = %d.\n", ret);
+ goto err_inline;
+ }
- hr_qp->state = IB_QPS_RESET;
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DIRECT_WQE)
+ hr_qp->en_flags |= HNS_ROCE_QP_CAP_DIRECT_WQE;
- hr_qp->ibqp.qp_type = init_attr->qp_type;
+ return 0;
- if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
- hr_qp->sq_signal_bits = cpu_to_le32(IB_SIGNAL_ALL_WR);
- else
- hr_qp->sq_signal_bits = cpu_to_le32(IB_SIGNAL_REQ_WR);
+err_inline:
- ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, udata,
- hns_roce_qp_has_rq(init_attr), hr_qp);
- if (ret) {
- dev_err(dev, "hns_roce_set_rq_size failed\n");
- goto err_out;
- }
+ return ret;
+}
- if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
- hns_roce_qp_has_rq(init_attr)) {
- /* allocate recv inline buf */
- hr_qp->rq_inl_buf.wqe_list = kcalloc(hr_qp->rq.wqe_cnt,
- sizeof(struct hns_roce_rinl_wqe),
- GFP_KERNEL);
- if (!hr_qp->rq_inl_buf.wqe_list) {
- ret = -ENOMEM;
- goto err_out;
- }
+static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
+{
+ hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr);
+}
- hr_qp->rq_inl_buf.wqe_cnt = hr_qp->rq.wqe_cnt;
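+/* A user QP can use an SQ record doorbell only when the device advertises
+ * HNS_ROCE_CAP_FLAG_QP_RECORD_DB and the user ABI is new enough to pass
+ * sdb_addr in and receive cap_flags back.
+ */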
+static inline bool user_qp_has_sdb(struct hns_roce_dev *hr_dev,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_qp_resp *resp,
+ struct hns_roce_ib_create_qp *ucmd)
+{
+ return ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) &&
+ udata->outlen >= offsetofend(typeof(*resp), cap_flags) &&
+ hns_roce_qp_has_sq(init_attr) &&
+ udata->inlen >= offsetofend(typeof(*ucmd), sdb_addr));
+}
- /* Firstly, allocate a list of sge space buffer */
- hr_qp->rq_inl_buf.wqe_list[0].sg_list =
- kcalloc(hr_qp->rq_inl_buf.wqe_cnt,
- init_attr->cap.max_recv_sge *
- sizeof(struct hns_roce_rinl_sge),
- GFP_KERNEL);
- if (!hr_qp->rq_inl_buf.wqe_list[0].sg_list) {
- ret = -ENOMEM;
- goto err_wqe_list;
- }
+static inline bool user_qp_has_rdb(struct hns_roce_dev *hr_dev,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_qp_resp *resp)
+{
+ return ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) &&
+ udata->outlen >= offsetofend(typeof(*resp), cap_flags) &&
+ hns_roce_qp_has_rq(init_attr));
+}
+
+static inline bool kernel_qp_has_rdb(struct hns_roce_dev *hr_dev,
+ struct ib_qp_init_attr *init_attr)
+{
+ return ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) &&
+ hns_roce_qp_has_rq(init_attr));
+}
- for (i = 1; i < hr_qp->rq_inl_buf.wqe_cnt; i++)
- /* Secondly, reallocate the buffer */
- hr_qp->rq_inl_buf.wqe_list[i].sg_list =
- &hr_qp->rq_inl_buf.wqe_list[0].sg_list[i *
- init_attr->cap.max_recv_sge];
+static int qp_mmap_entry(struct hns_roce_qp *hr_qp,
+ struct hns_roce_dev *hr_dev,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_qp_resp *resp)
+{
+ struct hns_roce_ucontext *uctx =
+ rdma_udata_to_drv_context(udata,
+ struct hns_roce_ucontext, ibucontext);
+ struct rdma_user_mmap_entry *rdma_entry;
+ u64 address;
+
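+ /* Each QP owns a HNS_ROCE_DWQE_SIZE window in the direct WQE region,
+ * indexed by its QPN.
+ */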
+ address = hr_dev->dwqe_page + hr_qp->qpn * HNS_ROCE_DWQE_SIZE;
+
+ hr_qp->dwqe_mmap_entry =
+ hns_roce_user_mmap_entry_insert(&uctx->ibucontext, address,
+ HNS_ROCE_DWQE_SIZE,
+ HNS_ROCE_MMAP_TYPE_DWQE);
+
+ if (!hr_qp->dwqe_mmap_entry) {
+ ibdev_err(&hr_dev->ib_dev, "failed to get dwqe mmap entry.\n");
+ return -ENOMEM;
}
- if (udata) {
- if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
- dev_err(dev, "ib_copy_from_udata error for create qp\n");
- ret = -EFAULT;
- goto err_rq_sge_list;
- }
+ rdma_entry = &hr_qp->dwqe_mmap_entry->rdma_entry;
+ resp->dwqe_mmap_key = rdma_user_mmap_get_offset(rdma_entry);
- ret = hns_roce_set_user_sq_size(hr_dev, &init_attr->cap, hr_qp,
- &ucmd);
- if (ret) {
- dev_err(dev, "hns_roce_set_user_sq_size error for create qp\n");
- goto err_rq_sge_list;
- }
+ return 0;
+}
- hr_qp->umem = ib_umem_get(udata, ucmd.buf_addr,
- hr_qp->buff_size, 0, 0);
- if (IS_ERR(hr_qp->umem)) {
- dev_err(dev, "ib_umem_get error for create qp\n");
- ret = PTR_ERR(hr_qp->umem);
- goto err_rq_sge_list;
- }
+static int alloc_user_qp_db(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_qp *ucmd,
+ struct hns_roce_ib_create_qp_resp *resp)
+{
+ bool has_sdb = user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd);
+ struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata,
+ struct hns_roce_ucontext, ibucontext);
+ bool has_rdb = user_qp_has_rdb(hr_dev, init_attr, udata, resp);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int ret;
- hr_qp->mtt.mtt_type = MTT_TYPE_WQE;
- page_shift = PAGE_SHIFT;
- if (hr_dev->caps.mtt_buf_pg_sz) {
- npages = (ib_umem_page_count(hr_qp->umem) +
- (1 << hr_dev->caps.mtt_buf_pg_sz) - 1) /
- (1 << hr_dev->caps.mtt_buf_pg_sz);
- page_shift += hr_dev->caps.mtt_buf_pg_sz;
- ret = hns_roce_mtt_init(hr_dev, npages,
- page_shift,
- &hr_qp->mtt);
- } else {
- ret = hns_roce_mtt_init(hr_dev,
- ib_umem_page_count(hr_qp->umem),
- page_shift, &hr_qp->mtt);
- }
+ if (has_sdb) {
+ ret = hns_roce_db_map_user(uctx, ucmd->sdb_addr, &hr_qp->sdb);
if (ret) {
- dev_err(dev, "hns_roce_mtt_init error for create qp\n");
- goto err_buf;
+ ibdev_err(ibdev,
+ "failed to map user SQ doorbell, ret = %d.\n",
+ ret);
+ goto err_out;
}
+ hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB;
+ }
- ret = hns_roce_ib_umem_write_mtt(hr_dev, &hr_qp->mtt,
- hr_qp->umem);
+ if (has_rdb) {
+ ret = hns_roce_db_map_user(uctx, ucmd->db_addr, &hr_qp->rdb);
if (ret) {
- dev_err(dev, "hns_roce_ib_umem_write_mtt error for create qp\n");
- goto err_mtt;
+ ibdev_err(ibdev,
+ "failed to map user RQ doorbell, ret = %d.\n",
+ ret);
+ goto err_sdb;
}
+ hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB;
+ }
- if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) &&
- (udata->inlen >= sizeof(ucmd)) &&
- (udata->outlen >= sizeof(resp)) &&
- hns_roce_qp_has_sq(init_attr)) {
- ret = hns_roce_db_map_user(uctx, udata, ucmd.sdb_addr,
- &hr_qp->sdb);
- if (ret) {
- dev_err(dev, "sq record doorbell map failed!\n");
- goto err_mtt;
- }
-
- /* indicate kernel supports sq record db */
- resp.cap_flags |= HNS_ROCE_SUPPORT_SQ_RECORD_DB;
- hr_qp->sdb_en = 1;
- }
+ return 0;
- if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
- (udata->outlen >= sizeof(resp)) &&
- hns_roce_qp_has_rq(init_attr)) {
- ret = hns_roce_db_map_user(uctx, udata, ucmd.db_addr,
- &hr_qp->rdb);
- if (ret) {
- dev_err(dev, "rq record doorbell map failed!\n");
- goto err_sq_dbmap;
- }
-
- /* indicate kernel supports rq record db */
- resp.cap_flags |= HNS_ROCE_SUPPORT_RQ_RECORD_DB;
- hr_qp->rdb_en = 1;
- }
- } else {
- if (init_attr->create_flags &
- IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
- dev_err(dev, "init_attr->create_flags error!\n");
- ret = -EINVAL;
- goto err_rq_sge_list;
- }
+err_sdb:
+ if (has_sdb)
+ hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
+err_out:
+ return ret;
+}
- if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) {
- dev_err(dev, "init_attr->create_flags error!\n");
- ret = -EINVAL;
- goto err_rq_sge_list;
- }
+static int alloc_kernel_qp_db(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int ret;
+
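+ /* HIP09 and later deliver SQ doorbells through the per-QP direct WQE
+ * region; earlier revisions use the legacy doorbell registers.
+ */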
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ hr_qp->sq.db_reg = hr_dev->mem_base +
+ HNS_ROCE_DWQE_SIZE * hr_qp->qpn;
+ else
+ hr_qp->sq.db_reg = hr_dev->reg_base + hr_dev->sdb_offset +
+ DB_REG_OFFSET * hr_dev->priv_uar.index;
+
+ hr_qp->rq.db_reg = hr_dev->reg_base + hr_dev->odb_offset +
+ DB_REG_OFFSET * hr_dev->priv_uar.index;
- /* Set SQ size */
- ret = hns_roce_set_kernel_sq_size(hr_dev, &init_attr->cap,
- hr_qp);
+ if (kernel_qp_has_rdb(hr_dev, init_attr)) {
+ ret = hns_roce_alloc_db(hr_dev, &hr_qp->rdb, 0);
if (ret) {
- dev_err(dev, "hns_roce_set_kernel_sq_size error!\n");
- goto err_rq_sge_list;
+ ibdev_err(ibdev,
+ "failed to alloc kernel RQ doorbell, ret = %d.\n",
+ ret);
+ return ret;
}
+ *hr_qp->rdb.db_record = 0;
+ hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB;
+ }
- /* QP doorbell register address */
- hr_qp->sq.db_reg_l = hr_dev->reg_base + hr_dev->sdb_offset +
- DB_REG_OFFSET * hr_dev->priv_uar.index;
- hr_qp->rq.db_reg_l = hr_dev->reg_base + hr_dev->odb_offset +
- DB_REG_OFFSET * hr_dev->priv_uar.index;
-
- if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
- hns_roce_qp_has_rq(init_attr)) {
- ret = hns_roce_alloc_db(hr_dev, &hr_qp->rdb, 0);
- if (ret) {
- dev_err(dev, "rq record doorbell alloc failed!\n");
- goto err_rq_sge_list;
- }
- *hr_qp->rdb.db_record = 0;
- hr_qp->rdb_en = 1;
- }
+ return 0;
+}
- /* Allocate QP buf */
- page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
- if (hns_roce_buf_alloc(hr_dev, hr_qp->buff_size,
- (1 << page_shift) * 2,
- &hr_qp->hr_buf, page_shift)) {
- dev_err(dev, "hns_roce_buf_alloc error!\n");
- ret = -ENOMEM;
- goto err_db;
- }
+static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_qp *ucmd,
+ struct hns_roce_ib_create_qp_resp *resp)
+{
+ int ret;
- hr_qp->mtt.mtt_type = MTT_TYPE_WQE;
- /* Write MTT */
- ret = hns_roce_mtt_init(hr_dev, hr_qp->hr_buf.npages,
- hr_qp->hr_buf.page_shift, &hr_qp->mtt);
- if (ret) {
- dev_err(dev, "hns_roce_mtt_init error for kernel create qp\n");
- goto err_buf;
- }
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SDI_MODE)
+ hr_qp->en_flags |= HNS_ROCE_QP_CAP_OWNER_DB;
- ret = hns_roce_buf_write_mtt(hr_dev, &hr_qp->mtt,
- &hr_qp->hr_buf);
- if (ret) {
- dev_err(dev, "hns_roce_buf_write_mtt error for kernel create qp\n");
- goto err_mtt;
+ if (udata) {
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE) {
+ ret = qp_mmap_entry(hr_qp, hr_dev, udata, resp);
+ if (ret)
+ return ret;
}
- hr_qp->sq.wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64),
- GFP_KERNEL);
- hr_qp->rq.wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64),
- GFP_KERNEL);
- if (!hr_qp->sq.wrid || !hr_qp->rq.wrid) {
+ ret = alloc_user_qp_db(hr_dev, hr_qp, init_attr, udata, ucmd,
+ resp);
+ if (ret)
+ goto err_remove_qp;
+ } else {
+ ret = alloc_kernel_qp_db(hr_dev, hr_qp, init_attr);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+
+err_remove_qp:
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)
+ qp_user_mmap_entry_remove(hr_qp);
+
+ return ret;
+}
+
+static void free_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+ struct ib_udata *udata)
+{
+ struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(
+ udata, struct hns_roce_ucontext, ibucontext);
+
+ if (udata) {
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
+ hns_roce_db_unmap_user(uctx, &hr_qp->rdb);
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
+ hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)
+ qp_user_mmap_entry_remove(hr_qp);
+ } else {
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
+ hns_roce_free_db(hr_dev, &hr_qp->rdb);
+ }
+}
+
+static int alloc_kernel_wrid(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u64 *sq_wrid = NULL;
+ u64 *rq_wrid = NULL;
+ int ret;
+
+ sq_wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64), GFP_KERNEL);
+ if (!sq_wrid) {
+ ibdev_err(ibdev, "failed to alloc SQ wrid.\n");
+ return -ENOMEM;
+ }
+
+ if (hr_qp->rq.wqe_cnt) {
+ rq_wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64), GFP_KERNEL);
+ if (!rq_wrid) {
+ ibdev_err(ibdev, "failed to alloc RQ wrid.\n");
ret = -ENOMEM;
- goto err_wrid;
+ goto err_sq;
}
}
- if (sqpn) {
- qpn = sqpn;
- } else {
- /* Get QPN */
- ret = hns_roce_reserve_range_qp(hr_dev, 1, 1, &qpn);
+ hr_qp->sq.wrid = sq_wrid;
+ hr_qp->rq.wrid = rq_wrid;
+ return 0;
+err_sq:
+ kfree(sq_wrid);
+
+ return ret;
+}
+
+static void free_kernel_wrid(struct hns_roce_qp *hr_qp)
+{
+ kfree(hr_qp->rq.wrid);
+ kfree(hr_qp->sq.wrid);
+}
+
+static void default_congest_type(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp)
+{
+ if (hr_qp->ibqp.qp_type == IB_QPT_UD ||
+ hr_qp->ibqp.qp_type == IB_QPT_GSI)
+ hr_qp->cong_type = CONG_TYPE_DCQCN;
+ else
+ hr_qp->cong_type = hr_dev->caps.default_cong_type;
+}
+
+static int set_congest_type(struct hns_roce_qp *hr_qp,
+ struct hns_roce_ib_create_qp *ucmd)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
+
+ switch (ucmd->cong_type_flags) {
+ case HNS_ROCE_CREATE_QP_FLAGS_DCQCN:
+ hr_qp->cong_type = CONG_TYPE_DCQCN;
+ break;
+ case HNS_ROCE_CREATE_QP_FLAGS_LDCP:
+ hr_qp->cong_type = CONG_TYPE_LDCP;
+ break;
+ case HNS_ROCE_CREATE_QP_FLAGS_HC3:
+ hr_qp->cong_type = CONG_TYPE_HC3;
+ break;
+ case HNS_ROCE_CREATE_QP_FLAGS_DIP:
+ hr_qp->cong_type = CONG_TYPE_DIP;
+ break;
+ default:
+ return -EINVAL;
+ }
+
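+ /* The requested algorithm must be advertised in cong_cap, and UD QPs
+ * only support DCQCN.
+ */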
+ if (!test_bit(hr_qp->cong_type, (unsigned long *)&hr_dev->caps.cong_cap))
+ return -EOPNOTSUPP;
+
+ if (hr_qp->ibqp.qp_type == IB_QPT_UD &&
+ hr_qp->cong_type != CONG_TYPE_DCQCN)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static int set_congest_param(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct hns_roce_ib_create_qp *ucmd)
+{
+ if (ucmd->comp_mask & HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE)
+ return set_congest_type(hr_qp, ucmd);
+
+ default_congest_type(hr_dev, hr_qp);
+
+ return 0;
+}
+
+static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_qp *ucmd)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_ucontext *uctx;
+ int ret;
+
+ if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+ hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR;
+ else
+ hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR;
+
+ ret = set_rq_size(hr_dev, &init_attr->cap, hr_qp,
+ hns_roce_qp_has_rq(init_attr), !!udata);
+ if (ret) {
+ ibdev_err(ibdev, "failed to set user RQ size, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ if (udata) {
+ ret = ib_copy_from_udata(ucmd, udata,
+ min(udata->inlen, sizeof(*ucmd)));
if (ret) {
- dev_err(dev, "hns_roce_reserve_range_qp alloc qpn error\n");
- goto err_wrid;
+ ibdev_err(ibdev,
+ "failed to copy QP ucmd, ret = %d\n", ret);
+ return ret;
}
- }
- if (init_attr->qp_type == IB_QPT_GSI &&
- hr_dev->hw_rev == HNS_ROCE_HW_VER1) {
- /* In v1 engine, GSI QP context in RoCE engine's register */
- ret = hns_roce_gsi_qp_alloc(hr_dev, qpn, hr_qp);
+ uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext,
+ ibucontext);
+ hr_qp->config = uctx->config;
+ ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd);
if (ret) {
- dev_err(dev, "hns_roce_qp_alloc failed!\n");
- goto err_qpn;
+ ibdev_err(ibdev,
+ "failed to set user SQ size, ret = %d.\n",
+ ret);
+ return ret;
}
+
+ ret = set_congest_param(hr_dev, hr_qp, ucmd);
} else {
- ret = hns_roce_qp_alloc(hr_dev, qpn, hr_qp);
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ hr_qp->config = HNS_ROCE_EXSGE_FLAGS;
+ default_congest_type(hr_dev, hr_qp);
+ ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp);
+ if (ret)
+ ibdev_err(ibdev,
+ "failed to set kernel SQ size, ret = %d.\n",
+ ret);
+ }
+
+ return ret;
+}
+
+static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata,
+ struct hns_roce_qp *hr_qp)
+{
+ struct hns_roce_work *flush_work = &hr_qp->flush_work;
+ struct hns_roce_ib_create_qp_resp resp = {};
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_ib_create_qp ucmd = {};
+ int ret;
+
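+ /* Creation order: parameters, kernel wrids, WQE buffer, QPN, doorbells,
+ * QPC and finally the QP hash table; the error path unwinds in reverse.
+ */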
+ mutex_init(&hr_qp->mutex);
+ spin_lock_init(&hr_qp->sq.lock);
+ spin_lock_init(&hr_qp->rq.lock);
+ spin_lock_init(&hr_qp->flush_lock);
+
+ hr_qp->state = IB_QPS_RESET;
+ hr_qp->flush_flag = 0;
+ flush_work->hr_dev = hr_dev;
+ INIT_WORK(&flush_work->work, flush_work_handle);
+
+ if (init_attr->create_flags)
+ return -EOPNOTSUPP;
+
+ ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd);
+ if (ret) {
+ ibdev_err(ibdev, "failed to set QP param, ret = %d.\n", ret);
+ goto err_out;
+ }
+
+ if (!udata) {
+ ret = alloc_kernel_wrid(hr_dev, hr_qp);
if (ret) {
- dev_err(dev, "hns_roce_qp_alloc failed!\n");
- goto err_qpn;
+ ibdev_err(ibdev, "failed to alloc wrid, ret = %d.\n",
+ ret);
+ goto err_out;
}
}
- if (sqpn)
- hr_qp->doorbell_qpn = 1;
- else
- hr_qp->doorbell_qpn = cpu_to_le64(hr_qp->qpn);
+ ret = alloc_qp_buf(hr_dev, hr_qp, init_attr, udata, ucmd.buf_addr);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc QP buffer, ret = %d.\n", ret);
+ goto err_buf;
+ }
+
+ ret = alloc_qpn(hr_dev, hr_qp, init_attr);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc QPN, ret = %d.\n", ret);
+ goto err_qpn;
+ }
+
+ ret = alloc_qp_db(hr_dev, hr_qp, init_attr, udata, &ucmd, &resp);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc QP doorbell, ret = %d.\n",
+ ret);
+ goto err_db;
+ }
+
+ ret = alloc_qpc(hr_dev, hr_qp);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc QP context, ret = %d.\n",
+ ret);
+ goto err_qpc;
+ }
+
+ ret = hns_roce_qp_store(hr_dev, hr_qp, init_attr);
+ if (ret) {
+ ibdev_err(ibdev, "failed to store QP, ret = %d.\n", ret);
+ goto err_store;
+ }
if (udata) {
+ resp.cap_flags = hr_qp->en_flags;
ret = ib_copy_to_udata(udata, &resp,
min(udata->outlen, sizeof(resp)));
- if (ret)
- goto err_qp;
+ if (ret) {
+ ibdev_err(ibdev, "copy qp resp failed!\n");
+ goto err_flow_ctrl;
+ }
}
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) {
ret = hr_dev->hw->qp_flow_control_init(hr_dev, hr_qp);
if (ret)
- goto err_qp;
+ goto err_flow_ctrl;
}
+ hr_qp->ibqp.qp_num = hr_qp->qpn;
hr_qp->event = hns_roce_ib_qp_event;
+ refcount_set(&hr_qp->refcount, 1);
+ init_completion(&hr_qp->free);
return 0;
-err_qp:
- if (init_attr->qp_type == IB_QPT_GSI &&
- hr_dev->hw_rev == HNS_ROCE_HW_VER1)
- hns_roce_qp_remove(hr_dev, hr_qp);
- else
- hns_roce_qp_free(hr_dev, hr_qp);
-
+err_flow_ctrl:
+ hns_roce_qp_remove(hr_dev, hr_qp);
+err_store:
+ free_qpc(hr_dev, hr_qp);
+err_qpc:
+ free_qp_db(hr_dev, hr_qp, udata);
+err_db:
+ free_qpn(hr_dev, hr_qp);
err_qpn:
- if (!sqpn)
- hns_roce_release_range_qp(hr_dev, qpn, 1);
+ free_qp_buf(hr_dev, hr_qp);
+err_buf:
+ free_kernel_wrid(hr_qp);
+err_out:
+ mutex_destroy(&hr_qp->mutex);
+ return ret;
+}
-err_wrid:
- if (udata) {
- if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
- (udata->outlen >= sizeof(resp)) &&
- hns_roce_qp_has_rq(init_attr))
- hns_roce_db_unmap_user(uctx, &hr_qp->rdb);
- } else {
- kfree(hr_qp->sq.wrid);
- kfree(hr_qp->rq.wrid);
+void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+ struct ib_udata *udata)
+{
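+ /* Wait until every outstanding reference (e.g. from async events) has
+ * been dropped before tearing the QP down.
+ */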
+ if (refcount_dec_and_test(&hr_qp->refcount))
+ complete(&hr_qp->free);
+ wait_for_completion(&hr_qp->free);
+
+ free_qpc(hr_dev, hr_qp);
+ free_qpn(hr_dev, hr_qp);
+ free_qp_buf(hr_dev, hr_qp);
+ free_kernel_wrid(hr_qp);
+ free_qp_db(hr_dev, hr_qp, udata);
+ mutex_destroy(&hr_qp->mutex);
+}
+
+static int check_qp_type(struct hns_roce_dev *hr_dev, enum ib_qp_type type,
+ bool is_user)
+{
+ switch (type) {
+ case IB_QPT_XRC_INI:
+ case IB_QPT_XRC_TGT:
+ if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC))
+ goto out;
+ break;
+ case IB_QPT_UD:
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 &&
+ is_user)
+ goto out;
+ break;
+ case IB_QPT_RC:
+ case IB_QPT_GSI:
+ break;
+ default:
+ goto out;
}
-err_sq_dbmap:
- if (udata)
- if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) &&
- (udata->inlen >= sizeof(ucmd)) &&
- (udata->outlen >= sizeof(resp)) &&
- hns_roce_qp_has_sq(init_attr))
- hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
+ return 0;
-err_mtt:
- hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
+out:
+ ibdev_err(&hr_dev->ib_dev, "not support QP type %d\n", type);
-err_buf:
- if (hr_qp->umem)
- ib_umem_release(hr_qp->umem);
- else
- hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
+ return -EOPNOTSUPP;
+}
-err_db:
- if (!udata && hns_roce_qp_has_rq(init_attr) &&
- (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB))
- hns_roce_free_db(hr_dev, &hr_qp->rdb);
+int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct ib_device *ibdev = qp->device;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
+ struct hns_roce_qp *hr_qp = to_hr_qp(qp);
+ int ret;
-err_rq_sge_list:
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE)
- kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
+ ret = check_qp_type(hr_dev, init_attr->qp_type, !!udata);
+ if (ret)
+ goto err_out;
-err_wqe_list:
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE)
- kfree(hr_qp->rq_inl_buf.wqe_list);
+ if (init_attr->qp_type == IB_QPT_XRC_TGT)
+ hr_qp->xrcdn = to_hr_xrcd(init_attr->xrcd)->xrcdn;
+
+ if (init_attr->qp_type == IB_QPT_GSI) {
+ hr_qp->port = init_attr->port_num - 1;
+ hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
+ }
+
+ ret = hns_roce_create_qp_common(hr_dev, init_attr, udata, hr_qp);
+ if (ret)
+ ibdev_err(ibdev, "create QP type %d failed(%d)\n",
+ init_attr->qp_type, ret);
err_out:
+ if (ret)
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_QP_CREATE_ERR_CNT]);
+
return ret;
}
-struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+int to_hr_qp_type(int qp_type)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
- struct device *dev = hr_dev->dev;
- struct hns_roce_sqp *hr_sqp;
- struct hns_roce_qp *hr_qp;
- int ret;
-
- switch (init_attr->qp_type) {
- case IB_QPT_RC: {
- hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
- if (!hr_qp)
- return ERR_PTR(-ENOMEM);
+ switch (qp_type) {
+ case IB_QPT_RC:
+ return SERV_TYPE_RC;
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return SERV_TYPE_UD;
+ case IB_QPT_XRC_INI:
+ case IB_QPT_XRC_TGT:
+ return SERV_TYPE_XRC;
+ default:
+ return -1;
+ }
+}
- ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, 0,
- hr_qp);
- if (ret) {
- dev_err(dev, "Create RC QP failed\n");
- kfree(hr_qp);
- return ERR_PTR(ret);
- }
+static int check_mtu_validate(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct ib_qp_attr *attr, int attr_mask)
+{
+ struct net_device *net_dev;
+ enum ib_mtu active_mtu;
+ int p;
- hr_qp->ibqp.qp_num = hr_qp->qpn;
+ p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
+ net_dev = get_hr_netdev(hr_dev, p);
+ active_mtu = iboe_get_mtu(net_dev->mtu);
- break;
+ if ((hr_dev->caps.max_mtu >= IB_MTU_2048 &&
+ attr->path_mtu > hr_dev->caps.max_mtu) ||
+ attr->path_mtu < IB_MTU_256 || attr->path_mtu > active_mtu) {
+ ibdev_err(&hr_dev->ib_dev,
+ "attr path_mtu(%d)invalid while modify qp",
+ attr->path_mtu);
+ return -EINVAL;
}
- case IB_QPT_GSI: {
- /* Userspace is not allowed to create special QPs: */
- if (udata) {
- dev_err(dev, "not support usr space GSI\n");
- return ERR_PTR(-EINVAL);
- }
- hr_sqp = kzalloc(sizeof(*hr_sqp), GFP_KERNEL);
- if (!hr_sqp)
- return ERR_PTR(-ENOMEM);
+ return 0;
+}
- hr_qp = &hr_sqp->hr_qp;
- hr_qp->port = init_attr->port_num - 1;
- hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
+static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ int p;
- /* when hw version is v1, the sqpn is allocated */
- if (hr_dev->caps.max_sq_sg <= 2)
- hr_qp->ibqp.qp_num = HNS_ROCE_MAX_PORTS +
- hr_dev->iboe.phy_port[hr_qp->port];
- else
- hr_qp->ibqp.qp_num = 1;
+ if ((attr_mask & IB_QP_PORT) &&
+ (attr->port_num == 0 || attr->port_num > hr_dev->caps.num_ports)) {
+ ibdev_err(&hr_dev->ib_dev, "invalid attr, port_num = %u.\n",
+ attr->port_num);
+ return -EINVAL;
+ }
- ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata,
- hr_qp->ibqp.qp_num, hr_qp);
- if (ret) {
- dev_err(dev, "Create GSI QP failed!\n");
- kfree(hr_sqp);
- return ERR_PTR(ret);
+ if (attr_mask & IB_QP_PKEY_INDEX) {
+ p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
+ if (attr->pkey_index >= hr_dev->caps.pkey_table_len[p]) {
+ ibdev_err(&hr_dev->ib_dev,
+ "invalid attr, pkey_index = %u.\n",
+ attr->pkey_index);
+ return -EINVAL;
}
-
- break;
- }
- default:{
- dev_err(dev, "not support QP type %d\n", init_attr->qp_type);
- return ERR_PTR(-EINVAL);
- }
}
- return &hr_qp->ibqp;
-}
-EXPORT_SYMBOL_GPL(hns_roce_create_qp);
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
+ attr->max_rd_atomic > hr_dev->caps.max_qp_init_rdma) {
+ ibdev_err(&hr_dev->ib_dev,
+ "invalid attr, max_rd_atomic = %u.\n",
+ attr->max_rd_atomic);
+ return -EINVAL;
+ }
-int to_hr_qp_type(int qp_type)
-{
- int transport_type;
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
+ attr->max_dest_rd_atomic > hr_dev->caps.max_qp_dest_rdma) {
+ ibdev_err(&hr_dev->ib_dev,
+ "invalid attr, max_dest_rd_atomic = %u.\n",
+ attr->max_dest_rd_atomic);
+ return -EINVAL;
+ }
- if (qp_type == IB_QPT_RC)
- transport_type = SERV_TYPE_RC;
- else if (qp_type == IB_QPT_UC)
- transport_type = SERV_TYPE_UC;
- else if (qp_type == IB_QPT_UD)
- transport_type = SERV_TYPE_UD;
- else if (qp_type == IB_QPT_GSI)
- transport_type = SERV_TYPE_UD;
- else
- transport_type = -1;
+ if (attr_mask & IB_QP_PATH_MTU)
+ return check_mtu_validate(hr_dev, hr_qp, attr, attr_mask);
- return transport_type;
+ return 0;
}
-EXPORT_SYMBOL_GPL(to_hr_qp_type);
int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_ib_modify_qp_resp resp = {};
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
enum ib_qp_state cur_state, new_state;
- struct device *dev = hr_dev->dev;
int ret = -EINVAL;
- int p;
- enum ib_mtu active_mtu;
mutex_lock(&hr_qp->mutex);
- cur_state = attr_mask & IB_QP_CUR_STATE ?
- attr->cur_qp_state : (enum ib_qp_state)hr_qp->state;
- new_state = attr_mask & IB_QP_STATE ?
- attr->qp_state : cur_state;
+ if (attr_mask & IB_QP_CUR_STATE && attr->cur_qp_state != hr_qp->state)
+ goto out;
+
+ cur_state = hr_qp->state;
+ new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
if (ibqp->uobject &&
(attr_mask & IB_QP_STATE) && new_state == IB_QPS_ERR) {
- if (hr_qp->sdb_en == 1) {
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB) {
hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr);
- if (hr_qp->rdb_en == 1)
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr);
} else {
- dev_warn(dev, "flush cqe is not supported in userspace!\n");
+ ibdev_warn(&hr_dev->ib_dev,
+ "flush cqe is not supported in userspace!\n");
goto out;
}
}
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
attr_mask)) {
- dev_err(dev, "ib_modify_qp_is_ok failed\n");
+ ibdev_err(&hr_dev->ib_dev, "ib_modify_qp_is_ok failed\n");
goto out;
}
- if ((attr_mask & IB_QP_PORT) &&
- (attr->port_num == 0 || attr->port_num > hr_dev->caps.num_ports)) {
- dev_err(dev, "attr port_num invalid.attr->port_num=%d\n",
- attr->port_num);
+ ret = hns_roce_check_qp_attr(ibqp, attr, attr_mask);
+ if (ret)
goto out;
- }
- if (attr_mask & IB_QP_PKEY_INDEX) {
- p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
- if (attr->pkey_index >= hr_dev->caps.pkey_table_len[p]) {
- dev_err(dev, "attr pkey_index invalid.attr->pkey_index=%d\n",
- attr->pkey_index);
- goto out;
- }
- }
-
- if (attr_mask & IB_QP_PATH_MTU) {
- p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
- active_mtu = iboe_get_mtu(hr_dev->iboe.netdevs[p]->mtu);
-
- if ((hr_dev->caps.max_mtu == IB_MTU_4096 &&
- attr->path_mtu > IB_MTU_4096) ||
- (hr_dev->caps.max_mtu == IB_MTU_2048 &&
- attr->path_mtu > IB_MTU_2048) ||
- attr->path_mtu < IB_MTU_256 ||
- attr->path_mtu > active_mtu) {
- dev_err(dev, "attr path_mtu(%d)invalid while modify qp",
- attr->path_mtu);
- goto out;
- }
- }
-
- if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
- attr->max_rd_atomic > hr_dev->caps.max_qp_init_rdma) {
- dev_err(dev, "attr max_rd_atomic invalid.attr->max_rd_atomic=%d\n",
- attr->max_rd_atomic);
+ if (cur_state == new_state && cur_state == IB_QPS_RESET)
goto out;
- }
- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
- attr->max_dest_rd_atomic > hr_dev->caps.max_qp_dest_rdma) {
- dev_err(dev, "attr max_dest_rd_atomic invalid.attr->max_dest_rd_atomic=%d\n",
- attr->max_dest_rd_atomic);
+ ret = hr_dev->hw->modify_qp(ibqp, attr, attr_mask, cur_state,
+ new_state, udata);
+ if (ret)
goto out;
- }
-
- if (cur_state == new_state && cur_state == IB_QPS_RESET) {
- if (hr_dev->caps.min_wqes) {
- ret = -EPERM;
- dev_err(dev, "cur_state=%d new_state=%d\n", cur_state,
- new_state);
- } else {
- ret = 0;
- }
- goto out;
+ if (udata && udata->outlen) {
+ resp.tc_mode = hr_qp->tc_mode;
+ resp.priority = hr_qp->sl;
+ ret = ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)));
+ if (ret)
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to copy modify qp resp.\n");
}
- ret = hr_dev->hw->modify_qp(ibqp, attr, attr_mask, cur_state,
- new_state);
-
out:
mutex_unlock(&hr_qp->mutex);
+ if (ret)
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_QP_MODIFY_ERR_CNT]);
return ret;
}
@@ -1051,69 +1483,80 @@ out:
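+/* When both CQs exist, always take the lock of the lower CQN first so that
+ * concurrent lockers agree on the ordering and cannot deadlock.
+ */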
void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq)
__acquires(&send_cq->lock) __acquires(&recv_cq->lock)
{
- if (send_cq == recv_cq) {
- spin_lock_irq(&send_cq->lock);
+ if (unlikely(send_cq == NULL && recv_cq == NULL)) {
+ __acquire(&send_cq->lock);
+ __acquire(&recv_cq->lock);
+ } else if (unlikely(send_cq != NULL && recv_cq == NULL)) {
+ spin_lock(&send_cq->lock);
+ __acquire(&recv_cq->lock);
+ } else if (unlikely(send_cq == NULL && recv_cq != NULL)) {
+ spin_lock(&recv_cq->lock);
+ __acquire(&send_cq->lock);
+ } else if (send_cq == recv_cq) {
+ spin_lock(&send_cq->lock);
__acquire(&recv_cq->lock);
} else if (send_cq->cqn < recv_cq->cqn) {
- spin_lock_irq(&send_cq->lock);
+ spin_lock(&send_cq->lock);
spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
} else {
- spin_lock_irq(&recv_cq->lock);
+ spin_lock(&recv_cq->lock);
spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
}
}
-EXPORT_SYMBOL_GPL(hns_roce_lock_cqs);
void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq,
struct hns_roce_cq *recv_cq) __releases(&send_cq->lock)
__releases(&recv_cq->lock)
{
- if (send_cq == recv_cq) {
+ if (unlikely(send_cq == NULL && recv_cq == NULL)) {
+ __release(&recv_cq->lock);
+ __release(&send_cq->lock);
+ } else if (unlikely(send_cq != NULL && recv_cq == NULL)) {
+ __release(&recv_cq->lock);
+ spin_unlock(&send_cq->lock);
+ } else if (unlikely(send_cq == NULL && recv_cq != NULL)) {
+ __release(&send_cq->lock);
+ spin_unlock(&recv_cq->lock);
+ } else if (send_cq == recv_cq) {
__release(&recv_cq->lock);
- spin_unlock_irq(&send_cq->lock);
+ spin_unlock(&send_cq->lock);
} else if (send_cq->cqn < recv_cq->cqn) {
spin_unlock(&recv_cq->lock);
- spin_unlock_irq(&send_cq->lock);
+ spin_unlock(&send_cq->lock);
} else {
spin_unlock(&send_cq->lock);
- spin_unlock_irq(&recv_cq->lock);
+ spin_unlock(&recv_cq->lock);
}
}
-EXPORT_SYMBOL_GPL(hns_roce_unlock_cqs);
-static void *get_wqe(struct hns_roce_qp *hr_qp, int offset)
+static inline void *get_wqe(struct hns_roce_qp *hr_qp, u32 offset)
{
-
- return hns_roce_buf_offset(&hr_qp->hr_buf, offset);
+ return hns_roce_buf_offset(hr_qp->mtr.kmem, offset);
}
-void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n)
+void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, unsigned int n)
{
return get_wqe(hr_qp, hr_qp->rq.offset + (n << hr_qp->rq.wqe_shift));
}
-EXPORT_SYMBOL_GPL(get_recv_wqe);
-void *get_send_wqe(struct hns_roce_qp *hr_qp, int n)
+void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, unsigned int n)
{
return get_wqe(hr_qp, hr_qp->sq.offset + (n << hr_qp->sq.wqe_shift));
}
-EXPORT_SYMBOL_GPL(get_send_wqe);
-void *get_send_extend_sge(struct hns_roce_qp *hr_qp, int n)
+void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n)
{
- return hns_roce_buf_offset(&hr_qp->hr_buf, hr_qp->sge.offset +
- (n << hr_qp->sge.sge_shift));
+ return get_wqe(hr_qp, hr_qp->sge.offset + (n << hr_qp->sge.sge_shift));
}
-EXPORT_SYMBOL_GPL(get_send_extend_sge);
-bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq,
+bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq,
struct ib_cq *ib_cq)
{
struct hns_roce_cq *hr_cq;
u32 cur;
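+ /* head and tail are free-running counters, so the unsigned subtraction
+ * yields the number of outstanding WQEs even after wraparound.
+ */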
cur = hr_wq->head - hr_wq->tail;
- if (likely(cur + nreq < hr_wq->max_post))
+ if (likely(cur + nreq < hr_wq->wqe_cnt))
return false;
hr_cq = to_hr_cq(ib_cq);
@@ -1121,33 +1564,32 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq,
cur = hr_wq->head - hr_wq->tail;
spin_unlock(&hr_cq->lock);
- return cur + nreq >= hr_wq->max_post;
+ return cur + nreq >= hr_wq->wqe_cnt;
}
-EXPORT_SYMBOL_GPL(hns_roce_wq_overflow);
int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
{
struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
- int reserved_from_top = 0;
- int reserved_from_bot;
- int ret;
+ unsigned int reserved_from_bot;
+ unsigned int i;
mutex_init(&qp_table->scc_mutex);
+ mutex_init(&qp_table->bank_mutex);
xa_init(&hr_dev->qp_table_xa);
+ xa_init(&qp_table->dip_xa);
- /* In hw v1, a port include two SQP, six ports total 12 */
- if (hr_dev->caps.max_sq_sg <= 2)
- reserved_from_bot = SQP_NUM;
- else
- reserved_from_bot = hr_dev->caps.reserved_qps;
+ reserved_from_bot = hr_dev->caps.reserved_qps;
- ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps,
- hr_dev->caps.num_qps - 1, reserved_from_bot,
- reserved_from_top);
- if (ret) {
- dev_err(hr_dev->dev, "qp bitmap init failed!error=%d\n",
- ret);
- return ret;
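+ /* Reserved QPNs are distributed across the banks, so raise each
+ * affected bank's floor before handing out IDs.
+ */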
+ for (i = 0; i < reserved_from_bot; i++) {
+ hr_dev->qp_table.bank[get_qp_bankid(i)].inuse++;
+ hr_dev->qp_table.bank[get_qp_bankid(i)].min++;
+ }
+
+ for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) {
+ ida_init(&hr_dev->qp_table.bank[i].ida);
+ hr_dev->qp_table.bank[i].max = hr_dev->caps.num_qps /
+ HNS_ROCE_QP_BANK_NUM - 1;
+ hr_dev->qp_table.bank[i].next = hr_dev->qp_table.bank[i].min;
}
return 0;
@@ -1155,5 +1597,12 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev)
{
- hns_roce_bitmap_cleanup(&hr_dev->qp_table.bitmap);
+ int i;
+
+ for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++)
+ ida_destroy(&hr_dev->qp_table.bank[i].ida);
+ xa_destroy(&hr_dev->qp_table.dip_xa);
+ xa_destroy(&hr_dev->qp_table_xa);
+ mutex_destroy(&hr_dev->qp_table.bank_mutex);
+ mutex_destroy(&hr_dev->qp_table.scc_mutex);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c
index 0a31d0a3d657..230187dda6a0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_restrack.c
+++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c
@@ -4,123 +4,231 @@
#include <rdma/rdma_cm.h>
#include <rdma/restrack.h>
#include <uapi/rdma/rdma_netlink.h>
-#include "hnae3.h"
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_hw_v2.h"
-static int hns_roce_fill_cq(struct sk_buff *msg,
- struct hns_roce_v2_cq_context *context)
+int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq)
{
- if (rdma_nl_put_driver_u32(msg, "state",
- roce_get_field(context->byte_4_pg_ceqn,
- V2_CQC_BYTE_4_ARM_ST_M,
- V2_CQC_BYTE_4_ARM_ST_S)))
+ struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
+ struct nlattr *table_attr;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ if (rdma_nl_put_driver_u32(msg, "cq_depth", hr_cq->cq_depth))
goto err;
- if (rdma_nl_put_driver_u32(msg, "ceqn",
- roce_get_field(context->byte_4_pg_ceqn,
- V2_CQC_BYTE_4_CEQN_M,
- V2_CQC_BYTE_4_CEQN_S)))
+ if (rdma_nl_put_driver_u32(msg, "cons_index", hr_cq->cons_index))
goto err;
- if (rdma_nl_put_driver_u32(msg, "cqn",
- roce_get_field(context->byte_8_cqn,
- V2_CQC_BYTE_8_CQN_M,
- V2_CQC_BYTE_8_CQN_S)))
+ if (rdma_nl_put_driver_u32(msg, "cqe_size", hr_cq->cqe_size))
goto err;
- if (rdma_nl_put_driver_u32(msg, "hopnum",
- roce_get_field(context->byte_16_hop_addr,
- V2_CQC_BYTE_16_CQE_HOP_NUM_M,
- V2_CQC_BYTE_16_CQE_HOP_NUM_S)))
+ if (rdma_nl_put_driver_u32(msg, "arm_sn", hr_cq->arm_sn))
goto err;
- if (rdma_nl_put_driver_u32(
- msg, "pi",
- roce_get_field(context->byte_28_cq_pi,
- V2_CQC_BYTE_28_CQ_PRODUCER_IDX_M,
- V2_CQC_BYTE_28_CQ_PRODUCER_IDX_S)))
+ nla_nest_end(msg, table_attr);
+
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+
+ return -EMSGSIZE;
+}
+
+int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
+ struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
+ struct hns_roce_v2_cq_context context;
+ int ret;
+
+ if (!hr_dev->hw->query_cqc)
+ return -EINVAL;
+
+ ret = hr_dev->hw->query_cqc(hr_dev, hr_cq->cqn, &context);
+ if (ret)
+ return -EINVAL;
+
+ ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, sizeof(context), &context);
+
+ return ret;
+}
+
+int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp)
+{
+ struct hns_roce_qp *hr_qp = to_hr_qp(ib_qp);
+ struct nlattr *table_attr;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "sq_wqe_cnt", hr_qp->sq.wqe_cnt))
goto err;
- if (rdma_nl_put_driver_u32(
- msg, "ci",
- roce_get_field(context->byte_32_cq_ci,
- V2_CQC_BYTE_32_CQ_CONSUMER_IDX_M,
- V2_CQC_BYTE_32_CQ_CONSUMER_IDX_S)))
+ if (rdma_nl_put_driver_u32_hex(msg, "sq_max_gs", hr_qp->sq.max_gs))
goto err;
- if (rdma_nl_put_driver_u32(
- msg, "coalesce",
- roce_get_field(context->byte_56_cqe_period_maxcnt,
- V2_CQC_BYTE_56_CQ_MAX_CNT_M,
- V2_CQC_BYTE_56_CQ_MAX_CNT_S)))
+ if (rdma_nl_put_driver_u32_hex(msg, "rq_wqe_cnt", hr_qp->rq.wqe_cnt))
goto err;
- if (rdma_nl_put_driver_u32(
- msg, "period",
- roce_get_field(context->byte_56_cqe_period_maxcnt,
- V2_CQC_BYTE_56_CQ_PERIOD_M,
- V2_CQC_BYTE_56_CQ_PERIOD_S)))
+ if (rdma_nl_put_driver_u32_hex(msg, "rq_max_gs", hr_qp->rq.max_gs))
goto err;
- if (rdma_nl_put_driver_u32(msg, "cnt",
- roce_get_field(context->byte_52_cqe_cnt,
- V2_CQC_BYTE_52_CQE_CNT_M,
- V2_CQC_BYTE_52_CQE_CNT_S)))
+ if (rdma_nl_put_driver_u32_hex(msg, "ext_sge_sge_cnt", hr_qp->sge.sge_cnt))
goto err;
+ nla_nest_end(msg, table_attr);
+
return 0;
err:
+ nla_nest_cancel(msg, table_attr);
+
return -EMSGSIZE;
}
-static int hns_roce_fill_res_cq_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp)
{
- struct ib_cq *ib_cq = container_of(res, struct ib_cq, res);
- struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
- struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
- struct hns_roce_v2_cq_context *context;
- struct nlattr *table_attr;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_qp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ib_qp);
+ struct hns_roce_full_qp_ctx {
+ struct hns_roce_v2_qp_context qpc;
+ struct hns_roce_v2_scc_context sccc;
+ } context = {};
+ u32 sccn = hr_qp->qpn;
int ret;
- if (!hr_dev->dfx->query_cqc_info)
+ if (!hr_dev->hw->query_qpc)
return -EINVAL;
- context = kzalloc(sizeof(struct hns_roce_v2_cq_context), GFP_KERNEL);
- if (!context)
- return -ENOMEM;
-
- ret = hr_dev->dfx->query_cqc_info(hr_dev, hr_cq->cqn, (int *)context);
+ ret = hr_dev->hw->query_qpc(hr_dev, hr_qp->qpn, &context.qpc);
if (ret)
- goto err;
+ return ret;
+
+ /* If SCC is disabled or the query fails, the queried SCCC will
+ * be all 0.
+ */
+ if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) ||
+ !hr_dev->hw->query_sccc)
+ goto out;
+
+ if (hr_qp->cong_type == CONG_TYPE_DIP) {
+ if (!hr_qp->dip)
+ goto out;
+ sccn = hr_qp->dip->dip_idx;
+ }
+
+ ret = hr_dev->hw->query_sccc(hr_dev, sccn, &context.sccc);
+ if (ret)
+ ibdev_warn_ratelimited(&hr_dev->ib_dev,
+ "failed to query SCCC, ret = %d.\n",
+ ret);
+
+out:
+ ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, sizeof(context), &context);
+
+ return ret;
+}
+
+int hns_roce_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr)
+{
+ struct hns_roce_mr *hr_mr = to_hr_mr(ib_mr);
+ struct nlattr *table_attr;
table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
if (!table_attr)
+ return -EMSGSIZE;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "pbl_hop_num", hr_mr->pbl_hop_num))
goto err;
- if (hns_roce_fill_cq(msg, context))
- goto err_cancel_table;
+ if (rdma_nl_put_driver_u32_hex(msg, "ba_pg_shift",
+ hr_mr->pbl_mtr.hem_cfg.ba_pg_shift))
+ goto err;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "buf_pg_shift",
+ hr_mr->pbl_mtr.hem_cfg.buf_pg_shift))
+ goto err;
nla_nest_end(msg, table_attr);
- kfree(context);
return 0;
-err_cancel_table:
- nla_nest_cancel(msg, table_attr);
err:
- kfree(context);
+ nla_nest_cancel(msg, table_attr);
+
return -EMSGSIZE;
}
-int hns_roce_fill_res_entry(struct sk_buff *msg,
- struct rdma_restrack_entry *res)
+int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_mr->device);
+ struct hns_roce_mr *hr_mr = to_hr_mr(ib_mr);
+ struct hns_roce_v2_mpt_entry context;
+ int ret;
+
+ if (!hr_dev->hw->query_mpt)
+ return -EINVAL;
+
+ ret = hr_dev->hw->query_mpt(hr_dev, hr_mr->key, &context);
+ if (ret)
+ return -EINVAL;
+
+ ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, sizeof(context), &context);
+
+ return ret;
+}
+
+int hns_roce_fill_res_srq_entry(struct sk_buff *msg, struct ib_srq *ib_srq)
{
- if (res->type == RDMA_RESTRACK_CQ)
- return hns_roce_fill_res_cq_entry(msg, res);
+ struct hns_roce_srq *hr_srq = to_hr_srq(ib_srq);
+ struct nlattr *table_attr;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "srqn", hr_srq->srqn))
+ goto err;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "wqe_cnt", hr_srq->wqe_cnt))
+ goto err;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "max_gs", hr_srq->max_gs))
+ goto err;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "xrcdn", hr_srq->xrcdn))
+ goto err;
+
+ nla_nest_end(msg, table_attr);
return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
+}
+
+int hns_roce_fill_res_srq_entry_raw(struct sk_buff *msg, struct ib_srq *ib_srq)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device);
+ struct hns_roce_srq *hr_srq = to_hr_srq(ib_srq);
+ struct hns_roce_srq_context context;
+ int ret;
+
+ if (!hr_dev->hw->query_srqc)
+ return -EINVAL;
+
+ ret = hr_dev->hw->query_srqc(hr_dev, hr_srq->srqn, &context);
+ if (ret)
+ return ret;
+
+ ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, sizeof(context), &context);
+
+ return ret;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index b3421b1f21e0..8a6efb6b9c9e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -4,7 +4,7 @@
*/
#include <rdma/ib_umem.h>
-#include <rdma/hns-abi.h>
+#include <rdma/uverbs_ioctl.h>
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"
@@ -17,7 +17,7 @@ void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type)
xa_lock(&srq_table->xa);
srq = xa_load(&srq_table->xa, srqn & (hr_dev->caps.num_srqs - 1));
if (srq)
- atomic_inc(&srq->refcount);
+ refcount_inc(&srq->refcount);
xa_unlock(&srq_table->xa);
if (!srq) {
@@ -27,10 +27,9 @@ void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type)
srq->event(srq, event_type);
- if (atomic_dec_and_test(&srq->refcount))
+ if (refcount_dec_and_test(&srq->refcount))
complete(&srq->free);
}
-EXPORT_SYMBOL_GPL(hns_roce_srq_event);
static void hns_roce_ib_srq_event(struct hns_roce_srq *srq,
enum hns_roce_event event_type)
@@ -51,7 +50,7 @@ static void hns_roce_ib_srq_event(struct hns_roce_srq *srq,
break;
default:
dev_err(hr_dev->dev,
- "hns_roce:Unexpected event type 0x%x on SRQ %06lx\n",
+ "hns_roce:Unexpected event type %d on SRQ %06lx\n",
event_type, srq->srqn);
return;
}
@@ -60,388 +59,491 @@ static void hns_roce_ib_srq_event(struct hns_roce_srq *srq,
}
}
-static int hns_roce_sw2hw_srq(struct hns_roce_dev *dev,
- struct hns_roce_cmd_mailbox *mailbox,
- unsigned long srq_num)
+static int alloc_srqn(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
{
- return hns_roce_cmd_mbox(dev, mailbox->dma, 0, srq_num, 0,
- HNS_ROCE_CMD_SW2HW_SRQ,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
+ struct hns_roce_ida *srq_ida = &hr_dev->srq_table.srq_ida;
+ int id;
+
+ id = ida_alloc_range(&srq_ida->ida, srq_ida->min, srq_ida->max,
+ GFP_KERNEL);
+ if (id < 0) {
+ ibdev_err(&hr_dev->ib_dev, "failed to alloc srq(%d).\n", id);
+ return -ENOMEM;
+ }
+
+ srq->srqn = id;
+
+ return 0;
}
-static int hns_roce_hw2sw_srq(struct hns_roce_dev *dev,
- struct hns_roce_cmd_mailbox *mailbox,
- unsigned long srq_num)
+static void free_srqn(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
{
- return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, srq_num,
- mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_SRQ,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
+ ida_free(&hr_dev->srq_table.srq_ida.ida, (int)srq->srqn);
}
-static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn,
- u16 xrcd, struct hns_roce_mtt *hr_mtt,
- u64 db_rec_addr, struct hns_roce_srq *srq)
+static int hns_roce_create_srqc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq)
{
- struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_cmd_mailbox *mailbox;
- dma_addr_t dma_handle_wqe;
- dma_addr_t dma_handle_idx;
- u64 *mtts_wqe;
- u64 *mtts_idx;
int ret;
- /* Get the physical address of srq buf */
- mtts_wqe = hns_roce_table_find(hr_dev,
- &hr_dev->mr_table.mtt_srqwqe_table,
- srq->mtt.first_seg,
- &dma_handle_wqe);
- if (!mtts_wqe) {
- dev_err(hr_dev->dev,
- "SRQ alloc.Failed to find srq buf addr.\n");
- return -EINVAL;
- }
-
- /* Get physical address of idx que buf */
- mtts_idx = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_idx_table,
- srq->idx_que.mtt.first_seg,
- &dma_handle_idx);
- if (!mtts_idx) {
- dev_err(hr_dev->dev,
- "SRQ alloc.Failed to find idx que buf addr.\n");
- return -EINVAL;
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox)) {
+ ibdev_err(ibdev, "failed to alloc mailbox for SRQC.\n");
+ return PTR_ERR(mailbox);
}
- ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn);
- if (ret == -1) {
- dev_err(hr_dev->dev, "SRQ alloc.Failed to alloc index.\n");
- return -ENOMEM;
+ ret = hr_dev->hw->write_srqc(srq, mailbox->buf);
+ if (ret) {
+ ibdev_err(ibdev, "failed to write SRQC.\n");
+ goto err_mbox;
}
- ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn);
+ ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_SRQ,
+ srq->srqn);
if (ret)
- goto err_out;
+ ibdev_err(ibdev, "failed to config SRQC, ret = %d.\n", ret);
- ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL));
- if (ret)
- goto err_put;
+err_mbox:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
- mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox)) {
- ret = PTR_ERR(mailbox);
- goto err_xa;
+static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+ struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int ret;
+
+ ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn);
+ if (ret) {
+ ibdev_err(ibdev, "failed to get SRQC table, ret = %d.\n", ret);
+ return ret;
}
- hr_dev->hw->write_srqc(hr_dev, srq, pdn, xrcd, cqn, mailbox->buf,
- mtts_wqe, mtts_idx, dma_handle_wqe,
- dma_handle_idx);
+ ret = xa_err(xa_store_irq(&srq_table->xa, srq->srqn, srq, GFP_KERNEL));
+ if (ret) {
+ ibdev_err(ibdev, "failed to store SRQC, ret = %d.\n", ret);
+ goto err_put;
+ }
- ret = hns_roce_sw2hw_srq(hr_dev, mailbox, srq->srqn);
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ ret = hns_roce_create_srqc(hr_dev, srq);
if (ret)
goto err_xa;
- atomic_set(&srq->refcount, 1);
- init_completion(&srq->free);
- return ret;
+ return 0;
err_xa:
- xa_erase(&srq_table->xa, srq->srqn);
-
+ xa_erase_irq(&srq_table->xa, srq->srqn);
err_put:
hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
-err_out:
- hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR);
return ret;
}
-static void hns_roce_srq_free(struct hns_roce_dev *hr_dev,
- struct hns_roce_srq *srq)
+static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
{
struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
int ret;
- ret = hns_roce_hw2sw_srq(hr_dev, NULL, srq->srqn);
+ ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_SRQ,
+ srq->srqn);
if (ret)
- dev_err(hr_dev->dev, "HW2SW_SRQ failed (%d) for CQN %06lx\n",
- ret, srq->srqn);
+ dev_err_ratelimited(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n",
+ ret, srq->srqn);
- xa_erase(&srq_table->xa, srq->srqn);
+ xa_erase_irq(&srq_table->xa, srq->srqn);
- if (atomic_dec_and_test(&srq->refcount))
+ if (refcount_dec_and_test(&srq->refcount))
complete(&srq->free);
wait_for_completion(&srq->free);
hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
- hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR);
}
-static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq,
- u32 page_shift)
+static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
+ struct ib_udata *udata, unsigned long addr)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
struct hns_roce_idx_que *idx_que = &srq->idx_que;
- u32 bitmap_num;
- int i;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_buf_attr buf_attr = {};
+ int ret;
- bitmap_num = HNS_ROCE_ALOGN_UP(srq->max, 8 * sizeof(u64));
+ srq->idx_que.entry_shift = ilog2(HNS_ROCE_IDX_QUE_ENTRY_SZ);
+
+ buf_attr.page_shift = hr_dev->caps.idx_buf_pg_sz + PAGE_SHIFT;
+ buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt,
+ srq->idx_que.entry_shift);
+ buf_attr.region[0].hopnum = hr_dev->caps.idx_hop_num;
+ buf_attr.region_count = 1;
+
+ ret = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr,
+ hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT,
+ udata, addr);
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to alloc SRQ idx mtr, ret = %d.\n", ret);
+ return ret;
+ }
- idx_que->bitmap = kcalloc(1, bitmap_num / 8, GFP_KERNEL);
- if (!idx_que->bitmap)
- return -ENOMEM;
+ if (!udata) {
+ idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL);
+ if (!idx_que->bitmap) {
+ ibdev_err(ibdev, "failed to alloc SRQ idx bitmap.\n");
+ ret = -ENOMEM;
+ goto err_idx_mtr;
+ }
+ }
- bitmap_num = bitmap_num / (8 * sizeof(u64));
+ idx_que->head = 0;
+ idx_que->tail = 0;
- idx_que->buf_size = srq->idx_que.buf_size;
+ return 0;
+err_idx_mtr:
+ hns_roce_mtr_destroy(hr_dev, &idx_que->mtr);
- if (hns_roce_buf_alloc(hr_dev, idx_que->buf_size, (1 << page_shift) * 2,
- &idx_que->idx_buf, page_shift)) {
- kfree(idx_que->bitmap);
- return -ENOMEM;
- }
+ return ret;
+}
- for (i = 0; i < bitmap_num; i++)
- idx_que->bitmap[i] = ~(0UL);
+static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+
+ bitmap_free(idx_que->bitmap);
+ idx_que->bitmap = NULL;
+ hns_roce_mtr_destroy(hr_dev, &idx_que->mtr);
+}
+
+static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq,
+ struct ib_udata *udata, unsigned long addr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_buf_attr buf_attr = {};
+ int ret;
+
+ srq->wqe_shift = ilog2(roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE,
+ HNS_ROCE_SGE_SIZE *
+ srq->max_gs)));
+
+ buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + PAGE_SHIFT;
+ buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt,
+ srq->wqe_shift);
+ buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num;
+ buf_attr.region_count = 1;
+
+ ret = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr,
+ hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT,
+ udata, addr);
+ if (ret)
+ ibdev_err(ibdev,
+ "failed to alloc SRQ buf mtr, ret = %d.\n", ret);
+
+ return ret;
+}
+
+static void free_srq_wqe_buf(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq)
+{
+ hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr);
+}
+
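+/* Kernel SRQs keep the WR ID of each posted receive in the wrid array. */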
+static int alloc_srq_wrid(struct hns_roce_srq *srq)
+{
+ srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL);
+ if (!srq->wrid)
+ return -ENOMEM;
return 0;
}
-int hns_roce_create_srq(struct ib_srq *ib_srq,
- struct ib_srq_init_attr *srq_init_attr,
- struct ib_udata *udata)
+static void free_srq_wrid(struct hns_roce_srq *srq)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device);
- struct hns_roce_ib_create_srq_resp resp = {};
- struct hns_roce_srq *srq = to_hr_srq(ib_srq);
- int srq_desc_size;
- int srq_buf_size;
- u32 page_shift;
- int ret = 0;
- u32 npages;
- u32 cqn;
-
- /* Check the actual SRQ wqe and SRQ sge num */
- if (srq_init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs ||
- srq_init_attr->attr.max_sge > hr_dev->caps.max_srq_sges)
+ kvfree(srq->wrid);
+ srq->wrid = NULL;
+}
+
+static u32 proc_srq_sge(struct hns_roce_dev *dev, struct hns_roce_srq *hr_srq,
+ bool user)
+{
+ u32 max_sge = dev->caps.max_srq_sges;
+
+ if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return max_sge;
+
+	/* Reserve SGEs only for HIP08 in kernel; the userspace driver will
+	 * calculate the number of max_sge with reserved SGEs when allocating
+	 * the WQE buf, so there is no need to do this again in the kernel.
+	 * But the number may exceed the capacity of SGEs recorded in the
+	 * firmware, so the kernel driver should just adapt the value accordingly.
+ */
+ if (user)
+ max_sge = roundup_pow_of_two(max_sge + 1);
+ else
+ hr_srq->rsv_sge = 1;
+
+ return max_sge;
+}
+
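+/* Validate the requested SRQ depth and SGE count against the device caps,
+ * round them up to powers of two and report the actual values back to the
+ * caller through the attr struct.
+ */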
+static int set_srq_basic_param(struct hns_roce_srq *srq,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
+ struct ib_srq_attr *attr = &init_attr->attr;
+ u32 max_sge;
+
+ max_sge = proc_srq_sge(hr_dev, srq, !!udata);
+ if (attr->max_wr > hr_dev->caps.max_srq_wrs ||
+ attr->max_sge > max_sge || !attr->max_sge) {
+ ibdev_err(&hr_dev->ib_dev,
+ "invalid SRQ attr, depth = %u, sge = %u.\n",
+ attr->max_wr, attr->max_sge);
return -EINVAL;
+ }
- mutex_init(&srq->mutex);
- spin_lock_init(&srq->lock);
+ attr->max_wr = max_t(u32, attr->max_wr, HNS_ROCE_MIN_SRQ_WQE_NUM);
+ srq->wqe_cnt = roundup_pow_of_two(attr->max_wr);
+ srq->max_gs = roundup_pow_of_two(attr->max_sge + srq->rsv_sge);
- srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1);
- srq->max_gs = srq_init_attr->attr.max_sge;
+ attr->max_wr = srq->wqe_cnt;
+ attr->max_sge = srq->max_gs - srq->rsv_sge;
+ attr->srq_limit = 0;
- srq_desc_size = max(16, 16 * srq->max_gs);
+ return 0;
+}
- srq->wqe_shift = ilog2(srq_desc_size);
+static void set_srq_ext_param(struct hns_roce_srq *srq,
+ struct ib_srq_init_attr *init_attr)
+{
+ srq->cqn = ib_srq_has_cq(init_attr->srq_type) ?
+ to_hr_cq(init_attr->ext.cq)->cqn : 0;
- srq_buf_size = srq->max * srq_desc_size;
+ srq->xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ?
+ to_hr_xrcd(init_attr->ext.xrc.xrcd)->xrcdn : 0;
+}
- srq->idx_que.entry_sz = HNS_ROCE_IDX_QUE_ENTRY_SZ;
- srq->idx_que.buf_size = srq->max * srq->idx_que.entry_sz;
- srq->mtt.mtt_type = MTT_TYPE_SRQWQE;
- srq->idx_que.mtt.mtt_type = MTT_TYPE_IDX;
+static int set_srq_param(struct hns_roce_srq *srq,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ int ret;
- if (udata) {
- struct hns_roce_ib_create_srq ucmd;
-
- if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
- return -EFAULT;
-
- srq->umem =
- ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0, 0);
- if (IS_ERR(srq->umem))
- return PTR_ERR(srq->umem);
-
- if (hr_dev->caps.srqwqe_buf_pg_sz) {
- npages = (ib_umem_page_count(srq->umem) +
- (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) /
- (1 << hr_dev->caps.srqwqe_buf_pg_sz);
- page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
- ret = hns_roce_mtt_init(hr_dev, npages,
- page_shift,
- &srq->mtt);
- } else
- ret = hns_roce_mtt_init(hr_dev,
- ib_umem_page_count(srq->umem),
- srq->umem->page_shift,
- &srq->mtt);
- if (ret)
- goto err_buf;
+ ret = set_srq_basic_param(srq, init_attr, udata);
+ if (ret)
+ return ret;
- ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->mtt, srq->umem);
- if (ret)
- goto err_srq_mtt;
+ set_srq_ext_param(srq, init_attr);
- /* config index queue BA */
- srq->idx_que.umem = ib_umem_get(udata, ucmd.que_addr,
- srq->idx_que.buf_size, 0, 0);
- if (IS_ERR(srq->idx_que.umem)) {
- dev_err(hr_dev->dev,
- "ib_umem_get error for index queue\n");
- ret = PTR_ERR(srq->idx_que.umem);
- goto err_srq_mtt;
- }
+ return 0;
+}
- if (hr_dev->caps.idx_buf_pg_sz) {
- npages = (ib_umem_page_count(srq->idx_que.umem) +
- (1 << hr_dev->caps.idx_buf_pg_sz) - 1) /
- (1 << hr_dev->caps.idx_buf_pg_sz);
- page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
- ret = hns_roce_mtt_init(hr_dev, npages,
- page_shift, &srq->idx_que.mtt);
- } else {
- ret = hns_roce_mtt_init(hr_dev,
- ib_umem_page_count(srq->idx_que.umem),
- srq->idx_que.umem->page_shift,
- &srq->idx_que.mtt);
- }
+static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
+ struct ib_udata *udata)
+{
+ struct hns_roce_ib_create_srq ucmd = {};
+ int ret;
+ if (udata) {
+ ret = ib_copy_from_udata(&ucmd, udata,
+ min(udata->inlen, sizeof(ucmd)));
if (ret) {
- dev_err(hr_dev->dev,
- "hns_roce_mtt_init error for idx que\n");
- goto err_idx_mtt;
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to copy SRQ udata, ret = %d.\n",
+ ret);
+ return ret;
}
+ }
- ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->idx_que.mtt,
- srq->idx_que.umem);
- if (ret) {
- dev_err(hr_dev->dev,
- "hns_roce_ib_umem_write_mtt error for idx que\n");
- goto err_idx_buf;
- }
- } else {
- page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
- if (hns_roce_buf_alloc(hr_dev, srq_buf_size,
- (1 << page_shift) * 2, &srq->buf,
- page_shift))
- return -ENOMEM;
+ ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr);
+ if (ret)
+ return ret;
- srq->head = 0;
- srq->tail = srq->max - 1;
+ ret = alloc_srq_wqe_buf(hr_dev, srq, udata, ucmd.buf_addr);
+ if (ret)
+ goto err_idx;
- ret = hns_roce_mtt_init(hr_dev, srq->buf.npages,
- srq->buf.page_shift, &srq->mtt);
+ if (!udata) {
+ ret = alloc_srq_wrid(srq);
if (ret)
- goto err_buf;
+ goto err_wqe_buf;
+ }
- ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, &srq->buf);
- if (ret)
- goto err_srq_mtt;
+ return 0;
- page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
- ret = hns_roce_create_idx_que(ib_srq->pd, srq, page_shift);
- if (ret) {
- dev_err(hr_dev->dev, "Create idx queue fail(%d)!\n",
- ret);
- goto err_srq_mtt;
- }
+err_wqe_buf:
+ free_srq_wqe_buf(hr_dev, srq);
+err_idx:
+ free_srq_idx(hr_dev, srq);
- /* Init mtt table for idx_que */
- ret = hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf.npages,
- srq->idx_que.idx_buf.page_shift,
- &srq->idx_que.mtt);
- if (ret)
- goto err_create_idx;
+ return ret;
+}
- /* Write buffer address into the mtt table */
- ret = hns_roce_buf_write_mtt(hr_dev, &srq->idx_que.mtt,
- &srq->idx_que.idx_buf);
- if (ret)
- goto err_idx_buf;
+static void free_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+ free_srq_wrid(srq);
+ free_srq_wqe_buf(hr_dev, srq);
+ free_srq_idx(hr_dev, srq);
+}
- srq->wrid = kvmalloc_array(srq->max, sizeof(u64), GFP_KERNEL);
- if (!srq->wrid) {
- ret = -ENOMEM;
- goto err_idx_buf;
- }
+static int get_srq_ucmd(struct hns_roce_srq *srq, struct ib_udata *udata,
+ struct hns_roce_ib_create_srq *ucmd)
+{
+ struct ib_device *ibdev = srq->ibsrq.device;
+ int ret;
+
+ ret = ib_copy_from_udata(ucmd, udata, min(udata->inlen, sizeof(*ucmd)));
+ if (ret) {
+ ibdev_err(ibdev, "failed to copy SRQ udata, ret = %d.\n", ret);
+ return ret;
}
- cqn = ib_srq_has_cq(srq_init_attr->srq_type) ?
- to_hr_cq(srq_init_attr->ext.cq)->cqn : 0;
+ return 0;
+}
- srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG;
+static void free_srq_db(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
+ struct ib_udata *udata)
+{
+ struct hns_roce_ucontext *uctx;
- ret = hns_roce_srq_alloc(hr_dev, to_hr_pd(ib_srq->pd)->pdn, cqn, 0,
- &srq->mtt, 0, srq);
- if (ret)
- goto err_wrid;
+ if (!(srq->cap_flags & HNS_ROCE_SRQ_CAP_RECORD_DB))
+ return;
- srq->event = hns_roce_ib_srq_event;
- srq->ibsrq.ext.xrc.srq_num = srq->srqn;
- resp.srqn = srq->srqn;
+ srq->cap_flags &= ~HNS_ROCE_SRQ_CAP_RECORD_DB;
+ if (udata) {
+ uctx = rdma_udata_to_drv_context(udata,
+ struct hns_roce_ucontext,
+ ibucontext);
+ hns_roce_db_unmap_user(uctx, &srq->rdb);
+ } else {
+ hns_roce_free_db(hr_dev, &srq->rdb);
+ }
+}
+
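+/* Set up the SRQ doorbell: map the userspace record doorbell when both
+ * driver and userspace support it; kernel SRQs allocate a record doorbell
+ * when supported and always use the doorbell register.
+ */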
+static int alloc_srq_db(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_srq_resp *resp)
+{
+ struct hns_roce_ib_create_srq ucmd = {};
+ struct hns_roce_ucontext *uctx;
+ int ret;
if (udata) {
- if (ib_copy_to_udata(udata, &resp,
- min(udata->outlen, sizeof(resp)))) {
- ret = -EFAULT;
- goto err_srqc_alloc;
+ ret = get_srq_ucmd(srq, udata, &ucmd);
+ if (ret)
+ return ret;
+
+ if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ_RECORD_DB) &&
+ (ucmd.req_cap_flags & HNS_ROCE_SRQ_CAP_RECORD_DB)) {
+ uctx = rdma_udata_to_drv_context(udata,
+ struct hns_roce_ucontext, ibucontext);
+ ret = hns_roce_db_map_user(uctx, ucmd.db_addr,
+ &srq->rdb);
+ if (ret)
+ return ret;
+
+ srq->cap_flags |= HNS_ROCE_RSP_SRQ_CAP_RECORD_DB;
+ }
+ } else {
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ_RECORD_DB) {
+ ret = hns_roce_alloc_db(hr_dev, &srq->rdb, 1);
+ if (ret)
+ return ret;
+
+ *srq->rdb.db_record = 0;
+ srq->cap_flags |= HNS_ROCE_RSP_SRQ_CAP_RECORD_DB;
}
+ srq->db_reg = hr_dev->reg_base + SRQ_DB_REG;
}
return 0;
+}
-err_srqc_alloc:
- hns_roce_srq_free(hr_dev, srq);
+int hns_roce_create_srq(struct ib_srq *ib_srq,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device);
+ struct hns_roce_ib_create_srq_resp resp = {};
+ struct hns_roce_srq *srq = to_hr_srq(ib_srq);
+ int ret;
-err_wrid:
- kvfree(srq->wrid);
+ mutex_init(&srq->mutex);
+ spin_lock_init(&srq->lock);
-err_idx_buf:
- hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
+ ret = set_srq_param(srq, init_attr, udata);
+ if (ret)
+ goto err_out;
-err_idx_mtt:
- if (udata)
- ib_umem_release(srq->idx_que.umem);
+ ret = alloc_srq_buf(hr_dev, srq, udata);
+ if (ret)
+ goto err_out;
-err_create_idx:
- hns_roce_buf_free(hr_dev, srq->idx_que.buf_size,
- &srq->idx_que.idx_buf);
- kfree(srq->idx_que.bitmap);
+ ret = alloc_srq_db(hr_dev, srq, udata, &resp);
+ if (ret)
+ goto err_srq_buf;
-err_srq_mtt:
- hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
+ ret = alloc_srqn(hr_dev, srq);
+ if (ret)
+ goto err_srq_db;
-err_buf:
- if (udata)
- ib_umem_release(srq->umem);
- else
- hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf);
+ ret = alloc_srqc(hr_dev, srq);
+ if (ret)
+ goto err_srqn;
+
+ if (udata) {
+ resp.cap_flags = srq->cap_flags;
+ resp.srqn = srq->srqn;
+ if (ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)))) {
+ ret = -EFAULT;
+ goto err_srqc;
+ }
+ }
+
+ srq->event = hns_roce_ib_srq_event;
+ refcount_set(&srq->refcount, 1);
+ init_completion(&srq->free);
+
+ return 0;
+
+err_srqc:
+ free_srqc(hr_dev, srq);
+err_srqn:
+ free_srqn(hr_dev, srq);
+err_srq_db:
+ free_srq_db(hr_dev, srq, udata);
+err_srq_buf:
+ free_srq_buf(hr_dev, srq);
+err_out:
+ mutex_destroy(&srq->mutex);
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT]);
return ret;
}
-void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
struct hns_roce_srq *srq = to_hr_srq(ibsrq);
- hns_roce_srq_free(hr_dev, srq);
- hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
-
- if (ibsrq->uobject) {
- hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
- ib_umem_release(srq->idx_que.umem);
- ib_umem_release(srq->umem);
- } else {
- kvfree(srq->wrid);
- hns_roce_buf_free(hr_dev, srq->max << srq->wqe_shift,
- &srq->buf);
- }
+ free_srqc(hr_dev, srq);
+ free_srqn(hr_dev, srq);
+ free_srq_db(hr_dev, srq, udata);
+ free_srq_buf(hr_dev, srq);
+ mutex_destroy(&srq->mutex);
+ return 0;
}
-int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev)
+void hns_roce_init_srq_table(struct hns_roce_dev *hr_dev)
{
struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+ struct hns_roce_ida *srq_ida = &srq_table->srq_ida;
xa_init(&srq_table->xa);
- return hns_roce_bitmap_init(&srq_table->bitmap, hr_dev->caps.num_srqs,
- hr_dev->caps.num_srqs - 1,
- hr_dev->caps.reserved_srqs, 0);
-}
-
-void hns_roce_cleanup_srq_table(struct hns_roce_dev *hr_dev)
-{
- hns_roce_bitmap_cleanup(&hr_dev->srq_table.bitmap);
+ ida_init(&srq_ida->ida);
+ srq_ida->max = hr_dev->caps.num_srqs - 1;
+ srq_ida->min = hr_dev->caps.reserved_srqs;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_trace.h b/drivers/infiniband/hw/hns/hns_roce_trace.h
new file mode 100644
index 000000000000..59ceb591b3a1
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_trace.h
@@ -0,0 +1,216 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (c) 2025 Hisilicon Limited.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hns_roce
+
+#if !defined(__HNS_ROCE_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HNS_ROCE_TRACE_H
+
+#include <linux/tracepoint.h>
+#include <linux/string_choices.h>
+#include "hns_roce_device.h"
+#include "hns_roce_hw_v2.h"
+
+DECLARE_EVENT_CLASS(flush_head_template,
+ TP_PROTO(unsigned long qpn, u32 pi,
+ enum hns_roce_trace_type type),
+ TP_ARGS(qpn, pi, type),
+
+ TP_STRUCT__entry(__field(unsigned long, qpn)
+ __field(u32, pi)
+ __field(enum hns_roce_trace_type, type)
+ ),
+
+ TP_fast_assign(__entry->qpn = qpn;
+ __entry->pi = pi;
+ __entry->type = type;
+ ),
+
+ TP_printk("%s 0x%lx flush head 0x%x.",
+ trace_type_to_str(__entry->type),
+ __entry->qpn, __entry->pi)
+);
+
+DEFINE_EVENT(flush_head_template, hns_sq_flush_cqe,
+ TP_PROTO(unsigned long qpn, u32 pi,
+ enum hns_roce_trace_type type),
+ TP_ARGS(qpn, pi, type));
+DEFINE_EVENT(flush_head_template, hns_rq_flush_cqe,
+ TP_PROTO(unsigned long qpn, u32 pi,
+ enum hns_roce_trace_type type),
+ TP_ARGS(qpn, pi, type));
+
+#define MAX_SGE_PER_WQE 64
+#define MAX_WQE_SIZE (MAX_SGE_PER_WQE * HNS_ROCE_SGE_SIZE)
+DECLARE_EVENT_CLASS(wqe_template,
+ TP_PROTO(unsigned long qpn, u32 idx, void *wqe, u32 len,
+ u64 id, enum hns_roce_trace_type type),
+ TP_ARGS(qpn, idx, wqe, len, id, type),
+
+ TP_STRUCT__entry(__field(unsigned long, qpn)
+ __field(u32, idx)
+ __array(u32, wqe,
+ MAX_WQE_SIZE / sizeof(__le32))
+ __field(u32, len)
+ __field(u64, id)
+ __field(enum hns_roce_trace_type, type)
+ ),
+
+ TP_fast_assign(__entry->qpn = qpn;
+ __entry->idx = idx;
+ __entry->id = id;
+ __entry->len = len / sizeof(__le32);
+ __entry->type = type;
+ for (int i = 0; i < __entry->len; i++)
+ __entry->wqe[i] = le32_to_cpu(((__le32 *)wqe)[i]);
+ ),
+
+ TP_printk("%s 0x%lx wqe(0x%x/0x%llx): %s",
+ trace_type_to_str(__entry->type),
+ __entry->qpn, __entry->idx, __entry->id,
+ __print_array(__entry->wqe, __entry->len,
+ sizeof(__le32)))
+);
+
+DEFINE_EVENT(wqe_template, hns_sq_wqe,
+ TP_PROTO(unsigned long qpn, u32 idx, void *wqe, u32 len, u64 id,
+ enum hns_roce_trace_type type),
+ TP_ARGS(qpn, idx, wqe, len, id, type));
+DEFINE_EVENT(wqe_template, hns_rq_wqe,
+ TP_PROTO(unsigned long qpn, u32 idx, void *wqe, u32 len, u64 id,
+ enum hns_roce_trace_type type),
+ TP_ARGS(qpn, idx, wqe, len, id, type));
+DEFINE_EVENT(wqe_template, hns_srq_wqe,
+ TP_PROTO(unsigned long qpn, u32 idx, void *wqe, u32 len, u64 id,
+ enum hns_roce_trace_type type),
+ TP_ARGS(qpn, idx, wqe, len, id, type));
+
+TRACE_EVENT(hns_ae_info,
+ TP_PROTO(int event_type, void *aeqe, unsigned int len),
+ TP_ARGS(event_type, aeqe, len),
+
+ TP_STRUCT__entry(__field(int, event_type)
+ __array(u32, aeqe,
+ HNS_ROCE_V3_EQE_SIZE / sizeof(__le32))
+ __field(u32, len)
+ ),
+
+ TP_fast_assign(__entry->event_type = event_type;
+ __entry->len = len / sizeof(__le32);
+ for (int i = 0; i < __entry->len; i++)
+ __entry->aeqe[i] = le32_to_cpu(((__le32 *)aeqe)[i]);
+ ),
+
+ TP_printk("event %2d aeqe: %s", __entry->event_type,
+ __print_array(__entry->aeqe, __entry->len, sizeof(__le32)))
+);
+
+TRACE_EVENT(hns_mr,
+ TP_PROTO(struct hns_roce_mr *mr),
+ TP_ARGS(mr),
+
+ TP_STRUCT__entry(__field(u64, iova)
+ __field(u64, size)
+ __field(u32, key)
+ __field(u32, pd)
+ __field(u32, pbl_hop_num)
+ __field(u32, npages)
+ __field(int, type)
+ __field(int, enabled)
+ ),
+
+ TP_fast_assign(__entry->iova = mr->iova;
+ __entry->size = mr->size;
+ __entry->key = mr->key;
+ __entry->pd = mr->pd;
+ __entry->pbl_hop_num = mr->pbl_hop_num;
+ __entry->npages = mr->npages;
+ __entry->type = mr->type;
+ __entry->enabled = mr->enabled;
+ ),
+
+ TP_printk("iova:0x%llx, size:%llu, key:%u, pd:%u, pbl_hop:%u, npages:%u, type:%d, status:%d",
+ __entry->iova, __entry->size, __entry->key,
+ __entry->pd, __entry->pbl_hop_num, __entry->npages,
+ __entry->type, __entry->enabled)
+);
+
+TRACE_EVENT(hns_buf_attr,
+ TP_PROTO(struct hns_roce_buf_attr *attr),
+ TP_ARGS(attr),
+
+ TP_STRUCT__entry(__field(unsigned int, region_count)
+ __field(unsigned int, region0_size)
+ __field(int, region0_hopnum)
+ __field(unsigned int, region1_size)
+ __field(int, region1_hopnum)
+ __field(unsigned int, region2_size)
+ __field(int, region2_hopnum)
+ __field(unsigned int, page_shift)
+ __field(bool, mtt_only)
+ ),
+
+ TP_fast_assign(__entry->region_count = attr->region_count;
+ __entry->region0_size = attr->region[0].size;
+ __entry->region0_hopnum = attr->region[0].hopnum;
+ __entry->region1_size = attr->region[1].size;
+ __entry->region1_hopnum = attr->region[1].hopnum;
+ __entry->region2_size = attr->region[2].size;
+ __entry->region2_hopnum = attr->region[2].hopnum;
+ __entry->page_shift = attr->page_shift;
+ __entry->mtt_only = attr->mtt_only;
+ ),
+
+ TP_printk("rg cnt:%u, pg_sft:0x%x, mtt_only:%s, rg 0 (sz:%u, hop:%u), rg 1 (sz:%u, hop:%u), rg 2 (sz:%u, hop:%u)\n",
+ __entry->region_count, __entry->page_shift,
+ str_yes_no(__entry->mtt_only),
+ __entry->region0_size, __entry->region0_hopnum,
+ __entry->region1_size, __entry->region1_hopnum,
+ __entry->region2_size, __entry->region2_hopnum)
+);
+
+DECLARE_EVENT_CLASS(cmdq,
+ TP_PROTO(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmq_desc *desc),
+ TP_ARGS(hr_dev, desc),
+
+ TP_STRUCT__entry(__string(dev_name, dev_name(hr_dev->dev))
+ __field(u16, opcode)
+ __field(u16, flag)
+ __field(u16, retval)
+ __array(u32, data, 6)
+ ),
+
+ TP_fast_assign(__assign_str(dev_name);
+ __entry->opcode = le16_to_cpu(desc->opcode);
+ __entry->flag = le16_to_cpu(desc->flag);
+ __entry->retval = le16_to_cpu(desc->retval);
+ for (int i = 0; i < 6; i++)
+ __entry->data[i] = le32_to_cpu(desc->data[i]);
+ ),
+
+ TP_printk("%s cmdq opcode:0x%x, flag:0x%x, retval:0x%x, data:%s\n",
+ __get_str(dev_name), __entry->opcode,
+ __entry->flag, __entry->retval,
+ __print_array(__entry->data, 6, sizeof(__le32)))
+);
+
+DEFINE_EVENT(cmdq, hns_cmdq_req,
+ TP_PROTO(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmq_desc *desc),
+ TP_ARGS(hr_dev, desc));
+DEFINE_EVENT(cmdq, hns_cmdq_resp,
+ TP_PROTO(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmq_desc *desc),
+ TP_ARGS(hr_dev, desc));
+
+#endif /* __HNS_ROCE_TRACE_H */
+
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE hns_roce_trace
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/hw/i40iw/Kconfig b/drivers/infiniband/hw/i40iw/Kconfig
deleted file mode 100644
index d867ef1ac72a..000000000000
--- a/drivers/infiniband/hw/i40iw/Kconfig
+++ /dev/null
@@ -1,8 +0,0 @@
-config INFINIBAND_I40IW
- tristate "Intel(R) Ethernet X722 iWARP Driver"
- depends on INET && I40E
- depends on IPV6 || !IPV6
- depends on PCI
- select GENERIC_ALLOCATOR
- ---help---
- Intel(R) Ethernet X722 iWARP Driver
diff --git a/drivers/infiniband/hw/i40iw/Makefile b/drivers/infiniband/hw/i40iw/Makefile
deleted file mode 100644
index 8942f8229945..000000000000
--- a/drivers/infiniband/hw/i40iw/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-ccflags-y := -I $(srctree)/drivers/net/ethernet/intel/i40e
-
-obj-$(CONFIG_INFINIBAND_I40IW) += i40iw.o
-
-i40iw-objs :=\
- i40iw_cm.o i40iw_ctrl.o \
- i40iw_hmc.o i40iw_hw.o i40iw_main.o \
- i40iw_pble.o i40iw_puda.o i40iw_uk.o i40iw_utils.o \
- i40iw_verbs.o i40iw_virtchnl.o i40iw_vf.o
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
deleted file mode 100644
index 8feec35f95a7..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ /dev/null
@@ -1,602 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_IW_H
-#define I40IW_IW_H
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/spinlock.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/workqueue.h>
-#include <linux/slab.h>
-#include <linux/io.h>
-#include <linux/crc32c.h>
-#include <rdma/ib_smi.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_pack.h>
-#include <rdma/rdma_cm.h>
-#include <rdma/iw_cm.h>
-#include <crypto/hash.h>
-
-#include "i40iw_status.h"
-#include "i40iw_osdep.h"
-#include "i40iw_d.h"
-#include "i40iw_hmc.h"
-
-#include <i40e_client.h>
-#include "i40iw_type.h"
-#include "i40iw_p.h"
-#include <rdma/i40iw-abi.h>
-#include "i40iw_pble.h"
-#include "i40iw_verbs.h"
-#include "i40iw_cm.h"
-#include "i40iw_user.h"
-#include "i40iw_puda.h"
-
-#define I40IW_FW_VERSION 2
-#define I40IW_HW_VERSION 2
-
-#define I40IW_ARP_ADD 1
-#define I40IW_ARP_DELETE 2
-#define I40IW_ARP_RESOLVE 3
-
-#define I40IW_MACIP_ADD 1
-#define I40IW_MACIP_DELETE 2
-
-#define IW_CCQ_SIZE (I40IW_CQP_SW_SQSIZE_2048 + 1)
-#define IW_CEQ_SIZE 2048
-#define IW_AEQ_SIZE 2048
-
-#define RX_BUF_SIZE (1536 + 8)
-#define IW_REG0_SIZE (4 * 1024)
-#define IW_TX_TIMEOUT (6 * HZ)
-#define IW_FIRST_QPN 1
-#define IW_SW_CONTEXT_ALIGN 1024
-
-#define MAX_DPC_ITERATIONS 128
-
-#define I40IW_EVENT_TIMEOUT 100000
-#define I40IW_VCHNL_EVENT_TIMEOUT 100000
-
-#define I40IW_NO_VLAN 0xffff
-#define I40IW_NO_QSET 0xffff
-
-/* access to mcast filter list */
-#define IW_ADD_MCAST false
-#define IW_DEL_MCAST true
-
-#define I40IW_DRV_OPT_ENABLE_MPA_VER_0 0x00000001
-#define I40IW_DRV_OPT_DISABLE_MPA_CRC 0x00000002
-#define I40IW_DRV_OPT_DISABLE_FIRST_WRITE 0x00000004
-#define I40IW_DRV_OPT_DISABLE_INTF 0x00000008
-#define I40IW_DRV_OPT_ENABLE_MSI 0x00000010
-#define I40IW_DRV_OPT_DUAL_LOGICAL_PORT 0x00000020
-#define I40IW_DRV_OPT_NO_INLINE_DATA 0x00000080
-#define I40IW_DRV_OPT_DISABLE_INT_MOD 0x00000100
-#define I40IW_DRV_OPT_DISABLE_VIRT_WQ 0x00000200
-#define I40IW_DRV_OPT_ENABLE_PAU 0x00000400
-#define I40IW_DRV_OPT_MCAST_LOGPORT_MAP 0x00000800
-
-#define IW_HMC_OBJ_TYPE_NUM ARRAY_SIZE(iw_hmc_obj_types)
-#define IW_CFG_FPM_QP_COUNT 32768
-#define I40IW_MAX_PAGES_PER_FMR 512
-#define I40IW_MIN_PAGES_PER_FMR 1
-#define I40IW_CQP_COMPL_RQ_WQE_FLUSHED 2
-#define I40IW_CQP_COMPL_SQ_WQE_FLUSHED 3
-#define I40IW_CQP_COMPL_RQ_SQ_WQE_FLUSHED 4
-
-struct i40iw_cqp_compl_info {
- u32 op_ret_val;
- u16 maj_err_code;
- u16 min_err_code;
- bool error;
- u8 op_code;
-};
-
-#define i40iw_pr_err(fmt, args ...) pr_err("%s: "fmt, __func__, ## args)
-
-#define i40iw_pr_info(fmt, args ...) pr_info("%s: " fmt, __func__, ## args)
-
-#define i40iw_pr_warn(fmt, args ...) pr_warn("%s: " fmt, __func__, ## args)
-
-struct i40iw_cqp_request {
- struct cqp_commands_info info;
- wait_queue_head_t waitq;
- struct list_head list;
- atomic_t refcount;
- void (*callback_fcn)(struct i40iw_cqp_request*, u32);
- void *param;
- struct i40iw_cqp_compl_info compl_info;
- bool waiting;
- bool request_done;
- bool dynamic;
-};
-
-struct i40iw_cqp {
- struct i40iw_sc_cqp sc_cqp;
- spinlock_t req_lock; /*cqp request list */
- wait_queue_head_t waitq;
- struct i40iw_dma_mem sq;
- struct i40iw_dma_mem host_ctx;
- u64 *scratch_array;
- struct i40iw_cqp_request *cqp_requests;
- struct list_head cqp_avail_reqs;
- struct list_head cqp_pending_reqs;
-};
-
-struct i40iw_device;
-
-struct i40iw_ccq {
- struct i40iw_sc_cq sc_cq;
- spinlock_t lock; /* ccq control */
- wait_queue_head_t waitq;
- struct i40iw_dma_mem mem_cq;
- struct i40iw_dma_mem shadow_area;
-};
-
-struct i40iw_ceq {
- struct i40iw_sc_ceq sc_ceq;
- struct i40iw_dma_mem mem;
- u32 irq;
- u32 msix_idx;
- struct i40iw_device *iwdev;
- struct tasklet_struct dpc_tasklet;
-};
-
-struct i40iw_aeq {
- struct i40iw_sc_aeq sc_aeq;
- struct i40iw_dma_mem mem;
-};
-
-struct i40iw_arp_entry {
- u32 ip_addr[4];
- u8 mac_addr[ETH_ALEN];
-};
-
-enum init_completion_state {
- INVALID_STATE = 0,
- INITIAL_STATE,
- CQP_CREATED,
- HMC_OBJS_CREATED,
- PBLE_CHUNK_MEM,
- CCQ_CREATED,
- AEQ_CREATED,
- CEQ_CREATED,
- ILQ_CREATED,
- IEQ_CREATED,
- IP_ADDR_REGISTERED,
- RDMA_DEV_REGISTERED
-};
-
-struct i40iw_msix_vector {
- u32 idx;
- u32 irq;
- u32 cpu_affinity;
- u32 ceq_id;
- cpumask_t mask;
-};
-
-struct l2params_work {
- struct work_struct work;
- struct i40iw_device *iwdev;
- struct i40iw_l2params l2params;
-};
-
-#define I40IW_MSIX_TABLE_SIZE 65
-
-struct virtchnl_work {
- struct work_struct work;
- union {
- struct i40iw_cqp_request *cqp_request;
- struct i40iw_virtchnl_work_info work_info;
- };
-};
-
-struct i40e_qvlist_info;
-
-struct i40iw_device {
- struct i40iw_ib_device *iwibdev;
- struct net_device *netdev;
- wait_queue_head_t vchnl_waitq;
- struct i40iw_sc_dev sc_dev;
- struct i40iw_sc_vsi vsi;
- struct i40iw_handler *hdl;
- struct i40e_info *ldev;
- struct i40e_client *client;
- struct i40iw_hw hw;
- struct i40iw_cm_core cm_core;
- u8 *mem_resources;
- unsigned long *allocated_qps;
- unsigned long *allocated_cqs;
- unsigned long *allocated_mrs;
- unsigned long *allocated_pds;
- unsigned long *allocated_arps;
- struct i40iw_qp **qp_table;
- bool msix_shared;
- u32 msix_count;
- struct i40iw_msix_vector *iw_msixtbl;
- struct i40e_qvlist_info *iw_qvlist;
-
- struct i40iw_hmc_pble_rsrc *pble_rsrc;
- struct i40iw_arp_entry *arp_table;
- struct i40iw_cqp cqp;
- struct i40iw_ccq ccq;
- u32 ceqs_count;
- struct i40iw_ceq *ceqlist;
- struct i40iw_aeq aeq;
- u32 arp_table_size;
- u32 next_arp_index;
- spinlock_t resource_lock; /* hw resource access */
- spinlock_t qptable_lock;
- u32 vendor_id;
- u32 vendor_part_id;
- u32 of_device_registered;
-
- u32 device_cap_flags;
- unsigned long db_start;
- u8 resource_profile;
- u8 max_rdma_vfs;
- u8 max_enabled_vfs;
- u8 max_sge;
- u8 iw_status;
- u8 send_term_ok;
- bool push_mode; /* Initialized from parameter passed to driver */
-
- /* x710 specific */
- struct mutex pbl_mutex;
- struct tasklet_struct dpc_tasklet;
- struct workqueue_struct *virtchnl_wq;
- struct virtchnl_work virtchnl_w[I40IW_MAX_PE_ENABLED_VF_COUNT];
- struct i40iw_dma_mem obj_mem;
- struct i40iw_dma_mem obj_next;
- u8 *hmc_info_mem;
- u32 sd_type;
- struct workqueue_struct *param_wq;
- atomic_t params_busy;
- enum init_completion_state init_state;
- u16 mac_ip_table_idx;
- atomic_t vchnl_msgs;
- u32 max_mr;
- u32 max_qp;
- u32 max_cq;
- u32 max_pd;
- u32 next_qp;
- u32 next_cq;
- u32 next_pd;
- u32 max_mr_size;
- u32 max_qp_wr;
- u32 max_cqe;
- u32 mr_stagmask;
- u32 mpa_version;
- bool dcb;
- bool closing;
- bool reset;
- u32 used_pds;
- u32 used_cqs;
- u32 used_mrs;
- u32 used_qps;
- wait_queue_head_t close_wq;
- atomic64_t use_count;
-};
-
-struct i40iw_ib_device {
- struct ib_device ibdev;
- struct i40iw_device *iwdev;
-};
-
-struct i40iw_handler {
- struct list_head list;
- struct i40e_client *client;
- struct i40iw_device device;
- struct i40e_info ldev;
-};
-
-/**
- * to_iwdev - get device
- * @ibdev: ib device
- **/
-static inline struct i40iw_device *to_iwdev(struct ib_device *ibdev)
-{
- return container_of(ibdev, struct i40iw_ib_device, ibdev)->iwdev;
-}
-
-/**
- * to_ucontext - get user context
- * @ibucontext: ib user context
- **/
-static inline struct i40iw_ucontext *to_ucontext(struct ib_ucontext *ibucontext)
-{
- return container_of(ibucontext, struct i40iw_ucontext, ibucontext);
-}
-
-/**
- * to_iwpd - get protection domain
- * @ibpd: ib pd
- **/
-static inline struct i40iw_pd *to_iwpd(struct ib_pd *ibpd)
-{
- return container_of(ibpd, struct i40iw_pd, ibpd);
-}
-
-/**
- * to_iwmr - get device memory region
- * @ibdev: ib memory region
- **/
-static inline struct i40iw_mr *to_iwmr(struct ib_mr *ibmr)
-{
- return container_of(ibmr, struct i40iw_mr, ibmr);
-}
-
-/**
- * to_iwmr_from_ibfmr - get device memory region
- * @ibfmr: ib fmr
- **/
-static inline struct i40iw_mr *to_iwmr_from_ibfmr(struct ib_fmr *ibfmr)
-{
- return container_of(ibfmr, struct i40iw_mr, ibfmr);
-}
-
-/**
- * to_iwmw - get device memory window
- * @ibmw: ib memory window
- **/
-static inline struct i40iw_mr *to_iwmw(struct ib_mw *ibmw)
-{
- return container_of(ibmw, struct i40iw_mr, ibmw);
-}
-
-/**
- * to_iwcq - get completion queue
- * @ibcq: ib cq
- **/
-static inline struct i40iw_cq *to_iwcq(struct ib_cq *ibcq)
-{
- return container_of(ibcq, struct i40iw_cq, ibcq);
-}
-
-/**
- * to_iwqp - get device qp
- * @ibqp: ib qp
- **/
-static inline struct i40iw_qp *to_iwqp(struct ib_qp *ibqp)
-{
- return container_of(ibqp, struct i40iw_qp, ibqp);
-}
-
-/* i40iw.c */
-void i40iw_add_ref(struct ib_qp *);
-void i40iw_rem_ref(struct ib_qp *);
-struct ib_qp *i40iw_get_qp(struct ib_device *, int);
-
-void i40iw_flush_wqes(struct i40iw_device *iwdev,
- struct i40iw_qp *qp);
-
-void i40iw_manage_arp_cache(struct i40iw_device *iwdev,
- unsigned char *mac_addr,
- u32 *ip_addr,
- bool ipv4,
- u32 action);
-
-int i40iw_manage_apbvt(struct i40iw_device *iwdev,
- u16 accel_local_port,
- bool add_port);
-
-struct i40iw_cqp_request *i40iw_get_cqp_request(struct i40iw_cqp *cqp, bool wait);
-void i40iw_free_cqp_request(struct i40iw_cqp *cqp, struct i40iw_cqp_request *cqp_request);
-void i40iw_put_cqp_request(struct i40iw_cqp *cqp, struct i40iw_cqp_request *cqp_request);
-
-/**
- * i40iw_alloc_resource - allocate a resource
- * @iwdev: device pointer
- * @resource_array: resource bit array
- * @max_resources: maximum resource number
- * @req_resource_num: allocated resource number
- * @next: next free id
- **/
-static inline int i40iw_alloc_resource(struct i40iw_device *iwdev,
- unsigned long *resource_array,
- u32 max_resources,
- u32 *req_resource_num,
- u32 *next)
-{
- u32 resource_num;
- unsigned long flags;
-
- spin_lock_irqsave(&iwdev->resource_lock, flags);
- resource_num = find_next_zero_bit(resource_array, max_resources, *next);
- if (resource_num >= max_resources) {
- resource_num = find_first_zero_bit(resource_array, max_resources);
- if (resource_num >= max_resources) {
- spin_unlock_irqrestore(&iwdev->resource_lock, flags);
- return -EOVERFLOW;
- }
- }
- set_bit(resource_num, resource_array);
- *next = resource_num + 1;
- if (*next == max_resources)
- *next = 0;
- *req_resource_num = resource_num;
- spin_unlock_irqrestore(&iwdev->resource_lock, flags);
-
- return 0;
-}
-
-/**
- * i40iw_is_resource_allocated - determine if resource is
- * allocated
- * @iwdev: device pointer
- * @resource_array: resource array for the resource_num
- * @resource_num: resource number to check
- **/
-static inline bool i40iw_is_resource_allocated(struct i40iw_device *iwdev,
- unsigned long *resource_array,
- u32 resource_num)
-{
- bool bit_is_set;
- unsigned long flags;
-
- spin_lock_irqsave(&iwdev->resource_lock, flags);
-
- bit_is_set = test_bit(resource_num, resource_array);
- spin_unlock_irqrestore(&iwdev->resource_lock, flags);
-
- return bit_is_set;
-}
-
-/**
- * i40iw_free_resource - free a resource
- * @iwdev: device pointer
- * @resource_array: resource array for the resource_num
- * @resource_num: resource number to free
- **/
-static inline void i40iw_free_resource(struct i40iw_device *iwdev,
- unsigned long *resource_array,
- u32 resource_num)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&iwdev->resource_lock, flags);
- clear_bit(resource_num, resource_array);
- spin_unlock_irqrestore(&iwdev->resource_lock, flags);
-}
-
-/**
- * to_iwhdl - Get the handler from the device pointer
- * @iwdev: device pointer
- **/
-static inline struct i40iw_handler *to_iwhdl(struct i40iw_device *iw_dev)
-{
- return container_of(iw_dev, struct i40iw_handler, device);
-}
-
-struct i40iw_handler *i40iw_find_netdev(struct net_device *netdev);
-
-/**
- * iw_init_resources -
- */
-u32 i40iw_initialize_hw_resources(struct i40iw_device *iwdev);
-
-int i40iw_register_rdma_device(struct i40iw_device *iwdev);
-void i40iw_port_ibevent(struct i40iw_device *iwdev);
-void i40iw_cm_disconn(struct i40iw_qp *iwqp);
-void i40iw_cm_disconn_worker(void *);
-int mini_cm_recv_pkt(struct i40iw_cm_core *, struct i40iw_device *,
- struct sk_buff *);
-
-enum i40iw_status_code i40iw_handle_cqp_op(struct i40iw_device *iwdev,
- struct i40iw_cqp_request *cqp_request);
-enum i40iw_status_code i40iw_add_mac_addr(struct i40iw_device *iwdev,
- u8 *mac_addr, u8 *mac_index);
-int i40iw_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *);
-void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq);
-
-void i40iw_cleanup_pending_cqp_op(struct i40iw_device *iwdev);
-void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev);
-void i40iw_add_pdusecount(struct i40iw_pd *iwpd);
-void i40iw_rem_devusecount(struct i40iw_device *iwdev);
-void i40iw_add_devusecount(struct i40iw_device *iwdev);
-void i40iw_hw_modify_qp(struct i40iw_device *iwdev, struct i40iw_qp *iwqp,
- struct i40iw_modify_qp_info *info, bool wait);
-
-void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev,
- struct i40iw_sc_qp *qp,
- bool suspend);
-enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev,
- struct i40iw_cm_info *cminfo,
- enum i40iw_quad_entry_type etype,
- enum i40iw_quad_hash_manage_type mtype,
- void *cmnode,
- bool wait);
-void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf);
-void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp);
-void i40iw_free_qp_resources(struct i40iw_device *iwdev,
- struct i40iw_qp *iwqp,
- u32 qp_num);
-enum i40iw_status_code i40iw_obj_aligned_mem(struct i40iw_device *iwdev,
- struct i40iw_dma_mem *memptr,
- u32 size, u32 mask);
-
-void i40iw_request_reset(struct i40iw_device *iwdev);
-void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev);
-int i40iw_setup_cm_core(struct i40iw_device *iwdev);
-void i40iw_cleanup_cm_core(struct i40iw_cm_core *cm_core);
-void i40iw_process_ceq(struct i40iw_device *, struct i40iw_ceq *iwceq);
-void i40iw_process_aeq(struct i40iw_device *);
-void i40iw_next_iw_state(struct i40iw_qp *iwqp,
- u8 state, u8 del_hash,
- u8 term, u8 term_len);
-int i40iw_send_syn(struct i40iw_cm_node *cm_node, u32 sendack);
-int i40iw_send_reset(struct i40iw_cm_node *cm_node);
-struct i40iw_cm_node *i40iw_find_node(struct i40iw_cm_core *cm_core,
- u16 rem_port,
- u32 *rem_addr,
- u16 loc_port,
- u32 *loc_addr,
- bool add_refcnt,
- bool accelerated_list);
-
-enum i40iw_status_code i40iw_hw_flush_wqes(struct i40iw_device *iwdev,
- struct i40iw_sc_qp *qp,
- struct i40iw_qp_flush_info *info,
- bool wait);
-
-void i40iw_gen_ae(struct i40iw_device *iwdev,
- struct i40iw_sc_qp *qp,
- struct i40iw_gen_ae_info *info,
- bool wait);
-
-void i40iw_copy_ip_ntohl(u32 *dst, __be32 *src);
-struct ib_mr *i40iw_reg_phys_mr(struct ib_pd *ib_pd,
- u64 addr,
- u64 size,
- int acc,
- u64 *iova_start);
-
-int i40iw_inetaddr_event(struct notifier_block *notifier,
- unsigned long event,
- void *ptr);
-int i40iw_inet6addr_event(struct notifier_block *notifier,
- unsigned long event,
- void *ptr);
-int i40iw_net_event(struct notifier_block *notifier,
- unsigned long event,
- void *ptr);
-int i40iw_netdevice_event(struct notifier_block *notifier,
- unsigned long event,
- void *ptr);
-
-#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
deleted file mode 100644
index 8233f5a4e623..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ /dev/null
@@ -1,4419 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#include <linux/atomic.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/init.h>
-#include <linux/if_arp.h>
-#include <linux/if_vlan.h>
-#include <linux/notifier.h>
-#include <linux/net.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/time.h>
-#include <linux/delay.h>
-#include <linux/etherdevice.h>
-#include <linux/netdevice.h>
-#include <linux/random.h>
-#include <linux/list.h>
-#include <linux/threads.h>
-#include <linux/highmem.h>
-#include <net/arp.h>
-#include <net/ndisc.h>
-#include <net/neighbour.h>
-#include <net/route.h>
-#include <net/addrconf.h>
-#include <net/ip6_route.h>
-#include <net/ip_fib.h>
-#include <net/secure_seq.h>
-#include <net/tcp.h>
-#include <asm/checksum.h>
-
-#include "i40iw.h"
-
-static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *);
-static void i40iw_cm_post_event(struct i40iw_cm_event *event);
-static void i40iw_disconnect_worker(struct work_struct *work);
-
-/**
- * i40iw_free_sqbuf - put back puda buffer if refcount = 0
- * @vsi: pointer to vsi structure
- * @buf: puda buffer to free
- */
-void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp)
-{
- struct i40iw_puda_buf *buf = (struct i40iw_puda_buf *)bufp;
- struct i40iw_puda_rsrc *ilq = vsi->ilq;
-
- if (!atomic_dec_return(&buf->refcount))
- i40iw_puda_ret_bufpool(ilq, buf);
-}
-
-/**
- * i40iw_derive_hw_ird_setting - Calculate IRD
- *
- * @cm_ird: IRD of connection's node
- *
- * The ird from the connection is rounded to a supported HW
- * setting (2,8,32,64) and then encoded for ird_size field of
- * qp_ctx
- */
-static u8 i40iw_derive_hw_ird_setting(u16 cm_ird)
-{
- u8 encoded_ird_size;
-
- /* ird_size field is encoded in qp_ctx */
- switch (cm_ird ? roundup_pow_of_two(cm_ird) : 0) {
- case I40IW_HW_IRD_SETTING_64:
- encoded_ird_size = 3;
- break;
- case I40IW_HW_IRD_SETTING_32:
- case I40IW_HW_IRD_SETTING_16:
- encoded_ird_size = 2;
- break;
- case I40IW_HW_IRD_SETTING_8:
- case I40IW_HW_IRD_SETTING_4:
- encoded_ird_size = 1;
- break;
- case I40IW_HW_IRD_SETTING_2:
- default:
- encoded_ird_size = 0;
- break;
- }
- return encoded_ird_size;
-}
-
-/**
- * i40iw_record_ird_ord - Record IRD/ORD passed in
- * @cm_node: connection's node
- * @conn_ird: connection IRD
- * @conn_ord: connection ORD
- */
-static void i40iw_record_ird_ord(struct i40iw_cm_node *cm_node, u32 conn_ird,
- u32 conn_ord)
-{
- if (conn_ird > I40IW_MAX_IRD_SIZE)
- conn_ird = I40IW_MAX_IRD_SIZE;
-
- if (conn_ord > I40IW_MAX_ORD_SIZE)
- conn_ord = I40IW_MAX_ORD_SIZE;
- else if (!conn_ord && cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO)
- conn_ord = 1;
-
- cm_node->ird_size = conn_ird;
- cm_node->ord_size = conn_ord;
-}
-
-/**
- * i40iw_copy_ip_ntohl - change network to host ip
- * @dst: host ip
- * @src: big endian
- */
-void i40iw_copy_ip_ntohl(u32 *dst, __be32 *src)
-{
- *dst++ = ntohl(*src++);
- *dst++ = ntohl(*src++);
- *dst++ = ntohl(*src++);
- *dst = ntohl(*src);
-}
-
-/**
- * i40iw_copy_ip_htonl - change host addr to network ip
- * @dst: host ip
- * @src: little endian
- */
-static inline void i40iw_copy_ip_htonl(__be32 *dst, u32 *src)
-{
- *dst++ = htonl(*src++);
- *dst++ = htonl(*src++);
- *dst++ = htonl(*src++);
- *dst = htonl(*src);
-}
-
-/**
- * i40iw_fill_sockaddr4 - get addr info for passive connection
- * @cm_node: connection's node
- * @event: upper layer's cm event
- */
-static inline void i40iw_fill_sockaddr4(struct i40iw_cm_node *cm_node,
- struct iw_cm_event *event)
-{
- struct sockaddr_in *laddr = (struct sockaddr_in *)&event->local_addr;
- struct sockaddr_in *raddr = (struct sockaddr_in *)&event->remote_addr;
-
- laddr->sin_family = AF_INET;
- raddr->sin_family = AF_INET;
-
- laddr->sin_port = htons(cm_node->loc_port);
- raddr->sin_port = htons(cm_node->rem_port);
-
- laddr->sin_addr.s_addr = htonl(cm_node->loc_addr[0]);
- raddr->sin_addr.s_addr = htonl(cm_node->rem_addr[0]);
-}
-
-/**
- * i40iw_fill_sockaddr6 - get ipv6 addr info for passive side
- * @cm_node: connection's node
- * @event: upper layer's cm event
- */
-static inline void i40iw_fill_sockaddr6(struct i40iw_cm_node *cm_node,
- struct iw_cm_event *event)
-{
- struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)&event->local_addr;
- struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)&event->remote_addr;
-
- laddr6->sin6_family = AF_INET6;
- raddr6->sin6_family = AF_INET6;
-
- laddr6->sin6_port = htons(cm_node->loc_port);
- raddr6->sin6_port = htons(cm_node->rem_port);
-
- i40iw_copy_ip_htonl(laddr6->sin6_addr.in6_u.u6_addr32,
- cm_node->loc_addr);
- i40iw_copy_ip_htonl(raddr6->sin6_addr.in6_u.u6_addr32,
- cm_node->rem_addr);
-}
-
-/**
- * i40iw_get_addr_info
- * @cm_node: contains ip/tcp info
- * @cm_info: to get a copy of the cm_node ip/tcp info
-*/
-static void i40iw_get_addr_info(struct i40iw_cm_node *cm_node,
- struct i40iw_cm_info *cm_info)
-{
- cm_info->ipv4 = cm_node->ipv4;
- cm_info->vlan_id = cm_node->vlan_id;
- memcpy(cm_info->loc_addr, cm_node->loc_addr, sizeof(cm_info->loc_addr));
- memcpy(cm_info->rem_addr, cm_node->rem_addr, sizeof(cm_info->rem_addr));
- cm_info->loc_port = cm_node->loc_port;
- cm_info->rem_port = cm_node->rem_port;
- cm_info->user_pri = cm_node->user_pri;
-}
-
-/**
- * i40iw_get_cmevent_info - for cm event upcall
- * @cm_node: connection's node
- * @cm_id: upper layers cm struct for the event
- * @event: upper layer's cm event
- */
-static inline void i40iw_get_cmevent_info(struct i40iw_cm_node *cm_node,
- struct iw_cm_id *cm_id,
- struct iw_cm_event *event)
-{
- memcpy(&event->local_addr, &cm_id->m_local_addr,
- sizeof(event->local_addr));
- memcpy(&event->remote_addr, &cm_id->m_remote_addr,
- sizeof(event->remote_addr));
- if (cm_node) {
- event->private_data = (void *)cm_node->pdata_buf;
- event->private_data_len = (u8)cm_node->pdata.size;
- event->ird = cm_node->ird_size;
- event->ord = cm_node->ord_size;
- }
-}
-
-/**
- * i40iw_send_cm_event - upcall cm's event handler
- * @cm_node: connection's node
- * @cm_id: upper layer's cm info struct
- * @type: Event type to indicate
- * @status: status for the event type
- */
-static int i40iw_send_cm_event(struct i40iw_cm_node *cm_node,
- struct iw_cm_id *cm_id,
- enum iw_cm_event_type type,
- int status)
-{
- struct iw_cm_event event;
-
- memset(&event, 0, sizeof(event));
- event.event = type;
- event.status = status;
- switch (type) {
- case IW_CM_EVENT_CONNECT_REQUEST:
- if (cm_node->ipv4)
- i40iw_fill_sockaddr4(cm_node, &event);
- else
- i40iw_fill_sockaddr6(cm_node, &event);
- event.provider_data = (void *)cm_node;
- event.private_data = (void *)cm_node->pdata_buf;
- event.private_data_len = (u8)cm_node->pdata.size;
- event.ird = cm_node->ird_size;
- break;
- case IW_CM_EVENT_CONNECT_REPLY:
- i40iw_get_cmevent_info(cm_node, cm_id, &event);
- break;
- case IW_CM_EVENT_ESTABLISHED:
- event.ird = cm_node->ird_size;
- event.ord = cm_node->ord_size;
- break;
- case IW_CM_EVENT_DISCONNECT:
- break;
- case IW_CM_EVENT_CLOSE:
- break;
- default:
- i40iw_pr_err("event type received type = %d\n", type);
- return -1;
- }
- return cm_id->event_handler(cm_id, &event);
-}
-
-/**
- * i40iw_create_event - create cm event
- * @cm_node: connection's node
- * @type: Event type to generate
- */
-static struct i40iw_cm_event *i40iw_create_event(struct i40iw_cm_node *cm_node,
- enum i40iw_cm_event_type type)
-{
- struct i40iw_cm_event *event;
-
- if (!cm_node->cm_id)
- return NULL;
-
- event = kzalloc(sizeof(*event), GFP_ATOMIC);
-
- if (!event)
- return NULL;
-
- event->type = type;
- event->cm_node = cm_node;
- memcpy(event->cm_info.rem_addr, cm_node->rem_addr, sizeof(event->cm_info.rem_addr));
- memcpy(event->cm_info.loc_addr, cm_node->loc_addr, sizeof(event->cm_info.loc_addr));
- event->cm_info.rem_port = cm_node->rem_port;
- event->cm_info.loc_port = cm_node->loc_port;
- event->cm_info.cm_id = cm_node->cm_id;
-
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "node=%p event=%p type=%u dst=%pI4 src=%pI4\n",
- cm_node,
- event,
- type,
- event->cm_info.loc_addr,
- event->cm_info.rem_addr);
-
- i40iw_cm_post_event(event);
- return event;
-}
-
-/**
- * i40iw_free_retrans_entry - free send entry
- * @cm_node: connection's node
- */
-static void i40iw_free_retrans_entry(struct i40iw_cm_node *cm_node)
-{
- struct i40iw_device *iwdev = cm_node->iwdev;
- struct i40iw_timer_entry *send_entry;
-
- send_entry = cm_node->send_entry;
- if (send_entry) {
- cm_node->send_entry = NULL;
- i40iw_free_sqbuf(&iwdev->vsi, (void *)send_entry->sqbuf);
- kfree(send_entry);
- atomic_dec(&cm_node->ref_count);
- }
-}
-
-/**
- * i40iw_cleanup_retrans_entry - free send entry with lock
- * @cm_node: connection's node
- */
-static void i40iw_cleanup_retrans_entry(struct i40iw_cm_node *cm_node)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- i40iw_free_retrans_entry(cm_node);
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
-}
-
-/**
- * i40iw_form_cm_frame - get a free packet and build frame
- * @cm_node: connection's node info to use in frame
- * @options: pointer to options info
- * @hdr: pointer mpa header
- * @pdata: pointer to private data
- * @flags: indicates FIN or ACK
- */
-static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node,
- struct i40iw_kmem_info *options,
- struct i40iw_kmem_info *hdr,
- struct i40iw_kmem_info *pdata,
- u8 flags)
-{
- struct i40iw_puda_buf *sqbuf;
- struct i40iw_sc_vsi *vsi = &cm_node->iwdev->vsi;
- u8 *buf;
-
- struct tcphdr *tcph;
- struct iphdr *iph;
- struct ipv6hdr *ip6h;
- struct ethhdr *ethh;
- u16 packetsize;
- u16 eth_hlen = ETH_HLEN;
- u32 opts_len = 0;
- u32 pd_len = 0;
- u32 hdr_len = 0;
- u16 vtag;
-
- sqbuf = i40iw_puda_get_bufpool(vsi->ilq);
- if (!sqbuf)
- return NULL;
- buf = sqbuf->mem.va;
-
- if (options)
- opts_len = (u32)options->size;
-
- if (hdr)
- hdr_len = hdr->size;
-
- if (pdata)
- pd_len = pdata->size;
-
- if (cm_node->vlan_id <= VLAN_VID_MASK)
- eth_hlen += 4;
-
- if (cm_node->ipv4)
- packetsize = sizeof(*iph) + sizeof(*tcph);
- else
- packetsize = sizeof(*ip6h) + sizeof(*tcph);
- packetsize += opts_len + hdr_len + pd_len;
-
- memset(buf, 0x00, eth_hlen + packetsize);
-
- sqbuf->totallen = packetsize + eth_hlen;
- sqbuf->maclen = eth_hlen;
- sqbuf->tcphlen = sizeof(*tcph) + opts_len;
- sqbuf->scratch = (void *)cm_node;
-
- ethh = (struct ethhdr *)buf;
- buf += eth_hlen;
-
- if (cm_node->ipv4) {
- sqbuf->ipv4 = true;
-
- iph = (struct iphdr *)buf;
- buf += sizeof(*iph);
- tcph = (struct tcphdr *)buf;
- buf += sizeof(*tcph);
-
- ether_addr_copy(ethh->h_dest, cm_node->rem_mac);
- ether_addr_copy(ethh->h_source, cm_node->loc_mac);
- if (cm_node->vlan_id <= VLAN_VID_MASK) {
- ((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q);
- vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) | cm_node->vlan_id;
- ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag);
-
- ((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto = htons(ETH_P_IP);
- } else {
- ethh->h_proto = htons(ETH_P_IP);
- }
-
- iph->version = IPVERSION;
-		iph->ihl = 5;	/* 5 * 4-byte words, IP header len */
- iph->tos = cm_node->tos;
- iph->tot_len = htons(packetsize);
- iph->id = htons(++cm_node->tcp_cntxt.loc_id);
-
- iph->frag_off = htons(0x4000);
- iph->ttl = 0x40;
- iph->protocol = IPPROTO_TCP;
- iph->saddr = htonl(cm_node->loc_addr[0]);
- iph->daddr = htonl(cm_node->rem_addr[0]);
- } else {
- sqbuf->ipv4 = false;
- ip6h = (struct ipv6hdr *)buf;
- buf += sizeof(*ip6h);
- tcph = (struct tcphdr *)buf;
- buf += sizeof(*tcph);
-
- ether_addr_copy(ethh->h_dest, cm_node->rem_mac);
- ether_addr_copy(ethh->h_source, cm_node->loc_mac);
- if (cm_node->vlan_id <= VLAN_VID_MASK) {
- ((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q);
- vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) | cm_node->vlan_id;
- ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag);
- ((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto = htons(ETH_P_IPV6);
- } else {
- ethh->h_proto = htons(ETH_P_IPV6);
- }
- ip6h->version = 6;
- ip6h->priority = cm_node->tos >> 4;
- ip6h->flow_lbl[0] = cm_node->tos << 4;
- ip6h->flow_lbl[1] = 0;
- ip6h->flow_lbl[2] = 0;
- ip6h->payload_len = htons(packetsize - sizeof(*ip6h));
- ip6h->nexthdr = 6;
- ip6h->hop_limit = 128;
- i40iw_copy_ip_htonl(ip6h->saddr.in6_u.u6_addr32,
- cm_node->loc_addr);
- i40iw_copy_ip_htonl(ip6h->daddr.in6_u.u6_addr32,
- cm_node->rem_addr);
- }
-
- tcph->source = htons(cm_node->loc_port);
- tcph->dest = htons(cm_node->rem_port);
-
- tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
-
- if (flags & SET_ACK) {
- cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt;
- tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num);
- tcph->ack = 1;
- } else {
- tcph->ack_seq = 0;
- }
-
- if (flags & SET_SYN) {
- cm_node->tcp_cntxt.loc_seq_num++;
- tcph->syn = 1;
- } else {
- cm_node->tcp_cntxt.loc_seq_num += hdr_len + pd_len;
- }
-
- if (flags & SET_FIN) {
- cm_node->tcp_cntxt.loc_seq_num++;
- tcph->fin = 1;
- }
-
- if (flags & SET_RST)
- tcph->rst = 1;
-
- tcph->doff = (u16)((sizeof(*tcph) + opts_len + 3) >> 2);
- sqbuf->tcphlen = tcph->doff << 2;
- tcph->window = htons(cm_node->tcp_cntxt.rcv_wnd);
- tcph->urg_ptr = 0;
-
- if (opts_len) {
- memcpy(buf, options->addr, opts_len);
- buf += opts_len;
- }
-
- if (hdr_len) {
- memcpy(buf, hdr->addr, hdr_len);
- buf += hdr_len;
- }
-
- if (pdata && pdata->addr)
- memcpy(buf, pdata->addr, pdata->size);
-
- atomic_set(&sqbuf->refcount, 1);
-
- return sqbuf;
-}
-
-/**
- * i40iw_send_reset - Send RST packet
- * @cm_node: connection's node
- */
-int i40iw_send_reset(struct i40iw_cm_node *cm_node)
-{
- struct i40iw_puda_buf *sqbuf;
- int flags = SET_RST | SET_ACK;
-
- sqbuf = i40iw_form_cm_frame(cm_node, NULL, NULL, NULL, flags);
- if (!sqbuf) {
- i40iw_pr_err("no sqbuf\n");
- return -1;
- }
-
- return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 0, 1);
-}
-
-/**
- * i40iw_active_open_err - send event for active side cm error
- * @cm_node: connection's node
- * @reset: Flag to send reset or not
- */
-static void i40iw_active_open_err(struct i40iw_cm_node *cm_node, bool reset)
-{
- i40iw_cleanup_retrans_entry(cm_node);
- cm_node->cm_core->stats_connect_errs++;
- if (reset) {
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "%s cm_node=%p state=%d\n",
- __func__,
- cm_node,
- cm_node->state);
- atomic_inc(&cm_node->ref_count);
- i40iw_send_reset(cm_node);
- }
-
- cm_node->state = I40IW_CM_STATE_CLOSED;
- i40iw_create_event(cm_node, I40IW_CM_EVENT_ABORTED);
-}
-
-/**
- * i40iw_passive_open_err - handle passive side cm error
- * @cm_node: connection's node
- * @reset: send reset or just free cm_node
- */
-static void i40iw_passive_open_err(struct i40iw_cm_node *cm_node, bool reset)
-{
- i40iw_cleanup_retrans_entry(cm_node);
- cm_node->cm_core->stats_passive_errs++;
- cm_node->state = I40IW_CM_STATE_CLOSED;
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "%s cm_node=%p state =%d\n",
- __func__,
- cm_node,
- cm_node->state);
- if (reset)
- i40iw_send_reset(cm_node);
- else
- i40iw_rem_ref_cm_node(cm_node);
-}
-
-/**
- * i40iw_event_connect_error - to create connect error event
- * @event: cm information for connect event
- */
-static void i40iw_event_connect_error(struct i40iw_cm_event *event)
-{
- struct i40iw_qp *iwqp;
- struct iw_cm_id *cm_id;
-
- cm_id = event->cm_node->cm_id;
- if (!cm_id)
- return;
-
- iwqp = cm_id->provider_data;
-
- if (!iwqp || !iwqp->iwdev)
- return;
-
- iwqp->cm_id = NULL;
- cm_id->provider_data = NULL;
- i40iw_send_cm_event(event->cm_node, cm_id,
- IW_CM_EVENT_CONNECT_REPLY,
- -ECONNRESET);
- cm_id->rem_ref(cm_id);
- i40iw_rem_ref_cm_node(event->cm_node);
-}
-
-/**
- * i40iw_process_options
- * @cm_node: connection's node
- * @optionsloc: point to start of options
- * @optionsize: size of all options
- * @syn_packet: flag if syn packet
- */
-static int i40iw_process_options(struct i40iw_cm_node *cm_node,
- u8 *optionsloc,
- u32 optionsize,
- u32 syn_packet)
-{
- u32 tmp;
- u32 offset = 0;
- union all_known_options *all_options;
- char got_mss_option = 0;
-
- while (offset < optionsize) {
- all_options = (union all_known_options *)(optionsloc + offset);
- switch (all_options->as_base.optionnum) {
- case OPTION_NUMBER_END:
- offset = optionsize;
- break;
- case OPTION_NUMBER_NONE:
- offset += 1;
- continue;
- case OPTION_NUMBER_MSS:
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "%s: MSS Length: %d Offset: %d Size: %d\n",
- __func__,
- all_options->as_mss.length,
- offset,
- optionsize);
- got_mss_option = 1;
- if (all_options->as_mss.length != 4)
- return -1;
- tmp = ntohs(all_options->as_mss.mss);
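- /* clamp our MSS down to the peer's advertised value */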
- if (tmp > 0 && tmp < cm_node->tcp_cntxt.mss)
- cm_node->tcp_cntxt.mss = tmp;
- break;
- case OPTION_NUMBER_WINDOW_SCALE:
- cm_node->tcp_cntxt.snd_wscale =
- all_options->as_windowscale.shiftcount;
- break;
- default:
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "TCP Option not understood: %x\n",
- all_options->as_base.optionnum);
- break;
- }
- offset += all_options->as_base.length;
- }
- if (!got_mss_option && syn_packet)
- cm_node->tcp_cntxt.mss = I40IW_CM_DEFAULT_MSS;
- return 0;
-}
-
-/**
- * i40iw_handle_tcp_options - process TCP options of a received segment
- * @cm_node: connection's node
- * @tcph: pointer to tcp header
- * @optionsize: size of options rcvd
- * @passive: active or passive flag
- */
-static int i40iw_handle_tcp_options(struct i40iw_cm_node *cm_node,
- struct tcphdr *tcph,
- int optionsize,
- int passive)
-{
- u8 *optionsloc = (u8 *)&tcph[1];
-
- if (optionsize) {
- if (i40iw_process_options(cm_node,
- optionsloc,
- optionsize,
- (u32)tcph->syn)) {
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "%s: Node %p, Sending RESET\n",
- __func__,
- cm_node);
- if (passive)
- i40iw_passive_open_err(cm_node, true);
- else
- i40iw_active_open_err(cm_node, true);
- return -1;
- }
- }
-
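- /* apply the peer's window scale to its advertised window */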
- cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) <<
- cm_node->tcp_cntxt.snd_wscale;
-
- if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd)
- cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd;
- return 0;
-}
-
-/**
- * i40iw_build_mpa_v1 - build a MPA V1 frame
- * @cm_node: connection's node
- * @start_addr: buffer start address
- * @mpa_key: to do read0 or write0
- */
-static void i40iw_build_mpa_v1(struct i40iw_cm_node *cm_node,
- void *start_addr,
- u8 mpa_key)
-{
- struct ietf_mpa_v1 *mpa_frame = (struct ietf_mpa_v1 *)start_addr;
-
- switch (mpa_key) {
- case MPA_KEY_REQUEST:
- memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE);
- break;
- case MPA_KEY_REPLY:
- memcpy(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
- break;
- default:
- break;
- }
- mpa_frame->flags = IETF_MPA_FLAGS_CRC;
- mpa_frame->rev = cm_node->mpa_frame_rev;
- mpa_frame->priv_data_len = htons(cm_node->pdata.size);
-}
-
-/**
- * i40iw_build_mpa_v2 - build a MPA V2 frame
- * @cm_node: connection's node
- * @start_addr: buffer start address
- * @mpa_key: to do read0 or write0
- */
-static void i40iw_build_mpa_v2(struct i40iw_cm_node *cm_node,
- void *start_addr,
- u8 mpa_key)
-{
- struct ietf_mpa_v2 *mpa_frame = (struct ietf_mpa_v2 *)start_addr;
- struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg;
- u16 ctrl_ird, ctrl_ord;
-
- /* initialize the upper 5 bytes of the frame */
- i40iw_build_mpa_v1(cm_node, start_addr, mpa_key);
- mpa_frame->flags |= IETF_MPA_V2_FLAG;
- mpa_frame->priv_data_len += htons(IETF_RTR_MSG_SIZE);
-
- /* initialize RTR msg */
- if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) {
- ctrl_ird = IETF_NO_IRD_ORD;
- ctrl_ord = IETF_NO_IRD_ORD;
- } else {
- ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ?
- IETF_NO_IRD_ORD : cm_node->ird_size;
- ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ?
- IETF_NO_IRD_ORD : cm_node->ord_size;
- }
-
- ctrl_ird |= IETF_PEER_TO_PEER;
-
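- /* advertise the first RDMA operation (RDMA0) to follow MPA negotiation:
- * a requester offers both read0 and write0, a replier selects the one it will send
- */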
- switch (mpa_key) {
- case MPA_KEY_REQUEST:
- ctrl_ord |= IETF_RDMA0_WRITE;
- ctrl_ord |= IETF_RDMA0_READ;
- break;
- case MPA_KEY_REPLY:
- switch (cm_node->send_rdma0_op) {
- case SEND_RDMA_WRITE_ZERO:
- ctrl_ord |= IETF_RDMA0_WRITE;
- break;
- case SEND_RDMA_READ_ZERO:
- ctrl_ord |= IETF_RDMA0_READ;
- break;
- }
- break;
- default:
- break;
- }
- rtr_msg->ctrl_ird = htons(ctrl_ird);
- rtr_msg->ctrl_ord = htons(ctrl_ord);
-}
-
-/**
- * i40iw_cm_build_mpa_frame - build mpa frame for mpa version 1 or version 2
- * @cm_node: connection's node
- * @mpa: mpa data buffer
- * @mpa_key: to do read0 or write0
- */
-static int i40iw_cm_build_mpa_frame(struct i40iw_cm_node *cm_node,
- struct i40iw_kmem_info *mpa,
- u8 mpa_key)
-{
- int hdr_len = 0;
-
- switch (cm_node->mpa_frame_rev) {
- case IETF_MPA_V1:
- hdr_len = sizeof(struct ietf_mpa_v1);
- i40iw_build_mpa_v1(cm_node, mpa->addr, mpa_key);
- break;
- case IETF_MPA_V2:
- hdr_len = sizeof(struct ietf_mpa_v2);
- i40iw_build_mpa_v2(cm_node, mpa->addr, mpa_key);
- break;
- default:
- break;
- }
-
- return hdr_len;
-}
-
-/**
- * i40iw_send_mpa_request - active node send mpa request to passive node
- * @cm_node: connection's node
- */
-static int i40iw_send_mpa_request(struct i40iw_cm_node *cm_node)
-{
- struct i40iw_puda_buf *sqbuf;
-
- if (!cm_node) {
- i40iw_pr_err("cm_node == NULL\n");
- return -1;
- }
-
- cm_node->mpa_hdr.addr = &cm_node->mpa_frame;
- cm_node->mpa_hdr.size = i40iw_cm_build_mpa_frame(cm_node,
- &cm_node->mpa_hdr,
- MPA_KEY_REQUEST);
- if (!cm_node->mpa_hdr.size) {
- i40iw_pr_err("mpa size = %d\n", cm_node->mpa_hdr.size);
- return -1;
- }
-
- sqbuf = i40iw_form_cm_frame(cm_node,
- NULL,
- &cm_node->mpa_hdr,
- &cm_node->pdata,
- SET_ACK);
- if (!sqbuf) {
- i40iw_pr_err("sq_buf == NULL\n");
- return -1;
- }
- return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 1, 0);
-}
-
-/**
- * i40iw_send_mpa_reject - send an mpa reject frame
- * @cm_node: connection's node
- * @pdata: reject data for connection
- * @plen: length of reject data
- */
-static int i40iw_send_mpa_reject(struct i40iw_cm_node *cm_node,
- const void *pdata,
- u8 plen)
-{
- struct i40iw_puda_buf *sqbuf;
- struct i40iw_kmem_info priv_info;
-
- cm_node->mpa_hdr.addr = &cm_node->mpa_frame;
- cm_node->mpa_hdr.size = i40iw_cm_build_mpa_frame(cm_node,
- &cm_node->mpa_hdr,
- MPA_KEY_REPLY);
-
- cm_node->mpa_frame.flags |= IETF_MPA_FLAGS_REJECT;
- priv_info.addr = (void *)pdata;
- priv_info.size = plen;
-
- sqbuf = i40iw_form_cm_frame(cm_node,
- NULL,
- &cm_node->mpa_hdr,
- &priv_info,
- SET_ACK | SET_FIN);
- if (!sqbuf) {
- i40iw_pr_err("no sqbuf\n");
- return -ENOMEM;
- }
- cm_node->state = I40IW_CM_STATE_FIN_WAIT1;
- return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 1, 0);
-}
-
-/**
- * i40iw_parse_mpa - process an IETF MPA frame
- * @cm_node: connection's node
- * @buffer: Data pointer
- * @type: to return accept or reject
- * @len: Len of mpa buffer
- */
-static int i40iw_parse_mpa(struct i40iw_cm_node *cm_node, u8 *buffer, u32 *type, u32 len)
-{
- struct ietf_mpa_v1 *mpa_frame;
- struct ietf_mpa_v2 *mpa_v2_frame;
- struct ietf_rtr_msg *rtr_msg;
- int mpa_hdr_len;
- int priv_data_len;
-
- *type = I40IW_MPA_REQUEST_ACCEPT;
-
- if (len < sizeof(struct ietf_mpa_v1)) {
- i40iw_pr_err("ietf buffer small (%x)\n", len);
- return -1;
- }
-
- mpa_frame = (struct ietf_mpa_v1 *)buffer;
- mpa_hdr_len = sizeof(struct ietf_mpa_v1);
- priv_data_len = ntohs(mpa_frame->priv_data_len);
-
- if (priv_data_len > IETF_MAX_PRIV_DATA_LEN) {
- i40iw_pr_err("large pri_data %d\n", priv_data_len);
- return -1;
- }
- if (mpa_frame->rev != IETF_MPA_V1 && mpa_frame->rev != IETF_MPA_V2) {
- i40iw_pr_err("unsupported mpa rev = %d\n", mpa_frame->rev);
- return -1;
- }
- if (mpa_frame->rev > cm_node->mpa_frame_rev) {
- i40iw_pr_err("rev %d\n", mpa_frame->rev);
- return -1;
- }
- cm_node->mpa_frame_rev = mpa_frame->rev;
-
- if (cm_node->state != I40IW_CM_STATE_MPAREQ_SENT) {
- if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE)) {
- i40iw_pr_err("Unexpected MPA Key received\n");
- return -1;
- }
- } else {
- if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE)) {
- i40iw_pr_err("Unexpected MPA Key received\n");
- return -1;
- }
- }
-
- if (priv_data_len + mpa_hdr_len > len) {
- i40iw_pr_err("ietf buffer len(%x + %x != %x)\n",
- priv_data_len, mpa_hdr_len, len);
- return -1;
- }
- if (len > MAX_CM_BUFFER) {
- i40iw_pr_err("ietf buffer large len = %d\n", len);
- return -1;
- }
-
- switch (mpa_frame->rev) {
- case IETF_MPA_V2:{
- u16 ird_size;
- u16 ord_size;
- u16 ctrl_ord;
- u16 ctrl_ird;
-
- mpa_v2_frame = (struct ietf_mpa_v2 *)buffer;
- mpa_hdr_len += IETF_RTR_MSG_SIZE;
- rtr_msg = &mpa_v2_frame->rtr_msg;
-
- /* parse rtr message */
- ctrl_ord = ntohs(rtr_msg->ctrl_ord);
- ctrl_ird = ntohs(rtr_msg->ctrl_ird);
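- /* IETF_NO_IRD_ORD is both the IRD/ORD field mask and the "not specified" sentinel */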
- ird_size = ctrl_ird & IETF_NO_IRD_ORD;
- ord_size = ctrl_ord & IETF_NO_IRD_ORD;
-
- if (!(ctrl_ird & IETF_PEER_TO_PEER))
- return -1;
-
- if (ird_size == IETF_NO_IRD_ORD || ord_size == IETF_NO_IRD_ORD) {
- cm_node->mpav2_ird_ord = IETF_NO_IRD_ORD;
- goto negotiate_done;
- }
-
- if (cm_node->state != I40IW_CM_STATE_MPAREQ_SENT) {
- /* responder */
- if (!ord_size && (ctrl_ord & IETF_RDMA0_READ))
- cm_node->ird_size = 1;
- if (cm_node->ord_size > ird_size)
- cm_node->ord_size = ird_size;
- } else {
- /* initiator */
- if (!ird_size && (ctrl_ord & IETF_RDMA0_READ))
- return -1;
- if (cm_node->ord_size > ird_size)
- cm_node->ord_size = ird_size;
-
- if (cm_node->ird_size < ord_size)
- /* no resources available */
- return -1;
- }
-
-negotiate_done:
- if (ctrl_ord & IETF_RDMA0_READ)
- cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
- else if (ctrl_ord & IETF_RDMA0_WRITE)
- cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO;
- else /* Not supported RDMA0 operation */
- return -1;
- i40iw_debug(cm_node->dev, I40IW_DEBUG_CM,
- "MPAV2: Negotiated ORD: %d, IRD: %d\n",
- cm_node->ord_size, cm_node->ird_size);
- break;
- }
- break;
- case IETF_MPA_V1:
- default:
- break;
- }
-
- memcpy(cm_node->pdata_buf, buffer + mpa_hdr_len, priv_data_len);
- cm_node->pdata.size = priv_data_len;
-
- if (mpa_frame->flags & IETF_MPA_FLAGS_REJECT)
- *type = I40IW_MPA_REQUEST_REJECT;
-
- if (mpa_frame->flags & IETF_MPA_FLAGS_MARKERS)
- cm_node->snd_mark_en = true;
-
- return 0;
-}
-
-/**
- * i40iw_schedule_cm_timer - schedule a send or close timer for a cm node
- * @cm_node: connection's node
- * @sqbuf: buffer to send
- * @type: if it is send or close
- * @send_retrans: if rexmits to be done
- * @close_when_complete: is cm_node to be removed
- *
- * note - cm_node needs to be protected before calling this. Encase in:
- * i40iw_rem_ref_cm_node(cm_core, cm_node);
- * i40iw_schedule_cm_timer(...)
- * atomic_inc(&cm_node->ref_count);
- */
-int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node,
- struct i40iw_puda_buf *sqbuf,
- enum i40iw_timer_type type,
- int send_retrans,
- int close_when_complete)
-{
- struct i40iw_sc_vsi *vsi = &cm_node->iwdev->vsi;
- struct i40iw_cm_core *cm_core = cm_node->cm_core;
- struct i40iw_timer_entry *new_send;
- int ret = 0;
- u32 was_timer_set;
- unsigned long flags;
-
- new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
- if (!new_send) {
- if (type != I40IW_TIMER_TYPE_CLOSE)
- i40iw_free_sqbuf(vsi, (void *)sqbuf);
- return -ENOMEM;
- }
- new_send->retrycount = I40IW_DEFAULT_RETRYS;
- new_send->retranscount = I40IW_DEFAULT_RETRANS;
- new_send->sqbuf = sqbuf;
- new_send->timetosend = jiffies;
- new_send->type = type;
- new_send->send_retrans = send_retrans;
- new_send->close_when_complete = close_when_complete;
-
- if (type == I40IW_TIMER_TYPE_CLOSE) {
- new_send->timetosend += (HZ / 10);
- if (cm_node->close_entry) {
- kfree(new_send);
- i40iw_pr_err("already close entry\n");
- return -EINVAL;
- }
- cm_node->close_entry = new_send;
- }
-
- if (type == I40IW_TIMER_TYPE_SEND) {
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- cm_node->send_entry = new_send;
- atomic_inc(&cm_node->ref_count);
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
- new_send->timetosend = jiffies + I40IW_RETRY_TIMEOUT;
-
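- /* hold an extra reference on the buffer so it survives for possible retransmission */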
- atomic_inc(&sqbuf->refcount);
- i40iw_puda_send_buf(vsi->ilq, sqbuf);
- if (!send_retrans) {
- i40iw_cleanup_retrans_entry(cm_node);
- if (close_when_complete)
- i40iw_rem_ref_cm_node(cm_node);
- return ret;
- }
- }
-
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- was_timer_set = timer_pending(&cm_core->tcp_timer);
-
- if (!was_timer_set) {
- cm_core->tcp_timer.expires = new_send->timetosend;
- add_timer(&cm_core->tcp_timer);
- }
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
-
- return ret;
-}
-
-/**
- * i40iw_retrans_expired - Could not rexmit the packet
- * @cm_node: connection's node
- */
-static void i40iw_retrans_expired(struct i40iw_cm_node *cm_node)
-{
- struct iw_cm_id *cm_id = cm_node->cm_id;
- enum i40iw_cm_node_state state = cm_node->state;
-
- cm_node->state = I40IW_CM_STATE_CLOSED;
- switch (state) {
- case I40IW_CM_STATE_SYN_RCVD:
- case I40IW_CM_STATE_CLOSING:
- i40iw_rem_ref_cm_node(cm_node);
- break;
- case I40IW_CM_STATE_FIN_WAIT1:
- case I40IW_CM_STATE_LAST_ACK:
- if (cm_node->cm_id)
- cm_id->rem_ref(cm_id);
- i40iw_send_reset(cm_node);
- break;
- default:
- atomic_inc(&cm_node->ref_count);
- i40iw_send_reset(cm_node);
- i40iw_create_event(cm_node, I40IW_CM_EVENT_ABORTED);
- break;
- }
-}
-
-/**
- * i40iw_handle_close_entry - for handling retry/timeouts
- * @cm_node: connection's node
- * @rem_node: flag for remove cm_node
- */
-static void i40iw_handle_close_entry(struct i40iw_cm_node *cm_node, u32 rem_node)
-{
- struct i40iw_timer_entry *close_entry = cm_node->close_entry;
- struct iw_cm_id *cm_id = cm_node->cm_id;
- struct i40iw_qp *iwqp;
- unsigned long flags;
-
- if (!close_entry)
- return;
- iwqp = (struct i40iw_qp *)close_entry->sqbuf;
- if (iwqp) {
- spin_lock_irqsave(&iwqp->lock, flags);
- if (iwqp->cm_id) {
- iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSED;
- iwqp->hw_iwarp_state = I40IW_QP_STATE_ERROR;
- iwqp->last_aeq = I40IW_AE_RESET_SENT;
- iwqp->ibqp_state = IB_QPS_ERR;
- spin_unlock_irqrestore(&iwqp->lock, flags);
- i40iw_cm_disconn(iwqp);
- } else {
- spin_unlock_irqrestore(&iwqp->lock, flags);
- }
- } else if (rem_node) {
- /* TIME_WAIT state */
- i40iw_rem_ref_cm_node(cm_node);
- }
- if (cm_id)
- cm_id->rem_ref(cm_id);
- kfree(close_entry);
- cm_node->close_entry = NULL;
-}
-
-/**
- * i40iw_build_timer_list - Add cm_nodes to timer list
- * @timer_list: ptr to timer list
- * @hte: ptr to accelerated or non-accelerated list
- */
-static void i40iw_build_timer_list(struct list_head *timer_list,
- struct list_head *hte)
-{
- struct i40iw_cm_node *cm_node;
- struct list_head *list_core_temp, *list_node;
-
- list_for_each_safe(list_node, list_core_temp, hte) {
- cm_node = container_of(list_node, struct i40iw_cm_node, list);
- if (cm_node->close_entry || cm_node->send_entry) {
- atomic_inc(&cm_node->ref_count);
- list_add(&cm_node->timer_entry, timer_list);
- }
- }
-}
-
-/**
- * i40iw_cm_timer_tick - system's timer expired callback
- * @t: pointer to the expired timer, embedded in cm_core
- */
-static void i40iw_cm_timer_tick(struct timer_list *t)
-{
- unsigned long nexttimeout = jiffies + I40IW_LONG_TIME;
- struct i40iw_cm_node *cm_node;
- struct i40iw_timer_entry *send_entry, *close_entry;
- struct list_head *list_core_temp;
- struct i40iw_sc_vsi *vsi;
- struct list_head *list_node;
- struct i40iw_cm_core *cm_core = from_timer(cm_core, t, tcp_timer);
- u32 settimer = 0;
- unsigned long timetosend;
- unsigned long flags;
-
- struct list_head timer_list;
-
- INIT_LIST_HEAD(&timer_list);
-
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- i40iw_build_timer_list(&timer_list, &cm_core->non_accelerated_list);
- i40iw_build_timer_list(&timer_list, &cm_core->accelerated_list);
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
-
- list_for_each_safe(list_node, list_core_temp, &timer_list) {
- cm_node = container_of(list_node,
- struct i40iw_cm_node,
- timer_entry);
- close_entry = cm_node->close_entry;
-
- if (close_entry) {
- if (time_after(close_entry->timetosend, jiffies)) {
- if (nexttimeout > close_entry->timetosend ||
- !settimer) {
- nexttimeout = close_entry->timetosend;
- settimer = 1;
- }
- } else {
- i40iw_handle_close_entry(cm_node, 1);
- }
- }
-
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
-
- send_entry = cm_node->send_entry;
- if (!send_entry)
- goto done;
- if (time_after(send_entry->timetosend, jiffies)) {
- if (cm_node->state != I40IW_CM_STATE_OFFLOADED) {
- if ((nexttimeout > send_entry->timetosend) ||
- !settimer) {
- nexttimeout = send_entry->timetosend;
- settimer = 1;
- }
- } else {
- i40iw_free_retrans_entry(cm_node);
- }
- goto done;
- }
-
- if ((cm_node->state == I40IW_CM_STATE_OFFLOADED) ||
- (cm_node->state == I40IW_CM_STATE_CLOSED)) {
- i40iw_free_retrans_entry(cm_node);
- goto done;
- }
-
- if (!send_entry->retranscount || !send_entry->retrycount) {
- i40iw_free_retrans_entry(cm_node);
-
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
- i40iw_retrans_expired(cm_node);
- cm_node->state = I40IW_CM_STATE_CLOSED;
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- goto done;
- }
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
-
- vsi = &cm_node->iwdev->vsi;
-
- if (!cm_node->ack_rcvd) {
- atomic_inc(&send_entry->sqbuf->refcount);
- i40iw_puda_send_buf(vsi->ilq, send_entry->sqbuf);
- cm_node->cm_core->stats_pkt_retrans++;
- }
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- if (send_entry->send_retrans) {
- send_entry->retranscount--;
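- /* exponential backoff: the retry interval doubles with each retransmission, capped at I40IW_MAX_TIMEOUT */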
- timetosend = (I40IW_RETRY_TIMEOUT <<
- (I40IW_DEFAULT_RETRANS -
- send_entry->retranscount));
-
- send_entry->timetosend = jiffies +
- min(timetosend, I40IW_MAX_TIMEOUT);
- if (nexttimeout > send_entry->timetosend || !settimer) {
- nexttimeout = send_entry->timetosend;
- settimer = 1;
- }
- } else {
- int close_when_complete;
-
- close_when_complete = send_entry->close_when_complete;
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "cm_node=%p state=%d\n",
- cm_node,
- cm_node->state);
- i40iw_free_retrans_entry(cm_node);
- if (close_when_complete)
- i40iw_rem_ref_cm_node(cm_node);
- }
-done:
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
- i40iw_rem_ref_cm_node(cm_node);
- }
-
- if (settimer) {
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- if (!timer_pending(&cm_core->tcp_timer)) {
- cm_core->tcp_timer.expires = nexttimeout;
- add_timer(&cm_core->tcp_timer);
- }
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
- }
-}
-
-/**
- * i40iw_send_syn - send SYN packet
- * @cm_node: connection's node
- * @sendack: flag to set ACK bit or not
- */
-int i40iw_send_syn(struct i40iw_cm_node *cm_node, u32 sendack)
-{
- struct i40iw_puda_buf *sqbuf;
- int flags = SET_SYN;
- char optionsbuffer[sizeof(struct option_mss) +
- sizeof(struct option_windowscale) +
- sizeof(struct option_base) + TCP_OPTIONS_PADDING];
- struct i40iw_kmem_info opts;
-
- int optionssize = 0;
- /* Sending MSS option */
- union all_known_options *options;
-
- opts.addr = optionsbuffer;
- if (!cm_node) {
- i40iw_pr_err("no cm_node\n");
- return -EINVAL;
- }
-
- options = (union all_known_options *)&optionsbuffer[optionssize];
- options->as_mss.optionnum = OPTION_NUMBER_MSS;
- options->as_mss.length = sizeof(struct option_mss);
- options->as_mss.mss = htons(cm_node->tcp_cntxt.mss);
- optionssize += sizeof(struct option_mss);
-
- options = (union all_known_options *)&optionsbuffer[optionssize];
- options->as_windowscale.optionnum = OPTION_NUMBER_WINDOW_SCALE;
- options->as_windowscale.length = sizeof(struct option_windowscale);
- options->as_windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale;
- optionssize += sizeof(struct option_windowscale);
- options = (union all_known_options *)&optionsbuffer[optionssize];
- options->as_end = OPTION_NUMBER_END;
- optionssize += 1;
-
- if (sendack)
- flags |= SET_ACK;
-
- opts.size = optionssize;
-
- sqbuf = i40iw_form_cm_frame(cm_node, &opts, NULL, NULL, flags);
- if (!sqbuf) {
- i40iw_pr_err("no sqbuf\n");
- return -1;
- }
- return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 1, 0);
-}
-
-/**
- * i40iw_send_ack - Send ACK packet
- * @cm_node: connection's node
- */
-static void i40iw_send_ack(struct i40iw_cm_node *cm_node)
-{
- struct i40iw_puda_buf *sqbuf;
- struct i40iw_sc_vsi *vsi = &cm_node->iwdev->vsi;
-
- sqbuf = i40iw_form_cm_frame(cm_node, NULL, NULL, NULL, SET_ACK);
- if (sqbuf)
- i40iw_puda_send_buf(vsi->ilq, sqbuf);
- else
- i40iw_pr_err("no sqbuf\n");
-}
-
-/**
- * i40iw_send_fin - Send FIN pkt
- * @cm_node: connection's node
- */
-static int i40iw_send_fin(struct i40iw_cm_node *cm_node)
-{
- struct i40iw_puda_buf *sqbuf;
-
- sqbuf = i40iw_form_cm_frame(cm_node, NULL, NULL, NULL, SET_ACK | SET_FIN);
- if (!sqbuf) {
- i40iw_pr_err("no sqbuf\n");
- return -1;
- }
- return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 1, 0);
-}
-
-/**
- * i40iw_find_node - find a cm node that matches the reference cm node
- * @cm_core: cm's core
- * @rem_port: remote tcp port num
- * @rem_addr: remote ip addr
- * @loc_port: local tcp port num
- * @loc_addr: local ip addr
- * @add_refcnt: flag to increment refcount of cm_node
- * @accelerated_list: flag for accelerated vs non-accelerated list to search
- */
-struct i40iw_cm_node *i40iw_find_node(struct i40iw_cm_core *cm_core,
- u16 rem_port,
- u32 *rem_addr,
- u16 loc_port,
- u32 *loc_addr,
- bool add_refcnt,
- bool accelerated_list)
-{
- struct list_head *hte;
- struct i40iw_cm_node *cm_node;
- unsigned long flags;
-
- hte = accelerated_list ?
- &cm_core->accelerated_list : &cm_core->non_accelerated_list;
-
- /* walk list and find cm_node associated with this session ID */
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- list_for_each_entry(cm_node, hte, list) {
- if (!memcmp(cm_node->loc_addr, loc_addr, sizeof(cm_node->loc_addr)) &&
- (cm_node->loc_port == loc_port) &&
- !memcmp(cm_node->rem_addr, rem_addr, sizeof(cm_node->rem_addr)) &&
- (cm_node->rem_port == rem_port)) {
- if (add_refcnt)
- atomic_inc(&cm_node->ref_count);
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
- return cm_node;
- }
- }
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
-
- /* no owner node */
- return NULL;
-}
-
-/**
- * i40iw_find_listener - find a cm node listening on this addr-port pair
- * @cm_core: cm's core
- * @dst_addr: listener ip addr
- * @dst_port: listener tcp port num
- * @vlan_id: vlan id of the listener
- * @listener_state: state to match with listen node's
- */
-static struct i40iw_cm_listener *i40iw_find_listener(
- struct i40iw_cm_core *cm_core,
- u32 *dst_addr,
- u16 dst_port,
- u16 vlan_id,
- enum i40iw_cm_listener_state
- listener_state)
-{
- struct i40iw_cm_listener *listen_node;
- static const u32 ip_zero[4] = { 0, 0, 0, 0 };
- u32 listen_addr[4];
- u16 listen_port;
- unsigned long flags;
-
- /* walk list and find cm_node associated with this session ID */
- spin_lock_irqsave(&cm_core->listen_list_lock, flags);
- list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
- memcpy(listen_addr, listen_node->loc_addr, sizeof(listen_addr));
- listen_port = listen_node->loc_port;
- /* compare node pair, return node handle if a match */
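- /* a listener bound to the all-zero (wildcard) address matches any destination IP */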
- if ((!memcmp(listen_addr, dst_addr, sizeof(listen_addr)) ||
- !memcmp(listen_addr, ip_zero, sizeof(listen_addr))) &&
- (listen_port == dst_port) &&
- (listener_state & listen_node->listener_state)) {
- atomic_inc(&listen_node->ref_count);
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
- return listen_node;
- }
- }
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
- return NULL;
-}
-
-/**
- * i40iw_add_hte_node - add a cm node to the hash table
- * @cm_core: cm's core
- * @cm_node: connection's node
- */
-static void i40iw_add_hte_node(struct i40iw_cm_core *cm_core,
- struct i40iw_cm_node *cm_node)
-{
- unsigned long flags;
-
- if (!cm_node || !cm_core) {
- i40iw_pr_err("cm_node or cm_core == NULL\n");
- return;
- }
-
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- list_add_tail(&cm_node->list, &cm_core->non_accelerated_list);
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
-}
-
-/**
- * i40iw_find_port - find port that matches reference port
- * @hte: ptr to accelerated or non-accelerated list
- * @port: port number to match
- */
-static bool i40iw_find_port(struct list_head *hte, u16 port)
-{
- struct i40iw_cm_node *cm_node;
-
- list_for_each_entry(cm_node, hte, list) {
- if (cm_node->loc_port == port)
- return true;
- }
- return false;
-}
-
-/**
- * i40iw_port_in_use - determine if port is in use
- * @cm_core: cm's core
- * @port: port number
- */
-bool i40iw_port_in_use(struct i40iw_cm_core *cm_core, u16 port)
-{
- struct i40iw_cm_listener *listen_node;
- unsigned long flags;
-
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- if (i40iw_find_port(&cm_core->accelerated_list, port) ||
- i40iw_find_port(&cm_core->non_accelerated_list, port)) {
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
- return true;
- }
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
-
- spin_lock_irqsave(&cm_core->listen_list_lock, flags);
- list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
- if (listen_node->loc_port == port) {
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
- return true;
- }
- }
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
-
- return false;
-}
-
-/**
- * i40iw_del_multiple_qhash - Remove qhash and child listens
- * @iwdev: iWarp device
- * @cm_info: CM info for parent listen node
- * @cm_parent_listen_node: The parent listen node
- */
-static enum i40iw_status_code i40iw_del_multiple_qhash(
- struct i40iw_device *iwdev,
- struct i40iw_cm_info *cm_info,
- struct i40iw_cm_listener *cm_parent_listen_node)
-{
- struct i40iw_cm_listener *child_listen_node;
- enum i40iw_status_code ret = I40IW_ERR_CONFIG;
- struct list_head *pos, *tpos;
- unsigned long flags;
-
- spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
- list_for_each_safe(pos, tpos, &cm_parent_listen_node->child_listen_list) {
- child_listen_node = list_entry(pos, struct i40iw_cm_listener, child_listen_list);
- if (child_listen_node->ipv4)
- i40iw_debug(&iwdev->sc_dev,
- I40IW_DEBUG_CM,
- "removing child listen for IP=%pI4, port=%d, vlan=%d\n",
- child_listen_node->loc_addr,
- child_listen_node->loc_port,
- child_listen_node->vlan_id);
- else
- i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM,
- "removing child listen for IP=%pI6, port=%d, vlan=%d\n",
- child_listen_node->loc_addr,
- child_listen_node->loc_port,
- child_listen_node->vlan_id);
- list_del(pos);
- memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
- sizeof(cm_info->loc_addr));
- cm_info->vlan_id = child_listen_node->vlan_id;
- if (child_listen_node->qhash_set) {
- ret = i40iw_manage_qhash(iwdev, cm_info,
- I40IW_QHASH_TYPE_TCP_SYN,
- I40IW_QHASH_MANAGE_TYPE_DELETE,
- NULL, false);
- child_listen_node->qhash_set = false;
- } else {
- ret = I40IW_SUCCESS;
- }
- i40iw_debug(&iwdev->sc_dev,
- I40IW_DEBUG_CM,
- "freed pointer = %p\n",
- child_listen_node);
- kfree(child_listen_node);
- cm_parent_listen_node->cm_core->stats_listen_nodes_destroyed++;
- }
- spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
-
- return ret;
-}
-
-/**
- * i40iw_netdev_vlan_ipv6 - Gets the netdev and vlan
- * @addr: local IPv6 address
- * @vlan_id: vlan id for the given IPv6 address
- *
- * Returns the net_device of the IPv6 address and also sets the
- * vlan id for that address.
- */
-static struct net_device *i40iw_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id)
-{
- struct net_device *ip_dev = NULL;
- struct in6_addr laddr6;
-
- if (!IS_ENABLED(CONFIG_IPV6))
- return NULL;
- i40iw_copy_ip_htonl(laddr6.in6_u.u6_addr32, addr);
- if (vlan_id)
- *vlan_id = I40IW_NO_VLAN;
- rcu_read_lock();
- for_each_netdev_rcu(&init_net, ip_dev) {
- if (ipv6_chk_addr(&init_net, &laddr6, ip_dev, 1)) {
- if (vlan_id)
- *vlan_id = rdma_vlan_dev_vlan_id(ip_dev);
- break;
- }
- }
- rcu_read_unlock();
- return ip_dev;
-}
-
-/**
- * i40iw_get_vlan_ipv4 - Returns the vlan_id for IPv4 address
- * @addr: local IPv4 address
- */
-static u16 i40iw_get_vlan_ipv4(u32 *addr)
-{
- struct net_device *netdev;
- u16 vlan_id = I40IW_NO_VLAN;
-
- netdev = ip_dev_find(&init_net, htonl(addr[0]));
- if (netdev) {
- vlan_id = rdma_vlan_dev_vlan_id(netdev);
- dev_put(netdev);
- }
- return vlan_id;
-}
-
-/**
- * i40iw_add_mqh_6 - Adds multiple qhashes for IPv6
- * @iwdev: iWarp device
- * @cm_info: CM info for parent listen node
- * @cm_parent_listen_node: The parent listen node
- *
- * Adds a qhash and a child listen node for every IPv6 address
- * on the adapter and adds the associated qhash filter
- */
-static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev,
- struct i40iw_cm_info *cm_info,
- struct i40iw_cm_listener *cm_parent_listen_node)
-{
- struct net_device *ip_dev;
- struct inet6_dev *idev;
- struct inet6_ifaddr *ifp, *tmp;
- enum i40iw_status_code ret = 0;
- struct i40iw_cm_listener *child_listen_node;
- unsigned long flags;
-
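- /* hold rtnl so the netdev list stays stable while we walk it */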
- rtnl_lock();
- for_each_netdev(&init_net, ip_dev) {
- if ((((rdma_vlan_dev_vlan_id(ip_dev) < I40IW_NO_VLAN) &&
- (rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev)) ||
- (ip_dev == iwdev->netdev)) && (ip_dev->flags & IFF_UP)) {
- idev = __in6_dev_get(ip_dev);
- if (!idev) {
- i40iw_pr_err("idev == NULL\n");
- break;
- }
- list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) {
- i40iw_debug(&iwdev->sc_dev,
- I40IW_DEBUG_CM,
- "IP=%pI6, vlan_id=%d, MAC=%pM\n",
- &ifp->addr,
- rdma_vlan_dev_vlan_id(ip_dev),
- ip_dev->dev_addr);
- child_listen_node =
- kzalloc(sizeof(*child_listen_node), GFP_ATOMIC);
- i40iw_debug(&iwdev->sc_dev,
- I40IW_DEBUG_CM,
- "Allocating child listener %p\n",
- child_listen_node);
- if (!child_listen_node) {
- ret = I40IW_ERR_NO_MEMORY;
- goto exit;
- }
- cm_info->vlan_id = rdma_vlan_dev_vlan_id(ip_dev);
- cm_parent_listen_node->vlan_id = cm_info->vlan_id;
-
- memcpy(child_listen_node, cm_parent_listen_node,
- sizeof(*child_listen_node));
-
- i40iw_copy_ip_ntohl(child_listen_node->loc_addr,
- ifp->addr.in6_u.u6_addr32);
- memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
- sizeof(cm_info->loc_addr));
-
- ret = i40iw_manage_qhash(iwdev, cm_info,
- I40IW_QHASH_TYPE_TCP_SYN,
- I40IW_QHASH_MANAGE_TYPE_ADD,
- NULL, true);
- if (!ret) {
- child_listen_node->qhash_set = true;
- spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
- list_add(&child_listen_node->child_listen_list,
- &cm_parent_listen_node->child_listen_list);
- spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
- cm_parent_listen_node->cm_core->stats_listen_nodes_created++;
- } else {
- kfree(child_listen_node);
- }
- }
- }
- }
-exit:
- rtnl_unlock();
- return ret;
-}
-
-/**
- * i40iw_add_mqh_4 - Adds multiple qhashes for IPv4
- * @iwdev: iWarp device
- * @cm_info: CM info for parent listen node
- * @cm_parent_listen_node: The parent listen node
- *
- * Adds a qhash and a child listen node for every IPv4 address
- * on the adapter and adds the associated qhash filter
- */
-static enum i40iw_status_code i40iw_add_mqh_4(
- struct i40iw_device *iwdev,
- struct i40iw_cm_info *cm_info,
- struct i40iw_cm_listener *cm_parent_listen_node)
-{
- struct net_device *dev;
- struct in_device *idev;
- struct i40iw_cm_listener *child_listen_node;
- enum i40iw_status_code ret = 0;
- unsigned long flags;
-
- rtnl_lock();
- for_each_netdev(&init_net, dev) {
- if ((((rdma_vlan_dev_vlan_id(dev) < I40IW_NO_VLAN) &&
- (rdma_vlan_dev_real_dev(dev) == iwdev->netdev)) ||
- (dev == iwdev->netdev)) && (dev->flags & IFF_UP)) {
- idev = in_dev_get(dev);
- for_ifa(idev) {
- i40iw_debug(&iwdev->sc_dev,
- I40IW_DEBUG_CM,
- "Allocating child CM Listener forIP=%pI4, vlan_id=%d, MAC=%pM\n",
- &ifa->ifa_address,
- rdma_vlan_dev_vlan_id(dev),
- dev->dev_addr);
- child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_KERNEL);
- cm_parent_listen_node->cm_core->stats_listen_nodes_created++;
- i40iw_debug(&iwdev->sc_dev,
- I40IW_DEBUG_CM,
- "Allocating child listener %p\n",
- child_listen_node);
- if (!child_listen_node) {
- in_dev_put(idev);
- ret = I40IW_ERR_NO_MEMORY;
- goto exit;
- }
- cm_info->vlan_id = rdma_vlan_dev_vlan_id(dev);
- cm_parent_listen_node->vlan_id = cm_info->vlan_id;
- memcpy(child_listen_node,
- cm_parent_listen_node,
- sizeof(*child_listen_node));
-
- child_listen_node->loc_addr[0] = ntohl(ifa->ifa_address);
- memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
- sizeof(cm_info->loc_addr));
-
- ret = i40iw_manage_qhash(iwdev,
- cm_info,
- I40IW_QHASH_TYPE_TCP_SYN,
- I40IW_QHASH_MANAGE_TYPE_ADD,
- NULL,
- true);
- if (!ret) {
- child_listen_node->qhash_set = true;
- spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
- list_add(&child_listen_node->child_listen_list,
- &cm_parent_listen_node->child_listen_list);
- spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
- } else {
- kfree(child_listen_node);
- cm_parent_listen_node->cm_core->stats_listen_nodes_created--;
- }
- }
- endfor_ifa(idev);
- in_dev_put(idev);
- }
- }
-exit:
- rtnl_unlock();
- return ret;
-}
-
-/**
- * i40iw_dec_refcnt_listen - delete listener and associated cm nodes
- * @cm_core: cm's core
- * @free_hanging_nodes: to free associated cm_nodes
- * @apbvt_del: flag to delete the apbvt
- */
-static int i40iw_dec_refcnt_listen(struct i40iw_cm_core *cm_core,
- struct i40iw_cm_listener *listener,
- int free_hanging_nodes, bool apbvt_del)
-{
- int ret = -EINVAL;
- int err = 0;
- struct list_head *list_pos;
- struct list_head *list_temp;
- struct i40iw_cm_node *cm_node;
- struct list_head reset_list;
- struct i40iw_cm_info nfo;
- struct i40iw_cm_node *loopback;
- enum i40iw_cm_node_state old_state;
- unsigned long flags;
-
- /* free non-accelerated child nodes for this listener */
- INIT_LIST_HEAD(&reset_list);
- if (free_hanging_nodes) {
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- list_for_each_safe(list_pos,
- list_temp, &cm_core->non_accelerated_list) {
- cm_node = container_of(list_pos, struct i40iw_cm_node, list);
- if ((cm_node->listener == listener) &&
- !cm_node->accelerated) {
- atomic_inc(&cm_node->ref_count);
- list_add(&cm_node->reset_entry, &reset_list);
- }
- }
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
- }
-
- list_for_each_safe(list_pos, list_temp, &reset_list) {
- cm_node = container_of(list_pos, struct i40iw_cm_node, reset_entry);
- loopback = cm_node->loopbackpartner;
- if (cm_node->state >= I40IW_CM_STATE_FIN_WAIT1) {
- i40iw_rem_ref_cm_node(cm_node);
- } else {
- if (!loopback) {
- i40iw_cleanup_retrans_entry(cm_node);
- err = i40iw_send_reset(cm_node);
- if (err) {
- cm_node->state = I40IW_CM_STATE_CLOSED;
- i40iw_pr_err("send reset\n");
- } else {
- old_state = cm_node->state;
- cm_node->state = I40IW_CM_STATE_LISTENER_DESTROYED;
- if (old_state != I40IW_CM_STATE_MPAREQ_RCVD)
- i40iw_rem_ref_cm_node(cm_node);
- }
- } else {
- struct i40iw_cm_event event;
-
- event.cm_node = loopback;
- memcpy(event.cm_info.rem_addr,
- loopback->rem_addr, sizeof(event.cm_info.rem_addr));
- memcpy(event.cm_info.loc_addr,
- loopback->loc_addr, sizeof(event.cm_info.loc_addr));
- event.cm_info.rem_port = loopback->rem_port;
- event.cm_info.loc_port = loopback->loc_port;
- event.cm_info.cm_id = loopback->cm_id;
- event.cm_info.ipv4 = loopback->ipv4;
- atomic_inc(&loopback->ref_count);
- loopback->state = I40IW_CM_STATE_CLOSED;
- i40iw_event_connect_error(&event);
- cm_node->state = I40IW_CM_STATE_LISTENER_DESTROYED;
- i40iw_rem_ref_cm_node(cm_node);
- }
- }
- }
-
- if (!atomic_dec_return(&listener->ref_count)) {
- spin_lock_irqsave(&cm_core->listen_list_lock, flags);
- list_del(&listener->list);
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
-
- if (listener->iwdev) {
- if (apbvt_del)
- i40iw_manage_apbvt(listener->iwdev,
- listener->loc_port,
- I40IW_MANAGE_APBVT_DEL);
-
- memcpy(nfo.loc_addr, listener->loc_addr, sizeof(nfo.loc_addr));
- nfo.loc_port = listener->loc_port;
- nfo.ipv4 = listener->ipv4;
- nfo.vlan_id = listener->vlan_id;
- nfo.user_pri = listener->user_pri;
-
- if (!list_empty(&listener->child_listen_list)) {
- i40iw_del_multiple_qhash(listener->iwdev, &nfo, listener);
- } else {
- if (listener->qhash_set)
- i40iw_manage_qhash(listener->iwdev,
- &nfo,
- I40IW_QHASH_TYPE_TCP_SYN,
- I40IW_QHASH_MANAGE_TYPE_DELETE,
- NULL,
- false);
- }
- }
-
- cm_core->stats_listen_destroyed++;
- kfree(listener);
- cm_core->stats_listen_nodes_destroyed++;
- listener = NULL;
- ret = 0;
- }
-
- if (listener) {
- if (atomic_read(&listener->pend_accepts_cnt) > 0)
- i40iw_debug(cm_core->dev,
- I40IW_DEBUG_CM,
- "%s: listener (%p) pending accepts=%u\n",
- __func__,
- listener,
- atomic_read(&listener->pend_accepts_cnt));
- }
-
- return ret;
-}
-
-/**
- * i40iw_cm_del_listen - delete a listener
- * @cm_core: cm's core
- * @listener: passive connection's listener
- * @apbvt_del: flag to delete apbvt
- */
-static int i40iw_cm_del_listen(struct i40iw_cm_core *cm_core,
- struct i40iw_cm_listener *listener,
- bool apbvt_del)
-{
- listener->listener_state = I40IW_CM_LISTENER_PASSIVE_STATE;
- listener->cm_id = NULL; /* going to be destroyed pretty soon */
- return i40iw_dec_refcnt_listen(cm_core, listener, 1, apbvt_del);
-}
-
-/**
- * i40iw_addr_resolve_neigh - resolve neighbor address
- * @iwdev: iwarp device structure
- * @src_ip: local ip address
- * @dst_ip: remote ip address
- * @arpindex: if there is an arp entry
- */
-static int i40iw_addr_resolve_neigh(struct i40iw_device *iwdev,
- u32 src_ip,
- u32 dst_ip,
- int arpindex)
-{
- struct rtable *rt;
- struct neighbour *neigh;
- int rc = arpindex;
- struct net_device *netdev = iwdev->netdev;
- __be32 dst_ipaddr = htonl(dst_ip);
- __be32 src_ipaddr = htonl(src_ip);
-
- rt = ip_route_output(&init_net, dst_ipaddr, src_ipaddr, 0, 0);
- if (IS_ERR(rt)) {
- i40iw_pr_err("ip_route_output\n");
- return rc;
- }
-
- if (netif_is_bond_slave(netdev))
- netdev = netdev_master_upper_dev_get(netdev);
-
- neigh = dst_neigh_lookup(&rt->dst, &dst_ipaddr);
-
- rcu_read_lock();
- if (neigh) {
- if (neigh->nud_state & NUD_VALID) {
- if (arpindex >= 0) {
- if (ether_addr_equal(iwdev->arp_table[arpindex].mac_addr,
- neigh->ha))
- /* Mac address same as arp table */
- goto resolve_neigh_exit;
- i40iw_manage_arp_cache(iwdev,
- iwdev->arp_table[arpindex].mac_addr,
- &dst_ip,
- true,
- I40IW_ARP_DELETE);
- }
-
- i40iw_manage_arp_cache(iwdev, neigh->ha, &dst_ip, true, I40IW_ARP_ADD);
- rc = i40iw_arp_table(iwdev, &dst_ip, true, NULL, I40IW_ARP_RESOLVE);
- } else {
- neigh_event_send(neigh, NULL);
- }
- }
- resolve_neigh_exit:
-
- rcu_read_unlock();
- if (neigh)
- neigh_release(neigh);
-
- ip_rt_put(rt);
- return rc;
-}
-
-/**
- * i40iw_get_dst_ipv6 - get the dst entry for an IPv6 route
- * @src_addr: local IPv6 address
- * @dst_addr: remote IPv6 address
- */
-static struct dst_entry *i40iw_get_dst_ipv6(struct sockaddr_in6 *src_addr,
- struct sockaddr_in6 *dst_addr)
-{
- struct dst_entry *dst;
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
- fl6.daddr = dst_addr->sin6_addr;
- fl6.saddr = src_addr->sin6_addr;
- if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
- fl6.flowi6_oif = dst_addr->sin6_scope_id;
-
- dst = ip6_route_output(&init_net, NULL, &fl6);
- return dst;
-}
-
-/**
- * i40iw_addr_resolve_neigh_ipv6 - resolve neighbor ipv6 address
- * @iwdev: iwarp device structure
- * @src: local ipv6 address
- * @dest: remote ipv6 address
- * @arpindex: if there is an arp entry
- */
-static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev,
- u32 *src,
- u32 *dest,
- int arpindex)
-{
- struct neighbour *neigh;
- int rc = arpindex;
- struct net_device *netdev = iwdev->netdev;
- struct dst_entry *dst;
- struct sockaddr_in6 dst_addr;
- struct sockaddr_in6 src_addr;
-
- memset(&dst_addr, 0, sizeof(dst_addr));
- dst_addr.sin6_family = AF_INET6;
- i40iw_copy_ip_htonl(dst_addr.sin6_addr.in6_u.u6_addr32, dest);
- memset(&src_addr, 0, sizeof(src_addr));
- src_addr.sin6_family = AF_INET6;
- i40iw_copy_ip_htonl(src_addr.sin6_addr.in6_u.u6_addr32, src);
- dst = i40iw_get_dst_ipv6(&src_addr, &dst_addr);
- if (!dst || dst->error) {
- if (dst) {
- dst_release(dst);
- i40iw_pr_err("ip6_route_output returned dst->error = %d\n",
- dst->error);
- }
- return rc;
- }
-
- if (netif_is_bond_slave(netdev))
- netdev = netdev_master_upper_dev_get(netdev);
-
- neigh = dst_neigh_lookup(dst, dst_addr.sin6_addr.in6_u.u6_addr32);
-
- rcu_read_lock();
- if (neigh) {
- i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, "dst_neigh_lookup MAC=%pM\n", neigh->ha);
- if (neigh->nud_state & NUD_VALID) {
- if (arpindex >= 0) {
- if (ether_addr_equal
- (iwdev->arp_table[arpindex].mac_addr,
- neigh->ha)) {
- /* Mac address same as in arp table */
- goto resolve_neigh_exit6;
- }
- i40iw_manage_arp_cache(iwdev,
- iwdev->arp_table[arpindex].mac_addr,
- dest,
- false,
- I40IW_ARP_DELETE);
- }
- i40iw_manage_arp_cache(iwdev,
- neigh->ha,
- dest,
- false,
- I40IW_ARP_ADD);
- rc = i40iw_arp_table(iwdev,
- dest,
- false,
- NULL,
- I40IW_ARP_RESOLVE);
- } else {
- neigh_event_send(neigh, NULL);
- }
- }
-
- resolve_neigh_exit6:
- rcu_read_unlock();
- if (neigh)
- neigh_release(neigh);
- dst_release(dst);
- return rc;
-}
-
-/**
- * i40iw_ipv4_is_loopback - check if loopback
- * @loc_addr: local addr to compare
- * @rem_addr: remote address
- */
-static bool i40iw_ipv4_is_loopback(u32 loc_addr, u32 rem_addr)
-{
- return ipv4_is_loopback(htonl(rem_addr)) || (loc_addr == rem_addr);
-}
-
-/**
- * i40iw_ipv6_is_loopback - check if loopback
- * @loc_addr: local addr to compare
- * @rem_addr: remote address
- */
-static bool i40iw_ipv6_is_loopback(u32 *loc_addr, u32 *rem_addr)
-{
- struct in6_addr raddr6;
-
- i40iw_copy_ip_htonl(raddr6.in6_u.u6_addr32, rem_addr);
- return !memcmp(loc_addr, rem_addr, 16) || ipv6_addr_loopback(&raddr6);
-}
-
-/**
- * i40iw_make_cm_node - create a new instance of a cm node
- * @cm_core: cm's core
- * @iwdev: iwarp device structure
- * @cm_info: quad info for connection
- * @listener: passive connection's listener
- */
-static struct i40iw_cm_node *i40iw_make_cm_node(
- struct i40iw_cm_core *cm_core,
- struct i40iw_device *iwdev,
- struct i40iw_cm_info *cm_info,
- struct i40iw_cm_listener *listener)
-{
- struct i40iw_cm_node *cm_node;
- int oldarpindex;
- int arpindex;
- struct net_device *netdev = iwdev->netdev;
-
- /* create an hte and cm_node for this instance */
- cm_node = kzalloc(sizeof(*cm_node), GFP_ATOMIC);
- if (!cm_node)
- return NULL;
-
- /* set our node specific transport info */
- cm_node->ipv4 = cm_info->ipv4;
- cm_node->vlan_id = cm_info->vlan_id;
- if ((cm_node->vlan_id == I40IW_NO_VLAN) && iwdev->dcb)
- cm_node->vlan_id = 0;
- cm_node->tos = cm_info->tos;
- cm_node->user_pri = cm_info->user_pri;
- if (listener) {
- if (listener->tos != cm_info->tos)
- i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB,
- "application TOS[%d] and remote client TOS[%d] mismatch\n",
- listener->tos, cm_info->tos);
- cm_node->tos = max(listener->tos, cm_info->tos);
- cm_node->user_pri = rt_tos2priority(cm_node->tos);
- i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB, "listener: TOS:[%d] UP:[%d]\n",
- cm_node->tos, cm_node->user_pri);
- }
- memcpy(cm_node->loc_addr, cm_info->loc_addr, sizeof(cm_node->loc_addr));
- memcpy(cm_node->rem_addr, cm_info->rem_addr, sizeof(cm_node->rem_addr));
- cm_node->loc_port = cm_info->loc_port;
- cm_node->rem_port = cm_info->rem_port;
-
- cm_node->mpa_frame_rev = iwdev->mpa_version;
- cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
- cm_node->ird_size = I40IW_MAX_IRD_SIZE;
- cm_node->ord_size = I40IW_MAX_ORD_SIZE;
-
- cm_node->listener = listener;
- cm_node->cm_id = cm_info->cm_id;
- ether_addr_copy(cm_node->loc_mac, netdev->dev_addr);
- spin_lock_init(&cm_node->retrans_list_lock);
- cm_node->ack_rcvd = false;
-
- atomic_set(&cm_node->ref_count, 1);
- /* associate our parent CM core */
- cm_node->cm_core = cm_core;
- cm_node->tcp_cntxt.loc_id = I40IW_CM_DEF_LOCAL_ID;
- cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE;
- cm_node->tcp_cntxt.rcv_wnd =
- I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE;
- if (cm_node->ipv4) {
- cm_node->tcp_cntxt.loc_seq_num = secure_tcp_seq(htonl(cm_node->loc_addr[0]),
- htonl(cm_node->rem_addr[0]),
- htons(cm_node->loc_port),
- htons(cm_node->rem_port));
- cm_node->tcp_cntxt.mss = iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV4;
- } else if (IS_ENABLED(CONFIG_IPV6)) {
- __be32 loc[4] = {
- htonl(cm_node->loc_addr[0]), htonl(cm_node->loc_addr[1]),
- htonl(cm_node->loc_addr[2]), htonl(cm_node->loc_addr[3])
- };
- __be32 rem[4] = {
- htonl(cm_node->rem_addr[0]), htonl(cm_node->rem_addr[1]),
- htonl(cm_node->rem_addr[2]), htonl(cm_node->rem_addr[3])
- };
- cm_node->tcp_cntxt.loc_seq_num = secure_tcpv6_seq(loc, rem,
- htons(cm_node->loc_port),
- htons(cm_node->rem_port));
- cm_node->tcp_cntxt.mss = iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV6;
- }
-
- cm_node->iwdev = iwdev;
- cm_node->dev = &iwdev->sc_dev;
-
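- /* loopback destinations resolve directly from the local ARP table; all others go through neighbour resolution first */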
- if ((cm_node->ipv4 &&
- i40iw_ipv4_is_loopback(cm_node->loc_addr[0], cm_node->rem_addr[0])) ||
- (!cm_node->ipv4 && i40iw_ipv6_is_loopback(cm_node->loc_addr,
- cm_node->rem_addr))) {
- arpindex = i40iw_arp_table(iwdev,
- cm_node->rem_addr,
- false,
- NULL,
- I40IW_ARP_RESOLVE);
- } else {
- oldarpindex = i40iw_arp_table(iwdev,
- cm_node->rem_addr,
- false,
- NULL,
- I40IW_ARP_RESOLVE);
- if (cm_node->ipv4)
- arpindex = i40iw_addr_resolve_neigh(iwdev,
- cm_info->loc_addr[0],
- cm_info->rem_addr[0],
- oldarpindex);
- else if (IS_ENABLED(CONFIG_IPV6))
- arpindex = i40iw_addr_resolve_neigh_ipv6(iwdev,
- cm_info->loc_addr,
- cm_info->rem_addr,
- oldarpindex);
- else
- arpindex = -EINVAL;
- }
- if (arpindex < 0) {
- i40iw_pr_err("cm_node arpindex\n");
- kfree(cm_node);
- return NULL;
- }
- ether_addr_copy(cm_node->rem_mac, iwdev->arp_table[arpindex].mac_addr);
- i40iw_add_hte_node(cm_core, cm_node);
- cm_core->stats_nodes_created++;
- return cm_node;
-}
-
-/**
- * i40iw_rem_ref_cm_node - destroy an instance of a cm node
- * @cm_node: connection's node
- */
-static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node)
-{
- struct i40iw_cm_core *cm_core = cm_node->cm_core;
- struct i40iw_qp *iwqp;
- struct i40iw_cm_info nfo;
- unsigned long flags;
-
- spin_lock_irqsave(&cm_node->cm_core->ht_lock, flags);
- if (atomic_dec_return(&cm_node->ref_count)) {
- spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags);
- return;
- }
- list_del(&cm_node->list);
- spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags);
-
- /* if the node is destroyed before connection was accelerated */
- if (!cm_node->accelerated && cm_node->accept_pend) {
- pr_err("node destroyed before established\n");
- atomic_dec(&cm_node->listener->pend_accepts_cnt);
- }
- if (cm_node->close_entry)
- i40iw_handle_close_entry(cm_node, 0);
- if (cm_node->listener) {
- i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true);
- } else {
- if (cm_node->apbvt_set) {
- i40iw_manage_apbvt(cm_node->iwdev,
- cm_node->loc_port,
- I40IW_MANAGE_APBVT_DEL);
- cm_node->apbvt_set = 0;
- }
- i40iw_get_addr_info(cm_node, &nfo);
- if (cm_node->qhash_set) {
- i40iw_manage_qhash(cm_node->iwdev,
- &nfo,
- I40IW_QHASH_TYPE_TCP_ESTABLISHED,
- I40IW_QHASH_MANAGE_TYPE_DELETE,
- NULL,
- false);
- cm_node->qhash_set = 0;
- }
- }
-
- iwqp = cm_node->iwqp;
- if (iwqp) {
- iwqp->cm_node = NULL;
- i40iw_rem_ref(&iwqp->ibqp);
- cm_node->iwqp = NULL;
- } else if (cm_node->qhash_set) {
- i40iw_get_addr_info(cm_node, &nfo);
- i40iw_manage_qhash(cm_node->iwdev,
- &nfo,
- I40IW_QHASH_TYPE_TCP_ESTABLISHED,
- I40IW_QHASH_MANAGE_TYPE_DELETE,
- NULL,
- false);
- cm_node->qhash_set = 0;
- }
-
- cm_node->cm_core->stats_nodes_destroyed++;
- kfree(cm_node);
-}
-
-/**
- * i40iw_handle_fin_pkt - FIN packet received
- * @cm_node: connection's node
- */
-static void i40iw_handle_fin_pkt(struct i40iw_cm_node *cm_node)
-{
- u32 ret;
-
- switch (cm_node->state) {
- case I40IW_CM_STATE_SYN_RCVD:
- case I40IW_CM_STATE_SYN_SENT:
- case I40IW_CM_STATE_ESTABLISHED:
- case I40IW_CM_STATE_MPAREJ_RCVD:
- cm_node->tcp_cntxt.rcv_nxt++;
- i40iw_cleanup_retrans_entry(cm_node);
- cm_node->state = I40IW_CM_STATE_LAST_ACK;
- i40iw_send_fin(cm_node);
- break;
- case I40IW_CM_STATE_MPAREQ_SENT:
- i40iw_create_event(cm_node, I40IW_CM_EVENT_ABORTED);
- cm_node->tcp_cntxt.rcv_nxt++;
- i40iw_cleanup_retrans_entry(cm_node);
- cm_node->state = I40IW_CM_STATE_CLOSED;
- atomic_inc(&cm_node->ref_count);
- i40iw_send_reset(cm_node);
- break;
- case I40IW_CM_STATE_FIN_WAIT1:
- cm_node->tcp_cntxt.rcv_nxt++;
- i40iw_cleanup_retrans_entry(cm_node);
- cm_node->state = I40IW_CM_STATE_CLOSING;
- i40iw_send_ack(cm_node);
- /*
- * Wait for ACK as this is simultaneous close.
- * After we receive ACK, do not send anything.
- * Just rm the node.
- */
- break;
- case I40IW_CM_STATE_FIN_WAIT2:
- cm_node->tcp_cntxt.rcv_nxt++;
- i40iw_cleanup_retrans_entry(cm_node);
- cm_node->state = I40IW_CM_STATE_TIME_WAIT;
- i40iw_send_ack(cm_node);
- ret =
- i40iw_schedule_cm_timer(cm_node, NULL, I40IW_TIMER_TYPE_CLOSE, 1, 0);
- if (ret)
- i40iw_pr_err("node %p state = %d\n", cm_node, cm_node->state);
- break;
- case I40IW_CM_STATE_TIME_WAIT:
- cm_node->tcp_cntxt.rcv_nxt++;
- i40iw_cleanup_retrans_entry(cm_node);
- cm_node->state = I40IW_CM_STATE_CLOSED;
- i40iw_rem_ref_cm_node(cm_node);
- break;
- case I40IW_CM_STATE_OFFLOADED:
- default:
- i40iw_pr_err("bad state node %p state = %d\n", cm_node, cm_node->state);
- break;
- }
-}
-
-/**
- * i40iw_handle_rst_pkt - process received RST packet
- * @cm_node: connection's node
- * @rbuf: receive buffer
- */
-static void i40iw_handle_rst_pkt(struct i40iw_cm_node *cm_node,
- struct i40iw_puda_buf *rbuf)
-{
- i40iw_cleanup_retrans_entry(cm_node);
- switch (cm_node->state) {
- case I40IW_CM_STATE_SYN_SENT:
- case I40IW_CM_STATE_MPAREQ_SENT:
- switch (cm_node->mpa_frame_rev) {
- case IETF_MPA_V2:
- cm_node->mpa_frame_rev = IETF_MPA_V1;
- /* send a syn and goto syn sent state */
- cm_node->state = I40IW_CM_STATE_SYN_SENT;
- if (i40iw_send_syn(cm_node, 0))
- i40iw_active_open_err(cm_node, false);
- break;
- case IETF_MPA_V1:
- default:
- i40iw_active_open_err(cm_node, false);
- break;
- }
- break;
- case I40IW_CM_STATE_MPAREQ_RCVD:
- atomic_add_return(1, &cm_node->passive_state);
- break;
- case I40IW_CM_STATE_ESTABLISHED:
- case I40IW_CM_STATE_SYN_RCVD:
- case I40IW_CM_STATE_LISTENING:
- i40iw_pr_err("Bad state state = %d\n", cm_node->state);
- i40iw_passive_open_err(cm_node, false);
- break;
- case I40IW_CM_STATE_OFFLOADED:
- i40iw_active_open_err(cm_node, false);
- break;
- case I40IW_CM_STATE_CLOSED:
- break;
- case I40IW_CM_STATE_FIN_WAIT2:
- case I40IW_CM_STATE_FIN_WAIT1:
- case I40IW_CM_STATE_LAST_ACK:
- cm_node->cm_id->rem_ref(cm_node->cm_id);
- /* fall through */
- case I40IW_CM_STATE_TIME_WAIT:
- cm_node->state = I40IW_CM_STATE_CLOSED;
- i40iw_rem_ref_cm_node(cm_node);
- break;
- default:
- break;
- }
-}
-
-/**
- * i40iw_handle_rcv_mpa - Process a recv'd mpa buffer
- * @cm_node: connection's node
- * @rbuf: receive buffer
- */
-static void i40iw_handle_rcv_mpa(struct i40iw_cm_node *cm_node,
- struct i40iw_puda_buf *rbuf)
-{
- int ret;
- int datasize = rbuf->datalen;
- u8 *dataloc = rbuf->data;
-
- enum i40iw_cm_event_type type = I40IW_CM_EVENT_UNKNOWN;
- u32 res_type;
-
- ret = i40iw_parse_mpa(cm_node, dataloc, &res_type, datasize);
- if (ret) {
- if (cm_node->state == I40IW_CM_STATE_MPAREQ_SENT)
- i40iw_active_open_err(cm_node, true);
- else
- i40iw_passive_open_err(cm_node, true);
- return;
- }
-
- switch (cm_node->state) {
- case I40IW_CM_STATE_ESTABLISHED:
- if (res_type == I40IW_MPA_REQUEST_REJECT)
- i40iw_pr_err("state for reject\n");
- cm_node->state = I40IW_CM_STATE_MPAREQ_RCVD;
- type = I40IW_CM_EVENT_MPA_REQ;
- i40iw_send_ack(cm_node); /* ACK received MPA request */
- atomic_set(&cm_node->passive_state,
- I40IW_PASSIVE_STATE_INDICATED);
- break;
- case I40IW_CM_STATE_MPAREQ_SENT:
- i40iw_cleanup_retrans_entry(cm_node);
- if (res_type == I40IW_MPA_REQUEST_REJECT) {
- type = I40IW_CM_EVENT_MPA_REJECT;
- cm_node->state = I40IW_CM_STATE_MPAREJ_RCVD;
- } else {
- type = I40IW_CM_EVENT_CONNECTED;
- cm_node->state = I40IW_CM_STATE_OFFLOADED;
- }
- i40iw_send_ack(cm_node);
- break;
- default:
- pr_err("%s wrong cm_node state =%d\n", __func__, cm_node->state);
- break;
- }
- i40iw_create_event(cm_node, type);
-}
-
-/**
- * i40iw_indicate_pkt_err - Send up err event to cm
- * @cm_node: connection's node
- */
-static void i40iw_indicate_pkt_err(struct i40iw_cm_node *cm_node)
-{
- switch (cm_node->state) {
- case I40IW_CM_STATE_SYN_SENT:
- case I40IW_CM_STATE_MPAREQ_SENT:
- i40iw_active_open_err(cm_node, true);
- break;
- case I40IW_CM_STATE_ESTABLISHED:
- case I40IW_CM_STATE_SYN_RCVD:
- i40iw_passive_open_err(cm_node, true);
- break;
- case I40IW_CM_STATE_OFFLOADED:
- default:
- break;
- }
-}
-
-/**
- * i40iw_check_syn - Check for error on received syn ack
- * @cm_node: connection's node
- * @tcph: pointer to tcp header
- */
-static int i40iw_check_syn(struct i40iw_cm_node *cm_node, struct tcphdr *tcph)
-{
- int err = 0;
-
- if (ntohl(tcph->ack_seq) != cm_node->tcp_cntxt.loc_seq_num) {
- err = 1;
- i40iw_active_open_err(cm_node, true);
- }
- return err;
-}
-
-/**
- * i40iw_check_seq - check seq numbers if OK
- * @cm_node: connection's node
- * @tcph: pointer to tcp header
- */
-static int i40iw_check_seq(struct i40iw_cm_node *cm_node, struct tcphdr *tcph)
-{
- int err = 0;
- u32 seq;
- u32 ack_seq;
- u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num;
- u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt;
- u32 rcv_wnd;
-
- seq = ntohl(tcph->seq);
- ack_seq = ntohl(tcph->ack_seq);
- rcv_wnd = cm_node->tcp_cntxt.rcv_wnd;
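- /* accept only segments that ack exactly our last sent sequence and whose sequence lies within the receive window */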
- if (ack_seq != loc_seq_num)
- err = -1;
- else if (!between(seq, rcv_nxt, (rcv_nxt + rcv_wnd)))
- err = -1;
- if (err) {
- i40iw_pr_err("seq number\n");
- i40iw_indicate_pkt_err(cm_node);
- }
- return err;
-}
-
-/**
- * i40iw_handle_syn_pkt - handle a SYN packet (passive side)
- * @cm_node: connection's node
- * @rbuf: receive buffer
- */
-static void i40iw_handle_syn_pkt(struct i40iw_cm_node *cm_node,
- struct i40iw_puda_buf *rbuf)
-{
- struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
- int ret;
- u32 inc_sequence;
- int optionsize;
- struct i40iw_cm_info nfo;
-
- optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
- inc_sequence = ntohl(tcph->seq);
-
- switch (cm_node->state) {
- case I40IW_CM_STATE_SYN_SENT:
- case I40IW_CM_STATE_MPAREQ_SENT:
- /* Rcvd syn on active open connection */
- i40iw_active_open_err(cm_node, 1);
- break;
- case I40IW_CM_STATE_LISTENING:
- /* Passive OPEN */
- if (atomic_read(&cm_node->listener->pend_accepts_cnt) >
- cm_node->listener->backlog) {
- cm_node->cm_core->stats_backlog_drops++;
- i40iw_passive_open_err(cm_node, false);
- break;
- }
- ret = i40iw_handle_tcp_options(cm_node, tcph, optionsize, 1);
- if (ret) {
- i40iw_passive_open_err(cm_node, false);
- /* drop pkt */
- break;
- }
- cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
- cm_node->accept_pend = 1;
- atomic_inc(&cm_node->listener->pend_accepts_cnt);
-
- cm_node->state = I40IW_CM_STATE_SYN_RCVD;
- i40iw_get_addr_info(cm_node, &nfo);
- ret = i40iw_manage_qhash(cm_node->iwdev,
- &nfo,
- I40IW_QHASH_TYPE_TCP_ESTABLISHED,
- I40IW_QHASH_MANAGE_TYPE_ADD,
- (void *)cm_node,
- false);
- cm_node->qhash_set = true;
- break;
- case I40IW_CM_STATE_CLOSED:
- i40iw_cleanup_retrans_entry(cm_node);
- atomic_inc(&cm_node->ref_count);
- i40iw_send_reset(cm_node);
- break;
- case I40IW_CM_STATE_OFFLOADED:
- case I40IW_CM_STATE_ESTABLISHED:
- case I40IW_CM_STATE_FIN_WAIT1:
- case I40IW_CM_STATE_FIN_WAIT2:
- case I40IW_CM_STATE_MPAREQ_RCVD:
- case I40IW_CM_STATE_LAST_ACK:
- case I40IW_CM_STATE_CLOSING:
- case I40IW_CM_STATE_UNKNOWN:
- default:
- break;
- }
-}
-
-/**
- * i40iw_handle_synack_pkt - Process SYN+ACK packet (active side)
- * @cm_node: connection's node
- * @rbuf: receive buffer
- */
-static void i40iw_handle_synack_pkt(struct i40iw_cm_node *cm_node,
- struct i40iw_puda_buf *rbuf)
-{
- struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
- int ret;
- u32 inc_sequence;
- int optionsize;
-
- optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
- inc_sequence = ntohl(tcph->seq);
- switch (cm_node->state) {
- case I40IW_CM_STATE_SYN_SENT:
- i40iw_cleanup_retrans_entry(cm_node);
- /* active open */
- if (i40iw_check_syn(cm_node, tcph)) {
- i40iw_pr_err("check syn fail\n");
- return;
- }
- cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
- /* setup options */
- ret = i40iw_handle_tcp_options(cm_node, tcph, optionsize, 0);
- if (ret) {
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "cm_node=%p tcp_options failed\n",
- cm_node);
- break;
- }
- i40iw_cleanup_retrans_entry(cm_node);
- cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
- i40iw_send_ack(cm_node); /* ACK for the syn_ack */
- ret = i40iw_send_mpa_request(cm_node);
- if (ret) {
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "cm_node=%p i40iw_send_mpa_request failed\n",
- cm_node);
- break;
- }
- cm_node->state = I40IW_CM_STATE_MPAREQ_SENT;
- break;
- case I40IW_CM_STATE_MPAREQ_RCVD:
- i40iw_passive_open_err(cm_node, true);
- break;
- case I40IW_CM_STATE_LISTENING:
- cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
- i40iw_cleanup_retrans_entry(cm_node);
- cm_node->state = I40IW_CM_STATE_CLOSED;
- i40iw_send_reset(cm_node);
- break;
- case I40IW_CM_STATE_CLOSED:
- cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
- i40iw_cleanup_retrans_entry(cm_node);
- atomic_inc(&cm_node->ref_count);
- i40iw_send_reset(cm_node);
- break;
- case I40IW_CM_STATE_ESTABLISHED:
- case I40IW_CM_STATE_FIN_WAIT1:
- case I40IW_CM_STATE_FIN_WAIT2:
- case I40IW_CM_STATE_LAST_ACK:
- case I40IW_CM_STATE_OFFLOADED:
- case I40IW_CM_STATE_CLOSING:
- case I40IW_CM_STATE_UNKNOWN:
- case I40IW_CM_STATE_MPAREQ_SENT:
- default:
- break;
- }
-}
-
-/**
- * i40iw_handle_ack_pkt - process packet with ACK
- * @cm_node: connection's node
- * @rbuf: receive buffer
- */
-static int i40iw_handle_ack_pkt(struct i40iw_cm_node *cm_node,
- struct i40iw_puda_buf *rbuf)
-{
- struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
- u32 inc_sequence;
- int ret = 0;
- int optionsize;
- u32 datasize = rbuf->datalen;
-
- optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
-
- if (i40iw_check_seq(cm_node, tcph))
- return -EINVAL;
-
- inc_sequence = ntohl(tcph->seq);
- switch (cm_node->state) {
- case I40IW_CM_STATE_SYN_RCVD:
- i40iw_cleanup_retrans_entry(cm_node);
- ret = i40iw_handle_tcp_options(cm_node, tcph, optionsize, 1);
- if (ret)
- break;
- cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
- cm_node->state = I40IW_CM_STATE_ESTABLISHED;
- if (datasize) {
- cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
- i40iw_handle_rcv_mpa(cm_node, rbuf);
- }
- break;
- case I40IW_CM_STATE_ESTABLISHED:
- i40iw_cleanup_retrans_entry(cm_node);
- if (datasize) {
- cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
- i40iw_handle_rcv_mpa(cm_node, rbuf);
- }
- break;
- case I40IW_CM_STATE_MPAREQ_SENT:
- cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
- if (datasize) {
- cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
- cm_node->ack_rcvd = false;
- i40iw_handle_rcv_mpa(cm_node, rbuf);
- } else {
- cm_node->ack_rcvd = true;
- }
- break;
- case I40IW_CM_STATE_LISTENING:
- i40iw_cleanup_retrans_entry(cm_node);
- cm_node->state = I40IW_CM_STATE_CLOSED;
- i40iw_send_reset(cm_node);
- break;
- case I40IW_CM_STATE_CLOSED:
- i40iw_cleanup_retrans_entry(cm_node);
- atomic_inc(&cm_node->ref_count);
- i40iw_send_reset(cm_node);
- break;
- case I40IW_CM_STATE_LAST_ACK:
- case I40IW_CM_STATE_CLOSING:
- i40iw_cleanup_retrans_entry(cm_node);
- cm_node->state = I40IW_CM_STATE_CLOSED;
- if (!cm_node->accept_pend)
- cm_node->cm_id->rem_ref(cm_node->cm_id);
- i40iw_rem_ref_cm_node(cm_node);
- break;
- case I40IW_CM_STATE_FIN_WAIT1:
- i40iw_cleanup_retrans_entry(cm_node);
- cm_node->state = I40IW_CM_STATE_FIN_WAIT2;
- break;
- case I40IW_CM_STATE_SYN_SENT:
- case I40IW_CM_STATE_FIN_WAIT2:
- case I40IW_CM_STATE_OFFLOADED:
- case I40IW_CM_STATE_MPAREQ_RCVD:
- case I40IW_CM_STATE_UNKNOWN:
- default:
- i40iw_cleanup_retrans_entry(cm_node);
- break;
- }
- return ret;
-}
-
-/**
- * i40iw_process_packet - process cm packet
- * @cm_node: connection's node
- * @rbuf: receive buffer
- */
-static void i40iw_process_packet(struct i40iw_cm_node *cm_node,
- struct i40iw_puda_buf *rbuf)
-{
- enum i40iw_tcpip_pkt_type pkt_type = I40IW_PKT_TYPE_UNKNOWN;
- struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
- u32 fin_set = 0;
- int ret;
-
- if (tcph->rst) {
- pkt_type = I40IW_PKT_TYPE_RST;
- } else if (tcph->syn) {
- pkt_type = I40IW_PKT_TYPE_SYN;
- if (tcph->ack)
- pkt_type = I40IW_PKT_TYPE_SYNACK;
- } else if (tcph->ack) {
- pkt_type = I40IW_PKT_TYPE_ACK;
- }
- if (tcph->fin)
- fin_set = 1;
-
- switch (pkt_type) {
- case I40IW_PKT_TYPE_SYN:
- i40iw_handle_syn_pkt(cm_node, rbuf);
- break;
- case I40IW_PKT_TYPE_SYNACK:
- i40iw_handle_synack_pkt(cm_node, rbuf);
- break;
- case I40IW_PKT_TYPE_ACK:
- ret = i40iw_handle_ack_pkt(cm_node, rbuf);
- if (fin_set && !ret)
- i40iw_handle_fin_pkt(cm_node);
- break;
- case I40IW_PKT_TYPE_RST:
- i40iw_handle_rst_pkt(cm_node, rbuf);
- break;
- default:
- if (fin_set &&
- (!i40iw_check_seq(cm_node, (struct tcphdr *)rbuf->tcph)))
- i40iw_handle_fin_pkt(cm_node);
- break;
- }
-}
-
-/**
- * i40iw_make_listen_node - create a listen node with params
- * @cm_core: cm's core
- * @iwdev: iwarp device structure
- * @cm_info: quad info for connection
- */
-static struct i40iw_cm_listener *i40iw_make_listen_node(
- struct i40iw_cm_core *cm_core,
- struct i40iw_device *iwdev,
- struct i40iw_cm_info *cm_info)
-{
- struct i40iw_cm_listener *listener;
- unsigned long flags;
-
- /* cannot have multiple matching listeners */
- listener = i40iw_find_listener(cm_core, cm_info->loc_addr,
- cm_info->loc_port,
- cm_info->vlan_id,
- I40IW_CM_LISTENER_EITHER_STATE);
- if (listener &&
- (listener->listener_state == I40IW_CM_LISTENER_ACTIVE_STATE)) {
- atomic_dec(&listener->ref_count);
- i40iw_debug(cm_core->dev,
- I40IW_DEBUG_CM,
- "Not creating listener since it already exists\n");
- return NULL;
- }
-
- if (!listener) {
- /* create a CM listen node (1/2 node to compare incoming traffic to) */
- listener = kzalloc(sizeof(*listener), GFP_KERNEL);
- if (!listener)
- return NULL;
- cm_core->stats_listen_nodes_created++;
- memcpy(listener->loc_addr, cm_info->loc_addr, sizeof(listener->loc_addr));
- listener->loc_port = cm_info->loc_port;
-
- INIT_LIST_HEAD(&listener->child_listen_list);
-
- atomic_set(&listener->ref_count, 1);
- } else {
- listener->reused_node = 1;
- }
-
- listener->cm_id = cm_info->cm_id;
- listener->ipv4 = cm_info->ipv4;
- listener->vlan_id = cm_info->vlan_id;
- atomic_set(&listener->pend_accepts_cnt, 0);
- listener->cm_core = cm_core;
- listener->iwdev = iwdev;
-
- listener->backlog = cm_info->backlog;
- listener->listener_state = I40IW_CM_LISTENER_ACTIVE_STATE;
-
- if (!listener->reused_node) {
- spin_lock_irqsave(&cm_core->listen_list_lock, flags);
- list_add(&listener->list, &cm_core->listen_nodes);
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
- }
-
- return listener;
-}
-
-/**
- * i40iw_create_cm_node - make a connection node with params
- * @cm_core: cm's core
- * @iwdev: iwarp device structure
- * @conn_param: upper layer connection parameters
- * @cm_info: quad info for connection
- */
-static struct i40iw_cm_node *i40iw_create_cm_node(
- struct i40iw_cm_core *cm_core,
- struct i40iw_device *iwdev,
- struct iw_cm_conn_param *conn_param,
- struct i40iw_cm_info *cm_info)
-{
- struct i40iw_cm_node *cm_node;
- struct i40iw_cm_listener *loopback_remotelistener;
- struct i40iw_cm_node *loopback_remotenode;
- struct i40iw_cm_info loopback_cm_info;
-
- u16 private_data_len = conn_param->private_data_len;
- const void *private_data = conn_param->private_data;
-
- /* create a CM connection node */
- cm_node = i40iw_make_cm_node(cm_core, iwdev, cm_info, NULL);
- if (!cm_node)
- return ERR_PTR(-ENOMEM);
- /* set our node side to client (active) side */
- cm_node->tcp_cntxt.client = 1;
- cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE;
-
- i40iw_record_ird_ord(cm_node, conn_param->ird, conn_param->ord);
-
- if (!memcmp(cm_info->loc_addr, cm_info->rem_addr, sizeof(cm_info->loc_addr))) {
- loopback_remotelistener = i40iw_find_listener(
- cm_core,
- cm_info->rem_addr,
- cm_node->rem_port,
- cm_node->vlan_id,
- I40IW_CM_LISTENER_ACTIVE_STATE);
- if (!loopback_remotelistener) {
- i40iw_rem_ref_cm_node(cm_node);
- return ERR_PTR(-ECONNREFUSED);
- } else {
- loopback_cm_info = *cm_info;
- loopback_cm_info.loc_port = cm_info->rem_port;
- loopback_cm_info.rem_port = cm_info->loc_port;
- loopback_cm_info.cm_id = loopback_remotelistener->cm_id;
- loopback_cm_info.ipv4 = cm_info->ipv4;
- loopback_remotenode = i40iw_make_cm_node(cm_core,
- iwdev,
- &loopback_cm_info,
- loopback_remotelistener);
- if (!loopback_remotenode) {
- i40iw_rem_ref_cm_node(cm_node);
- return ERR_PTR(-ENOMEM);
- }
- cm_core->stats_loopbacks++;
- loopback_remotenode->loopbackpartner = cm_node;
- loopback_remotenode->tcp_cntxt.rcv_wscale =
- I40IW_CM_DEFAULT_RCV_WND_SCALE;
- cm_node->loopbackpartner = loopback_remotenode;
- memcpy(loopback_remotenode->pdata_buf, private_data,
- private_data_len);
- loopback_remotenode->pdata.size = private_data_len;
-
- if (loopback_remotenode->ord_size > cm_node->ird_size)
- loopback_remotenode->ord_size =
- cm_node->ird_size;
-
- cm_node->state = I40IW_CM_STATE_OFFLOADED;
- cm_node->tcp_cntxt.rcv_nxt =
- loopback_remotenode->tcp_cntxt.loc_seq_num;
- loopback_remotenode->tcp_cntxt.rcv_nxt =
- cm_node->tcp_cntxt.loc_seq_num;
- cm_node->tcp_cntxt.max_snd_wnd =
- loopback_remotenode->tcp_cntxt.rcv_wnd;
- loopback_remotenode->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.rcv_wnd;
- cm_node->tcp_cntxt.snd_wnd = loopback_remotenode->tcp_cntxt.rcv_wnd;
- loopback_remotenode->tcp_cntxt.snd_wnd = cm_node->tcp_cntxt.rcv_wnd;
- cm_node->tcp_cntxt.snd_wscale = loopback_remotenode->tcp_cntxt.rcv_wscale;
- loopback_remotenode->tcp_cntxt.snd_wscale = cm_node->tcp_cntxt.rcv_wscale;
- }
- return cm_node;
- }
-
- cm_node->pdata.size = private_data_len;
- cm_node->pdata.addr = cm_node->pdata_buf;
-
- memcpy(cm_node->pdata_buf, private_data, private_data_len);
-
- cm_node->state = I40IW_CM_STATE_SYN_SENT;
- return cm_node;
-}
-
-/**
- * i40iw_cm_reject - reject and teardown a connection
- * @cm_node: connection's node
- * @pdata: ptr to private data for reject
- * @plen: size of private data
- */
-static int i40iw_cm_reject(struct i40iw_cm_node *cm_node, const void *pdata, u8 plen)
-{
- int ret = 0;
- int err;
- int passive_state;
- struct iw_cm_id *cm_id = cm_node->cm_id;
- struct i40iw_cm_node *loopback = cm_node->loopbackpartner;
-
- if (cm_node->tcp_cntxt.client)
- return ret;
- i40iw_cleanup_retrans_entry(cm_node);
-
- if (!loopback) {
- passive_state = atomic_add_return(1, &cm_node->passive_state);
- if (passive_state == I40IW_SEND_RESET_EVENT) {
- cm_node->state = I40IW_CM_STATE_CLOSED;
- i40iw_rem_ref_cm_node(cm_node);
- } else {
- if (cm_node->state == I40IW_CM_STATE_LISTENER_DESTROYED) {
- i40iw_rem_ref_cm_node(cm_node);
- } else {
- ret = i40iw_send_mpa_reject(cm_node, pdata, plen);
- if (ret) {
- cm_node->state = I40IW_CM_STATE_CLOSED;
- err = i40iw_send_reset(cm_node);
- if (err)
- i40iw_pr_err("send reset failed\n");
- } else {
- cm_id->add_ref(cm_id);
- }
- }
- }
- } else {
- cm_node->cm_id = NULL;
- if (cm_node->state == I40IW_CM_STATE_LISTENER_DESTROYED) {
- i40iw_rem_ref_cm_node(cm_node);
- i40iw_rem_ref_cm_node(loopback);
- } else {
- ret = i40iw_send_cm_event(loopback,
- loopback->cm_id,
- IW_CM_EVENT_CONNECT_REPLY,
- -ECONNREFUSED);
- i40iw_rem_ref_cm_node(cm_node);
- loopback->state = I40IW_CM_STATE_CLOSING;
-
- cm_id = loopback->cm_id;
- i40iw_rem_ref_cm_node(loopback);
- cm_id->rem_ref(cm_id);
- }
- }
-
- return ret;
-}
-
-/**
- * i40iw_cm_close - close a cm connection
- * @cm_node: connection's node
- */
-static int i40iw_cm_close(struct i40iw_cm_node *cm_node)
-{
- int ret = 0;
-
- if (!cm_node)
- return -EINVAL;
-
- switch (cm_node->state) {
- case I40IW_CM_STATE_SYN_RCVD:
- case I40IW_CM_STATE_SYN_SENT:
- case I40IW_CM_STATE_ONE_SIDE_ESTABLISHED:
- case I40IW_CM_STATE_ESTABLISHED:
- case I40IW_CM_STATE_ACCEPTING:
- case I40IW_CM_STATE_MPAREQ_SENT:
- case I40IW_CM_STATE_MPAREQ_RCVD:
- i40iw_cleanup_retrans_entry(cm_node);
- i40iw_send_reset(cm_node);
- break;
- case I40IW_CM_STATE_CLOSE_WAIT:
- cm_node->state = I40IW_CM_STATE_LAST_ACK;
- i40iw_send_fin(cm_node);
- break;
- case I40IW_CM_STATE_FIN_WAIT1:
- case I40IW_CM_STATE_FIN_WAIT2:
- case I40IW_CM_STATE_LAST_ACK:
- case I40IW_CM_STATE_TIME_WAIT:
- case I40IW_CM_STATE_CLOSING:
- ret = -1;
- break;
- case I40IW_CM_STATE_LISTENING:
- i40iw_cleanup_retrans_entry(cm_node);
- i40iw_send_reset(cm_node);
- break;
- case I40IW_CM_STATE_MPAREJ_RCVD:
- case I40IW_CM_STATE_UNKNOWN:
- case I40IW_CM_STATE_INITED:
- case I40IW_CM_STATE_CLOSED:
- case I40IW_CM_STATE_LISTENER_DESTROYED:
- i40iw_rem_ref_cm_node(cm_node);
- break;
- case I40IW_CM_STATE_OFFLOADED:
- if (cm_node->send_entry)
- i40iw_pr_err("send_entry\n");
- i40iw_rem_ref_cm_node(cm_node);
- break;
- }
- return ret;
-}
-
-/**
- * i40iw_receive_ilq - recv an ETHERNET packet and process it through CM
- * @vsi: pointer to the vsi structure
- * @rbuf: receive buffer
- */
-void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf)
-{
- struct i40iw_cm_node *cm_node;
- struct i40iw_cm_listener *listener;
- struct iphdr *iph;
- struct ipv6hdr *ip6h;
- struct tcphdr *tcph;
- struct i40iw_cm_info cm_info;
- struct i40iw_sc_dev *dev = vsi->dev;
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
- struct i40iw_cm_core *cm_core = &iwdev->cm_core;
- struct vlan_ethhdr *ethh;
- u16 vtag;
-
- /* if vlan, then maclen = 18 else 14 */
- iph = (struct iphdr *)rbuf->iph;
- memset(&cm_info, 0, sizeof(cm_info));
-
- i40iw_debug_buf(dev,
- I40IW_DEBUG_ILQ,
- "RECEIVE ILQ BUFFER",
- rbuf->mem.va,
- rbuf->totallen);
- ethh = (struct vlan_ethhdr *)rbuf->mem.va;
-
- if (ethh->h_vlan_proto == htons(ETH_P_8021Q)) {
- vtag = ntohs(ethh->h_vlan_TCI);
- cm_info.user_pri = (vtag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
- cm_info.vlan_id = vtag & VLAN_VID_MASK;
- i40iw_debug(cm_core->dev,
- I40IW_DEBUG_CM,
- "%s vlan_id=%d\n",
- __func__,
- cm_info.vlan_id);
- } else {
- cm_info.vlan_id = I40IW_NO_VLAN;
- }
- tcph = (struct tcphdr *)rbuf->tcph;
-
- if (rbuf->ipv4) {
- cm_info.loc_addr[0] = ntohl(iph->daddr);
- cm_info.rem_addr[0] = ntohl(iph->saddr);
- cm_info.ipv4 = true;
- cm_info.tos = iph->tos;
- } else {
- ip6h = (struct ipv6hdr *)rbuf->iph;
- i40iw_copy_ip_ntohl(cm_info.loc_addr,
- ip6h->daddr.in6_u.u6_addr32);
- i40iw_copy_ip_ntohl(cm_info.rem_addr,
- ip6h->saddr.in6_u.u6_addr32);
- cm_info.ipv4 = false;
- cm_info.tos = (ip6h->priority << 4) | (ip6h->flow_lbl[0] >> 4);
- }
- cm_info.loc_port = ntohs(tcph->dest);
- cm_info.rem_port = ntohs(tcph->source);
- cm_node = i40iw_find_node(cm_core,
- cm_info.rem_port,
- cm_info.rem_addr,
- cm_info.loc_port,
- cm_info.loc_addr,
- true,
- false);
-
- if (!cm_node) {
- /* the only packet type accepted here is a SYN for the PASSIVE open */
- if (!tcph->syn || tcph->ack)
- return;
- listener =
- i40iw_find_listener(cm_core,
- cm_info.loc_addr,
- cm_info.loc_port,
- cm_info.vlan_id,
- I40IW_CM_LISTENER_ACTIVE_STATE);
- if (!listener) {
- cm_info.cm_id = NULL;
- i40iw_debug(cm_core->dev,
- I40IW_DEBUG_CM,
- "%s no listener found\n",
- __func__);
- return;
- }
- cm_info.cm_id = listener->cm_id;
- cm_node = i40iw_make_cm_node(cm_core, iwdev, &cm_info, listener);
- if (!cm_node) {
- i40iw_debug(cm_core->dev,
- I40IW_DEBUG_CM,
- "%s allocate node failed\n",
- __func__);
- atomic_dec(&listener->ref_count);
- return;
- }
- if (!tcph->rst && !tcph->fin) {
- cm_node->state = I40IW_CM_STATE_LISTENING;
- } else {
- i40iw_rem_ref_cm_node(cm_node);
- return;
- }
- atomic_inc(&cm_node->ref_count);
- } else if (cm_node->state == I40IW_CM_STATE_OFFLOADED) {
- i40iw_rem_ref_cm_node(cm_node);
- return;
- }
- i40iw_process_packet(cm_node, rbuf);
- i40iw_rem_ref_cm_node(cm_node);
-}
-
-/**
- * i40iw_setup_cm_core - allocate a top level instance of a cm core
- * @iwdev: iwarp device structure
- */
-int i40iw_setup_cm_core(struct i40iw_device *iwdev)
-{
- struct i40iw_cm_core *cm_core = &iwdev->cm_core;
-
- cm_core->iwdev = iwdev;
- cm_core->dev = &iwdev->sc_dev;
-
- INIT_LIST_HEAD(&cm_core->accelerated_list);
- INIT_LIST_HEAD(&cm_core->non_accelerated_list);
- INIT_LIST_HEAD(&cm_core->listen_nodes);
-
- timer_setup(&cm_core->tcp_timer, i40iw_cm_timer_tick, 0);
-
- spin_lock_init(&cm_core->ht_lock);
- spin_lock_init(&cm_core->listen_list_lock);
- spin_lock_init(&cm_core->apbvt_lock);
-
- cm_core->event_wq = alloc_ordered_workqueue("iwewq",
- WQ_MEM_RECLAIM);
- if (!cm_core->event_wq)
- goto error;
-
- cm_core->disconn_wq = alloc_ordered_workqueue("iwdwq",
- WQ_MEM_RECLAIM);
- if (!cm_core->disconn_wq)
- goto error;
-
- return 0;
-error:
- i40iw_cleanup_cm_core(&iwdev->cm_core);
-
- return -ENOMEM;
-}
-
-/**
- * i40iw_cleanup_cm_core - deallocate a top level instance of a cm core
- * @cm_core: cm's core
- */
-void i40iw_cleanup_cm_core(struct i40iw_cm_core *cm_core)
-{
- unsigned long flags;
-
- if (!cm_core)
- return;
-
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- if (timer_pending(&cm_core->tcp_timer))
- del_timer_sync(&cm_core->tcp_timer);
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
-
- if (cm_core->event_wq)
- destroy_workqueue(cm_core->event_wq);
- if (cm_core->disconn_wq)
- destroy_workqueue(cm_core->disconn_wq);
-}
-
-/**
- * i40iw_init_tcp_ctx - setup qp context
- * @cm_node: connection's node
- * @tcp_info: offload info for tcp
- * @iwqp: associated qp for the connection
- */
-static void i40iw_init_tcp_ctx(struct i40iw_cm_node *cm_node,
- struct i40iw_tcp_offload_info *tcp_info,
- struct i40iw_qp *iwqp)
-{
- tcp_info->ipv4 = cm_node->ipv4;
- tcp_info->drop_ooo_seg = true;
- tcp_info->wscale = true;
- tcp_info->ignore_tcp_opt = true;
- tcp_info->ignore_tcp_uns_opt = true;
- tcp_info->no_nagle = false;
-
- tcp_info->ttl = I40IW_DEFAULT_TTL;
- tcp_info->rtt_var = cpu_to_le32(I40IW_DEFAULT_RTT_VAR);
- tcp_info->ss_thresh = cpu_to_le32(I40IW_DEFAULT_SS_THRESH);
- tcp_info->rexmit_thresh = I40IW_DEFAULT_REXMIT_THRESH;
-
- tcp_info->tcp_state = I40IW_TCP_STATE_ESTABLISHED;
- tcp_info->snd_wscale = cm_node->tcp_cntxt.snd_wscale;
- tcp_info->rcv_wscale = cm_node->tcp_cntxt.rcv_wscale;
-
- tcp_info->snd_nxt = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
- tcp_info->snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.snd_wnd);
- tcp_info->rcv_nxt = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
- tcp_info->snd_max = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
-
- tcp_info->snd_una = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
- tcp_info->cwnd = cpu_to_le32(2 * cm_node->tcp_cntxt.mss);
- tcp_info->snd_wl1 = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
- tcp_info->snd_wl2 = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
- tcp_info->max_snd_window = cpu_to_le32(cm_node->tcp_cntxt.max_snd_wnd);
- tcp_info->rcv_wnd = cpu_to_le32(cm_node->tcp_cntxt.rcv_wnd <<
- cm_node->tcp_cntxt.rcv_wscale);
-
- tcp_info->flow_label = 0;
- tcp_info->snd_mss = cpu_to_le32(((u32)cm_node->tcp_cntxt.mss));
- if (cm_node->vlan_id <= VLAN_VID_MASK) {
- tcp_info->insert_vlan_tag = true;
- tcp_info->vlan_tag = cpu_to_le16(((u16)cm_node->user_pri << I40IW_VLAN_PRIO_SHIFT) |
- cm_node->vlan_id);
- }
- if (cm_node->ipv4) {
- tcp_info->src_port = cpu_to_le16(cm_node->loc_port);
- tcp_info->dst_port = cpu_to_le16(cm_node->rem_port);
-
- tcp_info->dest_ip_addr3 = cpu_to_le32(cm_node->rem_addr[0]);
- tcp_info->local_ipaddr3 = cpu_to_le32(cm_node->loc_addr[0]);
- tcp_info->arp_idx =
- cpu_to_le16((u16)i40iw_arp_table(
- iwqp->iwdev,
- &tcp_info->dest_ip_addr3,
- true,
- NULL,
- I40IW_ARP_RESOLVE));
- } else {
- tcp_info->src_port = cpu_to_le16(cm_node->loc_port);
- tcp_info->dst_port = cpu_to_le16(cm_node->rem_port);
- tcp_info->dest_ip_addr0 = cpu_to_le32(cm_node->rem_addr[0]);
- tcp_info->dest_ip_addr1 = cpu_to_le32(cm_node->rem_addr[1]);
- tcp_info->dest_ip_addr2 = cpu_to_le32(cm_node->rem_addr[2]);
- tcp_info->dest_ip_addr3 = cpu_to_le32(cm_node->rem_addr[3]);
- tcp_info->local_ipaddr0 = cpu_to_le32(cm_node->loc_addr[0]);
- tcp_info->local_ipaddr1 = cpu_to_le32(cm_node->loc_addr[1]);
- tcp_info->local_ipaddr2 = cpu_to_le32(cm_node->loc_addr[2]);
- tcp_info->local_ipaddr3 = cpu_to_le32(cm_node->loc_addr[3]);
- tcp_info->arp_idx =
- cpu_to_le16((u16)i40iw_arp_table(
- iwqp->iwdev,
- &tcp_info->dest_ip_addr0,
- false,
- NULL,
- I40IW_ARP_RESOLVE));
- }
-}
-
-/**
- * i40iw_cm_init_tsa_conn - setup qp for RTS
- * @iwqp: associated qp for the connection
- * @cm_node: connection's node
- */
-static void i40iw_cm_init_tsa_conn(struct i40iw_qp *iwqp,
- struct i40iw_cm_node *cm_node)
-{
- struct i40iw_tcp_offload_info tcp_info;
- struct i40iwarp_offload_info *iwarp_info;
- struct i40iw_qp_host_ctx_info *ctx_info;
- struct i40iw_device *iwdev = iwqp->iwdev;
- struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev;
-
- memset(&tcp_info, 0x00, sizeof(struct i40iw_tcp_offload_info));
- iwarp_info = &iwqp->iwarp_info;
- ctx_info = &iwqp->ctx_info;
-
- ctx_info->tcp_info = &tcp_info;
- ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
- ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
-
- iwarp_info->ord_size = cm_node->ord_size;
- iwarp_info->ird_size = i40iw_derive_hw_ird_setting(cm_node->ird_size);
-
- if (iwarp_info->ord_size == 1)
- iwarp_info->ord_size = 2;
-
- iwarp_info->rd_enable = true;
- iwarp_info->rdmap_ver = 1;
- iwarp_info->ddp_ver = 1;
-
- iwarp_info->pd_id = iwqp->iwpd->sc_pd.pd_id;
-
- ctx_info->tcp_info_valid = true;
- ctx_info->iwarp_info_valid = true;
- ctx_info->add_to_qoslist = true;
- ctx_info->user_pri = cm_node->user_pri;
-
- i40iw_init_tcp_ctx(cm_node, &tcp_info, iwqp);
- if (cm_node->snd_mark_en) {
- iwarp_info->snd_mark_en = true;
- iwarp_info->snd_mark_offset = (tcp_info.snd_nxt &
- SNDMARKER_SEQNMASK) + cm_node->lsmm_size;
- }
-
- cm_node->state = I40IW_CM_STATE_OFFLOADED;
- tcp_info.tcp_state = I40IW_TCP_STATE_ESTABLISHED;
- tcp_info.src_mac_addr_idx = iwdev->mac_ip_table_idx;
- tcp_info.tos = cm_node->tos;
-
- dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp, (u64 *)(iwqp->host_ctx.va), ctx_info);
-
- /* once tcp_info is set, no need to do it again */
- ctx_info->tcp_info_valid = false;
- ctx_info->iwarp_info_valid = false;
- ctx_info->add_to_qoslist = false;
-}
-
-/**
- * i40iw_cm_disconn - when a connection is being closed
- * @iwqp: associated qp for the connection
- */
-void i40iw_cm_disconn(struct i40iw_qp *iwqp)
-{
- struct disconn_work *work;
- struct i40iw_device *iwdev = iwqp->iwdev;
- struct i40iw_cm_core *cm_core = &iwdev->cm_core;
- unsigned long flags;
-
- work = kzalloc(sizeof(*work), GFP_ATOMIC);
- if (!work)
- return; /* Timer will clean up */
-
- spin_lock_irqsave(&iwdev->qptable_lock, flags);
- if (!iwdev->qp_table[iwqp->ibqp.qp_num]) {
- spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
- i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM,
- "%s qp_id %d is already freed\n",
- __func__, iwqp->ibqp.qp_num);
- kfree(work);
- return;
- }
- i40iw_add_ref(&iwqp->ibqp);
- spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
-
- work->iwqp = iwqp;
- INIT_WORK(&work->work, i40iw_disconnect_worker);
- queue_work(cm_core->disconn_wq, &work->work);
- return;
-}
-
-/**
- * i40iw_qp_disconnect - free qp and close cm
- * @iwqp: associated qp for the connection
- */
-static void i40iw_qp_disconnect(struct i40iw_qp *iwqp)
-{
- struct i40iw_device *iwdev;
- struct i40iw_ib_device *iwibdev;
-
- iwdev = to_iwdev(iwqp->ibqp.device);
- if (!iwdev) {
- i40iw_pr_err("iwdev == NULL\n");
- return;
- }
-
- iwibdev = iwdev->iwibdev;
-
- if (iwqp->active_conn) {
- /* indicate this connection is NOT active */
- iwqp->active_conn = 0;
- } else {
- /* Need to free the Last Streaming Mode Message */
- if (iwqp->ietf_mem.va) {
- if (iwqp->lsmm_mr)
- iwibdev->ibdev.ops.dereg_mr(iwqp->lsmm_mr,
- NULL);
- i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->ietf_mem);
- }
- }
-
- /* close the CM node down if it is still active */
- if (iwqp->cm_node) {
- i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, "%s Call close API\n", __func__);
- i40iw_cm_close(iwqp->cm_node);
- }
-}
-
-/**
- * i40iw_cm_disconn_true - called by worker thread to disconnect qp
- * @iwqp: associated qp for the connection
- */
-static void i40iw_cm_disconn_true(struct i40iw_qp *iwqp)
-{
- struct iw_cm_id *cm_id;
- struct i40iw_device *iwdev;
- struct i40iw_sc_qp *qp = &iwqp->sc_qp;
- u16 last_ae;
- u8 original_hw_tcp_state;
- u8 original_ibqp_state;
- int disconn_status = 0;
- int issue_disconn = 0;
- int issue_close = 0;
- int issue_flush = 0;
- struct ib_event ibevent;
- unsigned long flags;
- int ret;
-
- if (!iwqp) {
- i40iw_pr_err("iwqp == NULL\n");
- return;
- }
-
- spin_lock_irqsave(&iwqp->lock, flags);
- cm_id = iwqp->cm_id;
- /* make sure we haven't already closed this connection */
- if (!cm_id) {
- spin_unlock_irqrestore(&iwqp->lock, flags);
- return;
- }
-
- iwdev = to_iwdev(iwqp->ibqp.device);
-
- original_hw_tcp_state = iwqp->hw_tcp_state;
- original_ibqp_state = iwqp->ibqp_state;
- last_ae = iwqp->last_aeq;
-
- if (qp->term_flags) {
- issue_disconn = 1;
- issue_close = 1;
- iwqp->cm_id = NULL;
- /* When the term timer expires after cm_timer, we don't want the
- * terminate handler to issue cm_disconn, which can re-free a QP
- * even after its refcnt reaches 0.
- */
- i40iw_terminate_del_timer(qp);
- if (!iwqp->flush_issued) {
- iwqp->flush_issued = 1;
- issue_flush = 1;
- }
- } else if ((original_hw_tcp_state == I40IW_TCP_STATE_CLOSE_WAIT) ||
- ((original_ibqp_state == IB_QPS_RTS) &&
- (last_ae == I40IW_AE_LLP_CONNECTION_RESET))) {
- issue_disconn = 1;
- if (last_ae == I40IW_AE_LLP_CONNECTION_RESET)
- disconn_status = -ECONNRESET;
- }
-
- if (((original_hw_tcp_state == I40IW_TCP_STATE_CLOSED) ||
- (original_hw_tcp_state == I40IW_TCP_STATE_TIME_WAIT) ||
- (last_ae == I40IW_AE_RDMAP_ROE_BAD_LLP_CLOSE) ||
- (last_ae == I40IW_AE_LLP_CONNECTION_RESET) ||
- iwdev->reset)) {
- issue_close = 1;
- iwqp->cm_id = NULL;
- if (!iwqp->flush_issued) {
- iwqp->flush_issued = 1;
- issue_flush = 1;
- }
- }
-
- spin_unlock_irqrestore(&iwqp->lock, flags);
- if (issue_flush && !iwqp->destroyed) {
- /* Flush the queues */
- i40iw_flush_wqes(iwdev, iwqp);
-
- if (qp->term_flags && iwqp->ibqp.event_handler) {
- ibevent.device = iwqp->ibqp.device;
- ibevent.event = (qp->eventtype == TERM_EVENT_QP_FATAL) ?
- IB_EVENT_QP_FATAL : IB_EVENT_QP_ACCESS_ERR;
- ibevent.element.qp = &iwqp->ibqp;
- iwqp->ibqp.event_handler(&ibevent, iwqp->ibqp.qp_context);
- }
- }
-
- if (cm_id && cm_id->event_handler) {
- if (issue_disconn) {
- ret = i40iw_send_cm_event(NULL,
- cm_id,
- IW_CM_EVENT_DISCONNECT,
- disconn_status);
-
- if (ret)
- i40iw_debug(&iwdev->sc_dev,
- I40IW_DEBUG_CM,
- "disconnect event failed %s: - cm_id = %p\n",
- __func__, cm_id);
- }
- if (issue_close) {
- i40iw_qp_disconnect(iwqp);
- cm_id->provider_data = iwqp;
- ret = i40iw_send_cm_event(NULL, cm_id, IW_CM_EVENT_CLOSE, 0);
- if (ret)
- i40iw_debug(&iwdev->sc_dev,
- I40IW_DEBUG_CM,
- "close event failed %s: - cm_id = %p\n",
- __func__, cm_id);
- cm_id->rem_ref(cm_id);
- }
- }
-}
-
-/**
- * i40iw_disconnect_worker - worker for connection close
- * @work: points to disconn structure
- */
-static void i40iw_disconnect_worker(struct work_struct *work)
-{
- struct disconn_work *dwork = container_of(work, struct disconn_work, work);
- struct i40iw_qp *iwqp = dwork->iwqp;
-
- kfree(dwork);
- i40iw_cm_disconn_true(iwqp);
- i40iw_rem_ref(&iwqp->ibqp);
-}
-
-/**
- * i40iw_accept - registered call for connection to be accepted
- * @cm_id: cm information for passive connection
- * @conn_param: accept parameters
- */
-int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
-{
- struct ib_qp *ibqp;
- struct i40iw_qp *iwqp;
- struct i40iw_device *iwdev;
- struct i40iw_sc_dev *dev;
- struct i40iw_cm_core *cm_core;
- struct i40iw_cm_node *cm_node;
- struct ib_qp_attr attr;
- int passive_state;
- struct ib_mr *ibmr;
- struct i40iw_pd *iwpd;
- u16 buf_len = 0;
- struct i40iw_kmem_info accept;
- enum i40iw_status_code status;
- u64 tagged_offset;
- unsigned long flags;
-
- memset(&attr, 0, sizeof(attr));
- ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn);
- if (!ibqp)
- return -EINVAL;
-
- iwqp = to_iwqp(ibqp);
- iwdev = iwqp->iwdev;
- dev = &iwdev->sc_dev;
- cm_core = &iwdev->cm_core;
- cm_node = (struct i40iw_cm_node *)cm_id->provider_data;
-
- if (((struct sockaddr_in *)&cm_id->local_addr)->sin_family == AF_INET) {
- cm_node->ipv4 = true;
- cm_node->vlan_id = i40iw_get_vlan_ipv4(cm_node->loc_addr);
- } else {
- cm_node->ipv4 = false;
- i40iw_netdev_vlan_ipv6(cm_node->loc_addr, &cm_node->vlan_id);
- }
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "Accept vlan_id=%d\n",
- cm_node->vlan_id);
- if (cm_node->state == I40IW_CM_STATE_LISTENER_DESTROYED) {
- if (cm_node->loopbackpartner)
- i40iw_rem_ref_cm_node(cm_node->loopbackpartner);
- i40iw_rem_ref_cm_node(cm_node);
- return -EINVAL;
- }
-
- passive_state = atomic_add_return(1, &cm_node->passive_state);
- if (passive_state == I40IW_SEND_RESET_EVENT) {
- i40iw_rem_ref_cm_node(cm_node);
- return -ECONNRESET;
- }
-
- cm_node->cm_core->stats_accepts++;
- iwqp->cm_node = (void *)cm_node;
- cm_node->iwqp = iwqp;
-
- buf_len = conn_param->private_data_len + I40IW_MAX_IETF_SIZE;
-
- status = i40iw_allocate_dma_mem(dev->hw, &iwqp->ietf_mem, buf_len, 1);
-
- if (status)
- return -ENOMEM;
- cm_node->pdata.size = conn_param->private_data_len;
- accept.addr = iwqp->ietf_mem.va;
- accept.size = i40iw_cm_build_mpa_frame(cm_node, &accept, MPA_KEY_REPLY);
- memcpy(accept.addr + accept.size, conn_param->private_data,
- conn_param->private_data_len);
-
- /* setup our first outgoing iWarp send WQE (the IETF frame response) */
- if ((cm_node->ipv4 &&
- !i40iw_ipv4_is_loopback(cm_node->loc_addr[0], cm_node->rem_addr[0])) ||
- (!cm_node->ipv4 &&
- !i40iw_ipv6_is_loopback(cm_node->loc_addr, cm_node->rem_addr))) {
- iwpd = iwqp->iwpd;
- tagged_offset = (uintptr_t)iwqp->ietf_mem.va;
- ibmr = i40iw_reg_phys_mr(&iwpd->ibpd,
- iwqp->ietf_mem.pa,
- buf_len,
- IB_ACCESS_LOCAL_WRITE,
- &tagged_offset);
- if (IS_ERR(ibmr)) {
- i40iw_free_dma_mem(dev->hw, &iwqp->ietf_mem);
- return -ENOMEM;
- }
-
- ibmr->pd = &iwpd->ibpd;
- ibmr->device = iwpd->ibpd.device;
- iwqp->lsmm_mr = ibmr;
- if (iwqp->page)
- iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
- dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp,
- iwqp->ietf_mem.va,
- (accept.size + conn_param->private_data_len),
- ibmr->lkey);
-
- } else {
- if (iwqp->page)
- iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
- dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp, NULL, 0, 0);
- }
-
- if (iwqp->page)
- kunmap(iwqp->page);
-
- iwqp->cm_id = cm_id;
- cm_node->cm_id = cm_id;
-
- cm_id->provider_data = (void *)iwqp;
- iwqp->active_conn = 0;
-
- cm_node->lsmm_size = accept.size + conn_param->private_data_len;
- i40iw_cm_init_tsa_conn(iwqp, cm_node);
- cm_id->add_ref(cm_id);
- i40iw_add_ref(&iwqp->ibqp);
-
- attr.qp_state = IB_QPS_RTS;
- cm_node->qhash_set = false;
- i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
-
- cm_node->accelerated = true;
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- list_move_tail(&cm_node->list, &cm_core->accelerated_list);
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
-
- status =
- i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_ESTABLISHED, 0);
- if (status)
- i40iw_debug(dev, I40IW_DEBUG_CM, "error sending cm event - ESTABLISHED\n");
-
- if (cm_node->loopbackpartner) {
- cm_node->loopbackpartner->pdata.size = conn_param->private_data_len;
-
- /* copy entire MPA frame to our cm_node's frame */
- memcpy(cm_node->loopbackpartner->pdata_buf,
- conn_param->private_data,
- conn_param->private_data_len);
- i40iw_create_event(cm_node->loopbackpartner, I40IW_CM_EVENT_CONNECTED);
- }
-
- if (cm_node->accept_pend) {
- atomic_dec(&cm_node->listener->pend_accepts_cnt);
- cm_node->accept_pend = 0;
- }
- return 0;
-}
-
-/**
- * i40iw_reject - registered call for connection to be rejected
- * @cm_id: cm information for passive connection
- * @pdata: private data to be sent
- * @pdata_len: private data length
- */
-int i40iw_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
-{
- struct i40iw_device *iwdev;
- struct i40iw_cm_node *cm_node;
- struct i40iw_cm_node *loopback;
-
- cm_node = (struct i40iw_cm_node *)cm_id->provider_data;
- loopback = cm_node->loopbackpartner;
- cm_node->cm_id = cm_id;
- cm_node->pdata.size = pdata_len;
-
- iwdev = to_iwdev(cm_id->device);
- if (!iwdev)
- return -EINVAL;
- cm_node->cm_core->stats_rejects++;
-
- if (pdata_len + sizeof(struct ietf_mpa_v2) > MAX_CM_BUFFER)
- return -EINVAL;
-
- if (loopback) {
- memcpy(&loopback->pdata_buf, pdata, pdata_len);
- loopback->pdata.size = pdata_len;
- }
-
- return i40iw_cm_reject(cm_node, pdata, pdata_len);
-}
-
-/**
- * i40iw_connect - registered call for connection to be established
- * @cm_id: cm information for the active connection
- * @conn_param: Information about the connection
- */
-int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
-{
- struct ib_qp *ibqp;
- struct i40iw_qp *iwqp;
- struct i40iw_device *iwdev;
- struct i40iw_cm_node *cm_node;
- struct i40iw_cm_info cm_info;
- struct sockaddr_in *laddr;
- struct sockaddr_in *raddr;
- struct sockaddr_in6 *laddr6;
- struct sockaddr_in6 *raddr6;
- int ret = 0;
-
- ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn);
- if (!ibqp)
- return -EINVAL;
- iwqp = to_iwqp(ibqp);
- if (!iwqp)
- return -EINVAL;
- iwdev = to_iwdev(iwqp->ibqp.device);
- if (!iwdev)
- return -EINVAL;
-
- laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
- raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
- laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
- raddr6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
-
- if (!(laddr->sin_port) || !(raddr->sin_port))
- return -EINVAL;
-
- iwqp->active_conn = 1;
- iwqp->cm_id = NULL;
- cm_id->provider_data = iwqp;
-
- /* set up the connection params for the node */
- if (cm_id->remote_addr.ss_family == AF_INET) {
- cm_info.ipv4 = true;
- memset(cm_info.loc_addr, 0, sizeof(cm_info.loc_addr));
- memset(cm_info.rem_addr, 0, sizeof(cm_info.rem_addr));
- cm_info.loc_addr[0] = ntohl(laddr->sin_addr.s_addr);
- cm_info.rem_addr[0] = ntohl(raddr->sin_addr.s_addr);
- cm_info.loc_port = ntohs(laddr->sin_port);
- cm_info.rem_port = ntohs(raddr->sin_port);
- cm_info.vlan_id = i40iw_get_vlan_ipv4(cm_info.loc_addr);
- } else {
- cm_info.ipv4 = false;
- i40iw_copy_ip_ntohl(cm_info.loc_addr,
- laddr6->sin6_addr.in6_u.u6_addr32);
- i40iw_copy_ip_ntohl(cm_info.rem_addr,
- raddr6->sin6_addr.in6_u.u6_addr32);
- cm_info.loc_port = ntohs(laddr6->sin6_port);
- cm_info.rem_port = ntohs(raddr6->sin6_port);
- i40iw_netdev_vlan_ipv6(cm_info.loc_addr, &cm_info.vlan_id);
- }
- cm_info.cm_id = cm_id;
- cm_info.tos = cm_id->tos;
- cm_info.user_pri = rt_tos2priority(cm_id->tos);
- i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB, "%s TOS:[%d] UP:[%d]\n",
- __func__, cm_id->tos, cm_info.user_pri);
- cm_id->add_ref(cm_id);
- cm_node = i40iw_create_cm_node(&iwdev->cm_core, iwdev,
- conn_param, &cm_info);
-
- if (IS_ERR(cm_node)) {
- ret = PTR_ERR(cm_node);
- cm_id->rem_ref(cm_id);
- return ret;
- }
-
- if ((cm_info.ipv4 && (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr)) ||
- (!cm_info.ipv4 && memcmp(laddr6->sin6_addr.in6_u.u6_addr32,
- raddr6->sin6_addr.in6_u.u6_addr32,
- sizeof(laddr6->sin6_addr.in6_u.u6_addr32)))) {
- if (i40iw_manage_qhash(iwdev, &cm_info, I40IW_QHASH_TYPE_TCP_ESTABLISHED,
- I40IW_QHASH_MANAGE_TYPE_ADD, NULL, true)) {
- ret = -EINVAL;
- goto err;
- }
- cm_node->qhash_set = true;
- }
-
- if (i40iw_manage_apbvt(iwdev, cm_info.loc_port,
- I40IW_MANAGE_APBVT_ADD)) {
- ret = -EINVAL;
- goto err;
- }
-
- cm_node->apbvt_set = true;
- iwqp->cm_node = cm_node;
- cm_node->iwqp = iwqp;
- iwqp->cm_id = cm_id;
- i40iw_add_ref(&iwqp->ibqp);
-
- if (cm_node->state != I40IW_CM_STATE_OFFLOADED) {
- cm_node->state = I40IW_CM_STATE_SYN_SENT;
- ret = i40iw_send_syn(cm_node, 0);
- if (ret)
- goto err;
- }
-
- if (cm_node->loopbackpartner) {
- cm_node->loopbackpartner->state = I40IW_CM_STATE_MPAREQ_RCVD;
- i40iw_create_event(cm_node->loopbackpartner,
- I40IW_CM_EVENT_MPA_REQ);
- }
-
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "Api - connect(): port=0x%04x, cm_node=%p, cm_id = %p.\n",
- cm_node->rem_port,
- cm_node,
- cm_node->cm_id);
-
- return 0;
-
-err:
- if (cm_info.ipv4)
- i40iw_debug(&iwdev->sc_dev,
- I40IW_DEBUG_CM,
- "Api - connect() FAILED: dest addr=%pI4",
- cm_info.rem_addr);
- else
- i40iw_debug(&iwdev->sc_dev,
- I40IW_DEBUG_CM,
- "Api - connect() FAILED: dest addr=%pI6",
- cm_info.rem_addr);
-
- i40iw_rem_ref_cm_node(cm_node);
- cm_id->rem_ref(cm_id);
- iwdev->cm_core.stats_connect_errs++;
- return ret;
-}
-
-/**
- * i40iw_create_listen - registered call creating listener
- * @cm_id: cm information for passive connection
- * @backlog: maximum number of pending accepts
- */
-int i40iw_create_listen(struct iw_cm_id *cm_id, int backlog)
-{
- struct i40iw_device *iwdev;
- struct i40iw_cm_listener *cm_listen_node;
- struct i40iw_cm_info cm_info;
- enum i40iw_status_code ret;
- struct sockaddr_in *laddr;
- struct sockaddr_in6 *laddr6;
- bool wildcard = false;
-
- iwdev = to_iwdev(cm_id->device);
- if (!iwdev)
- return -EINVAL;
-
- laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
- laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
- memset(&cm_info, 0, sizeof(cm_info));
- if (laddr->sin_family == AF_INET) {
- cm_info.ipv4 = true;
- cm_info.loc_addr[0] = ntohl(laddr->sin_addr.s_addr);
- cm_info.loc_port = ntohs(laddr->sin_port);
-
- if (laddr->sin_addr.s_addr != INADDR_ANY)
- cm_info.vlan_id = i40iw_get_vlan_ipv4(cm_info.loc_addr);
- else
- wildcard = true;
-
- } else {
- cm_info.ipv4 = false;
- i40iw_copy_ip_ntohl(cm_info.loc_addr,
- laddr6->sin6_addr.in6_u.u6_addr32);
- cm_info.loc_port = ntohs(laddr6->sin6_port);
- if (ipv6_addr_type(&laddr6->sin6_addr) != IPV6_ADDR_ANY)
- i40iw_netdev_vlan_ipv6(cm_info.loc_addr,
- &cm_info.vlan_id);
- else
- wildcard = true;
- }
- cm_info.backlog = backlog;
- cm_info.cm_id = cm_id;
-
- cm_listen_node = i40iw_make_listen_node(&iwdev->cm_core, iwdev, &cm_info);
- if (!cm_listen_node) {
- i40iw_pr_err("cm_listen_node == NULL\n");
- return -ENOMEM;
- }
-
- cm_id->provider_data = cm_listen_node;
-
- cm_listen_node->tos = cm_id->tos;
- cm_listen_node->user_pri = rt_tos2priority(cm_id->tos);
- cm_info.user_pri = cm_listen_node->user_pri;
-
- if (!cm_listen_node->reused_node) {
- if (wildcard) {
- if (cm_info.ipv4)
- ret = i40iw_add_mqh_4(iwdev,
- &cm_info,
- cm_listen_node);
- else
- ret = i40iw_add_mqh_6(iwdev,
- &cm_info,
- cm_listen_node);
- if (ret)
- goto error;
-
- ret = i40iw_manage_apbvt(iwdev,
- cm_info.loc_port,
- I40IW_MANAGE_APBVT_ADD);
-
- if (ret)
- goto error;
- } else {
- ret = i40iw_manage_qhash(iwdev,
- &cm_info,
- I40IW_QHASH_TYPE_TCP_SYN,
- I40IW_QHASH_MANAGE_TYPE_ADD,
- NULL,
- true);
- if (ret)
- goto error;
- cm_listen_node->qhash_set = true;
- ret = i40iw_manage_apbvt(iwdev,
- cm_info.loc_port,
- I40IW_MANAGE_APBVT_ADD);
- if (ret)
- goto error;
- }
- }
- cm_id->add_ref(cm_id);
- cm_listen_node->cm_core->stats_listen_created++;
- return 0;
- error:
- i40iw_cm_del_listen(&iwdev->cm_core, (void *)cm_listen_node, false);
- return -EINVAL;
-}
-
-/**
- * i40iw_destroy_listen - registered call to destroy listener
- * @cm_id: cm information for passive connection
- */
-int i40iw_destroy_listen(struct iw_cm_id *cm_id)
-{
- struct i40iw_device *iwdev;
-
- iwdev = to_iwdev(cm_id->device);
- if (cm_id->provider_data)
- i40iw_cm_del_listen(&iwdev->cm_core, cm_id->provider_data, true);
- else
- i40iw_pr_err("cm_id->provider_data was NULL\n");
-
- cm_id->rem_ref(cm_id);
-
- return 0;
-}
-
-/**
- * i40iw_cm_event_connected - handle connected active node
- * @event: the info for cm_node of connection
- */
-static void i40iw_cm_event_connected(struct i40iw_cm_event *event)
-{
- struct i40iw_qp *iwqp;
- struct i40iw_device *iwdev;
- struct i40iw_cm_core *cm_core;
- struct i40iw_cm_node *cm_node;
- struct i40iw_sc_dev *dev;
- struct ib_qp_attr attr;
- struct iw_cm_id *cm_id;
- unsigned long flags;
- int status;
- bool read0;
-
- cm_node = event->cm_node;
- cm_id = cm_node->cm_id;
- iwqp = (struct i40iw_qp *)cm_id->provider_data;
- iwdev = to_iwdev(iwqp->ibqp.device);
- dev = &iwdev->sc_dev;
- cm_core = &iwdev->cm_core;
-
- if (iwqp->destroyed) {
- status = -ETIMEDOUT;
- goto error;
- }
- i40iw_cm_init_tsa_conn(iwqp, cm_node);
- read0 = (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO);
- if (iwqp->page)
- iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
- dev->iw_priv_qp_ops->qp_send_rtt(&iwqp->sc_qp, read0);
- if (iwqp->page)
- kunmap(iwqp->page);
-
- memset(&attr, 0, sizeof(attr));
- attr.qp_state = IB_QPS_RTS;
- cm_node->qhash_set = false;
- i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
-
- cm_node->accelerated = true;
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- list_move_tail(&cm_node->list, &cm_core->accelerated_list);
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
- status = i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY,
- 0);
- if (status)
- i40iw_debug(dev, I40IW_DEBUG_CM, "error sending cm event - CONNECT_REPLY\n");
-
- return;
-
-error:
- iwqp->cm_id = NULL;
- cm_id->provider_data = NULL;
- i40iw_send_cm_event(event->cm_node,
- cm_id,
- IW_CM_EVENT_CONNECT_REPLY,
- status);
- cm_id->rem_ref(cm_id);
- i40iw_rem_ref_cm_node(event->cm_node);
-}
-
-/**
- * i40iw_cm_event_reset - handle reset
- * @event: the info for cm_node of connection
- */
-static void i40iw_cm_event_reset(struct i40iw_cm_event *event)
-{
- struct i40iw_cm_node *cm_node = event->cm_node;
- struct iw_cm_id *cm_id = cm_node->cm_id;
- struct i40iw_qp *iwqp;
-
- if (!cm_id)
- return;
-
- iwqp = cm_id->provider_data;
- if (!iwqp)
- return;
-
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "reset event %p - cm_id = %p\n",
- event->cm_node, cm_id);
- iwqp->cm_id = NULL;
-
- i40iw_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_DISCONNECT, -ECONNRESET);
- i40iw_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_CLOSE, 0);
-}
-
-/**
- * i40iw_cm_event_handler - worker thread callback to send event to cm upper layer
- * @work: pointer to cm event info
- */
-static void i40iw_cm_event_handler(struct work_struct *work)
-{
- struct i40iw_cm_event *event = container_of(work,
- struct i40iw_cm_event,
- event_work);
- struct i40iw_cm_node *cm_node;
-
- if (!event || !event->cm_node || !event->cm_node->cm_core)
- return;
-
- cm_node = event->cm_node;
-
- switch (event->type) {
- case I40IW_CM_EVENT_MPA_REQ:
- i40iw_send_cm_event(cm_node,
- cm_node->cm_id,
- IW_CM_EVENT_CONNECT_REQUEST,
- 0);
- break;
- case I40IW_CM_EVENT_RESET:
- i40iw_cm_event_reset(event);
- break;
- case I40IW_CM_EVENT_CONNECTED:
- if (!event->cm_node->cm_id ||
- (event->cm_node->state != I40IW_CM_STATE_OFFLOADED))
- break;
- i40iw_cm_event_connected(event);
- break;
- case I40IW_CM_EVENT_MPA_REJECT:
- if (!event->cm_node->cm_id ||
- (cm_node->state == I40IW_CM_STATE_OFFLOADED))
- break;
- i40iw_send_cm_event(cm_node,
- cm_node->cm_id,
- IW_CM_EVENT_CONNECT_REPLY,
- -ECONNREFUSED);
- break;
- case I40IW_CM_EVENT_ABORTED:
- if (!event->cm_node->cm_id ||
- (event->cm_node->state == I40IW_CM_STATE_OFFLOADED))
- break;
- i40iw_event_connect_error(event);
- break;
- default:
- i40iw_pr_err("event type = %d\n", event->type);
- break;
- }
-
- event->cm_info.cm_id->rem_ref(event->cm_info.cm_id);
- i40iw_rem_ref_cm_node(event->cm_node);
- kfree(event);
-}
-
-/**
- * i40iw_cm_post_event - queue event request for worker thread
- * @event: cm node's info for up event call
- */
-static void i40iw_cm_post_event(struct i40iw_cm_event *event)
-{
- atomic_inc(&event->cm_node->ref_count);
- event->cm_info.cm_id->add_ref(event->cm_info.cm_id);
- INIT_WORK(&event->event_work, i40iw_cm_event_handler);
-
- queue_work(event->cm_node->cm_core->event_wq, &event->event_work);
-}
-
-/**
- * i40iw_qhash_ctrl - enable/disable qhash for list
- * @iwdev: device pointer
- * @parent_listen_node: parent listen node
- * @nfo: cm info node
- * @ipaddr: Pointer to IPv4 or IPv6 address
- * @ipv4: flag indicating IPv4 when true
- * @ifup: flag indicating interface up when true
- *
- * Enables or disables the qhash for the node in the child
- * listen list that matches ipaddr. If no matching IP is found,
- * it allocates and adds a new child listen node to the
- * parent listen node. The listen_list_lock is assumed to be
- * held when called.
- */
-static void i40iw_qhash_ctrl(struct i40iw_device *iwdev,
- struct i40iw_cm_listener *parent_listen_node,
- struct i40iw_cm_info *nfo,
- u32 *ipaddr, bool ipv4, bool ifup)
-{
- struct list_head *child_listen_list = &parent_listen_node->child_listen_list;
- struct i40iw_cm_listener *child_listen_node;
- struct list_head *pos, *tpos;
- enum i40iw_status_code ret;
- bool node_allocated = false;
- enum i40iw_quad_hash_manage_type op =
- ifup ? I40IW_QHASH_MANAGE_TYPE_ADD : I40IW_QHASH_MANAGE_TYPE_DELETE;
-
- list_for_each_safe(pos, tpos, child_listen_list) {
- child_listen_node =
- list_entry(pos,
- struct i40iw_cm_listener,
- child_listen_list);
- if (!memcmp(child_listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16))
- goto set_qhash;
- }
-
- /* if not found then add a child listener if interface is going up */
- if (!ifup)
- return;
- child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_ATOMIC);
- if (!child_listen_node)
- return;
- node_allocated = true;
- memcpy(child_listen_node, parent_listen_node, sizeof(*child_listen_node));
-
- memcpy(child_listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16);
-
-set_qhash:
- memcpy(nfo->loc_addr,
- child_listen_node->loc_addr,
- sizeof(nfo->loc_addr));
- nfo->vlan_id = child_listen_node->vlan_id;
- ret = i40iw_manage_qhash(iwdev, nfo,
- I40IW_QHASH_TYPE_TCP_SYN,
- op,
- NULL, false);
- if (!ret) {
- child_listen_node->qhash_set = ifup;
- if (node_allocated)
- list_add(&child_listen_node->child_listen_list,
- &parent_listen_node->child_listen_list);
- } else if (node_allocated) {
- kfree(child_listen_node);
- }
-}
-
-/**
- * i40iw_cm_teardown_connections - teardown QPs
- * @iwdev: device pointer
- * @ipaddr: Pointer to IPv4 or IPv6 address
- * @nfo: cm info node
- * @disconnect_all: flag indicating disconnect all QPs
- * Tears down QPs where the source or destination address matches ipaddr
- */
-void i40iw_cm_teardown_connections(struct i40iw_device *iwdev, u32 *ipaddr,
- struct i40iw_cm_info *nfo,
- bool disconnect_all)
-{
- struct i40iw_cm_core *cm_core = &iwdev->cm_core;
- struct list_head *list_core_temp;
- struct list_head *list_node;
- struct i40iw_cm_node *cm_node;
- unsigned long flags;
- struct list_head teardown_list;
- struct ib_qp_attr attr;
-
- INIT_LIST_HEAD(&teardown_list);
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- list_for_each_safe(list_node, list_core_temp,
- &cm_core->accelerated_list) {
- cm_node = container_of(list_node, struct i40iw_cm_node, list);
- if (disconnect_all ||
- (nfo->vlan_id == cm_node->vlan_id &&
- (!memcmp(cm_node->loc_addr, ipaddr, nfo->ipv4 ? 4 : 16) ||
- !memcmp(cm_node->rem_addr, ipaddr, nfo->ipv4 ? 4 : 16)))) {
- atomic_inc(&cm_node->ref_count);
- list_add(&cm_node->teardown_entry, &teardown_list);
- }
- }
- list_for_each_safe(list_node, list_core_temp,
- &cm_core->non_accelerated_list) {
- cm_node = container_of(list_node, struct i40iw_cm_node, list);
- if (disconnect_all ||
- (nfo->vlan_id == cm_node->vlan_id &&
- (!memcmp(cm_node->loc_addr, ipaddr, nfo->ipv4 ? 4 : 16) ||
- !memcmp(cm_node->rem_addr, ipaddr, nfo->ipv4 ? 4 : 16)))) {
- atomic_inc(&cm_node->ref_count);
- list_add(&cm_node->teardown_entry, &teardown_list);
- }
- }
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
-
- list_for_each_safe(list_node, list_core_temp, &teardown_list) {
- cm_node = container_of(list_node, struct i40iw_cm_node,
- teardown_entry);
- attr.qp_state = IB_QPS_ERR;
- i40iw_modify_qp(&cm_node->iwqp->ibqp, &attr, IB_QP_STATE, NULL);
- if (iwdev->reset)
- i40iw_cm_disconn(cm_node->iwqp);
- i40iw_rem_ref_cm_node(cm_node);
- }
-}
-
-/**
- * i40iw_if_notify - process an ifup/ifdown event on an interface
- * @iwdev: device pointer
- * @netdev: network device
- * @ipaddr: Pointer to IPv4 or IPv6 address
- * @ipv4: flag indicating IPv4 when true
- * @ifup: flag indicating interface up when true
- */
-void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev,
- u32 *ipaddr, bool ipv4, bool ifup)
-{
- struct i40iw_cm_core *cm_core = &iwdev->cm_core;
- unsigned long flags;
- struct i40iw_cm_listener *listen_node;
- static const u32 ip_zero[4] = { 0, 0, 0, 0 };
- struct i40iw_cm_info nfo;
- u16 vlan_id = rdma_vlan_dev_vlan_id(netdev);
- enum i40iw_status_code ret;
- enum i40iw_quad_hash_manage_type op =
- ifup ? I40IW_QHASH_MANAGE_TYPE_ADD : I40IW_QHASH_MANAGE_TYPE_DELETE;
-
- nfo.vlan_id = vlan_id;
- nfo.ipv4 = ipv4;
-
- /* Disable or enable qhash for listeners */
- spin_lock_irqsave(&cm_core->listen_list_lock, flags);
- list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
- if (vlan_id == listen_node->vlan_id &&
- (!memcmp(listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16) ||
- !memcmp(listen_node->loc_addr, ip_zero, ipv4 ? 4 : 16))) {
- memcpy(nfo.loc_addr, listen_node->loc_addr,
- sizeof(nfo.loc_addr));
- nfo.loc_port = listen_node->loc_port;
- nfo.user_pri = listen_node->user_pri;
- if (!list_empty(&listen_node->child_listen_list)) {
- i40iw_qhash_ctrl(iwdev,
- listen_node,
- &nfo,
- ipaddr, ipv4, ifup);
- } else if (memcmp(listen_node->loc_addr, ip_zero,
- ipv4 ? 4 : 16)) {
- ret = i40iw_manage_qhash(iwdev,
- &nfo,
- I40IW_QHASH_TYPE_TCP_SYN,
- op,
- NULL,
- false);
- if (!ret)
- listen_node->qhash_set = ifup;
- }
- }
- }
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
-
- /* teardown connected qp's on ifdown */
- if (!ifup)
- i40iw_cm_teardown_connections(iwdev, ipaddr, &nfo, false);
-}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h
deleted file mode 100644
index 66dc1ba03389..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.h
+++ /dev/null
@@ -1,462 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_CM_H
-#define I40IW_CM_H
-
-#define QUEUE_EVENTS
-
-#define I40IW_MANAGE_APBVT_DEL 0
-#define I40IW_MANAGE_APBVT_ADD 1
-
-#define I40IW_MPA_REQUEST_ACCEPT 1
-#define I40IW_MPA_REQUEST_REJECT 2
-
-/* IETF MPA -- defines, enums, structs */
-#define IEFT_MPA_KEY_REQ "MPA ID Req Frame"
-#define IEFT_MPA_KEY_REP "MPA ID Rep Frame"
-#define IETF_MPA_KEY_SIZE 16
-#define IETF_MPA_VERSION 1
-#define IETF_MAX_PRIV_DATA_LEN 512
-#define IETF_MPA_FRAME_SIZE 20
-#define IETF_RTR_MSG_SIZE 4
-#define IETF_MPA_V2_FLAG 0x10
-#define SNDMARKER_SEQNMASK 0x000001FF
-
-#define I40IW_MAX_IETF_SIZE 32
-
-/* IETF RTR MSG Fields */
-#define IETF_PEER_TO_PEER 0x8000
-#define IETF_FLPDU_ZERO_LEN 0x4000
-#define IETF_RDMA0_WRITE 0x8000
-#define IETF_RDMA0_READ 0x4000
-#define IETF_NO_IRD_ORD 0x3FFF
-
-/* HW-supported IRD sizes*/
-#define I40IW_HW_IRD_SETTING_2 2
-#define I40IW_HW_IRD_SETTING_4 4
-#define I40IW_HW_IRD_SETTING_8 8
-#define I40IW_HW_IRD_SETTING_16 16
-#define I40IW_HW_IRD_SETTING_32 32
-#define I40IW_HW_IRD_SETTING_64 64
-
-#define MAX_PORTS 65536
-#define I40IW_VLAN_PRIO_SHIFT 13
-
-enum ietf_mpa_flags {
- IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */
- IETF_MPA_FLAGS_CRC = 0x40, /* receive CRC */
- IETF_MPA_FLAGS_REJECT = 0x20, /* Reject */
-};
-
-struct ietf_mpa_v1 {
- u8 key[IETF_MPA_KEY_SIZE];
- u8 flags;
- u8 rev;
- __be16 priv_data_len;
- u8 priv_data[0];
-};
-
-#define ietf_mpa_req_resp_frame ietf_mpa_frame
-
-struct ietf_rtr_msg {
- __be16 ctrl_ird;
- __be16 ctrl_ord;
-};
-
-struct ietf_mpa_v2 {
- u8 key[IETF_MPA_KEY_SIZE];
- u8 flags;
- u8 rev;
- __be16 priv_data_len;
- struct ietf_rtr_msg rtr_msg;
- u8 priv_data[0];
-};
-
-struct i40iw_cm_node;
-enum i40iw_timer_type {
- I40IW_TIMER_TYPE_SEND,
- I40IW_TIMER_TYPE_RECV,
- I40IW_TIMER_NODE_CLEANUP,
- I40IW_TIMER_TYPE_CLOSE,
-};
-
-#define I40IW_PASSIVE_STATE_INDICATED 0
-#define I40IW_DO_NOT_SEND_RESET_EVENT 1
-#define I40IW_SEND_RESET_EVENT 2
-
-#define MAX_I40IW_IFS 4
-
-#define SET_ACK 0x1
-#define SET_SYN 0x2
-#define SET_FIN 0x4
-#define SET_RST 0x8
-
-#define TCP_OPTIONS_PADDING 3
-
-struct option_base {
- u8 optionnum;
- u8 length;
-};
-
-enum option_numbers {
- OPTION_NUMBER_END,
- OPTION_NUMBER_NONE,
- OPTION_NUMBER_MSS,
- OPTION_NUMBER_WINDOW_SCALE,
- OPTION_NUMBER_SACK_PERM,
- OPTION_NUMBER_SACK,
- OPTION_NUMBER_WRITE0 = 0xbc
-};
-
-struct option_mss {
- u8 optionnum;
- u8 length;
- __be16 mss;
-};
-
-struct option_windowscale {
- u8 optionnum;
- u8 length;
- u8 shiftcount;
-};
-
-union all_known_options {
- char as_end;
- struct option_base as_base;
- struct option_mss as_mss;
- struct option_windowscale as_windowscale;
-};
-
-struct i40iw_timer_entry {
- struct list_head list;
- unsigned long timetosend; /* jiffies */
- struct i40iw_puda_buf *sqbuf;
- u32 type;
- u32 retrycount;
- u32 retranscount;
- u32 context;
- u32 send_retrans;
- int close_when_complete;
-};
-
-#define I40IW_DEFAULT_RETRYS 64
-#define I40IW_DEFAULT_RETRANS 8
-#define I40IW_DEFAULT_TTL 0x40
-#define I40IW_DEFAULT_RTT_VAR 0x6
-#define I40IW_DEFAULT_SS_THRESH 0x3FFFFFFF
-#define I40IW_DEFAULT_REXMIT_THRESH 8
-
-#define I40IW_RETRY_TIMEOUT HZ
-#define I40IW_SHORT_TIME 10
-#define I40IW_LONG_TIME (2 * HZ)
-#define I40IW_MAX_TIMEOUT ((unsigned long)(12 * HZ))
-
-#define I40IW_CM_HASHTABLE_SIZE 1024
-#define I40IW_CM_TCP_TIMER_INTERVAL 3000
-#define I40IW_CM_DEFAULT_MTU 1540
-#define I40IW_CM_DEFAULT_FRAME_CNT 10
-#define I40IW_CM_THREAD_STACK_SIZE 256
-#define I40IW_CM_DEFAULT_RCV_WND 64240
-#define I40IW_CM_DEFAULT_RCV_WND_SCALED 0x3fffc
-#define I40IW_CM_DEFAULT_RCV_WND_SCALE 2
-#define I40IW_CM_DEFAULT_FREE_PKTS 0x000A
-#define I40IW_CM_FREE_PKT_LO_WATERMARK 2
-
-#define I40IW_CM_DEFAULT_MSS 536
-
-#define I40IW_CM_DEF_SEQ 0x159bf75f
-#define I40IW_CM_DEF_LOCAL_ID 0x3b47
-
-#define I40IW_CM_DEF_SEQ2 0x18ed5740
-#define I40IW_CM_DEF_LOCAL_ID2 0xb807
-#define MAX_CM_BUFFER (I40IW_MAX_IETF_SIZE + IETF_MAX_PRIV_DATA_LEN)
-
-typedef u32 i40iw_addr_t;
-
-#define i40iw_cm_tsa_context i40iw_qp_context
-
-struct i40iw_qp;
-
-/* cm node transition states */
-enum i40iw_cm_node_state {
- I40IW_CM_STATE_UNKNOWN,
- I40IW_CM_STATE_INITED,
- I40IW_CM_STATE_LISTENING,
- I40IW_CM_STATE_SYN_RCVD,
- I40IW_CM_STATE_SYN_SENT,
- I40IW_CM_STATE_ONE_SIDE_ESTABLISHED,
- I40IW_CM_STATE_ESTABLISHED,
- I40IW_CM_STATE_ACCEPTING,
- I40IW_CM_STATE_MPAREQ_SENT,
- I40IW_CM_STATE_MPAREQ_RCVD,
- I40IW_CM_STATE_MPAREJ_RCVD,
- I40IW_CM_STATE_OFFLOADED,
- I40IW_CM_STATE_FIN_WAIT1,
- I40IW_CM_STATE_FIN_WAIT2,
- I40IW_CM_STATE_CLOSE_WAIT,
- I40IW_CM_STATE_TIME_WAIT,
- I40IW_CM_STATE_LAST_ACK,
- I40IW_CM_STATE_CLOSING,
- I40IW_CM_STATE_LISTENER_DESTROYED,
- I40IW_CM_STATE_CLOSED
-};
-
-enum mpa_frame_version {
- IETF_MPA_V1 = 1,
- IETF_MPA_V2 = 2
-};
-
-enum mpa_frame_key {
- MPA_KEY_REQUEST,
- MPA_KEY_REPLY
-};
-
-enum send_rdma0 {
- SEND_RDMA_READ_ZERO = 1,
- SEND_RDMA_WRITE_ZERO = 2
-};
-
-enum i40iw_tcpip_pkt_type {
- I40IW_PKT_TYPE_UNKNOWN,
- I40IW_PKT_TYPE_SYN,
- I40IW_PKT_TYPE_SYNACK,
- I40IW_PKT_TYPE_ACK,
- I40IW_PKT_TYPE_FIN,
- I40IW_PKT_TYPE_RST
-};
-
-/* CM context params */
-struct i40iw_cm_tcp_context {
- u8 client;
-
- u32 loc_seq_num;
- u32 loc_ack_num;
- u32 rem_ack_num;
- u32 rcv_nxt;
-
- u32 loc_id;
- u32 rem_id;
-
- u32 snd_wnd;
- u32 max_snd_wnd;
-
- u32 rcv_wnd;
- u32 mss;
- u8 snd_wscale;
- u8 rcv_wscale;
-};
-
-enum i40iw_cm_listener_state {
- I40IW_CM_LISTENER_PASSIVE_STATE = 1,
- I40IW_CM_LISTENER_ACTIVE_STATE = 2,
- I40IW_CM_LISTENER_EITHER_STATE = 3
-};
-
-struct i40iw_cm_listener {
- struct list_head list;
- struct i40iw_cm_core *cm_core;
- u8 loc_mac[ETH_ALEN];
- u32 loc_addr[4];
- u16 loc_port;
- struct iw_cm_id *cm_id;
- atomic_t ref_count;
- struct i40iw_device *iwdev;
- atomic_t pend_accepts_cnt;
- int backlog;
- enum i40iw_cm_listener_state listener_state;
- u32 reused_node;
- u8 user_pri;
- u8 tos;
- u16 vlan_id;
- bool qhash_set;
- bool ipv4;
- struct list_head child_listen_list;
-
-};
-
-struct i40iw_kmem_info {
- void *addr;
- u32 size;
-};
-
-/* per connection node and node state information */
-struct i40iw_cm_node {
- u32 loc_addr[4], rem_addr[4];
- u16 loc_port, rem_port;
- u16 vlan_id;
- enum i40iw_cm_node_state state;
- u8 loc_mac[ETH_ALEN];
- u8 rem_mac[ETH_ALEN];
- atomic_t ref_count;
- struct i40iw_qp *iwqp;
- struct i40iw_device *iwdev;
- struct i40iw_sc_dev *dev;
- struct i40iw_cm_tcp_context tcp_cntxt;
- struct i40iw_cm_core *cm_core;
- struct i40iw_cm_node *loopbackpartner;
- struct i40iw_timer_entry *send_entry;
- struct i40iw_timer_entry *close_entry;
- spinlock_t retrans_list_lock; /* cm transmit packet */
- enum send_rdma0 send_rdma0_op;
- u16 ird_size;
- u16 ord_size;
- u16 mpav2_ird_ord;
- struct iw_cm_id *cm_id;
- struct list_head list;
- bool accelerated;
- struct i40iw_cm_listener *listener;
- int apbvt_set;
- int accept_pend;
- struct list_head timer_entry;
- struct list_head reset_entry;
- struct list_head teardown_entry;
- atomic_t passive_state;
- bool qhash_set;
- u8 user_pri;
- u8 tos;
- bool ipv4;
- bool snd_mark_en;
- u16 lsmm_size;
- enum mpa_frame_version mpa_frame_rev;
- struct i40iw_kmem_info pdata;
- union {
- struct ietf_mpa_v1 mpa_frame;
- struct ietf_mpa_v2 mpa_v2_frame;
- };
-
- u8 pdata_buf[IETF_MAX_PRIV_DATA_LEN];
- struct i40iw_kmem_info mpa_hdr;
- bool ack_rcvd;
-};
-
-/* structure for client or CM to fill when making CM api calls. */
-/* - only need to set relevant data, based on op. */
-struct i40iw_cm_info {
- struct iw_cm_id *cm_id;
- u16 loc_port;
- u16 rem_port;
- u32 loc_addr[4];
- u32 rem_addr[4];
- u16 vlan_id;
- int backlog;
- u8 user_pri;
- u8 tos;
- bool ipv4;
-};
-
-/* CM event codes */
-enum i40iw_cm_event_type {
- I40IW_CM_EVENT_UNKNOWN,
- I40IW_CM_EVENT_ESTABLISHED,
- I40IW_CM_EVENT_MPA_REQ,
- I40IW_CM_EVENT_MPA_CONNECT,
- I40IW_CM_EVENT_MPA_ACCEPT,
- I40IW_CM_EVENT_MPA_REJECT,
- I40IW_CM_EVENT_MPA_ESTABLISHED,
- I40IW_CM_EVENT_CONNECTED,
- I40IW_CM_EVENT_RESET,
- I40IW_CM_EVENT_ABORTED
-};
-
-/* event to post to CM event handler */
-struct i40iw_cm_event {
- enum i40iw_cm_event_type type;
- struct i40iw_cm_info cm_info;
- struct work_struct event_work;
- struct i40iw_cm_node *cm_node;
-};
-
-struct i40iw_cm_core {
- struct i40iw_device *iwdev;
- struct i40iw_sc_dev *dev;
-
- struct list_head listen_nodes;
- struct list_head accelerated_list;
- struct list_head non_accelerated_list;
-
- struct timer_list tcp_timer;
-
- struct workqueue_struct *event_wq;
- struct workqueue_struct *disconn_wq;
-
- spinlock_t ht_lock; /* manage hash table */
- spinlock_t listen_list_lock; /* listen list */
- spinlock_t apbvt_lock; /*manage apbvt entries*/
-
- unsigned long ports_in_use[BITS_TO_LONGS(MAX_PORTS)];
-
- u64 stats_nodes_created;
- u64 stats_nodes_destroyed;
- u64 stats_listen_created;
- u64 stats_listen_destroyed;
- u64 stats_listen_nodes_created;
- u64 stats_listen_nodes_destroyed;
- u64 stats_loopbacks;
- u64 stats_accepts;
- u64 stats_rejects;
- u64 stats_connect_errs;
- u64 stats_passive_errs;
- u64 stats_pkt_retrans;
- u64 stats_backlog_drops;
-};
-
-int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node,
- struct i40iw_puda_buf *sqbuf,
- enum i40iw_timer_type type,
- int send_retrans,
- int close_when_complete);
-
-int i40iw_accept(struct iw_cm_id *, struct iw_cm_conn_param *);
-int i40iw_reject(struct iw_cm_id *, const void *, u8);
-int i40iw_connect(struct iw_cm_id *, struct iw_cm_conn_param *);
-int i40iw_create_listen(struct iw_cm_id *, int);
-int i40iw_destroy_listen(struct iw_cm_id *);
-
-int i40iw_cm_start(struct i40iw_device *);
-int i40iw_cm_stop(struct i40iw_device *);
-
-int i40iw_arp_table(struct i40iw_device *iwdev,
- u32 *ip_addr,
- bool ipv4,
- u8 *mac_addr,
- u32 action);
-
-void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev,
- u32 *ipaddr, bool ipv4, bool ifup);
-void i40iw_cm_teardown_connections(struct i40iw_device *iwdev, u32 *ipaddr,
- struct i40iw_cm_info *nfo,
- bool disconnect_all);
-bool i40iw_port_in_use(struct i40iw_cm_core *cm_core, u16 port);
-#endif /* I40IW_CM_H */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
deleted file mode 100644
index 4d841a3c68f3..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
+++ /dev/null
@@ -1,5198 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#include "i40iw_osdep.h"
-#include "i40iw_register.h"
-#include "i40iw_status.h"
-#include "i40iw_hmc.h"
-
-#include "i40iw_d.h"
-#include "i40iw_type.h"
-#include "i40iw_p.h"
-#include "i40iw_vf.h"
-#include "i40iw_virtchnl.h"
-
-/**
- * i40iw_insert_wqe_hdr - write wqe header
- * @wqe: cqp wqe for header
- * @header: header for the cqp wqe
- */
-void i40iw_insert_wqe_hdr(u64 *wqe, u64 header)
-{
- wmb(); /* make sure WQE is populated before polarity is set */
- set_64bit_val(wqe, 24, header);
-}
-
-void i40iw_check_cqp_progress(struct i40iw_cqp_timeout *cqp_timeout, struct i40iw_sc_dev *dev)
-{
- if (cqp_timeout->compl_cqp_cmds != dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS]) {
- cqp_timeout->compl_cqp_cmds = dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS];
- cqp_timeout->count = 0;
- } else {
- if (dev->cqp_cmd_stats[OP_REQUESTED_COMMANDS] != cqp_timeout->compl_cqp_cmds)
- cqp_timeout->count++;
- }
-}
-
-/**
- * i40iw_get_cqp_reg_info - get head and tail for cqp using registers
- * @cqp: struct for cqp hw
- * @val: cqp tail register value
- * @tail: wqtail register value
- * @error: cqp processing err
- */
-static inline void i40iw_get_cqp_reg_info(struct i40iw_sc_cqp *cqp,
- u32 *val,
- u32 *tail,
- u32 *error)
-{
- if (cqp->dev->is_pf) {
- *val = i40iw_rd32(cqp->dev->hw, I40E_PFPE_CQPTAIL);
- *tail = RS_32(*val, I40E_PFPE_CQPTAIL_WQTAIL);
- *error = RS_32(*val, I40E_PFPE_CQPTAIL_CQP_OP_ERR);
- } else {
- *val = i40iw_rd32(cqp->dev->hw, I40E_VFPE_CQPTAIL1);
- *tail = RS_32(*val, I40E_VFPE_CQPTAIL_WQTAIL);
- *error = RS_32(*val, I40E_VFPE_CQPTAIL_CQP_OP_ERR);
- }
-}
-
-/**
- * i40iw_cqp_poll_registers - poll cqp registers
- * @cqp: struct for cqp hw
- * @tail: wqtail register value
- * @count: how many times to try for completion
- */
-static enum i40iw_status_code i40iw_cqp_poll_registers(
- struct i40iw_sc_cqp *cqp,
- u32 tail,
- u32 count)
-{
- u32 i = 0;
- u32 newtail, error, val;
-
- while (i < count) {
- i++;
- i40iw_get_cqp_reg_info(cqp, &val, &newtail, &error);
- if (error) {
- error = (cqp->dev->is_pf) ?
- i40iw_rd32(cqp->dev->hw, I40E_PFPE_CQPERRCODES) :
- i40iw_rd32(cqp->dev->hw, I40E_VFPE_CQPERRCODES1);
- return I40IW_ERR_CQP_COMPL_ERROR;
- }
- if (newtail != tail) {
- /* SUCCESS */
- I40IW_RING_MOVE_TAIL(cqp->sq_ring);
- cqp->dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS]++;
- return 0;
- }
- udelay(I40IW_SLEEP_COUNT);
- }
- return I40IW_ERR_TIMEOUT;
-}
-
-/**
- * i40iw_sc_parse_fpm_commit_buf - parse fpm commit buffer
- * @buf: ptr to fpm commit buffer
- * @info: ptr to i40iw_hmc_obj_info struct
- * @sd: number of SDs for HMC objects
- *
- * parses fpm commit info and copy base value
- * of hmc objects in hmc_info
- */
-static enum i40iw_status_code i40iw_sc_parse_fpm_commit_buf(
- u64 *buf,
- struct i40iw_hmc_obj_info *info,
- u32 *sd)
-{
- u64 temp;
- u64 size;
- u64 base = 0;
- u32 i, j;
- u32 k = 0;
-
- /* copy base values in obj_info */
- for (i = I40IW_HMC_IW_QP, j = 0; i <= I40IW_HMC_IW_PBLE; i++, j += 8) {
- if ((i == I40IW_HMC_IW_SRQ) ||
- (i == I40IW_HMC_IW_FSIMC) ||
- (i == I40IW_HMC_IW_FSIAV)) {
- info[i].base = 0;
- info[i].cnt = 0;
- continue;
- }
- get_64bit_val(buf, j, &temp);
- info[i].base = RS_64_1(temp, 32) * 512;
- if (info[i].base > base) {
- base = info[i].base;
- k = i;
- }
- if (i == I40IW_HMC_IW_APBVT_ENTRY) {
- info[i].cnt = 1;
- continue;
- }
- if (i == I40IW_HMC_IW_QP)
- info[i].cnt = (u32)RS_64(temp, I40IW_QUERY_FPM_MAX_QPS);
- else if (i == I40IW_HMC_IW_CQ)
- info[i].cnt = (u32)RS_64(temp, I40IW_QUERY_FPM_MAX_CQS);
- else
- info[i].cnt = (u32)(temp);
- }
- size = info[k].cnt * info[k].size + info[k].base;
- if (size & 0x1FFFFF)
- *sd = (u32)((size >> 21) + 1); /* add 1 for remainder */
- else
- *sd = (u32)(size >> 21);
-
- return 0;
-}
-
-/**
- * i40iw_sc_decode_fpm_query() - Decode a 64 bit value into max count and size
- * @buf: ptr to fpm query buffer
- * @buf_idx: index into buf
- * @obj_info: ptr to i40iw_hmc_obj_info struct
- * @rsrc_idx: resource index into info
- *
- * Decode a 64 bit value from fpm query buffer into max count and size
- */
-static u64 i40iw_sc_decode_fpm_query(u64 *buf,
- u32 buf_idx,
- struct i40iw_hmc_obj_info *obj_info,
- u32 rsrc_idx)
-{
- u64 temp;
- u32 size;
-
- get_64bit_val(buf, buf_idx, &temp);
- obj_info[rsrc_idx].max_cnt = (u32)temp;
- size = (u32)RS_64_1(temp, 32);
- obj_info[rsrc_idx].size = LS_64_1(1, size);
-
- return temp;
-}
-
-/**
- * i40iw_sc_parse_fpm_query_buf() - parses fpm query buffer
- * @buf: ptr to fpm query buffer
- * @info: ptr to i40iw_hmc_obj_info struct
- * @hmc_fpm_misc: ptr to fpm data
- *
- * parses fpm query buffer and copy max_cnt and
- * size value of hmc objects in hmc_info
- */
-static enum i40iw_status_code i40iw_sc_parse_fpm_query_buf(
- u64 *buf,
- struct i40iw_hmc_info *hmc_info,
- struct i40iw_hmc_fpm_misc *hmc_fpm_misc)
-{
- struct i40iw_hmc_obj_info *obj_info;
- u64 temp;
- u32 size;
- u16 max_pe_sds;
-
- obj_info = hmc_info->hmc_obj;
-
- get_64bit_val(buf, 0, &temp);
- hmc_info->first_sd_index = (u16)RS_64(temp, I40IW_QUERY_FPM_FIRST_PE_SD_INDEX);
- max_pe_sds = (u16)RS_64(temp, I40IW_QUERY_FPM_MAX_PE_SDS);
-
- /* Reduce SD count for VFs by 1 to account for PBLE backing page rounding */
- if (hmc_info->hmc_fn_id >= I40IW_FIRST_VF_FPM_ID)
- max_pe_sds--;
- hmc_fpm_misc->max_sds = max_pe_sds;
- hmc_info->sd_table.sd_cnt = max_pe_sds + hmc_info->first_sd_index;
-
- get_64bit_val(buf, 8, &temp);
- obj_info[I40IW_HMC_IW_QP].max_cnt = (u32)RS_64(temp, I40IW_QUERY_FPM_MAX_QPS);
- size = (u32)RS_64_1(temp, 32);
- obj_info[I40IW_HMC_IW_QP].size = LS_64_1(1, size);
-
- get_64bit_val(buf, 16, &temp);
- obj_info[I40IW_HMC_IW_CQ].max_cnt = (u32)RS_64(temp, I40IW_QUERY_FPM_MAX_CQS);
- size = (u32)RS_64_1(temp, 32);
- obj_info[I40IW_HMC_IW_CQ].size = LS_64_1(1, size);
-
- i40iw_sc_decode_fpm_query(buf, 32, obj_info, I40IW_HMC_IW_HTE);
- i40iw_sc_decode_fpm_query(buf, 40, obj_info, I40IW_HMC_IW_ARP);
-
- obj_info[I40IW_HMC_IW_APBVT_ENTRY].size = 8192;
- obj_info[I40IW_HMC_IW_APBVT_ENTRY].max_cnt = 1;
-
- i40iw_sc_decode_fpm_query(buf, 48, obj_info, I40IW_HMC_IW_MR);
- i40iw_sc_decode_fpm_query(buf, 56, obj_info, I40IW_HMC_IW_XF);
-
- get_64bit_val(buf, 64, &temp);
- obj_info[I40IW_HMC_IW_XFFL].max_cnt = (u32)temp;
- obj_info[I40IW_HMC_IW_XFFL].size = 4;
- hmc_fpm_misc->xf_block_size = RS_64(temp, I40IW_QUERY_FPM_XFBLOCKSIZE);
- if (!hmc_fpm_misc->xf_block_size)
- return I40IW_ERR_INVALID_SIZE;
-
- i40iw_sc_decode_fpm_query(buf, 72, obj_info, I40IW_HMC_IW_Q1);
-
- get_64bit_val(buf, 80, &temp);
- obj_info[I40IW_HMC_IW_Q1FL].max_cnt = (u32)temp;
- obj_info[I40IW_HMC_IW_Q1FL].size = 4;
- hmc_fpm_misc->q1_block_size = RS_64(temp, I40IW_QUERY_FPM_Q1BLOCKSIZE);
- if (!hmc_fpm_misc->q1_block_size)
- return I40IW_ERR_INVALID_SIZE;
-
- i40iw_sc_decode_fpm_query(buf, 88, obj_info, I40IW_HMC_IW_TIMER);
-
- get_64bit_val(buf, 112, &temp);
- obj_info[I40IW_HMC_IW_PBLE].max_cnt = (u32)temp;
- obj_info[I40IW_HMC_IW_PBLE].size = 8;
-
- get_64bit_val(buf, 120, &temp);
- hmc_fpm_misc->max_ceqs = (u8)RS_64(temp, I40IW_QUERY_FPM_MAX_CEQS);
- hmc_fpm_misc->ht_multiplier = RS_64(temp, I40IW_QUERY_FPM_HTMULTIPLIER);
- hmc_fpm_misc->timer_bucket = RS_64(temp, I40IW_QUERY_FPM_TIMERBUCKET);
-
- return 0;
-}
-
-/**
- * i40iw_fill_qos_list - Change all unknown qs handles to available ones
- * @qs_list: list of qs_handles to be fixed with valid qs_handles
- */
-static void i40iw_fill_qos_list(u16 *qs_list)
-{
- u16 qshandle = qs_list[0];
- int i;
-
- for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) {
- if (qs_list[i] == QS_HANDLE_UNKNOWN)
- qs_list[i] = qshandle;
- else
- qshandle = qs_list[i];
- }
-}
-
-/**
- * i40iw_qp_from_entry - Given entry, get to the qp structure
- * @entry: Points to list of qp structure
- */
-static struct i40iw_sc_qp *i40iw_qp_from_entry(struct list_head *entry)
-{
- if (!entry)
- return NULL;
-
- return (struct i40iw_sc_qp *)((char *)entry - offsetof(struct i40iw_sc_qp, list));
-}
-
-/**
- * i40iw_get_qp - get the next qp from the list given current qp
- * @head: Listhead of qp's
- * @qp: current qp
- */
-static struct i40iw_sc_qp *i40iw_get_qp(struct list_head *head, struct i40iw_sc_qp *qp)
-{
- struct list_head *entry = NULL;
- struct list_head *lastentry;
-
- if (list_empty(head))
- return NULL;
-
- if (!qp) {
- entry = head->next;
- } else {
- lastentry = &qp->list;
- entry = (lastentry != head) ? lastentry->next : NULL;
- }
-
- return i40iw_qp_from_entry(entry);
-}
-
-/**
- * i40iw_change_l2params - given the new l2 parameters, change all qp
- * @vsi: pointer to the vsi structure
- * @l2params: New parameters from l2
- */
-void i40iw_change_l2params(struct i40iw_sc_vsi *vsi, struct i40iw_l2params *l2params)
-{
- struct i40iw_sc_dev *dev = vsi->dev;
- struct i40iw_sc_qp *qp = NULL;
- bool qs_handle_change = false;
- unsigned long flags;
- u16 qs_handle;
- int i;
-
- if (vsi->mtu != l2params->mtu) {
- vsi->mtu = l2params->mtu;
- i40iw_reinitialize_ieq(dev);
- }
-
- i40iw_fill_qos_list(l2params->qs_handle_list);
- for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) {
- qs_handle = l2params->qs_handle_list[i];
- if (vsi->qos[i].qs_handle != qs_handle)
- qs_handle_change = true;
- spin_lock_irqsave(&vsi->qos[i].lock, flags);
- qp = i40iw_get_qp(&vsi->qos[i].qplist, qp);
- while (qp) {
- if (qs_handle_change) {
- qp->qs_handle = qs_handle;
- /* issue cqp suspend command */
- i40iw_qp_suspend_resume(dev, qp, true);
- }
- qp = i40iw_get_qp(&vsi->qos[i].qplist, qp);
- }
- spin_unlock_irqrestore(&vsi->qos[i].lock, flags);
- vsi->qos[i].qs_handle = qs_handle;
- }
-}
-
-/**
- * i40iw_qp_rem_qos - remove qp from qos lists during destroy qp
- * @qp: qp to be removed from qos
- */
-void i40iw_qp_rem_qos(struct i40iw_sc_qp *qp)
-{
- struct i40iw_sc_vsi *vsi = qp->vsi;
- unsigned long flags;
-
- if (!qp->on_qoslist)
- return;
- spin_lock_irqsave(&vsi->qos[qp->user_pri].lock, flags);
- list_del(&qp->list);
- spin_unlock_irqrestore(&vsi->qos[qp->user_pri].lock, flags);
-}
-
-/**
- * i40iw_qp_add_qos - called during setctx for qp to be added to qos
- * @qp: qp to be added to qos
- */
-void i40iw_qp_add_qos(struct i40iw_sc_qp *qp)
-{
- struct i40iw_sc_vsi *vsi = qp->vsi;
- unsigned long flags;
-
- if (qp->on_qoslist)
- return;
- spin_lock_irqsave(&vsi->qos[qp->user_pri].lock, flags);
- qp->qs_handle = vsi->qos[qp->user_pri].qs_handle;
- list_add(&qp->list, &vsi->qos[qp->user_pri].qplist);
- qp->on_qoslist = true;
- spin_unlock_irqrestore(&vsi->qos[qp->user_pri].lock, flags);
-}
-
-/**
- * i40iw_sc_pd_init - initialize sc pd struct
- * @dev: sc device struct
- * @pd: sc pd ptr
- * @pd_id: pd_id for allocated pd
- * @abi_ver: ABI version from user context, -1 if not valid
- */
-static void i40iw_sc_pd_init(struct i40iw_sc_dev *dev,
- struct i40iw_sc_pd *pd,
- u16 pd_id,
- int abi_ver)
-{
- pd->size = sizeof(*pd);
- pd->pd_id = pd_id;
- pd->abi_ver = abi_ver;
- pd->dev = dev;
-}
-
-/**
- * i40iw_get_encoded_wqe_size - given wq size, returns hardware encoded size
- * @wqsize: size of the wq (sq, rq, srq) to be encoded
- * @cqpsq: set if the wq is the cqp sq, whose encoded size is 1 more than other wq's
- */
-u8 i40iw_get_encoded_wqe_size(u32 wqsize, bool cqpsq)
-{
- u8 encoded_size = 0;
-
- /* cqp sq's hw coded value starts from 1 for size of 4
- * while it starts from 0 for qp's wq's.
- */
- if (cqpsq)
- encoded_size = 1;
- wqsize >>= 2;
- while (wqsize >>= 1)
- encoded_size++;
- return encoded_size;
-}
-
-/**
- * i40iw_sc_cqp_init - Initialize buffers for a control Queue Pair
- * @cqp: IWARP control queue pair pointer
- * @info: IWARP control queue pair init info pointer
- *
- * Initializes the object and context buffers for a control Queue Pair.
- */
-static enum i40iw_status_code i40iw_sc_cqp_init(struct i40iw_sc_cqp *cqp,
- struct i40iw_cqp_init_info *info)
-{
- u8 hw_sq_size;
-
- if ((info->sq_size > I40IW_CQP_SW_SQSIZE_2048) ||
- (info->sq_size < I40IW_CQP_SW_SQSIZE_4) ||
- ((info->sq_size & (info->sq_size - 1))))
- return I40IW_ERR_INVALID_SIZE;
-
- hw_sq_size = i40iw_get_encoded_wqe_size(info->sq_size, true);
- cqp->size = sizeof(*cqp);
- cqp->sq_size = info->sq_size;
- cqp->hw_sq_size = hw_sq_size;
- cqp->sq_base = info->sq;
- cqp->host_ctx = info->host_ctx;
- cqp->sq_pa = info->sq_pa;
- cqp->host_ctx_pa = info->host_ctx_pa;
- cqp->dev = info->dev;
- cqp->struct_ver = info->struct_ver;
- cqp->scratch_array = info->scratch_array;
- cqp->polarity = 0;
- cqp->en_datacenter_tcp = info->en_datacenter_tcp;
- cqp->enabled_vf_count = info->enabled_vf_count;
- cqp->hmc_profile = info->hmc_profile;
- info->dev->cqp = cqp;
-
- I40IW_RING_INIT(cqp->sq_ring, cqp->sq_size);
- cqp->dev->cqp_cmd_stats[OP_REQUESTED_COMMANDS] = 0;
- cqp->dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS] = 0;
- INIT_LIST_HEAD(&cqp->dev->cqp_cmd_head); /* for the cqp commands backlog. */
-
- i40iw_wr32(cqp->dev->hw, I40E_PFPE_CQPTAIL, 0);
- i40iw_wr32(cqp->dev->hw, I40E_PFPE_CQPDB, 0);
-
- i40iw_debug(cqp->dev, I40IW_DEBUG_WQE,
- "%s: sq_size[%04d] hw_sq_size[%04d] sq_base[%p] sq_pa[%llxh] cqp[%p] polarity[x%04X]\n",
- __func__, cqp->sq_size, cqp->hw_sq_size,
- cqp->sq_base, cqp->sq_pa, cqp, cqp->polarity);
- return 0;
-}
-
-/**
- * i40iw_sc_cqp_create - create cqp during bringup
- * @cqp: struct for cqp hw
- * @maj_err: If error, major err number
- * @min_err: If error, minor err number
- */
-static enum i40iw_status_code i40iw_sc_cqp_create(struct i40iw_sc_cqp *cqp,
- u16 *maj_err,
- u16 *min_err)
-{
- u64 temp;
- u32 cnt = 0, p1, p2, val = 0, err_code;
- enum i40iw_status_code ret_code;
-
- *maj_err = 0;
- *min_err = 0;
-
- ret_code = i40iw_allocate_dma_mem(cqp->dev->hw,
- &cqp->sdbuf,
- I40IW_UPDATE_SD_BUF_SIZE * cqp->sq_size,
- I40IW_SD_BUF_ALIGNMENT);
-
- if (ret_code)
- goto exit;
-
- temp = LS_64(cqp->hw_sq_size, I40IW_CQPHC_SQSIZE) |
- LS_64(cqp->struct_ver, I40IW_CQPHC_SVER);
-
- set_64bit_val(cqp->host_ctx, 0, temp);
- set_64bit_val(cqp->host_ctx, 8, cqp->sq_pa);
- temp = LS_64(cqp->enabled_vf_count, I40IW_CQPHC_ENABLED_VFS) |
- LS_64(cqp->hmc_profile, I40IW_CQPHC_HMC_PROFILE);
- set_64bit_val(cqp->host_ctx, 16, temp);
- set_64bit_val(cqp->host_ctx, 24, (uintptr_t)cqp);
- set_64bit_val(cqp->host_ctx, 32, 0);
- set_64bit_val(cqp->host_ctx, 40, 0);
- set_64bit_val(cqp->host_ctx, 48, 0);
- set_64bit_val(cqp->host_ctx, 56, 0);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CQP_HOST_CTX",
- cqp->host_ctx, I40IW_CQP_CTX_SIZE * 8);
-
- p1 = RS_32_1(cqp->host_ctx_pa, 32);
- p2 = (u32)cqp->host_ctx_pa;
-
- if (cqp->dev->is_pf) {
- i40iw_wr32(cqp->dev->hw, I40E_PFPE_CCQPHIGH, p1);
- i40iw_wr32(cqp->dev->hw, I40E_PFPE_CCQPLOW, p2);
- } else {
- i40iw_wr32(cqp->dev->hw, I40E_VFPE_CCQPHIGH1, p1);
- i40iw_wr32(cqp->dev->hw, I40E_VFPE_CCQPLOW1, p2);
- }
- do {
- if (cnt++ > I40IW_DONE_COUNT) {
- i40iw_free_dma_mem(cqp->dev->hw, &cqp->sdbuf);
- ret_code = I40IW_ERR_TIMEOUT;
- /*
- * read PFPE_CQPERRORCODES register to get the minor
- * and major error code
- */
- if (cqp->dev->is_pf)
- err_code = i40iw_rd32(cqp->dev->hw, I40E_PFPE_CQPERRCODES);
- else
- err_code = i40iw_rd32(cqp->dev->hw, I40E_VFPE_CQPERRCODES1);
- *min_err = RS_32(err_code, I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE);
- *maj_err = RS_32(err_code, I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE);
- goto exit;
- }
- udelay(I40IW_SLEEP_COUNT);
- if (cqp->dev->is_pf)
- val = i40iw_rd32(cqp->dev->hw, I40E_PFPE_CCQPSTATUS);
- else
- val = i40iw_rd32(cqp->dev->hw, I40E_VFPE_CCQPSTATUS1);
- } while (!val);
-
-exit:
- if (!ret_code)
- cqp->process_cqp_sds = i40iw_update_sds_noccq;
- return ret_code;
-}
-
-/**
- * i40iw_sc_cqp_post_sq - post of cqp's sq
- * @cqp: struct for cqp hw
- */
-void i40iw_sc_cqp_post_sq(struct i40iw_sc_cqp *cqp)
-{
- if (cqp->dev->is_pf)
- i40iw_wr32(cqp->dev->hw, I40E_PFPE_CQPDB, I40IW_RING_GETCURRENT_HEAD(cqp->sq_ring));
- else
- i40iw_wr32(cqp->dev->hw, I40E_VFPE_CQPDB1, I40IW_RING_GETCURRENT_HEAD(cqp->sq_ring));
-
- i40iw_debug(cqp->dev,
- I40IW_DEBUG_WQE,
- "%s: HEAD_TAIL[%04d,%04d,%04d]\n",
- __func__,
- cqp->sq_ring.head,
- cqp->sq_ring.tail,
- cqp->sq_ring.size);
-}
-
-/**
- * i40iw_sc_cqp_get_next_send_wqe_idx - get next WQE on CQP SQ and pass back the index
- * @cqp: pointer to CQP structure
- * @scratch: private data for CQP WQE
- * @wqe_idx: WQE index for next WQE on CQP SQ
- */
-static u64 *i40iw_sc_cqp_get_next_send_wqe_idx(struct i40iw_sc_cqp *cqp,
- u64 scratch, u32 *wqe_idx)
-{
- u64 *wqe = NULL;
- enum i40iw_status_code ret_code;
-
- if (I40IW_RING_FULL_ERR(cqp->sq_ring)) {
- i40iw_debug(cqp->dev,
- I40IW_DEBUG_WQE,
- "%s: ring is full head %x tail %x size %x\n",
- __func__,
- cqp->sq_ring.head,
- cqp->sq_ring.tail,
- cqp->sq_ring.size);
- return NULL;
- }
- I40IW_ATOMIC_RING_MOVE_HEAD(cqp->sq_ring, *wqe_idx, ret_code);
- cqp->dev->cqp_cmd_stats[OP_REQUESTED_COMMANDS]++;
- if (ret_code)
- return NULL;
- if (!*wqe_idx)
- cqp->polarity = !cqp->polarity;
-
- wqe = cqp->sq_base[*wqe_idx].elem;
- cqp->scratch_array[*wqe_idx] = scratch;
- I40IW_CQP_INIT_WQE(wqe);
-
- return wqe;
-}
-
-/**
- * i40iw_sc_cqp_get_next_send_wqe - get next wqe on cqp sq
- * @cqp: struct for cqp hw
- * @scratch: private data for CQP WQE
- */
-u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch)
-{
- u32 wqe_idx;
-
- return i40iw_sc_cqp_get_next_send_wqe_idx(cqp, scratch, &wqe_idx);
-}
-
-/**
- * i40iw_sc_cqp_destroy - destroy cqp during close
- * @cqp: struct for cqp hw
- */
-static enum i40iw_status_code i40iw_sc_cqp_destroy(struct i40iw_sc_cqp *cqp)
-{
- u32 cnt = 0, val = 1;
- enum i40iw_status_code ret_code = 0;
- u32 cqpstat_addr;
-
- if (cqp->dev->is_pf) {
- i40iw_wr32(cqp->dev->hw, I40E_PFPE_CCQPHIGH, 0);
- i40iw_wr32(cqp->dev->hw, I40E_PFPE_CCQPLOW, 0);
- cqpstat_addr = I40E_PFPE_CCQPSTATUS;
- } else {
- i40iw_wr32(cqp->dev->hw, I40E_VFPE_CCQPHIGH1, 0);
- i40iw_wr32(cqp->dev->hw, I40E_VFPE_CCQPLOW1, 0);
- cqpstat_addr = I40E_VFPE_CCQPSTATUS1;
- }
- do {
- if (cnt++ > I40IW_DONE_COUNT) {
- ret_code = I40IW_ERR_TIMEOUT;
- break;
- }
- udelay(I40IW_SLEEP_COUNT);
- val = i40iw_rd32(cqp->dev->hw, cqpstat_addr);
- } while (val);
-
- i40iw_free_dma_mem(cqp->dev->hw, &cqp->sdbuf);
- return ret_code;
-}
-
-/**
- * i40iw_sc_ccq_arm - enable intr for control cq
- * @ccq: ccq sc struct
- */
-static void i40iw_sc_ccq_arm(struct i40iw_sc_cq *ccq)
-{
- u64 temp_val;
- u16 sw_cq_sel;
- u8 arm_next_se;
- u8 arm_seq_num;
-
- /* write to cq doorbell shadow area */
- /* arm next se should always be zero */
- get_64bit_val(ccq->cq_uk.shadow_area, 32, &temp_val);
-
- sw_cq_sel = (u16)RS_64(temp_val, I40IW_CQ_DBSA_SW_CQ_SELECT);
- arm_next_se = (u8)RS_64(temp_val, I40IW_CQ_DBSA_ARM_NEXT_SE);
-
- arm_seq_num = (u8)RS_64(temp_val, I40IW_CQ_DBSA_ARM_SEQ_NUM);
- arm_seq_num++;
-
- temp_val = LS_64(arm_seq_num, I40IW_CQ_DBSA_ARM_SEQ_NUM) |
- LS_64(sw_cq_sel, I40IW_CQ_DBSA_SW_CQ_SELECT) |
- LS_64(arm_next_se, I40IW_CQ_DBSA_ARM_NEXT_SE) |
- LS_64(1, I40IW_CQ_DBSA_ARM_NEXT);
-
- set_64bit_val(ccq->cq_uk.shadow_area, 32, temp_val);
-
- wmb(); /* make sure shadow area is updated before arming */
-
- if (ccq->dev->is_pf)
- i40iw_wr32(ccq->dev->hw, I40E_PFPE_CQARM, ccq->cq_uk.cq_id);
- else
- i40iw_wr32(ccq->dev->hw, I40E_VFPE_CQARM1, ccq->cq_uk.cq_id);
-}
-
-/**
- * i40iw_sc_ccq_get_cqe_info - get ccq's cq entry
- * @ccq: ccq sc struct
- * @info: completion q entry to return
- */
-static enum i40iw_status_code i40iw_sc_ccq_get_cqe_info(
- struct i40iw_sc_cq *ccq,
- struct i40iw_ccq_cqe_info *info)
-{
- u64 qp_ctx, temp, temp1;
- u64 *cqe;
- struct i40iw_sc_cqp *cqp;
- u32 wqe_idx;
- u8 polarity;
- enum i40iw_status_code ret_code = 0;
-
- if (ccq->cq_uk.avoid_mem_cflct)
- cqe = (u64 *)I40IW_GET_CURRENT_EXTENDED_CQ_ELEMENT(&ccq->cq_uk);
- else
- cqe = (u64 *)I40IW_GET_CURRENT_CQ_ELEMENT(&ccq->cq_uk);
-
- get_64bit_val(cqe, 24, &temp);
- polarity = (u8)RS_64(temp, I40IW_CQ_VALID);
- if (polarity != ccq->cq_uk.polarity)
- return I40IW_ERR_QUEUE_EMPTY;
-
- get_64bit_val(cqe, 8, &qp_ctx);
- cqp = (struct i40iw_sc_cqp *)(unsigned long)qp_ctx;
- info->error = (bool)RS_64(temp, I40IW_CQ_ERROR);
- info->min_err_code = (u16)RS_64(temp, I40IW_CQ_MINERR);
- if (info->error) {
- info->maj_err_code = (u16)RS_64(temp, I40IW_CQ_MAJERR);
- info->min_err_code = (u16)RS_64(temp, I40IW_CQ_MINERR);
- }
- wqe_idx = (u32)RS_64(temp, I40IW_CQ_WQEIDX);
- info->scratch = cqp->scratch_array[wqe_idx];
-
- get_64bit_val(cqe, 16, &temp1);
- info->op_ret_val = (u32)RS_64(temp1, I40IW_CCQ_OPRETVAL);
- get_64bit_val(cqp->sq_base[wqe_idx].elem, 24, &temp1);
- info->op_code = (u8)RS_64(temp1, I40IW_CQPSQ_OPCODE);
- info->cqp = cqp;
-
- /* move the head for cq */
- I40IW_RING_MOVE_HEAD(ccq->cq_uk.cq_ring, ret_code);
- if (I40IW_RING_GETCURRENT_HEAD(ccq->cq_uk.cq_ring) == 0)
- ccq->cq_uk.polarity ^= 1;
-
- /* update cq tail in cq shadow memory also */
- I40IW_RING_MOVE_TAIL(ccq->cq_uk.cq_ring);
- set_64bit_val(ccq->cq_uk.shadow_area,
- 0,
- I40IW_RING_GETCURRENT_HEAD(ccq->cq_uk.cq_ring));
- wmb(); /* write shadow area before tail */
- I40IW_RING_MOVE_TAIL(cqp->sq_ring);
- ccq->dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS]++;
-
- return ret_code;
-}
-
-/**
- * i40iw_sc_poll_for_cqp_op_done - Waits for last write to complete in CQP SQ
- * @cqp: struct for cqp hw
- * @op_code: cqp opcode for completion
- * @compl_info: completion q entry to return
- */
-static enum i40iw_status_code i40iw_sc_poll_for_cqp_op_done(
- struct i40iw_sc_cqp *cqp,
- u8 op_code,
- struct i40iw_ccq_cqe_info *compl_info)
-{
- struct i40iw_ccq_cqe_info info;
- struct i40iw_sc_cq *ccq;
- enum i40iw_status_code ret_code = 0;
- u32 cnt = 0;
-
- memset(&info, 0, sizeof(info));
- ccq = cqp->dev->ccq;
- while (1) {
- if (cnt++ > I40IW_DONE_COUNT)
- return I40IW_ERR_TIMEOUT;
-
- if (i40iw_sc_ccq_get_cqe_info(ccq, &info)) {
- udelay(I40IW_SLEEP_COUNT);
- continue;
- }
-
- if (info.error) {
- ret_code = I40IW_ERR_CQP_COMPL_ERROR;
- break;
- }
- /* check if the completed opcode matches the one requested */
- if (op_code != info.op_code) {
- i40iw_debug(cqp->dev, I40IW_DEBUG_WQE,
- "%s: opcode mismatch for my op code 0x%x, returned opcode %x\n",
- __func__, op_code, info.op_code);
- }
- /* success, exit out of the loop */
- if (op_code == info.op_code)
- break;
- }
-
- if (compl_info)
- memcpy(compl_info, &info, sizeof(*compl_info));
-
- return ret_code;
-}
-
-/**
- * i40iw_sc_manage_push_page - Handle push page
- * @cqp: struct for cqp hw
- * @info: push page info
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_manage_push_page(
- struct i40iw_sc_cqp *cqp,
- struct i40iw_cqp_manage_push_page_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- u64 header;
-
- if (info->push_idx >= I40IW_MAX_PUSH_PAGE_COUNT)
- return I40IW_ERR_INVALID_PUSH_PAGE_INDEX;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- set_64bit_val(wqe, 16, info->qs_handle);
-
- header = LS_64(info->push_idx, I40IW_CQPSQ_MPP_PPIDX) |
- LS_64(I40IW_CQP_OP_MANAGE_PUSH_PAGES, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID) |
- LS_64(info->free_page, I40IW_CQPSQ_MPP_FREE_PAGE);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_PUSH_PAGES WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_manage_hmc_pm_func_table - manage hmc pm function table
- * @cqp: struct for cqp hw
- * @scratch: u64 saved to be used during cqp completion
- * @vf_index: vf index for cqp
- * @free_pm_fcn: function number
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_manage_hmc_pm_func_table(
- struct i40iw_sc_cqp *cqp,
- u64 scratch,
- u8 vf_index,
- bool free_pm_fcn,
- bool post_sq)
-{
- u64 *wqe;
- u64 header;
-
- if (vf_index >= I40IW_MAX_VF_PER_PF)
- return I40IW_ERR_INVALID_VF_ID;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- header = LS_64(vf_index, I40IW_CQPSQ_MHMC_VFIDX) |
- LS_64(I40IW_CQP_OP_MANAGE_HMC_PM_FUNC_TABLE, I40IW_CQPSQ_OPCODE) |
- LS_64(free_pm_fcn, I40IW_CQPSQ_MHMC_FREEPMFN) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_HMC_PM_FUNC_TABLE WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_set_hmc_resource_profile - cqp wqe for hmc profile
- * @cqp: struct for cqp hw
- * @scratch: u64 saved to be used during cqp completion
- * @hmc_profile_type: type of profile to set
- * @vf_num: vf number for profile
- * @post_sq: flag for cqp db to ring
- * @poll_registers: flag to poll register for cqp completion
- */
-static enum i40iw_status_code i40iw_sc_set_hmc_resource_profile(
- struct i40iw_sc_cqp *cqp,
- u64 scratch,
- u8 hmc_profile_type,
- u8 vf_num, bool post_sq,
- bool poll_registers)
-{
- u64 *wqe;
- u64 header;
- u32 val, tail, error;
- enum i40iw_status_code ret_code = 0;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- set_64bit_val(wqe, 16,
- (LS_64(hmc_profile_type, I40IW_CQPSQ_SHMCRP_HMC_PROFILE) |
- LS_64(vf_num, I40IW_CQPSQ_SHMCRP_VFNUM)));
-
- header = LS_64(I40IW_CQP_OP_SET_HMC_RESOURCE_PROFILE, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_HMC_PM_FUNC_TABLE WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
- if (error)
- return I40IW_ERR_CQP_COMPL_ERROR;
-
- if (post_sq) {
- i40iw_sc_cqp_post_sq(cqp);
- if (poll_registers)
- ret_code = i40iw_cqp_poll_registers(cqp, tail, 1000000);
- else
- ret_code = i40iw_sc_poll_for_cqp_op_done(cqp,
- I40IW_CQP_OP_SHMC_PAGES_ALLOCATED,
- NULL);
- }
-
- return ret_code;
-}
-
-/**
- * i40iw_sc_manage_hmc_pm_func_table_done - wait for cqp wqe completion for function table
- * @cqp: struct for cqp hw
- */
-static enum i40iw_status_code i40iw_sc_manage_hmc_pm_func_table_done(struct i40iw_sc_cqp *cqp)
-{
- return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_MANAGE_HMC_PM_FUNC_TABLE, NULL);
-}
-
-/**
- * i40iw_sc_commit_fpm_values_done - wait for cqp wqe completion for fpm commit
- * @cqp: struct for cqp hw
- */
-static enum i40iw_status_code i40iw_sc_commit_fpm_values_done(struct i40iw_sc_cqp *cqp)
-{
- return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_COMMIT_FPM_VALUES, NULL);
-}
-
-/**
- * i40iw_sc_commit_fpm_values - cqp wqe for commit fpm values
- * @cqp: struct for cqp hw
- * @scratch: u64 saved to be used during cqp completion
- * @hmc_fn_id: hmc function id
- * @commit_fpm_mem: memory for fpm values
- * @post_sq: flag for cqp db to ring
- * @wait_type: poll ccq or cqp registers for cqp completion
- */
-static enum i40iw_status_code i40iw_sc_commit_fpm_values(
- struct i40iw_sc_cqp *cqp,
- u64 scratch,
- u8 hmc_fn_id,
- struct i40iw_dma_mem *commit_fpm_mem,
- bool post_sq,
- u8 wait_type)
-{
- u64 *wqe;
- u64 header;
- u32 tail, val, error;
- enum i40iw_status_code ret_code = 0;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- set_64bit_val(wqe, 16, hmc_fn_id);
- set_64bit_val(wqe, 32, commit_fpm_mem->pa);
-
- header = LS_64(I40IW_CQP_OP_COMMIT_FPM_VALUES, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "COMMIT_FPM_VALUES WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
- if (error)
- return I40IW_ERR_CQP_COMPL_ERROR;
-
- if (post_sq) {
- i40iw_sc_cqp_post_sq(cqp);
-
- if (wait_type == I40IW_CQP_WAIT_POLL_REGS)
- ret_code = i40iw_cqp_poll_registers(cqp, tail, I40IW_DONE_COUNT);
- else if (wait_type == I40IW_CQP_WAIT_POLL_CQ)
- ret_code = i40iw_sc_commit_fpm_values_done(cqp);
- }
-
- return ret_code;
-}
-
-/**
- * i40iw_sc_query_fpm_values_done - poll for cqp wqe completion for query fpm
- * @cqp: struct for cqp hw
- */
-static enum i40iw_status_code i40iw_sc_query_fpm_values_done(struct i40iw_sc_cqp *cqp)
-{
- return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_QUERY_FPM_VALUES, NULL);
-}
-
-/**
- * i40iw_sc_query_fpm_values - cqp wqe query fpm values
- * @cqp: struct for cqp hw
- * @scratch: u64 saved to be used during cqp completion
- * @hmc_fn_id: hmc function id
- * @query_fpm_mem: memory for return fpm values
- * @post_sq: flag for cqp db to ring
- * @wait_type: poll ccq or cqp registers for cqp completion
- */
-static enum i40iw_status_code i40iw_sc_query_fpm_values(
- struct i40iw_sc_cqp *cqp,
- u64 scratch,
- u8 hmc_fn_id,
- struct i40iw_dma_mem *query_fpm_mem,
- bool post_sq,
- u8 wait_type)
-{
- u64 *wqe;
- u64 header;
- u32 tail, val, error;
- enum i40iw_status_code ret_code = 0;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- set_64bit_val(wqe, 16, hmc_fn_id);
- set_64bit_val(wqe, 32, query_fpm_mem->pa);
-
- header = LS_64(I40IW_CQP_OP_QUERY_FPM_VALUES, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QUERY_FPM WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- /* read the tail from CQP_TAIL register */
- i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
-
- if (error)
- return I40IW_ERR_CQP_COMPL_ERROR;
-
- if (post_sq) {
- i40iw_sc_cqp_post_sq(cqp);
- if (wait_type == I40IW_CQP_WAIT_POLL_REGS)
- ret_code = i40iw_cqp_poll_registers(cqp, tail, I40IW_DONE_COUNT);
- else if (wait_type == I40IW_CQP_WAIT_POLL_CQ)
- ret_code = i40iw_sc_query_fpm_values_done(cqp);
- }
-
- return ret_code;
-}
-
-/**
- * i40iw_sc_add_arp_cache_entry - cqp wqe add arp cache entry
- * @cqp: struct for cqp hw
- * @info: arp entry information
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_add_arp_cache_entry(
- struct i40iw_sc_cqp *cqp,
- struct i40iw_add_arp_cache_entry_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- u64 temp, header;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe, 8, info->reach_max);
-
- temp = info->mac_addr[5] |
- LS_64_1(info->mac_addr[4], 8) |
- LS_64_1(info->mac_addr[3], 16) |
- LS_64_1(info->mac_addr[2], 24) |
- LS_64_1(info->mac_addr[1], 32) |
- LS_64_1(info->mac_addr[0], 40);
-
- set_64bit_val(wqe, 16, temp);
-
- header = info->arp_index |
- LS_64(I40IW_CQP_OP_MANAGE_ARP, I40IW_CQPSQ_OPCODE) |
- LS_64((info->permanent ? 1 : 0), I40IW_CQPSQ_MAT_PERMANENT) |
- LS_64(1, I40IW_CQPSQ_MAT_ENTRYVALID) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "ARP_CACHE_ENTRY WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_del_arp_cache_entry - delete arp cache entry
- * @cqp: struct for cqp hw
- * @scratch: u64 saved to be used during cqp completion
- * @arp_index: arp index to delete arp entry
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_del_arp_cache_entry(
- struct i40iw_sc_cqp *cqp,
- u64 scratch,
- u16 arp_index,
- bool post_sq)
-{
- u64 *wqe;
- u64 header;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- header = arp_index |
- LS_64(I40IW_CQP_OP_MANAGE_ARP, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "ARP_CACHE_DEL_ENTRY WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_query_arp_cache_entry - cqp wqe to query arp and arp index
- * @cqp: struct for cqp hw
- * @scratch: u64 saved to be used during cqp completion
- * @arp_index: arp index of the arp entry to query
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_query_arp_cache_entry(
- struct i40iw_sc_cqp *cqp,
- u64 scratch,
- u16 arp_index,
- bool post_sq)
-{
- u64 *wqe;
- u64 header;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- header = arp_index |
- LS_64(I40IW_CQP_OP_MANAGE_ARP, I40IW_CQPSQ_OPCODE) |
- LS_64(1, I40IW_CQPSQ_MAT_QUERY) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QUERY_ARP_CACHE_ENTRY WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_manage_apbvt_entry - for adding and deleting apbvt entries
- * @cqp: struct for cqp hw
- * @info: info for apbvt entry to add or delete
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_manage_apbvt_entry(
- struct i40iw_sc_cqp *cqp,
- struct i40iw_apbvt_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- u64 header;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- set_64bit_val(wqe, 16, info->port);
-
- header = LS_64(I40IW_CQP_OP_MANAGE_APBVT, I40IW_CQPSQ_OPCODE) |
- LS_64(info->add, I40IW_CQPSQ_MAPT_ADDPORT) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_APBVT WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_manage_qhash_table_entry - manage quad hash entries
- * @cqp: struct for cqp hw
- * @info: info for quad hash to manage
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- *
- * This is called before connection establishment is started. For passive connections, when
- * listener is created, it will call with entry type of I40IW_QHASH_TYPE_TCP_SYN with local
- * ip address and tcp port. When SYN is received (passive connections) or
- * sent (active connections), this routine is called with entry type of
- * I40IW_QHASH_TYPE_TCP_ESTABLISHED and quad is passed in info.
- *
- * When iwarp connection is done and its state moves to RTS, the quad hash entry in
- * the hardware will point to iwarp's qp number and requires no calls from the driver.
- */
-static enum i40iw_status_code i40iw_sc_manage_qhash_table_entry(
- struct i40iw_sc_cqp *cqp,
- struct i40iw_qhash_table_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- u64 qw1 = 0;
- u64 qw2 = 0;
- u64 temp;
- struct i40iw_sc_vsi *vsi = info->vsi;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- temp = info->mac_addr[5] |
- LS_64_1(info->mac_addr[4], 8) |
- LS_64_1(info->mac_addr[3], 16) |
- LS_64_1(info->mac_addr[2], 24) |
- LS_64_1(info->mac_addr[1], 32) |
- LS_64_1(info->mac_addr[0], 40);
-
- set_64bit_val(wqe, 0, temp);
-
- qw1 = LS_64(info->qp_num, I40IW_CQPSQ_QHASH_QPN) |
- LS_64(info->dest_port, I40IW_CQPSQ_QHASH_DEST_PORT);
- if (info->ipv4_valid) {
- set_64bit_val(wqe,
- 48,
- LS_64(info->dest_ip[0], I40IW_CQPSQ_QHASH_ADDR3));
- } else {
- set_64bit_val(wqe,
- 56,
- LS_64(info->dest_ip[0], I40IW_CQPSQ_QHASH_ADDR0) |
- LS_64(info->dest_ip[1], I40IW_CQPSQ_QHASH_ADDR1));
-
- set_64bit_val(wqe,
- 48,
- LS_64(info->dest_ip[2], I40IW_CQPSQ_QHASH_ADDR2) |
- LS_64(info->dest_ip[3], I40IW_CQPSQ_QHASH_ADDR3));
- }
- qw2 = LS_64(vsi->qos[info->user_pri].qs_handle, I40IW_CQPSQ_QHASH_QS_HANDLE);
- if (info->vlan_valid)
- qw2 |= LS_64(info->vlan_id, I40IW_CQPSQ_QHASH_VLANID);
- set_64bit_val(wqe, 16, qw2);
- if (info->entry_type == I40IW_QHASH_TYPE_TCP_ESTABLISHED) {
- qw1 |= LS_64(info->src_port, I40IW_CQPSQ_QHASH_SRC_PORT);
- if (!info->ipv4_valid) {
- set_64bit_val(wqe,
- 40,
- LS_64(info->src_ip[0], I40IW_CQPSQ_QHASH_ADDR0) |
- LS_64(info->src_ip[1], I40IW_CQPSQ_QHASH_ADDR1));
- set_64bit_val(wqe,
- 32,
- LS_64(info->src_ip[2], I40IW_CQPSQ_QHASH_ADDR2) |
- LS_64(info->src_ip[3], I40IW_CQPSQ_QHASH_ADDR3));
- } else {
- set_64bit_val(wqe,
- 32,
- LS_64(info->src_ip[0], I40IW_CQPSQ_QHASH_ADDR3));
- }
- }
-
- set_64bit_val(wqe, 8, qw1);
- temp = LS_64(cqp->polarity, I40IW_CQPSQ_QHASH_WQEVALID) |
- LS_64(I40IW_CQP_OP_MANAGE_QUAD_HASH_TABLE_ENTRY, I40IW_CQPSQ_QHASH_OPCODE) |
- LS_64(info->manage, I40IW_CQPSQ_QHASH_MANAGE) |
- LS_64(info->ipv4_valid, I40IW_CQPSQ_QHASH_IPV4VALID) |
- LS_64(info->vlan_valid, I40IW_CQPSQ_QHASH_VLANVALID) |
- LS_64(info->entry_type, I40IW_CQPSQ_QHASH_ENTRYTYPE);
-
- i40iw_insert_wqe_hdr(wqe, temp);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_QHASH WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_alloc_local_mac_ipaddr_entry - cqp wqe for loc mac entry
- * @cqp: struct for cqp hw
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_alloc_local_mac_ipaddr_entry(
- struct i40iw_sc_cqp *cqp,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- u64 header;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- header = LS_64(I40IW_CQP_OP_ALLOCATE_LOC_MAC_IP_TABLE_ENTRY, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "ALLOCATE_LOCAL_MAC_IPADDR WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_add_local_mac_ipaddr_entry - add mac entry
- * @cqp: struct for cqp hw
- * @info: mac addr info
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_add_local_mac_ipaddr_entry(
- struct i40iw_sc_cqp *cqp,
- struct i40iw_local_mac_ipaddr_entry_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- u64 temp, header;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- temp = info->mac_addr[5] |
- LS_64_1(info->mac_addr[4], 8) |
- LS_64_1(info->mac_addr[3], 16) |
- LS_64_1(info->mac_addr[2], 24) |
- LS_64_1(info->mac_addr[1], 32) |
- LS_64_1(info->mac_addr[0], 40);
-
- set_64bit_val(wqe, 32, temp);
-
- header = LS_64(info->entry_idx, I40IW_CQPSQ_MLIPA_IPTABLEIDX) |
- LS_64(I40IW_CQP_OP_MANAGE_LOC_MAC_IP_TABLE, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "ADD_LOCAL_MAC_IPADDR WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_del_local_mac_ipaddr_entry - cqp wqe to delete local mac entry
- * @cqp: struct for cqp hw
- * @scratch: u64 saved to be used during cqp completion
- * @entry_idx: index of mac entry
- * @ignore_ref_count: to force mac address entry delete
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_del_local_mac_ipaddr_entry(
- struct i40iw_sc_cqp *cqp,
- u64 scratch,
- u8 entry_idx,
- u8 ignore_ref_count,
- bool post_sq)
-{
- u64 *wqe;
- u64 header;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- header = LS_64(entry_idx, I40IW_CQPSQ_MLIPA_IPTABLEIDX) |
- LS_64(I40IW_CQP_OP_MANAGE_LOC_MAC_IP_TABLE, I40IW_CQPSQ_OPCODE) |
- LS_64(1, I40IW_CQPSQ_MLIPA_FREEENTRY) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID) |
- LS_64(ignore_ref_count, I40IW_CQPSQ_MLIPA_IGNORE_REF_CNT);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "DEL_LOCAL_MAC_IPADDR WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_cqp_nop - send a nop wqe
- * @cqp: struct for cqp hw
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_cqp_nop(struct i40iw_sc_cqp *cqp,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- u64 header;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- header = LS_64(I40IW_CQP_OP_NOP, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
- i40iw_insert_wqe_hdr(wqe, header);
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "NOP WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_ceq_init - initialize ceq
- * @ceq: ceq sc structure
- * @info: ceq initialization info
- */
-static enum i40iw_status_code i40iw_sc_ceq_init(struct i40iw_sc_ceq *ceq,
- struct i40iw_ceq_init_info *info)
-{
- u32 pble_obj_cnt;
-
- if ((info->elem_cnt < I40IW_MIN_CEQ_ENTRIES) ||
- (info->elem_cnt > I40IW_MAX_CEQ_ENTRIES))
- return I40IW_ERR_INVALID_SIZE;
-
- if (info->ceq_id >= I40IW_MAX_CEQID)
- return I40IW_ERR_INVALID_CEQ_ID;
-
- pble_obj_cnt = info->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
-
- if (info->virtual_map && (info->first_pm_pbl_idx >= pble_obj_cnt))
- return I40IW_ERR_INVALID_PBLE_INDEX;
-
- ceq->size = sizeof(*ceq);
- ceq->ceqe_base = (struct i40iw_ceqe *)info->ceqe_base;
- ceq->ceq_id = info->ceq_id;
- ceq->dev = info->dev;
- ceq->elem_cnt = info->elem_cnt;
- ceq->ceq_elem_pa = info->ceqe_pa;
- ceq->virtual_map = info->virtual_map;
-
- ceq->pbl_chunk_size = (ceq->virtual_map ? info->pbl_chunk_size : 0);
- ceq->first_pm_pbl_idx = (ceq->virtual_map ? info->first_pm_pbl_idx : 0);
- ceq->pbl_list = (ceq->virtual_map ? info->pbl_list : NULL);
-
- ceq->tph_en = info->tph_en;
- ceq->tph_val = info->tph_val;
- ceq->polarity = 1;
- I40IW_RING_INIT(ceq->ceq_ring, ceq->elem_cnt);
- ceq->dev->ceq[info->ceq_id] = ceq;
-
- return 0;
-}
-
-/**
- * i40iw_sc_ceq_create - create ceq wqe
- * @ceq: ceq sc structure
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_ceq_create(struct i40iw_sc_ceq *ceq,
- u64 scratch,
- bool post_sq)
-{
- struct i40iw_sc_cqp *cqp;
- u64 *wqe;
- u64 header;
-
- cqp = ceq->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe, 16, ceq->elem_cnt);
- set_64bit_val(wqe, 32, (ceq->virtual_map ? 0 : ceq->ceq_elem_pa));
- set_64bit_val(wqe, 48, (ceq->virtual_map ? ceq->first_pm_pbl_idx : 0));
- set_64bit_val(wqe, 56, LS_64(ceq->tph_val, I40IW_CQPSQ_TPHVAL));
-
- header = ceq->ceq_id |
- LS_64(I40IW_CQP_OP_CREATE_CEQ, I40IW_CQPSQ_OPCODE) |
- LS_64(ceq->pbl_chunk_size, I40IW_CQPSQ_CEQ_LPBLSIZE) |
- LS_64(ceq->virtual_map, I40IW_CQPSQ_CEQ_VMAP) |
- LS_64(ceq->tph_en, I40IW_CQPSQ_TPHEN) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CEQ_CREATE WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_cceq_create_done - poll for control ceq wqe to complete
- * @ceq: ceq sc structure
- */
-static enum i40iw_status_code i40iw_sc_cceq_create_done(struct i40iw_sc_ceq *ceq)
-{
- struct i40iw_sc_cqp *cqp;
-
- cqp = ceq->dev->cqp;
- return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_CREATE_CEQ, NULL);
-}
-
-/**
- * i40iw_sc_cceq_destroy_done - poll for destroy cceq to complete
- * @ceq: ceq sc structure
- */
-static enum i40iw_status_code i40iw_sc_cceq_destroy_done(struct i40iw_sc_ceq *ceq)
-{
- struct i40iw_sc_cqp *cqp;
-
- cqp = ceq->dev->cqp;
- cqp->process_cqp_sds = i40iw_update_sds_noccq;
- return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_DESTROY_CEQ, NULL);
-}
-
-/**
- * i40iw_sc_cceq_create - create cceq
- * @ceq: ceq sc structure
- * @scratch: u64 saved to be used during cqp completion
- */
-static enum i40iw_status_code i40iw_sc_cceq_create(struct i40iw_sc_ceq *ceq, u64 scratch)
-{
- enum i40iw_status_code ret_code;
-
- ret_code = i40iw_sc_ceq_create(ceq, scratch, true);
- if (!ret_code)
- ret_code = i40iw_sc_cceq_create_done(ceq);
- return ret_code;
-}
-
-/**
- * i40iw_sc_ceq_destroy - destroy ceq
- * @ceq: ceq sc structure
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_ceq_destroy(struct i40iw_sc_ceq *ceq,
- u64 scratch,
- bool post_sq)
-{
- struct i40iw_sc_cqp *cqp;
- u64 *wqe;
- u64 header;
-
- cqp = ceq->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe, 16, ceq->elem_cnt);
- set_64bit_val(wqe, 48, ceq->first_pm_pbl_idx);
- header = ceq->ceq_id |
- LS_64(I40IW_CQP_OP_DESTROY_CEQ, I40IW_CQPSQ_OPCODE) |
- LS_64(ceq->pbl_chunk_size, I40IW_CQPSQ_CEQ_LPBLSIZE) |
- LS_64(ceq->virtual_map, I40IW_CQPSQ_CEQ_VMAP) |
- LS_64(ceq->tph_en, I40IW_CQPSQ_TPHEN) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
- i40iw_insert_wqe_hdr(wqe, header);
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CEQ_DESTROY WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_process_ceq - process ceq
- * @dev: sc device struct
- * @ceq: ceq sc structure
- */
-static void *i40iw_sc_process_ceq(struct i40iw_sc_dev *dev, struct i40iw_sc_ceq *ceq)
-{
- u64 temp;
- u64 *ceqe;
- struct i40iw_sc_cq *cq = NULL;
- u8 polarity;
-
- ceqe = (u64 *)I40IW_GET_CURRENT_CEQ_ELEMENT(ceq);
- get_64bit_val(ceqe, 0, &temp);
- polarity = (u8)RS_64(temp, I40IW_CEQE_VALID);
- if (polarity != ceq->polarity)
- return cq;
-
- cq = (struct i40iw_sc_cq *)(unsigned long)LS_64_1(temp, 1);
-
- I40IW_RING_MOVE_TAIL(ceq->ceq_ring);
- if (I40IW_RING_GETCURRENT_TAIL(ceq->ceq_ring) == 0)
- ceq->polarity ^= 1;
-
- if (dev->is_pf)
- i40iw_wr32(dev->hw, I40E_PFPE_CQACK, cq->cq_uk.cq_id);
- else
- i40iw_wr32(dev->hw, I40E_VFPE_CQACK1, cq->cq_uk.cq_id);
-
- return cq;
-}
-
-/**
- * i40iw_sc_aeq_init - initialize aeq
- * @aeq: aeq structure ptr
- * @info: aeq initialization info
- */
-static enum i40iw_status_code i40iw_sc_aeq_init(struct i40iw_sc_aeq *aeq,
- struct i40iw_aeq_init_info *info)
-{
- u32 pble_obj_cnt;
-
- if ((info->elem_cnt < I40IW_MIN_AEQ_ENTRIES) ||
- (info->elem_cnt > I40IW_MAX_AEQ_ENTRIES))
- return I40IW_ERR_INVALID_SIZE;
- pble_obj_cnt = info->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
-
- if (info->virtual_map && (info->first_pm_pbl_idx >= pble_obj_cnt))
- return I40IW_ERR_INVALID_PBLE_INDEX;
-
- aeq->size = sizeof(*aeq);
- aeq->polarity = 1;
- aeq->aeqe_base = (struct i40iw_sc_aeqe *)info->aeqe_base;
- aeq->dev = info->dev;
- aeq->elem_cnt = info->elem_cnt;
-
- aeq->aeq_elem_pa = info->aeq_elem_pa;
- I40IW_RING_INIT(aeq->aeq_ring, aeq->elem_cnt);
- info->dev->aeq = aeq;
-
- aeq->virtual_map = info->virtual_map;
- aeq->pbl_list = (aeq->virtual_map ? info->pbl_list : NULL);
- aeq->pbl_chunk_size = (aeq->virtual_map ? info->pbl_chunk_size : 0);
- aeq->first_pm_pbl_idx = (aeq->virtual_map ? info->first_pm_pbl_idx : 0);
- info->dev->aeq = aeq;
- return 0;
-}
-
-/**
- * i40iw_sc_aeq_create - create aeq
- * @aeq: aeq structure ptr
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_aeq_create(struct i40iw_sc_aeq *aeq,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 header;
-
- cqp = aeq->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe, 16, aeq->elem_cnt);
- set_64bit_val(wqe, 32,
- (aeq->virtual_map ? 0 : aeq->aeq_elem_pa));
- set_64bit_val(wqe, 48,
- (aeq->virtual_map ? aeq->first_pm_pbl_idx : 0));
-
- header = LS_64(I40IW_CQP_OP_CREATE_AEQ, I40IW_CQPSQ_OPCODE) |
- LS_64(aeq->pbl_chunk_size, I40IW_CQPSQ_AEQ_LPBLSIZE) |
- LS_64(aeq->virtual_map, I40IW_CQPSQ_AEQ_VMAP) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "AEQ_CREATE WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_aeq_destroy - destroy aeq during close
- * @aeq: aeq structure ptr
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_aeq_destroy(struct i40iw_sc_aeq *aeq,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 header;
-
- cqp = aeq->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe, 16, aeq->elem_cnt);
- set_64bit_val(wqe, 48, aeq->first_pm_pbl_idx);
- header = LS_64(I40IW_CQP_OP_DESTROY_AEQ, I40IW_CQPSQ_OPCODE) |
- LS_64(aeq->pbl_chunk_size, I40IW_CQPSQ_AEQ_LPBLSIZE) |
- LS_64(aeq->virtual_map, I40IW_CQPSQ_AEQ_VMAP) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "AEQ_DESTROY WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_get_next_aeqe - get next aeq entry
- * @aeq: aeq structure ptr
- * @info: aeqe info to be returned
- */
-static enum i40iw_status_code i40iw_sc_get_next_aeqe(struct i40iw_sc_aeq *aeq,
- struct i40iw_aeqe_info *info)
-{
- u64 temp, compl_ctx;
- u64 *aeqe;
- u16 wqe_idx;
- u8 ae_src;
- u8 polarity;
-
- aeqe = (u64 *)I40IW_GET_CURRENT_AEQ_ELEMENT(aeq);
- get_64bit_val(aeqe, 0, &compl_ctx);
- get_64bit_val(aeqe, 8, &temp);
- polarity = (u8)RS_64(temp, I40IW_AEQE_VALID);
-
- if (aeq->polarity != polarity)
- return I40IW_ERR_QUEUE_EMPTY;
-
- i40iw_debug_buf(aeq->dev, I40IW_DEBUG_WQE, "AEQ_ENTRY", aeqe, 16);
-
- ae_src = (u8)RS_64(temp, I40IW_AEQE_AESRC);
- wqe_idx = (u16)RS_64(temp, I40IW_AEQE_WQDESCIDX);
- info->qp_cq_id = (u32)RS_64(temp, I40IW_AEQE_QPCQID);
- info->ae_id = (u16)RS_64(temp, I40IW_AEQE_AECODE);
- info->tcp_state = (u8)RS_64(temp, I40IW_AEQE_TCPSTATE);
- info->iwarp_state = (u8)RS_64(temp, I40IW_AEQE_IWSTATE);
- info->q2_data_written = (u8)RS_64(temp, I40IW_AEQE_Q2DATA);
- info->aeqe_overflow = (bool)RS_64(temp, I40IW_AEQE_OVERFLOW);
-
- switch (info->ae_id) {
- case I40IW_AE_PRIV_OPERATION_DENIED:
- case I40IW_AE_UDA_XMIT_DGRAM_TOO_LONG:
- case I40IW_AE_UDA_XMIT_DGRAM_TOO_SHORT:
- case I40IW_AE_BAD_CLOSE:
- case I40IW_AE_RDMAP_ROE_BAD_LLP_CLOSE:
- case I40IW_AE_RDMA_READ_WHILE_ORD_ZERO:
- case I40IW_AE_STAG_ZERO_INVALID:
- case I40IW_AE_IB_RREQ_AND_Q1_FULL:
- case I40IW_AE_WQE_UNEXPECTED_OPCODE:
- case I40IW_AE_DDP_UBE_INVALID_DDP_VERSION:
- case I40IW_AE_DDP_UBE_INVALID_MO:
- case I40IW_AE_DDP_UBE_INVALID_QN:
- case I40IW_AE_DDP_NO_L_BIT:
- case I40IW_AE_RDMAP_ROE_INVALID_RDMAP_VERSION:
- case I40IW_AE_RDMAP_ROE_UNEXPECTED_OPCODE:
- case I40IW_AE_ROE_INVALID_RDMA_READ_REQUEST:
- case I40IW_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP:
- case I40IW_AE_INVALID_ARP_ENTRY:
- case I40IW_AE_INVALID_TCP_OPTION_RCVD:
- case I40IW_AE_STALE_ARP_ENTRY:
- case I40IW_AE_LLP_CLOSE_COMPLETE:
- case I40IW_AE_LLP_CONNECTION_RESET:
- case I40IW_AE_LLP_FIN_RECEIVED:
- case I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR:
- case I40IW_AE_LLP_SEGMENT_TOO_SMALL:
- case I40IW_AE_LLP_SYN_RECEIVED:
- case I40IW_AE_LLP_TERMINATE_RECEIVED:
- case I40IW_AE_LLP_TOO_MANY_RETRIES:
- case I40IW_AE_LLP_DOUBT_REACHABILITY:
- case I40IW_AE_RESET_SENT:
- case I40IW_AE_TERMINATE_SENT:
- case I40IW_AE_RESET_NOT_SENT:
- case I40IW_AE_LCE_QP_CATASTROPHIC:
- case I40IW_AE_QP_SUSPEND_COMPLETE:
- info->qp = true;
- info->compl_ctx = compl_ctx;
- ae_src = I40IW_AE_SOURCE_RSVD;
- break;
- case I40IW_AE_LCE_CQ_CATASTROPHIC:
- info->cq = true;
- info->compl_ctx = LS_64_1(compl_ctx, 1);
- ae_src = I40IW_AE_SOURCE_RSVD;
- break;
- }
-
- switch (ae_src) {
- case I40IW_AE_SOURCE_RQ:
- case I40IW_AE_SOURCE_RQ_0011:
- info->qp = true;
- info->wqe_idx = wqe_idx;
- info->compl_ctx = compl_ctx;
- break;
- case I40IW_AE_SOURCE_CQ:
- case I40IW_AE_SOURCE_CQ_0110:
- case I40IW_AE_SOURCE_CQ_1010:
- case I40IW_AE_SOURCE_CQ_1110:
- info->cq = true;
- info->compl_ctx = LS_64_1(compl_ctx, 1);
- break;
- case I40IW_AE_SOURCE_SQ:
- case I40IW_AE_SOURCE_SQ_0111:
- info->qp = true;
- info->sq = true;
- info->wqe_idx = wqe_idx;
- info->compl_ctx = compl_ctx;
- break;
- case I40IW_AE_SOURCE_IN_RR_WR:
- case I40IW_AE_SOURCE_IN_RR_WR_1011:
- info->qp = true;
- info->compl_ctx = compl_ctx;
- info->in_rdrsp_wr = true;
- break;
- case I40IW_AE_SOURCE_OUT_RR:
- case I40IW_AE_SOURCE_OUT_RR_1111:
- info->qp = true;
- info->compl_ctx = compl_ctx;
- info->out_rdrsp = true;
- break;
- case I40IW_AE_SOURCE_RSVD:
- /* fallthrough */
- default:
- break;
- }
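- /* consuming the last ring slot wraps the tail to 0, so flip the expected valid polarity for the next pass */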
- I40IW_RING_MOVE_TAIL(aeq->aeq_ring);
- if (I40IW_RING_GETCURRENT_TAIL(aeq->aeq_ring) == 0)
- aeq->polarity ^= 1;
- return 0;
-}
-
-/**
- * i40iw_sc_repost_aeq_entries - repost completed aeq entries
- * @dev: sc device struct
- * @count: number of completed aeq entries to repost
- */
-static enum i40iw_status_code i40iw_sc_repost_aeq_entries(struct i40iw_sc_dev *dev,
- u32 count)
-{
-
- if (dev->is_pf)
- i40iw_wr32(dev->hw, I40E_PFPE_AEQALLOC, count);
- else
- i40iw_wr32(dev->hw, I40E_VFPE_AEQALLOC1, count);
-
- return 0;
-}
-
-/**
- * i40iw_sc_aeq_create_done - poll for aeq create cqp completion
- * @aeq: aeq structure ptr
- */
-static enum i40iw_status_code i40iw_sc_aeq_create_done(struct i40iw_sc_aeq *aeq)
-{
- struct i40iw_sc_cqp *cqp;
-
- cqp = aeq->dev->cqp;
- return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_CREATE_AEQ, NULL);
-}
-
-/**
- * i40iw_sc_aeq_destroy_done - poll for aeq destroy cqp completion during close
- * @aeq: aeq structure ptr
- */
-static enum i40iw_status_code i40iw_sc_aeq_destroy_done(struct i40iw_sc_aeq *aeq)
-{
- struct i40iw_sc_cqp *cqp;
-
- cqp = aeq->dev->cqp;
- return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_DESTROY_AEQ, NULL);
-}
-
-/**
- * i40iw_sc_ccq_init - initialize control cq
- * @cq: sc's cq struct
- * @info: info for control cq initialization
- */
-static enum i40iw_status_code i40iw_sc_ccq_init(struct i40iw_sc_cq *cq,
- struct i40iw_ccq_init_info *info)
-{
- u32 pble_obj_cnt;
-
- if (info->num_elem < I40IW_MIN_CQ_SIZE || info->num_elem > I40IW_MAX_CQ_SIZE)
- return I40IW_ERR_INVALID_SIZE;
-
- if (info->ceq_id > I40IW_MAX_CEQID)
- return I40IW_ERR_INVALID_CEQ_ID;
-
- pble_obj_cnt = info->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
-
- if (info->virtual_map && (info->first_pm_pbl_idx >= pble_obj_cnt))
- return I40IW_ERR_INVALID_PBLE_INDEX;
-
- cq->cq_pa = info->cq_pa;
- cq->cq_uk.cq_base = info->cq_base;
- cq->shadow_area_pa = info->shadow_area_pa;
- cq->cq_uk.shadow_area = info->shadow_area;
- cq->shadow_read_threshold = info->shadow_read_threshold;
- cq->dev = info->dev;
- cq->ceq_id = info->ceq_id;
- cq->cq_uk.cq_size = info->num_elem;
- cq->cq_type = I40IW_CQ_TYPE_CQP;
- cq->ceqe_mask = info->ceqe_mask;
- I40IW_RING_INIT(cq->cq_uk.cq_ring, info->num_elem);
-
- cq->cq_uk.cq_id = 0; /* control cq is id 0 always */
- cq->ceq_id_valid = info->ceq_id_valid;
- cq->tph_en = info->tph_en;
- cq->tph_val = info->tph_val;
- cq->cq_uk.avoid_mem_cflct = info->avoid_mem_cflct;
-
- cq->pbl_list = info->pbl_list;
- cq->virtual_map = info->virtual_map;
- cq->pbl_chunk_size = info->pbl_chunk_size;
- cq->first_pm_pbl_idx = info->first_pm_pbl_idx;
- cq->cq_uk.polarity = true;
-
- /* following are only for iw cqs so initialize them to zero */
- cq->cq_uk.cqe_alloc_reg = NULL;
- info->dev->ccq = cq;
- return 0;
-}
-
-/**
- * i40iw_sc_ccq_create_done - poll cqp for ccq create
- * @ccq: ccq sc struct
- */
-static enum i40iw_status_code i40iw_sc_ccq_create_done(struct i40iw_sc_cq *ccq)
-{
- struct i40iw_sc_cqp *cqp;
-
- cqp = ccq->dev->cqp;
- return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_CREATE_CQ, NULL);
-}
-
-/**
- * i40iw_sc_ccq_create - create control cq
- * @ccq: ccq sc struct
- * @scratch: u64 saved to be used during cqp completion
- * @check_overflow: overflow flag for ccq
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_ccq_create(struct i40iw_sc_cq *ccq,
- u64 scratch,
- bool check_overflow,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 header;
- enum i40iw_status_code ret_code;
-
- cqp = ccq->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe, 0, ccq->cq_uk.cq_size);
- set_64bit_val(wqe, 8, RS_64_1(ccq, 1));
- set_64bit_val(wqe, 16,
- LS_64(ccq->shadow_read_threshold, I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
- set_64bit_val(wqe, 32, (ccq->virtual_map ? 0 : ccq->cq_pa));
- set_64bit_val(wqe, 40, ccq->shadow_area_pa);
- set_64bit_val(wqe, 48,
- (ccq->virtual_map ? ccq->first_pm_pbl_idx : 0));
- set_64bit_val(wqe, 56,
- LS_64(ccq->tph_val, I40IW_CQPSQ_TPHVAL));
-
- header = ccq->cq_uk.cq_id |
- LS_64((ccq->ceq_id_valid ? ccq->ceq_id : 0), I40IW_CQPSQ_CQ_CEQID) |
- LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) |
- LS_64(ccq->pbl_chunk_size, I40IW_CQPSQ_CQ_LPBLSIZE) |
- LS_64(check_overflow, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
- LS_64(ccq->virtual_map, I40IW_CQPSQ_CQ_VIRTMAP) |
- LS_64(ccq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
- LS_64(ccq->ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
- LS_64(ccq->tph_en, I40IW_CQPSQ_TPHEN) |
- LS_64(ccq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CCQ_CREATE WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq) {
- i40iw_sc_cqp_post_sq(cqp);
- ret_code = i40iw_sc_ccq_create_done(ccq);
- if (ret_code)
- return ret_code;
- }
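- /* with the ccq created, sd updates can go through the cqp command path */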
- cqp->process_cqp_sds = i40iw_cqp_sds_cmd;
-
- return 0;
-}
-
-/**
- * i40iw_sc_ccq_destroy - destroy ccq during close
- * @ccq: ccq sc struct
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_ccq_destroy(struct i40iw_sc_cq *ccq,
- u64 scratch,
- bool post_sq)
-{
- struct i40iw_sc_cqp *cqp;
- u64 *wqe;
- u64 header;
- enum i40iw_status_code ret_code = 0;
- u32 tail, val, error;
-
- cqp = ccq->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe, 0, ccq->cq_uk.cq_size);
- set_64bit_val(wqe, 8, RS_64_1(ccq, 1));
- set_64bit_val(wqe, 40, ccq->shadow_area_pa);
-
- header = ccq->cq_uk.cq_id |
- LS_64((ccq->ceq_id_valid ? ccq->ceq_id : 0), I40IW_CQPSQ_CQ_CEQID) |
- LS_64(I40IW_CQP_OP_DESTROY_CQ, I40IW_CQPSQ_OPCODE) |
- LS_64(ccq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
- LS_64(ccq->ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
- LS_64(ccq->tph_en, I40IW_CQPSQ_TPHEN) |
- LS_64(ccq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CCQ_DESTROY WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
- if (error)
- return I40IW_ERR_CQP_COMPL_ERROR;
-
- if (post_sq) {
- i40iw_sc_cqp_post_sq(cqp);
- ret_code = i40iw_cqp_poll_registers(cqp, tail, 1000);
- }
-
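- /* the ccq is being destroyed, so later sd updates must poll cqp registers */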
- cqp->process_cqp_sds = i40iw_update_sds_noccq;
-
- return ret_code;
-}
-
-/**
- * i40iw_sc_cq_init - initialize completion q
- * @cq: cq struct
- * @info: cq initialization info
- */
-static enum i40iw_status_code i40iw_sc_cq_init(struct i40iw_sc_cq *cq,
- struct i40iw_cq_init_info *info)
-{
- u32 __iomem *cqe_alloc_reg = NULL;
- enum i40iw_status_code ret_code;
- u32 pble_obj_cnt;
- u32 arm_offset;
-
- pble_obj_cnt = info->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
-
- if (info->virtual_map && (info->first_pm_pbl_idx >= pble_obj_cnt))
- return I40IW_ERR_INVALID_PBLE_INDEX;
-
- cq->cq_pa = info->cq_base_pa;
- cq->dev = info->dev;
- cq->ceq_id = info->ceq_id;
- arm_offset = (info->dev->is_pf) ? I40E_PFPE_CQARM : I40E_VFPE_CQARM1;
- if (i40iw_get_hw_addr(cq->dev))
- cqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(cq->dev) +
- arm_offset);
- info->cq_uk_init_info.cqe_alloc_reg = cqe_alloc_reg;
- ret_code = i40iw_cq_uk_init(&cq->cq_uk, &info->cq_uk_init_info);
- if (ret_code)
- return ret_code;
- cq->virtual_map = info->virtual_map;
- cq->pbl_chunk_size = info->pbl_chunk_size;
- cq->ceqe_mask = info->ceqe_mask;
- cq->cq_type = (info->type) ? info->type : I40IW_CQ_TYPE_IWARP;
-
- cq->shadow_area_pa = info->shadow_area_pa;
- cq->shadow_read_threshold = info->shadow_read_threshold;
-
- cq->ceq_id_valid = info->ceq_id_valid;
- cq->tph_en = info->tph_en;
- cq->tph_val = info->tph_val;
-
- cq->first_pm_pbl_idx = info->first_pm_pbl_idx;
-
- return 0;
-}
-
-/**
- * i40iw_sc_cq_create - create completion q
- * @cq: cq struct
- * @scratch: u64 saved to be used during cqp completion
- * @check_overflow: flag for overflow check
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_cq_create(struct i40iw_sc_cq *cq,
- u64 scratch,
- bool check_overflow,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 header;
-
- if (cq->cq_uk.cq_id > I40IW_MAX_CQID)
- return I40IW_ERR_INVALID_CQ_ID;
-
- if (cq->ceq_id > I40IW_MAX_CEQID)
- return I40IW_ERR_INVALID_CEQ_ID;
-
- cqp = cq->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
- set_64bit_val(wqe, 8, RS_64_1(cq, 1));
- set_64bit_val(wqe,
- 16,
- LS_64(cq->shadow_read_threshold, I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
-
- set_64bit_val(wqe, 32, (cq->virtual_map ? 0 : cq->cq_pa));
-
- set_64bit_val(wqe, 40, cq->shadow_area_pa);
- set_64bit_val(wqe, 48, (cq->virtual_map ? cq->first_pm_pbl_idx : 0));
- set_64bit_val(wqe, 56, LS_64(cq->tph_val, I40IW_CQPSQ_TPHVAL));
-
- header = cq->cq_uk.cq_id |
- LS_64((cq->ceq_id_valid ? cq->ceq_id : 0), I40IW_CQPSQ_CQ_CEQID) |
- LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) |
- LS_64(cq->pbl_chunk_size, I40IW_CQPSQ_CQ_LPBLSIZE) |
- LS_64(check_overflow, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
- LS_64(cq->virtual_map, I40IW_CQPSQ_CQ_VIRTMAP) |
- LS_64(cq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
- LS_64(cq->ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
- LS_64(cq->tph_en, I40IW_CQPSQ_TPHEN) |
- LS_64(cq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CQ_CREATE WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_cq_destroy - destroy completion q
- * @cq: cq struct
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_cq_destroy(struct i40iw_sc_cq *cq,
- u64 scratch,
- bool post_sq)
-{
- struct i40iw_sc_cqp *cqp;
- u64 *wqe;
- u64 header;
-
- cqp = cq->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
- set_64bit_val(wqe, 8, RS_64_1(cq, 1));
- set_64bit_val(wqe, 40, cq->shadow_area_pa);
- set_64bit_val(wqe, 48, (cq->virtual_map ? cq->first_pm_pbl_idx : 0));
-
- header = cq->cq_uk.cq_id |
- LS_64((cq->ceq_id_valid ? cq->ceq_id : 0), I40IW_CQPSQ_CQ_CEQID) |
- LS_64(I40IW_CQP_OP_DESTROY_CQ, I40IW_CQPSQ_OPCODE) |
- LS_64(cq->pbl_chunk_size, I40IW_CQPSQ_CQ_LPBLSIZE) |
- LS_64(cq->virtual_map, I40IW_CQPSQ_CQ_VIRTMAP) |
- LS_64(cq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
- LS_64(cq->ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
- LS_64(cq->tph_en, I40IW_CQPSQ_TPHEN) |
- LS_64(cq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CQ_DESTROY WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_cq_modify - modify a Completion Queue
- * @cq: cq struct
- * @info: modification info struct
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag to post to sq
- */
-static enum i40iw_status_code i40iw_sc_cq_modify(struct i40iw_sc_cq *cq,
- struct i40iw_modify_cq_info *info,
- u64 scratch,
- bool post_sq)
-{
- struct i40iw_sc_cqp *cqp;
- u64 *wqe;
- u64 header;
- u32 cq_size, ceq_id, first_pm_pbl_idx;
- u8 pbl_chunk_size;
- bool virtual_map, ceq_id_valid, check_overflow;
- u32 pble_obj_cnt;
-
- if (info->ceq_valid && (info->ceq_id > I40IW_MAX_CEQID))
- return I40IW_ERR_INVALID_CEQ_ID;
-
- pble_obj_cnt = cq->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
-
- if (info->cq_resize && info->virtual_map &&
- (info->first_pm_pbl_idx >= pble_obj_cnt))
- return I40IW_ERR_INVALID_PBLE_INDEX;
-
- cqp = cq->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- cq->pbl_list = info->pbl_list;
- cq->cq_pa = info->cq_pa;
- cq->first_pm_pbl_idx = info->first_pm_pbl_idx;
-
- cq_size = info->cq_resize ? info->cq_size : cq->cq_uk.cq_size;
- if (info->ceq_change) {
- ceq_id_valid = true;
- ceq_id = info->ceq_id;
- } else {
- ceq_id_valid = cq->ceq_id_valid;
- ceq_id = ceq_id_valid ? cq->ceq_id : 0;
- }
- virtual_map = info->cq_resize ? info->virtual_map : cq->virtual_map;
- first_pm_pbl_idx = (info->cq_resize ?
- (info->virtual_map ? info->first_pm_pbl_idx : 0) :
- (cq->virtual_map ? cq->first_pm_pbl_idx : 0));
- pbl_chunk_size = (info->cq_resize ?
- (info->virtual_map ? info->pbl_chunk_size : 0) :
- (cq->virtual_map ? cq->pbl_chunk_size : 0));
- check_overflow = info->check_overflow_change ? info->check_overflow :
- cq->check_overflow;
- cq->cq_uk.cq_size = cq_size;
- cq->ceq_id_valid = ceq_id_valid;
- cq->ceq_id = ceq_id;
- cq->virtual_map = virtual_map;
- cq->first_pm_pbl_idx = first_pm_pbl_idx;
- cq->pbl_chunk_size = pbl_chunk_size;
- cq->check_overflow = check_overflow;
-
- set_64bit_val(wqe, 0, cq_size);
- set_64bit_val(wqe, 8, RS_64_1(cq, 1));
- set_64bit_val(wqe, 16,
- LS_64(info->shadow_read_threshold, I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
- set_64bit_val(wqe, 32, (cq->virtual_map ? 0 : cq->cq_pa));
- set_64bit_val(wqe, 40, cq->shadow_area_pa);
- set_64bit_val(wqe, 48, (cq->virtual_map ? first_pm_pbl_idx : 0));
- set_64bit_val(wqe, 56, LS_64(cq->tph_val, I40IW_CQPSQ_TPHVAL));
-
- header = cq->cq_uk.cq_id |
- LS_64(ceq_id, I40IW_CQPSQ_CQ_CEQID) |
- LS_64(I40IW_CQP_OP_MODIFY_CQ, I40IW_CQPSQ_OPCODE) |
- LS_64(info->cq_resize, I40IW_CQPSQ_CQ_CQRESIZE) |
- LS_64(pbl_chunk_size, I40IW_CQPSQ_CQ_LPBLSIZE) |
- LS_64(check_overflow, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
- LS_64(virtual_map, I40IW_CQPSQ_CQ_VIRTMAP) |
- LS_64(cq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
- LS_64(ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
- LS_64(cq->tph_en, I40IW_CQPSQ_TPHEN) |
- LS_64(cq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CQ_MODIFY WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_qp_init - initialize qp
- * @qp: sc qp
- * @info: initialization qp info
- */
-static enum i40iw_status_code i40iw_sc_qp_init(struct i40iw_sc_qp *qp,
- struct i40iw_qp_init_info *info)
-{
- u32 __iomem *wqe_alloc_reg = NULL;
- enum i40iw_status_code ret_code;
- u32 pble_obj_cnt;
- u8 wqe_size;
- u32 offset;
-
- qp->dev = info->pd->dev;
- qp->vsi = info->vsi;
- qp->sq_pa = info->sq_pa;
- qp->rq_pa = info->rq_pa;
- qp->hw_host_ctx_pa = info->host_ctx_pa;
- qp->q2_pa = info->q2_pa;
- qp->shadow_area_pa = info->shadow_area_pa;
-
- qp->q2_buf = info->q2;
- qp->pd = info->pd;
- qp->hw_host_ctx = info->host_ctx;
- offset = (qp->pd->dev->is_pf) ? I40E_PFPE_WQEALLOC : I40E_VFPE_WQEALLOC1;
- if (i40iw_get_hw_addr(qp->pd->dev))
- wqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(qp->pd->dev) +
- offset);
-
- info->qp_uk_init_info.wqe_alloc_reg = wqe_alloc_reg;
- info->qp_uk_init_info.abi_ver = qp->pd->abi_ver;
- ret_code = i40iw_qp_uk_init(&qp->qp_uk, &info->qp_uk_init_info);
- if (ret_code)
- return ret_code;
- qp->virtual_map = info->virtual_map;
-
- pble_obj_cnt = info->pd->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
-
- if ((info->virtual_map && (info->sq_pa >= pble_obj_cnt)) ||
- (info->virtual_map && (info->rq_pa >= pble_obj_cnt)))
- return I40IW_ERR_INVALID_PBLE_INDEX;
-
- qp->llp_stream_handle = (void *)(-1);
- qp->qp_type = (info->type) ? info->type : I40IW_QP_TYPE_IWARP;
-
- qp->hw_sq_size = i40iw_get_encoded_wqe_size(qp->qp_uk.sq_ring.size,
- false);
- i40iw_debug(qp->dev, I40IW_DEBUG_WQE, "%s: hw_sq_size[%04d] sq_ring.size[%04d]\n",
- __func__, qp->hw_sq_size, qp->qp_uk.sq_ring.size);
-
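- /* rq wqe size depends on the user abi: v4 derives it from the fragment count, later abis use the fixed maximum */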
- switch (qp->pd->abi_ver) {
- case 4:
- ret_code = i40iw_fragcnt_to_wqesize_rq(qp->qp_uk.max_rq_frag_cnt,
- &wqe_size);
- if (ret_code)
- return ret_code;
- break;
- case 5: /* fallthrough until next ABI version */
- default:
- if (qp->qp_uk.max_rq_frag_cnt > I40IW_MAX_WQ_FRAGMENT_COUNT)
- return I40IW_ERR_INVALID_FRAG_COUNT;
- wqe_size = I40IW_MAX_WQE_SIZE_RQ;
- break;
- }
- qp->hw_rq_size = i40iw_get_encoded_wqe_size(qp->qp_uk.rq_size *
- (wqe_size / I40IW_QP_WQE_MIN_SIZE), false);
- i40iw_debug(qp->dev, I40IW_DEBUG_WQE,
- "%s: hw_rq_size[%04d] qp_uk.rq_size[%04d] wqe_size[%04d]\n",
- __func__, qp->hw_rq_size, qp->qp_uk.rq_size, wqe_size);
- qp->sq_tph_val = info->sq_tph_val;
- qp->rq_tph_val = info->rq_tph_val;
- qp->sq_tph_en = info->sq_tph_en;
- qp->rq_tph_en = info->rq_tph_en;
- qp->rcv_tph_en = info->rcv_tph_en;
- qp->xmit_tph_en = info->xmit_tph_en;
- qp->qs_handle = qp->vsi->qos[qp->user_pri].qs_handle;
-
- return 0;
-}
-
-/**
- * i40iw_sc_qp_create - create qp
- * @qp: sc qp
- * @info: qp create info
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_qp_create(
- struct i40iw_sc_qp *qp,
- struct i40iw_create_qp_info *info,
- u64 scratch,
- bool post_sq)
-{
- struct i40iw_sc_cqp *cqp;
- u64 *wqe;
- u64 header;
-
- if ((qp->qp_uk.qp_id < I40IW_MIN_IW_QP_ID) ||
- (qp->qp_uk.qp_id > I40IW_MAX_IW_QP_ID))
- return I40IW_ERR_INVALID_QP_ID;
-
- cqp = qp->pd->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
-
- set_64bit_val(wqe, 40, qp->shadow_area_pa);
-
- header = qp->qp_uk.qp_id |
- LS_64(I40IW_CQP_OP_CREATE_QP, I40IW_CQPSQ_OPCODE) |
- LS_64((info->ord_valid ? 1 : 0), I40IW_CQPSQ_QP_ORDVALID) |
- LS_64(info->tcp_ctx_valid, I40IW_CQPSQ_QP_TOECTXVALID) |
- LS_64(qp->qp_type, I40IW_CQPSQ_QP_QPTYPE) |
- LS_64(qp->virtual_map, I40IW_CQPSQ_QP_VQ) |
- LS_64(info->cq_num_valid, I40IW_CQPSQ_QP_CQNUMVALID) |
- LS_64(info->arp_cache_idx_valid, I40IW_CQPSQ_QP_ARPTABIDXVALID) |
- LS_64(info->next_iwarp_state, I40IW_CQPSQ_QP_NEXTIWSTATE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QP_CREATE WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_qp_modify - modify qp cqp wqe
- * @qp: sc qp
- * @info: modify qp info
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_qp_modify(
- struct i40iw_sc_qp *qp,
- struct i40iw_modify_qp_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 header;
- u8 term_actions = 0;
- u8 term_len = 0;
-
- cqp = qp->pd->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- if (info->next_iwarp_state == I40IW_QP_STATE_TERMINATE) {
- if (info->dont_send_fin)
- term_actions += I40IWQP_TERM_SEND_TERM_ONLY;
- if (info->dont_send_term)
- term_actions += I40IWQP_TERM_SEND_FIN_ONLY;
- if ((term_actions == I40IWQP_TERM_SEND_TERM_AND_FIN) ||
- (term_actions == I40IWQP_TERM_SEND_TERM_ONLY))
- term_len = info->termlen;
- }
-
- set_64bit_val(wqe,
- 8,
- LS_64(term_len, I40IW_CQPSQ_QP_TERMLEN));
-
- set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
- set_64bit_val(wqe, 40, qp->shadow_area_pa);
-
- header = qp->qp_uk.qp_id |
- LS_64(I40IW_CQP_OP_MODIFY_QP, I40IW_CQPSQ_OPCODE) |
- LS_64(info->ord_valid, I40IW_CQPSQ_QP_ORDVALID) |
- LS_64(info->tcp_ctx_valid, I40IW_CQPSQ_QP_TOECTXVALID) |
- LS_64(info->cached_var_valid, I40IW_CQPSQ_QP_CACHEDVARVALID) |
- LS_64(qp->virtual_map, I40IW_CQPSQ_QP_VQ) |
- LS_64(info->cq_num_valid, I40IW_CQPSQ_QP_CQNUMVALID) |
- LS_64(info->force_loopback, I40IW_CQPSQ_QP_FORCELOOPBACK) |
- LS_64(qp->qp_type, I40IW_CQPSQ_QP_QPTYPE) |
- LS_64(info->remove_hash_idx, I40IW_CQPSQ_QP_REMOVEHASHENTRY) |
- LS_64(term_actions, I40IW_CQPSQ_QP_TERMACT) |
- LS_64(info->reset_tcp_conn, I40IW_CQPSQ_QP_RESETCON) |
- LS_64(info->arp_cache_idx_valid, I40IW_CQPSQ_QP_ARPTABIDXVALID) |
- LS_64(info->next_iwarp_state, I40IW_CQPSQ_QP_NEXTIWSTATE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QP_MODIFY WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_qp_destroy - cqp destroy qp
- * @qp: sc qp
- * @scratch: u64 saved to be used during cqp completion
- * @remove_hash_idx: flag to remove hash idx
- * @ignore_mw_bnd: memory window bind flag
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_qp_destroy(
- struct i40iw_sc_qp *qp,
- u64 scratch,
- bool remove_hash_idx,
- bool ignore_mw_bnd,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 header;
-
- i40iw_qp_rem_qos(qp);
- cqp = qp->pd->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
- set_64bit_val(wqe, 40, qp->shadow_area_pa);
-
- header = qp->qp_uk.qp_id |
- LS_64(I40IW_CQP_OP_DESTROY_QP, I40IW_CQPSQ_OPCODE) |
- LS_64(qp->qp_type, I40IW_CQPSQ_QP_QPTYPE) |
- LS_64(ignore_mw_bnd, I40IW_CQPSQ_QP_IGNOREMWBOUND) |
- LS_64(remove_hash_idx, I40IW_CQPSQ_QP_REMOVEHASHENTRY) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QP_DESTROY WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_qp_flush_wqes - flush qp's wqe
- * @qp: sc qp
- * @info: flush information
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_qp_flush_wqes(
- struct i40iw_sc_qp *qp,
- struct i40iw_qp_flush_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 temp = 0;
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 header;
- bool flush_sq = false, flush_rq = false;
-
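- /* flush each of sq/rq at most once; nothing to do if both were already flushed */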
- if (info->rq && !qp->flush_rq)
- flush_rq = true;
-
- if (info->sq && !qp->flush_sq)
- flush_sq = true;
-
- qp->flush_sq |= flush_sq;
- qp->flush_rq |= flush_rq;
- if (!flush_sq && !flush_rq)
- return 0;
-
- cqp = qp->pd->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- if (info->userflushcode) {
- if (flush_rq) {
- temp |= LS_64(info->rq_minor_code, I40IW_CQPSQ_FWQE_RQMNERR) |
- LS_64(info->rq_major_code, I40IW_CQPSQ_FWQE_RQMJERR);
- }
- if (flush_sq) {
- temp |= LS_64(info->sq_minor_code, I40IW_CQPSQ_FWQE_SQMNERR) |
- LS_64(info->sq_major_code, I40IW_CQPSQ_FWQE_SQMJERR);
- }
- }
- set_64bit_val(wqe, 16, temp);
-
- temp = (info->generate_ae) ?
- info->ae_code | LS_64(info->ae_source, I40IW_CQPSQ_FWQE_AESOURCE) : 0;
-
- set_64bit_val(wqe, 8, temp);
-
- header = qp->qp_uk.qp_id |
- LS_64(I40IW_CQP_OP_FLUSH_WQES, I40IW_CQPSQ_OPCODE) |
- LS_64(info->generate_ae, I40IW_CQPSQ_FWQE_GENERATE_AE) |
- LS_64(info->userflushcode, I40IW_CQPSQ_FWQE_USERFLCODE) |
- LS_64(flush_sq, I40IW_CQPSQ_FWQE_FLUSHSQ) |
- LS_64(flush_rq, I40IW_CQPSQ_FWQE_FLUSHRQ) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QP_FLUSH WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_gen_ae - generate AE, currently uses flush WQE CQP OP
- * @qp: sc qp
- * @info: gen ae information
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_gen_ae(
- struct i40iw_sc_qp *qp,
- struct i40iw_gen_ae_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 temp;
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 header;
-
- cqp = qp->pd->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- temp = info->ae_code |
- LS_64(info->ae_source, I40IW_CQPSQ_FWQE_AESOURCE);
-
- set_64bit_val(wqe, 8, temp);
-
- header = qp->qp_uk.qp_id |
- LS_64(I40IW_CQP_OP_GEN_AE, I40IW_CQPSQ_OPCODE) |
- LS_64(1, I40IW_CQPSQ_FWQE_GENERATE_AE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "GEN_AE WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_qp_upload_context - upload qp's context
- * @dev: sc device struct
- * @info: upload context info ptr for return
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_qp_upload_context(
- struct i40iw_sc_dev *dev,
- struct i40iw_upload_context_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 header;
-
- cqp = dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe, 16, info->buf_pa);
-
- header = LS_64(info->qp_id, I40IW_CQPSQ_UCTX_QPID) |
- LS_64(I40IW_CQP_OP_UPLOAD_CONTEXT, I40IW_CQPSQ_OPCODE) |
- LS_64(info->qp_type, I40IW_CQPSQ_UCTX_QPTYPE) |
- LS_64(info->raw_format, I40IW_CQPSQ_UCTX_RAWFORMAT) |
- LS_64(info->freeze_qp, I40IW_CQPSQ_UCTX_FREEZEQP) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "QP_UPLOAD_CTX WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_qp_setctx - set qp's context
- * @qp: sc qp
- * @qp_ctx: context ptr
- * @info: ctx info
- */
-static enum i40iw_status_code i40iw_sc_qp_setctx(
- struct i40iw_sc_qp *qp,
- u64 *qp_ctx,
- struct i40iw_qp_host_ctx_info *info)
-{
- struct i40iwarp_offload_info *iw;
- struct i40iw_tcp_offload_info *tcp;
- struct i40iw_sc_vsi *vsi;
- struct i40iw_sc_dev *dev;
- u64 qw0, qw3, qw7 = 0;
-
- iw = info->iwarp_info;
- tcp = info->tcp_info;
- vsi = qp->vsi;
- dev = qp->dev;
- if (info->add_to_qoslist) {
- qp->user_pri = info->user_pri;
- i40iw_qp_add_qos(qp);
- i40iw_debug(qp->dev, I40IW_DEBUG_DCB, "%s qp[%d] UP[%d] qset[%d]\n",
- __func__, qp->qp_uk.qp_id, qp->user_pri, qp->qs_handle);
- }
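- /* qw0, qw3 and qw7 accumulate fields from the base, iwarp and tcp sections and are written out last */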
- qw0 = LS_64(qp->qp_uk.rq_wqe_size, I40IWQPC_RQWQESIZE) |
- LS_64(info->err_rq_idx_valid, I40IWQPC_ERR_RQ_IDX_VALID) |
- LS_64(qp->rcv_tph_en, I40IWQPC_RCVTPHEN) |
- LS_64(qp->xmit_tph_en, I40IWQPC_XMITTPHEN) |
- LS_64(qp->rq_tph_en, I40IWQPC_RQTPHEN) |
- LS_64(qp->sq_tph_en, I40IWQPC_SQTPHEN) |
- LS_64(info->push_idx, I40IWQPC_PPIDX) |
- LS_64(info->push_mode_en, I40IWQPC_PMENA);
-
- set_64bit_val(qp_ctx, 8, qp->sq_pa);
- set_64bit_val(qp_ctx, 16, qp->rq_pa);
-
- qw3 = LS_64(qp->src_mac_addr_idx, I40IWQPC_SRCMACADDRIDX) |
- LS_64(qp->hw_rq_size, I40IWQPC_RQSIZE) |
- LS_64(qp->hw_sq_size, I40IWQPC_SQSIZE);
-
- set_64bit_val(qp_ctx,
- 128,
- LS_64(info->err_rq_idx, I40IWQPC_ERR_RQ_IDX));
-
- set_64bit_val(qp_ctx,
- 136,
- LS_64(info->send_cq_num, I40IWQPC_TXCQNUM) |
- LS_64(info->rcv_cq_num, I40IWQPC_RXCQNUM));
-
- set_64bit_val(qp_ctx,
- 168,
- LS_64(info->qp_compl_ctx, I40IWQPC_QPCOMPCTX));
- set_64bit_val(qp_ctx,
- 176,
- LS_64(qp->sq_tph_val, I40IWQPC_SQTPHVAL) |
- LS_64(qp->rq_tph_val, I40IWQPC_RQTPHVAL) |
- LS_64(qp->qs_handle, I40IWQPC_QSHANDLE) |
- LS_64(vsi->exception_lan_queue, I40IWQPC_EXCEPTION_LAN_QUEUE));
-
- if (info->iwarp_info_valid) {
- qw0 |= LS_64(iw->ddp_ver, I40IWQPC_DDP_VER) |
- LS_64(iw->rdmap_ver, I40IWQPC_RDMAP_VER);
-
- qw7 |= LS_64(iw->pd_id, I40IWQPC_PDIDX);
- set_64bit_val(qp_ctx,
- 144,
- LS_64(qp->q2_pa, I40IWQPC_Q2ADDR) |
- LS_64(vsi->fcn_id, I40IWQPC_STAT_INDEX));
- set_64bit_val(qp_ctx,
- 152,
- LS_64(iw->last_byte_sent, I40IWQPC_LASTBYTESENT));
-
- set_64bit_val(qp_ctx,
- 160,
- LS_64(iw->ord_size, I40IWQPC_ORDSIZE) |
- LS_64(iw->ird_size, I40IWQPC_IRDSIZE) |
- LS_64(iw->wr_rdresp_en, I40IWQPC_WRRDRSPOK) |
- LS_64(iw->rd_enable, I40IWQPC_RDOK) |
- LS_64(iw->snd_mark_en, I40IWQPC_SNDMARKERS) |
- LS_64(iw->bind_en, I40IWQPC_BINDEN) |
- LS_64(iw->fast_reg_en, I40IWQPC_FASTREGEN) |
- LS_64(iw->priv_mode_en, I40IWQPC_PRIVEN) |
- LS_64((((vsi->stats_fcn_id_alloc) &&
- (dev->is_pf) && (vsi->fcn_id >= I40IW_FIRST_NON_PF_STAT)) ? 1 : 0),
- I40IWQPC_USESTATSINSTANCE) |
- LS_64(1, I40IWQPC_IWARPMODE) |
- LS_64(iw->rcv_mark_en, I40IWQPC_RCVMARKERS) |
- LS_64(iw->align_hdrs, I40IWQPC_ALIGNHDRS) |
- LS_64(iw->rcv_no_mpa_crc, I40IWQPC_RCVNOMPACRC) |
- LS_64(iw->rcv_mark_offset, I40IWQPC_RCVMARKOFFSET) |
- LS_64(iw->snd_mark_offset, I40IWQPC_SNDMARKOFFSET));
- }
- if (info->tcp_info_valid) {
- qw0 |= LS_64(tcp->ipv4, I40IWQPC_IPV4) |
- LS_64(tcp->no_nagle, I40IWQPC_NONAGLE) |
- LS_64(tcp->insert_vlan_tag, I40IWQPC_INSERTVLANTAG) |
- LS_64(tcp->time_stamp, I40IWQPC_TIMESTAMP) |
- LS_64(tcp->cwnd_inc_limit, I40IWQPC_LIMIT) |
- LS_64(tcp->drop_ooo_seg, I40IWQPC_DROPOOOSEG) |
- LS_64(tcp->dup_ack_thresh, I40IWQPC_DUPACK_THRESH);
-
- qw3 |= LS_64(tcp->ttl, I40IWQPC_TTL) |
- LS_64(tcp->src_mac_addr_idx, I40IWQPC_SRCMACADDRIDX) |
- LS_64(tcp->avoid_stretch_ack, I40IWQPC_AVOIDSTRETCHACK) |
- LS_64(tcp->tos, I40IWQPC_TOS) |
- LS_64(tcp->src_port, I40IWQPC_SRCPORTNUM) |
- LS_64(tcp->dst_port, I40IWQPC_DESTPORTNUM);
-
- qp->src_mac_addr_idx = tcp->src_mac_addr_idx;
- set_64bit_val(qp_ctx,
- 32,
- LS_64(tcp->dest_ip_addr2, I40IWQPC_DESTIPADDR2) |
- LS_64(tcp->dest_ip_addr3, I40IWQPC_DESTIPADDR3));
-
- set_64bit_val(qp_ctx,
- 40,
- LS_64(tcp->dest_ip_addr0, I40IWQPC_DESTIPADDR0) |
- LS_64(tcp->dest_ip_addr1, I40IWQPC_DESTIPADDR1));
-
- set_64bit_val(qp_ctx,
- 48,
- LS_64(tcp->snd_mss, I40IWQPC_SNDMSS) |
- LS_64(tcp->vlan_tag, I40IWQPC_VLANTAG) |
- LS_64(tcp->arp_idx, I40IWQPC_ARPIDX));
-
- qw7 |= LS_64(tcp->flow_label, I40IWQPC_FLOWLABEL) |
- LS_64(tcp->wscale, I40IWQPC_WSCALE) |
- LS_64(tcp->ignore_tcp_opt, I40IWQPC_IGNORE_TCP_OPT) |
- LS_64(tcp->ignore_tcp_uns_opt, I40IWQPC_IGNORE_TCP_UNS_OPT) |
- LS_64(tcp->tcp_state, I40IWQPC_TCPSTATE) |
- LS_64(tcp->rcv_wscale, I40IWQPC_RCVSCALE) |
- LS_64(tcp->snd_wscale, I40IWQPC_SNDSCALE);
-
- set_64bit_val(qp_ctx,
- 72,
- LS_64(tcp->time_stamp_recent, I40IWQPC_TIMESTAMP_RECENT) |
- LS_64(tcp->time_stamp_age, I40IWQPC_TIMESTAMP_AGE));
- set_64bit_val(qp_ctx,
- 80,
- LS_64(tcp->snd_nxt, I40IWQPC_SNDNXT) |
- LS_64(tcp->snd_wnd, I40IWQPC_SNDWND));
-
- set_64bit_val(qp_ctx,
- 88,
- LS_64(tcp->rcv_nxt, I40IWQPC_RCVNXT) |
- LS_64(tcp->rcv_wnd, I40IWQPC_RCVWND));
- set_64bit_val(qp_ctx,
- 96,
- LS_64(tcp->snd_max, I40IWQPC_SNDMAX) |
- LS_64(tcp->snd_una, I40IWQPC_SNDUNA));
- set_64bit_val(qp_ctx,
- 104,
- LS_64(tcp->srtt, I40IWQPC_SRTT) |
- LS_64(tcp->rtt_var, I40IWQPC_RTTVAR));
- set_64bit_val(qp_ctx,
- 112,
- LS_64(tcp->ss_thresh, I40IWQPC_SSTHRESH) |
- LS_64(tcp->cwnd, I40IWQPC_CWND));
- set_64bit_val(qp_ctx,
- 120,
- LS_64(tcp->snd_wl1, I40IWQPC_SNDWL1) |
- LS_64(tcp->snd_wl2, I40IWQPC_SNDWL2));
- set_64bit_val(qp_ctx,
- 128,
- LS_64(tcp->max_snd_window, I40IWQPC_MAXSNDWND) |
- LS_64(tcp->rexmit_thresh, I40IWQPC_REXMIT_THRESH));
- set_64bit_val(qp_ctx,
- 184,
- LS_64(tcp->local_ipaddr3, I40IWQPC_LOCAL_IPADDR3) |
- LS_64(tcp->local_ipaddr2, I40IWQPC_LOCAL_IPADDR2));
- set_64bit_val(qp_ctx,
- 192,
- LS_64(tcp->local_ipaddr1, I40IWQPC_LOCAL_IPADDR1) |
- LS_64(tcp->local_ipaddr0, I40IWQPC_LOCAL_IPADDR0));
- }
-
- set_64bit_val(qp_ctx, 0, qw0);
- set_64bit_val(qp_ctx, 24, qw3);
- set_64bit_val(qp_ctx, 56, qw7);
-
- i40iw_debug_buf(qp->dev, I40IW_DEBUG_WQE, "QP_HOST_CTX WQE",
- qp_ctx, I40IW_QP_CTX_SIZE);
- return 0;
-}
-
-/**
- * i40iw_sc_alloc_stag - mr stag alloc
- * @dev: sc device struct
- * @info: stag info
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_alloc_stag(
- struct i40iw_sc_dev *dev,
- struct i40iw_allocate_stag_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 header;
- enum i40iw_page_size page_size;
-
- page_size = (info->page_size == 0x200000) ? I40IW_PAGE_SIZE_2M : I40IW_PAGE_SIZE_4K;
- cqp = dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe,
- 8,
- LS_64(info->pd_id, I40IW_CQPSQ_STAG_PDID) |
- LS_64(info->total_len, I40IW_CQPSQ_STAG_STAGLEN));
- set_64bit_val(wqe,
- 16,
- LS_64(info->stag_idx, I40IW_CQPSQ_STAG_IDX));
- set_64bit_val(wqe,
- 40,
- LS_64(info->hmc_fcn_index, I40IW_CQPSQ_STAG_HMCFNIDX));
-
- header = LS_64(I40IW_CQP_OP_ALLOC_STAG, I40IW_CQPSQ_OPCODE) |
- LS_64(1, I40IW_CQPSQ_STAG_MR) |
- LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) |
- LS_64(info->chunk_size, I40IW_CQPSQ_STAG_LPBLSIZE) |
- LS_64(page_size, I40IW_CQPSQ_STAG_HPAGESIZE) |
- LS_64(info->remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) |
- LS_64(info->use_hmc_fcn_index, I40IW_CQPSQ_STAG_USEHMCFNIDX) |
- LS_64(info->use_pf_rid, I40IW_CQPSQ_STAG_USEPFRID) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "ALLOC_STAG WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_mr_reg_non_shared - non-shared mr registration
- * @dev: sc device struct
- * @info: mr info
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_mr_reg_non_shared(
- struct i40iw_sc_dev *dev,
- struct i40iw_reg_ns_stag_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- u64 temp;
- struct i40iw_sc_cqp *cqp;
- u64 header;
- u32 pble_obj_cnt;
- bool remote_access;
- u8 addr_type;
- enum i40iw_page_size page_size;
-
- page_size = (info->page_size == 0x200000) ? I40IW_PAGE_SIZE_2M : I40IW_PAGE_SIZE_4K;
- if (info->access_rights & (I40IW_ACCESS_FLAGS_REMOTEREAD_ONLY |
- I40IW_ACCESS_FLAGS_REMOTEWRITE_ONLY))
- remote_access = true;
- else
- remote_access = false;
-
- pble_obj_cnt = dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
-
- if (info->chunk_size && (info->first_pm_pbl_index >= pble_obj_cnt))
- return I40IW_ERR_INVALID_PBLE_INDEX;
-
- cqp = dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- temp = (info->addr_type == I40IW_ADDR_TYPE_VA_BASED) ? (uintptr_t)info->va : info->fbo;
- set_64bit_val(wqe, 0, temp);
-
- set_64bit_val(wqe,
- 8,
- LS_64(info->total_len, I40IW_CQPSQ_STAG_STAGLEN) |
- LS_64(info->pd_id, I40IW_CQPSQ_STAG_PDID));
-
- set_64bit_val(wqe,
- 16,
- LS_64(info->stag_key, I40IW_CQPSQ_STAG_KEY) |
- LS_64(info->stag_idx, I40IW_CQPSQ_STAG_IDX));
- if (!info->chunk_size) {
- set_64bit_val(wqe, 32, info->reg_addr_pa);
- set_64bit_val(wqe, 48, 0);
- } else {
- set_64bit_val(wqe, 32, 0);
- set_64bit_val(wqe, 48, info->first_pm_pbl_index);
- }
- set_64bit_val(wqe, 40, info->hmc_fcn_index);
- set_64bit_val(wqe, 56, 0);
-
- addr_type = (info->addr_type == I40IW_ADDR_TYPE_VA_BASED) ? 1 : 0;
- header = LS_64(I40IW_CQP_OP_REG_MR, I40IW_CQPSQ_OPCODE) |
- LS_64(1, I40IW_CQPSQ_STAG_MR) |
- LS_64(info->chunk_size, I40IW_CQPSQ_STAG_LPBLSIZE) |
- LS_64(page_size, I40IW_CQPSQ_STAG_HPAGESIZE) |
- LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) |
- LS_64(remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) |
- LS_64(addr_type, I40IW_CQPSQ_STAG_VABASEDTO) |
- LS_64(info->use_hmc_fcn_index, I40IW_CQPSQ_STAG_USEHMCFNIDX) |
- LS_64(info->use_pf_rid, I40IW_CQPSQ_STAG_USEPFRID) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "MR_REG_NS WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_mr_reg_shared - register shared memory region
- * @dev: sc device struct
- * @info: info for shared memory registration
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_mr_reg_shared(
- struct i40iw_sc_dev *dev,
- struct i40iw_register_shared_stag *info,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 temp, va64, fbo, header;
- u32 va32;
- bool remote_access;
- u8 addr_type;
-
- if (info->access_rights & (I40IW_ACCESS_FLAGS_REMOTEREAD_ONLY |
- I40IW_ACCESS_FLAGS_REMOTEWRITE_ONLY))
- remote_access = true;
- else
- remote_access = false;
- cqp = dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
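- /* fbo is the va's offset within a 4k page, used when the stag is not va based */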
- va64 = (uintptr_t)(info->va);
- va32 = (u32)(va64 & 0x00000000FFFFFFFF);
- fbo = (u64)(va32 & (4096 - 1));
-
- set_64bit_val(wqe,
- 0,
- (info->addr_type == I40IW_ADDR_TYPE_VA_BASED ? (uintptr_t)info->va : fbo));
-
- set_64bit_val(wqe,
- 8,
- LS_64(info->pd_id, I40IW_CQPSQ_STAG_PDID));
- temp = LS_64(info->new_stag_key, I40IW_CQPSQ_STAG_KEY) |
- LS_64(info->new_stag_idx, I40IW_CQPSQ_STAG_IDX) |
- LS_64(info->parent_stag_idx, I40IW_CQPSQ_STAG_PARENTSTAGIDX);
- set_64bit_val(wqe, 16, temp);
-
- addr_type = (info->addr_type == I40IW_ADDR_TYPE_VA_BASED) ? 1 : 0;
- header = LS_64(I40IW_CQP_OP_REG_SMR, I40IW_CQPSQ_OPCODE) |
- LS_64(1, I40IW_CQPSQ_STAG_MR) |
- LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) |
- LS_64(remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) |
- LS_64(addr_type, I40IW_CQPSQ_STAG_VABASEDTO) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "MR_REG_SHARED WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_dealloc_stag - deallocate stag
- * @dev: sc device struct
- * @info: dealloc stag info
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_dealloc_stag(
- struct i40iw_sc_dev *dev,
- struct i40iw_dealloc_stag_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 header;
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
-
- cqp = dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe,
- 8,
- LS_64(info->pd_id, I40IW_CQPSQ_STAG_PDID));
- set_64bit_val(wqe,
- 16,
- LS_64(info->stag_idx, I40IW_CQPSQ_STAG_IDX));
-
- header = LS_64(I40IW_CQP_OP_DEALLOC_STAG, I40IW_CQPSQ_OPCODE) |
- LS_64(info->mr, I40IW_CQPSQ_STAG_MR) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "DEALLOC_STAG WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_query_stag - query hardware for stag
- * @dev: sc device struct
- * @scratch: u64 saved to be used during cqp completion
- * @stag_index: stag index for query
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_query_stag(struct i40iw_sc_dev *dev,
- u64 scratch,
- u32 stag_index,
- bool post_sq)
-{
- u64 header;
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
-
- cqp = dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe,
- 16,
- LS_64(stag_index, I40IW_CQPSQ_QUERYSTAG_IDX));
-
- header = LS_64(I40IW_CQP_OP_QUERY_STAG, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "QUERY_STAG WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_mw_alloc - mw allocate
- * @dev: sc device struct
- * @scratch: u64 saved to be used during cqp completion
- * @mw_stag_index: stag index
- * @pd_id: pd id for this mw
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_mw_alloc(
- struct i40iw_sc_dev *dev,
- u64 scratch,
- u32 mw_stag_index,
- u16 pd_id,
- bool post_sq)
-{
- u64 header;
- struct i40iw_sc_cqp *cqp;
- u64 *wqe;
-
- cqp = dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe, 8, LS_64(pd_id, I40IW_CQPSQ_STAG_PDID));
- set_64bit_val(wqe,
- 16,
- LS_64(mw_stag_index, I40IW_CQPSQ_STAG_IDX));
-
- header = LS_64(I40IW_CQP_OP_ALLOC_STAG, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "MW_ALLOC WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_mr_fast_register - Posts RDMA fast register mr WR to iwarp qp
- * @qp: sc qp struct
- * @info: fast mr info
- * @post_sq: flag to ring the qp's sq doorbell
- */
-enum i40iw_status_code i40iw_sc_mr_fast_register(
- struct i40iw_sc_qp *qp,
- struct i40iw_fast_reg_stag_info *info,
- bool post_sq)
-{
- u64 temp, header;
- u64 *wqe;
- u32 wqe_idx;
- enum i40iw_page_size page_size;
-
- page_size = (info->page_size == 0x200000) ? I40IW_PAGE_SIZE_2M : I40IW_PAGE_SIZE_4K;
- wqe = i40iw_qp_get_next_send_wqe(&qp->qp_uk, &wqe_idx, I40IW_QP_WQE_MIN_SIZE,
- 0, info->wr_id);
- if (!wqe)
- return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
-
- i40iw_debug(qp->dev, I40IW_DEBUG_MR, "%s: wr_id[%llxh] wqe_idx[%04d] location[%p]\n",
- __func__, info->wr_id, wqe_idx,
- &qp->qp_uk.sq_wrtrk_array[wqe_idx].wrid);
- temp = (info->addr_type == I40IW_ADDR_TYPE_VA_BASED) ? (uintptr_t)info->va : info->fbo;
- set_64bit_val(wqe, 0, temp);
-
- temp = RS_64(info->first_pm_pbl_index >> 16, I40IWQPSQ_FIRSTPMPBLIDXHI);
- set_64bit_val(wqe,
- 8,
- LS_64(temp, I40IWQPSQ_FIRSTPMPBLIDXHI) |
- LS_64(info->reg_addr_pa >> I40IWQPSQ_PBLADDR_SHIFT, I40IWQPSQ_PBLADDR));
-
- set_64bit_val(wqe,
- 16,
- info->total_len |
- LS_64(info->first_pm_pbl_index, I40IWQPSQ_FIRSTPMPBLIDXLO));
-
- header = LS_64(info->stag_key, I40IWQPSQ_STAGKEY) |
- LS_64(info->stag_idx, I40IWQPSQ_STAGINDEX) |
- LS_64(I40IWQP_OP_FAST_REGISTER, I40IWQPSQ_OPCODE) |
- LS_64(info->chunk_size, I40IWQPSQ_LPBLSIZE) |
- LS_64(page_size, I40IWQPSQ_HPAGESIZE) |
- LS_64(info->access_rights, I40IWQPSQ_STAGRIGHTS) |
- LS_64(info->addr_type, I40IWQPSQ_VABASEDTO) |
- LS_64(info->read_fence, I40IWQPSQ_READFENCE) |
- LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
- LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
- LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(qp->dev, I40IW_DEBUG_WQE, "FAST_REG WQE",
- wqe, I40IW_QP_WQE_MIN_SIZE);
-
- if (post_sq)
- i40iw_qp_post_wr(&qp->qp_uk);
- return 0;
-}
-
-/**
- * i40iw_sc_send_lsmm - send last streaming mode message
- * @qp: sc qp struct
- * @lsmm_buf: buffer with lsmm message
- * @size: size of lsmm buffer
- * @stag: stag of lsmm buffer
- */
-static void i40iw_sc_send_lsmm(struct i40iw_sc_qp *qp,
- void *lsmm_buf,
- u32 size,
- i40iw_stag stag)
-{
- u64 *wqe;
- u64 header;
- struct i40iw_qp_uk *qp_uk;
-
- qp_uk = &qp->qp_uk;
- wqe = qp_uk->sq_base->elem;
-
- set_64bit_val(wqe, 0, (uintptr_t)lsmm_buf);
-
- set_64bit_val(wqe, 8, (size | LS_64(stag, I40IWQPSQ_FRAG_STAG)));
-
- set_64bit_val(wqe, 16, 0);
-
- header = LS_64(I40IWQP_OP_RDMA_SEND, I40IWQPSQ_OPCODE) |
- LS_64(1, I40IWQPSQ_STREAMMODE) |
- LS_64(1, I40IWQPSQ_WAITFORRCVPDU) |
- LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(qp->dev, I40IW_DEBUG_QP, "SEND_LSMM WQE",
- wqe, I40IW_QP_WQE_MIN_SIZE);
-}
-
-/**
- * i40iw_sc_send_lsmm_nostag - send last streaming mode message without an stag, for privileged qp
- * @qp: sc qp struct
- * @lsmm_buf: buffer with lsmm message
- * @size: size of lsmm buffer
- */
-static void i40iw_sc_send_lsmm_nostag(struct i40iw_sc_qp *qp,
- void *lsmm_buf,
- u32 size)
-{
- u64 *wqe;
- u64 header;
- struct i40iw_qp_uk *qp_uk;
-
- qp_uk = &qp->qp_uk;
- wqe = qp_uk->sq_base->elem;
-
- set_64bit_val(wqe, 0, (uintptr_t)lsmm_buf);
-
- set_64bit_val(wqe, 8, size);
-
- set_64bit_val(wqe, 16, 0);
-
- header = LS_64(I40IWQP_OP_RDMA_SEND, I40IWQPSQ_OPCODE) |
- LS_64(1, I40IWQPSQ_STREAMMODE) |
- LS_64(1, I40IWQPSQ_WAITFORRCVPDU) |
- LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(qp->dev, I40IW_DEBUG_WQE, "SEND_LSMM_NOSTAG WQE",
- wqe, I40IW_QP_WQE_MIN_SIZE);
-}
-
-/**
- * i40iw_sc_send_rtt - send last read0 or write0
- * @qp: sc qp struct
- * @read: Do read0 or write0
- */
-static void i40iw_sc_send_rtt(struct i40iw_sc_qp *qp, bool read)
-{
- u64 *wqe;
- u64 header;
- struct i40iw_qp_uk *qp_uk;
-
- qp_uk = &qp->qp_uk;
- wqe = qp_uk->sq_base->elem;
-
- set_64bit_val(wqe, 0, 0);
- set_64bit_val(wqe, 8, 0);
- set_64bit_val(wqe, 16, 0);
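- /* read0 carries placeholder remote stag/offset values; no data is transferred */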
- if (read) {
- header = LS_64(0x1234, I40IWQPSQ_REMSTAG) |
- LS_64(I40IWQP_OP_RDMA_READ, I40IWQPSQ_OPCODE) |
- LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
- set_64bit_val(wqe, 8, ((u64)0xabcd << 32));
- } else {
- header = LS_64(I40IWQP_OP_RDMA_WRITE, I40IWQPSQ_OPCODE) |
- LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
- }
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(qp->dev, I40IW_DEBUG_WQE, "RTR WQE",
- wqe, I40IW_QP_WQE_MIN_SIZE);
-}
-
-/**
- * i40iw_sc_post_wqe0 - send wqe with opcode
- * @qp: sc qp struct
- * @opcode: opcode to use for wqe0
- */
-static enum i40iw_status_code i40iw_sc_post_wqe0(struct i40iw_sc_qp *qp, u8 opcode)
-{
- u64 *wqe;
- u64 header;
- struct i40iw_qp_uk *qp_uk;
-
- qp_uk = &qp->qp_uk;
- wqe = qp_uk->sq_base->elem;
-
- if (!wqe)
- return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
- switch (opcode) {
- case I40IWQP_OP_NOP:
- set_64bit_val(wqe, 0, 0);
- set_64bit_val(wqe, 8, 0);
- set_64bit_val(wqe, 16, 0);
- header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) |
- LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
- break;
- case I40IWQP_OP_RDMA_SEND:
- set_64bit_val(wqe, 0, 0);
- set_64bit_val(wqe, 8, 0);
- set_64bit_val(wqe, 16, 0);
- header = LS_64(I40IWQP_OP_RDMA_SEND, I40IWQPSQ_OPCODE) |
- LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID) |
- LS_64(1, I40IWQPSQ_STREAMMODE) |
- LS_64(1, I40IWQPSQ_WAITFORRCVPDU);
-
- i40iw_insert_wqe_hdr(wqe, header);
- break;
- default:
- i40iw_debug(qp->dev, I40IW_DEBUG_QP, "%s: Invalid WQE zero opcode\n",
- __func__);
- break;
- }
- return 0;
-}
-
-/**
- * i40iw_sc_init_iw_hmc() - queries fpm values using cqp and populates hmc_info
- * @dev: ptr to i40iw_dev struct
- * @hmc_fn_id: hmc function id
- */
-enum i40iw_status_code i40iw_sc_init_iw_hmc(struct i40iw_sc_dev *dev, u8 hmc_fn_id)
-{
- struct i40iw_hmc_info *hmc_info;
- struct i40iw_dma_mem query_fpm_mem;
- struct i40iw_virt_mem virt_mem;
- struct i40iw_vfdev *vf_dev = NULL;
- u32 mem_size;
- enum i40iw_status_code ret_code = 0;
- bool poll_registers = true;
- u16 iw_vf_idx;
- u8 wait_type;
-
- if (hmc_fn_id >= I40IW_MAX_VF_FPM_ID ||
- (dev->hmc_fn_id != hmc_fn_id && hmc_fn_id < I40IW_FIRST_VF_FPM_ID))
- return I40IW_ERR_INVALID_HMCFN_ID;
-
- i40iw_debug(dev, I40IW_DEBUG_HMC, "hmc_fn_id %u, dev->hmc_fn_id %u\n", hmc_fn_id,
- dev->hmc_fn_id);
- if (hmc_fn_id == dev->hmc_fn_id) {
- hmc_info = dev->hmc_info;
- query_fpm_mem.pa = dev->fpm_query_buf_pa;
- query_fpm_mem.va = dev->fpm_query_buf;
- } else {
- vf_dev = i40iw_vfdev_from_fpm(dev, hmc_fn_id);
- if (!vf_dev)
- return I40IW_ERR_INVALID_VF_ID;
-
- hmc_info = &vf_dev->hmc_info;
- iw_vf_idx = vf_dev->iw_vf_idx;
- i40iw_debug(dev, I40IW_DEBUG_HMC, "vf_dev %p, hmc_info %p, hmc_obj %p\n", vf_dev,
- hmc_info, hmc_info->hmc_obj);
- if (!vf_dev->fpm_query_buf) {
- if (!dev->vf_fpm_query_buf[iw_vf_idx].va) {
- ret_code = i40iw_alloc_query_fpm_buf(dev,
- &dev->vf_fpm_query_buf[iw_vf_idx]);
- if (ret_code)
- return ret_code;
- }
- vf_dev->fpm_query_buf = dev->vf_fpm_query_buf[iw_vf_idx].va;
- vf_dev->fpm_query_buf_pa = dev->vf_fpm_query_buf[iw_vf_idx].pa;
- }
- query_fpm_mem.pa = vf_dev->fpm_query_buf_pa;
- query_fpm_mem.va = vf_dev->fpm_query_buf;
- /**
- * Hardware specific: the PF issues this call on behalf of
- * the VF, and i40iw_sc_query_fpm_values must poll the ccq
- * because the PF's ccq is already created.
- */
- poll_registers = false;
- }
-
- hmc_info->hmc_fn_id = hmc_fn_id;
-
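- /* a query for a vf function id goes through the cqp request path; our own id is queried directly with the chosen wait type */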
- if (hmc_fn_id != dev->hmc_fn_id) {
- ret_code =
- i40iw_cqp_query_fpm_values_cmd(dev, &query_fpm_mem, hmc_fn_id);
- } else {
- wait_type = poll_registers ? (u8)I40IW_CQP_WAIT_POLL_REGS :
- (u8)I40IW_CQP_WAIT_POLL_CQ;
-
- ret_code = i40iw_sc_query_fpm_values(
- dev->cqp,
- 0,
- hmc_info->hmc_fn_id,
- &query_fpm_mem,
- true,
- wait_type);
- }
- if (ret_code)
- return ret_code;
-
- /* parse the fpm_query_buf and fill hmc obj info */
- ret_code =
- i40iw_sc_parse_fpm_query_buf((u64 *)query_fpm_mem.va,
- hmc_info,
- &dev->hmc_fpm_misc);
- if (ret_code)
- return ret_code;
- i40iw_debug_buf(dev, I40IW_DEBUG_HMC, "QUERY FPM BUFFER",
- query_fpm_mem.va, I40IW_QUERY_FPM_BUF_SIZE);
-
- if (hmc_fn_id != dev->hmc_fn_id) {
- i40iw_cqp_commit_fpm_values_cmd(dev, &query_fpm_mem, hmc_fn_id);
-
- /* parse the fpm_commit_buf and fill hmc obj info */
- i40iw_sc_parse_fpm_commit_buf((u64 *)query_fpm_mem.va, hmc_info->hmc_obj, &hmc_info->sd_table.sd_cnt);
- mem_size = sizeof(struct i40iw_hmc_sd_entry) *
- (hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index);
- ret_code = i40iw_allocate_virt_mem(dev->hw, &virt_mem, mem_size);
- if (ret_code)
- return ret_code;
- hmc_info->sd_table.sd_entry = virt_mem.va;
- }
-
- return ret_code;
-}
-
-/**
- * i40iw_sc_configure_iw_fpm() - commits hmc obj cnt values using cqp command and
- * populates fpm base address in hmc_info
- * @dev: ptr to i40iw_dev struct
- * @hmc_fn_id: hmc function id
- */
-static enum i40iw_status_code i40iw_sc_configure_iw_fpm(struct i40iw_sc_dev *dev,
- u8 hmc_fn_id)
-{
- struct i40iw_hmc_info *hmc_info;
- struct i40iw_hmc_obj_info *obj_info;
- u64 *buf;
- struct i40iw_dma_mem commit_fpm_mem;
- u32 i, j;
- enum i40iw_status_code ret_code = 0;
- bool poll_registers = true;
- u8 wait_type;
-
- if (hmc_fn_id >= I40IW_MAX_VF_FPM_ID ||
- (dev->hmc_fn_id != hmc_fn_id && hmc_fn_id < I40IW_FIRST_VF_FPM_ID))
- return I40IW_ERR_INVALID_HMCFN_ID;
-
- if (hmc_fn_id == dev->hmc_fn_id) {
- hmc_info = dev->hmc_info;
- } else {
- hmc_info = i40iw_vf_hmcinfo_from_fpm(dev, hmc_fn_id);
- poll_registers = false;
- }
- if (!hmc_info)
- return I40IW_ERR_BAD_PTR;
-
- obj_info = hmc_info->hmc_obj;
- buf = dev->fpm_commit_buf;
-
- /* copy cnt values in commit buf */
- for (i = I40IW_HMC_IW_QP, j = 0; i <= I40IW_HMC_IW_PBLE;
- i++, j += 8)
- set_64bit_val(buf, j, (u64)obj_info[i].cnt);
-
- set_64bit_val(buf, 40, 0); /* APBVT rsvd */
-
- commit_fpm_mem.pa = dev->fpm_commit_buf_pa;
- commit_fpm_mem.va = dev->fpm_commit_buf;
- wait_type = poll_registers ? (u8)I40IW_CQP_WAIT_POLL_REGS :
- (u8)I40IW_CQP_WAIT_POLL_CQ;
- ret_code = i40iw_sc_commit_fpm_values(
- dev->cqp,
- 0,
- hmc_info->hmc_fn_id,
- &commit_fpm_mem,
- true,
- wait_type);
-
- /* parse the fpm_commit_buf and fill hmc obj info */
- if (!ret_code)
- ret_code = i40iw_sc_parse_fpm_commit_buf(dev->fpm_commit_buf,
- hmc_info->hmc_obj,
- &hmc_info->sd_table.sd_cnt);
-
- i40iw_debug_buf(dev, I40IW_DEBUG_HMC, "COMMIT FPM BUFFER",
- commit_fpm_mem.va, I40IW_COMMIT_FPM_BUF_SIZE);
-
- return ret_code;
-}
-
-/**
- * cqp_sds_wqe_fill - fill cqp wqe for sd
- * @cqp: struct for cqp hw
- * @info: sd info for wqe
- * @scratch: u64 saved to be used during cqp completion
- */
-static enum i40iw_status_code cqp_sds_wqe_fill(struct i40iw_sc_cqp *cqp,
- struct i40iw_update_sds_info *info,
- u64 scratch)
-{
- u64 data;
- u64 header;
- u64 *wqe;
- int mem_entries, wqe_entries;
- struct i40iw_dma_mem *sdbuf = &cqp->sdbuf;
- u64 offset;
- u32 wqe_idx;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe_idx(cqp, scratch, &wqe_idx);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- I40IW_CQP_INIT_WQE(wqe);
- wqe_entries = (info->cnt > 3) ? 3 : info->cnt;
- mem_entries = info->cnt - wqe_entries;
-
- header = LS_64(I40IW_CQP_OP_UPDATE_PE_SDS, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID) |
- LS_64(mem_entries, I40IW_CQPSQ_UPESD_ENTRY_COUNT);
-
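- /* up to three sd entries fit in the wqe itself; the remainder (16 bytes each) is staged in this wqe's slot of the sdbuf dma area */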
- if (mem_entries) {
- offset = wqe_idx * I40IW_UPDATE_SD_BUF_SIZE;
- memcpy((char *)sdbuf->va + offset, &info->entry[3],
- mem_entries << 4);
- data = (u64)sdbuf->pa + offset;
- } else {
- data = 0;
- }
- data |= LS_64(info->hmc_fn_id, I40IW_CQPSQ_UPESD_HMCFNID);
-
- set_64bit_val(wqe, 16, data);
-
- switch (wqe_entries) {
- case 3:
- set_64bit_val(wqe, 48,
- (LS_64(info->entry[2].cmd, I40IW_CQPSQ_UPESD_SDCMD) |
- LS_64(1, I40IW_CQPSQ_UPESD_ENTRY_VALID)));
-
- set_64bit_val(wqe, 56, info->entry[2].data);
- /* fallthrough */
- case 2:
- set_64bit_val(wqe, 32,
- (LS_64(info->entry[1].cmd, I40IW_CQPSQ_UPESD_SDCMD) |
- LS_64(1, I40IW_CQPSQ_UPESD_ENTRY_VALID)));
-
- set_64bit_val(wqe, 40, info->entry[1].data);
- /* fallthrough */
- case 1:
- set_64bit_val(wqe, 0,
- LS_64(info->entry[0].cmd, I40IW_CQPSQ_UPESD_SDCMD));
-
- set_64bit_val(wqe, 8, info->entry[0].data);
- break;
- default:
- break;
- }
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "UPDATE_PE_SDS WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
- return 0;
-}
-
-/**
- * i40iw_update_pe_sds - cqp wqe for sd
- * @dev: ptr to i40iw_dev struct
- * @info: sd info for the sd entries
- * @scratch: u64 saved to be used during cqp completion
- */
-static enum i40iw_status_code i40iw_update_pe_sds(struct i40iw_sc_dev *dev,
- struct i40iw_update_sds_info *info,
- u64 scratch)
-{
- struct i40iw_sc_cqp *cqp = dev->cqp;
- enum i40iw_status_code ret_code;
-
- ret_code = cqp_sds_wqe_fill(cqp, info, scratch);
- if (!ret_code)
- i40iw_sc_cqp_post_sq(cqp);
-
- return ret_code;
-}
-
-/**
- * i40iw_update_sds_noccq - update sd before ccq created
- * @dev: sc device struct
- * @info: sd info for the sd entries
- */
-enum i40iw_status_code i40iw_update_sds_noccq(struct i40iw_sc_dev *dev,
- struct i40iw_update_sds_info *info)
-{
- u32 error, val, tail;
- struct i40iw_sc_cqp *cqp = dev->cqp;
- enum i40iw_status_code ret_code;
-
- ret_code = cqp_sds_wqe_fill(cqp, info, 0);
- if (ret_code)
- return ret_code;
- i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
- if (error)
- return I40IW_ERR_CQP_COMPL_ERROR;
-
- i40iw_sc_cqp_post_sq(cqp);
- ret_code = i40iw_cqp_poll_registers(cqp, tail, I40IW_DONE_COUNT);
-
- return ret_code;
-}
-
-/**
- * i40iw_sc_suspend_qp - suspend qp for param change
- * @cqp: struct for cqp hw
- * @qp: sc qp struct
- * @scratch: u64 saved to be used during cqp completion
- */
-enum i40iw_status_code i40iw_sc_suspend_qp(struct i40iw_sc_cqp *cqp,
- struct i40iw_sc_qp *qp,
- u64 scratch)
-{
- u64 header;
- u64 *wqe;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- header = LS_64(qp->qp_uk.qp_id, I40IW_CQPSQ_SUSPENDQP_QPID) |
- LS_64(I40IW_CQP_OP_SUSPEND_QP, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "SUSPEND_QP WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_resume_qp - resume qp after suspend
- * @cqp: struct for cqp hw
- * @qp: sc qp struct
- * @scratch: u64 saved to be used during cqp completion
- */
-enum i40iw_status_code i40iw_sc_resume_qp(struct i40iw_sc_cqp *cqp,
- struct i40iw_sc_qp *qp,
- u64 scratch)
-{
- u64 header;
- u64 *wqe;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe,
- 16,
- LS_64(qp->qs_handle, I40IW_CQPSQ_RESUMEQP_QSHANDLE));
-
- header = LS_64(qp->qp_uk.qp_id, I40IW_CQPSQ_RESUMEQP_QPID) |
- LS_64(I40IW_CQP_OP_RESUME_QP, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "RESUME_QP WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_static_hmc_pages_allocated - cqp wqe to allocate hmc pages
- * @cqp: struct for cqp hw
- * @scratch: u64 saved to be used during cqp completion
- * @hmc_fn_id: hmc function id
- * @post_sq: flag for cqp db to ring
- * @poll_registers: flag to poll register for cqp completion
- */
-enum i40iw_status_code i40iw_sc_static_hmc_pages_allocated(
- struct i40iw_sc_cqp *cqp,
- u64 scratch,
- u8 hmc_fn_id,
- bool post_sq,
- bool poll_registers)
-{
- u64 header;
- u64 *wqe;
- u32 tail, val, error;
- enum i40iw_status_code ret_code = 0;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe,
- 16,
- LS_64(hmc_fn_id, I40IW_SHMC_PAGE_ALLOCATED_HMC_FN_ID));
-
- header = LS_64(I40IW_CQP_OP_SHMC_PAGES_ALLOCATED, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "SHMC_PAGES_ALLOCATED WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
- i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
- if (error) {
- ret_code = I40IW_ERR_CQP_COMPL_ERROR;
- return ret_code;
- }
- if (post_sq) {
- i40iw_sc_cqp_post_sq(cqp);
- if (poll_registers)
- /* check for cqp sq tail update */
- ret_code = i40iw_cqp_poll_registers(cqp, tail, 1000);
- else
- ret_code = i40iw_sc_poll_for_cqp_op_done(cqp,
- I40IW_CQP_OP_SHMC_PAGES_ALLOCATED,
- NULL);
- }
-
- return ret_code;
-}
-
-/**
- * i40iw_ring_full - check if cqp ring is full
- * @cqp: struct for cqp hw
- */
-static bool i40iw_ring_full(struct i40iw_sc_cqp *cqp)
-{
- return I40IW_RING_FULL_ERR(cqp->sq_ring);
-}
-
-/**
- * i40iw_est_sd - returns approximate number of SDs for HMC
- * @dev: sc device struct
- * @hmc_info: hmc structure, size and count for HMC objects
- */
-static u64 i40iw_est_sd(struct i40iw_sc_dev *dev, struct i40iw_hmc_info *hmc_info)
-{
- int i;
- u64 size = 0;
- u64 sd;
-
- for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_PBLE; i++)
- size += hmc_info->hmc_obj[i].cnt * hmc_info->hmc_obj[i].size;
-
- if (dev->is_pf)
- size += hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt * hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].size;
-
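-	/* each SD backs 2MB (1 << 21) of HMC memory, so round the total size up to a 2MB multiple */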
- if (size & 0x1FFFFF)
- sd = (size >> 21) + 1; /* add 1 for remainder */
- else
- sd = size >> 21;
-
- if (!dev->is_pf) {
- /* 2MB alignment for VF PBLE HMC */
- size = hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt * hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].size;
- if (size & 0x1FFFFF)
- sd += (size >> 21) + 1; /* add 1 for remainder */
- else
- sd += size >> 21;
- }
-
- return sd;
-}
-
-/**
- * i40iw_config_fpm_values - configure HMC objects
- * @dev: sc device struct
- * @qp_count: desired qp count
- */
-enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_count)
-{
- struct i40iw_virt_mem virt_mem;
- u32 i, mem_size;
- u32 qpwantedoriginal, qpwanted, mrwanted, pblewanted;
- u64 sd_needed;
- u32 loop_count = 0;
-
- struct i40iw_hmc_info *hmc_info;
- struct i40iw_hmc_fpm_misc *hmc_fpm_misc;
- enum i40iw_status_code ret_code = 0;
-
- hmc_info = dev->hmc_info;
- hmc_fpm_misc = &dev->hmc_fpm_misc;
-
- ret_code = i40iw_sc_init_iw_hmc(dev, dev->hmc_fn_id);
- if (ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "i40iw_sc_init_iw_hmc returned error_code = %d\n",
- ret_code);
- return ret_code;
- }
-
- for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++)
- hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt;
- sd_needed = i40iw_est_sd(dev, hmc_info);
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "%s: FW initial max sd_count[%08lld] first_sd_index[%04d]\n",
- __func__, sd_needed, hmc_info->first_sd_index);
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "%s: sd count %d where max sd is %d\n",
- __func__, hmc_info->sd_table.sd_cnt,
- hmc_fpm_misc->max_sds);
-
- qpwanted = min(qp_count, hmc_info->hmc_obj[I40IW_HMC_IW_QP].max_cnt);
- qpwantedoriginal = qpwanted;
- mrwanted = hmc_info->hmc_obj[I40IW_HMC_IW_MR].max_cnt;
- pblewanted = hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].max_cnt;
-
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "req_qp=%d max_sd=%d, max_qp = %d, max_cq=%d, max_mr=%d, max_pble=%d\n",
- qp_count, hmc_fpm_misc->max_sds,
- hmc_info->hmc_obj[I40IW_HMC_IW_QP].max_cnt,
- hmc_info->hmc_obj[I40IW_HMC_IW_CQ].max_cnt,
- hmc_info->hmc_obj[I40IW_HMC_IW_MR].max_cnt,
- hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].max_cnt);
-
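-	/* iteratively trim the wanted QP, MR and PBLE counts until the estimated SD requirement fits within hmc_fpm_misc->max_sds or the loop limit is hit */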
- do {
- ++loop_count;
- hmc_info->hmc_obj[I40IW_HMC_IW_QP].cnt = qpwanted;
- hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt =
- min(2 * qpwanted, hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt);
- hmc_info->hmc_obj[I40IW_HMC_IW_SRQ].cnt = 0x00; /* Reserved */
- hmc_info->hmc_obj[I40IW_HMC_IW_HTE].cnt =
- qpwanted * hmc_fpm_misc->ht_multiplier;
- hmc_info->hmc_obj[I40IW_HMC_IW_ARP].cnt =
- hmc_info->hmc_obj[I40IW_HMC_IW_ARP].max_cnt;
- hmc_info->hmc_obj[I40IW_HMC_IW_APBVT_ENTRY].cnt = 1;
- hmc_info->hmc_obj[I40IW_HMC_IW_MR].cnt = mrwanted;
-
- hmc_info->hmc_obj[I40IW_HMC_IW_XF].cnt =
- roundup_pow_of_two(I40IW_MAX_WQ_ENTRIES * qpwanted);
- hmc_info->hmc_obj[I40IW_HMC_IW_Q1].cnt =
- roundup_pow_of_two(2 * I40IW_MAX_IRD_SIZE * qpwanted);
- hmc_info->hmc_obj[I40IW_HMC_IW_XFFL].cnt =
- hmc_info->hmc_obj[I40IW_HMC_IW_XF].cnt / hmc_fpm_misc->xf_block_size;
- hmc_info->hmc_obj[I40IW_HMC_IW_Q1FL].cnt =
- hmc_info->hmc_obj[I40IW_HMC_IW_Q1].cnt / hmc_fpm_misc->q1_block_size;
- hmc_info->hmc_obj[I40IW_HMC_IW_TIMER].cnt =
- ((qpwanted) / 512 + 1) * hmc_fpm_misc->timer_bucket;
- hmc_info->hmc_obj[I40IW_HMC_IW_FSIMC].cnt = 0x00;
- hmc_info->hmc_obj[I40IW_HMC_IW_FSIAV].cnt = 0x00;
- hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt = pblewanted;
-
- /* How much memory is needed for all the objects. */
- sd_needed = i40iw_est_sd(dev, hmc_info);
- if ((loop_count > 1000) ||
- ((!(loop_count % 10)) &&
- (qpwanted > qpwantedoriginal * 2 / 3))) {
- if (qpwanted > FPM_MULTIPLIER)
- qpwanted = roundup_pow_of_two(qpwanted -
- FPM_MULTIPLIER);
- qpwanted >>= 1;
- }
- if (mrwanted > FPM_MULTIPLIER * 10)
- mrwanted -= FPM_MULTIPLIER * 10;
- if (pblewanted > FPM_MULTIPLIER * 1000)
- pblewanted -= FPM_MULTIPLIER * 1000;
- } while (sd_needed > hmc_fpm_misc->max_sds && loop_count < 2000);
-
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "loop_cnt=%d, sd_needed=%lld, qpcnt = %d, cqcnt=%d, mrcnt=%d, pblecnt=%d\n",
- loop_count, sd_needed,
- hmc_info->hmc_obj[I40IW_HMC_IW_QP].cnt,
- hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt,
- hmc_info->hmc_obj[I40IW_HMC_IW_MR].cnt,
- hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt);
-
- ret_code = i40iw_sc_configure_iw_fpm(dev, dev->hmc_fn_id);
- if (ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "configure_iw_fpm returned error_code[x%08X]\n",
- i40iw_rd32(dev->hw, dev->is_pf ? I40E_PFPE_CQPERRCODES : I40E_VFPE_CQPERRCODES1));
- return ret_code;
- }
-
- mem_size = sizeof(struct i40iw_hmc_sd_entry) *
- (hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index + 1);
- ret_code = i40iw_allocate_virt_mem(dev->hw, &virt_mem, mem_size);
- if (ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "%s: failed to allocate memory for sd_entry buffer\n",
- __func__);
- return ret_code;
- }
- hmc_info->sd_table.sd_entry = virt_mem.va;
-
- return ret_code;
-}
-
-/**
- * i40iw_exec_cqp_cmd - execute cqp cmd when a wqe is available
- * @dev: rdma device
- * @pcmdinfo: cqp command info
- */
-static enum i40iw_status_code i40iw_exec_cqp_cmd(struct i40iw_sc_dev *dev,
- struct cqp_commands_info *pcmdinfo)
-{
- enum i40iw_status_code status;
- struct i40iw_dma_mem values_mem;
-
- dev->cqp_cmd_stats[pcmdinfo->cqp_cmd]++;
- switch (pcmdinfo->cqp_cmd) {
- case OP_DELETE_LOCAL_MAC_IPADDR_ENTRY:
- status = i40iw_sc_del_local_mac_ipaddr_entry(
- pcmdinfo->in.u.del_local_mac_ipaddr_entry.cqp,
- pcmdinfo->in.u.del_local_mac_ipaddr_entry.scratch,
- pcmdinfo->in.u.del_local_mac_ipaddr_entry.entry_idx,
- pcmdinfo->in.u.del_local_mac_ipaddr_entry.ignore_ref_count,
- pcmdinfo->post_sq);
- break;
- case OP_CEQ_DESTROY:
- status = i40iw_sc_ceq_destroy(pcmdinfo->in.u.ceq_destroy.ceq,
- pcmdinfo->in.u.ceq_destroy.scratch,
- pcmdinfo->post_sq);
- break;
- case OP_AEQ_DESTROY:
- status = i40iw_sc_aeq_destroy(pcmdinfo->in.u.aeq_destroy.aeq,
- pcmdinfo->in.u.aeq_destroy.scratch,
- pcmdinfo->post_sq);
-
- break;
- case OP_DELETE_ARP_CACHE_ENTRY:
- status = i40iw_sc_del_arp_cache_entry(
- pcmdinfo->in.u.del_arp_cache_entry.cqp,
- pcmdinfo->in.u.del_arp_cache_entry.scratch,
- pcmdinfo->in.u.del_arp_cache_entry.arp_index,
- pcmdinfo->post_sq);
- break;
- case OP_MANAGE_APBVT_ENTRY:
- status = i40iw_sc_manage_apbvt_entry(
- pcmdinfo->in.u.manage_apbvt_entry.cqp,
- &pcmdinfo->in.u.manage_apbvt_entry.info,
- pcmdinfo->in.u.manage_apbvt_entry.scratch,
- pcmdinfo->post_sq);
- break;
- case OP_CEQ_CREATE:
- status = i40iw_sc_ceq_create(pcmdinfo->in.u.ceq_create.ceq,
- pcmdinfo->in.u.ceq_create.scratch,
- pcmdinfo->post_sq);
- break;
- case OP_AEQ_CREATE:
- status = i40iw_sc_aeq_create(pcmdinfo->in.u.aeq_create.aeq,
- pcmdinfo->in.u.aeq_create.scratch,
- pcmdinfo->post_sq);
- break;
- case OP_ALLOC_LOCAL_MAC_IPADDR_ENTRY:
- status = i40iw_sc_alloc_local_mac_ipaddr_entry(
- pcmdinfo->in.u.alloc_local_mac_ipaddr_entry.cqp,
- pcmdinfo->in.u.alloc_local_mac_ipaddr_entry.scratch,
- pcmdinfo->post_sq);
- break;
- case OP_ADD_LOCAL_MAC_IPADDR_ENTRY:
- status = i40iw_sc_add_local_mac_ipaddr_entry(
- pcmdinfo->in.u.add_local_mac_ipaddr_entry.cqp,
- &pcmdinfo->in.u.add_local_mac_ipaddr_entry.info,
- pcmdinfo->in.u.add_local_mac_ipaddr_entry.scratch,
- pcmdinfo->post_sq);
- break;
- case OP_MANAGE_QHASH_TABLE_ENTRY:
- status = i40iw_sc_manage_qhash_table_entry(
- pcmdinfo->in.u.manage_qhash_table_entry.cqp,
- &pcmdinfo->in.u.manage_qhash_table_entry.info,
- pcmdinfo->in.u.manage_qhash_table_entry.scratch,
- pcmdinfo->post_sq);
-
- break;
- case OP_QP_MODIFY:
- status = i40iw_sc_qp_modify(
- pcmdinfo->in.u.qp_modify.qp,
- &pcmdinfo->in.u.qp_modify.info,
- pcmdinfo->in.u.qp_modify.scratch,
- pcmdinfo->post_sq);
-
- break;
- case OP_QP_UPLOAD_CONTEXT:
- status = i40iw_sc_qp_upload_context(
- pcmdinfo->in.u.qp_upload_context.dev,
- &pcmdinfo->in.u.qp_upload_context.info,
- pcmdinfo->in.u.qp_upload_context.scratch,
- pcmdinfo->post_sq);
-
- break;
- case OP_CQ_CREATE:
- status = i40iw_sc_cq_create(
- pcmdinfo->in.u.cq_create.cq,
- pcmdinfo->in.u.cq_create.scratch,
- pcmdinfo->in.u.cq_create.check_overflow,
- pcmdinfo->post_sq);
- break;
- case OP_CQ_DESTROY:
- status = i40iw_sc_cq_destroy(
- pcmdinfo->in.u.cq_destroy.cq,
- pcmdinfo->in.u.cq_destroy.scratch,
- pcmdinfo->post_sq);
-
- break;
- case OP_QP_CREATE:
- status = i40iw_sc_qp_create(
- pcmdinfo->in.u.qp_create.qp,
- &pcmdinfo->in.u.qp_create.info,
- pcmdinfo->in.u.qp_create.scratch,
- pcmdinfo->post_sq);
- break;
- case OP_QP_DESTROY:
- status = i40iw_sc_qp_destroy(
- pcmdinfo->in.u.qp_destroy.qp,
- pcmdinfo->in.u.qp_destroy.scratch,
- pcmdinfo->in.u.qp_destroy.remove_hash_idx,
- pcmdinfo->in.u.qp_destroy.
- ignore_mw_bnd,
- pcmdinfo->post_sq);
-
- break;
- case OP_ALLOC_STAG:
- status = i40iw_sc_alloc_stag(
- pcmdinfo->in.u.alloc_stag.dev,
- &pcmdinfo->in.u.alloc_stag.info,
- pcmdinfo->in.u.alloc_stag.scratch,
- pcmdinfo->post_sq);
- break;
- case OP_MR_REG_NON_SHARED:
- status = i40iw_sc_mr_reg_non_shared(
- pcmdinfo->in.u.mr_reg_non_shared.dev,
- &pcmdinfo->in.u.mr_reg_non_shared.info,
- pcmdinfo->in.u.mr_reg_non_shared.scratch,
- pcmdinfo->post_sq);
-
- break;
- case OP_DEALLOC_STAG:
- status = i40iw_sc_dealloc_stag(
- pcmdinfo->in.u.dealloc_stag.dev,
- &pcmdinfo->in.u.dealloc_stag.info,
- pcmdinfo->in.u.dealloc_stag.scratch,
- pcmdinfo->post_sq);
-
- break;
- case OP_MW_ALLOC:
- status = i40iw_sc_mw_alloc(
- pcmdinfo->in.u.mw_alloc.dev,
- pcmdinfo->in.u.mw_alloc.scratch,
- pcmdinfo->in.u.mw_alloc.mw_stag_index,
- pcmdinfo->in.u.mw_alloc.pd_id,
- pcmdinfo->post_sq);
-
- break;
- case OP_QP_FLUSH_WQES:
- status = i40iw_sc_qp_flush_wqes(
- pcmdinfo->in.u.qp_flush_wqes.qp,
- &pcmdinfo->in.u.qp_flush_wqes.info,
- pcmdinfo->in.u.qp_flush_wqes.
- scratch, pcmdinfo->post_sq);
- break;
- case OP_GEN_AE:
- status = i40iw_sc_gen_ae(
- pcmdinfo->in.u.gen_ae.qp,
- &pcmdinfo->in.u.gen_ae.info,
- pcmdinfo->in.u.gen_ae.scratch,
- pcmdinfo->post_sq);
- break;
- case OP_ADD_ARP_CACHE_ENTRY:
- status = i40iw_sc_add_arp_cache_entry(
- pcmdinfo->in.u.add_arp_cache_entry.cqp,
- &pcmdinfo->in.u.add_arp_cache_entry.info,
- pcmdinfo->in.u.add_arp_cache_entry.scratch,
- pcmdinfo->post_sq);
- break;
- case OP_MANAGE_PUSH_PAGE:
- status = i40iw_sc_manage_push_page(
- pcmdinfo->in.u.manage_push_page.cqp,
- &pcmdinfo->in.u.manage_push_page.info,
- pcmdinfo->in.u.manage_push_page.scratch,
- pcmdinfo->post_sq);
- break;
- case OP_UPDATE_PE_SDS:
- /* case I40IW_CQP_OP_UPDATE_PE_SDS */
- status = i40iw_update_pe_sds(
- pcmdinfo->in.u.update_pe_sds.dev,
- &pcmdinfo->in.u.update_pe_sds.info,
- pcmdinfo->in.u.update_pe_sds.
- scratch);
-
- break;
- case OP_MANAGE_HMC_PM_FUNC_TABLE:
- status = i40iw_sc_manage_hmc_pm_func_table(
- pcmdinfo->in.u.manage_hmc_pm.dev->cqp,
- pcmdinfo->in.u.manage_hmc_pm.scratch,
- (u8)pcmdinfo->in.u.manage_hmc_pm.info.vf_id,
- pcmdinfo->in.u.manage_hmc_pm.info.free_fcn,
- true);
- break;
- case OP_SUSPEND:
- status = i40iw_sc_suspend_qp(
- pcmdinfo->in.u.suspend_resume.cqp,
- pcmdinfo->in.u.suspend_resume.qp,
- pcmdinfo->in.u.suspend_resume.scratch);
- break;
- case OP_RESUME:
- status = i40iw_sc_resume_qp(
- pcmdinfo->in.u.suspend_resume.cqp,
- pcmdinfo->in.u.suspend_resume.qp,
- pcmdinfo->in.u.suspend_resume.scratch);
- break;
- case OP_MANAGE_VF_PBLE_BP:
- status = i40iw_manage_vf_pble_bp(
- pcmdinfo->in.u.manage_vf_pble_bp.cqp,
- &pcmdinfo->in.u.manage_vf_pble_bp.info,
- pcmdinfo->in.u.manage_vf_pble_bp.scratch, true);
- break;
- case OP_QUERY_FPM_VALUES:
- values_mem.pa = pcmdinfo->in.u.query_fpm_values.fpm_values_pa;
- values_mem.va = pcmdinfo->in.u.query_fpm_values.fpm_values_va;
- status = i40iw_sc_query_fpm_values(
- pcmdinfo->in.u.query_fpm_values.cqp,
- pcmdinfo->in.u.query_fpm_values.scratch,
- pcmdinfo->in.u.query_fpm_values.hmc_fn_id,
- &values_mem, true, I40IW_CQP_WAIT_EVENT);
- break;
- case OP_COMMIT_FPM_VALUES:
- values_mem.pa = pcmdinfo->in.u.commit_fpm_values.fpm_values_pa;
- values_mem.va = pcmdinfo->in.u.commit_fpm_values.fpm_values_va;
- status = i40iw_sc_commit_fpm_values(
- pcmdinfo->in.u.commit_fpm_values.cqp,
- pcmdinfo->in.u.commit_fpm_values.scratch,
- pcmdinfo->in.u.commit_fpm_values.hmc_fn_id,
- &values_mem,
- true,
- I40IW_CQP_WAIT_EVENT);
- break;
- default:
- status = I40IW_NOT_SUPPORTED;
- break;
- }
-
- return status;
-}
-
-/**
- * i40iw_process_cqp_cmd - queue or execute a cqp command
- * @dev: sc device struct
- * @pcmdinfo: cqp command info
- */
-enum i40iw_status_code i40iw_process_cqp_cmd(struct i40iw_sc_dev *dev,
- struct cqp_commands_info *pcmdinfo)
-{
- enum i40iw_status_code status = 0;
- unsigned long flags;
-
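-	/* execute immediately only when no commands are pending and the CQP SQ has room; otherwise queue the command for i40iw_process_bh */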
- spin_lock_irqsave(&dev->cqp_lock, flags);
- if (list_empty(&dev->cqp_cmd_head) && !i40iw_ring_full(dev->cqp))
- status = i40iw_exec_cqp_cmd(dev, pcmdinfo);
- else
- list_add_tail(&pcmdinfo->cqp_cmd_entry, &dev->cqp_cmd_head);
- spin_unlock_irqrestore(&dev->cqp_lock, flags);
- return status;
-}
-
-/**
- * i40iw_process_bh - called from tasklet for cqp list
- * @dev: sc device struct
- */
-enum i40iw_status_code i40iw_process_bh(struct i40iw_sc_dev *dev)
-{
- enum i40iw_status_code status = 0;
- struct cqp_commands_info *pcmdinfo;
- unsigned long flags;
-
- spin_lock_irqsave(&dev->cqp_lock, flags);
- while (!list_empty(&dev->cqp_cmd_head) && !i40iw_ring_full(dev->cqp)) {
- pcmdinfo = (struct cqp_commands_info *)i40iw_remove_head(&dev->cqp_cmd_head);
-
- status = i40iw_exec_cqp_cmd(dev, pcmdinfo);
- if (status)
- break;
- }
- spin_unlock_irqrestore(&dev->cqp_lock, flags);
- return status;
-}
-
-/**
- * i40iw_iwarp_opcode - return the iwarp opcode of the incoming packet
- * @info: aeq info for the packet
- * @pkt: packet for error
- */
-static u32 i40iw_iwarp_opcode(struct i40iw_aeqe_info *info, u8 *pkt)
-{
- __be16 *mpa;
- u32 opcode = 0xffffffff;
-
- if (info->q2_data_written) {
- mpa = (__be16 *)pkt;
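-		/* pkt[2..3] carry the DDP/RDMAP control fields; the low nibble holds the RDMAP opcode */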
- opcode = ntohs(mpa[1]) & 0xf;
- }
- return opcode;
-}
-
-/**
- * i40iw_locate_mpa - return pointer to mpa in the pkt
- * @pkt: packet with data
- */
-static u8 *i40iw_locate_mpa(u8 *pkt)
-{
- /* skip over ethernet header */
- pkt += I40IW_MAC_HLEN;
-
- /* Skip over IP and TCP headers */
- pkt += 4 * (pkt[0] & 0x0f);
- pkt += 4 * ((pkt[12] >> 4) & 0x0f);
- return pkt;
-}
-
-/**
- * i40iw_setup_termhdr - termhdr for terminate pkt
- * @qp: sc qp ptr for pkt
- * @hdr: term hdr
- * @opcode: flush opcode for termhdr
- * @layer_etype: error layer + error type
- * @err: error code in the header
- */
-static void i40iw_setup_termhdr(struct i40iw_sc_qp *qp,
- struct i40iw_terminate_hdr *hdr,
- enum i40iw_flush_opcode opcode,
- u8 layer_etype,
- u8 err)
-{
- qp->flush_code = opcode;
- hdr->layer_etype = layer_etype;
- hdr->error_code = err;
-}
-
-/**
- * i40iw_bld_terminate_hdr - build terminate message header
- * @qp: qp associated with received terminate AE
- * @info: the struct containing AE information
- */
-static int i40iw_bld_terminate_hdr(struct i40iw_sc_qp *qp,
- struct i40iw_aeqe_info *info)
-{
- u8 *pkt = qp->q2_buf + Q2_BAD_FRAME_OFFSET;
- u16 ddp_seg_len;
- int copy_len = 0;
- u8 is_tagged = 0;
- u32 opcode;
- struct i40iw_terminate_hdr *termhdr;
-
- termhdr = (struct i40iw_terminate_hdr *)qp->q2_buf;
- memset(termhdr, 0, Q2_BAD_FRAME_OFFSET);
-
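-	/* the terminate header is built at the start of q2; any recoverable DDP/RDMA headers from the offending frame are appended after it below */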
- if (info->q2_data_written) {
- /* Use data from offending packet to fill in ddp & rdma hdrs */
- pkt = i40iw_locate_mpa(pkt);
- ddp_seg_len = ntohs(*(__be16 *)pkt);
- if (ddp_seg_len) {
- copy_len = 2;
- termhdr->hdrct = DDP_LEN_FLAG;
- if (pkt[2] & 0x80) {
- is_tagged = 1;
- if (ddp_seg_len >= TERM_DDP_LEN_TAGGED) {
- copy_len += TERM_DDP_LEN_TAGGED;
- termhdr->hdrct |= DDP_HDR_FLAG;
- }
- } else {
- if (ddp_seg_len >= TERM_DDP_LEN_UNTAGGED) {
- copy_len += TERM_DDP_LEN_UNTAGGED;
- termhdr->hdrct |= DDP_HDR_FLAG;
- }
-
- if (ddp_seg_len >= (TERM_DDP_LEN_UNTAGGED + TERM_RDMA_LEN)) {
- if ((pkt[3] & RDMA_OPCODE_MASK) == RDMA_READ_REQ_OPCODE) {
- copy_len += TERM_RDMA_LEN;
- termhdr->hdrct |= RDMA_HDR_FLAG;
- }
- }
- }
- }
- }
-
- opcode = i40iw_iwarp_opcode(info, pkt);
-
- switch (info->ae_id) {
- case I40IW_AE_AMP_UNALLOCATED_STAG:
- qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
- if (opcode == I40IW_OP_TYPE_RDMA_WRITE)
- i40iw_setup_termhdr(qp, termhdr, FLUSH_PROT_ERR,
- (LAYER_DDP << 4) | DDP_TAGGED_BUFFER, DDP_TAGGED_INV_STAG);
- else
- i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
- (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_INV_STAG);
- break;
- case I40IW_AE_AMP_BOUNDS_VIOLATION:
- qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
- if (info->q2_data_written)
- i40iw_setup_termhdr(qp, termhdr, FLUSH_PROT_ERR,
- (LAYER_DDP << 4) | DDP_TAGGED_BUFFER, DDP_TAGGED_BOUNDS);
- else
- i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
- (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_INV_BOUNDS);
- break;
- case I40IW_AE_AMP_BAD_PD:
- switch (opcode) {
- case I40IW_OP_TYPE_RDMA_WRITE:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_PROT_ERR,
- (LAYER_DDP << 4) | DDP_TAGGED_BUFFER, DDP_TAGGED_UNASSOC_STAG);
- break;
- case I40IW_OP_TYPE_SEND_INV:
- case I40IW_OP_TYPE_SEND_SOL_INV:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
- (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_CANT_INV_STAG);
- break;
- default:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
- (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_UNASSOC_STAG);
- }
- break;
- case I40IW_AE_AMP_INVALID_STAG:
- qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
- i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
- (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_INV_STAG);
- break;
- case I40IW_AE_AMP_BAD_QP:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_QP_OP_ERR,
- (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_QN);
- break;
- case I40IW_AE_AMP_BAD_STAG_KEY:
- case I40IW_AE_AMP_BAD_STAG_INDEX:
- qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
- switch (opcode) {
- case I40IW_OP_TYPE_SEND_INV:
- case I40IW_OP_TYPE_SEND_SOL_INV:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_OP_ERR,
- (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_CANT_INV_STAG);
- break;
- default:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
- (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_INV_STAG);
- }
- break;
- case I40IW_AE_AMP_RIGHTS_VIOLATION:
- case I40IW_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
- case I40IW_AE_PRIV_OPERATION_DENIED:
- qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
- i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
- (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_ACCESS);
- break;
- case I40IW_AE_AMP_TO_WRAP:
- qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
- i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
- (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_TO_WRAP);
- break;
- case I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
- (LAYER_MPA << 4) | DDP_LLP, MPA_CRC);
- break;
- case I40IW_AE_LLP_SEGMENT_TOO_LARGE:
- case I40IW_AE_LLP_SEGMENT_TOO_SMALL:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_LEN_ERR,
- (LAYER_DDP << 4) | DDP_CATASTROPHIC, DDP_CATASTROPHIC_LOCAL);
- break;
- case I40IW_AE_LCE_QP_CATASTROPHIC:
- case I40IW_AE_DDP_NO_L_BIT:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_FATAL_ERR,
- (LAYER_DDP << 4) | DDP_CATASTROPHIC, DDP_CATASTROPHIC_LOCAL);
- break;
- case I40IW_AE_DDP_INVALID_MSN_GAP_IN_MSN:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
- (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_MSN_RANGE);
- break;
- case I40IW_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
- qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
- i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_LEN_ERR,
- (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_TOO_LONG);
- break;
- case I40IW_AE_DDP_UBE_INVALID_DDP_VERSION:
- if (is_tagged)
- i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
- (LAYER_DDP << 4) | DDP_TAGGED_BUFFER, DDP_TAGGED_INV_DDP_VER);
- else
- i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
- (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_DDP_VER);
- break;
- case I40IW_AE_DDP_UBE_INVALID_MO:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
- (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_MO);
- break;
- case I40IW_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_OP_ERR,
- (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_MSN_NO_BUF);
- break;
- case I40IW_AE_DDP_UBE_INVALID_QN:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
- (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_QN);
- break;
- case I40IW_AE_RDMAP_ROE_INVALID_RDMAP_VERSION:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
- (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_INV_RDMAP_VER);
- break;
- case I40IW_AE_RDMAP_ROE_UNEXPECTED_OPCODE:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_QP_OP_ERR,
- (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_UNEXPECTED_OP);
- break;
- default:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_FATAL_ERR,
- (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_UNSPECIFIED);
- break;
- }
-
- if (copy_len)
- memcpy(termhdr + 1, pkt, copy_len);
-
- return sizeof(struct i40iw_terminate_hdr) + copy_len;
-}
-
-/**
- * i40iw_terminate_send_fin() - Send fin for terminate message
- * @qp: qp associated with received terminate AE
- */
-void i40iw_terminate_send_fin(struct i40iw_sc_qp *qp)
-{
- /* Send the fin only */
- i40iw_term_modify_qp(qp,
- I40IW_QP_STATE_TERMINATE,
- I40IWQP_TERM_SEND_FIN_ONLY,
- 0);
-}
-
-/**
- * i40iw_terminate_connection() - Handle a bad AE and send a terminate to the remote QP
- * @qp: qp associated with received terminate AE
- * @info: the struct containing AE information
- */
-void i40iw_terminate_connection(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info)
-{
- u8 termlen = 0;
-
- if (qp->term_flags & I40IW_TERM_SENT)
- return; /* Sanity check */
-
- /* Eventtype can change from bld_terminate_hdr */
- qp->eventtype = TERM_EVENT_QP_FATAL;
- termlen = i40iw_bld_terminate_hdr(qp, info);
- i40iw_terminate_start_timer(qp);
- qp->term_flags |= I40IW_TERM_SENT;
- i40iw_term_modify_qp(qp, I40IW_QP_STATE_TERMINATE,
- I40IWQP_TERM_SEND_TERM_ONLY, termlen);
-}
-
-/**
- * i40iw_terminate_received - handle terminate received AE
- * @qp: qp associated with received terminate AE
- * @info: the struct containing AE information
- */
-void i40iw_terminate_received(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info)
-{
- u8 *pkt = qp->q2_buf + Q2_BAD_FRAME_OFFSET;
- __be32 *mpa;
- u8 ddp_ctl;
- u8 rdma_ctl;
- u16 aeq_id = 0;
- struct i40iw_terminate_hdr *termhdr;
-
- mpa = (__be32 *)i40iw_locate_mpa(pkt);
- if (info->q2_data_written) {
- /* did not validate the frame - do it now */
- ddp_ctl = (ntohl(mpa[0]) >> 8) & 0xff;
- rdma_ctl = ntohl(mpa[0]) & 0xff;
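-		/* each check below maps a malformed field of the received terminate (DDP control, DDP version, queue number, MSN, MO, RDMAP version) to a specific AE */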
- if ((ddp_ctl & 0xc0) != 0x40)
- aeq_id = I40IW_AE_LCE_QP_CATASTROPHIC;
- else if ((ddp_ctl & 0x03) != 1)
- aeq_id = I40IW_AE_DDP_UBE_INVALID_DDP_VERSION;
- else if (ntohl(mpa[2]) != 2)
- aeq_id = I40IW_AE_DDP_UBE_INVALID_QN;
- else if (ntohl(mpa[3]) != 1)
- aeq_id = I40IW_AE_DDP_INVALID_MSN_GAP_IN_MSN;
- else if (ntohl(mpa[4]) != 0)
- aeq_id = I40IW_AE_DDP_UBE_INVALID_MO;
- else if ((rdma_ctl & 0xc0) != 0x40)
- aeq_id = I40IW_AE_RDMAP_ROE_INVALID_RDMAP_VERSION;
-
- info->ae_id = aeq_id;
- if (info->ae_id) {
- /* Bad terminate recvd - send back a terminate */
- i40iw_terminate_connection(qp, info);
- return;
- }
- }
-
- qp->term_flags |= I40IW_TERM_RCVD;
- qp->eventtype = TERM_EVENT_QP_FATAL;
- termhdr = (struct i40iw_terminate_hdr *)&mpa[5];
- if (termhdr->layer_etype == RDMAP_REMOTE_PROT ||
- termhdr->layer_etype == RDMAP_REMOTE_OP) {
- i40iw_terminate_done(qp, 0);
- } else {
- i40iw_terminate_start_timer(qp);
- i40iw_terminate_send_fin(qp);
- }
-}
-
-/**
- * i40iw_sc_vsi_init - Initialize virtual device
- * @vsi: pointer to the vsi structure
- * @info: parameters to initialize vsi
- */
-void i40iw_sc_vsi_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_init_info *info)
-{
- int i;
-
- vsi->dev = info->dev;
- vsi->back_vsi = info->back_vsi;
- vsi->mtu = info->params->mtu;
- vsi->exception_lan_queue = info->exception_lan_queue;
- i40iw_fill_qos_list(info->params->qs_handle_list);
-
- for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) {
- vsi->qos[i].qs_handle = info->params->qs_handle_list[i];
- i40iw_debug(vsi->dev, I40IW_DEBUG_DCB, "qset[%d]: %d\n", i,
- vsi->qos[i].qs_handle);
- spin_lock_init(&vsi->qos[i].lock);
- INIT_LIST_HEAD(&vsi->qos[i].qplist);
- }
-}
-
-/**
- * i40iw_hw_stats_init - Initialize HW stats table
- * @stats: pestat struct
- * @fcn_idx: PCI fn id
- * @is_pf: Is it a PF?
- *
- * Populate the HW stats table with the register offset address for each
- * statistic and take an initial reading of every counter.
- */
-void i40iw_hw_stats_init(struct i40iw_vsi_pestat *stats, u8 fcn_idx, bool is_pf)
-{
- u32 stats_reg_offset;
- u32 stats_index;
- struct i40iw_dev_hw_stats_offsets *stats_table =
- &stats->hw_stats_offsets;
- struct i40iw_dev_hw_stats *last_rd_stats = &stats->last_read_hw_stats;
-
- if (is_pf) {
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] =
- I40E_GLPES_PFIP4RXDISCARD(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] =
- I40E_GLPES_PFIP4RXTRUNC(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] =
- I40E_GLPES_PFIP4TXNOROUTE(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] =
- I40E_GLPES_PFIP6RXDISCARD(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] =
- I40E_GLPES_PFIP6RXTRUNC(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] =
- I40E_GLPES_PFIP6TXNOROUTE(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] =
- I40E_GLPES_PFTCPRTXSEG(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] =
- I40E_GLPES_PFTCPRXOPTERR(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] =
- I40E_GLPES_PFTCPRXPROTOERR(fcn_idx);
-
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] =
- I40E_GLPES_PFIP4RXOCTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] =
- I40E_GLPES_PFIP4RXPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] =
- I40E_GLPES_PFIP4RXFRAGSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] =
- I40E_GLPES_PFIP4RXMCPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] =
- I40E_GLPES_PFIP4TXOCTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] =
- I40E_GLPES_PFIP4TXPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] =
- I40E_GLPES_PFIP4TXFRAGSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] =
- I40E_GLPES_PFIP4TXMCPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] =
- I40E_GLPES_PFIP6RXOCTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] =
- I40E_GLPES_PFIP6RXPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] =
- I40E_GLPES_PFIP6RXFRAGSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] =
- I40E_GLPES_PFIP6RXMCPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] =
- I40E_GLPES_PFIP6TXOCTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
- I40E_GLPES_PFIP6TXPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] =
- I40E_GLPES_PFIP6TXFRAGSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] =
- I40E_GLPES_PFTCPRXSEGSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] =
- I40E_GLPES_PFTCPTXSEGLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] =
- I40E_GLPES_PFRDMARXRDSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] =
- I40E_GLPES_PFRDMARXSNDSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] =
- I40E_GLPES_PFRDMARXWRSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] =
- I40E_GLPES_PFRDMATXRDSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] =
- I40E_GLPES_PFRDMATXSNDSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] =
- I40E_GLPES_PFRDMATXWRSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] =
- I40E_GLPES_PFRDMAVBNDLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] =
- I40E_GLPES_PFRDMAVINVLO(fcn_idx);
- } else {
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] =
- I40E_GLPES_VFIP4RXDISCARD(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] =
- I40E_GLPES_VFIP4RXTRUNC(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] =
- I40E_GLPES_VFIP4TXNOROUTE(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] =
- I40E_GLPES_VFIP6RXDISCARD(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] =
- I40E_GLPES_VFIP6RXTRUNC(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] =
- I40E_GLPES_VFIP6TXNOROUTE(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] =
- I40E_GLPES_VFTCPRTXSEG(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] =
- I40E_GLPES_VFTCPRXOPTERR(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] =
- I40E_GLPES_VFTCPRXPROTOERR(fcn_idx);
-
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] =
- I40E_GLPES_VFIP4RXOCTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] =
- I40E_GLPES_VFIP4RXPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] =
- I40E_GLPES_VFIP4RXFRAGSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] =
- I40E_GLPES_VFIP4RXMCPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] =
- I40E_GLPES_VFIP4TXOCTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] =
- I40E_GLPES_VFIP4TXPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] =
- I40E_GLPES_VFIP4TXFRAGSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] =
- I40E_GLPES_VFIP4TXMCPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] =
- I40E_GLPES_VFIP6RXOCTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] =
- I40E_GLPES_VFIP6RXPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] =
- I40E_GLPES_VFIP6RXFRAGSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] =
- I40E_GLPES_VFIP6RXMCPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] =
- I40E_GLPES_VFIP6TXOCTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
- I40E_GLPES_VFIP6TXPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] =
- I40E_GLPES_VFIP6TXFRAGSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] =
- I40E_GLPES_VFTCPRXSEGSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] =
- I40E_GLPES_VFTCPTXSEGLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] =
- I40E_GLPES_VFRDMARXRDSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] =
- I40E_GLPES_VFRDMARXSNDSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] =
- I40E_GLPES_VFRDMARXWRSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] =
- I40E_GLPES_VFRDMATXRDSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] =
- I40E_GLPES_VFRDMATXSNDSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] =
- I40E_GLPES_VFRDMATXWRSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] =
- I40E_GLPES_VFRDMAVBNDLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] =
- I40E_GLPES_VFRDMAVINVLO(fcn_idx);
- }
-
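-	/* take an initial snapshot of every counter so later reads can compute deltas against last_read_hw_stats */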
- for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_64;
- stats_index++) {
- stats_reg_offset = stats_table->stats_offset_64[stats_index];
- last_rd_stats->stats_value_64[stats_index] =
- readq(stats->hw->hw_addr + stats_reg_offset);
- }
-
- for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_32;
- stats_index++) {
- stats_reg_offset = stats_table->stats_offset_32[stats_index];
- last_rd_stats->stats_value_32[stats_index] =
- i40iw_rd32(stats->hw, stats_reg_offset);
- }
-}
-
-/**
- * i40iw_hw_stats_read_32 - Read a 32-bit HW stats counter, accommodating roll-over.
- * @stats: pestat struct
- * @index: index in HW stats table which contains offset reg-addr
- * @value: hw stats value
- */
-void i40iw_hw_stats_read_32(struct i40iw_vsi_pestat *stats,
- enum i40iw_hw_stats_index_32b index,
- u64 *value)
-{
- struct i40iw_dev_hw_stats_offsets *stats_table =
- &stats->hw_stats_offsets;
- struct i40iw_dev_hw_stats *last_rd_stats = &stats->last_read_hw_stats;
- struct i40iw_dev_hw_stats *hw_stats = &stats->hw_stats;
- u64 new_stats_value = 0;
- u32 stats_reg_offset = stats_table->stats_offset_32[index];
-
- new_stats_value = i40iw_rd32(stats->hw, stats_reg_offset);
-	/* roll-over case */
- if (new_stats_value < last_rd_stats->stats_value_32[index])
- hw_stats->stats_value_32[index] += new_stats_value;
- else
- hw_stats->stats_value_32[index] +=
- new_stats_value - last_rd_stats->stats_value_32[index];
- last_rd_stats->stats_value_32[index] = new_stats_value;
- *value = hw_stats->stats_value_32[index];
-}
-
-/**
- * i40iw_hw_stats_read_64 - Read a 64-bit HW stats counter, accommodating roll-over.
- * @stats: pestat struct
- * @index: index in HW stats table which contains offset reg-addr
- * @value: hw stats value
- */
-void i40iw_hw_stats_read_64(struct i40iw_vsi_pestat *stats,
- enum i40iw_hw_stats_index_64b index,
- u64 *value)
-{
- struct i40iw_dev_hw_stats_offsets *stats_table =
- &stats->hw_stats_offsets;
- struct i40iw_dev_hw_stats *last_rd_stats = &stats->last_read_hw_stats;
- struct i40iw_dev_hw_stats *hw_stats = &stats->hw_stats;
- u64 new_stats_value = 0;
- u32 stats_reg_offset = stats_table->stats_offset_64[index];
-
- new_stats_value = readq(stats->hw->hw_addr + stats_reg_offset);
-	/* roll-over case */
- if (new_stats_value < last_rd_stats->stats_value_64[index])
- hw_stats->stats_value_64[index] += new_stats_value;
- else
- hw_stats->stats_value_64[index] +=
- new_stats_value - last_rd_stats->stats_value_64[index];
- last_rd_stats->stats_value_64[index] = new_stats_value;
- *value = hw_stats->stats_value_64[index];
-}
-
-/**
- * i40iw_hw_stats_read_all - read all HW stat counters
- * @stats: pestat struct
- * @stats_values: hw stats structure
- *
- * Read all the HW stat counters and populate the hw_stats structure
- * of the passed-in vsi's pestat as well as the copy in stats_values.
- */
-void i40iw_hw_stats_read_all(struct i40iw_vsi_pestat *stats,
- struct i40iw_dev_hw_stats *stats_values)
-{
- u32 stats_index;
- unsigned long flags;
-
- spin_lock_irqsave(&stats->lock, flags);
-
- for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_32;
- stats_index++)
- i40iw_hw_stats_read_32(stats, stats_index,
- &stats_values->stats_value_32[stats_index]);
- for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_64;
- stats_index++)
- i40iw_hw_stats_read_64(stats, stats_index,
- &stats_values->stats_value_64[stats_index]);
- spin_unlock_irqrestore(&stats->lock, flags);
-}
-
-/**
- * i40iw_hw_stats_refresh_all - Update all HW stats structs
- * @stats: pestat struct
- *
- * Read all the HW stats counters to refresh the values in the hw_stats
- * structure of the passed-in pestat.
- */
-void i40iw_hw_stats_refresh_all(struct i40iw_vsi_pestat *stats)
-{
- u64 stats_value;
- u32 stats_index;
- unsigned long flags;
-
- spin_lock_irqsave(&stats->lock, flags);
-
- for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_32;
- stats_index++)
- i40iw_hw_stats_read_32(stats, stats_index, &stats_value);
- for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_64;
- stats_index++)
- i40iw_hw_stats_read_64(stats, stats_index, &stats_value);
- spin_unlock_irqrestore(&stats->lock, flags);
-}
-
-/**
- * i40iw_get_fcn_id - Return the function id
- * @dev: pointer to the device
- */
-static u8 i40iw_get_fcn_id(struct i40iw_sc_dev *dev)
-{
- u8 fcn_id = I40IW_INVALID_FCN_ID;
- u8 i;
-
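-	/* indexes below I40IW_FIRST_NON_PF_STAT are left for PF use; hand out the first free non-PF index */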
- for (i = I40IW_FIRST_NON_PF_STAT; i < I40IW_MAX_STATS_COUNT; i++)
- if (!dev->fcn_id_array[i]) {
- fcn_id = i;
- dev->fcn_id_array[i] = true;
- break;
- }
- return fcn_id;
-}
-
-/**
- * i40iw_vsi_stats_init - Initialize the vsi statistics
- * @vsi: pointer to the vsi structure
- * @info: The info structure used for initialization
- */
-enum i40iw_status_code i40iw_vsi_stats_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_stats_info *info)
-{
- u8 fcn_id = info->fcn_id;
-
- if (info->alloc_fcn_id)
- fcn_id = i40iw_get_fcn_id(vsi->dev);
-
- if (fcn_id == I40IW_INVALID_FCN_ID)
- return I40IW_ERR_NOT_READY;
-
- vsi->pestat = info->pestat;
- vsi->pestat->hw = vsi->dev->hw;
- vsi->pestat->vsi = vsi;
-
- if (info->stats_initialize) {
- i40iw_hw_stats_init(vsi->pestat, fcn_id, true);
- spin_lock_init(&vsi->pestat->lock);
- i40iw_hw_stats_start_timer(vsi);
- }
- vsi->stats_fcn_id_alloc = info->alloc_fcn_id;
- vsi->fcn_id = fcn_id;
- return I40IW_SUCCESS;
-}
-
-/**
- * i40iw_vsi_stats_free - Free the vsi stats
- * @vsi: pointer to the vsi structure
- */
-void i40iw_vsi_stats_free(struct i40iw_sc_vsi *vsi)
-{
- u8 fcn_id = vsi->fcn_id;
-
- if (vsi->stats_fcn_id_alloc && fcn_id < I40IW_MAX_STATS_COUNT)
- vsi->dev->fcn_id_array[fcn_id] = false;
- i40iw_hw_stats_stop_timer(vsi);
-}
-
-static struct i40iw_cqp_ops iw_cqp_ops = {
- .cqp_init = i40iw_sc_cqp_init,
- .cqp_create = i40iw_sc_cqp_create,
- .cqp_post_sq = i40iw_sc_cqp_post_sq,
- .cqp_get_next_send_wqe = i40iw_sc_cqp_get_next_send_wqe,
- .cqp_destroy = i40iw_sc_cqp_destroy,
- .poll_for_cqp_op_done = i40iw_sc_poll_for_cqp_op_done
-};
-
-static struct i40iw_ccq_ops iw_ccq_ops = {
- .ccq_init = i40iw_sc_ccq_init,
- .ccq_create = i40iw_sc_ccq_create,
- .ccq_destroy = i40iw_sc_ccq_destroy,
- .ccq_create_done = i40iw_sc_ccq_create_done,
- .ccq_get_cqe_info = i40iw_sc_ccq_get_cqe_info,
- .ccq_arm = i40iw_sc_ccq_arm
-};
-
-static struct i40iw_ceq_ops iw_ceq_ops = {
- .ceq_init = i40iw_sc_ceq_init,
- .ceq_create = i40iw_sc_ceq_create,
- .cceq_create_done = i40iw_sc_cceq_create_done,
- .cceq_destroy_done = i40iw_sc_cceq_destroy_done,
- .cceq_create = i40iw_sc_cceq_create,
- .ceq_destroy = i40iw_sc_ceq_destroy,
- .process_ceq = i40iw_sc_process_ceq
-};
-
-static struct i40iw_aeq_ops iw_aeq_ops = {
- .aeq_init = i40iw_sc_aeq_init,
- .aeq_create = i40iw_sc_aeq_create,
- .aeq_destroy = i40iw_sc_aeq_destroy,
- .get_next_aeqe = i40iw_sc_get_next_aeqe,
- .repost_aeq_entries = i40iw_sc_repost_aeq_entries,
- .aeq_create_done = i40iw_sc_aeq_create_done,
- .aeq_destroy_done = i40iw_sc_aeq_destroy_done
-};
-
-/* iwarp pd ops */
-static struct i40iw_pd_ops iw_pd_ops = {
- .pd_init = i40iw_sc_pd_init,
-};
-
-static struct i40iw_priv_qp_ops iw_priv_qp_ops = {
- .qp_init = i40iw_sc_qp_init,
- .qp_create = i40iw_sc_qp_create,
- .qp_modify = i40iw_sc_qp_modify,
- .qp_destroy = i40iw_sc_qp_destroy,
- .qp_flush_wqes = i40iw_sc_qp_flush_wqes,
- .qp_upload_context = i40iw_sc_qp_upload_context,
- .qp_setctx = i40iw_sc_qp_setctx,
- .qp_send_lsmm = i40iw_sc_send_lsmm,
- .qp_send_lsmm_nostag = i40iw_sc_send_lsmm_nostag,
- .qp_send_rtt = i40iw_sc_send_rtt,
- .qp_post_wqe0 = i40iw_sc_post_wqe0,
- .iw_mr_fast_register = i40iw_sc_mr_fast_register
-};
-
-static struct i40iw_priv_cq_ops iw_priv_cq_ops = {
- .cq_init = i40iw_sc_cq_init,
- .cq_create = i40iw_sc_cq_create,
- .cq_destroy = i40iw_sc_cq_destroy,
- .cq_modify = i40iw_sc_cq_modify,
-};
-
-static struct i40iw_mr_ops iw_mr_ops = {
- .alloc_stag = i40iw_sc_alloc_stag,
- .mr_reg_non_shared = i40iw_sc_mr_reg_non_shared,
- .mr_reg_shared = i40iw_sc_mr_reg_shared,
- .dealloc_stag = i40iw_sc_dealloc_stag,
- .query_stag = i40iw_sc_query_stag,
- .mw_alloc = i40iw_sc_mw_alloc
-};
-
-static struct i40iw_cqp_misc_ops iw_cqp_misc_ops = {
- .manage_push_page = i40iw_sc_manage_push_page,
- .manage_hmc_pm_func_table = i40iw_sc_manage_hmc_pm_func_table,
- .set_hmc_resource_profile = i40iw_sc_set_hmc_resource_profile,
- .commit_fpm_values = i40iw_sc_commit_fpm_values,
- .query_fpm_values = i40iw_sc_query_fpm_values,
- .static_hmc_pages_allocated = i40iw_sc_static_hmc_pages_allocated,
- .add_arp_cache_entry = i40iw_sc_add_arp_cache_entry,
- .del_arp_cache_entry = i40iw_sc_del_arp_cache_entry,
- .query_arp_cache_entry = i40iw_sc_query_arp_cache_entry,
- .manage_apbvt_entry = i40iw_sc_manage_apbvt_entry,
- .manage_qhash_table_entry = i40iw_sc_manage_qhash_table_entry,
- .alloc_local_mac_ipaddr_table_entry = i40iw_sc_alloc_local_mac_ipaddr_entry,
- .add_local_mac_ipaddr_entry = i40iw_sc_add_local_mac_ipaddr_entry,
- .del_local_mac_ipaddr_entry = i40iw_sc_del_local_mac_ipaddr_entry,
- .cqp_nop = i40iw_sc_cqp_nop,
- .commit_fpm_values_done = i40iw_sc_commit_fpm_values_done,
- .query_fpm_values_done = i40iw_sc_query_fpm_values_done,
- .manage_hmc_pm_func_table_done = i40iw_sc_manage_hmc_pm_func_table_done,
- .update_suspend_qp = i40iw_sc_suspend_qp,
- .update_resume_qp = i40iw_sc_resume_qp
-};
-
-static struct i40iw_hmc_ops iw_hmc_ops = {
- .init_iw_hmc = i40iw_sc_init_iw_hmc,
- .parse_fpm_query_buf = i40iw_sc_parse_fpm_query_buf,
- .configure_iw_fpm = i40iw_sc_configure_iw_fpm,
- .parse_fpm_commit_buf = i40iw_sc_parse_fpm_commit_buf,
- .create_hmc_object = i40iw_sc_create_hmc_obj,
- .del_hmc_object = i40iw_sc_del_hmc_obj
-};
-
-/**
- * i40iw_device_init - Initialize IWARP device
- * @dev: IWARP device pointer
- * @info: IWARP init info
- */
-enum i40iw_status_code i40iw_device_init(struct i40iw_sc_dev *dev,
- struct i40iw_device_init_info *info)
-{
- u32 val;
- u32 vchnl_ver = 0;
- u16 hmc_fcn = 0;
- enum i40iw_status_code ret_code = 0;
- u8 db_size;
-
- spin_lock_init(&dev->cqp_lock);
-
- i40iw_device_init_uk(&dev->dev_uk);
-
- dev->debug_mask = info->debug_mask;
-
- dev->hmc_fn_id = info->hmc_fn_id;
- dev->is_pf = info->is_pf;
-
- dev->fpm_query_buf_pa = info->fpm_query_buf_pa;
- dev->fpm_query_buf = info->fpm_query_buf;
-
- dev->fpm_commit_buf_pa = info->fpm_commit_buf_pa;
- dev->fpm_commit_buf = info->fpm_commit_buf;
-
- dev->hw = info->hw;
- dev->hw->hw_addr = info->bar0;
-
- if (dev->is_pf) {
- val = i40iw_rd32(dev->hw, I40E_GLPCI_DREVID);
- dev->hw_rev = (u8)RS_32(val, I40E_GLPCI_DREVID_DEFAULT_REVID);
-
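-		/* the PE doorbell region advertised in GLPCI_LBARCTRL must be 4M or 8M; anything else means PE doorbells are not enabled */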
- val = i40iw_rd32(dev->hw, I40E_GLPCI_LBARCTRL);
- db_size = (u8)RS_32(val, I40E_GLPCI_LBARCTRL_PE_DB_SIZE);
- if ((db_size != I40IW_PE_DB_SIZE_4M) &&
- (db_size != I40IW_PE_DB_SIZE_8M)) {
- i40iw_debug(dev, I40IW_DEBUG_DEV,
- "%s: PE doorbell is not enabled in CSR val 0x%x\n",
- __func__, val);
- ret_code = I40IW_ERR_PE_DOORBELL_NOT_ENABLED;
- return ret_code;
- }
- dev->db_addr = dev->hw->hw_addr + I40IW_DB_ADDR_OFFSET;
- dev->vchnl_if.vchnl_recv = i40iw_vchnl_recv_pf;
- } else {
- dev->db_addr = dev->hw->hw_addr + I40IW_VF_DB_ADDR_OFFSET;
- }
-
- dev->cqp_ops = &iw_cqp_ops;
- dev->ccq_ops = &iw_ccq_ops;
- dev->ceq_ops = &iw_ceq_ops;
- dev->aeq_ops = &iw_aeq_ops;
- dev->cqp_misc_ops = &iw_cqp_misc_ops;
- dev->iw_pd_ops = &iw_pd_ops;
- dev->iw_priv_qp_ops = &iw_priv_qp_ops;
- dev->iw_priv_cq_ops = &iw_priv_cq_ops;
- dev->mr_ops = &iw_mr_ops;
- dev->hmc_ops = &iw_hmc_ops;
- dev->vchnl_if.vchnl_send = info->vchnl_send;
- if (dev->vchnl_if.vchnl_send)
- dev->vchnl_up = true;
- else
- dev->vchnl_up = false;
- if (!dev->is_pf) {
- dev->vchnl_if.vchnl_recv = i40iw_vchnl_recv_vf;
- ret_code = i40iw_vchnl_vf_get_ver(dev, &vchnl_ver);
- if (!ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_DEV,
- "%s: Get Channel version rc = 0x%0x, version is %u\n",
- __func__, ret_code, vchnl_ver);
- ret_code = i40iw_vchnl_vf_get_hmc_fcn(dev, &hmc_fcn);
- if (!ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_DEV,
- "%s Get HMC function rc = 0x%0x, hmc fcn is %u\n",
- __func__, ret_code, hmc_fcn);
- dev->hmc_fn_id = (u8)hmc_fcn;
- }
- }
- }
- dev->iw_vf_cqp_ops = &iw_vf_cqp_ops;
-
- return ret_code;
-}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h
deleted file mode 100644
index 6ddaeec87d2f..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_d.h
+++ /dev/null
@@ -1,1737 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_D_H
-#define I40IW_D_H
-
-#define I40IW_FIRST_USER_QP_ID 2
-
-#define I40IW_DB_ADDR_OFFSET (4 * 1024 * 1024 - 64 * 1024)
-#define I40IW_VF_DB_ADDR_OFFSET (64 * 1024)
-
-#define I40IW_PUSH_OFFSET (4 * 1024 * 1024)
-#define I40IW_PF_FIRST_PUSH_PAGE_INDEX 16
-#define I40IW_VF_PUSH_OFFSET ((8 + 64) * 1024)
-#define I40IW_VF_FIRST_PUSH_PAGE_INDEX 2
-
-#define I40IW_PE_DB_SIZE_4M 1
-#define I40IW_PE_DB_SIZE_8M 2
-
-#define I40IW_DDP_VER 1
-#define I40IW_RDMAP_VER 1
-
-#define I40IW_RDMA_MODE_RDMAC 0
-#define I40IW_RDMA_MODE_IETF 1
-
-#define I40IW_QP_STATE_INVALID 0
-#define I40IW_QP_STATE_IDLE 1
-#define I40IW_QP_STATE_RTS 2
-#define I40IW_QP_STATE_CLOSING 3
-#define I40IW_QP_STATE_RESERVED 4
-#define I40IW_QP_STATE_TERMINATE 5
-#define I40IW_QP_STATE_ERROR 6
-
-#define I40IW_STAG_STATE_INVALID 0
-#define I40IW_STAG_STATE_VALID 1
-
-#define I40IW_STAG_TYPE_SHARED 0
-#define I40IW_STAG_TYPE_NONSHARED 1
-
-#define I40IW_MAX_USER_PRIORITY 8
-#define I40IW_MAX_STATS_COUNT 16
-#define I40IW_FIRST_NON_PF_STAT 4
-
-
-#define I40IW_MTU_TO_MSS_IPV4 40
-#define I40IW_MTU_TO_MSS_IPV6 60
-#define I40IW_DEFAULT_MTU 1500
-
-#define LS_64_1(val, bits) ((u64)(uintptr_t)val << bits)
-#define RS_64_1(val, bits) ((u64)(uintptr_t)val >> bits)
-#define LS_32_1(val, bits) (u32)(val << bits)
-#define RS_32_1(val, bits) (u32)(val >> bits)
-#define I40E_HI_DWORD(x) ((u32)((((x) >> 16) >> 16) & 0xFFFFFFFF))
-
-#define QS_HANDLE_UNKNOWN 0xffff
-
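-/* the LS_xx/RS_xx helpers place a value into, or extract it from, a register field using that field's _SHIFT and _MASK definitions */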
-#define LS_64(val, field) (((u64)val << field ## _SHIFT) & (field ## _MASK))
-
-#define RS_64(val, field) ((u64)(val & field ## _MASK) >> field ## _SHIFT)
-#define LS_32(val, field) ((val << field ## _SHIFT) & (field ## _MASK))
-#define RS_32(val, field) ((val & field ## _MASK) >> field ## _SHIFT)
-
-#define TERM_DDP_LEN_TAGGED 14
-#define TERM_DDP_LEN_UNTAGGED 18
-#define TERM_RDMA_LEN 28
-#define RDMA_OPCODE_MASK 0x0f
-#define RDMA_READ_REQ_OPCODE 1
-#define Q2_BAD_FRAME_OFFSET 72
-#define Q2_FPSN_OFFSET 64
-#define CQE_MAJOR_DRV 0x8000
-
-#define I40IW_TERM_SENT 0x01
-#define I40IW_TERM_RCVD 0x02
-#define I40IW_TERM_DONE 0x04
-#define I40IW_MAC_HLEN 14
-
-#define I40IW_INVALID_WQE_INDEX 0xffffffff
-
-#define I40IW_CQP_WAIT_POLL_REGS 1
-#define I40IW_CQP_WAIT_POLL_CQ 2
-#define I40IW_CQP_WAIT_EVENT 3
-
-#define I40IW_CQP_INIT_WQE(wqe) memset(wqe, 0, 64)
-
-#define I40IW_GET_CURRENT_CQ_ELEMENT(_cq) \
- ( \
- &((_cq)->cq_base[I40IW_RING_GETCURRENT_HEAD((_cq)->cq_ring)]) \
- )
-#define I40IW_GET_CURRENT_EXTENDED_CQ_ELEMENT(_cq) \
- ( \
- &(((struct i40iw_extended_cqe *) \
- ((_cq)->cq_base))[I40IW_RING_GETCURRENT_HEAD((_cq)->cq_ring)]) \
- )
-
-#define I40IW_GET_CURRENT_AEQ_ELEMENT(_aeq) \
- ( \
- &_aeq->aeqe_base[I40IW_RING_GETCURRENT_TAIL(_aeq->aeq_ring)] \
- )
-
-#define I40IW_GET_CURRENT_CEQ_ELEMENT(_ceq) \
- ( \
- &_ceq->ceqe_base[I40IW_RING_GETCURRENT_TAIL(_ceq->ceq_ring)] \
- )
-
-#define I40IW_AE_SOURCE_RSVD 0x0
-#define I40IW_AE_SOURCE_RQ 0x1
-#define I40IW_AE_SOURCE_RQ_0011 0x3
-
-#define I40IW_AE_SOURCE_CQ 0x2
-#define I40IW_AE_SOURCE_CQ_0110 0x6
-#define I40IW_AE_SOURCE_CQ_1010 0xA
-#define I40IW_AE_SOURCE_CQ_1110 0xE
-
-#define I40IW_AE_SOURCE_SQ 0x5
-#define I40IW_AE_SOURCE_SQ_0111 0x7
-
-#define I40IW_AE_SOURCE_IN_RR_WR 0x9
-#define I40IW_AE_SOURCE_IN_RR_WR_1011 0xB
-#define I40IW_AE_SOURCE_OUT_RR 0xD
-#define I40IW_AE_SOURCE_OUT_RR_1111 0xF
-
-#define I40IW_TCP_STATE_NON_EXISTENT 0
-#define I40IW_TCP_STATE_CLOSED 1
-#define I40IW_TCP_STATE_LISTEN 2
-#define I40IW_STATE_SYN_SEND 3
-#define I40IW_TCP_STATE_SYN_RECEIVED 4
-#define I40IW_TCP_STATE_ESTABLISHED 5
-#define I40IW_TCP_STATE_CLOSE_WAIT 6
-#define I40IW_TCP_STATE_FIN_WAIT_1 7
-#define I40IW_TCP_STATE_CLOSING 8
-#define I40IW_TCP_STATE_LAST_ACK 9
-#define I40IW_TCP_STATE_FIN_WAIT_2 10
-#define I40IW_TCP_STATE_TIME_WAIT 11
-#define I40IW_TCP_STATE_RESERVED_1 12
-#define I40IW_TCP_STATE_RESERVED_2 13
-#define I40IW_TCP_STATE_RESERVED_3 14
-#define I40IW_TCP_STATE_RESERVED_4 15
-
-/* ILQ CQP hash table fields */
-#define I40IW_CQPSQ_QHASH_VLANID_SHIFT 32
-#define I40IW_CQPSQ_QHASH_VLANID_MASK \
- ((u64)0xfff << I40IW_CQPSQ_QHASH_VLANID_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_QPN_SHIFT 32
-#define I40IW_CQPSQ_QHASH_QPN_MASK \
- ((u64)0x3ffff << I40IW_CQPSQ_QHASH_QPN_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_QS_HANDLE_SHIFT 0
-#define I40IW_CQPSQ_QHASH_QS_HANDLE_MASK ((u64)0x3ff << I40IW_CQPSQ_QHASH_QS_HANDLE_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_SRC_PORT_SHIFT 16
-#define I40IW_CQPSQ_QHASH_SRC_PORT_MASK \
- ((u64)0xffff << I40IW_CQPSQ_QHASH_SRC_PORT_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_DEST_PORT_SHIFT 0
-#define I40IW_CQPSQ_QHASH_DEST_PORT_MASK \
- ((u64)0xffff << I40IW_CQPSQ_QHASH_DEST_PORT_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_ADDR0_SHIFT 32
-#define I40IW_CQPSQ_QHASH_ADDR0_MASK \
- ((u64)0xffffffff << I40IW_CQPSQ_QHASH_ADDR0_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_ADDR1_SHIFT 0
-#define I40IW_CQPSQ_QHASH_ADDR1_MASK \
- ((u64)0xffffffff << I40IW_CQPSQ_QHASH_ADDR1_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_ADDR2_SHIFT 32
-#define I40IW_CQPSQ_QHASH_ADDR2_MASK \
- ((u64)0xffffffff << I40IW_CQPSQ_QHASH_ADDR2_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_ADDR3_SHIFT 0
-#define I40IW_CQPSQ_QHASH_ADDR3_MASK \
- ((u64)0xffffffff << I40IW_CQPSQ_QHASH_ADDR3_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_WQEVALID_SHIFT 63
-#define I40IW_CQPSQ_QHASH_WQEVALID_MASK \
- ((u64)0x1 << I40IW_CQPSQ_QHASH_WQEVALID_SHIFT)
-#define I40IW_CQPSQ_QHASH_OPCODE_SHIFT 32
-#define I40IW_CQPSQ_QHASH_OPCODE_MASK \
- ((u64)0x3f << I40IW_CQPSQ_QHASH_OPCODE_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_MANAGE_SHIFT 61
-#define I40IW_CQPSQ_QHASH_MANAGE_MASK \
- ((u64)0x3 << I40IW_CQPSQ_QHASH_MANAGE_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_IPV4VALID_SHIFT 60
-#define I40IW_CQPSQ_QHASH_IPV4VALID_MASK \
- ((u64)0x1 << I40IW_CQPSQ_QHASH_IPV4VALID_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_VLANVALID_SHIFT 59
-#define I40IW_CQPSQ_QHASH_VLANVALID_MASK \
- ((u64)0x1 << I40IW_CQPSQ_QHASH_VLANVALID_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_ENTRYTYPE_SHIFT 42
-#define I40IW_CQPSQ_QHASH_ENTRYTYPE_MASK \
- ((u64)0x7 << I40IW_CQPSQ_QHASH_ENTRYTYPE_SHIFT)
-/* CQP Host Context */
-#define I40IW_CQPHC_EN_DC_TCP_SHIFT 0
-#define I40IW_CQPHC_EN_DC_TCP_MASK (1UL << I40IW_CQPHC_EN_DC_TCP_SHIFT)
-
-#define I40IW_CQPHC_SQSIZE_SHIFT 8
-#define I40IW_CQPHC_SQSIZE_MASK (0xfUL << I40IW_CQPHC_SQSIZE_SHIFT)
-
-#define I40IW_CQPHC_DISABLE_PFPDUS_SHIFT 1
-#define I40IW_CQPHC_DISABLE_PFPDUS_MASK (0x1UL << I40IW_CQPHC_DISABLE_PFPDUS_SHIFT)
-
-#define I40IW_CQPHC_ENABLED_VFS_SHIFT 32
-#define I40IW_CQPHC_ENABLED_VFS_MASK (0x3fULL << I40IW_CQPHC_ENABLED_VFS_SHIFT)
-
-#define I40IW_CQPHC_HMC_PROFILE_SHIFT 0
-#define I40IW_CQPHC_HMC_PROFILE_MASK (0x7ULL << I40IW_CQPHC_HMC_PROFILE_SHIFT)
-
-#define I40IW_CQPHC_SVER_SHIFT 24
-#define I40IW_CQPHC_SVER_MASK (0xffUL << I40IW_CQPHC_SVER_SHIFT)
-
-#define I40IW_CQPHC_SQBASE_SHIFT 9
-#define I40IW_CQPHC_SQBASE_MASK \
- (0xfffffffffffffeULL << I40IW_CQPHC_SQBASE_SHIFT)
-
-#define I40IW_CQPHC_QPCTX_SHIFT 0
-#define I40IW_CQPHC_QPCTX_MASK \
- (0xffffffffffffffffULL << I40IW_CQPHC_QPCTX_SHIFT)
-#define I40IW_CQPHC_SVER 1
-
-#define I40IW_CQP_SW_SQSIZE_4 4
-#define I40IW_CQP_SW_SQSIZE_2048 2048
-
-/* iWARP QP Doorbell shadow area */
-#define I40IW_QP_DBSA_HW_SQ_TAIL_SHIFT 0
-#define I40IW_QP_DBSA_HW_SQ_TAIL_MASK \
- (0x3fffUL << I40IW_QP_DBSA_HW_SQ_TAIL_SHIFT)
-
-/* Completion Queue Doorbell shadow area */
-#define I40IW_CQ_DBSA_CQEIDX_SHIFT 0
-#define I40IW_CQ_DBSA_CQEIDX_MASK (0xfffffUL << I40IW_CQ_DBSA_CQEIDX_SHIFT)
-
-#define I40IW_CQ_DBSA_SW_CQ_SELECT_SHIFT 0
-#define I40IW_CQ_DBSA_SW_CQ_SELECT_MASK \
- (0x3fffUL << I40IW_CQ_DBSA_SW_CQ_SELECT_SHIFT)
-
-#define I40IW_CQ_DBSA_ARM_NEXT_SHIFT 14
-#define I40IW_CQ_DBSA_ARM_NEXT_MASK (1UL << I40IW_CQ_DBSA_ARM_NEXT_SHIFT)
-
-#define I40IW_CQ_DBSA_ARM_NEXT_SE_SHIFT 15
-#define I40IW_CQ_DBSA_ARM_NEXT_SE_MASK (1UL << I40IW_CQ_DBSA_ARM_NEXT_SE_SHIFT)
-
-#define I40IW_CQ_DBSA_ARM_SEQ_NUM_SHIFT 16
-#define I40IW_CQ_DBSA_ARM_SEQ_NUM_MASK \
- (0x3UL << I40IW_CQ_DBSA_ARM_SEQ_NUM_SHIFT)
-
-/* CQP and iWARP Completion Queue */
-#define I40IW_CQ_QPCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IW_CQ_QPCTX_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IW_CCQ_OPRETVAL_SHIFT 0
-#define I40IW_CCQ_OPRETVAL_MASK (0xffffffffUL << I40IW_CCQ_OPRETVAL_SHIFT)
-
-#define I40IW_CQ_MINERR_SHIFT 0
-#define I40IW_CQ_MINERR_MASK (0xffffUL << I40IW_CQ_MINERR_SHIFT)
-
-#define I40IW_CQ_MAJERR_SHIFT 16
-#define I40IW_CQ_MAJERR_MASK (0xffffUL << I40IW_CQ_MAJERR_SHIFT)
-
-#define I40IW_CQ_WQEIDX_SHIFT 32
-#define I40IW_CQ_WQEIDX_MASK (0x3fffULL << I40IW_CQ_WQEIDX_SHIFT)
-
-#define I40IW_CQ_ERROR_SHIFT 55
-#define I40IW_CQ_ERROR_MASK (1ULL << I40IW_CQ_ERROR_SHIFT)
-
-#define I40IW_CQ_SQ_SHIFT 62
-#define I40IW_CQ_SQ_MASK (1ULL << I40IW_CQ_SQ_SHIFT)
-
-#define I40IW_CQ_VALID_SHIFT 63
-#define I40IW_CQ_VALID_MASK (1ULL << I40IW_CQ_VALID_SHIFT)
-
-#define I40IWCQ_PAYLDLEN_SHIFT 0
-#define I40IWCQ_PAYLDLEN_MASK (0xffffffffUL << I40IWCQ_PAYLDLEN_SHIFT)
-
-#define I40IWCQ_TCPSEQNUM_SHIFT 32
-#define I40IWCQ_TCPSEQNUM_MASK (0xffffffffULL << I40IWCQ_TCPSEQNUM_SHIFT)
-
-#define I40IWCQ_INVSTAG_SHIFT 0
-#define I40IWCQ_INVSTAG_MASK (0xffffffffUL << I40IWCQ_INVSTAG_SHIFT)
-
-#define I40IWCQ_QPID_SHIFT 32
-#define I40IWCQ_QPID_MASK (0x3ffffULL << I40IWCQ_QPID_SHIFT)
-
-#define I40IWCQ_PSHDROP_SHIFT 51
-#define I40IWCQ_PSHDROP_MASK (1ULL << I40IWCQ_PSHDROP_SHIFT)
-
-#define I40IWCQ_SRQ_SHIFT 52
-#define I40IWCQ_SRQ_MASK (1ULL << I40IWCQ_SRQ_SHIFT)
-
-#define I40IWCQ_STAG_SHIFT 53
-#define I40IWCQ_STAG_MASK (1ULL << I40IWCQ_STAG_SHIFT)
-
-#define I40IWCQ_SOEVENT_SHIFT 54
-#define I40IWCQ_SOEVENT_MASK (1ULL << I40IWCQ_SOEVENT_SHIFT)
-
-#define I40IWCQ_OP_SHIFT 56
-#define I40IWCQ_OP_MASK (0x3fULL << I40IWCQ_OP_SHIFT)
-
-/* CEQE format */
-#define I40IW_CEQE_CQCTX_SHIFT 0
-#define I40IW_CEQE_CQCTX_MASK \
- (0x7fffffffffffffffULL << I40IW_CEQE_CQCTX_SHIFT)
-
-#define I40IW_CEQE_VALID_SHIFT 63
-#define I40IW_CEQE_VALID_MASK (1ULL << I40IW_CEQE_VALID_SHIFT)
-
-/* AEQE format */
-#define I40IW_AEQE_COMPCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IW_AEQE_COMPCTX_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IW_AEQE_QPCQID_SHIFT 0
-#define I40IW_AEQE_QPCQID_MASK (0x3ffffUL << I40IW_AEQE_QPCQID_SHIFT)
-
-#define I40IW_AEQE_WQDESCIDX_SHIFT 18
-#define I40IW_AEQE_WQDESCIDX_MASK (0x3fffULL << I40IW_AEQE_WQDESCIDX_SHIFT)
-
-#define I40IW_AEQE_OVERFLOW_SHIFT 33
-#define I40IW_AEQE_OVERFLOW_MASK (1ULL << I40IW_AEQE_OVERFLOW_SHIFT)
-
-#define I40IW_AEQE_AECODE_SHIFT 34
-#define I40IW_AEQE_AECODE_MASK (0xffffULL << I40IW_AEQE_AECODE_SHIFT)
-
-#define I40IW_AEQE_AESRC_SHIFT 50
-#define I40IW_AEQE_AESRC_MASK (0xfULL << I40IW_AEQE_AESRC_SHIFT)
-
-#define I40IW_AEQE_IWSTATE_SHIFT 54
-#define I40IW_AEQE_IWSTATE_MASK (0x7ULL << I40IW_AEQE_IWSTATE_SHIFT)
-
-#define I40IW_AEQE_TCPSTATE_SHIFT 57
-#define I40IW_AEQE_TCPSTATE_MASK (0xfULL << I40IW_AEQE_TCPSTATE_SHIFT)
-
-#define I40IW_AEQE_Q2DATA_SHIFT 61
-#define I40IW_AEQE_Q2DATA_MASK (0x3ULL << I40IW_AEQE_Q2DATA_SHIFT)
-
-#define I40IW_AEQE_VALID_SHIFT 63
-#define I40IW_AEQE_VALID_MASK (1ULL << I40IW_AEQE_VALID_SHIFT)
-
-/* CQP SQ WQES */
-#define I40IW_QP_TYPE_IWARP 1
-#define I40IW_QP_TYPE_UDA 2
-#define I40IW_QP_TYPE_CQP 4
-
-#define I40IW_CQ_TYPE_IWARP 1
-#define I40IW_CQ_TYPE_ILQ 2
-#define I40IW_CQ_TYPE_IEQ 3
-#define I40IW_CQ_TYPE_CQP 4
-
-#define I40IWQP_TERM_SEND_TERM_AND_FIN 0
-#define I40IWQP_TERM_SEND_TERM_ONLY 1
-#define I40IWQP_TERM_SEND_FIN_ONLY 2
-#define I40IWQP_TERM_DONOT_SEND_TERM_OR_FIN 3
-
-#define I40IW_CQP_OP_CREATE_QP 0
-#define I40IW_CQP_OP_MODIFY_QP 0x1
-#define I40IW_CQP_OP_DESTROY_QP 0x02
-#define I40IW_CQP_OP_CREATE_CQ 0x03
-#define I40IW_CQP_OP_MODIFY_CQ 0x04
-#define I40IW_CQP_OP_DESTROY_CQ 0x05
-#define I40IW_CQP_OP_CREATE_SRQ 0x06
-#define I40IW_CQP_OP_MODIFY_SRQ 0x07
-#define I40IW_CQP_OP_DESTROY_SRQ 0x08
-#define I40IW_CQP_OP_ALLOC_STAG 0x09
-#define I40IW_CQP_OP_REG_MR 0x0a
-#define I40IW_CQP_OP_QUERY_STAG 0x0b
-#define I40IW_CQP_OP_REG_SMR 0x0c
-#define I40IW_CQP_OP_DEALLOC_STAG 0x0d
-#define I40IW_CQP_OP_MANAGE_LOC_MAC_IP_TABLE 0x0e
-#define I40IW_CQP_OP_MANAGE_ARP 0x0f
-#define I40IW_CQP_OP_MANAGE_VF_PBLE_BP 0x10
-#define I40IW_CQP_OP_MANAGE_PUSH_PAGES 0x11
-#define I40IW_CQP_OP_MANAGE_PE_TEAM 0x12
-#define I40IW_CQP_OP_UPLOAD_CONTEXT 0x13
-#define I40IW_CQP_OP_ALLOCATE_LOC_MAC_IP_TABLE_ENTRY 0x14
-#define I40IW_CQP_OP_MANAGE_HMC_PM_FUNC_TABLE 0x15
-#define I40IW_CQP_OP_CREATE_CEQ 0x16
-#define I40IW_CQP_OP_DESTROY_CEQ 0x18
-#define I40IW_CQP_OP_CREATE_AEQ 0x19
-#define I40IW_CQP_OP_DESTROY_AEQ 0x1b
-#define I40IW_CQP_OP_CREATE_ADDR_VECT 0x1c
-#define I40IW_CQP_OP_MODIFY_ADDR_VECT 0x1d
-#define I40IW_CQP_OP_DESTROY_ADDR_VECT 0x1e
-#define I40IW_CQP_OP_UPDATE_PE_SDS 0x1f
-#define I40IW_CQP_OP_QUERY_FPM_VALUES 0x20
-#define I40IW_CQP_OP_COMMIT_FPM_VALUES 0x21
-#define I40IW_CQP_OP_FLUSH_WQES 0x22
-/* I40IW_CQP_OP_GEN_AE is the same value as I40IW_CQP_OP_FLUSH_WQES */
-#define I40IW_CQP_OP_GEN_AE 0x22
-#define I40IW_CQP_OP_MANAGE_APBVT 0x23
-#define I40IW_CQP_OP_NOP 0x24
-#define I40IW_CQP_OP_MANAGE_QUAD_HASH_TABLE_ENTRY 0x25
-#define I40IW_CQP_OP_CREATE_UDA_MCAST_GROUP 0x26
-#define I40IW_CQP_OP_MODIFY_UDA_MCAST_GROUP 0x27
-#define I40IW_CQP_OP_DESTROY_UDA_MCAST_GROUP 0x28
-#define I40IW_CQP_OP_SUSPEND_QP 0x29
-#define I40IW_CQP_OP_RESUME_QP 0x2a
-#define I40IW_CQP_OP_SHMC_PAGES_ALLOCATED 0x2b
-#define I40IW_CQP_OP_SET_HMC_RESOURCE_PROFILE 0x2d
-
-#define I40IW_UDA_QPSQ_NEXT_HEADER_SHIFT 16
-#define I40IW_UDA_QPSQ_NEXT_HEADER_MASK ((u64)0xff << I40IW_UDA_QPSQ_NEXT_HEADER_SHIFT)
-
-#define I40IW_UDA_QPSQ_OPCODE_SHIFT 32
-#define I40IW_UDA_QPSQ_OPCODE_MASK ((u64)0x3f << I40IW_UDA_QPSQ_OPCODE_SHIFT)
-
-#define I40IW_UDA_QPSQ_MACLEN_SHIFT 56
-#define I40IW_UDA_QPSQ_MACLEN_MASK \
- ((u64)0x7f << I40IW_UDA_QPSQ_MACLEN_SHIFT)
-
-#define I40IW_UDA_QPSQ_IPLEN_SHIFT 48
-#define I40IW_UDA_QPSQ_IPLEN_MASK \
- ((u64)0x7f << I40IW_UDA_QPSQ_IPLEN_SHIFT)
-
-#define I40IW_UDA_QPSQ_L4T_SHIFT 30
-#define I40IW_UDA_QPSQ_L4T_MASK \
- ((u64)0x3 << I40IW_UDA_QPSQ_L4T_SHIFT)
-
-#define I40IW_UDA_QPSQ_IIPT_SHIFT 28
-#define I40IW_UDA_QPSQ_IIPT_MASK \
- ((u64)0x3 << I40IW_UDA_QPSQ_IIPT_SHIFT)
-
-#define I40IW_UDA_QPSQ_L4LEN_SHIFT 24
-#define I40IW_UDA_QPSQ_L4LEN_MASK ((u64)0xf << I40IW_UDA_QPSQ_L4LEN_SHIFT)
-
-#define I40IW_UDA_QPSQ_AVIDX_SHIFT 0
-#define I40IW_UDA_QPSQ_AVIDX_MASK ((u64)0xffff << I40IW_UDA_QPSQ_AVIDX_SHIFT)
-
-#define I40IW_UDA_QPSQ_VALID_SHIFT 63
-#define I40IW_UDA_QPSQ_VALID_MASK \
- ((u64)0x1 << I40IW_UDA_QPSQ_VALID_SHIFT)
-
-#define I40IW_UDA_QPSQ_SIGCOMPL_SHIFT 62
-#define I40IW_UDA_QPSQ_SIGCOMPL_MASK ((u64)0x1 << I40IW_UDA_QPSQ_SIGCOMPL_SHIFT)
-
-#define I40IW_UDA_PAYLOADLEN_SHIFT 0
-#define I40IW_UDA_PAYLOADLEN_MASK ((u64)0x3fff << I40IW_UDA_PAYLOADLEN_SHIFT)
-
-#define I40IW_UDA_HDRLEN_SHIFT 16
-#define I40IW_UDA_HDRLEN_MASK ((u64)0x1ff << I40IW_UDA_HDRLEN_SHIFT)
-
-#define I40IW_VLAN_TAG_VALID_SHIFT 50
-#define I40IW_VLAN_TAG_VALID_MASK ((u64)0x1 << I40IW_VLAN_TAG_VALID_SHIFT)
-
-#define I40IW_UDA_L3PROTO_SHIFT 0
-#define I40IW_UDA_L3PROTO_MASK ((u64)0x3 << I40IW_UDA_L3PROTO_SHIFT)
-
-#define I40IW_UDA_L4PROTO_SHIFT 16
-#define I40IW_UDA_L4PROTO_MASK ((u64)0x3 << I40IW_UDA_L4PROTO_SHIFT)
-
-#define I40IW_UDA_QPSQ_DOLOOPBACK_SHIFT 44
-#define I40IW_UDA_QPSQ_DOLOOPBACK_MASK \
- ((u64)0x1 << I40IW_UDA_QPSQ_DOLOOPBACK_SHIFT)
-
-/* CQP SQ WQE common fields */
-#define I40IW_CQPSQ_OPCODE_SHIFT 32
-#define I40IW_CQPSQ_OPCODE_MASK (0x3fULL << I40IW_CQPSQ_OPCODE_SHIFT)
-
-#define I40IW_CQPSQ_WQEVALID_SHIFT 63
-#define I40IW_CQPSQ_WQEVALID_MASK (1ULL << I40IW_CQPSQ_WQEVALID_SHIFT)
-
-#define I40IW_CQPSQ_TPHVAL_SHIFT 0
-#define I40IW_CQPSQ_TPHVAL_MASK (0xffUL << I40IW_CQPSQ_TPHVAL_SHIFT)
-
-#define I40IW_CQPSQ_TPHEN_SHIFT 60
-#define I40IW_CQPSQ_TPHEN_MASK (1ULL << I40IW_CQPSQ_TPHEN_SHIFT)
-
-#define I40IW_CQPSQ_PBUFADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IW_CQPSQ_PBUFADDR_MASK I40IW_CQPHC_QPCTX_MASK
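/*
 * Every field in this header is described by a _SHIFT/_MASK pair over a
 * 64-bit descriptor word.  Below is a minimal, standalone sketch of how such
 * a pair is typically used to pack and unpack a field.  The helper names,
 * the local constant copies and the main() harness are illustrative only,
 * not the driver's own code.
 */
#include <stdint.h>
#include <stdio.h>

/* Local copies of two pairs defined above, so the sketch builds on its own. */
#define CQPSQ_OPCODE_SHIFT   32
#define CQPSQ_OPCODE_MASK    (0x3fULL << CQPSQ_OPCODE_SHIFT)
#define CQPSQ_WQEVALID_SHIFT 63
#define CQPSQ_WQEVALID_MASK  (1ULL << CQPSQ_WQEVALID_SHIFT)

/* Place a value into the field described by (mask, shift). */
static inline uint64_t field_prep(uint64_t mask, unsigned int shift, uint64_t val)
{
	return (val << shift) & mask;
}

/* Extract the field described by (mask, shift) from a descriptor word. */
static inline uint64_t field_get(uint64_t word, uint64_t mask, unsigned int shift)
{
	return (word & mask) >> shift;
}

int main(void)
{
	/* Build the header word of a hypothetical CQP WQE: opcode 0x03, valid bit set. */
	uint64_t hdr = field_prep(CQPSQ_OPCODE_MASK, CQPSQ_OPCODE_SHIFT, 0x03) |
		       field_prep(CQPSQ_WQEVALID_MASK, CQPSQ_WQEVALID_SHIFT, 1);

	printf("opcode read back: 0x%llx\n",
	       (unsigned long long)field_get(hdr, CQPSQ_OPCODE_MASK, CQPSQ_OPCODE_SHIFT));
	return 0;
}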
-
-/* Create/Modify/Destroy QP */
-
-#define I40IW_CQPSQ_QP_NEWMSS_SHIFT 32
-#define I40IW_CQPSQ_QP_NEWMSS_MASK (0x3fffULL << I40IW_CQPSQ_QP_NEWMSS_SHIFT)
-
-#define I40IW_CQPSQ_QP_TERMLEN_SHIFT 48
-#define I40IW_CQPSQ_QP_TERMLEN_MASK (0xfULL << I40IW_CQPSQ_QP_TERMLEN_SHIFT)
-
-#define I40IW_CQPSQ_QP_QPCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IW_CQPSQ_QP_QPCTX_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IW_CQPSQ_QP_QPID_SHIFT 0
-#define I40IW_CQPSQ_QP_QPID_MASK (0x3FFFFUL)
-/* I40IWCQ_QPID_MASK */
-
-#define I40IW_CQPSQ_QP_OP_SHIFT 32
-#define I40IW_CQPSQ_QP_OP_MASK I40IWCQ_OP_MASK
-
-#define I40IW_CQPSQ_QP_ORDVALID_SHIFT 42
-#define I40IW_CQPSQ_QP_ORDVALID_MASK (1ULL << I40IW_CQPSQ_QP_ORDVALID_SHIFT)
-
-#define I40IW_CQPSQ_QP_TOECTXVALID_SHIFT 43
-#define I40IW_CQPSQ_QP_TOECTXVALID_MASK \
- (1ULL << I40IW_CQPSQ_QP_TOECTXVALID_SHIFT)
-
-#define I40IW_CQPSQ_QP_CACHEDVARVALID_SHIFT 44
-#define I40IW_CQPSQ_QP_CACHEDVARVALID_MASK \
- (1ULL << I40IW_CQPSQ_QP_CACHEDVARVALID_SHIFT)
-
-#define I40IW_CQPSQ_QP_VQ_SHIFT 45
-#define I40IW_CQPSQ_QP_VQ_MASK (1ULL << I40IW_CQPSQ_QP_VQ_SHIFT)
-
-#define I40IW_CQPSQ_QP_FORCELOOPBACK_SHIFT 46
-#define I40IW_CQPSQ_QP_FORCELOOPBACK_MASK \
- (1ULL << I40IW_CQPSQ_QP_FORCELOOPBACK_SHIFT)
-
-#define I40IW_CQPSQ_QP_CQNUMVALID_SHIFT 47
-#define I40IW_CQPSQ_QP_CQNUMVALID_MASK \
- (1ULL << I40IW_CQPSQ_QP_CQNUMVALID_SHIFT)
-
-#define I40IW_CQPSQ_QP_QPTYPE_SHIFT 48
-#define I40IW_CQPSQ_QP_QPTYPE_MASK (0x3ULL << I40IW_CQPSQ_QP_QPTYPE_SHIFT)
-
-#define I40IW_CQPSQ_QP_MSSCHANGE_SHIFT 52
-#define I40IW_CQPSQ_QP_MSSCHANGE_MASK (1ULL << I40IW_CQPSQ_QP_MSSCHANGE_SHIFT)
-
-#define I40IW_CQPSQ_QP_IGNOREMWBOUND_SHIFT 54
-#define I40IW_CQPSQ_QP_IGNOREMWBOUND_MASK \
- (1ULL << I40IW_CQPSQ_QP_IGNOREMWBOUND_SHIFT)
-
-#define I40IW_CQPSQ_QP_REMOVEHASHENTRY_SHIFT 55
-#define I40IW_CQPSQ_QP_REMOVEHASHENTRY_MASK \
- (1ULL << I40IW_CQPSQ_QP_REMOVEHASHENTRY_SHIFT)
-
-#define I40IW_CQPSQ_QP_TERMACT_SHIFT 56
-#define I40IW_CQPSQ_QP_TERMACT_MASK (0x3ULL << I40IW_CQPSQ_QP_TERMACT_SHIFT)
-
-#define I40IW_CQPSQ_QP_RESETCON_SHIFT 58
-#define I40IW_CQPSQ_QP_RESETCON_MASK (1ULL << I40IW_CQPSQ_QP_RESETCON_SHIFT)
-
-#define I40IW_CQPSQ_QP_ARPTABIDXVALID_SHIFT 59
-#define I40IW_CQPSQ_QP_ARPTABIDXVALID_MASK \
- (1ULL << I40IW_CQPSQ_QP_ARPTABIDXVALID_SHIFT)
-
-#define I40IW_CQPSQ_QP_NEXTIWSTATE_SHIFT 60
-#define I40IW_CQPSQ_QP_NEXTIWSTATE_MASK \
- (0x7ULL << I40IW_CQPSQ_QP_NEXTIWSTATE_SHIFT)
-
-#define I40IW_CQPSQ_QP_DBSHADOWADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IW_CQPSQ_QP_DBSHADOWADDR_MASK I40IW_CQPHC_QPCTX_MASK
-
-/* Create/Modify/Destroy CQ */
-#define I40IW_CQPSQ_CQ_CQSIZE_SHIFT 0
-#define I40IW_CQPSQ_CQ_CQSIZE_MASK (0x3ffffUL << I40IW_CQPSQ_CQ_CQSIZE_SHIFT)
-
-#define I40IW_CQPSQ_CQ_CQCTX_SHIFT 0
-#define I40IW_CQPSQ_CQ_CQCTX_MASK \
- (0x7fffffffffffffffULL << I40IW_CQPSQ_CQ_CQCTX_SHIFT)
-
-#define I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD_SHIFT 0
-#define I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD_MASK \
- (0x3ffff << I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD_SHIFT)
-
-#define I40IW_CQPSQ_CQ_CEQID_SHIFT 24
-#define I40IW_CQPSQ_CQ_CEQID_MASK (0x7fUL << I40IW_CQPSQ_CQ_CEQID_SHIFT)
-
-#define I40IW_CQPSQ_CQ_OP_SHIFT 32
-#define I40IW_CQPSQ_CQ_OP_MASK (0x3fULL << I40IW_CQPSQ_CQ_OP_SHIFT)
-
-#define I40IW_CQPSQ_CQ_CQRESIZE_SHIFT 43
-#define I40IW_CQPSQ_CQ_CQRESIZE_MASK (1ULL << I40IW_CQPSQ_CQ_CQRESIZE_SHIFT)
-
-#define I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT 44
-#define I40IW_CQPSQ_CQ_LPBLSIZE_MASK (3ULL << I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT)
-
-#define I40IW_CQPSQ_CQ_CHKOVERFLOW_SHIFT 46
-#define I40IW_CQPSQ_CQ_CHKOVERFLOW_MASK \
- (1ULL << I40IW_CQPSQ_CQ_CHKOVERFLOW_SHIFT)
-
-#define I40IW_CQPSQ_CQ_VIRTMAP_SHIFT 47
-#define I40IW_CQPSQ_CQ_VIRTMAP_MASK (1ULL << I40IW_CQPSQ_CQ_VIRTMAP_SHIFT)
-
-#define I40IW_CQPSQ_CQ_ENCEQEMASK_SHIFT 48
-#define I40IW_CQPSQ_CQ_ENCEQEMASK_MASK \
- (1ULL << I40IW_CQPSQ_CQ_ENCEQEMASK_SHIFT)
-
-#define I40IW_CQPSQ_CQ_CEQIDVALID_SHIFT 49
-#define I40IW_CQPSQ_CQ_CEQIDVALID_MASK \
- (1ULL << I40IW_CQPSQ_CQ_CEQIDVALID_SHIFT)
-
-#define I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT_SHIFT 61
-#define I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT_MASK \
- (1ULL << I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT_SHIFT)
-
-/* Create/Modify/Destroy Shared Receive Queue */
-
-#define I40IW_CQPSQ_SRQ_RQSIZE_SHIFT 0
-#define I40IW_CQPSQ_SRQ_RQSIZE_MASK (0xfUL << I40IW_CQPSQ_SRQ_RQSIZE_SHIFT)
-
-#define I40IW_CQPSQ_SRQ_RQWQESIZE_SHIFT 4
-#define I40IW_CQPSQ_SRQ_RQWQESIZE_MASK \
- (0x7UL << I40IW_CQPSQ_SRQ_RQWQESIZE_SHIFT)
-
-#define I40IW_CQPSQ_SRQ_SRQLIMIT_SHIFT 32
-#define I40IW_CQPSQ_SRQ_SRQLIMIT_MASK \
- (0xfffULL << I40IW_CQPSQ_SRQ_SRQLIMIT_SHIFT)
-
-#define I40IW_CQPSQ_SRQ_SRQCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IW_CQPSQ_SRQ_SRQCTX_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IW_CQPSQ_SRQ_PDID_SHIFT 16
-#define I40IW_CQPSQ_SRQ_PDID_MASK \
- (0x7fffULL << I40IW_CQPSQ_SRQ_PDID_SHIFT)
-
-#define I40IW_CQPSQ_SRQ_SRQID_SHIFT 0
-#define I40IW_CQPSQ_SRQ_SRQID_MASK (0x7fffUL << I40IW_CQPSQ_SRQ_SRQID_SHIFT)
-
-#define I40IW_CQPSQ_SRQ_LPBLSIZE_SHIFT I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT
-#define I40IW_CQPSQ_SRQ_LPBLSIZE_MASK I40IW_CQPSQ_CQ_LPBLSIZE_MASK
-
-#define I40IW_CQPSQ_SRQ_VIRTMAP_SHIFT I40IW_CQPSQ_CQ_VIRTMAP_SHIFT
-#define I40IW_CQPSQ_SRQ_VIRTMAP_MASK I40IW_CQPSQ_CQ_VIRTMAP_MASK
-
-#define I40IW_CQPSQ_SRQ_TPHEN_SHIFT I40IW_CQPSQ_TPHEN_SHIFT
-#define I40IW_CQPSQ_SRQ_TPHEN_MASK I40IW_CQPSQ_TPHEN_MASK
-
-#define I40IW_CQPSQ_SRQ_ARMLIMITEVENT_SHIFT 61
-#define I40IW_CQPSQ_SRQ_ARMLIMITEVENT_MASK \
- (1ULL << I40IW_CQPSQ_SRQ_ARMLIMITEVENT_SHIFT)
-
-#define I40IW_CQPSQ_SRQ_DBSHADOWAREA_SHIFT 6
-#define I40IW_CQPSQ_SRQ_DBSHADOWAREA_MASK \
- (0x3ffffffffffffffULL << I40IW_CQPSQ_SRQ_DBSHADOWAREA_SHIFT)
-
-#define I40IW_CQPSQ_SRQ_FIRSTPMPBLIDX_SHIFT 0
-#define I40IW_CQPSQ_SRQ_FIRSTPMPBLIDX_MASK \
- (0xfffffffUL << I40IW_CQPSQ_SRQ_FIRSTPMPBLIDX_SHIFT)
-
-/* Allocate/Register/Register Shared/Deallocate Stag */
-#define I40IW_CQPSQ_STAG_VA_FBO_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IW_CQPSQ_STAG_VA_FBO_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IW_CQPSQ_STAG_STAGLEN_SHIFT 0
-#define I40IW_CQPSQ_STAG_STAGLEN_MASK \
- (0x3fffffffffffULL << I40IW_CQPSQ_STAG_STAGLEN_SHIFT)
-
-#define I40IW_CQPSQ_STAG_PDID_SHIFT 48
-#define I40IW_CQPSQ_STAG_PDID_MASK (0x7fffULL << I40IW_CQPSQ_STAG_PDID_SHIFT)
-
-#define I40IW_CQPSQ_STAG_KEY_SHIFT 0
-#define I40IW_CQPSQ_STAG_KEY_MASK (0xffUL << I40IW_CQPSQ_STAG_KEY_SHIFT)
-
-#define I40IW_CQPSQ_STAG_IDX_SHIFT 8
-#define I40IW_CQPSQ_STAG_IDX_MASK (0xffffffUL << I40IW_CQPSQ_STAG_IDX_SHIFT)
-
-#define I40IW_CQPSQ_STAG_PARENTSTAGIDX_SHIFT 32
-#define I40IW_CQPSQ_STAG_PARENTSTAGIDX_MASK \
- (0xffffffULL << I40IW_CQPSQ_STAG_PARENTSTAGIDX_SHIFT)
-
-#define I40IW_CQPSQ_STAG_MR_SHIFT 43
-#define I40IW_CQPSQ_STAG_MR_MASK (1ULL << I40IW_CQPSQ_STAG_MR_SHIFT)
-
-#define I40IW_CQPSQ_STAG_LPBLSIZE_SHIFT I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT
-#define I40IW_CQPSQ_STAG_LPBLSIZE_MASK I40IW_CQPSQ_CQ_LPBLSIZE_MASK
-
-#define I40IW_CQPSQ_STAG_HPAGESIZE_SHIFT 46
-#define I40IW_CQPSQ_STAG_HPAGESIZE_MASK \
- (1ULL << I40IW_CQPSQ_STAG_HPAGESIZE_SHIFT)
-
-#define I40IW_CQPSQ_STAG_ARIGHTS_SHIFT 48
-#define I40IW_CQPSQ_STAG_ARIGHTS_MASK \
- (0x1fULL << I40IW_CQPSQ_STAG_ARIGHTS_SHIFT)
-
-#define I40IW_CQPSQ_STAG_REMACCENABLED_SHIFT 53
-#define I40IW_CQPSQ_STAG_REMACCENABLED_MASK \
- (1ULL << I40IW_CQPSQ_STAG_REMACCENABLED_SHIFT)
-
-#define I40IW_CQPSQ_STAG_VABASEDTO_SHIFT 59
-#define I40IW_CQPSQ_STAG_VABASEDTO_MASK \
- (1ULL << I40IW_CQPSQ_STAG_VABASEDTO_SHIFT)
-
-#define I40IW_CQPSQ_STAG_USEHMCFNIDX_SHIFT 60
-#define I40IW_CQPSQ_STAG_USEHMCFNIDX_MASK \
- (1ULL << I40IW_CQPSQ_STAG_USEHMCFNIDX_SHIFT)
-
-#define I40IW_CQPSQ_STAG_USEPFRID_SHIFT 61
-#define I40IW_CQPSQ_STAG_USEPFRID_MASK \
- (1ULL << I40IW_CQPSQ_STAG_USEPFRID_SHIFT)
-
-#define I40IW_CQPSQ_STAG_PBA_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IW_CQPSQ_STAG_PBA_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IW_CQPSQ_STAG_HMCFNIDX_SHIFT 0
-#define I40IW_CQPSQ_STAG_HMCFNIDX_MASK \
- (0x3fUL << I40IW_CQPSQ_STAG_HMCFNIDX_SHIFT)
-
-#define I40IW_CQPSQ_STAG_FIRSTPMPBLIDX_SHIFT 0
-#define I40IW_CQPSQ_STAG_FIRSTPMPBLIDX_MASK \
- (0xfffffffUL << I40IW_CQPSQ_STAG_FIRSTPMPBLIDX_SHIFT)
-
-/* Query stag */
-#define I40IW_CQPSQ_QUERYSTAG_IDX_SHIFT I40IW_CQPSQ_STAG_IDX_SHIFT
-#define I40IW_CQPSQ_QUERYSTAG_IDX_MASK I40IW_CQPSQ_STAG_IDX_MASK
-
-/* Allocate Local IP Address Entry */
-
-/* Manage Local IP Address Table - MLIPA */
-#define I40IW_CQPSQ_MLIPA_IPV6LO_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IW_CQPSQ_MLIPA_IPV6LO_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IW_CQPSQ_MLIPA_IPV6HI_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IW_CQPSQ_MLIPA_IPV6HI_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IW_CQPSQ_MLIPA_IPV4_SHIFT 0
-#define I40IW_CQPSQ_MLIPA_IPV4_MASK \
- (0xffffffffUL << I40IW_CQPSQ_MLIPA_IPV4_SHIFT)
-
-#define I40IW_CQPSQ_MLIPA_IPTABLEIDX_SHIFT 0
-#define I40IW_CQPSQ_MLIPA_IPTABLEIDX_MASK \
- (0x3fUL << I40IW_CQPSQ_MLIPA_IPTABLEIDX_SHIFT)
-
-#define I40IW_CQPSQ_MLIPA_IPV4VALID_SHIFT 42
-#define I40IW_CQPSQ_MLIPA_IPV4VALID_MASK \
- (1ULL << I40IW_CQPSQ_MLIPA_IPV4VALID_SHIFT)
-
-#define I40IW_CQPSQ_MLIPA_IPV6VALID_SHIFT 43
-#define I40IW_CQPSQ_MLIPA_IPV6VALID_MASK \
- (1ULL << I40IW_CQPSQ_MLIPA_IPV6VALID_SHIFT)
-
-#define I40IW_CQPSQ_MLIPA_FREEENTRY_SHIFT 62
-#define I40IW_CQPSQ_MLIPA_FREEENTRY_MASK \
- (1ULL << I40IW_CQPSQ_MLIPA_FREEENTRY_SHIFT)
-
-#define I40IW_CQPSQ_MLIPA_IGNORE_REF_CNT_SHIFT 61
-#define I40IW_CQPSQ_MLIPA_IGNORE_REF_CNT_MASK \
- (1ULL << I40IW_CQPSQ_MLIPA_IGNORE_REF_CNT_SHIFT)
-
-#define I40IW_CQPSQ_MLIPA_MAC0_SHIFT 0
-#define I40IW_CQPSQ_MLIPA_MAC0_MASK (0xffUL << I40IW_CQPSQ_MLIPA_MAC0_SHIFT)
-
-#define I40IW_CQPSQ_MLIPA_MAC1_SHIFT 8
-#define I40IW_CQPSQ_MLIPA_MAC1_MASK (0xffUL << I40IW_CQPSQ_MLIPA_MAC1_SHIFT)
-
-#define I40IW_CQPSQ_MLIPA_MAC2_SHIFT 16
-#define I40IW_CQPSQ_MLIPA_MAC2_MASK (0xffUL << I40IW_CQPSQ_MLIPA_MAC2_SHIFT)
-
-#define I40IW_CQPSQ_MLIPA_MAC3_SHIFT 24
-#define I40IW_CQPSQ_MLIPA_MAC3_MASK (0xffUL << I40IW_CQPSQ_MLIPA_MAC3_SHIFT)
-
-#define I40IW_CQPSQ_MLIPA_MAC4_SHIFT 32
-#define I40IW_CQPSQ_MLIPA_MAC4_MASK (0xffULL << I40IW_CQPSQ_MLIPA_MAC4_SHIFT)
-
-#define I40IW_CQPSQ_MLIPA_MAC5_SHIFT 40
-#define I40IW_CQPSQ_MLIPA_MAC5_MASK (0xffULL << I40IW_CQPSQ_MLIPA_MAC5_SHIFT)
-
-/* Manage ARP Table - MAT */
-#define I40IW_CQPSQ_MAT_REACHMAX_SHIFT 0
-#define I40IW_CQPSQ_MAT_REACHMAX_MASK \
- (0xffffffffUL << I40IW_CQPSQ_MAT_REACHMAX_SHIFT)
-
-#define I40IW_CQPSQ_MAT_MACADDR_SHIFT 0
-#define I40IW_CQPSQ_MAT_MACADDR_MASK \
- (0xffffffffffffULL << I40IW_CQPSQ_MAT_MACADDR_SHIFT)
-
-#define I40IW_CQPSQ_MAT_ARPENTRYIDX_SHIFT 0
-#define I40IW_CQPSQ_MAT_ARPENTRYIDX_MASK \
- (0xfffUL << I40IW_CQPSQ_MAT_ARPENTRYIDX_SHIFT)
-
-#define I40IW_CQPSQ_MAT_ENTRYVALID_SHIFT 42
-#define I40IW_CQPSQ_MAT_ENTRYVALID_MASK \
- (1ULL << I40IW_CQPSQ_MAT_ENTRYVALID_SHIFT)
-
-#define I40IW_CQPSQ_MAT_PERMANENT_SHIFT 43
-#define I40IW_CQPSQ_MAT_PERMANENT_MASK \
- (1ULL << I40IW_CQPSQ_MAT_PERMANENT_SHIFT)
-
-#define I40IW_CQPSQ_MAT_QUERY_SHIFT 44
-#define I40IW_CQPSQ_MAT_QUERY_MASK (1ULL << I40IW_CQPSQ_MAT_QUERY_SHIFT)
-
-/* Manage VF PBLE Backing Pages - MVPBP*/
-#define I40IW_CQPSQ_MVPBP_PD_ENTRY_CNT_SHIFT 0
-#define I40IW_CQPSQ_MVPBP_PD_ENTRY_CNT_MASK \
- (0x3ffULL << I40IW_CQPSQ_MVPBP_PD_ENTRY_CNT_SHIFT)
-
-#define I40IW_CQPSQ_MVPBP_FIRST_PD_INX_SHIFT 16
-#define I40IW_CQPSQ_MVPBP_FIRST_PD_INX_MASK \
- (0x1ffULL << I40IW_CQPSQ_MVPBP_FIRST_PD_INX_SHIFT)
-
-#define I40IW_CQPSQ_MVPBP_SD_INX_SHIFT 32
-#define I40IW_CQPSQ_MVPBP_SD_INX_MASK \
- (0xfffULL << I40IW_CQPSQ_MVPBP_SD_INX_SHIFT)
-
-#define I40IW_CQPSQ_MVPBP_INV_PD_ENT_SHIFT 62
-#define I40IW_CQPSQ_MVPBP_INV_PD_ENT_MASK \
- (0x1ULL << I40IW_CQPSQ_MVPBP_INV_PD_ENT_SHIFT)
-
-#define I40IW_CQPSQ_MVPBP_PD_PLPBA_SHIFT 3
-#define I40IW_CQPSQ_MVPBP_PD_PLPBA_MASK \
- (0x1fffffffffffffffULL << I40IW_CQPSQ_MVPBP_PD_PLPBA_SHIFT)
-
-/* Manage Push Page - MPP */
-#define I40IW_INVALID_PUSH_PAGE_INDEX 0xffff
-
-#define I40IW_CQPSQ_MPP_QS_HANDLE_SHIFT 0
-#define I40IW_CQPSQ_MPP_QS_HANDLE_MASK (0xffffUL << \
- I40IW_CQPSQ_MPP_QS_HANDLE_SHIFT)
-
-#define I40IW_CQPSQ_MPP_PPIDX_SHIFT 0
-#define I40IW_CQPSQ_MPP_PPIDX_MASK (0x3ffUL << I40IW_CQPSQ_MPP_PPIDX_SHIFT)
-
-#define I40IW_CQPSQ_MPP_FREE_PAGE_SHIFT 62
-#define I40IW_CQPSQ_MPP_FREE_PAGE_MASK (1ULL << I40IW_CQPSQ_MPP_FREE_PAGE_SHIFT)
-
-/* Upload Context - UCTX */
-#define I40IW_CQPSQ_UCTX_QPCTXADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IW_CQPSQ_UCTX_QPCTXADDR_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IW_CQPSQ_UCTX_QPID_SHIFT 0
-#define I40IW_CQPSQ_UCTX_QPID_MASK (0x3ffffUL << I40IW_CQPSQ_UCTX_QPID_SHIFT)
-
-#define I40IW_CQPSQ_UCTX_QPTYPE_SHIFT 48
-#define I40IW_CQPSQ_UCTX_QPTYPE_MASK (0xfULL << I40IW_CQPSQ_UCTX_QPTYPE_SHIFT)
-
-#define I40IW_CQPSQ_UCTX_RAWFORMAT_SHIFT 61
-#define I40IW_CQPSQ_UCTX_RAWFORMAT_MASK \
- (1ULL << I40IW_CQPSQ_UCTX_RAWFORMAT_SHIFT)
-
-#define I40IW_CQPSQ_UCTX_FREEZEQP_SHIFT 62
-#define I40IW_CQPSQ_UCTX_FREEZEQP_MASK \
- (1ULL << I40IW_CQPSQ_UCTX_FREEZEQP_SHIFT)
-
-/* Manage HMC PM Function Table - MHMC */
-#define I40IW_CQPSQ_MHMC_VFIDX_SHIFT 0
-#define I40IW_CQPSQ_MHMC_VFIDX_MASK (0x7fUL << I40IW_CQPSQ_MHMC_VFIDX_SHIFT)
-
-#define I40IW_CQPSQ_MHMC_FREEPMFN_SHIFT 62
-#define I40IW_CQPSQ_MHMC_FREEPMFN_MASK \
- (1ULL << I40IW_CQPSQ_MHMC_FREEPMFN_SHIFT)
-
-/* Set HMC Resource Profile - SHMCRP */
-#define I40IW_CQPSQ_SHMCRP_HMC_PROFILE_SHIFT 0
-#define I40IW_CQPSQ_SHMCRP_HMC_PROFILE_MASK \
- (0x7ULL << I40IW_CQPSQ_SHMCRP_HMC_PROFILE_SHIFT)
-#define I40IW_CQPSQ_SHMCRP_VFNUM_SHIFT 32
-#define I40IW_CQPSQ_SHMCRP_VFNUM_MASK (0x3fULL << I40IW_CQPSQ_SHMCRP_VFNUM_SHIFT)
-
-/* Create/Destroy CEQ */
-#define I40IW_CQPSQ_CEQ_CEQSIZE_SHIFT 0
-#define I40IW_CQPSQ_CEQ_CEQSIZE_MASK \
- (0x1ffffUL << I40IW_CQPSQ_CEQ_CEQSIZE_SHIFT)
-
-#define I40IW_CQPSQ_CEQ_CEQID_SHIFT 0
-#define I40IW_CQPSQ_CEQ_CEQID_MASK (0x7fUL << I40IW_CQPSQ_CEQ_CEQID_SHIFT)
-
-#define I40IW_CQPSQ_CEQ_LPBLSIZE_SHIFT I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT
-#define I40IW_CQPSQ_CEQ_LPBLSIZE_MASK I40IW_CQPSQ_CQ_LPBLSIZE_MASK
-
-#define I40IW_CQPSQ_CEQ_VMAP_SHIFT 47
-#define I40IW_CQPSQ_CEQ_VMAP_MASK (1ULL << I40IW_CQPSQ_CEQ_VMAP_SHIFT)
-
-#define I40IW_CQPSQ_CEQ_FIRSTPMPBLIDX_SHIFT 0
-#define I40IW_CQPSQ_CEQ_FIRSTPMPBLIDX_MASK \
- (0xfffffffUL << I40IW_CQPSQ_CEQ_FIRSTPMPBLIDX_SHIFT)
-
-/* Create/Destroy AEQ */
-#define I40IW_CQPSQ_AEQ_AEQECNT_SHIFT 0
-#define I40IW_CQPSQ_AEQ_AEQECNT_MASK \
- (0x7ffffUL << I40IW_CQPSQ_AEQ_AEQECNT_SHIFT)
-
-#define I40IW_CQPSQ_AEQ_LPBLSIZE_SHIFT I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT
-#define I40IW_CQPSQ_AEQ_LPBLSIZE_MASK I40IW_CQPSQ_CQ_LPBLSIZE_MASK
-
-#define I40IW_CQPSQ_AEQ_VMAP_SHIFT 47
-#define I40IW_CQPSQ_AEQ_VMAP_MASK (1ULL << I40IW_CQPSQ_AEQ_VMAP_SHIFT)
-
-#define I40IW_CQPSQ_AEQ_FIRSTPMPBLIDX_SHIFT 0
-#define I40IW_CQPSQ_AEQ_FIRSTPMPBLIDX_MASK \
- (0xfffffffUL << I40IW_CQPSQ_AEQ_FIRSTPMPBLIDX_SHIFT)
-
-/* Commit FPM Values - CFPM */
-#define I40IW_CQPSQ_CFPM_HMCFNID_SHIFT 0
-#define I40IW_CQPSQ_CFPM_HMCFNID_MASK (0x3fUL << I40IW_CQPSQ_CFPM_HMCFNID_SHIFT)
-
-/* Flush WQEs - FWQE */
-#define I40IW_CQPSQ_FWQE_AECODE_SHIFT 0
-#define I40IW_CQPSQ_FWQE_AECODE_MASK (0xffffUL << I40IW_CQPSQ_FWQE_AECODE_SHIFT)
-
-#define I40IW_CQPSQ_FWQE_AESOURCE_SHIFT 16
-#define I40IW_CQPSQ_FWQE_AESOURCE_MASK \
- (0xfUL << I40IW_CQPSQ_FWQE_AESOURCE_SHIFT)
-
-#define I40IW_CQPSQ_FWQE_RQMNERR_SHIFT 0
-#define I40IW_CQPSQ_FWQE_RQMNERR_MASK \
- (0xffffUL << I40IW_CQPSQ_FWQE_RQMNERR_SHIFT)
-
-#define I40IW_CQPSQ_FWQE_RQMJERR_SHIFT 16
-#define I40IW_CQPSQ_FWQE_RQMJERR_MASK \
- (0xffffUL << I40IW_CQPSQ_FWQE_RQMJERR_SHIFT)
-
-#define I40IW_CQPSQ_FWQE_SQMNERR_SHIFT 32
-#define I40IW_CQPSQ_FWQE_SQMNERR_MASK \
- (0xffffULL << I40IW_CQPSQ_FWQE_SQMNERR_SHIFT)
-
-#define I40IW_CQPSQ_FWQE_SQMJERR_SHIFT 48
-#define I40IW_CQPSQ_FWQE_SQMJERR_MASK \
- (0xffffULL << I40IW_CQPSQ_FWQE_SQMJERR_SHIFT)
-
-#define I40IW_CQPSQ_FWQE_QPID_SHIFT 0
-#define I40IW_CQPSQ_FWQE_QPID_MASK (0x3ffffULL << I40IW_CQPSQ_FWQE_QPID_SHIFT)
-
-#define I40IW_CQPSQ_FWQE_GENERATE_AE_SHIFT 59
-#define I40IW_CQPSQ_FWQE_GENERATE_AE_MASK (1ULL << \
- I40IW_CQPSQ_FWQE_GENERATE_AE_SHIFT)
-
-#define I40IW_CQPSQ_FWQE_USERFLCODE_SHIFT 60
-#define I40IW_CQPSQ_FWQE_USERFLCODE_MASK \
- (1ULL << I40IW_CQPSQ_FWQE_USERFLCODE_SHIFT)
-
-#define I40IW_CQPSQ_FWQE_FLUSHSQ_SHIFT 61
-#define I40IW_CQPSQ_FWQE_FLUSHSQ_MASK (1ULL << I40IW_CQPSQ_FWQE_FLUSHSQ_SHIFT)
-
-#define I40IW_CQPSQ_FWQE_FLUSHRQ_SHIFT 62
-#define I40IW_CQPSQ_FWQE_FLUSHRQ_MASK (1ULL << I40IW_CQPSQ_FWQE_FLUSHRQ_SHIFT)
-
-/* Manage Accelerated Port Table - MAPT */
-#define I40IW_CQPSQ_MAPT_PORT_SHIFT 0
-#define I40IW_CQPSQ_MAPT_PORT_MASK (0xffffUL << I40IW_CQPSQ_MAPT_PORT_SHIFT)
-
-#define I40IW_CQPSQ_MAPT_ADDPORT_SHIFT 62
-#define I40IW_CQPSQ_MAPT_ADDPORT_MASK (1ULL << I40IW_CQPSQ_MAPT_ADDPORT_SHIFT)
-
-/* Update Protocol Engine SDs */
-#define I40IW_CQPSQ_UPESD_SDCMD_SHIFT 0
-#define I40IW_CQPSQ_UPESD_SDCMD_MASK (0xffffffffUL << I40IW_CQPSQ_UPESD_SDCMD_SHIFT)
-
-#define I40IW_CQPSQ_UPESD_SDDATALOW_SHIFT 0
-#define I40IW_CQPSQ_UPESD_SDDATALOW_MASK \
- (0xffffffffUL << I40IW_CQPSQ_UPESD_SDDATALOW_SHIFT)
-
-#define I40IW_CQPSQ_UPESD_SDDATAHI_SHIFT 32
-#define I40IW_CQPSQ_UPESD_SDDATAHI_MASK \
- (0xffffffffULL << I40IW_CQPSQ_UPESD_SDDATAHI_SHIFT)
-#define I40IW_CQPSQ_UPESD_HMCFNID_SHIFT 0
-#define I40IW_CQPSQ_UPESD_HMCFNID_MASK \
- (0x3fUL << I40IW_CQPSQ_UPESD_HMCFNID_SHIFT)
-
-#define I40IW_CQPSQ_UPESD_ENTRY_VALID_SHIFT 63
-#define I40IW_CQPSQ_UPESD_ENTRY_VALID_MASK \
- ((u64)1 << I40IW_CQPSQ_UPESD_ENTRY_VALID_SHIFT)
-
-#define I40IW_CQPSQ_UPESD_ENTRY_COUNT_SHIFT 0
-#define I40IW_CQPSQ_UPESD_ENTRY_COUNT_MASK \
- (0xfUL << I40IW_CQPSQ_UPESD_ENTRY_COUNT_SHIFT)
-
-#define I40IW_CQPSQ_UPESD_SKIP_ENTRY_SHIFT 7
-#define I40IW_CQPSQ_UPESD_SKIP_ENTRY_MASK \
- (0x1UL << I40IW_CQPSQ_UPESD_SKIP_ENTRY_SHIFT)
-
-/* Suspend QP */
-#define I40IW_CQPSQ_SUSPENDQP_QPID_SHIFT 0
-#define I40IW_CQPSQ_SUSPENDQP_QPID_MASK (0x3FFFFUL)
-/* I40IWCQ_QPID_MASK */
-
-/* Resume QP */
-#define I40IW_CQPSQ_RESUMEQP_QSHANDLE_SHIFT 0
-#define I40IW_CQPSQ_RESUMEQP_QSHANDLE_MASK \
- (0xffffffffUL << I40IW_CQPSQ_RESUMEQP_QSHANDLE_SHIFT)
-
-#define I40IW_CQPSQ_RESUMEQP_QPID_SHIFT 0
-#define I40IW_CQPSQ_RESUMEQP_QPID_MASK (0x3FFFFUL)
-/* I40IWCQ_QPID_MASK */
-
-/* IW QP Context */
-#define I40IWQPC_DDP_VER_SHIFT 0
-#define I40IWQPC_DDP_VER_MASK (3UL << I40IWQPC_DDP_VER_SHIFT)
-
-#define I40IWQPC_SNAP_SHIFT 2
-#define I40IWQPC_SNAP_MASK (1UL << I40IWQPC_SNAP_SHIFT)
-
-#define I40IWQPC_IPV4_SHIFT 3
-#define I40IWQPC_IPV4_MASK (1UL << I40IWQPC_IPV4_SHIFT)
-
-#define I40IWQPC_NONAGLE_SHIFT 4
-#define I40IWQPC_NONAGLE_MASK (1UL << I40IWQPC_NONAGLE_SHIFT)
-
-#define I40IWQPC_INSERTVLANTAG_SHIFT 5
-#define I40IWQPC_INSERTVLANTAG_MASK (1 << I40IWQPC_INSERTVLANTAG_SHIFT)
-
-#define I40IWQPC_USESRQ_SHIFT 6
-#define I40IWQPC_USESRQ_MASK (1UL << I40IWQPC_USESRQ_SHIFT)
-
-#define I40IWQPC_TIMESTAMP_SHIFT 7
-#define I40IWQPC_TIMESTAMP_MASK (1UL << I40IWQPC_TIMESTAMP_SHIFT)
-
-#define I40IWQPC_RQWQESIZE_SHIFT 8
-#define I40IWQPC_RQWQESIZE_MASK (3UL << I40IWQPC_RQWQESIZE_SHIFT)
-
-#define I40IWQPC_INSERTL2TAG2_SHIFT 11
-#define I40IWQPC_INSERTL2TAG2_MASK (1UL << I40IWQPC_INSERTL2TAG2_SHIFT)
-
-#define I40IWQPC_LIMIT_SHIFT 12
-#define I40IWQPC_LIMIT_MASK (3UL << I40IWQPC_LIMIT_SHIFT)
-
-#define I40IWQPC_DROPOOOSEG_SHIFT 15
-#define I40IWQPC_DROPOOOSEG_MASK (1UL << I40IWQPC_DROPOOOSEG_SHIFT)
-
-#define I40IWQPC_DUPACK_THRESH_SHIFT 16
-#define I40IWQPC_DUPACK_THRESH_MASK (7UL << I40IWQPC_DUPACK_THRESH_SHIFT)
-
-#define I40IWQPC_ERR_RQ_IDX_VALID_SHIFT 19
-#define I40IWQPC_ERR_RQ_IDX_VALID_MASK (1UL << I40IWQPC_ERR_RQ_IDX_VALID_SHIFT)
-
-#define I40IWQPC_DIS_VLAN_CHECKS_SHIFT 19
-#define I40IWQPC_DIS_VLAN_CHECKS_MASK (7UL << I40IWQPC_DIS_VLAN_CHECKS_SHIFT)
-
-#define I40IWQPC_RCVTPHEN_SHIFT 28
-#define I40IWQPC_RCVTPHEN_MASK (1UL << I40IWQPC_RCVTPHEN_SHIFT)
-
-#define I40IWQPC_XMITTPHEN_SHIFT 29
-#define I40IWQPC_XMITTPHEN_MASK (1ULL << I40IWQPC_XMITTPHEN_SHIFT)
-
-#define I40IWQPC_RQTPHEN_SHIFT 30
-#define I40IWQPC_RQTPHEN_MASK (1UL << I40IWQPC_RQTPHEN_SHIFT)
-
-#define I40IWQPC_SQTPHEN_SHIFT 31
-#define I40IWQPC_SQTPHEN_MASK (1ULL << I40IWQPC_SQTPHEN_SHIFT)
-
-#define I40IWQPC_PPIDX_SHIFT 32
-#define I40IWQPC_PPIDX_MASK (0x3ffULL << I40IWQPC_PPIDX_SHIFT)
-
-#define I40IWQPC_PMENA_SHIFT 47
-#define I40IWQPC_PMENA_MASK (1ULL << I40IWQPC_PMENA_SHIFT)
-
-#define I40IWQPC_RDMAP_VER_SHIFT 62
-#define I40IWQPC_RDMAP_VER_MASK (3ULL << I40IWQPC_RDMAP_VER_SHIFT)
-
-#define I40IWQPC_SQADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IWQPC_SQADDR_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IWQPC_RQADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IWQPC_RQADDR_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IWQPC_TTL_SHIFT 0
-#define I40IWQPC_TTL_MASK (0xffUL << I40IWQPC_TTL_SHIFT)
-
-#define I40IWQPC_RQSIZE_SHIFT 8
-#define I40IWQPC_RQSIZE_MASK (0xfUL << I40IWQPC_RQSIZE_SHIFT)
-
-#define I40IWQPC_SQSIZE_SHIFT 12
-#define I40IWQPC_SQSIZE_MASK (0xfUL << I40IWQPC_SQSIZE_SHIFT)
-
-#define I40IWQPC_SRCMACADDRIDX_SHIFT 16
-#define I40IWQPC_SRCMACADDRIDX_MASK (0x3fUL << I40IWQPC_SRCMACADDRIDX_SHIFT)
-
-#define I40IWQPC_AVOIDSTRETCHACK_SHIFT 23
-#define I40IWQPC_AVOIDSTRETCHACK_MASK (1UL << I40IWQPC_AVOIDSTRETCHACK_SHIFT)
-
-#define I40IWQPC_TOS_SHIFT 24
-#define I40IWQPC_TOS_MASK (0xffUL << I40IWQPC_TOS_SHIFT)
-
-#define I40IWQPC_SRCPORTNUM_SHIFT 32
-#define I40IWQPC_SRCPORTNUM_MASK (0xffffULL << I40IWQPC_SRCPORTNUM_SHIFT)
-
-#define I40IWQPC_DESTPORTNUM_SHIFT 48
-#define I40IWQPC_DESTPORTNUM_MASK (0xffffULL << I40IWQPC_DESTPORTNUM_SHIFT)
-
-#define I40IWQPC_DESTIPADDR0_SHIFT 32
-#define I40IWQPC_DESTIPADDR0_MASK \
- (0xffffffffULL << I40IWQPC_DESTIPADDR0_SHIFT)
-
-#define I40IWQPC_DESTIPADDR1_SHIFT 0
-#define I40IWQPC_DESTIPADDR1_MASK \
- (0xffffffffULL << I40IWQPC_DESTIPADDR1_SHIFT)
-
-#define I40IWQPC_DESTIPADDR2_SHIFT 32
-#define I40IWQPC_DESTIPADDR2_MASK \
- (0xffffffffULL << I40IWQPC_DESTIPADDR2_SHIFT)
-
-#define I40IWQPC_DESTIPADDR3_SHIFT 0
-#define I40IWQPC_DESTIPADDR3_MASK \
- (0xffffffffULL << I40IWQPC_DESTIPADDR3_SHIFT)
-
-#define I40IWQPC_SNDMSS_SHIFT 16
-#define I40IWQPC_SNDMSS_MASK (0x3fffUL << I40IWQPC_SNDMSS_SHIFT)
-
-#define I40IW_UDA_QPC_MAXFRAMESIZE_SHIFT 16
-#define I40IW_UDA_QPC_MAXFRAMESIZE_MASK (0x3fffUL << I40IW_UDA_QPC_MAXFRAMESIZE_SHIFT)
-
-#define I40IWQPC_VLANTAG_SHIFT 32
-#define I40IWQPC_VLANTAG_MASK (0xffffULL << I40IWQPC_VLANTAG_SHIFT)
-
-#define I40IWQPC_ARPIDX_SHIFT 48
-#define I40IWQPC_ARPIDX_MASK (0xffffULL << I40IWQPC_ARPIDX_SHIFT)
-
-#define I40IWQPC_FLOWLABEL_SHIFT 0
-#define I40IWQPC_FLOWLABEL_MASK (0xfffffUL << I40IWQPC_FLOWLABEL_SHIFT)
-
-#define I40IWQPC_WSCALE_SHIFT 20
-#define I40IWQPC_WSCALE_MASK (1UL << I40IWQPC_WSCALE_SHIFT)
-
-#define I40IWQPC_KEEPALIVE_SHIFT 21
-#define I40IWQPC_KEEPALIVE_MASK (1UL << I40IWQPC_KEEPALIVE_SHIFT)
-
-#define I40IWQPC_IGNORE_TCP_OPT_SHIFT 22
-#define I40IWQPC_IGNORE_TCP_OPT_MASK (1UL << I40IWQPC_IGNORE_TCP_OPT_SHIFT)
-
-#define I40IWQPC_IGNORE_TCP_UNS_OPT_SHIFT 23
-#define I40IWQPC_IGNORE_TCP_UNS_OPT_MASK \
- (1UL << I40IWQPC_IGNORE_TCP_UNS_OPT_SHIFT)
-
-#define I40IWQPC_TCPSTATE_SHIFT 28
-#define I40IWQPC_TCPSTATE_MASK (0xfUL << I40IWQPC_TCPSTATE_SHIFT)
-
-#define I40IWQPC_RCVSCALE_SHIFT 32
-#define I40IWQPC_RCVSCALE_MASK (0xfULL << I40IWQPC_RCVSCALE_SHIFT)
-
-#define I40IWQPC_SNDSCALE_SHIFT 40
-#define I40IWQPC_SNDSCALE_MASK (0xfULL << I40IWQPC_SNDSCALE_SHIFT)
-
-#define I40IWQPC_PDIDX_SHIFT 48
-#define I40IWQPC_PDIDX_MASK (0x7fffULL << I40IWQPC_PDIDX_SHIFT)
-
-#define I40IWQPC_KALIVE_TIMER_MAX_PROBES_SHIFT 16
-#define I40IWQPC_KALIVE_TIMER_MAX_PROBES_MASK \
- (0xffUL << I40IWQPC_KALIVE_TIMER_MAX_PROBES_SHIFT)
-
-#define I40IWQPC_KEEPALIVE_INTERVAL_SHIFT 24
-#define I40IWQPC_KEEPALIVE_INTERVAL_MASK \
- (0xffUL << I40IWQPC_KEEPALIVE_INTERVAL_SHIFT)
-
-#define I40IWQPC_TIMESTAMP_RECENT_SHIFT 0
-#define I40IWQPC_TIMESTAMP_RECENT_MASK \
- (0xffffffffUL << I40IWQPC_TIMESTAMP_RECENT_SHIFT)
-
-#define I40IWQPC_TIMESTAMP_AGE_SHIFT 32
-#define I40IWQPC_TIMESTAMP_AGE_MASK \
- (0xffffffffULL << I40IWQPC_TIMESTAMP_AGE_SHIFT)
-
-#define I40IWQPC_SNDNXT_SHIFT 0
-#define I40IWQPC_SNDNXT_MASK (0xffffffffUL << I40IWQPC_SNDNXT_SHIFT)
-
-#define I40IWQPC_SNDWND_SHIFT 32
-#define I40IWQPC_SNDWND_MASK (0xffffffffULL << I40IWQPC_SNDWND_SHIFT)
-
-#define I40IWQPC_RCVNXT_SHIFT 0
-#define I40IWQPC_RCVNXT_MASK (0xffffffffUL << I40IWQPC_RCVNXT_SHIFT)
-
-#define I40IWQPC_RCVWND_SHIFT 32
-#define I40IWQPC_RCVWND_MASK (0xffffffffULL << I40IWQPC_RCVWND_SHIFT)
-
-#define I40IWQPC_SNDMAX_SHIFT 0
-#define I40IWQPC_SNDMAX_MASK (0xffffffffUL << I40IWQPC_SNDMAX_SHIFT)
-
-#define I40IWQPC_SNDUNA_SHIFT 32
-#define I40IWQPC_SNDUNA_MASK (0xffffffffULL << I40IWQPC_SNDUNA_SHIFT)
-
-#define I40IWQPC_SRTT_SHIFT 0
-#define I40IWQPC_SRTT_MASK (0xffffffffUL << I40IWQPC_SRTT_SHIFT)
-
-#define I40IWQPC_RTTVAR_SHIFT 32
-#define I40IWQPC_RTTVAR_MASK (0xffffffffULL << I40IWQPC_RTTVAR_SHIFT)
-
-#define I40IWQPC_SSTHRESH_SHIFT 0
-#define I40IWQPC_SSTHRESH_MASK (0xffffffffUL << I40IWQPC_SSTHRESH_SHIFT)
-
-#define I40IWQPC_CWND_SHIFT 32
-#define I40IWQPC_CWND_MASK (0xffffffffULL << I40IWQPC_CWND_SHIFT)
-
-#define I40IWQPC_SNDWL1_SHIFT 0
-#define I40IWQPC_SNDWL1_MASK (0xffffffffUL << I40IWQPC_SNDWL1_SHIFT)
-
-#define I40IWQPC_SNDWL2_SHIFT 32
-#define I40IWQPC_SNDWL2_MASK (0xffffffffULL << I40IWQPC_SNDWL2_SHIFT)
-
-#define I40IWQPC_ERR_RQ_IDX_SHIFT 32
-#define I40IWQPC_ERR_RQ_IDX_MASK (0x3fffULL << I40IWQPC_ERR_RQ_IDX_SHIFT)
-
-#define I40IWQPC_MAXSNDWND_SHIFT 0
-#define I40IWQPC_MAXSNDWND_MASK (0xffffffffUL << I40IWQPC_MAXSNDWND_SHIFT)
-
-#define I40IWQPC_REXMIT_THRESH_SHIFT 48
-#define I40IWQPC_REXMIT_THRESH_MASK (0x3fULL << I40IWQPC_REXMIT_THRESH_SHIFT)
-
-#define I40IWQPC_TXCQNUM_SHIFT 0
-#define I40IWQPC_TXCQNUM_MASK (0x1ffffUL << I40IWQPC_TXCQNUM_SHIFT)
-
-#define I40IWQPC_RXCQNUM_SHIFT 32
-#define I40IWQPC_RXCQNUM_MASK (0x1ffffULL << I40IWQPC_RXCQNUM_SHIFT)
-
-#define I40IWQPC_STAT_INDEX_SHIFT 0
-#define I40IWQPC_STAT_INDEX_MASK (0x1fULL << I40IWQPC_STAT_INDEX_SHIFT)
-
-#define I40IWQPC_Q2ADDR_SHIFT 0
-#define I40IWQPC_Q2ADDR_MASK (0xffffffffffffff00ULL << I40IWQPC_Q2ADDR_SHIFT)
-
-#define I40IWQPC_LASTBYTESENT_SHIFT 0
-#define I40IWQPC_LASTBYTESENT_MASK (0xffUL << I40IWQPC_LASTBYTESENT_SHIFT)
-
-#define I40IWQPC_SRQID_SHIFT 32
-#define I40IWQPC_SRQID_MASK (0xffULL << I40IWQPC_SRQID_SHIFT)
-
-#define I40IWQPC_ORDSIZE_SHIFT 0
-#define I40IWQPC_ORDSIZE_MASK (0x7fUL << I40IWQPC_ORDSIZE_SHIFT)
-
-#define I40IWQPC_IRDSIZE_SHIFT 16
-#define I40IWQPC_IRDSIZE_MASK (0x3UL << I40IWQPC_IRDSIZE_SHIFT)
-
-#define I40IWQPC_WRRDRSPOK_SHIFT 20
-#define I40IWQPC_WRRDRSPOK_MASK (1UL << I40IWQPC_WRRDRSPOK_SHIFT)
-
-#define I40IWQPC_RDOK_SHIFT 21
-#define I40IWQPC_RDOK_MASK (1UL << I40IWQPC_RDOK_SHIFT)
-
-#define I40IWQPC_SNDMARKERS_SHIFT 22
-#define I40IWQPC_SNDMARKERS_MASK (1UL << I40IWQPC_SNDMARKERS_SHIFT)
-
-#define I40IWQPC_BINDEN_SHIFT 23
-#define I40IWQPC_BINDEN_MASK (1UL << I40IWQPC_BINDEN_SHIFT)
-
-#define I40IWQPC_FASTREGEN_SHIFT 24
-#define I40IWQPC_FASTREGEN_MASK (1UL << I40IWQPC_FASTREGEN_SHIFT)
-
-#define I40IWQPC_PRIVEN_SHIFT 25
-#define I40IWQPC_PRIVEN_MASK (1UL << I40IWQPC_PRIVEN_SHIFT)
-
-#define I40IWQPC_USESTATSINSTANCE_SHIFT 26
-#define I40IWQPC_USESTATSINSTANCE_MASK (1UL << I40IWQPC_USESTATSINSTANCE_SHIFT)
-
-#define I40IWQPC_IWARPMODE_SHIFT 28
-#define I40IWQPC_IWARPMODE_MASK (1UL << I40IWQPC_IWARPMODE_SHIFT)
-
-#define I40IWQPC_RCVMARKERS_SHIFT 29
-#define I40IWQPC_RCVMARKERS_MASK (1UL << I40IWQPC_RCVMARKERS_SHIFT)
-
-#define I40IWQPC_ALIGNHDRS_SHIFT 30
-#define I40IWQPC_ALIGNHDRS_MASK (1UL << I40IWQPC_ALIGNHDRS_SHIFT)
-
-#define I40IWQPC_RCVNOMPACRC_SHIFT 31
-#define I40IWQPC_RCVNOMPACRC_MASK (1UL << I40IWQPC_RCVNOMPACRC_SHIFT)
-
-#define I40IWQPC_RCVMARKOFFSET_SHIFT 33
-#define I40IWQPC_RCVMARKOFFSET_MASK (0x1ffULL << I40IWQPC_RCVMARKOFFSET_SHIFT)
-
-#define I40IWQPC_SNDMARKOFFSET_SHIFT 48
-#define I40IWQPC_SNDMARKOFFSET_MASK (0x1ffULL << I40IWQPC_SNDMARKOFFSET_SHIFT)
-
-#define I40IWQPC_QPCOMPCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IWQPC_QPCOMPCTX_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IWQPC_SQTPHVAL_SHIFT 0
-#define I40IWQPC_SQTPHVAL_MASK (0xffUL << I40IWQPC_SQTPHVAL_SHIFT)
-
-#define I40IWQPC_RQTPHVAL_SHIFT 8
-#define I40IWQPC_RQTPHVAL_MASK (0xffUL << I40IWQPC_RQTPHVAL_SHIFT)
-
-#define I40IWQPC_QSHANDLE_SHIFT 16
-#define I40IWQPC_QSHANDLE_MASK (0x3ffUL << I40IWQPC_QSHANDLE_SHIFT)
-
-#define I40IWQPC_EXCEPTION_LAN_QUEUE_SHIFT 32
-#define I40IWQPC_EXCEPTION_LAN_QUEUE_MASK (0xfffULL << \
- I40IWQPC_EXCEPTION_LAN_QUEUE_SHIFT)
-
-#define I40IWQPC_LOCAL_IPADDR3_SHIFT 0
-#define I40IWQPC_LOCAL_IPADDR3_MASK \
- (0xffffffffUL << I40IWQPC_LOCAL_IPADDR3_SHIFT)
-
-#define I40IWQPC_LOCAL_IPADDR2_SHIFT 32
-#define I40IWQPC_LOCAL_IPADDR2_MASK \
- (0xffffffffULL << I40IWQPC_LOCAL_IPADDR2_SHIFT)
-
-#define I40IWQPC_LOCAL_IPADDR1_SHIFT 0
-#define I40IWQPC_LOCAL_IPADDR1_MASK \
- (0xffffffffUL << I40IWQPC_LOCAL_IPADDR1_SHIFT)
-
-#define I40IWQPC_LOCAL_IPADDR0_SHIFT 32
-#define I40IWQPC_LOCAL_IPADDR0_MASK \
- (0xffffffffULL << I40IWQPC_LOCAL_IPADDR0_SHIFT)
-
-/* wqe size considering 32 bytes per wqe */
-#define I40IW_QP_SW_MIN_WQSIZE 4 /* in WRs */
-#define I40IW_SQ_RSVD 2
-#define I40IW_RQ_RSVD 1
-#define I40IW_MAX_QUANTAS_PER_WR 2
-#define I40IW_QP_SW_MAX_SQ_QUANTAS 2048
-#define I40IW_QP_SW_MAX_RQ_QUANTAS 16384
-#define I40IW_MAX_QP_WRS ((I40IW_QP_SW_MAX_SQ_QUANTAS / I40IW_MAX_QUANTAS_PER_WR) - 1)
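/*
 * With the values above, I40IW_MAX_QP_WRS works out to (2048 / 2) - 1 = 1023:
 * a software SQ of 2048 quanta, where one work request may consume up to
 * I40IW_MAX_QUANTAS_PER_WR = 2 quanta, holds at most 1023 outstanding WRs
 * (the final slot is presumably held back so the ring never fills completely).
 */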
-
-#define I40IWQP_OP_RDMA_WRITE 0
-#define I40IWQP_OP_RDMA_READ 1
-#define I40IWQP_OP_RDMA_SEND 3
-#define I40IWQP_OP_RDMA_SEND_INV 4
-#define I40IWQP_OP_RDMA_SEND_SOL_EVENT 5
-#define I40IWQP_OP_RDMA_SEND_SOL_EVENT_INV 6
-#define I40IWQP_OP_BIND_MW 8
-#define I40IWQP_OP_FAST_REGISTER 9
-#define I40IWQP_OP_LOCAL_INVALIDATE 10
-#define I40IWQP_OP_RDMA_READ_LOC_INV 11
-#define I40IWQP_OP_NOP 12
-
-#define I40IW_RSVD_SHIFT 41
-#define I40IW_RSVD_MASK (0x7fffULL << I40IW_RSVD_SHIFT)
-
-/* iwarp QP SQ WQE common fields */
-#define I40IWQPSQ_OPCODE_SHIFT 32
-#define I40IWQPSQ_OPCODE_MASK (0x3fULL << I40IWQPSQ_OPCODE_SHIFT)
-
-#define I40IWQPSQ_ADDFRAGCNT_SHIFT 38
-#define I40IWQPSQ_ADDFRAGCNT_MASK (0x7ULL << I40IWQPSQ_ADDFRAGCNT_SHIFT)
-
-#define I40IWQPSQ_PUSHWQE_SHIFT 56
-#define I40IWQPSQ_PUSHWQE_MASK (1ULL << I40IWQPSQ_PUSHWQE_SHIFT)
-
-#define I40IWQPSQ_STREAMMODE_SHIFT 58
-#define I40IWQPSQ_STREAMMODE_MASK (1ULL << I40IWQPSQ_STREAMMODE_SHIFT)
-
-#define I40IWQPSQ_WAITFORRCVPDU_SHIFT 59
-#define I40IWQPSQ_WAITFORRCVPDU_MASK (1ULL << I40IWQPSQ_WAITFORRCVPDU_SHIFT)
-
-#define I40IWQPSQ_READFENCE_SHIFT 60
-#define I40IWQPSQ_READFENCE_MASK (1ULL << I40IWQPSQ_READFENCE_SHIFT)
-
-#define I40IWQPSQ_LOCALFENCE_SHIFT 61
-#define I40IWQPSQ_LOCALFENCE_MASK (1ULL << I40IWQPSQ_LOCALFENCE_SHIFT)
-
-#define I40IWQPSQ_SIGCOMPL_SHIFT 62
-#define I40IWQPSQ_SIGCOMPL_MASK (1ULL << I40IWQPSQ_SIGCOMPL_SHIFT)
-
-#define I40IWQPSQ_VALID_SHIFT 63
-#define I40IWQPSQ_VALID_MASK (1ULL << I40IWQPSQ_VALID_SHIFT)
-
-#define I40IWQPSQ_FRAG_TO_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IWQPSQ_FRAG_TO_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IWQPSQ_FRAG_LEN_SHIFT 0
-#define I40IWQPSQ_FRAG_LEN_MASK (0xffffffffUL << I40IWQPSQ_FRAG_LEN_SHIFT)
-
-#define I40IWQPSQ_FRAG_STAG_SHIFT 32
-#define I40IWQPSQ_FRAG_STAG_MASK (0xffffffffULL << I40IWQPSQ_FRAG_STAG_SHIFT)
-
-#define I40IWQPSQ_REMSTAGINV_SHIFT 0
-#define I40IWQPSQ_REMSTAGINV_MASK (0xffffffffUL << I40IWQPSQ_REMSTAGINV_SHIFT)
-
-#define I40IWQPSQ_INLINEDATAFLAG_SHIFT 57
-#define I40IWQPSQ_INLINEDATAFLAG_MASK (1ULL << I40IWQPSQ_INLINEDATAFLAG_SHIFT)
-
-#define I40IWQPSQ_INLINEDATALEN_SHIFT 48
-#define I40IWQPSQ_INLINEDATALEN_MASK \
- (0x7fULL << I40IWQPSQ_INLINEDATALEN_SHIFT)
-
-/* iwarp send with push mode */
-#define I40IWQPSQ_WQDESCIDX_SHIFT 0
-#define I40IWQPSQ_WQDESCIDX_MASK (0x3fffUL << I40IWQPSQ_WQDESCIDX_SHIFT)
-
-/* rdma write */
-#define I40IWQPSQ_REMSTAG_SHIFT 0
-#define I40IWQPSQ_REMSTAG_MASK (0xffffffffUL << I40IWQPSQ_REMSTAG_SHIFT)
-
-#define I40IWQPSQ_REMTO_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IWQPSQ_REMTO_MASK I40IW_CQPHC_QPCTX_MASK
-
-/* memory window */
-#define I40IWQPSQ_STAGRIGHTS_SHIFT 48
-#define I40IWQPSQ_STAGRIGHTS_MASK (0x1fULL << I40IWQPSQ_STAGRIGHTS_SHIFT)
-
-#define I40IWQPSQ_VABASEDTO_SHIFT 53
-#define I40IWQPSQ_VABASEDTO_MASK (1ULL << I40IWQPSQ_VABASEDTO_SHIFT)
-
-#define I40IWQPSQ_MWLEN_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IWQPSQ_MWLEN_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IWQPSQ_PARENTMRSTAG_SHIFT 0
-#define I40IWQPSQ_PARENTMRSTAG_MASK \
- (0xffffffffUL << I40IWQPSQ_PARENTMRSTAG_SHIFT)
-
-#define I40IWQPSQ_MWSTAG_SHIFT 32
-#define I40IWQPSQ_MWSTAG_MASK (0xffffffffULL << I40IWQPSQ_MWSTAG_SHIFT)
-
-#define I40IWQPSQ_BASEVA_TO_FBO_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IWQPSQ_BASEVA_TO_FBO_MASK I40IW_CQPHC_QPCTX_MASK
-
-/* Local Invalidate */
-#define I40IWQPSQ_LOCSTAG_SHIFT 32
-#define I40IWQPSQ_LOCSTAG_MASK (0xffffffffULL << I40IWQPSQ_LOCSTAG_SHIFT)
-
-/* Fast Register */
-#define I40IWQPSQ_STAGKEY_SHIFT 0
-#define I40IWQPSQ_STAGKEY_MASK (0xffUL << I40IWQPSQ_STAGKEY_SHIFT)
-
-#define I40IWQPSQ_STAGINDEX_SHIFT 8
-#define I40IWQPSQ_STAGINDEX_MASK (0xffffffUL << I40IWQPSQ_STAGINDEX_SHIFT)
-
-#define I40IWQPSQ_COPYHOSTPBLS_SHIFT 43
-#define I40IWQPSQ_COPYHOSTPBLS_MASK (1ULL << I40IWQPSQ_COPYHOSTPBLS_SHIFT)
-
-#define I40IWQPSQ_LPBLSIZE_SHIFT 44
-#define I40IWQPSQ_LPBLSIZE_MASK (3ULL << I40IWQPSQ_LPBLSIZE_SHIFT)
-
-#define I40IWQPSQ_HPAGESIZE_SHIFT 46
-#define I40IWQPSQ_HPAGESIZE_MASK (3ULL << I40IWQPSQ_HPAGESIZE_SHIFT)
-
-#define I40IWQPSQ_STAGLEN_SHIFT 0
-#define I40IWQPSQ_STAGLEN_MASK (0x1ffffffffffULL << I40IWQPSQ_STAGLEN_SHIFT)
-
-#define I40IWQPSQ_FIRSTPMPBLIDXLO_SHIFT 48
-#define I40IWQPSQ_FIRSTPMPBLIDXLO_MASK \
- (0xffffULL << I40IWQPSQ_FIRSTPMPBLIDXLO_SHIFT)
-
-#define I40IWQPSQ_FIRSTPMPBLIDXHI_SHIFT 0
-#define I40IWQPSQ_FIRSTPMPBLIDXHI_MASK \
- (0xfffUL << I40IWQPSQ_FIRSTPMPBLIDXHI_SHIFT)
-
-#define I40IWQPSQ_PBLADDR_SHIFT 12
-#define I40IWQPSQ_PBLADDR_MASK (0xfffffffffffffULL << I40IWQPSQ_PBLADDR_SHIFT)
-
-/* iwarp QP RQ WQE common fields */
-#define I40IWQPRQ_ADDFRAGCNT_SHIFT I40IWQPSQ_ADDFRAGCNT_SHIFT
-#define I40IWQPRQ_ADDFRAGCNT_MASK I40IWQPSQ_ADDFRAGCNT_MASK
-
-#define I40IWQPRQ_VALID_SHIFT I40IWQPSQ_VALID_SHIFT
-#define I40IWQPRQ_VALID_MASK I40IWQPSQ_VALID_MASK
-
-#define I40IWQPRQ_COMPLCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IWQPRQ_COMPLCTX_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IWQPRQ_FRAG_LEN_SHIFT I40IWQPSQ_FRAG_LEN_SHIFT
-#define I40IWQPRQ_FRAG_LEN_MASK I40IWQPSQ_FRAG_LEN_MASK
-
-#define I40IWQPRQ_STAG_SHIFT I40IWQPSQ_FRAG_STAG_SHIFT
-#define I40IWQPRQ_STAG_MASK I40IWQPSQ_FRAG_STAG_MASK
-
-#define I40IWQPRQ_TO_SHIFT I40IWQPSQ_FRAG_TO_SHIFT
-#define I40IWQPRQ_TO_MASK I40IWQPSQ_FRAG_TO_MASK
-
-/* Query FPM CQP buf */
-#define I40IW_QUERY_FPM_MAX_QPS_SHIFT 0
-#define I40IW_QUERY_FPM_MAX_QPS_MASK \
- (0x7ffffUL << I40IW_QUERY_FPM_MAX_QPS_SHIFT)
-
-#define I40IW_QUERY_FPM_MAX_CQS_SHIFT 0
-#define I40IW_QUERY_FPM_MAX_CQS_MASK \
- (0x3ffffUL << I40IW_QUERY_FPM_MAX_CQS_SHIFT)
-
-#define I40IW_QUERY_FPM_FIRST_PE_SD_INDEX_SHIFT 0
-#define I40IW_QUERY_FPM_FIRST_PE_SD_INDEX_MASK \
- (0x3fffUL << I40IW_QUERY_FPM_FIRST_PE_SD_INDEX_SHIFT)
-
-#define I40IW_QUERY_FPM_MAX_PE_SDS_SHIFT 32
-#define I40IW_QUERY_FPM_MAX_PE_SDS_MASK \
- (0x3fffULL << I40IW_QUERY_FPM_MAX_PE_SDS_SHIFT)
-
-#define I40IW_QUERY_FPM_MAX_CEQS_SHIFT 0
-#define I40IW_QUERY_FPM_MAX_CEQS_MASK \
- (0xffUL << I40IW_QUERY_FPM_MAX_CEQS_SHIFT)
-
-#define I40IW_QUERY_FPM_XFBLOCKSIZE_SHIFT 32
-#define I40IW_QUERY_FPM_XFBLOCKSIZE_MASK \
- (0xffffffffULL << I40IW_QUERY_FPM_XFBLOCKSIZE_SHIFT)
-
-#define I40IW_QUERY_FPM_Q1BLOCKSIZE_SHIFT 32
-#define I40IW_QUERY_FPM_Q1BLOCKSIZE_MASK \
- (0xffffffffULL << I40IW_QUERY_FPM_Q1BLOCKSIZE_SHIFT)
-
-#define I40IW_QUERY_FPM_HTMULTIPLIER_SHIFT 16
-#define I40IW_QUERY_FPM_HTMULTIPLIER_MASK \
- (0xfUL << I40IW_QUERY_FPM_HTMULTIPLIER_SHIFT)
-
-#define I40IW_QUERY_FPM_TIMERBUCKET_SHIFT 32
-#define I40IW_QUERY_FPM_TIMERBUCKET_MASK \
- (0xffffULL << I40IW_QUERY_FPM_TIMERBUCKET_SHIFT)
-
-/* Static HMC pages allocated buf */
-#define I40IW_SHMC_PAGE_ALLOCATED_HMC_FN_ID_SHIFT 0
-#define I40IW_SHMC_PAGE_ALLOCATED_HMC_FN_ID_MASK \
- (0x3fUL << I40IW_SHMC_PAGE_ALLOCATED_HMC_FN_ID_SHIFT)
-
-#define I40IW_HW_PAGE_SIZE 4096
-#define I40IW_DONE_COUNT 1000
-#define I40IW_SLEEP_COUNT 10
-
-enum {
- I40IW_QUEUES_ALIGNMENT_MASK = (128 - 1),
- I40IW_AEQ_ALIGNMENT_MASK = (256 - 1),
- I40IW_Q2_ALIGNMENT_MASK = (256 - 1),
- I40IW_CEQ_ALIGNMENT_MASK = (256 - 1),
- I40IW_CQ0_ALIGNMENT_MASK = (256 - 1),
- I40IW_HOST_CTX_ALIGNMENT_MASK = (4 - 1),
- I40IW_SHADOWAREA_MASK = (128 - 1),
- I40IW_FPM_QUERY_BUF_ALIGNMENT_MASK = (4 - 1),
- I40IW_FPM_COMMIT_BUF_ALIGNMENT_MASK = (4 - 1)
-};
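/*
 * Each of these values is of the form (alignment - 1), so it can be used
 * directly as a bit mask: an address is suitably aligned exactly when
 * (addr & MASK) == 0.  A minimal illustration (the variable name and the
 * error code are hypothetical, not taken from the driver):
 *
 *	if (q2_pa & I40IW_Q2_ALIGNMENT_MASK)	\/\* q2_pa: Q2 buffer physical address \*\/
 *		return I40IW_ERR_INVALID_ALIGNMENT;	\/\* stand-in error code \*\/
 */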
-
-enum i40iw_alignment {
- I40IW_CQP_ALIGNMENT = 0x200,
- I40IW_AEQ_ALIGNMENT = 0x100,
- I40IW_CEQ_ALIGNMENT = 0x100,
- I40IW_CQ0_ALIGNMENT = 0x100,
- I40IW_SD_BUF_ALIGNMENT = 0x80
-};
-
-#define I40IW_WQE_SIZE_64 64
-
-#define I40IW_QP_WQE_MIN_SIZE 32
-#define I40IW_QP_WQE_MAX_SIZE 128
-
-#define I40IW_UPDATE_SD_BUF_SIZE 128
-
-#define I40IW_CQE_QTYPE_RQ 0
-#define I40IW_CQE_QTYPE_SQ 1
-
-#define I40IW_RING_INIT(_ring, _size) \
- { \
- (_ring).head = 0; \
- (_ring).tail = 0; \
- (_ring).size = (_size); \
- }
-#define I40IW_RING_GETSIZE(_ring) ((_ring).size)
-#define I40IW_RING_GETCURRENT_HEAD(_ring) ((_ring).head)
-#define I40IW_RING_GETCURRENT_TAIL(_ring) ((_ring).tail)
-
-#define I40IW_RING_MOVE_HEAD(_ring, _retcode) \
- { \
- register u32 size; \
- size = (_ring).size; \
- if (!I40IW_RING_FULL_ERR(_ring)) { \
- (_ring).head = ((_ring).head + 1) % size; \
- (_retcode) = 0; \
- } else { \
- (_retcode) = I40IW_ERR_RING_FULL; \
- } \
- }
-
-#define I40IW_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \
- { \
- register u32 size; \
- size = (_ring).size; \
- if ((I40IW_RING_WORK_AVAILABLE(_ring) + (_count)) < size) { \
- (_ring).head = ((_ring).head + (_count)) % size; \
- (_retcode) = 0; \
- } else { \
- (_retcode) = I40IW_ERR_RING_FULL; \
- } \
- }
-
-#define I40IW_RING_MOVE_TAIL(_ring) \
- (_ring).tail = ((_ring).tail + 1) % (_ring).size
-
-#define I40IW_RING_MOVE_HEAD_NOCHECK(_ring) \
- (_ring).head = ((_ring).head + 1) % (_ring).size
-
-#define I40IW_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \
- (_ring).tail = ((_ring).tail + (_count)) % (_ring).size
-
-#define I40IW_RING_SET_TAIL(_ring, _pos) \
- (_ring).tail = (_pos) % (_ring).size
-
-#define I40IW_RING_FULL_ERR(_ring) \
- ( \
- (I40IW_RING_WORK_AVAILABLE(_ring) == ((_ring).size - 1)) \
- )
-
-#define I40IW_ERR_RING_FULL2(_ring) \
- ( \
- (I40IW_RING_WORK_AVAILABLE(_ring) == ((_ring).size - 2)) \
- )
-
-#define I40IW_ERR_RING_FULL3(_ring) \
- ( \
- (I40IW_RING_WORK_AVAILABLE(_ring) == ((_ring).size - 3)) \
- )
-
-#define I40IW_RING_MORE_WORK(_ring) \
- ( \
- (I40IW_RING_WORK_AVAILABLE(_ring) != 0) \
- )
-
-#define I40IW_RING_WORK_AVAILABLE(_ring) \
- ( \
- (((_ring).head + (_ring).size - (_ring).tail) % (_ring).size) \
- )
-
-#define I40IW_RING_GET_WQES_AVAILABLE(_ring) \
- ( \
- ((_ring).size - I40IW_RING_WORK_AVAILABLE(_ring) - 1) \
- )
-
-#define I40IW_ATOMIC_RING_MOVE_HEAD(_ring, index, _retcode) \
- { \
- index = I40IW_RING_GETCURRENT_HEAD(_ring); \
- I40IW_RING_MOVE_HEAD(_ring, _retcode); \
- }
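/*
 * The ring macros above implement a plain head/tail circular counter:
 * WORK_AVAILABLE is the number of posted-but-unconsumed slots, and a ring is
 * treated as full when only one free slot remains, so head never catches up
 * with tail.  The standalone sketch below mirrors that arithmetic; the struct
 * layout and function names are illustrative only (the driver keeps
 * head/tail/size in its own ring type).
 */
#include <stdio.h>

struct ring { unsigned int head, tail, size; };

/* Same arithmetic as I40IW_RING_WORK_AVAILABLE. */
static unsigned int work_available(const struct ring *r)
{
	return (r->head + r->size - r->tail) % r->size;
}

/* Same effect as I40IW_RING_MOVE_HEAD: reserve one slot unless the ring is full. */
static int move_head(struct ring *r)
{
	if (work_available(r) == r->size - 1)
		return -1;	/* stand-in for I40IW_ERR_RING_FULL */
	r->head = (r->head + 1) % r->size;
	return 0;
}

int main(void)
{
	struct ring r = { .head = 0, .tail = 0, .size = 4 };	/* cf. I40IW_RING_INIT */

	while (!move_head(&r))
		;	/* post until full */
	printf("slots in use when full: %u of %u\n", work_available(&r), r.size);	/* 3 of 4 */
	return 0;
}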
-
-/* Async Events codes */
-#define I40IW_AE_AMP_UNALLOCATED_STAG 0x0102
-#define I40IW_AE_AMP_INVALID_STAG 0x0103
-#define I40IW_AE_AMP_BAD_QP 0x0104
-#define I40IW_AE_AMP_BAD_PD 0x0105
-#define I40IW_AE_AMP_BAD_STAG_KEY 0x0106
-#define I40IW_AE_AMP_BAD_STAG_INDEX 0x0107
-#define I40IW_AE_AMP_BOUNDS_VIOLATION 0x0108
-#define I40IW_AE_AMP_RIGHTS_VIOLATION 0x0109
-#define I40IW_AE_AMP_TO_WRAP 0x010a
-#define I40IW_AE_AMP_FASTREG_SHARED 0x010b
-#define I40IW_AE_AMP_FASTREG_VALID_STAG 0x010c
-#define I40IW_AE_AMP_FASTREG_MW_STAG 0x010d
-#define I40IW_AE_AMP_FASTREG_INVALID_RIGHTS 0x010e
-#define I40IW_AE_AMP_FASTREG_PBL_TABLE_OVERFLOW 0x010f
-#define I40IW_AE_AMP_FASTREG_INVALID_LENGTH 0x0110
-#define I40IW_AE_AMP_INVALIDATE_SHARED 0x0111
-#define I40IW_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS 0x0112
-#define I40IW_AE_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS 0x0113
-#define I40IW_AE_AMP_MWBIND_VALID_STAG 0x0114
-#define I40IW_AE_AMP_MWBIND_OF_MR_STAG 0x0115
-#define I40IW_AE_AMP_MWBIND_TO_ZERO_BASED_STAG 0x0116
-#define I40IW_AE_AMP_MWBIND_TO_MW_STAG 0x0117
-#define I40IW_AE_AMP_MWBIND_INVALID_RIGHTS 0x0118
-#define I40IW_AE_AMP_MWBIND_INVALID_BOUNDS 0x0119
-#define I40IW_AE_AMP_MWBIND_TO_INVALID_PARENT 0x011a
-#define I40IW_AE_AMP_MWBIND_BIND_DISABLED 0x011b
-#define I40IW_AE_UDA_XMIT_DGRAM_TOO_LONG 0x0132
-#define I40IW_AE_UDA_XMIT_DGRAM_TOO_SHORT 0x0134
-#define I40IW_AE_BAD_CLOSE 0x0201
-#define I40IW_AE_RDMAP_ROE_BAD_LLP_CLOSE 0x0202
-#define I40IW_AE_CQ_OPERATION_ERROR 0x0203
-#define I40IW_AE_PRIV_OPERATION_DENIED 0x011c
-#define I40IW_AE_RDMA_READ_WHILE_ORD_ZERO 0x0205
-#define I40IW_AE_STAG_ZERO_INVALID 0x0206
-#define I40IW_AE_IB_RREQ_AND_Q1_FULL 0x0207
-#define I40IW_AE_WQE_UNEXPECTED_OPCODE 0x020a
-#define I40IW_AE_WQE_INVALID_PARAMETER 0x020b
-#define I40IW_AE_WQE_LSMM_TOO_LONG 0x0220
-#define I40IW_AE_DDP_INVALID_MSN_GAP_IN_MSN 0x0301
-#define I40IW_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER 0x0303
-#define I40IW_AE_DDP_UBE_INVALID_DDP_VERSION 0x0304
-#define I40IW_AE_DDP_UBE_INVALID_MO 0x0305
-#define I40IW_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE 0x0306
-#define I40IW_AE_DDP_UBE_INVALID_QN 0x0307
-#define I40IW_AE_DDP_NO_L_BIT 0x0308
-#define I40IW_AE_RDMAP_ROE_INVALID_RDMAP_VERSION 0x0311
-#define I40IW_AE_RDMAP_ROE_UNEXPECTED_OPCODE 0x0312
-#define I40IW_AE_ROE_INVALID_RDMA_READ_REQUEST 0x0313
-#define I40IW_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP 0x0314
-#define I40IW_AE_INVALID_ARP_ENTRY 0x0401
-#define I40IW_AE_INVALID_TCP_OPTION_RCVD 0x0402
-#define I40IW_AE_STALE_ARP_ENTRY 0x0403
-#define I40IW_AE_INVALID_MAC_ENTRY 0x0405
-#define I40IW_AE_LLP_CLOSE_COMPLETE 0x0501
-#define I40IW_AE_LLP_CONNECTION_RESET 0x0502
-#define I40IW_AE_LLP_FIN_RECEIVED 0x0503
-#define I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR 0x0505
-#define I40IW_AE_LLP_SEGMENT_TOO_LARGE 0x0506
-#define I40IW_AE_LLP_SEGMENT_TOO_SMALL 0x0507
-#define I40IW_AE_LLP_SYN_RECEIVED 0x0508
-#define I40IW_AE_LLP_TERMINATE_RECEIVED 0x0509
-#define I40IW_AE_LLP_TOO_MANY_RETRIES 0x050a
-#define I40IW_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES 0x050b
-#define I40IW_AE_LLP_DOUBT_REACHABILITY 0x050c
-#define I40IW_AE_LLP_RX_VLAN_MISMATCH 0x050d
-#define I40IW_AE_RESOURCE_EXHAUSTION 0x0520
-#define I40IW_AE_RESET_SENT 0x0601
-#define I40IW_AE_TERMINATE_SENT 0x0602
-#define I40IW_AE_RESET_NOT_SENT 0x0603
-#define I40IW_AE_LCE_QP_CATASTROPHIC 0x0700
-#define I40IW_AE_LCE_FUNCTION_CATASTROPHIC 0x0701
-#define I40IW_AE_LCE_CQ_CATASTROPHIC 0x0702
-#define I40IW_AE_QP_SUSPEND_COMPLETE 0x0900
-
-#define OP_DELETE_LOCAL_MAC_IPADDR_ENTRY 1
-#define OP_CEQ_DESTROY 2
-#define OP_AEQ_DESTROY 3
-#define OP_DELETE_ARP_CACHE_ENTRY 4
-#define OP_MANAGE_APBVT_ENTRY 5
-#define OP_CEQ_CREATE 6
-#define OP_AEQ_CREATE 7
-#define OP_ALLOC_LOCAL_MAC_IPADDR_ENTRY 8
-#define OP_ADD_LOCAL_MAC_IPADDR_ENTRY 9
-#define OP_MANAGE_QHASH_TABLE_ENTRY 10
-#define OP_QP_MODIFY 11
-#define OP_QP_UPLOAD_CONTEXT 12
-#define OP_CQ_CREATE 13
-#define OP_CQ_DESTROY 14
-#define OP_QP_CREATE 15
-#define OP_QP_DESTROY 16
-#define OP_ALLOC_STAG 17
-#define OP_MR_REG_NON_SHARED 18
-#define OP_DEALLOC_STAG 19
-#define OP_MW_ALLOC 20
-#define OP_QP_FLUSH_WQES 21
-#define OP_ADD_ARP_CACHE_ENTRY 22
-#define OP_MANAGE_PUSH_PAGE 23
-#define OP_UPDATE_PE_SDS 24
-#define OP_MANAGE_HMC_PM_FUNC_TABLE 25
-#define OP_SUSPEND 26
-#define OP_RESUME 27
-#define OP_MANAGE_VF_PBLE_BP 28
-#define OP_QUERY_FPM_VALUES 29
-#define OP_COMMIT_FPM_VALUES 30
-#define OP_REQUESTED_COMMANDS 31
-#define OP_COMPLETED_COMMANDS 32
-#define OP_GEN_AE 33
-#define OP_SIZE_CQP_STAT_ARRAY 34
-
-#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hmc.c b/drivers/infiniband/hw/i40iw/i40iw_hmc.c
deleted file mode 100644
index 5484cbf55f0f..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_hmc.c
+++ /dev/null
@@ -1,821 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#include "i40iw_osdep.h"
-#include "i40iw_register.h"
-#include "i40iw_status.h"
-#include "i40iw_hmc.h"
-#include "i40iw_d.h"
-#include "i40iw_type.h"
-#include "i40iw_p.h"
-#include "i40iw_vf.h"
-#include "i40iw_virtchnl.h"
-
-/**
- * i40iw_find_sd_index_limit - finds segment descriptor index limit
- * @hmc_info: pointer to the HMC configuration information structure
- * @type: type of HMC resources we're searching
- * @idx: starting index for the object
- * @cnt: number of objects we're trying to create
- * @sd_idx: pointer to return index of the segment descriptor in question
- * @sd_limit: pointer to return the maximum number of segment descriptors
- *
- * This function calculates the segment descriptor index and index limit
- * for the resource defined by i40iw_hmc_rsrc_type.
- */
-
-static inline void i40iw_find_sd_index_limit(struct i40iw_hmc_info *hmc_info,
- u32 type,
- u32 idx,
- u32 cnt,
- u32 *sd_idx,
- u32 *sd_limit)
-{
- u64 fpm_addr, fpm_limit;
-
- fpm_addr = hmc_info->hmc_obj[type].base +
- hmc_info->hmc_obj[type].size * idx;
- fpm_limit = fpm_addr + hmc_info->hmc_obj[type].size * cnt;
- *sd_idx = (u32)(fpm_addr / I40IW_HMC_DIRECT_BP_SIZE);
- *sd_limit = (u32)((fpm_limit - 1) / I40IW_HMC_DIRECT_BP_SIZE);
- *sd_limit += 1;
-}
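/*
 * Worked example of the index arithmetic above, using made-up numbers and
 * assuming a 2 MB (0x200000) direct backing-page size for
 * I40IW_HMC_DIRECT_BP_SIZE (illustration only):
 *
 *	base = 0, obj size = 512 bytes, idx = 8000, cnt = 4096
 *
 *	fpm_addr  = 0 + 512 * 8000               = 4096000
 *	fpm_limit = 4096000 + 512 * 4096         = 6193152
 *	sd_idx    = 4096000 / 0x200000           = 1
 *	sd_limit  = (6193152 - 1) / 0x200000 + 1 = 3
 *
 * so the object range spans segment descriptors 1 and 2, and *sd_limit is
 * returned as one past the last descriptor touched.
 */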
-
-/**
- * i40iw_find_pd_index_limit - finds page descriptor index limit
- * @hmc_info: pointer to the HMC configuration information struct
- * @type: HMC resource type we're examining
- * @idx: starting index for the object
- * @cnt: number of objects we're trying to create
- * @pd_idx: pointer to return page descriptor index
- * @pd_limit: pointer to return page descriptor index limit
- *
- * Calculates the page descriptor index and index limit for the resource
- * defined by i40iw_hmc_rsrc_type.
- */
-
-static inline void i40iw_find_pd_index_limit(struct i40iw_hmc_info *hmc_info,
- u32 type,
- u32 idx,
- u32 cnt,
- u32 *pd_idx,
- u32 *pd_limit)
-{
- u64 fpm_adr, fpm_limit;
-
- fpm_adr = hmc_info->hmc_obj[type].base +
- hmc_info->hmc_obj[type].size * idx;
- fpm_limit = fpm_adr + hmc_info->hmc_obj[type].size * cnt;
- *pd_idx = (u32)(fpm_adr / I40IW_HMC_PAGED_BP_SIZE);
- *pd_limit = (u32)((fpm_limit - 1) / I40IW_HMC_PAGED_BP_SIZE);
- *pd_limit += 1;
-}
-
-/**
- * i40iw_set_sd_entry - setup entry for sd programming
- * @pa: physical addr
- * @idx: sd index
- * @type: paged or direct sd
- * @entry: sd entry ptr
- */
-static inline void i40iw_set_sd_entry(u64 pa,
- u32 idx,
- enum i40iw_sd_entry_type type,
- struct update_sd_entry *entry)
-{
- entry->data = pa | (I40IW_HMC_MAX_BP_COUNT << I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT) |
- (((type == I40IW_SD_TYPE_PAGED) ? 0 : 1) <<
- I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT) |
- (1 << I40E_PFHMC_SDDATALOW_PMSDVALID_SHIFT);
- entry->cmd = (idx | (1 << I40E_PFHMC_SDCMD_PMSDWR_SHIFT) | (1 << 15));
-}
-
-/**
- * i40iw_clr_sd_entry - setup entry for sd clear
- * @idx: sd index
- * @type: paged or direct sd
- * @entry: sd entry ptr
- */
-static inline void i40iw_clr_sd_entry(u32 idx, enum i40iw_sd_entry_type type,
- struct update_sd_entry *entry)
-{
- entry->data = (I40IW_HMC_MAX_BP_COUNT <<
- I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT) |
- (((type == I40IW_SD_TYPE_PAGED) ? 0 : 1) <<
- I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT);
- entry->cmd = (idx | (1 << I40E_PFHMC_SDCMD_PMSDWR_SHIFT) | (1 << 15));
-}
-
-/**
- * i40iw_hmc_sd_one - setup 1 sd entry for cqp
- * @dev: pointer to the device structure
- * @hmc_fn_id: hmc's function id
- * @pa: physical addr
- * @sd_idx: sd index
- * @type: paged or direct sd
- * @setsd: flag to set or clear sd
- */
-enum i40iw_status_code i40iw_hmc_sd_one(struct i40iw_sc_dev *dev,
- u8 hmc_fn_id,
- u64 pa, u32 sd_idx,
- enum i40iw_sd_entry_type type,
- bool setsd)
-{
- struct i40iw_update_sds_info sdinfo;
-
- sdinfo.cnt = 1;
- sdinfo.hmc_fn_id = hmc_fn_id;
- if (setsd)
- i40iw_set_sd_entry(pa, sd_idx, type, sdinfo.entry);
- else
- i40iw_clr_sd_entry(sd_idx, type, sdinfo.entry);
-
- return dev->cqp->process_cqp_sds(dev, &sdinfo);
-}
-
-/**
- * i40iw_hmc_sd_grp - setup group of sd entries for cqp
- * @dev: pointer to the device structure
- * @hmc_info: pointer to the HMC configuration information struct
- * @sd_index: sd index
- * @sd_cnt: number of sd entries
- * @setsd: flag to set or clear sd
- */
-static enum i40iw_status_code i40iw_hmc_sd_grp(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_info *hmc_info,
- u32 sd_index,
- u32 sd_cnt,
- bool setsd)
-{
- struct i40iw_hmc_sd_entry *sd_entry;
- struct i40iw_update_sds_info sdinfo;
- u64 pa;
- u32 i;
- enum i40iw_status_code ret_code = 0;
-
- memset(&sdinfo, 0, sizeof(sdinfo));
- sdinfo.hmc_fn_id = hmc_info->hmc_fn_id;
- for (i = sd_index; i < sd_index + sd_cnt; i++) {
- sd_entry = &hmc_info->sd_table.sd_entry[i];
- if (!sd_entry ||
- (!sd_entry->valid && setsd) ||
- (sd_entry->valid && !setsd))
- continue;
- if (setsd) {
- pa = (sd_entry->entry_type == I40IW_SD_TYPE_PAGED) ?
- sd_entry->u.pd_table.pd_page_addr.pa :
- sd_entry->u.bp.addr.pa;
- i40iw_set_sd_entry(pa, i, sd_entry->entry_type,
- &sdinfo.entry[sdinfo.cnt]);
- } else {
- i40iw_clr_sd_entry(i, sd_entry->entry_type,
- &sdinfo.entry[sdinfo.cnt]);
- }
- sdinfo.cnt++;
- if (sdinfo.cnt == I40IW_MAX_SD_ENTRIES) {
- ret_code = dev->cqp->process_cqp_sds(dev, &sdinfo);
- if (ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "i40iw_hmc_sd_grp: sd_programming failed err=%d\n",
- ret_code);
- return ret_code;
- }
- sdinfo.cnt = 0;
- }
- }
- if (sdinfo.cnt)
- ret_code = dev->cqp->process_cqp_sds(dev, &sdinfo);
-
- return ret_code;
-}
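-
-/*
- * Note (editor's illustration): sd entries are batched into sdinfo.entry[]
- * and flushed through process_cqp_sds() whenever I40IW_MAX_SD_ENTRIES (11)
- * entries are queued, with one final flush for the remainder. Programming
- * 25 segment descriptors therefore issues three CQP commands covering 11,
- * 11 and 3 entries.
- */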
-
-/**
- * i40iw_vfdev_from_fpm - return vf dev ptr for hmc function id
- * @dev: pointer to the device structure
- * @hmc_fn_id: hmc's function id
- */
-struct i40iw_vfdev *i40iw_vfdev_from_fpm(struct i40iw_sc_dev *dev, u8 hmc_fn_id)
-{
- struct i40iw_vfdev *vf_dev = NULL;
- u16 idx;
-
- for (idx = 0; idx < I40IW_MAX_PE_ENABLED_VF_COUNT; idx++) {
- if (dev->vf_dev[idx] &&
- ((u8)dev->vf_dev[idx]->pmf_index == hmc_fn_id)) {
- vf_dev = dev->vf_dev[idx];
- break;
- }
- }
- return vf_dev;
-}
-
-/**
- * i40iw_vf_hmcinfo_from_fpm - get ptr to hmc for func_id
- * @dev: pointer to the device structure
- * @hmc_fn_id: hmc's function id
- */
-struct i40iw_hmc_info *i40iw_vf_hmcinfo_from_fpm(struct i40iw_sc_dev *dev,
- u8 hmc_fn_id)
-{
- struct i40iw_hmc_info *hmc_info = NULL;
- u16 idx;
-
- for (idx = 0; idx < I40IW_MAX_PE_ENABLED_VF_COUNT; idx++) {
- if (dev->vf_dev[idx] &&
- ((u8)dev->vf_dev[idx]->pmf_index == hmc_fn_id)) {
- hmc_info = &dev->vf_dev[idx]->hmc_info;
- break;
- }
- }
- return hmc_info;
-}
-
-/**
- * i40iw_hmc_finish_add_sd_reg - program sd entries for objects
- * @dev: pointer to the device structure
- * @info: create obj info
- */
-static enum i40iw_status_code i40iw_hmc_finish_add_sd_reg(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_create_obj_info *info)
-{
- if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt)
- return I40IW_ERR_INVALID_HMC_OBJ_INDEX;
-
- if ((info->start_idx + info->count) >
- info->hmc_info->hmc_obj[info->rsrc_type].cnt)
- return I40IW_ERR_INVALID_HMC_OBJ_COUNT;
-
- if (!info->add_sd_cnt)
- return 0;
-
- return i40iw_hmc_sd_grp(dev, info->hmc_info,
- info->hmc_info->sd_indexes[0],
- info->add_sd_cnt, true);
-}
-
-/**
- * i40iw_sc_create_hmc_obj - allocate backing store for hmc objects
- * @dev: pointer to the device structure
- * @info: pointer to i40iw_hmc_create_obj_info struct
- *
- * This will allocate memory for PDs and backing pages and populate
- * the sd and pd entries.
- */
-enum i40iw_status_code i40iw_sc_create_hmc_obj(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_create_obj_info *info)
-{
- struct i40iw_hmc_sd_entry *sd_entry;
- u32 sd_idx, sd_lmt;
- u32 pd_idx = 0, pd_lmt = 0;
- u32 pd_idx1 = 0, pd_lmt1 = 0;
- u32 i, j;
- bool pd_error = false;
- enum i40iw_status_code ret_code = 0;
-
- if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt)
- return I40IW_ERR_INVALID_HMC_OBJ_INDEX;
-
- if ((info->start_idx + info->count) >
- info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "%s: error type %u, start = %u, req cnt %u, cnt = %u\n",
- __func__, info->rsrc_type, info->start_idx, info->count,
- info->hmc_info->hmc_obj[info->rsrc_type].cnt);
- return I40IW_ERR_INVALID_HMC_OBJ_COUNT;
- }
-
- if (!dev->is_pf)
- return i40iw_vchnl_vf_add_hmc_objs(dev, info->rsrc_type, 0, info->count);
-
- i40iw_find_sd_index_limit(info->hmc_info, info->rsrc_type,
- info->start_idx, info->count,
- &sd_idx, &sd_lmt);
- if (sd_idx >= info->hmc_info->sd_table.sd_cnt ||
- sd_lmt > info->hmc_info->sd_table.sd_cnt) {
- return I40IW_ERR_INVALID_SD_INDEX;
- }
- i40iw_find_pd_index_limit(info->hmc_info, info->rsrc_type,
- info->start_idx, info->count, &pd_idx, &pd_lmt);
-
- for (j = sd_idx; j < sd_lmt; j++) {
- ret_code = i40iw_add_sd_table_entry(dev->hw, info->hmc_info,
- j,
- info->entry_type,
- I40IW_HMC_DIRECT_BP_SIZE);
- if (ret_code)
- goto exit_sd_error;
- sd_entry = &info->hmc_info->sd_table.sd_entry[j];
-
- if ((sd_entry->entry_type == I40IW_SD_TYPE_PAGED) &&
- ((dev->hmc_info == info->hmc_info) &&
- (info->rsrc_type != I40IW_HMC_IW_PBLE))) {
- pd_idx1 = max(pd_idx, (j * I40IW_HMC_MAX_BP_COUNT));
- pd_lmt1 = min(pd_lmt,
- (j + 1) * I40IW_HMC_MAX_BP_COUNT);
- for (i = pd_idx1; i < pd_lmt1; i++) {
- /* update the pd table entry */
- ret_code = i40iw_add_pd_table_entry(dev->hw, info->hmc_info,
- i, NULL);
- if (ret_code) {
- pd_error = true;
- break;
- }
- }
- if (pd_error) {
- while (i && (i > pd_idx1)) {
- i40iw_remove_pd_bp(dev->hw, info->hmc_info, (i - 1),
- info->is_pf);
- i--;
- }
- }
- }
- if (sd_entry->valid)
- continue;
-
- info->hmc_info->sd_indexes[info->add_sd_cnt] = (u16)j;
- info->add_sd_cnt++;
- sd_entry->valid = true;
- }
- return i40iw_hmc_finish_add_sd_reg(dev, info);
-
-exit_sd_error:
- while (j && (j > sd_idx)) {
- sd_entry = &info->hmc_info->sd_table.sd_entry[j - 1];
- switch (sd_entry->entry_type) {
- case I40IW_SD_TYPE_PAGED:
- pd_idx1 = max(pd_idx,
- (j - 1) * I40IW_HMC_MAX_BP_COUNT);
- pd_lmt1 = min(pd_lmt, (j * I40IW_HMC_MAX_BP_COUNT));
- for (i = pd_idx1; i < pd_lmt1; i++)
- i40iw_prep_remove_pd_page(info->hmc_info, i);
- break;
- case I40IW_SD_TYPE_DIRECT:
- i40iw_prep_remove_pd_page(info->hmc_info, (j - 1));
- break;
- default:
- ret_code = I40IW_ERR_INVALID_SD_TYPE;
- break;
- }
- j--;
- }
-
- return ret_code;
-}
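-
-/*
- * Note (editor's illustration): inside the loop above, the pd range for a
- * paged sd entry j is clamped to the pds that sd covers, i.e.
- * [j * I40IW_HMC_MAX_BP_COUNT, (j + 1) * I40IW_HMC_MAX_BP_COUNT). For
- * example, with pd_idx = 300, pd_lmt = 900 and j = 1 this gives
- * pd_idx1 = max(300, 512) = 512 and pd_lmt1 = min(900, 1024) = 900.
- */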
-
-/**
- * i40iw_finish_del_sd_reg - delete sd entries for objects
- * @dev: pointer to the device structure
- * @info: delete obj info
- * @reset: true if called before reset
- */
-static enum i40iw_status_code i40iw_finish_del_sd_reg(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_del_obj_info *info,
- bool reset)
-{
- struct i40iw_hmc_sd_entry *sd_entry;
- enum i40iw_status_code ret_code = 0;
- u32 i, sd_idx;
- struct i40iw_dma_mem *mem;
-
- if (dev->is_pf && !reset)
- ret_code = i40iw_hmc_sd_grp(dev, info->hmc_info,
- info->hmc_info->sd_indexes[0],
- info->del_sd_cnt, false);
-
- if (ret_code)
- i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: error cqp sd_grp\n", __func__);
-
- for (i = 0; i < info->del_sd_cnt; i++) {
- sd_idx = info->hmc_info->sd_indexes[i];
- sd_entry = &info->hmc_info->sd_table.sd_entry[sd_idx];
- if (!sd_entry)
- continue;
- mem = (sd_entry->entry_type == I40IW_SD_TYPE_PAGED) ?
- &sd_entry->u.pd_table.pd_page_addr :
- &sd_entry->u.bp.addr;
-
- if (!mem || !mem->va)
- i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: error cqp sd mem\n", __func__);
- else
- i40iw_free_dma_mem(dev->hw, mem);
- }
- return ret_code;
-}
-
-/**
- * i40iw_sc_del_hmc_obj - remove pe hmc objects
- * @dev: pointer to the device structure
- * @info: pointer to i40iw_hmc_del_obj_info struct
- * @reset: true if called before reset
- *
- * This will de-populate the SDs and PDs. It frees
- * the memory for PDs and backing storage. After this function returns,
- * the caller should deallocate memory previously allocated for
- * book-keeping information about PDs and backing storage.
- */
-enum i40iw_status_code i40iw_sc_del_hmc_obj(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_del_obj_info *info,
- bool reset)
-{
- struct i40iw_hmc_pd_table *pd_table;
- u32 sd_idx, sd_lmt;
- u32 pd_idx, pd_lmt, rel_pd_idx;
- u32 i, j;
- enum i40iw_status_code ret_code = 0;
-
- if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "%s: error start_idx[%04d] >= [type %04d].cnt[%04d]\n",
- __func__, info->start_idx, info->rsrc_type,
- info->hmc_info->hmc_obj[info->rsrc_type].cnt);
- return I40IW_ERR_INVALID_HMC_OBJ_INDEX;
- }
-
- if ((info->start_idx + info->count) >
- info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "%s: error start_idx[%04d] + count %04d >= [type %04d].cnt[%04d]\n",
- __func__, info->start_idx, info->count,
- info->rsrc_type,
- info->hmc_info->hmc_obj[info->rsrc_type].cnt);
- return I40IW_ERR_INVALID_HMC_OBJ_COUNT;
- }
- if (!dev->is_pf) {
- ret_code = i40iw_vchnl_vf_del_hmc_obj(dev, info->rsrc_type, 0,
- info->count);
- if (info->rsrc_type != I40IW_HMC_IW_PBLE)
- return ret_code;
- }
-
- i40iw_find_pd_index_limit(info->hmc_info, info->rsrc_type,
- info->start_idx, info->count, &pd_idx, &pd_lmt);
-
- for (j = pd_idx; j < pd_lmt; j++) {
- sd_idx = j / I40IW_HMC_PD_CNT_IN_SD;
-
- if (info->hmc_info->sd_table.sd_entry[sd_idx].entry_type !=
- I40IW_SD_TYPE_PAGED)
- continue;
-
- rel_pd_idx = j % I40IW_HMC_PD_CNT_IN_SD;
- pd_table = &info->hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
- if (pd_table->pd_entry[rel_pd_idx].valid) {
- ret_code = i40iw_remove_pd_bp(dev->hw, info->hmc_info, j,
- info->is_pf);
- if (ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: error\n", __func__);
- return ret_code;
- }
- }
- }
-
- i40iw_find_sd_index_limit(info->hmc_info, info->rsrc_type,
- info->start_idx, info->count, &sd_idx, &sd_lmt);
- if (sd_idx >= info->hmc_info->sd_table.sd_cnt ||
- sd_lmt > info->hmc_info->sd_table.sd_cnt) {
- i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: error invalid sd_idx\n", __func__);
- return I40IW_ERR_INVALID_SD_INDEX;
- }
-
- for (i = sd_idx; i < sd_lmt; i++) {
- if (!info->hmc_info->sd_table.sd_entry[i].valid)
- continue;
- switch (info->hmc_info->sd_table.sd_entry[i].entry_type) {
- case I40IW_SD_TYPE_DIRECT:
- ret_code = i40iw_prep_remove_sd_bp(info->hmc_info, i);
- if (!ret_code) {
- info->hmc_info->sd_indexes[info->del_sd_cnt] = (u16)i;
- info->del_sd_cnt++;
- }
- break;
- case I40IW_SD_TYPE_PAGED:
- ret_code = i40iw_prep_remove_pd_page(info->hmc_info, i);
- if (!ret_code) {
- info->hmc_info->sd_indexes[info->del_sd_cnt] = (u16)i;
- info->del_sd_cnt++;
- }
- break;
- default:
- break;
- }
- }
- return i40iw_finish_del_sd_reg(dev, info, reset);
-}
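-
-/*
- * Note (editor's illustration): each sd covers I40IW_HMC_PD_CNT_IN_SD (512)
- * page descriptors, so a global pd index j maps to sd_idx = j / 512 and
- * rel_pd_idx = j % 512. For example, j = 1500 lands in sd 2 at relative
- * pd index 476.
- */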
-
-/**
- * i40iw_add_sd_table_entry - Adds a segment descriptor to the table
- * @hw: pointer to our hw struct
- * @hmc_info: pointer to the HMC configuration information struct
- * @sd_index: segment descriptor index to manipulate
- * @type: what type of segment descriptor we're manipulating
- * @direct_mode_sz: size to alloc in direct mode
- */
-enum i40iw_status_code i40iw_add_sd_table_entry(struct i40iw_hw *hw,
- struct i40iw_hmc_info *hmc_info,
- u32 sd_index,
- enum i40iw_sd_entry_type type,
- u64 direct_mode_sz)
-{
- enum i40iw_status_code ret_code = 0;
- struct i40iw_hmc_sd_entry *sd_entry;
- bool dma_mem_alloc_done = false;
- struct i40iw_dma_mem mem;
- u64 alloc_len;
-
- sd_entry = &hmc_info->sd_table.sd_entry[sd_index];
- if (!sd_entry->valid) {
- if (type == I40IW_SD_TYPE_PAGED)
- alloc_len = I40IW_HMC_PAGED_BP_SIZE;
- else
- alloc_len = direct_mode_sz;
-
- /* allocate a 4K pd page or 2M backing page */
- ret_code = i40iw_allocate_dma_mem(hw, &mem, alloc_len,
- I40IW_HMC_PD_BP_BUF_ALIGNMENT);
- if (ret_code)
- goto exit;
- dma_mem_alloc_done = true;
- if (type == I40IW_SD_TYPE_PAGED) {
- ret_code = i40iw_allocate_virt_mem(hw,
- &sd_entry->u.pd_table.pd_entry_virt_mem,
- sizeof(struct i40iw_hmc_pd_entry) * 512);
- if (ret_code)
- goto exit;
- sd_entry->u.pd_table.pd_entry = (struct i40iw_hmc_pd_entry *)
- sd_entry->u.pd_table.pd_entry_virt_mem.va;
-
- memcpy(&sd_entry->u.pd_table.pd_page_addr, &mem, sizeof(struct i40iw_dma_mem));
- } else {
- memcpy(&sd_entry->u.bp.addr, &mem, sizeof(struct i40iw_dma_mem));
- sd_entry->u.bp.sd_pd_index = sd_index;
- }
-
- hmc_info->sd_table.sd_entry[sd_index].entry_type = type;
-
- I40IW_INC_SD_REFCNT(&hmc_info->sd_table);
- }
- if (sd_entry->entry_type == I40IW_SD_TYPE_DIRECT)
- I40IW_INC_BP_REFCNT(&sd_entry->u.bp);
-exit:
- if (ret_code)
- if (dma_mem_alloc_done)
- i40iw_free_dma_mem(hw, &mem);
-
- return ret_code;
-}
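-
-/*
- * Note (editor's illustration): for a paged sd the function above allocates
- * a 4K pd page (I40IW_HMC_PAGED_BP_SIZE) plus virtual memory for 512
- * struct i40iw_hmc_pd_entry descriptors; for a direct sd it allocates
- * direct_mode_sz bytes of backing memory (2M, I40IW_HMC_DIRECT_BP_SIZE,
- * when called from i40iw_sc_create_hmc_obj).
- */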
-
-/**
- * i40iw_add_pd_table_entry - Adds page descriptor to the specified table
- * @hw: pointer to our HW structure
- * @hmc_info: pointer to the HMC configuration information structure
- * @pd_index: which page descriptor index to manipulate
- * @rsrc_pg: if not NULL, use preallocated page instead of allocating new one.
- *
- * This function:
- * 1. Initializes the pd entry
- * 2. Adds pd_entry in the pd_table
- * 3. Marks the entry valid in the i40iw_hmc_pd_entry structure
- * 4. Initializes the pd_entry's ref count to 1
- * assumptions:
- * 1. The memory for the pd should be pinned down, physically contiguous,
- * aligned on a 4K boundary and zeroed.
- * 2. It should be 4K in size.
- */
-enum i40iw_status_code i40iw_add_pd_table_entry(struct i40iw_hw *hw,
- struct i40iw_hmc_info *hmc_info,
- u32 pd_index,
- struct i40iw_dma_mem *rsrc_pg)
-{
- enum i40iw_status_code ret_code = 0;
- struct i40iw_hmc_pd_table *pd_table;
- struct i40iw_hmc_pd_entry *pd_entry;
- struct i40iw_dma_mem mem;
- struct i40iw_dma_mem *page = &mem;
- u32 sd_idx, rel_pd_idx;
- u64 *pd_addr;
- u64 page_desc;
-
- if (pd_index / I40IW_HMC_PD_CNT_IN_SD >= hmc_info->sd_table.sd_cnt)
- return I40IW_ERR_INVALID_PAGE_DESC_INDEX;
-
- sd_idx = (pd_index / I40IW_HMC_PD_CNT_IN_SD);
- if (hmc_info->sd_table.sd_entry[sd_idx].entry_type != I40IW_SD_TYPE_PAGED)
- return 0;
-
- rel_pd_idx = (pd_index % I40IW_HMC_PD_CNT_IN_SD);
- pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
- pd_entry = &pd_table->pd_entry[rel_pd_idx];
- if (!pd_entry->valid) {
- if (rsrc_pg) {
- pd_entry->rsrc_pg = true;
- page = rsrc_pg;
- } else {
- ret_code = i40iw_allocate_dma_mem(hw, page,
- I40IW_HMC_PAGED_BP_SIZE,
- I40IW_HMC_PD_BP_BUF_ALIGNMENT);
- if (ret_code)
- return ret_code;
- pd_entry->rsrc_pg = false;
- }
-
- memcpy(&pd_entry->bp.addr, page, sizeof(struct i40iw_dma_mem));
- pd_entry->bp.sd_pd_index = pd_index;
- pd_entry->bp.entry_type = I40IW_SD_TYPE_PAGED;
- page_desc = page->pa | 0x1;
-
- pd_addr = (u64 *)pd_table->pd_page_addr.va;
- pd_addr += rel_pd_idx;
-
- memcpy(pd_addr, &page_desc, sizeof(*pd_addr));
-
- pd_entry->sd_index = sd_idx;
- pd_entry->valid = true;
- I40IW_INC_PD_REFCNT(pd_table);
- if (hmc_info->hmc_fn_id < I40IW_FIRST_VF_FPM_ID)
- I40IW_INVALIDATE_PF_HMC_PD(hw, sd_idx, rel_pd_idx);
- else if (hw->hmc.hmc_fn_id != hmc_info->hmc_fn_id)
- I40IW_INVALIDATE_VF_HMC_PD(hw, sd_idx, rel_pd_idx,
- hmc_info->hmc_fn_id);
- }
- I40IW_INC_BP_REFCNT(&pd_entry->bp);
-
- return 0;
-}
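-
-/*
- * Note (editor's illustration): the descriptor written into the pd page is
- * the backing page's physical address with bit 0 set as the valid bit, e.g.
- * a page at pa = 0x1f40000 is written as page_desc = 0x1f40001.
- */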
-
-/**
- * i40iw_remove_pd_bp - remove a backing page from a page descriptor
- * @hw: pointer to our HW structure
- * @hmc_info: pointer to the HMC configuration information structure
- * @idx: the page index
- * @is_pf: distinguishes a VF from a PF
- *
- * This function:
- * 1. Marks the entry in pd table (for paged address mode) or in sd table
- * (for direct address mode) invalid.
- * 2. Writes to register PMPDINV to invalidate the backing page in the FV cache
- * 3. Decrements the ref count for the pd_entry
- * assumptions:
- * 1. Caller can deallocate the memory used by backing storage after this
- * function returns.
- */
-enum i40iw_status_code i40iw_remove_pd_bp(struct i40iw_hw *hw,
- struct i40iw_hmc_info *hmc_info,
- u32 idx,
- bool is_pf)
-{
- struct i40iw_hmc_pd_entry *pd_entry;
- struct i40iw_hmc_pd_table *pd_table;
- struct i40iw_hmc_sd_entry *sd_entry;
- u32 sd_idx, rel_pd_idx;
- struct i40iw_dma_mem *mem;
- u64 *pd_addr;
-
- sd_idx = idx / I40IW_HMC_PD_CNT_IN_SD;
- rel_pd_idx = idx % I40IW_HMC_PD_CNT_IN_SD;
- if (sd_idx >= hmc_info->sd_table.sd_cnt)
- return I40IW_ERR_INVALID_PAGE_DESC_INDEX;
-
- sd_entry = &hmc_info->sd_table.sd_entry[sd_idx];
- if (sd_entry->entry_type != I40IW_SD_TYPE_PAGED)
- return I40IW_ERR_INVALID_SD_TYPE;
-
- pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
- pd_entry = &pd_table->pd_entry[rel_pd_idx];
- I40IW_DEC_BP_REFCNT(&pd_entry->bp);
- if (pd_entry->bp.ref_cnt)
- return 0;
-
- pd_entry->valid = false;
- I40IW_DEC_PD_REFCNT(pd_table);
- pd_addr = (u64 *)pd_table->pd_page_addr.va;
- pd_addr += rel_pd_idx;
- memset(pd_addr, 0, sizeof(u64));
- if (is_pf)
- I40IW_INVALIDATE_PF_HMC_PD(hw, sd_idx, idx);
- else
- I40IW_INVALIDATE_VF_HMC_PD(hw, sd_idx, idx,
- hmc_info->hmc_fn_id);
-
- if (!pd_entry->rsrc_pg) {
- mem = &pd_entry->bp.addr;
- if (!mem || !mem->va)
- return I40IW_ERR_PARAM;
- i40iw_free_dma_mem(hw, mem);
- }
- if (!pd_table->ref_cnt)
- i40iw_free_virt_mem(hw, &pd_table->pd_entry_virt_mem);
-
- return 0;
-}
-
-/**
- * i40iw_prep_remove_sd_bp - Prepares to remove a backing page from a sd entry
- * @hmc_info: pointer to the HMC configuration information structure
- * @idx: the page index
- */
-enum i40iw_status_code i40iw_prep_remove_sd_bp(struct i40iw_hmc_info *hmc_info, u32 idx)
-{
- struct i40iw_hmc_sd_entry *sd_entry;
-
- sd_entry = &hmc_info->sd_table.sd_entry[idx];
- I40IW_DEC_BP_REFCNT(&sd_entry->u.bp);
- if (sd_entry->u.bp.ref_cnt)
- return I40IW_ERR_NOT_READY;
-
- I40IW_DEC_SD_REFCNT(&hmc_info->sd_table);
- sd_entry->valid = false;
-
- return 0;
-}
-
-/**
- * i40iw_prep_remove_pd_page - Prepares to remove a PD page from sd entry.
- * @hmc_info: pointer to the HMC configuration information structure
- * @idx: segment descriptor index to find the relevant page descriptor
- */
-enum i40iw_status_code i40iw_prep_remove_pd_page(struct i40iw_hmc_info *hmc_info,
- u32 idx)
-{
- struct i40iw_hmc_sd_entry *sd_entry;
-
- sd_entry = &hmc_info->sd_table.sd_entry[idx];
-
- if (sd_entry->u.pd_table.ref_cnt)
- return I40IW_ERR_NOT_READY;
-
- sd_entry->valid = false;
- I40IW_DEC_SD_REFCNT(&hmc_info->sd_table);
-
- return 0;
-}
-
-/**
- * i40iw_pf_init_vfhmc - initialize hmc_info for a vf driver instance
- * @dev: pointer to i40iw_sc_dev struct
- * @vf_hmc_fn_id: hmc function id for the vf driver
- * @vf_cnt_array: array of cnt values of iwarp hmc objects
- *
- * Called by pf driver to initialize hmc_info for vf driver instance.
- */
-enum i40iw_status_code i40iw_pf_init_vfhmc(struct i40iw_sc_dev *dev,
- u8 vf_hmc_fn_id,
- u32 *vf_cnt_array)
-{
- struct i40iw_hmc_info *hmc_info;
- enum i40iw_status_code ret_code = 0;
- u32 i;
-
- if ((vf_hmc_fn_id < I40IW_FIRST_VF_FPM_ID) ||
- (vf_hmc_fn_id >= I40IW_FIRST_VF_FPM_ID +
- I40IW_MAX_PE_ENABLED_VF_COUNT)) {
- i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: invalid vf_hmc_fn_id 0x%x\n",
- __func__, vf_hmc_fn_id);
- return I40IW_ERR_INVALID_HMCFN_ID;
- }
-
- ret_code = i40iw_sc_init_iw_hmc(dev, vf_hmc_fn_id);
- if (ret_code)
- return ret_code;
-
- hmc_info = i40iw_vf_hmcinfo_from_fpm(dev, vf_hmc_fn_id);
-
- for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++) {
- if (vf_cnt_array)
- hmc_info->hmc_obj[i].cnt =
- vf_cnt_array[i - I40IW_HMC_IW_QP];
- else
- hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt;
- }
-
- return 0;
-}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hmc.h b/drivers/infiniband/hw/i40iw/i40iw_hmc.h
deleted file mode 100644
index 4c3fdd875621..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_hmc.h
+++ /dev/null
@@ -1,241 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_HMC_H
-#define I40IW_HMC_H
-
-#include "i40iw_d.h"
-
-struct i40iw_hw;
-enum i40iw_status_code;
-
-#define I40IW_HMC_MAX_BP_COUNT 512
-#define I40IW_MAX_SD_ENTRIES 11
-#define I40IW_HW_DBG_HMC_INVALID_BP_MARK 0xCA
-
-#define I40IW_HMC_INFO_SIGNATURE 0x484D5347
-#define I40IW_HMC_PD_CNT_IN_SD 512
-#define I40IW_HMC_DIRECT_BP_SIZE 0x200000
-#define I40IW_HMC_MAX_SD_COUNT 4096
-#define I40IW_HMC_PAGED_BP_SIZE 4096
-#define I40IW_HMC_PD_BP_BUF_ALIGNMENT 4096
-#define I40IW_FIRST_VF_FPM_ID 16
-#define FPM_MULTIPLIER 1024
-
-#define I40IW_INC_SD_REFCNT(sd_table) ((sd_table)->ref_cnt++)
-#define I40IW_INC_PD_REFCNT(pd_table) ((pd_table)->ref_cnt++)
-#define I40IW_INC_BP_REFCNT(bp) ((bp)->ref_cnt++)
-
-#define I40IW_DEC_SD_REFCNT(sd_table) ((sd_table)->ref_cnt--)
-#define I40IW_DEC_PD_REFCNT(pd_table) ((pd_table)->ref_cnt--)
-#define I40IW_DEC_BP_REFCNT(bp) ((bp)->ref_cnt--)
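-
-/*
- * Note (editor's illustration): these ref counts gate teardown: a pd backing
- * page is freed only once its bp ref count drops to zero in
- * i40iw_remove_pd_bp, the pd table's entry memory only once the pd table
- * ref count reaches zero, and sd entries are released only when their
- * counts hit zero in the i40iw_prep_remove_* helpers.
- */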
-
-/**
- * I40IW_INVALIDATE_PF_HMC_PD - Invalidates the pd cache in the hardware
- * @hw: pointer to our hw struct
- * @sd_idx: segment descriptor index
- * @pd_idx: page descriptor index
- */
-#define I40IW_INVALIDATE_PF_HMC_PD(hw, sd_idx, pd_idx) \
- i40iw_wr32((hw), I40E_PFHMC_PDINV, \
- (((sd_idx) << I40E_PFHMC_PDINV_PMSDIDX_SHIFT) | \
- (0x1 << I40E_PFHMC_PDINV_PMSDPARTSEL_SHIFT) | \
- ((pd_idx) << I40E_PFHMC_PDINV_PMPDIDX_SHIFT)))
-
-/**
- * I40IW_INVALIDATE_VF_HMC_PD - Invalidates the pd cache in the hardware
- * @hw: pointer to our hw struct
- * @sd_idx: segment descriptor index
- * @pd_idx: page descriptor index
- * @hmc_fn_id: VF's function id
- */
-#define I40IW_INVALIDATE_VF_HMC_PD(hw, sd_idx, pd_idx, hmc_fn_id) \
- i40iw_wr32(hw, I40E_GLHMC_VFPDINV(hmc_fn_id - I40IW_FIRST_VF_FPM_ID), \
- ((sd_idx << I40E_PFHMC_PDINV_PMSDIDX_SHIFT) | \
- (pd_idx << I40E_PFHMC_PDINV_PMPDIDX_SHIFT)))
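-
-/*
- * Note (editor's illustration): vf hmc function ids start at
- * I40IW_FIRST_VF_FPM_ID (16), so the macro above indexes the per-vf
- * invalidation register with (hmc_fn_id - 16); e.g. hmc_fn_id = 18 writes
- * I40E_GLHMC_VFPDINV(2).
- */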
-
-struct i40iw_hmc_obj_info {
- u64 base;
- u32 max_cnt;
- u32 cnt;
- u64 size;
-};
-
-enum i40iw_sd_entry_type {
- I40IW_SD_TYPE_INVALID = 0,
- I40IW_SD_TYPE_PAGED = 1,
- I40IW_SD_TYPE_DIRECT = 2
-};
-
-struct i40iw_hmc_bp {
- enum i40iw_sd_entry_type entry_type;
- struct i40iw_dma_mem addr;
- u32 sd_pd_index;
- u32 ref_cnt;
-};
-
-struct i40iw_hmc_pd_entry {
- struct i40iw_hmc_bp bp;
- u32 sd_index;
- bool rsrc_pg;
- bool valid;
-};
-
-struct i40iw_hmc_pd_table {
- struct i40iw_dma_mem pd_page_addr;
- struct i40iw_hmc_pd_entry *pd_entry;
- struct i40iw_virt_mem pd_entry_virt_mem;
- u32 ref_cnt;
- u32 sd_index;
-};
-
-struct i40iw_hmc_sd_entry {
- enum i40iw_sd_entry_type entry_type;
- bool valid;
-
- union {
- struct i40iw_hmc_pd_table pd_table;
- struct i40iw_hmc_bp bp;
- } u;
-};
-
-struct i40iw_hmc_sd_table {
- struct i40iw_virt_mem addr;
- u32 sd_cnt;
- u32 ref_cnt;
- struct i40iw_hmc_sd_entry *sd_entry;
-};
-
-struct i40iw_hmc_info {
- u32 signature;
- u8 hmc_fn_id;
- u16 first_sd_index;
-
- struct i40iw_hmc_obj_info *hmc_obj;
- struct i40iw_virt_mem hmc_obj_virt_mem;
- struct i40iw_hmc_sd_table sd_table;
- u16 sd_indexes[I40IW_HMC_MAX_SD_COUNT];
-};
-
-struct update_sd_entry {
- u64 cmd;
- u64 data;
-};
-
-struct i40iw_update_sds_info {
- u32 cnt;
- u8 hmc_fn_id;
- struct update_sd_entry entry[I40IW_MAX_SD_ENTRIES];
-};
-
-struct i40iw_ccq_cqe_info;
-struct i40iw_hmc_fcn_info {
- void (*callback_fcn)(struct i40iw_sc_dev *, void *,
- struct i40iw_ccq_cqe_info *);
- void *cqp_callback_param;
- u32 vf_id;
- u16 iw_vf_idx;
- bool free_fcn;
-};
-
-enum i40iw_hmc_rsrc_type {
- I40IW_HMC_IW_QP = 0,
- I40IW_HMC_IW_CQ = 1,
- I40IW_HMC_IW_SRQ = 2,
- I40IW_HMC_IW_HTE = 3,
- I40IW_HMC_IW_ARP = 4,
- I40IW_HMC_IW_APBVT_ENTRY = 5,
- I40IW_HMC_IW_MR = 6,
- I40IW_HMC_IW_XF = 7,
- I40IW_HMC_IW_XFFL = 8,
- I40IW_HMC_IW_Q1 = 9,
- I40IW_HMC_IW_Q1FL = 10,
- I40IW_HMC_IW_TIMER = 11,
- I40IW_HMC_IW_FSIMC = 12,
- I40IW_HMC_IW_FSIAV = 13,
- I40IW_HMC_IW_PBLE = 14,
- I40IW_HMC_IW_MAX = 15,
-};
-
-struct i40iw_hmc_create_obj_info {
- struct i40iw_hmc_info *hmc_info;
- struct i40iw_virt_mem add_sd_virt_mem;
- u32 rsrc_type;
- u32 start_idx;
- u32 count;
- u32 add_sd_cnt;
- enum i40iw_sd_entry_type entry_type;
- bool is_pf;
-};
-
-struct i40iw_hmc_del_obj_info {
- struct i40iw_hmc_info *hmc_info;
- struct i40iw_virt_mem del_sd_virt_mem;
- u32 rsrc_type;
- u32 start_idx;
- u32 count;
- u32 del_sd_cnt;
- bool is_pf;
-};
-
-enum i40iw_status_code i40iw_copy_dma_mem(struct i40iw_hw *hw, void *dest_buf,
- struct i40iw_dma_mem *src_mem, u64 src_offset, u64 size);
-enum i40iw_status_code i40iw_sc_create_hmc_obj(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_create_obj_info *info);
-enum i40iw_status_code i40iw_sc_del_hmc_obj(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_del_obj_info *info,
- bool reset);
-enum i40iw_status_code i40iw_hmc_sd_one(struct i40iw_sc_dev *dev, u8 hmc_fn_id,
- u64 pa, u32 sd_idx, enum i40iw_sd_entry_type type,
- bool setsd);
-enum i40iw_status_code i40iw_update_sds_noccq(struct i40iw_sc_dev *dev,
- struct i40iw_update_sds_info *info);
-struct i40iw_vfdev *i40iw_vfdev_from_fpm(struct i40iw_sc_dev *dev, u8 hmc_fn_id);
-struct i40iw_hmc_info *i40iw_vf_hmcinfo_from_fpm(struct i40iw_sc_dev *dev,
- u8 hmc_fn_id);
-enum i40iw_status_code i40iw_add_sd_table_entry(struct i40iw_hw *hw,
- struct i40iw_hmc_info *hmc_info, u32 sd_index,
- enum i40iw_sd_entry_type type, u64 direct_mode_sz);
-enum i40iw_status_code i40iw_add_pd_table_entry(struct i40iw_hw *hw,
- struct i40iw_hmc_info *hmc_info, u32 pd_index,
- struct i40iw_dma_mem *rsrc_pg);
-enum i40iw_status_code i40iw_remove_pd_bp(struct i40iw_hw *hw,
- struct i40iw_hmc_info *hmc_info, u32 idx, bool is_pf);
-enum i40iw_status_code i40iw_prep_remove_sd_bp(struct i40iw_hmc_info *hmc_info, u32 idx);
-enum i40iw_status_code i40iw_prep_remove_pd_page(struct i40iw_hmc_info *hmc_info, u32 idx);
-
-#define ENTER_SHARED_FUNCTION()
-#define EXIT_SHARED_FUNCTION()
-
-#endif /* I40IW_HMC_H */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
deleted file mode 100644
index 55a1fbf0e670..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
+++ /dev/null
@@ -1,852 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/if_vlan.h>
-
-#include "i40iw.h"
-
-/**
- * i40iw_initialize_hw_resources - initialize hw resources during open
- * @iwdev: iwarp device
- */
-u32 i40iw_initialize_hw_resources(struct i40iw_device *iwdev)
-{
- unsigned long num_pds;
- u32 resources_size;
- u32 max_mr;
- u32 max_qp;
- u32 max_cq;
- u32 arp_table_size;
- u32 mrdrvbits;
- void *resource_ptr;
-
- max_qp = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_QP].cnt;
- max_cq = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt;
- max_mr = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_MR].cnt;
- arp_table_size = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_ARP].cnt;
- iwdev->max_cqe = 0xFFFFF;
- num_pds = I40IW_MAX_PDS;
- resources_size = sizeof(struct i40iw_arp_entry) * arp_table_size;
- resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_qp);
- resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_mr);
- resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_cq);
- resources_size += sizeof(unsigned long) * BITS_TO_LONGS(num_pds);
- resources_size += sizeof(unsigned long) * BITS_TO_LONGS(arp_table_size);
- resources_size += sizeof(struct i40iw_qp **) * max_qp;
- iwdev->mem_resources = kzalloc(resources_size, GFP_KERNEL);
-
- if (!iwdev->mem_resources)
- return -ENOMEM;
-
- iwdev->max_qp = max_qp;
- iwdev->max_mr = max_mr;
- iwdev->max_cq = max_cq;
- iwdev->max_pd = num_pds;
- iwdev->arp_table_size = arp_table_size;
- iwdev->arp_table = (struct i40iw_arp_entry *)iwdev->mem_resources;
- resource_ptr = iwdev->mem_resources + (sizeof(struct i40iw_arp_entry) * arp_table_size);
-
- iwdev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
- IB_DEVICE_MEM_WINDOW | IB_DEVICE_MEM_MGT_EXTENSIONS;
-
- iwdev->allocated_qps = resource_ptr;
- iwdev->allocated_cqs = &iwdev->allocated_qps[BITS_TO_LONGS(max_qp)];
- iwdev->allocated_mrs = &iwdev->allocated_cqs[BITS_TO_LONGS(max_cq)];
- iwdev->allocated_pds = &iwdev->allocated_mrs[BITS_TO_LONGS(max_mr)];
- iwdev->allocated_arps = &iwdev->allocated_pds[BITS_TO_LONGS(num_pds)];
- iwdev->qp_table = (struct i40iw_qp **)(&iwdev->allocated_arps[BITS_TO_LONGS(arp_table_size)]);
- set_bit(0, iwdev->allocated_mrs);
- set_bit(0, iwdev->allocated_qps);
- set_bit(0, iwdev->allocated_cqs);
- set_bit(0, iwdev->allocated_pds);
- set_bit(0, iwdev->allocated_arps);
-
- /* Following for ILQ/IEQ */
- set_bit(1, iwdev->allocated_qps);
- set_bit(1, iwdev->allocated_cqs);
- set_bit(1, iwdev->allocated_pds);
- set_bit(2, iwdev->allocated_cqs);
- set_bit(2, iwdev->allocated_pds);
-
- spin_lock_init(&iwdev->resource_lock);
- spin_lock_init(&iwdev->qptable_lock);
- /* stag index mask has a minimum of 14 bits */
- mrdrvbits = 24 - max(get_count_order(iwdev->max_mr), 14);
- iwdev->mr_stagmask = ~(((1 << mrdrvbits) - 1) << (32 - mrdrvbits));
- return 0;
-}
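-
-/*
- * Worked example (editor's illustration, not in the original source): with
- * max_mr = 65536, get_count_order(65536) = 16, so mrdrvbits = 24 - 16 = 8
- * and mr_stagmask = ~(0xff << 24) = 0x00ffffff, i.e. the top mrdrvbits bits
- * of the stag index are masked off.
- */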
-
-/**
- * i40iw_cqp_ce_handler - handle cqp completions
- * @iwdev: iwarp device
- * @cq: cq for cqp completions
- * @arm: flag to arm after completions
- */
-static void i40iw_cqp_ce_handler(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq, bool arm)
-{
- struct i40iw_cqp_request *cqp_request;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- u32 cqe_count = 0;
- struct i40iw_ccq_cqe_info info;
- int ret;
-
- do {
- memset(&info, 0, sizeof(info));
- ret = dev->ccq_ops->ccq_get_cqe_info(cq, &info);
- if (ret)
- break;
- cqp_request = (struct i40iw_cqp_request *)(unsigned long)info.scratch;
- if (info.error)
- i40iw_pr_err("opcode = 0x%x maj_err_code = 0x%x min_err_code = 0x%x\n",
- info.op_code, info.maj_err_code, info.min_err_code);
- if (cqp_request) {
- cqp_request->compl_info.maj_err_code = info.maj_err_code;
- cqp_request->compl_info.min_err_code = info.min_err_code;
- cqp_request->compl_info.op_ret_val = info.op_ret_val;
- cqp_request->compl_info.error = info.error;
-
- if (cqp_request->waiting) {
- cqp_request->request_done = true;
- wake_up(&cqp_request->waitq);
- i40iw_put_cqp_request(&iwdev->cqp, cqp_request);
- } else {
- if (cqp_request->callback_fcn)
- cqp_request->callback_fcn(cqp_request, 1);
- i40iw_put_cqp_request(&iwdev->cqp, cqp_request);
- }
- }
-
- cqe_count++;
- } while (1);
-
- if (arm && cqe_count) {
- i40iw_process_bh(dev);
- dev->ccq_ops->ccq_arm(cq);
- }
-}
-
-/**
- * i40iw_iwarp_ce_handler - handle iwarp completions
- * @iwdev: iwarp device
- * @iwcq: iwarp cq receiving event
- */
-static void i40iw_iwarp_ce_handler(struct i40iw_device *iwdev,
- struct i40iw_sc_cq *iwcq)
-{
- struct i40iw_cq *i40iwcq = iwcq->back_cq;
-
- if (i40iwcq->ibcq.comp_handler)
- i40iwcq->ibcq.comp_handler(&i40iwcq->ibcq,
- i40iwcq->ibcq.cq_context);
-}
-
-/**
- * i40iw_puda_ce_handler - handle puda completion events
- * @iwdev: iwarp device
- * @cq: puda completion q for event
- */
-static void i40iw_puda_ce_handler(struct i40iw_device *iwdev,
- struct i40iw_sc_cq *cq)
-{
- struct i40iw_sc_dev *dev = (struct i40iw_sc_dev *)&iwdev->sc_dev;
- enum i40iw_status_code status;
- u32 compl_error;
-
- do {
- status = i40iw_puda_poll_completion(dev, cq, &compl_error);
- if (status == I40IW_ERR_QUEUE_EMPTY)
- break;
- if (status) {
- i40iw_pr_err("puda status = %d\n", status);
- break;
- }
- if (compl_error) {
- i40iw_pr_err("puda compl_err =0x%x\n", compl_error);
- break;
- }
- } while (1);
-
- dev->ccq_ops->ccq_arm(cq);
-}
-
-/**
- * i40iw_process_ceq - handle ceq for completions
- * @iwdev: iwarp device
- * @ceq: ceq having cq for completion
- */
-void i40iw_process_ceq(struct i40iw_device *iwdev, struct i40iw_ceq *ceq)
-{
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_sc_ceq *sc_ceq;
- struct i40iw_sc_cq *cq;
- bool arm = true;
-
- sc_ceq = &ceq->sc_ceq;
- do {
- cq = dev->ceq_ops->process_ceq(dev, sc_ceq);
- if (!cq)
- break;
-
- if (cq->cq_type == I40IW_CQ_TYPE_CQP)
- i40iw_cqp_ce_handler(iwdev, cq, arm);
- else if (cq->cq_type == I40IW_CQ_TYPE_IWARP)
- i40iw_iwarp_ce_handler(iwdev, cq);
- else if ((cq->cq_type == I40IW_CQ_TYPE_ILQ) ||
- (cq->cq_type == I40IW_CQ_TYPE_IEQ))
- i40iw_puda_ce_handler(iwdev, cq);
- } while (1);
-}
-
-/**
- * i40iw_next_iw_state - modify qp state
- * @iwqp: iwarp qp to modify
- * @state: next state for qp
- * @del_hash: del hash
- * @term: term message
- * @termlen: length of term message
- */
-void i40iw_next_iw_state(struct i40iw_qp *iwqp,
- u8 state,
- u8 del_hash,
- u8 term,
- u8 termlen)
-{
- struct i40iw_modify_qp_info info;
-
- memset(&info, 0, sizeof(info));
- info.next_iwarp_state = state;
- info.remove_hash_idx = del_hash;
- info.cq_num_valid = true;
- info.arp_cache_idx_valid = true;
- info.dont_send_term = true;
- info.dont_send_fin = true;
- info.termlen = termlen;
-
- if (term & I40IWQP_TERM_SEND_TERM_ONLY)
- info.dont_send_term = false;
- if (term & I40IWQP_TERM_SEND_FIN_ONLY)
- info.dont_send_fin = false;
- if (iwqp->sc_qp.term_flags && (state == I40IW_QP_STATE_ERROR))
- info.reset_tcp_conn = true;
- iwqp->hw_iwarp_state = state;
- i40iw_hw_modify_qp(iwqp->iwdev, iwqp, &info, 0);
-}
-
-/**
- * i40iw_process_aeq - handle aeq events
- * @iwdev: iwarp device
- */
-void i40iw_process_aeq(struct i40iw_device *iwdev)
-{
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_aeq *aeq = &iwdev->aeq;
- struct i40iw_sc_aeq *sc_aeq = &aeq->sc_aeq;
- struct i40iw_aeqe_info aeinfo;
- struct i40iw_aeqe_info *info = &aeinfo;
- int ret;
- struct i40iw_qp *iwqp = NULL;
- struct i40iw_sc_cq *cq = NULL;
- struct i40iw_cq *iwcq = NULL;
- struct i40iw_sc_qp *qp = NULL;
- struct i40iw_qp_host_ctx_info *ctx_info = NULL;
- unsigned long flags;
-
- u32 aeqcnt = 0;
-
- if (!sc_aeq->size)
- return;
-
- do {
- memset(info, 0, sizeof(*info));
- ret = dev->aeq_ops->get_next_aeqe(sc_aeq, info);
- if (ret)
- break;
-
- aeqcnt++;
- i40iw_debug(dev, I40IW_DEBUG_AEQ,
- "%s ae_id = 0x%x bool qp=%d qp_id = %d\n",
- __func__, info->ae_id, info->qp, info->qp_cq_id);
- if (info->qp) {
- spin_lock_irqsave(&iwdev->qptable_lock, flags);
- iwqp = iwdev->qp_table[info->qp_cq_id];
- if (!iwqp) {
- spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
- i40iw_debug(dev, I40IW_DEBUG_AEQ,
- "%s qp_id %d is already freed\n",
- __func__, info->qp_cq_id);
- continue;
- }
- i40iw_add_ref(&iwqp->ibqp);
- spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
- qp = &iwqp->sc_qp;
- spin_lock_irqsave(&iwqp->lock, flags);
- iwqp->hw_tcp_state = info->tcp_state;
- iwqp->hw_iwarp_state = info->iwarp_state;
- iwqp->last_aeq = info->ae_id;
- spin_unlock_irqrestore(&iwqp->lock, flags);
- ctx_info = &iwqp->ctx_info;
- ctx_info->err_rq_idx_valid = true;
- } else {
- if (info->ae_id != I40IW_AE_CQ_OPERATION_ERROR)
- continue;
- }
-
- switch (info->ae_id) {
- case I40IW_AE_LLP_FIN_RECEIVED:
- if (qp->term_flags)
- break;
- if (atomic_inc_return(&iwqp->close_timer_started) == 1) {
- iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSE_WAIT;
- if ((iwqp->hw_tcp_state == I40IW_TCP_STATE_CLOSE_WAIT) &&
- (iwqp->ibqp_state == IB_QPS_RTS)) {
- i40iw_next_iw_state(iwqp,
- I40IW_QP_STATE_CLOSING, 0, 0, 0);
- i40iw_cm_disconn(iwqp);
- }
- iwqp->cm_id->add_ref(iwqp->cm_id);
- i40iw_schedule_cm_timer(iwqp->cm_node,
- (struct i40iw_puda_buf *)iwqp,
- I40IW_TIMER_TYPE_CLOSE, 1, 0);
- }
- break;
- case I40IW_AE_LLP_CLOSE_COMPLETE:
- if (qp->term_flags)
- i40iw_terminate_done(qp, 0);
- else
- i40iw_cm_disconn(iwqp);
- break;
- case I40IW_AE_BAD_CLOSE:
- /* fall through */
- case I40IW_AE_RESET_SENT:
- i40iw_next_iw_state(iwqp, I40IW_QP_STATE_ERROR, 1, 0, 0);
- i40iw_cm_disconn(iwqp);
- break;
- case I40IW_AE_LLP_CONNECTION_RESET:
- if (atomic_read(&iwqp->close_timer_started))
- break;
- i40iw_cm_disconn(iwqp);
- break;
- case I40IW_AE_QP_SUSPEND_COMPLETE:
- i40iw_qp_suspend_resume(dev, &iwqp->sc_qp, false);
- break;
- case I40IW_AE_TERMINATE_SENT:
- i40iw_terminate_send_fin(qp);
- break;
- case I40IW_AE_LLP_TERMINATE_RECEIVED:
- i40iw_terminate_received(qp, info);
- break;
- case I40IW_AE_CQ_OPERATION_ERROR:
- i40iw_pr_err("Processing an iWARP related AE for CQ misc = 0x%04X\n",
- info->ae_id);
- cq = (struct i40iw_sc_cq *)(unsigned long)info->compl_ctx;
- iwcq = (struct i40iw_cq *)cq->back_cq;
-
- if (iwcq->ibcq.event_handler) {
- struct ib_event ibevent;
-
- ibevent.device = iwcq->ibcq.device;
- ibevent.event = IB_EVENT_CQ_ERR;
- ibevent.element.cq = &iwcq->ibcq;
- iwcq->ibcq.event_handler(&ibevent, iwcq->ibcq.cq_context);
- }
- break;
- case I40IW_AE_LLP_DOUBT_REACHABILITY:
- break;
- case I40IW_AE_PRIV_OPERATION_DENIED:
- case I40IW_AE_STAG_ZERO_INVALID:
- case I40IW_AE_IB_RREQ_AND_Q1_FULL:
- case I40IW_AE_DDP_UBE_INVALID_DDP_VERSION:
- case I40IW_AE_DDP_UBE_INVALID_MO:
- case I40IW_AE_DDP_UBE_INVALID_QN:
- case I40IW_AE_DDP_NO_L_BIT:
- case I40IW_AE_RDMAP_ROE_INVALID_RDMAP_VERSION:
- case I40IW_AE_RDMAP_ROE_UNEXPECTED_OPCODE:
- case I40IW_AE_ROE_INVALID_RDMA_READ_REQUEST:
- case I40IW_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP:
- case I40IW_AE_INVALID_ARP_ENTRY:
- case I40IW_AE_INVALID_TCP_OPTION_RCVD:
- case I40IW_AE_STALE_ARP_ENTRY:
- case I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR:
- case I40IW_AE_LLP_SEGMENT_TOO_SMALL:
- case I40IW_AE_LLP_SYN_RECEIVED:
- case I40IW_AE_LLP_TOO_MANY_RETRIES:
- case I40IW_AE_LCE_QP_CATASTROPHIC:
- case I40IW_AE_LCE_FUNCTION_CATASTROPHIC:
- case I40IW_AE_LCE_CQ_CATASTROPHIC:
- case I40IW_AE_UDA_XMIT_DGRAM_TOO_LONG:
- case I40IW_AE_UDA_XMIT_DGRAM_TOO_SHORT:
- ctx_info->err_rq_idx_valid = false;
- /* fall through */
- default:
- if (!info->sq && ctx_info->err_rq_idx_valid) {
- ctx_info->err_rq_idx = info->wqe_idx;
- ctx_info->tcp_info_valid = false;
- ctx_info->iwarp_info_valid = false;
- ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
- iwqp->host_ctx.va,
- ctx_info);
- }
- i40iw_terminate_connection(qp, info);
- break;
- }
- if (info->qp)
- i40iw_rem_ref(&iwqp->ibqp);
- } while (1);
-
- if (aeqcnt)
- dev->aeq_ops->repost_aeq_entries(dev, aeqcnt);
-}
-
-/**
- * i40iw_cqp_manage_abvpt_cmd - send cqp command to manage apbvt
- * @iwdev: iwarp device
- * @accel_local_port: port for apbvt
- * @add_port: add or delete port
- */
-static enum i40iw_status_code
-i40iw_cqp_manage_abvpt_cmd(struct i40iw_device *iwdev,
- u16 accel_local_port,
- bool add_port)
-{
- struct i40iw_apbvt_info *info;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- enum i40iw_status_code status;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, add_port);
- if (!cqp_request)
- return I40IW_ERR_NO_MEMORY;
-
- cqp_info = &cqp_request->info;
- info = &cqp_info->in.u.manage_apbvt_entry.info;
-
- memset(info, 0, sizeof(*info));
- info->add = add_port;
- info->port = cpu_to_le16(accel_local_port);
-
- cqp_info->cqp_cmd = OP_MANAGE_APBVT_ENTRY;
- cqp_info->post_sq = 1;
- cqp_info->in.u.manage_apbvt_entry.cqp = &iwdev->cqp.sc_cqp;
- cqp_info->in.u.manage_apbvt_entry.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Manage APBVT entry fail");
-
- return status;
-}
-
-/**
- * i40iw_manage_apbvt - add or delete tcp port
- * @iwdev: iwarp device
- * @accel_local_port: port for apbvt
- * @add_port: add or delete port
- */
-enum i40iw_status_code i40iw_manage_apbvt(struct i40iw_device *iwdev,
- u16 accel_local_port,
- bool add_port)
-{
- struct i40iw_cm_core *cm_core = &iwdev->cm_core;
- enum i40iw_status_code status;
- unsigned long flags;
- bool in_use;
-
- /* apbvt_lock is held across CQP delete APBVT OP (non-waiting) to
- * protect against race where add APBVT CQP can race ahead of the delete
- * APBVT for same port.
- */
- if (add_port) {
- spin_lock_irqsave(&cm_core->apbvt_lock, flags);
- in_use = __test_and_set_bit(accel_local_port,
- cm_core->ports_in_use);
- spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
- if (in_use)
- return 0;
- return i40iw_cqp_manage_abvpt_cmd(iwdev, accel_local_port,
- true);
- } else {
- spin_lock_irqsave(&cm_core->apbvt_lock, flags);
- in_use = i40iw_port_in_use(cm_core, accel_local_port);
- if (in_use) {
- spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
- return 0;
- }
- __clear_bit(accel_local_port, cm_core->ports_in_use);
- status = i40iw_cqp_manage_abvpt_cmd(iwdev, accel_local_port,
- false);
- spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
- return status;
- }
-}
-
-/**
- * i40iw_manage_arp_cache - manage hw arp cache
- * @iwdev: iwarp device
- * @mac_addr: mac address ptr
- * @ip_addr: ip addr for arp cache
- * @ipv4: flag indicating ipv4 address
- * @action: add, delete or modify
- */
-void i40iw_manage_arp_cache(struct i40iw_device *iwdev,
- unsigned char *mac_addr,
- u32 *ip_addr,
- bool ipv4,
- u32 action)
-{
- struct i40iw_add_arp_cache_entry_info *info;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- int arp_index;
-
- arp_index = i40iw_arp_table(iwdev, ip_addr, ipv4, mac_addr, action);
- if (arp_index == -1)
- return;
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
- if (!cqp_request)
- return;
-
- cqp_info = &cqp_request->info;
- if (action == I40IW_ARP_ADD) {
- cqp_info->cqp_cmd = OP_ADD_ARP_CACHE_ENTRY;
- info = &cqp_info->in.u.add_arp_cache_entry.info;
- memset(info, 0, sizeof(*info));
- info->arp_index = cpu_to_le16((u16)arp_index);
- info->permanent = true;
- ether_addr_copy(info->mac_addr, mac_addr);
- cqp_info->in.u.add_arp_cache_entry.scratch = (uintptr_t)cqp_request;
- cqp_info->in.u.add_arp_cache_entry.cqp = &iwdev->cqp.sc_cqp;
- } else {
- cqp_info->cqp_cmd = OP_DELETE_ARP_CACHE_ENTRY;
- cqp_info->in.u.del_arp_cache_entry.scratch = (uintptr_t)cqp_request;
- cqp_info->in.u.del_arp_cache_entry.cqp = &iwdev->cqp.sc_cqp;
- cqp_info->in.u.del_arp_cache_entry.arp_index = arp_index;
- }
-
- cqp_info->in.u.add_arp_cache_entry.cqp = &iwdev->cqp.sc_cqp;
- cqp_info->in.u.add_arp_cache_entry.scratch = (uintptr_t)cqp_request;
- cqp_info->post_sq = 1;
- if (i40iw_handle_cqp_op(iwdev, cqp_request))
- i40iw_pr_err("CQP-OP Add/Del Arp Cache entry fail");
-}
-
-/**
- * i40iw_send_syn_cqp_callback - do syn/ack after qhash
- * @cqp_request: qhash cqp completion
- * @send_ack: flag send ack
- */
-static void i40iw_send_syn_cqp_callback(struct i40iw_cqp_request *cqp_request, u32 send_ack)
-{
- i40iw_send_syn(cqp_request->param, send_ack);
-}
-
-/**
- * i40iw_manage_qhash - add or modify qhash
- * @iwdev: iwarp device
- * @cminfo: cm info for qhash
- * @etype: type (syn or quad)
- * @mtype: type of qhash
- * @cmnode: cmnode associated with connection
- * @wait: wait for completion
- * @user_pri:user pri of the connection
- */
-enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev,
- struct i40iw_cm_info *cminfo,
- enum i40iw_quad_entry_type etype,
- enum i40iw_quad_hash_manage_type mtype,
- void *cmnode,
- bool wait)
-{
- struct i40iw_qhash_table_info *info;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_sc_vsi *vsi = &iwdev->vsi;
- enum i40iw_status_code status;
- struct i40iw_cqp *iwcqp = &iwdev->cqp;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
-
- cqp_request = i40iw_get_cqp_request(iwcqp, wait);
- if (!cqp_request)
- return I40IW_ERR_NO_MEMORY;
- cqp_info = &cqp_request->info;
- info = &cqp_info->in.u.manage_qhash_table_entry.info;
- memset(info, 0, sizeof(*info));
-
- info->vsi = &iwdev->vsi;
- info->manage = mtype;
- info->entry_type = etype;
- if (cminfo->vlan_id != 0xFFFF) {
- info->vlan_valid = true;
- info->vlan_id = cpu_to_le16(cminfo->vlan_id);
- } else {
- info->vlan_valid = false;
- }
-
- info->ipv4_valid = cminfo->ipv4;
- info->user_pri = cminfo->user_pri;
- ether_addr_copy(info->mac_addr, iwdev->netdev->dev_addr);
- info->qp_num = cpu_to_le32(vsi->ilq->qp_id);
- info->dest_port = cpu_to_le16(cminfo->loc_port);
- info->dest_ip[0] = cpu_to_le32(cminfo->loc_addr[0]);
- info->dest_ip[1] = cpu_to_le32(cminfo->loc_addr[1]);
- info->dest_ip[2] = cpu_to_le32(cminfo->loc_addr[2]);
- info->dest_ip[3] = cpu_to_le32(cminfo->loc_addr[3]);
- if (etype == I40IW_QHASH_TYPE_TCP_ESTABLISHED) {
- info->src_port = cpu_to_le16(cminfo->rem_port);
- info->src_ip[0] = cpu_to_le32(cminfo->rem_addr[0]);
- info->src_ip[1] = cpu_to_le32(cminfo->rem_addr[1]);
- info->src_ip[2] = cpu_to_le32(cminfo->rem_addr[2]);
- info->src_ip[3] = cpu_to_le32(cminfo->rem_addr[3]);
- }
- if (cmnode) {
- cqp_request->callback_fcn = i40iw_send_syn_cqp_callback;
- cqp_request->param = (void *)cmnode;
- }
-
- if (info->ipv4_valid)
- i40iw_debug(dev, I40IW_DEBUG_CM,
- "%s:%s IP=%pI4, port=%d, mac=%pM, vlan_id=%d\n",
- __func__, (!mtype) ? "DELETE" : "ADD",
- info->dest_ip,
- info->dest_port, info->mac_addr, cminfo->vlan_id);
- else
- i40iw_debug(dev, I40IW_DEBUG_CM,
- "%s:%s IP=%pI6, port=%d, mac=%pM, vlan_id=%d\n",
- __func__, (!mtype) ? "DELETE" : "ADD",
- info->dest_ip,
- info->dest_port, info->mac_addr, cminfo->vlan_id);
- cqp_info->in.u.manage_qhash_table_entry.cqp = &iwdev->cqp.sc_cqp;
- cqp_info->in.u.manage_qhash_table_entry.scratch = (uintptr_t)cqp_request;
- cqp_info->cqp_cmd = OP_MANAGE_QHASH_TABLE_ENTRY;
- cqp_info->post_sq = 1;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Manage Qhash Entry fail");
- return status;
-}
-
-/**
- * i40iw_hw_flush_wqes - flush qp's wqe
- * @iwdev: iwarp device
- * @qp: hardware control qp
- * @info: info for flush
- * @wait: flag wait for completion
- */
-enum i40iw_status_code i40iw_hw_flush_wqes(struct i40iw_device *iwdev,
- struct i40iw_sc_qp *qp,
- struct i40iw_qp_flush_info *info,
- bool wait)
-{
- enum i40iw_status_code status;
- struct i40iw_qp_flush_info *hw_info;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait);
- if (!cqp_request)
- return I40IW_ERR_NO_MEMORY;
-
- cqp_info = &cqp_request->info;
- hw_info = &cqp_request->info.in.u.qp_flush_wqes.info;
- memcpy(hw_info, info, sizeof(*hw_info));
-
- cqp_info->cqp_cmd = OP_QP_FLUSH_WQES;
- cqp_info->post_sq = 1;
- cqp_info->in.u.qp_flush_wqes.qp = qp;
- cqp_info->in.u.qp_flush_wqes.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status) {
- i40iw_pr_err("CQP-OP Flush WQE's fail");
- complete(&iwqp->sq_drained);
- complete(&iwqp->rq_drained);
- return status;
- }
- if (!cqp_request->compl_info.maj_err_code) {
- switch (cqp_request->compl_info.min_err_code) {
- case I40IW_CQP_COMPL_RQ_WQE_FLUSHED:
- complete(&iwqp->sq_drained);
- break;
- case I40IW_CQP_COMPL_SQ_WQE_FLUSHED:
- complete(&iwqp->rq_drained);
- break;
- case I40IW_CQP_COMPL_RQ_SQ_WQE_FLUSHED:
- break;
- default:
- complete(&iwqp->sq_drained);
- complete(&iwqp->rq_drained);
- break;
- }
- }
-
- return 0;
-}
-
-/**
- * i40iw_gen_ae - generate AE
- * @iwdev: iwarp device
- * @qp: qp associated with AE
- * @info: info for ae
- * @wait: wait for completion
- */
-void i40iw_gen_ae(struct i40iw_device *iwdev,
- struct i40iw_sc_qp *qp,
- struct i40iw_gen_ae_info *info,
- bool wait)
-{
- struct i40iw_gen_ae_info *ae_info;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait);
- if (!cqp_request)
- return;
-
- cqp_info = &cqp_request->info;
- ae_info = &cqp_request->info.in.u.gen_ae.info;
- memcpy(ae_info, info, sizeof(*ae_info));
-
- cqp_info->cqp_cmd = OP_GEN_AE;
- cqp_info->post_sq = 1;
- cqp_info->in.u.gen_ae.qp = qp;
- cqp_info->in.u.gen_ae.scratch = (uintptr_t)cqp_request;
- if (i40iw_handle_cqp_op(iwdev, cqp_request))
- i40iw_pr_err("CQP OP failed attempting to generate ae_code=0x%x\n",
- info->ae_code);
-}
-
-/**
- * i40iw_hw_manage_vf_pble_bp - manage vf pbles
- * @iwdev: iwarp device
- * @info: info for managing pble
- * @wait: flag wait for completion
- */
-enum i40iw_status_code i40iw_hw_manage_vf_pble_bp(struct i40iw_device *iwdev,
- struct i40iw_manage_vf_pble_info *info,
- bool wait)
-{
- enum i40iw_status_code status;
- struct i40iw_manage_vf_pble_info *hw_info;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
-
- if ((iwdev->init_state < CCQ_CREATED) && wait)
- wait = false;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait);
- if (!cqp_request)
- return I40IW_ERR_NO_MEMORY;
-
- cqp_info = &cqp_request->info;
- hw_info = &cqp_request->info.in.u.manage_vf_pble_bp.info;
- memcpy(hw_info, info, sizeof(*hw_info));
-
- cqp_info->cqp_cmd = OP_MANAGE_VF_PBLE_BP;
- cqp_info->post_sq = 1;
- cqp_info->in.u.manage_vf_pble_bp.cqp = &iwdev->cqp.sc_cqp;
- cqp_info->in.u.manage_vf_pble_bp.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Manage VF pble_bp fail");
- return status;
-}
-
-/**
- * i40iw_get_ib_wc - return change flush code to IB's
- * @opcode: iwarp flush code
- */
-static enum ib_wc_status i40iw_get_ib_wc(enum i40iw_flush_opcode opcode)
-{
- switch (opcode) {
- case FLUSH_PROT_ERR:
- return IB_WC_LOC_PROT_ERR;
- case FLUSH_REM_ACCESS_ERR:
- return IB_WC_REM_ACCESS_ERR;
- case FLUSH_LOC_QP_OP_ERR:
- return IB_WC_LOC_QP_OP_ERR;
- case FLUSH_REM_OP_ERR:
- return IB_WC_REM_OP_ERR;
- case FLUSH_LOC_LEN_ERR:
- return IB_WC_LOC_LEN_ERR;
- case FLUSH_GENERAL_ERR:
- return IB_WC_GENERAL_ERR;
- case FLUSH_FATAL_ERR:
- default:
- return IB_WC_FATAL_ERR;
- }
-}
-
-/**
- * i40iw_set_flush_info - set flush info
- * @pinfo: qp flush info
- * @min: minor err
- * @maj: major err
- * @opcode: flush error code
- */
-static void i40iw_set_flush_info(struct i40iw_qp_flush_info *pinfo,
- u16 *min,
- u16 *maj,
- enum i40iw_flush_opcode opcode)
-{
- *min = (u16)i40iw_get_ib_wc(opcode);
- *maj = CQE_MAJOR_DRV;
- pinfo->userflushcode = true;
-}
-
-/**
- * i40iw_flush_wqes - flush wqe for qp
- * @iwdev: iwarp device
- * @iwqp: qp to flush wqes
- */
-void i40iw_flush_wqes(struct i40iw_device *iwdev, struct i40iw_qp *iwqp)
-{
- struct i40iw_qp_flush_info info;
- struct i40iw_qp_flush_info *pinfo = &info;
-
- struct i40iw_sc_qp *qp = &iwqp->sc_qp;
-
- memset(pinfo, 0, sizeof(*pinfo));
- info.sq = true;
- info.rq = true;
- if (qp->term_flags) {
- i40iw_set_flush_info(pinfo, &pinfo->sq_minor_code,
- &pinfo->sq_major_code, qp->flush_code);
- i40iw_set_flush_info(pinfo, &pinfo->rq_minor_code,
- &pinfo->rq_major_code, qp->flush_code);
- }
- (void)i40iw_hw_flush_wqes(iwdev, &iwqp->sc_qp, &info, true);
-}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
deleted file mode 100644
index 10932baee279..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ /dev/null
@@ -1,2066 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/if_vlan.h>
-#include <net/addrconf.h>
-
-#include "i40iw.h"
-#include "i40iw_register.h"
-#include <net/netevent.h>
-#define CLIENT_IW_INTERFACE_VERSION_MAJOR 0
-#define CLIENT_IW_INTERFACE_VERSION_MINOR 01
-#define CLIENT_IW_INTERFACE_VERSION_BUILD 00
-
-#define DRV_VERSION_MAJOR 0
-#define DRV_VERSION_MINOR 5
-#define DRV_VERSION_BUILD 123
-#define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
- __stringify(DRV_VERSION_MINOR) "." __stringify(DRV_VERSION_BUILD)
-
-static int push_mode;
-module_param(push_mode, int, 0644);
-MODULE_PARM_DESC(push_mode, "Low latency mode: 0=disabled (default), 1=enabled");
-
-static int debug;
-module_param(debug, int, 0644);
-MODULE_PARM_DESC(debug, "debug flags: 0=disabled (default), 0x7fffffff=all");
-
-static int resource_profile;
-module_param(resource_profile, int, 0644);
-MODULE_PARM_DESC(resource_profile,
- "Resource Profile: 0=no VF RDMA support (default), 1=Weighted VF, 2=Even Distribution");
-
-static int max_rdma_vfs = 32;
-module_param(max_rdma_vfs, int, 0644);
-MODULE_PARM_DESC(max_rdma_vfs, "Maximum VF count: 0-32 (default 32)");
-static int mpa_version = 2;
-module_param(mpa_version, int, 0644);
-MODULE_PARM_DESC(mpa_version, "MPA version to be used in MPA Req/Resp (1 or 2)");
-
-MODULE_AUTHOR("Intel Corporation, <e1000-rdma@lists.sourceforge.net>");
-MODULE_DESCRIPTION("Intel(R) Ethernet Connection X722 iWARP RDMA Driver");
-MODULE_LICENSE("Dual BSD/GPL");
-
-static struct i40e_client i40iw_client;
-static char i40iw_client_name[I40E_CLIENT_STR_LENGTH] = "i40iw";
-
-static LIST_HEAD(i40iw_handlers);
-static spinlock_t i40iw_handler_lock;
-
-static enum i40iw_status_code i40iw_virtchnl_send(struct i40iw_sc_dev *dev,
- u32 vf_id, u8 *msg, u16 len);
-
-static struct notifier_block i40iw_inetaddr_notifier = {
- .notifier_call = i40iw_inetaddr_event
-};
-
-static struct notifier_block i40iw_inetaddr6_notifier = {
- .notifier_call = i40iw_inet6addr_event
-};
-
-static struct notifier_block i40iw_net_notifier = {
- .notifier_call = i40iw_net_event
-};
-
-static struct notifier_block i40iw_netdevice_notifier = {
- .notifier_call = i40iw_netdevice_event
-};
-
-/**
- * i40iw_find_i40e_handler - find a handler given a client info
- * @ldev: pointer to a client info
- */
-static struct i40iw_handler *i40iw_find_i40e_handler(struct i40e_info *ldev)
-{
- struct i40iw_handler *hdl;
- unsigned long flags;
-
- spin_lock_irqsave(&i40iw_handler_lock, flags);
- list_for_each_entry(hdl, &i40iw_handlers, list) {
- if (hdl->ldev.netdev == ldev->netdev) {
- spin_unlock_irqrestore(&i40iw_handler_lock, flags);
- return hdl;
- }
- }
- spin_unlock_irqrestore(&i40iw_handler_lock, flags);
- return NULL;
-}
-
-/**
- * i40iw_find_netdev - find a handler given a netdev
- * @netdev: pointer to net_device
- */
-struct i40iw_handler *i40iw_find_netdev(struct net_device *netdev)
-{
- struct i40iw_handler *hdl;
- unsigned long flags;
-
- spin_lock_irqsave(&i40iw_handler_lock, flags);
- list_for_each_entry(hdl, &i40iw_handlers, list) {
- if (hdl->ldev.netdev == netdev) {
- spin_unlock_irqrestore(&i40iw_handler_lock, flags);
- return hdl;
- }
- }
- spin_unlock_irqrestore(&i40iw_handler_lock, flags);
- return NULL;
-}
-
-/**
- * i40iw_add_handler - add a handler to the list
- * @hdl: handler to be added to the handler list
- */
-static void i40iw_add_handler(struct i40iw_handler *hdl)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&i40iw_handler_lock, flags);
- list_add(&hdl->list, &i40iw_handlers);
- spin_unlock_irqrestore(&i40iw_handler_lock, flags);
-}
-
-/**
- * i40iw_del_handler - delete a handler from the list
- * @hdl: handler to be deleted from the handler list
- */
-static int i40iw_del_handler(struct i40iw_handler *hdl)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&i40iw_handler_lock, flags);
- list_del(&hdl->list);
- spin_unlock_irqrestore(&i40iw_handler_lock, flags);
- return 0;
-}
-
-/**
- * i40iw_enable_intr - set up device interrupts
- * @dev: hardware control device structure
- * @msix_id: id of the interrupt to be enabled
- */
-static void i40iw_enable_intr(struct i40iw_sc_dev *dev, u32 msix_id)
-{
- u32 val;
-
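- /* enable the interrupt, clear any pending PBA and select ITR index 3 */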
- val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
- I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
- (3 << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
- if (dev->is_pf)
- i40iw_wr32(dev->hw, I40E_PFINT_DYN_CTLN(msix_id - 1), val);
- else
- i40iw_wr32(dev->hw, I40E_VFINT_DYN_CTLN1(msix_id - 1), val);
-}
-
-/**
- * i40iw_dpc - tasklet for aeq and ceq 0
- * @data: iwarp device
- */
-static void i40iw_dpc(unsigned long data)
-{
- struct i40iw_device *iwdev = (struct i40iw_device *)data;
-
- if (iwdev->msix_shared)
- i40iw_process_ceq(iwdev, iwdev->ceqlist);
- i40iw_process_aeq(iwdev);
- i40iw_enable_intr(&iwdev->sc_dev, iwdev->iw_msixtbl[0].idx);
-}
-
-/**
- * i40iw_ceq_dpc - dpc handler for CEQ
- * @data: data points to CEQ
- */
-static void i40iw_ceq_dpc(unsigned long data)
-{
- struct i40iw_ceq *iwceq = (struct i40iw_ceq *)data;
- struct i40iw_device *iwdev = iwceq->iwdev;
-
- i40iw_process_ceq(iwdev, iwceq);
- i40iw_enable_intr(&iwdev->sc_dev, iwceq->msix_idx);
-}
-
-/**
- * i40iw_irq_handler - interrupt handler for aeq and ceq0
- * @irq: Interrupt request number
- * @data: iwarp device
- */
-static irqreturn_t i40iw_irq_handler(int irq, void *data)
-{
- struct i40iw_device *iwdev = (struct i40iw_device *)data;
-
- tasklet_schedule(&iwdev->dpc_tasklet);
- return IRQ_HANDLED;
-}
-
-/**
- * i40iw_destroy_cqp - destroy control qp
- * @iwdev: iwarp device
- * @free_hwcqp: true if the hw cqp should be destroyed
- *
- * Issue destroy cqp request and
- * free the resources associated with the cqp
- */
-static void i40iw_destroy_cqp(struct i40iw_device *iwdev, bool free_hwcqp)
-{
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_cqp *cqp = &iwdev->cqp;
-
- if (free_hwcqp)
- dev->cqp_ops->cqp_destroy(dev->cqp);
-
- i40iw_cleanup_pending_cqp_op(iwdev);
-
- i40iw_free_dma_mem(dev->hw, &cqp->sq);
- kfree(cqp->scratch_array);
- iwdev->cqp.scratch_array = NULL;
-
- kfree(cqp->cqp_requests);
- cqp->cqp_requests = NULL;
-}
-
-/**
- * i40iw_disable_irq - disable device interrupts
- * @dev: hardware control device structure
- * @msix_vec: msix vector to disable irq
- * @dev_id: parameter to pass to free_irq (used during irq setup)
- *
- * The function is called when destroying aeq/ceq
- */
-static void i40iw_disable_irq(struct i40iw_sc_dev *dev,
- struct i40iw_msix_vector *msix_vec,
- void *dev_id)
-{
- if (dev->is_pf)
- i40iw_wr32(dev->hw, I40E_PFINT_DYN_CTLN(msix_vec->idx - 1), 0);
- else
- i40iw_wr32(dev->hw, I40E_VFINT_DYN_CTLN1(msix_vec->idx - 1), 0);
- irq_set_affinity_hint(msix_vec->irq, NULL);
- free_irq(msix_vec->irq, dev_id);
-}
-
-/**
- * i40iw_destroy_aeq - destroy aeq
- * @iwdev: iwarp device
- *
- * Issue a destroy aeq request and
- * free the resources associated with the aeq
- * The function is called during driver unload
- */
-static void i40iw_destroy_aeq(struct i40iw_device *iwdev)
-{
- enum i40iw_status_code status = I40IW_ERR_NOT_READY;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_aeq *aeq = &iwdev->aeq;
-
- if (!iwdev->msix_shared)
- i40iw_disable_irq(dev, iwdev->iw_msixtbl, (void *)iwdev);
- if (iwdev->reset)
- goto exit;
-
- if (!dev->aeq_ops->aeq_destroy(&aeq->sc_aeq, 0, 1))
- status = dev->aeq_ops->aeq_destroy_done(&aeq->sc_aeq);
- if (status)
- i40iw_pr_err("destroy aeq failed %d\n", status);
-
-exit:
- i40iw_free_dma_mem(dev->hw, &aeq->mem);
-}
-
-/**
- * i40iw_destroy_ceq - destroy ceq
- * @iwdev: iwarp device
- * @iwceq: ceq to be destroyed
- *
- * Issue a destroy ceq request and
- * free the resources associated with the ceq
- */
-static void i40iw_destroy_ceq(struct i40iw_device *iwdev,
- struct i40iw_ceq *iwceq)
-{
- enum i40iw_status_code status;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
-
- if (iwdev->reset)
- goto exit;
-
- status = dev->ceq_ops->ceq_destroy(&iwceq->sc_ceq, 0, 1);
- if (status) {
- i40iw_pr_err("ceq destroy command failed %d\n", status);
- goto exit;
- }
-
- status = dev->ceq_ops->cceq_destroy_done(&iwceq->sc_ceq);
- if (status)
- i40iw_pr_err("ceq destroy completion failed %d\n", status);
-exit:
- i40iw_free_dma_mem(dev->hw, &iwceq->mem);
-}
-
-/**
- * i40iw_dele_ceqs - destroy all ceq's
- * @iwdev: iwarp device
- *
- * Go through all of the device ceq's and for each ceq
- * disable the ceq interrupt and destroy the ceq
- */
-static void i40iw_dele_ceqs(struct i40iw_device *iwdev)
-{
- u32 i = 0;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_ceq *iwceq = iwdev->ceqlist;
- struct i40iw_msix_vector *msix_vec = iwdev->iw_msixtbl;
-
- if (iwdev->msix_shared) {
- i40iw_disable_irq(dev, msix_vec, (void *)iwdev);
- i40iw_destroy_ceq(iwdev, iwceq);
- iwceq++;
- i++;
- }
-
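- /* each remaining ceq has its own dedicated msix vector */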
- for (msix_vec++; i < iwdev->ceqs_count; i++, msix_vec++, iwceq++) {
- i40iw_disable_irq(dev, msix_vec, (void *)iwceq);
- i40iw_destroy_ceq(iwdev, iwceq);
- }
-
- iwdev->sc_dev.ceq_valid = false;
-}
-
-/**
- * i40iw_destroy_ccq - destroy control cq
- * @iwdev: iwarp device
- *
- * Issue destroy ccq request and
- * free the resources associated with the ccq
- */
-static void i40iw_destroy_ccq(struct i40iw_device *iwdev)
-{
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_ccq *ccq = &iwdev->ccq;
- enum i40iw_status_code status = 0;
-
- if (!iwdev->reset)
- status = dev->ccq_ops->ccq_destroy(dev->ccq, 0, true);
- if (status)
- i40iw_pr_err("ccq destroy failed %d\n", status);
- i40iw_free_dma_mem(dev->hw, &ccq->mem_cq);
-}
-
-/* types of hmc objects */
-static enum i40iw_hmc_rsrc_type iw_hmc_obj_types[] = {
- I40IW_HMC_IW_QP,
- I40IW_HMC_IW_CQ,
- I40IW_HMC_IW_HTE,
- I40IW_HMC_IW_ARP,
- I40IW_HMC_IW_APBVT_ENTRY,
- I40IW_HMC_IW_MR,
- I40IW_HMC_IW_XF,
- I40IW_HMC_IW_XFFL,
- I40IW_HMC_IW_Q1,
- I40IW_HMC_IW_Q1FL,
- I40IW_HMC_IW_TIMER,
-};
-
-/**
- * i40iw_close_hmc_objects_type - delete hmc objects of a given type
- * @dev: hardware control device structure
- * @obj_type: the hmc object type to be deleted
- * @hmc_info: host memory info struct
- * @is_pf: true if the function is PF otherwise false
- * @reset: true if called before reset
- */
-static void i40iw_close_hmc_objects_type(struct i40iw_sc_dev *dev,
- enum i40iw_hmc_rsrc_type obj_type,
- struct i40iw_hmc_info *hmc_info,
- bool is_pf,
- bool reset)
-{
- struct i40iw_hmc_del_obj_info info;
-
- memset(&info, 0, sizeof(info));
- info.hmc_info = hmc_info;
- info.rsrc_type = obj_type;
- info.count = hmc_info->hmc_obj[obj_type].cnt;
- info.is_pf = is_pf;
- if (dev->hmc_ops->del_hmc_object(dev, &info, reset))
- i40iw_pr_err("del obj of type %d failed\n", obj_type);
-}
-
-/**
- * i40iw_del_hmc_objects - remove all device hmc objects
- * @dev: hardware control device structure
- * @hmc_info: hmc_info to free
- * @is_pf: true if hmc_info belongs to PF, not vf nor allocated
- * by PF on behalf of VF
- * @reset: true if called before reset
- */
-static void i40iw_del_hmc_objects(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_info *hmc_info,
- bool is_pf,
- bool reset)
-{
- unsigned int i;
-
- for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++)
- i40iw_close_hmc_objects_type(dev, iw_hmc_obj_types[i], hmc_info, is_pf, reset);
-}
-
-/**
- * i40iw_ceq_handler - interrupt handler for ceq
- * @irq: interrupt request number
- * @data: ceq pointer
- */
-static irqreturn_t i40iw_ceq_handler(int irq, void *data)
-{
- struct i40iw_ceq *iwceq = (struct i40iw_ceq *)data;
-
- if (iwceq->irq != irq)
- i40iw_pr_err("expected irq = %d received irq = %d\n", iwceq->irq, irq);
- tasklet_schedule(&iwceq->dpc_tasklet);
- return IRQ_HANDLED;
-}
-
-/**
- * i40iw_create_hmc_obj_type - create hmc object of a given type
- * @dev: hardware control device structure
- * @info: information for the hmc object to create
- */
-static enum i40iw_status_code i40iw_create_hmc_obj_type(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_create_obj_info *info)
-{
- return dev->hmc_ops->create_hmc_object(dev, info);
-}
-
-/**
- * i40iw_create_hmc_objs - create all hmc objects for the device
- * @iwdev: iwarp device
- * @is_pf: true if the function is PF otherwise false
- *
- * Create the device hmc objects and allocate hmc pages
- * Return 0 if successful, otherwise clean up and return error
- */
-static enum i40iw_status_code i40iw_create_hmc_objs(struct i40iw_device *iwdev,
- bool is_pf)
-{
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_hmc_create_obj_info info;
- enum i40iw_status_code status;
- int i;
-
- memset(&info, 0, sizeof(info));
- info.hmc_info = dev->hmc_info;
- info.is_pf = is_pf;
- info.entry_type = iwdev->sd_type;
- for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++) {
- info.rsrc_type = iw_hmc_obj_types[i];
- info.count = dev->hmc_info->hmc_obj[info.rsrc_type].cnt;
- info.add_sd_cnt = 0;
- status = i40iw_create_hmc_obj_type(dev, &info);
- if (status) {
- i40iw_pr_err("create obj type %d status = %d\n",
- iw_hmc_obj_types[i], status);
- break;
- }
- }
- if (!status)
- return (dev->cqp_misc_ops->static_hmc_pages_allocated(dev->cqp, 0,
- dev->hmc_fn_id,
- true, true));
-
- while (i) {
- i--;
- /* destroy the hmc objects of a given type */
- i40iw_close_hmc_objects_type(dev,
- iw_hmc_obj_types[i],
- dev->hmc_info,
- is_pf,
- false);
- }
- return status;
-}
-
-/**
- * i40iw_obj_aligned_mem - get aligned memory from device allocated memory
- * @iwdev: iwarp device
- * @memptr: points to the memory addresses
- * @size: size of memory needed
- * @mask: mask for the aligned memory
- *
- * Get aligned memory of the requested size and
- * update the memptr to point to the new aligned memory
- * Return 0 if successful, otherwise return no memory error
- */
-enum i40iw_status_code i40iw_obj_aligned_mem(struct i40iw_device *iwdev,
- struct i40iw_dma_mem *memptr,
- u32 size,
- u32 mask)
-{
- unsigned long va, newva;
- unsigned long extra;
-
- va = (unsigned long)iwdev->obj_next.va;
- newva = va;
- if (mask)
- newva = ALIGN(va, (mask + 1));
- extra = newva - va;
- memptr->va = (u8 *)va + extra;
- memptr->pa = iwdev->obj_next.pa + extra;
- memptr->size = size;
- if ((memptr->va + size) > (iwdev->obj_mem.va + iwdev->obj_mem.size))
- return I40IW_ERR_NO_MEMORY;
-
- iwdev->obj_next.va = memptr->va + size;
- iwdev->obj_next.pa = memptr->pa + size;
- return 0;
-}
-
-/**
- * i40iw_create_cqp - create control qp
- * @iwdev: iwarp device
- *
- * Return 0, if the cqp and all the resources associated with it
- * are successfully created, otherwise return error
- */
-static enum i40iw_status_code i40iw_create_cqp(struct i40iw_device *iwdev)
-{
- enum i40iw_status_code status;
- u32 sqsize = I40IW_CQP_SW_SQSIZE_2048;
- struct i40iw_dma_mem mem;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_cqp_init_info cqp_init_info;
- struct i40iw_cqp *cqp = &iwdev->cqp;
- u16 maj_err, min_err;
- int i;
-
- cqp->cqp_requests = kcalloc(sqsize, sizeof(*cqp->cqp_requests), GFP_KERNEL);
- if (!cqp->cqp_requests)
- return I40IW_ERR_NO_MEMORY;
- cqp->scratch_array = kcalloc(sqsize, sizeof(*cqp->scratch_array), GFP_KERNEL);
- if (!cqp->scratch_array) {
- kfree(cqp->cqp_requests);
- return I40IW_ERR_NO_MEMORY;
- }
- dev->cqp = &cqp->sc_cqp;
- dev->cqp->dev = dev;
- memset(&cqp_init_info, 0, sizeof(cqp_init_info));
- status = i40iw_allocate_dma_mem(dev->hw, &cqp->sq,
- (sizeof(struct i40iw_cqp_sq_wqe) * sqsize),
- I40IW_CQP_ALIGNMENT);
- if (status)
- goto exit;
- status = i40iw_obj_aligned_mem(iwdev, &mem, sizeof(struct i40iw_cqp_ctx),
- I40IW_HOST_CTX_ALIGNMENT_MASK);
- if (status)
- goto exit;
- dev->cqp->host_ctx_pa = mem.pa;
- dev->cqp->host_ctx = mem.va;
- /* populate the cqp init info */
- cqp_init_info.dev = dev;
- cqp_init_info.sq_size = sqsize;
- cqp_init_info.sq = cqp->sq.va;
- cqp_init_info.sq_pa = cqp->sq.pa;
- cqp_init_info.host_ctx_pa = mem.pa;
- cqp_init_info.host_ctx = mem.va;
- cqp_init_info.hmc_profile = iwdev->resource_profile;
- cqp_init_info.enabled_vf_count = iwdev->max_rdma_vfs;
- cqp_init_info.scratch_array = cqp->scratch_array;
- status = dev->cqp_ops->cqp_init(dev->cqp, &cqp_init_info);
- if (status) {
- i40iw_pr_err("cqp init status %d\n", status);
- goto exit;
- }
- status = dev->cqp_ops->cqp_create(dev->cqp, &maj_err, &min_err);
- if (status) {
- i40iw_pr_err("cqp create status %d maj_err %d min_err %d\n",
- status, maj_err, min_err);
- goto exit;
- }
- spin_lock_init(&cqp->req_lock);
- INIT_LIST_HEAD(&cqp->cqp_avail_reqs);
- INIT_LIST_HEAD(&cqp->cqp_pending_reqs);
- /* init the waitq of the cqp_requests and add them to the list */
- for (i = 0; i < sqsize; i++) {
- init_waitqueue_head(&cqp->cqp_requests[i].waitq);
- list_add_tail(&cqp->cqp_requests[i].list, &cqp->cqp_avail_reqs);
- }
- return 0;
-exit:
- /* clean up the created resources */
- i40iw_destroy_cqp(iwdev, false);
- return status;
-}
-
-/**
- * i40iw_create_ccq - create control cq
- * @iwdev: iwarp device
- *
- * Return 0, if the ccq and the resources associated with it
- * are successfully created, otherwise return error
- */
-static enum i40iw_status_code i40iw_create_ccq(struct i40iw_device *iwdev)
-{
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_dma_mem mem;
- enum i40iw_status_code status;
- struct i40iw_ccq_init_info info;
- struct i40iw_ccq *ccq = &iwdev->ccq;
-
- memset(&info, 0, sizeof(info));
- dev->ccq = &ccq->sc_cq;
- dev->ccq->dev = dev;
- info.dev = dev;
- ccq->shadow_area.size = sizeof(struct i40iw_cq_shadow_area);
- ccq->mem_cq.size = sizeof(struct i40iw_cqe) * IW_CCQ_SIZE;
- status = i40iw_allocate_dma_mem(dev->hw, &ccq->mem_cq,
- ccq->mem_cq.size, I40IW_CQ0_ALIGNMENT);
- if (status)
- goto exit;
- status = i40iw_obj_aligned_mem(iwdev, &mem, ccq->shadow_area.size,
- I40IW_SHADOWAREA_MASK);
- if (status)
- goto exit;
- ccq->sc_cq.back_cq = (void *)ccq;
- /* populate the ccq init info */
- info.cq_base = ccq->mem_cq.va;
- info.cq_pa = ccq->mem_cq.pa;
- info.num_elem = IW_CCQ_SIZE;
- info.shadow_area = mem.va;
- info.shadow_area_pa = mem.pa;
- info.ceqe_mask = false;
- info.ceq_id_valid = true;
- info.shadow_read_threshold = 16;
- status = dev->ccq_ops->ccq_init(dev->ccq, &info);
- if (!status)
- status = dev->ccq_ops->ccq_create(dev->ccq, 0, true, true);
-exit:
- if (status)
- i40iw_free_dma_mem(dev->hw, &ccq->mem_cq);
- return status;
-}
-
-/**
- * i40iw_configure_ceq_vector - set up the msix interrupt vector for ceq
- * @iwdev: iwarp device
- * @iwceq: ceq associated with the vector
- * @ceq_id: the id number of the iwceq
- * @msix_vec: interrupt vector information
- *
- * Allocate interrupt resources and enable irq handling
- * Return 0 if successful, otherwise return error
- */
-static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iwdev,
- struct i40iw_ceq *iwceq,
- u32 ceq_id,
- struct i40iw_msix_vector *msix_vec)
-{
- enum i40iw_status_code status;
-
- if (iwdev->msix_shared && !ceq_id) {
- tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev);
- status = request_irq(msix_vec->irq, i40iw_irq_handler, 0, "AEQCEQ", iwdev);
- } else {
- tasklet_init(&iwceq->dpc_tasklet, i40iw_ceq_dpc, (unsigned long)iwceq);
- status = request_irq(msix_vec->irq, i40iw_ceq_handler, 0, "CEQ", iwceq);
- }
-
- cpumask_clear(&msix_vec->mask);
- cpumask_set_cpu(msix_vec->cpu_affinity, &msix_vec->mask);
- irq_set_affinity_hint(msix_vec->irq, &msix_vec->mask);
-
- if (status) {
- i40iw_pr_err("ceq irq config fail\n");
- return I40IW_ERR_CONFIG;
- }
- msix_vec->ceq_id = ceq_id;
-
- return 0;
-}
-
-/**
- * i40iw_create_ceq - create completion event queue
- * @iwdev: iwarp device
- * @iwceq: pointer to the ceq resources to be created
- * @ceq_id: the id number of the iwceq
- *
- * Return 0, if the ceq and the resources associated with it
- * are successfully created, otherwise return error
- */
-static enum i40iw_status_code i40iw_create_ceq(struct i40iw_device *iwdev,
- struct i40iw_ceq *iwceq,
- u32 ceq_id)
-{
- enum i40iw_status_code status;
- struct i40iw_ceq_init_info info;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- u64 scratch;
-
- memset(&info, 0, sizeof(info));
- info.ceq_id = ceq_id;
- iwceq->iwdev = iwdev;
- iwceq->mem.size = sizeof(struct i40iw_ceqe) *
- iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt;
- status = i40iw_allocate_dma_mem(dev->hw, &iwceq->mem, iwceq->mem.size,
- I40IW_CEQ_ALIGNMENT);
- if (status)
- goto exit;
- info.ceqe_base = iwceq->mem.va;
- info.ceqe_pa = iwceq->mem.pa;
-
- info.elem_cnt = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt;
- iwceq->sc_ceq.ceq_id = ceq_id;
- info.dev = dev;
- scratch = (uintptr_t)&iwdev->cqp.sc_cqp;
- status = dev->ceq_ops->ceq_init(&iwceq->sc_ceq, &info);
- if (!status)
- status = dev->ceq_ops->cceq_create(&iwceq->sc_ceq, scratch);
-
-exit:
- if (status)
- i40iw_free_dma_mem(dev->hw, &iwceq->mem);
- return status;
-}
-
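-/**
- * i40iw_request_reset - request a reset from the lan driver
- * @iwdev: iwarp device
- */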
-void i40iw_request_reset(struct i40iw_device *iwdev)
-{
- struct i40e_info *ldev = iwdev->ldev;
-
- ldev->ops->request_reset(ldev, iwdev->client, 1);
-}
-
-/**
- * i40iw_setup_ceqs - manage the device ceq's and their interrupt resources
- * @iwdev: iwarp device
- * @ldev: i40e lan device
- *
- * Allocate a list for all device completion event queues
- * Create the ceq's and configure their msix interrupt vectors
- * Return 0, if at least one ceq is successfully set up, otherwise return error
- */
-static enum i40iw_status_code i40iw_setup_ceqs(struct i40iw_device *iwdev,
- struct i40e_info *ldev)
-{
- u32 i;
- u32 ceq_id;
- struct i40iw_ceq *iwceq;
- struct i40iw_msix_vector *msix_vec;
- enum i40iw_status_code status = 0;
- u32 num_ceqs;
-
- if (ldev && ldev->ops && ldev->ops->setup_qvlist) {
- status = ldev->ops->setup_qvlist(ldev, &i40iw_client,
- iwdev->iw_qvlist);
- if (status)
- goto exit;
- } else {
- status = I40IW_ERR_BAD_PTR;
- goto exit;
- }
-
- num_ceqs = min(iwdev->msix_count, iwdev->sc_dev.hmc_fpm_misc.max_ceqs);
- iwdev->ceqlist = kcalloc(num_ceqs, sizeof(*iwdev->ceqlist), GFP_KERNEL);
- if (!iwdev->ceqlist) {
- status = I40IW_ERR_NO_MEMORY;
- goto exit;
- }
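- /* when msix is shared, ceq 0 uses msix vector 0 together with the aeq */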
- i = (iwdev->msix_shared) ? 0 : 1;
- for (ceq_id = 0; i < num_ceqs; i++, ceq_id++) {
- iwceq = &iwdev->ceqlist[ceq_id];
- status = i40iw_create_ceq(iwdev, iwceq, ceq_id);
- if (status) {
- i40iw_pr_err("create ceq status = %d\n", status);
- break;
- }
-
- msix_vec = &iwdev->iw_msixtbl[i];
- iwceq->irq = msix_vec->irq;
- iwceq->msix_idx = msix_vec->idx;
- status = i40iw_configure_ceq_vector(iwdev, iwceq, ceq_id, msix_vec);
- if (status) {
- i40iw_destroy_ceq(iwdev, iwceq);
- break;
- }
- i40iw_enable_intr(&iwdev->sc_dev, msix_vec->idx);
- iwdev->ceqs_count++;
- }
-exit:
- if (status && !iwdev->ceqs_count) {
- kfree(iwdev->ceqlist);
- iwdev->ceqlist = NULL;
- return status;
- } else {
- iwdev->sc_dev.ceq_valid = true;
- return 0;
- }
-
-}
-
-/**
- * i40iw_configure_aeq_vector - set up the msix vector for aeq
- * @iwdev: iwarp device
- *
- * Allocate interrupt resources and enable irq handling
- * Return 0 if successful, otherwise return error
- */
-static enum i40iw_status_code i40iw_configure_aeq_vector(struct i40iw_device *iwdev)
-{
- struct i40iw_msix_vector *msix_vec = iwdev->iw_msixtbl;
- u32 ret = 0;
-
- if (!iwdev->msix_shared) {
- tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev);
- ret = request_irq(msix_vec->irq, i40iw_irq_handler, 0, "i40iw", iwdev);
- }
- if (ret) {
- i40iw_pr_err("aeq irq config fail\n");
- return I40IW_ERR_CONFIG;
- }
-
- return 0;
-}
-
-/**
- * i40iw_create_aeq - create async event queue
- * @iwdev: iwarp device
- *
- * Return 0, if the aeq and the resources associated with it
- * are successfully created, otherwise return error
- */
-static enum i40iw_status_code i40iw_create_aeq(struct i40iw_device *iwdev)
-{
- enum i40iw_status_code status;
- struct i40iw_aeq_init_info info;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_aeq *aeq = &iwdev->aeq;
- u64 scratch = 0;
- u32 aeq_size;
-
- aeq_size = 2 * iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_QP].cnt +
- iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt;
- memset(&info, 0, sizeof(info));
- aeq->mem.size = sizeof(struct i40iw_sc_aeqe) * aeq_size;
- status = i40iw_allocate_dma_mem(dev->hw, &aeq->mem, aeq->mem.size,
- I40IW_AEQ_ALIGNMENT);
- if (status)
- goto exit;
-
- info.aeqe_base = aeq->mem.va;
- info.aeq_elem_pa = aeq->mem.pa;
- info.elem_cnt = aeq_size;
- info.dev = dev;
- status = dev->aeq_ops->aeq_init(&aeq->sc_aeq, &info);
- if (status)
- goto exit;
- status = dev->aeq_ops->aeq_create(&aeq->sc_aeq, scratch, 1);
- if (!status)
- status = dev->aeq_ops->aeq_create_done(&aeq->sc_aeq);
-exit:
- if (status)
- i40iw_free_dma_mem(dev->hw, &aeq->mem);
- return status;
-}
-
-/**
- * i40iw_setup_aeq - set up the device aeq
- * @iwdev: iwarp device
- *
- * Create the aeq and configure its msix interrupt vector
- * Return 0 if successful, otherwise return error
- */
-static enum i40iw_status_code i40iw_setup_aeq(struct i40iw_device *iwdev)
-{
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- enum i40iw_status_code status;
-
- status = i40iw_create_aeq(iwdev);
- if (status)
- return status;
-
- status = i40iw_configure_aeq_vector(iwdev);
- if (status) {
- i40iw_destroy_aeq(iwdev);
- return status;
- }
-
- if (!iwdev->msix_shared)
- i40iw_enable_intr(dev, iwdev->iw_msixtbl[0].idx);
- return 0;
-}
-
-/**
- * i40iw_initialize_ilq - create iwarp local queue for cm
- * @iwdev: iwarp device
- *
- * Return 0 if successful, otherwise return error
- */
-static enum i40iw_status_code i40iw_initialize_ilq(struct i40iw_device *iwdev)
-{
- struct i40iw_puda_rsrc_info info;
- enum i40iw_status_code status;
-
- memset(&info, 0, sizeof(info));
- info.type = I40IW_PUDA_RSRC_TYPE_ILQ;
- info.cq_id = 1;
- info.qp_id = 0;
- info.count = 1;
- info.pd_id = 1;
- info.sq_size = 8192;
- info.rq_size = 8192;
- info.buf_size = 1024;
- info.tx_buf_cnt = 16384;
- info.receive = i40iw_receive_ilq;
- info.xmit_complete = i40iw_free_sqbuf;
- status = i40iw_puda_create_rsrc(&iwdev->vsi, &info);
- if (status)
- i40iw_pr_err("ilq create fail\n");
- return status;
-}
-
-/**
- * i40iw_initialize_ieq - create iwarp exception queue
- * @iwdev: iwarp device
- *
- * Return 0 if successful, otherwise return error
- */
-static enum i40iw_status_code i40iw_initialize_ieq(struct i40iw_device *iwdev)
-{
- struct i40iw_puda_rsrc_info info;
- enum i40iw_status_code status;
-
- memset(&info, 0, sizeof(info));
- info.type = I40IW_PUDA_RSRC_TYPE_IEQ;
- info.cq_id = 2;
- info.qp_id = iwdev->vsi.exception_lan_queue;
- info.count = 1;
- info.pd_id = 2;
- info.sq_size = 8192;
- info.rq_size = 8192;
- info.buf_size = iwdev->vsi.mtu + VLAN_ETH_HLEN;
- info.tx_buf_cnt = 4096;
- status = i40iw_puda_create_rsrc(&iwdev->vsi, &info);
- if (status)
- i40iw_pr_err("ieq create fail\n");
- return status;
-}
-
-/**
- * i40iw_reinitialize_ieq - destroy and re-create ieq
- * @dev: hardware control device structure
- */
-void i40iw_reinitialize_ieq(struct i40iw_sc_dev *dev)
-{
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
-
- i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_IEQ, false);
- if (i40iw_initialize_ieq(iwdev)) {
- iwdev->reset = true;
- i40iw_request_reset(iwdev);
- }
-}
-
-/**
- * i40iw_hmc_setup - create hmc objects for the device
- * @iwdev: iwarp device
- *
- * Set up the device private memory space for the number and size of
- * the hmc objects and create the objects
- * Return 0 if successful, otherwise return error
- */
-static enum i40iw_status_code i40iw_hmc_setup(struct i40iw_device *iwdev)
-{
- enum i40iw_status_code status;
-
- iwdev->sd_type = I40IW_SD_TYPE_DIRECT;
- status = i40iw_config_fpm_values(&iwdev->sc_dev, IW_CFG_FPM_QP_COUNT);
- if (status)
- goto exit;
- status = i40iw_create_hmc_objs(iwdev, true);
- if (status)
- goto exit;
- iwdev->init_state = HMC_OBJS_CREATED;
-exit:
- return status;
-}
-
-/**
- * i40iw_del_init_mem - deallocate memory resources
- * @iwdev: iwarp device
- */
-static void i40iw_del_init_mem(struct i40iw_device *iwdev)
-{
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
-
- i40iw_free_dma_mem(&iwdev->hw, &iwdev->obj_mem);
- kfree(dev->hmc_info->sd_table.sd_entry);
- dev->hmc_info->sd_table.sd_entry = NULL;
- kfree(iwdev->mem_resources);
- iwdev->mem_resources = NULL;
- kfree(iwdev->ceqlist);
- iwdev->ceqlist = NULL;
- kfree(iwdev->iw_msixtbl);
- iwdev->iw_msixtbl = NULL;
- kfree(iwdev->hmc_info_mem);
- iwdev->hmc_info_mem = NULL;
-}
-
-/**
- * i40iw_del_macip_entry - remove a mac ip address entry from the hw table
- * @iwdev: iwarp device
- * @idx: the index of the mac ip address to delete
- */
-static void i40iw_del_macip_entry(struct i40iw_device *iwdev, u8 idx)
-{
- struct i40iw_cqp *iwcqp = &iwdev->cqp;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- enum i40iw_status_code status = 0;
-
- cqp_request = i40iw_get_cqp_request(iwcqp, true);
- if (!cqp_request) {
- i40iw_pr_err("cqp_request memory failed\n");
- return;
- }
- cqp_info = &cqp_request->info;
- cqp_info->cqp_cmd = OP_DELETE_LOCAL_MAC_IPADDR_ENTRY;
- cqp_info->post_sq = 1;
- cqp_info->in.u.del_local_mac_ipaddr_entry.cqp = &iwcqp->sc_cqp;
- cqp_info->in.u.del_local_mac_ipaddr_entry.scratch = (uintptr_t)cqp_request;
- cqp_info->in.u.del_local_mac_ipaddr_entry.entry_idx = idx;
- cqp_info->in.u.del_local_mac_ipaddr_entry.ignore_ref_count = 0;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Del MAC Ip entry fail");
-}
-
-/**
- * i40iw_add_mac_ipaddr_entry - add a mac ip address entry to the hw table
- * @iwdev: iwarp device
- * @mac_addr: pointer to mac address
- * @idx: the index of the mac ip address to add
- */
-static enum i40iw_status_code i40iw_add_mac_ipaddr_entry(struct i40iw_device *iwdev,
- u8 *mac_addr,
- u8 idx)
-{
- struct i40iw_local_mac_ipaddr_entry_info *info;
- struct i40iw_cqp *iwcqp = &iwdev->cqp;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- enum i40iw_status_code status = 0;
-
- cqp_request = i40iw_get_cqp_request(iwcqp, true);
- if (!cqp_request) {
- i40iw_pr_err("cqp_request memory failed\n");
- return I40IW_ERR_NO_MEMORY;
- }
-
- cqp_info = &cqp_request->info;
-
- cqp_info->post_sq = 1;
- info = &cqp_info->in.u.add_local_mac_ipaddr_entry.info;
- ether_addr_copy(info->mac_addr, mac_addr);
- info->entry_idx = idx;
- cqp_info->cqp_cmd = OP_ADD_LOCAL_MAC_IPADDR_ENTRY;
- cqp_info->in.u.add_local_mac_ipaddr_entry.cqp = &iwcqp->sc_cqp;
- cqp_info->in.u.add_local_mac_ipaddr_entry.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Add MAC Ip entry fail");
- return status;
-}
-
-/**
- * i40iw_alloc_local_mac_ipaddr_entry - allocate a mac ip address entry
- * @iwdev: iwarp device
- * @mac_ip_tbl_idx: the index of the new mac ip address
- *
- * Allocate a mac ip address entry and update the mac_ip_tbl_idx
- * to hold the index of the newly created mac ip address
- * Return 0 if successful, otherwise return error
- */
-static enum i40iw_status_code i40iw_alloc_local_mac_ipaddr_entry(struct i40iw_device *iwdev,
- u16 *mac_ip_tbl_idx)
-{
- struct i40iw_cqp *iwcqp = &iwdev->cqp;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- enum i40iw_status_code status = 0;
-
- cqp_request = i40iw_get_cqp_request(iwcqp, true);
- if (!cqp_request) {
- i40iw_pr_err("cqp_request memory failed\n");
- return I40IW_ERR_NO_MEMORY;
- }
-
- /* increment refcount, because we need the cqp request ret value */
- atomic_inc(&cqp_request->refcount);
-
- cqp_info = &cqp_request->info;
- cqp_info->cqp_cmd = OP_ALLOC_LOCAL_MAC_IPADDR_ENTRY;
- cqp_info->post_sq = 1;
- cqp_info->in.u.alloc_local_mac_ipaddr_entry.cqp = &iwcqp->sc_cqp;
- cqp_info->in.u.alloc_local_mac_ipaddr_entry.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (!status)
- *mac_ip_tbl_idx = cqp_request->compl_info.op_ret_val;
- else
- i40iw_pr_err("CQP-OP Alloc MAC Ip entry fail");
- /* decrement refcount and free the cqp request, if no longer used */
- i40iw_put_cqp_request(iwcqp, cqp_request);
- return status;
-}
-
-/**
- * i40iw_alloc_set_mac_ipaddr - set up a mac ip address table entry
- * @iwdev: iwarp device
- * @macaddr: pointer to mac address
- *
- * Allocate a mac ip address entry and add it to the hw table
- * Return 0 if successful, otherwise return error
- */
-static enum i40iw_status_code i40iw_alloc_set_mac_ipaddr(struct i40iw_device *iwdev,
- u8 *macaddr)
-{
- enum i40iw_status_code status;
-
- status = i40iw_alloc_local_mac_ipaddr_entry(iwdev, &iwdev->mac_ip_table_idx);
- if (!status) {
- status = i40iw_add_mac_ipaddr_entry(iwdev, macaddr,
- (u8)iwdev->mac_ip_table_idx);
- if (status)
- i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
- }
- return status;
-}
-
-/**
- * i40iw_add_ipv6_addr - add ipv6 address to the hw arp table
- * @iwdev: iwarp device
- */
-static void i40iw_add_ipv6_addr(struct i40iw_device *iwdev)
-{
- struct net_device *ip_dev;
- struct inet6_dev *idev;
- struct inet6_ifaddr *ifp, *tmp;
- u32 local_ipaddr6[4];
-
- rcu_read_lock();
- for_each_netdev_rcu(&init_net, ip_dev) {
- if ((((rdma_vlan_dev_vlan_id(ip_dev) < 0xFFFF) &&
- (rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev)) ||
- (ip_dev == iwdev->netdev)) && (ip_dev->flags & IFF_UP)) {
- idev = __in6_dev_get(ip_dev);
- if (!idev) {
- i40iw_pr_err("ipv6 inet device not found\n");
- break;
- }
- list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) {
- i40iw_pr_info("IP=%pI6, vlan_id=%d, MAC=%pM\n", &ifp->addr,
- rdma_vlan_dev_vlan_id(ip_dev), ip_dev->dev_addr);
- i40iw_copy_ip_ntohl(local_ipaddr6,
- ifp->addr.in6_u.u6_addr32);
- i40iw_manage_arp_cache(iwdev,
- ip_dev->dev_addr,
- local_ipaddr6,
- false,
- I40IW_ARP_ADD);
- }
- }
- }
- rcu_read_unlock();
-}
-
-/**
- * i40iw_add_ipv4_addr - add ipv4 address to the hw arp table
- * @iwdev: iwarp device
- */
-static void i40iw_add_ipv4_addr(struct i40iw_device *iwdev)
-{
- struct net_device *dev;
- struct in_device *idev;
- bool got_lock = true;
- u32 ip_addr;
-
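- /* take the rtnl lock if it can be acquired without blocking; otherwise proceed without it */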
- if (!rtnl_trylock())
- got_lock = false;
-
- for_each_netdev(&init_net, dev) {
- if ((((rdma_vlan_dev_vlan_id(dev) < 0xFFFF) &&
- (rdma_vlan_dev_real_dev(dev) == iwdev->netdev)) ||
- (dev == iwdev->netdev)) && (dev->flags & IFF_UP)) {
- idev = in_dev_get(dev);
- if (!idev)
- continue;
- for_ifa(idev) {
- i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM,
- "IP=%pI4, vlan_id=%d, MAC=%pM\n", &ifa->ifa_address,
- rdma_vlan_dev_vlan_id(dev), dev->dev_addr);
-
- ip_addr = ntohl(ifa->ifa_address);
- i40iw_manage_arp_cache(iwdev,
- dev->dev_addr,
- &ip_addr,
- true,
- I40IW_ARP_ADD);
- }
- endfor_ifa(idev);
- in_dev_put(idev);
- }
- }
- if (got_lock)
- rtnl_unlock();
-}
-
-/**
- * i40iw_add_mac_ip - add mac and ip addresses
- * @iwdev: iwarp device
- *
- * Create and add a mac ip address entry to the hw table and
- * ipv4/ipv6 addresses to the arp cache
- * Return 0 if successful, otherwise return error
- */
-static enum i40iw_status_code i40iw_add_mac_ip(struct i40iw_device *iwdev)
-{
- struct net_device *netdev = iwdev->netdev;
- enum i40iw_status_code status;
-
- status = i40iw_alloc_set_mac_ipaddr(iwdev, (u8 *)netdev->dev_addr);
- if (status)
- return status;
- i40iw_add_ipv4_addr(iwdev);
- i40iw_add_ipv6_addr(iwdev);
- return 0;
-}
-
-/**
- * i40iw_wait_pe_ready - Check if firmware is ready
- * @hw: provides access to registers
- */
-static void i40iw_wait_pe_ready(struct i40iw_hw *hw)
-{
- u32 statusfw;
- u32 statuscpu0;
- u32 statuscpu1;
- u32 statuscpu2;
- u32 retrycount = 0;
-
- do {
- statusfw = i40iw_rd32(hw, I40E_GLPE_FWLDSTATUS);
- i40iw_pr_info("[%04d] fm load status[x%04X]\n", __LINE__, statusfw);
- statuscpu0 = i40iw_rd32(hw, I40E_GLPE_CPUSTATUS0);
- i40iw_pr_info("[%04d] CSR_CQP status[x%04X]\n", __LINE__, statuscpu0);
- statuscpu1 = i40iw_rd32(hw, I40E_GLPE_CPUSTATUS1);
- i40iw_pr_info("[%04d] I40E_GLPE_CPUSTATUS1 status[x%04X]\n",
- __LINE__, statuscpu1);
- statuscpu2 = i40iw_rd32(hw, I40E_GLPE_CPUSTATUS2);
- i40iw_pr_info("[%04d] I40E_GLPE_CPUSTATUS2 status[x%04X]\n",
- __LINE__, statuscpu2);
- if ((statuscpu0 == 0x80) && (statuscpu1 == 0x80) && (statuscpu2 == 0x80))
- break; /* SUCCESS */
- msleep(1000);
- retrycount++;
- } while (retrycount < 14);
- i40iw_wr32(hw, 0xb4040, 0x4C104C5);
-}
-
-/**
- * i40iw_initialize_dev - initialize device
- * @iwdev: iwarp device
- * @ldev: lan device information
- *
- * Allocate memory for the hmc objects and initialize iwdev
- * Return 0 if successful, otherwise clean up the resources
- * and return error
- */
-static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev,
- struct i40e_info *ldev)
-{
- enum i40iw_status_code status;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_device_init_info info;
- struct i40iw_vsi_init_info vsi_info;
- struct i40iw_dma_mem mem;
- struct i40iw_l2params l2params;
- u32 size;
- struct i40iw_vsi_stats_info stats_info;
- u16 last_qset = I40IW_NO_QSET;
- u16 qset;
- u32 i;
-
- memset(&l2params, 0, sizeof(l2params));
- memset(&info, 0, sizeof(info));
- size = sizeof(struct i40iw_hmc_pble_rsrc) + sizeof(struct i40iw_hmc_info) +
- (sizeof(struct i40iw_hmc_obj_info) * I40IW_HMC_IW_MAX);
- iwdev->hmc_info_mem = kzalloc(size, GFP_KERNEL);
- if (!iwdev->hmc_info_mem)
- return I40IW_ERR_NO_MEMORY;
-
- iwdev->pble_rsrc = (struct i40iw_hmc_pble_rsrc *)iwdev->hmc_info_mem;
- dev->hmc_info = &iwdev->hw.hmc;
- dev->hmc_info->hmc_obj = (struct i40iw_hmc_obj_info *)(iwdev->pble_rsrc + 1);
- status = i40iw_obj_aligned_mem(iwdev, &mem, I40IW_QUERY_FPM_BUF_SIZE,
- I40IW_FPM_QUERY_BUF_ALIGNMENT_MASK);
- if (status)
- goto error;
- info.fpm_query_buf_pa = mem.pa;
- info.fpm_query_buf = mem.va;
- status = i40iw_obj_aligned_mem(iwdev, &mem, I40IW_COMMIT_FPM_BUF_SIZE,
- I40IW_FPM_COMMIT_BUF_ALIGNMENT_MASK);
- if (status)
- goto error;
- info.fpm_commit_buf_pa = mem.pa;
- info.fpm_commit_buf = mem.va;
- info.hmc_fn_id = ldev->fid;
- info.is_pf = (ldev->ftype) ? false : true;
- info.bar0 = ldev->hw_addr;
- info.hw = &iwdev->hw;
- info.debug_mask = debug;
- l2params.mtu =
- (ldev->params.mtu) ? ldev->params.mtu : I40IW_DEFAULT_MTU;
- for (i = 0; i < I40E_CLIENT_MAX_USER_PRIORITY; i++) {
- qset = ldev->params.qos.prio_qos[i].qs_handle;
- l2params.qs_handle_list[i] = qset;
- if (last_qset == I40IW_NO_QSET)
- last_qset = qset;
- else if ((qset != last_qset) && (qset != I40IW_NO_QSET))
- iwdev->dcb = true;
- }
- i40iw_pr_info("DCB is set/clear = %d\n", iwdev->dcb);
- info.vchnl_send = i40iw_virtchnl_send;
- status = i40iw_device_init(&iwdev->sc_dev, &info);
-
- if (status)
- goto error;
- memset(&vsi_info, 0, sizeof(vsi_info));
- vsi_info.dev = &iwdev->sc_dev;
- vsi_info.back_vsi = (void *)iwdev;
- vsi_info.params = &l2params;
- vsi_info.exception_lan_queue = 1;
- i40iw_sc_vsi_init(&iwdev->vsi, &vsi_info);
-
- if (dev->is_pf) {
- memset(&stats_info, 0, sizeof(stats_info));
- stats_info.fcn_id = ldev->fid;
- stats_info.pestat = kzalloc(sizeof(*stats_info.pestat), GFP_KERNEL);
- if (!stats_info.pestat) {
- status = I40IW_ERR_NO_MEMORY;
- goto error;
- }
- stats_info.stats_initialize = true;
- i40iw_vsi_stats_init(&iwdev->vsi, &stats_info);
- }
- return status;
-error:
- kfree(iwdev->hmc_info_mem);
- iwdev->hmc_info_mem = NULL;
- return status;
-}
-
-/**
- * i40iw_register_notifiers - register tcp ip notifiers
- */
-static void i40iw_register_notifiers(void)
-{
- register_inetaddr_notifier(&i40iw_inetaddr_notifier);
- register_inet6addr_notifier(&i40iw_inetaddr6_notifier);
- register_netevent_notifier(&i40iw_net_notifier);
- register_netdevice_notifier(&i40iw_netdevice_notifier);
-}
-
-/**
- * i40iw_unregister_notifiers - unregister tcp ip notifiers
- */
-static void i40iw_unregister_notifiers(void)
-{
- unregister_netevent_notifier(&i40iw_net_notifier);
- unregister_inetaddr_notifier(&i40iw_inetaddr_notifier);
- unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier);
- unregister_netdevice_notifier(&i40iw_netdevice_notifier);
-}
-
-/**
- * i40iw_save_msix_info - copy msix vector information to iwarp device
- * @iwdev: iwarp device
- * @ldev: lan device information
- *
- * Allocate iwdev msix table and copy the ldev msix info to the table
- * Return 0 if successful, otherwise return error
- */
-static enum i40iw_status_code i40iw_save_msix_info(struct i40iw_device *iwdev,
- struct i40e_info *ldev)
-{
- struct i40e_qvlist_info *iw_qvlist;
- struct i40e_qv_info *iw_qvinfo;
- u32 ceq_idx;
- u32 i;
- u32 size;
-
- if (!ldev->msix_count) {
- i40iw_pr_err("No MSI-X vectors\n");
- return I40IW_ERR_CONFIG;
- }
-
- iwdev->msix_count = ldev->msix_count;
-
- size = sizeof(struct i40iw_msix_vector) * iwdev->msix_count;
- size += sizeof(struct i40e_qvlist_info);
- size += sizeof(struct i40e_qv_info) * (iwdev->msix_count - 1);
- iwdev->iw_msixtbl = kzalloc(size, GFP_KERNEL);
-
- if (!iwdev->iw_msixtbl)
- return I40IW_ERR_NO_MEMORY;
- iwdev->iw_qvlist = (struct i40e_qvlist_info *)(&iwdev->iw_msixtbl[iwdev->msix_count]);
- iw_qvlist = iwdev->iw_qvlist;
- iw_qvinfo = iw_qvlist->qv_info;
- iw_qvlist->num_vectors = iwdev->msix_count;
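- /* share msix vector 0 between the aeq and ceq 0 unless there are more vectors than online cpus */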
- if (iwdev->msix_count <= num_online_cpus())
- iwdev->msix_shared = true;
- for (i = 0, ceq_idx = 0; i < iwdev->msix_count; i++, iw_qvinfo++) {
- iwdev->iw_msixtbl[i].idx = ldev->msix_entries[i].entry;
- iwdev->iw_msixtbl[i].irq = ldev->msix_entries[i].vector;
- iwdev->iw_msixtbl[i].cpu_affinity = ceq_idx;
- if (i == 0) {
- iw_qvinfo->aeq_idx = 0;
- if (iwdev->msix_shared)
- iw_qvinfo->ceq_idx = ceq_idx++;
- else
- iw_qvinfo->ceq_idx = I40E_QUEUE_INVALID_IDX;
- } else {
- iw_qvinfo->aeq_idx = I40E_QUEUE_INVALID_IDX;
- iw_qvinfo->ceq_idx = ceq_idx++;
- }
- iw_qvinfo->itr_idx = 3;
- iw_qvinfo->v_idx = iwdev->iw_msixtbl[i].idx;
- }
- return 0;
-}
-
-/**
- * i40iw_deinit_device - clean up the device resources
- * @iwdev: iwarp device
- *
- * Destroy the ib device interface, remove the mac ip entry and ipv4/ipv6 addresses,
- * destroy the device queues and free the pble and the hmc objects
- */
-static void i40iw_deinit_device(struct i40iw_device *iwdev)
-{
- struct i40e_info *ldev = iwdev->ldev;
-
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
-
- i40iw_pr_info("state = %d\n", iwdev->init_state);
- if (iwdev->param_wq)
- destroy_workqueue(iwdev->param_wq);
-
- switch (iwdev->init_state) {
- case RDMA_DEV_REGISTERED:
- iwdev->iw_status = 0;
- i40iw_port_ibevent(iwdev);
- i40iw_destroy_rdma_device(iwdev->iwibdev);
- /* fallthrough */
- case IP_ADDR_REGISTERED:
- if (!iwdev->reset)
- i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
- /* fallthrough */
- case PBLE_CHUNK_MEM:
- i40iw_destroy_pble_pool(dev, iwdev->pble_rsrc);
- /* fallthrough */
- case CEQ_CREATED:
- i40iw_dele_ceqs(iwdev);
- /* fallthrough */
- case AEQ_CREATED:
- i40iw_destroy_aeq(iwdev);
- /* fallthrough */
- case IEQ_CREATED:
- i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_IEQ, iwdev->reset);
- /* fallthrough */
- case ILQ_CREATED:
- i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_ILQ, iwdev->reset);
- /* fallthrough */
- case CCQ_CREATED:
- i40iw_destroy_ccq(iwdev);
- /* fallthrough */
- case HMC_OBJS_CREATED:
- i40iw_del_hmc_objects(dev, dev->hmc_info, true, iwdev->reset);
- /* fallthrough */
- case CQP_CREATED:
- i40iw_destroy_cqp(iwdev, true);
- /* fallthrough */
- case INITIAL_STATE:
- i40iw_cleanup_cm_core(&iwdev->cm_core);
- if (iwdev->vsi.pestat) {
- i40iw_vsi_stats_free(&iwdev->vsi);
- kfree(iwdev->vsi.pestat);
- }
- i40iw_del_init_mem(iwdev);
- break;
- case INVALID_STATE:
- /* fallthrough */
- default:
- i40iw_pr_err("bad init_state = %d\n", iwdev->init_state);
- break;
- }
-
- i40iw_del_handler(i40iw_find_i40e_handler(ldev));
- kfree(iwdev->hdl);
-}
-
-/**
- * i40iw_setup_init_state - set up the initial device struct
- * @hdl: handler for iwarp device - one per instance
- * @ldev: lan device information
- * @client: iwarp client information, provided during registration
- *
- * Initialize the iwarp device and its hdl information
- * using the ldev and client information
- * Return 0 if successful, otherwise return error
- */
-static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl,
- struct i40e_info *ldev,
- struct i40e_client *client)
-{
- struct i40iw_device *iwdev = &hdl->device;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- enum i40iw_status_code status;
-
- memcpy(&hdl->ldev, ldev, sizeof(*ldev));
-
- iwdev->mpa_version = mpa_version;
- iwdev->resource_profile = (resource_profile < I40IW_HMC_PROFILE_EQUAL) ?
- (u8)resource_profile + I40IW_HMC_PROFILE_DEFAULT :
- I40IW_HMC_PROFILE_DEFAULT;
- iwdev->max_rdma_vfs =
- (iwdev->resource_profile != I40IW_HMC_PROFILE_DEFAULT) ? max_rdma_vfs : 0;
- iwdev->max_enabled_vfs = iwdev->max_rdma_vfs;
- iwdev->netdev = ldev->netdev;
- hdl->client = client;
- if (!ldev->ftype)
- iwdev->db_start = pci_resource_start(ldev->pcidev, 0) + I40IW_DB_ADDR_OFFSET;
- else
- iwdev->db_start = pci_resource_start(ldev->pcidev, 0) + I40IW_VF_DB_ADDR_OFFSET;
-
- status = i40iw_save_msix_info(iwdev, ldev);
- if (status)
- return status;
- iwdev->hw.dev_context = (void *)ldev->pcidev;
- iwdev->hw.hw_addr = ldev->hw_addr;
- status = i40iw_allocate_dma_mem(&iwdev->hw,
- &iwdev->obj_mem, 8192, 4096);
- if (status)
- goto exit;
- iwdev->obj_next = iwdev->obj_mem;
- iwdev->push_mode = push_mode;
-
- init_waitqueue_head(&iwdev->vchnl_waitq);
- init_waitqueue_head(&dev->vf_reqs);
- init_waitqueue_head(&iwdev->close_wq);
-
- status = i40iw_initialize_dev(iwdev, ldev);
-exit:
- if (status) {
- kfree(iwdev->iw_msixtbl);
- i40iw_free_dma_mem(dev->hw, &iwdev->obj_mem);
- iwdev->iw_msixtbl = NULL;
- }
- return status;
-}
-
-/**
- * i40iw_get_used_rsrc - determine resources used internally
- * @iwdev: iwarp device
- *
- * Called after internal allocations
- */
-static void i40iw_get_used_rsrc(struct i40iw_device *iwdev)
-{
- iwdev->used_pds = find_next_zero_bit(iwdev->allocated_pds, iwdev->max_pd, 0);
- iwdev->used_qps = find_next_zero_bit(iwdev->allocated_qps, iwdev->max_qp, 0);
- iwdev->used_cqs = find_next_zero_bit(iwdev->allocated_cqs, iwdev->max_cq, 0);
- iwdev->used_mrs = find_next_zero_bit(iwdev->allocated_mrs, iwdev->max_mr, 0);
-}
-
-/**
- * i40iw_open - client interface operation open for iwarp/uda device
- * @ldev: lan device information
- * @client: iwarp client information, provided during registration
- *
- * Called by the lan driver during the processing of client register
- * Create device resources, set up queues, pble and hmc objects and
- * register the device with the ib verbs interface
- * Return 0 if successful, otherwise return error
- */
-static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client)
-{
- struct i40iw_device *iwdev;
- struct i40iw_sc_dev *dev;
- enum i40iw_status_code status;
- struct i40iw_handler *hdl;
-
- hdl = i40iw_find_netdev(ldev->netdev);
- if (hdl)
- return 0;
-
- hdl = kzalloc(sizeof(*hdl), GFP_KERNEL);
- if (!hdl)
- return -ENOMEM;
- iwdev = &hdl->device;
- iwdev->hdl = hdl;
- dev = &iwdev->sc_dev;
- if (i40iw_setup_cm_core(iwdev)) {
- kfree(iwdev->hdl);
- return -ENOMEM;
- }
-
- dev->back_dev = (void *)iwdev;
- iwdev->ldev = &hdl->ldev;
- iwdev->client = client;
- mutex_init(&iwdev->pbl_mutex);
- i40iw_add_handler(hdl);
-
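- /* the do/while (0) below lets any failing init step break out to the common error path */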
- do {
- status = i40iw_setup_init_state(hdl, ldev, client);
- if (status)
- break;
- iwdev->init_state = INITIAL_STATE;
- if (dev->is_pf)
- i40iw_wait_pe_ready(dev->hw);
- status = i40iw_create_cqp(iwdev);
- if (status)
- break;
- iwdev->init_state = CQP_CREATED;
- status = i40iw_hmc_setup(iwdev);
- if (status)
- break;
- status = i40iw_create_ccq(iwdev);
- if (status)
- break;
- iwdev->init_state = CCQ_CREATED;
- status = i40iw_initialize_ilq(iwdev);
- if (status)
- break;
- iwdev->init_state = ILQ_CREATED;
- status = i40iw_initialize_ieq(iwdev);
- if (status)
- break;
- iwdev->init_state = IEQ_CREATED;
- status = i40iw_setup_aeq(iwdev);
- if (status)
- break;
- iwdev->init_state = AEQ_CREATED;
- status = i40iw_setup_ceqs(iwdev, ldev);
- if (status)
- break;
- iwdev->init_state = CEQ_CREATED;
- status = i40iw_initialize_hw_resources(iwdev);
- if (status)
- break;
- i40iw_get_used_rsrc(iwdev);
- dev->ccq_ops->ccq_arm(dev->ccq);
- status = i40iw_hmc_init_pble(&iwdev->sc_dev, iwdev->pble_rsrc);
- if (status)
- break;
- iwdev->init_state = PBLE_CHUNK_MEM;
- iwdev->virtchnl_wq = alloc_ordered_workqueue("iwvch", WQ_MEM_RECLAIM);
- status = i40iw_add_mac_ip(iwdev);
- if (status)
- break;
- iwdev->init_state = IP_ADDR_REGISTERED;
- if (i40iw_register_rdma_device(iwdev)) {
- i40iw_pr_err("register rdma device fail\n");
- break;
- }
-
- iwdev->init_state = RDMA_DEV_REGISTERED;
- iwdev->iw_status = 1;
- i40iw_port_ibevent(iwdev);
- iwdev->param_wq = alloc_ordered_workqueue("l2params", WQ_MEM_RECLAIM);
- if (!iwdev->param_wq)
- break;
- i40iw_pr_info("i40iw_open completed\n");
- return 0;
- } while (0);
-
- i40iw_pr_err("status = %d last completion = %d\n", status, iwdev->init_state);
- i40iw_deinit_device(iwdev);
- return -ERESTART;
-}
-
-/**
- * i40iw_l2params_worker - worker for l2 params change
- * @work: work pointer for l2 params
- */
-static void i40iw_l2params_worker(struct work_struct *work)
-{
- struct l2params_work *dwork =
- container_of(work, struct l2params_work, work);
- struct i40iw_device *iwdev = dwork->iwdev;
-
- i40iw_change_l2params(&iwdev->vsi, &dwork->l2params);
- atomic_dec(&iwdev->params_busy);
- kfree(work);
-}
-
-/**
- * i40iw_l2param_change - handle qs handles for qos and mss change
- * @ldev: lan device information
- * @client: client for parameter change
- * @params: new parameters from L2
- */
-static void i40iw_l2param_change(struct i40e_info *ldev, struct i40e_client *client,
- struct i40e_params *params)
-{
- struct i40iw_handler *hdl;
- struct i40iw_l2params *l2params;
- struct l2params_work *work;
- struct i40iw_device *iwdev;
- int i;
-
- hdl = i40iw_find_i40e_handler(ldev);
- if (!hdl)
- return;
-
- iwdev = &hdl->device;
-
- if (atomic_read(&iwdev->params_busy))
- return;
-
- work = kzalloc(sizeof(*work), GFP_KERNEL);
- if (!work)
- return;
-
- atomic_inc(&iwdev->params_busy);
-
- work->iwdev = iwdev;
- l2params = &work->l2params;
- for (i = 0; i < I40E_CLIENT_MAX_USER_PRIORITY; i++)
- l2params->qs_handle_list[i] = params->qos.prio_qos[i].qs_handle;
-
- l2params->mtu = (params->mtu) ? params->mtu : iwdev->vsi.mtu;
-
- INIT_WORK(&work->work, i40iw_l2params_worker);
- queue_work(iwdev->param_wq, &work->work);
-}
-
-/**
- * i40iw_close - client interface operation close for iwarp/uda device
- * @ldev: lan device information
- * @client: client to close
- * @reset: true if called before reset
- *
- * Called by the lan driver during the processing of client unregister
- * Destroy and clean up the driver resources
- */
-static void i40iw_close(struct i40e_info *ldev, struct i40e_client *client, bool reset)
-{
- struct i40iw_device *iwdev;
- struct i40iw_handler *hdl;
-
- hdl = i40iw_find_i40e_handler(ldev);
- if (!hdl)
- return;
-
- iwdev = &hdl->device;
- iwdev->closing = true;
-
- if (reset)
- iwdev->reset = true;
-
- i40iw_cm_teardown_connections(iwdev, NULL, NULL, true);
- destroy_workqueue(iwdev->virtchnl_wq);
- i40iw_deinit_device(iwdev);
-}
-
-/**
- * i40iw_vf_reset - process VF reset
- * @ldev: lan device information
- * @client: client interface instance
- * @vf_id: virtual function id
- *
- * Called when a VF is reset by the PF
- * Destroy and clean up the VF resources
- */
-static void i40iw_vf_reset(struct i40e_info *ldev, struct i40e_client *client, u32 vf_id)
-{
- struct i40iw_handler *hdl;
- struct i40iw_sc_dev *dev;
- struct i40iw_hmc_fcn_info hmc_fcn_info;
- struct i40iw_virt_mem vf_dev_mem;
- struct i40iw_vfdev *tmp_vfdev;
- unsigned int i;
- unsigned long flags;
- struct i40iw_device *iwdev;
-
- hdl = i40iw_find_i40e_handler(ldev);
- if (!hdl)
- return;
-
- dev = &hdl->device.sc_dev;
- iwdev = (struct i40iw_device *)dev->back_dev;
-
- for (i = 0; i < I40IW_MAX_PE_ENABLED_VF_COUNT; i++) {
- if (!dev->vf_dev[i] || (dev->vf_dev[i]->vf_id != vf_id))
- continue;
- /* free all resources allocated on behalf of vf */
- tmp_vfdev = dev->vf_dev[i];
- spin_lock_irqsave(&iwdev->vsi.pestat->lock, flags);
- dev->vf_dev[i] = NULL;
- spin_unlock_irqrestore(&iwdev->vsi.pestat->lock, flags);
- i40iw_del_hmc_objects(dev, &tmp_vfdev->hmc_info, false, false);
- /* remove vf hmc function */
- memset(&hmc_fcn_info, 0, sizeof(hmc_fcn_info));
- hmc_fcn_info.vf_id = vf_id;
- hmc_fcn_info.iw_vf_idx = tmp_vfdev->iw_vf_idx;
- hmc_fcn_info.free_fcn = true;
- i40iw_cqp_manage_hmc_fcn_cmd(dev, &hmc_fcn_info);
- /* free vf_dev */
- vf_dev_mem.va = tmp_vfdev;
- vf_dev_mem.size = sizeof(struct i40iw_vfdev) +
- sizeof(struct i40iw_hmc_obj_info) * I40IW_HMC_IW_MAX;
- i40iw_free_virt_mem(dev->hw, &vf_dev_mem);
- break;
- }
-}
-
-/**
- * i40iw_vf_enable - enable a number of VFs
- * @ldev: lan device information
- * @client: client interface instance
- * @num_vfs: number of VFs for the PF
- *
- * Called when the number of VFs changes
- */
-static void i40iw_vf_enable(struct i40e_info *ldev,
- struct i40e_client *client,
- u32 num_vfs)
-{
- struct i40iw_handler *hdl;
-
- hdl = i40iw_find_i40e_handler(ldev);
- if (!hdl)
- return;
-
- if (num_vfs > I40IW_MAX_PE_ENABLED_VF_COUNT)
- hdl->device.max_enabled_vfs = I40IW_MAX_PE_ENABLED_VF_COUNT;
- else
- hdl->device.max_enabled_vfs = num_vfs;
-}
-
-/**
- * i40iw_vf_capable - check if VF capable
- * @ldev: lan device information
- * @client: client interface instance
- * @vf_id: virtual function id
- *
- * Return 1 if a VF slot is available or if VF is already RDMA enabled
- * Return 0 otherwise
- */
-static int i40iw_vf_capable(struct i40e_info *ldev,
- struct i40e_client *client,
- u32 vf_id)
-{
- struct i40iw_handler *hdl;
- struct i40iw_sc_dev *dev;
- unsigned int i;
-
- hdl = i40iw_find_i40e_handler(ldev);
- if (!hdl)
- return 0;
-
- dev = &hdl->device.sc_dev;
-
- for (i = 0; i < hdl->device.max_enabled_vfs; i++) {
- if (!dev->vf_dev[i] || (dev->vf_dev[i]->vf_id == vf_id))
- return 1;
- }
-
- return 0;
-}
-
-/**
- * i40iw_virtchnl_receive - receive a message through the virtual channel
- * @ldev: lan device information
- * @client: client interface instance
- * @vf_id: virtual function id associated with the message
- * @msg: message buffer pointer
- * @len: length of the message
- *
- * Invoke virtual channel receive operation for the given msg
- * Return 0 if successful, otherwise return error
- */
-static int i40iw_virtchnl_receive(struct i40e_info *ldev,
- struct i40e_client *client,
- u32 vf_id,
- u8 *msg,
- u16 len)
-{
- struct i40iw_handler *hdl;
- struct i40iw_sc_dev *dev;
- struct i40iw_device *iwdev;
- int ret_code = I40IW_NOT_SUPPORTED;
-
- if (!len || !msg)
- return I40IW_ERR_PARAM;
-
- hdl = i40iw_find_i40e_handler(ldev);
- if (!hdl)
- return I40IW_ERR_PARAM;
-
- dev = &hdl->device.sc_dev;
- iwdev = dev->back_dev;
-
- if (dev->vchnl_if.vchnl_recv) {
- ret_code = dev->vchnl_if.vchnl_recv(dev, vf_id, msg, len);
- if (!dev->is_pf) {
- atomic_dec(&iwdev->vchnl_msgs);
- wake_up(&iwdev->vchnl_waitq);
- }
- }
- return ret_code;
-}
-
-/**
- * i40iw_vf_clear_to_send - wait to send virtual channel message
- * @dev: hardware control device structure
- *
- * Wait until the virtual channel is clear
- * before sending the next message
- *
- * Returns false if error
- * Returns true if clear to send
- */
-bool i40iw_vf_clear_to_send(struct i40iw_sc_dev *dev)
-{
- struct i40iw_device *iwdev;
- wait_queue_entry_t wait;
-
- iwdev = dev->back_dev;
-
- if (!wq_has_sleeper(&dev->vf_reqs) &&
- (atomic_read(&iwdev->vchnl_msgs) == 0))
- return true; /* virtual channel is clear */
-
- init_wait(&wait);
- add_wait_queue_exclusive(&dev->vf_reqs, &wait);
-
- if (!wait_event_timeout(dev->vf_reqs,
- (atomic_read(&iwdev->vchnl_msgs) == 0),
- I40IW_VCHNL_EVENT_TIMEOUT))
- dev->vchnl_up = false;
-
- remove_wait_queue(&dev->vf_reqs, &wait);
-
- return dev->vchnl_up;
-}
-
-/**
- * i40iw_virtchnl_send - send a message through the virtual channel
- * @dev: iwarp device
- * @vf_id: virtual function id associated with the message
- * @msg: virtual channel message buffer pointer
- * @len: length of the message
- *
- * Invoke virtual channel send operation for the given msg
- * Return 0 if successful, otherwise return error
- */
-static enum i40iw_status_code i40iw_virtchnl_send(struct i40iw_sc_dev *dev,
- u32 vf_id,
- u8 *msg,
- u16 len)
-{
- struct i40iw_device *iwdev;
- struct i40e_info *ldev;
-
- if (!dev || !dev->back_dev)
- return I40IW_ERR_BAD_PTR;
-
- iwdev = dev->back_dev;
- ldev = iwdev->ldev;
-
- if (ldev && ldev->ops && ldev->ops->virtchnl_send)
- return ldev->ops->virtchnl_send(ldev, &i40iw_client, vf_id, msg, len);
- return I40IW_ERR_BAD_PTR;
-}
-
-/* client interface functions */
-static const struct i40e_client_ops i40e_ops = {
- .open = i40iw_open,
- .close = i40iw_close,
- .l2_param_change = i40iw_l2param_change,
- .virtchnl_receive = i40iw_virtchnl_receive,
- .vf_reset = i40iw_vf_reset,
- .vf_enable = i40iw_vf_enable,
- .vf_capable = i40iw_vf_capable
-};
-
-/**
- * i40iw_init_module - driver initialization function
- *
- * First function to call when the driver is loaded
- * Register the driver as i40e client and port mapper client
- */
-static int __init i40iw_init_module(void)
-{
- int ret;
-
- memset(&i40iw_client, 0, sizeof(i40iw_client));
- i40iw_client.version.major = CLIENT_IW_INTERFACE_VERSION_MAJOR;
- i40iw_client.version.minor = CLIENT_IW_INTERFACE_VERSION_MINOR;
- i40iw_client.version.build = CLIENT_IW_INTERFACE_VERSION_BUILD;
- i40iw_client.ops = &i40e_ops;
- memcpy(i40iw_client.name, i40iw_client_name, I40E_CLIENT_STR_LENGTH);
- i40iw_client.type = I40E_CLIENT_IWARP;
- spin_lock_init(&i40iw_handler_lock);
- ret = i40e_register_client(&i40iw_client);
- i40iw_register_notifiers();
-
- return ret;
-}
-
-/**
- * i40iw_exit_module - driver exit clean up function
- *
- * The function is called just before the driver is unloaded
- * Unregister the driver as i40e client and port mapper client
- */
-static void __exit i40iw_exit_module(void)
-{
- i40iw_unregister_notifiers();
- i40e_unregister_client(&i40iw_client);
-}
-
-module_init(i40iw_init_module);
-module_exit(i40iw_exit_module);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_osdep.h b/drivers/infiniband/hw/i40iw/i40iw_osdep.h
deleted file mode 100644
index d474aad62a81..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_osdep.h
+++ /dev/null
@@ -1,217 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_OSDEP_H
-#define I40IW_OSDEP_H
-
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/bitops.h>
-#include <net/tcp.h>
-#include <crypto/hash.h>
-/* get readq/writeq support for 32 bit kernels, use the low-first version */
-#include <linux/io-64-nonatomic-lo-hi.h>
-
-#define STATS_TIMER_DELAY 1000
-
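-/**
- * set_64bit_val - set 64 bit value to hw wqe
- * @wqe_words: wqe addr to write
- * @byte_index: index in wqe
- * @value: value to write
- **/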
-static inline void set_64bit_val(u64 *wqe_words, u32 byte_index, u64 value)
-{
- wqe_words[byte_index >> 3] = value;
-}
-
-/**
- * set_32bit_val - set 32 bit value to hw wqe
- * @wqe_words: wqe addr to write
- * @byte_index: index in wqe
- * @value: value to write
- **/
-static inline void set_32bit_val(u32 *wqe_words, u32 byte_index, u32 value)
-{
- wqe_words[byte_index >> 2] = value;
-}
-
-/**
- * get_64bit_val - read 64 bit value from wqe
- * @wqe_words: wqe addr
- * @byte_index: index to read from
- * @value: read value
- **/
-static inline void get_64bit_val(u64 *wqe_words, u32 byte_index, u64 *value)
-{
- *value = wqe_words[byte_index >> 3];
-}
-
-/**
- * get_32bit_val - read 32 bit value from wqe
- * @wqe_words: wqe addr
- * @byte_index: index to read from
- * @value: return 32 bit value
- **/
-static inline void get_32bit_val(u32 *wqe_words, u32 byte_index, u32 *value)
-{
- *value = wqe_words[byte_index >> 2];
-}
-
-struct i40iw_dma_mem {
- void *va;
- dma_addr_t pa;
- u32 size;
-} __packed;
-
-struct i40iw_virt_mem {
- void *va;
- u32 size;
-} __packed;
-
-#define i40iw_debug(h, m, s, ...) \
-do { \
- if (((m) & (h)->debug_mask)) \
- pr_info("i40iw " s, ##__VA_ARGS__); \
-} while (0)
-
-#define i40iw_flush(a) readl((a)->hw_addr + I40E_GLGEN_STAT)
-
-#define I40E_GLHMC_VFSDCMD(_i) (0x000C8000 + ((_i) * 4)) \
- /* _i=0...31 */
-#define I40E_GLHMC_VFSDCMD_MAX_INDEX 31
-#define I40E_GLHMC_VFSDCMD_PMSDIDX_SHIFT 0
-#define I40E_GLHMC_VFSDCMD_PMSDIDX_MASK (0xFFF \
- << I40E_GLHMC_VFSDCMD_PMSDIDX_SHIFT)
-#define I40E_GLHMC_VFSDCMD_PF_SHIFT 16
-#define I40E_GLHMC_VFSDCMD_PF_MASK (0xF << I40E_GLHMC_VFSDCMD_PF_SHIFT)
-#define I40E_GLHMC_VFSDCMD_VF_SHIFT 20
-#define I40E_GLHMC_VFSDCMD_VF_MASK (0x1FF << I40E_GLHMC_VFSDCMD_VF_SHIFT)
-#define I40E_GLHMC_VFSDCMD_PMF_TYPE_SHIFT 29
-#define I40E_GLHMC_VFSDCMD_PMF_TYPE_MASK (0x3 \
- << I40E_GLHMC_VFSDCMD_PMF_TYPE_SHIFT)
-#define I40E_GLHMC_VFSDCMD_PMSDWR_SHIFT 31
-#define I40E_GLHMC_VFSDCMD_PMSDWR_MASK (0x1 << I40E_GLHMC_VFSDCMD_PMSDWR_SHIFT)
-
-#define I40E_GLHMC_VFSDDATAHIGH(_i) (0x000C8200 + ((_i) * 4)) \
- /* _i=0...31 */
-#define I40E_GLHMC_VFSDDATAHIGH_MAX_INDEX 31
-#define I40E_GLHMC_VFSDDATAHIGH_PMSDDATAHIGH_SHIFT 0
-#define I40E_GLHMC_VFSDDATAHIGH_PMSDDATAHIGH_MASK (0xFFFFFFFF \
- << I40E_GLHMC_VFSDDATAHIGH_PMSDDATAHIGH_SHIFT)
-
-#define I40E_GLHMC_VFSDDATALOW(_i) (0x000C8100 + ((_i) * 4)) \
- /* _i=0...31 */
-#define I40E_GLHMC_VFSDDATALOW_MAX_INDEX 31
-#define I40E_GLHMC_VFSDDATALOW_PMSDVALID_SHIFT 0
-#define I40E_GLHMC_VFSDDATALOW_PMSDVALID_MASK (0x1 \
- << I40E_GLHMC_VFSDDATALOW_PMSDVALID_SHIFT)
-#define I40E_GLHMC_VFSDDATALOW_PMSDTYPE_SHIFT 1
-#define I40E_GLHMC_VFSDDATALOW_PMSDTYPE_MASK (0x1 \
- << I40E_GLHMC_VFSDDATALOW_PMSDTYPE_SHIFT)
-#define I40E_GLHMC_VFSDDATALOW_PMSDBPCOUNT_SHIFT 2
-#define I40E_GLHMC_VFSDDATALOW_PMSDBPCOUNT_MASK (0x3FF \
- << I40E_GLHMC_VFSDDATALOW_PMSDBPCOUNT_SHIFT)
-#define I40E_GLHMC_VFSDDATALOW_PMSDDATALOW_SHIFT 12
-#define I40E_GLHMC_VFSDDATALOW_PMSDDATALOW_MASK (0xFFFFF \
- << I40E_GLHMC_VFSDDATALOW_PMSDDATALOW_SHIFT)
-
-#define I40E_GLPE_FWLDSTATUS 0x0000D200
-#define I40E_GLPE_FWLDSTATUS_LOAD_REQUESTED_SHIFT 0
-#define I40E_GLPE_FWLDSTATUS_LOAD_REQUESTED_MASK (0x1 \
- << I40E_GLPE_FWLDSTATUS_LOAD_REQUESTED_SHIFT)
-#define I40E_GLPE_FWLDSTATUS_DONE_SHIFT 1
-#define I40E_GLPE_FWLDSTATUS_DONE_MASK (0x1 << I40E_GLPE_FWLDSTATUS_DONE_SHIFT)
-#define I40E_GLPE_FWLDSTATUS_CQP_FAIL_SHIFT 2
-#define I40E_GLPE_FWLDSTATUS_CQP_FAIL_MASK (0x1 \
- << I40E_GLPE_FWLDSTATUS_CQP_FAIL_SHIFT)
-#define I40E_GLPE_FWLDSTATUS_TEP_FAIL_SHIFT 3
-#define I40E_GLPE_FWLDSTATUS_TEP_FAIL_MASK (0x1 \
- << I40E_GLPE_FWLDSTATUS_TEP_FAIL_SHIFT)
-#define I40E_GLPE_FWLDSTATUS_OOP_FAIL_SHIFT 4
-#define I40E_GLPE_FWLDSTATUS_OOP_FAIL_MASK (0x1 \
- << I40E_GLPE_FWLDSTATUS_OOP_FAIL_SHIFT)
-
-struct i40iw_sc_dev;
-struct i40iw_sc_qp;
-struct i40iw_puda_buf;
-struct i40iw_puda_completion_info;
-struct i40iw_update_sds_info;
-struct i40iw_hmc_fcn_info;
-struct i40iw_virtchnl_work_info;
-struct i40iw_manage_vf_pble_info;
-struct i40iw_device;
-struct i40iw_hmc_info;
-struct i40iw_hw;
-
-u8 __iomem *i40iw_get_hw_addr(void *dev);
-void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
-enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev);
-bool i40iw_vf_clear_to_send(struct i40iw_sc_dev *dev);
-enum i40iw_status_code i40iw_ieq_check_mpacrc(struct shash_desc *desc, void *addr,
- u32 length, u32 value);
-struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *buf);
-void i40iw_ieq_update_tcpip_info(struct i40iw_puda_buf *buf, u16 length, u32 seqnum);
-void i40iw_free_hash_desc(struct shash_desc *);
-enum i40iw_status_code i40iw_init_hash_desc(struct shash_desc **);
-enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_info *info,
- struct i40iw_puda_buf *buf);
-enum i40iw_status_code i40iw_cqp_sds_cmd(struct i40iw_sc_dev *dev,
- struct i40iw_update_sds_info *info);
-enum i40iw_status_code i40iw_cqp_manage_hmc_fcn_cmd(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_fcn_info *hmcfcninfo);
-enum i40iw_status_code i40iw_cqp_query_fpm_values_cmd(struct i40iw_sc_dev *dev,
- struct i40iw_dma_mem *values_mem,
- u8 hmc_fn_id);
-enum i40iw_status_code i40iw_cqp_commit_fpm_values_cmd(struct i40iw_sc_dev *dev,
- struct i40iw_dma_mem *values_mem,
- u8 hmc_fn_id);
-enum i40iw_status_code i40iw_alloc_query_fpm_buf(struct i40iw_sc_dev *dev,
- struct i40iw_dma_mem *mem);
-enum i40iw_status_code i40iw_cqp_manage_vf_pble_bp(struct i40iw_sc_dev *dev,
- struct i40iw_manage_vf_pble_info *info);
-void i40iw_cqp_spawn_worker(struct i40iw_sc_dev *dev,
- struct i40iw_virtchnl_work_info *work_info, u32 iw_vf_idx);
-void *i40iw_remove_head(struct list_head *list);
-void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp, bool suspend);
-
-void i40iw_term_modify_qp(struct i40iw_sc_qp *qp, u8 next_state, u8 term, u8 term_len);
-void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred);
-void i40iw_terminate_start_timer(struct i40iw_sc_qp *qp);
-void i40iw_terminate_del_timer(struct i40iw_sc_qp *qp);
-
-enum i40iw_status_code i40iw_hw_manage_vf_pble_bp(struct i40iw_device *iwdev,
- struct i40iw_manage_vf_pble_info *info,
- bool wait);
-struct i40iw_sc_vsi;
-void i40iw_hw_stats_start_timer(struct i40iw_sc_vsi *vsi);
-void i40iw_hw_stats_stop_timer(struct i40iw_sc_vsi *vsi);
-#define i40iw_mmiowb() do { } while (0)
-void i40iw_wr32(struct i40iw_hw *hw, u32 reg, u32 value);
-u32 i40iw_rd32(struct i40iw_hw *hw, u32 reg);
-#endif /* I40IW_OSDEP_H */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_p.h b/drivers/infiniband/hw/i40iw/i40iw_p.h
deleted file mode 100644
index 11d3a2a72100..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_p.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_P_H
-#define I40IW_P_H
-
-#define PAUSE_TIMER_VALUE 0xFFFF
-#define REFRESH_THRESHOLD 0x7FFF
-#define HIGH_THRESHOLD 0x800
-#define LOW_THRESHOLD 0x200
-#define ALL_TC2PFC 0xFF
-#define CQP_COMPL_WAIT_TIME 0x3E8
-#define CQP_TIMEOUT_THRESHOLD 5
-
-void i40iw_debug_buf(struct i40iw_sc_dev *dev, enum i40iw_debug_flag mask,
- char *desc, u64 *buf, u32 size);
-/* init operations */
-enum i40iw_status_code i40iw_device_init(struct i40iw_sc_dev *dev,
- struct i40iw_device_init_info *info);
-
-void i40iw_sc_cqp_post_sq(struct i40iw_sc_cqp *cqp);
-
-u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch);
-
-void i40iw_check_cqp_progress(struct i40iw_cqp_timeout *cqp_timeout, struct i40iw_sc_dev *dev);
-
-enum i40iw_status_code i40iw_sc_mr_fast_register(struct i40iw_sc_qp *qp,
- struct i40iw_fast_reg_stag_info *info,
- bool post_sq);
-
-void i40iw_insert_wqe_hdr(u64 *wqe, u64 header);
-
-/* HMC/FPM functions */
-enum i40iw_status_code i40iw_sc_init_iw_hmc(struct i40iw_sc_dev *dev,
- u8 hmc_fn_id);
-
-enum i40iw_status_code i40iw_pf_init_vfhmc(struct i40iw_sc_dev *dev, u8 vf_hmc_fn_id,
- u32 *vf_cnt_array);
-
-/* stats functions */
-void i40iw_hw_stats_refresh_all(struct i40iw_vsi_pestat *stats);
-void i40iw_hw_stats_read_all(struct i40iw_vsi_pestat *stats, struct i40iw_dev_hw_stats *stats_values);
-void i40iw_hw_stats_read_32(struct i40iw_vsi_pestat *stats,
- enum i40iw_hw_stats_index_32b index,
- u64 *value);
-void i40iw_hw_stats_read_64(struct i40iw_vsi_pestat *stats,
- enum i40iw_hw_stats_index_64b index,
- u64 *value);
-void i40iw_hw_stats_init(struct i40iw_vsi_pestat *stats, u8 index, bool is_pf);
-
-/* vsi misc functions */
-enum i40iw_status_code i40iw_vsi_stats_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_stats_info *info);
-void i40iw_vsi_stats_free(struct i40iw_sc_vsi *vsi);
-void i40iw_sc_vsi_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_init_info *info);
-
-void i40iw_change_l2params(struct i40iw_sc_vsi *vsi, struct i40iw_l2params *l2params);
-void i40iw_qp_add_qos(struct i40iw_sc_qp *qp);
-void i40iw_qp_rem_qos(struct i40iw_sc_qp *qp);
-void i40iw_terminate_send_fin(struct i40iw_sc_qp *qp);
-
-void i40iw_terminate_connection(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info);
-
-void i40iw_terminate_received(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info);
-
-enum i40iw_status_code i40iw_sc_suspend_qp(struct i40iw_sc_cqp *cqp,
- struct i40iw_sc_qp *qp, u64 scratch);
-
-enum i40iw_status_code i40iw_sc_resume_qp(struct i40iw_sc_cqp *cqp,
- struct i40iw_sc_qp *qp, u64 scratch);
-
-enum i40iw_status_code i40iw_sc_static_hmc_pages_allocated(struct i40iw_sc_cqp *cqp,
- u64 scratch, u8 hmc_fn_id,
- bool post_sq,
- bool poll_registers);
-
-enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_count);
-
-void free_sd_mem(struct i40iw_sc_dev *dev);
-
-enum i40iw_status_code i40iw_process_cqp_cmd(struct i40iw_sc_dev *dev,
- struct cqp_commands_info *pcmdinfo);
-
-enum i40iw_status_code i40iw_process_bh(struct i40iw_sc_dev *dev);
-
-/* prototype for functions used for dynamic memory allocation */
-enum i40iw_status_code i40iw_allocate_dma_mem(struct i40iw_hw *hw,
- struct i40iw_dma_mem *mem, u64 size,
- u32 alignment);
-void i40iw_free_dma_mem(struct i40iw_hw *hw, struct i40iw_dma_mem *mem);
-enum i40iw_status_code i40iw_allocate_virt_mem(struct i40iw_hw *hw,
- struct i40iw_virt_mem *mem, u32 size);
-enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw,
- struct i40iw_virt_mem *mem);
-u8 i40iw_get_encoded_wqe_size(u32 wqsize, bool cqpsq);
-void i40iw_reinitialize_ieq(struct i40iw_sc_dev *dev);
-
-#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.c b/drivers/infiniband/hw/i40iw/i40iw_pble.c
deleted file mode 100644
index 540aab5e502d..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_pble.c
+++ /dev/null
@@ -1,612 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#include "i40iw_status.h"
-#include "i40iw_osdep.h"
-#include "i40iw_register.h"
-#include "i40iw_hmc.h"
-
-#include "i40iw_d.h"
-#include "i40iw_type.h"
-#include "i40iw_p.h"
-
-#include <linux/pci.h>
-#include <linux/genalloc.h>
-#include <linux/vmalloc.h>
-#include "i40iw_pble.h"
-#include "i40iw.h"
-
-struct i40iw_device;
-static enum i40iw_status_code add_pble_pool(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_pble_rsrc *pble_rsrc);
-static void i40iw_free_vmalloc_mem(struct i40iw_hw *hw, struct i40iw_chunk *chunk);
-
-/**
- * i40iw_destroy_pble_pool - destroy pool during module unload
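- * @dev: i40iw_sc_dev struct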
- * @pble_rsrc: pble resources
- */
-void i40iw_destroy_pble_pool(struct i40iw_sc_dev *dev, struct i40iw_hmc_pble_rsrc *pble_rsrc)
-{
- struct list_head *clist;
- struct list_head *tlist;
- struct i40iw_chunk *chunk;
- struct i40iw_pble_pool *pinfo = &pble_rsrc->pinfo;
-
- if (pinfo->pool) {
- list_for_each_safe(clist, tlist, &pinfo->clist) {
- chunk = list_entry(clist, struct i40iw_chunk, list);
- if (chunk->type == I40IW_VMALLOC)
- i40iw_free_vmalloc_mem(dev->hw, chunk);
- kfree(chunk);
- }
- gen_pool_destroy(pinfo->pool);
- }
-}
-
-/**
- * i40iw_hmc_init_pble - Initialize pble resources during module load
- * @dev: i40iw_sc_dev struct
- * @pble_rsrc: pble resources
- */
-enum i40iw_status_code i40iw_hmc_init_pble(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_pble_rsrc *pble_rsrc)
-{
- struct i40iw_hmc_info *hmc_info;
- u32 fpm_idx = 0;
-
- hmc_info = dev->hmc_info;
- pble_rsrc->fpm_base_addr = hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].base;
- /* Now start the pbles on a 4K boundary */
- if (pble_rsrc->fpm_base_addr & 0xfff)
- fpm_idx = (PAGE_SIZE - (pble_rsrc->fpm_base_addr & 0xfff)) >> 3;
-
- pble_rsrc->unallocated_pble =
- hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt - fpm_idx;
- pble_rsrc->next_fpm_addr = pble_rsrc->fpm_base_addr + (fpm_idx << 3);
-
- pble_rsrc->pinfo.pool_shift = POOL_SHIFT;
- pble_rsrc->pinfo.pool = gen_pool_create(pble_rsrc->pinfo.pool_shift, -1);
- INIT_LIST_HEAD(&pble_rsrc->pinfo.clist);
- if (!pble_rsrc->pinfo.pool)
- goto error;
-
- if (add_pble_pool(dev, pble_rsrc))
- goto error;
-
- return 0;
-
- error:
- i40iw_destroy_pble_pool(dev, pble_rsrc);
- return I40IW_ERR_NO_MEMORY;
-}
-
-/**
- * get_sd_pd_idx - Returns sd index, pd index and rel_pd_idx from fpm address
- * @pble_rsrc: structure containing fpm address
- * @idx: where to return indexes
- */
-static inline void get_sd_pd_idx(struct i40iw_hmc_pble_rsrc *pble_rsrc,
- struct sd_pd_idx *idx)
-{
- idx->sd_idx = (u32)(pble_rsrc->next_fpm_addr) / I40IW_HMC_DIRECT_BP_SIZE;
- idx->pd_idx = (u32)(pble_rsrc->next_fpm_addr) / I40IW_HMC_PAGED_BP_SIZE;
- idx->rel_pd_idx = (idx->pd_idx % I40IW_HMC_PD_CNT_IN_SD);
-}
-
-/**
- * add_sd_direct - add sd direct for pble
- * @dev: hardware control device structure
- * @pble_rsrc: pble resource ptr
- * @info: page info for sd
- */
-static enum i40iw_status_code add_sd_direct(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_pble_rsrc *pble_rsrc,
- struct i40iw_add_page_info *info)
-{
- enum i40iw_status_code ret_code = 0;
- struct sd_pd_idx *idx = &info->idx;
- struct i40iw_chunk *chunk = info->chunk;
- struct i40iw_hmc_info *hmc_info = info->hmc_info;
- struct i40iw_hmc_sd_entry *sd_entry = info->sd_entry;
- u32 offset = 0;
-
- if (!sd_entry->valid) {
- if (dev->is_pf) {
- ret_code = i40iw_add_sd_table_entry(dev->hw, hmc_info,
- info->idx.sd_idx,
- I40IW_SD_TYPE_DIRECT,
- I40IW_HMC_DIRECT_BP_SIZE);
- if (ret_code)
- return ret_code;
- chunk->type = I40IW_DMA_COHERENT;
- }
- }
- offset = idx->rel_pd_idx << I40IW_HMC_PAGED_BP_SHIFT;
- chunk->size = info->pages << I40IW_HMC_PAGED_BP_SHIFT;
- chunk->vaddr = ((u8 *)sd_entry->u.bp.addr.va + offset);
- chunk->fpm_addr = pble_rsrc->next_fpm_addr;
- i40iw_debug(dev, I40IW_DEBUG_PBLE, "chunk_size[%d] = 0x%x vaddr=%p fpm_addr = %llx\n",
- chunk->size, chunk->size, chunk->vaddr, chunk->fpm_addr);
- return 0;
-}
-
-/**
- * i40iw_free_vmalloc_mem - free vmalloc during close
- * @hw: hw struct
- * @chunk: chunk information for vmalloc
- */
-static void i40iw_free_vmalloc_mem(struct i40iw_hw *hw, struct i40iw_chunk *chunk)
-{
- struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
- int i;
-
- if (!chunk->pg_cnt)
- goto done;
- for (i = 0; i < chunk->pg_cnt; i++)
- dma_unmap_page(&pcidev->dev, chunk->dmaaddrs[i], PAGE_SIZE, DMA_BIDIRECTIONAL);
-
- done:
- kfree(chunk->dmaaddrs);
- chunk->dmaaddrs = NULL;
- vfree(chunk->vaddr);
- chunk->vaddr = NULL;
- chunk->type = 0;
-}
-
-/**
- * i40iw_get_vmalloc_mem - get 2M page for sd
- * @hw: hw struct
- * @chunk: chunk to add
- * @pg_cnt: number of 4K pages
- */
-static enum i40iw_status_code i40iw_get_vmalloc_mem(struct i40iw_hw *hw,
- struct i40iw_chunk *chunk,
- int pg_cnt)
-{
- struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
- struct page *page;
- u8 *addr;
- u32 size;
- int i;
-
- chunk->dmaaddrs = kzalloc(pg_cnt << 3, GFP_KERNEL);
- if (!chunk->dmaaddrs)
- return I40IW_ERR_NO_MEMORY;
- size = PAGE_SIZE * pg_cnt;
- chunk->vaddr = vmalloc(size);
- if (!chunk->vaddr) {
- kfree(chunk->dmaaddrs);
- chunk->dmaaddrs = NULL;
- return I40IW_ERR_NO_MEMORY;
- }
- chunk->size = size;
- addr = (u8 *)chunk->vaddr;
- for (i = 0; i < pg_cnt; i++) {
- page = vmalloc_to_page((void *)addr);
- if (!page)
- break;
- chunk->dmaaddrs[i] = dma_map_page(&pcidev->dev, page, 0,
- PAGE_SIZE, DMA_BIDIRECTIONAL);
- if (dma_mapping_error(&pcidev->dev, chunk->dmaaddrs[i]))
- break;
- addr += PAGE_SIZE;
- }
-
- chunk->pg_cnt = i;
- chunk->type = I40IW_VMALLOC;
- if (i == pg_cnt)
- return 0;
-
- i40iw_free_vmalloc_mem(hw, chunk);
- return I40IW_ERR_NO_MEMORY;
-}
-
-/**
- * fpm_to_idx - given fpm address, get pble index
- * @pble_rsrc: pble resource management
- * @addr: fpm address for index
- */
-static inline u32 fpm_to_idx(struct i40iw_hmc_pble_rsrc *pble_rsrc, u64 addr)
-{
- return (addr - (pble_rsrc->fpm_base_addr)) >> 3;
-}
-
-/**
- * add_bp_pages - add backing pages for sd
- * @dev: hardware control device structure
- * @pble_rsrc: pble resource management
- * @info: page info for sd
- */
-static enum i40iw_status_code add_bp_pages(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_pble_rsrc *pble_rsrc,
- struct i40iw_add_page_info *info)
-{
- u8 *addr;
- struct i40iw_dma_mem mem;
- struct i40iw_hmc_pd_entry *pd_entry;
- struct i40iw_hmc_sd_entry *sd_entry = info->sd_entry;
- struct i40iw_hmc_info *hmc_info = info->hmc_info;
- struct i40iw_chunk *chunk = info->chunk;
- struct i40iw_manage_vf_pble_info vf_pble_info;
- enum i40iw_status_code status = 0;
- u32 rel_pd_idx = info->idx.rel_pd_idx;
- u32 pd_idx = info->idx.pd_idx;
- u32 i;
-
- status = i40iw_get_vmalloc_mem(dev->hw, chunk, info->pages);
- if (status)
- return I40IW_ERR_NO_MEMORY;
- status = i40iw_add_sd_table_entry(dev->hw, hmc_info,
- info->idx.sd_idx, I40IW_SD_TYPE_PAGED,
- I40IW_HMC_DIRECT_BP_SIZE);
- if (status)
- goto error;
- if (!dev->is_pf) {
- status = i40iw_vchnl_vf_add_hmc_objs(dev, I40IW_HMC_IW_PBLE,
- fpm_to_idx(pble_rsrc,
- pble_rsrc->next_fpm_addr),
- (info->pages << PBLE_512_SHIFT));
- if (status) {
- i40iw_pr_err("failed to allocate PBLEs in the PF. Error %i\n", status);
- goto error;
- }
- }
- addr = chunk->vaddr;
- for (i = 0; i < info->pages; i++) {
- mem.pa = chunk->dmaaddrs[i];
- mem.size = PAGE_SIZE;
- mem.va = (void *)(addr);
- pd_entry = &sd_entry->u.pd_table.pd_entry[rel_pd_idx++];
- if (!pd_entry->valid) {
- status = i40iw_add_pd_table_entry(dev->hw, hmc_info, pd_idx++, &mem);
- if (status)
- goto error;
- addr += PAGE_SIZE;
- } else {
- i40iw_pr_err("pd entry is valid expecting to be invalid\n");
- }
- }
- if (!dev->is_pf) {
- vf_pble_info.first_pd_index = info->idx.rel_pd_idx;
- vf_pble_info.inv_pd_ent = false;
- vf_pble_info.pd_entry_cnt = PBLE_PER_PAGE;
- vf_pble_info.pd_pl_pba = sd_entry->u.pd_table.pd_page_addr.pa;
- vf_pble_info.sd_index = info->idx.sd_idx;
- status = i40iw_hw_manage_vf_pble_bp(dev->back_dev,
- &vf_pble_info, true);
- if (status) {
- i40iw_pr_err("CQP manage VF PBLE BP failed. %i\n", status);
- goto error;
- }
- }
- chunk->fpm_addr = pble_rsrc->next_fpm_addr;
- return 0;
-error:
- i40iw_free_vmalloc_mem(dev->hw, chunk);
- return status;
-}
-
-/**
- * add_pble_pool - add an sd entry for pble resource
- * @dev: hardware control device structure
- * @pble_rsrc: pble resource management
- */
-static enum i40iw_status_code add_pble_pool(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_pble_rsrc *pble_rsrc)
-{
- struct i40iw_hmc_sd_entry *sd_entry;
- struct i40iw_hmc_info *hmc_info;
- struct i40iw_chunk *chunk;
- struct i40iw_add_page_info info;
- struct sd_pd_idx *idx = &info.idx;
- enum i40iw_status_code ret_code = 0;
- enum i40iw_sd_entry_type sd_entry_type;
- u64 sd_reg_val = 0;
- u32 pages;
-
- if (pble_rsrc->unallocated_pble < PBLE_PER_PAGE)
- return I40IW_ERR_NO_MEMORY;
- if (pble_rsrc->next_fpm_addr & 0xfff) {
- i40iw_pr_err("next fpm_addr %llx\n", pble_rsrc->next_fpm_addr);
- return I40IW_ERR_INVALID_PAGE_DESC_INDEX;
- }
- chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
- if (!chunk)
- return I40IW_ERR_NO_MEMORY;
- hmc_info = dev->hmc_info;
- chunk->fpm_addr = pble_rsrc->next_fpm_addr;
- get_sd_pd_idx(pble_rsrc, idx);
- sd_entry = &hmc_info->sd_table.sd_entry[idx->sd_idx];
- pages = (idx->rel_pd_idx) ? (I40IW_HMC_PD_CNT_IN_SD -
- idx->rel_pd_idx) : I40IW_HMC_PD_CNT_IN_SD;
- pages = min(pages, pble_rsrc->unallocated_pble >> PBLE_512_SHIFT);
- info.chunk = chunk;
- info.hmc_info = hmc_info;
- info.pages = pages;
- info.sd_entry = sd_entry;
- if (!sd_entry->valid) {
- sd_entry_type = (!idx->rel_pd_idx &&
- (pages == I40IW_HMC_PD_CNT_IN_SD) &&
- dev->is_pf) ? I40IW_SD_TYPE_DIRECT : I40IW_SD_TYPE_PAGED;
- } else {
- sd_entry_type = sd_entry->entry_type;
- }
- i40iw_debug(dev, I40IW_DEBUG_PBLE,
- "pages = %d, unallocated_pble[%u] current_fpm_addr = %llx\n",
- pages, pble_rsrc->unallocated_pble, pble_rsrc->next_fpm_addr);
- i40iw_debug(dev, I40IW_DEBUG_PBLE, "sd_entry_type = %d sd_entry valid = %d\n",
- sd_entry_type, sd_entry->valid);
-
- if (sd_entry_type == I40IW_SD_TYPE_DIRECT)
- ret_code = add_sd_direct(dev, pble_rsrc, &info);
- if (ret_code)
- sd_entry_type = I40IW_SD_TYPE_PAGED;
- else
- pble_rsrc->stats_direct_sds++;
-
- if (sd_entry_type == I40IW_SD_TYPE_PAGED) {
- ret_code = add_bp_pages(dev, pble_rsrc, &info);
- if (ret_code)
- goto error;
- else
- pble_rsrc->stats_paged_sds++;
- }
-
- if (gen_pool_add_virt(pble_rsrc->pinfo.pool, (unsigned long)chunk->vaddr,
- (phys_addr_t)chunk->fpm_addr, chunk->size, -1)) {
- i40iw_pr_err("could not add memory to pool via gen_pool_add_virt()\n");
- ret_code = I40IW_ERR_NO_MEMORY;
- goto error;
- }
- pble_rsrc->next_fpm_addr += chunk->size;
- i40iw_debug(dev, I40IW_DEBUG_PBLE, "next_fpm_addr = %llx chunk_size[%u] = 0x%x\n",
- pble_rsrc->next_fpm_addr, chunk->size, chunk->size);
- pble_rsrc->unallocated_pble -= (chunk->size >> 3);
- list_add(&chunk->list, &pble_rsrc->pinfo.clist);
- sd_reg_val = (sd_entry_type == I40IW_SD_TYPE_PAGED) ?
- sd_entry->u.pd_table.pd_page_addr.pa : sd_entry->u.bp.addr.pa;
- if (sd_entry->valid)
- return 0;
- if (dev->is_pf) {
- ret_code = i40iw_hmc_sd_one(dev, hmc_info->hmc_fn_id,
- sd_reg_val, idx->sd_idx,
- sd_entry->entry_type, true);
- if (ret_code) {
- i40iw_pr_err("cqp cmd failed for sd (pbles)\n");
- goto error;
- }
- }
-
- sd_entry->valid = true;
- return 0;
- error:
- kfree(chunk);
- return ret_code;
-}
-
-/**
- * free_lvl2 - free level 2 pble
- * @pble_rsrc: pble resource management
- * @palloc: level 2 pble allocation
- */
-static void free_lvl2(struct i40iw_hmc_pble_rsrc *pble_rsrc,
- struct i40iw_pble_alloc *palloc)
-{
- u32 i;
- struct gen_pool *pool;
- struct i40iw_pble_level2 *lvl2 = &palloc->level2;
- struct i40iw_pble_info *root = &lvl2->root;
- struct i40iw_pble_info *leaf = lvl2->leaf;
-
- pool = pble_rsrc->pinfo.pool;
-
- for (i = 0; i < lvl2->leaf_cnt; i++, leaf++) {
- if (leaf->addr)
- gen_pool_free(pool, leaf->addr, (leaf->cnt << 3));
- else
- break;
- }
-
- if (root->addr)
- gen_pool_free(pool, root->addr, (root->cnt << 3));
-
- kfree(lvl2->leaf);
- lvl2->leaf = NULL;
-}
-
-/**
- * get_lvl2_pble - get level 2 pble resource
- * @pble_rsrc: pble resource management
- * @palloc: level 2 pble allocation
- * @pool: pool pointer
- */
-static enum i40iw_status_code get_lvl2_pble(struct i40iw_hmc_pble_rsrc *pble_rsrc,
- struct i40iw_pble_alloc *palloc,
- struct gen_pool *pool)
-{
- u32 lf4k, lflast, total, i;
- u32 pblcnt = PBLE_PER_PAGE;
- u64 *addr;
- struct i40iw_pble_level2 *lvl2 = &palloc->level2;
- struct i40iw_pble_info *root = &lvl2->root;
- struct i40iw_pble_info *leaf;
-
- /* number of full 512-entry (4K) leaves */
- lf4k = palloc->total_cnt >> 9;
- lflast = palloc->total_cnt % PBLE_PER_PAGE;
- total = (lflast == 0) ? lf4k : lf4k + 1;
- lvl2->leaf_cnt = total;
-
- leaf = kzalloc((sizeof(*leaf) * total), GFP_ATOMIC);
- if (!leaf)
- return I40IW_ERR_NO_MEMORY;
- lvl2->leaf = leaf;
- /* allocate pbles for the root */
- root->addr = gen_pool_alloc(pool, (total << 3));
- if (!root->addr) {
- kfree(lvl2->leaf);
- lvl2->leaf = NULL;
- return I40IW_ERR_NO_MEMORY;
- }
- root->idx = fpm_to_idx(pble_rsrc,
- (u64)gen_pool_virt_to_phys(pool, root->addr));
- root->cnt = total;
- addr = (u64 *)root->addr;
- for (i = 0; i < total; i++, leaf++) {
- pblcnt = (lflast && ((i + 1) == total)) ? lflast : PBLE_PER_PAGE;
- leaf->addr = gen_pool_alloc(pool, (pblcnt << 3));
- if (!leaf->addr)
- goto error;
- leaf->idx = fpm_to_idx(pble_rsrc, (u64)gen_pool_virt_to_phys(pool, leaf->addr));
-
- leaf->cnt = pblcnt;
- *addr = (u64)leaf->idx;
- addr++;
- }
- palloc->level = I40IW_LEVEL_2;
- pble_rsrc->stats_lvl2++;
- return 0;
- error:
- free_lvl2(pble_rsrc, palloc);
- return I40IW_ERR_NO_MEMORY;
-}
-
-/**
- * get_lvl1_pble - get level 1 pble resource
- * @dev: hardware control device structure
- * @pble_rsrc: pble resource management
- * @palloc: level 1 pble allocation
- */
-static enum i40iw_status_code get_lvl1_pble(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_pble_rsrc *pble_rsrc,
- struct i40iw_pble_alloc *palloc)
-{
- u64 *addr;
- struct gen_pool *pool;
- struct i40iw_pble_info *lvl1 = &palloc->level1;
-
- pool = pble_rsrc->pinfo.pool;
- addr = (u64 *)gen_pool_alloc(pool, (palloc->total_cnt << 3));
-
- if (!addr)
- return I40IW_ERR_NO_MEMORY;
-
- palloc->level = I40IW_LEVEL_1;
- lvl1->addr = (unsigned long)addr;
- lvl1->idx = fpm_to_idx(pble_rsrc, (u64)gen_pool_virt_to_phys(pool,
- (unsigned long)addr));
- lvl1->cnt = palloc->total_cnt;
- pble_rsrc->stats_lvl1++;
- return 0;
-}
-
-/**
- * get_lvl1_lvl2_pble - calls get_lvl1 and get_lvl2 pble routine
- * @dev: i40iw_sc_dev struct
- * @pble_rsrc: pble resources
- * @palloc: contains all information regarding pble (idx + pble addr)
- * @pool: pointer to general purpose special memory pool descriptor
- */
-static inline enum i40iw_status_code get_lvl1_lvl2_pble(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_pble_rsrc *pble_rsrc,
- struct i40iw_pble_alloc *palloc,
- struct gen_pool *pool)
-{
- enum i40iw_status_code status = 0;
-
- status = get_lvl1_pble(dev, pble_rsrc, palloc);
- if (status && (palloc->total_cnt > PBLE_PER_PAGE))
- status = get_lvl2_pble(pble_rsrc, palloc, pool);
- return status;
-}
-
-/**
- * i40iw_get_pble - allocate pbles from the pool
- * @dev: i40iw_sc_dev struct
- * @pble_rsrc: pble resources
- * @palloc: contains all information regarding pble (idx + pble addr)
- * @pble_cnt: number of pbles requested
- */
-enum i40iw_status_code i40iw_get_pble(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_pble_rsrc *pble_rsrc,
- struct i40iw_pble_alloc *palloc,
- u32 pble_cnt)
-{
- struct gen_pool *pool;
- enum i40iw_status_code status = 0;
- u32 max_sds = 0;
- int i;
-
- pool = pble_rsrc->pinfo.pool;
- palloc->total_cnt = pble_cnt;
- palloc->level = I40IW_LEVEL_0;
- /* check first to see if we can get pbles without acquiring additional sds */
- status = get_lvl1_lvl2_pble(dev, pble_rsrc, palloc, pool);
- if (!status)
- goto exit;
- max_sds = (palloc->total_cnt >> 18) + 1;
- for (i = 0; i < max_sds; i++) {
- status = add_pble_pool(dev, pble_rsrc);
- if (status)
- break;
- status = get_lvl1_lvl2_pble(dev, pble_rsrc, palloc, pool);
- if (!status)
- break;
- }
-exit:
- if (!status)
- pble_rsrc->stats_alloc_ok++;
- else
- pble_rsrc->stats_alloc_fail++;
-
- return status;
-}
-
-/**
- * i40iw_free_pble - put pbles back into pool
- * @pble_rsrc: pble resources
- * @palloc: contains all information regarding pble resource being freed
- */
-void i40iw_free_pble(struct i40iw_hmc_pble_rsrc *pble_rsrc,
- struct i40iw_pble_alloc *palloc)
-{
- struct gen_pool *pool;
-
- pool = pble_rsrc->pinfo.pool;
- if (palloc->level == I40IW_LEVEL_2)
- free_lvl2(pble_rsrc, palloc);
- else
- gen_pool_free(pool, palloc->level1.addr,
- (palloc->level1.cnt << 3));
- pble_rsrc->stats_alloc_freed++;
-}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.h b/drivers/infiniband/hw/i40iw/i40iw_pble.h
deleted file mode 100644
index 7b1851d21cc0..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_pble.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_PBLE_H
-#define I40IW_PBLE_H
-
-#define POOL_SHIFT 6
-#define PBLE_PER_PAGE 512
-#define I40IW_HMC_PAGED_BP_SHIFT 12
-#define PBLE_512_SHIFT 9
-
-enum i40iw_pble_level {
- I40IW_LEVEL_0 = 0,
- I40IW_LEVEL_1 = 1,
- I40IW_LEVEL_2 = 2
-};
-
-enum i40iw_alloc_type {
- I40IW_NO_ALLOC = 0,
- I40IW_DMA_COHERENT = 1,
- I40IW_VMALLOC = 2
-};
-
-struct i40iw_pble_info {
- unsigned long addr;
- u32 idx;
- u32 cnt;
-};
-
-struct i40iw_pble_level2 {
- struct i40iw_pble_info root;
- struct i40iw_pble_info *leaf;
- u32 leaf_cnt;
-};
-
-struct i40iw_pble_alloc {
- u32 total_cnt;
- enum i40iw_pble_level level;
- union {
- struct i40iw_pble_info level1;
- struct i40iw_pble_level2 level2;
- };
-};
-
-struct sd_pd_idx {
- u32 sd_idx;
- u32 pd_idx;
- u32 rel_pd_idx;
-};
-
-struct i40iw_add_page_info {
- struct i40iw_chunk *chunk;
- struct i40iw_hmc_sd_entry *sd_entry;
- struct i40iw_hmc_info *hmc_info;
- struct sd_pd_idx idx;
- u32 pages;
-};
-
-struct i40iw_chunk {
- struct list_head list;
- u32 size;
- void *vaddr;
- u64 fpm_addr;
- u32 pg_cnt;
- dma_addr_t *dmaaddrs;
- enum i40iw_alloc_type type;
-};
-
-struct i40iw_pble_pool {
- struct gen_pool *pool;
- struct list_head clist;
- u32 total_pble_alloc;
- u32 free_pble_cnt;
- u32 pool_shift;
-};
-
-struct i40iw_hmc_pble_rsrc {
- u32 unallocated_pble;
- u64 fpm_base_addr;
- u64 next_fpm_addr;
- struct i40iw_pble_pool pinfo;
-
- u32 stats_direct_sds;
- u32 stats_paged_sds;
- u64 stats_alloc_ok;
- u64 stats_alloc_fail;
- u64 stats_alloc_freed;
- u64 stats_lvl1;
- u64 stats_lvl2;
-};
-
-void i40iw_destroy_pble_pool(struct i40iw_sc_dev *dev, struct i40iw_hmc_pble_rsrc *pble_rsrc);
-enum i40iw_status_code i40iw_hmc_init_pble(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_pble_rsrc *pble_rsrc);
-void i40iw_free_pble(struct i40iw_hmc_pble_rsrc *pble_rsrc, struct i40iw_pble_alloc *palloc);
-enum i40iw_status_code i40iw_get_pble(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_pble_rsrc *pble_rsrc,
- struct i40iw_pble_alloc *palloc,
- u32 pble_cnt);
-#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c
deleted file mode 100644
index d9c7ae6a7030..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.c
+++ /dev/null
@@ -1,1493 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#include "i40iw_osdep.h"
-#include "i40iw_register.h"
-#include "i40iw_status.h"
-#include "i40iw_hmc.h"
-
-#include "i40iw_d.h"
-#include "i40iw_type.h"
-#include "i40iw_p.h"
-#include "i40iw_puda.h"
-
-static void i40iw_ieq_receive(struct i40iw_sc_vsi *vsi,
- struct i40iw_puda_buf *buf);
-static void i40iw_ieq_tx_compl(struct i40iw_sc_vsi *vsi, void *sqwrid);
-static void i40iw_ilq_putback_rcvbuf(struct i40iw_sc_qp *qp, u32 wqe_idx);
-static enum i40iw_status_code i40iw_puda_replenish_rq(struct i40iw_puda_rsrc
- *rsrc, bool initial);
-/**
- * i40iw_puda_get_listbuf - get buffer from puda list
- * @list: list to use for buffers (ILQ or IEQ)
- */
-static struct i40iw_puda_buf *i40iw_puda_get_listbuf(struct list_head *list)
-{
- struct i40iw_puda_buf *buf = NULL;
-
- if (!list_empty(list)) {
- buf = (struct i40iw_puda_buf *)list->next;
- list_del((struct list_head *)&buf->list);
- }
- return buf;
-}
-
-/**
- * i40iw_puda_get_bufpool - return buffer from resource
- * @rsrc: resource to use for buffer
- */
-struct i40iw_puda_buf *i40iw_puda_get_bufpool(struct i40iw_puda_rsrc *rsrc)
-{
- struct i40iw_puda_buf *buf = NULL;
- struct list_head *list = &rsrc->bufpool;
- unsigned long flags;
-
- spin_lock_irqsave(&rsrc->bufpool_lock, flags);
- buf = i40iw_puda_get_listbuf(list);
- if (buf)
- rsrc->avail_buf_count--;
- else
- rsrc->stats_buf_alloc_fail++;
- spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
- return buf;
-}
-
-/**
- * i40iw_puda_ret_bufpool - return buffer to rsrc list
- * @rsrc: resource to use for buffer
- * @buf: buffer to return to resource
- */
-void i40iw_puda_ret_bufpool(struct i40iw_puda_rsrc *rsrc,
- struct i40iw_puda_buf *buf)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&rsrc->bufpool_lock, flags);
- list_add(&buf->list, &rsrc->bufpool);
- spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
- rsrc->avail_buf_count++;
-}
-
-/**
- * i40iw_puda_post_recvbuf - set wqe for rcv buffer
- * @rsrc: resource ptr
- * @wqe_idx: wqe index to use
- * @buf: puda buffer for rcv q
- * @initial: flag if during init time
- */
-static void i40iw_puda_post_recvbuf(struct i40iw_puda_rsrc *rsrc, u32 wqe_idx,
- struct i40iw_puda_buf *buf, bool initial)
-{
- u64 *wqe;
- struct i40iw_sc_qp *qp = &rsrc->qp;
- u64 offset24 = 0;
-
- qp->qp_uk.rq_wrid_array[wqe_idx] = (uintptr_t)buf;
- wqe = qp->qp_uk.rq_base[wqe_idx].elem;
- i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
- "%s: wqe_idx= %d buf = %p wqe = %p\n", __func__,
- wqe_idx, buf, wqe);
- if (!initial)
- get_64bit_val(wqe, 24, &offset24);
-
- offset24 = (offset24) ? 0 : LS_64(1, I40IWQPSQ_VALID);
-
- set_64bit_val(wqe, 0, buf->mem.pa);
- set_64bit_val(wqe, 8,
- LS_64(buf->mem.size, I40IWQPSQ_FRAG_LEN));
- i40iw_insert_wqe_hdr(wqe, offset24);
-}
-
-/**
- * i40iw_puda_replenish_rq - post rcv buffers
- * @rsrc: resource to use for buffer
- * @initial: flag if during init time
- */
-static enum i40iw_status_code i40iw_puda_replenish_rq(struct i40iw_puda_rsrc *rsrc,
- bool initial)
-{
- u32 i;
- u32 invalid_cnt = rsrc->rxq_invalid_cnt;
- struct i40iw_puda_buf *buf = NULL;
-
- for (i = 0; i < invalid_cnt; i++) {
- buf = i40iw_puda_get_bufpool(rsrc);
- if (!buf)
- return I40IW_ERR_list_empty;
- i40iw_puda_post_recvbuf(rsrc, rsrc->rx_wqe_idx, buf,
- initial);
- rsrc->rx_wqe_idx =
- ((rsrc->rx_wqe_idx + 1) % rsrc->rq_size);
- rsrc->rxq_invalid_cnt--;
- }
- return 0;
-}
-
-/**
- * i40iw_puda_alloc_buf - allocate mem for buffer
- * @dev: iwarp device
- * @length: length of buffer
- */
-static struct i40iw_puda_buf *i40iw_puda_alloc_buf(struct i40iw_sc_dev *dev,
- u32 length)
-{
- struct i40iw_puda_buf *buf = NULL;
- struct i40iw_virt_mem buf_mem;
- enum i40iw_status_code ret;
-
- ret = i40iw_allocate_virt_mem(dev->hw, &buf_mem,
- sizeof(struct i40iw_puda_buf));
- if (ret) {
- i40iw_debug(dev, I40IW_DEBUG_PUDA,
- "%s: error mem for buf\n", __func__);
- return NULL;
- }
- buf = (struct i40iw_puda_buf *)buf_mem.va;
- ret = i40iw_allocate_dma_mem(dev->hw, &buf->mem, length, 1);
- if (ret) {
- i40iw_debug(dev, I40IW_DEBUG_PUDA,
- "%s: error dma mem for buf\n", __func__);
- i40iw_free_virt_mem(dev->hw, &buf_mem);
- return NULL;
- }
- buf->buf_mem.va = buf_mem.va;
- buf->buf_mem.size = buf_mem.size;
- return buf;
-}
-
-/**
- * i40iw_puda_dele_buf - free buffer back to system
- * @dev: iwarp device
- * @buf: buffer to free
- */
-static void i40iw_puda_dele_buf(struct i40iw_sc_dev *dev,
- struct i40iw_puda_buf *buf)
-{
- i40iw_free_dma_mem(dev->hw, &buf->mem);
- i40iw_free_virt_mem(dev->hw, &buf->buf_mem);
-}
-
-/**
- * i40iw_puda_get_next_send_wqe - return next wqe for processing
- * @qp: puda qp for wqe
- * @wqe_idx: wqe index for caller
- */
-static u64 *i40iw_puda_get_next_send_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx)
-{
- u64 *wqe = NULL;
- enum i40iw_status_code ret_code = 0;
-
- *wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
- if (!*wqe_idx)
- qp->swqe_polarity = !qp->swqe_polarity;
- I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
- if (ret_code)
- return wqe;
- wqe = qp->sq_base[*wqe_idx].elem;
-
- return wqe;
-}
-
-/**
- * i40iw_puda_poll_info - poll cq for completion
- * @cq: cq for poll
- * @info: info return for successful completion
- */
-static enum i40iw_status_code i40iw_puda_poll_info(struct i40iw_sc_cq *cq,
- struct i40iw_puda_completion_info *info)
-{
- u64 qword0, qword2, qword3;
- u64 *cqe;
- u64 comp_ctx;
- bool valid_bit;
- u32 major_err, minor_err;
- bool error;
-
- cqe = (u64 *)I40IW_GET_CURRENT_CQ_ELEMENT(&cq->cq_uk);
- get_64bit_val(cqe, 24, &qword3);
- valid_bit = (bool)RS_64(qword3, I40IW_CQ_VALID);
-
- if (valid_bit != cq->cq_uk.polarity)
- return I40IW_ERR_QUEUE_EMPTY;
-
- i40iw_debug_buf(cq->dev, I40IW_DEBUG_PUDA, "PUDA CQE", cqe, 32);
- error = (bool)RS_64(qword3, I40IW_CQ_ERROR);
- if (error) {
- i40iw_debug(cq->dev, I40IW_DEBUG_PUDA, "%s receive error\n", __func__);
- major_err = (u32)(RS_64(qword3, I40IW_CQ_MAJERR));
- minor_err = (u32)(RS_64(qword3, I40IW_CQ_MINERR));
- info->compl_error = major_err << 16 | minor_err;
- return I40IW_ERR_CQ_COMPL_ERROR;
- }
-
- get_64bit_val(cqe, 0, &qword0);
- get_64bit_val(cqe, 16, &qword2);
-
- info->q_type = (u8)RS_64(qword3, I40IW_CQ_SQ);
- info->qp_id = (u32)RS_64(qword2, I40IWCQ_QPID);
-
- get_64bit_val(cqe, 8, &comp_ctx);
- info->qp = (struct i40iw_qp_uk *)(unsigned long)comp_ctx;
- info->wqe_idx = (u32)RS_64(qword3, I40IW_CQ_WQEIDX);
-
- if (info->q_type == I40IW_CQE_QTYPE_RQ) {
- info->vlan_valid = (bool)RS_64(qword3, I40IW_VLAN_TAG_VALID);
- info->l4proto = (u8)RS_64(qword2, I40IW_UDA_L4PROTO);
- info->l3proto = (u8)RS_64(qword2, I40IW_UDA_L3PROTO);
- info->payload_len = (u16)RS_64(qword0, I40IW_UDA_PAYLOADLEN);
- }
-
- return 0;
-}
-
-/**
- * i40iw_puda_poll_completion - processes completion for cq
- * @dev: iwarp device
- * @cq: cq getting interrupt
- * @compl_err: return any completion err
- */
-enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev,
- struct i40iw_sc_cq *cq, u32 *compl_err)
-{
- struct i40iw_qp_uk *qp;
- struct i40iw_cq_uk *cq_uk = &cq->cq_uk;
- struct i40iw_puda_completion_info info;
- enum i40iw_status_code ret = 0;
- struct i40iw_puda_buf *buf;
- struct i40iw_puda_rsrc *rsrc;
- void *sqwrid;
- u8 cq_type = cq->cq_type;
- unsigned long flags;
-
- if ((cq_type == I40IW_CQ_TYPE_ILQ) || (cq_type == I40IW_CQ_TYPE_IEQ)) {
- rsrc = (cq_type == I40IW_CQ_TYPE_ILQ) ? cq->vsi->ilq : cq->vsi->ieq;
- } else {
- i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s qp_type error\n", __func__);
- return I40IW_ERR_BAD_PTR;
- }
- memset(&info, 0, sizeof(info));
- ret = i40iw_puda_poll_info(cq, &info);
- *compl_err = info.compl_error;
- if (ret == I40IW_ERR_QUEUE_EMPTY)
- return ret;
- if (ret)
- goto done;
-
- qp = info.qp;
- if (!qp || !rsrc) {
- ret = I40IW_ERR_BAD_PTR;
- goto done;
- }
-
- if (qp->qp_id != rsrc->qp_id) {
- ret = I40IW_ERR_BAD_PTR;
- goto done;
- }
-
- if (info.q_type == I40IW_CQE_QTYPE_RQ) {
- buf = (struct i40iw_puda_buf *)(uintptr_t)qp->rq_wrid_array[info.wqe_idx];
- /* Get all the tcpip information in the buf header */
- ret = i40iw_puda_get_tcpip_info(&info, buf);
- if (ret) {
- rsrc->stats_rcvd_pkt_err++;
- if (cq_type == I40IW_CQ_TYPE_ILQ) {
- i40iw_ilq_putback_rcvbuf(&rsrc->qp,
- info.wqe_idx);
- } else {
- i40iw_puda_ret_bufpool(rsrc, buf);
- i40iw_puda_replenish_rq(rsrc, false);
- }
- goto done;
- }
-
- rsrc->stats_pkt_rcvd++;
- rsrc->compl_rxwqe_idx = info.wqe_idx;
- i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s RQ completion\n", __func__);
- rsrc->receive(rsrc->vsi, buf);
- if (cq_type == I40IW_CQ_TYPE_ILQ)
- i40iw_ilq_putback_rcvbuf(&rsrc->qp, info.wqe_idx);
- else
- i40iw_puda_replenish_rq(rsrc, false);
-
- } else {
- i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s SQ completion\n", __func__);
- sqwrid = (void *)(uintptr_t)qp->sq_wrtrk_array[info.wqe_idx].wrid;
- I40IW_RING_SET_TAIL(qp->sq_ring, info.wqe_idx);
- rsrc->xmit_complete(rsrc->vsi, sqwrid);
- spin_lock_irqsave(&rsrc->bufpool_lock, flags);
- rsrc->tx_wqe_avail_cnt++;
- spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
- if (!list_empty(&rsrc->txpend))
- i40iw_puda_send_buf(rsrc, NULL);
- }
-
-done:
- I40IW_RING_MOVE_HEAD(cq_uk->cq_ring, ret);
- if (I40IW_RING_GETCURRENT_HEAD(cq_uk->cq_ring) == 0)
- cq_uk->polarity = !cq_uk->polarity;
- /* update cq tail in cq shadow memory also */
- I40IW_RING_MOVE_TAIL(cq_uk->cq_ring);
- set_64bit_val(cq_uk->shadow_area, 0,
- I40IW_RING_GETCURRENT_HEAD(cq_uk->cq_ring));
- return 0;
-}
-
-/**
- * i40iw_puda_send - complete send wqe for transmit
- * @qp: puda qp for send
- * @info: buffer information for transmit
- */
-enum i40iw_status_code i40iw_puda_send(struct i40iw_sc_qp *qp,
- struct i40iw_puda_send_info *info)
-{
- u64 *wqe;
- u32 iplen, l4len;
- u64 header[2];
- u32 wqe_idx;
- u8 iipt;
-
- /* number of 32-bit DWORDs in the L4 header */
- l4len = info->tcplen >> 2;
- if (info->ipv4) {
- iipt = 3;
- iplen = 5;
- } else {
- iipt = 1;
- iplen = 10;
- }
-
- wqe = i40iw_puda_get_next_send_wqe(&qp->qp_uk, &wqe_idx);
- if (!wqe)
- return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
- qp->qp_uk.sq_wrtrk_array[wqe_idx].wrid = (uintptr_t)info->scratch;
- /* Third line of WQE descriptor */
- /* maclen is in words */
- header[0] = LS_64((info->maclen >> 1), I40IW_UDA_QPSQ_MACLEN) |
- LS_64(iplen, I40IW_UDA_QPSQ_IPLEN) | LS_64(1, I40IW_UDA_QPSQ_L4T) |
- LS_64(iipt, I40IW_UDA_QPSQ_IIPT) |
- LS_64(l4len, I40IW_UDA_QPSQ_L4LEN);
- /* Forth line of WQE descriptor */
- header[1] = LS_64(I40IW_OP_TYPE_SEND, I40IW_UDA_QPSQ_OPCODE) |
- LS_64(1, I40IW_UDA_QPSQ_SIGCOMPL) |
- LS_64(info->doloopback, I40IW_UDA_QPSQ_DOLOOPBACK) |
- LS_64(qp->qp_uk.swqe_polarity, I40IW_UDA_QPSQ_VALID);
-
- set_64bit_val(wqe, 0, info->paddr);
- set_64bit_val(wqe, 8, LS_64(info->len, I40IWQPSQ_FRAG_LEN));
- set_64bit_val(wqe, 16, header[0]);
-
- i40iw_insert_wqe_hdr(wqe, header[1]);
-
- i40iw_debug_buf(qp->dev, I40IW_DEBUG_PUDA, "PUDA SEND WQE", wqe, 32);
- i40iw_qp_post_wr(&qp->qp_uk);
- return 0;
-}
-
-/**
- * i40iw_puda_send_buf - transmit puda buffer
- * @rsrc: resource to use for buffer
- * @buf: puda buffer to transmit
- */
-void i40iw_puda_send_buf(struct i40iw_puda_rsrc *rsrc, struct i40iw_puda_buf *buf)
-{
- struct i40iw_puda_send_info info;
- enum i40iw_status_code ret = 0;
- unsigned long flags;
-
- spin_lock_irqsave(&rsrc->bufpool_lock, flags);
- /* if no wqe available or not from a completion and we have
- * pending buffers, we must queue new buffer
- */
- if (!rsrc->tx_wqe_avail_cnt || (buf && !list_empty(&rsrc->txpend))) {
- list_add_tail(&buf->list, &rsrc->txpend);
- spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
- rsrc->stats_sent_pkt_q++;
- if (rsrc->type == I40IW_PUDA_RSRC_TYPE_ILQ)
- i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
- "%s: adding to txpend\n", __func__);
- return;
- }
- rsrc->tx_wqe_avail_cnt--;
- /* if we are coming from a completion and have pending buffers
- * then get one from the pending list
- */
- if (!buf) {
- buf = i40iw_puda_get_listbuf(&rsrc->txpend);
- if (!buf)
- goto done;
- }
-
- info.scratch = (void *)buf;
- info.paddr = buf->mem.pa;
- info.len = buf->totallen;
- info.tcplen = buf->tcphlen;
- info.maclen = buf->maclen;
- info.ipv4 = buf->ipv4;
- info.doloopback = (rsrc->type == I40IW_PUDA_RSRC_TYPE_IEQ);
-
- ret = i40iw_puda_send(&rsrc->qp, &info);
- if (ret) {
- rsrc->tx_wqe_avail_cnt++;
- rsrc->stats_sent_pkt_q++;
- list_add(&buf->list, &rsrc->txpend);
- if (rsrc->type == I40IW_PUDA_RSRC_TYPE_ILQ)
- i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
- "%s: adding to puda_send\n", __func__);
- } else {
- rsrc->stats_pkt_sent++;
- }
-done:
- spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
-}
-
-/**
- * i40iw_puda_qp_setctx - during init, set qp's context
- * @rsrc: qp's resource
- */
-static void i40iw_puda_qp_setctx(struct i40iw_puda_rsrc *rsrc)
-{
- struct i40iw_sc_qp *qp = &rsrc->qp;
- u64 *qp_ctx = qp->hw_host_ctx;
-
- set_64bit_val(qp_ctx, 8, qp->sq_pa);
- set_64bit_val(qp_ctx, 16, qp->rq_pa);
-
- set_64bit_val(qp_ctx, 24,
- LS_64(qp->hw_rq_size, I40IWQPC_RQSIZE) |
- LS_64(qp->hw_sq_size, I40IWQPC_SQSIZE));
-
- set_64bit_val(qp_ctx, 48, LS_64(rsrc->buf_size, I40IW_UDA_QPC_MAXFRAMESIZE));
- set_64bit_val(qp_ctx, 56, 0);
- set_64bit_val(qp_ctx, 64, 1);
-
- set_64bit_val(qp_ctx, 136,
- LS_64(rsrc->cq_id, I40IWQPC_TXCQNUM) |
- LS_64(rsrc->cq_id, I40IWQPC_RXCQNUM));
-
- set_64bit_val(qp_ctx, 160, LS_64(1, I40IWQPC_PRIVEN));
-
- set_64bit_val(qp_ctx, 168,
- LS_64((uintptr_t)qp, I40IWQPC_QPCOMPCTX));
-
- set_64bit_val(qp_ctx, 176,
- LS_64(qp->sq_tph_val, I40IWQPC_SQTPHVAL) |
- LS_64(qp->rq_tph_val, I40IWQPC_RQTPHVAL) |
- LS_64(qp->qs_handle, I40IWQPC_QSHANDLE));
-
- i40iw_debug_buf(rsrc->dev, I40IW_DEBUG_PUDA, "PUDA QP CONTEXT",
- qp_ctx, I40IW_QP_CTX_SIZE);
-}
-
-/**
- * i40iw_puda_qp_wqe - setup wqe for qp create
- * @dev: iwarp device
- * @qp: puda qp to create
- */
-static enum i40iw_status_code i40iw_puda_qp_wqe(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
-{
- struct i40iw_sc_cqp *cqp;
- u64 *wqe;
- u64 header;
- struct i40iw_ccq_cqe_info compl_info;
- enum i40iw_status_code status = 0;
-
- cqp = dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, 0);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
- set_64bit_val(wqe, 40, qp->shadow_area_pa);
- header = qp->qp_uk.qp_id |
- LS_64(I40IW_CQP_OP_CREATE_QP, I40IW_CQPSQ_OPCODE) |
- LS_64(I40IW_QP_TYPE_UDA, I40IW_CQPSQ_QP_QPTYPE) |
- LS_64(1, I40IW_CQPSQ_QP_CQNUMVALID) |
- LS_64(2, I40IW_CQPSQ_QP_NEXTIWSTATE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_PUDA, "PUDA CQE", wqe, 32);
- i40iw_sc_cqp_post_sq(cqp);
- status = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
- I40IW_CQP_OP_CREATE_QP,
- &compl_info);
- return status;
-}
-
-/**
- * i40iw_puda_qp_create - create qp for resource
- * @rsrc: resource to use for buffer
- */
-static enum i40iw_status_code i40iw_puda_qp_create(struct i40iw_puda_rsrc *rsrc)
-{
- struct i40iw_sc_qp *qp = &rsrc->qp;
- struct i40iw_qp_uk *ukqp = &qp->qp_uk;
- enum i40iw_status_code ret = 0;
- u32 sq_size, rq_size, t_size;
- struct i40iw_dma_mem *mem;
-
- sq_size = rsrc->sq_size * I40IW_QP_WQE_MIN_SIZE;
- rq_size = rsrc->rq_size * I40IW_QP_WQE_MIN_SIZE;
- t_size = (sq_size + rq_size + (I40IW_SHADOW_AREA_SIZE << 3) +
- I40IW_QP_CTX_SIZE);
- /* Get page aligned memory */
- ret =
- i40iw_allocate_dma_mem(rsrc->dev->hw, &rsrc->qpmem, t_size,
- I40IW_HW_PAGE_SIZE);
- if (ret) {
- i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA, "%s: error dma mem\n", __func__);
- return ret;
- }
-
- mem = &rsrc->qpmem;
- memset(mem->va, 0, t_size);
- qp->hw_sq_size = i40iw_get_encoded_wqe_size(rsrc->sq_size, false);
- qp->hw_rq_size = i40iw_get_encoded_wqe_size(rsrc->rq_size, false);
- qp->pd = &rsrc->sc_pd;
- qp->qp_type = I40IW_QP_TYPE_UDA;
- qp->dev = rsrc->dev;
- qp->back_qp = (void *)rsrc;
- qp->sq_pa = mem->pa;
- qp->rq_pa = qp->sq_pa + sq_size;
- qp->vsi = rsrc->vsi;
- ukqp->sq_base = mem->va;
- ukqp->rq_base = &ukqp->sq_base[rsrc->sq_size];
- ukqp->shadow_area = ukqp->rq_base[rsrc->rq_size].elem;
- qp->shadow_area_pa = qp->rq_pa + rq_size;
- qp->hw_host_ctx = ukqp->shadow_area + I40IW_SHADOW_AREA_SIZE;
- qp->hw_host_ctx_pa =
- qp->shadow_area_pa + (I40IW_SHADOW_AREA_SIZE << 3);
- ukqp->qp_id = rsrc->qp_id;
- ukqp->sq_wrtrk_array = rsrc->sq_wrtrk_array;
- ukqp->rq_wrid_array = rsrc->rq_wrid_array;
-
- ukqp->qp_id = rsrc->qp_id;
- ukqp->sq_size = rsrc->sq_size;
- ukqp->rq_size = rsrc->rq_size;
-
- I40IW_RING_INIT(ukqp->sq_ring, ukqp->sq_size);
- I40IW_RING_INIT(ukqp->initial_ring, ukqp->sq_size);
- I40IW_RING_INIT(ukqp->rq_ring, ukqp->rq_size);
-
- if (qp->pd->dev->is_pf)
- ukqp->wqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(qp->pd->dev) +
- I40E_PFPE_WQEALLOC);
- else
- ukqp->wqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(qp->pd->dev) +
- I40E_VFPE_WQEALLOC1);
-
- qp->user_pri = 0;
- i40iw_qp_add_qos(qp);
- i40iw_puda_qp_setctx(rsrc);
- if (rsrc->dev->ceq_valid)
- ret = i40iw_cqp_qp_create_cmd(rsrc->dev, qp);
- else
- ret = i40iw_puda_qp_wqe(rsrc->dev, qp);
- if (ret) {
- i40iw_qp_rem_qos(qp);
- i40iw_free_dma_mem(rsrc->dev->hw, &rsrc->qpmem);
- }
- return ret;
-}
-
-/**
- * i40iw_puda_cq_wqe - setup wqe for cq create
- * @dev: iwarp device
- * @cq: puda cq to create
- */
-static enum i40iw_status_code i40iw_puda_cq_wqe(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq)
-{
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 header;
- struct i40iw_ccq_cqe_info compl_info;
- enum i40iw_status_code status = 0;
-
- cqp = dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, 0);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
- set_64bit_val(wqe, 8, RS_64_1(cq, 1));
- set_64bit_val(wqe, 16,
- LS_64(cq->shadow_read_threshold,
- I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
- set_64bit_val(wqe, 32, cq->cq_pa);
-
- set_64bit_val(wqe, 40, cq->shadow_area_pa);
-
- header = cq->cq_uk.cq_id |
- LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) |
- LS_64(1, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
- LS_64(1, I40IW_CQPSQ_CQ_ENCEQEMASK) |
- LS_64(1, I40IW_CQPSQ_CQ_CEQIDVALID) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(dev, I40IW_DEBUG_PUDA, "PUDA CQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- i40iw_sc_cqp_post_sq(dev->cqp);
- status = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
- I40IW_CQP_OP_CREATE_CQ,
- &compl_info);
- return status;
-}
-
-/**
- * i40iw_puda_cq_create - create cq for resource
- * @rsrc: resource for which cq to create
- */
-static enum i40iw_status_code i40iw_puda_cq_create(struct i40iw_puda_rsrc *rsrc)
-{
- struct i40iw_sc_dev *dev = rsrc->dev;
- struct i40iw_sc_cq *cq = &rsrc->cq;
- enum i40iw_status_code ret = 0;
- u32 tsize, cqsize;
- struct i40iw_dma_mem *mem;
- struct i40iw_cq_init_info info;
- struct i40iw_cq_uk_init_info *init_info = &info.cq_uk_init_info;
-
- cq->vsi = rsrc->vsi;
- cqsize = rsrc->cq_size * (sizeof(struct i40iw_cqe));
- tsize = cqsize + sizeof(struct i40iw_cq_shadow_area);
- ret = i40iw_allocate_dma_mem(dev->hw, &rsrc->cqmem, tsize,
- I40IW_CQ0_ALIGNMENT);
- if (ret)
- return ret;
-
- mem = &rsrc->cqmem;
- memset(&info, 0, sizeof(info));
- info.dev = dev;
- info.type = (rsrc->type == I40IW_PUDA_RSRC_TYPE_ILQ) ?
- I40IW_CQ_TYPE_ILQ : I40IW_CQ_TYPE_IEQ;
- info.shadow_read_threshold = rsrc->cq_size >> 2;
- info.ceq_id_valid = true;
- info.cq_base_pa = mem->pa;
- info.shadow_area_pa = mem->pa + cqsize;
- init_info->cq_base = mem->va;
- init_info->shadow_area = (u64 *)((u8 *)mem->va + cqsize);
- init_info->cq_size = rsrc->cq_size;
- init_info->cq_id = rsrc->cq_id;
- info.ceqe_mask = true;
- info.ceq_id_valid = true;
- ret = dev->iw_priv_cq_ops->cq_init(cq, &info);
- if (ret)
- goto error;
- if (rsrc->dev->ceq_valid)
- ret = i40iw_cqp_cq_create_cmd(dev, cq);
- else
- ret = i40iw_puda_cq_wqe(dev, cq);
-error:
- if (ret)
- i40iw_free_dma_mem(dev->hw, &rsrc->cqmem);
- return ret;
-}
-
-/**
- * i40iw_puda_free_qp - free qp for resource
- * @rsrc: resource for which qp to free
- */
-static void i40iw_puda_free_qp(struct i40iw_puda_rsrc *rsrc)
-{
- enum i40iw_status_code ret;
- struct i40iw_ccq_cqe_info compl_info;
- struct i40iw_sc_dev *dev = rsrc->dev;
-
- if (rsrc->dev->ceq_valid) {
- i40iw_cqp_qp_destroy_cmd(dev, &rsrc->qp);
- return;
- }
-
- ret = dev->iw_priv_qp_ops->qp_destroy(&rsrc->qp,
- 0, false, true, true);
- if (ret)
- i40iw_debug(dev, I40IW_DEBUG_PUDA,
- "%s error puda qp destroy wqe\n",
- __func__);
-
- if (!ret) {
- ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
- I40IW_CQP_OP_DESTROY_QP,
- &compl_info);
- if (ret)
- i40iw_debug(dev, I40IW_DEBUG_PUDA,
- "%s error puda qp destroy failed\n",
- __func__);
- }
-}
-
-/**
- * i40iw_puda_free_cq - free cq for resource
- * @rsrc: resource for which to free the cq
- */
-static void i40iw_puda_free_cq(struct i40iw_puda_rsrc *rsrc)
-{
- enum i40iw_status_code ret;
- struct i40iw_ccq_cqe_info compl_info;
- struct i40iw_sc_dev *dev = rsrc->dev;
-
- if (rsrc->dev->ceq_valid) {
- i40iw_cqp_cq_destroy_cmd(dev, &rsrc->cq);
- return;
- }
- ret = dev->iw_priv_cq_ops->cq_destroy(&rsrc->cq, 0, true);
-
- if (ret)
- i40iw_debug(dev, I40IW_DEBUG_PUDA,
- "%s error ieq cq destroy\n",
- __func__);
-
- if (!ret) {
- ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
- I40IW_CQP_OP_DESTROY_CQ,
- &compl_info);
- if (ret)
- i40iw_debug(dev, I40IW_DEBUG_PUDA,
- "%s error ieq qp destroy done\n",
- __func__);
- }
-}
-
-/**
- * i40iw_puda_dele_resources - delete all resources during close
- * @vsi: pointer to the vsi structure
- * @type: type of resource to delete
- * @reset: true if the chip is being reset
- */
-void i40iw_puda_dele_resources(struct i40iw_sc_vsi *vsi,
- enum puda_resource_type type,
- bool reset)
-{
- struct i40iw_sc_dev *dev = vsi->dev;
- struct i40iw_puda_rsrc *rsrc;
- struct i40iw_puda_buf *buf = NULL;
- struct i40iw_puda_buf *nextbuf = NULL;
- struct i40iw_virt_mem *vmem;
-
- switch (type) {
- case I40IW_PUDA_RSRC_TYPE_ILQ:
- rsrc = vsi->ilq;
- vmem = &vsi->ilq_mem;
- break;
- case I40IW_PUDA_RSRC_TYPE_IEQ:
- rsrc = vsi->ieq;
- vmem = &vsi->ieq_mem;
- break;
- default:
- i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s: error resource type = 0x%x\n",
- __func__, type);
- return;
- }
-
- switch (rsrc->completion) {
- case PUDA_HASH_CRC_COMPLETE:
- i40iw_free_hash_desc(rsrc->hash_desc);
- /* fall through */
- case PUDA_QP_CREATED:
- if (!reset)
- i40iw_puda_free_qp(rsrc);
-
- i40iw_free_dma_mem(dev->hw, &rsrc->qpmem);
- /* fallthrough */
- case PUDA_CQ_CREATED:
- if (!reset)
- i40iw_puda_free_cq(rsrc);
-
- i40iw_free_dma_mem(dev->hw, &rsrc->cqmem);
- break;
- default:
- i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA, "%s error no resources\n", __func__);
- break;
- }
- /* Free all allocated puda buffers for both tx and rx */
- buf = rsrc->alloclist;
- while (buf) {
- nextbuf = buf->next;
- i40iw_puda_dele_buf(dev, buf);
- buf = nextbuf;
- rsrc->alloc_buf_count--;
- }
- i40iw_free_virt_mem(dev->hw, vmem);
-}
-
-/**
- * i40iw_puda_allocbufs - allocate buffers for resource
- * @rsrc: resource for buffer allocation
- * @count: number of buffers to create
- */
-static enum i40iw_status_code i40iw_puda_allocbufs(struct i40iw_puda_rsrc *rsrc,
- u32 count)
-{
- u32 i;
- struct i40iw_puda_buf *buf;
- struct i40iw_puda_buf *nextbuf;
-
- for (i = 0; i < count; i++) {
- buf = i40iw_puda_alloc_buf(rsrc->dev, rsrc->buf_size);
- if (!buf) {
- rsrc->stats_buf_alloc_fail++;
- return I40IW_ERR_NO_MEMORY;
- }
- i40iw_puda_ret_bufpool(rsrc, buf);
- rsrc->alloc_buf_count++;
- if (!rsrc->alloclist) {
- rsrc->alloclist = buf;
- } else {
- nextbuf = rsrc->alloclist;
- rsrc->alloclist = buf;
- buf->next = nextbuf;
- }
- }
- rsrc->avail_buf_count = rsrc->alloc_buf_count;
- return 0;
-}
-
-/**
- * i40iw_puda_create_rsrc - create resource (ilq or ieq)
- * @vsi: pointer to the vsi structure
- * @info: resource information
- */
-enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_vsi *vsi,
- struct i40iw_puda_rsrc_info *info)
-{
- struct i40iw_sc_dev *dev = vsi->dev;
- enum i40iw_status_code ret = 0;
- struct i40iw_puda_rsrc *rsrc;
- u32 pudasize;
- u32 sqwridsize, rqwridsize;
- struct i40iw_virt_mem *vmem;
-
- info->count = 1;
- pudasize = sizeof(struct i40iw_puda_rsrc);
- sqwridsize = info->sq_size * sizeof(struct i40iw_sq_uk_wr_trk_info);
- rqwridsize = info->rq_size * 8;
- switch (info->type) {
- case I40IW_PUDA_RSRC_TYPE_ILQ:
- vmem = &vsi->ilq_mem;
- break;
- case I40IW_PUDA_RSRC_TYPE_IEQ:
- vmem = &vsi->ieq_mem;
- break;
- default:
- return I40IW_NOT_SUPPORTED;
- }
- ret =
- i40iw_allocate_virt_mem(dev->hw, vmem,
- pudasize + sqwridsize + rqwridsize);
- if (ret)
- return ret;
- rsrc = (struct i40iw_puda_rsrc *)vmem->va;
- spin_lock_init(&rsrc->bufpool_lock);
- if (info->type == I40IW_PUDA_RSRC_TYPE_ILQ) {
- vsi->ilq = (struct i40iw_puda_rsrc *)vmem->va;
- vsi->ilq_count = info->count;
- rsrc->receive = info->receive;
- rsrc->xmit_complete = info->xmit_complete;
- } else {
- vmem = &vsi->ieq_mem;
- vsi->ieq_count = info->count;
- vsi->ieq = (struct i40iw_puda_rsrc *)vmem->va;
- rsrc->receive = i40iw_ieq_receive;
- rsrc->xmit_complete = i40iw_ieq_tx_compl;
- }
-
- rsrc->type = info->type;
- rsrc->sq_wrtrk_array = (struct i40iw_sq_uk_wr_trk_info *)((u8 *)vmem->va + pudasize);
- rsrc->rq_wrid_array = (u64 *)((u8 *)vmem->va + pudasize + sqwridsize);
- /* Initialize all ieq lists */
- INIT_LIST_HEAD(&rsrc->bufpool);
- INIT_LIST_HEAD(&rsrc->txpend);
-
- rsrc->tx_wqe_avail_cnt = info->sq_size - 1;
- dev->iw_pd_ops->pd_init(dev, &rsrc->sc_pd, info->pd_id, -1);
- rsrc->qp_id = info->qp_id;
- rsrc->cq_id = info->cq_id;
- rsrc->sq_size = info->sq_size;
- rsrc->rq_size = info->rq_size;
- rsrc->cq_size = info->rq_size + info->sq_size;
- rsrc->buf_size = info->buf_size;
- rsrc->dev = dev;
- rsrc->vsi = vsi;
-
- ret = i40iw_puda_cq_create(rsrc);
- if (!ret) {
- rsrc->completion = PUDA_CQ_CREATED;
- ret = i40iw_puda_qp_create(rsrc);
- }
- if (ret) {
- i40iw_debug(dev, I40IW_DEBUG_PUDA, "[%s] error qp_create\n",
- __func__);
- goto error;
- }
- rsrc->completion = PUDA_QP_CREATED;
-
- ret = i40iw_puda_allocbufs(rsrc, info->tx_buf_cnt + info->rq_size);
- if (ret) {
- i40iw_debug(dev, I40IW_DEBUG_PUDA, "[%s] error alloc_buf\n",
- __func__);
- goto error;
- }
-
- rsrc->rxq_invalid_cnt = info->rq_size;
- ret = i40iw_puda_replenish_rq(rsrc, true);
- if (ret)
- goto error;
-
- if (info->type == I40IW_PUDA_RSRC_TYPE_IEQ) {
- if (!i40iw_init_hash_desc(&rsrc->hash_desc)) {
- rsrc->check_crc = true;
- rsrc->completion = PUDA_HASH_CRC_COMPLETE;
- ret = 0;
- }
- }
-
- dev->ccq_ops->ccq_arm(&rsrc->cq);
- return ret;
- error:
- i40iw_puda_dele_resources(vsi, info->type, false);
-
- return ret;
-}
-
-/**
- * i40iw_ilq_putback_rcvbuf - put an ilq receive buffer back on the rq
- * @qp: ilq's qp resource
- * @wqe_idx: wqe index of completed rcvbuf
- */
-static void i40iw_ilq_putback_rcvbuf(struct i40iw_sc_qp *qp, u32 wqe_idx)
-{
- u64 *wqe;
- u64 offset24;
-
- wqe = qp->qp_uk.rq_base[wqe_idx].elem;
- get_64bit_val(wqe, 24, &offset24);
- offset24 = (offset24) ? 0 : LS_64(1, I40IWQPSQ_VALID);
- set_64bit_val(wqe, 24, offset24);
-}
-
-/**
- * i40iw_ieq_get_fpdu_length - return total fpdu length for a given mpa length
- * @length: mpa length of the fpdu
- */
-static u16 i40iw_ieq_get_fpdu_length(u16 length)
-{
- u16 fpdu_len;
-
- fpdu_len = length + I40IW_IEQ_MPA_FRAMING;
- fpdu_len = (fpdu_len + 3) & 0xfffffffc;
- return fpdu_len;
-}
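As an aside, the rounding above adds the 6-byte MPA framing (the 2-byte length field plus the 4-byte CRC, per I40IW_IEQ_MPA_FRAMING) and pads the result to a 4-byte boundary. A minimal stand-alone sketch of the same arithmetic, not driver code and with hypothetical names:

#include <stdint.h>
#include <stdio.h>

#define MPA_FRAMING 6	/* assumed equal to I40IW_IEQ_MPA_FRAMING: 2-byte length + 4-byte CRC */

/* Same arithmetic as i40iw_ieq_get_fpdu_length(): add framing, round up to 4 bytes. */
static uint16_t fpdu_len_sketch(uint16_t mpa_len)
{
	return (uint16_t)((mpa_len + MPA_FRAMING + 3) & ~3u);
}

int main(void)
{
	printf("%u\n", (unsigned)fpdu_len_sketch(7));	/* 7 + 6 = 13, rounded up -> 16 */
	return 0;
}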
-
-/**
- * i40iw_ieq_copy_to_txbuf - copy data from rcv buf to tx buf
- * @buf: rcv buffer with partial
- * @txbuf: tx buffer for sending back
- * @buf_offset: rcv buffer offset to copy from
- * @txbuf_offset: offset in tx buf to copy to
- * @length: length of data to copy
- */
-static void i40iw_ieq_copy_to_txbuf(struct i40iw_puda_buf *buf,
- struct i40iw_puda_buf *txbuf,
- u16 buf_offset, u32 txbuf_offset,
- u32 length)
-{
- void *mem1 = (u8 *)buf->mem.va + buf_offset;
- void *mem2 = (u8 *)txbuf->mem.va + txbuf_offset;
-
- memcpy(mem2, mem1, length);
-}
-
-/**
- * i40iw_ieq_setup_tx_buf - setup tx buffer for partial handling
- * @buf: receive buffer with partial
- * @txbuf: buffer to prepare
- */
-static void i40iw_ieq_setup_tx_buf(struct i40iw_puda_buf *buf,
- struct i40iw_puda_buf *txbuf)
-{
- txbuf->maclen = buf->maclen;
- txbuf->tcphlen = buf->tcphlen;
- txbuf->ipv4 = buf->ipv4;
- txbuf->hdrlen = buf->hdrlen;
- i40iw_ieq_copy_to_txbuf(buf, txbuf, 0, 0, buf->hdrlen);
-}
-
-/**
- * i40iw_ieq_check_first_buf - check if rcv buffer's seq is in range
- * @buf: receive exception buffer
- * @fps: first partial sequence number
- */
-static void i40iw_ieq_check_first_buf(struct i40iw_puda_buf *buf, u32 fps)
-{
- u32 offset;
-
- if (buf->seqnum < fps) {
- offset = fps - buf->seqnum;
- if (offset > buf->datalen)
- return;
- buf->data += offset;
- buf->datalen -= (u16)offset;
- buf->seqnum = fps;
- }
-}
-
-/**
- * i40iw_ieq_compl_pfpdu - write txbuf with full fpdu
- * @ieq: ieq resource
- * @rxlist: ieq's received buffer list
- * @pbufl: temporary list for buffers for fpdu
- * @txbuf: tx buffer for fpdu
- * @fpdu_len: total length of fpdu
- */
-static void i40iw_ieq_compl_pfpdu(struct i40iw_puda_rsrc *ieq,
- struct list_head *rxlist,
- struct list_head *pbufl,
- struct i40iw_puda_buf *txbuf,
- u16 fpdu_len)
-{
- struct i40iw_puda_buf *buf;
- u32 nextseqnum;
- u16 txoffset, bufoffset;
-
- buf = i40iw_puda_get_listbuf(pbufl);
- if (!buf)
- return;
- nextseqnum = buf->seqnum + fpdu_len;
- txbuf->totallen = buf->hdrlen + fpdu_len;
- txbuf->data = (u8 *)txbuf->mem.va + buf->hdrlen;
- i40iw_ieq_setup_tx_buf(buf, txbuf);
-
- txoffset = buf->hdrlen;
- bufoffset = (u16)(buf->data - (u8 *)buf->mem.va);
-
- do {
- if (buf->datalen >= fpdu_len) {
- /* copied full fpdu */
- i40iw_ieq_copy_to_txbuf(buf, txbuf, bufoffset, txoffset, fpdu_len);
- buf->datalen -= fpdu_len;
- buf->data += fpdu_len;
- buf->seqnum = nextseqnum;
- break;
- }
- /* copy partial fpdu */
- i40iw_ieq_copy_to_txbuf(buf, txbuf, bufoffset, txoffset, buf->datalen);
- txoffset += buf->datalen;
- fpdu_len -= buf->datalen;
- i40iw_puda_ret_bufpool(ieq, buf);
- buf = i40iw_puda_get_listbuf(pbufl);
- if (!buf)
- return;
- bufoffset = (u16)(buf->data - (u8 *)buf->mem.va);
- } while (1);
-
- /* last buffer on the list */
- if (buf->datalen)
- list_add(&buf->list, rxlist);
- else
- i40iw_puda_ret_bufpool(ieq, buf);
-}
-
-/**
- * i40iw_ieq_create_pbufl - create buffer list for single fpdu
- * @pfpdu: partial management per user qp
- * @rxlist: resource list for receive ieq buffers
- * @pbufl: temp. list for buffers for fpdu
- * @buf: first receive buffer
- * @fpdu_len: total length of fpdu
- */
-static enum i40iw_status_code i40iw_ieq_create_pbufl(
- struct i40iw_pfpdu *pfpdu,
- struct list_head *rxlist,
- struct list_head *pbufl,
- struct i40iw_puda_buf *buf,
- u16 fpdu_len)
-{
- enum i40iw_status_code status = 0;
- struct i40iw_puda_buf *nextbuf;
- u32 nextseqnum;
- u16 plen = fpdu_len - buf->datalen;
- bool done = false;
-
- nextseqnum = buf->seqnum + buf->datalen;
- do {
- nextbuf = i40iw_puda_get_listbuf(rxlist);
- if (!nextbuf) {
- status = I40IW_ERR_list_empty;
- break;
- }
- list_add_tail(&nextbuf->list, pbufl);
- if (nextbuf->seqnum != nextseqnum) {
- pfpdu->bad_seq_num++;
- status = I40IW_ERR_SEQ_NUM;
- break;
- }
- if (nextbuf->datalen >= plen) {
- done = true;
- } else {
- plen -= nextbuf->datalen;
- nextseqnum = nextbuf->seqnum + nextbuf->datalen;
- }
-
- } while (!done);
-
- return status;
-}
-
-/**
- * i40iw_ieq_handle_partial - process partial fpdu buffer
- * @ieq: ieq resource
- * @pfpdu: partial management per user qp
- * @buf: receive buffer
- * @fpdu_len: fpdu len in the buffer
- */
-static enum i40iw_status_code i40iw_ieq_handle_partial(struct i40iw_puda_rsrc *ieq,
- struct i40iw_pfpdu *pfpdu,
- struct i40iw_puda_buf *buf,
- u16 fpdu_len)
-{
- enum i40iw_status_code status = 0;
- u8 *crcptr;
- u32 mpacrc;
- u32 seqnum = buf->seqnum;
- struct list_head pbufl; /* partial buffer list */
- struct i40iw_puda_buf *txbuf = NULL;
- struct list_head *rxlist = &pfpdu->rxlist;
-
- INIT_LIST_HEAD(&pbufl);
- list_add(&buf->list, &pbufl);
-
- status = i40iw_ieq_create_pbufl(pfpdu, rxlist, &pbufl, buf, fpdu_len);
- if (status)
- goto error;
-
- txbuf = i40iw_puda_get_bufpool(ieq);
- if (!txbuf) {
- pfpdu->no_tx_bufs++;
- status = I40IW_ERR_NO_TXBUFS;
- goto error;
- }
-
- i40iw_ieq_compl_pfpdu(ieq, rxlist, &pbufl, txbuf, fpdu_len);
- i40iw_ieq_update_tcpip_info(txbuf, fpdu_len, seqnum);
- crcptr = txbuf->data + fpdu_len - 4;
- mpacrc = *(u32 *)crcptr;
- if (ieq->check_crc) {
- status = i40iw_ieq_check_mpacrc(ieq->hash_desc, txbuf->data,
- (fpdu_len - 4), mpacrc);
- if (status) {
- i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
- "%s: error bad crc\n", __func__);
- goto error;
- }
- }
-
- i40iw_debug_buf(ieq->dev, I40IW_DEBUG_IEQ, "IEQ TX BUFFER",
- txbuf->mem.va, txbuf->totallen);
- i40iw_puda_send_buf(ieq, txbuf);
- pfpdu->rcv_nxt = seqnum + fpdu_len;
- return status;
- error:
- while (!list_empty(&pbufl)) {
- buf = (struct i40iw_puda_buf *)(pbufl.prev);
- list_del(&buf->list);
- list_add(&buf->list, rxlist);
- }
- if (txbuf)
- i40iw_puda_ret_bufpool(ieq, txbuf);
- return status;
-}
-
-/**
- * i40iw_ieq_process_buf - process buffer rcvd for ieq
- * @ieq: ieq resource
- * @pfpdu: partial management per user qp
- * @buf: receive buffer
- */
-static enum i40iw_status_code i40iw_ieq_process_buf(struct i40iw_puda_rsrc *ieq,
- struct i40iw_pfpdu *pfpdu,
- struct i40iw_puda_buf *buf)
-{
- u16 fpdu_len = 0;
- u16 datalen = buf->datalen;
- u8 *datap = buf->data;
- u8 *crcptr;
- u16 ioffset = 0;
- u32 mpacrc;
- u32 seqnum = buf->seqnum;
- u16 length = 0;
- u16 full = 0;
- bool partial = false;
- struct i40iw_puda_buf *txbuf;
- struct list_head *rxlist = &pfpdu->rxlist;
- enum i40iw_status_code ret = 0;
- enum i40iw_status_code status = 0;
-
- ioffset = (u16)(buf->data - (u8 *)buf->mem.va);
- while (datalen) {
- fpdu_len = i40iw_ieq_get_fpdu_length(ntohs(*(__be16 *)datap));
- if (fpdu_len > pfpdu->max_fpdu_data) {
- i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
- "%s: error bad fpdu_len\n", __func__);
- status = I40IW_ERR_MPA_CRC;
- list_add(&buf->list, rxlist);
- return status;
- }
-
- if (datalen < fpdu_len) {
- partial = true;
- break;
- }
- crcptr = datap + fpdu_len - 4;
- mpacrc = *(u32 *)crcptr;
- if (ieq->check_crc)
- ret = i40iw_ieq_check_mpacrc(ieq->hash_desc,
- datap, fpdu_len - 4, mpacrc);
- if (ret) {
- status = I40IW_ERR_MPA_CRC;
- list_add(&buf->list, rxlist);
- return status;
- }
- full++;
- pfpdu->fpdu_processed++;
- datap += fpdu_len;
- length += fpdu_len;
- datalen -= fpdu_len;
- }
- if (full) {
- /* copy full pdu's in the txbuf and send them out */
- txbuf = i40iw_puda_get_bufpool(ieq);
- if (!txbuf) {
- pfpdu->no_tx_bufs++;
- status = I40IW_ERR_NO_TXBUFS;
- list_add(&buf->list, rxlist);
- return status;
- }
- /* modify txbuf's buffer header */
- i40iw_ieq_setup_tx_buf(buf, txbuf);
- /* copy full fpdu's to new buffer */
- i40iw_ieq_copy_to_txbuf(buf, txbuf, ioffset, buf->hdrlen,
- length);
- txbuf->totallen = buf->hdrlen + length;
-
- i40iw_ieq_update_tcpip_info(txbuf, length, buf->seqnum);
- i40iw_puda_send_buf(ieq, txbuf);
-
- if (!datalen) {
- pfpdu->rcv_nxt = buf->seqnum + length;
- i40iw_puda_ret_bufpool(ieq, buf);
- return status;
- }
- buf->data = datap;
- buf->seqnum = seqnum + length;
- buf->datalen = datalen;
- pfpdu->rcv_nxt = buf->seqnum;
- }
- if (partial)
- status = i40iw_ieq_handle_partial(ieq, pfpdu, buf, fpdu_len);
-
- return status;
-}
-
-/**
- * i40iw_ieq_process_fpdus - process fpdu buffers on the qp's rxlist
- * @qp: qp for which partial fpdus are pending
- * @ieq: ieq resource
- */
-static void i40iw_ieq_process_fpdus(struct i40iw_sc_qp *qp,
- struct i40iw_puda_rsrc *ieq)
-{
- struct i40iw_pfpdu *pfpdu = &qp->pfpdu;
- struct list_head *rxlist = &pfpdu->rxlist;
- struct i40iw_puda_buf *buf;
- enum i40iw_status_code status;
-
- do {
- if (list_empty(rxlist))
- break;
- buf = i40iw_puda_get_listbuf(rxlist);
- if (!buf) {
- i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
- "%s: error no buf\n", __func__);
- break;
- }
- if (buf->seqnum != pfpdu->rcv_nxt) {
- /* This could be out of order or missing packet */
- pfpdu->out_of_order++;
- list_add(&buf->list, rxlist);
- break;
- }
- /* keep processing buffers from the head of the list */
- status = i40iw_ieq_process_buf(ieq, pfpdu, buf);
- if (status == I40IW_ERR_MPA_CRC) {
- pfpdu->mpa_crc_err = true;
- while (!list_empty(rxlist)) {
- buf = i40iw_puda_get_listbuf(rxlist);
- i40iw_puda_ret_bufpool(ieq, buf);
- pfpdu->crc_err++;
- }
- /* create CQP for AE */
- i40iw_ieq_mpa_crc_ae(ieq->dev, qp);
- }
- } while (!status);
-}
-
-/**
- * i40iw_ieq_handle_exception - handle qp's exception
- * @ieq: ieq resource
- * @qp: qp receiving exception
- * @buf: receive buffer
- */
-static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq,
- struct i40iw_sc_qp *qp,
- struct i40iw_puda_buf *buf)
-{
- struct i40iw_puda_buf *tmpbuf = NULL;
- struct i40iw_pfpdu *pfpdu = &qp->pfpdu;
- u32 *hw_host_ctx = (u32 *)qp->hw_host_ctx;
- u32 rcv_wnd = hw_host_ctx[23];
- /* first partial seq # in q2 */
- u32 fps = *(u32 *)(qp->q2_buf + Q2_FPSN_OFFSET);
- struct list_head *rxlist = &pfpdu->rxlist;
- struct list_head *plist;
-
- pfpdu->total_ieq_bufs++;
-
- if (pfpdu->mpa_crc_err) {
- pfpdu->crc_err++;
- goto error;
- }
- if (pfpdu->mode && (fps != pfpdu->fps)) {
- /* clean up qp as it is new partial sequence */
- i40iw_ieq_cleanup_qp(ieq, qp);
- i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
- "%s: restarting new partial\n", __func__);
- pfpdu->mode = false;
- }
-
- if (!pfpdu->mode) {
- i40iw_debug_buf(ieq->dev, I40IW_DEBUG_IEQ, "Q2 BUFFER", (u64 *)qp->q2_buf, 128);
- /* First_Partial_Sequence_Number check */
- pfpdu->rcv_nxt = fps;
- pfpdu->fps = fps;
- pfpdu->mode = true;
- pfpdu->max_fpdu_data = (buf->ipv4) ? (ieq->vsi->mtu - I40IW_MTU_TO_MSS_IPV4) :
- (ieq->vsi->mtu - I40IW_MTU_TO_MSS_IPV6);
- pfpdu->pmode_count++;
- INIT_LIST_HEAD(rxlist);
- i40iw_ieq_check_first_buf(buf, fps);
- }
-
- if (!(rcv_wnd >= (buf->seqnum - pfpdu->rcv_nxt))) {
- pfpdu->bad_seq_num++;
- goto error;
- }
-
- if (!list_empty(rxlist)) {
- tmpbuf = (struct i40iw_puda_buf *)rxlist->next;
- while ((struct list_head *)tmpbuf != rxlist) {
- if ((int)(buf->seqnum - tmpbuf->seqnum) < 0)
- break;
- plist = &tmpbuf->list;
- tmpbuf = (struct i40iw_puda_buf *)plist->next;
- }
- /* Insert buf before tmpbuf */
- list_add_tail(&buf->list, &tmpbuf->list);
- } else {
- list_add_tail(&buf->list, rxlist);
- }
- i40iw_ieq_process_fpdus(qp, ieq);
- return;
- error:
- i40iw_puda_ret_bufpool(ieq, buf);
-}
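The sorted insert into rxlist above compares sequence numbers with a signed 32-bit subtraction, which keeps the ordering correct across wraparound of the 32-bit sequence space (on the usual two's-complement targets). A small stand-alone illustration, with a hypothetical helper name:

#include <stdint.h>
#include <stdio.h>

/* True when a precedes b in the circular 32-bit sequence space. */
static int seq_before(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;
}

int main(void)
{
	/* 0xfffffff0 still sorts before 0x10 because the window wrapped past zero. */
	printf("%d\n", seq_before(0xfffffff0u, 0x00000010u));	/* 1 */
	printf("%d\n", seq_before(0x00000010u, 0xfffffff0u));	/* 0 */
	return 0;
}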
-
-/**
- * i40iw_ieq_receive - received exception buffer
- * @vsi: pointer to the vsi structure
- * @buf: exception buffer received
- */
-static void i40iw_ieq_receive(struct i40iw_sc_vsi *vsi,
- struct i40iw_puda_buf *buf)
-{
- struct i40iw_puda_rsrc *ieq = vsi->ieq;
- struct i40iw_sc_qp *qp = NULL;
- u32 wqe_idx = ieq->compl_rxwqe_idx;
-
- qp = i40iw_ieq_get_qp(vsi->dev, buf);
- if (!qp) {
- ieq->stats_bad_qp_id++;
- i40iw_puda_ret_bufpool(ieq, buf);
- } else {
- i40iw_ieq_handle_exception(ieq, qp, buf);
- }
- /*
- * ieq->rx_wqe_idx is used by i40iw_puda_replenish_rq()
- * on which wqe_idx to start replenish rq
- */
- if (!ieq->rxq_invalid_cnt)
- ieq->rx_wqe_idx = wqe_idx;
- ieq->rxq_invalid_cnt++;
-}
-
-/**
- * i40iw_ieq_tx_compl - put back after sending completed exception buffer
- * @vsi: pointer to the vsi structure
- * @sqwrid: pointer to puda buffer
- */
-static void i40iw_ieq_tx_compl(struct i40iw_sc_vsi *vsi, void *sqwrid)
-{
- struct i40iw_puda_rsrc *ieq = vsi->ieq;
- struct i40iw_puda_buf *buf = (struct i40iw_puda_buf *)sqwrid;
-
- i40iw_puda_ret_bufpool(ieq, buf);
-}
-
-/**
- * i40iw_ieq_cleanup_qp - qp is being destroyed
- * @ieq: ieq resource
- * @qp: qp being destroyed, whose pending fpdu buffers are freed
- */
-void i40iw_ieq_cleanup_qp(struct i40iw_puda_rsrc *ieq, struct i40iw_sc_qp *qp)
-{
- struct i40iw_puda_buf *buf;
- struct i40iw_pfpdu *pfpdu = &qp->pfpdu;
- struct list_head *rxlist = &pfpdu->rxlist;
-
- if (!pfpdu->mode)
- return;
- while (!list_empty(rxlist)) {
- buf = i40iw_puda_get_listbuf(rxlist);
- i40iw_puda_ret_bufpool(ieq, buf);
- }
-}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.h b/drivers/infiniband/hw/i40iw/i40iw_puda.h
deleted file mode 100644
index 53a7d58c84b5..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.h
+++ /dev/null
@@ -1,188 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_PUDA_H
-#define I40IW_PUDA_H
-
-#define I40IW_IEQ_MPA_FRAMING 6
-
-struct i40iw_sc_dev;
-struct i40iw_sc_qp;
-struct i40iw_sc_cq;
-
-enum puda_resource_type {
- I40IW_PUDA_RSRC_TYPE_ILQ = 1,
- I40IW_PUDA_RSRC_TYPE_IEQ
-};
-
-enum puda_rsrc_complete {
- PUDA_CQ_CREATED = 1,
- PUDA_QP_CREATED,
- PUDA_TX_COMPLETE,
- PUDA_RX_COMPLETE,
- PUDA_HASH_CRC_COMPLETE
-};
-
-struct i40iw_puda_completion_info {
- struct i40iw_qp_uk *qp;
- u8 q_type;
- u8 vlan_valid;
- u8 l3proto;
- u8 l4proto;
- u16 payload_len;
- u32 compl_error; /* No_err=0, else major and minor err code */
- u32 qp_id;
- u32 wqe_idx;
-};
-
-struct i40iw_puda_send_info {
- u64 paddr; /* Physical address */
- u32 len;
- u8 tcplen;
- u8 maclen;
- bool ipv4;
- bool doloopback;
- void *scratch;
-};
-
-struct i40iw_puda_buf {
- struct list_head list; /* MUST be first entry */
- struct i40iw_dma_mem mem; /* DMA memory for the buffer */
- struct i40iw_puda_buf *next; /* for alloclist in rsrc struct */
- struct i40iw_virt_mem buf_mem; /* Buffer memory for this buffer */
- void *scratch;
- u8 *iph;
- u8 *tcph;
- u8 *data;
- u16 datalen;
- u16 vlan_id;
- u8 tcphlen; /* tcp length in bytes */
- u8 maclen; /* mac length in bytes */
- u32 totallen; /* machlen+iphlen+tcphlen+datalen */
- atomic_t refcount;
- u8 hdrlen;
- bool ipv4;
- u32 seqnum;
-};
-
-struct i40iw_puda_rsrc_info {
- enum puda_resource_type type; /* ILQ or IEQ */
- u32 count;
- u16 pd_id;
- u32 cq_id;
- u32 qp_id;
- u32 sq_size;
- u32 rq_size;
- u16 buf_size;
- u16 mss;
- u32 tx_buf_cnt; /* total bufs allocated will be rq_size + tx_buf_cnt */
- void (*receive)(struct i40iw_sc_vsi *, struct i40iw_puda_buf *);
- void (*xmit_complete)(struct i40iw_sc_vsi *, void *);
-};
-
-struct i40iw_puda_rsrc {
- struct i40iw_sc_cq cq;
- struct i40iw_sc_qp qp;
- struct i40iw_sc_pd sc_pd;
- struct i40iw_sc_dev *dev;
- struct i40iw_sc_vsi *vsi;
- struct i40iw_dma_mem cqmem;
- struct i40iw_dma_mem qpmem;
- struct i40iw_virt_mem ilq_mem;
- enum puda_rsrc_complete completion;
- enum puda_resource_type type;
- u16 buf_size; /*buffer must be max datalen + tcpip hdr + mac */
- u16 mss;
- u32 cq_id;
- u32 qp_id;
- u32 sq_size;
- u32 rq_size;
- u32 cq_size;
- struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array;
- u64 *rq_wrid_array;
- u32 compl_rxwqe_idx;
- u32 rx_wqe_idx;
- u32 rxq_invalid_cnt;
- u32 tx_wqe_avail_cnt;
- bool check_crc;
- struct shash_desc *hash_desc;
- struct list_head txpend;
- struct list_head bufpool; /* free buffers pool list for recv and xmit */
- u32 alloc_buf_count;
- u32 avail_buf_count; /* snapshot of currently available buffers */
- spinlock_t bufpool_lock;
- struct i40iw_puda_buf *alloclist;
- void (*receive)(struct i40iw_sc_vsi *, struct i40iw_puda_buf *);
- void (*xmit_complete)(struct i40iw_sc_vsi *, void *);
- /* puda stats */
- u64 stats_buf_alloc_fail;
- u64 stats_pkt_rcvd;
- u64 stats_pkt_sent;
- u64 stats_rcvd_pkt_err;
- u64 stats_sent_pkt_q;
- u64 stats_bad_qp_id;
-};
-
-struct i40iw_puda_buf *i40iw_puda_get_bufpool(struct i40iw_puda_rsrc *rsrc);
-void i40iw_puda_ret_bufpool(struct i40iw_puda_rsrc *rsrc,
- struct i40iw_puda_buf *buf);
-void i40iw_puda_send_buf(struct i40iw_puda_rsrc *rsrc,
- struct i40iw_puda_buf *buf);
-enum i40iw_status_code i40iw_puda_send(struct i40iw_sc_qp *qp,
- struct i40iw_puda_send_info *info);
-enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_vsi *vsi,
- struct i40iw_puda_rsrc_info *info);
-void i40iw_puda_dele_resources(struct i40iw_sc_vsi *vsi,
- enum puda_resource_type type,
- bool reset);
-enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev,
- struct i40iw_sc_cq *cq, u32 *compl_err);
-
-struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev,
- struct i40iw_puda_buf *buf);
-enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_info *info,
- struct i40iw_puda_buf *buf);
-enum i40iw_status_code i40iw_ieq_check_mpacrc(struct shash_desc *desc,
- void *addr, u32 length, u32 value);
-enum i40iw_status_code i40iw_init_hash_desc(struct shash_desc **desc);
-void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
-void i40iw_free_hash_desc(struct shash_desc *desc);
-void i40iw_ieq_update_tcpip_info(struct i40iw_puda_buf *buf, u16 length,
- u32 seqnum);
-enum i40iw_status_code i40iw_cqp_qp_create_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
-enum i40iw_status_code i40iw_cqp_cq_create_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq);
-void i40iw_cqp_qp_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
-void i40iw_cqp_cq_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq);
-void i40iw_ieq_cleanup_qp(struct i40iw_puda_rsrc *ieq, struct i40iw_sc_qp *qp);
-#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_register.h b/drivers/infiniband/hw/i40iw/i40iw_register.h
deleted file mode 100644
index 57768184e251..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_register.h
+++ /dev/null
@@ -1,1030 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_REGISTER_H
-#define I40IW_REGISTER_H
-
-#define I40E_GLGEN_STAT 0x000B612C /* Reset: POR */
-
-#define I40E_PFHMC_PDINV 0x000C0300 /* Reset: PFR */
-#define I40E_PFHMC_PDINV_PMSDIDX_SHIFT 0
-#define I40E_PFHMC_PDINV_PMSDIDX_MASK (0xFFF << I40E_PFHMC_PDINV_PMSDIDX_SHIFT)
-#define I40E_PFHMC_PDINV_PMPDIDX_SHIFT 16
-#define I40E_PFHMC_PDINV_PMPDIDX_MASK (0x1FF << I40E_PFHMC_PDINV_PMPDIDX_SHIFT)
-#define I40E_PFHMC_SDCMD_PMSDWR_SHIFT 31
-#define I40E_PFHMC_SDCMD_PMSDWR_MASK (0x1 << I40E_PFHMC_SDCMD_PMSDWR_SHIFT)
-#define I40E_PFHMC_SDDATALOW_PMSDVALID_SHIFT 0
-#define I40E_PFHMC_SDDATALOW_PMSDVALID_MASK (0x1 << I40E_PFHMC_SDDATALOW_PMSDVALID_SHIFT)
-#define I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT 1
-#define I40E_PFHMC_SDDATALOW_PMSDTYPE_MASK (0x1 << I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT)
-#define I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT 2
-#define I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_MASK (0x3FF << I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT)
-
-#define I40E_PFINT_DYN_CTLN(_INTPF) (0x00034800 + ((_INTPF) * 4)) /* _i=0...511 */ /* Reset: PFR */
-#define I40E_PFINT_DYN_CTLN_INTENA_SHIFT 0
-#define I40E_PFINT_DYN_CTLN_INTENA_MASK (0x1 << I40E_PFINT_DYN_CTLN_INTENA_SHIFT)
-#define I40E_PFINT_DYN_CTLN_CLEARPBA_SHIFT 1
-#define I40E_PFINT_DYN_CTLN_CLEARPBA_MASK (0x1 << I40E_PFINT_DYN_CTLN_CLEARPBA_SHIFT)
-#define I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT 3
-#define I40E_PFINT_DYN_CTLN_ITR_INDX_MASK (0x3 << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT)
-
-#define I40E_VFINT_DYN_CTLN1(_INTVF) (0x00003800 + ((_INTVF) * 4)) /* _i=0...15 */ /* Reset: VFR */
-#define I40E_GLHMC_VFPDINV(_i) (0x000C8300 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
-
-#define I40E_PFHMC_PDINV_PMSDPARTSEL_SHIFT 15
-#define I40E_PFHMC_PDINV_PMSDPARTSEL_MASK (0x1 << I40E_PFHMC_PDINV_PMSDPARTSEL_SHIFT)
-#define I40E_GLPCI_LBARCTRL 0x000BE484 /* Reset: POR */
-#define I40E_GLPCI_LBARCTRL_PE_DB_SIZE_SHIFT 4
-#define I40E_GLPCI_LBARCTRL_PE_DB_SIZE_MASK (0x3 << I40E_GLPCI_LBARCTRL_PE_DB_SIZE_SHIFT)
-#define I40E_GLPCI_DREVID 0x0009C480 /* Reset: PCIR */
-#define I40E_GLPCI_DREVID_DEFAULT_REVID_SHIFT 0
-#define I40E_GLPCI_DREVID_DEFAULT_REVID_MASK 0xFF
-
-#define I40E_PFPE_AEQALLOC 0x00131180 /* Reset: PFR */
-#define I40E_PFPE_AEQALLOC_AECOUNT_SHIFT 0
-#define I40E_PFPE_AEQALLOC_AECOUNT_MASK (0xFFFFFFFF << I40E_PFPE_AEQALLOC_AECOUNT_SHIFT)
-#define I40E_PFPE_CCQPHIGH 0x00008200 /* Reset: PFR */
-#define I40E_PFPE_CCQPHIGH_PECCQPHIGH_SHIFT 0
-#define I40E_PFPE_CCQPHIGH_PECCQPHIGH_MASK (0xFFFFFFFF << I40E_PFPE_CCQPHIGH_PECCQPHIGH_SHIFT)
-#define I40E_PFPE_CCQPLOW 0x00008180 /* Reset: PFR */
-#define I40E_PFPE_CCQPLOW_PECCQPLOW_SHIFT 0
-#define I40E_PFPE_CCQPLOW_PECCQPLOW_MASK (0xFFFFFFFF << I40E_PFPE_CCQPLOW_PECCQPLOW_SHIFT)
-#define I40E_PFPE_CCQPSTATUS 0x00008100 /* Reset: PFR */
-#define I40E_PFPE_CCQPSTATUS_CCQP_DONE_SHIFT 0
-#define I40E_PFPE_CCQPSTATUS_CCQP_DONE_MASK (0x1 << I40E_PFPE_CCQPSTATUS_CCQP_DONE_SHIFT)
-#define I40E_PFPE_CCQPSTATUS_HMC_PROFILE_SHIFT 4
-#define I40E_PFPE_CCQPSTATUS_HMC_PROFILE_MASK (0x7 << I40E_PFPE_CCQPSTATUS_HMC_PROFILE_SHIFT)
-#define I40E_PFPE_CCQPSTATUS_RDMA_EN_VFS_SHIFT 16
-#define I40E_PFPE_CCQPSTATUS_RDMA_EN_VFS_MASK (0x3F << I40E_PFPE_CCQPSTATUS_RDMA_EN_VFS_SHIFT)
-#define I40E_PFPE_CCQPSTATUS_CCQP_ERR_SHIFT 31
-#define I40E_PFPE_CCQPSTATUS_CCQP_ERR_MASK (0x1 << I40E_PFPE_CCQPSTATUS_CCQP_ERR_SHIFT)
-#define I40E_PFPE_CQACK 0x00131100 /* Reset: PFR */
-#define I40E_PFPE_CQACK_PECQID_SHIFT 0
-#define I40E_PFPE_CQACK_PECQID_MASK (0x1FFFF << I40E_PFPE_CQACK_PECQID_SHIFT)
-#define I40E_PFPE_CQARM 0x00131080 /* Reset: PFR */
-#define I40E_PFPE_CQARM_PECQID_SHIFT 0
-#define I40E_PFPE_CQARM_PECQID_MASK (0x1FFFF << I40E_PFPE_CQARM_PECQID_SHIFT)
-#define I40E_PFPE_CQPDB 0x00008000 /* Reset: PFR */
-#define I40E_PFPE_CQPDB_WQHEAD_SHIFT 0
-#define I40E_PFPE_CQPDB_WQHEAD_MASK (0x7FF << I40E_PFPE_CQPDB_WQHEAD_SHIFT)
-#define I40E_PFPE_CQPERRCODES 0x00008880 /* Reset: PFR */
-#define I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT 0
-#define I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE_MASK (0xFFFF << I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT)
-#define I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT 16
-#define I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE_MASK (0xFFFF << I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT)
-#define I40E_PFPE_CQPTAIL 0x00008080 /* Reset: PFR */
-#define I40E_PFPE_CQPTAIL_WQTAIL_SHIFT 0
-#define I40E_PFPE_CQPTAIL_WQTAIL_MASK (0x7FF << I40E_PFPE_CQPTAIL_WQTAIL_SHIFT)
-#define I40E_PFPE_CQPTAIL_CQP_OP_ERR_SHIFT 31
-#define I40E_PFPE_CQPTAIL_CQP_OP_ERR_MASK (0x1 << I40E_PFPE_CQPTAIL_CQP_OP_ERR_SHIFT)
-#define I40E_PFPE_FLMQ1ALLOCERR 0x00008980 /* Reset: PFR */
-#define I40E_PFPE_FLMQ1ALLOCERR_ERROR_COUNT_SHIFT 0
-#define I40E_PFPE_FLMQ1ALLOCERR_ERROR_COUNT_MASK (0xFFFF << I40E_PFPE_FLMQ1ALLOCERR_ERROR_COUNT_SHIFT)
-#define I40E_PFPE_FLMXMITALLOCERR 0x00008900 /* Reset: PFR */
-#define I40E_PFPE_FLMXMITALLOCERR_ERROR_COUNT_SHIFT 0
-#define I40E_PFPE_FLMXMITALLOCERR_ERROR_COUNT_MASK (0xFFFF << I40E_PFPE_FLMXMITALLOCERR_ERROR_COUNT_SHIFT)
-#define I40E_PFPE_IPCONFIG0 0x00008280 /* Reset: PFR */
-#define I40E_PFPE_IPCONFIG0_PEIPID_SHIFT 0
-#define I40E_PFPE_IPCONFIG0_PEIPID_MASK (0xFFFF << I40E_PFPE_IPCONFIG0_PEIPID_SHIFT)
-#define I40E_PFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT 16
-#define I40E_PFPE_IPCONFIG0_USEENTIREIDRANGE_MASK (0x1 << I40E_PFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT)
-#define I40E_PFPE_MRTEIDXMASK 0x00008600 /* Reset: PFR */
-#define I40E_PFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT 0
-#define I40E_PFPE_MRTEIDXMASK_MRTEIDXMASKBITS_MASK (0x1F << I40E_PFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT)
-#define I40E_PFPE_RCVUNEXPECTEDERROR 0x00008680 /* Reset: PFR */
-#define I40E_PFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT 0
-#define I40E_PFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_MASK (0xFFFFFF << I40E_PFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT)
-#define I40E_PFPE_TCPNOWTIMER 0x00008580 /* Reset: PFR */
-#define I40E_PFPE_TCPNOWTIMER_TCP_NOW_SHIFT 0
-#define I40E_PFPE_TCPNOWTIMER_TCP_NOW_MASK (0xFFFFFFFF << I40E_PFPE_TCPNOWTIMER_TCP_NOW_SHIFT)
-
-#define I40E_PFPE_WQEALLOC 0x00138C00 /* Reset: PFR */
-#define I40E_PFPE_WQEALLOC_PEQPID_SHIFT 0
-#define I40E_PFPE_WQEALLOC_PEQPID_MASK (0x3FFFF << I40E_PFPE_WQEALLOC_PEQPID_SHIFT)
-#define I40E_PFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT 20
-#define I40E_PFPE_WQEALLOC_WQE_DESC_INDEX_MASK (0xFFF << I40E_PFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT)
-
-#define I40E_VFPE_AEQALLOC(_VF) (0x00130C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_AEQALLOC_MAX_INDEX 127
-#define I40E_VFPE_AEQALLOC_AECOUNT_SHIFT 0
-#define I40E_VFPE_AEQALLOC_AECOUNT_MASK (0xFFFFFFFF << I40E_VFPE_AEQALLOC_AECOUNT_SHIFT)
-#define I40E_VFPE_CCQPHIGH(_VF) (0x00001000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_CCQPHIGH_MAX_INDEX 127
-#define I40E_VFPE_CCQPHIGH_PECCQPHIGH_SHIFT 0
-#define I40E_VFPE_CCQPHIGH_PECCQPHIGH_MASK (0xFFFFFFFF << I40E_VFPE_CCQPHIGH_PECCQPHIGH_SHIFT)
-#define I40E_VFPE_CCQPLOW(_VF) (0x00000C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_CCQPLOW_MAX_INDEX 127
-#define I40E_VFPE_CCQPLOW_PECCQPLOW_SHIFT 0
-#define I40E_VFPE_CCQPLOW_PECCQPLOW_MASK (0xFFFFFFFF << I40E_VFPE_CCQPLOW_PECCQPLOW_SHIFT)
-#define I40E_VFPE_CCQPSTATUS(_VF) (0x00000800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_CCQPSTATUS_MAX_INDEX 127
-#define I40E_VFPE_CCQPSTATUS_CCQP_DONE_SHIFT 0
-#define I40E_VFPE_CCQPSTATUS_CCQP_DONE_MASK (0x1 << I40E_VFPE_CCQPSTATUS_CCQP_DONE_SHIFT)
-#define I40E_VFPE_CCQPSTATUS_HMC_PROFILE_SHIFT 4
-#define I40E_VFPE_CCQPSTATUS_HMC_PROFILE_MASK (0x7 << I40E_VFPE_CCQPSTATUS_HMC_PROFILE_SHIFT)
-#define I40E_VFPE_CCQPSTATUS_RDMA_EN_VFS_SHIFT 16
-#define I40E_VFPE_CCQPSTATUS_RDMA_EN_VFS_MASK (0x3F << I40E_VFPE_CCQPSTATUS_RDMA_EN_VFS_SHIFT)
-#define I40E_VFPE_CCQPSTATUS_CCQP_ERR_SHIFT 31
-#define I40E_VFPE_CCQPSTATUS_CCQP_ERR_MASK (0x1 << I40E_VFPE_CCQPSTATUS_CCQP_ERR_SHIFT)
-#define I40E_VFPE_CQACK(_VF) (0x00130800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_CQACK_MAX_INDEX 127
-#define I40E_VFPE_CQACK_PECQID_SHIFT 0
-#define I40E_VFPE_CQACK_PECQID_MASK (0x1FFFF << I40E_VFPE_CQACK_PECQID_SHIFT)
-#define I40E_VFPE_CQARM(_VF) (0x00130400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_CQARM_MAX_INDEX 127
-#define I40E_VFPE_CQARM_PECQID_SHIFT 0
-#define I40E_VFPE_CQARM_PECQID_MASK (0x1FFFF << I40E_VFPE_CQARM_PECQID_SHIFT)
-#define I40E_VFPE_CQPDB(_VF) (0x00000000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_CQPDB_MAX_INDEX 127
-#define I40E_VFPE_CQPDB_WQHEAD_SHIFT 0
-#define I40E_VFPE_CQPDB_WQHEAD_MASK (0x7FF << I40E_VFPE_CQPDB_WQHEAD_SHIFT)
-#define I40E_VFPE_CQPERRCODES(_VF) (0x00001800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_CQPERRCODES_MAX_INDEX 127
-#define I40E_VFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT 0
-#define I40E_VFPE_CQPERRCODES_CQP_MINOR_CODE_MASK (0xFFFF << I40E_VFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT)
-#define I40E_VFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT 16
-#define I40E_VFPE_CQPERRCODES_CQP_MAJOR_CODE_MASK (0xFFFF << I40E_VFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT)
-#define I40E_VFPE_CQPTAIL(_VF) (0x00000400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_CQPTAIL_MAX_INDEX 127
-#define I40E_VFPE_CQPTAIL_WQTAIL_SHIFT 0
-#define I40E_VFPE_CQPTAIL_WQTAIL_MASK (0x7FF << I40E_VFPE_CQPTAIL_WQTAIL_SHIFT)
-#define I40E_VFPE_CQPTAIL_CQP_OP_ERR_SHIFT 31
-#define I40E_VFPE_CQPTAIL_CQP_OP_ERR_MASK (0x1 << I40E_VFPE_CQPTAIL_CQP_OP_ERR_SHIFT)
-#define I40E_VFPE_IPCONFIG0(_VF) (0x00001400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_IPCONFIG0_MAX_INDEX 127
-#define I40E_VFPE_IPCONFIG0_PEIPID_SHIFT 0
-#define I40E_VFPE_IPCONFIG0_PEIPID_MASK (0xFFFF << I40E_VFPE_IPCONFIG0_PEIPID_SHIFT)
-#define I40E_VFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT 16
-#define I40E_VFPE_IPCONFIG0_USEENTIREIDRANGE_MASK (0x1 << I40E_VFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT)
-#define I40E_VFPE_MRTEIDXMASK(_VF) (0x00003000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_MRTEIDXMASK_MAX_INDEX 127
-#define I40E_VFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT 0
-#define I40E_VFPE_MRTEIDXMASK_MRTEIDXMASKBITS_MASK (0x1F << I40E_VFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT)
-#define I40E_VFPE_RCVUNEXPECTEDERROR(_VF) (0x00003400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_RCVUNEXPECTEDERROR_MAX_INDEX 127
-#define I40E_VFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT 0
-#define I40E_VFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_MASK (0xFFFFFF << I40E_VFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT)
-#define I40E_VFPE_TCPNOWTIMER(_VF) (0x00002C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_TCPNOWTIMER_MAX_INDEX 127
-#define I40E_VFPE_TCPNOWTIMER_TCP_NOW_SHIFT 0
-#define I40E_VFPE_TCPNOWTIMER_TCP_NOW_MASK (0xFFFFFFFF << I40E_VFPE_TCPNOWTIMER_TCP_NOW_SHIFT)
-#define I40E_VFPE_WQEALLOC(_VF) (0x00138000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_WQEALLOC_MAX_INDEX 127
-#define I40E_VFPE_WQEALLOC_PEQPID_SHIFT 0
-#define I40E_VFPE_WQEALLOC_PEQPID_MASK (0x3FFFF << I40E_VFPE_WQEALLOC_PEQPID_SHIFT)
-#define I40E_VFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT 20
-#define I40E_VFPE_WQEALLOC_WQE_DESC_INDEX_MASK (0xFFF << I40E_VFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT)
-
-#define I40E_GLPE_CPUSTATUS0 0x0000D040 /* Reset: PE_CORER */
-#define I40E_GLPE_CPUSTATUS0_PECPUSTATUS0_SHIFT 0
-#define I40E_GLPE_CPUSTATUS0_PECPUSTATUS0_MASK (0xFFFFFFFF << I40E_GLPE_CPUSTATUS0_PECPUSTATUS0_SHIFT)
-#define I40E_GLPE_CPUSTATUS1 0x0000D044 /* Reset: PE_CORER */
-#define I40E_GLPE_CPUSTATUS1_PECPUSTATUS1_SHIFT 0
-#define I40E_GLPE_CPUSTATUS1_PECPUSTATUS1_MASK (0xFFFFFFFF << I40E_GLPE_CPUSTATUS1_PECPUSTATUS1_SHIFT)
-#define I40E_GLPE_CPUSTATUS2 0x0000D048 /* Reset: PE_CORER */
-#define I40E_GLPE_CPUSTATUS2_PECPUSTATUS2_SHIFT 0
-#define I40E_GLPE_CPUSTATUS2_PECPUSTATUS2_MASK (0xFFFFFFFF << I40E_GLPE_CPUSTATUS2_PECPUSTATUS2_SHIFT)
-#define I40E_GLPE_CPUTRIG0 0x0000D060 /* Reset: PE_CORER */
-#define I40E_GLPE_CPUTRIG0_PECPUTRIG0_SHIFT 0
-#define I40E_GLPE_CPUTRIG0_PECPUTRIG0_MASK (0xFFFF << I40E_GLPE_CPUTRIG0_PECPUTRIG0_SHIFT)
-#define I40E_GLPE_CPUTRIG0_TEPREQUEST0_SHIFT 17
-#define I40E_GLPE_CPUTRIG0_TEPREQUEST0_MASK (0x1 << I40E_GLPE_CPUTRIG0_TEPREQUEST0_SHIFT)
-#define I40E_GLPE_CPUTRIG0_OOPREQUEST0_SHIFT 18
-#define I40E_GLPE_CPUTRIG0_OOPREQUEST0_MASK (0x1 << I40E_GLPE_CPUTRIG0_OOPREQUEST0_SHIFT)
-#define I40E_GLPE_DUAL40_RUPM 0x0000DA04 /* Reset: PE_CORER */
-#define I40E_GLPE_DUAL40_RUPM_DUAL_40G_MODE_SHIFT 0
-#define I40E_GLPE_DUAL40_RUPM_DUAL_40G_MODE_MASK (0x1 << I40E_GLPE_DUAL40_RUPM_DUAL_40G_MODE_SHIFT)
-#define I40E_GLPE_PFAEQEDROPCNT(_i) (0x00131440 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
-#define I40E_GLPE_PFAEQEDROPCNT_MAX_INDEX 15
-#define I40E_GLPE_PFAEQEDROPCNT_AEQEDROPCNT_SHIFT 0
-#define I40E_GLPE_PFAEQEDROPCNT_AEQEDROPCNT_MASK (0xFFFF << I40E_GLPE_PFAEQEDROPCNT_AEQEDROPCNT_SHIFT)
-#define I40E_GLPE_PFCEQEDROPCNT(_i) (0x001313C0 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
-#define I40E_GLPE_PFCEQEDROPCNT_MAX_INDEX 15
-#define I40E_GLPE_PFCEQEDROPCNT_CEQEDROPCNT_SHIFT 0
-#define I40E_GLPE_PFCEQEDROPCNT_CEQEDROPCNT_MASK (0xFFFF << I40E_GLPE_PFCEQEDROPCNT_CEQEDROPCNT_SHIFT)
-#define I40E_GLPE_PFCQEDROPCNT(_i) (0x00131340 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
-#define I40E_GLPE_PFCQEDROPCNT_MAX_INDEX 15
-#define I40E_GLPE_PFCQEDROPCNT_CQEDROPCNT_SHIFT 0
-#define I40E_GLPE_PFCQEDROPCNT_CQEDROPCNT_MASK (0xFFFF << I40E_GLPE_PFCQEDROPCNT_CQEDROPCNT_SHIFT)
-#define I40E_GLPE_RUPM_CQPPOOL 0x0000DACC /* Reset: PE_CORER */
-#define I40E_GLPE_RUPM_CQPPOOL_CQPSPADS_SHIFT 0
-#define I40E_GLPE_RUPM_CQPPOOL_CQPSPADS_MASK (0xFF << I40E_GLPE_RUPM_CQPPOOL_CQPSPADS_SHIFT)
-#define I40E_GLPE_RUPM_FLRPOOL 0x0000DAC4 /* Reset: PE_CORER */
-#define I40E_GLPE_RUPM_FLRPOOL_FLRSPADS_SHIFT 0
-#define I40E_GLPE_RUPM_FLRPOOL_FLRSPADS_MASK (0xFF << I40E_GLPE_RUPM_FLRPOOL_FLRSPADS_SHIFT)
-#define I40E_GLPE_RUPM_GCTL 0x0000DA00 /* Reset: PE_CORER */
-#define I40E_GLPE_RUPM_GCTL_ALLOFFTH_SHIFT 0
-#define I40E_GLPE_RUPM_GCTL_ALLOFFTH_MASK (0xFF << I40E_GLPE_RUPM_GCTL_ALLOFFTH_SHIFT)
-#define I40E_GLPE_RUPM_GCTL_RUPM_P0_DIS_SHIFT 26
-#define I40E_GLPE_RUPM_GCTL_RUPM_P0_DIS_MASK (0x1 << I40E_GLPE_RUPM_GCTL_RUPM_P0_DIS_SHIFT)
-#define I40E_GLPE_RUPM_GCTL_RUPM_P1_DIS_SHIFT 27
-#define I40E_GLPE_RUPM_GCTL_RUPM_P1_DIS_MASK (0x1 << I40E_GLPE_RUPM_GCTL_RUPM_P1_DIS_SHIFT)
-#define I40E_GLPE_RUPM_GCTL_RUPM_P2_DIS_SHIFT 28
-#define I40E_GLPE_RUPM_GCTL_RUPM_P2_DIS_MASK (0x1 << I40E_GLPE_RUPM_GCTL_RUPM_P2_DIS_SHIFT)
-#define I40E_GLPE_RUPM_GCTL_RUPM_P3_DIS_SHIFT 29
-#define I40E_GLPE_RUPM_GCTL_RUPM_P3_DIS_MASK (0x1 << I40E_GLPE_RUPM_GCTL_RUPM_P3_DIS_SHIFT)
-#define I40E_GLPE_RUPM_GCTL_RUPM_DIS_SHIFT 30
-#define I40E_GLPE_RUPM_GCTL_RUPM_DIS_MASK (0x1 << I40E_GLPE_RUPM_GCTL_RUPM_DIS_SHIFT)
-#define I40E_GLPE_RUPM_GCTL_SWLB_MODE_SHIFT 31
-#define I40E_GLPE_RUPM_GCTL_SWLB_MODE_MASK (0x1 << I40E_GLPE_RUPM_GCTL_SWLB_MODE_SHIFT)
-#define I40E_GLPE_RUPM_PTXPOOL 0x0000DAC8 /* Reset: PE_CORER */
-#define I40E_GLPE_RUPM_PTXPOOL_PTXSPADS_SHIFT 0
-#define I40E_GLPE_RUPM_PTXPOOL_PTXSPADS_MASK (0xFF << I40E_GLPE_RUPM_PTXPOOL_PTXSPADS_SHIFT)
-#define I40E_GLPE_RUPM_PUSHPOOL 0x0000DAC0 /* Reset: PE_CORER */
-#define I40E_GLPE_RUPM_PUSHPOOL_PUSHSPADS_SHIFT 0
-#define I40E_GLPE_RUPM_PUSHPOOL_PUSHSPADS_MASK (0xFF << I40E_GLPE_RUPM_PUSHPOOL_PUSHSPADS_SHIFT)
-#define I40E_GLPE_RUPM_TXHOST_EN 0x0000DA08 /* Reset: PE_CORER */
-#define I40E_GLPE_RUPM_TXHOST_EN_TXHOST_EN_SHIFT 0
-#define I40E_GLPE_RUPM_TXHOST_EN_TXHOST_EN_MASK (0x1 << I40E_GLPE_RUPM_TXHOST_EN_TXHOST_EN_SHIFT)
-#define I40E_GLPE_VFAEQEDROPCNT(_i) (0x00132540 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
-#define I40E_GLPE_VFAEQEDROPCNT_MAX_INDEX 31
-#define I40E_GLPE_VFAEQEDROPCNT_AEQEDROPCNT_SHIFT 0
-#define I40E_GLPE_VFAEQEDROPCNT_AEQEDROPCNT_MASK (0xFFFF << I40E_GLPE_VFAEQEDROPCNT_AEQEDROPCNT_SHIFT)
-#define I40E_GLPE_VFCEQEDROPCNT(_i) (0x00132440 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
-#define I40E_GLPE_VFCEQEDROPCNT_MAX_INDEX 31
-#define I40E_GLPE_VFCEQEDROPCNT_CEQEDROPCNT_SHIFT 0
-#define I40E_GLPE_VFCEQEDROPCNT_CEQEDROPCNT_MASK (0xFFFF << I40E_GLPE_VFCEQEDROPCNT_CEQEDROPCNT_SHIFT)
-#define I40E_GLPE_VFCQEDROPCNT(_i) (0x00132340 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
-#define I40E_GLPE_VFCQEDROPCNT_MAX_INDEX 31
-#define I40E_GLPE_VFCQEDROPCNT_CQEDROPCNT_SHIFT 0
-#define I40E_GLPE_VFCQEDROPCNT_CQEDROPCNT_MASK (0xFFFF << I40E_GLPE_VFCQEDROPCNT_CQEDROPCNT_SHIFT)
-#define I40E_GLPE_VFFLMOBJCTRL(_i) (0x0000D400 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPE_VFFLMOBJCTRL_MAX_INDEX 31
-#define I40E_GLPE_VFFLMOBJCTRL_XMIT_BLOCKSIZE_SHIFT 0
-#define I40E_GLPE_VFFLMOBJCTRL_XMIT_BLOCKSIZE_MASK (0x7 << I40E_GLPE_VFFLMOBJCTRL_XMIT_BLOCKSIZE_SHIFT)
-#define I40E_GLPE_VFFLMOBJCTRL_Q1_BLOCKSIZE_SHIFT 8
-#define I40E_GLPE_VFFLMOBJCTRL_Q1_BLOCKSIZE_MASK (0x7 << I40E_GLPE_VFFLMOBJCTRL_Q1_BLOCKSIZE_SHIFT)
-#define I40E_GLPE_VFFLMQ1ALLOCERR(_i) (0x0000C700 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPE_VFFLMQ1ALLOCERR_MAX_INDEX 31
-#define I40E_GLPE_VFFLMQ1ALLOCERR_ERROR_COUNT_SHIFT 0
-#define I40E_GLPE_VFFLMQ1ALLOCERR_ERROR_COUNT_MASK (0xFFFF << I40E_GLPE_VFFLMQ1ALLOCERR_ERROR_COUNT_SHIFT)
-#define I40E_GLPE_VFFLMXMITALLOCERR(_i) (0x0000C600 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPE_VFFLMXMITALLOCERR_MAX_INDEX 31
-#define I40E_GLPE_VFFLMXMITALLOCERR_ERROR_COUNT_SHIFT 0
-#define I40E_GLPE_VFFLMXMITALLOCERR_ERROR_COUNT_MASK (0xFFFF << I40E_GLPE_VFFLMXMITALLOCERR_ERROR_COUNT_SHIFT)
-#define I40E_GLPE_VFUDACTRL(_i) (0x0000C000 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPE_VFUDACTRL_MAX_INDEX 31
-#define I40E_GLPE_VFUDACTRL_IPV4MCFRAGRESBP_SHIFT 0
-#define I40E_GLPE_VFUDACTRL_IPV4MCFRAGRESBP_MASK (0x1 << I40E_GLPE_VFUDACTRL_IPV4MCFRAGRESBP_SHIFT)
-#define I40E_GLPE_VFUDACTRL_IPV4UCFRAGRESBP_SHIFT 1
-#define I40E_GLPE_VFUDACTRL_IPV4UCFRAGRESBP_MASK (0x1 << I40E_GLPE_VFUDACTRL_IPV4UCFRAGRESBP_SHIFT)
-#define I40E_GLPE_VFUDACTRL_IPV6MCFRAGRESBP_SHIFT 2
-#define I40E_GLPE_VFUDACTRL_IPV6MCFRAGRESBP_MASK (0x1 << I40E_GLPE_VFUDACTRL_IPV6MCFRAGRESBP_SHIFT)
-#define I40E_GLPE_VFUDACTRL_IPV6UCFRAGRESBP_SHIFT 3
-#define I40E_GLPE_VFUDACTRL_IPV6UCFRAGRESBP_MASK (0x1 << I40E_GLPE_VFUDACTRL_IPV6UCFRAGRESBP_SHIFT)
-#define I40E_GLPE_VFUDACTRL_UDPMCFRAGRESFAIL_SHIFT 4
-#define I40E_GLPE_VFUDACTRL_UDPMCFRAGRESFAIL_MASK (0x1 << I40E_GLPE_VFUDACTRL_UDPMCFRAGRESFAIL_SHIFT)
-#define I40E_GLPE_VFUDAUCFBQPN(_i) (0x0000C100 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPE_VFUDAUCFBQPN_MAX_INDEX 31
-#define I40E_GLPE_VFUDAUCFBQPN_QPN_SHIFT 0
-#define I40E_GLPE_VFUDAUCFBQPN_QPN_MASK (0x3FFFF << I40E_GLPE_VFUDAUCFBQPN_QPN_SHIFT)
-#define I40E_GLPE_VFUDAUCFBQPN_VALID_SHIFT 31
-#define I40E_GLPE_VFUDAUCFBQPN_VALID_MASK (0x1 << I40E_GLPE_VFUDAUCFBQPN_VALID_SHIFT)
-
-#define I40E_GLPES_PFIP4RXDISCARD(_i) (0x00010600 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4RXDISCARD_MAX_INDEX 15
-#define I40E_GLPES_PFIP4RXDISCARD_IP4RXDISCARD_SHIFT 0
-#define I40E_GLPES_PFIP4RXDISCARD_IP4RXDISCARD_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXDISCARD_IP4RXDISCARD_SHIFT)
-#define I40E_GLPES_PFIP4RXFRAGSHI(_i) (0x00010804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4RXFRAGSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT 0
-#define I40E_GLPES_PFIP4RXFRAGSHI_IP4RXFRAGSHI_MASK (0xFFFF << I40E_GLPES_PFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT)
-#define I40E_GLPES_PFIP4RXFRAGSLO(_i) (0x00010800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4RXFRAGSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT 0
-#define I40E_GLPES_PFIP4RXFRAGSLO_IP4RXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT)
-#define I40E_GLPES_PFIP4RXMCOCTSHI(_i) (0x00010A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4RXMCOCTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT 0
-#define I40E_GLPES_PFIP4RXMCOCTSHI_IP4RXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT)
-#define I40E_GLPES_PFIP4RXMCOCTSLO(_i) (0x00010A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4RXMCOCTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT 0
-#define I40E_GLPES_PFIP4RXMCOCTSLO_IP4RXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT)
-#define I40E_GLPES_PFIP4RXMCPKTSHI(_i) (0x00010C04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4RXMCPKTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT 0
-#define I40E_GLPES_PFIP4RXMCPKTSHI_IP4RXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT)
-#define I40E_GLPES_PFIP4RXMCPKTSLO(_i) (0x00010C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4RXMCPKTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT 0
-#define I40E_GLPES_PFIP4RXMCPKTSLO_IP4RXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT)
-#define I40E_GLPES_PFIP4RXOCTSHI(_i) (0x00010204 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4RXOCTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT 0
-#define I40E_GLPES_PFIP4RXOCTSHI_IP4RXOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT)
-#define I40E_GLPES_PFIP4RXOCTSLO(_i) (0x00010200 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4RXOCTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT 0
-#define I40E_GLPES_PFIP4RXOCTSLO_IP4RXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT)
-#define I40E_GLPES_PFIP4RXPKTSHI(_i) (0x00010404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4RXPKTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT 0
-#define I40E_GLPES_PFIP4RXPKTSHI_IP4RXPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT)
-#define I40E_GLPES_PFIP4RXPKTSLO(_i) (0x00010400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4RXPKTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT 0
-#define I40E_GLPES_PFIP4RXPKTSLO_IP4RXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT)
-#define I40E_GLPES_PFIP4RXTRUNC(_i) (0x00010700 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4RXTRUNC_MAX_INDEX 15
-#define I40E_GLPES_PFIP4RXTRUNC_IP4RXTRUNC_SHIFT 0
-#define I40E_GLPES_PFIP4RXTRUNC_IP4RXTRUNC_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXTRUNC_IP4RXTRUNC_SHIFT)
-#define I40E_GLPES_PFIP4TXFRAGSHI(_i) (0x00011E04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4TXFRAGSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT 0
-#define I40E_GLPES_PFIP4TXFRAGSHI_IP4TXFRAGSHI_MASK (0xFFFF << I40E_GLPES_PFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT)
-#define I40E_GLPES_PFIP4TXFRAGSLO(_i) (0x00011E00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4TXFRAGSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT 0
-#define I40E_GLPES_PFIP4TXFRAGSLO_IP4TXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT)
-#define I40E_GLPES_PFIP4TXMCOCTSHI(_i) (0x00012004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4TXMCOCTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT 0
-#define I40E_GLPES_PFIP4TXMCOCTSHI_IP4TXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT)
-#define I40E_GLPES_PFIP4TXMCOCTSLO(_i) (0x00012000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4TXMCOCTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT 0
-#define I40E_GLPES_PFIP4TXMCOCTSLO_IP4TXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT)
-#define I40E_GLPES_PFIP4TXMCPKTSHI(_i) (0x00012204 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4TXMCPKTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT 0
-#define I40E_GLPES_PFIP4TXMCPKTSHI_IP4TXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT)
-#define I40E_GLPES_PFIP4TXMCPKTSLO(_i) (0x00012200 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4TXMCPKTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT 0
-#define I40E_GLPES_PFIP4TXMCPKTSLO_IP4TXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT)
-#define I40E_GLPES_PFIP4TXNOROUTE(_i) (0x00012E00 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4TXNOROUTE_MAX_INDEX 15
-#define I40E_GLPES_PFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT 0
-#define I40E_GLPES_PFIP4TXNOROUTE_IP4TXNOROUTE_MASK (0xFFFFFF << I40E_GLPES_PFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT)
-#define I40E_GLPES_PFIP4TXOCTSHI(_i) (0x00011A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4TXOCTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT 0
-#define I40E_GLPES_PFIP4TXOCTSHI_IP4TXOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT)
-#define I40E_GLPES_PFIP4TXOCTSLO(_i) (0x00011A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4TXOCTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT 0
-#define I40E_GLPES_PFIP4TXOCTSLO_IP4TXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT)
-#define I40E_GLPES_PFIP4TXPKTSHI(_i) (0x00011C04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4TXPKTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT 0
-#define I40E_GLPES_PFIP4TXPKTSHI_IP4TXPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT)
-#define I40E_GLPES_PFIP4TXPKTSLO(_i) (0x00011C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4TXPKTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT 0
-#define I40E_GLPES_PFIP4TXPKTSLO_IP4TXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT)
-#define I40E_GLPES_PFIP6RXDISCARD(_i) (0x00011200 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6RXDISCARD_MAX_INDEX 15
-#define I40E_GLPES_PFIP6RXDISCARD_IP6RXDISCARD_SHIFT 0
-#define I40E_GLPES_PFIP6RXDISCARD_IP6RXDISCARD_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXDISCARD_IP6RXDISCARD_SHIFT)
-#define I40E_GLPES_PFIP6RXFRAGSHI(_i) (0x00011404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6RXFRAGSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT 0
-#define I40E_GLPES_PFIP6RXFRAGSHI_IP6RXFRAGSHI_MASK (0xFFFF << I40E_GLPES_PFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT)
-#define I40E_GLPES_PFIP6RXFRAGSLO(_i) (0x00011400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6RXFRAGSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT 0
-#define I40E_GLPES_PFIP6RXFRAGSLO_IP6RXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT)
-#define I40E_GLPES_PFIP6RXMCOCTSHI(_i) (0x00011604 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6RXMCOCTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT 0
-#define I40E_GLPES_PFIP6RXMCOCTSHI_IP6RXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT)
-#define I40E_GLPES_PFIP6RXMCOCTSLO(_i) (0x00011600 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6RXMCOCTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT 0
-#define I40E_GLPES_PFIP6RXMCOCTSLO_IP6RXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT)
-#define I40E_GLPES_PFIP6RXMCPKTSHI(_i) (0x00011804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6RXMCPKTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT 0
-#define I40E_GLPES_PFIP6RXMCPKTSHI_IP6RXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT)
-#define I40E_GLPES_PFIP6RXMCPKTSLO(_i) (0x00011800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6RXMCPKTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT 0
-#define I40E_GLPES_PFIP6RXMCPKTSLO_IP6RXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT)
-#define I40E_GLPES_PFIP6RXOCTSHI(_i) (0x00010E04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6RXOCTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT 0
-#define I40E_GLPES_PFIP6RXOCTSHI_IP6RXOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT)
-#define I40E_GLPES_PFIP6RXOCTSLO(_i) (0x00010E00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6RXOCTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT 0
-#define I40E_GLPES_PFIP6RXOCTSLO_IP6RXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT)
-#define I40E_GLPES_PFIP6RXPKTSHI(_i) (0x00011004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6RXPKTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT 0
-#define I40E_GLPES_PFIP6RXPKTSHI_IP6RXPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT)
-#define I40E_GLPES_PFIP6RXPKTSLO(_i) (0x00011000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6RXPKTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT 0
-#define I40E_GLPES_PFIP6RXPKTSLO_IP6RXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT)
-#define I40E_GLPES_PFIP6RXTRUNC(_i) (0x00011300 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6RXTRUNC_MAX_INDEX 15
-#define I40E_GLPES_PFIP6RXTRUNC_IP6RXTRUNC_SHIFT 0
-#define I40E_GLPES_PFIP6RXTRUNC_IP6RXTRUNC_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXTRUNC_IP6RXTRUNC_SHIFT)
-#define I40E_GLPES_PFIP6TXFRAGSHI(_i) (0x00012804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6TXFRAGSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT 0
-#define I40E_GLPES_PFIP6TXFRAGSHI_IP6TXFRAGSHI_MASK (0xFFFF << I40E_GLPES_PFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT)
-#define I40E_GLPES_PFIP6TXFRAGSLO(_i) (0x00012800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6TXFRAGSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT 0
-#define I40E_GLPES_PFIP6TXFRAGSLO_IP6TXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT)
-#define I40E_GLPES_PFIP6TXMCOCTSHI(_i) (0x00012A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6TXMCOCTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT 0
-#define I40E_GLPES_PFIP6TXMCOCTSHI_IP6TXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT)
-#define I40E_GLPES_PFIP6TXMCOCTSLO(_i) (0x00012A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6TXMCOCTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT 0
-#define I40E_GLPES_PFIP6TXMCOCTSLO_IP6TXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT)
-#define I40E_GLPES_PFIP6TXMCPKTSHI(_i) (0x00012C04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6TXMCPKTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT 0
-#define I40E_GLPES_PFIP6TXMCPKTSHI_IP6TXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT)
-#define I40E_GLPES_PFIP6TXMCPKTSLO(_i) (0x00012C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6TXMCPKTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT 0
-#define I40E_GLPES_PFIP6TXMCPKTSLO_IP6TXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT)
-#define I40E_GLPES_PFIP6TXNOROUTE(_i) (0x00012F00 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6TXNOROUTE_MAX_INDEX 15
-#define I40E_GLPES_PFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT 0
-#define I40E_GLPES_PFIP6TXNOROUTE_IP6TXNOROUTE_MASK (0xFFFFFF << I40E_GLPES_PFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT)
-#define I40E_GLPES_PFIP6TXOCTSHI(_i) (0x00012404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6TXOCTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT 0
-#define I40E_GLPES_PFIP6TXOCTSHI_IP6TXOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT)
-#define I40E_GLPES_PFIP6TXOCTSLO(_i) (0x00012400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6TXOCTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT 0
-#define I40E_GLPES_PFIP6TXOCTSLO_IP6TXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT)
-#define I40E_GLPES_PFIP6TXPKTSHI(_i) (0x00012604 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6TXPKTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT 0
-#define I40E_GLPES_PFIP6TXPKTSHI_IP6TXPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT)
-#define I40E_GLPES_PFIP6TXPKTSLO(_i) (0x00012600 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6TXPKTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT 0
-#define I40E_GLPES_PFIP6TXPKTSLO_IP6TXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT)
-#define I40E_GLPES_PFRDMARXRDSHI(_i) (0x00013E04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMARXRDSHI_MAX_INDEX 15
-#define I40E_GLPES_PFRDMARXRDSHI_RDMARXRDSHI_SHIFT 0
-#define I40E_GLPES_PFRDMARXRDSHI_RDMARXRDSHI_MASK (0xFFFF << I40E_GLPES_PFRDMARXRDSHI_RDMARXRDSHI_SHIFT)
-#define I40E_GLPES_PFRDMARXRDSLO(_i) (0x00013E00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMARXRDSLO_MAX_INDEX 15
-#define I40E_GLPES_PFRDMARXRDSLO_RDMARXRDSLO_SHIFT 0
-#define I40E_GLPES_PFRDMARXRDSLO_RDMARXRDSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMARXRDSLO_RDMARXRDSLO_SHIFT)
-#define I40E_GLPES_PFRDMARXSNDSHI(_i) (0x00014004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMARXSNDSHI_MAX_INDEX 15
-#define I40E_GLPES_PFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT 0
-#define I40E_GLPES_PFRDMARXSNDSHI_RDMARXSNDSHI_MASK (0xFFFF << I40E_GLPES_PFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT)
-#define I40E_GLPES_PFRDMARXSNDSLO(_i) (0x00014000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMARXSNDSLO_MAX_INDEX 15
-#define I40E_GLPES_PFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT 0
-#define I40E_GLPES_PFRDMARXSNDSLO_RDMARXSNDSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT)
-#define I40E_GLPES_PFRDMARXWRSHI(_i) (0x00013C04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMARXWRSHI_MAX_INDEX 15
-#define I40E_GLPES_PFRDMARXWRSHI_RDMARXWRSHI_SHIFT 0
-#define I40E_GLPES_PFRDMARXWRSHI_RDMARXWRSHI_MASK (0xFFFF << I40E_GLPES_PFRDMARXWRSHI_RDMARXWRSHI_SHIFT)
-#define I40E_GLPES_PFRDMARXWRSLO(_i) (0x00013C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMARXWRSLO_MAX_INDEX 15
-#define I40E_GLPES_PFRDMARXWRSLO_RDMARXWRSLO_SHIFT 0
-#define I40E_GLPES_PFRDMARXWRSLO_RDMARXWRSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMARXWRSLO_RDMARXWRSLO_SHIFT)
-#define I40E_GLPES_PFRDMATXRDSHI(_i) (0x00014404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMATXRDSHI_MAX_INDEX 15
-#define I40E_GLPES_PFRDMATXRDSHI_RDMARXRDSHI_SHIFT 0
-#define I40E_GLPES_PFRDMATXRDSHI_RDMARXRDSHI_MASK (0xFFFF << I40E_GLPES_PFRDMATXRDSHI_RDMARXRDSHI_SHIFT)
-#define I40E_GLPES_PFRDMATXRDSLO(_i) (0x00014400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMATXRDSLO_MAX_INDEX 15
-#define I40E_GLPES_PFRDMATXRDSLO_RDMARXRDSLO_SHIFT 0
-#define I40E_GLPES_PFRDMATXRDSLO_RDMARXRDSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMATXRDSLO_RDMARXRDSLO_SHIFT)
-#define I40E_GLPES_PFRDMATXSNDSHI(_i) (0x00014604 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMATXSNDSHI_MAX_INDEX 15
-#define I40E_GLPES_PFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT 0
-#define I40E_GLPES_PFRDMATXSNDSHI_RDMARXSNDSHI_MASK (0xFFFF << I40E_GLPES_PFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT)
-#define I40E_GLPES_PFRDMATXSNDSLO(_i) (0x00014600 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMATXSNDSLO_MAX_INDEX 15
-#define I40E_GLPES_PFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT 0
-#define I40E_GLPES_PFRDMATXSNDSLO_RDMARXSNDSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT)
-#define I40E_GLPES_PFRDMATXWRSHI(_i) (0x00014204 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMATXWRSHI_MAX_INDEX 15
-#define I40E_GLPES_PFRDMATXWRSHI_RDMARXWRSHI_SHIFT 0
-#define I40E_GLPES_PFRDMATXWRSHI_RDMARXWRSHI_MASK (0xFFFF << I40E_GLPES_PFRDMATXWRSHI_RDMARXWRSHI_SHIFT)
-#define I40E_GLPES_PFRDMATXWRSLO(_i) (0x00014200 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMATXWRSLO_MAX_INDEX 15
-#define I40E_GLPES_PFRDMATXWRSLO_RDMARXWRSLO_SHIFT 0
-#define I40E_GLPES_PFRDMATXWRSLO_RDMARXWRSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMATXWRSLO_RDMARXWRSLO_SHIFT)
-#define I40E_GLPES_PFRDMAVBNDHI(_i) (0x00014804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMAVBNDHI_MAX_INDEX 15
-#define I40E_GLPES_PFRDMAVBNDHI_RDMAVBNDHI_SHIFT 0
-#define I40E_GLPES_PFRDMAVBNDHI_RDMAVBNDHI_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMAVBNDHI_RDMAVBNDHI_SHIFT)
-#define I40E_GLPES_PFRDMAVBNDLO(_i) (0x00014800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMAVBNDLO_MAX_INDEX 15
-#define I40E_GLPES_PFRDMAVBNDLO_RDMAVBNDLO_SHIFT 0
-#define I40E_GLPES_PFRDMAVBNDLO_RDMAVBNDLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMAVBNDLO_RDMAVBNDLO_SHIFT)
-#define I40E_GLPES_PFRDMAVINVHI(_i) (0x00014A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMAVINVHI_MAX_INDEX 15
-#define I40E_GLPES_PFRDMAVINVHI_RDMAVINVHI_SHIFT 0
-#define I40E_GLPES_PFRDMAVINVHI_RDMAVINVHI_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMAVINVHI_RDMAVINVHI_SHIFT)
-#define I40E_GLPES_PFRDMAVINVLO(_i) (0x00014A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMAVINVLO_MAX_INDEX 15
-#define I40E_GLPES_PFRDMAVINVLO_RDMAVINVLO_SHIFT 0
-#define I40E_GLPES_PFRDMAVINVLO_RDMAVINVLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMAVINVLO_RDMAVINVLO_SHIFT)
-#define I40E_GLPES_PFRXVLANERR(_i) (0x00010000 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRXVLANERR_MAX_INDEX 15
-#define I40E_GLPES_PFRXVLANERR_RXVLANERR_SHIFT 0
-#define I40E_GLPES_PFRXVLANERR_RXVLANERR_MASK (0xFFFFFF << I40E_GLPES_PFRXVLANERR_RXVLANERR_SHIFT)
-#define I40E_GLPES_PFTCPRTXSEG(_i) (0x00013600 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFTCPRTXSEG_MAX_INDEX 15
-#define I40E_GLPES_PFTCPRTXSEG_TCPRTXSEG_SHIFT 0
-#define I40E_GLPES_PFTCPRTXSEG_TCPRTXSEG_MASK (0xFFFFFFFF << I40E_GLPES_PFTCPRTXSEG_TCPRTXSEG_SHIFT)
-#define I40E_GLPES_PFTCPRXOPTERR(_i) (0x00013200 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFTCPRXOPTERR_MAX_INDEX 15
-#define I40E_GLPES_PFTCPRXOPTERR_TCPRXOPTERR_SHIFT 0
-#define I40E_GLPES_PFTCPRXOPTERR_TCPRXOPTERR_MASK (0xFFFFFF << I40E_GLPES_PFTCPRXOPTERR_TCPRXOPTERR_SHIFT)
-#define I40E_GLPES_PFTCPRXPROTOERR(_i) (0x00013300 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFTCPRXPROTOERR_MAX_INDEX 15
-#define I40E_GLPES_PFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT 0
-#define I40E_GLPES_PFTCPRXPROTOERR_TCPRXPROTOERR_MASK (0xFFFFFF << I40E_GLPES_PFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT)
-#define I40E_GLPES_PFTCPRXSEGSHI(_i) (0x00013004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFTCPRXSEGSHI_MAX_INDEX 15
-#define I40E_GLPES_PFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT 0
-#define I40E_GLPES_PFTCPRXSEGSHI_TCPRXSEGSHI_MASK (0xFFFF << I40E_GLPES_PFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT)
-#define I40E_GLPES_PFTCPRXSEGSLO(_i) (0x00013000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFTCPRXSEGSLO_MAX_INDEX 15
-#define I40E_GLPES_PFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT 0
-#define I40E_GLPES_PFTCPRXSEGSLO_TCPRXSEGSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT)
-#define I40E_GLPES_PFTCPTXSEGHI(_i) (0x00013404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFTCPTXSEGHI_MAX_INDEX 15
-#define I40E_GLPES_PFTCPTXSEGHI_TCPTXSEGHI_SHIFT 0
-#define I40E_GLPES_PFTCPTXSEGHI_TCPTXSEGHI_MASK (0xFFFF << I40E_GLPES_PFTCPTXSEGHI_TCPTXSEGHI_SHIFT)
-#define I40E_GLPES_PFTCPTXSEGLO(_i) (0x00013400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFTCPTXSEGLO_MAX_INDEX 15
-#define I40E_GLPES_PFTCPTXSEGLO_TCPTXSEGLO_SHIFT 0
-#define I40E_GLPES_PFTCPTXSEGLO_TCPTXSEGLO_MASK (0xFFFFFFFF << I40E_GLPES_PFTCPTXSEGLO_TCPTXSEGLO_SHIFT)
-#define I40E_GLPES_PFUDPRXPKTSHI(_i) (0x00013804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFUDPRXPKTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT 0
-#define I40E_GLPES_PFUDPRXPKTSHI_UDPRXPKTSHI_MASK (0xFFFF << I40E_GLPES_PFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT)
-#define I40E_GLPES_PFUDPRXPKTSLO(_i) (0x00013800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFUDPRXPKTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT 0
-#define I40E_GLPES_PFUDPRXPKTSLO_UDPRXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT)
-#define I40E_GLPES_PFUDPTXPKTSHI(_i) (0x00013A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFUDPTXPKTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT 0
-#define I40E_GLPES_PFUDPTXPKTSHI_UDPTXPKTSHI_MASK (0xFFFF << I40E_GLPES_PFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT)
-#define I40E_GLPES_PFUDPTXPKTSLO(_i) (0x00013A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFUDPTXPKTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT 0
-#define I40E_GLPES_PFUDPTXPKTSLO_UDPTXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT)
-#define I40E_GLPES_RDMARXMULTFPDUSHI 0x0001E014 /* Reset: PE_CORER */
-#define I40E_GLPES_RDMARXMULTFPDUSHI_RDMARXMULTFPDUSHI_SHIFT 0
-#define I40E_GLPES_RDMARXMULTFPDUSHI_RDMARXMULTFPDUSHI_MASK (0xFFFFFF << I40E_GLPES_RDMARXMULTFPDUSHI_RDMARXMULTFPDUSHI_SHIFT)
-#define I40E_GLPES_RDMARXMULTFPDUSLO 0x0001E010 /* Reset: PE_CORER */
-#define I40E_GLPES_RDMARXMULTFPDUSLO_RDMARXMULTFPDUSLO_SHIFT 0
-#define I40E_GLPES_RDMARXMULTFPDUSLO_RDMARXMULTFPDUSLO_MASK (0xFFFFFFFF << I40E_GLPES_RDMARXMULTFPDUSLO_RDMARXMULTFPDUSLO_SHIFT)
-#define I40E_GLPES_RDMARXOOODDPHI 0x0001E01C /* Reset: PE_CORER */
-#define I40E_GLPES_RDMARXOOODDPHI_RDMARXOOODDPHI_SHIFT 0
-#define I40E_GLPES_RDMARXOOODDPHI_RDMARXOOODDPHI_MASK (0xFFFFFF << I40E_GLPES_RDMARXOOODDPHI_RDMARXOOODDPHI_SHIFT)
-#define I40E_GLPES_RDMARXOOODDPLO 0x0001E018 /* Reset: PE_CORER */
-#define I40E_GLPES_RDMARXOOODDPLO_RDMARXOOODDPLO_SHIFT 0
-#define I40E_GLPES_RDMARXOOODDPLO_RDMARXOOODDPLO_MASK (0xFFFFFFFF << I40E_GLPES_RDMARXOOODDPLO_RDMARXOOODDPLO_SHIFT)
-#define I40E_GLPES_RDMARXOOONOMARK 0x0001E004 /* Reset: PE_CORER */
-#define I40E_GLPES_RDMARXOOONOMARK_RDMAOOONOMARK_SHIFT 0
-#define I40E_GLPES_RDMARXOOONOMARK_RDMAOOONOMARK_MASK (0xFFFFFFFF << I40E_GLPES_RDMARXOOONOMARK_RDMAOOONOMARK_SHIFT)
-#define I40E_GLPES_RDMARXUNALIGN 0x0001E000 /* Reset: PE_CORER */
-#define I40E_GLPES_RDMARXUNALIGN_RDMRXAUNALIGN_SHIFT 0
-#define I40E_GLPES_RDMARXUNALIGN_RDMRXAUNALIGN_MASK (0xFFFFFFFF << I40E_GLPES_RDMARXUNALIGN_RDMRXAUNALIGN_SHIFT)
-#define I40E_GLPES_TCPRXFOURHOLEHI 0x0001E044 /* Reset: PE_CORER */
-#define I40E_GLPES_TCPRXFOURHOLEHI_TCPRXFOURHOLEHI_SHIFT 0
-#define I40E_GLPES_TCPRXFOURHOLEHI_TCPRXFOURHOLEHI_MASK (0xFFFFFF << I40E_GLPES_TCPRXFOURHOLEHI_TCPRXFOURHOLEHI_SHIFT)
-#define I40E_GLPES_TCPRXFOURHOLELO 0x0001E040 /* Reset: PE_CORER */
-#define I40E_GLPES_TCPRXFOURHOLELO_TCPRXFOURHOLELO_SHIFT 0
-#define I40E_GLPES_TCPRXFOURHOLELO_TCPRXFOURHOLELO_MASK (0xFFFFFFFF << I40E_GLPES_TCPRXFOURHOLELO_TCPRXFOURHOLELO_SHIFT)
-#define I40E_GLPES_TCPRXONEHOLEHI 0x0001E02C /* Reset: PE_CORER */
-#define I40E_GLPES_TCPRXONEHOLEHI_TCPRXONEHOLEHI_SHIFT 0
-#define I40E_GLPES_TCPRXONEHOLEHI_TCPRXONEHOLEHI_MASK (0xFFFFFF << I40E_GLPES_TCPRXONEHOLEHI_TCPRXONEHOLEHI_SHIFT)
-#define I40E_GLPES_TCPRXONEHOLELO 0x0001E028 /* Reset: PE_CORER */
-#define I40E_GLPES_TCPRXONEHOLELO_TCPRXONEHOLELO_SHIFT 0
-#define I40E_GLPES_TCPRXONEHOLELO_TCPRXONEHOLELO_MASK (0xFFFFFFFF << I40E_GLPES_TCPRXONEHOLELO_TCPRXONEHOLELO_SHIFT)
-#define I40E_GLPES_TCPRXPUREACKHI 0x0001E024 /* Reset: PE_CORER */
-#define I40E_GLPES_TCPRXPUREACKHI_TCPRXPUREACKSHI_SHIFT 0
-#define I40E_GLPES_TCPRXPUREACKHI_TCPRXPUREACKSHI_MASK (0xFFFFFF << I40E_GLPES_TCPRXPUREACKHI_TCPRXPUREACKSHI_SHIFT)
-#define I40E_GLPES_TCPRXPUREACKSLO 0x0001E020 /* Reset: PE_CORER */
-#define I40E_GLPES_TCPRXPUREACKSLO_TCPRXPUREACKLO_SHIFT 0
-#define I40E_GLPES_TCPRXPUREACKSLO_TCPRXPUREACKLO_MASK (0xFFFFFFFF << I40E_GLPES_TCPRXPUREACKSLO_TCPRXPUREACKLO_SHIFT)
-#define I40E_GLPES_TCPRXTHREEHOLEHI 0x0001E03C /* Reset: PE_CORER */
-#define I40E_GLPES_TCPRXTHREEHOLEHI_TCPRXTHREEHOLEHI_SHIFT 0
-#define I40E_GLPES_TCPRXTHREEHOLEHI_TCPRXTHREEHOLEHI_MASK (0xFFFFFF << I40E_GLPES_TCPRXTHREEHOLEHI_TCPRXTHREEHOLEHI_SHIFT)
-#define I40E_GLPES_TCPRXTHREEHOLELO 0x0001E038 /* Reset: PE_CORER */
-#define I40E_GLPES_TCPRXTHREEHOLELO_TCPRXTHREEHOLELO_SHIFT 0
-#define I40E_GLPES_TCPRXTHREEHOLELO_TCPRXTHREEHOLELO_MASK (0xFFFFFFFF << I40E_GLPES_TCPRXTHREEHOLELO_TCPRXTHREEHOLELO_SHIFT)
-#define I40E_GLPES_TCPRXTWOHOLEHI 0x0001E034 /* Reset: PE_CORER */
-#define I40E_GLPES_TCPRXTWOHOLEHI_TCPRXTWOHOLEHI_SHIFT 0
-#define I40E_GLPES_TCPRXTWOHOLEHI_TCPRXTWOHOLEHI_MASK (0xFFFFFF << I40E_GLPES_TCPRXTWOHOLEHI_TCPRXTWOHOLEHI_SHIFT)
-#define I40E_GLPES_TCPRXTWOHOLELO 0x0001E030 /* Reset: PE_CORER */
-#define I40E_GLPES_TCPRXTWOHOLELO_TCPRXTWOHOLELO_SHIFT 0
-#define I40E_GLPES_TCPRXTWOHOLELO_TCPRXTWOHOLELO_MASK (0xFFFFFFFF << I40E_GLPES_TCPRXTWOHOLELO_TCPRXTWOHOLELO_SHIFT)
-#define I40E_GLPES_TCPTXRETRANSFASTHI 0x0001E04C /* Reset: PE_CORER */
-#define I40E_GLPES_TCPTXRETRANSFASTHI_TCPTXRETRANSFASTHI_SHIFT 0
-#define I40E_GLPES_TCPTXRETRANSFASTHI_TCPTXRETRANSFASTHI_MASK (0xFFFFFF << I40E_GLPES_TCPTXRETRANSFASTHI_TCPTXRETRANSFASTHI_SHIFT)
-#define I40E_GLPES_TCPTXRETRANSFASTLO 0x0001E048 /* Reset: PE_CORER */
-#define I40E_GLPES_TCPTXRETRANSFASTLO_TCPTXRETRANSFASTLO_SHIFT 0
-#define I40E_GLPES_TCPTXRETRANSFASTLO_TCPTXRETRANSFASTLO_MASK (0xFFFFFFFF << I40E_GLPES_TCPTXRETRANSFASTLO_TCPTXRETRANSFASTLO_SHIFT)
-#define I40E_GLPES_TCPTXTOUTSFASTHI 0x0001E054 /* Reset: PE_CORER */
-#define I40E_GLPES_TCPTXTOUTSFASTHI_TCPTXTOUTSFASTHI_SHIFT 0
-#define I40E_GLPES_TCPTXTOUTSFASTHI_TCPTXTOUTSFASTHI_MASK (0xFFFFFF << I40E_GLPES_TCPTXTOUTSFASTHI_TCPTXTOUTSFASTHI_SHIFT)
-#define I40E_GLPES_TCPTXTOUTSFASTLO 0x0001E050 /* Reset: PE_CORER */
-#define I40E_GLPES_TCPTXTOUTSFASTLO_TCPTXTOUTSFASTLO_SHIFT 0
-#define I40E_GLPES_TCPTXTOUTSFASTLO_TCPTXTOUTSFASTLO_MASK (0xFFFFFFFF << I40E_GLPES_TCPTXTOUTSFASTLO_TCPTXTOUTSFASTLO_SHIFT)
-#define I40E_GLPES_TCPTXTOUTSHI 0x0001E05C /* Reset: PE_CORER */
-#define I40E_GLPES_TCPTXTOUTSHI_TCPTXTOUTSHI_SHIFT 0
-#define I40E_GLPES_TCPTXTOUTSHI_TCPTXTOUTSHI_MASK (0xFFFFFF << I40E_GLPES_TCPTXTOUTSHI_TCPTXTOUTSHI_SHIFT)
-#define I40E_GLPES_TCPTXTOUTSLO 0x0001E058 /* Reset: PE_CORER */
-#define I40E_GLPES_TCPTXTOUTSLO_TCPTXTOUTSLO_SHIFT 0
-#define I40E_GLPES_TCPTXTOUTSLO_TCPTXTOUTSLO_MASK (0xFFFFFFFF << I40E_GLPES_TCPTXTOUTSLO_TCPTXTOUTSLO_SHIFT)
-#define I40E_GLPES_VFIP4RXDISCARD(_i) (0x00018600 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4RXDISCARD_MAX_INDEX 31
-#define I40E_GLPES_VFIP4RXDISCARD_IP4RXDISCARD_SHIFT 0
-#define I40E_GLPES_VFIP4RXDISCARD_IP4RXDISCARD_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXDISCARD_IP4RXDISCARD_SHIFT)
-#define I40E_GLPES_VFIP4RXFRAGSHI(_i) (0x00018804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4RXFRAGSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT 0
-#define I40E_GLPES_VFIP4RXFRAGSHI_IP4RXFRAGSHI_MASK (0xFFFF << I40E_GLPES_VFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT)
-#define I40E_GLPES_VFIP4RXFRAGSLO(_i) (0x00018800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4RXFRAGSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT 0
-#define I40E_GLPES_VFIP4RXFRAGSLO_IP4RXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT)
-#define I40E_GLPES_VFIP4RXMCOCTSHI(_i) (0x00018A04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4RXMCOCTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT 0
-#define I40E_GLPES_VFIP4RXMCOCTSHI_IP4RXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT)
-#define I40E_GLPES_VFIP4RXMCOCTSLO(_i) (0x00018A00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4RXMCOCTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT 0
-#define I40E_GLPES_VFIP4RXMCOCTSLO_IP4RXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT)
-#define I40E_GLPES_VFIP4RXMCPKTSHI(_i) (0x00018C04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4RXMCPKTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT 0
-#define I40E_GLPES_VFIP4RXMCPKTSHI_IP4RXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT)
-#define I40E_GLPES_VFIP4RXMCPKTSLO(_i) (0x00018C00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4RXMCPKTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT 0
-#define I40E_GLPES_VFIP4RXMCPKTSLO_IP4RXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT)
-#define I40E_GLPES_VFIP4RXOCTSHI(_i) (0x00018204 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4RXOCTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT 0
-#define I40E_GLPES_VFIP4RXOCTSHI_IP4RXOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT)
-#define I40E_GLPES_VFIP4RXOCTSLO(_i) (0x00018200 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4RXOCTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT 0
-#define I40E_GLPES_VFIP4RXOCTSLO_IP4RXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT)
-#define I40E_GLPES_VFIP4RXPKTSHI(_i) (0x00018404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4RXPKTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT 0
-#define I40E_GLPES_VFIP4RXPKTSHI_IP4RXPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT)
-#define I40E_GLPES_VFIP4RXPKTSLO(_i) (0x00018400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4RXPKTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT 0
-#define I40E_GLPES_VFIP4RXPKTSLO_IP4RXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT)
-#define I40E_GLPES_VFIP4RXTRUNC(_i) (0x00018700 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4RXTRUNC_MAX_INDEX 31
-#define I40E_GLPES_VFIP4RXTRUNC_IP4RXTRUNC_SHIFT 0
-#define I40E_GLPES_VFIP4RXTRUNC_IP4RXTRUNC_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXTRUNC_IP4RXTRUNC_SHIFT)
-#define I40E_GLPES_VFIP4TXFRAGSHI(_i) (0x00019E04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4TXFRAGSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT 0
-#define I40E_GLPES_VFIP4TXFRAGSHI_IP4TXFRAGSHI_MASK (0xFFFF << I40E_GLPES_VFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT)
-#define I40E_GLPES_VFIP4TXFRAGSLO(_i) (0x00019E00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4TXFRAGSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT 0
-#define I40E_GLPES_VFIP4TXFRAGSLO_IP4TXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT)
-#define I40E_GLPES_VFIP4TXMCOCTSHI(_i) (0x0001A004 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4TXMCOCTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT 0
-#define I40E_GLPES_VFIP4TXMCOCTSHI_IP4TXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT)
-#define I40E_GLPES_VFIP4TXMCOCTSLO(_i) (0x0001A000 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4TXMCOCTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT 0
-#define I40E_GLPES_VFIP4TXMCOCTSLO_IP4TXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT)
-#define I40E_GLPES_VFIP4TXMCPKTSHI(_i) (0x0001A204 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4TXMCPKTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT 0
-#define I40E_GLPES_VFIP4TXMCPKTSHI_IP4TXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT)
-#define I40E_GLPES_VFIP4TXMCPKTSLO(_i) (0x0001A200 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4TXMCPKTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT 0
-#define I40E_GLPES_VFIP4TXMCPKTSLO_IP4TXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT)
-#define I40E_GLPES_VFIP4TXNOROUTE(_i) (0x0001AE00 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4TXNOROUTE_MAX_INDEX 31
-#define I40E_GLPES_VFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT 0
-#define I40E_GLPES_VFIP4TXNOROUTE_IP4TXNOROUTE_MASK (0xFFFFFF << I40E_GLPES_VFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT)
-#define I40E_GLPES_VFIP4TXOCTSHI(_i) (0x00019A04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4TXOCTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT 0
-#define I40E_GLPES_VFIP4TXOCTSHI_IP4TXOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT)
-#define I40E_GLPES_VFIP4TXOCTSLO(_i) (0x00019A00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4TXOCTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT 0
-#define I40E_GLPES_VFIP4TXOCTSLO_IP4TXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT)
-#define I40E_GLPES_VFIP4TXPKTSHI(_i) (0x00019C04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4TXPKTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT 0
-#define I40E_GLPES_VFIP4TXPKTSHI_IP4TXPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT)
-#define I40E_GLPES_VFIP4TXPKTSLO(_i) (0x00019C00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4TXPKTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT 0
-#define I40E_GLPES_VFIP4TXPKTSLO_IP4TXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT)
-#define I40E_GLPES_VFIP6RXDISCARD(_i) (0x00019200 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6RXDISCARD_MAX_INDEX 31
-#define I40E_GLPES_VFIP6RXDISCARD_IP6RXDISCARD_SHIFT 0
-#define I40E_GLPES_VFIP6RXDISCARD_IP6RXDISCARD_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXDISCARD_IP6RXDISCARD_SHIFT)
-#define I40E_GLPES_VFIP6RXFRAGSHI(_i) (0x00019404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6RXFRAGSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT 0
-#define I40E_GLPES_VFIP6RXFRAGSHI_IP6RXFRAGSHI_MASK (0xFFFF << I40E_GLPES_VFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT)
-#define I40E_GLPES_VFIP6RXFRAGSLO(_i) (0x00019400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6RXFRAGSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT 0
-#define I40E_GLPES_VFIP6RXFRAGSLO_IP6RXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT)
-#define I40E_GLPES_VFIP6RXMCOCTSHI(_i) (0x00019604 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6RXMCOCTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT 0
-#define I40E_GLPES_VFIP6RXMCOCTSHI_IP6RXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT)
-#define I40E_GLPES_VFIP6RXMCOCTSLO(_i) (0x00019600 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6RXMCOCTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT 0
-#define I40E_GLPES_VFIP6RXMCOCTSLO_IP6RXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT)
-#define I40E_GLPES_VFIP6RXMCPKTSHI(_i) (0x00019804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6RXMCPKTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT 0
-#define I40E_GLPES_VFIP6RXMCPKTSHI_IP6RXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT)
-#define I40E_GLPES_VFIP6RXMCPKTSLO(_i) (0x00019800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6RXMCPKTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT 0
-#define I40E_GLPES_VFIP6RXMCPKTSLO_IP6RXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT)
-#define I40E_GLPES_VFIP6RXOCTSHI(_i) (0x00018E04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6RXOCTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT 0
-#define I40E_GLPES_VFIP6RXOCTSHI_IP6RXOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT)
-#define I40E_GLPES_VFIP6RXOCTSLO(_i) (0x00018E00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6RXOCTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT 0
-#define I40E_GLPES_VFIP6RXOCTSLO_IP6RXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT)
-#define I40E_GLPES_VFIP6RXPKTSHI(_i) (0x00019004 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6RXPKTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT 0
-#define I40E_GLPES_VFIP6RXPKTSHI_IP6RXPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT)
-#define I40E_GLPES_VFIP6RXPKTSLO(_i) (0x00019000 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6RXPKTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT 0
-#define I40E_GLPES_VFIP6RXPKTSLO_IP6RXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT)
-#define I40E_GLPES_VFIP6RXTRUNC(_i) (0x00019300 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6RXTRUNC_MAX_INDEX 31
-#define I40E_GLPES_VFIP6RXTRUNC_IP6RXTRUNC_SHIFT 0
-#define I40E_GLPES_VFIP6RXTRUNC_IP6RXTRUNC_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXTRUNC_IP6RXTRUNC_SHIFT)
-#define I40E_GLPES_VFIP6TXFRAGSHI(_i) (0x0001A804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6TXFRAGSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT 0
-#define I40E_GLPES_VFIP6TXFRAGSHI_IP6TXFRAGSHI_MASK (0xFFFF << I40E_GLPES_VFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT)
-#define I40E_GLPES_VFIP6TXFRAGSLO(_i) (0x0001A800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6TXFRAGSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT 0
-#define I40E_GLPES_VFIP6TXFRAGSLO_IP6TXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT)
-#define I40E_GLPES_VFIP6TXMCOCTSHI(_i) (0x0001AA04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6TXMCOCTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT 0
-#define I40E_GLPES_VFIP6TXMCOCTSHI_IP6TXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT)
-#define I40E_GLPES_VFIP6TXMCOCTSLO(_i) (0x0001AA00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6TXMCOCTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT 0
-#define I40E_GLPES_VFIP6TXMCOCTSLO_IP6TXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT)
-#define I40E_GLPES_VFIP6TXMCPKTSHI(_i) (0x0001AC04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6TXMCPKTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT 0
-#define I40E_GLPES_VFIP6TXMCPKTSHI_IP6TXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT)
-#define I40E_GLPES_VFIP6TXMCPKTSLO(_i) (0x0001AC00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6TXMCPKTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT 0
-#define I40E_GLPES_VFIP6TXMCPKTSLO_IP6TXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT)
-#define I40E_GLPES_VFIP6TXNOROUTE(_i) (0x0001AF00 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6TXNOROUTE_MAX_INDEX 31
-#define I40E_GLPES_VFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT 0
-#define I40E_GLPES_VFIP6TXNOROUTE_IP6TXNOROUTE_MASK (0xFFFFFF << I40E_GLPES_VFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT)
-#define I40E_GLPES_VFIP6TXOCTSHI(_i) (0x0001A404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6TXOCTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT 0
-#define I40E_GLPES_VFIP6TXOCTSHI_IP6TXOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT)
-#define I40E_GLPES_VFIP6TXOCTSLO(_i) (0x0001A400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6TXOCTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT 0
-#define I40E_GLPES_VFIP6TXOCTSLO_IP6TXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT)
-#define I40E_GLPES_VFIP6TXPKTSHI(_i) (0x0001A604 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6TXPKTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT 0
-#define I40E_GLPES_VFIP6TXPKTSHI_IP6TXPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT)
-#define I40E_GLPES_VFIP6TXPKTSLO(_i) (0x0001A600 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6TXPKTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT 0
-#define I40E_GLPES_VFIP6TXPKTSLO_IP6TXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT)
-#define I40E_GLPES_VFRDMARXRDSHI(_i) (0x0001BE04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMARXRDSHI_MAX_INDEX 31
-#define I40E_GLPES_VFRDMARXRDSHI_RDMARXRDSHI_SHIFT 0
-#define I40E_GLPES_VFRDMARXRDSHI_RDMARXRDSHI_MASK (0xFFFF << I40E_GLPES_VFRDMARXRDSHI_RDMARXRDSHI_SHIFT)
-#define I40E_GLPES_VFRDMARXRDSLO(_i) (0x0001BE00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMARXRDSLO_MAX_INDEX 31
-#define I40E_GLPES_VFRDMARXRDSLO_RDMARXRDSLO_SHIFT 0
-#define I40E_GLPES_VFRDMARXRDSLO_RDMARXRDSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMARXRDSLO_RDMARXRDSLO_SHIFT)
-#define I40E_GLPES_VFRDMARXSNDSHI(_i) (0x0001C004 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMARXSNDSHI_MAX_INDEX 31
-#define I40E_GLPES_VFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT 0
-#define I40E_GLPES_VFRDMARXSNDSHI_RDMARXSNDSHI_MASK (0xFFFF << I40E_GLPES_VFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT)
-#define I40E_GLPES_VFRDMARXSNDSLO(_i) (0x0001C000 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMARXSNDSLO_MAX_INDEX 31
-#define I40E_GLPES_VFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT 0
-#define I40E_GLPES_VFRDMARXSNDSLO_RDMARXSNDSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT)
-#define I40E_GLPES_VFRDMARXWRSHI(_i) (0x0001BC04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMARXWRSHI_MAX_INDEX 31
-#define I40E_GLPES_VFRDMARXWRSHI_RDMARXWRSHI_SHIFT 0
-#define I40E_GLPES_VFRDMARXWRSHI_RDMARXWRSHI_MASK (0xFFFF << I40E_GLPES_VFRDMARXWRSHI_RDMARXWRSHI_SHIFT)
-#define I40E_GLPES_VFRDMARXWRSLO(_i) (0x0001BC00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMARXWRSLO_MAX_INDEX 31
-#define I40E_GLPES_VFRDMARXWRSLO_RDMARXWRSLO_SHIFT 0
-#define I40E_GLPES_VFRDMARXWRSLO_RDMARXWRSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMARXWRSLO_RDMARXWRSLO_SHIFT)
-#define I40E_GLPES_VFRDMATXRDSHI(_i) (0x0001C404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMATXRDSHI_MAX_INDEX 31
-#define I40E_GLPES_VFRDMATXRDSHI_RDMARXRDSHI_SHIFT 0
-#define I40E_GLPES_VFRDMATXRDSHI_RDMARXRDSHI_MASK (0xFFFF << I40E_GLPES_VFRDMATXRDSHI_RDMARXRDSHI_SHIFT)
-#define I40E_GLPES_VFRDMATXRDSLO(_i) (0x0001C400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMATXRDSLO_MAX_INDEX 31
-#define I40E_GLPES_VFRDMATXRDSLO_RDMARXRDSLO_SHIFT 0
-#define I40E_GLPES_VFRDMATXRDSLO_RDMARXRDSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMATXRDSLO_RDMARXRDSLO_SHIFT)
-#define I40E_GLPES_VFRDMATXSNDSHI(_i) (0x0001C604 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMATXSNDSHI_MAX_INDEX 31
-#define I40E_GLPES_VFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT 0
-#define I40E_GLPES_VFRDMATXSNDSHI_RDMARXSNDSHI_MASK (0xFFFF << I40E_GLPES_VFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT)
-#define I40E_GLPES_VFRDMATXSNDSLO(_i) (0x0001C600 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMATXSNDSLO_MAX_INDEX 31
-#define I40E_GLPES_VFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT 0
-#define I40E_GLPES_VFRDMATXSNDSLO_RDMARXSNDSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT)
-#define I40E_GLPES_VFRDMATXWRSHI(_i) (0x0001C204 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMATXWRSHI_MAX_INDEX 31
-#define I40E_GLPES_VFRDMATXWRSHI_RDMARXWRSHI_SHIFT 0
-#define I40E_GLPES_VFRDMATXWRSHI_RDMARXWRSHI_MASK (0xFFFF << I40E_GLPES_VFRDMATXWRSHI_RDMARXWRSHI_SHIFT)
-#define I40E_GLPES_VFRDMATXWRSLO(_i) (0x0001C200 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMATXWRSLO_MAX_INDEX 31
-#define I40E_GLPES_VFRDMATXWRSLO_RDMARXWRSLO_SHIFT 0
-#define I40E_GLPES_VFRDMATXWRSLO_RDMARXWRSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMATXWRSLO_RDMARXWRSLO_SHIFT)
-#define I40E_GLPES_VFRDMAVBNDHI(_i) (0x0001C804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMAVBNDHI_MAX_INDEX 31
-#define I40E_GLPES_VFRDMAVBNDHI_RDMAVBNDHI_SHIFT 0
-#define I40E_GLPES_VFRDMAVBNDHI_RDMAVBNDHI_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMAVBNDHI_RDMAVBNDHI_SHIFT)
-#define I40E_GLPES_VFRDMAVBNDLO(_i) (0x0001C800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMAVBNDLO_MAX_INDEX 31
-#define I40E_GLPES_VFRDMAVBNDLO_RDMAVBNDLO_SHIFT 0
-#define I40E_GLPES_VFRDMAVBNDLO_RDMAVBNDLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMAVBNDLO_RDMAVBNDLO_SHIFT)
-#define I40E_GLPES_VFRDMAVINVHI(_i) (0x0001CA04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMAVINVHI_MAX_INDEX 31
-#define I40E_GLPES_VFRDMAVINVHI_RDMAVINVHI_SHIFT 0
-#define I40E_GLPES_VFRDMAVINVHI_RDMAVINVHI_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMAVINVHI_RDMAVINVHI_SHIFT)
-#define I40E_GLPES_VFRDMAVINVLO(_i) (0x0001CA00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMAVINVLO_MAX_INDEX 31
-#define I40E_GLPES_VFRDMAVINVLO_RDMAVINVLO_SHIFT 0
-#define I40E_GLPES_VFRDMAVINVLO_RDMAVINVLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMAVINVLO_RDMAVINVLO_SHIFT)
-#define I40E_GLPES_VFRXVLANERR(_i) (0x00018000 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRXVLANERR_MAX_INDEX 31
-#define I40E_GLPES_VFRXVLANERR_RXVLANERR_SHIFT 0
-#define I40E_GLPES_VFRXVLANERR_RXVLANERR_MASK (0xFFFFFF << I40E_GLPES_VFRXVLANERR_RXVLANERR_SHIFT)
-#define I40E_GLPES_VFTCPRTXSEG(_i) (0x0001B600 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFTCPRTXSEG_MAX_INDEX 31
-#define I40E_GLPES_VFTCPRTXSEG_TCPRTXSEG_SHIFT 0
-#define I40E_GLPES_VFTCPRTXSEG_TCPRTXSEG_MASK (0xFFFFFFFF << I40E_GLPES_VFTCPRTXSEG_TCPRTXSEG_SHIFT)
-#define I40E_GLPES_VFTCPRXOPTERR(_i) (0x0001B200 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFTCPRXOPTERR_MAX_INDEX 31
-#define I40E_GLPES_VFTCPRXOPTERR_TCPRXOPTERR_SHIFT 0
-#define I40E_GLPES_VFTCPRXOPTERR_TCPRXOPTERR_MASK (0xFFFFFF << I40E_GLPES_VFTCPRXOPTERR_TCPRXOPTERR_SHIFT)
-#define I40E_GLPES_VFTCPRXPROTOERR(_i) (0x0001B300 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFTCPRXPROTOERR_MAX_INDEX 31
-#define I40E_GLPES_VFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT 0
-#define I40E_GLPES_VFTCPRXPROTOERR_TCPRXPROTOERR_MASK (0xFFFFFF << I40E_GLPES_VFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT)
-#define I40E_GLPES_VFTCPRXSEGSHI(_i) (0x0001B004 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFTCPRXSEGSHI_MAX_INDEX 31
-#define I40E_GLPES_VFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT 0
-#define I40E_GLPES_VFTCPRXSEGSHI_TCPRXSEGSHI_MASK (0xFFFF << I40E_GLPES_VFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT)
-#define I40E_GLPES_VFTCPRXSEGSLO(_i) (0x0001B000 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFTCPRXSEGSLO_MAX_INDEX 31
-#define I40E_GLPES_VFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT 0
-#define I40E_GLPES_VFTCPRXSEGSLO_TCPRXSEGSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT)
-#define I40E_GLPES_VFTCPTXSEGHI(_i) (0x0001B404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFTCPTXSEGHI_MAX_INDEX 31
-#define I40E_GLPES_VFTCPTXSEGHI_TCPTXSEGHI_SHIFT 0
-#define I40E_GLPES_VFTCPTXSEGHI_TCPTXSEGHI_MASK (0xFFFF << I40E_GLPES_VFTCPTXSEGHI_TCPTXSEGHI_SHIFT)
-#define I40E_GLPES_VFTCPTXSEGLO(_i) (0x0001B400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFTCPTXSEGLO_MAX_INDEX 31
-#define I40E_GLPES_VFTCPTXSEGLO_TCPTXSEGLO_SHIFT 0
-#define I40E_GLPES_VFTCPTXSEGLO_TCPTXSEGLO_MASK (0xFFFFFFFF << I40E_GLPES_VFTCPTXSEGLO_TCPTXSEGLO_SHIFT)
-#define I40E_GLPES_VFUDPRXPKTSHI(_i) (0x0001B804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFUDPRXPKTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT 0
-#define I40E_GLPES_VFUDPRXPKTSHI_UDPRXPKTSHI_MASK (0xFFFF << I40E_GLPES_VFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT)
-#define I40E_GLPES_VFUDPRXPKTSLO(_i) (0x0001B800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFUDPRXPKTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT 0
-#define I40E_GLPES_VFUDPRXPKTSLO_UDPRXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT)
-#define I40E_GLPES_VFUDPTXPKTSHI(_i) (0x0001BA04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFUDPTXPKTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT 0
-#define I40E_GLPES_VFUDPTXPKTSHI_UDPTXPKTSHI_MASK (0xFFFF << I40E_GLPES_VFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT)
-#define I40E_GLPES_VFUDPTXPKTSLO(_i) (0x0001BA00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFUDPTXPKTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT 0
-#define I40E_GLPES_VFUDPTXPKTSLO_UDPTXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT)
-
-#define I40E_VFPE_AEQALLOC1 0x0000A400 /* Reset: VFR */
-#define I40E_VFPE_AEQALLOC1_AECOUNT_SHIFT 0
-#define I40E_VFPE_AEQALLOC1_AECOUNT_MASK (0xFFFFFFFF << I40E_VFPE_AEQALLOC1_AECOUNT_SHIFT)
-#define I40E_VFPE_CCQPHIGH1 0x00009800 /* Reset: VFR */
-#define I40E_VFPE_CCQPHIGH1_PECCQPHIGH_SHIFT 0
-#define I40E_VFPE_CCQPHIGH1_PECCQPHIGH_MASK (0xFFFFFFFF << I40E_VFPE_CCQPHIGH1_PECCQPHIGH_SHIFT)
-#define I40E_VFPE_CCQPLOW1 0x0000AC00 /* Reset: VFR */
-#define I40E_VFPE_CCQPLOW1_PECCQPLOW_SHIFT 0
-#define I40E_VFPE_CCQPLOW1_PECCQPLOW_MASK (0xFFFFFFFF << I40E_VFPE_CCQPLOW1_PECCQPLOW_SHIFT)
-#define I40E_VFPE_CCQPSTATUS1 0x0000B800 /* Reset: VFR */
-#define I40E_VFPE_CCQPSTATUS1_CCQP_DONE_SHIFT 0
-#define I40E_VFPE_CCQPSTATUS1_CCQP_DONE_MASK (0x1 << I40E_VFPE_CCQPSTATUS1_CCQP_DONE_SHIFT)
-#define I40E_VFPE_CCQPSTATUS1_HMC_PROFILE_SHIFT 4
-#define I40E_VFPE_CCQPSTATUS1_HMC_PROFILE_MASK (0x7 << I40E_VFPE_CCQPSTATUS1_HMC_PROFILE_SHIFT)
-#define I40E_VFPE_CCQPSTATUS1_RDMA_EN_VFS_SHIFT 16
-#define I40E_VFPE_CCQPSTATUS1_RDMA_EN_VFS_MASK (0x3F << I40E_VFPE_CCQPSTATUS1_RDMA_EN_VFS_SHIFT)
-#define I40E_VFPE_CCQPSTATUS1_CCQP_ERR_SHIFT 31
-#define I40E_VFPE_CCQPSTATUS1_CCQP_ERR_MASK (0x1 << I40E_VFPE_CCQPSTATUS1_CCQP_ERR_SHIFT)
-#define I40E_VFPE_CQACK1 0x0000B000 /* Reset: VFR */
-#define I40E_VFPE_CQACK1_PECQID_SHIFT 0
-#define I40E_VFPE_CQACK1_PECQID_MASK (0x1FFFF << I40E_VFPE_CQACK1_PECQID_SHIFT)
-#define I40E_VFPE_CQARM1 0x0000B400 /* Reset: VFR */
-#define I40E_VFPE_CQARM1_PECQID_SHIFT 0
-#define I40E_VFPE_CQARM1_PECQID_MASK (0x1FFFF << I40E_VFPE_CQARM1_PECQID_SHIFT)
-#define I40E_VFPE_CQPDB1 0x0000BC00 /* Reset: VFR */
-#define I40E_VFPE_CQPDB1_WQHEAD_SHIFT 0
-#define I40E_VFPE_CQPDB1_WQHEAD_MASK (0x7FF << I40E_VFPE_CQPDB1_WQHEAD_SHIFT)
-#define I40E_VFPE_CQPERRCODES1 0x00009C00 /* Reset: VFR */
-#define I40E_VFPE_CQPERRCODES1_CQP_MINOR_CODE_SHIFT 0
-#define I40E_VFPE_CQPERRCODES1_CQP_MINOR_CODE_MASK (0xFFFF << I40E_VFPE_CQPERRCODES1_CQP_MINOR_CODE_SHIFT)
-#define I40E_VFPE_CQPERRCODES1_CQP_MAJOR_CODE_SHIFT 16
-#define I40E_VFPE_CQPERRCODES1_CQP_MAJOR_CODE_MASK (0xFFFF << I40E_VFPE_CQPERRCODES1_CQP_MAJOR_CODE_SHIFT)
-#define I40E_VFPE_CQPTAIL1 0x0000A000 /* Reset: VFR */
-#define I40E_VFPE_CQPTAIL1_WQTAIL_SHIFT 0
-#define I40E_VFPE_CQPTAIL1_WQTAIL_MASK (0x7FF << I40E_VFPE_CQPTAIL1_WQTAIL_SHIFT)
-#define I40E_VFPE_CQPTAIL1_CQP_OP_ERR_SHIFT 31
-#define I40E_VFPE_CQPTAIL1_CQP_OP_ERR_MASK (0x1 << I40E_VFPE_CQPTAIL1_CQP_OP_ERR_SHIFT)
-#define I40E_VFPE_IPCONFIG01 0x00008C00 /* Reset: VFR */
-#define I40E_VFPE_IPCONFIG01_PEIPID_SHIFT 0
-#define I40E_VFPE_IPCONFIG01_PEIPID_MASK (0xFFFF << I40E_VFPE_IPCONFIG01_PEIPID_SHIFT)
-#define I40E_VFPE_IPCONFIG01_USEENTIREIDRANGE_SHIFT 16
-#define I40E_VFPE_IPCONFIG01_USEENTIREIDRANGE_MASK (0x1 << I40E_VFPE_IPCONFIG01_USEENTIREIDRANGE_SHIFT)
-#define I40E_VFPE_MRTEIDXMASK1 0x00009000 /* Reset: VFR */
-#define I40E_VFPE_MRTEIDXMASK1_MRTEIDXMASKBITS_SHIFT 0
-#define I40E_VFPE_MRTEIDXMASK1_MRTEIDXMASKBITS_MASK (0x1F << I40E_VFPE_MRTEIDXMASK1_MRTEIDXMASKBITS_SHIFT)
-#define I40E_VFPE_RCVUNEXPECTEDERROR1 0x00009400 /* Reset: VFR */
-#define I40E_VFPE_RCVUNEXPECTEDERROR1_TCP_RX_UNEXP_ERR_SHIFT 0
-#define I40E_VFPE_RCVUNEXPECTEDERROR1_TCP_RX_UNEXP_ERR_MASK (0xFFFFFF << I40E_VFPE_RCVUNEXPECTEDERROR1_TCP_RX_UNEXP_ERR_SHIFT)
-#define I40E_VFPE_TCPNOWTIMER1 0x0000A800 /* Reset: VFR */
-#define I40E_VFPE_TCPNOWTIMER1_TCP_NOW_SHIFT 0
-#define I40E_VFPE_TCPNOWTIMER1_TCP_NOW_MASK (0xFFFFFFFF << I40E_VFPE_TCPNOWTIMER1_TCP_NOW_SHIFT)
-#define I40E_VFPE_WQEALLOC1 0x0000C000 /* Reset: VFR */
-#define I40E_VFPE_WQEALLOC1_PEQPID_SHIFT 0
-#define I40E_VFPE_WQEALLOC1_PEQPID_MASK (0x3FFFF << I40E_VFPE_WQEALLOC1_PEQPID_SHIFT)
-#define I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_SHIFT 20
-#define I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_MASK (0xFFF << I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_SHIFT)
-#endif /* I40IW_REGISTER_H */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_status.h b/drivers/infiniband/hw/i40iw/i40iw_status.h
deleted file mode 100644
index f7013f11d808..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_status.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_STATUS_H
-#define I40IW_STATUS_H
-
-/* Error Codes */
-enum i40iw_status_code {
- I40IW_SUCCESS = 0,
- I40IW_ERR_NVM = -1,
- I40IW_ERR_NVM_CHECKSUM = -2,
- I40IW_ERR_CONFIG = -4,
- I40IW_ERR_PARAM = -5,
- I40IW_ERR_DEVICE_NOT_SUPPORTED = -6,
- I40IW_ERR_RESET_FAILED = -7,
- I40IW_ERR_SWFW_SYNC = -8,
- I40IW_ERR_NO_MEMORY = -9,
- I40IW_ERR_BAD_PTR = -10,
- I40IW_ERR_INVALID_PD_ID = -11,
- I40IW_ERR_INVALID_QP_ID = -12,
- I40IW_ERR_INVALID_CQ_ID = -13,
- I40IW_ERR_INVALID_CEQ_ID = -14,
- I40IW_ERR_INVALID_AEQ_ID = -15,
- I40IW_ERR_INVALID_SIZE = -16,
- I40IW_ERR_INVALID_ARP_INDEX = -17,
- I40IW_ERR_INVALID_FPM_FUNC_ID = -18,
- I40IW_ERR_QP_INVALID_MSG_SIZE = -19,
- I40IW_ERR_QP_TOOMANY_WRS_POSTED = -20,
- I40IW_ERR_INVALID_FRAG_COUNT = -21,
- I40IW_ERR_QUEUE_EMPTY = -22,
- I40IW_ERR_INVALID_ALIGNMENT = -23,
- I40IW_ERR_FLUSHED_QUEUE = -24,
- I40IW_ERR_INVALID_PUSH_PAGE_INDEX = -25,
- I40IW_ERR_INVALID_INLINE_DATA_SIZE = -26,
- I40IW_ERR_TIMEOUT = -27,
- I40IW_ERR_OPCODE_MISMATCH = -28,
- I40IW_ERR_CQP_COMPL_ERROR = -29,
- I40IW_ERR_INVALID_VF_ID = -30,
- I40IW_ERR_INVALID_HMCFN_ID = -31,
- I40IW_ERR_BACKING_PAGE_ERROR = -32,
- I40IW_ERR_NO_PBLCHUNKS_AVAILABLE = -33,
- I40IW_ERR_INVALID_PBLE_INDEX = -34,
- I40IW_ERR_INVALID_SD_INDEX = -35,
- I40IW_ERR_INVALID_PAGE_DESC_INDEX = -36,
- I40IW_ERR_INVALID_SD_TYPE = -37,
- I40IW_ERR_MEMCPY_FAILED = -38,
- I40IW_ERR_INVALID_HMC_OBJ_INDEX = -39,
- I40IW_ERR_INVALID_HMC_OBJ_COUNT = -40,
- I40IW_ERR_INVALID_SRQ_ARM_LIMIT = -41,
- I40IW_ERR_SRQ_ENABLED = -42,
- I40IW_ERR_BUF_TOO_SHORT = -43,
- I40IW_ERR_BAD_IWARP_CQE = -44,
- I40IW_ERR_NVM_BLANK_MODE = -45,
- I40IW_ERR_NOT_IMPLEMENTED = -46,
- I40IW_ERR_PE_DOORBELL_NOT_ENABLED = -47,
- I40IW_ERR_NOT_READY = -48,
- I40IW_NOT_SUPPORTED = -49,
- I40IW_ERR_FIRMWARE_API_VERSION = -50,
- I40IW_ERR_RING_FULL = -51,
- I40IW_ERR_MPA_CRC = -61,
- I40IW_ERR_NO_TXBUFS = -62,
- I40IW_ERR_SEQ_NUM = -63,
- I40IW_ERR_list_empty = -64,
- I40IW_ERR_INVALID_MAC_ADDR = -65,
- I40IW_ERR_BAD_STAG = -66,
- I40IW_ERR_CQ_COMPL_ERROR = -67,
- I40IW_ERR_QUEUE_DESTROYED = -68
-
-};
-#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h
deleted file mode 100644
index adc8d2ec523d..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_type.h
+++ /dev/null
@@ -1,1363 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_TYPE_H
-#define I40IW_TYPE_H
-#include "i40iw_user.h"
-#include "i40iw_hmc.h"
-#include "i40iw_vf.h"
-#include "i40iw_virtchnl.h"
-
-struct i40iw_cqp_sq_wqe {
- u64 buf[I40IW_CQP_WQE_SIZE];
-};
-
-struct i40iw_sc_aeqe {
- u64 buf[I40IW_AEQE_SIZE];
-};
-
-struct i40iw_ceqe {
- u64 buf[I40IW_CEQE_SIZE];
-};
-
-struct i40iw_cqp_ctx {
- u64 buf[I40IW_CQP_CTX_SIZE];
-};
-
-struct i40iw_cq_shadow_area {
- u64 buf[I40IW_SHADOW_AREA_SIZE];
-};
-
-struct i40iw_sc_dev;
-struct i40iw_hmc_info;
-struct i40iw_vsi_pestat;
-
-struct i40iw_cqp_ops;
-struct i40iw_ccq_ops;
-struct i40iw_ceq_ops;
-struct i40iw_aeq_ops;
-struct i40iw_mr_ops;
-struct i40iw_cqp_misc_ops;
-struct i40iw_pd_ops;
-struct i40iw_priv_qp_ops;
-struct i40iw_priv_cq_ops;
-struct i40iw_hmc_ops;
-
-enum i40iw_page_size {
- I40IW_PAGE_SIZE_4K,
- I40IW_PAGE_SIZE_2M
-};
-
-enum i40iw_resource_indicator_type {
- I40IW_RSRC_INDICATOR_TYPE_ADAPTER = 0,
- I40IW_RSRC_INDICATOR_TYPE_CQ,
- I40IW_RSRC_INDICATOR_TYPE_QP,
- I40IW_RSRC_INDICATOR_TYPE_SRQ
-};
-
-enum i40iw_hdrct_flags {
- DDP_LEN_FLAG = 0x80,
- DDP_HDR_FLAG = 0x40,
- RDMA_HDR_FLAG = 0x20
-};
-
-enum i40iw_term_layers {
- LAYER_RDMA = 0,
- LAYER_DDP = 1,
- LAYER_MPA = 2
-};
-
-enum i40iw_term_error_types {
- RDMAP_REMOTE_PROT = 1,
- RDMAP_REMOTE_OP = 2,
- DDP_CATASTROPHIC = 0,
- DDP_TAGGED_BUFFER = 1,
- DDP_UNTAGGED_BUFFER = 2,
- DDP_LLP = 3
-};
-
-enum i40iw_term_rdma_errors {
- RDMAP_INV_STAG = 0x00,
- RDMAP_INV_BOUNDS = 0x01,
- RDMAP_ACCESS = 0x02,
- RDMAP_UNASSOC_STAG = 0x03,
- RDMAP_TO_WRAP = 0x04,
- RDMAP_INV_RDMAP_VER = 0x05,
- RDMAP_UNEXPECTED_OP = 0x06,
- RDMAP_CATASTROPHIC_LOCAL = 0x07,
- RDMAP_CATASTROPHIC_GLOBAL = 0x08,
- RDMAP_CANT_INV_STAG = 0x09,
- RDMAP_UNSPECIFIED = 0xff
-};
-
-enum i40iw_term_ddp_errors {
- DDP_CATASTROPHIC_LOCAL = 0x00,
- DDP_TAGGED_INV_STAG = 0x00,
- DDP_TAGGED_BOUNDS = 0x01,
- DDP_TAGGED_UNASSOC_STAG = 0x02,
- DDP_TAGGED_TO_WRAP = 0x03,
- DDP_TAGGED_INV_DDP_VER = 0x04,
- DDP_UNTAGGED_INV_QN = 0x01,
- DDP_UNTAGGED_INV_MSN_NO_BUF = 0x02,
- DDP_UNTAGGED_INV_MSN_RANGE = 0x03,
- DDP_UNTAGGED_INV_MO = 0x04,
- DDP_UNTAGGED_INV_TOO_LONG = 0x05,
- DDP_UNTAGGED_INV_DDP_VER = 0x06
-};
-
-enum i40iw_term_mpa_errors {
- MPA_CLOSED = 0x01,
- MPA_CRC = 0x02,
- MPA_MARKER = 0x03,
- MPA_REQ_RSP = 0x04,
-};
-
-enum i40iw_flush_opcode {
- FLUSH_INVALID = 0,
- FLUSH_PROT_ERR,
- FLUSH_REM_ACCESS_ERR,
- FLUSH_LOC_QP_OP_ERR,
- FLUSH_REM_OP_ERR,
- FLUSH_LOC_LEN_ERR,
- FLUSH_GENERAL_ERR,
- FLUSH_FATAL_ERR
-};
-
-enum i40iw_term_eventtypes {
- TERM_EVENT_QP_FATAL,
- TERM_EVENT_QP_ACCESS_ERR
-};
-
-struct i40iw_terminate_hdr {
- u8 layer_etype;
- u8 error_code;
- u8 hdrct;
- u8 rsvd;
-};
-
-enum i40iw_debug_flag {
- I40IW_DEBUG_NONE = 0x00000000,
- I40IW_DEBUG_ERR = 0x00000001,
- I40IW_DEBUG_INIT = 0x00000002,
- I40IW_DEBUG_DEV = 0x00000004,
- I40IW_DEBUG_CM = 0x00000008,
- I40IW_DEBUG_VERBS = 0x00000010,
- I40IW_DEBUG_PUDA = 0x00000020,
- I40IW_DEBUG_ILQ = 0x00000040,
- I40IW_DEBUG_IEQ = 0x00000080,
- I40IW_DEBUG_QP = 0x00000100,
- I40IW_DEBUG_CQ = 0x00000200,
- I40IW_DEBUG_MR = 0x00000400,
- I40IW_DEBUG_PBLE = 0x00000800,
- I40IW_DEBUG_WQE = 0x00001000,
- I40IW_DEBUG_AEQ = 0x00002000,
- I40IW_DEBUG_CQP = 0x00004000,
- I40IW_DEBUG_HMC = 0x00008000,
- I40IW_DEBUG_USER = 0x00010000,
- I40IW_DEBUG_VIRT = 0x00020000,
- I40IW_DEBUG_DCB = 0x00040000,
- I40IW_DEBUG_CQE = 0x00800000,
- I40IW_DEBUG_ALL = 0xFFFFFFFF
-};
-
-enum i40iw_hw_stats_index_32b {
- I40IW_HW_STAT_INDEX_IP4RXDISCARD = 0,
- I40IW_HW_STAT_INDEX_IP4RXTRUNC,
- I40IW_HW_STAT_INDEX_IP4TXNOROUTE,
- I40IW_HW_STAT_INDEX_IP6RXDISCARD,
- I40IW_HW_STAT_INDEX_IP6RXTRUNC,
- I40IW_HW_STAT_INDEX_IP6TXNOROUTE,
- I40IW_HW_STAT_INDEX_TCPRTXSEG,
- I40IW_HW_STAT_INDEX_TCPRXOPTERR,
- I40IW_HW_STAT_INDEX_TCPRXPROTOERR,
- I40IW_HW_STAT_INDEX_MAX_32
-};
-
-enum i40iw_hw_stats_index_64b {
- I40IW_HW_STAT_INDEX_IP4RXOCTS = 0,
- I40IW_HW_STAT_INDEX_IP4RXPKTS,
- I40IW_HW_STAT_INDEX_IP4RXFRAGS,
- I40IW_HW_STAT_INDEX_IP4RXMCPKTS,
- I40IW_HW_STAT_INDEX_IP4TXOCTS,
- I40IW_HW_STAT_INDEX_IP4TXPKTS,
- I40IW_HW_STAT_INDEX_IP4TXFRAGS,
- I40IW_HW_STAT_INDEX_IP4TXMCPKTS,
- I40IW_HW_STAT_INDEX_IP6RXOCTS,
- I40IW_HW_STAT_INDEX_IP6RXPKTS,
- I40IW_HW_STAT_INDEX_IP6RXFRAGS,
- I40IW_HW_STAT_INDEX_IP6RXMCPKTS,
- I40IW_HW_STAT_INDEX_IP6TXOCTS,
- I40IW_HW_STAT_INDEX_IP6TXPKTS,
- I40IW_HW_STAT_INDEX_IP6TXFRAGS,
- I40IW_HW_STAT_INDEX_IP6TXMCPKTS,
- I40IW_HW_STAT_INDEX_TCPRXSEGS,
- I40IW_HW_STAT_INDEX_TCPTXSEG,
- I40IW_HW_STAT_INDEX_RDMARXRDS,
- I40IW_HW_STAT_INDEX_RDMARXSNDS,
- I40IW_HW_STAT_INDEX_RDMARXWRS,
- I40IW_HW_STAT_INDEX_RDMATXRDS,
- I40IW_HW_STAT_INDEX_RDMATXSNDS,
- I40IW_HW_STAT_INDEX_RDMATXWRS,
- I40IW_HW_STAT_INDEX_RDMAVBND,
- I40IW_HW_STAT_INDEX_RDMAVINV,
- I40IW_HW_STAT_INDEX_MAX_64
-};
-
-struct i40iw_dev_hw_stats_offsets {
- u32 stats_offset_32[I40IW_HW_STAT_INDEX_MAX_32];
- u32 stats_offset_64[I40IW_HW_STAT_INDEX_MAX_64];
-};
-
-struct i40iw_dev_hw_stats {
- u64 stats_value_32[I40IW_HW_STAT_INDEX_MAX_32];
- u64 stats_value_64[I40IW_HW_STAT_INDEX_MAX_64];
-};
-
-struct i40iw_vsi_pestat {
- struct i40iw_hw *hw;
- struct i40iw_dev_hw_stats hw_stats;
- struct i40iw_dev_hw_stats last_read_hw_stats;
- struct i40iw_dev_hw_stats_offsets hw_stats_offsets;
- struct timer_list stats_timer;
- struct i40iw_sc_vsi *vsi;
- spinlock_t lock; /* rdma stats lock */
-};
-
-struct i40iw_hw {
- u8 __iomem *hw_addr;
- void *dev_context;
- struct i40iw_hmc_info hmc;
-};
-
-struct i40iw_pfpdu {
- struct list_head rxlist;
- u32 rcv_nxt;
- u32 fps;
- u32 max_fpdu_data;
- bool mode;
- bool mpa_crc_err;
- u64 total_ieq_bufs;
- u64 fpdu_processed;
- u64 bad_seq_num;
- u64 crc_err;
- u64 no_tx_bufs;
- u64 tx_err;
- u64 out_of_order;
- u64 pmode_count;
-};
-
-struct i40iw_sc_pd {
- u32 size;
- struct i40iw_sc_dev *dev;
- u16 pd_id;
- int abi_ver;
-};
-
-struct i40iw_cqp_quanta {
- u64 elem[I40IW_CQP_WQE_SIZE];
-};
-
-struct i40iw_sc_cqp {
- u32 size;
- u64 sq_pa;
- u64 host_ctx_pa;
- void *back_cqp;
- struct i40iw_sc_dev *dev;
- enum i40iw_status_code (*process_cqp_sds)(struct i40iw_sc_dev *,
- struct i40iw_update_sds_info *);
- struct i40iw_dma_mem sdbuf;
- struct i40iw_ring sq_ring;
- struct i40iw_cqp_quanta *sq_base;
- u64 *host_ctx;
- u64 *scratch_array;
- u32 cqp_id;
- u32 sq_size;
- u32 hw_sq_size;
- u8 struct_ver;
- u8 polarity;
- bool en_datacenter_tcp;
- u8 hmc_profile;
- u8 enabled_vf_count;
- u8 timeout_count;
-};
-
-struct i40iw_sc_aeq {
- u32 size;
- u64 aeq_elem_pa;
- struct i40iw_sc_dev *dev;
- struct i40iw_sc_aeqe *aeqe_base;
- void *pbl_list;
- u32 elem_cnt;
- struct i40iw_ring aeq_ring;
- bool virtual_map;
- u8 pbl_chunk_size;
- u32 first_pm_pbl_idx;
- u8 polarity;
-};
-
-struct i40iw_sc_ceq {
- u32 size;
- u64 ceq_elem_pa;
- struct i40iw_sc_dev *dev;
- struct i40iw_ceqe *ceqe_base;
- void *pbl_list;
- u32 ceq_id;
- u32 elem_cnt;
- struct i40iw_ring ceq_ring;
- bool virtual_map;
- u8 pbl_chunk_size;
- bool tph_en;
- u8 tph_val;
- u32 first_pm_pbl_idx;
- u8 polarity;
-};
-
-struct i40iw_sc_cq {
- struct i40iw_cq_uk cq_uk;
- u64 cq_pa;
- u64 shadow_area_pa;
- struct i40iw_sc_dev *dev;
- struct i40iw_sc_vsi *vsi;
- void *pbl_list;
- void *back_cq;
- u32 ceq_id;
- u32 shadow_read_threshold;
- bool ceqe_mask;
- bool virtual_map;
- u8 pbl_chunk_size;
- u8 cq_type;
- bool ceq_id_valid;
- bool tph_en;
- u8 tph_val;
- u32 first_pm_pbl_idx;
- bool check_overflow;
-};
-
-struct i40iw_sc_qp {
- struct i40iw_qp_uk qp_uk;
- u64 sq_pa;
- u64 rq_pa;
- u64 hw_host_ctx_pa;
- u64 shadow_area_pa;
- u64 q2_pa;
- struct i40iw_sc_dev *dev;
- struct i40iw_sc_vsi *vsi;
- struct i40iw_sc_pd *pd;
- u64 *hw_host_ctx;
- void *llp_stream_handle;
- void *back_qp;
- struct i40iw_pfpdu pfpdu;
- u8 *q2_buf;
- u64 qp_compl_ctx;
- u16 qs_handle;
- u16 push_idx;
- u8 sq_tph_val;
- u8 rq_tph_val;
- u8 qp_state;
- u8 qp_type;
- u8 hw_sq_size;
- u8 hw_rq_size;
- u8 src_mac_addr_idx;
- bool sq_tph_en;
- bool rq_tph_en;
- bool rcv_tph_en;
- bool xmit_tph_en;
- bool virtual_map;
- bool flush_sq;
- bool flush_rq;
- u8 user_pri;
- struct list_head list;
- bool on_qoslist;
- bool sq_flush;
- enum i40iw_flush_opcode flush_code;
- enum i40iw_term_eventtypes eventtype;
- u8 term_flags;
-};
-
-struct i40iw_hmc_fpm_misc {
- u32 max_ceqs;
- u32 max_sds;
- u32 xf_block_size;
- u32 q1_block_size;
- u32 ht_multiplier;
- u32 timer_bucket;
-};
-
-struct i40iw_vchnl_if {
- enum i40iw_status_code (*vchnl_recv)(struct i40iw_sc_dev *, u32, u8 *, u16);
- enum i40iw_status_code (*vchnl_send)(struct i40iw_sc_dev *dev, u32, u8 *, u16);
-};
-
-#define I40IW_VCHNL_MAX_VF_MSG_SIZE 512
-
-struct i40iw_vchnl_vf_msg_buffer {
- struct i40iw_virtchnl_op_buf vchnl_msg;
- char parm_buffer[I40IW_VCHNL_MAX_VF_MSG_SIZE - 1];
-};
-
-struct i40iw_qos {
- struct list_head qplist;
- spinlock_t lock; /* qos list */
- u16 qs_handle;
-};
-
-struct i40iw_vfdev {
- struct i40iw_sc_dev *pf_dev;
- u8 *hmc_info_mem;
- struct i40iw_vsi_pestat pestat;
- struct i40iw_hmc_pble_info *pble_info;
- struct i40iw_hmc_info hmc_info;
- struct i40iw_vchnl_vf_msg_buffer vf_msg_buffer;
- u64 fpm_query_buf_pa;
- u64 *fpm_query_buf;
- u32 vf_id;
- u32 msg_count;
- bool pf_hmc_initialized;
- u16 pmf_index;
- u16 iw_vf_idx; /* VF Device table index */
- bool stats_initialized;
-};
-
-#define I40IW_INVALID_FCN_ID 0xff
-struct i40iw_sc_vsi {
- struct i40iw_sc_dev *dev;
- void *back_vsi; /* Owned by OS */
- u32 ilq_count;
- struct i40iw_virt_mem ilq_mem;
- struct i40iw_puda_rsrc *ilq;
- u32 ieq_count;
- struct i40iw_virt_mem ieq_mem;
- struct i40iw_puda_rsrc *ieq;
- u16 exception_lan_queue;
- u16 mtu;
- u8 fcn_id;
- bool stats_fcn_id_alloc;
- struct i40iw_qos qos[I40IW_MAX_USER_PRIORITY];
- struct i40iw_vsi_pestat *pestat;
-};
-
-struct i40iw_sc_dev {
- struct list_head cqp_cmd_head; /* head of the CQP command list */
- spinlock_t cqp_lock; /* cqp list sync */
- struct i40iw_dev_uk dev_uk;
- bool fcn_id_array[I40IW_MAX_STATS_COUNT];
- struct i40iw_dma_mem vf_fpm_query_buf[I40IW_MAX_PE_ENABLED_VF_COUNT];
- u64 fpm_query_buf_pa;
- u64 fpm_commit_buf_pa;
- u64 *fpm_query_buf;
- u64 *fpm_commit_buf;
- void *back_dev;
- struct i40iw_hw *hw;
- u8 __iomem *db_addr;
- struct i40iw_hmc_info *hmc_info;
- struct i40iw_hmc_pble_info *pble_info;
- struct i40iw_vfdev *vf_dev[I40IW_MAX_PE_ENABLED_VF_COUNT];
- struct i40iw_sc_cqp *cqp;
- struct i40iw_sc_aeq *aeq;
- struct i40iw_sc_ceq *ceq[I40IW_CEQ_MAX_COUNT];
- struct i40iw_sc_cq *ccq;
- struct i40iw_cqp_ops *cqp_ops;
- struct i40iw_ccq_ops *ccq_ops;
- struct i40iw_ceq_ops *ceq_ops;
- struct i40iw_aeq_ops *aeq_ops;
- struct i40iw_pd_ops *iw_pd_ops;
- struct i40iw_priv_qp_ops *iw_priv_qp_ops;
- struct i40iw_priv_cq_ops *iw_priv_cq_ops;
- struct i40iw_mr_ops *mr_ops;
- struct i40iw_cqp_misc_ops *cqp_misc_ops;
- struct i40iw_hmc_ops *hmc_ops;
- struct i40iw_vchnl_if vchnl_if;
- const struct i40iw_vf_cqp_ops *iw_vf_cqp_ops;
-
- struct i40iw_hmc_fpm_misc hmc_fpm_misc;
- u32 debug_mask;
- u8 hmc_fn_id;
- bool is_pf;
- bool vchnl_up;
- bool ceq_valid;
- u8 vf_id;
- wait_queue_head_t vf_reqs;
- u64 cqp_cmd_stats[OP_SIZE_CQP_STAT_ARRAY];
- struct i40iw_vchnl_vf_msg_buffer vchnl_vf_msg_buf;
- u8 hw_rev;
-};
-
-struct i40iw_modify_cq_info {
- u64 cq_pa;
- struct i40iw_cqe *cq_base;
- void *pbl_list;
- u32 ceq_id;
- u32 cq_size;
- u32 shadow_read_threshold;
- bool virtual_map;
- u8 pbl_chunk_size;
- bool check_overflow;
- bool cq_resize;
- bool ceq_change;
- bool check_overflow_change;
- u32 first_pm_pbl_idx;
- bool ceq_valid;
-};
-
-struct i40iw_create_qp_info {
- u8 next_iwarp_state;
- bool ord_valid;
- bool tcp_ctx_valid;
- bool cq_num_valid;
- bool arp_cache_idx_valid;
-};
-
-struct i40iw_modify_qp_info {
- u64 rx_win0;
- u64 rx_win1;
- u8 next_iwarp_state;
- u8 termlen;
- bool ord_valid;
- bool tcp_ctx_valid;
- bool cq_num_valid;
- bool arp_cache_idx_valid;
- bool reset_tcp_conn;
- bool remove_hash_idx;
- bool dont_send_term;
- bool dont_send_fin;
- bool cached_var_valid;
- bool force_loopback;
-};
-
-struct i40iw_ccq_cqe_info {
- struct i40iw_sc_cqp *cqp;
- u64 scratch;
- u32 op_ret_val;
- u16 maj_err_code;
- u16 min_err_code;
- u8 op_code;
- bool error;
-};
-
-struct i40iw_l2params {
- u16 qs_handle_list[I40IW_MAX_USER_PRIORITY];
- u16 mtu;
-};
-
-struct i40iw_vsi_init_info {
- struct i40iw_sc_dev *dev;
- void *back_vsi;
- struct i40iw_l2params *params;
- u16 exception_lan_queue;
-};
-
-struct i40iw_vsi_stats_info {
- struct i40iw_vsi_pestat *pestat;
- u8 fcn_id;
- bool alloc_fcn_id;
- bool stats_initialize;
-};
-
-struct i40iw_device_init_info {
- u64 fpm_query_buf_pa;
- u64 fpm_commit_buf_pa;
- u64 *fpm_query_buf;
- u64 *fpm_commit_buf;
- struct i40iw_hw *hw;
- void __iomem *bar0;
- enum i40iw_status_code (*vchnl_send)(struct i40iw_sc_dev *, u32, u8 *, u16);
- u8 hmc_fn_id;
- bool is_pf;
- u32 debug_mask;
-};
-
-enum i40iw_cqp_hmc_profile {
- I40IW_HMC_PROFILE_DEFAULT = 1,
- I40IW_HMC_PROFILE_FAVOR_VF = 2,
- I40IW_HMC_PROFILE_EQUAL = 3,
-};
-
-struct i40iw_cqp_init_info {
- u64 cqp_compl_ctx;
- u64 host_ctx_pa;
- u64 sq_pa;
- struct i40iw_sc_dev *dev;
- struct i40iw_cqp_quanta *sq;
- u64 *host_ctx;
- u64 *scratch_array;
- u32 sq_size;
- u8 struct_ver;
- bool en_datacenter_tcp;
- u8 hmc_profile;
- u8 enabled_vf_count;
-};
-
-struct i40iw_ceq_init_info {
- u64 ceqe_pa;
- struct i40iw_sc_dev *dev;
- u64 *ceqe_base;
- void *pbl_list;
- u32 elem_cnt;
- u32 ceq_id;
- bool virtual_map;
- u8 pbl_chunk_size;
- bool tph_en;
- u8 tph_val;
- u32 first_pm_pbl_idx;
-};
-
-struct i40iw_aeq_init_info {
- u64 aeq_elem_pa;
- struct i40iw_sc_dev *dev;
- u32 *aeqe_base;
- void *pbl_list;
- u32 elem_cnt;
- bool virtual_map;
- u8 pbl_chunk_size;
- u32 first_pm_pbl_idx;
-};
-
-struct i40iw_ccq_init_info {
- u64 cq_pa;
- u64 shadow_area_pa;
- struct i40iw_sc_dev *dev;
- struct i40iw_cqe *cq_base;
- u64 *shadow_area;
- void *pbl_list;
- u32 num_elem;
- u32 ceq_id;
- u32 shadow_read_threshold;
- bool ceqe_mask;
- bool ceq_id_valid;
- bool tph_en;
- u8 tph_val;
- bool avoid_mem_cflct;
- bool virtual_map;
- u8 pbl_chunk_size;
- u32 first_pm_pbl_idx;
-};
-
-struct i40iwarp_offload_info {
- u16 rcv_mark_offset;
- u16 snd_mark_offset;
- u16 pd_id;
- u8 ddp_ver;
- u8 rdmap_ver;
- u8 ord_size;
- u8 ird_size;
- bool wr_rdresp_en;
- bool rd_enable;
- bool snd_mark_en;
- bool rcv_mark_en;
- bool bind_en;
- bool fast_reg_en;
- bool priv_mode_en;
- bool lsmm_present;
- u8 iwarp_mode;
- bool align_hdrs;
- bool rcv_no_mpa_crc;
-
- u8 last_byte_sent;
-};
-
-struct i40iw_tcp_offload_info {
- bool ipv4;
- bool no_nagle;
- bool insert_vlan_tag;
- bool time_stamp;
- u8 cwnd_inc_limit;
- bool drop_ooo_seg;
- u8 dup_ack_thresh;
- u8 ttl;
- u8 src_mac_addr_idx;
- bool avoid_stretch_ack;
- u8 tos;
- u16 src_port;
- u16 dst_port;
- u32 dest_ip_addr0;
- u32 dest_ip_addr1;
- u32 dest_ip_addr2;
- u32 dest_ip_addr3;
- u32 snd_mss;
- u16 vlan_tag;
- u16 arp_idx;
- u32 flow_label;
- bool wscale;
- u8 tcp_state;
- u8 snd_wscale;
- u8 rcv_wscale;
- u32 time_stamp_recent;
- u32 time_stamp_age;
- u32 snd_nxt;
- u32 snd_wnd;
- u32 rcv_nxt;
- u32 rcv_wnd;
- u32 snd_max;
- u32 snd_una;
- u32 srtt;
- u32 rtt_var;
- u32 ss_thresh;
- u32 cwnd;
- u32 snd_wl1;
- u32 snd_wl2;
- u32 max_snd_window;
- u8 rexmit_thresh;
- u32 local_ipaddr0;
- u32 local_ipaddr1;
- u32 local_ipaddr2;
- u32 local_ipaddr3;
- bool ignore_tcp_opt;
- bool ignore_tcp_uns_opt;
-};
-
-struct i40iw_qp_host_ctx_info {
- u64 qp_compl_ctx;
- struct i40iw_tcp_offload_info *tcp_info;
- struct i40iwarp_offload_info *iwarp_info;
- u32 send_cq_num;
- u32 rcv_cq_num;
- u16 push_idx;
- bool push_mode_en;
- bool tcp_info_valid;
- bool iwarp_info_valid;
- bool err_rq_idx_valid;
- u16 err_rq_idx;
- bool add_to_qoslist;
- u8 user_pri;
-};
-
-struct i40iw_aeqe_info {
- u64 compl_ctx;
- u32 qp_cq_id;
- u16 ae_id;
- u16 wqe_idx;
- u8 tcp_state;
- u8 iwarp_state;
- bool qp;
- bool cq;
- bool sq;
- bool in_rdrsp_wr;
- bool out_rdrsp;
- u8 q2_data_written;
- bool aeqe_overflow;
-};
-
-struct i40iw_allocate_stag_info {
- u64 total_len;
- u32 chunk_size;
- u32 stag_idx;
- u32 page_size;
- u16 pd_id;
- u16 access_rights;
- bool remote_access;
- bool use_hmc_fcn_index;
- u8 hmc_fcn_index;
- bool use_pf_rid;
-};
-
-struct i40iw_reg_ns_stag_info {
- u64 reg_addr_pa;
- u64 fbo;
- void *va;
- u64 total_len;
- u32 page_size;
- u32 chunk_size;
- u32 first_pm_pbl_index;
- enum i40iw_addressing_type addr_type;
- i40iw_stag_index stag_idx;
- u16 access_rights;
- u16 pd_id;
- i40iw_stag_key stag_key;
- bool use_hmc_fcn_index;
- u8 hmc_fcn_index;
- bool use_pf_rid;
-};
-
-struct i40iw_fast_reg_stag_info {
- u64 wr_id;
- u64 reg_addr_pa;
- u64 fbo;
- void *va;
- u64 total_len;
- u32 page_size;
- u32 chunk_size;
- u32 first_pm_pbl_index;
- enum i40iw_addressing_type addr_type;
- i40iw_stag_index stag_idx;
- u16 access_rights;
- u16 pd_id;
- i40iw_stag_key stag_key;
- bool local_fence;
- bool read_fence;
- bool signaled;
- bool use_hmc_fcn_index;
- u8 hmc_fcn_index;
- bool use_pf_rid;
- bool defer_flag;
-};
-
-struct i40iw_dealloc_stag_info {
- u32 stag_idx;
- u16 pd_id;
- bool mr;
- bool dealloc_pbl;
-};
-
-struct i40iw_register_shared_stag {
- void *va;
- enum i40iw_addressing_type addr_type;
- i40iw_stag_index new_stag_idx;
- i40iw_stag_index parent_stag_idx;
- u32 access_rights;
- u16 pd_id;
- i40iw_stag_key new_stag_key;
-};
-
-struct i40iw_qp_init_info {
- struct i40iw_qp_uk_init_info qp_uk_init_info;
- struct i40iw_sc_pd *pd;
- struct i40iw_sc_vsi *vsi;
- u64 *host_ctx;
- u8 *q2;
- u64 sq_pa;
- u64 rq_pa;
- u64 host_ctx_pa;
- u64 q2_pa;
- u64 shadow_area_pa;
- int abi_ver;
- u8 sq_tph_val;
- u8 rq_tph_val;
- u8 type;
- bool sq_tph_en;
- bool rq_tph_en;
- bool rcv_tph_en;
- bool xmit_tph_en;
- bool virtual_map;
-};
-
-struct i40iw_cq_init_info {
- struct i40iw_sc_dev *dev;
- u64 cq_base_pa;
- u64 shadow_area_pa;
- u32 ceq_id;
- u32 shadow_read_threshold;
- bool virtual_map;
- bool ceqe_mask;
- u8 pbl_chunk_size;
- u32 first_pm_pbl_idx;
- bool ceq_id_valid;
- bool tph_en;
- u8 tph_val;
- u8 type;
- struct i40iw_cq_uk_init_info cq_uk_init_info;
-};
-
-struct i40iw_upload_context_info {
- u64 buf_pa;
- bool freeze_qp;
- bool raw_format;
- u32 qp_id;
- u8 qp_type;
-};
-
-struct i40iw_add_arp_cache_entry_info {
- u8 mac_addr[6];
- u32 reach_max;
- u16 arp_index;
- bool permanent;
-};
-
-struct i40iw_apbvt_info {
- u16 port;
- bool add;
-};
-
-enum i40iw_quad_entry_type {
- I40IW_QHASH_TYPE_TCP_ESTABLISHED = 1,
- I40IW_QHASH_TYPE_TCP_SYN,
-};
-
-enum i40iw_quad_hash_manage_type {
- I40IW_QHASH_MANAGE_TYPE_DELETE = 0,
- I40IW_QHASH_MANAGE_TYPE_ADD,
- I40IW_QHASH_MANAGE_TYPE_MODIFY
-};
-
-struct i40iw_qhash_table_info {
- struct i40iw_sc_vsi *vsi;
- enum i40iw_quad_hash_manage_type manage;
- enum i40iw_quad_entry_type entry_type;
- bool vlan_valid;
- bool ipv4_valid;
- u8 mac_addr[6];
- u16 vlan_id;
- u8 user_pri;
- u32 qp_num;
- u32 dest_ip[4];
- u32 src_ip[4];
- u16 dest_port;
- u16 src_port;
-};
-
-struct i40iw_local_mac_ipaddr_entry_info {
- u8 mac_addr[6];
- u8 entry_idx;
-};
-
-struct i40iw_cqp_manage_push_page_info {
- u32 push_idx;
- u16 qs_handle;
- u8 free_page;
-};
-
-struct i40iw_qp_flush_info {
- u16 sq_minor_code;
- u16 sq_major_code;
- u16 rq_minor_code;
- u16 rq_major_code;
- u16 ae_code;
- u8 ae_source;
- bool sq;
- bool rq;
- bool userflushcode;
- bool generate_ae;
-};
-
-struct i40iw_cqp_commit_fpm_values {
- u64 qp_base;
- u64 cq_base;
- u32 hte_base;
- u32 arp_base;
- u32 apbvt_inuse_base;
- u32 mr_base;
- u32 xf_base;
- u32 xffl_base;
- u32 q1_base;
- u32 q1fl_base;
- u32 fsimc_base;
- u32 fsiav_base;
- u32 pbl_base;
-
- u32 qp_cnt;
- u32 cq_cnt;
- u32 hte_cnt;
- u32 arp_cnt;
- u32 mr_cnt;
- u32 xf_cnt;
- u32 xffl_cnt;
- u32 q1_cnt;
- u32 q1fl_cnt;
- u32 fsimc_cnt;
- u32 fsiav_cnt;
- u32 pbl_cnt;
-};
-
-struct i40iw_cqp_query_fpm_values {
- u16 first_pe_sd_index;
- u32 qp_objsize;
- u32 cq_objsize;
- u32 hte_objsize;
- u32 arp_objsize;
- u32 mr_objsize;
- u32 xf_objsize;
- u32 q1_objsize;
- u32 fsimc_objsize;
- u32 fsiav_objsize;
-
- u32 qp_max;
- u32 cq_max;
- u32 hte_max;
- u32 arp_max;
- u32 mr_max;
- u32 xf_max;
- u32 xffl_max;
- u32 q1_max;
- u32 q1fl_max;
- u32 fsimc_max;
- u32 fsiav_max;
- u32 pbl_max;
-};
-
-struct i40iw_gen_ae_info {
- u16 ae_code;
- u8 ae_source;
-};
-
-struct i40iw_cqp_ops {
- enum i40iw_status_code (*cqp_init)(struct i40iw_sc_cqp *,
- struct i40iw_cqp_init_info *);
- enum i40iw_status_code (*cqp_create)(struct i40iw_sc_cqp *, u16 *, u16 *);
- void (*cqp_post_sq)(struct i40iw_sc_cqp *);
- u64 *(*cqp_get_next_send_wqe)(struct i40iw_sc_cqp *, u64 scratch);
- enum i40iw_status_code (*cqp_destroy)(struct i40iw_sc_cqp *);
- enum i40iw_status_code (*poll_for_cqp_op_done)(struct i40iw_sc_cqp *, u8,
- struct i40iw_ccq_cqe_info *);
-};
-
-struct i40iw_ccq_ops {
- enum i40iw_status_code (*ccq_init)(struct i40iw_sc_cq *,
- struct i40iw_ccq_init_info *);
- enum i40iw_status_code (*ccq_create)(struct i40iw_sc_cq *, u64, bool, bool);
- enum i40iw_status_code (*ccq_destroy)(struct i40iw_sc_cq *, u64, bool);
- enum i40iw_status_code (*ccq_create_done)(struct i40iw_sc_cq *);
- enum i40iw_status_code (*ccq_get_cqe_info)(struct i40iw_sc_cq *,
- struct i40iw_ccq_cqe_info *);
- void (*ccq_arm)(struct i40iw_sc_cq *);
-};
-
-struct i40iw_ceq_ops {
- enum i40iw_status_code (*ceq_init)(struct i40iw_sc_ceq *,
- struct i40iw_ceq_init_info *);
- enum i40iw_status_code (*ceq_create)(struct i40iw_sc_ceq *, u64, bool);
- enum i40iw_status_code (*cceq_create_done)(struct i40iw_sc_ceq *);
- enum i40iw_status_code (*cceq_destroy_done)(struct i40iw_sc_ceq *);
- enum i40iw_status_code (*cceq_create)(struct i40iw_sc_ceq *, u64);
- enum i40iw_status_code (*ceq_destroy)(struct i40iw_sc_ceq *, u64, bool);
- void *(*process_ceq)(struct i40iw_sc_dev *, struct i40iw_sc_ceq *);
-};
-
-struct i40iw_aeq_ops {
- enum i40iw_status_code (*aeq_init)(struct i40iw_sc_aeq *,
- struct i40iw_aeq_init_info *);
- enum i40iw_status_code (*aeq_create)(struct i40iw_sc_aeq *, u64, bool);
- enum i40iw_status_code (*aeq_destroy)(struct i40iw_sc_aeq *, u64, bool);
- enum i40iw_status_code (*get_next_aeqe)(struct i40iw_sc_aeq *,
- struct i40iw_aeqe_info *);
- enum i40iw_status_code (*repost_aeq_entries)(struct i40iw_sc_dev *, u32);
- enum i40iw_status_code (*aeq_create_done)(struct i40iw_sc_aeq *);
- enum i40iw_status_code (*aeq_destroy_done)(struct i40iw_sc_aeq *);
-};
-
-struct i40iw_pd_ops {
- void (*pd_init)(struct i40iw_sc_dev *, struct i40iw_sc_pd *, u16, int);
-};
-
-struct i40iw_priv_qp_ops {
- enum i40iw_status_code (*qp_init)(struct i40iw_sc_qp *, struct i40iw_qp_init_info *);
- enum i40iw_status_code (*qp_create)(struct i40iw_sc_qp *,
- struct i40iw_create_qp_info *, u64, bool);
- enum i40iw_status_code (*qp_modify)(struct i40iw_sc_qp *,
- struct i40iw_modify_qp_info *, u64, bool);
- enum i40iw_status_code (*qp_destroy)(struct i40iw_sc_qp *, u64, bool, bool, bool);
- enum i40iw_status_code (*qp_flush_wqes)(struct i40iw_sc_qp *,
- struct i40iw_qp_flush_info *, u64, bool);
- enum i40iw_status_code (*qp_upload_context)(struct i40iw_sc_dev *,
- struct i40iw_upload_context_info *,
- u64, bool);
- enum i40iw_status_code (*qp_setctx)(struct i40iw_sc_qp *, u64 *,
- struct i40iw_qp_host_ctx_info *);
-
- void (*qp_send_lsmm)(struct i40iw_sc_qp *, void *, u32, i40iw_stag);
- void (*qp_send_lsmm_nostag)(struct i40iw_sc_qp *, void *, u32);
- void (*qp_send_rtt)(struct i40iw_sc_qp *, bool);
- enum i40iw_status_code (*qp_post_wqe0)(struct i40iw_sc_qp *, u8);
- enum i40iw_status_code (*iw_mr_fast_register)(struct i40iw_sc_qp *,
- struct i40iw_fast_reg_stag_info *,
- bool);
-};
-
-struct i40iw_priv_cq_ops {
- enum i40iw_status_code (*cq_init)(struct i40iw_sc_cq *, struct i40iw_cq_init_info *);
- enum i40iw_status_code (*cq_create)(struct i40iw_sc_cq *, u64, bool, bool);
- enum i40iw_status_code (*cq_destroy)(struct i40iw_sc_cq *, u64, bool);
- enum i40iw_status_code (*cq_modify)(struct i40iw_sc_cq *,
- struct i40iw_modify_cq_info *, u64, bool);
-};
-
-struct i40iw_mr_ops {
- enum i40iw_status_code (*alloc_stag)(struct i40iw_sc_dev *,
- struct i40iw_allocate_stag_info *, u64, bool);
- enum i40iw_status_code (*mr_reg_non_shared)(struct i40iw_sc_dev *,
- struct i40iw_reg_ns_stag_info *,
- u64, bool);
- enum i40iw_status_code (*mr_reg_shared)(struct i40iw_sc_dev *,
- struct i40iw_register_shared_stag *,
- u64, bool);
- enum i40iw_status_code (*dealloc_stag)(struct i40iw_sc_dev *,
- struct i40iw_dealloc_stag_info *,
- u64, bool);
- enum i40iw_status_code (*query_stag)(struct i40iw_sc_dev *, u64, u32, bool);
- enum i40iw_status_code (*mw_alloc)(struct i40iw_sc_dev *, u64, u32, u16, bool);
-};
-
-struct i40iw_cqp_misc_ops {
- enum i40iw_status_code (*manage_push_page)(struct i40iw_sc_cqp *,
- struct i40iw_cqp_manage_push_page_info *,
- u64, bool);
- enum i40iw_status_code (*manage_hmc_pm_func_table)(struct i40iw_sc_cqp *,
- u64, u8, bool, bool);
- enum i40iw_status_code (*set_hmc_resource_profile)(struct i40iw_sc_cqp *,
- u64, u8, u8, bool, bool);
- enum i40iw_status_code (*commit_fpm_values)(struct i40iw_sc_cqp *, u64, u8,
- struct i40iw_dma_mem *, bool, u8);
- enum i40iw_status_code (*query_fpm_values)(struct i40iw_sc_cqp *, u64, u8,
- struct i40iw_dma_mem *, bool, u8);
- enum i40iw_status_code (*static_hmc_pages_allocated)(struct i40iw_sc_cqp *,
- u64, u8, bool, bool);
- enum i40iw_status_code (*add_arp_cache_entry)(struct i40iw_sc_cqp *,
- struct i40iw_add_arp_cache_entry_info *,
- u64, bool);
- enum i40iw_status_code (*del_arp_cache_entry)(struct i40iw_sc_cqp *, u64, u16, bool);
- enum i40iw_status_code (*query_arp_cache_entry)(struct i40iw_sc_cqp *, u64, u16, bool);
- enum i40iw_status_code (*manage_apbvt_entry)(struct i40iw_sc_cqp *,
- struct i40iw_apbvt_info *, u64, bool);
- enum i40iw_status_code (*manage_qhash_table_entry)(struct i40iw_sc_cqp *,
- struct i40iw_qhash_table_info *, u64, bool);
- enum i40iw_status_code (*alloc_local_mac_ipaddr_table_entry)(struct i40iw_sc_cqp *, u64, bool);
- enum i40iw_status_code (*add_local_mac_ipaddr_entry)(struct i40iw_sc_cqp *,
- struct i40iw_local_mac_ipaddr_entry_info *,
- u64, bool);
- enum i40iw_status_code (*del_local_mac_ipaddr_entry)(struct i40iw_sc_cqp *, u64, u8, u8, bool);
- enum i40iw_status_code (*cqp_nop)(struct i40iw_sc_cqp *, u64, bool);
- enum i40iw_status_code (*commit_fpm_values_done)(struct i40iw_sc_cqp *);
- enum i40iw_status_code (*query_fpm_values_done)(struct i40iw_sc_cqp *);
- enum i40iw_status_code (*manage_hmc_pm_func_table_done)(struct i40iw_sc_cqp *);
- enum i40iw_status_code (*update_suspend_qp)(struct i40iw_sc_cqp *, struct i40iw_sc_qp *, u64);
- enum i40iw_status_code (*update_resume_qp)(struct i40iw_sc_cqp *, struct i40iw_sc_qp *, u64);
-};
-
-struct i40iw_hmc_ops {
- enum i40iw_status_code (*init_iw_hmc)(struct i40iw_sc_dev *, u8);
- enum i40iw_status_code (*parse_fpm_query_buf)(u64 *, struct i40iw_hmc_info *,
- struct i40iw_hmc_fpm_misc *);
- enum i40iw_status_code (*configure_iw_fpm)(struct i40iw_sc_dev *, u8);
- enum i40iw_status_code (*parse_fpm_commit_buf)(u64 *, struct i40iw_hmc_obj_info *, u32 *sd);
- enum i40iw_status_code (*create_hmc_object)(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_create_obj_info *);
- enum i40iw_status_code (*del_hmc_object)(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_del_obj_info *,
- bool reset);
- enum i40iw_status_code (*pf_init_vfhmc)(struct i40iw_sc_dev *, u8, u32 *);
- enum i40iw_status_code (*vf_configure_vffpm)(struct i40iw_sc_dev *, u32 *);
-};
-
-struct cqp_info {
- union {
- struct {
- struct i40iw_sc_qp *qp;
- struct i40iw_create_qp_info info;
- u64 scratch;
- } qp_create;
-
- struct {
- struct i40iw_sc_qp *qp;
- struct i40iw_modify_qp_info info;
- u64 scratch;
- } qp_modify;
-
- struct {
- struct i40iw_sc_qp *qp;
- u64 scratch;
- bool remove_hash_idx;
- bool ignore_mw_bnd;
- } qp_destroy;
-
- struct {
- struct i40iw_sc_cq *cq;
- u64 scratch;
- bool check_overflow;
- } cq_create;
-
- struct {
- struct i40iw_sc_cq *cq;
- u64 scratch;
- } cq_destroy;
-
- struct {
- struct i40iw_sc_dev *dev;
- struct i40iw_allocate_stag_info info;
- u64 scratch;
- } alloc_stag;
-
- struct {
- struct i40iw_sc_dev *dev;
- u64 scratch;
- u32 mw_stag_index;
- u16 pd_id;
- } mw_alloc;
-
- struct {
- struct i40iw_sc_dev *dev;
- struct i40iw_reg_ns_stag_info info;
- u64 scratch;
- } mr_reg_non_shared;
-
- struct {
- struct i40iw_sc_dev *dev;
- struct i40iw_dealloc_stag_info info;
- u64 scratch;
- } dealloc_stag;
-
- struct {
- struct i40iw_sc_cqp *cqp;
- struct i40iw_local_mac_ipaddr_entry_info info;
- u64 scratch;
- } add_local_mac_ipaddr_entry;
-
- struct {
- struct i40iw_sc_cqp *cqp;
- struct i40iw_add_arp_cache_entry_info info;
- u64 scratch;
- } add_arp_cache_entry;
-
- struct {
- struct i40iw_sc_cqp *cqp;
- u64 scratch;
- u8 entry_idx;
- u8 ignore_ref_count;
- } del_local_mac_ipaddr_entry;
-
- struct {
- struct i40iw_sc_cqp *cqp;
- u64 scratch;
- u16 arp_index;
- } del_arp_cache_entry;
-
- struct {
- struct i40iw_sc_cqp *cqp;
- struct i40iw_manage_vf_pble_info info;
- u64 scratch;
- } manage_vf_pble_bp;
-
- struct {
- struct i40iw_sc_cqp *cqp;
- struct i40iw_cqp_manage_push_page_info info;
- u64 scratch;
- } manage_push_page;
-
- struct {
- struct i40iw_sc_dev *dev;
- struct i40iw_upload_context_info info;
- u64 scratch;
- } qp_upload_context;
-
- struct {
- struct i40iw_sc_cqp *cqp;
- u64 scratch;
- } alloc_local_mac_ipaddr_entry;
-
- struct {
- struct i40iw_sc_dev *dev;
- struct i40iw_hmc_fcn_info info;
- u64 scratch;
- } manage_hmc_pm;
-
- struct {
- struct i40iw_sc_ceq *ceq;
- u64 scratch;
- } ceq_create;
-
- struct {
- struct i40iw_sc_ceq *ceq;
- u64 scratch;
- } ceq_destroy;
-
- struct {
- struct i40iw_sc_aeq *aeq;
- u64 scratch;
- } aeq_create;
-
- struct {
- struct i40iw_sc_aeq *aeq;
- u64 scratch;
- } aeq_destroy;
-
- struct {
- struct i40iw_sc_qp *qp;
- struct i40iw_qp_flush_info info;
- u64 scratch;
- } qp_flush_wqes;
-
- struct {
- struct i40iw_sc_qp *qp;
- struct i40iw_gen_ae_info info;
- u64 scratch;
- } gen_ae;
-
- struct {
- struct i40iw_sc_cqp *cqp;
- void *fpm_values_va;
- u64 fpm_values_pa;
- u8 hmc_fn_id;
- u64 scratch;
- } query_fpm_values;
-
- struct {
- struct i40iw_sc_cqp *cqp;
- void *fpm_values_va;
- u64 fpm_values_pa;
- u8 hmc_fn_id;
- u64 scratch;
- } commit_fpm_values;
-
- struct {
- struct i40iw_sc_cqp *cqp;
- struct i40iw_apbvt_info info;
- u64 scratch;
- } manage_apbvt_entry;
-
- struct {
- struct i40iw_sc_cqp *cqp;
- struct i40iw_qhash_table_info info;
- u64 scratch;
- } manage_qhash_table_entry;
-
- struct {
- struct i40iw_sc_dev *dev;
- struct i40iw_update_sds_info info;
- u64 scratch;
- } update_pe_sds;
-
- struct {
- struct i40iw_sc_cqp *cqp;
- struct i40iw_sc_qp *qp;
- u64 scratch;
- } suspend_resume;
- } u;
-};
-
-struct cqp_commands_info {
- struct list_head cqp_cmd_entry;
- u8 cqp_cmd;
- u8 post_sq;
- struct cqp_info in;
-};
-
-struct i40iw_virtchnl_work_info {
- void (*callback_fcn)(void *vf_dev);
- void *worker_vf_dev;
-};
-
-struct i40iw_cqp_timeout {
- u64 compl_cqp_cmds;
- u8 count;
-};
-
-#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c
deleted file mode 100644
index 8afa5a67a86b..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_uk.c
+++ /dev/null
@@ -1,1232 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#include "i40iw_osdep.h"
-#include "i40iw_status.h"
-#include "i40iw_d.h"
-#include "i40iw_user.h"
-#include "i40iw_register.h"
-
-static u32 nop_signature = 0x55550000;
-
-/**
- * i40iw_nop_1 - insert a nop wqe and move head; does not post to hw
- * @qp: hw qp ptr
- */
-static enum i40iw_status_code i40iw_nop_1(struct i40iw_qp_uk *qp)
-{
- u64 header, *wqe;
- u64 *wqe_0 = NULL;
- u32 wqe_idx, peek_head;
- bool signaled = false;
-
- if (!qp->sq_ring.head)
- return I40IW_ERR_PARAM;
-
- wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
- wqe = qp->sq_base[wqe_idx].elem;
-
- qp->sq_wrtrk_array[wqe_idx].wqe_size = I40IW_QP_WQE_MIN_SIZE;
-
- peek_head = (qp->sq_ring.head + 1) % qp->sq_ring.size;
- wqe_0 = qp->sq_base[peek_head].elem;
- if (peek_head)
- wqe_0[3] = LS_64(!qp->swqe_polarity, I40IWQPSQ_VALID);
- else
- wqe_0[3] = LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
-
- set_64bit_val(wqe, 0, 0);
- set_64bit_val(wqe, 8, 0);
- set_64bit_val(wqe, 16, 0);
-
- header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) |
- LS_64(signaled, I40IWQPSQ_SIGCOMPL) |
- LS_64(qp->swqe_polarity, I40IWQPSQ_VALID) | nop_signature++;
-
- wmb(); /* Memory barrier to ensure data is written before valid bit is set */
-
- set_64bit_val(wqe, 24, header);
- return 0;
-}
-
-/**
- * i40iw_qp_post_wr - post wr to hardware
- * @qp: hw qp ptr
- */
-void i40iw_qp_post_wr(struct i40iw_qp_uk *qp)
-{
- u64 temp;
- u32 hw_sq_tail;
- u32 sw_sq_head;
-
- mb(); /* valid bit is written and loads completed before reading shadow */
-
- /* read the doorbell shadow area */
- get_64bit_val(qp->shadow_area, 0, &temp);
-
- hw_sq_tail = (u32)RS_64(temp, I40IW_QP_DBSA_HW_SQ_TAIL);
- sw_sq_head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
- if (sw_sq_head != hw_sq_tail) {
- if (sw_sq_head > qp->initial_ring.head) {
- if ((hw_sq_tail >= qp->initial_ring.head) &&
- (hw_sq_tail < sw_sq_head)) {
- writel(qp->qp_id, qp->wqe_alloc_reg);
- }
- } else if (sw_sq_head != qp->initial_ring.head) {
- if ((hw_sq_tail >= qp->initial_ring.head) ||
- (hw_sq_tail < sw_sq_head)) {
- writel(qp->qp_id, qp->wqe_alloc_reg);
- }
- }
- }
-
- qp->initial_ring.head = qp->sq_ring.head;
-}
-
-/**
- * i40iw_qp_ring_push_db - ring qp doorbell
- * @qp: hw qp ptr
- * @wqe_idx: wqe index
- */
-static void i40iw_qp_ring_push_db(struct i40iw_qp_uk *qp, u32 wqe_idx)
-{
- set_32bit_val(qp->push_db, 0, LS_32((wqe_idx >> 2), I40E_PFPE_WQEALLOC_WQE_DESC_INDEX) | qp->qp_id);
- qp->initial_ring.head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
-}
-
-/**
- * i40iw_qp_get_next_send_wqe - return next wqe ptr
- * @qp: hw qp ptr
- * @wqe_idx: return wqe index
- * @wqe_size: size of sq wqe
- * @total_size: total size of the work request in bytes
- * @wr_id: work request id
- */
-u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp,
- u32 *wqe_idx,
- u8 wqe_size,
- u32 total_size,
- u64 wr_id)
-{
- u64 *wqe = NULL;
- u64 wqe_ptr;
- u32 peek_head = 0;
- u16 offset;
- enum i40iw_status_code ret_code = 0;
- u8 nop_wqe_cnt = 0, i;
- u64 *wqe_0 = NULL;
-
- *wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
-
- if (!*wqe_idx)
- qp->swqe_polarity = !qp->swqe_polarity;
- wqe_ptr = (uintptr_t)qp->sq_base[*wqe_idx].elem;
- offset = (u16)(wqe_ptr) & 0x7F;
- if ((offset + wqe_size) > I40IW_QP_WQE_MAX_SIZE) {
- nop_wqe_cnt = (u8)(I40IW_QP_WQE_MAX_SIZE - offset) / I40IW_QP_WQE_MIN_SIZE;
- for (i = 0; i < nop_wqe_cnt; i++) {
- i40iw_nop_1(qp);
- I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
- if (ret_code)
- return NULL;
- }
-
- *wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
- if (!*wqe_idx)
- qp->swqe_polarity = !qp->swqe_polarity;
- }
-
- if (((*wqe_idx & 3) == 1) && (wqe_size == I40IW_WQE_SIZE_64)) {
- i40iw_nop_1(qp);
- I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
- if (ret_code)
- return NULL;
- *wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
- if (!*wqe_idx)
- qp->swqe_polarity = !qp->swqe_polarity;
- }
- I40IW_RING_MOVE_HEAD_BY_COUNT(qp->sq_ring,
- wqe_size / I40IW_QP_WQE_MIN_SIZE, ret_code);
- if (ret_code)
- return NULL;
-
- wqe = qp->sq_base[*wqe_idx].elem;
-
- peek_head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
- wqe_0 = qp->sq_base[peek_head].elem;
-
- if (((peek_head & 3) == 1) || ((peek_head & 3) == 3)) {
- if (RS_64(wqe_0[3], I40IWQPSQ_VALID) != !qp->swqe_polarity)
- wqe_0[3] = LS_64(!qp->swqe_polarity, I40IWQPSQ_VALID);
- }
-
- qp->sq_wrtrk_array[*wqe_idx].wrid = wr_id;
- qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size;
- qp->sq_wrtrk_array[*wqe_idx].wqe_size = wqe_size;
- return wqe;
-}
-
-/**
- * i40iw_set_fragment - set fragment in wqe
- * @wqe: wqe for setting fragment
- * @offset: offset value
- * @sge: sge length and stag
- */
-static void i40iw_set_fragment(u64 *wqe, u32 offset, struct i40iw_sge *sge)
-{
- if (sge) {
- set_64bit_val(wqe, offset, LS_64(sge->tag_off, I40IWQPSQ_FRAG_TO));
- set_64bit_val(wqe, (offset + 8),
- (LS_64(sge->len, I40IWQPSQ_FRAG_LEN) |
- LS_64(sge->stag, I40IWQPSQ_FRAG_STAG)));
- }
-}
-
-/**
- * i40iw_qp_get_next_recv_wqe - get next qp's rcv wqe
- * @qp: hw qp ptr
- * @wqe_idx: return wqe index
- */
-u64 *i40iw_qp_get_next_recv_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx)
-{
- u64 *wqe = NULL;
- enum i40iw_status_code ret_code;
-
- if (I40IW_RING_FULL_ERR(qp->rq_ring))
- return NULL;
-
- I40IW_ATOMIC_RING_MOVE_HEAD(qp->rq_ring, *wqe_idx, ret_code);
- if (ret_code)
- return NULL;
- if (!*wqe_idx)
- qp->rwqe_polarity = !qp->rwqe_polarity;
- /* rq_wqe_size_multiplier is the number of qwords in one rq wqe */
- wqe = qp->rq_base[*wqe_idx * (qp->rq_wqe_size_multiplier >> 2)].elem;
-
- return wqe;
-}
-
-/**
- * i40iw_rdma_write - rdma write operation
- * @qp: hw qp ptr
- * @info: post sq information
- * @post_sq: flag to post sq
- */
-static enum i40iw_status_code i40iw_rdma_write(struct i40iw_qp_uk *qp,
- struct i40iw_post_sq_info *info,
- bool post_sq)
-{
- u64 header;
- u64 *wqe;
- struct i40iw_rdma_write *op_info;
- u32 i, wqe_idx;
- u32 total_size = 0, byte_off;
- enum i40iw_status_code ret_code;
- bool read_fence = false;
- u8 wqe_size;
-
- op_info = &info->op.rdma_write;
- if (op_info->num_lo_sges > qp->max_sq_frag_cnt)
- return I40IW_ERR_INVALID_FRAG_COUNT;
-
- for (i = 0; i < op_info->num_lo_sges; i++)
- total_size += op_info->lo_sg_list[i].len;
-
- if (total_size > I40IW_MAX_OUTBOUND_MESSAGE_SIZE)
- return I40IW_ERR_QP_INVALID_MSG_SIZE;
-
- read_fence |= info->read_fence;
-
- ret_code = i40iw_fragcnt_to_wqesize_sq(op_info->num_lo_sges, &wqe_size);
- if (ret_code)
- return ret_code;
-
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, total_size, info->wr_id);
- if (!wqe)
- return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
- set_64bit_val(wqe, 16,
- LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO));
- if (!op_info->rem_addr.stag)
- return I40IW_ERR_BAD_STAG;
-
- header = LS_64(op_info->rem_addr.stag, I40IWQPSQ_REMSTAG) |
- LS_64(I40IWQP_OP_RDMA_WRITE, I40IWQPSQ_OPCODE) |
- LS_64((op_info->num_lo_sges > 1 ? (op_info->num_lo_sges - 1) : 0), I40IWQPSQ_ADDFRAGCNT) |
- LS_64(read_fence, I40IWQPSQ_READFENCE) |
- LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
- LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
- LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
-
- i40iw_set_fragment(wqe, 0, op_info->lo_sg_list);
-
- for (i = 1, byte_off = 32; i < op_info->num_lo_sges; i++) {
- i40iw_set_fragment(wqe, byte_off, &op_info->lo_sg_list[i]);
- byte_off += 16;
- }
-
- wmb(); /* make sure WQE is populated before valid bit is set */
-
- set_64bit_val(wqe, 24, header);
-
- if (post_sq)
- i40iw_qp_post_wr(qp);
-
- return 0;
-}
-
-/**
- * i40iw_rdma_read - rdma read command
- * @qp: hw qp ptr
- * @info: post sq information
- * @inv_stag: flag for inv_stag
- * @post_sq: flag to post sq
- */
-static enum i40iw_status_code i40iw_rdma_read(struct i40iw_qp_uk *qp,
- struct i40iw_post_sq_info *info,
- bool inv_stag,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_rdma_read *op_info;
- u64 header;
- u32 wqe_idx;
- enum i40iw_status_code ret_code;
- u8 wqe_size;
- bool local_fence = false;
-
- op_info = &info->op.rdma_read;
- ret_code = i40iw_fragcnt_to_wqesize_sq(1, &wqe_size);
- if (ret_code)
- return ret_code;
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, op_info->lo_addr.len, info->wr_id);
- if (!wqe)
- return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
- local_fence |= info->local_fence;
-
- set_64bit_val(wqe, 16, LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO));
- header = LS_64(op_info->rem_addr.stag, I40IWQPSQ_REMSTAG) |
- LS_64((inv_stag ? I40IWQP_OP_RDMA_READ_LOC_INV : I40IWQP_OP_RDMA_READ), I40IWQPSQ_OPCODE) |
- LS_64(info->read_fence, I40IWQPSQ_READFENCE) |
- LS_64(local_fence, I40IWQPSQ_LOCALFENCE) |
- LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
- LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
-
- i40iw_set_fragment(wqe, 0, &op_info->lo_addr);
-
- wmb(); /* make sure WQE is populated before valid bit is set */
-
- set_64bit_val(wqe, 24, header);
- if (post_sq)
- i40iw_qp_post_wr(qp);
-
- return 0;
-}
-
-/**
- * i40iw_send - rdma send command
- * @qp: hw qp ptr
- * @info: post sq information
- * @stag_to_inv: stag_to_inv value
- * @post_sq: flag to post sq
- */
-static enum i40iw_status_code i40iw_send(struct i40iw_qp_uk *qp,
- struct i40iw_post_sq_info *info,
- u32 stag_to_inv,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_post_send *op_info;
- u64 header;
- u32 i, wqe_idx, total_size = 0, byte_off;
- enum i40iw_status_code ret_code;
- bool read_fence = false;
- u8 wqe_size;
-
- op_info = &info->op.send;
- if (qp->max_sq_frag_cnt < op_info->num_sges)
- return I40IW_ERR_INVALID_FRAG_COUNT;
-
- for (i = 0; i < op_info->num_sges; i++)
- total_size += op_info->sg_list[i].len;
- ret_code = i40iw_fragcnt_to_wqesize_sq(op_info->num_sges, &wqe_size);
- if (ret_code)
- return ret_code;
-
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, total_size, info->wr_id);
- if (!wqe)
- return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
-
- read_fence |= info->read_fence;
- set_64bit_val(wqe, 16, 0);
- header = LS_64(stag_to_inv, I40IWQPSQ_REMSTAG) |
- LS_64(info->op_type, I40IWQPSQ_OPCODE) |
- LS_64((op_info->num_sges > 1 ? (op_info->num_sges - 1) : 0),
- I40IWQPSQ_ADDFRAGCNT) |
- LS_64(read_fence, I40IWQPSQ_READFENCE) |
- LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
- LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
- LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
-
- i40iw_set_fragment(wqe, 0, op_info->sg_list);
-
- for (i = 1, byte_off = 32; i < op_info->num_sges; i++) {
- i40iw_set_fragment(wqe, byte_off, &op_info->sg_list[i]);
- byte_off += 16;
- }
-
- wmb(); /* make sure WQE is populated before valid bit is set */
-
- set_64bit_val(wqe, 24, header);
- if (post_sq)
- i40iw_qp_post_wr(qp);
-
- return 0;
-}
-
-/**
- * i40iw_inline_rdma_write - inline rdma write operation
- * @qp: hw qp ptr
- * @info: post sq information
- * @post_sq: flag to post sq
- */
-static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp,
- struct i40iw_post_sq_info *info,
- bool post_sq)
-{
- u64 *wqe;
- u8 *dest, *src;
- struct i40iw_inline_rdma_write *op_info;
- u64 *push;
- u64 header = 0;
- u32 wqe_idx;
- enum i40iw_status_code ret_code;
- bool read_fence = false;
- u8 wqe_size;
-
- op_info = &info->op.inline_rdma_write;
- if (op_info->len > I40IW_MAX_INLINE_DATA_SIZE)
- return I40IW_ERR_INVALID_INLINE_DATA_SIZE;
-
- ret_code = i40iw_inline_data_size_to_wqesize(op_info->len, &wqe_size);
- if (ret_code)
- return ret_code;
-
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, op_info->len, info->wr_id);
- if (!wqe)
- return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
-
- read_fence |= info->read_fence;
- set_64bit_val(wqe, 16,
- LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO));
-
- header = LS_64(op_info->rem_addr.stag, I40IWQPSQ_REMSTAG) |
- LS_64(I40IWQP_OP_RDMA_WRITE, I40IWQPSQ_OPCODE) |
- LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) |
- LS_64(1, I40IWQPSQ_INLINEDATAFLAG) |
- LS_64((qp->push_db ? 1 : 0), I40IWQPSQ_PUSHWQE) |
- LS_64(read_fence, I40IWQPSQ_READFENCE) |
- LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
- LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
- LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
-
- dest = (u8 *)wqe;
- src = (u8 *)(op_info->data);
-
- if (op_info->len <= 16) {
- memcpy(dest, src, op_info->len);
- } else {
- memcpy(dest, src, 16);
- src += 16;
- dest = (u8 *)wqe + 32;
- memcpy(dest, src, op_info->len - 16);
- }
-
- wmb(); /* make sure WQE is populated before valid bit is set */
-
- set_64bit_val(wqe, 24, header);
-
- if (qp->push_db) {
- push = (u64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x3) * 0x20);
- memcpy(push, wqe, (op_info->len > 16) ? op_info->len + 16 : 32);
- i40iw_qp_ring_push_db(qp, wqe_idx);
- } else {
- if (post_sq)
- i40iw_qp_post_wr(qp);
- }
-
- return 0;
-}
-
-/**
- * i40iw_inline_send - inline send operation
- * @qp: hw qp ptr
- * @info: post sq information
- * @stag_to_inv: remote stag
- * @post_sq: flag to post sq
- */
-static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp,
- struct i40iw_post_sq_info *info,
- u32 stag_to_inv,
- bool post_sq)
-{
- u64 *wqe;
- u8 *dest, *src;
- struct i40iw_post_inline_send *op_info;
- u64 header;
- u32 wqe_idx;
- enum i40iw_status_code ret_code;
- bool read_fence = false;
- u8 wqe_size;
- u64 *push;
-
- op_info = &info->op.inline_send;
- if (op_info->len > I40IW_MAX_INLINE_DATA_SIZE)
- return I40IW_ERR_INVALID_INLINE_DATA_SIZE;
-
- ret_code = i40iw_inline_data_size_to_wqesize(op_info->len, &wqe_size);
- if (ret_code)
- return ret_code;
-
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, op_info->len, info->wr_id);
- if (!wqe)
- return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
-
- read_fence |= info->read_fence;
- header = LS_64(stag_to_inv, I40IWQPSQ_REMSTAG) |
- LS_64(info->op_type, I40IWQPSQ_OPCODE) |
- LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) |
- LS_64(1, I40IWQPSQ_INLINEDATAFLAG) |
- LS_64((qp->push_db ? 1 : 0), I40IWQPSQ_PUSHWQE) |
- LS_64(read_fence, I40IWQPSQ_READFENCE) |
- LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
- LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
- LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
-
- dest = (u8 *)wqe;
- src = (u8 *)(op_info->data);
-
- if (op_info->len <= 16) {
- memcpy(dest, src, op_info->len);
- } else {
- memcpy(dest, src, 16);
- src += 16;
- dest = (u8 *)wqe + 32;
- memcpy(dest, src, op_info->len - 16);
- }
-
- wmb(); /* make sure WQE is populated before valid bit is set */
-
- set_64bit_val(wqe, 24, header);
-
- if (qp->push_db) {
- push = (u64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x3) * 0x20);
- memcpy(push, wqe, (op_info->len > 16) ? op_info->len + 16 : 32);
- i40iw_qp_ring_push_db(qp, wqe_idx);
- } else {
- if (post_sq)
- i40iw_qp_post_wr(qp);
- }
-
- return 0;
-}
-
-/**
- * i40iw_stag_local_invalidate - stag invalidate operation
- * @qp: hw qp ptr
- * @info: post sq information
- * @post_sq: flag to post sq
- */
-static enum i40iw_status_code i40iw_stag_local_invalidate(struct i40iw_qp_uk *qp,
- struct i40iw_post_sq_info *info,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_inv_local_stag *op_info;
- u64 header;
- u32 wqe_idx;
- bool local_fence = false;
-
- op_info = &info->op.inv_local_stag;
- local_fence = info->local_fence;
-
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, 0, info->wr_id);
- if (!wqe)
- return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
- set_64bit_val(wqe, 0, 0);
- set_64bit_val(wqe, 8,
- LS_64(op_info->target_stag, I40IWQPSQ_LOCSTAG));
- set_64bit_val(wqe, 16, 0);
- header = LS_64(I40IW_OP_TYPE_INV_STAG, I40IWQPSQ_OPCODE) |
- LS_64(info->read_fence, I40IWQPSQ_READFENCE) |
- LS_64(local_fence, I40IWQPSQ_LOCALFENCE) |
- LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
- LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
-
- wmb(); /* make sure WQE is populated before valid bit is set */
-
- set_64bit_val(wqe, 24, header);
-
- if (post_sq)
- i40iw_qp_post_wr(qp);
-
- return 0;
-}
-
-/**
- * i40iw_mw_bind - Memory Window bind operation
- * @qp: hw qp ptr
- * @info: post sq information
- * @post_sq: flag to post sq
- */
-static enum i40iw_status_code i40iw_mw_bind(struct i40iw_qp_uk *qp,
- struct i40iw_post_sq_info *info,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_bind_window *op_info;
- u64 header;
- u32 wqe_idx;
- bool local_fence = false;
-
- op_info = &info->op.bind_window;
-
- local_fence |= info->local_fence;
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, 0, info->wr_id);
- if (!wqe)
- return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
- set_64bit_val(wqe, 0, (uintptr_t)op_info->va);
- set_64bit_val(wqe, 8,
- LS_64(op_info->mr_stag, I40IWQPSQ_PARENTMRSTAG) |
- LS_64(op_info->mw_stag, I40IWQPSQ_MWSTAG));
- set_64bit_val(wqe, 16, op_info->bind_length);
- header = LS_64(I40IW_OP_TYPE_BIND_MW, I40IWQPSQ_OPCODE) |
- LS_64(((op_info->enable_reads << 2) |
- (op_info->enable_writes << 3)),
- I40IWQPSQ_STAGRIGHTS) |
- LS_64((op_info->addressing_type == I40IW_ADDR_TYPE_VA_BASED ? 1 : 0),
- I40IWQPSQ_VABASEDTO) |
- LS_64(info->read_fence, I40IWQPSQ_READFENCE) |
- LS_64(local_fence, I40IWQPSQ_LOCALFENCE) |
- LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
- LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
-
- wmb(); /* make sure WQE is populated before valid bit is set */
-
- set_64bit_val(wqe, 24, header);
-
- if (post_sq)
- i40iw_qp_post_wr(qp);
-
- return 0;
-}
-
-/**
- * i40iw_post_receive - post receive wqe
- * @qp: hw qp ptr
- * @info: post rq information
- */
-static enum i40iw_status_code i40iw_post_receive(struct i40iw_qp_uk *qp,
- struct i40iw_post_rq_info *info)
-{
- u64 *wqe;
- u64 header;
- u32 total_size = 0, wqe_idx, i, byte_off;
-
- if (qp->max_rq_frag_cnt < info->num_sges)
- return I40IW_ERR_INVALID_FRAG_COUNT;
- for (i = 0; i < info->num_sges; i++)
- total_size += info->sg_list[i].len;
- wqe = i40iw_qp_get_next_recv_wqe(qp, &wqe_idx);
- if (!wqe)
- return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
-
- qp->rq_wrid_array[wqe_idx] = info->wr_id;
- set_64bit_val(wqe, 16, 0);
-
- header = LS_64((info->num_sges > 1 ? (info->num_sges - 1) : 0),
- I40IWQPSQ_ADDFRAGCNT) |
- LS_64(qp->rwqe_polarity, I40IWQPSQ_VALID);
-
- i40iw_set_fragment(wqe, 0, info->sg_list);
-
- for (i = 1, byte_off = 32; i < info->num_sges; i++) {
- i40iw_set_fragment(wqe, byte_off, &info->sg_list[i]);
- byte_off += 16;
- }
-
- wmb(); /* make sure WQE is populated before valid bit is set */
-
- set_64bit_val(wqe, 24, header);
-
- return 0;
-}
-
-/**
- * i40iw_cq_request_notification - cq notification request (doorbell)
- * @cq: hw cq
- * @cq_notify: notification type
- */
-static void i40iw_cq_request_notification(struct i40iw_cq_uk *cq,
- enum i40iw_completion_notify cq_notify)
-{
- u64 temp_val;
- u16 sw_cq_sel;
- u8 arm_next_se = 0;
- u8 arm_next = 0;
- u8 arm_seq_num;
-
- get_64bit_val(cq->shadow_area, 32, &temp_val);
- arm_seq_num = (u8)RS_64(temp_val, I40IW_CQ_DBSA_ARM_SEQ_NUM);
- arm_seq_num++;
-
- sw_cq_sel = (u16)RS_64(temp_val, I40IW_CQ_DBSA_SW_CQ_SELECT);
- arm_next_se = (u8)RS_64(temp_val, I40IW_CQ_DBSA_ARM_NEXT_SE);
- arm_next_se |= 1;
- if (cq_notify == IW_CQ_COMPL_EVENT)
- arm_next = 1;
- temp_val = LS_64(arm_seq_num, I40IW_CQ_DBSA_ARM_SEQ_NUM) |
- LS_64(sw_cq_sel, I40IW_CQ_DBSA_SW_CQ_SELECT) |
- LS_64(arm_next_se, I40IW_CQ_DBSA_ARM_NEXT_SE) |
- LS_64(arm_next, I40IW_CQ_DBSA_ARM_NEXT);
-
- set_64bit_val(cq->shadow_area, 32, temp_val);
-
- wmb(); /* make sure WQE is populated before valid bit is set */
-
- writel(cq->cq_id, cq->cqe_alloc_reg);
-}
-
-/**
- * i40iw_cq_post_entries - update tail in shadow memory
- * @cq: hw cq
- * @count: # of entries processed
- */
-static enum i40iw_status_code i40iw_cq_post_entries(struct i40iw_cq_uk *cq,
- u8 count)
-{
- I40IW_RING_MOVE_TAIL_BY_COUNT(cq->cq_ring, count);
- set_64bit_val(cq->shadow_area, 0,
- I40IW_RING_GETCURRENT_HEAD(cq->cq_ring));
- return 0;
-}
-
-/**
- * i40iw_cq_poll_completion - get cq completion info
- * @cq: hw cq
- * @info: cq poll information returned
- */
-static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
- struct i40iw_cq_poll_info *info)
-{
- u64 comp_ctx, qword0, qword2, qword3, wqe_qword;
- u64 *cqe, *sw_wqe;
- struct i40iw_qp_uk *qp;
- struct i40iw_ring *pring = NULL;
- u32 wqe_idx, q_type, array_idx = 0;
- enum i40iw_status_code ret_code = 0;
- bool move_cq_head = true;
- u8 polarity;
- u8 addl_wqes = 0;
-
- if (cq->avoid_mem_cflct)
- cqe = (u64 *)I40IW_GET_CURRENT_EXTENDED_CQ_ELEMENT(cq);
- else
- cqe = (u64 *)I40IW_GET_CURRENT_CQ_ELEMENT(cq);
-
- get_64bit_val(cqe, 24, &qword3);
- polarity = (u8)RS_64(qword3, I40IW_CQ_VALID);
-
- if (polarity != cq->polarity)
- return I40IW_ERR_QUEUE_EMPTY;
-
- q_type = (u8)RS_64(qword3, I40IW_CQ_SQ);
- info->error = (bool)RS_64(qword3, I40IW_CQ_ERROR);
- info->push_dropped = (bool)RS_64(qword3, I40IWCQ_PSHDROP);
- if (info->error) {
- info->comp_status = I40IW_COMPL_STATUS_FLUSHED;
- info->major_err = (bool)RS_64(qword3, I40IW_CQ_MAJERR);
- info->minor_err = (bool)RS_64(qword3, I40IW_CQ_MINERR);
- } else {
- info->comp_status = I40IW_COMPL_STATUS_SUCCESS;
- }
-
- get_64bit_val(cqe, 0, &qword0);
- get_64bit_val(cqe, 16, &qword2);
-
- info->tcp_seq_num = (u32)RS_64(qword0, I40IWCQ_TCPSEQNUM);
-
- info->qp_id = (u32)RS_64(qword2, I40IWCQ_QPID);
-
- get_64bit_val(cqe, 8, &comp_ctx);
-
- info->solicited_event = (bool)RS_64(qword3, I40IWCQ_SOEVENT);
- info->is_srq = (bool)RS_64(qword3, I40IWCQ_SRQ);
-
- qp = (struct i40iw_qp_uk *)(unsigned long)comp_ctx;
- if (!qp) {
- ret_code = I40IW_ERR_QUEUE_DESTROYED;
- goto exit;
- }
- wqe_idx = (u32)RS_64(qword3, I40IW_CQ_WQEIDX);
- info->qp_handle = (i40iw_qp_handle)(unsigned long)qp;
-
- if (q_type == I40IW_CQE_QTYPE_RQ) {
- array_idx = (wqe_idx * 4) / qp->rq_wqe_size_multiplier;
- if (info->comp_status == I40IW_COMPL_STATUS_FLUSHED) {
- info->wr_id = qp->rq_wrid_array[qp->rq_ring.tail];
- array_idx = qp->rq_ring.tail;
- } else {
- info->wr_id = qp->rq_wrid_array[array_idx];
- }
-
- info->op_type = I40IW_OP_TYPE_REC;
- if (qword3 & I40IWCQ_STAG_MASK) {
- info->stag_invalid_set = true;
- info->inv_stag = (u32)RS_64(qword2, I40IWCQ_INVSTAG);
- } else {
- info->stag_invalid_set = false;
- }
- info->bytes_xfered = (u32)RS_64(qword0, I40IWCQ_PAYLDLEN);
- I40IW_RING_SET_TAIL(qp->rq_ring, array_idx + 1);
- pring = &qp->rq_ring;
- } else {
- if (qp->first_sq_wq) {
- qp->first_sq_wq = false;
- if (!wqe_idx && (qp->sq_ring.head == qp->sq_ring.tail)) {
- I40IW_RING_MOVE_HEAD_NOCHECK(cq->cq_ring);
- I40IW_RING_MOVE_TAIL(cq->cq_ring);
- set_64bit_val(cq->shadow_area, 0,
- I40IW_RING_GETCURRENT_HEAD(cq->cq_ring));
- memset(info, 0, sizeof(struct i40iw_cq_poll_info));
- return i40iw_cq_poll_completion(cq, info);
- }
- }
-
- if (info->comp_status != I40IW_COMPL_STATUS_FLUSHED) {
- info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
- info->bytes_xfered = qp->sq_wrtrk_array[wqe_idx].wr_len;
-
- info->op_type = (u8)RS_64(qword3, I40IWCQ_OP);
- sw_wqe = qp->sq_base[wqe_idx].elem;
- get_64bit_val(sw_wqe, 24, &wqe_qword);
-
- addl_wqes = qp->sq_wrtrk_array[wqe_idx].wqe_size / I40IW_QP_WQE_MIN_SIZE;
- I40IW_RING_SET_TAIL(qp->sq_ring, (wqe_idx + addl_wqes));
- } else {
- do {
- u8 op_type;
- u32 tail;
-
- tail = qp->sq_ring.tail;
- sw_wqe = qp->sq_base[tail].elem;
- get_64bit_val(sw_wqe, 24, &wqe_qword);
- op_type = (u8)RS_64(wqe_qword, I40IWQPSQ_OPCODE);
- info->op_type = op_type;
- addl_wqes = qp->sq_wrtrk_array[tail].wqe_size / I40IW_QP_WQE_MIN_SIZE;
- I40IW_RING_SET_TAIL(qp->sq_ring, (tail + addl_wqes));
- if (op_type != I40IWQP_OP_NOP) {
- info->wr_id = qp->sq_wrtrk_array[tail].wrid;
- info->bytes_xfered = qp->sq_wrtrk_array[tail].wr_len;
- break;
- }
- } while (1);
- }
- pring = &qp->sq_ring;
- }
-
- ret_code = 0;
-
-exit:
- if (!ret_code &&
- (info->comp_status == I40IW_COMPL_STATUS_FLUSHED))
- if (pring && (I40IW_RING_MORE_WORK(*pring)))
- move_cq_head = false;
-
- if (move_cq_head) {
- I40IW_RING_MOVE_HEAD_NOCHECK(cq->cq_ring);
-
- if (I40IW_RING_GETCURRENT_HEAD(cq->cq_ring) == 0)
- cq->polarity ^= 1;
-
- I40IW_RING_MOVE_TAIL(cq->cq_ring);
- set_64bit_val(cq->shadow_area, 0,
- I40IW_RING_GETCURRENT_HEAD(cq->cq_ring));
- } else {
- if (info->is_srq)
- return ret_code;
- qword3 &= ~I40IW_CQ_WQEIDX_MASK;
- qword3 |= LS_64(pring->tail, I40IW_CQ_WQEIDX);
- set_64bit_val(cqe, 24, qword3);
- }
-
- return ret_code;
-}
-
-/**
- * i40iw_get_wqe_shift - get shift count for maximum wqe size
- * @sge: maximum number of scatter/gather elements per wqe
- * @inline_data: maximum inline data size
- * @shift: returns the shift needed based on sge and inline data size
- *
- * Shift can be used to left shift the wqe size based on the number of SGEs and the inline data size.
- * For 1 SGE or inline data <= 16, shift = 0 (wqe size of 32 bytes).
- * For 2 or 3 SGEs or inline data <= 48, shift = 1 (wqe size of 64 bytes).
- * Shift of 2 otherwise (wqe size of 128 bytes).
- */
-void i40iw_get_wqe_shift(u32 sge, u32 inline_data, u8 *shift)
-{
- *shift = 0;
- if (sge > 1 || inline_data > 16)
- *shift = (sge < 4 && inline_data <= 48) ? 1 : 2;
-}
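As a quick standalone sketch (not part of the driver), the mapping described in the comment above can be checked directly; it only restates the shift rule quoted there, with the 32-byte WQE quantum left-shifted by the result:

#include <stdio.h>

/* same rule as i40iw_get_wqe_shift(), restated for illustration */
static unsigned char wqe_shift(unsigned int sge, unsigned int inline_data)
{
	if (sge > 1 || inline_data > 16)
		return (sge < 4 && inline_data <= 48) ? 1 : 2;
	return 0;
}

int main(void)
{
	printf("1 SGE,  0B inline -> %u-byte wqe\n", 32u << wqe_shift(1, 0));  /* 32  */
	printf("3 SGEs, 0B inline -> %u-byte wqe\n", 32u << wqe_shift(3, 0));  /* 64  */
	printf("1 SGE, 48B inline -> %u-byte wqe\n", 32u << wqe_shift(1, 48)); /* 64  */
	printf("4 SGEs, 0B inline -> %u-byte wqe\n", 32u << wqe_shift(4, 0));  /* 128 */
	return 0;
}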
-
-/**
- * i40iw_get_sqdepth - get SQ depth (quanta)
- * @sq_size: SQ size
- * @shift: shift which determines size of WQE
- * @sqdepth: depth of SQ
- */
-enum i40iw_status_code i40iw_get_sqdepth(u32 sq_size, u8 shift, u32 *sqdepth)
-{
- *sqdepth = roundup_pow_of_two((sq_size << shift) + I40IW_SQ_RSVD);
-
- if (*sqdepth < (I40IW_QP_SW_MIN_WQSIZE << shift))
- *sqdepth = I40IW_QP_SW_MIN_WQSIZE << shift;
- else if (*sqdepth > I40IW_QP_SW_MAX_SQ_QUANTAS)
- return I40IW_ERR_INVALID_SIZE;
-
- return 0;
-}
-
-/**
- * i40iw_get_rqdepth - get RQ depth (quanta)
- * @rq_size: RQ size
- * @shift: shift which determines size of WQE
- * @rqdepth: depth of RQ
- */
-enum i40iw_status_code i40iw_get_rqdepth(u32 rq_size, u8 shift, u32 *rqdepth)
-{
- *rqdepth = roundup_pow_of_two((rq_size << shift) + I40IW_RQ_RSVD);
-
- if (*rqdepth < (I40IW_QP_SW_MIN_WQSIZE << shift))
- *rqdepth = I40IW_QP_SW_MIN_WQSIZE << shift;
- else if (*rqdepth > I40IW_QP_SW_MAX_RQ_QUANTAS)
- return I40IW_ERR_INVALID_SIZE;
-
- return 0;
-}
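To make the rounding in these two depth helpers concrete, here is a small standalone sketch (not driver code); I40IW_SQ_RSVD and I40IW_QP_SW_MIN_WQSIZE are defined in headers outside this diff, so the values below are assumed for illustration only:

#include <stdio.h>

#define SQ_RSVD    2   /* assumed stand-in for I40IW_SQ_RSVD */
#define MIN_WQSIZE 4   /* assumed stand-in for I40IW_QP_SW_MIN_WQSIZE */

static unsigned int roundup_pow_of_two_u32(unsigned int v)
{
	unsigned int p = 1;

	while (p < v)
		p <<= 1;
	return p;
}

int main(void)
{
	unsigned int sq_size = 100, shift = 1;
	unsigned int depth = roundup_pow_of_two_u32((sq_size << shift) + SQ_RSVD);

	if (depth < (MIN_WQSIZE << shift))
		depth = MIN_WQSIZE << shift;

	/* 100 << 1 = 200, + 2 reserved = 202, rounded up to 256 quanta */
	printf("sq_size=%u, shift=%u -> depth=%u quanta\n", sq_size, shift, depth);
	return 0;
}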
-
-static const struct i40iw_qp_uk_ops iw_qp_uk_ops = {
- .iw_qp_post_wr = i40iw_qp_post_wr,
- .iw_qp_ring_push_db = i40iw_qp_ring_push_db,
- .iw_rdma_write = i40iw_rdma_write,
- .iw_rdma_read = i40iw_rdma_read,
- .iw_send = i40iw_send,
- .iw_inline_rdma_write = i40iw_inline_rdma_write,
- .iw_inline_send = i40iw_inline_send,
- .iw_stag_local_invalidate = i40iw_stag_local_invalidate,
- .iw_mw_bind = i40iw_mw_bind,
- .iw_post_receive = i40iw_post_receive,
- .iw_post_nop = i40iw_nop
-};
-
-static const struct i40iw_cq_ops iw_cq_ops = {
- .iw_cq_request_notification = i40iw_cq_request_notification,
- .iw_cq_poll_completion = i40iw_cq_poll_completion,
- .iw_cq_post_entries = i40iw_cq_post_entries,
- .iw_cq_clean = i40iw_clean_cq
-};
-
-static const struct i40iw_device_uk_ops iw_device_uk_ops = {
- .iwarp_cq_uk_init = i40iw_cq_uk_init,
- .iwarp_qp_uk_init = i40iw_qp_uk_init,
-};
-
-/**
- * i40iw_qp_uk_init - initialize shared qp
- * @qp: hw qp (user and kernel)
- * @info: qp initialization info
- *
- * initializes the vars used in both user and kernel mode.
- * The size of a wqe depends on the maximum number of fragments
- * allowed, and the wqe size times the number of wqes should equal the
- * amount of memory allocated for the sq and rq. If an srq is used,
- * rq_base points to a single rq wqe only (not the whole
- * array of wqes).
- */
-enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp,
- struct i40iw_qp_uk_init_info *info)
-{
- enum i40iw_status_code ret_code = 0;
- u32 sq_ring_size;
- u8 sqshift, rqshift;
-
- if (info->max_sq_frag_cnt > I40IW_MAX_WQ_FRAGMENT_COUNT)
- return I40IW_ERR_INVALID_FRAG_COUNT;
-
- if (info->max_rq_frag_cnt > I40IW_MAX_WQ_FRAGMENT_COUNT)
- return I40IW_ERR_INVALID_FRAG_COUNT;
- i40iw_get_wqe_shift(info->max_sq_frag_cnt, info->max_inline_data, &sqshift);
-
- qp->sq_base = info->sq;
- qp->rq_base = info->rq;
- qp->shadow_area = info->shadow_area;
- qp->sq_wrtrk_array = info->sq_wrtrk_array;
- qp->rq_wrid_array = info->rq_wrid_array;
-
- qp->wqe_alloc_reg = info->wqe_alloc_reg;
- qp->qp_id = info->qp_id;
-
- qp->sq_size = info->sq_size;
- qp->push_db = info->push_db;
- qp->push_wqe = info->push_wqe;
-
- qp->max_sq_frag_cnt = info->max_sq_frag_cnt;
- sq_ring_size = qp->sq_size << sqshift;
-
- I40IW_RING_INIT(qp->sq_ring, sq_ring_size);
- I40IW_RING_INIT(qp->initial_ring, sq_ring_size);
- I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
- I40IW_RING_MOVE_TAIL(qp->sq_ring);
- I40IW_RING_MOVE_HEAD(qp->initial_ring, ret_code);
- qp->swqe_polarity = 1;
- qp->first_sq_wq = true;
- qp->swqe_polarity_deferred = 1;
- qp->rwqe_polarity = 0;
-
- if (!qp->use_srq) {
- qp->rq_size = info->rq_size;
- qp->max_rq_frag_cnt = info->max_rq_frag_cnt;
- I40IW_RING_INIT(qp->rq_ring, qp->rq_size);
- switch (info->abi_ver) {
- case 4:
- i40iw_get_wqe_shift(info->max_rq_frag_cnt, 0, &rqshift);
- break;
- case 5: /* fallthrough until next ABI version */
- default:
- rqshift = I40IW_MAX_RQ_WQE_SHIFT;
- break;
- }
- qp->rq_wqe_size = rqshift;
- qp->rq_wqe_size_multiplier = 4 << rqshift;
- }
- qp->ops = iw_qp_uk_ops;
-
- return ret_code;
-}
-
-/**
- * i40iw_cq_uk_init - initialize shared cq (user and kernel)
- * @cq: hw cq
- * @info: hw cq initialization info
- */
-enum i40iw_status_code i40iw_cq_uk_init(struct i40iw_cq_uk *cq,
- struct i40iw_cq_uk_init_info *info)
-{
- if ((info->cq_size < I40IW_MIN_CQ_SIZE) ||
- (info->cq_size > I40IW_MAX_CQ_SIZE))
- return I40IW_ERR_INVALID_SIZE;
- cq->cq_base = (struct i40iw_cqe *)info->cq_base;
- cq->cq_id = info->cq_id;
- cq->cq_size = info->cq_size;
- cq->cqe_alloc_reg = info->cqe_alloc_reg;
- cq->shadow_area = info->shadow_area;
- cq->avoid_mem_cflct = info->avoid_mem_cflct;
-
- I40IW_RING_INIT(cq->cq_ring, cq->cq_size);
- cq->polarity = 1;
- cq->ops = iw_cq_ops;
-
- return 0;
-}
-
-/**
- * i40iw_device_init_uk - setup routines for iwarp shared device
- * @dev: iwarp shared (user and kernel)
- */
-void i40iw_device_init_uk(struct i40iw_dev_uk *dev)
-{
- dev->ops_uk = iw_device_uk_ops;
-}
-
-/**
- * i40iw_clean_cq - clean cq entries
- * @queue: completion context
- * @cq: cq to clean
- */
-void i40iw_clean_cq(void *queue, struct i40iw_cq_uk *cq)
-{
- u64 *cqe;
- u64 qword3, comp_ctx;
- u32 cq_head;
- u8 polarity, temp;
-
- cq_head = cq->cq_ring.head;
- temp = cq->polarity;
- do {
- if (cq->avoid_mem_cflct)
- cqe = (u64 *)&(((struct i40iw_extended_cqe *)cq->cq_base)[cq_head]);
- else
- cqe = (u64 *)&cq->cq_base[cq_head];
- get_64bit_val(cqe, 24, &qword3);
- polarity = (u8)RS_64(qword3, I40IW_CQ_VALID);
-
- if (polarity != temp)
- break;
-
- get_64bit_val(cqe, 8, &comp_ctx);
- if ((void *)(unsigned long)comp_ctx == queue)
- set_64bit_val(cqe, 8, 0);
-
- cq_head = (cq_head + 1) % cq->cq_ring.size;
- if (!cq_head)
- temp ^= 1;
- } while (true);
-}
-
-/**
- * i40iw_nop - send a nop
- * @qp: hw qp ptr
- * @wr_id: work request id
- * @signaled: flag if signaled for completion
- * @post_sq: flag to post sq
- */
-enum i40iw_status_code i40iw_nop(struct i40iw_qp_uk *qp,
- u64 wr_id,
- bool signaled,
- bool post_sq)
-{
- u64 header, *wqe;
- u32 wqe_idx;
-
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, 0, wr_id);
- if (!wqe)
- return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
- set_64bit_val(wqe, 0, 0);
- set_64bit_val(wqe, 8, 0);
- set_64bit_val(wqe, 16, 0);
-
- header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) |
- LS_64(signaled, I40IWQPSQ_SIGCOMPL) |
- LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
-
- wmb(); /* make sure WQE is populated before valid bit is set */
-
- set_64bit_val(wqe, 24, header);
- if (post_sq)
- i40iw_qp_post_wr(qp);
-
- return 0;
-}
-
-/**
- * i40iw_fragcnt_to_wqesize_sq - calculate wqe size based on fragment count for SQ
- * @frag_cnt: number of fragments
- * @wqe_size: size of sq wqe returned
- */
-enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u32 frag_cnt, u8 *wqe_size)
-{
- switch (frag_cnt) {
- case 0:
- case 1:
- *wqe_size = I40IW_QP_WQE_MIN_SIZE;
- break;
- case 2:
- case 3:
- *wqe_size = 64;
- break;
- case 4:
- case 5:
- *wqe_size = 96;
- break;
- case 6:
- case 7:
- *wqe_size = 128;
- break;
- default:
- return I40IW_ERR_INVALID_FRAG_COUNT;
- }
-
- return 0;
-}
-
-/**
- * i40iw_fragcnt_to_wqesize_rq - calculate wqe size based on fragment count for RQ
- * @frag_cnt: number of fragments
- * @wqe_size: size of rq wqe returned
- */
-enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u32 frag_cnt, u8 *wqe_size)
-{
- switch (frag_cnt) {
- case 0:
- case 1:
- *wqe_size = 32;
- break;
- case 2:
- case 3:
- *wqe_size = 64;
- break;
- case 4:
- case 5:
- case 6:
- case 7:
- *wqe_size = 128;
- break;
- default:
- return I40IW_ERR_INVALID_FRAG_COUNT;
- }
-
- return 0;
-}
-
-/**
- * i40iw_inline_data_size_to_wqesize - based on inline data, wqe size
- * @data_size: data size for inline
- * @wqe_size: size of sq wqe returned
- */
-enum i40iw_status_code i40iw_inline_data_size_to_wqesize(u32 data_size,
- u8 *wqe_size)
-{
- if (data_size > I40IW_MAX_INLINE_DATA_SIZE)
- return I40IW_ERR_INVALID_INLINE_DATA_SIZE;
-
- if (data_size <= 16)
- *wqe_size = I40IW_QP_WQE_MIN_SIZE;
- else
- *wqe_size = 64;
-
- return 0;
-}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h
deleted file mode 100644
index b125925641e0..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_user.h
+++ /dev/null
@@ -1,430 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_USER_H
-#define I40IW_USER_H
-
-enum i40iw_device_capabilities_const {
- I40IW_WQE_SIZE = 4,
- I40IW_CQP_WQE_SIZE = 8,
- I40IW_CQE_SIZE = 4,
- I40IW_EXTENDED_CQE_SIZE = 8,
- I40IW_AEQE_SIZE = 2,
- I40IW_CEQE_SIZE = 1,
- I40IW_CQP_CTX_SIZE = 8,
- I40IW_SHADOW_AREA_SIZE = 8,
- I40IW_CEQ_MAX_COUNT = 256,
- I40IW_QUERY_FPM_BUF_SIZE = 128,
- I40IW_COMMIT_FPM_BUF_SIZE = 128,
- I40IW_MIN_IW_QP_ID = 1,
- I40IW_MAX_IW_QP_ID = 262143,
- I40IW_MIN_CEQID = 0,
- I40IW_MAX_CEQID = 256,
- I40IW_MIN_CQID = 0,
- I40IW_MAX_CQID = 131071,
- I40IW_MIN_AEQ_ENTRIES = 1,
- I40IW_MAX_AEQ_ENTRIES = 524287,
- I40IW_MIN_CEQ_ENTRIES = 1,
- I40IW_MAX_CEQ_ENTRIES = 131071,
- I40IW_MIN_CQ_SIZE = 1,
- I40IW_MAX_CQ_SIZE = 1048575,
- I40IW_DB_ID_ZERO = 0,
- I40IW_MAX_WQ_FRAGMENT_COUNT = 3,
- I40IW_MAX_SGE_RD = 1,
- I40IW_MAX_OUTBOUND_MESSAGE_SIZE = 2147483647,
- I40IW_MAX_INBOUND_MESSAGE_SIZE = 2147483647,
- I40IW_MAX_PUSH_PAGE_COUNT = 4096,
- I40IW_MAX_PE_ENABLED_VF_COUNT = 32,
- I40IW_MAX_VF_FPM_ID = 47,
- I40IW_MAX_VF_PER_PF = 127,
- I40IW_MAX_SQ_PAYLOAD_SIZE = 2145386496,
- I40IW_MAX_INLINE_DATA_SIZE = 48,
- I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 48,
- I40IW_MAX_IRD_SIZE = 64,
- I40IW_MAX_ORD_SIZE = 127,
- I40IW_MAX_WQ_ENTRIES = 2048,
- I40IW_Q2_BUFFER_SIZE = (248 + 100),
- I40IW_MAX_WQE_SIZE_RQ = 128,
- I40IW_QP_CTX_SIZE = 248,
- I40IW_MAX_PDS = 32768
-};
-
-#define i40iw_handle void *
-#define i40iw_adapter_handle i40iw_handle
-#define i40iw_qp_handle i40iw_handle
-#define i40iw_cq_handle i40iw_handle
-#define i40iw_srq_handle i40iw_handle
-#define i40iw_pd_id i40iw_handle
-#define i40iw_stag_handle i40iw_handle
-#define i40iw_stag_index u32
-#define i40iw_stag u32
-#define i40iw_stag_key u8
-
-#define i40iw_tagged_offset u64
-#define i40iw_access_privileges u32
-#define i40iw_physical_fragment u64
-#define i40iw_address_list u64 *
-
-#define I40IW_MAX_MR_SIZE 0x10000000000L
-#define I40IW_MAX_RQ_WQE_SHIFT 2
-
-struct i40iw_qp_uk;
-struct i40iw_cq_uk;
-struct i40iw_srq_uk;
-struct i40iw_qp_uk_init_info;
-struct i40iw_cq_uk_init_info;
-struct i40iw_srq_uk_init_info;
-
-struct i40iw_sge {
- i40iw_tagged_offset tag_off;
- u32 len;
- i40iw_stag stag;
-};
-
-#define i40iw_sgl struct i40iw_sge *
-
-struct i40iw_ring {
- u32 head;
- u32 tail;
- u32 size;
-};
-
-struct i40iw_cqe {
- u64 buf[I40IW_CQE_SIZE];
-};
-
-struct i40iw_extended_cqe {
- u64 buf[I40IW_EXTENDED_CQE_SIZE];
-};
-
-struct i40iw_wqe {
- u64 buf[I40IW_WQE_SIZE];
-};
-
-struct i40iw_qp_uk_ops;
-
-enum i40iw_addressing_type {
- I40IW_ADDR_TYPE_ZERO_BASED = 0,
- I40IW_ADDR_TYPE_VA_BASED = 1,
-};
-
-#define I40IW_ACCESS_FLAGS_LOCALREAD 0x01
-#define I40IW_ACCESS_FLAGS_LOCALWRITE 0x02
-#define I40IW_ACCESS_FLAGS_REMOTEREAD_ONLY 0x04
-#define I40IW_ACCESS_FLAGS_REMOTEREAD 0x05
-#define I40IW_ACCESS_FLAGS_REMOTEWRITE_ONLY 0x08
-#define I40IW_ACCESS_FLAGS_REMOTEWRITE 0x0a
-#define I40IW_ACCESS_FLAGS_BIND_WINDOW 0x10
-#define I40IW_ACCESS_FLAGS_ALL 0x1F
-
-#define I40IW_OP_TYPE_RDMA_WRITE 0
-#define I40IW_OP_TYPE_RDMA_READ 1
-#define I40IW_OP_TYPE_SEND 3
-#define I40IW_OP_TYPE_SEND_INV 4
-#define I40IW_OP_TYPE_SEND_SOL 5
-#define I40IW_OP_TYPE_SEND_SOL_INV 6
-#define I40IW_OP_TYPE_REC 7
-#define I40IW_OP_TYPE_BIND_MW 8
-#define I40IW_OP_TYPE_FAST_REG_NSMR 9
-#define I40IW_OP_TYPE_INV_STAG 10
-#define I40IW_OP_TYPE_RDMA_READ_INV_STAG 11
-#define I40IW_OP_TYPE_NOP 12
-
-enum i40iw_completion_status {
- I40IW_COMPL_STATUS_SUCCESS = 0,
- I40IW_COMPL_STATUS_FLUSHED,
- I40IW_COMPL_STATUS_INVALID_WQE,
- I40IW_COMPL_STATUS_QP_CATASTROPHIC,
- I40IW_COMPL_STATUS_REMOTE_TERMINATION,
- I40IW_COMPL_STATUS_INVALID_STAG,
- I40IW_COMPL_STATUS_BASE_BOUND_VIOLATION,
- I40IW_COMPL_STATUS_ACCESS_VIOLATION,
- I40IW_COMPL_STATUS_INVALID_PD_ID,
- I40IW_COMPL_STATUS_WRAP_ERROR,
- I40IW_COMPL_STATUS_STAG_INVALID_PDID,
- I40IW_COMPL_STATUS_RDMA_READ_ZERO_ORD,
- I40IW_COMPL_STATUS_QP_NOT_PRIVLEDGED,
- I40IW_COMPL_STATUS_STAG_NOT_INVALID,
- I40IW_COMPL_STATUS_INVALID_PHYS_BUFFER_SIZE,
- I40IW_COMPL_STATUS_INVALID_PHYS_BUFFER_ENTRY,
- I40IW_COMPL_STATUS_INVALID_FBO,
- I40IW_COMPL_STATUS_INVALID_LENGTH,
- I40IW_COMPL_STATUS_INVALID_ACCESS,
- I40IW_COMPL_STATUS_PHYS_BUFFER_LIST_TOO_LONG,
- I40IW_COMPL_STATUS_INVALID_VIRT_ADDRESS,
- I40IW_COMPL_STATUS_INVALID_REGION,
- I40IW_COMPL_STATUS_INVALID_WINDOW,
- I40IW_COMPL_STATUS_INVALID_TOTAL_LENGTH
-};
-
-enum i40iw_completion_notify {
- IW_CQ_COMPL_EVENT = 0,
- IW_CQ_COMPL_SOLICITED = 1
-};
-
-struct i40iw_post_send {
- i40iw_sgl sg_list;
- u32 num_sges;
-};
-
-struct i40iw_post_inline_send {
- void *data;
- u32 len;
-};
-
-struct i40iw_rdma_write {
- i40iw_sgl lo_sg_list;
- u32 num_lo_sges;
- struct i40iw_sge rem_addr;
-};
-
-struct i40iw_inline_rdma_write {
- void *data;
- u32 len;
- struct i40iw_sge rem_addr;
-};
-
-struct i40iw_rdma_read {
- struct i40iw_sge lo_addr;
- struct i40iw_sge rem_addr;
-};
-
-struct i40iw_bind_window {
- i40iw_stag mr_stag;
- u64 bind_length;
- void *va;
- enum i40iw_addressing_type addressing_type;
- bool enable_reads;
- bool enable_writes;
- i40iw_stag mw_stag;
-};
-
-struct i40iw_inv_local_stag {
- i40iw_stag target_stag;
-};
-
-struct i40iw_post_sq_info {
- u64 wr_id;
- u8 op_type;
- bool signaled;
- bool read_fence;
- bool local_fence;
- bool inline_data;
- bool defer_flag;
- union {
- struct i40iw_post_send send;
- struct i40iw_rdma_write rdma_write;
- struct i40iw_rdma_read rdma_read;
- struct i40iw_rdma_read rdma_read_inv;
- struct i40iw_bind_window bind_window;
- struct i40iw_inv_local_stag inv_local_stag;
- struct i40iw_inline_rdma_write inline_rdma_write;
- struct i40iw_post_inline_send inline_send;
- } op;
-};
-
-struct i40iw_post_rq_info {
- u64 wr_id;
- i40iw_sgl sg_list;
- u32 num_sges;
-};
-
-struct i40iw_cq_poll_info {
- u64 wr_id;
- i40iw_qp_handle qp_handle;
- u32 bytes_xfered;
- u32 tcp_seq_num;
- u32 qp_id;
- i40iw_stag inv_stag;
- enum i40iw_completion_status comp_status;
- u16 major_err;
- u16 minor_err;
- u8 op_type;
- bool stag_invalid_set;
- bool push_dropped;
- bool error;
- bool is_srq;
- bool solicited_event;
-};
-
-struct i40iw_qp_uk_ops {
- void (*iw_qp_post_wr)(struct i40iw_qp_uk *);
- void (*iw_qp_ring_push_db)(struct i40iw_qp_uk *, u32);
- enum i40iw_status_code (*iw_rdma_write)(struct i40iw_qp_uk *,
- struct i40iw_post_sq_info *, bool);
- enum i40iw_status_code (*iw_rdma_read)(struct i40iw_qp_uk *,
- struct i40iw_post_sq_info *, bool, bool);
- enum i40iw_status_code (*iw_send)(struct i40iw_qp_uk *,
- struct i40iw_post_sq_info *, u32, bool);
- enum i40iw_status_code (*iw_inline_rdma_write)(struct i40iw_qp_uk *,
- struct i40iw_post_sq_info *, bool);
- enum i40iw_status_code (*iw_inline_send)(struct i40iw_qp_uk *,
- struct i40iw_post_sq_info *, u32, bool);
- enum i40iw_status_code (*iw_stag_local_invalidate)(struct i40iw_qp_uk *,
- struct i40iw_post_sq_info *, bool);
- enum i40iw_status_code (*iw_mw_bind)(struct i40iw_qp_uk *,
- struct i40iw_post_sq_info *, bool);
- enum i40iw_status_code (*iw_post_receive)(struct i40iw_qp_uk *,
- struct i40iw_post_rq_info *);
- enum i40iw_status_code (*iw_post_nop)(struct i40iw_qp_uk *, u64, bool, bool);
-};
-
-struct i40iw_cq_ops {
- void (*iw_cq_request_notification)(struct i40iw_cq_uk *,
- enum i40iw_completion_notify);
- enum i40iw_status_code (*iw_cq_poll_completion)(struct i40iw_cq_uk *,
- struct i40iw_cq_poll_info *);
- enum i40iw_status_code (*iw_cq_post_entries)(struct i40iw_cq_uk *, u8 count);
- void (*iw_cq_clean)(void *, struct i40iw_cq_uk *);
-};
-
-struct i40iw_dev_uk;
-
-struct i40iw_device_uk_ops {
- enum i40iw_status_code (*iwarp_cq_uk_init)(struct i40iw_cq_uk *,
- struct i40iw_cq_uk_init_info *);
- enum i40iw_status_code (*iwarp_qp_uk_init)(struct i40iw_qp_uk *,
- struct i40iw_qp_uk_init_info *);
-};
-
-struct i40iw_dev_uk {
- struct i40iw_device_uk_ops ops_uk;
-};
-
-struct i40iw_sq_uk_wr_trk_info {
- u64 wrid;
- u32 wr_len;
- u8 wqe_size;
- u8 reserved[3];
-};
-
-struct i40iw_qp_quanta {
- u64 elem[I40IW_WQE_SIZE];
-};
-
-struct i40iw_qp_uk {
- struct i40iw_qp_quanta *sq_base;
- struct i40iw_qp_quanta *rq_base;
- u32 __iomem *wqe_alloc_reg;
- struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array;
- u64 *rq_wrid_array;
- u64 *shadow_area;
- u32 *push_db;
- u64 *push_wqe;
- struct i40iw_ring sq_ring;
- struct i40iw_ring rq_ring;
- struct i40iw_ring initial_ring;
- u32 qp_id;
- u32 sq_size;
- u32 rq_size;
- u32 max_sq_frag_cnt;
- u32 max_rq_frag_cnt;
- struct i40iw_qp_uk_ops ops;
- bool use_srq;
- u8 swqe_polarity;
- u8 swqe_polarity_deferred;
- u8 rwqe_polarity;
- u8 rq_wqe_size;
- u8 rq_wqe_size_multiplier;
- bool first_sq_wq;
- bool deferred_flag;
-};
-
-struct i40iw_cq_uk {
- struct i40iw_cqe *cq_base;
- u32 __iomem *cqe_alloc_reg;
- u64 *shadow_area;
- u32 cq_id;
- u32 cq_size;
- struct i40iw_ring cq_ring;
- u8 polarity;
- bool avoid_mem_cflct;
-
- struct i40iw_cq_ops ops;
-};
-
-struct i40iw_qp_uk_init_info {
- struct i40iw_qp_quanta *sq;
- struct i40iw_qp_quanta *rq;
- u32 __iomem *wqe_alloc_reg;
- u64 *shadow_area;
- struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array;
- u64 *rq_wrid_array;
- u32 *push_db;
- u64 *push_wqe;
- u32 qp_id;
- u32 sq_size;
- u32 rq_size;
- u32 max_sq_frag_cnt;
- u32 max_rq_frag_cnt;
- u32 max_inline_data;
- int abi_ver;
-};
-
-struct i40iw_cq_uk_init_info {
- u32 __iomem *cqe_alloc_reg;
- struct i40iw_cqe *cq_base;
- u64 *shadow_area;
- u32 cq_size;
- u32 cq_id;
- bool avoid_mem_cflct;
-};
-
-void i40iw_device_init_uk(struct i40iw_dev_uk *dev);
-
-void i40iw_qp_post_wr(struct i40iw_qp_uk *qp);
-u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx,
- u8 wqe_size,
- u32 total_size,
- u64 wr_id
- );
-u64 *i40iw_qp_get_next_recv_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx);
-u64 *i40iw_qp_get_next_srq_wqe(struct i40iw_srq_uk *srq, u32 *wqe_idx);
-
-enum i40iw_status_code i40iw_cq_uk_init(struct i40iw_cq_uk *cq,
- struct i40iw_cq_uk_init_info *info);
-enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp,
- struct i40iw_qp_uk_init_info *info);
-
-void i40iw_clean_cq(void *queue, struct i40iw_cq_uk *cq);
-enum i40iw_status_code i40iw_nop(struct i40iw_qp_uk *qp, u64 wr_id,
- bool signaled, bool post_sq);
-enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u32 frag_cnt, u8 *wqe_size);
-enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u32 frag_cnt, u8 *wqe_size);
-enum i40iw_status_code i40iw_inline_data_size_to_wqesize(u32 data_size,
- u8 *wqe_size);
-void i40iw_get_wqe_shift(u32 sge, u32 inline_data, u8 *shift);
-enum i40iw_status_code i40iw_get_sqdepth(u32 sq_size, u8 shift, u32 *sqdepth);
-enum i40iw_status_code i40iw_get_rqdepth(u32 rq_size, u8 shift, u32 *rqdepth);
-#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
deleted file mode 100644
index 337410f40860..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ /dev/null
@@ -1,1553 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/if_vlan.h>
-#include <linux/crc32.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-#include <net/netevent.h>
-#include <net/neighbour.h>
-#include "i40iw.h"
-
-/**
- * i40iw_arp_table - manage arp table
- * @iwdev: iwarp device
- * @ip_addr: ip address for device
- * @mac_addr: mac address ptr
- * @action: add, resolve or delete
- */
-int i40iw_arp_table(struct i40iw_device *iwdev,
- u32 *ip_addr,
- bool ipv4,
- u8 *mac_addr,
- u32 action)
-{
- int arp_index;
- int err;
- u32 ip[4];
-
- if (ipv4) {
- memset(ip, 0, sizeof(ip));
- ip[0] = *ip_addr;
- } else {
- memcpy(ip, ip_addr, sizeof(ip));
- }
-
- for (arp_index = 0; (u32)arp_index < iwdev->arp_table_size; arp_index++)
- if (memcmp(iwdev->arp_table[arp_index].ip_addr, ip, sizeof(ip)) == 0)
- break;
- switch (action) {
- case I40IW_ARP_ADD:
- if (arp_index != iwdev->arp_table_size)
- return -1;
-
- arp_index = 0;
- err = i40iw_alloc_resource(iwdev, iwdev->allocated_arps,
- iwdev->arp_table_size,
- (u32 *)&arp_index,
- &iwdev->next_arp_index);
-
- if (err)
- return err;
-
- memcpy(iwdev->arp_table[arp_index].ip_addr, ip, sizeof(ip));
- ether_addr_copy(iwdev->arp_table[arp_index].mac_addr, mac_addr);
- break;
- case I40IW_ARP_RESOLVE:
- if (arp_index == iwdev->arp_table_size)
- return -1;
- break;
- case I40IW_ARP_DELETE:
- if (arp_index == iwdev->arp_table_size)
- return -1;
- memset(iwdev->arp_table[arp_index].ip_addr, 0,
- sizeof(iwdev->arp_table[arp_index].ip_addr));
- eth_zero_addr(iwdev->arp_table[arp_index].mac_addr);
- i40iw_free_resource(iwdev, iwdev->allocated_arps, arp_index);
- break;
- default:
- return -1;
- }
- return arp_index;
-}
-
-/**
- * i40iw_wr32 - write 32 bits to hw register
- * @hw: hardware information including registers
- * @reg: register offset
- * @value: value to write to register
- */
-inline void i40iw_wr32(struct i40iw_hw *hw, u32 reg, u32 value)
-{
- writel(value, hw->hw_addr + reg);
-}
-
-/**
- * i40iw_rd32 - read a 32 bit hw register
- * @hw: hardware information including registers
- * @reg: register offset
- *
- * Return value of register content
- */
-inline u32 i40iw_rd32(struct i40iw_hw *hw, u32 reg)
-{
- return readl(hw->hw_addr + reg);
-}
-
-/**
- * i40iw_inetaddr_event - system notifier for ipv4 addr events
- * @notifier: not used
- * @event: event for notifier
- * @ptr: interface address
- */
-int i40iw_inetaddr_event(struct notifier_block *notifier,
- unsigned long event,
- void *ptr)
-{
- struct in_ifaddr *ifa = ptr;
- struct net_device *event_netdev = ifa->ifa_dev->dev;
- struct net_device *netdev;
- struct net_device *upper_dev;
- struct i40iw_device *iwdev;
- struct i40iw_handler *hdl;
- u32 local_ipaddr;
- u32 action = I40IW_ARP_ADD;
-
- hdl = i40iw_find_netdev(event_netdev);
- if (!hdl)
- return NOTIFY_DONE;
-
- iwdev = &hdl->device;
- if (iwdev->init_state < IP_ADDR_REGISTERED || iwdev->closing)
- return NOTIFY_DONE;
-
- netdev = iwdev->ldev->netdev;
- upper_dev = netdev_master_upper_dev_get(netdev);
- if (netdev != event_netdev)
- return NOTIFY_DONE;
-
- if (upper_dev) {
- struct in_device *in;
-
- rcu_read_lock();
- in = __in_dev_get_rcu(upper_dev);
-
- if (!in->ifa_list)
- local_ipaddr = 0;
- else
- local_ipaddr = ntohl(in->ifa_list->ifa_address);
-
- rcu_read_unlock();
- } else {
- local_ipaddr = ntohl(ifa->ifa_address);
- }
- switch (event) {
- case NETDEV_DOWN:
- action = I40IW_ARP_DELETE;
- /* Fall through */
- case NETDEV_UP:
- /* Fall through */
- case NETDEV_CHANGEADDR:
-
- /* Just skip if no need to handle ARP cache */
- if (!local_ipaddr)
- break;
-
- i40iw_manage_arp_cache(iwdev,
- netdev->dev_addr,
- &local_ipaddr,
- true,
- action);
- i40iw_if_notify(iwdev, netdev, &local_ipaddr, true,
- (action == I40IW_ARP_ADD) ? true : false);
- break;
- default:
- break;
- }
- return NOTIFY_DONE;
-}
-
-/**
- * i40iw_inet6addr_event - system notifier for ipv6 addr events
- * @notifier: not used
- * @event: event for notifier
- * @ptr: interface address
- */
-int i40iw_inet6addr_event(struct notifier_block *notifier,
- unsigned long event,
- void *ptr)
-{
- struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
- struct net_device *event_netdev = ifa->idev->dev;
- struct net_device *netdev;
- struct i40iw_device *iwdev;
- struct i40iw_handler *hdl;
- u32 local_ipaddr6[4];
- u32 action = I40IW_ARP_ADD;
-
- hdl = i40iw_find_netdev(event_netdev);
- if (!hdl)
- return NOTIFY_DONE;
-
- iwdev = &hdl->device;
- if (iwdev->init_state < IP_ADDR_REGISTERED || iwdev->closing)
- return NOTIFY_DONE;
-
- netdev = iwdev->ldev->netdev;
- if (netdev != event_netdev)
- return NOTIFY_DONE;
-
- i40iw_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32);
- switch (event) {
- case NETDEV_DOWN:
- action = I40IW_ARP_DELETE;
- /* Fall through */
- case NETDEV_UP:
- /* Fall through */
- case NETDEV_CHANGEADDR:
- i40iw_manage_arp_cache(iwdev,
- netdev->dev_addr,
- local_ipaddr6,
- false,
- action);
- i40iw_if_notify(iwdev, netdev, local_ipaddr6, false,
- (action == I40IW_ARP_ADD) ? true : false);
- break;
- default:
- break;
- }
- return NOTIFY_DONE;
-}
-
-/**
- * i40iw_net_event - system notifier for netevents
- * @notifier: not used
- * @event: event for notifier
- * @ptr: neighbor
- */
-int i40iw_net_event(struct notifier_block *notifier, unsigned long event, void *ptr)
-{
- struct neighbour *neigh = ptr;
- struct i40iw_device *iwdev;
- struct i40iw_handler *iwhdl;
- __be32 *p;
- u32 local_ipaddr[4];
-
- switch (event) {
- case NETEVENT_NEIGH_UPDATE:
- iwhdl = i40iw_find_netdev((struct net_device *)neigh->dev);
- if (!iwhdl)
- return NOTIFY_DONE;
- iwdev = &iwhdl->device;
- if (iwdev->init_state < IP_ADDR_REGISTERED || iwdev->closing)
- return NOTIFY_DONE;
- p = (__be32 *)neigh->primary_key;
- i40iw_copy_ip_ntohl(local_ipaddr, p);
- if (neigh->nud_state & NUD_VALID) {
- i40iw_manage_arp_cache(iwdev,
- neigh->ha,
- local_ipaddr,
- false,
- I40IW_ARP_ADD);
-
- } else {
- i40iw_manage_arp_cache(iwdev,
- neigh->ha,
- local_ipaddr,
- false,
- I40IW_ARP_DELETE);
- }
- break;
- default:
- break;
- }
- return NOTIFY_DONE;
-}
-
-/**
- * i40iw_netdevice_event - system notifier for netdev events
- * @notifier: not used
- * @event: event for notifier
- * @ptr: netdev
- */
-int i40iw_netdevice_event(struct notifier_block *notifier,
- unsigned long event,
- void *ptr)
-{
- struct net_device *event_netdev;
- struct net_device *netdev;
- struct i40iw_device *iwdev;
- struct i40iw_handler *hdl;
-
- event_netdev = netdev_notifier_info_to_dev(ptr);
-
- hdl = i40iw_find_netdev(event_netdev);
- if (!hdl)
- return NOTIFY_DONE;
-
- iwdev = &hdl->device;
- if (iwdev->init_state < RDMA_DEV_REGISTERED || iwdev->closing)
- return NOTIFY_DONE;
-
- netdev = iwdev->ldev->netdev;
- if (netdev != event_netdev)
- return NOTIFY_DONE;
-
- iwdev->iw_status = 1;
-
- switch (event) {
- case NETDEV_DOWN:
- iwdev->iw_status = 0;
- /* Fall through */
- case NETDEV_UP:
- i40iw_port_ibevent(iwdev);
- break;
- default:
- break;
- }
- return NOTIFY_DONE;
-}
-
-/**
- * i40iw_get_cqp_request - get cqp struct
- * @cqp: device cqp ptr
- * @wait: cqp to be used in wait mode
- */
-struct i40iw_cqp_request *i40iw_get_cqp_request(struct i40iw_cqp *cqp, bool wait)
-{
- struct i40iw_cqp_request *cqp_request = NULL;
- unsigned long flags;
-
- spin_lock_irqsave(&cqp->req_lock, flags);
- if (!list_empty(&cqp->cqp_avail_reqs)) {
- cqp_request = list_entry(cqp->cqp_avail_reqs.next,
- struct i40iw_cqp_request, list);
- list_del_init(&cqp_request->list);
- }
- spin_unlock_irqrestore(&cqp->req_lock, flags);
- if (!cqp_request) {
- cqp_request = kzalloc(sizeof(*cqp_request), GFP_ATOMIC);
- if (cqp_request) {
- cqp_request->dynamic = true;
- INIT_LIST_HEAD(&cqp_request->list);
- init_waitqueue_head(&cqp_request->waitq);
- }
- }
- if (!cqp_request) {
- i40iw_pr_err("CQP Request Fail: No Memory");
- return NULL;
- }
-
- if (wait) {
- atomic_set(&cqp_request->refcount, 2);
- cqp_request->waiting = true;
- } else {
- atomic_set(&cqp_request->refcount, 1);
- }
- return cqp_request;
-}
-
-/**
- * i40iw_free_cqp_request - free cqp request
- * @cqp: cqp ptr
- * @cqp_request: to be put back in cqp list
- */
-void i40iw_free_cqp_request(struct i40iw_cqp *cqp, struct i40iw_cqp_request *cqp_request)
-{
- struct i40iw_device *iwdev = container_of(cqp, struct i40iw_device, cqp);
- unsigned long flags;
-
- if (cqp_request->dynamic) {
- kfree(cqp_request);
- } else {
- cqp_request->request_done = false;
- cqp_request->callback_fcn = NULL;
- cqp_request->waiting = false;
-
- spin_lock_irqsave(&cqp->req_lock, flags);
- list_add_tail(&cqp_request->list, &cqp->cqp_avail_reqs);
- spin_unlock_irqrestore(&cqp->req_lock, flags);
- }
- wake_up(&iwdev->close_wq);
-}
-
-/**
- * i40iw_put_cqp_request - dec ref count and free if 0
- * @cqp: cqp ptr
- * @cqp_request: to be put back in cqp list
- */
-void i40iw_put_cqp_request(struct i40iw_cqp *cqp,
- struct i40iw_cqp_request *cqp_request)
-{
- if (atomic_dec_and_test(&cqp_request->refcount))
- i40iw_free_cqp_request(cqp, cqp_request);
-}
-
-/**
- * i40iw_free_pending_cqp_request - free pending cqp request objects
- * @cqp: cqp ptr
- * @cqp_request: to be put back in cqp list
- */
-static void i40iw_free_pending_cqp_request(struct i40iw_cqp *cqp,
- struct i40iw_cqp_request *cqp_request)
-{
- struct i40iw_device *iwdev = container_of(cqp, struct i40iw_device, cqp);
-
- if (cqp_request->waiting) {
- cqp_request->compl_info.error = true;
- cqp_request->request_done = true;
- wake_up(&cqp_request->waitq);
- }
- i40iw_put_cqp_request(cqp, cqp_request);
- wait_event_timeout(iwdev->close_wq,
- !atomic_read(&cqp_request->refcount),
- 1000);
-}
-
-/**
- * i40iw_cleanup_pending_cqp_op - clean-up cqp with no completions
- * @iwdev: iwarp device
- */
-void i40iw_cleanup_pending_cqp_op(struct i40iw_device *iwdev)
-{
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_cqp *cqp = &iwdev->cqp;
- struct i40iw_cqp_request *cqp_request = NULL;
- struct cqp_commands_info *pcmdinfo = NULL;
- u32 i, pending_work, wqe_idx;
-
- pending_work = I40IW_RING_WORK_AVAILABLE(cqp->sc_cqp.sq_ring);
- wqe_idx = I40IW_RING_GETCURRENT_TAIL(cqp->sc_cqp.sq_ring);
- for (i = 0; i < pending_work; i++) {
- cqp_request = (struct i40iw_cqp_request *)(unsigned long)cqp->scratch_array[wqe_idx];
- if (cqp_request)
- i40iw_free_pending_cqp_request(cqp, cqp_request);
- wqe_idx = (wqe_idx + 1) % I40IW_RING_GETSIZE(cqp->sc_cqp.sq_ring);
- }
-
- while (!list_empty(&dev->cqp_cmd_head)) {
- pcmdinfo = (struct cqp_commands_info *)i40iw_remove_head(&dev->cqp_cmd_head);
- cqp_request = container_of(pcmdinfo, struct i40iw_cqp_request, info);
- if (cqp_request)
- i40iw_free_pending_cqp_request(cqp, cqp_request);
- }
-}
-
-/**
- * i40iw_free_qp - callback after destroy cqp completes
- * @cqp_request: cqp request for destroy qp
- * @num: not used
- */
-static void i40iw_free_qp(struct i40iw_cqp_request *cqp_request, u32 num)
-{
- struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)cqp_request->param;
- struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp;
- struct i40iw_device *iwdev;
- u32 qp_num = iwqp->ibqp.qp_num;
-
- iwdev = iwqp->iwdev;
-
- i40iw_rem_pdusecount(iwqp->iwpd, iwdev);
- i40iw_free_qp_resources(iwdev, iwqp, qp_num);
- i40iw_rem_devusecount(iwdev);
-}
-
-/**
- * i40iw_wait_event - wait for completion
- * @iwdev: iwarp device
- * @cqp_request: cqp request to wait
- */
-static int i40iw_wait_event(struct i40iw_device *iwdev,
- struct i40iw_cqp_request *cqp_request)
-{
- struct cqp_commands_info *info = &cqp_request->info;
- struct i40iw_cqp *iwcqp = &iwdev->cqp;
- struct i40iw_cqp_timeout cqp_timeout;
- bool cqp_error = false;
- int err_code = 0;
- memset(&cqp_timeout, 0, sizeof(cqp_timeout));
- cqp_timeout.compl_cqp_cmds = iwdev->sc_dev.cqp_cmd_stats[OP_COMPLETED_COMMANDS];
- do {
- if (wait_event_timeout(cqp_request->waitq,
- cqp_request->request_done, CQP_COMPL_WAIT_TIME))
- break;
-
- i40iw_check_cqp_progress(&cqp_timeout, &iwdev->sc_dev);
-
- if (cqp_timeout.count < CQP_TIMEOUT_THRESHOLD)
- continue;
-
- i40iw_pr_err("error cqp command 0x%x timed out", info->cqp_cmd);
- err_code = -ETIME;
- if (!iwdev->reset) {
- iwdev->reset = true;
- i40iw_request_reset(iwdev);
- }
- goto done;
- } while (1);
- cqp_error = cqp_request->compl_info.error;
- if (cqp_error) {
- i40iw_pr_err("error cqp command 0x%x completion maj = 0x%x min=0x%x\n",
- info->cqp_cmd, cqp_request->compl_info.maj_err_code,
- cqp_request->compl_info.min_err_code);
- err_code = -EPROTO;
- goto done;
- }
-done:
- i40iw_put_cqp_request(iwcqp, cqp_request);
- return err_code;
-}
-
-/**
- * i40iw_handle_cqp_op - process cqp command
- * @iwdev: iwarp device
- * @cqp_request: cqp request to process
- */
-enum i40iw_status_code i40iw_handle_cqp_op(struct i40iw_device *iwdev,
- struct i40iw_cqp_request
- *cqp_request)
-{
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- enum i40iw_status_code status;
- struct cqp_commands_info *info = &cqp_request->info;
- int err_code = 0;
-
- if (iwdev->reset) {
- i40iw_free_cqp_request(&iwdev->cqp, cqp_request);
- return I40IW_ERR_CQP_COMPL_ERROR;
- }
-
- status = i40iw_process_cqp_cmd(dev, info);
- if (status) {
- i40iw_pr_err("error cqp command 0x%x failed\n", info->cqp_cmd);
- i40iw_free_cqp_request(&iwdev->cqp, cqp_request);
- return status;
- }
- if (cqp_request->waiting)
- err_code = i40iw_wait_event(iwdev, cqp_request);
- if (err_code)
- status = I40IW_ERR_CQP_COMPL_ERROR;
- return status;
-}
-
-/**
- * i40iw_add_devusecount - add dev refcount
- * @iwdev: dev for refcount
- */
-void i40iw_add_devusecount(struct i40iw_device *iwdev)
-{
- atomic64_inc(&iwdev->use_count);
-}
-
-/**
- * i40iw_rem_devusecount - decrement refcount for dev
- * @iwdev: device
- */
-void i40iw_rem_devusecount(struct i40iw_device *iwdev)
-{
- if (!atomic64_dec_and_test(&iwdev->use_count))
- return;
- wake_up(&iwdev->close_wq);
-}
-
-/**
- * i40iw_add_pdusecount - add pd refcount
- * @iwpd: pd for refcount
- */
-void i40iw_add_pdusecount(struct i40iw_pd *iwpd)
-{
- atomic_inc(&iwpd->usecount);
-}
-
-/**
- * i40iw_rem_pdusecount - decrement refcount for pd and free if 0
- * @iwpd: pd for refcount
- * @iwdev: iwarp device
- */
-void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev)
-{
- if (!atomic_dec_and_test(&iwpd->usecount))
- return;
- i40iw_free_resource(iwdev, iwdev->allocated_pds, iwpd->sc_pd.pd_id);
-}
-
-/**
- * i40iw_add_ref - add refcount for qp
- * @ibqp: iwarp qp
- */
-void i40iw_add_ref(struct ib_qp *ibqp)
-{
- struct i40iw_qp *iwqp = (struct i40iw_qp *)ibqp;
-
- atomic_inc(&iwqp->refcount);
-}
-
-/**
- * i40iw_rem_ref - rem refcount for qp and free if 0
- * @ibqp: iwarp qp
- */
-void i40iw_rem_ref(struct ib_qp *ibqp)
-{
- struct i40iw_qp *iwqp;
- enum i40iw_status_code status;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- struct i40iw_device *iwdev;
- u32 qp_num;
- unsigned long flags;
-
- iwqp = to_iwqp(ibqp);
- iwdev = iwqp->iwdev;
- spin_lock_irqsave(&iwdev->qptable_lock, flags);
- if (!atomic_dec_and_test(&iwqp->refcount)) {
- spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
- return;
- }
-
- qp_num = iwqp->ibqp.qp_num;
- iwdev->qp_table[qp_num] = NULL;
- spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
- if (!cqp_request)
- return;
-
- cqp_request->callback_fcn = i40iw_free_qp;
- cqp_request->param = (void *)&iwqp->sc_qp;
- cqp_info = &cqp_request->info;
- cqp_info->cqp_cmd = OP_QP_DESTROY;
- cqp_info->post_sq = 1;
- cqp_info->in.u.qp_destroy.qp = &iwqp->sc_qp;
- cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request;
- cqp_info->in.u.qp_destroy.remove_hash_idx = true;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (!status)
- return;
-
- i40iw_rem_pdusecount(iwqp->iwpd, iwdev);
- i40iw_free_qp_resources(iwdev, iwqp, qp_num);
- i40iw_rem_devusecount(iwdev);
-}
-
-/**
- * i40iw_get_qp - get qp address
- * @device: iwarp device
- * @qpn: qp number
- */
-struct ib_qp *i40iw_get_qp(struct ib_device *device, int qpn)
-{
- struct i40iw_device *iwdev = to_iwdev(device);
-
- if ((qpn < IW_FIRST_QPN) || (qpn >= iwdev->max_qp))
- return NULL;
-
- return &iwdev->qp_table[qpn]->ibqp;
-}
-
-/**
- * i40iw_debug_buf - print debug msg and buffer if mask is set
- * @dev: hardware control device structure
- * @mask: mask to compare against to decide whether to print the buffer
- * @buf: pointer to buffer address
- * @size: size of buffer to print
- */
-void i40iw_debug_buf(struct i40iw_sc_dev *dev,
- enum i40iw_debug_flag mask,
- char *desc,
- u64 *buf,
- u32 size)
-{
- u32 i;
-
- if (!(dev->debug_mask & mask))
- return;
- i40iw_debug(dev, mask, "%s\n", desc);
- i40iw_debug(dev, mask, "starting address virt=%p phy=%llxh\n", buf,
- (unsigned long long)virt_to_phys(buf));
-
- for (i = 0; i < size; i += 8)
- i40iw_debug(dev, mask, "index %03d val: %016llx\n", i, buf[i / 8]);
-}
-
-/**
- * i40iw_get_hw_addr - return hw addr
- * @par: points to shared dev
- */
-u8 __iomem *i40iw_get_hw_addr(void *par)
-{
- struct i40iw_sc_dev *dev = (struct i40iw_sc_dev *)par;
-
- return dev->hw->hw_addr;
-}
-
-/**
- * i40iw_remove_head - return head entry and remove from list
- * @list: list for entry
- */
-void *i40iw_remove_head(struct list_head *list)
-{
- struct list_head *entry;
-
- if (list_empty(list))
- return NULL;
-
- entry = (void *)list->next;
- list_del(entry);
- return (void *)entry;
-}
-
-/**
- * i40iw_allocate_dma_mem - Memory alloc helper fn
- * @hw: pointer to the HW structure
- * @mem: ptr to mem struct to fill out
- * @size: size of memory requested
- * @alignment: what to align the allocation to
- */
-enum i40iw_status_code i40iw_allocate_dma_mem(struct i40iw_hw *hw,
- struct i40iw_dma_mem *mem,
- u64 size,
- u32 alignment)
-{
- struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
-
- if (!mem)
- return I40IW_ERR_PARAM;
- mem->size = ALIGN(size, alignment);
- mem->va = dma_alloc_coherent(&pcidev->dev, mem->size,
- (dma_addr_t *)&mem->pa, GFP_KERNEL);
- if (!mem->va)
- return I40IW_ERR_NO_MEMORY;
- return 0;
-}
-
-/**
- * i40iw_free_dma_mem - Memory free helper fn
- * @hw: pointer to the HW structure
- * @mem: ptr to mem struct to free
- */
-void i40iw_free_dma_mem(struct i40iw_hw *hw, struct i40iw_dma_mem *mem)
-{
- struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
-
- if (!mem || !mem->va)
- return;
-
- dma_free_coherent(&pcidev->dev, mem->size,
- mem->va, (dma_addr_t)mem->pa);
- mem->va = NULL;
-}
-
-/**
- * i40iw_allocate_virt_mem - virtual memory alloc helper fn
- * @hw: pointer to the HW structure
- * @mem: ptr to mem struct to fill out
- * @size: size of memory requested
- */
-enum i40iw_status_code i40iw_allocate_virt_mem(struct i40iw_hw *hw,
- struct i40iw_virt_mem *mem,
- u32 size)
-{
- if (!mem)
- return I40IW_ERR_PARAM;
-
- mem->size = size;
- mem->va = kzalloc(size, GFP_KERNEL);
-
- if (mem->va)
- return 0;
- else
- return I40IW_ERR_NO_MEMORY;
-}
-
-/**
- * i40iw_free_virt_mem - virtual memory free helper fn
- * @hw: pointer to the HW structure
- * @mem: ptr to mem struct to free
- */
-enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw,
- struct i40iw_virt_mem *mem)
-{
- if (!mem)
- return I40IW_ERR_PARAM;
- /*
- * mem->va points to the parent of mem, so both mem and mem->va
- * can not be touched once mem->va is freed
- */
- kfree(mem->va);
- return 0;
-}
-
-/**
- * i40iw_cqp_sds_cmd - create cqp command for sd
- * @dev: hardware control device structure
- * @sdinfo: information for sd cqp
- *
- */
-enum i40iw_status_code i40iw_cqp_sds_cmd(struct i40iw_sc_dev *dev,
- struct i40iw_update_sds_info *sdinfo)
-{
- enum i40iw_status_code status;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
- if (!cqp_request)
- return I40IW_ERR_NO_MEMORY;
- cqp_info = &cqp_request->info;
- memcpy(&cqp_info->in.u.update_pe_sds.info, sdinfo,
- sizeof(cqp_info->in.u.update_pe_sds.info));
- cqp_info->cqp_cmd = OP_UPDATE_PE_SDS;
- cqp_info->post_sq = 1;
- cqp_info->in.u.update_pe_sds.dev = dev;
- cqp_info->in.u.update_pe_sds.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Update SD's fail");
- return status;
-}
-
-/**
- * i40iw_qp_suspend_resume - cqp command for suspend/resume
- * @dev: hardware control device structure
- * @qp: hardware control qp
- * @suspend: flag if suspend or resume
- */
-void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp, bool suspend)
-{
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
- struct i40iw_cqp_request *cqp_request;
- struct i40iw_sc_cqp *cqp = dev->cqp;
- struct cqp_commands_info *cqp_info;
- enum i40iw_status_code status;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
- if (!cqp_request)
- return;
-
- cqp_info = &cqp_request->info;
- cqp_info->cqp_cmd = (suspend) ? OP_SUSPEND : OP_RESUME;
- cqp_info->in.u.suspend_resume.cqp = cqp;
- cqp_info->in.u.suspend_resume.qp = qp;
- cqp_info->in.u.suspend_resume.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP QP Suspend/Resume fail");
-}
-
-/**
- * i40iw_term_modify_qp - modify qp for term message
- * @qp: hardware control qp
- * @next_state: qp's next state
- * @term: terminate code
- * @term_len: length
- */
-void i40iw_term_modify_qp(struct i40iw_sc_qp *qp, u8 next_state, u8 term, u8 term_len)
-{
- struct i40iw_qp *iwqp;
-
- iwqp = (struct i40iw_qp *)qp->back_qp;
- i40iw_next_iw_state(iwqp, next_state, 0, term, term_len);
-};
-
-/**
- * i40iw_terminate_done - after terminate is completed
- * @qp: hardware control qp
- * @timeout_occurred: indicates if terminate timer expired
- */
-void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred)
-{
- struct i40iw_qp *iwqp;
- u32 next_iwarp_state = I40IW_QP_STATE_ERROR;
- u8 hte = 0;
- bool first_time;
- unsigned long flags;
-
- iwqp = (struct i40iw_qp *)qp->back_qp;
- spin_lock_irqsave(&iwqp->lock, flags);
- if (iwqp->hte_added) {
- iwqp->hte_added = 0;
- hte = 1;
- }
- first_time = !(qp->term_flags & I40IW_TERM_DONE);
- qp->term_flags |= I40IW_TERM_DONE;
- spin_unlock_irqrestore(&iwqp->lock, flags);
- if (first_time) {
- if (!timeout_occurred)
- i40iw_terminate_del_timer(qp);
- else
- next_iwarp_state = I40IW_QP_STATE_CLOSING;
-
- i40iw_next_iw_state(iwqp, next_iwarp_state, hte, 0, 0);
- i40iw_cm_disconn(iwqp);
- }
-}
-
-/**
- * i40iw_terminate_timeout - timeout happened
- * @context: points to iwarp qp
- */
-static void i40iw_terminate_timeout(struct timer_list *t)
-{
- struct i40iw_qp *iwqp = from_timer(iwqp, t, terminate_timer);
- struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)&iwqp->sc_qp;
-
- i40iw_terminate_done(qp, 1);
- i40iw_rem_ref(&iwqp->ibqp);
-}
-
-/**
- * i40iw_terminate_start_timer - start terminate timeout
- * @qp: hardware control qp
- */
-void i40iw_terminate_start_timer(struct i40iw_sc_qp *qp)
-{
- struct i40iw_qp *iwqp;
-
- iwqp = (struct i40iw_qp *)qp->back_qp;
- i40iw_add_ref(&iwqp->ibqp);
- timer_setup(&iwqp->terminate_timer, i40iw_terminate_timeout, 0);
- iwqp->terminate_timer.expires = jiffies + HZ;
- add_timer(&iwqp->terminate_timer);
-}
-
-/**
- * i40iw_terminate_del_timer - delete terminate timeout
- * @qp: hardware control qp
- */
-void i40iw_terminate_del_timer(struct i40iw_sc_qp *qp)
-{
- struct i40iw_qp *iwqp;
-
- iwqp = (struct i40iw_qp *)qp->back_qp;
- if (del_timer(&iwqp->terminate_timer))
- i40iw_rem_ref(&iwqp->ibqp);
-}
-
-/**
- * i40iw_cqp_generic_worker - generic worker for cqp
- * @work: work pointer
- */
-static void i40iw_cqp_generic_worker(struct work_struct *work)
-{
- struct i40iw_virtchnl_work_info *work_info =
- &((struct virtchnl_work *)work)->work_info;
-
- if (work_info->worker_vf_dev)
- work_info->callback_fcn(work_info->worker_vf_dev);
-}
-
-/**
- * i40iw_cqp_spawn_worker - spawn worker thread
- * @iwdev: device struct pointer
- * @work_info: work request info
- * @iw_vf_idx: virtual function index
- */
-void i40iw_cqp_spawn_worker(struct i40iw_sc_dev *dev,
- struct i40iw_virtchnl_work_info *work_info,
- u32 iw_vf_idx)
-{
- struct virtchnl_work *work;
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
-
- work = &iwdev->virtchnl_w[iw_vf_idx];
- memcpy(&work->work_info, work_info, sizeof(*work_info));
- INIT_WORK(&work->work, i40iw_cqp_generic_worker);
- queue_work(iwdev->virtchnl_wq, &work->work);
-}
-
-/**
- * i40iw_cqp_manage_hmc_fcn_worker - worker for manage hmc function
- * @work: work pointer for hmc info
- */
-static void i40iw_cqp_manage_hmc_fcn_worker(struct work_struct *work)
-{
- struct i40iw_cqp_request *cqp_request =
- ((struct virtchnl_work *)work)->cqp_request;
- struct i40iw_ccq_cqe_info ccq_cqe_info;
- struct i40iw_hmc_fcn_info *hmcfcninfo =
- &cqp_request->info.in.u.manage_hmc_pm.info;
- struct i40iw_device *iwdev =
- (struct i40iw_device *)cqp_request->info.in.u.manage_hmc_pm.dev->back_dev;
-
- ccq_cqe_info.cqp = NULL;
- ccq_cqe_info.maj_err_code = cqp_request->compl_info.maj_err_code;
- ccq_cqe_info.min_err_code = cqp_request->compl_info.min_err_code;
- ccq_cqe_info.op_code = cqp_request->compl_info.op_code;
- ccq_cqe_info.op_ret_val = cqp_request->compl_info.op_ret_val;
- ccq_cqe_info.scratch = 0;
- ccq_cqe_info.error = cqp_request->compl_info.error;
- hmcfcninfo->callback_fcn(cqp_request->info.in.u.manage_hmc_pm.dev,
- hmcfcninfo->cqp_callback_param, &ccq_cqe_info);
- i40iw_put_cqp_request(&iwdev->cqp, cqp_request);
-}
-
-/**
- * i40iw_cqp_manage_hmc_fcn_callback - called function after cqp completion
- * @cqp_request: cqp request info struct for hmc fun
- * @unused: unused param of callback
- */
-static void i40iw_cqp_manage_hmc_fcn_callback(struct i40iw_cqp_request *cqp_request,
- u32 unused)
-{
- struct virtchnl_work *work;
- struct i40iw_hmc_fcn_info *hmcfcninfo =
- &cqp_request->info.in.u.manage_hmc_pm.info;
- struct i40iw_device *iwdev =
- (struct i40iw_device *)cqp_request->info.in.u.manage_hmc_pm.dev->
- back_dev;
-
- if (hmcfcninfo && hmcfcninfo->callback_fcn) {
- i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_HMC, "%s1\n", __func__);
- atomic_inc(&cqp_request->refcount);
- work = &iwdev->virtchnl_w[hmcfcninfo->iw_vf_idx];
- work->cqp_request = cqp_request;
- INIT_WORK(&work->work, i40iw_cqp_manage_hmc_fcn_worker);
- queue_work(iwdev->virtchnl_wq, &work->work);
- i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_HMC, "%s2\n", __func__);
- } else {
- i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_HMC, "%s: Something wrong\n", __func__);
- }
-}
-
-/**
- * i40iw_cqp_manage_hmc_fcn_cmd - issue cqp command to manage hmc
- * @dev: hardware control device structure
- * @hmcfcninfo: info for hmc
- */
-enum i40iw_status_code i40iw_cqp_manage_hmc_fcn_cmd(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_fcn_info *hmcfcninfo)
-{
- enum i40iw_status_code status;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
-
- i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_HMC, "%s\n", __func__);
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
- if (!cqp_request)
- return I40IW_ERR_NO_MEMORY;
- cqp_info = &cqp_request->info;
- cqp_request->callback_fcn = i40iw_cqp_manage_hmc_fcn_callback;
- cqp_request->param = hmcfcninfo;
- memcpy(&cqp_info->in.u.manage_hmc_pm.info, hmcfcninfo,
- sizeof(*hmcfcninfo));
- cqp_info->in.u.manage_hmc_pm.dev = dev;
- cqp_info->cqp_cmd = OP_MANAGE_HMC_PM_FUNC_TABLE;
- cqp_info->post_sq = 1;
- cqp_info->in.u.manage_hmc_pm.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Manage HMC fail");
- return status;
-}
-
-/**
- * i40iw_cqp_query_fpm_values_cmd - send cqp command for fpm
- * @iwdev: function device struct
- * @values_mem: buffer for fpm
- * @hmc_fn_id: function id for fpm
- */
-enum i40iw_status_code i40iw_cqp_query_fpm_values_cmd(struct i40iw_sc_dev *dev,
- struct i40iw_dma_mem *values_mem,
- u8 hmc_fn_id)
-{
- enum i40iw_status_code status;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
- if (!cqp_request)
- return I40IW_ERR_NO_MEMORY;
- cqp_info = &cqp_request->info;
- cqp_request->param = NULL;
- cqp_info->in.u.query_fpm_values.cqp = dev->cqp;
- cqp_info->in.u.query_fpm_values.fpm_values_pa = values_mem->pa;
- cqp_info->in.u.query_fpm_values.fpm_values_va = values_mem->va;
- cqp_info->in.u.query_fpm_values.hmc_fn_id = hmc_fn_id;
- cqp_info->cqp_cmd = OP_QUERY_FPM_VALUES;
- cqp_info->post_sq = 1;
- cqp_info->in.u.query_fpm_values.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Query FPM fail");
- return status;
-}
-
-/**
- * i40iw_cqp_commit_fpm_values_cmd - commit fpm values in hw
- * @dev: hardware control device structure
- * @values_mem: buffer with fpm values
- * @hmc_fn_id: function id for fpm
- */
-enum i40iw_status_code i40iw_cqp_commit_fpm_values_cmd(struct i40iw_sc_dev *dev,
- struct i40iw_dma_mem *values_mem,
- u8 hmc_fn_id)
-{
- enum i40iw_status_code status;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
- if (!cqp_request)
- return I40IW_ERR_NO_MEMORY;
- cqp_info = &cqp_request->info;
- cqp_request->param = NULL;
- cqp_info->in.u.commit_fpm_values.cqp = dev->cqp;
- cqp_info->in.u.commit_fpm_values.fpm_values_pa = values_mem->pa;
- cqp_info->in.u.commit_fpm_values.fpm_values_va = values_mem->va;
- cqp_info->in.u.commit_fpm_values.hmc_fn_id = hmc_fn_id;
- cqp_info->cqp_cmd = OP_COMMIT_FPM_VALUES;
- cqp_info->post_sq = 1;
- cqp_info->in.u.commit_fpm_values.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Commit FPM fail");
- return status;
-}
-
-/**
- * i40iw_vf_wait_vchnl_resp - wait for channel msg
- * @iwdev: function's device struct
- */
-enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev)
-{
- struct i40iw_device *iwdev = dev->back_dev;
- int timeout_ret;
-
- i40iw_debug(dev, I40IW_DEBUG_VIRT, "%s[%u] dev %p, iwdev %p\n",
- __func__, __LINE__, dev, iwdev);
-
- atomic_set(&iwdev->vchnl_msgs, 2);
- timeout_ret = wait_event_timeout(iwdev->vchnl_waitq,
- (atomic_read(&iwdev->vchnl_msgs) == 1),
- I40IW_VCHNL_EVENT_TIMEOUT);
- atomic_dec(&iwdev->vchnl_msgs);
- if (!timeout_ret) {
- i40iw_pr_err("virt channel completion timeout = 0x%x\n", timeout_ret);
- atomic_set(&iwdev->vchnl_msgs, 0);
- dev->vchnl_up = false;
- return I40IW_ERR_TIMEOUT;
- }
- wake_up(&dev->vf_reqs);
- return 0;
-}
-
-/**
- * i40iw_cqp_cq_create_cmd - create a cq for the cqp
- * @dev: device pointer
- * @cq: pointer to created cq
- */
-enum i40iw_status_code i40iw_cqp_cq_create_cmd(struct i40iw_sc_dev *dev,
- struct i40iw_sc_cq *cq)
-{
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
- struct i40iw_cqp *iwcqp = &iwdev->cqp;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- enum i40iw_status_code status;
-
- cqp_request = i40iw_get_cqp_request(iwcqp, true);
- if (!cqp_request)
- return I40IW_ERR_NO_MEMORY;
-
- cqp_info = &cqp_request->info;
- cqp_info->cqp_cmd = OP_CQ_CREATE;
- cqp_info->post_sq = 1;
- cqp_info->in.u.cq_create.cq = cq;
- cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Create QP fail");
-
- return status;
-}
-
-/**
- * i40iw_cqp_qp_create_cmd - create a qp for the cqp
- * @dev: device pointer
- * @qp: pointer to created qp
- */
-enum i40iw_status_code i40iw_cqp_qp_create_cmd(struct i40iw_sc_dev *dev,
- struct i40iw_sc_qp *qp)
-{
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
- struct i40iw_cqp *iwcqp = &iwdev->cqp;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- struct i40iw_create_qp_info *qp_info;
- enum i40iw_status_code status;
-
- cqp_request = i40iw_get_cqp_request(iwcqp, true);
- if (!cqp_request)
- return I40IW_ERR_NO_MEMORY;
-
- cqp_info = &cqp_request->info;
- qp_info = &cqp_request->info.in.u.qp_create.info;
-
- memset(qp_info, 0, sizeof(*qp_info));
-
- qp_info->cq_num_valid = true;
- qp_info->next_iwarp_state = I40IW_QP_STATE_RTS;
-
- cqp_info->cqp_cmd = OP_QP_CREATE;
- cqp_info->post_sq = 1;
- cqp_info->in.u.qp_create.qp = qp;
- cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP QP create fail");
- return status;
-}
-
-/**
- * i40iw_cqp_cq_destroy_cmd - destroy the cqp cq
- * @dev: device pointer
- * @cq: pointer to cq
- */
-void i40iw_cqp_cq_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq)
-{
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
-
- i40iw_cq_wq_destroy(iwdev, cq);
-}
-
-/**
- * i40iw_cqp_qp_destroy_cmd - destroy a qp via the cqp
- * @dev: device pointer
- * @qp: pointer to qp
- */
-void i40iw_cqp_qp_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
-{
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
- struct i40iw_cqp *iwcqp = &iwdev->cqp;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- enum i40iw_status_code status;
-
- cqp_request = i40iw_get_cqp_request(iwcqp, true);
- if (!cqp_request)
- return;
-
- cqp_info = &cqp_request->info;
- memset(cqp_info, 0, sizeof(*cqp_info));
-
- cqp_info->cqp_cmd = OP_QP_DESTROY;
- cqp_info->post_sq = 1;
- cqp_info->in.u.qp_destroy.qp = qp;
- cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request;
- cqp_info->in.u.qp_destroy.remove_hash_idx = true;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP QP_DESTROY fail");
-}
-
-
-/**
- * i40iw_ieq_mpa_crc_ae - generate AE for crc error
- * @dev: hardware control device structure
- * @qp: hardware control qp
- */
-void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
-{
- struct i40iw_gen_ae_info info;
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
-
- i40iw_debug(dev, I40IW_DEBUG_AEQ, "%s entered\n", __func__);
- info.ae_code = I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR;
- info.ae_source = I40IW_AE_SOURCE_RQ;
- i40iw_gen_ae(iwdev, qp, &info, false);
-}
-
-/**
- * i40iw_init_hash_desc - initialize hash for crc calculation
- * @desc: pointer to the hash descriptor to initialize
- */
-enum i40iw_status_code i40iw_init_hash_desc(struct shash_desc **desc)
-{
- struct crypto_shash *tfm;
- struct shash_desc *tdesc;
-
- tfm = crypto_alloc_shash("crc32c", 0, 0);
- if (IS_ERR(tfm))
- return I40IW_ERR_MPA_CRC;
-
- tdesc = kzalloc(sizeof(*tdesc) + crypto_shash_descsize(tfm),
- GFP_KERNEL);
- if (!tdesc) {
- crypto_free_shash(tfm);
- return I40IW_ERR_MPA_CRC;
- }
- tdesc->tfm = tfm;
- *desc = tdesc;
-
- return 0;
-}
-
-/**
- * i40iw_free_hash_desc - free hash desc
- * @desc: to be freed
- */
-void i40iw_free_hash_desc(struct shash_desc *desc)
-{
- if (desc) {
- crypto_free_shash(desc->tfm);
- kfree(desc);
- }
-}
-
-/**
- * i40iw_alloc_query_fpm_buf - allocate buffer for fpm
- * @dev: hardware control device structure
- * @mem: buffer ptr for fpm to be allocated
- * @return: memory allocation status
- */
-enum i40iw_status_code i40iw_alloc_query_fpm_buf(struct i40iw_sc_dev *dev,
- struct i40iw_dma_mem *mem)
-{
- enum i40iw_status_code status;
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
-
- status = i40iw_obj_aligned_mem(iwdev, mem, I40IW_QUERY_FPM_BUF_SIZE,
- I40IW_FPM_QUERY_BUF_ALIGNMENT_MASK);
- return status;
-}
-
-/**
- * i40iw_ieq_check_mpacrc - check if mpa crc is OK
- * @desc: desc for hash
- * @addr: address of buffer for crc
- * @length: length of buffer
- * @value: value to be compared
- */
-enum i40iw_status_code i40iw_ieq_check_mpacrc(struct shash_desc *desc,
- void *addr,
- u32 length,
- u32 value)
-{
- u32 crc = 0;
- int ret;
- enum i40iw_status_code ret_code = 0;
-
- crypto_shash_init(desc);
- ret = crypto_shash_update(desc, addr, length);
- if (!ret)
- crypto_shash_final(desc, (u8 *)&crc);
- if (crc != value) {
- i40iw_pr_err("mpa crc check fail\n");
- ret_code = I40IW_ERR_MPA_CRC;
- }
- return ret_code;
-}
-
-/**
- * i40iw_ieq_get_qp - get qp based on quad in puda buffer
- * @dev: hardware control device structure
- * @buf: receive puda buffer on exception q
- */
-struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev,
- struct i40iw_puda_buf *buf)
-{
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
- struct i40iw_qp *iwqp;
- struct i40iw_cm_node *cm_node;
- u32 loc_addr[4], rem_addr[4];
- u16 loc_port, rem_port;
- struct ipv6hdr *ip6h;
- struct iphdr *iph = (struct iphdr *)buf->iph;
- struct tcphdr *tcph = (struct tcphdr *)buf->tcph;
-
- if (iph->version == 4) {
- memset(loc_addr, 0, sizeof(loc_addr));
- loc_addr[0] = ntohl(iph->daddr);
- memset(rem_addr, 0, sizeof(rem_addr));
- rem_addr[0] = ntohl(iph->saddr);
- } else {
- ip6h = (struct ipv6hdr *)buf->iph;
- i40iw_copy_ip_ntohl(loc_addr, ip6h->daddr.in6_u.u6_addr32);
- i40iw_copy_ip_ntohl(rem_addr, ip6h->saddr.in6_u.u6_addr32);
- }
- loc_port = ntohs(tcph->dest);
- rem_port = ntohs(tcph->source);
-
- cm_node = i40iw_find_node(&iwdev->cm_core, rem_port, rem_addr, loc_port,
- loc_addr, false, true);
- if (!cm_node)
- return NULL;
- iwqp = cm_node->iwqp;
- return &iwqp->sc_qp;
-}
-
-/**
- * i40iw_ieq_update_tcpip_info - update tcpip in the buffer
- * @buf: puda to update
- * @length: length of buffer
- * @seqnum: seq number for tcp
- */
-void i40iw_ieq_update_tcpip_info(struct i40iw_puda_buf *buf, u16 length, u32 seqnum)
-{
- struct tcphdr *tcph;
- struct iphdr *iph;
- u16 iphlen;
- u16 packetsize;
- u8 *addr = (u8 *)buf->mem.va;
-
- iphlen = (buf->ipv4) ? 20 : 40;
- iph = (struct iphdr *)(addr + buf->maclen);
- tcph = (struct tcphdr *)(addr + buf->maclen + iphlen);
- packetsize = length + buf->tcphlen + iphlen;
-
- iph->tot_len = htons(packetsize);
- tcph->seq = htonl(seqnum);
-}
-
-/**
- * i40iw_puda_get_tcpip_info - get tcpip info from puda buffer
- * @info: to get information
- * @buf: puda buffer
- */
-enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_info *info,
- struct i40iw_puda_buf *buf)
-{
- struct iphdr *iph;
- struct ipv6hdr *ip6h;
- struct tcphdr *tcph;
- u16 iphlen;
- u16 pkt_len;
- u8 *mem = (u8 *)buf->mem.va;
- struct ethhdr *ethh = (struct ethhdr *)buf->mem.va;
-
- if (ethh->h_proto == htons(0x8100)) {
- info->vlan_valid = true;
- buf->vlan_id = ntohs(((struct vlan_ethhdr *)ethh)->h_vlan_TCI) & VLAN_VID_MASK;
- }
- buf->maclen = (info->vlan_valid) ? 18 : 14;
- iphlen = (info->l3proto) ? 40 : 20;
- buf->ipv4 = (info->l3proto) ? false : true;
- buf->iph = mem + buf->maclen;
- iph = (struct iphdr *)buf->iph;
-
- buf->tcph = buf->iph + iphlen;
- tcph = (struct tcphdr *)buf->tcph;
-
- if (buf->ipv4) {
- pkt_len = ntohs(iph->tot_len);
- } else {
- ip6h = (struct ipv6hdr *)buf->iph;
- pkt_len = ntohs(ip6h->payload_len) + iphlen;
- }
-
- buf->totallen = pkt_len + buf->maclen;
-
- if (info->payload_len < buf->totallen) {
- i40iw_pr_err("payload_len = 0x%x totallen expected0x%x\n",
- info->payload_len, buf->totallen);
- return I40IW_ERR_INVALID_SIZE;
- }
-
- buf->tcphlen = (tcph->doff) << 2;
- buf->datalen = pkt_len - iphlen - buf->tcphlen;
- buf->data = (buf->datalen) ? buf->tcph + buf->tcphlen : NULL;
- buf->hdrlen = buf->maclen + iphlen + buf->tcphlen;
- buf->seqnum = ntohl(tcph->seq);
- return 0;
-}
-
-/**
- * i40iw_hw_stats_timeout - Stats timer-handler which updates all HW stats
- * @t: timer context from which the vsi pestat structure is obtained
- */
-static void i40iw_hw_stats_timeout(struct timer_list *t)
-{
- struct i40iw_vsi_pestat *pf_devstat = from_timer(pf_devstat, t,
- stats_timer);
- struct i40iw_sc_vsi *sc_vsi = pf_devstat->vsi;
- struct i40iw_sc_dev *pf_dev = sc_vsi->dev;
- struct i40iw_vsi_pestat *vf_devstat = NULL;
- u16 iw_vf_idx;
- unsigned long flags;
-
- /*PF*/
- i40iw_hw_stats_read_all(pf_devstat, &pf_devstat->hw_stats);
-
- for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; iw_vf_idx++) {
- spin_lock_irqsave(&pf_devstat->lock, flags);
- if (pf_dev->vf_dev[iw_vf_idx]) {
- if (pf_dev->vf_dev[iw_vf_idx]->stats_initialized) {
- vf_devstat = &pf_dev->vf_dev[iw_vf_idx]->pestat;
- i40iw_hw_stats_read_all(vf_devstat, &vf_devstat->hw_stats);
- }
- }
- spin_unlock_irqrestore(&pf_devstat->lock, flags);
- }
-
- mod_timer(&pf_devstat->stats_timer,
- jiffies + msecs_to_jiffies(STATS_TIMER_DELAY));
-}
-
-/**
- * i40iw_hw_stats_start_timer - Start periodic stats timer
- * @vsi: pointer to the vsi structure
- */
-void i40iw_hw_stats_start_timer(struct i40iw_sc_vsi *vsi)
-{
- struct i40iw_vsi_pestat *devstat = vsi->pestat;
-
- timer_setup(&devstat->stats_timer, i40iw_hw_stats_timeout, 0);
- mod_timer(&devstat->stats_timer,
- jiffies + msecs_to_jiffies(STATS_TIMER_DELAY));
-}
-
-/**
- * i40iw_hw_stats_stop_timer - Delete periodic stats timer
- * @vsi: pointer to the vsi structure
- */
-void i40iw_hw_stats_stop_timer(struct i40iw_sc_vsi *vsi)
-{
- struct i40iw_vsi_pestat *devstat = vsi->pestat;
-
- del_timer_sync(&devstat->stats_timer);
-}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
deleted file mode 100644
index 5689d742bafb..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ /dev/null
@@ -1,2807 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/random.h>
-#include <linux/highmem.h>
-#include <linux/time.h>
-#include <linux/hugetlb.h>
-#include <linux/irq.h>
-#include <asm/byteorder.h>
-#include <net/ip.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/iw_cm.h>
-#include <rdma/ib_user_verbs.h>
-#include <rdma/ib_umem.h>
-#include <rdma/uverbs_ioctl.h>
-#include "i40iw.h"
-
-/**
- * i40iw_query_device - get device attributes
- * @ibdev: device pointer from stack
- * @props: returning device attributes
- * @udata: user data
- */
-static int i40iw_query_device(struct ib_device *ibdev,
- struct ib_device_attr *props,
- struct ib_udata *udata)
-{
- struct i40iw_device *iwdev = to_iwdev(ibdev);
-
- if (udata->inlen || udata->outlen)
- return -EINVAL;
- memset(props, 0, sizeof(*props));
- ether_addr_copy((u8 *)&props->sys_image_guid, iwdev->netdev->dev_addr);
- props->fw_ver = I40IW_FW_VERSION;
- props->device_cap_flags = iwdev->device_cap_flags;
- props->vendor_id = iwdev->ldev->pcidev->vendor;
- props->vendor_part_id = iwdev->ldev->pcidev->device;
- props->hw_ver = (u32)iwdev->sc_dev.hw_rev;
- props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
- props->max_qp = iwdev->max_qp - iwdev->used_qps;
- props->max_qp_wr = I40IW_MAX_QP_WRS;
- props->max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
- props->max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
- props->max_cq = iwdev->max_cq - iwdev->used_cqs;
- props->max_cqe = iwdev->max_cqe;
- props->max_mr = iwdev->max_mr - iwdev->used_mrs;
- props->max_pd = iwdev->max_pd - iwdev->used_pds;
- props->max_sge_rd = I40IW_MAX_SGE_RD;
- props->max_qp_rd_atom = I40IW_MAX_IRD_SIZE;
- props->max_qp_init_rd_atom = props->max_qp_rd_atom;
- props->atomic_cap = IB_ATOMIC_NONE;
- props->max_map_per_fmr = 1;
- props->max_fast_reg_page_list_len = I40IW_MAX_PAGES_PER_FMR;
- return 0;
-}
-
-/**
- * i40iw_query_port - get port attributes
- * @ibdev: device pointer from stack
- * @port: port number for query
- * @props: returning device attributes
- */
-static int i40iw_query_port(struct ib_device *ibdev,
- u8 port,
- struct ib_port_attr *props)
-{
- struct i40iw_device *iwdev = to_iwdev(ibdev);
- struct net_device *netdev = iwdev->netdev;
-
- /* props being zeroed by the caller, avoid zeroing it here */
- props->max_mtu = IB_MTU_4096;
- props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
-
- props->lid = 1;
- if (netif_carrier_ok(iwdev->netdev))
- props->state = IB_PORT_ACTIVE;
- else
- props->state = IB_PORT_DOWN;
- props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
- IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
- props->gid_tbl_len = 1;
- props->pkey_tbl_len = 1;
- props->active_width = IB_WIDTH_4X;
- props->active_speed = 1;
- props->max_msg_sz = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
- return 0;
-}
-
-/**
- * i40iw_alloc_ucontext - Allocate the user context data structure
- * @uctx: Uverbs context pointer from stack
- * @udata: user data
- *
- * This keeps track of all objects associated with a particular
- * user-mode client.
- */
-static int i40iw_alloc_ucontext(struct ib_ucontext *uctx,
- struct ib_udata *udata)
-{
- struct ib_device *ibdev = uctx->device;
- struct i40iw_device *iwdev = to_iwdev(ibdev);
- struct i40iw_alloc_ucontext_req req;
- struct i40iw_alloc_ucontext_resp uresp = {};
- struct i40iw_ucontext *ucontext = to_ucontext(uctx);
-
- if (ib_copy_from_udata(&req, udata, sizeof(req)))
- return -EINVAL;
-
- if (req.userspace_ver < 4 || req.userspace_ver > I40IW_ABI_VER) {
- i40iw_pr_err("Unsupported provider library version %u.\n", req.userspace_ver);
- return -EINVAL;
- }
-
- uresp.max_qps = iwdev->max_qp;
- uresp.max_pds = iwdev->max_pd;
- uresp.wq_size = iwdev->max_qp_wr * 2;
- uresp.kernel_ver = req.userspace_ver;
-
- ucontext->iwdev = iwdev;
- ucontext->abi_ver = req.userspace_ver;
-
- if (ib_copy_to_udata(udata, &uresp, sizeof(uresp)))
- return -EFAULT;
-
- INIT_LIST_HEAD(&ucontext->cq_reg_mem_list);
- spin_lock_init(&ucontext->cq_reg_mem_list_lock);
- INIT_LIST_HEAD(&ucontext->qp_reg_mem_list);
- spin_lock_init(&ucontext->qp_reg_mem_list_lock);
-
- return 0;
-}
-
-/**
- * i40iw_dealloc_ucontext - deallocate the user context data structure
- * @context: user context created during alloc
- */
-static void i40iw_dealloc_ucontext(struct ib_ucontext *context)
-{
- return;
-}
-
-/**
- * i40iw_mmap - user memory map
- * @context: context created during alloc
- * @vma: kernel info for user memory map
- */
-static int i40iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
- struct i40iw_ucontext *ucontext;
- u64 db_addr_offset;
- u64 push_offset;
-
- ucontext = to_ucontext(context);
- if (ucontext->iwdev->sc_dev.is_pf) {
- db_addr_offset = I40IW_DB_ADDR_OFFSET;
- push_offset = I40IW_PUSH_OFFSET;
- if (vma->vm_pgoff)
- vma->vm_pgoff += I40IW_PF_FIRST_PUSH_PAGE_INDEX - 1;
- } else {
- db_addr_offset = I40IW_VF_DB_ADDR_OFFSET;
- push_offset = I40IW_VF_PUSH_OFFSET;
- if (vma->vm_pgoff)
- vma->vm_pgoff += I40IW_VF_FIRST_PUSH_PAGE_INDEX - 1;
- }
-
- vma->vm_pgoff += db_addr_offset >> PAGE_SHIFT;
-
- if (vma->vm_pgoff == (db_addr_offset >> PAGE_SHIFT)) {
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- vma->vm_private_data = ucontext;
- } else {
- if ((vma->vm_pgoff - (push_offset >> PAGE_SHIFT)) % 2)
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- else
- vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
- }
-
- if (io_remap_pfn_range(vma, vma->vm_start,
- vma->vm_pgoff + (pci_resource_start(ucontext->iwdev->ldev->pcidev, 0) >> PAGE_SHIFT),
- PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
-
- return 0;
-}
-
-/**
- * i40iw_alloc_push_page - allocate a push page for qp
- * @iwdev: iwarp device
- * @qp: hardware control qp
- */
-static void i40iw_alloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp)
-{
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- enum i40iw_status_code status;
-
- if (qp->push_idx != I40IW_INVALID_PUSH_PAGE_INDEX)
- return;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
- if (!cqp_request)
- return;
-
- atomic_inc(&cqp_request->refcount);
-
- cqp_info = &cqp_request->info;
- cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE;
- cqp_info->post_sq = 1;
-
- cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle;
- cqp_info->in.u.manage_push_page.info.free_page = 0;
- cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp;
- cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
-
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (!status)
- qp->push_idx = cqp_request->compl_info.op_ret_val;
- else
- i40iw_pr_err("CQP-OP Push page fail");
- i40iw_put_cqp_request(&iwdev->cqp, cqp_request);
-}
-
-/**
- * i40iw_dealloc_push_page - free a push page for qp
- * @iwdev: iwarp device
- * @qp: hardware control qp
- */
-static void i40iw_dealloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp)
-{
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- enum i40iw_status_code status;
-
- if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX)
- return;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
- if (!cqp_request)
- return;
-
- cqp_info = &cqp_request->info;
- cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE;
- cqp_info->post_sq = 1;
-
- cqp_info->in.u.manage_push_page.info.push_idx = qp->push_idx;
- cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle;
- cqp_info->in.u.manage_push_page.info.free_page = 1;
- cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp;
- cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
-
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (!status)
- qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX;
- else
- i40iw_pr_err("CQP-OP Push page fail");
-}
-
-/**
- * i40iw_alloc_pd - allocate protection domain
- * @pd: PD pointer
- * @udata: user data
- */
-static int i40iw_alloc_pd(struct ib_pd *pd, struct ib_udata *udata)
-{
- struct i40iw_pd *iwpd = to_iwpd(pd);
- struct i40iw_device *iwdev = to_iwdev(pd->device);
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_alloc_pd_resp uresp;
- struct i40iw_sc_pd *sc_pd;
- u32 pd_id = 0;
- int err;
-
- if (iwdev->closing)
- return -ENODEV;
-
- err = i40iw_alloc_resource(iwdev, iwdev->allocated_pds,
- iwdev->max_pd, &pd_id, &iwdev->next_pd);
- if (err) {
- i40iw_pr_err("alloc resource failed\n");
- return err;
- }
-
- sc_pd = &iwpd->sc_pd;
-
- if (udata) {
- struct i40iw_ucontext *ucontext = rdma_udata_to_drv_context(
- udata, struct i40iw_ucontext, ibucontext);
- dev->iw_pd_ops->pd_init(dev, sc_pd, pd_id, ucontext->abi_ver);
- memset(&uresp, 0, sizeof(uresp));
- uresp.pd_id = pd_id;
- if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
- err = -EFAULT;
- goto error;
- }
- } else {
- dev->iw_pd_ops->pd_init(dev, sc_pd, pd_id, -1);
- }
-
- i40iw_add_pdusecount(iwpd);
- return 0;
-
-error:
- i40iw_free_resource(iwdev, iwdev->allocated_pds, pd_id);
- return err;
-}
-
-/**
- * i40iw_dealloc_pd - deallocate pd
- * @ibpd: ptr of pd to be deallocated
- * @udata: user data or null for kernel object
- */
-static void i40iw_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
-{
- struct i40iw_pd *iwpd = to_iwpd(ibpd);
- struct i40iw_device *iwdev = to_iwdev(ibpd->device);
-
- i40iw_rem_pdusecount(iwpd, iwdev);
-}
-
-/**
- * i40iw_get_pbl - Retrieve pbl from a list given a virtual
- * address
- * @va: user virtual address
- * @pbl_list: pbl list to search in (QP's or CQ's)
- */
-static struct i40iw_pbl *i40iw_get_pbl(unsigned long va,
- struct list_head *pbl_list)
-{
- struct i40iw_pbl *iwpbl;
-
- list_for_each_entry(iwpbl, pbl_list, list) {
- if (iwpbl->user_base == va) {
- iwpbl->on_list = false;
- list_del(&iwpbl->list);
- return iwpbl;
- }
- }
- return NULL;
-}
-
-/**
- * i40iw_free_qp_resources - free up memory resources for qp
- * @iwdev: iwarp device
- * @iwqp: qp ptr (user or kernel)
- * @qp_num: qp number assigned
- */
-void i40iw_free_qp_resources(struct i40iw_device *iwdev,
- struct i40iw_qp *iwqp,
- u32 qp_num)
-{
- struct i40iw_pbl *iwpbl = &iwqp->iwpbl;
-
- i40iw_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp);
- i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp);
- if (qp_num)
- i40iw_free_resource(iwdev, iwdev->allocated_qps, qp_num);
- if (iwpbl->pbl_allocated)
- i40iw_free_pble(iwdev->pble_rsrc, &iwpbl->pble_alloc);
- i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->q2_ctx_mem);
- i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->kqp.dma_mem);
- kfree(iwqp->kqp.wrid_mem);
- iwqp->kqp.wrid_mem = NULL;
- kfree(iwqp->allocated_buffer);
-}
-
-/**
- * i40iw_clean_cqes - clean cq entries for qp
- * @iwqp: qp ptr (user or kernel)
- * @iwcq: cq ptr
- */
-static void i40iw_clean_cqes(struct i40iw_qp *iwqp, struct i40iw_cq *iwcq)
-{
- struct i40iw_cq_uk *ukcq = &iwcq->sc_cq.cq_uk;
-
- ukcq->ops.iw_cq_clean(&iwqp->sc_qp.qp_uk, ukcq);
-}
-
-/**
- * i40iw_destroy_qp - destroy qp
- * @ibqp: qp's ib pointer also to get to device's qp address
- */
-static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
-{
- struct i40iw_qp *iwqp = to_iwqp(ibqp);
-
- iwqp->destroyed = 1;
-
- if (iwqp->ibqp_state >= IB_QPS_INIT && iwqp->ibqp_state < IB_QPS_RTS)
- i40iw_next_iw_state(iwqp, I40IW_QP_STATE_ERROR, 0, 0, 0);
-
- if (!iwqp->user_mode) {
- if (iwqp->iwscq) {
- i40iw_clean_cqes(iwqp, iwqp->iwscq);
- if (iwqp->iwrcq != iwqp->iwscq)
- i40iw_clean_cqes(iwqp, iwqp->iwrcq);
- }
- }
-
- i40iw_rem_ref(&iwqp->ibqp);
- return 0;
-}
-
-/**
- * i40iw_setup_virt_qp - setup for allocation of virtual qp
- * @iwdev: iwarp device
- * @iwqp: qp ptr
- * @init_info: initialize info to return
- */
-static int i40iw_setup_virt_qp(struct i40iw_device *iwdev,
- struct i40iw_qp *iwqp,
- struct i40iw_qp_init_info *init_info)
-{
- struct i40iw_pbl *iwpbl = &iwqp->iwpbl;
- struct i40iw_qp_mr *qpmr = &iwpbl->qp_mr;
-
- iwqp->page = qpmr->sq_page;
- init_info->shadow_area_pa = cpu_to_le64(qpmr->shadow);
- if (iwpbl->pbl_allocated) {
- init_info->virtual_map = true;
- init_info->sq_pa = qpmr->sq_pbl.idx;
- init_info->rq_pa = qpmr->rq_pbl.idx;
- } else {
- init_info->sq_pa = qpmr->sq_pbl.addr;
- init_info->rq_pa = qpmr->rq_pbl.addr;
- }
- return 0;
-}
-
-/**
- * i40iw_setup_kmode_qp - setup initialization for kernel mode qp
- * @iwdev: iwarp device
- * @iwqp: qp ptr (user or kernel)
- * @info: initialize info to return
- */
-static int i40iw_setup_kmode_qp(struct i40iw_device *iwdev,
- struct i40iw_qp *iwqp,
- struct i40iw_qp_init_info *info)
-{
- struct i40iw_dma_mem *mem = &iwqp->kqp.dma_mem;
- u32 sqdepth, rqdepth;
- u8 sqshift;
- u32 size;
- enum i40iw_status_code status;
- struct i40iw_qp_uk_init_info *ukinfo = &info->qp_uk_init_info;
-
- i40iw_get_wqe_shift(ukinfo->max_sq_frag_cnt, ukinfo->max_inline_data, &sqshift);
- status = i40iw_get_sqdepth(ukinfo->sq_size, sqshift, &sqdepth);
- if (status)
- return -ENOMEM;
-
- status = i40iw_get_rqdepth(ukinfo->rq_size, I40IW_MAX_RQ_WQE_SHIFT, &rqdepth);
- if (status)
- return -ENOMEM;
-
- size = sqdepth * sizeof(struct i40iw_sq_uk_wr_trk_info) + (rqdepth << 3);
- iwqp->kqp.wrid_mem = kzalloc(size, GFP_KERNEL);
-
- ukinfo->sq_wrtrk_array = (struct i40iw_sq_uk_wr_trk_info *)iwqp->kqp.wrid_mem;
- if (!ukinfo->sq_wrtrk_array)
- return -ENOMEM;
-
- ukinfo->rq_wrid_array = (u64 *)&ukinfo->sq_wrtrk_array[sqdepth];
-
- size = (sqdepth + rqdepth) * I40IW_QP_WQE_MIN_SIZE;
- size += (I40IW_SHADOW_AREA_SIZE << 3);
-
- status = i40iw_allocate_dma_mem(iwdev->sc_dev.hw, mem, size, 256);
- if (status) {
- kfree(ukinfo->sq_wrtrk_array);
- ukinfo->sq_wrtrk_array = NULL;
- return -ENOMEM;
- }
-
- ukinfo->sq = mem->va;
- info->sq_pa = mem->pa;
-
- ukinfo->rq = &ukinfo->sq[sqdepth];
- info->rq_pa = info->sq_pa + (sqdepth * I40IW_QP_WQE_MIN_SIZE);
-
- ukinfo->shadow_area = ukinfo->rq[rqdepth].elem;
- info->shadow_area_pa = info->rq_pa + (rqdepth * I40IW_QP_WQE_MIN_SIZE);
-
- ukinfo->sq_size = sqdepth >> sqshift;
- ukinfo->rq_size = rqdepth >> I40IW_MAX_RQ_WQE_SHIFT;
- ukinfo->qp_id = iwqp->ibqp.qp_num;
- return 0;
-}
-
-/**
- * i40iw_create_qp - create qp
- * @ibpd: ptr of pd
- * @init_attr: attributes for qp
- * @udata: user data for create qp
- */
-static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
-{
- struct i40iw_pd *iwpd = to_iwpd(ibpd);
- struct i40iw_device *iwdev = to_iwdev(ibpd->device);
- struct i40iw_cqp *iwcqp = &iwdev->cqp;
- struct i40iw_qp *iwqp;
- struct i40iw_ucontext *ucontext = rdma_udata_to_drv_context(
- udata, struct i40iw_ucontext, ibucontext);
- struct i40iw_create_qp_req req;
- struct i40iw_create_qp_resp uresp;
- u32 qp_num = 0;
- void *mem;
- enum i40iw_status_code ret;
- int err_code;
- int sq_size;
- int rq_size;
- struct i40iw_sc_qp *qp;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_qp_init_info init_info;
- struct i40iw_create_qp_info *qp_info;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
-
- struct i40iw_qp_host_ctx_info *ctx_info;
- struct i40iwarp_offload_info *iwarp_info;
- unsigned long flags;
-
- if (iwdev->closing)
- return ERR_PTR(-ENODEV);
-
- if (init_attr->create_flags)
- return ERR_PTR(-EINVAL);
- if (init_attr->cap.max_inline_data > I40IW_MAX_INLINE_DATA_SIZE)
- init_attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE;
-
- if (init_attr->cap.max_send_sge > I40IW_MAX_WQ_FRAGMENT_COUNT)
- init_attr->cap.max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
-
- if (init_attr->cap.max_recv_sge > I40IW_MAX_WQ_FRAGMENT_COUNT)
- init_attr->cap.max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
-
- memset(&init_info, 0, sizeof(init_info));
-
- sq_size = init_attr->cap.max_send_wr;
- rq_size = init_attr->cap.max_recv_wr;
-
- init_info.vsi = &iwdev->vsi;
- init_info.qp_uk_init_info.sq_size = sq_size;
- init_info.qp_uk_init_info.rq_size = rq_size;
- init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge;
- init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge;
- init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data;
-
- mem = kzalloc(sizeof(*iwqp), GFP_KERNEL);
- if (!mem)
- return ERR_PTR(-ENOMEM);
-
- iwqp = (struct i40iw_qp *)mem;
- iwqp->allocated_buffer = mem;
- qp = &iwqp->sc_qp;
- qp->back_qp = (void *)iwqp;
- qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX;
-
- iwqp->ctx_info.iwarp_info = &iwqp->iwarp_info;
-
- if (i40iw_allocate_dma_mem(dev->hw,
- &iwqp->q2_ctx_mem,
- I40IW_Q2_BUFFER_SIZE + I40IW_QP_CTX_SIZE,
- 256)) {
- i40iw_pr_err("dma_mem failed\n");
- err_code = -ENOMEM;
- goto error;
- }
-
- init_info.q2 = iwqp->q2_ctx_mem.va;
- init_info.q2_pa = iwqp->q2_ctx_mem.pa;
-
- init_info.host_ctx = (void *)init_info.q2 + I40IW_Q2_BUFFER_SIZE;
- init_info.host_ctx_pa = init_info.q2_pa + I40IW_Q2_BUFFER_SIZE;
-
- err_code = i40iw_alloc_resource(iwdev, iwdev->allocated_qps, iwdev->max_qp,
- &qp_num, &iwdev->next_qp);
- if (err_code) {
- i40iw_pr_err("qp resource\n");
- goto error;
- }
-
- iwqp->iwdev = iwdev;
- iwqp->iwpd = iwpd;
- iwqp->ibqp.qp_num = qp_num;
- qp = &iwqp->sc_qp;
- iwqp->iwscq = to_iwcq(init_attr->send_cq);
- iwqp->iwrcq = to_iwcq(init_attr->recv_cq);
-
- iwqp->host_ctx.va = init_info.host_ctx;
- iwqp->host_ctx.pa = init_info.host_ctx_pa;
- iwqp->host_ctx.size = I40IW_QP_CTX_SIZE;
-
- init_info.pd = &iwpd->sc_pd;
- init_info.qp_uk_init_info.qp_id = iwqp->ibqp.qp_num;
- iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp;
-
- if (init_attr->qp_type != IB_QPT_RC) {
- err_code = -EINVAL;
- goto error;
- }
- if (iwdev->push_mode)
- i40iw_alloc_push_page(iwdev, qp);
- if (udata) {
- err_code = ib_copy_from_udata(&req, udata, sizeof(req));
- if (err_code) {
- i40iw_pr_err("ib_copy_from_data\n");
- goto error;
- }
- iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx;
- iwqp->user_mode = 1;
-
- if (req.user_wqe_buffers) {
- struct i40iw_pbl *iwpbl;
-
- spin_lock_irqsave(
- &ucontext->qp_reg_mem_list_lock, flags);
- iwpbl = i40iw_get_pbl(
- (unsigned long)req.user_wqe_buffers,
- &ucontext->qp_reg_mem_list);
- spin_unlock_irqrestore(
- &ucontext->qp_reg_mem_list_lock, flags);
-
- if (!iwpbl) {
- err_code = -ENODATA;
- i40iw_pr_err("no pbl info\n");
- goto error;
- }
- memcpy(&iwqp->iwpbl, iwpbl, sizeof(iwqp->iwpbl));
- }
- err_code = i40iw_setup_virt_qp(iwdev, iwqp, &init_info);
- } else {
- err_code = i40iw_setup_kmode_qp(iwdev, iwqp, &init_info);
- }
-
- if (err_code) {
- i40iw_pr_err("setup qp failed\n");
- goto error;
- }
-
- init_info.type = I40IW_QP_TYPE_IWARP;
- ret = dev->iw_priv_qp_ops->qp_init(qp, &init_info);
- if (ret) {
- err_code = -EPROTO;
- i40iw_pr_err("qp_init fail\n");
- goto error;
- }
- ctx_info = &iwqp->ctx_info;
- iwarp_info = &iwqp->iwarp_info;
- iwarp_info->rd_enable = true;
- iwarp_info->wr_rdresp_en = true;
- if (!iwqp->user_mode) {
- iwarp_info->fast_reg_en = true;
- iwarp_info->priv_mode_en = true;
- }
- iwarp_info->ddp_ver = 1;
- iwarp_info->rdmap_ver = 1;
-
- ctx_info->iwarp_info_valid = true;
- ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
- ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
- if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX) {
- ctx_info->push_mode_en = false;
- } else {
- ctx_info->push_mode_en = true;
- ctx_info->push_idx = qp->push_idx;
- }
-
- ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
- (u64 *)iwqp->host_ctx.va,
- ctx_info);
- ctx_info->iwarp_info_valid = false;
- cqp_request = i40iw_get_cqp_request(iwcqp, true);
- if (!cqp_request) {
- err_code = -ENOMEM;
- goto error;
- }
- cqp_info = &cqp_request->info;
- qp_info = &cqp_request->info.in.u.qp_create.info;
-
- memset(qp_info, 0, sizeof(*qp_info));
-
- qp_info->cq_num_valid = true;
- qp_info->next_iwarp_state = I40IW_QP_STATE_IDLE;
-
- cqp_info->cqp_cmd = OP_QP_CREATE;
- cqp_info->post_sq = 1;
- cqp_info->in.u.qp_create.qp = qp;
- cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request;
- ret = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (ret) {
- i40iw_pr_err("CQP-OP QP create fail");
- err_code = -EACCES;
- goto error;
- }
-
- i40iw_add_ref(&iwqp->ibqp);
- spin_lock_init(&iwqp->lock);
- iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
- iwdev->qp_table[qp_num] = iwqp;
- i40iw_add_pdusecount(iwqp->iwpd);
- i40iw_add_devusecount(iwdev);
- if (udata) {
- memset(&uresp, 0, sizeof(uresp));
- uresp.actual_sq_size = sq_size;
- uresp.actual_rq_size = rq_size;
- uresp.qp_id = qp_num;
- uresp.push_idx = qp->push_idx;
- err_code = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
- if (err_code) {
- i40iw_pr_err("copy_to_udata failed\n");
- i40iw_destroy_qp(&iwqp->ibqp, udata);
- /* let the completion of the qp destroy free the qp */
- return ERR_PTR(err_code);
- }
- }
- init_completion(&iwqp->sq_drained);
- init_completion(&iwqp->rq_drained);
-
- return &iwqp->ibqp;
-error:
- i40iw_free_qp_resources(iwdev, iwqp, qp_num);
- return ERR_PTR(err_code);
-}
-
-/**
- * i40iw_query_qp - query qp attributes
- * @ibqp: qp pointer
- * @attr: attributes pointer
- * @attr_mask: Not used
- * @init_attr: qp attributes to return
- */
-static int i40iw_query_qp(struct ib_qp *ibqp,
- struct ib_qp_attr *attr,
- int attr_mask,
- struct ib_qp_init_attr *init_attr)
-{
- struct i40iw_qp *iwqp = to_iwqp(ibqp);
- struct i40iw_sc_qp *qp = &iwqp->sc_qp;
-
- attr->qp_access_flags = 0;
- attr->cap.max_send_wr = qp->qp_uk.sq_size;
- attr->cap.max_recv_wr = qp->qp_uk.rq_size;
- attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE;
- attr->cap.max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
- attr->cap.max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
- attr->port_num = 1;
- init_attr->event_handler = iwqp->ibqp.event_handler;
- init_attr->qp_context = iwqp->ibqp.qp_context;
- init_attr->send_cq = iwqp->ibqp.send_cq;
- init_attr->recv_cq = iwqp->ibqp.recv_cq;
- init_attr->srq = iwqp->ibqp.srq;
- init_attr->cap = attr->cap;
- init_attr->port_num = 1;
- return 0;
-}
-
-/**
- * i40iw_hw_modify_qp - setup cqp for modify qp
- * @iwdev: iwarp device
- * @iwqp: qp ptr (user or kernel)
- * @info: info for modify qp
- * @wait: flag to wait or not for modify qp completion
- */
-void i40iw_hw_modify_qp(struct i40iw_device *iwdev, struct i40iw_qp *iwqp,
- struct i40iw_modify_qp_info *info, bool wait)
-{
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- struct i40iw_modify_qp_info *m_info;
- struct i40iw_gen_ae_info ae_info;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait);
- if (!cqp_request)
- return;
-
- cqp_info = &cqp_request->info;
- m_info = &cqp_info->in.u.qp_modify.info;
- memcpy(m_info, info, sizeof(*m_info));
- cqp_info->cqp_cmd = OP_QP_MODIFY;
- cqp_info->post_sq = 1;
- cqp_info->in.u.qp_modify.qp = &iwqp->sc_qp;
- cqp_info->in.u.qp_modify.scratch = (uintptr_t)cqp_request;
- if (!i40iw_handle_cqp_op(iwdev, cqp_request))
- return;
-
- switch (m_info->next_iwarp_state) {
- case I40IW_QP_STATE_RTS:
- if (iwqp->iwarp_state == I40IW_QP_STATE_IDLE)
- i40iw_send_reset(iwqp->cm_node);
- /* fall through */
- case I40IW_QP_STATE_IDLE:
- case I40IW_QP_STATE_TERMINATE:
- case I40IW_QP_STATE_CLOSING:
- ae_info.ae_code = I40IW_AE_BAD_CLOSE;
- ae_info.ae_source = 0;
- i40iw_gen_ae(iwdev, &iwqp->sc_qp, &ae_info, false);
- break;
- case I40IW_QP_STATE_ERROR:
- default:
- break;
- }
-}
-
-/**
- * i40iw_modify_qp - modify qp request
- * @ibqp: qp's pointer for modify
- * @attr: access attributes
- * @attr_mask: state mask
- * @udata: user data
- */
-int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_udata *udata)
-{
- struct i40iw_qp *iwqp = to_iwqp(ibqp);
- struct i40iw_device *iwdev = iwqp->iwdev;
- struct i40iw_qp_host_ctx_info *ctx_info;
- struct i40iwarp_offload_info *iwarp_info;
- struct i40iw_modify_qp_info info;
- u8 issue_modify_qp = 0;
- u8 dont_wait = 0;
- u32 err;
- unsigned long flags;
-
- memset(&info, 0, sizeof(info));
- ctx_info = &iwqp->ctx_info;
- iwarp_info = &iwqp->iwarp_info;
-
- spin_lock_irqsave(&iwqp->lock, flags);
-
- if (attr_mask & IB_QP_STATE) {
- if (iwdev->closing && attr->qp_state != IB_QPS_ERR) {
- err = -EINVAL;
- goto exit;
- }
-
- switch (attr->qp_state) {
- case IB_QPS_INIT:
- case IB_QPS_RTR:
- if (iwqp->iwarp_state > (u32)I40IW_QP_STATE_IDLE) {
- err = -EINVAL;
- goto exit;
- }
- if (iwqp->iwarp_state == I40IW_QP_STATE_INVALID) {
- info.next_iwarp_state = I40IW_QP_STATE_IDLE;
- issue_modify_qp = 1;
- }
- break;
- case IB_QPS_RTS:
- if ((iwqp->iwarp_state > (u32)I40IW_QP_STATE_RTS) ||
- (!iwqp->cm_id)) {
- err = -EINVAL;
- goto exit;
- }
-
- issue_modify_qp = 1;
- iwqp->hw_tcp_state = I40IW_TCP_STATE_ESTABLISHED;
- iwqp->hte_added = 1;
- info.next_iwarp_state = I40IW_QP_STATE_RTS;
- info.tcp_ctx_valid = true;
- info.ord_valid = true;
- info.arp_cache_idx_valid = true;
- info.cq_num_valid = true;
- break;
- case IB_QPS_SQD:
- if (iwqp->hw_iwarp_state > (u32)I40IW_QP_STATE_RTS) {
- err = 0;
- goto exit;
- }
- if ((iwqp->iwarp_state == (u32)I40IW_QP_STATE_CLOSING) ||
- (iwqp->iwarp_state < (u32)I40IW_QP_STATE_RTS)) {
- err = 0;
- goto exit;
- }
- if (iwqp->iwarp_state > (u32)I40IW_QP_STATE_CLOSING) {
- err = -EINVAL;
- goto exit;
- }
- info.next_iwarp_state = I40IW_QP_STATE_CLOSING;
- issue_modify_qp = 1;
- break;
- case IB_QPS_SQE:
- if (iwqp->iwarp_state >= (u32)I40IW_QP_STATE_TERMINATE) {
- err = -EINVAL;
- goto exit;
- }
- info.next_iwarp_state = I40IW_QP_STATE_TERMINATE;
- issue_modify_qp = 1;
- break;
- case IB_QPS_ERR:
- case IB_QPS_RESET:
- if (iwqp->iwarp_state == (u32)I40IW_QP_STATE_ERROR) {
- err = -EINVAL;
- goto exit;
- }
- if (iwqp->sc_qp.term_flags)
- i40iw_terminate_del_timer(&iwqp->sc_qp);
- info.next_iwarp_state = I40IW_QP_STATE_ERROR;
- if ((iwqp->hw_tcp_state > I40IW_TCP_STATE_CLOSED) &&
- iwdev->iw_status &&
- (iwqp->hw_tcp_state != I40IW_TCP_STATE_TIME_WAIT))
- info.reset_tcp_conn = true;
- else
- dont_wait = 1;
- issue_modify_qp = 1;
- info.next_iwarp_state = I40IW_QP_STATE_ERROR;
- break;
- default:
- err = -EINVAL;
- goto exit;
- }
-
- iwqp->ibqp_state = attr->qp_state;
-
- }
- if (attr_mask & IB_QP_ACCESS_FLAGS) {
- ctx_info->iwarp_info_valid = true;
- if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE)
- iwarp_info->wr_rdresp_en = true;
- if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
- iwarp_info->wr_rdresp_en = true;
- if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ)
- iwarp_info->rd_enable = true;
- if (attr->qp_access_flags & IB_ACCESS_MW_BIND)
- iwarp_info->bind_en = true;
-
- if (iwqp->user_mode) {
- iwarp_info->rd_enable = true;
- iwarp_info->wr_rdresp_en = true;
- iwarp_info->priv_mode_en = false;
- }
- }
-
- if (ctx_info->iwarp_info_valid) {
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- int ret;
-
- ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
- ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
- ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
- (u64 *)iwqp->host_ctx.va,
- ctx_info);
- if (ret) {
- i40iw_pr_err("setting QP context\n");
- err = -EINVAL;
- goto exit;
- }
- }
-
- spin_unlock_irqrestore(&iwqp->lock, flags);
-
- if (issue_modify_qp) {
- i40iw_hw_modify_qp(iwdev, iwqp, &info, true);
-
- spin_lock_irqsave(&iwqp->lock, flags);
- iwqp->iwarp_state = info.next_iwarp_state;
- spin_unlock_irqrestore(&iwqp->lock, flags);
- }
-
- if (issue_modify_qp && (iwqp->ibqp_state > IB_QPS_RTS)) {
- if (dont_wait) {
- if (iwqp->cm_id && iwqp->hw_tcp_state) {
- spin_lock_irqsave(&iwqp->lock, flags);
- iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSED;
- iwqp->last_aeq = I40IW_AE_RESET_SENT;
- spin_unlock_irqrestore(&iwqp->lock, flags);
- i40iw_cm_disconn(iwqp);
- }
- } else {
- spin_lock_irqsave(&iwqp->lock, flags);
- if (iwqp->cm_id) {
- if (atomic_inc_return(&iwqp->close_timer_started) == 1) {
- iwqp->cm_id->add_ref(iwqp->cm_id);
- i40iw_schedule_cm_timer(iwqp->cm_node,
- (struct i40iw_puda_buf *)iwqp,
- I40IW_TIMER_TYPE_CLOSE, 1, 0);
- }
- }
- spin_unlock_irqrestore(&iwqp->lock, flags);
- }
- }
- return 0;
-exit:
- spin_unlock_irqrestore(&iwqp->lock, flags);
- return err;
-}
-
-/**
- * cq_free_resources - free up resources for cq
- * @iwdev: iwarp device
- * @iwcq: cq ptr
- */
-static void cq_free_resources(struct i40iw_device *iwdev, struct i40iw_cq *iwcq)
-{
- struct i40iw_sc_cq *cq = &iwcq->sc_cq;
-
- if (!iwcq->user_mode)
- i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwcq->kmem);
- i40iw_free_resource(iwdev, iwdev->allocated_cqs, cq->cq_uk.cq_id);
-}
-
-/**
- * i40iw_cq_wq_destroy - send cq destroy cqp
- * @iwdev: iwarp device
- * @cq: hardware control cq
- */
-void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq)
-{
- enum i40iw_status_code status;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
- if (!cqp_request)
- return;
-
- cqp_info = &cqp_request->info;
-
- cqp_info->cqp_cmd = OP_CQ_DESTROY;
- cqp_info->post_sq = 1;
- cqp_info->in.u.cq_destroy.cq = cq;
- cqp_info->in.u.cq_destroy.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Destroy QP fail");
-}
-
-/**
- * i40iw_destroy_cq - destroy cq
- * @ib_cq: cq pointer
- * @udata: user data or NULL for kernel object
- */
-static int i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
-{
- struct i40iw_cq *iwcq;
- struct i40iw_device *iwdev;
- struct i40iw_sc_cq *cq;
-
- if (!ib_cq) {
- i40iw_pr_err("ib_cq == NULL\n");
- return 0;
- }
-
- iwcq = to_iwcq(ib_cq);
- iwdev = to_iwdev(ib_cq->device);
- cq = &iwcq->sc_cq;
- i40iw_cq_wq_destroy(iwdev, cq);
- cq_free_resources(iwdev, iwcq);
- kfree(iwcq);
- i40iw_rem_devusecount(iwdev);
- return 0;
-}
-
-/**
- * i40iw_create_cq - create cq
- * @ibdev: device pointer from stack
- * @attr: attributes for cq
- * @udata: user data
- */
-static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
-{
- struct i40iw_device *iwdev = to_iwdev(ibdev);
- struct i40iw_cq *iwcq;
- struct i40iw_pbl *iwpbl;
- u32 cq_num = 0;
- struct i40iw_sc_cq *cq;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_cq_init_info info;
- enum i40iw_status_code status;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- struct i40iw_cq_uk_init_info *ukinfo = &info.cq_uk_init_info;
- unsigned long flags;
- int err_code;
- int entries = attr->cqe;
-
- if (iwdev->closing)
- return ERR_PTR(-ENODEV);
-
- if (entries > iwdev->max_cqe)
- return ERR_PTR(-EINVAL);
-
- iwcq = kzalloc(sizeof(*iwcq), GFP_KERNEL);
- if (!iwcq)
- return ERR_PTR(-ENOMEM);
-
- memset(&info, 0, sizeof(info));
-
- err_code = i40iw_alloc_resource(iwdev, iwdev->allocated_cqs,
- iwdev->max_cq, &cq_num,
- &iwdev->next_cq);
- if (err_code)
- goto error;
-
- cq = &iwcq->sc_cq;
- cq->back_cq = (void *)iwcq;
- spin_lock_init(&iwcq->lock);
-
- info.dev = dev;
- ukinfo->cq_size = max(entries, 4);
- ukinfo->cq_id = cq_num;
- iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size;
- info.ceqe_mask = 0;
- if (attr->comp_vector < iwdev->ceqs_count)
- info.ceq_id = attr->comp_vector;
- info.ceq_id_valid = true;
- info.ceqe_mask = 1;
- info.type = I40IW_CQ_TYPE_IWARP;
- if (udata) {
- struct i40iw_ucontext *ucontext = rdma_udata_to_drv_context(
- udata, struct i40iw_ucontext, ibucontext);
- struct i40iw_create_cq_req req;
- struct i40iw_cq_mr *cqmr;
-
- memset(&req, 0, sizeof(req));
- iwcq->user_mode = true;
- if (ib_copy_from_udata(&req, udata, sizeof(struct i40iw_create_cq_req))) {
- err_code = -EFAULT;
- goto cq_free_resources;
- }
-
- spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
- iwpbl = i40iw_get_pbl((unsigned long)req.user_cq_buffer,
- &ucontext->cq_reg_mem_list);
- spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
- if (!iwpbl) {
- err_code = -EPROTO;
- goto cq_free_resources;
- }
-
- iwcq->iwpbl = iwpbl;
- iwcq->cq_mem_size = 0;
- cqmr = &iwpbl->cq_mr;
- info.shadow_area_pa = cpu_to_le64(cqmr->shadow);
- if (iwpbl->pbl_allocated) {
- info.virtual_map = true;
- info.pbl_chunk_size = 1;
- info.first_pm_pbl_idx = cqmr->cq_pbl.idx;
- } else {
- info.cq_base_pa = cqmr->cq_pbl.addr;
- }
- } else {
- /* Kmode allocations */
- int rsize;
- int shadow;
-
- rsize = info.cq_uk_init_info.cq_size * sizeof(struct i40iw_cqe);
- rsize = round_up(rsize, 256);
- shadow = I40IW_SHADOW_AREA_SIZE << 3;
- status = i40iw_allocate_dma_mem(dev->hw, &iwcq->kmem,
- rsize + shadow, 256);
- if (status) {
- err_code = -ENOMEM;
- goto cq_free_resources;
- }
- ukinfo->cq_base = iwcq->kmem.va;
- info.cq_base_pa = iwcq->kmem.pa;
- info.shadow_area_pa = info.cq_base_pa + rsize;
- ukinfo->shadow_area = iwcq->kmem.va + rsize;
- }
-
- if (dev->iw_priv_cq_ops->cq_init(cq, &info)) {
- i40iw_pr_err("init cq fail\n");
- err_code = -EPROTO;
- goto cq_free_resources;
- }
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
- if (!cqp_request) {
- err_code = -ENOMEM;
- goto cq_free_resources;
- }
-
- cqp_info = &cqp_request->info;
- cqp_info->cqp_cmd = OP_CQ_CREATE;
- cqp_info->post_sq = 1;
- cqp_info->in.u.cq_create.cq = cq;
- cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status) {
- i40iw_pr_err("CQP-OP Create QP fail");
- err_code = -EPROTO;
- goto cq_free_resources;
- }
-
- if (udata) {
- struct i40iw_create_cq_resp resp;
-
- memset(&resp, 0, sizeof(resp));
- resp.cq_id = info.cq_uk_init_info.cq_id;
- resp.cq_size = info.cq_uk_init_info.cq_size;
- if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
- i40iw_pr_err("copy to user data\n");
- err_code = -EPROTO;
- goto cq_destroy;
- }
- }
-
- i40iw_add_devusecount(iwdev);
- return (struct ib_cq *)iwcq;
-
-cq_destroy:
- i40iw_cq_wq_destroy(iwdev, cq);
-cq_free_resources:
- cq_free_resources(iwdev, iwcq);
-error:
- kfree(iwcq);
- return ERR_PTR(err_code);
-}
-
-/**
- * i40iw_get_user_access - get hw access from IB access
- * @acc: IB access to return hw access
- */
-static inline u16 i40iw_get_user_access(int acc)
-{
- u16 access = 0;
-
- access |= (acc & IB_ACCESS_LOCAL_WRITE) ? I40IW_ACCESS_FLAGS_LOCALWRITE : 0;
- access |= (acc & IB_ACCESS_REMOTE_WRITE) ? I40IW_ACCESS_FLAGS_REMOTEWRITE : 0;
- access |= (acc & IB_ACCESS_REMOTE_READ) ? I40IW_ACCESS_FLAGS_REMOTEREAD : 0;
- access |= (acc & IB_ACCESS_MW_BIND) ? I40IW_ACCESS_FLAGS_BIND_WINDOW : 0;
- return access;
-}
-
-/**
- * i40iw_free_stag - free stag resource
- * @iwdev: iwarp device
- * @stag: stag to free
- */
-static void i40iw_free_stag(struct i40iw_device *iwdev, u32 stag)
-{
- u32 stag_idx;
-
- stag_idx = (stag & iwdev->mr_stagmask) >> I40IW_CQPSQ_STAG_IDX_SHIFT;
- i40iw_free_resource(iwdev, iwdev->allocated_mrs, stag_idx);
- i40iw_rem_devusecount(iwdev);
-}
-
-/**
- * i40iw_create_stag - create random stag
- * @iwdev: iwarp device
- */
-static u32 i40iw_create_stag(struct i40iw_device *iwdev)
-{
- u32 stag = 0;
- u32 stag_index = 0;
- u32 next_stag_index;
- u32 driver_key;
- u32 random;
- u8 consumer_key;
- int ret;
-
- get_random_bytes(&random, sizeof(random));
- consumer_key = (u8)random;
-
- driver_key = random & ~iwdev->mr_stagmask;
- next_stag_index = (random & iwdev->mr_stagmask) >> 8;
- next_stag_index %= iwdev->max_mr;
-
- ret = i40iw_alloc_resource(iwdev,
- iwdev->allocated_mrs, iwdev->max_mr,
- &stag_index, &next_stag_index);
- if (!ret) {
- stag = stag_index << I40IW_CQPSQ_STAG_IDX_SHIFT;
- stag |= driver_key;
- stag += (u32)consumer_key;
- i40iw_add_devusecount(iwdev);
- }
- return stag;
-}
-
-/**
- * i40iw_next_pbl_addr - Get next pbl address
- * @pbl: pointer to a pble
- * @pinfo: info pointer
- * @idx: index
- */
-static inline u64 *i40iw_next_pbl_addr(u64 *pbl,
- struct i40iw_pble_info **pinfo,
- u32 *idx)
-{
- *idx += 1;
- if ((!(*pinfo)) || (*idx != (*pinfo)->cnt))
- return ++pbl;
- *idx = 0;
- (*pinfo)++;
- return (u64 *)(*pinfo)->addr;
-}
-
-/**
- * i40iw_copy_user_pgaddrs - copy user page addresses to pble's locally
- * @iwmr: iwmr for IB's user page addresses
- * @pbl: pbl pointer to save 1 level or 0 level pble
- * @level: indicates level 0, 1 or 2
- */
-static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr,
- u64 *pbl,
- enum i40iw_pble_level level)
-{
- struct ib_umem *region = iwmr->region;
- struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
- struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
- struct i40iw_pble_info *pinfo;
- struct ib_block_iter biter;
- u32 idx = 0;
-
- pinfo = (level == I40IW_LEVEL_1) ? NULL : palloc->level2.leaf;
-
- if (iwmr->type == IW_MEMREG_TYPE_QP)
- iwpbl->qp_mr.sq_page = sg_page(region->sg_head.sgl);
-
- rdma_for_each_block(region->sg_head.sgl, &biter, region->nmap,
- iwmr->page_size) {
- *pbl = rdma_block_iter_dma_address(&biter);
- pbl = i40iw_next_pbl_addr(pbl, &pinfo, &idx);
- }
-}
-
-/**
- * i40iw_check_mem_contiguous - check if pbls stored in arr are contiguous
- * @arr: lvl1 pbl array
- * @npages: page count
- * @pg_size: page size
- *
- */
-static bool i40iw_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size)
-{
- u32 pg_idx;
-
- for (pg_idx = 0; pg_idx < npages; pg_idx++) {
- if ((*arr + (pg_size * pg_idx)) != arr[pg_idx])
- return false;
- }
- return true;
-}
-
-/**
- * i40iw_check_mr_contiguous - check if MR is physically contiguous
- * @palloc: pbl allocation struct
- * @pg_size: page size
- */
-static bool i40iw_check_mr_contiguous(struct i40iw_pble_alloc *palloc, u32 pg_size)
-{
- struct i40iw_pble_level2 *lvl2 = &palloc->level2;
- struct i40iw_pble_info *leaf = lvl2->leaf;
- u64 *arr = NULL;
- u64 *start_addr = NULL;
- int i;
- bool ret;
-
- if (palloc->level == I40IW_LEVEL_1) {
- arr = (u64 *)palloc->level1.addr;
- ret = i40iw_check_mem_contiguous(arr, palloc->total_cnt, pg_size);
- return ret;
- }
-
- start_addr = (u64 *)leaf->addr;
-
- for (i = 0; i < lvl2->leaf_cnt; i++, leaf++) {
- arr = (u64 *)leaf->addr;
- if ((*start_addr + (i * pg_size * PBLE_PER_PAGE)) != *arr)
- return false;
- ret = i40iw_check_mem_contiguous(arr, leaf->cnt, pg_size);
- if (!ret)
- return false;
- }
-
- return true;
-}
-
-/**
- * i40iw_setup_pbles - copy user pg address to pble's
- * @iwdev: iwarp device
- * @iwmr: mr pointer for this memory registration
- * @use_pbles: flag if to use pble's
- */
-static int i40iw_setup_pbles(struct i40iw_device *iwdev,
- struct i40iw_mr *iwmr,
- bool use_pbles)
-{
- struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
- struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
- struct i40iw_pble_info *pinfo;
- u64 *pbl;
- enum i40iw_status_code status;
- enum i40iw_pble_level level = I40IW_LEVEL_1;
-
- if (use_pbles) {
- mutex_lock(&iwdev->pbl_mutex);
- status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt);
- mutex_unlock(&iwdev->pbl_mutex);
- if (status)
- return -ENOMEM;
-
- iwpbl->pbl_allocated = true;
- level = palloc->level;
- pinfo = (level == I40IW_LEVEL_1) ? &palloc->level1 : palloc->level2.leaf;
- pbl = (u64 *)pinfo->addr;
- } else {
- pbl = iwmr->pgaddrmem;
- }
-
- i40iw_copy_user_pgaddrs(iwmr, pbl, level);
-
- if (use_pbles)
- iwmr->pgaddrmem[0] = *pbl;
-
- return 0;
-}
-
-/**
- * i40iw_handle_q_mem - handle memory for qp and cq
- * @iwdev: iwarp device
- * @req: information for q memory management
- * @iwpbl: pble struct
- * @use_pbles: flag to use pble
- */
-static int i40iw_handle_q_mem(struct i40iw_device *iwdev,
- struct i40iw_mem_reg_req *req,
- struct i40iw_pbl *iwpbl,
- bool use_pbles)
-{
- struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
- struct i40iw_mr *iwmr = iwpbl->iwmr;
- struct i40iw_qp_mr *qpmr = &iwpbl->qp_mr;
- struct i40iw_cq_mr *cqmr = &iwpbl->cq_mr;
- struct i40iw_hmc_pble *hmc_p;
- u64 *arr = iwmr->pgaddrmem;
- u32 pg_size;
- int err;
- int total;
- bool ret = true;
-
- total = req->sq_pages + req->rq_pages + req->cq_pages;
- pg_size = iwmr->page_size;
-
- err = i40iw_setup_pbles(iwdev, iwmr, use_pbles);
- if (err)
- return err;
-
- if (use_pbles && (palloc->level != I40IW_LEVEL_1)) {
- i40iw_free_pble(iwdev->pble_rsrc, palloc);
- iwpbl->pbl_allocated = false;
- return -ENOMEM;
- }
-
- if (use_pbles)
- arr = (u64 *)palloc->level1.addr;
-
- if (iwmr->type == IW_MEMREG_TYPE_QP) {
- hmc_p = &qpmr->sq_pbl;
- qpmr->shadow = (dma_addr_t)arr[total];
-
- if (use_pbles) {
- ret = i40iw_check_mem_contiguous(arr, req->sq_pages, pg_size);
- if (ret)
- ret = i40iw_check_mem_contiguous(&arr[req->sq_pages], req->rq_pages, pg_size);
- }
-
- if (!ret) {
- hmc_p->idx = palloc->level1.idx;
- hmc_p = &qpmr->rq_pbl;
- hmc_p->idx = palloc->level1.idx + req->sq_pages;
- } else {
- hmc_p->addr = arr[0];
- hmc_p = &qpmr->rq_pbl;
- hmc_p->addr = arr[req->sq_pages];
- }
- } else { /* CQ */
- hmc_p = &cqmr->cq_pbl;
- cqmr->shadow = (dma_addr_t)arr[total];
-
- if (use_pbles)
- ret = i40iw_check_mem_contiguous(arr, req->cq_pages, pg_size);
-
- if (!ret)
- hmc_p->idx = palloc->level1.idx;
- else
- hmc_p->addr = arr[0];
- }
-
- if (use_pbles && ret) {
- i40iw_free_pble(iwdev->pble_rsrc, palloc);
- iwpbl->pbl_allocated = false;
- }
-
- return err;
-}
-
-/**
- * i40iw_hw_alloc_stag - cqp command to allocate stag
- * @iwdev: iwarp device
- * @iwmr: iwarp mr pointer
- */
-static int i40iw_hw_alloc_stag(struct i40iw_device *iwdev, struct i40iw_mr *iwmr)
-{
- struct i40iw_allocate_stag_info *info;
- struct i40iw_pd *iwpd = to_iwpd(iwmr->ibmr.pd);
- enum i40iw_status_code status;
- int err = 0;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
- if (!cqp_request)
- return -ENOMEM;
-
- cqp_info = &cqp_request->info;
- info = &cqp_info->in.u.alloc_stag.info;
- memset(info, 0, sizeof(*info));
- info->page_size = PAGE_SIZE;
- info->stag_idx = iwmr->stag >> I40IW_CQPSQ_STAG_IDX_SHIFT;
- info->pd_id = iwpd->sc_pd.pd_id;
- info->total_len = iwmr->length;
- info->remote_access = true;
- cqp_info->cqp_cmd = OP_ALLOC_STAG;
- cqp_info->post_sq = 1;
- cqp_info->in.u.alloc_stag.dev = &iwdev->sc_dev;
- cqp_info->in.u.alloc_stag.scratch = (uintptr_t)cqp_request;
-
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status) {
- err = -ENOMEM;
- i40iw_pr_err("CQP-OP MR Reg fail");
- }
- return err;
-}
-
-/**
- * i40iw_alloc_mr - register stag for fast memory registration
- * @pd: ibpd pointer
- * @mr_type: memory type for stag registration
- * @max_num_sg: max number of pages
- * @udata: user data or NULL for kernel objects
- */
-static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
-{
- struct i40iw_pd *iwpd = to_iwpd(pd);
- struct i40iw_device *iwdev = to_iwdev(pd->device);
- struct i40iw_pble_alloc *palloc;
- struct i40iw_pbl *iwpbl;
- struct i40iw_mr *iwmr;
- enum i40iw_status_code status;
- u32 stag;
- int err_code = -ENOMEM;
-
- iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
- if (!iwmr)
- return ERR_PTR(-ENOMEM);
-
- stag = i40iw_create_stag(iwdev);
- if (!stag) {
- err_code = -EOVERFLOW;
- goto err;
- }
- stag &= ~I40IW_CQPSQ_STAG_KEY_MASK;
- iwmr->stag = stag;
- iwmr->ibmr.rkey = stag;
- iwmr->ibmr.lkey = stag;
- iwmr->ibmr.pd = pd;
- iwmr->ibmr.device = pd->device;
- iwpbl = &iwmr->iwpbl;
- iwpbl->iwmr = iwmr;
- iwmr->type = IW_MEMREG_TYPE_MEM;
- palloc = &iwpbl->pble_alloc;
- iwmr->page_cnt = max_num_sg;
- mutex_lock(&iwdev->pbl_mutex);
- status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt);
- mutex_unlock(&iwdev->pbl_mutex);
- if (status)
- goto err1;
-
- if (palloc->level != I40IW_LEVEL_1)
- goto err2;
- err_code = i40iw_hw_alloc_stag(iwdev, iwmr);
- if (err_code)
- goto err2;
- iwpbl->pbl_allocated = true;
- i40iw_add_pdusecount(iwpd);
- return &iwmr->ibmr;
-err2:
- i40iw_free_pble(iwdev->pble_rsrc, palloc);
-err1:
- i40iw_free_stag(iwdev, stag);
-err:
- kfree(iwmr);
- return ERR_PTR(err_code);
-}
-
-/**
- * i40iw_set_page - populate pbl list for fmr
- * @ibmr: ib mem to access iwarp mr pointer
- * @addr: page dma address for pbl list
- */
-static int i40iw_set_page(struct ib_mr *ibmr, u64 addr)
-{
- struct i40iw_mr *iwmr = to_iwmr(ibmr);
- struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
- struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
- u64 *pbl;
-
- if (unlikely(iwmr->npages == iwmr->page_cnt))
- return -ENOMEM;
-
- pbl = (u64 *)palloc->level1.addr;
- pbl[iwmr->npages++] = cpu_to_le64(addr);
- return 0;
-}
-
-/**
- * i40iw_map_mr_sg - map of sg list for fmr
- * @ibmr: ib mem to access iwarp mr pointer
- * @sg: scatter gather list for fmr
- * @sg_nents: number of sg pages
- */
-static int i40iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
- int sg_nents, unsigned int *sg_offset)
-{
- struct i40iw_mr *iwmr = to_iwmr(ibmr);
-
- iwmr->npages = 0;
- return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, i40iw_set_page);
-}
-
-/**
- * i40iw_drain_sq - drain the send queue
- * @ibqp: ib qp pointer
- */
-static void i40iw_drain_sq(struct ib_qp *ibqp)
-{
- struct i40iw_qp *iwqp = to_iwqp(ibqp);
- struct i40iw_sc_qp *qp = &iwqp->sc_qp;
-
- if (I40IW_RING_MORE_WORK(qp->qp_uk.sq_ring))
- wait_for_completion(&iwqp->sq_drained);
-}
-
-/**
- * i40iw_drain_rq - drain the receive queue
- * @ibqp: ib qp pointer
- */
-static void i40iw_drain_rq(struct ib_qp *ibqp)
-{
- struct i40iw_qp *iwqp = to_iwqp(ibqp);
- struct i40iw_sc_qp *qp = &iwqp->sc_qp;
-
- if (I40IW_RING_MORE_WORK(qp->qp_uk.rq_ring))
- wait_for_completion(&iwqp->rq_drained);
-}
-
-/**
- * i40iw_hwreg_mr - send cqp command for memory registration
- * @iwdev: iwarp device
- * @iwmr: iwarp mr pointer
- * @access: access for MR
- */
-static int i40iw_hwreg_mr(struct i40iw_device *iwdev,
- struct i40iw_mr *iwmr,
- u16 access)
-{
- struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
- struct i40iw_reg_ns_stag_info *stag_info;
- struct i40iw_pd *iwpd = to_iwpd(iwmr->ibmr.pd);
- struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
- enum i40iw_status_code status;
- int err = 0;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
- if (!cqp_request)
- return -ENOMEM;
-
- cqp_info = &cqp_request->info;
- stag_info = &cqp_info->in.u.mr_reg_non_shared.info;
- memset(stag_info, 0, sizeof(*stag_info));
- stag_info->va = (void *)(unsigned long)iwpbl->user_base;
- stag_info->stag_idx = iwmr->stag >> I40IW_CQPSQ_STAG_IDX_SHIFT;
- stag_info->stag_key = (u8)iwmr->stag;
- stag_info->total_len = iwmr->length;
- stag_info->access_rights = access;
- stag_info->pd_id = iwpd->sc_pd.pd_id;
- stag_info->addr_type = I40IW_ADDR_TYPE_VA_BASED;
- stag_info->page_size = iwmr->page_size;
-
- if (iwpbl->pbl_allocated) {
- if (palloc->level == I40IW_LEVEL_1) {
- stag_info->first_pm_pbl_index = palloc->level1.idx;
- stag_info->chunk_size = 1;
- } else {
- stag_info->first_pm_pbl_index = palloc->level2.root.idx;
- stag_info->chunk_size = 3;
- }
- } else {
- stag_info->reg_addr_pa = iwmr->pgaddrmem[0];
- }
-
- cqp_info->cqp_cmd = OP_MR_REG_NON_SHARED;
- cqp_info->post_sq = 1;
- cqp_info->in.u.mr_reg_non_shared.dev = &iwdev->sc_dev;
- cqp_info->in.u.mr_reg_non_shared.scratch = (uintptr_t)cqp_request;
-
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status) {
- err = -ENOMEM;
- i40iw_pr_err("CQP-OP MR Reg fail");
- }
- return err;
-}
-
-/**
- * i40iw_reg_user_mr - Register a user memory region
- * @pd: ptr of pd
- * @start: virtual start address
- * @length: length of mr
- * @virt: virtual address
- * @acc: access of mr
- * @udata: user data
- */
-static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
- u64 start,
- u64 length,
- u64 virt,
- int acc,
- struct ib_udata *udata)
-{
- struct i40iw_pd *iwpd = to_iwpd(pd);
- struct i40iw_device *iwdev = to_iwdev(pd->device);
- struct i40iw_ucontext *ucontext = rdma_udata_to_drv_context(
- udata, struct i40iw_ucontext, ibucontext);
- struct i40iw_pble_alloc *palloc;
- struct i40iw_pbl *iwpbl;
- struct i40iw_mr *iwmr;
- struct ib_umem *region;
- struct i40iw_mem_reg_req req;
- u64 pbl_depth = 0;
- u32 stag = 0;
- u16 access;
- u64 region_length;
- bool use_pbles = false;
- unsigned long flags;
- int err = -ENOSYS;
- int ret;
- int pg_shift;
-
- if (iwdev->closing)
- return ERR_PTR(-ENODEV);
-
- if (length > I40IW_MAX_MR_SIZE)
- return ERR_PTR(-EINVAL);
- region = ib_umem_get(udata, start, length, acc, 0);
- if (IS_ERR(region))
- return (struct ib_mr *)region;
-
- if (ib_copy_from_udata(&req, udata, sizeof(req))) {
- ib_umem_release(region);
- return ERR_PTR(-EFAULT);
- }
-
- iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
- if (!iwmr) {
- ib_umem_release(region);
- return ERR_PTR(-ENOMEM);
- }
-
- iwpbl = &iwmr->iwpbl;
- iwpbl->iwmr = iwmr;
- iwmr->region = region;
- iwmr->ibmr.pd = pd;
- iwmr->ibmr.device = pd->device;
-
- iwmr->page_size = PAGE_SIZE;
- if (req.reg_type == IW_MEMREG_TYPE_MEM)
- iwmr->page_size = ib_umem_find_best_pgsz(region, SZ_4K | SZ_2M,
- virt);
-
- region_length = region->length + (start & (iwmr->page_size - 1));
- pg_shift = ffs(iwmr->page_size) - 1;
- pbl_depth = region_length >> pg_shift;
- pbl_depth += (region_length & (iwmr->page_size - 1)) ? 1 : 0;
- iwmr->length = region->length;
-
- iwpbl->user_base = virt;
- palloc = &iwpbl->pble_alloc;
-
- iwmr->type = req.reg_type;
- iwmr->page_cnt = (u32)pbl_depth;
-
- switch (req.reg_type) {
- case IW_MEMREG_TYPE_QP:
- use_pbles = ((req.sq_pages + req.rq_pages) > 2);
- err = i40iw_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
- if (err)
- goto error;
- spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
- list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list);
- iwpbl->on_list = true;
- spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
- break;
- case IW_MEMREG_TYPE_CQ:
- use_pbles = (req.cq_pages > 1);
- err = i40iw_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
- if (err)
- goto error;
-
- spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
- list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list);
- iwpbl->on_list = true;
- spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
- break;
- case IW_MEMREG_TYPE_MEM:
- use_pbles = (iwmr->page_cnt != 1);
- access = I40IW_ACCESS_FLAGS_LOCALREAD;
-
- err = i40iw_setup_pbles(iwdev, iwmr, use_pbles);
- if (err)
- goto error;
-
- if (use_pbles) {
- ret = i40iw_check_mr_contiguous(palloc, iwmr->page_size);
- if (ret) {
- i40iw_free_pble(iwdev->pble_rsrc, palloc);
- iwpbl->pbl_allocated = false;
- }
- }
-
- access |= i40iw_get_user_access(acc);
- stag = i40iw_create_stag(iwdev);
- if (!stag) {
- err = -ENOMEM;
- goto error;
- }
-
- iwmr->stag = stag;
- iwmr->ibmr.rkey = stag;
- iwmr->ibmr.lkey = stag;
-
- err = i40iw_hwreg_mr(iwdev, iwmr, access);
- if (err) {
- i40iw_free_stag(iwdev, stag);
- goto error;
- }
-
- break;
- default:
- goto error;
- }
-
- iwmr->type = req.reg_type;
- if (req.reg_type == IW_MEMREG_TYPE_MEM)
- i40iw_add_pdusecount(iwpd);
- return &iwmr->ibmr;
-
-error:
- if (palloc->level != I40IW_LEVEL_0 && iwpbl->pbl_allocated)
- i40iw_free_pble(iwdev->pble_rsrc, palloc);
- ib_umem_release(region);
- kfree(iwmr);
- return ERR_PTR(err);
-}
-
-/**
- * i40iw_reg_phys_mr - register kernel physical memory
- * @pd: ibpd pointer
- * @addr: physical address of memory to register
- * @size: size of memory to register
- * @acc: Access rights
- * @iova_start: start of virtual address for physical buffers
- */
-struct ib_mr *i40iw_reg_phys_mr(struct ib_pd *pd,
- u64 addr,
- u64 size,
- int acc,
- u64 *iova_start)
-{
- struct i40iw_pd *iwpd = to_iwpd(pd);
- struct i40iw_device *iwdev = to_iwdev(pd->device);
- struct i40iw_pbl *iwpbl;
- struct i40iw_mr *iwmr;
- enum i40iw_status_code status;
- u32 stag;
- u16 access = I40IW_ACCESS_FLAGS_LOCALREAD;
- int ret;
-
- iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
- if (!iwmr)
- return ERR_PTR(-ENOMEM);
- iwmr->ibmr.pd = pd;
- iwmr->ibmr.device = pd->device;
- iwpbl = &iwmr->iwpbl;
- iwpbl->iwmr = iwmr;
- iwmr->type = IW_MEMREG_TYPE_MEM;
- iwpbl->user_base = *iova_start;
- stag = i40iw_create_stag(iwdev);
- if (!stag) {
- ret = -EOVERFLOW;
- goto err;
- }
- access |= i40iw_get_user_access(acc);
- iwmr->stag = stag;
- iwmr->ibmr.rkey = stag;
- iwmr->ibmr.lkey = stag;
- iwmr->page_cnt = 1;
- iwmr->pgaddrmem[0] = addr;
- iwmr->length = size;
- status = i40iw_hwreg_mr(iwdev, iwmr, access);
- if (status) {
- i40iw_free_stag(iwdev, stag);
- ret = -ENOMEM;
- goto err;
- }
-
- i40iw_add_pdusecount(iwpd);
- return &iwmr->ibmr;
- err:
- kfree(iwmr);
- return ERR_PTR(ret);
-}
-
-/**
- * i40iw_get_dma_mr - register physical mem
- * @pd: ptr of pd
- * @acc: access for memory
- */
-static struct ib_mr *i40iw_get_dma_mr(struct ib_pd *pd, int acc)
-{
- u64 kva = 0;
-
- return i40iw_reg_phys_mr(pd, 0, 0, acc, &kva);
-}
-
-/**
- * i40iw_del_memlist - delete pbl list entries for CQ/QP
- * @iwmr: iwmr for IB's user page addresses
- * @ucontext: ptr to user context
- */
-static void i40iw_del_memlist(struct i40iw_mr *iwmr,
- struct i40iw_ucontext *ucontext)
-{
- struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
- unsigned long flags;
-
- switch (iwmr->type) {
- case IW_MEMREG_TYPE_CQ:
- spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
- if (iwpbl->on_list) {
- iwpbl->on_list = false;
- list_del(&iwpbl->list);
- }
- spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
- break;
- case IW_MEMREG_TYPE_QP:
- spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
- if (iwpbl->on_list) {
- iwpbl->on_list = false;
- list_del(&iwpbl->list);
- }
- spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
- break;
- default:
- break;
- }
-}
-
-/**
- * i40iw_dereg_mr - deregister mr
- * @ib_mr: mr ptr for dereg
- */
-static int i40iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
-{
- struct ib_pd *ibpd = ib_mr->pd;
- struct i40iw_pd *iwpd = to_iwpd(ibpd);
- struct i40iw_mr *iwmr = to_iwmr(ib_mr);
- struct i40iw_device *iwdev = to_iwdev(ib_mr->device);
- enum i40iw_status_code status;
- struct i40iw_dealloc_stag_info *info;
- struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
- struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- u32 stag_idx;
-
- if (iwmr->region)
- ib_umem_release(iwmr->region);
-
- if (iwmr->type != IW_MEMREG_TYPE_MEM) {
- /* region is released. only test for userness. */
- if (iwmr->region) {
- struct i40iw_ucontext *ucontext =
- rdma_udata_to_drv_context(
- udata,
- struct i40iw_ucontext,
- ibucontext);
-
- i40iw_del_memlist(iwmr, ucontext);
- }
- if (iwpbl->pbl_allocated && iwmr->type != IW_MEMREG_TYPE_QP)
- i40iw_free_pble(iwdev->pble_rsrc, palloc);
- kfree(iwmr);
- return 0;
- }
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
- if (!cqp_request)
- return -ENOMEM;
-
- cqp_info = &cqp_request->info;
- info = &cqp_info->in.u.dealloc_stag.info;
- memset(info, 0, sizeof(*info));
-
- info->pd_id = cpu_to_le32(iwpd->sc_pd.pd_id & 0x00007fff);
- info->stag_idx = RS_64_1(ib_mr->rkey, I40IW_CQPSQ_STAG_IDX_SHIFT);
- stag_idx = info->stag_idx;
- info->mr = true;
- if (iwpbl->pbl_allocated)
- info->dealloc_pbl = true;
-
- cqp_info->cqp_cmd = OP_DEALLOC_STAG;
- cqp_info->post_sq = 1;
- cqp_info->in.u.dealloc_stag.dev = &iwdev->sc_dev;
- cqp_info->in.u.dealloc_stag.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP dealloc failed for stag_idx = 0x%x\n", stag_idx);
- i40iw_rem_pdusecount(iwpd, iwdev);
- i40iw_free_stag(iwdev, iwmr->stag);
- if (iwpbl->pbl_allocated)
- i40iw_free_pble(iwdev->pble_rsrc, palloc);
- kfree(iwmr);
- return 0;
-}
-
-/**
- * hw_rev_show - show hardware revision
- */
-static ssize_t hw_rev_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct i40iw_ib_device *iwibdev =
- rdma_device_to_drv_device(dev, struct i40iw_ib_device, ibdev);
- u32 hw_rev = iwibdev->iwdev->sc_dev.hw_rev;
-
- return sprintf(buf, "%x\n", hw_rev);
-}
-static DEVICE_ATTR_RO(hw_rev);
-
-/**
- * hca_type_show - show HCA type
- */
-static ssize_t hca_type_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "I40IW\n");
-}
-static DEVICE_ATTR_RO(hca_type);
-
-/**
- * board_id_show - show board ID
- */
-static ssize_t board_id_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "%.*s\n", 32, "I40IW Board ID");
-}
-static DEVICE_ATTR_RO(board_id);
-
-static struct attribute *i40iw_dev_attributes[] = {
- &dev_attr_hw_rev.attr,
- &dev_attr_hca_type.attr,
- &dev_attr_board_id.attr,
- NULL
-};
-
-static const struct attribute_group i40iw_attr_group = {
- .attrs = i40iw_dev_attributes,
-};
-
-/**
- * i40iw_copy_sg_list - copy sg list for qp
- * @sg_list: destination sg list
- * @sgl: source sg list to copy from
- * @num_sges: count of sg entries
- */
-static void i40iw_copy_sg_list(struct i40iw_sge *sg_list, struct ib_sge *sgl, int num_sges)
-{
- unsigned int i;
-
- for (i = 0; (i < num_sges) && (i < I40IW_MAX_WQ_FRAGMENT_COUNT); i++) {
- sg_list[i].tag_off = sgl[i].addr;
- sg_list[i].len = sgl[i].length;
- sg_list[i].stag = sgl[i].lkey;
- }
-}
-
-/**
- * i40iw_post_send - kernel application wr
- * @ibqp: qp ptr for wr
- * @ib_wr: work request ptr
- * @bad_wr: return of bad wr if err
- */
-static int i40iw_post_send(struct ib_qp *ibqp,
- const struct ib_send_wr *ib_wr,
- const struct ib_send_wr **bad_wr)
-{
- struct i40iw_qp *iwqp;
- struct i40iw_qp_uk *ukqp;
- struct i40iw_post_sq_info info;
- enum i40iw_status_code ret;
- int err = 0;
- unsigned long flags;
- bool inv_stag;
-
- iwqp = (struct i40iw_qp *)ibqp;
- ukqp = &iwqp->sc_qp.qp_uk;
-
- spin_lock_irqsave(&iwqp->lock, flags);
-
- if (iwqp->flush_issued) {
- err = -EINVAL;
- goto out;
- }
-
- while (ib_wr) {
- inv_stag = false;
- memset(&info, 0, sizeof(info));
- info.wr_id = (u64)(ib_wr->wr_id);
- if ((ib_wr->send_flags & IB_SEND_SIGNALED) || iwqp->sig_all)
- info.signaled = true;
- if (ib_wr->send_flags & IB_SEND_FENCE)
- info.read_fence = true;
-
- switch (ib_wr->opcode) {
- case IB_WR_SEND:
- /* fall-through */
- case IB_WR_SEND_WITH_INV:
- if (ib_wr->opcode == IB_WR_SEND) {
- if (ib_wr->send_flags & IB_SEND_SOLICITED)
- info.op_type = I40IW_OP_TYPE_SEND_SOL;
- else
- info.op_type = I40IW_OP_TYPE_SEND;
- } else {
- if (ib_wr->send_flags & IB_SEND_SOLICITED)
- info.op_type = I40IW_OP_TYPE_SEND_SOL_INV;
- else
- info.op_type = I40IW_OP_TYPE_SEND_INV;
- }
-
- if (ib_wr->send_flags & IB_SEND_INLINE) {
- info.op.inline_send.data = (void *)(unsigned long)ib_wr->sg_list[0].addr;
- info.op.inline_send.len = ib_wr->sg_list[0].length;
- ret = ukqp->ops.iw_inline_send(ukqp, &info, ib_wr->ex.invalidate_rkey, false);
- } else {
- info.op.send.num_sges = ib_wr->num_sge;
- info.op.send.sg_list = (struct i40iw_sge *)ib_wr->sg_list;
- ret = ukqp->ops.iw_send(ukqp, &info, ib_wr->ex.invalidate_rkey, false);
- }
-
- if (ret) {
- if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
- err = -ENOMEM;
- else
- err = -EINVAL;
- }
- break;
- case IB_WR_RDMA_WRITE:
- info.op_type = I40IW_OP_TYPE_RDMA_WRITE;
-
- if (ib_wr->send_flags & IB_SEND_INLINE) {
- info.op.inline_rdma_write.data = (void *)(unsigned long)ib_wr->sg_list[0].addr;
- info.op.inline_rdma_write.len = ib_wr->sg_list[0].length;
- info.op.inline_rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
- info.op.inline_rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey;
- ret = ukqp->ops.iw_inline_rdma_write(ukqp, &info, false);
- } else {
- info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list;
- info.op.rdma_write.num_lo_sges = ib_wr->num_sge;
- info.op.rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
- info.op.rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey;
- ret = ukqp->ops.iw_rdma_write(ukqp, &info, false);
- }
-
- if (ret) {
- if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
- err = -ENOMEM;
- else
- err = -EINVAL;
- }
- break;
- case IB_WR_RDMA_READ_WITH_INV:
- inv_stag = true;
- /* fall-through */
- case IB_WR_RDMA_READ:
- if (ib_wr->num_sge > I40IW_MAX_SGE_RD) {
- err = -EINVAL;
- break;
- }
- info.op_type = I40IW_OP_TYPE_RDMA_READ;
- info.op.rdma_read.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
- info.op.rdma_read.rem_addr.stag = rdma_wr(ib_wr)->rkey;
- info.op.rdma_read.lo_addr.tag_off = ib_wr->sg_list->addr;
- info.op.rdma_read.lo_addr.stag = ib_wr->sg_list->lkey;
- info.op.rdma_read.lo_addr.len = ib_wr->sg_list->length;
- ret = ukqp->ops.iw_rdma_read(ukqp, &info, inv_stag, false);
- if (ret) {
- if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
- err = -ENOMEM;
- else
- err = -EINVAL;
- }
- break;
- case IB_WR_LOCAL_INV:
- info.op_type = I40IW_OP_TYPE_INV_STAG;
- info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey;
- ret = ukqp->ops.iw_stag_local_invalidate(ukqp, &info, true);
- if (ret)
- err = -ENOMEM;
- break;
- case IB_WR_REG_MR:
- {
- struct i40iw_mr *iwmr = to_iwmr(reg_wr(ib_wr)->mr);
- int flags = reg_wr(ib_wr)->access;
- struct i40iw_pble_alloc *palloc = &iwmr->iwpbl.pble_alloc;
- struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev;
- struct i40iw_fast_reg_stag_info info;
-
- memset(&info, 0, sizeof(info));
- info.access_rights = I40IW_ACCESS_FLAGS_LOCALREAD;
- info.access_rights |= i40iw_get_user_access(flags);
- info.stag_key = reg_wr(ib_wr)->key & 0xff;
- info.stag_idx = reg_wr(ib_wr)->key >> 8;
- info.page_size = reg_wr(ib_wr)->mr->page_size;
- info.wr_id = ib_wr->wr_id;
-
- info.addr_type = I40IW_ADDR_TYPE_VA_BASED;
- info.va = (void *)(uintptr_t)iwmr->ibmr.iova;
- info.total_len = iwmr->ibmr.length;
- info.reg_addr_pa = *(u64 *)palloc->level1.addr;
- info.first_pm_pbl_index = palloc->level1.idx;
- info.local_fence = ib_wr->send_flags & IB_SEND_FENCE;
- info.signaled = ib_wr->send_flags & IB_SEND_SIGNALED;
-
- if (iwmr->npages > I40IW_MIN_PAGES_PER_FMR)
- info.chunk_size = 1;
-
- ret = dev->iw_priv_qp_ops->iw_mr_fast_register(&iwqp->sc_qp, &info, true);
- if (ret)
- err = -ENOMEM;
- break;
- }
- default:
- err = -EINVAL;
- i40iw_pr_err(" upost_send bad opcode = 0x%x\n",
- ib_wr->opcode);
- break;
- }
-
- if (err)
- break;
- ib_wr = ib_wr->next;
- }
-
-out:
- if (err)
- *bad_wr = ib_wr;
- else
- ukqp->ops.iw_qp_post_wr(ukqp);
- spin_unlock_irqrestore(&iwqp->lock, flags);
-
- return err;
-}
-
-/**
- * i40iw_post_recv - post receive wr for kernel application
- * @ibqp: ib qp pointer
- * @ib_wr: work request for receive
- * @bad_wr: bad wr caused an error
- */
-static int i40iw_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *ib_wr,
- const struct ib_recv_wr **bad_wr)
-{
- struct i40iw_qp *iwqp;
- struct i40iw_qp_uk *ukqp;
- struct i40iw_post_rq_info post_recv;
- struct i40iw_sge sg_list[I40IW_MAX_WQ_FRAGMENT_COUNT];
- enum i40iw_status_code ret = 0;
- unsigned long flags;
- int err = 0;
-
- iwqp = (struct i40iw_qp *)ibqp;
- ukqp = &iwqp->sc_qp.qp_uk;
-
- memset(&post_recv, 0, sizeof(post_recv));
- spin_lock_irqsave(&iwqp->lock, flags);
-
- if (iwqp->flush_issued) {
- err = -EINVAL;
- goto out;
- }
-
- while (ib_wr) {
- post_recv.num_sges = ib_wr->num_sge;
- post_recv.wr_id = ib_wr->wr_id;
- i40iw_copy_sg_list(sg_list, ib_wr->sg_list, ib_wr->num_sge);
- post_recv.sg_list = sg_list;
- ret = ukqp->ops.iw_post_receive(ukqp, &post_recv);
- if (ret) {
- i40iw_pr_err(" post_recv err %d\n", ret);
- if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
- err = -ENOMEM;
- else
- err = -EINVAL;
- *bad_wr = ib_wr;
- goto out;
- }
- ib_wr = ib_wr->next;
- }
- out:
- spin_unlock_irqrestore(&iwqp->lock, flags);
- return err;
-}
-
-/**
- * i40iw_poll_cq - poll cq for completion (kernel apps)
- * @ibcq: cq to poll
- * @num_entries: number of entries to poll
- * @entry: array of completed work request entries
- */
-static int i40iw_poll_cq(struct ib_cq *ibcq,
- int num_entries,
- struct ib_wc *entry)
-{
- struct i40iw_cq *iwcq;
- int cqe_count = 0;
- struct i40iw_cq_poll_info cq_poll_info;
- enum i40iw_status_code ret;
- struct i40iw_cq_uk *ukcq;
- struct i40iw_sc_qp *qp;
- struct i40iw_qp *iwqp;
- unsigned long flags;
-
- iwcq = (struct i40iw_cq *)ibcq;
- ukcq = &iwcq->sc_cq.cq_uk;
-
- spin_lock_irqsave(&iwcq->lock, flags);
- while (cqe_count < num_entries) {
- ret = ukcq->ops.iw_cq_poll_completion(ukcq, &cq_poll_info);
- if (ret == I40IW_ERR_QUEUE_EMPTY) {
- break;
- } else if (ret == I40IW_ERR_QUEUE_DESTROYED) {
- continue;
- } else if (ret) {
- if (!cqe_count)
- cqe_count = -1;
- break;
- }
- entry->wc_flags = 0;
- entry->wr_id = cq_poll_info.wr_id;
- if (cq_poll_info.error) {
- entry->status = IB_WC_WR_FLUSH_ERR;
- entry->vendor_err = cq_poll_info.major_err << 16 | cq_poll_info.minor_err;
- } else {
- entry->status = IB_WC_SUCCESS;
- }
-
- switch (cq_poll_info.op_type) {
- case I40IW_OP_TYPE_RDMA_WRITE:
- entry->opcode = IB_WC_RDMA_WRITE;
- break;
- case I40IW_OP_TYPE_RDMA_READ_INV_STAG:
- case I40IW_OP_TYPE_RDMA_READ:
- entry->opcode = IB_WC_RDMA_READ;
- break;
- case I40IW_OP_TYPE_SEND_SOL:
- case I40IW_OP_TYPE_SEND_SOL_INV:
- case I40IW_OP_TYPE_SEND_INV:
- case I40IW_OP_TYPE_SEND:
- entry->opcode = IB_WC_SEND;
- break;
- case I40IW_OP_TYPE_REC:
- entry->opcode = IB_WC_RECV;
- break;
- default:
- entry->opcode = IB_WC_RECV;
- break;
- }
-
- entry->ex.imm_data = 0;
- qp = (struct i40iw_sc_qp *)cq_poll_info.qp_handle;
- entry->qp = (struct ib_qp *)qp->back_qp;
- entry->src_qp = cq_poll_info.qp_id;
- iwqp = (struct i40iw_qp *)qp->back_qp;
- if (iwqp->iwarp_state > I40IW_QP_STATE_RTS) {
- if (!I40IW_RING_MORE_WORK(qp->qp_uk.sq_ring))
- complete(&iwqp->sq_drained);
- if (!I40IW_RING_MORE_WORK(qp->qp_uk.rq_ring))
- complete(&iwqp->rq_drained);
- }
- entry->byte_len = cq_poll_info.bytes_xfered;
- entry++;
- cqe_count++;
- }
- spin_unlock_irqrestore(&iwcq->lock, flags);
- return cqe_count;
-}
-
-/**
- * i40iw_req_notify_cq - arm cq kernel application
- * @ibcq: cq to arm
- * @notify_flags: notification flags
- */
-static int i40iw_req_notify_cq(struct ib_cq *ibcq,
- enum ib_cq_notify_flags notify_flags)
-{
- struct i40iw_cq *iwcq;
- struct i40iw_cq_uk *ukcq;
- unsigned long flags;
- enum i40iw_completion_notify cq_notify = IW_CQ_COMPL_EVENT;
-
- iwcq = (struct i40iw_cq *)ibcq;
- ukcq = &iwcq->sc_cq.cq_uk;
- if (notify_flags == IB_CQ_SOLICITED)
- cq_notify = IW_CQ_COMPL_SOLICITED;
- spin_lock_irqsave(&iwcq->lock, flags);
- ukcq->ops.iw_cq_request_notification(ukcq, cq_notify);
- spin_unlock_irqrestore(&iwcq->lock, flags);
- return 0;
-}
-
-/**
- * i40iw_port_immutable - return port's immutable data
- * @ibdev: ib dev struct
- * @port_num: port number
- * @immutable: immutable data for the port return
- */
-static int i40iw_port_immutable(struct ib_device *ibdev, u8 port_num,
- struct ib_port_immutable *immutable)
-{
- struct ib_port_attr attr;
- int err;
-
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
-
- err = ib_query_port(ibdev, port_num, &attr);
-
- if (err)
- return err;
-
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
- immutable->gid_tbl_len = attr.gid_tbl_len;
-
- return 0;
-}
-
-static const char * const i40iw_hw_stat_names[] = {
- /* 32-bit names */
- [I40IW_HW_STAT_INDEX_IP4RXDISCARD] = "ip4InDiscards",
- [I40IW_HW_STAT_INDEX_IP4RXTRUNC] = "ip4InTruncatedPkts",
- [I40IW_HW_STAT_INDEX_IP4TXNOROUTE] = "ip4OutNoRoutes",
- [I40IW_HW_STAT_INDEX_IP6RXDISCARD] = "ip6InDiscards",
- [I40IW_HW_STAT_INDEX_IP6RXTRUNC] = "ip6InTruncatedPkts",
- [I40IW_HW_STAT_INDEX_IP6TXNOROUTE] = "ip6OutNoRoutes",
- [I40IW_HW_STAT_INDEX_TCPRTXSEG] = "tcpRetransSegs",
- [I40IW_HW_STAT_INDEX_TCPRXOPTERR] = "tcpInOptErrors",
- [I40IW_HW_STAT_INDEX_TCPRXPROTOERR] = "tcpInProtoErrors",
- /* 64-bit names */
- [I40IW_HW_STAT_INDEX_IP4RXOCTS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip4InOctets",
- [I40IW_HW_STAT_INDEX_IP4RXPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip4InPkts",
- [I40IW_HW_STAT_INDEX_IP4RXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip4InReasmRqd",
- [I40IW_HW_STAT_INDEX_IP4RXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip4InMcastPkts",
- [I40IW_HW_STAT_INDEX_IP4TXOCTS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip4OutOctets",
- [I40IW_HW_STAT_INDEX_IP4TXPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip4OutPkts",
- [I40IW_HW_STAT_INDEX_IP4TXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip4OutSegRqd",
- [I40IW_HW_STAT_INDEX_IP4TXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip4OutMcastPkts",
- [I40IW_HW_STAT_INDEX_IP6RXOCTS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip6InOctets",
- [I40IW_HW_STAT_INDEX_IP6RXPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip6InPkts",
- [I40IW_HW_STAT_INDEX_IP6RXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip6InReasmRqd",
- [I40IW_HW_STAT_INDEX_IP6RXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip6InMcastPkts",
- [I40IW_HW_STAT_INDEX_IP6TXOCTS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip6OutOctets",
- [I40IW_HW_STAT_INDEX_IP6TXPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip6OutPkts",
- [I40IW_HW_STAT_INDEX_IP6TXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip6OutSegRqd",
- [I40IW_HW_STAT_INDEX_IP6TXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip6OutMcastPkts",
- [I40IW_HW_STAT_INDEX_TCPRXSEGS + I40IW_HW_STAT_INDEX_MAX_32] =
- "tcpInSegs",
- [I40IW_HW_STAT_INDEX_TCPTXSEG + I40IW_HW_STAT_INDEX_MAX_32] =
- "tcpOutSegs",
- [I40IW_HW_STAT_INDEX_RDMARXRDS + I40IW_HW_STAT_INDEX_MAX_32] =
- "iwInRdmaReads",
- [I40IW_HW_STAT_INDEX_RDMARXSNDS + I40IW_HW_STAT_INDEX_MAX_32] =
- "iwInRdmaSends",
- [I40IW_HW_STAT_INDEX_RDMARXWRS + I40IW_HW_STAT_INDEX_MAX_32] =
- "iwInRdmaWrites",
- [I40IW_HW_STAT_INDEX_RDMATXRDS + I40IW_HW_STAT_INDEX_MAX_32] =
- "iwOutRdmaReads",
- [I40IW_HW_STAT_INDEX_RDMATXSNDS + I40IW_HW_STAT_INDEX_MAX_32] =
- "iwOutRdmaSends",
- [I40IW_HW_STAT_INDEX_RDMATXWRS + I40IW_HW_STAT_INDEX_MAX_32] =
- "iwOutRdmaWrites",
- [I40IW_HW_STAT_INDEX_RDMAVBND + I40IW_HW_STAT_INDEX_MAX_32] =
- "iwRdmaBnd",
- [I40IW_HW_STAT_INDEX_RDMAVINV + I40IW_HW_STAT_INDEX_MAX_32] =
- "iwRdmaInv"
-};
-
-static void i40iw_get_dev_fw_str(struct ib_device *dev, char *str)
-{
- u32 firmware_version = I40IW_FW_VERSION;
-
- snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u", firmware_version,
- (firmware_version & 0x000000ff));
-}
-
-/**
- * i40iw_alloc_hw_stats - Allocate a hw stats structure
- * @ibdev: device pointer from stack
- * @port_num: port number
- */
-static struct rdma_hw_stats *i40iw_alloc_hw_stats(struct ib_device *ibdev,
- u8 port_num)
-{
- struct i40iw_device *iwdev = to_iwdev(ibdev);
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- int num_counters = I40IW_HW_STAT_INDEX_MAX_32 +
- I40IW_HW_STAT_INDEX_MAX_64;
- unsigned long lifespan = RDMA_HW_STATS_DEFAULT_LIFESPAN;
-
- BUILD_BUG_ON(ARRAY_SIZE(i40iw_hw_stat_names) !=
- (I40IW_HW_STAT_INDEX_MAX_32 +
- I40IW_HW_STAT_INDEX_MAX_64));
-
- /*
- * PFs get the default update lifespan, but VFs only update once
- * per second
- */
- if (!dev->is_pf)
- lifespan = 1000;
- return rdma_alloc_hw_stats_struct(i40iw_hw_stat_names, num_counters,
- lifespan);
-}
-
-/**
- * i40iw_get_hw_stats - Populates the rdma_hw_stats structure
- * @ibdev: device pointer from stack
- * @stats: stats pointer from stack
- * @port_num: port number
- * @index: which hw counter the stack is requesting we update
- */
-static int i40iw_get_hw_stats(struct ib_device *ibdev,
- struct rdma_hw_stats *stats,
- u8 port_num, int index)
-{
- struct i40iw_device *iwdev = to_iwdev(ibdev);
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_vsi_pestat *devstat = iwdev->vsi.pestat;
- struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
-
- if (dev->is_pf) {
- i40iw_hw_stats_read_all(devstat, &devstat->hw_stats);
- } else {
- if (i40iw_vchnl_vf_get_pe_stats(dev, &devstat->hw_stats))
- return -ENOSYS;
- }
-
- memcpy(&stats->value[0], hw_stats, sizeof(*hw_stats));
-
- return stats->num_counters;
-}
-
-/**
- * i40iw_query_gid - Query port GID
- * @ibdev: device pointer from stack
- * @port: port number
- * @index: Entry index
- * @gid: Global ID
- */
-static int i40iw_query_gid(struct ib_device *ibdev,
- u8 port,
- int index,
- union ib_gid *gid)
-{
- struct i40iw_device *iwdev = to_iwdev(ibdev);
-
- memset(gid->raw, 0, sizeof(gid->raw));
- ether_addr_copy(gid->raw, iwdev->netdev->dev_addr);
- return 0;
-}
-
-/**
- * i40iw_query_pkey - Query partition key
- * @ibdev: device pointer from stack
- * @port: port number
- * @index: index of pkey
- * @pkey: pointer to store the pkey
- */
-static int i40iw_query_pkey(struct ib_device *ibdev,
- u8 port,
- u16 index,
- u16 *pkey)
-{
- *pkey = 0;
- return 0;
-}
-
-static const struct ib_device_ops i40iw_dev_ops = {
- .alloc_hw_stats = i40iw_alloc_hw_stats,
- .alloc_mr = i40iw_alloc_mr,
- .alloc_pd = i40iw_alloc_pd,
- .alloc_ucontext = i40iw_alloc_ucontext,
- .create_cq = i40iw_create_cq,
- .create_qp = i40iw_create_qp,
- .dealloc_pd = i40iw_dealloc_pd,
- .dealloc_ucontext = i40iw_dealloc_ucontext,
- .dereg_mr = i40iw_dereg_mr,
- .destroy_cq = i40iw_destroy_cq,
- .destroy_qp = i40iw_destroy_qp,
- .drain_rq = i40iw_drain_rq,
- .drain_sq = i40iw_drain_sq,
- .get_dev_fw_str = i40iw_get_dev_fw_str,
- .get_dma_mr = i40iw_get_dma_mr,
- .get_hw_stats = i40iw_get_hw_stats,
- .get_port_immutable = i40iw_port_immutable,
- .iw_accept = i40iw_accept,
- .iw_add_ref = i40iw_add_ref,
- .iw_connect = i40iw_connect,
- .iw_create_listen = i40iw_create_listen,
- .iw_destroy_listen = i40iw_destroy_listen,
- .iw_get_qp = i40iw_get_qp,
- .iw_reject = i40iw_reject,
- .iw_rem_ref = i40iw_rem_ref,
- .map_mr_sg = i40iw_map_mr_sg,
- .mmap = i40iw_mmap,
- .modify_qp = i40iw_modify_qp,
- .poll_cq = i40iw_poll_cq,
- .post_recv = i40iw_post_recv,
- .post_send = i40iw_post_send,
- .query_device = i40iw_query_device,
- .query_gid = i40iw_query_gid,
- .query_pkey = i40iw_query_pkey,
- .query_port = i40iw_query_port,
- .query_qp = i40iw_query_qp,
- .reg_user_mr = i40iw_reg_user_mr,
- .req_notify_cq = i40iw_req_notify_cq,
- INIT_RDMA_OBJ_SIZE(ib_pd, i40iw_pd, ibpd),
- INIT_RDMA_OBJ_SIZE(ib_ucontext, i40iw_ucontext, ibucontext),
-};
-
-/**
- * i40iw_init_rdma_device - initialization of iwarp device
- * @iwdev: iwarp device
- */
-static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev)
-{
- struct i40iw_ib_device *iwibdev;
- struct net_device *netdev = iwdev->netdev;
- struct pci_dev *pcidev = (struct pci_dev *)iwdev->hw.dev_context;
-
- iwibdev = ib_alloc_device(i40iw_ib_device, ibdev);
- if (!iwibdev) {
- i40iw_pr_err("iwdev == NULL\n");
- return NULL;
- }
- iwibdev->ibdev.owner = THIS_MODULE;
- iwdev->iwibdev = iwibdev;
- iwibdev->iwdev = iwdev;
-
- iwibdev->ibdev.node_type = RDMA_NODE_RNIC;
- ether_addr_copy((u8 *)&iwibdev->ibdev.node_guid, netdev->dev_addr);
-
- iwibdev->ibdev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_POST_RECV) |
- (1ull << IB_USER_VERBS_CMD_POST_SEND);
- iwibdev->ibdev.phys_port_cnt = 1;
- iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count;
- iwibdev->ibdev.dev.parent = &pcidev->dev;
- memcpy(iwibdev->ibdev.iw_ifname, netdev->name,
- sizeof(iwibdev->ibdev.iw_ifname));
- ib_set_device_ops(&iwibdev->ibdev, &i40iw_dev_ops);
-
- return iwibdev;
-}
-
-/**
- * i40iw_port_ibevent - indicate port event
- * @iwdev: iwarp device
- */
-void i40iw_port_ibevent(struct i40iw_device *iwdev)
-{
- struct i40iw_ib_device *iwibdev = iwdev->iwibdev;
- struct ib_event event;
-
- event.device = &iwibdev->ibdev;
- event.element.port_num = 1;
- event.event = iwdev->iw_status ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
- ib_dispatch_event(&event);
-}
-
-/**
- * i40iw_destroy_rdma_device - destroy rdma device and free resources
- * @iwibdev: IB device ptr
- */
-void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev)
-{
- if (!iwibdev)
- return;
-
- ib_unregister_device(&iwibdev->ibdev);
- wait_event_timeout(iwibdev->iwdev->close_wq,
- !atomic64_read(&iwibdev->iwdev->use_count),
- I40IW_EVENT_TIMEOUT);
- ib_dealloc_device(&iwibdev->ibdev);
-}
-
-/**
- * i40iw_register_rdma_device - register iwarp device to IB
- * @iwdev: iwarp device
- */
-int i40iw_register_rdma_device(struct i40iw_device *iwdev)
-{
- int ret;
- struct i40iw_ib_device *iwibdev;
-
- iwdev->iwibdev = i40iw_init_rdma_device(iwdev);
- if (!iwdev->iwibdev)
- return -ENOMEM;
- iwibdev = iwdev->iwibdev;
- rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group);
- iwibdev->ibdev.driver_id = RDMA_DRIVER_I40IW;
- ret = ib_register_device(&iwibdev->ibdev, "i40iw%d");
- if (ret)
- goto error;
-
- return 0;
-error:
- ib_dealloc_device(&iwdev->iwibdev->ibdev);
- return ret;
-}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
deleted file mode 100644
index 3a413752ccc3..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h
+++ /dev/null
@@ -1,179 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_VERBS_H
-#define I40IW_VERBS_H
-
-struct i40iw_ucontext {
- struct ib_ucontext ibucontext;
- struct i40iw_device *iwdev;
- struct list_head cq_reg_mem_list;
- spinlock_t cq_reg_mem_list_lock; /* memory list for cq's */
- struct list_head qp_reg_mem_list;
- spinlock_t qp_reg_mem_list_lock; /* memory list for qp's */
- int abi_ver;
-};
-
-struct i40iw_pd {
- struct ib_pd ibpd;
- struct i40iw_sc_pd sc_pd;
- atomic_t usecount;
-};
-
-struct i40iw_hmc_pble {
- union {
- u32 idx;
- dma_addr_t addr;
- };
-};
-
-struct i40iw_cq_mr {
- struct i40iw_hmc_pble cq_pbl;
- dma_addr_t shadow;
-};
-
-struct i40iw_qp_mr {
- struct i40iw_hmc_pble sq_pbl;
- struct i40iw_hmc_pble rq_pbl;
- dma_addr_t shadow;
- struct page *sq_page;
-};
-
-struct i40iw_pbl {
- struct list_head list;
- union {
- struct i40iw_qp_mr qp_mr;
- struct i40iw_cq_mr cq_mr;
- };
-
- bool pbl_allocated;
- bool on_list;
- u64 user_base;
- struct i40iw_pble_alloc pble_alloc;
- struct i40iw_mr *iwmr;
-};
-
-#define MAX_SAVE_PAGE_ADDRS 4
-struct i40iw_mr {
- union {
- struct ib_mr ibmr;
- struct ib_mw ibmw;
- struct ib_fmr ibfmr;
- };
- struct ib_umem *region;
- u16 type;
- u32 page_cnt;
- u64 page_size;
- u32 npages;
- u32 stag;
- u64 length;
- u64 pgaddrmem[MAX_SAVE_PAGE_ADDRS];
- struct i40iw_pbl iwpbl;
-};
-
-struct i40iw_cq {
- struct ib_cq ibcq;
- struct i40iw_sc_cq sc_cq;
- u16 cq_head;
- u16 cq_size;
- u16 cq_number;
- bool user_mode;
- u32 polled_completions;
- u32 cq_mem_size;
- struct i40iw_dma_mem kmem;
- spinlock_t lock; /* for poll cq */
- struct i40iw_pbl *iwpbl;
-};
-
-struct disconn_work {
- struct work_struct work;
- struct i40iw_qp *iwqp;
-};
-
-struct iw_cm_id;
-struct ietf_mpa_frame;
-struct i40iw_ud_file;
-
-struct i40iw_qp_kmode {
- struct i40iw_dma_mem dma_mem;
- u64 *wrid_mem;
-};
-
-struct i40iw_qp {
- struct ib_qp ibqp;
- struct i40iw_sc_qp sc_qp;
- struct i40iw_device *iwdev;
- struct i40iw_cq *iwscq;
- struct i40iw_cq *iwrcq;
- struct i40iw_pd *iwpd;
- struct i40iw_qp_host_ctx_info ctx_info;
- struct i40iwarp_offload_info iwarp_info;
- void *allocated_buffer;
- atomic_t refcount;
- struct iw_cm_id *cm_id;
- void *cm_node;
- struct ib_mr *lsmm_mr;
- struct work_struct work;
- enum ib_qp_state ibqp_state;
- u32 iwarp_state;
- u32 qp_mem_size;
- u32 last_aeq;
- atomic_t close_timer_started;
- spinlock_t lock; /* for post work requests */
- struct i40iw_qp_context *iwqp_context;
- void *pbl_vbase;
- dma_addr_t pbl_pbase;
- struct page *page;
- u8 active_conn:1;
- u8 user_mode:1;
- u8 hte_added:1;
- u8 flush_issued:1;
- u8 destroyed:1;
- u8 sig_all:1;
- u8 pau_mode:1;
- u8 rsvd:1;
- u16 term_sq_flush_code;
- u16 term_rq_flush_code;
- u8 hw_iwarp_state;
- u8 hw_tcp_state;
- struct i40iw_qp_kmode kqp;
- struct i40iw_dma_mem host_ctx;
- struct timer_list terminate_timer;
- struct i40iw_pbl iwpbl;
- struct i40iw_dma_mem q2_ctx_mem;
- struct i40iw_dma_mem ietf_mem;
- struct completion sq_drained;
- struct completion rq_drained;
-};
-#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_vf.c b/drivers/infiniband/hw/i40iw/i40iw_vf.c
deleted file mode 100644
index e33d4810965c..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_vf.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#include "i40iw_osdep.h"
-#include "i40iw_register.h"
-#include "i40iw_status.h"
-#include "i40iw_hmc.h"
-#include "i40iw_d.h"
-#include "i40iw_type.h"
-#include "i40iw_p.h"
-#include "i40iw_vf.h"
-
-/**
- * i40iw_manage_vf_pble_bp - manage vf pble
- * @cqp: cqp pointer for the sq wqe
- * @info: pble info
- * @scratch: pointer for completion
- * @post_sq: to post and ring
- */
-enum i40iw_status_code i40iw_manage_vf_pble_bp(struct i40iw_sc_cqp *cqp,
- struct i40iw_manage_vf_pble_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- u64 temp, header, pd_pl_pba = 0;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- temp = LS_64(info->pd_entry_cnt, I40IW_CQPSQ_MVPBP_PD_ENTRY_CNT) |
- LS_64(info->first_pd_index, I40IW_CQPSQ_MVPBP_FIRST_PD_INX) |
- LS_64(info->sd_index, I40IW_CQPSQ_MVPBP_SD_INX);
- set_64bit_val(wqe, 16, temp);
-
- header = LS_64((info->inv_pd_ent ? 1 : 0), I40IW_CQPSQ_MVPBP_INV_PD_ENT) |
- LS_64(I40IW_CQP_OP_MANAGE_VF_PBLE_BP, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
- set_64bit_val(wqe, 24, header);
-
- pd_pl_pba = LS_64(info->pd_pl_pba >> 3, I40IW_CQPSQ_MVPBP_PD_PLPBA);
- set_64bit_val(wqe, 32, pd_pl_pba);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE VF_PBLE_BP WQE", wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-const struct i40iw_vf_cqp_ops iw_vf_cqp_ops = {
- i40iw_manage_vf_pble_bp
-};
diff --git a/drivers/infiniband/hw/i40iw/i40iw_vf.h b/drivers/infiniband/hw/i40iw/i40iw_vf.h
deleted file mode 100644
index 4359559ece9c..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_vf.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_VF_H
-#define I40IW_VF_H
-
-struct i40iw_sc_cqp;
-
-struct i40iw_manage_vf_pble_info {
- u32 sd_index;
- u16 first_pd_index;
- u16 pd_entry_cnt;
- u8 inv_pd_ent;
- u64 pd_pl_pba;
-};
-
-struct i40iw_vf_cqp_ops {
- enum i40iw_status_code (*manage_vf_pble_bp)(struct i40iw_sc_cqp *,
- struct i40iw_manage_vf_pble_info *,
- u64,
- bool);
-};
-
-enum i40iw_status_code i40iw_manage_vf_pble_bp(struct i40iw_sc_cqp *cqp,
- struct i40iw_manage_vf_pble_info *info,
- u64 scratch,
- bool post_sq);
-
-extern const struct i40iw_vf_cqp_ops iw_vf_cqp_ops;
-
-#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
deleted file mode 100644
index 48fd327f876b..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
+++ /dev/null
@@ -1,756 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#include "i40iw_osdep.h"
-#include "i40iw_register.h"
-#include "i40iw_status.h"
-#include "i40iw_hmc.h"
-#include "i40iw_d.h"
-#include "i40iw_type.h"
-#include "i40iw_p.h"
-#include "i40iw_virtchnl.h"
-
-/**
- * vchnl_vf_send_get_ver_req - Request Channel version
- * @dev: IWARP device pointer
- * @vchnl_req: Virtual channel message request pointer
- */
-static enum i40iw_status_code vchnl_vf_send_get_ver_req(struct i40iw_sc_dev *dev,
- struct i40iw_virtchnl_req *vchnl_req)
-{
- enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
- struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
-
- if (!dev->vchnl_up)
- return ret_code;
-
- memset(vchnl_msg, 0, sizeof(*vchnl_msg));
- vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
- vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg);
- vchnl_msg->iw_op_code = I40IW_VCHNL_OP_GET_VER;
- vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_GET_VER_V0;
- ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
- if (ret_code)
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s: virt channel send failed 0x%x\n", __func__, ret_code);
- return ret_code;
-}
-
-/**
- * vchnl_vf_send_get_hmc_fcn_req - Request HMC Function from VF
- * @dev: IWARP device pointer
- * @vchnl_req: Virtual channel message request pointer
- */
-static enum i40iw_status_code vchnl_vf_send_get_hmc_fcn_req(struct i40iw_sc_dev *dev,
- struct i40iw_virtchnl_req *vchnl_req)
-{
- enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
- struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
-
- if (!dev->vchnl_up)
- return ret_code;
-
- memset(vchnl_msg, 0, sizeof(*vchnl_msg));
- vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
- vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg);
- vchnl_msg->iw_op_code = I40IW_VCHNL_OP_GET_HMC_FCN;
- vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_GET_HMC_FCN_V0;
- ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
- if (ret_code)
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s: virt channel send failed 0x%x\n", __func__, ret_code);
- return ret_code;
-}
-
-/**
- * vchnl_vf_send_get_pe_stats_req - Request PE stats from VF
- * @dev: IWARP device pointer
- * @vchnl_req: Virtual channel message request pointer
- */
-static enum i40iw_status_code vchnl_vf_send_get_pe_stats_req(struct i40iw_sc_dev *dev,
- struct i40iw_virtchnl_req *vchnl_req)
-{
- enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
- struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
-
- if (!dev->vchnl_up)
- return ret_code;
-
- memset(vchnl_msg, 0, sizeof(*vchnl_msg));
- vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
- vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg) + sizeof(struct i40iw_dev_hw_stats) - 1;
- vchnl_msg->iw_op_code = I40IW_VCHNL_OP_GET_STATS;
- vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_GET_STATS_V0;
- ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
- if (ret_code)
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s: virt channel send failed 0x%x\n", __func__, ret_code);
- return ret_code;
-}
-
-/**
- * vchnl_vf_send_add_hmc_objs_req - Add HMC objects
- * @dev: IWARP device pointer
- * @vchnl_req: Virtual channel message request pointer
- */
-static enum i40iw_status_code vchnl_vf_send_add_hmc_objs_req(struct i40iw_sc_dev *dev,
- struct i40iw_virtchnl_req *vchnl_req,
- enum i40iw_hmc_rsrc_type rsrc_type,
- u32 start_index,
- u32 rsrc_count)
-{
- enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
- struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
- struct i40iw_virtchnl_hmc_obj_range *add_hmc_obj;
-
- if (!dev->vchnl_up)
- return ret_code;
-
- add_hmc_obj = (struct i40iw_virtchnl_hmc_obj_range *)vchnl_msg->iw_chnl_buf;
- memset(vchnl_msg, 0, sizeof(*vchnl_msg));
- memset(add_hmc_obj, 0, sizeof(*add_hmc_obj));
- vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
- vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg) + sizeof(struct i40iw_virtchnl_hmc_obj_range) - 1;
- vchnl_msg->iw_op_code = I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE;
- vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE_V0;
- add_hmc_obj->obj_type = (u16)rsrc_type;
- add_hmc_obj->start_index = start_index;
- add_hmc_obj->obj_count = rsrc_count;
- ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
- if (ret_code)
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s: virt channel send failed 0x%x\n", __func__, ret_code);
- return ret_code;
-}
-
-/**
- * vchnl_vf_send_del_hmc_objs_req - del HMC objects
- * @dev: IWARP device pointer
- * @vchnl_req: Virtual channel message request pointer
- * @rsrc_type: resource type to delete
- * @start_index: starting index for the resource
- * @rsrc_count: number of resources to delete
- */
-static enum i40iw_status_code vchnl_vf_send_del_hmc_objs_req(struct i40iw_sc_dev *dev,
- struct i40iw_virtchnl_req *vchnl_req,
- enum i40iw_hmc_rsrc_type rsrc_type,
- u32 start_index,
- u32 rsrc_count)
-{
- enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
- struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
- struct i40iw_virtchnl_hmc_obj_range *add_hmc_obj;
-
- if (!dev->vchnl_up)
- return ret_code;
-
- add_hmc_obj = (struct i40iw_virtchnl_hmc_obj_range *)vchnl_msg->iw_chnl_buf;
- memset(vchnl_msg, 0, sizeof(*vchnl_msg));
- memset(add_hmc_obj, 0, sizeof(*add_hmc_obj));
- vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
- vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg) + sizeof(struct i40iw_virtchnl_hmc_obj_range) - 1;
- vchnl_msg->iw_op_code = I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE;
- vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE_V0;
- add_hmc_obj->obj_type = (u16)rsrc_type;
- add_hmc_obj->start_index = start_index;
- add_hmc_obj->obj_count = rsrc_count;
- ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
- if (ret_code)
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s: virt channel send failed 0x%x\n", __func__, ret_code);
- return ret_code;
-}
-
-/**
- * vchnl_pf_send_get_ver_resp - Send channel version to VF
- * @dev: IWARP device pointer
- * @vf_id: Virtual function ID associated with the message
- * @vchnl_msg: Virtual channel message buffer pointer
- */
-static void vchnl_pf_send_get_ver_resp(struct i40iw_sc_dev *dev,
- u32 vf_id,
- struct i40iw_virtchnl_op_buf *vchnl_msg)
-{
- enum i40iw_status_code ret_code;
- u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf) + sizeof(u32) - 1];
- struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)resp_buffer;
-
- memset(resp_buffer, 0, sizeof(*resp_buffer));
- vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx;
- vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer);
- vchnl_msg_resp->iw_op_ret_code = I40IW_SUCCESS;
- *((u32 *)vchnl_msg_resp->iw_chnl_buf) = I40IW_VCHNL_CHNL_VER_V0;
- ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer));
- if (ret_code)
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s: virt channel send failed 0x%x\n", __func__, ret_code);
-}
-
-/**
- * vchnl_pf_send_get_hmc_fcn_resp - Send HMC Function to VF
- * @dev: IWARP device pointer
- * @vf_id: Virtual function ID associated with the message
- * @vchnl_msg: Virtual channel message buffer pointer
- */
-static void vchnl_pf_send_get_hmc_fcn_resp(struct i40iw_sc_dev *dev,
- u32 vf_id,
- struct i40iw_virtchnl_op_buf *vchnl_msg,
- u16 hmc_fcn)
-{
- enum i40iw_status_code ret_code;
- u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf) + sizeof(u16) - 1];
- struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)resp_buffer;
-
- memset(resp_buffer, 0, sizeof(*resp_buffer));
- vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx;
- vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer);
- vchnl_msg_resp->iw_op_ret_code = I40IW_SUCCESS;
- *((u16 *)vchnl_msg_resp->iw_chnl_buf) = hmc_fcn;
- ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer));
- if (ret_code)
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s: virt channel send failed 0x%x\n", __func__, ret_code);
-}
-
-/**
- * vchnl_pf_send_get_pe_stats_resp - Send PE Stats to VF
- * @dev: IWARP device pointer
- * @vf_id: Virtual function ID associated with the message
- * @vchnl_msg: Virtual channel message buffer pointer
- * @hw_stats: HW Stats struct
- */
-
-static void vchnl_pf_send_get_pe_stats_resp(struct i40iw_sc_dev *dev,
- u32 vf_id,
- struct i40iw_virtchnl_op_buf *vchnl_msg,
- struct i40iw_dev_hw_stats *hw_stats)
-{
- enum i40iw_status_code ret_code;
- u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf) + sizeof(struct i40iw_dev_hw_stats) - 1];
- struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)resp_buffer;
-
- memset(resp_buffer, 0, sizeof(*resp_buffer));
- vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx;
- vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer);
- vchnl_msg_resp->iw_op_ret_code = I40IW_SUCCESS;
- *((struct i40iw_dev_hw_stats *)vchnl_msg_resp->iw_chnl_buf) = *hw_stats;
- ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer));
- if (ret_code)
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s: virt channel send failed 0x%x\n", __func__, ret_code);
-}
-
-/**
- * vchnl_pf_send_error_resp - Send an error response to VF
- * @dev: IWARP device pointer
- * @vf_id: Virtual function ID associated with the message
- * @vchnl_msg: Virtual channel message buffer pointer
- */
-static void vchnl_pf_send_error_resp(struct i40iw_sc_dev *dev, u32 vf_id,
- struct i40iw_virtchnl_op_buf *vchnl_msg,
- u16 op_ret_code)
-{
- enum i40iw_status_code ret_code;
- u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf)];
- struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)resp_buffer;
-
- memset(resp_buffer, 0, sizeof(resp_buffer));
- vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx;
- vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer);
- vchnl_msg_resp->iw_op_ret_code = (u16)op_ret_code;
- ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer));
- if (ret_code)
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s: virt channel send failed 0x%x\n", __func__, ret_code);
-}
-
-/**
- * pf_cqp_get_hmc_fcn_callback - Callback for Get HMC Fcn
- * @cqp_req_param: CQP Request param value
- * @not_used: unused CQP callback parameter
- */
-static void pf_cqp_get_hmc_fcn_callback(struct i40iw_sc_dev *dev, void *callback_param,
- struct i40iw_ccq_cqe_info *cqe_info)
-{
- struct i40iw_vfdev *vf_dev = callback_param;
- struct i40iw_virt_mem vf_dev_mem;
-
- if (cqe_info->error) {
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "CQP Completion Error on Get HMC Function. Maj = 0x%04x, Minor = 0x%04x\n",
- cqe_info->maj_err_code, cqe_info->min_err_code);
- dev->vf_dev[vf_dev->iw_vf_idx] = NULL;
- vchnl_pf_send_error_resp(dev, vf_dev->vf_id, &vf_dev->vf_msg_buffer.vchnl_msg,
- (u16)I40IW_ERR_CQP_COMPL_ERROR);
- vf_dev_mem.va = vf_dev;
- vf_dev_mem.size = sizeof(*vf_dev);
- i40iw_free_virt_mem(dev->hw, &vf_dev_mem);
- } else {
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "CQP Completion Operation Return information = 0x%08x\n",
- cqe_info->op_ret_val);
- vf_dev->pmf_index = (u16)cqe_info->op_ret_val;
- vf_dev->msg_count--;
- vchnl_pf_send_get_hmc_fcn_resp(dev,
- vf_dev->vf_id,
- &vf_dev->vf_msg_buffer.vchnl_msg,
- vf_dev->pmf_index);
- }
-}
-
-/**
- * pf_add_hmc_obj_callback - Callback for Add HMC Object
- * @work_vf_dev: pointer to the VF Device
- */
-static void pf_add_hmc_obj_callback(void *work_vf_dev)
-{
- struct i40iw_vfdev *vf_dev = (struct i40iw_vfdev *)work_vf_dev;
- struct i40iw_hmc_info *hmc_info = &vf_dev->hmc_info;
- struct i40iw_virtchnl_op_buf *vchnl_msg = &vf_dev->vf_msg_buffer.vchnl_msg;
- struct i40iw_hmc_create_obj_info info;
- struct i40iw_virtchnl_hmc_obj_range *add_hmc_obj;
- enum i40iw_status_code ret_code;
-
- if (!vf_dev->pf_hmc_initialized) {
- ret_code = i40iw_pf_init_vfhmc(vf_dev->pf_dev, (u8)vf_dev->pmf_index, NULL);
- if (ret_code)
- goto add_out;
- vf_dev->pf_hmc_initialized = true;
- }
-
- add_hmc_obj = (struct i40iw_virtchnl_hmc_obj_range *)vchnl_msg->iw_chnl_buf;
-
- memset(&info, 0, sizeof(info));
- info.hmc_info = hmc_info;
- info.is_pf = false;
- info.rsrc_type = (u32)add_hmc_obj->obj_type;
- info.entry_type = (info.rsrc_type == I40IW_HMC_IW_PBLE) ? I40IW_SD_TYPE_PAGED : I40IW_SD_TYPE_DIRECT;
- info.start_idx = add_hmc_obj->start_index;
- info.count = add_hmc_obj->obj_count;
- i40iw_debug(vf_dev->pf_dev, I40IW_DEBUG_VIRT,
- "I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE. Add %u type %u objects\n",
- info.count, info.rsrc_type);
- ret_code = i40iw_sc_create_hmc_obj(vf_dev->pf_dev, &info);
- if (!ret_code)
- vf_dev->hmc_info.hmc_obj[add_hmc_obj->obj_type].cnt = add_hmc_obj->obj_count;
-add_out:
- vf_dev->msg_count--;
- vchnl_pf_send_error_resp(vf_dev->pf_dev, vf_dev->vf_id, vchnl_msg, (u16)ret_code);
-}
-
-/**
- * pf_del_hmc_obj_callback - Callback for delete HMC Object
- * @work_vf_dev: pointer to the VF Device
- */
-static void pf_del_hmc_obj_callback(void *work_vf_dev)
-{
- struct i40iw_vfdev *vf_dev = (struct i40iw_vfdev *)work_vf_dev;
- struct i40iw_hmc_info *hmc_info = &vf_dev->hmc_info;
- struct i40iw_virtchnl_op_buf *vchnl_msg = &vf_dev->vf_msg_buffer.vchnl_msg;
- struct i40iw_hmc_del_obj_info info;
- struct i40iw_virtchnl_hmc_obj_range *del_hmc_obj;
- enum i40iw_status_code ret_code = I40IW_SUCCESS;
-
- if (!vf_dev->pf_hmc_initialized)
- goto del_out;
-
- del_hmc_obj = (struct i40iw_virtchnl_hmc_obj_range *)vchnl_msg->iw_chnl_buf;
-
- memset(&info, 0, sizeof(info));
- info.hmc_info = hmc_info;
- info.is_pf = false;
- info.rsrc_type = (u32)del_hmc_obj->obj_type;
- info.start_idx = del_hmc_obj->start_index;
- info.count = del_hmc_obj->obj_count;
- i40iw_debug(vf_dev->pf_dev, I40IW_DEBUG_VIRT,
- "I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE. Delete %u type %u objects\n",
- info.count, info.rsrc_type);
- ret_code = i40iw_sc_del_hmc_obj(vf_dev->pf_dev, &info, false);
-del_out:
- vf_dev->msg_count--;
- vchnl_pf_send_error_resp(vf_dev->pf_dev, vf_dev->vf_id, vchnl_msg, (u16)ret_code);
-}
-
-/**
- * i40iw_vf_init_pestat - Initialize stats for VF
- * @dev: pointer to the VF device
- * @stats: Statistics structure pointer
- * @index: Stats index
- */
-static void i40iw_vf_init_pestat(struct i40iw_sc_dev *dev, struct i40iw_vsi_pestat *stats, u16 index)
-{
- stats->hw = dev->hw;
- i40iw_hw_stats_init(stats, (u8)index, false);
- spin_lock_init(&stats->lock);
-}
-
-/**
- * i40iw_vchnl_recv_pf - Receive PF virtual channel messages
- * @dev: IWARP device pointer
- * @vf_id: Virtual function ID associated with the message
- * @msg: Virtual channel message buffer pointer
- * @len: Length of the virtual channels message
- */
-enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev,
- u32 vf_id,
- u8 *msg,
- u16 len)
-{
- struct i40iw_virtchnl_op_buf *vchnl_msg = (struct i40iw_virtchnl_op_buf *)msg;
- struct i40iw_vfdev *vf_dev = NULL;
- struct i40iw_hmc_fcn_info hmc_fcn_info;
- u16 iw_vf_idx;
- u16 first_avail_iw_vf = I40IW_MAX_PE_ENABLED_VF_COUNT;
- struct i40iw_virt_mem vf_dev_mem;
- struct i40iw_virtchnl_work_info work_info;
- struct i40iw_vsi_pestat *stats;
- enum i40iw_status_code ret_code;
-
- if (!dev || !msg || !len)
- return I40IW_ERR_PARAM;
-
- if (!dev->vchnl_up)
- return I40IW_ERR_NOT_READY;
- if (vchnl_msg->iw_op_code == I40IW_VCHNL_OP_GET_VER) {
- vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg);
- return I40IW_SUCCESS;
- }
- for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; iw_vf_idx++) {
- if (!dev->vf_dev[iw_vf_idx]) {
- if (first_avail_iw_vf == I40IW_MAX_PE_ENABLED_VF_COUNT)
- first_avail_iw_vf = iw_vf_idx;
- continue;
- }
- if (dev->vf_dev[iw_vf_idx]->vf_id == vf_id) {
- vf_dev = dev->vf_dev[iw_vf_idx];
- break;
- }
- }
- if (vf_dev) {
- if (!vf_dev->msg_count) {
- vf_dev->msg_count++;
- } else {
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "VF%u already has a channel message in progress.\n",
- vf_id);
- return I40IW_SUCCESS;
- }
- }
- switch (vchnl_msg->iw_op_code) {
- case I40IW_VCHNL_OP_GET_HMC_FCN:
- if (!vf_dev &&
- (first_avail_iw_vf != I40IW_MAX_PE_ENABLED_VF_COUNT)) {
- ret_code = i40iw_allocate_virt_mem(dev->hw, &vf_dev_mem, sizeof(struct i40iw_vfdev) +
- (sizeof(struct i40iw_hmc_obj_info) * I40IW_HMC_IW_MAX));
- if (!ret_code) {
- vf_dev = vf_dev_mem.va;
- vf_dev->stats_initialized = false;
- vf_dev->pf_dev = dev;
- vf_dev->msg_count = 1;
- vf_dev->vf_id = vf_id;
- vf_dev->iw_vf_idx = first_avail_iw_vf;
- vf_dev->pf_hmc_initialized = false;
- vf_dev->hmc_info.hmc_obj = (struct i40iw_hmc_obj_info *)(&vf_dev[1]);
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "vf_dev %p, hmc_info %p, hmc_obj %p\n",
- vf_dev, &vf_dev->hmc_info, vf_dev->hmc_info.hmc_obj);
- dev->vf_dev[first_avail_iw_vf] = vf_dev;
- iw_vf_idx = first_avail_iw_vf;
- } else {
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "VF%u Unable to allocate a VF device structure.\n",
- vf_id);
- vchnl_pf_send_error_resp(dev, vf_id, vchnl_msg, (u16)I40IW_ERR_NO_MEMORY);
- return I40IW_SUCCESS;
- }
- memcpy(&vf_dev->vf_msg_buffer.vchnl_msg, vchnl_msg, len);
- hmc_fcn_info.callback_fcn = pf_cqp_get_hmc_fcn_callback;
- hmc_fcn_info.vf_id = vf_id;
- hmc_fcn_info.iw_vf_idx = vf_dev->iw_vf_idx;
- hmc_fcn_info.cqp_callback_param = vf_dev;
- hmc_fcn_info.free_fcn = false;
- ret_code = i40iw_cqp_manage_hmc_fcn_cmd(dev, &hmc_fcn_info);
- if (ret_code)
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "VF%u error CQP HMC Function operation.\n",
- vf_id);
- i40iw_vf_init_pestat(dev, &vf_dev->pestat, vf_dev->pmf_index);
- vf_dev->stats_initialized = true;
- } else {
- if (vf_dev) {
- vf_dev->msg_count--;
- vchnl_pf_send_get_hmc_fcn_resp(dev, vf_id, vchnl_msg, vf_dev->pmf_index);
- } else {
- vchnl_pf_send_error_resp(dev, vf_id, vchnl_msg,
- (u16)I40IW_ERR_NO_MEMORY);
- }
- }
- break;
- case I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE:
- if (!vf_dev)
- return I40IW_ERR_BAD_PTR;
- work_info.worker_vf_dev = vf_dev;
- work_info.callback_fcn = pf_add_hmc_obj_callback;
- memcpy(&vf_dev->vf_msg_buffer.vchnl_msg, vchnl_msg, len);
- i40iw_cqp_spawn_worker(dev, &work_info, vf_dev->iw_vf_idx);
- break;
- case I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE:
- if (!vf_dev)
- return I40IW_ERR_BAD_PTR;
- work_info.worker_vf_dev = vf_dev;
- work_info.callback_fcn = pf_del_hmc_obj_callback;
- memcpy(&vf_dev->vf_msg_buffer.vchnl_msg, vchnl_msg, len);
- i40iw_cqp_spawn_worker(dev, &work_info, vf_dev->iw_vf_idx);
- break;
- case I40IW_VCHNL_OP_GET_STATS:
- if (!vf_dev)
- return I40IW_ERR_BAD_PTR;
- stats = &vf_dev->pestat;
- i40iw_hw_stats_read_all(stats, &stats->hw_stats);
- vf_dev->msg_count--;
- vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, &stats->hw_stats);
- break;
- default:
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "40iw_vchnl_recv_pf: Invalid OpCode 0x%x\n",
- vchnl_msg->iw_op_code);
- vchnl_pf_send_error_resp(dev, vf_id,
- vchnl_msg, (u16)I40IW_ERR_NOT_IMPLEMENTED);
- }
- return I40IW_SUCCESS;
-}
-
-/**
- * i40iw_vchnl_recv_vf - Receive VF virtual channel messages
- * @dev: IWARP device pointer
- * @vf_id: Virtual function ID associated with the message
- * @msg: Virtual channel message buffer pointer
- * @len: Length of the virtual channels message
- */
-enum i40iw_status_code i40iw_vchnl_recv_vf(struct i40iw_sc_dev *dev,
- u32 vf_id,
- u8 *msg,
- u16 len)
-{
- struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)msg;
- struct i40iw_virtchnl_req *vchnl_req;
-
- vchnl_req = (struct i40iw_virtchnl_req *)(uintptr_t)vchnl_msg_resp->iw_chnl_op_ctx;
- vchnl_req->ret_code = (enum i40iw_status_code)vchnl_msg_resp->iw_op_ret_code;
- if (len == (sizeof(*vchnl_msg_resp) + vchnl_req->parm_len - 1)) {
- if (vchnl_req->parm_len && vchnl_req->parm)
- memcpy(vchnl_req->parm, vchnl_msg_resp->iw_chnl_buf, vchnl_req->parm_len);
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s: Got response, data size %u\n", __func__,
- vchnl_req->parm_len);
- } else {
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s: error length on response, Got %u, expected %u\n", __func__,
- len, (u32)(sizeof(*vchnl_msg_resp) + vchnl_req->parm_len - 1));
- }
-
- return I40IW_SUCCESS;
-}
-
-/**
- * i40iw_vchnl_vf_get_ver - Request Channel version
- * @dev: IWARP device pointer
- * @vchnl_ver: Virtual channel message version pointer
- */
-enum i40iw_status_code i40iw_vchnl_vf_get_ver(struct i40iw_sc_dev *dev,
- u32 *vchnl_ver)
-{
- struct i40iw_virtchnl_req vchnl_req;
- enum i40iw_status_code ret_code;
-
- if (!i40iw_vf_clear_to_send(dev))
- return I40IW_ERR_TIMEOUT;
- memset(&vchnl_req, 0, sizeof(vchnl_req));
- vchnl_req.dev = dev;
- vchnl_req.parm = vchnl_ver;
- vchnl_req.parm_len = sizeof(*vchnl_ver);
- vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
-
- ret_code = vchnl_vf_send_get_ver_req(dev, &vchnl_req);
- if (ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s Send message failed 0x%0x\n", __func__, ret_code);
- return ret_code;
- }
- ret_code = i40iw_vf_wait_vchnl_resp(dev);
- if (ret_code)
- return ret_code;
- else
- return vchnl_req.ret_code;
-}
-
-/**
- * i40iw_vchnl_vf_get_hmc_fcn - Request HMC Function
- * @dev: IWARP device pointer
- * @hmc_fcn: HMC function index pointer
- */
-enum i40iw_status_code i40iw_vchnl_vf_get_hmc_fcn(struct i40iw_sc_dev *dev,
- u16 *hmc_fcn)
-{
- struct i40iw_virtchnl_req vchnl_req;
- enum i40iw_status_code ret_code;
-
- if (!i40iw_vf_clear_to_send(dev))
- return I40IW_ERR_TIMEOUT;
- memset(&vchnl_req, 0, sizeof(vchnl_req));
- vchnl_req.dev = dev;
- vchnl_req.parm = hmc_fcn;
- vchnl_req.parm_len = sizeof(*hmc_fcn);
- vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
-
- ret_code = vchnl_vf_send_get_hmc_fcn_req(dev, &vchnl_req);
- if (ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s Send message failed 0x%0x\n", __func__, ret_code);
- return ret_code;
- }
- ret_code = i40iw_vf_wait_vchnl_resp(dev);
- if (ret_code)
- return ret_code;
- else
- return vchnl_req.ret_code;
-}
-
-/**
- * i40iw_vchnl_vf_add_hmc_objs - Add HMC Object
- * @dev: IWARP device pointer
- * @rsrc_type: HMC Resource type
- * @start_index: Starting index of the objects to be added
- * @rsrc_count: Number of resources to be added
- */
-enum i40iw_status_code i40iw_vchnl_vf_add_hmc_objs(struct i40iw_sc_dev *dev,
- enum i40iw_hmc_rsrc_type rsrc_type,
- u32 start_index,
- u32 rsrc_count)
-{
- struct i40iw_virtchnl_req vchnl_req;
- enum i40iw_status_code ret_code;
-
- if (!i40iw_vf_clear_to_send(dev))
- return I40IW_ERR_TIMEOUT;
- memset(&vchnl_req, 0, sizeof(vchnl_req));
- vchnl_req.dev = dev;
- vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
-
- ret_code = vchnl_vf_send_add_hmc_objs_req(dev,
- &vchnl_req,
- rsrc_type,
- start_index,
- rsrc_count);
- if (ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s Send message failed 0x%0x\n", __func__, ret_code);
- return ret_code;
- }
- ret_code = i40iw_vf_wait_vchnl_resp(dev);
- if (ret_code)
- return ret_code;
- else
- return vchnl_req.ret_code;
-}
-
-/**
- * i40iw_vchnl_vf_del_hmc_obj - del HMC obj
- * @dev: IWARP device pointer
- * @rsrc_type: HMC Resource type
- * @start_index: Starting index of the object to delete
- * @rsrc_count: Number of resources to be delete
- */
-enum i40iw_status_code i40iw_vchnl_vf_del_hmc_obj(struct i40iw_sc_dev *dev,
- enum i40iw_hmc_rsrc_type rsrc_type,
- u32 start_index,
- u32 rsrc_count)
-{
- struct i40iw_virtchnl_req vchnl_req;
- enum i40iw_status_code ret_code;
-
- if (!i40iw_vf_clear_to_send(dev))
- return I40IW_ERR_TIMEOUT;
- memset(&vchnl_req, 0, sizeof(vchnl_req));
- vchnl_req.dev = dev;
- vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
-
- ret_code = vchnl_vf_send_del_hmc_objs_req(dev,
- &vchnl_req,
- rsrc_type,
- start_index,
- rsrc_count);
- if (ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s Send message failed 0x%0x\n", __func__, ret_code);
- return ret_code;
- }
- ret_code = i40iw_vf_wait_vchnl_resp(dev);
- if (ret_code)
- return ret_code;
- else
- return vchnl_req.ret_code;
-}
-
-/**
- * i40iw_vchnl_vf_get_pe_stats - Get PE stats
- * @dev: IWARP device pointer
- * @hw_stats: HW stats struct
- */
-enum i40iw_status_code i40iw_vchnl_vf_get_pe_stats(struct i40iw_sc_dev *dev,
- struct i40iw_dev_hw_stats *hw_stats)
-{
- struct i40iw_virtchnl_req vchnl_req;
- enum i40iw_status_code ret_code;
-
- if (!i40iw_vf_clear_to_send(dev))
- return I40IW_ERR_TIMEOUT;
- memset(&vchnl_req, 0, sizeof(vchnl_req));
- vchnl_req.dev = dev;
- vchnl_req.parm = hw_stats;
- vchnl_req.parm_len = sizeof(*hw_stats);
- vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
-
- ret_code = vchnl_vf_send_get_pe_stats_req(dev, &vchnl_req);
- if (ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s Send message failed 0x%0x\n", __func__, ret_code);
- return ret_code;
- }
- ret_code = i40iw_vf_wait_vchnl_resp(dev);
- if (ret_code)
- return ret_code;
- else
- return vchnl_req.ret_code;
-}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.h b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.h
deleted file mode 100644
index 24886ef08293..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_VIRTCHNL_H
-#define I40IW_VIRTCHNL_H
-
-#include "i40iw_hmc.h"
-
-#pragma pack(push, 1)
-
-struct i40iw_virtchnl_op_buf {
- u16 iw_op_code;
- u16 iw_op_ver;
- u16 iw_chnl_buf_len;
- u16 rsvd;
- u64 iw_chnl_op_ctx;
- /* Member alignment MUST be maintained above this location */
- u8 iw_chnl_buf[1];
-};
-
-struct i40iw_virtchnl_resp_buf {
- u64 iw_chnl_op_ctx;
- u16 iw_chnl_buf_len;
- s16 iw_op_ret_code;
- /* Member alignment MUST be maintained above this location */
- u16 rsvd[2];
- u8 iw_chnl_buf[1];
-};
-
-enum i40iw_virtchnl_ops {
- I40IW_VCHNL_OP_GET_VER = 0,
- I40IW_VCHNL_OP_GET_HMC_FCN,
- I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE,
- I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE,
- I40IW_VCHNL_OP_GET_STATS
-};
-
-#define I40IW_VCHNL_OP_GET_VER_V0 0
-#define I40IW_VCHNL_OP_GET_HMC_FCN_V0 0
-#define I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE_V0 0
-#define I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE_V0 0
-#define I40IW_VCHNL_OP_GET_STATS_V0 0
-#define I40IW_VCHNL_CHNL_VER_V0 0
-
-struct i40iw_dev_hw_stats;
-
-struct i40iw_virtchnl_hmc_obj_range {
- u16 obj_type;
- u16 rsvd;
- u32 start_index;
- u32 obj_count;
-};
-
-enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev,
- u32 vf_id,
- u8 *msg,
- u16 len);
-
-enum i40iw_status_code i40iw_vchnl_recv_vf(struct i40iw_sc_dev *dev,
- u32 vf_id,
- u8 *msg,
- u16 len);
-
-struct i40iw_virtchnl_req {
- struct i40iw_sc_dev *dev;
- struct i40iw_virtchnl_op_buf *vchnl_msg;
- void *parm;
- u32 vf_id;
- u16 parm_len;
- s16 ret_code;
-};
-
-#pragma pack(pop)
-
-enum i40iw_status_code i40iw_vchnl_vf_get_ver(struct i40iw_sc_dev *dev,
- u32 *vchnl_ver);
-
-enum i40iw_status_code i40iw_vchnl_vf_get_hmc_fcn(struct i40iw_sc_dev *dev,
- u16 *hmc_fcn);
-
-enum i40iw_status_code i40iw_vchnl_vf_add_hmc_objs(struct i40iw_sc_dev *dev,
- enum i40iw_hmc_rsrc_type rsrc_type,
- u32 start_index,
- u32 rsrc_count);
-
-enum i40iw_status_code i40iw_vchnl_vf_del_hmc_obj(struct i40iw_sc_dev *dev,
- enum i40iw_hmc_rsrc_type rsrc_type,
- u32 start_index,
- u32 rsrc_count);
-
-enum i40iw_status_code i40iw_vchnl_vf_get_pe_stats(struct i40iw_sc_dev *dev,
- struct i40iw_dev_hw_stats *hw_stats);
-#endif
diff --git a/drivers/infiniband/hw/ionic/Kconfig b/drivers/infiniband/hw/ionic/Kconfig
new file mode 100644
index 000000000000..de6f10e9b6e9
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2018-2025, Advanced Micro Devices, Inc.
+
+config INFINIBAND_IONIC
+ tristate "AMD Pensando DSC RDMA/RoCE Support"
+ depends on NETDEVICES && ETHERNET && PCI && INET && IONIC
+ help
+ This enables RDMA/RoCE support for the AMD Pensando family of
+ Distributed Services Cards (DSCs).
+
+ To learn more, visit our website at
+ <https://www.amd.com/en/products/accelerators/pensando.html>.
+
+ To compile this driver as a module, choose M here. The module
+ will be called ionic_rdma.
diff --git a/drivers/infiniband/hw/ionic/Makefile b/drivers/infiniband/hw/ionic/Makefile
new file mode 100644
index 000000000000..957973742820
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+
+ccflags-y := -I $(srctree)/drivers/net/ethernet/pensando/ionic
+
+obj-$(CONFIG_INFINIBAND_IONIC) += ionic_rdma.o
+
+ionic_rdma-y := \
+ ionic_ibdev.o ionic_lif_cfg.o ionic_queue.o ionic_pgtbl.o ionic_admin.o \
+ ionic_controlpath.o ionic_datapath.o ionic_hw_stats.o
diff --git a/drivers/infiniband/hw/ionic/ionic_admin.c b/drivers/infiniband/hw/ionic/ionic_admin.c
new file mode 100644
index 000000000000..2537aa55d12d
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_admin.c
@@ -0,0 +1,1229 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+
+#include "ionic_fw.h"
+#include "ionic_ibdev.h"
+
+#define IONIC_EQ_COUNT_MIN 4
+#define IONIC_AQ_COUNT_MIN 1
+
+/* not a valid queue position or negative error status */
+#define IONIC_ADMIN_POSTED 0x10000
+
+/* cpu can be held with irq disabled for COUNT * MS (for create/destroy_ah) */
+#define IONIC_ADMIN_BUSY_RETRY_COUNT 2000
+#define IONIC_ADMIN_BUSY_RETRY_MS 1
+
+/* admin queue will be considered failed if a command takes longer than this */
+#define IONIC_ADMIN_TIMEOUT (HZ * 2)
+#define IONIC_ADMIN_WARN (HZ / 8)
+
+/* poll the admin cq periodically to tolerate and report a missed event */
+#define IONIC_ADMIN_DELAY (HZ / 8)
+
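Taken together, the busy-wait path bounded by these constants can hold a CPU with IRQs disabled for at most IONIC_ADMIN_BUSY_RETRY_COUNT * IONIC_ADMIN_BUSY_RETRY_MS = 2000 * 1 ms = 2 s, the same bound as the IONIC_ADMIN_TIMEOUT watchdog (HZ * 2 jiffies = 2 seconds).
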
+/* work queue for polling the event queue and admin cq */
+struct workqueue_struct *ionic_evt_workq;
+
+static void ionic_admin_timedout(struct ionic_aq *aq)
+{
+ struct ionic_ibdev *dev = aq->dev;
+ unsigned long irqflags;
+ u16 pos;
+
+ spin_lock_irqsave(&aq->lock, irqflags);
+ if (ionic_queue_empty(&aq->q))
+ goto out;
+
+ /* Reset ALL adminq if any one times out */
+ if (atomic_read(&aq->admin_state) < IONIC_ADMIN_KILLED)
+ queue_work(ionic_evt_workq, &dev->reset_work);
+
+ ibdev_err(&dev->ibdev, "admin command timed out, aq %d after: %ums\n",
+ aq->aqid, (u32)jiffies_to_msecs(jiffies - aq->stamp));
+
+ pos = (aq->q.prod - 1) & aq->q.mask;
+ if (pos == aq->q.cons)
+ goto out;
+
+ ibdev_warn(&dev->ibdev, "admin pos %u (last posted)\n", pos);
+ print_hex_dump(KERN_WARNING, "cmd ", DUMP_PREFIX_OFFSET, 16, 1,
+ ionic_queue_at(&aq->q, pos),
+ BIT(aq->q.stride_log2), true);
+
+out:
+ spin_unlock_irqrestore(&aq->lock, irqflags);
+}
+
+static void ionic_admin_reset_dwork(struct ionic_ibdev *dev)
+{
+ if (atomic_read(&dev->admin_state) == IONIC_ADMIN_KILLED)
+ return;
+
+ queue_delayed_work(ionic_evt_workq, &dev->admin_dwork,
+ IONIC_ADMIN_DELAY);
+}
+
+static void ionic_admin_reset_wdog(struct ionic_aq *aq)
+{
+ if (atomic_read(&aq->admin_state) == IONIC_ADMIN_KILLED)
+ return;
+
+ aq->stamp = jiffies;
+ ionic_admin_reset_dwork(aq->dev);
+}
+
+static bool ionic_admin_next_cqe(struct ionic_ibdev *dev, struct ionic_cq *cq,
+ struct ionic_v1_cqe **cqe)
+{
+ struct ionic_v1_cqe *qcqe = ionic_queue_at_prod(&cq->q);
+
+ if (unlikely(cq->color != ionic_v1_cqe_color(qcqe)))
+ return false;
+
+ /* Prevent out-of-order reads of the CQE */
+ dma_rmb();
+ *cqe = qcqe;
+
+ return true;
+}
+
+static void ionic_admin_poll_locked(struct ionic_aq *aq)
+{
+ struct ionic_cq *cq = &aq->vcq->cq[0];
+ struct ionic_admin_wr *wr, *wr_next;
+ struct ionic_ibdev *dev = aq->dev;
+ u32 wr_strides, avlbl_strides;
+ struct ionic_v1_cqe *cqe;
+ u32 qtf, qid;
+ u16 old_prod;
+ u8 type;
+
+ lockdep_assert_held(&aq->lock);
+
+ if (atomic_read(&aq->admin_state) == IONIC_ADMIN_KILLED) {
+ list_for_each_entry_safe(wr, wr_next, &aq->wr_prod, aq_ent) {
+ INIT_LIST_HEAD(&wr->aq_ent);
+ aq->q_wr[wr->status].wr = NULL;
+ wr->status = atomic_read(&aq->admin_state);
+ complete_all(&wr->work);
+ }
+ INIT_LIST_HEAD(&aq->wr_prod);
+
+ list_for_each_entry_safe(wr, wr_next, &aq->wr_post, aq_ent) {
+ INIT_LIST_HEAD(&wr->aq_ent);
+ wr->status = atomic_read(&aq->admin_state);
+ complete_all(&wr->work);
+ }
+ INIT_LIST_HEAD(&aq->wr_post);
+
+ return;
+ }
+
+ old_prod = cq->q.prod;
+
+ while (ionic_admin_next_cqe(dev, cq, &cqe)) {
+ qtf = ionic_v1_cqe_qtf(cqe);
+ qid = ionic_v1_cqe_qtf_qid(qtf);
+ type = ionic_v1_cqe_qtf_type(qtf);
+
+ if (unlikely(type != IONIC_V1_CQE_TYPE_ADMIN)) {
+ ibdev_warn_ratelimited(&dev->ibdev,
+ "bad cqe type %u\n", type);
+ goto cq_next;
+ }
+
+ if (unlikely(qid != aq->aqid)) {
+ ibdev_warn_ratelimited(&dev->ibdev,
+ "bad cqe qid %u\n", qid);
+ goto cq_next;
+ }
+
+ if (unlikely(be16_to_cpu(cqe->admin.cmd_idx) != aq->q.cons)) {
+ ibdev_warn_ratelimited(&dev->ibdev,
+ "bad idx %u cons %u qid %u\n",
+ be16_to_cpu(cqe->admin.cmd_idx),
+ aq->q.cons, qid);
+ goto cq_next;
+ }
+
+ if (unlikely(ionic_queue_empty(&aq->q))) {
+ ibdev_warn_ratelimited(&dev->ibdev,
+ "bad cqe for empty adminq\n");
+ goto cq_next;
+ }
+
+ wr = aq->q_wr[aq->q.cons].wr;
+ if (wr) {
+ aq->q_wr[aq->q.cons].wr = NULL;
+ list_del_init(&wr->aq_ent);
+
+ wr->cqe = *cqe;
+ wr->status = atomic_read(&aq->admin_state);
+ complete_all(&wr->work);
+ }
+
+ ionic_queue_consume_entries(&aq->q,
+ aq->q_wr[aq->q.cons].wqe_strides);
+
+cq_next:
+ ionic_queue_produce(&cq->q);
+ cq->color = ionic_color_wrap(cq->q.prod, cq->color);
+ }
+
+ if (old_prod != cq->q.prod) {
+ ionic_admin_reset_wdog(aq);
+ cq->q.cons = cq->q.prod;
+ ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.cq_qtype,
+ ionic_queue_dbell_val(&cq->q));
+ queue_work(ionic_evt_workq, &aq->work);
+ } else if (!aq->armed) {
+ aq->armed = true;
+ cq->arm_any_prod = ionic_queue_next(&cq->q, cq->arm_any_prod);
+ ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.cq_qtype,
+ cq->q.dbell | IONIC_CQ_RING_ARM |
+ cq->arm_any_prod);
+ queue_work(ionic_evt_workq, &aq->work);
+ }
+
+ if (atomic_read(&aq->admin_state) != IONIC_ADMIN_ACTIVE)
+ return;
+
+ old_prod = aq->q.prod;
+
+ if (ionic_queue_empty(&aq->q) && !list_empty(&aq->wr_post))
+ ionic_admin_reset_wdog(aq);
+
+ if (list_empty(&aq->wr_post))
+ return;
+
+ do {
+ u8 *src;
+ int i, src_len;
+ size_t stride_len;
+
+ wr = list_first_entry(&aq->wr_post, struct ionic_admin_wr,
+ aq_ent);
+ wr_strides = (le16_to_cpu(wr->wqe.len) + ADMIN_WQE_HDR_LEN +
+ (ADMIN_WQE_STRIDE - 1)) >> aq->q.stride_log2;
+ avlbl_strides = ionic_queue_length_remaining(&aq->q);
+
+ if (wr_strides > avlbl_strides)
+ break;
+
+ list_move(&wr->aq_ent, &aq->wr_prod);
+ wr->status = aq->q.prod;
+ aq->q_wr[aq->q.prod].wr = wr;
+ aq->q_wr[aq->q.prod].wqe_strides = wr_strides;
+
+ src_len = le16_to_cpu(wr->wqe.len);
+ src = (u8 *)&wr->wqe.cmd;
+
+ /* First stride */
+ memcpy(ionic_queue_at_prod(&aq->q), &wr->wqe,
+ ADMIN_WQE_HDR_LEN);
+ stride_len = ADMIN_WQE_STRIDE - ADMIN_WQE_HDR_LEN;
+ if (stride_len > src_len)
+ stride_len = src_len;
+ memcpy(ionic_queue_at_prod(&aq->q) + ADMIN_WQE_HDR_LEN,
+ src, stride_len);
+ ibdev_dbg(&dev->ibdev, "post admin prod %u (%u strides)\n",
+ aq->q.prod, wr_strides);
+ print_hex_dump_debug("wqe ", DUMP_PREFIX_OFFSET, 16, 1,
+ ionic_queue_at_prod(&aq->q),
+ BIT(aq->q.stride_log2), true);
+ ionic_queue_produce(&aq->q);
+
+ /* Remaining strides */
+ for (i = stride_len; i < src_len; i += stride_len) {
+ stride_len = ADMIN_WQE_STRIDE;
+
+ if (i + stride_len > src_len)
+ stride_len = src_len - i;
+
+ memcpy(ionic_queue_at_prod(&aq->q), src + i,
+ stride_len);
+ print_hex_dump_debug("wqe ", DUMP_PREFIX_OFFSET, 16, 1,
+ ionic_queue_at_prod(&aq->q),
+ BIT(aq->q.stride_log2), true);
+ ionic_queue_produce(&aq->q);
+ }
+ } while (!list_empty(&aq->wr_post));
+
+ if (old_prod != aq->q.prod)
+ ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.aq_qtype,
+ ionic_queue_dbell_val(&aq->q));
+}
+
+static void ionic_admin_dwork(struct work_struct *ws)
+{
+ struct ionic_ibdev *dev =
+ container_of(ws, struct ionic_ibdev, admin_dwork.work);
+ struct ionic_aq *aq, *bad_aq = NULL;
+ bool do_reschedule = false;
+ unsigned long irqflags;
+ bool do_reset = false;
+ u16 pos;
+ int i;
+
+ for (i = 0; i < dev->lif_cfg.aq_count; i++) {
+ aq = dev->aq_vec[i];
+
+ spin_lock_irqsave(&aq->lock, irqflags);
+
+ if (ionic_queue_empty(&aq->q))
+ goto next_aq;
+
+ /* Reschedule if any queue has outstanding work */
+ do_reschedule = true;
+
+ if (time_is_after_eq_jiffies(aq->stamp + IONIC_ADMIN_WARN))
+ /* Warning threshold not met, nothing to do */
+ goto next_aq;
+
+ /* See if polling now makes some progress */
+ pos = aq->q.cons;
+ ionic_admin_poll_locked(aq);
+ if (pos != aq->q.cons) {
+ ibdev_dbg(&dev->ibdev,
+ "missed event for acq %d\n", aq->cqid);
+ goto next_aq;
+ }
+
+ if (time_is_after_eq_jiffies(aq->stamp +
+ IONIC_ADMIN_TIMEOUT)) {
+ /* Timeout threshold not met */
+ ibdev_dbg(&dev->ibdev, "no progress after %ums\n",
+ (u32)jiffies_to_msecs(jiffies - aq->stamp));
+ goto next_aq;
+ }
+
+ /* Queue timed out */
+ bad_aq = aq;
+ do_reset = true;
+next_aq:
+ spin_unlock_irqrestore(&aq->lock, irqflags);
+ }
+
+ if (do_reset)
+ /* Reset RDMA lif on a timeout */
+ ionic_admin_timedout(bad_aq);
+ else if (do_reschedule)
+ /* Try to poll again later */
+ ionic_admin_reset_dwork(dev);
+}
+
+static void ionic_admin_work(struct work_struct *ws)
+{
+ struct ionic_aq *aq = container_of(ws, struct ionic_aq, work);
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&aq->lock, irqflags);
+ ionic_admin_poll_locked(aq);
+ spin_unlock_irqrestore(&aq->lock, irqflags);
+}
+
+static void ionic_admin_post_aq(struct ionic_aq *aq, struct ionic_admin_wr *wr)
+{
+ unsigned long irqflags;
+ bool poll;
+
+ wr->status = IONIC_ADMIN_POSTED;
+ wr->aq = aq;
+
+ spin_lock_irqsave(&aq->lock, irqflags);
+ poll = list_empty(&aq->wr_post);
+ list_add(&wr->aq_ent, &aq->wr_post);
+ if (poll)
+ ionic_admin_poll_locked(aq);
+ spin_unlock_irqrestore(&aq->lock, irqflags);
+}
+
+void ionic_admin_post(struct ionic_ibdev *dev, struct ionic_admin_wr *wr)
+{
+ int aq_idx;
+
+ /* Use cpu id for the adminq selection */
+ aq_idx = raw_smp_processor_id() % dev->lif_cfg.aq_count;
+ ionic_admin_post_aq(dev->aq_vec[aq_idx], wr);
+}
+
+static void ionic_admin_cancel(struct ionic_admin_wr *wr)
+{
+ struct ionic_aq *aq = wr->aq;
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&aq->lock, irqflags);
+
+ if (!list_empty(&wr->aq_ent)) {
+ list_del(&wr->aq_ent);
+ if (wr->status != IONIC_ADMIN_POSTED)
+ aq->q_wr[wr->status].wr = NULL;
+ }
+
+ spin_unlock_irqrestore(&aq->lock, irqflags);
+}
+
+static int ionic_admin_busy_wait(struct ionic_admin_wr *wr)
+{
+ struct ionic_aq *aq = wr->aq;
+ unsigned long irqflags;
+ int try_i;
+
+ for (try_i = 0; try_i < IONIC_ADMIN_BUSY_RETRY_COUNT; ++try_i) {
+ if (completion_done(&wr->work))
+ return 0;
+
+ mdelay(IONIC_ADMIN_BUSY_RETRY_MS);
+
+ spin_lock_irqsave(&aq->lock, irqflags);
+ ionic_admin_poll_locked(aq);
+ spin_unlock_irqrestore(&aq->lock, irqflags);
+ }
+
+ /*
+ * We timed out. Initiate an RDMA LIF reset and indicate the
+ * error to the caller.
+ */
+ ionic_admin_timedout(aq);
+ return -ETIMEDOUT;
+}
+
+int ionic_admin_wait(struct ionic_ibdev *dev, struct ionic_admin_wr *wr,
+ enum ionic_admin_flags flags)
+{
+ int rc, timo;
+
+ if (flags & IONIC_ADMIN_F_BUSYWAIT) {
+ /* Spin */
+ rc = ionic_admin_busy_wait(wr);
+ } else if (flags & IONIC_ADMIN_F_INTERRUPT) {
+ /*
+ * Interruptible sleep, 1s timeout
+ * This is used for commands which are safe for the caller
+ * to clean up without killing and resetting the adminq.
+ */
+ timo = wait_for_completion_interruptible_timeout(&wr->work,
+ HZ);
+ if (timo > 0)
+ rc = 0;
+ else if (timo == 0)
+ rc = -ETIMEDOUT;
+ else
+ rc = timo;
+ } else {
+ /*
+ * Uninterruptible sleep
+ * This is used for commands which are NOT safe for the
+ * caller to clean up. Cleanup must be handled by the
+ * adminq kill and reset process so that host memory is
+ * not corrupted by the device.
+ */
+ wait_for_completion(&wr->work);
+ rc = 0;
+ }
+
+ if (rc) {
+ ibdev_warn(&dev->ibdev, "wait status %d\n", rc);
+ ionic_admin_cancel(wr);
+ } else if (wr->status == IONIC_ADMIN_KILLED) {
+ ibdev_dbg(&dev->ibdev, "admin killed\n");
+
+ /* No error if admin already killed during teardown */
+ rc = (flags & IONIC_ADMIN_F_TEARDOWN) ? 0 : -ENODEV;
+ } else if (ionic_v1_cqe_error(&wr->cqe)) {
+ ibdev_warn(&dev->ibdev, "opcode %u error %u\n",
+ wr->wqe.op,
+ be32_to_cpu(wr->cqe.status_length));
+ rc = -EINVAL;
+ }
+ return rc;
+}
+
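The pair of calls above is the pattern every firmware command in this driver follows: build a struct ionic_admin_wr with an on-stack completion, ionic_admin_post() it, then ionic_admin_wait() with flags that suit the calling context. A minimal sketch of that shape, for a caller that may sleep (ionic_example_cmd is a hypothetical name used only for illustration; the real ionic_destroy_mr_cmd() later in this patch has essentially the same body, plus an opcode-support check):

static int ionic_example_cmd(struct ionic_ibdev *dev, u32 mrid)
{
	struct ionic_admin_wr wr = {
		.work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
		.wqe = {
			.op = IONIC_V1_ADMIN_DESTROY_MR,
			.len = cpu_to_le16(IONIC_ADMIN_DESTROY_MR_IN_V1_LEN),
			.cmd.destroy_mr.mr_id = cpu_to_le32(mrid),
		}
	};

	/* Select an adminq by CPU and append the wr to its post list */
	ionic_admin_post(dev, &wr);

	/* Sleep until the device completes the wqe or the adminq is killed */
	return ionic_admin_wait(dev, &wr, IONIC_ADMIN_F_TEARDOWN);
}

From atomic context the caller would pass IONIC_ADMIN_F_BUSYWAIT instead, and IONIC_ADMIN_F_TEARDOWN lets a destroy command "succeed" even after the adminq has been killed.
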
+static int ionic_rdma_devcmd(struct ionic_ibdev *dev,
+ struct ionic_admin_ctx *admin)
+{
+ int rc;
+
+ rc = ionic_adminq_post_wait(dev->lif_cfg.lif, admin);
+ if (rc)
+ return rc;
+
+ return ionic_error_to_errno(admin->comp.comp.status);
+}
+
+int ionic_rdma_reset_devcmd(struct ionic_ibdev *dev)
+{
+ struct ionic_admin_ctx admin = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(admin.work),
+ .cmd.rdma_reset = {
+ .opcode = IONIC_CMD_RDMA_RESET_LIF,
+ .lif_index = cpu_to_le16(dev->lif_cfg.lif_index),
+ },
+ };
+
+ return ionic_rdma_devcmd(dev, &admin);
+}
+
+static int ionic_rdma_queue_devcmd(struct ionic_ibdev *dev,
+ struct ionic_queue *q,
+ u32 qid, u32 cid, u16 opcode)
+{
+ struct ionic_admin_ctx admin = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(admin.work),
+ .cmd.rdma_queue = {
+ .opcode = opcode,
+ .lif_index = cpu_to_le16(dev->lif_cfg.lif_index),
+ .qid_ver = cpu_to_le32(qid),
+ .cid = cpu_to_le32(cid),
+ .dbid = cpu_to_le16(dev->lif_cfg.dbid),
+ .depth_log2 = q->depth_log2,
+ .stride_log2 = q->stride_log2,
+ .dma_addr = cpu_to_le64(q->dma),
+ },
+ };
+
+ return ionic_rdma_devcmd(dev, &admin);
+}
+
+static void ionic_rdma_admincq_comp(struct ib_cq *ibcq, void *cq_context)
+{
+ struct ionic_aq *aq = cq_context;
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&aq->lock, irqflags);
+ aq->armed = false;
+ if (atomic_read(&aq->admin_state) < IONIC_ADMIN_KILLED)
+ queue_work(ionic_evt_workq, &aq->work);
+ spin_unlock_irqrestore(&aq->lock, irqflags);
+}
+
+static void ionic_rdma_admincq_event(struct ib_event *event, void *cq_context)
+{
+ struct ionic_aq *aq = cq_context;
+
+ ibdev_err(&aq->dev->ibdev, "admincq event %d\n", event->event);
+}
+
+static struct ionic_vcq *ionic_create_rdma_admincq(struct ionic_ibdev *dev,
+ int comp_vector)
+{
+ struct ib_cq_init_attr attr = {
+ .cqe = IONIC_AQ_DEPTH,
+ .comp_vector = comp_vector,
+ };
+ struct ionic_tbl_buf buf = {};
+ struct ionic_vcq *vcq;
+ struct ionic_cq *cq;
+ int rc;
+
+ vcq = kzalloc(sizeof(*vcq), GFP_KERNEL);
+ if (!vcq)
+ return ERR_PTR(-ENOMEM);
+
+ vcq->ibcq.device = &dev->ibdev;
+ vcq->ibcq.comp_handler = ionic_rdma_admincq_comp;
+ vcq->ibcq.event_handler = ionic_rdma_admincq_event;
+ atomic_set(&vcq->ibcq.usecnt, 0);
+
+ vcq->udma_mask = 1;
+ cq = &vcq->cq[0];
+
+ rc = ionic_create_cq_common(vcq, &buf, &attr, NULL, NULL,
+ NULL, NULL, 0);
+ if (rc)
+ goto err_init;
+
+ rc = ionic_rdma_queue_devcmd(dev, &cq->q, cq->cqid, cq->eqid,
+ IONIC_CMD_RDMA_CREATE_CQ);
+ if (rc)
+ goto err_cmd;
+
+ return vcq;
+
+err_cmd:
+ ionic_destroy_cq_common(dev, cq);
+err_init:
+ kfree(vcq);
+
+ return ERR_PTR(rc);
+}
+
+static struct ionic_aq *__ionic_create_rdma_adminq(struct ionic_ibdev *dev,
+ u32 aqid, u32 cqid)
+{
+ struct ionic_aq *aq;
+ int rc;
+
+ aq = kzalloc(sizeof(*aq), GFP_KERNEL);
+ if (!aq)
+ return ERR_PTR(-ENOMEM);
+
+ atomic_set(&aq->admin_state, IONIC_ADMIN_KILLED);
+ aq->dev = dev;
+ aq->aqid = aqid;
+ aq->cqid = cqid;
+ spin_lock_init(&aq->lock);
+
+ rc = ionic_queue_init(&aq->q, dev->lif_cfg.hwdev, IONIC_EQ_DEPTH,
+ ADMIN_WQE_STRIDE);
+ if (rc)
+ goto err_q;
+
+ ionic_queue_dbell_init(&aq->q, aq->aqid);
+
+ aq->q_wr = kcalloc((u32)aq->q.mask + 1, sizeof(*aq->q_wr), GFP_KERNEL);
+ if (!aq->q_wr) {
+ rc = -ENOMEM;
+ goto err_wr;
+ }
+
+ INIT_LIST_HEAD(&aq->wr_prod);
+ INIT_LIST_HEAD(&aq->wr_post);
+
+ INIT_WORK(&aq->work, ionic_admin_work);
+ aq->armed = false;
+
+ return aq;
+
+err_wr:
+ ionic_queue_destroy(&aq->q, dev->lif_cfg.hwdev);
+err_q:
+ kfree(aq);
+
+ return ERR_PTR(rc);
+}
+
+static void __ionic_destroy_rdma_adminq(struct ionic_ibdev *dev,
+ struct ionic_aq *aq)
+{
+ kfree(aq->q_wr);
+ ionic_queue_destroy(&aq->q, dev->lif_cfg.hwdev);
+ kfree(aq);
+}
+
+static struct ionic_aq *ionic_create_rdma_adminq(struct ionic_ibdev *dev,
+ u32 aqid, u32 cqid)
+{
+ struct ionic_aq *aq;
+ int rc;
+
+ aq = __ionic_create_rdma_adminq(dev, aqid, cqid);
+ if (IS_ERR(aq))
+ return aq;
+
+ rc = ionic_rdma_queue_devcmd(dev, &aq->q, aq->aqid, aq->cqid,
+ IONIC_CMD_RDMA_CREATE_ADMINQ);
+ if (rc)
+ goto err_cmd;
+
+ return aq;
+
+err_cmd:
+ __ionic_destroy_rdma_adminq(dev, aq);
+
+ return ERR_PTR(rc);
+}
+
+static void ionic_flush_qs(struct ionic_ibdev *dev)
+{
+ struct ionic_qp *qp, *qp_tmp;
+ struct ionic_cq *cq, *cq_tmp;
+ LIST_HEAD(flush_list);
+ unsigned long index;
+
+ WARN_ON(!irqs_disabled());
+
+ /* Flush qp send and recv */
+ xa_lock(&dev->qp_tbl);
+ xa_for_each(&dev->qp_tbl, index, qp) {
+ kref_get(&qp->qp_kref);
+ list_add_tail(&qp->ibkill_flush_ent, &flush_list);
+ }
+ xa_unlock(&dev->qp_tbl);
+
+ list_for_each_entry_safe(qp, qp_tmp, &flush_list, ibkill_flush_ent) {
+ ionic_flush_qp(dev, qp);
+ kref_put(&qp->qp_kref, ionic_qp_complete);
+ list_del(&qp->ibkill_flush_ent);
+ }
+
+ /* Notify completions */
+ xa_lock(&dev->cq_tbl);
+ xa_for_each(&dev->cq_tbl, index, cq) {
+ kref_get(&cq->cq_kref);
+ list_add_tail(&cq->ibkill_flush_ent, &flush_list);
+ }
+ xa_unlock(&dev->cq_tbl);
+
+ list_for_each_entry_safe(cq, cq_tmp, &flush_list, ibkill_flush_ent) {
+ ionic_notify_flush_cq(cq);
+ kref_put(&cq->cq_kref, ionic_cq_complete);
+ list_del(&cq->ibkill_flush_ent);
+ }
+}
+
+static void ionic_kill_ibdev(struct ionic_ibdev *dev, bool fatal_path)
+{
+ unsigned long irqflags;
+ bool do_flush = false;
+ int i;
+
+ /* Mark AQs for drain and flush the QPs while irq is disabled */
+ local_irq_save(irqflags);
+
+ /* Mark the admin queue, flushing at most once */
+ for (i = 0; i < dev->lif_cfg.aq_count; i++) {
+ struct ionic_aq *aq = dev->aq_vec[i];
+
+ spin_lock(&aq->lock);
+ if (atomic_read(&aq->admin_state) != IONIC_ADMIN_KILLED) {
+ atomic_set(&aq->admin_state, IONIC_ADMIN_KILLED);
+ /* Flush incomplete admin commands */
+ ionic_admin_poll_locked(aq);
+ do_flush = true;
+ }
+ spin_unlock(&aq->lock);
+ }
+
+ if (do_flush)
+ ionic_flush_qs(dev);
+
+ local_irq_restore(irqflags);
+
+ /* Post a fatal event if requested */
+ if (fatal_path) {
+ struct ib_event ev;
+
+ ev.device = &dev->ibdev;
+ ev.element.port_num = 1;
+ ev.event = IB_EVENT_DEVICE_FATAL;
+
+ ib_dispatch_event(&ev);
+ }
+
+ atomic_set(&dev->admin_state, IONIC_ADMIN_KILLED);
+}
+
+void ionic_kill_rdma_admin(struct ionic_ibdev *dev, bool fatal_path)
+{
+ enum ionic_admin_state old_state;
+ unsigned long irqflags = 0;
+ int i, rc;
+
+ if (!dev->aq_vec)
+ return;
+
+ /*
+ * Admin queues are transitioned from active to paused to killed state.
+ * When in paused state, no new commands are issued to the device,
+ * nor are any completed locally. After resetting the lif, it will be
+ * safe to resume the rdma admin queues in the killed state. Commands
+ * will not be issued to the device, but will complete locally with status
+ * IONIC_ADMIN_KILLED. Handling completion will ensure that creating or
+ * modifying resources fails, but destroying resources succeeds.
+ * If there was a failure resetting the lif using this strategy,
+ * then the state of the device is unknown.
+ */
+ old_state = atomic_cmpxchg(&dev->admin_state, IONIC_ADMIN_ACTIVE,
+ IONIC_ADMIN_PAUSED);
+ if (old_state != IONIC_ADMIN_ACTIVE)
+ return;
+
+ /* Pause all the AQs */
+ local_irq_save(irqflags);
+ for (i = 0; i < dev->lif_cfg.aq_count; i++) {
+ struct ionic_aq *aq = dev->aq_vec[i];
+
+ spin_lock(&aq->lock);
+ /* pause rdma admin queues to reset lif */
+ if (atomic_read(&aq->admin_state) == IONIC_ADMIN_ACTIVE)
+ atomic_set(&aq->admin_state, IONIC_ADMIN_PAUSED);
+ spin_unlock(&aq->lock);
+ }
+ local_irq_restore(irqflags);
+
+ rc = ionic_rdma_reset_devcmd(dev);
+ if (unlikely(rc)) {
+ ibdev_err(&dev->ibdev, "failed to reset rdma %d\n", rc);
+ ionic_request_rdma_reset(dev->lif_cfg.lif);
+ }
+
+ ionic_kill_ibdev(dev, fatal_path);
+}
+
+static void ionic_reset_work(struct work_struct *ws)
+{
+ struct ionic_ibdev *dev =
+ container_of(ws, struct ionic_ibdev, reset_work);
+
+ ionic_kill_rdma_admin(dev, true);
+}
+
+static bool ionic_next_eqe(struct ionic_eq *eq, struct ionic_v1_eqe *eqe)
+{
+ struct ionic_v1_eqe *qeqe;
+ bool color;
+
+ qeqe = ionic_queue_at_prod(&eq->q);
+ color = ionic_v1_eqe_color(qeqe);
+
+ /* cons is color for eq */
+ if (eq->q.cons != color)
+ return false;
+
+ /* Prevent out-of-order reads of the EQE */
+ dma_rmb();
+
+ ibdev_dbg(&eq->dev->ibdev, "poll eq prod %u\n", eq->q.prod);
+ print_hex_dump_debug("eqe ", DUMP_PREFIX_OFFSET, 16, 1,
+ qeqe, BIT(eq->q.stride_log2), true);
+ *eqe = *qeqe;
+
+ return true;
+}
+
+static void ionic_cq_event(struct ionic_ibdev *dev, u32 cqid, u8 code)
+{
+ unsigned long irqflags;
+ struct ib_event ibev;
+ struct ionic_cq *cq;
+
+ xa_lock_irqsave(&dev->cq_tbl, irqflags);
+ cq = xa_load(&dev->cq_tbl, cqid);
+ if (cq)
+ kref_get(&cq->cq_kref);
+ xa_unlock_irqrestore(&dev->cq_tbl, irqflags);
+
+ if (!cq) {
+ ibdev_dbg(&dev->ibdev,
+ "missing cqid %#x code %u\n", cqid, code);
+ return;
+ }
+
+ switch (code) {
+ case IONIC_V1_EQE_CQ_NOTIFY:
+ if (cq->vcq->ibcq.comp_handler)
+ cq->vcq->ibcq.comp_handler(&cq->vcq->ibcq,
+ cq->vcq->ibcq.cq_context);
+ break;
+
+ case IONIC_V1_EQE_CQ_ERR:
+ if (cq->vcq->ibcq.event_handler) {
+ ibev.event = IB_EVENT_CQ_ERR;
+ ibev.device = &dev->ibdev;
+ ibev.element.cq = &cq->vcq->ibcq;
+
+ cq->vcq->ibcq.event_handler(&ibev,
+ cq->vcq->ibcq.cq_context);
+ }
+ break;
+
+ default:
+ ibdev_dbg(&dev->ibdev,
+ "unrecognized cqid %#x code %u\n", cqid, code);
+ break;
+ }
+
+ kref_put(&cq->cq_kref, ionic_cq_complete);
+}
+
+static void ionic_qp_event(struct ionic_ibdev *dev, u32 qpid, u8 code)
+{
+ unsigned long irqflags;
+ struct ib_event ibev;
+ struct ionic_qp *qp;
+
+ xa_lock_irqsave(&dev->qp_tbl, irqflags);
+ qp = xa_load(&dev->qp_tbl, qpid);
+ if (qp)
+ kref_get(&qp->qp_kref);
+ xa_unlock_irqrestore(&dev->qp_tbl, irqflags);
+
+ if (!qp) {
+ ibdev_dbg(&dev->ibdev,
+ "missing qpid %#x code %u\n", qpid, code);
+ return;
+ }
+
+ ibev.device = &dev->ibdev;
+ ibev.element.qp = &qp->ibqp;
+
+ switch (code) {
+ case IONIC_V1_EQE_SQ_DRAIN:
+ ibev.event = IB_EVENT_SQ_DRAINED;
+ break;
+
+ case IONIC_V1_EQE_QP_COMM_EST:
+ ibev.event = IB_EVENT_COMM_EST;
+ break;
+
+ case IONIC_V1_EQE_QP_LAST_WQE:
+ ibev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ break;
+
+ case IONIC_V1_EQE_QP_ERR:
+ ibev.event = IB_EVENT_QP_FATAL;
+ break;
+
+ case IONIC_V1_EQE_QP_ERR_REQUEST:
+ ibev.event = IB_EVENT_QP_REQ_ERR;
+ break;
+
+ case IONIC_V1_EQE_QP_ERR_ACCESS:
+ ibev.event = IB_EVENT_QP_ACCESS_ERR;
+ break;
+
+ default:
+ ibdev_dbg(&dev->ibdev,
+ "unrecognized qpid %#x code %u\n", qpid, code);
+ goto out;
+ }
+
+ if (qp->ibqp.event_handler)
+ qp->ibqp.event_handler(&ibev, qp->ibqp.qp_context);
+
+out:
+ kref_put(&qp->qp_kref, ionic_qp_complete);
+}
+
+static u16 ionic_poll_eq(struct ionic_eq *eq, u16 budget)
+{
+ struct ionic_ibdev *dev = eq->dev;
+ struct ionic_v1_eqe eqe;
+ u16 npolled = 0;
+ u8 type, code;
+ u32 evt, qid;
+
+ while (npolled < budget) {
+ if (!ionic_next_eqe(eq, &eqe))
+ break;
+
+ ionic_queue_produce(&eq->q);
+
+ /* cons is color for eq */
+ eq->q.cons = ionic_color_wrap(eq->q.prod, eq->q.cons);
+
+ ++npolled;
+
+ evt = ionic_v1_eqe_evt(&eqe);
+ type = ionic_v1_eqe_evt_type(evt);
+ code = ionic_v1_eqe_evt_code(evt);
+ qid = ionic_v1_eqe_evt_qid(evt);
+
+ switch (type) {
+ case IONIC_V1_EQE_TYPE_CQ:
+ ionic_cq_event(dev, qid, code);
+ break;
+
+ case IONIC_V1_EQE_TYPE_QP:
+ ionic_qp_event(dev, qid, code);
+ break;
+
+ default:
+ ibdev_dbg(&dev->ibdev,
+ "unknown event %#x type %u\n", evt, type);
+ }
+ }
+
+ return npolled;
+}
+
+static void ionic_poll_eq_work(struct work_struct *work)
+{
+ struct ionic_eq *eq = container_of(work, struct ionic_eq, work);
+ u32 npolled;
+
+ if (unlikely(!eq->enable) || WARN_ON(eq->armed))
+ return;
+
+ npolled = ionic_poll_eq(eq, IONIC_EQ_WORK_BUDGET);
+ if (npolled == IONIC_EQ_WORK_BUDGET) {
+ ionic_intr_credits(eq->dev->lif_cfg.intr_ctrl, eq->intr,
+ npolled, 0);
+ queue_work(ionic_evt_workq, &eq->work);
+ } else {
+ xchg(&eq->armed, 1);
+ ionic_intr_credits(eq->dev->lif_cfg.intr_ctrl, eq->intr,
+ 0, IONIC_INTR_CRED_UNMASK);
+ }
+}
+
+static irqreturn_t ionic_poll_eq_isr(int irq, void *eqptr)
+{
+ struct ionic_eq *eq = eqptr;
+ int was_armed;
+ u32 npolled;
+
+ was_armed = xchg(&eq->armed, 0);
+
+ if (unlikely(!eq->enable) || !was_armed)
+ return IRQ_HANDLED;
+
+ npolled = ionic_poll_eq(eq, IONIC_EQ_ISR_BUDGET);
+ if (npolled == IONIC_EQ_ISR_BUDGET) {
+ ionic_intr_credits(eq->dev->lif_cfg.intr_ctrl, eq->intr,
+ npolled, 0);
+ queue_work(ionic_evt_workq, &eq->work);
+ } else {
+ xchg(&eq->armed, 1);
+ ionic_intr_credits(eq->dev->lif_cfg.intr_ctrl, eq->intr,
+ 0, IONIC_INTR_CRED_UNMASK);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static struct ionic_eq *ionic_create_eq(struct ionic_ibdev *dev, int eqid)
+{
+ struct ionic_intr_info intr_obj = { };
+ struct ionic_eq *eq;
+ int rc;
+
+ eq = kzalloc(sizeof(*eq), GFP_KERNEL);
+ if (!eq)
+ return ERR_PTR(-ENOMEM);
+
+ eq->dev = dev;
+
+ rc = ionic_queue_init(&eq->q, dev->lif_cfg.hwdev, IONIC_EQ_DEPTH,
+ sizeof(struct ionic_v1_eqe));
+ if (rc)
+ goto err_q;
+
+ eq->eqid = eqid;
+
+ eq->armed = true;
+ eq->enable = false;
+ INIT_WORK(&eq->work, ionic_poll_eq_work);
+
+ rc = ionic_intr_alloc(dev->lif_cfg.lif, &intr_obj);
+ if (rc < 0)
+ goto err_intr;
+
+ eq->irq = intr_obj.vector;
+ eq->intr = intr_obj.index;
+
+ ionic_queue_dbell_init(&eq->q, eq->eqid);
+
+ /* cons is color for eq */
+ eq->q.cons = true;
+
+ snprintf(eq->name, sizeof(eq->name), "%s-%d-%d-eq",
+ "ionr", dev->lif_cfg.lif_index, eq->eqid);
+
+ ionic_intr_mask(dev->lif_cfg.intr_ctrl, eq->intr, IONIC_INTR_MASK_SET);
+ ionic_intr_mask_assert(dev->lif_cfg.intr_ctrl, eq->intr, IONIC_INTR_MASK_SET);
+ ionic_intr_coal_init(dev->lif_cfg.intr_ctrl, eq->intr, 0);
+ ionic_intr_clean(dev->lif_cfg.intr_ctrl, eq->intr);
+
+ eq->enable = true;
+
+ rc = request_irq(eq->irq, ionic_poll_eq_isr, 0, eq->name, eq);
+ if (rc)
+ goto err_irq;
+
+ rc = ionic_rdma_queue_devcmd(dev, &eq->q, eq->eqid, eq->intr,
+ IONIC_CMD_RDMA_CREATE_EQ);
+ if (rc)
+ goto err_cmd;
+
+ ionic_intr_mask(dev->lif_cfg.intr_ctrl, eq->intr, IONIC_INTR_MASK_CLEAR);
+
+ return eq;
+
+err_cmd:
+ eq->enable = false;
+ free_irq(eq->irq, eq);
+ flush_work(&eq->work);
+err_irq:
+ ionic_intr_free(dev->lif_cfg.lif, eq->intr);
+err_intr:
+ ionic_queue_destroy(&eq->q, dev->lif_cfg.hwdev);
+err_q:
+ kfree(eq);
+
+ return ERR_PTR(rc);
+}
+
+static void ionic_destroy_eq(struct ionic_eq *eq)
+{
+ struct ionic_ibdev *dev = eq->dev;
+
+ eq->enable = false;
+ free_irq(eq->irq, eq);
+ flush_work(&eq->work);
+
+ ionic_intr_free(dev->lif_cfg.lif, eq->intr);
+ ionic_queue_destroy(&eq->q, dev->lif_cfg.hwdev);
+ kfree(eq);
+}
+
+int ionic_create_rdma_admin(struct ionic_ibdev *dev)
+{
+ int eq_i = 0, aq_i = 0, rc = 0;
+ struct ionic_vcq *vcq;
+ struct ionic_aq *aq;
+ struct ionic_eq *eq;
+
+ dev->eq_vec = NULL;
+ dev->aq_vec = NULL;
+
+ INIT_WORK(&dev->reset_work, ionic_reset_work);
+ INIT_DELAYED_WORK(&dev->admin_dwork, ionic_admin_dwork);
+ atomic_set(&dev->admin_state, IONIC_ADMIN_KILLED);
+
+ if (dev->lif_cfg.aq_count > IONIC_AQ_COUNT) {
+ ibdev_dbg(&dev->ibdev, "limiting adminq count to %d\n",
+ IONIC_AQ_COUNT);
+ dev->lif_cfg.aq_count = IONIC_AQ_COUNT;
+ }
+
+ if (dev->lif_cfg.eq_count > IONIC_EQ_COUNT) {
+ dev_dbg(&dev->ibdev.dev, "limiting eventq count to %d\n",
+ IONIC_EQ_COUNT);
+ dev->lif_cfg.eq_count = IONIC_EQ_COUNT;
+ }
+
+ /* need at least IONIC_EQ_COUNT_MIN eqs and IONIC_AQ_COUNT_MIN aqs */
+ if (dev->lif_cfg.eq_count < IONIC_EQ_COUNT_MIN ||
+ dev->lif_cfg.aq_count < IONIC_AQ_COUNT_MIN) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ dev->eq_vec = kmalloc_array(dev->lif_cfg.eq_count, sizeof(*dev->eq_vec),
+ GFP_KERNEL);
+ if (!dev->eq_vec) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ for (eq_i = 0; eq_i < dev->lif_cfg.eq_count; ++eq_i) {
+ eq = ionic_create_eq(dev, eq_i + dev->lif_cfg.eq_base);
+ if (IS_ERR(eq)) {
+ rc = PTR_ERR(eq);
+
+ if (eq_i < IONIC_EQ_COUNT_MIN) {
+ ibdev_err(&dev->ibdev,
+ "fail create eq %pe\n", eq);
+ goto out;
+ }
+
+ /* ok, just fewer eq than device supports */
+ ibdev_dbg(&dev->ibdev, "eq count %d want %d rc %pe\n",
+ eq_i, dev->lif_cfg.eq_count, eq);
+
+ rc = 0;
+ break;
+ }
+
+ dev->eq_vec[eq_i] = eq;
+ }
+
+ dev->lif_cfg.eq_count = eq_i;
+
+ dev->aq_vec = kmalloc_array(dev->lif_cfg.aq_count, sizeof(*dev->aq_vec),
+ GFP_KERNEL);
+ if (!dev->aq_vec) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ /* Create one CQ per AQ */
+ for (aq_i = 0; aq_i < dev->lif_cfg.aq_count; ++aq_i) {
+ vcq = ionic_create_rdma_admincq(dev, aq_i % eq_i);
+ if (IS_ERR(vcq)) {
+ rc = PTR_ERR(vcq);
+
+ if (!aq_i) {
+ ibdev_err(&dev->ibdev,
+ "failed to create acq %pe\n", vcq);
+ goto out;
+ }
+
+ /* ok, just fewer adminq than device supports */
+ ibdev_dbg(&dev->ibdev, "acq count %d want %d rc %pe\n",
+ aq_i, dev->lif_cfg.aq_count, vcq);
+ break;
+ }
+
+ aq = ionic_create_rdma_adminq(dev, aq_i + dev->lif_cfg.aq_base,
+ vcq->cq[0].cqid);
+ if (IS_ERR(aq)) {
+ /* Clean up the dangling CQ */
+ ionic_destroy_cq_common(dev, &vcq->cq[0]);
+ kfree(vcq);
+
+ rc = PTR_ERR(aq);
+
+ if (!aq_i) {
+ ibdev_err(&dev->ibdev,
+ "failed to create aq %pe\n", aq);
+ goto out;
+ }
+
+ /* ok, just fewer adminq than device supports */
+ ibdev_dbg(&dev->ibdev, "aq count %d want %d rc %pe\n",
+ aq_i, dev->lif_cfg.aq_count, aq);
+ break;
+ }
+
+ vcq->ibcq.cq_context = aq;
+ aq->vcq = vcq;
+
+ atomic_set(&aq->admin_state, IONIC_ADMIN_ACTIVE);
+ dev->aq_vec[aq_i] = aq;
+ }
+
+ atomic_set(&dev->admin_state, IONIC_ADMIN_ACTIVE);
+out:
+ dev->lif_cfg.eq_count = eq_i;
+ dev->lif_cfg.aq_count = aq_i;
+
+ return rc;
+}
+
+void ionic_destroy_rdma_admin(struct ionic_ibdev *dev)
+{
+ struct ionic_vcq *vcq;
+ struct ionic_aq *aq;
+ struct ionic_eq *eq;
+
+ /*
+ * Killing the admin queues before destroying them makes sure all
+ * outstanding admin commands and completions are flushed.
+ * admin_state = IONIC_ADMIN_KILLED stops further work from being
+ * queued.
+ */
+ cancel_delayed_work_sync(&dev->admin_dwork);
+ cancel_work_sync(&dev->reset_work);
+
+ if (dev->aq_vec) {
+ while (dev->lif_cfg.aq_count > 0) {
+ aq = dev->aq_vec[--dev->lif_cfg.aq_count];
+ vcq = aq->vcq;
+
+ cancel_work_sync(&aq->work);
+
+ __ionic_destroy_rdma_adminq(dev, aq);
+ if (vcq) {
+ ionic_destroy_cq_common(dev, &vcq->cq[0]);
+ kfree(vcq);
+ }
+ }
+
+ kfree(dev->aq_vec);
+ }
+
+ if (dev->eq_vec) {
+ while (dev->lif_cfg.eq_count > 0) {
+ eq = dev->eq_vec[--dev->lif_cfg.eq_count];
+ ionic_destroy_eq(eq);
+ }
+
+ kfree(dev->eq_vec);
+ }
+}
diff --git a/drivers/infiniband/hw/ionic/ionic_controlpath.c b/drivers/infiniband/hw/ionic/ionic_controlpath.c
new file mode 100644
index 000000000000..ea12d9b8e125
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_controlpath.c
@@ -0,0 +1,2679 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_user_verbs.h>
+#include <ionic_api.h>
+
+#include "ionic_fw.h"
+#include "ionic_ibdev.h"
+
+#define ionic_set_ecn(tos) (((tos) | 2u) & ~1u)
+#define ionic_clear_ecn(tos) ((tos) & ~3u)
+
+static int ionic_validate_qdesc(struct ionic_qdesc *q)
+{
+ if (!q->addr || !q->size || !q->mask ||
+ !q->depth_log2 || !q->stride_log2)
+ return -EINVAL;
+
+ if (q->addr & (PAGE_SIZE - 1))
+ return -EINVAL;
+
+ if (q->mask != BIT(q->depth_log2) - 1)
+ return -EINVAL;
+
+ if (q->size < BIT_ULL(q->depth_log2 + q->stride_log2))
+ return -EINVAL;
+
+ return 0;
+}
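
As a concrete illustration of the checks above (all values assumed, and user_buf_va is a hypothetical page-aligned user address), a queue described with depth_log2 = 10 and stride_log2 = 6 validates only with a matching mask and a size of at least BIT_ULL(16) bytes:

	struct ionic_qdesc q = {
		.addr = user_buf_va,	/* must be PAGE_SIZE aligned */
		.size = 64 * 1024,	/* >= BIT_ULL(10 + 6) */
		.mask = BIT(10) - 1,	/* 1023 */
		.depth_log2 = 10,
		.stride_log2 = 6,
	};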
+
+static u32 ionic_get_eqid(struct ionic_ibdev *dev, u32 comp_vector, u8 udma_idx)
+{
+ /* EQ per vector per udma, and the first eqs reserved for async events.
+ * The rest of the vectors can be requested for completions.
+ */
+ u32 comp_vec_count = dev->lif_cfg.eq_count / dev->lif_cfg.udma_count - 1;
+
+ return (comp_vector % comp_vec_count + 1) * dev->lif_cfg.udma_count + udma_idx;
+}
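
As a worked example with assumed values: if eq_count = 8 and udma_count = 2, then comp_vec_count = 8 / 2 - 1 = 3; EQs 0 and 1 are never returned here (they stay reserved for async events), and comp_vector 4 on udma 1 maps to eqid (4 % 3 + 1) * 2 + 1 = 5.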
+
+static int ionic_get_cqid(struct ionic_ibdev *dev, u32 *cqid, u8 udma_idx)
+{
+ unsigned int size, base, bound;
+ int rc;
+
+ size = dev->lif_cfg.cq_count / dev->lif_cfg.udma_count;
+ base = size * udma_idx;
+ bound = base + size;
+
+ rc = ionic_resid_get_shared(&dev->inuse_cqid, base, bound);
+ if (rc >= 0) {
+ /* cq_base is zero or a multiple of two queue groups */
+ *cqid = dev->lif_cfg.cq_base +
+ ionic_bitid_to_qid(rc, dev->lif_cfg.udma_qgrp_shift,
+ dev->half_cqid_udma_shift);
+
+ rc = 0;
+ }
+
+ return rc;
+}
+
+static void ionic_put_cqid(struct ionic_ibdev *dev, u32 cqid)
+{
+ u32 bitid = ionic_qid_to_bitid(cqid - dev->lif_cfg.cq_base,
+ dev->lif_cfg.udma_qgrp_shift,
+ dev->half_cqid_udma_shift);
+
+ ionic_resid_put(&dev->inuse_cqid, bitid);
+}
+
+int ionic_create_cq_common(struct ionic_vcq *vcq,
+ struct ionic_tbl_buf *buf,
+ const struct ib_cq_init_attr *attr,
+ struct ionic_ctx *ctx,
+ struct ib_udata *udata,
+ struct ionic_qdesc *req_cq,
+ __u32 *resp_cqid,
+ int udma_idx)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(vcq->ibcq.device);
+ struct ionic_cq *cq = &vcq->cq[udma_idx];
+ void *entry;
+ int rc;
+
+ cq->vcq = vcq;
+
+ if (attr->cqe < 1 || attr->cqe + IONIC_CQ_GRACE > 0xffff) {
+ rc = -EINVAL;
+ goto err_args;
+ }
+
+ rc = ionic_get_cqid(dev, &cq->cqid, udma_idx);
+ if (rc)
+ goto err_args;
+
+ cq->eqid = ionic_get_eqid(dev, attr->comp_vector, udma_idx);
+
+ spin_lock_init(&cq->lock);
+ INIT_LIST_HEAD(&cq->poll_sq);
+ INIT_LIST_HEAD(&cq->flush_sq);
+ INIT_LIST_HEAD(&cq->flush_rq);
+
+ if (udata) {
+ rc = ionic_validate_qdesc(req_cq);
+ if (rc)
+ goto err_qdesc;
+
+ cq->umem = ib_umem_get(&dev->ibdev, req_cq->addr, req_cq->size,
+ IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(cq->umem)) {
+ rc = PTR_ERR(cq->umem);
+ goto err_qdesc;
+ }
+
+ cq->q.ptr = NULL;
+ cq->q.size = req_cq->size;
+ cq->q.mask = req_cq->mask;
+ cq->q.depth_log2 = req_cq->depth_log2;
+ cq->q.stride_log2 = req_cq->stride_log2;
+
+ *resp_cqid = cq->cqid;
+ } else {
+ rc = ionic_queue_init(&cq->q, dev->lif_cfg.hwdev,
+ attr->cqe + IONIC_CQ_GRACE,
+ sizeof(struct ionic_v1_cqe));
+ if (rc)
+ goto err_q_init;
+
+ ionic_queue_dbell_init(&cq->q, cq->cqid);
+ cq->color = true;
+ cq->credit = cq->q.mask;
+ }
+
+ rc = ionic_pgtbl_init(dev, buf, cq->umem, cq->q.dma, 1, PAGE_SIZE);
+ if (rc)
+ goto err_pgtbl_init;
+
+ init_completion(&cq->cq_rel_comp);
+ kref_init(&cq->cq_kref);
+
+ entry = xa_store_irq(&dev->cq_tbl, cq->cqid, cq, GFP_KERNEL);
+ if (entry) {
+ if (!xa_is_err(entry))
+ rc = -EINVAL;
+ else
+ rc = xa_err(entry);
+
+ goto err_xa;
+ }
+
+ return 0;
+
+err_xa:
+ ionic_pgtbl_unbuf(dev, buf);
+err_pgtbl_init:
+ if (!udata)
+ ionic_queue_destroy(&cq->q, dev->lif_cfg.hwdev);
+err_q_init:
+ if (cq->umem)
+ ib_umem_release(cq->umem);
+err_qdesc:
+ ionic_put_cqid(dev, cq->cqid);
+err_args:
+ cq->vcq = NULL;
+
+ return rc;
+}
+
+void ionic_destroy_cq_common(struct ionic_ibdev *dev, struct ionic_cq *cq)
+{
+ if (!cq->vcq)
+ return;
+
+ xa_erase_irq(&dev->cq_tbl, cq->cqid);
+
+ kref_put(&cq->cq_kref, ionic_cq_complete);
+ wait_for_completion(&cq->cq_rel_comp);
+
+ if (cq->umem)
+ ib_umem_release(cq->umem);
+ else
+ ionic_queue_destroy(&cq->q, dev->lif_cfg.hwdev);
+
+ ionic_put_cqid(dev, cq->cqid);
+
+ cq->vcq = NULL;
+}
+
+static int ionic_validate_qdesc_zero(struct ionic_qdesc *q)
+{
+ if (q->addr || q->size || q->mask || q->depth_log2 || q->stride_log2)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ionic_get_pdid(struct ionic_ibdev *dev, u32 *pdid)
+{
+ int rc;
+
+ rc = ionic_resid_get(&dev->inuse_pdid);
+ if (rc < 0)
+ return rc;
+
+ *pdid = rc;
+ return 0;
+}
+
+static int ionic_get_ahid(struct ionic_ibdev *dev, u32 *ahid)
+{
+ int rc;
+
+ rc = ionic_resid_get(&dev->inuse_ahid);
+ if (rc < 0)
+ return rc;
+
+ *ahid = rc;
+ return 0;
+}
+
+static int ionic_get_mrid(struct ionic_ibdev *dev, u32 *mrid)
+{
+ int rc;
+
+ /* wrap to 1, skip reserved lkey */
+ rc = ionic_resid_get_shared(&dev->inuse_mrid, 1,
+ dev->inuse_mrid.inuse_size);
+ if (rc < 0)
+ return rc;
+
+ *mrid = ionic_mrid(rc, dev->next_mrkey++);
+ return 0;
+}
+
+static int ionic_get_gsi_qpid(struct ionic_ibdev *dev, u32 *qpid)
+{
+ int rc = 0;
+
+ rc = ionic_resid_get_shared(&dev->inuse_qpid, IB_QPT_GSI, IB_QPT_GSI + 1);
+ if (rc < 0)
+ return rc;
+
+ *qpid = IB_QPT_GSI;
+ return 0;
+}
+
+static int ionic_get_qpid(struct ionic_ibdev *dev, u32 *qpid,
+ u8 *udma_idx, u8 udma_mask)
+{
+ unsigned int size, base, bound;
+ int udma_i, udma_x, udma_ix;
+ int rc = -EINVAL;
+
+ udma_x = dev->next_qpid_udma_idx;
+
+ dev->next_qpid_udma_idx ^= dev->lif_cfg.udma_count - 1;
+
+ for (udma_i = 0; udma_i < dev->lif_cfg.udma_count; ++udma_i) {
+ udma_ix = udma_i ^ udma_x;
+
+ if (!(udma_mask & BIT(udma_ix)))
+ continue;
+
+ size = dev->lif_cfg.qp_count / dev->lif_cfg.udma_count;
+ base = size * udma_ix;
+ bound = base + size;
+
+ /* skip reserved SMI and GSI qpids in group zero */
+ if (!base)
+ base = 2;
+
+ rc = ionic_resid_get_shared(&dev->inuse_qpid, base, bound);
+ if (rc >= 0) {
+ *qpid = ionic_bitid_to_qid(rc,
+ dev->lif_cfg.udma_qgrp_shift,
+ dev->half_qpid_udma_shift);
+ *udma_idx = udma_ix;
+
+ rc = 0;
+ break;
+ }
+ }
+
+ return rc;
+}
+
+static int ionic_get_dbid(struct ionic_ibdev *dev, u32 *dbid, phys_addr_t *addr)
+{
+ int rc, dbpage_num;
+
+ /* wrap to 1, skip kernel reserved */
+ rc = ionic_resid_get_shared(&dev->inuse_dbid, 1,
+ dev->inuse_dbid.inuse_size);
+ if (rc < 0)
+ return rc;
+
+ dbpage_num = (dev->lif_cfg.lif_hw_index * dev->lif_cfg.dbid_count) + rc;
+ *addr = dev->lif_cfg.db_phys + ((phys_addr_t)dbpage_num << PAGE_SHIFT);
+
+ *dbid = rc;
+
+ return 0;
+}
+
+static void ionic_put_pdid(struct ionic_ibdev *dev, u32 pdid)
+{
+ ionic_resid_put(&dev->inuse_pdid, pdid);
+}
+
+static void ionic_put_ahid(struct ionic_ibdev *dev, u32 ahid)
+{
+ ionic_resid_put(&dev->inuse_ahid, ahid);
+}
+
+static void ionic_put_mrid(struct ionic_ibdev *dev, u32 mrid)
+{
+ ionic_resid_put(&dev->inuse_mrid, ionic_mrid_index(mrid));
+}
+
+static void ionic_put_qpid(struct ionic_ibdev *dev, u32 qpid)
+{
+ u32 bitid = ionic_qid_to_bitid(qpid,
+ dev->lif_cfg.udma_qgrp_shift,
+ dev->half_qpid_udma_shift);
+
+ ionic_resid_put(&dev->inuse_qpid, bitid);
+}
+
+static void ionic_put_dbid(struct ionic_ibdev *dev, u32 dbid)
+{
+ ionic_resid_put(&dev->inuse_dbid, dbid);
+}
+
+static struct rdma_user_mmap_entry*
+ionic_mmap_entry_insert(struct ionic_ctx *ctx, unsigned long size,
+ unsigned long pfn, u8 mmap_flags, u64 *offset)
+{
+ struct ionic_mmap_entry *entry;
+ int rc;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return NULL;
+
+ entry->size = size;
+ entry->pfn = pfn;
+ entry->mmap_flags = mmap_flags;
+
+ rc = rdma_user_mmap_entry_insert(&ctx->ibctx, &entry->rdma_entry,
+ entry->size);
+ if (rc) {
+ kfree(entry);
+ return NULL;
+ }
+
+ if (offset)
+ *offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
+
+ return &entry->rdma_entry;
+}
+
+int ionic_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibctx->device);
+ struct ionic_ctx *ctx = to_ionic_ctx(ibctx);
+ struct ionic_ctx_resp resp = {};
+ struct ionic_ctx_req req;
+ phys_addr_t db_phys = 0;
+ int rc;
+
+ rc = ib_copy_from_udata(&req, udata, sizeof(req));
+ if (rc)
+ return rc;
+
+ /* try to allocate dbid for user ctx */
+ rc = ionic_get_dbid(dev, &ctx->dbid, &db_phys);
+ if (rc < 0)
+ return rc;
+
+ ibdev_dbg(&dev->ibdev, "user space dbid %u\n", ctx->dbid);
+
+ ctx->mmap_dbell = ionic_mmap_entry_insert(ctx, PAGE_SIZE,
+ PHYS_PFN(db_phys), 0, NULL);
+ if (!ctx->mmap_dbell) {
+ rc = -ENOMEM;
+ goto err_mmap_dbell;
+ }
+
+ resp.page_shift = PAGE_SHIFT;
+
+ resp.dbell_offset = db_phys & ~PAGE_MASK;
+
+ resp.version = dev->lif_cfg.rdma_version;
+ resp.qp_opcodes = dev->lif_cfg.qp_opcodes;
+ resp.admin_opcodes = dev->lif_cfg.admin_opcodes;
+
+ resp.sq_qtype = dev->lif_cfg.sq_qtype;
+ resp.rq_qtype = dev->lif_cfg.rq_qtype;
+ resp.cq_qtype = dev->lif_cfg.cq_qtype;
+ resp.admin_qtype = dev->lif_cfg.aq_qtype;
+ resp.max_stride = dev->lif_cfg.max_stride;
+ resp.max_spec = IONIC_SPEC_HIGH;
+
+ resp.udma_count = dev->lif_cfg.udma_count;
+ resp.expdb_mask = dev->lif_cfg.expdb_mask;
+
+ if (dev->lif_cfg.sq_expdb)
+ resp.expdb_qtypes |= IONIC_EXPDB_SQ;
+ if (dev->lif_cfg.rq_expdb)
+ resp.expdb_qtypes |= IONIC_EXPDB_RQ;
+
+ rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (rc)
+ goto err_resp;
+
+ return 0;
+
+err_resp:
+ rdma_user_mmap_entry_remove(ctx->mmap_dbell);
+err_mmap_dbell:
+ ionic_put_dbid(dev, ctx->dbid);
+
+ return rc;
+}
+
+void ionic_dealloc_ucontext(struct ib_ucontext *ibctx)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibctx->device);
+ struct ionic_ctx *ctx = to_ionic_ctx(ibctx);
+
+ rdma_user_mmap_entry_remove(ctx->mmap_dbell);
+ ionic_put_dbid(dev, ctx->dbid);
+}
+
+int ionic_mmap(struct ib_ucontext *ibctx, struct vm_area_struct *vma)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibctx->device);
+ struct ionic_ctx *ctx = to_ionic_ctx(ibctx);
+ struct rdma_user_mmap_entry *rdma_entry;
+ struct ionic_mmap_entry *ionic_entry;
+ int rc = 0;
+
+ rdma_entry = rdma_user_mmap_entry_get(&ctx->ibctx, vma);
+ if (!rdma_entry) {
+ ibdev_dbg(&dev->ibdev, "not found %#lx\n",
+ vma->vm_pgoff << PAGE_SHIFT);
+ return -EINVAL;
+ }
+
+ ionic_entry = container_of(rdma_entry, struct ionic_mmap_entry,
+ rdma_entry);
+
+ ibdev_dbg(&dev->ibdev, "writecombine? %d\n",
+ ionic_entry->mmap_flags & IONIC_MMAP_WC);
+ if (ionic_entry->mmap_flags & IONIC_MMAP_WC)
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+ else
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ ibdev_dbg(&dev->ibdev, "remap st %#lx pf %#lx sz %#lx\n",
+ vma->vm_start, ionic_entry->pfn, ionic_entry->size);
+ rc = rdma_user_mmap_io(&ctx->ibctx, vma, ionic_entry->pfn,
+ ionic_entry->size, vma->vm_page_prot,
+ rdma_entry);
+ if (rc)
+ ibdev_dbg(&dev->ibdev, "remap failed %d\n", rc);
+
+ rdma_user_mmap_entry_put(rdma_entry);
+ return rc;
+}
+
+void ionic_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
+{
+ struct ionic_mmap_entry *ionic_entry;
+
+ ionic_entry = container_of(rdma_entry, struct ionic_mmap_entry,
+ rdma_entry);
+ kfree(ionic_entry);
+}
+
+int ionic_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibpd->device);
+ struct ionic_pd *pd = to_ionic_pd(ibpd);
+
+ return ionic_get_pdid(dev, &pd->pdid);
+}
+
+int ionic_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibpd->device);
+ struct ionic_pd *pd = to_ionic_pd(ibpd);
+
+ ionic_put_pdid(dev, pd->pdid);
+
+ return 0;
+}
+
+static int ionic_build_hdr(struct ionic_ibdev *dev,
+ struct ib_ud_header *hdr,
+ const struct rdma_ah_attr *attr,
+ u16 sport, bool want_ecn)
+{
+ const struct ib_global_route *grh;
+ enum rdma_network_type net;
+ u16 vlan;
+ int rc;
+
+ if (attr->ah_flags != IB_AH_GRH)
+ return -EINVAL;
+ if (attr->type != RDMA_AH_ATTR_TYPE_ROCE)
+ return -EINVAL;
+
+ grh = rdma_ah_read_grh(attr);
+
+ rc = rdma_read_gid_l2_fields(grh->sgid_attr, &vlan, &hdr->eth.smac_h[0]);
+ if (rc)
+ return rc;
+
+ net = rdma_gid_attr_network_type(grh->sgid_attr);
+
+ rc = ib_ud_header_init(0, /* no payload */
+ 0, /* no lrh */
+ 1, /* yes eth */
+ vlan != 0xffff,
+ 0, /* no grh */
+ net == RDMA_NETWORK_IPV4 ? 4 : 6,
+ 1, /* yes udp */
+ 0, /* no imm */
+ hdr);
+ if (rc)
+ return rc;
+
+ ether_addr_copy(hdr->eth.dmac_h, attr->roce.dmac);
+
+ if (net == RDMA_NETWORK_IPV4) {
+ hdr->eth.type = cpu_to_be16(ETH_P_IP);
+ hdr->ip4.frag_off = cpu_to_be16(0x4000); /* don't fragment */
+ hdr->ip4.ttl = grh->hop_limit;
+ hdr->ip4.tot_len = cpu_to_be16(0xffff);
+ hdr->ip4.saddr =
+ *(const __be32 *)(grh->sgid_attr->gid.raw + 12);
+ hdr->ip4.daddr = *(const __be32 *)(grh->dgid.raw + 12);
+
+ if (want_ecn)
+ hdr->ip4.tos = ionic_set_ecn(grh->traffic_class);
+ else
+ hdr->ip4.tos = ionic_clear_ecn(grh->traffic_class);
+ } else {
+ hdr->eth.type = cpu_to_be16(ETH_P_IPV6);
+ hdr->grh.flow_label = cpu_to_be32(grh->flow_label);
+ hdr->grh.hop_limit = grh->hop_limit;
+ hdr->grh.source_gid = grh->sgid_attr->gid;
+ hdr->grh.destination_gid = grh->dgid;
+
+ if (want_ecn)
+ hdr->grh.traffic_class =
+ ionic_set_ecn(grh->traffic_class);
+ else
+ hdr->grh.traffic_class =
+ ionic_clear_ecn(grh->traffic_class);
+ }
+
+ if (vlan != 0xffff) {
+ vlan |= rdma_ah_get_sl(attr) << VLAN_PRIO_SHIFT;
+ hdr->vlan.tag = cpu_to_be16(vlan);
+ hdr->vlan.type = hdr->eth.type;
+ hdr->eth.type = cpu_to_be16(ETH_P_8021Q);
+ }
+
+ hdr->udp.sport = cpu_to_be16(sport);
+ hdr->udp.dport = cpu_to_be16(ROCE_V2_UDP_DPORT);
+
+ return 0;
+}
+
+static void ionic_set_ah_attr(struct ionic_ibdev *dev,
+ struct rdma_ah_attr *ah_attr,
+ struct ib_ud_header *hdr,
+ int sgid_index)
+{
+ u32 flow_label;
+ u16 vlan = 0;
+ u8 tos, ttl;
+
+ if (hdr->vlan_present)
+ vlan = be16_to_cpu(hdr->vlan.tag);
+
+ if (hdr->ipv4_present) {
+ flow_label = 0;
+ ttl = hdr->ip4.ttl;
+ tos = hdr->ip4.tos;
+ *(__be16 *)(hdr->grh.destination_gid.raw + 10) = cpu_to_be16(0xffff);
+ *(__be32 *)(hdr->grh.destination_gid.raw + 12) = hdr->ip4.daddr;
+ } else {
+ flow_label = be32_to_cpu(hdr->grh.flow_label);
+ ttl = hdr->grh.hop_limit;
+ tos = hdr->grh.traffic_class;
+ }
+
+ memset(ah_attr, 0, sizeof(*ah_attr));
+ ah_attr->type = RDMA_AH_ATTR_TYPE_ROCE;
+ if (hdr->eth_present)
+ ether_addr_copy(ah_attr->roce.dmac, hdr->eth.dmac_h);
+ rdma_ah_set_sl(ah_attr, vlan >> VLAN_PRIO_SHIFT);
+ rdma_ah_set_port_num(ah_attr, 1);
+ rdma_ah_set_grh(ah_attr, NULL, flow_label, sgid_index, ttl, tos);
+ rdma_ah_set_dgid_raw(ah_attr, &hdr->grh.destination_gid);
+}
+
+static int ionic_create_ah_cmd(struct ionic_ibdev *dev,
+ struct ionic_ah *ah,
+ struct ionic_pd *pd,
+ struct rdma_ah_attr *attr,
+ u32 flags)
+{
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_CREATE_AH,
+ .len = cpu_to_le16(IONIC_ADMIN_CREATE_AH_IN_V1_LEN),
+ .cmd.create_ah = {
+ .pd_id = cpu_to_le32(pd->pdid),
+ .dbid_flags = cpu_to_le16(dev->lif_cfg.dbid),
+ .id_ver = cpu_to_le32(ah->ahid),
+ }
+ }
+ };
+ enum ionic_admin_flags admin_flags = 0;
+ dma_addr_t hdr_dma = 0;
+ void *hdr_buf;
+ gfp_t gfp = GFP_ATOMIC;
+ int rc, hdr_len = 0;
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_CREATE_AH)
+ return -EBADRQC;
+
+ if (flags & RDMA_CREATE_AH_SLEEPABLE)
+ gfp = GFP_KERNEL;
+ else
+ admin_flags |= IONIC_ADMIN_F_BUSYWAIT;
+
+ rc = ionic_build_hdr(dev, &ah->hdr, attr, IONIC_ROCE_UDP_SPORT, false);
+ if (rc)
+ return rc;
+
+ if (ah->hdr.eth.type == cpu_to_be16(ETH_P_8021Q)) {
+ if (ah->hdr.vlan.type == cpu_to_be16(ETH_P_IP))
+ wr.wqe.cmd.create_ah.csum_profile =
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_UDP;
+ else
+ wr.wqe.cmd.create_ah.csum_profile =
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV6_UDP;
+ } else {
+ if (ah->hdr.eth.type == cpu_to_be16(ETH_P_IP))
+ wr.wqe.cmd.create_ah.csum_profile =
+ IONIC_TFP_CSUM_PROF_ETH_IPV4_UDP;
+ else
+ wr.wqe.cmd.create_ah.csum_profile =
+ IONIC_TFP_CSUM_PROF_ETH_IPV6_UDP;
+ }
+
+ ah->sgid_index = rdma_ah_read_grh(attr)->sgid_index;
+
+ hdr_buf = kmalloc(PAGE_SIZE, gfp);
+ if (!hdr_buf)
+ return -ENOMEM;
+
+ hdr_len = ib_ud_header_pack(&ah->hdr, hdr_buf);
+ hdr_len -= IB_BTH_BYTES;
+ hdr_len -= IB_DETH_BYTES;
+ ibdev_dbg(&dev->ibdev, "roce packet header template\n");
+ print_hex_dump_debug("hdr ", DUMP_PREFIX_OFFSET, 16, 1,
+ hdr_buf, hdr_len, true);
+
+ hdr_dma = dma_map_single(dev->lif_cfg.hwdev, hdr_buf, hdr_len,
+ DMA_TO_DEVICE);
+
+ rc = dma_mapping_error(dev->lif_cfg.hwdev, hdr_dma);
+ if (rc)
+ goto err_dma;
+
+ wr.wqe.cmd.create_ah.dma_addr = cpu_to_le64(hdr_dma);
+ wr.wqe.cmd.create_ah.length = cpu_to_le32(hdr_len);
+
+ ionic_admin_post(dev, &wr);
+ rc = ionic_admin_wait(dev, &wr, admin_flags);
+
+ dma_unmap_single(dev->lif_cfg.hwdev, hdr_dma, hdr_len,
+ DMA_TO_DEVICE);
+err_dma:
+ kfree(hdr_buf);
+
+ return rc;
+}
+
+static int ionic_destroy_ah_cmd(struct ionic_ibdev *dev, u32 ahid, u32 flags)
+{
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_DESTROY_AH,
+ .len = cpu_to_le16(IONIC_ADMIN_DESTROY_AH_IN_V1_LEN),
+ .cmd.destroy_ah = {
+ .ah_id = cpu_to_le32(ahid),
+ },
+ }
+ };
+ enum ionic_admin_flags admin_flags = IONIC_ADMIN_F_TEARDOWN;
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_DESTROY_AH)
+ return -EBADRQC;
+
+ if (!(flags & RDMA_CREATE_AH_SLEEPABLE))
+ admin_flags |= IONIC_ADMIN_F_BUSYWAIT;
+
+ ionic_admin_post(dev, &wr);
+ ionic_admin_wait(dev, &wr, admin_flags);
+
+ /* No host-memory resource is associated with ah, so it is ok
+ * to "succeed" and complete this destroy ah on the host.
+ */
+ return 0;
+}
+
+int ionic_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibah->device);
+ struct rdma_ah_attr *attr = init_attr->ah_attr;
+ struct ionic_pd *pd = to_ionic_pd(ibah->pd);
+ struct ionic_ah *ah = to_ionic_ah(ibah);
+ struct ionic_ah_resp resp = {};
+ u32 flags = init_attr->flags;
+ int rc;
+
+ rc = ionic_get_ahid(dev, &ah->ahid);
+ if (rc)
+ return rc;
+
+ rc = ionic_create_ah_cmd(dev, ah, pd, attr, flags);
+ if (rc)
+ goto err_cmd;
+
+ if (udata) {
+ resp.ahid = ah->ahid;
+
+ rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (rc)
+ goto err_resp;
+ }
+
+ return 0;
+
+err_resp:
+ ionic_destroy_ah_cmd(dev, ah->ahid, flags);
+err_cmd:
+ ionic_put_ahid(dev, ah->ahid);
+ return rc;
+}
+
+int ionic_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibah->device);
+ struct ionic_ah *ah = to_ionic_ah(ibah);
+
+ ionic_set_ah_attr(dev, ah_attr, &ah->hdr, ah->sgid_index);
+
+ return 0;
+}
+
+int ionic_destroy_ah(struct ib_ah *ibah, u32 flags)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibah->device);
+ struct ionic_ah *ah = to_ionic_ah(ibah);
+ int rc;
+
+ rc = ionic_destroy_ah_cmd(dev, ah->ahid, flags);
+ if (rc)
+ return rc;
+
+ ionic_put_ahid(dev, ah->ahid);
+
+ return 0;
+}
+
+static int ionic_create_mr_cmd(struct ionic_ibdev *dev,
+ struct ionic_pd *pd,
+ struct ionic_mr *mr,
+ u64 addr,
+ u64 length)
+{
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_CREATE_MR,
+ .len = cpu_to_le16(IONIC_ADMIN_CREATE_MR_IN_V1_LEN),
+ .cmd.create_mr = {
+ .va = cpu_to_le64(addr),
+ .length = cpu_to_le64(length),
+ .pd_id = cpu_to_le32(pd->pdid),
+ .page_size_log2 = mr->buf.page_size_log2,
+ .tbl_index = cpu_to_le32(~0),
+ .map_count = cpu_to_le32(mr->buf.tbl_pages),
+ .dma_addr = ionic_pgtbl_dma(&mr->buf, addr),
+ .dbid_flags = cpu_to_le16(mr->flags),
+ .id_ver = cpu_to_le32(mr->mrid),
+ }
+ }
+ };
+ int rc;
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_CREATE_MR)
+ return -EBADRQC;
+
+ ionic_admin_post(dev, &wr);
+ rc = ionic_admin_wait(dev, &wr, 0);
+ if (!rc)
+ mr->created = true;
+
+ return rc;
+}
+
+static int ionic_destroy_mr_cmd(struct ionic_ibdev *dev, u32 mrid)
+{
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_DESTROY_MR,
+ .len = cpu_to_le16(IONIC_ADMIN_DESTROY_MR_IN_V1_LEN),
+ .cmd.destroy_mr = {
+ .mr_id = cpu_to_le32(mrid),
+ },
+ }
+ };
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_DESTROY_MR)
+ return -EBADRQC;
+
+ ionic_admin_post(dev, &wr);
+
+ return ionic_admin_wait(dev, &wr, IONIC_ADMIN_F_TEARDOWN);
+}
+
+struct ib_mr *ionic_get_dma_mr(struct ib_pd *ibpd, int access)
+{
+ struct ionic_pd *pd = to_ionic_pd(ibpd);
+ struct ionic_mr *mr;
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->ibmr.lkey = IONIC_DMA_LKEY;
+ mr->ibmr.rkey = IONIC_DMA_RKEY;
+
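+ /* Mark the pd privileged so that its QPs may use the reserved DMA keys */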
+ if (pd)
+ pd->flags |= IONIC_QPF_PRIVILEGED;
+
+ return &mr->ibmr;
+}
+
+struct ib_mr *ionic_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 addr, int access, struct ib_dmah *dmah,
+ struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibpd->device);
+ struct ionic_pd *pd = to_ionic_pd(ibpd);
+ struct ionic_mr *mr;
+ unsigned long pg_sz;
+ int rc;
+
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ rc = ionic_get_mrid(dev, &mr->mrid);
+ if (rc)
+ goto err_mrid;
+
+ mr->ibmr.lkey = mr->mrid;
+ mr->ibmr.rkey = mr->mrid;
+ mr->ibmr.iova = addr;
+ mr->ibmr.length = length;
+
+ mr->flags = IONIC_MRF_USER_MR | to_ionic_mr_flags(access);
+
+ mr->umem = ib_umem_get(&dev->ibdev, start, length, access);
+ if (IS_ERR(mr->umem)) {
+ rc = PTR_ERR(mr->umem);
+ goto err_umem;
+ }
+
+ pg_sz = ib_umem_find_best_pgsz(mr->umem,
+ dev->lif_cfg.page_size_supported,
+ addr);
+ if (!pg_sz) {
+ rc = -EINVAL;
+ goto err_pgtbl;
+ }
+
+ rc = ionic_pgtbl_init(dev, &mr->buf, mr->umem, 0, 1, pg_sz);
+ if (rc)
+ goto err_pgtbl;
+
+ rc = ionic_create_mr_cmd(dev, pd, mr, addr, length);
+ if (rc)
+ goto err_cmd;
+
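+ /* The page-table buffer is only needed for the create command; release it */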
+ ionic_pgtbl_unbuf(dev, &mr->buf);
+
+ return &mr->ibmr;
+
+err_cmd:
+ ionic_pgtbl_unbuf(dev, &mr->buf);
+err_pgtbl:
+ ib_umem_release(mr->umem);
+err_umem:
+ ionic_put_mrid(dev, mr->mrid);
+err_mrid:
+ kfree(mr);
+ return ERR_PTR(rc);
+}
+
+struct ib_mr *ionic_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 offset,
+ u64 length, u64 addr, int fd, int access,
+ struct ib_dmah *dmah,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibpd->device);
+ struct ionic_pd *pd = to_ionic_pd(ibpd);
+ struct ib_umem_dmabuf *umem_dmabuf;
+ struct ionic_mr *mr;
+ u64 pg_sz;
+ int rc;
+
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ rc = ionic_get_mrid(dev, &mr->mrid);
+ if (rc)
+ goto err_mrid;
+
+ mr->ibmr.lkey = mr->mrid;
+ mr->ibmr.rkey = mr->mrid;
+ mr->ibmr.iova = addr;
+ mr->ibmr.length = length;
+
+ mr->flags = IONIC_MRF_USER_MR | to_ionic_mr_flags(access);
+
+ umem_dmabuf = ib_umem_dmabuf_get_pinned(&dev->ibdev, offset, length,
+ fd, access);
+ if (IS_ERR(umem_dmabuf)) {
+ rc = PTR_ERR(umem_dmabuf);
+ goto err_umem;
+ }
+
+ mr->umem = &umem_dmabuf->umem;
+
+ pg_sz = ib_umem_find_best_pgsz(mr->umem,
+ dev->lif_cfg.page_size_supported,
+ addr);
+ if (!pg_sz) {
+ rc = -EINVAL;
+ goto err_pgtbl;
+ }
+
+ rc = ionic_pgtbl_init(dev, &mr->buf, mr->umem, 0, 1, pg_sz);
+ if (rc)
+ goto err_pgtbl;
+
+ rc = ionic_create_mr_cmd(dev, pd, mr, addr, length);
+ if (rc)
+ goto err_cmd;
+
+ ionic_pgtbl_unbuf(dev, &mr->buf);
+
+ return &mr->ibmr;
+
+err_cmd:
+ ionic_pgtbl_unbuf(dev, &mr->buf);
+err_pgtbl:
+ ib_umem_release(mr->umem);
+err_umem:
+ ionic_put_mrid(dev, mr->mrid);
+err_mrid:
+ kfree(mr);
+ return ERR_PTR(rc);
+}
+
+int ionic_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibmr->device);
+ struct ionic_mr *mr = to_ionic_mr(ibmr);
+ int rc;
+
+ if (!mr->ibmr.lkey)
+ goto out;
+
+ if (mr->created) {
+ rc = ionic_destroy_mr_cmd(dev, mr->mrid);
+ if (rc)
+ return rc;
+ }
+
+ ionic_pgtbl_unbuf(dev, &mr->buf);
+
+ if (mr->umem)
+ ib_umem_release(mr->umem);
+
+ ionic_put_mrid(dev, mr->mrid);
+
+out:
+ kfree(mr);
+
+ return 0;
+}
+
+struct ib_mr *ionic_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type type,
+ u32 max_sg)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibpd->device);
+ struct ionic_pd *pd = to_ionic_pd(ibpd);
+ struct ionic_mr *mr;
+ int rc;
+
+ if (type != IB_MR_TYPE_MEM_REG)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ rc = ionic_get_mrid(dev, &mr->mrid);
+ if (rc)
+ goto err_mrid;
+
+ mr->ibmr.lkey = mr->mrid;
+ mr->ibmr.rkey = mr->mrid;
+
+ mr->flags = IONIC_MRF_PHYS_MR;
+
+ rc = ionic_pgtbl_init(dev, &mr->buf, mr->umem, 0, max_sg, PAGE_SIZE);
+ if (rc)
+ goto err_pgtbl;
+
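+ /* Start with an empty map; pages are filled in later by ionic_map_mr_sg() */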
+ mr->buf.tbl_pages = 0;
+
+ rc = ionic_create_mr_cmd(dev, pd, mr, 0, 0);
+ if (rc)
+ goto err_cmd;
+
+ return &mr->ibmr;
+
+err_cmd:
+ ionic_pgtbl_unbuf(dev, &mr->buf);
+err_pgtbl:
+ ionic_put_mrid(dev, mr->mrid);
+err_mrid:
+ kfree(mr);
+ return ERR_PTR(rc);
+}
+
+static int ionic_map_mr_page(struct ib_mr *ibmr, u64 dma)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibmr->device);
+ struct ionic_mr *mr = to_ionic_mr(ibmr);
+
+ ibdev_dbg(&dev->ibdev, "dma %p\n", (void *)dma);
+ return ionic_pgtbl_page(&mr->buf, dma);
+}
+
+int ionic_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibmr->device);
+ struct ionic_mr *mr = to_ionic_mr(ibmr);
+ int rc;
+
+ /* mr must be allocated using ib_alloc_mr() */
+ if (unlikely(!mr->buf.tbl_limit))
+ return -EINVAL;
+
+ mr->buf.tbl_pages = 0;
+
+ if (mr->buf.tbl_buf)
+ dma_sync_single_for_cpu(dev->lif_cfg.hwdev, mr->buf.tbl_dma,
+ mr->buf.tbl_size, DMA_TO_DEVICE);
+
+ ibdev_dbg(&dev->ibdev, "sg %p nent %d\n", sg, sg_nents);
+ rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, ionic_map_mr_page);
+
+ mr->buf.page_size_log2 = order_base_2(ibmr->page_size);
+
+ if (mr->buf.tbl_buf)
+ dma_sync_single_for_device(dev->lif_cfg.hwdev, mr->buf.tbl_dma,
+ mr->buf.tbl_size, DMA_TO_DEVICE);
+
+ return rc;
+}
+
+int ionic_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibmw->device);
+ struct ionic_pd *pd = to_ionic_pd(ibmw->pd);
+ struct ionic_mr *mr = to_ionic_mw(ibmw);
+ int rc;
+
+ rc = ionic_get_mrid(dev, &mr->mrid);
+ if (rc)
+ return rc;
+
+ mr->ibmw.rkey = mr->mrid;
+
+ if (mr->ibmw.type == IB_MW_TYPE_1)
+ mr->flags = IONIC_MRF_MW_1;
+ else
+ mr->flags = IONIC_MRF_MW_2;
+
+ rc = ionic_create_mr_cmd(dev, pd, mr, 0, 0);
+ if (rc)
+ goto err_cmd;
+
+ return 0;
+
+err_cmd:
+ ionic_put_mrid(dev, mr->mrid);
+ return rc;
+}
+
+int ionic_dealloc_mw(struct ib_mw *ibmw)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibmw->device);
+ struct ionic_mr *mr = to_ionic_mw(ibmw);
+ int rc;
+
+ rc = ionic_destroy_mr_cmd(dev, mr->mrid);
+ if (rc)
+ return rc;
+
+ ionic_put_mrid(dev, mr->mrid);
+
+ return 0;
+}
+
+static int ionic_create_cq_cmd(struct ionic_ibdev *dev,
+ struct ionic_ctx *ctx,
+ struct ionic_cq *cq,
+ struct ionic_tbl_buf *buf)
+{
+ const u16 dbid = ionic_ctx_dbid(dev, ctx);
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_CREATE_CQ,
+ .len = cpu_to_le16(IONIC_ADMIN_CREATE_CQ_IN_V1_LEN),
+ .cmd.create_cq = {
+ .eq_id = cpu_to_le32(cq->eqid),
+ .depth_log2 = cq->q.depth_log2,
+ .stride_log2 = cq->q.stride_log2,
+ .page_size_log2 = buf->page_size_log2,
+ .tbl_index = cpu_to_le32(~0),
+ .map_count = cpu_to_le32(buf->tbl_pages),
+ .dma_addr = ionic_pgtbl_dma(buf, 0),
+ .dbid_flags = cpu_to_le16(dbid),
+ .id_ver = cpu_to_le32(cq->cqid),
+ }
+ }
+ };
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_CREATE_CQ)
+ return -EBADRQC;
+
+ ionic_admin_post(dev, &wr);
+
+ return ionic_admin_wait(dev, &wr, 0);
+}
+
+static int ionic_destroy_cq_cmd(struct ionic_ibdev *dev, u32 cqid)
+{
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_DESTROY_CQ,
+ .len = cpu_to_le16(IONIC_ADMIN_DESTROY_CQ_IN_V1_LEN),
+ .cmd.destroy_cq = {
+ .cq_id = cpu_to_le32(cqid),
+ },
+ }
+ };
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_DESTROY_CQ)
+ return -EBADRQC;
+
+ ionic_admin_post(dev, &wr);
+
+ return ionic_admin_wait(dev, &wr, IONIC_ADMIN_F_TEARDOWN);
+}
+
+int ionic_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibcq->device);
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct ionic_ctx *ctx =
+ rdma_udata_to_drv_context(udata, struct ionic_ctx, ibctx);
+ struct ionic_vcq *vcq = to_ionic_vcq(ibcq);
+ struct ionic_tbl_buf buf = {};
+ struct ionic_cq_resp resp;
+ struct ionic_cq_req req;
+ int udma_idx = 0, rc;
+
+ if (udata) {
+ rc = ib_copy_from_udata(&req, udata, sizeof(req));
+ if (rc)
+ return rc;
+ }
+
+ vcq->udma_mask = BIT(dev->lif_cfg.udma_count) - 1;
+
+ if (udata)
+ vcq->udma_mask &= req.udma_mask;
+
+ if (!vcq->udma_mask) {
+ rc = -EINVAL;
+ goto err_init;
+ }
+
+ for (; udma_idx < dev->lif_cfg.udma_count; ++udma_idx) {
+ if (!(vcq->udma_mask & BIT(udma_idx)))
+ continue;
+
+ rc = ionic_create_cq_common(vcq, &buf, attr, ctx, udata,
+ &req.cq[udma_idx],
+ &resp.cqid[udma_idx],
+ udma_idx);
+ if (rc)
+ goto err_init;
+
+ rc = ionic_create_cq_cmd(dev, ctx, &vcq->cq[udma_idx], &buf);
+ if (rc)
+ goto err_cmd;
+
+ ionic_pgtbl_unbuf(dev, &buf);
+ }
+
+ vcq->ibcq.cqe = attr->cqe;
+
+ if (udata) {
+ resp.udma_mask = vcq->udma_mask;
+
+ rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (rc)
+ goto err_resp;
+ }
+
+ return 0;
+
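+ /* Error unwind: the labels below jump into the loop partway so that only
+ * what was set up for the failing udma index is torn down, then the loop
+ * continues in reverse for the indices that were fully created.
+ */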
+err_resp:
+ while (udma_idx) {
+ --udma_idx;
+ if (!(vcq->udma_mask & BIT(udma_idx)))
+ continue;
+ ionic_destroy_cq_cmd(dev, vcq->cq[udma_idx].cqid);
+err_cmd:
+ ionic_pgtbl_unbuf(dev, &buf);
+ ionic_destroy_cq_common(dev, &vcq->cq[udma_idx]);
+err_init:
+ ;
+ }
+
+ return rc;
+}
+
+int ionic_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibcq->device);
+ struct ionic_vcq *vcq = to_ionic_vcq(ibcq);
+ int udma_idx, rc_tmp, rc = 0;
+
+ for (udma_idx = dev->lif_cfg.udma_count; udma_idx; ) {
+ --udma_idx;
+
+ if (!(vcq->udma_mask & BIT(udma_idx)))
+ continue;
+
+ rc_tmp = ionic_destroy_cq_cmd(dev, vcq->cq[udma_idx].cqid);
+ if (rc_tmp) {
+ if (!rc)
+ rc = rc_tmp;
+
+ continue;
+ }
+
+ ionic_destroy_cq_common(dev, &vcq->cq[udma_idx]);
+ }
+
+ return rc;
+}
+
+static bool pd_remote_privileged(struct ib_pd *pd)
+{
+ return pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY;
+}
+
+static int ionic_create_qp_cmd(struct ionic_ibdev *dev,
+ struct ionic_pd *pd,
+ struct ionic_cq *send_cq,
+ struct ionic_cq *recv_cq,
+ struct ionic_qp *qp,
+ struct ionic_tbl_buf *sq_buf,
+ struct ionic_tbl_buf *rq_buf,
+ struct ib_qp_init_attr *attr)
+{
+ const u16 dbid = ionic_obj_dbid(dev, pd->ibpd.uobject);
+ const u32 flags = to_ionic_qp_flags(0, 0,
+ qp->sq_cmb & IONIC_CMB_ENABLE,
+ qp->rq_cmb & IONIC_CMB_ENABLE,
+ qp->sq_spec, qp->rq_spec,
+ pd->flags & IONIC_QPF_PRIVILEGED,
+ pd_remote_privileged(&pd->ibpd));
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_CREATE_QP,
+ .len = cpu_to_le16(IONIC_ADMIN_CREATE_QP_IN_V1_LEN),
+ .cmd.create_qp = {
+ .pd_id = cpu_to_le32(pd->pdid),
+ .priv_flags = cpu_to_be32(flags),
+ .type_state = to_ionic_qp_type(attr->qp_type),
+ .dbid_flags = cpu_to_le16(dbid),
+ .id_ver = cpu_to_le32(qp->qpid),
+ }
+ }
+ };
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_CREATE_QP)
+ return -EBADRQC;
+
+ if (qp->has_sq) {
+ wr.wqe.cmd.create_qp.sq_cq_id = cpu_to_le32(send_cq->cqid);
+ wr.wqe.cmd.create_qp.sq_depth_log2 = qp->sq.depth_log2;
+ wr.wqe.cmd.create_qp.sq_stride_log2 = qp->sq.stride_log2;
+ wr.wqe.cmd.create_qp.sq_page_size_log2 = sq_buf->page_size_log2;
+ wr.wqe.cmd.create_qp.sq_tbl_index_xrcd_id = cpu_to_le32(~0);
+ wr.wqe.cmd.create_qp.sq_map_count =
+ cpu_to_le32(sq_buf->tbl_pages);
+ wr.wqe.cmd.create_qp.sq_dma_addr = ionic_pgtbl_dma(sq_buf, 0);
+ }
+
+ if (qp->has_rq) {
+ wr.wqe.cmd.create_qp.rq_cq_id = cpu_to_le32(recv_cq->cqid);
+ wr.wqe.cmd.create_qp.rq_depth_log2 = qp->rq.depth_log2;
+ wr.wqe.cmd.create_qp.rq_stride_log2 = qp->rq.stride_log2;
+ wr.wqe.cmd.create_qp.rq_page_size_log2 = rq_buf->page_size_log2;
+ wr.wqe.cmd.create_qp.rq_tbl_index_srq_id = cpu_to_le32(~0);
+ wr.wqe.cmd.create_qp.rq_map_count =
+ cpu_to_le32(rq_buf->tbl_pages);
+ wr.wqe.cmd.create_qp.rq_dma_addr = ionic_pgtbl_dma(rq_buf, 0);
+ }
+
+ ionic_admin_post(dev, &wr);
+
+ return ionic_admin_wait(dev, &wr, 0);
+}
+
+static int ionic_modify_qp_cmd(struct ionic_ibdev *dev,
+ struct ionic_pd *pd,
+ struct ionic_qp *qp,
+ struct ib_qp_attr *attr,
+ int mask)
+{
+ const u32 flags = to_ionic_qp_flags(attr->qp_access_flags,
+ attr->en_sqd_async_notify,
+ qp->sq_cmb & IONIC_CMB_ENABLE,
+ qp->rq_cmb & IONIC_CMB_ENABLE,
+ qp->sq_spec, qp->rq_spec,
+ pd->flags & IONIC_QPF_PRIVILEGED,
+ pd_remote_privileged(qp->ibqp.pd));
+ const u8 state = to_ionic_qp_modify_state(attr->qp_state,
+ attr->cur_qp_state);
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_MODIFY_QP,
+ .len = cpu_to_le16(IONIC_ADMIN_MODIFY_QP_IN_V1_LEN),
+ .cmd.mod_qp = {
+ .attr_mask = cpu_to_be32(mask),
+ .access_flags = cpu_to_be16(flags),
+ .rq_psn = cpu_to_le32(attr->rq_psn),
+ .sq_psn = cpu_to_le32(attr->sq_psn),
+ .rate_limit_kbps =
+ cpu_to_le32(attr->rate_limit),
+ .pmtu = (attr->path_mtu + 7),
+ .retry = (attr->retry_cnt |
+ (attr->rnr_retry << 4)),
+ .rnr_timer = attr->min_rnr_timer,
+ .retry_timeout = attr->timeout,
+ .type_state = state,
+ .id_ver = cpu_to_le32(qp->qpid),
+ }
+ }
+ };
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
+ void *hdr_buf = NULL;
+ dma_addr_t hdr_dma = 0;
+ int rc, hdr_len = 0;
+ u16 sport;
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_MODIFY_QP)
+ return -EBADRQC;
+
+ if ((mask & IB_QP_MAX_DEST_RD_ATOMIC) && attr->max_dest_rd_atomic) {
+ /* Note, round up/down was already done for allocating
+ * resources on the device. The allocation order is in cache
+ * line size. We can't use the order of the resource
+ * allocation to determine the order of wqes here, because for
+ * a queue length <= one cache line it is not distinct.
+ *
+ * Therefore, the wqe order is computed again here.
+ *
+ * Account for the hole and round up to the next order.
+ */
+ wr.wqe.cmd.mod_qp.rsq_depth =
+ order_base_2(attr->max_dest_rd_atomic + 1);
+ wr.wqe.cmd.mod_qp.rsq_index = cpu_to_le32(~0);
+ }
+
+ if ((mask & IB_QP_MAX_QP_RD_ATOMIC) && attr->max_rd_atomic) {
+ /* Account for hole and round down to the next order */
+ wr.wqe.cmd.mod_qp.rrq_depth =
+ order_base_2(attr->max_rd_atomic + 2) - 1;
+ wr.wqe.cmd.mod_qp.rrq_index = cpu_to_le32(~0);
+ }
+
+ if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC)
+ wr.wqe.cmd.mod_qp.qkey_dest_qpn =
+ cpu_to_le32(attr->dest_qp_num);
+ else
+ wr.wqe.cmd.mod_qp.qkey_dest_qpn = cpu_to_le32(attr->qkey);
+
+ if (mask & IB_QP_AV) {
+ if (!qp->hdr)
+ return -ENOMEM;
+
+ sport = rdma_get_udp_sport(grh->flow_label,
+ qp->qpid,
+ attr->dest_qp_num);
+
+ rc = ionic_build_hdr(dev, qp->hdr, &attr->ah_attr, sport, true);
+ if (rc)
+ return rc;
+
+ qp->sgid_index = grh->sgid_index;
+
+ hdr_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!hdr_buf)
+ return -ENOMEM;
+
+ hdr_len = ib_ud_header_pack(qp->hdr, hdr_buf);
+ hdr_len -= IB_BTH_BYTES;
+ hdr_len -= IB_DETH_BYTES;
+ ibdev_dbg(&dev->ibdev, "roce packet header template\n");
+ print_hex_dump_debug("hdr ", DUMP_PREFIX_OFFSET, 16, 1,
+ hdr_buf, hdr_len, true);
+
+ hdr_dma = dma_map_single(dev->lif_cfg.hwdev, hdr_buf, hdr_len,
+ DMA_TO_DEVICE);
+
+ rc = dma_mapping_error(dev->lif_cfg.hwdev, hdr_dma);
+ if (rc)
+ goto err_dma;
+
+ if (qp->hdr->ipv4_present) {
+ wr.wqe.cmd.mod_qp.tfp_csum_profile =
+ qp->hdr->vlan_present ?
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_UDP :
+ IONIC_TFP_CSUM_PROF_ETH_IPV4_UDP;
+ } else {
+ wr.wqe.cmd.mod_qp.tfp_csum_profile =
+ qp->hdr->vlan_present ?
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV6_UDP :
+ IONIC_TFP_CSUM_PROF_ETH_IPV6_UDP;
+ }
+
+ wr.wqe.cmd.mod_qp.ah_id_len =
+ cpu_to_le32(qp->ahid | (hdr_len << 24));
+ wr.wqe.cmd.mod_qp.dma_addr = cpu_to_le64(hdr_dma);
+
+ wr.wqe.cmd.mod_qp.en_pcp = attr->ah_attr.sl;
+ wr.wqe.cmd.mod_qp.ip_dscp = grh->traffic_class >> 2;
+ }
+
+ ionic_admin_post(dev, &wr);
+
+ rc = ionic_admin_wait(dev, &wr, 0);
+
+ if (mask & IB_QP_AV)
+ dma_unmap_single(dev->lif_cfg.hwdev, hdr_dma, hdr_len,
+ DMA_TO_DEVICE);
+err_dma:
+ if (mask & IB_QP_AV)
+ kfree(hdr_buf);
+
+ return rc;
+}
+
+static int ionic_query_qp_cmd(struct ionic_ibdev *dev,
+ struct ionic_qp *qp,
+ struct ib_qp_attr *attr,
+ int mask)
+{
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_QUERY_QP,
+ .len = cpu_to_le16(IONIC_ADMIN_QUERY_QP_IN_V1_LEN),
+ .cmd.query_qp = {
+ .id_ver = cpu_to_le32(qp->qpid),
+ },
+ }
+ };
+ struct ionic_v1_admin_query_qp_sq *query_sqbuf;
+ struct ionic_v1_admin_query_qp_rq *query_rqbuf;
+ dma_addr_t query_sqdma;
+ dma_addr_t query_rqdma;
+ dma_addr_t hdr_dma = 0;
+ void *hdr_buf = NULL;
+ int flags, rc;
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_QUERY_QP)
+ return -EBADRQC;
+
+ if (qp->has_sq) {
+ bool expdb = !!(qp->sq_cmb & IONIC_CMB_EXPDB);
+
+ attr->cap.max_send_sge =
+ ionic_v1_send_wqe_max_sge(qp->sq.stride_log2,
+ qp->sq_spec,
+ expdb);
+ attr->cap.max_inline_data =
+ ionic_v1_send_wqe_max_data(qp->sq.stride_log2, expdb);
+ }
+
+ if (qp->has_rq) {
+ attr->cap.max_recv_sge =
+ ionic_v1_recv_wqe_max_sge(qp->rq.stride_log2,
+ qp->rq_spec,
+ qp->rq_cmb & IONIC_CMB_EXPDB);
+ }
+
+ query_sqbuf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!query_sqbuf)
+ return -ENOMEM;
+
+ query_rqbuf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!query_rqbuf) {
+ rc = -ENOMEM;
+ goto err_rqbuf;
+ }
+
+ query_sqdma = dma_map_single(dev->lif_cfg.hwdev, query_sqbuf, PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ rc = dma_mapping_error(dev->lif_cfg.hwdev, query_sqdma);
+ if (rc)
+ goto err_sqdma;
+
+ query_rqdma = dma_map_single(dev->lif_cfg.hwdev, query_rqbuf, PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ rc = dma_mapping_error(dev->lif_cfg.hwdev, query_rqdma);
+ if (rc)
+ goto err_rqdma;
+
+ if (mask & IB_QP_AV) {
+ hdr_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!hdr_buf) {
+ rc = -ENOMEM;
+ goto err_hdrbuf;
+ }
+
+ hdr_dma = dma_map_single(dev->lif_cfg.hwdev, hdr_buf,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ rc = dma_mapping_error(dev->lif_cfg.hwdev, hdr_dma);
+ if (rc)
+ goto err_hdrdma;
+ }
+
+ wr.wqe.cmd.query_qp.sq_dma_addr = cpu_to_le64(query_sqdma);
+ wr.wqe.cmd.query_qp.rq_dma_addr = cpu_to_le64(query_rqdma);
+ wr.wqe.cmd.query_qp.hdr_dma_addr = cpu_to_le64(hdr_dma);
+ wr.wqe.cmd.query_qp.ah_id = cpu_to_le32(qp->ahid);
+
+ ionic_admin_post(dev, &wr);
+
+ rc = ionic_admin_wait(dev, &wr, 0);
+
+ if (rc)
+ goto err_hdrdma;
+
+ flags = be16_to_cpu(query_sqbuf->access_perms_flags |
+ query_rqbuf->access_perms_flags);
+
+ print_hex_dump_debug("sqbuf ", DUMP_PREFIX_OFFSET, 16, 1,
+ query_sqbuf, sizeof(*query_sqbuf), true);
+ print_hex_dump_debug("rqbuf ", DUMP_PREFIX_OFFSET, 16, 1,
+ query_rqbuf, sizeof(*query_rqbuf), true);
+ ibdev_dbg(&dev->ibdev, "query qp %u state_pmtu %#x flags %#x",
+ qp->qpid, query_rqbuf->state_pmtu, flags);
+
+ attr->qp_state = from_ionic_qp_state(query_rqbuf->state_pmtu >> 4);
+ attr->cur_qp_state = attr->qp_state;
+ attr->path_mtu = (query_rqbuf->state_pmtu & 0xf) - 7;
+ attr->path_mig_state = IB_MIG_MIGRATED;
+ attr->qkey = be32_to_cpu(query_sqbuf->qkey_dest_qpn);
+ attr->rq_psn = be32_to_cpu(query_sqbuf->rq_psn);
+ attr->sq_psn = be32_to_cpu(query_rqbuf->sq_psn);
+ attr->dest_qp_num = attr->qkey;
+ attr->qp_access_flags = from_ionic_qp_flags(flags);
+ attr->pkey_index = 0;
+ attr->alt_pkey_index = 0;
+ attr->en_sqd_async_notify = !!(flags & IONIC_QPF_SQD_NOTIFY);
+ attr->sq_draining = !!(flags & IONIC_QPF_SQ_DRAINING);
+ attr->max_rd_atomic = BIT(query_rqbuf->rrq_depth) - 1;
+ attr->max_dest_rd_atomic = BIT(query_rqbuf->rsq_depth) - 1;
+ attr->min_rnr_timer = query_sqbuf->rnr_timer;
+ attr->port_num = 0;
+ attr->timeout = query_sqbuf->retry_timeout;
+ attr->retry_cnt = query_rqbuf->retry_rnrtry & 0xf;
+ attr->rnr_retry = query_rqbuf->retry_rnrtry >> 4;
+ attr->alt_port_num = 0;
+ attr->alt_timeout = 0;
+ attr->rate_limit = be32_to_cpu(query_sqbuf->rate_limit_kbps);
+
+ if (mask & IB_QP_AV)
+ ionic_set_ah_attr(dev, &attr->ah_attr,
+ qp->hdr, qp->sgid_index);
+
+err_hdrdma:
+ if (mask & IB_QP_AV) {
+ dma_unmap_single(dev->lif_cfg.hwdev, hdr_dma,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ kfree(hdr_buf);
+ }
+err_hdrbuf:
+ dma_unmap_single(dev->lif_cfg.hwdev, query_rqdma, sizeof(*query_rqbuf),
+ DMA_FROM_DEVICE);
+err_rqdma:
+ dma_unmap_single(dev->lif_cfg.hwdev, query_sqdma, sizeof(*query_sqbuf),
+ DMA_FROM_DEVICE);
+err_sqdma:
+ kfree(query_rqbuf);
+err_rqbuf:
+ kfree(query_sqbuf);
+
+ return rc;
+}
+
+static int ionic_destroy_qp_cmd(struct ionic_ibdev *dev, u32 qpid)
+{
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_DESTROY_QP,
+ .len = cpu_to_le16(IONIC_ADMIN_DESTROY_QP_IN_V1_LEN),
+ .cmd.destroy_qp = {
+ .qp_id = cpu_to_le32(qpid),
+ },
+ }
+ };
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_DESTROY_QP)
+ return -EBADRQC;
+
+ ionic_admin_post(dev, &wr);
+
+ return ionic_admin_wait(dev, &wr, IONIC_ADMIN_F_TEARDOWN);
+}
+
+static bool ionic_expdb_wqe_size_supported(struct ionic_ibdev *dev,
+ uint32_t wqe_size)
+{
+ switch (wqe_size) {
+ case 64: return dev->lif_cfg.expdb_mask & IONIC_EXPDB_64;
+ case 128: return dev->lif_cfg.expdb_mask & IONIC_EXPDB_128;
+ case 256: return dev->lif_cfg.expdb_mask & IONIC_EXPDB_256;
+ case 512: return dev->lif_cfg.expdb_mask & IONIC_EXPDB_512;
+ }
+
+ return false;
+}
+
+static void ionic_qp_sq_init_cmb(struct ionic_ibdev *dev,
+ struct ionic_qp *qp,
+ struct ib_udata *udata,
+ int max_data)
+{
+ u8 expdb_stride_log2 = 0;
+ bool expdb;
+ int rc;
+
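+ /* Drop unsupported CMB options, or give up entirely if the caller required them */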
+ if (!(qp->sq_cmb & IONIC_CMB_ENABLE))
+ goto not_in_cmb;
+
+ if (qp->sq_cmb & ~IONIC_CMB_SUPPORTED) {
+ if (qp->sq_cmb & IONIC_CMB_REQUIRE)
+ goto not_in_cmb;
+
+ qp->sq_cmb &= IONIC_CMB_SUPPORTED;
+ }
+
+ if ((qp->sq_cmb & IONIC_CMB_EXPDB) && !dev->lif_cfg.sq_expdb) {
+ if (qp->sq_cmb & IONIC_CMB_REQUIRE)
+ goto not_in_cmb;
+
+ qp->sq_cmb &= ~IONIC_CMB_EXPDB;
+ }
+
+ qp->sq_cmb_order = order_base_2(qp->sq.size / PAGE_SIZE);
+
+ if (qp->sq_cmb_order >= IONIC_SQCMB_ORDER)
+ goto not_in_cmb;
+
+ if (qp->sq_cmb & IONIC_CMB_EXPDB)
+ expdb_stride_log2 = qp->sq.stride_log2;
+
+ rc = ionic_get_cmb(dev->lif_cfg.lif, &qp->sq_cmb_pgid,
+ &qp->sq_cmb_addr, qp->sq_cmb_order,
+ expdb_stride_log2, &expdb);
+ if (rc)
+ goto not_in_cmb;
+
+ if ((qp->sq_cmb & IONIC_CMB_EXPDB) && !expdb) {
+ if (qp->sq_cmb & IONIC_CMB_REQUIRE)
+ goto err_map;
+
+ qp->sq_cmb &= ~IONIC_CMB_EXPDB;
+ }
+
+ return;
+
+err_map:
+ ionic_put_cmb(dev->lif_cfg.lif, qp->sq_cmb_pgid, qp->sq_cmb_order);
+not_in_cmb:
+ if (qp->sq_cmb & IONIC_CMB_REQUIRE)
+ ibdev_dbg(&dev->ibdev, "could not place sq in cmb as required\n");
+
+ qp->sq_cmb = 0;
+ qp->sq_cmb_order = IONIC_RES_INVALID;
+ qp->sq_cmb_pgid = 0;
+ qp->sq_cmb_addr = 0;
+}
+
+static void ionic_qp_sq_destroy_cmb(struct ionic_ibdev *dev,
+ struct ionic_ctx *ctx,
+ struct ionic_qp *qp)
+{
+ if (!(qp->sq_cmb & IONIC_CMB_ENABLE))
+ return;
+
+ if (ctx)
+ rdma_user_mmap_entry_remove(qp->mmap_sq_cmb);
+
+ ionic_put_cmb(dev->lif_cfg.lif, qp->sq_cmb_pgid, qp->sq_cmb_order);
+}
+
+static int ionic_qp_sq_init(struct ionic_ibdev *dev, struct ionic_ctx *ctx,
+ struct ionic_qp *qp, struct ionic_qdesc *sq,
+ struct ionic_tbl_buf *buf, int max_wr, int max_sge,
+ int max_data, int sq_spec, struct ib_udata *udata)
+{
+ u32 wqe_size;
+ int rc = 0;
+
+ qp->sq_msn_prod = 0;
+ qp->sq_msn_cons = 0;
+
+ if (!qp->has_sq) {
+ if (buf) {
+ buf->tbl_buf = NULL;
+ buf->tbl_limit = 0;
+ buf->tbl_pages = 0;
+ }
+ if (udata)
+ rc = ionic_validate_qdesc_zero(sq);
+
+ return rc;
+ }
+
+ rc = -EINVAL;
+
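+ /* Validate the requested send queue attributes against device limits */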
+ if (max_wr < 0 || max_wr > 0xffff)
+ return rc;
+
+ if (max_sge < 1)
+ return rc;
+
+ if (max_sge > min(ionic_v1_send_wqe_max_sge(dev->lif_cfg.max_stride, 0,
+ qp->sq_cmb &
+ IONIC_CMB_EXPDB),
+ IONIC_SPEC_HIGH))
+ return rc;
+
+ if (max_data < 0)
+ return rc;
+
+ if (max_data > ionic_v1_send_wqe_max_data(dev->lif_cfg.max_stride,
+ qp->sq_cmb & IONIC_CMB_EXPDB))
+ return rc;
+
+ if (udata) {
+ rc = ionic_validate_qdesc(sq);
+ if (rc)
+ return rc;
+
+ qp->sq_spec = sq_spec;
+
+ qp->sq.ptr = NULL;
+ qp->sq.size = sq->size;
+ qp->sq.mask = sq->mask;
+ qp->sq.depth_log2 = sq->depth_log2;
+ qp->sq.stride_log2 = sq->stride_log2;
+
+ qp->sq_meta = NULL;
+ qp->sq_msn_idx = NULL;
+
+ qp->sq_umem = ib_umem_get(&dev->ibdev, sq->addr, sq->size, 0);
+ if (IS_ERR(qp->sq_umem))
+ return PTR_ERR(qp->sq_umem);
+ } else {
+ qp->sq_umem = NULL;
+
+ qp->sq_spec = ionic_v1_use_spec_sge(max_sge, sq_spec);
+ if (sq_spec && !qp->sq_spec)
+ ibdev_dbg(&dev->ibdev,
+ "init sq: max_sge %u disables spec\n",
+ max_sge);
+
+ if (qp->sq_cmb & IONIC_CMB_EXPDB) {
+ wqe_size = ionic_v1_send_wqe_min_size(max_sge, max_data,
+ qp->sq_spec,
+ true);
+
+ if (!ionic_expdb_wqe_size_supported(dev, wqe_size))
+ qp->sq_cmb &= ~IONIC_CMB_EXPDB;
+ }
+
+ if (!(qp->sq_cmb & IONIC_CMB_EXPDB))
+ wqe_size = ionic_v1_send_wqe_min_size(max_sge, max_data,
+ qp->sq_spec,
+ false);
+
+ rc = ionic_queue_init(&qp->sq, dev->lif_cfg.hwdev,
+ max_wr, wqe_size);
+ if (rc)
+ return rc;
+
+ ionic_queue_dbell_init(&qp->sq, qp->qpid);
+
+ qp->sq_meta = kmalloc_array((u32)qp->sq.mask + 1,
+ sizeof(*qp->sq_meta),
+ GFP_KERNEL);
+ if (!qp->sq_meta) {
+ rc = -ENOMEM;
+ goto err_sq_meta;
+ }
+
+ qp->sq_msn_idx = kmalloc_array((u32)qp->sq.mask + 1,
+ sizeof(*qp->sq_msn_idx),
+ GFP_KERNEL);
+ if (!qp->sq_msn_idx) {
+ rc = -ENOMEM;
+ goto err_sq_msn;
+ }
+ }
+
+ ionic_qp_sq_init_cmb(dev, qp, udata, max_data);
+
+ if (qp->sq_cmb & IONIC_CMB_ENABLE)
+ rc = ionic_pgtbl_init(dev, buf, NULL,
+ (u64)qp->sq_cmb_pgid << PAGE_SHIFT,
+ 1, PAGE_SIZE);
+ else
+ rc = ionic_pgtbl_init(dev, buf,
+ qp->sq_umem, qp->sq.dma, 1, PAGE_SIZE);
+ if (rc)
+ goto err_sq_tbl;
+
+ return 0;
+
+err_sq_tbl:
+ ionic_qp_sq_destroy_cmb(dev, ctx, qp);
+ kfree(qp->sq_msn_idx);
+err_sq_msn:
+ kfree(qp->sq_meta);
+err_sq_meta:
+ if (qp->sq_umem)
+ ib_umem_release(qp->sq_umem);
+ else
+ ionic_queue_destroy(&qp->sq, dev->lif_cfg.hwdev);
+ return rc;
+}
+
+static void ionic_qp_sq_destroy(struct ionic_ibdev *dev,
+ struct ionic_ctx *ctx,
+ struct ionic_qp *qp)
+{
+ if (!qp->has_sq)
+ return;
+
+ ionic_qp_sq_destroy_cmb(dev, ctx, qp);
+
+ kfree(qp->sq_msn_idx);
+ kfree(qp->sq_meta);
+
+ if (qp->sq_umem)
+ ib_umem_release(qp->sq_umem);
+ else
+ ionic_queue_destroy(&qp->sq, dev->lif_cfg.hwdev);
+}
+
+static void ionic_qp_rq_init_cmb(struct ionic_ibdev *dev,
+ struct ionic_qp *qp,
+ struct ib_udata *udata)
+{
+ u8 expdb_stride_log2 = 0;
+ bool expdb;
+ int rc;
+
+ if (!(qp->rq_cmb & IONIC_CMB_ENABLE))
+ goto not_in_cmb;
+
+ if (qp->rq_cmb & ~IONIC_CMB_SUPPORTED) {
+ if (qp->rq_cmb & IONIC_CMB_REQUIRE)
+ goto not_in_cmb;
+
+ qp->rq_cmb &= IONIC_CMB_SUPPORTED;
+ }
+
+ if ((qp->rq_cmb & IONIC_CMB_EXPDB) && !dev->lif_cfg.rq_expdb) {
+ if (qp->rq_cmb & IONIC_CMB_REQUIRE)
+ goto not_in_cmb;
+
+ qp->rq_cmb &= ~IONIC_CMB_EXPDB;
+ }
+
+ qp->rq_cmb_order = order_base_2(qp->rq.size / PAGE_SIZE);
+
+ if (qp->rq_cmb_order >= IONIC_RQCMB_ORDER)
+ goto not_in_cmb;
+
+ if (qp->rq_cmb & IONIC_CMB_EXPDB)
+ expdb_stride_log2 = qp->rq.stride_log2;
+
+ rc = ionic_get_cmb(dev->lif_cfg.lif, &qp->rq_cmb_pgid,
+ &qp->rq_cmb_addr, qp->rq_cmb_order,
+ expdb_stride_log2, &expdb);
+ if (rc)
+ goto not_in_cmb;
+
+ if ((qp->rq_cmb & IONIC_CMB_EXPDB) && !expdb) {
+ if (qp->rq_cmb & IONIC_CMB_REQUIRE)
+ goto err_map;
+
+ qp->rq_cmb &= ~IONIC_CMB_EXPDB;
+ }
+
+ return;
+
+err_map:
+ ionic_put_cmb(dev->lif_cfg.lif, qp->rq_cmb_pgid, qp->rq_cmb_order);
+not_in_cmb:
+ if (qp->rq_cmb & IONIC_CMB_REQUIRE)
+ ibdev_dbg(&dev->ibdev, "could not place rq in cmb as required\n");
+
+ qp->rq_cmb = 0;
+ qp->rq_cmb_order = IONIC_RES_INVALID;
+ qp->rq_cmb_pgid = 0;
+ qp->rq_cmb_addr = 0;
+}
+
+static void ionic_qp_rq_destroy_cmb(struct ionic_ibdev *dev,
+ struct ionic_ctx *ctx,
+ struct ionic_qp *qp)
+{
+ if (!(qp->rq_cmb & IONIC_CMB_ENABLE))
+ return;
+
+ if (ctx)
+ rdma_user_mmap_entry_remove(qp->mmap_rq_cmb);
+
+ ionic_put_cmb(dev->lif_cfg.lif, qp->rq_cmb_pgid, qp->rq_cmb_order);
+}
+
+static int ionic_qp_rq_init(struct ionic_ibdev *dev, struct ionic_ctx *ctx,
+ struct ionic_qp *qp, struct ionic_qdesc *rq,
+ struct ionic_tbl_buf *buf, int max_wr, int max_sge,
+ int rq_spec, struct ib_udata *udata)
+{
+ int rc = 0, i;
+ u32 wqe_size;
+
+ if (!qp->has_rq) {
+ if (buf) {
+ buf->tbl_buf = NULL;
+ buf->tbl_limit = 0;
+ buf->tbl_pages = 0;
+ }
+ if (udata)
+ rc = ionic_validate_qdesc_zero(rq);
+
+ return rc;
+ }
+
+ rc = -EINVAL;
+
+ if (max_wr < 0 || max_wr > 0xffff)
+ return rc;
+
+ if (max_sge < 1)
+ return rc;
+
+ if (max_sge > min(ionic_v1_recv_wqe_max_sge(dev->lif_cfg.max_stride, 0, false),
+ IONIC_SPEC_HIGH))
+ return rc;
+
+ if (udata) {
+ rc = ionic_validate_qdesc(rq);
+ if (rc)
+ return rc;
+
+ qp->rq_spec = rq_spec;
+
+ qp->rq.ptr = NULL;
+ qp->rq.size = rq->size;
+ qp->rq.mask = rq->mask;
+ qp->rq.depth_log2 = rq->depth_log2;
+ qp->rq.stride_log2 = rq->stride_log2;
+
+ qp->rq_meta = NULL;
+
+ qp->rq_umem = ib_umem_get(&dev->ibdev, rq->addr, rq->size, 0);
+ if (IS_ERR(qp->rq_umem))
+ return PTR_ERR(qp->rq_umem);
+ } else {
+ qp->rq_umem = NULL;
+
+ qp->rq_spec = ionic_v1_use_spec_sge(max_sge, rq_spec);
+ if (rq_spec && !qp->rq_spec)
+ ibdev_dbg(&dev->ibdev,
+ "init rq: max_sge %u disables spec\n",
+ max_sge);
+
+ if (qp->rq_cmb & IONIC_CMB_EXPDB) {
+ wqe_size = ionic_v1_recv_wqe_min_size(max_sge,
+ qp->rq_spec,
+ true);
+
+ if (!ionic_expdb_wqe_size_supported(dev, wqe_size))
+ qp->rq_cmb &= ~IONIC_CMB_EXPDB;
+ }
+
+ if (!(qp->rq_cmb & IONIC_CMB_EXPDB))
+ wqe_size = ionic_v1_recv_wqe_min_size(max_sge,
+ qp->rq_spec,
+ false);
+
+ rc = ionic_queue_init(&qp->rq, dev->lif_cfg.hwdev,
+ max_wr, wqe_size);
+ if (rc)
+ return rc;
+
+ ionic_queue_dbell_init(&qp->rq, qp->qpid);
+
+ qp->rq_meta = kmalloc_array((u32)qp->rq.mask + 1,
+ sizeof(*qp->rq_meta),
+ GFP_KERNEL);
+ if (!qp->rq_meta) {
+ rc = -ENOMEM;
+ goto err_rq_meta;
+ }
+
+ for (i = 0; i < qp->rq.mask; ++i)
+ qp->rq_meta[i].next = &qp->rq_meta[i + 1];
+ qp->rq_meta[i].next = IONIC_META_LAST;
+ qp->rq_meta_head = &qp->rq_meta[0];
+ }
+
+ ionic_qp_rq_init_cmb(dev, qp, udata);
+
+ if (qp->rq_cmb & IONIC_CMB_ENABLE)
+ rc = ionic_pgtbl_init(dev, buf, NULL,
+ (u64)qp->rq_cmb_pgid << PAGE_SHIFT,
+ 1, PAGE_SIZE);
+ else
+ rc = ionic_pgtbl_init(dev, buf,
+ qp->rq_umem, qp->rq.dma, 1, PAGE_SIZE);
+ if (rc)
+ goto err_rq_tbl;
+
+ return 0;
+
+err_rq_tbl:
+ ionic_qp_rq_destroy_cmb(dev, ctx, qp);
+ kfree(qp->rq_meta);
+err_rq_meta:
+ if (qp->rq_umem)
+ ib_umem_release(qp->rq_umem);
+ else
+ ionic_queue_destroy(&qp->rq, dev->lif_cfg.hwdev);
+ return rc;
+}
+
+static void ionic_qp_rq_destroy(struct ionic_ibdev *dev,
+ struct ionic_ctx *ctx,
+ struct ionic_qp *qp)
+{
+ if (!qp->has_rq)
+ return;
+
+ ionic_qp_rq_destroy_cmb(dev, ctx, qp);
+
+ kfree(qp->rq_meta);
+
+ if (qp->rq_umem)
+ ib_umem_release(qp->rq_umem);
+ else
+ ionic_queue_destroy(&qp->rq, dev->lif_cfg.hwdev);
+}
+
+int ionic_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibqp->device);
+ struct ionic_tbl_buf sq_buf = {}, rq_buf = {};
+ struct ionic_pd *pd = to_ionic_pd(ibqp->pd);
+ struct ionic_qp *qp = to_ionic_qp(ibqp);
+ struct ionic_ctx *ctx =
+ rdma_udata_to_drv_context(udata, struct ionic_ctx, ibctx);
+ struct ionic_qp_resp resp = {};
+ struct ionic_qp_req req = {};
+ struct ionic_cq *cq;
+ u8 udma_mask;
+ void *entry;
+ int rc;
+
+ if (udata) {
+ rc = ib_copy_from_udata(&req, udata, sizeof(req));
+ if (rc)
+ return rc;
+ } else {
+ req.sq_spec = IONIC_SPEC_HIGH;
+ req.rq_spec = IONIC_SPEC_HIGH;
+ }
+
+ if (attr->qp_type == IB_QPT_SMI || attr->qp_type > IB_QPT_UD)
+ return -EOPNOTSUPP;
+
+ qp->state = IB_QPS_RESET;
+
+ INIT_LIST_HEAD(&qp->cq_poll_sq);
+ INIT_LIST_HEAD(&qp->cq_flush_sq);
+ INIT_LIST_HEAD(&qp->cq_flush_rq);
+
+ spin_lock_init(&qp->sq_lock);
+ spin_lock_init(&qp->rq_lock);
+
+ qp->has_sq = 1;
+ qp->has_rq = 1;
+
+ if (attr->qp_type == IB_QPT_GSI) {
+ rc = ionic_get_gsi_qpid(dev, &qp->qpid);
+ } else {
+ udma_mask = BIT(dev->lif_cfg.udma_count) - 1;
+
+ if (qp->has_sq)
+ udma_mask &= to_ionic_vcq(attr->send_cq)->udma_mask;
+
+ if (qp->has_rq)
+ udma_mask &= to_ionic_vcq(attr->recv_cq)->udma_mask;
+
+ if (udata && req.udma_mask)
+ udma_mask &= req.udma_mask;
+
+ if (!udma_mask)
+ return -EINVAL;
+
+ rc = ionic_get_qpid(dev, &qp->qpid, &qp->udma_idx, udma_mask);
+ }
+ if (rc)
+ return rc;
+
+ qp->sig_all = attr->sq_sig_type == IB_SIGNAL_ALL_WR;
+ qp->has_ah = attr->qp_type == IB_QPT_RC;
+
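+ /* RC QPs keep a header template and AH id for the connected peer */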
+ if (qp->has_ah) {
+ qp->hdr = kzalloc(sizeof(*qp->hdr), GFP_KERNEL);
+ if (!qp->hdr) {
+ rc = -ENOMEM;
+ goto err_ah_alloc;
+ }
+
+ rc = ionic_get_ahid(dev, &qp->ahid);
+ if (rc)
+ goto err_ahid;
+ }
+
+ if (udata) {
+ if (req.rq_cmb & IONIC_CMB_ENABLE)
+ qp->rq_cmb = req.rq_cmb;
+
+ if (req.sq_cmb & IONIC_CMB_ENABLE)
+ qp->sq_cmb = req.sq_cmb;
+ }
+
+ rc = ionic_qp_sq_init(dev, ctx, qp, &req.sq, &sq_buf,
+ attr->cap.max_send_wr, attr->cap.max_send_sge,
+ attr->cap.max_inline_data, req.sq_spec, udata);
+ if (rc)
+ goto err_sq;
+
+ rc = ionic_qp_rq_init(dev, ctx, qp, &req.rq, &rq_buf,
+ attr->cap.max_recv_wr, attr->cap.max_recv_sge,
+ req.rq_spec, udata);
+ if (rc)
+ goto err_rq;
+
+ rc = ionic_create_qp_cmd(dev, pd,
+ to_ionic_vcq_cq(attr->send_cq, qp->udma_idx),
+ to_ionic_vcq_cq(attr->recv_cq, qp->udma_idx),
+ qp, &sq_buf, &rq_buf, attr);
+ if (rc)
+ goto err_cmd;
+
+ if (udata) {
+ resp.qpid = qp->qpid;
+ resp.udma_idx = qp->udma_idx;
+
+ if (qp->sq_cmb & IONIC_CMB_ENABLE) {
+ bool wc;
+
+ if ((qp->sq_cmb & (IONIC_CMB_WC | IONIC_CMB_UC)) ==
+ (IONIC_CMB_WC | IONIC_CMB_UC)) {
+ ibdev_dbg(&dev->ibdev,
+ "Both sq_cmb flags IONIC_CMB_WC and IONIC_CMB_UC are set, using default driver mapping\n");
+ qp->sq_cmb &= ~(IONIC_CMB_WC | IONIC_CMB_UC);
+ }
+
+ wc = (qp->sq_cmb & (IONIC_CMB_WC | IONIC_CMB_UC))
+ != IONIC_CMB_UC;
+
+ /* let userspace know the mapping */
+ if (wc)
+ qp->sq_cmb |= IONIC_CMB_WC;
+ else
+ qp->sq_cmb |= IONIC_CMB_UC;
+
+ qp->mmap_sq_cmb =
+ ionic_mmap_entry_insert(ctx,
+ qp->sq.size,
+ PHYS_PFN(qp->sq_cmb_addr),
+ wc ? IONIC_MMAP_WC : 0,
+ &resp.sq_cmb_offset);
+ if (!qp->mmap_sq_cmb) {
+ rc = -ENOMEM;
+ goto err_mmap_sq;
+ }
+
+ resp.sq_cmb = qp->sq_cmb;
+ }
+
+ if (qp->rq_cmb & IONIC_CMB_ENABLE) {
+ bool wc;
+
+ if ((qp->rq_cmb & (IONIC_CMB_WC | IONIC_CMB_UC)) ==
+ (IONIC_CMB_WC | IONIC_CMB_UC)) {
+ ibdev_dbg(&dev->ibdev,
+ "Both rq_cmb flags IONIC_CMB_WC and IONIC_CMB_UC are set, using default driver mapping\n");
+ qp->rq_cmb &= ~(IONIC_CMB_WC | IONIC_CMB_UC);
+ }
+
+ if (qp->rq_cmb & IONIC_CMB_EXPDB)
+ wc = (qp->rq_cmb & (IONIC_CMB_WC | IONIC_CMB_UC))
+ == IONIC_CMB_WC;
+ else
+ wc = (qp->rq_cmb & (IONIC_CMB_WC | IONIC_CMB_UC))
+ != IONIC_CMB_UC;
+
+ /* let userspace know the mapping */
+ if (wc)
+ qp->rq_cmb |= IONIC_CMB_WC;
+ else
+ qp->rq_cmb |= IONIC_CMB_UC;
+
+ qp->mmap_rq_cmb =
+ ionic_mmap_entry_insert(ctx,
+ qp->rq.size,
+ PHYS_PFN(qp->rq_cmb_addr),
+ wc ? IONIC_MMAP_WC : 0,
+ &resp.rq_cmb_offset);
+ if (!qp->mmap_rq_cmb) {
+ rc = -ENOMEM;
+ goto err_mmap_rq;
+ }
+
+ resp.rq_cmb = qp->rq_cmb;
+ }
+
+ rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (rc)
+ goto err_resp;
+ }
+
+ ionic_pgtbl_unbuf(dev, &rq_buf);
+ ionic_pgtbl_unbuf(dev, &sq_buf);
+
+ qp->ibqp.qp_num = qp->qpid;
+
+ init_completion(&qp->qp_rel_comp);
+ kref_init(&qp->qp_kref);
+
+ entry = xa_store_irq(&dev->qp_tbl, qp->qpid, qp, GFP_KERNEL);
+ if (entry) {
+ if (!xa_is_err(entry))
+ rc = -EINVAL;
+ else
+ rc = xa_err(entry);
+
+ goto err_resp;
+ }
+
+ if (qp->has_sq) {
+ cq = to_ionic_vcq_cq(attr->send_cq, qp->udma_idx);
+
+ attr->cap.max_send_wr = qp->sq.mask;
+ attr->cap.max_send_sge =
+ ionic_v1_send_wqe_max_sge(qp->sq.stride_log2,
+ qp->sq_spec,
+ qp->sq_cmb & IONIC_CMB_EXPDB);
+ attr->cap.max_inline_data =
+ ionic_v1_send_wqe_max_data(qp->sq.stride_log2,
+ qp->sq_cmb &
+ IONIC_CMB_EXPDB);
+ qp->sq_cqid = cq->cqid;
+ }
+
+ if (qp->has_rq) {
+ cq = to_ionic_vcq_cq(attr->recv_cq, qp->udma_idx);
+
+ attr->cap.max_recv_wr = qp->rq.mask;
+ attr->cap.max_recv_sge =
+ ionic_v1_recv_wqe_max_sge(qp->rq.stride_log2,
+ qp->rq_spec,
+ qp->rq_cmb & IONIC_CMB_EXPDB);
+ qp->rq_cqid = cq->cqid;
+ }
+
+ return 0;
+
+err_resp:
+ if (udata && (qp->rq_cmb & IONIC_CMB_ENABLE))
+ rdma_user_mmap_entry_remove(qp->mmap_rq_cmb);
+err_mmap_rq:
+ if (udata && (qp->sq_cmb & IONIC_CMB_ENABLE))
+ rdma_user_mmap_entry_remove(qp->mmap_sq_cmb);
+err_mmap_sq:
+ ionic_destroy_qp_cmd(dev, qp->qpid);
+err_cmd:
+ ionic_pgtbl_unbuf(dev, &rq_buf);
+ ionic_qp_rq_destroy(dev, ctx, qp);
+err_rq:
+ ionic_pgtbl_unbuf(dev, &sq_buf);
+ ionic_qp_sq_destroy(dev, ctx, qp);
+err_sq:
+ if (qp->has_ah)
+ ionic_put_ahid(dev, qp->ahid);
+err_ahid:
+ kfree(qp->hdr);
+err_ah_alloc:
+ ionic_put_qpid(dev, qp->qpid);
+ return rc;
+}
+
+void ionic_notify_flush_cq(struct ionic_cq *cq)
+{
+ if (cq->flush && cq->vcq->ibcq.comp_handler)
+ cq->vcq->ibcq.comp_handler(&cq->vcq->ibcq,
+ cq->vcq->ibcq.cq_context);
+}
+
+static void ionic_notify_qp_cqs(struct ionic_ibdev *dev, struct ionic_qp *qp)
+{
+ if (qp->ibqp.send_cq)
+ ionic_notify_flush_cq(to_ionic_vcq_cq(qp->ibqp.send_cq,
+ qp->udma_idx));
+ if (qp->ibqp.recv_cq && qp->ibqp.recv_cq != qp->ibqp.send_cq)
+ ionic_notify_flush_cq(to_ionic_vcq_cq(qp->ibqp.recv_cq,
+ qp->udma_idx));
+}
+
+void ionic_flush_qp(struct ionic_ibdev *dev, struct ionic_qp *qp)
+{
+ unsigned long irqflags;
+ struct ionic_cq *cq;
+
+ if (qp->ibqp.send_cq) {
+ cq = to_ionic_vcq_cq(qp->ibqp.send_cq, qp->udma_idx);
+
+ /* Hold the CQ lock and QP sq_lock to set up flush */
+ spin_lock_irqsave(&cq->lock, irqflags);
+ spin_lock(&qp->sq_lock);
+ qp->sq_flush = true;
+ if (!ionic_queue_empty(&qp->sq)) {
+ cq->flush = true;
+ list_move_tail(&qp->cq_flush_sq, &cq->flush_sq);
+ }
+ spin_unlock(&qp->sq_lock);
+ spin_unlock_irqrestore(&cq->lock, irqflags);
+ }
+
+ if (qp->ibqp.recv_cq) {
+ cq = to_ionic_vcq_cq(qp->ibqp.recv_cq, qp->udma_idx);
+
+ /* Hold the CQ lock and QP rq_lock to set up flush */
+ spin_lock_irqsave(&cq->lock, irqflags);
+ spin_lock(&qp->rq_lock);
+ qp->rq_flush = true;
+ if (!ionic_queue_empty(&qp->rq)) {
+ cq->flush = true;
+ list_move_tail(&qp->cq_flush_rq, &cq->flush_rq);
+ }
+ spin_unlock(&qp->rq_lock);
+ spin_unlock_irqrestore(&cq->lock, irqflags);
+ }
+}
+
+static void ionic_clean_cq(struct ionic_cq *cq, u32 qpid)
+{
+ struct ionic_v1_cqe *qcqe;
+ int prod, qtf, qid, type;
+ bool color;
+
+ if (!cq->q.ptr)
+ return;
+
+ color = cq->color;
+ prod = cq->q.prod;
+ qcqe = ionic_queue_at(&cq->q, prod);
+
+ while (color == ionic_v1_cqe_color(qcqe)) {
+ qtf = ionic_v1_cqe_qtf(qcqe);
+ qid = ionic_v1_cqe_qtf_qid(qtf);
+ type = ionic_v1_cqe_qtf_type(qtf);
+
+ if (qid == qpid && type != IONIC_V1_CQE_TYPE_ADMIN)
+ ionic_v1_cqe_clean(qcqe);
+
+ prod = ionic_queue_next(&cq->q, prod);
+ qcqe = ionic_queue_at(&cq->q, prod);
+ color = ionic_color_wrap(prod, color);
+ }
+}
+
+static void ionic_reset_qp(struct ionic_ibdev *dev, struct ionic_qp *qp)
+{
+ unsigned long irqflags;
+ struct ionic_cq *cq;
+ int i;
+
+ local_irq_save(irqflags);
+
+ if (qp->ibqp.send_cq) {
+ cq = to_ionic_vcq_cq(qp->ibqp.send_cq, qp->udma_idx);
+ spin_lock(&cq->lock);
+ ionic_clean_cq(cq, qp->qpid);
+ spin_unlock(&cq->lock);
+ }
+
+ if (qp->ibqp.recv_cq) {
+ cq = to_ionic_vcq_cq(qp->ibqp.recv_cq, qp->udma_idx);
+ spin_lock(&cq->lock);
+ ionic_clean_cq(cq, qp->qpid);
+ spin_unlock(&cq->lock);
+ }
+
+ if (qp->has_sq) {
+ spin_lock(&qp->sq_lock);
+ qp->sq_flush = false;
+ qp->sq_flush_rcvd = false;
+ qp->sq_msn_prod = 0;
+ qp->sq_msn_cons = 0;
+ qp->sq.prod = 0;
+ qp->sq.cons = 0;
+ spin_unlock(&qp->sq_lock);
+ }
+
+ if (qp->has_rq) {
+ spin_lock(&qp->rq_lock);
+ qp->rq_flush = false;
+ qp->rq.prod = 0;
+ qp->rq.cons = 0;
+ if (qp->rq_meta) {
+ for (i = 0; i < qp->rq.mask; ++i)
+ qp->rq_meta[i].next = &qp->rq_meta[i + 1];
+ qp->rq_meta[i].next = IONIC_META_LAST;
+ }
+ qp->rq_meta_head = &qp->rq_meta[0];
+ spin_unlock(&qp->rq_lock);
+ }
+
+ local_irq_restore(irqflags);
+}
+
+static bool ionic_qp_cur_state_is_ok(enum ib_qp_state q_state,
+ enum ib_qp_state attr_state)
+{
+ if (q_state == attr_state)
+ return true;
+
+ if (attr_state == IB_QPS_ERR)
+ return true;
+
+ if (attr_state == IB_QPS_SQE)
+ return q_state == IB_QPS_RTS || q_state == IB_QPS_SQD;
+
+ return false;
+}
+
+static int ionic_check_modify_qp(struct ionic_qp *qp, struct ib_qp_attr *attr,
+ int mask)
+{
+ enum ib_qp_state cur_state = (mask & IB_QP_CUR_STATE) ?
+ attr->cur_qp_state : qp->state;
+ enum ib_qp_state next_state = (mask & IB_QP_STATE) ?
+ attr->qp_state : cur_state;
+
+ if ((mask & IB_QP_CUR_STATE) &&
+ !ionic_qp_cur_state_is_ok(qp->state, attr->cur_qp_state))
+ return -EINVAL;
+
+ if (!ib_modify_qp_is_ok(cur_state, next_state, qp->ibqp.qp_type, mask))
+ return -EINVAL;
+
+ /* an unprivileged qp is not allowed to use a privileged qkey */
+ if ((mask & IB_QP_QKEY) && (attr->qkey & 0x80000000) &&
+ qp->ibqp.uobject)
+ return -EPERM;
+
+ return 0;
+}
+
+int ionic_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask,
+ struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibqp->device);
+ struct ionic_pd *pd = to_ionic_pd(ibqp->pd);
+ struct ionic_qp *qp = to_ionic_qp(ibqp);
+ int rc;
+
+ rc = ionic_check_modify_qp(qp, attr, mask);
+ if (rc)
+ return rc;
+
+ if (mask & IB_QP_CAP)
+ return -EINVAL;
+
+ rc = ionic_modify_qp_cmd(dev, pd, qp, attr, mask);
+ if (rc)
+ return rc;
+
+ if (mask & IB_QP_STATE) {
+ qp->state = attr->qp_state;
+
+ if (attr->qp_state == IB_QPS_ERR) {
+ ionic_flush_qp(dev, qp);
+ ionic_notify_qp_cqs(dev, qp);
+ } else if (attr->qp_state == IB_QPS_RESET) {
+ ionic_reset_qp(dev, qp);
+ }
+ }
+
+ return 0;
+}
+
+int ionic_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int mask, struct ib_qp_init_attr *init_attr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibqp->device);
+ struct ionic_qp *qp = to_ionic_qp(ibqp);
+ int rc;
+
+ memset(attr, 0, sizeof(*attr));
+ memset(init_attr, 0, sizeof(*init_attr));
+
+ rc = ionic_query_qp_cmd(dev, qp, attr, mask);
+ if (rc)
+ return rc;
+
+ if (qp->has_sq)
+ attr->cap.max_send_wr = qp->sq.mask;
+
+ if (qp->has_rq)
+ attr->cap.max_recv_wr = qp->rq.mask;
+
+ init_attr->event_handler = ibqp->event_handler;
+ init_attr->qp_context = ibqp->qp_context;
+ init_attr->send_cq = ibqp->send_cq;
+ init_attr->recv_cq = ibqp->recv_cq;
+ init_attr->srq = ibqp->srq;
+ init_attr->xrcd = ibqp->xrcd;
+ init_attr->cap = attr->cap;
+ init_attr->sq_sig_type = qp->sig_all ?
+ IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
+ init_attr->qp_type = ibqp->qp_type;
+ init_attr->create_flags = 0;
+ init_attr->port_num = 0;
+ init_attr->rwq_ind_tbl = ibqp->rwq_ind_tbl;
+ init_attr->source_qpn = 0;
+
+ return rc;
+}
+
+int ionic_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
+{
+ struct ionic_ctx *ctx =
+ rdma_udata_to_drv_context(udata, struct ionic_ctx, ibctx);
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibqp->device);
+ struct ionic_qp *qp = to_ionic_qp(ibqp);
+ unsigned long irqflags;
+ struct ionic_cq *cq;
+ int rc;
+
+ rc = ionic_destroy_qp_cmd(dev, qp->qpid);
+ if (rc)
+ return rc;
+
+ xa_erase_irq(&dev->qp_tbl, qp->qpid);
+
+ kref_put(&qp->qp_kref, ionic_qp_complete);
+ wait_for_completion(&qp->qp_rel_comp);
+
+ if (qp->ibqp.send_cq) {
+ cq = to_ionic_vcq_cq(qp->ibqp.send_cq, qp->udma_idx);
+ spin_lock_irqsave(&cq->lock, irqflags);
+ ionic_clean_cq(cq, qp->qpid);
+ list_del(&qp->cq_poll_sq);
+ list_del(&qp->cq_flush_sq);
+ spin_unlock_irqrestore(&cq->lock, irqflags);
+ }
+
+ if (qp->ibqp.recv_cq) {
+ cq = to_ionic_vcq_cq(qp->ibqp.recv_cq, qp->udma_idx);
+ spin_lock_irqsave(&cq->lock, irqflags);
+ ionic_clean_cq(cq, qp->qpid);
+ list_del(&qp->cq_flush_rq);
+ spin_unlock_irqrestore(&cq->lock, irqflags);
+ }
+
+ ionic_qp_rq_destroy(dev, ctx, qp);
+ ionic_qp_sq_destroy(dev, ctx, qp);
+ if (qp->has_ah) {
+ ionic_put_ahid(dev, qp->ahid);
+ kfree(qp->hdr);
+ }
+ ionic_put_qpid(dev, qp->qpid);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/ionic/ionic_datapath.c b/drivers/infiniband/hw/ionic/ionic_datapath.c
new file mode 100644
index 000000000000..aa2944887f23
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_datapath.c
@@ -0,0 +1,1399 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "ionic_fw.h"
+#include "ionic_ibdev.h"
+
+#define IONIC_OP(version, opname) \
+ ((version) < 2 ? IONIC_V1_OP_##opname : IONIC_V2_OP_##opname)
+
+static bool ionic_next_cqe(struct ionic_ibdev *dev, struct ionic_cq *cq,
+ struct ionic_v1_cqe **cqe)
+{
+ struct ionic_v1_cqe *qcqe = ionic_queue_at_prod(&cq->q);
+
+ if (unlikely(cq->color != ionic_v1_cqe_color(qcqe)))
+ return false;
+
+ /* Prevent out-of-order reads of the CQE */
+ dma_rmb();
+
+ *cqe = qcqe;
+
+ return true;
+}
+
+static int ionic_flush_recv(struct ionic_qp *qp, struct ib_wc *wc)
+{
+ struct ionic_rq_meta *meta;
+ struct ionic_v1_wqe *wqe;
+
+ if (!qp->rq_flush)
+ return 0;
+
+ if (ionic_queue_empty(&qp->rq))
+ return 0;
+
+ wqe = ionic_queue_at_cons(&qp->rq);
+
+ /* wqe_id must be a valid queue index */
+ if (unlikely(wqe->base.wqe_id >> qp->rq.depth_log2)) {
+ ibdev_warn(qp->ibqp.device,
+ "flush qp %u recv index %llu invalid\n",
+ qp->qpid, (unsigned long long)wqe->base.wqe_id);
+ return -EIO;
+ }
+
+ /* wqe_id must indicate a request that is outstanding */
+ meta = &qp->rq_meta[wqe->base.wqe_id];
+ if (unlikely(meta->next != IONIC_META_POSTED)) {
+ ibdev_warn(qp->ibqp.device,
+ "flush qp %u recv index %llu not posted\n",
+ qp->qpid, (unsigned long long)wqe->base.wqe_id);
+ return -EIO;
+ }
+
+ ionic_queue_consume(&qp->rq);
+
+ memset(wc, 0, sizeof(*wc));
+
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ wc->wr_id = meta->wrid;
+ wc->qp = &qp->ibqp;
+
+ meta->next = qp->rq_meta_head;
+ qp->rq_meta_head = meta;
+
+ return 1;
+}
+
+static int ionic_flush_recv_many(struct ionic_qp *qp,
+ struct ib_wc *wc, int nwc)
+{
+ int rc = 0, npolled = 0;
+
+ while (npolled < nwc) {
+ rc = ionic_flush_recv(qp, wc + npolled);
+ if (rc <= 0)
+ break;
+
+ npolled += rc;
+ }
+
+ return npolled ?: rc;
+}
+
+static int ionic_flush_send(struct ionic_qp *qp, struct ib_wc *wc)
+{
+ struct ionic_sq_meta *meta;
+
+ if (!qp->sq_flush)
+ return 0;
+
+ if (ionic_queue_empty(&qp->sq))
+ return 0;
+
+ meta = &qp->sq_meta[qp->sq.cons];
+
+ ionic_queue_consume(&qp->sq);
+
+ memset(wc, 0, sizeof(*wc));
+
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ wc->wr_id = meta->wrid;
+ wc->qp = &qp->ibqp;
+
+ return 1;
+}
+
+static int ionic_flush_send_many(struct ionic_qp *qp,
+ struct ib_wc *wc, int nwc)
+{
+ int rc = 0, npolled = 0;
+
+ while (npolled < nwc) {
+ rc = ionic_flush_send(qp, wc + npolled);
+ if (rc <= 0)
+ break;
+
+ npolled += rc;
+ }
+
+ return npolled ?: rc;
+}
+
+static int ionic_poll_recv(struct ionic_ibdev *dev, struct ionic_cq *cq,
+ struct ionic_qp *cqe_qp, struct ionic_v1_cqe *cqe,
+ struct ib_wc *wc)
+{
+ struct ionic_qp *qp = NULL;
+ struct ionic_rq_meta *meta;
+ u32 src_qpn, st_len;
+ u16 vlan_tag;
+ u8 op;
+
+ if (cqe_qp->rq_flush)
+ return 0;
+
+ qp = cqe_qp;
+
+ st_len = be32_to_cpu(cqe->status_length);
+
+ /* ignore wqe_id in case of flush error */
+ if (ionic_v1_cqe_error(cqe) && st_len == IONIC_STS_WQE_FLUSHED_ERR) {
+ cqe_qp->rq_flush = true;
+ cq->flush = true;
+ list_move_tail(&qp->cq_flush_rq, &cq->flush_rq);
+
+ /* posted recvs (if any) flushed by ionic_flush_recv */
+ return 0;
+ }
+
+ /* there had better be something in the recv queue to complete */
+ if (ionic_queue_empty(&qp->rq)) {
+ ibdev_warn(&dev->ibdev, "qp %u is empty\n", qp->qpid);
+ return -EIO;
+ }
+
+ /* wqe_id must be a valid queue index */
+ if (unlikely(cqe->recv.wqe_id >> qp->rq.depth_log2)) {
+ ibdev_warn(&dev->ibdev,
+ "qp %u recv index %llu invalid\n",
+ qp->qpid, (unsigned long long)cqe->recv.wqe_id);
+ return -EIO;
+ }
+
+ /* wqe_id must indicate a request that is outstanding */
+ meta = &qp->rq_meta[cqe->recv.wqe_id];
+ if (unlikely(meta->next != IONIC_META_POSTED)) {
+ ibdev_warn(&dev->ibdev,
+ "qp %u recv index %llu not posted\n",
+ qp->qpid, (unsigned long long)cqe->recv.wqe_id);
+ return -EIO;
+ }
+
+ meta->next = qp->rq_meta_head;
+ qp->rq_meta_head = meta;
+
+ memset(wc, 0, sizeof(*wc));
+
+ wc->wr_id = meta->wrid;
+
+ wc->qp = &cqe_qp->ibqp;
+
+ if (ionic_v1_cqe_error(cqe)) {
+ wc->vendor_err = st_len;
+ wc->status = ionic_to_ib_status(st_len);
+
+ cqe_qp->rq_flush = true;
+ cq->flush = true;
+ list_move_tail(&qp->cq_flush_rq, &cq->flush_rq);
+
+ ibdev_warn(&dev->ibdev,
+ "qp %d recv cqe with error\n", qp->qpid);
+ print_hex_dump(KERN_WARNING, "cqe ", DUMP_PREFIX_OFFSET, 16, 1,
+ cqe, BIT(cq->q.stride_log2), true);
+ goto out;
+ }
+
+ wc->vendor_err = 0;
+ wc->status = IB_WC_SUCCESS;
+
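+ /* The source qpn and receive opcode are packed into one field in the cqe */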
+ src_qpn = be32_to_cpu(cqe->recv.src_qpn_op);
+ op = src_qpn >> IONIC_V1_CQE_RECV_OP_SHIFT;
+
+ src_qpn &= IONIC_V1_CQE_RECV_QPN_MASK;
+ op &= IONIC_V1_CQE_RECV_OP_MASK;
+
+ wc->opcode = IB_WC_RECV;
+ switch (op) {
+ case IONIC_V1_CQE_RECV_OP_RDMA_IMM:
+ wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ wc->ex.imm_data = cqe->recv.imm_data_rkey; /* be32 in wc */
+ break;
+ case IONIC_V1_CQE_RECV_OP_SEND_IMM:
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ wc->ex.imm_data = cqe->recv.imm_data_rkey; /* be32 in wc */
+ break;
+ case IONIC_V1_CQE_RECV_OP_SEND_INV:
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+ wc->ex.invalidate_rkey = be32_to_cpu(cqe->recv.imm_data_rkey);
+ break;
+ }
+
+ wc->byte_len = st_len;
+ wc->src_qp = src_qpn;
+
+ if (qp->ibqp.qp_type == IB_QPT_UD ||
+ qp->ibqp.qp_type == IB_QPT_GSI) {
+ wc->wc_flags |= IB_WC_GRH | IB_WC_WITH_SMAC;
+ ether_addr_copy(wc->smac, cqe->recv.src_mac);
+
+ wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
+ if (ionic_v1_cqe_recv_is_ipv4(cqe))
+ wc->network_hdr_type = RDMA_NETWORK_IPV4;
+ else
+ wc->network_hdr_type = RDMA_NETWORK_IPV6;
+
+ if (ionic_v1_cqe_recv_is_vlan(cqe))
+ wc->wc_flags |= IB_WC_WITH_VLAN;
+
+ /* vlan_tag in the cqe is valid from the datapath even if there is no vlan */
+ vlan_tag = be16_to_cpu(cqe->recv.vlan_tag);
+ wc->vlan_id = vlan_tag & 0xfff; /* 802.1q VID */
+ wc->sl = vlan_tag >> VLAN_PRIO_SHIFT; /* 802.1q PCP */
+ }
+
+ wc->pkey_index = 0;
+ wc->port_num = 1;
+
+out:
+ ionic_queue_consume(&qp->rq);
+
+ return 1;
+}
+
+static bool ionic_peek_send(struct ionic_qp *qp)
+{
+ struct ionic_sq_meta *meta;
+
+ if (qp->sq_flush)
+ return false;
+
+ /* completed all send queue requests */
+ if (ionic_queue_empty(&qp->sq))
+ return false;
+
+ meta = &qp->sq_meta[qp->sq.cons];
+
+ /* waiting for remote completion */
+ if (meta->remote && meta->seq == qp->sq_msn_cons)
+ return false;
+
+ /* waiting for local completion */
+ if (!meta->remote && !meta->local_comp)
+ return false;
+
+ return true;
+}
+
+static int ionic_poll_send(struct ionic_ibdev *dev, struct ionic_cq *cq,
+ struct ionic_qp *qp, struct ib_wc *wc)
+{
+ struct ionic_sq_meta *meta;
+
+ if (qp->sq_flush)
+ return 0;
+
+ do {
+ /* completed all send queue requests */
+ if (ionic_queue_empty(&qp->sq))
+ goto out_empty;
+
+ meta = &qp->sq_meta[qp->sq.cons];
+
+ /* waiting for remote completion */
+ if (meta->remote && meta->seq == qp->sq_msn_cons)
+ goto out_empty;
+
+ /* waiting for local completion */
+ if (!meta->remote && !meta->local_comp)
+ goto out_empty;
+
+ ionic_queue_consume(&qp->sq);
+
+ /* produce wc only if signaled or error status */
+ } while (!meta->signal && meta->ibsts == IB_WC_SUCCESS);
+
+ memset(wc, 0, sizeof(*wc));
+
+ wc->status = meta->ibsts;
+ wc->wr_id = meta->wrid;
+ wc->qp = &qp->ibqp;
+
+ if (meta->ibsts == IB_WC_SUCCESS) {
+ wc->byte_len = meta->len;
+ wc->opcode = meta->ibop;
+ } else {
+ wc->vendor_err = meta->len;
+
+ qp->sq_flush = true;
+ cq->flush = true;
+ list_move_tail(&qp->cq_flush_sq, &cq->flush_sq);
+ }
+
+ return 1;
+
+out_empty:
+ if (qp->sq_flush_rcvd) {
+ qp->sq_flush = true;
+ cq->flush = true;
+ list_move_tail(&qp->cq_flush_sq, &cq->flush_sq);
+ }
+ return 0;
+}
+
+static int ionic_poll_send_many(struct ionic_ibdev *dev, struct ionic_cq *cq,
+ struct ionic_qp *qp, struct ib_wc *wc, int nwc)
+{
+ int rc = 0, npolled = 0;
+
+ while (npolled < nwc) {
+ rc = ionic_poll_send(dev, cq, qp, wc + npolled);
+ if (rc <= 0)
+ break;
+
+ npolled += rc;
+ }
+
+ return npolled ?: rc;
+}
+
+static int ionic_validate_cons(u16 prod, u16 cons,
+ u16 comp, u16 mask)
+{
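+ /* The completed index must lie within [cons, prod) modulo the ring size */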
+ if (((prod - cons) & mask) <= ((comp - cons) & mask))
+ return -EIO;
+
+ return 0;
+}
+
+static int ionic_comp_msn(struct ionic_qp *qp, struct ionic_v1_cqe *cqe)
+{
+ struct ionic_sq_meta *meta;
+ u16 cqe_seq, cqe_idx;
+ int rc;
+
+ if (qp->sq_flush)
+ return 0;
+
+ cqe_seq = be32_to_cpu(cqe->send.msg_msn) & qp->sq.mask;
+
+ rc = ionic_validate_cons(qp->sq_msn_prod,
+ qp->sq_msn_cons,
+ cqe_seq - 1,
+ qp->sq.mask);
+ if (rc) {
+ ibdev_warn(qp->ibqp.device,
+ "qp %u bad msn %#x seq %u for prod %u cons %u\n",
+ qp->qpid, be32_to_cpu(cqe->send.msg_msn),
+ cqe_seq, qp->sq_msn_prod, qp->sq_msn_cons);
+ return rc;
+ }
+
+ qp->sq_msn_cons = cqe_seq;
+
+ if (ionic_v1_cqe_error(cqe)) {
+ cqe_idx = qp->sq_msn_idx[(cqe_seq - 1) & qp->sq.mask];
+
+ meta = &qp->sq_meta[cqe_idx];
+ meta->len = be32_to_cpu(cqe->status_length);
+ meta->ibsts = ionic_to_ib_status(meta->len);
+
+ ibdev_warn(qp->ibqp.device,
+ "qp %d msn cqe with error\n", qp->qpid);
+ print_hex_dump(KERN_WARNING, "cqe ", DUMP_PREFIX_OFFSET, 16, 1,
+ cqe, sizeof(*cqe), true);
+ }
+
+ return 0;
+}
+
+static int ionic_comp_npg(struct ionic_qp *qp, struct ionic_v1_cqe *cqe)
+{
+ struct ionic_sq_meta *meta;
+ u16 cqe_idx;
+ u32 st_len;
+
+ if (qp->sq_flush)
+ return 0;
+
+ st_len = be32_to_cpu(cqe->status_length);
+
+ if (ionic_v1_cqe_error(cqe) && st_len == IONIC_STS_WQE_FLUSHED_ERR) {
+ /*
+ * A flush cqe does not consume a wqe on the device, and there
+ * may be no such work request posted at all.
+ *
+ * The driver should begin flushing after the last indicated
+ * normal or error completion. Here, only set a hint that the
+ * flush request was indicated. In poll_send, if nothing more
+ * can be polled normally, then begin flushing.
+ */
+ qp->sq_flush_rcvd = true;
+ return 0;
+ }
+
+ cqe_idx = cqe->send.npg_wqe_id & qp->sq.mask;
+ meta = &qp->sq_meta[cqe_idx];
+ meta->local_comp = true;
+
+ if (ionic_v1_cqe_error(cqe)) {
+ meta->len = st_len;
+ meta->ibsts = ionic_to_ib_status(st_len);
+ meta->remote = false;
+ ibdev_warn(qp->ibqp.device,
+ "qp %d npg cqe with error\n", qp->qpid);
+ print_hex_dump(KERN_WARNING, "cqe ", DUMP_PREFIX_OFFSET, 16, 1,
+ cqe, sizeof(*cqe), true);
+ }
+
+ return 0;
+}
+
+static void ionic_reserve_sync_cq(struct ionic_ibdev *dev, struct ionic_cq *cq)
+{
+ if (!ionic_queue_empty(&cq->q)) {
+ cq->credit += ionic_queue_length(&cq->q);
+ cq->q.cons = cq->q.prod;
+
+ ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.cq_qtype,
+ ionic_queue_dbell_val(&cq->q));
+ }
+}
+
+static void ionic_reserve_cq(struct ionic_ibdev *dev, struct ionic_cq *cq,
+ int spend)
+{
+ cq->credit -= spend;
+
+ if (cq->credit <= 0)
+ ionic_reserve_sync_cq(dev, cq);
+}
+
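+/*
+ * Poll one hardware cq backing a verbs cq. Polling proceeds in three passes:
+ * drain send work already indicated for qps on cq->poll_sq, then consume new
+ * cqes from the device ring, and finally generate flush completions for qps
+ * on the cq->flush_sq and cq->flush_rq lists. Returns the number of work
+ * completions written to wc, or a negative error.
+ */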
+static int ionic_poll_vcq_cq(struct ionic_ibdev *dev,
+ struct ionic_cq *cq,
+ int nwc, struct ib_wc *wc)
+{
+ struct ionic_qp *qp, *qp_next;
+ struct ionic_v1_cqe *cqe;
+ int rc = 0, npolled = 0;
+ unsigned long irqflags;
+ u32 qtf, qid;
+ bool peek;
+ u8 type;
+
+ if (nwc < 1)
+ return 0;
+
+ spin_lock_irqsave(&cq->lock, irqflags);
+
+ /* poll previously indicated work completions for the send queues */
+ list_for_each_entry_safe(qp, qp_next, &cq->poll_sq, cq_poll_sq) {
+ if (npolled == nwc)
+ goto out;
+
+ spin_lock(&qp->sq_lock);
+ rc = ionic_poll_send_many(dev, cq, qp, wc + npolled,
+ nwc - npolled);
+ spin_unlock(&qp->sq_lock);
+
+ if (rc > 0)
+ npolled += rc;
+
+ if (npolled < nwc)
+ list_del_init(&qp->cq_poll_sq);
+ }
+
+ /* poll for more work completions */
+ while (likely(ionic_next_cqe(dev, cq, &cqe))) {
+ if (npolled == nwc)
+ goto out;
+
+ qtf = ionic_v1_cqe_qtf(cqe);
+ qid = ionic_v1_cqe_qtf_qid(qtf);
+ type = ionic_v1_cqe_qtf_type(qtf);
+
+ /*
+ * Safe to access QP without additional reference here as,
+ * 1. We hold cq->lock throughout
+ * 2. ionic_destroy_qp() acquires the same cq->lock before cleanup
+ * 3. QP is removed from qp_tbl before any cleanup begins
+ * This ensures no concurrent access between polling and destruction.
+ */
+ qp = xa_load(&dev->qp_tbl, qid);
+ if (unlikely(!qp)) {
+ ibdev_dbg(&dev->ibdev, "missing qp for qid %u\n", qid);
+ goto cq_next;
+ }
+
+ switch (type) {
+ case IONIC_V1_CQE_TYPE_RECV:
+ spin_lock(&qp->rq_lock);
+ rc = ionic_poll_recv(dev, cq, qp, cqe, wc + npolled);
+ spin_unlock(&qp->rq_lock);
+
+ if (rc < 0)
+ goto out;
+
+ npolled += rc;
+
+ break;
+
+ case IONIC_V1_CQE_TYPE_SEND_MSN:
+ spin_lock(&qp->sq_lock);
+ rc = ionic_comp_msn(qp, cqe);
+ if (!rc) {
+ rc = ionic_poll_send_many(dev, cq, qp,
+ wc + npolled,
+ nwc - npolled);
+ peek = ionic_peek_send(qp);
+ }
+ spin_unlock(&qp->sq_lock);
+
+ if (rc < 0)
+ goto out;
+
+ npolled += rc;
+
+ if (peek)
+ list_move_tail(&qp->cq_poll_sq, &cq->poll_sq);
+ break;
+
+ case IONIC_V1_CQE_TYPE_SEND_NPG:
+ spin_lock(&qp->sq_lock);
+ rc = ionic_comp_npg(qp, cqe);
+ if (!rc) {
+ rc = ionic_poll_send_many(dev, cq, qp,
+ wc + npolled,
+ nwc - npolled);
+ peek = ionic_peek_send(qp);
+ }
+ spin_unlock(&qp->sq_lock);
+
+ if (rc < 0)
+ goto out;
+
+ npolled += rc;
+
+ if (peek)
+ list_move_tail(&qp->cq_poll_sq, &cq->poll_sq);
+ break;
+
+ default:
+ ibdev_warn(&dev->ibdev,
+ "unexpected cqe type %u\n", type);
+ rc = -EIO;
+ goto out;
+ }
+
+cq_next:
+ ionic_queue_produce(&cq->q);
+ cq->color = ionic_color_wrap(cq->q.prod, cq->color);
+ }
+
+ /* lastly, flush send and recv queues */
+ if (likely(!cq->flush))
+ goto out;
+
+ cq->flush = false;
+
+ list_for_each_entry_safe(qp, qp_next, &cq->flush_sq, cq_flush_sq) {
+ if (npolled == nwc)
+ goto out;
+
+ spin_lock(&qp->sq_lock);
+ rc = ionic_flush_send_many(qp, wc + npolled, nwc - npolled);
+ spin_unlock(&qp->sq_lock);
+
+ if (rc > 0)
+ npolled += rc;
+
+ if (npolled < nwc)
+ list_del_init(&qp->cq_flush_sq);
+ else
+ cq->flush = true;
+ }
+
+ list_for_each_entry_safe(qp, qp_next, &cq->flush_rq, cq_flush_rq) {
+ if (npolled == nwc)
+ goto out;
+
+ spin_lock(&qp->rq_lock);
+ rc = ionic_flush_recv_many(qp, wc + npolled, nwc - npolled);
+ spin_unlock(&qp->rq_lock);
+
+ if (rc > 0)
+ npolled += rc;
+
+ if (npolled < nwc)
+ list_del_init(&qp->cq_flush_rq);
+ else
+ cq->flush = true;
+ }
+
+out:
+ /* in case credit was depleted (more work posted than cq depth) */
+ if (cq->credit <= 0)
+ ionic_reserve_sync_cq(dev, cq);
+
+ spin_unlock_irqrestore(&cq->lock, irqflags);
+
+ return npolled ?: rc;
+}
+
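+/*
+ * A verbs cq may be backed by one hardware cq per udma queue. The starting
+ * queue alternates between calls (vcq->poll_idx) so that no hardware cq is
+ * starved when the caller polls fewer entries than are available.
+ */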
+int ionic_poll_cq(struct ib_cq *ibcq, int nwc, struct ib_wc *wc)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibcq->device);
+ struct ionic_vcq *vcq = to_ionic_vcq(ibcq);
+ int rc_tmp, rc = 0, npolled = 0;
+ int cq_i, cq_x, cq_ix;
+
+ cq_x = vcq->poll_idx;
+ vcq->poll_idx ^= dev->lif_cfg.udma_count - 1;
+
+ for (cq_i = 0; npolled < nwc && cq_i < dev->lif_cfg.udma_count; ++cq_i) {
+ cq_ix = cq_i ^ cq_x;
+
+ if (!(vcq->udma_mask & BIT(cq_ix)))
+ continue;
+
+ rc_tmp = ionic_poll_vcq_cq(dev, &vcq->cq[cq_ix],
+ nwc - npolled,
+ wc + npolled);
+
+ if (rc_tmp >= 0)
+ npolled += rc_tmp;
+ else if (!rc)
+ rc = rc_tmp;
+ }
+
+ return npolled ?: rc;
+}
+
+static int ionic_req_notify_vcq_cq(struct ionic_ibdev *dev, struct ionic_cq *cq,
+ enum ib_cq_notify_flags flags)
+{
+ u64 dbell_val = cq->q.dbell;
+
+ if (flags & IB_CQ_SOLICITED) {
+ cq->arm_sol_prod = ionic_queue_next(&cq->q, cq->arm_sol_prod);
+ dbell_val |= cq->arm_sol_prod | IONIC_CQ_RING_SOL;
+ } else {
+ cq->arm_any_prod = ionic_queue_next(&cq->q, cq->arm_any_prod);
+ dbell_val |= cq->arm_any_prod | IONIC_CQ_RING_ARM;
+ }
+
+ ionic_reserve_sync_cq(dev, cq);
+
+ ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.cq_qtype, dbell_val);
+
+ /*
+ * IB_CQ_REPORT_MISSED_EVENTS:
+ *
+ * The queue index in ring zero guarantees no missed events.
+ *
+ * Here, we check if the color bit in the next cqe is flipped. If it
+ * is flipped, then progress can be made by immediately polling the cq.
+ * Still, the cq will be armed, and an event will be generated. The cq
+ * may be empty when polled after the event, because the next poll
+ * after arming the cq can empty it.
+ */
+ return (flags & IB_CQ_REPORT_MISSED_EVENTS) &&
+ cq->color == ionic_v1_cqe_color(ionic_queue_at_prod(&cq->q));
+}
+
+int ionic_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibcq->device);
+ struct ionic_vcq *vcq = to_ionic_vcq(ibcq);
+ int rc = 0, cq_i;
+
+ for (cq_i = 0; cq_i < dev->lif_cfg.udma_count; ++cq_i) {
+ if (!(vcq->udma_mask & BIT(cq_i)))
+ continue;
+
+ if (ionic_req_notify_vcq_cq(dev, &vcq->cq[cq_i], flags))
+ rc = 1;
+ }
+
+ return rc;
+}
+
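+/*
+ * Copy inline send data directly into the wqe payload. Returns the total
+ * number of bytes copied, or -EINVAL if the scatter list exceeds the inline
+ * capacity (max_data) of the wqe.
+ */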
+static s64 ionic_prep_inline(void *data, u32 max_data,
+ const struct ib_sge *ib_sgl, int num_sge)
+{
+ static const s64 bit_31 = 1u << 31;
+ s64 len = 0, sg_len;
+ int sg_i;
+
+ for (sg_i = 0; sg_i < num_sge; ++sg_i) {
+ sg_len = ib_sgl[sg_i].length;
+
+ /* sge length zero means 2GB */
+ if (unlikely(sg_len == 0))
+ sg_len = bit_31;
+
+ /* greater than max inline data is invalid */
+ if (unlikely(len + sg_len > max_data))
+ return -EINVAL;
+
+ memcpy(data + len, (void *)ib_sgl[sg_i].addr, sg_len);
+
+ len += sg_len;
+ }
+
+ return len;
+}
+
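+/*
+ * Build the wqe scatter/gather payload. When the spec sgl format is in use
+ * and there are more than IONIC_V1_SPEC_FIRST_SGE entries, the sge array is
+ * offset by that many slots and the space at the front of the payload holds
+ * a compact array of sge lengths (32-bit, or 16-bit when there are more than
+ * eight sges), signalled by IONIC_V1_FLAG_SPEC32 or IONIC_V1_FLAG_SPEC16.
+ * Returns the total payload length, or -EINVAL.
+ */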
+static s64 ionic_prep_pld(struct ionic_v1_wqe *wqe,
+ union ionic_v1_pld *pld,
+ int spec, u32 max_sge,
+ const struct ib_sge *ib_sgl,
+ int num_sge)
+{
+ static const s64 bit_31 = 1l << 31;
+ struct ionic_sge *sgl;
+ __be32 *spec32 = NULL;
+ __be16 *spec16 = NULL;
+ s64 len = 0, sg_len;
+ int sg_i = 0;
+
+ if (unlikely(num_sge < 0 || (u32)num_sge > max_sge))
+ return -EINVAL;
+
+ if (spec && num_sge > IONIC_V1_SPEC_FIRST_SGE) {
+ sg_i = IONIC_V1_SPEC_FIRST_SGE;
+
+ if (num_sge > 8) {
+ wqe->base.flags |= cpu_to_be16(IONIC_V1_FLAG_SPEC16);
+ spec16 = pld->spec16;
+ } else {
+ wqe->base.flags |= cpu_to_be16(IONIC_V1_FLAG_SPEC32);
+ spec32 = pld->spec32;
+ }
+ }
+
+ sgl = &pld->sgl[sg_i];
+
+ for (sg_i = 0; sg_i < num_sge; ++sg_i) {
+ sg_len = ib_sgl[sg_i].length;
+
+ /* sge length zero means 2GB */
+ if (unlikely(sg_len == 0))
+ sg_len = bit_31;
+
+ /* greater than 2GB data is invalid */
+ if (unlikely(len + sg_len > bit_31))
+ return -EINVAL;
+
+ sgl[sg_i].va = cpu_to_be64(ib_sgl[sg_i].addr);
+ sgl[sg_i].len = cpu_to_be32(sg_len);
+ sgl[sg_i].lkey = cpu_to_be32(ib_sgl[sg_i].lkey);
+
+ if (spec32) {
+ spec32[sg_i] = sgl[sg_i].len;
+ } else if (spec16) {
+ if (unlikely(sg_len > U16_MAX))
+ return -EINVAL;
+ spec16[sg_i] = cpu_to_be16(sg_len);
+ }
+
+ len += sg_len;
+ }
+
+ return len;
+}
+
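+/*
+ * Fill the wqe fields and sq metadata common to every send opcode: wrid,
+ * signaling, and the fence/solicited flags. For remote operations (not
+ * UD/GSI and not local-only opcodes) an entry is also recorded in the msn
+ * index so the SEND_MSN completion path can locate the wqe later.
+ */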
+static void ionic_prep_base(struct ionic_qp *qp,
+ const struct ib_send_wr *wr,
+ struct ionic_sq_meta *meta,
+ struct ionic_v1_wqe *wqe)
+{
+ meta->wrid = wr->wr_id;
+ meta->ibsts = IB_WC_SUCCESS;
+ meta->signal = false;
+ meta->local_comp = false;
+
+ wqe->base.wqe_id = qp->sq.prod;
+
+ if (wr->send_flags & IB_SEND_FENCE)
+ wqe->base.flags |= cpu_to_be16(IONIC_V1_FLAG_FENCE);
+
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ wqe->base.flags |= cpu_to_be16(IONIC_V1_FLAG_SOL);
+
+ if (qp->sig_all || wr->send_flags & IB_SEND_SIGNALED) {
+ wqe->base.flags |= cpu_to_be16(IONIC_V1_FLAG_SIG);
+ meta->signal = true;
+ }
+
+ meta->seq = qp->sq_msn_prod;
+ meta->remote =
+ qp->ibqp.qp_type != IB_QPT_UD &&
+ qp->ibqp.qp_type != IB_QPT_GSI &&
+ !ionic_ibop_is_local(wr->opcode);
+
+ if (meta->remote) {
+ qp->sq_msn_idx[meta->seq] = qp->sq.prod;
+ qp->sq_msn_prod = ionic_queue_next(&qp->sq, qp->sq_msn_prod);
+ }
+
+ ionic_queue_produce(&qp->sq);
+}
+
+static int ionic_prep_common(struct ionic_qp *qp,
+ const struct ib_send_wr *wr,
+ struct ionic_sq_meta *meta,
+ struct ionic_v1_wqe *wqe)
+{
+ s64 signed_len;
+ u32 mval;
+
+ if (wr->send_flags & IB_SEND_INLINE) {
+ wqe->base.num_sge_key = 0;
+ wqe->base.flags |= cpu_to_be16(IONIC_V1_FLAG_INL);
+ mval = ionic_v1_send_wqe_max_data(qp->sq.stride_log2, false);
+ signed_len = ionic_prep_inline(wqe->common.pld.data, mval,
+ wr->sg_list, wr->num_sge);
+ } else {
+ wqe->base.num_sge_key = wr->num_sge;
+ mval = ionic_v1_send_wqe_max_sge(qp->sq.stride_log2,
+ qp->sq_spec,
+ false);
+ signed_len = ionic_prep_pld(wqe, &wqe->common.pld,
+ qp->sq_spec, mval,
+ wr->sg_list, wr->num_sge);
+ }
+
+ if (unlikely(signed_len < 0))
+ return signed_len;
+
+ meta->len = signed_len;
+ wqe->common.length = cpu_to_be32(signed_len);
+
+ ionic_prep_base(qp, wr, meta, wqe);
+
+ return 0;
+}
+
+static void ionic_prep_sq_wqe(struct ionic_qp *qp, void *wqe)
+{
+ memset(wqe, 0, 1u << qp->sq.stride_log2);
+}
+
+static void ionic_prep_rq_wqe(struct ionic_qp *qp, void *wqe)
+{
+ memset(wqe, 0, 1u << qp->rq.stride_log2);
+}
+
+static int ionic_prep_send(struct ionic_qp *qp,
+ const struct ib_send_wr *wr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device);
+ struct ionic_sq_meta *meta;
+ struct ionic_v1_wqe *wqe;
+
+ meta = &qp->sq_meta[qp->sq.prod];
+ wqe = ionic_queue_at_prod(&qp->sq);
+
+ ionic_prep_sq_wqe(qp, wqe);
+
+ meta->ibop = IB_WC_SEND;
+
+ switch (wr->opcode) {
+ case IB_WR_SEND:
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, SEND);
+ break;
+ case IB_WR_SEND_WITH_IMM:
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, SEND_IMM);
+ wqe->base.imm_data_key = wr->ex.imm_data;
+ break;
+ case IB_WR_SEND_WITH_INV:
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, SEND_INV);
+ wqe->base.imm_data_key =
+ cpu_to_be32(wr->ex.invalidate_rkey);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ionic_prep_common(qp, wr, meta, wqe);
+}
+
+static int ionic_prep_send_ud(struct ionic_qp *qp,
+ const struct ib_ud_wr *wr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device);
+ struct ionic_sq_meta *meta;
+ struct ionic_v1_wqe *wqe;
+ struct ionic_ah *ah;
+
+ if (unlikely(!wr->ah))
+ return -EINVAL;
+
+ ah = to_ionic_ah(wr->ah);
+
+ meta = &qp->sq_meta[qp->sq.prod];
+ wqe = ionic_queue_at_prod(&qp->sq);
+
+ ionic_prep_sq_wqe(qp, wqe);
+
+ wqe->common.send.ah_id = cpu_to_be32(ah->ahid);
+ wqe->common.send.dest_qpn = cpu_to_be32(wr->remote_qpn);
+ wqe->common.send.dest_qkey = cpu_to_be32(wr->remote_qkey);
+
+ meta->ibop = IB_WC_SEND;
+
+ switch (wr->wr.opcode) {
+ case IB_WR_SEND:
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, SEND);
+ break;
+ case IB_WR_SEND_WITH_IMM:
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, SEND_IMM);
+ wqe->base.imm_data_key = wr->wr.ex.imm_data;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ionic_prep_common(qp, &wr->wr, meta, wqe);
+}
+
+static int ionic_prep_rdma(struct ionic_qp *qp,
+ const struct ib_rdma_wr *wr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device);
+ struct ionic_sq_meta *meta;
+ struct ionic_v1_wqe *wqe;
+
+ meta = &qp->sq_meta[qp->sq.prod];
+ wqe = ionic_queue_at_prod(&qp->sq);
+
+ ionic_prep_sq_wqe(qp, wqe);
+
+ meta->ibop = IB_WC_RDMA_WRITE;
+
+ switch (wr->wr.opcode) {
+ case IB_WR_RDMA_READ:
+ if (wr->wr.send_flags & (IB_SEND_SOLICITED | IB_SEND_INLINE))
+ return -EINVAL;
+ meta->ibop = IB_WC_RDMA_READ;
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, RDMA_READ);
+ break;
+ case IB_WR_RDMA_WRITE:
+ if (wr->wr.send_flags & IB_SEND_SOLICITED)
+ return -EINVAL;
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, RDMA_WRITE);
+ break;
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, RDMA_WRITE_IMM);
+ wqe->base.imm_data_key = wr->wr.ex.imm_data;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ wqe->common.rdma.remote_va_high = cpu_to_be32(wr->remote_addr >> 32);
+ wqe->common.rdma.remote_va_low = cpu_to_be32(wr->remote_addr);
+ wqe->common.rdma.remote_rkey = cpu_to_be32(wr->rkey);
+
+ return ionic_prep_common(qp, &wr->wr, meta, wqe);
+}
+
+static int ionic_prep_atomic(struct ionic_qp *qp,
+ const struct ib_atomic_wr *wr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device);
+ struct ionic_sq_meta *meta;
+ struct ionic_v1_wqe *wqe;
+
+ if (wr->wr.num_sge != 1 || wr->wr.sg_list[0].length != 8)
+ return -EINVAL;
+
+ if (wr->wr.send_flags & (IB_SEND_SOLICITED | IB_SEND_INLINE))
+ return -EINVAL;
+
+ meta = &qp->sq_meta[qp->sq.prod];
+ wqe = ionic_queue_at_prod(&qp->sq);
+
+ ionic_prep_sq_wqe(qp, wqe);
+
+ meta->ibop = IB_WC_RDMA_WRITE;
+
+ switch (wr->wr.opcode) {
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ meta->ibop = IB_WC_COMP_SWAP;
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, ATOMIC_CS);
+ wqe->atomic.swap_add_high = cpu_to_be32(wr->swap >> 32);
+ wqe->atomic.swap_add_low = cpu_to_be32(wr->swap);
+ wqe->atomic.compare_high = cpu_to_be32(wr->compare_add >> 32);
+ wqe->atomic.compare_low = cpu_to_be32(wr->compare_add);
+ break;
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ meta->ibop = IB_WC_FETCH_ADD;
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, ATOMIC_FA);
+ wqe->atomic.swap_add_high = cpu_to_be32(wr->compare_add >> 32);
+ wqe->atomic.swap_add_low = cpu_to_be32(wr->compare_add);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ wqe->atomic.remote_va_high = cpu_to_be32(wr->remote_addr >> 32);
+ wqe->atomic.remote_va_low = cpu_to_be32(wr->remote_addr);
+ wqe->atomic.remote_rkey = cpu_to_be32(wr->rkey);
+
+ wqe->base.num_sge_key = 1;
+ wqe->atomic.sge.va = cpu_to_be64(wr->wr.sg_list[0].addr);
+ wqe->atomic.sge.len = cpu_to_be32(8);
+ wqe->atomic.sge.lkey = cpu_to_be32(wr->wr.sg_list[0].lkey);
+
+ return ionic_prep_common(qp, &wr->wr, meta, wqe);
+}
+
+static int ionic_prep_inv(struct ionic_qp *qp,
+ const struct ib_send_wr *wr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device);
+ struct ionic_sq_meta *meta;
+ struct ionic_v1_wqe *wqe;
+
+ if (wr->send_flags & (IB_SEND_SOLICITED | IB_SEND_INLINE))
+ return -EINVAL;
+
+ meta = &qp->sq_meta[qp->sq.prod];
+ wqe = ionic_queue_at_prod(&qp->sq);
+
+ ionic_prep_sq_wqe(qp, wqe);
+
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, LOCAL_INV);
+ wqe->base.imm_data_key = cpu_to_be32(wr->ex.invalidate_rkey);
+
+ meta->len = 0;
+ meta->ibop = IB_WC_LOCAL_INV;
+
+ ionic_prep_base(qp, wr, meta, wqe);
+
+ return 0;
+}
+
+static int ionic_prep_reg(struct ionic_qp *qp,
+ const struct ib_reg_wr *wr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device);
+ struct ionic_mr *mr = to_ionic_mr(wr->mr);
+ struct ionic_sq_meta *meta;
+ struct ionic_v1_wqe *wqe;
+ __le64 dma_addr;
+ int flags;
+
+ if (wr->wr.send_flags & (IB_SEND_SOLICITED | IB_SEND_INLINE))
+ return -EINVAL;
+
+ /* must call ib_map_mr_sg before posting reg wr */
+ if (!mr->buf.tbl_pages)
+ return -EINVAL;
+
+ meta = &qp->sq_meta[qp->sq.prod];
+ wqe = ionic_queue_at_prod(&qp->sq);
+
+ ionic_prep_sq_wqe(qp, wqe);
+
+ flags = to_ionic_mr_flags(wr->access);
+
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, REG_MR);
+ wqe->base.num_sge_key = wr->key;
+ wqe->base.imm_data_key = cpu_to_be32(mr->ibmr.lkey);
+ wqe->reg_mr.va = cpu_to_be64(mr->ibmr.iova);
+ wqe->reg_mr.length = cpu_to_be64(mr->ibmr.length);
+ wqe->reg_mr.offset = ionic_pgtbl_off(&mr->buf, mr->ibmr.iova);
+ dma_addr = ionic_pgtbl_dma(&mr->buf, mr->ibmr.iova);
+ wqe->reg_mr.dma_addr = cpu_to_be64(le64_to_cpu(dma_addr));
+
+ wqe->reg_mr.map_count = cpu_to_be32(mr->buf.tbl_pages);
+ wqe->reg_mr.flags = cpu_to_be16(flags);
+ wqe->reg_mr.dir_size_log2 = 0;
+ wqe->reg_mr.page_size_log2 = order_base_2(mr->ibmr.page_size);
+
+ meta->len = 0;
+ meta->ibop = IB_WC_REG_MR;
+
+ ionic_prep_base(qp, &wr->wr, meta, wqe);
+
+ return 0;
+}
+
+static int ionic_prep_one_rc(struct ionic_qp *qp,
+ const struct ib_send_wr *wr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device);
+ int rc = 0;
+
+ switch (wr->opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ case IB_WR_SEND_WITH_INV:
+ rc = ionic_prep_send(qp, wr);
+ break;
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ rc = ionic_prep_rdma(qp, rdma_wr(wr));
+ break;
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ rc = ionic_prep_atomic(qp, atomic_wr(wr));
+ break;
+ case IB_WR_LOCAL_INV:
+ rc = ionic_prep_inv(qp, wr);
+ break;
+ case IB_WR_REG_MR:
+ rc = ionic_prep_reg(qp, reg_wr(wr));
+ break;
+ default:
+ ibdev_dbg(&dev->ibdev, "invalid opcode %d\n", wr->opcode);
+ rc = -EINVAL;
+ }
+
+ return rc;
+}
+
+static int ionic_prep_one_ud(struct ionic_qp *qp,
+ const struct ib_send_wr *wr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device);
+ int rc = 0;
+
+ switch (wr->opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ rc = ionic_prep_send_ud(qp, ud_wr(wr));
+ break;
+ default:
+ ibdev_dbg(&dev->ibdev, "invalid opcode %d\n", wr->opcode);
+ rc = -EINVAL;
+ }
+
+ return rc;
+}
+
+static int ionic_prep_recv(struct ionic_qp *qp,
+ const struct ib_recv_wr *wr)
+{
+ struct ionic_rq_meta *meta;
+ struct ionic_v1_wqe *wqe;
+ s64 signed_len;
+ u32 mval;
+
+ wqe = ionic_queue_at_prod(&qp->rq);
+
+ /* if wqe is owned by device, caller can try posting again soon */
+ if (wqe->base.flags & cpu_to_be16(IONIC_V1_FLAG_FENCE))
+ return -EAGAIN;
+
+ meta = qp->rq_meta_head;
+ if (unlikely(meta == IONIC_META_LAST) ||
+ unlikely(meta == IONIC_META_POSTED))
+ return -EIO;
+
+ ionic_prep_rq_wqe(qp, wqe);
+
+ mval = ionic_v1_recv_wqe_max_sge(qp->rq.stride_log2, qp->rq_spec,
+ false);
+ signed_len = ionic_prep_pld(wqe, &wqe->recv.pld,
+ qp->rq_spec, mval,
+ wr->sg_list, wr->num_sge);
+ if (signed_len < 0)
+ return signed_len;
+
+ meta->wrid = wr->wr_id;
+
+ wqe->base.wqe_id = meta - qp->rq_meta;
+ wqe->base.num_sge_key = wr->num_sge;
+
+ /* total length for recv goes in base imm_data_key */
+ wqe->base.imm_data_key = cpu_to_be32(signed_len);
+
+ ionic_queue_produce(&qp->rq);
+
+ qp->rq_meta_head = meta->next;
+ meta->next = IONIC_META_POSTED;
+
+ return 0;
+}
+
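+/*
+ * Post a chain of send work requests. The sq lock alone is held while the
+ * wqes are built; the cq lock and sq lock are then taken together to reserve
+ * cq credits, ring the sq doorbell, and, if the qp has entered the flush
+ * state, schedule a completion notification.
+ */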
+static int ionic_post_send_common(struct ionic_ibdev *dev,
+ struct ionic_vcq *vcq,
+ struct ionic_cq *cq,
+ struct ionic_qp *qp,
+ const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad)
+{
+ unsigned long irqflags;
+ bool notify = false;
+ int spend, rc = 0;
+
+ if (!bad)
+ return -EINVAL;
+
+ if (!qp->has_sq) {
+ *bad = wr;
+ return -EINVAL;
+ }
+
+ if (qp->state < IB_QPS_RTS) {
+ *bad = wr;
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&qp->sq_lock, irqflags);
+
+ while (wr) {
+ if (ionic_queue_full(&qp->sq)) {
+ ibdev_dbg(&dev->ibdev, "queue full");
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ if (qp->ibqp.qp_type == IB_QPT_UD ||
+ qp->ibqp.qp_type == IB_QPT_GSI)
+ rc = ionic_prep_one_ud(qp, wr);
+ else
+ rc = ionic_prep_one_rc(qp, wr);
+ if (rc)
+ goto out;
+
+ wr = wr->next;
+ }
+
+out:
+ spin_unlock_irqrestore(&qp->sq_lock, irqflags);
+
+ spin_lock_irqsave(&cq->lock, irqflags);
+ spin_lock(&qp->sq_lock);
+
+ if (likely(qp->sq.prod != qp->sq_old_prod)) {
+ /* ring cq doorbell just in time */
+ spend = (qp->sq.prod - qp->sq_old_prod) & qp->sq.mask;
+ ionic_reserve_cq(dev, cq, spend);
+
+ qp->sq_old_prod = qp->sq.prod;
+
+ ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.sq_qtype,
+ ionic_queue_dbell_val(&qp->sq));
+ }
+
+ if (qp->sq_flush) {
+ notify = true;
+ cq->flush = true;
+ list_move_tail(&qp->cq_flush_sq, &cq->flush_sq);
+ }
+
+ spin_unlock(&qp->sq_lock);
+ spin_unlock_irqrestore(&cq->lock, irqflags);
+
+ if (notify && vcq->ibcq.comp_handler)
+ vcq->ibcq.comp_handler(&vcq->ibcq, vcq->ibcq.cq_context);
+
+ *bad = wr;
+ return rc;
+}
+
+static int ionic_post_recv_common(struct ionic_ibdev *dev,
+ struct ionic_vcq *vcq,
+ struct ionic_cq *cq,
+ struct ionic_qp *qp,
+ const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad)
+{
+ unsigned long irqflags;
+ bool notify = false;
+ int spend, rc = 0;
+
+ if (!bad)
+ return -EINVAL;
+
+ if (!qp->has_rq) {
+ *bad = wr;
+ return -EINVAL;
+ }
+
+ if (qp->state < IB_QPS_INIT) {
+ *bad = wr;
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&qp->rq_lock, irqflags);
+
+ while (wr) {
+ if (ionic_queue_full(&qp->rq)) {
+ ibdev_dbg(&dev->ibdev, "queue full");
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ rc = ionic_prep_recv(qp, wr);
+ if (rc)
+ goto out;
+
+ wr = wr->next;
+ }
+
+out:
+ if (!cq) {
+ spin_unlock_irqrestore(&qp->rq_lock, irqflags);
+ goto out_unlocked;
+ }
+ spin_unlock_irqrestore(&qp->rq_lock, irqflags);
+
+ spin_lock_irqsave(&cq->lock, irqflags);
+ spin_lock(&qp->rq_lock);
+
+ if (likely(qp->rq.prod != qp->rq_old_prod)) {
+ /* ring cq doorbell just in time */
+ spend = (qp->rq.prod - qp->rq_old_prod) & qp->rq.mask;
+ ionic_reserve_cq(dev, cq, spend);
+
+ qp->rq_old_prod = qp->rq.prod;
+
+ ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.rq_qtype,
+ ionic_queue_dbell_val(&qp->rq));
+ }
+
+ if (qp->rq_flush) {
+ notify = true;
+ cq->flush = true;
+ list_move_tail(&qp->cq_flush_rq, &cq->flush_rq);
+ }
+
+ spin_unlock(&qp->rq_lock);
+ spin_unlock_irqrestore(&cq->lock, irqflags);
+
+ if (notify && vcq->ibcq.comp_handler)
+ vcq->ibcq.comp_handler(&vcq->ibcq, vcq->ibcq.cq_context);
+
+out_unlocked:
+ *bad = wr;
+ return rc;
+}
+
+int ionic_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibqp->device);
+ struct ionic_vcq *vcq = to_ionic_vcq(ibqp->send_cq);
+ struct ionic_qp *qp = to_ionic_qp(ibqp);
+ struct ionic_cq *cq =
+ to_ionic_vcq_cq(ibqp->send_cq, qp->udma_idx);
+
+ return ionic_post_send_common(dev, vcq, cq, qp, wr, bad);
+}
+
+int ionic_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibqp->device);
+ struct ionic_vcq *vcq = to_ionic_vcq(ibqp->recv_cq);
+ struct ionic_qp *qp = to_ionic_qp(ibqp);
+ struct ionic_cq *cq =
+ to_ionic_vcq_cq(ibqp->recv_cq, qp->udma_idx);
+
+ return ionic_post_recv_common(dev, vcq, cq, qp, wr, bad);
+}
diff --git a/drivers/infiniband/hw/ionic/ionic_fw.h b/drivers/infiniband/hw/ionic/ionic_fw.h
new file mode 100644
index 000000000000..adfbb89d856c
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_fw.h
@@ -0,0 +1,1029 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#ifndef _IONIC_FW_H_
+#define _IONIC_FW_H_
+
+#include <linux/kernel.h>
+#include <rdma/ib_verbs.h>
+
+/* common for ib spec */
+
+#define IONIC_EXP_DBELL_SZ 8
+
+enum ionic_mrid_bits {
+ IONIC_MRID_INDEX_SHIFT = 8,
+};
+
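+/*
+ * lkey/rkey layout: the low eight bits are a key byte and the remaining bits
+ * are the mr table index. For example, index 5 with key 0xab composes the
+ * mrid 0x5ab.
+ */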
+static inline u32 ionic_mrid(u32 index, u8 key)
+{
+ return (index << IONIC_MRID_INDEX_SHIFT) | key;
+}
+
+static inline u32 ionic_mrid_index(u32 lrkey)
+{
+ return lrkey >> IONIC_MRID_INDEX_SHIFT;
+}
+
+/* common to all versions */
+
+/* wqe scatter gather element */
+struct ionic_sge {
+ __be64 va;
+ __be32 len;
+ __be32 lkey;
+};
+
+/* admin queue mr type */
+enum ionic_mr_flags {
+ /* bits that determine mr access */
+ IONIC_MRF_LOCAL_WRITE = BIT(0),
+ IONIC_MRF_REMOTE_WRITE = BIT(1),
+ IONIC_MRF_REMOTE_READ = BIT(2),
+ IONIC_MRF_REMOTE_ATOMIC = BIT(3),
+ IONIC_MRF_MW_BIND = BIT(4),
+ IONIC_MRF_ZERO_BASED = BIT(5),
+ IONIC_MRF_ON_DEMAND = BIT(6),
+ IONIC_MRF_PB = BIT(7),
+ IONIC_MRF_ACCESS_MASK = BIT(12) - 1,
+
+ /* bits that determine mr type */
+ IONIC_MRF_UKEY_EN = BIT(13),
+ IONIC_MRF_IS_MW = BIT(14),
+ IONIC_MRF_INV_EN = BIT(15),
+
+ /* base flags combinations for mr types */
+ IONIC_MRF_USER_MR = 0,
+ IONIC_MRF_PHYS_MR = (IONIC_MRF_UKEY_EN |
+ IONIC_MRF_INV_EN),
+ IONIC_MRF_MW_1 = (IONIC_MRF_UKEY_EN |
+ IONIC_MRF_IS_MW),
+ IONIC_MRF_MW_2 = (IONIC_MRF_UKEY_EN |
+ IONIC_MRF_IS_MW |
+ IONIC_MRF_INV_EN),
+};
+
+static inline int to_ionic_mr_flags(int access)
+{
+ int flags = 0;
+
+ if (access & IB_ACCESS_LOCAL_WRITE)
+ flags |= IONIC_MRF_LOCAL_WRITE;
+
+ if (access & IB_ACCESS_REMOTE_READ)
+ flags |= IONIC_MRF_REMOTE_READ;
+
+ if (access & IB_ACCESS_REMOTE_WRITE)
+ flags |= IONIC_MRF_REMOTE_WRITE;
+
+ if (access & IB_ACCESS_REMOTE_ATOMIC)
+ flags |= IONIC_MRF_REMOTE_ATOMIC;
+
+ if (access & IB_ACCESS_MW_BIND)
+ flags |= IONIC_MRF_MW_BIND;
+
+ if (access & IB_ZERO_BASED)
+ flags |= IONIC_MRF_ZERO_BASED;
+
+ return flags;
+}
+
+enum ionic_qp_flags {
+ /* bits that determine qp access */
+ IONIC_QPF_REMOTE_WRITE = BIT(0),
+ IONIC_QPF_REMOTE_READ = BIT(1),
+ IONIC_QPF_REMOTE_ATOMIC = BIT(2),
+
+ /* bits that determine other qp behavior */
+ IONIC_QPF_SQ_PB = BIT(6),
+ IONIC_QPF_RQ_PB = BIT(7),
+ IONIC_QPF_SQ_SPEC = BIT(8),
+ IONIC_QPF_RQ_SPEC = BIT(9),
+ IONIC_QPF_REMOTE_PRIVILEGED = BIT(10),
+ IONIC_QPF_SQ_DRAINING = BIT(11),
+ IONIC_QPF_SQD_NOTIFY = BIT(12),
+ IONIC_QPF_SQ_CMB = BIT(13),
+ IONIC_QPF_RQ_CMB = BIT(14),
+ IONIC_QPF_PRIVILEGED = BIT(15),
+};
+
+static inline int from_ionic_qp_flags(int flags)
+{
+ int access_flags = 0;
+
+ if (flags & IONIC_QPF_REMOTE_WRITE)
+ access_flags |= IB_ACCESS_REMOTE_WRITE;
+
+ if (flags & IONIC_QPF_REMOTE_READ)
+ access_flags |= IB_ACCESS_REMOTE_READ;
+
+ if (flags & IONIC_QPF_REMOTE_ATOMIC)
+ access_flags |= IB_ACCESS_REMOTE_ATOMIC;
+
+ return access_flags;
+}
+
+static inline int to_ionic_qp_flags(int access, bool sqd_notify,
+ bool sq_is_cmb, bool rq_is_cmb,
+ bool sq_spec, bool rq_spec,
+ bool privileged, bool remote_privileged)
+{
+ int flags = 0;
+
+ if (access & IB_ACCESS_REMOTE_WRITE)
+ flags |= IONIC_QPF_REMOTE_WRITE;
+
+ if (access & IB_ACCESS_REMOTE_READ)
+ flags |= IONIC_QPF_REMOTE_READ;
+
+ if (access & IB_ACCESS_REMOTE_ATOMIC)
+ flags |= IONIC_QPF_REMOTE_ATOMIC;
+
+ if (sqd_notify)
+ flags |= IONIC_QPF_SQD_NOTIFY;
+
+ if (sq_is_cmb)
+ flags |= IONIC_QPF_SQ_CMB;
+
+ if (rq_is_cmb)
+ flags |= IONIC_QPF_RQ_CMB;
+
+ if (sq_spec)
+ flags |= IONIC_QPF_SQ_SPEC;
+
+ if (rq_spec)
+ flags |= IONIC_QPF_RQ_SPEC;
+
+ if (privileged)
+ flags |= IONIC_QPF_PRIVILEGED;
+
+ if (remote_privileged)
+ flags |= IONIC_QPF_REMOTE_PRIVILEGED;
+
+ return flags;
+}
+
+/* cqe non-admin status indicated in status_length field when err bit is set */
+enum ionic_status {
+ IONIC_STS_OK,
+ IONIC_STS_LOCAL_LEN_ERR,
+ IONIC_STS_LOCAL_QP_OPER_ERR,
+ IONIC_STS_LOCAL_PROT_ERR,
+ IONIC_STS_WQE_FLUSHED_ERR,
+ IONIC_STS_MEM_MGMT_OPER_ERR,
+ IONIC_STS_BAD_RESP_ERR,
+ IONIC_STS_LOCAL_ACC_ERR,
+ IONIC_STS_REMOTE_INV_REQ_ERR,
+ IONIC_STS_REMOTE_ACC_ERR,
+ IONIC_STS_REMOTE_OPER_ERR,
+ IONIC_STS_RETRY_EXCEEDED,
+ IONIC_STS_RNR_RETRY_EXCEEDED,
+ IONIC_STS_XRC_VIO_ERR,
+ IONIC_STS_LOCAL_SGL_INV_ERR,
+};
+
+static inline int ionic_to_ib_status(int sts)
+{
+ switch (sts) {
+ case IONIC_STS_OK:
+ return IB_WC_SUCCESS;
+ case IONIC_STS_LOCAL_LEN_ERR:
+ return IB_WC_LOC_LEN_ERR;
+ case IONIC_STS_LOCAL_QP_OPER_ERR:
+ case IONIC_STS_LOCAL_SGL_INV_ERR:
+ return IB_WC_LOC_QP_OP_ERR;
+ case IONIC_STS_LOCAL_PROT_ERR:
+ return IB_WC_LOC_PROT_ERR;
+ case IONIC_STS_WQE_FLUSHED_ERR:
+ return IB_WC_WR_FLUSH_ERR;
+ case IONIC_STS_MEM_MGMT_OPER_ERR:
+ return IB_WC_MW_BIND_ERR;
+ case IONIC_STS_BAD_RESP_ERR:
+ return IB_WC_BAD_RESP_ERR;
+ case IONIC_STS_LOCAL_ACC_ERR:
+ return IB_WC_LOC_ACCESS_ERR;
+ case IONIC_STS_REMOTE_INV_REQ_ERR:
+ return IB_WC_REM_INV_REQ_ERR;
+ case IONIC_STS_REMOTE_ACC_ERR:
+ return IB_WC_REM_ACCESS_ERR;
+ case IONIC_STS_REMOTE_OPER_ERR:
+ return IB_WC_REM_OP_ERR;
+ case IONIC_STS_RETRY_EXCEEDED:
+ return IB_WC_RETRY_EXC_ERR;
+ case IONIC_STS_RNR_RETRY_EXCEEDED:
+ return IB_WC_RNR_RETRY_EXC_ERR;
+ case IONIC_STS_XRC_VIO_ERR:
+ default:
+ return IB_WC_GENERAL_ERR;
+ }
+}
+
+/* admin queue qp type */
+enum ionic_qp_type {
+ IONIC_QPT_RC,
+ IONIC_QPT_UC,
+ IONIC_QPT_RD,
+ IONIC_QPT_UD,
+ IONIC_QPT_SRQ,
+ IONIC_QPT_XRC_INI,
+ IONIC_QPT_XRC_TGT,
+ IONIC_QPT_XRC_SRQ,
+};
+
+static inline int to_ionic_qp_type(enum ib_qp_type type)
+{
+ switch (type) {
+ case IB_QPT_GSI:
+ case IB_QPT_UD:
+ return IONIC_QPT_UD;
+ case IB_QPT_RC:
+ return IONIC_QPT_RC;
+ case IB_QPT_UC:
+ return IONIC_QPT_UC;
+ case IB_QPT_XRC_INI:
+ return IONIC_QPT_XRC_INI;
+ case IB_QPT_XRC_TGT:
+ return IONIC_QPT_XRC_TGT;
+ default:
+ return -EINVAL;
+ }
+}
+
+/* admin queue qp state */
+enum ionic_qp_state {
+ IONIC_QPS_RESET,
+ IONIC_QPS_INIT,
+ IONIC_QPS_RTR,
+ IONIC_QPS_RTS,
+ IONIC_QPS_SQD,
+ IONIC_QPS_SQE,
+ IONIC_QPS_ERR,
+};
+
+static inline int from_ionic_qp_state(enum ionic_qp_state state)
+{
+ switch (state) {
+ case IONIC_QPS_RESET:
+ return IB_QPS_RESET;
+ case IONIC_QPS_INIT:
+ return IB_QPS_INIT;
+ case IONIC_QPS_RTR:
+ return IB_QPS_RTR;
+ case IONIC_QPS_RTS:
+ return IB_QPS_RTS;
+ case IONIC_QPS_SQD:
+ return IB_QPS_SQD;
+ case IONIC_QPS_SQE:
+ return IB_QPS_SQE;
+ case IONIC_QPS_ERR:
+ return IB_QPS_ERR;
+ default:
+ return -EINVAL;
+ }
+}
+
+static inline int to_ionic_qp_state(enum ib_qp_state state)
+{
+ switch (state) {
+ case IB_QPS_RESET:
+ return IONIC_QPS_RESET;
+ case IB_QPS_INIT:
+ return IONIC_QPS_INIT;
+ case IB_QPS_RTR:
+ return IONIC_QPS_RTR;
+ case IB_QPS_RTS:
+ return IONIC_QPS_RTS;
+ case IB_QPS_SQD:
+ return IONIC_QPS_SQD;
+ case IB_QPS_SQE:
+ return IONIC_QPS_SQE;
+ case IB_QPS_ERR:
+ return IONIC_QPS_ERR;
+ default:
+ return 0;
+ }
+}
+
+static inline int to_ionic_qp_modify_state(enum ib_qp_state to_state,
+ enum ib_qp_state from_state)
+{
+ return to_ionic_qp_state(to_state) |
+ (to_ionic_qp_state(from_state) << 4);
+}
+
+/* fw abi v1 */
+
+/* data payload part of v1 wqe */
+union ionic_v1_pld {
+ struct ionic_sge sgl[2];
+ __be32 spec32[8];
+ __be16 spec16[16];
+ __u8 data[32];
+};
+
+/* completion queue v1 cqe */
+struct ionic_v1_cqe {
+ union {
+ struct {
+ __be16 cmd_idx;
+ __u8 cmd_op;
+ __u8 rsvd[17];
+ __le16 old_sq_cindex;
+ __le16 old_rq_cq_cindex;
+ } admin;
+ struct {
+ __u64 wqe_id;
+ __be32 src_qpn_op;
+ __u8 src_mac[6];
+ __be16 vlan_tag;
+ __be32 imm_data_rkey;
+ } recv;
+ struct {
+ __u8 rsvd[4];
+ __be32 msg_msn;
+ __u8 rsvd2[8];
+ __u64 npg_wqe_id;
+ } send;
+ };
+ __be32 status_length;
+ __be32 qid_type_flags;
+};
+
+/* bits for cqe recv */
+enum ionic_v1_cqe_src_qpn_bits {
+ IONIC_V1_CQE_RECV_QPN_MASK = 0xffffff,
+ IONIC_V1_CQE_RECV_OP_SHIFT = 24,
+
+ /* MASK could be 0x3, but need 0x1f for makeshift values:
+ * OP_TYPE_RDMA_OPER_WITH_IMM, OP_TYPE_SEND_RCVD
+ */
+ IONIC_V1_CQE_RECV_OP_MASK = 0x1f,
+ IONIC_V1_CQE_RECV_OP_SEND = 0,
+ IONIC_V1_CQE_RECV_OP_SEND_INV = 1,
+ IONIC_V1_CQE_RECV_OP_SEND_IMM = 2,
+ IONIC_V1_CQE_RECV_OP_RDMA_IMM = 3,
+
+ IONIC_V1_CQE_RECV_IS_IPV4 = BIT(7 + IONIC_V1_CQE_RECV_OP_SHIFT),
+ IONIC_V1_CQE_RECV_IS_VLAN = BIT(6 + IONIC_V1_CQE_RECV_OP_SHIFT),
+};
+
+/* bits for cqe qid_type_flags */
+enum ionic_v1_cqe_qtf_bits {
+ IONIC_V1_CQE_COLOR = BIT(0),
+ IONIC_V1_CQE_ERROR = BIT(1),
+ IONIC_V1_CQE_TYPE_SHIFT = 5,
+ IONIC_V1_CQE_TYPE_MASK = 0x7,
+ IONIC_V1_CQE_QID_SHIFT = 8,
+
+ IONIC_V1_CQE_TYPE_ADMIN = 0,
+ IONIC_V1_CQE_TYPE_RECV = 1,
+ IONIC_V1_CQE_TYPE_SEND_MSN = 2,
+ IONIC_V1_CQE_TYPE_SEND_NPG = 3,
+};
+
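+/*
+ * Decoding qid_type_flags, as an example: a cpu-order qtf of 0x00000543
+ * carries qid 5 (bits 8 and up) and cqe type 2, SEND_MSN (bits 5-7), with
+ * both the color and error bits set in the low flag bits.
+ */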
+static inline bool ionic_v1_cqe_color(struct ionic_v1_cqe *cqe)
+{
+ return cqe->qid_type_flags & cpu_to_be32(IONIC_V1_CQE_COLOR);
+}
+
+static inline bool ionic_v1_cqe_error(struct ionic_v1_cqe *cqe)
+{
+ return cqe->qid_type_flags & cpu_to_be32(IONIC_V1_CQE_ERROR);
+}
+
+static inline bool ionic_v1_cqe_recv_is_ipv4(struct ionic_v1_cqe *cqe)
+{
+ return cqe->recv.src_qpn_op & cpu_to_be32(IONIC_V1_CQE_RECV_IS_IPV4);
+}
+
+static inline bool ionic_v1_cqe_recv_is_vlan(struct ionic_v1_cqe *cqe)
+{
+ return cqe->recv.src_qpn_op & cpu_to_be32(IONIC_V1_CQE_RECV_IS_VLAN);
+}
+
+static inline void ionic_v1_cqe_clean(struct ionic_v1_cqe *cqe)
+{
+ cqe->qid_type_flags |= cpu_to_be32(~0u << IONIC_V1_CQE_QID_SHIFT);
+}
+
+static inline u32 ionic_v1_cqe_qtf(struct ionic_v1_cqe *cqe)
+{
+ return be32_to_cpu(cqe->qid_type_flags);
+}
+
+static inline u8 ionic_v1_cqe_qtf_type(u32 qtf)
+{
+ return (qtf >> IONIC_V1_CQE_TYPE_SHIFT) & IONIC_V1_CQE_TYPE_MASK;
+}
+
+static inline u32 ionic_v1_cqe_qtf_qid(u32 qtf)
+{
+ return qtf >> IONIC_V1_CQE_QID_SHIFT;
+}
+
+/* v1 base wqe header */
+struct ionic_v1_base_hdr {
+ __u64 wqe_id;
+ __u8 op;
+ __u8 num_sge_key;
+ __be16 flags;
+ __be32 imm_data_key;
+};
+
+/* v1 receive wqe body */
+struct ionic_v1_recv_bdy {
+ __u8 rsvd[16];
+ union ionic_v1_pld pld;
+};
+
+/* v1 send/rdma wqe body (common, has sgl) */
+struct ionic_v1_common_bdy {
+ union {
+ struct {
+ __be32 ah_id;
+ __be32 dest_qpn;
+ __be32 dest_qkey;
+ } send;
+ struct {
+ __be32 remote_va_high;
+ __be32 remote_va_low;
+ __be32 remote_rkey;
+ } rdma;
+ };
+ __be32 length;
+ union ionic_v1_pld pld;
+};
+
+/* v1 atomic wqe body */
+struct ionic_v1_atomic_bdy {
+ __be32 remote_va_high;
+ __be32 remote_va_low;
+ __be32 remote_rkey;
+ __be32 swap_add_high;
+ __be32 swap_add_low;
+ __be32 compare_high;
+ __be32 compare_low;
+ __u8 rsvd[4];
+ struct ionic_sge sge;
+};
+
+/* v1 reg mr wqe body */
+struct ionic_v1_reg_mr_bdy {
+ __be64 va;
+ __be64 length;
+ __be64 offset;
+ __be64 dma_addr;
+ __be32 map_count;
+ __be16 flags;
+ __u8 dir_size_log2;
+ __u8 page_size_log2;
+ __u8 rsvd[8];
+};
+
+/* v1 bind mw wqe body */
+struct ionic_v1_bind_mw_bdy {
+ __be64 va;
+ __be64 length;
+ __be32 lkey;
+ __be16 flags;
+ __u8 rsvd[26];
+};
+
+/* v1 send/recv wqe */
+struct ionic_v1_wqe {
+ struct ionic_v1_base_hdr base;
+ union {
+ struct ionic_v1_recv_bdy recv;
+ struct ionic_v1_common_bdy common;
+ struct ionic_v1_atomic_bdy atomic;
+ struct ionic_v1_reg_mr_bdy reg_mr;
+ struct ionic_v1_bind_mw_bdy bind_mw;
+ };
+};
+
+/* queue pair v1 send opcodes */
+enum ionic_v1_op {
+ IONIC_V1_OP_SEND,
+ IONIC_V1_OP_SEND_INV,
+ IONIC_V1_OP_SEND_IMM,
+ IONIC_V1_OP_RDMA_READ,
+ IONIC_V1_OP_RDMA_WRITE,
+ IONIC_V1_OP_RDMA_WRITE_IMM,
+ IONIC_V1_OP_ATOMIC_CS,
+ IONIC_V1_OP_ATOMIC_FA,
+ IONIC_V1_OP_REG_MR,
+ IONIC_V1_OP_LOCAL_INV,
+ IONIC_V1_OP_BIND_MW,
+
+ /* flags */
+ IONIC_V1_FLAG_FENCE = BIT(0),
+ IONIC_V1_FLAG_SOL = BIT(1),
+ IONIC_V1_FLAG_INL = BIT(2),
+ IONIC_V1_FLAG_SIG = BIT(3),
+
+ /* the upper four flag bits select the sgl spec format */
+ IONIC_V1_FLAG_SPEC32 = (1u << 12),
+ IONIC_V1_FLAG_SPEC16 = (2u << 12),
+ IONIC_V1_SPEC_FIRST_SGE = 2,
+};
+
+/* queue pair v2 send opcodes */
+enum ionic_v2_op {
+ IONIC_V2_OPSL_OUT = 0x20,
+ IONIC_V2_OPSL_IMM = 0x40,
+ IONIC_V2_OPSL_INV = 0x80,
+
+ IONIC_V2_OP_SEND = 0x0 | IONIC_V2_OPSL_OUT,
+ IONIC_V2_OP_SEND_IMM = IONIC_V2_OP_SEND | IONIC_V2_OPSL_IMM,
+ IONIC_V2_OP_SEND_INV = IONIC_V2_OP_SEND | IONIC_V2_OPSL_INV,
+
+ IONIC_V2_OP_RDMA_WRITE = 0x1 | IONIC_V2_OPSL_OUT,
+ IONIC_V2_OP_RDMA_WRITE_IMM = IONIC_V2_OP_RDMA_WRITE | IONIC_V2_OPSL_IMM,
+
+ IONIC_V2_OP_RDMA_READ = 0x2,
+
+ IONIC_V2_OP_ATOMIC_CS = 0x4,
+ IONIC_V2_OP_ATOMIC_FA = 0x5,
+ IONIC_V2_OP_REG_MR = 0x6,
+ IONIC_V2_OP_LOCAL_INV = 0x7,
+ IONIC_V2_OP_BIND_MW = 0x8,
+};
+
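+/*
+ * Send wqe sizing, briefly: a wqe must hold the base header plus the larger
+ * of the requested sges or the requested inline data. For example, with
+ * min_sge = 4 and no spec or expdb, the sgl requirement is
+ * offsetof(struct ionic_v1_wqe, common.pld.sgl[4]) = 16 + 16 + 4 * 16 = 96
+ * bytes; since queue strides are powers of two (stride_log2), that would
+ * land in a 128-byte stride.
+ */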
+static inline size_t ionic_v1_send_wqe_min_size(int min_sge, int min_data,
+ int spec, bool expdb)
+{
+ size_t sz_wqe, sz_sgl, sz_data;
+
+ if (spec > IONIC_V1_SPEC_FIRST_SGE)
+ min_sge += IONIC_V1_SPEC_FIRST_SGE;
+
+ if (expdb) {
+ min_sge += 1;
+ min_data += IONIC_EXP_DBELL_SZ;
+ }
+
+ sz_wqe = sizeof(struct ionic_v1_wqe);
+ sz_sgl = offsetof(struct ionic_v1_wqe, common.pld.sgl[min_sge]);
+ sz_data = offsetof(struct ionic_v1_wqe, common.pld.data[min_data]);
+
+ if (sz_sgl > sz_wqe)
+ sz_wqe = sz_sgl;
+
+ if (sz_data > sz_wqe)
+ sz_wqe = sz_data;
+
+ return sz_wqe;
+}
+
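+/*
+ * The max sge/data helpers below measure how much payload fits in one wqe of
+ * a given stride using offset arithmetic on a null wqe pointer: a pointer to
+ * (1 << stride_log2) marks the end of the wqe, and its distance from the
+ * start of the payload, in sge-sized or byte units, is the capacity. A
+ * trailing region is reserved when expdb is set.
+ */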
+static inline int ionic_v1_send_wqe_max_sge(u8 stride_log2, int spec,
+ bool expdb)
+{
+ struct ionic_sge *sge = (void *)(1ull << stride_log2);
+ struct ionic_v1_wqe *wqe = (void *)0;
+ int num_sge = 0;
+
+ if (expdb)
+ sge -= 1;
+
+ if (spec > IONIC_V1_SPEC_FIRST_SGE)
+ num_sge = IONIC_V1_SPEC_FIRST_SGE;
+
+ num_sge = sge - &wqe->common.pld.sgl[num_sge];
+
+ if (spec && num_sge > spec)
+ num_sge = spec;
+
+ return num_sge;
+}
+
+static inline int ionic_v1_send_wqe_max_data(u8 stride_log2, bool expdb)
+{
+ struct ionic_v1_wqe *wqe = (void *)0;
+ __u8 *data = (void *)(1ull << stride_log2);
+
+ if (expdb)
+ data -= IONIC_EXP_DBELL_SZ;
+
+ return data - wqe->common.pld.data;
+}
+
+static inline size_t ionic_v1_recv_wqe_min_size(int min_sge, int spec,
+ bool expdb)
+{
+ size_t sz_wqe, sz_sgl;
+
+ if (spec > IONIC_V1_SPEC_FIRST_SGE)
+ min_sge += IONIC_V1_SPEC_FIRST_SGE;
+
+ if (expdb)
+ min_sge += 1;
+
+ sz_wqe = sizeof(struct ionic_v1_wqe);
+ sz_sgl = offsetof(struct ionic_v1_wqe, recv.pld.sgl[min_sge]);
+
+ if (sz_sgl > sz_wqe)
+ sz_wqe = sz_sgl;
+
+ return sz_wqe;
+}
+
+static inline int ionic_v1_recv_wqe_max_sge(u8 stride_log2, int spec,
+ bool expdb)
+{
+ struct ionic_sge *sge = (void *)(1ull << stride_log2);
+ struct ionic_v1_wqe *wqe = (void *)0;
+ int num_sge = 0;
+
+ if (expdb)
+ sge -= 1;
+
+ if (spec > IONIC_V1_SPEC_FIRST_SGE)
+ num_sge = IONIC_V1_SPEC_FIRST_SGE;
+
+ num_sge = sge - &wqe->recv.pld.sgl[num_sge];
+
+ if (spec && num_sge > spec)
+ num_sge = spec;
+
+ return num_sge;
+}
+
+static inline int ionic_v1_use_spec_sge(int min_sge, int spec)
+{
+ if (!spec || min_sge > spec)
+ return 0;
+
+ if (min_sge <= IONIC_V1_SPEC_FIRST_SGE)
+ return IONIC_V1_SPEC_FIRST_SGE;
+
+ return spec;
+}
+
+struct ionic_admin_stats_hdr {
+ __le64 dma_addr;
+ __le32 length;
+ __le32 id_ver;
+ __u8 type_state;
+} __packed;
+
+#define IONIC_ADMIN_STATS_HDRS_IN_V1_LEN 17
+static_assert(sizeof(struct ionic_admin_stats_hdr) ==
+ IONIC_ADMIN_STATS_HDRS_IN_V1_LEN);
+
+struct ionic_admin_create_ah {
+ __le64 dma_addr;
+ __le32 length;
+ __le32 pd_id;
+ __le32 id_ver;
+ __le16 dbid_flags;
+ __u8 csum_profile;
+ __u8 crypto;
+} __packed;
+
+#define IONIC_ADMIN_CREATE_AH_IN_V1_LEN 24
+static_assert(sizeof(struct ionic_admin_create_ah) ==
+ IONIC_ADMIN_CREATE_AH_IN_V1_LEN);
+
+struct ionic_admin_destroy_ah {
+ __le32 ah_id;
+} __packed;
+
+#define IONIC_ADMIN_DESTROY_AH_IN_V1_LEN 4
+static_assert(sizeof(struct ionic_admin_destroy_ah) ==
+ IONIC_ADMIN_DESTROY_AH_IN_V1_LEN);
+
+struct ionic_admin_query_ah {
+ __le64 dma_addr;
+} __packed;
+
+#define IONIC_ADMIN_QUERY_AH_IN_V1_LEN 8
+static_assert(sizeof(struct ionic_admin_query_ah) ==
+ IONIC_ADMIN_QUERY_AH_IN_V1_LEN);
+
+struct ionic_admin_create_mr {
+ __le64 va;
+ __le64 length;
+ __le32 pd_id;
+ __le32 id_ver;
+ __le32 tbl_index;
+ __le32 map_count;
+ __le64 dma_addr;
+ __le16 dbid_flags;
+ __u8 pt_type;
+ __u8 dir_size_log2;
+ __u8 page_size_log2;
+} __packed;
+
+#define IONIC_ADMIN_CREATE_MR_IN_V1_LEN 45
+static_assert(sizeof(struct ionic_admin_create_mr) ==
+ IONIC_ADMIN_CREATE_MR_IN_V1_LEN);
+
+struct ionic_admin_destroy_mr {
+ __le32 mr_id;
+} __packed;
+
+#define IONIC_ADMIN_DESTROY_MR_IN_V1_LEN 4
+static_assert(sizeof(struct ionic_admin_destroy_mr) ==
+ IONIC_ADMIN_DESTROY_MR_IN_V1_LEN);
+
+struct ionic_admin_create_cq {
+ __le32 eq_id;
+ __u8 depth_log2;
+ __u8 stride_log2;
+ __u8 dir_size_log2_rsvd;
+ __u8 page_size_log2;
+ __le32 cq_flags;
+ __le32 id_ver;
+ __le32 tbl_index;
+ __le32 map_count;
+ __le64 dma_addr;
+ __le16 dbid_flags;
+} __packed;
+
+#define IONIC_ADMIN_CREATE_CQ_IN_V1_LEN 34
+static_assert(sizeof(struct ionic_admin_create_cq) ==
+ IONIC_ADMIN_CREATE_CQ_IN_V1_LEN);
+
+struct ionic_admin_destroy_cq {
+ __le32 cq_id;
+} __packed;
+
+#define IONIC_ADMIN_DESTROY_CQ_IN_V1_LEN 4
+static_assert(sizeof(struct ionic_admin_destroy_cq) ==
+ IONIC_ADMIN_DESTROY_CQ_IN_V1_LEN);
+
+struct ionic_admin_create_qp {
+ __le32 pd_id;
+ __be32 priv_flags;
+ __le32 sq_cq_id;
+ __u8 sq_depth_log2;
+ __u8 sq_stride_log2;
+ __u8 sq_dir_size_log2_rsvd;
+ __u8 sq_page_size_log2;
+ __le32 sq_tbl_index_xrcd_id;
+ __le32 sq_map_count;
+ __le64 sq_dma_addr;
+ __le32 rq_cq_id;
+ __u8 rq_depth_log2;
+ __u8 rq_stride_log2;
+ __u8 rq_dir_size_log2_rsvd;
+ __u8 rq_page_size_log2;
+ __le32 rq_tbl_index_srq_id;
+ __le32 rq_map_count;
+ __le64 rq_dma_addr;
+ __le32 id_ver;
+ __le16 dbid_flags;
+ __u8 type_state;
+ __u8 rsvd;
+} __packed;
+
+#define IONIC_ADMIN_CREATE_QP_IN_V1_LEN 64
+static_assert(sizeof(struct ionic_admin_create_qp) ==
+ IONIC_ADMIN_CREATE_QP_IN_V1_LEN);
+
+struct ionic_admin_destroy_qp {
+ __le32 qp_id;
+} __packed;
+
+#define IONIC_ADMIN_DESTROY_QP_IN_V1_LEN 4
+static_assert(sizeof(struct ionic_admin_destroy_qp) ==
+ IONIC_ADMIN_DESTROY_QP_IN_V1_LEN);
+
+struct ionic_admin_mod_qp {
+ __be32 attr_mask;
+ __u8 dcqcn_profile;
+ __u8 tfp_csum_profile;
+ __be16 access_flags;
+ __le32 rq_psn;
+ __le32 sq_psn;
+ __le32 qkey_dest_qpn;
+ __le32 rate_limit_kbps;
+ __u8 pmtu;
+ __u8 retry;
+ __u8 rnr_timer;
+ __u8 retry_timeout;
+ __u8 rsq_depth;
+ __u8 rrq_depth;
+ __le16 pkey_id;
+ __le32 ah_id_len;
+ __u8 en_pcp;
+ __u8 ip_dscp;
+ __u8 rsvd2;
+ __u8 type_state;
+ union {
+ struct {
+ __le16 rsvd1;
+ };
+ __le32 rrq_index;
+ };
+ __le32 rsq_index;
+ __le64 dma_addr;
+ __le32 id_ver;
+} __packed;
+
+#define IONIC_ADMIN_MODIFY_QP_IN_V1_LEN 60
+static_assert(sizeof(struct ionic_admin_mod_qp) ==
+ IONIC_ADMIN_MODIFY_QP_IN_V1_LEN);
+
+struct ionic_admin_query_qp {
+ __le64 hdr_dma_addr;
+ __le64 sq_dma_addr;
+ __le64 rq_dma_addr;
+ __le32 ah_id;
+ __le32 id_ver;
+ __le16 dbid_flags;
+} __packed;
+
+#define IONIC_ADMIN_QUERY_QP_IN_V1_LEN 34
+static_assert(sizeof(struct ionic_admin_query_qp) ==
+ IONIC_ADMIN_QUERY_QP_IN_V1_LEN);
+
+#define ADMIN_WQE_STRIDE 64
+#define ADMIN_WQE_HDR_LEN 4
+
+/* admin queue v1 wqe */
+struct ionic_v1_admin_wqe {
+ __u8 op;
+ __u8 rsvd;
+ __le16 len;
+
+ union {
+ struct ionic_admin_stats_hdr stats;
+ struct ionic_admin_create_ah create_ah;
+ struct ionic_admin_destroy_ah destroy_ah;
+ struct ionic_admin_query_ah query_ah;
+ struct ionic_admin_create_mr create_mr;
+ struct ionic_admin_destroy_mr destroy_mr;
+ struct ionic_admin_create_cq create_cq;
+ struct ionic_admin_destroy_cq destroy_cq;
+ struct ionic_admin_create_qp create_qp;
+ struct ionic_admin_destroy_qp destroy_qp;
+ struct ionic_admin_mod_qp mod_qp;
+ struct ionic_admin_query_qp query_qp;
+ } cmd;
+};
+
+/* side data for query qp */
+struct ionic_v1_admin_query_qp_sq {
+ __u8 rnr_timer;
+ __u8 retry_timeout;
+ __be16 access_perms_flags;
+ __be16 rsvd;
+ __be16 pkey_id;
+ __be32 qkey_dest_qpn;
+ __be32 rate_limit_kbps;
+ __be32 rq_psn;
+};
+
+struct ionic_v1_admin_query_qp_rq {
+ __u8 state_pmtu;
+ __u8 retry_rnrtry;
+ __u8 rrq_depth;
+ __u8 rsq_depth;
+ __be32 sq_psn;
+ __be16 access_perms_flags;
+ __be16 rsvd;
+};
+
+/* admin queue v1 opcodes */
+enum ionic_v1_admin_op {
+ IONIC_V1_ADMIN_NOOP,
+ IONIC_V1_ADMIN_CREATE_CQ,
+ IONIC_V1_ADMIN_CREATE_QP,
+ IONIC_V1_ADMIN_CREATE_MR,
+ IONIC_V1_ADMIN_STATS_HDRS,
+ IONIC_V1_ADMIN_STATS_VALS,
+ IONIC_V1_ADMIN_DESTROY_MR,
+ IONIC_V1_ADMIN_RSVD_7, /* RESIZE_CQ */
+ IONIC_V1_ADMIN_DESTROY_CQ,
+ IONIC_V1_ADMIN_MODIFY_QP,
+ IONIC_V1_ADMIN_QUERY_QP,
+ IONIC_V1_ADMIN_DESTROY_QP,
+ IONIC_V1_ADMIN_DEBUG,
+ IONIC_V1_ADMIN_CREATE_AH,
+ IONIC_V1_ADMIN_QUERY_AH,
+ IONIC_V1_ADMIN_MODIFY_DCQCN,
+ IONIC_V1_ADMIN_DESTROY_AH,
+ IONIC_V1_ADMIN_QP_STATS_HDRS,
+ IONIC_V1_ADMIN_QP_STATS_VALS,
+ IONIC_V1_ADMIN_OPCODES_MAX,
+};
+
+/* admin queue v1 cqe status */
+enum ionic_v1_admin_status {
+ IONIC_V1_ASTS_OK,
+ IONIC_V1_ASTS_BAD_CMD,
+ IONIC_V1_ASTS_BAD_INDEX,
+ IONIC_V1_ASTS_BAD_STATE,
+ IONIC_V1_ASTS_BAD_TYPE,
+ IONIC_V1_ASTS_BAD_ATTR,
+ IONIC_V1_ASTS_MSG_TOO_BIG,
+};
+
+/* event queue v1 eqe */
+struct ionic_v1_eqe {
+ __be32 evt;
+};
+
+/* bits for eqe evt */
+enum ionic_v1_eqe_evt_bits {
+ IONIC_V1_EQE_COLOR = BIT(0),
+ IONIC_V1_EQE_TYPE_SHIFT = 1,
+ IONIC_V1_EQE_TYPE_MASK = 0x7,
+ IONIC_V1_EQE_CODE_SHIFT = 4,
+ IONIC_V1_EQE_CODE_MASK = 0xf,
+ IONIC_V1_EQE_QID_SHIFT = 8,
+
+ /* cq events */
+ IONIC_V1_EQE_TYPE_CQ = 0,
+ /* cq normal events */
+ IONIC_V1_EQE_CQ_NOTIFY = 0,
+ /* cq error events */
+ IONIC_V1_EQE_CQ_ERR = 8,
+
+ /* qp and srq events */
+ IONIC_V1_EQE_TYPE_QP = 1,
+ /* qp normal events */
+ IONIC_V1_EQE_SRQ_LEVEL = 0,
+ IONIC_V1_EQE_SQ_DRAIN = 1,
+ IONIC_V1_EQE_QP_COMM_EST = 2,
+ IONIC_V1_EQE_QP_LAST_WQE = 3,
+ /* qp error events */
+ IONIC_V1_EQE_QP_ERR = 8,
+ IONIC_V1_EQE_QP_ERR_REQUEST = 9,
+ IONIC_V1_EQE_QP_ERR_ACCESS = 10,
+};
+
+enum ionic_tfp_csum_profiles {
+ IONIC_TFP_CSUM_PROF_ETH_IPV4_UDP = 0,
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_UDP = 1,
+ IONIC_TFP_CSUM_PROF_ETH_IPV6_UDP = 2,
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV6_UDP = 3,
+ IONIC_TFP_CSUM_PROF_IPV4_UDP_VXLAN_ETH_QTAG_IPV4_UDP = 4,
+ IONIC_TFP_CSUM_PROF_IPV4_UDP_VXLAN_ETH_QTAG_IPV6_UDP = 5,
+ IONIC_TFP_CSUM_PROF_QTAG_IPV4_UDP_VXLAN_ETH_QTAG_IPV4_UDP = 6,
+ IONIC_TFP_CSUM_PROF_QTAG_IPV4_UDP_VXLAN_ETH_QTAG_IPV6_UDP = 7,
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_UDP_ESP_IPV4_UDP = 8,
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_ESP_UDP = 9,
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_UDP_ESP_UDP = 10,
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV6_ESP_UDP = 11,
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_UDP_CSUM = 12,
+};
+
+static inline bool ionic_v1_eqe_color(struct ionic_v1_eqe *eqe)
+{
+ return eqe->evt & cpu_to_be32(IONIC_V1_EQE_COLOR);
+}
+
+static inline u32 ionic_v1_eqe_evt(struct ionic_v1_eqe *eqe)
+{
+ return be32_to_cpu(eqe->evt);
+}
+
+static inline u8 ionic_v1_eqe_evt_type(u32 evt)
+{
+ return (evt >> IONIC_V1_EQE_TYPE_SHIFT) & IONIC_V1_EQE_TYPE_MASK;
+}
+
+static inline u8 ionic_v1_eqe_evt_code(u32 evt)
+{
+ return (evt >> IONIC_V1_EQE_CODE_SHIFT) & IONIC_V1_EQE_CODE_MASK;
+}
+
+static inline u32 ionic_v1_eqe_evt_qid(u32 evt)
+{
+ return evt >> IONIC_V1_EQE_QID_SHIFT;
+}
+
+enum ionic_v1_stat_bits {
+ IONIC_V1_STAT_TYPE_SHIFT = 28,
+ IONIC_V1_STAT_TYPE_NONE = 0,
+ IONIC_V1_STAT_TYPE_8 = 1,
+ IONIC_V1_STAT_TYPE_LE16 = 2,
+ IONIC_V1_STAT_TYPE_LE32 = 3,
+ IONIC_V1_STAT_TYPE_LE64 = 4,
+ IONIC_V1_STAT_TYPE_BE16 = 5,
+ IONIC_V1_STAT_TYPE_BE32 = 6,
+ IONIC_V1_STAT_TYPE_BE64 = 7,
+ IONIC_V1_STAT_OFF_MASK = BIT(IONIC_V1_STAT_TYPE_SHIFT) - 1,
+};
+
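+/*
+ * Each stat header packs the value type in the top four bits of type_off and
+ * the byte offset of the value in the remaining bits. For example, a
+ * type_off of (IONIC_V1_STAT_TYPE_LE64 << IONIC_V1_STAT_TYPE_SHIFT) | 0x40
+ * describes a little-endian 64-bit counter 0x40 bytes into the values buffer.
+ */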
+struct ionic_v1_stat {
+ union {
+ __be32 be_type_off;
+ u32 type_off;
+ };
+ char name[28];
+};
+
+static inline int ionic_v1_stat_type(struct ionic_v1_stat *hdr)
+{
+ return hdr->type_off >> IONIC_V1_STAT_TYPE_SHIFT;
+}
+
+static inline unsigned int ionic_v1_stat_off(struct ionic_v1_stat *hdr)
+{
+ return hdr->type_off & IONIC_V1_STAT_OFF_MASK;
+}
+
+#endif /* _IONIC_FW_H_ */
diff --git a/drivers/infiniband/hw/ionic/ionic_hw_stats.c b/drivers/infiniband/hw/ionic/ionic_hw_stats.c
new file mode 100644
index 000000000000..244a80dde08f
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_hw_stats.c
@@ -0,0 +1,484 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/dma-mapping.h>
+
+#include "ionic_fw.h"
+#include "ionic_ibdev.h"
+
+static int ionic_v1_stat_normalize(struct ionic_v1_stat *hw_stats,
+ int hw_stats_count)
+{
+ int hw_stat_i;
+
+ for (hw_stat_i = 0; hw_stat_i < hw_stats_count; ++hw_stat_i) {
+ struct ionic_v1_stat *stat = &hw_stats[hw_stat_i];
+
+ stat->type_off = be32_to_cpu(stat->be_type_off);
+ stat->name[sizeof(stat->name) - 1] = 0;
+ if (ionic_v1_stat_type(stat) == IONIC_V1_STAT_TYPE_NONE)
+ break;
+ }
+
+ return hw_stat_i;
+}
+
+static void ionic_fill_stats_desc(struct rdma_stat_desc *hw_stats_hdrs,
+ struct ionic_v1_stat *hw_stats,
+ int hw_stats_count)
+{
+ int hw_stat_i;
+
+ for (hw_stat_i = 0; hw_stat_i < hw_stats_count; ++hw_stat_i) {
+ struct ionic_v1_stat *stat = &hw_stats[hw_stat_i];
+
+ hw_stats_hdrs[hw_stat_i].name = stat->name;
+ }
+}
+
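+/*
+ * Read one stat value from the buffer returned by the device, converting
+ * from the wire endianness given by the header type. Out-of-range or
+ * misaligned offsets yield ~0 rather than reading past the buffer.
+ */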
+static u64 ionic_v1_stat_val(struct ionic_v1_stat *stat,
+ void *vals_buf, size_t vals_len)
+{
+ unsigned int off = ionic_v1_stat_off(stat);
+ int type = ionic_v1_stat_type(stat);
+
+#define __ionic_v1_stat_validate(__type) \
+ ((off + sizeof(__type) <= vals_len) && \
+ (IS_ALIGNED(off, sizeof(__type))))
+
+ switch (type) {
+ case IONIC_V1_STAT_TYPE_8:
+ if (__ionic_v1_stat_validate(u8))
+ return *(u8 *)(vals_buf + off);
+ break;
+ case IONIC_V1_STAT_TYPE_LE16:
+ if (__ionic_v1_stat_validate(__le16))
+ return le16_to_cpu(*(__le16 *)(vals_buf + off));
+ break;
+ case IONIC_V1_STAT_TYPE_LE32:
+ if (__ionic_v1_stat_validate(__le32))
+ return le32_to_cpu(*(__le32 *)(vals_buf + off));
+ break;
+ case IONIC_V1_STAT_TYPE_LE64:
+ if (__ionic_v1_stat_validate(__le64))
+ return le64_to_cpu(*(__le64 *)(vals_buf + off));
+ break;
+ case IONIC_V1_STAT_TYPE_BE16:
+ if (__ionic_v1_stat_validate(__be16))
+ return be16_to_cpu(*(__be16 *)(vals_buf + off));
+ break;
+ case IONIC_V1_STAT_TYPE_BE32:
+ if (__ionic_v1_stat_validate(__be32))
+ return be32_to_cpu(*(__be32 *)(vals_buf + off));
+ break;
+ case IONIC_V1_STAT_TYPE_BE64:
+ if (__ionic_v1_stat_validate(__be64))
+ return be64_to_cpu(*(__be64 *)(vals_buf + off));
+ break;
+ }
+
+ return ~0ull;
+#undef __ionic_v1_stat_validate
+}
+
+static int ionic_hw_stats_cmd(struct ionic_ibdev *dev,
+ dma_addr_t dma, size_t len, int qid, int op)
+{
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = op,
+ .len = cpu_to_le16(IONIC_ADMIN_STATS_HDRS_IN_V1_LEN),
+ .cmd.stats = {
+ .dma_addr = cpu_to_le64(dma),
+ .length = cpu_to_le32(len),
+ .id_ver = cpu_to_le32(qid),
+ },
+ }
+ };
+
+ if (dev->lif_cfg.admin_opcodes <= op)
+ return -EBADRQC;
+
+ ionic_admin_post(dev, &wr);
+
+ return ionic_admin_wait(dev, &wr, IONIC_ADMIN_F_INTERRUPT);
+}
+
+static int ionic_init_hw_stats(struct ionic_ibdev *dev)
+{
+ dma_addr_t hw_stats_dma;
+ int rc, hw_stats_count;
+
+ if (dev->hw_stats_hdrs)
+ return 0;
+
+ dev->hw_stats_count = 0;
+
+ /* buffer for current values from the device */
+ dev->hw_stats_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!dev->hw_stats_buf) {
+ rc = -ENOMEM;
+ goto err_buf;
+ }
+
+ /* buffer for names, sizes, offsets of values */
+ dev->hw_stats = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!dev->hw_stats) {
+ rc = -ENOMEM;
+ goto err_hw_stats;
+ }
+
+ /* request the names, sizes, offsets */
+ hw_stats_dma = dma_map_single(dev->lif_cfg.hwdev, dev->hw_stats,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ rc = dma_mapping_error(dev->lif_cfg.hwdev, hw_stats_dma);
+ if (rc)
+ goto err_dma;
+
+ rc = ionic_hw_stats_cmd(dev, hw_stats_dma, PAGE_SIZE, 0,
+ IONIC_V1_ADMIN_STATS_HDRS);
+ if (rc)
+ goto err_cmd;
+
+ dma_unmap_single(dev->lif_cfg.hwdev, hw_stats_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+
+ /* normalize and count the number of hw_stats */
+ hw_stats_count =
+ ionic_v1_stat_normalize(dev->hw_stats,
+ PAGE_SIZE / sizeof(*dev->hw_stats));
+ if (!hw_stats_count) {
+ rc = -ENODATA;
+ goto err_dma;
+ }
+
+ dev->hw_stats_count = hw_stats_count;
+
+ /* alloc and init array of names, for alloc_hw_stats */
+ dev->hw_stats_hdrs = kcalloc(hw_stats_count,
+ sizeof(*dev->hw_stats_hdrs),
+ GFP_KERNEL);
+ if (!dev->hw_stats_hdrs) {
+ rc = -ENOMEM;
+ goto err_dma;
+ }
+
+ ionic_fill_stats_desc(dev->hw_stats_hdrs, dev->hw_stats,
+ hw_stats_count);
+
+ return 0;
+
+err_cmd:
+ dma_unmap_single(dev->lif_cfg.hwdev, hw_stats_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+err_dma:
+ kfree(dev->hw_stats);
+err_hw_stats:
+ kfree(dev->hw_stats_buf);
+err_buf:
+ dev->hw_stats_count = 0;
+ dev->hw_stats = NULL;
+ dev->hw_stats_buf = NULL;
+ dev->hw_stats_hdrs = NULL;
+ return rc;
+}
+
+static struct rdma_hw_stats *ionic_alloc_hw_stats(struct ib_device *ibdev,
+ u32 port)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibdev);
+
+ if (port != 1)
+ return NULL;
+
+ return rdma_alloc_hw_stats_struct(dev->hw_stats_hdrs,
+ dev->hw_stats_count,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int ionic_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *hw_stats,
+ u32 port, int index)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibdev);
+ dma_addr_t hw_stats_dma;
+ int rc, hw_stat_i;
+
+ if (port != 1)
+ return -EINVAL;
+
+ hw_stats_dma = dma_map_single(dev->lif_cfg.hwdev, dev->hw_stats_buf,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ rc = dma_mapping_error(dev->lif_cfg.hwdev, hw_stats_dma);
+ if (rc)
+ goto err_dma;
+
+ rc = ionic_hw_stats_cmd(dev, hw_stats_dma, PAGE_SIZE,
+ 0, IONIC_V1_ADMIN_STATS_VALS);
+ if (rc)
+ goto err_cmd;
+
+ dma_unmap_single(dev->lif_cfg.hwdev, hw_stats_dma,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+
+ for (hw_stat_i = 0; hw_stat_i < dev->hw_stats_count; ++hw_stat_i)
+ hw_stats->value[hw_stat_i] =
+ ionic_v1_stat_val(&dev->hw_stats[hw_stat_i],
+ dev->hw_stats_buf, PAGE_SIZE);
+
+ return hw_stat_i;
+
+err_cmd:
+ dma_unmap_single(dev->lif_cfg.hwdev, hw_stats_dma,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+err_dma:
+ return rc;
+}
+
+static struct rdma_hw_stats *
+ionic_counter_alloc_stats(struct rdma_counter *counter)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(counter->device);
+ struct ionic_counter *cntr;
+ int err;
+
+ cntr = kzalloc(sizeof(*cntr), GFP_KERNEL);
+ if (!cntr)
+ return NULL;
+
+ /* buffer for current values from the device */
+ cntr->vals = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!cntr->vals)
+ goto err_vals;
+
+ err = xa_alloc(&dev->counter_stats->xa_counters, &counter->id,
+ cntr,
+ XA_LIMIT(0, IONIC_MAX_QPID),
+ GFP_KERNEL);
+ if (err)
+ goto err_xa;
+
+ INIT_LIST_HEAD(&cntr->qp_list);
+
+ return rdma_alloc_hw_stats_struct(dev->counter_stats->stats_hdrs,
+ dev->counter_stats->queue_stats_count,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+err_xa:
+ kfree(cntr->vals);
+err_vals:
+ kfree(cntr);
+
+ return NULL;
+}
+
+static int ionic_counter_dealloc(struct rdma_counter *counter)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(counter->device);
+ struct ionic_counter *cntr;
+
+ cntr = xa_erase(&dev->counter_stats->xa_counters, counter->id);
+ if (!cntr)
+ return -EINVAL;
+
+ kfree(cntr->vals);
+ kfree(cntr);
+
+ return 0;
+}
+
+static int ionic_counter_bind_qp(struct rdma_counter *counter,
+ struct ib_qp *ibqp,
+ u32 port)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(counter->device);
+ struct ionic_qp *qp = to_ionic_qp(ibqp);
+ struct ionic_counter *cntr;
+
+ cntr = xa_load(&dev->counter_stats->xa_counters, counter->id);
+ if (!cntr)
+ return -EINVAL;
+
+ list_add_tail(&qp->qp_list_counter, &cntr->qp_list);
+ ibqp->counter = counter;
+
+ return 0;
+}
+
+static int ionic_counter_unbind_qp(struct ib_qp *ibqp, u32 port)
+{
+ struct ionic_qp *qp = to_ionic_qp(ibqp);
+
+ if (ibqp->counter) {
+ list_del(&qp->qp_list_counter);
+ ibqp->counter = NULL;
+ }
+
+ return 0;
+}
+
+static int ionic_get_qp_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *hw_stats,
+ u32 counter_id)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibdev);
+ struct ionic_counter_stats *cs;
+ struct ionic_counter *cntr;
+ dma_addr_t hw_stats_dma;
+ struct ionic_qp *qp;
+ int rc, stat_i = 0;
+
+ cs = dev->counter_stats;
+ cntr = xa_load(&cs->xa_counters, counter_id);
+ if (!cntr)
+ return -EINVAL;
+
+ hw_stats_dma = dma_map_single(dev->lif_cfg.hwdev, cntr->vals,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ rc = dma_mapping_error(dev->lif_cfg.hwdev, hw_stats_dma);
+ if (rc)
+ return rc;
+
+ memset(hw_stats->value, 0, sizeof(u64) * hw_stats->num_counters);
+
+ list_for_each_entry(qp, &cntr->qp_list, qp_list_counter) {
+ rc = ionic_hw_stats_cmd(dev, hw_stats_dma, PAGE_SIZE,
+ qp->qpid,
+ IONIC_V1_ADMIN_QP_STATS_VALS);
+ if (rc)
+ goto err_cmd;
+
+ for (stat_i = 0; stat_i < cs->queue_stats_count; ++stat_i)
+ hw_stats->value[stat_i] +=
+ ionic_v1_stat_val(&cs->hdr[stat_i],
+ cntr->vals,
+ PAGE_SIZE);
+ }
+
+ dma_unmap_single(dev->lif_cfg.hwdev, hw_stats_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+ return stat_i;
+
+err_cmd:
+ dma_unmap_single(dev->lif_cfg.hwdev, hw_stats_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+
+ return rc;
+}
+
+static int ionic_counter_update_stats(struct rdma_counter *counter)
+{
+ return ionic_get_qp_stats(counter->device, counter->stats, counter->id);
+}
+
+static int ionic_alloc_counters(struct ionic_ibdev *dev)
+{
+ struct ionic_counter_stats *cs = dev->counter_stats;
+ int rc, hw_stats_count;
+ dma_addr_t hdr_dma;
+
+ /* buffer for names, sizes, offsets of values */
+ cs->hdr = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!cs->hdr)
+ return -ENOMEM;
+
+ hdr_dma = dma_map_single(dev->lif_cfg.hwdev, cs->hdr,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ rc = dma_mapping_error(dev->lif_cfg.hwdev, hdr_dma);
+ if (rc)
+ goto err_dma;
+
+ rc = ionic_hw_stats_cmd(dev, hdr_dma, PAGE_SIZE, 0,
+ IONIC_V1_ADMIN_QP_STATS_HDRS);
+ if (rc)
+ goto err_cmd;
+
+ dma_unmap_single(dev->lif_cfg.hwdev, hdr_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+
+ /* normalize and count the number of hw_stats */
+ hw_stats_count = ionic_v1_stat_normalize(cs->hdr,
+ PAGE_SIZE / sizeof(*cs->hdr));
+ if (!hw_stats_count) {
+ rc = -ENODATA;
+ goto err_dma;
+ }
+
+ cs->queue_stats_count = hw_stats_count;
+
+ /* alloc and init array of names */
+ cs->stats_hdrs = kcalloc(hw_stats_count, sizeof(*cs->stats_hdrs),
+ GFP_KERNEL);
+ if (!cs->stats_hdrs) {
+ rc = -ENOMEM;
+ goto err_dma;
+ }
+
+ ionic_fill_stats_desc(cs->stats_hdrs, cs->hdr, hw_stats_count);
+
+ return 0;
+
+err_cmd:
+ dma_unmap_single(dev->lif_cfg.hwdev, hdr_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+err_dma:
+ kfree(cs->hdr);
+
+ return rc;
+}
+
+static const struct ib_device_ops ionic_hw_stats_ops = {
+ .driver_id = RDMA_DRIVER_IONIC,
+ .alloc_hw_port_stats = ionic_alloc_hw_stats,
+ .get_hw_stats = ionic_get_hw_stats,
+};
+
+static const struct ib_device_ops ionic_counter_stats_ops = {
+ .counter_alloc_stats = ionic_counter_alloc_stats,
+ .counter_dealloc = ionic_counter_dealloc,
+ .counter_bind_qp = ionic_counter_bind_qp,
+ .counter_unbind_qp = ionic_counter_unbind_qp,
+ .counter_update_stats = ionic_counter_update_stats,
+};
+
+void ionic_stats_init(struct ionic_ibdev *dev)
+{
+ u16 stats_type = dev->lif_cfg.stats_type;
+ int rc;
+
+ if (stats_type & IONIC_LIF_RDMA_STAT_GLOBAL) {
+ rc = ionic_init_hw_stats(dev);
+ if (rc)
+ ibdev_dbg(&dev->ibdev, "Failed to init hw stats\n");
+ else
+ ib_set_device_ops(&dev->ibdev, &ionic_hw_stats_ops);
+ }
+
+ if (stats_type & IONIC_LIF_RDMA_STAT_QP) {
+ dev->counter_stats = kzalloc(sizeof(*dev->counter_stats),
+ GFP_KERNEL);
+ if (!dev->counter_stats)
+ return;
+
+ rc = ionic_alloc_counters(dev);
+ if (rc) {
+ ibdev_dbg(&dev->ibdev, "Failed to init counter stats\n");
+ kfree(dev->counter_stats);
+ dev->counter_stats = NULL;
+ return;
+ }
+
+ xa_init_flags(&dev->counter_stats->xa_counters, XA_FLAGS_ALLOC);
+
+ ib_set_device_ops(&dev->ibdev, &ionic_counter_stats_ops);
+ }
+}
+
+void ionic_stats_cleanup(struct ionic_ibdev *dev)
+{
+ if (dev->counter_stats) {
+ xa_destroy(&dev->counter_stats->xa_counters);
+ kfree(dev->counter_stats->hdr);
+ kfree(dev->counter_stats->stats_hdrs);
+ kfree(dev->counter_stats);
+ dev->counter_stats = NULL;
+ }
+
+ kfree(dev->hw_stats);
+ kfree(dev->hw_stats_buf);
+ kfree(dev->hw_stats_hdrs);
+}
diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.c b/drivers/infiniband/hw/ionic/ionic_ibdev.c
new file mode 100644
index 000000000000..164046d00e5d
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_ibdev.c
@@ -0,0 +1,440 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/pci.h>
+#include <linux/irq.h>
+#include <net/addrconf.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_mad.h>
+
+#include "ionic_ibdev.h"
+
+#define DRIVER_DESCRIPTION "AMD Pensando RoCE HCA driver"
+#define DEVICE_DESCRIPTION "AMD Pensando RoCE HCA"
+
+MODULE_AUTHOR("Allen Hubbe <allen.hubbe@amd.com>");
+MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS("NET_IONIC");
+
+static int ionic_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *attr,
+ struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibdev);
+ struct net_device *ndev;
+
+ ndev = ib_device_get_netdev(ibdev, 1);
+ addrconf_ifid_eui48((u8 *)&attr->sys_image_guid, ndev);
+ dev_put(ndev);
+ attr->max_mr_size = dev->lif_cfg.npts_per_lif * PAGE_SIZE / 2;
+ attr->page_size_cap = dev->lif_cfg.page_size_supported;
+
+ attr->vendor_id = to_pci_dev(dev->lif_cfg.hwdev)->vendor;
+ attr->vendor_part_id = to_pci_dev(dev->lif_cfg.hwdev)->device;
+
+ attr->hw_ver = ionic_lif_asic_rev(dev->lif_cfg.lif);
+ attr->fw_ver = 0;
+ attr->max_qp = dev->lif_cfg.qp_count;
+ attr->max_qp_wr = IONIC_MAX_DEPTH;
+ attr->device_cap_flags =
+ IB_DEVICE_MEM_WINDOW |
+ IB_DEVICE_MEM_MGT_EXTENSIONS |
+ IB_DEVICE_MEM_WINDOW_TYPE_2B |
+ 0;
+ attr->max_send_sge =
+ min(ionic_v1_send_wqe_max_sge(dev->lif_cfg.max_stride, 0, false),
+ IONIC_SPEC_HIGH);
+ attr->max_recv_sge =
+ min(ionic_v1_recv_wqe_max_sge(dev->lif_cfg.max_stride, 0, false),
+ IONIC_SPEC_HIGH);
+ attr->max_sge_rd = attr->max_send_sge;
+ attr->max_cq = dev->lif_cfg.cq_count / dev->lif_cfg.udma_count;
+ attr->max_cqe = IONIC_MAX_CQ_DEPTH - IONIC_CQ_GRACE;
+ attr->max_mr = dev->lif_cfg.nmrs_per_lif;
+ attr->max_pd = IONIC_MAX_PD;
+ attr->max_qp_rd_atom = IONIC_MAX_RD_ATOM;
+ attr->max_ee_rd_atom = 0;
+ attr->max_res_rd_atom = IONIC_MAX_RD_ATOM;
+ attr->max_qp_init_rd_atom = IONIC_MAX_RD_ATOM;
+ attr->max_ee_init_rd_atom = 0;
+ attr->atomic_cap = IB_ATOMIC_GLOB;
+ attr->masked_atomic_cap = IB_ATOMIC_GLOB;
+ attr->max_mw = dev->lif_cfg.nmrs_per_lif;
+ attr->max_mcast_grp = 0;
+ attr->max_mcast_qp_attach = 0;
+ attr->max_ah = dev->lif_cfg.nahs_per_lif;
+ attr->max_fast_reg_page_list_len = dev->lif_cfg.npts_per_lif / 2;
+ attr->max_pkeys = IONIC_PKEY_TBL_LEN;
+
+ return 0;
+}
+
+static int ionic_query_port(struct ib_device *ibdev, u32 port,
+ struct ib_port_attr *attr)
+{
+ struct net_device *ndev;
+
+ if (port != 1)
+ return -EINVAL;
+
+ ndev = ib_device_get_netdev(ibdev, port);
+
+ if (netif_running(ndev) && netif_carrier_ok(ndev)) {
+ attr->state = IB_PORT_ACTIVE;
+ attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
+ } else if (netif_running(ndev)) {
+ attr->state = IB_PORT_DOWN;
+ attr->phys_state = IB_PORT_PHYS_STATE_POLLING;
+ } else {
+ attr->state = IB_PORT_DOWN;
+ attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
+ }
+
+ attr->max_mtu = iboe_get_mtu(ndev->max_mtu);
+ attr->active_mtu = min(attr->max_mtu, iboe_get_mtu(ndev->mtu));
+ attr->gid_tbl_len = IONIC_GID_TBL_LEN;
+ attr->ip_gids = true;
+ attr->port_cap_flags = 0;
+ attr->max_msg_sz = 0x80000000;
+ attr->pkey_tbl_len = IONIC_PKEY_TBL_LEN;
+ attr->max_vl_num = 1;
+ attr->subnet_prefix = 0xfe80000000000000ull;
+
+ dev_put(ndev);
+
+ return ib_get_eth_speed(ibdev, port,
+ &attr->active_speed,
+ &attr->active_width);
+}
+
+static enum rdma_link_layer ionic_get_link_layer(struct ib_device *ibdev,
+ u32 port)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+static int ionic_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
+ u16 *pkey)
+{
+ if (port != 1)
+ return -EINVAL;
+
+ if (index != 0)
+ return -EINVAL;
+
+ *pkey = IB_DEFAULT_PKEY_FULL;
+
+ return 0;
+}
+
+static int ionic_modify_device(struct ib_device *ibdev, int mask,
+ struct ib_device_modify *attr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibdev);
+
+ if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
+ return -EOPNOTSUPP;
+
+ if (mask & IB_DEVICE_MODIFY_NODE_DESC)
+ memcpy(dev->ibdev.node_desc, attr->node_desc,
+ IB_DEVICE_NODE_DESC_MAX);
+
+ return 0;
+}
+
+static int ionic_get_port_immutable(struct ib_device *ibdev, u32 port,
+ struct ib_port_immutable *attr)
+{
+ if (port != 1)
+ return -EINVAL;
+
+ attr->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+ attr->pkey_tbl_len = IONIC_PKEY_TBL_LEN;
+ attr->gid_tbl_len = IONIC_GID_TBL_LEN;
+ attr->max_mad_size = IB_MGMT_MAD_SIZE;
+
+ return 0;
+}
+
+static void ionic_get_dev_fw_str(struct ib_device *ibdev, char *str)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibdev);
+
+ ionic_lif_fw_version(dev->lif_cfg.lif, str, IB_FW_VERSION_NAME_MAX);
+}
+
+static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct ionic_ibdev *dev =
+ rdma_device_to_drv_device(device, struct ionic_ibdev, ibdev);
+
+ return sysfs_emit(buf, "0x%x\n", ionic_lif_asic_rev(dev->lif_cfg.lif));
+}
+static DEVICE_ATTR_RO(hw_rev);
+
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct ionic_ibdev *dev =
+ rdma_device_to_drv_device(device, struct ionic_ibdev, ibdev);
+
+ return sysfs_emit(buf, "%s\n", dev->ibdev.node_desc);
+}
+static DEVICE_ATTR_RO(hca_type);
+
+static struct attribute *ionic_rdma_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ NULL
+};
+
+static const struct attribute_group ionic_rdma_attr_group = {
+ .attrs = ionic_rdma_attributes,
+};
+
+static void ionic_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+ /*
+ * Dummy disassociate_ucontext so that the ib core does not
+ * wait for the user context before cleaning up hw resources.
+ */
+}
+
+static const struct ib_device_ops ionic_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_IONIC,
+ .uverbs_abi_ver = IONIC_ABI_VERSION,
+
+ .alloc_ucontext = ionic_alloc_ucontext,
+ .dealloc_ucontext = ionic_dealloc_ucontext,
+ .mmap = ionic_mmap,
+ .mmap_free = ionic_mmap_free,
+ .alloc_pd = ionic_alloc_pd,
+ .dealloc_pd = ionic_dealloc_pd,
+ .create_ah = ionic_create_ah,
+ .query_ah = ionic_query_ah,
+ .destroy_ah = ionic_destroy_ah,
+ .create_user_ah = ionic_create_ah,
+ .get_dma_mr = ionic_get_dma_mr,
+ .reg_user_mr = ionic_reg_user_mr,
+ .reg_user_mr_dmabuf = ionic_reg_user_mr_dmabuf,
+ .dereg_mr = ionic_dereg_mr,
+ .alloc_mr = ionic_alloc_mr,
+ .map_mr_sg = ionic_map_mr_sg,
+ .alloc_mw = ionic_alloc_mw,
+ .dealloc_mw = ionic_dealloc_mw,
+ .create_cq = ionic_create_cq,
+ .destroy_cq = ionic_destroy_cq,
+ .create_qp = ionic_create_qp,
+ .modify_qp = ionic_modify_qp,
+ .query_qp = ionic_query_qp,
+ .destroy_qp = ionic_destroy_qp,
+
+ .post_send = ionic_post_send,
+ .post_recv = ionic_post_recv,
+ .poll_cq = ionic_poll_cq,
+ .req_notify_cq = ionic_req_notify_cq,
+
+ .query_device = ionic_query_device,
+ .query_port = ionic_query_port,
+ .get_link_layer = ionic_get_link_layer,
+ .query_pkey = ionic_query_pkey,
+ .modify_device = ionic_modify_device,
+ .get_port_immutable = ionic_get_port_immutable,
+ .get_dev_fw_str = ionic_get_dev_fw_str,
+ .device_group = &ionic_rdma_attr_group,
+ .disassociate_ucontext = ionic_disassociate_ucontext,
+
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, ionic_ctx, ibctx),
+ INIT_RDMA_OBJ_SIZE(ib_pd, ionic_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_ah, ionic_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, ionic_vcq, ibcq),
+ INIT_RDMA_OBJ_SIZE(ib_qp, ionic_qp, ibqp),
+ INIT_RDMA_OBJ_SIZE(ib_mw, ionic_mr, ibmw),
+};
+
+static void ionic_init_resids(struct ionic_ibdev *dev)
+{
+ ionic_resid_init(&dev->inuse_cqid, dev->lif_cfg.cq_count);
+ dev->half_cqid_udma_shift =
+ order_base_2(dev->lif_cfg.cq_count / dev->lif_cfg.udma_count);
+ ionic_resid_init(&dev->inuse_pdid, IONIC_MAX_PD);
+ ionic_resid_init(&dev->inuse_ahid, dev->lif_cfg.nahs_per_lif);
+ ionic_resid_init(&dev->inuse_mrid, dev->lif_cfg.nmrs_per_lif);
+ /* skip reserved lkey */
+ dev->next_mrkey = 1;
+ ionic_resid_init(&dev->inuse_qpid, dev->lif_cfg.qp_count);
+ /* skip reserved SMI and GSI qpids */
+ dev->half_qpid_udma_shift =
+ order_base_2(dev->lif_cfg.qp_count / dev->lif_cfg.udma_count);
+ ionic_resid_init(&dev->inuse_dbid, dev->lif_cfg.dbid_count);
+}
+
+static void ionic_destroy_resids(struct ionic_ibdev *dev)
+{
+ ionic_resid_destroy(&dev->inuse_cqid);
+ ionic_resid_destroy(&dev->inuse_pdid);
+ ionic_resid_destroy(&dev->inuse_ahid);
+ ionic_resid_destroy(&dev->inuse_mrid);
+ ionic_resid_destroy(&dev->inuse_qpid);
+ ionic_resid_destroy(&dev->inuse_dbid);
+}
+
+static void ionic_destroy_ibdev(struct ionic_ibdev *dev)
+{
+ ionic_kill_rdma_admin(dev, false);
+ ib_unregister_device(&dev->ibdev);
+ ionic_stats_cleanup(dev);
+ ionic_destroy_rdma_admin(dev);
+ ionic_destroy_resids(dev);
+ WARN_ON(!xa_empty(&dev->qp_tbl));
+ xa_destroy(&dev->qp_tbl);
+ WARN_ON(!xa_empty(&dev->cq_tbl));
+ xa_destroy(&dev->cq_tbl);
+ ib_dealloc_device(&dev->ibdev);
+}
+
+static struct ionic_ibdev *ionic_create_ibdev(struct ionic_aux_dev *ionic_adev)
+{
+ struct ib_device *ibdev;
+ struct ionic_ibdev *dev;
+ struct net_device *ndev;
+ int rc;
+
+ dev = ib_alloc_device(ionic_ibdev, ibdev);
+ if (!dev)
+ return ERR_PTR(-EINVAL);
+
+ ionic_fill_lif_cfg(ionic_adev->lif, &dev->lif_cfg);
+
+ xa_init_flags(&dev->qp_tbl, GFP_ATOMIC);
+ xa_init_flags(&dev->cq_tbl, GFP_ATOMIC);
+
+ ionic_init_resids(dev);
+
+ rc = ionic_rdma_reset_devcmd(dev);
+ if (rc)
+ goto err_reset;
+
+ rc = ionic_create_rdma_admin(dev);
+ if (rc)
+ goto err_admin;
+
+ ibdev = &dev->ibdev;
+ ibdev->dev.parent = dev->lif_cfg.hwdev;
+
+ strscpy(ibdev->name, "ionic_%d", IB_DEVICE_NAME_MAX);
+ strscpy(ibdev->node_desc, DEVICE_DESCRIPTION, IB_DEVICE_NODE_DESC_MAX);
+
+ ibdev->node_type = RDMA_NODE_IB_CA;
+ ibdev->phys_port_cnt = 1;
+
+ /* the first two EQs are reserved for async events */
+ ibdev->num_comp_vectors = dev->lif_cfg.eq_count - 2;
+
+ ndev = ionic_lif_netdev(ionic_adev->lif);
+ addrconf_ifid_eui48((u8 *)&ibdev->node_guid, ndev);
+ rc = ib_device_set_netdev(ibdev, ndev, 1);
+ /* ionic_lif_netdev() returns ndev with refcount held */
+ dev_put(ndev);
+ if (rc)
+ goto err_admin;
+
+ ib_set_device_ops(&dev->ibdev, &ionic_dev_ops);
+
+ ionic_stats_init(dev);
+
+ rc = ib_register_device(ibdev, "ionic_%d", ibdev->dev.parent);
+ if (rc)
+ goto err_register;
+
+ return dev;
+
+err_register:
+ ionic_stats_cleanup(dev);
+err_admin:
+ ionic_kill_rdma_admin(dev, false);
+ ionic_destroy_rdma_admin(dev);
+err_reset:
+ ionic_destroy_resids(dev);
+ xa_destroy(&dev->qp_tbl);
+ xa_destroy(&dev->cq_tbl);
+ ib_dealloc_device(&dev->ibdev);
+
+ return ERR_PTR(rc);
+}
+
+static int ionic_aux_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct ionic_aux_dev *ionic_adev;
+ struct ionic_ibdev *dev;
+
+ ionic_adev = container_of(adev, struct ionic_aux_dev, adev);
+ dev = ionic_create_ibdev(ionic_adev);
+ if (IS_ERR(dev))
+ return dev_err_probe(&adev->dev, PTR_ERR(dev),
+ "Failed to register ibdev\n");
+
+ auxiliary_set_drvdata(adev, dev);
+ ibdev_dbg(&dev->ibdev, "registered\n");
+
+ return 0;
+}
+
+static void ionic_aux_remove(struct auxiliary_device *adev)
+{
+ struct ionic_ibdev *dev = auxiliary_get_drvdata(adev);
+
+ dev_dbg(&adev->dev, "unregister ibdev\n");
+ ionic_destroy_ibdev(dev);
+ dev_dbg(&adev->dev, "unregistered\n");
+}
+
+static const struct auxiliary_device_id ionic_aux_id_table[] = {
+ { .name = "ionic.rdma", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, ionic_aux_id_table);
+
+static struct auxiliary_driver ionic_aux_r_driver = {
+ .name = "rdma",
+ .probe = ionic_aux_probe,
+ .remove = ionic_aux_remove,
+ .id_table = ionic_aux_id_table,
+};
+
+static int __init ionic_mod_init(void)
+{
+ int rc;
+
+ ionic_evt_workq = create_workqueue(KBUILD_MODNAME "-evt");
+ if (!ionic_evt_workq)
+ return -ENOMEM;
+
+ rc = auxiliary_driver_register(&ionic_aux_r_driver);
+ if (rc)
+ goto err_aux;
+
+ return 0;
+
+err_aux:
+ destroy_workqueue(ionic_evt_workq);
+
+ return rc;
+}
+
+static void __exit ionic_mod_exit(void)
+{
+ auxiliary_driver_unregister(&ionic_aux_r_driver);
+ destroy_workqueue(ionic_evt_workq);
+}
+
+module_init(ionic_mod_init);
+module_exit(ionic_mod_exit);
diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.h b/drivers/infiniband/hw/ionic/ionic_ibdev.h
new file mode 100644
index 000000000000..82fda1e3cdb6
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_ibdev.h
@@ -0,0 +1,517 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#ifndef _IONIC_IBDEV_H_
+#define _IONIC_IBDEV_H_
+
+#include <rdma/ib_umem.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+#include <rdma/uverbs_ioctl.h>
+
+#include <rdma/ionic-abi.h>
+#include <ionic_api.h>
+#include <ionic_regs.h>
+
+#include "ionic_fw.h"
+#include "ionic_queue.h"
+#include "ionic_res.h"
+
+#include "ionic_lif_cfg.h"
+
+/* Config knobs */
+#define IONIC_EQ_DEPTH 511
+#define IONIC_EQ_COUNT 32
+#define IONIC_AQ_DEPTH 63
+#define IONIC_AQ_COUNT 4
+#define IONIC_EQ_ISR_BUDGET 10
+#define IONIC_EQ_WORK_BUDGET 1000
+#define IONIC_MAX_RD_ATOM 16
+#define IONIC_PKEY_TBL_LEN 1
+#define IONIC_GID_TBL_LEN 256
+
+#define IONIC_MAX_QPID 0xffffff
+#define IONIC_SPEC_HIGH 8
+#define IONIC_MAX_PD 1024
+#define IONIC_SQCMB_ORDER 5
+#define IONIC_RQCMB_ORDER 0
+
+#define IONIC_META_LAST ((void *)1ul)
+#define IONIC_META_POSTED ((void *)2ul)
+
+#define IONIC_CQ_GRACE 100
+
+#define IONIC_ROCE_UDP_SPORT 28272
+#define IONIC_DMA_LKEY 0
+#define IONIC_DMA_RKEY IONIC_DMA_LKEY
+
+#define IONIC_CMB_SUPPORTED \
+ (IONIC_CMB_ENABLE | IONIC_CMB_REQUIRE | IONIC_CMB_EXPDB | \
+ IONIC_CMB_WC | IONIC_CMB_UC)
+
+/* resource is not reserved on the device, indicated in tbl_order */
+#define IONIC_RES_INVALID -1
+
+struct ionic_aq;
+struct ionic_cq;
+struct ionic_eq;
+struct ionic_vcq;
+
+enum ionic_admin_state {
+ IONIC_ADMIN_ACTIVE, /* submitting admin commands to queue */
+ IONIC_ADMIN_PAUSED, /* not submitting, but may complete normally */
+ IONIC_ADMIN_KILLED, /* not submitting, locally completed */
+};
+
+enum ionic_admin_flags {
+ IONIC_ADMIN_F_BUSYWAIT = BIT(0), /* Don't sleep */
+ IONIC_ADMIN_F_TEARDOWN = BIT(1), /* In destroy path */
+ IONIC_ADMIN_F_INTERRUPT = BIT(2), /* Interruptible w/timeout */
+};
+
+enum ionic_mmap_flag {
+ IONIC_MMAP_WC = BIT(0),
+};
+
+struct ionic_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
+ unsigned long size;
+ unsigned long pfn;
+ u8 mmap_flags;
+};
+
+struct ionic_ibdev {
+ struct ib_device ibdev;
+
+ struct ionic_lif_cfg lif_cfg;
+
+ struct xarray qp_tbl;
+ struct xarray cq_tbl;
+
+ struct ionic_resid_bits inuse_dbid;
+ struct ionic_resid_bits inuse_pdid;
+ struct ionic_resid_bits inuse_ahid;
+ struct ionic_resid_bits inuse_mrid;
+ struct ionic_resid_bits inuse_qpid;
+ struct ionic_resid_bits inuse_cqid;
+
+ u8 half_cqid_udma_shift;
+ u8 half_qpid_udma_shift;
+ u8 next_qpid_udma_idx;
+ u8 next_mrkey;
+
+ struct work_struct reset_work;
+ bool reset_posted;
+ u32 reset_cnt;
+
+ struct delayed_work admin_dwork;
+ struct ionic_aq **aq_vec;
+ atomic_t admin_state;
+
+ struct ionic_eq **eq_vec;
+
+ struct ionic_v1_stat *hw_stats;
+ void *hw_stats_buf;
+ struct rdma_stat_desc *hw_stats_hdrs;
+ struct ionic_counter_stats *counter_stats;
+ int hw_stats_count;
+};
+
+struct ionic_eq {
+ struct ionic_ibdev *dev;
+
+ u32 eqid;
+ u32 intr;
+
+ struct ionic_queue q;
+
+ int armed;
+ bool enable;
+
+ struct work_struct work;
+
+ int irq;
+ char name[32];
+};
+
+struct ionic_admin_wr {
+ struct completion work;
+ struct list_head aq_ent;
+ struct ionic_v1_admin_wqe wqe;
+ struct ionic_v1_cqe cqe;
+ struct ionic_aq *aq;
+ int status;
+};
+
+struct ionic_admin_wr_q {
+ struct ionic_admin_wr *wr;
+ int wqe_strides;
+};
+
+struct ionic_aq {
+ struct ionic_ibdev *dev;
+ struct ionic_vcq *vcq;
+
+ struct work_struct work;
+
+ atomic_t admin_state;
+ unsigned long stamp;
+ bool armed;
+
+ u32 aqid;
+ u32 cqid;
+
+ spinlock_t lock; /* for posting */
+ struct ionic_queue q;
+ struct ionic_admin_wr_q *q_wr;
+ struct list_head wr_prod;
+ struct list_head wr_post;
+};
+
+struct ionic_ctx {
+ struct ib_ucontext ibctx;
+ u32 dbid;
+ struct rdma_user_mmap_entry *mmap_dbell;
+};
+
+struct ionic_tbl_buf {
+ u32 tbl_limit;
+ u32 tbl_pages;
+ size_t tbl_size;
+ __le64 *tbl_buf;
+ dma_addr_t tbl_dma;
+ u8 page_size_log2;
+};
+
+struct ionic_pd {
+ struct ib_pd ibpd;
+
+ u32 pdid;
+ u32 flags;
+};
+
+struct ionic_cq {
+ struct ionic_vcq *vcq;
+
+ u32 cqid;
+ u32 eqid;
+
+ spinlock_t lock; /* for polling */
+ struct list_head poll_sq;
+ bool flush;
+ struct list_head flush_sq;
+ struct list_head flush_rq;
+ struct list_head ibkill_flush_ent;
+
+ struct ionic_queue q;
+ bool color;
+ int credit;
+ u16 arm_any_prod;
+ u16 arm_sol_prod;
+
+ struct kref cq_kref;
+ struct completion cq_rel_comp;
+
+ /* infrequently accessed, keep at end */
+ struct ib_umem *umem;
+};
+
+struct ionic_vcq {
+ struct ib_cq ibcq;
+ struct ionic_cq cq[2];
+ u8 udma_mask;
+ u8 poll_idx;
+};
+
+struct ionic_sq_meta {
+ u64 wrid;
+ u32 len;
+ u16 seq;
+ u8 ibop;
+ u8 ibsts;
+ u8 remote:1;
+ u8 signal:1;
+ u8 local_comp:1;
+};
+
+struct ionic_rq_meta {
+ struct ionic_rq_meta *next;
+ u64 wrid;
+};
+
+struct ionic_qp {
+ struct ib_qp ibqp;
+ enum ib_qp_state state;
+
+ u32 qpid;
+ u32 ahid;
+ u32 sq_cqid;
+ u32 rq_cqid;
+ u8 udma_idx;
+ u8 has_ah:1;
+ u8 has_sq:1;
+ u8 has_rq:1;
+ u8 sig_all:1;
+
+ struct list_head qp_list_counter;
+
+ struct list_head cq_poll_sq;
+ struct list_head cq_flush_sq;
+ struct list_head cq_flush_rq;
+ struct list_head ibkill_flush_ent;
+
+ spinlock_t sq_lock; /* for posting and polling */
+ struct ionic_queue sq;
+ struct ionic_sq_meta *sq_meta;
+ u16 *sq_msn_idx;
+ int sq_spec;
+ u16 sq_old_prod;
+ u16 sq_msn_prod;
+ u16 sq_msn_cons;
+ u8 sq_cmb;
+ bool sq_flush;
+ bool sq_flush_rcvd;
+
+ spinlock_t rq_lock; /* for posting and polling */
+ struct ionic_queue rq;
+ struct ionic_rq_meta *rq_meta;
+ struct ionic_rq_meta *rq_meta_head;
+ int rq_spec;
+ u16 rq_old_prod;
+ u8 rq_cmb;
+ bool rq_flush;
+
+ struct kref qp_kref;
+ struct completion qp_rel_comp;
+
+ /* infrequently accessed, keep at end */
+ int sgid_index;
+ int sq_cmb_order;
+ u32 sq_cmb_pgid;
+ phys_addr_t sq_cmb_addr;
+ struct rdma_user_mmap_entry *mmap_sq_cmb;
+
+ struct ib_umem *sq_umem;
+
+ int rq_cmb_order;
+ u32 rq_cmb_pgid;
+ phys_addr_t rq_cmb_addr;
+ struct rdma_user_mmap_entry *mmap_rq_cmb;
+
+ struct ib_umem *rq_umem;
+
+ int dcqcn_profile;
+
+ struct ib_ud_header *hdr;
+};
+
+struct ionic_ah {
+ struct ib_ah ibah;
+ u32 ahid;
+ int sgid_index;
+ struct ib_ud_header hdr;
+};
+
+struct ionic_mr {
+ union {
+ struct ib_mr ibmr;
+ struct ib_mw ibmw;
+ };
+
+ u32 mrid;
+ int flags;
+
+ struct ib_umem *umem;
+ struct ionic_tbl_buf buf;
+ bool created;
+};
+
+struct ionic_counter_stats {
+ int queue_stats_count;
+ struct ionic_v1_stat *hdr;
+ struct rdma_stat_desc *stats_hdrs;
+ struct xarray xa_counters;
+};
+
+struct ionic_counter {
+ void *vals;
+ struct list_head qp_list;
+};
+
+static inline struct ionic_ibdev *to_ionic_ibdev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct ionic_ibdev, ibdev);
+}
+
+static inline struct ionic_ctx *to_ionic_ctx(struct ib_ucontext *ibctx)
+{
+ return container_of(ibctx, struct ionic_ctx, ibctx);
+}
+
+static inline struct ionic_ctx *to_ionic_ctx_uobj(struct ib_uobject *uobj)
+{
+ if (!uobj)
+ return NULL;
+
+ if (!uobj->context)
+ return NULL;
+
+ return to_ionic_ctx(uobj->context);
+}
+
+static inline struct ionic_pd *to_ionic_pd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct ionic_pd, ibpd);
+}
+
+static inline struct ionic_mr *to_ionic_mr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct ionic_mr, ibmr);
+}
+
+static inline struct ionic_mr *to_ionic_mw(struct ib_mw *ibmw)
+{
+ return container_of(ibmw, struct ionic_mr, ibmw);
+}
+
+static inline struct ionic_vcq *to_ionic_vcq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct ionic_vcq, ibcq);
+}
+
+static inline struct ionic_cq *to_ionic_vcq_cq(struct ib_cq *ibcq,
+ uint8_t udma_idx)
+{
+ return &to_ionic_vcq(ibcq)->cq[udma_idx];
+}
+
+static inline struct ionic_qp *to_ionic_qp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct ionic_qp, ibqp);
+}
+
+static inline struct ionic_ah *to_ionic_ah(struct ib_ah *ibah)
+{
+ return container_of(ibah, struct ionic_ah, ibah);
+}
+
+static inline u32 ionic_ctx_dbid(struct ionic_ibdev *dev,
+ struct ionic_ctx *ctx)
+{
+ if (!ctx)
+ return dev->lif_cfg.dbid;
+
+ return ctx->dbid;
+}
+
+static inline u32 ionic_obj_dbid(struct ionic_ibdev *dev,
+ struct ib_uobject *uobj)
+{
+ return ionic_ctx_dbid(dev, to_ionic_ctx_uobj(uobj));
+}
+
+static inline bool ionic_ibop_is_local(enum ib_wr_opcode op)
+{
+ return op == IB_WR_LOCAL_INV || op == IB_WR_REG_MR;
+}
+
+static inline void ionic_qp_complete(struct kref *kref)
+{
+ struct ionic_qp *qp = container_of(kref, struct ionic_qp, qp_kref);
+
+ complete(&qp->qp_rel_comp);
+}
+
+static inline void ionic_cq_complete(struct kref *kref)
+{
+ struct ionic_cq *cq = container_of(kref, struct ionic_cq, cq_kref);
+
+ complete(&cq->cq_rel_comp);
+}
+
+/* ionic_admin.c */
+extern struct workqueue_struct *ionic_evt_workq;
+void ionic_admin_post(struct ionic_ibdev *dev, struct ionic_admin_wr *wr);
+int ionic_admin_wait(struct ionic_ibdev *dev, struct ionic_admin_wr *wr,
+ enum ionic_admin_flags);
+
+int ionic_rdma_reset_devcmd(struct ionic_ibdev *dev);
+
+int ionic_create_rdma_admin(struct ionic_ibdev *dev);
+void ionic_destroy_rdma_admin(struct ionic_ibdev *dev);
+void ionic_kill_rdma_admin(struct ionic_ibdev *dev, bool fatal_path);
+
+/* ionic_controlpath.c */
+int ionic_create_cq_common(struct ionic_vcq *vcq,
+ struct ionic_tbl_buf *buf,
+ const struct ib_cq_init_attr *attr,
+ struct ionic_ctx *ctx,
+ struct ib_udata *udata,
+ struct ionic_qdesc *req_cq,
+ __u32 *resp_cqid,
+ int udma_idx);
+void ionic_destroy_cq_common(struct ionic_ibdev *dev, struct ionic_cq *cq);
+void ionic_flush_qp(struct ionic_ibdev *dev, struct ionic_qp *qp);
+void ionic_notify_flush_cq(struct ionic_cq *cq);
+
+int ionic_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata);
+void ionic_dealloc_ucontext(struct ib_ucontext *ibctx);
+int ionic_mmap(struct ib_ucontext *ibctx, struct vm_area_struct *vma);
+void ionic_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
+int ionic_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int ionic_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int ionic_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
+int ionic_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
+int ionic_destroy_ah(struct ib_ah *ibah, u32 flags);
+struct ib_mr *ionic_get_dma_mr(struct ib_pd *ibpd, int access);
+struct ib_mr *ionic_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 addr, int access, struct ib_dmah *dmah,
+ struct ib_udata *udata);
+struct ib_mr *ionic_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 offset,
+ u64 length, u64 addr, int fd, int access,
+ struct ib_dmah *dmah,
+ struct uverbs_attr_bundle *attrs);
+int ionic_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
+struct ib_mr *ionic_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type type,
+ u32 max_sg);
+int ionic_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset);
+int ionic_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata);
+int ionic_dealloc_mw(struct ib_mw *ibmw);
+int ionic_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+int ionic_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+int ionic_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
+ struct ib_udata *udata);
+int ionic_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask,
+ struct ib_udata *udata);
+int ionic_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask,
+ struct ib_qp_init_attr *init_attr);
+int ionic_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
+
+/* ionic_datapath.c */
+int ionic_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad);
+int ionic_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad);
+int ionic_poll_cq(struct ib_cq *ibcq, int nwc, struct ib_wc *wc);
+int ionic_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+
+/* ionic_hw_stats.c */
+void ionic_stats_init(struct ionic_ibdev *dev);
+void ionic_stats_cleanup(struct ionic_ibdev *dev);
+
+/* ionic_pgtbl.c */
+__le64 ionic_pgtbl_dma(struct ionic_tbl_buf *buf, u64 va);
+__be64 ionic_pgtbl_off(struct ionic_tbl_buf *buf, u64 va);
+int ionic_pgtbl_page(struct ionic_tbl_buf *buf, u64 dma);
+int ionic_pgtbl_init(struct ionic_ibdev *dev,
+ struct ionic_tbl_buf *buf,
+ struct ib_umem *umem,
+ dma_addr_t dma,
+ int limit,
+ u64 page_size);
+void ionic_pgtbl_unbuf(struct ionic_ibdev *dev, struct ionic_tbl_buf *buf);
+#endif /* _IONIC_IBDEV_H_ */
diff --git a/drivers/infiniband/hw/ionic/ionic_lif_cfg.c b/drivers/infiniband/hw/ionic/ionic_lif_cfg.c
new file mode 100644
index 000000000000..f3cd281c3a2f
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_lif_cfg.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/kernel.h>
+
+#include <ionic.h>
+#include <ionic_lif.h>
+
+#include "ionic_lif_cfg.h"
+
+#define IONIC_MIN_RDMA_VERSION 0
+#define IONIC_MAX_RDMA_VERSION 2
+
+static u8 ionic_get_expdb(struct ionic_lif *lif)
+{
+ u8 expdb_support = 0;
+
+ if (lif->ionic->idev.phy_cmb_expdb64_pages)
+ expdb_support |= IONIC_EXPDB_64B_WQE;
+ if (lif->ionic->idev.phy_cmb_expdb128_pages)
+ expdb_support |= IONIC_EXPDB_128B_WQE;
+ if (lif->ionic->idev.phy_cmb_expdb256_pages)
+ expdb_support |= IONIC_EXPDB_256B_WQE;
+ if (lif->ionic->idev.phy_cmb_expdb512_pages)
+ expdb_support |= IONIC_EXPDB_512B_WQE;
+
+ return expdb_support;
+}
+
+void ionic_fill_lif_cfg(struct ionic_lif *lif, struct ionic_lif_cfg *cfg)
+{
+ union ionic_lif_identity *ident = &lif->ionic->ident.lif;
+
+ cfg->lif = lif;
+ cfg->hwdev = &lif->ionic->pdev->dev;
+ cfg->lif_index = lif->index;
+ cfg->lif_hw_index = lif->hw_index;
+
+ cfg->dbid = lif->kern_pid;
+ cfg->dbid_count = le32_to_cpu(lif->ionic->ident.dev.ndbpgs_per_lif);
+ cfg->dbpage = lif->kern_dbpage;
+ cfg->intr_ctrl = lif->ionic->idev.intr_ctrl;
+
+ cfg->db_phys = lif->ionic->bars[IONIC_PCI_BAR_DBELL].bus_addr;
+
+ if (IONIC_VERSION(ident->rdma.version, ident->rdma.minor_version) >=
+ IONIC_VERSION(2, 1))
+ cfg->page_size_supported =
+ le64_to_cpu(ident->rdma.page_size_cap);
+ else
+ cfg->page_size_supported = IONIC_PAGE_SIZE_SUPPORTED;
+
+ cfg->rdma_version = ident->rdma.version;
+ cfg->qp_opcodes = ident->rdma.qp_opcodes;
+ cfg->admin_opcodes = ident->rdma.admin_opcodes;
+
+ cfg->stats_type = le16_to_cpu(ident->rdma.stats_type);
+ cfg->npts_per_lif = le32_to_cpu(ident->rdma.npts_per_lif);
+ cfg->nmrs_per_lif = le32_to_cpu(ident->rdma.nmrs_per_lif);
+ cfg->nahs_per_lif = le32_to_cpu(ident->rdma.nahs_per_lif);
+
+ cfg->aq_base = le32_to_cpu(ident->rdma.aq_qtype.qid_base);
+ cfg->cq_base = le32_to_cpu(ident->rdma.cq_qtype.qid_base);
+ cfg->eq_base = le32_to_cpu(ident->rdma.eq_qtype.qid_base);
+
+ /*
+ * ionic_create_rdma_admin() may reduce aq_count or eq_count if
+ * it is unable to allocate all that were requested.
+ * aq_count is tunable; see ionic_aq_count
+ * eq_count is tunable; see ionic_eq_count
+ */
+ cfg->aq_count = le32_to_cpu(ident->rdma.aq_qtype.qid_count);
+ cfg->eq_count = le32_to_cpu(ident->rdma.eq_qtype.qid_count);
+ cfg->cq_count = le32_to_cpu(ident->rdma.cq_qtype.qid_count);
+ cfg->qp_count = le32_to_cpu(ident->rdma.sq_qtype.qid_count);
+ cfg->dbid_count = le32_to_cpu(lif->ionic->ident.dev.ndbpgs_per_lif);
+
+ cfg->aq_qtype = ident->rdma.aq_qtype.qtype;
+ cfg->sq_qtype = ident->rdma.sq_qtype.qtype;
+ cfg->rq_qtype = ident->rdma.rq_qtype.qtype;
+ cfg->cq_qtype = ident->rdma.cq_qtype.qtype;
+ cfg->eq_qtype = ident->rdma.eq_qtype.qtype;
+ cfg->udma_qgrp_shift = ident->rdma.udma_shift;
+ cfg->udma_count = 2;
+
+ cfg->max_stride = ident->rdma.max_stride;
+ cfg->expdb_mask = ionic_get_expdb(lif);
+
+ cfg->sq_expdb =
+ !!(lif->qtype_info[IONIC_QTYPE_TXQ].features & IONIC_QIDENT_F_EXPDB);
+ cfg->rq_expdb =
+ !!(lif->qtype_info[IONIC_QTYPE_RXQ].features & IONIC_QIDENT_F_EXPDB);
+}
+
+struct net_device *ionic_lif_netdev(struct ionic_lif *lif)
+{
+ struct net_device *netdev = lif->netdev;
+
+ dev_hold(netdev);
+ return netdev;
+}
+
+void ionic_lif_fw_version(struct ionic_lif *lif, char *str, size_t len)
+{
+ strscpy(str, lif->ionic->idev.dev_info.fw_version, len);
+}
+
+u8 ionic_lif_asic_rev(struct ionic_lif *lif)
+{
+ return lif->ionic->idev.dev_info.asic_rev;
+}
diff --git a/drivers/infiniband/hw/ionic/ionic_lif_cfg.h b/drivers/infiniband/hw/ionic/ionic_lif_cfg.h
new file mode 100644
index 000000000000..20853429f623
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_lif_cfg.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#ifndef _IONIC_LIF_CFG_H_
+#define _IONIC_LIF_CFG_H_
+
+#define IONIC_VERSION(a, b) (((a) << 16) + ((b) << 8))
+#define IONIC_PAGE_SIZE_SUPPORTED 0x40201000 /* 4KB | 2MB | 1GB */
+
+#define IONIC_EXPDB_64B_WQE BIT(0)
+#define IONIC_EXPDB_128B_WQE BIT(1)
+#define IONIC_EXPDB_256B_WQE BIT(2)
+#define IONIC_EXPDB_512B_WQE BIT(3)
+
+struct ionic_lif_cfg {
+ struct device *hwdev;
+ struct ionic_lif *lif;
+
+ int lif_index;
+ int lif_hw_index;
+
+ u32 dbid;
+ int dbid_count;
+ u64 __iomem *dbpage;
+ struct ionic_intr __iomem *intr_ctrl;
+ phys_addr_t db_phys;
+
+ u64 page_size_supported;
+ u32 npts_per_lif;
+ u32 nmrs_per_lif;
+ u32 nahs_per_lif;
+
+ u32 aq_base;
+ u32 cq_base;
+ u32 eq_base;
+
+ int aq_count;
+ int eq_count;
+ int cq_count;
+ int qp_count;
+
+ u16 stats_type;
+ u8 aq_qtype;
+ u8 sq_qtype;
+ u8 rq_qtype;
+ u8 cq_qtype;
+ u8 eq_qtype;
+
+ u8 udma_count;
+ u8 udma_qgrp_shift;
+
+ u8 rdma_version;
+ u8 qp_opcodes;
+ u8 admin_opcodes;
+
+ u8 max_stride;
+ bool sq_expdb;
+ bool rq_expdb;
+ u8 expdb_mask;
+};
+
+void ionic_fill_lif_cfg(struct ionic_lif *lif, struct ionic_lif_cfg *cfg);
+struct net_device *ionic_lif_netdev(struct ionic_lif *lif);
+void ionic_lif_fw_version(struct ionic_lif *lif, char *str, size_t len);
+u8 ionic_lif_asic_rev(struct ionic_lif *lif);
+
+#endif /* _IONIC_LIF_CFG_H_ */
diff --git a/drivers/infiniband/hw/ionic/ionic_pgtbl.c b/drivers/infiniband/hw/ionic/ionic_pgtbl.c
new file mode 100644
index 000000000000..e74db73c9246
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_pgtbl.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/mman.h>
+#include <linux/dma-mapping.h>
+
+#include "ionic_fw.h"
+#include "ionic_ibdev.h"
+
+__le64 ionic_pgtbl_dma(struct ionic_tbl_buf *buf, u64 va)
+{
+ u64 pg_mask = BIT_ULL(buf->page_size_log2) - 1;
+ u64 dma;
+
+ if (!buf->tbl_pages)
+ return cpu_to_le64(0);
+
+ if (buf->tbl_pages > 1)
+ return cpu_to_le64(buf->tbl_dma);
+
+ if (buf->tbl_buf)
+ dma = le64_to_cpu(buf->tbl_buf[0]);
+ else
+ dma = buf->tbl_dma;
+
+ return cpu_to_le64(dma + (va & pg_mask));
+}
+
+__be64 ionic_pgtbl_off(struct ionic_tbl_buf *buf, u64 va)
+{
+ if (buf->tbl_pages > 1) {
+ u64 pg_mask = BIT_ULL(buf->page_size_log2) - 1;
+
+ return cpu_to_be64(va & pg_mask);
+ }
+
+ return 0;
+}
+
+int ionic_pgtbl_page(struct ionic_tbl_buf *buf, u64 dma)
+{
+ if (unlikely(buf->tbl_pages == buf->tbl_limit))
+ return -ENOMEM;
+
+ if (buf->tbl_buf)
+ buf->tbl_buf[buf->tbl_pages] = cpu_to_le64(dma);
+ else
+ buf->tbl_dma = dma;
+
+ ++buf->tbl_pages;
+
+ return 0;
+}
+
+static int ionic_tbl_buf_alloc(struct ionic_ibdev *dev,
+ struct ionic_tbl_buf *buf)
+{
+ int rc;
+
+ buf->tbl_size = buf->tbl_limit * sizeof(*buf->tbl_buf);
+ buf->tbl_buf = kmalloc(buf->tbl_size, GFP_KERNEL);
+ if (!buf->tbl_buf)
+ return -ENOMEM;
+
+ buf->tbl_dma = dma_map_single(dev->lif_cfg.hwdev, buf->tbl_buf,
+ buf->tbl_size, DMA_TO_DEVICE);
+ rc = dma_mapping_error(dev->lif_cfg.hwdev, buf->tbl_dma);
+ if (rc) {
+ kfree(buf->tbl_buf);
+ return rc;
+ }
+
+ return 0;
+}
+
+static int ionic_pgtbl_umem(struct ionic_tbl_buf *buf, struct ib_umem *umem)
+{
+ struct ib_block_iter biter;
+ u64 page_dma;
+ int rc;
+
+ rdma_umem_for_each_dma_block(umem, &biter, BIT_ULL(buf->page_size_log2)) {
+ page_dma = rdma_block_iter_dma_address(&biter);
+ rc = ionic_pgtbl_page(buf, page_dma);
+ if (rc)
+ return rc;
+ }
+
+ return 0;
+}
+
+void ionic_pgtbl_unbuf(struct ionic_ibdev *dev, struct ionic_tbl_buf *buf)
+{
+ if (buf->tbl_buf)
+ dma_unmap_single(dev->lif_cfg.hwdev, buf->tbl_dma,
+ buf->tbl_size, DMA_TO_DEVICE);
+
+ kfree(buf->tbl_buf);
+ memset(buf, 0, sizeof(*buf));
+}
+
+int ionic_pgtbl_init(struct ionic_ibdev *dev,
+ struct ionic_tbl_buf *buf,
+ struct ib_umem *umem,
+ dma_addr_t dma,
+ int limit,
+ u64 page_size)
+{
+ int rc;
+
+ memset(buf, 0, sizeof(*buf));
+
+ if (umem) {
+ limit = ib_umem_num_dma_blocks(umem, page_size);
+ buf->page_size_log2 = order_base_2(page_size);
+ }
+
+ if (limit < 1)
+ return -EINVAL;
+
+ buf->tbl_limit = limit;
+
+ /* skip pgtbl if contiguous / direct translation */
+ if (limit > 1) {
+ rc = ionic_tbl_buf_alloc(dev, buf);
+ if (rc)
+ return rc;
+ }
+
+ if (umem)
+ rc = ionic_pgtbl_umem(buf, umem);
+ else
+ rc = ionic_pgtbl_page(buf, dma);
+
+ if (rc)
+ goto err_unbuf;
+
+ return 0;
+
+err_unbuf:
+ ionic_pgtbl_unbuf(dev, buf);
+ return rc;
+}
diff --git a/drivers/infiniband/hw/ionic/ionic_queue.c b/drivers/infiniband/hw/ionic/ionic_queue.c
new file mode 100644
index 000000000000..aa897ed2a412
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_queue.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/dma-mapping.h>
+
+#include "ionic_queue.h"
+
+int ionic_queue_init(struct ionic_queue *q, struct device *dma_dev,
+ int depth, size_t stride)
+{
+ if (depth < 0 || depth > 0xffff)
+ return -EINVAL;
+
+ if (stride == 0 || stride > 0x10000)
+ return -EINVAL;
+
+ if (depth == 0)
+ depth = 1;
+
+ q->depth_log2 = order_base_2(depth + 1);
+ q->stride_log2 = order_base_2(stride);
+
+ if (q->depth_log2 + q->stride_log2 < PAGE_SHIFT)
+ q->depth_log2 = PAGE_SHIFT - q->stride_log2;
+
+ if (q->depth_log2 > 16 || q->stride_log2 > 16)
+ return -EINVAL;
+
+ q->size = BIT_ULL(q->depth_log2 + q->stride_log2);
+ q->mask = BIT(q->depth_log2) - 1;
+
+ q->ptr = dma_alloc_coherent(dma_dev, q->size, &q->dma, GFP_KERNEL);
+ if (!q->ptr)
+ return -ENOMEM;
+
+ /* it will always be page aligned, but just to be sure... */
+ if (!PAGE_ALIGNED(q->ptr)) {
+ dma_free_coherent(dma_dev, q->size, q->ptr, q->dma);
+ return -ENOMEM;
+ }
+
+ q->prod = 0;
+ q->cons = 0;
+ q->dbell = 0;
+
+ return 0;
+}
+
+void ionic_queue_destroy(struct ionic_queue *q, struct device *dma_dev)
+{
+ dma_free_coherent(dma_dev, q->size, q->ptr, q->dma);
+}
diff --git a/drivers/infiniband/hw/ionic/ionic_queue.h b/drivers/infiniband/hw/ionic/ionic_queue.h
new file mode 100644
index 000000000000..d18020d4cad5
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_queue.h
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#ifndef _IONIC_QUEUE_H_
+#define _IONIC_QUEUE_H_
+
+#include <linux/io.h>
+#include <ionic_regs.h>
+
+#define IONIC_MAX_DEPTH 0xffff
+#define IONIC_MAX_CQ_DEPTH 0xffff
+#define IONIC_CQ_RING_ARM IONIC_DBELL_RING_1
+#define IONIC_CQ_RING_SOL IONIC_DBELL_RING_2
+
+/**
+ * struct ionic_queue - Ring buffer used between device and driver
+ * @size: Size of the buffer, in bytes
+ * @dma: Dma address of the buffer
+ * @ptr: Buffer virtual address
+ * @prod: Driver position in the queue
+ * @cons: Device position in the queue
+ * @mask: Capacity of the queue, subtracting the hole
+ * This value is equal to ((1 << depth_log2) - 1)
+ * @depth_log2: Log base two of the queue depth
+ * @stride_log2: Log base two size of an element in the queue
+ * @dbell: Doorbell identifying bits
+ */
+struct ionic_queue {
+ size_t size;
+ dma_addr_t dma;
+ void *ptr;
+ u16 prod;
+ u16 cons;
+ u16 mask;
+ u8 depth_log2;
+ u8 stride_log2;
+ u64 dbell;
+};
+
+/**
+ * ionic_queue_init() - Initialize a queue
+ * @q: Uninitialized queue structure
+ * @dma_dev: DMA device for mapping
+ * @depth: Depth of the queue
+ * @stride: Size of each element of the queue
+ *
+ * Return: status code
+ */
+int ionic_queue_init(struct ionic_queue *q, struct device *dma_dev,
+ int depth, size_t stride);
+
+/**
+ * ionic_queue_destroy() - Destroy a queue
+ * @q: Queue structure
+ * @dma_dev: DMA device for mapping
+ */
+void ionic_queue_destroy(struct ionic_queue *q, struct device *dma_dev);
+
+/**
+ * ionic_queue_empty() - Test if queue is empty
+ * @q: Queue structure
+ *
+ * This is only valid for to-device queues.
+ *
+ * Return: is empty
+ */
+static inline bool ionic_queue_empty(struct ionic_queue *q)
+{
+ return q->prod == q->cons;
+}
+
+/**
+ * ionic_queue_length() - Get the current length of the queue
+ * @q: Queue structure
+ *
+ * This is only valid for to-device queues.
+ *
+ * Return: length
+ */
+static inline u16 ionic_queue_length(struct ionic_queue *q)
+{
+ return (q->prod - q->cons) & q->mask;
+}
+
+/**
+ * ionic_queue_length_remaining() - Get the remaining length of the queue
+ * @q: Queue structure
+ *
+ * This is only valid for to-device queues.
+ *
+ * Return: length remaining
+ */
+static inline u16 ionic_queue_length_remaining(struct ionic_queue *q)
+{
+ return q->mask - ionic_queue_length(q);
+}
+
+/**
+ * ionic_queue_full() - Test if queue is full
+ * @q: Queue structure
+ *
+ * This is only valid for to-device queues.
+ *
+ * Return: is full
+ */
+static inline bool ionic_queue_full(struct ionic_queue *q)
+{
+ return q->mask == ionic_queue_length(q);
+}
+
+/**
+ * ionic_color_wrap() - Flip the color if prod is wrapped
+ * @prod: Queue index just after advancing
+ * @color: Queue color just prior to advancing the index
+ *
+ * Return: color after advancing the index
+ */
+static inline bool ionic_color_wrap(u16 prod, bool color)
+{
+ /* logical xor color with (prod == 0) */
+ return color != (prod == 0);
+}
+
+/**
+ * ionic_queue_at() - Get the element at the given index
+ * @q: Queue structure
+ * @idx: Index in the queue
+ *
+ * The index must be within the bounds of the queue. It is not checked here.
+ *
+ * Return: pointer to element at index
+ */
+static inline void *ionic_queue_at(struct ionic_queue *q, u16 idx)
+{
+ return q->ptr + ((unsigned long)idx << q->stride_log2);
+}
+
+/**
+ * ionic_queue_at_prod() - Get the element at the producer index
+ * @q: Queue structure
+ *
+ * Return: pointer to element at producer index
+ */
+static inline void *ionic_queue_at_prod(struct ionic_queue *q)
+{
+ return ionic_queue_at(q, q->prod);
+}
+
+/**
+ * ionic_queue_at_cons() - Get the element at the consumer index
+ * @q: Queue structure
+ *
+ * Return: pointer to element at consumer index
+ */
+static inline void *ionic_queue_at_cons(struct ionic_queue *q)
+{
+ return ionic_queue_at(q, q->cons);
+}
+
+/**
+ * ionic_queue_next() - Compute the next index
+ * @q: Queue structure
+ * @idx: Index
+ *
+ * Return: next index after idx
+ */
+static inline u16 ionic_queue_next(struct ionic_queue *q, u16 idx)
+{
+ return (idx + 1) & q->mask;
+}
+
+/**
+ * ionic_queue_produce() - Increase the producer index
+ * @q: Queue structure
+ *
+ * Caller must ensure that the queue is not full. It is not checked here.
+ */
+static inline void ionic_queue_produce(struct ionic_queue *q)
+{
+ q->prod = ionic_queue_next(q, q->prod);
+}
+
+/**
+ * ionic_queue_consume() - Increase the consumer index
+ * @q: Queue structure
+ *
+ * Caller must ensure that the queue is not empty. It is not checked here.
+ *
+ * This is only valid for to-device queues.
+ */
+static inline void ionic_queue_consume(struct ionic_queue *q)
+{
+ q->cons = ionic_queue_next(q, q->cons);
+}
+
+/**
+ * ionic_queue_consume_entries() - Increase the consumer index by entries
+ * @q: Queue structure
+ * @entries: Number of entries to increment
+ *
+ * Caller must ensure that the queue is not empty. It is not checked here.
+ *
+ * This is only valid for to-device queues.
+ */
+static inline void ionic_queue_consume_entries(struct ionic_queue *q,
+ u16 entries)
+{
+ q->cons = (q->cons + entries) & q->mask;
+}
+
+/**
+ * ionic_queue_dbell_init() - Initialize doorbell bits for queue id
+ * @q: Queue structure
+ * @qid: Queue identifying number
+ */
+static inline void ionic_queue_dbell_init(struct ionic_queue *q, u32 qid)
+{
+ q->dbell = IONIC_DBELL_QID(qid);
+}
+
+/**
+ * ionic_queue_dbell_val() - Get current doorbell update value
+ * @q: Queue structure
+ *
+ * Return: current doorbell update value
+ */
+static inline u64 ionic_queue_dbell_val(struct ionic_queue *q)
+{
+ return q->dbell | q->prod;
+}
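+
+/*
+ * Putting the helpers above together: a minimal to-device posting sketch.
+ * The final doorbell write is shown only for illustration; the real doorbell
+ * helper and register address come from the common ionic code, not from
+ * this header.
+ *
+ *	if (ionic_queue_full(q))
+ *		return -ENOMEM;
+ *	wqe = ionic_queue_at_prod(q);
+ *	... build the work request in wqe ...
+ *	ionic_queue_produce(q);
+ *	writeq(ionic_queue_dbell_val(q), dbell_addr);
+ */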
+
+#endif /* _IONIC_QUEUE_H_ */
diff --git a/drivers/infiniband/hw/ionic/ionic_res.h b/drivers/infiniband/hw/ionic/ionic_res.h
new file mode 100644
index 000000000000..46c8c584bd9a
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_res.h
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#ifndef _IONIC_RES_H_
+#define _IONIC_RES_H_
+
+#include <linux/kernel.h>
+#include <linux/idr.h>
+
+/**
+ * struct ionic_resid_bits - Number allocator based on IDA
+ *
+ * @inuse: IDA handle
+ * @inuse_size: Highest ID limit for IDA
+ */
+struct ionic_resid_bits {
+ struct ida inuse;
+ unsigned int inuse_size;
+};
+
+/**
+ * ionic_resid_init() - Initialize a resid allocator
+ * @resid: Uninitialized resid allocator
+ * @size: Capacity of the allocator
+ */
+static inline void ionic_resid_init(struct ionic_resid_bits *resid,
+ unsigned int size)
+{
+ resid->inuse_size = size;
+ ida_init(&resid->inuse);
+}
+
+/**
+ * ionic_resid_destroy() - Destroy a resid allocator
+ * @resid: Resid allocator
+ */
+static inline void ionic_resid_destroy(struct ionic_resid_bits *resid)
+{
+ ida_destroy(&resid->inuse);
+}
+
+/**
+ * ionic_resid_get_shared() - Allocate an available shared resource id
+ * @resid: Resid allocator
+ * @min: Smallest valid resource id
+ * @size: One after largest valid resource id
+ *
+ * Return: Resource id, or negative error number
+ */
+static inline int ionic_resid_get_shared(struct ionic_resid_bits *resid,
+ unsigned int min,
+ unsigned int size)
+{
+ return ida_alloc_range(&resid->inuse, min, size - 1, GFP_KERNEL);
+}
+
+/**
+ * ionic_resid_get() - Allocate an available resource id
+ * @resid: Resid allocator
+ *
+ * Return: Resource id, or negative error number
+ */
+static inline int ionic_resid_get(struct ionic_resid_bits *resid)
+{
+ return ionic_resid_get_shared(resid, 0, resid->inuse_size);
+}
+
+/**
+ * ionic_resid_put() - Free a resource id
+ * @resid: Resid allocator
+ * @id: Resource id
+ */
+static inline void ionic_resid_put(struct ionic_resid_bits *resid, int id)
+{
+ ida_free(&resid->inuse, id);
+}
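+
+/*
+ * Usage sketch: the allocator above backs the per-device id tables (for
+ * example inuse_pdid in struct ionic_ibdev). A caller typically does:
+ *
+ *	pdid = ionic_resid_get(&dev->inuse_pdid);
+ *	if (pdid < 0)
+ *		return pdid;
+ *	... use pdid ...
+ *	ionic_resid_put(&dev->inuse_pdid, pdid);
+ */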
+
+/**
+ * ionic_bitid_to_qid() - Transform a resource bit index into a queue id
+ * @bitid: Bit index
+ * @qgrp_shift: Log2 number of queues per queue group
+ * @half_qid_shift: Log2 of half the total number of queues
+ *
+ * Return: Queue id
+ *
+ * Udma-constrained queues (QPs and CQs) are associated with their udma by
+ * queue group. Even queue groups are associated with udma0, and odd queue
+ * groups with udma1.
+ *
+ * For allocating queue ids, we want to arrange the bits into two halves,
+ * with the even queue groups of udma0 in the lower half of the bitset,
+ * and the odd queue groups of udma1 in the upper half of the bitset.
+ * Then, one or two calls of find_next_zero_bit can examine all the bits
+ * for queues of an entire udma.
+ *
+ * For example, assuming eight queue groups with qgrp qids per group:
+ *
+ * bitid 0*qgrp..1*qgrp-1 : qid 0*qgrp..1*qgrp-1
+ * bitid 1*qgrp..2*qgrp-1 : qid 2*qgrp..3*qgrp-1
+ * bitid 2*qgrp..3*qgrp-1 : qid 4*qgrp..5*qgrp-1
+ * bitid 3*qgrp..4*qgrp-1 : qid 6*qgrp..7*qgrp-1
+ * bitid 4*qgrp..5*qgrp-1 : qid 1*qgrp..2*qgrp-1
+ * bitid 5*qgrp..6*qgrp-1 : qid 3*qgrp..4*qgrp-1
+ * bitid 6*qgrp..7*qgrp-1 : qid 5*qgrp..6*qgrp-1
+ * bitid 7*qgrp..8*qgrp-1 : qid 7*qgrp..8*qgrp-1
+ *
+ * There are three important ranges of bits in the qid. There is the udma
+ * bit "U" at qgrp_shift, which is the least significant bit of the group
+ * index, and determines which udma a queue is associated with.
+ * The bits of lesser significance we can call the idx bits "I", which are
+ * the index of the queue within the group. The bits of greater significance
+ * we can call the grp bits "G", which are other bits of the group index that
+ * do not determine the udma. Those bits are just rearranged in the bit index
+ * in the bitset. A bitid has the udma bit in the most significant place,
+ * then the grp bits, then the idx bits.
+ *
+ * bitid: 00000000000000 U GGG IIIIII
+ * qid: 00000000000000 GGG U IIIIII
+ *
+ * Transforming from bit index to qid, or from qid to bit index, can be
+ * accomplished by rearranging the bits by masking and shifting.
+ */
+static inline u32 ionic_bitid_to_qid(u32 bitid, u8 qgrp_shift,
+ u8 half_qid_shift)
+{
+ u32 udma_bit =
+ (bitid & BIT(half_qid_shift)) >> (half_qid_shift - qgrp_shift);
+ u32 grp_bits = (bitid & GENMASK(half_qid_shift - 1, qgrp_shift)) << 1;
+ u32 idx_bits = bitid & (BIT(qgrp_shift) - 1);
+
+ return grp_bits | udma_bit | idx_bits;
+}
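+
+/*
+ * Worked example of the mapping above, assuming qgrp_shift = 2 and
+ * half_qid_shift = 4 (32 queues total, four per group):
+ *
+ *	bitid 22 = 0b1_01_10  (U = 1, GG = 01, II = 10)
+ *	qid   14 = 0b01_1_10  (GG = 01, U = 1, II = 10)
+ *
+ * so ionic_bitid_to_qid(22, 2, 4) == 14, and ionic_qid_to_bitid() below
+ * inverts it: ionic_qid_to_bitid(14, 2, 4) == 22.
+ */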
+
+/**
+ * ionic_qid_to_bitid() - Transform a queue id into a resource bit index
+ * @qid: queue index
+ * @qgrp_shift: Log2 number of queues per queue group
+ * @half_qid_shift: Log2 of half the total number of queues
+ *
+ * Return: Resource bit index
+ *
+ * This is the inverse of ionic_bitid_to_qid().
+ */
+static inline u32 ionic_qid_to_bitid(u32 qid, u8 qgrp_shift, u8 half_qid_shift)
+{
+ u32 udma_bit = (qid & BIT(qgrp_shift)) << (half_qid_shift - qgrp_shift);
+ u32 grp_bits = (qid & GENMASK(half_qid_shift, qgrp_shift + 1)) >> 1;
+ u32 idx_bits = qid & (BIT(qgrp_shift) - 1);
+
+ return udma_bit | grp_bits | idx_bits;
+}
+#endif /* _IONIC_RES_H_ */
diff --git a/drivers/infiniband/hw/irdma/Kconfig b/drivers/infiniband/hw/irdma/Kconfig
new file mode 100644
index 000000000000..0bd7e3fca1fb
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/Kconfig
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config INFINIBAND_IRDMA
+ tristate "Intel(R) Ethernet Protocol Driver for RDMA"
+ depends on INET
+ depends on IPV6 || !IPV6
+ depends on PCI
+ depends on IDPF && ICE && I40E
+ select GENERIC_ALLOCATOR
+ select AUXILIARY_BUS
+ select CRC32
+ help
+ This is an Intel(R) Ethernet Protocol Driver for RDMA that
+ supports IPU E2000 (RoCEv2), E810 (iWARP/RoCEv2) and X722 (iWARP)
+ network devices.
diff --git a/drivers/infiniband/hw/irdma/Makefile b/drivers/infiniband/hw/irdma/Makefile
new file mode 100644
index 000000000000..03ceb9e5475f
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/Makefile
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+# Copyright (c) 2019, Intel Corporation.
+
+#
+# Makefile for the Intel(R) Ethernet Connection RDMA Linux Driver
+#
+
+obj-$(CONFIG_INFINIBAND_IRDMA) += irdma.o
+
+irdma-objs := cm.o \
+ ctrl.o \
+ hmc.o \
+ hw.o \
+ i40iw_hw.o \
+ i40iw_if.o \
+ ig3rdma_if.o\
+ icrdma_if.o \
+ icrdma_hw.o \
+ ig3rdma_hw.o\
+ main.o \
+ pble.o \
+ puda.o \
+ trace.o \
+ uda.o \
+ uk.o \
+ utils.o \
+ verbs.o \
+ virtchnl.o \
+ ws.o \
+
+CFLAGS_trace.o = -I$(src)
diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c
new file mode 100644
index 000000000000..f4f4f92ba63a
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/cm.c
@@ -0,0 +1,4434 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include "main.h"
+#include "trace.h"
+
+static void irdma_cm_post_event(struct irdma_cm_event *event);
+static void irdma_disconnect_worker(struct work_struct *work);
+
+/**
+ * irdma_free_sqbuf - put back puda buffer if refcount is 0
+ * @vsi: The VSI structure of the device
+ * @bufp: puda buffer to free
+ */
+void irdma_free_sqbuf(struct irdma_sc_vsi *vsi, void *bufp)
+{
+ struct irdma_puda_buf *buf = bufp;
+ struct irdma_puda_rsrc *ilq = vsi->ilq;
+
+ if (refcount_dec_and_test(&buf->refcount))
+ irdma_puda_ret_bufpool(ilq, buf);
+}
+
+/**
+ * irdma_record_ird_ord - Record IRD/ORD passed in
+ * @cm_node: connection's node
+ * @conn_ird: connection IRD
+ * @conn_ord: connection ORD
+ */
+static void irdma_record_ird_ord(struct irdma_cm_node *cm_node, u32 conn_ird,
+ u32 conn_ord)
+{
+ if (conn_ird > cm_node->dev->hw_attrs.max_hw_ird)
+ conn_ird = cm_node->dev->hw_attrs.max_hw_ird;
+
+ if (conn_ord > cm_node->dev->hw_attrs.max_hw_ord)
+ conn_ord = cm_node->dev->hw_attrs.max_hw_ord;
+ else if (!conn_ord && cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO)
+ conn_ord = 1;
+ cm_node->ird_size = conn_ird;
+ cm_node->ord_size = conn_ord;
+}
+
+/**
+ * irdma_copy_ip_ntohl - copy IP address from network to host
+ * @dst: IP address in host order
+ * @src: IP address in network order (big endian)
+ */
+void irdma_copy_ip_ntohl(u32 *dst, __be32 *src)
+{
+ *dst++ = ntohl(*src++);
+ *dst++ = ntohl(*src++);
+ *dst++ = ntohl(*src++);
+ *dst = ntohl(*src);
+}
+
+/**
+ * irdma_copy_ip_htonl - copy IP address from host to network order
+ * @dst: IP address in network order (big endian)
+ * @src: IP address in host order
+ */
+void irdma_copy_ip_htonl(__be32 *dst, u32 *src)
+{
+ *dst++ = htonl(*src++);
+ *dst++ = htonl(*src++);
+ *dst++ = htonl(*src++);
+ *dst = htonl(*src);
+}
+
+/**
+ * irdma_get_addr_info
+ * @cm_node: contains ip/tcp info
+ * @cm_info: to get a copy of the cm_node ip/tcp info
+ */
+static void irdma_get_addr_info(struct irdma_cm_node *cm_node,
+ struct irdma_cm_info *cm_info)
+{
+ memset(cm_info, 0, sizeof(*cm_info));
+ cm_info->ipv4 = cm_node->ipv4;
+ cm_info->vlan_id = cm_node->vlan_id;
+ memcpy(cm_info->loc_addr, cm_node->loc_addr, sizeof(cm_info->loc_addr));
+ memcpy(cm_info->rem_addr, cm_node->rem_addr, sizeof(cm_info->rem_addr));
+ cm_info->loc_port = cm_node->loc_port;
+ cm_info->rem_port = cm_node->rem_port;
+}
+
+/**
+ * irdma_fill_sockaddr4 - fill in addr info for IPv4 connection
+ * @cm_node: connection's node
+ * @event: upper layer's cm event
+ */
+static inline void irdma_fill_sockaddr4(struct irdma_cm_node *cm_node,
+ struct iw_cm_event *event)
+{
+ struct sockaddr_in *laddr = (struct sockaddr_in *)&event->local_addr;
+ struct sockaddr_in *raddr = (struct sockaddr_in *)&event->remote_addr;
+
+ laddr->sin_family = AF_INET;
+ raddr->sin_family = AF_INET;
+
+ laddr->sin_port = htons(cm_node->loc_port);
+ raddr->sin_port = htons(cm_node->rem_port);
+
+ laddr->sin_addr.s_addr = htonl(cm_node->loc_addr[0]);
+ raddr->sin_addr.s_addr = htonl(cm_node->rem_addr[0]);
+}
+
+/**
+ * irdma_fill_sockaddr6 - fill in addr info for IPv6 connection
+ * @cm_node: connection's node
+ * @event: upper layer's cm event
+ */
+static inline void irdma_fill_sockaddr6(struct irdma_cm_node *cm_node,
+ struct iw_cm_event *event)
+{
+ struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)&event->local_addr;
+ struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)&event->remote_addr;
+
+ laddr6->sin6_family = AF_INET6;
+ raddr6->sin6_family = AF_INET6;
+
+ laddr6->sin6_port = htons(cm_node->loc_port);
+ raddr6->sin6_port = htons(cm_node->rem_port);
+
+ irdma_copy_ip_htonl(laddr6->sin6_addr.in6_u.u6_addr32,
+ cm_node->loc_addr);
+ irdma_copy_ip_htonl(raddr6->sin6_addr.in6_u.u6_addr32,
+ cm_node->rem_addr);
+}
+
+/**
+ * irdma_get_cmevent_info - for cm event upcall
+ * @cm_node: connection's node
+ * @cm_id: upper layers cm struct for the event
+ * @event: upper layer's cm event
+ */
+static inline void irdma_get_cmevent_info(struct irdma_cm_node *cm_node,
+ struct iw_cm_id *cm_id,
+ struct iw_cm_event *event)
+{
+ memcpy(&event->local_addr, &cm_id->m_local_addr,
+ sizeof(event->local_addr));
+ memcpy(&event->remote_addr, &cm_id->m_remote_addr,
+ sizeof(event->remote_addr));
+ if (cm_node) {
+ event->private_data = cm_node->pdata_buf;
+ event->private_data_len = (u8)cm_node->pdata.size;
+ event->ird = cm_node->ird_size;
+ event->ord = cm_node->ord_size;
+ }
+}
+
+/**
+ * irdma_send_cm_event - upcall cm's event handler
+ * @cm_node: connection's node
+ * @cm_id: upper layer's cm info struct
+ * @type: Event type to indicate
+ * @status: status for the event type
+ */
+static int irdma_send_cm_event(struct irdma_cm_node *cm_node,
+ struct iw_cm_id *cm_id,
+ enum iw_cm_event_type type, int status)
+{
+ struct iw_cm_event event = {};
+
+ event.event = type;
+ event.status = status;
+ trace_irdma_send_cm_event(cm_node, cm_id, type, status,
+ __builtin_return_address(0));
+
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: cm_node %p cm_id=%p state=%d accel=%d event_type=%d status=%d\n",
+ cm_node, cm_id, cm_node->accelerated, cm_node->state, type,
+ status);
+
+ switch (type) {
+ case IW_CM_EVENT_CONNECT_REQUEST:
+ if (cm_node->ipv4)
+ irdma_fill_sockaddr4(cm_node, &event);
+ else
+ irdma_fill_sockaddr6(cm_node, &event);
+ event.provider_data = cm_node;
+ event.private_data = cm_node->pdata_buf;
+ event.private_data_len = (u8)cm_node->pdata.size;
+ event.ird = cm_node->ird_size;
+ break;
+ case IW_CM_EVENT_CONNECT_REPLY:
+ irdma_get_cmevent_info(cm_node, cm_id, &event);
+ break;
+ case IW_CM_EVENT_ESTABLISHED:
+ event.ird = cm_node->ird_size;
+ event.ord = cm_node->ord_size;
+ break;
+ case IW_CM_EVENT_DISCONNECT:
+ case IW_CM_EVENT_CLOSE:
+ /* Wait if we are in RTS but haven't issued the iwcm event upcall */
+ if (!cm_node->accelerated)
+ wait_for_completion(&cm_node->establish_comp);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return cm_id->event_handler(cm_id, &event);
+}
+
+/**
+ * irdma_timer_list_prep - add connection nodes to a list to perform timer tasks
+ * @cm_core: cm's core
+ * @timer_list: a timer list to which selected cm_nodes are added
+ */
+static void irdma_timer_list_prep(struct irdma_cm_core *cm_core,
+ struct list_head *timer_list)
+{
+ struct irdma_cm_node *cm_node;
+ int bkt;
+
+ hash_for_each_rcu(cm_core->cm_hash_tbl, bkt, cm_node, list) {
+ if ((cm_node->close_entry || cm_node->send_entry) &&
+ refcount_inc_not_zero(&cm_node->refcnt))
+ list_add(&cm_node->timer_entry, timer_list);
+ }
+}
+
+/**
+ * irdma_create_event - create cm event
+ * @cm_node: connection's node
+ * @type: Event type to generate
+ */
+static struct irdma_cm_event *irdma_create_event(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type)
+{
+ struct irdma_cm_event *event;
+
+ if (!cm_node->cm_id)
+ return NULL;
+
+ event = kzalloc(sizeof(*event), GFP_ATOMIC);
+
+ if (!event)
+ return NULL;
+
+ event->type = type;
+ event->cm_node = cm_node;
+ memcpy(event->cm_info.rem_addr, cm_node->rem_addr,
+ sizeof(event->cm_info.rem_addr));
+ memcpy(event->cm_info.loc_addr, cm_node->loc_addr,
+ sizeof(event->cm_info.loc_addr));
+ event->cm_info.rem_port = cm_node->rem_port;
+ event->cm_info.loc_port = cm_node->loc_port;
+ event->cm_info.cm_id = cm_node->cm_id;
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: node=%p event=%p type=%u dst=%pI4 src=%pI4\n", cm_node,
+ event, type, event->cm_info.loc_addr,
+ event->cm_info.rem_addr);
+ trace_irdma_create_event(cm_node, type, __builtin_return_address(0));
+ irdma_cm_post_event(event);
+
+ return event;
+}
+
+/**
+ * irdma_free_retrans_entry - free send entry
+ * @cm_node: connection's node
+ */
+static void irdma_free_retrans_entry(struct irdma_cm_node *cm_node)
+{
+ struct irdma_device *iwdev = cm_node->iwdev;
+ struct irdma_timer_entry *send_entry;
+
+ send_entry = cm_node->send_entry;
+ if (!send_entry)
+ return;
+
+ cm_node->send_entry = NULL;
+ irdma_free_sqbuf(&iwdev->vsi, send_entry->sqbuf);
+ kfree(send_entry);
+ refcount_dec(&cm_node->refcnt);
+}
+
+/**
+ * irdma_cleanup_retrans_entry - free send entry with lock
+ * @cm_node: connection's node
+ */
+static void irdma_cleanup_retrans_entry(struct irdma_cm_node *cm_node)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ irdma_free_retrans_entry(cm_node);
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+}
+
+/**
+ * irdma_form_ah_cm_frame - get a free packet and build frame with address handle
+ * @cm_node: connection's node info to use in frame
+ * @options: pointer to options info
+ * @hdr: pointer to mpa header
+ * @pdata: pointer to private data
+ * @flags: indicates FIN or ACK
+ */
+static struct irdma_puda_buf *irdma_form_ah_cm_frame(struct irdma_cm_node *cm_node,
+ struct irdma_kmem_info *options,
+ struct irdma_kmem_info *hdr,
+ struct irdma_mpa_priv_info *pdata,
+ u8 flags)
+{
+ struct irdma_puda_buf *sqbuf;
+ struct irdma_sc_vsi *vsi = &cm_node->iwdev->vsi;
+ u8 *buf;
+ struct tcphdr *tcph;
+ u16 pktsize;
+ u32 opts_len = 0;
+ u32 pd_len = 0;
+ u32 hdr_len = 0;
+
+ if (!cm_node->ah || !cm_node->ah->ah_info.ah_valid) {
+ ibdev_dbg(&cm_node->iwdev->ibdev, "CM: AH invalid\n");
+ return NULL;
+ }
+
+ sqbuf = irdma_puda_get_bufpool(vsi->ilq);
+ if (!sqbuf) {
+ ibdev_dbg(&cm_node->iwdev->ibdev, "CM: SQ buf NULL\n");
+ return NULL;
+ }
+
+ sqbuf->ah_id = cm_node->ah->ah_info.ah_idx;
+ buf = sqbuf->mem.va;
+ if (options)
+ opts_len = (u32)options->size;
+
+ if (hdr)
+ hdr_len = hdr->size;
+
+ if (pdata)
+ pd_len = pdata->size;
+
+ pktsize = sizeof(*tcph) + opts_len + hdr_len + pd_len;
+
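+ /* zero only the TCP header area; options, MPA header and private data are copied in after it below */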
+ memset(buf, 0, sizeof(*tcph));
+
+ sqbuf->totallen = pktsize;
+ sqbuf->tcphlen = sizeof(*tcph) + opts_len;
+ sqbuf->scratch = cm_node;
+
+ tcph = (struct tcphdr *)buf;
+ buf += sizeof(*tcph);
+
+ tcph->source = htons(cm_node->loc_port);
+ tcph->dest = htons(cm_node->rem_port);
+ tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
+
+ if (flags & SET_ACK) {
+ cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt;
+ tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num);
+ tcph->ack = 1;
+ } else {
+ tcph->ack_seq = 0;
+ }
+
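+ /* SYN and FIN each consume one unit of sequence space; plain data advances the sequence by the MPA header plus private data length */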
+ if (flags & SET_SYN) {
+ cm_node->tcp_cntxt.loc_seq_num++;
+ tcph->syn = 1;
+ } else {
+ cm_node->tcp_cntxt.loc_seq_num += hdr_len + pd_len;
+ }
+
+ if (flags & SET_FIN) {
+ cm_node->tcp_cntxt.loc_seq_num++;
+ tcph->fin = 1;
+ }
+
+ if (flags & SET_RST)
+ tcph->rst = 1;
+
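+ /* TCP header length in 32-bit words, rounded up to cover any options */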
+ tcph->doff = (u16)((sizeof(*tcph) + opts_len + 3) >> 2);
+ sqbuf->tcphlen = tcph->doff << 2;
+ tcph->window = htons(cm_node->tcp_cntxt.rcv_wnd);
+ tcph->urg_ptr = 0;
+
+ if (opts_len) {
+ memcpy(buf, options->addr, opts_len);
+ buf += opts_len;
+ }
+
+ if (hdr_len) {
+ memcpy(buf, hdr->addr, hdr_len);
+ buf += hdr_len;
+ }
+
+ if (pdata && pdata->addr)
+ memcpy(buf, pdata->addr, pdata->size);
+
+ refcount_set(&sqbuf->refcount, 1);
+
+ print_hex_dump_debug("ILQ: TRANSMIT ILQ BUFFER", DUMP_PREFIX_OFFSET,
+ 16, 8, sqbuf->mem.va, sqbuf->totallen, false);
+
+ return sqbuf;
+}
+
+/**
+ * irdma_form_uda_cm_frame - get a free packet and build frame full tcpip packet
+ * @cm_node: connection's node info to use in frame
+ * @options: pointer to options info
+ * @hdr: pointer to mpa header
+ * @pdata: pointer to private data
+ * @flags: indicates FIN or ACK
+ */
+static struct irdma_puda_buf *irdma_form_uda_cm_frame(struct irdma_cm_node *cm_node,
+ struct irdma_kmem_info *options,
+ struct irdma_kmem_info *hdr,
+ struct irdma_mpa_priv_info *pdata,
+ u8 flags)
+{
+ struct irdma_puda_buf *sqbuf;
+ struct irdma_sc_vsi *vsi = &cm_node->iwdev->vsi;
+ u8 *buf;
+ struct tcphdr *tcph;
+ struct iphdr *iph;
+ struct ipv6hdr *ip6h;
+ struct ethhdr *ethh;
+ u16 pktsize;
+ u16 eth_hlen = ETH_HLEN;
+ u32 opts_len = 0;
+ u32 pd_len = 0;
+ u32 hdr_len = 0;
+ u16 vtag;
+
+ sqbuf = irdma_puda_get_bufpool(vsi->ilq);
+ if (!sqbuf)
+ return NULL;
+
+ buf = sqbuf->mem.va;
+
+ if (options)
+ opts_len = (u32)options->size;
+
+ if (hdr)
+ hdr_len = hdr->size;
+
+ if (pdata)
+ pd_len = pdata->size;
+
+ if (cm_node->vlan_id < VLAN_N_VID)
+ eth_hlen += 4;
+
+ if (cm_node->ipv4)
+ pktsize = sizeof(*iph) + sizeof(*tcph);
+ else
+ pktsize = sizeof(*ip6h) + sizeof(*tcph);
+ pktsize += opts_len + hdr_len + pd_len;
+
+ memset(buf, 0, eth_hlen + pktsize);
+
+ sqbuf->totallen = pktsize + eth_hlen;
+ sqbuf->maclen = eth_hlen;
+ sqbuf->tcphlen = sizeof(*tcph) + opts_len;
+ sqbuf->scratch = cm_node;
+
+ ethh = (struct ethhdr *)buf;
+ buf += eth_hlen;
+
+ if (cm_node->do_lpb)
+ sqbuf->do_lpb = true;
+
+ if (cm_node->ipv4) {
+ sqbuf->ipv4 = true;
+
+ iph = (struct iphdr *)buf;
+ buf += sizeof(*iph);
+ tcph = (struct tcphdr *)buf;
+ buf += sizeof(*tcph);
+
+ ether_addr_copy(ethh->h_dest, cm_node->rem_mac);
+ ether_addr_copy(ethh->h_source, cm_node->loc_mac);
+ if (cm_node->vlan_id < VLAN_N_VID) {
+ ((struct vlan_ethhdr *)ethh)->h_vlan_proto =
+ htons(ETH_P_8021Q);
+ vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) |
+ cm_node->vlan_id;
+ ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag);
+
+ ((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto =
+ htons(ETH_P_IP);
+ } else {
+ ethh->h_proto = htons(ETH_P_IP);
+ }
+
+ iph->version = IPVERSION;
+ iph->ihl = 5; /* 5 * 4-byte words, IP header len */
+ iph->tos = cm_node->tos;
+ iph->tot_len = htons(pktsize);
+ iph->id = htons(++cm_node->tcp_cntxt.loc_id);
+
+ iph->frag_off = htons(0x4000);
+ iph->ttl = 0x40;
+ iph->protocol = IPPROTO_TCP;
+ iph->saddr = htonl(cm_node->loc_addr[0]);
+ iph->daddr = htonl(cm_node->rem_addr[0]);
+ } else {
+ sqbuf->ipv4 = false;
+ ip6h = (struct ipv6hdr *)buf;
+ buf += sizeof(*ip6h);
+ tcph = (struct tcphdr *)buf;
+ buf += sizeof(*tcph);
+
+ ether_addr_copy(ethh->h_dest, cm_node->rem_mac);
+ ether_addr_copy(ethh->h_source, cm_node->loc_mac);
+ if (cm_node->vlan_id < VLAN_N_VID) {
+ ((struct vlan_ethhdr *)ethh)->h_vlan_proto =
+ htons(ETH_P_8021Q);
+ vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) |
+ cm_node->vlan_id;
+ ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag);
+ ((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto =
+ htons(ETH_P_IPV6);
+ } else {
+ ethh->h_proto = htons(ETH_P_IPV6);
+ }
+ ip6h->version = 6;
+ ip6h->priority = cm_node->tos >> 4;
+ ip6h->flow_lbl[0] = cm_node->tos << 4;
+ ip6h->flow_lbl[1] = 0;
+ ip6h->flow_lbl[2] = 0;
+ ip6h->payload_len = htons(pktsize - sizeof(*ip6h));
+ ip6h->nexthdr = 6;
+ ip6h->hop_limit = 128;
+ irdma_copy_ip_htonl(ip6h->saddr.in6_u.u6_addr32,
+ cm_node->loc_addr);
+ irdma_copy_ip_htonl(ip6h->daddr.in6_u.u6_addr32,
+ cm_node->rem_addr);
+ }
+
+ tcph->source = htons(cm_node->loc_port);
+ tcph->dest = htons(cm_node->rem_port);
+ tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
+
+ if (flags & SET_ACK) {
+ cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt;
+ tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num);
+ tcph->ack = 1;
+ } else {
+ tcph->ack_seq = 0;
+ }
+
+ if (flags & SET_SYN) {
+ cm_node->tcp_cntxt.loc_seq_num++;
+ tcph->syn = 1;
+ } else {
+ cm_node->tcp_cntxt.loc_seq_num += hdr_len + pd_len;
+ }
+
+ if (flags & SET_FIN) {
+ cm_node->tcp_cntxt.loc_seq_num++;
+ tcph->fin = 1;
+ }
+
+ if (flags & SET_RST)
+ tcph->rst = 1;
+
+ tcph->doff = (u16)((sizeof(*tcph) + opts_len + 3) >> 2);
+ sqbuf->tcphlen = tcph->doff << 2;
+ tcph->window = htons(cm_node->tcp_cntxt.rcv_wnd);
+ tcph->urg_ptr = 0;
+
+ if (opts_len) {
+ memcpy(buf, options->addr, opts_len);
+ buf += opts_len;
+ }
+
+ if (hdr_len) {
+ memcpy(buf, hdr->addr, hdr_len);
+ buf += hdr_len;
+ }
+
+ if (pdata && pdata->addr)
+ memcpy(buf, pdata->addr, pdata->size);
+
+ refcount_set(&sqbuf->refcount, 1);
+
+ print_hex_dump_debug("ILQ: TRANSMIT ILQ BUFFER", DUMP_PREFIX_OFFSET,
+ 16, 8, sqbuf->mem.va, sqbuf->totallen, false);
+ return sqbuf;
+}
+
+/**
+ * irdma_send_reset - Send RST packet
+ * @cm_node: connection's node
+ */
+int irdma_send_reset(struct irdma_cm_node *cm_node)
+{
+ struct irdma_puda_buf *sqbuf;
+ int flags = SET_RST | SET_ACK;
+
+ trace_irdma_send_reset(cm_node, 0, __builtin_return_address(0));
+ sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL, NULL, NULL,
+ flags);
+ if (!sqbuf)
+ return -ENOMEM;
+
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: caller: %pS cm_node %p cm_id=%p accel=%d state=%d rem_port=0x%04x, loc_port=0x%04x rem_addr=%pI4 loc_addr=%pI4\n",
+ __builtin_return_address(0), cm_node, cm_node->cm_id,
+ cm_node->accelerated, cm_node->state, cm_node->rem_port,
+ cm_node->loc_port, cm_node->rem_addr, cm_node->loc_addr);
+
+ return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 0,
+ 1);
+}
+
+/**
+ * irdma_active_open_err - send event for active side cm error
+ * @cm_node: connection's node
+ * @reset: Flag to send reset or not
+ */
+static void irdma_active_open_err(struct irdma_cm_node *cm_node, bool reset)
+{
+ trace_irdma_active_open_err(cm_node, reset,
+ __builtin_return_address(0));
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->cm_core->stats_connect_errs++;
+ if (reset) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: cm_node=%p state=%d\n", cm_node,
+ cm_node->state);
+ refcount_inc(&cm_node->refcnt);
+ irdma_send_reset(cm_node);
+ }
+
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ irdma_create_event(cm_node, IRDMA_CM_EVENT_ABORTED);
+}
+
+/**
+ * irdma_passive_open_err - handle passive side cm error
+ * @cm_node: connection's node
+ * @reset: send reset or just free cm_node
+ */
+static void irdma_passive_open_err(struct irdma_cm_node *cm_node, bool reset)
+{
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->cm_core->stats_passive_errs++;
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ ibdev_dbg(&cm_node->iwdev->ibdev, "CM: cm_node=%p state =%d\n",
+ cm_node, cm_node->state);
+ trace_irdma_passive_open_err(cm_node, reset,
+ __builtin_return_address(0));
+ if (reset)
+ irdma_send_reset(cm_node);
+ else
+ irdma_rem_ref_cm_node(cm_node);
+}
+
+/**
+ * irdma_event_connect_error - to create connect error event
+ * @event: cm information for connect event
+ */
+static void irdma_event_connect_error(struct irdma_cm_event *event)
+{
+ struct irdma_qp *iwqp;
+ struct iw_cm_id *cm_id;
+
+ cm_id = event->cm_node->cm_id;
+ if (!cm_id)
+ return;
+
+ iwqp = cm_id->provider_data;
+
+ if (!iwqp || !iwqp->iwdev)
+ return;
+
+ iwqp->cm_id = NULL;
+ cm_id->provider_data = NULL;
+ irdma_send_cm_event(event->cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY,
+ -ECONNRESET);
+ irdma_rem_ref_cm_node(event->cm_node);
+}
+
+/**
+ * irdma_process_options - process options from TCP header
+ * @cm_node: connection's node
+ * @optionsloc: point to start of options
+ * @optionsize: size of all options
+ * @syn_pkt: flag if syn packet
+ */
+static int irdma_process_options(struct irdma_cm_node *cm_node, u8 *optionsloc,
+ u32 optionsize, u32 syn_pkt)
+{
+ u32 tmp;
+ u32 offset = 0;
+ union all_known_options *all_options;
+ char got_mss_option = 0;
+
+ while (offset < optionsize) {
+ all_options = (union all_known_options *)(optionsloc + offset);
+ switch (all_options->base.optionnum) {
+ case OPTION_NUM_EOL:
+ offset = optionsize;
+ break;
+ case OPTION_NUM_NONE:
+ offset += 1;
+ continue;
+ case OPTION_NUM_MSS:
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: MSS Length: %d Offset: %d Size: %d\n",
+ all_options->mss.len, offset, optionsize);
+ got_mss_option = 1;
+ if (all_options->mss.len != 4)
+ return -EINVAL;
+ tmp = ntohs(all_options->mss.mss);
+ if ((cm_node->ipv4 &&
+ (tmp + IRDMA_MTU_TO_MSS_IPV4) < IRDMA_MIN_MTU_IPV4) ||
+ (!cm_node->ipv4 &&
+ (tmp + IRDMA_MTU_TO_MSS_IPV6) < IRDMA_MIN_MTU_IPV6))
+ return -EINVAL;
+ if (tmp < cm_node->tcp_cntxt.mss)
+ cm_node->tcp_cntxt.mss = tmp;
+ break;
+ case OPTION_NUM_WINDOW_SCALE:
+ cm_node->tcp_cntxt.snd_wscale =
+ all_options->windowscale.shiftcount;
+ break;
+ default:
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: Unsupported TCP Option: %x\n",
+ all_options->base.optionnum);
+ break;
+ }
+ offset += all_options->base.len;
+ }
+ if (!got_mss_option && syn_pkt)
+ cm_node->tcp_cntxt.mss = IRDMA_CM_DEFAULT_MSS;
+
+ return 0;
+}
+
+/**
+ * irdma_handle_tcp_options - setup TCP context info after parsing TCP options
+ * @cm_node: connection's node
+ * @tcph: pointer tcp header
+ * @optionsize: size of options rcvd
+ * @passive: active or passive flag
+ */
+static int irdma_handle_tcp_options(struct irdma_cm_node *cm_node,
+ struct tcphdr *tcph, int optionsize,
+ int passive)
+{
+ u8 *optionsloc = (u8 *)&tcph[1];
+ int ret;
+
+ if (optionsize) {
+ ret = irdma_process_options(cm_node, optionsloc, optionsize,
+ (u32)tcph->syn);
+ if (ret) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: Node %p, Sending Reset\n", cm_node);
+ if (passive)
+ irdma_passive_open_err(cm_node, true);
+ else
+ irdma_active_open_err(cm_node, true);
+ return ret;
+ }
+ }
+
+ cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window)
+ << cm_node->tcp_cntxt.snd_wscale;
+
+ if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd)
+ cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd;
+
+ return 0;
+}
+
+/**
+ * irdma_build_mpa_v1 - build a MPA V1 frame
+ * @cm_node: connection's node
+ * @start_addr: address where to build frame
+ * @mpa_key: indicates MPA_KEY_REQUEST or MPA_KEY_REPLY
+ */
+static void irdma_build_mpa_v1(struct irdma_cm_node *cm_node, void *start_addr,
+ u8 mpa_key)
+{
+ struct ietf_mpa_v1 *mpa_frame = start_addr;
+
+ switch (mpa_key) {
+ case MPA_KEY_REQUEST:
+ memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE);
+ break;
+ case MPA_KEY_REPLY:
+ memcpy(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
+ break;
+ default:
+ break;
+ }
+ mpa_frame->flags = IETF_MPA_FLAGS_CRC;
+ mpa_frame->rev = cm_node->mpa_frame_rev;
+ mpa_frame->priv_data_len = htons(cm_node->pdata.size);
+}
+
+/**
+ * irdma_build_mpa_v2 - build a MPA V2 frame
+ * @cm_node: connection's node
+ * @start_addr: buffer start address
+ * @mpa_key: indicates MPA_KEY_REQUEST or MPA_KEY_REPLY
+ */
+static void irdma_build_mpa_v2(struct irdma_cm_node *cm_node, void *start_addr,
+ u8 mpa_key)
+{
+ struct ietf_mpa_v2 *mpa_frame = start_addr;
+ struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg;
+ u16 ctrl_ird, ctrl_ord;
+
+ /* initialize the MPA v1 portion of the frame */
+ irdma_build_mpa_v1(cm_node, start_addr, mpa_key);
+ mpa_frame->flags |= IETF_MPA_V2_FLAG;
+ if (cm_node->iwdev->iw_ooo) {
+ mpa_frame->flags |= IETF_MPA_FLAGS_MARKERS;
+ cm_node->rcv_mark_en = true;
+ }
+ mpa_frame->priv_data_len = cpu_to_be16(be16_to_cpu(mpa_frame->priv_data_len) +
+ IETF_RTR_MSG_SIZE);
+
+ /* initialize RTR msg */
+ if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) {
+ ctrl_ird = IETF_NO_IRD_ORD;
+ ctrl_ord = IETF_NO_IRD_ORD;
+ } else {
+ ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ?
+ IETF_NO_IRD_ORD :
+ cm_node->ird_size;
+ ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ?
+ IETF_NO_IRD_ORD :
+ cm_node->ord_size;
+ }
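+ /* IETF_NO_IRD_ORD also serves as the largest encodable IRD/ORD value, so larger local limits are clamped to it above */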
+ ctrl_ird |= IETF_PEER_TO_PEER;
+
+ switch (mpa_key) {
+ case MPA_KEY_REQUEST:
+ ctrl_ord |= IETF_RDMA0_WRITE;
+ ctrl_ord |= IETF_RDMA0_READ;
+ break;
+ case MPA_KEY_REPLY:
+ switch (cm_node->send_rdma0_op) {
+ case SEND_RDMA_WRITE_ZERO:
+ ctrl_ord |= IETF_RDMA0_WRITE;
+ break;
+ case SEND_RDMA_READ_ZERO:
+ ctrl_ord |= IETF_RDMA0_READ;
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ rtr_msg->ctrl_ird = htons(ctrl_ird);
+ rtr_msg->ctrl_ord = htons(ctrl_ord);
+}
+
+/**
+ * irdma_cm_build_mpa_frame - build mpa frame for mpa version 1 or version 2
+ * @cm_node: connection's node
+ * @mpa: mpa data buffer
+ * @mpa_key: indicates MPA_KEY_REQUEST or MPA_KEY_REPLY
+ */
+static int irdma_cm_build_mpa_frame(struct irdma_cm_node *cm_node,
+ struct irdma_kmem_info *mpa, u8 mpa_key)
+{
+ int hdr_len = 0;
+
+ switch (cm_node->mpa_frame_rev) {
+ case IETF_MPA_V1:
+ hdr_len = sizeof(struct ietf_mpa_v1);
+ irdma_build_mpa_v1(cm_node, mpa->addr, mpa_key);
+ break;
+ case IETF_MPA_V2:
+ hdr_len = sizeof(struct ietf_mpa_v2);
+ irdma_build_mpa_v2(cm_node, mpa->addr, mpa_key);
+ break;
+ default:
+ break;
+ }
+
+ return hdr_len;
+}
+
+/**
+ * irdma_send_mpa_request - active node send mpa request to passive node
+ * @cm_node: connection's node
+ */
+static int irdma_send_mpa_request(struct irdma_cm_node *cm_node)
+{
+ struct irdma_puda_buf *sqbuf;
+
+ cm_node->mpa_hdr.addr = &cm_node->mpa_v2_frame;
+ cm_node->mpa_hdr.size = irdma_cm_build_mpa_frame(cm_node,
+ &cm_node->mpa_hdr,
+ MPA_KEY_REQUEST);
+ if (!cm_node->mpa_hdr.size) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: mpa size = %d\n", cm_node->mpa_hdr.size);
+ return -EINVAL;
+ }
+
+ sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL,
+ &cm_node->mpa_hdr,
+ &cm_node->pdata, SET_ACK);
+ if (!sqbuf)
+ return -ENOMEM;
+
+ return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 1,
+ 0);
+}
+
+/**
+ * irdma_send_mpa_reject - send an mpa reject frame
+ * @cm_node: connection's node
+ * @pdata: reject data for connection
+ * @plen: length of reject data
+ */
+static int irdma_send_mpa_reject(struct irdma_cm_node *cm_node,
+ const void *pdata, u8 plen)
+{
+ struct irdma_puda_buf *sqbuf;
+ struct irdma_mpa_priv_info priv_info;
+
+ cm_node->mpa_hdr.addr = &cm_node->mpa_v2_frame;
+ cm_node->mpa_hdr.size = irdma_cm_build_mpa_frame(cm_node,
+ &cm_node->mpa_hdr,
+ MPA_KEY_REPLY);
+
+ cm_node->mpa_frame.flags |= IETF_MPA_FLAGS_REJECT;
+ priv_info.addr = pdata;
+ priv_info.size = plen;
+
+ sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL,
+ &cm_node->mpa_hdr, &priv_info,
+ SET_ACK | SET_FIN);
+ if (!sqbuf)
+ return -ENOMEM;
+
+ cm_node->state = IRDMA_CM_STATE_FIN_WAIT1;
+
+ return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 1,
+ 0);
+}
+
+/**
+ * irdma_negotiate_mpa_v2_ird_ord - negotiate MPAv2 IRD/ORD
+ * @cm_node: connection's node
+ * @buf: Data pointer
+ */
+static int irdma_negotiate_mpa_v2_ird_ord(struct irdma_cm_node *cm_node,
+ u8 *buf)
+{
+ struct ietf_mpa_v2 *mpa_v2_frame;
+ struct ietf_rtr_msg *rtr_msg;
+ u16 ird_size;
+ u16 ord_size;
+ u16 ctrl_ord;
+ u16 ctrl_ird;
+
+ mpa_v2_frame = (struct ietf_mpa_v2 *)buf;
+ rtr_msg = &mpa_v2_frame->rtr_msg;
+
+ /* parse rtr message */
+ ctrl_ord = ntohs(rtr_msg->ctrl_ord);
+ ctrl_ird = ntohs(rtr_msg->ctrl_ird);
+ ird_size = ctrl_ird & IETF_NO_IRD_ORD;
+ ord_size = ctrl_ord & IETF_NO_IRD_ORD;
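+ /* the low bits of ctrl_ird/ctrl_ord carry the IRD/ORD values; the remaining bits are control flags (peer-to-peer, RDMA0 op) */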
+
+ if (!(ctrl_ird & IETF_PEER_TO_PEER))
+ return -EOPNOTSUPP;
+
+ if (ird_size == IETF_NO_IRD_ORD || ord_size == IETF_NO_IRD_ORD) {
+ cm_node->mpav2_ird_ord = IETF_NO_IRD_ORD;
+ goto negotiate_done;
+ }
+
+ if (cm_node->state != IRDMA_CM_STATE_MPAREQ_SENT) {
+ /* responder */
+ if (!ord_size && (ctrl_ord & IETF_RDMA0_READ))
+ cm_node->ird_size = 1;
+ if (cm_node->ord_size > ird_size)
+ cm_node->ord_size = ird_size;
+ } else {
+ /* initiator */
+ if (!ird_size && (ctrl_ord & IETF_RDMA0_READ))
+ /* Remote peer doesn't support RDMA0_READ */
+ return -EOPNOTSUPP;
+
+ if (cm_node->ord_size > ird_size)
+ cm_node->ord_size = ird_size;
+
+ if (cm_node->ird_size < ord_size)
+ /* no resources available */
+ return -EINVAL;
+ }
+
+negotiate_done:
+ if (ctrl_ord & IETF_RDMA0_READ)
+ cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
+ else if (ctrl_ord & IETF_RDMA0_WRITE)
+ cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO;
+ else
+ /* Not supported RDMA0 operation */
+ return -EOPNOTSUPP;
+
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: MPAV2 Negotiated ORD: %d, IRD: %d\n",
+ cm_node->ord_size, cm_node->ird_size);
+ trace_irdma_negotiate_mpa_v2(cm_node);
+ return 0;
+}
+
+/**
+ * irdma_parse_mpa - process an IETF MPA frame
+ * @cm_node: connection's node
+ * @buf: Data pointer
+ * @type: to return accept or reject
+ * @len: Len of mpa buffer
+ */
+static int irdma_parse_mpa(struct irdma_cm_node *cm_node, u8 *buf, u32 *type,
+ u32 len)
+{
+ struct ietf_mpa_v1 *mpa_frame;
+ int mpa_hdr_len, priv_data_len, ret;
+
+ *type = IRDMA_MPA_REQUEST_ACCEPT;
+
+ if (len < sizeof(struct ietf_mpa_v1)) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: ietf buffer small (%x)\n", len);
+ return -EINVAL;
+ }
+
+ mpa_frame = (struct ietf_mpa_v1 *)buf;
+ mpa_hdr_len = sizeof(struct ietf_mpa_v1);
+ priv_data_len = ntohs(mpa_frame->priv_data_len);
+
+ if (priv_data_len > IETF_MAX_PRIV_DATA_LEN) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: private_data too big %d\n", priv_data_len);
+ return -EOVERFLOW;
+ }
+
+ if (mpa_frame->rev != IETF_MPA_V1 && mpa_frame->rev != IETF_MPA_V2) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: unsupported mpa rev = %d\n", mpa_frame->rev);
+ return -EINVAL;
+ }
+
+ if (mpa_frame->rev > cm_node->mpa_frame_rev) {
+ ibdev_dbg(&cm_node->iwdev->ibdev, "CM: rev %d\n",
+ mpa_frame->rev);
+ return -EINVAL;
+ }
+
+ cm_node->mpa_frame_rev = mpa_frame->rev;
+ if (cm_node->state != IRDMA_CM_STATE_MPAREQ_SENT) {
+ if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ,
+ IETF_MPA_KEY_SIZE)) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: Unexpected MPA Key received\n");
+ return -EINVAL;
+ }
+ } else {
+ if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REP,
+ IETF_MPA_KEY_SIZE)) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: Unexpected MPA Key received\n");
+ return -EINVAL;
+ }
+ }
+
+ if (priv_data_len + mpa_hdr_len > len) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: ietf buffer len(%x + %x != %x)\n",
+ priv_data_len, mpa_hdr_len, len);
+ return -EOVERFLOW;
+ }
+
+ if (len > IRDMA_MAX_CM_BUF) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: ietf buffer large len = %d\n", len);
+ return -EOVERFLOW;
+ }
+
+ switch (mpa_frame->rev) {
+ case IETF_MPA_V2:
+ mpa_hdr_len += IETF_RTR_MSG_SIZE;
+ ret = irdma_negotiate_mpa_v2_ird_ord(cm_node, buf);
+ if (ret)
+ return ret;
+ break;
+ case IETF_MPA_V1:
+ default:
+ break;
+ }
+
+ memcpy(cm_node->pdata_buf, buf + mpa_hdr_len, priv_data_len);
+ cm_node->pdata.size = priv_data_len;
+
+ if (mpa_frame->flags & IETF_MPA_FLAGS_REJECT)
+ *type = IRDMA_MPA_REQUEST_REJECT;
+
+ if (mpa_frame->flags & IETF_MPA_FLAGS_MARKERS)
+ cm_node->snd_mark_en = true;
+
+ return 0;
+}
+
+/**
+ * irdma_schedule_cm_timer - schedule a send or close timer for a cm node
+ * @cm_node: connection's node
+ * @sqbuf: buffer to send
+ * @type: send or close timer
+ * @send_retrans: whether retransmits are to be done
+ * @close_when_complete: whether to remove cm_node when complete
+ *
+ * note - cm_node needs to be protected before calling this. Encase in:
+ * refcount_inc(&cm_node->refcnt);
+ * irdma_schedule_cm_timer(...);
+ * irdma_rem_ref_cm_node(cm_node);
+ */
+int irdma_schedule_cm_timer(struct irdma_cm_node *cm_node,
+ struct irdma_puda_buf *sqbuf,
+ enum irdma_timer_type type, int send_retrans,
+ int close_when_complete)
+{
+ struct irdma_sc_vsi *vsi = &cm_node->iwdev->vsi;
+ struct irdma_cm_core *cm_core = cm_node->cm_core;
+ struct irdma_timer_entry *new_send;
+ u32 was_timer_set;
+ unsigned long flags;
+
+ new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
+ if (!new_send) {
+ if (type != IRDMA_TIMER_TYPE_CLOSE)
+ irdma_free_sqbuf(vsi, sqbuf);
+ return -ENOMEM;
+ }
+
+ new_send->retrycount = IRDMA_DEFAULT_RETRYS;
+ new_send->retranscount = IRDMA_DEFAULT_RETRANS;
+ new_send->sqbuf = sqbuf;
+ new_send->timetosend = jiffies;
+ new_send->type = type;
+ new_send->send_retrans = send_retrans;
+ new_send->close_when_complete = close_when_complete;
+
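+ /* a close entry fires once roughly HZ/10 from now; a send entry transmits immediately and arms a retransmit timer */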
+ if (type == IRDMA_TIMER_TYPE_CLOSE) {
+ new_send->timetosend += (HZ / 10);
+ if (cm_node->close_entry) {
+ kfree(new_send);
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: already close entry\n");
+ return -EINVAL;
+ }
+
+ cm_node->close_entry = new_send;
+ } else { /* type == IRDMA_TIMER_TYPE_SEND */
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ cm_node->send_entry = new_send;
+ refcount_inc(&cm_node->refcnt);
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+ new_send->timetosend = jiffies + IRDMA_RETRY_TIMEOUT;
+
+ refcount_inc(&sqbuf->refcount);
+ irdma_puda_send_buf(vsi->ilq, sqbuf);
+ if (!send_retrans) {
+ irdma_cleanup_retrans_entry(cm_node);
+ if (close_when_complete)
+ irdma_rem_ref_cm_node(cm_node);
+ return 0;
+ }
+ }
+
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ was_timer_set = timer_pending(&cm_core->tcp_timer);
+
+ if (!was_timer_set) {
+ cm_core->tcp_timer.expires = new_send->timetosend;
+ add_timer(&cm_core->tcp_timer);
+ }
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+ return 0;
+}
+
+/**
+ * irdma_retrans_expired - handle a connection whose retransmits are exhausted
+ * @cm_node: connection's node
+ */
+static void irdma_retrans_expired(struct irdma_cm_node *cm_node)
+{
+ enum irdma_cm_node_state state = cm_node->state;
+
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ switch (state) {
+ case IRDMA_CM_STATE_SYN_RCVD:
+ case IRDMA_CM_STATE_CLOSING:
+ irdma_rem_ref_cm_node(cm_node);
+ break;
+ case IRDMA_CM_STATE_FIN_WAIT1:
+ case IRDMA_CM_STATE_LAST_ACK:
+ irdma_send_reset(cm_node);
+ break;
+ default:
+ refcount_inc(&cm_node->refcnt);
+ irdma_send_reset(cm_node);
+ irdma_create_event(cm_node, IRDMA_CM_EVENT_ABORTED);
+ break;
+ }
+}
+
+/**
+ * irdma_handle_close_entry - process a connection's close timer entry
+ * @cm_node: connection's node
+ * @rem_node: flag to remove cm_node
+ */
+static void irdma_handle_close_entry(struct irdma_cm_node *cm_node,
+ u32 rem_node)
+{
+ struct irdma_timer_entry *close_entry = cm_node->close_entry;
+ struct irdma_qp *iwqp;
+ unsigned long flags;
+
+ if (!close_entry)
+ return;
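+
+ /* for close entries the sqbuf field doubles as the QP pointer (see the cast below) */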
+ iwqp = (struct irdma_qp *)close_entry->sqbuf;
+ if (iwqp) {
+ spin_lock_irqsave(&iwqp->lock, flags);
+ if (iwqp->cm_id) {
+ iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSED;
+ iwqp->hw_iwarp_state = IRDMA_QP_STATE_ERROR;
+ iwqp->last_aeq = IRDMA_AE_RESET_SENT;
+ iwqp->ibqp_state = IB_QPS_ERR;
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ irdma_cm_disconn(iwqp);
+ } else {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ }
+ } else if (rem_node) {
+ /* TIME_WAIT state */
+ irdma_rem_ref_cm_node(cm_node);
+ }
+
+ kfree(close_entry);
+ cm_node->close_entry = NULL;
+}
+
+/**
+ * irdma_cm_timer_tick - system's timer expired callback
+ * @t: Pointer to timer_list
+ */
+static void irdma_cm_timer_tick(struct timer_list *t)
+{
+ unsigned long nexttimeout = jiffies + IRDMA_LONG_TIME;
+ struct irdma_cm_node *cm_node;
+ struct irdma_timer_entry *send_entry, *close_entry;
+ struct list_head *list_core_temp;
+ struct list_head *list_node;
+ struct irdma_cm_core *cm_core = timer_container_of(cm_core, t,
+ tcp_timer);
+ struct irdma_sc_vsi *vsi;
+ u32 settimer = 0;
+ unsigned long timetosend;
+ unsigned long flags;
+ struct list_head timer_list;
+
+ INIT_LIST_HEAD(&timer_list);
+
+ rcu_read_lock();
+ irdma_timer_list_prep(cm_core, &timer_list);
+ rcu_read_unlock();
+
+ list_for_each_safe (list_node, list_core_temp, &timer_list) {
+ cm_node = container_of(list_node, struct irdma_cm_node,
+ timer_entry);
+ close_entry = cm_node->close_entry;
+
+ if (close_entry) {
+ if (time_after(close_entry->timetosend, jiffies)) {
+ if (nexttimeout > close_entry->timetosend ||
+ !settimer) {
+ nexttimeout = close_entry->timetosend;
+ settimer = 1;
+ }
+ } else {
+ irdma_handle_close_entry(cm_node, 1);
+ }
+ }
+
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+
+ send_entry = cm_node->send_entry;
+ if (!send_entry)
+ goto done;
+ if (time_after(send_entry->timetosend, jiffies)) {
+ if (cm_node->state != IRDMA_CM_STATE_OFFLOADED) {
+ if (nexttimeout > send_entry->timetosend ||
+ !settimer) {
+ nexttimeout = send_entry->timetosend;
+ settimer = 1;
+ }
+ } else {
+ irdma_free_retrans_entry(cm_node);
+ }
+ goto done;
+ }
+
+ if (cm_node->state == IRDMA_CM_STATE_OFFLOADED ||
+ cm_node->state == IRDMA_CM_STATE_CLOSED) {
+ irdma_free_retrans_entry(cm_node);
+ goto done;
+ }
+
+ if (!send_entry->retranscount || !send_entry->retrycount) {
+ irdma_free_retrans_entry(cm_node);
+
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock,
+ flags);
+ irdma_retrans_expired(cm_node);
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ goto done;
+ }
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+
+ vsi = &cm_node->iwdev->vsi;
+ if (!cm_node->ack_rcvd) {
+ refcount_inc(&send_entry->sqbuf->refcount);
+ irdma_puda_send_buf(vsi->ilq, send_entry->sqbuf);
+ cm_node->cm_core->stats_pkt_retrans++;
+ }
+
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ if (send_entry->send_retrans) {
+ send_entry->retranscount--;
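+ /* exponential backoff: the retransmit timeout doubles with every attempt, capped at IRDMA_MAX_TIMEOUT */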
+ timetosend = (IRDMA_RETRY_TIMEOUT <<
+ (IRDMA_DEFAULT_RETRANS -
+ send_entry->retranscount));
+
+ send_entry->timetosend = jiffies +
+ min(timetosend, IRDMA_MAX_TIMEOUT);
+ if (nexttimeout > send_entry->timetosend || !settimer) {
+ nexttimeout = send_entry->timetosend;
+ settimer = 1;
+ }
+ } else {
+ int close_when_complete;
+
+ close_when_complete = send_entry->close_when_complete;
+ irdma_free_retrans_entry(cm_node);
+ if (close_when_complete)
+ irdma_rem_ref_cm_node(cm_node);
+ }
+done:
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+ irdma_rem_ref_cm_node(cm_node);
+ }
+
+ if (settimer) {
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ if (!timer_pending(&cm_core->tcp_timer)) {
+ cm_core->tcp_timer.expires = nexttimeout;
+ add_timer(&cm_core->tcp_timer);
+ }
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+ }
+}
+
+/**
+ * irdma_send_syn - send SYN packet
+ * @cm_node: connection's node
+ * @sendack: flag to set ACK bit or not
+ */
+int irdma_send_syn(struct irdma_cm_node *cm_node, u32 sendack)
+{
+ struct irdma_puda_buf *sqbuf;
+ int flags = SET_SYN;
+ char optionsbuf[sizeof(struct option_mss) +
+ sizeof(struct option_windowscale) +
+ sizeof(struct option_base) + TCP_OPTIONS_PADDING];
+ struct irdma_kmem_info opts;
+ int optionssize = 0;
+ /* Sending MSS option */
+ union all_known_options *options;
+
+ opts.addr = optionsbuf;
+ if (!cm_node)
+ return -EINVAL;
+
+ options = (union all_known_options *)&optionsbuf[optionssize];
+ options->mss.optionnum = OPTION_NUM_MSS;
+ options->mss.len = sizeof(struct option_mss);
+ options->mss.mss = htons(cm_node->tcp_cntxt.mss);
+ optionssize += sizeof(struct option_mss);
+
+ options = (union all_known_options *)&optionsbuf[optionssize];
+ options->windowscale.optionnum = OPTION_NUM_WINDOW_SCALE;
+ options->windowscale.len = sizeof(struct option_windowscale);
+ options->windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale;
+ optionssize += sizeof(struct option_windowscale);
+ options = (union all_known_options *)&optionsbuf[optionssize];
+ options->eol = OPTION_NUM_EOL;
+ optionssize += 1;
+
+ if (sendack)
+ flags |= SET_ACK;
+
+ opts.size = optionssize;
+
+ sqbuf = cm_node->cm_core->form_cm_frame(cm_node, &opts, NULL, NULL,
+ flags);
+ if (!sqbuf)
+ return -ENOMEM;
+
+ return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 1,
+ 0);
+}
+
+/**
+ * irdma_send_ack - Send ACK packet
+ * @cm_node: connection's node
+ */
+void irdma_send_ack(struct irdma_cm_node *cm_node)
+{
+ struct irdma_puda_buf *sqbuf;
+ struct irdma_sc_vsi *vsi = &cm_node->iwdev->vsi;
+
+ sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL, NULL, NULL,
+ SET_ACK);
+ if (sqbuf)
+ irdma_puda_send_buf(vsi->ilq, sqbuf);
+}
+
+/**
+ * irdma_send_fin - Send FIN pkt
+ * @cm_node: connection's node
+ */
+static int irdma_send_fin(struct irdma_cm_node *cm_node)
+{
+ struct irdma_puda_buf *sqbuf;
+
+ sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL, NULL, NULL,
+ SET_ACK | SET_FIN);
+ if (!sqbuf)
+ return -ENOMEM;
+
+ return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 1,
+ 0);
+}
+
+/**
+ * irdma_find_listener - find a listener listening on this addr-port pair
+ * @cm_core: cm's core
+ * @dst_addr: listener ip addr
+ * @ipv4: flag indicating IPv4 when true
+ * @dst_port: listener tcp port num
+ * @vlan_id: virtual LAN ID
+ * @listener_state: state to match with listen node's
+ */
+static struct irdma_cm_listener *
+irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, bool ipv4,
+ u16 dst_port, u16 vlan_id,
+ enum irdma_cm_listener_state listener_state)
+{
+ struct irdma_cm_listener *listen_node;
+ static const u32 ip_zero[4] = { 0, 0, 0, 0 };
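+ /* a listener bound to the all-zero address is a wildcard that matches any destination address and VLAN */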
+ u32 listen_addr[4];
+ u16 listen_port;
+ unsigned long flags;
+
+ /* walk list and find cm_node associated with this session ID */
+ spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+ list_for_each_entry (listen_node, &cm_core->listen_list, list) {
+ memcpy(listen_addr, listen_node->loc_addr, sizeof(listen_addr));
+ listen_port = listen_node->loc_port;
+ if (listen_node->ipv4 != ipv4 || listen_port != dst_port ||
+ !(listener_state & listen_node->listener_state))
+ continue;
+ /* compare node pair, return node handle if a match */
+ if (!memcmp(listen_addr, ip_zero, sizeof(listen_addr)) ||
+ (!memcmp(listen_addr, dst_addr, sizeof(listen_addr)) &&
+ vlan_id == listen_node->vlan_id)) {
+ refcount_inc(&listen_node->refcnt);
+ spin_unlock_irqrestore(&cm_core->listen_list_lock,
+ flags);
+ trace_irdma_find_listener(listen_node);
+ return listen_node;
+ }
+ }
+ spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+
+ return NULL;
+}
+
+/**
+ * irdma_del_multiple_qhash - Remove qhash and child listens
+ * @iwdev: iWarp device
+ * @cm_info: CM info for parent listen node
+ * @cm_parent_listen_node: The parent listen node
+ */
+static int irdma_del_multiple_qhash(struct irdma_device *iwdev,
+ struct irdma_cm_info *cm_info,
+ struct irdma_cm_listener *cm_parent_listen_node)
+{
+ struct irdma_cm_listener *child_listen_node;
+ struct list_head *pos, *tpos;
+ unsigned long flags;
+ int ret = -EINVAL;
+
+ spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
+ list_for_each_safe (pos, tpos,
+ &cm_parent_listen_node->child_listen_list) {
+ child_listen_node = list_entry(pos, struct irdma_cm_listener,
+ child_listen_list);
+ if (child_listen_node->ipv4)
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: removing child listen for IP=%pI4, port=%d, vlan=%d\n",
+ child_listen_node->loc_addr,
+ child_listen_node->loc_port,
+ child_listen_node->vlan_id);
+ else
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: removing child listen for IP=%pI6, port=%d, vlan=%d\n",
+ child_listen_node->loc_addr,
+ child_listen_node->loc_port,
+ child_listen_node->vlan_id);
+ trace_irdma_del_multiple_qhash(child_listen_node);
+ list_del(pos);
+ memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
+ sizeof(cm_info->loc_addr));
+ cm_info->vlan_id = child_listen_node->vlan_id;
+ if (child_listen_node->qhash_set) {
+ ret = irdma_manage_qhash(iwdev, cm_info,
+ IRDMA_QHASH_TYPE_TCP_SYN,
+ IRDMA_QHASH_MANAGE_TYPE_DELETE,
+ NULL, false);
+ child_listen_node->qhash_set = false;
+ } else {
+ ret = 0;
+ }
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: Child listen node freed = %p\n",
+ child_listen_node);
+ kfree(child_listen_node);
+ cm_parent_listen_node->cm_core->stats_listen_nodes_destroyed++;
+ }
+ spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
+
+ return ret;
+}
+
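+/**
+ * irdma_iw_get_vlan_prio - get the egress VLAN priority for a local address
+ * @loc_addr: local IP address to look up
+ * @prio: priority to map through the device's VLAN egress QoS table
+ * @ipv4: flag indicating IPv4 when true
+ */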
+static u8 irdma_iw_get_vlan_prio(u32 *loc_addr, u8 prio, bool ipv4)
+{
+ struct net_device *ndev = NULL;
+
+ rcu_read_lock();
+ if (ipv4) {
+ ndev = ip_dev_find(&init_net, htonl(loc_addr[0]));
+ } else if (IS_ENABLED(CONFIG_IPV6)) {
+ struct net_device *ip_dev;
+ struct in6_addr laddr6;
+
+ irdma_copy_ip_htonl(laddr6.in6_u.u6_addr32, loc_addr);
+
+ for_each_netdev_rcu (&init_net, ip_dev) {
+ if (ipv6_chk_addr(&init_net, &laddr6, ip_dev, 1)) {
+ ndev = ip_dev;
+ break;
+ }
+ }
+ }
+
+ if (!ndev)
+ goto done;
+ if (is_vlan_dev(ndev))
+ prio = (vlan_dev_get_egress_qos_mask(ndev, prio) & VLAN_PRIO_MASK)
+ >> VLAN_PRIO_SHIFT;
+ if (ipv4)
+ dev_put(ndev);
+
+done:
+ rcu_read_unlock();
+
+ return prio;
+}
+
+/**
+ * irdma_get_vlan_mac_ipv6 - Gets the vlan and mac
+ * @addr: local IPv6 address
+ * @vlan_id: vlan id for the given IPv6 address
+ * @mac: mac address for the given IPv6 address
+ *
+ * Returns the vlan id and mac for an IPv6 address.
+ */
+void irdma_get_vlan_mac_ipv6(u32 *addr, u16 *vlan_id, u8 *mac)
+{
+ struct net_device *ip_dev = NULL;
+ struct in6_addr laddr6;
+
+ if (!IS_ENABLED(CONFIG_IPV6))
+ return;
+
+ irdma_copy_ip_htonl(laddr6.in6_u.u6_addr32, addr);
+ if (vlan_id)
+ *vlan_id = 0xFFFF; /* Match rdma_vlan_dev_vlan_id() */
+ if (mac)
+ eth_zero_addr(mac);
+
+ rcu_read_lock();
+ for_each_netdev_rcu (&init_net, ip_dev) {
+ if (ipv6_chk_addr(&init_net, &laddr6, ip_dev, 1)) {
+ if (vlan_id)
+ *vlan_id = rdma_vlan_dev_vlan_id(ip_dev);
+ if (ip_dev->dev_addr && mac)
+ ether_addr_copy(mac, ip_dev->dev_addr);
+ break;
+ }
+ }
+ rcu_read_unlock();
+}
+
+/**
+ * irdma_get_vlan_ipv4 - Returns the vlan_id for IPv4 address
+ * @addr: local IPv4 address
+ */
+u16 irdma_get_vlan_ipv4(u32 *addr)
+{
+ struct net_device *netdev;
+ u16 vlan_id = 0xFFFF;
+
+ netdev = ip_dev_find(&init_net, htonl(addr[0]));
+ if (netdev) {
+ vlan_id = rdma_vlan_dev_vlan_id(netdev);
+ dev_put(netdev);
+ }
+
+ return vlan_id;
+}
+
+/**
+ * irdma_add_mqh_6 - Adds multiple qhashes for IPv6
+ * @iwdev: iWarp device
+ * @cm_info: CM info for parent listen node
+ * @cm_parent_listen_node: The parent listen node
+ *
+ * Adds a qhash and a child listen node for every IPv6 address
+ * on the adapter and adds the associated qhash filter
+ */
+static int irdma_add_mqh_6(struct irdma_device *iwdev,
+ struct irdma_cm_info *cm_info,
+ struct irdma_cm_listener *cm_parent_listen_node)
+{
+ struct net_device *ip_dev;
+ struct inet6_dev *idev;
+ struct inet6_ifaddr *ifp, *tmp;
+ struct irdma_cm_listener *child_listen_node;
+ unsigned long flags;
+ int ret = 0;
+
+ rtnl_lock();
+ for_each_netdev(&init_net, ip_dev) {
+ if (!(ip_dev->flags & IFF_UP))
+ continue;
+
+ if (((rdma_vlan_dev_vlan_id(ip_dev) >= VLAN_N_VID) ||
+ (rdma_vlan_dev_real_dev(ip_dev) != iwdev->netdev)) &&
+ ip_dev != iwdev->netdev)
+ continue;
+
+ idev = __in6_dev_get(ip_dev);
+ if (!idev) {
+ ibdev_dbg(&iwdev->ibdev, "CM: idev == NULL\n");
+ break;
+ }
+ list_for_each_entry_safe (ifp, tmp, &idev->addr_list, if_list) {
+ ibdev_dbg(&iwdev->ibdev, "CM: IP=%pI6, vlan_id=%d, MAC=%pM\n",
+ &ifp->addr, rdma_vlan_dev_vlan_id(ip_dev),
+ ip_dev->dev_addr);
+ child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_KERNEL);
+ ibdev_dbg(&iwdev->ibdev, "CM: Allocating child listener %p\n",
+ child_listen_node);
+ if (!child_listen_node) {
+ ibdev_dbg(&iwdev->ibdev, "CM: listener memory allocation\n");
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ cm_info->vlan_id = rdma_vlan_dev_vlan_id(ip_dev);
+ cm_parent_listen_node->vlan_id = cm_info->vlan_id;
+ memcpy(child_listen_node, cm_parent_listen_node,
+ sizeof(*child_listen_node));
+ irdma_copy_ip_ntohl(child_listen_node->loc_addr,
+ ifp->addr.in6_u.u6_addr32);
+ memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
+ sizeof(cm_info->loc_addr));
+ if (!iwdev->vsi.dscp_mode)
+ cm_info->user_pri =
+ irdma_iw_get_vlan_prio(child_listen_node->loc_addr,
+ cm_info->user_pri,
+ false);
+
+ ret = irdma_manage_qhash(iwdev, cm_info,
+ IRDMA_QHASH_TYPE_TCP_SYN,
+ IRDMA_QHASH_MANAGE_TYPE_ADD,
+ NULL, true);
+ if (ret) {
+ kfree(child_listen_node);
+ continue;
+ }
+
+ trace_irdma_add_mqh_6(iwdev, child_listen_node,
+ ip_dev->dev_addr);
+
+ child_listen_node->qhash_set = true;
+ spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
+ list_add(&child_listen_node->child_listen_list,
+ &cm_parent_listen_node->child_listen_list);
+ spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
+ cm_parent_listen_node->cm_core->stats_listen_nodes_created++;
+ }
+ }
+exit:
+ rtnl_unlock();
+
+ return ret;
+}
+
+/**
+ * irdma_add_mqh_4 - Adds multiple qhashes for IPv4
+ * @iwdev: iWarp device
+ * @cm_info: CM info for parent listen node
+ * @cm_parent_listen_node: The parent listen node
+ *
+ * Adds a qhash and a child listen node for every IPv4 address
+ * on the adapter and adds the associated qhash filter
+ */
+static int irdma_add_mqh_4(struct irdma_device *iwdev,
+ struct irdma_cm_info *cm_info,
+ struct irdma_cm_listener *cm_parent_listen_node)
+{
+ struct net_device *ip_dev;
+ struct in_device *idev;
+ struct irdma_cm_listener *child_listen_node;
+ unsigned long flags;
+ const struct in_ifaddr *ifa;
+ int ret = 0;
+
+ rtnl_lock();
+ for_each_netdev(&init_net, ip_dev) {
+ if (!(ip_dev->flags & IFF_UP))
+ continue;
+
+ if (((rdma_vlan_dev_vlan_id(ip_dev) >= VLAN_N_VID) ||
+ (rdma_vlan_dev_real_dev(ip_dev) != iwdev->netdev)) &&
+ ip_dev != iwdev->netdev)
+ continue;
+
+ idev = in_dev_get(ip_dev);
+ if (!idev)
+ continue;
+
+ in_dev_for_each_ifa_rtnl(ifa, idev) {
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: Allocating child CM Listener forIP=%pI4, vlan_id=%d, MAC=%pM\n",
+ &ifa->ifa_address, rdma_vlan_dev_vlan_id(ip_dev),
+ ip_dev->dev_addr);
+ child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_KERNEL);
+ cm_parent_listen_node->cm_core->stats_listen_nodes_created++;
+ ibdev_dbg(&iwdev->ibdev, "CM: Allocating child listener %p\n",
+ child_listen_node);
+ if (!child_listen_node) {
+ ibdev_dbg(&iwdev->ibdev, "CM: listener memory allocation\n");
+ in_dev_put(idev);
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ cm_info->vlan_id = rdma_vlan_dev_vlan_id(ip_dev);
+ cm_parent_listen_node->vlan_id = cm_info->vlan_id;
+ memcpy(child_listen_node, cm_parent_listen_node,
+ sizeof(*child_listen_node));
+ child_listen_node->loc_addr[0] =
+ ntohl(ifa->ifa_address);
+ memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
+ sizeof(cm_info->loc_addr));
+ if (!iwdev->vsi.dscp_mode)
+ cm_info->user_pri =
+ irdma_iw_get_vlan_prio(child_listen_node->loc_addr,
+ cm_info->user_pri,
+ true);
+ ret = irdma_manage_qhash(iwdev, cm_info,
+ IRDMA_QHASH_TYPE_TCP_SYN,
+ IRDMA_QHASH_MANAGE_TYPE_ADD,
+ NULL, true);
+ if (ret) {
+ kfree(child_listen_node);
+ cm_parent_listen_node->cm_core
+ ->stats_listen_nodes_created--;
+ continue;
+ }
+
+ trace_irdma_add_mqh_4(iwdev, child_listen_node,
+ ip_dev->dev_addr);
+
+ child_listen_node->qhash_set = true;
+ spin_lock_irqsave(&iwdev->cm_core.listen_list_lock,
+ flags);
+ list_add(&child_listen_node->child_listen_list,
+ &cm_parent_listen_node->child_listen_list);
+ spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
+ }
+ in_dev_put(idev);
+ }
+exit:
+ rtnl_unlock();
+
+ return ret;
+}
+
+/**
+ * irdma_add_mqh - Adds multiple qhashes
+ * @iwdev: iWarp device
+ * @cm_info: CM info for parent listen node
+ * @cm_listen_node: The parent listen node
+ */
+static int irdma_add_mqh(struct irdma_device *iwdev,
+ struct irdma_cm_info *cm_info,
+ struct irdma_cm_listener *cm_listen_node)
+{
+ if (cm_info->ipv4)
+ return irdma_add_mqh_4(iwdev, cm_info, cm_listen_node);
+ else
+ return irdma_add_mqh_6(iwdev, cm_info, cm_listen_node);
+}
+
+/**
+ * irdma_reset_list_prep - add connection nodes slated for reset to list
+ * @cm_core: cm's core
+ * @listener: pointer to listener node
+ * @reset_list: a list to which selected cm_nodes will be added
+ */
+static void irdma_reset_list_prep(struct irdma_cm_core *cm_core,
+ struct irdma_cm_listener *listener,
+ struct list_head *reset_list)
+{
+ struct irdma_cm_node *cm_node;
+ int bkt;
+
+ hash_for_each_rcu(cm_core->cm_hash_tbl, bkt, cm_node, list) {
+ if (cm_node->listener == listener &&
+ !cm_node->accelerated &&
+ refcount_inc_not_zero(&cm_node->refcnt))
+ list_add(&cm_node->reset_entry, reset_list);
+ }
+}
+
+/**
+ * irdma_dec_refcnt_listen - delete listener and associated cm nodes
+ * @cm_core: cm's core
+ * @listener: pointer to listener node
+ * @free_hanging_nodes: to free associated cm_nodes
+ * @apbvt_del: flag to delete the apbvt
+ */
+static int irdma_dec_refcnt_listen(struct irdma_cm_core *cm_core,
+ struct irdma_cm_listener *listener,
+ int free_hanging_nodes, bool apbvt_del)
+{
+ int err;
+ struct list_head *list_pos;
+ struct list_head *list_temp;
+ struct irdma_cm_node *cm_node;
+ struct list_head reset_list;
+ struct irdma_cm_info nfo;
+ enum irdma_cm_node_state old_state;
+ unsigned long flags;
+
+ trace_irdma_dec_refcnt_listen(listener, __builtin_return_address(0));
+ /* free non-accelerated child nodes for this listener */
+ INIT_LIST_HEAD(&reset_list);
+ if (free_hanging_nodes) {
+ rcu_read_lock();
+ irdma_reset_list_prep(cm_core, listener, &reset_list);
+ rcu_read_unlock();
+ }
+
+ list_for_each_safe (list_pos, list_temp, &reset_list) {
+ cm_node = container_of(list_pos, struct irdma_cm_node,
+ reset_entry);
+ if (cm_node->state >= IRDMA_CM_STATE_FIN_WAIT1) {
+ irdma_rem_ref_cm_node(cm_node);
+ continue;
+ }
+
+ irdma_cleanup_retrans_entry(cm_node);
+ err = irdma_send_reset(cm_node);
+ if (err) {
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: send reset failed\n");
+ } else {
+ old_state = cm_node->state;
+ cm_node->state = IRDMA_CM_STATE_LISTENER_DESTROYED;
+ if (old_state != IRDMA_CM_STATE_MPAREQ_RCVD)
+ irdma_rem_ref_cm_node(cm_node);
+ }
+ }
+
+ if (refcount_dec_and_test(&listener->refcnt)) {
+ spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+ list_del(&listener->list);
+ spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+
+ if (apbvt_del)
+ irdma_del_apbvt(listener->iwdev,
+ listener->apbvt_entry);
+ memcpy(nfo.loc_addr, listener->loc_addr, sizeof(nfo.loc_addr));
+ nfo.loc_port = listener->loc_port;
+ nfo.ipv4 = listener->ipv4;
+ nfo.vlan_id = listener->vlan_id;
+ nfo.user_pri = listener->user_pri;
+ nfo.qh_qpid = listener->iwdev->vsi.ilq->qp_id;
+
+ if (!list_empty(&listener->child_listen_list)) {
+ irdma_del_multiple_qhash(listener->iwdev, &nfo,
+ listener);
+ } else {
+ if (listener->qhash_set)
+ irdma_manage_qhash(listener->iwdev,
+ &nfo,
+ IRDMA_QHASH_TYPE_TCP_SYN,
+ IRDMA_QHASH_MANAGE_TYPE_DELETE,
+ NULL, false);
+ }
+
+ cm_core->stats_listen_destroyed++;
+ cm_core->stats_listen_nodes_destroyed++;
+ ibdev_dbg(&listener->iwdev->ibdev,
+ "CM: loc_port=0x%04x loc_addr=%pI4 cm_listen_node=%p cm_id=%p qhash_set=%d vlan_id=%d apbvt_del=%d\n",
+ listener->loc_port, listener->loc_addr, listener,
+ listener->cm_id, listener->qhash_set,
+ listener->vlan_id, apbvt_del);
+ kfree(listener);
+ listener = NULL;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+/**
+ * irdma_cm_del_listen - delete a listener
+ * @cm_core: cm's core
+ * @listener: passive connection's listener
+ * @apbvt_del: flag to delete apbvt
+ */
+static int irdma_cm_del_listen(struct irdma_cm_core *cm_core,
+ struct irdma_cm_listener *listener,
+ bool apbvt_del)
+{
+ listener->listener_state = IRDMA_CM_LISTENER_PASSIVE_STATE;
+ listener->cm_id = NULL;
+
+ return irdma_dec_refcnt_listen(cm_core, listener, 1, apbvt_del);
+}
+
+/**
+ * irdma_addr_resolve_neigh - resolve neighbor address
+ * @iwdev: iwarp device structure
+ * @src_ip: local ip address
+ * @dst_ip: remote ip address
+ * @arpindex: if there is an arp entry
+ */
+static int irdma_addr_resolve_neigh(struct irdma_device *iwdev, u32 src_ip,
+ u32 dst_ip, int arpindex)
+{
+ struct rtable *rt;
+ struct neighbour *neigh;
+ int rc = arpindex;
+ __be32 dst_ipaddr = htonl(dst_ip);
+ __be32 src_ipaddr = htonl(src_ip);
+
+ rt = ip_route_output(&init_net, dst_ipaddr, src_ipaddr, 0, 0,
+ RT_SCOPE_UNIVERSE);
+ if (IS_ERR(rt)) {
+ ibdev_dbg(&iwdev->ibdev, "CM: ip_route_output fail\n");
+ return -EINVAL;
+ }
+
+ neigh = dst_neigh_lookup(&rt->dst, &dst_ipaddr);
+ if (!neigh)
+ goto exit;
+
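+ /* if the neighbour is already valid, cache its MAC in the driver ARP table; otherwise trigger resolution and return the existing arp index */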
+ if (neigh->nud_state & NUD_VALID)
+ rc = irdma_add_arp(iwdev->rf, &dst_ip, true, neigh->ha);
+ else
+ neigh_event_send(neigh, NULL);
+ if (neigh)
+ neigh_release(neigh);
+exit:
+ ip_rt_put(rt);
+
+ return rc;
+}
+
+/**
+ * irdma_get_dst_ipv6 - get destination cache entry via ipv6 lookup
+ * @src_addr: local ipv6 sock address
+ * @dst_addr: destination ipv6 sock address
+ */
+static struct dst_entry *irdma_get_dst_ipv6(struct sockaddr_in6 *src_addr,
+ struct sockaddr_in6 *dst_addr)
+{
+ struct dst_entry *dst = NULL;
+
+ if (IS_ENABLED(CONFIG_IPV6)) {
+ struct flowi6 fl6 = {};
+
+ fl6.daddr = dst_addr->sin6_addr;
+ fl6.saddr = src_addr->sin6_addr;
+ if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
+ fl6.flowi6_oif = dst_addr->sin6_scope_id;
+
+ dst = ip6_route_output(&init_net, NULL, &fl6);
+ }
+
+ return dst;
+}
+
+/**
+ * irdma_addr_resolve_neigh_ipv6 - resolve neighbor ipv6 address
+ * @iwdev: iwarp device structure
+ * @src: local ip address
+ * @dest: remote ip address
+ * @arpindex: if there is an arp entry
+ */
+static int irdma_addr_resolve_neigh_ipv6(struct irdma_device *iwdev, u32 *src,
+ u32 *dest, int arpindex)
+{
+ struct neighbour *neigh;
+ int rc = arpindex;
+ struct dst_entry *dst;
+ struct sockaddr_in6 dst_addr = {};
+ struct sockaddr_in6 src_addr = {};
+
+ dst_addr.sin6_family = AF_INET6;
+ irdma_copy_ip_htonl(dst_addr.sin6_addr.in6_u.u6_addr32, dest);
+ src_addr.sin6_family = AF_INET6;
+ irdma_copy_ip_htonl(src_addr.sin6_addr.in6_u.u6_addr32, src);
+ dst = irdma_get_dst_ipv6(&src_addr, &dst_addr);
+ if (!dst || dst->error) {
+ if (dst) {
+ dst_release(dst);
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: ip6_route_output returned dst->error = %d\n",
+ dst->error);
+ }
+ return -EINVAL;
+ }
+
+ neigh = dst_neigh_lookup(dst, dst_addr.sin6_addr.in6_u.u6_addr32);
+ if (!neigh)
+ goto exit;
+
+ ibdev_dbg(&iwdev->ibdev, "CM: dst_neigh_lookup MAC=%pM\n",
+ neigh->ha);
+
+ trace_irdma_addr_resolve(iwdev, neigh->ha);
+
+ if (neigh->nud_state & NUD_VALID)
+ rc = irdma_add_arp(iwdev->rf, dest, false, neigh->ha);
+ else
+ neigh_event_send(neigh, NULL);
+ if (neigh)
+ neigh_release(neigh);
+exit:
+ dst_release(dst);
+
+ return rc;
+}
+
+/**
+ * irdma_find_node - find a cm node matching the given connection quad
+ * @cm_core: cm's core
+ * @rem_port: remote tcp port num
+ * @rem_addr: remote ip addr
+ * @loc_port: local tcp port num
+ * @loc_addr: local ip addr
+ * @vlan_id: local VLAN ID
+ */
+struct irdma_cm_node *irdma_find_node(struct irdma_cm_core *cm_core,
+ u16 rem_port, u32 *rem_addr, u16 loc_port,
+ u32 *loc_addr, u16 vlan_id)
+{
+ struct irdma_cm_node *cm_node;
+ u32 key = (rem_port << 16) | loc_port;
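+ /* hash on the port pair; the full address/port/VLAN tuple is checked below */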
+
+ rcu_read_lock();
+ hash_for_each_possible_rcu(cm_core->cm_hash_tbl, cm_node, list, key) {
+ if (cm_node->vlan_id == vlan_id &&
+ cm_node->loc_port == loc_port && cm_node->rem_port == rem_port &&
+ !memcmp(cm_node->loc_addr, loc_addr, sizeof(cm_node->loc_addr)) &&
+ !memcmp(cm_node->rem_addr, rem_addr, sizeof(cm_node->rem_addr))) {
+ if (!refcount_inc_not_zero(&cm_node->refcnt))
+ goto exit;
+ rcu_read_unlock();
+ trace_irdma_find_node(cm_node, 0, NULL);
+ return cm_node;
+ }
+ }
+
+exit:
+ rcu_read_unlock();
+
+ /* no owner node */
+ return NULL;
+}
+
+/**
+ * irdma_add_hte_node - add a cm node to the hash table
+ * @cm_core: cm's core
+ * @cm_node: connection's node
+ */
+static void irdma_add_hte_node(struct irdma_cm_core *cm_core,
+ struct irdma_cm_node *cm_node)
+{
+ unsigned long flags;
+ u32 key = (cm_node->rem_port << 16) | cm_node->loc_port;
+
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ hash_add_rcu(cm_core->cm_hash_tbl, &cm_node->list, key);
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+}
+
+/**
+ * irdma_ipv4_is_lpb - check if loopback
+ * @loc_addr: local addr to compare
+ * @rem_addr: remote address
+ */
+bool irdma_ipv4_is_lpb(u32 loc_addr, u32 rem_addr)
+{
+ return ipv4_is_loopback(htonl(rem_addr)) || (loc_addr == rem_addr);
+}
+
+/**
+ * irdma_ipv6_is_lpb - check if loopback
+ * @loc_addr: local addr to compare
+ * @rem_addr: remote address
+ */
+bool irdma_ipv6_is_lpb(u32 *loc_addr, u32 *rem_addr)
+{
+ struct in6_addr raddr6;
+
+ irdma_copy_ip_htonl(raddr6.in6_u.u6_addr32, rem_addr);
+
+ return !memcmp(loc_addr, rem_addr, 16) || ipv6_addr_loopback(&raddr6);
+}
+
+/**
+ * irdma_cm_create_ah - create a cm address handle
+ * @cm_node: The connection manager node to create AH for
+ * @wait: Provides option to wait for ah creation or not
+ */
+static int irdma_cm_create_ah(struct irdma_cm_node *cm_node, bool wait)
+{
+ struct irdma_ah_info ah_info = {};
+ struct irdma_device *iwdev = cm_node->iwdev;
+
+ ether_addr_copy(ah_info.mac_addr, iwdev->netdev->dev_addr);
+
+ ah_info.hop_ttl = 0x40;
+ ah_info.tc_tos = cm_node->tos;
+ ah_info.vsi = &iwdev->vsi;
+
+ if (cm_node->ipv4) {
+ ah_info.ipv4_valid = true;
+ ah_info.dest_ip_addr[0] = cm_node->rem_addr[0];
+ ah_info.src_ip_addr[0] = cm_node->loc_addr[0];
+ ah_info.do_lpbk = irdma_ipv4_is_lpb(ah_info.src_ip_addr[0],
+ ah_info.dest_ip_addr[0]);
+ } else {
+ memcpy(ah_info.dest_ip_addr, cm_node->rem_addr,
+ sizeof(ah_info.dest_ip_addr));
+ memcpy(ah_info.src_ip_addr, cm_node->loc_addr,
+ sizeof(ah_info.src_ip_addr));
+ ah_info.do_lpbk = irdma_ipv6_is_lpb(ah_info.src_ip_addr,
+ ah_info.dest_ip_addr);
+ }
+
+ ah_info.vlan_tag = cm_node->vlan_id;
+ if (cm_node->vlan_id < VLAN_N_VID) {
+ ah_info.insert_vlan_tag = 1;
+ ah_info.vlan_tag |= cm_node->user_pri << VLAN_PRIO_SHIFT;
+ }
+
+ ah_info.dst_arpindex =
+ irdma_arp_table(iwdev->rf, ah_info.dest_ip_addr,
+ ah_info.ipv4_valid, NULL, IRDMA_ARP_RESOLVE);
+
+ if (irdma_puda_create_ah(&iwdev->rf->sc_dev, &ah_info, wait,
+ IRDMA_PUDA_RSRC_TYPE_ILQ, cm_node,
+ &cm_node->ah))
+ return -ENOMEM;
+
+ trace_irdma_create_ah(cm_node);
+ return 0;
+}
+
+/**
+ * irdma_cm_free_ah - free a cm address handle
+ * @cm_node: The connection manager node to free the AH for
+ */
+static void irdma_cm_free_ah(struct irdma_cm_node *cm_node)
+{
+ struct irdma_device *iwdev = cm_node->iwdev;
+
+ trace_irdma_cm_free_ah(cm_node);
+ irdma_puda_free_ah(&iwdev->rf->sc_dev, cm_node->ah);
+ cm_node->ah = NULL;
+}
+
+/**
+ * irdma_make_cm_node - create a new instance of a cm node
+ * @cm_core: cm's core
+ * @iwdev: iwarp device structure
+ * @cm_info: quad info for connection
+ * @listener: passive connection's listener
+ */
+static struct irdma_cm_node *
+irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev,
+ struct irdma_cm_info *cm_info,
+ struct irdma_cm_listener *listener)
+{
+ struct irdma_cm_node *cm_node;
+ int oldarpindex;
+ int arpindex;
+ struct net_device *netdev = iwdev->netdev;
+
+ /* create an hte and cm_node for this instance */
+ cm_node = kzalloc(sizeof(*cm_node), GFP_ATOMIC);
+ if (!cm_node)
+ return NULL;
+
+ /* set our node specific transport info */
+ cm_node->ipv4 = cm_info->ipv4;
+ cm_node->vlan_id = cm_info->vlan_id;
+ if (cm_node->vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode)
+ cm_node->vlan_id = 0;
+ cm_node->tos = cm_info->tos;
+ cm_node->user_pri = cm_info->user_pri;
+ if (listener) {
+ if (listener->tos != cm_info->tos)
+ ibdev_warn(&iwdev->ibdev,
+ "application TOS[%d] and remote client TOS[%d] mismatch\n",
+ listener->tos, cm_info->tos);
+ if (iwdev->vsi.dscp_mode) {
+ cm_node->user_pri = listener->user_pri;
+ } else {
+ cm_node->tos = max(listener->tos, cm_info->tos);
+ cm_node->user_pri = rt_tos2priority(cm_node->tos);
+ cm_node->user_pri =
+ irdma_iw_get_vlan_prio(cm_info->loc_addr,
+ cm_node->user_pri,
+ cm_info->ipv4);
+ }
+ ibdev_dbg(&iwdev->ibdev,
+ "DCB: listener: TOS:[%d] UP:[%d]\n", cm_node->tos,
+ cm_node->user_pri);
+ trace_irdma_listener_tos(iwdev, cm_node->tos,
+ cm_node->user_pri);
+ }
+ memcpy(cm_node->loc_addr, cm_info->loc_addr, sizeof(cm_node->loc_addr));
+ memcpy(cm_node->rem_addr, cm_info->rem_addr, sizeof(cm_node->rem_addr));
+ cm_node->loc_port = cm_info->loc_port;
+ cm_node->rem_port = cm_info->rem_port;
+
+ cm_node->mpa_frame_rev = IRDMA_CM_DEFAULT_MPA_VER;
+ cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
+ cm_node->iwdev = iwdev;
+ cm_node->dev = &iwdev->rf->sc_dev;
+
+ cm_node->ird_size = cm_node->dev->hw_attrs.max_hw_ird;
+ cm_node->ord_size = cm_node->dev->hw_attrs.max_hw_ord;
+
+ cm_node->listener = listener;
+ cm_node->cm_id = cm_info->cm_id;
+ ether_addr_copy(cm_node->loc_mac, netdev->dev_addr);
+ spin_lock_init(&cm_node->retrans_list_lock);
+ cm_node->ack_rcvd = false;
+
+ init_completion(&cm_node->establish_comp);
+ refcount_set(&cm_node->refcnt, 1);
+ /* associate our parent CM core */
+ cm_node->cm_core = cm_core;
+ cm_node->tcp_cntxt.loc_id = IRDMA_CM_DEFAULT_LOCAL_ID;
+ cm_node->tcp_cntxt.rcv_wscale = iwdev->rcv_wscale;
+ cm_node->tcp_cntxt.rcv_wnd = iwdev->rcv_wnd >> cm_node->tcp_cntxt.rcv_wscale;
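+ /* derive the initial send sequence number from the kernel's secure ISN generator and the MSS from the VSI MTU */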
+ if (cm_node->ipv4) {
+ cm_node->tcp_cntxt.loc_seq_num = secure_tcp_seq(htonl(cm_node->loc_addr[0]),
+ htonl(cm_node->rem_addr[0]),
+ htons(cm_node->loc_port),
+ htons(cm_node->rem_port));
+ cm_node->tcp_cntxt.mss = iwdev->vsi.mtu - IRDMA_MTU_TO_MSS_IPV4;
+ } else if (IS_ENABLED(CONFIG_IPV6)) {
+ __be32 loc[4] = {
+ htonl(cm_node->loc_addr[0]), htonl(cm_node->loc_addr[1]),
+ htonl(cm_node->loc_addr[2]), htonl(cm_node->loc_addr[3])
+ };
+ __be32 rem[4] = {
+ htonl(cm_node->rem_addr[0]), htonl(cm_node->rem_addr[1]),
+ htonl(cm_node->rem_addr[2]), htonl(cm_node->rem_addr[3])
+ };
+ cm_node->tcp_cntxt.loc_seq_num = secure_tcpv6_seq(loc, rem,
+ htons(cm_node->loc_port),
+ htons(cm_node->rem_port));
+ cm_node->tcp_cntxt.mss = iwdev->vsi.mtu - IRDMA_MTU_TO_MSS_IPV6;
+ }
+
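+ /* Loopback connections resolve the remote MAC from the local ARP
+ * table; otherwise trigger (or refresh) neighbor discovery for the
+ * remote address before the connection can proceed.
+ */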
+ if ((cm_node->ipv4 &&
+ irdma_ipv4_is_lpb(cm_node->loc_addr[0], cm_node->rem_addr[0])) ||
+ (!cm_node->ipv4 &&
+ irdma_ipv6_is_lpb(cm_node->loc_addr, cm_node->rem_addr))) {
+ cm_node->do_lpb = true;
+ arpindex = irdma_arp_table(iwdev->rf, cm_node->rem_addr,
+ cm_node->ipv4, NULL,
+ IRDMA_ARP_RESOLVE);
+ } else {
+ oldarpindex = irdma_arp_table(iwdev->rf, cm_node->rem_addr,
+ cm_node->ipv4, NULL,
+ IRDMA_ARP_RESOLVE);
+ if (cm_node->ipv4)
+ arpindex = irdma_addr_resolve_neigh(iwdev,
+ cm_info->loc_addr[0],
+ cm_info->rem_addr[0],
+ oldarpindex);
+ else if (IS_ENABLED(CONFIG_IPV6))
+ arpindex = irdma_addr_resolve_neigh_ipv6(iwdev,
+ cm_info->loc_addr,
+ cm_info->rem_addr,
+ oldarpindex);
+ else
+ arpindex = -EINVAL;
+ }
+
+ if (arpindex < 0)
+ goto err;
+
+ ether_addr_copy(cm_node->rem_mac,
+ iwdev->rf->arp_table[arpindex].mac_addr);
+ irdma_add_hte_node(cm_core, cm_node);
+ cm_core->stats_nodes_created++;
+ return cm_node;
+
+err:
+ kfree(cm_node);
+
+ return NULL;
+}
+
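+/**
+ * irdma_destroy_connection - release the resources held by a cm node
+ * @cm_node: connection's node
+ */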
+static void irdma_destroy_connection(struct irdma_cm_node *cm_node)
+{
+ struct irdma_cm_core *cm_core = cm_node->cm_core;
+ struct irdma_qp *iwqp;
+ struct irdma_cm_info nfo;
+
+ /* if the node is destroyed before the connection is accelerated */
+ if (!cm_node->accelerated && cm_node->accept_pend) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: node destroyed before established\n");
+ atomic_dec(&cm_node->listener->pend_accepts_cnt);
+ }
+ if (cm_node->close_entry)
+ irdma_handle_close_entry(cm_node, 0);
+ if (cm_node->listener) {
+ irdma_dec_refcnt_listen(cm_core, cm_node->listener, 0, true);
+ } else {
+ if (cm_node->apbvt_set) {
+ irdma_del_apbvt(cm_node->iwdev, cm_node->apbvt_entry);
+ cm_node->apbvt_set = 0;
+ }
+ irdma_get_addr_info(cm_node, &nfo);
+ if (cm_node->qhash_set) {
+ nfo.qh_qpid = cm_node->iwdev->vsi.ilq->qp_id;
+ irdma_manage_qhash(cm_node->iwdev, &nfo,
+ IRDMA_QHASH_TYPE_TCP_ESTABLISHED,
+ IRDMA_QHASH_MANAGE_TYPE_DELETE, NULL,
+ false);
+ cm_node->qhash_set = 0;
+ }
+ }
+
+ iwqp = cm_node->iwqp;
+ if (iwqp) {
+ cm_node->cm_id->rem_ref(cm_node->cm_id);
+ cm_node->cm_id = NULL;
+ iwqp->cm_id = NULL;
+ irdma_qp_rem_ref(&iwqp->ibqp);
+ cm_node->iwqp = NULL;
+ } else if (cm_node->qhash_set) {
+ irdma_get_addr_info(cm_node, &nfo);
+ nfo.qh_qpid = cm_node->iwdev->vsi.ilq->qp_id;
+ irdma_manage_qhash(cm_node->iwdev, &nfo,
+ IRDMA_QHASH_TYPE_TCP_ESTABLISHED,
+ IRDMA_QHASH_MANAGE_TYPE_DELETE, NULL, false);
+ cm_node->qhash_set = 0;
+ }
+
+ cm_core->cm_free_ah(cm_node);
+}
+
+/**
+ * irdma_rem_ref_cm_node - destroy an instance of a cm node
+ * @cm_node: connection's node
+ */
+void irdma_rem_ref_cm_node(struct irdma_cm_node *cm_node)
+{
+ struct irdma_cm_core *cm_core = cm_node->cm_core;
+ unsigned long flags;
+
+ trace_irdma_rem_ref_cm_node(cm_node, 0, __builtin_return_address(0));
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+
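+ /* Drop the reference under ht_lock so teardown cannot race with a
+ * concurrent lookup; the last reference removes the node from the
+ * connection hash table.
+ */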
+ if (!refcount_dec_and_test(&cm_node->refcnt)) {
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+ return;
+ }
+ if (cm_node->iwqp) {
+ cm_node->iwqp->cm_node = NULL;
+ cm_node->iwqp->cm_id = NULL;
+ }
+ hash_del_rcu(&cm_node->list);
+ cm_node->cm_core->stats_nodes_destroyed++;
+
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+ irdma_destroy_connection(cm_node);
+
+ kfree_rcu(cm_node, rcu_head);
+}
+
+/**
+ * irdma_handle_fin_pkt - FIN packet received
+ * @cm_node: connection's node
+ */
+static void irdma_handle_fin_pkt(struct irdma_cm_node *cm_node)
+{
+ switch (cm_node->state) {
+ case IRDMA_CM_STATE_SYN_RCVD:
+ case IRDMA_CM_STATE_SYN_SENT:
+ case IRDMA_CM_STATE_ESTABLISHED:
+ case IRDMA_CM_STATE_MPAREJ_RCVD:
+ cm_node->tcp_cntxt.rcv_nxt++;
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_LAST_ACK;
+ irdma_send_fin(cm_node);
+ break;
+ case IRDMA_CM_STATE_MPAREQ_SENT:
+ irdma_create_event(cm_node, IRDMA_CM_EVENT_ABORTED);
+ cm_node->tcp_cntxt.rcv_nxt++;
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ refcount_inc(&cm_node->refcnt);
+ irdma_send_reset(cm_node);
+ break;
+ case IRDMA_CM_STATE_FIN_WAIT1:
+ cm_node->tcp_cntxt.rcv_nxt++;
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_CLOSING;
+ irdma_send_ack(cm_node);
+ /*
+ * Wait for the ACK as this is a simultaneous close.
+ * After we receive the ACK, do not send anything.
+ * Just remove the node.
+ */
+ break;
+ case IRDMA_CM_STATE_FIN_WAIT2:
+ cm_node->tcp_cntxt.rcv_nxt++;
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_TIME_WAIT;
+ irdma_send_ack(cm_node);
+ irdma_schedule_cm_timer(cm_node, NULL, IRDMA_TIMER_TYPE_CLOSE,
+ 1, 0);
+ break;
+ case IRDMA_CM_STATE_TIME_WAIT:
+ cm_node->tcp_cntxt.rcv_nxt++;
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ irdma_rem_ref_cm_node(cm_node);
+ break;
+ case IRDMA_CM_STATE_OFFLOADED:
+ default:
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: bad state node state = %d\n", cm_node->state);
+ break;
+ }
+}
+
+/**
+ * irdma_handle_rst_pkt - process received RST packet
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void irdma_handle_rst_pkt(struct irdma_cm_node *cm_node,
+ struct irdma_puda_buf *rbuf)
+{
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: caller: %pS cm_node=%p state=%d rem_port=0x%04x loc_port=0x%04x rem_addr=%pI4 loc_addr=%pI4\n",
+ __builtin_return_address(0), cm_node, cm_node->state,
+ cm_node->rem_port, cm_node->loc_port, cm_node->rem_addr,
+ cm_node->loc_addr);
+
+ irdma_cleanup_retrans_entry(cm_node);
+ switch (cm_node->state) {
+ case IRDMA_CM_STATE_SYN_SENT:
+ case IRDMA_CM_STATE_MPAREQ_SENT:
+ switch (cm_node->mpa_frame_rev) {
+ case IETF_MPA_V2:
+ /* Drop down to MPA_V1 */
+ cm_node->mpa_frame_rev = IETF_MPA_V1;
+ /* send a SYN and go to SYN_SENT state */
+ cm_node->state = IRDMA_CM_STATE_SYN_SENT;
+ if (irdma_send_syn(cm_node, 0))
+ irdma_active_open_err(cm_node, false);
+ break;
+ case IETF_MPA_V1:
+ default:
+ irdma_active_open_err(cm_node, false);
+ break;
+ }
+ break;
+ case IRDMA_CM_STATE_MPAREQ_RCVD:
+ atomic_inc(&cm_node->passive_state);
+ break;
+ case IRDMA_CM_STATE_ESTABLISHED:
+ case IRDMA_CM_STATE_SYN_RCVD:
+ case IRDMA_CM_STATE_LISTENING:
+ irdma_passive_open_err(cm_node, false);
+ break;
+ case IRDMA_CM_STATE_OFFLOADED:
+ irdma_active_open_err(cm_node, false);
+ break;
+ case IRDMA_CM_STATE_CLOSED:
+ break;
+ case IRDMA_CM_STATE_FIN_WAIT2:
+ case IRDMA_CM_STATE_FIN_WAIT1:
+ case IRDMA_CM_STATE_LAST_ACK:
+ case IRDMA_CM_STATE_TIME_WAIT:
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ irdma_rem_ref_cm_node(cm_node);
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * irdma_handle_rcv_mpa - process a received MPA buffer
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void irdma_handle_rcv_mpa(struct irdma_cm_node *cm_node,
+ struct irdma_puda_buf *rbuf)
+{
+ int err;
+ int datasize = rbuf->datalen;
+ u8 *dataloc = rbuf->data;
+
+ enum irdma_cm_event_type type = IRDMA_CM_EVENT_UNKNOWN;
+ u32 res_type;
+
+ err = irdma_parse_mpa(cm_node, dataloc, &res_type, datasize);
+ if (err) {
+ if (cm_node->state == IRDMA_CM_STATE_MPAREQ_SENT)
+ irdma_active_open_err(cm_node, true);
+ else
+ irdma_passive_open_err(cm_node, true);
+ return;
+ }
+
+ switch (cm_node->state) {
+ case IRDMA_CM_STATE_ESTABLISHED:
+ if (res_type == IRDMA_MPA_REQUEST_REJECT)
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: state for reject\n");
+ cm_node->state = IRDMA_CM_STATE_MPAREQ_RCVD;
+ type = IRDMA_CM_EVENT_MPA_REQ;
+ irdma_send_ack(cm_node); /* ACK received MPA request */
+ atomic_set(&cm_node->passive_state,
+ IRDMA_PASSIVE_STATE_INDICATED);
+ break;
+ case IRDMA_CM_STATE_MPAREQ_SENT:
+ irdma_cleanup_retrans_entry(cm_node);
+ if (res_type == IRDMA_MPA_REQUEST_REJECT) {
+ type = IRDMA_CM_EVENT_MPA_REJECT;
+ cm_node->state = IRDMA_CM_STATE_MPAREJ_RCVD;
+ } else {
+ type = IRDMA_CM_EVENT_CONNECTED;
+ cm_node->state = IRDMA_CM_STATE_OFFLOADED;
+ }
+ irdma_send_ack(cm_node);
+ break;
+ default:
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: wrong cm_node state =%d\n", cm_node->state);
+ break;
+ }
+ irdma_create_event(cm_node, type);
+}
+
+/**
+ * irdma_check_syn - Check for error on received syn ack
+ * @cm_node: connection's node
+ * @tcph: pointer tcp header
+ */
+static int irdma_check_syn(struct irdma_cm_node *cm_node, struct tcphdr *tcph)
+{
+ if (ntohl(tcph->ack_seq) != cm_node->tcp_cntxt.loc_seq_num) {
+ irdma_active_open_err(cm_node, true);
+ return 1;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_check_seq - check whether the sequence numbers are valid
+ * @cm_node: connection's node
+ * @tcph: pointer tcp header
+ */
+static int irdma_check_seq(struct irdma_cm_node *cm_node, struct tcphdr *tcph)
+{
+ u32 seq;
+ u32 ack_seq;
+ u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num;
+ u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt;
+ u32 rcv_wnd;
+ int err = 0;
+
+ seq = ntohl(tcph->seq);
+ ack_seq = ntohl(tcph->ack_seq);
+ rcv_wnd = cm_node->tcp_cntxt.rcv_wnd;
+ if (ack_seq != loc_seq_num ||
+ !between(seq, rcv_nxt, (rcv_nxt + rcv_wnd)))
+ err = -1;
+ if (err)
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: seq number err\n");
+
+ return err;
+}
+
+void irdma_add_conn_est_qh(struct irdma_cm_node *cm_node)
+{
+ struct irdma_cm_info nfo;
+
+ irdma_get_addr_info(cm_node, &nfo);
+ nfo.qh_qpid = cm_node->iwdev->vsi.ilq->qp_id;
+ irdma_manage_qhash(cm_node->iwdev, &nfo,
+ IRDMA_QHASH_TYPE_TCP_ESTABLISHED,
+ IRDMA_QHASH_MANAGE_TYPE_ADD,
+ cm_node, false);
+ cm_node->qhash_set = true;
+}
+
+/**
+ * irdma_handle_syn_pkt - process a SYN packet (passive side)
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void irdma_handle_syn_pkt(struct irdma_cm_node *cm_node,
+ struct irdma_puda_buf *rbuf)
+{
+ struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
+ int err;
+ u32 inc_sequence;
+ int optionsize;
+
+ optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+ inc_sequence = ntohl(tcph->seq);
+
+ switch (cm_node->state) {
+ case IRDMA_CM_STATE_SYN_SENT:
+ case IRDMA_CM_STATE_MPAREQ_SENT:
+ /* Received a SYN on an active open connection */
+ irdma_active_open_err(cm_node, true);
+ break;
+ case IRDMA_CM_STATE_LISTENING:
+ /* Passive OPEN */
+ if (atomic_read(&cm_node->listener->pend_accepts_cnt) >
+ cm_node->listener->backlog) {
+ cm_node->cm_core->stats_backlog_drops++;
+ irdma_passive_open_err(cm_node, false);
+ break;
+ }
+ err = irdma_handle_tcp_options(cm_node, tcph, optionsize, 1);
+ if (err) {
+ irdma_passive_open_err(cm_node, false);
+ /* drop pkt */
+ break;
+ }
+ err = cm_node->cm_core->cm_create_ah(cm_node, false);
+ if (err) {
+ irdma_passive_open_err(cm_node, false);
+ /* drop pkt */
+ break;
+ }
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
+ cm_node->accept_pend = 1;
+ atomic_inc(&cm_node->listener->pend_accepts_cnt);
+
+ cm_node->state = IRDMA_CM_STATE_SYN_RCVD;
+ break;
+ case IRDMA_CM_STATE_CLOSED:
+ irdma_cleanup_retrans_entry(cm_node);
+ refcount_inc(&cm_node->refcnt);
+ irdma_send_reset(cm_node);
+ break;
+ case IRDMA_CM_STATE_OFFLOADED:
+ case IRDMA_CM_STATE_ESTABLISHED:
+ case IRDMA_CM_STATE_FIN_WAIT1:
+ case IRDMA_CM_STATE_FIN_WAIT2:
+ case IRDMA_CM_STATE_MPAREQ_RCVD:
+ case IRDMA_CM_STATE_LAST_ACK:
+ case IRDMA_CM_STATE_CLOSING:
+ case IRDMA_CM_STATE_UNKNOWN:
+ default:
+ break;
+ }
+}
+
+/**
+ * irdma_handle_synack_pkt - Process SYN+ACK packet (active side)
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void irdma_handle_synack_pkt(struct irdma_cm_node *cm_node,
+ struct irdma_puda_buf *rbuf)
+{
+ struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
+ int err;
+ u32 inc_sequence;
+ int optionsize;
+
+ optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+ inc_sequence = ntohl(tcph->seq);
+ switch (cm_node->state) {
+ case IRDMA_CM_STATE_SYN_SENT:
+ irdma_cleanup_retrans_entry(cm_node);
+ /* active open */
+ if (irdma_check_syn(cm_node, tcph)) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: check syn fail\n");
+ return;
+ }
+ cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+ /* setup options */
+ err = irdma_handle_tcp_options(cm_node, tcph, optionsize, 0);
+ if (err) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: cm_node=%p tcp_options failed\n",
+ cm_node);
+ break;
+ }
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
+ irdma_send_ack(cm_node); /* ACK for the syn_ack */
+ err = irdma_send_mpa_request(cm_node);
+ if (err) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: cm_node=%p irdma_send_mpa_request failed\n",
+ cm_node);
+ break;
+ }
+ cm_node->state = IRDMA_CM_STATE_MPAREQ_SENT;
+ break;
+ case IRDMA_CM_STATE_MPAREQ_RCVD:
+ irdma_passive_open_err(cm_node, true);
+ break;
+ case IRDMA_CM_STATE_LISTENING:
+ cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ irdma_send_reset(cm_node);
+ break;
+ case IRDMA_CM_STATE_CLOSED:
+ cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+ irdma_cleanup_retrans_entry(cm_node);
+ refcount_inc(&cm_node->refcnt);
+ irdma_send_reset(cm_node);
+ break;
+ case IRDMA_CM_STATE_ESTABLISHED:
+ case IRDMA_CM_STATE_FIN_WAIT1:
+ case IRDMA_CM_STATE_FIN_WAIT2:
+ case IRDMA_CM_STATE_LAST_ACK:
+ case IRDMA_CM_STATE_OFFLOADED:
+ case IRDMA_CM_STATE_CLOSING:
+ case IRDMA_CM_STATE_UNKNOWN:
+ case IRDMA_CM_STATE_MPAREQ_SENT:
+ default:
+ break;
+ }
+}
+
+/**
+ * irdma_handle_ack_pkt - process packet with ACK
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static int irdma_handle_ack_pkt(struct irdma_cm_node *cm_node,
+ struct irdma_puda_buf *rbuf)
+{
+ struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
+ u32 inc_sequence;
+ int ret;
+ int optionsize;
+ u32 datasize = rbuf->datalen;
+
+ optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+
+ if (irdma_check_seq(cm_node, tcph))
+ return -EINVAL;
+
+ inc_sequence = ntohl(tcph->seq);
+ switch (cm_node->state) {
+ case IRDMA_CM_STATE_SYN_RCVD:
+ irdma_cleanup_retrans_entry(cm_node);
+ ret = irdma_handle_tcp_options(cm_node, tcph, optionsize, 1);
+ if (ret)
+ return ret;
+ cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+ cm_node->state = IRDMA_CM_STATE_ESTABLISHED;
+ if (datasize) {
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+ irdma_handle_rcv_mpa(cm_node, rbuf);
+ }
+ break;
+ case IRDMA_CM_STATE_ESTABLISHED:
+ irdma_cleanup_retrans_entry(cm_node);
+ if (datasize) {
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+ irdma_handle_rcv_mpa(cm_node, rbuf);
+ }
+ break;
+ case IRDMA_CM_STATE_MPAREQ_SENT:
+ cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+ if (datasize) {
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+ cm_node->ack_rcvd = false;
+ irdma_handle_rcv_mpa(cm_node, rbuf);
+ } else {
+ cm_node->ack_rcvd = true;
+ }
+ break;
+ case IRDMA_CM_STATE_LISTENING:
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ irdma_send_reset(cm_node);
+ break;
+ case IRDMA_CM_STATE_CLOSED:
+ irdma_cleanup_retrans_entry(cm_node);
+ refcount_inc(&cm_node->refcnt);
+ irdma_send_reset(cm_node);
+ break;
+ case IRDMA_CM_STATE_LAST_ACK:
+ case IRDMA_CM_STATE_CLOSING:
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ irdma_rem_ref_cm_node(cm_node);
+ break;
+ case IRDMA_CM_STATE_FIN_WAIT1:
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_FIN_WAIT2;
+ break;
+ case IRDMA_CM_STATE_SYN_SENT:
+ case IRDMA_CM_STATE_FIN_WAIT2:
+ case IRDMA_CM_STATE_OFFLOADED:
+ case IRDMA_CM_STATE_MPAREQ_RCVD:
+ case IRDMA_CM_STATE_UNKNOWN:
+ default:
+ irdma_cleanup_retrans_entry(cm_node);
+ break;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_process_pkt - process cm packet
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void irdma_process_pkt(struct irdma_cm_node *cm_node,
+ struct irdma_puda_buf *rbuf)
+{
+ enum irdma_tcpip_pkt_type pkt_type = IRDMA_PKT_TYPE_UNKNOWN;
+ struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
+ u32 fin_set = 0;
+ int err;
+
+ if (tcph->rst) {
+ pkt_type = IRDMA_PKT_TYPE_RST;
+ } else if (tcph->syn) {
+ pkt_type = IRDMA_PKT_TYPE_SYN;
+ if (tcph->ack)
+ pkt_type = IRDMA_PKT_TYPE_SYNACK;
+ } else if (tcph->ack) {
+ pkt_type = IRDMA_PKT_TYPE_ACK;
+ }
+ if (tcph->fin)
+ fin_set = 1;
+
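+ /* Dispatch on the primary TCP flag; a FIN is handled after a clean
+ * ACK, or on its own when no other type matched.
+ */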
+ switch (pkt_type) {
+ case IRDMA_PKT_TYPE_SYN:
+ irdma_handle_syn_pkt(cm_node, rbuf);
+ break;
+ case IRDMA_PKT_TYPE_SYNACK:
+ irdma_handle_synack_pkt(cm_node, rbuf);
+ break;
+ case IRDMA_PKT_TYPE_ACK:
+ err = irdma_handle_ack_pkt(cm_node, rbuf);
+ if (fin_set && !err)
+ irdma_handle_fin_pkt(cm_node);
+ break;
+ case IRDMA_PKT_TYPE_RST:
+ irdma_handle_rst_pkt(cm_node, rbuf);
+ break;
+ default:
+ if (fin_set &&
+ (!irdma_check_seq(cm_node, (struct tcphdr *)rbuf->tcph)))
+ irdma_handle_fin_pkt(cm_node);
+ break;
+ }
+}
+
+/**
+ * irdma_make_listen_node - create a listen node with params
+ * @cm_core: cm's core
+ * @iwdev: iwarp device structure
+ * @cm_info: quad info for connection
+ */
+static struct irdma_cm_listener *
+irdma_make_listen_node(struct irdma_cm_core *cm_core,
+ struct irdma_device *iwdev,
+ struct irdma_cm_info *cm_info)
+{
+ struct irdma_cm_listener *listener;
+ unsigned long flags;
+
+ /* cannot have multiple matching listeners */
+ listener =
+ irdma_find_listener(cm_core, cm_info->loc_addr, cm_info->ipv4,
+ cm_info->loc_port, cm_info->vlan_id,
+ IRDMA_CM_LISTENER_EITHER_STATE);
+ if (listener &&
+ listener->listener_state == IRDMA_CM_LISTENER_ACTIVE_STATE) {
+ refcount_dec(&listener->refcnt);
+ return NULL;
+ }
+
+ if (!listener) {
+ /* create a CM listen node: a half node used only
+ * to compare incoming traffic against
+ */
+ listener = kzalloc(sizeof(*listener), GFP_KERNEL);
+ if (!listener)
+ return NULL;
+ cm_core->stats_listen_nodes_created++;
+ memcpy(listener->loc_addr, cm_info->loc_addr,
+ sizeof(listener->loc_addr));
+ listener->loc_port = cm_info->loc_port;
+
+ INIT_LIST_HEAD(&listener->child_listen_list);
+
+ refcount_set(&listener->refcnt, 1);
+ } else {
+ listener->reused_node = 1;
+ }
+
+ listener->cm_id = cm_info->cm_id;
+ listener->ipv4 = cm_info->ipv4;
+ listener->vlan_id = cm_info->vlan_id;
+ atomic_set(&listener->pend_accepts_cnt, 0);
+ listener->cm_core = cm_core;
+ listener->iwdev = iwdev;
+
+ listener->backlog = cm_info->backlog;
+ listener->listener_state = IRDMA_CM_LISTENER_ACTIVE_STATE;
+
+ if (!listener->reused_node) {
+ spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+ list_add(&listener->list, &cm_core->listen_list);
+ spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+ }
+
+ return listener;
+}
+
+/**
+ * irdma_create_cm_node - make a connection node with params
+ * @cm_core: cm's core
+ * @iwdev: iwarp device structure
+ * @conn_param: connection parameters
+ * @cm_info: quad info for connection
+ * @caller_cm_node: pointer to cm_node structure to return
+ */
+static int irdma_create_cm_node(struct irdma_cm_core *cm_core,
+ struct irdma_device *iwdev,
+ struct iw_cm_conn_param *conn_param,
+ struct irdma_cm_info *cm_info,
+ struct irdma_cm_node **caller_cm_node)
+{
+ struct irdma_cm_node *cm_node;
+ u16 private_data_len = conn_param->private_data_len;
+ const void *private_data = conn_param->private_data;
+
+ /* create a CM connection node */
+ cm_node = irdma_make_cm_node(cm_core, iwdev, cm_info, NULL);
+ if (!cm_node)
+ return -ENOMEM;
+
+ /* set our node side to client (active) side */
+ cm_node->tcp_cntxt.client = 1;
+ cm_node->tcp_cntxt.rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE;
+
+ irdma_record_ird_ord(cm_node, conn_param->ird, conn_param->ord);
+
+ cm_node->pdata.size = private_data_len;
+ cm_node->pdata.addr = cm_node->pdata_buf;
+
+ memcpy(cm_node->pdata_buf, private_data, private_data_len);
+ *caller_cm_node = cm_node;
+
+ return 0;
+}
+
+/**
+ * irdma_cm_reject - reject and teardown a connection
+ * @cm_node: connection's node
+ * @pdata: ptr to private data for reject
+ * @plen: size of private data
+ */
+static int irdma_cm_reject(struct irdma_cm_node *cm_node, const void *pdata,
+ u8 plen)
+{
+ int ret;
+ int passive_state;
+
+ if (cm_node->tcp_cntxt.client)
+ return 0;
+
+ irdma_cleanup_retrans_entry(cm_node);
+
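+ /* Advance the passive state; if a RST was already indicated, close
+ * and drop the node instead of sending an MPA reject.
+ */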
+ passive_state = atomic_add_return(1, &cm_node->passive_state);
+ if (passive_state == IRDMA_SEND_RESET_EVENT) {
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ irdma_rem_ref_cm_node(cm_node);
+ return 0;
+ }
+
+ if (cm_node->state == IRDMA_CM_STATE_LISTENER_DESTROYED) {
+ irdma_rem_ref_cm_node(cm_node);
+ return 0;
+ }
+
+ ret = irdma_send_mpa_reject(cm_node, pdata, plen);
+ if (!ret)
+ return 0;
+
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ if (irdma_send_reset(cm_node))
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: send reset failed\n");
+
+ return ret;
+}
+
+/**
+ * irdma_cm_close - close of cm connection
+ * @cm_node: connection's node
+ */
+static int irdma_cm_close(struct irdma_cm_node *cm_node)
+{
+ switch (cm_node->state) {
+ case IRDMA_CM_STATE_SYN_RCVD:
+ case IRDMA_CM_STATE_SYN_SENT:
+ case IRDMA_CM_STATE_ONE_SIDE_ESTABLISHED:
+ case IRDMA_CM_STATE_ESTABLISHED:
+ case IRDMA_CM_STATE_ACCEPTING:
+ case IRDMA_CM_STATE_MPAREQ_SENT:
+ case IRDMA_CM_STATE_MPAREQ_RCVD:
+ irdma_cleanup_retrans_entry(cm_node);
+ irdma_send_reset(cm_node);
+ break;
+ case IRDMA_CM_STATE_CLOSE_WAIT:
+ cm_node->state = IRDMA_CM_STATE_LAST_ACK;
+ irdma_send_fin(cm_node);
+ break;
+ case IRDMA_CM_STATE_FIN_WAIT1:
+ case IRDMA_CM_STATE_FIN_WAIT2:
+ case IRDMA_CM_STATE_LAST_ACK:
+ case IRDMA_CM_STATE_TIME_WAIT:
+ case IRDMA_CM_STATE_CLOSING:
+ return -EINVAL;
+ case IRDMA_CM_STATE_LISTENING:
+ irdma_cleanup_retrans_entry(cm_node);
+ irdma_send_reset(cm_node);
+ break;
+ case IRDMA_CM_STATE_MPAREJ_RCVD:
+ case IRDMA_CM_STATE_UNKNOWN:
+ case IRDMA_CM_STATE_INITED:
+ case IRDMA_CM_STATE_CLOSED:
+ case IRDMA_CM_STATE_LISTENER_DESTROYED:
+ irdma_rem_ref_cm_node(cm_node);
+ break;
+ case IRDMA_CM_STATE_OFFLOADED:
+ if (cm_node->send_entry)
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: CM send_entry in OFFLOADED state\n");
+ irdma_rem_ref_cm_node(cm_node);
+ break;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_receive_ilq - receive an Ethernet packet and process it
+ * through the CM
+ * @vsi: VSI structure of dev
+ * @rbuf: receive buffer
+ */
+void irdma_receive_ilq(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *rbuf)
+{
+ struct irdma_cm_node *cm_node;
+ struct irdma_cm_listener *listener;
+ struct iphdr *iph;
+ struct ipv6hdr *ip6h;
+ struct tcphdr *tcph;
+ struct irdma_cm_info cm_info = {};
+ struct irdma_device *iwdev = vsi->back_vsi;
+ struct irdma_cm_core *cm_core = &iwdev->cm_core;
+ struct vlan_ethhdr *ethh;
+ u16 vtag;
+
+ /* if vlan, then maclen = 18 else 14 */
+ iph = (struct iphdr *)rbuf->iph;
+ print_hex_dump_debug("ILQ: RECEIVE ILQ BUFFER", DUMP_PREFIX_OFFSET,
+ 16, 8, rbuf->mem.va, rbuf->totallen, false);
+ if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ if (rbuf->vlan_valid) {
+ vtag = rbuf->vlan_id;
+ cm_info.user_pri = (vtag & VLAN_PRIO_MASK) >>
+ VLAN_PRIO_SHIFT;
+ cm_info.vlan_id = vtag & VLAN_VID_MASK;
+ } else {
+ cm_info.vlan_id = 0xFFFF;
+ }
+ } else {
+ ethh = rbuf->mem.va;
+
+ if (ethh->h_vlan_proto == htons(ETH_P_8021Q)) {
+ vtag = ntohs(ethh->h_vlan_TCI);
+ cm_info.user_pri = (vtag & VLAN_PRIO_MASK) >>
+ VLAN_PRIO_SHIFT;
+ cm_info.vlan_id = vtag & VLAN_VID_MASK;
+ ibdev_dbg(&cm_core->iwdev->ibdev,
+ "CM: vlan_id=%d\n", cm_info.vlan_id);
+ } else {
+ cm_info.vlan_id = 0xFFFF;
+ }
+ }
+ tcph = (struct tcphdr *)rbuf->tcph;
+
+ if (rbuf->ipv4) {
+ cm_info.loc_addr[0] = ntohl(iph->daddr);
+ cm_info.rem_addr[0] = ntohl(iph->saddr);
+ cm_info.ipv4 = true;
+ cm_info.tos = iph->tos;
+ } else {
+ ip6h = (struct ipv6hdr *)rbuf->iph;
+ irdma_copy_ip_ntohl(cm_info.loc_addr,
+ ip6h->daddr.in6_u.u6_addr32);
+ irdma_copy_ip_ntohl(cm_info.rem_addr,
+ ip6h->saddr.in6_u.u6_addr32);
+ cm_info.ipv4 = false;
+ cm_info.tos = (ip6h->priority << 4) | (ip6h->flow_lbl[0] >> 4);
+ }
+ cm_info.loc_port = ntohs(tcph->dest);
+ cm_info.rem_port = ntohs(tcph->source);
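+ /* look up an existing connection node by the TCP four-tuple and VLAN */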
+ cm_node = irdma_find_node(cm_core, cm_info.rem_port, cm_info.rem_addr,
+ cm_info.loc_port, cm_info.loc_addr, cm_info.vlan_id);
+
+ if (!cm_node) {
+ /* The only packets accepted here are for a
+ * passive open (SYN only)
+ */
+ if (!tcph->syn || tcph->ack)
+ return;
+
+ listener = irdma_find_listener(cm_core,
+ cm_info.loc_addr,
+ cm_info.ipv4,
+ cm_info.loc_port,
+ cm_info.vlan_id,
+ IRDMA_CM_LISTENER_ACTIVE_STATE);
+ if (!listener) {
+ cm_info.cm_id = NULL;
+ ibdev_dbg(&cm_core->iwdev->ibdev,
+ "CM: no listener found\n");
+ return;
+ }
+
+ cm_info.cm_id = listener->cm_id;
+ cm_node = irdma_make_cm_node(cm_core, iwdev, &cm_info,
+ listener);
+ if (!cm_node) {
+ ibdev_dbg(&cm_core->iwdev->ibdev,
+ "CM: allocate node failed\n");
+ refcount_dec(&listener->refcnt);
+ return;
+ }
+
+ if (!tcph->rst && !tcph->fin) {
+ cm_node->state = IRDMA_CM_STATE_LISTENING;
+ } else {
+ irdma_rem_ref_cm_node(cm_node);
+ return;
+ }
+
+ refcount_inc(&cm_node->refcnt);
+ } else if (cm_node->state == IRDMA_CM_STATE_OFFLOADED) {
+ irdma_rem_ref_cm_node(cm_node);
+ return;
+ }
+
+ irdma_process_pkt(cm_node, rbuf);
+ irdma_rem_ref_cm_node(cm_node);
+}
+
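+/* GEN_1 cm_create_ah callback: no address handle is created; the passive
+ * side just adds the established quad hash entry.
+ */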
+static int irdma_add_qh(struct irdma_cm_node *cm_node, bool active)
+{
+ if (!active)
+ irdma_add_conn_est_qh(cm_node);
+ return 0;
+}
+
+static void irdma_cm_free_ah_nop(struct irdma_cm_node *cm_node)
+{
+}
+
+/**
+ * irdma_setup_cm_core - setup top level instance of a cm core
+ * @iwdev: iwarp device structure
+ * @rdma_ver: HW version
+ */
+int irdma_setup_cm_core(struct irdma_device *iwdev, u8 rdma_ver)
+{
+ struct irdma_cm_core *cm_core = &iwdev->cm_core;
+
+ cm_core->iwdev = iwdev;
+ cm_core->dev = &iwdev->rf->sc_dev;
+
+ /* Handles CM event work items sent to the iWARP core */
+ cm_core->event_wq = alloc_ordered_workqueue("iwarp-event-wq", 0);
+ if (!cm_core->event_wq)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&cm_core->listen_list);
+
+ timer_setup(&cm_core->tcp_timer, irdma_cm_timer_tick, 0);
+
+ spin_lock_init(&cm_core->ht_lock);
+ spin_lock_init(&cm_core->listen_list_lock);
+ spin_lock_init(&cm_core->apbvt_lock);
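+ /* GEN_1 builds UDA CM frames and uses quad hash entries in place of
+ * address handles; GEN_2 and later send CM frames over an AH.
+ */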
+ switch (rdma_ver) {
+ case IRDMA_GEN_1:
+ cm_core->form_cm_frame = irdma_form_uda_cm_frame;
+ cm_core->cm_create_ah = irdma_add_qh;
+ cm_core->cm_free_ah = irdma_cm_free_ah_nop;
+ break;
+ case IRDMA_GEN_2:
+ default:
+ cm_core->form_cm_frame = irdma_form_ah_cm_frame;
+ cm_core->cm_create_ah = irdma_cm_create_ah;
+ cm_core->cm_free_ah = irdma_cm_free_ah;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_cleanup_cm_core - deallocate a top level instance of a
+ * cm core
+ * @cm_core: cm's core
+ */
+void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core)
+{
+ if (!cm_core)
+ return;
+
+ timer_delete_sync(&cm_core->tcp_timer);
+
+ destroy_workqueue(cm_core->event_wq);
+ cm_core->dev->ws_reset(&cm_core->iwdev->vsi);
+}
+
+/**
+ * irdma_init_tcp_ctx - setup qp context
+ * @cm_node: connection's node
+ * @tcp_info: offload info for tcp
+ * @iwqp: associated qp for the connection
+ */
+static void irdma_init_tcp_ctx(struct irdma_cm_node *cm_node,
+ struct irdma_tcp_offload_info *tcp_info,
+ struct irdma_qp *iwqp)
+{
+ tcp_info->ipv4 = cm_node->ipv4;
+ tcp_info->drop_ooo_seg = !iwqp->iwdev->iw_ooo;
+ tcp_info->wscale = true;
+ tcp_info->ignore_tcp_opt = true;
+ tcp_info->ignore_tcp_uns_opt = true;
+ tcp_info->no_nagle = false;
+
+ tcp_info->ttl = IRDMA_DEFAULT_TTL;
+ tcp_info->rtt_var = IRDMA_DEFAULT_RTT_VAR;
+ tcp_info->ss_thresh = IRDMA_DEFAULT_SS_THRESH;
+ tcp_info->rexmit_thresh = IRDMA_DEFAULT_REXMIT_THRESH;
+
+ tcp_info->tcp_state = IRDMA_TCP_STATE_ESTABLISHED;
+ tcp_info->snd_wscale = cm_node->tcp_cntxt.snd_wscale;
+ tcp_info->rcv_wscale = cm_node->tcp_cntxt.rcv_wscale;
+
+ tcp_info->snd_nxt = cm_node->tcp_cntxt.loc_seq_num;
+ tcp_info->snd_wnd = cm_node->tcp_cntxt.snd_wnd;
+ tcp_info->rcv_nxt = cm_node->tcp_cntxt.rcv_nxt;
+ tcp_info->snd_max = cm_node->tcp_cntxt.loc_seq_num;
+
+ tcp_info->snd_una = cm_node->tcp_cntxt.loc_seq_num;
+ tcp_info->cwnd = 2 * cm_node->tcp_cntxt.mss;
+ tcp_info->snd_wl1 = cm_node->tcp_cntxt.rcv_nxt;
+ tcp_info->snd_wl2 = cm_node->tcp_cntxt.loc_seq_num;
+ tcp_info->max_snd_window = cm_node->tcp_cntxt.max_snd_wnd;
+ tcp_info->rcv_wnd = cm_node->tcp_cntxt.rcv_wnd
+ << cm_node->tcp_cntxt.rcv_wscale;
+
+ tcp_info->flow_label = 0;
+ tcp_info->snd_mss = (u32)cm_node->tcp_cntxt.mss;
+ tcp_info->tos = cm_node->tos;
+ if (cm_node->vlan_id < VLAN_N_VID) {
+ tcp_info->insert_vlan_tag = true;
+ tcp_info->vlan_tag = cm_node->vlan_id;
+ tcp_info->vlan_tag |= cm_node->user_pri << VLAN_PRIO_SHIFT;
+ }
+ if (cm_node->ipv4) {
+ tcp_info->src_port = cm_node->loc_port;
+ tcp_info->dst_port = cm_node->rem_port;
+
+ tcp_info->dest_ip_addr[3] = cm_node->rem_addr[0];
+ tcp_info->local_ipaddr[3] = cm_node->loc_addr[0];
+ tcp_info->arp_idx = (u16)irdma_arp_table(iwqp->iwdev->rf,
+ &tcp_info->dest_ip_addr[3],
+ true, NULL,
+ IRDMA_ARP_RESOLVE);
+ } else {
+ tcp_info->src_port = cm_node->loc_port;
+ tcp_info->dst_port = cm_node->rem_port;
+ memcpy(tcp_info->dest_ip_addr, cm_node->rem_addr,
+ sizeof(tcp_info->dest_ip_addr));
+ memcpy(tcp_info->local_ipaddr, cm_node->loc_addr,
+ sizeof(tcp_info->local_ipaddr));
+
+ tcp_info->arp_idx = (u16)irdma_arp_table(iwqp->iwdev->rf,
+ &tcp_info->dest_ip_addr[0],
+ false, NULL,
+ IRDMA_ARP_RESOLVE);
+ }
+}
+
+/**
+ * irdma_cm_init_tsa_conn - setup qp for RTS
+ * @iwqp: associated qp for the connection
+ * @cm_node: connection's node
+ */
+static void irdma_cm_init_tsa_conn(struct irdma_qp *iwqp,
+ struct irdma_cm_node *cm_node)
+{
+ struct irdma_iwarp_offload_info *iwarp_info;
+ struct irdma_qp_host_ctx_info *ctx_info;
+
+ iwarp_info = &iwqp->iwarp_info;
+ ctx_info = &iwqp->ctx_info;
+
+ ctx_info->tcp_info = &iwqp->tcp_info;
+ ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
+ ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
+
+ iwarp_info->ord_size = cm_node->ord_size;
+ iwarp_info->ird_size = cm_node->ird_size;
+ iwarp_info->rd_en = true;
+ iwarp_info->rdmap_ver = 1;
+ iwarp_info->ddp_ver = 1;
+ iwarp_info->pd_id = iwqp->iwpd->sc_pd.pd_id;
+
+ ctx_info->tcp_info_valid = true;
+ ctx_info->iwarp_info_valid = true;
+ ctx_info->user_pri = cm_node->user_pri;
+
+ irdma_init_tcp_ctx(cm_node, &iwqp->tcp_info, iwqp);
+ if (cm_node->snd_mark_en) {
+ iwarp_info->snd_mark_en = true;
+ iwarp_info->snd_mark_offset = (iwqp->tcp_info.snd_nxt & SNDMARKER_SEQNMASK) +
+ cm_node->lsmm_size;
+ }
+
+ cm_node->state = IRDMA_CM_STATE_OFFLOADED;
+ iwqp->tcp_info.tcp_state = IRDMA_TCP_STATE_ESTABLISHED;
+ iwqp->tcp_info.src_mac_addr_idx = iwqp->iwdev->mac_ip_table_idx;
+
+ if (cm_node->rcv_mark_en) {
+ iwarp_info->rcv_mark_en = true;
+ iwarp_info->align_hdrs = true;
+ }
+
+ irdma_sc_qp_setctx(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info);
+
+ /* once tcp_info is set, no need to do it again */
+ ctx_info->tcp_info_valid = false;
+ ctx_info->iwarp_info_valid = false;
+}
+
+/**
+ * irdma_cm_disconn - when a connection is being closed
+ * @iwqp: associated qp for the connection
+ */
+void irdma_cm_disconn(struct irdma_qp *iwqp)
+{
+ struct irdma_device *iwdev = iwqp->iwdev;
+ struct disconn_work *work;
+ unsigned long flags;
+
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return;
+
+ spin_lock_irqsave(&iwdev->rf->qptable_lock, flags);
+ if (!iwdev->rf->qp_table[iwqp->ibqp.qp_num]) {
+ spin_unlock_irqrestore(&iwdev->rf->qptable_lock, flags);
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: qp_id %d is already freed\n",
+ iwqp->ibqp.qp_num);
+ kfree(work);
+ return;
+ }
+ irdma_qp_add_ref(&iwqp->ibqp);
+ spin_unlock_irqrestore(&iwdev->rf->qptable_lock, flags);
+
+ work->iwqp = iwqp;
+ INIT_WORK(&work->work, irdma_disconnect_worker);
+ queue_work(iwdev->cleanup_wq, &work->work);
+}
+
+/**
+ * irdma_qp_disconnect - mark the qp inactive and close the cm connection
+ * @iwqp: associated qp for the connection
+ */
+static void irdma_qp_disconnect(struct irdma_qp *iwqp)
+{
+ struct irdma_device *iwdev = iwqp->iwdev;
+
+ iwqp->active_conn = 0;
+ /* close the CM node down if it is still active */
+ ibdev_dbg(&iwdev->ibdev, "CM: Call close API\n");
+ irdma_cm_close(iwqp->cm_node);
+}
+
+/**
+ * irdma_cm_disconn_true - called by worker thread to disconnect qp
+ * @iwqp: associated qp for the connection
+ */
+static void irdma_cm_disconn_true(struct irdma_qp *iwqp)
+{
+ struct iw_cm_id *cm_id;
+ struct irdma_device *iwdev;
+ struct irdma_sc_qp *qp = &iwqp->sc_qp;
+ u16 last_ae;
+ u8 original_hw_tcp_state;
+ u8 original_ibqp_state;
+ int disconn_status = 0;
+ int issue_disconn = 0;
+ int issue_close = 0;
+ int issue_flush = 0;
+ unsigned long flags;
+ int err;
+
+ iwdev = iwqp->iwdev;
+ spin_lock_irqsave(&iwqp->lock, flags);
+ if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
+ struct ib_qp_attr attr;
+
+ if (iwqp->flush_issued || iwqp->sc_qp.qp_uk.destroy_pending) {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ return;
+ }
+
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+
+ attr.qp_state = IB_QPS_ERR;
+ irdma_modify_qp_roce(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+ irdma_ib_qp_event(iwqp, qp->event_type);
+ return;
+ }
+
+ cm_id = iwqp->cm_id;
+ original_hw_tcp_state = iwqp->hw_tcp_state;
+ original_ibqp_state = iwqp->ibqp_state;
+ last_ae = iwqp->last_aeq;
+
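+ /* Decide what to report: a terminate, a closed/reset HW TCP state or
+ * a fatal AE determines whether to flush the WQEs and whether to send
+ * disconnect and/or close events to the upper layer.
+ */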
+ if (qp->term_flags) {
+ issue_disconn = 1;
+ issue_close = 1;
+ iwqp->cm_id = NULL;
+ irdma_terminate_del_timer(qp);
+ if (!iwqp->flush_issued) {
+ iwqp->flush_issued = 1;
+ issue_flush = 1;
+ }
+ } else if ((original_hw_tcp_state == IRDMA_TCP_STATE_CLOSE_WAIT) ||
+ ((original_ibqp_state == IB_QPS_RTS) &&
+ (last_ae == IRDMA_AE_LLP_CONNECTION_RESET))) {
+ issue_disconn = 1;
+ if (last_ae == IRDMA_AE_LLP_CONNECTION_RESET)
+ disconn_status = -ECONNRESET;
+ }
+
+ if (original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED ||
+ original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT ||
+ last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE ||
+ last_ae == IRDMA_AE_BAD_CLOSE ||
+ last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset || !cm_id) {
+ issue_close = 1;
+ iwqp->cm_id = NULL;
+ qp->term_flags = 0;
+ if (!iwqp->flush_issued) {
+ iwqp->flush_issued = 1;
+ issue_flush = 1;
+ }
+ }
+
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ if (issue_flush && !iwqp->sc_qp.qp_uk.destroy_pending) {
+ irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ | IRDMA_FLUSH_RQ |
+ IRDMA_FLUSH_WAIT);
+
+ if (qp->term_flags)
+ irdma_ib_qp_event(iwqp, qp->event_type);
+ }
+
+ if (!cm_id || !cm_id->event_handler)
+ return;
+
+ spin_lock_irqsave(&iwdev->cm_core.ht_lock, flags);
+ if (!iwqp->cm_node) {
+ spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags);
+ return;
+ }
+ refcount_inc(&iwqp->cm_node->refcnt);
+
+ spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags);
+
+ if (issue_disconn) {
+ err = irdma_send_cm_event(iwqp->cm_node, cm_id,
+ IW_CM_EVENT_DISCONNECT,
+ disconn_status);
+ if (err)
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: disconnect event failed: - cm_id = %p\n",
+ cm_id);
+ }
+ if (issue_close) {
+ cm_id->provider_data = iwqp;
+ err = irdma_send_cm_event(iwqp->cm_node, cm_id,
+ IW_CM_EVENT_CLOSE, 0);
+ if (err)
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: close event failed: - cm_id = %p\n",
+ cm_id);
+ irdma_qp_disconnect(iwqp);
+ }
+ irdma_rem_ref_cm_node(iwqp->cm_node);
+}
+
+/**
+ * irdma_disconnect_worker - worker for connection close
+ * @work: pointer to the disconn_work structure
+ */
+static void irdma_disconnect_worker(struct work_struct *work)
+{
+ struct disconn_work *dwork = container_of(work, struct disconn_work, work);
+ struct irdma_qp *iwqp = dwork->iwqp;
+
+ kfree(dwork);
+ irdma_cm_disconn_true(iwqp);
+ irdma_qp_rem_ref(&iwqp->ibqp);
+}
+
+/**
+ * irdma_free_lsmm_rsrc - free LSMM memory and deregister the MR
+ * @iwqp: associated qp for the connection
+ */
+void irdma_free_lsmm_rsrc(struct irdma_qp *iwqp)
+{
+ struct irdma_device *iwdev;
+
+ iwdev = iwqp->iwdev;
+
+ if (iwqp->ietf_mem.va) {
+ if (iwqp->lsmm_mr)
+ iwdev->ibdev.ops.dereg_mr(iwqp->lsmm_mr, NULL);
+ dma_free_coherent(iwdev->rf->sc_dev.hw->device,
+ iwqp->ietf_mem.size, iwqp->ietf_mem.va,
+ iwqp->ietf_mem.pa);
+ iwqp->ietf_mem.va = NULL;
+ }
+}
+
+/**
+ * irdma_accept - registered call for connection to be accepted
+ * @cm_id: cm information for passive connection
+ * @conn_param: accept parameters
+ */
+int irdma_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+ struct ib_qp *ibqp;
+ struct irdma_qp *iwqp;
+ struct irdma_device *iwdev;
+ struct irdma_sc_dev *dev;
+ struct irdma_cm_node *cm_node;
+ struct ib_qp_attr attr = {};
+ int passive_state;
+ struct ib_mr *ibmr;
+ struct irdma_pd *iwpd;
+ u16 buf_len = 0;
+ struct irdma_kmem_info accept;
+ u64 tagged_offset;
+ int wait_ret;
+ int ret = 0;
+
+ ibqp = irdma_get_qp(cm_id->device, conn_param->qpn);
+ if (!ibqp)
+ return -EINVAL;
+
+ iwqp = to_iwqp(ibqp);
+ iwdev = iwqp->iwdev;
+ dev = &iwdev->rf->sc_dev;
+ cm_node = cm_id->provider_data;
+
+ if (((struct sockaddr_in *)&cm_id->local_addr)->sin_family == AF_INET) {
+ cm_node->ipv4 = true;
+ cm_node->vlan_id = irdma_get_vlan_ipv4(cm_node->loc_addr);
+ } else {
+ cm_node->ipv4 = false;
+ irdma_get_vlan_mac_ipv6(cm_node->loc_addr, &cm_node->vlan_id,
+ NULL);
+ }
+ ibdev_dbg(&iwdev->ibdev, "CM: Accept vlan_id=%d\n",
+ cm_node->vlan_id);
+
+ trace_irdma_accept(cm_node, 0, NULL);
+
+ if (cm_node->state == IRDMA_CM_STATE_LISTENER_DESTROYED) {
+ ret = -EINVAL;
+ goto error;
+ }
+
+ passive_state = atomic_add_return(1, &cm_node->passive_state);
+ if (passive_state == IRDMA_SEND_RESET_EVENT) {
+ ret = -ECONNRESET;
+ goto error;
+ }
+
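+ /* Build the MPA reply plus private data in DMA-able memory; this
+ * buffer is sent as the LSMM in the first SQ WQE.
+ */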
+ buf_len = conn_param->private_data_len + IRDMA_MAX_IETF_SIZE;
+ iwqp->ietf_mem.size = ALIGN(buf_len, 1);
+ iwqp->ietf_mem.va = dma_alloc_coherent(dev->hw->device,
+ iwqp->ietf_mem.size,
+ &iwqp->ietf_mem.pa, GFP_KERNEL);
+ if (!iwqp->ietf_mem.va) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ cm_node->pdata.size = conn_param->private_data_len;
+ accept.addr = iwqp->ietf_mem.va;
+ accept.size = irdma_cm_build_mpa_frame(cm_node, &accept, MPA_KEY_REPLY);
+ memcpy((u8 *)accept.addr + accept.size, conn_param->private_data,
+ conn_param->private_data_len);
+
+ if (cm_node->dev->ws_add(iwqp->sc_qp.vsi, cm_node->user_pri)) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ iwqp->sc_qp.user_pri = cm_node->user_pri;
+ irdma_qp_add_qos(&iwqp->sc_qp);
+ /* set up our first outgoing iWARP send WQE (the IETF frame response) */
+ iwpd = iwqp->iwpd;
+ tagged_offset = (uintptr_t)iwqp->ietf_mem.va;
+ ibmr = irdma_reg_phys_mr(&iwpd->ibpd, iwqp->ietf_mem.pa, buf_len,
+ IB_ACCESS_LOCAL_WRITE, &tagged_offset, false);
+ if (IS_ERR(ibmr)) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ ibmr->pd = &iwpd->ibpd;
+ ibmr->device = iwpd->ibpd.device;
+ iwqp->lsmm_mr = ibmr;
+ if (iwqp->page)
+ iwqp->sc_qp.qp_uk.sq_base = kmap_local_page(iwqp->page);
+
+ cm_node->lsmm_size = accept.size + conn_param->private_data_len;
+ irdma_sc_send_lsmm(&iwqp->sc_qp, iwqp->ietf_mem.va, cm_node->lsmm_size,
+ ibmr->lkey);
+
+ if (iwqp->page)
+ kunmap_local(iwqp->sc_qp.qp_uk.sq_base);
+
+ iwqp->cm_id = cm_id;
+ cm_node->cm_id = cm_id;
+
+ cm_id->provider_data = iwqp;
+ iwqp->active_conn = 0;
+ iwqp->cm_node = cm_node;
+ cm_node->iwqp = iwqp;
+ irdma_cm_init_tsa_conn(iwqp, cm_node);
+ irdma_qp_add_ref(&iwqp->ibqp);
+ cm_id->add_ref(cm_id);
+
+ attr.qp_state = IB_QPS_RTS;
+ cm_node->qhash_set = false;
+ cm_node->cm_core->cm_free_ah(cm_node);
+
+ irdma_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+ if (dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_RTS_AE) {
+ wait_ret = wait_event_interruptible_timeout(iwqp->waitq,
+ iwqp->rts_ae_rcvd,
+ IRDMA_MAX_TIMEOUT);
+ if (!wait_ret) {
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: Slow Connection: cm_node=%p, loc_port=%d, rem_port=%d, cm_id=%p\n",
+ cm_node, cm_node->loc_port,
+ cm_node->rem_port, cm_node->cm_id);
+ ret = -ECONNRESET;
+ goto error;
+ }
+ }
+
+ irdma_send_cm_event(cm_node, cm_id, IW_CM_EVENT_ESTABLISHED, 0);
+ cm_node->accelerated = true;
+ complete(&cm_node->establish_comp);
+
+ if (cm_node->accept_pend) {
+ atomic_dec(&cm_node->listener->pend_accepts_cnt);
+ cm_node->accept_pend = 0;
+ }
+
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: rem_port=0x%04x, loc_port=0x%04x rem_addr=%pI4 loc_addr=%pI4 cm_node=%p cm_id=%p qp_id = %d\n\n",
+ cm_node->rem_port, cm_node->loc_port, cm_node->rem_addr,
+ cm_node->loc_addr, cm_node, cm_id, ibqp->qp_num);
+ cm_node->cm_core->stats_accepts++;
+
+ return 0;
+error:
+ irdma_free_lsmm_rsrc(iwqp);
+ irdma_rem_ref_cm_node(cm_node);
+
+ return ret;
+}
+
+/**
+ * irdma_reject - registered call for connection to be rejected
+ * @cm_id: cm information for passive connection
+ * @pdata: private data to be sent
+ * @pdata_len: private data length
+ */
+int irdma_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+ struct irdma_device *iwdev;
+ struct irdma_cm_node *cm_node;
+
+ cm_node = cm_id->provider_data;
+ cm_node->pdata.size = pdata_len;
+
+ trace_irdma_reject(cm_node, 0, NULL);
+
+ iwdev = to_iwdev(cm_id->device);
+ if (!iwdev)
+ return -EINVAL;
+
+ cm_node->cm_core->stats_rejects++;
+
+ if (pdata_len + sizeof(struct ietf_mpa_v2) > IRDMA_MAX_CM_BUF)
+ return -EINVAL;
+
+ return irdma_cm_reject(cm_node, pdata, pdata_len);
+}
+
+/**
+ * irdma_connect - registered call for connection to be established
+ * @cm_id: cm information for the active connection
+ * @conn_param: Information about the connection
+ */
+int irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+ struct ib_qp *ibqp;
+ struct irdma_qp *iwqp;
+ struct irdma_device *iwdev;
+ struct irdma_cm_node *cm_node;
+ struct irdma_cm_info cm_info;
+ struct sockaddr_in *laddr;
+ struct sockaddr_in *raddr;
+ struct sockaddr_in6 *laddr6;
+ struct sockaddr_in6 *raddr6;
+ int ret = 0;
+
+ ibqp = irdma_get_qp(cm_id->device, conn_param->qpn);
+ if (!ibqp)
+ return -EINVAL;
+ iwqp = to_iwqp(ibqp);
+ if (!iwqp)
+ return -EINVAL;
+ iwdev = iwqp->iwdev;
+ if (!iwdev)
+ return -EINVAL;
+
+ laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+ raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
+ laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
+ raddr6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
+
+ if (!(laddr->sin_port) || !(raddr->sin_port))
+ return -EINVAL;
+
+ iwqp->active_conn = 1;
+ iwqp->cm_id = NULL;
+ cm_id->provider_data = iwqp;
+
+ /* set up the connection params for the node */
+ if (cm_id->remote_addr.ss_family == AF_INET) {
+ if (iwdev->vsi.mtu < IRDMA_MIN_MTU_IPV4)
+ return -EINVAL;
+
+ cm_info.ipv4 = true;
+ memset(cm_info.loc_addr, 0, sizeof(cm_info.loc_addr));
+ memset(cm_info.rem_addr, 0, sizeof(cm_info.rem_addr));
+ cm_info.loc_addr[0] = ntohl(laddr->sin_addr.s_addr);
+ cm_info.rem_addr[0] = ntohl(raddr->sin_addr.s_addr);
+ cm_info.loc_port = ntohs(laddr->sin_port);
+ cm_info.rem_port = ntohs(raddr->sin_port);
+ cm_info.vlan_id = irdma_get_vlan_ipv4(cm_info.loc_addr);
+ } else {
+ if (iwdev->vsi.mtu < IRDMA_MIN_MTU_IPV6)
+ return -EINVAL;
+
+ cm_info.ipv4 = false;
+ irdma_copy_ip_ntohl(cm_info.loc_addr,
+ laddr6->sin6_addr.in6_u.u6_addr32);
+ irdma_copy_ip_ntohl(cm_info.rem_addr,
+ raddr6->sin6_addr.in6_u.u6_addr32);
+ cm_info.loc_port = ntohs(laddr6->sin6_port);
+ cm_info.rem_port = ntohs(raddr6->sin6_port);
+ irdma_get_vlan_mac_ipv6(cm_info.loc_addr, &cm_info.vlan_id,
+ NULL);
+ }
+ cm_info.cm_id = cm_id;
+ cm_info.qh_qpid = iwdev->vsi.ilq->qp_id;
+ cm_info.tos = cm_id->tos;
+ if (iwdev->vsi.dscp_mode) {
+ cm_info.user_pri =
+ iwqp->sc_qp.vsi->dscp_map[irdma_tos2dscp(cm_info.tos)];
+ } else {
+ cm_info.user_pri = rt_tos2priority(cm_id->tos);
+ cm_info.user_pri = irdma_iw_get_vlan_prio(cm_info.loc_addr,
+ cm_info.user_pri,
+ cm_info.ipv4);
+ }
+
+ if (iwqp->sc_qp.dev->ws_add(iwqp->sc_qp.vsi, cm_info.user_pri))
+ return -ENOMEM;
+ iwqp->sc_qp.user_pri = cm_info.user_pri;
+ irdma_qp_add_qos(&iwqp->sc_qp);
+ ibdev_dbg(&iwdev->ibdev, "DCB: TOS:[%d] UP:[%d]\n", cm_id->tos,
+ cm_info.user_pri);
+
+ trace_irdma_dcb_tos(iwdev, cm_id->tos, cm_info.user_pri);
+
+ ret = irdma_create_cm_node(&iwdev->cm_core, iwdev, conn_param, &cm_info,
+ &cm_node);
+ if (ret)
+ return ret;
+ ret = cm_node->cm_core->cm_create_ah(cm_node, true);
+ if (ret)
+ goto err;
+ if (irdma_manage_qhash(iwdev, &cm_info,
+ IRDMA_QHASH_TYPE_TCP_ESTABLISHED,
+ IRDMA_QHASH_MANAGE_TYPE_ADD, NULL, true)) {
+ ret = -EINVAL;
+ goto err;
+ }
+ cm_node->qhash_set = true;
+
+ cm_node->apbvt_entry = irdma_add_apbvt(iwdev, cm_info.loc_port);
+ if (!cm_node->apbvt_entry) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ cm_node->apbvt_set = true;
+ iwqp->cm_node = cm_node;
+ cm_node->iwqp = iwqp;
+ iwqp->cm_id = cm_id;
+ irdma_qp_add_ref(&iwqp->ibqp);
+ cm_id->add_ref(cm_id);
+
+ if (cm_node->state != IRDMA_CM_STATE_OFFLOADED) {
+ cm_node->state = IRDMA_CM_STATE_SYN_SENT;
+ ret = irdma_send_syn(cm_node, 0);
+ if (ret)
+ goto err;
+ }
+
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: rem_port=0x%04x, loc_port=0x%04x rem_addr=%pI4 loc_addr=%pI4 cm_node=%p cm_id=%p qp_id = %d\n\n",
+ cm_node->rem_port, cm_node->loc_port, cm_node->rem_addr,
+ cm_node->loc_addr, cm_node, cm_id, ibqp->qp_num);
+
+ trace_irdma_connect(cm_node, 0, NULL);
+
+ return 0;
+
+err:
+ if (cm_info.ipv4)
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: connect() FAILED: dest addr=%pI4",
+ cm_info.rem_addr);
+ else
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: connect() FAILED: dest addr=%pI6",
+ cm_info.rem_addr);
+ irdma_rem_ref_cm_node(cm_node);
+ iwdev->cm_core.stats_connect_errs++;
+
+ return ret;
+}
+
+/**
+ * irdma_create_listen - registered call creating listener
+ * @cm_id: cm information for passive connection
+ * @backlog: maximum number of pending accepts
+ */
+int irdma_create_listen(struct iw_cm_id *cm_id, int backlog)
+{
+ struct irdma_device *iwdev;
+ struct irdma_cm_listener *cm_listen_node;
+ struct irdma_cm_info cm_info = {};
+ struct sockaddr_in *laddr;
+ struct sockaddr_in6 *laddr6;
+ bool wildcard = false;
+ int err;
+
+ iwdev = to_iwdev(cm_id->device);
+ if (!iwdev)
+ return -EINVAL;
+
+ laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+ laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
+ cm_info.qh_qpid = iwdev->vsi.ilq->qp_id;
+
+ if (laddr->sin_family == AF_INET) {
+ if (iwdev->vsi.mtu < IRDMA_MIN_MTU_IPV4)
+ return -EINVAL;
+
+ cm_info.ipv4 = true;
+ cm_info.loc_addr[0] = ntohl(laddr->sin_addr.s_addr);
+ cm_info.loc_port = ntohs(laddr->sin_port);
+
+ if (laddr->sin_addr.s_addr != htonl(INADDR_ANY)) {
+ cm_info.vlan_id = irdma_get_vlan_ipv4(cm_info.loc_addr);
+ } else {
+ cm_info.vlan_id = 0xFFFF;
+ wildcard = true;
+ }
+ } else {
+ if (iwdev->vsi.mtu < IRDMA_MIN_MTU_IPV6)
+ return -EINVAL;
+
+ cm_info.ipv4 = false;
+ irdma_copy_ip_ntohl(cm_info.loc_addr,
+ laddr6->sin6_addr.in6_u.u6_addr32);
+ cm_info.loc_port = ntohs(laddr6->sin6_port);
+ if (ipv6_addr_type(&laddr6->sin6_addr) != IPV6_ADDR_ANY) {
+ irdma_get_vlan_mac_ipv6(cm_info.loc_addr,
+ &cm_info.vlan_id, NULL);
+ } else {
+ cm_info.vlan_id = 0xFFFF;
+ wildcard = true;
+ }
+ }
+
+ if (cm_info.vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode)
+ cm_info.vlan_id = 0;
+ cm_info.backlog = backlog;
+ cm_info.cm_id = cm_id;
+
+ trace_irdma_create_listen(iwdev, &cm_info);
+
+ cm_listen_node = irdma_make_listen_node(&iwdev->cm_core, iwdev,
+ &cm_info);
+ if (!cm_listen_node) {
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: cm_listen_node == NULL\n");
+ return -ENOMEM;
+ }
+
+ cm_id->provider_data = cm_listen_node;
+
+ cm_listen_node->tos = cm_id->tos;
+ if (iwdev->vsi.dscp_mode)
+ cm_listen_node->user_pri =
+ iwdev->vsi.dscp_map[irdma_tos2dscp(cm_id->tos)];
+ else
+ cm_listen_node->user_pri = rt_tos2priority(cm_id->tos);
+ cm_info.user_pri = cm_listen_node->user_pri;
+ if (!cm_listen_node->reused_node) {
+ if (wildcard) {
+ err = irdma_add_mqh(iwdev, &cm_info, cm_listen_node);
+ if (err)
+ goto error;
+ } else {
+ if (!iwdev->vsi.dscp_mode)
+ cm_listen_node->user_pri =
+ irdma_iw_get_vlan_prio(cm_info.loc_addr,
+ cm_info.user_pri,
+ cm_info.ipv4);
+ cm_info.user_pri = cm_listen_node->user_pri;
+ err = irdma_manage_qhash(iwdev, &cm_info,
+ IRDMA_QHASH_TYPE_TCP_SYN,
+ IRDMA_QHASH_MANAGE_TYPE_ADD,
+ NULL, true);
+ if (err)
+ goto error;
+
+ cm_listen_node->qhash_set = true;
+ }
+
+ cm_listen_node->apbvt_entry = irdma_add_apbvt(iwdev,
+ cm_info.loc_port);
+ if (!cm_listen_node->apbvt_entry)
+ goto error;
+ }
+ cm_id->add_ref(cm_id);
+ cm_listen_node->cm_core->stats_listen_created++;
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: loc_port=0x%04x loc_addr=%pI4 cm_listen_node=%p cm_id=%p qhash_set=%d vlan_id=%d\n",
+ cm_listen_node->loc_port, cm_listen_node->loc_addr,
+ cm_listen_node, cm_listen_node->cm_id,
+ cm_listen_node->qhash_set, cm_listen_node->vlan_id);
+
+ return 0;
+
+error:
+
+ irdma_cm_del_listen(&iwdev->cm_core, cm_listen_node, false);
+
+ return -EINVAL;
+}
+
+/**
+ * irdma_destroy_listen - registered call to destroy listener
+ * @cm_id: cm information for passive connection
+ */
+int irdma_destroy_listen(struct iw_cm_id *cm_id)
+{
+ struct irdma_device *iwdev;
+
+ iwdev = to_iwdev(cm_id->device);
+ if (cm_id->provider_data)
+ irdma_cm_del_listen(&iwdev->cm_core, cm_id->provider_data,
+ true);
+ else
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: cm_id->provider_data was NULL\n");
+
+ cm_id->rem_ref(cm_id);
+
+ return 0;
+}
+
+/**
+ * irdma_teardown_list_prep - add conn nodes slated for teardown to a list
+ * @cm_core: cm's core
+ * @teardown_list: list to which matching cm_nodes are added
+ * @ipaddr: pointer to ip address
+ * @nfo: pointer to cm_info structure instance
+ * @disconnect_all: flag indicating disconnect all QPs
+ */
+static void irdma_teardown_list_prep(struct irdma_cm_core *cm_core,
+ struct list_head *teardown_list,
+ u32 *ipaddr,
+ struct irdma_cm_info *nfo,
+ bool disconnect_all)
+{
+ struct irdma_cm_node *cm_node;
+ int bkt;
+
+ hash_for_each_rcu(cm_core->cm_hash_tbl, bkt, cm_node, list) {
+ if ((disconnect_all ||
+ (nfo->vlan_id == cm_node->vlan_id &&
+ !memcmp(cm_node->loc_addr, ipaddr, nfo->ipv4 ? 4 : 16))) &&
+ refcount_inc_not_zero(&cm_node->refcnt))
+ list_add(&cm_node->teardown_entry, teardown_list);
+ }
+}
+
+/**
+ * irdma_cm_event_connected - handle connected active node
+ * @event: the info for cm_node of connection
+ */
+static void irdma_cm_event_connected(struct irdma_cm_event *event)
+{
+ struct irdma_qp *iwqp;
+ struct irdma_device *iwdev;
+ struct irdma_cm_node *cm_node;
+ struct irdma_sc_dev *dev;
+ struct ib_qp_attr attr = {};
+ struct iw_cm_id *cm_id;
+ int status;
+ bool read0;
+ int wait_ret = 0;
+
+ cm_node = event->cm_node;
+ cm_id = cm_node->cm_id;
+ iwqp = cm_id->provider_data;
+ iwdev = iwqp->iwdev;
+ dev = &iwdev->rf->sc_dev;
+ if (iwqp->sc_qp.qp_uk.destroy_pending) {
+ status = -ETIMEDOUT;
+ goto error;
+ }
+
+ irdma_cm_init_tsa_conn(iwqp, cm_node);
+ read0 = (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO);
+ if (iwqp->page)
+ iwqp->sc_qp.qp_uk.sq_base = kmap_local_page(iwqp->page);
+ irdma_sc_send_rtt(&iwqp->sc_qp, read0);
+ if (iwqp->page)
+ kunmap_local(iwqp->sc_qp.qp_uk.sq_base);
+
+ attr.qp_state = IB_QPS_RTS;
+ cm_node->qhash_set = false;
+ irdma_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+ if (dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_RTS_AE) {
+ wait_ret = wait_event_interruptible_timeout(iwqp->waitq,
+ iwqp->rts_ae_rcvd,
+ IRDMA_MAX_TIMEOUT);
+ if (!wait_ret)
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: Slow Connection: cm_node=%p, loc_port=%d, rem_port=%d, cm_id=%p\n",
+ cm_node, cm_node->loc_port,
+ cm_node->rem_port, cm_node->cm_id);
+ }
+
+ irdma_send_cm_event(cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY, 0);
+ cm_node->accelerated = true;
+ complete(&cm_node->establish_comp);
+ cm_node->cm_core->cm_free_ah(cm_node);
+ return;
+
+error:
+ iwqp->cm_id = NULL;
+ cm_id->provider_data = NULL;
+ irdma_send_cm_event(event->cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY,
+ status);
+ irdma_rem_ref_cm_node(event->cm_node);
+}
+
+/**
+ * irdma_cm_event_reset - handle reset
+ * @event: the info for cm_node of connection
+ */
+static void irdma_cm_event_reset(struct irdma_cm_event *event)
+{
+ struct irdma_cm_node *cm_node = event->cm_node;
+ struct iw_cm_id *cm_id = cm_node->cm_id;
+ struct irdma_qp *iwqp;
+
+ if (!cm_id)
+ return;
+
+ iwqp = cm_id->provider_data;
+ if (!iwqp)
+ return;
+
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: reset event %p - cm_id = %p\n", event->cm_node, cm_id);
+ iwqp->cm_id = NULL;
+
+ irdma_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_DISCONNECT,
+ -ECONNRESET);
+ irdma_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_CLOSE, 0);
+}
+
+/**
+ * irdma_cm_event_handler - send event to cm upper layer
+ * @work: pointer to the cm event work item
+ */
+static void irdma_cm_event_handler(struct work_struct *work)
+{
+ struct irdma_cm_event *event = container_of(work, struct irdma_cm_event, event_work);
+ struct irdma_cm_node *cm_node;
+
+ if (!event || !event->cm_node || !event->cm_node->cm_core)
+ return;
+
+ cm_node = event->cm_node;
+ trace_irdma_cm_event_handler(cm_node, event->type, NULL);
+
+ switch (event->type) {
+ case IRDMA_CM_EVENT_MPA_REQ:
+ irdma_send_cm_event(cm_node, cm_node->cm_id,
+ IW_CM_EVENT_CONNECT_REQUEST, 0);
+ break;
+ case IRDMA_CM_EVENT_RESET:
+ irdma_cm_event_reset(event);
+ break;
+ case IRDMA_CM_EVENT_CONNECTED:
+ if (!event->cm_node->cm_id ||
+ event->cm_node->state != IRDMA_CM_STATE_OFFLOADED)
+ break;
+ irdma_cm_event_connected(event);
+ break;
+ case IRDMA_CM_EVENT_MPA_REJECT:
+ if (!event->cm_node->cm_id ||
+ cm_node->state == IRDMA_CM_STATE_OFFLOADED)
+ break;
+ irdma_send_cm_event(cm_node, cm_node->cm_id,
+ IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED);
+ break;
+ case IRDMA_CM_EVENT_ABORTED:
+ if (!event->cm_node->cm_id ||
+ event->cm_node->state == IRDMA_CM_STATE_OFFLOADED)
+ break;
+ irdma_event_connect_error(event);
+ break;
+ default:
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: bad event type = %d\n", event->type);
+ break;
+ }
+
+ irdma_rem_ref_cm_node(event->cm_node);
+ kfree(event);
+}
+
+/**
+ * irdma_cm_post_event - queue event request for worker thread
+ * @event: cm event to queue for the event worker
+ */
+static void irdma_cm_post_event(struct irdma_cm_event *event)
+{
+ refcount_inc(&event->cm_node->refcnt);
+ INIT_WORK(&event->event_work, irdma_cm_event_handler);
+ queue_work(event->cm_node->cm_core->event_wq, &event->event_work);
+}
+
+/**
+ * irdma_cm_teardown_connections - teardown QPs
+ * @iwdev: device pointer
+ * @ipaddr: Pointer to IPv4 or IPv6 address
+ * @nfo: Connection info
+ * @disconnect_all: flag indicating disconnect all QPs
+ *
+ * teardown QPs where source or destination addr matches ip addr
+ */
+void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr,
+ struct irdma_cm_info *nfo,
+ bool disconnect_all)
+{
+ struct irdma_cm_core *cm_core = &iwdev->cm_core;
+ struct list_head *list_core_temp;
+ struct list_head *list_node;
+ struct irdma_cm_node *cm_node;
+ struct list_head teardown_list;
+ struct ib_qp_attr attr;
+
+ INIT_LIST_HEAD(&teardown_list);
+
+ rcu_read_lock();
+ irdma_teardown_list_prep(cm_core, &teardown_list, ipaddr, nfo, disconnect_all);
+ rcu_read_unlock();
+
+ list_for_each_safe (list_node, list_core_temp, &teardown_list) {
+ cm_node = container_of(list_node, struct irdma_cm_node,
+ teardown_entry);
+ attr.qp_state = IB_QPS_ERR;
+ irdma_modify_qp(&cm_node->iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+ if (iwdev->rf->reset)
+ irdma_cm_disconn(cm_node->iwqp);
+ irdma_rem_ref_cm_node(cm_node);
+ }
+}
+
+/**
+ * irdma_qhash_ctrl - enable/disable qhash for list
+ * @iwdev: device pointer
+ * @parent_listen_node: parent listen node
+ * @nfo: cm info node
+ * @ipaddr: Pointer to IPv4 or IPv6 address
+ * @ipv4: flag indicating IPv4 when true
+ * @ifup: flag indicating interface up when true
+ *
+ * Enables or disables the qhash for the node in the child
+ * listen list that matches ipaddr. If no matching IP was found
+ * it will allocate and add a new child listen node to the
+ * parent listen node. The listen_list_lock is assumed to be
+ * held when called.
+ */
+static void irdma_qhash_ctrl(struct irdma_device *iwdev,
+ struct irdma_cm_listener *parent_listen_node,
+ struct irdma_cm_info *nfo, u32 *ipaddr, bool ipv4,
+ bool ifup)
+{
+ struct list_head *child_listen_list = &parent_listen_node->child_listen_list;
+ struct irdma_cm_listener *child_listen_node;
+ struct list_head *pos, *tpos;
+ bool node_allocated = false;
+ enum irdma_quad_hash_manage_type op = ifup ?
+ IRDMA_QHASH_MANAGE_TYPE_ADD :
+ IRDMA_QHASH_MANAGE_TYPE_DELETE;
+ int err;
+
+ list_for_each_safe (pos, tpos, child_listen_list) {
+ child_listen_node = list_entry(pos, struct irdma_cm_listener,
+ child_listen_list);
+ if (!memcmp(child_listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16))
+ goto set_qhash;
+ }
+
+ /* if not found then add a child listener if interface is going up */
+ if (!ifup)
+ return;
+ child_listen_node = kmemdup(parent_listen_node,
+ sizeof(*child_listen_node), GFP_ATOMIC);
+ if (!child_listen_node)
+ return;
+
+ node_allocated = true;
+ memcpy(child_listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16);
+
+set_qhash:
+ memcpy(nfo->loc_addr, child_listen_node->loc_addr,
+ sizeof(nfo->loc_addr));
+ nfo->vlan_id = child_listen_node->vlan_id;
+ err = irdma_manage_qhash(iwdev, nfo, IRDMA_QHASH_TYPE_TCP_SYN, op, NULL,
+ false);
+ if (!err) {
+ child_listen_node->qhash_set = ifup;
+ if (node_allocated)
+ list_add(&child_listen_node->child_listen_list,
+ &parent_listen_node->child_listen_list);
+ } else if (node_allocated) {
+ kfree(child_listen_node);
+ }
+}
+
+/**
+ * irdma_if_notify - process an ifup/ifdown on an interface
+ * @iwdev: device pointer
+ * @netdev: network device structure
+ * @ipaddr: Pointer to IPv4 or IPv6 address
+ * @ipv4: flag indicating IPv4 when true
+ * @ifup: flag indicating interface up when true
+ */
+void irdma_if_notify(struct irdma_device *iwdev, struct net_device *netdev,
+ u32 *ipaddr, bool ipv4, bool ifup)
+{
+ struct irdma_cm_core *cm_core = &iwdev->cm_core;
+ unsigned long flags;
+ struct irdma_cm_listener *listen_node;
+ static const u32 ip_zero[4] = { 0, 0, 0, 0 };
+ struct irdma_cm_info nfo = {};
+ u16 vlan_id = rdma_vlan_dev_vlan_id(netdev);
+ enum irdma_quad_hash_manage_type op = ifup ?
+ IRDMA_QHASH_MANAGE_TYPE_ADD :
+ IRDMA_QHASH_MANAGE_TYPE_DELETE;
+
+ nfo.vlan_id = vlan_id;
+ nfo.ipv4 = ipv4;
+ nfo.qh_qpid = 1;
+
+ /* Disable or enable qhash for listeners */
+ spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+ list_for_each_entry (listen_node, &cm_core->listen_list, list) {
+ if (vlan_id != listen_node->vlan_id ||
+ (memcmp(listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16) &&
+ memcmp(listen_node->loc_addr, ip_zero, ipv4 ? 4 : 16)))
+ continue;
+
+ memcpy(nfo.loc_addr, listen_node->loc_addr,
+ sizeof(nfo.loc_addr));
+ nfo.loc_port = listen_node->loc_port;
+ nfo.user_pri = listen_node->user_pri;
+ if (!list_empty(&listen_node->child_listen_list)) {
+ irdma_qhash_ctrl(iwdev, listen_node, &nfo, ipaddr, ipv4,
+ ifup);
+ } else if (memcmp(listen_node->loc_addr, ip_zero,
+ ipv4 ? 4 : 16)) {
+ if (!irdma_manage_qhash(iwdev, &nfo,
+ IRDMA_QHASH_TYPE_TCP_SYN, op,
+ NULL, false))
+ listen_node->qhash_set = ifup;
+ }
+ }
+ spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+
+ /* disconnect any connected QPs on ifdown */
+ if (!ifup)
+ irdma_cm_teardown_connections(iwdev, ipaddr, &nfo, false);
+}
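+
+/*
+ * Illustrative caller sketch (hypothetical, not part of this file): the
+ * driver's address notifier would react to an IPv4 address going away on
+ * @netdev roughly as
+ *
+ *	irdma_if_notify(iwdev, netdev, &ip_addr, true, false);
+ *
+ * which clears the matching qhash entries and then tears down any
+ * connections bound to that address via irdma_cm_teardown_connections().
+ */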
diff --git a/drivers/infiniband/hw/irdma/cm.h b/drivers/infiniband/hw/irdma/cm.h
new file mode 100644
index 000000000000..48ee285cf745
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/cm.h
@@ -0,0 +1,416 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#ifndef IRDMA_CM_H
+#define IRDMA_CM_H
+
+#define IRDMA_MPA_REQUEST_ACCEPT 1
+#define IRDMA_MPA_REQUEST_REJECT 2
+
+/* IETF MPA -- defines */
+#define IEFT_MPA_KEY_REQ "MPA ID Req Frame"
+#define IEFT_MPA_KEY_REP "MPA ID Rep Frame"
+#define IETF_MPA_KEY_SIZE 16
+#define IETF_MPA_VER 1
+#define IETF_MAX_PRIV_DATA_LEN 512
+#define IETF_MPA_FRAME_SIZE 20
+#define IETF_RTR_MSG_SIZE 4
+#define IETF_MPA_V2_FLAG 0x10
+#define SNDMARKER_SEQNMASK 0x000001ff
+#define IRDMA_MAX_IETF_SIZE 32
+
+/* IETF RTR MSG Fields */
+#define IETF_PEER_TO_PEER 0x8000
+#define IETF_FLPDU_ZERO_LEN 0x4000
+#define IETF_RDMA0_WRITE 0x8000
+#define IETF_RDMA0_READ 0x4000
+#define IETF_NO_IRD_ORD 0x3fff
+
+#define MAX_PORTS 65536
+
+#define IRDMA_PASSIVE_STATE_INDICATED 0
+#define IRDMA_DO_NOT_SEND_RESET_EVENT 1
+#define IRDMA_SEND_RESET_EVENT 2
+
+#define MAX_IRDMA_IFS 4
+
+#define SET_ACK 1
+#define SET_SYN 2
+#define SET_FIN 4
+#define SET_RST 8
+
+#define TCP_OPTIONS_PADDING 3
+
+#define IRDMA_DEFAULT_RETRYS 64
+#define IRDMA_DEFAULT_RETRANS 32
+#define IRDMA_DEFAULT_TTL 0x40
+#define IRDMA_DEFAULT_RTT_VAR 6
+#define IRDMA_DEFAULT_SS_THRESH 0x3fffffff
+#define IRDMA_DEFAULT_REXMIT_THRESH 8
+
+#define IRDMA_RETRY_TIMEOUT HZ
+#define IRDMA_SHORT_TIME 10
+#define IRDMA_LONG_TIME (2 * HZ)
+#define IRDMA_MAX_TIMEOUT ((unsigned long)(12 * HZ))
+
+#define IRDMA_CM_HASHTABLE_SIZE 1024
+#define IRDMA_CM_TCP_TIMER_INTERVAL 3000
+#define IRDMA_CM_DEFAULT_MTU 1540
+#define IRDMA_CM_DEFAULT_FRAME_CNT 10
+#define IRDMA_CM_THREAD_STACK_SIZE 256
+#define IRDMA_CM_DEFAULT_RCV_WND 64240
+#define IRDMA_CM_DEFAULT_RCV_WND_SCALED 0x3FFFC
+#define IRDMA_CM_DEFAULT_RCV_WND_SCALE 2
+#define IRDMA_CM_DEFAULT_FREE_PKTS 10
+#define IRDMA_CM_FREE_PKT_LO_WATERMARK 2
+#define IRDMA_CM_DEFAULT_MSS 536
+#define IRDMA_CM_DEFAULT_MPA_VER 2
+#define IRDMA_CM_DEFAULT_SEQ 0x159bf75f
+#define IRDMA_CM_DEFAULT_LOCAL_ID 0x3b47
+#define IRDMA_CM_DEFAULT_SEQ2 0x18ed5740
+#define IRDMA_CM_DEFAULT_LOCAL_ID2 0xb807
+#define IRDMA_MAX_CM_BUF (IRDMA_MAX_IETF_SIZE + IETF_MAX_PRIV_DATA_LEN)
+
+enum ietf_mpa_flags {
+ IETF_MPA_FLAGS_REJECT = 0x20,
+ IETF_MPA_FLAGS_CRC = 0x40,
+ IETF_MPA_FLAGS_MARKERS = 0x80,
+};
+
+enum irdma_timer_type {
+ IRDMA_TIMER_TYPE_SEND,
+ IRDMA_TIMER_TYPE_CLOSE,
+};
+
+enum option_nums {
+ OPTION_NUM_EOL,
+ OPTION_NUM_NONE,
+ OPTION_NUM_MSS,
+ OPTION_NUM_WINDOW_SCALE,
+ OPTION_NUM_SACK_PERM,
+ OPTION_NUM_SACK,
+ OPTION_NUM_WRITE0 = 0xbc,
+};
+
+/* cm node transition states */
+enum irdma_cm_node_state {
+ IRDMA_CM_STATE_UNKNOWN,
+ IRDMA_CM_STATE_INITED,
+ IRDMA_CM_STATE_LISTENING,
+ IRDMA_CM_STATE_SYN_RCVD,
+ IRDMA_CM_STATE_SYN_SENT,
+ IRDMA_CM_STATE_ONE_SIDE_ESTABLISHED,
+ IRDMA_CM_STATE_ESTABLISHED,
+ IRDMA_CM_STATE_ACCEPTING,
+ IRDMA_CM_STATE_MPAREQ_SENT,
+ IRDMA_CM_STATE_MPAREQ_RCVD,
+ IRDMA_CM_STATE_MPAREJ_RCVD,
+ IRDMA_CM_STATE_OFFLOADED,
+ IRDMA_CM_STATE_FIN_WAIT1,
+ IRDMA_CM_STATE_FIN_WAIT2,
+ IRDMA_CM_STATE_CLOSE_WAIT,
+ IRDMA_CM_STATE_TIME_WAIT,
+ IRDMA_CM_STATE_LAST_ACK,
+ IRDMA_CM_STATE_CLOSING,
+ IRDMA_CM_STATE_LISTENER_DESTROYED,
+ IRDMA_CM_STATE_CLOSED,
+};
+
+enum mpa_frame_ver {
+ IETF_MPA_V1 = 1,
+ IETF_MPA_V2 = 2,
+};
+
+enum mpa_frame_key {
+ MPA_KEY_REQUEST,
+ MPA_KEY_REPLY,
+};
+
+enum send_rdma0 {
+ SEND_RDMA_READ_ZERO = 1,
+ SEND_RDMA_WRITE_ZERO = 2,
+};
+
+enum irdma_tcpip_pkt_type {
+ IRDMA_PKT_TYPE_UNKNOWN,
+ IRDMA_PKT_TYPE_SYN,
+ IRDMA_PKT_TYPE_SYNACK,
+ IRDMA_PKT_TYPE_ACK,
+ IRDMA_PKT_TYPE_FIN,
+ IRDMA_PKT_TYPE_RST,
+};
+
+enum irdma_cm_listener_state {
+ IRDMA_CM_LISTENER_PASSIVE_STATE = 1,
+ IRDMA_CM_LISTENER_ACTIVE_STATE = 2,
+ IRDMA_CM_LISTENER_EITHER_STATE = 3,
+};
+
+/* CM event codes */
+enum irdma_cm_event_type {
+ IRDMA_CM_EVENT_UNKNOWN,
+ IRDMA_CM_EVENT_ESTABLISHED,
+ IRDMA_CM_EVENT_MPA_REQ,
+ IRDMA_CM_EVENT_MPA_CONNECT,
+ IRDMA_CM_EVENT_MPA_ACCEPT,
+ IRDMA_CM_EVENT_MPA_REJECT,
+ IRDMA_CM_EVENT_MPA_ESTABLISHED,
+ IRDMA_CM_EVENT_CONNECTED,
+ IRDMA_CM_EVENT_RESET,
+ IRDMA_CM_EVENT_ABORTED,
+};
+
+struct ietf_mpa_v1 {
+ u8 key[IETF_MPA_KEY_SIZE];
+ u8 flags;
+ u8 rev;
+ __be16 priv_data_len;
+ u8 priv_data[];
+};
+
+struct ietf_rtr_msg {
+ __be16 ctrl_ird;
+ __be16 ctrl_ord;
+};
+
+struct ietf_mpa_v2 {
+ u8 key[IETF_MPA_KEY_SIZE];
+ u8 flags;
+ u8 rev;
+ __be16 priv_data_len;
+ struct ietf_rtr_msg rtr_msg;
+ u8 priv_data[];
+};
+
+struct option_base {
+ u8 optionnum;
+ u8 len;
+};
+
+struct option_mss {
+ u8 optionnum;
+ u8 len;
+ __be16 mss;
+};
+
+struct option_windowscale {
+ u8 optionnum;
+ u8 len;
+ u8 shiftcount;
+};
+
+union all_known_options {
+ char eol;
+ struct option_base base;
+ struct option_mss mss;
+ struct option_windowscale windowscale;
+};
+
+struct irdma_timer_entry {
+ struct list_head list;
+ unsigned long timetosend; /* jiffies */
+ struct irdma_puda_buf *sqbuf;
+ u32 type;
+ u32 retrycount;
+ u32 retranscount;
+ u32 context;
+ u32 send_retrans;
+ int close_when_complete;
+};
+
+/* CM context params */
+struct irdma_cm_tcp_context {
+ u8 client;
+ u32 loc_seq_num;
+ u32 loc_ack_num;
+ u32 rem_ack_num;
+ u32 rcv_nxt;
+ u32 loc_id;
+ u32 rem_id;
+ u32 snd_wnd;
+ u32 max_snd_wnd;
+ u32 rcv_wnd;
+ u32 mss;
+ u8 snd_wscale;
+ u8 rcv_wscale;
+};
+
+struct irdma_apbvt_entry {
+ struct hlist_node hlist;
+ u32 use_cnt;
+ u16 port;
+};
+
+struct irdma_cm_listener {
+ struct list_head list;
+ struct iw_cm_id *cm_id;
+ struct irdma_cm_core *cm_core;
+ struct irdma_device *iwdev;
+ struct list_head child_listen_list;
+ struct irdma_apbvt_entry *apbvt_entry;
+ enum irdma_cm_listener_state listener_state;
+ refcount_t refcnt;
+ atomic_t pend_accepts_cnt;
+ u32 loc_addr[4];
+ u32 reused_node;
+ int backlog;
+ u16 loc_port;
+ u16 vlan_id;
+ u8 loc_mac[ETH_ALEN];
+ u8 user_pri;
+ u8 tos;
+ bool qhash_set:1;
+ bool ipv4:1;
+};
+
+struct irdma_kmem_info {
+ void *addr;
+ u32 size;
+};
+
+struct irdma_mpa_priv_info {
+ const void *addr;
+ u32 size;
+};
+
+struct irdma_cm_node {
+ struct irdma_qp *iwqp;
+ struct irdma_device *iwdev;
+ struct irdma_sc_dev *dev;
+ struct irdma_cm_tcp_context tcp_cntxt;
+ struct irdma_cm_core *cm_core;
+ struct irdma_timer_entry *send_entry;
+ struct irdma_timer_entry *close_entry;
+ struct irdma_cm_listener *listener;
+ struct list_head timer_entry;
+ struct list_head reset_entry;
+ struct list_head teardown_entry;
+ struct irdma_apbvt_entry *apbvt_entry;
+ struct rcu_head rcu_head;
+ struct irdma_mpa_priv_info pdata;
+ struct irdma_sc_ah *ah;
+ union {
+ struct ietf_mpa_v1 mpa_frame;
+ struct ietf_mpa_v2 mpa_v2_frame;
+ };
+ struct irdma_kmem_info mpa_hdr;
+ struct iw_cm_id *cm_id;
+ struct hlist_node list;
+ struct completion establish_comp;
+ spinlock_t retrans_list_lock; /* protect CM node rexmit updates */
+ atomic_t passive_state;
+ refcount_t refcnt;
+ enum irdma_cm_node_state state;
+ enum send_rdma0 send_rdma0_op;
+ enum mpa_frame_ver mpa_frame_rev;
+ u32 loc_addr[4], rem_addr[4];
+ u16 loc_port, rem_port;
+ int apbvt_set;
+ int accept_pend;
+ u16 vlan_id;
+ u16 ird_size;
+ u16 ord_size;
+ u16 mpav2_ird_ord;
+ u16 lsmm_size;
+ u8 pdata_buf[IETF_MAX_PRIV_DATA_LEN];
+ u8 loc_mac[ETH_ALEN];
+ u8 rem_mac[ETH_ALEN];
+ u8 user_pri;
+ u8 tos;
+ bool ack_rcvd:1;
+ bool qhash_set:1;
+ bool ipv4:1;
+ bool snd_mark_en:1;
+ bool rcv_mark_en:1;
+ bool do_lpb:1;
+ bool accelerated:1;
+};
+
+/* Used by internal CM APIs to pass CM information */
+struct irdma_cm_info {
+ struct iw_cm_id *cm_id;
+ u16 loc_port;
+ u16 rem_port;
+ u32 loc_addr[4];
+ u32 rem_addr[4];
+ u32 qh_qpid;
+ u16 vlan_id;
+ int backlog;
+ u8 user_pri;
+ u8 tos;
+ bool ipv4;
+};
+
+struct irdma_cm_event {
+ enum irdma_cm_event_type type;
+ struct irdma_cm_info cm_info;
+ struct work_struct event_work;
+ struct irdma_cm_node *cm_node;
+};
+
+struct irdma_cm_core {
+ struct irdma_device *iwdev;
+ struct irdma_sc_dev *dev;
+ struct list_head listen_list;
+ DECLARE_HASHTABLE(cm_hash_tbl, 8);
+ DECLARE_HASHTABLE(apbvt_hash_tbl, 8);
+ struct timer_list tcp_timer;
+ struct workqueue_struct *event_wq;
+ spinlock_t ht_lock; /* protect CM node (active side) list */
+ spinlock_t listen_list_lock; /* protect listener list */
+ spinlock_t apbvt_lock; /* serialize apbvt add/del entries */
+ u64 stats_nodes_created;
+ u64 stats_nodes_destroyed;
+ u64 stats_listen_created;
+ u64 stats_listen_destroyed;
+ u64 stats_listen_nodes_created;
+ u64 stats_listen_nodes_destroyed;
+ u64 stats_lpbs;
+ u64 stats_accepts;
+ u64 stats_rejects;
+ u64 stats_connect_errs;
+ u64 stats_passive_errs;
+ u64 stats_pkt_retrans;
+ u64 stats_backlog_drops;
+ struct irdma_puda_buf *(*form_cm_frame)(struct irdma_cm_node *cm_node,
+ struct irdma_kmem_info *options,
+ struct irdma_kmem_info *hdr,
+ struct irdma_mpa_priv_info *pdata,
+ u8 flags);
+ int (*cm_create_ah)(struct irdma_cm_node *cm_node, bool wait);
+ void (*cm_free_ah)(struct irdma_cm_node *cm_node);
+};
+
+int irdma_schedule_cm_timer(struct irdma_cm_node *cm_node,
+ struct irdma_puda_buf *sqbuf,
+ enum irdma_timer_type type, int send_retrans,
+ int close_when_complete);
+
+static inline u8 irdma_tos2dscp(u8 tos)
+{
+#define IRDMA_DSCP_VAL GENMASK(7, 2)
+ return (u8)FIELD_GET(IRDMA_DSCP_VAL, tos);
+}
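+
+/*
+ * For illustration only: irdma_tos2dscp() just drops the two ECN bits of the
+ * ToS byte, so e.g. tos = 0x68 (binary 01101000) yields DSCP 26 (AF31), and
+ * any tos value below 4 maps to DSCP 0.
+ */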
+
+int irdma_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
+int irdma_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
+int irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
+int irdma_create_listen(struct iw_cm_id *cm_id, int backlog);
+int irdma_destroy_listen(struct iw_cm_id *cm_id);
+int irdma_add_arp(struct irdma_pci_f *rf, u32 *ip, bool ipv4, const u8 *mac);
+void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr,
+ struct irdma_cm_info *nfo,
+ bool disconnect_all);
+int irdma_cm_start(struct irdma_device *dev);
+int irdma_cm_stop(struct irdma_device *dev);
+bool irdma_ipv4_is_lpb(u32 loc_addr, u32 rem_addr);
+bool irdma_ipv6_is_lpb(u32 *loc_addr, u32 *rem_addr);
+int irdma_arp_table(struct irdma_pci_f *rf, u32 *ip_addr, bool ipv4,
+ const u8 *mac_addr, u32 action);
+void irdma_if_notify(struct irdma_device *iwdev, struct net_device *netdev,
+ u32 *ipaddr, bool ipv4, bool ifup);
+bool irdma_port_in_use(struct irdma_cm_core *cm_core, u16 port);
+void irdma_send_ack(struct irdma_cm_node *cm_node);
+void irdma_lpb_nop(struct irdma_sc_qp *qp);
+void irdma_rem_ref_cm_node(struct irdma_cm_node *cm_node);
+void irdma_add_conn_est_qh(struct irdma_cm_node *cm_node);
+#endif /* IRDMA_CM_H */
diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c
new file mode 100644
index 000000000000..ce5cf89c463c
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/ctrl.c
@@ -0,0 +1,6602 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include <linux/etherdevice.h>
+
+#include "osdep.h"
+#include "hmc.h"
+#include "defs.h"
+#include "type.h"
+#include "ws.h"
+#include "protos.h"
+
+/**
+ * irdma_get_qp_from_list - get next qp from a list
+ * @head: list head of QPs
+ * @qp: current qp
+ */
+struct irdma_sc_qp *irdma_get_qp_from_list(struct list_head *head,
+ struct irdma_sc_qp *qp)
+{
+ struct list_head *lastentry;
+ struct list_head *entry = NULL;
+
+ if (list_empty(head))
+ return NULL;
+
+ if (!qp) {
+ entry = head->next;
+ } else {
+ lastentry = &qp->list;
+ entry = lastentry->next;
+ if (entry == head)
+ return NULL;
+ }
+
+ return container_of(entry, struct irdma_sc_qp, list);
+}
+
+/**
+ * irdma_sc_suspend_resume_qps - suspend/resume all QPs on a VSI
+ * @vsi: the VSI struct pointer
+ * @op: Set to IRDMA_OP_RESUME or IRDMA_OP_SUSPEND
+ */
+void irdma_sc_suspend_resume_qps(struct irdma_sc_vsi *vsi, u8 op)
+{
+ struct irdma_sc_qp *qp = NULL;
+ u8 i;
+
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+ mutex_lock(&vsi->qos[i].qos_mutex);
+ qp = irdma_get_qp_from_list(&vsi->qos[i].qplist, qp);
+ while (qp) {
+ if (op == IRDMA_OP_RESUME) {
+ if (!qp->dev->ws_add(vsi, i)) {
+ qp->qs_handle =
+ vsi->qos[qp->user_pri].qs_handle;
+ irdma_cqp_qp_suspend_resume(qp, op);
+ } else {
+ irdma_cqp_qp_suspend_resume(qp, op);
+ irdma_modify_qp_to_err(qp);
+ }
+ } else if (op == IRDMA_OP_SUSPEND) {
+ /* issue cqp suspend command */
+ if (!irdma_cqp_qp_suspend_resume(qp, op))
+ atomic_inc(&vsi->qp_suspend_reqs);
+ }
+ qp = irdma_get_qp_from_list(&vsi->qos[i].qplist, qp);
+ }
+ mutex_unlock(&vsi->qos[i].qos_mutex);
+ }
+}
+
+static void irdma_set_qos_info(struct irdma_sc_vsi *vsi,
+ struct irdma_l2params *l2p)
+{
+ u8 i;
+
+ if (vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+ vsi->qos[i].qs_handle = vsi->dev->qos[i].qs_handle;
+ vsi->qos[i].valid = true;
+ }
+
+ return;
+ }
+ vsi->qos_rel_bw = l2p->vsi_rel_bw;
+ vsi->qos_prio_type = l2p->vsi_prio_type;
+ vsi->dscp_mode = l2p->dscp_mode;
+ if (l2p->dscp_mode) {
+ memcpy(vsi->dscp_map, l2p->dscp_map, sizeof(vsi->dscp_map));
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++)
+ l2p->up2tc[i] = i;
+ }
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+ if (vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
+ vsi->qos[i].qs_handle = l2p->qs_handle_list[i];
+ vsi->qos[i].traffic_class = l2p->up2tc[i];
+ vsi->qos[i].rel_bw =
+ l2p->tc_info[vsi->qos[i].traffic_class].rel_bw;
+ vsi->qos[i].prio_type =
+ l2p->tc_info[vsi->qos[i].traffic_class].prio_type;
+ vsi->qos[i].valid = false;
+ }
+}
+
+/**
+ * irdma_change_l2params - apply new l2 parameters to the VSI and its QPs
+ * @vsi: RDMA VSI pointer
+ * @l2params: New parameters from l2
+ */
+void irdma_change_l2params(struct irdma_sc_vsi *vsi,
+ struct irdma_l2params *l2params)
+{
+ if (l2params->mtu_changed) {
+ vsi->mtu = l2params->mtu;
+ if (vsi->ieq)
+ irdma_reinitialize_ieq(vsi);
+ }
+
+ if (!l2params->tc_changed)
+ return;
+
+ vsi->tc_change_pending = false;
+ irdma_set_qos_info(vsi, l2params);
+ irdma_sc_suspend_resume_qps(vsi, IRDMA_OP_RESUME);
+}
+
+/**
+ * irdma_qp_rem_qos - remove qp from qos lists during destroy qp
+ * @qp: qp to be removed from qos
+ */
+void irdma_qp_rem_qos(struct irdma_sc_qp *qp)
+{
+ struct irdma_sc_vsi *vsi = qp->vsi;
+
+ ibdev_dbg(to_ibdev(qp->dev),
+ "DCB: DCB: Remove qp[%d] UP[%d] qset[%d] on_qoslist[%d]\n",
+ qp->qp_uk.qp_id, qp->user_pri, qp->qs_handle,
+ qp->on_qoslist);
+ mutex_lock(&vsi->qos[qp->user_pri].qos_mutex);
+ if (qp->on_qoslist) {
+ qp->on_qoslist = false;
+ list_del(&qp->list);
+ }
+ mutex_unlock(&vsi->qos[qp->user_pri].qos_mutex);
+}
+
+/**
+ * irdma_qp_add_qos - called during setctx for qp to be added to qos
+ * @qp: qp to be added to qos
+ */
+void irdma_qp_add_qos(struct irdma_sc_qp *qp)
+{
+ struct irdma_sc_vsi *vsi = qp->vsi;
+
+ ibdev_dbg(to_ibdev(qp->dev),
+ "DCB: DCB: Add qp[%d] UP[%d] qset[%d] on_qoslist[%d]\n",
+ qp->qp_uk.qp_id, qp->user_pri, qp->qs_handle,
+ qp->on_qoslist);
+ mutex_lock(&vsi->qos[qp->user_pri].qos_mutex);
+ if (!qp->on_qoslist) {
+ list_add(&qp->list, &vsi->qos[qp->user_pri].qplist);
+ qp->on_qoslist = true;
+ qp->qs_handle = vsi->qos[qp->user_pri].qs_handle;
+ }
+ mutex_unlock(&vsi->qos[qp->user_pri].qos_mutex);
+}
+
+/**
+ * irdma_sc_pd_init - initialize sc pd struct
+ * @dev: sc device struct
+ * @pd: sc pd ptr
+ * @pd_id: pd_id for allocated pd
+ * @abi_ver: User/Kernel ABI version
+ */
+void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_id,
+ int abi_ver)
+{
+ pd->pd_id = pd_id;
+ pd->abi_ver = abi_ver;
+ pd->dev = dev;
+}
+
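+/*
+ * The CQP control WQE builders below all follow the same sequence: reserve
+ * the next send WQE with irdma_sc_cqp_get_next_send_wqe(), fill the payload
+ * quadwords with set_64bit_val(), issue dma_wmb() so the payload is visible
+ * before the header quadword at offset 24 flips the valid/polarity bit, and
+ * finally ring the doorbell with irdma_sc_cqp_post_sq() when post_sq is set.
+ */
+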
+/**
+ * irdma_sc_add_arp_cache_entry - cqp wqe add arp cache entry
+ * @cqp: struct for cqp hw
+ * @info: arp entry information
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_add_arp_cache_entry(struct irdma_sc_cqp *cqp,
+ struct irdma_add_arp_cache_entry_info *info,
+ u64 scratch, bool post_sq)
+{
+ __le64 *wqe;
+ u64 hdr;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+ set_64bit_val(wqe, 8, info->reach_max);
+ set_64bit_val(wqe, 16, ether_addr_to_u64(info->mac_addr));
+
+ hdr = info->arp_index |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MANAGE_ARP) |
+ FIELD_PREP(IRDMA_CQPSQ_MAT_PERMANENT, (info->permanent ? 1 : 0)) |
+ FIELD_PREP(IRDMA_CQPSQ_MAT_ENTRYVALID, 1) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: ARP_CACHE_ENTRY WQE", DUMP_PREFIX_OFFSET,
+ 16, 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_del_arp_cache_entry - delete arp cache entry
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @arp_index: arp index to delete arp entry
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_del_arp_cache_entry(struct irdma_sc_cqp *cqp, u64 scratch,
+ u16 arp_index, bool post_sq)
+{
+ __le64 *wqe;
+ u64 hdr;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ hdr = arp_index |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MANAGE_ARP) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: ARP_CACHE_DEL_ENTRY WQE",
+ DUMP_PREFIX_OFFSET, 16, 8, wqe,
+ IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_manage_apbvt_entry - for adding and deleting apbvt entries
+ * @cqp: struct for cqp hw
+ * @info: info for apbvt entry to add or delete
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_manage_apbvt_entry(struct irdma_sc_cqp *cqp,
+ struct irdma_apbvt_info *info,
+ u64 scratch, bool post_sq)
+{
+ __le64 *wqe;
+ u64 hdr;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 16, info->port);
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MANAGE_APBVT) |
+ FIELD_PREP(IRDMA_CQPSQ_MAPT_ADDPORT, info->add) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: MANAGE_APBVT WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_manage_qhash_table_entry - manage quad hash entries
+ * @cqp: struct for cqp hw
+ * @info: info for quad hash to manage
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ *
+ * This is called before connection establishment is started.
+ * For passive connections, when listener is created, it will
+ * call with entry type of IRDMA_QHASH_TYPE_TCP_SYN with local
+ * ip address and tcp port. When SYN is received (passive
+ * connections) or sent (active connections), this routine is
+ * called with entry type of IRDMA_QHASH_TYPE_TCP_ESTABLISHED
+ * and quad is passed in info.
+ *
+ * When iwarp connection is done and its state moves to RTS, the
+ * quad hash entry in the hardware will point to iwarp's qp
+ * number and requires no calls from the driver.
+ */
+static int
+irdma_sc_manage_qhash_table_entry(struct irdma_sc_cqp *cqp,
+ struct irdma_qhash_table_info *info,
+ u64 scratch, bool post_sq)
+{
+ __le64 *wqe;
+ u64 qw1 = 0;
+ u64 qw2 = 0;
+ u64 temp;
+ struct irdma_sc_vsi *vsi = info->vsi;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0, ether_addr_to_u64(info->mac_addr));
+
+ qw1 = FIELD_PREP(IRDMA_CQPSQ_QHASH_QPN, info->qp_num) |
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_DEST_PORT, info->dest_port);
+ if (info->ipv4_valid) {
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR3, info->dest_ip[0]));
+ } else {
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR0, info->dest_ip[0]) |
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR1, info->dest_ip[1]));
+
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR2, info->dest_ip[2]) |
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR3, info->dest_ip[3]));
+ }
+ qw2 = FIELD_PREP(IRDMA_CQPSQ_QHASH_QS_HANDLE,
+ vsi->qos[info->user_pri].qs_handle);
+ if (info->vlan_valid)
+ qw2 |= FIELD_PREP(IRDMA_CQPSQ_QHASH_VLANID, info->vlan_id);
+ set_64bit_val(wqe, 16, qw2);
+ if (info->entry_type == IRDMA_QHASH_TYPE_TCP_ESTABLISHED) {
+ qw1 |= FIELD_PREP(IRDMA_CQPSQ_QHASH_SRC_PORT, info->src_port);
+ if (!info->ipv4_valid) {
+ set_64bit_val(wqe, 40,
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR0, info->src_ip[0]) |
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR1, info->src_ip[1]));
+ set_64bit_val(wqe, 32,
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR2, info->src_ip[2]) |
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR3, info->src_ip[3]));
+ } else {
+ set_64bit_val(wqe, 32,
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR3, info->src_ip[0]));
+ }
+ }
+
+ set_64bit_val(wqe, 8, qw1);
+ temp = FIELD_PREP(IRDMA_CQPSQ_QHASH_WQEVALID, cqp->polarity) |
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_OPCODE,
+ IRDMA_CQP_OP_MANAGE_QUAD_HASH_TABLE_ENTRY) |
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_MANAGE, info->manage) |
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_IPV4VALID, info->ipv4_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_VLANVALID, info->vlan_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_ENTRYTYPE, info->entry_type);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, temp);
+
+ print_hex_dump_debug("WQE: MANAGE_QHASH WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
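+
+/*
+ * Minimal usage sketch (hypothetical values; driver code normally reaches
+ * this op through irdma_manage_qhash()): a listener adding the SYN entry
+ * described above would fill the info roughly as
+ *
+ *	struct irdma_qhash_table_info info = {};
+ *
+ *	info.vsi = vsi;
+ *	info.manage = IRDMA_QHASH_MANAGE_TYPE_ADD;
+ *	info.entry_type = IRDMA_QHASH_TYPE_TCP_SYN;
+ *	info.ipv4_valid = true;
+ *	info.dest_port = listen_port;
+ *	info.dest_ip[0] = local_ipv4_addr;
+ *	ether_addr_copy(info.mac_addr, netdev->dev_addr);
+ *	irdma_sc_manage_qhash_table_entry(cqp, &info, scratch, true);
+ */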
+
+/**
+ * irdma_sc_qp_init - initialize qp
+ * @qp: sc qp
+ * @info: initialization qp info
+ */
+int irdma_sc_qp_init(struct irdma_sc_qp *qp, struct irdma_qp_init_info *info)
+{
+ int ret_code;
+ u32 pble_obj_cnt;
+ u16 wqe_size;
+
+ if (info->qp_uk_init_info.max_sq_frag_cnt >
+ info->pd->dev->hw_attrs.uk_attrs.max_hw_wq_frags ||
+ info->qp_uk_init_info.max_rq_frag_cnt >
+ info->pd->dev->hw_attrs.uk_attrs.max_hw_wq_frags)
+ return -EINVAL;
+
+ qp->dev = info->pd->dev;
+ qp->vsi = info->vsi;
+ qp->ieq_qp = info->vsi->exception_lan_q;
+ qp->sq_pa = info->sq_pa;
+ qp->rq_pa = info->rq_pa;
+ qp->hw_host_ctx_pa = info->host_ctx_pa;
+ qp->q2_pa = info->q2_pa;
+ qp->shadow_area_pa = info->shadow_area_pa;
+ qp->q2_buf = info->q2;
+ qp->pd = info->pd;
+ qp->hw_host_ctx = info->host_ctx;
+ info->qp_uk_init_info.wqe_alloc_db = qp->pd->dev->wqe_alloc_db;
+ ret_code = irdma_uk_qp_init(&qp->qp_uk, &info->qp_uk_init_info);
+ if (ret_code)
+ return ret_code;
+
+ qp->virtual_map = info->virtual_map;
+ pble_obj_cnt = info->pd->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
+
+ if ((info->virtual_map && info->sq_pa >= pble_obj_cnt) ||
+ (!info->qp_uk_init_info.srq_uk &&
+ info->virtual_map && info->rq_pa >= pble_obj_cnt))
+ return -EINVAL;
+
+ qp->llp_stream_handle = (void *)(-1);
+ qp->hw_sq_size = irdma_get_encoded_wqe_size(qp->qp_uk.sq_ring.size,
+ IRDMA_QUEUE_TYPE_SQ_RQ);
+ ibdev_dbg(to_ibdev(qp->dev),
+ "WQE: hw_sq_size[%04d] sq_ring.size[%04d]\n",
+ qp->hw_sq_size, qp->qp_uk.sq_ring.size);
+ if (qp->qp_uk.uk_attrs->hw_rev == IRDMA_GEN_1 && qp->pd->abi_ver > 4)
+ wqe_size = IRDMA_WQE_SIZE_128;
+ else
+ ret_code = irdma_fragcnt_to_wqesize_rq(qp->qp_uk.max_rq_frag_cnt,
+ &wqe_size);
+ if (ret_code)
+ return ret_code;
+
+ qp->hw_rq_size = irdma_get_encoded_wqe_size(qp->qp_uk.rq_size *
+ (wqe_size / IRDMA_QP_WQE_MIN_SIZE), IRDMA_QUEUE_TYPE_SQ_RQ);
+ ibdev_dbg(to_ibdev(qp->dev),
+ "WQE: hw_rq_size[%04d] qp_uk.rq_size[%04d] wqe_size[%04d]\n",
+ qp->hw_rq_size, qp->qp_uk.rq_size, wqe_size);
+ qp->sq_tph_val = info->sq_tph_val;
+ qp->rq_tph_val = info->rq_tph_val;
+ qp->sq_tph_en = info->sq_tph_en;
+ qp->rq_tph_en = info->rq_tph_en;
+ qp->rcv_tph_en = info->rcv_tph_en;
+ qp->xmit_tph_en = info->xmit_tph_en;
+ qp->qp_uk.first_sq_wq = info->qp_uk_init_info.first_sq_wq;
+ qp->qs_handle = qp->vsi->qos[qp->user_pri].qs_handle;
+
+ return 0;
+}
+
+/**
+ * irdma_sc_srq_init - init sc_srq structure
+ * @srq: srq sc struct
+ * @info: parameters for srq init
+ */
+int irdma_sc_srq_init(struct irdma_sc_srq *srq,
+ struct irdma_srq_init_info *info)
+{
+ u32 srq_size_quanta;
+ int ret_code;
+
+ ret_code = irdma_uk_srq_init(&srq->srq_uk, &info->srq_uk_init_info);
+ if (ret_code)
+ return ret_code;
+
+ srq->dev = info->pd->dev;
+ srq->pd = info->pd;
+ srq->vsi = info->vsi;
+ srq->srq_pa = info->srq_pa;
+ srq->first_pm_pbl_idx = info->first_pm_pbl_idx;
+ srq->pasid = info->pasid;
+ srq->pasid_valid = info->pasid_valid;
+ srq->srq_limit = info->srq_limit;
+ srq->leaf_pbl_size = info->leaf_pbl_size;
+ srq->virtual_map = info->virtual_map;
+ srq->tph_en = info->tph_en;
+ srq->arm_limit_event = info->arm_limit_event;
+ srq->tph_val = info->tph_value;
+ srq->shadow_area_pa = info->shadow_area_pa;
+
+ /* Smallest SRQ size is 256B i.e. 8 quanta */
+ srq_size_quanta = max((u32)IRDMA_SRQ_MIN_QUANTA,
+ srq->srq_uk.srq_size *
+ srq->srq_uk.wqe_size_multiplier);
+ srq->hw_srq_size = irdma_get_encoded_wqe_size(srq_size_quanta,
+ IRDMA_QUEUE_TYPE_SRQ);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_srq_create - send srq create CQP WQE
+ * @srq: srq sc struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_srq_create(struct irdma_sc_srq *srq, u64 scratch,
+ bool post_sq)
+{
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+ u64 hdr;
+
+ cqp = srq->pd->dev->cqp;
+ if (srq->srq_uk.srq_id < cqp->dev->hw_attrs.min_hw_srq_id ||
+ srq->srq_uk.srq_id >
+ (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_SRQ].max_cnt - 1))
+ return -EINVAL;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_SRQ_LIMIT, srq->srq_limit) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_RQSIZE, srq->hw_srq_size) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_RQ_WQE_SIZE, srq->srq_uk.wqe_size));
+ set_64bit_val(wqe, 8, (uintptr_t)srq);
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_PD_ID, srq->pd->pd_id));
+ set_64bit_val(wqe, 32,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR,
+ srq->srq_pa >>
+ IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR_S));
+ set_64bit_val(wqe, 40,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR,
+ srq->shadow_area_pa >>
+ IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR_S));
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_FIRST_PM_PBL_IDX,
+ srq->first_pm_pbl_idx));
+
+ hdr = srq->srq_uk.srq_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_SRQ) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_LEAF_PBL_SIZE, srq->leaf_pbl_size) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_VIRTMAP, srq->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_ARM_LIMIT_EVENT,
+ srq->arm_limit_event) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: SRQ_CREATE WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_srq_modify - send modify_srq CQP WQE
+ * @srq: srq sc struct
+ * @info: parameters for srq modification
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_srq_modify(struct irdma_sc_srq *srq,
+ struct irdma_modify_srq_info *info, u64 scratch,
+ bool post_sq)
+{
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+ u64 hdr;
+
+ cqp = srq->dev->cqp;
+ if (srq->srq_uk.srq_id < cqp->dev->hw_attrs.min_hw_srq_id ||
+ srq->srq_uk.srq_id >
+ (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_SRQ].max_cnt - 1))
+ return -EINVAL;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_SRQ_LIMIT, info->srq_limit) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_RQSIZE, srq->hw_srq_size) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_RQ_WQE_SIZE, srq->srq_uk.wqe_size));
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_SRQCTX, srq->srq_uk.srq_id));
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_PD_ID, srq->pd->pd_id));
+ set_64bit_val(wqe, 32,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR,
+ srq->srq_pa >>
+ IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR_S));
+ set_64bit_val(wqe, 40,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR,
+ srq->shadow_area_pa >>
+ IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR_S));
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_FIRST_PM_PBL_IDX,
+ srq->first_pm_pbl_idx));
+
+ hdr = srq->srq_uk.srq_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MODIFY_SRQ) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_LEAF_PBL_SIZE, srq->leaf_pbl_size) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_VIRTMAP, srq->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_ARM_LIMIT_EVENT,
+ info->arm_limit_event) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: SRQ_MODIFY WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_srq_destroy - send srq_destroy CQP WQE
+ * @srq: srq sc struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_srq_destroy(struct irdma_sc_srq *srq, u64 scratch,
+ bool post_sq)
+{
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+ u64 hdr;
+
+ cqp = srq->dev->cqp;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 8, (uintptr_t)srq);
+
+ hdr = srq->srq_uk.srq_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_SRQ) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: SRQ_DESTROY WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_qp_create - create qp
+ * @qp: sc qp
+ * @info: qp create info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+int irdma_sc_qp_create(struct irdma_sc_qp *qp, struct irdma_create_qp_info *info,
+ u64 scratch, bool post_sq)
+{
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+ u64 hdr;
+
+ cqp = qp->dev->cqp;
+ if (qp->qp_uk.qp_id < cqp->dev->hw_attrs.min_hw_qp_id ||
+ qp->qp_uk.qp_id >= cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt)
+ return -EINVAL;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
+ set_64bit_val(wqe, 40, qp->shadow_area_pa);
+
+ hdr = qp->qp_uk.qp_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_QP) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_ORDVALID, (info->ord_valid ? 1 : 0)) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_TOECTXVALID, info->tcp_ctx_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_MACVALID, info->mac_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_QPTYPE, qp->qp_uk.qp_type) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_VQ, qp->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_FORCELOOPBACK, info->force_lpb) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_CQNUMVALID, info->cq_num_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_ARPTABIDXVALID,
+ info->arp_cache_idx_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_NEXTIWSTATE, info->next_iwarp_state) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: QP_CREATE WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_qp_modify - modify qp cqp wqe
+ * @qp: sc qp
+ * @info: modify qp info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+int irdma_sc_qp_modify(struct irdma_sc_qp *qp, struct irdma_modify_qp_info *info,
+ u64 scratch, bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_sc_cqp *cqp;
+ u64 hdr;
+ u8 term_actions = 0;
+ u8 term_len = 0;
+
+ cqp = qp->dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ if (info->next_iwarp_state == IRDMA_QP_STATE_TERMINATE) {
+ if (info->dont_send_fin)
+ term_actions += IRDMAQP_TERM_SEND_TERM_ONLY;
+ if (info->dont_send_term)
+ term_actions += IRDMAQP_TERM_SEND_FIN_ONLY;
+ if (term_actions == IRDMAQP_TERM_SEND_TERM_AND_FIN ||
+ term_actions == IRDMAQP_TERM_SEND_TERM_ONLY)
+ term_len = info->termlen;
+ }
+
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMA_CQPSQ_QP_NEWMSS, info->new_mss) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_TERMLEN, term_len));
+ set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
+ set_64bit_val(wqe, 40, qp->shadow_area_pa);
+
+ hdr = qp->qp_uk.qp_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MODIFY_QP) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_ORDVALID, info->ord_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_TOECTXVALID, info->tcp_ctx_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_CACHEDVARVALID,
+ info->cached_var_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_VQ, qp->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_FORCELOOPBACK, info->force_lpb) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_CQNUMVALID, info->cq_num_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_MACVALID, info->mac_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_QPTYPE, qp->qp_uk.qp_type) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_MSSCHANGE, info->mss_change) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_REMOVEHASHENTRY,
+ info->remove_hash_idx) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_TERMACT, term_actions) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_RESETCON, info->reset_tcp_conn) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_ARPTABIDXVALID,
+ info->arp_cache_idx_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_NEXTIWSTATE, info->next_iwarp_state) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: QP_MODIFY WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_qp_destroy - cqp destroy qp
+ * @qp: sc qp
+ * @scratch: u64 saved to be used during cqp completion
+ * @remove_hash_idx: flag indicating whether to remove hash idx
+ * @ignore_mw_bnd: memory window bind flag
+ * @post_sq: flag for cqp db to ring
+ */
+int irdma_sc_qp_destroy(struct irdma_sc_qp *qp, u64 scratch,
+ bool remove_hash_idx, bool ignore_mw_bnd, bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_sc_cqp *cqp;
+ u64 hdr;
+
+ cqp = qp->dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
+ set_64bit_val(wqe, 40, qp->shadow_area_pa);
+
+ hdr = qp->qp_uk.qp_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_QP) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_QPTYPE, qp->qp_uk.qp_type) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_IGNOREMWBOUND, ignore_mw_bnd) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_REMOVEHASHENTRY, remove_hash_idx) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: QP_DESTROY WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_get_encoded_ird_size - get encoded IRD size
+ * @ird_size: IRD size
+ * The ird from the connection is rounded to a supported HW setting and then encoded
+ * for ird_size field of qp_ctx. Consumers are expected to provide valid ird size based
+ * on hardware attributes. IRD size defaults to a value of 4 in case of invalid input.
+ */
+static u8 irdma_sc_get_encoded_ird_size(u16 ird_size)
+{
+ switch (ird_size ?
+ roundup_pow_of_two(2 * ird_size) : 4) {
+ case 256:
+ return IRDMA_IRD_HW_SIZE_256;
+ case 128:
+ return IRDMA_IRD_HW_SIZE_128;
+ case 64:
+ case 32:
+ return IRDMA_IRD_HW_SIZE_64;
+ case 16:
+ case 8:
+ return IRDMA_IRD_HW_SIZE_16;
+ case 4:
+ default:
+ break;
+ }
+
+ return IRDMA_IRD_HW_SIZE_4;
+}
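+
+/*
+ * Worked example: ird_size = 100 is doubled to 200 and rounded up to 256,
+ * giving IRDMA_IRD_HW_SIZE_256; ird_size = 10 becomes 32 and maps to
+ * IRDMA_IRD_HW_SIZE_64; ird_size = 0 falls back to the default of 4,
+ * i.e. IRDMA_IRD_HW_SIZE_4.
+ */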
+
+/**
+ * irdma_sc_qp_setctx_roce_gen_2 - set qp's context
+ * @qp: sc qp
+ * @qp_ctx: context ptr
+ * @info: ctx info
+ */
+static void irdma_sc_qp_setctx_roce_gen_2(struct irdma_sc_qp *qp,
+ __le64 *qp_ctx,
+ struct irdma_qp_host_ctx_info *info)
+{
+ struct irdma_roce_offload_info *roce_info;
+ struct irdma_udp_offload_info *udp;
+ u8 push_mode_en;
+ u32 push_idx;
+
+ roce_info = info->roce_info;
+ udp = info->udp_info;
+ qp->user_pri = info->user_pri;
+ if (qp->push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX) {
+ push_mode_en = 0;
+ push_idx = 0;
+ } else {
+ push_mode_en = 1;
+ push_idx = qp->push_idx;
+ }
+ set_64bit_val(qp_ctx, 0,
+ FIELD_PREP(IRDMAQPC_RQWQESIZE, qp->qp_uk.rq_wqe_size) |
+ FIELD_PREP(IRDMAQPC_RCVTPHEN, qp->rcv_tph_en) |
+ FIELD_PREP(IRDMAQPC_XMITTPHEN, qp->xmit_tph_en) |
+ FIELD_PREP(IRDMAQPC_RQTPHEN, qp->rq_tph_en) |
+ FIELD_PREP(IRDMAQPC_SQTPHEN, qp->sq_tph_en) |
+ FIELD_PREP(IRDMAQPC_PPIDX, push_idx) |
+ FIELD_PREP(IRDMAQPC_PMENA, push_mode_en) |
+ FIELD_PREP(IRDMAQPC_PDIDXHI, roce_info->pd_id >> 16) |
+ FIELD_PREP(IRDMAQPC_DC_TCP_EN, roce_info->dctcp_en) |
+ FIELD_PREP(IRDMAQPC_ERR_RQ_IDX_VALID, roce_info->err_rq_idx_valid) |
+ FIELD_PREP(IRDMAQPC_ISQP1, roce_info->is_qp1) |
+ FIELD_PREP(IRDMAQPC_ROCE_TVER, roce_info->roce_tver) |
+ FIELD_PREP(IRDMAQPC_IPV4, udp->ipv4) |
+ FIELD_PREP(IRDMAQPC_INSERTVLANTAG, udp->insert_vlan_tag));
+ set_64bit_val(qp_ctx, 8, qp->sq_pa);
+ set_64bit_val(qp_ctx, 16, qp->rq_pa);
+ if ((roce_info->dcqcn_en || roce_info->dctcp_en) &&
+ !(udp->tos & 0x03))
+ udp->tos |= ECN_CODE_PT_VAL;
+ set_64bit_val(qp_ctx, 24,
+ FIELD_PREP(IRDMAQPC_RQSIZE, qp->hw_rq_size) |
+ FIELD_PREP(IRDMAQPC_SQSIZE, qp->hw_sq_size) |
+ FIELD_PREP(IRDMAQPC_TTL, udp->ttl) | FIELD_PREP(IRDMAQPC_TOS, udp->tos) |
+ FIELD_PREP(IRDMAQPC_SRCPORTNUM, udp->src_port) |
+ FIELD_PREP(IRDMAQPC_DESTPORTNUM, udp->dst_port));
+ set_64bit_val(qp_ctx, 32,
+ FIELD_PREP(IRDMAQPC_DESTIPADDR2, udp->dest_ip_addr[2]) |
+ FIELD_PREP(IRDMAQPC_DESTIPADDR3, udp->dest_ip_addr[3]));
+ set_64bit_val(qp_ctx, 40,
+ FIELD_PREP(IRDMAQPC_DESTIPADDR0, udp->dest_ip_addr[0]) |
+ FIELD_PREP(IRDMAQPC_DESTIPADDR1, udp->dest_ip_addr[1]));
+ set_64bit_val(qp_ctx, 48,
+ FIELD_PREP(IRDMAQPC_SNDMSS, udp->snd_mss) |
+ FIELD_PREP(IRDMAQPC_VLANTAG, udp->vlan_tag) |
+ FIELD_PREP(IRDMAQPC_ARPIDX, udp->arp_idx));
+ set_64bit_val(qp_ctx, 56,
+ FIELD_PREP(IRDMAQPC_PKEY, roce_info->p_key) |
+ FIELD_PREP(IRDMAQPC_PDIDX, roce_info->pd_id) |
+ FIELD_PREP(IRDMAQPC_ACKCREDITS, roce_info->ack_credits) |
+ FIELD_PREP(IRDMAQPC_FLOWLABEL, udp->flow_label));
+ set_64bit_val(qp_ctx, 64,
+ FIELD_PREP(IRDMAQPC_QKEY, roce_info->qkey) |
+ FIELD_PREP(IRDMAQPC_DESTQP, roce_info->dest_qp));
+ set_64bit_val(qp_ctx, 80,
+ FIELD_PREP(IRDMAQPC_PSNNXT, udp->psn_nxt) |
+ FIELD_PREP(IRDMAQPC_LSN, udp->lsn));
+ set_64bit_val(qp_ctx, 88,
+ FIELD_PREP(IRDMAQPC_EPSN, udp->epsn));
+ set_64bit_val(qp_ctx, 96,
+ FIELD_PREP(IRDMAQPC_PSNMAX, udp->psn_max) |
+ FIELD_PREP(IRDMAQPC_PSNUNA, udp->psn_una));
+ set_64bit_val(qp_ctx, 112,
+ FIELD_PREP(IRDMAQPC_CWNDROCE, udp->cwnd));
+ set_64bit_val(qp_ctx, 128,
+ FIELD_PREP(IRDMAQPC_ERR_RQ_IDX, roce_info->err_rq_idx) |
+ FIELD_PREP(IRDMAQPC_RNRNAK_THRESH, udp->rnr_nak_thresh) |
+ FIELD_PREP(IRDMAQPC_REXMIT_THRESH, udp->rexmit_thresh) |
+ FIELD_PREP(IRDMAQPC_RTOMIN, roce_info->rtomin));
+ set_64bit_val(qp_ctx, 136,
+ FIELD_PREP(IRDMAQPC_TXCQNUM, info->send_cq_num) |
+ FIELD_PREP(IRDMAQPC_RXCQNUM, info->rcv_cq_num));
+ set_64bit_val(qp_ctx, 144,
+ FIELD_PREP(IRDMAQPC_STAT_INDEX, info->stats_idx));
+ set_64bit_val(qp_ctx, 152, ether_addr_to_u64(roce_info->mac_addr) << 16);
+ set_64bit_val(qp_ctx, 160,
+ FIELD_PREP(IRDMAQPC_ORDSIZE, roce_info->ord_size) |
+ FIELD_PREP(IRDMAQPC_IRDSIZE, irdma_sc_get_encoded_ird_size(roce_info->ird_size)) |
+ FIELD_PREP(IRDMAQPC_WRRDRSPOK, roce_info->wr_rdresp_en) |
+ FIELD_PREP(IRDMAQPC_RDOK, roce_info->rd_en) |
+ FIELD_PREP(IRDMAQPC_USESTATSINSTANCE, info->stats_idx_valid) |
+ FIELD_PREP(IRDMAQPC_BINDEN, roce_info->bind_en) |
+ FIELD_PREP(IRDMAQPC_FASTREGEN, roce_info->fast_reg_en) |
+ FIELD_PREP(IRDMAQPC_DCQCNENABLE, roce_info->dcqcn_en) |
+ FIELD_PREP(IRDMAQPC_RCVNOICRC, roce_info->rcv_no_icrc) |
+ FIELD_PREP(IRDMAQPC_FW_CC_ENABLE, roce_info->fw_cc_enable) |
+ FIELD_PREP(IRDMAQPC_UDPRIVCQENABLE, roce_info->udprivcq_en) |
+ FIELD_PREP(IRDMAQPC_PRIVEN, roce_info->priv_mode_en) |
+ FIELD_PREP(IRDMAQPC_TIMELYENABLE, roce_info->timely_en));
+ set_64bit_val(qp_ctx, 168,
+ FIELD_PREP(IRDMAQPC_QPCOMPCTX, info->qp_compl_ctx));
+ set_64bit_val(qp_ctx, 176,
+ FIELD_PREP(IRDMAQPC_SQTPHVAL, qp->sq_tph_val) |
+ FIELD_PREP(IRDMAQPC_RQTPHVAL, qp->rq_tph_val) |
+ FIELD_PREP(IRDMAQPC_QSHANDLE, qp->qs_handle));
+ set_64bit_val(qp_ctx, 184,
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR3, udp->local_ipaddr[3]) |
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR2, udp->local_ipaddr[2]));
+ set_64bit_val(qp_ctx, 192,
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR1, udp->local_ipaddr[1]) |
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR0, udp->local_ipaddr[0]));
+ set_64bit_val(qp_ctx, 200,
+ FIELD_PREP(IRDMAQPC_THIGH, roce_info->t_high) |
+ FIELD_PREP(IRDMAQPC_TLOW, roce_info->t_low));
+ set_64bit_val(qp_ctx, 208,
+ FIELD_PREP(IRDMAQPC_REMENDPOINTIDX, info->rem_endpoint_idx));
+
+ print_hex_dump_debug("WQE: QP_HOST CTX WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, qp_ctx, IRDMA_QP_CTX_SIZE, false);
+}
+
+/**
+ * irdma_sc_get_encoded_ird_size_gen_3 - get encoded IRD size for GEN 3
+ * @ird_size: IRD size
+ * The ird from the connection is rounded to a supported HW setting and then encoded
+ * for ird_size field of qp_ctx. Consumers are expected to provide valid ird size based
+ * on hardware attributes. IRD size defaults to a value of 4 in case of invalid input.
+ */
+static u8 irdma_sc_get_encoded_ird_size_gen_3(u16 ird_size)
+{
+ switch (ird_size ?
+ roundup_pow_of_two(2 * ird_size) : 4) {
+ case 4096:
+ return IRDMA_IRD_HW_SIZE_4096_GEN3;
+ case 2048:
+ return IRDMA_IRD_HW_SIZE_2048_GEN3;
+ case 1024:
+ return IRDMA_IRD_HW_SIZE_1024_GEN3;
+ case 512:
+ return IRDMA_IRD_HW_SIZE_512_GEN3;
+ case 256:
+ return IRDMA_IRD_HW_SIZE_256_GEN3;
+ case 128:
+ return IRDMA_IRD_HW_SIZE_128_GEN3;
+ case 64:
+ return IRDMA_IRD_HW_SIZE_64_GEN3;
+ case 32:
+ return IRDMA_IRD_HW_SIZE_32_GEN3;
+ case 16:
+ return IRDMA_IRD_HW_SIZE_16_GEN3;
+ case 8:
+ return IRDMA_IRD_HW_SIZE_8_GEN3;
+ case 4:
+ default:
+ break;
+ }
+
+ return IRDMA_IRD_HW_SIZE_4_GEN3;
+}
+
+/**
+ * irdma_sc_qp_setctx_roce_gen_3 - set qp's context
+ * @qp: sc qp
+ * @qp_ctx: context ptr
+ * @info: ctx info
+ */
+static void irdma_sc_qp_setctx_roce_gen_3(struct irdma_sc_qp *qp,
+ __le64 *qp_ctx,
+ struct irdma_qp_host_ctx_info *info)
+{
+ struct irdma_roce_offload_info *roce_info = info->roce_info;
+ struct irdma_udp_offload_info *udp = info->udp_info;
+ u64 qw0, qw3, qw7 = 0, qw8 = 0;
+ u8 push_mode_en;
+ u32 push_idx;
+
+ qp->user_pri = info->user_pri;
+ if (qp->push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX) {
+ push_mode_en = 0;
+ push_idx = 0;
+ } else {
+ push_mode_en = 1;
+ push_idx = qp->push_idx;
+ }
+
+ qw0 = FIELD_PREP(IRDMAQPC_RQWQESIZE, qp->qp_uk.rq_wqe_size) |
+ FIELD_PREP(IRDMAQPC_RCVTPHEN, qp->rcv_tph_en) |
+ FIELD_PREP(IRDMAQPC_XMITTPHEN, qp->xmit_tph_en) |
+ FIELD_PREP(IRDMAQPC_RQTPHEN, qp->rq_tph_en) |
+ FIELD_PREP(IRDMAQPC_SQTPHEN, qp->sq_tph_en) |
+ FIELD_PREP(IRDMAQPC_PPIDX, push_idx) |
+ FIELD_PREP(IRDMAQPC_PMENA, push_mode_en) |
+ FIELD_PREP(IRDMAQPC_DC_TCP_EN, roce_info->dctcp_en) |
+ FIELD_PREP(IRDMAQPC_ISQP1, roce_info->is_qp1) |
+ FIELD_PREP(IRDMAQPC_ROCE_TVER, roce_info->roce_tver) |
+ FIELD_PREP(IRDMAQPC_IPV4, udp->ipv4) |
+ FIELD_PREP(IRDMAQPC_USE_SRQ, !qp->qp_uk.srq_uk ? 0 : 1) |
+ FIELD_PREP(IRDMAQPC_INSERTVLANTAG, udp->insert_vlan_tag);
+ set_64bit_val(qp_ctx, 0, qw0);
+ set_64bit_val(qp_ctx, 8, qp->sq_pa);
+ set_64bit_val(qp_ctx, 16, qp->rq_pa);
+ qw3 = FIELD_PREP(IRDMAQPC_RQSIZE, qp->hw_rq_size) |
+ FIELD_PREP(IRDMAQPC_SQSIZE, qp->hw_sq_size) |
+ FIELD_PREP(IRDMAQPC_TTL, udp->ttl) |
+ FIELD_PREP(IRDMAQPC_TOS, udp->tos) |
+ FIELD_PREP(IRDMAQPC_SRCPORTNUM, udp->src_port) |
+ FIELD_PREP(IRDMAQPC_DESTPORTNUM, udp->dst_port);
+ set_64bit_val(qp_ctx, 24, qw3);
+ set_64bit_val(qp_ctx, 32,
+ FIELD_PREP(IRDMAQPC_DESTIPADDR2, udp->dest_ip_addr[2]) |
+ FIELD_PREP(IRDMAQPC_DESTIPADDR3, udp->dest_ip_addr[3]));
+ set_64bit_val(qp_ctx, 40,
+ FIELD_PREP(IRDMAQPC_DESTIPADDR0, udp->dest_ip_addr[0]) |
+ FIELD_PREP(IRDMAQPC_DESTIPADDR1, udp->dest_ip_addr[1]));
+ set_64bit_val(qp_ctx, 48,
+ FIELD_PREP(IRDMAQPC_SNDMSS, udp->snd_mss) |
+ FIELD_PREP(IRDMAQPC_VLANTAG, udp->vlan_tag) |
+ FIELD_PREP(IRDMAQPC_ARPIDX, udp->arp_idx));
+ qw7 = FIELD_PREP(IRDMAQPC_PKEY, roce_info->p_key) |
+ FIELD_PREP(IRDMAQPC_ACKCREDITS, roce_info->ack_credits) |
+ FIELD_PREP(IRDMAQPC_FLOWLABEL, udp->flow_label);
+ set_64bit_val(qp_ctx, 56, qw7);
+ qw8 = FIELD_PREP(IRDMAQPC_QKEY, roce_info->qkey) |
+ FIELD_PREP(IRDMAQPC_DESTQP, roce_info->dest_qp);
+ set_64bit_val(qp_ctx, 64, qw8);
+ set_64bit_val(qp_ctx, 80,
+ FIELD_PREP(IRDMAQPC_PSNNXT, udp->psn_nxt) |
+ FIELD_PREP(IRDMAQPC_LSN, udp->lsn));
+ set_64bit_val(qp_ctx, 88,
+ FIELD_PREP(IRDMAQPC_EPSN, udp->epsn));
+ set_64bit_val(qp_ctx, 96,
+ FIELD_PREP(IRDMAQPC_PSNMAX, udp->psn_max) |
+ FIELD_PREP(IRDMAQPC_PSNUNA, udp->psn_una));
+ set_64bit_val(qp_ctx, 112,
+ FIELD_PREP(IRDMAQPC_CWNDROCE, udp->cwnd));
+ set_64bit_val(qp_ctx, 128,
+ FIELD_PREP(IRDMAQPC_MINRNR_TIMER, udp->min_rnr_timer) |
+ FIELD_PREP(IRDMAQPC_RNRNAK_THRESH, udp->rnr_nak_thresh) |
+ FIELD_PREP(IRDMAQPC_REXMIT_THRESH, udp->rexmit_thresh) |
+ FIELD_PREP(IRDMAQPC_RNRNAK_TMR, udp->rnr_nak_tmr) |
+ FIELD_PREP(IRDMAQPC_RTOMIN, roce_info->rtomin));
+ set_64bit_val(qp_ctx, 136,
+ FIELD_PREP(IRDMAQPC_TXCQNUM, info->send_cq_num) |
+ FIELD_PREP(IRDMAQPC_RXCQNUM, info->rcv_cq_num));
+ set_64bit_val(qp_ctx, 152,
+ FIELD_PREP(IRDMAQPC_MACADDRESS,
+ ether_addr_to_u64(roce_info->mac_addr)) |
+ FIELD_PREP(IRDMAQPC_LOCALACKTIMEOUT,
+ roce_info->local_ack_timeout));
+ set_64bit_val(qp_ctx, 160,
+ FIELD_PREP(IRDMAQPC_ORDSIZE_GEN3, roce_info->ord_size) |
+ FIELD_PREP(IRDMAQPC_IRDSIZE_GEN3,
+ irdma_sc_get_encoded_ird_size_gen_3(roce_info->ird_size)) |
+ FIELD_PREP(IRDMAQPC_WRRDRSPOK, roce_info->wr_rdresp_en) |
+ FIELD_PREP(IRDMAQPC_RDOK, roce_info->rd_en) |
+ FIELD_PREP(IRDMAQPC_USESTATSINSTANCE,
+ info->stats_idx_valid) |
+ FIELD_PREP(IRDMAQPC_BINDEN, roce_info->bind_en) |
+ FIELD_PREP(IRDMAQPC_FASTREGEN, roce_info->fast_reg_en) |
+ FIELD_PREP(IRDMAQPC_DCQCNENABLE, roce_info->dcqcn_en) |
+ FIELD_PREP(IRDMAQPC_RCVNOICRC, roce_info->rcv_no_icrc) |
+ FIELD_PREP(IRDMAQPC_FW_CC_ENABLE,
+ roce_info->fw_cc_enable) |
+ FIELD_PREP(IRDMAQPC_UDPRIVCQENABLE,
+ roce_info->udprivcq_en) |
+ FIELD_PREP(IRDMAQPC_PRIVEN, roce_info->priv_mode_en) |
+ FIELD_PREP(IRDMAQPC_REMOTE_ATOMIC_EN,
+ info->remote_atomics_en) |
+ FIELD_PREP(IRDMAQPC_TIMELYENABLE, roce_info->timely_en));
+ set_64bit_val(qp_ctx, 168,
+ FIELD_PREP(IRDMAQPC_QPCOMPCTX, info->qp_compl_ctx));
+ set_64bit_val(qp_ctx, 176,
+ FIELD_PREP(IRDMAQPC_SQTPHVAL, qp->sq_tph_val) |
+ FIELD_PREP(IRDMAQPC_RQTPHVAL, qp->rq_tph_val) |
+ FIELD_PREP(IRDMAQPC_QSHANDLE, qp->qs_handle));
+ set_64bit_val(qp_ctx, 184,
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR3, udp->local_ipaddr[3]) |
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR2, udp->local_ipaddr[2]));
+ set_64bit_val(qp_ctx, 192,
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR1, udp->local_ipaddr[1]) |
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR0, udp->local_ipaddr[0]));
+ set_64bit_val(qp_ctx, 200,
+ FIELD_PREP(IRDMAQPC_THIGH, roce_info->t_high) |
+ FIELD_PREP(IRDMAQPC_SRQ_ID,
+ !qp->qp_uk.srq_uk ?
+ 0 : qp->qp_uk.srq_uk->srq_id) |
+ FIELD_PREP(IRDMAQPC_TLOW, roce_info->t_low));
+ set_64bit_val(qp_ctx, 208, roce_info->pd_id |
+ FIELD_PREP(IRDMAQPC_STAT_INDEX_GEN3, info->stats_idx) |
+ FIELD_PREP(IRDMAQPC_PKT_LIMIT, qp->pkt_limit));
+
+ print_hex_dump_debug("WQE: QP_HOST ROCE CTX WQE", DUMP_PREFIX_OFFSET,
+ 16, 8, qp_ctx, IRDMA_QP_CTX_SIZE, false);
+}
+
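+/**
+ * irdma_sc_qp_setctx_roce - set qp's context for RoCE
+ * @qp: sc qp
+ * @qp_ctx: context ptr
+ * @info: ctx info
+ */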
+void irdma_sc_qp_setctx_roce(struct irdma_sc_qp *qp, __le64 *qp_ctx,
+ struct irdma_qp_host_ctx_info *info)
+{
+ if (qp->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2)
+ irdma_sc_qp_setctx_roce_gen_2(qp, qp_ctx, info);
+ else
+ irdma_sc_qp_setctx_roce_gen_3(qp, qp_ctx, info);
+}
+
+/**
+ * irdma_sc_alloc_local_mac_entry - allocate a mac entry
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_alloc_local_mac_entry(struct irdma_sc_cqp *cqp, u64 scratch,
+ bool post_sq)
+{
+ __le64 *wqe;
+ u64 hdr;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE,
+ IRDMA_CQP_OP_ALLOCATE_LOC_MAC_TABLE_ENTRY) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: ALLOCATE_LOCAL_MAC WQE",
+ DUMP_PREFIX_OFFSET, 16, 8, wqe,
+ IRDMA_CQP_WQE_SIZE * 8, false);
+
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * irdma_sc_add_local_mac_entry - add mac entry
+ * @cqp: struct for cqp hw
+ * @info: mac addr info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_add_local_mac_entry(struct irdma_sc_cqp *cqp,
+ struct irdma_local_mac_entry_info *info,
+ u64 scratch, bool post_sq)
+{
+ __le64 *wqe;
+ u64 header;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 32, ether_addr_to_u64(info->mac_addr));
+
+ header = FIELD_PREP(IRDMA_CQPSQ_MLM_TABLEIDX, info->entry_idx) |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE,
+ IRDMA_CQP_OP_MANAGE_LOC_MAC_TABLE) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, header);
+
+ print_hex_dump_debug("WQE: ADD_LOCAL_MAC WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * irdma_sc_del_local_mac_entry - cqp wqe to delete local mac entry
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @entry_idx: index of mac entry
+ * @ignore_ref_count: to force mac entry delete
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_del_local_mac_entry(struct irdma_sc_cqp *cqp, u64 scratch,
+ u16 entry_idx, u8 ignore_ref_count,
+ bool post_sq)
+{
+ __le64 *wqe;
+ u64 header;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+ header = FIELD_PREP(IRDMA_CQPSQ_MLM_TABLEIDX, entry_idx) |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE,
+ IRDMA_CQP_OP_MANAGE_LOC_MAC_TABLE) |
+ FIELD_PREP(IRDMA_CQPSQ_MLM_FREEENTRY, 1) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity) |
+ FIELD_PREP(IRDMA_CQPSQ_MLM_IGNORE_REF_CNT, ignore_ref_count);
+
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, header);
+
+ print_hex_dump_debug("WQE: DEL_LOCAL_MAC_IPADDR WQE",
+ DUMP_PREFIX_OFFSET, 16, 8, wqe,
+ IRDMA_CQP_WQE_SIZE * 8, false);
+
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * irdma_sc_qp_setctx - set qp's context
+ * @qp: sc qp
+ * @qp_ctx: context ptr
+ * @info: ctx info
+ */
+void irdma_sc_qp_setctx(struct irdma_sc_qp *qp, __le64 *qp_ctx,
+ struct irdma_qp_host_ctx_info *info)
+{
+ struct irdma_iwarp_offload_info *iw;
+ struct irdma_tcp_offload_info *tcp;
+ struct irdma_sc_dev *dev;
+ u8 push_mode_en;
+ u32 push_idx;
+ u64 qw0, qw3, qw7 = 0, qw16 = 0;
+ u64 mac = 0;
+
+ iw = info->iwarp_info;
+ tcp = info->tcp_info;
+ dev = qp->dev;
+ if (iw->rcv_mark_en) {
+ qp->pfpdu.marker_len = 4;
+ qp->pfpdu.rcv_start_seq = tcp->rcv_nxt;
+ }
+ qp->user_pri = info->user_pri;
+ if (qp->push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX) {
+ push_mode_en = 0;
+ push_idx = 0;
+ } else {
+ push_mode_en = 1;
+ push_idx = qp->push_idx;
+ }
+ qw0 = FIELD_PREP(IRDMAQPC_RQWQESIZE, qp->qp_uk.rq_wqe_size) |
+ FIELD_PREP(IRDMAQPC_RCVTPHEN, qp->rcv_tph_en) |
+ FIELD_PREP(IRDMAQPC_XMITTPHEN, qp->xmit_tph_en) |
+ FIELD_PREP(IRDMAQPC_RQTPHEN, qp->rq_tph_en) |
+ FIELD_PREP(IRDMAQPC_SQTPHEN, qp->sq_tph_en) |
+ FIELD_PREP(IRDMAQPC_PPIDX, push_idx) |
+ FIELD_PREP(IRDMAQPC_PMENA, push_mode_en);
+
+ set_64bit_val(qp_ctx, 8, qp->sq_pa);
+ set_64bit_val(qp_ctx, 16, qp->rq_pa);
+
+ qw3 = FIELD_PREP(IRDMAQPC_RQSIZE, qp->hw_rq_size) |
+ FIELD_PREP(IRDMAQPC_SQSIZE, qp->hw_sq_size);
+ if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
+ qw3 |= FIELD_PREP(IRDMAQPC_GEN1_SRCMACADDRIDX,
+ qp->src_mac_addr_idx);
+ set_64bit_val(qp_ctx, 136,
+ FIELD_PREP(IRDMAQPC_TXCQNUM, info->send_cq_num) |
+ FIELD_PREP(IRDMAQPC_RXCQNUM, info->rcv_cq_num));
+ set_64bit_val(qp_ctx, 168,
+ FIELD_PREP(IRDMAQPC_QPCOMPCTX, info->qp_compl_ctx));
+ set_64bit_val(qp_ctx, 176,
+ FIELD_PREP(IRDMAQPC_SQTPHVAL, qp->sq_tph_val) |
+ FIELD_PREP(IRDMAQPC_RQTPHVAL, qp->rq_tph_val) |
+ FIELD_PREP(IRDMAQPC_QSHANDLE, qp->qs_handle) |
+ FIELD_PREP(IRDMAQPC_EXCEPTION_LAN_QUEUE, qp->ieq_qp));
+ if (info->iwarp_info_valid) {
+ qw0 |= FIELD_PREP(IRDMAQPC_DDP_VER, iw->ddp_ver) |
+ FIELD_PREP(IRDMAQPC_RDMAP_VER, iw->rdmap_ver) |
+ FIELD_PREP(IRDMAQPC_DC_TCP_EN, iw->dctcp_en) |
+ FIELD_PREP(IRDMAQPC_ECN_EN, iw->ecn_en) |
+ FIELD_PREP(IRDMAQPC_IBRDENABLE, iw->ib_rd_en) |
+ FIELD_PREP(IRDMAQPC_PDIDXHI, iw->pd_id >> 16) |
+ FIELD_PREP(IRDMAQPC_ERR_RQ_IDX_VALID,
+ iw->err_rq_idx_valid);
+ qw7 |= FIELD_PREP(IRDMAQPC_PDIDX, iw->pd_id);
+ qw16 |= FIELD_PREP(IRDMAQPC_ERR_RQ_IDX, iw->err_rq_idx) |
+ FIELD_PREP(IRDMAQPC_RTOMIN, iw->rtomin);
+ set_64bit_val(qp_ctx, 144,
+ FIELD_PREP(IRDMAQPC_Q2ADDR, qp->q2_pa >> 8) |
+ FIELD_PREP(IRDMAQPC_STAT_INDEX, info->stats_idx));
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
+ mac = ether_addr_to_u64(iw->mac_addr);
+
+ set_64bit_val(qp_ctx, 152,
+ mac << 16 | FIELD_PREP(IRDMAQPC_LASTBYTESENT, iw->last_byte_sent));
+ set_64bit_val(qp_ctx, 160,
+ FIELD_PREP(IRDMAQPC_ORDSIZE, iw->ord_size) |
+ FIELD_PREP(IRDMAQPC_IRDSIZE, irdma_sc_get_encoded_ird_size(iw->ird_size)) |
+ FIELD_PREP(IRDMAQPC_WRRDRSPOK, iw->wr_rdresp_en) |
+ FIELD_PREP(IRDMAQPC_RDOK, iw->rd_en) |
+ FIELD_PREP(IRDMAQPC_SNDMARKERS, iw->snd_mark_en) |
+ FIELD_PREP(IRDMAQPC_BINDEN, iw->bind_en) |
+ FIELD_PREP(IRDMAQPC_FASTREGEN, iw->fast_reg_en) |
+ FIELD_PREP(IRDMAQPC_PRIVEN, iw->priv_mode_en) |
+ FIELD_PREP(IRDMAQPC_USESTATSINSTANCE, info->stats_idx_valid) |
+ FIELD_PREP(IRDMAQPC_IWARPMODE, 1) |
+ FIELD_PREP(IRDMAQPC_RCVMARKERS, iw->rcv_mark_en) |
+ FIELD_PREP(IRDMAQPC_ALIGNHDRS, iw->align_hdrs) |
+ FIELD_PREP(IRDMAQPC_RCVNOMPACRC, iw->rcv_no_mpa_crc) |
+ FIELD_PREP(IRDMAQPC_RCVMARKOFFSET, iw->rcv_mark_offset || !tcp ? iw->rcv_mark_offset : tcp->rcv_nxt) |
+ FIELD_PREP(IRDMAQPC_SNDMARKOFFSET, iw->snd_mark_offset || !tcp ? iw->snd_mark_offset : tcp->snd_nxt) |
+ FIELD_PREP(IRDMAQPC_TIMELYENABLE, iw->timely_en));
+ }
+ if (info->tcp_info_valid) {
+ qw0 |= FIELD_PREP(IRDMAQPC_IPV4, tcp->ipv4) |
+ FIELD_PREP(IRDMAQPC_NONAGLE, tcp->no_nagle) |
+ FIELD_PREP(IRDMAQPC_INSERTVLANTAG,
+ tcp->insert_vlan_tag) |
+ FIELD_PREP(IRDMAQPC_TIMESTAMP, tcp->time_stamp) |
+ FIELD_PREP(IRDMAQPC_LIMIT, tcp->cwnd_inc_limit) |
+ FIELD_PREP(IRDMAQPC_DROPOOOSEG, tcp->drop_ooo_seg) |
+ FIELD_PREP(IRDMAQPC_DUPACK_THRESH, tcp->dup_ack_thresh);
+
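+ /* Set an ECN codepoint in the TOS byte if ECN/DCTCP is enabled and none is set yet */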
+ if ((iw->ecn_en || iw->dctcp_en) && !(tcp->tos & 0x03))
+ tcp->tos |= ECN_CODE_PT_VAL;
+
+ qw3 |= FIELD_PREP(IRDMAQPC_TTL, tcp->ttl) |
+ FIELD_PREP(IRDMAQPC_AVOIDSTRETCHACK, tcp->avoid_stretch_ack) |
+ FIELD_PREP(IRDMAQPC_TOS, tcp->tos) |
+ FIELD_PREP(IRDMAQPC_SRCPORTNUM, tcp->src_port) |
+ FIELD_PREP(IRDMAQPC_DESTPORTNUM, tcp->dst_port);
+ if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) {
+ qw3 |= FIELD_PREP(IRDMAQPC_GEN1_SRCMACADDRIDX, tcp->src_mac_addr_idx);
+
+ qp->src_mac_addr_idx = tcp->src_mac_addr_idx;
+ }
+ set_64bit_val(qp_ctx, 32,
+ FIELD_PREP(IRDMAQPC_DESTIPADDR2, tcp->dest_ip_addr[2]) |
+ FIELD_PREP(IRDMAQPC_DESTIPADDR3, tcp->dest_ip_addr[3]));
+ set_64bit_val(qp_ctx, 40,
+ FIELD_PREP(IRDMAQPC_DESTIPADDR0, tcp->dest_ip_addr[0]) |
+ FIELD_PREP(IRDMAQPC_DESTIPADDR1, tcp->dest_ip_addr[1]));
+ set_64bit_val(qp_ctx, 48,
+ FIELD_PREP(IRDMAQPC_SNDMSS, tcp->snd_mss) |
+ FIELD_PREP(IRDMAQPC_SYN_RST_HANDLING, tcp->syn_rst_handling) |
+ FIELD_PREP(IRDMAQPC_VLANTAG, tcp->vlan_tag) |
+ FIELD_PREP(IRDMAQPC_ARPIDX, tcp->arp_idx));
+ qw7 |= FIELD_PREP(IRDMAQPC_FLOWLABEL, tcp->flow_label) |
+ FIELD_PREP(IRDMAQPC_WSCALE, tcp->wscale) |
+ FIELD_PREP(IRDMAQPC_IGNORE_TCP_OPT,
+ tcp->ignore_tcp_opt) |
+ FIELD_PREP(IRDMAQPC_IGNORE_TCP_UNS_OPT,
+ tcp->ignore_tcp_uns_opt) |
+ FIELD_PREP(IRDMAQPC_TCPSTATE, tcp->tcp_state) |
+ FIELD_PREP(IRDMAQPC_RCVSCALE, tcp->rcv_wscale) |
+ FIELD_PREP(IRDMAQPC_SNDSCALE, tcp->snd_wscale);
+ set_64bit_val(qp_ctx, 72,
+ FIELD_PREP(IRDMAQPC_TIMESTAMP_RECENT, tcp->time_stamp_recent) |
+ FIELD_PREP(IRDMAQPC_TIMESTAMP_AGE, tcp->time_stamp_age));
+ set_64bit_val(qp_ctx, 80,
+ FIELD_PREP(IRDMAQPC_SNDNXT, tcp->snd_nxt) |
+ FIELD_PREP(IRDMAQPC_SNDWND, tcp->snd_wnd));
+ set_64bit_val(qp_ctx, 88,
+ FIELD_PREP(IRDMAQPC_RCVNXT, tcp->rcv_nxt) |
+ FIELD_PREP(IRDMAQPC_RCVWND, tcp->rcv_wnd));
+ set_64bit_val(qp_ctx, 96,
+ FIELD_PREP(IRDMAQPC_SNDMAX, tcp->snd_max) |
+ FIELD_PREP(IRDMAQPC_SNDUNA, tcp->snd_una));
+ set_64bit_val(qp_ctx, 104,
+ FIELD_PREP(IRDMAQPC_SRTT, tcp->srtt) |
+ FIELD_PREP(IRDMAQPC_RTTVAR, tcp->rtt_var));
+ set_64bit_val(qp_ctx, 112,
+ FIELD_PREP(IRDMAQPC_SSTHRESH, tcp->ss_thresh) |
+ FIELD_PREP(IRDMAQPC_CWND, tcp->cwnd));
+ set_64bit_val(qp_ctx, 120,
+ FIELD_PREP(IRDMAQPC_SNDWL1, tcp->snd_wl1) |
+ FIELD_PREP(IRDMAQPC_SNDWL2, tcp->snd_wl2));
+ qw16 |= FIELD_PREP(IRDMAQPC_MAXSNDWND, tcp->max_snd_window) |
+ FIELD_PREP(IRDMAQPC_REXMIT_THRESH, tcp->rexmit_thresh);
+ set_64bit_val(qp_ctx, 184,
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR3, tcp->local_ipaddr[3]) |
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR2, tcp->local_ipaddr[2]));
+ set_64bit_val(qp_ctx, 192,
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR1, tcp->local_ipaddr[1]) |
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR0, tcp->local_ipaddr[0]));
+ set_64bit_val(qp_ctx, 200,
+ FIELD_PREP(IRDMAQPC_THIGH, iw->t_high) |
+ FIELD_PREP(IRDMAQPC_TLOW, iw->t_low));
+ set_64bit_val(qp_ctx, 208,
+ FIELD_PREP(IRDMAQPC_REMENDPOINTIDX, info->rem_endpoint_idx));
+ }
+
+ set_64bit_val(qp_ctx, 0, qw0);
+ set_64bit_val(qp_ctx, 24, qw3);
+ set_64bit_val(qp_ctx, 56, qw7);
+ set_64bit_val(qp_ctx, 128, qw16);
+
+ print_hex_dump_debug("WQE: QP_HOST CTX", DUMP_PREFIX_OFFSET, 16, 8,
+ qp_ctx, IRDMA_QP_CTX_SIZE, false);
+}
+
+/**
+ * irdma_sc_alloc_stag - mr stag alloc
+ * @dev: sc device struct
+ * @info: stag info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_alloc_stag(struct irdma_sc_dev *dev,
+ struct irdma_allocate_stag_info *info,
+ u64 scratch, bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_sc_cqp *cqp;
+ u64 hdr;
+ enum irdma_page_size page_size;
+
+ if (!info->total_len && !info->all_memory)
+ return -EINVAL;
+
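+ /* Map the requested page size to its HW encoding; unrecognized sizes fall back to 4K */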
+ if (info->page_size == 0x40000000)
+ page_size = IRDMA_PAGE_SIZE_1G;
+ else if (info->page_size == 0x200000)
+ page_size = IRDMA_PAGE_SIZE_2M;
+ else
+ page_size = IRDMA_PAGE_SIZE_4K;
+
+ cqp = dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 8,
+ FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_STAGLEN, info->total_len));
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_PDID_HI, info->pd_id >> 18));
+ set_64bit_val(wqe, 40,
+ FIELD_PREP(IRDMA_CQPSQ_STAG_HMCFNIDX, info->hmc_fcn_index));
+
+ if (info->chunk_size)
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_CQPSQ_STAG_FIRSTPMPBLIDX, info->first_pm_pbl_idx));
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_ALLOC_STAG) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_MR, 1) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_ARIGHTS, info->access_rights) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_LPBLSIZE, info->chunk_size) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_HPAGESIZE, page_size) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_REMACCENABLED, info->remote_access) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_USEHMCFNIDX, info->use_hmc_fcn_index) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_USEPFRID, info->use_pf_rid) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_REMOTE_ATOMIC_EN,
+ info->remote_atomics_en) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: ALLOC_STAG WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_mr_reg_non_shared - non-shared mr registration
+ * @dev: sc device struct
+ * @info: mr info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev,
+ struct irdma_reg_ns_stag_info *info,
+ u64 scratch, bool post_sq)
+{
+ __le64 *wqe;
+ u64 fbo;
+ struct irdma_sc_cqp *cqp;
+ u64 hdr;
+ u32 pble_obj_cnt;
+ bool remote_access;
+ u8 addr_type;
+ enum irdma_page_size page_size;
+
+ if (!info->total_len && !info->all_memory)
+ return -EINVAL;
+
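+ /* Only 4K, 2M and 1G page sizes are supported for non-shared MR registration */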
+ if (info->page_size == 0x40000000)
+ page_size = IRDMA_PAGE_SIZE_1G;
+ else if (info->page_size == 0x200000)
+ page_size = IRDMA_PAGE_SIZE_2M;
+ else if (info->page_size == 0x1000)
+ page_size = IRDMA_PAGE_SIZE_4K;
+ else
+ return -EINVAL;
+
+ if (info->access_rights & (IRDMA_ACCESS_FLAGS_REMOTEREAD_ONLY |
+ IRDMA_ACCESS_FLAGS_REMOTEWRITE_ONLY))
+ remote_access = true;
+ else
+ remote_access = false;
+
+ pble_obj_cnt = dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
+ if (info->chunk_size && info->first_pm_pbl_index >= pble_obj_cnt)
+ return -EINVAL;
+
+ cqp = dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
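+ /* First byte offset (FBO) of the VA within its page, used for zero-based stags */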
+ fbo = info->va & (info->page_size - 1);
+
+ set_64bit_val(wqe, 0,
+ (info->addr_type == IRDMA_ADDR_TYPE_VA_BASED ?
+ info->va : fbo));
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMA_CQPSQ_STAG_STAGLEN, info->total_len) |
+ FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID));
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_CQPSQ_STAG_KEY, info->stag_key) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_PDID_HI, info->pd_id >> 18) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx));
+ if (!info->chunk_size) {
+ set_64bit_val(wqe, 32, info->reg_addr_pa);
+ set_64bit_val(wqe, 48, 0);
+ } else {
+ set_64bit_val(wqe, 32, 0);
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_CQPSQ_STAG_FIRSTPMPBLIDX, info->first_pm_pbl_index));
+ }
+ set_64bit_val(wqe, 40, info->hmc_fcn_index);
+ set_64bit_val(wqe, 56, 0);
+
+ addr_type = (info->addr_type == IRDMA_ADDR_TYPE_VA_BASED) ? 1 : 0;
+ hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_REG_MR) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_MR, 1) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_LPBLSIZE, info->chunk_size) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_HPAGESIZE, page_size) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_ARIGHTS, info->access_rights) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_REMACCENABLED, remote_access) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_VABASEDTO, addr_type) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_USEHMCFNIDX, info->use_hmc_fcn_index) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_USEPFRID, info->use_pf_rid) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_REMOTE_ATOMIC_EN,
+ info->remote_atomics_en) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: MR_REG_NS WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_dealloc_stag - deallocate stag
+ * @dev: sc device struct
+ * @info: dealloc stag info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_dealloc_stag(struct irdma_sc_dev *dev,
+ struct irdma_dealloc_stag_info *info,
+ u64 scratch, bool post_sq)
+{
+ u64 hdr;
+ __le64 *wqe;
+ struct irdma_sc_cqp *cqp;
+
+ cqp = dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 8,
+ FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID));
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_PDID_HI, info->pd_id >> 18));
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DEALLOC_STAG) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_MR, info->mr) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: DEALLOC_STAG WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_mw_alloc - mw allocate
+ * @dev: sc device struct
+ * @info: memory window allocation information
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_mw_alloc(struct irdma_sc_dev *dev,
+ struct irdma_mw_alloc_info *info, u64 scratch,
+ bool post_sq)
+{
+ u64 hdr;
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+
+ cqp = dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 8,
+ FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID));
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->mw_stag_index) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_PDID_HI, info->pd_id >> 18));
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_ALLOC_STAG) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_MWTYPE, info->mw_wide) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_MW1_BIND_DONT_VLDT_KEY,
+ info->mw1_bind_dont_vldt_key) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: MW_ALLOC WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_mr_fast_register - Posts RDMA fast register mr WR to iwarp qp
+ * @qp: sc qp struct
+ * @info: fast mr info
+ * @post_sq: flag for cqp db to ring
+ */
+int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp,
+ struct irdma_fast_reg_stag_info *info,
+ bool post_sq)
+{
+ u64 temp, hdr;
+ __le64 *wqe;
+ u32 wqe_idx;
+ enum irdma_page_size page_size;
+ struct irdma_post_sq_info sq_info = {};
+
+ if (info->page_size == 0x40000000)
+ page_size = IRDMA_PAGE_SIZE_1G;
+ else if (info->page_size == 0x200000)
+ page_size = IRDMA_PAGE_SIZE_2M;
+ else
+ page_size = IRDMA_PAGE_SIZE_4K;
+
+ sq_info.wr_id = info->wr_id;
+ sq_info.signaled = info->signaled;
+
+ wqe = irdma_qp_get_next_send_wqe(&qp->qp_uk, &wqe_idx,
+ IRDMA_QP_WQE_MIN_QUANTA, 0, &sq_info);
+ if (!wqe)
+ return -ENOMEM;
+
+ irdma_clr_wqes(&qp->qp_uk, wqe_idx);
+
+ ibdev_dbg(to_ibdev(qp->dev),
+ "MR: wr_id[%llxh] wqe_idx[%04d] location[%p]\n",
+ info->wr_id, wqe_idx,
+ &qp->qp_uk.sq_wrtrk_array[wqe_idx].wrid);
+
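+ /* Qword 0 carries the VA for VA-based stags, or the FBO otherwise */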
+ temp = (info->addr_type == IRDMA_ADDR_TYPE_VA_BASED) ?
+ (uintptr_t)info->va : info->fbo;
+ set_64bit_val(wqe, 0, temp);
+
+ temp = FIELD_GET(IRDMAQPSQ_FIRSTPMPBLIDXHI,
+ info->first_pm_pbl_index >> 16);
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_FIRSTPMPBLIDXHI, temp) |
+ FIELD_PREP(IRDMAQPSQ_PBLADDR >> IRDMA_HW_PAGE_SHIFT, info->reg_addr_pa));
+ set_64bit_val(wqe, 16,
+ info->total_len |
+ FIELD_PREP(IRDMAQPSQ_FIRSTPMPBLIDXLO, info->first_pm_pbl_index));
+
+ hdr = FIELD_PREP(IRDMAQPSQ_STAGKEY, info->stag_key) |
+ FIELD_PREP(IRDMAQPSQ_STAGINDEX, info->stag_idx) |
+ FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_FAST_REGISTER) |
+ FIELD_PREP(IRDMAQPSQ_LPBLSIZE, info->chunk_size) |
+ FIELD_PREP(IRDMAQPSQ_HPAGESIZE, page_size) |
+ FIELD_PREP(IRDMAQPSQ_STAGRIGHTS, info->access_rights) |
+ FIELD_PREP(IRDMAQPSQ_VABASEDTO, info->addr_type) |
+ FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) |
+ FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) |
+ FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(IRDMAQPSQ_REMOTE_ATOMICS_EN, info->remote_atomics_en) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: FAST_REG WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_QP_WQE_MIN_SIZE, false);
+
+ if (post_sq)
+ irdma_uk_qp_post_wr(&qp->qp_uk);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_gen_rts_ae - request AE generated after RTS
+ * @qp: sc qp struct
+ */
+static void irdma_sc_gen_rts_ae(struct irdma_sc_qp *qp)
+{
+ __le64 *wqe;
+ u64 hdr;
+ struct irdma_qp_uk *qp_uk;
+
+ qp_uk = &qp->qp_uk;
+
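+ /* SQ slot 1: NOP with local fence; SQ slot 2: GEN_RTS_AE WQE */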
+ wqe = qp_uk->sq_base[1].elem;
+
+ hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_NOP) |
+ FIELD_PREP(IRDMAQPSQ_LOCALFENCE, 1) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+ print_hex_dump_debug("QP: NOP W/LOCAL FENCE WQE", DUMP_PREFIX_OFFSET,
+ 16, 8, wqe, IRDMA_QP_WQE_MIN_SIZE, false);
+
+ wqe = qp_uk->sq_base[2].elem;
+ hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_GEN_RTS_AE) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+ print_hex_dump_debug("QP: CONN EST WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_QP_WQE_MIN_SIZE, false);
+}
+
+/**
+ * irdma_sc_send_lsmm - send last streaming mode message
+ * @qp: sc qp struct
+ * @lsmm_buf: buffer with lsmm message
+ * @size: size of lsmm buffer
+ * @stag: stag of lsmm buffer
+ */
+void irdma_sc_send_lsmm(struct irdma_sc_qp *qp, void *lsmm_buf, u32 size,
+ irdma_stag stag)
+{
+ __le64 *wqe;
+ u64 hdr;
+ struct irdma_qp_uk *qp_uk;
+
+ qp_uk = &qp->qp_uk;
+ wqe = qp_uk->sq_base->elem;
+
+ set_64bit_val(wqe, 0, (uintptr_t)lsmm_buf);
+ if (qp->qp_uk.uk_attrs->hw_rev == IRDMA_GEN_1) {
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_LEN, size) |
+ FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_STAG, stag));
+ } else {
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_FRAG_LEN, size) |
+ FIELD_PREP(IRDMAQPSQ_FRAG_STAG, stag) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity));
+ }
+ set_64bit_val(wqe, 16, 0);
+
+ hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_RDMA_SEND) |
+ FIELD_PREP(IRDMAQPSQ_STREAMMODE, 1) |
+ FIELD_PREP(IRDMAQPSQ_WAITFORRCVPDU, 1) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: SEND_LSMM WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_QP_WQE_MIN_SIZE, false);
+
+ if (qp->dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_RTS_AE)
+ irdma_sc_gen_rts_ae(qp);
+}
+
+/**
+ * irdma_sc_send_rtt - send last read0 or write0
+ * @qp: sc qp struct
+ * @read: Do read0 or write0
+ */
+void irdma_sc_send_rtt(struct irdma_sc_qp *qp, bool read)
+{
+ __le64 *wqe;
+ u64 hdr;
+ struct irdma_qp_uk *qp_uk;
+
+ qp_uk = &qp->qp_uk;
+ wqe = qp_uk->sq_base->elem;
+
+ set_64bit_val(wqe, 0, 0);
+ set_64bit_val(wqe, 16, 0);
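+ /* read0: post an RDMA read with dummy stag 0xabcd and remote stag 0x1234 */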
+ if (read) {
+ if (qp->qp_uk.uk_attrs->hw_rev == IRDMA_GEN_1) {
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_STAG, 0xabcd));
+ } else {
+ set_64bit_val(wqe, 8,
+ (u64)0xabcd | FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity));
+ }
+ hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, 0x1234) |
+ FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_RDMA_READ) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity);
+
+ } else {
+ if (qp->qp_uk.uk_attrs->hw_rev == IRDMA_GEN_1) {
+ set_64bit_val(wqe, 8, 0);
+ } else {
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity));
+ }
+ hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_RDMA_WRITE) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity);
+ }
+
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: RTR WQE", DUMP_PREFIX_OFFSET, 16, 8, wqe,
+ IRDMA_QP_WQE_MIN_SIZE, false);
+
+ if (qp->dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_RTS_AE)
+ irdma_sc_gen_rts_ae(qp);
+}
+
+/**
+ * irdma_iwarp_opcode - return the iwarp opcode of the incoming packet
+ * @info: aeq info for the packet
+ * @pkt: packet for error
+ */
+static u32 irdma_iwarp_opcode(struct irdma_aeqe_info *info, u8 *pkt)
+{
+ __be16 *mpa;
+ u32 opcode = 0xffffffff;
+
+ if (info->q2_data_written) {
+ mpa = (__be16 *)pkt;
+ opcode = ntohs(mpa[1]) & 0xf;
+ }
+
+ return opcode;
+}
+
+/**
+ * irdma_locate_mpa - return pointer to mpa in the pkt
+ * @pkt: packet with data
+ */
+static u8 *irdma_locate_mpa(u8 *pkt)
+{
+ /* skip over ethernet header */
+ pkt += IRDMA_MAC_HLEN;
+
+ /* Skip over IP and TCP headers */
+ pkt += 4 * (pkt[0] & 0x0f);
+ pkt += 4 * ((pkt[12] >> 4) & 0x0f);
+
+ return pkt;
+}
+
+/**
+ * irdma_bld_termhdr_ctrl - setup terminate hdr control fields
+ * @qp: sc qp ptr for pkt
+ * @hdr: term hdr
+ * @opcode: flush opcode for termhdr
+ * @layer_etype: error layer + error type
+ * @err: error code in the header
+ */
+static void irdma_bld_termhdr_ctrl(struct irdma_sc_qp *qp,
+ struct irdma_terminate_hdr *hdr,
+ enum irdma_flush_opcode opcode,
+ u8 layer_etype, u8 err)
+{
+ qp->flush_code = opcode;
+ hdr->layer_etype = layer_etype;
+ hdr->error_code = err;
+}
+
+/**
+ * irdma_bld_termhdr_ddp_rdma - setup ddp and rdma hdrs in terminate hdr
+ * @pkt: ptr to mpa in offending pkt
+ * @hdr: term hdr
+ * @copy_len: offending pkt length to be copied to term hdr
+ * @is_tagged: DDP tagged or untagged
+ */
+static void irdma_bld_termhdr_ddp_rdma(u8 *pkt, struct irdma_terminate_hdr *hdr,
+ int *copy_len, u8 *is_tagged)
+{
+ u16 ddp_seg_len;
+
+ ddp_seg_len = ntohs(*(__be16 *)pkt);
+ if (ddp_seg_len) {
+ *copy_len = 2;
+ hdr->hdrct = DDP_LEN_FLAG;
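+ /* The 0x80 bit of the DDP control byte marks a tagged buffer message */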
+ if (pkt[2] & 0x80) {
+ *is_tagged = 1;
+ if (ddp_seg_len >= TERM_DDP_LEN_TAGGED) {
+ *copy_len += TERM_DDP_LEN_TAGGED;
+ hdr->hdrct |= DDP_HDR_FLAG;
+ }
+ } else {
+ if (ddp_seg_len >= TERM_DDP_LEN_UNTAGGED) {
+ *copy_len += TERM_DDP_LEN_UNTAGGED;
+ hdr->hdrct |= DDP_HDR_FLAG;
+ }
+ if (ddp_seg_len >= (TERM_DDP_LEN_UNTAGGED + TERM_RDMA_LEN) &&
+ ((pkt[3] & RDMA_OPCODE_M) == RDMA_READ_REQ_OPCODE)) {
+ *copy_len += TERM_RDMA_LEN;
+ hdr->hdrct |= RDMA_HDR_FLAG;
+ }
+ }
+ }
+}
+
+/**
+ * irdma_bld_terminate_hdr - build terminate message header
+ * @qp: qp associated with received terminate AE
+ * @info: the struct containing AE information
+ */
+static int irdma_bld_terminate_hdr(struct irdma_sc_qp *qp,
+ struct irdma_aeqe_info *info)
+{
+ u8 *pkt = qp->q2_buf + Q2_BAD_FRAME_OFFSET;
+ int copy_len = 0;
+ u8 is_tagged = 0;
+ u32 opcode;
+ struct irdma_terminate_hdr *termhdr;
+
+ termhdr = (struct irdma_terminate_hdr *)qp->q2_buf;
+ memset(termhdr, 0, Q2_BAD_FRAME_OFFSET);
+
+ if (info->q2_data_written) {
+ pkt = irdma_locate_mpa(pkt);
+ irdma_bld_termhdr_ddp_rdma(pkt, termhdr, &copy_len, &is_tagged);
+ }
+
+ opcode = irdma_iwarp_opcode(info, pkt);
+ qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+ qp->sq_flush_code = info->sq;
+ qp->rq_flush_code = info->rq;
+
+ switch (info->ae_id) {
+ case IRDMA_AE_AMP_UNALLOCATED_STAG:
+ qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+ if (opcode == IRDMA_OP_TYPE_RDMA_WRITE)
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_PROT_ERR,
+ (LAYER_DDP << 4) | DDP_TAGGED_BUF,
+ DDP_TAGGED_INV_STAG);
+ else
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT,
+ RDMAP_INV_STAG);
+ break;
+ case IRDMA_AE_AMP_BOUNDS_VIOLATION:
+ qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+ if (info->q2_data_written)
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_PROT_ERR,
+ (LAYER_DDP << 4) | DDP_TAGGED_BUF,
+ DDP_TAGGED_BOUNDS);
+ else
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT,
+ RDMAP_INV_BOUNDS);
+ break;
+ case IRDMA_AE_AMP_BAD_PD:
+ switch (opcode) {
+ case IRDMA_OP_TYPE_RDMA_WRITE:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_PROT_ERR,
+ (LAYER_DDP << 4) | DDP_TAGGED_BUF,
+ DDP_TAGGED_UNASSOC_STAG);
+ break;
+ case IRDMA_OP_TYPE_SEND_INV:
+ case IRDMA_OP_TYPE_SEND_SOL_INV:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT,
+ RDMAP_CANT_INV_STAG);
+ break;
+ default:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT,
+ RDMAP_UNASSOC_STAG);
+ }
+ break;
+ case IRDMA_AE_AMP_INVALID_STAG:
+ qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT,
+ RDMAP_INV_STAG);
+ break;
+ case IRDMA_AE_AMP_BAD_QP:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_LOC_QP_OP_ERR,
+ (LAYER_DDP << 4) | DDP_UNTAGGED_BUF,
+ DDP_UNTAGGED_INV_QN);
+ break;
+ case IRDMA_AE_AMP_BAD_STAG_KEY:
+ case IRDMA_AE_AMP_BAD_STAG_INDEX:
+ qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+ switch (opcode) {
+ case IRDMA_OP_TYPE_SEND_INV:
+ case IRDMA_OP_TYPE_SEND_SOL_INV:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_OP_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_OP,
+ RDMAP_CANT_INV_STAG);
+ break;
+ default:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_OP,
+ RDMAP_INV_STAG);
+ }
+ break;
+ case IRDMA_AE_AMP_RIGHTS_VIOLATION:
+ case IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
+ case IRDMA_AE_PRIV_OPERATION_DENIED:
+ qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT,
+ RDMAP_ACCESS);
+ break;
+ case IRDMA_AE_AMP_TO_WRAP:
+ qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT,
+ RDMAP_TO_WRAP);
+ break;
+ case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_GENERAL_ERR,
+ (LAYER_MPA << 4) | DDP_LLP, MPA_CRC);
+ break;
+ case IRDMA_AE_LLP_SEGMENT_TOO_SMALL:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_LOC_LEN_ERR,
+ (LAYER_DDP << 4) | DDP_CATASTROPHIC,
+ DDP_CATASTROPHIC_LOCAL);
+ break;
+ case IRDMA_AE_LCE_QP_CATASTROPHIC:
+ case IRDMA_AE_DDP_NO_L_BIT:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_FATAL_ERR,
+ (LAYER_DDP << 4) | DDP_CATASTROPHIC,
+ DDP_CATASTROPHIC_LOCAL);
+ break;
+ case IRDMA_AE_DDP_INVALID_MSN_GAP_IN_MSN:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_GENERAL_ERR,
+ (LAYER_DDP << 4) | DDP_UNTAGGED_BUF,
+ DDP_UNTAGGED_INV_MSN_RANGE);
+ break;
+ case IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
+ qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_LOC_LEN_ERR,
+ (LAYER_DDP << 4) | DDP_UNTAGGED_BUF,
+ DDP_UNTAGGED_INV_TOO_LONG);
+ break;
+ case IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION:
+ if (is_tagged)
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_GENERAL_ERR,
+ (LAYER_DDP << 4) | DDP_TAGGED_BUF,
+ DDP_TAGGED_INV_DDP_VER);
+ else
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_GENERAL_ERR,
+ (LAYER_DDP << 4) | DDP_UNTAGGED_BUF,
+ DDP_UNTAGGED_INV_DDP_VER);
+ break;
+ case IRDMA_AE_DDP_UBE_INVALID_MO:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_GENERAL_ERR,
+ (LAYER_DDP << 4) | DDP_UNTAGGED_BUF,
+ DDP_UNTAGGED_INV_MO);
+ break;
+ case IRDMA_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_OP_ERR,
+ (LAYER_DDP << 4) | DDP_UNTAGGED_BUF,
+ DDP_UNTAGGED_INV_MSN_NO_BUF);
+ break;
+ case IRDMA_AE_DDP_UBE_INVALID_QN:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_GENERAL_ERR,
+ (LAYER_DDP << 4) | DDP_UNTAGGED_BUF,
+ DDP_UNTAGGED_INV_QN);
+ break;
+ case IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_GENERAL_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_OP,
+ RDMAP_INV_RDMAP_VER);
+ break;
+ default:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_FATAL_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_OP,
+ RDMAP_UNSPECIFIED);
+ break;
+ }
+
+ if (copy_len)
+ memcpy(termhdr + 1, pkt, copy_len);
+
+ return sizeof(struct irdma_terminate_hdr) + copy_len;
+}
+
+/**
+ * irdma_terminate_send_fin() - Send fin for terminate message
+ * @qp: qp associated with received terminate AE
+ */
+void irdma_terminate_send_fin(struct irdma_sc_qp *qp)
+{
+ irdma_term_modify_qp(qp, IRDMA_QP_STATE_TERMINATE,
+ IRDMAQP_TERM_SEND_FIN_ONLY, 0);
+}
+
+/**
+ * irdma_terminate_connection() - handle bad AE and send terminate to remote QP
+ * @qp: qp associated with received terminate AE
+ * @info: the struct containing AE information
+ */
+void irdma_terminate_connection(struct irdma_sc_qp *qp,
+ struct irdma_aeqe_info *info)
+{
+ u8 termlen = 0;
+
+ if (qp->term_flags & IRDMA_TERM_SENT)
+ return;
+
+ termlen = irdma_bld_terminate_hdr(qp, info);
+ irdma_terminate_start_timer(qp);
+ qp->term_flags |= IRDMA_TERM_SENT;
+ irdma_term_modify_qp(qp, IRDMA_QP_STATE_TERMINATE,
+ IRDMAQP_TERM_SEND_TERM_ONLY, termlen);
+}
+
+/**
+ * irdma_terminate_received - handle terminate received AE
+ * @qp: qp associated with received terminate AE
+ * @info: the struct containing AE information
+ */
+void irdma_terminate_received(struct irdma_sc_qp *qp,
+ struct irdma_aeqe_info *info)
+{
+ u8 *pkt = qp->q2_buf + Q2_BAD_FRAME_OFFSET;
+ __be32 *mpa;
+ u8 ddp_ctl;
+ u8 rdma_ctl;
+ u16 aeq_id = 0;
+ struct irdma_terminate_hdr *termhdr;
+
+ mpa = (__be32 *)irdma_locate_mpa(pkt);
+ if (info->q2_data_written) {
+ /* did not validate the frame - do it now */
+ ddp_ctl = (ntohl(mpa[0]) >> 8) & 0xff;
+ rdma_ctl = ntohl(mpa[0]) & 0xff;
+ if ((ddp_ctl & 0xc0) != 0x40)
+ aeq_id = IRDMA_AE_LCE_QP_CATASTROPHIC;
+ else if ((ddp_ctl & 0x03) != 1)
+ aeq_id = IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION;
+ else if (ntohl(mpa[2]) != 2)
+ aeq_id = IRDMA_AE_DDP_UBE_INVALID_QN;
+ else if (ntohl(mpa[3]) != 1)
+ aeq_id = IRDMA_AE_DDP_INVALID_MSN_GAP_IN_MSN;
+ else if (ntohl(mpa[4]) != 0)
+ aeq_id = IRDMA_AE_DDP_UBE_INVALID_MO;
+ else if ((rdma_ctl & 0xc0) != 0x40)
+ aeq_id = IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION;
+
+ info->ae_id = aeq_id;
+ if (info->ae_id) {
+ /* Bad terminate recvd - send back a terminate */
+ irdma_terminate_connection(qp, info);
+ return;
+ }
+ }
+
+ qp->term_flags |= IRDMA_TERM_RCVD;
+ qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+ termhdr = (struct irdma_terminate_hdr *)&mpa[5];
+ if (termhdr->layer_etype == RDMAP_REMOTE_PROT ||
+ termhdr->layer_etype == RDMAP_REMOTE_OP) {
+ irdma_terminate_done(qp, 0);
+ } else {
+ irdma_terminate_start_timer(qp);
+ irdma_terminate_send_fin(qp);
+ }
+}
+
+static int irdma_null_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri)
+{
+ return 0;
+}
+
+static void irdma_null_ws_remove(struct irdma_sc_vsi *vsi, u8 user_pri)
+{
+ /* do nothing */
+}
+
+static void irdma_null_ws_reset(struct irdma_sc_vsi *vsi)
+{
+ /* do nothing */
+}
+
+/**
+ * irdma_sc_vsi_init - Init the vsi structure
+ * @vsi: pointer to vsi structure to initialize
+ * @info: the info used to initialize the vsi struct
+ */
+void irdma_sc_vsi_init(struct irdma_sc_vsi *vsi,
+ struct irdma_vsi_init_info *info)
+{
+ int i;
+
+ vsi->dev = info->dev;
+ vsi->back_vsi = info->back_vsi;
+ vsi->register_qset = info->register_qset;
+ vsi->unregister_qset = info->unregister_qset;
+ vsi->mtu = info->params->mtu;
+ vsi->exception_lan_q = info->exception_lan_q;
+ vsi->vsi_idx = info->pf_data_vsi_num;
+
+ irdma_set_qos_info(vsi, info->params);
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+ mutex_init(&vsi->qos[i].qos_mutex);
+ INIT_LIST_HEAD(&vsi->qos[i].qplist);
+ }
+ if (vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2) {
+ vsi->dev->ws_add = irdma_ws_add;
+ vsi->dev->ws_remove = irdma_ws_remove;
+ vsi->dev->ws_reset = irdma_ws_reset;
+ } else {
+ vsi->dev->ws_add = irdma_null_ws_add;
+ vsi->dev->ws_remove = irdma_null_ws_remove;
+ vsi->dev->ws_reset = irdma_null_ws_reset;
+ }
+}
+
+/**
+ * irdma_get_stats_idx - Return stats index
+ * @vsi: pointer to the vsi
+ */
+static u16 irdma_get_stats_idx(struct irdma_sc_vsi *vsi)
+{
+ struct irdma_stats_inst_info stats_info = {};
+ struct irdma_sc_dev *dev = vsi->dev;
+ u8 i;
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ if (!irdma_cqp_stats_inst_cmd(vsi, IRDMA_OP_STATS_ALLOCATE,
+ &stats_info))
+ return stats_info.stats_idx;
+ }
+
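+ /* GEN_1, or the stats instance CQP command failed: use the driver-managed index array */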
+ for (i = 0; i < IRDMA_MAX_STATS_COUNT_GEN_1; i++) {
+ if (!dev->stats_idx_array[i]) {
+ dev->stats_idx_array[i] = true;
+ return i;
+ }
+ }
+
+ return IRDMA_INVALID_STATS_IDX;
+}
+
+/**
+ * irdma_hw_stats_init_gen1 - Initialize stat reg table used for gen1
+ * @vsi: vsi structure where hw_regs are set
+ *
+ * Populate the HW stats table
+ */
+static void irdma_hw_stats_init_gen1(struct irdma_sc_vsi *vsi)
+{
+ struct irdma_sc_dev *dev = vsi->dev;
+ const struct irdma_hw_stat_map *map;
+ u64 *stat_reg = vsi->hw_stats_regs;
+ u64 *regs = dev->hw_stats_regs;
+ u16 i, stats_reg_set = vsi->stats_idx;
+
+ map = dev->hw_stats_map;
+
+ /* First 4 stat instances are reserved for port level statistics. */
+ stats_reg_set += vsi->stats_inst_alloc ? IRDMA_FIRST_NON_PF_STAT : 0;
+
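+ /* Per-instance register stride depends on whether the counter is 32 or 64 bit */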
+ for (i = 0; i < dev->hw_attrs.max_stat_idx; i++) {
+ if (map[i].bitmask <= IRDMA_MAX_STATS_32)
+ stat_reg[i] = regs[i] + stats_reg_set * sizeof(u32);
+ else
+ stat_reg[i] = regs[i] + stats_reg_set * sizeof(u64);
+ }
+}
+
+/**
+ * irdma_vsi_stats_init - Initialize the vsi statistics
+ * @vsi: pointer to the vsi structure
+ * @info: The info structure used for initialization
+ */
+int irdma_vsi_stats_init(struct irdma_sc_vsi *vsi,
+ struct irdma_vsi_stats_info *info)
+{
+ struct irdma_dma_mem *stats_buff_mem;
+
+ vsi->pestat = info->pestat;
+ vsi->pestat->hw = vsi->dev->hw;
+ vsi->pestat->vsi = vsi;
+ stats_buff_mem = &vsi->pestat->gather_info.stats_buff_mem;
+ stats_buff_mem->size = ALIGN(IRDMA_GATHER_STATS_BUF_SIZE * 2, 1);
+ stats_buff_mem->va = dma_alloc_coherent(vsi->pestat->hw->device,
+ stats_buff_mem->size,
+ &stats_buff_mem->pa,
+ GFP_KERNEL);
+ if (!stats_buff_mem->va)
+ return -ENOMEM;
+
+ vsi->pestat->gather_info.gather_stats_va = stats_buff_mem->va;
+ vsi->pestat->gather_info.last_gather_stats_va =
+ (void *)((uintptr_t)stats_buff_mem->va +
+ IRDMA_GATHER_STATS_BUF_SIZE);
+
+ if (vsi->dev->hw_attrs.uk_attrs.hw_rev < IRDMA_GEN_3)
+ irdma_hw_stats_start_timer(vsi);
+
+ /* when stat allocation is not required default to fcn_id. */
+ vsi->stats_idx = info->fcn_id;
+ if (info->alloc_stats_inst) {
+ u16 stats_idx = irdma_get_stats_idx(vsi);
+
+ if (stats_idx != IRDMA_INVALID_STATS_IDX) {
+ vsi->stats_inst_alloc = true;
+ vsi->stats_idx = stats_idx;
+ vsi->pestat->gather_info.use_stats_inst = true;
+ vsi->pestat->gather_info.stats_inst_index = stats_idx;
+ }
+ }
+
+ if (vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
+ irdma_hw_stats_init_gen1(vsi);
+
+ return 0;
+}
+
+/**
+ * irdma_vsi_stats_free - Free the vsi stats
+ * @vsi: pointer to the vsi structure
+ */
+void irdma_vsi_stats_free(struct irdma_sc_vsi *vsi)
+{
+ struct irdma_stats_inst_info stats_info = {};
+ struct irdma_sc_dev *dev = vsi->dev;
+ u16 stats_idx = vsi->stats_idx;
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ if (vsi->stats_inst_alloc) {
+ stats_info.stats_idx = vsi->stats_idx;
+ irdma_cqp_stats_inst_cmd(vsi, IRDMA_OP_STATS_FREE,
+ &stats_info);
+ }
+ } else {
+ if (vsi->stats_inst_alloc &&
+ stats_idx < vsi->dev->hw_attrs.max_stat_inst)
+ vsi->dev->stats_idx_array[stats_idx] = false;
+ }
+
+ if (!vsi->pestat)
+ return;
+
+ if (dev->hw_attrs.uk_attrs.hw_rev < IRDMA_GEN_3)
+ irdma_hw_stats_stop_timer(vsi);
+ dma_free_coherent(vsi->pestat->hw->device,
+ vsi->pestat->gather_info.stats_buff_mem.size,
+ vsi->pestat->gather_info.stats_buff_mem.va,
+ vsi->pestat->gather_info.stats_buff_mem.pa);
+ vsi->pestat->gather_info.stats_buff_mem.va = NULL;
+}
+
+/**
+ * irdma_get_encoded_wqe_size - given wq size, returns hardware encoded size
+ * @wqsize: size of the wq (sq, rq) to encoded_size
+ * @queue_type: queue type selected for the calculation algorithm
+ */
+u8 irdma_get_encoded_wqe_size(u32 wqsize, enum irdma_queue_type queue_type)
+{
+ u8 encoded_size = 0;
+
+ if (queue_type == IRDMA_QUEUE_TYPE_SRQ) {
+ /* Smallest SRQ size is 256B (8 quanta), which gets
+ * encoded to 0.
+ */
+ encoded_size = ilog2(wqsize) - 3;
+
+ return encoded_size;
+ }
+ /* cqp sq's hw coded value starts from 1 for size of 4
+ * while it starts from 0 for qp's wqs.
+ */
+ if (queue_type == IRDMA_QUEUE_TYPE_CQP)
+ encoded_size = 1;
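+ /* The loop below adds ilog2(wqsize) - 2 to the encoded size */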
+ wqsize >>= 2;
+ while (wqsize >>= 1)
+ encoded_size++;
+
+ return encoded_size;
+}
+
+/**
+ * irdma_sc_gather_stats - collect the statistics
+ * @cqp: struct for cqp hw
+ * @info: gather stats info structure
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static int irdma_sc_gather_stats(struct irdma_sc_cqp *cqp,
+ struct irdma_stats_gather_info *info,
+ u64 scratch)
+{
+ __le64 *wqe;
+ u64 temp;
+
+ if (info->stats_buff_mem.size < IRDMA_GATHER_STATS_BUF_SIZE)
+ return -ENOMEM;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 40,
+ FIELD_PREP(IRDMA_CQPSQ_STATS_HMC_FCN_INDEX, info->hmc_fcn_index));
+ set_64bit_val(wqe, 32, info->stats_buff_mem.pa);
+
+ temp = FIELD_PREP(IRDMA_CQPSQ_STATS_WQEVALID, cqp->polarity) |
+ FIELD_PREP(IRDMA_CQPSQ_STATS_USE_INST, info->use_stats_inst) |
+ FIELD_PREP(IRDMA_CQPSQ_STATS_INST_INDEX,
+ info->stats_inst_index) |
+ FIELD_PREP(IRDMA_CQPSQ_STATS_USE_HMC_FCN_INDEX,
+ info->use_hmc_fcn_index) |
+ FIELD_PREP(IRDMA_CQPSQ_STATS_OP, IRDMA_CQP_OP_GATHER_STATS);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, temp);
+
+ print_hex_dump_debug("STATS: GATHER_STATS WQE", DUMP_PREFIX_OFFSET,
+ 16, 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+
+ irdma_sc_cqp_post_sq(cqp);
+ ibdev_dbg(to_ibdev(cqp->dev),
+ "STATS: CQP SQ head 0x%x tail 0x%x size 0x%x\n",
+ cqp->sq_ring.head, cqp->sq_ring.tail, cqp->sq_ring.size);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_manage_stats_inst - allocate or free stats instance
+ * @cqp: struct for cqp hw
+ * @info: stats info structure
+ * @alloc: alloc vs. delete flag
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static int irdma_sc_manage_stats_inst(struct irdma_sc_cqp *cqp,
+ struct irdma_stats_inst_info *info,
+ bool alloc, u64 scratch)
+{
+ __le64 *wqe;
+ u64 temp;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 40,
+ FIELD_PREP(IRDMA_CQPSQ_STATS_HMC_FCN_INDEX, info->hmc_fn_id));
+ temp = FIELD_PREP(IRDMA_CQPSQ_STATS_WQEVALID, cqp->polarity) |
+ FIELD_PREP(IRDMA_CQPSQ_STATS_ALLOC_INST, alloc) |
+ FIELD_PREP(IRDMA_CQPSQ_STATS_USE_HMC_FCN_INDEX,
+ info->use_hmc_fcn_index) |
+ FIELD_PREP(IRDMA_CQPSQ_STATS_INST_INDEX, info->stats_idx) |
+ FIELD_PREP(IRDMA_CQPSQ_STATS_OP, IRDMA_CQP_OP_MANAGE_STATS);
+
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, temp);
+
+ print_hex_dump_debug("WQE: MANAGE_STATS WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+
+ irdma_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * irdma_sc_set_up_map - set the up map table
+ * @cqp: struct for cqp hw
+ * @info: User priority map info
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static int irdma_sc_set_up_map(struct irdma_sc_cqp *cqp,
+ struct irdma_up_info *info, u64 scratch)
+{
+ __le64 *wqe;
+ u64 temp = 0;
+ int i;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++)
+ temp |= (u64)info->map[i] << (i * 8);
+
+ set_64bit_val(wqe, 0, temp);
+ set_64bit_val(wqe, 40,
+ FIELD_PREP(IRDMA_CQPSQ_UP_CNPOVERRIDE, info->cnp_up_override) |
+ FIELD_PREP(IRDMA_CQPSQ_UP_HMCFCNIDX, info->hmc_fcn_idx));
+
+ temp = FIELD_PREP(IRDMA_CQPSQ_UP_WQEVALID, cqp->polarity) |
+ FIELD_PREP(IRDMA_CQPSQ_UP_USEVLAN, info->use_vlan) |
+ FIELD_PREP(IRDMA_CQPSQ_UP_USEOVERRIDE,
+ info->use_cnp_up_override) |
+ FIELD_PREP(IRDMA_CQPSQ_UP_OP, IRDMA_CQP_OP_UP_MAP);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, temp);
+
+ print_hex_dump_debug("WQE: UPMAP WQE", DUMP_PREFIX_OFFSET, 16, 8, wqe,
+ IRDMA_CQP_WQE_SIZE * 8, false);
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_manage_ws_node - create/modify/destroy WS node
+ * @cqp: struct for cqp hw
+ * @info: node info structure
+ * @node_op: 0 for add, 1 for modify, 2 for delete
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static int irdma_sc_manage_ws_node(struct irdma_sc_cqp *cqp,
+ struct irdma_ws_node_info *info,
+ enum irdma_ws_node_op node_op, u64 scratch)
+{
+ __le64 *wqe;
+ u64 temp = 0;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 32,
+ FIELD_PREP(IRDMA_CQPSQ_WS_VSI, info->vsi) |
+ FIELD_PREP(IRDMA_CQPSQ_WS_WEIGHT, info->weight));
+
+ temp = FIELD_PREP(IRDMA_CQPSQ_WS_WQEVALID, cqp->polarity) |
+ FIELD_PREP(IRDMA_CQPSQ_WS_NODEOP, node_op) |
+ FIELD_PREP(IRDMA_CQPSQ_WS_ENABLENODE, info->enable) |
+ FIELD_PREP(IRDMA_CQPSQ_WS_NODETYPE, info->type_leaf) |
+ FIELD_PREP(IRDMA_CQPSQ_WS_PRIOTYPE, info->prio_type) |
+ FIELD_PREP(IRDMA_CQPSQ_WS_TC, info->tc) |
+ FIELD_PREP(IRDMA_CQPSQ_WS_OP, IRDMA_CQP_OP_WORK_SCHED_NODE) |
+ FIELD_PREP(IRDMA_CQPSQ_WS_PARENTID, info->parent_id) |
+ FIELD_PREP(IRDMA_CQPSQ_WS_NODEID, info->id);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, temp);
+
+ print_hex_dump_debug("WQE: MANAGE_WS WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_qp_flush_wqes - flush qp's wqe
+ * @qp: sc qp
+ * @info: flush information
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+int irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp,
+ struct irdma_qp_flush_info *info, u64 scratch,
+ bool post_sq)
+{
+ u64 temp = 0;
+ __le64 *wqe;
+ struct irdma_sc_cqp *cqp;
+ u64 hdr;
+ bool flush_sq = false, flush_rq = false;
+
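+ /* Flush each queue at most once; repeated requests for an already flushed queue are rejected */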
+ if (info->rq && !qp->flush_rq)
+ flush_rq = true;
+ if (info->sq && !qp->flush_sq)
+ flush_sq = true;
+ qp->flush_sq |= flush_sq;
+ qp->flush_rq |= flush_rq;
+
+ if (!flush_sq && !flush_rq) {
+ ibdev_dbg(to_ibdev(qp->dev),
+ "CQP: Additional flush request ignored for qp %x\n",
+ qp->qp_uk.qp_id);
+ return -EALREADY;
+ }
+
+ cqp = qp->pd->dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ if (info->userflushcode) {
+ if (flush_rq)
+ temp |= FIELD_PREP(IRDMA_CQPSQ_FWQE_RQMNERR,
+ info->rq_minor_code) |
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_RQMJERR,
+ info->rq_major_code);
+ if (flush_sq)
+ temp |= FIELD_PREP(IRDMA_CQPSQ_FWQE_SQMNERR,
+ info->sq_minor_code) |
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_SQMJERR,
+ info->sq_major_code);
+ }
+ set_64bit_val(wqe, 16, temp);
+
+ temp = (info->generate_ae) ?
+ info->ae_code | FIELD_PREP(IRDMA_CQPSQ_FWQE_AESOURCE,
+ info->ae_src) : 0;
+ set_64bit_val(wqe, 8, temp);
+ if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ set_64bit_val(wqe, 40,
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_SQ_IDX, info->err_sq_idx));
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_RQ_IDX, info->err_rq_idx));
+ }
+
+ hdr = qp->qp_uk.qp_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_FLUSH_WQES) |
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_GENERATE_AE, info->generate_ae) |
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_USERFLCODE, info->userflushcode) |
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_FLUSHSQ, flush_sq) |
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_FLUSHRQ, flush_rq) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ hdr |= FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_SQ_IDX_VALID, info->err_sq_idx_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_RQ_IDX_VALID, info->err_rq_idx_valid);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: QP_FLUSH WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_gen_ae - generate AE, uses flush WQE CQP OP
+ * @qp: sc qp
+ * @info: gen ae information
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_gen_ae(struct irdma_sc_qp *qp,
+ struct irdma_gen_ae_info *info, u64 scratch,
+ bool post_sq)
+{
+ u64 temp;
+ __le64 *wqe;
+ struct irdma_sc_cqp *cqp;
+ u64 hdr;
+
+ cqp = qp->pd->dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ temp = info->ae_code | FIELD_PREP(IRDMA_CQPSQ_FWQE_AESOURCE,
+ info->ae_src);
+ set_64bit_val(wqe, 8, temp);
+
+ hdr = qp->qp_uk.qp_id | FIELD_PREP(IRDMA_CQPSQ_OPCODE,
+ IRDMA_CQP_OP_GEN_AE) |
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_GENERATE_AE, 1) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: GEN_AE WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_qp_upload_context - upload qp's context
+ * @dev: sc device struct
+ * @info: upload context info ptr for return
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_qp_upload_context(struct irdma_sc_dev *dev,
+ struct irdma_upload_context_info *info,
+ u64 scratch, bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_sc_cqp *cqp;
+ u64 hdr;
+
+ cqp = dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 16, info->buf_pa);
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_UCTX_QPID, info->qp_id) |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_UPLOAD_CONTEXT) |
+ FIELD_PREP(IRDMA_CQPSQ_UCTX_QPTYPE, info->qp_type) |
+ FIELD_PREP(IRDMA_CQPSQ_UCTX_RAWFORMAT, info->raw_format) |
+ FIELD_PREP(IRDMA_CQPSQ_UCTX_FREEZEQP, info->freeze_qp) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: QP_UPLOAD_CTX WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_manage_push_page - Handle push page
+ * @cqp: struct for cqp hw
+ * @info: push page info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_manage_push_page(struct irdma_sc_cqp *cqp,
+ struct irdma_cqp_manage_push_page_info *info,
+ u64 scratch, bool post_sq)
+{
+ __le64 *wqe;
+ u64 hdr;
+
+ if (info->free_page &&
+ info->push_idx >= cqp->dev->hw_attrs.max_hw_device_pages)
+ return -EINVAL;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 16, info->qs_handle);
+ hdr = FIELD_PREP(IRDMA_CQPSQ_MPP_PPIDX, info->push_idx) |
+ FIELD_PREP(IRDMA_CQPSQ_MPP_PPTYPE, info->push_page_type) |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MANAGE_PUSH_PAGES) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity) |
+ FIELD_PREP(IRDMA_CQPSQ_MPP_FREE_PAGE, info->free_page);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: MANAGE_PUSH_PAGES WQE", DUMP_PREFIX_OFFSET,
+ 16, 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_suspend_qp - suspend qp for param change
+ * @cqp: struct for cqp hw
+ * @qp: sc qp struct
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static int irdma_sc_suspend_qp(struct irdma_sc_cqp *cqp, struct irdma_sc_qp *qp,
+ u64 scratch)
+{
+ u64 hdr;
+ __le64 *wqe;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_SUSPENDQP_QPID, qp->qp_uk.qp_id) |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_SUSPEND_QP) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: SUSPEND_QP WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_resume_qp - resume qp after suspend
+ * @cqp: struct for cqp hw
+ * @qp: sc qp struct
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static int irdma_sc_resume_qp(struct irdma_sc_cqp *cqp, struct irdma_sc_qp *qp,
+ u64 scratch)
+{
+ u64 hdr;
+ __le64 *wqe;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_CQPSQ_RESUMEQP_QSHANDLE, qp->qs_handle));
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_RESUMEQP_QPID, qp->qp_uk.qp_id) |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_RESUME_QP) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: RESUME_QP WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_cq_ack - acknowledge completion q
+ * @cq: cq struct
+ */
+static inline void irdma_sc_cq_ack(struct irdma_sc_cq *cq)
+{
+ writel(cq->cq_uk.cq_id, cq->cq_uk.cq_ack_db);
+}
+
+/**
+ * irdma_sc_cq_init - initialize completion q
+ * @cq: cq struct
+ * @info: cq initialization info
+ */
+int irdma_sc_cq_init(struct irdma_sc_cq *cq, struct irdma_cq_init_info *info)
+{
+ u32 pble_obj_cnt;
+
+ pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
+ if (info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt)
+ return -EINVAL;
+
+ cq->cq_pa = info->cq_base_pa;
+ cq->dev = info->dev;
+ cq->ceq_id = info->ceq_id;
+ info->cq_uk_init_info.cqe_alloc_db = cq->dev->cq_arm_db;
+ info->cq_uk_init_info.cq_ack_db = cq->dev->cq_ack_db;
+ irdma_uk_cq_init(&cq->cq_uk, &info->cq_uk_init_info);
+
+ cq->virtual_map = info->virtual_map;
+ cq->pbl_chunk_size = info->pbl_chunk_size;
+ cq->ceqe_mask = info->ceqe_mask;
+ cq->cq_type = (info->type) ? info->type : IRDMA_CQ_TYPE_IWARP;
+ cq->shadow_area_pa = info->shadow_area_pa;
+ cq->shadow_read_threshold = info->shadow_read_threshold;
+ cq->ceq_id_valid = info->ceq_id_valid;
+ cq->tph_en = info->tph_en;
+ cq->tph_val = info->tph_val;
+ cq->first_pm_pbl_idx = info->first_pm_pbl_idx;
+ cq->vsi = info->vsi;
+
+ return 0;
+}
+
+/**
+ * irdma_sc_cq_create - create completion q
+ * @cq: cq struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @check_overflow: flag for overflow check
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_cq_create(struct irdma_sc_cq *cq, u64 scratch,
+ bool check_overflow, bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_sc_cqp *cqp;
+ u64 hdr;
+
+ cqp = cq->dev->cqp;
+ if (cq->cq_uk.cq_id >= cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].max_cnt)
+ return -EINVAL;
+
+ if (cq->ceq_id >= cq->dev->hmc_fpm_misc.max_ceqs)
+ return -EINVAL;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
+ set_64bit_val(wqe, 8, (uintptr_t)cq >> 1);
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_CQPSQ_CQ_SHADOW_READ_THRESHOLD, cq->shadow_read_threshold));
+ set_64bit_val(wqe, 32, (cq->virtual_map ? 0 : cq->cq_pa));
+ set_64bit_val(wqe, 40, cq->shadow_area_pa);
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_CQPSQ_CQ_FIRSTPMPBLIDX, (cq->virtual_map ? cq->first_pm_pbl_idx : 0)));
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMA_CQPSQ_TPHVAL, cq->tph_val) |
+ FIELD_PREP(IRDMA_CQPSQ_VSIIDX, cq->vsi->vsi_idx));
+
+ hdr = FLD_LS_64(cq->dev, cq->cq_uk.cq_id, IRDMA_CQPSQ_CQ_CQID) |
+ FLD_LS_64(cq->dev, (cq->ceq_id_valid ? cq->ceq_id : 0),
+ IRDMA_CQPSQ_CQ_CEQID) |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_CQ) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_LPBLSIZE, cq->pbl_chunk_size) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_CHKOVERFLOW, check_overflow) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_VIRTMAP, cq->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_CQID_HIGH, cq->cq_uk.cq_id >> 22) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_CEQID_HIGH,
+ (cq->ceq_id_valid ? cq->ceq_id : 0) >> 10) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_ENCEQEMASK, cq->ceqe_mask) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_CEQIDVALID, cq->ceq_id_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_TPHEN, cq->tph_en) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_AVOIDMEMCNFLCT,
+ cq->cq_uk.avoid_mem_cflct) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: CQ_CREATE WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_cq_destroy - destroy completion q
+ * @cq: cq struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+int irdma_sc_cq_destroy(struct irdma_sc_cq *cq, u64 scratch, bool post_sq)
+{
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+ u64 hdr;
+
+ cqp = cq->dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
+ set_64bit_val(wqe, 8, (uintptr_t)cq >> 1);
+ set_64bit_val(wqe, 40, cq->shadow_area_pa);
+ set_64bit_val(wqe, 48,
+ (cq->virtual_map ? cq->first_pm_pbl_idx : 0));
+
+ hdr = cq->cq_uk.cq_id |
+ FLD_LS_64(cq->dev, (cq->ceq_id_valid ? cq->ceq_id : 0),
+ IRDMA_CQPSQ_CQ_CEQID) |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_CQ) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_LPBLSIZE, cq->pbl_chunk_size) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_VIRTMAP, cq->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_ENCEQEMASK, cq->ceqe_mask) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_CEQIDVALID, cq->ceq_id_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_TPHEN, cq->tph_en) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_AVOIDMEMCNFLCT, cq->cq_uk.avoid_mem_cflct) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: CQ_DESTROY WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_cq_resize - set resized cq buffer info
+ * @cq: resized cq
+ * @info: resized cq buffer info
+ */
+void irdma_sc_cq_resize(struct irdma_sc_cq *cq, struct irdma_modify_cq_info *info)
+{
+ cq->virtual_map = info->virtual_map;
+ cq->cq_pa = info->cq_pa;
+ cq->first_pm_pbl_idx = info->first_pm_pbl_idx;
+ cq->pbl_chunk_size = info->pbl_chunk_size;
+ irdma_uk_cq_resize(&cq->cq_uk, info->cq_base, info->cq_size);
+}
+
+/**
+ * irdma_sc_cq_modify - modify a Completion Queue
+ * @cq: cq struct
+ * @info: modification info struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag to post to sq
+ */
+static int irdma_sc_cq_modify(struct irdma_sc_cq *cq,
+ struct irdma_modify_cq_info *info, u64 scratch,
+ bool post_sq)
+{
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+ u64 hdr;
+ u32 pble_obj_cnt;
+
+ pble_obj_cnt = cq->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
+ if (info->cq_resize && info->virtual_map &&
+ info->first_pm_pbl_idx >= pble_obj_cnt)
+ return -EINVAL;
+
+ cqp = cq->dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0, info->cq_size);
+ set_64bit_val(wqe, 8, (uintptr_t)cq >> 1);
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_CQPSQ_CQ_SHADOW_READ_THRESHOLD, info->shadow_read_threshold));
+ set_64bit_val(wqe, 32, info->cq_pa);
+ set_64bit_val(wqe, 40, cq->shadow_area_pa);
+ set_64bit_val(wqe, 48, info->first_pm_pbl_idx);
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMA_CQPSQ_TPHVAL, cq->tph_val) |
+ FIELD_PREP(IRDMA_CQPSQ_VSIIDX, cq->vsi->vsi_idx));
+
+ hdr = cq->cq_uk.cq_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MODIFY_CQ) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_CQRESIZE, info->cq_resize) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_LPBLSIZE, info->pbl_chunk_size) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_CHKOVERFLOW, info->check_overflow) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_VIRTMAP, info->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_ENCEQEMASK, cq->ceqe_mask) |
+ FIELD_PREP(IRDMA_CQPSQ_TPHEN, cq->tph_en) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_AVOIDMEMCNFLCT,
+ cq->cq_uk.avoid_mem_cflct) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: CQ_MODIFY WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_get_decoded_ird_size_gen_3 - get decoded IRD size for GEN 3
+ * @ird_enc: IRD encoding
+ *
+ * IRD size defaults to a value of 4 in case of invalid input.
+ */
+static u16 irdma_sc_get_decoded_ird_size_gen_3(u8 ird_enc)
+{
+ switch (ird_enc) {
+ case IRDMA_IRD_HW_SIZE_4096_GEN3:
+ return 4096;
+ case IRDMA_IRD_HW_SIZE_2048_GEN3:
+ return 2048;
+ case IRDMA_IRD_HW_SIZE_1024_GEN3:
+ return 1024;
+ case IRDMA_IRD_HW_SIZE_512_GEN3:
+ return 512;
+ case IRDMA_IRD_HW_SIZE_256_GEN3:
+ return 256;
+ case IRDMA_IRD_HW_SIZE_128_GEN3:
+ return 128;
+ case IRDMA_IRD_HW_SIZE_64_GEN3:
+ return 64;
+ case IRDMA_IRD_HW_SIZE_32_GEN3:
+ return 32;
+ case IRDMA_IRD_HW_SIZE_16_GEN3:
+ return 16;
+ case IRDMA_IRD_HW_SIZE_8_GEN3:
+ return 8;
+ case IRDMA_IRD_HW_SIZE_4_GEN3:
+ return 4;
+ default:
+ return 4;
+ }
+}
+
+/**
+ * irdma_check_cqp_progress - check cqp processing progress
+ * @timeout: timeout info struct
+ * @dev: sc device struct
+ */
+void irdma_check_cqp_progress(struct irdma_cqp_timeout *timeout, struct irdma_sc_dev *dev)
+{
+ u64 completed_ops = atomic64_read(&dev->cqp->completed_ops);
+
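+ /* Reset the stall counter on progress; otherwise count only while commands are still outstanding */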
+ if (timeout->compl_cqp_cmds != completed_ops) {
+ timeout->compl_cqp_cmds = completed_ops;
+ timeout->count = 0;
+ } else if (timeout->compl_cqp_cmds != dev->cqp->requested_ops) {
+ timeout->count++;
+ }
+}
+
+/**
+ * irdma_get_cqp_reg_info - get head and tail for cqp using registers
+ * @cqp: struct for cqp hw
+ * @val: cqp tail register value
+ * @tail: wqtail register value
+ * @error: cqp processing err
+ */
+static inline void irdma_get_cqp_reg_info(struct irdma_sc_cqp *cqp, u32 *val,
+ u32 *tail, u32 *error)
+{
+ *val = readl(cqp->dev->hw_regs[IRDMA_CQPTAIL]);
+ *tail = FIELD_GET(IRDMA_CQPTAIL_WQTAIL, *val);
+ *error = FIELD_GET(IRDMA_CQPTAIL_CQP_OP_ERR, *val);
+}
+
+/**
+ * irdma_sc_cqp_def_cmpl_ae_handler - remove completed requests from pending list
+ * @dev: sc device struct
+ * @info: AE entry info
+ * @first: true if this is the first call to this handler for given AEQE
+ * @scratch: (out) scratch entry pointer
+ * @sw_def_info: (in/out) SW ticket value for this AE
+ *
+ * In the case of an AE_DEF_CMPL event, this function should be called in a
+ * loop until it returns a NULL pointer via scratch.
+ * For each call, it looks for a matching CQP request on pending list,
+ * removes it from the list and returns the pointer to the associated scratch
+ * entry.
+ * If this is the first call to this function for a given AEQE, the sw_def_info
+ * value is not used to find matching requests. Instead, it is populated
+ * with the value from the first matching cqp_request on the list.
+ * For subsequent calls, ooo_op->sw_def_info needs to match the value passed
+ * in by the caller.
+ *
+ * Return: scratch entry pointer for cqp_request to be released or NULL
+ * if no matching request is found.
+ */
+void irdma_sc_cqp_def_cmpl_ae_handler(struct irdma_sc_dev *dev,
+ struct irdma_aeqe_info *info,
+ bool first, u64 *scratch,
+ u32 *sw_def_info)
+{
+ struct irdma_ooo_cqp_op *ooo_op;
+ unsigned long flags;
+
+ *scratch = 0;
+
+ spin_lock_irqsave(&dev->cqp->ooo_list_lock, flags);
+ list_for_each_entry(ooo_op, &dev->cqp->ooo_pnd, list_entry) {
+ if (ooo_op->deferred &&
+ ((first && ooo_op->def_info == info->def_info) ||
+ (!first && ooo_op->sw_def_info == *sw_def_info))) {
+ *sw_def_info = ooo_op->sw_def_info;
+ *scratch = ooo_op->scratch;
+
+ list_move(&ooo_op->list_entry, &dev->cqp->ooo_avail);
+ atomic64_inc(&dev->cqp->completed_ops);
+
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&dev->cqp->ooo_list_lock, flags);
+
+ if (first && !*scratch)
+ ibdev_dbg(to_ibdev(dev),
+ "AEQ: deferred completion with unknown ticket: def_info 0x%x\n",
+ info->def_info);
+}
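+
+/*
+ * Illustrative sketch only: per the comment above, an AE_DEF_CMPL handler
+ * is expected to drain all matching deferred completions in a loop until
+ * scratch comes back as zero. How the scratch value is turned back into a
+ * cqp_request and released is driver-specific and not shown here.
+ *
+ * u64 scratch;
+ * u32 sw_def_info;
+ * bool first = true;
+ *
+ * do {
+ * irdma_sc_cqp_def_cmpl_ae_handler(dev, info, first,
+ * &scratch, &sw_def_info);
+ * if (!scratch)
+ * break;
+ * ... release the request identified by scratch ...
+ * first = false;
+ * } while (1);
+ */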
+
+/**
+ * irdma_sc_cqp_cleanup_handler - remove requests from pending list
+ * @dev: sc device struct
+ *
+ * This function should be called in a loop from irdma_cleanup_pending_cqp_op.
+ * For each call, it returns first CQP request on pending list, removes it
+ * from the list and returns the pointer to the associated scratch entry.
+ *
+ * Return: scratch entry pointer for cqp_request to be released or NULL
+ * if pending list is empty.
+ */
+u64 irdma_sc_cqp_cleanup_handler(struct irdma_sc_dev *dev)
+{
+ struct irdma_ooo_cqp_op *ooo_op;
+ u64 scratch = 0;
+
+ list_for_each_entry(ooo_op, &dev->cqp->ooo_pnd, list_entry) {
+ scratch = ooo_op->scratch;
+
+ list_move(&ooo_op->list_entry, &dev->cqp->ooo_avail);
+ atomic64_inc(&dev->cqp->completed_ops);
+
+ break;
+ }
+
+ return scratch;
+}
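+
+/*
+ * Illustrative sketch only: as noted above, irdma_cleanup_pending_cqp_op()
+ * (not part of this hunk) is expected to drain the pending list by calling
+ * this helper until it returns zero:
+ *
+ * u64 scratch;
+ *
+ * while ((scratch = irdma_sc_cqp_cleanup_handler(dev)))
+ * ... release the request identified by scratch ...
+ */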
+
+/**
+ * irdma_cqp_poll_registers - poll cqp registers
+ * @cqp: struct for cqp hw
+ * @tail: wqtail register value
+ * @count: how many times to try for completion
+ */
+static int irdma_cqp_poll_registers(struct irdma_sc_cqp *cqp, u32 tail,
+ u32 count)
+{
+ u32 i = 0;
+ u32 newtail, error, val;
+
+ while (i++ < count) {
+ irdma_get_cqp_reg_info(cqp, &val, &newtail, &error);
+ if (error) {
+ error = readl(cqp->dev->hw_regs[IRDMA_CQPERRCODES]);
+ ibdev_dbg(to_ibdev(cqp->dev),
+ "CQP: CQPERRCODES error_code[x%08X]\n",
+ error);
+ return -EIO;
+ }
+ if (newtail != tail) {
+ /* SUCCESS */
+ IRDMA_RING_MOVE_TAIL(cqp->sq_ring);
+ atomic64_inc(&cqp->completed_ops);
+ return 0;
+ }
+ udelay(cqp->dev->hw_attrs.max_sleep_count);
+ }
+
+ return -ETIMEDOUT;
+}
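+
+/*
+ * Illustrative sketch only: the register-polling wait is used by taking a
+ * snapshot of the CQP tail before ringing the doorbell and then polling
+ * for it to move, mirroring the pattern used by the FPM commit/query
+ * helpers later in this change:
+ *
+ * u32 tail, val, error;
+ *
+ * irdma_get_cqp_reg_info(cqp, &val, &tail, &error);
+ * irdma_sc_cqp_post_sq(cqp);
+ * ret = irdma_cqp_poll_registers(cqp, tail,
+ * cqp->dev->hw_attrs.max_done_count);
+ */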
+
+/**
+ * irdma_sc_decode_fpm_commit - decode a 64 bit value into count and base
+ * @dev: sc device struct
+ * @buf: pointer to commit buffer
+ * @buf_idx: buffer index
+ * @obj_info: object info pointer
+ * @rsrc_idx: index of memory resource
+ */
+static u64 irdma_sc_decode_fpm_commit(struct irdma_sc_dev *dev, __le64 *buf,
+ u32 buf_idx, struct irdma_hmc_obj_info *obj_info,
+ u32 rsrc_idx)
+{
+ u64 temp;
+
+ get_64bit_val(buf, buf_idx, &temp);
+
+ switch (rsrc_idx) {
+ case IRDMA_HMC_IW_QP:
+ obj_info[rsrc_idx].cnt = (u32)FIELD_GET(IRDMA_COMMIT_FPM_QPCNT, temp);
+ break;
+ case IRDMA_HMC_IW_CQ:
+ obj_info[rsrc_idx].cnt = (u32)FLD_RS_64(dev, temp, IRDMA_COMMIT_FPM_CQCNT);
+ break;
+ case IRDMA_HMC_IW_APBVT_ENTRY:
+ if (dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2)
+ obj_info[rsrc_idx].cnt = 1;
+ else
+ obj_info[rsrc_idx].cnt = 0;
+ break;
+ default:
+ obj_info[rsrc_idx].cnt = (u32)temp;
+ break;
+ }
+
+ obj_info[rsrc_idx].base = (temp >> IRDMA_COMMIT_FPM_BASE_S) * 512;
+
+ return temp;
+}
+
+/**
+ * irdma_sc_parse_fpm_commit_buf - parse fpm commit buffer
+ * @dev: pointer to dev struct
+ * @buf: ptr to fpm commit buffer
+ * @info: ptr to irdma_hmc_obj_info struct
+ * @sd: number of SDs for HMC objects
+ *
+ * parses fpm commit info and copies base values
+ * of hmc objects into hmc_info
+ */
+static void
+irdma_sc_parse_fpm_commit_buf(struct irdma_sc_dev *dev, __le64 *buf,
+ struct irdma_hmc_obj_info *info, u32 *sd)
+{
+ u64 size;
+ u32 i;
+ u64 max_base = 0;
+ u32 last_hmc_obj = 0;
+
+ irdma_sc_decode_fpm_commit(dev, buf, 0, info,
+ IRDMA_HMC_IW_QP);
+ irdma_sc_decode_fpm_commit(dev, buf, 8, info,
+ IRDMA_HMC_IW_CQ);
+ irdma_sc_decode_fpm_commit(dev, buf, 16, info,
+ IRDMA_HMC_IW_SRQ);
+ irdma_sc_decode_fpm_commit(dev, buf, 24, info,
+ IRDMA_HMC_IW_HTE);
+ irdma_sc_decode_fpm_commit(dev, buf, 32, info,
+ IRDMA_HMC_IW_ARP);
+ irdma_sc_decode_fpm_commit(dev, buf, 40, info,
+ IRDMA_HMC_IW_APBVT_ENTRY);
+ irdma_sc_decode_fpm_commit(dev, buf, 48, info,
+ IRDMA_HMC_IW_MR);
+ irdma_sc_decode_fpm_commit(dev, buf, 56, info,
+ IRDMA_HMC_IW_XF);
+ irdma_sc_decode_fpm_commit(dev, buf, 64, info,
+ IRDMA_HMC_IW_XFFL);
+ irdma_sc_decode_fpm_commit(dev, buf, 72, info,
+ IRDMA_HMC_IW_Q1);
+ irdma_sc_decode_fpm_commit(dev, buf, 80, info,
+ IRDMA_HMC_IW_Q1FL);
+ irdma_sc_decode_fpm_commit(dev, buf, 88, info,
+ IRDMA_HMC_IW_TIMER);
+ irdma_sc_decode_fpm_commit(dev, buf, 112, info,
+ IRDMA_HMC_IW_PBLE);
+ /* skipping RSVD. */
+ if (dev->hw_attrs.uk_attrs.hw_rev != IRDMA_GEN_1) {
+ irdma_sc_decode_fpm_commit(dev, buf, 96, info,
+ IRDMA_HMC_IW_FSIMC);
+ irdma_sc_decode_fpm_commit(dev, buf, 104, info,
+ IRDMA_HMC_IW_FSIAV);
+ irdma_sc_decode_fpm_commit(dev, buf, 128, info,
+ IRDMA_HMC_IW_RRF);
+ irdma_sc_decode_fpm_commit(dev, buf, 136, info,
+ IRDMA_HMC_IW_RRFFL);
+ irdma_sc_decode_fpm_commit(dev, buf, 144, info,
+ IRDMA_HMC_IW_HDR);
+ irdma_sc_decode_fpm_commit(dev, buf, 152, info,
+ IRDMA_HMC_IW_MD);
+ if (dev->cqp->protocol_used == IRDMA_IWARP_PROTOCOL_ONLY) {
+ irdma_sc_decode_fpm_commit(dev, buf, 160, info,
+ IRDMA_HMC_IW_OOISC);
+ irdma_sc_decode_fpm_commit(dev, buf, 168, info,
+ IRDMA_HMC_IW_OOISCFFL);
+ }
+ }
+
+ /* searching for the last object in HMC to find the size of the HMC area. */
+ for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++) {
+ if (info[i].base > max_base && info[i].cnt) {
+ max_base = info[i].base;
+ last_hmc_obj = i;
+ }
+ }
+
+ size = info[last_hmc_obj].cnt * info[last_hmc_obj].size +
+ info[last_hmc_obj].base;
+
+ if (size & 0x1FFFFF)
+ *sd = (u32)((size >> 21) + 1); /* add 1 for remainder */
+ else
+ *sd = (u32)(size >> 21);
+}
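+
+/*
+ * Worked example for the SD math above: each segment descriptor (SD)
+ * covers 2MB (1 << 21 bytes). If the highest-placed HMC object ends at
+ * base + cnt * size = 9MB, then 9MB & 0x1FFFFF is non-zero, so
+ * *sd = (9MB >> 21) + 1 = 4 + 1 = 5 SDs are required.
+ */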
+
+/**
+ * irdma_sc_decode_fpm_query() - Decode a 64 bit value into max count and size
+ * @buf: ptr to fpm query buffer
+ * @buf_idx: index into buf
+ * @obj_info: ptr to irdma_hmc_obj_info struct
+ * @rsrc_idx: resource index into info
+ *
+ * Decode a 64 bit value from fpm query buffer into max count and size
+ */
+static u64 irdma_sc_decode_fpm_query(__le64 *buf, u32 buf_idx,
+ struct irdma_hmc_obj_info *obj_info,
+ u32 rsrc_idx)
+{
+ u64 temp;
+ u32 size;
+
+ get_64bit_val(buf, buf_idx, &temp);
+ obj_info[rsrc_idx].max_cnt = (u32)temp;
+ size = (u32)(temp >> 32);
+ obj_info[rsrc_idx].size = BIT_ULL(size);
+
+ return temp;
+}
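+
+/*
+ * Worked example for the decode above: the low 32 bits of the 64-bit word
+ * hold the maximum object count and the high 32 bits hold log2 of the
+ * per-object size. For temp = 0x0000000C00010000, max_cnt = 0x10000
+ * (65536 objects) and size = BIT_ULL(12) = 4096 bytes per object.
+ */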
+
+/**
+ * irdma_sc_parse_fpm_query_buf() - parses fpm query buffer
+ * @dev: ptr to shared code device
+ * @buf: ptr to fpm query buffer
+ * @hmc_info: ptr to irdma_hmc_obj_info struct
+ * @hmc_fpm_misc: ptr to fpm data
+ *
+ * parses fpm query buffer and copies max_cnt and
+ * size values of hmc objects into hmc_info
+ */
+static int irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf,
+ struct irdma_hmc_info *hmc_info,
+ struct irdma_hmc_fpm_misc *hmc_fpm_misc)
+{
+ struct irdma_hmc_obj_info *obj_info;
+ u8 ird_encoding;
+ u64 temp;
+ u32 size;
+ u16 max_pe_sds;
+
+ obj_info = hmc_info->hmc_obj;
+
+ get_64bit_val(buf, 0, &temp);
+ hmc_info->first_sd_index = (u16)FIELD_GET(IRDMA_QUERY_FPM_FIRST_PE_SD_INDEX, temp);
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ max_pe_sds = (u16)FIELD_GET(IRDMA_QUERY_FPM_MAX_PE_SDS_GEN3, temp);
+ else
+ max_pe_sds = (u16)FIELD_GET(IRDMA_QUERY_FPM_MAX_PE_SDS, temp);
+
+ /* Reduce SD count for unprivileged functions by 1 to account for PBLE
+ * backing page rounding
+ */
+ if (dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2 &&
+ (hmc_info->hmc_fn_id >= dev->hw_attrs.first_hw_vf_fpm_id ||
+ !dev->privileged))
+ max_pe_sds--;
+
+ hmc_fpm_misc->max_sds = max_pe_sds;
+ hmc_info->sd_table.sd_cnt = max_pe_sds + hmc_info->first_sd_index;
+ get_64bit_val(buf, 8, &temp);
+ obj_info[IRDMA_HMC_IW_QP].max_cnt = (u32)FIELD_GET(IRDMA_QUERY_FPM_MAX_QPS, temp);
+ size = (u32)(temp >> 32);
+ obj_info[IRDMA_HMC_IW_QP].size = BIT_ULL(size);
+
+ get_64bit_val(buf, 16, &temp);
+ obj_info[IRDMA_HMC_IW_CQ].max_cnt = (u32)FIELD_GET(IRDMA_QUERY_FPM_MAX_CQS, temp);
+ size = (u32)(temp >> 32);
+ obj_info[IRDMA_HMC_IW_CQ].size = BIT_ULL(size);
+
+ irdma_sc_decode_fpm_query(buf, 24, obj_info, IRDMA_HMC_IW_SRQ);
+ irdma_sc_decode_fpm_query(buf, 32, obj_info, IRDMA_HMC_IW_HTE);
+ irdma_sc_decode_fpm_query(buf, 40, obj_info, IRDMA_HMC_IW_ARP);
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ obj_info[IRDMA_HMC_IW_APBVT_ENTRY].size = 0;
+ obj_info[IRDMA_HMC_IW_APBVT_ENTRY].max_cnt = 0;
+ } else {
+ obj_info[IRDMA_HMC_IW_APBVT_ENTRY].size = 8192;
+ obj_info[IRDMA_HMC_IW_APBVT_ENTRY].max_cnt = 1;
+ }
+
+ irdma_sc_decode_fpm_query(buf, 48, obj_info, IRDMA_HMC_IW_MR);
+ irdma_sc_decode_fpm_query(buf, 56, obj_info, IRDMA_HMC_IW_XF);
+
+ get_64bit_val(buf, 64, &temp);
+ obj_info[IRDMA_HMC_IW_XFFL].max_cnt = (u32)temp;
+ obj_info[IRDMA_HMC_IW_XFFL].size = 4;
+ hmc_fpm_misc->xf_block_size = FIELD_GET(IRDMA_QUERY_FPM_XFBLOCKSIZE, temp);
+ if (obj_info[IRDMA_HMC_IW_XF].max_cnt && !hmc_fpm_misc->xf_block_size)
+ return -EINVAL;
+
+ irdma_sc_decode_fpm_query(buf, 72, obj_info, IRDMA_HMC_IW_Q1);
+ get_64bit_val(buf, 80, &temp);
+ obj_info[IRDMA_HMC_IW_Q1FL].max_cnt = (u32)temp;
+ obj_info[IRDMA_HMC_IW_Q1FL].size = 4;
+
+ hmc_fpm_misc->q1_block_size = FIELD_GET(IRDMA_QUERY_FPM_Q1BLOCKSIZE, temp);
+ if (!hmc_fpm_misc->q1_block_size)
+ return -EINVAL;
+
+ irdma_sc_decode_fpm_query(buf, 88, obj_info, IRDMA_HMC_IW_TIMER);
+
+ get_64bit_val(buf, 112, &temp);
+ obj_info[IRDMA_HMC_IW_PBLE].max_cnt = (u32)temp;
+ obj_info[IRDMA_HMC_IW_PBLE].size = 8;
+
+ get_64bit_val(buf, 120, &temp);
+ hmc_fpm_misc->max_ceqs = FIELD_GET(IRDMA_QUERY_FPM_MAX_CEQS, temp);
+ hmc_fpm_misc->ht_multiplier = FIELD_GET(IRDMA_QUERY_FPM_HTMULTIPLIER, temp);
+ hmc_fpm_misc->timer_bucket = FIELD_GET(IRDMA_QUERY_FPM_TIMERBUCKET, temp);
+ if (FIELD_GET(IRDMA_MANAGE_RSRC_VER2,
+ dev->feature_info[IRDMA_FTN_FLAGS])) {
+ ird_encoding = (u8)FIELD_GET(IRDMA_QUERY_FPM_MAX_IRD, temp);
+ hmc_fpm_misc->ird =
+ irdma_sc_get_decoded_ird_size_gen_3(ird_encoding) / 2;
+ dev->hw_attrs.max_hw_ird = hmc_fpm_misc->ird;
+ dev->hw_attrs.max_hw_ord = hmc_fpm_misc->ird;
+ }
+ if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
+ return 0;
+ irdma_sc_decode_fpm_query(buf, 96, obj_info, IRDMA_HMC_IW_FSIMC);
+ irdma_sc_decode_fpm_query(buf, 104, obj_info, IRDMA_HMC_IW_FSIAV);
+ irdma_sc_decode_fpm_query(buf, 128, obj_info, IRDMA_HMC_IW_RRF);
+
+ get_64bit_val(buf, 136, &temp);
+ obj_info[IRDMA_HMC_IW_RRFFL].max_cnt = (u32)temp;
+ obj_info[IRDMA_HMC_IW_RRFFL].size = 4;
+ hmc_fpm_misc->rrf_block_size = FIELD_GET(IRDMA_QUERY_FPM_RRFBLOCKSIZE, temp);
+ if (!hmc_fpm_misc->rrf_block_size &&
+ obj_info[IRDMA_HMC_IW_RRFFL].max_cnt)
+ return -EINVAL;
+
+ irdma_sc_decode_fpm_query(buf, 144, obj_info, IRDMA_HMC_IW_HDR);
+ irdma_sc_decode_fpm_query(buf, 152, obj_info, IRDMA_HMC_IW_MD);
+
+ if (dev->cqp->protocol_used == IRDMA_IWARP_PROTOCOL_ONLY) {
+ irdma_sc_decode_fpm_query(buf, 160, obj_info, IRDMA_HMC_IW_OOISC);
+
+ get_64bit_val(buf, 168, &temp);
+ obj_info[IRDMA_HMC_IW_OOISCFFL].max_cnt = (u32)temp;
+ obj_info[IRDMA_HMC_IW_OOISCFFL].size = 4;
+ hmc_fpm_misc->ooiscf_block_size = FIELD_GET(IRDMA_QUERY_FPM_OOISCFBLOCKSIZE, temp);
+ if (!hmc_fpm_misc->ooiscf_block_size &&
+ obj_info[IRDMA_HMC_IW_OOISCFFL].max_cnt)
+ return -EINVAL;
+ }
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ get_64bit_val(buf, 176, &temp);
+ hmc_fpm_misc->loc_mem_pages = (u32)FIELD_GET(IRDMA_QUERY_FPM_LOC_MEM_PAGES, temp);
+ if (!hmc_fpm_misc->loc_mem_pages)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_sc_cqp_init - Initialize buffers for a control Queue Pair
+ * @cqp: IWARP control queue pair pointer
+ * @info: IWARP control queue pair init info pointer
+ *
+ * Initializes the object and context buffers for a control Queue Pair.
+ */
+int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp,
+ struct irdma_cqp_init_info *info)
+{
+ struct irdma_ooo_cqp_op *ooo_op;
+ u32 num_ooo_ops;
+ u8 hw_sq_size;
+
+ if (info->sq_size > IRDMA_CQP_SW_SQSIZE_2048 ||
+ info->sq_size < IRDMA_CQP_SW_SQSIZE_4 ||
+ ((info->sq_size & (info->sq_size - 1))))
+ return -EINVAL;
+
+ hw_sq_size = irdma_get_encoded_wqe_size(info->sq_size,
+ IRDMA_QUEUE_TYPE_CQP);
+ cqp->size = sizeof(*cqp);
+ cqp->sq_size = info->sq_size;
+ cqp->hw_sq_size = hw_sq_size;
+ cqp->sq_base = info->sq;
+ cqp->host_ctx = info->host_ctx;
+ cqp->sq_pa = info->sq_pa;
+ cqp->host_ctx_pa = info->host_ctx_pa;
+ cqp->dev = info->dev;
+ cqp->struct_ver = info->struct_ver;
+ cqp->hw_maj_ver = info->hw_maj_ver;
+ cqp->hw_min_ver = info->hw_min_ver;
+ cqp->scratch_array = info->scratch_array;
+ cqp->polarity = 0;
+ cqp->en_datacenter_tcp = info->en_datacenter_tcp;
+ cqp->ena_vf_count = info->ena_vf_count;
+ cqp->hmc_profile = info->hmc_profile;
+ cqp->ceqs_per_vf = info->ceqs_per_vf;
+ cqp->disable_packed = info->disable_packed;
+ cqp->rocev2_rto_policy = info->rocev2_rto_policy;
+ cqp->protocol_used = info->protocol_used;
+ memcpy(&cqp->dcqcn_params, &info->dcqcn_params, sizeof(cqp->dcqcn_params));
+ if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ cqp->ooisc_blksize = info->ooisc_blksize;
+ cqp->rrsp_blksize = info->rrsp_blksize;
+ cqp->q1_blksize = info->q1_blksize;
+ cqp->xmit_blksize = info->xmit_blksize;
+ cqp->blksizes_valid = info->blksizes_valid;
+ cqp->ts_shift = info->ts_shift;
+ cqp->ts_override = info->ts_override;
+ cqp->en_fine_grained_timers = info->en_fine_grained_timers;
+ cqp->pe_en_vf_cnt = info->pe_en_vf_cnt;
+ cqp->ooo_op_array = info->ooo_op_array;
+ /* initialize the OOO lists */
+ INIT_LIST_HEAD(&cqp->ooo_avail);
+ INIT_LIST_HEAD(&cqp->ooo_pnd);
+ if (cqp->ooo_op_array) {
+ /* Populate avail list entries */
+ for (num_ooo_ops = 0, ooo_op = info->ooo_op_array;
+ num_ooo_ops < cqp->sq_size;
+ num_ooo_ops++, ooo_op++)
+ list_add(&ooo_op->list_entry, &cqp->ooo_avail);
+ }
+ }
+ info->dev->cqp = cqp;
+
+ IRDMA_RING_INIT(cqp->sq_ring, cqp->sq_size);
+ cqp->last_def_cmpl_ticket = 0;
+ cqp->sw_def_cmpl_ticket = 0;
+ cqp->requested_ops = 0;
+ atomic64_set(&cqp->completed_ops, 0);
+ /* for the cqp commands backlog. */
+ INIT_LIST_HEAD(&cqp->dev->cqp_cmd_head);
+
+ writel(0, cqp->dev->hw_regs[IRDMA_CQPTAIL]);
+ if (cqp->dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2) {
+ writel(0, cqp->dev->hw_regs[IRDMA_CQPDB]);
+ writel(0, cqp->dev->hw_regs[IRDMA_CCQPSTATUS]);
+ }
+
+ ibdev_dbg(to_ibdev(cqp->dev),
+ "WQE: sq_size[%04d] hw_sq_size[%04d] sq_base[%p] sq_pa[%p] cqp[%p] polarity[x%04x]\n",
+ cqp->sq_size, cqp->hw_sq_size, cqp->sq_base,
+ (u64 *)(uintptr_t)cqp->sq_pa, cqp, cqp->polarity);
+ return 0;
+}
+
+/**
+ * irdma_sc_cqp_create - create cqp during bringup
+ * @cqp: struct for cqp hw
+ * @maj_err: If error, major err number
+ * @min_err: If error, minor err number
+ */
+int irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, u16 *min_err)
+{
+ u64 temp;
+ u8 hw_rev;
+ u32 cnt = 0, p1, p2, val = 0, err_code;
+ int ret_code;
+
+ hw_rev = cqp->dev->hw_attrs.uk_attrs.hw_rev;
+ cqp->sdbuf.size = ALIGN(IRDMA_UPDATE_SD_BUFF_SIZE * cqp->sq_size,
+ IRDMA_SD_BUF_ALIGNMENT);
+ cqp->sdbuf.va = dma_alloc_coherent(cqp->dev->hw->device,
+ cqp->sdbuf.size, &cqp->sdbuf.pa,
+ GFP_KERNEL);
+ if (!cqp->sdbuf.va)
+ return -ENOMEM;
+
+ spin_lock_init(&cqp->dev->cqp_lock);
+ spin_lock_init(&cqp->ooo_list_lock);
+
+ temp = FIELD_PREP(IRDMA_CQPHC_SQSIZE, cqp->hw_sq_size) |
+ FIELD_PREP(IRDMA_CQPHC_SVER, cqp->struct_ver) |
+ FIELD_PREP(IRDMA_CQPHC_DISABLE_PFPDUS, cqp->disable_packed) |
+ FIELD_PREP(IRDMA_CQPHC_CEQPERVF, cqp->ceqs_per_vf);
+ if (hw_rev >= IRDMA_GEN_2) {
+ temp |= FIELD_PREP(IRDMA_CQPHC_ROCEV2_RTO_POLICY,
+ cqp->rocev2_rto_policy) |
+ FIELD_PREP(IRDMA_CQPHC_PROTOCOL_USED,
+ cqp->protocol_used);
+ }
+ if (hw_rev >= IRDMA_GEN_3)
+ temp |= FIELD_PREP(IRDMA_CQPHC_EN_FINE_GRAINED_TIMERS,
+ cqp->en_fine_grained_timers);
+
+ set_64bit_val(cqp->host_ctx, 0, temp);
+ set_64bit_val(cqp->host_ctx, 8, cqp->sq_pa);
+
+ temp = FIELD_PREP(IRDMA_CQPHC_ENABLED_VFS, cqp->ena_vf_count) |
+ FIELD_PREP(IRDMA_CQPHC_HMC_PROFILE, cqp->hmc_profile);
+
+ if (hw_rev >= IRDMA_GEN_3)
+ temp |= FIELD_PREP(IRDMA_CQPHC_OOISC_BLKSIZE,
+ cqp->ooisc_blksize) |
+ FIELD_PREP(IRDMA_CQPHC_RRSP_BLKSIZE,
+ cqp->rrsp_blksize) |
+ FIELD_PREP(IRDMA_CQPHC_Q1_BLKSIZE, cqp->q1_blksize) |
+ FIELD_PREP(IRDMA_CQPHC_XMIT_BLKSIZE,
+ cqp->xmit_blksize) |
+ FIELD_PREP(IRDMA_CQPHC_BLKSIZES_VALID,
+ cqp->blksizes_valid) |
+ FIELD_PREP(IRDMA_CQPHC_TIMESTAMP_OVERRIDE,
+ cqp->ts_override) |
+ FIELD_PREP(IRDMA_CQPHC_TS_SHIFT, cqp->ts_shift);
+ set_64bit_val(cqp->host_ctx, 16, temp);
+ set_64bit_val(cqp->host_ctx, 24, (uintptr_t)cqp);
+ temp = FIELD_PREP(IRDMA_CQPHC_HW_MAJVER, cqp->hw_maj_ver) |
+ FIELD_PREP(IRDMA_CQPHC_HW_MINVER, cqp->hw_min_ver);
+ if (hw_rev >= IRDMA_GEN_2) {
+ temp |= FIELD_PREP(IRDMA_CQPHC_MIN_RATE, cqp->dcqcn_params.min_rate) |
+ FIELD_PREP(IRDMA_CQPHC_MIN_DEC_FACTOR, cqp->dcqcn_params.min_dec_factor);
+ }
+ set_64bit_val(cqp->host_ctx, 32, temp);
+ set_64bit_val(cqp->host_ctx, 40, 0);
+ temp = 0;
+ if (hw_rev >= IRDMA_GEN_2) {
+ temp |= FIELD_PREP(IRDMA_CQPHC_DCQCN_T, cqp->dcqcn_params.dcqcn_t) |
+ FIELD_PREP(IRDMA_CQPHC_RAI_FACTOR, cqp->dcqcn_params.rai_factor) |
+ FIELD_PREP(IRDMA_CQPHC_HAI_FACTOR, cqp->dcqcn_params.hai_factor);
+ }
+ set_64bit_val(cqp->host_ctx, 48, temp);
+ temp = 0;
+ if (hw_rev >= IRDMA_GEN_2) {
+ temp |= FIELD_PREP(IRDMA_CQPHC_DCQCN_B, cqp->dcqcn_params.dcqcn_b) |
+ FIELD_PREP(IRDMA_CQPHC_DCQCN_F, cqp->dcqcn_params.dcqcn_f) |
+ FIELD_PREP(IRDMA_CQPHC_CC_CFG_VALID, cqp->dcqcn_params.cc_cfg_valid) |
+ FIELD_PREP(IRDMA_CQPHC_RREDUCE_MPERIOD, cqp->dcqcn_params.rreduce_mperiod);
+ }
+ set_64bit_val(cqp->host_ctx, 56, temp);
+ print_hex_dump_debug("WQE: CQP_HOST_CTX WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, cqp->host_ctx, IRDMA_CQP_CTX_SIZE * 8, false);
+ p1 = cqp->host_ctx_pa >> 32;
+ p2 = (u32)cqp->host_ctx_pa;
+
+ writel(p1, cqp->dev->hw_regs[IRDMA_CCQPHIGH]);
+ writel(p2, cqp->dev->hw_regs[IRDMA_CCQPLOW]);
+
+ do {
+ if (cnt++ > cqp->dev->hw_attrs.max_done_count) {
+ ret_code = -ETIMEDOUT;
+ goto err;
+ }
+ udelay(cqp->dev->hw_attrs.max_sleep_count);
+ val = readl(cqp->dev->hw_regs[IRDMA_CCQPSTATUS]);
+ } while (!val);
+
+ if (FLD_RS_32(cqp->dev, val, IRDMA_CCQPSTATUS_CCQP_ERR)) {
+ ret_code = -EOPNOTSUPP;
+ goto err;
+ }
+
+ cqp->process_cqp_sds = irdma_update_sds_noccq;
+ return 0;
+
+err:
+ dma_free_coherent(cqp->dev->hw->device, cqp->sdbuf.size,
+ cqp->sdbuf.va, cqp->sdbuf.pa);
+ cqp->sdbuf.va = NULL;
+ err_code = readl(cqp->dev->hw_regs[IRDMA_CQPERRCODES]);
+ *min_err = FIELD_GET(IRDMA_CQPERRCODES_CQP_MINOR_CODE, err_code);
+ *maj_err = FIELD_GET(IRDMA_CQPERRCODES_CQP_MAJOR_CODE, err_code);
+ return ret_code;
+}
+
+/**
+ * irdma_sc_cqp_post_sq - post to cqp's sq
+ * @cqp: struct for cqp hw
+ */
+void irdma_sc_cqp_post_sq(struct irdma_sc_cqp *cqp)
+{
+ writel(IRDMA_RING_CURRENT_HEAD(cqp->sq_ring), cqp->dev->cqp_db);
+
+ ibdev_dbg(to_ibdev(cqp->dev),
+ "WQE: CQP SQ head 0x%x tail 0x%x size 0x%x\n",
+ cqp->sq_ring.head, cqp->sq_ring.tail, cqp->sq_ring.size);
+}
+
+/**
+ * irdma_sc_cqp_get_next_send_wqe_idx - get next wqe on cqp sq
+ * and pass back index
+ * @cqp: CQP HW structure
+ * @scratch: private data for CQP WQE
+ * @wqe_idx: WQE index of CQP SQ
+ */
+__le64 *irdma_sc_cqp_get_next_send_wqe_idx(struct irdma_sc_cqp *cqp, u64 scratch,
+ u32 *wqe_idx)
+{
+ __le64 *wqe = NULL;
+ int ret_code;
+
+ if (IRDMA_RING_FULL_ERR(cqp->sq_ring)) {
+ ibdev_dbg(to_ibdev(cqp->dev),
+ "WQE: CQP SQ is full, head 0x%x tail 0x%x size 0x%x\n",
+ cqp->sq_ring.head, cqp->sq_ring.tail,
+ cqp->sq_ring.size);
+ return NULL;
+ }
+ IRDMA_ATOMIC_RING_MOVE_HEAD(cqp->sq_ring, *wqe_idx, ret_code);
+ if (ret_code)
+ return NULL;
+
+ cqp->requested_ops++;
+ if (!*wqe_idx)
+ cqp->polarity = !cqp->polarity;
+ wqe = cqp->sq_base[*wqe_idx].elem;
+ cqp->scratch_array[*wqe_idx] = scratch;
+ IRDMA_CQP_INIT_WQE(wqe);
+
+ return wqe;
+}
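+
+/*
+ * Illustrative sketch only: the CQP commands in this change share a common
+ * pattern around this helper (or irdma_sc_cqp_get_next_send_wqe()): fill
+ * the WQE fields, build the header with the current polarity as the valid
+ * bit, order the writes with dma_wmb(), then write the header and ring the
+ * doorbell:
+ *
+ * wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ * if (!wqe)
+ * return -ENOMEM;
+ * set_64bit_val(wqe, 16, ...);
+ * hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, opcode) |
+ * FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ * dma_wmb();
+ * set_64bit_val(wqe, 24, hdr);
+ * if (post_sq)
+ * irdma_sc_cqp_post_sq(cqp);
+ */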
+
+/**
+ * irdma_sc_cqp_destroy - destroy cqp during close
+ * @cqp: struct for cqp hw
+ */
+int irdma_sc_cqp_destroy(struct irdma_sc_cqp *cqp)
+{
+ u32 cnt = 0, val;
+ int ret_code = 0;
+
+ writel(0, cqp->dev->hw_regs[IRDMA_CCQPHIGH]);
+ writel(0, cqp->dev->hw_regs[IRDMA_CCQPLOW]);
+ do {
+ if (cnt++ > cqp->dev->hw_attrs.max_done_count) {
+ ret_code = -ETIMEDOUT;
+ break;
+ }
+ udelay(cqp->dev->hw_attrs.max_sleep_count);
+ val = readl(cqp->dev->hw_regs[IRDMA_CCQPSTATUS]);
+ } while (FLD_RS_32(cqp->dev, val, IRDMA_CCQPSTATUS_CCQP_DONE));
+
+ dma_free_coherent(cqp->dev->hw->device, cqp->sdbuf.size,
+ cqp->sdbuf.va, cqp->sdbuf.pa);
+ cqp->sdbuf.va = NULL;
+ return ret_code;
+}
+
+/**
+ * irdma_sc_ccq_arm - enable intr for control cq
+ * @ccq: ccq sc struct
+ */
+void irdma_sc_ccq_arm(struct irdma_sc_cq *ccq)
+{
+ unsigned long flags;
+ u64 temp_val;
+ u16 sw_cq_sel;
+ u8 arm_next_se;
+ u8 arm_seq_num;
+
+ spin_lock_irqsave(&ccq->dev->cqp_lock, flags);
+ get_64bit_val(ccq->cq_uk.shadow_area, 32, &temp_val);
+ sw_cq_sel = (u16)FIELD_GET(IRDMA_CQ_DBSA_SW_CQ_SELECT, temp_val);
+ arm_next_se = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_NEXT_SE, temp_val);
+ arm_seq_num = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_SEQ_NUM, temp_val);
+ arm_seq_num++;
+ temp_val = FIELD_PREP(IRDMA_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) |
+ FIELD_PREP(IRDMA_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) |
+ FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT_SE, arm_next_se) |
+ FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT, 1);
+ set_64bit_val(ccq->cq_uk.shadow_area, 32, temp_val);
+ spin_unlock_irqrestore(&ccq->dev->cqp_lock, flags);
+
+ dma_wmb(); /* make sure shadow area is updated before arming */
+
+ writel(ccq->cq_uk.cq_id, ccq->dev->cq_arm_db);
+}
+
+/**
+ * irdma_sc_process_def_cmpl - process deferred or pending completion
+ * @cqp: CQP sc struct
+ * @info: CQP CQE info
+ * @wqe_idx: CQP WQE descriptor index
+ * @def_info: deferred op ticket value or out-of-order completion id
+ * @def_cmpl: true for deferred completion, false for pending (RCA)
+ */
+static void irdma_sc_process_def_cmpl(struct irdma_sc_cqp *cqp,
+ struct irdma_ccq_cqe_info *info,
+ u32 wqe_idx, u32 def_info, bool def_cmpl)
+{
+ struct irdma_ooo_cqp_op *ooo_op;
+ unsigned long flags;
+
+ /* Deferred and out-of-order completions share the same list of pending
+ * completions. Since the list can be also accessed from AE handler,
+ * it must be protected by a lock.
+ */
+ spin_lock_irqsave(&cqp->ooo_list_lock, flags);
+
+ /* For deferred completions bump up SW completion ticket value. */
+ if (def_cmpl) {
+ cqp->last_def_cmpl_ticket = def_info;
+ cqp->sw_def_cmpl_ticket++;
+ }
+ if (!list_empty(&cqp->ooo_avail)) {
+ ooo_op = list_first_entry(&cqp->ooo_avail,
+ struct irdma_ooo_cqp_op, list_entry);
+
+ list_del(&ooo_op->list_entry);
+ ooo_op->scratch = info->scratch;
+ ooo_op->def_info = def_info;
+ ooo_op->sw_def_info = cqp->sw_def_cmpl_ticket;
+ ooo_op->deferred = def_cmpl;
+ ooo_op->wqe_idx = wqe_idx;
+ /* Pending completions must be chronologically ordered,
+ * so add them at the end of the list.
+ */
+ list_add_tail(&ooo_op->list_entry, &cqp->ooo_pnd);
+ }
+ spin_unlock_irqrestore(&cqp->ooo_list_lock, flags);
+
+ info->pending = true;
+}
+
+/**
+ * irdma_sc_process_ooo_cmpl - process out-of-order (final) completion
+ * @cqp: CQP sc struct
+ * @info: CQP CQE info
+ * @def_info: out-of-order completion id
+ */
+static void irdma_sc_process_ooo_cmpl(struct irdma_sc_cqp *cqp,
+ struct irdma_ccq_cqe_info *info,
+ u32 def_info)
+{
+ struct irdma_ooo_cqp_op *ooo_op_tmp;
+ struct irdma_ooo_cqp_op *ooo_op;
+ unsigned long flags;
+
+ info->scratch = 0;
+
+ spin_lock_irqsave(&cqp->ooo_list_lock, flags);
+ list_for_each_entry_safe(ooo_op, ooo_op_tmp, &cqp->ooo_pnd,
+ list_entry) {
+ if (!ooo_op->deferred && ooo_op->def_info == def_info) {
+ list_del(&ooo_op->list_entry);
+ info->scratch = ooo_op->scratch;
+ list_add(&ooo_op->list_entry, &cqp->ooo_avail);
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&cqp->ooo_list_lock, flags);
+
+ if (!info->scratch)
+ ibdev_dbg(to_ibdev(cqp->dev),
+ "CQP: DEBUG_FW_OOO out-of-order completion with unknown def_info = 0x%x\n",
+ def_info);
+}
+
+/**
+ * irdma_sc_ccq_get_cqe_info - get ccq's cq entry
+ * @ccq: ccq sc struct
+ * @info: completion q entry to return
+ */
+int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq,
+ struct irdma_ccq_cqe_info *info)
+{
+ u32 def_info;
+ bool def_cmpl = false;
+ bool pend_cmpl = false;
+ bool ooo_final_cmpl = false;
+ u64 qp_ctx, temp, temp1;
+ __le64 *cqe;
+ struct irdma_sc_cqp *cqp;
+ u32 wqe_idx;
+ u32 error;
+ u8 polarity;
+ int ret_code = 0;
+ unsigned long flags;
+
+ if (ccq->cq_uk.avoid_mem_cflct)
+ cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(&ccq->cq_uk);
+ else
+ cqe = IRDMA_GET_CURRENT_CQ_ELEM(&ccq->cq_uk);
+
+ get_64bit_val(cqe, 24, &temp);
+ polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, temp);
+ if (polarity != ccq->cq_uk.polarity)
+ return -ENOENT;
+
+ /* Ensure CEQE contents are read after valid bit is checked */
+ dma_rmb();
+
+ get_64bit_val(cqe, 8, &qp_ctx);
+ cqp = (struct irdma_sc_cqp *)(unsigned long)qp_ctx;
+ info->error = (bool)FIELD_GET(IRDMA_CQ_ERROR, temp);
+ info->maj_err_code = IRDMA_CQPSQ_MAJ_NO_ERROR;
+ info->min_err_code = (u16)FIELD_GET(IRDMA_CQ_MINERR, temp);
+ if (info->error) {
+ info->maj_err_code = (u16)FIELD_GET(IRDMA_CQ_MAJERR, temp);
+ error = readl(cqp->dev->hw_regs[IRDMA_CQPERRCODES]);
+ ibdev_dbg(to_ibdev(cqp->dev),
+ "CQP: CQPERRCODES error_code[x%08X]\n", error);
+ }
+
+ wqe_idx = (u32)FIELD_GET(IRDMA_CQ_WQEIDX, temp);
+ info->scratch = cqp->scratch_array[wqe_idx];
+
+ get_64bit_val(cqe, 16, &temp1);
+ info->op_ret_val = (u32)FIELD_GET(IRDMA_CCQ_OPRETVAL, temp1);
+ if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ def_cmpl = info->maj_err_code == IRDMA_CQPSQ_MAJ_NO_ERROR &&
+ info->min_err_code == IRDMA_CQPSQ_MIN_DEF_CMPL;
+ def_info = (u32)FIELD_GET(IRDMA_CCQ_DEFINFO, temp1);
+
+ pend_cmpl = info->maj_err_code == IRDMA_CQPSQ_MAJ_NO_ERROR &&
+ info->min_err_code == IRDMA_CQPSQ_MIN_OOO_CMPL;
+
+ ooo_final_cmpl = (bool)FIELD_GET(IRDMA_OOO_CMPL, temp);
+
+ if (def_cmpl || pend_cmpl || ooo_final_cmpl) {
+ if (ooo_final_cmpl)
+ irdma_sc_process_ooo_cmpl(cqp, info, def_info);
+ else
+ irdma_sc_process_def_cmpl(cqp, info, wqe_idx,
+ def_info, def_cmpl);
+ }
+ }
+
+ get_64bit_val(cqp->sq_base[wqe_idx].elem, 24, &temp1);
+ info->op_code = (u8)FIELD_GET(IRDMA_CQPSQ_OPCODE, temp1);
+ info->cqp = cqp;
+
+ /* move the head for cq */
+ IRDMA_RING_MOVE_HEAD(ccq->cq_uk.cq_ring, ret_code);
+ if (!IRDMA_RING_CURRENT_HEAD(ccq->cq_uk.cq_ring))
+ ccq->cq_uk.polarity ^= 1;
+
+ /* update cq tail in cq shadow memory also */
+ IRDMA_RING_MOVE_TAIL(ccq->cq_uk.cq_ring);
+ set_64bit_val(ccq->cq_uk.shadow_area, 0,
+ IRDMA_RING_CURRENT_HEAD(ccq->cq_uk.cq_ring));
+
+ dma_wmb(); /* make sure shadow area is updated before moving tail */
+
+ spin_lock_irqsave(&cqp->dev->cqp_lock, flags);
+ if (!ooo_final_cmpl)
+ IRDMA_RING_MOVE_TAIL(cqp->sq_ring);
+ spin_unlock_irqrestore(&cqp->dev->cqp_lock, flags);
+
+ /* Do not increment completed_ops counter on pending or deferred
+ * completions.
+ */
+ if (pend_cmpl || def_cmpl)
+ return ret_code;
+ atomic64_inc(&cqp->completed_ops);
+
+ return ret_code;
+}
+
+/**
+ * irdma_sc_poll_for_cqp_op_done - Waits for last write to complete in CQP SQ
+ * @cqp: struct for cqp hw
+ * @op_code: cqp opcode for completion
+ * @compl_info: completion q entry to return
+ */
+int irdma_sc_poll_for_cqp_op_done(struct irdma_sc_cqp *cqp, u8 op_code,
+ struct irdma_ccq_cqe_info *compl_info)
+{
+ struct irdma_ccq_cqe_info info = {};
+ struct irdma_sc_cq *ccq;
+ int ret_code = 0;
+ u32 cnt = 0;
+
+ ccq = cqp->dev->ccq;
+ while (1) {
+ if (cnt++ > 100 * cqp->dev->hw_attrs.max_done_count)
+ return -ETIMEDOUT;
+
+ if (irdma_sc_ccq_get_cqe_info(ccq, &info)) {
+ udelay(cqp->dev->hw_attrs.max_sleep_count);
+ continue;
+ }
+ if (info.error && info.op_code != IRDMA_CQP_OP_QUERY_STAG) {
+ ret_code = -EIO;
+ break;
+ }
+ /* make sure op code matches */
+ if (op_code == info.op_code)
+ break;
+ ibdev_dbg(to_ibdev(cqp->dev),
+ "WQE: opcode mismatch for my op code 0x%x, returned opcode %x\n",
+ op_code, info.op_code);
+ }
+
+ if (compl_info)
+ memcpy(compl_info, &info, sizeof(*compl_info));
+
+ return ret_code;
+}
+
+/**
+ * irdma_sc_manage_hmc_pm_func_table - manage hmc pm function table
+ * @cqp: struct for cqp hw
+ * @info: info for the manage function table operation
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_manage_hmc_pm_func_table(struct irdma_sc_cqp *cqp,
+ struct irdma_hmc_fcn_info *info,
+ u64 scratch, bool post_sq)
+{
+ __le64 *wqe;
+ u64 hdr;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0, 0);
+ set_64bit_val(wqe, 8, 0);
+ set_64bit_val(wqe, 16, 0);
+ set_64bit_val(wqe, 32, 0);
+ set_64bit_val(wqe, 40, 0);
+ set_64bit_val(wqe, 48, 0);
+ set_64bit_val(wqe, 56, 0);
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_MHMC_VFIDX, info->vf_id) |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE,
+ IRDMA_CQP_OP_MANAGE_HMC_PM_FUNC_TABLE) |
+ FIELD_PREP(IRDMA_CQPSQ_MHMC_FREEPMFN, info->free_fcn) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: MANAGE_HMC_PM_FUNC_TABLE WQE",
+ DUMP_PREFIX_OFFSET, 16, 8, wqe,
+ IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_commit_fpm_val_done - wait for cqp cqe completion
+ * for fpm commit
+ * @cqp: struct for cqp hw
+ */
+static int irdma_sc_commit_fpm_val_done(struct irdma_sc_cqp *cqp)
+{
+ return irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_COMMIT_FPM_VAL,
+ NULL);
+}
+
+/**
+ * irdma_sc_commit_fpm_val - cqp wqe for commit fpm values
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @hmc_fn_id: hmc function id
+ * @commit_fpm_mem: Memory for fpm values
+ * @post_sq: flag for cqp db to ring
+ * @wait_type: poll ccq or cqp registers for cqp completion
+ */
+static int irdma_sc_commit_fpm_val(struct irdma_sc_cqp *cqp, u64 scratch,
+ u8 hmc_fn_id,
+ struct irdma_dma_mem *commit_fpm_mem,
+ bool post_sq, u8 wait_type)
+{
+ __le64 *wqe;
+ u64 hdr;
+ u32 tail, val, error;
+ int ret_code = 0;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 16, hmc_fn_id);
+ set_64bit_val(wqe, 32, commit_fpm_mem->pa);
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_BUFSIZE, IRDMA_COMMIT_FPM_BUF_SIZE) |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_COMMIT_FPM_VAL) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: COMMIT_FPM_VAL WQE", DUMP_PREFIX_OFFSET,
+ 16, 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ irdma_get_cqp_reg_info(cqp, &val, &tail, &error);
+
+ if (post_sq) {
+ irdma_sc_cqp_post_sq(cqp);
+ if (wait_type == IRDMA_CQP_WAIT_POLL_REGS)
+ ret_code = irdma_cqp_poll_registers(cqp, tail,
+ cqp->dev->hw_attrs.max_done_count);
+ else if (wait_type == IRDMA_CQP_WAIT_POLL_CQ)
+ ret_code = irdma_sc_commit_fpm_val_done(cqp);
+ }
+
+ return ret_code;
+}
+
+/**
+ * irdma_sc_query_fpm_val_done - poll for cqp wqe completion for
+ * query fpm
+ * @cqp: struct for cqp hw
+ */
+static int irdma_sc_query_fpm_val_done(struct irdma_sc_cqp *cqp)
+{
+ return irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_QUERY_FPM_VAL,
+ NULL);
+}
+
+/**
+ * irdma_sc_query_fpm_val - cqp wqe query fpm values
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @hmc_fn_id: hmc function id
+ * @query_fpm_mem: memory for return fpm values
+ * @post_sq: flag for cqp db to ring
+ * @wait_type: poll ccq or cqp registers for cqp completion
+ */
+static int irdma_sc_query_fpm_val(struct irdma_sc_cqp *cqp, u64 scratch,
+ u8 hmc_fn_id,
+ struct irdma_dma_mem *query_fpm_mem,
+ bool post_sq, u8 wait_type)
+{
+ __le64 *wqe;
+ u64 hdr;
+ u32 tail, val, error;
+ int ret_code = 0;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 16, hmc_fn_id);
+ set_64bit_val(wqe, 32, query_fpm_mem->pa);
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_QUERY_FPM_VAL) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: QUERY_FPM WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ irdma_get_cqp_reg_info(cqp, &val, &tail, &error);
+
+ if (post_sq) {
+ irdma_sc_cqp_post_sq(cqp);
+ if (wait_type == IRDMA_CQP_WAIT_POLL_REGS)
+ ret_code = irdma_cqp_poll_registers(cqp, tail,
+ cqp->dev->hw_attrs.max_done_count);
+ else if (wait_type == IRDMA_CQP_WAIT_POLL_CQ)
+ ret_code = irdma_sc_query_fpm_val_done(cqp);
+ }
+
+ return ret_code;
+}
+
+/**
+ * irdma_sc_ceq_init - initialize ceq
+ * @ceq: ceq sc structure
+ * @info: ceq initialization info
+ */
+int irdma_sc_ceq_init(struct irdma_sc_ceq *ceq,
+ struct irdma_ceq_init_info *info)
+{
+ u32 pble_obj_cnt;
+
+ if (info->elem_cnt < info->dev->hw_attrs.min_hw_ceq_size ||
+ info->elem_cnt > info->dev->hw_attrs.max_hw_ceq_size)
+ return -EINVAL;
+
+ if (info->ceq_id >= info->dev->hmc_fpm_misc.max_ceqs)
+ return -EINVAL;
+ pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
+
+ if (info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt)
+ return -EINVAL;
+
+ ceq->size = sizeof(*ceq);
+ ceq->ceqe_base = (struct irdma_ceqe *)info->ceqe_base;
+ ceq->ceq_id = info->ceq_id;
+ ceq->dev = info->dev;
+ ceq->elem_cnt = info->elem_cnt;
+ ceq->ceq_elem_pa = info->ceqe_pa;
+ ceq->virtual_map = info->virtual_map;
+ ceq->itr_no_expire = info->itr_no_expire;
+ ceq->pbl_chunk_size = (ceq->virtual_map ? info->pbl_chunk_size : 0);
+ ceq->first_pm_pbl_idx = (ceq->virtual_map ? info->first_pm_pbl_idx : 0);
+ ceq->pbl_list = (ceq->virtual_map ? info->pbl_list : NULL);
+ ceq->tph_en = info->tph_en;
+ ceq->tph_val = info->tph_val;
+ ceq->vsi_idx = info->vsi_idx;
+ ceq->polarity = 1;
+ IRDMA_RING_INIT(ceq->ceq_ring, ceq->elem_cnt);
+ ceq->dev->ceq[info->ceq_id] = ceq;
+
+ return 0;
+}
+
+/**
+ * irdma_sc_ceq_create - create ceq wqe
+ * @ceq: ceq sc structure
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_ceq_create(struct irdma_sc_ceq *ceq, u64 scratch,
+ bool post_sq)
+{
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+ u64 hdr;
+
+ cqp = ceq->dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+ set_64bit_val(wqe, 16, ceq->elem_cnt);
+ set_64bit_val(wqe, 32,
+ (ceq->virtual_map ? 0 : ceq->ceq_elem_pa));
+ set_64bit_val(wqe, 48,
+ (ceq->virtual_map ? ceq->first_pm_pbl_idx : 0));
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMA_CQPSQ_TPHVAL, ceq->tph_val) |
+ FIELD_PREP(IRDMA_CQPSQ_PASID, ceq->pasid) |
+ FIELD_PREP(IRDMA_CQPSQ_VSIIDX, ceq->vsi_idx));
+ hdr = FIELD_PREP(IRDMA_CQPSQ_CEQ_CEQID, ceq->ceq_id) |
+ FIELD_PREP(IRDMA_CQPSQ_CEQ_CEQID_HIGH, ceq->ceq_id >> 10) |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_CEQ) |
+ FIELD_PREP(IRDMA_CQPSQ_CEQ_LPBLSIZE, ceq->pbl_chunk_size) |
+ FIELD_PREP(IRDMA_CQPSQ_CEQ_VMAP, ceq->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_CEQ_ITRNOEXPIRE, ceq->itr_no_expire) |
+ FIELD_PREP(IRDMA_CQPSQ_TPHEN, ceq->tph_en) |
+ FIELD_PREP(IRDMA_CQPSQ_PASID_VALID, ceq->pasid_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: CEQ_CREATE WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_cceq_create_done - poll for control ceq wqe to complete
+ * @ceq: ceq sc structure
+ */
+static int irdma_sc_cceq_create_done(struct irdma_sc_ceq *ceq)
+{
+ struct irdma_sc_cqp *cqp;
+
+ cqp = ceq->dev->cqp;
+ return irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_CREATE_CEQ,
+ NULL);
+}
+
+/**
+ * irdma_sc_cceq_destroy_done - poll for destroy cceq to complete
+ * @ceq: ceq sc structure
+ */
+int irdma_sc_cceq_destroy_done(struct irdma_sc_ceq *ceq)
+{
+ struct irdma_sc_cqp *cqp;
+
+ cqp = ceq->dev->cqp;
+ cqp->process_cqp_sds = irdma_update_sds_noccq;
+
+ return irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_DESTROY_CEQ,
+ NULL);
+}
+
+/**
+ * irdma_sc_cceq_create - create cceq
+ * @ceq: ceq sc structure
+ * @scratch: u64 saved to be used during cqp completion
+ */
+int irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch)
+{
+ int ret_code;
+ struct irdma_sc_dev *dev = ceq->dev;
+
+ dev->ccq->vsi_idx = ceq->vsi_idx;
+
+ ret_code = irdma_sc_ceq_create(ceq, scratch, true);
+ if (!ret_code)
+ return irdma_sc_cceq_create_done(ceq);
+
+ return ret_code;
+}
+
+/**
+ * irdma_sc_ceq_destroy - destroy ceq
+ * @ceq: ceq sc structure
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+int irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch, bool post_sq)
+{
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+ u64 hdr;
+
+ cqp = ceq->dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 16, ceq->elem_cnt);
+ set_64bit_val(wqe, 48, ceq->first_pm_pbl_idx);
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMA_CQPSQ_PASID, ceq->pasid));
+ hdr = ceq->ceq_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_CEQ) |
+ FIELD_PREP(IRDMA_CQPSQ_CEQ_LPBLSIZE, ceq->pbl_chunk_size) |
+ FIELD_PREP(IRDMA_CQPSQ_CEQ_VMAP, ceq->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_TPHEN, ceq->tph_en) |
+ FIELD_PREP(IRDMA_CQPSQ_PASID_VALID, ceq->pasid_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: CEQ_DESTROY WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_process_ceq - process ceq
+ * @dev: sc device struct
+ * @ceq: ceq sc structure
+ *
+ * It is expected that the caller serializes this function with cleanup_ceqes()
+ * because these functions manipulate the same ceq
+ */
+void *irdma_sc_process_ceq(struct irdma_sc_dev *dev, struct irdma_sc_ceq *ceq)
+{
+ u64 temp;
+ __le64 *ceqe;
+ struct irdma_sc_cq *cq = NULL;
+ struct irdma_sc_cq *temp_cq;
+ u8 polarity;
+ u32 cq_idx;
+
+ do {
+ cq_idx = 0;
+ ceqe = IRDMA_GET_CURRENT_CEQ_ELEM(ceq);
+ get_64bit_val(ceqe, 0, &temp);
+ polarity = (u8)FIELD_GET(IRDMA_CEQE_VALID, temp);
+ if (polarity != ceq->polarity)
+ return NULL;
+
+ temp_cq = (struct irdma_sc_cq *)(unsigned long)(temp << 1);
+ if (!temp_cq) {
+ cq_idx = IRDMA_INVALID_CQ_IDX;
+ IRDMA_RING_MOVE_TAIL(ceq->ceq_ring);
+
+ if (!IRDMA_RING_CURRENT_TAIL(ceq->ceq_ring))
+ ceq->polarity ^= 1;
+ continue;
+ }
+
+ cq = temp_cq;
+
+ IRDMA_RING_MOVE_TAIL(ceq->ceq_ring);
+ if (!IRDMA_RING_CURRENT_TAIL(ceq->ceq_ring))
+ ceq->polarity ^= 1;
+ } while (cq_idx == IRDMA_INVALID_CQ_IDX);
+
+ if (cq)
+ irdma_sc_cq_ack(cq);
+ return cq;
+}
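+
+/*
+ * Illustrative sketch only: an interrupt handler (outside this hunk) is
+ * expected to drain the CEQ by calling irdma_sc_process_ceq() until it
+ * returns NULL, servicing completions on each returned CQ in between:
+ *
+ * struct irdma_sc_cq *cq;
+ *
+ * while ((cq = irdma_sc_process_ceq(dev, ceq)))
+ * ... poll and service completions on cq ...
+ */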
+
+/**
+ * irdma_sc_cleanup_ceqes - clear the valid ceqes ctx matching the cq
+ * @cq: cq for which the ceqes need to be cleaned up
+ * @ceq: ceq ptr
+ *
+ * The function is called after the cq is destroyed to clean up
+ * its pending ceqe entries. It is expected that the caller serializes this
+ * function with process_ceq() in interrupt context.
+ */
+void irdma_sc_cleanup_ceqes(struct irdma_sc_cq *cq, struct irdma_sc_ceq *ceq)
+{
+ struct irdma_sc_cq *next_cq;
+ u8 ceq_polarity = ceq->polarity;
+ __le64 *ceqe;
+ u8 polarity;
+ u64 temp;
+ int next;
+ u32 i;
+
+ next = IRDMA_RING_GET_NEXT_TAIL(ceq->ceq_ring, 0);
+
+ for (i = 1; i <= IRDMA_RING_SIZE(*ceq); i++) {
+ ceqe = IRDMA_GET_CEQ_ELEM_AT_POS(ceq, next);
+
+ get_64bit_val(ceqe, 0, &temp);
+ polarity = (u8)FIELD_GET(IRDMA_CEQE_VALID, temp);
+ if (polarity != ceq_polarity)
+ return;
+
+ next_cq = (struct irdma_sc_cq *)(unsigned long)(temp << 1);
+ if (cq == next_cq)
+ set_64bit_val(ceqe, 0, temp & IRDMA_CEQE_VALID);
+
+ next = IRDMA_RING_GET_NEXT_TAIL(ceq->ceq_ring, i);
+ if (!next)
+ ceq_polarity ^= 1;
+ }
+}
+
+/**
+ * irdma_sc_aeq_init - initialize aeq
+ * @aeq: aeq structure ptr
+ * @info: aeq initialization info
+ */
+int irdma_sc_aeq_init(struct irdma_sc_aeq *aeq,
+ struct irdma_aeq_init_info *info)
+{
+ u32 pble_obj_cnt;
+
+ if (info->elem_cnt < info->dev->hw_attrs.min_hw_aeq_size ||
+ info->elem_cnt > info->dev->hw_attrs.max_hw_aeq_size)
+ return -EINVAL;
+
+ pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
+
+ if (info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt)
+ return -EINVAL;
+
+ aeq->size = sizeof(*aeq);
+ aeq->polarity = 1;
+ aeq->aeqe_base = (struct irdma_sc_aeqe *)info->aeqe_base;
+ aeq->dev = info->dev;
+ aeq->elem_cnt = info->elem_cnt;
+ aeq->aeq_elem_pa = info->aeq_elem_pa;
+ IRDMA_RING_INIT(aeq->aeq_ring, aeq->elem_cnt);
+ aeq->virtual_map = info->virtual_map;
+ aeq->pbl_list = (aeq->virtual_map ? info->pbl_list : NULL);
+ aeq->pbl_chunk_size = (aeq->virtual_map ? info->pbl_chunk_size : 0);
+ aeq->first_pm_pbl_idx = (aeq->virtual_map ? info->first_pm_pbl_idx : 0);
+ aeq->msix_idx = info->msix_idx;
+ info->dev->aeq = aeq;
+
+ return 0;
+}
+
+/**
+ * irdma_sc_aeq_create - create aeq
+ * @aeq: aeq structure ptr
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_aeq_create(struct irdma_sc_aeq *aeq, u64 scratch,
+ bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_sc_cqp *cqp;
+ u64 hdr;
+
+ cqp = aeq->dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+ set_64bit_val(wqe, 16, aeq->elem_cnt);
+ set_64bit_val(wqe, 32,
+ (aeq->virtual_map ? 0 : aeq->aeq_elem_pa));
+ set_64bit_val(wqe, 48,
+ (aeq->virtual_map ? aeq->first_pm_pbl_idx : 0));
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMA_CQPSQ_PASID, aeq->pasid));
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_AEQ) |
+ FIELD_PREP(IRDMA_CQPSQ_AEQ_LPBLSIZE, aeq->pbl_chunk_size) |
+ FIELD_PREP(IRDMA_CQPSQ_AEQ_VMAP, aeq->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_PASID_VALID, aeq->pasid_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: AEQ_CREATE WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_aeq_destroy - destroy aeq during close
+ * @aeq: aeq structure ptr
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_aeq_destroy(struct irdma_sc_aeq *aeq, u64 scratch,
+ bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_sc_cqp *cqp;
+ struct irdma_sc_dev *dev;
+ u64 hdr;
+
+ dev = aeq->dev;
+
+ if (dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2)
+ writel(0, dev->hw_regs[IRDMA_PFINT_AEQCTL]);
+
+ cqp = dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+ set_64bit_val(wqe, 16, aeq->elem_cnt);
+ set_64bit_val(wqe, 48, aeq->first_pm_pbl_idx);
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMA_CQPSQ_PASID, aeq->pasid));
+ hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_AEQ) |
+ FIELD_PREP(IRDMA_CQPSQ_AEQ_LPBLSIZE, aeq->pbl_chunk_size) |
+ FIELD_PREP(IRDMA_CQPSQ_AEQ_VMAP, aeq->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_PASID_VALID, aeq->pasid_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: AEQ_DESTROY WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * irdma_sc_get_next_aeqe - get next aeq entry
+ * @aeq: aeq structure ptr
+ * @info: aeqe info to be returned
+ */
+int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
+ struct irdma_aeqe_info *info)
+{
+ u64 temp, compl_ctx;
+ __le64 *aeqe;
+ u8 ae_src;
+ u8 polarity;
+
+ aeqe = IRDMA_GET_CURRENT_AEQ_ELEM(aeq);
+ get_64bit_val(aeqe, 8, &temp);
+ polarity = (u8)FIELD_GET(IRDMA_AEQE_VALID, temp);
+
+ if (aeq->polarity != polarity)
+ return -ENOENT;
+
+ /* Ensure AEQE contents are read after valid bit is checked */
+ dma_rmb();
+
+ get_64bit_val(aeqe, 0, &compl_ctx);
+
+ print_hex_dump_debug("WQE: AEQ_ENTRY WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ aeqe, 16, false);
+
+ if (aeq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ ae_src = (u8)FIELD_GET(IRDMA_AEQE_AESRC_GEN_3, temp);
+ info->wqe_idx = (u16)FIELD_GET(IRDMA_AEQE_WQDESCIDX_GEN_3,
+ temp);
+ info->qp_cq_id = (u32)FIELD_GET(IRDMA_AEQE_QPCQID_GEN_3, temp);
+ info->ae_id = (u16)FIELD_GET(IRDMA_AEQE_AECODE_GEN_3, temp);
+ info->tcp_state = (u8)FIELD_GET(IRDMA_AEQE_TCPSTATE_GEN_3, compl_ctx);
+ info->iwarp_state = (u8)FIELD_GET(IRDMA_AEQE_IWSTATE_GEN_3, temp);
+ info->q2_data_written = (u8)FIELD_GET(IRDMA_AEQE_Q2DATA_GEN_3, compl_ctx);
+ info->aeqe_overflow = (bool)FIELD_GET(IRDMA_AEQE_OVERFLOW_GEN_3, temp);
+ info->compl_ctx = FIELD_GET(IRDMA_AEQE_CMPL_CTXT, compl_ctx);
+ compl_ctx = FIELD_GET(IRDMA_AEQE_CMPL_CTXT, compl_ctx) << IRDMA_AEQE_CMPL_CTXT_S;
+ } else {
+ ae_src = (u8)FIELD_GET(IRDMA_AEQE_AESRC, temp);
+ info->wqe_idx = (u16)FIELD_GET(IRDMA_AEQE_WQDESCIDX, temp);
+ info->qp_cq_id = (u32)FIELD_GET(IRDMA_AEQE_QPCQID_LOW, temp) |
+ ((u32)FIELD_GET(IRDMA_AEQE_QPCQID_HI, temp) << 18);
+ info->ae_id = (u16)FIELD_GET(IRDMA_AEQE_AECODE, temp);
+ info->tcp_state = (u8)FIELD_GET(IRDMA_AEQE_TCPSTATE, temp);
+ info->iwarp_state = (u8)FIELD_GET(IRDMA_AEQE_IWSTATE, temp);
+ info->q2_data_written = (u8)FIELD_GET(IRDMA_AEQE_Q2DATA, temp);
+ info->aeqe_overflow = (bool)FIELD_GET(IRDMA_AEQE_OVERFLOW,
+ temp);
+ }
+
+ info->ae_src = ae_src;
+ switch (info->ae_id) {
+ case IRDMA_AE_SRQ_LIMIT:
+ info->srq = true;
+ /* [63:6] from CMPL_CTXT, [5:0] from WQDESCIDX. */
+ info->compl_ctx = compl_ctx;
+ ae_src = IRDMA_AE_SOURCE_RSVD;
+ break;
+ case IRDMA_AE_PRIV_OPERATION_DENIED:
+ case IRDMA_AE_AMP_INVALIDATE_TYPE1_MW:
+ case IRDMA_AE_AMP_MWBIND_ZERO_BASED_TYPE1_MW:
+ case IRDMA_AE_AMP_FASTREG_INVALID_PBL_HPS_CFG:
+ case IRDMA_AE_AMP_FASTREG_PBLE_MISMATCH:
+ case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG:
+ case IRDMA_AE_UDA_XMIT_BAD_PD:
+ case IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT:
+ case IRDMA_AE_BAD_CLOSE:
+ case IRDMA_AE_RDMA_READ_WHILE_ORD_ZERO:
+ case IRDMA_AE_STAG_ZERO_INVALID:
+ case IRDMA_AE_IB_RREQ_AND_Q1_FULL:
+ case IRDMA_AE_IB_INVALID_REQUEST:
+ case IRDMA_AE_WQE_UNEXPECTED_OPCODE:
+ case IRDMA_AE_IB_REMOTE_ACCESS_ERROR:
+ case IRDMA_AE_IB_REMOTE_OP_ERROR:
+ case IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION:
+ case IRDMA_AE_DDP_UBE_INVALID_MO:
+ case IRDMA_AE_DDP_UBE_INVALID_QN:
+ case IRDMA_AE_DDP_NO_L_BIT:
+ case IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION:
+ case IRDMA_AE_RDMAP_ROE_UNEXPECTED_OPCODE:
+ case IRDMA_AE_ROE_INVALID_RDMA_READ_REQUEST:
+ case IRDMA_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP:
+ case IRDMA_AE_ROCE_RSP_LENGTH_ERROR:
+ case IRDMA_AE_INVALID_ARP_ENTRY:
+ case IRDMA_AE_INVALID_TCP_OPTION_RCVD:
+ case IRDMA_AE_STALE_ARP_ENTRY:
+ case IRDMA_AE_INVALID_AH_ENTRY:
+ case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR:
+ case IRDMA_AE_LLP_SEGMENT_TOO_SMALL:
+ case IRDMA_AE_LLP_TOO_MANY_RETRIES:
+ case IRDMA_AE_LLP_TOO_MANY_RNRS:
+ case IRDMA_AE_REMOTE_QP_CATASTROPHIC:
+ case IRDMA_AE_LOCAL_QP_CATASTROPHIC:
+ case IRDMA_AE_RCE_QP_CATASTROPHIC:
+ case IRDMA_AE_LLP_DOUBT_REACHABILITY:
+ case IRDMA_AE_LLP_CONNECTION_ESTABLISHED:
+ case IRDMA_AE_RESET_SENT:
+ case IRDMA_AE_TERMINATE_SENT:
+ case IRDMA_AE_RESET_NOT_SENT:
+ case IRDMA_AE_LCE_QP_CATASTROPHIC:
+ case IRDMA_AE_QP_SUSPEND_COMPLETE:
+ case IRDMA_AE_UDA_L4LEN_INVALID:
+ info->qp = true;
+ info->compl_ctx = compl_ctx;
+ break;
+ case IRDMA_AE_LCE_CQ_CATASTROPHIC:
+ info->cq = true;
+ info->compl_ctx = compl_ctx << 1;
+ ae_src = IRDMA_AE_SOURCE_RSVD;
+ break;
+ case IRDMA_AE_CQP_DEFERRED_COMPLETE:
+ info->def_info = info->wqe_idx;
+ ae_src = IRDMA_AE_SOURCE_RSVD;
+ break;
+ case IRDMA_AE_ROCE_EMPTY_MCG:
+ case IRDMA_AE_ROCE_BAD_MC_IP_ADDR:
+ case IRDMA_AE_ROCE_BAD_MC_QPID:
+ case IRDMA_AE_MCG_QP_PROTOCOL_MISMATCH:
+ fallthrough;
+ case IRDMA_AE_LLP_CONNECTION_RESET:
+ case IRDMA_AE_LLP_SYN_RECEIVED:
+ case IRDMA_AE_LLP_FIN_RECEIVED:
+ case IRDMA_AE_LLP_CLOSE_COMPLETE:
+ case IRDMA_AE_LLP_TERMINATE_RECEIVED:
+ case IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE:
+ ae_src = IRDMA_AE_SOURCE_RSVD;
+ info->qp = true;
+ info->compl_ctx = compl_ctx;
+ break;
+ default:
+ break;
+ }
+
+ switch (ae_src) {
+ case IRDMA_AE_SOURCE_RQ:
+ case IRDMA_AE_SOURCE_RQ_0011:
+ info->qp = true;
+ info->rq = true;
+ info->compl_ctx = compl_ctx;
+ info->err_rq_idx_valid = true;
+ break;
+ case IRDMA_AE_SOURCE_CQ:
+ case IRDMA_AE_SOURCE_CQ_0110:
+ case IRDMA_AE_SOURCE_CQ_1010:
+ case IRDMA_AE_SOURCE_CQ_1110:
+ info->cq = true;
+ info->compl_ctx = compl_ctx << 1;
+ break;
+ case IRDMA_AE_SOURCE_SQ:
+ case IRDMA_AE_SOURCE_SQ_0111:
+ info->qp = true;
+ info->sq = true;
+ info->compl_ctx = compl_ctx;
+ break;
+ case IRDMA_AE_SOURCE_IN_RR_WR:
+ info->qp = true;
+ if (aeq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ info->err_rq_idx_valid = true;
+ info->compl_ctx = compl_ctx;
+ info->in_rdrsp_wr = true;
+ break;
+ case IRDMA_AE_SOURCE_IN_RR_WR_1011:
+ info->qp = true;
+ if (aeq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ info->sq = true;
+ info->err_rq_idx_valid = true;
+ }
+ info->compl_ctx = compl_ctx;
+ info->in_rdrsp_wr = true;
+ break;
+ case IRDMA_AE_SOURCE_OUT_RR:
+ case IRDMA_AE_SOURCE_OUT_RR_1111:
+ info->qp = true;
+ info->compl_ctx = compl_ctx;
+ info->out_rdrsp = true;
+ break;
+ case IRDMA_AE_SOURCE_RSVD:
+ default:
+ break;
+ }
+
+ IRDMA_RING_MOVE_TAIL(aeq->aeq_ring);
+ if (!IRDMA_RING_CURRENT_TAIL(aeq->aeq_ring))
+ aeq->polarity ^= 1;
+
+ return 0;
+}
+
+/**
+ * irdma_sc_repost_aeq_entries - repost completed aeq entries
+ * @dev: sc device struct
+ * @count: allocate count
+ */
+void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count)
+{
+ writel(count, dev->hw_regs[IRDMA_AEQALLOC]);
+}
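+
+/*
+ * Illustrative sketch only: a typical AEQ service loop (outside this hunk)
+ * pulls entries until -ENOENT and then tells the hardware how many entries
+ * were consumed so they can be reused:
+ *
+ * struct irdma_aeqe_info info = {};
+ * u32 processed = 0;
+ *
+ * while (!irdma_sc_get_next_aeqe(aeq, &info)) {
+ * processed++;
+ * ... handle the event described by info ...
+ * }
+ * if (processed)
+ * irdma_sc_repost_aeq_entries(dev, processed);
+ */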
+
+/**
+ * irdma_sc_ccq_init - initialize control cq
+ * @cq: sc's cq struct
+ * @info: info for control cq initialization
+ */
+int irdma_sc_ccq_init(struct irdma_sc_cq *cq, struct irdma_ccq_init_info *info)
+{
+ u32 pble_obj_cnt;
+
+ if (info->num_elem < info->dev->hw_attrs.uk_attrs.min_hw_cq_size ||
+ info->num_elem > info->dev->hw_attrs.uk_attrs.max_hw_cq_size)
+ return -EINVAL;
+
+ if (info->ceq_id >= info->dev->hmc_fpm_misc.max_ceqs)
+ return -EINVAL;
+
+ pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
+
+ if (info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt)
+ return -EINVAL;
+
+ cq->cq_pa = info->cq_pa;
+ cq->cq_uk.cq_base = info->cq_base;
+ cq->shadow_area_pa = info->shadow_area_pa;
+ cq->cq_uk.shadow_area = info->shadow_area;
+ cq->shadow_read_threshold = info->shadow_read_threshold;
+ cq->dev = info->dev;
+ cq->ceq_id = info->ceq_id;
+ cq->cq_uk.cq_size = info->num_elem;
+ cq->cq_type = IRDMA_CQ_TYPE_CQP;
+ cq->ceqe_mask = info->ceqe_mask;
+ IRDMA_RING_INIT(cq->cq_uk.cq_ring, info->num_elem);
+ cq->cq_uk.cq_id = 0; /* control cq is id 0 always */
+ cq->ceq_id_valid = info->ceq_id_valid;
+ cq->tph_en = info->tph_en;
+ cq->tph_val = info->tph_val;
+ cq->cq_uk.avoid_mem_cflct = info->avoid_mem_cflct;
+ cq->pbl_list = info->pbl_list;
+ cq->virtual_map = info->virtual_map;
+ cq->pbl_chunk_size = info->pbl_chunk_size;
+ cq->first_pm_pbl_idx = info->first_pm_pbl_idx;
+ cq->cq_uk.polarity = true;
+ cq->vsi = info->vsi;
+ cq->cq_uk.cq_ack_db = cq->dev->cq_ack_db;
+
+ /* Only applicable to CQs other than CCQ so initialize to zero */
+ cq->cq_uk.cqe_alloc_db = NULL;
+
+ info->dev->ccq = cq;
+ return 0;
+}
+
+/**
+ * irdma_sc_ccq_create_done - poll cqp for ccq create
+ * @ccq: ccq sc struct
+ */
+static inline int irdma_sc_ccq_create_done(struct irdma_sc_cq *ccq)
+{
+ struct irdma_sc_cqp *cqp;
+
+ cqp = ccq->dev->cqp;
+
+ return irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_CREATE_CQ, NULL);
+}
+
+/**
+ * irdma_sc_ccq_create - create control cq
+ * @ccq: ccq sc struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @check_overflow: overflow flag for ccq
+ * @post_sq: flag for cqp db to ring
+ */
+int irdma_sc_ccq_create(struct irdma_sc_cq *ccq, u64 scratch,
+ bool check_overflow, bool post_sq)
+{
+ int ret_code;
+
+ ret_code = irdma_sc_cq_create(ccq, scratch, check_overflow, post_sq);
+ if (ret_code)
+ return ret_code;
+
+ if (post_sq) {
+ ret_code = irdma_sc_ccq_create_done(ccq);
+ if (ret_code)
+ return ret_code;
+ }
+ ccq->dev->cqp->process_cqp_sds = irdma_cqp_sds_cmd;
+
+ return 0;
+}
+
+/**
+ * irdma_sc_ccq_destroy - destroy ccq during close
+ * @ccq: ccq sc struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+int irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch, bool post_sq)
+{
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+ u64 hdr;
+ int ret_code = 0;
+ u32 tail, val, error;
+
+ cqp = ccq->dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0, ccq->cq_uk.cq_size);
+ set_64bit_val(wqe, 8, (uintptr_t)ccq >> 1);
+ set_64bit_val(wqe, 40, ccq->shadow_area_pa);
+
+ hdr = ccq->cq_uk.cq_id |
+ FLD_LS_64(ccq->dev, (ccq->ceq_id_valid ? ccq->ceq_id : 0),
+ IRDMA_CQPSQ_CQ_CEQID) |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_CQ) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_ENCEQEMASK, ccq->ceqe_mask) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_CEQIDVALID, ccq->ceq_id_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_TPHEN, ccq->tph_en) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_AVOIDMEMCNFLCT, ccq->cq_uk.avoid_mem_cflct) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: CCQ_DESTROY WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ irdma_get_cqp_reg_info(cqp, &val, &tail, &error);
+
+ if (post_sq) {
+ irdma_sc_cqp_post_sq(cqp);
+ ret_code = irdma_cqp_poll_registers(cqp, tail,
+ cqp->dev->hw_attrs.max_done_count);
+ }
+
+ cqp->process_cqp_sds = irdma_update_sds_noccq;
+
+ return ret_code;
+}
+
+/**
+ * irdma_sc_init_iw_hmc() - queries fpm values using cqp and populates hmc_info
+ * @dev: ptr to irdma_dev struct
+ * @hmc_fn_id: hmc function id
+ */
+int irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev, u8 hmc_fn_id)
+{
+ struct irdma_hmc_info *hmc_info;
+ struct irdma_hmc_fpm_misc *hmc_fpm_misc;
+ struct irdma_dma_mem query_fpm_mem;
+ int ret_code = 0;
+ u8 wait_type;
+
+ hmc_info = dev->hmc_info;
+ hmc_fpm_misc = &dev->hmc_fpm_misc;
+ query_fpm_mem.pa = dev->fpm_query_buf_pa;
+ query_fpm_mem.va = dev->fpm_query_buf;
+ hmc_info->hmc_fn_id = hmc_fn_id;
+ wait_type = (u8)IRDMA_CQP_WAIT_POLL_REGS;
+
+ ret_code = irdma_sc_query_fpm_val(dev->cqp, 0, hmc_info->hmc_fn_id,
+ &query_fpm_mem, true, wait_type);
+ if (ret_code)
+ return ret_code;
+
+ /* parse the fpm_query_buf and fill hmc obj info */
+ ret_code = irdma_sc_parse_fpm_query_buf(dev, query_fpm_mem.va, hmc_info,
+ hmc_fpm_misc);
+
+ print_hex_dump_debug("HMC: QUERY FPM BUFFER", DUMP_PREFIX_OFFSET, 16,
+ 8, query_fpm_mem.va, IRDMA_QUERY_FPM_BUF_SIZE,
+ false);
+ return ret_code;
+}
+
+/**
+ * irdma_set_loc_mem() - set a local memory bit field
+ * @buf: ptr to a buffer where local memory gets enabled
+ */
+static void irdma_set_loc_mem(__le64 *buf)
+{
+ u64 loc_mem_en = BIT_ULL(ENABLE_LOC_MEM);
+ u32 offset;
+ u64 temp;
+
+ for (offset = 0; offset < IRDMA_COMMIT_FPM_BUF_SIZE;
+ offset += sizeof(__le64)) {
+ if (offset == IRDMA_PBLE_COMMIT_OFFSET)
+ continue;
+ get_64bit_val(buf, offset, &temp);
+ if (temp)
+ set_64bit_val(buf, offset, temp | loc_mem_en);
+ }
+}
+
+/**
+ * irdma_sc_cfg_iw_fpm() - commits hmc obj cnt values using cqp
+ * command and populates fpm base address in hmc_info
+ * @dev: ptr to irdma_dev struct
+ * @hmc_fn_id: hmc function id
+ */
+static int irdma_sc_cfg_iw_fpm(struct irdma_sc_dev *dev, u8 hmc_fn_id)
+{
+ struct irdma_hmc_info *hmc_info;
+ struct irdma_hmc_obj_info *obj_info;
+ __le64 *buf;
+ struct irdma_dma_mem commit_fpm_mem;
+ int ret_code = 0;
+ u8 wait_type;
+
+ hmc_info = dev->hmc_info;
+ obj_info = hmc_info->hmc_obj;
+ buf = dev->fpm_commit_buf;
+
+ set_64bit_val(buf, 0, (u64)obj_info[IRDMA_HMC_IW_QP].cnt);
+ set_64bit_val(buf, 8, (u64)obj_info[IRDMA_HMC_IW_CQ].cnt);
+ set_64bit_val(buf, 16, (u64)obj_info[IRDMA_HMC_IW_SRQ].cnt);
+ set_64bit_val(buf, 24, (u64)obj_info[IRDMA_HMC_IW_HTE].cnt);
+ set_64bit_val(buf, 32, (u64)obj_info[IRDMA_HMC_IW_ARP].cnt);
+ set_64bit_val(buf, 40, (u64)0); /* RSVD */
+ set_64bit_val(buf, 48, (u64)obj_info[IRDMA_HMC_IW_MR].cnt);
+ set_64bit_val(buf, 56, (u64)obj_info[IRDMA_HMC_IW_XF].cnt);
+ set_64bit_val(buf, 64, (u64)obj_info[IRDMA_HMC_IW_XFFL].cnt);
+ set_64bit_val(buf, 72, (u64)obj_info[IRDMA_HMC_IW_Q1].cnt);
+ set_64bit_val(buf, 80, (u64)obj_info[IRDMA_HMC_IW_Q1FL].cnt);
+ set_64bit_val(buf, 88,
+ (u64)obj_info[IRDMA_HMC_IW_TIMER].cnt);
+ set_64bit_val(buf, 96,
+ (u64)obj_info[IRDMA_HMC_IW_FSIMC].cnt);
+ set_64bit_val(buf, 104,
+ (u64)obj_info[IRDMA_HMC_IW_FSIAV].cnt);
+ set_64bit_val(buf, 112,
+ (u64)obj_info[IRDMA_HMC_IW_PBLE].cnt);
+ set_64bit_val(buf, 120, (u64)0); /* RSVD */
+ set_64bit_val(buf, 128, (u64)obj_info[IRDMA_HMC_IW_RRF].cnt);
+ set_64bit_val(buf, 136,
+ (u64)obj_info[IRDMA_HMC_IW_RRFFL].cnt);
+ set_64bit_val(buf, 144, (u64)obj_info[IRDMA_HMC_IW_HDR].cnt);
+ set_64bit_val(buf, 152, (u64)obj_info[IRDMA_HMC_IW_MD].cnt);
+ set_64bit_val(buf, 160,
+ (u64)obj_info[IRDMA_HMC_IW_OOISC].cnt);
+ set_64bit_val(buf, 168,
+ (u64)obj_info[IRDMA_HMC_IW_OOISCFFL].cnt);
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3 &&
+ dev->hmc_fpm_misc.loc_mem_pages)
+ irdma_set_loc_mem(buf);
+ commit_fpm_mem.pa = dev->fpm_commit_buf_pa;
+ commit_fpm_mem.va = dev->fpm_commit_buf;
+
+ wait_type = (u8)IRDMA_CQP_WAIT_POLL_REGS;
+ print_hex_dump_debug("HMC: COMMIT FPM BUFFER", DUMP_PREFIX_OFFSET, 16,
+ 8, commit_fpm_mem.va, IRDMA_COMMIT_FPM_BUF_SIZE,
+ false);
+ ret_code = irdma_sc_commit_fpm_val(dev->cqp, 0, hmc_info->hmc_fn_id,
+ &commit_fpm_mem, true, wait_type);
+ if (!ret_code)
+ irdma_sc_parse_fpm_commit_buf(dev, dev->fpm_commit_buf,
+ hmc_info->hmc_obj,
+ &hmc_info->sd_table.sd_cnt);
+ print_hex_dump_debug("HMC: COMMIT FPM BUFFER", DUMP_PREFIX_OFFSET, 16,
+ 8, commit_fpm_mem.va, IRDMA_COMMIT_FPM_BUF_SIZE,
+ false);
+
+ return ret_code;
+}
+
+/**
+ * cqp_sds_wqe_fill - fill cqp wqe for sd
+ * @cqp: struct for cqp hw
+ * @info: sd info for wqe
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static int cqp_sds_wqe_fill(struct irdma_sc_cqp *cqp,
+ struct irdma_update_sds_info *info, u64 scratch)
+{
+ u64 data;
+ u64 hdr;
+ __le64 *wqe;
+ int mem_entries, wqe_entries;
+ struct irdma_dma_mem *sdbuf = &cqp->sdbuf;
+ u64 offset = 0;
+ u32 wqe_idx;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe_idx(cqp, scratch, &wqe_idx);
+ if (!wqe)
+ return -ENOMEM;
+
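+ /* at most 3 SD entries fit inline in the WQE; remaining entries (16 bytes each) are copied to the sdbuf DMA buffer */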
+ wqe_entries = (info->cnt > 3) ? 3 : info->cnt;
+ mem_entries = info->cnt - wqe_entries;
+
+ if (mem_entries) {
+ offset = wqe_idx * IRDMA_UPDATE_SD_BUFF_SIZE;
+ memcpy(((char *)sdbuf->va + offset), &info->entry[3], mem_entries << 4);
+
+ data = (u64)sdbuf->pa + offset;
+ } else {
+ data = 0;
+ }
+ data |= FIELD_PREP(IRDMA_CQPSQ_UPESD_HMCFNID, info->hmc_fn_id);
+ set_64bit_val(wqe, 16, data);
+
+ switch (wqe_entries) {
+ case 3:
+ set_64bit_val(wqe, 48,
+ (FIELD_PREP(IRDMA_CQPSQ_UPESD_SDCMD, info->entry[2].cmd) |
+ FIELD_PREP(IRDMA_CQPSQ_UPESD_ENTRY_VALID, 1)));
+
+ set_64bit_val(wqe, 56, info->entry[2].data);
+ fallthrough;
+ case 2:
+ set_64bit_val(wqe, 32,
+ (FIELD_PREP(IRDMA_CQPSQ_UPESD_SDCMD, info->entry[1].cmd) |
+ FIELD_PREP(IRDMA_CQPSQ_UPESD_ENTRY_VALID, 1)));
+
+ set_64bit_val(wqe, 40, info->entry[1].data);
+ fallthrough;
+ case 1:
+ set_64bit_val(wqe, 0,
+ FIELD_PREP(IRDMA_CQPSQ_UPESD_SDCMD, info->entry[0].cmd));
+
+ set_64bit_val(wqe, 8, info->entry[0].data);
+ break;
+ default:
+ break;
+ }
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_UPDATE_PE_SDS) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity) |
+ FIELD_PREP(IRDMA_CQPSQ_UPESD_ENTRY_COUNT, mem_entries);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ if (mem_entries)
+ print_hex_dump_debug("WQE: UPDATE_PE_SDS WQE Buffer",
+ DUMP_PREFIX_OFFSET, 16, 8,
+ (char *)sdbuf->va + offset,
+ mem_entries << 4, false);
+
+ print_hex_dump_debug("WQE: UPDATE_PE_SDS WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+
+ return 0;
+}
+
+/**
+ * irdma_update_pe_sds - cqp wqe for sd
+ * @dev: ptr to irdma_dev struct
+ * @info: sd info for SDs
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static int irdma_update_pe_sds(struct irdma_sc_dev *dev,
+ struct irdma_update_sds_info *info, u64 scratch)
+{
+ struct irdma_sc_cqp *cqp = dev->cqp;
+ int ret_code;
+
+ ret_code = cqp_sds_wqe_fill(cqp, info, scratch);
+ if (!ret_code)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return ret_code;
+}
+
+/**
+ * irdma_update_sds_noccq - update sd before ccq created
+ * @dev: sc device struct
+ * @info: sd info for SDs
+ */
+int irdma_update_sds_noccq(struct irdma_sc_dev *dev,
+ struct irdma_update_sds_info *info)
+{
+ u32 error, val, tail;
+ struct irdma_sc_cqp *cqp = dev->cqp;
+ int ret_code;
+
+ ret_code = cqp_sds_wqe_fill(cqp, info, 0);
+ if (ret_code)
+ return ret_code;
+
+ irdma_get_cqp_reg_info(cqp, &val, &tail, &error);
+
+ irdma_sc_cqp_post_sq(cqp);
+ return irdma_cqp_poll_registers(cqp, tail,
+ cqp->dev->hw_attrs.max_done_count);
+}
+
+/**
+ * irdma_sc_static_hmc_pages_allocated - cqp wqe to allocate hmc pages
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @hmc_fn_id: hmc function id
+ * @post_sq: flag for cqp db to ring
+ * @poll_registers: flag to poll register for cqp completion
+ */
+int irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch,
+ u8 hmc_fn_id, bool post_sq,
+ bool poll_registers)
+{
+ u64 hdr;
+ __le64 *wqe;
+ u32 tail, val, error;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_SHMC_PAGE_ALLOCATED_HMC_FN_ID, hmc_fn_id));
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE,
+ IRDMA_CQP_OP_SHMC_PAGES_ALLOCATED) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: SHMC_PAGES_ALLOCATED WQE",
+ DUMP_PREFIX_OFFSET, 16, 8, wqe,
+ IRDMA_CQP_WQE_SIZE * 8, false);
+ irdma_get_cqp_reg_info(cqp, &val, &tail, &error);
+
+ if (post_sq) {
+ irdma_sc_cqp_post_sq(cqp);
+ if (poll_registers)
+ /* check for cqp sq tail update */
+ return irdma_cqp_poll_registers(cqp, tail,
+ cqp->dev->hw_attrs.max_done_count);
+ else
+ return irdma_sc_poll_for_cqp_op_done(cqp,
+ IRDMA_CQP_OP_SHMC_PAGES_ALLOCATED,
+ NULL);
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_cqp_ring_full - check if cqp ring is full
+ * @cqp: struct for cqp hw
+ */
+static bool irdma_cqp_ring_full(struct irdma_sc_cqp *cqp)
+{
+ return IRDMA_RING_FULL_ERR(cqp->sq_ring);
+}
+
+/**
+ * irdma_est_sd - returns approximate number of SDs for HMC
+ * @dev: sc device struct
+ * @hmc_info: hmc structure, size and count for HMC objects
+ */
+static u32 irdma_est_sd(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info)
+{
+ struct irdma_hmc_obj_info *pble_info;
+ int i;
+ u64 size = 0;
+ u64 sd;
+
+ for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++)
+ if (i != IRDMA_HMC_IW_PBLE)
+ size += round_up(hmc_info->hmc_obj[i].cnt *
+ hmc_info->hmc_obj[i].size, 512);
+
+ pble_info = &hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE];
+ if (dev->privileged)
+ size += round_up(pble_info->cnt * pble_info->size, 512);
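+ /* each SD backs 2MB of HMC memory, hence the divide by 2MB (shift by 21) */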
+ if (size & 0x1FFFFF)
+ sd = (size >> 21) + 1; /* add 1 for remainder */
+ else
+ sd = size >> 21;
+ if (!dev->privileged && !dev->hmc_fpm_misc.loc_mem_pages) {
+ /* 2MB alignment for VF PBLE HMC */
+ size = pble_info->cnt * pble_info->size;
+ if (size & 0x1FFFFF)
+ sd += (size >> 21) + 1; /* add 1 for remainder */
+ else
+ sd += size >> 21;
+ }
+ if (sd > 0xFFFFFFFF) {
+ ibdev_dbg(to_ibdev(dev), "HMC: sd overflow[%lld]\n", sd);
+ sd = 0xFFFFFFFF - 1;
+ }
+
+ return (u32)sd;
+}
+
+/**
+ * irdma_sc_query_rdma_features - query RDMA features and FW ver
+ * @cqp: struct for cqp hw
+ * @buf: buffer to hold query info
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static int irdma_sc_query_rdma_features(struct irdma_sc_cqp *cqp,
+ struct irdma_dma_mem *buf, u64 scratch)
+{
+ u32 tail, val, error;
+ __le64 *wqe;
+ int status;
+ u64 temp;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ temp = buf->pa;
+ set_64bit_val(wqe, 32, temp);
+
+ temp = FIELD_PREP(IRDMA_CQPSQ_QUERY_RDMA_FEATURES_WQEVALID,
+ cqp->polarity) |
+ FIELD_PREP(IRDMA_CQPSQ_QUERY_RDMA_FEATURES_BUF_LEN, buf->size) |
+ FIELD_PREP(IRDMA_CQPSQ_UP_OP, IRDMA_CQP_OP_QUERY_RDMA_FEATURES);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, temp);
+
+ print_hex_dump_debug("WQE: QUERY RDMA FEATURES", DUMP_PREFIX_OFFSET,
+ 16, 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ irdma_get_cqp_reg_info(cqp, &val, &tail, &error);
+
+ irdma_sc_cqp_post_sq(cqp);
+ status = irdma_cqp_poll_registers(cqp, tail,
+ cqp->dev->hw_attrs.max_done_count);
+ if (error || status)
+ status = -EINVAL;
+
+ return status;
+}
+
+/**
+ * irdma_get_rdma_features - get RDMA features
+ * @dev: sc device struct
+ */
+int irdma_get_rdma_features(struct irdma_sc_dev *dev)
+{
+ int ret_code;
+ struct irdma_dma_mem feat_buf;
+ u64 temp;
+ u16 byte_idx, feat_type, feat_cnt, feat_idx;
+
+ feat_buf.size = ALIGN(IRDMA_FEATURE_BUF_SIZE,
+ IRDMA_FEATURE_BUF_ALIGNMENT);
+ feat_buf.va = dma_alloc_coherent(dev->hw->device, feat_buf.size,
+ &feat_buf.pa, GFP_KERNEL);
+ if (!feat_buf.va)
+ return -ENOMEM;
+
+ ret_code = irdma_sc_query_rdma_features(dev->cqp, &feat_buf, 0);
+ if (ret_code)
+ goto exit;
+
+ get_64bit_val(feat_buf.va, 0, &temp);
+ feat_cnt = (u16)FIELD_GET(IRDMA_FEATURE_CNT, temp);
+ if (feat_cnt < 2) {
+ ret_code = -EINVAL;
+ goto exit;
+ } else if (feat_cnt > IRDMA_MAX_FEATURES) {
+ ibdev_dbg(to_ibdev(dev),
+ "DEV: feature buf size insufficient, retrying with larger buffer\n");
+ dma_free_coherent(dev->hw->device, feat_buf.size, feat_buf.va,
+ feat_buf.pa);
+ feat_buf.va = NULL;
+ feat_buf.size = ALIGN(8 * feat_cnt,
+ IRDMA_FEATURE_BUF_ALIGNMENT);
+ feat_buf.va = dma_alloc_coherent(dev->hw->device,
+ feat_buf.size, &feat_buf.pa,
+ GFP_KERNEL);
+ if (!feat_buf.va)
+ return -ENOMEM;
+
+ ret_code = irdma_sc_query_rdma_features(dev->cqp, &feat_buf, 0);
+ if (ret_code)
+ goto exit;
+
+ get_64bit_val(feat_buf.va, 0, &temp);
+ feat_cnt = (u16)FIELD_GET(IRDMA_FEATURE_CNT, temp);
+ if (feat_cnt < 2) {
+ ret_code = -EINVAL;
+ goto exit;
+ }
+ }
+
+ print_hex_dump_debug("WQE: QUERY RDMA FEATURES", DUMP_PREFIX_OFFSET,
+ 16, 8, feat_buf.va, feat_cnt * 8, false);
+
+ for (byte_idx = 0, feat_idx = 0; feat_idx < min(feat_cnt, (u16)IRDMA_MAX_FEATURES);
+ feat_idx++, byte_idx += 8) {
+ get_64bit_val(feat_buf.va, byte_idx, &temp);
+ feat_type = FIELD_GET(IRDMA_FEATURE_TYPE, temp);
+ if (feat_type >= IRDMA_MAX_FEATURES) {
+ ibdev_dbg(to_ibdev(dev),
+ "DEV: found unrecognized feature type %d\n",
+ feat_type);
+ continue;
+ }
+ dev->feature_info[feat_type] = temp;
+ }
+
+ if (dev->feature_info[IRDMA_FTN_FLAGS] & IRDMA_ATOMICS_ALLOWED_BIT)
+ dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_ATOMIC_OPS;
+
+exit:
+ dma_free_coherent(dev->hw->device, feat_buf.size, feat_buf.va,
+ feat_buf.pa);
+ feat_buf.va = NULL;
+ return ret_code;
+}
+
+static u32 irdma_q1_cnt(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info, u32 qpwanted)
+{
+ u32 q1_cnt;
+
+ if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) {
+ q1_cnt = roundup_pow_of_two(dev->hw_attrs.max_hw_ird * 2 * qpwanted);
+ } else {
+ if (dev->cqp->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY)
+ q1_cnt = roundup_pow_of_two(dev->hw_attrs.max_hw_ird * 2 * qpwanted + 512);
+ else
+ q1_cnt = dev->hw_attrs.max_hw_ird * 2 * qpwanted;
+ }
+
+ return q1_cnt;
+}
+
+static void cfg_fpm_value_gen_1(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info, u32 qpwanted)
+{
+ hmc_info->hmc_obj[IRDMA_HMC_IW_XF].cnt = roundup_pow_of_two(qpwanted * dev->hw_attrs.max_hw_wqes);
+}
+
+static void cfg_fpm_value_gen_2(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info, u32 qpwanted)
+{
+ struct irdma_hmc_fpm_misc *hmc_fpm_misc = &dev->hmc_fpm_misc;
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_XF].cnt =
+ 4 * hmc_fpm_misc->xf_block_size * qpwanted;
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_HDR].cnt = qpwanted;
+
+ if (hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].max_cnt)
+ hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].cnt = 32 * qpwanted;
+ if (hmc_info->hmc_obj[IRDMA_HMC_IW_RRFFL].max_cnt)
+ hmc_info->hmc_obj[IRDMA_HMC_IW_RRFFL].cnt =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].cnt /
+ hmc_fpm_misc->rrf_block_size;
+ if (hmc_info->hmc_obj[IRDMA_HMC_IW_OOISC].max_cnt)
+ hmc_info->hmc_obj[IRDMA_HMC_IW_OOISC].cnt = 32 * qpwanted;
+ if (hmc_info->hmc_obj[IRDMA_HMC_IW_OOISCFFL].max_cnt)
+ hmc_info->hmc_obj[IRDMA_HMC_IW_OOISCFFL].cnt =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_OOISC].cnt /
+ hmc_fpm_misc->ooiscf_block_size;
+}
+
+/**
+ * irdma_get_rsrc_mem_config - configure resources if local memory or host
+ * @dev: sc device struct
+ * @is_mrte_loc_mem: if true, MRs are to be in local memory because all SDs are local memory pages
+ *
+ * Only the MR object can be configured as host or local memory when QPs are in
+ * local memory. If QPs are in local memory, then all resource objects will be in
+ * local memory except MRs, which can be either host or local memory. The only
+ * exception is PBLEs, which are always in host memory.
+ */
+static void irdma_get_rsrc_mem_config(struct irdma_sc_dev *dev, bool is_mrte_loc_mem)
+{
+ struct irdma_hmc_info *hmc_info = dev->hmc_info;
+ int i;
+
+ for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++)
+ hmc_info->hmc_obj[i].mem_loc = IRDMA_LOC_MEM;
+
+ if (dev->feature_info[IRDMA_OBJ_1] && !is_mrte_loc_mem) {
+ u8 mem_type;
+
+ mem_type = (u8)FIELD_GET(IRDMA_MR_MEM_LOC, dev->feature_info[IRDMA_OBJ_1]);
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_MR].mem_loc =
+ (mem_type & IRDMA_OBJ_LOC_MEM_BIT) ?
+ IRDMA_LOC_MEM : IRDMA_HOST_MEM;
+ } else {
+ hmc_info->hmc_obj[IRDMA_HMC_IW_MR].mem_loc = IRDMA_LOC_MEM;
+ }
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].mem_loc = IRDMA_HOST_MEM;
+
+ ibdev_dbg(to_ibdev(dev), "HMC: INFO: mrte_mem_loc = %d pble = %d\n",
+ hmc_info->hmc_obj[IRDMA_HMC_IW_MR].mem_loc,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].mem_loc);
+}
+
+/**
+ * irdma_cfg_sd_mem - allocate sd memory
+ * @dev: sc device struct
+ * @hmc_info: ptr to irdma_hmc_obj_info struct
+ */
+static int irdma_cfg_sd_mem(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info)
+{
+ struct irdma_virt_mem virt_mem;
+ u32 mem_size;
+
+ mem_size = sizeof(struct irdma_hmc_sd_entry) * hmc_info->sd_table.sd_cnt;
+ virt_mem.size = mem_size;
+ virt_mem.va = kzalloc(virt_mem.size, GFP_KERNEL);
+ if (!virt_mem.va)
+ return -ENOMEM;
+ hmc_info->sd_table.sd_entry = virt_mem.va;
+
+ return 0;
+}
+
+/**
+ * irdma_get_objs_pages - get number of 2M pages needed
+ * @dev: sc device struct
+ * @hmc_info: pointer to the HMC configuration information struct
+ * @mem_loc: pages for local or host memory
+ */
+static u32 irdma_get_objs_pages(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info,
+ enum irdma_hmc_obj_mem mem_loc)
+{
+ u64 size = 0;
+ int i;
+
+ for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++) {
+ if (hmc_info->hmc_obj[i].mem_loc == mem_loc) {
+ size += round_up(hmc_info->hmc_obj[i].cnt *
+ hmc_info->hmc_obj[i].size, 512);
+ }
+ }
+
+ return DIV_ROUND_UP(size, IRDMA_HMC_PAGE_SIZE);
+}
+
+/**
+ * irdma_set_host_hmc_rsrc_gen_3 - calculate host hmc resources for gen 3
+ * @dev: sc device struct
+ */
+static void irdma_set_host_hmc_rsrc_gen_3(struct irdma_sc_dev *dev)
+{
+ struct irdma_hmc_fpm_misc *hmc_fpm_misc;
+ struct irdma_hmc_info *hmc_info;
+ enum irdma_hmc_obj_mem mrte_loc;
+ u32 mrwanted, pblewanted;
+ u32 avail_sds, mr_sds;
+
+ hmc_info = dev->hmc_info;
+ hmc_fpm_misc = &dev->hmc_fpm_misc;
+ avail_sds = hmc_fpm_misc->max_sds;
+ mrte_loc = hmc_info->hmc_obj[IRDMA_HMC_IW_MR].mem_loc;
+ mrwanted = hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt;
+ pblewanted = hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].max_cnt;
+
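+ /* when MRs are in host memory, reserve IRDMA_MIN_PBLE_PAGES worth of SDs for PBLEs and let MRs use the remainder */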
+ if (mrte_loc == IRDMA_HOST_MEM && avail_sds > IRDMA_MIN_PBLE_PAGES) {
+ mr_sds = avail_sds - IRDMA_MIN_PBLE_PAGES;
+ mrwanted = min(mrwanted, mr_sds * MAX_MR_PER_SD);
+ hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt = mrwanted;
+ avail_sds -= DIV_ROUND_UP(mrwanted, MAX_MR_PER_SD);
+ }
+
+ if (FIELD_GET(IRDMA_MANAGE_RSRC_VER2, dev->feature_info[IRDMA_FTN_FLAGS]) &&
+ pblewanted > avail_sds * MAX_PBLE_PER_SD)
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: Warn: Resource version 2: pble wanted = 0x%x available = 0x%x\n",
+ pblewanted, avail_sds * MAX_PBLE_PER_SD);
+
+ pblewanted = min(pblewanted, avail_sds * MAX_PBLE_PER_SD);
+ hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt = pblewanted;
+}
+
+/**
+ * irdma_verify_commit_fpm_gen_3 - verify query fpm values
+ * @dev: sc device struct
+ * @max_pages: max local memory available
+ * @qpwanted: number of qp's wanted
+ */
+static int irdma_verify_commit_fpm_gen_3(struct irdma_sc_dev *dev,
+ u32 max_pages,
+ u32 qpwanted)
+{
+ struct irdma_hmc_fpm_misc *hmc_fpm_misc;
+ u32 rrf_cnt, xf_cnt, timer_cnt, pages_needed;
+ struct irdma_hmc_info *hmc_info;
+ u32 rrffl_cnt = 0;
+ u32 xffl_cnt = 0;
+ u32 q1fl_cnt;
+
+ hmc_info = dev->hmc_info;
+ hmc_fpm_misc = &dev->hmc_fpm_misc;
+
+ rrf_cnt = roundup_pow_of_two(IRDMA_RRF_MULTIPLIER * qpwanted);
+
+ if (hmc_info->hmc_obj[IRDMA_HMC_IW_RRFFL].max_cnt)
+ rrffl_cnt =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].cnt /
+ hmc_fpm_misc->rrf_block_size;
+
+ xf_cnt = roundup_pow_of_two(IRDMA_XF_MULTIPLIER * qpwanted);
+
+ if (xf_cnt)
+ xffl_cnt = xf_cnt / hmc_fpm_misc->xf_block_size;
+
+ timer_cnt = (round_up(qpwanted, 512) / 512 + 1) *
+ hmc_fpm_misc->timer_bucket;
+
+ q1fl_cnt = hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].cnt / hmc_fpm_misc->q1_block_size;
+
+ pages_needed = irdma_get_objs_pages(dev, hmc_info, IRDMA_LOC_MEM);
+ if (pages_needed > max_pages) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: FAIL: SW counts rrf_cnt = %u rrffl_cnt = %u timer_cnt = %u",
+ rrf_cnt, rrffl_cnt, timer_cnt);
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: FAIL: SW counts xf_cnt = %u xffl_cnt = %u q1fl_cnt = %u",
+ xf_cnt, xffl_cnt, q1fl_cnt);
+
+ return -EINVAL;
+ }
+
+ hmc_fpm_misc->max_sds -= pages_needed;
+ hmc_fpm_misc->loc_mem_pages -= pages_needed;
+
+ return 0;
+}
+
+/**
+ * irdma_set_loc_hmc_rsrc_gen_3 - calculate hmc resources for gen 3
+ * @dev: sc device struct
+ * @max_pages: max local memory available
+ * @qpwanted: number of qp's wanted
+ */
+static int irdma_set_loc_hmc_rsrc_gen_3(struct irdma_sc_dev *dev,
+ u32 max_pages,
+ u32 qpwanted)
+{
+ struct irdma_hmc_fpm_misc *hmc_fpm_misc;
+ u32 rrf_cnt, xf_cnt, timer_cnt, pages_needed;
+ struct irdma_hmc_info *hmc_info;
+ u32 ird, ord;
+
+ if (FIELD_GET(IRDMA_MANAGE_RSRC_VER2, dev->feature_info[IRDMA_FTN_FLAGS]))
+ return irdma_verify_commit_fpm_gen_3(dev, max_pages, qpwanted);
+
+ hmc_info = dev->hmc_info;
+ hmc_fpm_misc = &dev->hmc_fpm_misc;
+ ird = dev->hw_attrs.max_hw_ird;
+ ord = dev->hw_attrs.max_hw_ord;
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_HDR].cnt = qpwanted;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt = qpwanted;
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt =
+ min(hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt, qpwanted * 2);
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt =
+ min(qpwanted * 8, hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt);
+
+ rrf_cnt = roundup_pow_of_two(IRDMA_RRF_MULTIPLIER * qpwanted);
+ hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].cnt =
+ min(hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].max_cnt, rrf_cnt);
+
+ if (hmc_info->hmc_obj[IRDMA_HMC_IW_RRFFL].max_cnt)
+ hmc_info->hmc_obj[IRDMA_HMC_IW_RRFFL].cnt =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].cnt /
+ hmc_fpm_misc->rrf_block_size;
+
+ xf_cnt = roundup_pow_of_two(IRDMA_XF_MULTIPLIER * qpwanted);
+ hmc_info->hmc_obj[IRDMA_HMC_IW_XF].cnt =
+ min(hmc_info->hmc_obj[IRDMA_HMC_IW_XF].max_cnt, xf_cnt);
+ hmc_info->hmc_obj[IRDMA_HMC_IW_XFFL].cnt =
+ xf_cnt / hmc_fpm_misc->xf_block_size;
+
+ timer_cnt = (round_up(qpwanted, 512) / 512 + 1) *
+ hmc_fpm_misc->timer_bucket;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_TIMER].cnt =
+ min(timer_cnt, hmc_info->hmc_obj[IRDMA_HMC_IW_TIMER].cnt);
+
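+ /* halve IRD/ORD until the IRD-derived Q1 objects fit within the available local memory pages */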
+ do {
+ hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].cnt = roundup_pow_of_two(ird * 2 * qpwanted);
+ hmc_info->hmc_obj[IRDMA_HMC_IW_Q1FL].cnt =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].cnt / hmc_fpm_misc->q1_block_size;
+
+ pages_needed = irdma_get_objs_pages(dev, hmc_info, IRDMA_LOC_MEM);
+ if (pages_needed <= max_pages)
+ break;
+
+ ird /= 2;
+ ord /= 2;
+ } while (ird >= IRDMA_MIN_IRD);
+
+ if (ird < IRDMA_MIN_IRD) {
+ ibdev_dbg(to_ibdev(dev), "HMC: FAIL: IRD=%u Q1 CNT = %u\n",
+ ird, hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].cnt);
+ return -EINVAL;
+ }
+
+ dev->hw_attrs.max_hw_ird = ird;
+ dev->hw_attrs.max_hw_ord = ord;
+ hmc_fpm_misc->max_sds -= pages_needed;
+
+ return 0;
+}
+
+/**
+ * cfg_fpm_value_gen_3 - configure fpm for gen 3
+ * @dev: sc device struct
+ * @hmc_info: ptr to irdma_hmc_obj_info struct
+ * @hmc_fpm_misc: ptr to fpm data
+ */
+static int cfg_fpm_value_gen_3(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info,
+ struct irdma_hmc_fpm_misc *hmc_fpm_misc)
+{
+ enum irdma_hmc_obj_mem mrte_loc;
+ u32 mrwanted, qpwanted;
+ int i, ret_code = 0;
+ u32 loc_mem_pages;
+ bool is_mrte_loc_mem;
+
+ loc_mem_pages = hmc_fpm_misc->loc_mem_pages;
+ is_mrte_loc_mem = hmc_fpm_misc->loc_mem_pages == hmc_fpm_misc->max_sds ?
+ true : false;
+
+ irdma_get_rsrc_mem_config(dev, is_mrte_loc_mem);
+ mrte_loc = hmc_info->hmc_obj[IRDMA_HMC_IW_MR].mem_loc;
+
+ if (is_mrte_loc_mem)
+ loc_mem_pages -= IRDMA_MIN_PBLE_PAGES;
+
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: mrte_loc %d loc_mem %u fpm max sds %u host_obj %d\n",
+ hmc_info->hmc_obj[IRDMA_HMC_IW_MR].mem_loc,
+ hmc_fpm_misc->loc_mem_pages, hmc_fpm_misc->max_sds,
+ is_mrte_loc_mem);
+
+ mrwanted = hmc_info->hmc_obj[IRDMA_HMC_IW_MR].max_cnt;
+ qpwanted = hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_HDR].cnt = qpwanted;
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_OOISC].max_cnt = 0;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_OOISCFFL].max_cnt = 0;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_HTE].max_cnt = 0;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].max_cnt = 0;
+
+ if (!FIELD_GET(IRDMA_MANAGE_RSRC_VER2, dev->feature_info[IRDMA_FTN_FLAGS]))
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt =
+ min(hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt,
+ (u32)IRDMA_FSIAV_CNT_MAX);
+
+ for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++)
+ hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt;
+
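+ /* scale back the QP count (and the MR count when MRs reside in local memory) until the local memory resources fit */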
+ while (qpwanted >= IRDMA_MIN_QP_CNT) {
+ if (!irdma_set_loc_hmc_rsrc_gen_3(dev, loc_mem_pages, qpwanted))
+ break;
+
+ if (FIELD_GET(IRDMA_MANAGE_RSRC_VER2, dev->feature_info[IRDMA_FTN_FLAGS]))
+ return -EINVAL;
+
+ qpwanted /= 2;
+ if (mrte_loc == IRDMA_LOC_MEM) {
+ mrwanted = qpwanted * IRDMA_MIN_MR_PER_QP;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt =
+ min(hmc_info->hmc_obj[IRDMA_HMC_IW_MR].max_cnt, mrwanted);
+ }
+ }
+
+ if (qpwanted < IRDMA_MIN_QP_CNT) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: ERROR: could not allocate fpm resources\n");
+ return -EINVAL;
+ }
+
+ irdma_set_host_hmc_rsrc_gen_3(dev);
+ ret_code = irdma_sc_cfg_iw_fpm(dev, dev->hmc_fn_id);
+ if (ret_code) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: cfg_iw_fpm returned error_code[x%08X]\n",
+ readl(dev->hw_regs[IRDMA_CQPERRCODES]));
+
+ return ret_code;
+ }
+
+ return irdma_cfg_sd_mem(dev, hmc_info);
+}
+
+/**
+ * irdma_cfg_fpm_val - configure HMC objects
+ * @dev: sc device struct
+ * @qp_count: desired qp count
+ */
+int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count)
+{
+ u32 qpwanted, mrwanted, pblewanted;
+ u32 powerof2, hte, i;
+ u32 sd_needed;
+ u32 sd_diff;
+ u32 loop_count = 0;
+ struct irdma_hmc_info *hmc_info;
+ struct irdma_hmc_fpm_misc *hmc_fpm_misc;
+ int ret_code = 0;
+ u32 max_sds;
+
+ hmc_info = dev->hmc_info;
+ hmc_fpm_misc = &dev->hmc_fpm_misc;
+
+ ret_code = irdma_sc_init_iw_hmc(dev, dev->hmc_fn_id);
+ if (ret_code) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: irdma_sc_init_iw_hmc returned error_code = %d\n",
+ ret_code);
+ return ret_code;
+ }
+
+ max_sds = hmc_fpm_misc->max_sds;
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ return cfg_fpm_value_gen_3(dev, hmc_info, hmc_fpm_misc);
+
+ for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++)
+ hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt;
+ sd_needed = irdma_est_sd(dev, hmc_info);
+ ibdev_dbg(to_ibdev(dev), "HMC: sd count %u where max sd is %u\n",
+ hmc_info->sd_table.sd_cnt, max_sds);
+
+ qpwanted = min(qp_count, hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt);
+
+ powerof2 = 1;
+ while (powerof2 <= qpwanted)
+ powerof2 *= 2;
+ powerof2 /= 2;
+ qpwanted = powerof2;
+
+ mrwanted = hmc_info->hmc_obj[IRDMA_HMC_IW_MR].max_cnt;
+ pblewanted = hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].max_cnt;
+
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: req_qp=%d max_sd=%u, max_qp = %u, max_cq=%u, max_mr=%u, max_pble=%u, mc=%d, av=%u\n",
+ qp_count, max_sds,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].max_cnt,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_MR].max_cnt,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].max_cnt,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].max_cnt,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt);
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].cnt =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].max_cnt;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_ARP].cnt =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_ARP].max_cnt;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_APBVT_ENTRY].cnt = 1;
+
+ while (irdma_q1_cnt(dev, hmc_info, qpwanted) > hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].max_cnt)
+ qpwanted /= 2;
+
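+ /* iteratively trim PBLEs, MRs and QPs until the estimated SD requirement fits within max_sds */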
+ do {
+ ++loop_count;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt = qpwanted;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt =
+ min(2 * qpwanted, hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt);
+ hmc_info->hmc_obj[IRDMA_HMC_IW_SRQ].cnt = 0; /* Reserved */
+ hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt = mrwanted;
+
+ hte = round_up(qpwanted + hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].cnt, 512);
+ powerof2 = 1;
+ while (powerof2 < hte)
+ powerof2 *= 2;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_HTE].cnt =
+ powerof2 * hmc_fpm_misc->ht_multiplier;
+ if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
+ cfg_fpm_value_gen_1(dev, hmc_info, qpwanted);
+ else
+ cfg_fpm_value_gen_2(dev, hmc_info, qpwanted);
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].cnt = irdma_q1_cnt(dev, hmc_info, qpwanted);
+ hmc_info->hmc_obj[IRDMA_HMC_IW_XFFL].cnt =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_XF].cnt / hmc_fpm_misc->xf_block_size;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_Q1FL].cnt =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].cnt / hmc_fpm_misc->q1_block_size;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_TIMER].cnt =
+ (round_up(qpwanted, 512) / 512 + 1) * hmc_fpm_misc->timer_bucket;
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt = pblewanted;
+ sd_needed = irdma_est_sd(dev, hmc_info);
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: sd_needed = %d, hmc_fpm_misc->max_sds=%d, mrwanted=%d, pblewanted=%d qpwanted=%d\n",
+ sd_needed, hmc_fpm_misc->max_sds, mrwanted,
+ pblewanted, qpwanted);
+
+ /* Do not reduce resources further. All objects fit with max SDs */
+ if (sd_needed <= hmc_fpm_misc->max_sds)
+ break;
+
+ sd_diff = sd_needed - hmc_fpm_misc->max_sds;
+ if (sd_diff > 128) {
+ if (!(loop_count % 2) && qpwanted > 128) {
+ qpwanted /= 2;
+ } else {
+ pblewanted /= 2;
+ mrwanted /= 2;
+ }
+ continue;
+ }
+
+ if (dev->cqp->hmc_profile != IRDMA_HMC_PROFILE_FAVOR_VF &&
+ pblewanted > (512 * FPM_MULTIPLIER * sd_diff)) {
+ pblewanted -= 256 * FPM_MULTIPLIER * sd_diff;
+ continue;
+ } else if (pblewanted > (100 * FPM_MULTIPLIER)) {
+ pblewanted -= 10 * FPM_MULTIPLIER;
+ } else if (pblewanted > FPM_MULTIPLIER) {
+ pblewanted -= FPM_MULTIPLIER;
+ } else if (qpwanted <= 128) {
+ if (hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].cnt > 256)
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].cnt /= 2;
+ if (hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt > 256)
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt /= 2;
+ }
+ if (mrwanted > FPM_MULTIPLIER)
+ mrwanted -= FPM_MULTIPLIER;
+ if (!(loop_count % 10) && qpwanted > 128) {
+ qpwanted /= 2;
+ if (hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt > 256)
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt /= 2;
+ }
+ } while (loop_count < 2000);
+
+ if (sd_needed > hmc_fpm_misc->max_sds) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: cfg_fpm failed loop_cnt=%u, sd_needed=%u, max sd count %u\n",
+ loop_count, sd_needed, hmc_info->sd_table.sd_cnt);
+ return -EINVAL;
+ }
+
+ if (loop_count > 1 && sd_needed < max_sds) {
+ pblewanted += (max_sds - sd_needed) * 256 * FPM_MULTIPLIER;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt = pblewanted;
+ sd_needed = irdma_est_sd(dev, hmc_info);
+ }
+
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: loop_cnt=%d, sd_needed=%d, qpcnt = %d, cqcnt=%d, mrcnt=%d, pblecnt=%d, mc=%d, ah=%d, max sd count %d, first sd index %d\n",
+ loop_count, sd_needed,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].cnt,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt,
+ hmc_info->sd_table.sd_cnt, hmc_info->first_sd_index);
+
+ ret_code = irdma_sc_cfg_iw_fpm(dev, dev->hmc_fn_id);
+ if (ret_code) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: cfg_iw_fpm returned error_code[x%08X]\n",
+ readl(dev->hw_regs[IRDMA_CQPERRCODES]));
+ return ret_code;
+ }
+
+ return irdma_cfg_sd_mem(dev, hmc_info);
+}
+
+/**
+ * irdma_exec_cqp_cmd - execute cqp cmd when wqes are available
+ * @dev: rdma device
+ * @pcmdinfo: cqp command info
+ */
+static int irdma_exec_cqp_cmd(struct irdma_sc_dev *dev,
+ struct cqp_cmds_info *pcmdinfo)
+{
+ int status;
+ struct irdma_dma_mem val_mem;
+ bool alloc = false;
+
+ dev->cqp_cmd_stats[pcmdinfo->cqp_cmd]++;
+ switch (pcmdinfo->cqp_cmd) {
+ case IRDMA_OP_CEQ_DESTROY:
+ status = irdma_sc_ceq_destroy(pcmdinfo->in.u.ceq_destroy.ceq,
+ pcmdinfo->in.u.ceq_destroy.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_AEQ_DESTROY:
+ status = irdma_sc_aeq_destroy(pcmdinfo->in.u.aeq_destroy.aeq,
+ pcmdinfo->in.u.aeq_destroy.scratch,
+ pcmdinfo->post_sq);
+
+ break;
+ case IRDMA_OP_CEQ_CREATE:
+ status = irdma_sc_ceq_create(pcmdinfo->in.u.ceq_create.ceq,
+ pcmdinfo->in.u.ceq_create.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_AEQ_CREATE:
+ status = irdma_sc_aeq_create(pcmdinfo->in.u.aeq_create.aeq,
+ pcmdinfo->in.u.aeq_create.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_QP_UPLOAD_CONTEXT:
+ status = irdma_sc_qp_upload_context(pcmdinfo->in.u.qp_upload_context.dev,
+ &pcmdinfo->in.u.qp_upload_context.info,
+ pcmdinfo->in.u.qp_upload_context.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_CQ_CREATE:
+ status = irdma_sc_cq_create(pcmdinfo->in.u.cq_create.cq,
+ pcmdinfo->in.u.cq_create.scratch,
+ pcmdinfo->in.u.cq_create.check_overflow,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_CQ_MODIFY:
+ status = irdma_sc_cq_modify(pcmdinfo->in.u.cq_modify.cq,
+ &pcmdinfo->in.u.cq_modify.info,
+ pcmdinfo->in.u.cq_modify.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_CQ_DESTROY:
+ status = irdma_sc_cq_destroy(pcmdinfo->in.u.cq_destroy.cq,
+ pcmdinfo->in.u.cq_destroy.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_QP_FLUSH_WQES:
+ status = irdma_sc_qp_flush_wqes(pcmdinfo->in.u.qp_flush_wqes.qp,
+ &pcmdinfo->in.u.qp_flush_wqes.info,
+ pcmdinfo->in.u.qp_flush_wqes.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_GEN_AE:
+ status = irdma_sc_gen_ae(pcmdinfo->in.u.gen_ae.qp,
+ &pcmdinfo->in.u.gen_ae.info,
+ pcmdinfo->in.u.gen_ae.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_MANAGE_PUSH_PAGE:
+ status = irdma_sc_manage_push_page(pcmdinfo->in.u.manage_push_page.cqp,
+ &pcmdinfo->in.u.manage_push_page.info,
+ pcmdinfo->in.u.manage_push_page.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_UPDATE_PE_SDS:
+ status = irdma_update_pe_sds(pcmdinfo->in.u.update_pe_sds.dev,
+ &pcmdinfo->in.u.update_pe_sds.info,
+ pcmdinfo->in.u.update_pe_sds.scratch);
+ break;
+ case IRDMA_OP_MANAGE_HMC_PM_FUNC_TABLE:
+ /* switch to calling through the call table */
+ status =
+ irdma_sc_manage_hmc_pm_func_table(pcmdinfo->in.u.manage_hmc_pm.dev->cqp,
+ &pcmdinfo->in.u.manage_hmc_pm.info,
+ pcmdinfo->in.u.manage_hmc_pm.scratch,
+ true);
+ break;
+ case IRDMA_OP_SUSPEND:
+ status = irdma_sc_suspend_qp(pcmdinfo->in.u.suspend_resume.cqp,
+ pcmdinfo->in.u.suspend_resume.qp,
+ pcmdinfo->in.u.suspend_resume.scratch);
+ break;
+ case IRDMA_OP_RESUME:
+ status = irdma_sc_resume_qp(pcmdinfo->in.u.suspend_resume.cqp,
+ pcmdinfo->in.u.suspend_resume.qp,
+ pcmdinfo->in.u.suspend_resume.scratch);
+ break;
+ case IRDMA_OP_QUERY_FPM_VAL:
+ val_mem.pa = pcmdinfo->in.u.query_fpm_val.fpm_val_pa;
+ val_mem.va = pcmdinfo->in.u.query_fpm_val.fpm_val_va;
+ status = irdma_sc_query_fpm_val(pcmdinfo->in.u.query_fpm_val.cqp,
+ pcmdinfo->in.u.query_fpm_val.scratch,
+ pcmdinfo->in.u.query_fpm_val.hmc_fn_id,
+ &val_mem, true, IRDMA_CQP_WAIT_EVENT);
+ break;
+ case IRDMA_OP_COMMIT_FPM_VAL:
+ val_mem.pa = pcmdinfo->in.u.commit_fpm_val.fpm_val_pa;
+ val_mem.va = pcmdinfo->in.u.commit_fpm_val.fpm_val_va;
+ status = irdma_sc_commit_fpm_val(pcmdinfo->in.u.commit_fpm_val.cqp,
+ pcmdinfo->in.u.commit_fpm_val.scratch,
+ pcmdinfo->in.u.commit_fpm_val.hmc_fn_id,
+ &val_mem,
+ true,
+ IRDMA_CQP_WAIT_EVENT);
+ break;
+ case IRDMA_OP_STATS_ALLOCATE:
+ alloc = true;
+ fallthrough;
+ case IRDMA_OP_STATS_FREE:
+ status = irdma_sc_manage_stats_inst(pcmdinfo->in.u.stats_manage.cqp,
+ &pcmdinfo->in.u.stats_manage.info,
+ alloc,
+ pcmdinfo->in.u.stats_manage.scratch);
+ break;
+ case IRDMA_OP_STATS_GATHER:
+ status = irdma_sc_gather_stats(pcmdinfo->in.u.stats_gather.cqp,
+ &pcmdinfo->in.u.stats_gather.info,
+ pcmdinfo->in.u.stats_gather.scratch);
+ break;
+ case IRDMA_OP_WS_MODIFY_NODE:
+ status = irdma_sc_manage_ws_node(pcmdinfo->in.u.ws_node.cqp,
+ &pcmdinfo->in.u.ws_node.info,
+ IRDMA_MODIFY_NODE,
+ pcmdinfo->in.u.ws_node.scratch);
+ break;
+ case IRDMA_OP_WS_DELETE_NODE:
+ status = irdma_sc_manage_ws_node(pcmdinfo->in.u.ws_node.cqp,
+ &pcmdinfo->in.u.ws_node.info,
+ IRDMA_DEL_NODE,
+ pcmdinfo->in.u.ws_node.scratch);
+ break;
+ case IRDMA_OP_WS_ADD_NODE:
+ status = irdma_sc_manage_ws_node(pcmdinfo->in.u.ws_node.cqp,
+ &pcmdinfo->in.u.ws_node.info,
+ IRDMA_ADD_NODE,
+ pcmdinfo->in.u.ws_node.scratch);
+ break;
+ case IRDMA_OP_SET_UP_MAP:
+ status = irdma_sc_set_up_map(pcmdinfo->in.u.up_map.cqp,
+ &pcmdinfo->in.u.up_map.info,
+ pcmdinfo->in.u.up_map.scratch);
+ break;
+ case IRDMA_OP_QUERY_RDMA_FEATURES:
+ status = irdma_sc_query_rdma_features(pcmdinfo->in.u.query_rdma.cqp,
+ &pcmdinfo->in.u.query_rdma.query_buff_mem,
+ pcmdinfo->in.u.query_rdma.scratch);
+ break;
+ case IRDMA_OP_DELETE_ARP_CACHE_ENTRY:
+ status = irdma_sc_del_arp_cache_entry(pcmdinfo->in.u.del_arp_cache_entry.cqp,
+ pcmdinfo->in.u.del_arp_cache_entry.scratch,
+ pcmdinfo->in.u.del_arp_cache_entry.arp_index,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_MANAGE_APBVT_ENTRY:
+ status = irdma_sc_manage_apbvt_entry(pcmdinfo->in.u.manage_apbvt_entry.cqp,
+ &pcmdinfo->in.u.manage_apbvt_entry.info,
+ pcmdinfo->in.u.manage_apbvt_entry.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_MANAGE_QHASH_TABLE_ENTRY:
+ status = irdma_sc_manage_qhash_table_entry(pcmdinfo->in.u.manage_qhash_table_entry.cqp,
+ &pcmdinfo->in.u.manage_qhash_table_entry.info,
+ pcmdinfo->in.u.manage_qhash_table_entry.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_QP_MODIFY:
+ status = irdma_sc_qp_modify(pcmdinfo->in.u.qp_modify.qp,
+ &pcmdinfo->in.u.qp_modify.info,
+ pcmdinfo->in.u.qp_modify.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_QP_CREATE:
+ status = irdma_sc_qp_create(pcmdinfo->in.u.qp_create.qp,
+ &pcmdinfo->in.u.qp_create.info,
+ pcmdinfo->in.u.qp_create.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_QP_DESTROY:
+ status = irdma_sc_qp_destroy(pcmdinfo->in.u.qp_destroy.qp,
+ pcmdinfo->in.u.qp_destroy.scratch,
+ pcmdinfo->in.u.qp_destroy.remove_hash_idx,
+ pcmdinfo->in.u.qp_destroy.ignore_mw_bnd,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_ALLOC_STAG:
+ status = irdma_sc_alloc_stag(pcmdinfo->in.u.alloc_stag.dev,
+ &pcmdinfo->in.u.alloc_stag.info,
+ pcmdinfo->in.u.alloc_stag.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_MR_REG_NON_SHARED:
+ status = irdma_sc_mr_reg_non_shared(pcmdinfo->in.u.mr_reg_non_shared.dev,
+ &pcmdinfo->in.u.mr_reg_non_shared.info,
+ pcmdinfo->in.u.mr_reg_non_shared.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_DEALLOC_STAG:
+ status = irdma_sc_dealloc_stag(pcmdinfo->in.u.dealloc_stag.dev,
+ &pcmdinfo->in.u.dealloc_stag.info,
+ pcmdinfo->in.u.dealloc_stag.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_MW_ALLOC:
+ status = irdma_sc_mw_alloc(pcmdinfo->in.u.mw_alloc.dev,
+ &pcmdinfo->in.u.mw_alloc.info,
+ pcmdinfo->in.u.mw_alloc.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_ADD_ARP_CACHE_ENTRY:
+ status = irdma_sc_add_arp_cache_entry(pcmdinfo->in.u.add_arp_cache_entry.cqp,
+ &pcmdinfo->in.u.add_arp_cache_entry.info,
+ pcmdinfo->in.u.add_arp_cache_entry.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_ALLOC_LOCAL_MAC_ENTRY:
+ status = irdma_sc_alloc_local_mac_entry(pcmdinfo->in.u.alloc_local_mac_entry.cqp,
+ pcmdinfo->in.u.alloc_local_mac_entry.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_ADD_LOCAL_MAC_ENTRY:
+ status = irdma_sc_add_local_mac_entry(pcmdinfo->in.u.add_local_mac_entry.cqp,
+ &pcmdinfo->in.u.add_local_mac_entry.info,
+ pcmdinfo->in.u.add_local_mac_entry.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_DELETE_LOCAL_MAC_ENTRY:
+ status = irdma_sc_del_local_mac_entry(pcmdinfo->in.u.del_local_mac_entry.cqp,
+ pcmdinfo->in.u.del_local_mac_entry.scratch,
+ pcmdinfo->in.u.del_local_mac_entry.entry_idx,
+ pcmdinfo->in.u.del_local_mac_entry.ignore_ref_count,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_AH_CREATE:
+ status = irdma_sc_create_ah(pcmdinfo->in.u.ah_create.cqp,
+ &pcmdinfo->in.u.ah_create.info,
+ pcmdinfo->in.u.ah_create.scratch);
+ break;
+ case IRDMA_OP_AH_DESTROY:
+ status = irdma_sc_destroy_ah(pcmdinfo->in.u.ah_destroy.cqp,
+ &pcmdinfo->in.u.ah_destroy.info,
+ pcmdinfo->in.u.ah_destroy.scratch);
+ break;
+ case IRDMA_OP_MC_CREATE:
+ status = irdma_sc_create_mcast_grp(pcmdinfo->in.u.mc_create.cqp,
+ &pcmdinfo->in.u.mc_create.info,
+ pcmdinfo->in.u.mc_create.scratch);
+ break;
+ case IRDMA_OP_MC_DESTROY:
+ status = irdma_sc_destroy_mcast_grp(pcmdinfo->in.u.mc_destroy.cqp,
+ &pcmdinfo->in.u.mc_destroy.info,
+ pcmdinfo->in.u.mc_destroy.scratch);
+ break;
+ case IRDMA_OP_MC_MODIFY:
+ status = irdma_sc_modify_mcast_grp(pcmdinfo->in.u.mc_modify.cqp,
+ &pcmdinfo->in.u.mc_modify.info,
+ pcmdinfo->in.u.mc_modify.scratch);
+ break;
+ case IRDMA_OP_SRQ_CREATE:
+ status = irdma_sc_srq_create(pcmdinfo->in.u.srq_create.srq,
+ pcmdinfo->in.u.srq_create.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_SRQ_MODIFY:
+ status = irdma_sc_srq_modify(pcmdinfo->in.u.srq_modify.srq,
+ &pcmdinfo->in.u.srq_modify.info,
+ pcmdinfo->in.u.srq_modify.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_SRQ_DESTROY:
+ status = irdma_sc_srq_destroy(pcmdinfo->in.u.srq_destroy.srq,
+ pcmdinfo->in.u.srq_destroy.scratch,
+ pcmdinfo->post_sq);
+ break;
+ default:
+ status = -EOPNOTSUPP;
+ break;
+ }
+
+ return status;
+}
+
+/**
+ * irdma_process_cqp_cmd - process all cqp commands
+ * @dev: sc device struct
+ * @pcmdinfo: cqp command info
+ */
+int irdma_process_cqp_cmd(struct irdma_sc_dev *dev,
+ struct cqp_cmds_info *pcmdinfo)
+{
+ int status = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->cqp_lock, flags);
+ if (list_empty(&dev->cqp_cmd_head) && !irdma_cqp_ring_full(dev->cqp))
+ status = irdma_exec_cqp_cmd(dev, pcmdinfo);
+ else
+ list_add_tail(&pcmdinfo->cqp_cmd_entry, &dev->cqp_cmd_head);
+ spin_unlock_irqrestore(&dev->cqp_lock, flags);
+ return status;
+}
+
+/**
+ * irdma_process_bh - called from tasklet for cqp list
+ * @dev: sc device struct
+ */
+int irdma_process_bh(struct irdma_sc_dev *dev)
+{
+ int status = 0;
+ struct cqp_cmds_info *pcmdinfo;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->cqp_lock, flags);
+ while (!list_empty(&dev->cqp_cmd_head) &&
+ !irdma_cqp_ring_full(dev->cqp)) {
+ pcmdinfo = (struct cqp_cmds_info *)irdma_remove_cqp_head(dev);
+ status = irdma_exec_cqp_cmd(dev, pcmdinfo);
+ if (status)
+ break;
+ }
+ spin_unlock_irqrestore(&dev->cqp_lock, flags);
+ return status;
+}
+
+/**
+ * irdma_cfg_aeq - Configure AEQ interrupt
+ * @dev: pointer to the device structure
+ * @idx: vector index
+ * @enable: true to enable, false to disable
+ */
+void irdma_cfg_aeq(struct irdma_sc_dev *dev, u32 idx, bool enable)
+{
+ u32 reg_val;
+
+ reg_val = FIELD_PREP(IRDMA_PFINT_AEQCTL_CAUSE_ENA, enable) |
+ FIELD_PREP(IRDMA_PFINT_AEQCTL_MSIX_INDX, idx) |
+ FIELD_PREP(IRDMA_PFINT_AEQCTL_ITR_INDX, 3);
+ writel(reg_val, dev->hw_regs[IRDMA_PFINT_AEQCTL]);
+}
+
+/**
+ * sc_vsi_update_stats - Update statistics
+ * @vsi: sc_vsi instance to update
+ */
+void sc_vsi_update_stats(struct irdma_sc_vsi *vsi)
+{
+ struct irdma_dev_hw_stats *hw_stats = &vsi->pestat->hw_stats;
+ struct irdma_gather_stats *gather_stats =
+ vsi->pestat->gather_info.gather_stats_va;
+ struct irdma_gather_stats *last_gather_stats =
+ vsi->pestat->gather_info.last_gather_stats_va;
+ const struct irdma_hw_stat_map *map = vsi->dev->hw_stats_map;
+ u16 max_stat_idx = vsi->dev->hw_attrs.max_stat_idx;
+ u16 i;
+
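+ /* GEN3 reports full 64-bit counters that can be copied directly; earlier generations need bit extraction and roll-over handling via irdma_update_stats() */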
+ if (vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ for (i = 0; i < max_stat_idx; i++) {
+ u16 idx = map[i].byteoff / sizeof(u64);
+
+ hw_stats->stats_val[i] = gather_stats->val[idx];
+ }
+ return;
+ }
+
+ irdma_update_stats(hw_stats, gather_stats, last_gather_stats,
+ map, max_stat_idx);
+}
+
+/**
+ * irdma_wait_pe_ready - Check if firmware is ready
+ * @dev: provides access to registers
+ */
+static int irdma_wait_pe_ready(struct irdma_sc_dev *dev)
+{
+ u32 statuscpu0;
+ u32 statuscpu1;
+ u32 statuscpu2;
+ u32 retrycount = 0;
+
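+ /* all three PE CPU status registers must report 0x80 before firmware is considered ready */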
+ do {
+ statuscpu0 = readl(dev->hw_regs[IRDMA_GLPE_CPUSTATUS0]);
+ statuscpu1 = readl(dev->hw_regs[IRDMA_GLPE_CPUSTATUS1]);
+ statuscpu2 = readl(dev->hw_regs[IRDMA_GLPE_CPUSTATUS2]);
+ if (statuscpu0 == 0x80 && statuscpu1 == 0x80 &&
+ statuscpu2 == 0x80)
+ return 0;
+ mdelay(1000);
+ } while (retrycount++ < dev->hw_attrs.max_pe_ready_count);
+ return -1;
+}
+
+static inline void irdma_sc_init_hw(struct irdma_sc_dev *dev)
+{
+ switch (dev->hw_attrs.uk_attrs.hw_rev) {
+ case IRDMA_GEN_1:
+ i40iw_init_hw(dev);
+ break;
+ case IRDMA_GEN_2:
+ icrdma_init_hw(dev);
+ break;
+ case IRDMA_GEN_3:
+ ig3rdma_init_hw(dev);
+ break;
+ }
+}
+
+/**
+ * irdma_sc_dev_init - Initialize control part of device
+ * @ver: version
+ * @dev: Device pointer
+ * @info: Device init info
+ */
+int irdma_sc_dev_init(enum irdma_vers ver, struct irdma_sc_dev *dev,
+ struct irdma_device_init_info *info)
+{
+ u32 val;
+ int ret_code = 0;
+ u8 db_size;
+
+ INIT_LIST_HEAD(&dev->cqp_cmd_head); /* for CQP command backlog */
+ mutex_init(&dev->ws_mutex);
+ dev->hmc_fn_id = info->hmc_fn_id;
+ dev->fpm_query_buf_pa = info->fpm_query_buf_pa;
+ dev->fpm_query_buf = info->fpm_query_buf;
+ dev->fpm_commit_buf_pa = info->fpm_commit_buf_pa;
+ dev->fpm_commit_buf = info->fpm_commit_buf;
+ dev->hw = info->hw;
+ dev->hw->hw_addr = info->bar0;
+ dev->protocol_used = info->protocol_used;
+ /* Setup the hardware limits, hmc may limit further */
+ dev->hw_attrs.min_hw_qp_id = IRDMA_MIN_IW_QP_ID;
+ dev->hw_attrs.min_hw_srq_id = IRDMA_MIN_IW_SRQ_ID;
+ dev->hw_attrs.min_hw_aeq_size = IRDMA_MIN_AEQ_ENTRIES;
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ dev->hw_attrs.max_hw_aeq_size = IRDMA_MAX_AEQ_ENTRIES_GEN_3;
+ else
+ dev->hw_attrs.max_hw_aeq_size = IRDMA_MAX_AEQ_ENTRIES;
+ dev->hw_attrs.min_hw_ceq_size = IRDMA_MIN_CEQ_ENTRIES;
+ dev->hw_attrs.max_hw_ceq_size = IRDMA_MAX_CEQ_ENTRIES;
+ dev->hw_attrs.uk_attrs.min_hw_cq_size = IRDMA_MIN_CQ_SIZE;
+ dev->hw_attrs.uk_attrs.max_hw_cq_size = IRDMA_MAX_CQ_SIZE;
+ dev->hw_attrs.uk_attrs.max_hw_wq_frags = IRDMA_MAX_WQ_FRAGMENT_COUNT;
+ dev->hw_attrs.uk_attrs.max_hw_read_sges = IRDMA_MAX_SGE_RD;
+ dev->hw_attrs.max_hw_outbound_msg_size = IRDMA_MAX_OUTBOUND_MSG_SIZE;
+ dev->hw_attrs.max_mr_size = IRDMA_MAX_MR_SIZE;
+ dev->hw_attrs.max_hw_inbound_msg_size = IRDMA_MAX_INBOUND_MSG_SIZE;
+ dev->hw_attrs.max_hw_device_pages = IRDMA_MAX_PUSH_PAGE_COUNT;
+ dev->hw_attrs.uk_attrs.max_hw_inline = IRDMA_MAX_INLINE_DATA_SIZE;
+ dev->hw_attrs.max_hw_wqes = IRDMA_MAX_WQ_ENTRIES;
+ dev->hw_attrs.max_qp_wr = IRDMA_MAX_QP_WRS(IRDMA_MAX_QUANTA_PER_WR);
+
+ dev->hw_attrs.uk_attrs.max_hw_rq_quanta = IRDMA_QP_SW_MAX_RQ_QUANTA;
+ dev->hw_attrs.uk_attrs.max_hw_wq_quanta = IRDMA_QP_SW_MAX_WQ_QUANTA;
+ dev->hw_attrs.max_hw_pds = IRDMA_MAX_PDS;
+ dev->hw_attrs.max_hw_ena_vf_count = IRDMA_MAX_PE_ENA_VF_COUNT;
+
+ dev->hw_attrs.max_pe_ready_count = 14;
+ dev->hw_attrs.max_done_count = IRDMA_DONE_COUNT;
+ dev->hw_attrs.max_sleep_count = IRDMA_SLEEP_COUNT;
+ dev->hw_attrs.max_cqp_compl_wait_time_ms = CQP_COMPL_WAIT_TIME_MS;
+
+ if (!dev->privileged) {
+ ret_code = irdma_vchnl_req_get_hmc_fcn(dev);
+ if (ret_code) {
+ ibdev_dbg(to_ibdev(dev),
+ "DEV: Get HMC function ret = %d\n",
+ ret_code);
+
+ return ret_code;
+ }
+ }
+
+ irdma_sc_init_hw(dev);
+
+ if (dev->privileged) {
+ if (irdma_wait_pe_ready(dev))
+ return -ETIMEDOUT;
+
+ val = readl(dev->hw_regs[IRDMA_GLPCI_LBARCTRL]);
+ db_size = (u8)FIELD_GET(IRDMA_GLPCI_LBARCTRL_PE_DB_SIZE, val);
+ if (db_size != IRDMA_PE_DB_SIZE_4M &&
+ db_size != IRDMA_PE_DB_SIZE_8M) {
+ ibdev_dbg(to_ibdev(dev),
+ "DEV: RDMA PE doorbell is not enabled in CSR val 0x%x db_size=%d\n",
+ val, db_size);
+ return -ENODEV;
+ }
+ } else {
+ ret_code = irdma_vchnl_req_get_reg_layout(dev);
+ if (ret_code)
+ ibdev_dbg(to_ibdev(dev),
+ "DEV: Get Register layout failed ret = %d\n",
+ ret_code);
+ }
+
+ return ret_code;
+}
+
+/**
+ * irdma_stat_val - Extract HW counter value from statistics buffer
+ * @stats_val: pointer to statistics buffer
+ * @byteoff: byte offset of counter value in the buffer (8B-aligned)
+ * @bitoff: bit offset of counter value within 8B entry
+ * @bitmask: maximum counter value (e.g. 0xffffff for 24-bit counter)
+ */
+static inline u64 irdma_stat_val(const u64 *stats_val, u16 byteoff, u8 bitoff,
+ u64 bitmask)
+{
+ u16 idx = byteoff / sizeof(*stats_val);
+
+ return (stats_val[idx] >> bitoff) & bitmask;
+}
+
+/**
+ * irdma_stat_delta - Calculate counter delta
+ * @new_val: updated counter value
+ * @old_val: last counter value
+ * @max_val: maximum counter value (e.g. 0xffffff for 24-bit counter)
+ */
+static inline u64 irdma_stat_delta(u64 new_val, u64 old_val, u64 max_val)
+{
+ if (new_val >= old_val)
+ return new_val - old_val;
+
+ /* roll-over case */
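+ /* e.g. 24-bit counter: old = 0xfffffe, new = 0x1 -> 0xffffff - 0xfffffe + 0x1 + 1 = 3 increments */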
+ return max_val - old_val + new_val + 1;
+}
+
+/**
+ * irdma_update_stats - Update statistics
+ * @hw_stats: hw_stats instance to update
+ * @gather_stats: updated stat counters
+ * @last_gather_stats: last stat counters
+ * @map: HW stat map (hw_stats => gather_stats)
+ * @max_stat_idx: number of HW stats
+ */
+void irdma_update_stats(struct irdma_dev_hw_stats *hw_stats,
+ struct irdma_gather_stats *gather_stats,
+ struct irdma_gather_stats *last_gather_stats,
+ const struct irdma_hw_stat_map *map, u16 max_stat_idx)
+{
+ u64 *stats_val = hw_stats->stats_val;
+ u16 i;
+
+ for (i = 0; i < max_stat_idx; i++) {
+ u64 new_val = irdma_stat_val(gather_stats->val, map[i].byteoff,
+ map[i].bitoff, map[i].bitmask);
+ u64 last_val = irdma_stat_val(last_gather_stats->val,
+ map[i].byteoff, map[i].bitoff,
+ map[i].bitmask);
+
+ stats_val[i] +=
+ irdma_stat_delta(new_val, last_val, map[i].bitmask);
+ }
+
+ memcpy(last_gather_stats, gather_stats, sizeof(*last_gather_stats));
+}
diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h
new file mode 100644
index 000000000000..983b22d7ae23
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/defs.h
@@ -0,0 +1,1184 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#ifndef IRDMA_DEFS_H
+#define IRDMA_DEFS_H
+
+#define IRDMA_FIRST_USER_QP_ID 3
+
+#define ECN_CODE_PT_VAL 2
+
+#define IRDMA_PUSH_OFFSET (8 * 1024 * 1024)
+#define IRDMA_PF_FIRST_PUSH_PAGE_INDEX 16
+#define IRDMA_PF_BAR_RSVD (60 * 1024)
+
+#define IRDMA_PE_DB_SIZE_4M 1
+#define IRDMA_PE_DB_SIZE_8M 2
+
+#define IRDMA_IRD_HW_SIZE_4_GEN3 0
+#define IRDMA_IRD_HW_SIZE_8_GEN3 1
+#define IRDMA_IRD_HW_SIZE_16_GEN3 2
+#define IRDMA_IRD_HW_SIZE_32_GEN3 3
+#define IRDMA_IRD_HW_SIZE_64_GEN3 4
+#define IRDMA_IRD_HW_SIZE_128_GEN3 5
+#define IRDMA_IRD_HW_SIZE_256_GEN3 6
+#define IRDMA_IRD_HW_SIZE_512_GEN3 7
+#define IRDMA_IRD_HW_SIZE_1024_GEN3 8
+#define IRDMA_IRD_HW_SIZE_2048_GEN3 9
+#define IRDMA_IRD_HW_SIZE_4096_GEN3 10
+
+#define IRDMA_IRD_HW_SIZE_4 0
+#define IRDMA_IRD_HW_SIZE_16 1
+#define IRDMA_IRD_HW_SIZE_64 2
+#define IRDMA_IRD_HW_SIZE_128 3
+#define IRDMA_IRD_HW_SIZE_256 4
+
+enum irdma_protocol_used {
+ IRDMA_ANY_PROTOCOL = 0,
+ IRDMA_IWARP_PROTOCOL_ONLY = 1,
+ IRDMA_ROCE_PROTOCOL_ONLY = 2,
+};
+
+#define IRDMA_QP_STATE_INVALID 0
+#define IRDMA_QP_STATE_IDLE 1
+#define IRDMA_QP_STATE_RTS 2
+#define IRDMA_QP_STATE_CLOSING 3
+#define IRDMA_QP_STATE_SQD 3
+#define IRDMA_QP_STATE_RTR 4
+#define IRDMA_QP_STATE_TERMINATE 5
+#define IRDMA_QP_STATE_ERROR 6
+
+#define IRDMA_MAX_TRAFFIC_CLASS 8
+#define IRDMA_MAX_STATS_COUNT_GEN_1 12
+#define IRDMA_MAX_USER_PRIORITY 8
+#define IRDMA_MAX_APPS 8
+#define IRDMA_MAX_STATS_COUNT 128
+#define IRDMA_FIRST_NON_PF_STAT 4
+
+#define IRDMA_MIN_MTU_IPV4 576
+#define IRDMA_MIN_MTU_IPV6 1280
+#define IRDMA_MTU_TO_MSS_IPV4 40
+#define IRDMA_MTU_TO_MSS_IPV6 60
+#define IRDMA_DEFAULT_MTU 1500
+
+#define Q2_FPSN_OFFSET 64
+#define TERM_DDP_LEN_TAGGED 14
+#define TERM_DDP_LEN_UNTAGGED 18
+#define TERM_RDMA_LEN 28
+#define RDMA_OPCODE_M 0x0f
+#define RDMA_READ_REQ_OPCODE 1
+#define Q2_BAD_FRAME_OFFSET 72
+#define CQE_MAJOR_DRV 0x8000
+
+#define IRDMA_TERM_SENT 1
+#define IRDMA_TERM_RCVD 2
+#define IRDMA_TERM_DONE 4
+#define IRDMA_MAC_HLEN 14
+
+#define IRDMA_CQP_WAIT_POLL_REGS 1
+#define IRDMA_CQP_WAIT_POLL_CQ 2
+#define IRDMA_CQP_WAIT_EVENT 3
+
+#define IRDMA_AE_SOURCE_RSVD 0x0
+#define IRDMA_AE_SOURCE_RQ 0x1
+#define IRDMA_AE_SOURCE_RQ_0011 0x3
+
+#define IRDMA_AE_SOURCE_CQ 0x2
+#define IRDMA_AE_SOURCE_CQ_0110 0x6
+#define IRDMA_AE_SOURCE_CQ_1010 0xa
+#define IRDMA_AE_SOURCE_CQ_1110 0xe
+
+#define IRDMA_AE_SOURCE_SQ 0x5
+#define IRDMA_AE_SOURCE_SQ_0111 0x7
+
+#define IRDMA_AE_SOURCE_IN_RR_WR 0x9
+#define IRDMA_AE_SOURCE_IN_RR_WR_1011 0xb
+#define IRDMA_AE_SOURCE_OUT_RR 0xd
+#define IRDMA_AE_SOURCE_OUT_RR_1111 0xf
+
+#define IRDMA_TCP_STATE_NON_EXISTENT 0
+#define IRDMA_TCP_STATE_CLOSED 1
+#define IRDMA_TCP_STATE_LISTEN 2
+#define IRDMA_STATE_SYN_SEND 3
+#define IRDMA_TCP_STATE_SYN_RECEIVED 4
+#define IRDMA_TCP_STATE_ESTABLISHED 5
+#define IRDMA_TCP_STATE_CLOSE_WAIT 6
+#define IRDMA_TCP_STATE_FIN_WAIT_1 7
+#define IRDMA_TCP_STATE_CLOSING 8
+#define IRDMA_TCP_STATE_LAST_ACK 9
+#define IRDMA_TCP_STATE_FIN_WAIT_2 10
+#define IRDMA_TCP_STATE_TIME_WAIT 11
+#define IRDMA_TCP_STATE_RESERVED_1 12
+#define IRDMA_TCP_STATE_RESERVED_2 13
+#define IRDMA_TCP_STATE_RESERVED_3 14
+#define IRDMA_TCP_STATE_RESERVED_4 15
+
+#define IRDMA_CQP_SW_SQSIZE_4 4
+#define IRDMA_CQP_SW_SQSIZE_2048 2048
+
+#define IRDMA_CQ_TYPE_IWARP 1
+#define IRDMA_CQ_TYPE_ILQ 2
+#define IRDMA_CQ_TYPE_IEQ 3
+#define IRDMA_CQ_TYPE_CQP 4
+
+#define IRDMA_DONE_COUNT 1000
+#define IRDMA_SLEEP_COUNT 10
+
+#define IRDMA_UPDATE_SD_BUFF_SIZE 128
+#define IRDMA_FEATURE_BUF_SIZE (8 * IRDMA_MAX_FEATURES)
+
+#define ENABLE_LOC_MEM 63
+#define IRDMA_ATOMICS_ALLOWED_BIT 1
+#define MAX_PBLE_PER_SD 0x40000
+#define MAX_PBLE_SD_PER_FCN 0x400
+#define MAX_MR_PER_SD 0x8000
+#define MAX_MR_SD_PER_FCN 0x80
+#define IRDMA_PBLE_COMMIT_OFFSET 112
+#define IRDMA_MAX_QUANTA_PER_WR 8
+
+#define IRDMA_QP_SW_MAX_WQ_QUANTA 32768
+#define IRDMA_QP_SW_MAX_SQ_QUANTA 32768
+#define IRDMA_QP_SW_MAX_RQ_QUANTA 32768
+#define IRDMA_MAX_QP_WRS(max_quanta_per_wr) \
+ ((IRDMA_QP_SW_MAX_WQ_QUANTA - IRDMA_SQ_RSVD) / (max_quanta_per_wr))
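+/* e.g. with max_quanta_per_wr = IRDMA_MAX_QUANTA_PER_WR: (32768 - 258) / 8 = 4063 WRs (integer division) */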
+#define IRDMA_SRQ_MIN_QUANTA 8
+#define IRDMA_SRQ_MAX_QUANTA 262144
+#define IRDMA_MAX_SRQ_WRS \
+ ((IRDMA_SRQ_MAX_QUANTA - IRDMA_RQ_RSVD) / IRDMA_MAX_QUANTA_PER_WR)
+
+#define IRDMAQP_TERM_SEND_TERM_AND_FIN 0
+#define IRDMAQP_TERM_SEND_TERM_ONLY 1
+#define IRDMAQP_TERM_SEND_FIN_ONLY 2
+#define IRDMAQP_TERM_DONOT_SEND_TERM_OR_FIN 3
+
+#define IRDMA_QP_TYPE_IWARP 1
+#define IRDMA_QP_TYPE_UDA 2
+#define IRDMA_QP_TYPE_ROCE_RC 3
+#define IRDMA_QP_TYPE_ROCE_UD 4
+
+#define IRDMA_HW_PAGE_SIZE 4096
+#define IRDMA_HW_PAGE_SHIFT 12
+#define IRDMA_CQE_QTYPE_RQ 0
+#define IRDMA_CQE_QTYPE_SQ 1
+
+#define IRDMA_QP_SW_MIN_WQSIZE 8u /* in WRs */
+#define IRDMA_QP_WQE_MIN_SIZE 32
+#define IRDMA_QP_WQE_MAX_SIZE 256
+#define IRDMA_QP_WQE_MIN_QUANTA 1
+#define IRDMA_MAX_RQ_WQE_SHIFT_GEN1 2
+#define IRDMA_MAX_RQ_WQE_SHIFT_GEN2 3
+
+#define IRDMA_SQ_RSVD 258
+#define IRDMA_RQ_RSVD 1
+
+#define IRDMA_FEATURE_RTS_AE BIT_ULL(0)
+#define IRDMA_FEATURE_CQ_RESIZE BIT_ULL(1)
+#define IRDMA_FEATURE_64_BYTE_CQE BIT_ULL(5)
+#define IRDMA_FEATURE_ATOMIC_OPS BIT_ULL(6)
+#define IRDMA_FEATURE_SRQ BIT_ULL(7)
+#define IRDMA_FEATURE_CQE_TIMESTAMPING BIT_ULL(8)
+
+#define IRDMAQP_OP_RDMA_WRITE 0x00
+#define IRDMAQP_OP_RDMA_READ 0x01
+#define IRDMAQP_OP_RDMA_SEND 0x03
+#define IRDMAQP_OP_RDMA_SEND_INV 0x04
+#define IRDMAQP_OP_RDMA_SEND_SOL_EVENT 0x05
+#define IRDMAQP_OP_RDMA_SEND_SOL_EVENT_INV 0x06
+#define IRDMAQP_OP_BIND_MW 0x08
+#define IRDMAQP_OP_FAST_REGISTER 0x09
+#define IRDMAQP_OP_LOCAL_INVALIDATE 0x0a
+#define IRDMAQP_OP_RDMA_READ_LOC_INV 0x0b
+#define IRDMAQP_OP_NOP 0x0c
+#define IRDMAQP_OP_RDMA_WRITE_SOL 0x0d
+#define IRDMAQP_OP_ATOMIC_FETCH_ADD 0x0f
+#define IRDMAQP_OP_ATOMIC_COMPARE_SWAP_ADD 0x11
+#define IRDMAQP_OP_GEN_RTS_AE 0x30
+
+enum irdma_cqp_op_type {
+ IRDMA_OP_CEQ_DESTROY = 1,
+ IRDMA_OP_AEQ_DESTROY = 2,
+ IRDMA_OP_DELETE_ARP_CACHE_ENTRY = 3,
+ IRDMA_OP_MANAGE_APBVT_ENTRY = 4,
+ IRDMA_OP_CEQ_CREATE = 5,
+ IRDMA_OP_AEQ_CREATE = 6,
+ IRDMA_OP_MANAGE_QHASH_TABLE_ENTRY = 7,
+ IRDMA_OP_QP_MODIFY = 8,
+ IRDMA_OP_QP_UPLOAD_CONTEXT = 9,
+ IRDMA_OP_CQ_CREATE = 10,
+ IRDMA_OP_CQ_DESTROY = 11,
+ IRDMA_OP_QP_CREATE = 12,
+ IRDMA_OP_QP_DESTROY = 13,
+ IRDMA_OP_ALLOC_STAG = 14,
+ IRDMA_OP_MR_REG_NON_SHARED = 15,
+ IRDMA_OP_DEALLOC_STAG = 16,
+ IRDMA_OP_MW_ALLOC = 17,
+ IRDMA_OP_QP_FLUSH_WQES = 18,
+ IRDMA_OP_ADD_ARP_CACHE_ENTRY = 19,
+ IRDMA_OP_MANAGE_PUSH_PAGE = 20,
+ IRDMA_OP_UPDATE_PE_SDS = 21,
+ IRDMA_OP_MANAGE_HMC_PM_FUNC_TABLE = 22,
+ IRDMA_OP_SUSPEND = 23,
+ IRDMA_OP_RESUME = 24,
+ IRDMA_OP_MANAGE_VF_PBLE_BP = 25,
+ IRDMA_OP_QUERY_FPM_VAL = 26,
+ IRDMA_OP_COMMIT_FPM_VAL = 27,
+ IRDMA_OP_AH_CREATE = 28,
+ IRDMA_OP_AH_MODIFY = 29,
+ IRDMA_OP_AH_DESTROY = 30,
+ IRDMA_OP_MC_CREATE = 31,
+ IRDMA_OP_MC_DESTROY = 32,
+ IRDMA_OP_MC_MODIFY = 33,
+ IRDMA_OP_STATS_ALLOCATE = 34,
+ IRDMA_OP_STATS_FREE = 35,
+ IRDMA_OP_STATS_GATHER = 36,
+ IRDMA_OP_WS_ADD_NODE = 37,
+ IRDMA_OP_WS_MODIFY_NODE = 38,
+ IRDMA_OP_WS_DELETE_NODE = 39,
+ IRDMA_OP_WS_FAILOVER_START = 40,
+ IRDMA_OP_WS_FAILOVER_COMPLETE = 41,
+ IRDMA_OP_SET_UP_MAP = 42,
+ IRDMA_OP_GEN_AE = 43,
+ IRDMA_OP_QUERY_RDMA_FEATURES = 44,
+ IRDMA_OP_ALLOC_LOCAL_MAC_ENTRY = 45,
+ IRDMA_OP_ADD_LOCAL_MAC_ENTRY = 46,
+ IRDMA_OP_DELETE_LOCAL_MAC_ENTRY = 47,
+ IRDMA_OP_CQ_MODIFY = 48,
+ IRDMA_OP_SRQ_CREATE = 49,
+ IRDMA_OP_SRQ_MODIFY = 50,
+ IRDMA_OP_SRQ_DESTROY = 51,
+
+ /* Must be last entry */
+ IRDMA_MAX_CQP_OPS = 52,
+};
+
+/* CQP SQ WQES */
+#define IRDMA_CQP_OP_CREATE_QP 0
+#define IRDMA_CQP_OP_MODIFY_QP 0x1
+#define IRDMA_CQP_OP_DESTROY_QP 0x02
+#define IRDMA_CQP_OP_CREATE_CQ 0x03
+#define IRDMA_CQP_OP_MODIFY_CQ 0x04
+#define IRDMA_CQP_OP_DESTROY_CQ 0x05
+#define IRDMA_CQP_OP_CREATE_SRQ 0x06
+#define IRDMA_CQP_OP_MODIFY_SRQ 0x07
+#define IRDMA_CQP_OP_DESTROY_SRQ 0x08
+#define IRDMA_CQP_OP_ALLOC_STAG 0x09
+#define IRDMA_CQP_OP_REG_MR 0x0a
+#define IRDMA_CQP_OP_QUERY_STAG 0x0b
+#define IRDMA_CQP_OP_REG_SMR 0x0c
+#define IRDMA_CQP_OP_DEALLOC_STAG 0x0d
+#define IRDMA_CQP_OP_MANAGE_LOC_MAC_TABLE 0x0e
+#define IRDMA_CQP_OP_MANAGE_ARP 0x0f
+#define IRDMA_CQP_OP_MANAGE_VF_PBLE_BP 0x10
+#define IRDMA_CQP_OP_MANAGE_PUSH_PAGES 0x11
+#define IRDMA_CQP_OP_QUERY_RDMA_FEATURES 0x12
+#define IRDMA_CQP_OP_UPLOAD_CONTEXT 0x13
+#define IRDMA_CQP_OP_ALLOCATE_LOC_MAC_TABLE_ENTRY 0x14
+#define IRDMA_CQP_OP_MANAGE_HMC_PM_FUNC_TABLE 0x15
+#define IRDMA_CQP_OP_CREATE_CEQ 0x16
+#define IRDMA_CQP_OP_DESTROY_CEQ 0x18
+#define IRDMA_CQP_OP_CREATE_AEQ 0x19
+#define IRDMA_CQP_OP_DESTROY_AEQ 0x1b
+#define IRDMA_CQP_OP_CREATE_ADDR_HANDLE 0x1c
+#define IRDMA_CQP_OP_MODIFY_ADDR_HANDLE 0x1d
+#define IRDMA_CQP_OP_DESTROY_ADDR_HANDLE 0x1e
+#define IRDMA_CQP_OP_UPDATE_PE_SDS 0x1f
+#define IRDMA_CQP_OP_QUERY_FPM_VAL 0x20
+#define IRDMA_CQP_OP_COMMIT_FPM_VAL 0x21
+#define IRDMA_CQP_OP_FLUSH_WQES 0x22
+/* IRDMA_CQP_OP_GEN_AE is the same value as IRDMA_CQP_OP_FLUSH_WQES */
+#define IRDMA_CQP_OP_GEN_AE 0x22
+#define IRDMA_CQP_OP_MANAGE_APBVT 0x23
+#define IRDMA_CQP_OP_NOP 0x24
+#define IRDMA_CQP_OP_MANAGE_QUAD_HASH_TABLE_ENTRY 0x25
+#define IRDMA_CQP_OP_CREATE_MCAST_GRP 0x26
+#define IRDMA_CQP_OP_MODIFY_MCAST_GRP 0x27
+#define IRDMA_CQP_OP_DESTROY_MCAST_GRP 0x28
+#define IRDMA_CQP_OP_SUSPEND_QP 0x29
+#define IRDMA_CQP_OP_RESUME_QP 0x2a
+#define IRDMA_CQP_OP_SHMC_PAGES_ALLOCATED 0x2b
+#define IRDMA_CQP_OP_WORK_SCHED_NODE 0x2c
+#define IRDMA_CQP_OP_MANAGE_STATS 0x2d
+#define IRDMA_CQP_OP_GATHER_STATS 0x2e
+#define IRDMA_CQP_OP_UP_MAP 0x2f
+
+#define FLD_LS_64(dev, val, field) \
+ (((u64)(val) << (dev)->hw_shifts[field ## _S]) & (dev)->hw_masks[field ## _M])
+#define FLD_RS_64(dev, val, field) \
+ ((u64)((val) & (dev)->hw_masks[field ## _M]) >> (dev)->hw_shifts[field ## _S])
+#define FLD_LS_32(dev, val, field) \
+ (((val) << (dev)->hw_shifts[field ## _S]) & (dev)->hw_masks[field ## _M])
+#define FLD_RS_32(dev, val, field) \
+ ((u64)((val) & (dev)->hw_masks[field ## _M]) >> (dev)->hw_shifts[field ## _S])
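+
+/*
+ * Usage sketch (illustrative only, not code from this patch): FLD_LS_64()
+ * shifts a value into a named field and FLD_RS_64() extracts it again, using
+ * the per-device shift/mask tables. For a hypothetical field FOO whose FOO_S
+ * and FOO_M constants index dev->hw_shifts[] and dev->hw_masks[]:
+ *
+ *   hdr |= FLD_LS_64(dev, val, FOO);
+ *   val = (u32)FLD_RS_64(dev, hdr, FOO);
+ */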
+
+#define IRDMA_MAX_STATS_24 0xffffffULL
+#define IRDMA_MAX_STATS_32 0xffffffffULL
+#define IRDMA_MAX_STATS_48 0xffffffffffffULL
+#define IRDMA_MAX_STATS_56 0xffffffffffffffULL
+#define IRDMA_MAX_STATS_64 0xffffffffffffffffULL
+
+#define IRDMA_MAX_CQ_READ_THRESH 0x3FFFF
+#define IRDMA_CQPSQ_QHASH_VLANID GENMASK_ULL(43, 32)
+#define IRDMA_CQPSQ_QHASH_QPN GENMASK_ULL(49, 32)
+#define IRDMA_CQPSQ_QHASH_QS_HANDLE GENMASK_ULL(9, 0)
+#define IRDMA_CQPSQ_QHASH_SRC_PORT GENMASK_ULL(31, 16)
+#define IRDMA_CQPSQ_QHASH_DEST_PORT GENMASK_ULL(15, 0)
+#define IRDMA_CQPSQ_QHASH_ADDR0 GENMASK_ULL(63, 32)
+#define IRDMA_CQPSQ_QHASH_ADDR1 GENMASK_ULL(31, 0)
+#define IRDMA_CQPSQ_QHASH_ADDR2 GENMASK_ULL(63, 32)
+#define IRDMA_CQPSQ_QHASH_ADDR3 GENMASK_ULL(31, 0)
+#define IRDMA_CQPSQ_QHASH_WQEVALID BIT_ULL(63)
+#define IRDMA_CQPSQ_QHASH_OPCODE GENMASK_ULL(37, 32)
+#define IRDMA_CQPSQ_QHASH_MANAGE GENMASK_ULL(62, 61)
+#define IRDMA_CQPSQ_QHASH_IPV4VALID BIT_ULL(60)
+#define IRDMA_CQPSQ_QHASH_VLANVALID BIT_ULL(59)
+#define IRDMA_CQPSQ_QHASH_ENTRYTYPE GENMASK_ULL(44, 42)
+#define IRDMA_CQPSQ_STATS_WQEVALID BIT_ULL(63)
+#define IRDMA_CQPSQ_STATS_ALLOC_INST BIT_ULL(62)
+#define IRDMA_CQPSQ_STATS_USE_HMC_FCN_INDEX BIT_ULL(60)
+#define IRDMA_CQPSQ_STATS_USE_INST BIT_ULL(61)
+#define IRDMA_CQPSQ_STATS_OP GENMASK_ULL(37, 32)
+#define IRDMA_CQPSQ_STATS_INST_INDEX GENMASK_ULL(6, 0)
+#define IRDMA_CQPSQ_STATS_HMC_FCN_INDEX GENMASK_ULL(15, 0)
+#define IRDMA_CQPSQ_WS_WQEVALID BIT_ULL(63)
+#define IRDMA_CQPSQ_WS_NODEOP GENMASK_ULL(55, 52)
+#define IRDMA_SD_MAX GENMASK_ULL(15, 0)
+#define IRDMA_MEM_MAX GENMASK_ULL(15, 0)
+#define IRDMA_QP_MEM_LOC GENMASK_ULL(47, 44)
+#define IRDMA_MR_MEM_LOC GENMASK_ULL(27, 24)
+
+#define IRDMA_CQPSQ_WS_ENABLENODE BIT_ULL(62)
+#define IRDMA_CQPSQ_WS_NODETYPE BIT_ULL(61)
+#define IRDMA_CQPSQ_WS_PRIOTYPE GENMASK_ULL(60, 59)
+#define IRDMA_CQPSQ_WS_TC GENMASK_ULL(58, 56)
+#define IRDMA_CQPSQ_WS_VMVFTYPE GENMASK_ULL(55, 54)
+#define IRDMA_CQPSQ_WS_VMVFNUM GENMASK_ULL(51, 42)
+#define IRDMA_CQPSQ_WS_OP GENMASK_ULL(37, 32)
+#define IRDMA_CQPSQ_WS_PARENTID GENMASK_ULL(29, 16)
+#define IRDMA_CQPSQ_WS_NODEID GENMASK_ULL(13, 0)
+#define IRDMA_CQPSQ_WS_VSI GENMASK_ULL(63, 48)
+#define IRDMA_CQPSQ_WS_WEIGHT GENMASK_ULL(38, 32)
+
+#define IRDMA_CQPSQ_UP_WQEVALID BIT_ULL(63)
+#define IRDMA_CQPSQ_UP_USEVLAN BIT_ULL(62)
+#define IRDMA_CQPSQ_UP_USEOVERRIDE BIT_ULL(61)
+#define IRDMA_CQPSQ_UP_OP GENMASK_ULL(37, 32)
+#define IRDMA_CQPSQ_UP_HMCFCNIDX GENMASK_ULL(15, 0)
+#define IRDMA_CQPSQ_UP_CNPOVERRIDE GENMASK_ULL(37, 32)
+#define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_WQEVALID BIT_ULL(63)
+#define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_BUF_LEN GENMASK_ULL(31, 0)
+#define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_OP GENMASK_ULL(37, 32)
+#define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_HW_MODEL_USED GENMASK_ULL(47, 32)
+#define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_HW_MAJOR_VERSION GENMASK_ULL(23, 16)
+#define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_HW_MINOR_VERSION GENMASK_ULL(7, 0)
+#define IRDMA_CQPHC_SQSIZE GENMASK_ULL(11, 8)
+#define IRDMA_CQPHC_DISABLE_PFPDUS BIT_ULL(1)
+#define IRDMA_CQPHC_ROCEV2_RTO_POLICY BIT_ULL(2)
+#define IRDMA_CQPHC_PROTOCOL_USED GENMASK_ULL(4, 3)
+#define IRDMA_CQPHC_MIN_RATE GENMASK_ULL(51, 48)
+#define IRDMA_CQPHC_MIN_DEC_FACTOR GENMASK_ULL(59, 56)
+#define IRDMA_CQPHC_DCQCN_T GENMASK_ULL(15, 0)
+#define IRDMA_CQPHC_HAI_FACTOR GENMASK_ULL(47, 32)
+#define IRDMA_CQPHC_RAI_FACTOR GENMASK_ULL(63, 48)
+#define IRDMA_CQPHC_DCQCN_B GENMASK_ULL(24, 0)
+#define IRDMA_CQPHC_DCQCN_F GENMASK_ULL(27, 25)
+#define IRDMA_CQPHC_CC_CFG_VALID BIT_ULL(31)
+#define IRDMA_CQPHC_RREDUCE_MPERIOD GENMASK_ULL(63, 32)
+#define IRDMA_CQPHC_HW_MINVER GENMASK_ULL(15, 0)
+
+#define IRDMA_CQPHC_HW_MAJVER_GEN_1 0
+#define IRDMA_CQPHC_HW_MAJVER_GEN_2 1
+#define IRDMA_CQPHC_HW_MAJVER_GEN_3 2
+#define IRDMA_CQPHC_HW_MAJVER GENMASK_ULL(31, 16)
+#define IRDMA_CQPHC_CEQPERVF GENMASK_ULL(39, 32)
+
+#define IRDMA_CQPHC_ENABLED_VFS GENMASK_ULL(37, 32)
+
+#define IRDMA_CQPHC_HMC_PROFILE GENMASK_ULL(2, 0)
+#define IRDMA_CQPHC_SVER GENMASK_ULL(31, 24)
+#define IRDMA_CQPHC_SQBASE GENMASK_ULL(63, 9)
+
+#define IRDMA_CQPHC_TIMESTAMP_OVERRIDE BIT_ULL(5)
+#define IRDMA_CQPHC_TS_SHIFT GENMASK_ULL(12, 8)
+#define IRDMA_CQPHC_EN_FINE_GRAINED_TIMERS BIT_ULL(0)
+
+#define IRDMA_CQPHC_OOISC_BLKSIZE GENMASK_ULL(63, 60)
+#define IRDMA_CQPHC_RRSP_BLKSIZE GENMASK_ULL(59, 56)
+#define IRDMA_CQPHC_Q1_BLKSIZE GENMASK_ULL(55, 52)
+#define IRDMA_CQPHC_XMIT_BLKSIZE GENMASK_ULL(51, 48)
+#define IRDMA_CQPHC_BLKSIZES_VALID BIT_ULL(4)
+
+#define IRDMA_CQPHC_QPCTX GENMASK_ULL(63, 0)
+#define IRDMA_QP_DBSA_HW_SQ_TAIL GENMASK_ULL(14, 0)
+#define IRDMA_CQ_DBSA_CQEIDX GENMASK_ULL(19, 0)
+#define IRDMA_CQ_DBSA_SW_CQ_SELECT GENMASK_ULL(13, 0)
+#define IRDMA_CQ_DBSA_ARM_NEXT BIT_ULL(14)
+#define IRDMA_CQ_DBSA_ARM_NEXT_SE BIT_ULL(15)
+#define IRDMA_CQ_DBSA_ARM_SEQ_NUM GENMASK_ULL(17, 16)
+
+/* CQP and iWARP Completion Queue */
+#define IRDMA_CQ_QPCTX IRDMA_CQPHC_QPCTX
+
+#define IRDMA_CCQ_OPRETVAL GENMASK_ULL(31, 0)
+
+#define IRDMA_CCQ_DEFINFO GENMASK_ULL(63, 32)
+
+#define IRDMA_CQ_MINERR GENMASK_ULL(15, 0)
+#define IRDMA_CQ_MAJERR GENMASK_ULL(31, 16)
+#define IRDMA_CQ_WQEIDX GENMASK_ULL(46, 32)
+#define IRDMA_CQ_EXTCQE BIT_ULL(50)
+#define IRDMA_OOO_CMPL BIT_ULL(54)
+#define IRDMA_CQ_ERROR BIT_ULL(55)
+#define IRDMA_CQ_SQ BIT_ULL(62)
+
+#define IRDMA_CQ_SRQ BIT_ULL(52)
+#define IRDMA_CQ_VALID BIT_ULL(63)
+#define IRDMA_CQ_IMMVALID BIT_ULL(62)
+#define IRDMA_CQ_UDSMACVALID BIT_ULL(61)
+#define IRDMA_CQ_UDVLANVALID BIT_ULL(60)
+#define IRDMA_CQ_UDSMAC GENMASK_ULL(47, 0)
+#define IRDMA_CQ_UDVLAN GENMASK_ULL(63, 48)
+
+#define IRDMA_CQ_IMMDATALOW32 GENMASK_ULL(31, 0)
+#define IRDMA_CQ_IMMDATAUP32 GENMASK_ULL(63, 32)
+#define IRDMACQ_PAYLDLEN GENMASK_ULL(31, 0)
+#define IRDMACQ_TCPSEQNUMRTT GENMASK_ULL(63, 32)
+#define IRDMACQ_INVSTAG GENMASK_ULL(31, 0)
+#define IRDMACQ_QPID GENMASK_ULL(55, 32)
+
+#define IRDMACQ_UDSRCQPN GENMASK_ULL(31, 0)
+#define IRDMACQ_PSHDROP BIT_ULL(51)
+#define IRDMACQ_STAG BIT_ULL(53)
+#define IRDMACQ_IPV4 BIT_ULL(53)
+#define IRDMACQ_SOEVENT BIT_ULL(54)
+#define IRDMACQ_OP GENMASK_ULL(61, 56)
+
+#define IRDMA_CEQE_CQCTX GENMASK_ULL(62, 0)
+#define IRDMA_CEQE_VALID BIT_ULL(63)
+
+/* AEQE format */
+#define IRDMA_AEQE_COMPCTX IRDMA_CQPHC_QPCTX
+#define IRDMA_AEQE_QPCQID_LOW GENMASK_ULL(17, 0)
+#define IRDMA_AEQE_QPCQID_HI BIT_ULL(46)
+#define IRDMA_AEQE_WQDESCIDX GENMASK_ULL(32, 18)
+#define IRDMA_AEQE_OVERFLOW BIT_ULL(33)
+#define IRDMA_AEQE_AECODE GENMASK_ULL(45, 34)
+#define IRDMA_AEQE_AESRC GENMASK_ULL(53, 50)
+#define IRDMA_AEQE_IWSTATE GENMASK_ULL(56, 54)
+#define IRDMA_AEQE_TCPSTATE GENMASK_ULL(60, 57)
+#define IRDMA_AEQE_Q2DATA GENMASK_ULL(62, 61)
+#define IRDMA_AEQE_VALID BIT_ULL(63)
+
+#define IRDMA_AEQE_Q2DATA_GEN_3 GENMASK_ULL(5, 4)
+#define IRDMA_AEQE_TCPSTATE_GEN_3 GENMASK_ULL(3, 0)
+#define IRDMA_AEQE_QPCQID_GEN_3 GENMASK_ULL(24, 0)
+#define IRDMA_AEQE_AECODE_GEN_3 GENMASK_ULL(61, 50)
+#define IRDMA_AEQE_OVERFLOW_GEN_3 BIT_ULL(62)
+#define IRDMA_AEQE_WQDESCIDX_GEN_3 GENMASK_ULL(49, 32)
+#define IRDMA_AEQE_IWSTATE_GEN_3 GENMASK_ULL(31, 29)
+#define IRDMA_AEQE_AESRC_GEN_3 GENMASK_ULL(28, 25)
+#define IRDMA_AEQE_CMPL_CTXT_S 6
+#define IRDMA_AEQE_CMPL_CTXT GENMASK_ULL(63, 6)
+
+#define IRDMA_UDA_QPSQ_NEXT_HDR GENMASK_ULL(23, 16)
+#define IRDMA_UDA_QPSQ_OPCODE GENMASK_ULL(37, 32)
+#define IRDMA_UDA_QPSQ_L4LEN GENMASK_ULL(45, 42)
+#define IRDMA_GEN1_UDA_QPSQ_L4LEN GENMASK_ULL(27, 24)
+#define IRDMA_UDA_QPSQ_AHIDX GENMASK_ULL(16, 0)
+#define IRDMA_UDA_QPSQ_VALID BIT_ULL(63)
+#define IRDMA_UDA_QPSQ_SIGCOMPL BIT_ULL(62)
+#define IRDMA_UDA_QPSQ_MACLEN GENMASK_ULL(62, 56)
+#define IRDMA_UDA_QPSQ_IPLEN GENMASK_ULL(54, 48)
+#define IRDMA_UDA_QPSQ_L4T GENMASK_ULL(31, 30)
+#define IRDMA_UDA_QPSQ_IIPT GENMASK_ULL(29, 28)
+#define IRDMA_UDA_PAYLOADLEN GENMASK_ULL(13, 0)
+#define IRDMA_UDA_HDRLEN GENMASK_ULL(24, 16)
+#define IRDMA_VLAN_TAG_VALID BIT_ULL(50)
+#define IRDMA_UDA_L3PROTO GENMASK_ULL(1, 0)
+#define IRDMA_UDA_L4PROTO GENMASK_ULL(17, 16)
+#define IRDMA_UDA_QPSQ_DOLOOPBACK BIT_ULL(44)
+#define IRDMA_CQPSQ_BUFSIZE GENMASK_ULL(31, 0)
+#define IRDMA_CQPSQ_OPCODE GENMASK_ULL(37, 32)
+#define IRDMA_CQPSQ_WQEVALID BIT_ULL(63)
+#define IRDMA_CQPSQ_TPHVAL GENMASK_ULL(7, 0)
+
+#define IRDMA_CQPSQ_VSIIDX GENMASK_ULL(23, 8)
+#define IRDMA_CQPSQ_TPHEN BIT_ULL(60)
+
+#define IRDMA_CQPSQ_PBUFADDR IRDMA_CQPHC_QPCTX
+
+#define IRDMA_CQPSQ_PASID GENMASK_ULL(51, 32)
+#define IRDMA_CQPSQ_PASID_VALID BIT_ULL(62)
+
+/* Create/Modify/Destroy QP */
+
+#define IRDMA_CQPSQ_QP_NEWMSS GENMASK_ULL(45, 32)
+#define IRDMA_CQPSQ_QP_TERMLEN GENMASK_ULL(51, 48)
+
+#define IRDMA_CQPSQ_QP_QPCTX IRDMA_CQPHC_QPCTX
+
+#define IRDMA_CQPSQ_QP_QPID_S 0
+#define IRDMA_CQPSQ_QP_QPID_M (0xFFFFFFUL)
+
+#define IRDMA_CQPSQ_QP_OP_S 32
+#define IRDMA_CQPSQ_QP_OP_M IRDMACQ_OP_M
+#define IRDMA_CQPSQ_QP_ORDVALID BIT_ULL(42)
+#define IRDMA_CQPSQ_QP_TOECTXVALID BIT_ULL(43)
+#define IRDMA_CQPSQ_QP_CACHEDVARVALID BIT_ULL(44)
+#define IRDMA_CQPSQ_QP_VQ BIT_ULL(45)
+#define IRDMA_CQPSQ_QP_FORCELOOPBACK BIT_ULL(46)
+#define IRDMA_CQPSQ_QP_CQNUMVALID BIT_ULL(47)
+#define IRDMA_CQPSQ_QP_QPTYPE GENMASK_ULL(50, 48)
+#define IRDMA_CQPSQ_QP_MACVALID BIT_ULL(51)
+#define IRDMA_CQPSQ_QP_MSSCHANGE BIT_ULL(52)
+
+#define IRDMA_CQPSQ_QP_IGNOREMWBOUND BIT_ULL(54)
+#define IRDMA_CQPSQ_QP_REMOVEHASHENTRY BIT_ULL(55)
+#define IRDMA_CQPSQ_QP_TERMACT GENMASK_ULL(57, 56)
+#define IRDMA_CQPSQ_QP_RESETCON BIT_ULL(58)
+#define IRDMA_CQPSQ_QP_ARPTABIDXVALID BIT_ULL(59)
+#define IRDMA_CQPSQ_QP_NEXTIWSTATE GENMASK_ULL(62, 60)
+
+#define IRDMA_CQPSQ_QP_DBSHADOWADDR IRDMA_CQPHC_QPCTX
+
+#define IRDMA_CQPSQ_SRQ_RQSIZE GENMASK_ULL(3, 0)
+#define IRDMA_CQPSQ_SRQ_RQ_WQE_SIZE GENMASK_ULL(5, 4)
+#define IRDMA_CQPSQ_SRQ_SRQ_LIMIT GENMASK_ULL(43, 32)
+#define IRDMA_CQPSQ_SRQ_SRQCTX GENMASK_ULL(63, 6)
+#define IRDMA_CQPSQ_SRQ_PD_ID GENMASK_ULL(39, 16)
+#define IRDMA_CQPSQ_SRQ_SRQ_ID GENMASK_ULL(15, 0)
+#define IRDMA_CQPSQ_SRQ_OP GENMASK_ULL(37, 32)
+#define IRDMA_CQPSQ_SRQ_LEAF_PBL_SIZE GENMASK_ULL(45, 44)
+#define IRDMA_CQPSQ_SRQ_VIRTMAP BIT_ULL(47)
+#define IRDMA_CQPSQ_SRQ_TPH_EN BIT_ULL(60)
+#define IRDMA_CQPSQ_SRQ_ARM_LIMIT_EVENT BIT_ULL(61)
+#define IRDMA_CQPSQ_SRQ_FIRST_PM_PBL_IDX GENMASK_ULL(27, 0)
+#define IRDMA_CQPSQ_SRQ_TPH_VALUE GENMASK_ULL(7, 0)
+#define IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR_S 8
+#define IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR GENMASK_ULL(63, 8)
+#define IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR_S 6
+#define IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR GENMASK_ULL(63, 6)
+
+#define IRDMA_CQPSQ_CQ_CQSIZE GENMASK_ULL(20, 0)
+#define IRDMA_CQPSQ_CQ_CQCTX GENMASK_ULL(62, 0)
+#define IRDMA_CQPSQ_CQ_SHADOW_READ_THRESHOLD GENMASK(17, 0)
+
+#define IRDMA_CQPSQ_CQ_CQID_HIGH GENMASK_ULL(52, 50)
+#define IRDMA_CQPSQ_CQ_CEQID_HIGH GENMASK_ULL(59, 54)
+#define IRDMA_CQPSQ_CQ_OP GENMASK_ULL(37, 32)
+#define IRDMA_CQPSQ_CQ_CQRESIZE BIT_ULL(43)
+#define IRDMA_CQPSQ_CQ_LPBLSIZE GENMASK_ULL(45, 44)
+#define IRDMA_CQPSQ_CQ_CHKOVERFLOW BIT_ULL(46)
+#define IRDMA_CQPSQ_CQ_VIRTMAP BIT_ULL(47)
+#define IRDMA_CQPSQ_CQ_ENCEQEMASK BIT_ULL(48)
+#define IRDMA_CQPSQ_CQ_CEQIDVALID BIT_ULL(49)
+#define IRDMA_CQPSQ_CQ_AVOIDMEMCNFLCT BIT_ULL(61)
+#define IRDMA_CQPSQ_CQ_FIRSTPMPBLIDX GENMASK_ULL(27, 0)
+
+/* Allocate/Register/Register Shared/Deallocate Stag */
+#define IRDMA_CQPSQ_STAG_VA_FBO IRDMA_CQPHC_QPCTX
+#define IRDMA_CQPSQ_STAG_STAGLEN GENMASK_ULL(45, 0)
+#define IRDMA_CQPSQ_STAG_KEY GENMASK_ULL(7, 0)
+#define IRDMA_CQPSQ_STAG_IDX GENMASK_ULL(31, 8)
+#define IRDMA_CQPSQ_STAG_IDX_S 8
+#define IRDMA_CQPSQ_STAG_PARENTSTAGIDX GENMASK_ULL(55, 32)
+#define IRDMA_CQPSQ_STAG_MR BIT_ULL(43)
+#define IRDMA_CQPSQ_STAG_MWTYPE BIT_ULL(42)
+#define IRDMA_CQPSQ_STAG_MW1_BIND_DONT_VLDT_KEY BIT_ULL(58)
+#define IRDMA_CQPSQ_STAG_PDID_HI GENMASK_ULL(59, 54)
+
+#define IRDMA_CQPSQ_STAG_LPBLSIZE IRDMA_CQPSQ_CQ_LPBLSIZE
+#define IRDMA_CQPSQ_STAG_HPAGESIZE GENMASK_ULL(47, 46)
+#define IRDMA_CQPSQ_STAG_ARIGHTS GENMASK_ULL(52, 48)
+#define IRDMA_CQPSQ_STAG_REMACCENABLED BIT_ULL(53)
+#define IRDMA_CQPSQ_STAG_VABASEDTO BIT_ULL(59)
+#define IRDMA_CQPSQ_STAG_USEHMCFNIDX BIT_ULL(60)
+#define IRDMA_CQPSQ_STAG_USEPFRID BIT_ULL(61)
+
+#define IRDMA_CQPSQ_STAG_PBA IRDMA_CQPHC_QPCTX
+#define IRDMA_CQPSQ_STAG_HMCFNIDX GENMASK_ULL(15, 0)
+#define IRDMA_CQPSQ_STAG_REMOTE_ATOMIC_EN BIT_ULL(61)
+
+#define IRDMA_CQPSQ_STAG_FIRSTPMPBLIDX GENMASK_ULL(27, 0)
+#define IRDMA_CQPSQ_QUERYSTAG_IDX IRDMA_CQPSQ_STAG_IDX
+#define IRDMA_CQPSQ_MLM_TABLEIDX GENMASK_ULL(5, 0)
+#define IRDMA_CQPSQ_MLM_FREEENTRY BIT_ULL(62)
+#define IRDMA_CQPSQ_MLM_IGNORE_REF_CNT BIT_ULL(61)
+#define IRDMA_CQPSQ_MLM_MAC0 GENMASK_ULL(7, 0)
+#define IRDMA_CQPSQ_MLM_MAC1 GENMASK_ULL(15, 8)
+#define IRDMA_CQPSQ_MLM_MAC2 GENMASK_ULL(23, 16)
+#define IRDMA_CQPSQ_MLM_MAC3 GENMASK_ULL(31, 24)
+#define IRDMA_CQPSQ_MLM_MAC4 GENMASK_ULL(39, 32)
+#define IRDMA_CQPSQ_MLM_MAC5 GENMASK_ULL(47, 40)
+#define IRDMA_CQPSQ_MAT_REACHMAX GENMASK_ULL(31, 0)
+#define IRDMA_CQPSQ_MAT_MACADDR GENMASK_ULL(47, 0)
+#define IRDMA_CQPSQ_MAT_ARPENTRYIDX GENMASK_ULL(11, 0)
+#define IRDMA_CQPSQ_MAT_ENTRYVALID BIT_ULL(42)
+#define IRDMA_CQPSQ_MAT_PERMANENT BIT_ULL(43)
+#define IRDMA_CQPSQ_MAT_QUERY BIT_ULL(44)
+#define IRDMA_CQPSQ_MVPBP_PD_ENTRY_CNT GENMASK_ULL(9, 0)
+#define IRDMA_CQPSQ_MVPBP_FIRST_PD_INX GENMASK_ULL(24, 16)
+#define IRDMA_CQPSQ_MVPBP_SD_INX GENMASK_ULL(43, 32)
+#define IRDMA_CQPSQ_MVPBP_INV_PD_ENT BIT_ULL(62)
+#define IRDMA_CQPSQ_MVPBP_PD_PLPBA GENMASK_ULL(63, 3)
+
+/* Manage Push Page - MPP */
+#define IRDMA_INVALID_PUSH_PAGE_INDEX_GEN_1 0xffff
+#define IRDMA_INVALID_PUSH_PAGE_INDEX 0xffffffff
+#define IRDMA_CQPSQ_MPP_PPIDX GENMASK_ULL(31, 0)
+#define IRDMA_CQPSQ_MPP_PPTYPE GENMASK_ULL(61, 60)
+#define IRDMA_CQPSQ_MPP_FREE_PAGE BIT_ULL(62)
+
+/* Upload Context - UCTX */
+#define IRDMA_CQPSQ_UCTX_QPCTXADDR IRDMA_CQPHC_QPCTX
+#define IRDMA_CQPSQ_UCTX_QPID GENMASK_ULL(23, 0)
+#define IRDMA_CQPSQ_UCTX_QPTYPE GENMASK_ULL(51, 48)
+
+#define IRDMA_CQPSQ_UCTX_RAWFORMAT BIT_ULL(61)
+#define IRDMA_CQPSQ_UCTX_FREEZEQP BIT_ULL(62)
+
+#define IRDMA_CQPSQ_MHMC_VFIDX GENMASK_ULL(15, 0)
+#define IRDMA_CQPSQ_MHMC_FREEPMFN BIT_ULL(62)
+
+#define IRDMA_CQPSQ_SHMCRP_HMC_PROFILE GENMASK_ULL(2, 0)
+#define IRDMA_CQPSQ_SHMCRP_VFNUM GENMASK_ULL(37, 32)
+#define IRDMA_CQPSQ_CEQ_CEQSIZE GENMASK_ULL(21, 0)
+#define IRDMA_CQPSQ_CEQ_CEQID GENMASK_ULL(9, 0)
+
+#define IRDMA_CQPSQ_CEQ_CEQID_HIGH GENMASK_ULL(15, 10)
+
+#define IRDMA_CQPSQ_CEQ_LPBLSIZE IRDMA_CQPSQ_CQ_LPBLSIZE
+#define IRDMA_CQPSQ_CEQ_VMAP BIT_ULL(47)
+#define IRDMA_CQPSQ_CEQ_ITRNOEXPIRE BIT_ULL(46)
+#define IRDMA_CQPSQ_CEQ_FIRSTPMPBLIDX GENMASK_ULL(27, 0)
+#define IRDMA_CQPSQ_AEQ_AEQECNT GENMASK_ULL(18, 0)
+#define IRDMA_CQPSQ_AEQ_LPBLSIZE IRDMA_CQPSQ_CQ_LPBLSIZE
+#define IRDMA_CQPSQ_AEQ_VMAP BIT_ULL(47)
+#define IRDMA_CQPSQ_AEQ_FIRSTPMPBLIDX GENMASK_ULL(27, 0)
+
+#define IRDMA_COMMIT_FPM_QPCNT GENMASK_ULL(20, 0)
+#define IRDMA_COMMIT_FPM_BASE_S 32
+#define IRDMA_CQPSQ_CFPM_HMCFNID GENMASK_ULL(15, 0)
+
+#define IRDMA_CQPSQ_FWQE_AECODE GENMASK_ULL(15, 0)
+#define IRDMA_CQPSQ_FWQE_AESOURCE GENMASK_ULL(19, 16)
+#define IRDMA_CQPSQ_FWQE_RQMNERR GENMASK_ULL(15, 0)
+#define IRDMA_CQPSQ_FWQE_RQMJERR GENMASK_ULL(31, 16)
+#define IRDMA_CQPSQ_FWQE_SQMNERR GENMASK_ULL(47, 32)
+#define IRDMA_CQPSQ_FWQE_SQMJERR GENMASK_ULL(63, 48)
+#define IRDMA_CQPSQ_FWQE_QPID GENMASK_ULL(23, 0)
+#define IRDMA_CQPSQ_FWQE_GENERATE_AE BIT_ULL(59)
+#define IRDMA_CQPSQ_FWQE_USERFLCODE BIT_ULL(60)
+#define IRDMA_CQPSQ_FWQE_FLUSHSQ BIT_ULL(61)
+#define IRDMA_CQPSQ_FWQE_FLUSHRQ BIT_ULL(62)
+#define IRDMA_CQPSQ_FWQE_ERR_SQ_IDX_VALID BIT_ULL(42)
+#define IRDMA_CQPSQ_FWQE_ERR_SQ_IDX GENMASK_ULL(49, 32)
+#define IRDMA_CQPSQ_FWQE_ERR_RQ_IDX_VALID BIT_ULL(43)
+#define IRDMA_CQPSQ_FWQE_ERR_RQ_IDX GENMASK_ULL(46, 32)
+#define IRDMA_CQPSQ_MAPT_PORT GENMASK_ULL(15, 0)
+#define IRDMA_CQPSQ_MAPT_ADDPORT BIT_ULL(62)
+#define IRDMA_CQPSQ_UPESD_SDCMD GENMASK_ULL(31, 0)
+#define IRDMA_CQPSQ_UPESD_SDDATALOW GENMASK_ULL(31, 0)
+#define IRDMA_CQPSQ_UPESD_SDDATAHI GENMASK_ULL(63, 32)
+#define IRDMA_CQPSQ_UPESD_HMCFNID GENMASK_ULL(5, 0)
+#define IRDMA_CQPSQ_UPESD_ENTRY_VALID BIT_ULL(63)
+
+#define IRDMA_CQPSQ_UPESD_BM_PF 0
+#define IRDMA_CQPSQ_UPESD_BM_CP_LM 1
+#define IRDMA_CQPSQ_UPESD_BM_AXF 2
+#define IRDMA_CQPSQ_UPESD_BM_LM 4
+#define IRDMA_CQPSQ_UPESD_BM GENMASK_ULL(34, 32)
+#define IRDMA_CQPSQ_UPESD_ENTRY_COUNT GENMASK_ULL(3, 0)
+#define IRDMA_CQPSQ_UPESD_SKIP_ENTRY BIT_ULL(7)
+#define IRDMA_CQPSQ_SUSPENDQP_QPID GENMASK_ULL(23, 0)
+#define IRDMA_CQPSQ_RESUMEQP_QSHANDLE GENMASK_ULL(31, 0)
+#define IRDMA_CQPSQ_RESUMEQP_QPID GENMASK(23, 0)
+#define IRDMA_MANAGE_RSRC_VER2 BIT_ULL(2)
+
+#define IRDMA_CQPSQ_MIN_STAG_INVALID 0x0001
+#define IRDMA_CQPSQ_MIN_SUSPEND_PND 0x0005
+#define IRDMA_CQPSQ_MIN_DEF_CMPL 0x0006
+#define IRDMA_CQPSQ_MIN_OOO_CMPL 0x0007
+
+#define IRDMA_CQPSQ_MAJ_NO_ERROR 0x0000
+#define IRDMA_CQPSQ_MAJ_OBJCACHE_ERROR 0xF000
+#define IRDMA_CQPSQ_MAJ_CNTXTCACHE_ERROR 0xF001
+#define IRDMA_CQPSQ_MAJ_ERROR 0xFFFF
+#define IRDMAQPC_DDP_VER GENMASK_ULL(1, 0)
+#define IRDMAQPC_IBRDENABLE BIT_ULL(2)
+#define IRDMAQPC_IPV4 BIT_ULL(3)
+#define IRDMAQPC_NONAGLE BIT_ULL(4)
+#define IRDMAQPC_INSERTVLANTAG BIT_ULL(5)
+#define IRDMAQPC_ISQP1 BIT_ULL(6)
+#define IRDMAQPC_TIMESTAMP BIT_ULL(7)
+#define IRDMAQPC_RQWQESIZE GENMASK_ULL(9, 8)
+#define IRDMAQPC_INSERTL2TAG2 BIT_ULL(11)
+#define IRDMAQPC_LIMIT GENMASK_ULL(13, 12)
+
+#define IRDMAQPC_USE_SRQ BIT_ULL(10)
+#define IRDMAQPC_SRQ_ID GENMASK_ULL(15, 0)
+#define IRDMAQPC_PASID GENMASK_ULL(19, 0)
+#define IRDMAQPC_PASID_VALID BIT_ULL(11)
+
+#define IRDMAQPC_ECN_EN BIT_ULL(14)
+#define IRDMAQPC_DROPOOOSEG BIT_ULL(15)
+#define IRDMAQPC_DUPACK_THRESH GENMASK_ULL(18, 16)
+#define IRDMAQPC_ERR_RQ_IDX_VALID BIT_ULL(19)
+#define IRDMAQPC_DIS_VLAN_CHECKS GENMASK_ULL(21, 19)
+#define IRDMAQPC_DC_TCP_EN BIT_ULL(25)
+#define IRDMAQPC_RCVTPHEN BIT_ULL(28)
+#define IRDMAQPC_XMITTPHEN BIT_ULL(29)
+#define IRDMAQPC_RQTPHEN BIT_ULL(30)
+#define IRDMAQPC_SQTPHEN BIT_ULL(31)
+#define IRDMAQPC_PPIDX GENMASK_ULL(41, 32)
+#define IRDMAQPC_PMENA BIT_ULL(47)
+#define IRDMAQPC_RDMAP_VER GENMASK_ULL(63, 62)
+#define IRDMAQPC_ROCE_TVER GENMASK_ULL(63, 60)
+
+#define IRDMAQPC_SQADDR IRDMA_CQPHC_QPCTX
+#define IRDMAQPC_RQADDR IRDMA_CQPHC_QPCTX
+#define IRDMAQPC_TTL GENMASK_ULL(7, 0)
+#define IRDMAQPC_RQSIZE GENMASK_ULL(11, 8)
+#define IRDMAQPC_SQSIZE GENMASK_ULL(15, 12)
+#define IRDMAQPC_GEN1_SRCMACADDRIDX GENMASK(21, 16)
+#define IRDMAQPC_AVOIDSTRETCHACK BIT_ULL(23)
+#define IRDMAQPC_TOS GENMASK_ULL(31, 24)
+#define IRDMAQPC_SRCPORTNUM GENMASK_ULL(47, 32)
+#define IRDMAQPC_DESTPORTNUM GENMASK_ULL(63, 48)
+#define IRDMAQPC_DESTIPADDR0 GENMASK_ULL(63, 32)
+#define IRDMAQPC_DESTIPADDR1 GENMASK_ULL(31, 0)
+#define IRDMAQPC_DESTIPADDR2 GENMASK_ULL(63, 32)
+#define IRDMAQPC_DESTIPADDR3 GENMASK_ULL(31, 0)
+#define IRDMAQPC_SNDMSS GENMASK_ULL(29, 16)
+#define IRDMAQPC_SYN_RST_HANDLING GENMASK_ULL(31, 30)
+#define IRDMAQPC_VLANTAG GENMASK_ULL(47, 32)
+#define IRDMAQPC_ARPIDX GENMASK_ULL(63, 48)
+#define IRDMAQPC_FLOWLABEL GENMASK_ULL(19, 0)
+#define IRDMAQPC_WSCALE BIT_ULL(20)
+#define IRDMAQPC_KEEPALIVE BIT_ULL(21)
+#define IRDMAQPC_IGNORE_TCP_OPT BIT_ULL(22)
+#define IRDMAQPC_IGNORE_TCP_UNS_OPT BIT_ULL(23)
+#define IRDMAQPC_TCPSTATE GENMASK_ULL(31, 28)
+#define IRDMAQPC_RCVSCALE GENMASK_ULL(35, 32)
+#define IRDMAQPC_SNDSCALE GENMASK_ULL(43, 40)
+#define IRDMAQPC_PDIDX GENMASK_ULL(63, 48)
+#define IRDMAQPC_PDIDXHI GENMASK_ULL(21, 20)
+#define IRDMAQPC_PKEY GENMASK_ULL(47, 32)
+#define IRDMAQPC_ACKCREDITS GENMASK_ULL(24, 20)
+#define IRDMAQPC_QKEY GENMASK_ULL(63, 32)
+#define IRDMAQPC_DESTQP GENMASK_ULL(23, 0)
+#define IRDMAQPC_KALIVE_TIMER_MAX_PROBES GENMASK_ULL(23, 16)
+#define IRDMAQPC_KEEPALIVE_INTERVAL GENMASK_ULL(31, 24)
+#define IRDMAQPC_TIMESTAMP_RECENT GENMASK_ULL(31, 0)
+#define IRDMAQPC_TIMESTAMP_AGE GENMASK_ULL(63, 32)
+#define IRDMAQPC_SNDNXT GENMASK_ULL(31, 0)
+#define IRDMAQPC_ISN GENMASK_ULL(55, 32)
+#define IRDMAQPC_PSNNXT GENMASK_ULL(23, 0)
+#define IRDMAQPC_LSN GENMASK_ULL(55, 32)
+#define IRDMAQPC_SNDWND GENMASK_ULL(63, 32)
+#define IRDMAQPC_RCVNXT GENMASK_ULL(31, 0)
+#define IRDMAQPC_EPSN GENMASK_ULL(23, 0)
+#define IRDMAQPC_RCVWND GENMASK_ULL(63, 32)
+#define IRDMAQPC_SNDMAX GENMASK_ULL(31, 0)
+#define IRDMAQPC_SNDUNA GENMASK_ULL(63, 32)
+#define IRDMAQPC_PSNMAX GENMASK_ULL(23, 0)
+#define IRDMAQPC_PSNUNA GENMASK_ULL(55, 32)
+#define IRDMAQPC_SRTT GENMASK_ULL(31, 0)
+#define IRDMAQPC_RTTVAR GENMASK_ULL(63, 32)
+#define IRDMAQPC_SSTHRESH GENMASK_ULL(31, 0)
+#define IRDMAQPC_CWND GENMASK_ULL(63, 32)
+#define IRDMAQPC_CWNDROCE GENMASK_ULL(55, 32)
+#define IRDMAQPC_SNDWL1 GENMASK_ULL(31, 0)
+#define IRDMAQPC_SNDWL2 GENMASK_ULL(63, 32)
+#define IRDMAQPC_MINRNR_TIMER GENMASK_ULL(4, 0)
+#define IRDMAQPC_ERR_RQ_IDX GENMASK_ULL(46, 32)
+#define IRDMAQPC_RTOMIN GENMASK_ULL(63, 57)
+#define IRDMAQPC_MAXSNDWND GENMASK_ULL(31, 0)
+#define IRDMAQPC_REXMIT_THRESH GENMASK_ULL(53, 48)
+#define IRDMAQPC_RNRNAK_THRESH GENMASK_ULL(56, 54)
+#define IRDMAQPC_TXCQNUM GENMASK_ULL(24, 0)
+#define IRDMAQPC_RXCQNUM GENMASK_ULL(56, 32)
+#define IRDMAQPC_STAT_INDEX GENMASK_ULL(6, 0)
+#define IRDMAQPC_Q2ADDR GENMASK_ULL(63, 8)
+#define IRDMAQPC_LASTBYTESENT GENMASK_ULL(7, 0)
+#define IRDMAQPC_MACADDRESS GENMASK_ULL(63, 16)
+#define IRDMAQPC_ORDSIZE GENMASK_ULL(7, 0)
+
+#define IRDMAQPC_LOCALACKTIMEOUT GENMASK_ULL(12, 8)
+#define IRDMAQPC_RNRNAK_TMR GENMASK_ULL(4, 0)
+#define IRDMAQPC_ORDSIZE_GEN3 GENMASK_ULL(10, 0)
+#define IRDMAQPC_REMOTE_ATOMIC_EN BIT_ULL(18)
+#define IRDMAQPC_STAT_INDEX_GEN3 GENMASK_ULL(47, 32)
+#define IRDMAQPC_PKT_LIMIT GENMASK_ULL(55, 48)
+
+#define IRDMAQPC_IRDSIZE GENMASK_ULL(18, 16)
+
+#define IRDMAQPC_IRDSIZE_GEN3 GENMASK_ULL(17, 14)
+
+#define IRDMAQPC_UDPRIVCQENABLE BIT_ULL(19)
+#define IRDMAQPC_WRRDRSPOK BIT_ULL(20)
+#define IRDMAQPC_RDOK BIT_ULL(21)
+#define IRDMAQPC_SNDMARKERS BIT_ULL(22)
+#define IRDMAQPC_DCQCNENABLE BIT_ULL(22)
+#define IRDMAQPC_FW_CC_ENABLE BIT_ULL(28)
+#define IRDMAQPC_RCVNOICRC BIT_ULL(31)
+#define IRDMAQPC_BINDEN BIT_ULL(23)
+#define IRDMAQPC_FASTREGEN BIT_ULL(24)
+#define IRDMAQPC_PRIVEN BIT_ULL(25)
+#define IRDMAQPC_TIMELYENABLE BIT_ULL(27)
+#define IRDMAQPC_THIGH GENMASK_ULL(63, 52)
+#define IRDMAQPC_TLOW GENMASK_ULL(39, 32)
+#define IRDMAQPC_REMENDPOINTIDX GENMASK_ULL(16, 0)
+#define IRDMAQPC_USESTATSINSTANCE BIT_ULL(26)
+#define IRDMAQPC_IWARPMODE BIT_ULL(28)
+#define IRDMAQPC_RCVMARKERS BIT_ULL(29)
+#define IRDMAQPC_ALIGNHDRS BIT_ULL(30)
+#define IRDMAQPC_RCVNOMPACRC BIT_ULL(31)
+#define IRDMAQPC_RCVMARKOFFSET GENMASK_ULL(40, 32)
+#define IRDMAQPC_SNDMARKOFFSET GENMASK_ULL(56, 48)
+
+#define IRDMAQPC_QPCOMPCTX IRDMA_CQPHC_QPCTX
+#define IRDMAQPC_SQTPHVAL GENMASK_ULL(7, 0)
+#define IRDMAQPC_RQTPHVAL GENMASK_ULL(15, 8)
+#define IRDMAQPC_QSHANDLE GENMASK_ULL(25, 16)
+#define IRDMAQPC_EXCEPTION_LAN_QUEUE GENMASK_ULL(43, 32)
+#define IRDMAQPC_LOCAL_IPADDR3 GENMASK_ULL(31, 0)
+#define IRDMAQPC_LOCAL_IPADDR2 GENMASK_ULL(63, 32)
+#define IRDMAQPC_LOCAL_IPADDR1 GENMASK_ULL(31, 0)
+#define IRDMAQPC_LOCAL_IPADDR0 GENMASK_ULL(63, 32)
+#define IRDMA_FW_VER_MINOR GENMASK_ULL(15, 0)
+#define IRDMA_FW_VER_MAJOR GENMASK_ULL(31, 16)
+#define IRDMA_FEATURE_INFO GENMASK_ULL(47, 0)
+#define IRDMA_FEATURE_CNT GENMASK_ULL(47, 32)
+#define IRDMA_FEATURE_TYPE GENMASK_ULL(63, 48)
+#define IRDMA_FEATURE_RSRC_MAX GENMASK_ULL(31, 0)
+
+#define IRDMAQPSQ_OPCODE GENMASK_ULL(37, 32)
+#define IRDMAQPSQ_COPY_HOST_PBL BIT_ULL(43)
+#define IRDMAQPSQ_ADDFRAGCNT GENMASK_ULL(41, 38)
+#define IRDMAQPSQ_PUSHWQE BIT_ULL(56)
+#define IRDMAQPSQ_STREAMMODE BIT_ULL(58)
+#define IRDMAQPSQ_WAITFORRCVPDU BIT_ULL(59)
+#define IRDMAQPSQ_READFENCE BIT_ULL(60)
+#define IRDMAQPSQ_LOCALFENCE BIT_ULL(61)
+#define IRDMAQPSQ_UDPHEADER BIT_ULL(61)
+#define IRDMAQPSQ_L4LEN GENMASK_ULL(45, 42)
+#define IRDMAQPSQ_SIGCOMPL BIT_ULL(62)
+#define IRDMAQPSQ_VALID BIT_ULL(63)
+
+#define IRDMAQPSQ_FRAG_TO IRDMA_CQPHC_QPCTX
+#define IRDMAQPSQ_FRAG_VALID BIT_ULL(63)
+#define IRDMAQPSQ_FRAG_LEN GENMASK_ULL(62, 32)
+#define IRDMAQPSQ_FRAG_STAG GENMASK_ULL(31, 0)
+#define IRDMAQPSQ_GEN1_FRAG_LEN GENMASK_ULL(31, 0)
+#define IRDMAQPSQ_GEN1_FRAG_STAG GENMASK_ULL(63, 32)
+#define IRDMAQPSQ_REMSTAGINV GENMASK_ULL(31, 0)
+#define IRDMAQPSQ_DESTQKEY GENMASK_ULL(31, 0)
+#define IRDMAQPSQ_DESTQPN GENMASK_ULL(55, 32)
+#define IRDMAQPSQ_AHID GENMASK_ULL(24, 0)
+#define IRDMAQPSQ_INLINEDATAFLAG BIT_ULL(57)
+
+#define IRDMA_INLINE_VALID_S 7
+#define IRDMAQPSQ_INLINEDATALEN GENMASK_ULL(55, 48)
+#define IRDMAQPSQ_IMMDATAFLAG BIT_ULL(47)
+#define IRDMAQPSQ_REPORTRTT BIT_ULL(46)
+
+#define IRDMAQPSQ_IMMDATA GENMASK_ULL(63, 0)
+#define IRDMAQPSQ_REMSTAG GENMASK_ULL(31, 0)
+
+#define IRDMAQPSQ_REMTO IRDMA_CQPHC_QPCTX
+
+#define IRDMAQPSQ_STAG GENMASK_ULL(31, 0)
+#define IRDMAQPSQ_REMOTE_STAG GENMASK_ULL(31, 0)
+
+#define IRDMAQPSQ_STAGRIGHTS GENMASK_ULL(52, 48)
+#define IRDMAQPSQ_VABASEDTO BIT_ULL(53)
+#define IRDMAQPSQ_MEMWINDOWTYPE BIT_ULL(54)
+
+#define IRDMAQPSQ_MWLEN IRDMA_CQPHC_QPCTX
+#define IRDMAQPSQ_PARENTMRSTAG GENMASK_ULL(63, 32)
+#define IRDMAQPSQ_MWSTAG GENMASK_ULL(31, 0)
+
+#define IRDMAQPSQ_BASEVA_TO_FBO IRDMA_CQPHC_QPCTX
+
+#define IRDMAQPSQ_REMOTE_ATOMICS_EN BIT_ULL(55)
+
+#define IRDMAQPSQ_LOCSTAG GENMASK_ULL(31, 0)
+
+#define IRDMAQPSQ_STAGKEY GENMASK_ULL(7, 0)
+#define IRDMAQPSQ_STAGINDEX GENMASK_ULL(31, 8)
+#define IRDMAQPSQ_COPYHOSTPBLS BIT_ULL(43)
+#define IRDMAQPSQ_LPBLSIZE GENMASK_ULL(45, 44)
+#define IRDMAQPSQ_HPAGESIZE GENMASK_ULL(47, 46)
+#define IRDMAQPSQ_STAGLEN GENMASK_ULL(40, 0)
+#define IRDMAQPSQ_FIRSTPMPBLIDXLO GENMASK_ULL(63, 48)
+#define IRDMAQPSQ_FIRSTPMPBLIDXHI GENMASK_ULL(11, 0)
+#define IRDMAQPSQ_PBLADDR GENMASK_ULL(63, 12)
+
+/* iwarp QP RQ WQE common fields */
+#define IRDMAQPRQ_ADDFRAGCNT IRDMAQPSQ_ADDFRAGCNT
+#define IRDMAQPRQ_VALID IRDMAQPSQ_VALID
+#define IRDMAQPRQ_COMPLCTX IRDMA_CQPHC_QPCTX
+#define IRDMAQPRQ_FRAG_LEN IRDMAQPSQ_FRAG_LEN
+#define IRDMAQPRQ_STAG IRDMAQPSQ_FRAG_STAG
+#define IRDMAQPRQ_TO IRDMAQPSQ_FRAG_TO
+
+#define IRDMAPFINT_OICR_HMC_ERR_M BIT(26)
+#define IRDMAPFINT_OICR_PE_PUSH_M BIT(27)
+#define IRDMAPFINT_OICR_PE_CRITERR_M BIT(28)
+
+#define IRDMA_QUERY_FPM_LOC_MEM_PAGES GENMASK_ULL(63, 32)
+#define IRDMA_QUERY_FPM_MAX_QPS GENMASK_ULL(31, 0)
+#define IRDMA_QUERY_FPM_MAX_CQS GENMASK_ULL(31, 0)
+#define IRDMA_QUERY_FPM_FIRST_PE_SD_INDEX GENMASK_ULL(13, 0)
+#define IRDMA_QUERY_FPM_MAX_PE_SDS GENMASK_ULL(44, 32)
+#define IRDMA_QUERY_FPM_MAX_PE_SDS_GEN3 GENMASK_ULL(47, 32)
+#define IRDMA_QUERY_FPM_MAX_CEQS GENMASK_ULL(9, 0)
+#define IRDMA_QUERY_FPM_MAX_IRD GENMASK_ULL(53, 50)
+#define IRDMA_QUERY_FPM_XFBLOCKSIZE GENMASK_ULL(63, 32)
+#define IRDMA_QUERY_FPM_Q1BLOCKSIZE GENMASK_ULL(63, 32)
+#define IRDMA_QUERY_FPM_HTMULTIPLIER GENMASK_ULL(19, 16)
+#define IRDMA_QUERY_FPM_TIMERBUCKET GENMASK_ULL(47, 32)
+#define IRDMA_QUERY_FPM_RRFBLOCKSIZE GENMASK_ULL(63, 32)
+#define IRDMA_QUERY_FPM_RRFFLBLOCKSIZE GENMASK_ULL(63, 32)
+#define IRDMA_QUERY_FPM_OOISCFBLOCKSIZE GENMASK_ULL(63, 32)
+#define IRDMA_SHMC_PAGE_ALLOCATED_HMC_FN_ID GENMASK_ULL(5, 0)
+
+#define IRDMA_GET_CURRENT_AEQ_ELEM(_aeq) \
+ ( \
+ (_aeq)->aeqe_base[IRDMA_RING_CURRENT_TAIL((_aeq)->aeq_ring)].buf \
+ )
+
+#define IRDMA_GET_CURRENT_CEQ_ELEM(_ceq) \
+ ( \
+ (_ceq)->ceqe_base[IRDMA_RING_CURRENT_TAIL((_ceq)->ceq_ring)].buf \
+ )
+
+#define IRDMA_GET_CEQ_ELEM_AT_POS(_ceq, _pos) \
+ ( \
+ (_ceq)->ceqe_base[_pos].buf \
+ )
+
+#define IRDMA_RING_GET_NEXT_TAIL(_ring, _idx) \
+ ( \
+ ((_ring).tail + (_idx)) % (_ring).size \
+ )
+
+#define IRDMA_CQP_INIT_WQE(wqe) memset(wqe, 0, 64)
+
+#define IRDMA_GET_CURRENT_CQ_ELEM(_cq) \
+ ( \
+ (_cq)->cq_base[IRDMA_RING_CURRENT_HEAD((_cq)->cq_ring)].buf \
+ )
+#define IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(_cq) \
+ ( \
+ ((struct irdma_extended_cqe *) \
+ ((_cq)->cq_base))[IRDMA_RING_CURRENT_HEAD((_cq)->cq_ring)].buf \
+ )
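+
+/*
+ * Illustrative CQ polling sketch (not part of this patch; the cq member
+ * names and the 24-byte offset of the valid/polarity word are assumptions
+ * about the CQE layout): read the element at the current ring position,
+ * check its valid bit against the expected polarity, then advance the ring:
+ *
+ *   __le64 *cqe = IRDMA_GET_CURRENT_CQ_ELEM(cq);
+ *   get_64bit_val(cqe, 24, &qword3);
+ *   if (FIELD_GET(IRDMA_CQ_VALID, qword3) != cq->polarity)
+ *           return -ENOENT;
+ *   IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring);
+ */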
+
+#define IRDMA_RING_INIT(_ring, _size) \
+ { \
+ (_ring).head = 0; \
+ (_ring).tail = 0; \
+ (_ring).size = (_size); \
+ }
+#define IRDMA_RING_SIZE(_ring) ((_ring).size)
+#define IRDMA_RING_CURRENT_HEAD(_ring) ((_ring).head)
+#define IRDMA_RING_CURRENT_TAIL(_ring) ((_ring).tail)
+
+#define IRDMA_RING_MOVE_HEAD(_ring, _retcode) \
+ { \
+ register u32 size; \
+ size = (_ring).size; \
+ if (!IRDMA_RING_FULL_ERR(_ring)) { \
+ (_ring).head = ((_ring).head + 1) % size; \
+ (_retcode) = 0; \
+ } else { \
+ (_retcode) = -ENOMEM; \
+ } \
+ }
+#define IRDMA_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \
+ { \
+ register u32 size; \
+ size = (_ring).size; \
+ if ((IRDMA_RING_USED_QUANTA(_ring) + (_count)) < size) { \
+ (_ring).head = ((_ring).head + (_count)) % size; \
+ (_retcode) = 0; \
+ } else { \
+ (_retcode) = -ENOMEM; \
+ } \
+ }
+#define IRDMA_SQ_RING_MOVE_HEAD(_ring, _retcode) \
+ { \
+ register u32 size; \
+ size = (_ring).size; \
+ if (!IRDMA_SQ_RING_FULL_ERR(_ring)) { \
+ (_ring).head = ((_ring).head + 1) % size; \
+ (_retcode) = 0; \
+ } else { \
+ (_retcode) = -ENOMEM; \
+ } \
+ }
+#define IRDMA_SQ_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \
+ { \
+ register u32 size; \
+ size = (_ring).size; \
+ if ((IRDMA_RING_USED_QUANTA(_ring) + (_count)) < (size - 256)) { \
+ (_ring).head = ((_ring).head + (_count)) % size; \
+ (_retcode) = 0; \
+ } else { \
+ (_retcode) = -ENOMEM; \
+ } \
+ }
+#define IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(_ring, _count) \
+ (_ring).head = ((_ring).head + (_count)) % (_ring).size
+
+#define IRDMA_RING_MOVE_TAIL(_ring) \
+ (_ring).tail = ((_ring).tail + 1) % (_ring).size
+
+#define IRDMA_RING_MOVE_HEAD_NOCHECK(_ring) \
+ (_ring).head = ((_ring).head + 1) % (_ring).size
+
+#define IRDMA_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \
+ (_ring).tail = ((_ring).tail + (_count)) % (_ring).size
+
+#define IRDMA_RING_SET_TAIL(_ring, _pos) \
+ (_ring).tail = (_pos) % (_ring).size
+
+#define IRDMA_RING_FULL_ERR(_ring) \
+ ( \
+ (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 1)) \
+ )
+
+#define IRDMA_ERR_RING_FULL2(_ring) \
+ ( \
+ (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 2)) \
+ )
+
+#define IRDMA_ERR_RING_FULL3(_ring) \
+ ( \
+ (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 3)) \
+ )
+
+#define IRDMA_SQ_RING_FULL_ERR(_ring) \
+ ( \
+ (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 257)) \
+ )
+
+#define IRDMA_ERR_SQ_RING_FULL2(_ring) \
+ ( \
+ (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 258)) \
+ )
+#define IRDMA_ERR_SQ_RING_FULL3(_ring) \
+ ( \
+ (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 259)) \
+ )
+#define IRDMA_RING_MORE_WORK(_ring) \
+ ( \
+ (IRDMA_RING_USED_QUANTA(_ring) != 0) \
+ )
+
+#define IRDMA_RING_USED_QUANTA(_ring) \
+ ( \
+ (((_ring).head + (_ring).size - (_ring).tail) % (_ring).size) \
+ )
+
+#define IRDMA_RING_FREE_QUANTA(_ring) \
+ ( \
+ ((_ring).size - IRDMA_RING_USED_QUANTA(_ring) - 1) \
+ )
+
+#define IRDMA_SQ_RING_FREE_QUANTA(_ring) \
+ ( \
+ ((_ring).size - IRDMA_RING_USED_QUANTA(_ring) - 257) \
+ )
+
+#define IRDMA_ATOMIC_RING_MOVE_HEAD(_ring, index, _retcode) \
+ { \
+ index = IRDMA_RING_CURRENT_HEAD(_ring); \
+ IRDMA_RING_MOVE_HEAD(_ring, _retcode); \
+ }
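+
+/*
+ * Ring usage sketch (illustrative; "cqp" and "ret" are placeholders, not
+ * names from this patch): a producer advances the head with one of the
+ * MOVE_HEAD macros, which set the return code to -ENOMEM when no quanta are
+ * free, and the consumer advances the tail once the entry has been handled:
+ *
+ *   IRDMA_RING_MOVE_HEAD(cqp->sq_ring, ret);
+ *   if (ret)
+ *           return ret;
+ *   ... fill the WQE at the old head ...
+ *   IRDMA_RING_MOVE_TAIL(cqp->sq_ring);
+ *
+ * The SQ_RING_* variants behave the same but keep 256 quanta in reserve
+ * (note the "size - 257" checks above).
+ */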
+
+enum irdma_qp_wqe_size {
+ IRDMA_WQE_SIZE_32 = 32,
+ IRDMA_WQE_SIZE_64 = 64,
+ IRDMA_WQE_SIZE_96 = 96,
+ IRDMA_WQE_SIZE_128 = 128,
+ IRDMA_WQE_SIZE_256 = 256,
+};
+
+enum irdma_ws_node_op {
+ IRDMA_ADD_NODE = 0,
+ IRDMA_MODIFY_NODE,
+ IRDMA_DEL_NODE,
+};
+
+enum {
+ IRDMA_Q_ALIGNMENT_M = (128 - 1),
+ IRDMA_AEQ_ALIGNMENT_M = (256 - 1),
+ IRDMA_Q2_ALIGNMENT_M = (256 - 1),
+ IRDMA_CEQ_ALIGNMENT_M = (256 - 1),
+ IRDMA_CQ0_ALIGNMENT_M = (256 - 1),
+ IRDMA_HOST_CTX_ALIGNMENT_M = (4 - 1),
+ IRDMA_SHADOWAREA_M = (128 - 1),
+ IRDMA_FPM_QUERY_BUF_ALIGNMENT_M = (4 - 1),
+ IRDMA_FPM_COMMIT_BUF_ALIGNMENT_M = (4 - 1),
+};
+
+enum irdma_alignment {
+ IRDMA_CQP_ALIGNMENT = 0x200,
+ IRDMA_AEQ_ALIGNMENT = 0x100,
+ IRDMA_CEQ_ALIGNMENT = 0x100,
+ IRDMA_CQ0_ALIGNMENT = 0x100,
+ IRDMA_SD_BUF_ALIGNMENT = 0x80,
+ IRDMA_FEATURE_BUF_ALIGNMENT = 0x10,
+};
+
+enum icrdma_protocol_used {
+ ICRDMA_ANY_PROTOCOL = 0,
+ ICRDMA_IWARP_PROTOCOL_ONLY = 1,
+ ICRDMA_ROCE_PROTOCOL_ONLY = 2,
+};
+
+/**
+ * set_64bit_val - set 64 bit value to hw wqe
+ * @wqe_words: wqe addr to write
+ * @byte_index: index in wqe
+ * @val: value to write
+ **/
+static inline void set_64bit_val(__le64 *wqe_words, u32 byte_index, u64 val)
+{
+ wqe_words[byte_index >> 3] = cpu_to_le64(val);
+}
+
+/**
+ * set_32bit_val - set 32 bit value to hw wqe
+ * @wqe_words: wqe addr to write
+ * @byte_index: index in wqe
+ * @val: value to write
+ **/
+static inline void set_32bit_val(__le32 *wqe_words, u32 byte_index, u32 val)
+{
+ wqe_words[byte_index >> 2] = cpu_to_le32(val);
+}
+
+/**
+ * get_64bit_val - read 64 bit value from wqe
+ * @wqe_words: wqe addr
+ * @byte_index: index to read from
+ * @val: read value
+ **/
+static inline void get_64bit_val(__le64 *wqe_words, u32 byte_index, u64 *val)
+{
+ *val = le64_to_cpu(wqe_words[byte_index >> 3]);
+}
+
+/**
+ * get_32bit_val - read 32 bit value from wqe
+ * @wqe_words: wqe addr
+ * @byte_index: index to read from
+ * @val: return 32 bit value
+ **/
+static inline void get_32bit_val(__le32 *wqe_words, u32 byte_index, u32 *val)
+{
+ *val = le32_to_cpu(wqe_words[byte_index >> 2]);
+}
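+
+/*
+ * Illustrative use of the accessors above (a sketch; "op", "polarity" and
+ * the byte offset are placeholders for a particular WQE layout, not values
+ * taken from this patch):
+ *
+ *   u64 hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, op) |
+ *             FIELD_PREP(IRDMAQPSQ_VALID, polarity);
+ *   set_64bit_val(wqe, 24, hdr);
+ *   get_64bit_val(wqe, 24, &hdr);
+ */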
+#endif /* IRDMA_DEFS_H */
diff --git a/drivers/infiniband/hw/irdma/hmc.c b/drivers/infiniband/hw/irdma/hmc.c
new file mode 100644
index 000000000000..da18add141da
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/hmc.c
@@ -0,0 +1,709 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include "osdep.h"
+#include "hmc.h"
+#include "defs.h"
+#include "type.h"
+#include "protos.h"
+#include "virtchnl.h"
+
+/**
+ * irdma_find_sd_index_limit - finds segment descriptor index limit
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @type: type of HMC resources we're searching
+ * @idx: starting index for the object
+ * @cnt: number of objects we're trying to create
+ * @sd_idx: pointer to return index of the segment descriptor in question
+ * @sd_limit: pointer to return the maximum number of segment descriptors
+ *
+ * This function calculates the segment descriptor index and index limit
+ * for the resource defined by irdma_hmc_rsrc_type.
+ */
+static void irdma_find_sd_index_limit(struct irdma_hmc_info *hmc_info, u32 type,
+ u32 idx, u32 cnt, u32 *sd_idx,
+ u32 *sd_limit)
+{
+ u64 fpm_addr, fpm_limit;
+
+ fpm_addr = hmc_info->hmc_obj[type].base +
+ hmc_info->hmc_obj[type].size * idx;
+ fpm_limit = fpm_addr + hmc_info->hmc_obj[type].size * cnt;
+ *sd_idx = (u32)(fpm_addr / IRDMA_HMC_DIRECT_BP_SIZE);
+ *sd_limit = (u32)((fpm_limit - 1) / IRDMA_HMC_DIRECT_BP_SIZE);
+ *sd_limit += 1;
+}
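+
+/*
+ * Worked example (illustrative): with IRDMA_HMC_DIRECT_BP_SIZE = 2MB, an
+ * object range spanning fpm addresses [5MB, 9MB) yields *sd_idx = 2 and
+ * *sd_limit = 4 + 1 = 5, i.e. segment descriptors 2..4 back the range.
+ */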
+
+/**
+ * irdma_find_pd_index_limit - finds page descriptor index limit
+ * @hmc_info: pointer to the HMC configuration information struct
+ * @type: HMC resource type we're examining
+ * @idx: starting index for the object
+ * @cnt: number of objects we're trying to create
+ * @pd_idx: pointer to return page descriptor index
+ * @pd_limit: pointer to return page descriptor index limit
+ *
+ * Calculates the page descriptor index and index limit for the resource
+ * defined by irdma_hmc_rsrc_type.
+ */
+static void irdma_find_pd_index_limit(struct irdma_hmc_info *hmc_info, u32 type,
+ u32 idx, u32 cnt, u32 *pd_idx,
+ u32 *pd_limit)
+{
+ u64 fpm_adr, fpm_limit;
+
+ fpm_adr = hmc_info->hmc_obj[type].base +
+ hmc_info->hmc_obj[type].size * idx;
+ fpm_limit = fpm_adr + hmc_info->hmc_obj[type].size * cnt;
+ *pd_idx = (u32)(fpm_adr / IRDMA_HMC_PAGED_BP_SIZE);
+ *pd_limit = (u32)((fpm_limit - 1) / IRDMA_HMC_PAGED_BP_SIZE);
+ *pd_limit += 1;
+}
+
+/**
+ * irdma_set_sd_entry - setup entry for sd programming
+ * @pa: physical addr
+ * @idx: sd index
+ * @type: paged or direct sd
+ * @entry: sd entry ptr
+ */
+static void irdma_set_sd_entry(u64 pa, u32 idx, enum irdma_sd_entry_type type,
+ struct irdma_update_sd_entry *entry)
+{
+ entry->data = pa |
+ FIELD_PREP(IRDMA_PFHMC_SDDATALOW_PMSDBPCOUNT, IRDMA_HMC_MAX_BP_COUNT) |
+ FIELD_PREP(IRDMA_PFHMC_SDDATALOW_PMSDTYPE,
+ type == IRDMA_SD_TYPE_PAGED ? 0 : 1) |
+ FIELD_PREP(IRDMA_PFHMC_SDDATALOW_PMSDVALID, 1);
+
+ entry->cmd = idx | FIELD_PREP(IRDMA_PFHMC_SDCMD_PMSDWR, 1) | BIT(15);
+}
+
+/**
+ * irdma_clr_sd_entry - setup entry for sd clear
+ * @idx: sd index
+ * @type: paged or direct sd
+ * @entry: sd entry ptr
+ */
+static void irdma_clr_sd_entry(u32 idx, enum irdma_sd_entry_type type,
+ struct irdma_update_sd_entry *entry)
+{
+ entry->data = FIELD_PREP(IRDMA_PFHMC_SDDATALOW_PMSDBPCOUNT, IRDMA_HMC_MAX_BP_COUNT) |
+ FIELD_PREP(IRDMA_PFHMC_SDDATALOW_PMSDTYPE,
+ type == IRDMA_SD_TYPE_PAGED ? 0 : 1);
+
+ entry->cmd = idx | FIELD_PREP(IRDMA_PFHMC_SDCMD_PMSDWR, 1) | BIT(15);
+}
+
+/**
+ * irdma_invalidate_pf_hmc_pd - Invalidates the pd cache in the hardware for PF
+ * @dev: pointer to our device struct
+ * @sd_idx: segment descriptor index
+ * @pd_idx: page descriptor index
+ */
+static inline void irdma_invalidate_pf_hmc_pd(struct irdma_sc_dev *dev, u32 sd_idx,
+ u32 pd_idx)
+{
+ u32 val = FIELD_PREP(IRDMA_PFHMC_PDINV_PMSDIDX, sd_idx) |
+ FIELD_PREP(IRDMA_PFHMC_PDINV_PMSDPARTSEL, 1) |
+ FIELD_PREP(IRDMA_PFHMC_PDINV_PMPDIDX, pd_idx);
+
+ writel(val, dev->hw_regs[IRDMA_PFHMC_PDINV]);
+}
+
+/**
+ * irdma_hmc_sd_one - setup 1 sd entry for cqp
+ * @dev: pointer to the device structure
+ * @hmc_fn_id: hmc's function id
+ * @pa: physical addr
+ * @sd_idx: sd index
+ * @type: paged or direct sd
+ * @setsd: flag to set or clear sd
+ */
+int irdma_hmc_sd_one(struct irdma_sc_dev *dev, u8 hmc_fn_id, u64 pa, u32 sd_idx,
+ enum irdma_sd_entry_type type, bool setsd)
+{
+ struct irdma_update_sds_info sdinfo;
+
+ sdinfo.cnt = 1;
+ sdinfo.hmc_fn_id = hmc_fn_id;
+ if (setsd)
+ irdma_set_sd_entry(pa, sd_idx, type, sdinfo.entry);
+ else
+ irdma_clr_sd_entry(sd_idx, type, sdinfo.entry);
+ return dev->cqp->process_cqp_sds(dev, &sdinfo);
+}
+
+/**
+ * irdma_hmc_sd_grp - setup group of sd entries for cqp
+ * @dev: pointer to the device structure
+ * @hmc_info: pointer to the HMC configuration information struct
+ * @sd_index: sd index
+ * @sd_cnt: number of sd entries
+ * @setsd: flag to set or clear sd
+ */
+static int irdma_hmc_sd_grp(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info, u32 sd_index,
+ u32 sd_cnt, bool setsd)
+{
+ struct irdma_hmc_sd_entry *sd_entry;
+ struct irdma_update_sds_info sdinfo = {};
+ u64 pa;
+ u32 i;
+ int ret_code = 0;
+
+ sdinfo.hmc_fn_id = hmc_info->hmc_fn_id;
+ for (i = sd_index; i < sd_index + sd_cnt; i++) {
+ sd_entry = &hmc_info->sd_table.sd_entry[i];
+ if (!sd_entry || (!sd_entry->valid && setsd) ||
+ (sd_entry->valid && !setsd))
+ continue;
+ if (setsd) {
+ pa = (sd_entry->entry_type == IRDMA_SD_TYPE_PAGED) ?
+ sd_entry->u.pd_table.pd_page_addr.pa :
+ sd_entry->u.bp.addr.pa;
+ irdma_set_sd_entry(pa, i, sd_entry->entry_type,
+ &sdinfo.entry[sdinfo.cnt]);
+ } else {
+ irdma_clr_sd_entry(i, sd_entry->entry_type,
+ &sdinfo.entry[sdinfo.cnt]);
+ }
+ sdinfo.cnt++;
+ if (sdinfo.cnt == IRDMA_MAX_SD_ENTRIES) {
+ ret_code = dev->cqp->process_cqp_sds(dev, &sdinfo);
+ if (ret_code) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: sd_programming failed err=%d\n",
+ ret_code);
+ return ret_code;
+ }
+
+ sdinfo.cnt = 0;
+ }
+ }
+ if (sdinfo.cnt)
+ ret_code = dev->cqp->process_cqp_sds(dev, &sdinfo);
+
+ return ret_code;
+}
+
+/**
+ * irdma_hmc_finish_add_sd_reg - program sd entries for objects
+ * @dev: pointer to the device structure
+ * @info: create obj info
+ */
+static int irdma_hmc_finish_add_sd_reg(struct irdma_sc_dev *dev,
+ struct irdma_hmc_create_obj_info *info)
+{
+ if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt)
+ return -EINVAL;
+
+ if ((info->start_idx + info->count) >
+ info->hmc_info->hmc_obj[info->rsrc_type].cnt)
+ return -EINVAL;
+
+ if (!info->add_sd_cnt)
+ return 0;
+ return irdma_hmc_sd_grp(dev, info->hmc_info,
+ info->hmc_info->sd_indexes[0], info->add_sd_cnt,
+ true);
+}
+
+/**
+ * irdma_sc_create_hmc_obj - allocate backing store for hmc objects
+ * @dev: pointer to the device structure
+ * @info: pointer to irdma_hmc_create_obj_info struct
+ *
+ * This will allocate memory for PDs and backing pages and populate
+ * the sd and pd entries.
+ */
+int irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev,
+ struct irdma_hmc_create_obj_info *info)
+{
+ struct irdma_hmc_sd_entry *sd_entry;
+ u32 sd_idx, sd_lmt;
+ u32 pd_idx = 0, pd_lmt = 0;
+ u32 pd_idx1 = 0, pd_lmt1 = 0;
+ u32 i, j;
+ bool pd_error = false;
+ int ret_code = 0;
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3 &&
+ dev->hmc_info->hmc_obj[info->rsrc_type].mem_loc == IRDMA_LOC_MEM)
+ return 0;
+
+ if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt)
+ return -EINVAL;
+
+ if ((info->start_idx + info->count) >
+ info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: error type %u, start = %u, req cnt %u, cnt = %u\n",
+ info->rsrc_type, info->start_idx, info->count,
+ info->hmc_info->hmc_obj[info->rsrc_type].cnt);
+ return -EINVAL;
+ }
+
+ irdma_find_sd_index_limit(info->hmc_info, info->rsrc_type,
+ info->start_idx, info->count, &sd_idx,
+ &sd_lmt);
+ if (sd_idx >= info->hmc_info->sd_table.sd_cnt ||
+ sd_lmt > info->hmc_info->sd_table.sd_cnt) {
+ return -EINVAL;
+ }
+
+ irdma_find_pd_index_limit(info->hmc_info, info->rsrc_type,
+ info->start_idx, info->count, &pd_idx,
+ &pd_lmt);
+
+ for (j = sd_idx; j < sd_lmt; j++) {
+ ret_code = irdma_add_sd_table_entry(dev->hw, info->hmc_info, j,
+ info->entry_type,
+ IRDMA_HMC_DIRECT_BP_SIZE);
+ if (ret_code)
+ goto exit_sd_error;
+
+ sd_entry = &info->hmc_info->sd_table.sd_entry[j];
+ if (sd_entry->entry_type == IRDMA_SD_TYPE_PAGED &&
+ (dev->hmc_info == info->hmc_info &&
+ info->rsrc_type != IRDMA_HMC_IW_PBLE)) {
+ pd_idx1 = max(pd_idx, (j * IRDMA_HMC_MAX_BP_COUNT));
+ pd_lmt1 = min(pd_lmt, (j + 1) * IRDMA_HMC_MAX_BP_COUNT);
+ for (i = pd_idx1; i < pd_lmt1; i++) {
+ /* update the pd table entry */
+ ret_code = irdma_add_pd_table_entry(dev,
+ info->hmc_info,
+ i, NULL);
+ if (ret_code) {
+ pd_error = true;
+ break;
+ }
+ }
+ if (pd_error) {
+ while (i && (i > pd_idx1)) {
+ irdma_remove_pd_bp(dev, info->hmc_info,
+ i - 1);
+ i--;
+ }
+ }
+ }
+ if (sd_entry->valid)
+ continue;
+
+ info->hmc_info->sd_indexes[info->add_sd_cnt] = (u16)j;
+ info->add_sd_cnt++;
+ sd_entry->valid = true;
+ }
+ return irdma_hmc_finish_add_sd_reg(dev, info);
+
+exit_sd_error:
+ while (j && (j > sd_idx)) {
+ sd_entry = &info->hmc_info->sd_table.sd_entry[j - 1];
+ switch (sd_entry->entry_type) {
+ case IRDMA_SD_TYPE_PAGED:
+ pd_idx1 = max(pd_idx, (j - 1) * IRDMA_HMC_MAX_BP_COUNT);
+ pd_lmt1 = min(pd_lmt, (j * IRDMA_HMC_MAX_BP_COUNT));
+ for (i = pd_idx1; i < pd_lmt1; i++)
+ irdma_prep_remove_pd_page(info->hmc_info, i);
+ break;
+ case IRDMA_SD_TYPE_DIRECT:
+ irdma_prep_remove_pd_page(info->hmc_info, (j - 1));
+ break;
+ default:
+ ret_code = -EINVAL;
+ break;
+ }
+ j--;
+ }
+
+ return ret_code;
+}
+
+/**
+ * irdma_finish_del_sd_reg - delete sd entries for objects
+ * @dev: pointer to the device structure
+ * @info: delete obj info
+ * @reset: true if called before reset
+ */
+static int irdma_finish_del_sd_reg(struct irdma_sc_dev *dev,
+ struct irdma_hmc_del_obj_info *info,
+ bool reset)
+{
+ struct irdma_hmc_sd_entry *sd_entry;
+ int ret_code = 0;
+ u32 i, sd_idx;
+ struct irdma_dma_mem *mem;
+
+ if (dev->privileged && !reset)
+ ret_code = irdma_hmc_sd_grp(dev, info->hmc_info,
+ info->hmc_info->sd_indexes[0],
+ info->del_sd_cnt, false);
+
+ if (ret_code)
+ ibdev_dbg(to_ibdev(dev), "HMC: error cqp sd sd_grp\n");
+ for (i = 0; i < info->del_sd_cnt; i++) {
+ sd_idx = info->hmc_info->sd_indexes[i];
+ sd_entry = &info->hmc_info->sd_table.sd_entry[sd_idx];
+ mem = (sd_entry->entry_type == IRDMA_SD_TYPE_PAGED) ?
+ &sd_entry->u.pd_table.pd_page_addr :
+ &sd_entry->u.bp.addr;
+
+ if (!mem || !mem->va) {
+ ibdev_dbg(to_ibdev(dev), "HMC: error cqp sd mem\n");
+ } else {
+ dma_free_coherent(dev->hw->device, mem->size, mem->va,
+ mem->pa);
+ mem->va = NULL;
+ }
+ }
+
+ return ret_code;
+}
+
+/**
+ * irdma_sc_del_hmc_obj - remove pe hmc objects
+ * @dev: pointer to the device structure
+ * @info: pointer to irdma_hmc_del_obj_info struct
+ * @reset: true if called before reset
+ *
+ * This will de-populate the SDs and PDs. It frees the memory for PDs and
+ * backing storage. After this function returns, the caller should
+ * deallocate the memory previously allocated for book-keeping information
+ * about PDs and backing storage.
+ */
+int irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev,
+ struct irdma_hmc_del_obj_info *info, bool reset)
+{
+ struct irdma_hmc_pd_table *pd_table;
+ u32 sd_idx, sd_lmt;
+ u32 pd_idx, pd_lmt, rel_pd_idx;
+ u32 i, j;
+ int ret_code = 0;
+
+ if (dev->hmc_info->hmc_obj[info->rsrc_type].mem_loc == IRDMA_LOC_MEM)
+ return 0;
+
+ if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: error start_idx[%04d] >= [type %04d].cnt[%04d]\n",
+ info->start_idx, info->rsrc_type,
+ info->hmc_info->hmc_obj[info->rsrc_type].cnt);
+ return -EINVAL;
+ }
+
+ if ((info->start_idx + info->count) >
+ info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: error start_idx[%04d] + count %04d >= [type %04d].cnt[%04d]\n",
+ info->start_idx, info->count, info->rsrc_type,
+ info->hmc_info->hmc_obj[info->rsrc_type].cnt);
+ return -EINVAL;
+ }
+
+ irdma_find_pd_index_limit(info->hmc_info, info->rsrc_type,
+ info->start_idx, info->count, &pd_idx,
+ &pd_lmt);
+
+ for (j = pd_idx; j < pd_lmt; j++) {
+ sd_idx = j / IRDMA_HMC_PD_CNT_IN_SD;
+
+ if (!info->hmc_info->sd_table.sd_entry[sd_idx].valid)
+ continue;
+
+ if (info->hmc_info->sd_table.sd_entry[sd_idx].entry_type !=
+ IRDMA_SD_TYPE_PAGED)
+ continue;
+
+ rel_pd_idx = j % IRDMA_HMC_PD_CNT_IN_SD;
+ pd_table = &info->hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
+ if (pd_table->pd_entry &&
+ pd_table->pd_entry[rel_pd_idx].valid) {
+ ret_code = irdma_remove_pd_bp(dev, info->hmc_info, j);
+ if (ret_code) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: remove_pd_bp error\n");
+ return ret_code;
+ }
+ }
+ }
+
+ irdma_find_sd_index_limit(info->hmc_info, info->rsrc_type,
+ info->start_idx, info->count, &sd_idx,
+ &sd_lmt);
+ if (sd_idx >= info->hmc_info->sd_table.sd_cnt ||
+ sd_lmt > info->hmc_info->sd_table.sd_cnt) {
+ ibdev_dbg(to_ibdev(dev), "HMC: invalid sd_idx\n");
+ return -EINVAL;
+ }
+
+ for (i = sd_idx; i < sd_lmt; i++) {
+ pd_table = &info->hmc_info->sd_table.sd_entry[i].u.pd_table;
+ if (!info->hmc_info->sd_table.sd_entry[i].valid)
+ continue;
+ switch (info->hmc_info->sd_table.sd_entry[i].entry_type) {
+ case IRDMA_SD_TYPE_DIRECT:
+ ret_code = irdma_prep_remove_sd_bp(info->hmc_info, i);
+ if (!ret_code) {
+ info->hmc_info->sd_indexes[info->del_sd_cnt] =
+ (u16)i;
+ info->del_sd_cnt++;
+ }
+ break;
+ case IRDMA_SD_TYPE_PAGED:
+ ret_code = irdma_prep_remove_pd_page(info->hmc_info, i);
+ if (ret_code)
+ break;
+ if (dev->hmc_info != info->hmc_info &&
+ info->rsrc_type == IRDMA_HMC_IW_PBLE &&
+ pd_table->pd_entry) {
+ kfree(pd_table->pd_entry_virt_mem.va);
+ pd_table->pd_entry = NULL;
+ }
+ info->hmc_info->sd_indexes[info->del_sd_cnt] = (u16)i;
+ info->del_sd_cnt++;
+ break;
+ default:
+ break;
+ }
+ }
+ return irdma_finish_del_sd_reg(dev, info, reset);
+}
+
+/**
+ * irdma_add_sd_table_entry - Adds a segment descriptor to the table
+ * @hw: pointer to our hw struct
+ * @hmc_info: pointer to the HMC configuration information struct
+ * @sd_index: segment descriptor index to manipulate
+ * @type: what type of segment descriptor we're manipulating
+ * @direct_mode_sz: size to alloc in direct mode
+ */
+int irdma_add_sd_table_entry(struct irdma_hw *hw,
+ struct irdma_hmc_info *hmc_info, u32 sd_index,
+ enum irdma_sd_entry_type type, u64 direct_mode_sz)
+{
+ struct irdma_hmc_sd_entry *sd_entry;
+ struct irdma_dma_mem dma_mem;
+ u64 alloc_len;
+
+ sd_entry = &hmc_info->sd_table.sd_entry[sd_index];
+ if (!sd_entry->valid) {
+ if (type == IRDMA_SD_TYPE_PAGED)
+ alloc_len = IRDMA_HMC_PAGED_BP_SIZE;
+ else
+ alloc_len = direct_mode_sz;
+
+ /* allocate a 4K pd page or 2M backing page */
+ dma_mem.size = ALIGN(alloc_len, IRDMA_HMC_PD_BP_BUF_ALIGNMENT);
+ dma_mem.va = dma_alloc_coherent(hw->device, dma_mem.size,
+ &dma_mem.pa, GFP_KERNEL);
+ if (!dma_mem.va)
+ return -ENOMEM;
+ if (type == IRDMA_SD_TYPE_PAGED) {
+ struct irdma_virt_mem *vmem =
+ &sd_entry->u.pd_table.pd_entry_virt_mem;
+
+ vmem->size = sizeof(struct irdma_hmc_pd_entry) * 512;
+ vmem->va = kzalloc(vmem->size, GFP_KERNEL);
+ if (!vmem->va) {
+ dma_free_coherent(hw->device, dma_mem.size,
+ dma_mem.va, dma_mem.pa);
+ dma_mem.va = NULL;
+ return -ENOMEM;
+ }
+ sd_entry->u.pd_table.pd_entry = vmem->va;
+
+ memcpy(&sd_entry->u.pd_table.pd_page_addr, &dma_mem,
+ sizeof(sd_entry->u.pd_table.pd_page_addr));
+ } else {
+ memcpy(&sd_entry->u.bp.addr, &dma_mem,
+ sizeof(sd_entry->u.bp.addr));
+
+ sd_entry->u.bp.sd_pd_index = sd_index;
+ }
+
+ hmc_info->sd_table.sd_entry[sd_index].entry_type = type;
+ hmc_info->sd_table.use_cnt++;
+ }
+ if (sd_entry->entry_type == IRDMA_SD_TYPE_DIRECT)
+ sd_entry->u.bp.use_cnt++;
+
+ return 0;
+}
+
+/**
+ * irdma_add_pd_table_entry - Adds page descriptor to the specified table
+ * @dev: pointer to our device structure
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @pd_index: which page descriptor index to manipulate
+ * @rsrc_pg: if not NULL, use the preallocated page instead of allocating a new one.
+ *
+ * This function:
+ * 1. Initializes the pd entry
+ * 2. Adds the pd_entry to the pd_table
+ * 3. Marks the entry valid in the irdma_hmc_pd_entry structure
+ * 4. Initializes the pd_entry's ref count to 1
+ * assumptions:
+ * 1. The memory for the pd should be pinned down, physically contiguous,
+ *    aligned on a 4K boundary and zeroed.
+ * 2. It should be 4K in size.
+ */
+int irdma_add_pd_table_entry(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info, u32 pd_index,
+ struct irdma_dma_mem *rsrc_pg)
+{
+ struct irdma_hmc_pd_table *pd_table;
+ struct irdma_hmc_pd_entry *pd_entry;
+ struct irdma_dma_mem mem;
+ struct irdma_dma_mem *page = &mem;
+ u32 sd_idx, rel_pd_idx;
+ u64 *pd_addr;
+ u64 page_desc;
+
+ if (pd_index / IRDMA_HMC_PD_CNT_IN_SD >= hmc_info->sd_table.sd_cnt)
+ return -EINVAL;
+
+ sd_idx = (pd_index / IRDMA_HMC_PD_CNT_IN_SD);
+ if (hmc_info->sd_table.sd_entry[sd_idx].entry_type !=
+ IRDMA_SD_TYPE_PAGED)
+ return 0;
+
+ rel_pd_idx = (pd_index % IRDMA_HMC_PD_CNT_IN_SD);
+ pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
+ pd_entry = &pd_table->pd_entry[rel_pd_idx];
+ if (!pd_entry->valid) {
+ if (rsrc_pg) {
+ pd_entry->rsrc_pg = true;
+ page = rsrc_pg;
+ } else {
+ page->size = ALIGN(IRDMA_HMC_PAGED_BP_SIZE,
+ IRDMA_HMC_PD_BP_BUF_ALIGNMENT);
+ page->va = dma_alloc_coherent(dev->hw->device,
+ page->size, &page->pa,
+ GFP_KERNEL);
+ if (!page->va)
+ return -ENOMEM;
+
+ pd_entry->rsrc_pg = false;
+ }
+
+ memcpy(&pd_entry->bp.addr, page, sizeof(pd_entry->bp.addr));
+ pd_entry->bp.sd_pd_index = pd_index;
+ pd_entry->bp.entry_type = IRDMA_SD_TYPE_PAGED;
+ page_desc = page->pa | 0x1;
+ pd_addr = pd_table->pd_page_addr.va;
+ pd_addr += rel_pd_idx;
+ memcpy(pd_addr, &page_desc, sizeof(*pd_addr));
+ pd_entry->sd_index = sd_idx;
+ pd_entry->valid = true;
+ pd_table->use_cnt++;
+
+ if (hmc_info->hmc_fn_id < dev->hw_attrs.first_hw_vf_fpm_id &&
+ dev->privileged)
+ irdma_invalidate_pf_hmc_pd(dev, sd_idx, rel_pd_idx);
+ }
+ pd_entry->bp.use_cnt++;
+
+ return 0;
+}
+
+/**
+ * irdma_remove_pd_bp - remove a backing page from a page descriptor
+ * @dev: pointer to our HW structure
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: the page index
+ *
+ * This function:
+ * 1. Marks the entry in pd table (for paged address mode) or in sd table
+ * (for direct address mode) invalid.
+ * 2. Writes to register PMPDINV to invalidate the backing page in the FV cache
+ * 3. Decrements the ref count for the pd_entry
+ * assumptions:
+ * 1. Caller can deallocate the memory used by backing storage after this
+ * function returns.
+ */
+int irdma_remove_pd_bp(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info, u32 idx)
+{
+ struct irdma_hmc_pd_entry *pd_entry;
+ struct irdma_hmc_pd_table *pd_table;
+ struct irdma_hmc_sd_entry *sd_entry;
+ u32 sd_idx, rel_pd_idx;
+ struct irdma_dma_mem *mem;
+ u64 *pd_addr;
+
+ sd_idx = idx / IRDMA_HMC_PD_CNT_IN_SD;
+ rel_pd_idx = idx % IRDMA_HMC_PD_CNT_IN_SD;
+ if (sd_idx >= hmc_info->sd_table.sd_cnt)
+ return -EINVAL;
+
+ sd_entry = &hmc_info->sd_table.sd_entry[sd_idx];
+ if (sd_entry->entry_type != IRDMA_SD_TYPE_PAGED)
+ return -EINVAL;
+
+ pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
+ pd_entry = &pd_table->pd_entry[rel_pd_idx];
+ if (--pd_entry->bp.use_cnt)
+ return 0;
+
+ pd_entry->valid = false;
+ pd_table->use_cnt--;
+ pd_addr = pd_table->pd_page_addr.va;
+ pd_addr += rel_pd_idx;
+ memset(pd_addr, 0, sizeof(u64));
+ if (dev->privileged && dev->hmc_fn_id == hmc_info->hmc_fn_id)
+ irdma_invalidate_pf_hmc_pd(dev, sd_idx, idx);
+
+ if (!pd_entry->rsrc_pg) {
+ mem = &pd_entry->bp.addr;
+ if (!mem || !mem->va)
+ return -EINVAL;
+
+ dma_free_coherent(dev->hw->device, mem->size, mem->va,
+ mem->pa);
+ mem->va = NULL;
+ }
+ if (!pd_table->use_cnt)
+ kfree(pd_table->pd_entry_virt_mem.va);
+
+ return 0;
+}
+
+/**
+ * irdma_prep_remove_sd_bp - Prepares to remove a backing page from an sd entry
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: the page index
+ */
+int irdma_prep_remove_sd_bp(struct irdma_hmc_info *hmc_info, u32 idx)
+{
+ struct irdma_hmc_sd_entry *sd_entry;
+
+ sd_entry = &hmc_info->sd_table.sd_entry[idx];
+ if (--sd_entry->u.bp.use_cnt)
+ return -EBUSY;
+
+ hmc_info->sd_table.use_cnt--;
+ sd_entry->valid = false;
+
+ return 0;
+}
+
+/**
+ * irdma_prep_remove_pd_page - Prepares to remove a PD page from an sd entry.
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: segment descriptor index to find the relevant page descriptor
+ */
+int irdma_prep_remove_pd_page(struct irdma_hmc_info *hmc_info, u32 idx)
+{
+ struct irdma_hmc_sd_entry *sd_entry;
+
+ sd_entry = &hmc_info->sd_table.sd_entry[idx];
+
+ if (sd_entry->u.pd_table.use_cnt)
+ return -EBUSY;
+
+ sd_entry->valid = false;
+ hmc_info->sd_table.use_cnt--;
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/irdma/hmc.h b/drivers/infiniband/hw/irdma/hmc.h
new file mode 100644
index 000000000000..257a5d22aa96
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/hmc.h
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2020 Intel Corporation */
+#ifndef IRDMA_HMC_H
+#define IRDMA_HMC_H
+
+#include "defs.h"
+
+#define IRDMA_HMC_MAX_BP_COUNT 512
+#define IRDMA_MAX_SD_ENTRIES 11
+#define IRDMA_HW_DBG_HMC_INVALID_BP_MARK 0xca
+#define IRDMA_HMC_INFO_SIGNATURE 0x484d5347
+#define IRDMA_HMC_PD_CNT_IN_SD 512
+#define IRDMA_HMC_DIRECT_BP_SIZE 0x200000
+#define IRDMA_HMC_MAX_SD_COUNT 8192
+#define IRDMA_HMC_PAGED_BP_SIZE 4096
+#define IRDMA_HMC_PD_BP_BUF_ALIGNMENT 4096
+#define IRDMA_FIRST_VF_FPM_ID 8
+#define FPM_MULTIPLIER 1024
+#define IRDMA_OBJ_LOC_MEM_BIT 0x4
+#define IRDMA_XF_MULTIPLIER 16
+#define IRDMA_RRF_MULTIPLIER 8
+#define IRDMA_MIN_PBLE_PAGES 3
+#define IRDMA_HMC_PAGE_SIZE 2097152
+#define IRDMA_MIN_MR_PER_QP 4
+#define IRDMA_MIN_QP_CNT 64
+#define IRDMA_FSIAV_CNT_MAX 1048576
+#define IRDMA_MIN_IRD 8
+#define IRDMA_HMC_MIN_RRF 16
+
+enum irdma_hmc_rsrc_type {
+ IRDMA_HMC_IW_QP = 0,
+ IRDMA_HMC_IW_CQ = 1,
+ IRDMA_HMC_IW_SRQ = 2,
+ IRDMA_HMC_IW_HTE = 3,
+ IRDMA_HMC_IW_ARP = 4,
+ IRDMA_HMC_IW_APBVT_ENTRY = 5,
+ IRDMA_HMC_IW_MR = 6,
+ IRDMA_HMC_IW_XF = 7,
+ IRDMA_HMC_IW_XFFL = 8,
+ IRDMA_HMC_IW_Q1 = 9,
+ IRDMA_HMC_IW_Q1FL = 10,
+ IRDMA_HMC_IW_TIMER = 11,
+ IRDMA_HMC_IW_FSIMC = 12,
+ IRDMA_HMC_IW_FSIAV = 13,
+ IRDMA_HMC_IW_PBLE = 14,
+ IRDMA_HMC_IW_RRF = 15,
+ IRDMA_HMC_IW_RRFFL = 16,
+ IRDMA_HMC_IW_HDR = 17,
+ IRDMA_HMC_IW_MD = 18,
+ IRDMA_HMC_IW_OOISC = 19,
+ IRDMA_HMC_IW_OOISCFFL = 20,
+ IRDMA_HMC_IW_MAX, /* Must be last entry */
+};
+
+enum irdma_sd_entry_type {
+ IRDMA_SD_TYPE_INVALID = 0,
+ IRDMA_SD_TYPE_PAGED = 1,
+ IRDMA_SD_TYPE_DIRECT = 2,
+};
+
+enum irdma_hmc_obj_mem {
+ IRDMA_HOST_MEM = 0,
+ IRDMA_LOC_MEM = 1,
+};
+
+struct irdma_hmc_obj_info {
+ u64 base;
+ u32 max_cnt;
+ u32 cnt;
+ u64 size;
+ enum irdma_hmc_obj_mem mem_loc;
+};
+
+struct irdma_hmc_bp {
+ enum irdma_sd_entry_type entry_type;
+ struct irdma_dma_mem addr;
+ u32 sd_pd_index;
+ u32 use_cnt;
+};
+
+struct irdma_hmc_pd_entry {
+ struct irdma_hmc_bp bp;
+ u32 sd_index;
+ bool rsrc_pg:1;
+ bool valid:1;
+};
+
+struct irdma_hmc_pd_table {
+ struct irdma_dma_mem pd_page_addr;
+ struct irdma_hmc_pd_entry *pd_entry;
+ struct irdma_virt_mem pd_entry_virt_mem;
+ u32 use_cnt;
+ u32 sd_index;
+};
+
+struct irdma_hmc_sd_entry {
+ enum irdma_sd_entry_type entry_type;
+ bool valid;
+ union {
+ struct irdma_hmc_pd_table pd_table;
+ struct irdma_hmc_bp bp;
+ } u;
+};
+
+struct irdma_hmc_sd_table {
+ struct irdma_virt_mem addr;
+ u32 sd_cnt;
+ u32 use_cnt;
+ struct irdma_hmc_sd_entry *sd_entry;
+};
+
+struct irdma_hmc_info {
+ u32 signature;
+ u8 hmc_fn_id;
+ u16 first_sd_index;
+ struct irdma_hmc_obj_info *hmc_obj;
+ struct irdma_virt_mem hmc_obj_virt_mem;
+ struct irdma_hmc_sd_table sd_table;
+ u16 sd_indexes[IRDMA_HMC_MAX_SD_COUNT];
+};
+
+struct irdma_update_sd_entry {
+ u64 cmd;
+ u64 data;
+};
+
+struct irdma_update_sds_info {
+ u32 cnt;
+ u8 hmc_fn_id;
+ struct irdma_update_sd_entry entry[IRDMA_MAX_SD_ENTRIES];
+};
+
+struct irdma_ccq_cqe_info;
+struct irdma_hmc_fcn_info {
+ u32 vf_id;
+ u8 protocol_used;
+ u8 free_fcn;
+};
+
+struct irdma_hmc_create_obj_info {
+ struct irdma_hmc_info *hmc_info;
+ struct irdma_virt_mem add_sd_virt_mem;
+ u32 rsrc_type;
+ u32 start_idx;
+ u32 count;
+ u32 add_sd_cnt;
+ enum irdma_sd_entry_type entry_type;
+ bool privileged;
+};
+
+struct irdma_hmc_del_obj_info {
+ struct irdma_hmc_info *hmc_info;
+ struct irdma_virt_mem del_sd_virt_mem;
+ u32 rsrc_type;
+ u32 start_idx;
+ u32 count;
+ u32 del_sd_cnt;
+ bool privileged;
+};
+
+int irdma_copy_dma_mem(struct irdma_hw *hw, void *dest_buf,
+ struct irdma_dma_mem *src_mem, u64 src_offset, u64 size);
+int irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev,
+ struct irdma_hmc_create_obj_info *info);
+int irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev,
+ struct irdma_hmc_del_obj_info *info, bool reset);
+int irdma_hmc_sd_one(struct irdma_sc_dev *dev, u8 hmc_fn_id, u64 pa, u32 sd_idx,
+ enum irdma_sd_entry_type type,
+ bool setsd);
+int irdma_update_sds_noccq(struct irdma_sc_dev *dev,
+ struct irdma_update_sds_info *info);
+struct irdma_vfdev *irdma_vfdev_from_fpm(struct irdma_sc_dev *dev,
+ u8 hmc_fn_id);
+struct irdma_hmc_info *irdma_vf_hmcinfo_from_fpm(struct irdma_sc_dev *dev,
+ u8 hmc_fn_id);
+int irdma_add_sd_table_entry(struct irdma_hw *hw,
+ struct irdma_hmc_info *hmc_info, u32 sd_index,
+ enum irdma_sd_entry_type type, u64 direct_mode_sz);
+int irdma_add_pd_table_entry(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info, u32 pd_index,
+ struct irdma_dma_mem *rsrc_pg);
+int irdma_remove_pd_bp(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info, u32 idx);
+int irdma_prep_remove_sd_bp(struct irdma_hmc_info *hmc_info, u32 idx);
+int irdma_prep_remove_pd_page(struct irdma_hmc_info *hmc_info, u32 idx);
+#endif /* IRDMA_HMC_H */
diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c
new file mode 100644
index 000000000000..d1fc5726b979
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/hw.c
@@ -0,0 +1,2823 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include "main.h"
+
+static struct irdma_rsrc_limits rsrc_limits_table[] = {
+ [0] = {
+ .qplimit = SZ_128,
+ },
+ [1] = {
+ .qplimit = SZ_1K,
+ },
+ [2] = {
+ .qplimit = SZ_2K,
+ },
+ [3] = {
+ .qplimit = SZ_4K,
+ },
+ [4] = {
+ .qplimit = SZ_16K,
+ },
+ [5] = {
+ .qplimit = SZ_64K,
+ },
+ [6] = {
+ .qplimit = SZ_128K,
+ },
+ [7] = {
+ .qplimit = SZ_256K,
+ },
+};
+
+/* types of hmc objects */
+static enum irdma_hmc_rsrc_type iw_hmc_obj_types[] = {
+ IRDMA_HMC_IW_QP,
+ IRDMA_HMC_IW_CQ,
+ IRDMA_HMC_IW_SRQ,
+ IRDMA_HMC_IW_HTE,
+ IRDMA_HMC_IW_ARP,
+ IRDMA_HMC_IW_APBVT_ENTRY,
+ IRDMA_HMC_IW_MR,
+ IRDMA_HMC_IW_XF,
+ IRDMA_HMC_IW_XFFL,
+ IRDMA_HMC_IW_Q1,
+ IRDMA_HMC_IW_Q1FL,
+ IRDMA_HMC_IW_PBLE,
+ IRDMA_HMC_IW_TIMER,
+ IRDMA_HMC_IW_FSIMC,
+ IRDMA_HMC_IW_FSIAV,
+ IRDMA_HMC_IW_RRF,
+ IRDMA_HMC_IW_RRFFL,
+ IRDMA_HMC_IW_HDR,
+ IRDMA_HMC_IW_MD,
+ IRDMA_HMC_IW_OOISC,
+ IRDMA_HMC_IW_OOISCFFL,
+};
+
+/**
+ * irdma_iwarp_ce_handler - handle iwarp completions
+ * @iwcq: iwarp cq receiving event
+ */
+static void irdma_iwarp_ce_handler(struct irdma_sc_cq *iwcq)
+{
+ struct irdma_cq *cq = iwcq->back_cq;
+
+ if (!cq->user_mode)
+ atomic_set(&cq->armed, 0);
+ if (cq->ibcq.comp_handler)
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+}
+
+/**
+ * irdma_puda_ce_handler - handle puda completion events
+ * @rf: RDMA PCI function
+ * @cq: puda completion q for event
+ */
+static void irdma_puda_ce_handler(struct irdma_pci_f *rf,
+ struct irdma_sc_cq *cq)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ u32 compl_error;
+ int status;
+
+ do {
+ status = irdma_puda_poll_cmpl(dev, cq, &compl_error);
+ if (status == -ENOENT)
+ break;
+ if (status) {
+ ibdev_dbg(to_ibdev(dev), "ERR: puda status = %d\n", status);
+ break;
+ }
+ if (compl_error) {
+ ibdev_dbg(to_ibdev(dev), "ERR: puda compl_err =0x%x\n",
+ compl_error);
+ break;
+ }
+ } while (1);
+
+ irdma_sc_ccq_arm(cq);
+}
+
+/**
+ * irdma_process_ceq - handle ceq for completions
+ * @rf: RDMA PCI function
+ * @ceq: ceq having cq for completion
+ */
+static void irdma_process_ceq(struct irdma_pci_f *rf, struct irdma_ceq *ceq)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_sc_ceq *sc_ceq;
+ struct irdma_sc_cq *cq;
+ unsigned long flags;
+
+ sc_ceq = &ceq->sc_ceq;
+ do {
+ spin_lock_irqsave(&ceq->ce_lock, flags);
+ cq = irdma_sc_process_ceq(dev, sc_ceq);
+ if (!cq) {
+ spin_unlock_irqrestore(&ceq->ce_lock, flags);
+ break;
+ }
+
+ if (cq->cq_type == IRDMA_CQ_TYPE_IWARP)
+ irdma_iwarp_ce_handler(cq);
+
+ spin_unlock_irqrestore(&ceq->ce_lock, flags);
+
+ if (cq->cq_type == IRDMA_CQ_TYPE_CQP)
+ queue_work(rf->cqp_cmpl_wq, &rf->cqp_cmpl_work);
+ else if (cq->cq_type == IRDMA_CQ_TYPE_ILQ ||
+ cq->cq_type == IRDMA_CQ_TYPE_IEQ)
+ irdma_puda_ce_handler(rf, cq);
+ } while (1);
+}
+
+static void irdma_set_flush_fields(struct irdma_sc_qp *qp,
+ struct irdma_aeqe_info *info)
+{
+ struct qp_err_code qp_err;
+
+ qp->sq_flush_code = info->sq;
+ qp->rq_flush_code = info->rq;
+ if (qp->qp_uk.uk_attrs->hw_rev >= IRDMA_GEN_3) {
+ if (info->sq) {
+ qp->err_sq_idx_valid = true;
+ qp->err_sq_idx = info->wqe_idx;
+ }
+ if (info->rq) {
+ qp->err_rq_idx_valid = true;
+ qp->err_rq_idx = info->wqe_idx;
+ }
+ }
+
+ qp_err = irdma_ae_to_qp_err_code(info->ae_id);
+ qp->flush_code = qp_err.flush_code;
+ qp->event_type = qp_err.event_type;
+}
+
+/**
+ * irdma_complete_cqp_request - perform post-completion cleanup
+ * @cqp: device CQP
+ * @cqp_request: CQP request
+ *
+ * Mark CQP request as done, wake up waiting thread or invoke
+ * callback function and release/free CQP request.
+ */
+static void irdma_complete_cqp_request(struct irdma_cqp *cqp,
+ struct irdma_cqp_request *cqp_request)
+{
+ if (cqp_request->waiting) {
+ WRITE_ONCE(cqp_request->request_done, true);
+ wake_up(&cqp_request->waitq);
+ } else if (cqp_request->callback_fcn) {
+ cqp_request->callback_fcn(cqp_request);
+ }
+ irdma_put_cqp_request(cqp, cqp_request);
+}
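+
+/*
+ * Loosely, a CQP request completes in one of two ways: a caller that set
+ * ->waiting sleeps on ->waitq and is woken once ->request_done is set, while
+ * a fire-and-forget request has its ->callback_fcn invoked here instead. In
+ * both cases irdma_put_cqp_request() then releases the request.
+ */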
+
+/**
+ * irdma_process_ae_def_cmpl - handle IRDMA_AE_CQP_DEFERRED_COMPLETE event
+ * @rf: RDMA PCI function
+ * @info: AEQ entry info
+ */
+static void irdma_process_ae_def_cmpl(struct irdma_pci_f *rf,
+ struct irdma_aeqe_info *info)
+{
+ u32 sw_def_info;
+ u64 scratch;
+
+ irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq);
+
+ irdma_sc_cqp_def_cmpl_ae_handler(&rf->sc_dev, info, true,
+ &scratch, &sw_def_info);
+ while (scratch) {
+ struct irdma_cqp_request *cqp_request =
+ (struct irdma_cqp_request *)(uintptr_t)scratch;
+
+ irdma_complete_cqp_request(&rf->cqp, cqp_request);
+ irdma_sc_cqp_def_cmpl_ae_handler(&rf->sc_dev, info, false,
+ &scratch, &sw_def_info);
+ }
+}
+
+/**
+ * irdma_process_aeq - handle aeq events
+ * @rf: RDMA PCI function
+ */
+static void irdma_process_aeq(struct irdma_pci_f *rf)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_aeq *aeq = &rf->aeq;
+ struct irdma_sc_aeq *sc_aeq = &aeq->sc_aeq;
+ struct irdma_aeqe_info aeinfo;
+ struct irdma_aeqe_info *info = &aeinfo;
+ int ret;
+ struct irdma_qp *iwqp = NULL;
+ struct irdma_cq *iwcq = NULL;
+ struct irdma_sc_qp *qp = NULL;
+ struct irdma_qp_host_ctx_info *ctx_info = NULL;
+ struct irdma_device *iwdev = rf->iwdev;
+ struct irdma_sc_srq *srq;
+ unsigned long flags;
+
+ u32 aeqcnt = 0;
+
+ if (!sc_aeq->size)
+ return;
+
+ do {
+ memset(info, 0, sizeof(*info));
+ ret = irdma_sc_get_next_aeqe(sc_aeq, info);
+ if (ret)
+ break;
+
+ if (info->aeqe_overflow) {
+ ibdev_err(&iwdev->ibdev, "AEQ has overflowed\n");
+ rf->reset = true;
+ rf->gen_ops.request_reset(rf);
+ return;
+ }
+
+ aeqcnt++;
+ ibdev_dbg(&iwdev->ibdev,
+ "AEQ: ae_id = 0x%x bool qp=%d qp_id = %d tcp_state=%d iwarp_state=%d ae_src=%d\n",
+ info->ae_id, info->qp, info->qp_cq_id, info->tcp_state,
+ info->iwarp_state, info->ae_src);
+
+ if (info->qp) {
+ spin_lock_irqsave(&rf->qptable_lock, flags);
+ iwqp = rf->qp_table[info->qp_cq_id];
+ if (!iwqp) {
+ spin_unlock_irqrestore(&rf->qptable_lock,
+ flags);
+ if (info->ae_id == IRDMA_AE_QP_SUSPEND_COMPLETE) {
+ atomic_dec(&iwdev->vsi.qp_suspend_reqs);
+ wake_up(&iwdev->suspend_wq);
+ continue;
+ }
+ ibdev_dbg(&iwdev->ibdev, "AEQ: qp_id %d is already freed\n",
+ info->qp_cq_id);
+ continue;
+ }
+ irdma_qp_add_ref(&iwqp->ibqp);
+ spin_unlock_irqrestore(&rf->qptable_lock, flags);
+ qp = &iwqp->sc_qp;
+ spin_lock_irqsave(&iwqp->lock, flags);
+ iwqp->hw_tcp_state = info->tcp_state;
+ iwqp->hw_iwarp_state = info->iwarp_state;
+ if (info->ae_id != IRDMA_AE_QP_SUSPEND_COMPLETE)
+ iwqp->last_aeq = info->ae_id;
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ } else if (info->srq) {
+ if (info->ae_id != IRDMA_AE_SRQ_LIMIT)
+ continue;
+ } else {
+ if (info->ae_id != IRDMA_AE_CQ_OPERATION_ERROR &&
+ info->ae_id != IRDMA_AE_CQP_DEFERRED_COMPLETE)
+ continue;
+ }
+
+ switch (info->ae_id) {
+ struct irdma_cm_node *cm_node;
+ case IRDMA_AE_LLP_CONNECTION_ESTABLISHED:
+ cm_node = iwqp->cm_node;
+ if (cm_node->accept_pend) {
+ atomic_dec(&cm_node->listener->pend_accepts_cnt);
+ cm_node->accept_pend = 0;
+ }
+ iwqp->rts_ae_rcvd = 1;
+ wake_up_interruptible(&iwqp->waitq);
+ break;
+ case IRDMA_AE_LLP_FIN_RECEIVED:
+ case IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE:
+ if (qp->term_flags)
+ break;
+ if (atomic_inc_return(&iwqp->close_timer_started) == 1) {
+ iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSE_WAIT;
+ if (iwqp->hw_tcp_state == IRDMA_TCP_STATE_CLOSE_WAIT &&
+ iwqp->ibqp_state == IB_QPS_RTS) {
+ irdma_next_iw_state(iwqp,
+ IRDMA_QP_STATE_CLOSING,
+ 0, 0, 0);
+ irdma_cm_disconn(iwqp);
+ }
+ irdma_schedule_cm_timer(iwqp->cm_node,
+ (struct irdma_puda_buf *)iwqp,
+ IRDMA_TIMER_TYPE_CLOSE,
+ 1, 0);
+ }
+ break;
+ case IRDMA_AE_LLP_CLOSE_COMPLETE:
+ if (qp->term_flags)
+ irdma_terminate_done(qp, 0);
+ else
+ irdma_cm_disconn(iwqp);
+ break;
+ case IRDMA_AE_BAD_CLOSE:
+ case IRDMA_AE_RESET_SENT:
+ irdma_next_iw_state(iwqp, IRDMA_QP_STATE_ERROR, 1, 0,
+ 0);
+ irdma_cm_disconn(iwqp);
+ break;
+ case IRDMA_AE_LLP_CONNECTION_RESET:
+ if (atomic_read(&iwqp->close_timer_started))
+ break;
+ irdma_cm_disconn(iwqp);
+ break;
+ case IRDMA_AE_QP_SUSPEND_COMPLETE:
+ if (iwqp->iwdev->vsi.tc_change_pending) {
+ if (!atomic_dec_return(&qp->vsi->qp_suspend_reqs))
+ wake_up(&iwqp->iwdev->suspend_wq);
+ }
+ if (iwqp->suspend_pending) {
+ iwqp->suspend_pending = false;
+ wake_up(&iwqp->iwdev->suspend_wq);
+ }
+ break;
+ case IRDMA_AE_TERMINATE_SENT:
+ irdma_terminate_send_fin(qp);
+ break;
+ case IRDMA_AE_LLP_TERMINATE_RECEIVED:
+ irdma_terminate_received(qp, info);
+ break;
+ case IRDMA_AE_CQ_OPERATION_ERROR:
+ ibdev_err(&iwdev->ibdev,
+ "Processing an iWARP related AE for CQ misc = 0x%04X\n",
+ info->ae_id);
+
+ spin_lock_irqsave(&rf->cqtable_lock, flags);
+ iwcq = rf->cq_table[info->qp_cq_id];
+ if (!iwcq) {
+ spin_unlock_irqrestore(&rf->cqtable_lock,
+ flags);
+ ibdev_dbg(to_ibdev(dev),
+ "cq_id %d is already freed\n", info->qp_cq_id);
+ continue;
+ }
+ irdma_cq_add_ref(&iwcq->ibcq);
+ spin_unlock_irqrestore(&rf->cqtable_lock, flags);
+
+ if (iwcq->ibcq.event_handler) {
+ struct ib_event ibevent;
+
+ ibevent.device = iwcq->ibcq.device;
+ ibevent.event = IB_EVENT_CQ_ERR;
+ ibevent.element.cq = &iwcq->ibcq;
+ iwcq->ibcq.event_handler(&ibevent,
+ iwcq->ibcq.cq_context);
+ }
+ irdma_cq_rem_ref(&iwcq->ibcq);
+ break;
+ case IRDMA_AE_SRQ_LIMIT:
+ srq = (struct irdma_sc_srq *)(uintptr_t)info->compl_ctx;
+ irdma_srq_event(srq);
+ break;
+ case IRDMA_AE_SRQ_CATASTROPHIC_ERROR:
+ break;
+ case IRDMA_AE_CQP_DEFERRED_COMPLETE:
+ /* Remove completed CQP requests from pending list
+ * and notify about completion of those CQP ops.
+ */
+ irdma_process_ae_def_cmpl(rf, info);
+ break;
+ case IRDMA_AE_RESET_NOT_SENT:
+ case IRDMA_AE_LLP_DOUBT_REACHABILITY:
+ case IRDMA_AE_RESOURCE_EXHAUSTION:
+ break;
+ case IRDMA_AE_PRIV_OPERATION_DENIED:
+ case IRDMA_AE_STAG_ZERO_INVALID:
+ case IRDMA_AE_IB_RREQ_AND_Q1_FULL:
+ case IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION:
+ case IRDMA_AE_DDP_UBE_INVALID_MO:
+ case IRDMA_AE_DDP_UBE_INVALID_QN:
+ case IRDMA_AE_DDP_NO_L_BIT:
+ case IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION:
+ case IRDMA_AE_RDMAP_ROE_UNEXPECTED_OPCODE:
+ case IRDMA_AE_ROE_INVALID_RDMA_READ_REQUEST:
+ case IRDMA_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP:
+ case IRDMA_AE_INVALID_ARP_ENTRY:
+ case IRDMA_AE_INVALID_TCP_OPTION_RCVD:
+ case IRDMA_AE_STALE_ARP_ENTRY:
+ case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR:
+ case IRDMA_AE_LLP_SEGMENT_TOO_SMALL:
+ case IRDMA_AE_LLP_SYN_RECEIVED:
+ case IRDMA_AE_LLP_TOO_MANY_RETRIES:
+ case IRDMA_AE_LCE_QP_CATASTROPHIC:
+ case IRDMA_AE_LCE_FUNCTION_CATASTROPHIC:
+ case IRDMA_AE_LLP_TOO_MANY_RNRS:
+ case IRDMA_AE_LCE_CQ_CATASTROPHIC:
+ case IRDMA_AE_REMOTE_QP_CATASTROPHIC:
+ case IRDMA_AE_LOCAL_QP_CATASTROPHIC:
+ case IRDMA_AE_RCE_QP_CATASTROPHIC:
+ case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG:
+ default:
+ ibdev_err(&iwdev->ibdev, "abnormal ae_id = 0x%x bool qp=%d qp_id = %d, ae_src=%d\n",
+ info->ae_id, info->qp, info->qp_cq_id, info->ae_src);
+ ctx_info = &iwqp->ctx_info;
+ if (rdma_protocol_roce(&iwqp->iwdev->ibdev, 1)) {
+ ctx_info->roce_info->err_rq_idx_valid =
+ ctx_info->srq_valid ? false : info->err_rq_idx_valid;
+ if (ctx_info->roce_info->err_rq_idx_valid) {
+ ctx_info->roce_info->err_rq_idx = info->wqe_idx;
+ irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va,
+ ctx_info);
+ }
+ irdma_set_flush_fields(qp, info);
+ irdma_cm_disconn(iwqp);
+ break;
+ }
+ ctx_info->iwarp_info->err_rq_idx_valid = info->rq;
+ if (info->rq) {
+ ctx_info->iwarp_info->err_rq_idx = info->wqe_idx;
+ ctx_info->tcp_info_valid = false;
+ ctx_info->iwarp_info_valid = true;
+ irdma_sc_qp_setctx(&iwqp->sc_qp, iwqp->host_ctx.va,
+ ctx_info);
+ }
+ if (iwqp->hw_iwarp_state != IRDMA_QP_STATE_RTS &&
+ iwqp->hw_iwarp_state != IRDMA_QP_STATE_TERMINATE) {
+ irdma_next_iw_state(iwqp, IRDMA_QP_STATE_ERROR, 1, 0, 0);
+ irdma_cm_disconn(iwqp);
+ } else {
+ irdma_terminate_connection(qp, info);
+ }
+ break;
+ }
+ if (info->qp)
+ irdma_qp_rem_ref(&iwqp->ibqp);
+ } while (1);
+
+ if (aeqcnt)
+ irdma_sc_repost_aeq_entries(dev, aeqcnt);
+}
+
+/**
+ * irdma_ena_intr - set up device interrupts
+ * @dev: hardware control device structure
+ * @msix_id: id of the interrupt to be enabled
+ */
+static void irdma_ena_intr(struct irdma_sc_dev *dev, u32 msix_id)
+{
+ dev->irq_ops->irdma_en_irq(dev, msix_id);
+}
+
+/**
+ * irdma_dpc - tasklet for aeq and ceq 0
+ * @t: tasklet_struct ptr
+ */
+static void irdma_dpc(struct tasklet_struct *t)
+{
+ struct irdma_pci_f *rf = from_tasklet(rf, t, dpc_tasklet);
+
+ if (rf->msix_shared)
+ irdma_process_ceq(rf, rf->ceqlist);
+ irdma_process_aeq(rf);
+ irdma_ena_intr(&rf->sc_dev, rf->iw_msixtbl[0].idx);
+}
+
+/**
+ * irdma_ceq_dpc - dpc handler for CEQ
+ * @t: tasklet_struct ptr
+ */
+static void irdma_ceq_dpc(struct tasklet_struct *t)
+{
+ struct irdma_ceq *iwceq = from_tasklet(iwceq, t, dpc_tasklet);
+ struct irdma_pci_f *rf = iwceq->rf;
+
+ irdma_process_ceq(rf, iwceq);
+ irdma_ena_intr(&rf->sc_dev, iwceq->msix_idx);
+}
+
+/**
+ * irdma_save_msix_info - copy msix vector information to iwarp device
+ * @rf: RDMA PCI function
+ *
+ * Allocate iwdev msix table and copy the msix info to the table
+ * Return 0 if successful, otherwise return error
+ */
+static int irdma_save_msix_info(struct irdma_pci_f *rf)
+{
+ struct irdma_qvlist_info *iw_qvlist;
+ struct irdma_qv_info *iw_qvinfo;
+ struct msix_entry *pmsix;
+ u32 ceq_idx;
+ u32 i;
+ size_t size;
+
+ if (!rf->msix_count)
+ return -EINVAL;
+
+ size = sizeof(struct irdma_msix_vector) * rf->msix_count;
+ size += struct_size(iw_qvlist, qv_info, rf->msix_count);
+ rf->iw_msixtbl = kzalloc(size, GFP_KERNEL);
+ if (!rf->iw_msixtbl)
+ return -ENOMEM;
+
+ rf->iw_qvlist = (struct irdma_qvlist_info *)
+ (&rf->iw_msixtbl[rf->msix_count]);
+ iw_qvlist = rf->iw_qvlist;
+ iw_qvinfo = iw_qvlist->qv_info;
+ iw_qvlist->num_vectors = rf->msix_count;
+ if (rf->msix_count <= num_online_cpus())
+ rf->msix_shared = true;
+
+ pmsix = rf->msix_entries;
+ for (i = 0, ceq_idx = 0; i < rf->msix_count; i++, iw_qvinfo++) {
+ rf->iw_msixtbl[i].idx = pmsix->entry;
+ rf->iw_msixtbl[i].irq = pmsix->vector;
+ rf->iw_msixtbl[i].cpu_affinity = ceq_idx;
+ if (!i) {
+ iw_qvinfo->aeq_idx = 0;
+ if (rf->msix_shared)
+ iw_qvinfo->ceq_idx = ceq_idx++;
+ else
+ iw_qvinfo->ceq_idx = IRDMA_Q_INVALID_IDX;
+ } else {
+ iw_qvinfo->aeq_idx = IRDMA_Q_INVALID_IDX;
+ iw_qvinfo->ceq_idx = ceq_idx++;
+ }
+ iw_qvinfo->itr_idx = 3;
+ iw_qvinfo->v_idx = rf->iw_msixtbl[i].idx;
+ pmsix++;
+ }
+
+ return 0;
+}
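+
+/*
+ * Sketch of the single allocation built above, assuming msix_count == N:
+ *
+ *   rf->iw_msixtbl -> [irdma_msix_vector 0 .. N-1]
+ *   rf->iw_qvlist  -> [irdma_qvlist_info header][qv_info 0 .. N-1]
+ *
+ * Vector 0 always carries the AEQ; it also carries CEQ 0 when msix_shared
+ * is set (msix_count <= num_online_cpus()), otherwise CEQs start at vector 1.
+ */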
+
+/**
+ * irdma_irq_handler - interrupt handler for aeq and ceq0
+ * @irq: Interrupt request number
+ * @data: RDMA PCI function
+ */
+static irqreturn_t irdma_irq_handler(int irq, void *data)
+{
+ struct irdma_pci_f *rf = data;
+
+ tasklet_schedule(&rf->dpc_tasklet);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * irdma_ceq_handler - interrupt handler for ceq
+ * @irq: interrupt request number
+ * @data: ceq pointer
+ */
+static irqreturn_t irdma_ceq_handler(int irq, void *data)
+{
+ struct irdma_ceq *iwceq = data;
+
+ if (iwceq->irq != irq)
+ ibdev_err(to_ibdev(&iwceq->rf->sc_dev), "expected irq = %d received irq = %d\n",
+ iwceq->irq, irq);
+ tasklet_schedule(&iwceq->dpc_tasklet);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * irdma_destroy_irq - destroy device interrupts
+ * @rf: RDMA PCI function
+ * @msix_vec: msix vector to disable irq
+ * @dev_id: parameter to pass to free_irq (used during irq setup)
+ *
+ * The function is called when destroying aeq/ceq
+ */
+static void irdma_destroy_irq(struct irdma_pci_f *rf,
+ struct irdma_msix_vector *msix_vec, void *dev_id)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+
+ dev->irq_ops->irdma_dis_irq(dev, msix_vec->idx);
+ irq_update_affinity_hint(msix_vec->irq, NULL);
+ free_irq(msix_vec->irq, dev_id);
+ if (rf == dev_id) {
+ tasklet_kill(&rf->dpc_tasklet);
+ } else {
+ struct irdma_ceq *iwceq = (struct irdma_ceq *)dev_id;
+
+ tasklet_kill(&iwceq->dpc_tasklet);
+ }
+}
+
+/**
+ * irdma_destroy_cqp - destroy control qp
+ * @rf: RDMA PCI function
+ *
+ * Issue destroy cqp request and
+ * free the resources associated with the cqp
+ */
+static void irdma_destroy_cqp(struct irdma_pci_f *rf)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_cqp *cqp = &rf->cqp;
+ int status = 0;
+
+ status = irdma_sc_cqp_destroy(dev->cqp);
+ if (status)
+ ibdev_dbg(to_ibdev(dev), "ERR: Destroy CQP failed %d\n", status);
+
+ irdma_cleanup_pending_cqp_op(rf);
+ dma_free_coherent(dev->hw->device, cqp->sq.size, cqp->sq.va,
+ cqp->sq.pa);
+ cqp->sq.va = NULL;
+ kfree(cqp->oop_op_array);
+ cqp->oop_op_array = NULL;
+ kfree(cqp->scratch_array);
+ cqp->scratch_array = NULL;
+ kfree(cqp->cqp_requests);
+ cqp->cqp_requests = NULL;
+}
+
+static void irdma_destroy_virt_aeq(struct irdma_pci_f *rf)
+{
+ struct irdma_aeq *aeq = &rf->aeq;
+ u32 pg_cnt = DIV_ROUND_UP(aeq->mem.size, PAGE_SIZE);
+ dma_addr_t *pg_arr = (dma_addr_t *)aeq->palloc.level1.addr;
+
+ irdma_unmap_vm_page_list(&rf->hw, pg_arr, pg_cnt);
+ irdma_free_pble(rf->pble_rsrc, &aeq->palloc);
+ vfree(aeq->mem.va);
+}
+
+/**
+ * irdma_destroy_aeq - destroy aeq
+ * @rf: RDMA PCI function
+ *
+ * Issue a destroy aeq request and
+ * free the resources associated with the aeq
+ * The function is called during driver unload
+ */
+static void irdma_destroy_aeq(struct irdma_pci_f *rf)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_aeq *aeq = &rf->aeq;
+ int status = -EBUSY;
+
+ if (!rf->msix_shared) {
+ if (rf->sc_dev.privileged)
+ rf->sc_dev.irq_ops->irdma_cfg_aeq(&rf->sc_dev,
+ rf->iw_msixtbl->idx, false);
+ irdma_destroy_irq(rf, rf->iw_msixtbl, rf);
+ }
+ if (rf->reset)
+ goto exit;
+
+ aeq->sc_aeq.size = 0;
+ status = irdma_cqp_aeq_cmd(dev, &aeq->sc_aeq, IRDMA_OP_AEQ_DESTROY);
+ if (status)
+ ibdev_dbg(to_ibdev(dev), "ERR: Destroy AEQ failed %d\n", status);
+
+exit:
+ if (aeq->virtual_map) {
+ irdma_destroy_virt_aeq(rf);
+ } else {
+ dma_free_coherent(dev->hw->device, aeq->mem.size, aeq->mem.va,
+ aeq->mem.pa);
+ aeq->mem.va = NULL;
+ }
+}
+
+/**
+ * irdma_destroy_ceq - destroy ceq
+ * @rf: RDMA PCI function
+ * @iwceq: ceq to be destroyed
+ *
+ * Issue a destroy ceq request and
+ * free the resources associated with the ceq
+ */
+static void irdma_destroy_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ int status;
+
+ if (rf->reset)
+ goto exit;
+
+ status = irdma_sc_ceq_destroy(&iwceq->sc_ceq, 0, 1);
+ if (status) {
+ ibdev_dbg(to_ibdev(dev), "ERR: CEQ destroy command failed %d\n", status);
+ goto exit;
+ }
+
+ status = irdma_sc_cceq_destroy_done(&iwceq->sc_ceq);
+ if (status)
+ ibdev_dbg(to_ibdev(dev), "ERR: CEQ destroy completion failed %d\n",
+ status);
+exit:
+ dma_free_coherent(dev->hw->device, iwceq->mem.size, iwceq->mem.va,
+ iwceq->mem.pa);
+ iwceq->mem.va = NULL;
+}
+
+/**
+ * irdma_del_ceq_0 - destroy ceq 0
+ * @rf: RDMA PCI function
+ *
+ * Disable the ceq 0 interrupt and destroy the ceq 0
+ */
+static void irdma_del_ceq_0(struct irdma_pci_f *rf)
+{
+ struct irdma_ceq *iwceq = rf->ceqlist;
+ struct irdma_msix_vector *msix_vec;
+
+ if (rf->msix_shared) {
+ msix_vec = &rf->iw_msixtbl[0];
+ if (rf->sc_dev.privileged)
+ rf->sc_dev.irq_ops->irdma_cfg_ceq(&rf->sc_dev,
+ msix_vec->ceq_id,
+ msix_vec->idx, false);
+ irdma_destroy_irq(rf, msix_vec, rf);
+ } else {
+ msix_vec = &rf->iw_msixtbl[1];
+ irdma_destroy_irq(rf, msix_vec, iwceq);
+ }
+
+ irdma_destroy_ceq(rf, iwceq);
+ rf->sc_dev.ceq_valid = false;
+ rf->ceqs_count = 0;
+}
+
+/**
+ * irdma_del_ceqs - destroy all CEQs except CEQ 0
+ * @rf: RDMA PCI function
+ *
+ * Go through all of the device ceqs, except 0, and for each
+ * ceq disable the ceq interrupt and destroy the ceq
+ */
+static void irdma_del_ceqs(struct irdma_pci_f *rf)
+{
+ struct irdma_ceq *iwceq = &rf->ceqlist[1];
+ struct irdma_msix_vector *msix_vec;
+ u32 i = 0;
+
+ if (rf->msix_shared)
+ msix_vec = &rf->iw_msixtbl[1];
+ else
+ msix_vec = &rf->iw_msixtbl[2];
+
+ for (i = 1; i < rf->ceqs_count; i++, msix_vec++, iwceq++) {
+ if (rf->sc_dev.privileged)
+ rf->sc_dev.irq_ops->irdma_cfg_ceq(&rf->sc_dev,
+ msix_vec->ceq_id,
+ msix_vec->idx, false);
+ irdma_destroy_irq(rf, msix_vec, iwceq);
+ irdma_cqp_ceq_cmd(&rf->sc_dev, &iwceq->sc_ceq,
+ IRDMA_OP_CEQ_DESTROY);
+ dma_free_coherent(rf->sc_dev.hw->device, iwceq->mem.size,
+ iwceq->mem.va, iwceq->mem.pa);
+ iwceq->mem.va = NULL;
+ }
+ rf->ceqs_count = 1;
+}
+
+/**
+ * irdma_destroy_ccq - destroy control cq
+ * @rf: RDMA PCI function
+ *
+ * Issue destroy ccq request and
+ * free the resources associated with the ccq
+ */
+static void irdma_destroy_ccq(struct irdma_pci_f *rf)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_ccq *ccq = &rf->ccq;
+ int status = 0;
+
+ if (rf->cqp_cmpl_wq)
+ destroy_workqueue(rf->cqp_cmpl_wq);
+
+ if (!rf->reset)
+ status = irdma_sc_ccq_destroy(dev->ccq, 0, true);
+ if (status)
+ ibdev_dbg(to_ibdev(dev), "ERR: CCQ destroy failed %d\n", status);
+ dma_free_coherent(dev->hw->device, ccq->mem_cq.size, ccq->mem_cq.va,
+ ccq->mem_cq.pa);
+ ccq->mem_cq.va = NULL;
+}
+
+/**
+ * irdma_close_hmc_objects_type - delete hmc objects of a given type
+ * @dev: iwarp device
+ * @obj_type: the hmc object type to be deleted
+ * @hmc_info: host memory info struct
+ * @privileged: permission to close HMC objects
+ * @reset: true if called before reset
+ */
+static void irdma_close_hmc_objects_type(struct irdma_sc_dev *dev,
+ enum irdma_hmc_rsrc_type obj_type,
+ struct irdma_hmc_info *hmc_info,
+ bool privileged, bool reset)
+{
+ struct irdma_hmc_del_obj_info info = {};
+
+ info.hmc_info = hmc_info;
+ info.rsrc_type = obj_type;
+ info.count = hmc_info->hmc_obj[obj_type].cnt;
+ info.privileged = privileged;
+ if (irdma_sc_del_hmc_obj(dev, &info, reset))
+ ibdev_dbg(to_ibdev(dev), "ERR: del HMC obj of type %d failed\n",
+ obj_type);
+}
+
+/**
+ * irdma_del_hmc_objects - remove all device hmc objects
+ * @dev: iwarp device
+ * @hmc_info: hmc_info to free
+ * @privileged: permission to delete HMC objects
+ * @reset: true if called before reset
+ * @vers: hardware version
+ */
+static void irdma_del_hmc_objects(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info, bool privileged,
+ bool reset, enum irdma_vers vers)
+{
+ unsigned int i;
+
+ for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++) {
+ if (dev->hmc_info->hmc_obj[iw_hmc_obj_types[i]].cnt)
+ irdma_close_hmc_objects_type(dev, iw_hmc_obj_types[i],
+ hmc_info, privileged, reset);
+ if (vers == IRDMA_GEN_1 && i == IRDMA_HMC_IW_TIMER)
+ break;
+ }
+}
+
+/**
+ * irdma_create_hmc_obj_type - create hmc object of a given type
+ * @dev: hardware control device structure
+ * @info: information for the hmc object to create
+ */
+static int irdma_create_hmc_obj_type(struct irdma_sc_dev *dev,
+ struct irdma_hmc_create_obj_info *info)
+{
+ return irdma_sc_create_hmc_obj(dev, info);
+}
+
+/**
+ * irdma_create_hmc_objs - create all hmc objects for the device
+ * @rf: RDMA PCI function
+ * @privileged: permission to create HMC objects
+ * @vers: HW version
+ *
+ * Create the device hmc objects and allocate hmc pages
+ * Return 0 if successful, otherwise clean up and return error
+ */
+static int irdma_create_hmc_objs(struct irdma_pci_f *rf, bool privileged,
+ enum irdma_vers vers)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_hmc_create_obj_info info = {};
+ int i, status = 0;
+
+ info.hmc_info = dev->hmc_info;
+ info.privileged = privileged;
+ info.entry_type = rf->sd_type;
+
+ for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++) {
+ if (iw_hmc_obj_types[i] == IRDMA_HMC_IW_PBLE)
+ continue;
+ if (dev->hmc_info->hmc_obj[iw_hmc_obj_types[i]].cnt) {
+ info.rsrc_type = iw_hmc_obj_types[i];
+ info.count = dev->hmc_info->hmc_obj[info.rsrc_type].cnt;
+ info.add_sd_cnt = 0;
+ status = irdma_create_hmc_obj_type(dev, &info);
+ if (status) {
+ ibdev_dbg(to_ibdev(dev),
+ "ERR: create obj type %d status = %d\n",
+ iw_hmc_obj_types[i], status);
+ break;
+ }
+ }
+ if (vers == IRDMA_GEN_1 && i == IRDMA_HMC_IW_TIMER)
+ break;
+ }
+
+ if (!status)
+ return irdma_sc_static_hmc_pages_allocated(dev->cqp, 0, dev->hmc_fn_id,
+ true, true);
+
+ while (i) {
+ i--;
+ /* destroy the hmc objects of a given type */
+ if (dev->hmc_info->hmc_obj[iw_hmc_obj_types[i]].cnt)
+ irdma_close_hmc_objects_type(dev, iw_hmc_obj_types[i],
+ dev->hmc_info, privileged,
+ false);
+ }
+
+ return status;
+}
+
+/**
+ * irdma_obj_aligned_mem - get aligned memory from device allocated memory
+ * @rf: RDMA PCI function
+ * @memptr: points to the memory addresses
+ * @size: size of memory needed
+ * @mask: mask for the aligned memory
+ *
+ * Get aligned memory of the requested size and
+ * update the memptr to point to the new aligned memory
+ * Return 0 if successful, otherwise return -ENOMEM
+ */
+static int irdma_obj_aligned_mem(struct irdma_pci_f *rf,
+ struct irdma_dma_mem *memptr, u32 size,
+ u32 mask)
+{
+ unsigned long va, newva;
+ unsigned long extra;
+
+ va = (unsigned long)rf->obj_next.va;
+ newva = va;
+ if (mask)
+ newva = ALIGN(va, (unsigned long)mask + 1ULL);
+ extra = newva - va;
+ memptr->va = (u8 *)va + extra;
+ memptr->pa = rf->obj_next.pa + extra;
+ memptr->size = size;
+ if (((u8 *)memptr->va + size) > ((u8 *)rf->obj_mem.va + rf->obj_mem.size))
+ return -ENOMEM;
+
+ rf->obj_next.va = (u8 *)memptr->va + size;
+ rf->obj_next.pa = memptr->pa + size;
+
+ return 0;
+}
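+
+/*
+ * For illustration: with mask == 0xFFF (4K alignment) and obj_next.va ending
+ * in ...0x0a10, ALIGN() rounds up to the next 0x1000 boundary, so
+ * extra == 0x5f0; the returned va/pa point at that boundary and obj_next is
+ * bumped a further 'size' bytes past it, all within the obj_mem region.
+ */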
+
+/**
+ * irdma_create_cqp - create control qp
+ * @rf: RDMA PCI function
+ *
+ * Return 0, if the cqp and all the resources associated with it
+ * are successfully created, otherwise return error
+ */
+static int irdma_create_cqp(struct irdma_pci_f *rf)
+{
+ u32 sqsize = IRDMA_CQP_SW_SQSIZE_2048;
+ struct irdma_dma_mem mem;
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_cqp_init_info cqp_init_info = {};
+ struct irdma_cqp *cqp = &rf->cqp;
+ u16 maj_err, min_err;
+ int i, status;
+
+ cqp->cqp_requests = kcalloc(sqsize, sizeof(*cqp->cqp_requests), GFP_KERNEL);
+ if (!cqp->cqp_requests)
+ return -ENOMEM;
+
+ cqp->scratch_array = kcalloc(sqsize, sizeof(*cqp->scratch_array), GFP_KERNEL);
+ if (!cqp->scratch_array) {
+ status = -ENOMEM;
+ goto err_scratch;
+ }
+
+ cqp->oop_op_array = kcalloc(sqsize, sizeof(*cqp->oop_op_array),
+ GFP_KERNEL);
+ if (!cqp->oop_op_array) {
+ status = -ENOMEM;
+ goto err_oop;
+ }
+ cqp_init_info.ooo_op_array = cqp->oop_op_array;
+ dev->cqp = &cqp->sc_cqp;
+ dev->cqp->dev = dev;
+ cqp->sq.size = ALIGN(sizeof(struct irdma_cqp_sq_wqe) * sqsize,
+ IRDMA_CQP_ALIGNMENT);
+ cqp->sq.va = dma_alloc_coherent(dev->hw->device, cqp->sq.size,
+ &cqp->sq.pa, GFP_KERNEL);
+ if (!cqp->sq.va) {
+ status = -ENOMEM;
+ goto err_sq;
+ }
+
+ status = irdma_obj_aligned_mem(rf, &mem, sizeof(struct irdma_cqp_ctx),
+ IRDMA_HOST_CTX_ALIGNMENT_M);
+ if (status)
+ goto err_ctx;
+
+ dev->cqp->host_ctx_pa = mem.pa;
+ dev->cqp->host_ctx = mem.va;
+ /* populate the cqp init info */
+ cqp_init_info.dev = dev;
+ cqp_init_info.sq_size = sqsize;
+ cqp_init_info.sq = cqp->sq.va;
+ cqp_init_info.sq_pa = cqp->sq.pa;
+ cqp_init_info.host_ctx_pa = mem.pa;
+ cqp_init_info.host_ctx = mem.va;
+ cqp_init_info.hmc_profile = rf->rsrc_profile;
+ cqp_init_info.scratch_array = cqp->scratch_array;
+ cqp_init_info.protocol_used = rf->protocol_used;
+
+ switch (rf->rdma_ver) {
+ case IRDMA_GEN_1:
+ cqp_init_info.hw_maj_ver = IRDMA_CQPHC_HW_MAJVER_GEN_1;
+ break;
+ case IRDMA_GEN_2:
+ cqp_init_info.hw_maj_ver = IRDMA_CQPHC_HW_MAJVER_GEN_2;
+ break;
+ case IRDMA_GEN_3:
+ cqp_init_info.hw_maj_ver = IRDMA_CQPHC_HW_MAJVER_GEN_3;
+ cqp_init_info.ts_override = 1;
+ break;
+ }
+ status = irdma_sc_cqp_init(dev->cqp, &cqp_init_info);
+ if (status) {
+ ibdev_dbg(to_ibdev(dev), "ERR: cqp init status %d\n", status);
+ goto err_ctx;
+ }
+
+ spin_lock_init(&cqp->req_lock);
+ spin_lock_init(&cqp->compl_lock);
+
+ status = irdma_sc_cqp_create(dev->cqp, &maj_err, &min_err);
+ if (status) {
+ ibdev_dbg(to_ibdev(dev),
+ "ERR: cqp create failed - status %d maj_err %d min_err %d\n",
+ status, maj_err, min_err);
+ goto err_ctx;
+ }
+
+ INIT_LIST_HEAD(&cqp->cqp_avail_reqs);
+ INIT_LIST_HEAD(&cqp->cqp_pending_reqs);
+
+ /* init the waitqueue of the cqp_requests and add them to the list */
+ for (i = 0; i < sqsize; i++) {
+ init_waitqueue_head(&cqp->cqp_requests[i].waitq);
+ list_add_tail(&cqp->cqp_requests[i].list, &cqp->cqp_avail_reqs);
+ }
+ init_waitqueue_head(&cqp->remove_wq);
+ return 0;
+
+err_ctx:
+ dma_free_coherent(dev->hw->device, cqp->sq.size,
+ cqp->sq.va, cqp->sq.pa);
+ cqp->sq.va = NULL;
+err_sq:
+ kfree(cqp->oop_op_array);
+ cqp->oop_op_array = NULL;
+err_oop:
+ kfree(cqp->scratch_array);
+ cqp->scratch_array = NULL;
+err_scratch:
+ kfree(cqp->cqp_requests);
+ cqp->cqp_requests = NULL;
+
+ return status;
+}
+
+/**
+ * irdma_create_ccq - create control cq
+ * @rf: RDMA PCI function
+ *
+ * Return 0, if the ccq and the resources associated with it
+ * are successfully created, otherwise return error
+ */
+static int irdma_create_ccq(struct irdma_pci_f *rf)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_ccq_init_info info = {};
+ struct irdma_ccq *ccq = &rf->ccq;
+ int ccq_size;
+ int status;
+
+ dev->ccq = &ccq->sc_cq;
+ dev->ccq->dev = dev;
+ info.dev = dev;
+ ccq_size = (rf->rdma_ver >= IRDMA_GEN_3) ? IW_GEN_3_CCQ_SIZE : IW_CCQ_SIZE;
+ ccq->shadow_area.size = sizeof(struct irdma_cq_shadow_area);
+ ccq->mem_cq.size = ALIGN(sizeof(struct irdma_cqe) * ccq_size,
+ IRDMA_CQ0_ALIGNMENT);
+ ccq->mem_cq.va = dma_alloc_coherent(dev->hw->device, ccq->mem_cq.size,
+ &ccq->mem_cq.pa, GFP_KERNEL);
+ if (!ccq->mem_cq.va)
+ return -ENOMEM;
+
+ status = irdma_obj_aligned_mem(rf, &ccq->shadow_area,
+ ccq->shadow_area.size,
+ IRDMA_SHADOWAREA_M);
+ if (status)
+ goto exit;
+
+ ccq->sc_cq.back_cq = ccq;
+ /* populate the ccq init info */
+ info.cq_base = ccq->mem_cq.va;
+ info.cq_pa = ccq->mem_cq.pa;
+ info.num_elem = ccq_size;
+ info.shadow_area = ccq->shadow_area.va;
+ info.shadow_area_pa = ccq->shadow_area.pa;
+ info.ceqe_mask = false;
+ info.ceq_id_valid = true;
+ info.shadow_read_threshold = 16;
+ info.vsi = &rf->default_vsi;
+ status = irdma_sc_ccq_init(dev->ccq, &info);
+ if (!status)
+ status = irdma_sc_ccq_create(dev->ccq, 0, true, true);
+exit:
+ if (status) {
+ dma_free_coherent(dev->hw->device, ccq->mem_cq.size,
+ ccq->mem_cq.va, ccq->mem_cq.pa);
+ ccq->mem_cq.va = NULL;
+ }
+
+ return status;
+}
+
+/**
+ * irdma_alloc_set_mac - set up a mac address table entry
+ * @iwdev: irdma device
+ *
+ * Allocate a mac ip entry and add it to the hw table. Return 0
+ * if successful, otherwise return error
+ */
+static int irdma_alloc_set_mac(struct irdma_device *iwdev)
+{
+ int status;
+
+ status = irdma_alloc_local_mac_entry(iwdev->rf,
+ &iwdev->mac_ip_table_idx);
+ if (!status) {
+ status = irdma_add_local_mac_entry(iwdev->rf,
+ (const u8 *)iwdev->netdev->dev_addr,
+ (u8)iwdev->mac_ip_table_idx);
+ if (status)
+ irdma_del_local_mac_entry(iwdev->rf,
+ (u8)iwdev->mac_ip_table_idx);
+ }
+ return status;
+}
+
+/**
+ * irdma_cfg_ceq_vector - set up the msix interrupt vector for
+ * ceq
+ * @rf: RDMA PCI function
+ * @iwceq: ceq associated with the vector
+ * @ceq_id: the id number of the iwceq
+ * @msix_vec: interrupt vector information
+ *
+ * Allocate interrupt resources and enable irq handling
+ * Return 0 if successful, otherwise return error
+ */
+static int irdma_cfg_ceq_vector(struct irdma_pci_f *rf, struct irdma_ceq *iwceq,
+ u32 ceq_id, struct irdma_msix_vector *msix_vec)
+{
+ int status;
+
+ if (rf->msix_shared && !ceq_id) {
+ snprintf(msix_vec->name, sizeof(msix_vec->name) - 1,
+ "irdma-%s-AEQCEQ-0", dev_name(&rf->pcidev->dev));
+ tasklet_setup(&rf->dpc_tasklet, irdma_dpc);
+ status = request_irq(msix_vec->irq, irdma_irq_handler, 0,
+ msix_vec->name, rf);
+ } else {
+ snprintf(msix_vec->name, sizeof(msix_vec->name) - 1,
+ "irdma-%s-CEQ-%d",
+ dev_name(&rf->pcidev->dev), ceq_id);
+ tasklet_setup(&iwceq->dpc_tasklet, irdma_ceq_dpc);
+
+ status = request_irq(msix_vec->irq, irdma_ceq_handler, 0,
+ msix_vec->name, iwceq);
+ }
+ cpumask_clear(&msix_vec->mask);
+ cpumask_set_cpu(msix_vec->cpu_affinity, &msix_vec->mask);
+ irq_update_affinity_hint(msix_vec->irq, &msix_vec->mask);
+ if (status) {
+ ibdev_dbg(&rf->iwdev->ibdev, "ERR: ceq irq config fail\n");
+ return status;
+ }
+
+ msix_vec->ceq_id = ceq_id;
+ if (rf->sc_dev.privileged)
+ rf->sc_dev.irq_ops->irdma_cfg_ceq(&rf->sc_dev, ceq_id,
+ msix_vec->idx, true);
+ else
+ status = irdma_vchnl_req_ceq_vec_map(&rf->sc_dev, ceq_id,
+ msix_vec->idx);
+ return status;
+}
+
+/**
+ * irdma_cfg_aeq_vector - set up the msix vector for aeq
+ * @rf: RDMA PCI function
+ *
+ * Allocate interrupt resources and enable irq handling
+ * Return 0 if successful, otherwise return error
+ */
+static int irdma_cfg_aeq_vector(struct irdma_pci_f *rf)
+{
+ struct irdma_msix_vector *msix_vec = rf->iw_msixtbl;
+ int ret = 0;
+
+ if (!rf->msix_shared) {
+ snprintf(msix_vec->name, sizeof(msix_vec->name) - 1,
+ "irdma-%s-AEQ", dev_name(&rf->pcidev->dev));
+ tasklet_setup(&rf->dpc_tasklet, irdma_dpc);
+ ret = request_irq(msix_vec->irq, irdma_irq_handler, 0,
+ msix_vec->name, rf);
+ }
+ if (ret) {
+ ibdev_dbg(&rf->iwdev->ibdev, "ERR: aeq irq config fail\n");
+ return ret;
+ }
+
+ if (rf->sc_dev.privileged)
+ rf->sc_dev.irq_ops->irdma_cfg_aeq(&rf->sc_dev, msix_vec->idx,
+ true);
+ else
+ ret = irdma_vchnl_req_aeq_vec_map(&rf->sc_dev, msix_vec->idx);
+
+ return ret;
+}
+
+/**
+ * irdma_create_ceq - create completion event queue
+ * @rf: RDMA PCI function
+ * @iwceq: pointer to the ceq resources to be created
+ * @ceq_id: the id number of the iwceq
+ * @vsi_idx: vsi idx
+ *
+ * Return 0, if the ceq and the resources associated with it
+ * are successfully created, otherwise return error
+ */
+static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq,
+ u32 ceq_id, u16 vsi_idx)
+{
+ int status;
+ struct irdma_ceq_init_info info = {};
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ u32 ceq_size;
+
+ info.ceq_id = ceq_id;
+ iwceq->rf = rf;
+ ceq_size = min(rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt,
+ dev->hw_attrs.max_hw_ceq_size);
+ iwceq->mem.size = ALIGN(sizeof(struct irdma_ceqe) * ceq_size,
+ IRDMA_CEQ_ALIGNMENT);
+ iwceq->mem.va = dma_alloc_coherent(dev->hw->device, iwceq->mem.size,
+ &iwceq->mem.pa, GFP_KERNEL);
+ if (!iwceq->mem.va)
+ return -ENOMEM;
+
+ info.ceq_id = ceq_id;
+ info.ceqe_base = iwceq->mem.va;
+ info.ceqe_pa = iwceq->mem.pa;
+ info.elem_cnt = ceq_size;
+ iwceq->sc_ceq.ceq_id = ceq_id;
+ info.dev = dev;
+ info.vsi_idx = vsi_idx;
+ status = irdma_sc_ceq_init(&iwceq->sc_ceq, &info);
+ if (!status) {
+ if (dev->ceq_valid)
+ status = irdma_cqp_ceq_cmd(&rf->sc_dev, &iwceq->sc_ceq,
+ IRDMA_OP_CEQ_CREATE);
+ else
+ status = irdma_sc_cceq_create(&iwceq->sc_ceq, 0);
+ }
+
+ if (status) {
+ dma_free_coherent(dev->hw->device, iwceq->mem.size,
+ iwceq->mem.va, iwceq->mem.pa);
+ iwceq->mem.va = NULL;
+ }
+
+ return status;
+}
+
+/**
+ * irdma_setup_ceq_0 - create CEQ 0 and its interrupt resource
+ * @rf: RDMA PCI function
+ *
+ * Allocate a list for all device completion event queues
+ * Create the ceq 0 and configure its msix interrupt vector
+ * Return 0, if successfully set up, otherwise return error
+ */
+static int irdma_setup_ceq_0(struct irdma_pci_f *rf)
+{
+ struct irdma_ceq *iwceq;
+ struct irdma_msix_vector *msix_vec;
+ u32 i;
+ int status = 0;
+ u32 num_ceqs;
+
+ num_ceqs = min(rf->msix_count, rf->sc_dev.hmc_fpm_misc.max_ceqs);
+ rf->ceqlist = kcalloc(num_ceqs, sizeof(*rf->ceqlist), GFP_KERNEL);
+ if (!rf->ceqlist) {
+ status = -ENOMEM;
+ goto exit;
+ }
+
+ iwceq = &rf->ceqlist[0];
+ status = irdma_create_ceq(rf, iwceq, 0, rf->default_vsi.vsi_idx);
+ if (status) {
+ ibdev_dbg(&rf->iwdev->ibdev, "ERR: create ceq status = %d\n",
+ status);
+ goto exit;
+ }
+
+ spin_lock_init(&iwceq->ce_lock);
+ i = rf->msix_shared ? 0 : 1;
+ msix_vec = &rf->iw_msixtbl[i];
+ iwceq->irq = msix_vec->irq;
+ iwceq->msix_idx = msix_vec->idx;
+ status = irdma_cfg_ceq_vector(rf, iwceq, 0, msix_vec);
+ if (status) {
+ irdma_destroy_ceq(rf, iwceq);
+ goto exit;
+ }
+
+ irdma_ena_intr(&rf->sc_dev, msix_vec->idx);
+ rf->ceqs_count++;
+
+exit:
+ if (status && !rf->ceqs_count) {
+ kfree(rf->ceqlist);
+ rf->ceqlist = NULL;
+ return status;
+ }
+ rf->sc_dev.ceq_valid = true;
+
+ return 0;
+}
+
+/**
+ * irdma_setup_ceqs - manage the device CEQs and their interrupt resources
+ * @rf: RDMA PCI function
+ * @vsi_idx: vsi_idx for this CEQ
+ *
+ * Allocate a list for all device completion event queues
+ * Create the ceqs and configure their msix interrupt vectors
+ * Return 0, if ceqs are successfully set up, otherwise return error
+ */
+static int irdma_setup_ceqs(struct irdma_pci_f *rf, u16 vsi_idx)
+{
+ u32 i;
+ u32 ceq_id;
+ struct irdma_ceq *iwceq;
+ struct irdma_msix_vector *msix_vec;
+ int status;
+ u32 num_ceqs;
+
+ num_ceqs = min(rf->msix_count, rf->sc_dev.hmc_fpm_misc.max_ceqs);
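+ /* CEQ 0 was set up by irdma_setup_ceq_0(): it shares vector 0 with the
+ * AEQ when msix_shared is set, and uses vector 1 otherwise. The remaining
+ * CEQs therefore start at vector 1 or 2 respectively.
+ */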
+ i = (rf->msix_shared) ? 1 : 2;
+ for (ceq_id = 1; i < num_ceqs; i++, ceq_id++) {
+ iwceq = &rf->ceqlist[ceq_id];
+ status = irdma_create_ceq(rf, iwceq, ceq_id, vsi_idx);
+ if (status) {
+ ibdev_dbg(&rf->iwdev->ibdev,
+ "ERR: create ceq status = %d\n", status);
+ goto del_ceqs;
+ }
+ spin_lock_init(&iwceq->ce_lock);
+ msix_vec = &rf->iw_msixtbl[i];
+ iwceq->irq = msix_vec->irq;
+ iwceq->msix_idx = msix_vec->idx;
+ status = irdma_cfg_ceq_vector(rf, iwceq, ceq_id, msix_vec);
+ if (status) {
+ irdma_destroy_ceq(rf, iwceq);
+ goto del_ceqs;
+ }
+ irdma_ena_intr(&rf->sc_dev, msix_vec->idx);
+ rf->ceqs_count++;
+ }
+
+ return 0;
+
+del_ceqs:
+ irdma_del_ceqs(rf);
+
+ return status;
+}
+
+static int irdma_create_virt_aeq(struct irdma_pci_f *rf, u32 size)
+{
+ struct irdma_aeq *aeq = &rf->aeq;
+ dma_addr_t *pg_arr;
+ u32 pg_cnt;
+ int status;
+
+ if (rf->rdma_ver < IRDMA_GEN_2)
+ return -EOPNOTSUPP;
+
+ aeq->mem.size = sizeof(struct irdma_sc_aeqe) * size;
+ aeq->mem.va = vzalloc(aeq->mem.size);
+
+ if (!aeq->mem.va)
+ return -ENOMEM;
+
+ pg_cnt = DIV_ROUND_UP(aeq->mem.size, PAGE_SIZE);
+ status = irdma_get_pble(rf->pble_rsrc, &aeq->palloc, pg_cnt, true);
+ if (status) {
+ vfree(aeq->mem.va);
+ return status;
+ }
+
+ pg_arr = (dma_addr_t *)aeq->palloc.level1.addr;
+ status = irdma_map_vm_page_list(&rf->hw, aeq->mem.va, pg_arr, pg_cnt);
+ if (status) {
+ irdma_free_pble(rf->pble_rsrc, &aeq->palloc);
+ vfree(aeq->mem.va);
+ return status;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_create_aeq - create async event queue
+ * @rf: RDMA PCI function
+ *
+ * Return 0, if the aeq and the resources associated with it
+ * are successfully created, otherwise return error
+ */
+static int irdma_create_aeq(struct irdma_pci_f *rf)
+{
+ struct irdma_aeq_init_info info = {};
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_aeq *aeq = &rf->aeq;
+ struct irdma_hmc_info *hmc_info = rf->sc_dev.hmc_info;
+ u32 aeq_size;
+ u8 multiplier = (rf->protocol_used == IRDMA_IWARP_PROTOCOL_ONLY) ? 2 : 1;
+ int status;
+
+ aeq_size = multiplier * hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt +
+ hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt;
+ aeq_size = min(aeq_size, dev->hw_attrs.max_hw_aeq_size);
+ /* GEN_3 does not support virtual AEQ. Cap at max Kernel alloc size */
+ if (rf->rdma_ver == IRDMA_GEN_3)
+ aeq_size = min(aeq_size, (u32)((PAGE_SIZE << MAX_PAGE_ORDER) /
+ sizeof(struct irdma_sc_aeqe)));
+ aeq->mem.size = ALIGN(sizeof(struct irdma_sc_aeqe) * aeq_size,
+ IRDMA_AEQ_ALIGNMENT);
+ aeq->mem.va = dma_alloc_coherent(dev->hw->device, aeq->mem.size,
+ &aeq->mem.pa,
+ GFP_KERNEL | __GFP_NOWARN);
+ if (aeq->mem.va)
+ goto skip_virt_aeq;
+ else if (rf->rdma_ver == IRDMA_GEN_3)
+ return -ENOMEM;
+
+ /* physically mapped aeq failed. setup virtual aeq */
+ status = irdma_create_virt_aeq(rf, aeq_size);
+ if (status)
+ return status;
+
+ info.virtual_map = true;
+ aeq->virtual_map = info.virtual_map;
+ info.pbl_chunk_size = 1;
+ info.first_pm_pbl_idx = aeq->palloc.level1.idx;
+
+skip_virt_aeq:
+ info.aeqe_base = aeq->mem.va;
+ info.aeq_elem_pa = aeq->mem.pa;
+ info.elem_cnt = aeq_size;
+ info.dev = dev;
+ info.msix_idx = rf->iw_msixtbl->idx;
+ status = irdma_sc_aeq_init(&aeq->sc_aeq, &info);
+ if (status)
+ goto err;
+
+ status = irdma_cqp_aeq_cmd(dev, &aeq->sc_aeq, IRDMA_OP_AEQ_CREATE);
+ if (status)
+ goto err;
+
+ return 0;
+
+err:
+ if (aeq->virtual_map) {
+ irdma_destroy_virt_aeq(rf);
+ } else {
+ dma_free_coherent(dev->hw->device, aeq->mem.size, aeq->mem.va,
+ aeq->mem.pa);
+ aeq->mem.va = NULL;
+ }
+
+ return status;
+}
+
+/**
+ * irdma_setup_aeq - set up the device aeq
+ * @rf: RDMA PCI function
+ *
+ * Create the aeq and configure its msix interrupt vector
+ * Return 0 if successful, otherwise return error
+ */
+static int irdma_setup_aeq(struct irdma_pci_f *rf)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ int status;
+
+ status = irdma_create_aeq(rf);
+ if (status)
+ return status;
+
+ status = irdma_cfg_aeq_vector(rf);
+ if (status) {
+ irdma_destroy_aeq(rf);
+ return status;
+ }
+
+ if (!rf->msix_shared)
+ irdma_ena_intr(dev, rf->iw_msixtbl[0].idx);
+
+ return 0;
+}
+
+/**
+ * irdma_initialize_ilq - create iwarp local queue for cm
+ * @iwdev: irdma device
+ *
+ * Return 0 if successful, otherwise return error
+ */
+static int irdma_initialize_ilq(struct irdma_device *iwdev)
+{
+ struct irdma_puda_rsrc_info info = {};
+ int status;
+
+ info.type = IRDMA_PUDA_RSRC_TYPE_ILQ;
+ info.cq_id = 1;
+ info.qp_id = 1;
+ info.count = 1;
+ info.pd_id = 1;
+ info.abi_ver = IRDMA_ABI_VER;
+ info.sq_size = min(iwdev->rf->max_qp / 2, (u32)32768);
+ info.rq_size = info.sq_size;
+ info.buf_size = 1024;
+ info.tx_buf_cnt = 2 * info.sq_size;
+ info.receive = irdma_receive_ilq;
+ info.xmit_complete = irdma_free_sqbuf;
+ status = irdma_puda_create_rsrc(&iwdev->vsi, &info);
+ if (status)
+ ibdev_dbg(&iwdev->ibdev, "ERR: ilq create fail\n");
+
+ return status;
+}
+
+/**
+ * irdma_initialize_ieq - create iwarp exception queue
+ * @iwdev: irdma device
+ *
+ * Return 0 if successful, otherwise return error
+ */
+static int irdma_initialize_ieq(struct irdma_device *iwdev)
+{
+ struct irdma_puda_rsrc_info info = {};
+ int status;
+
+ info.type = IRDMA_PUDA_RSRC_TYPE_IEQ;
+ info.cq_id = 2;
+ info.qp_id = iwdev->vsi.exception_lan_q;
+ info.count = 1;
+ info.pd_id = 2;
+ info.abi_ver = IRDMA_ABI_VER;
+ info.sq_size = min(iwdev->rf->max_qp / 2, (u32)32768);
+ info.rq_size = info.sq_size;
+ info.buf_size = iwdev->vsi.mtu + IRDMA_IPV4_PAD;
+ info.tx_buf_cnt = 4096;
+ status = irdma_puda_create_rsrc(&iwdev->vsi, &info);
+ if (status)
+ ibdev_dbg(&iwdev->ibdev, "ERR: ieq create fail\n");
+
+ return status;
+}
+
+/**
+ * irdma_reinitialize_ieq - destroy and re-create ieq
+ * @vsi: VSI structure
+ */
+void irdma_reinitialize_ieq(struct irdma_sc_vsi *vsi)
+{
+ struct irdma_device *iwdev = vsi->back_vsi;
+ struct irdma_pci_f *rf = iwdev->rf;
+
+ irdma_puda_dele_rsrc(vsi, IRDMA_PUDA_RSRC_TYPE_IEQ, false);
+ if (irdma_initialize_ieq(iwdev)) {
+ iwdev->rf->reset = true;
+ rf->gen_ops.request_reset(rf);
+ }
+}
+
+/**
+ * irdma_hmc_setup - create hmc objects for the device
+ * @rf: RDMA PCI function
+ *
+ * Set up the device private memory space for the number and size of
+ * the hmc objects and create the objects
+ * Return 0 if successful, otherwise return error
+ */
+static int irdma_hmc_setup(struct irdma_pci_f *rf)
+{
+ int status;
+ u32 qpcnt;
+
+ qpcnt = rsrc_limits_table[rf->limits_sel].qplimit;
+
+ rf->sd_type = IRDMA_SD_TYPE_DIRECT;
+ status = irdma_cfg_fpm_val(&rf->sc_dev, qpcnt);
+ if (status)
+ return status;
+
+ status = irdma_create_hmc_objs(rf, true, rf->rdma_ver);
+
+ return status;
+}
+
+/**
+ * irdma_del_init_mem - deallocate memory resources
+ * @rf: RDMA PCI function
+ */
+static void irdma_del_init_mem(struct irdma_pci_f *rf)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+
+ if (!rf->sc_dev.privileged)
+ irdma_vchnl_req_put_hmc_fcn(&rf->sc_dev);
+ kfree(dev->hmc_info->sd_table.sd_entry);
+ dev->hmc_info->sd_table.sd_entry = NULL;
+ vfree(rf->mem_rsrc);
+ rf->mem_rsrc = NULL;
+ dma_free_coherent(rf->hw.device, rf->obj_mem.size, rf->obj_mem.va,
+ rf->obj_mem.pa);
+ rf->obj_mem.va = NULL;
+ if (rf->rdma_ver != IRDMA_GEN_1) {
+ bitmap_free(rf->allocated_ws_nodes);
+ rf->allocated_ws_nodes = NULL;
+ }
+ kfree(rf->ceqlist);
+ rf->ceqlist = NULL;
+ kfree(rf->iw_msixtbl);
+ rf->iw_msixtbl = NULL;
+ kfree(rf->hmc_info_mem);
+ rf->hmc_info_mem = NULL;
+}
+
+/**
+ * irdma_initialize_dev - initialize device
+ * @rf: RDMA PCI function
+ *
+ * Allocate memory for the hmc objects and initialize iwdev
+ * Return 0 if successful, otherwise clean up the resources
+ * and return error
+ */
+static int irdma_initialize_dev(struct irdma_pci_f *rf)
+{
+ int status;
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_device_init_info info = {};
+ struct irdma_dma_mem mem;
+ u32 size;
+
+ size = sizeof(struct irdma_hmc_pble_rsrc) +
+ sizeof(struct irdma_hmc_info) +
+ (sizeof(struct irdma_hmc_obj_info) * IRDMA_HMC_IW_MAX);
+
+ rf->hmc_info_mem = kzalloc(size, GFP_KERNEL);
+ if (!rf->hmc_info_mem)
+ return -ENOMEM;
+
+ rf->pble_rsrc = (struct irdma_hmc_pble_rsrc *)rf->hmc_info_mem;
+ dev->hmc_info = &rf->hw.hmc;
+ dev->hmc_info->hmc_obj = (struct irdma_hmc_obj_info *)
+ (rf->pble_rsrc + 1);
+
+ status = irdma_obj_aligned_mem(rf, &mem, IRDMA_QUERY_FPM_BUF_SIZE,
+ IRDMA_FPM_QUERY_BUF_ALIGNMENT_M);
+ if (status)
+ goto error;
+
+ info.fpm_query_buf_pa = mem.pa;
+ info.fpm_query_buf = mem.va;
+
+ status = irdma_obj_aligned_mem(rf, &mem, IRDMA_COMMIT_FPM_BUF_SIZE,
+ IRDMA_FPM_COMMIT_BUF_ALIGNMENT_M);
+ if (status)
+ goto error;
+
+ info.fpm_commit_buf_pa = mem.pa;
+ info.fpm_commit_buf = mem.va;
+
+ info.bar0 = rf->hw.hw_addr;
+ info.hmc_fn_id = rf->pf_id;
+ info.protocol_used = rf->protocol_used;
+ info.hw = &rf->hw;
+ status = irdma_sc_dev_init(rf->rdma_ver, &rf->sc_dev, &info);
+ if (status)
+ goto error;
+
+ return status;
+error:
+ kfree(rf->hmc_info_mem);
+ rf->hmc_info_mem = NULL;
+
+ return status;
+}
+
+/**
+ * irdma_rt_deinit_hw - clean up the irdma device resources
+ * @iwdev: irdma device
+ *
+ * remove the mac ip entry and ipv4/ipv6 addresses, destroy the
+ * device queues and free the pble and the hmc objects
+ */
+void irdma_rt_deinit_hw(struct irdma_device *iwdev)
+{
+ ibdev_dbg(&iwdev->ibdev, "INIT: state = %d\n", iwdev->init_state);
+
+ switch (iwdev->init_state) {
+ case IP_ADDR_REGISTERED:
+ if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
+ irdma_del_local_mac_entry(iwdev->rf,
+ (u8)iwdev->mac_ip_table_idx);
+ fallthrough;
+ case IEQ_CREATED:
+ if (!iwdev->roce_mode)
+ irdma_puda_dele_rsrc(&iwdev->vsi, IRDMA_PUDA_RSRC_TYPE_IEQ,
+ iwdev->rf->reset);
+ fallthrough;
+ case ILQ_CREATED:
+ if (!iwdev->roce_mode)
+ irdma_puda_dele_rsrc(&iwdev->vsi,
+ IRDMA_PUDA_RSRC_TYPE_ILQ,
+ iwdev->rf->reset);
+ break;
+ default:
+ ibdev_warn(&iwdev->ibdev, "bad init_state = %d\n", iwdev->init_state);
+ break;
+ }
+
+ irdma_cleanup_cm_core(&iwdev->cm_core);
+ if (iwdev->vsi.pestat) {
+ irdma_vsi_stats_free(&iwdev->vsi);
+ kfree(iwdev->vsi.pestat);
+ }
+ if (iwdev->cleanup_wq)
+ destroy_workqueue(iwdev->cleanup_wq);
+}
+
+static int irdma_setup_init_state(struct irdma_pci_f *rf)
+{
+ int status;
+
+ status = irdma_save_msix_info(rf);
+ if (status)
+ return status;
+
+ rf->hw.device = &rf->pcidev->dev;
+ rf->obj_mem.size = ALIGN(8192, IRDMA_HW_PAGE_SIZE);
+ rf->obj_mem.va = dma_alloc_coherent(rf->hw.device, rf->obj_mem.size,
+ &rf->obj_mem.pa, GFP_KERNEL);
+ if (!rf->obj_mem.va) {
+ status = -ENOMEM;
+ goto clean_msixtbl;
+ }
+
+ rf->obj_next = rf->obj_mem;
+ status = irdma_initialize_dev(rf);
+ if (status)
+ goto clean_obj_mem;
+
+ return 0;
+
+clean_obj_mem:
+ dma_free_coherent(rf->hw.device, rf->obj_mem.size, rf->obj_mem.va,
+ rf->obj_mem.pa);
+ rf->obj_mem.va = NULL;
+clean_msixtbl:
+ kfree(rf->iw_msixtbl);
+ rf->iw_msixtbl = NULL;
+ return status;
+}
+
+/**
+ * irdma_get_used_rsrc - determine resources used internally
+ * @iwdev: irdma device
+ *
+ * Called at the end of open to get all internal allocations
+ */
+static void irdma_get_used_rsrc(struct irdma_device *iwdev)
+{
+ iwdev->rf->used_pds = find_first_zero_bit(iwdev->rf->allocated_pds,
+ iwdev->rf->max_pd);
+ iwdev->rf->used_qps = find_first_zero_bit(iwdev->rf->allocated_qps,
+ iwdev->rf->max_qp);
+ iwdev->rf->used_cqs = find_first_zero_bit(iwdev->rf->allocated_cqs,
+ iwdev->rf->max_cq);
+ iwdev->rf->used_srqs = find_first_zero_bit(iwdev->rf->allocated_srqs,
+ iwdev->rf->max_srq);
+ iwdev->rf->used_mrs = find_first_zero_bit(iwdev->rf->allocated_mrs,
+ iwdev->rf->max_mr);
+}
+
+void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf)
+{
+ enum init_completion_state state = rf->init_state;
+
+ rf->init_state = INVALID_STATE;
+
+ switch (state) {
+ case AEQ_CREATED:
+ irdma_destroy_aeq(rf);
+ fallthrough;
+ case PBLE_CHUNK_MEM:
+ irdma_destroy_pble_prm(rf->pble_rsrc);
+ fallthrough;
+ case CEQS_CREATED:
+ irdma_del_ceqs(rf);
+ fallthrough;
+ case CEQ0_CREATED:
+ irdma_del_ceq_0(rf);
+ fallthrough;
+ case CCQ_CREATED:
+ irdma_destroy_ccq(rf);
+ fallthrough;
+ case HW_RSRC_INITIALIZED:
+ case HMC_OBJS_CREATED:
+ irdma_del_hmc_objects(&rf->sc_dev, rf->sc_dev.hmc_info, true,
+ rf->reset, rf->rdma_ver);
+ fallthrough;
+ case CQP_CREATED:
+ irdma_destroy_cqp(rf);
+ fallthrough;
+ case INITIAL_STATE:
+ irdma_del_init_mem(rf);
+ break;
+ case INVALID_STATE:
+ default:
+ ibdev_warn(&rf->iwdev->ibdev, "bad init_state = %d\n", rf->init_state);
+ break;
+ }
+}
+
+/**
+ * irdma_rt_init_hw - Initializes runtime portion of HW
+ * @iwdev: irdma device
+ * @l2params: qos, tc, mtu info from netdev driver
+ *
+ * Create device queues ILQ, IEQ, CEQs and PBLEs. Set up irdma
+ * device resource objects.
+ */
+int irdma_rt_init_hw(struct irdma_device *iwdev,
+ struct irdma_l2params *l2params)
+{
+ struct irdma_pci_f *rf = iwdev->rf;
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_vsi_init_info vsi_info = {};
+ struct irdma_vsi_stats_info stats_info = {};
+ int status;
+
+ vsi_info.dev = dev;
+ vsi_info.back_vsi = iwdev;
+ vsi_info.params = l2params;
+ vsi_info.pf_data_vsi_num = iwdev->vsi_num;
+ vsi_info.register_qset = rf->gen_ops.register_qset;
+ vsi_info.unregister_qset = rf->gen_ops.unregister_qset;
+ vsi_info.exception_lan_q = 2;
+ irdma_sc_vsi_init(&iwdev->vsi, &vsi_info);
+
+ status = irdma_setup_cm_core(iwdev, rf->rdma_ver);
+ if (status)
+ return status;
+
+ stats_info.pestat = kzalloc(sizeof(*stats_info.pestat), GFP_KERNEL);
+ if (!stats_info.pestat) {
+ irdma_cleanup_cm_core(&iwdev->cm_core);
+ return -ENOMEM;
+ }
+ stats_info.fcn_id = dev->hmc_fn_id;
+ status = irdma_vsi_stats_init(&iwdev->vsi, &stats_info);
+ if (status) {
+ irdma_cleanup_cm_core(&iwdev->cm_core);
+ kfree(stats_info.pestat);
+ return status;
+ }
+
+ do {
+ if (!iwdev->roce_mode) {
+ status = irdma_initialize_ilq(iwdev);
+ if (status)
+ break;
+ iwdev->init_state = ILQ_CREATED;
+ status = irdma_initialize_ieq(iwdev);
+ if (status)
+ break;
+ iwdev->init_state = IEQ_CREATED;
+ }
+ if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
+ irdma_alloc_set_mac(iwdev);
+ irdma_add_ip(iwdev);
+ iwdev->init_state = IP_ADDR_REGISTERED;
+
+ /* handles asynchronous cleanup tasks - disconnect CM, free qp,
+ * free cq bufs
+ */
+ iwdev->cleanup_wq = alloc_workqueue("irdma-cleanup-wq",
+ WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE);
+ if (!iwdev->cleanup_wq)
+ return -ENOMEM;
+ irdma_get_used_rsrc(iwdev);
+ init_waitqueue_head(&iwdev->suspend_wq);
+
+ return 0;
+ } while (0);
+
+ dev_err(&rf->pcidev->dev, "HW runtime init FAIL status = %d last cmpl = %d\n",
+ status, iwdev->init_state);
+ irdma_rt_deinit_hw(iwdev);
+
+ return status;
+}
+
+/**
+ * irdma_ctrl_init_hw - Initializes control portion of HW
+ * @rf: RDMA PCI function
+ *
+ * Create admin queues, HMC objects and RF resource objects
+ */
+int irdma_ctrl_init_hw(struct irdma_pci_f *rf)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ int status;
+
+ do {
+ status = irdma_setup_init_state(rf);
+ if (status)
+ break;
+ rf->init_state = INITIAL_STATE;
+
+ status = irdma_create_cqp(rf);
+ if (status)
+ break;
+ rf->init_state = CQP_CREATED;
+
+ dev->feature_info[IRDMA_FEATURE_FW_INFO] = IRDMA_FW_VER_DEFAULT;
+ if (rf->rdma_ver != IRDMA_GEN_1) {
+ status = irdma_get_rdma_features(dev);
+ if (status)
+ break;
+ }
+
+ status = irdma_hmc_setup(rf);
+ if (status)
+ break;
+ rf->init_state = HMC_OBJS_CREATED;
+
+ status = irdma_initialize_hw_rsrc(rf);
+ if (status)
+ break;
+ rf->init_state = HW_RSRC_INITIALIZED;
+
+ status = irdma_create_ccq(rf);
+ if (status)
+ break;
+ rf->init_state = CCQ_CREATED;
+
+ status = irdma_setup_ceq_0(rf);
+ if (status)
+ break;
+ rf->init_state = CEQ0_CREATED;
+ /* Handles processing of CQP completions */
+ rf->cqp_cmpl_wq =
+ alloc_ordered_workqueue("cqp_cmpl_wq", WQ_HIGHPRI);
+ if (!rf->cqp_cmpl_wq) {
+ status = -ENOMEM;
+ break;
+ }
+ INIT_WORK(&rf->cqp_cmpl_work, cqp_compl_worker);
+ irdma_sc_ccq_arm(dev->ccq);
+
+ status = irdma_setup_ceqs(rf, rf->iwdev ? rf->iwdev->vsi_num : 0);
+ if (status)
+ break;
+
+ rf->init_state = CEQS_CREATED;
+
+ status = irdma_hmc_init_pble(&rf->sc_dev,
+ rf->pble_rsrc);
+ if (status)
+ break;
+
+ rf->init_state = PBLE_CHUNK_MEM;
+
+ status = irdma_setup_aeq(rf);
+ if (status)
+ break;
+ rf->init_state = AEQ_CREATED;
+
+ return 0;
+ } while (0);
+
+ dev_err(&rf->pcidev->dev, "IRDMA hardware initialization FAILED init_state=%d status=%d\n",
+ rf->init_state, status);
+ irdma_ctrl_deinit_hw(rf);
+ return status;
+}
+
+/**
+ * irdma_set_hw_rsrc - set hw memory resources.
+ * @rf: RDMA PCI function
+ */
+static void irdma_set_hw_rsrc(struct irdma_pci_f *rf)
+{
+ rf->allocated_qps = (void *)(rf->mem_rsrc +
+ (sizeof(struct irdma_arp_entry) * rf->arp_table_size));
+ rf->allocated_cqs = &rf->allocated_qps[BITS_TO_LONGS(rf->max_qp)];
+ rf->allocated_srqs = &rf->allocated_cqs[BITS_TO_LONGS(rf->max_cq)];
+ rf->allocated_mrs = &rf->allocated_srqs[BITS_TO_LONGS(rf->max_srq)];
+ rf->allocated_pds = &rf->allocated_mrs[BITS_TO_LONGS(rf->max_mr)];
+ rf->allocated_ahs = &rf->allocated_pds[BITS_TO_LONGS(rf->max_pd)];
+ rf->allocated_mcgs = &rf->allocated_ahs[BITS_TO_LONGS(rf->max_ah)];
+ rf->allocated_arps = &rf->allocated_mcgs[BITS_TO_LONGS(rf->max_mcg)];
+ rf->qp_table = (struct irdma_qp **)
+ (&rf->allocated_arps[BITS_TO_LONGS(rf->arp_table_size)]);
+ rf->cq_table = (struct irdma_cq **)(&rf->qp_table[rf->max_qp]);
+
+ spin_lock_init(&rf->rsrc_lock);
+ spin_lock_init(&rf->arp_lock);
+ spin_lock_init(&rf->qptable_lock);
+ spin_lock_init(&rf->cqtable_lock);
+ spin_lock_init(&rf->qh_list_lock);
+}
+
+/**
+ * irdma_calc_mem_rsrc_size - calculate memory resources size.
+ * @rf: RDMA PCI function
+ */
+static u32 irdma_calc_mem_rsrc_size(struct irdma_pci_f *rf)
+{
+ u32 rsrc_size;
+
+ rsrc_size = sizeof(struct irdma_arp_entry) * rf->arp_table_size;
+ rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_qp);
+ rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_mr);
+ rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_cq);
+ rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_srq);
+ rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_pd);
+ rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->arp_table_size);
+ rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_ah);
+ rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_mcg);
+ rsrc_size += sizeof(struct irdma_qp **) * rf->max_qp;
+ rsrc_size += sizeof(struct irdma_cq **) * rf->max_cq;
+ rsrc_size += sizeof(struct irdma_srq **) * rf->max_srq;
+
+ return rsrc_size;
+}
+
+/**
+ * irdma_initialize_hw_rsrc - initialize hw resource tracking array
+ * @rf: RDMA PCI function
+ */
+u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf)
+{
+ u32 rsrc_size;
+ u32 mrdrvbits;
+ u32 ret;
+
+ if (rf->rdma_ver != IRDMA_GEN_1) {
+ rf->allocated_ws_nodes = bitmap_zalloc(IRDMA_MAX_WS_NODES,
+ GFP_KERNEL);
+ if (!rf->allocated_ws_nodes)
+ return -ENOMEM;
+
+ set_bit(0, rf->allocated_ws_nodes);
+ rf->max_ws_node_id = IRDMA_MAX_WS_NODES;
+ }
+ rf->max_cqe = rf->sc_dev.hw_attrs.uk_attrs.max_hw_cq_size;
+ rf->max_qp = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt;
+ rf->max_mr = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt;
+ rf->max_cq = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt;
+ rf->max_srq = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_SRQ].cnt;
+ rf->max_pd = rf->sc_dev.hw_attrs.max_hw_pds;
+ rf->arp_table_size = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_ARP].cnt;
+ rf->max_ah = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt;
+ rf->max_mcg = rf->max_qp;
+
+ rsrc_size = irdma_calc_mem_rsrc_size(rf);
+ rf->mem_rsrc = vzalloc(rsrc_size);
+ if (!rf->mem_rsrc) {
+ ret = -ENOMEM;
+ goto mem_rsrc_vzalloc_fail;
+ }
+
+ rf->arp_table = (struct irdma_arp_entry *)rf->mem_rsrc;
+
+ irdma_set_hw_rsrc(rf);
+
+ set_bit(0, rf->allocated_mrs);
+ set_bit(0, rf->allocated_qps);
+ set_bit(0, rf->allocated_cqs);
+ set_bit(0, rf->allocated_srqs);
+ set_bit(0, rf->allocated_pds);
+ set_bit(0, rf->allocated_arps);
+ set_bit(0, rf->allocated_ahs);
+ set_bit(0, rf->allocated_mcgs);
+ set_bit(2, rf->allocated_qps); /* qp 2 IEQ */
+ set_bit(1, rf->allocated_qps); /* qp 1 ILQ */
+ set_bit(1, rf->allocated_cqs);
+ set_bit(1, rf->allocated_pds);
+ set_bit(2, rf->allocated_cqs);
+ set_bit(2, rf->allocated_pds);
+
+ INIT_LIST_HEAD(&rf->mc_qht_list.list);
+ /* stag index mask has a minimum of 14 bits */
+ mrdrvbits = 24 - max(get_count_order(rf->max_mr), 14);
+ rf->mr_stagmask = ~(((1 << mrdrvbits) - 1) << (32 - mrdrvbits));
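+	/*
+	 * Example: with max_mr = 64K, get_count_order() returns 16, so
+	 * mrdrvbits = 8 and mr_stagmask = 0x00ffffff (the top 8 bits of the
+	 * STag index are masked off).
+	 */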
+
+ return 0;
+
+mem_rsrc_vzalloc_fail:
+ bitmap_free(rf->allocated_ws_nodes);
+ rf->allocated_ws_nodes = NULL;
+
+ return ret;
+}
+
+/**
+ * irdma_cqp_ce_handler - handle cqp completions
+ * @rf: RDMA PCI function
+ * @cq: cq for cqp completions
+ */
+void irdma_cqp_ce_handler(struct irdma_pci_f *rf, struct irdma_sc_cq *cq)
+{
+ struct irdma_cqp_request *cqp_request;
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ u32 cqe_count = 0;
+ struct irdma_ccq_cqe_info info;
+ unsigned long flags;
+ int ret;
+
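+	/* drain all available CCQ completions, then re-arm the CQ below */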
+ do {
+ memset(&info, 0, sizeof(info));
+ spin_lock_irqsave(&rf->cqp.compl_lock, flags);
+ ret = irdma_sc_ccq_get_cqe_info(cq, &info);
+ spin_unlock_irqrestore(&rf->cqp.compl_lock, flags);
+ if (ret)
+ break;
+
+ cqp_request = (struct irdma_cqp_request *)
+ (unsigned long)info.scratch;
+ if (info.error && irdma_cqp_crit_err(dev, cqp_request->info.cqp_cmd,
+ info.maj_err_code,
+ info.min_err_code))
+ ibdev_err(&rf->iwdev->ibdev, "cqp opcode = 0x%x maj_err_code = 0x%x min_err_code = 0x%x\n",
+ info.op_code, info.maj_err_code, info.min_err_code);
+ if (cqp_request) {
+ cqp_request->compl_info.maj_err_code = info.maj_err_code;
+ cqp_request->compl_info.min_err_code = info.min_err_code;
+ cqp_request->compl_info.op_ret_val = info.op_ret_val;
+ cqp_request->compl_info.error = info.error;
+
+ /*
+ * If this is deferred or pending completion, then mark
+ * CQP request as pending to not block the CQ, but don't
+ * release CQP request, as it is still on the OOO list.
+ */
+ if (info.pending)
+ cqp_request->pending = true;
+ else
+ irdma_complete_cqp_request(&rf->cqp,
+ cqp_request);
+ }
+
+ cqe_count++;
+ } while (1);
+
+ if (cqe_count) {
+ irdma_process_bh(dev);
+ irdma_sc_ccq_arm(cq);
+ }
+}
+
+/**
+ * cqp_compl_worker - Handle cqp completions
+ * @work: Pointer to work structure
+ */
+void cqp_compl_worker(struct work_struct *work)
+{
+ struct irdma_pci_f *rf = container_of(work, struct irdma_pci_f,
+ cqp_cmpl_work);
+ struct irdma_sc_cq *cq = &rf->ccq.sc_cq;
+
+ irdma_cqp_ce_handler(rf, cq);
+}
+
+/**
+ * irdma_lookup_apbvt_entry - lookup hash table for an existing apbvt entry corresponding to port
+ * @cm_core: cm's core
+ * @port: port to identify apbvt entry
+ */
+static struct irdma_apbvt_entry *irdma_lookup_apbvt_entry(struct irdma_cm_core *cm_core,
+ u16 port)
+{
+ struct irdma_apbvt_entry *entry;
+
+ hash_for_each_possible(cm_core->apbvt_hash_tbl, entry, hlist, port) {
+ if (entry->port == port) {
+ entry->use_cnt++;
+ return entry;
+ }
+ }
+
+ return NULL;
+}
+
+/**
+ * irdma_next_iw_state - modify qp state
+ * @iwqp: iwarp qp to modify
+ * @state: next state for qp
+ * @del_hash: del hash
+ * @term: term message
+ * @termlen: length of term message
+ */
+void irdma_next_iw_state(struct irdma_qp *iwqp, u8 state, u8 del_hash, u8 term,
+ u8 termlen)
+{
+ struct irdma_modify_qp_info info = {};
+
+ info.next_iwarp_state = state;
+ info.remove_hash_idx = del_hash;
+ info.cq_num_valid = true;
+ info.arp_cache_idx_valid = true;
+ info.dont_send_term = true;
+ info.dont_send_fin = true;
+ info.termlen = termlen;
+
+ if (term & IRDMAQP_TERM_SEND_TERM_ONLY)
+ info.dont_send_term = false;
+ if (term & IRDMAQP_TERM_SEND_FIN_ONLY)
+ info.dont_send_fin = false;
+ if (iwqp->sc_qp.term_flags && state == IRDMA_QP_STATE_ERROR)
+ info.reset_tcp_conn = true;
+ iwqp->hw_iwarp_state = state;
+ irdma_hw_modify_qp(iwqp->iwdev, iwqp, &info, 0);
+ iwqp->iwarp_state = info.next_iwarp_state;
+}
+
+/**
+ * irdma_del_local_mac_entry - remove a mac entry from the hw
+ * table
+ * @rf: RDMA PCI function
+ * @idx: the index of the mac ip address to delete
+ */
+void irdma_del_local_mac_entry(struct irdma_pci_f *rf, u16 idx)
+{
+ struct irdma_cqp *iwcqp = &rf->cqp;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_DELETE_LOCAL_MAC_ENTRY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.del_local_mac_entry.cqp = &iwcqp->sc_cqp;
+ cqp_info->in.u.del_local_mac_entry.scratch = (uintptr_t)cqp_request;
+ cqp_info->in.u.del_local_mac_entry.entry_idx = idx;
+ cqp_info->in.u.del_local_mac_entry.ignore_ref_count = 0;
+
+ irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(iwcqp, cqp_request);
+}
+
+/**
+ * irdma_add_local_mac_entry - add a mac ip address entry to the
+ * hw table
+ * @rf: RDMA PCI function
+ * @mac_addr: pointer to mac address
+ * @idx: the index of the mac ip address to add
+ */
+int irdma_add_local_mac_entry(struct irdma_pci_f *rf, const u8 *mac_addr, u16 idx)
+{
+ struct irdma_local_mac_entry_info *info;
+ struct irdma_cqp *iwcqp = &rf->cqp;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->post_sq = 1;
+ info = &cqp_info->in.u.add_local_mac_entry.info;
+ ether_addr_copy(info->mac_addr, mac_addr);
+ info->entry_idx = idx;
+ cqp_info->in.u.add_local_mac_entry.scratch = (uintptr_t)cqp_request;
+ cqp_info->cqp_cmd = IRDMA_OP_ADD_LOCAL_MAC_ENTRY;
+ cqp_info->in.u.add_local_mac_entry.cqp = &iwcqp->sc_cqp;
+
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(iwcqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_alloc_local_mac_entry - allocate a mac entry
+ * @rf: RDMA PCI function
+ * @mac_tbl_idx: the index of the new mac address
+ *
+ * Allocate a mac address entry and update the mac_tbl_idx
+ * to hold the index of the newly created mac address
+ * Return 0 if successful, otherwise return error
+ */
+int irdma_alloc_local_mac_entry(struct irdma_pci_f *rf, u16 *mac_tbl_idx)
+{
+ struct irdma_cqp *iwcqp = &rf->cqp;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status = 0;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_ALLOC_LOCAL_MAC_ENTRY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.alloc_local_mac_entry.cqp = &iwcqp->sc_cqp;
+ cqp_info->in.u.alloc_local_mac_entry.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ if (!status)
+ *mac_tbl_idx = (u16)cqp_request->compl_info.op_ret_val;
+
+ irdma_put_cqp_request(iwcqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_cqp_manage_apbvt_cmd - send cqp command manage apbvt
+ * @iwdev: irdma device
+ * @accel_local_port: port for apbvt
+ * @add_port: add or delete port
+ */
+static int irdma_cqp_manage_apbvt_cmd(struct irdma_device *iwdev,
+ u16 accel_local_port, bool add_port)
+{
+ struct irdma_apbvt_info *info;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, add_port);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ info = &cqp_info->in.u.manage_apbvt_entry.info;
+ info->add = add_port;
+ info->port = accel_local_port;
+ cqp_info->cqp_cmd = IRDMA_OP_MANAGE_APBVT_ENTRY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.manage_apbvt_entry.cqp = &iwdev->rf->cqp.sc_cqp;
+ cqp_info->in.u.manage_apbvt_entry.scratch = (uintptr_t)cqp_request;
+ ibdev_dbg(&iwdev->ibdev, "DEV: %s: port=0x%04x\n",
+ (!add_port) ? "DELETE" : "ADD", accel_local_port);
+
+ status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
+ irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_add_apbvt - add tcp port to HW apbvt table
+ * @iwdev: irdma device
+ * @port: port for apbvt
+ */
+struct irdma_apbvt_entry *irdma_add_apbvt(struct irdma_device *iwdev, u16 port)
+{
+ struct irdma_cm_core *cm_core = &iwdev->cm_core;
+ struct irdma_apbvt_entry *entry;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cm_core->apbvt_lock, flags);
+ entry = irdma_lookup_apbvt_entry(cm_core, port);
+ if (entry) {
+ spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
+ return entry;
+ }
+
+ entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+ if (!entry) {
+ spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
+ return NULL;
+ }
+
+ entry->port = port;
+ entry->use_cnt = 1;
+ hash_add(cm_core->apbvt_hash_tbl, &entry->hlist, entry->port);
+ spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
+
+ if (irdma_cqp_manage_apbvt_cmd(iwdev, port, true)) {
+ kfree(entry);
+ return NULL;
+ }
+
+ return entry;
+}
+
+/**
+ * irdma_del_apbvt - delete tcp port from HW apbvt table
+ * @iwdev: irdma device
+ * @entry: apbvt entry object
+ */
+void irdma_del_apbvt(struct irdma_device *iwdev,
+ struct irdma_apbvt_entry *entry)
+{
+ struct irdma_cm_core *cm_core = &iwdev->cm_core;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cm_core->apbvt_lock, flags);
+ if (--entry->use_cnt) {
+ spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
+ return;
+ }
+
+ hash_del(&entry->hlist);
+ /* apbvt_lock is held across CQP delete APBVT OP (non-waiting) to
+ * protect against race where add APBVT CQP can race ahead of the delete
+ * APBVT for same port.
+ */
+ irdma_cqp_manage_apbvt_cmd(iwdev, entry->port, false);
+ kfree(entry);
+ spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
+}
+
+/**
+ * irdma_manage_arp_cache - manage hw arp cache
+ * @rf: RDMA PCI function
+ * @mac_addr: mac address ptr
+ * @ip_addr: ip addr for arp cache
+ * @ipv4: flag indicating IPv4
+ * @action: add, delete or modify
+ */
+void irdma_manage_arp_cache(struct irdma_pci_f *rf,
+ const unsigned char *mac_addr,
+ u32 *ip_addr, bool ipv4, u32 action)
+{
+ struct irdma_add_arp_cache_entry_info *info;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int arp_index;
+
+ arp_index = irdma_arp_table(rf, ip_addr, ipv4, mac_addr, action);
+ if (arp_index == -1)
+ return;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, false);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ if (action == IRDMA_ARP_ADD) {
+ cqp_info->cqp_cmd = IRDMA_OP_ADD_ARP_CACHE_ENTRY;
+ info = &cqp_info->in.u.add_arp_cache_entry.info;
+ info->arp_index = (u16)arp_index;
+ info->permanent = true;
+ ether_addr_copy(info->mac_addr, mac_addr);
+ cqp_info->in.u.add_arp_cache_entry.scratch =
+ (uintptr_t)cqp_request;
+ cqp_info->in.u.add_arp_cache_entry.cqp = &rf->cqp.sc_cqp;
+ } else {
+ cqp_info->cqp_cmd = IRDMA_OP_DELETE_ARP_CACHE_ENTRY;
+ cqp_info->in.u.del_arp_cache_entry.scratch =
+ (uintptr_t)cqp_request;
+ cqp_info->in.u.del_arp_cache_entry.cqp = &rf->cqp.sc_cqp;
+ cqp_info->in.u.del_arp_cache_entry.arp_index = arp_index;
+ }
+
+ cqp_info->post_sq = 1;
+ irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+}
+
+/**
+ * irdma_send_syn_cqp_callback - do syn/ack after qhash
+ * @cqp_request: qhash cqp completion
+ */
+static void irdma_send_syn_cqp_callback(struct irdma_cqp_request *cqp_request)
+{
+ struct irdma_cm_node *cm_node = cqp_request->param;
+
+ irdma_send_syn(cm_node, 1);
+ irdma_rem_ref_cm_node(cm_node);
+}
+
+/**
+ * irdma_manage_qhash - add or modify qhash
+ * @iwdev: irdma device
+ * @cminfo: cm info for qhash
+ * @etype: type (syn or quad)
+ * @mtype: type of qhash
+ * @cmnode: cmnode associated with connection
+ * @wait: wait for completion
+ */
+int irdma_manage_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cminfo,
+ enum irdma_quad_entry_type etype,
+ enum irdma_quad_hash_manage_type mtype, void *cmnode,
+ bool wait)
+{
+ struct irdma_qhash_table_info *info;
+ struct irdma_cqp *iwcqp = &iwdev->rf->cqp;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_cm_node *cm_node = cmnode;
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, wait);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ info = &cqp_info->in.u.manage_qhash_table_entry.info;
+ info->vsi = &iwdev->vsi;
+ info->manage = mtype;
+ info->entry_type = etype;
+ if (cminfo->vlan_id < VLAN_N_VID) {
+ info->vlan_valid = true;
+ info->vlan_id = cminfo->vlan_id;
+ } else {
+ info->vlan_valid = false;
+ }
+ info->ipv4_valid = cminfo->ipv4;
+ info->user_pri = cminfo->user_pri;
+ ether_addr_copy(info->mac_addr, iwdev->netdev->dev_addr);
+ info->qp_num = cminfo->qh_qpid;
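+	/* qhash entries match received packets, so the local address/port
+	 * are programmed into the dest_* fields and the remote side into src_*
+	 */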
+ info->dest_port = cminfo->loc_port;
+ info->dest_ip[0] = cminfo->loc_addr[0];
+ info->dest_ip[1] = cminfo->loc_addr[1];
+ info->dest_ip[2] = cminfo->loc_addr[2];
+ info->dest_ip[3] = cminfo->loc_addr[3];
+ if (etype == IRDMA_QHASH_TYPE_TCP_ESTABLISHED ||
+ etype == IRDMA_QHASH_TYPE_UDP_UNICAST ||
+ etype == IRDMA_QHASH_TYPE_UDP_MCAST ||
+ etype == IRDMA_QHASH_TYPE_ROCE_MCAST ||
+ etype == IRDMA_QHASH_TYPE_ROCEV2_HW) {
+ info->src_port = cminfo->rem_port;
+ info->src_ip[0] = cminfo->rem_addr[0];
+ info->src_ip[1] = cminfo->rem_addr[1];
+ info->src_ip[2] = cminfo->rem_addr[2];
+ info->src_ip[3] = cminfo->rem_addr[3];
+ }
+ if (cmnode) {
+ cqp_request->callback_fcn = irdma_send_syn_cqp_callback;
+ cqp_request->param = cmnode;
+ if (!wait)
+ refcount_inc(&cm_node->refcnt);
+ }
+ if (info->ipv4_valid)
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: %s caller: %pS loc_port=0x%04x rem_port=0x%04x loc_addr=%pI4 rem_addr=%pI4 mac=%pM, vlan_id=%d cm_node=%p\n",
+ (!mtype) ? "DELETE" : "ADD",
+ __builtin_return_address(0), info->dest_port,
+ info->src_port, info->dest_ip, info->src_ip,
+ info->mac_addr, cminfo->vlan_id,
+ cmnode ? cmnode : NULL);
+ else
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: %s caller: %pS loc_port=0x%04x rem_port=0x%04x loc_addr=%pI6 rem_addr=%pI6 mac=%pM, vlan_id=%d cm_node=%p\n",
+ (!mtype) ? "DELETE" : "ADD",
+ __builtin_return_address(0), info->dest_port,
+ info->src_port, info->dest_ip, info->src_ip,
+ info->mac_addr, cminfo->vlan_id,
+ cmnode ? cmnode : NULL);
+
+ cqp_info->in.u.manage_qhash_table_entry.cqp = &iwdev->rf->cqp.sc_cqp;
+ cqp_info->in.u.manage_qhash_table_entry.scratch = (uintptr_t)cqp_request;
+ cqp_info->cqp_cmd = IRDMA_OP_MANAGE_QHASH_TABLE_ENTRY;
+ cqp_info->post_sq = 1;
+ status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
+ if (status && cm_node && !wait)
+ irdma_rem_ref_cm_node(cm_node);
+
+ irdma_put_cqp_request(iwcqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_hw_flush_wqes_callback - Check return code after flush
+ * @cqp_request: flush cqp completion
+ */
+static void irdma_hw_flush_wqes_callback(struct irdma_cqp_request *cqp_request)
+{
+ struct irdma_qp_flush_info *hw_info;
+ struct irdma_sc_qp *qp;
+ struct irdma_qp *iwqp;
+ struct cqp_cmds_info *cqp_info;
+
+ cqp_info = &cqp_request->info;
+ hw_info = &cqp_info->in.u.qp_flush_wqes.info;
+ qp = cqp_info->in.u.qp_flush_wqes.qp;
+ iwqp = qp->qp_uk.back_qp;
+
+ if (cqp_request->compl_info.maj_err_code)
+ return;
+
+ if (hw_info->rq &&
+ (cqp_request->compl_info.min_err_code == IRDMA_CQP_COMPL_SQ_WQE_FLUSHED ||
+ cqp_request->compl_info.min_err_code == 0)) {
+ /* RQ WQE flush was requested but did not happen */
+ qp->qp_uk.rq_flush_complete = true;
+ }
+ if (hw_info->sq &&
+ (cqp_request->compl_info.min_err_code == IRDMA_CQP_COMPL_RQ_WQE_FLUSHED ||
+ cqp_request->compl_info.min_err_code == 0)) {
+ if (IRDMA_RING_MORE_WORK(qp->qp_uk.sq_ring)) {
+ ibdev_err(&iwqp->iwdev->ibdev, "Flush QP[%d] failed, SQ has more work",
+ qp->qp_uk.qp_id);
+ irdma_ib_qp_event(iwqp, IRDMA_QP_EVENT_CATASTROPHIC);
+ }
+ qp->qp_uk.sq_flush_complete = true;
+ }
+}
+
+/**
+ * irdma_hw_flush_wqes - flush qp's wqe
+ * @rf: RDMA PCI function
+ * @qp: hardware control qp
+ * @info: info for flush
+ * @wait: flag wait for completion
+ */
+int irdma_hw_flush_wqes(struct irdma_pci_f *rf, struct irdma_sc_qp *qp,
+ struct irdma_qp_flush_info *info, bool wait)
+{
+ int status;
+ struct irdma_qp_flush_info *hw_info;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_qp *iwqp = qp->qp_uk.back_qp;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, wait);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ if (!wait)
+ cqp_request->callback_fcn = irdma_hw_flush_wqes_callback;
+ hw_info = &cqp_request->info.in.u.qp_flush_wqes.info;
+ memcpy(hw_info, info, sizeof(*hw_info));
+ cqp_info->cqp_cmd = IRDMA_OP_QP_FLUSH_WQES;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_flush_wqes.qp = qp;
+ cqp_info->in.u.qp_flush_wqes.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ if (status) {
+ qp->qp_uk.sq_flush_complete = true;
+ qp->qp_uk.rq_flush_complete = true;
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+ return status;
+ }
+
+ if (!wait || cqp_request->compl_info.maj_err_code)
+ goto put_cqp;
+
+ if (info->rq) {
+ if (cqp_request->compl_info.min_err_code == IRDMA_CQP_COMPL_SQ_WQE_FLUSHED ||
+ cqp_request->compl_info.min_err_code == 0) {
+ /* RQ WQE flush was requested but did not happen */
+ qp->qp_uk.rq_flush_complete = true;
+ }
+ }
+ if (info->sq) {
+ if (cqp_request->compl_info.min_err_code == IRDMA_CQP_COMPL_RQ_WQE_FLUSHED ||
+ cqp_request->compl_info.min_err_code == 0) {
+ /*
+ * Handling case where WQE is posted to empty SQ when
+ * flush has not completed
+ */
+ if (IRDMA_RING_MORE_WORK(qp->qp_uk.sq_ring)) {
+ struct irdma_cqp_request *new_req;
+
+ if (!qp->qp_uk.sq_flush_complete)
+ goto put_cqp;
+ qp->qp_uk.sq_flush_complete = false;
+ qp->flush_sq = false;
+
+ info->rq = false;
+ info->sq = true;
+ new_req = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+ if (!new_req) {
+ status = -ENOMEM;
+ goto put_cqp;
+ }
+ cqp_info = &new_req->info;
+ hw_info = &new_req->info.in.u.qp_flush_wqes.info;
+ memcpy(hw_info, info, sizeof(*hw_info));
+ cqp_info->cqp_cmd = IRDMA_OP_QP_FLUSH_WQES;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_flush_wqes.qp = qp;
+ cqp_info->in.u.qp_flush_wqes.scratch = (uintptr_t)new_req;
+
+ status = irdma_handle_cqp_op(rf, new_req);
+ if (new_req->compl_info.maj_err_code ||
+ new_req->compl_info.min_err_code != IRDMA_CQP_COMPL_SQ_WQE_FLUSHED ||
+ status) {
+ ibdev_err(&iwqp->iwdev->ibdev, "fatal QP event: SQ in error but not flushed, qp: %d",
+ iwqp->ibqp.qp_num);
+ qp->qp_uk.sq_flush_complete = false;
+ irdma_ib_qp_event(iwqp, IRDMA_QP_EVENT_CATASTROPHIC);
+ }
+ irdma_put_cqp_request(&rf->cqp, new_req);
+ } else {
+ /* SQ WQE flush was requested but did not happen */
+ qp->qp_uk.sq_flush_complete = true;
+ }
+ } else {
+ if (!IRDMA_RING_MORE_WORK(qp->qp_uk.sq_ring))
+ qp->qp_uk.sq_flush_complete = true;
+ }
+ }
+
+ ibdev_dbg(&rf->iwdev->ibdev,
+ "VERBS: qp_id=%d qp_type=%d qpstate=%d ibqpstate=%d last_aeq=%d hw_iw_state=%d maj_err_code=%d min_err_code=%d\n",
+ iwqp->ibqp.qp_num, rf->protocol_used, iwqp->iwarp_state,
+ iwqp->ibqp_state, iwqp->last_aeq, iwqp->hw_iwarp_state,
+ cqp_request->compl_info.maj_err_code,
+ cqp_request->compl_info.min_err_code);
+put_cqp:
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_gen_ae - generate AE
+ * @rf: RDMA PCI function
+ * @qp: qp associated with AE
+ * @info: info for ae
+ * @wait: wait for completion
+ */
+void irdma_gen_ae(struct irdma_pci_f *rf, struct irdma_sc_qp *qp,
+ struct irdma_gen_ae_info *info, bool wait)
+{
+ struct irdma_gen_ae_info *ae_info;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, wait);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ ae_info = &cqp_request->info.in.u.gen_ae.info;
+ memcpy(ae_info, info, sizeof(*ae_info));
+ cqp_info->cqp_cmd = IRDMA_OP_GEN_AE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.gen_ae.qp = qp;
+ cqp_info->in.u.gen_ae.scratch = (uintptr_t)cqp_request;
+
+ irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+}
+
+void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask)
+{
+ struct irdma_qp_flush_info info = {};
+ struct irdma_pci_f *rf = iwqp->iwdev->rf;
+ u8 flush_code = iwqp->sc_qp.flush_code;
+
+ if ((!(flush_mask & IRDMA_FLUSH_SQ) &&
+ !(flush_mask & IRDMA_FLUSH_RQ)) ||
+ ((flush_mask & IRDMA_REFLUSH) && rf->rdma_ver >= IRDMA_GEN_3))
+ return;
+
+	/* Set flush info fields */
+ info.sq = flush_mask & IRDMA_FLUSH_SQ;
+ info.rq = flush_mask & IRDMA_FLUSH_RQ;
+
+ /* Generate userflush errors in CQE */
+ info.sq_major_code = IRDMA_FLUSH_MAJOR_ERR;
+ info.sq_minor_code = FLUSH_GENERAL_ERR;
+ info.rq_major_code = IRDMA_FLUSH_MAJOR_ERR;
+ info.rq_minor_code = FLUSH_GENERAL_ERR;
+ info.userflushcode = true;
+ info.err_sq_idx_valid = iwqp->sc_qp.err_sq_idx_valid;
+ info.err_sq_idx = iwqp->sc_qp.err_sq_idx;
+ info.err_rq_idx_valid = iwqp->sc_qp.err_rq_idx_valid;
+ info.err_rq_idx = iwqp->sc_qp.err_rq_idx;
+
+ if (flush_mask & IRDMA_REFLUSH) {
+ if (info.sq)
+ iwqp->sc_qp.flush_sq = false;
+ if (info.rq)
+ iwqp->sc_qp.flush_rq = false;
+ } else {
+ if (flush_code) {
+ if (info.sq && iwqp->sc_qp.sq_flush_code)
+ info.sq_minor_code = flush_code;
+ if (info.rq && iwqp->sc_qp.rq_flush_code)
+ info.rq_minor_code = flush_code;
+ }
+ if (!iwqp->user_mode)
+ queue_delayed_work(iwqp->iwdev->cleanup_wq,
+ &iwqp->dwork_flush,
+ msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
+ }
+
+ /* Issue flush */
+ (void)irdma_hw_flush_wqes(rf, &iwqp->sc_qp, &info,
+ flush_mask & IRDMA_FLUSH_WAIT);
+ iwqp->flush_issued = true;
+}
diff --git a/drivers/infiniband/hw/irdma/i40iw_hw.c b/drivers/infiniband/hw/irdma/i40iw_hw.c
new file mode 100644
index 000000000000..60c1f2b1811d
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/i40iw_hw.c
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include "osdep.h"
+#include "type.h"
+#include "i40iw_hw.h"
+#include "protos.h"
+
+static u32 i40iw_regs[IRDMA_MAX_REGS] = {
+ I40E_PFPE_CQPTAIL,
+ I40E_PFPE_CQPDB,
+ I40E_PFPE_CCQPSTATUS,
+ I40E_PFPE_CCQPHIGH,
+ I40E_PFPE_CCQPLOW,
+ I40E_PFPE_CQARM,
+ I40E_PFPE_CQACK,
+ I40E_PFPE_AEQALLOC,
+ I40E_PFPE_CQPERRCODES,
+ I40E_PFPE_WQEALLOC,
+ I40E_PFINT_DYN_CTLN(0),
+ I40IW_DB_ADDR_OFFSET,
+
+ I40E_GLPCI_LBARCTRL,
+ I40E_GLPE_CPUSTATUS0,
+ I40E_GLPE_CPUSTATUS1,
+ I40E_GLPE_CPUSTATUS2,
+ I40E_PFINT_AEQCTL,
+ I40E_PFINT_CEQCTL(0),
+ I40E_VSIQF_CTL(0),
+ I40E_PFHMC_PDINV,
+ I40E_GLHMC_VFPDINV(0),
+ I40E_GLPE_CRITERR,
+ 0xffffffff /* PFINT_RATEN not used in FPK */
+};
+
+static u32 i40iw_stat_offsets[] = {
+ I40E_GLPES_PFIP4RXDISCARD(0),
+ I40E_GLPES_PFIP4RXTRUNC(0),
+ I40E_GLPES_PFIP4TXNOROUTE(0),
+ I40E_GLPES_PFIP6RXDISCARD(0),
+ I40E_GLPES_PFIP6RXTRUNC(0),
+ I40E_GLPES_PFIP6TXNOROUTE(0),
+ I40E_GLPES_PFTCPRTXSEG(0),
+ I40E_GLPES_PFTCPRXOPTERR(0),
+ I40E_GLPES_PFTCPRXPROTOERR(0),
+ I40E_GLPES_PFRXVLANERR(0),
+
+ I40E_GLPES_PFIP4RXOCTSLO(0),
+ I40E_GLPES_PFIP4RXPKTSLO(0),
+ I40E_GLPES_PFIP4RXFRAGSLO(0),
+ I40E_GLPES_PFIP4RXMCPKTSLO(0),
+ I40E_GLPES_PFIP4TXOCTSLO(0),
+ I40E_GLPES_PFIP4TXPKTSLO(0),
+ I40E_GLPES_PFIP4TXFRAGSLO(0),
+ I40E_GLPES_PFIP4TXMCPKTSLO(0),
+ I40E_GLPES_PFIP6RXOCTSLO(0),
+ I40E_GLPES_PFIP6RXPKTSLO(0),
+ I40E_GLPES_PFIP6RXFRAGSLO(0),
+ I40E_GLPES_PFIP6RXMCPKTSLO(0),
+ I40E_GLPES_PFIP6TXOCTSLO(0),
+ I40E_GLPES_PFIP6TXPKTSLO(0),
+ I40E_GLPES_PFIP6TXFRAGSLO(0),
+ I40E_GLPES_PFIP6TXMCPKTSLO(0),
+ I40E_GLPES_PFTCPRXSEGSLO(0),
+ I40E_GLPES_PFTCPTXSEGLO(0),
+ I40E_GLPES_PFRDMARXRDSLO(0),
+ I40E_GLPES_PFRDMARXSNDSLO(0),
+ I40E_GLPES_PFRDMARXWRSLO(0),
+ I40E_GLPES_PFRDMATXRDSLO(0),
+ I40E_GLPES_PFRDMATXSNDSLO(0),
+ I40E_GLPES_PFRDMATXWRSLO(0),
+ I40E_GLPES_PFRDMAVBNDLO(0),
+ I40E_GLPES_PFRDMAVINVLO(0),
+ I40E_GLPES_PFIP4RXMCOCTSLO(0),
+ I40E_GLPES_PFIP4TXMCOCTSLO(0),
+ I40E_GLPES_PFIP6RXMCOCTSLO(0),
+ I40E_GLPES_PFIP6TXMCOCTSLO(0),
+ I40E_GLPES_PFUDPRXPKTSLO(0),
+ I40E_GLPES_PFUDPTXPKTSLO(0)
+};
+
+static u64 i40iw_masks[IRDMA_MAX_MASKS] = {
+ I40E_PFPE_CCQPSTATUS_CCQP_DONE,
+ I40E_PFPE_CCQPSTATUS_CCQP_ERR,
+ I40E_CQPSQ_STAG_PDID,
+ I40E_CQPSQ_CQ_CEQID,
+ I40E_CQPSQ_CQ_CQID,
+ I40E_COMMIT_FPM_CQCNT,
+ I40E_CQPSQ_UPESD_HMCFNID,
+};
+
+static u64 i40iw_shifts[IRDMA_MAX_SHIFTS] = {
+ I40E_PFPE_CCQPSTATUS_CCQP_DONE_S,
+ I40E_PFPE_CCQPSTATUS_CCQP_ERR_S,
+ I40E_CQPSQ_STAG_PDID_S,
+ I40E_CQPSQ_CQ_CEQID_S,
+ I40E_CQPSQ_CQ_CQID_S,
+ I40E_COMMIT_FPM_CQCNT_S,
+ I40E_CQPSQ_UPESD_HMCFNID_S,
+};
+
+/**
+ * i40iw_config_ceq - Configure CEQ interrupt
+ * @dev: pointer to the device structure
+ * @ceq_id: Completion Event Queue ID
+ * @idx: vector index
+ * @enable: Enable CEQ interrupt when true
+ */
+static void i40iw_config_ceq(struct irdma_sc_dev *dev, u32 ceq_id, u32 idx,
+ bool enable)
+{
+ u32 reg_val;
+
+ reg_val = FIELD_PREP(I40E_PFINT_LNKLSTN_FIRSTQ_INDX, ceq_id) |
+ FIELD_PREP(I40E_PFINT_LNKLSTN_FIRSTQ_TYPE, QUEUE_TYPE_CEQ);
+ wr32(dev->hw, I40E_PFINT_LNKLSTN(idx - 1), reg_val);
+
+ reg_val = FIELD_PREP(I40E_PFINT_DYN_CTLN_ITR_INDX, 0x3) |
+ FIELD_PREP(I40E_PFINT_DYN_CTLN_INTENA, 0x1);
+ wr32(dev->hw, I40E_PFINT_DYN_CTLN(idx - 1), reg_val);
+
+ reg_val = FIELD_PREP(IRDMA_GLINT_CEQCTL_CAUSE_ENA, enable) |
+ FIELD_PREP(IRDMA_GLINT_CEQCTL_MSIX_INDX, idx) |
+ FIELD_PREP(I40E_PFINT_CEQCTL_NEXTQ_INDX, NULL_QUEUE_INDEX) |
+ FIELD_PREP(IRDMA_GLINT_CEQCTL_ITR_INDX, 0x3);
+
+ wr32(dev->hw, i40iw_regs[IRDMA_GLINT_CEQCTL] + 4 * ceq_id, reg_val);
+}
+
+/**
+ * i40iw_ena_irq - Enable interrupt
+ * @dev: pointer to the device structure
+ * @idx: vector index
+ */
+static void i40iw_ena_irq(struct irdma_sc_dev *dev, u32 idx)
+{
+ u32 val;
+
+ val = FIELD_PREP(IRDMA_GLINT_DYN_CTL_INTENA, 0x1) |
+ FIELD_PREP(IRDMA_GLINT_DYN_CTL_CLEARPBA, 0x1) |
+ FIELD_PREP(IRDMA_GLINT_DYN_CTL_ITR_INDX, 0x3);
+ wr32(dev->hw, i40iw_regs[IRDMA_GLINT_DYN_CTL] + 4 * (idx - 1), val);
+}
+
+/**
+ * i40iw_disable_irq - Disable interrupt
+ * @dev: pointer to the device structure
+ * @idx: vector index
+ */
+static void i40iw_disable_irq(struct irdma_sc_dev *dev, u32 idx)
+{
+ wr32(dev->hw, i40iw_regs[IRDMA_GLINT_DYN_CTL] + 4 * (idx - 1), 0);
+}
+
+static const struct irdma_irq_ops i40iw_irq_ops = {
+ .irdma_cfg_aeq = irdma_cfg_aeq,
+ .irdma_cfg_ceq = i40iw_config_ceq,
+ .irdma_dis_irq = i40iw_disable_irq,
+ .irdma_en_irq = i40iw_ena_irq,
+};
+
+static const struct irdma_hw_stat_map i40iw_hw_stat_map[] = {
+ [IRDMA_HW_STAT_INDEX_RXVLANERR] = { 0, 0, IRDMA_MAX_STATS_24 },
+ [IRDMA_HW_STAT_INDEX_IP4RXOCTS] = { 8, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4RXPKTS] = { 16, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4RXDISCARD] = { 24, 0, IRDMA_MAX_STATS_32 },
+ [IRDMA_HW_STAT_INDEX_IP4RXTRUNC] = { 32, 0, IRDMA_MAX_STATS_32 },
+ [IRDMA_HW_STAT_INDEX_IP4RXFRAGS] = { 40, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4RXMCPKTS] = { 48, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6RXOCTS] = { 56, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6RXPKTS] = { 64, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6RXDISCARD] = { 72, 0, IRDMA_MAX_STATS_32 },
+ [IRDMA_HW_STAT_INDEX_IP6RXTRUNC] = { 80, 0, IRDMA_MAX_STATS_32 },
+ [IRDMA_HW_STAT_INDEX_IP6RXFRAGS] = { 88, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6RXMCPKTS] = { 96, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4TXOCTS] = { 104, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4TXPKTS] = { 112, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4TXFRAGS] = { 120, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4TXMCPKTS] = { 128, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6TXOCTS] = { 136, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6TXPKTS] = { 144, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6TXFRAGS] = { 152, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6TXMCPKTS] = { 160, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4TXNOROUTE] = { 168, 0, IRDMA_MAX_STATS_24 },
+ [IRDMA_HW_STAT_INDEX_IP6TXNOROUTE] = { 176, 0, IRDMA_MAX_STATS_24 },
+ [IRDMA_HW_STAT_INDEX_TCPRXSEGS] = { 184, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_TCPRXOPTERR] = { 192, 0, IRDMA_MAX_STATS_24 },
+ [IRDMA_HW_STAT_INDEX_TCPRXPROTOERR] = { 200, 0, IRDMA_MAX_STATS_24 },
+ [IRDMA_HW_STAT_INDEX_TCPTXSEG] = { 208, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_TCPRTXSEG] = { 216, 0, IRDMA_MAX_STATS_32 },
+ [IRDMA_HW_STAT_INDEX_RDMARXWRS] = { 224, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMARXRDS] = { 232, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMARXSNDS] = { 240, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMATXWRS] = { 248, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMATXRDS] = { 256, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMATXSNDS] = { 264, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMAVBND] = { 272, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMAVINV] = { 280, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4RXMCOCTS] = { 288, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4TXMCOCTS] = { 296, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6RXMCOCTS] = { 304, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6TXMCOCTS] = { 312, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_UDPRXPKTS] = { 320, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_UDPTXPKTS] = { 328, 0, IRDMA_MAX_STATS_48 },
+};
+
+void i40iw_init_hw(struct irdma_sc_dev *dev)
+{
+ int i;
+ u8 __iomem *hw_addr;
+
+ for (i = 0; i < IRDMA_MAX_REGS; ++i) {
+ hw_addr = dev->hw->hw_addr;
+
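+		/* the doorbell entry is a plain BAR offset, not a register
+		 * address, so the mapped base is not added for it
+		 */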
+ if (i == IRDMA_DB_ADDR_OFFSET)
+ hw_addr = NULL;
+
+ dev->hw_regs[i] = (u32 __iomem *)(i40iw_regs[i] + hw_addr);
+ }
+
+ for (i = 0; i < IRDMA_HW_STAT_INDEX_MAX_GEN_1; ++i)
+ dev->hw_stats_regs[i] = i40iw_stat_offsets[i];
+
+ dev->hw_attrs.first_hw_vf_fpm_id = I40IW_FIRST_VF_FPM_ID;
+ dev->hw_attrs.max_hw_vf_fpm_id = IRDMA_MAX_VF_FPM_ID;
+
+ for (i = 0; i < IRDMA_MAX_SHIFTS; ++i)
+ dev->hw_shifts[i] = i40iw_shifts[i];
+
+ for (i = 0; i < IRDMA_MAX_MASKS; ++i)
+ dev->hw_masks[i] = i40iw_masks[i];
+
+ dev->wqe_alloc_db = dev->hw_regs[IRDMA_WQEALLOC];
+ dev->cq_arm_db = dev->hw_regs[IRDMA_CQARM];
+ dev->aeq_alloc_db = dev->hw_regs[IRDMA_AEQALLOC];
+ dev->cqp_db = dev->hw_regs[IRDMA_CQPDB];
+ dev->cq_ack_db = dev->hw_regs[IRDMA_CQACK];
+ dev->ceq_itr_mask_db = NULL;
+ dev->aeq_itr_mask_db = NULL;
+ dev->irq_ops = &i40iw_irq_ops;
+ dev->hw_stats_map = i40iw_hw_stat_map;
+
+ /* Setup the hardware limits, hmc may limit further */
+ dev->hw_attrs.uk_attrs.max_hw_wq_frags = I40IW_MAX_WQ_FRAGMENT_COUNT;
+ dev->hw_attrs.uk_attrs.max_hw_read_sges = I40IW_MAX_SGE_RD;
+ dev->hw_attrs.max_hw_device_pages = I40IW_MAX_PUSH_PAGE_COUNT;
+ dev->hw_attrs.uk_attrs.max_hw_inline = I40IW_MAX_INLINE_DATA_SIZE;
+ dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M;
+ dev->hw_attrs.max_hw_ird = I40IW_MAX_IRD_SIZE;
+ dev->hw_attrs.max_hw_ord = I40IW_MAX_ORD_SIZE;
+ dev->hw_attrs.max_hw_wqes = I40IW_MAX_WQ_ENTRIES;
+ dev->hw_attrs.uk_attrs.max_hw_rq_quanta = I40IW_QP_SW_MAX_RQ_QUANTA;
+ dev->hw_attrs.uk_attrs.max_hw_wq_quanta = I40IW_QP_SW_MAX_WQ_QUANTA;
+ dev->hw_attrs.uk_attrs.max_hw_sq_chunk = I40IW_MAX_QUANTA_PER_WR;
+ dev->hw_attrs.max_hw_pds = I40IW_MAX_PDS;
+ dev->hw_attrs.max_stat_inst = I40IW_MAX_STATS_COUNT;
+ dev->hw_attrs.max_stat_idx = IRDMA_HW_STAT_INDEX_MAX_GEN_1;
+ dev->hw_attrs.max_hw_outbound_msg_size = I40IW_MAX_OUTBOUND_MSG_SIZE;
+ dev->hw_attrs.max_hw_inbound_msg_size = I40IW_MAX_INBOUND_MSG_SIZE;
+ dev->hw_attrs.uk_attrs.min_hw_wq_size = I40IW_MIN_WQ_SIZE;
+ dev->hw_attrs.max_qp_wr = I40IW_MAX_QP_WRS;
+}
diff --git a/drivers/infiniband/hw/irdma/i40iw_hw.h b/drivers/infiniband/hw/irdma/i40iw_hw.h
new file mode 100644
index 000000000000..0095b327afcc
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/i40iw_hw.h
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#ifndef I40IW_HW_H
+#define I40IW_HW_H
+#define I40E_VFPE_CQPTAIL1 0x0000A000 /* Reset: VFR */
+#define I40E_VFPE_CQPDB1 0x0000BC00 /* Reset: VFR */
+#define I40E_VFPE_CCQPSTATUS1 0x0000B800 /* Reset: VFR */
+#define I40E_VFPE_CCQPHIGH1 0x00009800 /* Reset: VFR */
+#define I40E_VFPE_CCQPLOW1 0x0000AC00 /* Reset: VFR */
+#define I40E_VFPE_CQARM1 0x0000B400 /* Reset: VFR */
+#define I40E_VFPE_CQACK1 0x0000B000 /* Reset: VFR */
+#define I40E_VFPE_AEQALLOC1 0x0000A400 /* Reset: VFR */
+#define I40E_VFPE_CQPERRCODES1 0x00009C00 /* Reset: VFR */
+#define I40E_VFPE_WQEALLOC1 0x0000C000 /* Reset: VFR */
+#define I40E_VFINT_DYN_CTLN(_INTVF) (0x00024800 + ((_INTVF) * 4)) /* _i=0...511 */ /* Reset: VFR */
+
+#define I40E_PFPE_CQPTAIL 0x00008080 /* Reset: PFR */
+
+#define I40E_PFPE_CQPDB 0x00008000 /* Reset: PFR */
+#define I40E_PFPE_CCQPSTATUS 0x00008100 /* Reset: PFR */
+#define I40E_PFPE_CCQPHIGH 0x00008200 /* Reset: PFR */
+#define I40E_PFPE_CCQPLOW 0x00008180 /* Reset: PFR */
+#define I40E_PFPE_CQARM 0x00131080 /* Reset: PFR */
+#define I40E_PFPE_CQACK 0x00131100 /* Reset: PFR */
+#define I40E_PFPE_AEQALLOC 0x00131180 /* Reset: PFR */
+#define I40E_PFPE_CQPERRCODES 0x00008880 /* Reset: PFR */
+#define I40E_PFPE_WQEALLOC 0x00138C00 /* Reset: PFR */
+#define I40E_GLPCI_LBARCTRL 0x000BE484 /* Reset: POR */
+#define I40E_GLPE_CPUSTATUS0 0x0000D040 /* Reset: PE_CORER */
+#define I40E_GLPE_CPUSTATUS1 0x0000D044 /* Reset: PE_CORER */
+#define I40E_GLPE_CPUSTATUS2 0x0000D048 /* Reset: PE_CORER */
+#define I40E_GLPE_CRITERR 0x000B4000 /* Reset: PE_CORER */
+#define I40E_PFHMC_PDINV 0x000C0300 /* Reset: PFR */
+#define I40E_GLHMC_VFPDINV(_i) (0x000C8300 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_PFINT_DYN_CTLN(_INTPF) (0x00034800 + ((_INTPF) * 4)) /* _i=0...511 */ /* Reset: PFR */
+#define I40E_PFINT_AEQCTL 0x00038700 /* Reset: CORER */
+
+#define I40E_GLPES_PFIP4RXDISCARD(_i) (0x00010600 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXTRUNC(_i) (0x00010700 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXNOROUTE(_i) (0x00012E00 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXDISCARD(_i) (0x00011200 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXTRUNC(_i) (0x00011300 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+
+#define I40E_GLPES_PFRDMAVBNDLO(_i) (0x00014800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXMCOCTSLO(_i) (0x00012000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXMCOCTSLO(_i) (0x00011600 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXMCOCTSLO(_i) (0x00012A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFUDPRXPKTSLO(_i) (0x00013800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFUDPTXPKTSLO(_i) (0x00013A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+
+#define I40E_GLPES_PFIP6TXNOROUTE(_i) (0x00012F00 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRTXSEG(_i) (0x00013600 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRXOPTERR(_i) (0x00013200 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRXPROTOERR(_i) (0x00013300 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRXVLANERR(_i) (0x00010000 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXOCTSLO(_i) (0x00010200 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXPKTSLO(_i) (0x00010400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXFRAGSLO(_i) (0x00010800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXMCPKTSLO(_i) (0x00010C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXOCTSLO(_i) (0x00011A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXPKTSLO(_i) (0x00011C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXFRAGSLO(_i) (0x00011E00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXMCPKTSLO(_i) (0x00012200 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXOCTSLO(_i) (0x00010E00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXPKTSLO(_i) (0x00011000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXFRAGSLO(_i) (0x00011400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXOCTSLO(_i) (0x00012400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXPKTSLO(_i) (0x00012600 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXFRAGSLO(_i) (0x00012800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXMCPKTSLO(_i) (0x00012C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPTXSEGLO(_i) (0x00013400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXRDSLO(_i) (0x00013E00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXSNDSLO(_i) (0x00014000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXWRSLO(_i) (0x00013C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXRDSLO(_i) (0x00014400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXSNDSLO(_i) (0x00014600 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXWRSLO(_i) (0x00014200 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXMCOCTSLO(_i) (0x00010A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXMCPKTSLO(_i) (0x00011800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRXSEGSLO(_i) (0x00013000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMAVINVLO(_i) (0x00014A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+
+#define I40IW_DB_ADDR_OFFSET (4 * 1024 * 1024 - 64 * 1024)
+
+#define I40IW_VF_DB_ADDR_OFFSET (64 * 1024)
+
+#define I40E_PFINT_LNKLSTN(_INTPF) (0x00035000 + ((_INTPF) * 4)) /* _i=0...511 */ /* Reset: PFR */
+#define I40E_PFINT_LNKLSTN_MAX_INDEX 511
+#define I40E_PFINT_LNKLSTN_FIRSTQ_INDX GENMASK(10, 0)
+#define I40E_PFINT_LNKLSTN_FIRSTQ_TYPE GENMASK(12, 11)
+
+#define I40E_PFINT_CEQCTL(_INTPF) (0x00036800 + ((_INTPF) * 4)) /* _i=0...511 */ /* Reset: CORER */
+#define I40E_PFINT_CEQCTL_MAX_INDEX 511
+
+/* shifts/masks for FLD_[LS/RS]_64 macros used in device table */
+#define I40E_PFINT_CEQCTL_MSIX_INDX_S 0
+#define I40E_PFINT_CEQCTL_MSIX_INDX GENMASK(7, 0)
+#define I40E_PFINT_CEQCTL_ITR_INDX_S 11
+#define I40E_PFINT_CEQCTL_ITR_INDX GENMASK(12, 11)
+#define I40E_PFINT_CEQCTL_MSIX0_INDX_S 13
+#define I40E_PFINT_CEQCTL_MSIX0_INDX GENMASK(15, 13)
+#define I40E_PFINT_CEQCTL_NEXTQ_INDX_S 16
+#define I40E_PFINT_CEQCTL_NEXTQ_INDX GENMASK(26, 16)
+#define I40E_PFINT_CEQCTL_NEXTQ_TYPE_S 27
+#define I40E_PFINT_CEQCTL_NEXTQ_TYPE GENMASK(28, 27)
+#define I40E_PFINT_CEQCTL_CAUSE_ENA_S 30
+#define I40E_PFINT_CEQCTL_CAUSE_ENA BIT(30)
+#define I40E_PFINT_CEQCTL_INTEVENT_S 31
+#define I40E_PFINT_CEQCTL_INTEVENT BIT(31)
+#define I40E_CQPSQ_STAG_PDID_S 48
+#define I40E_CQPSQ_STAG_PDID GENMASK_ULL(62, 48)
+#define I40E_PFPE_CCQPSTATUS_CCQP_DONE_S 0
+#define I40E_PFPE_CCQPSTATUS_CCQP_DONE BIT_ULL(0)
+#define I40E_PFPE_CCQPSTATUS_CCQP_ERR_S 31
+#define I40E_PFPE_CCQPSTATUS_CCQP_ERR BIT_ULL(31)
+#define I40E_PFINT_DYN_CTLN_ITR_INDX_S 3
+#define I40E_PFINT_DYN_CTLN_ITR_INDX GENMASK(4, 3)
+#define I40E_PFINT_DYN_CTLN_INTENA_S 0
+#define I40E_PFINT_DYN_CTLN_INTENA BIT(0)
+#define I40E_CQPSQ_CQ_CEQID_S 24
+#define I40E_CQPSQ_CQ_CEQID GENMASK(30, 24)
+#define I40E_CQPSQ_CQ_CQID_S 0
+#define I40E_CQPSQ_CQ_CQID GENMASK_ULL(15, 0)
+#define I40E_COMMIT_FPM_CQCNT_S 0
+#define I40E_COMMIT_FPM_CQCNT GENMASK_ULL(17, 0)
+#define I40E_CQPSQ_UPESD_HMCFNID_S 0
+#define I40E_CQPSQ_UPESD_HMCFNID GENMASK_ULL(5, 0)
+
+#define I40E_VSIQF_CTL(_VSI) (0x0020D800 + ((_VSI) * 4))
+
+enum i40iw_device_caps_const {
+ I40IW_MAX_WQ_FRAGMENT_COUNT = 3,
+ I40IW_MAX_SGE_RD = 1,
+ I40IW_MAX_PUSH_PAGE_COUNT = 0,
+ I40IW_MAX_INLINE_DATA_SIZE = 48,
+ I40IW_MAX_IRD_SIZE = 63,
+ I40IW_MAX_ORD_SIZE = 127,
+ I40IW_MAX_WQ_ENTRIES = 2048,
+ I40IW_MAX_WQE_SIZE_RQ = 128,
+ I40IW_MAX_PDS = 32768,
+ I40IW_MAX_STATS_COUNT = 16,
+ I40IW_MAX_CQ_SIZE = 1048575,
+ I40IW_MAX_OUTBOUND_MSG_SIZE = 2147483647,
+ I40IW_MAX_INBOUND_MSG_SIZE = 2147483647,
+ I40IW_MIN_WQ_SIZE = 4 /* WQEs */,
+};
+
+#define I40IW_QP_WQE_MIN_SIZE 32
+#define I40IW_QP_WQE_MAX_SIZE 128
+#define I40IW_MAX_RQ_WQE_SHIFT 2
+#define I40IW_MAX_QUANTA_PER_WR 2
+
+#define I40IW_QP_SW_MAX_SQ_QUANTA 2048
+#define I40IW_QP_SW_MAX_RQ_QUANTA 16384
+#define I40IW_QP_SW_MAX_WQ_QUANTA 2048
+#define I40IW_MAX_QP_WRS ((I40IW_QP_SW_MAX_SQ_QUANTA - IRDMA_SQ_RSVD) / I40IW_MAX_QUANTA_PER_WR)
+#define I40IW_FIRST_VF_FPM_ID 16
+#define QUEUE_TYPE_CEQ 2
+#define NULL_QUEUE_INDEX 0x7FF
+
+void i40iw_init_hw(struct irdma_sc_dev *dev);
+#endif /* I40IW_HW_H */
diff --git a/drivers/infiniband/hw/irdma/i40iw_if.c b/drivers/infiniband/hw/irdma/i40iw_if.c
new file mode 100644
index 000000000000..15e036ddaffb
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/i40iw_if.c
@@ -0,0 +1,220 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include "main.h"
+#include "i40iw_hw.h"
+#include <linux/net/intel/i40e_client.h>
+
+static struct i40e_client i40iw_client;
+
+/**
+ * i40iw_l2param_change - handle l2 param (MTU) change
+ * @cdev_info: parent lan device information structure with data/ops
+ * @client: client for parameter change
+ * @params: new parameters from L2
+ */
+static void i40iw_l2param_change(struct i40e_info *cdev_info,
+ struct i40e_client *client,
+ struct i40e_params *params)
+{
+ struct irdma_l2params l2params = {};
+ struct irdma_device *iwdev;
+ struct ib_device *ibdev;
+
+ ibdev = ib_device_get_by_netdev(cdev_info->netdev, RDMA_DRIVER_IRDMA);
+ if (!ibdev)
+ return;
+
+ iwdev = to_iwdev(ibdev);
+
+ if (iwdev->vsi.mtu != params->mtu) {
+ l2params.mtu_changed = true;
+ l2params.mtu = params->mtu;
+ }
+ irdma_change_l2params(&iwdev->vsi, &l2params);
+ ib_device_put(ibdev);
+}
+
+/**
+ * i40iw_close - client interface operation close for iwarp/uda device
+ * @cdev_info: parent lan device information structure with data/ops
+ * @client: client to close
+ * @reset: flag to indicate close on reset
+ *
+ * Called by the lan driver during the processing of client unregister
+ * Destroy and clean up the driver resources
+ */
+static void i40iw_close(struct i40e_info *cdev_info, struct i40e_client *client,
+ bool reset)
+{
+ struct irdma_device *iwdev;
+ struct ib_device *ibdev;
+
+ ibdev = ib_device_get_by_netdev(cdev_info->netdev, RDMA_DRIVER_IRDMA);
+ if (WARN_ON(!ibdev))
+ return;
+
+ iwdev = to_iwdev(ibdev);
+ if (reset)
+ iwdev->rf->reset = true;
+
+ iwdev->iw_status = 0;
+ irdma_port_ibevent(iwdev);
+ ib_unregister_device_and_put(ibdev);
+ pr_debug("INIT: Gen1 PF[%d] close complete\n", PCI_FUNC(cdev_info->pcidev->devfn));
+}
+
+static void i40iw_request_reset(struct irdma_pci_f *rf)
+{
+ struct i40e_info *cdev_info = rf->cdev;
+
+ cdev_info->ops->request_reset(cdev_info, &i40iw_client, 1);
+}
+
+static void i40iw_fill_device_info(struct irdma_device *iwdev, struct i40e_info *cdev_info)
+{
+ struct irdma_pci_f *rf = iwdev->rf;
+
+ rf->rdma_ver = IRDMA_GEN_1;
+ rf->sc_dev.hw = &rf->hw;
+ rf->sc_dev.hw_attrs.uk_attrs.hw_rev = IRDMA_GEN_1;
+ rf->sc_dev.privileged = true;
+ rf->gen_ops.request_reset = i40iw_request_reset;
+ rf->pcidev = cdev_info->pcidev;
+ rf->pf_id = cdev_info->fid;
+ rf->hw.hw_addr = cdev_info->hw_addr;
+ rf->cdev = cdev_info;
+ rf->msix_count = cdev_info->msix_count;
+ rf->msix_entries = cdev_info->msix_entries;
+ rf->limits_sel = 5;
+ rf->protocol_used = IRDMA_IWARP_PROTOCOL_ONLY;
+ rf->iwdev = iwdev;
+
+ iwdev->init_state = INITIAL_STATE;
+ iwdev->rcv_wnd = IRDMA_CM_DEFAULT_RCV_WND_SCALED;
+ iwdev->rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE;
+ iwdev->netdev = cdev_info->netdev;
+ iwdev->vsi_num = 0;
+}
+
+/**
+ * i40iw_open - client interface operation open for iwarp/uda device
+ * @cdev_info: parent lan device information structure with data/ops
+ * @client: iwarp client information, provided during registration
+ *
+ * Called by the lan driver during the processing of client register
+ * Create device resources, set up queues, pble and hmc objects and
+ * register the device with the ib verbs interface
+ * Return 0 if successful, otherwise return error
+ */
+static int i40iw_open(struct i40e_info *cdev_info, struct i40e_client *client)
+{
+ struct irdma_l2params l2params = {};
+ struct irdma_device *iwdev;
+ struct irdma_pci_f *rf;
+ int err = -EIO;
+ int i;
+ u16 qset;
+ u16 last_qset = IRDMA_NO_QSET;
+
+ iwdev = ib_alloc_device(irdma_device, ibdev);
+ if (!iwdev)
+ return -ENOMEM;
+
+ iwdev->rf = kzalloc(sizeof(*rf), GFP_KERNEL);
+ if (!iwdev->rf) {
+ ib_dealloc_device(&iwdev->ibdev);
+ return -ENOMEM;
+ }
+
+ i40iw_fill_device_info(iwdev, cdev_info);
+ rf = iwdev->rf;
+
+ if (irdma_ctrl_init_hw(rf)) {
+ err = -EIO;
+ goto err_ctrl_init;
+ }
+
+ l2params.mtu = (cdev_info->params.mtu) ? cdev_info->params.mtu : IRDMA_DEFAULT_MTU;
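+	/* differing qset handles across priorities indicate DCB is active,
+	 * in which case the driver switches to dcb_vlan_mode
+	 */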
+ for (i = 0; i < I40E_CLIENT_MAX_USER_PRIORITY; i++) {
+ qset = cdev_info->params.qos.prio_qos[i].qs_handle;
+ l2params.up2tc[i] = cdev_info->params.qos.prio_qos[i].tc;
+ l2params.qs_handle_list[i] = qset;
+ if (last_qset == IRDMA_NO_QSET)
+ last_qset = qset;
+ else if ((qset != last_qset) && (qset != IRDMA_NO_QSET))
+ iwdev->dcb_vlan_mode = true;
+ }
+
+ if (irdma_rt_init_hw(iwdev, &l2params)) {
+ err = -EIO;
+ goto err_rt_init;
+ }
+
+ err = irdma_ib_register_device(iwdev);
+ if (err)
+ goto err_ibreg;
+
+ ibdev_dbg(&iwdev->ibdev, "INIT: Gen1 PF[%d] open success\n",
+ PCI_FUNC(rf->pcidev->devfn));
+
+ return 0;
+
+err_ibreg:
+ irdma_rt_deinit_hw(iwdev);
+err_rt_init:
+ irdma_ctrl_deinit_hw(rf);
+err_ctrl_init:
+ kfree(iwdev->rf);
+ ib_dealloc_device(&iwdev->ibdev);
+
+ return err;
+}
+
+/* client interface functions */
+static const struct i40e_client_ops i40e_ops = {
+ .open = i40iw_open,
+ .close = i40iw_close,
+ .l2_param_change = i40iw_l2param_change
+};
+
+static struct i40e_client i40iw_client = {
+ .ops = &i40e_ops,
+ .type = I40E_CLIENT_IWARP,
+};
+
+static int i40iw_probe(struct auxiliary_device *aux_dev, const struct auxiliary_device_id *id)
+{
+ struct i40e_auxiliary_device *i40e_adev = container_of(aux_dev,
+ struct i40e_auxiliary_device,
+ aux_dev);
+ struct i40e_info *cdev_info = i40e_adev->ldev;
+
+ strscpy_pad(i40iw_client.name, "irdma", I40E_CLIENT_STR_LENGTH);
+ i40e_client_device_register(cdev_info, &i40iw_client);
+
+ return 0;
+}
+
+static void i40iw_remove(struct auxiliary_device *aux_dev)
+{
+ struct i40e_auxiliary_device *i40e_adev = container_of(aux_dev,
+ struct i40e_auxiliary_device,
+ aux_dev);
+ struct i40e_info *cdev_info = i40e_adev->ldev;
+
+ i40e_client_device_unregister(cdev_info);
+}
+
+static const struct auxiliary_device_id i40iw_auxiliary_id_table[] = {
+ {.name = "i40e.iwarp", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, i40iw_auxiliary_id_table);
+
+struct auxiliary_driver i40iw_auxiliary_drv = {
+ .name = "gen_1",
+ .id_table = i40iw_auxiliary_id_table,
+ .probe = i40iw_probe,
+ .remove = i40iw_remove,
+};
diff --git a/drivers/infiniband/hw/irdma/icrdma_hw.c b/drivers/infiniband/hw/irdma/icrdma_hw.c
new file mode 100644
index 000000000000..32f26284a788
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/icrdma_hw.c
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2017 - 2021 Intel Corporation */
+#include "osdep.h"
+#include "type.h"
+#include "icrdma_hw.h"
+
+static u32 icrdma_regs[IRDMA_MAX_REGS] = {
+ PFPE_CQPTAIL,
+ PFPE_CQPDB,
+ PFPE_CCQPSTATUS,
+ PFPE_CCQPHIGH,
+ PFPE_CCQPLOW,
+ PFPE_CQARM,
+ PFPE_CQACK,
+ PFPE_AEQALLOC,
+ PFPE_CQPERRCODES,
+ PFPE_WQEALLOC,
+ GLINT_DYN_CTL(0),
+ ICRDMA_DB_ADDR_OFFSET,
+
+ GLPCI_LBARCTRL,
+ GLPE_CPUSTATUS0,
+ GLPE_CPUSTATUS1,
+ GLPE_CPUSTATUS2,
+ PFINT_AEQCTL,
+ GLINT_CEQCTL(0),
+ VSIQF_PE_CTL1(0),
+ PFHMC_PDINV,
+ GLHMC_VFPDINV(0),
+ GLPE_CRITERR,
+ GLINT_RATE(0),
+};
+
+static u64 icrdma_masks[IRDMA_MAX_MASKS] = {
+ ICRDMA_CCQPSTATUS_CCQP_DONE,
+ ICRDMA_CCQPSTATUS_CCQP_ERR,
+ ICRDMA_CQPSQ_STAG_PDID,
+ ICRDMA_CQPSQ_CQ_CEQID,
+ ICRDMA_CQPSQ_CQ_CQID,
+ ICRDMA_COMMIT_FPM_CQCNT,
+ ICRDMA_CQPSQ_UPESD_HMCFNID,
+};
+
+static u64 icrdma_shifts[IRDMA_MAX_SHIFTS] = {
+ ICRDMA_CCQPSTATUS_CCQP_DONE_S,
+ ICRDMA_CCQPSTATUS_CCQP_ERR_S,
+ ICRDMA_CQPSQ_STAG_PDID_S,
+ ICRDMA_CQPSQ_CQ_CEQID_S,
+ ICRDMA_CQPSQ_CQ_CQID_S,
+ ICRDMA_COMMIT_FPM_CQCNT_S,
+ ICRDMA_CQPSQ_UPESD_HMCFNID_S,
+};
+
+/**
+ * icrdma_ena_irq - Enable interrupt
+ * @dev: pointer to the device structure
+ * @idx: vector index
+ */
+static void icrdma_ena_irq(struct irdma_sc_dev *dev, u32 idx)
+{
+ u32 val;
+ u32 interval = 0;
+
+ if (dev->ceq_itr && dev->aeq->msix_idx != idx)
+ interval = dev->ceq_itr >> 1; /* 2 usec units */
+ val = FIELD_PREP(IRDMA_GLINT_DYN_CTL_ITR_INDX, 0) |
+ FIELD_PREP(IRDMA_GLINT_DYN_CTL_INTERVAL, interval) |
+ FIELD_PREP(IRDMA_GLINT_DYN_CTL_INTENA, 1) |
+ FIELD_PREP(IRDMA_GLINT_DYN_CTL_CLEARPBA, 1);
+
+ if (dev->hw_attrs.uk_attrs.hw_rev != IRDMA_GEN_1)
+ writel(val, dev->hw_regs[IRDMA_GLINT_DYN_CTL] + idx);
+ else
+ writel(val, dev->hw_regs[IRDMA_GLINT_DYN_CTL] + (idx - 1));
+}
+
+/**
+ * icrdma_disable_irq - Disable interrupt
+ * @dev: pointer to the device structure
+ * @idx: vector index
+ */
+static void icrdma_disable_irq(struct irdma_sc_dev *dev, u32 idx)
+{
+ if (dev->hw_attrs.uk_attrs.hw_rev != IRDMA_GEN_1)
+ writel(0, dev->hw_regs[IRDMA_GLINT_DYN_CTL] + idx);
+ else
+ writel(0, dev->hw_regs[IRDMA_GLINT_DYN_CTL] + (idx - 1));
+}
+
+/**
+ * icrdma_cfg_ceq - Configure CEQ interrupt
+ * @dev: pointer to the device structure
+ * @ceq_id: Completion Event Queue ID
+ * @idx: vector index
+ * @enable: Enable CEQ interrupt when true
+ */
+static void icrdma_cfg_ceq(struct irdma_sc_dev *dev, u32 ceq_id, u32 idx,
+ bool enable)
+{
+ u32 reg_val;
+
+ reg_val = FIELD_PREP(IRDMA_GLINT_CEQCTL_CAUSE_ENA, enable) |
+ FIELD_PREP(IRDMA_GLINT_CEQCTL_MSIX_INDX, idx) |
+ FIELD_PREP(IRDMA_GLINT_CEQCTL_ITR_INDX, 3);
+
+ writel(reg_val, dev->hw_regs[IRDMA_GLINT_CEQCTL] + ceq_id);
+}
+
+static const struct irdma_irq_ops icrdma_irq_ops = {
+ .irdma_cfg_aeq = irdma_cfg_aeq,
+ .irdma_cfg_ceq = icrdma_cfg_ceq,
+ .irdma_dis_irq = icrdma_disable_irq,
+ .irdma_en_irq = icrdma_ena_irq,
+};
+
+static const struct irdma_hw_stat_map icrdma_hw_stat_map[] = {
+ [IRDMA_HW_STAT_INDEX_RXVLANERR] = { 0, 32, IRDMA_MAX_STATS_24 },
+ [IRDMA_HW_STAT_INDEX_IP4RXOCTS] = { 8, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4RXPKTS] = { 16, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4RXDISCARD] = { 24, 32, IRDMA_MAX_STATS_32 },
+ [IRDMA_HW_STAT_INDEX_IP4RXTRUNC] = { 24, 0, IRDMA_MAX_STATS_32 },
+ [IRDMA_HW_STAT_INDEX_IP4RXFRAGS] = { 32, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4RXMCOCTS] = { 40, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4RXMCPKTS] = { 48, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6RXOCTS] = { 56, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6RXPKTS] = { 64, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6RXDISCARD] = { 72, 32, IRDMA_MAX_STATS_32 },
+ [IRDMA_HW_STAT_INDEX_IP6RXTRUNC] = { 72, 0, IRDMA_MAX_STATS_32 },
+ [IRDMA_HW_STAT_INDEX_IP6RXFRAGS] = { 80, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6RXMCOCTS] = { 88, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6RXMCPKTS] = { 96, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4TXOCTS] = { 104, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4TXPKTS] = { 112, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4TXFRAGS] = { 120, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4TXMCOCTS] = { 128, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4TXMCPKTS] = { 136, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6TXOCTS] = { 144, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6TXPKTS] = { 152, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6TXFRAGS] = { 160, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6TXMCOCTS] = { 168, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6TXMCPKTS] = { 176, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4TXNOROUTE] = { 184, 32, IRDMA_MAX_STATS_24 },
+ [IRDMA_HW_STAT_INDEX_IP6TXNOROUTE] = { 184, 0, IRDMA_MAX_STATS_24 },
+ [IRDMA_HW_STAT_INDEX_TCPRXSEGS] = { 192, 32, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_TCPRXOPTERR] = { 200, 32, IRDMA_MAX_STATS_24 },
+ [IRDMA_HW_STAT_INDEX_TCPRXPROTOERR] = { 200, 0, IRDMA_MAX_STATS_24 },
+ [IRDMA_HW_STAT_INDEX_TCPTXSEG] = { 208, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_TCPRTXSEG] = { 216, 32, IRDMA_MAX_STATS_32 },
+ [IRDMA_HW_STAT_INDEX_UDPRXPKTS] = { 224, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_UDPTXPKTS] = { 232, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMARXWRS] = { 240, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMARXRDS] = { 248, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMARXSNDS] = { 256, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMATXWRS] = { 264, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMATXRDS] = { 272, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMATXSNDS] = { 280, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMAVBND] = { 288, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMAVINV] = { 296, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS] = { 304, 0, IRDMA_MAX_STATS_56 },
+ [IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED] = { 312, 32, IRDMA_MAX_STATS_24 },
+ [IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED] = { 312, 0, IRDMA_MAX_STATS_32 },
+ [IRDMA_HW_STAT_INDEX_TXNPCNPSENT] = { 320, 0, IRDMA_MAX_STATS_32 },
+};
+
+void icrdma_init_hw(struct irdma_sc_dev *dev)
+{
+ int i;
+ u8 __iomem *hw_addr;
+
+ for (i = 0; i < IRDMA_MAX_REGS; ++i) {
+ hw_addr = dev->hw->hw_addr;
+
+ if (i == IRDMA_DB_ADDR_OFFSET)
+ hw_addr = NULL;
+
+ dev->hw_regs[i] = (u32 __iomem *)(hw_addr + icrdma_regs[i]);
+ }
+ dev->hw_attrs.max_hw_vf_fpm_id = IRDMA_MAX_VF_FPM_ID;
+ dev->hw_attrs.first_hw_vf_fpm_id = IRDMA_FIRST_VF_FPM_ID;
+
+ for (i = 0; i < IRDMA_MAX_SHIFTS; ++i)
+ dev->hw_shifts[i] = icrdma_shifts[i];
+
+ for (i = 0; i < IRDMA_MAX_MASKS; ++i)
+ dev->hw_masks[i] = icrdma_masks[i];
+
+ dev->wqe_alloc_db = dev->hw_regs[IRDMA_WQEALLOC];
+ dev->cq_arm_db = dev->hw_regs[IRDMA_CQARM];
+ dev->aeq_alloc_db = dev->hw_regs[IRDMA_AEQALLOC];
+ dev->cqp_db = dev->hw_regs[IRDMA_CQPDB];
+ dev->cq_ack_db = dev->hw_regs[IRDMA_CQACK];
+ dev->irq_ops = &icrdma_irq_ops;
+ dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M | SZ_1G;
+ dev->hw_stats_map = icrdma_hw_stat_map;
+ dev->hw_attrs.max_hw_ird = ICRDMA_MAX_IRD_SIZE;
+ dev->hw_attrs.max_hw_ord = ICRDMA_MAX_ORD_SIZE;
+ dev->hw_attrs.max_stat_inst = ICRDMA_MAX_STATS_COUNT;
+ dev->hw_attrs.max_stat_idx = IRDMA_HW_STAT_INDEX_MAX_GEN_2;
+ dev->hw_attrs.max_hw_device_pages = ICRDMA_MAX_PUSH_PAGE_COUNT;
+
+ dev->hw_attrs.uk_attrs.min_hw_wq_size = ICRDMA_MIN_WQ_SIZE;
+ dev->hw_attrs.uk_attrs.max_hw_sq_chunk = IRDMA_MAX_QUANTA_PER_WR;
+ dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_RTS_AE |
+ IRDMA_FEATURE_CQ_RESIZE;
+}
diff --git a/drivers/infiniband/hw/irdma/icrdma_hw.h b/drivers/infiniband/hw/irdma/icrdma_hw.h
new file mode 100644
index 000000000000..d97944ab45da
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/icrdma_hw.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2017 - 2021 Intel Corporation */
+#ifndef ICRDMA_HW_H
+#define ICRDMA_HW_H
+
+#include "irdma.h"
+
+#define VFPE_CQPTAIL1 0x0000a000
+#define VFPE_CQPDB1 0x0000bc00
+#define VFPE_CCQPSTATUS1 0x0000b800
+#define VFPE_CCQPHIGH1 0x00009800
+#define VFPE_CCQPLOW1 0x0000ac00
+#define VFPE_CQARM1 0x0000b400
+#define VFPE_CQACK1 0x0000b000
+#define VFPE_AEQALLOC1 0x0000a400
+#define VFPE_CQPERRCODES1 0x00009c00
+#define VFPE_WQEALLOC1 0x0000c000
+#define VFINT_DYN_CTLN(_i) (0x00003800 + ((_i) * 4)) /* _i=0...63 */
+
+#define PFPE_CQPTAIL 0x00500880
+#define PFPE_CQPDB 0x00500800
+#define PFPE_CCQPSTATUS 0x0050a000
+#define PFPE_CCQPHIGH 0x0050a100
+#define PFPE_CCQPLOW 0x0050a080
+#define PFPE_CQARM 0x00502c00
+#define PFPE_CQACK 0x00502c80
+#define PFPE_AEQALLOC 0x00502d00
+#define GLINT_DYN_CTL(_INT) (0x00160000 + ((_INT) * 4)) /* _INT=0...2047 */
+#define GLPCI_LBARCTRL 0x0009de74
+#define GLPE_CPUSTATUS0 0x0050ba5c
+#define GLPE_CPUSTATUS1 0x0050ba60
+#define GLPE_CPUSTATUS2 0x0050ba64
+#define PFINT_AEQCTL 0x0016cb00
+#define PFPE_CQPERRCODES 0x0050a200
+#define PFPE_WQEALLOC 0x00504400
+#define GLINT_CEQCTL(_INT) (0x0015c000 + ((_INT) * 4)) /* _INT=0...2047 */
+#define VSIQF_PE_CTL1(_VSI) (0x00414000 + ((_VSI) * 4)) /* _VSI=0...767 */
+#define PFHMC_PDINV 0x00520300
+#define GLHMC_VFPDINV(_i) (0x00528300 + ((_i) * 4)) /* _i=0...31 */
+#define GLPE_CRITERR 0x00534000
+#define GLINT_RATE(_INT) (0x0015A000 + ((_INT) * 4)) /* _INT=0...2047 */ /* Reset Source: CORER */
+
+#define ICRDMA_DB_ADDR_OFFSET (8 * 1024 * 1024 - 64 * 1024)
+
+#define ICRDMA_VF_DB_ADDR_OFFSET (64 * 1024)
+
+/* shifts/masks for FLD_[LS/RS]_64 macros used in device table */
+#define ICRDMA_CCQPSTATUS_CCQP_DONE_S 0
+#define ICRDMA_CCQPSTATUS_CCQP_DONE BIT_ULL(0)
+#define ICRDMA_CCQPSTATUS_CCQP_ERR_S 31
+#define ICRDMA_CCQPSTATUS_CCQP_ERR BIT_ULL(31)
+#define ICRDMA_CQPSQ_STAG_PDID_S 46
+#define ICRDMA_CQPSQ_STAG_PDID GENMASK_ULL(63, 46)
+#define ICRDMA_CQPSQ_CQ_CEQID_S 22
+#define ICRDMA_CQPSQ_CQ_CEQID GENMASK_ULL(31, 22)
+#define ICRDMA_CQPSQ_CQ_CQID_S 0
+#define ICRDMA_CQPSQ_CQ_CQID GENMASK_ULL(18, 0)
+#define ICRDMA_COMMIT_FPM_CQCNT_S 0
+#define ICRDMA_COMMIT_FPM_CQCNT GENMASK_ULL(19, 0)
+#define ICRDMA_CQPSQ_UPESD_HMCFNID_S 0
+#define ICRDMA_CQPSQ_UPESD_HMCFNID GENMASK_ULL(5, 0)
+enum icrdma_device_caps_const {
+ ICRDMA_MAX_STATS_COUNT = 128,
+
+ ICRDMA_MAX_IRD_SIZE = 127,
+ ICRDMA_MAX_ORD_SIZE = 255,
+ ICRDMA_MIN_WQ_SIZE = 8 /* WQEs */,
+ ICRDMA_MAX_PUSH_PAGE_COUNT = 256,
+};
+
+void icrdma_init_hw(struct irdma_sc_dev *dev);
+#endif /* ICRDMA_HW_H*/
diff --git a/drivers/infiniband/hw/irdma/icrdma_if.c b/drivers/infiniband/hw/irdma/icrdma_if.c
new file mode 100644
index 000000000000..b49fd9cf2476
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/icrdma_if.c
@@ -0,0 +1,347 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2024 Intel Corporation */
+
+#include "main.h"
+#include <linux/net/intel/iidc_rdma_ice.h>
+
+static void icrdma_prep_tc_change(struct irdma_device *iwdev)
+{
+ iwdev->vsi.tc_change_pending = true;
+ irdma_sc_suspend_resume_qps(&iwdev->vsi, IRDMA_OP_SUSPEND);
+
+ /* Wait for all qp's to suspend */
+ wait_event_timeout(iwdev->suspend_wq,
+ !atomic_read(&iwdev->vsi.qp_suspend_reqs),
+ msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS));
+ irdma_ws_reset(&iwdev->vsi);
+}
+
+static void icrdma_fill_qos_info(struct irdma_l2params *l2params,
+ struct iidc_rdma_qos_params *qos_info)
+{
+ int i;
+
+ l2params->num_tc = qos_info->num_tc;
+ l2params->vsi_prio_type = qos_info->vport_priority_type;
+ l2params->vsi_rel_bw = qos_info->vport_relative_bw;
+ for (i = 0; i < l2params->num_tc; i++) {
+ l2params->tc_info[i].egress_virt_up =
+ qos_info->tc_info[i].egress_virt_up;
+ l2params->tc_info[i].ingress_virt_up =
+ qos_info->tc_info[i].ingress_virt_up;
+ l2params->tc_info[i].prio_type = qos_info->tc_info[i].prio_type;
+ l2params->tc_info[i].rel_bw = qos_info->tc_info[i].rel_bw;
+ l2params->tc_info[i].tc_ctx = qos_info->tc_info[i].tc_ctx;
+ }
+ for (i = 0; i < IIDC_MAX_USER_PRIORITY; i++)
+ l2params->up2tc[i] = qos_info->up2tc[i];
+ if (qos_info->pfc_mode == IIDC_DSCP_PFC_MODE) {
+ l2params->dscp_mode = true;
+ memcpy(l2params->dscp_map, qos_info->dscp_map, sizeof(l2params->dscp_map));
+ }
+}
+
+static void icrdma_iidc_event_handler(struct iidc_rdma_core_dev_info *cdev_info,
+ struct iidc_rdma_event *event)
+{
+ struct irdma_device *iwdev = dev_get_drvdata(&cdev_info->adev->dev);
+ struct irdma_l2params l2params = {};
+
+ if (*event->type & BIT(IIDC_RDMA_EVENT_AFTER_MTU_CHANGE)) {
+ ibdev_dbg(&iwdev->ibdev, "CLNT: new MTU = %d\n", iwdev->netdev->mtu);
+ if (iwdev->vsi.mtu != iwdev->netdev->mtu) {
+ l2params.mtu = iwdev->netdev->mtu;
+ l2params.mtu_changed = true;
+ irdma_log_invalid_mtu(l2params.mtu, &iwdev->rf->sc_dev);
+ irdma_change_l2params(&iwdev->vsi, &l2params);
+ }
+ } else if (*event->type & BIT(IIDC_RDMA_EVENT_BEFORE_TC_CHANGE)) {
+ if (iwdev->vsi.tc_change_pending)
+ return;
+
+ icrdma_prep_tc_change(iwdev);
+ } else if (*event->type & BIT(IIDC_RDMA_EVENT_AFTER_TC_CHANGE)) {
+ struct iidc_rdma_priv_dev_info *idc_priv = cdev_info->iidc_priv;
+
+ if (!iwdev->vsi.tc_change_pending)
+ return;
+
+ l2params.tc_changed = true;
+ ibdev_dbg(&iwdev->ibdev, "CLNT: TC Change\n");
+
+ icrdma_fill_qos_info(&l2params, &idc_priv->qos_info);
+ if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY)
+ iwdev->dcb_vlan_mode =
+ l2params.num_tc > 1 && !l2params.dscp_mode;
+ irdma_change_l2params(&iwdev->vsi, &l2params);
+ } else if (*event->type & BIT(IIDC_RDMA_EVENT_CRIT_ERR)) {
+ ibdev_warn(&iwdev->ibdev, "ICE OICR event notification: oicr = 0x%08x\n",
+ event->reg);
+ if (event->reg & IRDMAPFINT_OICR_PE_CRITERR_M) {
+ u32 pe_criterr;
+
+ pe_criterr = readl(iwdev->rf->sc_dev.hw_regs[IRDMA_GLPE_CRITERR]);
+#define IRDMA_Q1_RESOURCE_ERR 0x0001024d
+ if (pe_criterr != IRDMA_Q1_RESOURCE_ERR) {
+ ibdev_err(&iwdev->ibdev, "critical PE Error, GLPE_CRITERR=0x%08x\n",
+ pe_criterr);
+ iwdev->rf->reset = true;
+ } else {
+ ibdev_warn(&iwdev->ibdev, "Q1 Resource Check\n");
+ }
+ }
+ if (event->reg & IRDMAPFINT_OICR_HMC_ERR_M) {
+ ibdev_err(&iwdev->ibdev, "HMC Error\n");
+ iwdev->rf->reset = true;
+ }
+ if (event->reg & IRDMAPFINT_OICR_PE_PUSH_M) {
+ ibdev_err(&iwdev->ibdev, "PE Push Error\n");
+ iwdev->rf->reset = true;
+ }
+ if (iwdev->rf->reset)
+ iwdev->rf->gen_ops.request_reset(iwdev->rf);
+ }
+}
+
+/**
+ * icrdma_lan_register_qset - Register qset with LAN driver
+ * @vsi: vsi structure
+ * @tc_node: Traffic class node
+ */
+static int icrdma_lan_register_qset(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *tc_node)
+{
+ struct irdma_device *iwdev = vsi->back_vsi;
+ struct iidc_rdma_core_dev_info *cdev_info = iwdev->rf->cdev;
+ struct iidc_rdma_qset_params qset = {};
+ int ret;
+
+ qset.qs_handle = tc_node->qs_handle;
+ qset.tc = tc_node->traffic_class;
+ qset.vport_id = vsi->vsi_idx;
+ ret = ice_add_rdma_qset(cdev_info, &qset);
+ if (ret) {
+ ibdev_dbg(&iwdev->ibdev, "WS: LAN alloc_res for rdma qset failed.\n");
+ return ret;
+ }
+
+ tc_node->l2_sched_node_id = qset.teid;
+ vsi->qos[tc_node->user_pri].l2_sched_node_id = qset.teid;
+
+ return 0;
+}
+
+/**
+ * icrdma_lan_unregister_qset - Unregister qset with LAN driver
+ * @vsi: vsi structure
+ * @tc_node: Traffic class node
+ */
+static void icrdma_lan_unregister_qset(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *tc_node)
+{
+ struct irdma_device *iwdev = vsi->back_vsi;
+ struct iidc_rdma_core_dev_info *cdev_info = iwdev->rf->cdev;
+ struct iidc_rdma_qset_params qset = {};
+
+ qset.qs_handle = tc_node->qs_handle;
+ qset.tc = tc_node->traffic_class;
+ qset.vport_id = vsi->vsi_idx;
+ qset.teid = tc_node->l2_sched_node_id;
+
+ if (ice_del_rdma_qset(cdev_info, &qset))
+ ibdev_dbg(&iwdev->ibdev, "WS: LAN free_res for rdma qset failed.\n");
+}
+
+/**
+ * icrdma_request_reset - Request a reset
+ * @rf: RDMA PCI function
+ */
+static void icrdma_request_reset(struct irdma_pci_f *rf)
+{
+ ibdev_warn(&rf->iwdev->ibdev, "Requesting a reset\n");
+ ice_rdma_request_reset(rf->cdev, IIDC_FUNC_RESET);
+}
+
+static int icrdma_init_interrupts(struct irdma_pci_f *rf, struct iidc_rdma_core_dev_info *cdev)
+{
+ int i;
+
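+ /* request one vector per online CPU plus the dedicated AEQ vector(s) */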
+ rf->msix_count = num_online_cpus() + IRDMA_NUM_AEQ_MSIX;
+ rf->msix_entries = kcalloc(rf->msix_count, sizeof(*rf->msix_entries),
+ GFP_KERNEL);
+ if (!rf->msix_entries)
+ return -ENOMEM;
+
+ for (i = 0; i < rf->msix_count; i++)
+ if (ice_alloc_rdma_qvector(cdev, &rf->msix_entries[i]))
+ break;
+
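+ /* fail if fewer than the minimum number of vectors could be allocated */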
+ if (i < IRDMA_MIN_MSIX) {
+ while (--i >= 0)
+ ice_free_rdma_qvector(cdev, &rf->msix_entries[i]);
+
+ kfree(rf->msix_entries);
+ return -ENOMEM;
+ }
+
+ rf->msix_count = i;
+
+ return 0;
+}
+
+static void icrdma_deinit_interrupts(struct irdma_pci_f *rf, struct iidc_rdma_core_dev_info *cdev)
+{
+ int i;
+
+ for (i = 0; i < rf->msix_count; i++)
+ ice_free_rdma_qvector(cdev, &rf->msix_entries[i]);
+
+ kfree(rf->msix_entries);
+}
+
+static void icrdma_fill_device_info(struct irdma_device *iwdev,
+ struct iidc_rdma_core_dev_info *cdev_info)
+{
+ struct iidc_rdma_priv_dev_info *idc_priv = cdev_info->iidc_priv;
+ struct irdma_pci_f *rf = iwdev->rf;
+
+ rf->sc_dev.hw = &rf->hw;
+ rf->iwdev = iwdev;
+ rf->cdev = cdev_info;
+ rf->hw.hw_addr = idc_priv->hw_addr;
+ rf->pcidev = cdev_info->pdev;
+ rf->hw.device = &rf->pcidev->dev;
+ rf->pf_id = idc_priv->pf_id;
+ rf->rdma_ver = IRDMA_GEN_2;
+ rf->sc_dev.hw_attrs.uk_attrs.hw_rev = IRDMA_GEN_2;
+ rf->sc_dev.is_pf = true;
+ rf->sc_dev.privileged = true;
+
+ rf->gen_ops.register_qset = icrdma_lan_register_qset;
+ rf->gen_ops.unregister_qset = icrdma_lan_unregister_qset;
+
+ rf->default_vsi.vsi_idx = idc_priv->vport_id;
+ rf->protocol_used =
+ cdev_info->rdma_protocol == IIDC_RDMA_PROTOCOL_ROCEV2 ?
+ IRDMA_ROCE_PROTOCOL_ONLY : IRDMA_IWARP_PROTOCOL_ONLY;
+ rf->rsrc_profile = IRDMA_HMC_PROFILE_DEFAULT;
+ rf->rst_to = IRDMA_RST_TIMEOUT_HZ;
+ rf->gen_ops.request_reset = icrdma_request_reset;
+ rf->limits_sel = 7;
+ mutex_init(&rf->ah_tbl_lock);
+
+ iwdev->netdev = idc_priv->netdev;
+ iwdev->vsi_num = idc_priv->vport_id;
+ iwdev->init_state = INITIAL_STATE;
+ iwdev->roce_cwnd = IRDMA_ROCE_CWND_DEFAULT;
+ iwdev->roce_ackcreds = IRDMA_ROCE_ACKCREDS_DEFAULT;
+ iwdev->rcv_wnd = IRDMA_CM_DEFAULT_RCV_WND_SCALED;
+ iwdev->rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE;
+ if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY)
+ iwdev->roce_mode = true;
+}
+
+static int icrdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_device_id *id)
+{
+ struct iidc_rdma_core_auxiliary_dev *iidc_adev;
+ struct iidc_rdma_core_dev_info *cdev_info;
+ struct iidc_rdma_priv_dev_info *idc_priv;
+ struct irdma_l2params l2params = {};
+ struct irdma_device *iwdev;
+ struct irdma_pci_f *rf;
+ int err;
+
+ iidc_adev = container_of(aux_dev, struct iidc_rdma_core_auxiliary_dev, adev);
+ cdev_info = iidc_adev->cdev_info;
+ idc_priv = cdev_info->iidc_priv;
+
+ iwdev = ib_alloc_device(irdma_device, ibdev);
+ if (!iwdev)
+ return -ENOMEM;
+ iwdev->rf = kzalloc(sizeof(*rf), GFP_KERNEL);
+ if (!iwdev->rf) {
+ ib_dealloc_device(&iwdev->ibdev);
+ return -ENOMEM;
+ }
+
+ icrdma_fill_device_info(iwdev, cdev_info);
+ rf = iwdev->rf;
+
+ err = icrdma_init_interrupts(rf, cdev_info);
+ if (err)
+ goto err_init_interrupts;
+
+ err = irdma_ctrl_init_hw(rf);
+ if (err)
+ goto err_ctrl_init;
+
+ l2params.mtu = iwdev->netdev->mtu;
+ icrdma_fill_qos_info(&l2params, &idc_priv->qos_info);
+ if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY)
+ iwdev->dcb_vlan_mode = l2params.num_tc > 1 && !l2params.dscp_mode;
+
+ err = irdma_rt_init_hw(iwdev, &l2params);
+ if (err)
+ goto err_rt_init;
+
+ err = irdma_ib_register_device(iwdev);
+ if (err)
+ goto err_ibreg;
+
+ ice_rdma_update_vsi_filter(cdev_info, iwdev->vsi_num, true);
+
+ ibdev_dbg(&iwdev->ibdev, "INIT: Gen2 PF[%d] device probe success\n", PCI_FUNC(rf->pcidev->devfn));
+ auxiliary_set_drvdata(aux_dev, iwdev);
+
+ return 0;
+
+err_ibreg:
+ irdma_rt_deinit_hw(iwdev);
+err_rt_init:
+ irdma_ctrl_deinit_hw(rf);
+err_ctrl_init:
+ icrdma_deinit_interrupts(rf, cdev_info);
+err_init_interrupts:
+ mutex_destroy(&rf->ah_tbl_lock);
+ kfree(rf);
+ ib_dealloc_device(&iwdev->ibdev);
+
+ return err;
+}
+
+static void icrdma_remove(struct auxiliary_device *aux_dev)
+{
+ struct iidc_rdma_core_auxiliary_dev *idc_adev =
+ container_of(aux_dev, struct iidc_rdma_core_auxiliary_dev, adev);
+ struct iidc_rdma_core_dev_info *cdev_info = idc_adev->cdev_info;
+ struct irdma_device *iwdev = auxiliary_get_drvdata(aux_dev);
+ u8 rdma_ver = iwdev->rf->rdma_ver;
+
+ ice_rdma_update_vsi_filter(cdev_info, iwdev->vsi_num, false);
+ irdma_ib_unregister_device(iwdev);
+ icrdma_deinit_interrupts(iwdev->rf, cdev_info);
+ mutex_destroy(&iwdev->rf->ah_tbl_lock);
+
+ kfree(iwdev->rf);
+
+ pr_debug("INIT: Gen[%d] func[%d] device remove success\n",
+ rdma_ver, PCI_FUNC(cdev_info->pdev->devfn));
+}
+
+static const struct auxiliary_device_id icrdma_auxiliary_id_table[] = {
+ {.name = "ice.iwarp", },
+ {.name = "ice.roce", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, icrdma_auxiliary_id_table);
+
+struct iidc_rdma_core_auxiliary_drv icrdma_core_auxiliary_drv = {
+ .adrv = {
+ .name = "gen_2",
+ .id_table = icrdma_auxiliary_id_table,
+ .probe = icrdma_probe,
+ .remove = icrdma_remove,
+ },
+ .event_handler = icrdma_iidc_event_handler,
+};
diff --git a/drivers/infiniband/hw/irdma/ig3rdma_hw.c b/drivers/infiniband/hw/irdma/ig3rdma_hw.c
new file mode 100644
index 000000000000..2e8bb475e22a
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/ig3rdma_hw.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 - 2024 Intel Corporation */
+#include "osdep.h"
+#include "type.h"
+#include "protos.h"
+#include "ig3rdma_hw.h"
+
+/**
+ * ig3rdma_ena_irq - Enable interrupt
+ * @dev: pointer to the device structure
+ * @idx: vector index
+ */
+static void ig3rdma_ena_irq(struct irdma_sc_dev *dev, u32 idx)
+{
+ u32 val;
+ u32 int_stride = 1; /* one u32 per register */
+
+ if (dev->is_pf)
+ int_stride = 0x400;
+ else
+ idx--; /* VFs use DYN_CTL_N */
+
+ val = FIELD_PREP(IRDMA_GLINT_DYN_CTL_INTENA, 1) |
+ FIELD_PREP(IRDMA_GLINT_DYN_CTL_CLEARPBA, 1);
+
+ writel(val, dev->hw_regs[IRDMA_GLINT_DYN_CTL] + (idx * int_stride));
+}
+
+/**
+ * ig3rdma_disable_irq - Disable interrupt
+ * @dev: pointer to the device structure
+ * @idx: vector index
+ */
+static void ig3rdma_disable_irq(struct irdma_sc_dev *dev, u32 idx)
+{
+ u32 int_stride = 1; /* one u32 per register */
+
+ if (dev->is_pf)
+ int_stride = 0x400;
+ else
+ idx--; /* VFs use DYN_CTL_N */
+
+ writel(0, dev->hw_regs[IRDMA_GLINT_DYN_CTL] + (idx * int_stride));
+}
+
+static const struct irdma_irq_ops ig3rdma_irq_ops = {
+ .irdma_dis_irq = ig3rdma_disable_irq,
+ .irdma_en_irq = ig3rdma_ena_irq,
+};
+
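+/* Gen3 statistics are indexed by byte offset only; the bit offset and width fields are unused */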
+static const struct irdma_hw_stat_map ig3rdma_hw_stat_map[] = {
+ [IRDMA_HW_STAT_INDEX_RXVLANERR] = { 0, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4RXOCTS] = { 8, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4RXPKTS] = { 16, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4RXDISCARD] = { 24, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4RXTRUNC] = { 32, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4RXFRAGS] = { 40, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4RXMCOCTS] = { 48, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4RXMCPKTS] = { 56, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6RXOCTS] = { 64, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6RXPKTS] = { 72, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6RXDISCARD] = { 80, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6RXTRUNC] = { 88, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6RXFRAGS] = { 96, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6RXMCOCTS] = { 104, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6RXMCPKTS] = { 112, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4TXOCTS] = { 120, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4TXPKTS] = { 128, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4TXFRAGS] = { 136, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4TXMCOCTS] = { 144, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4TXMCPKTS] = { 152, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6TXOCTS] = { 160, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6TXPKTS] = { 168, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6TXFRAGS] = { 176, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6TXMCOCTS] = { 184, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6TXMCPKTS] = { 192, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4TXNOROUTE] = { 200, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6TXNOROUTE] = { 208, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_TCPRTXSEG] = { 216, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_TCPRXOPTERR] = { 224, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_TCPRXPROTOERR] = { 232, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_TCPTXSEG] = { 240, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_TCPRXSEGS] = { 248, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_UDPRXPKTS] = { 256, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_UDPTXPKTS] = { 264, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMARXWRS] = { 272, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMARXRDS] = { 280, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMARXSNDS] = { 288, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMATXWRS] = { 296, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMATXRDS] = { 304, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMATXSNDS] = { 312, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMAVBND] = { 320, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMAVINV] = { 328, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS] = { 336, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED] = { 344, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED] = { 352, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_TXNPCNPSENT] = { 360, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RNR_SENT] = { 368, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RNR_RCVD] = { 376, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMAORDLMTCNT] = { 384, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMAIRDLMTCNT] = { 392, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMARXATS] = { 408, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMATXATS] = { 416, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_NAKSEQERR] = { 424, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_NAKSEQERR_IMPLIED] = { 432, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RTO] = { 440, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RXOOOPKTS] = { 448, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_ICRCERR] = { 456, 0, 0 },
+};
+
+void ig3rdma_init_hw(struct irdma_sc_dev *dev)
+{
+ dev->irq_ops = &ig3rdma_irq_ops;
+ dev->hw_stats_map = ig3rdma_hw_stat_map;
+
+ dev->hw_attrs.uk_attrs.hw_rev = IRDMA_GEN_3;
+ dev->hw_attrs.uk_attrs.max_hw_wq_frags = IG3RDMA_MAX_WQ_FRAGMENT_COUNT;
+ dev->hw_attrs.uk_attrs.max_hw_read_sges = IG3RDMA_MAX_SGE_RD;
+ dev->hw_attrs.uk_attrs.max_hw_sq_chunk = IRDMA_MAX_QUANTA_PER_WR;
+ dev->hw_attrs.first_hw_vf_fpm_id = 0;
+ dev->hw_attrs.max_hw_vf_fpm_id = IG3_MAX_APFS + IG3_MAX_AVFS;
+ dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_64_BYTE_CQE;
+ dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_CQE_TIMESTAMPING;
+
+ dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_SRQ;
+ dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_RTS_AE |
+ IRDMA_FEATURE_CQ_RESIZE;
+ dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M | SZ_1G;
+ dev->hw_attrs.max_hw_ird = IG3RDMA_MAX_IRD_SIZE;
+ dev->hw_attrs.max_hw_ord = IG3RDMA_MAX_ORD_SIZE;
+ dev->hw_attrs.max_stat_inst = IG3RDMA_MAX_STATS_COUNT;
+ dev->hw_attrs.max_stat_idx = IRDMA_HW_STAT_INDEX_MAX_GEN_3;
+ dev->hw_attrs.uk_attrs.min_hw_wq_size = IG3RDMA_MIN_WQ_SIZE;
+ dev->hw_attrs.uk_attrs.max_hw_srq_quanta = IRDMA_SRQ_MAX_QUANTA;
+ dev->hw_attrs.uk_attrs.max_hw_inline = IG3RDMA_MAX_INLINE_DATA_SIZE;
+ dev->hw_attrs.max_hw_device_pages =
+ dev->is_pf ? IG3RDMA_MAX_PF_PUSH_PAGE_COUNT : IG3RDMA_MAX_VF_PUSH_PAGE_COUNT;
+}
+
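+/* return the CPU address for reg_offset if it falls within this region, or NULL otherwise */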
+static void __iomem *__ig3rdma_get_reg_addr(struct irdma_mmio_region *region, u64 reg_offset)
+{
+ if (reg_offset >= region->offset &&
+ reg_offset < (region->offset + region->len)) {
+ reg_offset -= region->offset;
+
+ return region->addr + reg_offset;
+ }
+
+ return NULL;
+}
+
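+/* resolve a register offset against the dedicated RDMA region first, then the other mapped regions; an unknown offset triggers a one-time warning */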
+void __iomem *ig3rdma_get_reg_addr(struct irdma_hw *hw, u64 reg_offset)
+{
+ u8 __iomem *reg_addr;
+ int i;
+
+ reg_addr = __ig3rdma_get_reg_addr(&hw->rdma_reg, reg_offset);
+ if (reg_addr)
+ return reg_addr;
+
+ for (i = 0; i < hw->num_io_regions; i++) {
+ reg_addr = __ig3rdma_get_reg_addr(&hw->io_regs[i], reg_offset);
+ if (reg_addr)
+ return reg_addr;
+ }
+
+ WARN_ON_ONCE(1);
+
+ return NULL;
+}
diff --git a/drivers/infiniband/hw/irdma/ig3rdma_hw.h b/drivers/infiniband/hw/irdma/ig3rdma_hw.h
new file mode 100644
index 000000000000..03d5f1188789
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/ig3rdma_hw.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 - 2024 Intel Corporation */
+#ifndef IG3RDMA_HW_H
+#define IG3RDMA_HW_H
+
+#define IG3_MAX_APFS 1
+#define IG3_MAX_AVFS 0
+
+#define IG3_PF_RDMA_REGION_OFFSET 0xBC00000
+#define IG3_PF_RDMA_REGION_LEN 0x401000
+#define IG3_VF_RDMA_REGION_OFFSET 0x8C00
+#define IG3_VF_RDMA_REGION_LEN 0x8400
+
+enum ig3rdma_device_caps_const {
+ IG3RDMA_MAX_WQ_FRAGMENT_COUNT = 14,
+ IG3RDMA_MAX_SGE_RD = 14,
+
+ IG3RDMA_MAX_STATS_COUNT = 128,
+
+ IG3RDMA_MAX_IRD_SIZE = 64,
+ IG3RDMA_MAX_ORD_SIZE = 64,
+ IG3RDMA_MIN_WQ_SIZE = 16 /* WQEs */,
+ IG3RDMA_MAX_INLINE_DATA_SIZE = 216,
+ IG3RDMA_MAX_PF_PUSH_PAGE_COUNT = 8192,
+ IG3RDMA_MAX_VF_PUSH_PAGE_COUNT = 16,
+};
+
+void __iomem *ig3rdma_get_reg_addr(struct irdma_hw *hw, u64 reg_offset);
+int ig3rdma_vchnl_send_sync(struct irdma_sc_dev *dev, u8 *msg, u16 len,
+ u8 *recv_msg, u16 *recv_len);
+
+#endif /* IG3RDMA_HW_H*/
diff --git a/drivers/infiniband/hw/irdma/ig3rdma_if.c b/drivers/infiniband/hw/irdma/ig3rdma_if.c
new file mode 100644
index 000000000000..e1d6670d9396
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/ig3rdma_if.c
@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2023 - 2024 Intel Corporation */
+
+#include "main.h"
+#include <linux/net/intel/iidc_rdma_idpf.h>
+#include "ig3rdma_hw.h"
+
+static void ig3rdma_idc_core_event_handler(struct iidc_rdma_core_dev_info *cdev_info,
+ struct iidc_rdma_event *event)
+{
+ struct irdma_pci_f *rf = auxiliary_get_drvdata(cdev_info->adev);
+
+ if (*event->type & BIT(IIDC_RDMA_EVENT_WARN_RESET)) {
+ rf->reset = true;
+ rf->sc_dev.vchnl_up = false;
+ }
+}
+
+int ig3rdma_vchnl_send_sync(struct irdma_sc_dev *dev, u8 *msg, u16 len,
+ u8 *recv_msg, u16 *recv_len)
+{
+ struct iidc_rdma_core_dev_info *cdev_info = dev_to_rf(dev)->cdev;
+ int ret;
+
+ ret = idpf_idc_rdma_vc_send_sync(cdev_info, msg, len, recv_msg,
+ recv_len);
+ if (ret == -ETIMEDOUT) {
+ ibdev_err(&(dev_to_rf(dev)->iwdev->ibdev),
+ "Virtual channel Req <-> Resp completion timeout\n");
+ dev->vchnl_up = false;
+ }
+
+ return ret;
+}
+
+static int ig3rdma_vchnl_init(struct irdma_pci_f *rf,
+ struct iidc_rdma_core_dev_info *cdev_info,
+ u8 *rdma_ver)
+{
+ struct iidc_rdma_priv_dev_info *idc_priv = cdev_info->iidc_priv;
+ struct irdma_vchnl_init_info virt_info;
+ u8 gen = rf->rdma_ver;
+ int ret;
+
+ rf->vchnl_wq = alloc_ordered_workqueue("irdma-virtchnl-wq", 0);
+ if (!rf->vchnl_wq)
+ return -ENOMEM;
+
+ mutex_init(&rf->sc_dev.vchnl_mutex);
+
+ virt_info.is_pf = !idc_priv->ftype;
+ virt_info.hw_rev = gen;
+ virt_info.privileged = gen == IRDMA_GEN_2;
+ virt_info.vchnl_wq = rf->vchnl_wq;
+ ret = irdma_sc_vchnl_init(&rf->sc_dev, &virt_info);
+ if (ret) {
+ destroy_workqueue(rf->vchnl_wq);
+ mutex_destroy(&rf->sc_dev.vchnl_mutex);
+ return ret;
+ }
+
+ *rdma_ver = rf->sc_dev.hw_attrs.uk_attrs.hw_rev;
+
+ return 0;
+}
+
+/**
+ * ig3rdma_request_reset - Request a reset
+ * @rf: RDMA PCI function
+ */
+static void ig3rdma_request_reset(struct irdma_pci_f *rf)
+{
+ ibdev_warn(&rf->iwdev->ibdev, "Requesting a reset\n");
+ idpf_idc_request_reset(rf->cdev, IIDC_FUNC_RESET);
+}
+
+static int ig3rdma_cfg_regions(struct irdma_hw *hw,
+ struct iidc_rdma_core_dev_info *cdev_info)
+{
+ struct iidc_rdma_priv_dev_info *idc_priv = cdev_info->iidc_priv;
+ struct pci_dev *pdev = cdev_info->pdev;
+ int i;
+
+ switch (idc_priv->ftype) {
+ case IIDC_FUNCTION_TYPE_PF:
+ hw->rdma_reg.len = IG3_PF_RDMA_REGION_LEN;
+ hw->rdma_reg.offset = IG3_PF_RDMA_REGION_OFFSET;
+ break;
+ case IIDC_FUNCTION_TYPE_VF:
+ hw->rdma_reg.len = IG3_VF_RDMA_REGION_LEN;
+ hw->rdma_reg.offset = IG3_VF_RDMA_REGION_OFFSET;
+ break;
+ default:
+ return -ENODEV;
+ }
+
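+ /* map the RDMA register block out of BAR 0 at the function-type specific offset */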
+ hw->rdma_reg.addr = ioremap(pci_resource_start(pdev, 0) + hw->rdma_reg.offset,
+ hw->rdma_reg.len);
+
+ if (!hw->rdma_reg.addr)
+ return -ENOMEM;
+
+ hw->num_io_regions = le16_to_cpu(idc_priv->num_memory_regions);
+ hw->io_regs = kcalloc(hw->num_io_regions,
+ sizeof(struct irdma_mmio_region), GFP_KERNEL);
+
+ if (!hw->io_regs) {
+ iounmap(hw->rdma_reg.addr);
+ return -ENOMEM;
+ }
+
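+ /* copy the pre-mapped memory regions advertised via iidc_priv so register lookups can search them */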
+ for (i = 0; i < hw->num_io_regions; i++) {
+ hw->io_regs[i].addr =
+ idc_priv->mapped_mem_regions[i].region_addr;
+ hw->io_regs[i].len =
+ le64_to_cpu(idc_priv->mapped_mem_regions[i].size);
+ hw->io_regs[i].offset =
+ le64_to_cpu(idc_priv->mapped_mem_regions[i].start_offset);
+ }
+
+ return 0;
+}
+
+static void ig3rdma_decfg_rf(struct irdma_pci_f *rf)
+{
+ struct irdma_hw *hw = &rf->hw;
+
+ mutex_destroy(&rf->ah_tbl_lock);
+ destroy_workqueue(rf->vchnl_wq);
+ mutex_destroy(&rf->sc_dev.vchnl_mutex);
+ kfree(hw->io_regs);
+ iounmap(hw->rdma_reg.addr);
+}
+
+static int ig3rdma_cfg_rf(struct irdma_pci_f *rf,
+ struct iidc_rdma_core_dev_info *cdev_info)
+{
+ struct iidc_rdma_priv_dev_info *idc_priv = cdev_info->iidc_priv;
+ int err;
+
+ rf->sc_dev.hw = &rf->hw;
+ rf->cdev = cdev_info;
+ rf->pcidev = cdev_info->pdev;
+ rf->hw.device = &rf->pcidev->dev;
+ rf->msix_count = idc_priv->msix_count;
+ rf->msix_entries = idc_priv->msix_entries;
+
+ err = ig3rdma_vchnl_init(rf, cdev_info, &rf->rdma_ver);
+ if (err)
+ return err;
+
+ err = ig3rdma_cfg_regions(&rf->hw, cdev_info);
+ if (err) {
+ destroy_workqueue(rf->vchnl_wq);
+ mutex_destroy(&rf->sc_dev.vchnl_mutex);
+ return err;
+ }
+
+ rf->protocol_used = IRDMA_ROCE_PROTOCOL_ONLY;
+ rf->rsrc_profile = IRDMA_HMC_PROFILE_DEFAULT;
+ rf->rst_to = IRDMA_RST_TIMEOUT_HZ;
+ rf->gen_ops.request_reset = ig3rdma_request_reset;
+ rf->limits_sel = 7;
+ mutex_init(&rf->ah_tbl_lock);
+
+ return 0;
+}
+
+static int ig3rdma_core_probe(struct auxiliary_device *aux_dev,
+ const struct auxiliary_device_id *id)
+{
+ struct iidc_rdma_core_auxiliary_dev *idc_adev =
+ container_of(aux_dev, struct iidc_rdma_core_auxiliary_dev, adev);
+ struct iidc_rdma_core_dev_info *cdev_info = idc_adev->cdev_info;
+ struct irdma_pci_f *rf;
+ int err;
+
+ rf = kzalloc(sizeof(*rf), GFP_KERNEL);
+ if (!rf)
+ return -ENOMEM;
+
+ err = ig3rdma_cfg_rf(rf, cdev_info);
+ if (err)
+ goto err_cfg_rf;
+
+ err = irdma_ctrl_init_hw(rf);
+ if (err)
+ goto err_ctrl_init;
+
+ auxiliary_set_drvdata(aux_dev, rf);
+
+ err = idpf_idc_vport_dev_ctrl(cdev_info, true);
+ if (err)
+ goto err_vport_ctrl;
+
+ return 0;
+
+err_vport_ctrl:
+ irdma_ctrl_deinit_hw(rf);
+err_ctrl_init:
+ ig3rdma_decfg_rf(rf);
+err_cfg_rf:
+ kfree(rf);
+
+ return err;
+}
+
+static void ig3rdma_core_remove(struct auxiliary_device *aux_dev)
+{
+ struct iidc_rdma_core_auxiliary_dev *idc_adev =
+ container_of(aux_dev, struct iidc_rdma_core_auxiliary_dev, adev);
+ struct iidc_rdma_core_dev_info *cdev_info = idc_adev->cdev_info;
+ struct irdma_pci_f *rf = auxiliary_get_drvdata(aux_dev);
+
+ idpf_idc_vport_dev_ctrl(cdev_info, false);
+ irdma_ctrl_deinit_hw(rf);
+ ig3rdma_decfg_rf(rf);
+ kfree(rf);
+}
+
+static const struct auxiliary_device_id ig3rdma_core_auxiliary_id_table[] = {
+ {.name = "idpf.8086.rdma.core", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, ig3rdma_core_auxiliary_id_table);
+
+struct iidc_rdma_core_auxiliary_drv ig3rdma_core_auxiliary_drv = {
+ .adrv = {
+ .name = "core",
+ .id_table = ig3rdma_core_auxiliary_id_table,
+ .probe = ig3rdma_core_probe,
+ .remove = ig3rdma_core_remove,
+ },
+ .event_handler = ig3rdma_idc_core_event_handler,
+};
diff --git a/drivers/infiniband/hw/irdma/irdma.h b/drivers/infiniband/hw/irdma/irdma.h
new file mode 100644
index 000000000000..ff938a01d70c
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/irdma.h
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2017 - 2021 Intel Corporation */
+#ifndef IRDMA_H
+#define IRDMA_H
+
+#define IRDMA_WQEALLOC_WQE_DESC_INDEX GENMASK(31, 20)
+
+#define IRDMA_CQPTAIL_WQTAIL GENMASK(10, 0)
+#define IRDMA_CQPTAIL_CQP_OP_ERR BIT(31)
+
+#define IRDMA_CQPERRCODES_CQP_MINOR_CODE GENMASK(15, 0)
+#define IRDMA_CQPERRCODES_CQP_MAJOR_CODE GENMASK(31, 16)
+#define IRDMA_GLPCI_LBARCTRL_PE_DB_SIZE GENMASK(5, 4)
+#define IRDMA_GLINT_RATE_INTERVAL GENMASK(5, 0)
+#define IRDMA_GLINT_RATE_INTRL_ENA BIT(6)
+#define IRDMA_GLINT_DYN_CTL_INTENA BIT(0)
+#define IRDMA_GLINT_DYN_CTL_CLEARPBA BIT(1)
+#define IRDMA_GLINT_DYN_CTL_ITR_INDX GENMASK(4, 3)
+#define IRDMA_GLINT_DYN_CTL_INTERVAL GENMASK(16, 5)
+#define IRDMA_GLINT_CEQCTL_ITR_INDX GENMASK(12, 11)
+#define IRDMA_GLINT_CEQCTL_CAUSE_ENA BIT(30)
+#define IRDMA_GLINT_CEQCTL_MSIX_INDX GENMASK(10, 0)
+#define IRDMA_PFINT_AEQCTL_MSIX_INDX GENMASK(10, 0)
+#define IRDMA_PFINT_AEQCTL_ITR_INDX GENMASK(12, 11)
+#define IRDMA_PFINT_AEQCTL_CAUSE_ENA BIT(30)
+#define IRDMA_PFHMC_PDINV_PMSDIDX GENMASK(11, 0)
+#define IRDMA_PFHMC_PDINV_PMSDPARTSEL BIT(15)
+#define IRDMA_PFHMC_PDINV_PMPDIDX GENMASK(24, 16)
+#define IRDMA_PFHMC_SDDATALOW_PMSDVALID BIT(0)
+#define IRDMA_PFHMC_SDDATALOW_PMSDTYPE BIT(1)
+#define IRDMA_PFHMC_SDDATALOW_PMSDBPCOUNT GENMASK(11, 2)
+#define IRDMA_PFHMC_SDDATALOW_PMSDDATALOW GENMASK(31, 12)
+#define IRDMA_PFHMC_SDCMD_PMSDWR BIT(31)
+
+#define IRDMA_INVALID_CQ_IDX 0xffffffff
+#define IRDMA_Q_INVALID_IDX 0xffff
+
+enum irdma_dyn_idx_t {
+ IRDMA_IDX_ITR0 = 0,
+ IRDMA_IDX_ITR1 = 1,
+ IRDMA_IDX_ITR2 = 2,
+ IRDMA_IDX_NOITR = 3,
+};
+
+enum irdma_registers {
+ IRDMA_CQPTAIL,
+ IRDMA_CQPDB,
+ IRDMA_CCQPSTATUS,
+ IRDMA_CCQPHIGH,
+ IRDMA_CCQPLOW,
+ IRDMA_CQARM,
+ IRDMA_CQACK,
+ IRDMA_AEQALLOC,
+ IRDMA_CQPERRCODES,
+ IRDMA_WQEALLOC,
+ IRDMA_GLINT_DYN_CTL,
+ IRDMA_DB_ADDR_OFFSET,
+ IRDMA_GLPCI_LBARCTRL,
+ IRDMA_GLPE_CPUSTATUS0,
+ IRDMA_GLPE_CPUSTATUS1,
+ IRDMA_GLPE_CPUSTATUS2,
+ IRDMA_PFINT_AEQCTL,
+ IRDMA_GLINT_CEQCTL,
+ IRDMA_VSIQF_PE_CTL1,
+ IRDMA_PFHMC_PDINV,
+ IRDMA_GLHMC_VFPDINV,
+ IRDMA_GLPE_CRITERR,
+ IRDMA_GLINT_RATE,
+ IRDMA_MAX_REGS, /* Must be last entry */
+};
+
+enum irdma_shifts {
+ IRDMA_CCQPSTATUS_CCQP_DONE_S,
+ IRDMA_CCQPSTATUS_CCQP_ERR_S,
+ IRDMA_CQPSQ_STAG_PDID_S,
+ IRDMA_CQPSQ_CQ_CEQID_S,
+ IRDMA_CQPSQ_CQ_CQID_S,
+ IRDMA_COMMIT_FPM_CQCNT_S,
+ IRDMA_CQPSQ_UPESD_HMCFNID_S,
+ IRDMA_MAX_SHIFTS,
+};
+
+enum irdma_masks {
+ IRDMA_CCQPSTATUS_CCQP_DONE_M,
+ IRDMA_CCQPSTATUS_CCQP_ERR_M,
+ IRDMA_CQPSQ_STAG_PDID_M,
+ IRDMA_CQPSQ_CQ_CEQID_M,
+ IRDMA_CQPSQ_CQ_CQID_M,
+ IRDMA_COMMIT_FPM_CQCNT_M,
+ IRDMA_CQPSQ_UPESD_HMCFNID_M,
+ IRDMA_MAX_MASKS, /* Must be last entry */
+};
+
+#define IRDMA_MAX_MGS_PER_CTX 8
+
+struct irdma_mcast_grp_ctx_entry_info {
+ u32 qp_id;
+ bool valid_entry;
+ u16 dest_port;
+ u32 use_cnt;
+};
+
+struct irdma_mcast_grp_info {
+ u8 dest_mac_addr[ETH_ALEN];
+ u16 vlan_id;
+ u16 hmc_fcn_id;
+ bool ipv4_valid:1;
+ bool vlan_valid:1;
+ u16 mg_id;
+ u32 no_of_mgs;
+ u32 dest_ip_addr[4];
+ u16 qs_handle;
+ struct irdma_dma_mem dma_mem_mc;
+ struct irdma_mcast_grp_ctx_entry_info mg_ctx_info[IRDMA_MAX_MGS_PER_CTX];
+};
+
+enum irdma_vers {
+ IRDMA_GEN_RSVD,
+ IRDMA_GEN_1,
+ IRDMA_GEN_2,
+ IRDMA_GEN_3,
+ IRDMA_GEN_NEXT,
+ IRDMA_GEN_MAX = IRDMA_GEN_NEXT-1
+};
+
+struct irdma_uk_attrs {
+ u64 feature_flags;
+ u32 max_hw_wq_frags;
+ u32 max_hw_read_sges;
+ u32 max_hw_inline;
+ u32 max_hw_rq_quanta;
+ u32 max_hw_wq_quanta;
+ u32 min_hw_cq_size;
+ u32 max_hw_cq_size;
+ u32 max_hw_srq_quanta;
+ u16 max_hw_sq_chunk;
+ u16 min_hw_wq_size;
+ u8 hw_rev;
+};
+
+struct irdma_hw_attrs {
+ struct irdma_uk_attrs uk_attrs;
+ u64 max_hw_outbound_msg_size;
+ u64 max_hw_inbound_msg_size;
+ u64 max_mr_size;
+ u64 page_size_cap;
+ u32 min_hw_qp_id;
+ u32 min_hw_aeq_size;
+ u32 max_hw_aeq_size;
+ u32 min_hw_ceq_size;
+ u32 max_hw_ceq_size;
+ u32 max_hw_device_pages;
+ u32 max_hw_vf_fpm_id;
+ u32 first_hw_vf_fpm_id;
+ u32 max_hw_ird;
+ u32 max_hw_ord;
+ u32 max_hw_wqes;
+ u32 max_hw_pds;
+ u32 max_hw_ena_vf_count;
+ u32 max_qp_wr;
+ u32 max_pe_ready_count;
+ u32 max_done_count;
+ u32 max_sleep_count;
+ u32 max_cqp_compl_wait_time_ms;
+ u32 min_hw_srq_id;
+ u16 max_stat_inst;
+ u16 max_stat_idx;
+};
+
+void i40iw_init_hw(struct irdma_sc_dev *dev);
+void icrdma_init_hw(struct irdma_sc_dev *dev);
+void ig3rdma_init_hw(struct irdma_sc_dev *dev);
+void __iomem *ig3rdma_get_reg_addr(struct irdma_hw *hw, u64 reg_offset);
+#endif /* IRDMA_H*/
diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c
new file mode 100644
index 000000000000..95957d52883d
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/main.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include "main.h"
+#include <linux/net/intel/iidc_rdma_idpf.h>
+
+MODULE_ALIAS("i40iw");
+MODULE_DESCRIPTION("Intel(R) Ethernet Protocol Driver for RDMA");
+MODULE_LICENSE("Dual BSD/GPL");
+
+static struct notifier_block irdma_inetaddr_notifier = {
+ .notifier_call = irdma_inetaddr_event
+};
+
+static struct notifier_block irdma_inetaddr6_notifier = {
+ .notifier_call = irdma_inet6addr_event
+};
+
+static struct notifier_block irdma_net_notifier = {
+ .notifier_call = irdma_net_event
+};
+
+static struct notifier_block irdma_netdevice_notifier = {
+ .notifier_call = irdma_netdevice_event
+};
+
+static void irdma_register_notifiers(void)
+{
+ register_inetaddr_notifier(&irdma_inetaddr_notifier);
+ register_inet6addr_notifier(&irdma_inetaddr6_notifier);
+ register_netevent_notifier(&irdma_net_notifier);
+ register_netdevice_notifier(&irdma_netdevice_notifier);
+}
+
+static void irdma_unregister_notifiers(void)
+{
+ unregister_netevent_notifier(&irdma_net_notifier);
+ unregister_inetaddr_notifier(&irdma_inetaddr_notifier);
+ unregister_inet6addr_notifier(&irdma_inetaddr6_notifier);
+ unregister_netdevice_notifier(&irdma_netdevice_notifier);
+}
+
+void irdma_log_invalid_mtu(u16 mtu, struct irdma_sc_dev *dev)
+{
+ if (mtu < IRDMA_MIN_MTU_IPV4)
+ ibdev_warn(to_ibdev(dev), "MTU setting [%d] too low for RDMA traffic. Minimum MTU is 576 for IPv4\n", mtu);
+ else if (mtu < IRDMA_MIN_MTU_IPV6)
+ ibdev_warn(to_ibdev(dev), "MTU setting [%d] too low for RDMA traffic. Minimum MTU is 1280 for IPv6\n", mtu);
+}
+
+static void ig3rdma_idc_vport_event_handler(struct iidc_rdma_vport_dev_info *cdev_info,
+ struct iidc_rdma_event *event)
+{
+ struct irdma_device *iwdev = auxiliary_get_drvdata(cdev_info->adev);
+ struct irdma_l2params l2params = {};
+
+ if (*event->type & BIT(IIDC_RDMA_EVENT_AFTER_MTU_CHANGE)) {
+ ibdev_dbg(&iwdev->ibdev, "CLNT: new MTU = %d\n", iwdev->netdev->mtu);
+ if (iwdev->vsi.mtu != iwdev->netdev->mtu) {
+ l2params.mtu = iwdev->netdev->mtu;
+ l2params.mtu_changed = true;
+ irdma_log_invalid_mtu(l2params.mtu, &iwdev->rf->sc_dev);
+ irdma_change_l2params(&iwdev->vsi, &l2params);
+ }
+ }
+}
+
+static int ig3rdma_vport_probe(struct auxiliary_device *aux_dev,
+ const struct auxiliary_device_id *id)
+{
+ struct iidc_rdma_vport_auxiliary_dev *idc_adev =
+ container_of(aux_dev, struct iidc_rdma_vport_auxiliary_dev, adev);
+ struct auxiliary_device *aux_core_dev = idc_adev->vdev_info->core_adev;
+ struct irdma_pci_f *rf = auxiliary_get_drvdata(aux_core_dev);
+ struct irdma_l2params l2params = {};
+ struct irdma_device *iwdev;
+ int err;
+
+ if (!rf) {
+ WARN_ON_ONCE(1);
+ return -ENOMEM;
+ }
+ iwdev = ib_alloc_device(irdma_device, ibdev);
+ if (!iwdev)
+ return -ENOMEM;
+
+ /* Fill iwdev info */
+ iwdev->is_vport = true;
+ iwdev->rf = rf;
+ iwdev->vport_id = idc_adev->vdev_info->vport_id;
+ iwdev->netdev = idc_adev->vdev_info->netdev;
+ iwdev->init_state = INITIAL_STATE;
+ iwdev->roce_cwnd = IRDMA_ROCE_CWND_DEFAULT;
+ iwdev->roce_ackcreds = IRDMA_ROCE_ACKCREDS_DEFAULT;
+ iwdev->rcv_wnd = IRDMA_CM_DEFAULT_RCV_WND_SCALED;
+ iwdev->rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE;
+ iwdev->roce_mode = true;
+ iwdev->push_mode = false;
+
+ l2params.mtu = iwdev->netdev->mtu;
+
+ err = irdma_rt_init_hw(iwdev, &l2params);
+ if (err)
+ goto err_rt_init;
+
+ err = irdma_ib_register_device(iwdev);
+ if (err)
+ goto err_ibreg;
+
+ auxiliary_set_drvdata(aux_dev, iwdev);
+
+ ibdev_dbg(&iwdev->ibdev,
+ "INIT: Gen[%d] vport[%d] probe success. dev_name = %s, core_dev_name = %s, netdev=%s\n",
+ rf->rdma_ver, idc_adev->vdev_info->vport_id,
+ dev_name(&aux_dev->dev),
+ dev_name(&idc_adev->vdev_info->core_adev->dev),
+ netdev_name(idc_adev->vdev_info->netdev));
+
+ return 0;
+err_ibreg:
+ irdma_rt_deinit_hw(iwdev);
+err_rt_init:
+ ib_dealloc_device(&iwdev->ibdev);
+
+ return err;
+}
+
+static void ig3rdma_vport_remove(struct auxiliary_device *aux_dev)
+{
+ struct iidc_rdma_vport_auxiliary_dev *idc_adev =
+ container_of(aux_dev, struct iidc_rdma_vport_auxiliary_dev, adev);
+ struct irdma_device *iwdev = auxiliary_get_drvdata(aux_dev);
+
+ ibdev_dbg(&iwdev->ibdev,
+ "INIT: Gen[%d] dev_name = %s, core_dev_name = %s, netdev=%s\n",
+ iwdev->rf->rdma_ver, dev_name(&aux_dev->dev),
+ dev_name(&idc_adev->vdev_info->core_adev->dev),
+ netdev_name(idc_adev->vdev_info->netdev));
+
+ irdma_ib_unregister_device(iwdev);
+}
+
+static const struct auxiliary_device_id ig3rdma_vport_auxiliary_id_table[] = {
+ {.name = "idpf.8086.rdma.vdev", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, ig3rdma_vport_auxiliary_id_table);
+
+static struct iidc_rdma_vport_auxiliary_drv ig3rdma_vport_auxiliary_drv = {
+ .adrv = {
+ .name = "vdev",
+ .id_table = ig3rdma_vport_auxiliary_id_table,
+ .probe = ig3rdma_vport_probe,
+ .remove = ig3rdma_vport_remove,
+ },
+ .event_handler = ig3rdma_idc_vport_event_handler,
+};
+
+static int __init irdma_init_module(void)
+{
+ int ret;
+
+ ret = auxiliary_driver_register(&i40iw_auxiliary_drv);
+ if (ret) {
+ pr_err("Failed i40iw(gen_1) auxiliary_driver_register() ret=%d\n",
+ ret);
+ return ret;
+ }
+
+ ret = auxiliary_driver_register(&icrdma_core_auxiliary_drv.adrv);
+ if (ret) {
+ auxiliary_driver_unregister(&i40iw_auxiliary_drv);
+ pr_err("Failed icrdma(gen_2) auxiliary_driver_register() ret=%d\n",
+ ret);
+ return ret;
+ }
+
+ ret = auxiliary_driver_register(&ig3rdma_core_auxiliary_drv.adrv);
+ if (ret) {
+ auxiliary_driver_unregister(&icrdma_core_auxiliary_drv.adrv);
+ auxiliary_driver_unregister(&i40iw_auxiliary_drv);
+ pr_err("Failed ig3rdma(gen_3) core auxiliary_driver_register() ret=%d\n",
+ ret);
+
+ return ret;
+ }
+
+ ret = auxiliary_driver_register(&ig3rdma_vport_auxiliary_drv.adrv);
+ if (ret) {
+ auxiliary_driver_unregister(&ig3rdma_core_auxiliary_drv.adrv);
+ auxiliary_driver_unregister(&icrdma_core_auxiliary_drv.adrv);
+ auxiliary_driver_unregister(&i40iw_auxiliary_drv);
+ pr_err("Failed ig3rdma vport auxiliary_driver_register() ret=%d\n",
+ ret);
+
+ return ret;
+ }
+ irdma_register_notifiers();
+
+ return 0;
+}
+
+static void __exit irdma_exit_module(void)
+{
+ irdma_unregister_notifiers();
+ auxiliary_driver_unregister(&ig3rdma_vport_auxiliary_drv.adrv);
+ auxiliary_driver_unregister(&ig3rdma_core_auxiliary_drv.adrv);
+ auxiliary_driver_unregister(&icrdma_core_auxiliary_drv.adrv);
+ auxiliary_driver_unregister(&i40iw_auxiliary_drv);
+}
+
+module_init(irdma_init_module);
+module_exit(irdma_exit_module);
diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h
new file mode 100644
index 000000000000..baab61e424a2
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/main.h
@@ -0,0 +1,578 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#ifndef IRDMA_MAIN_H
+#define IRDMA_MAIN_H
+
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/if_vlan.h>
+#include <net/addrconf.h>
+#include <net/netevent.h>
+#include <net/tcp.h>
+#include <net/ip6_route.h>
+#include <net/flow.h>
+#include <net/secure_seq.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/crc32c.h>
+#include <linux/kthread.h>
+#ifndef CONFIG_64BIT
+#include <linux/io-64-nonatomic-lo-hi.h>
+#endif
+#include <linux/auxiliary_bus.h>
+#include <linux/net/intel/iidc_rdma.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+#include <rdma/rdma_cm.h>
+#include <rdma/iw_cm.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_cache.h>
+#include <rdma/uverbs_ioctl.h>
+#include "osdep.h"
+#include "defs.h"
+#include "hmc.h"
+#include "type.h"
+#include "ws.h"
+#include "protos.h"
+#include "pble.h"
+#include "cm.h"
+#include <rdma/irdma-abi.h>
+#include "verbs.h"
+#include "user.h"
+#include "puda.h"
+
+extern struct auxiliary_driver i40iw_auxiliary_drv;
+extern struct iidc_rdma_core_auxiliary_drv icrdma_core_auxiliary_drv;
+extern struct iidc_rdma_core_auxiliary_drv ig3rdma_core_auxiliary_drv;
+
+#define IRDMA_FW_VER_DEFAULT 2
+#define IRDMA_HW_VER 2
+
+#define IRDMA_ARP_ADD 1
+#define IRDMA_ARP_DELETE 2
+#define IRDMA_ARP_RESOLVE 3
+
+#define IRDMA_MACIP_ADD 1
+#define IRDMA_MACIP_DELETE 2
+
+#define IW_GEN_3_CCQ_SIZE (2 * IRDMA_CQP_SW_SQSIZE_2048 + 2)
+#define IW_CCQ_SIZE (IRDMA_CQP_SW_SQSIZE_2048 + 2)
+#define IW_CEQ_SIZE 2048
+#define IW_AEQ_SIZE 2048
+
+#define RX_BUF_SIZE (1536 + 8)
+#define IW_REG0_SIZE (4 * 1024)
+#define IW_TX_TIMEOUT (6 * HZ)
+#define IW_FIRST_QPN 1
+
+#define IW_SW_CONTEXT_ALIGN 1024
+
+#define MAX_DPC_ITERATIONS 128
+
+#define IRDMA_EVENT_TIMEOUT_MS 5000
+#define IRDMA_VCHNL_EVENT_TIMEOUT 100000
+#define IRDMA_RST_TIMEOUT_HZ 4
+
+#define IRDMA_NO_QSET 0xffff
+
+#define IW_CFG_FPM_QP_COUNT 32768
+#define IRDMA_MAX_PAGES_PER_FMR 262144
+#define IRDMA_MIN_PAGES_PER_FMR 1
+#define IRDMA_CQP_COMPL_RQ_WQE_FLUSHED 2
+#define IRDMA_CQP_COMPL_SQ_WQE_FLUSHED 3
+
+#define IRDMA_Q_TYPE_PE_AEQ 0x80
+#define IRDMA_Q_INVALID_IDX 0xffff
+#define IRDMA_REM_ENDPOINT_TRK_QPID 3
+
+#define IRDMA_DRV_OPT_ENA_MPA_VER_0 0x00000001
+#define IRDMA_DRV_OPT_DISABLE_MPA_CRC 0x00000002
+#define IRDMA_DRV_OPT_DISABLE_FIRST_WRITE 0x00000004
+#define IRDMA_DRV_OPT_DISABLE_INTF 0x00000008
+#define IRDMA_DRV_OPT_ENA_MSI 0x00000010
+#define IRDMA_DRV_OPT_DUAL_LOGICAL_PORT 0x00000020
+#define IRDMA_DRV_OPT_NO_INLINE_DATA 0x00000080
+#define IRDMA_DRV_OPT_DISABLE_INT_MOD 0x00000100
+#define IRDMA_DRV_OPT_DISABLE_VIRT_WQ 0x00000200
+#define IRDMA_DRV_OPT_ENA_PAU 0x00000400
+#define IRDMA_DRV_OPT_MCAST_LOGPORT_MAP 0x00000800
+
+#define IW_HMC_OBJ_TYPE_NUM ARRAY_SIZE(iw_hmc_obj_types)
+#define IRDMA_ROCE_CWND_DEFAULT 0x400
+#define IRDMA_ROCE_ACKCREDS_DEFAULT 0x1E
+
+#define IRDMA_FLUSH_SQ BIT(0)
+#define IRDMA_FLUSH_RQ BIT(1)
+#define IRDMA_REFLUSH BIT(2)
+#define IRDMA_FLUSH_WAIT BIT(3)
+
+#define IRDMA_IRQ_NAME_STR_LEN (64)
+
+#define IRDMA_NUM_AEQ_MSIX 1
+#define IRDMA_MIN_MSIX 2
+
+enum init_completion_state {
+ INVALID_STATE = 0,
+ INITIAL_STATE,
+ CQP_CREATED,
+ HMC_OBJS_CREATED,
+ HW_RSRC_INITIALIZED,
+ CCQ_CREATED,
+ CEQ0_CREATED,
+ CEQS_CREATED,
+ PBLE_CHUNK_MEM,
+ AEQ_CREATED,
+ ILQ_CREATED,
+ IEQ_CREATED, /* Last state of probe */
+ IP_ADDR_REGISTERED, /* Last state of open */
+};
+
+struct irdma_rsrc_limits {
+ u32 qplimit;
+ u32 mrlimit;
+ u32 cqlimit;
+};
+
+struct irdma_cqp_err_info {
+ u16 maj;
+ u16 min;
+ const char *desc;
+};
+
+struct irdma_cqp_compl_info {
+ u32 op_ret_val;
+ u16 maj_err_code;
+ u16 min_err_code;
+ bool error;
+ u8 op_code;
+};
+
+struct irdma_cqp_request {
+ struct cqp_cmds_info info;
+ wait_queue_head_t waitq;
+ struct list_head list;
+ refcount_t refcnt;
+ void (*callback_fcn)(struct irdma_cqp_request *cqp_request);
+ void *param;
+ struct irdma_cqp_compl_info compl_info;
+ bool request_done; /* READ/WRITE_ONCE macros operate on it */
+ bool waiting:1;
+ bool dynamic:1;
+ bool pending:1;
+};
+
+struct irdma_cqp {
+ struct irdma_sc_cqp sc_cqp;
+ spinlock_t req_lock; /* protect CQP request list */
+ spinlock_t compl_lock; /* protect CQP completion processing */
+ wait_queue_head_t waitq;
+ wait_queue_head_t remove_wq;
+ struct irdma_dma_mem sq;
+ struct irdma_dma_mem host_ctx;
+ u64 *scratch_array;
+ struct irdma_cqp_request *cqp_requests;
+ struct irdma_ooo_cqp_op *oop_op_array;
+ struct list_head cqp_avail_reqs;
+ struct list_head cqp_pending_reqs;
+};
+
+struct irdma_ccq {
+ struct irdma_sc_cq sc_cq;
+ struct irdma_dma_mem mem_cq;
+ struct irdma_dma_mem shadow_area;
+};
+
+struct irdma_ceq {
+ struct irdma_sc_ceq sc_ceq;
+ struct irdma_dma_mem mem;
+ u32 irq;
+ u32 msix_idx;
+ struct irdma_pci_f *rf;
+ struct tasklet_struct dpc_tasklet;
+ spinlock_t ce_lock; /* sync cq destroy with cq completion event notification */
+};
+
+struct irdma_aeq {
+ struct irdma_sc_aeq sc_aeq;
+ struct irdma_dma_mem mem;
+ struct irdma_pble_alloc palloc;
+ bool virtual_map;
+};
+
+struct irdma_arp_entry {
+ u32 ip_addr[4];
+ u8 mac_addr[ETH_ALEN];
+};
+
+struct irdma_msix_vector {
+ u32 idx;
+ u32 irq;
+ u32 cpu_affinity;
+ u32 ceq_id;
+ cpumask_t mask;
+ char name[IRDMA_IRQ_NAME_STR_LEN];
+};
+
+struct irdma_mc_table_info {
+ u32 mgn;
+ u32 dest_ip[4];
+ bool lan_fwd:1;
+ bool ipv4_valid:1;
+};
+
+struct mc_table_list {
+ struct list_head list;
+ struct irdma_mc_table_info mc_info;
+ struct irdma_mcast_grp_info mc_grp_ctx;
+};
+
+struct irdma_qv_info {
+ u32 v_idx; /* msix_vector */
+ u16 ceq_idx;
+ u16 aeq_idx;
+ u8 itr_idx;
+};
+
+struct irdma_qvlist_info {
+ u32 num_vectors;
+ struct irdma_qv_info qv_info[] __counted_by(num_vectors);
+};
+
+struct irdma_gen_ops {
+ void (*request_reset)(struct irdma_pci_f *rf);
+ int (*register_qset)(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *tc_node);
+ void (*unregister_qset)(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *tc_node);
+};
+
+struct irdma_pci_f {
+ bool reset:1;
+ bool rsrc_created:1;
+ bool msix_shared:1;
+ bool hwqp1_rsvd:1;
+ u8 rsrc_profile;
+ u8 *hmc_info_mem;
+ u8 *mem_rsrc;
+ u8 rdma_ver;
+ u8 rst_to;
+ u8 pf_id;
+ enum irdma_protocol_used protocol_used;
+ u32 sd_type;
+ u32 msix_count;
+ u32 max_mr;
+ u32 max_qp;
+ u32 max_cq;
+ u32 max_srq;
+ u32 next_srq;
+ u32 max_ah;
+ u32 next_ah;
+ u32 max_mcg;
+ u32 next_mcg;
+ u32 max_pd;
+ u32 next_qp;
+ u32 next_cq;
+ u32 next_pd;
+ u32 max_mr_size;
+ u32 max_cqe;
+ u32 mr_stagmask;
+ u32 used_pds;
+ u32 used_cqs;
+ u32 used_srqs;
+ u32 used_mrs;
+ u32 used_qps;
+ u32 arp_table_size;
+ u32 next_arp_index;
+ u32 ceqs_count;
+ u32 next_ws_node_id;
+ u32 max_ws_node_id;
+ u32 limits_sel;
+ unsigned long *allocated_ws_nodes;
+ unsigned long *allocated_qps;
+ unsigned long *allocated_cqs;
+ unsigned long *allocated_srqs;
+ unsigned long *allocated_mrs;
+ unsigned long *allocated_pds;
+ unsigned long *allocated_mcgs;
+ unsigned long *allocated_ahs;
+ unsigned long *allocated_arps;
+ enum init_completion_state init_state;
+ struct irdma_sc_dev sc_dev;
+ struct pci_dev *pcidev;
+ void *cdev;
+ struct irdma_hw hw;
+ struct irdma_cqp cqp;
+ struct irdma_ccq ccq;
+ struct irdma_aeq aeq;
+ struct irdma_ceq *ceqlist;
+ struct irdma_hmc_pble_rsrc *pble_rsrc;
+ struct irdma_arp_entry *arp_table;
+ spinlock_t arp_lock; /*protect ARP table access*/
+ spinlock_t rsrc_lock; /* protect HW resource array access */
+ spinlock_t qptable_lock; /*protect QP table access*/
+ spinlock_t cqtable_lock; /*protect CQ table access*/
+ struct irdma_qp **qp_table;
+ struct irdma_cq **cq_table;
+ spinlock_t qh_list_lock; /* protect mc_qht_list */
+ struct mc_table_list mc_qht_list;
+ struct irdma_msix_vector *iw_msixtbl;
+ struct irdma_qvlist_info *iw_qvlist;
+ struct tasklet_struct dpc_tasklet;
+ struct msix_entry *msix_entries;
+ struct irdma_dma_mem obj_mem;
+ struct irdma_dma_mem obj_next;
+ atomic_t vchnl_msgs;
+ wait_queue_head_t vchnl_waitq;
+ struct workqueue_struct *cqp_cmpl_wq;
+ struct work_struct cqp_cmpl_work;
+ struct workqueue_struct *vchnl_wq;
+ struct irdma_sc_vsi default_vsi;
+ void *back_fcn;
+ struct irdma_gen_ops gen_ops;
+ struct irdma_device *iwdev;
+ DECLARE_HASHTABLE(ah_hash_tbl, 8);
+ struct mutex ah_tbl_lock; /* protect AH hash table access */
+};
+
+struct irdma_device {
+ struct ib_device ibdev;
+ struct irdma_pci_f *rf;
+ struct net_device *netdev;
+ struct workqueue_struct *cleanup_wq;
+ struct irdma_sc_vsi vsi;
+ struct irdma_cm_core cm_core;
+ u32 roce_cwnd;
+ u32 roce_ackcreds;
+ u32 vendor_id;
+ u32 vendor_part_id;
+ u32 push_mode;
+ u32 rcv_wnd;
+ u16 mac_ip_table_idx;
+ u16 vsi_num;
+ u16 vport_id;
+ u8 rcv_wscale;
+ u8 iw_status;
+ bool roce_mode:1;
+ bool roce_dcqcn_en:1;
+ bool dcb_vlan_mode:1;
+ bool iw_ooo:1;
+ bool is_vport:1;
+ enum init_completion_state init_state;
+
+ wait_queue_head_t suspend_wq;
+};
+
+static inline struct irdma_device *to_iwdev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct irdma_device, ibdev);
+}
+
+static inline struct irdma_ucontext *to_ucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct irdma_ucontext, ibucontext);
+}
+
+static inline struct irdma_user_mmap_entry *
+to_irdma_mmap_entry(struct rdma_user_mmap_entry *rdma_entry)
+{
+ return container_of(rdma_entry, struct irdma_user_mmap_entry,
+ rdma_entry);
+}
+
+static inline struct irdma_pd *to_iwpd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct irdma_pd, ibpd);
+}
+
+static inline struct irdma_ah *to_iwah(struct ib_ah *ibah)
+{
+ return container_of(ibah, struct irdma_ah, ibah);
+}
+
+static inline struct irdma_mr *to_iwmr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct irdma_mr, ibmr);
+}
+
+static inline struct irdma_mr *to_iwmw(struct ib_mw *ibmw)
+{
+ return container_of(ibmw, struct irdma_mr, ibmw);
+}
+
+static inline struct irdma_cq *to_iwcq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct irdma_cq, ibcq);
+}
+
+static inline struct irdma_qp *to_iwqp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct irdma_qp, ibqp);
+}
+
+static inline struct irdma_pci_f *dev_to_rf(struct irdma_sc_dev *dev)
+{
+ return container_of(dev, struct irdma_pci_f, sc_dev);
+}
+
+static inline struct irdma_srq *to_iwsrq(struct ib_srq *ibsrq)
+{
+ return container_of(ibsrq, struct irdma_srq, ibsrq);
+}
+
+/**
+ * irdma_alloc_rsrc - allocate a resource
+ * @rf: RDMA PCI function
+ * @rsrc_array: resource bit array
+ * @max_rsrc: maximum resource number
+ * @req_rsrc_num: allocated resource number
+ * @next: next free id
+ **/
+static inline int irdma_alloc_rsrc(struct irdma_pci_f *rf,
+ unsigned long *rsrc_array, u32 max_rsrc,
+ u32 *req_rsrc_num, u32 *next)
+{
+ u32 rsrc_num;
+ unsigned long flags;
+
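+ /* search forward from the last-allocated hint, then wrap to the start of the bitmap */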
+ spin_lock_irqsave(&rf->rsrc_lock, flags);
+ rsrc_num = find_next_zero_bit(rsrc_array, max_rsrc, *next);
+ if (rsrc_num >= max_rsrc) {
+ rsrc_num = find_first_zero_bit(rsrc_array, max_rsrc);
+ if (rsrc_num >= max_rsrc) {
+ spin_unlock_irqrestore(&rf->rsrc_lock, flags);
+ ibdev_dbg(&rf->iwdev->ibdev,
+ "ERR: resource [%d] allocation failed\n",
+ rsrc_num);
+ return -EOVERFLOW;
+ }
+ }
+ __set_bit(rsrc_num, rsrc_array);
+ *next = rsrc_num + 1;
+ if (*next == max_rsrc)
+ *next = 0;
+ *req_rsrc_num = rsrc_num;
+ spin_unlock_irqrestore(&rf->rsrc_lock, flags);
+
+ return 0;
+}
+
+/**
+ * irdma_free_rsrc - free a resource
+ * @rf: RDMA PCI function
+ * @rsrc_array: resource bit array for the resource number
+ * @rsrc_num: resource number to free
+ **/
+static inline void irdma_free_rsrc(struct irdma_pci_f *rf,
+ unsigned long *rsrc_array, u32 rsrc_num)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&rf->rsrc_lock, flags);
+ __clear_bit(rsrc_num, rsrc_array);
+ spin_unlock_irqrestore(&rf->rsrc_lock, flags);
+}
+
+int irdma_ctrl_init_hw(struct irdma_pci_f *rf);
+void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf);
+int irdma_rt_init_hw(struct irdma_device *iwdev,
+ struct irdma_l2params *l2params);
+void irdma_rt_deinit_hw(struct irdma_device *iwdev);
+void irdma_qp_add_ref(struct ib_qp *ibqp);
+void irdma_qp_rem_ref(struct ib_qp *ibqp);
+void irdma_free_lsmm_rsrc(struct irdma_qp *iwqp);
+struct ib_qp *irdma_get_qp(struct ib_device *ibdev, int qpn);
+void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask);
+void irdma_manage_arp_cache(struct irdma_pci_f *rf,
+ const unsigned char *mac_addr,
+ u32 *ip_addr, bool ipv4, u32 action);
+struct irdma_apbvt_entry *irdma_add_apbvt(struct irdma_device *iwdev, u16 port);
+void irdma_del_apbvt(struct irdma_device *iwdev,
+ struct irdma_apbvt_entry *entry);
+struct irdma_cqp_request *irdma_alloc_and_get_cqp_request(struct irdma_cqp *cqp,
+ bool wait);
+void irdma_free_cqp_request(struct irdma_cqp *cqp,
+ struct irdma_cqp_request *cqp_request);
+void irdma_put_cqp_request(struct irdma_cqp *cqp,
+ struct irdma_cqp_request *cqp_request);
+int irdma_alloc_local_mac_entry(struct irdma_pci_f *rf, u16 *mac_tbl_idx);
+int irdma_add_local_mac_entry(struct irdma_pci_f *rf, const u8 *mac_addr, u16 idx);
+void irdma_del_local_mac_entry(struct irdma_pci_f *rf, u16 idx);
+
+u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf);
+void irdma_port_ibevent(struct irdma_device *iwdev);
+void irdma_cm_disconn(struct irdma_qp *qp);
+
+bool irdma_cqp_crit_err(struct irdma_sc_dev *dev, u8 cqp_cmd,
+ u16 maj_err_code, u16 min_err_code);
+int irdma_handle_cqp_op(struct irdma_pci_f *rf,
+ struct irdma_cqp_request *cqp_request);
+
+int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+ struct ib_udata *udata);
+int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+void irdma_cq_add_ref(struct ib_cq *ibcq);
+void irdma_cq_rem_ref(struct ib_cq *ibcq);
+void irdma_cq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_cq *cq);
+void irdma_srq_event(struct irdma_sc_srq *srq);
+void irdma_srq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_srq *srq);
+void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf);
+int irdma_hw_modify_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp,
+ struct irdma_modify_qp_info *info, bool wait);
+int irdma_qp_suspend_resume(struct irdma_sc_qp *qp, bool suspend);
+int irdma_manage_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cminfo,
+ enum irdma_quad_entry_type etype,
+ enum irdma_quad_hash_manage_type mtype, void *cmnode,
+ bool wait);
+void irdma_receive_ilq(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *rbuf);
+void irdma_free_sqbuf(struct irdma_sc_vsi *vsi, void *bufp);
+void irdma_free_qp_rsrc(struct irdma_qp *iwqp);
+int irdma_setup_cm_core(struct irdma_device *iwdev, u8 ver);
+void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core);
+void irdma_next_iw_state(struct irdma_qp *iwqp, u8 state, u8 del_hash, u8 term,
+ u8 term_len);
+int irdma_send_syn(struct irdma_cm_node *cm_node, u32 sendack);
+int irdma_send_reset(struct irdma_cm_node *cm_node);
+struct irdma_cm_node *irdma_find_node(struct irdma_cm_core *cm_core,
+ u16 rem_port, u32 *rem_addr, u16 loc_port,
+ u32 *loc_addr, u16 vlan_id);
+int irdma_hw_flush_wqes(struct irdma_pci_f *rf, struct irdma_sc_qp *qp,
+ struct irdma_qp_flush_info *info, bool wait);
+void irdma_gen_ae(struct irdma_pci_f *rf, struct irdma_sc_qp *qp,
+ struct irdma_gen_ae_info *info, bool wait);
+void irdma_copy_ip_ntohl(u32 *dst, __be32 *src);
+void irdma_copy_ip_htonl(__be32 *dst, u32 *src);
+u16 irdma_get_vlan_ipv4(u32 *addr);
+void irdma_get_vlan_mac_ipv6(u32 *addr, u16 *vlan_id, u8 *mac);
+struct ib_mr *irdma_reg_phys_mr(struct ib_pd *ib_pd, u64 addr, u64 size,
+ int acc, u64 *iova_start, bool dma_mr);
+int irdma_upload_qp_context(struct irdma_qp *iwqp, bool freeze, bool raw);
+void irdma_cqp_ce_handler(struct irdma_pci_f *rf, struct irdma_sc_cq *cq);
+int irdma_ah_cqp_op(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, u8 cmd,
+ bool wait,
+ void (*callback_fcn)(struct irdma_cqp_request *cqp_request),
+ void *cb_param);
+void irdma_gsi_ud_qp_ah_cb(struct irdma_cqp_request *cqp_request);
+int irdma_inetaddr_event(struct notifier_block *notifier, unsigned long event,
+ void *ptr);
+int irdma_inet6addr_event(struct notifier_block *notifier, unsigned long event,
+ void *ptr);
+int irdma_net_event(struct notifier_block *notifier, unsigned long event,
+ void *ptr);
+int irdma_netdevice_event(struct notifier_block *notifier, unsigned long event,
+ void *ptr);
+void irdma_add_ip(struct irdma_device *iwdev);
+void cqp_compl_worker(struct work_struct *work);
+void irdma_log_invalid_mtu(u16 mtu, struct irdma_sc_dev *dev);
+#endif /* IRDMA_MAIN_H */
diff --git a/drivers/infiniband/hw/irdma/osdep.h b/drivers/infiniband/hw/irdma/osdep.h
new file mode 100644
index 000000000000..3f73ceacccb6
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/osdep.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#ifndef IRDMA_OSDEP_H
+#define IRDMA_OSDEP_H
+
+#include <linux/pci.h>
+#include <linux/bitfield.h>
+#include <rdma/ib_verbs.h>
+#include <net/dscp.h>
+
+#define STATS_TIMER_DELAY 60000
+
+struct irdma_dma_info {
+ dma_addr_t *dmaaddrs;
+};
+
+struct irdma_dma_mem {
+ void *va;
+ dma_addr_t pa;
+ u32 size;
+} __packed;
+
+struct irdma_virt_mem {
+ void *va;
+ u32 size;
+} __packed;
+
+struct irdma_sc_vsi;
+struct irdma_sc_dev;
+struct irdma_sc_qp;
+struct irdma_puda_buf;
+struct irdma_puda_cmpl_info;
+struct irdma_update_sds_info;
+struct irdma_hmc_fcn_info;
+struct irdma_manage_vf_pble_info;
+struct irdma_hw;
+struct irdma_pci_f;
+
+struct ib_device *to_ibdev(struct irdma_sc_dev *dev);
+void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp);
+enum irdma_status_code irdma_vf_wait_vchnl_resp(struct irdma_sc_dev *dev);
+bool irdma_vf_clear_to_send(struct irdma_sc_dev *dev);
+void irdma_add_dev_ref(struct irdma_sc_dev *dev);
+void irdma_put_dev_ref(struct irdma_sc_dev *dev);
+int irdma_ieq_check_mpacrc(const void *addr, u32 len, u32 val);
+struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev,
+ struct irdma_puda_buf *buf);
+void irdma_send_ieq_ack(struct irdma_sc_qp *qp);
+void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len,
+ u32 seqnum);
+int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
+ struct irdma_puda_buf *buf);
+int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev,
+ struct irdma_update_sds_info *info);
+int irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev,
+ struct irdma_hmc_fcn_info *hmcfcninfo,
+ u16 *pmf_idx);
+int irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev,
+ struct irdma_dma_mem *mem);
+void *irdma_remove_cqp_head(struct irdma_sc_dev *dev);
+void irdma_term_modify_qp(struct irdma_sc_qp *qp, u8 next_state, u8 term,
+ u8 term_len);
+void irdma_terminate_done(struct irdma_sc_qp *qp, int timeout_occurred);
+void irdma_terminate_start_timer(struct irdma_sc_qp *qp);
+void irdma_terminate_del_timer(struct irdma_sc_qp *qp);
+void irdma_hw_stats_start_timer(struct irdma_sc_vsi *vsi);
+void irdma_hw_stats_stop_timer(struct irdma_sc_vsi *vsi);
+void wr32(struct irdma_hw *hw, u32 reg, u32 val);
+u32 rd32(struct irdma_hw *hw, u32 reg);
+u64 rd64(struct irdma_hw *hw, u32 reg);
+int irdma_map_vm_page_list(struct irdma_hw *hw, void *va, dma_addr_t *pg_dma,
+ u32 pg_cnt);
+void irdma_unmap_vm_page_list(struct irdma_hw *hw, dma_addr_t *pg_dma, u32 pg_cnt);
+#endif /* IRDMA_OSDEP_H */
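/*
 * Illustrative sketch (not part of the patch): irdma_dma_mem above wraps a
 * coherent DMA allocation, mirroring the pattern the PUDA code later in this
 * series uses for its QP/CQ memory.  The helper name is hypothetical and is
 * shown only to document how the fields are typically populated.
 */
static inline int irdma_example_alloc_dma_mem(struct device *dev,
					      struct irdma_dma_mem *mem,
					      u32 size)
{
	mem->size = size;
	/* the va/pa pair is later handed to hardware via WQEs or context */
	mem->va = dma_alloc_coherent(dev, mem->size, &mem->pa, GFP_KERNEL);

	return mem->va ? 0 : -ENOMEM;
}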
diff --git a/drivers/infiniband/hw/irdma/pble.c b/drivers/infiniband/hw/irdma/pble.c
new file mode 100644
index 000000000000..28dfad7f940c
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/pble.c
@@ -0,0 +1,519 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include "osdep.h"
+#include "hmc.h"
+#include "defs.h"
+#include "type.h"
+#include "protos.h"
+#include "pble.h"
+
+static int add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc);
+
+/**
+ * irdma_destroy_pble_prm - destroy prm during module unload
+ * @pble_rsrc: pble resources
+ */
+void irdma_destroy_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc)
+{
+ struct irdma_chunk *chunk;
+ struct irdma_pble_prm *pinfo = &pble_rsrc->pinfo;
+
+ while (!list_empty(&pinfo->clist)) {
+ chunk = (struct irdma_chunk *) pinfo->clist.next;
+ list_del(&chunk->list);
+ if (chunk->type == PBLE_SD_PAGED)
+ irdma_pble_free_paged_mem(chunk);
+ bitmap_free(chunk->bitmapbuf);
+ kfree(chunk->chunkmem.va);
+ }
+}
+
+/**
+ * irdma_hmc_init_pble - Initialize pble resources during module load
+ * @dev: irdma_sc_dev struct
+ * @pble_rsrc: pble resources
+ */
+int irdma_hmc_init_pble(struct irdma_sc_dev *dev,
+ struct irdma_hmc_pble_rsrc *pble_rsrc)
+{
+ struct irdma_hmc_info *hmc_info;
+ u32 fpm_idx = 0;
+ int status = 0;
+
+ hmc_info = dev->hmc_info;
+ pble_rsrc->dev = dev;
+ pble_rsrc->fpm_base_addr = hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].base;
+ /* Start pbles on a 4K boundary */
+ if (pble_rsrc->fpm_base_addr & 0xfff)
+ fpm_idx = (4096 - (pble_rsrc->fpm_base_addr & 0xfff)) >> 3;
+ pble_rsrc->unallocated_pble =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt - fpm_idx;
+ pble_rsrc->next_fpm_addr = pble_rsrc->fpm_base_addr + (fpm_idx << 3);
+ pble_rsrc->pinfo.pble_shift = PBLE_SHIFT;
+
+ mutex_init(&pble_rsrc->pble_mutex_lock);
+
+ spin_lock_init(&pble_rsrc->pinfo.prm_lock);
+ INIT_LIST_HEAD(&pble_rsrc->pinfo.clist);
+ if (add_pble_prm(pble_rsrc)) {
+ irdma_destroy_pble_prm(pble_rsrc);
+ status = -ENOMEM;
+ }
+
+ return status;
+}
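/*
 * Illustrative note (not part of the patch): each PBLE is 8 bytes, so the
 * 4K-boundary adjustment above converts the unused bytes into a PBLE count.
 * For example, a base address of 0x1010 gives
 * fpm_idx = (4096 - 0x10) >> 3 = 510, and the first usable PBLE then starts
 * at 0x1010 + (510 << 3) = 0x2000.
 */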
+
+/**
+ * get_sd_pd_idx - Returns sd index, pd index and rel_pd_idx from fpm address
+ * @pble_rsrc: structure containing fpm address
+ * @idx: where to return indexes
+ */
+static void get_sd_pd_idx(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct sd_pd_idx *idx)
+{
+ idx->sd_idx = pble_rsrc->next_fpm_addr / IRDMA_HMC_DIRECT_BP_SIZE;
+ idx->pd_idx = (u32)(pble_rsrc->next_fpm_addr / IRDMA_HMC_PAGED_BP_SIZE);
+ idx->rel_pd_idx = (idx->pd_idx % IRDMA_HMC_PD_CNT_IN_SD);
+}
+
+/**
+ * add_sd_direct - add sd direct for pble
+ * @pble_rsrc: pble resource ptr
+ * @info: page info for sd
+ */
+static int add_sd_direct(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_add_page_info *info)
+{
+ struct irdma_sc_dev *dev = pble_rsrc->dev;
+ int ret_code = 0;
+ struct sd_pd_idx *idx = &info->idx;
+ struct irdma_chunk *chunk = info->chunk;
+ struct irdma_hmc_info *hmc_info = info->hmc_info;
+ struct irdma_hmc_sd_entry *sd_entry = info->sd_entry;
+ u32 offset = 0;
+
+ if (!sd_entry->valid) {
+ ret_code = irdma_add_sd_table_entry(dev->hw, hmc_info,
+ info->idx.sd_idx,
+ IRDMA_SD_TYPE_DIRECT,
+ IRDMA_HMC_DIRECT_BP_SIZE);
+ if (ret_code)
+ return ret_code;
+
+ chunk->type = PBLE_SD_CONTIGOUS;
+ }
+
+ offset = idx->rel_pd_idx << HMC_PAGED_BP_SHIFT;
+ chunk->size = info->pages << HMC_PAGED_BP_SHIFT;
+ chunk->vaddr = sd_entry->u.bp.addr.va + offset;
+ chunk->fpm_addr = pble_rsrc->next_fpm_addr;
+ ibdev_dbg(to_ibdev(dev),
+ "PBLE: chunk_size[%lld] = 0x%llx vaddr=0x%p fpm_addr = %llx\n",
+ chunk->size, chunk->size, chunk->vaddr, chunk->fpm_addr);
+
+ return 0;
+}
+
+/**
+ * fpm_to_idx - given fpm address, get pble index
+ * @pble_rsrc: pble resource management
+ * @addr: fpm address for index
+ */
+static u32 fpm_to_idx(struct irdma_hmc_pble_rsrc *pble_rsrc, u64 addr)
+{
+ u64 idx;
+
+ idx = (addr - (pble_rsrc->fpm_base_addr)) >> 3;
+
+ return (u32)idx;
+}
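/*
 * Illustrative sketch (not part of the patch): the inverse of fpm_to_idx()
 * above.  Each PBLE entry is 8 bytes, hence the shift by 3; the helper name
 * is hypothetical and only documents the index <-> fpm address relation.
 */
static inline u64 irdma_example_idx_to_fpm(struct irdma_hmc_pble_rsrc *pble_rsrc,
					   u32 idx)
{
	return pble_rsrc->fpm_base_addr + ((u64)idx << 3);
}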
+
+/**
+ * add_bp_pages - add backing pages for sd
+ * @pble_rsrc: pble resource management
+ * @info: page info for sd
+ */
+static int add_bp_pages(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_add_page_info *info)
+{
+ struct irdma_sc_dev *dev = pble_rsrc->dev;
+ u8 *addr;
+ struct irdma_dma_mem mem;
+ struct irdma_hmc_pd_entry *pd_entry;
+ struct irdma_hmc_sd_entry *sd_entry = info->sd_entry;
+ struct irdma_hmc_info *hmc_info = info->hmc_info;
+ struct irdma_chunk *chunk = info->chunk;
+ int status = 0;
+ u32 rel_pd_idx = info->idx.rel_pd_idx;
+ u32 pd_idx = info->idx.pd_idx;
+ u32 i;
+
+ if (irdma_pble_get_paged_mem(chunk, info->pages))
+ return -ENOMEM;
+
+ status = irdma_add_sd_table_entry(dev->hw, hmc_info, info->idx.sd_idx,
+ IRDMA_SD_TYPE_PAGED,
+ IRDMA_HMC_DIRECT_BP_SIZE);
+ if (status)
+ goto error;
+
+ addr = chunk->vaddr;
+ for (i = 0; i < info->pages; i++) {
+ mem.pa = (u64)chunk->dmainfo.dmaaddrs[i];
+ mem.size = 4096;
+ mem.va = addr;
+ pd_entry = &sd_entry->u.pd_table.pd_entry[rel_pd_idx++];
+ if (!pd_entry->valid) {
+ status = irdma_add_pd_table_entry(dev, hmc_info,
+ pd_idx++, &mem);
+ if (status)
+ goto error;
+
+ addr += 4096;
+ }
+ }
+
+ chunk->fpm_addr = pble_rsrc->next_fpm_addr;
+ return 0;
+
+error:
+ irdma_pble_free_paged_mem(chunk);
+
+ return status;
+}
+
+/**
+ * irdma_get_type - return the sd entry type to use for an sd
+ * @dev: irdma_sc_dev struct
+ * @idx: index of sd
+ * @pages: pages in the sd
+ */
+static enum irdma_sd_entry_type irdma_get_type(struct irdma_sc_dev *dev,
+ struct sd_pd_idx *idx, u32 pages)
+{
+ enum irdma_sd_entry_type sd_entry_type;
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ sd_entry_type = (!idx->rel_pd_idx &&
+ pages == IRDMA_HMC_PD_CNT_IN_SD) ?
+ IRDMA_SD_TYPE_DIRECT : IRDMA_SD_TYPE_PAGED;
+ else
+ sd_entry_type = (!idx->rel_pd_idx &&
+ pages == IRDMA_HMC_PD_CNT_IN_SD &&
+ dev->privileged) ?
+ IRDMA_SD_TYPE_DIRECT : IRDMA_SD_TYPE_PAGED;
+ return sd_entry_type;
+}
+
+/**
+ * add_pble_prm - add an sd entry for the pble resource
+ * @pble_rsrc: pble resource management
+ */
+static int add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc)
+{
+ struct irdma_sc_dev *dev = pble_rsrc->dev;
+ struct irdma_hmc_sd_entry *sd_entry;
+ struct irdma_hmc_info *hmc_info;
+ struct irdma_chunk *chunk;
+ struct irdma_add_page_info info;
+ struct sd_pd_idx *idx = &info.idx;
+ int ret_code = 0;
+ enum irdma_sd_entry_type sd_entry_type;
+ u64 sd_reg_val = 0;
+ struct irdma_virt_mem chunkmem;
+ u32 pages;
+
+ if (pble_rsrc->unallocated_pble < PBLE_PER_PAGE)
+ return -ENOMEM;
+
+ if (pble_rsrc->next_fpm_addr & 0xfff)
+ return -EINVAL;
+
+ chunkmem.size = sizeof(*chunk);
+ chunkmem.va = kzalloc(chunkmem.size, GFP_KERNEL);
+ if (!chunkmem.va)
+ return -ENOMEM;
+
+ chunk = chunkmem.va;
+ chunk->chunkmem = chunkmem;
+ hmc_info = dev->hmc_info;
+ chunk->dev = dev;
+ chunk->fpm_addr = pble_rsrc->next_fpm_addr;
+ get_sd_pd_idx(pble_rsrc, idx);
+ sd_entry = &hmc_info->sd_table.sd_entry[idx->sd_idx];
+ pages = (idx->rel_pd_idx) ? (IRDMA_HMC_PD_CNT_IN_SD - idx->rel_pd_idx) :
+ IRDMA_HMC_PD_CNT_IN_SD;
+ pages = min(pages, pble_rsrc->unallocated_pble >> PBLE_512_SHIFT);
+ info.chunk = chunk;
+ info.hmc_info = hmc_info;
+ info.pages = pages;
+ info.sd_entry = sd_entry;
+ if (!sd_entry->valid)
+ sd_entry_type = irdma_get_type(dev, idx, pages);
+ else
+ sd_entry_type = sd_entry->entry_type;
+
+ ibdev_dbg(to_ibdev(dev),
+ "PBLE: pages = %d, unallocated_pble[%d] current_fpm_addr = %llx\n",
+ pages, pble_rsrc->unallocated_pble,
+ pble_rsrc->next_fpm_addr);
+ ibdev_dbg(to_ibdev(dev), "PBLE: sd_entry_type = %d\n", sd_entry_type);
+ if (sd_entry_type == IRDMA_SD_TYPE_DIRECT)
+ ret_code = add_sd_direct(pble_rsrc, &info);
+
+ if (ret_code)
+ sd_entry_type = IRDMA_SD_TYPE_PAGED;
+ else
+ pble_rsrc->stats_direct_sds++;
+
+ if (sd_entry_type == IRDMA_SD_TYPE_PAGED) {
+ ret_code = add_bp_pages(pble_rsrc, &info);
+ if (ret_code)
+ goto error;
+ else
+ pble_rsrc->stats_paged_sds++;
+ }
+
+ ret_code = irdma_prm_add_pble_mem(&pble_rsrc->pinfo, chunk);
+ if (ret_code)
+ goto error;
+
+ pble_rsrc->next_fpm_addr += chunk->size;
+ ibdev_dbg(to_ibdev(dev),
+ "PBLE: next_fpm_addr = %llx chunk_size[%llu] = 0x%llx\n",
+ pble_rsrc->next_fpm_addr, chunk->size, chunk->size);
+ pble_rsrc->unallocated_pble -= (u32)(chunk->size >> 3);
+ sd_reg_val = (sd_entry_type == IRDMA_SD_TYPE_PAGED) ?
+ sd_entry->u.pd_table.pd_page_addr.pa :
+ sd_entry->u.bp.addr.pa;
+ if ((dev->privileged && !sd_entry->valid) ||
+ dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ ret_code = irdma_hmc_sd_one(dev, hmc_info->hmc_fn_id,
+ sd_reg_val, idx->sd_idx,
+ sd_entry->entry_type, true);
+ if (ret_code)
+ goto error;
+ }
+
+ list_add(&chunk->list, &pble_rsrc->pinfo.clist);
+ sd_entry->valid = true;
+ return 0;
+
+error:
+ bitmap_free(chunk->bitmapbuf);
+ kfree(chunk->chunkmem.va);
+
+ return ret_code;
+}
+
+/**
+ * free_lvl2 - free level 2 pble
+ * @pble_rsrc: pble resource management
+ * @palloc: level 2 pble allocation
+ */
+static void free_lvl2(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_pble_alloc *palloc)
+{
+ u32 i;
+ struct irdma_pble_level2 *lvl2 = &palloc->level2;
+ struct irdma_pble_info *root = &lvl2->root;
+ struct irdma_pble_info *leaf = lvl2->leaf;
+
+ for (i = 0; i < lvl2->leaf_cnt; i++, leaf++) {
+ if (leaf->addr)
+ irdma_prm_return_pbles(&pble_rsrc->pinfo,
+ &leaf->chunkinfo);
+ else
+ break;
+ }
+
+ if (root->addr)
+ irdma_prm_return_pbles(&pble_rsrc->pinfo, &root->chunkinfo);
+
+ kfree(lvl2->leafmem.va);
+ lvl2->leaf = NULL;
+}
+
+/**
+ * get_lvl2_pble - get level 2 pble resource
+ * @pble_rsrc: pble resource management
+ * @palloc: level 2 pble allocation
+ */
+static int get_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_pble_alloc *palloc)
+{
+ u32 lf4k, lflast, total, i;
+ u32 pblcnt = PBLE_PER_PAGE;
+ u64 *addr;
+ struct irdma_pble_level2 *lvl2 = &palloc->level2;
+ struct irdma_pble_info *root = &lvl2->root;
+ struct irdma_pble_info *leaf;
+ int ret_code;
+ u64 fpm_addr;
+
+ /* number of full 512-entry (4K) leaves */
+ lf4k = palloc->total_cnt >> 9;
+ lflast = palloc->total_cnt % PBLE_PER_PAGE;
+ total = (lflast == 0) ? lf4k : lf4k + 1;
+ lvl2->leaf_cnt = total;
+
+ lvl2->leafmem.size = (sizeof(*leaf) * total);
+ lvl2->leafmem.va = kzalloc(lvl2->leafmem.size, GFP_KERNEL);
+ if (!lvl2->leafmem.va)
+ return -ENOMEM;
+
+ lvl2->leaf = lvl2->leafmem.va;
+ leaf = lvl2->leaf;
+ ret_code = irdma_prm_get_pbles(&pble_rsrc->pinfo, &root->chunkinfo,
+ total << 3, &root->addr, &fpm_addr);
+ if (ret_code) {
+ kfree(lvl2->leafmem.va);
+ lvl2->leaf = NULL;
+ return -ENOMEM;
+ }
+
+ root->idx = fpm_to_idx(pble_rsrc, fpm_addr);
+ root->cnt = total;
+ addr = root->addr;
+ for (i = 0; i < total; i++, leaf++) {
+ pblcnt = (lflast && ((i + 1) == total)) ?
+ lflast : PBLE_PER_PAGE;
+ ret_code = irdma_prm_get_pbles(&pble_rsrc->pinfo,
+ &leaf->chunkinfo, pblcnt << 3,
+ &leaf->addr, &fpm_addr);
+ if (ret_code)
+ goto error;
+
+ leaf->idx = fpm_to_idx(pble_rsrc, fpm_addr);
+
+ leaf->cnt = pblcnt;
+ *addr = (u64)leaf->idx;
+ addr++;
+ }
+
+ palloc->level = PBLE_LEVEL_2;
+ pble_rsrc->stats_lvl2++;
+ return 0;
+
+error:
+ free_lvl2(pble_rsrc, palloc);
+
+ return -ENOMEM;
+}
+
+/**
+ * get_lvl1_pble - get level 1 pble resource
+ * @pble_rsrc: pble resource management
+ * @palloc: level 1 pble allocation
+ */
+static int get_lvl1_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_pble_alloc *palloc)
+{
+ int ret_code;
+ u64 fpm_addr;
+ struct irdma_pble_info *lvl1 = &palloc->level1;
+
+ ret_code = irdma_prm_get_pbles(&pble_rsrc->pinfo, &lvl1->chunkinfo,
+ palloc->total_cnt << 3, &lvl1->addr,
+ &fpm_addr);
+ if (ret_code)
+ return -ENOMEM;
+
+ palloc->level = PBLE_LEVEL_1;
+ lvl1->idx = fpm_to_idx(pble_rsrc, fpm_addr);
+ lvl1->cnt = palloc->total_cnt;
+ pble_rsrc->stats_lvl1++;
+
+ return 0;
+}
+
+/**
+ * get_lvl1_lvl2_pble - calls the get_lvl1 and get_lvl2 pble routines
+ * @pble_rsrc: pble resources
+ * @palloc: contains all information regarding pble (idx + pble addr)
+ * @lvl: Bitmask for requested pble level
+ */
+static int get_lvl1_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_pble_alloc *palloc, u8 lvl)
+{
+ int status = 0;
+
+ status = get_lvl1_pble(pble_rsrc, palloc);
+ if (!status || lvl == PBLE_LEVEL_1 || palloc->total_cnt <= PBLE_PER_PAGE)
+ return status;
+
+ status = get_lvl2_pble(pble_rsrc, palloc);
+
+ return status;
+}
+
+/**
+ * irdma_get_pble - allocate pbles from the prm
+ * @pble_rsrc: pble resources
+ * @palloc: contains all information regarding pble (idx + pble addr)
+ * @pble_cnt: number of pbles requested
+ * @lvl: requested pble level mask
+ */
+int irdma_get_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_pble_alloc *palloc, u32 pble_cnt,
+ u8 lvl)
+{
+ int status = 0;
+ int max_sds = 0;
+ int i;
+
+ palloc->total_cnt = pble_cnt;
+ palloc->level = PBLE_LEVEL_0;
+
+ mutex_lock(&pble_rsrc->pble_mutex_lock);
+
+ /* check first to see if we can get pbles without acquiring
+ * additional sds
+ */
+ status = get_lvl1_lvl2_pble(pble_rsrc, palloc, lvl);
+ if (!status)
+ goto exit;
+
+ max_sds = (palloc->total_cnt >> 18) + 1;
+ for (i = 0; i < max_sds; i++) {
+ status = add_pble_prm(pble_rsrc);
+ if (status)
+ break;
+
+ status = get_lvl1_lvl2_pble(pble_rsrc, palloc, lvl);
+ /* if level1_only, only go through it once */
+ if (!status || lvl)
+ break;
+ }
+
+exit:
+ if (!status) {
+ pble_rsrc->allocdpbles += pble_cnt;
+ pble_rsrc->stats_alloc_ok++;
+ } else {
+ pble_rsrc->stats_alloc_fail++;
+ }
+ mutex_unlock(&pble_rsrc->pble_mutex_lock);
+
+ return status;
+}
+
+/**
+ * irdma_free_pble - put pbles back into prm
+ * @pble_rsrc: pble resources
+ * @palloc: contains all information regarding pble resource being freed
+ */
+void irdma_free_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_pble_alloc *palloc)
+{
+ if (palloc->level == PBLE_LEVEL_2)
+ free_lvl2(pble_rsrc, palloc);
+ else
+ irdma_prm_return_pbles(&pble_rsrc->pinfo,
+ &palloc->level1.chunkinfo);
+
+ mutex_lock(&pble_rsrc->pble_mutex_lock);
+ pble_rsrc->freedpbles += palloc->total_cnt;
+ pble_rsrc->stats_alloc_freed++;
+ mutex_unlock(&pble_rsrc->pble_mutex_lock);
+}
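/*
 * Illustrative usage sketch (not part of the patch): a caller needing pbles
 * for a memory region pairs irdma_get_pble() with irdma_free_pble().  The
 * function and variable names here are assumptions; the real callers are
 * added later in this series in the MR registration paths.
 */
static int irdma_example_use_pbles(struct irdma_hmc_pble_rsrc *pble_rsrc,
				   u32 pg_cnt)
{
	struct irdma_pble_alloc palloc = {};
	int err;

	/* lvl = PBLE_LEVEL_0 places no restriction, so the PRM may build
	 * either a level-1 or a level-2 allocation as needed.
	 */
	err = irdma_get_pble(pble_rsrc, &palloc, pg_cnt, PBLE_LEVEL_0);
	if (err)
		return err;

	/* ... fill palloc.level1.addr (or the level-2 leaves) with page
	 * addresses and hand the root index to hardware ...
	 */

	irdma_free_pble(pble_rsrc, &palloc);
	return 0;
}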
diff --git a/drivers/infiniband/hw/irdma/pble.h b/drivers/infiniband/hw/irdma/pble.h
new file mode 100644
index 000000000000..160ad728e9fb
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/pble.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2019 Intel Corporation */
+#ifndef IRDMA_PBLE_H
+#define IRDMA_PBLE_H
+
+#define PBLE_SHIFT 6
+#define PBLE_PER_PAGE 512
+#define HMC_PAGED_BP_SHIFT 12
+#define PBLE_512_SHIFT 9
+#define PBLE_INVALID_IDX 0xffffffff
+
+enum irdma_pble_level {
+ PBLE_LEVEL_0 = 0,
+ PBLE_LEVEL_1 = 1,
+ PBLE_LEVEL_2 = 2,
+};
+
+enum irdma_alloc_type {
+ PBLE_NO_ALLOC = 0,
+ PBLE_SD_CONTIGOUS = 1,
+ PBLE_SD_PAGED = 2,
+};
+
+struct irdma_chunk;
+
+struct irdma_pble_chunkinfo {
+ struct irdma_chunk *pchunk;
+ u64 bit_idx;
+ u64 bits_used;
+};
+
+struct irdma_pble_info {
+ u64 *addr;
+ u32 idx;
+ u32 cnt;
+ struct irdma_pble_chunkinfo chunkinfo;
+};
+
+struct irdma_pble_level2 {
+ struct irdma_pble_info root;
+ struct irdma_pble_info *leaf;
+ struct irdma_virt_mem leafmem;
+ u32 leaf_cnt;
+};
+
+struct irdma_pble_alloc {
+ u32 total_cnt;
+ enum irdma_pble_level level;
+ union {
+ struct irdma_pble_info level1;
+ struct irdma_pble_level2 level2;
+ };
+};
+
+struct sd_pd_idx {
+ u32 sd_idx;
+ u32 pd_idx;
+ u32 rel_pd_idx;
+};
+
+struct irdma_add_page_info {
+ struct irdma_chunk *chunk;
+ struct irdma_hmc_sd_entry *sd_entry;
+ struct irdma_hmc_info *hmc_info;
+ struct sd_pd_idx idx;
+ u32 pages;
+};
+
+struct irdma_chunk {
+ struct list_head list;
+ struct irdma_dma_info dmainfo;
+ unsigned long *bitmapbuf;
+
+ u32 sizeofbitmap;
+ u64 size;
+ void *vaddr;
+ u64 fpm_addr;
+ u32 pg_cnt;
+ enum irdma_alloc_type type;
+ struct irdma_sc_dev *dev;
+ struct irdma_virt_mem chunkmem;
+};
+
+struct irdma_pble_prm {
+ struct list_head clist;
+ spinlock_t prm_lock; /* protect prm bitmap */
+ u64 total_pble_alloc;
+ u64 free_pble_cnt;
+ u8 pble_shift;
+};
+
+struct irdma_hmc_pble_rsrc {
+ u32 unallocated_pble;
+ struct mutex pble_mutex_lock; /* protect PBLE resource */
+ struct irdma_sc_dev *dev;
+ u64 fpm_base_addr;
+ u64 next_fpm_addr;
+ struct irdma_pble_prm pinfo;
+ u64 allocdpbles;
+ u64 freedpbles;
+ u32 stats_direct_sds;
+ u32 stats_paged_sds;
+ u64 stats_alloc_ok;
+ u64 stats_alloc_fail;
+ u64 stats_alloc_freed;
+ u64 stats_lvl1;
+ u64 stats_lvl2;
+};
+
+void irdma_destroy_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc);
+int irdma_hmc_init_pble(struct irdma_sc_dev *dev,
+ struct irdma_hmc_pble_rsrc *pble_rsrc);
+void irdma_free_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_pble_alloc *palloc);
+int irdma_get_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_pble_alloc *palloc, u32 pble_cnt,
+ u8 lvl);
+int irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm,
+ struct irdma_chunk *pchunk);
+int irdma_prm_get_pbles(struct irdma_pble_prm *pprm,
+ struct irdma_pble_chunkinfo *chunkinfo, u64 mem_size,
+ u64 **vaddr, u64 *fpm_addr);
+void irdma_prm_return_pbles(struct irdma_pble_prm *pprm,
+ struct irdma_pble_chunkinfo *chunkinfo);
+void irdma_pble_acquire_lock(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ unsigned long *flags);
+void irdma_pble_release_lock(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ unsigned long *flags);
+void irdma_pble_free_paged_mem(struct irdma_chunk *chunk);
+int irdma_pble_get_paged_mem(struct irdma_chunk *chunk, u32 pg_cnt);
+void irdma_prm_rem_bitmapmem(struct irdma_hw *hw, struct irdma_chunk *chunk);
+#endif /* IRDMA_PBLE_H */
diff --git a/drivers/infiniband/hw/irdma/protos.h b/drivers/infiniband/hw/irdma/protos.h
new file mode 100644
index 000000000000..324cfbf21764
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/protos.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2016 - 2021 Intel Corporation */
+#ifndef IRDMA_PROTOS_H
+#define IRDMA_PROTOS_H
+
+#define PAUSE_TIMER_VAL 0xffff
+#define REFRESH_THRESHOLD 0x7fff
+#define HIGH_THRESHOLD 0x800
+#define LOW_THRESHOLD 0x200
+#define ALL_TC2PFC 0xff
+#define CQP_COMPL_WAIT_TIME_MS 10
+#define CQP_TIMEOUT_THRESHOLD 500
+#define CQP_DEF_CMPL_TIMEOUT_THRESHOLD 2500
+
+/* init operations */
+int irdma_sc_dev_init(enum irdma_vers ver, struct irdma_sc_dev *dev,
+ struct irdma_device_init_info *info);
+void irdma_sc_rt_init(struct irdma_sc_dev *dev);
+void irdma_sc_cqp_post_sq(struct irdma_sc_cqp *cqp);
+__le64 *irdma_sc_cqp_get_next_send_wqe(struct irdma_sc_cqp *cqp, u64 scratch);
+int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp,
+ struct irdma_fast_reg_stag_info *info,
+ bool post_sq);
+/* HMC/FPM functions */
+int irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev, u8 hmc_fn_id);
+/* stats misc */
+int irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev,
+ struct irdma_vsi_pestat *pestat, bool wait);
+void irdma_cqp_gather_stats_gen1(struct irdma_sc_dev *dev,
+ struct irdma_vsi_pestat *pestat);
+void irdma_hw_stats_read_all(struct irdma_vsi_pestat *stats,
+ const u64 *hw_stats_regs);
+int irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd,
+ struct irdma_ws_node_info *node_info);
+int irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_ceq *sc_ceq,
+ u8 op);
+int irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_aeq *sc_aeq,
+ u8 op);
+int irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi, u8 cmd,
+ struct irdma_stats_inst_info *stats_info);
+u16 irdma_alloc_ws_node_id(struct irdma_sc_dev *dev);
+void irdma_free_ws_node_id(struct irdma_sc_dev *dev, u16 node_id);
+void irdma_update_stats(struct irdma_dev_hw_stats *hw_stats,
+ struct irdma_gather_stats *gather_stats,
+ struct irdma_gather_stats *last_gather_stats,
+ const struct irdma_hw_stat_map *map, u16 max_stat_idx);
+
+/* vsi functions */
+int irdma_vsi_stats_init(struct irdma_sc_vsi *vsi,
+ struct irdma_vsi_stats_info *info);
+void irdma_vsi_stats_free(struct irdma_sc_vsi *vsi);
+void irdma_sc_vsi_init(struct irdma_sc_vsi *vsi,
+ struct irdma_vsi_init_info *info);
+int irdma_sc_add_cq_ctx(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq);
+void irdma_sc_remove_cq_ctx(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq);
+/* misc L2 param change functions */
+void irdma_change_l2params(struct irdma_sc_vsi *vsi,
+ struct irdma_l2params *l2params);
+void irdma_sc_suspend_resume_qps(struct irdma_sc_vsi *vsi, u8 suspend);
+int irdma_cqp_qp_suspend_resume(struct irdma_sc_qp *qp, u8 cmd);
+void irdma_qp_add_qos(struct irdma_sc_qp *qp);
+void irdma_qp_rem_qos(struct irdma_sc_qp *qp);
+struct irdma_sc_qp *irdma_get_qp_from_list(struct list_head *head,
+ struct irdma_sc_qp *qp);
+void irdma_reinitialize_ieq(struct irdma_sc_vsi *vsi);
+/* terminate functions */
+void irdma_terminate_send_fin(struct irdma_sc_qp *qp);
+
+void irdma_terminate_connection(struct irdma_sc_qp *qp,
+ struct irdma_aeqe_info *info);
+
+void irdma_terminate_received(struct irdma_sc_qp *qp,
+ struct irdma_aeqe_info *info);
+/* dynamic memory allocation */
+/* misc */
+u8 irdma_get_encoded_wqe_size(u32 wqsize, enum irdma_queue_type queue_type);
+void irdma_modify_qp_to_err(struct irdma_sc_qp *sc_qp);
+int irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch,
+ u8 hmc_fn_id, bool post_sq,
+ bool poll_registers);
+int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count);
+int irdma_get_rdma_features(struct irdma_sc_dev *dev);
+void free_sd_mem(struct irdma_sc_dev *dev);
+int irdma_process_cqp_cmd(struct irdma_sc_dev *dev,
+ struct cqp_cmds_info *pcmdinfo);
+int irdma_process_bh(struct irdma_sc_dev *dev);
+int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev,
+ struct irdma_update_sds_info *info);
+int irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev,
+ struct irdma_dma_mem *mem);
+int irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev,
+ struct irdma_hmc_fcn_info *hmcfcninfo,
+ u16 *pmf_idx);
+void irdma_add_dev_ref(struct irdma_sc_dev *dev);
+void irdma_put_dev_ref(struct irdma_sc_dev *dev);
+void *irdma_remove_cqp_head(struct irdma_sc_dev *dev);
+#endif /* IRDMA_PROTOS_H */
diff --git a/drivers/infiniband/hw/irdma/puda.c b/drivers/infiniband/hw/irdma/puda.c
new file mode 100644
index 000000000000..cee47ddbd1b5
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/puda.c
@@ -0,0 +1,1718 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include "osdep.h"
+#include "hmc.h"
+#include "defs.h"
+#include "type.h"
+#include "protos.h"
+#include "puda.h"
+#include "ws.h"
+
+static void irdma_ieq_receive(struct irdma_sc_vsi *vsi,
+ struct irdma_puda_buf *buf);
+static void irdma_ieq_tx_compl(struct irdma_sc_vsi *vsi, void *sqwrid);
+static void irdma_ilq_putback_rcvbuf(struct irdma_sc_qp *qp,
+ struct irdma_puda_buf *buf, u32 wqe_idx);
+/**
+ * irdma_puda_get_listbuf - get buffer from puda list
+ * @list: list to use for buffers (ILQ or IEQ)
+ */
+static struct irdma_puda_buf *irdma_puda_get_listbuf(struct list_head *list)
+{
+ struct irdma_puda_buf *buf = NULL;
+
+ if (!list_empty(list)) {
+ buf = (struct irdma_puda_buf *)list->next;
+ list_del((struct list_head *)&buf->list);
+ }
+
+ return buf;
+}
+
+/**
+ * irdma_puda_get_bufpool - return buffer from resource
+ * @rsrc: resource to use for buffer
+ */
+struct irdma_puda_buf *irdma_puda_get_bufpool(struct irdma_puda_rsrc *rsrc)
+{
+ struct irdma_puda_buf *buf = NULL;
+ struct list_head *list = &rsrc->bufpool;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rsrc->bufpool_lock, flags);
+ buf = irdma_puda_get_listbuf(list);
+ if (buf) {
+ rsrc->avail_buf_count--;
+ buf->vsi = rsrc->vsi;
+ } else {
+ rsrc->stats_buf_alloc_fail++;
+ }
+ spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+
+ return buf;
+}
+
+/**
+ * irdma_puda_ret_bufpool - return buffer to rsrc list
+ * @rsrc: resource to use for buffer
+ * @buf: buffer to return to resource
+ */
+void irdma_puda_ret_bufpool(struct irdma_puda_rsrc *rsrc,
+ struct irdma_puda_buf *buf)
+{
+ unsigned long flags;
+
+ buf->do_lpb = false;
+ spin_lock_irqsave(&rsrc->bufpool_lock, flags);
+ list_add(&buf->list, &rsrc->bufpool);
+ spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+ rsrc->avail_buf_count++;
+}
+
+/**
+ * irdma_puda_post_recvbuf - set wqe for rcv buffer
+ * @rsrc: resource ptr
+ * @wqe_idx: wqe index to use
+ * @buf: puda buffer for rcv q
+ * @initial: flag if during init time
+ */
+static void irdma_puda_post_recvbuf(struct irdma_puda_rsrc *rsrc, u32 wqe_idx,
+ struct irdma_puda_buf *buf, bool initial)
+{
+ __le64 *wqe;
+ struct irdma_sc_qp *qp = &rsrc->qp;
+ u64 offset24 = 0;
+
+ /* Synch buffer for use by device */
+ dma_sync_single_for_device(rsrc->dev->hw->device, buf->mem.pa,
+ buf->mem.size, DMA_BIDIRECTIONAL);
+ qp->qp_uk.rq_wrid_array[wqe_idx] = (uintptr_t)buf;
+ wqe = qp->qp_uk.rq_base[wqe_idx].elem;
+ if (!initial)
+ get_64bit_val(wqe, 24, &offset24);
+
+ offset24 = (offset24) ? 0 : FIELD_PREP(IRDMAQPSQ_VALID, 1);
+
+ set_64bit_val(wqe, 16, 0);
+ set_64bit_val(wqe, 0, buf->mem.pa);
+ if (qp->qp_uk.uk_attrs->hw_rev == IRDMA_GEN_1) {
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_LEN, buf->mem.size));
+ } else {
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_FRAG_LEN, buf->mem.size) |
+ offset24);
+ }
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, offset24);
+}
+
+/**
+ * irdma_puda_replenish_rq - post rcv buffers
+ * @rsrc: resource to use for buffer
+ * @initial: flag if during init time
+ */
+static int irdma_puda_replenish_rq(struct irdma_puda_rsrc *rsrc, bool initial)
+{
+ u32 i;
+ u32 invalid_cnt = rsrc->rxq_invalid_cnt;
+ struct irdma_puda_buf *buf = NULL;
+
+ for (i = 0; i < invalid_cnt; i++) {
+ buf = irdma_puda_get_bufpool(rsrc);
+ if (!buf)
+ return -ENOBUFS;
+ irdma_puda_post_recvbuf(rsrc, rsrc->rx_wqe_idx, buf, initial);
+ rsrc->rx_wqe_idx = ((rsrc->rx_wqe_idx + 1) % rsrc->rq_size);
+ rsrc->rxq_invalid_cnt--;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_puda_alloc_buf - allocate mem for buffer
+ * @dev: iwarp device
+ * @len: length of buffer
+ */
+static struct irdma_puda_buf *irdma_puda_alloc_buf(struct irdma_sc_dev *dev,
+ u32 len)
+{
+ struct irdma_puda_buf *buf;
+ struct irdma_virt_mem buf_mem;
+
+ buf_mem.size = sizeof(struct irdma_puda_buf);
+ buf_mem.va = kzalloc(buf_mem.size, GFP_KERNEL);
+ if (!buf_mem.va)
+ return NULL;
+
+ buf = buf_mem.va;
+ buf->mem.size = len;
+ buf->mem.va = kzalloc(buf->mem.size, GFP_KERNEL);
+ if (!buf->mem.va)
+ goto free_virt;
+ buf->mem.pa = dma_map_single(dev->hw->device, buf->mem.va,
+ buf->mem.size, DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(dev->hw->device, buf->mem.pa)) {
+ kfree(buf->mem.va);
+ goto free_virt;
+ }
+
+ buf->buf_mem.va = buf_mem.va;
+ buf->buf_mem.size = buf_mem.size;
+
+ return buf;
+
+free_virt:
+ kfree(buf_mem.va);
+ return NULL;
+}
+
+/**
+ * irdma_puda_dele_buf - free a buffer back to the system
+ * @dev: iwarp device
+ * @buf: buffer to free
+ */
+static void irdma_puda_dele_buf(struct irdma_sc_dev *dev,
+ struct irdma_puda_buf *buf)
+{
+ dma_unmap_single(dev->hw->device, buf->mem.pa, buf->mem.size,
+ DMA_BIDIRECTIONAL);
+ kfree(buf->mem.va);
+ kfree(buf->buf_mem.va);
+}
+
+/**
+ * irdma_puda_get_next_send_wqe - return next wqe for processing
+ * @qp: puda qp for wqe
+ * @wqe_idx: wqe index for caller
+ */
+static __le64 *irdma_puda_get_next_send_wqe(struct irdma_qp_uk *qp,
+ u32 *wqe_idx)
+{
+ int ret_code = 0;
+
+ *wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring);
+ if (!*wqe_idx)
+ qp->swqe_polarity = !qp->swqe_polarity;
+ IRDMA_RING_MOVE_HEAD(qp->sq_ring, ret_code);
+ if (ret_code)
+ return NULL;
+
+ return qp->sq_base[*wqe_idx].elem;
+}
+
+/**
+ * irdma_puda_poll_info - poll cq for completion
+ * @cq: cq for poll
+ * @info: info return for successful completion
+ */
+static int irdma_puda_poll_info(struct irdma_sc_cq *cq,
+ struct irdma_puda_cmpl_info *info)
+{
+ struct irdma_cq_uk *cq_uk = &cq->cq_uk;
+ u64 qword0, qword2, qword3, qword6;
+ __le64 *cqe;
+ __le64 *ext_cqe = NULL;
+ u64 qword7 = 0;
+ u64 comp_ctx;
+ bool valid_bit;
+ bool ext_valid = false;
+ u32 major_err, minor_err;
+ u32 peek_head;
+ bool error;
+ u8 polarity;
+
+ cqe = IRDMA_GET_CURRENT_CQ_ELEM(&cq->cq_uk);
+ get_64bit_val(cqe, 24, &qword3);
+ valid_bit = (bool)FIELD_GET(IRDMA_CQ_VALID, qword3);
+ if (valid_bit != cq_uk->polarity)
+ return -ENOENT;
+
+ /* Ensure CQE contents are read after valid bit is checked */
+ dma_rmb();
+
+ if (cq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
+ ext_valid = (bool)FIELD_GET(IRDMA_CQ_EXTCQE, qword3);
+
+ if (ext_valid) {
+ peek_head = (cq_uk->cq_ring.head + 1) % cq_uk->cq_ring.size;
+ ext_cqe = cq_uk->cq_base[peek_head].buf;
+ get_64bit_val(ext_cqe, 24, &qword7);
+ polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword7);
+ if (!peek_head)
+ polarity ^= 1;
+ if (polarity != cq_uk->polarity)
+ return -ENOENT;
+
+ /* Ensure ext CQE contents are read after ext valid bit is checked */
+ dma_rmb();
+
+ IRDMA_RING_MOVE_HEAD_NOCHECK(cq_uk->cq_ring);
+ if (!IRDMA_RING_CURRENT_HEAD(cq_uk->cq_ring))
+ cq_uk->polarity = !cq_uk->polarity;
+ /* update cq tail in cq shadow memory also */
+ IRDMA_RING_MOVE_TAIL(cq_uk->cq_ring);
+ }
+
+ print_hex_dump_debug("PUDA: PUDA CQE", DUMP_PREFIX_OFFSET, 16, 8, cqe,
+ 32, false);
+ if (ext_valid)
+ print_hex_dump_debug("PUDA: PUDA EXT-CQE", DUMP_PREFIX_OFFSET,
+ 16, 8, ext_cqe, 32, false);
+
+ error = (bool)FIELD_GET(IRDMA_CQ_ERROR, qword3);
+ if (error) {
+ ibdev_dbg(to_ibdev(cq->dev), "PUDA: receive error\n");
+ major_err = (u32)(FIELD_GET(IRDMA_CQ_MAJERR, qword3));
+ minor_err = (u32)(FIELD_GET(IRDMA_CQ_MINERR, qword3));
+ info->compl_error = major_err << 16 | minor_err;
+ return -EIO;
+ }
+
+ get_64bit_val(cqe, 0, &qword0);
+ get_64bit_val(cqe, 16, &qword2);
+
+ info->q_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3);
+ info->qp_id = (u32)FIELD_GET(IRDMACQ_QPID, qword2);
+ if (cq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
+ info->ipv4 = (bool)FIELD_GET(IRDMACQ_IPV4, qword3);
+
+ get_64bit_val(cqe, 8, &comp_ctx);
+ info->qp = (struct irdma_qp_uk *)(unsigned long)comp_ctx;
+ info->wqe_idx = (u32)FIELD_GET(IRDMA_CQ_WQEIDX, qword3);
+
+ if (info->q_type == IRDMA_CQE_QTYPE_RQ) {
+ if (ext_valid) {
+ info->vlan_valid = (bool)FIELD_GET(IRDMA_CQ_UDVLANVALID, qword7);
+ if (info->vlan_valid) {
+ get_64bit_val(ext_cqe, 16, &qword6);
+ info->vlan = (u16)FIELD_GET(IRDMA_CQ_UDVLAN, qword6);
+ }
+ info->smac_valid = (bool)FIELD_GET(IRDMA_CQ_UDSMACVALID, qword7);
+ if (info->smac_valid) {
+ get_64bit_val(ext_cqe, 16, &qword6);
+ info->smac[0] = (u8)((qword6 >> 40) & 0xFF);
+ info->smac[1] = (u8)((qword6 >> 32) & 0xFF);
+ info->smac[2] = (u8)((qword6 >> 24) & 0xFF);
+ info->smac[3] = (u8)((qword6 >> 16) & 0xFF);
+ info->smac[4] = (u8)((qword6 >> 8) & 0xFF);
+ info->smac[5] = (u8)(qword6 & 0xFF);
+ }
+ }
+
+ if (cq->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) {
+ info->vlan_valid = (bool)FIELD_GET(IRDMA_VLAN_TAG_VALID, qword3);
+ info->l4proto = (u8)FIELD_GET(IRDMA_UDA_L4PROTO, qword2);
+ info->l3proto = (u8)FIELD_GET(IRDMA_UDA_L3PROTO, qword2);
+ }
+
+ info->payload_len = (u32)FIELD_GET(IRDMACQ_PAYLDLEN, qword0);
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_puda_poll_cmpl - processes completion for cq
+ * @dev: iwarp device
+ * @cq: cq getting interrupt
+ * @compl_err: return any completion err
+ */
+int irdma_puda_poll_cmpl(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq,
+ u32 *compl_err)
+{
+ struct irdma_qp_uk *qp;
+ struct irdma_cq_uk *cq_uk = &cq->cq_uk;
+ struct irdma_puda_cmpl_info info = {};
+ int ret = 0;
+ struct irdma_puda_buf *buf;
+ struct irdma_puda_rsrc *rsrc;
+ u8 cq_type = cq->cq_type;
+ unsigned long flags;
+
+ if (cq_type == IRDMA_CQ_TYPE_ILQ || cq_type == IRDMA_CQ_TYPE_IEQ) {
+ rsrc = (cq_type == IRDMA_CQ_TYPE_ILQ) ? cq->vsi->ilq :
+ cq->vsi->ieq;
+ } else {
+ ibdev_dbg(to_ibdev(dev), "PUDA: qp_type error\n");
+ return -EINVAL;
+ }
+
+ ret = irdma_puda_poll_info(cq, &info);
+ *compl_err = info.compl_error;
+ if (ret == -ENOENT)
+ return ret;
+ if (ret)
+ goto done;
+
+ qp = info.qp;
+ if (!qp || !rsrc) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ if (qp->qp_id != rsrc->qp_id) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ if (info.q_type == IRDMA_CQE_QTYPE_RQ) {
+ buf = (struct irdma_puda_buf *)(uintptr_t)
+ qp->rq_wrid_array[info.wqe_idx];
+
+ /* reusing so synch the buffer for CPU use */
+ dma_sync_single_for_cpu(dev->hw->device, buf->mem.pa,
+ buf->mem.size, DMA_BIDIRECTIONAL);
+ /* Get all the tcpip information in the buf header */
+ ret = irdma_puda_get_tcpip_info(&info, buf);
+ if (ret) {
+ rsrc->stats_rcvd_pkt_err++;
+ if (cq_type == IRDMA_CQ_TYPE_ILQ) {
+ irdma_ilq_putback_rcvbuf(&rsrc->qp, buf,
+ info.wqe_idx);
+ } else {
+ irdma_puda_ret_bufpool(rsrc, buf);
+ irdma_puda_replenish_rq(rsrc, false);
+ }
+ goto done;
+ }
+
+ rsrc->stats_pkt_rcvd++;
+ rsrc->compl_rxwqe_idx = info.wqe_idx;
+ ibdev_dbg(to_ibdev(dev), "PUDA: RQ completion\n");
+ rsrc->receive(rsrc->vsi, buf);
+ if (cq_type == IRDMA_CQ_TYPE_ILQ)
+ irdma_ilq_putback_rcvbuf(&rsrc->qp, buf, info.wqe_idx);
+ else
+ irdma_puda_replenish_rq(rsrc, false);
+
+ } else {
+ ibdev_dbg(to_ibdev(dev), "PUDA: SQ completion\n");
+ buf = (struct irdma_puda_buf *)(uintptr_t)
+ qp->sq_wrtrk_array[info.wqe_idx].wrid;
+
+ /* reusing so synch the buffer for CPU use */
+ dma_sync_single_for_cpu(dev->hw->device, buf->mem.pa,
+ buf->mem.size, DMA_BIDIRECTIONAL);
+ IRDMA_RING_SET_TAIL(qp->sq_ring, info.wqe_idx);
+ rsrc->xmit_complete(rsrc->vsi, buf);
+ spin_lock_irqsave(&rsrc->bufpool_lock, flags);
+ rsrc->tx_wqe_avail_cnt++;
+ spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+ if (!list_empty(&rsrc->txpend))
+ irdma_puda_send_buf(rsrc, NULL);
+ }
+
+done:
+ IRDMA_RING_MOVE_HEAD_NOCHECK(cq_uk->cq_ring);
+ if (!IRDMA_RING_CURRENT_HEAD(cq_uk->cq_ring))
+ cq_uk->polarity = !cq_uk->polarity;
+ /* update cq tail in cq shadow memory also */
+ IRDMA_RING_MOVE_TAIL(cq_uk->cq_ring);
+ set_64bit_val(cq_uk->shadow_area, 0,
+ IRDMA_RING_CURRENT_HEAD(cq_uk->cq_ring));
+
+ return ret;
+}
+
+/**
+ * irdma_puda_send - complete send wqe for transmit
+ * @qp: puda qp for send
+ * @info: buffer information for transmit
+ */
+int irdma_puda_send(struct irdma_sc_qp *qp, struct irdma_puda_send_info *info)
+{
+ __le64 *wqe;
+ u32 iplen, l4len;
+ u64 hdr[2];
+ u32 wqe_idx;
+ u8 iipt;
+
+ /* number of 32-bit words in the TCP header */
+ l4len = info->tcplen >> 2;
+ if (info->ipv4) {
+ iipt = 3;
+ iplen = 5;
+ } else {
+ iipt = 1;
+ iplen = 10;
+ }
+
+ wqe = irdma_puda_get_next_send_wqe(&qp->qp_uk, &wqe_idx);
+ if (!wqe)
+ return -ENOMEM;
+
+ qp->qp_uk.sq_wrtrk_array[wqe_idx].wrid = (uintptr_t)info->scratch;
+ /* Third line of WQE descriptor */
+ /* maclen is in words */
+
+ if (qp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ hdr[0] = 0; /* Dest_QPN and Dest_QKey only for UD */
+ hdr[1] = FIELD_PREP(IRDMA_UDA_QPSQ_OPCODE, IRDMA_OP_TYPE_SEND) |
+ FIELD_PREP(IRDMA_UDA_QPSQ_L4LEN, l4len) |
+ FIELD_PREP(IRDMAQPSQ_AHID, info->ah_id) |
+ FIELD_PREP(IRDMA_UDA_QPSQ_SIGCOMPL, 1) |
+ FIELD_PREP(IRDMA_UDA_QPSQ_VALID,
+ qp->qp_uk.swqe_polarity);
+
+ /* Fourth line of WQE descriptor */
+
+ set_64bit_val(wqe, 0, info->paddr);
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_FRAG_LEN, info->len) |
+ FIELD_PREP(IRDMA_UDA_QPSQ_VALID, qp->qp_uk.swqe_polarity));
+ } else {
+ hdr[0] = FIELD_PREP(IRDMA_UDA_QPSQ_MACLEN, info->maclen >> 1) |
+ FIELD_PREP(IRDMA_UDA_QPSQ_IPLEN, iplen) |
+ FIELD_PREP(IRDMA_UDA_QPSQ_L4T, 1) |
+ FIELD_PREP(IRDMA_UDA_QPSQ_IIPT, iipt) |
+ FIELD_PREP(IRDMA_GEN1_UDA_QPSQ_L4LEN, l4len);
+
+ hdr[1] = FIELD_PREP(IRDMA_UDA_QPSQ_OPCODE, IRDMA_OP_TYPE_SEND) |
+ FIELD_PREP(IRDMA_UDA_QPSQ_SIGCOMPL, 1) |
+ FIELD_PREP(IRDMA_UDA_QPSQ_DOLOOPBACK, info->do_lpb) |
+ FIELD_PREP(IRDMA_UDA_QPSQ_VALID, qp->qp_uk.swqe_polarity);
+
+ /* Fourth line of WQE descriptor */
+
+ set_64bit_val(wqe, 0, info->paddr);
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_LEN, info->len));
+ }
+
+ set_64bit_val(wqe, 16, hdr[0]);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr[1]);
+
+ print_hex_dump_debug("PUDA: PUDA SEND WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, 32, false);
+ irdma_uk_qp_post_wr(&qp->qp_uk);
+ return 0;
+}
+
+/**
+ * irdma_puda_send_buf - transmit puda buffer
+ * @rsrc: resource to use for buffer
+ * @buf: puda buffer to transmit
+ */
+void irdma_puda_send_buf(struct irdma_puda_rsrc *rsrc,
+ struct irdma_puda_buf *buf)
+{
+ struct irdma_puda_send_info info;
+ int ret = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rsrc->bufpool_lock, flags);
+ /* if no wqe available or not from a completion and we have
+ * pending buffers, we must queue new buffer
+ */
+ if (!rsrc->tx_wqe_avail_cnt || (buf && !list_empty(&rsrc->txpend))) {
+ list_add_tail(&buf->list, &rsrc->txpend);
+ spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+ rsrc->stats_sent_pkt_q++;
+ if (rsrc->type == IRDMA_PUDA_RSRC_TYPE_ILQ)
+ ibdev_dbg(to_ibdev(rsrc->dev),
+ "PUDA: adding to txpend\n");
+ return;
+ }
+ rsrc->tx_wqe_avail_cnt--;
+ /* if we are coming from a completion and have pending buffers
+ * then get one from the pending list
+ */
+ if (!buf) {
+ buf = irdma_puda_get_listbuf(&rsrc->txpend);
+ if (!buf)
+ goto done;
+ }
+
+ info.scratch = buf;
+ info.paddr = buf->mem.pa;
+ info.len = buf->totallen;
+ info.tcplen = buf->tcphlen;
+ info.ipv4 = buf->ipv4;
+
+ if (rsrc->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ info.ah_id = buf->ah_id;
+ } else {
+ info.maclen = buf->maclen;
+ info.do_lpb = buf->do_lpb;
+ }
+
+ /* Synch buffer for use by device */
+ dma_sync_single_for_cpu(rsrc->dev->hw->device, buf->mem.pa,
+ buf->mem.size, DMA_BIDIRECTIONAL);
+ ret = irdma_puda_send(&rsrc->qp, &info);
+ if (ret) {
+ rsrc->tx_wqe_avail_cnt++;
+ rsrc->stats_sent_pkt_q++;
+ list_add(&buf->list, &rsrc->txpend);
+ if (rsrc->type == IRDMA_PUDA_RSRC_TYPE_ILQ)
+ ibdev_dbg(to_ibdev(rsrc->dev),
+ "PUDA: adding to puda_send\n");
+ } else {
+ rsrc->stats_pkt_sent++;
+ }
+done:
+ spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+}
+
+/**
+ * irdma_puda_qp_setctx - during init, set qp's context
+ * @rsrc: qp's resource
+ */
+static void irdma_puda_qp_setctx(struct irdma_puda_rsrc *rsrc)
+{
+ struct irdma_sc_qp *qp = &rsrc->qp;
+ __le64 *qp_ctx = qp->hw_host_ctx;
+
+ set_64bit_val(qp_ctx, 8, qp->sq_pa);
+ set_64bit_val(qp_ctx, 16, qp->rq_pa);
+ set_64bit_val(qp_ctx, 24,
+ FIELD_PREP(IRDMAQPC_RQSIZE, qp->hw_rq_size) |
+ FIELD_PREP(IRDMAQPC_SQSIZE, qp->hw_sq_size));
+ set_64bit_val(qp_ctx, 48,
+ FIELD_PREP(IRDMAQPC_SNDMSS, rsrc->buf_size));
+ set_64bit_val(qp_ctx, 56, 0);
+ if (qp->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
+ set_64bit_val(qp_ctx, 64, 1);
+ set_64bit_val(qp_ctx, 136,
+ FIELD_PREP(IRDMAQPC_TXCQNUM, rsrc->cq_id) |
+ FIELD_PREP(IRDMAQPC_RXCQNUM, rsrc->cq_id));
+ set_64bit_val(qp_ctx, 144,
+ FIELD_PREP(IRDMAQPC_STAT_INDEX, rsrc->stats_idx));
+ set_64bit_val(qp_ctx, 160,
+ FIELD_PREP(IRDMAQPC_PRIVEN, 1) |
+ FIELD_PREP(IRDMAQPC_USESTATSINSTANCE, rsrc->stats_idx_valid));
+ set_64bit_val(qp_ctx, 168,
+ FIELD_PREP(IRDMAQPC_QPCOMPCTX, (uintptr_t)qp));
+ set_64bit_val(qp_ctx, 176,
+ FIELD_PREP(IRDMAQPC_SQTPHVAL, qp->sq_tph_val) |
+ FIELD_PREP(IRDMAQPC_RQTPHVAL, qp->rq_tph_val) |
+ FIELD_PREP(IRDMAQPC_QSHANDLE, qp->qs_handle));
+
+ print_hex_dump_debug("PUDA: PUDA QP CONTEXT", DUMP_PREFIX_OFFSET, 16,
+ 8, qp_ctx, IRDMA_QP_CTX_SIZE, false);
+}
+
+/**
+ * irdma_puda_qp_wqe - setup wqe for qp create
+ * @dev: Device
+ * @qp: Resource qp
+ */
+static int irdma_puda_qp_wqe(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp)
+{
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+ u64 hdr;
+ struct irdma_ccq_cqe_info compl_info;
+ int status = 0;
+
+ cqp = dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, 0);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
+ set_64bit_val(wqe, 40, qp->shadow_area_pa);
+
+ hdr = qp->qp_uk.qp_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_QP) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_QPTYPE, IRDMA_QP_TYPE_UDA) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_CQNUMVALID, 1) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_NEXTIWSTATE, 2) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("PUDA: PUDA QP CREATE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, 40, false);
+ irdma_sc_cqp_post_sq(cqp);
+ status = irdma_sc_poll_for_cqp_op_done(dev->cqp, IRDMA_CQP_OP_CREATE_QP,
+ &compl_info);
+
+ return status;
+}
+
+/**
+ * irdma_puda_qp_create - create qp for resource
+ * @rsrc: resource to use for buffer
+ */
+static int irdma_puda_qp_create(struct irdma_puda_rsrc *rsrc)
+{
+ struct irdma_sc_qp *qp = &rsrc->qp;
+ struct irdma_qp_uk *ukqp = &qp->qp_uk;
+ int ret = 0;
+ u32 sq_size, rq_size;
+ struct irdma_dma_mem *mem;
+
+ sq_size = rsrc->sq_size * IRDMA_QP_WQE_MIN_SIZE;
+ rq_size = rsrc->rq_size * IRDMA_QP_WQE_MIN_SIZE;
+ rsrc->qpmem.size = ALIGN((sq_size + rq_size + (IRDMA_SHADOW_AREA_SIZE << 3) + IRDMA_QP_CTX_SIZE),
+ IRDMA_HW_PAGE_SIZE);
+ rsrc->qpmem.va = dma_alloc_coherent(rsrc->dev->hw->device,
+ rsrc->qpmem.size, &rsrc->qpmem.pa,
+ GFP_KERNEL);
+ if (!rsrc->qpmem.va)
+ return -ENOMEM;
+
+ mem = &rsrc->qpmem;
+ memset(mem->va, 0, rsrc->qpmem.size);
+ qp->hw_sq_size = irdma_get_encoded_wqe_size(rsrc->sq_size, IRDMA_QUEUE_TYPE_SQ_RQ);
+ qp->hw_rq_size = irdma_get_encoded_wqe_size(rsrc->rq_size, IRDMA_QUEUE_TYPE_SQ_RQ);
+ qp->pd = &rsrc->sc_pd;
+ qp->qp_uk.qp_type = IRDMA_QP_TYPE_UDA;
+ qp->dev = rsrc->dev;
+ qp->qp_uk.back_qp = rsrc;
+ qp->sq_pa = mem->pa;
+ qp->rq_pa = qp->sq_pa + sq_size;
+ qp->vsi = rsrc->vsi;
+ ukqp->sq_base = mem->va;
+ ukqp->rq_base = &ukqp->sq_base[rsrc->sq_size];
+ ukqp->shadow_area = ukqp->rq_base[rsrc->rq_size].elem;
+ ukqp->uk_attrs = &qp->dev->hw_attrs.uk_attrs;
+ qp->shadow_area_pa = qp->rq_pa + rq_size;
+ qp->hw_host_ctx = ukqp->shadow_area + IRDMA_SHADOW_AREA_SIZE;
+ qp->hw_host_ctx_pa = qp->shadow_area_pa + (IRDMA_SHADOW_AREA_SIZE << 3);
+ qp->push_idx = IRDMA_INVALID_PUSH_PAGE_INDEX;
+ ukqp->qp_id = rsrc->qp_id;
+ ukqp->sq_wrtrk_array = rsrc->sq_wrtrk_array;
+ ukqp->rq_wrid_array = rsrc->rq_wrid_array;
+ ukqp->sq_size = rsrc->sq_size;
+ ukqp->rq_size = rsrc->rq_size;
+
+ IRDMA_RING_INIT(ukqp->sq_ring, ukqp->sq_size);
+ IRDMA_RING_INIT(ukqp->rq_ring, ukqp->rq_size);
+ ukqp->wqe_alloc_db = qp->pd->dev->wqe_alloc_db;
+
+ ret = rsrc->dev->ws_add(qp->vsi, qp->user_pri);
+ if (ret) {
+ dma_free_coherent(rsrc->dev->hw->device, rsrc->qpmem.size,
+ rsrc->qpmem.va, rsrc->qpmem.pa);
+ rsrc->qpmem.va = NULL;
+ return ret;
+ }
+
+ irdma_qp_add_qos(qp);
+ irdma_puda_qp_setctx(rsrc);
+
+ if (rsrc->dev->ceq_valid)
+ ret = irdma_cqp_qp_create_cmd(rsrc->dev, qp);
+ else
+ ret = irdma_puda_qp_wqe(rsrc->dev, qp);
+ if (ret) {
+ irdma_qp_rem_qos(qp);
+ rsrc->dev->ws_remove(qp->vsi, qp->user_pri);
+ dma_free_coherent(rsrc->dev->hw->device, rsrc->qpmem.size,
+ rsrc->qpmem.va, rsrc->qpmem.pa);
+ rsrc->qpmem.va = NULL;
+ }
+
+ return ret;
+}
+
+/**
+ * irdma_puda_cq_wqe - setup wqe for CQ create
+ * @dev: Device
+ * @cq: resource for cq
+ */
+static int irdma_puda_cq_wqe(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq)
+{
+ __le64 *wqe;
+ struct irdma_sc_cqp *cqp;
+ u64 hdr;
+ struct irdma_ccq_cqe_info compl_info;
+
+ cqp = dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, 0);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
+ set_64bit_val(wqe, 8, (uintptr_t)cq >> 1);
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_CQPSQ_CQ_SHADOW_READ_THRESHOLD, cq->shadow_read_threshold));
+ set_64bit_val(wqe, 32, cq->cq_pa);
+ set_64bit_val(wqe, 40, cq->shadow_area_pa);
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMA_CQPSQ_TPHVAL, cq->tph_val) |
+ FIELD_PREP(IRDMA_CQPSQ_VSIIDX, cq->vsi->vsi_idx));
+
+ hdr = cq->cq_uk.cq_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_CQ) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_CHKOVERFLOW, 1) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_ENCEQEMASK, 1) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_CEQIDVALID, 1) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("PUDA: PUDA CREATE CQ", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ irdma_sc_cqp_post_sq(dev->cqp);
+ return irdma_sc_poll_for_cqp_op_done(dev->cqp, IRDMA_CQP_OP_CREATE_CQ,
+ &compl_info);
+}
+
+/**
+ * irdma_puda_cq_create - create cq for resource
+ * @rsrc: resource for which cq to create
+ */
+static int irdma_puda_cq_create(struct irdma_puda_rsrc *rsrc)
+{
+ struct irdma_sc_dev *dev = rsrc->dev;
+ struct irdma_sc_cq *cq = &rsrc->cq;
+ int ret = 0;
+ u32 cqsize;
+ struct irdma_dma_mem *mem;
+ struct irdma_cq_init_info info = {};
+ struct irdma_cq_uk_init_info *init_info = &info.cq_uk_init_info;
+
+ cq->vsi = rsrc->vsi;
+ cqsize = rsrc->cq_size * (sizeof(struct irdma_cqe));
+ rsrc->cqmem.size = ALIGN(cqsize + sizeof(struct irdma_cq_shadow_area),
+ IRDMA_CQ0_ALIGNMENT);
+ rsrc->cqmem.va = dma_alloc_coherent(dev->hw->device, rsrc->cqmem.size,
+ &rsrc->cqmem.pa, GFP_KERNEL);
+ if (!rsrc->cqmem.va)
+ return -ENOMEM;
+
+ mem = &rsrc->cqmem;
+ info.dev = dev;
+ info.type = (rsrc->type == IRDMA_PUDA_RSRC_TYPE_ILQ) ?
+ IRDMA_CQ_TYPE_ILQ : IRDMA_CQ_TYPE_IEQ;
+ info.shadow_read_threshold = rsrc->cq_size >> 2;
+ info.cq_base_pa = mem->pa;
+ info.shadow_area_pa = mem->pa + cqsize;
+ init_info->cq_base = mem->va;
+ init_info->shadow_area = (__le64 *)((u8 *)mem->va + cqsize);
+ init_info->cq_size = rsrc->cq_size;
+ init_info->cq_id = rsrc->cq_id;
+ info.ceqe_mask = true;
+ info.ceq_id_valid = true;
+ info.vsi = rsrc->vsi;
+
+ ret = irdma_sc_cq_init(cq, &info);
+ if (ret)
+ goto error;
+
+ if (rsrc->dev->ceq_valid)
+ ret = irdma_cqp_cq_create_cmd(dev, cq);
+ else
+ ret = irdma_puda_cq_wqe(dev, cq);
+error:
+ if (ret) {
+ dma_free_coherent(dev->hw->device, rsrc->cqmem.size,
+ rsrc->cqmem.va, rsrc->cqmem.pa);
+ rsrc->cqmem.va = NULL;
+ }
+
+ return ret;
+}
+
+/**
+ * irdma_puda_free_qp - free qp for resource
+ * @rsrc: resource for which qp to free
+ */
+static void irdma_puda_free_qp(struct irdma_puda_rsrc *rsrc)
+{
+ int ret;
+ struct irdma_ccq_cqe_info compl_info;
+ struct irdma_sc_dev *dev = rsrc->dev;
+
+ if (rsrc->dev->ceq_valid) {
+ irdma_cqp_qp_destroy_cmd(dev, &rsrc->qp);
+ rsrc->dev->ws_remove(rsrc->qp.vsi, rsrc->qp.user_pri);
+ return;
+ }
+
+ ret = irdma_sc_qp_destroy(&rsrc->qp, 0, false, true, true);
+ if (ret)
+ ibdev_dbg(to_ibdev(dev),
+ "PUDA: error puda qp destroy wqe, status = %d\n",
+ ret);
+ if (!ret) {
+ ret = irdma_sc_poll_for_cqp_op_done(dev->cqp, IRDMA_CQP_OP_DESTROY_QP,
+ &compl_info);
+ if (ret)
+ ibdev_dbg(to_ibdev(dev),
+ "PUDA: error puda qp destroy failed, status = %d\n",
+ ret);
+ }
+ rsrc->dev->ws_remove(rsrc->qp.vsi, rsrc->qp.user_pri);
+}
+
+/**
+ * irdma_puda_free_cq - free cq for resource
+ * @rsrc: resource for which cq to free
+ */
+static void irdma_puda_free_cq(struct irdma_puda_rsrc *rsrc)
+{
+ int ret;
+ struct irdma_ccq_cqe_info compl_info;
+ struct irdma_sc_dev *dev = rsrc->dev;
+
+ if (rsrc->dev->ceq_valid) {
+ irdma_cqp_cq_destroy_cmd(dev, &rsrc->cq);
+ return;
+ }
+
+ ret = irdma_sc_cq_destroy(&rsrc->cq, 0, true);
+ if (ret)
+ ibdev_dbg(to_ibdev(dev), "PUDA: error ieq cq destroy\n");
+ if (!ret) {
+ ret = irdma_sc_poll_for_cqp_op_done(dev->cqp, IRDMA_CQP_OP_DESTROY_CQ,
+ &compl_info);
+ if (ret)
+ ibdev_dbg(to_ibdev(dev),
+ "PUDA: error ieq qp destroy done\n");
+ }
+}
+
+/**
+ * irdma_puda_dele_rsrc - delete all resources during close
+ * @vsi: VSI structure of device
+ * @type: type of resource to delete
+ * @reset: true if reset chip
+ */
+void irdma_puda_dele_rsrc(struct irdma_sc_vsi *vsi, enum puda_rsrc_type type,
+ bool reset)
+{
+ struct irdma_sc_dev *dev = vsi->dev;
+ struct irdma_puda_rsrc *rsrc;
+ struct irdma_puda_buf *buf = NULL;
+ struct irdma_puda_buf *nextbuf = NULL;
+ struct irdma_virt_mem *vmem;
+
+ switch (type) {
+ case IRDMA_PUDA_RSRC_TYPE_ILQ:
+ rsrc = vsi->ilq;
+ vmem = &vsi->ilq_mem;
+ vsi->ilq = NULL;
+ break;
+ case IRDMA_PUDA_RSRC_TYPE_IEQ:
+ rsrc = vsi->ieq;
+ vmem = &vsi->ieq_mem;
+ vsi->ieq = NULL;
+ break;
+ default:
+ ibdev_dbg(to_ibdev(dev), "PUDA: error resource type = 0x%x\n",
+ type);
+ return;
+ }
+
+ switch (rsrc->cmpl) {
+ case PUDA_HASH_CRC_COMPLETE:
+ case PUDA_QP_CREATED:
+ irdma_qp_rem_qos(&rsrc->qp);
+
+ if (!reset)
+ irdma_puda_free_qp(rsrc);
+
+ dma_free_coherent(dev->hw->device, rsrc->qpmem.size,
+ rsrc->qpmem.va, rsrc->qpmem.pa);
+ rsrc->qpmem.va = NULL;
+ fallthrough;
+ case PUDA_CQ_CREATED:
+ if (!reset)
+ irdma_puda_free_cq(rsrc);
+
+ dma_free_coherent(dev->hw->device, rsrc->cqmem.size,
+ rsrc->cqmem.va, rsrc->cqmem.pa);
+ rsrc->cqmem.va = NULL;
+ break;
+ default:
+ ibdev_dbg(to_ibdev(rsrc->dev), "PUDA: error no resources\n");
+ break;
+ }
+ /* Free all allocated puda buffers for both tx and rx */
+ buf = rsrc->alloclist;
+ while (buf) {
+ nextbuf = buf->next;
+ irdma_puda_dele_buf(dev, buf);
+ buf = nextbuf;
+ rsrc->alloc_buf_count--;
+ }
+
+ kfree(vmem->va);
+}
+
+/**
+ * irdma_puda_allocbufs - allocate buffers for resource
+ * @rsrc: resource for buffer allocation
+ * @count: number of buffers to create
+ */
+static int irdma_puda_allocbufs(struct irdma_puda_rsrc *rsrc, u32 count)
+{
+ u32 i;
+ struct irdma_puda_buf *buf;
+ struct irdma_puda_buf *nextbuf;
+
+ for (i = 0; i < count; i++) {
+ buf = irdma_puda_alloc_buf(rsrc->dev, rsrc->buf_size);
+ if (!buf) {
+ rsrc->stats_buf_alloc_fail++;
+ return -ENOMEM;
+ }
+ irdma_puda_ret_bufpool(rsrc, buf);
+ rsrc->alloc_buf_count++;
+ if (!rsrc->alloclist) {
+ rsrc->alloclist = buf;
+ } else {
+ nextbuf = rsrc->alloclist;
+ rsrc->alloclist = buf;
+ buf->next = nextbuf;
+ }
+ }
+
+ rsrc->avail_buf_count = rsrc->alloc_buf_count;
+
+ return 0;
+}
+
+/**
+ * irdma_puda_create_rsrc - create resource (ilq or ieq)
+ * @vsi: sc VSI struct
+ * @info: resource information
+ */
+int irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi,
+ struct irdma_puda_rsrc_info *info)
+{
+ struct irdma_sc_dev *dev = vsi->dev;
+ int ret = 0;
+ struct irdma_puda_rsrc *rsrc;
+ u32 pudasize;
+ u32 sqwridsize, rqwridsize;
+ struct irdma_virt_mem *vmem;
+
+ info->count = 1;
+ pudasize = sizeof(struct irdma_puda_rsrc);
+ sqwridsize = info->sq_size * sizeof(struct irdma_sq_uk_wr_trk_info);
+ rqwridsize = info->rq_size * 8;
+ switch (info->type) {
+ case IRDMA_PUDA_RSRC_TYPE_ILQ:
+ vmem = &vsi->ilq_mem;
+ break;
+ case IRDMA_PUDA_RSRC_TYPE_IEQ:
+ vmem = &vsi->ieq_mem;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ vmem->size = pudasize + sqwridsize + rqwridsize;
+ vmem->va = kzalloc(vmem->size, GFP_KERNEL);
+ if (!vmem->va)
+ return -ENOMEM;
+
+ rsrc = vmem->va;
+ spin_lock_init(&rsrc->bufpool_lock);
+ switch (info->type) {
+ case IRDMA_PUDA_RSRC_TYPE_ILQ:
+ vsi->ilq = vmem->va;
+ vsi->ilq_count = info->count;
+ rsrc->receive = info->receive;
+ rsrc->xmit_complete = info->xmit_complete;
+ break;
+ case IRDMA_PUDA_RSRC_TYPE_IEQ:
+ vsi->ieq_count = info->count;
+ vsi->ieq = vmem->va;
+ rsrc->receive = irdma_ieq_receive;
+ rsrc->xmit_complete = irdma_ieq_tx_compl;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ rsrc->type = info->type;
+ rsrc->sq_wrtrk_array = (struct irdma_sq_uk_wr_trk_info *)
+ ((u8 *)vmem->va + pudasize);
+ rsrc->rq_wrid_array = (u64 *)((u8 *)vmem->va + pudasize + sqwridsize);
+ /* Initialize all ieq lists */
+ INIT_LIST_HEAD(&rsrc->bufpool);
+ INIT_LIST_HEAD(&rsrc->txpend);
+
+ rsrc->tx_wqe_avail_cnt = info->sq_size - 1;
+ irdma_sc_pd_init(dev, &rsrc->sc_pd, info->pd_id, info->abi_ver);
+ rsrc->qp_id = info->qp_id;
+ rsrc->cq_id = info->cq_id;
+ rsrc->sq_size = info->sq_size;
+ rsrc->rq_size = info->rq_size;
+ rsrc->cq_size = info->rq_size + info->sq_size;
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ if (rsrc->type == IRDMA_PUDA_RSRC_TYPE_ILQ)
+ rsrc->cq_size += info->rq_size;
+ }
+ rsrc->buf_size = info->buf_size;
+ rsrc->dev = dev;
+ rsrc->vsi = vsi;
+ rsrc->stats_idx = info->stats_idx;
+ rsrc->stats_idx_valid = info->stats_idx_valid;
+
+ ret = irdma_puda_cq_create(rsrc);
+ if (!ret) {
+ rsrc->cmpl = PUDA_CQ_CREATED;
+ ret = irdma_puda_qp_create(rsrc);
+ }
+ if (ret) {
+ ibdev_dbg(to_ibdev(dev),
+ "PUDA: error qp_create type=%d, status=%d\n",
+ rsrc->type, ret);
+ goto error;
+ }
+ rsrc->cmpl = PUDA_QP_CREATED;
+
+ ret = irdma_puda_allocbufs(rsrc, info->tx_buf_cnt + info->rq_size);
+ if (ret) {
+ ibdev_dbg(to_ibdev(dev), "PUDA: error alloc_buf\n");
+ goto error;
+ }
+
+ rsrc->rxq_invalid_cnt = info->rq_size;
+ ret = irdma_puda_replenish_rq(rsrc, true);
+ if (ret)
+ goto error;
+
+ if (info->type == IRDMA_PUDA_RSRC_TYPE_IEQ) {
+ rsrc->check_crc = true;
+ rsrc->cmpl = PUDA_HASH_CRC_COMPLETE;
+ }
+
+ irdma_sc_ccq_arm(&rsrc->cq);
+ return 0;
+
+error:
+ irdma_puda_dele_rsrc(vsi, info->type, false);
+
+ return ret;
+}
+
+/**
+ * irdma_ilq_putback_rcvbuf - ilq buffer to put back on rq
+ * @qp: ilq's qp resource
+ * @buf: puda buffer for rcv q
+ * @wqe_idx: wqe index of completed rcvbuf
+ */
+static void irdma_ilq_putback_rcvbuf(struct irdma_sc_qp *qp,
+ struct irdma_puda_buf *buf, u32 wqe_idx)
+{
+ __le64 *wqe;
+ u64 offset8, offset24;
+
+ /* Synch buffer for use by device */
+ dma_sync_single_for_device(qp->dev->hw->device, buf->mem.pa,
+ buf->mem.size, DMA_BIDIRECTIONAL);
+ wqe = qp->qp_uk.rq_base[wqe_idx].elem;
+ get_64bit_val(wqe, 24, &offset24);
+ if (qp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ get_64bit_val(wqe, 8, &offset8);
+ if (offset24)
+ offset8 &= ~FIELD_PREP(IRDMAQPSQ_VALID, 1);
+ else
+ offset8 |= FIELD_PREP(IRDMAQPSQ_VALID, 1);
+ set_64bit_val(wqe, 8, offset8);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+ }
+ if (offset24)
+ offset24 = 0;
+ else
+ offset24 = FIELD_PREP(IRDMAQPSQ_VALID, 1);
+
+ set_64bit_val(wqe, 24, offset24);
+}
+
+/**
+ * irdma_ieq_get_fpdu_len - get length of fpdu with or without marker
+ * @pfpdu: pointer to fpdu
+ * @datap: pointer to data in the buffer
+ * @rcv_seq: seqnum of the data buffer
+ */
+static u16 irdma_ieq_get_fpdu_len(struct irdma_pfpdu *pfpdu, u8 *datap,
+ u32 rcv_seq)
+{
+ u32 marker_seq, end_seq, blk_start;
+ u8 marker_len = pfpdu->marker_len;
+ u16 total_len = 0;
+ u16 fpdu_len;
+
+ blk_start = (pfpdu->rcv_start_seq - rcv_seq) & (IRDMA_MRK_BLK_SZ - 1);
+ if (!blk_start) {
+ total_len = marker_len;
+ marker_seq = rcv_seq + IRDMA_MRK_BLK_SZ;
+ if (marker_len && *(u32 *)datap)
+ return 0;
+ } else {
+ marker_seq = rcv_seq + blk_start;
+ }
+
+ datap += total_len;
+ fpdu_len = ntohs(*(__be16 *)datap);
+ fpdu_len += IRDMA_IEQ_MPA_FRAMING;
+ fpdu_len = (fpdu_len + 3) & 0xfffc;
+
+ if (fpdu_len > pfpdu->max_fpdu_data)
+ return 0;
+
+ total_len += fpdu_len;
+ end_seq = rcv_seq + total_len;
+ while ((int)(marker_seq - end_seq) < 0) {
+ total_len += marker_len;
+ end_seq += marker_len;
+ marker_seq += IRDMA_MRK_BLK_SZ;
+ }
+
+ return total_len;
+}
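+
+/*
+ * Worked example for the length math above (illustrative numbers only): an
+ * MPA length field of 29 gives 29 + IRDMA_IEQ_MPA_FRAMING (6) = 35 bytes,
+ * and (35 + 3) & 0xfffc rounds the fpdu up to 36 bytes.  If markers are in
+ * use, one marker_len is then added for each IRDMA_MRK_BLK_SZ (512 byte)
+ * block boundary crossed before end_seq, which is what the while loop does.
+ */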
+
+/**
+ * irdma_ieq_copy_to_txbuf - copy data from rcv buf to tx buf
+ * @buf: rcv buffer with partial
+ * @txbuf: tx buffer for sending back
+ * @buf_offset: rcv buffer offset to copy from
+ * @txbuf_offset: offset in tx buf to copy to
+ * @len: length of data to copy
+ */
+static void irdma_ieq_copy_to_txbuf(struct irdma_puda_buf *buf,
+ struct irdma_puda_buf *txbuf,
+ u16 buf_offset, u32 txbuf_offset, u32 len)
+{
+ void *mem1 = (u8 *)buf->mem.va + buf_offset;
+ void *mem2 = (u8 *)txbuf->mem.va + txbuf_offset;
+
+ memcpy(mem2, mem1, len);
+}
+
+/**
+ * irdma_ieq_setup_tx_buf - setup tx buffer for partial handling
+ * @buf: receive buffer with partial
+ * @txbuf: buffer to prepare
+ */
+static void irdma_ieq_setup_tx_buf(struct irdma_puda_buf *buf,
+ struct irdma_puda_buf *txbuf)
+{
+ txbuf->tcphlen = buf->tcphlen;
+ txbuf->ipv4 = buf->ipv4;
+
+ if (buf->vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ txbuf->hdrlen = txbuf->tcphlen;
+ irdma_ieq_copy_to_txbuf(buf, txbuf, IRDMA_TCP_OFFSET, 0,
+ txbuf->hdrlen);
+ } else {
+ txbuf->maclen = buf->maclen;
+ txbuf->hdrlen = buf->hdrlen;
+ irdma_ieq_copy_to_txbuf(buf, txbuf, 0, 0, buf->hdrlen);
+ }
+}
+
+/**
+ * irdma_ieq_check_first_buf - check if rcv buffer's seq is in range
+ * @buf: receive exception buffer
+ * @fps: first partial sequence number
+ */
+static void irdma_ieq_check_first_buf(struct irdma_puda_buf *buf, u32 fps)
+{
+ u32 offset;
+
+ if (buf->seqnum < fps) {
+ offset = fps - buf->seqnum;
+ if (offset > buf->datalen)
+ return;
+ buf->data += offset;
+ buf->datalen -= (u16)offset;
+ buf->seqnum = fps;
+ }
+}
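+
+/*
+ * Example with illustrative numbers: if the buffer starts at seqnum 1000 but
+ * fps is 1024, the leading 24 bytes precede the first partial sequence and
+ * are skipped here by advancing data/datalen/seqnum past them.
+ */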
+
+/**
+ * irdma_ieq_compl_pfpdu - write txbuf with full fpdu
+ * @ieq: ieq resource
+ * @rxlist: ieq's received buffer list
+ * @pbufl: temporary list of buffers for fpdu
+ * @txbuf: tx buffer for fpdu
+ * @fpdu_len: total length of fpdu
+ */
+static void irdma_ieq_compl_pfpdu(struct irdma_puda_rsrc *ieq,
+ struct list_head *rxlist,
+ struct list_head *pbufl,
+ struct irdma_puda_buf *txbuf, u16 fpdu_len)
+{
+ struct irdma_puda_buf *buf;
+ u32 nextseqnum;
+ u16 txoffset, bufoffset;
+
+ buf = irdma_puda_get_listbuf(pbufl);
+ if (!buf)
+ return;
+
+ nextseqnum = buf->seqnum + fpdu_len;
+ irdma_ieq_setup_tx_buf(buf, txbuf);
+ if (buf->vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ txoffset = txbuf->hdrlen;
+ txbuf->totallen = txbuf->hdrlen + fpdu_len;
+ txbuf->data = (u8 *)txbuf->mem.va + txoffset;
+ } else {
+ txoffset = buf->hdrlen;
+ txbuf->totallen = buf->hdrlen + fpdu_len;
+ txbuf->data = (u8 *)txbuf->mem.va + buf->hdrlen;
+ }
+ bufoffset = (u16)(buf->data - (u8 *)buf->mem.va);
+
+ do {
+ if (buf->datalen >= fpdu_len) {
+ /* copied full fpdu */
+ irdma_ieq_copy_to_txbuf(buf, txbuf, bufoffset, txoffset,
+ fpdu_len);
+ buf->datalen -= fpdu_len;
+ buf->data += fpdu_len;
+ buf->seqnum = nextseqnum;
+ break;
+ }
+ /* copy partial fpdu */
+ irdma_ieq_copy_to_txbuf(buf, txbuf, bufoffset, txoffset,
+ buf->datalen);
+ txoffset += buf->datalen;
+ fpdu_len -= buf->datalen;
+ irdma_puda_ret_bufpool(ieq, buf);
+ buf = irdma_puda_get_listbuf(pbufl);
+ if (!buf)
+ return;
+
+ bufoffset = (u16)(buf->data - (u8 *)buf->mem.va);
+ } while (1);
+
+ /* last buffer on the list */
+ if (buf->datalen)
+ list_add(&buf->list, rxlist);
+ else
+ irdma_puda_ret_bufpool(ieq, buf);
+}
+
+/**
+ * irdma_ieq_create_pbufl - create buffer list for single fpdu
+ * @pfpdu: pointer to fpdu
+ * @rxlist: resource list for received ieq buffers
+ * @pbufl: temporary list of buffers for fpdu
+ * @buf: first receive buffer
+ * @fpdu_len: total length of fpdu
+ */
+static int irdma_ieq_create_pbufl(struct irdma_pfpdu *pfpdu,
+ struct list_head *rxlist,
+ struct list_head *pbufl,
+ struct irdma_puda_buf *buf, u16 fpdu_len)
+{
+ int status = 0;
+ struct irdma_puda_buf *nextbuf;
+ u32 nextseqnum;
+ u16 plen = fpdu_len - buf->datalen;
+ bool done = false;
+
+ nextseqnum = buf->seqnum + buf->datalen;
+ do {
+ nextbuf = irdma_puda_get_listbuf(rxlist);
+ if (!nextbuf) {
+ status = -ENOBUFS;
+ break;
+ }
+ list_add_tail(&nextbuf->list, pbufl);
+ if (nextbuf->seqnum != nextseqnum) {
+ pfpdu->bad_seq_num++;
+ status = -ERANGE;
+ break;
+ }
+ if (nextbuf->datalen >= plen) {
+ done = true;
+ } else {
+ plen -= nextbuf->datalen;
+ nextseqnum = nextbuf->seqnum + nextbuf->datalen;
+ }
+
+ } while (!done);
+
+ return status;
+}
+
+/**
+ * irdma_ieq_handle_partial - process partial fpdu buffer
+ * @ieq: ieq resource
+ * @pfpdu: partial management per user qp
+ * @buf: receive buffer
+ * @fpdu_len: fpdu len in the buffer
+ */
+static int irdma_ieq_handle_partial(struct irdma_puda_rsrc *ieq,
+ struct irdma_pfpdu *pfpdu,
+ struct irdma_puda_buf *buf, u16 fpdu_len)
+{
+ int status = 0;
+ u8 *crcptr;
+ u32 mpacrc;
+ u32 seqnum = buf->seqnum;
+ struct list_head pbufl; /* partial buffer list */
+ struct irdma_puda_buf *txbuf = NULL;
+ struct list_head *rxlist = &pfpdu->rxlist;
+
+ ieq->partials_handled++;
+
+ INIT_LIST_HEAD(&pbufl);
+ list_add(&buf->list, &pbufl);
+
+ status = irdma_ieq_create_pbufl(pfpdu, rxlist, &pbufl, buf, fpdu_len);
+ if (status)
+ goto error;
+
+ txbuf = irdma_puda_get_bufpool(ieq);
+ if (!txbuf) {
+ pfpdu->no_tx_bufs++;
+ status = -ENOBUFS;
+ goto error;
+ }
+
+ irdma_ieq_compl_pfpdu(ieq, rxlist, &pbufl, txbuf, fpdu_len);
+ irdma_ieq_update_tcpip_info(txbuf, fpdu_len, seqnum);
+
+ crcptr = txbuf->data + fpdu_len - 4;
+ mpacrc = *(u32 *)crcptr;
+ if (ieq->check_crc) {
+ status = irdma_ieq_check_mpacrc(txbuf->data, fpdu_len - 4,
+ mpacrc);
+ if (status) {
+ ibdev_dbg(to_ibdev(ieq->dev), "IEQ: error bad crc\n");
+ goto error;
+ }
+ }
+
+ print_hex_dump_debug("IEQ: IEQ TX BUFFER", DUMP_PREFIX_OFFSET, 16, 8,
+ txbuf->mem.va, txbuf->totallen, false);
+ if (ieq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
+ txbuf->ah_id = pfpdu->ah->ah_info.ah_idx;
+ txbuf->do_lpb = true;
+ irdma_puda_send_buf(ieq, txbuf);
+ pfpdu->rcv_nxt = seqnum + fpdu_len;
+ return status;
+
+error:
+ while (!list_empty(&pbufl)) {
+ buf = list_last_entry(&pbufl, struct irdma_puda_buf, list);
+ list_move(&buf->list, rxlist);
+ }
+ if (txbuf)
+ irdma_puda_ret_bufpool(ieq, txbuf);
+
+ return status;
+}
+
+/**
+ * irdma_ieq_process_buf - process buffer rcvd for ieq
+ * @ieq: ieq resource
+ * @pfpdu: partial management per user qp
+ * @buf: receive buffer
+ */
+static int irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq,
+ struct irdma_pfpdu *pfpdu,
+ struct irdma_puda_buf *buf)
+{
+ u16 fpdu_len = 0;
+ u16 datalen = buf->datalen;
+ u8 *datap = buf->data;
+ u8 *crcptr;
+ u16 ioffset = 0;
+ u32 mpacrc;
+ u32 seqnum = buf->seqnum;
+ u16 len = 0;
+ u16 full = 0;
+ bool partial = false;
+ struct irdma_puda_buf *txbuf;
+ struct list_head *rxlist = &pfpdu->rxlist;
+ int ret = 0;
+
+ ioffset = (u16)(buf->data - (u8 *)buf->mem.va);
+ while (datalen) {
+ fpdu_len = irdma_ieq_get_fpdu_len(pfpdu, datap, buf->seqnum);
+ if (!fpdu_len) {
+ ibdev_dbg(to_ibdev(ieq->dev),
+ "IEQ: error bad fpdu len\n");
+ list_add(&buf->list, rxlist);
+ return -EINVAL;
+ }
+
+ if (datalen < fpdu_len) {
+ partial = true;
+ break;
+ }
+ crcptr = datap + fpdu_len - 4;
+ mpacrc = *(u32 *)crcptr;
+ if (ieq->check_crc)
+ ret = irdma_ieq_check_mpacrc(datap, fpdu_len - 4,
+ mpacrc);
+ if (ret) {
+ list_add(&buf->list, rxlist);
+ ibdev_dbg(to_ibdev(ieq->dev),
+ "ERR: IRDMA_ERR_MPA_CRC\n");
+ return -EINVAL;
+ }
+ full++;
+ pfpdu->fpdu_processed++;
+ ieq->fpdu_processed++;
+ datap += fpdu_len;
+ len += fpdu_len;
+ datalen -= fpdu_len;
+ }
+ if (full) {
+ /* copy full fpdus into the txbuf and send them out */
+ txbuf = irdma_puda_get_bufpool(ieq);
+ if (!txbuf) {
+ pfpdu->no_tx_bufs++;
+ list_add(&buf->list, rxlist);
+ return -ENOBUFS;
+ }
+ /* modify txbuf's buffer header */
+ irdma_ieq_setup_tx_buf(buf, txbuf);
+ /* copy full fpdus to new buffer */
+ if (ieq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ irdma_ieq_copy_to_txbuf(buf, txbuf, ioffset,
+ txbuf->hdrlen, len);
+ txbuf->totallen = txbuf->hdrlen + len;
+ txbuf->ah_id = pfpdu->ah->ah_info.ah_idx;
+ } else {
+ irdma_ieq_copy_to_txbuf(buf, txbuf, ioffset,
+ buf->hdrlen, len);
+ txbuf->totallen = buf->hdrlen + len;
+ }
+ irdma_ieq_update_tcpip_info(txbuf, len, buf->seqnum);
+ print_hex_dump_debug("IEQ: IEQ TX BUFFER", DUMP_PREFIX_OFFSET,
+ 16, 8, txbuf->mem.va, txbuf->totallen,
+ false);
+ txbuf->do_lpb = true;
+ irdma_puda_send_buf(ieq, txbuf);
+
+ if (!datalen) {
+ pfpdu->rcv_nxt = buf->seqnum + len;
+ irdma_puda_ret_bufpool(ieq, buf);
+ return 0;
+ }
+ buf->data = datap;
+ buf->seqnum = seqnum + len;
+ buf->datalen = datalen;
+ pfpdu->rcv_nxt = buf->seqnum;
+ }
+ if (partial)
+ return irdma_ieq_handle_partial(ieq, pfpdu, buf, fpdu_len);
+
+ return 0;
+}
+
+/**
+ * irdma_ieq_process_fpdus - process fpdu buffers on its list
+ * @qp: qp with pending partial fpdus
+ * @ieq: ieq resource
+ */
+void irdma_ieq_process_fpdus(struct irdma_sc_qp *qp,
+ struct irdma_puda_rsrc *ieq)
+{
+ struct irdma_pfpdu *pfpdu = &qp->pfpdu;
+ struct list_head *rxlist = &pfpdu->rxlist;
+ struct irdma_puda_buf *buf;
+ int status;
+
+ do {
+ if (list_empty(rxlist))
+ break;
+ buf = irdma_puda_get_listbuf(rxlist);
+ if (!buf) {
+ ibdev_dbg(to_ibdev(ieq->dev), "IEQ: error no buf\n");
+ break;
+ }
+ if (buf->seqnum != pfpdu->rcv_nxt) {
+ /* This could be out of order or missing packet */
+ pfpdu->out_of_order++;
+ list_add(&buf->list, rxlist);
+ break;
+ }
+ /* keep processing buffers from the head of the list */
+ status = irdma_ieq_process_buf(ieq, pfpdu, buf);
+ if (status == -EINVAL) {
+ pfpdu->mpa_crc_err = true;
+ while (!list_empty(rxlist)) {
+ buf = irdma_puda_get_listbuf(rxlist);
+ irdma_puda_ret_bufpool(ieq, buf);
+ pfpdu->crc_err++;
+ ieq->crc_err++;
+ }
+ /* create CQP for AE */
+ irdma_ieq_mpa_crc_ae(ieq->dev, qp);
+ }
+ } while (!status);
+}
+
+/**
+ * irdma_ieq_create_ah - create an address handle for IEQ
+ * @qp: qp pointer
+ * @buf: buf received on IEQ used to create AH
+ */
+static int irdma_ieq_create_ah(struct irdma_sc_qp *qp, struct irdma_puda_buf *buf)
+{
+ struct irdma_ah_info ah_info = {};
+
+ qp->pfpdu.ah_buf = buf;
+ irdma_puda_ieq_get_ah_info(qp, &ah_info);
+ return irdma_puda_create_ah(qp->vsi->dev, &ah_info, false,
+ IRDMA_PUDA_RSRC_TYPE_IEQ, qp,
+ &qp->pfpdu.ah);
+}
+
+/**
+ * irdma_ieq_handle_exception - handle qp's exception
+ * @ieq: ieq resource
+ * @qp: qp receiving exception
+ * @buf: receive buffer
+ */
+static void irdma_ieq_handle_exception(struct irdma_puda_rsrc *ieq,
+ struct irdma_sc_qp *qp,
+ struct irdma_puda_buf *buf)
+{
+ struct irdma_pfpdu *pfpdu = &qp->pfpdu;
+ u32 *hw_host_ctx = (u32 *)qp->hw_host_ctx;
+ u32 rcv_wnd = hw_host_ctx[23];
+ /* first partial seq # in q2 */
+ u32 fps = *(u32 *)(qp->q2_buf + Q2_FPSN_OFFSET);
+ struct list_head *rxlist = &pfpdu->rxlist;
+ unsigned long flags = 0;
+ u8 hw_rev = qp->dev->hw_attrs.uk_attrs.hw_rev;
+
+ print_hex_dump_debug("IEQ: IEQ RX BUFFER", DUMP_PREFIX_OFFSET, 16, 8,
+ buf->mem.va, buf->totallen, false);
+
+ spin_lock_irqsave(&pfpdu->lock, flags);
+ pfpdu->total_ieq_bufs++;
+ if (pfpdu->mpa_crc_err) {
+ pfpdu->crc_err++;
+ goto error;
+ }
+ if (pfpdu->mode && fps != pfpdu->fps) {
+ /* clean up qp as it is new partial sequence */
+ irdma_ieq_cleanup_qp(ieq, qp);
+ ibdev_dbg(to_ibdev(ieq->dev), "IEQ: restarting new partial\n");
+ pfpdu->mode = false;
+ }
+
+ if (!pfpdu->mode) {
+ print_hex_dump_debug("IEQ: Q2 BUFFER", DUMP_PREFIX_OFFSET, 16,
+ 8, (u64 *)qp->q2_buf, 128, false);
+ /* First_Partial_Sequence_Number check */
+ pfpdu->rcv_nxt = fps;
+ pfpdu->fps = fps;
+ pfpdu->mode = true;
+ pfpdu->max_fpdu_data = (buf->ipv4) ?
+ (ieq->vsi->mtu - IRDMA_MTU_TO_MSS_IPV4) :
+ (ieq->vsi->mtu - IRDMA_MTU_TO_MSS_IPV6);
+ pfpdu->pmode_count++;
+ ieq->pmode_count++;
+ INIT_LIST_HEAD(rxlist);
+ irdma_ieq_check_first_buf(buf, fps);
+ }
+
+ if (!(rcv_wnd >= (buf->seqnum - pfpdu->rcv_nxt))) {
+ pfpdu->bad_seq_num++;
+ ieq->bad_seq_num++;
+ goto error;
+ }
+
+ if (!list_empty(rxlist)) {
+ if (buf->seqnum != pfpdu->nextseqnum) {
+ irdma_send_ieq_ack(qp);
+ /* throw away out-of-order, duplicates */
+ goto error;
+ }
+ }
+ /* Insert buf before head */
+ list_add_tail(&buf->list, rxlist);
+ pfpdu->nextseqnum = buf->seqnum + buf->datalen;
+ pfpdu->lastrcv_buf = buf;
+ if (hw_rev >= IRDMA_GEN_2 && !pfpdu->ah) {
+ irdma_ieq_create_ah(qp, buf);
+ if (!pfpdu->ah)
+ goto error;
+ goto exit;
+ }
+ if (hw_rev == IRDMA_GEN_1)
+ irdma_ieq_process_fpdus(qp, ieq);
+ else if (pfpdu->ah && pfpdu->ah->ah_info.ah_valid)
+ irdma_ieq_process_fpdus(qp, ieq);
+exit:
+ spin_unlock_irqrestore(&pfpdu->lock, flags);
+
+ return;
+
+error:
+ irdma_puda_ret_bufpool(ieq, buf);
+ spin_unlock_irqrestore(&pfpdu->lock, flags);
+}
+
+/**
+ * irdma_ieq_receive - received exception buffer
+ * @vsi: VSI of device
+ * @buf: exception buffer received
+ */
+static void irdma_ieq_receive(struct irdma_sc_vsi *vsi,
+ struct irdma_puda_buf *buf)
+{
+ struct irdma_puda_rsrc *ieq = vsi->ieq;
+ struct irdma_sc_qp *qp = NULL;
+ u32 wqe_idx = ieq->compl_rxwqe_idx;
+
+ qp = irdma_ieq_get_qp(vsi->dev, buf);
+ if (!qp) {
+ ieq->stats_bad_qp_id++;
+ irdma_puda_ret_bufpool(ieq, buf);
+ } else {
+ irdma_ieq_handle_exception(ieq, qp, buf);
+ }
+ /*
+ * ieq->rx_wqe_idx is used by irdma_puda_replenish_rq()
+ * to know at which wqe_idx to start replenishing the rq
+ */
+ if (!ieq->rxq_invalid_cnt)
+ ieq->rx_wqe_idx = wqe_idx;
+ ieq->rxq_invalid_cnt++;
+}
+
+/**
+ * irdma_ieq_tx_compl - put back after sending completed exception buffer
+ * @vsi: sc VSI struct
+ * @sqwrid: pointer to puda buffer
+ */
+static void irdma_ieq_tx_compl(struct irdma_sc_vsi *vsi, void *sqwrid)
+{
+ struct irdma_puda_rsrc *ieq = vsi->ieq;
+ struct irdma_puda_buf *buf = sqwrid;
+
+ irdma_puda_ret_bufpool(ieq, buf);
+}
+
+/**
+ * irdma_ieq_cleanup_qp - free pending fpdu buffers for a qp being destroyed
+ * @ieq: ieq resource
+ * @qp: qp being destroyed
+ */
+void irdma_ieq_cleanup_qp(struct irdma_puda_rsrc *ieq, struct irdma_sc_qp *qp)
+{
+ struct irdma_puda_buf *buf;
+ struct irdma_pfpdu *pfpdu = &qp->pfpdu;
+ struct list_head *rxlist = &pfpdu->rxlist;
+
+ if (qp->pfpdu.ah) {
+ irdma_puda_free_ah(ieq->dev, qp->pfpdu.ah);
+ qp->pfpdu.ah = NULL;
+ qp->pfpdu.ah_buf = NULL;
+ }
+
+ if (!pfpdu->mode)
+ return;
+
+ while (!list_empty(rxlist)) {
+ buf = irdma_puda_get_listbuf(rxlist);
+ irdma_puda_ret_bufpool(ieq, buf);
+ }
+}
diff --git a/drivers/infiniband/hw/irdma/puda.h b/drivers/infiniband/hw/irdma/puda.h
new file mode 100644
index 000000000000..d65041bee667
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/puda.h
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2020 Intel Corporation */
+#ifndef IRDMA_PUDA_H
+#define IRDMA_PUDA_H
+
+#define IRDMA_IEQ_MPA_FRAMING 6
+#define IRDMA_TCP_OFFSET 40
+#define IRDMA_IPV4_PAD 20
+#define IRDMA_MRK_BLK_SZ 512
+
+enum puda_rsrc_type {
+ IRDMA_PUDA_RSRC_TYPE_ILQ = 1,
+ IRDMA_PUDA_RSRC_TYPE_IEQ,
+ IRDMA_PUDA_RSRC_TYPE_MAX, /* Must be last entry */
+};
+
+enum puda_rsrc_complete {
+ PUDA_CQ_CREATED = 1,
+ PUDA_QP_CREATED,
+ PUDA_TX_COMPLETE,
+ PUDA_RX_COMPLETE,
+ PUDA_HASH_CRC_COMPLETE,
+};
+
+struct irdma_sc_dev;
+struct irdma_sc_qp;
+struct irdma_sc_cq;
+
+struct irdma_puda_cmpl_info {
+ struct irdma_qp_uk *qp;
+ u8 q_type;
+ u8 l3proto;
+ u8 l4proto;
+ u16 vlan;
+ u32 payload_len;
+ u32 compl_error; /* No_err=0, else major and minor err code */
+ u32 qp_id;
+ u32 wqe_idx;
+ bool ipv4:1;
+ bool smac_valid:1;
+ bool vlan_valid:1;
+ u8 smac[ETH_ALEN];
+};
+
+struct irdma_puda_send_info {
+ u64 paddr; /* Physical address */
+ u32 len;
+ u32 ah_id;
+ u8 tcplen;
+ u8 maclen;
+ bool ipv4:1;
+ bool do_lpb:1;
+ void *scratch;
+};
+
+struct irdma_puda_buf {
+ struct list_head list; /* MUST be first entry */
+ struct irdma_dma_mem mem; /* DMA memory for the buffer */
+ struct irdma_puda_buf *next; /* for alloclist in rsrc struct */
+ struct irdma_virt_mem buf_mem; /* Buffer memory for this buffer */
+ void *scratch;
+ u8 *iph;
+ u8 *tcph;
+ u8 *data;
+ u16 datalen;
+ u16 vlan_id;
+ u8 tcphlen; /* tcp length in bytes */
+ u8 maclen; /* mac length in bytes */
+ u32 totallen; /* maclen+iphlen+tcphlen+datalen */
+ refcount_t refcount;
+ u8 hdrlen;
+ bool ipv4:1;
+ bool vlan_valid:1;
+ bool do_lpb:1; /* Loopback buffer */
+ bool smac_valid:1;
+ u32 seqnum;
+ u32 ah_id;
+ u8 smac[ETH_ALEN];
+ struct irdma_sc_vsi *vsi;
+};
+
+struct irdma_puda_rsrc_info {
+ void (*receive)(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *buf);
+ void (*xmit_complete)(struct irdma_sc_vsi *vsi, void *sqwrid);
+ enum puda_rsrc_type type; /* ILQ or IEQ */
+ u32 count;
+ u32 pd_id;
+ u32 cq_id;
+ u32 qp_id;
+ u32 sq_size;
+ u32 rq_size;
+ u32 tx_buf_cnt; /* total bufs allocated will be rq_size + tx_buf_cnt */
+ u16 buf_size;
+ u16 stats_idx;
+ bool stats_idx_valid:1;
+ int abi_ver;
+};
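+
+/*
+ * Purely illustrative sketch of how a caller might fill this in before
+ * irdma_puda_create_rsrc(); the numeric values are hypothetical, not
+ * recommendations:
+ *
+ *	struct irdma_puda_rsrc_info info = {
+ *		.type = IRDMA_PUDA_RSRC_TYPE_IEQ,
+ *		.count = 1,
+ *		.pd_id = 1, .cq_id = 2, .qp_id = 3,
+ *		.sq_size = 32, .rq_size = 32,
+ *		.tx_buf_cnt = 32,
+ *		.buf_size = 2048,
+ *		.abi_ver = 1,
+ *	};
+ *
+ *	ret = irdma_puda_create_rsrc(vsi, &info);
+ */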
+
+struct irdma_puda_rsrc {
+ struct irdma_sc_cq cq;
+ struct irdma_sc_qp qp;
+ struct irdma_sc_pd sc_pd;
+ struct irdma_sc_dev *dev;
+ struct irdma_sc_vsi *vsi;
+ struct irdma_dma_mem cqmem;
+ struct irdma_dma_mem qpmem;
+ struct irdma_virt_mem ilq_mem;
+ enum puda_rsrc_complete cmpl;
+ enum puda_rsrc_type type;
+ u16 buf_size; /* buf must be max datalen + tcpip hdr + mac */
+ u32 cq_id;
+ u32 qp_id;
+ u32 sq_size;
+ u32 rq_size;
+ u32 cq_size;
+ struct irdma_sq_uk_wr_trk_info *sq_wrtrk_array;
+ u64 *rq_wrid_array;
+ u32 compl_rxwqe_idx;
+ u32 rx_wqe_idx;
+ u32 rxq_invalid_cnt;
+ u32 tx_wqe_avail_cnt;
+ struct list_head txpend;
+ struct list_head bufpool; /* free buffers pool list for recv and xmit */
+ u32 alloc_buf_count;
+ u32 avail_buf_count; /* snapshot of currently available buffers */
+ spinlock_t bufpool_lock;
+ struct irdma_puda_buf *alloclist;
+ void (*receive)(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *buf);
+ void (*xmit_complete)(struct irdma_sc_vsi *vsi, void *sqwrid);
+ /* puda stats */
+ u64 stats_buf_alloc_fail;
+ u64 stats_pkt_rcvd;
+ u64 stats_pkt_sent;
+ u64 stats_rcvd_pkt_err;
+ u64 stats_sent_pkt_q;
+ u64 stats_bad_qp_id;
+ /* IEQ stats */
+ u64 fpdu_processed;
+ u64 bad_seq_num;
+ u64 crc_err;
+ u64 pmode_count;
+ u64 partials_handled;
+ u16 stats_idx;
+ bool check_crc:1;
+ bool stats_idx_valid:1;
+};
+
+struct irdma_puda_buf *irdma_puda_get_bufpool(struct irdma_puda_rsrc *rsrc);
+void irdma_puda_ret_bufpool(struct irdma_puda_rsrc *rsrc,
+ struct irdma_puda_buf *buf);
+void irdma_puda_send_buf(struct irdma_puda_rsrc *rsrc,
+ struct irdma_puda_buf *buf);
+int irdma_puda_send(struct irdma_sc_qp *qp, struct irdma_puda_send_info *info);
+int irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi,
+ struct irdma_puda_rsrc_info *info);
+void irdma_puda_dele_rsrc(struct irdma_sc_vsi *vsi, enum puda_rsrc_type type,
+ bool reset);
+int irdma_puda_poll_cmpl(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq,
+ u32 *compl_err);
+
+struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev,
+ struct irdma_puda_buf *buf);
+int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
+ struct irdma_puda_buf *buf);
+int irdma_ieq_check_mpacrc(const void *addr, u32 len, u32 val);
+void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp);
+void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len, u32 seqnum);
+int irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp);
+int irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq);
+int irdma_cqp_qp_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp);
+void irdma_cqp_cq_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq);
+void irdma_puda_ieq_get_ah_info(struct irdma_sc_qp *qp,
+ struct irdma_ah_info *ah_info);
+int irdma_puda_create_ah(struct irdma_sc_dev *dev,
+ struct irdma_ah_info *ah_info, bool wait,
+ enum puda_rsrc_type type, void *cb_param,
+ struct irdma_sc_ah **ah);
+void irdma_puda_free_ah(struct irdma_sc_dev *dev, struct irdma_sc_ah *ah);
+void irdma_ieq_process_fpdus(struct irdma_sc_qp *qp,
+ struct irdma_puda_rsrc *ieq);
+void irdma_ieq_cleanup_qp(struct irdma_puda_rsrc *ieq, struct irdma_sc_qp *qp);
+#endif /* IRDMA_PUDA_H */
diff --git a/drivers/infiniband/hw/irdma/trace.c b/drivers/infiniband/hw/irdma/trace.c
new file mode 100644
index 000000000000..fc2f56697741
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/trace.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Intel Corporation */
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+const char *print_ip_addr(struct trace_seq *p, u32 *addr, u16 port, bool ipv4)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ if (ipv4) {
+ __be32 myaddr = htonl(*addr);
+
+ trace_seq_printf(p, "%pI4:%d", &myaddr, htons(port));
+ } else {
+ trace_seq_printf(p, "%pI6:%d", addr, htons(port));
+ }
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+const char *parse_iw_event_type(enum iw_cm_event_type iw_type)
+{
+ switch (iw_type) {
+ case IW_CM_EVENT_CONNECT_REQUEST:
+ return "IwRequest";
+ case IW_CM_EVENT_CONNECT_REPLY:
+ return "IwReply";
+ case IW_CM_EVENT_ESTABLISHED:
+ return "IwEstablished";
+ case IW_CM_EVENT_DISCONNECT:
+ return "IwDisconnect";
+ case IW_CM_EVENT_CLOSE:
+ return "IwClose";
+ }
+
+ return "Unknown";
+}
+
+const char *parse_cm_event_type(enum irdma_cm_event_type cm_type)
+{
+ switch (cm_type) {
+ case IRDMA_CM_EVENT_ESTABLISHED:
+ return "CmEstablished";
+ case IRDMA_CM_EVENT_MPA_REQ:
+ return "CmMPA_REQ";
+ case IRDMA_CM_EVENT_MPA_CONNECT:
+ return "CmMPA_CONNECT";
+ case IRDMA_CM_EVENT_MPA_ACCEPT:
+ return "CmMPA_ACCEPT";
+ case IRDMA_CM_EVENT_MPA_REJECT:
+ return "CmMPA_REJECT";
+ case IRDMA_CM_EVENT_MPA_ESTABLISHED:
+ return "CmMPA_ESTABLISHED";
+ case IRDMA_CM_EVENT_CONNECTED:
+ return "CmConnected";
+ case IRDMA_CM_EVENT_RESET:
+ return "CmReset";
+ case IRDMA_CM_EVENT_ABORTED:
+ return "CmAborted";
+ case IRDMA_CM_EVENT_UNKNOWN:
+ return "none";
+ }
+ return "Unknown";
+}
+
+const char *parse_cm_state(enum irdma_cm_node_state state)
+{
+ switch (state) {
+ case IRDMA_CM_STATE_UNKNOWN:
+ return "UNKNOWN";
+ case IRDMA_CM_STATE_INITED:
+ return "INITED";
+ case IRDMA_CM_STATE_LISTENING:
+ return "LISTENING";
+ case IRDMA_CM_STATE_SYN_RCVD:
+ return "SYN_RCVD";
+ case IRDMA_CM_STATE_SYN_SENT:
+ return "SYN_SENT";
+ case IRDMA_CM_STATE_ONE_SIDE_ESTABLISHED:
+ return "ONE_SIDE_ESTABLISHED";
+ case IRDMA_CM_STATE_ESTABLISHED:
+ return "ESTABLISHED";
+ case IRDMA_CM_STATE_ACCEPTING:
+ return "ACCEPTING";
+ case IRDMA_CM_STATE_MPAREQ_SENT:
+ return "MPAREQ_SENT";
+ case IRDMA_CM_STATE_MPAREQ_RCVD:
+ return "MPAREQ_RCVD";
+ case IRDMA_CM_STATE_MPAREJ_RCVD:
+ return "MPAREJ_RECVD";
+ case IRDMA_CM_STATE_OFFLOADED:
+ return "OFFLOADED";
+ case IRDMA_CM_STATE_FIN_WAIT1:
+ return "FIN_WAIT1";
+ case IRDMA_CM_STATE_FIN_WAIT2:
+ return "FIN_WAIT2";
+ case IRDMA_CM_STATE_CLOSE_WAIT:
+ return "CLOSE_WAIT";
+ case IRDMA_CM_STATE_TIME_WAIT:
+ return "TIME_WAIT";
+ case IRDMA_CM_STATE_LAST_ACK:
+ return "LAST_ACK";
+ case IRDMA_CM_STATE_CLOSING:
+ return "CLOSING";
+ case IRDMA_CM_STATE_LISTENER_DESTROYED:
+ return "LISTENER_DESTROYED";
+ case IRDMA_CM_STATE_CLOSED:
+ return "CLOSED";
+ }
+ return ("Bad state");
+}
diff --git a/drivers/infiniband/hw/irdma/trace.h b/drivers/infiniband/hw/irdma/trace.h
new file mode 100644
index 000000000000..b8085a66b9f8
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/trace.h
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Intel Corporation */
+#include "trace_cm.h"
diff --git a/drivers/infiniband/hw/irdma/trace_cm.h b/drivers/infiniband/hw/irdma/trace_cm.h
new file mode 100644
index 000000000000..0d1699b55241
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/trace_cm.h
@@ -0,0 +1,460 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 - 2021 Intel Corporation */
+#if !defined(__TRACE_CM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __TRACE_CM_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "main.h"
+
+const char *print_ip_addr(struct trace_seq *p, u32 *addr, u16 port, bool ipv4);
+const char *parse_iw_event_type(enum iw_cm_event_type iw_type);
+const char *parse_cm_event_type(enum irdma_cm_event_type cm_type);
+const char *parse_cm_state(enum irdma_cm_node_state);
+#define __print_ip_addr(addr, port, ipv4) print_ip_addr(p, addr, port, ipv4)
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM irdma_cm
+
+TRACE_EVENT(irdma_create_listen,
+ TP_PROTO(struct irdma_device *iwdev, struct irdma_cm_info *cm_info),
+ TP_ARGS(iwdev, cm_info),
+ TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+ __dynamic_array(u32, laddr, 4)
+ __field(u16, lport)
+ __field(bool, ipv4)
+ ),
+ TP_fast_assign(__entry->iwdev = iwdev;
+ __entry->lport = cm_info->loc_port;
+ __entry->ipv4 = cm_info->ipv4;
+ memcpy(__get_dynamic_array(laddr),
+ cm_info->loc_addr, 4);
+ ),
+ TP_printk("iwdev=%p loc: %s",
+ __entry->iwdev,
+ __print_ip_addr(__get_dynamic_array(laddr),
+ __entry->lport, __entry->ipv4)
+ )
+);
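+
+/*
+ * Note: TRACE_EVENT(name, ...) also generates the trace_<name>() hook used
+ * at the call site, so the event above is emitted from the CM code roughly
+ * as (call site shown for illustration; arguments assumed):
+ *
+ *	trace_irdma_create_listen(iwdev, cm_info);
+ */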
+
+TRACE_EVENT(irdma_dec_refcnt_listen,
+ TP_PROTO(struct irdma_cm_listener *listener, void *caller),
+ TP_ARGS(listener, caller),
+ TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+ __field(u32, refcnt)
+ __dynamic_array(u32, laddr, 4)
+ __field(u16, lport)
+ __field(bool, ipv4)
+ __field(void *, caller)
+ ),
+ TP_fast_assign(__entry->iwdev = listener->iwdev;
+ __entry->lport = listener->loc_port;
+ __entry->ipv4 = listener->ipv4;
+ memcpy(__get_dynamic_array(laddr),
+ listener->loc_addr, 4);
+ ),
+ TP_printk("iwdev=%p caller=%pS loc: %s",
+ __entry->iwdev,
+ __entry->caller,
+ __print_ip_addr(__get_dynamic_array(laddr),
+ __entry->lport, __entry->ipv4)
+ )
+);
+
+DECLARE_EVENT_CLASS(listener_template,
+ TP_PROTO(struct irdma_cm_listener *listener),
+ TP_ARGS(listener),
+ TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+ __field(u16, lport)
+ __field(u16, vlan_id)
+ __field(bool, ipv4)
+ __field(enum irdma_cm_listener_state,
+ state)
+ __dynamic_array(u32, laddr, 4)
+ ),
+ TP_fast_assign(__entry->iwdev = listener->iwdev;
+ __entry->lport = listener->loc_port;
+ __entry->vlan_id = listener->vlan_id;
+ __entry->ipv4 = listener->ipv4;
+ __entry->state = listener->listener_state;
+ memcpy(__get_dynamic_array(laddr),
+ listener->loc_addr, 4);
+ ),
+ TP_printk("iwdev=%p vlan=%d loc: %s",
+ __entry->iwdev,
+ __entry->vlan_id,
+ __print_ip_addr(__get_dynamic_array(laddr),
+ __entry->lport, __entry->ipv4)
+ )
+);
+
+DEFINE_EVENT(listener_template, irdma_find_listener,
+ TP_PROTO(struct irdma_cm_listener *listener),
+ TP_ARGS(listener));
+
+DEFINE_EVENT(listener_template, irdma_del_multiple_qhash,
+ TP_PROTO(struct irdma_cm_listener *listener),
+ TP_ARGS(listener));
+
+TRACE_EVENT(irdma_negotiate_mpa_v2,
+ TP_PROTO(struct irdma_cm_node *cm_node),
+ TP_ARGS(cm_node),
+ TP_STRUCT__entry(__field(struct irdma_cm_node *, cm_node)
+ __field(u16, ord_size)
+ __field(u16, ird_size)
+ ),
+ TP_fast_assign(__entry->cm_node = cm_node;
+ __entry->ord_size = cm_node->ord_size;
+ __entry->ird_size = cm_node->ird_size;
+ ),
+ TP_printk("MPVA2 Negotiated cm_node=%p ORD:[%d], IRD:[%d]",
+ __entry->cm_node,
+ __entry->ord_size,
+ __entry->ird_size
+ )
+);
+
+DECLARE_EVENT_CLASS(tos_template,
+ TP_PROTO(struct irdma_device *iwdev, u8 tos, u8 user_pri),
+ TP_ARGS(iwdev, tos, user_pri),
+ TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+ __field(u8, tos)
+ __field(u8, user_pri)
+ ),
+ TP_fast_assign(__entry->iwdev = iwdev;
+ __entry->tos = tos;
+ __entry->user_pri = user_pri;
+ ),
+ TP_printk("iwdev=%p TOS:[%d] UP:[%d]",
+ __entry->iwdev,
+ __entry->tos,
+ __entry->user_pri
+ )
+);
+
+DEFINE_EVENT(tos_template, irdma_listener_tos,
+ TP_PROTO(struct irdma_device *iwdev, u8 tos, u8 user_pri),
+ TP_ARGS(iwdev, tos, user_pri));
+
+DEFINE_EVENT(tos_template, irdma_dcb_tos,
+ TP_PROTO(struct irdma_device *iwdev, u8 tos, u8 user_pri),
+ TP_ARGS(iwdev, tos, user_pri));
+
+DECLARE_EVENT_CLASS(qhash_template,
+ TP_PROTO(struct irdma_device *iwdev,
+ struct irdma_cm_listener *listener,
+ const char *dev_addr),
+ TP_ARGS(iwdev, listener, dev_addr),
+ TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+ __field(u16, lport)
+ __field(u16, vlan_id)
+ __field(bool, ipv4)
+ __dynamic_array(u32, laddr, 4)
+ __dynamic_array(u32, mac, ETH_ALEN)
+ ),
+ TP_fast_assign(__entry->iwdev = iwdev;
+ __entry->lport = listener->loc_port;
+ __entry->vlan_id = listener->vlan_id;
+ __entry->ipv4 = listener->ipv4;
+ memcpy(__get_dynamic_array(laddr),
+ listener->loc_addr, 4);
+ ether_addr_copy(__get_dynamic_array(mac),
+ dev_addr);
+ ),
+ TP_printk("iwdev=%p vlan=%d MAC=%6phC loc: %s",
+ __entry->iwdev,
+ __entry->vlan_id,
+ __get_dynamic_array(mac),
+ __print_ip_addr(__get_dynamic_array(laddr),
+ __entry->lport, __entry->ipv4)
+ )
+);
+
+DEFINE_EVENT(qhash_template, irdma_add_mqh_6,
+ TP_PROTO(struct irdma_device *iwdev,
+ struct irdma_cm_listener *listener,
+ const char *dev_addr),
+ TP_ARGS(iwdev, listener, dev_addr));
+
+DEFINE_EVENT(qhash_template, irdma_add_mqh_4,
+ TP_PROTO(struct irdma_device *iwdev,
+ struct irdma_cm_listener *listener,
+ const char *dev_addr),
+ TP_ARGS(iwdev, listener, dev_addr));
+
+TRACE_EVENT(irdma_addr_resolve,
+ TP_PROTO(struct irdma_device *iwdev, char *dev_addr),
+ TP_ARGS(iwdev, dev_addr),
+ TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+ __dynamic_array(u8, mac, ETH_ALEN)
+ ),
+ TP_fast_assign(__entry->iwdev = iwdev;
+ ether_addr_copy(__get_dynamic_array(mac), dev_addr);
+ ),
+ TP_printk("iwdev=%p MAC=%6phC", __entry->iwdev,
+ __get_dynamic_array(mac)
+ )
+);
+
+TRACE_EVENT(irdma_send_cm_event,
+ TP_PROTO(struct irdma_cm_node *cm_node, struct iw_cm_id *cm_id,
+ enum iw_cm_event_type type, int status, void *caller),
+ TP_ARGS(cm_node, cm_id, type, status, caller),
+ TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+ __field(struct irdma_cm_node *, cm_node)
+ __field(struct iw_cm_id *, cm_id)
+ __field(u32, refcount)
+ __field(u16, lport)
+ __field(u16, rport)
+ __field(enum irdma_cm_node_state, state)
+ __field(bool, ipv4)
+ __field(u16, vlan_id)
+ __field(int, accel)
+ __field(enum iw_cm_event_type, type)
+ __field(int, status)
+ __field(void *, caller)
+ __dynamic_array(u32, laddr, 4)
+ __dynamic_array(u32, raddr, 4)
+ ),
+ TP_fast_assign(__entry->iwdev = cm_node->iwdev;
+ __entry->cm_node = cm_node;
+ __entry->cm_id = cm_id;
+ __entry->refcount = refcount_read(&cm_node->refcnt);
+ __entry->state = cm_node->state;
+ __entry->lport = cm_node->loc_port;
+ __entry->rport = cm_node->rem_port;
+ __entry->ipv4 = cm_node->ipv4;
+ __entry->vlan_id = cm_node->vlan_id;
+ __entry->accel = cm_node->accelerated;
+ __entry->type = type;
+ __entry->status = status;
+ __entry->caller = caller;
+ memcpy(__get_dynamic_array(laddr),
+ cm_node->loc_addr, 4);
+ memcpy(__get_dynamic_array(raddr),
+ cm_node->rem_addr, 4);
+ ),
+ TP_printk("iwdev=%p caller=%pS cm_id=%p node=%p refcnt=%d vlan_id=%d accel=%d state=%s event_type=%s status=%d loc: %s rem: %s",
+ __entry->iwdev,
+ __entry->caller,
+ __entry->cm_id,
+ __entry->cm_node,
+ __entry->refcount,
+ __entry->vlan_id,
+ __entry->accel,
+ parse_cm_state(__entry->state),
+ parse_iw_event_type(__entry->type),
+ __entry->status,
+ __print_ip_addr(__get_dynamic_array(laddr),
+ __entry->lport, __entry->ipv4),
+ __print_ip_addr(__get_dynamic_array(raddr),
+ __entry->rport, __entry->ipv4)
+ )
+);
+
+TRACE_EVENT(irdma_send_cm_event_no_node,
+ TP_PROTO(struct iw_cm_id *cm_id, enum iw_cm_event_type type,
+ int status, void *caller),
+ TP_ARGS(cm_id, type, status, caller),
+ TP_STRUCT__entry(__field(struct iw_cm_id *, cm_id)
+ __field(enum iw_cm_event_type, type)
+ __field(int, status)
+ __field(void *, caller)
+ ),
+ TP_fast_assign(__entry->cm_id = cm_id;
+ __entry->type = type;
+ __entry->status = status;
+ __entry->caller = caller;
+ ),
+ TP_printk("cm_id=%p caller=%pS event_type=%s status=%d",
+ __entry->cm_id,
+ __entry->caller,
+ parse_iw_event_type(__entry->type),
+ __entry->status
+ )
+);
+
+DECLARE_EVENT_CLASS(cm_node_template,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller),
+ TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+ __field(struct irdma_cm_node *, cm_node)
+ __field(u32, refcount)
+ __field(u16, lport)
+ __field(u16, rport)
+ __field(enum irdma_cm_node_state, state)
+ __field(bool, ipv4)
+ __field(u16, vlan_id)
+ __field(int, accel)
+ __field(enum irdma_cm_event_type, type)
+ __field(void *, caller)
+ __dynamic_array(u32, laddr, 4)
+ __dynamic_array(u32, raddr, 4)
+ ),
+ TP_fast_assign(__entry->iwdev = cm_node->iwdev;
+ __entry->cm_node = cm_node;
+ __entry->refcount = refcount_read(&cm_node->refcnt);
+ __entry->state = cm_node->state;
+ __entry->lport = cm_node->loc_port;
+ __entry->rport = cm_node->rem_port;
+ __entry->ipv4 = cm_node->ipv4;
+ __entry->vlan_id = cm_node->vlan_id;
+ __entry->accel = cm_node->accelerated;
+ __entry->type = type;
+ __entry->caller = caller;
+ memcpy(__get_dynamic_array(laddr),
+ cm_node->loc_addr, 4);
+ memcpy(__get_dynamic_array(raddr),
+ cm_node->rem_addr, 4);
+ ),
+ TP_printk("iwdev=%p caller=%pS node=%p refcnt=%d vlan_id=%d accel=%d state=%s event_type=%s loc: %s rem: %s",
+ __entry->iwdev,
+ __entry->caller,
+ __entry->cm_node,
+ __entry->refcount,
+ __entry->vlan_id,
+ __entry->accel,
+ parse_cm_state(__entry->state),
+ parse_cm_event_type(__entry->type),
+ __print_ip_addr(__get_dynamic_array(laddr),
+ __entry->lport, __entry->ipv4),
+ __print_ip_addr(__get_dynamic_array(raddr),
+ __entry->rport, __entry->ipv4)
+ )
+);
+
+DEFINE_EVENT(cm_node_template, irdma_create_event,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller));
+
+DEFINE_EVENT(cm_node_template, irdma_accept,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller));
+
+DEFINE_EVENT(cm_node_template, irdma_connect,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller));
+
+DEFINE_EVENT(cm_node_template, irdma_reject,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller));
+
+DEFINE_EVENT(cm_node_template, irdma_find_node,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller));
+
+DEFINE_EVENT(cm_node_template, irdma_send_reset,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller));
+
+DEFINE_EVENT(cm_node_template, irdma_rem_ref_cm_node,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller));
+
+DEFINE_EVENT(cm_node_template, irdma_cm_event_handler,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller));
+
+DECLARE_EVENT_CLASS(open_err_template,
+ TP_PROTO(struct irdma_cm_node *cm_node, bool reset, void *caller),
+ TP_ARGS(cm_node, reset, caller),
+ TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+ __field(struct irdma_cm_node *, cm_node)
+ __field(enum irdma_cm_node_state, state)
+ __field(bool, reset)
+ __field(void *, caller)
+ ),
+ TP_fast_assign(__entry->iwdev = cm_node->iwdev;
+ __entry->cm_node = cm_node;
+ __entry->state = cm_node->state;
+ __entry->reset = reset;
+ __entry->caller = caller;
+ ),
+ TP_printk("iwdev=%p caller=%pS node%p reset=%d state=%s",
+ __entry->iwdev,
+ __entry->caller,
+ __entry->cm_node,
+ __entry->reset,
+ parse_cm_state(__entry->state)
+ )
+);
+
+DEFINE_EVENT(open_err_template, irdma_active_open_err,
+ TP_PROTO(struct irdma_cm_node *cm_node, bool reset, void *caller),
+ TP_ARGS(cm_node, reset, caller));
+
+DEFINE_EVENT(open_err_template, irdma_passive_open_err,
+ TP_PROTO(struct irdma_cm_node *cm_node, bool reset, void *caller),
+ TP_ARGS(cm_node, reset, caller));
+
+DECLARE_EVENT_CLASS(cm_node_ah_template,
+ TP_PROTO(struct irdma_cm_node *cm_node),
+ TP_ARGS(cm_node),
+ TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+ __field(struct irdma_cm_node *, cm_node)
+ __field(struct irdma_sc_ah *, ah)
+ __field(u32, refcount)
+ __field(u16, lport)
+ __field(u16, rport)
+ __field(enum irdma_cm_node_state, state)
+ __field(bool, ipv4)
+ __field(u16, vlan_id)
+ __field(int, accel)
+ __dynamic_array(u32, laddr, 4)
+ __dynamic_array(u32, raddr, 4)
+ ),
+ TP_fast_assign(__entry->iwdev = cm_node->iwdev;
+ __entry->cm_node = cm_node;
+ __entry->ah = cm_node->ah;
+ __entry->refcount = refcount_read(&cm_node->refcnt);
+ __entry->lport = cm_node->loc_port;
+ __entry->rport = cm_node->rem_port;
+ __entry->state = cm_node->state;
+ __entry->ipv4 = cm_node->ipv4;
+ __entry->vlan_id = cm_node->vlan_id;
+ __entry->accel = cm_node->accelerated;
+ memcpy(__get_dynamic_array(laddr),
+ cm_node->loc_addr, 4);
+ memcpy(__get_dynamic_array(raddr),
+ cm_node->rem_addr, 4);
+ ),
+ TP_printk("iwdev=%p node=%p ah=%p refcnt=%d vlan_id=%d accel=%d state=%s loc: %s rem: %s",
+ __entry->iwdev,
+ __entry->cm_node,
+ __entry->ah,
+ __entry->refcount,
+ __entry->vlan_id,
+ __entry->accel,
+ parse_cm_state(__entry->state),
+ __print_ip_addr(__get_dynamic_array(laddr),
+ __entry->lport, __entry->ipv4),
+ __print_ip_addr(__get_dynamic_array(raddr),
+ __entry->rport, __entry->ipv4)
+ )
+);
+
+DEFINE_EVENT(cm_node_ah_template, irdma_cm_free_ah,
+ TP_PROTO(struct irdma_cm_node *cm_node),
+ TP_ARGS(cm_node));
+
+DEFINE_EVENT(cm_node_ah_template, irdma_create_ah,
+ TP_PROTO(struct irdma_cm_node *cm_node),
+ TP_ARGS(cm_node));
+
+#endif /* __TRACE_CM_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_cm
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h
new file mode 100644
index 000000000000..cab4896640a1
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/type.h
@@ -0,0 +1,1674 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#ifndef IRDMA_TYPE_H
+#define IRDMA_TYPE_H
+#include "osdep.h"
+#include "irdma.h"
+#include "user.h"
+#include "hmc.h"
+#include "uda.h"
+#include "ws.h"
+#include "virtchnl.h"
+
+#define IRDMA_DEBUG_ERR "ERR"
+#define IRDMA_DEBUG_INIT "INIT"
+#define IRDMA_DEBUG_DEV "DEV"
+#define IRDMA_DEBUG_CM "CM"
+#define IRDMA_DEBUG_VERBS "VERBS"
+#define IRDMA_DEBUG_PUDA "PUDA"
+#define IRDMA_DEBUG_ILQ "ILQ"
+#define IRDMA_DEBUG_IEQ "IEQ"
+#define IRDMA_DEBUG_QP "QP"
+#define IRDMA_DEBUG_CQ "CQ"
+#define IRDMA_DEBUG_MR "MR"
+#define IRDMA_DEBUG_PBLE "PBLE"
+#define IRDMA_DEBUG_WQE "WQE"
+#define IRDMA_DEBUG_AEQ "AEQ"
+#define IRDMA_DEBUG_CQP "CQP"
+#define IRDMA_DEBUG_HMC "HMC"
+#define IRDMA_DEBUG_USER "USER"
+#define IRDMA_DEBUG_VIRT "VIRT"
+#define IRDMA_DEBUG_DCB "DCB"
+#define IRDMA_DEBUG_CQE "CQE"
+#define IRDMA_DEBUG_CLNT "CLNT"
+#define IRDMA_DEBUG_WS "WS"
+#define IRDMA_DEBUG_STATS "STATS"
+
+enum irdma_page_size {
+ IRDMA_PAGE_SIZE_4K = 0,
+ IRDMA_PAGE_SIZE_2M,
+ IRDMA_PAGE_SIZE_1G,
+};
+
+enum irdma_hdrct_flags {
+ DDP_LEN_FLAG = 0x80,
+ DDP_HDR_FLAG = 0x40,
+ RDMA_HDR_FLAG = 0x20,
+};
+
+enum irdma_term_layers {
+ LAYER_RDMA = 0,
+ LAYER_DDP = 1,
+ LAYER_MPA = 2,
+};
+
+enum irdma_term_error_types {
+ RDMAP_REMOTE_PROT = 1,
+ RDMAP_REMOTE_OP = 2,
+ DDP_CATASTROPHIC = 0,
+ DDP_TAGGED_BUF = 1,
+ DDP_UNTAGGED_BUF = 2,
+ DDP_LLP = 3,
+};
+
+enum irdma_term_rdma_errors {
+ RDMAP_INV_STAG = 0x00,
+ RDMAP_INV_BOUNDS = 0x01,
+ RDMAP_ACCESS = 0x02,
+ RDMAP_UNASSOC_STAG = 0x03,
+ RDMAP_TO_WRAP = 0x04,
+ RDMAP_INV_RDMAP_VER = 0x05,
+ RDMAP_UNEXPECTED_OP = 0x06,
+ RDMAP_CATASTROPHIC_LOCAL = 0x07,
+ RDMAP_CATASTROPHIC_GLOBAL = 0x08,
+ RDMAP_CANT_INV_STAG = 0x09,
+ RDMAP_UNSPECIFIED = 0xff,
+};
+
+enum irdma_term_ddp_errors {
+ DDP_CATASTROPHIC_LOCAL = 0x00,
+ DDP_TAGGED_INV_STAG = 0x00,
+ DDP_TAGGED_BOUNDS = 0x01,
+ DDP_TAGGED_UNASSOC_STAG = 0x02,
+ DDP_TAGGED_TO_WRAP = 0x03,
+ DDP_TAGGED_INV_DDP_VER = 0x04,
+ DDP_UNTAGGED_INV_QN = 0x01,
+ DDP_UNTAGGED_INV_MSN_NO_BUF = 0x02,
+ DDP_UNTAGGED_INV_MSN_RANGE = 0x03,
+ DDP_UNTAGGED_INV_MO = 0x04,
+ DDP_UNTAGGED_INV_TOO_LONG = 0x05,
+ DDP_UNTAGGED_INV_DDP_VER = 0x06,
+};
+
+enum irdma_term_mpa_errors {
+ MPA_CLOSED = 0x01,
+ MPA_CRC = 0x02,
+ MPA_MARKER = 0x03,
+ MPA_REQ_RSP = 0x04,
+};
+
+enum irdma_hw_stats_index {
+ /* gen1 - 32-bit */
+ IRDMA_HW_STAT_INDEX_IP4RXDISCARD = 0,
+ IRDMA_HW_STAT_INDEX_IP4RXTRUNC = 1,
+ IRDMA_HW_STAT_INDEX_IP4TXNOROUTE = 2,
+ IRDMA_HW_STAT_INDEX_IP6RXDISCARD = 3,
+ IRDMA_HW_STAT_INDEX_IP6RXTRUNC = 4,
+ IRDMA_HW_STAT_INDEX_IP6TXNOROUTE = 5,
+ IRDMA_HW_STAT_INDEX_TCPRTXSEG = 6,
+ IRDMA_HW_STAT_INDEX_TCPRXOPTERR = 7,
+ IRDMA_HW_STAT_INDEX_TCPRXPROTOERR = 8,
+ IRDMA_HW_STAT_INDEX_RXVLANERR = 9,
+ /* gen1 - 64-bit */
+ IRDMA_HW_STAT_INDEX_IP4RXOCTS = 10,
+ IRDMA_HW_STAT_INDEX_IP4RXPKTS = 11,
+ IRDMA_HW_STAT_INDEX_IP4RXFRAGS = 12,
+ IRDMA_HW_STAT_INDEX_IP4RXMCPKTS = 13,
+ IRDMA_HW_STAT_INDEX_IP4TXOCTS = 14,
+ IRDMA_HW_STAT_INDEX_IP4TXPKTS = 15,
+ IRDMA_HW_STAT_INDEX_IP4TXFRAGS = 16,
+ IRDMA_HW_STAT_INDEX_IP4TXMCPKTS = 17,
+ IRDMA_HW_STAT_INDEX_IP6RXOCTS = 18,
+ IRDMA_HW_STAT_INDEX_IP6RXPKTS = 19,
+ IRDMA_HW_STAT_INDEX_IP6RXFRAGS = 20,
+ IRDMA_HW_STAT_INDEX_IP6RXMCPKTS = 21,
+ IRDMA_HW_STAT_INDEX_IP6TXOCTS = 22,
+ IRDMA_HW_STAT_INDEX_IP6TXPKTS = 23,
+ IRDMA_HW_STAT_INDEX_IP6TXFRAGS = 24,
+ IRDMA_HW_STAT_INDEX_IP6TXMCPKTS = 25,
+ IRDMA_HW_STAT_INDEX_TCPRXSEGS = 26,
+ IRDMA_HW_STAT_INDEX_TCPTXSEG = 27,
+ IRDMA_HW_STAT_INDEX_RDMARXRDS = 28,
+ IRDMA_HW_STAT_INDEX_RDMARXSNDS = 29,
+ IRDMA_HW_STAT_INDEX_RDMARXWRS = 30,
+ IRDMA_HW_STAT_INDEX_RDMATXRDS = 31,
+ IRDMA_HW_STAT_INDEX_RDMATXSNDS = 32,
+ IRDMA_HW_STAT_INDEX_RDMATXWRS = 33,
+ IRDMA_HW_STAT_INDEX_RDMAVBND = 34,
+ IRDMA_HW_STAT_INDEX_RDMAVINV = 35,
+ IRDMA_HW_STAT_INDEX_IP4RXMCOCTS = 36,
+ IRDMA_HW_STAT_INDEX_IP4TXMCOCTS = 37,
+ IRDMA_HW_STAT_INDEX_IP6RXMCOCTS = 38,
+ IRDMA_HW_STAT_INDEX_IP6TXMCOCTS = 39,
+ IRDMA_HW_STAT_INDEX_UDPRXPKTS = 40,
+ IRDMA_HW_STAT_INDEX_UDPTXPKTS = 41,
+ IRDMA_HW_STAT_INDEX_MAX_GEN_1 = 42, /* Must be same value as next entry */
+ /* gen2 - 64-bit */
+ IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS = 42,
+ /* gen2 - 32-bit */
+ IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED = 43,
+ IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED = 44,
+ IRDMA_HW_STAT_INDEX_TXNPCNPSENT = 45,
+ IRDMA_HW_STAT_INDEX_MAX_GEN_2 = 46,
+
+ /* gen3 */
+ IRDMA_HW_STAT_INDEX_RNR_SENT = 46,
+ IRDMA_HW_STAT_INDEX_RNR_RCVD = 47,
+ IRDMA_HW_STAT_INDEX_RDMAORDLMTCNT = 48,
+ IRDMA_HW_STAT_INDEX_RDMAIRDLMTCNT = 49,
+ IRDMA_HW_STAT_INDEX_RDMARXATS = 50,
+ IRDMA_HW_STAT_INDEX_RDMATXATS = 51,
+ IRDMA_HW_STAT_INDEX_NAKSEQERR = 52,
+ IRDMA_HW_STAT_INDEX_NAKSEQERR_IMPLIED = 53,
+ IRDMA_HW_STAT_INDEX_RTO = 54,
+ IRDMA_HW_STAT_INDEX_RXOOOPKTS = 55,
+ IRDMA_HW_STAT_INDEX_ICRCERR = 56,
+
+ IRDMA_HW_STAT_INDEX_MAX_GEN_3 = 57,
+};
+
+enum irdma_feature_type {
+ IRDMA_FEATURE_FW_INFO = 0,
+ IRDMA_HW_VERSION_INFO = 1,
+ IRDMA_QP_MAX_INCR = 2,
+ IRDMA_CQ_MAX_INCR = 3,
+ IRDMA_CEQ_MAX_INCR = 4,
+ IRDMA_SD_MAX_INCR = 5,
+ IRDMA_MR_MAX_INCR = 6,
+ IRDMA_Q1_MAX_INCR = 7,
+ IRDMA_AH_MAX_INCR = 8,
+ IRDMA_SRQ_MAX_INCR = 9,
+ IRDMA_TIMER_MAX_INCR = 10,
+ IRDMA_XF_MAX_INCR = 11,
+ IRDMA_RRF_MAX_INCR = 12,
+ IRDMA_PBLE_MAX_INCR = 13,
+ IRDMA_OBJ_1 = 22,
+ IRDMA_OBJ_2 = 23,
+ IRDMA_ENDPT_TRK = 24,
+ IRDMA_FTN_INLINE_MAX = 25,
+ IRDMA_QSETS_MAX = 26,
+ IRDMA_ASO = 27,
+ IRDMA_FTN_FLAGS = 32,
+ IRDMA_FTN_NOP = 33,
+ IRDMA_MAX_FEATURES, /* Must be last entry */
+};
+
+enum irdma_sched_prio_type {
+ IRDMA_PRIO_WEIGHTED_RR = 1,
+ IRDMA_PRIO_STRICT = 2,
+ IRDMA_PRIO_WEIGHTED_STRICT = 3,
+};
+
+enum irdma_vm_vf_type {
+ IRDMA_VF_TYPE = 0,
+ IRDMA_VM_TYPE,
+ IRDMA_PF_TYPE,
+};
+
+enum irdma_cqp_hmc_profile {
+ IRDMA_HMC_PROFILE_DEFAULT = 1,
+ IRDMA_HMC_PROFILE_FAVOR_VF = 2,
+ IRDMA_HMC_PROFILE_EQUAL = 3,
+};
+
+enum irdma_quad_entry_type {
+ IRDMA_QHASH_TYPE_TCP_ESTABLISHED = 1,
+ IRDMA_QHASH_TYPE_TCP_SYN,
+ IRDMA_QHASH_TYPE_UDP_UNICAST,
+ IRDMA_QHASH_TYPE_UDP_MCAST,
+ IRDMA_QHASH_TYPE_ROCE_MCAST,
+ IRDMA_QHASH_TYPE_ROCEV2_HW,
+};
+
+enum irdma_quad_hash_manage_type {
+ IRDMA_QHASH_MANAGE_TYPE_DELETE = 0,
+ IRDMA_QHASH_MANAGE_TYPE_ADD,
+ IRDMA_QHASH_MANAGE_TYPE_MODIFY,
+};
+
+enum irdma_syn_rst_handling {
+ IRDMA_SYN_RST_HANDLING_HW_TCP_SECURE = 0,
+ IRDMA_SYN_RST_HANDLING_HW_TCP,
+ IRDMA_SYN_RST_HANDLING_FW_TCP_SECURE,
+ IRDMA_SYN_RST_HANDLING_FW_TCP,
+};
+
+enum irdma_queue_type {
+ IRDMA_QUEUE_TYPE_SQ_RQ = 0,
+ IRDMA_QUEUE_TYPE_CQP,
+ IRDMA_QUEUE_TYPE_SRQ,
+};
+
+struct irdma_sc_dev;
+struct irdma_vsi_pestat;
+
+struct irdma_dcqcn_cc_params {
+ u8 cc_cfg_valid;
+ u8 min_dec_factor;
+ u8 min_rate;
+ u8 dcqcn_f;
+ u16 rai_factor;
+ u16 hai_factor;
+ u16 dcqcn_t;
+ u32 dcqcn_b;
+ u32 rreduce_mperiod;
+};
+
+struct irdma_cqp_init_info {
+ u64 cqp_compl_ctx;
+ u64 host_ctx_pa;
+ u64 sq_pa;
+ struct irdma_sc_dev *dev;
+ struct irdma_cqp_quanta *sq;
+ struct irdma_dcqcn_cc_params dcqcn_params;
+ __le64 *host_ctx;
+ u64 *scratch_array;
+ u32 sq_size;
+ struct irdma_ooo_cqp_op *ooo_op_array;
+ u32 pe_en_vf_cnt;
+ u16 hw_maj_ver;
+ u16 hw_min_ver;
+ u8 struct_ver;
+ u8 hmc_profile;
+ u8 ena_vf_count;
+ u8 ceqs_per_vf;
+ u8 ooisc_blksize;
+ u8 rrsp_blksize;
+ u8 q1_blksize;
+ u8 xmit_blksize;
+ u8 ts_override;
+ u8 ts_shift;
+ u8 en_fine_grained_timers;
+ u8 blksizes_valid;
+ bool en_datacenter_tcp:1;
+ bool disable_packed:1;
+ bool rocev2_rto_policy:1;
+ enum irdma_protocol_used protocol_used;
+};
+
+struct irdma_terminate_hdr {
+ u8 layer_etype;
+ u8 error_code;
+ u8 hdrct;
+ u8 rsvd;
+};
+
+struct irdma_cqp_sq_wqe {
+ __le64 buf[IRDMA_CQP_WQE_SIZE];
+};
+
+struct irdma_sc_aeqe {
+ __le64 buf[IRDMA_AEQE_SIZE];
+};
+
+struct irdma_ceqe {
+ __le64 buf[IRDMA_CEQE_SIZE];
+};
+
+struct irdma_cqp_ctx {
+ __le64 buf[IRDMA_CQP_CTX_SIZE];
+};
+
+struct irdma_cq_shadow_area {
+ __le64 buf[IRDMA_SHADOW_AREA_SIZE];
+};
+
+struct irdma_dev_hw_stats_offsets {
+ u32 stats_offset[IRDMA_HW_STAT_INDEX_MAX_GEN_1];
+};
+
+struct irdma_dev_hw_stats {
+ u64 stats_val[IRDMA_GATHER_STATS_BUF_SIZE / sizeof(u64)];
+};
+
+struct irdma_gather_stats {
+ u64 val[IRDMA_GATHER_STATS_BUF_SIZE / sizeof(u64)];
+};
+
+struct irdma_hw_stat_map {
+ u16 byteoff;
+ u8 bitoff;
+ u64 bitmask;
+};
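+
+/*
+ * One plausible decode using this map (illustrative sketch only; the real
+ * consumer is the stats gathering code):
+ *
+ *	u64 raw = *(u64 *)((u8 *)gather_buf + map->byteoff);
+ *	u64 val = (raw >> map->bitoff) & map->bitmask;
+ */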
+
+struct irdma_stats_gather_info {
+ bool use_hmc_fcn_index:1;
+ bool use_stats_inst:1;
+ u8 hmc_fcn_index;
+ u8 stats_inst_index;
+ struct irdma_dma_mem stats_buff_mem;
+ void *gather_stats_va;
+ void *last_gather_stats_va;
+};
+
+struct irdma_vsi_pestat {
+ struct irdma_hw *hw;
+ struct irdma_dev_hw_stats hw_stats;
+ struct irdma_stats_gather_info gather_info;
+ struct timer_list stats_timer;
+ struct irdma_sc_vsi *vsi;
+ struct irdma_dev_hw_stats last_hw_stats;
+ spinlock_t lock; /* rdma stats lock */
+};
+
+struct irdma_mmio_region {
+ u8 __iomem *addr;
+ resource_size_t len;
+ resource_size_t offset;
+};
+
+struct irdma_hw {
+ union {
+ u8 __iomem *hw_addr;
+ struct {
+ struct irdma_mmio_region rdma_reg; /* RDMA region */
+ struct irdma_mmio_region *io_regs; /* Non-RDMA MMIO regions */
+ u16 num_io_regions; /* Number of Non-RDMA MMIO regions */
+ };
+ };
+ struct device *device;
+ struct irdma_hmc_info hmc;
+};
+
+struct irdma_pfpdu {
+ struct list_head rxlist;
+ u32 rcv_nxt;
+ u32 fps;
+ u32 max_fpdu_data;
+ u32 nextseqnum;
+ u32 rcv_start_seq;
+ bool mode:1;
+ bool mpa_crc_err:1;
+ u8 marker_len;
+ u64 total_ieq_bufs;
+ u64 fpdu_processed;
+ u64 bad_seq_num;
+ u64 crc_err;
+ u64 no_tx_bufs;
+ u64 tx_err;
+ u64 out_of_order;
+ u64 pmode_count;
+ struct irdma_sc_ah *ah;
+ struct irdma_puda_buf *ah_buf;
+ spinlock_t lock; /* fpdu processing lock */
+ struct irdma_puda_buf *lastrcv_buf;
+};
+
+struct irdma_sc_pd {
+ struct irdma_sc_dev *dev;
+ u32 pd_id;
+ int abi_ver;
+};
+
+struct irdma_cqp_quanta {
+ __le64 elem[IRDMA_CQP_WQE_SIZE];
+};
+
+struct irdma_ooo_cqp_op {
+ struct list_head list_entry;
+ u64 scratch;
+ u32 def_info;
+ u32 sw_def_info;
+ u32 wqe_idx;
+ bool deferred:1;
+};
+
+struct irdma_sc_cqp {
+ spinlock_t ooo_list_lock; /* protects list of pending completions */
+ struct list_head ooo_avail;
+ struct list_head ooo_pnd;
+ u32 last_def_cmpl_ticket;
+ u32 sw_def_cmpl_ticket;
+ u32 size;
+ u64 sq_pa;
+ u64 host_ctx_pa;
+ void *back_cqp;
+ struct irdma_sc_dev *dev;
+ int (*process_cqp_sds)(struct irdma_sc_dev *dev,
+ struct irdma_update_sds_info *info);
+ struct irdma_dma_mem sdbuf;
+ struct irdma_ring sq_ring;
+ struct irdma_cqp_quanta *sq_base;
+ struct irdma_dcqcn_cc_params dcqcn_params;
+ __le64 *host_ctx;
+ u64 *scratch_array;
+ u64 requested_ops;
+ atomic64_t completed_ops;
+ struct irdma_ooo_cqp_op *ooo_op_array;
+ u32 cqp_id;
+ u32 sq_size;
+ u32 pe_en_vf_cnt;
+ u32 hw_sq_size;
+ u16 hw_maj_ver;
+ u16 hw_min_ver;
+ u8 struct_ver;
+ u8 polarity;
+ u8 hmc_profile;
+ u8 ena_vf_count;
+ u8 timeout_count;
+ u8 ceqs_per_vf;
+ u8 ooisc_blksize;
+ u8 rrsp_blksize;
+ u8 q1_blksize;
+ u8 xmit_blksize;
+ u8 ts_override;
+ u8 ts_shift;
+ u8 en_fine_grained_timers;
+ u8 blksizes_valid;
+ bool en_datacenter_tcp:1;
+ bool disable_packed:1;
+ bool rocev2_rto_policy:1;
+ enum irdma_protocol_used protocol_used;
+};
+
+struct irdma_sc_aeq {
+ u32 size;
+ u64 aeq_elem_pa;
+ struct irdma_sc_dev *dev;
+ struct irdma_sc_aeqe *aeqe_base;
+ void *pbl_list;
+ u32 elem_cnt;
+ struct irdma_ring aeq_ring;
+ u8 pbl_chunk_size;
+ u32 first_pm_pbl_idx;
+ u32 msix_idx;
+ u8 polarity;
+ bool virtual_map:1;
+ bool pasid_valid:1;
+ u32 pasid;
+};
+
+struct irdma_sc_ceq {
+ u32 size;
+ u64 ceq_elem_pa;
+ struct irdma_sc_dev *dev;
+ struct irdma_ceqe *ceqe_base;
+ void *pbl_list;
+ u32 ceq_id;
+ u32 elem_cnt;
+ struct irdma_ring ceq_ring;
+ u8 pbl_chunk_size;
+ u8 tph_val;
+ u32 first_pm_pbl_idx;
+ u8 polarity;
+ u16 vsi_idx;
+ bool virtual_map:1;
+ bool tph_en:1;
+ bool itr_no_expire:1;
+ bool pasid_valid:1;
+ u32 pasid;
+};
+
+struct irdma_sc_cq {
+ struct irdma_cq_uk cq_uk;
+ u64 cq_pa;
+ u64 shadow_area_pa;
+ struct irdma_sc_dev *dev;
+ u16 vsi_idx;
+ struct irdma_sc_vsi *vsi;
+ void *pbl_list;
+ void *back_cq;
+ u32 ceq_id;
+ u32 shadow_read_threshold;
+ u8 pbl_chunk_size;
+ u8 cq_type;
+ u8 tph_val;
+ u32 first_pm_pbl_idx;
+ bool ceqe_mask:1;
+ bool virtual_map:1;
+ bool check_overflow:1;
+ bool ceq_id_valid:1;
+ bool tph_en;
+};
+
+struct irdma_sc_qp {
+ struct irdma_qp_uk qp_uk;
+ u64 sq_pa;
+ u64 rq_pa;
+ u64 hw_host_ctx_pa;
+ u64 shadow_area_pa;
+ u64 q2_pa;
+ struct irdma_sc_dev *dev;
+ struct irdma_sc_vsi *vsi;
+ struct irdma_sc_pd *pd;
+ __le64 *hw_host_ctx;
+ void *llp_stream_handle;
+ struct irdma_pfpdu pfpdu;
+ u32 ieq_qp;
+ u8 *q2_buf;
+ u64 qp_compl_ctx;
+ u32 push_idx;
+ u16 qs_handle;
+ u16 push_offset;
+ u8 flush_wqes_count;
+ u8 sq_tph_val;
+ u8 rq_tph_val;
+ u8 qp_state;
+ u8 hw_sq_size;
+ u8 hw_rq_size;
+ u8 src_mac_addr_idx;
+ bool on_qoslist:1;
+ bool ieq_pass_thru:1;
+ bool sq_tph_en:1;
+ bool rq_tph_en:1;
+ bool rcv_tph_en:1;
+ bool xmit_tph_en:1;
+ bool virtual_map:1;
+ bool flush_sq:1;
+ bool flush_rq:1;
+ bool err_sq_idx_valid:1;
+ bool err_rq_idx_valid:1;
+ u32 err_sq_idx;
+ u32 err_rq_idx;
+ bool sq_flush_code:1;
+ bool rq_flush_code:1;
+ u32 pkt_limit;
+ enum irdma_flush_opcode flush_code;
+ enum irdma_qp_event_type event_type;
+ u8 term_flags;
+ u8 user_pri;
+ struct list_head list;
+};
+
+struct irdma_stats_inst_info {
+ bool use_hmc_fcn_index;
+ u8 hmc_fn_id;
+ u16 stats_idx;
+};
+
+struct irdma_up_info {
+ u8 map[8];
+ u8 cnp_up_override;
+ u16 hmc_fcn_idx;
+ bool use_vlan:1;
+ bool use_cnp_up_override:1;
+};
+
+#define IRDMA_MAX_WS_NODES 0x3FF
+#define IRDMA_WS_NODE_INVALID 0xFFFF
+
+struct irdma_ws_node_info {
+ u16 id;
+ u16 vsi;
+ u16 parent_id;
+ u16 qs_handle;
+ bool type_leaf:1;
+ bool enable:1;
+ u8 prio_type;
+ u8 tc;
+ u8 weight;
+};
+
+struct irdma_hmc_fpm_misc {
+ u32 max_ceqs;
+ u32 max_sds;
+ u32 loc_mem_pages;
+ u8 ird;
+ u32 xf_block_size;
+ u32 q1_block_size;
+ u32 ht_multiplier;
+ u32 timer_bucket;
+ u32 rrf_block_size;
+ u32 ooiscf_block_size;
+};
+
+#define IRDMA_VCHNL_MAX_MSG_SIZE 512
+#define IRDMA_LEAF_DEFAULT_REL_BW 64
+#define IRDMA_PARENT_DEFAULT_REL_BW 1
+
+struct irdma_qos {
+ struct list_head qplist;
+ struct mutex qos_mutex; /* protect QoS attributes per QoS level */
+ u64 lan_qos_handle;
+ u32 l2_sched_node_id;
+ u16 qs_handle;
+ u8 traffic_class;
+ u8 rel_bw;
+ u8 prio_type;
+ bool valid;
+};
+
+#define IRDMA_INVALID_STATS_IDX 0xff
+struct irdma_sc_vsi {
+ u16 vsi_idx;
+ struct irdma_sc_dev *dev;
+ void *back_vsi;
+ u32 ilq_count;
+ struct irdma_virt_mem ilq_mem;
+ struct irdma_puda_rsrc *ilq;
+ u32 ieq_count;
+ struct irdma_virt_mem ieq_mem;
+ struct irdma_puda_rsrc *ieq;
+ u32 exception_lan_q;
+ u16 mtu;
+ u16 vm_id;
+ enum irdma_vm_vf_type vm_vf_type;
+ bool stats_inst_alloc:1;
+ bool tc_change_pending:1;
+ struct irdma_vsi_pestat *pestat;
+ atomic_t qp_suspend_reqs;
+ int (*register_qset)(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *tc_node);
+ void (*unregister_qset)(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *tc_node);
+ u8 qos_rel_bw;
+ u8 qos_prio_type;
+ u8 stats_idx;
+ u8 dscp_map[DSCP_MAX];
+ struct irdma_qos qos[IRDMA_MAX_USER_PRIORITY];
+ u64 hw_stats_regs[IRDMA_HW_STAT_INDEX_MAX_GEN_1];
+ bool dscp_mode:1;
+};
+
+struct irdma_sc_dev {
+ struct list_head cqp_cmd_head; /* head of the CQP command list */
+ spinlock_t cqp_lock; /* protect CQP list access */
+ bool stats_idx_array[IRDMA_MAX_STATS_COUNT_GEN_1];
+ struct irdma_dma_mem vf_fpm_query_buf[IRDMA_MAX_PE_ENA_VF_COUNT];
+ u64 fpm_query_buf_pa;
+ u64 fpm_commit_buf_pa;
+ __le64 *fpm_query_buf;
+ __le64 *fpm_commit_buf;
+ struct irdma_hw *hw;
+ u8 __iomem *db_addr;
+ u32 __iomem *wqe_alloc_db;
+ u32 __iomem *cq_arm_db;
+ u32 __iomem *aeq_alloc_db;
+ u32 __iomem *cqp_db;
+ u32 __iomem *cq_ack_db;
+ u32 __iomem *ceq_itr_mask_db;
+ u32 __iomem *aeq_itr_mask_db;
+ u32 __iomem *hw_regs[IRDMA_MAX_REGS];
+ u32 ceq_itr; /* Interrupt throttle, usecs between interrupts: 0 disabled. 2 - 8160 */
+ u64 hw_masks[IRDMA_MAX_MASKS];
+ u64 hw_shifts[IRDMA_MAX_SHIFTS];
+ const struct irdma_hw_stat_map *hw_stats_map;
+ u64 hw_stats_regs[IRDMA_HW_STAT_INDEX_MAX_GEN_1];
+ u64 feature_info[IRDMA_MAX_FEATURES];
+ u64 cqp_cmd_stats[IRDMA_MAX_CQP_OPS];
+ struct irdma_hw_attrs hw_attrs;
+ struct irdma_hmc_info *hmc_info;
+ struct irdma_vchnl_rdma_caps vc_caps;
+ u8 vc_recv_buf[IRDMA_VCHNL_MAX_MSG_SIZE];
+ u16 vc_recv_len;
+ struct irdma_sc_cqp *cqp;
+ struct irdma_sc_aeq *aeq;
+ struct irdma_sc_ceq *ceq[IRDMA_CEQ_MAX_COUNT];
+ struct irdma_sc_cq *ccq;
+ const struct irdma_irq_ops *irq_ops;
+ struct irdma_qos qos[IRDMA_MAX_USER_PRIORITY];
+ struct irdma_hmc_fpm_misc hmc_fpm_misc;
+ struct irdma_ws_node *ws_tree_root;
+ struct mutex ws_mutex; /* ws tree mutex */
+ u32 vchnl_ver;
+ u16 num_vfs;
+ u16 hmc_fn_id;
+ u16 vf_id;
+ bool privileged:1;
+ bool vchnl_up:1;
+ bool ceq_valid:1;
+ bool is_pf:1;
+ u8 protocol_used;
+ struct mutex vchnl_mutex; /* mutex to synchronize RDMA virtual channel messages */
+ u8 pci_rev;
+ int (*ws_add)(struct irdma_sc_vsi *vsi, u8 user_pri);
+ void (*ws_remove)(struct irdma_sc_vsi *vsi, u8 user_pri);
+ void (*ws_reset)(struct irdma_sc_vsi *vsi);
+};
+
+struct irdma_modify_cq_info {
+ u64 cq_pa;
+ struct irdma_cqe *cq_base;
+ u32 cq_size;
+ u32 shadow_read_threshold;
+ u8 pbl_chunk_size;
+ u32 first_pm_pbl_idx;
+ bool virtual_map:1;
+ bool check_overflow;
+ bool cq_resize:1;
+};
+
+struct irdma_srq_init_info {
+ struct irdma_sc_pd *pd;
+ struct irdma_sc_vsi *vsi;
+ u64 srq_pa;
+ u64 shadow_area_pa;
+ u32 first_pm_pbl_idx;
+ u32 pasid;
+ u32 srq_size;
+ u16 srq_limit;
+ u8 pasid_valid;
+ u8 wqe_size;
+ u8 leaf_pbl_size;
+ u8 virtual_map;
+ u8 tph_en;
+ u8 arm_limit_event;
+ u8 tph_value;
+ u8 pbl_chunk_size;
+ struct irdma_srq_uk_init_info srq_uk_init_info;
+};
+
+struct irdma_sc_srq {
+ struct irdma_sc_dev *dev;
+ struct irdma_sc_vsi *vsi;
+ struct irdma_sc_pd *pd;
+ struct irdma_srq_uk srq_uk;
+ void *back_srq;
+ u64 srq_pa;
+ u64 shadow_area_pa;
+ u32 first_pm_pbl_idx;
+ u32 pasid;
+ u32 hw_srq_size;
+ u16 srq_limit;
+ u8 pasid_valid;
+ u8 leaf_pbl_size;
+ u8 virtual_map;
+ u8 tph_en;
+ u8 arm_limit_event;
+ u8 tph_val;
+};
+
+struct irdma_modify_srq_info {
+ u16 srq_limit;
+ u8 arm_limit_event;
+};
+
+struct irdma_create_qp_info {
+ bool ord_valid:1;
+ bool tcp_ctx_valid:1;
+ bool cq_num_valid:1;
+ bool arp_cache_idx_valid:1;
+ bool mac_valid:1;
+ bool force_lpb;
+ u8 next_iwarp_state;
+};
+
+struct irdma_modify_qp_info {
+ u64 rx_win0;
+ u64 rx_win1;
+ u16 new_mss;
+ u8 next_iwarp_state;
+ u8 curr_iwarp_state;
+ u8 termlen;
+ bool ord_valid:1;
+ bool tcp_ctx_valid:1;
+ bool udp_ctx_valid:1;
+ bool cq_num_valid:1;
+ bool arp_cache_idx_valid:1;
+ bool reset_tcp_conn:1;
+ bool remove_hash_idx:1;
+ bool dont_send_term:1;
+ bool dont_send_fin:1;
+ bool cached_var_valid:1;
+ bool mss_change:1;
+ bool force_lpb:1;
+ bool mac_valid:1;
+};
+
+struct irdma_ccq_cqe_info {
+ struct irdma_sc_cqp *cqp;
+ u64 scratch;
+ u32 op_ret_val;
+ u16 maj_err_code;
+ u16 min_err_code;
+ u8 op_code;
+ bool error:1;
+ bool pending:1;
+};
+
+struct irdma_dcb_app_info {
+ u8 priority;
+ u8 selector;
+ u16 prot_id;
+};
+
+struct irdma_qos_tc_info {
+ u64 tc_ctx;
+ u8 rel_bw;
+ u8 prio_type;
+ u8 egress_virt_up;
+ u8 ingress_virt_up;
+};
+
+struct irdma_l2params {
+ struct irdma_qos_tc_info tc_info[IRDMA_MAX_USER_PRIORITY];
+ struct irdma_dcb_app_info apps[IRDMA_MAX_APPS];
+ u32 num_apps;
+ u16 qs_handle_list[IRDMA_MAX_USER_PRIORITY];
+ u16 mtu;
+ u8 up2tc[IRDMA_MAX_USER_PRIORITY];
+ u8 dscp_map[DSCP_MAX];
+ u8 num_tc;
+ u8 vsi_rel_bw;
+ u8 vsi_prio_type;
+ bool mtu_changed:1;
+ bool tc_changed:1;
+ bool dscp_mode:1;
+};
+
+struct irdma_vsi_init_info {
+ struct irdma_sc_dev *dev;
+ void *back_vsi;
+ struct irdma_l2params *params;
+ u16 exception_lan_q;
+ u16 pf_data_vsi_num;
+ enum irdma_vm_vf_type vm_vf_type;
+ u16 vm_id;
+ int (*register_qset)(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *tc_node);
+ void (*unregister_qset)(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *tc_node);
+};
+
+struct irdma_vsi_stats_info {
+ struct irdma_vsi_pestat *pestat;
+ u16 fcn_id;
+ bool alloc_stats_inst;
+};
+
+struct irdma_device_init_info {
+ u64 fpm_query_buf_pa;
+ u64 fpm_commit_buf_pa;
+ __le64 *fpm_query_buf;
+ __le64 *fpm_commit_buf;
+ struct irdma_hw *hw;
+ void __iomem *bar0;
+ enum irdma_protocol_used protocol_used;
+ u16 hmc_fn_id;
+};
+
+struct irdma_ceq_init_info {
+ u64 ceqe_pa;
+ struct irdma_sc_dev *dev;
+ u64 *ceqe_base;
+ void *pbl_list;
+ u32 elem_cnt;
+ u32 ceq_id;
+ bool virtual_map:1;
+ bool tph_en:1;
+ bool itr_no_expire:1;
+ u8 pbl_chunk_size;
+ u8 tph_val;
+ u16 vsi_idx;
+ u32 first_pm_pbl_idx;
+};
+
+struct irdma_aeq_init_info {
+ u64 aeq_elem_pa;
+ struct irdma_sc_dev *dev;
+ u32 *aeqe_base;
+ void *pbl_list;
+ u32 elem_cnt;
+ bool virtual_map;
+ u8 pbl_chunk_size;
+ u32 first_pm_pbl_idx;
+ u32 msix_idx;
+};
+
+struct irdma_ccq_init_info {
+ u64 cq_pa;
+ u64 shadow_area_pa;
+ struct irdma_sc_dev *dev;
+ struct irdma_cqe *cq_base;
+ __le64 *shadow_area;
+ void *pbl_list;
+ u32 num_elem;
+ u32 ceq_id;
+ u32 shadow_read_threshold;
+ bool ceqe_mask:1;
+ bool ceq_id_valid:1;
+ bool avoid_mem_cflct:1;
+ bool virtual_map:1;
+ bool tph_en:1;
+ u8 tph_val;
+ u8 pbl_chunk_size;
+ u32 first_pm_pbl_idx;
+ struct irdma_sc_vsi *vsi;
+};
+
+struct irdma_udp_offload_info {
+ bool ipv4:1;
+ bool insert_vlan_tag:1;
+ u8 ttl;
+ u8 tos;
+ u16 src_port;
+ u16 dst_port;
+ u32 dest_ip_addr[4];
+ u32 snd_mss;
+ u16 vlan_tag;
+ u16 arp_idx;
+ u32 flow_label;
+ u8 udp_state;
+ u32 psn_nxt;
+ u32 lsn;
+ u32 epsn;
+ u32 psn_max;
+ u32 psn_una;
+ u32 local_ipaddr[4];
+ u32 cwnd;
+ u8 rexmit_thresh;
+ u8 rnr_nak_thresh;
+ u8 rnr_nak_tmr;
+ u8 min_rnr_timer;
+};
+
+struct irdma_roce_offload_info {
+ u16 p_key;
+ u16 err_rq_idx;
+ u32 qkey;
+ u32 dest_qp;
+ u8 roce_tver;
+ u8 ack_credits;
+ u8 err_rq_idx_valid;
+ u32 pd_id;
+ u16 ord_size;
+ u16 ird_size;
+ bool is_qp1:1;
+ bool udprivcq_en:1;
+ bool dcqcn_en:1;
+ bool rcv_no_icrc:1;
+ bool wr_rdresp_en:1;
+ bool bind_en:1;
+ bool fast_reg_en:1;
+ bool priv_mode_en:1;
+ bool rd_en:1;
+ bool timely_en:1;
+ bool dctcp_en:1;
+ bool fw_cc_enable:1;
+ bool use_stats_inst:1;
+ u8 local_ack_timeout;
+ u16 t_high;
+ u16 t_low;
+ u8 last_byte_sent;
+ u8 mac_addr[ETH_ALEN];
+ u8 rtomin;
+};
+
+struct irdma_iwarp_offload_info {
+ u16 rcv_mark_offset;
+ u16 snd_mark_offset;
+ u8 ddp_ver;
+ u8 rdmap_ver;
+ u8 iwarp_mode;
+ u16 err_rq_idx;
+ u32 pd_id;
+ u16 ord_size;
+ u16 ird_size;
+ bool ib_rd_en:1;
+ bool align_hdrs:1;
+ bool rcv_no_mpa_crc:1;
+ bool err_rq_idx_valid:1;
+ bool snd_mark_en:1;
+ bool rcv_mark_en:1;
+ bool wr_rdresp_en:1;
+ bool bind_en:1;
+ bool fast_reg_en:1;
+ bool priv_mode_en:1;
+ bool rd_en:1;
+ bool timely_en:1;
+ bool use_stats_inst:1;
+ bool ecn_en:1;
+ bool dctcp_en:1;
+ u16 t_high;
+ u16 t_low;
+ u8 last_byte_sent;
+ u8 mac_addr[ETH_ALEN];
+ u8 rtomin;
+};
+
+struct irdma_tcp_offload_info {
+ bool ipv4:1;
+ bool no_nagle:1;
+ bool insert_vlan_tag:1;
+ bool time_stamp:1;
+ bool drop_ooo_seg:1;
+ bool avoid_stretch_ack:1;
+ bool wscale:1;
+ bool ignore_tcp_opt:1;
+ bool ignore_tcp_uns_opt:1;
+ u8 cwnd_inc_limit;
+ u8 dup_ack_thresh;
+ u8 ttl;
+ u8 src_mac_addr_idx;
+ u8 tos;
+ u16 src_port;
+ u16 dst_port;
+ u32 dest_ip_addr[4];
+ u32 snd_mss;
+ u16 syn_rst_handling;
+ u16 vlan_tag;
+ u16 arp_idx;
+ u32 flow_label;
+ u8 tcp_state;
+ u8 snd_wscale;
+ u8 rcv_wscale;
+ u32 time_stamp_recent;
+ u32 time_stamp_age;
+ u32 snd_nxt;
+ u32 snd_wnd;
+ u32 rcv_nxt;
+ u32 rcv_wnd;
+ u32 snd_max;
+ u32 snd_una;
+ u32 srtt;
+ u32 rtt_var;
+ u32 ss_thresh;
+ u32 cwnd;
+ u32 snd_wl1;
+ u32 snd_wl2;
+ u32 max_snd_window;
+ u8 rexmit_thresh;
+ u32 local_ipaddr[4];
+};
+
+struct irdma_qp_host_ctx_info {
+ u64 qp_compl_ctx;
+ union {
+ struct irdma_tcp_offload_info *tcp_info;
+ struct irdma_udp_offload_info *udp_info;
+ };
+ union {
+ struct irdma_iwarp_offload_info *iwarp_info;
+ struct irdma_roce_offload_info *roce_info;
+ };
+ u32 send_cq_num;
+ u32 rcv_cq_num;
+ u32 srq_id;
+ u32 rem_endpoint_idx;
+ u16 stats_idx;
+ bool remote_atomics_en:1;
+ bool srq_valid:1;
+ bool tcp_info_valid:1;
+ bool iwarp_info_valid:1;
+ bool stats_idx_valid:1;
+ u8 user_pri;
+};
+
+struct irdma_aeqe_info {
+ u64 compl_ctx;
+ u32 qp_cq_id;
+ u32 def_info; /* only valid for DEF_CMPL */
+ u16 ae_id;
+ u16 wqe_idx;
+ u8 tcp_state;
+ u8 iwarp_state;
+ bool qp:1;
+ bool cq:1;
+ bool sq:1;
+ bool rq:1;
+ bool srq:1;
+ bool in_rdrsp_wr:1;
+ bool out_rdrsp:1;
+ bool aeqe_overflow:1;
+ bool err_rq_idx_valid:1;
+ u8 q2_data_written;
+ u8 ae_src;
+};
+
+struct irdma_allocate_stag_info {
+ u64 total_len;
+ u64 first_pm_pbl_idx;
+ u32 chunk_size;
+ u32 stag_idx;
+ u32 page_size;
+ u32 pd_id;
+ u16 access_rights;
+ bool remote_access:1;
+ bool use_hmc_fcn_index:1;
+ bool use_pf_rid:1;
+ bool all_memory:1;
+ bool remote_atomics_en:1;
+ u16 hmc_fcn_index;
+};
+
+struct irdma_mw_alloc_info {
+ u32 mw_stag_index;
+ u32 page_size;
+ u32 pd_id;
+ bool remote_access:1;
+ bool mw_wide:1;
+ bool mw1_bind_dont_vldt_key:1;
+};
+
+struct irdma_reg_ns_stag_info {
+ u64 reg_addr_pa;
+ u64 va;
+ u64 total_len;
+ u32 page_size;
+ u32 chunk_size;
+ u32 first_pm_pbl_index;
+ enum irdma_addressing_type addr_type;
+ irdma_stag_index stag_idx;
+ u16 access_rights;
+ u32 pd_id;
+ irdma_stag_key stag_key;
+ bool use_hmc_fcn_index:1;
+ u8 hmc_fcn_index;
+ bool use_pf_rid:1;
+ bool all_memory:1;
+ bool remote_atomics_en:1;
+};
+
+struct irdma_fast_reg_stag_info {
+ u64 wr_id;
+ u64 reg_addr_pa;
+ u64 fbo;
+ void *va;
+ u64 total_len;
+ u32 page_size;
+ u32 chunk_size;
+ u32 first_pm_pbl_index;
+ enum irdma_addressing_type addr_type;
+ irdma_stag_index stag_idx;
+ u16 access_rights;
+ u32 pd_id;
+ irdma_stag_key stag_key;
+ bool local_fence:1;
+ bool read_fence:1;
+ bool signaled:1;
+ bool use_hmc_fcn_index:1;
+ u8 hmc_fcn_index;
+ bool use_pf_rid:1;
+ bool defer_flag:1;
+ bool remote_atomics_en:1;
+};
+
+struct irdma_dealloc_stag_info {
+ u32 stag_idx;
+ u32 pd_id;
+ bool mr:1;
+ bool dealloc_pbl:1;
+};
+
+struct irdma_register_shared_stag {
+ u64 va;
+ enum irdma_addressing_type addr_type;
+ irdma_stag_index new_stag_idx;
+ irdma_stag_index parent_stag_idx;
+ u32 access_rights;
+ u32 pd_id;
+ u32 page_size;
+ irdma_stag_key new_stag_key;
+};
+
+struct irdma_qp_init_info {
+ struct irdma_qp_uk_init_info qp_uk_init_info;
+ struct irdma_sc_pd *pd;
+ struct irdma_sc_vsi *vsi;
+ __le64 *host_ctx;
+ u8 *q2;
+ u64 sq_pa;
+ u64 rq_pa;
+ u64 host_ctx_pa;
+ u64 q2_pa;
+ u64 shadow_area_pa;
+ u8 sq_tph_val;
+ u8 rq_tph_val;
+ bool sq_tph_en:1;
+ bool rq_tph_en:1;
+ bool rcv_tph_en:1;
+ bool xmit_tph_en:1;
+ bool virtual_map:1;
+};
+
+struct irdma_cq_init_info {
+ struct irdma_sc_dev *dev;
+ u64 cq_base_pa;
+ u64 shadow_area_pa;
+ u32 ceq_id;
+ u32 shadow_read_threshold;
+ u8 pbl_chunk_size;
+ u32 first_pm_pbl_idx;
+ bool virtual_map:1;
+ bool ceqe_mask:1;
+ bool ceq_id_valid:1;
+ bool tph_en:1;
+ u8 tph_val;
+ u8 type;
+ struct irdma_cq_uk_init_info cq_uk_init_info;
+ struct irdma_sc_vsi *vsi;
+};
+
+struct irdma_upload_context_info {
+ u64 buf_pa;
+ u32 qp_id;
+ u8 qp_type;
+ bool freeze_qp:1;
+ bool raw_format:1;
+};
+
+struct irdma_local_mac_entry_info {
+ u8 mac_addr[6];
+ u16 entry_idx;
+};
+
+struct irdma_add_arp_cache_entry_info {
+ u8 mac_addr[ETH_ALEN];
+ u32 reach_max;
+ u16 arp_index;
+ bool permanent;
+};
+
+struct irdma_apbvt_info {
+ u16 port;
+ bool add;
+};
+
+struct irdma_qhash_table_info {
+ struct irdma_sc_vsi *vsi;
+ enum irdma_quad_hash_manage_type manage;
+ enum irdma_quad_entry_type entry_type;
+ bool vlan_valid:1;
+ bool ipv4_valid:1;
+ u8 mac_addr[ETH_ALEN];
+ u16 vlan_id;
+ u8 user_pri;
+ u32 qp_num;
+ u32 dest_ip[4];
+ u32 src_ip[4];
+ u16 dest_port;
+ u16 src_port;
+};
+
+struct irdma_cqp_manage_push_page_info {
+ u32 push_idx;
+ u16 qs_handle;
+ u8 free_page;
+ u8 push_page_type;
+};
+
+struct irdma_qp_flush_info {
+ u32 err_sq_idx;
+ u32 err_rq_idx;
+ u16 sq_minor_code;
+ u16 sq_major_code;
+ u16 rq_minor_code;
+ u16 rq_major_code;
+ u16 ae_code;
+ u8 ae_src;
+ bool sq:1;
+ bool rq:1;
+ bool userflushcode:1;
+ bool generate_ae:1;
+ bool err_sq_idx_valid:1;
+ bool err_rq_idx_valid:1;
+};
+
+struct irdma_gen_ae_info {
+ u16 ae_code;
+ u8 ae_src;
+};
+
+struct irdma_cqp_timeout {
+ u64 compl_cqp_cmds;
+ u32 count;
+};
+
+struct irdma_irq_ops {
+ void (*irdma_cfg_aeq)(struct irdma_sc_dev *dev, u32 idx, bool enable);
+ void (*irdma_cfg_ceq)(struct irdma_sc_dev *dev, u32 ceq_id, u32 idx,
+ bool enable);
+ void (*irdma_dis_irq)(struct irdma_sc_dev *dev, u32 idx);
+ void (*irdma_en_irq)(struct irdma_sc_dev *dev, u32 idx);
+};
+
+void irdma_sc_ccq_arm(struct irdma_sc_cq *ccq);
+int irdma_sc_ccq_create(struct irdma_sc_cq *ccq, u64 scratch,
+ bool check_overflow, bool post_sq);
+int irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch, bool post_sq);
+int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq,
+ struct irdma_ccq_cqe_info *info);
+int irdma_sc_ccq_init(struct irdma_sc_cq *ccq,
+ struct irdma_ccq_init_info *info);
+
+int irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch);
+int irdma_sc_cceq_destroy_done(struct irdma_sc_ceq *ceq);
+
+int irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch, bool post_sq);
+int irdma_sc_ceq_init(struct irdma_sc_ceq *ceq,
+ struct irdma_ceq_init_info *info);
+void irdma_sc_cleanup_ceqes(struct irdma_sc_cq *cq, struct irdma_sc_ceq *ceq);
+void *irdma_sc_process_ceq(struct irdma_sc_dev *dev, struct irdma_sc_ceq *ceq);
+
+int irdma_sc_aeq_init(struct irdma_sc_aeq *aeq,
+ struct irdma_aeq_init_info *info);
+int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
+ struct irdma_aeqe_info *info);
+void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count);
+
+void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_id,
+ int abi_ver);
+void irdma_cfg_aeq(struct irdma_sc_dev *dev, u32 idx, bool enable);
+void irdma_check_cqp_progress(struct irdma_cqp_timeout *cqp_timeout,
+ struct irdma_sc_dev *dev);
+void irdma_sc_cqp_def_cmpl_ae_handler(struct irdma_sc_dev *dev,
+ struct irdma_aeqe_info *info,
+ bool first, u64 *scratch,
+ u32 *sw_def_info);
+u64 irdma_sc_cqp_cleanup_handler(struct irdma_sc_dev *dev);
+int irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, u16 *min_err);
+int irdma_sc_cqp_destroy(struct irdma_sc_cqp *cqp);
+int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp,
+ struct irdma_cqp_init_info *info);
+void irdma_sc_cqp_post_sq(struct irdma_sc_cqp *cqp);
+int irdma_sc_poll_for_cqp_op_done(struct irdma_sc_cqp *cqp, u8 opcode,
+ struct irdma_ccq_cqe_info *cmpl_info);
+int irdma_sc_fast_register(struct irdma_sc_qp *qp,
+ struct irdma_fast_reg_stag_info *info, bool post_sq);
+int irdma_sc_qp_create(struct irdma_sc_qp *qp,
+ struct irdma_create_qp_info *info, u64 scratch,
+ bool post_sq);
+int irdma_sc_qp_destroy(struct irdma_sc_qp *qp, u64 scratch,
+ bool remove_hash_idx, bool ignore_mw_bnd, bool post_sq);
+int irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp,
+ struct irdma_qp_flush_info *info, u64 scratch,
+ bool post_sq);
+int irdma_sc_qp_init(struct irdma_sc_qp *qp, struct irdma_qp_init_info *info);
+int irdma_sc_qp_modify(struct irdma_sc_qp *qp,
+ struct irdma_modify_qp_info *info, u64 scratch,
+ bool post_sq);
+void irdma_sc_send_lsmm(struct irdma_sc_qp *qp, void *lsmm_buf, u32 size,
+ irdma_stag stag);
+
+void irdma_sc_send_rtt(struct irdma_sc_qp *qp, bool read);
+void irdma_sc_qp_setctx(struct irdma_sc_qp *qp, __le64 *qp_ctx,
+ struct irdma_qp_host_ctx_info *info);
+void irdma_sc_qp_setctx_roce(struct irdma_sc_qp *qp, __le64 *qp_ctx,
+ struct irdma_qp_host_ctx_info *info);
+int irdma_sc_cq_destroy(struct irdma_sc_cq *cq, u64 scratch, bool post_sq);
+int irdma_sc_cq_init(struct irdma_sc_cq *cq, struct irdma_cq_init_info *info);
+void irdma_sc_cq_resize(struct irdma_sc_cq *cq, struct irdma_modify_cq_info *info);
+int irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch,
+ u8 hmc_fn_id, bool post_sq,
+ bool poll_registers);
+int irdma_sc_srq_init(struct irdma_sc_srq *srq,
+ struct irdma_srq_init_info *info);
+
+void sc_vsi_update_stats(struct irdma_sc_vsi *vsi);
+struct cqp_info {
+ union {
+ struct {
+ struct irdma_sc_qp *qp;
+ struct irdma_create_qp_info info;
+ u64 scratch;
+ } qp_create;
+
+ struct {
+ struct irdma_sc_qp *qp;
+ struct irdma_modify_qp_info info;
+ u64 scratch;
+ } qp_modify;
+
+ struct {
+ struct irdma_sc_qp *qp;
+ u64 scratch;
+ bool remove_hash_idx;
+ bool ignore_mw_bnd;
+ } qp_destroy;
+
+ struct {
+ struct irdma_sc_cq *cq;
+ u64 scratch;
+ bool check_overflow;
+ } cq_create;
+
+ struct {
+ struct irdma_sc_cq *cq;
+ struct irdma_modify_cq_info info;
+ u64 scratch;
+ } cq_modify;
+
+ struct {
+ struct irdma_sc_cq *cq;
+ u64 scratch;
+ } cq_destroy;
+
+ struct {
+ struct irdma_sc_dev *dev;
+ struct irdma_allocate_stag_info info;
+ u64 scratch;
+ } alloc_stag;
+
+ struct {
+ struct irdma_sc_dev *dev;
+ struct irdma_mw_alloc_info info;
+ u64 scratch;
+ } mw_alloc;
+
+ struct {
+ struct irdma_sc_dev *dev;
+ struct irdma_reg_ns_stag_info info;
+ u64 scratch;
+ } mr_reg_non_shared;
+
+ struct {
+ struct irdma_sc_dev *dev;
+ struct irdma_dealloc_stag_info info;
+ u64 scratch;
+ } dealloc_stag;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_add_arp_cache_entry_info info;
+ u64 scratch;
+ } add_arp_cache_entry;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ u64 scratch;
+ u16 arp_index;
+ } del_arp_cache_entry;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_local_mac_entry_info info;
+ u64 scratch;
+ } add_local_mac_entry;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ u64 scratch;
+ u8 entry_idx;
+ u8 ignore_ref_count;
+ } del_local_mac_entry;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ u64 scratch;
+ } alloc_local_mac_entry;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_cqp_manage_push_page_info info;
+ u64 scratch;
+ } manage_push_page;
+
+ struct {
+ struct irdma_sc_dev *dev;
+ struct irdma_upload_context_info info;
+ u64 scratch;
+ } qp_upload_context;
+
+ struct {
+ struct irdma_sc_dev *dev;
+ struct irdma_hmc_fcn_info info;
+ u64 scratch;
+ } manage_hmc_pm;
+
+ struct {
+ struct irdma_sc_ceq *ceq;
+ u64 scratch;
+ } ceq_create;
+
+ struct {
+ struct irdma_sc_ceq *ceq;
+ u64 scratch;
+ } ceq_destroy;
+
+ struct {
+ struct irdma_sc_aeq *aeq;
+ u64 scratch;
+ } aeq_create;
+
+ struct {
+ struct irdma_sc_aeq *aeq;
+ u64 scratch;
+ } aeq_destroy;
+
+ struct {
+ struct irdma_sc_qp *qp;
+ struct irdma_qp_flush_info info;
+ u64 scratch;
+ } qp_flush_wqes;
+
+ struct {
+ struct irdma_sc_qp *qp;
+ struct irdma_gen_ae_info info;
+ u64 scratch;
+ } gen_ae;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ void *fpm_val_va;
+ u64 fpm_val_pa;
+ u8 hmc_fn_id;
+ u64 scratch;
+ } query_fpm_val;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ void *fpm_val_va;
+ u64 fpm_val_pa;
+ u8 hmc_fn_id;
+ u64 scratch;
+ } commit_fpm_val;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_apbvt_info info;
+ u64 scratch;
+ } manage_apbvt_entry;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_qhash_table_info info;
+ u64 scratch;
+ } manage_qhash_table_entry;
+
+ struct {
+ struct irdma_sc_dev *dev;
+ struct irdma_update_sds_info info;
+ u64 scratch;
+ } update_pe_sds;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_sc_qp *qp;
+ u64 scratch;
+ } suspend_resume;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_ah_info info;
+ u64 scratch;
+ } ah_create;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_ah_info info;
+ u64 scratch;
+ } ah_destroy;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_mcast_grp_info info;
+ u64 scratch;
+ } mc_create;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_mcast_grp_info info;
+ u64 scratch;
+ } mc_destroy;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_mcast_grp_info info;
+ u64 scratch;
+ } mc_modify;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_stats_inst_info info;
+ u64 scratch;
+ } stats_manage;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_stats_gather_info info;
+ u64 scratch;
+ } stats_gather;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_ws_node_info info;
+ u64 scratch;
+ } ws_node;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_up_info info;
+ u64 scratch;
+ } up_map;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_dma_mem query_buff_mem;
+ u64 scratch;
+ } query_rdma;
+
+ struct {
+ struct irdma_sc_srq *srq;
+ u64 scratch;
+ } srq_create;
+
+ struct {
+ struct irdma_sc_srq *srq;
+ struct irdma_modify_srq_info info;
+ u64 scratch;
+ } srq_modify;
+
+ struct {
+ struct irdma_sc_srq *srq;
+ u64 scratch;
+ } srq_destroy;
+
+ } u;
+};
+
+struct cqp_cmds_info {
+ struct list_head cqp_cmd_entry;
+ u8 cqp_cmd;
+ u8 post_sq;
+ struct cqp_info in;
+};
+
+__le64 *irdma_sc_cqp_get_next_send_wqe_idx(struct irdma_sc_cqp *cqp, u64 scratch,
+ u32 *wqe_idx);
+
+/**
+ * irdma_sc_cqp_get_next_send_wqe - get next wqe on cqp sq
+ * @cqp: struct for cqp hw
+ * @scratch: private data for CQP WQE
+ */
+static inline __le64 *irdma_sc_cqp_get_next_send_wqe(struct irdma_sc_cqp *cqp, u64 scratch)
+{
+ u32 wqe_idx;
+
+ return irdma_sc_cqp_get_next_send_wqe_idx(cqp, scratch, &wqe_idx);
+}
+#endif /* IRDMA_TYPE_H */
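The cqp_cmds_info / cqp_info pair above is how the driver describes a Control QP operation before it becomes a CQP WQE, either built directly via irdma_sc_cqp_get_next_send_wqe() or deferred on the dev->cqp_cmd_head list guarded by cqp_lock. A minimal sketch of queueing a deferred AH-create command follows; the IRDMA_OP_AH_CREATE opcode name and the kzalloc-based allocation are assumptions, only the structure layouts come from this patch.

static int queue_ah_create_cmd(struct irdma_sc_dev *dev,
                               struct irdma_sc_cqp *cqp,
                               const struct irdma_ah_info *ah_info,
                               u64 scratch)
{
        struct cqp_cmds_info *cmd;

        cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
        if (!cmd)
                return -ENOMEM;

        cmd->cqp_cmd = IRDMA_OP_AH_CREATE;      /* assumed opcode constant */
        cmd->post_sq = 1;
        cmd->in.u.ah_create.cqp = cqp;
        cmd->in.u.ah_create.info = *ah_info;
        cmd->in.u.ah_create.scratch = scratch;

        /* cqp_cmd_head / cqp_lock are the list and lock declared in irdma_sc_dev */
        spin_lock(&dev->cqp_lock);
        list_add_tail(&cmd->cqp_cmd_entry, &dev->cqp_cmd_head);
        spin_unlock(&dev->cqp_lock);

        return 0;
}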
diff --git a/drivers/infiniband/hw/irdma/uda.c b/drivers/infiniband/hw/irdma/uda.c
new file mode 100644
index 000000000000..84051266d948
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/uda.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2016 - 2021 Intel Corporation */
+#include <linux/etherdevice.h>
+
+#include "osdep.h"
+#include "hmc.h"
+#include "defs.h"
+#include "type.h"
+#include "protos.h"
+#include "uda.h"
+#include "uda_d.h"
+
+/**
+ * irdma_sc_access_ah() - Create, modify or delete AH
+ * @cqp: struct for cqp hw
+ * @info: ah information
+ * @op: Operation
+ * @scratch: u64 saved to be used during cqp completion
+ */
+int irdma_sc_access_ah(struct irdma_sc_cqp *cqp, struct irdma_ah_info *info,
+ u32 op, u64 scratch)
+{
+ __le64 *wqe;
+ u64 qw1, qw2;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0, ether_addr_to_u64(info->mac_addr) << 16);
+ qw1 = FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_PDINDEXLO, info->pd_idx) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_TC, info->tc_tos) |
+ FIELD_PREP(IRDMA_UDAQPC_VLANTAG, info->vlan_tag);
+
+ qw2 = FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ARPINDEX, info->dst_arpindex) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_FLOWLABEL, info->flow_label) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_HOPLIMIT, info->hop_ttl) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_PDINDEXHI, info->pd_idx >> 16);
+
+ if (!info->ipv4_valid) {
+ set_64bit_val(wqe, 40,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR0, info->dest_ip_addr[0]) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR1, info->dest_ip_addr[1]));
+ set_64bit_val(wqe, 32,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR2, info->dest_ip_addr[2]) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR3, info->dest_ip_addr[3]));
+
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR0, info->src_ip_addr[0]) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR1, info->src_ip_addr[1]));
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR2, info->src_ip_addr[2]) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR3, info->src_ip_addr[3]));
+ } else {
+ set_64bit_val(wqe, 32,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR3, info->dest_ip_addr[0]));
+
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR3, info->src_ip_addr[0]));
+ }
+
+ set_64bit_val(wqe, 8, qw1);
+ set_64bit_val(wqe, 16, qw2);
+
+ dma_wmb(); /* make sure WQE is written before writing the WQE header */
+
+ set_64bit_val(
+ wqe, 24,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_WQEVALID, cqp->polarity) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_OPCODE, op) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_DOLOOPBACKK, info->do_lpbk) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_IPV4VALID, info->ipv4_valid) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_AVIDX, info->ah_idx) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_INSERTVLANTAG, info->insert_vlan_tag));
+
+ print_hex_dump_debug("WQE: MANAGE_AH WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_create_mg_ctx() - create a mcg context
+ * @info: multicast group context info
+ */
+static void irdma_create_mg_ctx(struct irdma_mcast_grp_info *info)
+{
+ struct irdma_mcast_grp_ctx_entry_info *entry_info = NULL;
+ u8 idx = 0; /* index in the array */
+ u8 ctx_idx = 0; /* index in the MG context */
+
+ memset(info->dma_mem_mc.va, 0, IRDMA_MAX_MGS_PER_CTX * sizeof(u64));
+
+ for (idx = 0; idx < IRDMA_MAX_MGS_PER_CTX; idx++) {
+ entry_info = &info->mg_ctx_info[idx];
+ if (entry_info->valid_entry) {
+ set_64bit_val((__le64 *)info->dma_mem_mc.va,
+ ctx_idx * sizeof(u64),
+ FIELD_PREP(IRDMA_UDA_MGCTX_DESTPORT, entry_info->dest_port) |
+ FIELD_PREP(IRDMA_UDA_MGCTX_VALIDENT, entry_info->valid_entry) |
+ FIELD_PREP(IRDMA_UDA_MGCTX_QPID, entry_info->qp_id));
+ ctx_idx++;
+ }
+ }
+}
+
+/**
+ * irdma_access_mcast_grp() - Access mcast group based on op
+ * @cqp: Control QP
+ * @info: multicast group context info
+ * @op: operation to perform
+ * @scratch: u64 saved to be used during cqp completion
+ */
+int irdma_access_mcast_grp(struct irdma_sc_cqp *cqp,
+ struct irdma_mcast_grp_info *info, u32 op,
+ u64 scratch)
+{
+ __le64 *wqe;
+
+ if (info->mg_id >= IRDMA_UDA_MAX_FSI_MGS) {
+ ibdev_dbg(to_ibdev(cqp->dev), "WQE: mg_id out of range\n");
+ return -EINVAL;
+ }
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe) {
+ ibdev_dbg(to_ibdev(cqp->dev), "WQE: ring full\n");
+ return -ENOMEM;
+ }
+
+ irdma_create_mg_ctx(info);
+
+ set_64bit_val(wqe, 32, info->dma_mem_mc.pa);
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MG_VLANID, info->vlan_id) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_QS_HANDLE, info->qs_handle));
+ set_64bit_val(wqe, 0, ether_addr_to_u64(info->dest_mac_addr));
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MG_HMC_FCN_ID, info->hmc_fcn_id));
+
+ if (!info->ipv4_valid) {
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR0, info->dest_ip_addr[0]) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR1, info->dest_ip_addr[1]));
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR2, info->dest_ip_addr[2]) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR3, info->dest_ip_addr[3]));
+ } else {
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR3, info->dest_ip_addr[0]));
+ }
+
+ dma_wmb(); /* make sure WQE is written before writing the WQE header */
+
+ set_64bit_val(wqe, 24,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MG_WQEVALID, cqp->polarity) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MG_OPCODE, op) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MG_MGIDX, info->mg_id) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MG_VLANVALID, info->vlan_valid) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MG_IPV4VALID, info->ipv4_valid));
+
+ print_hex_dump_debug("WQE: MANAGE_MCG WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ print_hex_dump_debug("WQE: MCG_HOST CTX WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, info->dma_mem_mc.va,
+ IRDMA_MAX_MGS_PER_CTX * 8, false);
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_compare_mgs - Compares two multicast group structures
+ * @entry1: Multicast group info
+ * @entry2: Multicast group info in context
+ */
+static bool irdma_compare_mgs(struct irdma_mcast_grp_ctx_entry_info *entry1,
+ struct irdma_mcast_grp_ctx_entry_info *entry2)
+{
+ if (entry1->dest_port == entry2->dest_port &&
+ entry1->qp_id == entry2->qp_id)
+ return true;
+
+ return false;
+}
+
+/**
+ * irdma_sc_add_mcast_grp - Allocates mcast group entry in ctx
+ * @ctx: Multicast group context
+ * @mg: Multicast group info
+ */
+int irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx,
+ struct irdma_mcast_grp_ctx_entry_info *mg)
+{
+ u32 idx;
+ bool free_entry_found = false;
+ u32 free_entry_idx = 0;
+
+ /* find either an identical or a free entry for a multicast group */
+ for (idx = 0; idx < IRDMA_MAX_MGS_PER_CTX; idx++) {
+ if (ctx->mg_ctx_info[idx].valid_entry) {
+ if (irdma_compare_mgs(&ctx->mg_ctx_info[idx], mg)) {
+ ctx->mg_ctx_info[idx].use_cnt++;
+ return 0;
+ }
+ continue;
+ }
+ if (!free_entry_found) {
+ free_entry_found = true;
+ free_entry_idx = idx;
+ }
+ }
+
+ if (free_entry_found) {
+ ctx->mg_ctx_info[free_entry_idx] = *mg;
+ ctx->mg_ctx_info[free_entry_idx].valid_entry = true;
+ ctx->mg_ctx_info[free_entry_idx].use_cnt = 1;
+ ctx->no_of_mgs++;
+ return 0;
+ }
+
+ return -ENOMEM;
+}
+
+/**
+ * irdma_sc_del_mcast_grp - Delete mcast group
+ * @ctx: Multicast group context
+ * @mg: Multicast group info
+ *
+ * Finds and removes a specific multicast group from context; all
+ * parameters must match to remove a multicast group.
+ */
+int irdma_sc_del_mcast_grp(struct irdma_mcast_grp_info *ctx,
+ struct irdma_mcast_grp_ctx_entry_info *mg)
+{
+ u32 idx;
+
+ /* find an entry in multicast group context */
+ for (idx = 0; idx < IRDMA_MAX_MGS_PER_CTX; idx++) {
+ if (!ctx->mg_ctx_info[idx].valid_entry)
+ continue;
+
+ if (irdma_compare_mgs(mg, &ctx->mg_ctx_info[idx])) {
+ ctx->mg_ctx_info[idx].use_cnt--;
+
+ if (!ctx->mg_ctx_info[idx].use_cnt) {
+ ctx->mg_ctx_info[idx].valid_entry = false;
+ ctx->no_of_mgs--;
+ /* Remove gap if element was not the last */
+ if (idx != ctx->no_of_mgs &&
+ ctx->no_of_mgs > 0) {
+ memcpy(&ctx->mg_ctx_info[idx],
+ &ctx->mg_ctx_info[ctx->no_of_mgs - 1],
+ sizeof(ctx->mg_ctx_info[idx]));
+ ctx->mg_ctx_info[ctx->no_of_mgs - 1].valid_entry = false;
+ }
+ }
+
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
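Before a group is pushed to hardware through the create/modify wrappers in uda.h below, the per-context entry table is maintained with the two helpers above. A minimal, hypothetical usage sketch (the caller that owns the irdma_mcast_grp_info and the QP identifiers are assumptions):

static int attach_qp_to_mcg(struct irdma_mcast_grp_info *ctx, u32 qp_id,
                            u16 dest_port)
{
        struct irdma_mcast_grp_ctx_entry_info entry = {
                .qp_id = qp_id,
                .dest_port = dest_port,
        };

        /* reuses an identical entry (bumping use_cnt) or takes a free slot */
        return irdma_sc_add_mcast_grp(ctx, &entry);
}

static int detach_qp_from_mcg(struct irdma_mcast_grp_info *ctx, u32 qp_id,
                              u16 dest_port)
{
        struct irdma_mcast_grp_ctx_entry_info entry = {
                .qp_id = qp_id,
                .dest_port = dest_port,
        };

        /* all parameters must match; the slot is freed once use_cnt hits 0 */
        return irdma_sc_del_mcast_grp(ctx, &entry);
}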
diff --git a/drivers/infiniband/hw/irdma/uda.h b/drivers/infiniband/hw/irdma/uda.h
new file mode 100644
index 000000000000..27b8701cf21b
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/uda.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2016 - 2021 Intel Corporation */
+#ifndef IRDMA_UDA_H
+#define IRDMA_UDA_H
+
+#define IRDMA_UDA_MAX_FSI_MGS 4096
+#define IRDMA_UDA_MAX_PFS 16
+#define IRDMA_UDA_MAX_VFS 128
+
+struct irdma_sc_cqp;
+
+struct irdma_ah_info {
+ struct irdma_sc_vsi *vsi;
+ u32 pd_idx;
+ u32 dst_arpindex;
+ u32 dest_ip_addr[4];
+ u32 src_ip_addr[4];
+ u32 flow_label;
+ u32 ah_idx;
+ u16 vlan_tag;
+ u8 insert_vlan_tag;
+ u8 tc_tos;
+ u8 hop_ttl;
+ u8 mac_addr[ETH_ALEN];
+ bool ah_valid:1;
+ bool ipv4_valid:1;
+ bool do_lpbk:1;
+};
+
+struct irdma_sc_ah {
+ struct irdma_sc_dev *dev;
+ struct irdma_ah_info ah_info;
+};
+
+int irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx,
+ struct irdma_mcast_grp_ctx_entry_info *mg);
+int irdma_sc_del_mcast_grp(struct irdma_mcast_grp_info *ctx,
+ struct irdma_mcast_grp_ctx_entry_info *mg);
+int irdma_sc_access_ah(struct irdma_sc_cqp *cqp, struct irdma_ah_info *info,
+ u32 op, u64 scratch);
+int irdma_access_mcast_grp(struct irdma_sc_cqp *cqp,
+ struct irdma_mcast_grp_info *info, u32 op,
+ u64 scratch);
+
+static inline void irdma_sc_init_ah(struct irdma_sc_dev *dev, struct irdma_sc_ah *ah)
+{
+ ah->dev = dev;
+}
+
+static inline int irdma_sc_create_ah(struct irdma_sc_cqp *cqp,
+ struct irdma_ah_info *info, u64 scratch)
+{
+ return irdma_sc_access_ah(cqp, info, IRDMA_CQP_OP_CREATE_ADDR_HANDLE,
+ scratch);
+}
+
+static inline int irdma_sc_destroy_ah(struct irdma_sc_cqp *cqp,
+ struct irdma_ah_info *info, u64 scratch)
+{
+ return irdma_sc_access_ah(cqp, info, IRDMA_CQP_OP_DESTROY_ADDR_HANDLE,
+ scratch);
+}
+
+static inline int irdma_sc_create_mcast_grp(struct irdma_sc_cqp *cqp,
+ struct irdma_mcast_grp_info *info,
+ u64 scratch)
+{
+ return irdma_access_mcast_grp(cqp, info, IRDMA_CQP_OP_CREATE_MCAST_GRP,
+ scratch);
+}
+
+static inline int irdma_sc_modify_mcast_grp(struct irdma_sc_cqp *cqp,
+ struct irdma_mcast_grp_info *info,
+ u64 scratch)
+{
+ return irdma_access_mcast_grp(cqp, info, IRDMA_CQP_OP_MODIFY_MCAST_GRP,
+ scratch);
+}
+
+static inline int irdma_sc_destroy_mcast_grp(struct irdma_sc_cqp *cqp,
+ struct irdma_mcast_grp_info *info,
+ u64 scratch)
+{
+ return irdma_access_mcast_grp(cqp, info, IRDMA_CQP_OP_DESTROY_MCAST_GRP,
+ scratch);
+}
+#endif /* IRDMA_UDA_H */
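Each wrapper above maps one verb onto irdma_sc_access_ah() / irdma_access_mcast_grp() with the corresponding CQP opcode. A hedged sketch of the AH-create path (the caller owning cqp/info and the choice of scratch value are assumptions):

static int post_ah_create(struct irdma_sc_cqp *cqp, struct irdma_ah_info *info,
                          u64 scratch)
{
        int ret;

        /* returns -ENOMEM when the CQP SQ has no free WQE */
        ret = irdma_sc_create_ah(cqp, info, scratch);
        if (ret)
                return ret;

        /*
         * The WQE is posted immediately; completion is reaped from the CCQ
         * (irdma_sc_ccq_get_cqe_info() in type.h) with the same scratch
         * value echoed back in irdma_ccq_cqe_info.
         */
        return 0;
}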
diff --git a/drivers/infiniband/hw/irdma/uda_d.h b/drivers/infiniband/hw/irdma/uda_d.h
new file mode 100644
index 000000000000..4fb4daa20722
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/uda_d.h
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2016 - 2021 Intel Corporation */
+#ifndef IRDMA_UDA_D_H
+#define IRDMA_UDA_D_H
+
+/* L4 packet type */
+#define IRDMA_E_UDA_SQ_L4T_UNKNOWN 0
+#define IRDMA_E_UDA_SQ_L4T_TCP 1
+#define IRDMA_E_UDA_SQ_L4T_SCTP 2
+#define IRDMA_E_UDA_SQ_L4T_UDP 3
+
+/* Inner IP header type */
+#define IRDMA_E_UDA_SQ_IIPT_UNKNOWN 0
+#define IRDMA_E_UDA_SQ_IIPT_IPV6 1
+#define IRDMA_E_UDA_SQ_IIPT_IPV4_NO_CSUM 2
+#define IRDMA_E_UDA_SQ_IIPT_IPV4_CSUM 3
+#define IRDMA_UDA_QPSQ_PUSHWQE BIT_ULL(56)
+#define IRDMA_UDA_QPSQ_INLINEDATAFLAG BIT_ULL(57)
+#define IRDMA_UDA_QPSQ_INLINEDATALEN GENMASK_ULL(55, 48)
+#define IRDMA_UDA_QPSQ_ADDFRAGCNT GENMASK_ULL(41, 38)
+#define IRDMA_UDA_QPSQ_IPFRAGFLAGS GENMASK_ULL(43, 42)
+#define IRDMA_UDA_QPSQ_NOCHECKSUM BIT_ULL(45)
+#define IRDMA_UDA_QPSQ_AHIDXVALID BIT_ULL(46)
+#define IRDMA_UDA_QPSQ_LOCAL_FENCE BIT_ULL(61)
+#define IRDMA_UDA_QPSQ_AHIDX GENMASK_ULL(16, 0)
+#define IRDMA_UDA_QPSQ_PROTOCOL GENMASK_ULL(23, 16)
+#define IRDMA_UDA_QPSQ_EXTHDRLEN GENMASK_ULL(40, 32)
+#define IRDMA_UDA_QPSQ_MULTICAST BIT_ULL(63)
+#define IRDMA_UDA_QPSQ_MACLEN GENMASK_ULL(62, 56)
+#define IRDMA_UDA_QPSQ_MACLEN_LINE 2
+#define IRDMA_UDA_QPSQ_IPLEN GENMASK_ULL(54, 48)
+#define IRDMA_UDA_QPSQ_IPLEN_LINE 2
+#define IRDMA_UDA_QPSQ_L4T GENMASK_ULL(31, 30)
+#define IRDMA_UDA_QPSQ_L4T_LINE 2
+#define IRDMA_UDA_QPSQ_IIPT GENMASK_ULL(29, 28)
+#define IRDMA_UDA_QPSQ_IIPT_LINE 2
+
+#define IRDMA_UDA_QPSQ_DO_LPB_LINE 3
+#define IRDMA_UDA_QPSQ_FWD_PROG_CONFIRM BIT_ULL(45)
+#define IRDMA_UDA_QPSQ_FWD_PROG_CONFIRM_LINE 3
+#define IRDMA_UDA_QPSQ_IMMDATA GENMASK_ULL(63, 0)
+
+/* Byte Offset 0 */
+#define IRDMA_UDAQPC_IPV4_M BIT_ULL(3)
+#define IRDMA_UDAQPC_INSERTVLANTAG BIT_ULL(5)
+#define IRDMA_UDAQPC_ISQP1 BIT_ULL(6)
+
+#define IRDMA_UDAQPC_ECNENABLE BIT_ULL(14)
+#define IRDMA_UDAQPC_PDINDEXHI GENMASK_ULL(21, 20)
+#define IRDMA_UDAQPC_DCTCPENABLE BIT_ULL(25)
+
+#define IRDMA_UDAQPC_RCVTPHEN IRDMAQPC_RCVTPHEN
+#define IRDMA_UDAQPC_XMITTPHEN IRDMAQPC_XMITTPHEN
+#define IRDMA_UDAQPC_RQTPHEN IRDMAQPC_RQTPHEN
+#define IRDMA_UDAQPC_SQTPHEN IRDMAQPC_SQTPHEN
+#define IRDMA_UDAQPC_PPIDX IRDMAQPC_PPIDX
+#define IRDMA_UDAQPC_PMENA IRDMAQPC_PMENA
+#define IRDMA_UDAQPC_INSERTTAG2 BIT_ULL(11)
+#define IRDMA_UDAQPC_INSERTTAG3 BIT_ULL(14)
+
+#define IRDMA_UDAQPC_RQSIZE IRDMAQPC_RQSIZE
+#define IRDMA_UDAQPC_SQSIZE IRDMAQPC_SQSIZE
+#define IRDMA_UDAQPC_TXCQNUM IRDMAQPC_TXCQNUM
+#define IRDMA_UDAQPC_RXCQNUM IRDMAQPC_RXCQNUM
+#define IRDMA_UDAQPC_QPCOMPCTX IRDMAQPC_QPCOMPCTX
+#define IRDMA_UDAQPC_SQTPHVAL IRDMAQPC_SQTPHVAL
+#define IRDMA_UDAQPC_RQTPHVAL IRDMAQPC_RQTPHVAL
+#define IRDMA_UDAQPC_QSHANDLE IRDMAQPC_QSHANDLE
+#define IRDMA_UDAQPC_RQHDRRINGBUFSIZE GENMASK_ULL(49, 48)
+#define IRDMA_UDAQPC_SQHDRRINGBUFSIZE GENMASK_ULL(33, 32)
+#define IRDMA_UDAQPC_PRIVILEGEENABLE BIT_ULL(25)
+#define IRDMA_UDAQPC_USE_STATISTICS_INSTANCE BIT_ULL(26)
+#define IRDMA_UDAQPC_STATISTICS_INSTANCE_INDEX GENMASK_ULL(6, 0)
+#define IRDMA_UDAQPC_PRIVHDRGENENABLE BIT_ULL(0)
+#define IRDMA_UDAQPC_RQHDRSPLITENABLE BIT_ULL(3)
+#define IRDMA_UDAQPC_RQHDRRINGBUFENABLE BIT_ULL(2)
+#define IRDMA_UDAQPC_SQHDRRINGBUFENABLE BIT_ULL(1)
+#define IRDMA_UDAQPC_IPID GENMASK_ULL(47, 32)
+#define IRDMA_UDAQPC_SNDMSS GENMASK_ULL(29, 16)
+#define IRDMA_UDAQPC_VLANTAG GENMASK_ULL(15, 0)
+#define IRDMA_UDA_CQPSQ_MAV_PDINDEXHI GENMASK_ULL(27, 20)
+#define IRDMA_UDA_CQPSQ_MAV_PDINDEXLO GENMASK_ULL(63, 48)
+#define IRDMA_UDA_CQPSQ_MAV_SRCMACADDRINDEX GENMASK_ULL(29, 24)
+#define IRDMA_UDA_CQPSQ_MAV_ARPINDEX GENMASK_ULL(63, 48)
+#define IRDMA_UDA_CQPSQ_MAV_TC GENMASK_ULL(39, 32)
+#define IRDMA_UDA_CQPSQ_MAV_HOPLIMIT GENMASK_ULL(39, 32)
+#define IRDMA_UDA_CQPSQ_MAV_FLOWLABEL GENMASK_ULL(19, 0)
+#define IRDMA_UDA_CQPSQ_MAV_ADDR0 GENMASK_ULL(63, 32)
+#define IRDMA_UDA_CQPSQ_MAV_ADDR1 GENMASK_ULL(31, 0)
+#define IRDMA_UDA_CQPSQ_MAV_ADDR2 GENMASK_ULL(63, 32)
+#define IRDMA_UDA_CQPSQ_MAV_ADDR3 GENMASK_ULL(31, 0)
+#define IRDMA_UDA_CQPSQ_MAV_WQEVALID BIT_ULL(63)
+#define IRDMA_UDA_CQPSQ_MAV_OPCODE GENMASK_ULL(37, 32)
+#define IRDMA_UDA_CQPSQ_MAV_DOLOOPBACKK BIT_ULL(62)
+#define IRDMA_UDA_CQPSQ_MAV_IPV4VALID BIT_ULL(59)
+#define IRDMA_UDA_CQPSQ_MAV_AVIDX GENMASK_ULL(23, 0)
+#define IRDMA_UDA_CQPSQ_MAV_INSERTVLANTAG BIT_ULL(60)
+#define IRDMA_UDA_MGCTX_VFFLAG BIT_ULL(29)
+#define IRDMA_UDA_MGCTX_DESTPORT GENMASK_ULL(47, 32)
+#define IRDMA_UDA_MGCTX_VFID GENMASK_ULL(28, 22)
+#define IRDMA_UDA_MGCTX_VALIDENT BIT_ULL(31)
+#define IRDMA_UDA_MGCTX_PFID GENMASK_ULL(21, 18)
+#define IRDMA_UDA_MGCTX_FLAGIGNOREDPORT BIT_ULL(30)
+#define IRDMA_UDA_MGCTX_QPID GENMASK_ULL(17, 0)
+#define IRDMA_UDA_CQPSQ_MG_WQEVALID BIT_ULL(63)
+#define IRDMA_UDA_CQPSQ_MG_OPCODE GENMASK_ULL(37, 32)
+#define IRDMA_UDA_CQPSQ_MG_MGIDX GENMASK_ULL(12, 0)
+#define IRDMA_UDA_CQPSQ_MG_IPV4VALID BIT_ULL(60)
+#define IRDMA_UDA_CQPSQ_MG_VLANVALID BIT_ULL(59)
+#define IRDMA_UDA_CQPSQ_MG_HMC_FCN_ID GENMASK_ULL(5, 0)
+#define IRDMA_UDA_CQPSQ_MG_VLANID GENMASK_ULL(43, 32)
+#define IRDMA_UDA_CQPSQ_QS_HANDLE GENMASK_ULL(9, 0)
+#define IRDMA_UDA_CQPSQ_QHASH_QPN GENMASK_ULL(49, 32)
+#define IRDMA_UDA_CQPSQ_QHASH_ BIT_ULL(0)
+#define IRDMA_UDA_CQPSQ_QHASH_SRC_PORT GENMASK_ULL(31, 16)
+#define IRDMA_UDA_CQPSQ_QHASH_DEST_PORT GENMASK_ULL(15, 0)
+#define IRDMA_UDA_CQPSQ_QHASH_ADDR0 GENMASK_ULL(63, 32)
+#define IRDMA_UDA_CQPSQ_QHASH_ADDR1 GENMASK_ULL(31, 0)
+#define IRDMA_UDA_CQPSQ_QHASH_ADDR2 GENMASK_ULL(63, 32)
+#define IRDMA_UDA_CQPSQ_QHASH_ADDR3 GENMASK_ULL(31, 0)
+#define IRDMA_UDA_CQPSQ_QHASH_WQEVALID BIT_ULL(63)
+#define IRDMA_UDA_CQPSQ_QHASH_OPCODE GENMASK_ULL(37, 32)
+#define IRDMA_UDA_CQPSQ_QHASH_MANAGE GENMASK_ULL(62, 61)
+#define IRDMA_UDA_CQPSQ_QHASH_IPV4VALID GENMASK_ULL(60, 60)
+#define IRDMA_UDA_CQPSQ_QHASH_LANFWD GENMASK_ULL(59, 59)
+#define IRDMA_UDA_CQPSQ_QHASH_ENTRYTYPE GENMASK_ULL(44, 42)
+#endif /* IRDMA_UDA_D_H */
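The QHASH field masks at the end of the file are not exercised in this hunk (the manage-qhash CQP routine lives elsewhere in the driver), but they compose into a 64-bit WQE header word the same way the AH and MCG paths in uda.c do. An illustrative sketch, assuming FIELD_PREP() from <linux/bitfield.h>:

static u64 build_qhash_hdr(u8 polarity, u32 op, u32 manage, bool ipv4_valid,
                           u32 entry_type)
{
        return FIELD_PREP(IRDMA_UDA_CQPSQ_QHASH_WQEVALID, polarity) |
               FIELD_PREP(IRDMA_UDA_CQPSQ_QHASH_OPCODE, op) |
               FIELD_PREP(IRDMA_UDA_CQPSQ_QHASH_MANAGE, manage) |
               FIELD_PREP(IRDMA_UDA_CQPSQ_QHASH_IPV4VALID, ipv4_valid) |
               FIELD_PREP(IRDMA_UDA_CQPSQ_QHASH_ENTRYTYPE, entry_type);
}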
diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c
new file mode 100644
index 000000000000..f0846b800913
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/uk.c
@@ -0,0 +1,1930 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include "osdep.h"
+#include "defs.h"
+#include "user.h"
+#include "irdma.h"
+
+/**
+ * irdma_set_fragment - set fragment in wqe
+ * @wqe: wqe for setting fragment
+ * @offset: offset value
+ * @sge: sge length and stag
+ * @valid: wqe valid flag
+ */
+static void irdma_set_fragment(__le64 *wqe, u32 offset, struct ib_sge *sge,
+ u8 valid)
+{
+ if (sge) {
+ set_64bit_val(wqe, offset,
+ FIELD_PREP(IRDMAQPSQ_FRAG_TO, sge->addr));
+ set_64bit_val(wqe, offset + 8,
+ FIELD_PREP(IRDMAQPSQ_VALID, valid) |
+ FIELD_PREP(IRDMAQPSQ_FRAG_LEN, sge->length) |
+ FIELD_PREP(IRDMAQPSQ_FRAG_STAG, sge->lkey));
+ } else {
+ set_64bit_val(wqe, offset, 0);
+ set_64bit_val(wqe, offset + 8,
+ FIELD_PREP(IRDMAQPSQ_VALID, valid));
+ }
+}
+
+/**
+ * irdma_set_fragment_gen_1 - set fragment in wqe
+ * @wqe: wqe for setting fragment
+ * @offset: offset value
+ * @sge: sge length and stag
+ * @valid: wqe valid flag
+ */
+static void irdma_set_fragment_gen_1(__le64 *wqe, u32 offset,
+ struct ib_sge *sge, u8 valid)
+{
+ if (sge) {
+ set_64bit_val(wqe, offset,
+ FIELD_PREP(IRDMAQPSQ_FRAG_TO, sge->addr));
+ set_64bit_val(wqe, offset + 8,
+ FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_LEN, sge->length) |
+ FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_STAG, sge->lkey));
+ } else {
+ set_64bit_val(wqe, offset, 0);
+ set_64bit_val(wqe, offset + 8, 0);
+ }
+}
+
+/**
+ * irdma_nop_1 - insert a NOP wqe
+ * @qp: hw qp ptr
+ */
+static int irdma_nop_1(struct irdma_qp_uk *qp)
+{
+ u64 hdr;
+ __le64 *wqe;
+ u32 wqe_idx;
+ bool signaled = false;
+
+ if (!qp->sq_ring.head)
+ return -EINVAL;
+
+ wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring);
+ wqe = qp->sq_base[wqe_idx].elem;
+
+ qp->sq_wrtrk_array[wqe_idx].quanta = IRDMA_QP_WQE_MIN_QUANTA;
+
+ set_64bit_val(wqe, 0, 0);
+ set_64bit_val(wqe, 8, 0);
+ set_64bit_val(wqe, 16, 0);
+
+ hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_NOP) |
+ FIELD_PREP(IRDMAQPSQ_SIGCOMPL, signaled) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity);
+
+ /* make sure WQE is written before valid bit is set */
+ dma_wmb();
+
+ set_64bit_val(wqe, 24, hdr);
+
+ return 0;
+}
+
+/**
+ * irdma_clr_wqes - clear next 128 sq entries
+ * @qp: hw qp ptr
+ * @qp_wqe_idx: wqe_idx
+ */
+void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx)
+{
+ struct irdma_qp_quanta *sq;
+ u32 wqe_idx;
+
+ if (!(qp_wqe_idx & 0x7F)) {
+ wqe_idx = (qp_wqe_idx + 128) % qp->sq_ring.size;
+ sq = qp->sq_base + wqe_idx;
+ if (wqe_idx)
+ memset(sq, qp->swqe_polarity ? 0 : 0xFF,
+ 128 * sizeof(*sq));
+ else
+ memset(sq, qp->swqe_polarity ? 0xFF : 0,
+ 128 * sizeof(*sq));
+ }
+}
+
+/**
+ * irdma_uk_qp_post_wr - ring doorbell
+ * @qp: hw qp ptr
+ */
+void irdma_uk_qp_post_wr(struct irdma_qp_uk *qp)
+{
+ dma_wmb();
+ writel(qp->qp_id, qp->wqe_alloc_db);
+}
+
+/**
+ * irdma_qp_get_next_send_wqe - pad with NOP if needed, return where next WR should go
+ * @qp: hw qp ptr
+ * @wqe_idx: return wqe index
+ * @quanta: size of WR in quanta
+ * @total_size: size of WR in bytes
+ * @info: info on WR
+ */
+__le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx,
+ u16 quanta, u32 total_size,
+ struct irdma_post_sq_info *info)
+{
+ __le64 *wqe;
+ __le64 *wqe_0 = NULL;
+ u16 avail_quanta;
+ u16 i;
+
+ avail_quanta = qp->uk_attrs->max_hw_sq_chunk -
+ (IRDMA_RING_CURRENT_HEAD(qp->sq_ring) %
+ qp->uk_attrs->max_hw_sq_chunk);
+ if (quanta <= avail_quanta) {
+ /* WR fits in current chunk */
+ if (quanta > IRDMA_SQ_RING_FREE_QUANTA(qp->sq_ring))
+ return NULL;
+ } else {
+ /* Need to pad with NOP */
+ if (quanta + avail_quanta >
+ IRDMA_SQ_RING_FREE_QUANTA(qp->sq_ring))
+ return NULL;
+
+ for (i = 0; i < avail_quanta; i++) {
+ irdma_nop_1(qp);
+ IRDMA_RING_MOVE_HEAD_NOCHECK(qp->sq_ring);
+ }
+ }
+
+ *wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring);
+ if (!*wqe_idx)
+ qp->swqe_polarity = !qp->swqe_polarity;
+
+ IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, quanta);
+
+ wqe = qp->sq_base[*wqe_idx].elem;
+ if (qp->uk_attrs->hw_rev == IRDMA_GEN_1 && quanta == 1 &&
+ (IRDMA_RING_CURRENT_HEAD(qp->sq_ring) & 1)) {
+ wqe_0 = qp->sq_base[IRDMA_RING_CURRENT_HEAD(qp->sq_ring)].elem;
+ wqe_0[3] = cpu_to_le64(FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity ? 0 : 1));
+ }
+ qp->sq_wrtrk_array[*wqe_idx].wrid = info->wr_id;
+ qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size;
+ qp->sq_wrtrk_array[*wqe_idx].quanta = quanta;
+ qp->sq_wrtrk_array[*wqe_idx].signaled = info->signaled;
+
+ return wqe;
+}
+
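+/**
+ * irdma_srq_get_next_recv_wqe - get next srq's rcv wqe
+ * @srq: hw srq ptr
+ * @wqe_idx: return wqe index
+ */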
+__le64 *irdma_srq_get_next_recv_wqe(struct irdma_srq_uk *srq, u32 *wqe_idx)
+{
+ int ret_code;
+ __le64 *wqe;
+
+ if (IRDMA_RING_FULL_ERR(srq->srq_ring))
+ return NULL;
+
+ IRDMA_ATOMIC_RING_MOVE_HEAD(srq->srq_ring, *wqe_idx, ret_code);
+ if (ret_code)
+ return NULL;
+
+ if (!*wqe_idx)
+ srq->srwqe_polarity = !srq->srwqe_polarity;
+ /* wqe_size_multiplier is the number of 32-byte quanta in one srq wqe */
+ wqe = srq->srq_base[*wqe_idx * (srq->wqe_size_multiplier)].elem;
+
+ return wqe;
+}
+
+/**
+ * irdma_qp_get_next_recv_wqe - get next qp's rcv wqe
+ * @qp: hw qp ptr
+ * @wqe_idx: return wqe index
+ */
+__le64 *irdma_qp_get_next_recv_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx)
+{
+ __le64 *wqe;
+ int ret_code;
+
+ if (IRDMA_RING_FULL_ERR(qp->rq_ring))
+ return NULL;
+
+ IRDMA_ATOMIC_RING_MOVE_HEAD(qp->rq_ring, *wqe_idx, ret_code);
+ if (ret_code)
+ return NULL;
+
+ if (!*wqe_idx)
+ qp->rwqe_polarity = !qp->rwqe_polarity;
+ /* rq_wqe_size_multiplier is the number of 32-byte quanta in one rq wqe */
+ wqe = qp->rq_base[*wqe_idx * qp->rq_wqe_size_multiplier].elem;
+
+ return wqe;
+}
+
+/**
+ * irdma_uk_rdma_write - rdma write operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
+ bool post_sq)
+{
+ u64 hdr;
+ __le64 *wqe;
+ struct irdma_rdma_write *op_info;
+ u32 i, wqe_idx;
+ u32 total_size = 0, byte_off;
+ int ret_code;
+ u32 frag_cnt, addl_frag_cnt;
+ bool read_fence = false;
+ u16 quanta;
+
+ op_info = &info->op.rdma_write;
+ if (op_info->num_lo_sges > qp->max_sq_frag_cnt)
+ return -EINVAL;
+
+ for (i = 0; i < op_info->num_lo_sges; i++)
+ total_size += op_info->lo_sg_list[i].length;
+
+ read_fence |= info->read_fence;
+
+ if (info->imm_data_valid)
+ frag_cnt = op_info->num_lo_sges + 1;
+ else
+ frag_cnt = op_info->num_lo_sges;
+ addl_frag_cnt = frag_cnt > 1 ? (frag_cnt - 1) : 0;
+ ret_code = irdma_fragcnt_to_quanta_sq(frag_cnt, &quanta);
+ if (ret_code)
+ return ret_code;
+
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
+ info);
+ if (!wqe)
+ return -ENOMEM;
+
+ irdma_clr_wqes(qp, wqe_idx);
+
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.addr));
+
+ if (info->imm_data_valid) {
+ set_64bit_val(wqe, 0,
+ FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data));
+ i = 0;
+ } else {
+ qp->wqe_ops.iw_set_fragment(wqe, 0,
+ op_info->lo_sg_list,
+ qp->swqe_polarity);
+ i = 1;
+ }
+
+ for (byte_off = 32; i < op_info->num_lo_sges; i++) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
+ &op_info->lo_sg_list[i],
+ qp->swqe_polarity);
+ byte_off += 16;
+ }
+
+ /* if not an odd number set valid bit in next fragment */
+ if (qp->uk_attrs->hw_rev >= IRDMA_GEN_2 && !(frag_cnt & 0x01) &&
+ frag_cnt) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
+ qp->swqe_polarity);
+ if (qp->uk_attrs->hw_rev == IRDMA_GEN_2)
+ ++addl_frag_cnt;
+ }
+
+ hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.lkey) |
+ FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) |
+ FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid) |
+ FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt) |
+ FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) |
+ FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) |
+ FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) |
+ FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity);
+
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * irdma_uk_atomic_fetch_add - atomic fetch and add operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+int irdma_uk_atomic_fetch_add(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info, bool post_sq)
+{
+ struct irdma_atomic_fetch_add *op_info;
+ u32 total_size = 0;
+ u16 quanta = 2;
+ u32 wqe_idx;
+ __le64 *wqe;
+ u64 hdr;
+
+ op_info = &info->op.atomic_fetch_add;
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
+ info);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0, op_info->tagged_offset);
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_STAG, op_info->stag));
+ set_64bit_val(wqe, 16, op_info->remote_tagged_offset);
+
+ hdr = FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, 1) |
+ FIELD_PREP(IRDMAQPSQ_REMOTE_STAG, op_info->remote_stag) |
+ FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_ATOMIC_FETCH_ADD) |
+ FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) |
+ FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) |
+ FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity);
+
+ set_64bit_val(wqe, 32, op_info->fetch_add_data_bytes);
+ set_64bit_val(wqe, 40, 0);
+ set_64bit_val(wqe, 48, 0);
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity));
+
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * irdma_uk_atomic_compare_swap - atomic compare and swap operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+int irdma_uk_atomic_compare_swap(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info, bool post_sq)
+{
+ struct irdma_atomic_compare_swap *op_info;
+ u32 total_size = 0;
+ u16 quanta = 2;
+ u32 wqe_idx;
+ __le64 *wqe;
+ u64 hdr;
+
+ op_info = &info->op.atomic_compare_swap;
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
+ info);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0, op_info->tagged_offset);
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_STAG, op_info->stag));
+ set_64bit_val(wqe, 16, op_info->remote_tagged_offset);
+
+ hdr = FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, 1) |
+ FIELD_PREP(IRDMAQPSQ_REMOTE_STAG, op_info->remote_stag) |
+ FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_ATOMIC_COMPARE_SWAP_ADD) |
+ FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) |
+ FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) |
+ FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity);
+
+ set_64bit_val(wqe, 32, op_info->swap_data_bytes);
+ set_64bit_val(wqe, 40, op_info->compare_data_bytes);
+ set_64bit_val(wqe, 48, 0);
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity));
+
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * irdma_uk_srq_post_receive - post a receive wqe to a shared rq
+ * @srq: shared rq ptr
+ * @info: post rq information
+ */
+int irdma_uk_srq_post_receive(struct irdma_srq_uk *srq,
+ struct irdma_post_rq_info *info)
+{
+ u32 wqe_idx, i, byte_off;
+ u32 addl_frag_cnt;
+ __le64 *wqe;
+ u64 hdr;
+
+ if (srq->max_srq_frag_cnt < info->num_sges)
+ return -EINVAL;
+
+ wqe = irdma_srq_get_next_recv_wqe(srq, &wqe_idx);
+ if (!wqe)
+ return -ENOMEM;
+
+ addl_frag_cnt = info->num_sges > 1 ? info->num_sges - 1 : 0;
+ srq->wqe_ops.iw_set_fragment(wqe, 0, info->sg_list,
+ srq->srwqe_polarity);
+
+ for (i = 1, byte_off = 32; i < info->num_sges; i++) {
+ srq->wqe_ops.iw_set_fragment(wqe, byte_off, &info->sg_list[i],
+ srq->srwqe_polarity);
+ byte_off += 16;
+ }
+
+ /* if not an odd number set valid bit in next fragment */
+ if (srq->uk_attrs->hw_rev >= IRDMA_GEN_2 && !(info->num_sges & 0x01) &&
+ info->num_sges) {
+ srq->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
+ srq->srwqe_polarity);
+ if (srq->uk_attrs->hw_rev == IRDMA_GEN_2)
+ ++addl_frag_cnt;
+ }
+
+ set_64bit_val(wqe, 16, (u64)info->wr_id);
+ hdr = FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) |
+ FIELD_PREP(IRDMAQPSQ_VALID, srq->srwqe_polarity);
+
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ set_64bit_val(srq->shadow_area, 0, (wqe_idx + 1) % srq->srq_ring.size);
+
+ return 0;
+}
+
+/**
+ * irdma_uk_rdma_read - rdma read command
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @inv_stag: flag for inv_stag
+ * @post_sq: flag to post sq
+ */
+int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
+ bool inv_stag, bool post_sq)
+{
+ struct irdma_rdma_read *op_info;
+ int ret_code;
+ u32 i, byte_off, total_size = 0;
+ bool local_fence = false;
+ u32 addl_frag_cnt;
+ __le64 *wqe;
+ u32 wqe_idx;
+ u16 quanta;
+ u64 hdr;
+
+ op_info = &info->op.rdma_read;
+ if (qp->max_sq_frag_cnt < op_info->num_lo_sges)
+ return -EINVAL;
+
+ for (i = 0; i < op_info->num_lo_sges; i++)
+ total_size += op_info->lo_sg_list[i].length;
+
+ ret_code = irdma_fragcnt_to_quanta_sq(op_info->num_lo_sges, &quanta);
+ if (ret_code)
+ return ret_code;
+
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
+ info);
+ if (!wqe)
+ return -ENOMEM;
+
+ irdma_clr_wqes(qp, wqe_idx);
+
+ addl_frag_cnt = op_info->num_lo_sges > 1 ?
+ (op_info->num_lo_sges - 1) : 0;
+ local_fence |= info->local_fence;
+
+ qp->wqe_ops.iw_set_fragment(wqe, 0, op_info->lo_sg_list,
+ qp->swqe_polarity);
+ for (i = 1, byte_off = 32; i < op_info->num_lo_sges; ++i) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
+ &op_info->lo_sg_list[i],
+ qp->swqe_polarity);
+ byte_off += 16;
+ }
+
+ /* if not an odd number set valid bit in next fragment */
+ if (qp->uk_attrs->hw_rev >= IRDMA_GEN_2 &&
+ !(op_info->num_lo_sges & 0x01) && op_info->num_lo_sges) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
+ qp->swqe_polarity);
+ if (qp->uk_attrs->hw_rev == IRDMA_GEN_2)
+ ++addl_frag_cnt;
+ }
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.addr));
+ hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.lkey) |
+ FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) |
+ FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) |
+ FIELD_PREP(IRDMAQPSQ_OPCODE,
+ (inv_stag ? IRDMAQP_OP_RDMA_READ_LOC_INV : IRDMAQP_OP_RDMA_READ)) |
+ FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) |
+ FIELD_PREP(IRDMAQPSQ_LOCALFENCE, local_fence) |
+ FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity);
+
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * irdma_uk_send - rdma send command
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
+ bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_post_send *op_info;
+ u64 hdr;
+ u32 i, wqe_idx, total_size = 0, byte_off;
+ int ret_code;
+ u32 frag_cnt, addl_frag_cnt;
+ bool read_fence = false;
+ u16 quanta;
+
+ op_info = &info->op.send;
+ if (qp->max_sq_frag_cnt < op_info->num_sges)
+ return -EINVAL;
+
+ for (i = 0; i < op_info->num_sges; i++)
+ total_size += op_info->sg_list[i].length;
+
+ if (info->imm_data_valid)
+ frag_cnt = op_info->num_sges + 1;
+ else
+ frag_cnt = op_info->num_sges;
+ ret_code = irdma_fragcnt_to_quanta_sq(frag_cnt, &quanta);
+ if (ret_code)
+ return ret_code;
+
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
+ info);
+ if (!wqe)
+ return -ENOMEM;
+
+ irdma_clr_wqes(qp, wqe_idx);
+
+ read_fence |= info->read_fence;
+ addl_frag_cnt = frag_cnt > 1 ? (frag_cnt - 1) : 0;
+ if (info->imm_data_valid) {
+ set_64bit_val(wqe, 0,
+ FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data));
+ i = 0;
+ } else {
+ qp->wqe_ops.iw_set_fragment(wqe, 0,
+ frag_cnt ? op_info->sg_list : NULL,
+ qp->swqe_polarity);
+ i = 1;
+ }
+
+ for (byte_off = 32; i < op_info->num_sges; i++) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off, &op_info->sg_list[i],
+ qp->swqe_polarity);
+ byte_off += 16;
+ }
+
+ /* if not an odd number set valid bit in next fragment */
+ if (qp->uk_attrs->hw_rev >= IRDMA_GEN_2 && !(frag_cnt & 0x01) &&
+ frag_cnt) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
+ qp->swqe_polarity);
+ if (qp->uk_attrs->hw_rev == IRDMA_GEN_2)
+ ++addl_frag_cnt;
+ }
+
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMAQPSQ_DESTQKEY, op_info->qkey) |
+ FIELD_PREP(IRDMAQPSQ_DESTQPN, op_info->dest_qp));
+ hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, info->stag_to_inv) |
+ FIELD_PREP(IRDMAQPSQ_AHID, op_info->ah_id) |
+ FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG,
+ (info->imm_data_valid ? 1 : 0)) |
+ FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) |
+ FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) |
+ FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) |
+ FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) |
+ FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) |
+ FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(IRDMAQPSQ_UDPHEADER, info->udp_hdr) |
+ FIELD_PREP(IRDMAQPSQ_L4LEN, info->l4len) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity);
+
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
+
+ return 0;
+}
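A minimal usage sketch of posting a single-SGE send through irdma_uk_send(); the irdma_post_sq_info field names are taken from their uses above, while IRDMA_OP_TYPE_SEND stands in for the driver's send opcode, which is not defined in this hunk:

static int example_post_send(struct irdma_qp_uk *qp, struct ib_sge *sge,
                             u64 wr_id)
{
        struct irdma_post_sq_info info = {};

        info.wr_id = wr_id;
        info.op_type = IRDMA_OP_TYPE_SEND;      /* assumed opcode constant */
        info.signaled = true;
        info.op.send.num_sges = 1;
        info.op.send.sg_list = sge;

        /* post_sq = true also rings the doorbell via irdma_uk_qp_post_wr() */
        return irdma_uk_send(qp, &info, true);
}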
+
+/**
+ * irdma_set_mw_bind_wqe_gen_1 - set mw bind wqe
+ * @wqe: wqe for setting fragment
+ * @op_info: info for setting bind wqe values
+ */
+static void irdma_set_mw_bind_wqe_gen_1(__le64 *wqe,
+ struct irdma_bind_window *op_info)
+{
+ set_64bit_val(wqe, 0, (uintptr_t)op_info->va);
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_PARENTMRSTAG, op_info->mw_stag) |
+ FIELD_PREP(IRDMAQPSQ_MWSTAG, op_info->mr_stag));
+ set_64bit_val(wqe, 16, op_info->bind_len);
+}
+
+/**
+ * irdma_copy_inline_data_gen_1 - Copy inline data to wqe
+ * @wqe: pointer to wqe
+ * @sge_list: table of pointers to inline data
+ * @num_sges: number of SGEs
+ * @polarity: compatibility parameter
+ */
+static void irdma_copy_inline_data_gen_1(u8 *wqe, struct ib_sge *sge_list,
+ u32 num_sges, u8 polarity)
+{
+ u32 quanta_bytes_remaining = 16;
+ int i;
+
+ for (i = 0; i < num_sges; i++) {
+ u8 *cur_sge = (u8 *)(uintptr_t)sge_list[i].addr;
+ u32 sge_len = sge_list[i].length;
+
+ while (sge_len) {
+ u32 bytes_copied;
+
+ bytes_copied = min(sge_len, quanta_bytes_remaining);
+ memcpy(wqe, cur_sge, bytes_copied);
+ wqe += bytes_copied;
+ cur_sge += bytes_copied;
+ quanta_bytes_remaining -= bytes_copied;
+ sge_len -= bytes_copied;
+
+ if (!quanta_bytes_remaining) {
+ /* Remaining inline bytes reside after hdr */
+ wqe += 16;
+ quanta_bytes_remaining = 32;
+ }
+ }
+ }
+}
+
+/**
+ * irdma_inline_data_size_to_quanta_gen_1 - calculate quanta from inline data size
+ * @data_size: data size for inline
+ *
+ * Gets the number of quanta needed for the given inline data size.
+ */
+static inline u16 irdma_inline_data_size_to_quanta_gen_1(u32 data_size)
+{
+ return data_size <= 16 ? IRDMA_QP_WQE_MIN_QUANTA : 2;
+}
+
+/**
+ * irdma_set_mw_bind_wqe - set mw bind in wqe
+ * @wqe: wqe for setting mw bind
+ * @op_info: info for setting wqe values
+ */
+static void irdma_set_mw_bind_wqe(__le64 *wqe,
+ struct irdma_bind_window *op_info)
+{
+ set_64bit_val(wqe, 0, (uintptr_t)op_info->va);
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_PARENTMRSTAG, op_info->mr_stag) |
+ FIELD_PREP(IRDMAQPSQ_MWSTAG, op_info->mw_stag));
+ set_64bit_val(wqe, 16, op_info->bind_len);
+}
+
+/**
+ * irdma_copy_inline_data - Copy inline data to wqe
+ * @wqe: pointer to wqe
+ * @sge_list: table of pointers to inline data
+ * @num_sges: number of SGEs
+ * @polarity: polarity of wqe valid bit
+ */
+static void irdma_copy_inline_data(u8 *wqe, struct ib_sge *sge_list,
+ u32 num_sges, u8 polarity)
+{
+ u8 inline_valid = polarity << IRDMA_INLINE_VALID_S;
+ u32 quanta_bytes_remaining = 8;
+ bool first_quanta = true;
+ int i;
+
+ wqe += 8;
+
+ for (i = 0; i < num_sges; i++) {
+ u8 *cur_sge = (u8 *)(uintptr_t)sge_list[i].addr;
+ u32 sge_len = sge_list[i].length;
+
+ while (sge_len) {
+ u32 bytes_copied;
+
+ bytes_copied = min(sge_len, quanta_bytes_remaining);
+ memcpy(wqe, cur_sge, bytes_copied);
+ wqe += bytes_copied;
+ cur_sge += bytes_copied;
+ quanta_bytes_remaining -= bytes_copied;
+ sge_len -= bytes_copied;
+
+ if (!quanta_bytes_remaining) {
+ quanta_bytes_remaining = 31;
+
+ /* Remaining inline bytes reside after hdr */
+ if (first_quanta) {
+ first_quanta = false;
+ wqe += 16;
+ } else {
+ *wqe = inline_valid;
+ wqe++;
+ }
+ }
+ }
+ }
+ if (!first_quanta && quanta_bytes_remaining < 31)
+ *(wqe + quanta_bytes_remaining) = inline_valid;
+}
+
+/**
+ * irdma_inline_data_size_to_quanta - calculate quanta based on inline data size
+ * @data_size: data size for inline
+ *
+ * Gets the quanta based on inline and immediate data.
+ */
+static u16 irdma_inline_data_size_to_quanta(u32 data_size)
+{
+ if (data_size <= 8)
+ return IRDMA_QP_WQE_MIN_QUANTA;
+ else if (data_size <= 39)
+ return 2;
+ else if (data_size <= 70)
+ return 3;
+ else if (data_size <= 101)
+ return 4;
+ else if (data_size <= 132)
+ return 5;
+ else if (data_size <= 163)
+ return 6;
+ else if (data_size <= 194)
+ return 7;
+ else
+ return 8;
+}
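+
+/*
+ * Worked example (informal, matching the thresholds above): quantum 0
+ * carries 8 inline bytes at offsets 8-15 (offsets 0-7 and 16-31 hold the
+ * immediate-data, control and header words), and every additional 32-byte
+ * quantum carries 31 data bytes plus one byte reserved for the inline
+ * valid bit, giving breakpoints of 8, 39, 70, 101, ... A 50-byte payload
+ * therefore uses 8 + 31 + 11 bytes across three quanta, which is what the
+ * data_size <= 70 case returns.
+ */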
+
+/**
+ * irdma_uk_inline_rdma_write - inline rdma write operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info, bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_rdma_write *op_info;
+ u64 hdr = 0;
+ u32 wqe_idx;
+ bool read_fence = false;
+ u32 i, total_size = 0;
+ u16 quanta;
+
+ op_info = &info->op.rdma_write;
+
+ if (unlikely(qp->max_sq_frag_cnt < op_info->num_lo_sges))
+ return -EINVAL;
+
+ for (i = 0; i < op_info->num_lo_sges; i++)
+ total_size += op_info->lo_sg_list[i].length;
+
+ if (unlikely(total_size > qp->max_inline_data))
+ return -EINVAL;
+
+ quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size);
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
+ info);
+ if (!wqe)
+ return -ENOMEM;
+
+ irdma_clr_wqes(qp, wqe_idx);
+
+ read_fence |= info->read_fence;
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.addr));
+
+ hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.lkey) |
+ FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) |
+ FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, total_size) |
+ FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt ? 1 : 0) |
+ FIELD_PREP(IRDMAQPSQ_INLINEDATAFLAG, 1) |
+ FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid ? 1 : 0) |
+ FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) |
+ FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) |
+ FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity);
+
+ if (info->imm_data_valid)
+ set_64bit_val(wqe, 0,
+ FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data));
+
+ qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->lo_sg_list,
+ op_info->num_lo_sges,
+ qp->swqe_polarity);
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * irdma_uk_inline_send - inline send operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+int irdma_uk_inline_send(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info, bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_post_send *op_info;
+ u64 hdr;
+ u32 wqe_idx;
+ bool read_fence = false;
+ u32 i, total_size = 0;
+ u16 quanta;
+
+ op_info = &info->op.send;
+
+ if (unlikely(qp->max_sq_frag_cnt < op_info->num_sges))
+ return -EINVAL;
+
+ for (i = 0; i < op_info->num_sges; i++)
+ total_size += op_info->sg_list[i].length;
+
+ if (unlikely(total_size > qp->max_inline_data))
+ return -EINVAL;
+
+ quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size);
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
+ info);
+ if (!wqe)
+ return -ENOMEM;
+
+ irdma_clr_wqes(qp, wqe_idx);
+
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMAQPSQ_DESTQKEY, op_info->qkey) |
+ FIELD_PREP(IRDMAQPSQ_DESTQPN, op_info->dest_qp));
+
+ read_fence |= info->read_fence;
+ hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, info->stag_to_inv) |
+ FIELD_PREP(IRDMAQPSQ_AHID, op_info->ah_id) |
+ FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) |
+ FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, total_size) |
+ FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG,
+ (info->imm_data_valid ? 1 : 0)) |
+ FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) |
+ FIELD_PREP(IRDMAQPSQ_INLINEDATAFLAG, 1) |
+ FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) |
+ FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) |
+ FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(IRDMAQPSQ_UDPHEADER, info->udp_hdr) |
+ FIELD_PREP(IRDMAQPSQ_L4LEN, info->l4len) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity);
+
+ if (info->imm_data_valid)
+ set_64bit_val(wqe, 0,
+ FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data));
+ qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->sg_list,
+ op_info->num_sges, qp->swqe_polarity);
+
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * irdma_uk_stag_local_invalidate - stag invalidate operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info,
+ bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_inv_local_stag *op_info;
+ u64 hdr;
+ u32 wqe_idx;
+ bool local_fence = false;
+ struct ib_sge sge = {};
+
+ op_info = &info->op.inv_local_stag;
+ local_fence = info->local_fence;
+
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA,
+ 0, info);
+ if (!wqe)
+ return -ENOMEM;
+
+ irdma_clr_wqes(qp, wqe_idx);
+
+ sge.lkey = op_info->target_stag;
+ qp->wqe_ops.iw_set_fragment(wqe, 0, &sge, 0);
+
+ set_64bit_val(wqe, 16, 0);
+
+ hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMA_OP_TYPE_INV_STAG) |
+ FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) |
+ FIELD_PREP(IRDMAQPSQ_LOCALFENCE, local_fence) |
+ FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity);
+
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * irdma_uk_post_receive - post receive wqe
+ * @qp: hw qp ptr
+ * @info: post rq information
+ */
+int irdma_uk_post_receive(struct irdma_qp_uk *qp,
+ struct irdma_post_rq_info *info)
+{
+ u32 wqe_idx, i, byte_off;
+ u32 addl_frag_cnt;
+ __le64 *wqe;
+ u64 hdr;
+
+ if (qp->max_rq_frag_cnt < info->num_sges)
+ return -EINVAL;
+
+ wqe = irdma_qp_get_next_recv_wqe(qp, &wqe_idx);
+ if (!wqe)
+ return -ENOMEM;
+
+ qp->rq_wrid_array[wqe_idx] = info->wr_id;
+ addl_frag_cnt = info->num_sges > 1 ? (info->num_sges - 1) : 0;
+ qp->wqe_ops.iw_set_fragment(wqe, 0, info->sg_list,
+ qp->rwqe_polarity);
+
+ for (i = 1, byte_off = 32; i < info->num_sges; i++) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off, &info->sg_list[i],
+ qp->rwqe_polarity);
+ byte_off += 16;
+ }
+
+ /* if not an odd number set valid bit in next fragment */
+ if (qp->uk_attrs->hw_rev >= IRDMA_GEN_2 && !(info->num_sges & 0x01) &&
+ info->num_sges) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
+ qp->rwqe_polarity);
+ if (qp->uk_attrs->hw_rev == IRDMA_GEN_2)
+ ++addl_frag_cnt;
+ }
+
+ set_64bit_val(wqe, 16, 0);
+ hdr = FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->rwqe_polarity);
+
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ return 0;
+}
+
+/**
+ * irdma_uk_cq_resize - reset the cq buffer info
+ * @cq: cq to resize
+ * @cq_base: new cq buffer addr
+ * @cq_size: number of cqes
+ */
+void irdma_uk_cq_resize(struct irdma_cq_uk *cq, void *cq_base, int cq_size)
+{
+ cq->cq_base = cq_base;
+ cq->cq_size = cq_size;
+ IRDMA_RING_INIT(cq->cq_ring, cq->cq_size);
+ cq->polarity = 1;
+}
+
+/**
+ * irdma_uk_cq_set_resized_cnt - record the count of the resized buffers
+ * @cq: cq to resize
+ * @cq_cnt: the count of the resized cq buffers
+ */
+void irdma_uk_cq_set_resized_cnt(struct irdma_cq_uk *cq, u16 cq_cnt)
+{
+ u64 temp_val;
+ u16 sw_cq_sel;
+ u8 arm_next_se;
+ u8 arm_next;
+ u8 arm_seq_num;
+
+ get_64bit_val(cq->shadow_area, 32, &temp_val);
+
+ sw_cq_sel = (u16)FIELD_GET(IRDMA_CQ_DBSA_SW_CQ_SELECT, temp_val);
+ sw_cq_sel += cq_cnt;
+
+ arm_seq_num = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_SEQ_NUM, temp_val);
+ arm_next_se = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_NEXT_SE, temp_val);
+ arm_next = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_NEXT, temp_val);
+
+ temp_val = FIELD_PREP(IRDMA_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) |
+ FIELD_PREP(IRDMA_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) |
+ FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT_SE, arm_next_se) |
+ FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT, arm_next);
+
+ set_64bit_val(cq->shadow_area, 32, temp_val);
+}
+
+/**
+ * irdma_uk_cq_request_notification - cq notification request (doorbell)
+ * @cq: hw cq
+ * @cq_notify: notification type
+ */
+void irdma_uk_cq_request_notification(struct irdma_cq_uk *cq,
+ enum irdma_cmpl_notify cq_notify)
+{
+ u64 temp_val;
+ u16 sw_cq_sel;
+ u8 arm_next_se = 0;
+ u8 arm_next = 0;
+ u8 arm_seq_num;
+
+ get_64bit_val(cq->shadow_area, 32, &temp_val);
+ arm_seq_num = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_SEQ_NUM, temp_val);
+ arm_seq_num++;
+ sw_cq_sel = (u16)FIELD_GET(IRDMA_CQ_DBSA_SW_CQ_SELECT, temp_val);
+ arm_next_se = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_NEXT_SE, temp_val);
+ arm_next_se |= 1;
+ if (cq_notify == IRDMA_CQ_COMPL_EVENT)
+ arm_next = 1;
+ temp_val = FIELD_PREP(IRDMA_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) |
+ FIELD_PREP(IRDMA_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) |
+ FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT_SE, arm_next_se) |
+ FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT, arm_next);
+
+ set_64bit_val(cq->shadow_area, 32, temp_val);
+
+ dma_wmb(); /* make sure shadow area is updated before ringing doorbell */
+
+ writel(cq->cq_id, cq->cqe_alloc_db);
+}
+
+/**
+ * irdma_uk_cq_empty - Check if CQ is empty
+ * @cq: hw cq
+ */
+bool irdma_uk_cq_empty(struct irdma_cq_uk *cq)
+{
+ __le64 *cqe;
+ u8 polarity;
+ u64 qword3;
+
+ if (cq->avoid_mem_cflct)
+ cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(cq);
+ else
+ cqe = IRDMA_GET_CURRENT_CQ_ELEM(cq);
+
+ get_64bit_val(cqe, 24, &qword3);
+ polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3);
+
+ return polarity != cq->polarity;
+}
+
+/**
+ * irdma_uk_cq_poll_cmpl - get cq completion info
+ * @cq: hw cq
+ * @info: cq poll information returned
+ */
+int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
+ struct irdma_cq_poll_info *info)
+{
+ u64 comp_ctx, qword0, qword2, qword3;
+ __le64 *cqe;
+ struct irdma_qp_uk *qp;
+ struct irdma_srq_uk *srq;
+ struct qp_err_code qp_err;
+ u8 is_srq;
+ struct irdma_ring *pring = NULL;
+ u32 wqe_idx;
+ int ret_code;
+ bool move_cq_head = true;
+ u8 polarity;
+ bool ext_valid;
+ __le64 *ext_cqe;
+
+ if (cq->avoid_mem_cflct)
+ cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(cq);
+ else
+ cqe = IRDMA_GET_CURRENT_CQ_ELEM(cq);
+
+ get_64bit_val(cqe, 24, &qword3);
+ polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3);
+ if (polarity != cq->polarity)
+ return -ENOENT;
+
+ /* Ensure CQE contents are read after valid bit is checked */
+ dma_rmb();
+
+ ext_valid = (bool)FIELD_GET(IRDMA_CQ_EXTCQE, qword3);
+ if (ext_valid) {
+ u64 qword6, qword7;
+ u32 peek_head;
+
+ if (cq->avoid_mem_cflct) {
+ ext_cqe = (__le64 *)((u8 *)cqe + 32);
+ get_64bit_val(ext_cqe, 24, &qword7);
+ polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword7);
+ } else {
+ peek_head = (cq->cq_ring.head + 1) % cq->cq_ring.size;
+ ext_cqe = cq->cq_base[peek_head].buf;
+ get_64bit_val(ext_cqe, 24, &qword7);
+ polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword7);
+ if (!peek_head)
+ polarity ^= 1;
+ }
+ if (polarity != cq->polarity)
+ return -ENOENT;
+
+ /* Ensure ext CQE contents are read after ext valid bit is checked */
+ dma_rmb();
+
+ info->imm_valid = (bool)FIELD_GET(IRDMA_CQ_IMMVALID, qword7);
+ if (info->imm_valid) {
+ u64 qword4;
+
+ get_64bit_val(ext_cqe, 0, &qword4);
+ info->imm_data = (u32)FIELD_GET(IRDMA_CQ_IMMDATALOW32, qword4);
+ }
+ info->ud_smac_valid = (bool)FIELD_GET(IRDMA_CQ_UDSMACVALID, qword7);
+ info->ud_vlan_valid = (bool)FIELD_GET(IRDMA_CQ_UDVLANVALID, qword7);
+ if (info->ud_smac_valid || info->ud_vlan_valid) {
+ get_64bit_val(ext_cqe, 16, &qword6);
+ if (info->ud_vlan_valid)
+ info->ud_vlan = (u16)FIELD_GET(IRDMA_CQ_UDVLAN, qword6);
+ if (info->ud_smac_valid) {
+ info->ud_smac[5] = qword6 & 0xFF;
+ info->ud_smac[4] = (qword6 >> 8) & 0xFF;
+ info->ud_smac[3] = (qword6 >> 16) & 0xFF;
+ info->ud_smac[2] = (qword6 >> 24) & 0xFF;
+ info->ud_smac[1] = (qword6 >> 32) & 0xFF;
+ info->ud_smac[0] = (qword6 >> 40) & 0xFF;
+ }
+ }
+ } else {
+ info->imm_valid = false;
+ info->ud_smac_valid = false;
+ info->ud_vlan_valid = false;
+ }
+
+ info->q_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3);
+ is_srq = (u8)FIELD_GET(IRDMA_CQ_SRQ, qword3);
+ info->error = (bool)FIELD_GET(IRDMA_CQ_ERROR, qword3);
+ info->ipv4 = (bool)FIELD_GET(IRDMACQ_IPV4, qword3);
+ get_64bit_val(cqe, 8, &comp_ctx);
+ if (is_srq)
+ get_64bit_val(cqe, 40, (u64 *)&qp);
+ else
+ qp = (struct irdma_qp_uk *)(unsigned long)comp_ctx;
+ if (info->error) {
+ info->major_err = FIELD_GET(IRDMA_CQ_MAJERR, qword3);
+ info->minor_err = FIELD_GET(IRDMA_CQ_MINERR, qword3);
+ switch (info->major_err) {
+ case IRDMA_SRQFLUSH_RSVD_MAJOR_ERR:
+ qp_err = irdma_ae_to_qp_err_code(info->minor_err);
+ info->minor_err = qp_err.flush_code;
+ fallthrough;
+ case IRDMA_FLUSH_MAJOR_ERR:
+ /* Set the minor error to the standard flush error code for remaining cqes */
+ if (info->minor_err != FLUSH_GENERAL_ERR) {
+ qword3 &= ~IRDMA_CQ_MINERR;
+ qword3 |= FIELD_PREP(IRDMA_CQ_MINERR, FLUSH_GENERAL_ERR);
+ set_64bit_val(cqe, 24, qword3);
+ }
+ info->comp_status = IRDMA_COMPL_STATUS_FLUSHED;
+ break;
+ default:
+#define IRDMA_CIE_SIGNATURE 0xE
+#define IRDMA_CQMAJERR_HIGH_NIBBLE GENMASK(15, 12)
+ if (info->q_type == IRDMA_CQE_QTYPE_SQ &&
+ qp->qp_type == IRDMA_QP_TYPE_ROCE_UD &&
+ FIELD_GET(IRDMA_CQMAJERR_HIGH_NIBBLE, info->major_err)
+ == IRDMA_CIE_SIGNATURE) {
+ info->error = 0;
+ info->major_err = 0;
+ info->minor_err = 0;
+ info->comp_status = IRDMA_COMPL_STATUS_SUCCESS;
+ } else {
+ info->comp_status = IRDMA_COMPL_STATUS_UNKNOWN;
+ }
+ break;
+ }
+ } else {
+ info->comp_status = IRDMA_COMPL_STATUS_SUCCESS;
+ }
+
+ get_64bit_val(cqe, 0, &qword0);
+ get_64bit_val(cqe, 16, &qword2);
+
+ info->qp_id = (u32)FIELD_GET(IRDMACQ_QPID, qword2);
+ info->ud_src_qpn = (u32)FIELD_GET(IRDMACQ_UDSRCQPN, qword2);
+
+ get_64bit_val(cqe, 8, &comp_ctx);
+
+ info->solicited_event = (bool)FIELD_GET(IRDMACQ_SOEVENT, qword3);
+ qp = (struct irdma_qp_uk *)(unsigned long)comp_ctx;
+ if (!qp || qp->destroy_pending) {
+ ret_code = -EFAULT;
+ goto exit;
+ }
+ wqe_idx = (u32)FIELD_GET(IRDMA_CQ_WQEIDX, qword3);
+ info->qp_handle = (irdma_qp_handle)(unsigned long)qp;
+ info->op_type = (u8)FIELD_GET(IRDMACQ_OP, qword3);
+
+ if (info->q_type == IRDMA_CQE_QTYPE_RQ && is_srq) {
+ unsigned long flags;
+
+ srq = qp->srq_uk;
+
+ get_64bit_val(cqe, 8, &info->wr_id);
+ info->bytes_xfered = (u32)FIELD_GET(IRDMACQ_PAYLDLEN, qword0);
+
+ if (qword3 & IRDMACQ_STAG) {
+ info->stag_invalid_set = true;
+ info->inv_stag = (u32)FIELD_GET(IRDMACQ_INVSTAG,
+ qword2);
+ } else {
+ info->stag_invalid_set = false;
+ }
+ spin_lock_irqsave(srq->lock, flags);
+ IRDMA_RING_MOVE_TAIL(srq->srq_ring);
+ spin_unlock_irqrestore(srq->lock, flags);
+ pring = &srq->srq_ring;
+
+ } else if (info->q_type == IRDMA_CQE_QTYPE_RQ && !is_srq) {
+ u32 array_idx;
+
+ array_idx = wqe_idx / qp->rq_wqe_size_multiplier;
+
+ if (info->comp_status == IRDMA_COMPL_STATUS_FLUSHED ||
+ info->comp_status == IRDMA_COMPL_STATUS_UNKNOWN) {
+ if (!IRDMA_RING_MORE_WORK(qp->rq_ring)) {
+ ret_code = -ENOENT;
+ goto exit;
+ }
+
+ info->wr_id = qp->rq_wrid_array[qp->rq_ring.tail];
+ array_idx = qp->rq_ring.tail;
+ } else {
+ info->wr_id = qp->rq_wrid_array[array_idx];
+ }
+
+ info->bytes_xfered = (u32)FIELD_GET(IRDMACQ_PAYLDLEN, qword0);
+
+ if (qword3 & IRDMACQ_STAG) {
+ info->stag_invalid_set = true;
+ info->inv_stag = (u32)FIELD_GET(IRDMACQ_INVSTAG, qword2);
+ } else {
+ info->stag_invalid_set = false;
+ }
+ IRDMA_RING_SET_TAIL(qp->rq_ring, array_idx + 1);
+ if (info->comp_status == IRDMA_COMPL_STATUS_FLUSHED) {
+ qp->rq_flush_seen = true;
+ if (!IRDMA_RING_MORE_WORK(qp->rq_ring))
+ qp->rq_flush_complete = true;
+ else
+ move_cq_head = false;
+ }
+ pring = &qp->rq_ring;
+ } else { /* q_type is IRDMA_CQE_QTYPE_SQ */
+ if (qp->first_sq_wq) {
+ if (wqe_idx + 1 >= qp->conn_wqes)
+ qp->first_sq_wq = false;
+
+ if (wqe_idx < qp->conn_wqes && qp->sq_ring.head == qp->sq_ring.tail) {
+ IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring);
+ IRDMA_RING_MOVE_TAIL(cq->cq_ring);
+ set_64bit_val(cq->shadow_area, 0,
+ IRDMA_RING_CURRENT_HEAD(cq->cq_ring));
+ memset(info, 0,
+ sizeof(struct irdma_cq_poll_info));
+ return irdma_uk_cq_poll_cmpl(cq, info);
+ }
+ }
+ if (info->comp_status != IRDMA_COMPL_STATUS_FLUSHED) {
+ info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
+ if (!info->comp_status)
+ info->bytes_xfered = qp->sq_wrtrk_array[wqe_idx].wr_len;
+ if (!qp->sq_wrtrk_array[wqe_idx].signaled) {
+ ret_code = -EFAULT;
+ goto exit;
+ }
+ info->op_type = (u8)FIELD_GET(IRDMACQ_OP, qword3);
+ IRDMA_RING_SET_TAIL(qp->sq_ring,
+ wqe_idx + qp->sq_wrtrk_array[wqe_idx].quanta);
+ } else {
+ if (!IRDMA_RING_MORE_WORK(qp->sq_ring)) {
+ ret_code = -ENOENT;
+ goto exit;
+ }
+
+ do {
+ __le64 *sw_wqe;
+ u64 wqe_qword;
+ u32 tail;
+
+ tail = qp->sq_ring.tail;
+ sw_wqe = qp->sq_base[tail].elem;
+ get_64bit_val(sw_wqe, 24,
+ &wqe_qword);
+ info->op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE,
+ wqe_qword);
+ IRDMA_RING_SET_TAIL(qp->sq_ring,
+ tail + qp->sq_wrtrk_array[tail].quanta);
+ if (info->op_type != IRDMAQP_OP_NOP) {
+ info->wr_id = qp->sq_wrtrk_array[tail].wrid;
+ info->bytes_xfered = qp->sq_wrtrk_array[tail].wr_len;
+ break;
+ }
+ } while (1);
+ if (info->op_type == IRDMA_OP_TYPE_BIND_MW &&
+ info->minor_err == FLUSH_PROT_ERR)
+ info->minor_err = FLUSH_MW_BIND_ERR;
+ qp->sq_flush_seen = true;
+ if (!IRDMA_RING_MORE_WORK(qp->sq_ring))
+ qp->sq_flush_complete = true;
+ }
+ pring = &qp->sq_ring;
+ }
+
+ ret_code = 0;
+
+exit:
+ if (!ret_code && info->comp_status == IRDMA_COMPL_STATUS_FLUSHED) {
+ if (pring && IRDMA_RING_MORE_WORK(*pring))
+ /* Park CQ head during a flush to generate additional CQEs
+ * from SW for all unprocessed WQEs. For GEN3 and beyond
+ * FW will generate/flush these CQEs so move to the next CQE
+ */
+ move_cq_head = qp->uk_attrs->hw_rev <= IRDMA_GEN_2 ?
+ false : true;
+ }
+
+ if (move_cq_head) {
+ IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring);
+ if (!IRDMA_RING_CURRENT_HEAD(cq->cq_ring))
+ cq->polarity ^= 1;
+
+ if (ext_valid && !cq->avoid_mem_cflct) {
+ IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring);
+ if (!IRDMA_RING_CURRENT_HEAD(cq->cq_ring))
+ cq->polarity ^= 1;
+ }
+
+ IRDMA_RING_MOVE_TAIL(cq->cq_ring);
+ if (!cq->avoid_mem_cflct && ext_valid)
+ IRDMA_RING_MOVE_TAIL(cq->cq_ring);
+ if (IRDMA_RING_CURRENT_HEAD(cq->cq_ring) & 0x3F || irdma_uk_cq_empty(cq))
+ set_64bit_val(cq->shadow_area, 0,
+ IRDMA_RING_CURRENT_HEAD(cq->cq_ring));
+ } else {
+ qword3 &= ~IRDMA_CQ_WQEIDX;
+ qword3 |= FIELD_PREP(IRDMA_CQ_WQEIDX, pring->tail);
+ set_64bit_val(cqe, 24, qword3);
+ }
+
+ return ret_code;
+}
+
+/**
+ * irdma_round_up_wq - round up qp wq depth to the next power of two
+ * @wqdepth: wq depth in quanta to round up
+ */
+static int irdma_round_up_wq(u32 wqdepth)
+{
+ int scount = 1;
+
+ for (wqdepth--; scount <= 16; scount *= 2)
+ wqdepth |= wqdepth >> scount;
+
+ return ++wqdepth;
+}
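+
+/*
+ * Example (informal): the decrement/or-shift/increment sequence above is
+ * the standard round-up-to-next-power-of-two idiom for 32-bit values, so
+ * a requested depth of 300 quanta becomes 512, while a depth that is
+ * already a power of two (e.g. 256) is returned unchanged.
+ */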
+
+/**
+ * irdma_get_wqe_shift - get shift count for maximum wqe size
+ * @uk_attrs: qp HW attributes
+ * @sge: maximum number of SGEs per WQE
+ * @inline_data: Maximum inline data size
+ * @shift: Returns the shift needed based on sge
+ *
+ * Shift can be used to left shift the wqe size based on the number of SGEs
+ * and the inline data size. For 1 SGE or inline data <= 8, shift = 0 (wqe
+ * size of 32 bytes). For 2 or 3 SGEs or inline data <= 39, shift = 1 (wqe
+ * size of 64 bytes). For 4-7 SGEs or inline data <= 101, shift = 2 (wqe
+ * size of 128 bytes). Otherwise, shift = 3 (wqe size of 256 bytes).
+ */
+void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge,
+ u32 inline_data, u8 *shift)
+{
+ *shift = 0;
+ if (uk_attrs->hw_rev >= IRDMA_GEN_2) {
+ if (sge > 1 || inline_data > 8) {
+ if (sge < 4 && inline_data <= 39)
+ *shift = 1;
+ else if (sge < 8 && inline_data <= 101)
+ *shift = 2;
+ else
+ *shift = 3;
+ }
+ } else if (sge > 1 || inline_data > 16) {
+ *shift = (sge < 4 && inline_data <= 48) ? 1 : 2;
+ }
+}
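+
+/*
+ * Example (informal): per the tables above, a GEN2 QP with up to 4 SGEs
+ * and 64 bytes of inline data gets shift = 2 (128-byte WQEs), while a
+ * GEN1 QP with 2 SGEs and no inline data gets shift = 1 (64-byte WQEs).
+ */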
+
+/*
+ * irdma_get_sqdepth - get SQ depth (quanta)
+ * @uk_attrs: qp HW attributes
+ * @sq_size: SQ size
+ * @shift: shift which determines size of WQE
+ * @sqdepth: depth of SQ
+ */
+int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift,
+ u32 *sqdepth)
+{
+ u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift;
+
+ *sqdepth = irdma_round_up_wq((sq_size << shift) + IRDMA_SQ_RSVD);
+
+ if (*sqdepth < min_size)
+ *sqdepth = min_size;
+ else if (*sqdepth > uk_attrs->max_hw_wq_quanta)
+ return -EINVAL;
+
+ return 0;
+}
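+
+/*
+ * Example (informal, assuming IRDMA_SQ_RSVD is a small reserved-quanta
+ * constant): with sq_size = 100 and shift = 2 the raw requirement is
+ * 400 + IRDMA_SQ_RSVD quanta, which irdma_round_up_wq() rounds up to 512;
+ * the result is then bounded below by min_hw_wq_size << shift and above
+ * by max_hw_wq_quanta.
+ */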
+
+/*
+ * irdma_get_rqdepth - get RQ depth (quanta)
+ * @uk_attrs: qp HW attributes
+ * @rq_size: RQ size
+ * @shift: shift which determines size of WQE
+ * @rqdepth: depth of RQ
+ */
+int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift,
+ u32 *rqdepth)
+{
+ u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift;
+
+ *rqdepth = irdma_round_up_wq((rq_size << shift) + IRDMA_RQ_RSVD);
+
+ if (*rqdepth < min_size)
+ *rqdepth = min_size;
+ else if (*rqdepth > uk_attrs->max_hw_rq_quanta)
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
+ * irdma_get_srqdepth - get SRQ depth (quanta)
+ * @uk_attrs: qp HW attributes
+ * @srq_size: SRQ size
+ * @shift: shift which determines size of WQE
+ * @srqdepth: depth of SRQ
+ */
+int irdma_get_srqdepth(struct irdma_uk_attrs *uk_attrs, u32 srq_size, u8 shift,
+ u32 *srqdepth)
+{
+ *srqdepth = irdma_round_up_wq((srq_size << shift) + IRDMA_RQ_RSVD);
+
+ if (*srqdepth < ((u32)uk_attrs->min_hw_wq_size << shift))
+ *srqdepth = uk_attrs->min_hw_wq_size << shift;
+ else if (*srqdepth > uk_attrs->max_hw_srq_quanta)
+ return -EINVAL;
+
+ return 0;
+}
+
+static const struct irdma_wqe_uk_ops iw_wqe_uk_ops = {
+ .iw_copy_inline_data = irdma_copy_inline_data,
+ .iw_inline_data_size_to_quanta = irdma_inline_data_size_to_quanta,
+ .iw_set_fragment = irdma_set_fragment,
+ .iw_set_mw_bind_wqe = irdma_set_mw_bind_wqe,
+};
+
+static const struct irdma_wqe_uk_ops iw_wqe_uk_ops_gen_1 = {
+ .iw_copy_inline_data = irdma_copy_inline_data_gen_1,
+ .iw_inline_data_size_to_quanta = irdma_inline_data_size_to_quanta_gen_1,
+ .iw_set_fragment = irdma_set_fragment_gen_1,
+ .iw_set_mw_bind_wqe = irdma_set_mw_bind_wqe_gen_1,
+};
+
+/**
+ * irdma_setup_connection_wqes - setup WQEs necessary to complete
+ * connection.
+ * @qp: hw qp (user and kernel)
+ * @info: qp initialization info
+ */
+static void irdma_setup_connection_wqes(struct irdma_qp_uk *qp,
+ struct irdma_qp_uk_init_info *info)
+{
+ u16 move_cnt = 1;
+
+ if (!info->legacy_mode &&
+ (qp->uk_attrs->feature_flags & IRDMA_FEATURE_RTS_AE))
+ move_cnt = 3;
+
+ qp->conn_wqes = move_cnt;
+ IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, move_cnt);
+ IRDMA_RING_MOVE_TAIL_BY_COUNT(qp->sq_ring, move_cnt);
+}
+
+/**
+ * irdma_uk_srq_init - initialize shared srq
+ * @srq: hw srq (user and kernel)
+ * @info: srq initialization info
+ *
+ * Initializes the variables used in both user and kernel mode.
+ * The WQE size depends on the maximum number of fragments
+ * allowed, and WQE size * number of WQEs should equal the
+ * amount of memory allocated for the SRQ.
+ */
+int irdma_uk_srq_init(struct irdma_srq_uk *srq,
+ struct irdma_srq_uk_init_info *info)
+{
+ u8 rqshift;
+
+ srq->uk_attrs = info->uk_attrs;
+ if (info->max_srq_frag_cnt > srq->uk_attrs->max_hw_wq_frags)
+ return -EINVAL;
+
+ irdma_get_wqe_shift(srq->uk_attrs, info->max_srq_frag_cnt, 0, &rqshift);
+ srq->srq_caps = info->srq_caps;
+ srq->srq_base = info->srq;
+ srq->shadow_area = info->shadow_area;
+ srq->srq_id = info->srq_id;
+ srq->srwqe_polarity = 0;
+ srq->srq_size = info->srq_size;
+ srq->wqe_size = rqshift;
+ srq->max_srq_frag_cnt = min(srq->uk_attrs->max_hw_wq_frags,
+ ((u32)2 << rqshift) - 1);
+ IRDMA_RING_INIT(srq->srq_ring, srq->srq_size);
+ srq->wqe_size_multiplier = 1 << rqshift;
+ srq->wqe_ops = iw_wqe_uk_ops;
+
+ return 0;
+}
+
+/**
+ * irdma_uk_calc_shift_wq - calculate WQE shift for both SQ and RQ
+ * @ukinfo: qp initialization info
+ * @sq_shift: Returns shift of SQ
+ * @rq_shift: Returns shift of RQ
+ */
+void irdma_uk_calc_shift_wq(struct irdma_qp_uk_init_info *ukinfo, u8 *sq_shift,
+ u8 *rq_shift)
+{
+ bool imm_support = ukinfo->uk_attrs->hw_rev >= IRDMA_GEN_2;
+
+ irdma_get_wqe_shift(ukinfo->uk_attrs,
+ imm_support ? ukinfo->max_sq_frag_cnt + 1 :
+ ukinfo->max_sq_frag_cnt,
+ ukinfo->max_inline_data, sq_shift);
+
+ irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_rq_frag_cnt, 0,
+ rq_shift);
+
+ if (ukinfo->uk_attrs->hw_rev == IRDMA_GEN_1) {
+ if (ukinfo->abi_ver > 4)
+ *rq_shift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1;
+ }
+}
+
+/**
+ * irdma_uk_calc_depth_shift_sq - calculate depth and shift for SQ size.
+ * @ukinfo: qp initialization info
+ * @sq_depth: Returns depth of SQ
+ * @sq_shift: Returns shift of SQ
+ */
+int irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo,
+ u32 *sq_depth, u8 *sq_shift)
+{
+ bool imm_support = ukinfo->uk_attrs->hw_rev >= IRDMA_GEN_2;
+ int status;
+
+ irdma_get_wqe_shift(ukinfo->uk_attrs,
+ imm_support ? ukinfo->max_sq_frag_cnt + 1 :
+ ukinfo->max_sq_frag_cnt,
+ ukinfo->max_inline_data, sq_shift);
+ status = irdma_get_sqdepth(ukinfo->uk_attrs, ukinfo->sq_size,
+ *sq_shift, sq_depth);
+
+ return status;
+}
+
+/**
+ * irdma_uk_calc_depth_shift_rq - calculate depth and shift for RQ size.
+ * @ukinfo: qp initialization info
+ * @rq_depth: Returns depth of RQ
+ * @rq_shift: Returns shift of RQ
+ */
+int irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo,
+ u32 *rq_depth, u8 *rq_shift)
+{
+ int status;
+
+ irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_rq_frag_cnt, 0,
+ rq_shift);
+
+ if (ukinfo->uk_attrs->hw_rev == IRDMA_GEN_1) {
+ if (ukinfo->abi_ver > 4)
+ *rq_shift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1;
+ }
+
+ status = irdma_get_rqdepth(ukinfo->uk_attrs, ukinfo->rq_size,
+ *rq_shift, rq_depth);
+
+ return status;
+}
+
+/**
+ * irdma_uk_qp_init - initialize shared qp
+ * @qp: hw qp (user and kernel)
+ * @info: qp initialization info
+ *
+ * Initializes the variables used in both user and kernel mode.
+ * The WQE size depends on the maximum number of fragments
+ * allowed, and WQE size * number of WQEs should equal the
+ * amount of memory allocated for the SQ and RQ.
+ */
+int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info)
+{
+ int ret_code = 0;
+ u32 sq_ring_size;
+
+ qp->uk_attrs = info->uk_attrs;
+ if (info->max_sq_frag_cnt > qp->uk_attrs->max_hw_wq_frags ||
+ info->max_rq_frag_cnt > qp->uk_attrs->max_hw_wq_frags)
+ return -EINVAL;
+
+ qp->qp_caps = info->qp_caps;
+ qp->sq_base = info->sq;
+ qp->rq_base = info->rq;
+ qp->qp_type = info->type ? info->type : IRDMA_QP_TYPE_IWARP;
+ qp->shadow_area = info->shadow_area;
+ qp->sq_wrtrk_array = info->sq_wrtrk_array;
+
+ qp->rq_wrid_array = info->rq_wrid_array;
+ qp->wqe_alloc_db = info->wqe_alloc_db;
+ qp->qp_id = info->qp_id;
+ qp->sq_size = info->sq_size;
+ qp->max_sq_frag_cnt = info->max_sq_frag_cnt;
+ sq_ring_size = qp->sq_size << info->sq_shift;
+ IRDMA_RING_INIT(qp->sq_ring, sq_ring_size);
+ if (info->first_sq_wq) {
+ irdma_setup_connection_wqes(qp, info);
+ qp->swqe_polarity = 1;
+ qp->first_sq_wq = true;
+ } else {
+ qp->swqe_polarity = 0;
+ }
+ qp->swqe_polarity_deferred = 1;
+ qp->rwqe_polarity = 0;
+ qp->rq_size = info->rq_size;
+ qp->max_rq_frag_cnt = info->max_rq_frag_cnt;
+ qp->max_inline_data = info->max_inline_data;
+ qp->rq_wqe_size = info->rq_shift;
+ IRDMA_RING_INIT(qp->rq_ring, qp->rq_size);
+ qp->rq_wqe_size_multiplier = 1 << info->rq_shift;
+ if (qp->uk_attrs->hw_rev == IRDMA_GEN_1)
+ qp->wqe_ops = iw_wqe_uk_ops_gen_1;
+ else
+ qp->wqe_ops = iw_wqe_uk_ops;
+ qp->srq_uk = info->srq_uk;
+ return ret_code;
+}
+
+/**
+ * irdma_uk_cq_init - initialize shared cq (user and kernel)
+ * @cq: hw cq
+ * @info: hw cq initialization info
+ */
+void irdma_uk_cq_init(struct irdma_cq_uk *cq,
+ struct irdma_cq_uk_init_info *info)
+{
+ cq->cq_base = info->cq_base;
+ cq->cq_id = info->cq_id;
+ cq->cq_size = info->cq_size;
+ cq->cqe_alloc_db = info->cqe_alloc_db;
+ cq->cq_ack_db = info->cq_ack_db;
+ cq->shadow_area = info->shadow_area;
+ cq->avoid_mem_cflct = info->avoid_mem_cflct;
+ IRDMA_RING_INIT(cq->cq_ring, cq->cq_size);
+ cq->polarity = 1;
+}
+
+/**
+ * irdma_uk_clean_cq - clean cq entries
+ * @q: completion context
+ * @cq: cq to clean
+ */
+void irdma_uk_clean_cq(void *q, struct irdma_cq_uk *cq)
+{
+ __le64 *cqe;
+ u64 qword3, comp_ctx;
+ u32 cq_head;
+ u8 polarity, temp;
+
+ cq_head = cq->cq_ring.head;
+ temp = cq->polarity;
+ do {
+ if (cq->avoid_mem_cflct)
+ cqe = ((struct irdma_extended_cqe *)(cq->cq_base))[cq_head].buf;
+ else
+ cqe = cq->cq_base[cq_head].buf;
+ get_64bit_val(cqe, 24, &qword3);
+ polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3);
+
+ if (polarity != temp)
+ break;
+
+ /* Ensure CQE contents are read after valid bit is checked */
+ dma_rmb();
+
+ get_64bit_val(cqe, 8, &comp_ctx);
+ if ((void *)(unsigned long)comp_ctx == q)
+ set_64bit_val(cqe, 8, 0);
+
+ cq_head = (cq_head + 1) % cq->cq_ring.size;
+ if (!cq_head)
+ temp ^= 1;
+ } while (true);
+}
+
+/**
+ * irdma_nop - post a nop
+ * @qp: hw qp ptr
+ * @wr_id: work request id
+ * @signaled: signaled for completion
+ * @post_sq: ring doorbell
+ */
+int irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, bool post_sq)
+{
+ __le64 *wqe;
+ u64 hdr;
+ u32 wqe_idx;
+ struct irdma_post_sq_info info = {};
+
+ info.wr_id = wr_id;
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA,
+ 0, &info);
+ if (!wqe)
+ return -ENOMEM;
+
+ irdma_clr_wqes(qp, wqe_idx);
+
+ set_64bit_val(wqe, 0, 0);
+ set_64bit_val(wqe, 8, 0);
+ set_64bit_val(wqe, 16, 0);
+
+ hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_NOP) |
+ FIELD_PREP(IRDMAQPSQ_SIGCOMPL, signaled) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity);
+
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * irdma_fragcnt_to_quanta_sq - calculate quanta based on fragment count for SQ
+ * @frag_cnt: number of fragments
+ * @quanta: quanta for frag_cnt
+ */
+int irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta)
+{
+ switch (frag_cnt) {
+ case 0:
+ case 1:
+ *quanta = IRDMA_QP_WQE_MIN_QUANTA;
+ break;
+ case 2:
+ case 3:
+ *quanta = 2;
+ break;
+ case 4:
+ case 5:
+ *quanta = 3;
+ break;
+ case 6:
+ case 7:
+ *quanta = 4;
+ break;
+ case 8:
+ case 9:
+ *quanta = 5;
+ break;
+ case 10:
+ case 11:
+ *quanta = 6;
+ break;
+ case 12:
+ case 13:
+ *quanta = 7;
+ break;
+ case 14:
+ case 15: /* when immediate data is present */
+ *quanta = 8;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
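+
+/*
+ * Example (informal): quantum 0 holds the WQE header words plus one
+ * 16-byte fragment and each extra 32-byte quantum holds two more, so the
+ * table above reduces to quanta = frag_cnt / 2 + 1 (with the 0/1 fragment
+ * cases mapping to IRDMA_QP_WQE_MIN_QUANTA); e.g. 7 SGEs need 4 quanta,
+ * i.e. a 128-byte WQE.
+ */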
+
+/**
+ * irdma_fragcnt_to_wqesize_rq - calculate wqe size based on fragment count for RQ
+ * @frag_cnt: number of fragments
+ * @wqe_size: size in bytes given frag_cnt
+ */
+int irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size)
+{
+ switch (frag_cnt) {
+ case 0:
+ case 1:
+ *wqe_size = 32;
+ break;
+ case 2:
+ case 3:
+ *wqe_size = 64;
+ break;
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ *wqe_size = 128;
+ break;
+ case 8:
+ case 9:
+ case 10:
+ case 11:
+ case 12:
+ case 13:
+ case 14:
+ *wqe_size = 256;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h
new file mode 100644
index 000000000000..9eb7fd0b1cbf
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/user.h
@@ -0,0 +1,676 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2020 Intel Corporation */
+#ifndef IRDMA_USER_H
+#define IRDMA_USER_H
+
+#define irdma_handle void *
+#define irdma_adapter_handle irdma_handle
+#define irdma_qp_handle irdma_handle
+#define irdma_cq_handle irdma_handle
+#define irdma_pd_id irdma_handle
+#define irdma_stag_handle irdma_handle
+#define irdma_stag_index u32
+#define irdma_stag u32
+#define irdma_stag_key u8
+#define irdma_tagged_offset u64
+#define irdma_access_privileges u32
+#define irdma_physical_fragment u64
+#define irdma_address_list u64 *
+
+#define IRDMA_MAX_MR_SIZE 0x200000000000ULL
+
+#define IRDMA_ACCESS_FLAGS_LOCALREAD 0x01
+#define IRDMA_ACCESS_FLAGS_LOCALWRITE 0x02
+#define IRDMA_ACCESS_FLAGS_REMOTEREAD_ONLY 0x04
+#define IRDMA_ACCESS_FLAGS_REMOTEREAD 0x05
+#define IRDMA_ACCESS_FLAGS_REMOTEWRITE_ONLY 0x08
+#define IRDMA_ACCESS_FLAGS_REMOTEWRITE 0x0a
+#define IRDMA_ACCESS_FLAGS_BIND_WINDOW 0x10
+#define IRDMA_ACCESS_FLAGS_ZERO_BASED 0x20
+#define IRDMA_ACCESS_FLAGS_ALL 0x3f
+
+#define IRDMA_OP_TYPE_RDMA_WRITE 0x00
+#define IRDMA_OP_TYPE_RDMA_READ 0x01
+#define IRDMA_OP_TYPE_SEND 0x03
+#define IRDMA_OP_TYPE_SEND_INV 0x04
+#define IRDMA_OP_TYPE_SEND_SOL 0x05
+#define IRDMA_OP_TYPE_SEND_SOL_INV 0x06
+#define IRDMA_OP_TYPE_RDMA_WRITE_SOL 0x0d
+#define IRDMA_OP_TYPE_BIND_MW 0x08
+#define IRDMA_OP_TYPE_FAST_REG_NSMR 0x09
+#define IRDMA_OP_TYPE_INV_STAG 0x0a
+#define IRDMA_OP_TYPE_RDMA_READ_INV_STAG 0x0b
+#define IRDMA_OP_TYPE_NOP 0x0c
+#define IRDMA_OP_TYPE_ATOMIC_FETCH_AND_ADD 0x0f
+#define IRDMA_OP_TYPE_ATOMIC_COMPARE_AND_SWAP 0x11
+#define IRDMA_OP_TYPE_REC 0x3e
+#define IRDMA_OP_TYPE_REC_IMM 0x3f
+
+#define IRDMA_FLUSH_MAJOR_ERR 1
+#define IRDMA_SRQFLUSH_RSVD_MAJOR_ERR 0xfffe
+
+/* Async Events codes */
+#define IRDMA_AE_AMP_UNALLOCATED_STAG 0x0102
+#define IRDMA_AE_AMP_INVALID_STAG 0x0103
+#define IRDMA_AE_AMP_BAD_QP 0x0104
+#define IRDMA_AE_AMP_BAD_PD 0x0105
+#define IRDMA_AE_AMP_BAD_STAG_KEY 0x0106
+#define IRDMA_AE_AMP_BAD_STAG_INDEX 0x0107
+#define IRDMA_AE_AMP_BOUNDS_VIOLATION 0x0108
+#define IRDMA_AE_AMP_RIGHTS_VIOLATION 0x0109
+#define IRDMA_AE_AMP_TO_WRAP 0x010a
+#define IRDMA_AE_AMP_FASTREG_VALID_STAG 0x010c
+#define IRDMA_AE_AMP_FASTREG_MW_STAG 0x010d
+#define IRDMA_AE_AMP_FASTREG_INVALID_RIGHTS 0x010e
+#define IRDMA_AE_AMP_FASTREG_INVALID_LENGTH 0x0110
+#define IRDMA_AE_AMP_INVALIDATE_SHARED 0x0111
+#define IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS 0x0112
+#define IRDMA_AE_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS 0x0113
+#define IRDMA_AE_AMP_MWBIND_VALID_STAG 0x0114
+#define IRDMA_AE_AMP_MWBIND_OF_MR_STAG 0x0115
+#define IRDMA_AE_AMP_MWBIND_TO_ZERO_BASED_STAG 0x0116
+#define IRDMA_AE_AMP_MWBIND_TO_MW_STAG 0x0117
+#define IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS 0x0118
+#define IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS 0x0119
+#define IRDMA_AE_AMP_MWBIND_TO_INVALID_PARENT 0x011a
+#define IRDMA_AE_AMP_MWBIND_BIND_DISABLED 0x011b
+#define IRDMA_AE_PRIV_OPERATION_DENIED 0x011c
+#define IRDMA_AE_AMP_INVALIDATE_TYPE1_MW 0x011d
+#define IRDMA_AE_AMP_MWBIND_ZERO_BASED_TYPE1_MW 0x011e
+#define IRDMA_AE_AMP_FASTREG_INVALID_PBL_HPS_CFG 0x011f
+#define IRDMA_AE_AMP_MWBIND_WRONG_TYPE 0x0120
+#define IRDMA_AE_AMP_FASTREG_PBLE_MISMATCH 0x0121
+#define IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG 0x0132
+#define IRDMA_AE_UDA_XMIT_BAD_PD 0x0133
+#define IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT 0x0134
+#define IRDMA_AE_UDA_L4LEN_INVALID 0x0135
+#define IRDMA_AE_BAD_CLOSE 0x0201
+#define IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE 0x0202
+#define IRDMA_AE_CQ_OPERATION_ERROR 0x0203
+#define IRDMA_AE_RDMA_READ_WHILE_ORD_ZERO 0x0205
+#define IRDMA_AE_STAG_ZERO_INVALID 0x0206
+#define IRDMA_AE_IB_RREQ_AND_Q1_FULL 0x0207
+#define IRDMA_AE_IB_INVALID_REQUEST 0x0208
+#define IRDMA_AE_SRQ_LIMIT 0x0209
+#define IRDMA_AE_WQE_UNEXPECTED_OPCODE 0x020a
+#define IRDMA_AE_WQE_INVALID_PARAMETER 0x020b
+#define IRDMA_AE_WQE_INVALID_FRAG_DATA 0x020c
+#define IRDMA_AE_IB_REMOTE_ACCESS_ERROR 0x020d
+#define IRDMA_AE_IB_REMOTE_OP_ERROR 0x020e
+#define IRDMA_AE_SRQ_CATASTROPHIC_ERROR 0x020f
+#define IRDMA_AE_WQE_LSMM_TOO_LONG 0x0220
+#define IRDMA_AE_ATOMIC_ALIGNMENT 0x0221
+#define IRDMA_AE_ATOMIC_MASK 0x0222
+#define IRDMA_AE_INVALID_REQUEST 0x0223
+#define IRDMA_AE_PCIE_ATOMIC_DISABLE 0x0224
+#define IRDMA_AE_DDP_INVALID_MSN_GAP_IN_MSN 0x0301
+#define IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER 0x0303
+#define IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION 0x0304
+#define IRDMA_AE_DDP_UBE_INVALID_MO 0x0305
+#define IRDMA_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE 0x0306
+#define IRDMA_AE_DDP_UBE_INVALID_QN 0x0307
+#define IRDMA_AE_DDP_NO_L_BIT 0x0308
+#define IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION 0x0311
+#define IRDMA_AE_RDMAP_ROE_UNEXPECTED_OPCODE 0x0312
+#define IRDMA_AE_ROE_INVALID_RDMA_READ_REQUEST 0x0313
+#define IRDMA_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP 0x0314
+#define IRDMA_AE_ROCE_RSP_LENGTH_ERROR 0x0316
+#define IRDMA_AE_ROCE_EMPTY_MCG 0x0380
+#define IRDMA_AE_ROCE_BAD_MC_IP_ADDR 0x0381
+#define IRDMA_AE_ROCE_BAD_MC_QPID 0x0382
+#define IRDMA_AE_MCG_QP_PROTOCOL_MISMATCH 0x0383
+#define IRDMA_AE_INVALID_ARP_ENTRY 0x0401
+#define IRDMA_AE_INVALID_TCP_OPTION_RCVD 0x0402
+#define IRDMA_AE_STALE_ARP_ENTRY 0x0403
+#define IRDMA_AE_INVALID_AH_ENTRY 0x0406
+#define IRDMA_AE_LLP_CLOSE_COMPLETE 0x0501
+#define IRDMA_AE_LLP_CONNECTION_RESET 0x0502
+#define IRDMA_AE_LLP_FIN_RECEIVED 0x0503
+#define IRDMA_AE_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH 0x0504
+#define IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR 0x0505
+#define IRDMA_AE_LLP_SEGMENT_TOO_SMALL 0x0507
+#define IRDMA_AE_LLP_SYN_RECEIVED 0x0508
+#define IRDMA_AE_LLP_TERMINATE_RECEIVED 0x0509
+#define IRDMA_AE_LLP_TOO_MANY_RETRIES 0x050a
+#define IRDMA_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES 0x050b
+#define IRDMA_AE_LLP_DOUBT_REACHABILITY 0x050c
+#define IRDMA_AE_LLP_CONNECTION_ESTABLISHED 0x050e
+#define IRDMA_AE_LLP_TOO_MANY_RNRS 0x050f
+#define IRDMA_AE_RESOURCE_EXHAUSTION 0x0520
+#define IRDMA_AE_RESET_SENT 0x0601
+#define IRDMA_AE_TERMINATE_SENT 0x0602
+#define IRDMA_AE_RESET_NOT_SENT 0x0603
+#define IRDMA_AE_LCE_QP_CATASTROPHIC 0x0700
+#define IRDMA_AE_LCE_FUNCTION_CATASTROPHIC 0x0701
+#define IRDMA_AE_LCE_CQ_CATASTROPHIC 0x0702
+#define IRDMA_AE_REMOTE_QP_CATASTROPHIC 0x0703
+#define IRDMA_AE_LOCAL_QP_CATASTROPHIC 0x0704
+#define IRDMA_AE_RCE_QP_CATASTROPHIC 0x0705
+#define IRDMA_AE_QP_SUSPEND_COMPLETE 0x0900
+#define IRDMA_AE_CQP_DEFERRED_COMPLETE 0x0901
+#define IRDMA_AE_ADAPTER_CATASTROPHIC 0x0B0B
+
+enum irdma_device_caps_const {
+ IRDMA_WQE_SIZE = 4,
+ IRDMA_CQP_WQE_SIZE = 8,
+ IRDMA_CQE_SIZE = 4,
+ IRDMA_EXTENDED_CQE_SIZE = 8,
+ IRDMA_AEQE_SIZE = 2,
+ IRDMA_CEQE_SIZE = 1,
+ IRDMA_CQP_CTX_SIZE = 8,
+ IRDMA_SHADOW_AREA_SIZE = 8,
+ IRDMA_QUERY_FPM_BUF_SIZE = 192,
+ IRDMA_COMMIT_FPM_BUF_SIZE = 192,
+ IRDMA_GATHER_STATS_BUF_SIZE = 1024,
+ IRDMA_MIN_IW_QP_ID = 0,
+ IRDMA_MAX_IW_QP_ID = 262143,
+ IRDMA_MIN_IW_SRQ_ID = 0,
+ IRDMA_MIN_CEQID = 0,
+ IRDMA_MAX_CEQID = 1023,
+ IRDMA_CEQ_MAX_COUNT = IRDMA_MAX_CEQID + 1,
+ IRDMA_MIN_CQID = 0,
+ IRDMA_MAX_CQID = 524287,
+ IRDMA_MIN_AEQ_ENTRIES = 1,
+ IRDMA_MAX_AEQ_ENTRIES = 524287,
+ IRDMA_MAX_AEQ_ENTRIES_GEN_3 = 262144,
+ IRDMA_MIN_CEQ_ENTRIES = 1,
+ IRDMA_MAX_CEQ_ENTRIES = 262143,
+ IRDMA_MIN_CQ_SIZE = 1,
+ IRDMA_MAX_CQ_SIZE = 1048575,
+ IRDMA_DB_ID_ZERO = 0,
+ IRDMA_MAX_WQ_FRAGMENT_COUNT = 13,
+ IRDMA_MAX_SGE_RD = 13,
+ IRDMA_MAX_OUTBOUND_MSG_SIZE = 2147483647,
+ IRDMA_MAX_INBOUND_MSG_SIZE = 2147483647,
+ IRDMA_MAX_PUSH_PAGE_COUNT = 1024,
+ IRDMA_MAX_PE_ENA_VF_COUNT = 32,
+ IRDMA_MAX_VF_FPM_ID = 47,
+ IRDMA_MAX_SQ_PAYLOAD_SIZE = 2145386496,
+ IRDMA_MAX_INLINE_DATA_SIZE = 101,
+ IRDMA_MAX_WQ_ENTRIES = 32768,
+ IRDMA_Q2_BUF_SIZE = 256,
+ IRDMA_QP_CTX_SIZE = 256,
+ IRDMA_MAX_PDS = 262144,
+ IRDMA_MIN_WQ_SIZE_GEN2 = 8,
+};
+
+enum irdma_addressing_type {
+ IRDMA_ADDR_TYPE_ZERO_BASED = 0,
+ IRDMA_ADDR_TYPE_VA_BASED = 1,
+};
+
+enum irdma_flush_opcode {
+ FLUSH_INVALID = 0,
+ FLUSH_GENERAL_ERR,
+ FLUSH_PROT_ERR,
+ FLUSH_REM_ACCESS_ERR,
+ FLUSH_LOC_QP_OP_ERR,
+ FLUSH_REM_OP_ERR,
+ FLUSH_LOC_LEN_ERR,
+ FLUSH_FATAL_ERR,
+ FLUSH_RETRY_EXC_ERR,
+ FLUSH_MW_BIND_ERR,
+ FLUSH_REM_INV_REQ_ERR,
+ FLUSH_RNR_RETRY_EXC_ERR,
+};
+
+enum irdma_qp_event_type {
+ IRDMA_QP_EVENT_CATASTROPHIC,
+ IRDMA_QP_EVENT_ACCESS_ERR,
+ IRDMA_QP_EVENT_REQ_ERR,
+};
+
+enum irdma_cmpl_status {
+ IRDMA_COMPL_STATUS_SUCCESS = 0,
+ IRDMA_COMPL_STATUS_FLUSHED,
+ IRDMA_COMPL_STATUS_INVALID_WQE,
+ IRDMA_COMPL_STATUS_QP_CATASTROPHIC,
+ IRDMA_COMPL_STATUS_REMOTE_TERMINATION,
+ IRDMA_COMPL_STATUS_INVALID_STAG,
+ IRDMA_COMPL_STATUS_BASE_BOUND_VIOLATION,
+ IRDMA_COMPL_STATUS_ACCESS_VIOLATION,
+ IRDMA_COMPL_STATUS_INVALID_PD_ID,
+ IRDMA_COMPL_STATUS_WRAP_ERROR,
+ IRDMA_COMPL_STATUS_STAG_INVALID_PDID,
+ IRDMA_COMPL_STATUS_RDMA_READ_ZERO_ORD,
+ IRDMA_COMPL_STATUS_QP_NOT_PRIVLEDGED,
+ IRDMA_COMPL_STATUS_STAG_NOT_INVALID,
+ IRDMA_COMPL_STATUS_INVALID_PHYS_BUF_SIZE,
+ IRDMA_COMPL_STATUS_INVALID_PHYS_BUF_ENTRY,
+ IRDMA_COMPL_STATUS_INVALID_FBO,
+ IRDMA_COMPL_STATUS_INVALID_LEN,
+ IRDMA_COMPL_STATUS_INVALID_ACCESS,
+ IRDMA_COMPL_STATUS_PHYS_BUF_LIST_TOO_LONG,
+ IRDMA_COMPL_STATUS_INVALID_VIRT_ADDRESS,
+ IRDMA_COMPL_STATUS_INVALID_REGION,
+ IRDMA_COMPL_STATUS_INVALID_WINDOW,
+ IRDMA_COMPL_STATUS_INVALID_TOTAL_LEN,
+ IRDMA_COMPL_STATUS_UNKNOWN,
+};
+
+enum irdma_cmpl_notify {
+ IRDMA_CQ_COMPL_EVENT = 0,
+ IRDMA_CQ_COMPL_SOLICITED = 1,
+};
+
+enum irdma_qp_caps {
+ IRDMA_WRITE_WITH_IMM = 1,
+ IRDMA_SEND_WITH_IMM = 2,
+ IRDMA_ROCE = 4,
+ IRDMA_PUSH_MODE = 8,
+};
+
+struct irdma_srq_uk;
+struct irdma_srq_uk_init_info;
+struct irdma_qp_uk;
+struct irdma_cq_uk;
+struct irdma_qp_uk_init_info;
+struct irdma_cq_uk_init_info;
+
+struct irdma_ring {
+ u32 head;
+ u32 tail;
+ u32 size;
+};
+
+struct irdma_cqe {
+ __le64 buf[IRDMA_CQE_SIZE];
+};
+
+struct irdma_extended_cqe {
+ __le64 buf[IRDMA_EXTENDED_CQE_SIZE];
+};
+
+struct irdma_post_send {
+ struct ib_sge *sg_list;
+ u32 num_sges;
+ u32 qkey;
+ u32 dest_qp;
+ u32 ah_id;
+};
+
+struct irdma_post_rq_info {
+ u64 wr_id;
+ struct ib_sge *sg_list;
+ u32 num_sges;
+};
+
+struct irdma_rdma_write {
+ struct ib_sge *lo_sg_list;
+ u32 num_lo_sges;
+ struct ib_sge rem_addr;
+};
+
+struct irdma_rdma_read {
+ struct ib_sge *lo_sg_list;
+ u32 num_lo_sges;
+ struct ib_sge rem_addr;
+};
+
+struct irdma_bind_window {
+ irdma_stag mr_stag;
+ u64 bind_len;
+ void *va;
+ enum irdma_addressing_type addressing_type;
+ bool ena_reads:1;
+ bool ena_writes:1;
+ irdma_stag mw_stag;
+ bool mem_window_type_1:1;
+ bool remote_atomics_en:1;
+};
+
+struct irdma_atomic_fetch_add {
+ u64 tagged_offset;
+ u64 remote_tagged_offset;
+ u64 fetch_add_data_bytes;
+ u32 stag;
+ u32 remote_stag;
+};
+
+struct irdma_atomic_compare_swap {
+ u64 tagged_offset;
+ u64 remote_tagged_offset;
+ u64 swap_data_bytes;
+ u64 compare_data_bytes;
+ u32 stag;
+ u32 remote_stag;
+};
+
+struct irdma_inv_local_stag {
+ irdma_stag target_stag;
+};
+
+struct irdma_post_sq_info {
+ u64 wr_id;
+ u8 op_type;
+ u8 l4len;
+ bool signaled:1;
+ bool read_fence:1;
+ bool local_fence:1;
+ bool inline_data:1;
+ bool imm_data_valid:1;
+ bool report_rtt:1;
+ bool udp_hdr:1;
+ bool defer_flag:1;
+ bool remote_atomic_en:1;
+ u32 imm_data;
+ u32 stag_to_inv;
+ union {
+ struct irdma_post_send send;
+ struct irdma_rdma_write rdma_write;
+ struct irdma_rdma_read rdma_read;
+ struct irdma_bind_window bind_window;
+ struct irdma_inv_local_stag inv_local_stag;
+ struct irdma_atomic_fetch_add atomic_fetch_add;
+ struct irdma_atomic_compare_swap atomic_compare_swap;
+ } op;
+};
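+
+/*
+ * Illustrative sketch only (buf, len, remote_va, rstag, my_wr_id, qp and
+ * err are placeholder names, not driver symbols): a kernel consumer
+ * posting a small inline RDMA write might fill the info structure along
+ * these lines and let the uk layer build the WQE.
+ *
+ *    struct ib_sge sge = { .addr = (uintptr_t)buf, .length = len };
+ *    struct irdma_post_sq_info info = {};
+ *
+ *    info.wr_id = my_wr_id;
+ *    info.op_type = IRDMA_OP_TYPE_RDMA_WRITE;
+ *    info.signaled = true;
+ *    info.op.rdma_write.lo_sg_list = &sge;
+ *    info.op.rdma_write.num_lo_sges = 1;
+ *    info.op.rdma_write.rem_addr.addr = remote_va;
+ *    info.op.rdma_write.rem_addr.lkey = rstag;
+ *    err = irdma_uk_inline_rdma_write(qp, &info, true);
+ */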
+
+struct irdma_cq_poll_info {
+ u64 wr_id;
+ irdma_qp_handle qp_handle;
+ u32 bytes_xfered;
+ u32 tcp_seq_num_rtt;
+ u32 qp_id;
+ u32 ud_src_qpn;
+ u32 imm_data;
+ irdma_stag inv_stag; /* or L_R_Key */
+ enum irdma_cmpl_status comp_status;
+ u16 major_err;
+ u16 minor_err;
+ u16 ud_vlan;
+ u8 ud_smac[6];
+ u8 op_type;
+ u8 q_type;
+ bool stag_invalid_set:1; /* or L_R_Key set */
+ bool error:1;
+ bool solicited_event:1;
+ bool ipv4:1;
+ bool ud_vlan_valid:1;
+ bool ud_smac_valid:1;
+ bool imm_valid:1;
+};
+
+struct qp_err_code {
+ enum irdma_flush_opcode flush_code;
+ enum irdma_qp_event_type event_type;
+};
+
+int irdma_uk_atomic_compare_swap(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info, bool post_sq);
+int irdma_uk_atomic_fetch_add(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info, bool post_sq);
+int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info, bool post_sq);
+int irdma_uk_inline_send(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info, bool post_sq);
+int irdma_uk_post_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled,
+ bool post_sq);
+int irdma_uk_post_receive(struct irdma_qp_uk *qp,
+ struct irdma_post_rq_info *info);
+void irdma_uk_qp_post_wr(struct irdma_qp_uk *qp);
+int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
+ bool inv_stag, bool post_sq);
+int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
+ bool post_sq);
+int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
+ bool post_sq);
+int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info,
+ bool post_sq);
+
+struct irdma_wqe_uk_ops {
+ void (*iw_copy_inline_data)(u8 *dest, struct ib_sge *sge_list,
+ u32 num_sges, u8 polarity);
+ u16 (*iw_inline_data_size_to_quanta)(u32 data_size);
+ void (*iw_set_fragment)(__le64 *wqe, u32 offset, struct ib_sge *sge,
+ u8 valid);
+ void (*iw_set_mw_bind_wqe)(__le64 *wqe,
+ struct irdma_bind_window *op_info);
+};
+
+bool irdma_uk_cq_empty(struct irdma_cq_uk *cq);
+int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
+ struct irdma_cq_poll_info *info);
+void irdma_uk_cq_request_notification(struct irdma_cq_uk *cq,
+ enum irdma_cmpl_notify cq_notify);
+void irdma_uk_cq_resize(struct irdma_cq_uk *cq, void *cq_base, int size);
+void irdma_uk_cq_set_resized_cnt(struct irdma_cq_uk *qp, u16 cnt);
+void irdma_uk_cq_init(struct irdma_cq_uk *cq,
+ struct irdma_cq_uk_init_info *info);
+int irdma_uk_qp_init(struct irdma_qp_uk *qp,
+ struct irdma_qp_uk_init_info *info);
+void irdma_uk_calc_shift_wq(struct irdma_qp_uk_init_info *ukinfo, u8 *sq_shift,
+ u8 *rq_shift);
+int irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo,
+ u32 *sq_depth, u8 *sq_shift);
+int irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo,
+ u32 *rq_depth, u8 *rq_shift);
+int irdma_uk_srq_init(struct irdma_srq_uk *srq,
+ struct irdma_srq_uk_init_info *info);
+int irdma_uk_srq_post_receive(struct irdma_srq_uk *srq,
+ struct irdma_post_rq_info *info);
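+
+/*
+ * Illustrative poll sketch only (ukcq, cq_poll, ret and handle_completion
+ * are placeholder names, not driver symbols). -ENOENT means the CQ is
+ * empty; -EFAULT marks a completion for a QP that is being destroyed,
+ * which a caller may simply skip:
+ *
+ *    struct irdma_cq_poll_info cq_poll = {};
+ *    int ret;
+ *
+ *    do {
+ *        ret = irdma_uk_cq_poll_cmpl(ukcq, &cq_poll);
+ *    } while (ret == -EFAULT);
+ *    if (!ret)
+ *        handle_completion(&cq_poll);
+ */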
+
+struct irdma_srq_uk {
+ u32 srq_caps;
+ struct irdma_qp_quanta *srq_base;
+ struct irdma_uk_attrs *uk_attrs;
+ __le64 *shadow_area;
+ struct irdma_ring srq_ring;
+ u32 srq_id;
+ u32 srq_size;
+ u32 max_srq_frag_cnt;
+ struct irdma_wqe_uk_ops wqe_ops;
+ u8 srwqe_polarity;
+ u8 wqe_size;
+ u8 wqe_size_multiplier;
+ u8 deferred_flag;
+ spinlock_t *lock;
+};
+
+struct irdma_srq_uk_init_info {
+ struct irdma_qp_quanta *srq;
+ struct irdma_uk_attrs *uk_attrs;
+ __le64 *shadow_area;
+ u64 *srq_wrid_array;
+ u32 srq_id;
+ u32 srq_caps;
+ u32 srq_size;
+ u32 max_srq_frag_cnt;
+};
+
+struct irdma_sq_uk_wr_trk_info {
+ u64 wrid;
+ u32 wr_len;
+ u16 quanta;
+ u8 signaled;
+ u8 reserved[1];
+};
+
+struct irdma_qp_quanta {
+ __le64 elem[IRDMA_WQE_SIZE];
+};
+
+struct irdma_qp_uk {
+ struct irdma_qp_quanta *sq_base;
+ struct irdma_qp_quanta *rq_base;
+ struct irdma_uk_attrs *uk_attrs;
+ u32 __iomem *wqe_alloc_db;
+ struct irdma_sq_uk_wr_trk_info *sq_wrtrk_array;
+ u64 *rq_wrid_array;
+ __le64 *shadow_area;
+ struct irdma_ring sq_ring;
+ struct irdma_ring rq_ring;
+ struct irdma_ring initial_ring;
+ u32 qp_id;
+ u32 qp_caps;
+ u32 sq_size;
+ u32 rq_size;
+ u32 max_sq_frag_cnt;
+ u32 max_rq_frag_cnt;
+ u32 max_inline_data;
+ struct irdma_wqe_uk_ops wqe_ops;
+ u16 conn_wqes;
+ u8 qp_type;
+ u8 swqe_polarity;
+ u8 swqe_polarity_deferred;
+ u8 rwqe_polarity;
+ u8 rq_wqe_size;
+ u8 rq_wqe_size_multiplier;
+ bool deferred_flag:1;
+ bool first_sq_wq:1;
+ bool sq_flush_complete:1; /* Indicates flush was seen and SQ was empty after the flush */
+ bool rq_flush_complete:1; /* Indicates flush was seen and RQ was empty after the flush */
+ bool destroy_pending:1; /* Indicates the QP is being destroyed */
+ void *back_qp;
+ u8 dbg_rq_flushed;
+ struct irdma_srq_uk *srq_uk;
+ u8 sq_flush_seen;
+ u8 rq_flush_seen;
+};
+
+struct irdma_cq_uk {
+ struct irdma_cqe *cq_base;
+ u32 __iomem *cqe_alloc_db;
+ u32 __iomem *cq_ack_db;
+ __le64 *shadow_area;
+ u32 cq_id;
+ u32 cq_size;
+ struct irdma_ring cq_ring;
+ u8 polarity;
+ bool avoid_mem_cflct:1;
+};
+
+struct irdma_qp_uk_init_info {
+ struct irdma_qp_quanta *sq;
+ struct irdma_qp_quanta *rq;
+ struct irdma_uk_attrs *uk_attrs;
+ u32 __iomem *wqe_alloc_db;
+ __le64 *shadow_area;
+ struct irdma_sq_uk_wr_trk_info *sq_wrtrk_array;
+ u64 *rq_wrid_array;
+ u32 qp_id;
+ u32 qp_caps;
+ u32 sq_size;
+ u32 rq_size;
+ u32 max_sq_frag_cnt;
+ u32 max_rq_frag_cnt;
+ u32 max_inline_data;
+ u32 sq_depth;
+ u32 rq_depth;
+ u8 first_sq_wq;
+ u8 type;
+ u8 sq_shift;
+ u8 rq_shift;
+ int abi_ver;
+ bool legacy_mode;
+ struct irdma_srq_uk *srq_uk;
+};
+
+struct irdma_cq_uk_init_info {
+ u32 __iomem *cqe_alloc_db;
+ u32 __iomem *cq_ack_db;
+ struct irdma_cqe *cq_base;
+ __le64 *shadow_area;
+ u32 cq_size;
+ u32 cq_id;
+ bool avoid_mem_cflct;
+};
+
+__le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx,
+ u16 quanta, u32 total_size,
+ struct irdma_post_sq_info *info);
+__le64 *irdma_srq_get_next_recv_wqe(struct irdma_srq_uk *srq, u32 *wqe_idx);
+__le64 *irdma_qp_get_next_recv_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx);
+void irdma_uk_clean_cq(void *q, struct irdma_cq_uk *cq);
+int irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, bool post_sq);
+int irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta);
+int irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size);
+void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge,
+ u32 inline_data, u8 *shift);
+int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift,
+ u32 *wqdepth);
+int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift,
+ u32 *wqdepth);
+int irdma_get_srqdepth(struct irdma_uk_attrs *uk_attrs, u32 srq_size, u8 shift,
+ u32 *srqdepth);
+void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx);
+
+static inline struct qp_err_code irdma_ae_to_qp_err_code(u16 ae_id)
+{
+ struct qp_err_code qp_err = {};
+
+ switch (ae_id) {
+ case IRDMA_AE_AMP_BOUNDS_VIOLATION:
+ case IRDMA_AE_AMP_INVALID_STAG:
+ case IRDMA_AE_AMP_RIGHTS_VIOLATION:
+ case IRDMA_AE_AMP_UNALLOCATED_STAG:
+ case IRDMA_AE_AMP_BAD_PD:
+ case IRDMA_AE_AMP_BAD_QP:
+ case IRDMA_AE_AMP_BAD_STAG_KEY:
+ case IRDMA_AE_AMP_BAD_STAG_INDEX:
+ case IRDMA_AE_AMP_TO_WRAP:
+ case IRDMA_AE_PRIV_OPERATION_DENIED:
+ qp_err.flush_code = FLUSH_PROT_ERR;
+ qp_err.event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+ break;
+ case IRDMA_AE_UDA_XMIT_BAD_PD:
+ case IRDMA_AE_WQE_UNEXPECTED_OPCODE:
+ qp_err.flush_code = FLUSH_LOC_QP_OP_ERR;
+ qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+ break;
+ case IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT:
+ case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG:
+ case IRDMA_AE_UDA_L4LEN_INVALID:
+ case IRDMA_AE_DDP_UBE_INVALID_MO:
+ case IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
+ qp_err.flush_code = FLUSH_LOC_LEN_ERR;
+ qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+ break;
+ case IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
+ case IRDMA_AE_IB_REMOTE_ACCESS_ERROR:
+ qp_err.flush_code = FLUSH_REM_ACCESS_ERR;
+ qp_err.event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+ break;
+ case IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS:
+ case IRDMA_AE_AMP_MWBIND_BIND_DISABLED:
+ case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS:
+ case IRDMA_AE_AMP_MWBIND_VALID_STAG:
+ qp_err.flush_code = FLUSH_MW_BIND_ERR;
+ qp_err.event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+ break;
+ case IRDMA_AE_LLP_TOO_MANY_RETRIES:
+ qp_err.flush_code = FLUSH_RETRY_EXC_ERR;
+ qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+ break;
+ case IRDMA_AE_IB_INVALID_REQUEST:
+ qp_err.flush_code = FLUSH_REM_INV_REQ_ERR;
+ qp_err.event_type = IRDMA_QP_EVENT_REQ_ERR;
+ break;
+ case IRDMA_AE_LLP_SEGMENT_TOO_SMALL:
+ case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR:
+ case IRDMA_AE_ROCE_RSP_LENGTH_ERROR:
+ case IRDMA_AE_IB_REMOTE_OP_ERROR:
+ qp_err.flush_code = FLUSH_REM_OP_ERR;
+ qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+ break;
+ case IRDMA_AE_LLP_TOO_MANY_RNRS:
+ qp_err.flush_code = FLUSH_RNR_RETRY_EXC_ERR;
+ qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+ break;
+ case IRDMA_AE_LCE_QP_CATASTROPHIC:
+ case IRDMA_AE_REMOTE_QP_CATASTROPHIC:
+ case IRDMA_AE_LOCAL_QP_CATASTROPHIC:
+ case IRDMA_AE_RCE_QP_CATASTROPHIC:
+ qp_err.flush_code = FLUSH_FATAL_ERR;
+ qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+ break;
+ default:
+ qp_err.flush_code = FLUSH_GENERAL_ERR;
+ qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+ break;
+ }
+
+ return qp_err;
+}
+#endif /* IRDMA_USER_H */
diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c
new file mode 100644
index 000000000000..cc2a12f735d3
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/utils.c
@@ -0,0 +1,2508 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include "main.h"
+
+/**
+ * irdma_arp_table - manage arp table
+ * @rf: RDMA PCI function
+ * @ip_addr: ip address for device
+ * @ipv4: IPv4 flag
+ * @mac_addr: mac address ptr
+ * @action: modify, delete or add
+ */
+int irdma_arp_table(struct irdma_pci_f *rf, u32 *ip_addr, bool ipv4,
+ const u8 *mac_addr, u32 action)
+{
+ unsigned long flags;
+ int arp_index;
+ u32 ip[4] = {};
+
+ if (ipv4)
+ ip[0] = *ip_addr;
+ else
+ memcpy(ip, ip_addr, sizeof(ip));
+
+ spin_lock_irqsave(&rf->arp_lock, flags);
+ for (arp_index = 0; (u32)arp_index < rf->arp_table_size; arp_index++) {
+ if (!memcmp(rf->arp_table[arp_index].ip_addr, ip, sizeof(ip)))
+ break;
+ }
+
+ switch (action) {
+ case IRDMA_ARP_ADD:
+ if (arp_index != rf->arp_table_size) {
+ arp_index = -1;
+ break;
+ }
+
+ arp_index = 0;
+ if (irdma_alloc_rsrc(rf, rf->allocated_arps, rf->arp_table_size,
+ (u32 *)&arp_index, &rf->next_arp_index)) {
+ arp_index = -1;
+ break;
+ }
+
+ memcpy(rf->arp_table[arp_index].ip_addr, ip,
+ sizeof(rf->arp_table[arp_index].ip_addr));
+ ether_addr_copy(rf->arp_table[arp_index].mac_addr, mac_addr);
+ break;
+ case IRDMA_ARP_RESOLVE:
+ if (arp_index == rf->arp_table_size)
+ arp_index = -1;
+ break;
+ case IRDMA_ARP_DELETE:
+ if (arp_index == rf->arp_table_size) {
+ arp_index = -1;
+ break;
+ }
+
+ memset(rf->arp_table[arp_index].ip_addr, 0,
+ sizeof(rf->arp_table[arp_index].ip_addr));
+ eth_zero_addr(rf->arp_table[arp_index].mac_addr);
+ irdma_free_rsrc(rf, rf->allocated_arps, arp_index);
+ break;
+ default:
+ arp_index = -1;
+ break;
+ }
+
+ spin_unlock_irqrestore(&rf->arp_lock, flags);
+ return arp_index;
+}
+
+/**
+ * irdma_add_arp - add a new arp entry if needed
+ * @rf: RDMA function
+ * @ip: IP address
+ * @ipv4: IPv4 flag
+ * @mac: MAC address
+ */
+int irdma_add_arp(struct irdma_pci_f *rf, u32 *ip, bool ipv4, const u8 *mac)
+{
+ int arpidx;
+
+ arpidx = irdma_arp_table(rf, &ip[0], ipv4, NULL, IRDMA_ARP_RESOLVE);
+ if (arpidx >= 0) {
+ if (ether_addr_equal(rf->arp_table[arpidx].mac_addr, mac))
+ return arpidx;
+
+ irdma_manage_arp_cache(rf, rf->arp_table[arpidx].mac_addr, ip,
+ ipv4, IRDMA_ARP_DELETE);
+ }
+
+ irdma_manage_arp_cache(rf, mac, ip, ipv4, IRDMA_ARP_ADD);
+
+ return irdma_arp_table(rf, ip, ipv4, NULL, IRDMA_ARP_RESOLVE);
+}
+
+/**
+ * wr32 - write 32 bits to hw register
+ * @hw: hardware information including registers
+ * @reg: register offset
+ * @val: value to write to register
+ */
+inline void wr32(struct irdma_hw *hw, u32 reg, u32 val)
+{
+ writel(val, hw->hw_addr + reg);
+}
+
+/**
+ * rd32 - read a 32 bit hw register
+ * @hw: hardware information including registers
+ * @reg: register offset
+ *
+ * Return value of register content
+ */
+inline u32 rd32(struct irdma_hw *hw, u32 reg)
+{
+ return readl(hw->hw_addr + reg);
+}
+
+/**
+ * rd64 - read a 64 bit hw register
+ * @hw: hardware information including registers
+ * @reg: register offset
+ *
+ * Return value of register content
+ */
+inline u64 rd64(struct irdma_hw *hw, u32 reg)
+{
+ return readq(hw->hw_addr + reg);
+}
+
+static void irdma_gid_change_event(struct ib_device *ibdev)
+{
+ struct ib_event ib_event;
+
+ ib_event.event = IB_EVENT_GID_CHANGE;
+ ib_event.device = ibdev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+}
+
+/**
+ * irdma_inetaddr_event - system notifier for ipv4 addr events
+ * @notifier: not used
+ * @event: event for notifier
+ * @ptr: interface address
+ */
+int irdma_inetaddr_event(struct notifier_block *notifier, unsigned long event,
+ void *ptr)
+{
+ struct in_ifaddr *ifa = ptr;
+ struct net_device *real_dev, *netdev = ifa->ifa_dev->dev;
+ struct irdma_device *iwdev;
+ struct ib_device *ibdev;
+ u32 local_ipaddr;
+
+ real_dev = rdma_vlan_dev_real_dev(netdev);
+ if (!real_dev)
+ real_dev = netdev;
+
+ ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
+ if (!ibdev)
+ return NOTIFY_DONE;
+
+ iwdev = to_iwdev(ibdev);
+ local_ipaddr = ntohl(ifa->ifa_address);
+ ibdev_dbg(&iwdev->ibdev,
+ "DEV: netdev %p event %lu local_ip=%pI4 MAC=%pM\n", real_dev,
+ event, &local_ipaddr, real_dev->dev_addr);
+ switch (event) {
+ case NETDEV_DOWN:
+ irdma_manage_arp_cache(iwdev->rf, real_dev->dev_addr,
+ &local_ipaddr, true, IRDMA_ARP_DELETE);
+ irdma_if_notify(iwdev, real_dev, &local_ipaddr, true, false);
+ irdma_gid_change_event(&iwdev->ibdev);
+ break;
+ case NETDEV_UP:
+ case NETDEV_CHANGEADDR:
+ irdma_add_arp(iwdev->rf, &local_ipaddr, true, real_dev->dev_addr);
+ irdma_if_notify(iwdev, real_dev, &local_ipaddr, true, true);
+ irdma_gid_change_event(&iwdev->ibdev);
+ break;
+ default:
+ break;
+ }
+
+ ib_device_put(ibdev);
+
+ return NOTIFY_DONE;
+}
+
+/**
+ * irdma_inet6addr_event - system notifier for ipv6 addr events
+ * @notifier: not used
+ * @event: event for notifier
+ * @ptr: interface address (struct inet6_ifaddr)
+ */
+int irdma_inet6addr_event(struct notifier_block *notifier, unsigned long event,
+ void *ptr)
+{
+ struct inet6_ifaddr *ifa = ptr;
+ struct net_device *real_dev, *netdev = ifa->idev->dev;
+ struct irdma_device *iwdev;
+ struct ib_device *ibdev;
+ u32 local_ipaddr6[4];
+
+ real_dev = rdma_vlan_dev_real_dev(netdev);
+ if (!real_dev)
+ real_dev = netdev;
+
+ ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
+ if (!ibdev)
+ return NOTIFY_DONE;
+
+ iwdev = to_iwdev(ibdev);
+ irdma_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32);
+ ibdev_dbg(&iwdev->ibdev,
+ "DEV: netdev %p event %lu local_ip=%pI6 MAC=%pM\n", real_dev,
+ event, local_ipaddr6, real_dev->dev_addr);
+ switch (event) {
+ case NETDEV_DOWN:
+ irdma_manage_arp_cache(iwdev->rf, real_dev->dev_addr,
+ local_ipaddr6, false, IRDMA_ARP_DELETE);
+ irdma_if_notify(iwdev, real_dev, local_ipaddr6, false, false);
+ irdma_gid_change_event(&iwdev->ibdev);
+ break;
+ case NETDEV_UP:
+ case NETDEV_CHANGEADDR:
+ irdma_add_arp(iwdev->rf, local_ipaddr6, false,
+ real_dev->dev_addr);
+ irdma_if_notify(iwdev, real_dev, local_ipaddr6, false, true);
+ irdma_gid_change_event(&iwdev->ibdev);
+ break;
+ default:
+ break;
+ }
+
+ ib_device_put(ibdev);
+
+ return NOTIFY_DONE;
+}
+
+/**
+ * irdma_net_event - system notifier for net events
+ * @notifier: not used
+ * @event: event for notifier
+ * @ptr: neighbor
+ */
+int irdma_net_event(struct notifier_block *notifier, unsigned long event,
+ void *ptr)
+{
+ struct neighbour *neigh = ptr;
+ struct net_device *real_dev, *netdev = (struct net_device *)neigh->dev;
+ struct irdma_device *iwdev;
+ struct ib_device *ibdev;
+ __be32 *p;
+ u32 local_ipaddr[4] = {};
+ bool ipv4 = true;
+
+ switch (event) {
+ case NETEVENT_NEIGH_UPDATE:
+ real_dev = rdma_vlan_dev_real_dev(netdev);
+ if (!real_dev)
+ real_dev = netdev;
+ ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
+ if (!ibdev)
+ return NOTIFY_DONE;
+
+ iwdev = to_iwdev(ibdev);
+ p = (__be32 *)neigh->primary_key;
+ if (neigh->tbl->family == AF_INET6) {
+ ipv4 = false;
+ irdma_copy_ip_ntohl(local_ipaddr, p);
+ } else {
+ local_ipaddr[0] = ntohl(*p);
+ }
+
+ ibdev_dbg(&iwdev->ibdev,
+ "DEV: netdev %p state %d local_ip=%pI4 MAC=%pM\n",
+ iwdev->netdev, neigh->nud_state, local_ipaddr,
+ neigh->ha);
+
+ if (neigh->nud_state & NUD_VALID)
+ irdma_add_arp(iwdev->rf, local_ipaddr, ipv4, neigh->ha);
+ else
+ irdma_manage_arp_cache(iwdev->rf, neigh->ha,
+ local_ipaddr, ipv4,
+ IRDMA_ARP_DELETE);
+ ib_device_put(ibdev);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+/**
+ * irdma_netdevice_event - system notifier for netdev events
+ * @notifier: not used
+ * @event: event for notifier
+ * @ptr: netdev
+ */
+int irdma_netdevice_event(struct notifier_block *notifier, unsigned long event,
+ void *ptr)
+{
+ struct irdma_device *iwdev;
+ struct ib_device *ibdev;
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+ ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_IRDMA);
+ if (!ibdev)
+ return NOTIFY_DONE;
+
+ iwdev = to_iwdev(ibdev);
+ iwdev->iw_status = 1;
+ switch (event) {
+ case NETDEV_DOWN:
+ iwdev->iw_status = 0;
+ fallthrough;
+ default:
+ break;
+ }
+ ib_device_put(ibdev);
+
+ return NOTIFY_DONE;
+}
+
+/**
+ * irdma_add_ipv6_addr - add ipv6 address to the hw arp table
+ * @iwdev: irdma device
+ */
+static void irdma_add_ipv6_addr(struct irdma_device *iwdev)
+{
+ struct net_device *ip_dev;
+ struct inet6_dev *idev;
+ struct inet6_ifaddr *ifp, *tmp;
+ u32 local_ipaddr6[4];
+
+ rcu_read_lock();
+ for_each_netdev_rcu (&init_net, ip_dev) {
+ if (((rdma_vlan_dev_vlan_id(ip_dev) < 0xFFFF &&
+ rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev) ||
+ ip_dev == iwdev->netdev) &&
+ (READ_ONCE(ip_dev->flags) & IFF_UP)) {
+ idev = __in6_dev_get(ip_dev);
+ if (!idev) {
+ ibdev_err(&iwdev->ibdev, "ipv6 inet device not found\n");
+ break;
+ }
+ list_for_each_entry_safe (ifp, tmp, &idev->addr_list,
+ if_list) {
+ ibdev_dbg(&iwdev->ibdev,
+ "INIT: IP=%pI6, vlan_id=%d, MAC=%pM\n",
+ &ifp->addr,
+ rdma_vlan_dev_vlan_id(ip_dev),
+ ip_dev->dev_addr);
+
+ irdma_copy_ip_ntohl(local_ipaddr6,
+ ifp->addr.in6_u.u6_addr32);
+ irdma_manage_arp_cache(iwdev->rf,
+ ip_dev->dev_addr,
+ local_ipaddr6, false,
+ IRDMA_ARP_ADD);
+ }
+ }
+ }
+ rcu_read_unlock();
+}
+
+/**
+ * irdma_add_ipv4_addr - add ipv4 address to the hw arp table
+ * @iwdev: irdma device
+ */
+static void irdma_add_ipv4_addr(struct irdma_device *iwdev)
+{
+ struct net_device *dev;
+ struct in_device *idev;
+ u32 ip_addr;
+
+ rcu_read_lock();
+ for_each_netdev_rcu (&init_net, dev) {
+ if (((rdma_vlan_dev_vlan_id(dev) < 0xFFFF &&
+ rdma_vlan_dev_real_dev(dev) == iwdev->netdev) ||
+ dev == iwdev->netdev) && (READ_ONCE(dev->flags) & IFF_UP)) {
+ const struct in_ifaddr *ifa;
+
+ idev = __in_dev_get_rcu(dev);
+ if (!idev)
+ continue;
+
+ in_dev_for_each_ifa_rcu(ifa, idev) {
+ ibdev_dbg(&iwdev->ibdev, "CM: IP=%pI4, vlan_id=%d, MAC=%pM\n",
+ &ifa->ifa_address, rdma_vlan_dev_vlan_id(dev),
+ dev->dev_addr);
+
+ ip_addr = ntohl(ifa->ifa_address);
+ irdma_manage_arp_cache(iwdev->rf, dev->dev_addr,
+ &ip_addr, true,
+ IRDMA_ARP_ADD);
+ }
+ }
+ }
+ rcu_read_unlock();
+}
+
+/**
+ * irdma_add_ip - add ip addresses
+ * @iwdev: irdma device
+ *
+ * Add ipv4/ipv6 addresses to the arp cache
+ */
+void irdma_add_ip(struct irdma_device *iwdev)
+{
+ irdma_add_ipv4_addr(iwdev);
+ irdma_add_ipv6_addr(iwdev);
+}
+
+/**
+ * irdma_alloc_and_get_cqp_request - get cqp struct
+ * @cqp: device cqp ptr
+ * @wait: true if the cqp request will be used in wait mode
+ */
+struct irdma_cqp_request *irdma_alloc_and_get_cqp_request(struct irdma_cqp *cqp,
+ bool wait)
+{
+ struct irdma_cqp_request *cqp_request = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cqp->req_lock, flags);
+ if (!list_empty(&cqp->cqp_avail_reqs)) {
+ cqp_request = list_first_entry(&cqp->cqp_avail_reqs,
+ struct irdma_cqp_request, list);
+ list_del_init(&cqp_request->list);
+ }
+ spin_unlock_irqrestore(&cqp->req_lock, flags);
+ if (!cqp_request) {
+ cqp_request = kzalloc(sizeof(*cqp_request), GFP_ATOMIC);
+ if (cqp_request) {
+ cqp_request->dynamic = true;
+ if (wait)
+ init_waitqueue_head(&cqp_request->waitq);
+ }
+ }
+ if (!cqp_request) {
+ ibdev_dbg(to_ibdev(cqp->sc_cqp.dev), "ERR: CQP Request Fail: No Memory");
+ return NULL;
+ }
+
+ cqp_request->waiting = wait;
+ refcount_set(&cqp_request->refcnt, 1);
+ memset(&cqp_request->compl_info, 0, sizeof(cqp_request->compl_info));
+ memset(&cqp_request->info, 0, sizeof(cqp_request->info));
+
+ return cqp_request;
+}
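+
+/*
+ * Typical CQP command flow (see, e.g., irdma_cqp_sds_cmd() below):
+ *
+ *   cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+ *   cqp_info = &cqp_request->info;
+ *   ... fill cqp_info->cqp_cmd and the op-specific "in" union ...
+ *   status = irdma_handle_cqp_op(rf, cqp_request);
+ *   irdma_put_cqp_request(&rf->cqp, cqp_request);
+ */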
+
+/**
+ * irdma_get_cqp_request - increase refcount for cqp_request
+ * @cqp_request: pointer to cqp_request instance
+ */
+static inline void irdma_get_cqp_request(struct irdma_cqp_request *cqp_request)
+{
+ refcount_inc(&cqp_request->refcnt);
+}
+
+/**
+ * irdma_free_cqp_request - free cqp request
+ * @cqp: cqp ptr
+ * @cqp_request: to be put back in cqp list
+ */
+void irdma_free_cqp_request(struct irdma_cqp *cqp,
+ struct irdma_cqp_request *cqp_request)
+{
+ unsigned long flags;
+
+ if (cqp_request->dynamic) {
+ kfree(cqp_request);
+ } else {
+ WRITE_ONCE(cqp_request->request_done, false);
+ cqp_request->callback_fcn = NULL;
+ cqp_request->waiting = false;
+ cqp_request->pending = false;
+
+ spin_lock_irqsave(&cqp->req_lock, flags);
+ list_add_tail(&cqp_request->list, &cqp->cqp_avail_reqs);
+ spin_unlock_irqrestore(&cqp->req_lock, flags);
+ }
+ wake_up(&cqp->remove_wq);
+}
+
+/**
+ * irdma_put_cqp_request - dec ref count and free if 0
+ * @cqp: cqp ptr
+ * @cqp_request: to be put back in cqp list
+ */
+void irdma_put_cqp_request(struct irdma_cqp *cqp,
+ struct irdma_cqp_request *cqp_request)
+{
+ if (refcount_dec_and_test(&cqp_request->refcnt))
+ irdma_free_cqp_request(cqp, cqp_request);
+}
+
+/**
+ * irdma_free_pending_cqp_request - free pending cqp request objs
+ * @cqp: cqp ptr
+ * @cqp_request: to be put back in cqp list
+ */
+static void
+irdma_free_pending_cqp_request(struct irdma_cqp *cqp,
+ struct irdma_cqp_request *cqp_request)
+{
+ if (cqp_request->waiting) {
+ cqp_request->compl_info.error = true;
+ WRITE_ONCE(cqp_request->request_done, true);
+ wake_up(&cqp_request->waitq);
+ }
+ wait_event_timeout(cqp->remove_wq,
+ refcount_read(&cqp_request->refcnt) == 1, 1000);
+ irdma_put_cqp_request(cqp, cqp_request);
+}
+
+/**
+ * irdma_cleanup_deferred_cqp_ops - clean-up cqp with no completions
+ * @dev: sc_dev
+ * @cqp: cqp
+ */
+static void irdma_cleanup_deferred_cqp_ops(struct irdma_sc_dev *dev,
+ struct irdma_cqp *cqp)
+{
+ u64 scratch;
+
+ /* process all CQP requests with deferred/pending completions */
+ while ((scratch = irdma_sc_cqp_cleanup_handler(dev)))
+ irdma_free_pending_cqp_request(cqp, (struct irdma_cqp_request *)
+ (uintptr_t)scratch);
+}
+
+/**
+ * irdma_cleanup_pending_cqp_op - clean-up cqp with no
+ * completions
+ * @rf: RDMA PCI function
+ */
+void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_cqp *cqp = &rf->cqp;
+ struct irdma_cqp_request *cqp_request = NULL;
+ struct cqp_cmds_info *pcmdinfo = NULL;
+ u32 i, pending_work, wqe_idx;
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ irdma_cleanup_deferred_cqp_ops(dev, cqp);
+ pending_work = IRDMA_RING_USED_QUANTA(cqp->sc_cqp.sq_ring);
+ wqe_idx = IRDMA_RING_CURRENT_TAIL(cqp->sc_cqp.sq_ring);
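+ /* complete any requests still posted on the CQP SQ ring */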
+ for (i = 0; i < pending_work; i++) {
+ cqp_request = (struct irdma_cqp_request *)(unsigned long)
+ cqp->scratch_array[wqe_idx];
+ if (cqp_request)
+ irdma_free_pending_cqp_request(cqp, cqp_request);
+ wqe_idx = (wqe_idx + 1) % IRDMA_RING_SIZE(cqp->sc_cqp.sq_ring);
+ }
+
+ while (!list_empty(&dev->cqp_cmd_head)) {
+ pcmdinfo = irdma_remove_cqp_head(dev);
+ cqp_request =
+ container_of(pcmdinfo, struct irdma_cqp_request, info);
+ if (cqp_request)
+ irdma_free_pending_cqp_request(cqp, cqp_request);
+ }
+}
+
+static int irdma_get_timeout_threshold(struct irdma_sc_dev *dev)
+{
+ u16 time_s = dev->vc_caps.cqp_timeout_s;
+
+ if (!time_s)
+ return CQP_TIMEOUT_THRESHOLD;
+
+ return time_s * 1000 / dev->hw_attrs.max_cqp_compl_wait_time_ms;
+}
+
+static int irdma_get_def_timeout_threshold(struct irdma_sc_dev *dev)
+{
+ u16 time_s = dev->vc_caps.cqp_def_timeout_s;
+
+ if (!time_s)
+ return CQP_DEF_CMPL_TIMEOUT_THRESHOLD;
+
+ return time_s * 1000 / dev->hw_attrs.max_cqp_compl_wait_time_ms;
+}
+
+/**
+ * irdma_wait_event - wait for completion
+ * @rf: RDMA PCI function
+ * @cqp_request: cqp request to wait
+ */
+static int irdma_wait_event(struct irdma_pci_f *rf,
+ struct irdma_cqp_request *cqp_request)
+{
+ struct irdma_cqp_timeout cqp_timeout = {};
+ int timeout_threshold = irdma_get_timeout_threshold(&rf->sc_dev);
+ bool cqp_error = false;
+ int err_code = 0;
+
+ cqp_timeout.compl_cqp_cmds = atomic64_read(&rf->sc_dev.cqp->completed_ops);
+ do {
+ irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq);
+ if (wait_event_timeout(cqp_request->waitq,
+ READ_ONCE(cqp_request->request_done),
+ msecs_to_jiffies(CQP_COMPL_WAIT_TIME_MS)))
+ break;
+
+ if (cqp_request->pending)
+ /* There was a deferred or pending completion
+ * received for this CQP request, so we need
+ * to wait longer than usual.
+ */
+ timeout_threshold =
+ irdma_get_def_timeout_threshold(&rf->sc_dev);
+
+ irdma_check_cqp_progress(&cqp_timeout, &rf->sc_dev);
+
+ if (cqp_timeout.count < timeout_threshold)
+ continue;
+
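+ /* no CQP progress within the timeout threshold: request a function reset */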
+ if (!rf->reset) {
+ rf->reset = true;
+ rf->gen_ops.request_reset(rf);
+ }
+ return -ETIMEDOUT;
+ } while (1);
+
+ cqp_error = cqp_request->compl_info.error;
+ if (cqp_error) {
+ err_code = -EIO;
+ if (cqp_request->compl_info.maj_err_code == 0xFFFF) {
+ if (cqp_request->compl_info.min_err_code == 0x8002)
+ err_code = -EBUSY;
+ else if (cqp_request->compl_info.min_err_code == 0x8029) {
+ if (!rf->reset) {
+ rf->reset = true;
+ rf->gen_ops.request_reset(rf);
+ }
+ }
+ }
+ }
+
+ return err_code;
+}
+
+static const char *const irdma_cqp_cmd_names[IRDMA_MAX_CQP_OPS] = {
+ [IRDMA_OP_CEQ_DESTROY] = "Destroy CEQ Cmd",
+ [IRDMA_OP_AEQ_DESTROY] = "Destroy AEQ Cmd",
+ [IRDMA_OP_DELETE_ARP_CACHE_ENTRY] = "Delete ARP Cache Cmd",
+ [IRDMA_OP_MANAGE_APBVT_ENTRY] = "Manage APBV Table Entry Cmd",
+ [IRDMA_OP_CEQ_CREATE] = "CEQ Create Cmd",
+ [IRDMA_OP_AEQ_CREATE] = "AEQ Create Cmd",
+ [IRDMA_OP_MANAGE_QHASH_TABLE_ENTRY] = "Manage Quad Hash Table Entry Cmd",
+ [IRDMA_OP_QP_MODIFY] = "Modify QP Cmd",
+ [IRDMA_OP_QP_UPLOAD_CONTEXT] = "Upload Context Cmd",
+ [IRDMA_OP_CQ_CREATE] = "Create CQ Cmd",
+ [IRDMA_OP_CQ_DESTROY] = "Destroy CQ Cmd",
+ [IRDMA_OP_QP_CREATE] = "Create QP Cmd",
+ [IRDMA_OP_QP_DESTROY] = "Destroy QP Cmd",
+ [IRDMA_OP_ALLOC_STAG] = "Allocate STag Cmd",
+ [IRDMA_OP_MR_REG_NON_SHARED] = "Register Non-Shared MR Cmd",
+ [IRDMA_OP_DEALLOC_STAG] = "Deallocate STag Cmd",
+ [IRDMA_OP_MW_ALLOC] = "Allocate Memory Window Cmd",
+ [IRDMA_OP_QP_FLUSH_WQES] = "Flush QP Cmd",
+ [IRDMA_OP_ADD_ARP_CACHE_ENTRY] = "Add ARP Cache Cmd",
+ [IRDMA_OP_MANAGE_PUSH_PAGE] = "Manage Push Page Cmd",
+ [IRDMA_OP_UPDATE_PE_SDS] = "Update PE SDs Cmd",
+ [IRDMA_OP_MANAGE_HMC_PM_FUNC_TABLE] = "Manage HMC PM Function Table Cmd",
+ [IRDMA_OP_SUSPEND] = "Suspend QP Cmd",
+ [IRDMA_OP_RESUME] = "Resume QP Cmd",
+ [IRDMA_OP_MANAGE_VF_PBLE_BP] = "Manage VF PBLE Backing Pages Cmd",
+ [IRDMA_OP_QUERY_FPM_VAL] = "Query FPM Values Cmd",
+ [IRDMA_OP_COMMIT_FPM_VAL] = "Commit FPM Values Cmd",
+ [IRDMA_OP_AH_CREATE] = "Create Address Handle Cmd",
+ [IRDMA_OP_AH_MODIFY] = "Modify Address Handle Cmd",
+ [IRDMA_OP_AH_DESTROY] = "Destroy Address Handle Cmd",
+ [IRDMA_OP_MC_CREATE] = "Create Multicast Group Cmd",
+ [IRDMA_OP_MC_DESTROY] = "Destroy Multicast Group Cmd",
+ [IRDMA_OP_MC_MODIFY] = "Modify Multicast Group Cmd",
+ [IRDMA_OP_STATS_ALLOCATE] = "Add Statistics Instance Cmd",
+ [IRDMA_OP_STATS_FREE] = "Free Statistics Instance Cmd",
+ [IRDMA_OP_STATS_GATHER] = "Gather Statistics Cmd",
+ [IRDMA_OP_WS_ADD_NODE] = "Add Work Scheduler Node Cmd",
+ [IRDMA_OP_WS_MODIFY_NODE] = "Modify Work Scheduler Node Cmd",
+ [IRDMA_OP_WS_DELETE_NODE] = "Delete Work Scheduler Node Cmd",
+ [IRDMA_OP_SET_UP_MAP] = "Set UP-UP Mapping Cmd",
+ [IRDMA_OP_GEN_AE] = "Generate AE Cmd",
+ [IRDMA_OP_QUERY_RDMA_FEATURES] = "RDMA Get Features Cmd",
+ [IRDMA_OP_ALLOC_LOCAL_MAC_ENTRY] = "Allocate Local MAC Entry Cmd",
+ [IRDMA_OP_ADD_LOCAL_MAC_ENTRY] = "Add Local MAC Entry Cmd",
+ [IRDMA_OP_DELETE_LOCAL_MAC_ENTRY] = "Delete Local MAC Entry Cmd",
+ [IRDMA_OP_CQ_MODIFY] = "CQ Modify Cmd",
+ [IRDMA_OP_SRQ_CREATE] = "Create SRQ Cmd",
+ [IRDMA_OP_SRQ_MODIFY] = "Modify SRQ Cmd",
+ [IRDMA_OP_SRQ_DESTROY] = "Destroy SRQ Cmd",
+};
+
+static const struct irdma_cqp_err_info irdma_noncrit_err_list[] = {
+ {0xffff, 0x8002, "Invalid State"},
+ {0xffff, 0x8006, "Flush No Wqe Pending"},
+ {0xffff, 0x8007, "Modify QP Bad Close"},
+ {0xffff, 0x8009, "LLP Closed"},
+ {0xffff, 0x800a, "Reset Not Sent"}
+};
+
+/**
+ * irdma_cqp_crit_err - check if CQP error is critical
+ * @dev: pointer to dev structure
+ * @cqp_cmd: code for last CQP operation
+ * @maj_err_code: major error code
+ * @min_err_code: minor error code
+ */
+bool irdma_cqp_crit_err(struct irdma_sc_dev *dev, u8 cqp_cmd,
+ u16 maj_err_code, u16 min_err_code)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(irdma_noncrit_err_list); ++i) {
+ if (maj_err_code == irdma_noncrit_err_list[i].maj &&
+ min_err_code == irdma_noncrit_err_list[i].min) {
+ ibdev_dbg(to_ibdev(dev),
+ "CQP: [%s Error][%s] maj=0x%x min=0x%x\n",
+ irdma_noncrit_err_list[i].desc,
+ irdma_cqp_cmd_names[cqp_cmd], maj_err_code,
+ min_err_code);
+ return false;
+ }
+ }
+ return true;
+}
+
+/**
+ * irdma_handle_cqp_op - process cqp command
+ * @rf: RDMA PCI function
+ * @cqp_request: cqp request to process
+ */
+int irdma_handle_cqp_op(struct irdma_pci_f *rf,
+ struct irdma_cqp_request *cqp_request)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct cqp_cmds_info *info = &cqp_request->info;
+ int status;
+ bool put_cqp_request = true;
+
+ if (rf->reset)
+ return -EBUSY;
+
+ irdma_get_cqp_request(cqp_request);
+ status = irdma_process_cqp_cmd(dev, info);
+ if (status)
+ goto err;
+
+ if (cqp_request->waiting) {
+ put_cqp_request = false;
+ status = irdma_wait_event(rf, cqp_request);
+ if (status)
+ goto err;
+ }
+
+ return 0;
+
+err:
+ if (irdma_cqp_crit_err(dev, info->cqp_cmd,
+ cqp_request->compl_info.maj_err_code,
+ cqp_request->compl_info.min_err_code))
+ ibdev_err(&rf->iwdev->ibdev,
+ "[%s Error][op_code=%d] status=%d waiting=%d completion_err=%d maj=0x%x min=0x%x\n",
+ irdma_cqp_cmd_names[info->cqp_cmd], info->cqp_cmd, status, cqp_request->waiting,
+ cqp_request->compl_info.error, cqp_request->compl_info.maj_err_code,
+ cqp_request->compl_info.min_err_code);
+
+ if (put_cqp_request)
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+
+ return status;
+}
+
+void irdma_qp_add_ref(struct ib_qp *ibqp)
+{
+ struct irdma_qp *iwqp = (struct irdma_qp *)ibqp;
+
+ refcount_inc(&iwqp->refcnt);
+}
+
+void irdma_qp_rem_ref(struct ib_qp *ibqp)
+{
+ struct irdma_qp *iwqp = to_iwqp(ibqp);
+ struct irdma_device *iwdev = iwqp->iwdev;
+ u32 qp_num;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iwdev->rf->qptable_lock, flags);
+ if (!refcount_dec_and_test(&iwqp->refcnt)) {
+ spin_unlock_irqrestore(&iwdev->rf->qptable_lock, flags);
+ return;
+ }
+
+ qp_num = iwqp->ibqp.qp_num;
+ iwdev->rf->qp_table[qp_num] = NULL;
+ spin_unlock_irqrestore(&iwdev->rf->qptable_lock, flags);
+ complete(&iwqp->free_qp);
+}
+
+void irdma_cq_add_ref(struct ib_cq *ibcq)
+{
+ struct irdma_cq *iwcq = to_iwcq(ibcq);
+
+ refcount_inc(&iwcq->refcnt);
+}
+
+void irdma_cq_rem_ref(struct ib_cq *ibcq)
+{
+ struct ib_device *ibdev = ibcq->device;
+ struct irdma_device *iwdev = to_iwdev(ibdev);
+ struct irdma_cq *iwcq = to_iwcq(ibcq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&iwdev->rf->cqtable_lock, flags);
+ if (!refcount_dec_and_test(&iwcq->refcnt)) {
+ spin_unlock_irqrestore(&iwdev->rf->cqtable_lock, flags);
+ return;
+ }
+
+ iwdev->rf->cq_table[iwcq->cq_num] = NULL;
+ spin_unlock_irqrestore(&iwdev->rf->cqtable_lock, flags);
+ complete(&iwcq->free_cq);
+}
+
+struct ib_device *to_ibdev(struct irdma_sc_dev *dev)
+{
+ return &(container_of(dev, struct irdma_pci_f, sc_dev))->iwdev->ibdev;
+}
+
+/**
+ * irdma_get_qp - get qp address
+ * @device: iwarp device
+ * @qpn: qp number
+ */
+struct ib_qp *irdma_get_qp(struct ib_device *device, int qpn)
+{
+ struct irdma_device *iwdev = to_iwdev(device);
+
+ if (qpn < IW_FIRST_QPN || qpn >= iwdev->rf->max_qp)
+ return NULL;
+
+ return &iwdev->rf->qp_table[qpn]->ibqp;
+}
+
+/**
+ * irdma_remove_cqp_head - return head entry and remove
+ * @dev: device
+ */
+void *irdma_remove_cqp_head(struct irdma_sc_dev *dev)
+{
+ struct list_head *entry;
+ struct list_head *list = &dev->cqp_cmd_head;
+
+ if (list_empty(list))
+ return NULL;
+
+ entry = list->next;
+ list_del(entry);
+
+ return entry;
+}
+
+/**
+ * irdma_cqp_sds_cmd - create cqp command for sd
+ * @dev: hardware control device structure
+ * @sdinfo: information for sd cqp
+ */
+int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev,
+ struct irdma_update_sds_info *sdinfo)
+{
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ memcpy(&cqp_info->in.u.update_pe_sds.info, sdinfo,
+ sizeof(cqp_info->in.u.update_pe_sds.info));
+ cqp_info->cqp_cmd = IRDMA_OP_UPDATE_PE_SDS;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.update_pe_sds.dev = dev;
+ cqp_info->in.u.update_pe_sds.scratch = (uintptr_t)cqp_request;
+
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_cqp_qp_suspend_resume - cqp command for suspend/resume
+ * @qp: hardware control qp
+ * @op: suspend or resume
+ */
+int irdma_cqp_qp_suspend_resume(struct irdma_sc_qp *qp, u8 op)
+{
+ struct irdma_sc_dev *dev = qp->dev;
+ struct irdma_cqp_request *cqp_request;
+ struct irdma_sc_cqp *cqp = dev->cqp;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, false);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = op;
+ cqp_info->in.u.suspend_resume.cqp = cqp;
+ cqp_info->in.u.suspend_resume.qp = qp;
+ cqp_info->in.u.suspend_resume.scratch = (uintptr_t)cqp_request;
+
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_term_modify_qp - modify qp for term message
+ * @qp: hardware control qp
+ * @next_state: qp's next state
+ * @term: terminate code
+ * @term_len: length of the terminate message
+ */
+void irdma_term_modify_qp(struct irdma_sc_qp *qp, u8 next_state, u8 term,
+ u8 term_len)
+{
+ struct irdma_qp *iwqp;
+
+ iwqp = qp->qp_uk.back_qp;
+ irdma_next_iw_state(iwqp, next_state, 0, term, term_len);
+}
+
+/**
+ * irdma_terminate_done - after terminate is completed
+ * @qp: hardware control qp
+ * @timeout_occurred: indicates if terminate timer expired
+ */
+void irdma_terminate_done(struct irdma_sc_qp *qp, int timeout_occurred)
+{
+ struct irdma_qp *iwqp;
+ u8 hte = 0;
+ bool first_time;
+ unsigned long flags;
+
+ iwqp = qp->qp_uk.back_qp;
+ spin_lock_irqsave(&iwqp->lock, flags);
+ if (iwqp->hte_added) {
+ iwqp->hte_added = 0;
+ hte = 1;
+ }
+ first_time = !(qp->term_flags & IRDMA_TERM_DONE);
+ qp->term_flags |= IRDMA_TERM_DONE;
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ if (first_time) {
+ if (!timeout_occurred)
+ irdma_terminate_del_timer(qp);
+
+ irdma_next_iw_state(iwqp, IRDMA_QP_STATE_ERROR, hte, 0, 0);
+ irdma_cm_disconn(iwqp);
+ }
+}
+
+static void irdma_terminate_timeout(struct timer_list *t)
+{
+ struct irdma_qp *iwqp = timer_container_of(iwqp, t, terminate_timer);
+ struct irdma_sc_qp *qp = &iwqp->sc_qp;
+
+ irdma_terminate_done(qp, 1);
+ irdma_qp_rem_ref(&iwqp->ibqp);
+}
+
+/**
+ * irdma_terminate_start_timer - start terminate timeout
+ * @qp: hardware control qp
+ */
+void irdma_terminate_start_timer(struct irdma_sc_qp *qp)
+{
+ struct irdma_qp *iwqp;
+
+ iwqp = qp->qp_uk.back_qp;
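+ /* hold a QP reference while the terminate timer is pending; it is
+ * released in irdma_terminate_timeout() or irdma_terminate_del_timer()
+ */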
+ irdma_qp_add_ref(&iwqp->ibqp);
+ timer_setup(&iwqp->terminate_timer, irdma_terminate_timeout, 0);
+ iwqp->terminate_timer.expires = jiffies + HZ;
+
+ add_timer(&iwqp->terminate_timer);
+}
+
+/**
+ * irdma_terminate_del_timer - delete terminate timeout
+ * @qp: hardware control qp
+ */
+void irdma_terminate_del_timer(struct irdma_sc_qp *qp)
+{
+ struct irdma_qp *iwqp;
+ int ret;
+
+ iwqp = qp->qp_uk.back_qp;
+ ret = timer_delete(&iwqp->terminate_timer);
+ if (ret)
+ irdma_qp_rem_ref(&iwqp->ibqp);
+}
+
+/**
+ * irdma_cqp_cq_create_cmd - create a cq for the cqp
+ * @dev: device pointer
+ * @cq: pointer to created cq
+ */
+int irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq)
+{
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+ struct irdma_cqp *iwcqp = &rf->cqp;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_CQ_CREATE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.cq_create.cq = cq;
+ cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request;
+
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(iwcqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_cqp_qp_create_cmd - create a qp for the cqp
+ * @dev: device pointer
+ * @qp: pointer to created qp
+ */
+int irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp)
+{
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+ struct irdma_cqp *iwcqp = &rf->cqp;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_create_qp_info *qp_info;
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ qp_info = &cqp_request->info.in.u.qp_create.info;
+ qp_info->cq_num_valid = true;
+ qp_info->next_iwarp_state = IRDMA_QP_STATE_RTS;
+ cqp_info->cqp_cmd = IRDMA_OP_QP_CREATE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_create.qp = qp;
+ cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request;
+
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(iwcqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_dealloc_push_page - free a push page for qp
+ * @rf: RDMA PCI function
+ * @qp: hardware control qp
+ */
+static void irdma_dealloc_push_page(struct irdma_pci_f *rf,
+ struct irdma_sc_qp *qp)
+{
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+
+ if (qp->push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX)
+ return;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, false);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_MANAGE_PUSH_PAGE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.manage_push_page.info.push_idx = qp->push_idx;
+ cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle;
+ cqp_info->in.u.manage_push_page.info.free_page = 1;
+ cqp_info->in.u.manage_push_page.info.push_page_type = 0;
+ cqp_info->in.u.manage_push_page.cqp = &rf->cqp.sc_cqp;
+ cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ if (!status)
+ qp->push_idx = IRDMA_INVALID_PUSH_PAGE_INDEX;
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+}
+
+static void irdma_free_gsi_qp_rsrc(struct irdma_qp *iwqp, u32 qp_num)
+{
+ struct irdma_device *iwdev = iwqp->iwdev;
+ struct irdma_pci_f *rf = iwdev->rf;
+ unsigned long flags;
+
+ if (rf->sc_dev.hw_attrs.uk_attrs.hw_rev < IRDMA_GEN_3)
+ return;
+
+ irdma_vchnl_req_del_vport(&rf->sc_dev, iwdev->vport_id, qp_num);
+
+ if (qp_num == 1) {
+ spin_lock_irqsave(&rf->rsrc_lock, flags);
+ rf->hwqp1_rsvd = false;
+ spin_unlock_irqrestore(&rf->rsrc_lock, flags);
+ } else if (qp_num > 2) {
+ irdma_free_rsrc(rf, rf->allocated_qps, qp_num);
+ }
+}
+
+/**
+ * irdma_free_qp_rsrc - free up memory resources for qp
+ * @iwqp: qp ptr (user or kernel)
+ */
+void irdma_free_qp_rsrc(struct irdma_qp *iwqp)
+{
+ struct irdma_device *iwdev = iwqp->iwdev;
+ struct irdma_pci_f *rf = iwdev->rf;
+ u32 qp_num = iwqp->sc_qp.qp_uk.qp_id;
+
+ irdma_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp);
+ irdma_dealloc_push_page(rf, &iwqp->sc_qp);
+ if (iwqp->sc_qp.vsi) {
+ irdma_qp_rem_qos(&iwqp->sc_qp);
+ iwqp->sc_qp.dev->ws_remove(iwqp->sc_qp.vsi,
+ iwqp->sc_qp.user_pri);
+ }
+
+ if (iwqp->ibqp.qp_type == IB_QPT_GSI) {
+ irdma_free_gsi_qp_rsrc(iwqp, qp_num);
+ } else {
+ if (qp_num > 2)
+ irdma_free_rsrc(rf, rf->allocated_qps, qp_num);
+ }
+ dma_free_coherent(rf->sc_dev.hw->device, iwqp->q2_ctx_mem.size,
+ iwqp->q2_ctx_mem.va, iwqp->q2_ctx_mem.pa);
+ iwqp->q2_ctx_mem.va = NULL;
+ dma_free_coherent(rf->sc_dev.hw->device, iwqp->kqp.dma_mem.size,
+ iwqp->kqp.dma_mem.va, iwqp->kqp.dma_mem.pa);
+ iwqp->kqp.dma_mem.va = NULL;
+ kfree(iwqp->kqp.sq_wrid_mem);
+ kfree(iwqp->kqp.rq_wrid_mem);
+}
+
+/**
+ * irdma_srq_wq_destroy - send srq destroy cqp
+ * @rf: RDMA PCI function
+ * @srq: hardware control srq
+ */
+void irdma_srq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_srq *srq)
+{
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_SRQ_DESTROY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.srq_destroy.srq = srq;
+ cqp_info->in.u.srq_destroy.scratch = (uintptr_t)cqp_request;
+
+ irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+}
+
+/**
+ * irdma_cq_wq_destroy - send cq destroy cqp
+ * @rf: RDMA PCI function
+ * @cq: hardware control cq
+ */
+void irdma_cq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_cq *cq)
+{
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_CQ_DESTROY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.cq_destroy.cq = cq;
+ cqp_info->in.u.cq_destroy.scratch = (uintptr_t)cqp_request;
+
+ irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+}
+
+/**
+ * irdma_hw_modify_qp_callback - handle state for modify QP requests that don't wait
+ * @cqp_request: modify QP completion
+ */
+static void irdma_hw_modify_qp_callback(struct irdma_cqp_request *cqp_request)
+{
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_qp *iwqp;
+
+ cqp_info = &cqp_request->info;
+ iwqp = cqp_info->in.u.qp_modify.qp->qp_uk.back_qp;
+ atomic_dec(&iwqp->hw_mod_qp_pend);
+ wake_up(&iwqp->mod_qp_waitq);
+}
+
+/**
+ * irdma_hw_modify_qp - setup cqp for modify qp
+ * @iwdev: RDMA device
+ * @iwqp: qp ptr (user or kernel)
+ * @info: info for modify qp
+ * @wait: flag to wait or not for modify qp completion
+ */
+int irdma_hw_modify_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp,
+ struct irdma_modify_qp_info *info, bool wait)
+{
+ int status;
+ struct irdma_pci_f *rf = iwdev->rf;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_modify_qp_info *m_info;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, wait);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ if (!wait) {
+ cqp_request->callback_fcn = irdma_hw_modify_qp_callback;
+ atomic_inc(&iwqp->hw_mod_qp_pend);
+ }
+ cqp_info = &cqp_request->info;
+ m_info = &cqp_info->in.u.qp_modify.info;
+ memcpy(m_info, info, sizeof(*m_info));
+ cqp_info->cqp_cmd = IRDMA_OP_QP_MODIFY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_modify.qp = &iwqp->sc_qp;
+ cqp_info->in.u.qp_modify.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+ if (status) {
+ if (rdma_protocol_roce(&iwdev->ibdev, 1))
+ return status;
+
+ switch (m_info->next_iwarp_state) {
+ struct irdma_gen_ae_info ae_info;
+
+ case IRDMA_QP_STATE_RTS:
+ case IRDMA_QP_STATE_IDLE:
+ case IRDMA_QP_STATE_TERMINATE:
+ case IRDMA_QP_STATE_CLOSING:
+ if (info->curr_iwarp_state == IRDMA_QP_STATE_IDLE)
+ irdma_send_reset(iwqp->cm_node);
+ else
+ iwqp->sc_qp.term_flags = IRDMA_TERM_DONE;
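+ /* force the QP to the error state: asynchronously via a generated
+ * AE when not waiting, otherwise with a second blocking modify
+ */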
+ if (!wait) {
+ ae_info.ae_code = IRDMA_AE_BAD_CLOSE;
+ ae_info.ae_src = 0;
+ irdma_gen_ae(rf, &iwqp->sc_qp, &ae_info, false);
+ } else {
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp,
+ wait);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ m_info = &cqp_info->in.u.qp_modify.info;
+ memcpy(m_info, info, sizeof(*m_info));
+ cqp_info->cqp_cmd = IRDMA_OP_QP_MODIFY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_modify.qp = &iwqp->sc_qp;
+ cqp_info->in.u.qp_modify.scratch = (uintptr_t)cqp_request;
+ m_info->next_iwarp_state = IRDMA_QP_STATE_ERROR;
+ m_info->reset_tcp_conn = true;
+ irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+ }
+ break;
+ case IRDMA_QP_STATE_ERROR:
+ default:
+ break;
+ }
+ }
+
+ return status;
+}
+
+/**
+ * irdma_cqp_cq_destroy_cmd - destroy the cqp cq
+ * @dev: device pointer
+ * @cq: pointer to cq
+ */
+void irdma_cqp_cq_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq)
+{
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+
+ irdma_cq_wq_destroy(rf, cq);
+}
+
+/**
+ * irdma_cqp_qp_destroy_cmd - destroy the cqp
+ * @dev: device pointer
+ * @qp: pointer to qp
+ */
+int irdma_cqp_qp_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp)
+{
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+ struct irdma_cqp *iwcqp = &rf->cqp;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_QP_DESTROY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_destroy.qp = qp;
+ cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request;
+ cqp_info->in.u.qp_destroy.remove_hash_idx = true;
+
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_ieq_mpa_crc_ae - generate AE for crc error
+ * @dev: hardware control device structure
+ * @qp: hardware control qp
+ */
+void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp)
+{
+ struct irdma_gen_ae_info info = {};
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+
+ ibdev_dbg(&rf->iwdev->ibdev, "AEQ: Generate MPA CRC AE\n");
+ info.ae_code = IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR;
+ info.ae_src = IRDMA_AE_SOURCE_RQ;
+ irdma_gen_ae(rf, qp, &info, false);
+}
+
+/**
+ * irdma_ieq_check_mpacrc - check if mpa crc is OK
+ * @addr: address of buffer for crc
+ * @len: length of buffer
+ * @val: value to be compared
+ */
+int irdma_ieq_check_mpacrc(const void *addr, u32 len, u32 val)
+{
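+ /* recompute CRC32c over the buffer and compare with the received value */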
+ if ((__force u32)cpu_to_le32(~crc32c(~0, addr, len)) != val)
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * irdma_ieq_get_qp - get qp based on quad in puda buffer
+ * @dev: hardware control device structure
+ * @buf: receive puda buffer on exception q
+ */
+struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev,
+ struct irdma_puda_buf *buf)
+{
+ struct irdma_qp *iwqp;
+ struct irdma_cm_node *cm_node;
+ struct irdma_device *iwdev = buf->vsi->back_vsi;
+ u32 loc_addr[4] = {};
+ u32 rem_addr[4] = {};
+ u16 loc_port, rem_port;
+ struct ipv6hdr *ip6h;
+ struct iphdr *iph = (struct iphdr *)buf->iph;
+ struct tcphdr *tcph = (struct tcphdr *)buf->tcph;
+
+ if (iph->version == 4) {
+ loc_addr[0] = ntohl(iph->daddr);
+ rem_addr[0] = ntohl(iph->saddr);
+ } else {
+ ip6h = (struct ipv6hdr *)buf->iph;
+ irdma_copy_ip_ntohl(loc_addr, ip6h->daddr.in6_u.u6_addr32);
+ irdma_copy_ip_ntohl(rem_addr, ip6h->saddr.in6_u.u6_addr32);
+ }
+ loc_port = ntohs(tcph->dest);
+ rem_port = ntohs(tcph->source);
+ cm_node = irdma_find_node(&iwdev->cm_core, rem_port, rem_addr, loc_port,
+ loc_addr, buf->vlan_valid ? buf->vlan_id : 0xFFFF);
+ if (!cm_node)
+ return NULL;
+
+ iwqp = cm_node->iwqp;
+ irdma_rem_ref_cm_node(cm_node);
+
+ return &iwqp->sc_qp;
+}
+
+/**
+ * irdma_send_ieq_ack - ACKs for duplicate or OOO partial FPDUs
+ * @qp: qp ptr
+ */
+void irdma_send_ieq_ack(struct irdma_sc_qp *qp)
+{
+ struct irdma_cm_node *cm_node = ((struct irdma_qp *)qp->qp_uk.back_qp)->cm_node;
+ struct irdma_puda_buf *buf = qp->pfpdu.lastrcv_buf;
+ struct tcphdr *tcph = (struct tcphdr *)buf->tcph;
+
+ cm_node->tcp_cntxt.rcv_nxt = qp->pfpdu.nextseqnum;
+ cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+
+ irdma_send_ack(cm_node);
+}
+
+/**
+ * irdma_puda_ieq_get_ah_info - get AH info from IEQ buffer
+ * @qp: qp pointer
+ * @ah_info: AH info pointer
+ */
+void irdma_puda_ieq_get_ah_info(struct irdma_sc_qp *qp,
+ struct irdma_ah_info *ah_info)
+{
+ struct irdma_puda_buf *buf = qp->pfpdu.ah_buf;
+ struct iphdr *iph;
+ struct ipv6hdr *ip6h;
+
+ memset(ah_info, 0, sizeof(*ah_info));
+ ah_info->do_lpbk = true;
+ ah_info->vlan_tag = buf->vlan_id;
+ ah_info->insert_vlan_tag = buf->vlan_valid;
+ ah_info->ipv4_valid = buf->ipv4;
+ ah_info->vsi = qp->vsi;
+
+ if (buf->smac_valid)
+ ether_addr_copy(ah_info->mac_addr, buf->smac);
+
+ if (buf->ipv4) {
+ ah_info->ipv4_valid = true;
+ iph = (struct iphdr *)buf->iph;
+ ah_info->hop_ttl = iph->ttl;
+ ah_info->tc_tos = iph->tos;
+ ah_info->dest_ip_addr[0] = ntohl(iph->daddr);
+ ah_info->src_ip_addr[0] = ntohl(iph->saddr);
+ } else {
+ ip6h = (struct ipv6hdr *)buf->iph;
+ ah_info->hop_ttl = ip6h->hop_limit;
+ ah_info->tc_tos = ip6h->priority;
+ irdma_copy_ip_ntohl(ah_info->dest_ip_addr,
+ ip6h->daddr.in6_u.u6_addr32);
+ irdma_copy_ip_ntohl(ah_info->src_ip_addr,
+ ip6h->saddr.in6_u.u6_addr32);
+ }
+
+ ah_info->dst_arpindex = irdma_arp_table(dev_to_rf(qp->dev),
+ ah_info->dest_ip_addr,
+ ah_info->ipv4_valid,
+ NULL, IRDMA_ARP_RESOLVE);
+}
+
+/**
+ * irdma_gen1_ieq_update_tcpip_info - update tcpip in the buffer
+ * @buf: puda to update
+ * @len: length of buffer
+ * @seqnum: seq number for tcp
+ */
+static void irdma_gen1_ieq_update_tcpip_info(struct irdma_puda_buf *buf,
+ u16 len, u32 seqnum)
+{
+ struct tcphdr *tcph;
+ struct iphdr *iph;
+ u16 iphlen;
+ u16 pktsize;
+ u8 *addr = buf->mem.va;
+
+ iphlen = (buf->ipv4) ? 20 : 40;
+ iph = (struct iphdr *)(addr + buf->maclen);
+ tcph = (struct tcphdr *)(addr + buf->maclen + iphlen);
+ pktsize = len + buf->tcphlen + iphlen;
+ iph->tot_len = htons(pktsize);
+ tcph->seq = htonl(seqnum);
+}
+
+/**
+ * irdma_ieq_update_tcpip_info - update tcpip in the buffer
+ * @buf: puda to update
+ * @len: length of buffer
+ * @seqnum: seq number for tcp
+ */
+void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len,
+ u32 seqnum)
+{
+ struct tcphdr *tcph;
+ u8 *addr;
+
+ if (buf->vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
+ return irdma_gen1_ieq_update_tcpip_info(buf, len, seqnum);
+
+ addr = buf->mem.va;
+ tcph = (struct tcphdr *)addr;
+ tcph->seq = htonl(seqnum);
+}
+
+/**
+ * irdma_gen1_puda_get_tcpip_info - get tcpip info from puda
+ * buffer
+ * @info: to get information
+ * @buf: puda buffer
+ */
+static int irdma_gen1_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
+ struct irdma_puda_buf *buf)
+{
+ struct iphdr *iph;
+ struct ipv6hdr *ip6h;
+ struct tcphdr *tcph;
+ u16 iphlen;
+ u16 pkt_len;
+ u8 *mem = buf->mem.va;
+ struct ethhdr *ethh = buf->mem.va;
+
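+ /* 802.1Q tagged frame: extract the VLAN ID from the tag */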
+ if (ethh->h_proto == htons(ETH_P_8021Q)) {
+ info->vlan_valid = true;
+ buf->vlan_id = ntohs(((struct vlan_ethhdr *)ethh)->h_vlan_TCI) &
+ VLAN_VID_MASK;
+ }
+
+ buf->maclen = (info->vlan_valid) ? 18 : 14;
+ iphlen = (info->l3proto) ? 40 : 20;
+ buf->ipv4 = (info->l3proto) ? false : true;
+ buf->iph = mem + buf->maclen;
+ iph = (struct iphdr *)buf->iph;
+ buf->tcph = buf->iph + iphlen;
+ tcph = (struct tcphdr *)buf->tcph;
+
+ if (buf->ipv4) {
+ pkt_len = ntohs(iph->tot_len);
+ } else {
+ ip6h = (struct ipv6hdr *)buf->iph;
+ pkt_len = ntohs(ip6h->payload_len) + iphlen;
+ }
+
+ buf->totallen = pkt_len + buf->maclen;
+
+ if (info->payload_len < buf->totallen) {
+ ibdev_dbg(to_ibdev(buf->vsi->dev),
+ "ERR: payload_len = 0x%x totallen expected0x%x\n",
+ info->payload_len, buf->totallen);
+ return -EINVAL;
+ }
+
+ buf->tcphlen = tcph->doff << 2;
+ buf->datalen = pkt_len - iphlen - buf->tcphlen;
+ buf->data = buf->datalen ? buf->tcph + buf->tcphlen : NULL;
+ buf->hdrlen = buf->maclen + iphlen + buf->tcphlen;
+ buf->seqnum = ntohl(tcph->seq);
+
+ return 0;
+}
+
+/**
+ * irdma_puda_get_tcpip_info - get tcpip info from puda buffer
+ * @info: to get information
+ * @buf: puda buffer
+ */
+int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
+ struct irdma_puda_buf *buf)
+{
+ struct tcphdr *tcph;
+ u32 pkt_len;
+ u8 *mem;
+
+ if (buf->vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
+ return irdma_gen1_puda_get_tcpip_info(info, buf);
+
+ mem = buf->mem.va;
+ buf->vlan_valid = info->vlan_valid;
+ if (info->vlan_valid)
+ buf->vlan_id = info->vlan;
+
+ buf->ipv4 = info->ipv4;
+ if (buf->ipv4)
+ buf->iph = mem + IRDMA_IPV4_PAD;
+ else
+ buf->iph = mem;
+
+ buf->tcph = mem + IRDMA_TCP_OFFSET;
+ tcph = (struct tcphdr *)buf->tcph;
+ pkt_len = info->payload_len;
+ buf->totallen = pkt_len;
+ buf->tcphlen = tcph->doff << 2;
+ buf->datalen = pkt_len - IRDMA_TCP_OFFSET - buf->tcphlen;
+ buf->data = buf->datalen ? buf->tcph + buf->tcphlen : NULL;
+ buf->hdrlen = IRDMA_TCP_OFFSET + buf->tcphlen;
+ buf->seqnum = ntohl(tcph->seq);
+
+ if (info->smac_valid) {
+ ether_addr_copy(buf->smac, info->smac);
+ buf->smac_valid = true;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_hw_stats_timeout - Stats timer-handler which updates all HW stats
+ * @t: timer_list pointer
+ */
+static void irdma_hw_stats_timeout(struct timer_list *t)
+{
+ struct irdma_vsi_pestat *pf_devstat =
+ timer_container_of(pf_devstat, t, stats_timer);
+ struct irdma_sc_vsi *sc_vsi = pf_devstat->vsi;
+
+ if (sc_vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
+ irdma_cqp_gather_stats_cmd(sc_vsi->dev, sc_vsi->pestat, false);
+ else
+ irdma_cqp_gather_stats_gen1(sc_vsi->dev, sc_vsi->pestat);
+
+ mod_timer(&pf_devstat->stats_timer,
+ jiffies + msecs_to_jiffies(STATS_TIMER_DELAY));
+}
+
+/**
+ * irdma_hw_stats_start_timer - Start periodic stats timer
+ * @vsi: vsi structure pointer
+ */
+void irdma_hw_stats_start_timer(struct irdma_sc_vsi *vsi)
+{
+ struct irdma_vsi_pestat *devstat = vsi->pestat;
+
+ timer_setup(&devstat->stats_timer, irdma_hw_stats_timeout, 0);
+ mod_timer(&devstat->stats_timer,
+ jiffies + msecs_to_jiffies(STATS_TIMER_DELAY));
+}
+
+/**
+ * irdma_hw_stats_stop_timer - Delete periodic stats timer
+ * @vsi: pointer to vsi structure
+ */
+void irdma_hw_stats_stop_timer(struct irdma_sc_vsi *vsi)
+{
+ struct irdma_vsi_pestat *devstat = vsi->pestat;
+
+ timer_delete_sync(&devstat->stats_timer);
+}
+
+/**
+ * irdma_process_stats - check for wrap and update stats
+ * @pestat: stats structure pointer
+ */
+static inline void irdma_process_stats(struct irdma_vsi_pestat *pestat)
+{
+ sc_vsi_update_stats(pestat->vsi);
+}
+
+/**
+ * irdma_cqp_gather_stats_gen1 - Gather stats
+ * @dev: pointer to device structure
+ * @pestat: statistics structure
+ */
+void irdma_cqp_gather_stats_gen1(struct irdma_sc_dev *dev,
+ struct irdma_vsi_pestat *pestat)
+{
+ struct irdma_gather_stats *gather_stats =
+ pestat->gather_info.gather_stats_va;
+ const struct irdma_hw_stat_map *map = dev->hw_stats_map;
+ u16 max_stats_idx = dev->hw_attrs.max_stat_idx;
+ u32 stats_inst_offset_32;
+ u32 stats_inst_offset_64;
+ u64 new_val;
+ u16 i;
+
+ stats_inst_offset_32 = (pestat->gather_info.use_stats_inst) ?
+ pestat->gather_info.stats_inst_index :
+ pestat->hw->hmc.hmc_fn_id;
+ stats_inst_offset_32 *= 4;
+ stats_inst_offset_64 = stats_inst_offset_32 * 2;
+
+ for (i = 0; i < max_stats_idx; i++) {
+ if (map[i].bitmask <= IRDMA_MAX_STATS_32)
+ new_val = rd32(dev->hw,
+ dev->hw_stats_regs[i] + stats_inst_offset_32);
+ else
+ new_val = rd64(dev->hw,
+ dev->hw_stats_regs[i] + stats_inst_offset_64);
+ gather_stats->val[map[i].byteoff / sizeof(u64)] = new_val;
+ }
+
+ irdma_process_stats(pestat);
+}
+
+/**
+ * irdma_process_cqp_stats - check for wrap and update stats
+ * @cqp_request: cqp_request structure pointer
+ */
+static void irdma_process_cqp_stats(struct irdma_cqp_request *cqp_request)
+{
+ struct irdma_vsi_pestat *pestat = cqp_request->param;
+
+ irdma_process_stats(pestat);
+}
+
+/**
+ * irdma_cqp_gather_stats_cmd - Gather stats
+ * @dev: pointer to device structure
+ * @pestat: pointer to stats info
+ * @wait: flag to wait or not wait for stats
+ */
+int irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev,
+ struct irdma_vsi_pestat *pestat, bool wait)
+{
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+ struct irdma_cqp *iwcqp = &rf->cqp;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, wait);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_STATS_GATHER;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.stats_gather.info = pestat->gather_info;
+ cqp_info->in.u.stats_gather.scratch = (uintptr_t)cqp_request;
+ cqp_info->in.u.stats_gather.cqp = &rf->cqp.sc_cqp;
+ cqp_request->param = pestat;
+ if (!wait)
+ cqp_request->callback_fcn = irdma_process_cqp_stats;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ if (wait)
+ irdma_process_stats(pestat);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_cqp_stats_inst_cmd - Allocate/free stats instance
+ * @vsi: pointer to vsi structure
+ * @cmd: command to allocate or free
+ * @stats_info: pointer to allocate stats info
+ */
+int irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi, u8 cmd,
+ struct irdma_stats_inst_info *stats_info)
+{
+ struct irdma_pci_f *rf = dev_to_rf(vsi->dev);
+ struct irdma_cqp *iwcqp = &rf->cqp;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+ bool wait = false;
+
+ if (cmd == IRDMA_OP_STATS_ALLOCATE)
+ wait = true;
+ cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, wait);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = cmd;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.stats_manage.info = *stats_info;
+ cqp_info->in.u.stats_manage.scratch = (uintptr_t)cqp_request;
+ cqp_info->in.u.stats_manage.cqp = &rf->cqp.sc_cqp;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ if (wait)
+ stats_info->stats_idx = cqp_request->compl_info.op_ret_val;
+ irdma_put_cqp_request(iwcqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_cqp_ceq_cmd - Create/Destroy CEQs after CEQ 0
+ * @dev: pointer to device info
+ * @sc_ceq: pointer to ceq structure
+ * @op: Create or Destroy
+ */
+int irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_ceq *sc_ceq,
+ u8 op)
+{
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->post_sq = 1;
+ cqp_info->cqp_cmd = op;
+ cqp_info->in.u.ceq_create.ceq = sc_ceq;
+ cqp_info->in.u.ceq_create.scratch = (uintptr_t)cqp_request;
+
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_cqp_aeq_cmd - Create/Destroy AEQ
+ * @dev: pointer to device info
+ * @sc_aeq: pointer to aeq structure
+ * @op: Create or Destroy
+ */
+int irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_aeq *sc_aeq,
+ u8 op)
+{
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->post_sq = 1;
+ cqp_info->cqp_cmd = op;
+ cqp_info->in.u.aeq_create.aeq = sc_aeq;
+ cqp_info->in.u.aeq_create.scratch = (uintptr_t)cqp_request;
+
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_cqp_ws_node_cmd - Add/modify/delete ws node
+ * @dev: pointer to device structure
+ * @cmd: Add, modify or delete
+ * @node_info: pointer to ws node info
+ */
+int irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd,
+ struct irdma_ws_node_info *node_info)
+{
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+ struct irdma_cqp *iwcqp = &rf->cqp;
+ struct irdma_sc_cqp *cqp = &iwcqp->sc_cqp;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+ bool poll;
+
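+ /* before CEQs are operational, CQP completions must be polled rather than waited on */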
+ if (!rf->sc_dev.ceq_valid)
+ poll = true;
+ else
+ poll = false;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, !poll);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = cmd;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.ws_node.info = *node_info;
+ cqp_info->in.u.ws_node.cqp = cqp;
+ cqp_info->in.u.ws_node.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ if (status)
+ goto exit;
+
+ if (poll) {
+ struct irdma_ccq_cqe_info compl_info;
+
+ status = irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_WORK_SCHED_NODE,
+ &compl_info);
+ node_info->qs_handle = compl_info.op_ret_val;
+ ibdev_dbg(&rf->iwdev->ibdev, "DCB: opcode=%d, compl_info.retval=%d\n",
+ compl_info.op_code, compl_info.op_ret_val);
+ } else {
+ node_info->qs_handle = cqp_request->compl_info.op_ret_val;
+ }
+
+exit:
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_ah_cqp_op - perform an AH cqp operation
+ * @rf: RDMA PCI function
+ * @sc_ah: address handle
+ * @cmd: AH operation
+ * @wait: wait if true
+ * @callback_fcn: Callback function on CQP op completion
+ * @cb_param: parameter for callback function
+ *
+ * returns errno
+ */
+int irdma_ah_cqp_op(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, u8 cmd,
+ bool wait,
+ void (*callback_fcn)(struct irdma_cqp_request *),
+ void *cb_param)
+{
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+
+ if (cmd != IRDMA_OP_AH_CREATE && cmd != IRDMA_OP_AH_DESTROY)
+ return -EINVAL;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, wait);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = cmd;
+ cqp_info->post_sq = 1;
+ if (cmd == IRDMA_OP_AH_CREATE) {
+ cqp_info->in.u.ah_create.info = sc_ah->ah_info;
+ cqp_info->in.u.ah_create.scratch = (uintptr_t)cqp_request;
+ cqp_info->in.u.ah_create.cqp = &rf->cqp.sc_cqp;
+ } else if (cmd == IRDMA_OP_AH_DESTROY) {
+ cqp_info->in.u.ah_destroy.info = sc_ah->ah_info;
+ cqp_info->in.u.ah_destroy.scratch = (uintptr_t)cqp_request;
+ cqp_info->in.u.ah_destroy.cqp = &rf->cqp.sc_cqp;
+ }
+
+ if (!wait) {
+ cqp_request->callback_fcn = callback_fcn;
+ cqp_request->param = cb_param;
+ }
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+
+ if (status)
+ return -ENOMEM;
+
+ if (wait)
+ sc_ah->ah_info.ah_valid = (cmd == IRDMA_OP_AH_CREATE);
+
+ return 0;
+}
+
+/**
+ * irdma_ieq_ah_cb - callback after creation of AH for IEQ
+ * @cqp_request: pointer to cqp_request of create AH
+ */
+static void irdma_ieq_ah_cb(struct irdma_cqp_request *cqp_request)
+{
+ struct irdma_sc_qp *qp = cqp_request->param;
+ struct irdma_sc_ah *sc_ah = qp->pfpdu.ah;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->pfpdu.lock, flags);
+ if (!cqp_request->compl_info.op_ret_val) {
+ sc_ah->ah_info.ah_valid = true;
+ irdma_ieq_process_fpdus(qp, qp->vsi->ieq);
+ } else {
+ sc_ah->ah_info.ah_valid = false;
+ irdma_ieq_cleanup_qp(qp->vsi->ieq, qp);
+ }
+ spin_unlock_irqrestore(&qp->pfpdu.lock, flags);
+}
+
+/**
+ * irdma_ilq_ah_cb - callback after creation of AH for ILQ
+ * @cqp_request: pointer to cqp_request of create AH
+ */
+static void irdma_ilq_ah_cb(struct irdma_cqp_request *cqp_request)
+{
+ struct irdma_cm_node *cm_node = cqp_request->param;
+ struct irdma_sc_ah *sc_ah = cm_node->ah;
+
+ sc_ah->ah_info.ah_valid = !cqp_request->compl_info.op_ret_val;
+ irdma_add_conn_est_qh(cm_node);
+}
+
+/**
+ * irdma_puda_create_ah - create AH for ILQ/IEQ qp's
+ * @dev: device pointer
+ * @ah_info: Address handle info
+ * @wait: When true will wait for operation to complete
+ * @type: ILQ/IEQ
+ * @cb_param: Callback param when not waiting
+ * @ah_ret: Returned pointer to address handle if created
+ *
+ */
+int irdma_puda_create_ah(struct irdma_sc_dev *dev,
+ struct irdma_ah_info *ah_info, bool wait,
+ enum puda_rsrc_type type, void *cb_param,
+ struct irdma_sc_ah **ah_ret)
+{
+ struct irdma_sc_ah *ah;
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+ int err;
+
+ ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
+ *ah_ret = ah;
+ if (!ah)
+ return -ENOMEM;
+
+ err = irdma_alloc_rsrc(rf, rf->allocated_ahs, rf->max_ah,
+ &ah_info->ah_idx, &rf->next_ah);
+ if (err)
+ goto err_free;
+
+ ah->dev = dev;
+ ah->ah_info = *ah_info;
+
+ if (type == IRDMA_PUDA_RSRC_TYPE_ILQ)
+ err = irdma_ah_cqp_op(rf, ah, IRDMA_OP_AH_CREATE, wait,
+ irdma_ilq_ah_cb, cb_param);
+ else
+ err = irdma_ah_cqp_op(rf, ah, IRDMA_OP_AH_CREATE, wait,
+ irdma_ieq_ah_cb, cb_param);
+
+ if (err)
+ goto error;
+ return 0;
+
+error:
+ irdma_free_rsrc(rf, rf->allocated_ahs, ah->ah_info.ah_idx);
+err_free:
+ kfree(ah);
+ *ah_ret = NULL;
+ return -ENOMEM;
+}
+
+/**
+ * irdma_puda_free_ah - free a puda address handle
+ * @dev: device pointer
+ * @ah: The address handle to free
+ */
+void irdma_puda_free_ah(struct irdma_sc_dev *dev, struct irdma_sc_ah *ah)
+{
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+
+ if (!ah)
+ return;
+
+ if (ah->ah_info.ah_valid) {
+ irdma_ah_cqp_op(rf, ah, IRDMA_OP_AH_DESTROY, false, NULL, NULL);
+ irdma_free_rsrc(rf, rf->allocated_ahs, ah->ah_info.ah_idx);
+ }
+
+ kfree(ah);
+}
+
+/**
+ * irdma_gsi_ud_qp_ah_cb - callback after creation of AH for GSI/UD QP
+ * @cqp_request: pointer to cqp_request of create AH
+ */
+void irdma_gsi_ud_qp_ah_cb(struct irdma_cqp_request *cqp_request)
+{
+ struct irdma_sc_ah *sc_ah = cqp_request->param;
+
+ if (!cqp_request->compl_info.op_ret_val)
+ sc_ah->ah_info.ah_valid = true;
+ else
+ sc_ah->ah_info.ah_valid = false;
+}
+
+/**
+ * irdma_prm_add_pble_mem - add memory to pble resources
+ * @pprm: pble resource manager
+ * @pchunk: chunk of memory to add
+ */
+int irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm,
+ struct irdma_chunk *pchunk)
+{
+ u64 sizeofbitmap;
+
+ if (pchunk->size & 0xfff)
+ return -EINVAL;
+
+ sizeofbitmap = (u64)pchunk->size >> pprm->pble_shift;
+
+ pchunk->bitmapbuf = bitmap_zalloc(sizeofbitmap, GFP_KERNEL);
+ if (!pchunk->bitmapbuf)
+ return -ENOMEM;
+
+ pchunk->sizeofbitmap = sizeofbitmap;
+ /* each pble is 8 bytes hence shift by 3 */
+ pprm->total_pble_alloc += pchunk->size >> 3;
+ pprm->free_pble_cnt += pchunk->size >> 3;
+
+ return 0;
+}
+
+/**
+ * irdma_prm_get_pbles - get pbles from prm
+ * @pprm: pble resource manager
+ * @chunkinfo: information about the chunk where pbles were acquired
+ * @mem_size: size of pble memory needed
+ * @vaddr: returns virtual address of pble memory
+ * @fpm_addr: returns fpm address of pble memory
+ */
+int irdma_prm_get_pbles(struct irdma_pble_prm *pprm,
+ struct irdma_pble_chunkinfo *chunkinfo, u64 mem_size,
+ u64 **vaddr, u64 *fpm_addr)
+{
+ u64 bits_needed;
+ u64 bit_idx = PBLE_INVALID_IDX;
+ struct irdma_chunk *pchunk = NULL;
+ struct list_head *chunk_entry = pprm->clist.next;
+ u32 offset;
+ unsigned long flags;
+
+ *vaddr = NULL;
+ *fpm_addr = 0;
+
+ bits_needed = DIV_ROUND_UP_ULL(mem_size, BIT_ULL(pprm->pble_shift));
+
+ spin_lock_irqsave(&pprm->prm_lock, flags);
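+ /* first-fit search across chunks for a contiguous run of free pble bits */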
+ while (chunk_entry != &pprm->clist) {
+ pchunk = (struct irdma_chunk *)chunk_entry;
+ bit_idx = bitmap_find_next_zero_area(pchunk->bitmapbuf,
+ pchunk->sizeofbitmap, 0,
+ bits_needed, 0);
+ if (bit_idx < pchunk->sizeofbitmap)
+ break;
+
+ /* advance to the next chunk in the list */
+ chunk_entry = pchunk->list.next;
+ }
+
+ if (!pchunk || bit_idx >= pchunk->sizeofbitmap) {
+ spin_unlock_irqrestore(&pprm->prm_lock, flags);
+ return -ENOMEM;
+ }
+
+ bitmap_set(pchunk->bitmapbuf, bit_idx, bits_needed);
+ offset = bit_idx << pprm->pble_shift;
+ *vaddr = pchunk->vaddr + offset;
+ *fpm_addr = pchunk->fpm_addr + offset;
+
+ chunkinfo->pchunk = pchunk;
+ chunkinfo->bit_idx = bit_idx;
+ chunkinfo->bits_used = bits_needed;
+ /* each pble is 8 bytes, hence the shift by 3 */
+ pprm->free_pble_cnt -= chunkinfo->bits_used << (pprm->pble_shift - 3);
+ spin_unlock_irqrestore(&pprm->prm_lock, flags);
+
+ return 0;
+}
+
+/**
+ * irdma_prm_return_pbles - return pbles back to prm
+ * @pprm: pble resource manager
+ * @chunkinfo: chunk where pbles were acquired and are to be freed
+ */
+void irdma_prm_return_pbles(struct irdma_pble_prm *pprm,
+ struct irdma_pble_chunkinfo *chunkinfo)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pprm->prm_lock, flags);
+ pprm->free_pble_cnt += chunkinfo->bits_used << (pprm->pble_shift - 3);
+ bitmap_clear(chunkinfo->pchunk->bitmapbuf, chunkinfo->bit_idx,
+ chunkinfo->bits_used);
+ spin_unlock_irqrestore(&pprm->prm_lock, flags);
+}
+
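+/**
+ * irdma_map_vm_page_list - DMA-map the pages backing a vmalloc'd buffer
+ * @hw: pointer to the hw structure containing the mapping device
+ * @va: virtual address of the vmalloc'd buffer
+ * @pg_dma: array that receives the per-page DMA addresses
+ * @pg_cnt: number of pages to map
+ */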
+int irdma_map_vm_page_list(struct irdma_hw *hw, void *va, dma_addr_t *pg_dma,
+ u32 pg_cnt)
+{
+ struct page *vm_page;
+ int i;
+ u8 *addr;
+
+ addr = (u8 *)(uintptr_t)va;
+ for (i = 0; i < pg_cnt; i++) {
+ vm_page = vmalloc_to_page(addr);
+ if (!vm_page)
+ goto err;
+
+ pg_dma[i] = dma_map_page(hw->device, vm_page, 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(hw->device, pg_dma[i]))
+ goto err;
+
+ addr += PAGE_SIZE;
+ }
+
+ return 0;
+
+err:
+ irdma_unmap_vm_page_list(hw, pg_dma, i);
+ return -ENOMEM;
+}
+
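+/**
+ * irdma_unmap_vm_page_list - unmap pages previously mapped for DMA
+ * @hw: pointer to the hw structure containing the mapping device
+ * @pg_dma: array of DMA addresses to unmap
+ * @pg_cnt: number of pages to unmap
+ */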
+void irdma_unmap_vm_page_list(struct irdma_hw *hw, dma_addr_t *pg_dma, u32 pg_cnt)
+{
+ int i;
+
+ for (i = 0; i < pg_cnt; i++)
+ dma_unmap_page(hw->device, pg_dma[i], PAGE_SIZE, DMA_BIDIRECTIONAL);
+}
+
+/**
+ * irdma_pble_free_paged_mem - free virtual paged memory
+ * @chunk: chunk to free with paged memory
+ */
+void irdma_pble_free_paged_mem(struct irdma_chunk *chunk)
+{
+ if (!chunk->pg_cnt)
+ goto done;
+
+ irdma_unmap_vm_page_list(chunk->dev->hw, chunk->dmainfo.dmaaddrs,
+ chunk->pg_cnt);
+
+done:
+ kfree(chunk->dmainfo.dmaaddrs);
+ chunk->dmainfo.dmaaddrs = NULL;
+ vfree(chunk->vaddr);
+ chunk->vaddr = NULL;
+ chunk->type = 0;
+}
+
+/**
+ * irdma_pble_get_paged_mem - allocate paged memory for pbles
+ * @chunk: chunk to add for paged memory
+ * @pg_cnt: number of pages needed
+ */
+int irdma_pble_get_paged_mem(struct irdma_chunk *chunk, u32 pg_cnt)
+{
+ u32 size;
+ void *va;
+
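+ /* one 8-byte DMA address per page, hence the allocation of pg_cnt << 3 bytes */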
+ chunk->dmainfo.dmaaddrs = kzalloc(pg_cnt << 3, GFP_KERNEL);
+ if (!chunk->dmainfo.dmaaddrs)
+ return -ENOMEM;
+
+ size = PAGE_SIZE * pg_cnt;
+ va = vmalloc(size);
+ if (!va)
+ goto err;
+
+ if (irdma_map_vm_page_list(chunk->dev->hw, va, chunk->dmainfo.dmaaddrs,
+ pg_cnt)) {
+ vfree(va);
+ goto err;
+ }
+ chunk->vaddr = va;
+ chunk->size = size;
+ chunk->pg_cnt = pg_cnt;
+ chunk->type = PBLE_SD_PAGED;
+
+ return 0;
+err:
+ kfree(chunk->dmainfo.dmaaddrs);
+ chunk->dmainfo.dmaaddrs = NULL;
+
+ return -ENOMEM;
+}
+
+/**
+ * irdma_alloc_ws_node_id - Allocate a tx scheduler node ID
+ * @dev: device pointer
+ */
+u16 irdma_alloc_ws_node_id(struct irdma_sc_dev *dev)
+{
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+ u32 next = 1;
+ u32 node_id;
+
+ if (irdma_alloc_rsrc(rf, rf->allocated_ws_nodes, rf->max_ws_node_id,
+ &node_id, &next))
+ return IRDMA_WS_NODE_INVALID;
+
+ return (u16)node_id;
+}
+
+/**
+ * irdma_free_ws_node_id - Free a tx scheduler node ID
+ * @dev: device pointer
+ * @node_id: Work scheduler node ID
+ */
+void irdma_free_ws_node_id(struct irdma_sc_dev *dev, u16 node_id)
+{
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+
+ irdma_free_rsrc(rf, rf->allocated_ws_nodes, (u32)node_id);
+}
+
+/**
+ * irdma_modify_qp_to_err - Modify a QP to error
+ * @sc_qp: qp structure
+ */
+void irdma_modify_qp_to_err(struct irdma_sc_qp *sc_qp)
+{
+ struct irdma_qp *qp = sc_qp->qp_uk.back_qp;
+ struct ib_qp_attr attr;
+
+ if (qp->iwdev->rf->reset)
+ return;
+ attr.qp_state = IB_QPS_ERR;
+
+ if (rdma_protocol_roce(qp->ibqp.device, 1))
+ irdma_modify_qp_roce(&qp->ibqp, &attr, IB_QP_STATE, NULL);
+ else
+ irdma_modify_qp(&qp->ibqp, &attr, IB_QP_STATE, NULL);
+}
+
+void irdma_ib_qp_event(struct irdma_qp *iwqp, enum irdma_qp_event_type event)
+{
+ struct ib_event ibevent;
+
+ if (!iwqp->ibqp.event_handler)
+ return;
+
+ switch (event) {
+ case IRDMA_QP_EVENT_CATASTROPHIC:
+ ibevent.event = IB_EVENT_QP_FATAL;
+ break;
+ case IRDMA_QP_EVENT_ACCESS_ERR:
+ ibevent.event = IB_EVENT_QP_ACCESS_ERR;
+ break;
+ case IRDMA_QP_EVENT_REQ_ERR:
+ ibevent.event = IB_EVENT_QP_REQ_ERR;
+ break;
+ }
+ ibevent.device = iwqp->ibqp.device;
+ ibevent.element.qp = &iwqp->ibqp;
+ iwqp->ibqp.event_handler(&ibevent, iwqp->ibqp.qp_context);
+}
+
+void irdma_remove_cmpls_list(struct irdma_cq *iwcq)
+{
+ struct irdma_cmpl_gen *cmpl_node;
+ struct list_head *tmp_node, *list_node;
+
+ list_for_each_safe (list_node, tmp_node, &iwcq->cmpl_generated) {
+ cmpl_node = list_entry(list_node, struct irdma_cmpl_gen, list);
+ list_del(&cmpl_node->list);
+ kfree(cmpl_node);
+ }
+}
+
+int irdma_generated_cmpls(struct irdma_cq *iwcq, struct irdma_cq_poll_info *cq_poll_info)
+{
+ struct irdma_cmpl_gen *cmpl;
+
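+ /* pop the oldest software-generated completion, if any, and copy it to the poller */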
+ if (list_empty(&iwcq->cmpl_generated))
+ return -ENOENT;
+ cmpl = list_first_entry_or_null(&iwcq->cmpl_generated, struct irdma_cmpl_gen, list);
+ list_del(&cmpl->list);
+ memcpy(cq_poll_info, &cmpl->cpi, sizeof(*cq_poll_info));
+ kfree(cmpl);
+
+ ibdev_dbg(iwcq->ibcq.device,
+ "VERBS: %s: Poll artificially generated completion for QP 0x%X, op %u, wr_id=0x%llx\n",
+ __func__, cq_poll_info->qp_id, cq_poll_info->op_type,
+ cq_poll_info->wr_id);
+
+ return 0;
+}
+
+/**
+ * irdma_set_cpi_common_values - fill in values for polling info struct
+ * @cpi: resulting structure of cq_poll_info type
+ * @qp: QP for which the completion is generated
+ * @qp_num: id of the QP
+ */
+static void irdma_set_cpi_common_values(struct irdma_cq_poll_info *cpi,
+ struct irdma_qp_uk *qp, u32 qp_num)
+{
+ cpi->comp_status = IRDMA_COMPL_STATUS_FLUSHED;
+ cpi->error = true;
+ cpi->major_err = IRDMA_FLUSH_MAJOR_ERR;
+ cpi->minor_err = FLUSH_GENERAL_ERR;
+ cpi->qp_handle = (irdma_qp_handle)(uintptr_t)qp;
+ cpi->qp_id = qp_num;
+}
+
+static inline void irdma_comp_handler(struct irdma_cq *cq)
+{
+ if (!cq->ibcq.comp_handler)
+ return;
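+ /* call the completion handler only if the CQ is armed; the armed flag is cleared atomically */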
+ if (atomic_cmpxchg(&cq->armed, 1, 0))
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+}
+
+void irdma_generate_flush_completions(struct irdma_qp *iwqp)
+{
+ struct irdma_qp_uk *qp = &iwqp->sc_qp.qp_uk;
+ struct irdma_ring *sq_ring = &qp->sq_ring;
+ struct irdma_ring *rq_ring = &qp->rq_ring;
+ struct irdma_cq *iwscq = iwqp->iwscq;
+ struct irdma_cq *iwrcq = iwqp->iwrcq;
+ struct irdma_cmpl_gen *cmpl;
+ __le64 *sw_wqe;
+ u64 wqe_qword;
+ u32 wqe_idx;
+ bool compl_generated = false;
+ unsigned long flags1;
+
+ spin_lock_irqsave(&iwscq->lock, flags1);
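+ /* generate SW completions only while the CQ has no pending HW completions; otherwise retry later via delayed work */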
+ if (irdma_uk_cq_empty(&iwscq->sc_cq.cq_uk)) {
+ unsigned long flags2;
+
+ spin_lock_irqsave(&iwqp->lock, flags2);
+ while (IRDMA_RING_MORE_WORK(*sq_ring)) {
+ cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC);
+ if (!cmpl) {
+ spin_unlock_irqrestore(&iwqp->lock, flags2);
+ spin_unlock_irqrestore(&iwscq->lock, flags1);
+ return;
+ }
+
+ wqe_idx = sq_ring->tail;
+ irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id);
+
+ cmpl->cpi.wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
+ sw_wqe = qp->sq_base[wqe_idx].elem;
+ get_64bit_val(sw_wqe, 24, &wqe_qword);
+ cmpl->cpi.op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, wqe_qword);
+ cmpl->cpi.q_type = IRDMA_CQE_QTYPE_SQ;
+ /* remove the SQ WR by moving SQ tail */
+ IRDMA_RING_SET_TAIL(*sq_ring,
+ sq_ring->tail + qp->sq_wrtrk_array[sq_ring->tail].quanta);
+ if (cmpl->cpi.op_type == IRDMAQP_OP_NOP) {
+ kfree(cmpl);
+ continue;
+ }
+ ibdev_dbg(iwscq->ibcq.device,
+ "DEV: %s: adding wr_id = 0x%llx SQ Completion to list qp_id=%d\n",
+ __func__, cmpl->cpi.wr_id, qp->qp_id);
+ list_add_tail(&cmpl->list, &iwscq->cmpl_generated);
+ compl_generated = true;
+ }
+ spin_unlock_irqrestore(&iwqp->lock, flags2);
+ spin_unlock_irqrestore(&iwscq->lock, flags1);
+ if (compl_generated)
+ irdma_comp_handler(iwscq);
+ } else {
+ spin_unlock_irqrestore(&iwscq->lock, flags1);
+ mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
+ msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
+ }
+
+ spin_lock_irqsave(&iwrcq->lock, flags1);
+ if (irdma_uk_cq_empty(&iwrcq->sc_cq.cq_uk)) {
+ unsigned long flags2;
+
+ spin_lock_irqsave(&iwqp->lock, flags2);
+ while (IRDMA_RING_MORE_WORK(*rq_ring)) {
+ cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC);
+ if (!cmpl) {
+ spin_unlock_irqrestore(&iwqp->lock, flags2);
+ spin_unlock_irqrestore(&iwrcq->lock, flags1);
+ return;
+ }
+
+ wqe_idx = rq_ring->tail;
+ irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id);
+
+ cmpl->cpi.wr_id = qp->rq_wrid_array[wqe_idx];
+ cmpl->cpi.op_type = IRDMA_OP_TYPE_REC;
+ cmpl->cpi.q_type = IRDMA_CQE_QTYPE_RQ;
+ /* remove the RQ WR by moving RQ tail */
+ IRDMA_RING_SET_TAIL(*rq_ring, rq_ring->tail + 1);
+ ibdev_dbg(iwrcq->ibcq.device,
+ "DEV: %s: adding wr_id = 0x%llx RQ Completion to list qp_id=%d, wqe_idx=%d\n",
+ __func__, cmpl->cpi.wr_id, qp->qp_id,
+ wqe_idx);
+ list_add_tail(&cmpl->list, &iwrcq->cmpl_generated);
+
+ compl_generated = true;
+ }
+ spin_unlock_irqrestore(&iwqp->lock, flags2);
+ spin_unlock_irqrestore(&iwrcq->lock, flags1);
+ if (compl_generated)
+ irdma_comp_handler(iwrcq);
+ } else {
+ spin_unlock_irqrestore(&iwrcq->lock, flags1);
+ mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
+ msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
+ }
+}
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
new file mode 100644
index 000000000000..6d9af41a2884
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -0,0 +1,5517 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include "main.h"
+
+/**
+ * irdma_query_device - get device attributes
+ * @ibdev: device pointer from stack
+ * @props: returning device attributes
+ * @udata: user data
+ */
+static int irdma_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props,
+ struct ib_udata *udata)
+{
+ struct irdma_device *iwdev = to_iwdev(ibdev);
+ struct irdma_pci_f *rf = iwdev->rf;
+ struct pci_dev *pcidev = iwdev->rf->pcidev;
+ struct irdma_hw_attrs *hw_attrs = &rf->sc_dev.hw_attrs;
+
+ if (udata->inlen || udata->outlen)
+ return -EINVAL;
+
+ memset(props, 0, sizeof(*props));
+ addrconf_addr_eui48((u8 *)&props->sys_image_guid,
+ iwdev->netdev->dev_addr);
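+ /* FW major version in the upper 32 bits, minor in the lower 32 bits */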
+ props->fw_ver = (u64)irdma_fw_major_ver(&rf->sc_dev) << 32 |
+ irdma_fw_minor_ver(&rf->sc_dev);
+ props->device_cap_flags = IB_DEVICE_MEM_WINDOW |
+ IB_DEVICE_MEM_MGT_EXTENSIONS;
+ if (hw_attrs->uk_attrs.hw_rev < IRDMA_GEN_3)
+ props->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
+ props->vendor_id = pcidev->vendor;
+ props->vendor_part_id = pcidev->device;
+
+ props->hw_ver = rf->pcidev->revision;
+ props->page_size_cap = hw_attrs->page_size_cap;
+ props->max_mr_size = hw_attrs->max_mr_size;
+ props->max_qp = rf->max_qp - rf->used_qps;
+ props->max_qp_wr = hw_attrs->max_qp_wr;
+ props->max_send_sge = hw_attrs->uk_attrs.max_hw_wq_frags;
+ props->max_recv_sge = hw_attrs->uk_attrs.max_hw_wq_frags;
+ props->max_cq = rf->max_cq - rf->used_cqs;
+ props->max_cqe = rf->max_cqe - 1;
+ props->max_mr = rf->max_mr - rf->used_mrs;
+ if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_3)
+ props->max_mw = props->max_mr;
+ props->max_pd = rf->max_pd - rf->used_pds;
+ props->max_sge_rd = hw_attrs->uk_attrs.max_hw_read_sges;
+ props->max_qp_rd_atom = hw_attrs->max_hw_ird;
+ props->max_qp_init_rd_atom = hw_attrs->max_hw_ord;
+ if (rdma_protocol_roce(ibdev, 1)) {
+ props->device_cap_flags |= IB_DEVICE_RC_RNR_NAK_GEN;
+ props->max_pkeys = IRDMA_PKEY_TBL_SZ;
+ }
+
+ props->max_ah = rf->max_ah;
+ props->max_mcast_grp = rf->max_mcg;
+ props->max_mcast_qp_attach = IRDMA_MAX_MGS_PER_CTX;
+ props->max_total_mcast_qp_attach = rf->max_qp * IRDMA_MAX_MGS_PER_CTX;
+ props->max_fast_reg_page_list_len = IRDMA_MAX_PAGES_PER_FMR;
+ props->max_srq = rf->max_srq - rf->used_srqs;
+ props->max_srq_wr = IRDMA_MAX_SRQ_WRS;
+ props->max_srq_sge = hw_attrs->uk_attrs.max_hw_wq_frags;
+ if (hw_attrs->uk_attrs.feature_flags & IRDMA_FEATURE_ATOMIC_OPS)
+ props->atomic_cap = IB_ATOMIC_HCA;
+ else
+ props->atomic_cap = IB_ATOMIC_NONE;
+ props->masked_atomic_cap = props->atomic_cap;
+ if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_3) {
+#define HCA_CORE_CLOCK_KHZ 1000000UL
+ props->timestamp_mask = GENMASK(31, 0);
+ props->hca_core_clock = HCA_CORE_CLOCK_KHZ;
+ }
+ if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_3)
+ props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
+
+ return 0;
+}
+
+/**
+ * irdma_query_port - get port attributes
+ * @ibdev: device pointer from stack
+ * @port: port number for query
+ * @props: returning device attributes
+ */
+static int irdma_query_port(struct ib_device *ibdev, u32 port,
+ struct ib_port_attr *props)
+{
+ struct irdma_device *iwdev = to_iwdev(ibdev);
+ struct net_device *netdev = iwdev->netdev;
+
+ /* no need to zero out props here; done by caller */
+
+ props->max_mtu = IB_MTU_4096;
+ props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
+ props->lid = 1;
+ props->lmc = 0;
+ props->sm_lid = 0;
+ props->sm_sl = 0;
+ if (netif_carrier_ok(netdev) && netif_running(netdev)) {
+ props->state = IB_PORT_ACTIVE;
+ props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
+ } else {
+ props->state = IB_PORT_DOWN;
+ props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
+ }
+
+ ib_get_eth_speed(ibdev, port, &props->active_speed,
+ &props->active_width);
+
+ if (rdma_protocol_roce(ibdev, 1)) {
+ props->gid_tbl_len = 32;
+ props->ip_gids = true;
+ props->pkey_tbl_len = IRDMA_PKEY_TBL_SZ;
+ } else {
+ props->gid_tbl_len = 1;
+ }
+ props->qkey_viol_cntr = 0;
+ props->port_cap_flags |= IB_PORT_CM_SUP | IB_PORT_REINIT_SUP;
+ props->max_msg_sz = iwdev->rf->sc_dev.hw_attrs.max_hw_outbound_msg_size;
+
+ return 0;
+}
+
+/**
+ * irdma_disassociate_ucontext - Disassociate user context
+ * @context: ib user context
+ */
+static void irdma_disassociate_ucontext(struct ib_ucontext *context)
+{
+}
+
+static int irdma_mmap_legacy(struct irdma_ucontext *ucontext,
+ struct vm_area_struct *vma)
+{
+ u64 pfn;
+
+ if (vma->vm_pgoff || vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ vma->vm_private_data = ucontext;
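+ /* map the doorbell page that lives at this offset within BAR 0 */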
+ pfn = ((uintptr_t)ucontext->iwdev->rf->sc_dev.hw_regs[IRDMA_DB_ADDR_OFFSET] +
+ pci_resource_start(ucontext->iwdev->rf->pcidev, 0)) >> PAGE_SHIFT;
+
+ return rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, PAGE_SIZE,
+ pgprot_noncached(vma->vm_page_prot), NULL);
+}
+
+static void irdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
+{
+ struct irdma_user_mmap_entry *entry = to_irdma_mmap_entry(rdma_entry);
+
+ kfree(entry);
+}
+
+static struct rdma_user_mmap_entry*
+irdma_user_mmap_entry_insert(struct irdma_ucontext *ucontext, u64 bar_offset,
+ enum irdma_mmap_flag mmap_flag, u64 *mmap_offset)
+{
+ struct irdma_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ int ret;
+
+ if (!entry)
+ return NULL;
+
+ entry->bar_offset = bar_offset;
+ entry->mmap_flag = mmap_flag;
+
+ ret = rdma_user_mmap_entry_insert(&ucontext->ibucontext,
+ &entry->rdma_entry, PAGE_SIZE);
+ if (ret) {
+ kfree(entry);
+ return NULL;
+ }
+ *mmap_offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
+
+ return &entry->rdma_entry;
+}
+
+/**
+ * irdma_mmap - user memory map
+ * @context: context created during alloc
+ * @vma: kernel info for user memory map
+ */
+static int irdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+ struct rdma_user_mmap_entry *rdma_entry;
+ struct irdma_user_mmap_entry *entry;
+ struct irdma_ucontext *ucontext;
+ u64 pfn;
+ int ret;
+
+ ucontext = to_ucontext(context);
+
+ /* Legacy support for libi40iw with hard-coded mmap key */
+ if (ucontext->legacy_mode)
+ return irdma_mmap_legacy(ucontext, vma);
+
+ rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma);
+ if (!rdma_entry) {
+ ibdev_dbg(&ucontext->iwdev->ibdev,
+ "VERBS: pgoff[0x%lx] does not have valid entry\n",
+ vma->vm_pgoff);
+ return -EINVAL;
+ }
+
+ entry = to_irdma_mmap_entry(rdma_entry);
+ ibdev_dbg(&ucontext->iwdev->ibdev,
+ "VERBS: bar_offset [0x%llx] mmap_flag [%d]\n",
+ entry->bar_offset, entry->mmap_flag);
+
+ pfn = (entry->bar_offset +
+ pci_resource_start(ucontext->iwdev->rf->pcidev, 0)) >> PAGE_SHIFT;
+
+ switch (entry->mmap_flag) {
+ case IRDMA_MMAP_IO_NC:
+ ret = rdma_user_mmap_io(context, vma, pfn, PAGE_SIZE,
+ pgprot_noncached(vma->vm_page_prot),
+ rdma_entry);
+ break;
+ case IRDMA_MMAP_IO_WC:
+ ret = rdma_user_mmap_io(context, vma, pfn, PAGE_SIZE,
+ pgprot_writecombine(vma->vm_page_prot),
+ rdma_entry);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ if (ret)
+ ibdev_dbg(&ucontext->iwdev->ibdev,
+ "VERBS: bar_offset [0x%llx] mmap_flag[%d] err[%d]\n",
+ entry->bar_offset, entry->mmap_flag, ret);
+ rdma_user_mmap_entry_put(rdma_entry);
+
+ return ret;
+}
+
+/**
+ * irdma_alloc_push_page - allocate a push page for qp
+ * @iwqp: qp pointer
+ */
+static void irdma_alloc_push_page(struct irdma_qp *iwqp)
+{
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_device *iwdev = iwqp->iwdev;
+ struct irdma_sc_qp *qp = &iwqp->sc_qp;
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_MANAGE_PUSH_PAGE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.manage_push_page.info.push_idx = 0;
+ cqp_info->in.u.manage_push_page.info.qs_handle =
+ qp->vsi->qos[qp->user_pri].qs_handle;
+ cqp_info->in.u.manage_push_page.info.free_page = 0;
+ cqp_info->in.u.manage_push_page.info.push_page_type = 0;
+ cqp_info->in.u.manage_push_page.cqp = &iwdev->rf->cqp.sc_cqp;
+ cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
+
+ status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
+ if (!status && cqp_request->compl_info.op_ret_val <
+ iwdev->rf->sc_dev.hw_attrs.max_hw_device_pages) {
+ qp->push_idx = cqp_request->compl_info.op_ret_val;
+ qp->push_offset = 0;
+ }
+
+ irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
+}
+
+/**
+ * irdma_alloc_ucontext - Allocate the user context data structure
+ * @uctx: uverbs context pointer
+ * @udata: user data
+ *
+ * This keeps track of all objects associated with a particular
+ * user-mode client.
+ */
+static int irdma_alloc_ucontext(struct ib_ucontext *uctx,
+ struct ib_udata *udata)
+{
+#define IRDMA_ALLOC_UCTX_MIN_REQ_LEN offsetofend(struct irdma_alloc_ucontext_req, rsvd8)
+#define IRDMA_ALLOC_UCTX_MIN_RESP_LEN offsetofend(struct irdma_alloc_ucontext_resp, rsvd)
+ struct ib_device *ibdev = uctx->device;
+ struct irdma_device *iwdev = to_iwdev(ibdev);
+ struct irdma_alloc_ucontext_req req = {};
+ struct irdma_alloc_ucontext_resp uresp = {};
+ struct irdma_ucontext *ucontext = to_ucontext(uctx);
+ struct irdma_uk_attrs *uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs;
+
+ if (udata->inlen < IRDMA_ALLOC_UCTX_MIN_REQ_LEN ||
+ udata->outlen < IRDMA_ALLOC_UCTX_MIN_RESP_LEN)
+ return -EINVAL;
+
+ if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen)))
+ return -EINVAL;
+
+ if (req.userspace_ver < 4 || req.userspace_ver > IRDMA_ABI_VER)
+ goto ver_error;
+
+ ucontext->iwdev = iwdev;
+ ucontext->abi_ver = req.userspace_ver;
+
+ if (!(req.comp_mask & IRDMA_SUPPORT_WQE_FORMAT_V2) &&
+ uk_attrs->hw_rev >= IRDMA_GEN_3)
+ return -EOPNOTSUPP;
+
+ if (req.comp_mask & IRDMA_ALLOC_UCTX_USE_RAW_ATTR)
+ ucontext->use_raw_attrs = true;
+
+ /* GEN_1 legacy support with libi40iw */
+ if (udata->outlen == IRDMA_ALLOC_UCTX_MIN_RESP_LEN) {
+ if (uk_attrs->hw_rev != IRDMA_GEN_1)
+ return -EOPNOTSUPP;
+
+ ucontext->legacy_mode = true;
+ uresp.max_qps = iwdev->rf->max_qp;
+ uresp.max_pds = iwdev->rf->sc_dev.hw_attrs.max_hw_pds;
+ uresp.wq_size = iwdev->rf->sc_dev.hw_attrs.max_qp_wr * 2;
+ uresp.kernel_ver = req.userspace_ver;
+ if (ib_copy_to_udata(udata, &uresp,
+ min(sizeof(uresp), udata->outlen)))
+ return -EFAULT;
+ } else {
+ u64 bar_off = (uintptr_t)iwdev->rf->sc_dev.hw_regs[IRDMA_DB_ADDR_OFFSET];
+
+ ucontext->db_mmap_entry =
+ irdma_user_mmap_entry_insert(ucontext, bar_off,
+ IRDMA_MMAP_IO_NC,
+ &uresp.db_mmap_key);
+ if (!ucontext->db_mmap_entry)
+ return -ENOMEM;
+
+ uresp.kernel_ver = IRDMA_ABI_VER;
+ uresp.feature_flags = uk_attrs->feature_flags;
+ uresp.max_hw_wq_frags = uk_attrs->max_hw_wq_frags;
+ uresp.max_hw_read_sges = uk_attrs->max_hw_read_sges;
+ uresp.max_hw_inline = uk_attrs->max_hw_inline;
+ uresp.max_hw_rq_quanta = uk_attrs->max_hw_rq_quanta;
+ uresp.max_hw_wq_quanta = uk_attrs->max_hw_wq_quanta;
+ uresp.max_hw_sq_chunk = uk_attrs->max_hw_sq_chunk;
+ uresp.max_hw_cq_size = uk_attrs->max_hw_cq_size;
+ uresp.min_hw_cq_size = uk_attrs->min_hw_cq_size;
+ uresp.hw_rev = uk_attrs->hw_rev;
+ uresp.comp_mask |= IRDMA_ALLOC_UCTX_USE_RAW_ATTR;
+ uresp.min_hw_wq_size = uk_attrs->min_hw_wq_size;
+ uresp.comp_mask |= IRDMA_ALLOC_UCTX_MIN_HW_WQ_SIZE;
+ uresp.max_hw_srq_quanta = uk_attrs->max_hw_srq_quanta;
+ uresp.comp_mask |= IRDMA_ALLOC_UCTX_MAX_HW_SRQ_QUANTA;
+ if (ib_copy_to_udata(udata, &uresp,
+ min(sizeof(uresp), udata->outlen))) {
+ rdma_user_mmap_entry_remove(ucontext->db_mmap_entry);
+ return -EFAULT;
+ }
+ }
+
+ INIT_LIST_HEAD(&ucontext->cq_reg_mem_list);
+ spin_lock_init(&ucontext->cq_reg_mem_list_lock);
+ INIT_LIST_HEAD(&ucontext->qp_reg_mem_list);
+ spin_lock_init(&ucontext->qp_reg_mem_list_lock);
+ INIT_LIST_HEAD(&ucontext->srq_reg_mem_list);
+ spin_lock_init(&ucontext->srq_reg_mem_list_lock);
+
+ return 0;
+
+ver_error:
+ ibdev_err(&iwdev->ibdev,
+ "Invalid userspace driver version detected. Detected version %d, should be %d\n",
+ req.userspace_ver, IRDMA_ABI_VER);
+ return -EINVAL;
+}
+
+/**
+ * irdma_dealloc_ucontext - deallocate the user context data structure
+ * @context: user context created during alloc
+ */
+static void irdma_dealloc_ucontext(struct ib_ucontext *context)
+{
+ struct irdma_ucontext *ucontext = to_ucontext(context);
+
+ rdma_user_mmap_entry_remove(ucontext->db_mmap_entry);
+}
+
+/**
+ * irdma_alloc_pd - allocate protection domain
+ * @pd: PD pointer
+ * @udata: user data
+ */
+static int irdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+{
+#define IRDMA_ALLOC_PD_MIN_RESP_LEN offsetofend(struct irdma_alloc_pd_resp, rsvd)
+ struct irdma_pd *iwpd = to_iwpd(pd);
+ struct irdma_device *iwdev = to_iwdev(pd->device);
+ struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
+ struct irdma_pci_f *rf = iwdev->rf;
+ struct irdma_alloc_pd_resp uresp = {};
+ struct irdma_sc_pd *sc_pd;
+ u32 pd_id = 0;
+ int err;
+
+ if (udata && udata->outlen < IRDMA_ALLOC_PD_MIN_RESP_LEN)
+ return -EINVAL;
+
+ err = irdma_alloc_rsrc(rf, rf->allocated_pds, rf->max_pd, &pd_id,
+ &rf->next_pd);
+ if (err)
+ return err;
+
+ sc_pd = &iwpd->sc_pd;
+ if (udata) {
+ struct irdma_ucontext *ucontext =
+ rdma_udata_to_drv_context(udata, struct irdma_ucontext,
+ ibucontext);
+ irdma_sc_pd_init(dev, sc_pd, pd_id, ucontext->abi_ver);
+ uresp.pd_id = pd_id;
+ if (ib_copy_to_udata(udata, &uresp,
+ min(sizeof(uresp), udata->outlen))) {
+ err = -EFAULT;
+ goto error;
+ }
+ } else {
+ irdma_sc_pd_init(dev, sc_pd, pd_id, IRDMA_ABI_VER);
+ }
+
+ return 0;
+error:
+ irdma_free_rsrc(rf, rf->allocated_pds, pd_id);
+
+ return err;
+}
+
+/**
+ * irdma_dealloc_pd - deallocate pd
+ * @ibpd: ptr of pd to be deallocated
+ * @udata: user data
+ */
+static int irdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct irdma_pd *iwpd = to_iwpd(ibpd);
+ struct irdma_device *iwdev = to_iwdev(ibpd->device);
+
+ irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_pds, iwpd->sc_pd.pd_id);
+
+ return 0;
+}
+
+/**
+ * irdma_get_pbl - Retrieve pbl from a list given a virtual
+ * address
+ * @va: user virtual address
+ * @pbl_list: pbl list to search in (QP's or CQ's)
+ */
+static struct irdma_pbl *irdma_get_pbl(unsigned long va,
+ struct list_head *pbl_list)
+{
+ struct irdma_pbl *iwpbl;
+
+ list_for_each_entry (iwpbl, pbl_list, list) {
+ if (iwpbl->user_base == va) {
+ list_del(&iwpbl->list);
+ iwpbl->on_list = false;
+ return iwpbl;
+ }
+ }
+
+ return NULL;
+}
+
+/**
+ * irdma_clean_cqes - clean cq entries for qp
+ * @iwqp: qp ptr (user or kernel)
+ * @iwcq: cq ptr
+ */
+static void irdma_clean_cqes(struct irdma_qp *iwqp, struct irdma_cq *iwcq)
+{
+ struct irdma_cq_uk *ukcq = &iwcq->sc_cq.cq_uk;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iwcq->lock, flags);
+ irdma_uk_clean_cq(&iwqp->sc_qp.qp_uk, ukcq);
+ spin_unlock_irqrestore(&iwcq->lock, flags);
+}
+
+static void irdma_remove_push_mmap_entries(struct irdma_qp *iwqp)
+{
+ if (iwqp->push_db_mmap_entry) {
+ rdma_user_mmap_entry_remove(iwqp->push_db_mmap_entry);
+ iwqp->push_db_mmap_entry = NULL;
+ }
+ if (iwqp->push_wqe_mmap_entry) {
+ rdma_user_mmap_entry_remove(iwqp->push_wqe_mmap_entry);
+ iwqp->push_wqe_mmap_entry = NULL;
+ }
+}
+
+static int irdma_setup_push_mmap_entries(struct irdma_ucontext *ucontext,
+ struct irdma_qp *iwqp,
+ u64 *push_wqe_mmap_key,
+ u64 *push_db_mmap_key)
+{
+ struct irdma_device *iwdev = ucontext->iwdev;
+ u64 rsvd, bar_off;
+
+ rsvd = IRDMA_PF_BAR_RSVD;
+ bar_off = (uintptr_t)iwdev->rf->sc_dev.hw_regs[IRDMA_DB_ADDR_OFFSET];
+ /* skip over db page */
+ bar_off += IRDMA_HW_PAGE_SIZE;
+ /* push wqe page */
+ bar_off += rsvd + iwqp->sc_qp.push_idx * IRDMA_HW_PAGE_SIZE;
+ iwqp->push_wqe_mmap_entry = irdma_user_mmap_entry_insert(ucontext,
+ bar_off, IRDMA_MMAP_IO_WC,
+ push_wqe_mmap_key);
+ if (!iwqp->push_wqe_mmap_entry)
+ return -ENOMEM;
+
+ /* push doorbell page */
+ bar_off += IRDMA_HW_PAGE_SIZE;
+ iwqp->push_db_mmap_entry = irdma_user_mmap_entry_insert(ucontext,
+ bar_off, IRDMA_MMAP_IO_NC,
+ push_db_mmap_key);
+ if (!iwqp->push_db_mmap_entry) {
+ rdma_user_mmap_entry_remove(iwqp->push_wqe_mmap_entry);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_destroy_qp - destroy qp
+ * @ibqp: ib QP pointer, also used to reach the device's QP resources
+ * @udata: user data
+ */
+static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
+{
+ struct irdma_qp *iwqp = to_iwqp(ibqp);
+ struct irdma_device *iwdev = iwqp->iwdev;
+
+ iwqp->sc_qp.qp_uk.destroy_pending = true;
+
+ if (iwqp->iwarp_state >= IRDMA_QP_STATE_IDLE)
+ irdma_modify_qp_to_err(&iwqp->sc_qp);
+
+ if (!iwqp->user_mode)
+ cancel_delayed_work_sync(&iwqp->dwork_flush);
+
+ if (!iwqp->user_mode) {
+ if (iwqp->iwscq) {
+ irdma_clean_cqes(iwqp, iwqp->iwscq);
+ if (iwqp->iwrcq != iwqp->iwscq)
+ irdma_clean_cqes(iwqp, iwqp->iwrcq);
+ }
+ }
+
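+ /* drop the creation reference and wait for all remaining references to be released */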
+ irdma_qp_rem_ref(&iwqp->ibqp);
+ wait_for_completion(&iwqp->free_qp);
+ irdma_free_lsmm_rsrc(iwqp);
+ irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp);
+
+ irdma_remove_push_mmap_entries(iwqp);
+
+ if (iwqp->sc_qp.qp_uk.qp_id == 1)
+ iwdev->rf->hwqp1_rsvd = false;
+ irdma_free_qp_rsrc(iwqp);
+
+ return 0;
+}
+
+/**
+ * irdma_setup_virt_qp - setup for allocation of virtual qp
+ * @iwdev: irdma device
+ * @iwqp: qp ptr
+ * @init_info: initialize info to return
+ */
+static void irdma_setup_virt_qp(struct irdma_device *iwdev,
+ struct irdma_qp *iwqp,
+ struct irdma_qp_init_info *init_info)
+{
+ struct irdma_pbl *iwpbl = iwqp->iwpbl;
+ struct irdma_qp_mr *qpmr = &iwpbl->qp_mr;
+
+ iwqp->page = qpmr->sq_page;
+ init_info->shadow_area_pa = qpmr->shadow;
+ if (iwpbl->pbl_allocated) {
+ init_info->virtual_map = true;
+ init_info->sq_pa = qpmr->sq_pbl.idx;
+ /* The RQ must use a contiguous buffer
+ * when the QP is associated with an SRQ.
+ */
+ init_info->rq_pa = init_info->qp_uk_init_info.srq_uk ?
+ qpmr->rq_pa : qpmr->rq_pbl.idx;
+ } else {
+ init_info->sq_pa = qpmr->sq_pbl.addr;
+ init_info->rq_pa = qpmr->rq_pbl.addr;
+ }
+}
+
+/**
+ * irdma_setup_umode_qp - setup sq and rq size in user mode qp
+ * @udata: udata
+ * @iwdev: iwarp device
+ * @iwqp: qp ptr (user or kernel)
+ * @info: initialize info to return
+ * @init_attr: Initial QP create attributes
+ */
+static int irdma_setup_umode_qp(struct ib_udata *udata,
+ struct irdma_device *iwdev,
+ struct irdma_qp *iwqp,
+ struct irdma_qp_init_info *info,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct irdma_ucontext *ucontext = rdma_udata_to_drv_context(udata,
+ struct irdma_ucontext, ibucontext);
+ struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info;
+ struct irdma_create_qp_req req;
+ unsigned long flags;
+ int ret;
+
+ ret = ib_copy_from_udata(&req, udata,
+ min(sizeof(req), udata->inlen));
+ if (ret) {
+ ibdev_dbg(&iwdev->ibdev, "VERBS: ib_copy_from_udata fail\n");
+ return ret;
+ }
+
+ iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx;
+ iwqp->user_mode = 1;
+ if (req.user_wqe_bufs) {
+ info->qp_uk_init_info.legacy_mode = ucontext->legacy_mode;
+ spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
+ iwqp->iwpbl = irdma_get_pbl((unsigned long)req.user_wqe_bufs,
+ &ucontext->qp_reg_mem_list);
+ spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
+
+ if (!iwqp->iwpbl) {
+ ret = -ENODATA;
+ ibdev_dbg(&iwdev->ibdev, "VERBS: no pbl info\n");
+ return ret;
+ }
+ }
+
+ if (!ucontext->use_raw_attrs) {
+ /*
+ * Maintain backward compat with older ABI which passes sq and
+ * rq depth in quanta in cap.max_send_wr and cap.max_recv_wr.
+ * There is no way to compute the correct value of
+ * iwqp->max_send_wr/max_recv_wr in the kernel.
+ */
+ iwqp->max_send_wr = init_attr->cap.max_send_wr;
+ iwqp->max_recv_wr = init_attr->cap.max_recv_wr;
+ ukinfo->sq_size = init_attr->cap.max_send_wr;
+ ukinfo->rq_size = init_attr->cap.max_recv_wr;
+ irdma_uk_calc_shift_wq(ukinfo, &ukinfo->sq_shift,
+ &ukinfo->rq_shift);
+ } else {
+ ret = irdma_uk_calc_depth_shift_sq(ukinfo, &ukinfo->sq_depth,
+ &ukinfo->sq_shift);
+ if (ret)
+ return ret;
+
+ ret = irdma_uk_calc_depth_shift_rq(ukinfo, &ukinfo->rq_depth,
+ &ukinfo->rq_shift);
+ if (ret)
+ return ret;
+
+ iwqp->max_send_wr =
+ (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift;
+ iwqp->max_recv_wr =
+ (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift;
+ ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift;
+ ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift;
+ }
+
+ irdma_setup_virt_qp(iwdev, iwqp, info);
+
+ return 0;
+}
+
+/**
+ * irdma_setup_kmode_qp - setup initialization for kernel mode qp
+ * @iwdev: iwarp device
+ * @iwqp: qp ptr (user or kernel)
+ * @info: initialize info to return
+ * @init_attr: Initial QP create attributes
+ */
+static int irdma_setup_kmode_qp(struct irdma_device *iwdev,
+ struct irdma_qp *iwqp,
+ struct irdma_qp_init_info *info,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct irdma_dma_mem *mem = &iwqp->kqp.dma_mem;
+ u32 size;
+ int status;
+ struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info;
+
+ status = irdma_uk_calc_depth_shift_sq(ukinfo, &ukinfo->sq_depth,
+ &ukinfo->sq_shift);
+ if (status)
+ return status;
+
+ status = irdma_uk_calc_depth_shift_rq(ukinfo, &ukinfo->rq_depth,
+ &ukinfo->rq_shift);
+ if (status)
+ return status;
+
+ iwqp->kqp.sq_wrid_mem =
+ kcalloc(ukinfo->sq_depth, sizeof(*iwqp->kqp.sq_wrid_mem), GFP_KERNEL);
+ if (!iwqp->kqp.sq_wrid_mem)
+ return -ENOMEM;
+
+ iwqp->kqp.rq_wrid_mem =
+ kcalloc(ukinfo->rq_depth, sizeof(*iwqp->kqp.rq_wrid_mem), GFP_KERNEL);
+
+ if (!iwqp->kqp.rq_wrid_mem) {
+ kfree(iwqp->kqp.sq_wrid_mem);
+ iwqp->kqp.sq_wrid_mem = NULL;
+ return -ENOMEM;
+ }
+
+ ukinfo->sq_wrtrk_array = iwqp->kqp.sq_wrid_mem;
+ ukinfo->rq_wrid_array = iwqp->kqp.rq_wrid_mem;
+
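+ /* single coherent buffer laid out as SQ WQEs, then RQ WQEs, then the shadow area */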
+ size = (ukinfo->sq_depth + ukinfo->rq_depth) * IRDMA_QP_WQE_MIN_SIZE;
+ size += (IRDMA_SHADOW_AREA_SIZE << 3);
+
+ mem->size = ALIGN(size, 256);
+ mem->va = dma_alloc_coherent(iwdev->rf->hw.device, mem->size,
+ &mem->pa, GFP_KERNEL);
+ if (!mem->va) {
+ kfree(iwqp->kqp.sq_wrid_mem);
+ iwqp->kqp.sq_wrid_mem = NULL;
+ kfree(iwqp->kqp.rq_wrid_mem);
+ iwqp->kqp.rq_wrid_mem = NULL;
+ return -ENOMEM;
+ }
+
+ ukinfo->sq = mem->va;
+ info->sq_pa = mem->pa;
+ ukinfo->rq = &ukinfo->sq[ukinfo->sq_depth];
+ info->rq_pa = info->sq_pa + (ukinfo->sq_depth * IRDMA_QP_WQE_MIN_SIZE);
+ ukinfo->shadow_area = ukinfo->rq[ukinfo->rq_depth].elem;
+ info->shadow_area_pa =
+ info->rq_pa + (ukinfo->rq_depth * IRDMA_QP_WQE_MIN_SIZE);
+ ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift;
+ ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift;
+ ukinfo->qp_id = info->qp_uk_init_info.qp_id;
+
+ iwqp->max_send_wr = (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift;
+ iwqp->max_recv_wr = (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift;
+ init_attr->cap.max_send_wr = iwqp->max_send_wr;
+ init_attr->cap.max_recv_wr = iwqp->max_recv_wr;
+
+ return 0;
+}
+
+static int irdma_cqp_create_qp_cmd(struct irdma_qp *iwqp)
+{
+ struct irdma_pci_f *rf = iwqp->iwdev->rf;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_create_qp_info *qp_info;
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ qp_info = &cqp_request->info.in.u.qp_create.info;
+ qp_info->mac_valid = true;
+ qp_info->cq_num_valid = true;
+ qp_info->next_iwarp_state = IRDMA_QP_STATE_IDLE;
+
+ cqp_info->cqp_cmd = IRDMA_OP_QP_CREATE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_create.qp = &iwqp->sc_qp;
+ cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+
+ return status;
+}
+
+static void irdma_roce_fill_and_set_qpctx_info(struct irdma_qp *iwqp,
+ struct irdma_qp_host_ctx_info *ctx_info)
+{
+ struct irdma_device *iwdev = iwqp->iwdev;
+ struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
+ struct irdma_roce_offload_info *roce_info;
+ struct irdma_udp_offload_info *udp_info;
+
+ udp_info = &iwqp->udp_info;
+ udp_info->snd_mss = ib_mtu_enum_to_int(ib_mtu_int_to_enum(iwdev->vsi.mtu));
+ udp_info->cwnd = iwdev->roce_cwnd;
+ udp_info->rexmit_thresh = 2;
+ udp_info->rnr_nak_thresh = 2;
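+ /* 0xc000 (49152) is the start of the dynamic/ephemeral UDP source port range */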
+ udp_info->src_port = 0xc000;
+ udp_info->dst_port = ROCE_V2_UDP_DPORT;
+ roce_info = &iwqp->roce_info;
+ ether_addr_copy(roce_info->mac_addr, iwdev->netdev->dev_addr);
+
+ if (iwqp->ibqp.qp_type == IB_QPT_GSI && iwqp->ibqp.qp_num != 1)
+ roce_info->is_qp1 = true;
+ roce_info->rd_en = true;
+ roce_info->wr_rdresp_en = true;
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ roce_info->bind_en = true;
+ roce_info->dcqcn_en = false;
+ roce_info->rtomin = 5;
+
+ roce_info->ack_credits = iwdev->roce_ackcreds;
+ roce_info->ird_size = dev->hw_attrs.max_hw_ird;
+ roce_info->ord_size = dev->hw_attrs.max_hw_ord;
+
+ if (!iwqp->user_mode) {
+ roce_info->priv_mode_en = true;
+ roce_info->fast_reg_en = true;
+ roce_info->udprivcq_en = true;
+ }
+ roce_info->roce_tver = 0;
+
+ ctx_info->roce_info = &iwqp->roce_info;
+ ctx_info->udp_info = &iwqp->udp_info;
+ irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info);
+}
+
+static void irdma_iw_fill_and_set_qpctx_info(struct irdma_qp *iwqp,
+ struct irdma_qp_host_ctx_info *ctx_info)
+{
+ struct irdma_device *iwdev = iwqp->iwdev;
+ struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
+ struct irdma_iwarp_offload_info *iwarp_info;
+
+ iwarp_info = &iwqp->iwarp_info;
+ ether_addr_copy(iwarp_info->mac_addr, iwdev->netdev->dev_addr);
+ iwarp_info->rd_en = true;
+ iwarp_info->wr_rdresp_en = true;
+ iwarp_info->ecn_en = true;
+ iwarp_info->rtomin = 5;
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
+ iwarp_info->ib_rd_en = true;
+ if (!iwqp->user_mode) {
+ iwarp_info->priv_mode_en = true;
+ iwarp_info->fast_reg_en = true;
+ }
+ iwarp_info->ddp_ver = 1;
+ iwarp_info->rdmap_ver = 1;
+
+ ctx_info->iwarp_info = &iwqp->iwarp_info;
+ ctx_info->iwarp_info_valid = true;
+ irdma_sc_qp_setctx(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info);
+ ctx_info->iwarp_info_valid = false;
+}
+
+static int irdma_validate_qp_attrs(struct ib_qp_init_attr *init_attr,
+ struct irdma_device *iwdev)
+{
+ struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
+ struct irdma_uk_attrs *uk_attrs = &dev->hw_attrs.uk_attrs;
+
+ if (init_attr->create_flags)
+ return -EOPNOTSUPP;
+
+ if (init_attr->cap.max_inline_data > uk_attrs->max_hw_inline ||
+ init_attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags ||
+ init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags ||
+ init_attr->cap.max_send_wr > uk_attrs->max_hw_wq_quanta ||
+ init_attr->cap.max_recv_wr > uk_attrs->max_hw_rq_quanta)
+ return -EINVAL;
+
+ if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
+ if (init_attr->qp_type != IB_QPT_RC &&
+ init_attr->qp_type != IB_QPT_UD &&
+ init_attr->qp_type != IB_QPT_GSI)
+ return -EOPNOTSUPP;
+ } else {
+ if (init_attr->qp_type != IB_QPT_RC)
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void irdma_flush_worker(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct irdma_qp *iwqp = container_of(dwork, struct irdma_qp, dwork_flush);
+
+ irdma_generate_flush_completions(iwqp);
+}
+
+static int irdma_setup_gsi_qp_rsrc(struct irdma_qp *iwqp, u32 *qp_num)
+{
+ struct irdma_device *iwdev = iwqp->iwdev;
+ struct irdma_pci_f *rf = iwdev->rf;
+ unsigned long flags;
+ int ret;
+
+ if (rf->rdma_ver <= IRDMA_GEN_2) {
+ *qp_num = 1;
+ return 0;
+ }
+
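+ /* GEN_3 and later: reserve HW QP 1 for the GSI QP if it is still free, otherwise allocate a regular QP id */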
+ spin_lock_irqsave(&rf->rsrc_lock, flags);
+ if (!rf->hwqp1_rsvd) {
+ *qp_num = 1;
+ rf->hwqp1_rsvd = true;
+ spin_unlock_irqrestore(&rf->rsrc_lock, flags);
+ } else {
+ spin_unlock_irqrestore(&rf->rsrc_lock, flags);
+ ret = irdma_alloc_rsrc(rf, rf->allocated_qps, rf->max_qp,
+ qp_num, &rf->next_qp);
+ if (ret)
+ return ret;
+ }
+
+ ret = irdma_vchnl_req_add_vport(&rf->sc_dev, iwdev->vport_id, *qp_num,
+ (&iwdev->vsi)->qos);
+ if (ret) {
+ if (*qp_num != 1) {
+ irdma_free_rsrc(rf, rf->allocated_qps, *qp_num);
+ } else {
+ spin_lock_irqsave(&rf->rsrc_lock, flags);
+ rf->hwqp1_rsvd = false;
+ spin_unlock_irqrestore(&rf->rsrc_lock, flags);
+ }
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_create_qp - create qp
+ * @ibqp: ptr of qp
+ * @init_attr: attributes for qp
+ * @udata: user data for create qp
+ */
+static int irdma_create_qp(struct ib_qp *ibqp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+#define IRDMA_CREATE_QP_MIN_REQ_LEN offsetofend(struct irdma_create_qp_req, user_compl_ctx)
+#define IRDMA_CREATE_QP_MIN_RESP_LEN offsetofend(struct irdma_create_qp_resp, rsvd)
+ struct ib_pd *ibpd = ibqp->pd;
+ struct irdma_pd *iwpd = to_iwpd(ibpd);
+ struct irdma_device *iwdev = to_iwdev(ibpd->device);
+ struct irdma_pci_f *rf = iwdev->rf;
+ struct irdma_qp *iwqp = to_iwqp(ibqp);
+ struct irdma_create_qp_resp uresp = {};
+ u32 qp_num = 0;
+ int err_code;
+ struct irdma_sc_qp *qp;
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_uk_attrs *uk_attrs = &dev->hw_attrs.uk_attrs;
+ struct irdma_qp_init_info init_info = {};
+ struct irdma_qp_host_ctx_info *ctx_info;
+ struct irdma_srq *iwsrq;
+ bool srq_valid = false;
+ u32 srq_id = 0;
+
+ if (init_attr->srq) {
+ iwsrq = to_iwsrq(init_attr->srq);
+ srq_valid = true;
+ srq_id = iwsrq->srq_num;
+ init_attr->cap.max_recv_sge = uk_attrs->max_hw_wq_frags;
+ init_attr->cap.max_recv_wr = 4;
+ init_info.qp_uk_init_info.srq_uk = &iwsrq->sc_srq.srq_uk;
+ }
+
+ err_code = irdma_validate_qp_attrs(init_attr, iwdev);
+ if (err_code)
+ return err_code;
+
+ if (udata && (udata->inlen < IRDMA_CREATE_QP_MIN_REQ_LEN ||
+ udata->outlen < IRDMA_CREATE_QP_MIN_RESP_LEN))
+ return -EINVAL;
+
+ init_info.vsi = &iwdev->vsi;
+ init_info.qp_uk_init_info.uk_attrs = uk_attrs;
+ init_info.qp_uk_init_info.sq_size = init_attr->cap.max_send_wr;
+ init_info.qp_uk_init_info.rq_size = init_attr->cap.max_recv_wr;
+ init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge;
+ init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge;
+ init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data;
+
+ qp = &iwqp->sc_qp;
+ qp->qp_uk.back_qp = iwqp;
+ qp->push_idx = IRDMA_INVALID_PUSH_PAGE_INDEX;
+
+ iwqp->iwdev = iwdev;
+ iwqp->q2_ctx_mem.size = ALIGN(IRDMA_Q2_BUF_SIZE + IRDMA_QP_CTX_SIZE,
+ 256);
+ iwqp->q2_ctx_mem.va = dma_alloc_coherent(dev->hw->device,
+ iwqp->q2_ctx_mem.size,
+ &iwqp->q2_ctx_mem.pa,
+ GFP_KERNEL);
+ if (!iwqp->q2_ctx_mem.va)
+ return -ENOMEM;
+
+ init_info.q2 = iwqp->q2_ctx_mem.va;
+ init_info.q2_pa = iwqp->q2_ctx_mem.pa;
+ init_info.host_ctx = (__le64 *)(init_info.q2 + IRDMA_Q2_BUF_SIZE);
+ init_info.host_ctx_pa = init_info.q2_pa + IRDMA_Q2_BUF_SIZE;
+
+ if (init_attr->qp_type == IB_QPT_GSI) {
+ err_code = irdma_setup_gsi_qp_rsrc(iwqp, &qp_num);
+ if (err_code)
+ goto error;
+ iwqp->ibqp.qp_num = 1;
+ } else {
+ err_code = irdma_alloc_rsrc(rf, rf->allocated_qps, rf->max_qp,
+ &qp_num, &rf->next_qp);
+ if (err_code)
+ goto error;
+ iwqp->ibqp.qp_num = qp_num;
+ }
+
+ iwqp->iwpd = iwpd;
+ qp = &iwqp->sc_qp;
+ iwqp->iwscq = to_iwcq(init_attr->send_cq);
+ iwqp->iwrcq = to_iwcq(init_attr->recv_cq);
+ iwqp->host_ctx.va = init_info.host_ctx;
+ iwqp->host_ctx.pa = init_info.host_ctx_pa;
+ iwqp->host_ctx.size = IRDMA_QP_CTX_SIZE;
+
+ init_info.pd = &iwpd->sc_pd;
+ init_info.qp_uk_init_info.qp_id = qp_num;
+ if (!rdma_protocol_roce(&iwdev->ibdev, 1))
+ init_info.qp_uk_init_info.first_sq_wq = 1;
+ iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp;
+ init_waitqueue_head(&iwqp->waitq);
+ init_waitqueue_head(&iwqp->mod_qp_waitq);
+
+ if (udata) {
+ init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver;
+ err_code = irdma_setup_umode_qp(udata, iwdev, iwqp, &init_info,
+ init_attr);
+ } else {
+ INIT_DELAYED_WORK(&iwqp->dwork_flush, irdma_flush_worker);
+ init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER;
+ err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info, init_attr);
+ }
+
+ if (err_code) {
+ ibdev_dbg(&iwdev->ibdev, "VERBS: setup qp failed\n");
+ goto error;
+ }
+
+ if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
+ if (init_attr->qp_type == IB_QPT_RC) {
+ init_info.qp_uk_init_info.type = IRDMA_QP_TYPE_ROCE_RC;
+ init_info.qp_uk_init_info.qp_caps = IRDMA_SEND_WITH_IMM |
+ IRDMA_WRITE_WITH_IMM |
+ IRDMA_ROCE;
+ } else {
+ init_info.qp_uk_init_info.type = IRDMA_QP_TYPE_ROCE_UD;
+ init_info.qp_uk_init_info.qp_caps = IRDMA_SEND_WITH_IMM |
+ IRDMA_ROCE;
+ }
+ } else {
+ init_info.qp_uk_init_info.type = IRDMA_QP_TYPE_IWARP;
+ init_info.qp_uk_init_info.qp_caps = IRDMA_WRITE_WITH_IMM;
+ }
+
+ if (dev->hw_attrs.uk_attrs.hw_rev > IRDMA_GEN_1)
+ init_info.qp_uk_init_info.qp_caps |= IRDMA_PUSH_MODE;
+
+ err_code = irdma_sc_qp_init(qp, &init_info);
+ if (err_code) {
+ ibdev_dbg(&iwdev->ibdev, "VERBS: qp_init fail\n");
+ goto error;
+ }
+
+ ctx_info = &iwqp->ctx_info;
+ ctx_info->srq_valid = srq_valid;
+ ctx_info->srq_id = srq_id;
+ ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
+ ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
+
+ if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
+ if (dev->ws_add(&iwdev->vsi, 0)) {
+ irdma_cqp_qp_destroy_cmd(&rf->sc_dev, &iwqp->sc_qp);
+ err_code = -EINVAL;
+ goto error;
+ }
+ irdma_qp_add_qos(&iwqp->sc_qp);
+ irdma_roce_fill_and_set_qpctx_info(iwqp, ctx_info);
+ } else {
+ irdma_iw_fill_and_set_qpctx_info(iwqp, ctx_info);
+ }
+
+ err_code = irdma_cqp_create_qp_cmd(iwqp);
+ if (err_code)
+ goto error;
+
+ refcount_set(&iwqp->refcnt, 1);
+ spin_lock_init(&iwqp->lock);
+ spin_lock_init(&iwqp->sc_qp.pfpdu.lock);
+ iwqp->sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
+ rf->qp_table[qp_num] = iwqp;
+
+ if (udata) {
+ /* GEN_1 legacy support with libi40iw does not have expanded uresp struct */
+ if (udata->outlen < sizeof(uresp)) {
+ uresp.lsmm = 1;
+ uresp.push_idx = IRDMA_INVALID_PUSH_PAGE_INDEX_GEN_1;
+ } else {
+ if (rdma_protocol_iwarp(&iwdev->ibdev, 1))
+ uresp.lsmm = 1;
+ }
+ uresp.actual_sq_size = init_info.qp_uk_init_info.sq_size;
+ uresp.actual_rq_size = init_info.qp_uk_init_info.rq_size;
+ uresp.qp_id = qp_num;
+ uresp.qp_caps = qp->qp_uk.qp_caps;
+
+ err_code = ib_copy_to_udata(udata, &uresp,
+ min(sizeof(uresp), udata->outlen));
+ if (err_code) {
+ ibdev_dbg(&iwdev->ibdev, "VERBS: copy_to_udata failed\n");
+ irdma_destroy_qp(&iwqp->ibqp, udata);
+ return err_code;
+ }
+ }
+
+ init_completion(&iwqp->free_qp);
+ return 0;
+
+error:
+ irdma_free_qp_rsrc(iwqp);
+ return err_code;
+}
+
+static int irdma_get_ib_acc_flags(struct irdma_qp *iwqp)
+{
+ int acc_flags = 0;
+
+ if (rdma_protocol_roce(iwqp->ibqp.device, 1)) {
+ if (iwqp->roce_info.wr_rdresp_en) {
+ acc_flags |= IB_ACCESS_LOCAL_WRITE;
+ acc_flags |= IB_ACCESS_REMOTE_WRITE;
+ }
+ if (iwqp->roce_info.rd_en)
+ acc_flags |= IB_ACCESS_REMOTE_READ;
+ if (iwqp->roce_info.bind_en)
+ acc_flags |= IB_ACCESS_MW_BIND;
+ if (iwqp->ctx_info.remote_atomics_en)
+ acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
+ } else {
+ if (iwqp->iwarp_info.wr_rdresp_en) {
+ acc_flags |= IB_ACCESS_LOCAL_WRITE;
+ acc_flags |= IB_ACCESS_REMOTE_WRITE;
+ }
+ if (iwqp->iwarp_info.rd_en)
+ acc_flags |= IB_ACCESS_REMOTE_READ;
+ if (iwqp->ctx_info.remote_atomics_en)
+ acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
+ }
+ return acc_flags;
+}
+
+/**
+ * irdma_query_qp - query qp attributes
+ * @ibqp: qp pointer
+ * @attr: attributes pointer
+ * @attr_mask: Not used
+ * @init_attr: qp attributes to return
+ */
+static int irdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_qp_init_attr *init_attr)
+{
+ struct irdma_qp *iwqp = to_iwqp(ibqp);
+ struct irdma_sc_qp *qp = &iwqp->sc_qp;
+
+ memset(attr, 0, sizeof(*attr));
+ memset(init_attr, 0, sizeof(*init_attr));
+
+ attr->qp_state = iwqp->ibqp_state;
+ attr->cur_qp_state = iwqp->ibqp_state;
+ attr->cap.max_send_wr = iwqp->max_send_wr;
+ attr->cap.max_recv_wr = iwqp->max_recv_wr;
+ attr->cap.max_inline_data = qp->qp_uk.max_inline_data;
+ attr->cap.max_send_sge = qp->qp_uk.max_sq_frag_cnt;
+ attr->cap.max_recv_sge = qp->qp_uk.max_rq_frag_cnt;
+ attr->qp_access_flags = irdma_get_ib_acc_flags(iwqp);
+ attr->port_num = 1;
+ if (rdma_protocol_roce(ibqp->device, 1)) {
+ attr->path_mtu = ib_mtu_int_to_enum(iwqp->udp_info.snd_mss);
+ attr->qkey = iwqp->roce_info.qkey;
+ attr->rq_psn = iwqp->udp_info.epsn;
+ attr->sq_psn = iwqp->udp_info.psn_nxt;
+ attr->dest_qp_num = iwqp->roce_info.dest_qp;
+ attr->pkey_index = iwqp->roce_info.p_key;
+ attr->retry_cnt = iwqp->udp_info.rexmit_thresh;
+ attr->rnr_retry = iwqp->udp_info.rnr_nak_thresh;
+ attr->min_rnr_timer = iwqp->udp_info.min_rnr_timer;
+ attr->max_rd_atomic = iwqp->roce_info.ord_size;
+ attr->max_dest_rd_atomic = iwqp->roce_info.ird_size;
+ }
+
+ init_attr->event_handler = iwqp->ibqp.event_handler;
+ init_attr->qp_context = iwqp->ibqp.qp_context;
+ init_attr->send_cq = iwqp->ibqp.send_cq;
+ init_attr->recv_cq = iwqp->ibqp.recv_cq;
+ init_attr->srq = iwqp->ibqp.srq;
+ init_attr->cap = attr->cap;
+
+ return 0;
+}
+
+/**
+ * irdma_query_pkey - Query partition key
+ * @ibdev: device pointer from stack
+ * @port: port number
+ * @index: index of pkey
+ * @pkey: pointer to store the pkey
+ */
+static int irdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
+ u16 *pkey)
+{
+ if (index >= IRDMA_PKEY_TBL_SZ)
+ return -EINVAL;
+
+ *pkey = IRDMA_DEFAULT_PKEY;
+ return 0;
+}
+
+static u8 irdma_roce_get_vlan_prio(const struct ib_gid_attr *attr, u8 prio)
+{
+ struct net_device *ndev;
+
+ rcu_read_lock();
+ ndev = rcu_dereference(attr->ndev);
+ if (!ndev)
+ goto exit;
+ if (is_vlan_dev(ndev)) {
+ u16 vlan_qos = vlan_dev_get_egress_qos_mask(ndev, prio);
+
+ prio = (vlan_qos & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+ }
+exit:
+ rcu_read_unlock();
+ return prio;
+}
+
+static int irdma_wait_for_suspend(struct irdma_qp *iwqp)
+{
+ if (!wait_event_timeout(iwqp->iwdev->suspend_wq,
+ !iwqp->suspend_pending,
+ msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS))) {
+ iwqp->suspend_pending = false;
+ ibdev_warn(&iwqp->iwdev->ibdev,
+ "modify_qp timed out waiting for suspend. qp_id = %d, last_ae = 0x%x\n",
+ iwqp->ibqp.qp_num, iwqp->last_aeq);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_modify_qp_roce - modify qp request
+ * @ibqp: qp's pointer for modify
+ * @attr: access attributes
+ * @attr_mask: state mask
+ * @udata: user data
+ */
+int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+#define IRDMA_MODIFY_QP_MIN_REQ_LEN offsetofend(struct irdma_modify_qp_req, rq_flush)
+#define IRDMA_MODIFY_QP_MIN_RESP_LEN offsetofend(struct irdma_modify_qp_resp, push_valid)
+ struct irdma_pd *iwpd = to_iwpd(ibqp->pd);
+ struct irdma_qp *iwqp = to_iwqp(ibqp);
+ struct irdma_device *iwdev = iwqp->iwdev;
+ struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
+ struct irdma_qp_host_ctx_info *ctx_info;
+ struct irdma_roce_offload_info *roce_info;
+ struct irdma_udp_offload_info *udp_info;
+ struct irdma_modify_qp_info info = {};
+ struct irdma_modify_qp_resp uresp = {};
+ struct irdma_modify_qp_req ureq = {};
+ unsigned long flags;
+ u8 issue_modify_qp = 0;
+ int ret = 0;
+
+ ctx_info = &iwqp->ctx_info;
+ roce_info = &iwqp->roce_info;
+ udp_info = &iwqp->udp_info;
+
+ if (udata) {
+ /* udata inlen/outlen can be 0 when supporting legacy libi40iw */
+ if ((udata->inlen && udata->inlen < IRDMA_MODIFY_QP_MIN_REQ_LEN) ||
+ (udata->outlen && udata->outlen < IRDMA_MODIFY_QP_MIN_RESP_LEN))
+ return -EINVAL;
+ }
+
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
+ if (attr_mask & IB_QP_DEST_QPN)
+ roce_info->dest_qp = attr->dest_qp_num;
+
+ if (attr_mask & IB_QP_PKEY_INDEX) {
+ ret = irdma_query_pkey(ibqp->device, 0, attr->pkey_index,
+ &roce_info->p_key);
+ if (ret)
+ return ret;
+ }
+
+ if (attr_mask & IB_QP_QKEY)
+ roce_info->qkey = attr->qkey;
+
+ if (attr_mask & IB_QP_PATH_MTU)
+ udp_info->snd_mss = ib_mtu_enum_to_int(attr->path_mtu);
+
+ if (attr_mask & IB_QP_SQ_PSN) {
+ udp_info->psn_nxt = attr->sq_psn;
+ udp_info->lsn = 0xffff;
+ udp_info->psn_una = attr->sq_psn;
+ udp_info->psn_max = attr->sq_psn;
+ }
+
+ if (attr_mask & IB_QP_RQ_PSN)
+ udp_info->epsn = attr->rq_psn;
+
+ if (attr_mask & IB_QP_RNR_RETRY)
+ udp_info->rnr_nak_thresh = attr->rnr_retry;
+
+ if (attr_mask & IB_QP_MIN_RNR_TIMER &&
+ dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ udp_info->min_rnr_timer = attr->min_rnr_timer;
+
+ if (attr_mask & IB_QP_RETRY_CNT)
+ udp_info->rexmit_thresh = attr->retry_cnt;
+
+ ctx_info->roce_info->pd_id = iwpd->sc_pd.pd_id;
+
+ if (attr_mask & IB_QP_AV) {
+ struct irdma_av *av = &iwqp->roce_ah.av;
+ const struct ib_gid_attr *sgid_attr =
+ attr->ah_attr.grh.sgid_attr;
+ u16 vlan_id = VLAN_N_VID;
+ u32 local_ip[4];
+
+ memset(&iwqp->roce_ah, 0, sizeof(iwqp->roce_ah));
+ if (attr->ah_attr.ah_flags & IB_AH_GRH) {
+ udp_info->ttl = attr->ah_attr.grh.hop_limit;
+ udp_info->flow_label = attr->ah_attr.grh.flow_label;
+ udp_info->tos = attr->ah_attr.grh.traffic_class;
+ udp_info->src_port =
+ rdma_get_udp_sport(udp_info->flow_label,
+ ibqp->qp_num,
+ roce_info->dest_qp);
+ irdma_qp_rem_qos(&iwqp->sc_qp);
+ dev->ws_remove(iwqp->sc_qp.vsi, ctx_info->user_pri);
+ if (iwqp->sc_qp.vsi->dscp_mode)
+ ctx_info->user_pri =
+ iwqp->sc_qp.vsi->dscp_map[irdma_tos2dscp(udp_info->tos)];
+ else
+ ctx_info->user_pri = rt_tos2priority(udp_info->tos);
+ }
+ ret = rdma_read_gid_l2_fields(sgid_attr, &vlan_id,
+ ctx_info->roce_info->mac_addr);
+ if (ret)
+ return ret;
+ ctx_info->user_pri = irdma_roce_get_vlan_prio(sgid_attr,
+ ctx_info->user_pri);
+ if (dev->ws_add(iwqp->sc_qp.vsi, ctx_info->user_pri))
+ return -ENOMEM;
+ iwqp->sc_qp.user_pri = ctx_info->user_pri;
+ irdma_qp_add_qos(&iwqp->sc_qp);
+
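+ /* in DCB VLAN mode, fall back to priority tagging (VLAN ID 0) when no VLAN is configured */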
+ if (vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode)
+ vlan_id = 0;
+ if (vlan_id < VLAN_N_VID) {
+ udp_info->insert_vlan_tag = true;
+ udp_info->vlan_tag = vlan_id |
+ ctx_info->user_pri << VLAN_PRIO_SHIFT;
+ } else {
+ udp_info->insert_vlan_tag = false;
+ }
+
+ av->attrs = attr->ah_attr;
+ rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid_attr->gid);
+ rdma_gid2ip((struct sockaddr *)&av->dgid_addr, &attr->ah_attr.grh.dgid);
+ av->net_type = rdma_gid_attr_network_type(sgid_attr);
+ if (av->net_type == RDMA_NETWORK_IPV6) {
+ __be32 *daddr =
+ av->dgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32;
+ __be32 *saddr =
+ av->sgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32;
+
+ irdma_copy_ip_ntohl(&udp_info->dest_ip_addr[0], daddr);
+ irdma_copy_ip_ntohl(&udp_info->local_ipaddr[0], saddr);
+
+ udp_info->ipv4 = false;
+ irdma_copy_ip_ntohl(local_ip, daddr);
+
+ } else if (av->net_type == RDMA_NETWORK_IPV4) {
+ __be32 saddr = av->sgid_addr.saddr_in.sin_addr.s_addr;
+ __be32 daddr = av->dgid_addr.saddr_in.sin_addr.s_addr;
+
+ local_ip[0] = ntohl(daddr);
+
+ udp_info->ipv4 = true;
+ udp_info->dest_ip_addr[0] = 0;
+ udp_info->dest_ip_addr[1] = 0;
+ udp_info->dest_ip_addr[2] = 0;
+ udp_info->dest_ip_addr[3] = local_ip[0];
+
+ udp_info->local_ipaddr[0] = 0;
+ udp_info->local_ipaddr[1] = 0;
+ udp_info->local_ipaddr[2] = 0;
+ udp_info->local_ipaddr[3] = ntohl(saddr);
+ }
+ udp_info->arp_idx =
+ irdma_add_arp(iwdev->rf, local_ip, udp_info->ipv4,
+ attr->ah_attr.roce.dmac);
+ }
+
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+ if (attr->max_rd_atomic > dev->hw_attrs.max_hw_ord) {
+ ibdev_err(&iwdev->ibdev,
+ "rd_atomic = %d, above max_hw_ord=%d\n",
+ attr->max_rd_atomic,
+ dev->hw_attrs.max_hw_ord);
+ return -EINVAL;
+ }
+ if (attr->max_rd_atomic)
+ roce_info->ord_size = attr->max_rd_atomic;
+ info.ord_valid = true;
+ }
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
+ if (attr->max_dest_rd_atomic > dev->hw_attrs.max_hw_ird) {
+ ibdev_err(&iwdev->ibdev,
+ "rd_atomic = %d, above max_hw_ird=%d\n",
+ attr->max_dest_rd_atomic,
+ dev->hw_attrs.max_hw_ird);
+ return -EINVAL;
+ }
+ if (attr->max_dest_rd_atomic)
+ roce_info->ird_size = attr->max_dest_rd_atomic;
+ }
+
+ if (attr_mask & IB_QP_ACCESS_FLAGS) {
+ if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE)
+ roce_info->wr_rdresp_en = true;
+ if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
+ roce_info->wr_rdresp_en = true;
+ if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ)
+ roce_info->rd_en = true;
+ if (dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_ATOMIC_OPS)
+ if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)
+ ctx_info->remote_atomics_en = true;
+ }
+
+ wait_event(iwqp->mod_qp_waitq, !atomic_read(&iwqp->hw_mod_qp_pend));
+
+ ibdev_dbg(&iwdev->ibdev,
+ "VERBS: caller: %pS qp_id=%d to_ibqpstate=%d ibqpstate=%d irdma_qpstate=%d attr_mask=0x%x\n",
+ __builtin_return_address(0), ibqp->qp_num, attr->qp_state,
+ iwqp->ibqp_state, iwqp->iwarp_state, attr_mask);
+
+ spin_lock_irqsave(&iwqp->lock, flags);
+ if (attr_mask & IB_QP_STATE) {
+ if (!ib_modify_qp_is_ok(iwqp->ibqp_state, attr->qp_state,
+ iwqp->ibqp.qp_type, attr_mask)) {
+ ibdev_warn(&iwdev->ibdev, "modify_qp invalid for qp_id=%d, old_state=0x%x, new_state=0x%x\n",
+ iwqp->ibqp.qp_num, iwqp->ibqp_state,
+ attr->qp_state);
+ ret = -EINVAL;
+ goto exit;
+ }
+ info.curr_iwarp_state = iwqp->iwarp_state;
+
+ switch (attr->qp_state) {
+ case IB_QPS_INIT:
+ if (iwqp->iwarp_state > IRDMA_QP_STATE_IDLE) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ if (iwqp->iwarp_state == IRDMA_QP_STATE_INVALID) {
+ info.next_iwarp_state = IRDMA_QP_STATE_IDLE;
+ issue_modify_qp = 1;
+ }
+ break;
+ case IB_QPS_RTR:
+ if (iwqp->iwarp_state > IRDMA_QP_STATE_IDLE) {
+ ret = -EINVAL;
+ goto exit;
+ }
+ info.arp_cache_idx_valid = true;
+ info.cq_num_valid = true;
+ info.next_iwarp_state = IRDMA_QP_STATE_RTR;
+ issue_modify_qp = 1;
+ break;
+ case IB_QPS_RTS:
+ if (iwqp->ibqp_state < IB_QPS_RTR ||
+ iwqp->ibqp_state == IB_QPS_ERR) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ info.arp_cache_idx_valid = true;
+ info.cq_num_valid = true;
+ info.ord_valid = true;
+ info.next_iwarp_state = IRDMA_QP_STATE_RTS;
+ issue_modify_qp = 1;
+ if (iwdev->push_mode && udata &&
+ iwqp->sc_qp.push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX &&
+ dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ irdma_alloc_push_page(iwqp);
+ spin_lock_irqsave(&iwqp->lock, flags);
+ }
+ break;
+ case IB_QPS_SQD:
+ if (iwqp->iwarp_state == IRDMA_QP_STATE_SQD)
+ goto exit;
+
+ if (iwqp->iwarp_state != IRDMA_QP_STATE_RTS) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ info.next_iwarp_state = IRDMA_QP_STATE_SQD;
+ issue_modify_qp = 1;
+ iwqp->suspend_pending = true;
+ break;
+ case IB_QPS_SQE:
+ case IB_QPS_ERR:
+ case IB_QPS_RESET:
+ if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ if (udata && udata->inlen) {
+ if (ib_copy_from_udata(&ureq, udata,
+ min(sizeof(ureq), udata->inlen)))
+ return -EINVAL;
+
+ irdma_flush_wqes(iwqp,
+ (ureq.sq_flush ? IRDMA_FLUSH_SQ : 0) |
+ (ureq.rq_flush ? IRDMA_FLUSH_RQ : 0) |
+ IRDMA_REFLUSH);
+ }
+ return 0;
+ }
+
+ info.next_iwarp_state = IRDMA_QP_STATE_ERROR;
+ issue_modify_qp = 1;
+ break;
+ default:
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ iwqp->ibqp_state = attr->qp_state;
+ }
+
+ ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
+ ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
+ irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info);
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+
+ if (attr_mask & IB_QP_STATE) {
+ if (issue_modify_qp) {
+ ctx_info->rem_endpoint_idx = udp_info->arp_idx;
+ if (irdma_hw_modify_qp(iwdev, iwqp, &info, true))
+ return -EINVAL;
+ if (info.next_iwarp_state == IRDMA_QP_STATE_SQD) {
+ ret = irdma_wait_for_suspend(iwqp);
+ if (ret)
+ return ret;
+ }
+ spin_lock_irqsave(&iwqp->lock, flags);
+ if (iwqp->iwarp_state == info.curr_iwarp_state) {
+ iwqp->iwarp_state = info.next_iwarp_state;
+ iwqp->ibqp_state = attr->qp_state;
+ }
+ if (iwqp->ibqp_state > IB_QPS_RTS &&
+ !iwqp->flush_issued) {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ |
+ IRDMA_FLUSH_RQ |
+ IRDMA_FLUSH_WAIT);
+ iwqp->flush_issued = 1;
+ } else {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ }
+ } else {
+ iwqp->ibqp_state = attr->qp_state;
+ }
+ if (udata && udata->outlen && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ struct irdma_ucontext *ucontext;
+
+ ucontext = rdma_udata_to_drv_context(udata,
+ struct irdma_ucontext, ibucontext);
+ if (iwqp->sc_qp.push_idx != IRDMA_INVALID_PUSH_PAGE_INDEX &&
+ !iwqp->push_wqe_mmap_entry &&
+ !irdma_setup_push_mmap_entries(ucontext, iwqp,
+ &uresp.push_wqe_mmap_key, &uresp.push_db_mmap_key)) {
+ uresp.push_valid = 1;
+ uresp.push_offset = iwqp->sc_qp.push_offset;
+ }
+ ret = ib_copy_to_udata(udata, &uresp, min(sizeof(uresp),
+ udata->outlen));
+ if (ret) {
+ irdma_remove_push_mmap_entries(iwqp);
+ ibdev_dbg(&iwdev->ibdev,
+ "VERBS: copy_to_udata failed\n");
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+exit:
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+
+ return ret;
+}
+
+/**
+ * irdma_modify_qp - modify qp request
+ * @ibqp: qp's pointer for modify
+ * @attr: access attributes
+ * @attr_mask: state mask
+ * @udata: user data
+ */
+int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+ struct ib_udata *udata)
+{
+#define IRDMA_MODIFY_QP_MIN_REQ_LEN offsetofend(struct irdma_modify_qp_req, rq_flush)
+#define IRDMA_MODIFY_QP_MIN_RESP_LEN offsetofend(struct irdma_modify_qp_resp, push_valid)
+ struct irdma_qp *iwqp = to_iwqp(ibqp);
+ struct irdma_device *iwdev = iwqp->iwdev;
+ struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
+ struct irdma_qp_host_ctx_info *ctx_info;
+ struct irdma_tcp_offload_info *tcp_info;
+ struct irdma_iwarp_offload_info *offload_info;
+ struct irdma_modify_qp_info info = {};
+ struct irdma_modify_qp_resp uresp = {};
+ struct irdma_modify_qp_req ureq = {};
+ u8 issue_modify_qp = 0;
+ u8 dont_wait = 0;
+ int err;
+ unsigned long flags;
+
+ if (udata) {
+ /* udata inlen/outlen can be 0 when supporting legacy libi40iw */
+ if ((udata->inlen && udata->inlen < IRDMA_MODIFY_QP_MIN_REQ_LEN) ||
+ (udata->outlen && udata->outlen < IRDMA_MODIFY_QP_MIN_RESP_LEN))
+ return -EINVAL;
+ }
+
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
+ ctx_info = &iwqp->ctx_info;
+ offload_info = &iwqp->iwarp_info;
+ tcp_info = &iwqp->tcp_info;
+ wait_event(iwqp->mod_qp_waitq, !atomic_read(&iwqp->hw_mod_qp_pend));
+ ibdev_dbg(&iwdev->ibdev,
+ "VERBS: caller: %pS qp_id=%d to_ibqpstate=%d ibqpstate=%d irdma_qpstate=%d last_aeq=%d hw_tcp_state=%d hw_iwarp_state=%d attr_mask=0x%x\n",
+ __builtin_return_address(0), ibqp->qp_num, attr->qp_state,
+ iwqp->ibqp_state, iwqp->iwarp_state, iwqp->last_aeq,
+ iwqp->hw_tcp_state, iwqp->hw_iwarp_state, attr_mask);
+
+ spin_lock_irqsave(&iwqp->lock, flags);
+ if (attr_mask & IB_QP_STATE) {
+ info.curr_iwarp_state = iwqp->iwarp_state;
+ switch (attr->qp_state) {
+ case IB_QPS_INIT:
+ case IB_QPS_RTR:
+ if (iwqp->iwarp_state > IRDMA_QP_STATE_IDLE) {
+ err = -EINVAL;
+ goto exit;
+ }
+
+ if (iwqp->iwarp_state == IRDMA_QP_STATE_INVALID) {
+ info.next_iwarp_state = IRDMA_QP_STATE_IDLE;
+ issue_modify_qp = 1;
+ }
+ if (iwdev->push_mode && udata &&
+ iwqp->sc_qp.push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX &&
+ dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ irdma_alloc_push_page(iwqp);
+ spin_lock_irqsave(&iwqp->lock, flags);
+ }
+ break;
+ case IB_QPS_RTS:
+ if (iwqp->iwarp_state > IRDMA_QP_STATE_RTS ||
+ !iwqp->cm_id) {
+ err = -EINVAL;
+ goto exit;
+ }
+
+ issue_modify_qp = 1;
+ iwqp->hw_tcp_state = IRDMA_TCP_STATE_ESTABLISHED;
+ iwqp->hte_added = 1;
+ info.next_iwarp_state = IRDMA_QP_STATE_RTS;
+ info.tcp_ctx_valid = true;
+ info.ord_valid = true;
+ info.arp_cache_idx_valid = true;
+ info.cq_num_valid = true;
+ break;
+ case IB_QPS_SQD:
+ if (iwqp->hw_iwarp_state > IRDMA_QP_STATE_RTS) {
+ err = 0;
+ goto exit;
+ }
+
+ if (iwqp->iwarp_state == IRDMA_QP_STATE_CLOSING ||
+ iwqp->iwarp_state < IRDMA_QP_STATE_RTS) {
+ err = 0;
+ goto exit;
+ }
+
+ if (iwqp->iwarp_state > IRDMA_QP_STATE_CLOSING) {
+ err = -EINVAL;
+ goto exit;
+ }
+
+ info.next_iwarp_state = IRDMA_QP_STATE_CLOSING;
+ issue_modify_qp = 1;
+ break;
+ case IB_QPS_SQE:
+ if (iwqp->iwarp_state >= IRDMA_QP_STATE_TERMINATE) {
+ err = -EINVAL;
+ goto exit;
+ }
+
+ info.next_iwarp_state = IRDMA_QP_STATE_TERMINATE;
+ issue_modify_qp = 1;
+ break;
+ case IB_QPS_ERR:
+ case IB_QPS_RESET:
+ if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ if (udata && udata->inlen) {
+ if (ib_copy_from_udata(&ureq, udata,
+ min(sizeof(ureq), udata->inlen)))
+ return -EINVAL;
+
+ irdma_flush_wqes(iwqp,
+ (ureq.sq_flush ? IRDMA_FLUSH_SQ : 0) |
+ (ureq.rq_flush ? IRDMA_FLUSH_RQ : 0) |
+ IRDMA_REFLUSH);
+ }
+ return 0;
+ }
+
+ if (iwqp->sc_qp.term_flags) {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ irdma_terminate_del_timer(&iwqp->sc_qp);
+ spin_lock_irqsave(&iwqp->lock, flags);
+ }
+ info.next_iwarp_state = IRDMA_QP_STATE_ERROR;
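+ /* only reset a TCP connection that is still live; otherwise skip the close timer and disconnect right away (dont_wait path below) */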
+ if (iwqp->hw_tcp_state > IRDMA_TCP_STATE_CLOSED &&
+ iwdev->iw_status &&
+ iwqp->hw_tcp_state != IRDMA_TCP_STATE_TIME_WAIT)
+ info.reset_tcp_conn = true;
+ else
+ dont_wait = 1;
+
+ issue_modify_qp = 1;
+ info.next_iwarp_state = IRDMA_QP_STATE_ERROR;
+ break;
+ default:
+ err = -EINVAL;
+ goto exit;
+ }
+
+ iwqp->ibqp_state = attr->qp_state;
+ }
+ if (attr_mask & IB_QP_ACCESS_FLAGS) {
+ ctx_info->iwarp_info_valid = true;
+ if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE)
+ offload_info->wr_rdresp_en = true;
+ if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
+ offload_info->wr_rdresp_en = true;
+ if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ)
+ offload_info->rd_en = true;
+ }
+
+ if (ctx_info->iwarp_info_valid) {
+ ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
+ ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
+ irdma_sc_qp_setctx(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info);
+ }
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+
+ if (attr_mask & IB_QP_STATE) {
+ if (issue_modify_qp) {
+ ctx_info->rem_endpoint_idx = tcp_info->arp_idx;
+ if (irdma_hw_modify_qp(iwdev, iwqp, &info, true))
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&iwqp->lock, flags);
+ if (iwqp->iwarp_state == info.curr_iwarp_state) {
+ iwqp->iwarp_state = info.next_iwarp_state;
+ iwqp->ibqp_state = attr->qp_state;
+ }
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ }
+
+ if (issue_modify_qp && iwqp->ibqp_state > IB_QPS_RTS) {
+ if (dont_wait) {
+ if (iwqp->hw_tcp_state) {
+ spin_lock_irqsave(&iwqp->lock, flags);
+ iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSED;
+ iwqp->last_aeq = IRDMA_AE_RESET_SENT;
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ }
+ irdma_cm_disconn(iwqp);
+ } else {
+ int close_timer_started;
+
+ spin_lock_irqsave(&iwdev->cm_core.ht_lock, flags);
+
+ if (iwqp->cm_node) {
+ refcount_inc(&iwqp->cm_node->refcnt);
+ spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags);
+ close_timer_started = atomic_inc_return(&iwqp->close_timer_started);
+ if (iwqp->cm_id && close_timer_started == 1)
+ irdma_schedule_cm_timer(iwqp->cm_node,
+ (struct irdma_puda_buf *)iwqp,
+ IRDMA_TIMER_TYPE_CLOSE, 1, 0);
+
+ irdma_rem_ref_cm_node(iwqp->cm_node);
+ } else {
+ spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags);
+ }
+ }
+ }
+ if (attr_mask & IB_QP_STATE && udata && udata->outlen &&
+ dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ struct irdma_ucontext *ucontext;
+
+ ucontext = rdma_udata_to_drv_context(udata,
+ struct irdma_ucontext, ibucontext);
+ if (iwqp->sc_qp.push_idx != IRDMA_INVALID_PUSH_PAGE_INDEX &&
+ !iwqp->push_wqe_mmap_entry &&
+ !irdma_setup_push_mmap_entries(ucontext, iwqp,
+ &uresp.push_wqe_mmap_key, &uresp.push_db_mmap_key)) {
+ uresp.push_valid = 1;
+ uresp.push_offset = iwqp->sc_qp.push_offset;
+ }
+
+ err = ib_copy_to_udata(udata, &uresp, min(sizeof(uresp),
+ udata->outlen));
+ if (err) {
+ irdma_remove_push_mmap_entries(iwqp);
+ ibdev_dbg(&iwdev->ibdev,
+ "VERBS: copy_to_udata failed\n");
+ return err;
+ }
+ }
+
+ return 0;
+exit:
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+
+ return err;
+}
+
+/**
+ * irdma_srq_free_rsrc - free up resources for srq
+ * @rf: RDMA PCI function
+ * @iwsrq: srq ptr
+ */
+static void irdma_srq_free_rsrc(struct irdma_pci_f *rf, struct irdma_srq *iwsrq)
+{
+ struct irdma_sc_srq *srq = &iwsrq->sc_srq;
+
+ if (!iwsrq->user_mode) {
+ dma_free_coherent(rf->sc_dev.hw->device, iwsrq->kmem.size,
+ iwsrq->kmem.va, iwsrq->kmem.pa);
+ iwsrq->kmem.va = NULL;
+ }
+
+ irdma_free_rsrc(rf, rf->allocated_srqs, srq->srq_uk.srq_id);
+}
+
+/**
+ * irdma_cq_free_rsrc - free up resources for cq
+ * @rf: RDMA PCI function
+ * @iwcq: cq ptr
+ */
+static void irdma_cq_free_rsrc(struct irdma_pci_f *rf, struct irdma_cq *iwcq)
+{
+ struct irdma_sc_cq *cq = &iwcq->sc_cq;
+
+ if (!iwcq->user_mode) {
+ dma_free_coherent(rf->sc_dev.hw->device, iwcq->kmem.size,
+ iwcq->kmem.va, iwcq->kmem.pa);
+ iwcq->kmem.va = NULL;
+ dma_free_coherent(rf->sc_dev.hw->device,
+ iwcq->kmem_shadow.size,
+ iwcq->kmem_shadow.va, iwcq->kmem_shadow.pa);
+ iwcq->kmem_shadow.va = NULL;
+ }
+
+ irdma_free_rsrc(rf, rf->allocated_cqs, cq->cq_uk.cq_id);
+}
+
+/**
+ * irdma_free_cqbuf - worker to free a cq buffer
+ * @work: provides access to the cq buffer to free
+ */
+static void irdma_free_cqbuf(struct work_struct *work)
+{
+ struct irdma_cq_buf *cq_buf = container_of(work, struct irdma_cq_buf, work);
+
+ dma_free_coherent(cq_buf->hw->device, cq_buf->kmem_buf.size,
+ cq_buf->kmem_buf.va, cq_buf->kmem_buf.pa);
+ cq_buf->kmem_buf.va = NULL;
+ kfree(cq_buf);
+}
+
+/**
+ * irdma_process_resize_list - remove resized cq buffers from the resize_list
+ * @iwcq: cq which owns the resize_list
+ * @iwdev: irdma device
+ * @lcqe_buf: the buffer where the last cqe is received
+ */
+static int irdma_process_resize_list(struct irdma_cq *iwcq,
+ struct irdma_device *iwdev,
+ struct irdma_cq_buf *lcqe_buf)
+{
+ struct list_head *tmp_node, *list_node;
+ struct irdma_cq_buf *cq_buf;
+ int cnt = 0;
+
+ list_for_each_safe(list_node, tmp_node, &iwcq->resize_list) {
+ cq_buf = list_entry(list_node, struct irdma_cq_buf, list);
+ if (cq_buf == lcqe_buf)
+ return cnt;
+
+ list_del(&cq_buf->list);
+ queue_work(iwdev->cleanup_wq, &cq_buf->work);
+ cnt++;
+ }
+
+ return cnt;
+}
+
+/**
+ * irdma_destroy_srq - destroy srq
+ * @ibsrq: srq pointer
+ * @udata: user data
+ */
+static int irdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+{
+ struct irdma_device *iwdev = to_iwdev(ibsrq->device);
+ struct irdma_srq *iwsrq = to_iwsrq(ibsrq);
+ struct irdma_sc_srq *srq = &iwsrq->sc_srq;
+
+ irdma_srq_wq_destroy(iwdev->rf, srq);
+ irdma_srq_free_rsrc(iwdev->rf, iwsrq);
+ return 0;
+}
+
+/**
+ * irdma_destroy_cq - destroy cq
+ * @ib_cq: cq pointer
+ * @udata: user data
+ */
+static int irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+{
+ struct irdma_device *iwdev = to_iwdev(ib_cq->device);
+ struct irdma_cq *iwcq = to_iwcq(ib_cq);
+ struct irdma_sc_cq *cq = &iwcq->sc_cq;
+ struct irdma_sc_dev *dev = cq->dev;
+ struct irdma_sc_ceq *ceq = dev->ceq[cq->ceq_id];
+ struct irdma_ceq *iwceq = container_of(ceq, struct irdma_ceq, sc_ceq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&iwcq->lock, flags);
+ if (!list_empty(&iwcq->cmpl_generated))
+ irdma_remove_cmpls_list(iwcq);
+ if (!list_empty(&iwcq->resize_list))
+ irdma_process_resize_list(iwcq, iwdev, NULL);
+ spin_unlock_irqrestore(&iwcq->lock, flags);
+
+ irdma_cq_rem_ref(ib_cq);
+ wait_for_completion(&iwcq->free_cq);
+
+ irdma_cq_wq_destroy(iwdev->rf, cq);
+
+ spin_lock_irqsave(&iwceq->ce_lock, flags);
+ irdma_sc_cleanup_ceqes(cq, ceq);
+ spin_unlock_irqrestore(&iwceq->ce_lock, flags);
+ irdma_cq_free_rsrc(iwdev->rf, iwcq);
+
+ return 0;
+}
+
+/**
+ * irdma_resize_cq - resize cq
+ * @ibcq: cq to be resized
+ * @entries: desired cq size
+ * @udata: user data
+ */
+static int irdma_resize_cq(struct ib_cq *ibcq, int entries,
+ struct ib_udata *udata)
+{
+#define IRDMA_RESIZE_CQ_MIN_REQ_LEN offsetofend(struct irdma_resize_cq_req, user_cq_buffer)
+ struct irdma_cq *iwcq = to_iwcq(ibcq);
+ struct irdma_sc_dev *dev = iwcq->sc_cq.dev;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_modify_cq_info *m_info;
+ struct irdma_modify_cq_info info = {};
+ struct irdma_dma_mem kmem_buf;
+ struct irdma_cq_mr *cqmr_buf;
+ struct irdma_pbl *iwpbl_buf;
+ struct irdma_device *iwdev;
+ struct irdma_pci_f *rf;
+ struct irdma_cq_buf *cq_buf = NULL;
+ unsigned long flags;
+ u8 cqe_size;
+ int ret;
+
+ iwdev = to_iwdev(ibcq->device);
+ rf = iwdev->rf;
+
+ if (!(rf->sc_dev.hw_attrs.uk_attrs.feature_flags &
+ IRDMA_FEATURE_CQ_RESIZE))
+ return -EOPNOTSUPP;
+
+ if (udata && udata->inlen < IRDMA_RESIZE_CQ_MIN_REQ_LEN)
+ return -EINVAL;
+
+ if (entries > rf->max_cqe)
+ return -EINVAL;
+
+ if (!iwcq->user_mode) {
+ entries += 2;
+
+ if (!iwcq->sc_cq.cq_uk.avoid_mem_cflct &&
+ dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
+ entries *= 2;
+
+ if (entries & 1)
+ entries += 1; /* cq size must be an even number */
+
+ cqe_size = iwcq->sc_cq.cq_uk.avoid_mem_cflct ? 64 : 32;
+ if (entries * cqe_size == IRDMA_HW_PAGE_SIZE)
+ entries += 2;
+ }
+
+ info.cq_size = max(entries, 4);
+
+ if (info.cq_size == iwcq->sc_cq.cq_uk.cq_size - 1)
+ return 0;
+
+ if (udata) {
+ struct irdma_resize_cq_req req = {};
+ struct irdma_ucontext *ucontext =
+ rdma_udata_to_drv_context(udata, struct irdma_ucontext,
+ ibucontext);
+
+ /* CQ resize not supported with legacy GEN_1 libi40iw */
+ if (ucontext->legacy_mode)
+ return -EOPNOTSUPP;
+
+ if (ib_copy_from_udata(&req, udata,
+ min(sizeof(req), udata->inlen)))
+ return -EINVAL;
+
+ spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+ iwpbl_buf = irdma_get_pbl((unsigned long)req.user_cq_buffer,
+ &ucontext->cq_reg_mem_list);
+ spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+
+ if (!iwpbl_buf)
+ return -ENOMEM;
+
+ cqmr_buf = &iwpbl_buf->cq_mr;
+ if (iwpbl_buf->pbl_allocated) {
+ info.virtual_map = true;
+ info.pbl_chunk_size = 1;
+ info.first_pm_pbl_idx = cqmr_buf->cq_pbl.idx;
+ } else {
+ info.cq_pa = cqmr_buf->cq_pbl.addr;
+ }
+ } else {
+ /* Kmode CQ resize */
+ int rsize;
+
+ rsize = info.cq_size * sizeof(struct irdma_cqe);
+ kmem_buf.size = ALIGN(round_up(rsize, 256), 256);
+ kmem_buf.va = dma_alloc_coherent(dev->hw->device,
+ kmem_buf.size, &kmem_buf.pa,
+ GFP_KERNEL);
+ if (!kmem_buf.va)
+ return -ENOMEM;
+
+ info.cq_base = kmem_buf.va;
+ info.cq_pa = kmem_buf.pa;
+ cq_buf = kzalloc(sizeof(*cq_buf), GFP_KERNEL);
+ if (!cq_buf) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ }
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+ if (!cqp_request) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ info.shadow_read_threshold = iwcq->sc_cq.shadow_read_threshold;
+ info.cq_resize = true;
+
+ cqp_info = &cqp_request->info;
+ m_info = &cqp_info->in.u.cq_modify.info;
+ memcpy(m_info, &info, sizeof(*m_info));
+
+ cqp_info->cqp_cmd = IRDMA_OP_CQ_MODIFY;
+ cqp_info->in.u.cq_modify.cq = &iwcq->sc_cq;
+ cqp_info->in.u.cq_modify.scratch = (uintptr_t)cqp_request;
+ cqp_info->post_sq = 1;
+ ret = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+ if (ret)
+ goto error;
+
+ spin_lock_irqsave(&iwcq->lock, flags);
+ if (cq_buf) {
+ cq_buf->kmem_buf = iwcq->kmem;
+ cq_buf->hw = dev->hw;
+ memcpy(&cq_buf->cq_uk, &iwcq->sc_cq.cq_uk, sizeof(cq_buf->cq_uk));
+ INIT_WORK(&cq_buf->work, irdma_free_cqbuf);
+ list_add_tail(&cq_buf->list, &iwcq->resize_list);
+ iwcq->kmem = kmem_buf;
+ }
+
+ irdma_sc_cq_resize(&iwcq->sc_cq, &info);
+ ibcq->cqe = info.cq_size - 1;
+ spin_unlock_irqrestore(&iwcq->lock, flags);
+
+ return 0;
+error:
+ if (!udata) {
+ dma_free_coherent(dev->hw->device, kmem_buf.size, kmem_buf.va,
+ kmem_buf.pa);
+ kmem_buf.va = NULL;
+ }
+ kfree(cq_buf);
+
+ return ret;
+}
+
+/**
+ * irdma_srq_event - event notification for srq limit
+ * @srq: shared srq struct
+ */
+void irdma_srq_event(struct irdma_sc_srq *srq)
+{
+ struct irdma_srq *iwsrq = container_of(srq, struct irdma_srq, sc_srq);
+ struct ib_srq *ibsrq = &iwsrq->ibsrq;
+ struct ib_event event;
+
+ srq->srq_limit = 0;
+
+ if (!ibsrq->event_handler)
+ return;
+
+ event.device = ibsrq->device;
+ event.element.port_num = 1;
+ event.element.srq = ibsrq;
+ event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ ibsrq->event_handler(&event, ibsrq->srq_context);
+}
+
+/**
+ * irdma_modify_srq - modify srq request
+ * @ibsrq: pointer to the SRQ to modify
+ * @attr: SRQ attributes to apply
+ * @attr_mask: mask of attributes to modify
+ * @udata: user data
+ */
+static int irdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask,
+ struct ib_udata *udata)
+{
+ struct irdma_device *iwdev = to_iwdev(ibsrq->device);
+ struct irdma_srq *iwsrq = to_iwsrq(ibsrq);
+ struct irdma_cqp_request *cqp_request;
+ struct irdma_pci_f *rf = iwdev->rf;
+ struct irdma_modify_srq_info *info;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+
+ if (attr_mask & IB_SRQ_MAX_WR)
+ return -EINVAL;
+
+ if (!(attr_mask & IB_SRQ_LIMIT))
+ return 0;
+
+ if (attr->srq_limit > iwsrq->sc_srq.srq_uk.srq_size)
+ return -EINVAL;
+
+ /* Execute this cqp op synchronously, so we can update srq_limit
+ * upon successful completion.
+ */
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ info = &cqp_info->in.u.srq_modify.info;
+ info->srq_limit = attr->srq_limit;
+ if (info->srq_limit > 0xFFF)
+ info->srq_limit = 0xFFF;
+ info->arm_limit_event = 1;
+
+ cqp_info->cqp_cmd = IRDMA_OP_SRQ_MODIFY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.srq_modify.srq = &iwsrq->sc_srq;
+ cqp_info->in.u.srq_modify.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+ if (status)
+ return status;
+
+ iwsrq->sc_srq.srq_limit = info->srq_limit;
+
+ return 0;
+}
+
+static int irdma_setup_umode_srq(struct irdma_device *iwdev,
+ struct irdma_srq *iwsrq,
+ struct irdma_srq_init_info *info,
+ struct ib_udata *udata)
+{
+#define IRDMA_CREATE_SRQ_MIN_REQ_LEN \
+ offsetofend(struct irdma_create_srq_req, user_shadow_area)
+ struct irdma_create_srq_req req = {};
+ struct irdma_ucontext *ucontext;
+ struct irdma_srq_mr *srqmr;
+ struct irdma_pbl *iwpbl;
+ unsigned long flags;
+
+ iwsrq->user_mode = true;
+ ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext,
+ ibucontext);
+
+ if (udata->inlen < IRDMA_CREATE_SRQ_MIN_REQ_LEN)
+ return -EINVAL;
+
+ if (ib_copy_from_udata(&req, udata,
+ min(sizeof(req), udata->inlen)))
+ return -EFAULT;
+
+ spin_lock_irqsave(&ucontext->srq_reg_mem_list_lock, flags);
+ iwpbl = irdma_get_pbl((unsigned long)req.user_srq_buf,
+ &ucontext->srq_reg_mem_list);
+ spin_unlock_irqrestore(&ucontext->srq_reg_mem_list_lock, flags);
+ if (!iwpbl)
+ return -EPROTO;
+
+ iwsrq->iwpbl = iwpbl;
+ srqmr = &iwpbl->srq_mr;
+
+ if (iwpbl->pbl_allocated) {
+ info->virtual_map = true;
+ info->pbl_chunk_size = 1;
+ info->first_pm_pbl_idx = srqmr->srq_pbl.idx;
+ info->leaf_pbl_size = 1;
+ } else {
+ info->srq_pa = srqmr->srq_pbl.addr;
+ }
+ info->shadow_area_pa = srqmr->shadow;
+
+ return 0;
+}
+
+static int irdma_setup_kmode_srq(struct irdma_device *iwdev,
+ struct irdma_srq *iwsrq,
+ struct irdma_srq_init_info *info, u32 depth,
+ u8 shift)
+{
+ struct irdma_srq_uk_init_info *ukinfo = &info->srq_uk_init_info;
+ struct irdma_dma_mem *mem = &iwsrq->kmem;
+ u32 size, ring_size;
+
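+ /* one DMA buffer: WQE ring first, shadow area immediately after it */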
+ ring_size = depth * IRDMA_QP_WQE_MIN_SIZE;
+ size = ring_size + (IRDMA_SHADOW_AREA_SIZE << 3);
+
+ mem->size = ALIGN(size, 256);
+ mem->va = dma_alloc_coherent(iwdev->rf->hw.device, mem->size,
+ &mem->pa, GFP_KERNEL);
+ if (!mem->va)
+ return -ENOMEM;
+
+ ukinfo->srq = mem->va;
+ ukinfo->srq_size = depth >> shift;
+ ukinfo->shadow_area = mem->va + ring_size;
+
+ info->srq_pa = mem->pa;
+ info->shadow_area_pa = info->srq_pa + ring_size;
+
+ return 0;
+}
+
+/**
+ * irdma_create_srq - create srq
+ * @ibsrq: ib's srq pointer
+ * @initattrs: attributes for srq
+ * @udata: user data for create srq
+ */
+static int irdma_create_srq(struct ib_srq *ibsrq,
+ struct ib_srq_init_attr *initattrs,
+ struct ib_udata *udata)
+{
+ struct irdma_device *iwdev = to_iwdev(ibsrq->device);
+ struct ib_srq_attr *attr = &initattrs->attr;
+ struct irdma_pd *iwpd = to_iwpd(ibsrq->pd);
+ struct irdma_srq *iwsrq = to_iwsrq(ibsrq);
+ struct irdma_srq_uk_init_info *ukinfo;
+ struct irdma_cqp_request *cqp_request;
+ struct irdma_srq_init_info info = {};
+ struct irdma_pci_f *rf = iwdev->rf;
+ struct irdma_uk_attrs *uk_attrs;
+ struct cqp_cmds_info *cqp_info;
+ int err_code = 0;
+ u32 depth;
+ u8 shift;
+
+ uk_attrs = &rf->sc_dev.hw_attrs.uk_attrs;
+ ukinfo = &info.srq_uk_init_info;
+
+ if (initattrs->srq_type != IB_SRQT_BASIC)
+ return -EOPNOTSUPP;
+
+ if (!(uk_attrs->feature_flags & IRDMA_FEATURE_SRQ) ||
+ attr->max_sge > uk_attrs->max_hw_wq_frags)
+ return -EINVAL;
+
+ refcount_set(&iwsrq->refcnt, 1);
+ spin_lock_init(&iwsrq->lock);
+ err_code = irdma_alloc_rsrc(rf, rf->allocated_srqs, rf->max_srq,
+ &iwsrq->srq_num, &rf->next_srq);
+ if (err_code)
+ return err_code;
+
+ ukinfo->max_srq_frag_cnt = attr->max_sge;
+ ukinfo->uk_attrs = uk_attrs;
+ ukinfo->srq_id = iwsrq->srq_num;
+
+ irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_srq_frag_cnt, 0,
+ &shift);
+
+ err_code = irdma_get_srqdepth(ukinfo->uk_attrs, attr->max_wr,
+ shift, &depth);
+ if (err_code)
+ return err_code;
+
+ /* Actual SRQ size in WRs for ring and HW */
+ ukinfo->srq_size = depth >> shift;
+
+ /* Max postable WRs to SRQ */
+ iwsrq->max_wr = (depth - IRDMA_RQ_RSVD) >> shift;
+ attr->max_wr = iwsrq->max_wr;
+
+ if (udata)
+ err_code = irdma_setup_umode_srq(iwdev, iwsrq, &info, udata);
+ else
+ err_code = irdma_setup_kmode_srq(iwdev, iwsrq, &info, depth,
+ shift);
+
+ if (err_code)
+ goto free_rsrc;
+
+ info.vsi = &iwdev->vsi;
+ info.pd = &iwpd->sc_pd;
+
+ iwsrq->sc_srq.srq_uk.lock = &iwsrq->lock;
+ err_code = irdma_sc_srq_init(&iwsrq->sc_srq, &info);
+ if (err_code)
+ goto free_dmem;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+ if (!cqp_request) {
+ err_code = -ENOMEM;
+ goto free_dmem;
+ }
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_SRQ_CREATE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.srq_create.srq = &iwsrq->sc_srq;
+ cqp_info->in.u.srq_create.scratch = (uintptr_t)cqp_request;
+ err_code = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+ if (err_code)
+ goto free_dmem;
+
+ if (udata) {
+ struct irdma_create_srq_resp resp = {};
+
+ resp.srq_id = iwsrq->srq_num;
+ resp.srq_size = ukinfo->srq_size;
+ if (ib_copy_to_udata(udata, &resp,
+ min(sizeof(resp), udata->outlen))) {
+ err_code = -EPROTO;
+ goto srq_destroy;
+ }
+ }
+
+ return 0;
+
+srq_destroy:
+ irdma_srq_wq_destroy(rf, &iwsrq->sc_srq);
+
+free_dmem:
+ if (!iwsrq->user_mode)
+ dma_free_coherent(rf->hw.device, iwsrq->kmem.size,
+ iwsrq->kmem.va, iwsrq->kmem.pa);
+free_rsrc:
+ irdma_free_rsrc(rf, rf->allocated_srqs, iwsrq->srq_num);
+ return err_code;
+}
+
+/**
+ * irdma_query_srq - get SRQ attributes
+ * @ibsrq: the SRQ to query
+ * @attr: the attributes of the SRQ
+ */
+static int irdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
+{
+ struct irdma_srq *iwsrq = to_iwsrq(ibsrq);
+
+ attr->max_wr = iwsrq->max_wr;
+ attr->max_sge = iwsrq->sc_srq.srq_uk.max_srq_frag_cnt;
+ attr->srq_limit = iwsrq->sc_srq.srq_limit;
+
+ return 0;
+}
+
+static inline int cq_validate_flags(u32 flags, u8 hw_rev)
+{
+ /* GEN1/2 does not support CQ create flags */
+ if (hw_rev <= IRDMA_GEN_2)
+ return flags ? -EOPNOTSUPP : 0;
+
+ return flags & ~IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION ? -EOPNOTSUPP : 0;
+}
+
+/**
+ * irdma_create_cq - create cq
+ * @ibcq: CQ allocated
+ * @attr: attributes for cq
+ * @attrs: uverbs attribute bundle
+ */
+static int irdma_create_cq(struct ib_cq *ibcq,
+ const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+#define IRDMA_CREATE_CQ_MIN_REQ_LEN offsetofend(struct irdma_create_cq_req, user_cq_buf)
+#define IRDMA_CREATE_CQ_MIN_RESP_LEN offsetofend(struct irdma_create_cq_resp, cq_size)
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct ib_device *ibdev = ibcq->device;
+ struct irdma_device *iwdev = to_iwdev(ibdev);
+ struct irdma_pci_f *rf = iwdev->rf;
+ struct irdma_cq *iwcq = to_iwcq(ibcq);
+ u32 cq_num = 0;
+ struct irdma_sc_cq *cq;
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_cq_init_info info = {};
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_cq_uk_init_info *ukinfo = &info.cq_uk_init_info;
+ unsigned long flags;
+ int err_code;
+ int entries = attr->cqe;
+ bool cqe_64byte_ena;
+ u8 cqe_size;
+
+ err_code = cq_validate_flags(attr->flags, dev->hw_attrs.uk_attrs.hw_rev);
+ if (err_code)
+ return err_code;
+
+ if (udata && (udata->inlen < IRDMA_CREATE_CQ_MIN_REQ_LEN ||
+ udata->outlen < IRDMA_CREATE_CQ_MIN_RESP_LEN))
+ return -EINVAL;
+
+ err_code = irdma_alloc_rsrc(rf, rf->allocated_cqs, rf->max_cq, &cq_num,
+ &rf->next_cq);
+ if (err_code)
+ return err_code;
+
+ cq = &iwcq->sc_cq;
+ cq->back_cq = iwcq;
+ refcount_set(&iwcq->refcnt, 1);
+ spin_lock_init(&iwcq->lock);
+ INIT_LIST_HEAD(&iwcq->resize_list);
+ INIT_LIST_HEAD(&iwcq->cmpl_generated);
+ iwcq->cq_num = cq_num;
+ info.dev = dev;
+ ukinfo->cq_size = max(entries, 4);
+ ukinfo->cq_id = cq_num;
+ cqe_64byte_ena = dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_64_BYTE_CQE ?
+ true : false;
+ cqe_size = cqe_64byte_ena ? 64 : 32;
+ ukinfo->avoid_mem_cflct = cqe_64byte_ena;
+ iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size;
+ if (attr->comp_vector < rf->ceqs_count)
+ info.ceq_id = attr->comp_vector;
+ info.ceq_id_valid = true;
+ info.ceqe_mask = 1;
+ info.type = IRDMA_CQ_TYPE_IWARP;
+ info.vsi = &iwdev->vsi;
+
+ if (udata) {
+ struct irdma_ucontext *ucontext;
+ struct irdma_create_cq_req req = {};
+ struct irdma_cq_mr *cqmr;
+ struct irdma_pbl *iwpbl;
+ struct irdma_pbl *iwpbl_shadow;
+ struct irdma_cq_mr *cqmr_shadow;
+
+ iwcq->user_mode = true;
+ ucontext =
+ rdma_udata_to_drv_context(udata, struct irdma_ucontext,
+ ibucontext);
+ if (ib_copy_from_udata(&req, udata,
+ min(sizeof(req), udata->inlen))) {
+ err_code = -EFAULT;
+ goto cq_free_rsrc;
+ }
+
+ spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+ iwpbl = irdma_get_pbl((unsigned long)req.user_cq_buf,
+ &ucontext->cq_reg_mem_list);
+ spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+ if (!iwpbl) {
+ err_code = -EPROTO;
+ goto cq_free_rsrc;
+ }
+
+ cqmr = &iwpbl->cq_mr;
+
+ if (rf->sc_dev.hw_attrs.uk_attrs.feature_flags &
+ IRDMA_FEATURE_CQ_RESIZE && !ucontext->legacy_mode) {
+ spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+ iwpbl_shadow = irdma_get_pbl(
+ (unsigned long)req.user_shadow_area,
+ &ucontext->cq_reg_mem_list);
+ spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+
+ if (!iwpbl_shadow) {
+ err_code = -EPROTO;
+ goto cq_free_rsrc;
+ }
+ cqmr_shadow = &iwpbl_shadow->cq_mr;
+ info.shadow_area_pa = cqmr_shadow->cq_pbl.addr;
+ cqmr->split = true;
+ } else {
+ info.shadow_area_pa = cqmr->shadow;
+ }
+ if (iwpbl->pbl_allocated) {
+ info.virtual_map = true;
+ info.pbl_chunk_size = 1;
+ info.first_pm_pbl_idx = cqmr->cq_pbl.idx;
+ } else {
+ info.cq_base_pa = cqmr->cq_pbl.addr;
+ }
+ } else {
+ /* Kmode allocations */
+ int rsize;
+
+ if (entries < 1 || entries > rf->max_cqe) {
+ err_code = -EINVAL;
+ goto cq_free_rsrc;
+ }
+
+ entries += 2;
+ if (!cqe_64byte_ena && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
+ entries *= 2;
+
+ if (entries & 1)
+ entries += 1; /* cq size must be an even number */
+
+ if (entries * cqe_size == IRDMA_HW_PAGE_SIZE)
+ entries += 2;
+
+ ukinfo->cq_size = entries;
+
+ if (cqe_64byte_ena)
+ rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_extended_cqe);
+ else
+ rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_cqe);
+ iwcq->kmem.size = ALIGN(round_up(rsize, 256), 256);
+ iwcq->kmem.va = dma_alloc_coherent(dev->hw->device,
+ iwcq->kmem.size,
+ &iwcq->kmem.pa, GFP_KERNEL);
+ if (!iwcq->kmem.va) {
+ err_code = -ENOMEM;
+ goto cq_free_rsrc;
+ }
+
+ iwcq->kmem_shadow.size = ALIGN(IRDMA_SHADOW_AREA_SIZE << 3,
+ 64);
+ iwcq->kmem_shadow.va = dma_alloc_coherent(dev->hw->device,
+ iwcq->kmem_shadow.size,
+ &iwcq->kmem_shadow.pa,
+ GFP_KERNEL);
+ if (!iwcq->kmem_shadow.va) {
+ err_code = -ENOMEM;
+ goto cq_free_rsrc;
+ }
+ info.shadow_area_pa = iwcq->kmem_shadow.pa;
+ ukinfo->shadow_area = iwcq->kmem_shadow.va;
+ ukinfo->cq_base = iwcq->kmem.va;
+ info.cq_base_pa = iwcq->kmem.pa;
+ }
+
+ info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2,
+ (u32)IRDMA_MAX_CQ_READ_THRESH);
+
+ if (irdma_sc_cq_init(cq, &info)) {
+ ibdev_dbg(&iwdev->ibdev, "VERBS: init cq fail\n");
+ err_code = -EPROTO;
+ goto cq_free_rsrc;
+ }
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+ if (!cqp_request) {
+ err_code = -ENOMEM;
+ goto cq_free_rsrc;
+ }
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_CQ_CREATE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.cq_create.cq = cq;
+ cqp_info->in.u.cq_create.check_overflow = true;
+ cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request;
+ err_code = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+ if (err_code)
+ goto cq_free_rsrc;
+
+ if (udata) {
+ struct irdma_create_cq_resp resp = {};
+
+ resp.cq_id = info.cq_uk_init_info.cq_id;
+ resp.cq_size = info.cq_uk_init_info.cq_size;
+ if (ib_copy_to_udata(udata, &resp,
+ min(sizeof(resp), udata->outlen))) {
+ ibdev_dbg(&iwdev->ibdev,
+ "VERBS: copy to udata failed\n");
+ err_code = -EPROTO;
+ goto cq_destroy;
+ }
+ }
+ rf->cq_table[cq_num] = iwcq;
+ init_completion(&iwcq->free_cq);
+
+ return 0;
+cq_destroy:
+ irdma_cq_wq_destroy(rf, cq);
+cq_free_rsrc:
+ irdma_cq_free_rsrc(rf, iwcq);
+
+ return err_code;
+}
+
+/**
+ * irdma_get_mr_access - get hw MR access permissions from IB access flags
+ * @access: IB access flags
+ * @hw_rev: Hardware version
+ */
+static inline u16 irdma_get_mr_access(int access, u8 hw_rev)
+{
+ u16 hw_access = 0;
+
+ hw_access |= (access & IB_ACCESS_LOCAL_WRITE) ?
+ IRDMA_ACCESS_FLAGS_LOCALWRITE : 0;
+ hw_access |= (access & IB_ACCESS_REMOTE_WRITE) ?
+ IRDMA_ACCESS_FLAGS_REMOTEWRITE : 0;
+ hw_access |= (access & IB_ACCESS_REMOTE_READ) ?
+ IRDMA_ACCESS_FLAGS_REMOTEREAD : 0;
+ if (hw_rev >= IRDMA_GEN_3) {
+ hw_access |= (access & IB_ACCESS_MW_BIND) ?
+ IRDMA_ACCESS_FLAGS_BIND_WINDOW : 0;
+ }
+ hw_access |= (access & IB_ZERO_BASED) ?
+ IRDMA_ACCESS_FLAGS_ZERO_BASED : 0;
+ hw_access |= IRDMA_ACCESS_FLAGS_LOCALREAD;
+
+ return hw_access;
+}
+
+/**
+ * irdma_free_stag - free stag resource
+ * @iwdev: irdma device
+ * @stag: stag to free
+ */
+static void irdma_free_stag(struct irdma_device *iwdev, u32 stag)
+{
+ u32 stag_idx;
+
+ stag_idx = (stag & iwdev->rf->mr_stagmask) >> IRDMA_CQPSQ_STAG_IDX_S;
+ irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_mrs, stag_idx);
+}
+
+/**
+ * irdma_create_stag - create random stag
+ * @iwdev: irdma device
+ */
+static u32 irdma_create_stag(struct irdma_device *iwdev)
+{
+ u32 stag = 0;
+ u32 stag_index = 0;
+ u32 next_stag_index;
+ u32 driver_key;
+ u32 random;
+ u8 consumer_key;
+ int ret;
+
+ get_random_bytes(&random, sizeof(random));
+ consumer_key = (u8)random;
+
+ driver_key = random & ~iwdev->rf->mr_stagmask;
+ next_stag_index = (random & iwdev->rf->mr_stagmask) >> 8;
+ next_stag_index %= iwdev->rf->max_mr;
+
+ ret = irdma_alloc_rsrc(iwdev->rf, iwdev->rf->allocated_mrs,
+ iwdev->rf->max_mr, &stag_index,
+ &next_stag_index);
+ if (ret)
+ return stag;
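+ /* stag layout: resource index in the upper bits, OR'd with the driver key, plus the 8-bit consumer key in the low byte */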
+ stag = stag_index << IRDMA_CQPSQ_STAG_IDX_S;
+ stag |= driver_key;
+ stag += (u32)consumer_key;
+
+ return stag;
+}
+
+/**
+ * irdma_next_pbl_addr - Get next pbl address
+ * @pbl: pointer to a pble
+ * @pinfo: info pointer
+ * @idx: index
+ */
+static inline u64 *irdma_next_pbl_addr(u64 *pbl, struct irdma_pble_info **pinfo,
+ u32 *idx)
+{
+ *idx += 1;
+ if (!(*pinfo) || *idx != (*pinfo)->cnt)
+ return ++pbl;
+ *idx = 0;
+ (*pinfo)++;
+
+ return (*pinfo)->addr;
+}
+
+/**
+ * irdma_copy_user_pgaddrs - copy user page addresses to pbles locally
+ * @iwmr: iwmr for IB's user page addresses
+ * @pbl: pbl pointer to save level 1 or level 0 pble
+ * @level: indicates level 0, 1 or 2
+ */
+static void irdma_copy_user_pgaddrs(struct irdma_mr *iwmr, u64 *pbl,
+ enum irdma_pble_level level)
+{
+ struct ib_umem *region = iwmr->region;
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
+ struct irdma_pble_info *pinfo;
+ struct ib_block_iter biter;
+ u32 idx = 0;
+ u32 pbl_cnt = 0;
+
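+ /* level 1 uses one contiguous pble chunk (pinfo stays NULL); level 2 walks the leaf chunks via irdma_next_pbl_addr() */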
+ pinfo = (level == PBLE_LEVEL_1) ? NULL : palloc->level2.leaf;
+
+ if (iwmr->type == IRDMA_MEMREG_TYPE_QP)
+ iwpbl->qp_mr.sq_page = sg_page(region->sgt_append.sgt.sgl);
+
+ rdma_umem_for_each_dma_block(region, &biter, iwmr->page_size) {
+ *pbl = rdma_block_iter_dma_address(&biter);
+ if (++pbl_cnt == palloc->total_cnt)
+ break;
+ pbl = irdma_next_pbl_addr(pbl, &pinfo, &idx);
+ }
+}
+
+/**
+ * irdma_check_mem_contiguous - check if pbls stored in arr are contiguous
+ * @arr: lvl1 pbl array
+ * @npages: page count
+ * @pg_size: page size
+ */
+static bool irdma_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size)
+{
+ u32 pg_idx;
+
+ for (pg_idx = 0; pg_idx < npages; pg_idx++) {
+ if ((*arr + (pg_size * pg_idx)) != arr[pg_idx])
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * irdma_check_mr_contiguous - check if MR is physically contiguous
+ * @palloc: pbl allocation struct
+ * @pg_size: page size
+ */
+static bool irdma_check_mr_contiguous(struct irdma_pble_alloc *palloc,
+ u32 pg_size)
+{
+ struct irdma_pble_level2 *lvl2 = &palloc->level2;
+ struct irdma_pble_info *leaf = lvl2->leaf;
+ u64 *arr = NULL;
+ u64 *start_addr = NULL;
+ int i;
+ bool ret;
+
+ if (palloc->level == PBLE_LEVEL_1) {
+ arr = palloc->level1.addr;
+ ret = irdma_check_mem_contiguous(arr, palloc->total_cnt,
+ pg_size);
+ return ret;
+ }
+
+ start_addr = leaf->addr;
+
+ for (i = 0; i < lvl2->leaf_cnt; i++, leaf++) {
+ arr = leaf->addr;
+ if ((*start_addr + (i * pg_size * PBLE_PER_PAGE)) != *arr)
+ return false;
+ ret = irdma_check_mem_contiguous(arr, leaf->cnt, pg_size);
+ if (!ret)
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * irdma_setup_pbles - copy user page addresses to pbles
+ * @rf: RDMA PCI function
+ * @iwmr: mr pointer for this memory registration
+ * @lvl: requested pble levels
+ */
+static int irdma_setup_pbles(struct irdma_pci_f *rf, struct irdma_mr *iwmr,
+ u8 lvl)
+{
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
+ struct irdma_pble_info *pinfo;
+ u64 *pbl;
+ int status;
+ enum irdma_pble_level level = PBLE_LEVEL_1;
+
+ if (lvl) {
+ status = irdma_get_pble(rf->pble_rsrc, palloc, iwmr->page_cnt,
+ lvl);
+ if (status)
+ return status;
+
+ iwpbl->pbl_allocated = true;
+ level = palloc->level;
+ pinfo = (level == PBLE_LEVEL_1) ? &palloc->level1 :
+ palloc->level2.leaf;
+ pbl = pinfo->addr;
+ } else {
+ pbl = iwmr->pgaddrmem;
+ }
+
+ irdma_copy_user_pgaddrs(iwmr, pbl, level);
+
+ if (lvl)
+ iwmr->pgaddrmem[0] = *pbl;
+
+ return 0;
+}
+
+/**
+ * irdma_handle_q_mem - handle memory for qp, srq and cq
+ * @iwdev: irdma device
+ * @req: information for q memory management
+ * @iwpbl: pble struct
+ * @lvl: pble level mask
+ */
+static int irdma_handle_q_mem(struct irdma_device *iwdev,
+ struct irdma_mem_reg_req *req,
+ struct irdma_pbl *iwpbl, u8 lvl)
+{
+ struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
+ struct irdma_mr *iwmr = iwpbl->iwmr;
+ struct irdma_qp_mr *qpmr = &iwpbl->qp_mr;
+ struct irdma_cq_mr *cqmr = &iwpbl->cq_mr;
+ struct irdma_srq_mr *srqmr = &iwpbl->srq_mr;
+ struct irdma_hmc_pble *hmc_p;
+ u64 *arr = iwmr->pgaddrmem;
+ u32 pg_size, total;
+ int err = 0;
+ bool ret = true;
+
+ pg_size = iwmr->page_size;
+ err = irdma_setup_pbles(iwdev->rf, iwmr, lvl);
+ if (err)
+ return err;
+
+ if (lvl)
+ arr = palloc->level1.addr;
+
+ switch (iwmr->type) {
+ case IRDMA_MEMREG_TYPE_QP:
+ total = req->sq_pages + req->rq_pages;
+ hmc_p = &qpmr->sq_pbl;
+ qpmr->shadow = (dma_addr_t)arr[total];
+ /* Need to use physical address for RQ of QP
+ * in case it is associated with SRQ.
+ */
+ qpmr->rq_pa = (dma_addr_t)arr[req->sq_pages];
+ if (lvl) {
+ ret = irdma_check_mem_contiguous(arr, req->sq_pages,
+ pg_size);
+ if (ret)
+ ret = irdma_check_mem_contiguous(&arr[req->sq_pages],
+ req->rq_pages,
+ pg_size);
+ }
+
+ if (!ret) {
+ hmc_p->idx = palloc->level1.idx;
+ hmc_p = &qpmr->rq_pbl;
+ hmc_p->idx = palloc->level1.idx + req->sq_pages;
+ } else {
+ hmc_p->addr = arr[0];
+ hmc_p = &qpmr->rq_pbl;
+ hmc_p->addr = arr[req->sq_pages];
+ }
+ break;
+ case IRDMA_MEMREG_TYPE_SRQ:
+ hmc_p = &srqmr->srq_pbl;
+ srqmr->shadow = (dma_addr_t)arr[req->rq_pages];
+ if (lvl)
+ ret = irdma_check_mem_contiguous(arr, req->rq_pages,
+ pg_size);
+
+ if (!ret)
+ hmc_p->idx = palloc->level1.idx;
+ else
+ hmc_p->addr = arr[0];
+ break;
+ case IRDMA_MEMREG_TYPE_CQ:
+ hmc_p = &cqmr->cq_pbl;
+
+ if (!cqmr->split)
+ cqmr->shadow = (dma_addr_t)arr[req->cq_pages];
+
+ if (lvl)
+ ret = irdma_check_mem_contiguous(arr, req->cq_pages,
+ pg_size);
+
+ if (!ret)
+ hmc_p->idx = palloc->level1.idx;
+ else
+ hmc_p->addr = arr[0];
+ break;
+ default:
+ ibdev_dbg(&iwdev->ibdev, "VERBS: MR type error\n");
+ err = -EINVAL;
+ }
+
+ if (lvl && ret) {
+ irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
+ iwpbl->pbl_allocated = false;
+ }
+
+ return err;
+}
+
+/**
+ * irdma_hw_alloc_mw - create the hw memory window
+ * @iwdev: irdma device
+ * @iwmr: pointer to memory window info
+ */
+static int irdma_hw_alloc_mw(struct irdma_device *iwdev, struct irdma_mr *iwmr)
+{
+ struct irdma_mw_alloc_info *info;
+ struct irdma_pd *iwpd = to_iwpd(iwmr->ibmr.pd);
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ info = &cqp_info->in.u.mw_alloc.info;
+ memset(info, 0, sizeof(*info));
+ if (iwmr->ibmw.type == IB_MW_TYPE_1)
+ info->mw_wide = true;
+
+ info->page_size = PAGE_SIZE;
+ info->mw_stag_index = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S;
+ info->pd_id = iwpd->sc_pd.pd_id;
+ info->remote_access = true;
+ cqp_info->cqp_cmd = IRDMA_OP_MW_ALLOC;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.mw_alloc.dev = &iwdev->rf->sc_dev;
+ cqp_info->in.u.mw_alloc.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
+ irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_alloc_mw - Allocate memory window
+ * @ibmw: Memory Window
+ * @udata: user data pointer
+ */
+static int irdma_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
+{
+ struct irdma_device *iwdev = to_iwdev(ibmw->device);
+ struct irdma_mr *iwmr = to_iwmw(ibmw);
+ int err_code;
+ u32 stag;
+
+ stag = irdma_create_stag(iwdev);
+ if (!stag)
+ return -ENOMEM;
+
+ iwmr->stag = stag;
+ ibmw->rkey = stag;
+
+ err_code = irdma_hw_alloc_mw(iwdev, iwmr);
+ if (err_code) {
+ irdma_free_stag(iwdev, stag);
+ return err_code;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_dealloc_mw - Dealloc memory window
+ * @ibmw: memory window structure.
+ */
+static int irdma_dealloc_mw(struct ib_mw *ibmw)
+{
+ struct ib_pd *ibpd = ibmw->pd;
+ struct irdma_pd *iwpd = to_iwpd(ibpd);
+ struct irdma_mr *iwmr = to_iwmr((struct ib_mr *)ibmw);
+ struct irdma_device *iwdev = to_iwdev(ibmw->device);
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_dealloc_stag_info *info;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ info = &cqp_info->in.u.dealloc_stag.info;
+ memset(info, 0, sizeof(*info));
+ info->pd_id = iwpd->sc_pd.pd_id;
+ info->stag_idx = ibmw->rkey >> IRDMA_CQPSQ_STAG_IDX_S;
+ info->mr = false;
+ cqp_info->cqp_cmd = IRDMA_OP_DEALLOC_STAG;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.dealloc_stag.dev = &iwdev->rf->sc_dev;
+ cqp_info->in.u.dealloc_stag.scratch = (uintptr_t)cqp_request;
+ irdma_handle_cqp_op(iwdev->rf, cqp_request);
+ irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
+ irdma_free_stag(iwdev, iwmr->stag);
+
+ return 0;
+}
+
+/**
+ * irdma_hw_alloc_stag - cqp command to allocate stag
+ * @iwdev: irdma device
+ * @iwmr: irdma mr pointer
+ */
+static int irdma_hw_alloc_stag(struct irdma_device *iwdev,
+ struct irdma_mr *iwmr)
+{
+ struct irdma_allocate_stag_info *info;
+ struct ib_pd *pd = iwmr->ibmr.pd;
+ struct irdma_pd *iwpd = to_iwpd(pd);
+ int status;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ info = &cqp_info->in.u.alloc_stag.info;
+ info->page_size = PAGE_SIZE;
+ info->stag_idx = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S;
+ info->pd_id = iwpd->sc_pd.pd_id;
+ info->total_len = iwmr->len;
+ info->remote_access = true;
+ cqp_info->cqp_cmd = IRDMA_OP_ALLOC_STAG;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.alloc_stag.dev = &iwdev->rf->sc_dev;
+ cqp_info->in.u.alloc_stag.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
+ irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
+ if (status)
+ return status;
+
+ iwmr->is_hwreg = true;
+ return 0;
+}
+
+/**
+ * irdma_alloc_mr - register stag for fast memory registration
+ * @pd: ibpd pointer
+ * @mr_type: memory type for stag registration
+ * @max_num_sg: max number of pages
+ */
+static struct ib_mr *irdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg)
+{
+ struct irdma_device *iwdev = to_iwdev(pd->device);
+ struct irdma_pble_alloc *palloc;
+ struct irdma_pbl *iwpbl;
+ struct irdma_mr *iwmr;
+ u32 stag;
+ int err_code;
+
+ iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
+ if (!iwmr)
+ return ERR_PTR(-ENOMEM);
+
+ stag = irdma_create_stag(iwdev);
+ if (!stag) {
+ err_code = -ENOMEM;
+ goto err;
+ }
+
+ iwmr->stag = stag;
+ iwmr->ibmr.rkey = stag;
+ iwmr->ibmr.lkey = stag;
+ iwmr->ibmr.pd = pd;
+ iwmr->ibmr.device = pd->device;
+ iwpbl = &iwmr->iwpbl;
+ iwpbl->iwmr = iwmr;
+ iwmr->type = IRDMA_MEMREG_TYPE_MEM;
+ palloc = &iwpbl->pble_alloc;
+ iwmr->page_cnt = max_num_sg;
+ /* Use system PAGE_SIZE as the sg page sizes are unknown at this point */
+ iwmr->len = max_num_sg * PAGE_SIZE;
+ err_code = irdma_get_pble(iwdev->rf->pble_rsrc, palloc, iwmr->page_cnt,
+ false);
+ if (err_code)
+ goto err_get_pble;
+
+ err_code = irdma_hw_alloc_stag(iwdev, iwmr);
+ if (err_code)
+ goto err_alloc_stag;
+
+ iwpbl->pbl_allocated = true;
+
+ return &iwmr->ibmr;
+err_alloc_stag:
+ irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
+err_get_pble:
+ irdma_free_stag(iwdev, stag);
+err:
+ kfree(iwmr);
+
+ return ERR_PTR(err_code);
+}
+
+/**
+ * irdma_set_page - populate pbl list for fmr
+ * @ibmr: ib mem to access iwarp mr pointer
+ * @addr: page dma address for the pbl list
+ */
+static int irdma_set_page(struct ib_mr *ibmr, u64 addr)
+{
+ struct irdma_mr *iwmr = to_iwmr(ibmr);
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
+ u64 *pbl;
+
+ if (unlikely(iwmr->npages == iwmr->page_cnt))
+ return -ENOMEM;
+
+ if (palloc->level == PBLE_LEVEL_2) {
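+ /* level 2: locate the leaf holding this entry, then the slot within that leaf */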
+ struct irdma_pble_info *palloc_info =
+ palloc->level2.leaf + (iwmr->npages >> PBLE_512_SHIFT);
+
+ palloc_info->addr[iwmr->npages & (PBLE_PER_PAGE - 1)] = addr;
+ } else {
+ pbl = palloc->level1.addr;
+ pbl[iwmr->npages] = addr;
+ }
+ iwmr->npages++;
+
+ return 0;
+}
+
+/**
+ * irdma_map_mr_sg - map of sg list for fmr
+ * @ibmr: ib mem to access iwarp mr pointer
+ * @sg: scatter gather list
+ * @sg_nents: number of sg pages
+ * @sg_offset: offset into the scatter gather list
+ */
+static int irdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
+ int sg_nents, unsigned int *sg_offset)
+{
+ struct irdma_mr *iwmr = to_iwmr(ibmr);
+
+ iwmr->npages = 0;
+
+ return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, irdma_set_page);
+}
+
+/**
+ * irdma_hwreg_mr - send cqp command for memory registration
+ * @iwdev: irdma device
+ * @iwmr: irdma mr pointer
+ * @access: access for MR
+ */
+static int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr,
+ u16 access)
+{
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ struct irdma_reg_ns_stag_info *stag_info;
+ struct ib_pd *pd = iwmr->ibmr.pd;
+ struct irdma_pd *iwpd = to_iwpd(pd);
+ struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int ret;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ stag_info = &cqp_info->in.u.mr_reg_non_shared.info;
+ stag_info->va = iwpbl->user_base;
+ stag_info->stag_idx = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S;
+ stag_info->stag_key = (u8)iwmr->stag;
+ stag_info->total_len = iwmr->len;
+ stag_info->access_rights = irdma_get_mr_access(access,
+ iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev);
+ if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_ATOMIC_OPS)
+ stag_info->remote_atomics_en = (access & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
+ stag_info->pd_id = iwpd->sc_pd.pd_id;
+ stag_info->all_memory = iwmr->dma_mr;
+ if (stag_info->access_rights & IRDMA_ACCESS_FLAGS_ZERO_BASED)
+ stag_info->addr_type = IRDMA_ADDR_TYPE_ZERO_BASED;
+ else
+ stag_info->addr_type = IRDMA_ADDR_TYPE_VA_BASED;
+ stag_info->page_size = iwmr->page_size;
+
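+ /* the programmed pble index depends on the tree depth: level 1 uses the leaf index, level 2 uses the root index */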
+ if (iwpbl->pbl_allocated) {
+ if (palloc->level == PBLE_LEVEL_1) {
+ stag_info->first_pm_pbl_index = palloc->level1.idx;
+ stag_info->chunk_size = 1;
+ } else {
+ stag_info->first_pm_pbl_index = palloc->level2.root.idx;
+ stag_info->chunk_size = 3;
+ }
+ } else {
+ stag_info->reg_addr_pa = iwmr->pgaddrmem[0];
+ }
+
+ cqp_info->cqp_cmd = IRDMA_OP_MR_REG_NON_SHARED;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.mr_reg_non_shared.dev = &iwdev->rf->sc_dev;
+ cqp_info->in.u.mr_reg_non_shared.scratch = (uintptr_t)cqp_request;
+ ret = irdma_handle_cqp_op(iwdev->rf, cqp_request);
+ irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
+
+ if (!ret)
+ iwmr->is_hwreg = true;
+
+ return ret;
+}
+
+static int irdma_reg_user_mr_type_mem(struct irdma_mr *iwmr, int access,
+ bool create_stag)
+{
+ struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device);
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ u32 stag = 0;
+ u8 lvl;
+ int err;
+
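+ /* a single-page MR needs no pble (level 0); otherwise let the pble manager pick level 1 or level 2 */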
+ lvl = iwmr->page_cnt != 1 ? PBLE_LEVEL_1 | PBLE_LEVEL_2 : PBLE_LEVEL_0;
+
+ err = irdma_setup_pbles(iwdev->rf, iwmr, lvl);
+ if (err)
+ return err;
+
+ if (lvl) {
+ err = irdma_check_mr_contiguous(&iwpbl->pble_alloc,
+ iwmr->page_size);
+ if (err) {
+ irdma_free_pble(iwdev->rf->pble_rsrc, &iwpbl->pble_alloc);
+ iwpbl->pbl_allocated = false;
+ }
+ }
+
+ if (create_stag) {
+ stag = irdma_create_stag(iwdev);
+ if (!stag) {
+ err = -ENOMEM;
+ goto free_pble;
+ }
+
+ iwmr->stag = stag;
+ iwmr->ibmr.rkey = stag;
+ iwmr->ibmr.lkey = stag;
+ }
+
+ err = irdma_hwreg_mr(iwdev, iwmr, access);
+ if (err)
+ goto err_hwreg;
+
+ return 0;
+
+err_hwreg:
+ if (stag)
+ irdma_free_stag(iwdev, stag);
+
+free_pble:
+ if (iwpbl->pble_alloc.level != PBLE_LEVEL_0 && iwpbl->pbl_allocated)
+ irdma_free_pble(iwdev->rf->pble_rsrc, &iwpbl->pble_alloc);
+
+ return err;
+}
+
+static struct irdma_mr *irdma_alloc_iwmr(struct ib_umem *region,
+ struct ib_pd *pd, u64 virt,
+ enum irdma_memreg_type reg_type)
+{
+ struct irdma_device *iwdev = to_iwdev(pd->device);
+ struct irdma_pbl *iwpbl;
+ struct irdma_mr *iwmr;
+ unsigned long pgsz_bitmap;
+
+ iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
+ if (!iwmr)
+ return ERR_PTR(-ENOMEM);
+
+ iwpbl = &iwmr->iwpbl;
+ iwpbl->iwmr = iwmr;
+ iwmr->region = region;
+ iwmr->ibmr.pd = pd;
+ iwmr->ibmr.device = pd->device;
+ iwmr->ibmr.iova = virt;
+ iwmr->type = reg_type;
+
+ pgsz_bitmap = (reg_type == IRDMA_MEMREG_TYPE_MEM) ?
+ iwdev->rf->sc_dev.hw_attrs.page_size_cap : SZ_4K;
+
+ iwmr->page_size = ib_umem_find_best_pgsz(region, pgsz_bitmap, virt);
+ if (unlikely(!iwmr->page_size)) {
+ kfree(iwmr);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ iwmr->len = region->length;
+ iwpbl->user_base = virt;
+ iwmr->page_cnt = ib_umem_num_dma_blocks(region, iwmr->page_size);
+
+ return iwmr;
+}
+
+static void irdma_free_iwmr(struct irdma_mr *iwmr)
+{
+ kfree(iwmr);
+}
+
+static int irdma_reg_user_mr_type_qp(struct irdma_mem_reg_req req,
+ struct ib_udata *udata,
+ struct irdma_mr *iwmr)
+{
+ struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device);
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ struct irdma_ucontext *ucontext = NULL;
+ unsigned long flags;
+ u32 total;
+ int err;
+ u8 lvl;
+
+ /* iWarp: Catch page not starting on OS page boundary */
+ if (!rdma_protocol_roce(&iwdev->ibdev, 1) &&
+ ib_umem_offset(iwmr->region))
+ return -EINVAL;
+
+ total = req.sq_pages + req.rq_pages + 1;
+ if (total > iwmr->page_cnt)
+ return -EINVAL;
+
+ total = req.sq_pages + req.rq_pages;
+ lvl = total > 2 ? PBLE_LEVEL_1 : PBLE_LEVEL_0;
+ err = irdma_handle_q_mem(iwdev, &req, iwpbl, lvl);
+ if (err)
+ return err;
+
+ ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext,
+ ibucontext);
+ spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
+ list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list);
+ iwpbl->on_list = true;
+ spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
+
+ return 0;
+}
+
+static int irdma_reg_user_mr_type_srq(struct irdma_mem_reg_req req,
+ struct ib_udata *udata,
+ struct irdma_mr *iwmr)
+{
+ struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device);
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ struct irdma_ucontext *ucontext;
+ unsigned long flags;
+ u32 total;
+ int err;
+ u8 lvl;
+
+ total = req.rq_pages + IRDMA_SHADOW_PGCNT;
+ if (total > iwmr->page_cnt)
+ return -EINVAL;
+
+ lvl = req.rq_pages > 1 ? PBLE_LEVEL_1 : PBLE_LEVEL_0;
+ err = irdma_handle_q_mem(iwdev, &req, iwpbl, lvl);
+ if (err)
+ return err;
+
+ ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext,
+ ibucontext);
+ spin_lock_irqsave(&ucontext->srq_reg_mem_list_lock, flags);
+ list_add_tail(&iwpbl->list, &ucontext->srq_reg_mem_list);
+ iwpbl->on_list = true;
+ spin_unlock_irqrestore(&ucontext->srq_reg_mem_list_lock, flags);
+
+ return 0;
+}
+
+static int irdma_reg_user_mr_type_cq(struct irdma_mem_reg_req req,
+ struct ib_udata *udata,
+ struct irdma_mr *iwmr)
+{
+ struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device);
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ struct irdma_ucontext *ucontext = NULL;
+ u8 shadow_pgcnt = 1;
+ unsigned long flags;
+ u32 total;
+ int err;
+ u8 lvl;
+
+ if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_CQ_RESIZE)
+ shadow_pgcnt = 0;
+ total = req.cq_pages + shadow_pgcnt;
+ if (total > iwmr->page_cnt)
+ return -EINVAL;
+
+ lvl = req.cq_pages > 1 ? PBLE_LEVEL_1 : PBLE_LEVEL_0;
+ err = irdma_handle_q_mem(iwdev, &req, iwpbl, lvl);
+ if (err)
+ return err;
+
+ ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext,
+ ibucontext);
+ spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+ list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list);
+ iwpbl->on_list = true;
+ spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+
+ return 0;
+}
+
+/**
+ * irdma_reg_user_mr - Register a user memory region
+ * @pd: ptr of pd
+ * @start: virtual start address
+ * @len: length of mr
+ * @virt: virtual address
+ * @access: access of mr
+ * @dmah: dma handle
+ * @udata: user data
+ */
+static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
+ u64 virt, int access,
+ struct ib_dmah *dmah,
+ struct ib_udata *udata)
+{
+#define IRDMA_MEM_REG_MIN_REQ_LEN offsetofend(struct irdma_mem_reg_req, sq_pages)
+ struct irdma_device *iwdev = to_iwdev(pd->device);
+ struct irdma_mem_reg_req req = {};
+ struct ib_umem *region = NULL;
+ struct irdma_mr *iwmr = NULL;
+ int err;
+
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
+ return ERR_PTR(-EINVAL);
+
+ if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN)
+ return ERR_PTR(-EINVAL);
+
+ region = ib_umem_get(pd->device, start, len, access);
+
+ if (IS_ERR(region)) {
+ ibdev_dbg(&iwdev->ibdev,
+ "VERBS: Failed to create ib_umem region\n");
+ return (struct ib_mr *)region;
+ }
+
+ if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen))) {
+ ib_umem_release(region);
+ return ERR_PTR(-EFAULT);
+ }
+
+ iwmr = irdma_alloc_iwmr(region, pd, virt, req.reg_type);
+ if (IS_ERR(iwmr)) {
+ ib_umem_release(region);
+ return (struct ib_mr *)iwmr;
+ }
+
+ switch (req.reg_type) {
+ case IRDMA_MEMREG_TYPE_QP:
+ err = irdma_reg_user_mr_type_qp(req, udata, iwmr);
+ if (err)
+ goto error;
+
+ break;
+ case IRDMA_MEMREG_TYPE_SRQ:
+ err = irdma_reg_user_mr_type_srq(req, udata, iwmr);
+ if (err)
+ goto error;
+
+ break;
+ case IRDMA_MEMREG_TYPE_CQ:
+ err = irdma_reg_user_mr_type_cq(req, udata, iwmr);
+ if (err)
+ goto error;
+ break;
+ case IRDMA_MEMREG_TYPE_MEM:
+ err = irdma_reg_user_mr_type_mem(iwmr, access, true);
+ if (err)
+ goto error;
+
+ break;
+ default:
+ err = -EINVAL;
+ goto error;
+ }
+
+ return &iwmr->ibmr;
+error:
+ ib_umem_release(region);
+ irdma_free_iwmr(iwmr);
+
+ return ERR_PTR(err);
+}
+
+static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
+ u64 len, u64 virt,
+ int fd, int access,
+ struct ib_dmah *dmah,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct irdma_device *iwdev = to_iwdev(pd->device);
+ struct ib_umem_dmabuf *umem_dmabuf;
+ struct irdma_mr *iwmr;
+ int err;
+
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
+ return ERR_PTR(-EINVAL);
+
+ umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, start, len, fd, access);
+ if (IS_ERR(umem_dmabuf)) {
+ ibdev_dbg(&iwdev->ibdev, "Failed to get dmabuf umem[%pe]\n",
+ umem_dmabuf);
+ return ERR_CAST(umem_dmabuf);
+ }
+
+ iwmr = irdma_alloc_iwmr(&umem_dmabuf->umem, pd, virt, IRDMA_MEMREG_TYPE_MEM);
+ if (IS_ERR(iwmr)) {
+ err = PTR_ERR(iwmr);
+ goto err_release;
+ }
+
+ err = irdma_reg_user_mr_type_mem(iwmr, access, true);
+ if (err)
+ goto err_iwmr;
+
+ return &iwmr->ibmr;
+
+err_iwmr:
+ irdma_free_iwmr(iwmr);
+
+err_release:
+ ib_umem_release(&umem_dmabuf->umem);
+
+ return ERR_PTR(err);
+}
+
+static int irdma_hwdereg_mr(struct ib_mr *ib_mr)
+{
+ struct irdma_device *iwdev = to_iwdev(ib_mr->device);
+ struct irdma_mr *iwmr = to_iwmr(ib_mr);
+ struct irdma_pd *iwpd = to_iwpd(ib_mr->pd);
+ struct irdma_dealloc_stag_info *info;
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+
+ /* Skip the HW MR de-register when it has already been de-registered
+ * during an MR re-register and the re-registration failed.
+ */
+ if (!iwmr->is_hwreg)
+ return 0;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ info = &cqp_info->in.u.dealloc_stag.info;
+ info->pd_id = iwpd->sc_pd.pd_id;
+ info->stag_idx = ib_mr->rkey >> IRDMA_CQPSQ_STAG_IDX_S;
+ info->mr = true;
+ if (iwpbl->pbl_allocated)
+ info->dealloc_pbl = true;
+
+ cqp_info->cqp_cmd = IRDMA_OP_DEALLOC_STAG;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.dealloc_stag.dev = &iwdev->rf->sc_dev;
+ cqp_info->in.u.dealloc_stag.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
+ irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
+ if (status)
+ return status;
+
+ iwmr->is_hwreg = false;
+ return 0;
+}
+
+/*
+ * irdma_rereg_mr_trans - re-register a user MR for a translation change.
+ * @iwmr: ptr of iwmr
+ * @start: virtual start address
+ * @len: length of mr
+ * @virt: virtual address
+ *
+ * Re-register a user memory region when a translation change is requested.
+ * Re-register a new region while reusing the stag from the original registration.
+ */
+static int irdma_rereg_mr_trans(struct irdma_mr *iwmr, u64 start, u64 len,
+ u64 virt)
+{
+ struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device);
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ struct ib_pd *pd = iwmr->ibmr.pd;
+ struct ib_umem *region;
+ int err;
+
+ region = ib_umem_get(pd->device, start, len, iwmr->access);
+ if (IS_ERR(region))
+ return PTR_ERR(region);
+
+ iwmr->region = region;
+ iwmr->ibmr.iova = virt;
+ iwmr->ibmr.pd = pd;
+ iwmr->page_size = ib_umem_find_best_pgsz(region,
+ iwdev->rf->sc_dev.hw_attrs.page_size_cap,
+ virt);
+ if (unlikely(!iwmr->page_size)) {
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+
+ iwmr->len = region->length;
+ iwpbl->user_base = virt;
+ iwmr->page_cnt = ib_umem_num_dma_blocks(region, iwmr->page_size);
+
+ err = irdma_reg_user_mr_type_mem(iwmr, iwmr->access, false);
+ if (err)
+ goto err;
+
+ return 0;
+
+err:
+ ib_umem_release(region);
+ return err;
+}
+
+/*
+ * irdma_rereg_user_mr - re-register a user memory region (MR)
+ * @ib_mr: ib mr to access iwarp mr pointer
+ * @flags: bit mask indicating which MR attributes are modified
+ * @start: virtual start address
+ * @len: length of mr
+ * @virt: virtual address
+ * @new_access: bit mask of access flags
+ * @new_pd: ptr of pd
+ * @udata: user data
+ *
+ * Return:
+ * NULL - Success, existing MR updated
+ * ERR_PTR - error occurred
+ */
+static struct ib_mr *irdma_rereg_user_mr(struct ib_mr *ib_mr, int flags,
+ u64 start, u64 len, u64 virt,
+ int new_access, struct ib_pd *new_pd,
+ struct ib_udata *udata)
+{
+ struct irdma_device *iwdev = to_iwdev(ib_mr->device);
+ struct irdma_mr *iwmr = to_iwmr(ib_mr);
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ int ret;
+
+ if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
+ return ERR_PTR(-EINVAL);
+
+ if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ ret = irdma_hwdereg_mr(ib_mr);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (flags & IB_MR_REREG_ACCESS)
+ iwmr->access = new_access;
+
+ if (flags & IB_MR_REREG_PD) {
+ iwmr->ibmr.pd = new_pd;
+ iwmr->ibmr.device = new_pd->device;
+ }
+
+ if (flags & IB_MR_REREG_TRANS) {
+ if (iwpbl->pbl_allocated) {
+ irdma_free_pble(iwdev->rf->pble_rsrc,
+ &iwpbl->pble_alloc);
+ iwpbl->pbl_allocated = false;
+ }
+ if (iwmr->region) {
+ ib_umem_release(iwmr->region);
+ iwmr->region = NULL;
+ }
+
+ ret = irdma_rereg_mr_trans(iwmr, start, len, virt);
+ } else
+ ret = irdma_hwreg_mr(iwdev, iwmr, iwmr->access);
+ if (ret)
+ return ERR_PTR(ret);
+
+ return NULL;
+}
+
+/**
+ * irdma_reg_phys_mr - register kernel physical memory
+ * @pd: ibpd pointer
+ * @addr: physical address of memory to register
+ * @size: size of memory to register
+ * @access: Access rights
+ * @iova_start: start of virtual address for physical buffers
+ * @dma_mr: Flag indicating whether this region is a PD DMA MR
+ */
+struct ib_mr *irdma_reg_phys_mr(struct ib_pd *pd, u64 addr, u64 size, int access,
+ u64 *iova_start, bool dma_mr)
+{
+ struct irdma_device *iwdev = to_iwdev(pd->device);
+ struct irdma_pbl *iwpbl;
+ struct irdma_mr *iwmr;
+ u32 stag;
+ int ret;
+
+ iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
+ if (!iwmr)
+ return ERR_PTR(-ENOMEM);
+
+ iwmr->ibmr.pd = pd;
+ iwmr->ibmr.device = pd->device;
+ iwpbl = &iwmr->iwpbl;
+ iwpbl->iwmr = iwmr;
+ iwmr->type = IRDMA_MEMREG_TYPE_MEM;
+ iwmr->dma_mr = dma_mr;
+ iwpbl->user_base = *iova_start;
+ stag = irdma_create_stag(iwdev);
+ if (!stag) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ iwmr->stag = stag;
+ iwmr->ibmr.iova = *iova_start;
+ iwmr->ibmr.rkey = stag;
+ iwmr->ibmr.lkey = stag;
+ iwmr->page_cnt = 1;
+ iwmr->pgaddrmem[0] = addr;
+ iwmr->len = size;
+ iwmr->page_size = SZ_4K;
+ ret = irdma_hwreg_mr(iwdev, iwmr, access);
+ if (ret) {
+ irdma_free_stag(iwdev, stag);
+ goto err;
+ }
+
+ return &iwmr->ibmr;
+
+err:
+ kfree(iwmr);
+
+ return ERR_PTR(ret);
+}
+
+/**
+ * irdma_get_dma_mr - register physical mem
+ * @pd: ptr of pd
+ * @acc: access for memory
+ */
+static struct ib_mr *irdma_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ u64 kva = 0;
+
+ return irdma_reg_phys_mr(pd, 0, 0, acc, &kva, true);
+}
+
+/**
+ * irdma_del_memlist - delete pbl list entries for CQ/QP/SRQ
+ * @iwmr: iwmr for IB's user page addresses
+ * @ucontext: ptr to user context
+ */
+static void irdma_del_memlist(struct irdma_mr *iwmr,
+ struct irdma_ucontext *ucontext)
+{
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ unsigned long flags;
+
+ switch (iwmr->type) {
+ case IRDMA_MEMREG_TYPE_CQ:
+ spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+ if (iwpbl->on_list) {
+ iwpbl->on_list = false;
+ list_del(&iwpbl->list);
+ }
+ spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+ break;
+ case IRDMA_MEMREG_TYPE_QP:
+ spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
+ if (iwpbl->on_list) {
+ iwpbl->on_list = false;
+ list_del(&iwpbl->list);
+ }
+ spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
+ break;
+ case IRDMA_MEMREG_TYPE_SRQ:
+ spin_lock_irqsave(&ucontext->srq_reg_mem_list_lock, flags);
+ if (iwpbl->on_list) {
+ iwpbl->on_list = false;
+ list_del(&iwpbl->list);
+ }
+ spin_unlock_irqrestore(&ucontext->srq_reg_mem_list_lock, flags);
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * irdma_dereg_mr - deregister mr
+ * @ib_mr: mr ptr for dereg
+ * @udata: user data
+ */
+static int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
+{
+ struct irdma_mr *iwmr = to_iwmr(ib_mr);
+ struct irdma_device *iwdev = to_iwdev(ib_mr->device);
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ int ret;
+
+ if (iwmr->type != IRDMA_MEMREG_TYPE_MEM) {
+ if (iwmr->region) {
+ struct irdma_ucontext *ucontext;
+
+ ucontext = rdma_udata_to_drv_context(udata,
+ struct irdma_ucontext,
+ ibucontext);
+ irdma_del_memlist(iwmr, ucontext);
+ }
+ goto done;
+ }
+
+ ret = irdma_hwdereg_mr(ib_mr);
+ if (ret)
+ return ret;
+
+ irdma_free_stag(iwdev, iwmr->stag);
+done:
+ if (iwpbl->pbl_allocated)
+ irdma_free_pble(iwdev->rf->pble_rsrc, &iwpbl->pble_alloc);
+
+ if (iwmr->region)
+ ib_umem_release(iwmr->region);
+
+ kfree(iwmr);
+
+ return 0;
+}
+
+/**
+ * irdma_post_send - kernel application wr
+ * @ibqp: qp ptr for wr
+ * @ib_wr: work request ptr
+ * @bad_wr: return of bad wr if err
+ */
+static int irdma_post_send(struct ib_qp *ibqp,
+ const struct ib_send_wr *ib_wr,
+ const struct ib_send_wr **bad_wr)
+{
+ struct irdma_qp *iwqp;
+ struct irdma_qp_uk *ukqp;
+ struct irdma_sc_dev *dev;
+ struct irdma_post_sq_info info;
+ int err = 0;
+ unsigned long flags;
+ bool inv_stag;
+ struct irdma_ah *ah;
+
+ iwqp = to_iwqp(ibqp);
+ ukqp = &iwqp->sc_qp.qp_uk;
+ dev = &iwqp->iwdev->rf->sc_dev;
+
+ spin_lock_irqsave(&iwqp->lock, flags);
+ while (ib_wr) {
+ memset(&info, 0, sizeof(info));
+ inv_stag = false;
+ info.wr_id = (ib_wr->wr_id);
+ if ((ib_wr->send_flags & IB_SEND_SIGNALED) || iwqp->sig_all)
+ info.signaled = true;
+ if (ib_wr->send_flags & IB_SEND_FENCE)
+ info.read_fence = true;
+ switch (ib_wr->opcode) {
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ if (unlikely(!(dev->hw_attrs.uk_attrs.feature_flags &
+ IRDMA_FEATURE_ATOMIC_OPS))) {
+ err = -EINVAL;
+ break;
+ }
+ info.op_type = IRDMA_OP_TYPE_ATOMIC_COMPARE_AND_SWAP;
+ info.op.atomic_compare_swap.tagged_offset = ib_wr->sg_list[0].addr;
+ info.op.atomic_compare_swap.remote_tagged_offset =
+ atomic_wr(ib_wr)->remote_addr;
+ info.op.atomic_compare_swap.swap_data_bytes = atomic_wr(ib_wr)->swap;
+ info.op.atomic_compare_swap.compare_data_bytes =
+ atomic_wr(ib_wr)->compare_add;
+ info.op.atomic_compare_swap.stag = ib_wr->sg_list[0].lkey;
+ info.op.atomic_compare_swap.remote_stag = atomic_wr(ib_wr)->rkey;
+ err = irdma_uk_atomic_compare_swap(ukqp, &info, false);
+ break;
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ if (unlikely(!(dev->hw_attrs.uk_attrs.feature_flags &
+ IRDMA_FEATURE_ATOMIC_OPS))) {
+ err = -EINVAL;
+ break;
+ }
+ info.op_type = IRDMA_OP_TYPE_ATOMIC_FETCH_AND_ADD;
+ info.op.atomic_fetch_add.tagged_offset = ib_wr->sg_list[0].addr;
+ info.op.atomic_fetch_add.remote_tagged_offset =
+ atomic_wr(ib_wr)->remote_addr;
+ info.op.atomic_fetch_add.fetch_add_data_bytes =
+ atomic_wr(ib_wr)->compare_add;
+ info.op.atomic_fetch_add.stag = ib_wr->sg_list[0].lkey;
+ info.op.atomic_fetch_add.remote_stag =
+ atomic_wr(ib_wr)->rkey;
+ err = irdma_uk_atomic_fetch_add(ukqp, &info, false);
+ break;
+ case IB_WR_SEND_WITH_IMM:
+ if (ukqp->qp_caps & IRDMA_SEND_WITH_IMM) {
+ info.imm_data_valid = true;
+ info.imm_data = ntohl(ib_wr->ex.imm_data);
+ } else {
+ err = -EINVAL;
+ break;
+ }
+ fallthrough;
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_INV:
+ if (ib_wr->opcode == IB_WR_SEND ||
+ ib_wr->opcode == IB_WR_SEND_WITH_IMM) {
+ if (ib_wr->send_flags & IB_SEND_SOLICITED)
+ info.op_type = IRDMA_OP_TYPE_SEND_SOL;
+ else
+ info.op_type = IRDMA_OP_TYPE_SEND;
+ } else {
+ if (ib_wr->send_flags & IB_SEND_SOLICITED)
+ info.op_type = IRDMA_OP_TYPE_SEND_SOL_INV;
+ else
+ info.op_type = IRDMA_OP_TYPE_SEND_INV;
+ info.stag_to_inv = ib_wr->ex.invalidate_rkey;
+ }
+
+ info.op.send.num_sges = ib_wr->num_sge;
+ info.op.send.sg_list = ib_wr->sg_list;
+ if (iwqp->ibqp.qp_type == IB_QPT_UD ||
+ iwqp->ibqp.qp_type == IB_QPT_GSI) {
+ ah = to_iwah(ud_wr(ib_wr)->ah);
+ info.op.send.ah_id = ah->sc_ah.ah_info.ah_idx;
+ info.op.send.qkey = ud_wr(ib_wr)->remote_qkey;
+ info.op.send.dest_qp = ud_wr(ib_wr)->remote_qpn;
+ }
+
+ if (ib_wr->send_flags & IB_SEND_INLINE)
+ err = irdma_uk_inline_send(ukqp, &info, false);
+ else
+ err = irdma_uk_send(ukqp, &info, false);
+ break;
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ if (ukqp->qp_caps & IRDMA_WRITE_WITH_IMM) {
+ info.imm_data_valid = true;
+ info.imm_data = ntohl(ib_wr->ex.imm_data);
+ } else {
+ err = -EINVAL;
+ break;
+ }
+ fallthrough;
+ case IB_WR_RDMA_WRITE:
+ if (ib_wr->send_flags & IB_SEND_SOLICITED)
+ info.op_type = IRDMA_OP_TYPE_RDMA_WRITE_SOL;
+ else
+ info.op_type = IRDMA_OP_TYPE_RDMA_WRITE;
+
+ info.op.rdma_write.num_lo_sges = ib_wr->num_sge;
+ info.op.rdma_write.lo_sg_list = ib_wr->sg_list;
+ info.op.rdma_write.rem_addr.addr =
+ rdma_wr(ib_wr)->remote_addr;
+ info.op.rdma_write.rem_addr.lkey = rdma_wr(ib_wr)->rkey;
+ if (ib_wr->send_flags & IB_SEND_INLINE)
+ err = irdma_uk_inline_rdma_write(ukqp, &info, false);
+ else
+ err = irdma_uk_rdma_write(ukqp, &info, false);
+ break;
+ case IB_WR_RDMA_READ_WITH_INV:
+ inv_stag = true;
+ fallthrough;
+ case IB_WR_RDMA_READ:
+ if (ib_wr->num_sge >
+ dev->hw_attrs.uk_attrs.max_hw_read_sges) {
+ err = -EINVAL;
+ break;
+ }
+ info.op_type = IRDMA_OP_TYPE_RDMA_READ;
+ info.op.rdma_read.rem_addr.addr = rdma_wr(ib_wr)->remote_addr;
+ info.op.rdma_read.rem_addr.lkey = rdma_wr(ib_wr)->rkey;
+ info.op.rdma_read.lo_sg_list = (void *)ib_wr->sg_list;
+ info.op.rdma_read.num_lo_sges = ib_wr->num_sge;
+ err = irdma_uk_rdma_read(ukqp, &info, inv_stag, false);
+ break;
+ case IB_WR_LOCAL_INV:
+ info.op_type = IRDMA_OP_TYPE_INV_STAG;
+ info.local_fence = true;
+ info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey;
+ err = irdma_uk_stag_local_invalidate(ukqp, &info, true);
+ break;
+ case IB_WR_REG_MR: {
+ struct irdma_mr *iwmr = to_iwmr(reg_wr(ib_wr)->mr);
+ struct irdma_pble_alloc *palloc = &iwmr->iwpbl.pble_alloc;
+ struct irdma_fast_reg_stag_info stag_info = {};
+
+ stag_info.signaled = info.signaled;
+ stag_info.read_fence = info.read_fence;
+ stag_info.access_rights =
+ irdma_get_mr_access(reg_wr(ib_wr)->access,
+ dev->hw_attrs.uk_attrs.hw_rev);
+ stag_info.stag_key = reg_wr(ib_wr)->key & 0xff;
+ stag_info.stag_idx = reg_wr(ib_wr)->key >> 8;
+ stag_info.page_size = reg_wr(ib_wr)->mr->page_size;
+ stag_info.wr_id = ib_wr->wr_id;
+ stag_info.addr_type = IRDMA_ADDR_TYPE_VA_BASED;
+ stag_info.va = (void *)(uintptr_t)iwmr->ibmr.iova;
+ stag_info.total_len = iwmr->ibmr.length;
+ stag_info.reg_addr_pa = *palloc->level1.addr;
+ stag_info.first_pm_pbl_index = palloc->level1.idx;
+ stag_info.local_fence = ib_wr->send_flags & IB_SEND_FENCE;
+ if (iwmr->npages > IRDMA_MIN_PAGES_PER_FMR)
+ stag_info.chunk_size = 1;
+ err = irdma_sc_mr_fast_register(&iwqp->sc_qp, &stag_info,
+ true);
+ break;
+ }
+ default:
+ err = -EINVAL;
+ ibdev_dbg(&iwqp->iwdev->ibdev,
+ "VERBS: upost_send bad opcode = 0x%x\n",
+ ib_wr->opcode);
+ break;
+ }
+
+ if (err)
+ break;
+ ib_wr = ib_wr->next;
+ }
+
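+ /* Ring the SQ doorbell only while the QP is still in a good state;
+ * if a flush is in progress, kick the deferred flush work instead so
+ * the posted WRs complete in software.
+ */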
+ if (!iwqp->flush_issued) {
+ if (iwqp->hw_iwarp_state <= IRDMA_QP_STATE_RTS)
+ irdma_uk_qp_post_wr(ukqp);
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ } else {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
+ msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
+ }
+
+ if (err)
+ *bad_wr = ib_wr;
+
+ return err;
+}
+
+/**
+ * irdma_post_srq_recv - post receive wr for kernel application
+ * @ibsrq: ib srq pointer
+ * @ib_wr: work request for receive
+ * @bad_wr: bad wr caused an error
+ */
+static int irdma_post_srq_recv(struct ib_srq *ibsrq,
+ const struct ib_recv_wr *ib_wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct irdma_srq *iwsrq = to_iwsrq(ibsrq);
+ struct irdma_srq_uk *uksrq = &iwsrq->sc_srq.srq_uk;
+ struct irdma_post_rq_info post_recv = {};
+ unsigned long flags;
+ int err = 0;
+
+ spin_lock_irqsave(&iwsrq->lock, flags);
+ while (ib_wr) {
+ if (ib_wr->num_sge > uksrq->max_srq_frag_cnt) {
+ err = -EINVAL;
+ goto out;
+ }
+ post_recv.num_sges = ib_wr->num_sge;
+ post_recv.wr_id = ib_wr->wr_id;
+ post_recv.sg_list = ib_wr->sg_list;
+ err = irdma_uk_srq_post_receive(uksrq, &post_recv);
+ if (err)
+ goto out;
+
+ ib_wr = ib_wr->next;
+ }
+
+out:
+ spin_unlock_irqrestore(&iwsrq->lock, flags);
+
+ if (err)
+ *bad_wr = ib_wr;
+
+ return err;
+}
+
+/**
+ * irdma_post_recv - post receive wr for kernel application
+ * @ibqp: ib qp pointer
+ * @ib_wr: work request for receive
+ * @bad_wr: bad wr caused an error
+ */
+static int irdma_post_recv(struct ib_qp *ibqp,
+ const struct ib_recv_wr *ib_wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct irdma_qp *iwqp;
+ struct irdma_qp_uk *ukqp;
+ struct irdma_post_rq_info post_recv = {};
+ unsigned long flags;
+ int err = 0;
+
+ iwqp = to_iwqp(ibqp);
+ ukqp = &iwqp->sc_qp.qp_uk;
+
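+ /* A QP attached to an SRQ has no RQ of its own; receives must be
+ * posted to the SRQ instead.
+ */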
+ if (ukqp->srq_uk) {
+ *bad_wr = ib_wr;
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&iwqp->lock, flags);
+ while (ib_wr) {
+ post_recv.num_sges = ib_wr->num_sge;
+ post_recv.wr_id = ib_wr->wr_id;
+ post_recv.sg_list = ib_wr->sg_list;
+ err = irdma_uk_post_receive(ukqp, &post_recv);
+ if (err) {
+ ibdev_dbg(&iwqp->iwdev->ibdev,
+ "VERBS: post_recv err %d\n", err);
+ goto out;
+ }
+
+ ib_wr = ib_wr->next;
+ }
+
+out:
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ if (iwqp->flush_issued)
+ mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
+ msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
+
+ if (err)
+ *bad_wr = ib_wr;
+
+ return err;
+}
+
+/**
+ * irdma_flush_err_to_ib_wc_status - convert a flush error code to IB wc status
+ * @opcode: iwarp flush code
+ */
+static enum ib_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode opcode)
+{
+ switch (opcode) {
+ case FLUSH_PROT_ERR:
+ return IB_WC_LOC_PROT_ERR;
+ case FLUSH_REM_ACCESS_ERR:
+ return IB_WC_REM_ACCESS_ERR;
+ case FLUSH_LOC_QP_OP_ERR:
+ return IB_WC_LOC_QP_OP_ERR;
+ case FLUSH_REM_OP_ERR:
+ return IB_WC_REM_OP_ERR;
+ case FLUSH_LOC_LEN_ERR:
+ return IB_WC_LOC_LEN_ERR;
+ case FLUSH_GENERAL_ERR:
+ return IB_WC_WR_FLUSH_ERR;
+ case FLUSH_RETRY_EXC_ERR:
+ return IB_WC_RETRY_EXC_ERR;
+ case FLUSH_MW_BIND_ERR:
+ return IB_WC_MW_BIND_ERR;
+ case FLUSH_REM_INV_REQ_ERR:
+ return IB_WC_REM_INV_REQ_ERR;
+ case FLUSH_RNR_RETRY_EXC_ERR:
+ return IB_WC_RNR_RETRY_EXC_ERR;
+ case FLUSH_FATAL_ERR:
+ default:
+ return IB_WC_FATAL_ERR;
+ }
+}
+
+/**
+ * irdma_process_cqe - process cqe info
+ * @entry: processed cqe
+ * @cq_poll_info: cqe info
+ */
+static void irdma_process_cqe(struct ib_wc *entry,
+ struct irdma_cq_poll_info *cq_poll_info)
+{
+ struct irdma_sc_qp *qp;
+
+ entry->wc_flags = 0;
+ entry->pkey_index = 0;
+ entry->wr_id = cq_poll_info->wr_id;
+
+ qp = cq_poll_info->qp_handle;
+ entry->qp = qp->qp_uk.back_qp;
+
+ if (cq_poll_info->error) {
+ entry->status = (cq_poll_info->comp_status == IRDMA_COMPL_STATUS_FLUSHED) ?
+ irdma_flush_err_to_ib_wc_status(cq_poll_info->minor_err) : IB_WC_GENERAL_ERR;
+
+ entry->vendor_err = cq_poll_info->major_err << 16 |
+ cq_poll_info->minor_err;
+ } else {
+ entry->status = IB_WC_SUCCESS;
+ if (cq_poll_info->imm_valid) {
+ entry->ex.imm_data = htonl(cq_poll_info->imm_data);
+ entry->wc_flags |= IB_WC_WITH_IMM;
+ }
+ if (cq_poll_info->ud_smac_valid) {
+ ether_addr_copy(entry->smac, cq_poll_info->ud_smac);
+ entry->wc_flags |= IB_WC_WITH_SMAC;
+ }
+
+ if (cq_poll_info->ud_vlan_valid) {
+ u16 vlan = cq_poll_info->ud_vlan & VLAN_VID_MASK;
+
+ entry->sl = cq_poll_info->ud_vlan >> VLAN_PRIO_SHIFT;
+ if (vlan) {
+ entry->vlan_id = vlan;
+ entry->wc_flags |= IB_WC_WITH_VLAN;
+ }
+ } else {
+ entry->sl = 0;
+ }
+ }
+
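+ /* Translate the HW op type into an IB opcode; RQ completions on
+ * non-UD QPs may also report an invalidated STag.
+ */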
+ if (cq_poll_info->q_type == IRDMA_CQE_QTYPE_SQ) {
+ set_ib_wc_op_sq(cq_poll_info, entry);
+ } else {
+ if (qp->dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2)
+ set_ib_wc_op_rq(cq_poll_info, entry,
+ qp->qp_uk.qp_caps & IRDMA_SEND_WITH_IMM ?
+ true : false);
+ else
+ set_ib_wc_op_rq_gen_3(cq_poll_info, entry);
+ if (qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_UD &&
+ cq_poll_info->stag_invalid_set) {
+ entry->ex.invalidate_rkey = cq_poll_info->inv_stag;
+ entry->wc_flags |= IB_WC_WITH_INVALIDATE;
+ }
+ }
+
+ if (qp->qp_uk.qp_type == IRDMA_QP_TYPE_ROCE_UD) {
+ entry->src_qp = cq_poll_info->ud_src_qpn;
+ entry->slid = 0;
+ entry->wc_flags |=
+ (IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE);
+ entry->network_hdr_type = cq_poll_info->ipv4 ?
+ RDMA_NETWORK_IPV4 :
+ RDMA_NETWORK_IPV6;
+ } else {
+ entry->src_qp = cq_poll_info->qp_id;
+ }
+
+ entry->byte_len = cq_poll_info->bytes_xfered;
+}
+
+/**
+ * irdma_poll_one - poll one entry of the CQ
+ * @ukcq: ukcq to poll
+ * @cur_cqe: current CQE info to be filled in
+ * @entry: ib_wc object to be filled for non-extended CQ or NULL for extended CQ
+ *
+ * Returns the internal irdma device error code or 0 on success
+ */
+static inline int irdma_poll_one(struct irdma_cq_uk *ukcq,
+ struct irdma_cq_poll_info *cur_cqe,
+ struct ib_wc *entry)
+{
+ int ret = irdma_uk_cq_poll_cmpl(ukcq, cur_cqe);
+
+ if (ret)
+ return ret;
+
+ irdma_process_cqe(entry, cur_cqe);
+
+ return 0;
+}
+
+/**
+ * __irdma_poll_cq - poll cq for completion (kernel apps)
+ * @iwcq: cq to poll
+ * @num_entries: number of entries to poll
+ * @entry: array of ib_wc entries to fill with completions
+ */
+static int __irdma_poll_cq(struct irdma_cq *iwcq, int num_entries, struct ib_wc *entry)
+{
+ struct list_head *tmp_node, *list_node;
+ struct irdma_cq_buf *last_buf = NULL;
+ struct irdma_cq_poll_info *cur_cqe = &iwcq->cur_cqe;
+ struct irdma_cq_buf *cq_buf;
+ int ret;
+ struct irdma_device *iwdev;
+ struct irdma_cq_uk *ukcq;
+ bool cq_new_cqe = false;
+ int resized_bufs = 0;
+ int npolled = 0;
+
+ iwdev = to_iwdev(iwcq->ibcq.device);
+ ukcq = &iwcq->sc_cq.cq_uk;
+
+ /* go through the list of previously resized CQ buffers */
+ list_for_each_safe(list_node, tmp_node, &iwcq->resize_list) {
+ cq_buf = container_of(list_node, struct irdma_cq_buf, list);
+ while (npolled < num_entries) {
+ ret = irdma_poll_one(&cq_buf->cq_uk, cur_cqe, entry + npolled);
+ if (!ret) {
+ ++npolled;
+ cq_new_cqe = true;
+ continue;
+ }
+ if (ret == -ENOENT)
+ break;
+ /* QP using the CQ is destroyed. Skip reporting this CQE */
+ if (ret == -EFAULT) {
+ cq_new_cqe = true;
+ continue;
+ }
+ goto error;
+ }
+
+ /* save the resized CQ buffer which received the last cqe */
+ if (cq_new_cqe)
+ last_buf = cq_buf;
+ cq_new_cqe = false;
+ }
+
+ /* check the current CQ for new cqes */
+ while (npolled < num_entries) {
+ ret = irdma_poll_one(ukcq, cur_cqe, entry + npolled);
+ if (ret == -ENOENT) {
+ ret = irdma_generated_cmpls(iwcq, cur_cqe);
+ if (!ret)
+ irdma_process_cqe(entry + npolled, cur_cqe);
+ }
+ if (!ret) {
+ ++npolled;
+ cq_new_cqe = true;
+ continue;
+ }
+
+ if (ret == -ENOENT)
+ break;
+ /* QP using the CQ is destroyed. Skip reporting this CQE */
+ if (ret == -EFAULT) {
+ cq_new_cqe = true;
+ continue;
+ }
+ goto error;
+ }
+
+ if (cq_new_cqe)
+ /* all previous CQ resizes are complete */
+ resized_bufs = irdma_process_resize_list(iwcq, iwdev, NULL);
+ else if (last_buf)
+ /* only CQ resizes up to the last_buf are complete */
+ resized_bufs = irdma_process_resize_list(iwcq, iwdev, last_buf);
+ if (resized_bufs)
+ /* report to the HW the number of complete CQ resizes */
+ irdma_uk_cq_set_resized_cnt(ukcq, resized_bufs);
+
+ return npolled;
+error:
+ ibdev_dbg(&iwdev->ibdev, "%s: Error polling CQ, irdma_err: %d\n",
+ __func__, ret);
+
+ return ret;
+}
+
+/**
+ * irdma_poll_cq - poll cq for completion (kernel apps)
+ * @ibcq: cq to poll
+ * @num_entries: number of entries to poll
+ * @entry: array of ib_wc entries to fill with completions
+ */
+static int irdma_poll_cq(struct ib_cq *ibcq, int num_entries,
+ struct ib_wc *entry)
+{
+ struct irdma_cq *iwcq;
+ unsigned long flags;
+ int ret;
+
+ iwcq = to_iwcq(ibcq);
+
+ spin_lock_irqsave(&iwcq->lock, flags);
+ ret = __irdma_poll_cq(iwcq, num_entries, entry);
+ spin_unlock_irqrestore(&iwcq->lock, flags);
+
+ return ret;
+}
+
+/**
+ * irdma_req_notify_cq - arm cq kernel application
+ * @ibcq: cq to arm
+ * @notify_flags: notification flags
+ */
+static int irdma_req_notify_cq(struct ib_cq *ibcq,
+ enum ib_cq_notify_flags notify_flags)
+{
+ struct irdma_cq *iwcq;
+ struct irdma_cq_uk *ukcq;
+ unsigned long flags;
+ enum irdma_cmpl_notify cq_notify;
+ bool promo_event = false;
+ int ret = 0;
+
+ cq_notify = notify_flags == IB_CQ_SOLICITED ?
+ IRDMA_CQ_COMPL_SOLICITED : IRDMA_CQ_COMPL_EVENT;
+ iwcq = to_iwcq(ibcq);
+ ukcq = &iwcq->sc_cq.cq_uk;
+
+ spin_lock_irqsave(&iwcq->lock, flags);
+ /* Only promote to arm the CQ for any event if the last arm event was solicited. */
+ if (iwcq->last_notify == IRDMA_CQ_COMPL_SOLICITED && notify_flags != IB_CQ_SOLICITED)
+ promo_event = true;
+
+ if (!atomic_cmpxchg(&iwcq->armed, 0, 1) || promo_event) {
+ iwcq->last_notify = cq_notify;
+ irdma_uk_cq_request_notification(ukcq, cq_notify);
+ }
+
+ if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
+ (!irdma_uk_cq_empty(ukcq) || !list_empty(&iwcq->cmpl_generated)))
+ ret = 1;
+ spin_unlock_irqrestore(&iwcq->lock, flags);
+
+ return ret;
+}
+
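+/* Descriptors are indexed by IRDMA_HW_STAT_INDEX_*; the same ordering is
+ * assumed by irdma_get_hw_stats(), which copies the gathered HW stats
+ * structure out directly.
+ */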
+static const struct rdma_stat_desc irdma_hw_stat_descs[] = {
+ /* gen1 - 32-bit */
+ [IRDMA_HW_STAT_INDEX_IP4RXDISCARD].name = "ip4InDiscards",
+ [IRDMA_HW_STAT_INDEX_IP4RXTRUNC].name = "ip4InTruncatedPkts",
+ [IRDMA_HW_STAT_INDEX_IP4TXNOROUTE].name = "ip4OutNoRoutes",
+ [IRDMA_HW_STAT_INDEX_IP6RXDISCARD].name = "ip6InDiscards",
+ [IRDMA_HW_STAT_INDEX_IP6RXTRUNC].name = "ip6InTruncatedPkts",
+ [IRDMA_HW_STAT_INDEX_IP6TXNOROUTE].name = "ip6OutNoRoutes",
+ [IRDMA_HW_STAT_INDEX_RXVLANERR].name = "rxVlanErrors",
+ /* gen1 - 64-bit */
+ [IRDMA_HW_STAT_INDEX_IP4RXOCTS].name = "ip4InOctets",
+ [IRDMA_HW_STAT_INDEX_IP4RXPKTS].name = "ip4InPkts",
+ [IRDMA_HW_STAT_INDEX_IP4RXFRAGS].name = "ip4InReasmRqd",
+ [IRDMA_HW_STAT_INDEX_IP4RXMCPKTS].name = "ip4InMcastPkts",
+ [IRDMA_HW_STAT_INDEX_IP4TXOCTS].name = "ip4OutOctets",
+ [IRDMA_HW_STAT_INDEX_IP4TXPKTS].name = "ip4OutPkts",
+ [IRDMA_HW_STAT_INDEX_IP4TXFRAGS].name = "ip4OutSegRqd",
+ [IRDMA_HW_STAT_INDEX_IP4TXMCPKTS].name = "ip4OutMcastPkts",
+ [IRDMA_HW_STAT_INDEX_IP6RXOCTS].name = "ip6InOctets",
+ [IRDMA_HW_STAT_INDEX_IP6RXPKTS].name = "ip6InPkts",
+ [IRDMA_HW_STAT_INDEX_IP6RXFRAGS].name = "ip6InReasmRqd",
+ [IRDMA_HW_STAT_INDEX_IP6RXMCPKTS].name = "ip6InMcastPkts",
+ [IRDMA_HW_STAT_INDEX_IP6TXOCTS].name = "ip6OutOctets",
+ [IRDMA_HW_STAT_INDEX_IP6TXPKTS].name = "ip6OutPkts",
+ [IRDMA_HW_STAT_INDEX_IP6TXFRAGS].name = "ip6OutSegRqd",
+ [IRDMA_HW_STAT_INDEX_IP6TXMCPKTS].name = "ip6OutMcastPkts",
+ [IRDMA_HW_STAT_INDEX_RDMARXRDS].name = "InRdmaReads",
+ [IRDMA_HW_STAT_INDEX_RDMARXSNDS].name = "InRdmaSends",
+ [IRDMA_HW_STAT_INDEX_RDMARXWRS].name = "InRdmaWrites",
+ [IRDMA_HW_STAT_INDEX_RDMATXRDS].name = "OutRdmaReads",
+ [IRDMA_HW_STAT_INDEX_RDMATXSNDS].name = "OutRdmaSends",
+ [IRDMA_HW_STAT_INDEX_RDMATXWRS].name = "OutRdmaWrites",
+ [IRDMA_HW_STAT_INDEX_RDMAVBND].name = "RdmaBnd",
+ [IRDMA_HW_STAT_INDEX_RDMAVINV].name = "RdmaInv",
+
+ /* gen2 - 32-bit */
+ [IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED].name = "cnpHandled",
+ [IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED].name = "cnpIgnored",
+ [IRDMA_HW_STAT_INDEX_TXNPCNPSENT].name = "cnpSent",
+ /* gen2 - 64-bit */
+ [IRDMA_HW_STAT_INDEX_IP4RXMCOCTS].name = "ip4InMcastOctets",
+ [IRDMA_HW_STAT_INDEX_IP4TXMCOCTS].name = "ip4OutMcastOctets",
+ [IRDMA_HW_STAT_INDEX_IP6RXMCOCTS].name = "ip6InMcastOctets",
+ [IRDMA_HW_STAT_INDEX_IP6TXMCOCTS].name = "ip6OutMcastOctets",
+ [IRDMA_HW_STAT_INDEX_UDPRXPKTS].name = "RxUDP",
+ [IRDMA_HW_STAT_INDEX_UDPTXPKTS].name = "TxUDP",
+ [IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS].name = "RxECNMrkd",
+ [IRDMA_HW_STAT_INDEX_TCPRTXSEG].name = "RetransSegs",
+ [IRDMA_HW_STAT_INDEX_TCPRXOPTERR].name = "InOptErrors",
+ [IRDMA_HW_STAT_INDEX_TCPRXPROTOERR].name = "InProtoErrors",
+ [IRDMA_HW_STAT_INDEX_TCPRXSEGS].name = "InSegs",
+ [IRDMA_HW_STAT_INDEX_TCPTXSEG].name = "OutSegs",
+
+ /* gen3 */
+ [IRDMA_HW_STAT_INDEX_RNR_SENT].name = "RNR sent",
+ [IRDMA_HW_STAT_INDEX_RNR_RCVD].name = "RNR received",
+ [IRDMA_HW_STAT_INDEX_RDMAORDLMTCNT].name = "ord limit count",
+ [IRDMA_HW_STAT_INDEX_RDMAIRDLMTCNT].name = "ird limit count",
+ [IRDMA_HW_STAT_INDEX_RDMARXATS].name = "Rx atomics",
+ [IRDMA_HW_STAT_INDEX_RDMATXATS].name = "Tx atomics",
+ [IRDMA_HW_STAT_INDEX_NAKSEQERR].name = "Nak Sequence Error",
+ [IRDMA_HW_STAT_INDEX_NAKSEQERR_IMPLIED].name = "Nak Sequence Error Implied",
+ [IRDMA_HW_STAT_INDEX_RTO].name = "RTO",
+ [IRDMA_HW_STAT_INDEX_RXOOOPKTS].name = "Rcvd Out of order packets",
+ [IRDMA_HW_STAT_INDEX_ICRCERR].name = "CRC errors",
+};
+
+static int irdma_roce_port_immutable(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr attr;
+ int err;
+
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+
+ return 0;
+}
+
+static int irdma_iw_port_immutable(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr attr;
+ int err;
+
+ immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+
+ return 0;
+}
+
+static void irdma_get_dev_fw_str(struct ib_device *dev, char *str)
+{
+ struct irdma_device *iwdev = to_iwdev(dev);
+
+ snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u",
+ irdma_fw_major_ver(&iwdev->rf->sc_dev),
+ irdma_fw_minor_ver(&iwdev->rf->sc_dev));
+}
+
+/**
+ * irdma_alloc_hw_port_stats - Allocate a hw stats structure
+ * @ibdev: device pointer from stack
+ * @port_num: port number
+ */
+static struct rdma_hw_stats *irdma_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num)
+{
+ struct irdma_device *iwdev = to_iwdev(ibdev);
+ struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
+
+ int num_counters = dev->hw_attrs.max_stat_idx;
+ unsigned long lifespan = RDMA_HW_STATS_DEFAULT_LIFESPAN;
+
+ return rdma_alloc_hw_stats_struct(irdma_hw_stat_descs, num_counters,
+ lifespan);
+}
+
+/**
+ * irdma_get_hw_stats - Populates the rdma_hw_stats structure
+ * @ibdev: device pointer from stack
+ * @stats: stats pointer from stack
+ * @port_num: port number
+ * @index: which hw counter the stack is requesting we update
+ */
+static int irdma_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats, u32 port_num,
+ int index)
+{
+ struct irdma_device *iwdev = to_iwdev(ibdev);
+ struct irdma_dev_hw_stats *hw_stats = &iwdev->vsi.pestat->hw_stats;
+
+ if (iwdev->rf->rdma_ver >= IRDMA_GEN_2)
+ irdma_cqp_gather_stats_cmd(&iwdev->rf->sc_dev, iwdev->vsi.pestat, true);
+ else
+ irdma_cqp_gather_stats_gen1(&iwdev->rf->sc_dev, iwdev->vsi.pestat);
+
+ memcpy(&stats->value[0], hw_stats, sizeof(u64) * stats->num_counters);
+
+ return stats->num_counters;
+}
+
+/**
+ * irdma_query_gid - Query port GID
+ * @ibdev: device pointer from stack
+ * @port: port number
+ * @index: Entry index
+ * @gid: Global ID
+ */
+static int irdma_query_gid(struct ib_device *ibdev, u32 port, int index,
+ union ib_gid *gid)
+{
+ struct irdma_device *iwdev = to_iwdev(ibdev);
+
+ memset(gid->raw, 0, sizeof(gid->raw));
+ ether_addr_copy(gid->raw, iwdev->netdev->dev_addr);
+
+ return 0;
+}
+
+/**
+ * mcast_list_add - Add a new mcast item to list
+ * @rf: RDMA PCI function
+ * @new_elem: pointer to element to add
+ */
+static void mcast_list_add(struct irdma_pci_f *rf,
+ struct mc_table_list *new_elem)
+{
+ list_add(&new_elem->list, &rf->mc_qht_list.list);
+}
+
+/**
+ * mcast_list_del - Remove an mcast item from list
+ * @mc_qht_elem: pointer to mcast table list element
+ */
+static void mcast_list_del(struct mc_table_list *mc_qht_elem)
+{
+ if (mc_qht_elem)
+ list_del(&mc_qht_elem->list);
+}
+
+/**
+ * mcast_list_lookup_ip - Search mcast list for address
+ * @rf: RDMA PCI function
+ * @ip_mcast: pointer to mcast IP address
+ */
+static struct mc_table_list *mcast_list_lookup_ip(struct irdma_pci_f *rf,
+ u32 *ip_mcast)
+{
+ struct mc_table_list *mc_qht_el;
+ struct list_head *pos, *q;
+
+ list_for_each_safe (pos, q, &rf->mc_qht_list.list) {
+ mc_qht_el = list_entry(pos, struct mc_table_list, list);
+ if (!memcmp(mc_qht_el->mc_info.dest_ip, ip_mcast,
+ sizeof(mc_qht_el->mc_info.dest_ip)))
+ return mc_qht_el;
+ }
+
+ return NULL;
+}
+
+/**
+ * irdma_mcast_cqp_op - perform a mcast cqp operation
+ * @iwdev: irdma device
+ * @mc_grp_ctx: mcast group info
+ * @op: operation
+ *
+ * returns error status
+ */
+static int irdma_mcast_cqp_op(struct irdma_device *iwdev,
+ struct irdma_mcast_grp_info *mc_grp_ctx, u8 op)
+{
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_cqp_request *cqp_request;
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_request->info.in.u.mc_create.info = *mc_grp_ctx;
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = op;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.mc_create.scratch = (uintptr_t)cqp_request;
+ cqp_info->in.u.mc_create.cqp = &iwdev->rf->cqp.sc_cqp;
+ status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
+ irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_mcast_mac - Get the multicast MAC for an IP address
+ * @ip_addr: IPv4 or IPv6 address
+ * @mac: pointer to result MAC address
+ * @ipv4: true for an IPv4 address, false for IPv6
+ */
+void irdma_mcast_mac(u32 *ip_addr, u8 *mac, bool ipv4)
+{
+ u8 *ip = (u8 *)ip_addr;
+
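+ /* Map the group IP to its L2 multicast MAC: 01:00:5e plus the low
+ * 23 bits of the address for IPv4, 33:33 plus the low 32 bits for
+ * IPv6. The address is stored host-endian, so octets are taken in
+ * reverse order.
+ */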
+ if (ipv4) {
+ unsigned char mac4[ETH_ALEN] = {0x01, 0x00, 0x5E, 0x00,
+ 0x00, 0x00};
+
+ mac4[3] = ip[2] & 0x7F;
+ mac4[4] = ip[1];
+ mac4[5] = ip[0];
+ ether_addr_copy(mac, mac4);
+ } else {
+ unsigned char mac6[ETH_ALEN] = {0x33, 0x33, 0x00, 0x00,
+ 0x00, 0x00};
+
+ mac6[2] = ip[3];
+ mac6[3] = ip[2];
+ mac6[4] = ip[1];
+ mac6[5] = ip[0];
+ ether_addr_copy(mac, mac6);
+ }
+}
+
+/**
+ * irdma_attach_mcast - attach a qp to a multicast group
+ * @ibqp: ptr to qp
+ * @ibgid: pointer to global ID
+ * @lid: local ID
+ *
+ * returns error status
+ */
+static int irdma_attach_mcast(struct ib_qp *ibqp, union ib_gid *ibgid, u16 lid)
+{
+ struct irdma_qp *iwqp = to_iwqp(ibqp);
+ struct irdma_device *iwdev = iwqp->iwdev;
+ struct irdma_pci_f *rf = iwdev->rf;
+ struct mc_table_list *mc_qht_elem;
+ struct irdma_mcast_grp_ctx_entry_info mcg_info = {};
+ unsigned long flags;
+ u32 ip_addr[4] = {};
+ u32 mgn;
+ u32 no_mgs;
+ int ret = 0;
+ bool ipv4;
+ u16 vlan_id;
+ union irdma_sockaddr sgid_addr;
+ unsigned char dmac[ETH_ALEN];
+
+ rdma_gid2ip((struct sockaddr *)&sgid_addr, ibgid);
+
+ if (!ipv6_addr_v4mapped((struct in6_addr *)ibgid)) {
+ irdma_copy_ip_ntohl(ip_addr,
+ sgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32);
+ irdma_get_vlan_mac_ipv6(ip_addr, &vlan_id, NULL);
+ ipv4 = false;
+ ibdev_dbg(&iwdev->ibdev,
+ "VERBS: qp_id=%d, IP6address=%pI6\n", ibqp->qp_num,
+ ip_addr);
+ irdma_mcast_mac(ip_addr, dmac, false);
+ } else {
+ ip_addr[0] = ntohl(sgid_addr.saddr_in.sin_addr.s_addr);
+ ipv4 = true;
+ vlan_id = irdma_get_vlan_ipv4(ip_addr);
+ irdma_mcast_mac(ip_addr, dmac, true);
+ ibdev_dbg(&iwdev->ibdev,
+ "VERBS: qp_id=%d, IP4address=%pI4, MAC=%pM\n",
+ ibqp->qp_num, ip_addr, dmac);
+ }
+
+ spin_lock_irqsave(&rf->qh_list_lock, flags);
+ mc_qht_elem = mcast_list_lookup_ip(rf, ip_addr);
+ if (!mc_qht_elem) {
+ struct irdma_dma_mem *dma_mem_mc;
+
+ spin_unlock_irqrestore(&rf->qh_list_lock, flags);
+ mc_qht_elem = kzalloc(sizeof(*mc_qht_elem), GFP_KERNEL);
+ if (!mc_qht_elem)
+ return -ENOMEM;
+
+ mc_qht_elem->mc_info.ipv4_valid = ipv4;
+ memcpy(mc_qht_elem->mc_info.dest_ip, ip_addr,
+ sizeof(mc_qht_elem->mc_info.dest_ip));
+ ret = irdma_alloc_rsrc(rf, rf->allocated_mcgs, rf->max_mcg,
+ &mgn, &rf->next_mcg);
+ if (ret) {
+ kfree(mc_qht_elem);
+ return -ENOMEM;
+ }
+
+ mc_qht_elem->mc_info.mgn = mgn;
+ dma_mem_mc = &mc_qht_elem->mc_grp_ctx.dma_mem_mc;
+ dma_mem_mc->size = ALIGN(sizeof(u64) * IRDMA_MAX_MGS_PER_CTX,
+ IRDMA_HW_PAGE_SIZE);
+ dma_mem_mc->va = dma_alloc_coherent(rf->hw.device,
+ dma_mem_mc->size,
+ &dma_mem_mc->pa,
+ GFP_KERNEL);
+ if (!dma_mem_mc->va) {
+ irdma_free_rsrc(rf, rf->allocated_mcgs, mgn);
+ kfree(mc_qht_elem);
+ return -ENOMEM;
+ }
+
+ mc_qht_elem->mc_grp_ctx.mg_id = (u16)mgn;
+ memcpy(mc_qht_elem->mc_grp_ctx.dest_ip_addr, ip_addr,
+ sizeof(mc_qht_elem->mc_grp_ctx.dest_ip_addr));
+ mc_qht_elem->mc_grp_ctx.ipv4_valid = ipv4;
+ mc_qht_elem->mc_grp_ctx.vlan_id = vlan_id;
+ if (vlan_id < VLAN_N_VID)
+ mc_qht_elem->mc_grp_ctx.vlan_valid = true;
+ mc_qht_elem->mc_grp_ctx.hmc_fcn_id = iwdev->rf->sc_dev.hmc_fn_id;
+ mc_qht_elem->mc_grp_ctx.qs_handle =
+ iwqp->sc_qp.vsi->qos[iwqp->sc_qp.user_pri].qs_handle;
+ ether_addr_copy(mc_qht_elem->mc_grp_ctx.dest_mac_addr, dmac);
+
+ spin_lock_irqsave(&rf->qh_list_lock, flags);
+ mcast_list_add(rf, mc_qht_elem);
+ } else {
+ if (mc_qht_elem->mc_grp_ctx.no_of_mgs ==
+ IRDMA_MAX_MGS_PER_CTX) {
+ spin_unlock_irqrestore(&rf->qh_list_lock, flags);
+ return -ENOMEM;
+ }
+ }
+
+ mcg_info.qp_id = iwqp->ibqp.qp_num;
+ no_mgs = mc_qht_elem->mc_grp_ctx.no_of_mgs;
+ irdma_sc_add_mcast_grp(&mc_qht_elem->mc_grp_ctx, &mcg_info);
+ spin_unlock_irqrestore(&rf->qh_list_lock, flags);
+
+ /* Only if there is a change do we need to modify or create */
+ if (!no_mgs) {
+ ret = irdma_mcast_cqp_op(iwdev, &mc_qht_elem->mc_grp_ctx,
+ IRDMA_OP_MC_CREATE);
+ } else if (no_mgs != mc_qht_elem->mc_grp_ctx.no_of_mgs) {
+ ret = irdma_mcast_cqp_op(iwdev, &mc_qht_elem->mc_grp_ctx,
+ IRDMA_OP_MC_MODIFY);
+ } else {
+ return 0;
+ }
+
+ if (ret)
+ goto error;
+
+ return 0;
+
+error:
+ irdma_sc_del_mcast_grp(&mc_qht_elem->mc_grp_ctx, &mcg_info);
+ if (!mc_qht_elem->mc_grp_ctx.no_of_mgs) {
+ mcast_list_del(mc_qht_elem);
+ dma_free_coherent(rf->hw.device,
+ mc_qht_elem->mc_grp_ctx.dma_mem_mc.size,
+ mc_qht_elem->mc_grp_ctx.dma_mem_mc.va,
+ mc_qht_elem->mc_grp_ctx.dma_mem_mc.pa);
+ mc_qht_elem->mc_grp_ctx.dma_mem_mc.va = NULL;
+ irdma_free_rsrc(rf, rf->allocated_mcgs,
+ mc_qht_elem->mc_grp_ctx.mg_id);
+ kfree(mc_qht_elem);
+ }
+
+ return ret;
+}
+
+/**
+ * irdma_detach_mcast - detach a qp from a multicast group
+ * @ibqp: ptr to qp
+ * @ibgid: pointer to global ID
+ * @lid: local ID
+ *
+ * returns error status
+ */
+static int irdma_detach_mcast(struct ib_qp *ibqp, union ib_gid *ibgid, u16 lid)
+{
+ struct irdma_qp *iwqp = to_iwqp(ibqp);
+ struct irdma_device *iwdev = iwqp->iwdev;
+ struct irdma_pci_f *rf = iwdev->rf;
+ u32 ip_addr[4] = {};
+ struct mc_table_list *mc_qht_elem;
+ struct irdma_mcast_grp_ctx_entry_info mcg_info = {};
+ int ret;
+ unsigned long flags;
+ union irdma_sockaddr sgid_addr;
+
+ rdma_gid2ip((struct sockaddr *)&sgid_addr, ibgid);
+ if (!ipv6_addr_v4mapped((struct in6_addr *)ibgid))
+ irdma_copy_ip_ntohl(ip_addr,
+ sgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32);
+ else
+ ip_addr[0] = ntohl(sgid_addr.saddr_in.sin_addr.s_addr);
+
+ spin_lock_irqsave(&rf->qh_list_lock, flags);
+ mc_qht_elem = mcast_list_lookup_ip(rf, ip_addr);
+ if (!mc_qht_elem) {
+ spin_unlock_irqrestore(&rf->qh_list_lock, flags);
+ ibdev_dbg(&iwdev->ibdev,
+ "VERBS: address not found MCG\n");
+ return 0;
+ }
+
+ mcg_info.qp_id = iwqp->ibqp.qp_num;
+ irdma_sc_del_mcast_grp(&mc_qht_elem->mc_grp_ctx, &mcg_info);
+ if (!mc_qht_elem->mc_grp_ctx.no_of_mgs) {
+ mcast_list_del(mc_qht_elem);
+ spin_unlock_irqrestore(&rf->qh_list_lock, flags);
+ ret = irdma_mcast_cqp_op(iwdev, &mc_qht_elem->mc_grp_ctx,
+ IRDMA_OP_MC_DESTROY);
+ if (ret) {
+ ibdev_dbg(&iwdev->ibdev,
+ "VERBS: failed MC_DESTROY MCG\n");
+ spin_lock_irqsave(&rf->qh_list_lock, flags);
+ mcast_list_add(rf, mc_qht_elem);
+ spin_unlock_irqrestore(&rf->qh_list_lock, flags);
+ return -EAGAIN;
+ }
+
+ dma_free_coherent(rf->hw.device,
+ mc_qht_elem->mc_grp_ctx.dma_mem_mc.size,
+ mc_qht_elem->mc_grp_ctx.dma_mem_mc.va,
+ mc_qht_elem->mc_grp_ctx.dma_mem_mc.pa);
+ mc_qht_elem->mc_grp_ctx.dma_mem_mc.va = NULL;
+ irdma_free_rsrc(rf, rf->allocated_mcgs,
+ mc_qht_elem->mc_grp_ctx.mg_id);
+ kfree(mc_qht_elem);
+ } else {
+ spin_unlock_irqrestore(&rf->qh_list_lock, flags);
+ ret = irdma_mcast_cqp_op(iwdev, &mc_qht_elem->mc_grp_ctx,
+ IRDMA_OP_MC_MODIFY);
+ if (ret) {
+ ibdev_dbg(&iwdev->ibdev,
+ "VERBS: failed Modify MCG\n");
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int irdma_create_hw_ah(struct irdma_device *iwdev, struct irdma_ah *ah, bool sleep)
+{
+ struct irdma_pci_f *rf = iwdev->rf;
+ int err;
+
+ err = irdma_alloc_rsrc(rf, rf->allocated_ahs, rf->max_ah, &ah->sc_ah.ah_info.ah_idx,
+ &rf->next_ah);
+ if (err)
+ return err;
+
+ err = irdma_ah_cqp_op(rf, &ah->sc_ah, IRDMA_OP_AH_CREATE, sleep,
+ irdma_gsi_ud_qp_ah_cb, &ah->sc_ah);
+
+ if (err) {
+ ibdev_dbg(&iwdev->ibdev, "VERBS: CQP-OP Create AH fail");
+ goto err_ah_create;
+ }
+
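+ /* In atomic context we cannot wait for the CQP completion, so poll
+ * the CCQ until the AH is marked valid or the timeout expires.
+ */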
+ if (!sleep) {
+ int cnt = CQP_COMPL_WAIT_TIME_MS * CQP_TIMEOUT_THRESHOLD;
+
+ do {
+ irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq);
+ mdelay(1);
+ } while (!ah->sc_ah.ah_info.ah_valid && --cnt);
+
+ if (!cnt) {
+ ibdev_dbg(&iwdev->ibdev, "VERBS: CQP create AH timed out");
+ err = -ETIMEDOUT;
+ goto err_ah_create;
+ }
+ }
+ return 0;
+
+err_ah_create:
+ irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_ahs, ah->sc_ah.ah_info.ah_idx);
+
+ return err;
+}
+
+static int irdma_setup_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr)
+{
+ struct irdma_pd *pd = to_iwpd(ibah->pd);
+ struct irdma_ah *ah = container_of(ibah, struct irdma_ah, ibah);
+ struct rdma_ah_attr *ah_attr = attr->ah_attr;
+ const struct ib_gid_attr *sgid_attr;
+ struct irdma_device *iwdev = to_iwdev(ibah->pd->device);
+ struct irdma_pci_f *rf = iwdev->rf;
+ struct irdma_sc_ah *sc_ah;
+ struct irdma_ah_info *ah_info;
+ union irdma_sockaddr sgid_addr, dgid_addr;
+ int err;
+ u8 dmac[ETH_ALEN];
+
+ ah->pd = pd;
+ sc_ah = &ah->sc_ah;
+ sc_ah->ah_info.vsi = &iwdev->vsi;
+ irdma_sc_init_ah(&rf->sc_dev, sc_ah);
+ ah->sgid_index = ah_attr->grh.sgid_index;
+ sgid_attr = ah_attr->grh.sgid_attr;
+ memcpy(&ah->dgid, &ah_attr->grh.dgid, sizeof(ah->dgid));
+ rdma_gid2ip((struct sockaddr *)&sgid_addr, &sgid_attr->gid);
+ rdma_gid2ip((struct sockaddr *)&dgid_addr, &ah_attr->grh.dgid);
+ ah->av.attrs = *ah_attr;
+ ah->av.net_type = rdma_gid_attr_network_type(sgid_attr);
+ ah_info = &sc_ah->ah_info;
+ ah_info->pd_idx = pd->sc_pd.pd_id;
+ if (ah_attr->ah_flags & IB_AH_GRH) {
+ ah_info->flow_label = ah_attr->grh.flow_label;
+ ah_info->hop_ttl = ah_attr->grh.hop_limit;
+ ah_info->tc_tos = ah_attr->grh.traffic_class;
+ }
+
+ ether_addr_copy(dmac, ah_attr->roce.dmac);
+ if (ah->av.net_type == RDMA_NETWORK_IPV4) {
+ ah_info->ipv4_valid = true;
+ ah_info->dest_ip_addr[0] =
+ ntohl(dgid_addr.saddr_in.sin_addr.s_addr);
+ ah_info->src_ip_addr[0] =
+ ntohl(sgid_addr.saddr_in.sin_addr.s_addr);
+ ah_info->do_lpbk = irdma_ipv4_is_lpb(ah_info->src_ip_addr[0],
+ ah_info->dest_ip_addr[0]);
+ if (ipv4_is_multicast(dgid_addr.saddr_in.sin_addr.s_addr)) {
+ ah_info->do_lpbk = true;
+ irdma_mcast_mac(ah_info->dest_ip_addr, dmac, true);
+ }
+ } else {
+ irdma_copy_ip_ntohl(ah_info->dest_ip_addr,
+ dgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32);
+ irdma_copy_ip_ntohl(ah_info->src_ip_addr,
+ sgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32);
+ ah_info->do_lpbk = irdma_ipv6_is_lpb(ah_info->src_ip_addr,
+ ah_info->dest_ip_addr);
+ if (rdma_is_multicast_addr(&dgid_addr.saddr_in6.sin6_addr)) {
+ ah_info->do_lpbk = true;
+ irdma_mcast_mac(ah_info->dest_ip_addr, dmac, false);
+ }
+ }
+
+ err = rdma_read_gid_l2_fields(sgid_attr, &ah_info->vlan_tag,
+ ah_info->mac_addr);
+ if (err)
+ return err;
+
+ ah_info->dst_arpindex = irdma_add_arp(iwdev->rf, ah_info->dest_ip_addr,
+ ah_info->ipv4_valid, dmac);
+
+ if (ah_info->dst_arpindex == -1)
+ return -EINVAL;
+
+ if (ah_info->vlan_tag >= VLAN_N_VID && iwdev->dcb_vlan_mode)
+ ah_info->vlan_tag = 0;
+
+ if (ah_info->vlan_tag < VLAN_N_VID) {
+ u8 prio = rt_tos2priority(ah_info->tc_tos);
+
+ prio = irdma_roce_get_vlan_prio(sgid_attr, prio);
+
+ ah_info->vlan_tag |= (u16)prio << VLAN_PRIO_SHIFT;
+ ah_info->insert_vlan_tag = true;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_ah_exists - Check for existing identical AH
+ * @iwdev: irdma device
+ * @new_ah: AH to check for
+ *
+ * returns true if AH is found, false if not found.
+ */
+static bool irdma_ah_exists(struct irdma_device *iwdev,
+ struct irdma_ah *new_ah)
+{
+ struct irdma_ah *ah;
+ u32 key = new_ah->sc_ah.ah_info.dest_ip_addr[0] ^
+ new_ah->sc_ah.ah_info.dest_ip_addr[1] ^
+ new_ah->sc_ah.ah_info.dest_ip_addr[2] ^
+ new_ah->sc_ah.ah_info.dest_ip_addr[3];
+
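+ /* Look for a cached AH with identical attributes keyed on the
+ * destination IP; on a match, take a reference on the parent AH so
+ * the caller can reuse it instead of creating a new HW AH.
+ */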
+ hash_for_each_possible(iwdev->rf->ah_hash_tbl, ah, list, key) {
+ /* Set ah_valid and ah_idx the same so memcmp can work */
+ new_ah->sc_ah.ah_info.ah_idx = ah->sc_ah.ah_info.ah_idx;
+ new_ah->sc_ah.ah_info.ah_valid = ah->sc_ah.ah_info.ah_valid;
+ if (!memcmp(&ah->sc_ah.ah_info, &new_ah->sc_ah.ah_info,
+ sizeof(ah->sc_ah.ah_info))) {
+ refcount_inc(&ah->refcnt);
+ new_ah->parent_ah = ah;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/**
+ * irdma_destroy_ah - Destroy address handle
+ * @ibah: pointer to address handle
+ * @ah_flags: flags for sleepable
+ */
+static int irdma_destroy_ah(struct ib_ah *ibah, u32 ah_flags)
+{
+ struct irdma_device *iwdev = to_iwdev(ibah->device);
+ struct irdma_ah *ah = to_iwah(ibah);
+
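+ /* User AHs may share a cached parent AH; only destroy the HW AH
+ * once the last reference is dropped.
+ */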
+ if ((ah_flags & RDMA_DESTROY_AH_SLEEPABLE) && ah->parent_ah) {
+ mutex_lock(&iwdev->rf->ah_tbl_lock);
+ if (!refcount_dec_and_test(&ah->parent_ah->refcnt)) {
+ mutex_unlock(&iwdev->rf->ah_tbl_lock);
+ return 0;
+ }
+ hash_del(&ah->parent_ah->list);
+ kfree(ah->parent_ah);
+ mutex_unlock(&iwdev->rf->ah_tbl_lock);
+ }
+
+ irdma_ah_cqp_op(iwdev->rf, &ah->sc_ah, IRDMA_OP_AH_DESTROY,
+ false, NULL, ah);
+
+ irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_ahs,
+ ah->sc_ah.ah_info.ah_idx);
+
+ return 0;
+}
+
+/**
+ * irdma_create_user_ah - create user address handle
+ * @ibah: address handle
+ * @attr: address handle attributes
+ * @udata: User data
+ *
+ * returns 0 on success, error otherwise
+ */
+static int irdma_create_user_ah(struct ib_ah *ibah,
+ struct rdma_ah_init_attr *attr,
+ struct ib_udata *udata)
+{
+#define IRDMA_CREATE_AH_MIN_RESP_LEN offsetofend(struct irdma_create_ah_resp, rsvd)
+ struct irdma_ah *ah = container_of(ibah, struct irdma_ah, ibah);
+ struct irdma_device *iwdev = to_iwdev(ibah->pd->device);
+ struct irdma_create_ah_resp uresp;
+ struct irdma_ah *parent_ah;
+ int err;
+
+ if (udata->outlen < IRDMA_CREATE_AH_MIN_RESP_LEN)
+ return -EINVAL;
+
+ err = irdma_setup_ah(ibah, attr);
+ if (err)
+ return err;
+ mutex_lock(&iwdev->rf->ah_tbl_lock);
+ if (!irdma_ah_exists(iwdev, ah)) {
+ err = irdma_create_hw_ah(iwdev, ah, true);
+ if (err) {
+ mutex_unlock(&iwdev->rf->ah_tbl_lock);
+ return err;
+ }
+ /* Add new AH to list */
+ parent_ah = kmemdup(ah, sizeof(*ah), GFP_KERNEL);
+ if (parent_ah) {
+ u32 key = parent_ah->sc_ah.ah_info.dest_ip_addr[0] ^
+ parent_ah->sc_ah.ah_info.dest_ip_addr[1] ^
+ parent_ah->sc_ah.ah_info.dest_ip_addr[2] ^
+ parent_ah->sc_ah.ah_info.dest_ip_addr[3];
+
+ ah->parent_ah = parent_ah;
+ hash_add(iwdev->rf->ah_hash_tbl, &parent_ah->list, key);
+ refcount_set(&parent_ah->refcnt, 1);
+ }
+ }
+ mutex_unlock(&iwdev->rf->ah_tbl_lock);
+
+ uresp.ah_id = ah->sc_ah.ah_info.ah_idx;
+ err = ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen));
+ if (err)
+ irdma_destroy_ah(ibah, attr->flags);
+
+ return err;
+}
+
+/**
+ * irdma_create_ah - create address handle
+ * @ibah: address handle
+ * @attr: address handle attributes
+ * @udata: NULL
+ *
+ * returns 0 on success, error otherwise
+ */
+static int irdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct irdma_ah *ah = container_of(ibah, struct irdma_ah, ibah);
+ struct irdma_device *iwdev = to_iwdev(ibah->pd->device);
+ int err;
+
+ err = irdma_setup_ah(ibah, attr);
+ if (err)
+ return err;
+ err = irdma_create_hw_ah(iwdev, ah, attr->flags & RDMA_CREATE_AH_SLEEPABLE);
+
+ return err;
+}
+
+/**
+ * irdma_query_ah - Query address handle
+ * @ibah: pointer to address handle
+ * @ah_attr: address handle attributes
+ */
+static int irdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
+{
+ struct irdma_ah *ah = to_iwah(ibah);
+
+ memset(ah_attr, 0, sizeof(*ah_attr));
+ if (ah->av.attrs.ah_flags & IB_AH_GRH) {
+ ah_attr->ah_flags = IB_AH_GRH;
+ ah_attr->grh.flow_label = ah->sc_ah.ah_info.flow_label;
+ ah_attr->grh.traffic_class = ah->sc_ah.ah_info.tc_tos;
+ ah_attr->grh.hop_limit = ah->sc_ah.ah_info.hop_ttl;
+ ah_attr->grh.sgid_index = ah->sgid_index;
+ memcpy(&ah_attr->grh.dgid, &ah->dgid,
+ sizeof(ah_attr->grh.dgid));
+ }
+
+ return 0;
+}
+
+static enum rdma_link_layer irdma_get_link_layer(struct ib_device *ibdev,
+ u32 port_num)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+static const struct ib_device_ops irdma_gen1_dev_ops = {
+ .dealloc_driver = irdma_ib_dealloc_device,
+};
+
+static const struct ib_device_ops irdma_gen3_dev_ops = {
+ .alloc_mw = irdma_alloc_mw,
+ .create_srq = irdma_create_srq,
+ .dealloc_mw = irdma_dealloc_mw,
+ .destroy_srq = irdma_destroy_srq,
+ .modify_srq = irdma_modify_srq,
+ .post_srq_recv = irdma_post_srq_recv,
+ .query_srq = irdma_query_srq,
+};
+
+static const struct ib_device_ops irdma_roce_dev_ops = {
+ .attach_mcast = irdma_attach_mcast,
+ .create_ah = irdma_create_ah,
+ .create_user_ah = irdma_create_user_ah,
+ .destroy_ah = irdma_destroy_ah,
+ .detach_mcast = irdma_detach_mcast,
+ .get_link_layer = irdma_get_link_layer,
+ .get_port_immutable = irdma_roce_port_immutable,
+ .modify_qp = irdma_modify_qp_roce,
+ .query_ah = irdma_query_ah,
+ .query_pkey = irdma_query_pkey,
+};
+
+static const struct ib_device_ops irdma_iw_dev_ops = {
+ .get_port_immutable = irdma_iw_port_immutable,
+ .iw_accept = irdma_accept,
+ .iw_add_ref = irdma_qp_add_ref,
+ .iw_connect = irdma_connect,
+ .iw_create_listen = irdma_create_listen,
+ .iw_destroy_listen = irdma_destroy_listen,
+ .iw_get_qp = irdma_get_qp,
+ .iw_reject = irdma_reject,
+ .iw_rem_ref = irdma_qp_rem_ref,
+ .modify_qp = irdma_modify_qp,
+ .query_gid = irdma_query_gid,
+};
+
+static const struct ib_device_ops irdma_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_IRDMA,
+ .uverbs_abi_ver = IRDMA_ABI_VER,
+
+ .alloc_hw_port_stats = irdma_alloc_hw_port_stats,
+ .alloc_mr = irdma_alloc_mr,
+ .alloc_pd = irdma_alloc_pd,
+ .alloc_ucontext = irdma_alloc_ucontext,
+ .create_cq = irdma_create_cq,
+ .create_qp = irdma_create_qp,
+ .dealloc_driver = irdma_ib_dealloc_device,
+ .dealloc_mw = irdma_dealloc_mw,
+ .dealloc_pd = irdma_dealloc_pd,
+ .dealloc_ucontext = irdma_dealloc_ucontext,
+ .dereg_mr = irdma_dereg_mr,
+ .destroy_cq = irdma_destroy_cq,
+ .destroy_qp = irdma_destroy_qp,
+ .disassociate_ucontext = irdma_disassociate_ucontext,
+ .get_dev_fw_str = irdma_get_dev_fw_str,
+ .get_dma_mr = irdma_get_dma_mr,
+ .get_hw_stats = irdma_get_hw_stats,
+ .map_mr_sg = irdma_map_mr_sg,
+ .mmap = irdma_mmap,
+ .mmap_free = irdma_mmap_free,
+ .poll_cq = irdma_poll_cq,
+ .post_recv = irdma_post_recv,
+ .post_send = irdma_post_send,
+ .query_device = irdma_query_device,
+ .query_port = irdma_query_port,
+ .query_qp = irdma_query_qp,
+ .reg_user_mr = irdma_reg_user_mr,
+ .reg_user_mr_dmabuf = irdma_reg_user_mr_dmabuf,
+ .rereg_user_mr = irdma_rereg_user_mr,
+ .req_notify_cq = irdma_req_notify_cq,
+ .resize_cq = irdma_resize_cq,
+ INIT_RDMA_OBJ_SIZE(ib_pd, irdma_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, irdma_ucontext, ibucontext),
+ INIT_RDMA_OBJ_SIZE(ib_ah, irdma_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, irdma_cq, ibcq),
+ INIT_RDMA_OBJ_SIZE(ib_mw, irdma_mr, ibmw),
+ INIT_RDMA_OBJ_SIZE(ib_qp, irdma_qp, ibqp),
+ INIT_RDMA_OBJ_SIZE(ib_srq, irdma_srq, ibsrq),
+};
+
+/**
+ * irdma_init_roce_device - initialization of roce rdma device
+ * @iwdev: irdma device
+ */
+static void irdma_init_roce_device(struct irdma_device *iwdev)
+{
+ iwdev->ibdev.node_type = RDMA_NODE_IB_CA;
+ addrconf_addr_eui48((u8 *)&iwdev->ibdev.node_guid,
+ iwdev->netdev->dev_addr);
+ ib_set_device_ops(&iwdev->ibdev, &irdma_roce_dev_ops);
+}
+
+/**
+ * irdma_init_iw_device - initialization of iwarp rdma device
+ * @iwdev: irdma device
+ */
+static void irdma_init_iw_device(struct irdma_device *iwdev)
+{
+ struct net_device *netdev = iwdev->netdev;
+
+ iwdev->ibdev.node_type = RDMA_NODE_RNIC;
+ addrconf_addr_eui48((u8 *)&iwdev->ibdev.node_guid,
+ netdev->dev_addr);
+ memcpy(iwdev->ibdev.iw_ifname, netdev->name,
+ sizeof(iwdev->ibdev.iw_ifname));
+ ib_set_device_ops(&iwdev->ibdev, &irdma_iw_dev_ops);
+}
+
+/**
+ * irdma_init_rdma_device - initialization of rdma device
+ * @iwdev: irdma device
+ */
+static void irdma_init_rdma_device(struct irdma_device *iwdev)
+{
+ struct pci_dev *pcidev = iwdev->rf->pcidev;
+
+ if (iwdev->roce_mode)
+ irdma_init_roce_device(iwdev);
+ else
+ irdma_init_iw_device(iwdev);
+
+ iwdev->ibdev.phys_port_cnt = 1;
+ iwdev->ibdev.num_comp_vectors = iwdev->rf->ceqs_count;
+ iwdev->ibdev.dev.parent = &pcidev->dev;
+ ib_set_device_ops(&iwdev->ibdev, &irdma_dev_ops);
+ if (iwdev->rf->rdma_ver == IRDMA_GEN_1)
+ ib_set_device_ops(&iwdev->ibdev, &irdma_gen1_dev_ops);
+ if (iwdev->rf->rdma_ver >= IRDMA_GEN_3)
+ ib_set_device_ops(&iwdev->ibdev, &irdma_gen3_dev_ops);
+}
+
+/**
+ * irdma_port_ibevent - indicate port event
+ * @iwdev: irdma device
+ */
+void irdma_port_ibevent(struct irdma_device *iwdev)
+{
+ struct ib_event event;
+
+ event.device = &iwdev->ibdev;
+ event.element.port_num = 1;
+ event.event =
+ iwdev->iw_status ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+ ib_dispatch_event(&event);
+}
+
+/**
+ * irdma_ib_unregister_device - unregister rdma device from IB
+ * core
+ * @iwdev: irdma device
+ */
+void irdma_ib_unregister_device(struct irdma_device *iwdev)
+{
+ iwdev->iw_status = 0;
+ irdma_port_ibevent(iwdev);
+ ib_unregister_device(&iwdev->ibdev);
+}
+
+/**
+ * irdma_ib_register_device - register irdma device to IB core
+ * @iwdev: irdma device
+ */
+int irdma_ib_register_device(struct irdma_device *iwdev)
+{
+ int ret;
+
+ irdma_init_rdma_device(iwdev);
+
+ ret = ib_device_set_netdev(&iwdev->ibdev, iwdev->netdev, 1);
+ if (ret)
+ goto error;
+ dma_set_max_seg_size(iwdev->rf->hw.device, UINT_MAX);
+ ret = ib_register_device(&iwdev->ibdev, "irdma%d", iwdev->rf->hw.device);
+ if (ret)
+ goto error;
+
+ iwdev->iw_status = 1;
+ irdma_port_ibevent(iwdev);
+
+ return 0;
+
+error:
+ if (ret)
+ ibdev_dbg(&iwdev->ibdev, "VERBS: Register RDMA device fail\n");
+
+ return ret;
+}
+
+/**
+ * irdma_ib_dealloc_device - deallocate IB device resources
+ * @ibdev: ib device
+ *
+ * Callback from ibdev dealloc_driver to deallocate resources
+ * under the irdma device
+ */
+void irdma_ib_dealloc_device(struct ib_device *ibdev)
+{
+ struct irdma_device *iwdev = to_iwdev(ibdev);
+
+ irdma_rt_deinit_hw(iwdev);
+ if (!iwdev->is_vport) {
+ irdma_ctrl_deinit_hw(iwdev->rf);
+ if (iwdev->rf->vchnl_wq) {
+ destroy_workqueue(iwdev->rf->vchnl_wq);
+ mutex_destroy(&iwdev->rf->sc_dev.vchnl_mutex);
+ }
+ }
+}
diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h
new file mode 100644
index 000000000000..aabbb3442098
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/verbs.h
@@ -0,0 +1,341 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#ifndef IRDMA_VERBS_H
+#define IRDMA_VERBS_H
+
+#define IRDMA_MAX_SAVED_PHY_PGADDR 4
+#define IRDMA_FLUSH_DELAY_MS 20
+
+#define IRDMA_PKEY_TBL_SZ 1
+#define IRDMA_DEFAULT_PKEY 0xFFFF
+#define IRDMA_SHADOW_PGCNT 1
+
+struct irdma_ucontext {
+ struct ib_ucontext ibucontext;
+ struct irdma_device *iwdev;
+ struct rdma_user_mmap_entry *db_mmap_entry;
+ struct list_head cq_reg_mem_list;
+ spinlock_t cq_reg_mem_list_lock; /* protect CQ memory list */
+ struct list_head qp_reg_mem_list;
+ spinlock_t qp_reg_mem_list_lock; /* protect QP memory list */
+ struct list_head srq_reg_mem_list;
+ spinlock_t srq_reg_mem_list_lock; /* protect SRQ memory list */
+ int abi_ver;
+ u8 legacy_mode : 1;
+ u8 use_raw_attrs : 1;
+};
+
+struct irdma_pd {
+ struct ib_pd ibpd;
+ struct irdma_sc_pd sc_pd;
+};
+
+union irdma_sockaddr {
+ struct sockaddr_in saddr_in;
+ struct sockaddr_in6 saddr_in6;
+};
+
+struct irdma_av {
+ u8 macaddr[16];
+ struct rdma_ah_attr attrs;
+ union irdma_sockaddr sgid_addr;
+ union irdma_sockaddr dgid_addr;
+ u8 net_type;
+};
+
+struct irdma_ah {
+ struct ib_ah ibah;
+ struct irdma_sc_ah sc_ah;
+ struct irdma_pd *pd;
+ struct irdma_av av;
+ u8 sgid_index;
+ union ib_gid dgid;
+ struct hlist_node list;
+ refcount_t refcnt;
+ struct irdma_ah *parent_ah; /* AH from cached list */
+};
+
+struct irdma_hmc_pble {
+ union {
+ u32 idx;
+ dma_addr_t addr;
+ };
+};
+
+struct irdma_cq_mr {
+ struct irdma_hmc_pble cq_pbl;
+ dma_addr_t shadow;
+ bool split;
+};
+
+struct irdma_srq_mr {
+ struct irdma_hmc_pble srq_pbl;
+ dma_addr_t shadow;
+};
+
+struct irdma_qp_mr {
+ struct irdma_hmc_pble sq_pbl;
+ struct irdma_hmc_pble rq_pbl;
+ dma_addr_t shadow;
+ dma_addr_t rq_pa;
+ struct page *sq_page;
+};
+
+struct irdma_cq_buf {
+ struct irdma_dma_mem kmem_buf;
+ struct irdma_cq_uk cq_uk;
+ struct irdma_hw *hw;
+ struct list_head list;
+ struct work_struct work;
+};
+
+struct irdma_pbl {
+ struct list_head list;
+ union {
+ struct irdma_qp_mr qp_mr;
+ struct irdma_cq_mr cq_mr;
+ struct irdma_srq_mr srq_mr;
+ };
+
+ bool pbl_allocated:1;
+ bool on_list:1;
+ u64 user_base;
+ struct irdma_pble_alloc pble_alloc;
+ struct irdma_mr *iwmr;
+};
+
+struct irdma_mr {
+ union {
+ struct ib_mr ibmr;
+ struct ib_mw ibmw;
+ };
+ struct ib_umem *region;
+ int access;
+ bool is_hwreg:1;
+ bool dma_mr:1;
+ u16 type;
+ u32 page_cnt;
+ u64 page_size;
+ u32 npages;
+ u32 stag;
+ u64 len;
+ u64 pgaddrmem[IRDMA_MAX_SAVED_PHY_PGADDR];
+ struct irdma_pbl iwpbl;
+};
+
+struct irdma_srq {
+ struct ib_srq ibsrq;
+ struct irdma_sc_srq sc_srq __aligned(64);
+ struct irdma_dma_mem kmem;
+ u64 *srq_wrid_mem;
+ refcount_t refcnt;
+ spinlock_t lock; /* for poll srq */
+ struct irdma_pbl *iwpbl;
+ struct irdma_sge *sg_list;
+ u16 srq_head;
+ u32 srq_num;
+ u32 max_wr;
+ bool user_mode:1;
+};
+
+struct irdma_cq {
+ struct ib_cq ibcq;
+ struct irdma_sc_cq sc_cq;
+ u32 cq_num;
+ bool user_mode;
+ atomic_t armed;
+ enum irdma_cmpl_notify last_notify;
+ struct irdma_dma_mem kmem;
+ struct irdma_dma_mem kmem_shadow;
+ struct completion free_cq;
+ refcount_t refcnt;
+ spinlock_t lock; /* for poll cq */
+ struct list_head resize_list;
+ struct irdma_cq_poll_info cur_cqe;
+ struct list_head cmpl_generated;
+};
+
+struct irdma_cmpl_gen {
+ struct list_head list;
+ struct irdma_cq_poll_info cpi;
+};
+
+struct disconn_work {
+ struct work_struct work;
+ struct irdma_qp *iwqp;
+};
+
+struct iw_cm_id;
+
+struct irdma_qp_kmode {
+ struct irdma_dma_mem dma_mem;
+ struct irdma_sq_uk_wr_trk_info *sq_wrid_mem;
+ u64 *rq_wrid_mem;
+};
+
+struct irdma_qp {
+ struct ib_qp ibqp;
+ struct irdma_sc_qp sc_qp;
+ struct irdma_device *iwdev;
+ struct irdma_cq *iwscq;
+ struct irdma_cq *iwrcq;
+ struct irdma_pd *iwpd;
+ struct rdma_user_mmap_entry *push_wqe_mmap_entry;
+ struct rdma_user_mmap_entry *push_db_mmap_entry;
+ struct irdma_qp_host_ctx_info ctx_info;
+ union {
+ struct irdma_iwarp_offload_info iwarp_info;
+ struct irdma_roce_offload_info roce_info;
+ };
+
+ union {
+ struct irdma_tcp_offload_info tcp_info;
+ struct irdma_udp_offload_info udp_info;
+ };
+
+ struct irdma_ah roce_ah;
+ struct list_head teardown_entry;
+ refcount_t refcnt;
+ struct iw_cm_id *cm_id;
+ struct irdma_cm_node *cm_node;
+ struct delayed_work dwork_flush;
+ struct ib_mr *lsmm_mr;
+ atomic_t hw_mod_qp_pend;
+ enum ib_qp_state ibqp_state;
+ u32 qp_mem_size;
+ u32 last_aeq;
+ int max_send_wr;
+ int max_recv_wr;
+ atomic_t close_timer_started;
+ spinlock_t lock; /* serialize posting WRs to SQ/RQ */
+ struct irdma_qp_context *iwqp_context;
+ void *pbl_vbase;
+ dma_addr_t pbl_pbase;
+ struct page *page;
+ u8 active_conn : 1;
+ u8 user_mode : 1;
+ u8 hte_added : 1;
+ u8 flush_issued : 1;
+ u8 sig_all : 1;
+ u8 pau_mode : 1;
+ u8 suspend_pending : 1;
+ u8 rsvd : 1;
+ u8 iwarp_state;
+ u16 term_sq_flush_code;
+ u16 term_rq_flush_code;
+ u8 hw_iwarp_state;
+ u8 hw_tcp_state;
+ struct irdma_qp_kmode kqp;
+ struct irdma_dma_mem host_ctx;
+ struct timer_list terminate_timer;
+ struct irdma_pbl *iwpbl;
+ struct irdma_dma_mem q2_ctx_mem;
+ struct irdma_dma_mem ietf_mem;
+ struct completion free_qp;
+ wait_queue_head_t waitq;
+ wait_queue_head_t mod_qp_waitq;
+ u8 rts_ae_rcvd;
+};
+
+enum irdma_mmap_flag {
+ IRDMA_MMAP_IO_NC,
+ IRDMA_MMAP_IO_WC,
+};
+
+struct irdma_user_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
+ u64 bar_offset;
+ u8 mmap_flag;
+};
+
+static inline u16 irdma_fw_major_ver(struct irdma_sc_dev *dev)
+{
+ return (u16)FIELD_GET(IRDMA_FW_VER_MAJOR, dev->feature_info[IRDMA_FEATURE_FW_INFO]);
+}
+
+static inline u16 irdma_fw_minor_ver(struct irdma_sc_dev *dev)
+{
+ return (u16)FIELD_GET(IRDMA_FW_VER_MINOR, dev->feature_info[IRDMA_FEATURE_FW_INFO]);
+}
+
+static inline void set_ib_wc_op_sq(struct irdma_cq_poll_info *cq_poll_info,
+ struct ib_wc *entry)
+{
+ switch (cq_poll_info->op_type) {
+ case IRDMA_OP_TYPE_RDMA_WRITE:
+ case IRDMA_OP_TYPE_RDMA_WRITE_SOL:
+ entry->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case IRDMA_OP_TYPE_RDMA_READ_INV_STAG:
+ case IRDMA_OP_TYPE_RDMA_READ:
+ entry->opcode = IB_WC_RDMA_READ;
+ break;
+ case IRDMA_OP_TYPE_SEND_SOL:
+ case IRDMA_OP_TYPE_SEND_SOL_INV:
+ case IRDMA_OP_TYPE_SEND_INV:
+ case IRDMA_OP_TYPE_SEND:
+ entry->opcode = IB_WC_SEND;
+ break;
+ case IRDMA_OP_TYPE_FAST_REG_NSMR:
+ entry->opcode = IB_WC_REG_MR;
+ break;
+ case IRDMA_OP_TYPE_ATOMIC_COMPARE_AND_SWAP:
+ entry->opcode = IB_WC_COMP_SWAP;
+ break;
+ case IRDMA_OP_TYPE_ATOMIC_FETCH_AND_ADD:
+ entry->opcode = IB_WC_FETCH_ADD;
+ break;
+ case IRDMA_OP_TYPE_INV_STAG:
+ entry->opcode = IB_WC_LOCAL_INV;
+ break;
+ default:
+ entry->status = IB_WC_GENERAL_ERR;
+ }
+}
+
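+/* GEN3 receive completions report the op type directly: RDMA writes carry
+ * immediate data, everything else is a plain receive.
+ */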
+static inline void set_ib_wc_op_rq_gen_3(struct irdma_cq_poll_info *info,
+ struct ib_wc *entry)
+{
+ switch (info->op_type) {
+ case IRDMA_OP_TYPE_RDMA_WRITE:
+ case IRDMA_OP_TYPE_RDMA_WRITE_SOL:
+ entry->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ break;
+ default:
+ entry->opcode = IB_WC_RECV;
+ }
+}
+
+static inline void set_ib_wc_op_rq(struct irdma_cq_poll_info *cq_poll_info,
+ struct ib_wc *entry, bool send_imm_support)
+{
+ /* iWARP does not support sendImm, so the presence of Imm data
+ * must be WriteImm.
+ */
+ if (!send_imm_support) {
+ entry->opcode = cq_poll_info->imm_valid ?
+ IB_WC_RECV_RDMA_WITH_IMM :
+ IB_WC_RECV;
+ return;
+ }
+
+ switch (cq_poll_info->op_type) {
+ case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE:
+ case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE:
+ entry->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ break;
+ default:
+ entry->opcode = IB_WC_RECV;
+ }
+}
+
+void irdma_mcast_mac(u32 *ip_addr, u8 *mac, bool ipv4);
+int irdma_ib_register_device(struct irdma_device *iwdev);
+void irdma_ib_unregister_device(struct irdma_device *iwdev);
+void irdma_ib_dealloc_device(struct ib_device *ibdev);
+void irdma_ib_qp_event(struct irdma_qp *iwqp, enum irdma_qp_event_type event);
+void irdma_generate_flush_completions(struct irdma_qp *iwqp);
+void irdma_remove_cmpls_list(struct irdma_cq *iwcq);
+int irdma_generated_cmpls(struct irdma_cq *iwcq, struct irdma_cq_poll_info *cq_poll_info);
+#endif /* IRDMA_VERBS_H */
diff --git a/drivers/infiniband/hw/irdma/virtchnl.c b/drivers/infiniband/hw/irdma/virtchnl.c
new file mode 100644
index 000000000000..16ad27247527
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/virtchnl.c
@@ -0,0 +1,618 @@
+// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+/* Copyright (c) 2015 - 2024 Intel Corporation */
+
+#include "osdep.h"
+#include "hmc.h"
+#include "defs.h"
+#include "type.h"
+#include "protos.h"
+#include "virtchnl.h"
+#include "ws.h"
+#include "i40iw_hw.h"
+#include "ig3rdma_hw.h"
+
+struct vchnl_reg_map_elem {
+ u16 reg_id;
+ u16 reg_idx;
+ bool pg_rel;
+};
+
+struct vchnl_regfld_map_elem {
+ u16 regfld_id;
+ u16 regfld_idx;
+};
+
+static struct vchnl_reg_map_elem vchnl_reg_map[] = {
+ {IRDMA_VCHNL_REG_ID_CQPTAIL, IRDMA_CQPTAIL, false},
+ {IRDMA_VCHNL_REG_ID_CQPDB, IRDMA_CQPDB, false},
+ {IRDMA_VCHNL_REG_ID_CCQPSTATUS, IRDMA_CCQPSTATUS, false},
+ {IRDMA_VCHNL_REG_ID_CCQPHIGH, IRDMA_CCQPHIGH, false},
+ {IRDMA_VCHNL_REG_ID_CCQPLOW, IRDMA_CCQPLOW, false},
+ {IRDMA_VCHNL_REG_ID_CQARM, IRDMA_CQARM, false},
+ {IRDMA_VCHNL_REG_ID_CQACK, IRDMA_CQACK, false},
+ {IRDMA_VCHNL_REG_ID_AEQALLOC, IRDMA_AEQALLOC, false},
+ {IRDMA_VCHNL_REG_ID_CQPERRCODES, IRDMA_CQPERRCODES, false},
+ {IRDMA_VCHNL_REG_ID_WQEALLOC, IRDMA_WQEALLOC, false},
+ {IRDMA_VCHNL_REG_ID_DB_ADDR_OFFSET, IRDMA_DB_ADDR_OFFSET, false },
+ {IRDMA_VCHNL_REG_ID_DYN_CTL, IRDMA_GLINT_DYN_CTL, false },
+ {IRDMA_VCHNL_REG_INV_ID, IRDMA_VCHNL_REG_INV_ID, false }
+};
+
+static struct vchnl_regfld_map_elem vchnl_regfld_map[] = {
+ {IRDMA_VCHNL_REGFLD_ID_CCQPSTATUS_CQP_OP_ERR, IRDMA_CCQPSTATUS_CCQP_ERR_M},
+ {IRDMA_VCHNL_REGFLD_ID_CCQPSTATUS_CCQP_DONE, IRDMA_CCQPSTATUS_CCQP_DONE_M},
+ {IRDMA_VCHNL_REGFLD_ID_CQPSQ_STAG_PDID, IRDMA_CQPSQ_STAG_PDID_M},
+ {IRDMA_VCHNL_REGFLD_ID_CQPSQ_CQ_CEQID, IRDMA_CQPSQ_CQ_CEQID_M},
+ {IRDMA_VCHNL_REGFLD_ID_CQPSQ_CQ_CQID, IRDMA_CQPSQ_CQ_CQID_M},
+ {IRDMA_VCHNL_REGFLD_ID_COMMIT_FPM_CQCNT, IRDMA_COMMIT_FPM_CQCNT_M},
+ {IRDMA_VCHNL_REGFLD_ID_UPESD_HMCN_ID, IRDMA_CQPSQ_UPESD_HMCFNID_M},
+ {IRDMA_VCHNL_REGFLD_INV_ID, IRDMA_VCHNL_REGFLD_INV_ID}
+};
+
+#define IRDMA_VCHNL_REG_COUNT ARRAY_SIZE(vchnl_reg_map)
+#define IRDMA_VCHNL_REGFLD_COUNT ARRAY_SIZE(vchnl_regfld_map)
+#define IRDMA_VCHNL_REGFLD_BUF_SIZE \
+ (IRDMA_VCHNL_REG_COUNT * sizeof(struct irdma_vchnl_reg_info) + \
+ IRDMA_VCHNL_REGFLD_COUNT * sizeof(struct irdma_vchnl_reg_field_info))
+#define IRDMA_REGMAP_RESP_BUF_SIZE (IRDMA_VCHNL_RESP_MIN_SIZE + IRDMA_VCHNL_REGFLD_BUF_SIZE)
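+
+/* The GET_REG_LAYOUT response is parsed as an array of
+ * irdma_vchnl_reg_info entries terminated by IRDMA_VCHNL_REG_INV_ID,
+ * immediately followed by irdma_vchnl_reg_field_info entries terminated
+ * by IRDMA_VCHNL_REGFLD_INV_ID. The macros above size the local buffer
+ * for the largest expected layout.
+ */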
+
+/**
+ * irdma_sc_vchnl_init - Initialize dev virtchannel and get hw_rev
+ * @dev: dev structure to update
+ * @info: virtchannel info parameters to fill into the dev structure
+ */
+int irdma_sc_vchnl_init(struct irdma_sc_dev *dev,
+ struct irdma_vchnl_init_info *info)
+{
+ dev->vchnl_up = true;
+ dev->privileged = info->privileged;
+ dev->is_pf = info->is_pf;
+ dev->hw_attrs.uk_attrs.hw_rev = info->hw_rev;
+
+ if (!dev->privileged) {
+ int ret = irdma_vchnl_req_get_ver(dev, IRDMA_VCHNL_CHNL_VER_MAX,
+ &dev->vchnl_ver);
+
+ ibdev_dbg(to_ibdev(dev),
+ "DEV: Get Channel version ret = %d, version is %u\n",
+ ret, dev->vchnl_ver);
+
+ if (ret)
+ return ret;
+
+ ret = irdma_vchnl_req_get_caps(dev);
+ if (ret)
+ return ret;
+
+ dev->hw_attrs.uk_attrs.hw_rev = dev->vc_caps.hw_rev;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_vchnl_req_verify_resp - Verify requested response size
+ * @vchnl_req: vchnl message requested
+ * @resp_len: response length sent from vchnl peer
+ */
+static int irdma_vchnl_req_verify_resp(struct irdma_vchnl_req *vchnl_req,
+ u16 resp_len)
+{
+ switch (vchnl_req->vchnl_msg->op_code) {
+ case IRDMA_VCHNL_OP_GET_VER:
+ case IRDMA_VCHNL_OP_GET_HMC_FCN:
+ case IRDMA_VCHNL_OP_PUT_HMC_FCN:
+ if (resp_len != vchnl_req->parm_len)
+ return -EBADMSG;
+ break;
+ case IRDMA_VCHNL_OP_GET_RDMA_CAPS:
+ if (resp_len < IRDMA_VCHNL_OP_GET_RDMA_CAPS_MIN_SIZE)
+ return -EBADMSG;
+ break;
+ case IRDMA_VCHNL_OP_GET_REG_LAYOUT:
+ case IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP:
+ case IRDMA_VCHNL_OP_QUEUE_VECTOR_UNMAP:
+ case IRDMA_VCHNL_OP_ADD_VPORT:
+ case IRDMA_VCHNL_OP_DEL_VPORT:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void irdma_free_vchnl_req_msg(struct irdma_vchnl_req *vchnl_req)
+{
+ kfree(vchnl_req->vchnl_msg);
+}
+
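+/* Allocate and fill the outbound vchnl message buffer, copy in any
+ * request parameters, and record where the response parameters should
+ * be copied on completion.
+ */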
+static int irdma_alloc_vchnl_req_msg(struct irdma_vchnl_req *vchnl_req,
+ struct irdma_vchnl_req_init_info *info)
+{
+ struct irdma_vchnl_op_buf *vchnl_msg;
+
+ vchnl_msg = kzalloc(IRDMA_VCHNL_MAX_MSG_SIZE, GFP_KERNEL);
+
+ if (!vchnl_msg)
+ return -ENOMEM;
+
+ vchnl_msg->op_ctx = (uintptr_t)vchnl_req;
+ vchnl_msg->buf_len = sizeof(*vchnl_msg) + info->req_parm_len;
+ if (info->req_parm_len)
+ memcpy(vchnl_msg->buf, info->req_parm, info->req_parm_len);
+ vchnl_msg->op_code = info->op_code;
+ vchnl_msg->op_ver = info->op_ver;
+
+ vchnl_req->vchnl_msg = vchnl_msg;
+ vchnl_req->parm = info->resp_parm;
+ vchnl_req->parm_len = info->resp_parm_len;
+
+ return 0;
+}
+
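+/* Send a virtual channel message to the peer and wait for its response
+ * under dev->vchnl_mutex. On success the response payload is validated
+ * and copied into the caller's buffer by irdma_vchnl_req_get_resp().
+ */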
+static int irdma_vchnl_req_send_sync(struct irdma_sc_dev *dev,
+ struct irdma_vchnl_req_init_info *info)
+{
+ u16 resp_len = sizeof(dev->vc_recv_buf);
+ struct irdma_vchnl_req vchnl_req = {};
+ u16 msg_len;
+ u8 *msg;
+ int ret;
+
+ ret = irdma_alloc_vchnl_req_msg(&vchnl_req, info);
+ if (ret)
+ return ret;
+
+ msg_len = vchnl_req.vchnl_msg->buf_len;
+ msg = (u8 *)vchnl_req.vchnl_msg;
+
+ mutex_lock(&dev->vchnl_mutex);
+ ret = ig3rdma_vchnl_send_sync(dev, msg, msg_len, dev->vc_recv_buf,
+ &resp_len);
+ dev->vc_recv_len = resp_len;
+ if (ret)
+ goto exit;
+
+ ret = irdma_vchnl_req_get_resp(dev, &vchnl_req);
+exit:
+ mutex_unlock(&dev->vchnl_mutex);
+ ibdev_dbg(to_ibdev(dev),
+ "VIRT: virtual channel send %s caller: %pS ret=%d op=%u op_ver=%u req_len=%u parm_len=%u resp_len=%u\n",
+ !ret ? "SUCCEEDS" : "FAILS", __builtin_return_address(0),
+ ret, vchnl_req.vchnl_msg->op_code,
+ vchnl_req.vchnl_msg->op_ver, vchnl_req.vchnl_msg->buf_len,
+ vchnl_req.parm_len, vchnl_req.resp_len);
+ irdma_free_vchnl_req_msg(&vchnl_req);
+
+ return ret;
+}
+
+/**
+ * irdma_vchnl_req_get_reg_layout - Get Register Layout
+ * @dev: RDMA device pointer
+ */
+int irdma_vchnl_req_get_reg_layout(struct irdma_sc_dev *dev)
+{
+ u16 reg_idx, reg_id, tmp_reg_id, regfld_idx, regfld_id, tmp_regfld_id;
+ struct irdma_vchnl_reg_field_info *regfld_array = NULL;
+ u8 resp_buffer[IRDMA_REGMAP_RESP_BUF_SIZE] = {};
+ struct vchnl_regfld_map_elem *regfld_map_array;
+ struct irdma_vchnl_req_init_info info = {};
+ struct vchnl_reg_map_elem *reg_map_array;
+ struct irdma_vchnl_reg_info *reg_array;
+ u8 num_bits, shift_cnt;
+ u16 buf_len = 0;
+ u64 bitmask;
+ u32 rindex;
+ int ret;
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ info.op_code = IRDMA_VCHNL_OP_GET_REG_LAYOUT;
+ info.op_ver = IRDMA_VCHNL_OP_GET_REG_LAYOUT_V0;
+ info.resp_parm = resp_buffer;
+ info.resp_parm_len = sizeof(resp_buffer);
+
+ ret = irdma_vchnl_req_send_sync(dev, &info);
+
+ if (ret)
+ return ret;
+
+	/* Parse the response buffer and update the register info:
+	 * registers are parsed until an invalid ID is found, then
+	 * register fields are parsed until an invalid ID is found.
+	 */
+ reg_array = (struct irdma_vchnl_reg_info *)resp_buffer;
+ for (rindex = 0; rindex < IRDMA_VCHNL_REG_COUNT; rindex++) {
+ buf_len += sizeof(struct irdma_vchnl_reg_info);
+ if (buf_len >= sizeof(resp_buffer))
+ return -ENOMEM;
+
+ regfld_array =
+ (struct irdma_vchnl_reg_field_info *)&reg_array[rindex + 1];
+ reg_id = reg_array[rindex].reg_id;
+ if (reg_id == IRDMA_VCHNL_REG_INV_ID)
+ break;
+
+ reg_id &= ~IRDMA_VCHNL_REG_PAGE_REL;
+ if (reg_id >= IRDMA_VCHNL_REG_COUNT)
+ return -EINVAL;
+
+		/* Search regmap for the register index in hw_regs. */
+ reg_map_array = vchnl_reg_map;
+ do {
+ tmp_reg_id = reg_map_array->reg_id;
+ if (tmp_reg_id == reg_id)
+ break;
+
+ reg_map_array++;
+ } while (tmp_reg_id != IRDMA_VCHNL_REG_INV_ID);
+ if (tmp_reg_id != reg_id)
+ continue;
+
+ reg_idx = reg_map_array->reg_idx;
+
+		/* Page-relative registers and the DB offset do not need the BAR offset */
+ if (reg_idx == IRDMA_DB_ADDR_OFFSET ||
+ (reg_array[rindex].reg_id & IRDMA_VCHNL_REG_PAGE_REL)) {
+ dev->hw_regs[reg_idx] =
+ (u32 __iomem *)(uintptr_t)reg_array[rindex].reg_offset;
+ continue;
+ }
+
+ /* Update the local HW struct */
+ dev->hw_regs[reg_idx] = ig3rdma_get_reg_addr(dev->hw,
+ reg_array[rindex].reg_offset);
+ if (!dev->hw_regs[reg_idx])
+ return -EINVAL;
+ }
+
+ if (!regfld_array)
+ return -ENOMEM;
+
+ /* set up doorbell variables using mapped DB page */
+ dev->wqe_alloc_db = dev->hw_regs[IRDMA_WQEALLOC];
+ dev->cq_arm_db = dev->hw_regs[IRDMA_CQARM];
+ dev->aeq_alloc_db = dev->hw_regs[IRDMA_AEQALLOC];
+ dev->cqp_db = dev->hw_regs[IRDMA_CQPDB];
+ dev->cq_ack_db = dev->hw_regs[IRDMA_CQACK];
+
+ for (rindex = 0; rindex < IRDMA_VCHNL_REGFLD_COUNT; rindex++) {
+ buf_len += sizeof(struct irdma_vchnl_reg_field_info);
+ if ((buf_len - 1) > sizeof(resp_buffer))
+ break;
+
+ if (regfld_array[rindex].fld_id == IRDMA_VCHNL_REGFLD_INV_ID)
+ break;
+
+ regfld_id = regfld_array[rindex].fld_id;
+ regfld_map_array = vchnl_regfld_map;
+ do {
+ tmp_regfld_id = regfld_map_array->regfld_id;
+ if (tmp_regfld_id == regfld_id)
+ break;
+
+ regfld_map_array++;
+ } while (tmp_regfld_id != IRDMA_VCHNL_REGFLD_INV_ID);
+
+ if (tmp_regfld_id != regfld_id)
+ continue;
+
+ regfld_idx = regfld_map_array->regfld_idx;
+
+ num_bits = regfld_array[rindex].fld_bits;
+ shift_cnt = regfld_array[rindex].fld_shift;
+ if ((num_bits + shift_cnt > 64) || !num_bits) {
+ ibdev_dbg(to_ibdev(dev),
+ "ERR: Invalid field mask id %d bits %d shift %d",
+ regfld_id, num_bits, shift_cnt);
+
+ continue;
+ }
+
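+		/* Build the field mask from the reported width and shift,
+		 * e.g. a 3-bit field at shift 4 yields mask 0x70, shift 4.
+		 */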
+ bitmask = (1ULL << num_bits) - 1;
+ dev->hw_masks[regfld_idx] = bitmask << shift_cnt;
+ dev->hw_shifts[regfld_idx] = shift_cnt;
+ }
+
+ return 0;
+}
+
+int irdma_vchnl_req_add_vport(struct irdma_sc_dev *dev, u16 vport_id,
+ u32 qp1_id, struct irdma_qos *qos)
+{
+ struct irdma_vchnl_resp_vport_info resp_vport = { 0 };
+ struct irdma_vchnl_req_vport_info req_vport = { 0 };
+ struct irdma_vchnl_req_init_info info = { 0 };
+ int ret, i;
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ info.op_code = IRDMA_VCHNL_OP_ADD_VPORT;
+ info.op_ver = IRDMA_VCHNL_OP_ADD_VPORT_V0;
+ req_vport.vport_id = vport_id;
+ req_vport.qp1_id = qp1_id;
+ info.req_parm_len = sizeof(req_vport);
+ info.req_parm = &req_vport;
+ info.resp_parm = &resp_vport;
+ info.resp_parm_len = sizeof(resp_vport);
+
+ ret = irdma_vchnl_req_send_sync(dev, &info);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+ qos[i].qs_handle = resp_vport.qs_handle[i];
+ qos[i].valid = true;
+ }
+
+ return 0;
+}
+
+int irdma_vchnl_req_del_vport(struct irdma_sc_dev *dev, u16 vport_id, u32 qp1_id)
+{
+ struct irdma_vchnl_req_init_info info = { 0 };
+ struct irdma_vchnl_req_vport_info req_vport = { 0 };
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ info.op_code = IRDMA_VCHNL_OP_DEL_VPORT;
+ info.op_ver = IRDMA_VCHNL_OP_DEL_VPORT_V0;
+ req_vport.vport_id = vport_id;
+ req_vport.qp1_id = qp1_id;
+ info.req_parm_len = sizeof(req_vport);
+ info.req_parm = &req_vport;
+
+ return irdma_vchnl_req_send_sync(dev, &info);
+}
+
+/**
+ * irdma_vchnl_req_aeq_vec_map - Map AEQ to vector on this function
+ * @dev: RDMA device pointer
+ * @v_idx: vector index
+ */
+int irdma_vchnl_req_aeq_vec_map(struct irdma_sc_dev *dev, u32 v_idx)
+{
+ struct irdma_vchnl_req_init_info info = {};
+ struct irdma_vchnl_qvlist_info *qvl;
+ struct irdma_vchnl_qv_info *qv;
+ u16 qvl_size, num_vectors = 1;
+ int ret;
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ qvl_size = struct_size(qvl, qv_info, num_vectors);
+
+ qvl = kzalloc(qvl_size, GFP_KERNEL);
+ if (!qvl)
+ return -ENOMEM;
+
+ qvl->num_vectors = 1;
+ qv = qvl->qv_info;
+
+ qv->ceq_idx = IRDMA_Q_INVALID_IDX;
+ qv->v_idx = v_idx;
+ qv->itr_idx = IRDMA_IDX_ITR0;
+
+ info.op_code = IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP;
+ info.op_ver = IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP_V0;
+ info.req_parm = qvl;
+ info.req_parm_len = qvl_size;
+
+ ret = irdma_vchnl_req_send_sync(dev, &info);
+ kfree(qvl);
+
+ return ret;
+}
+
+/**
+ * irdma_vchnl_req_ceq_vec_map - Map CEQ to vector on this function
+ * @dev: RDMA device pointer
+ * @ceq_id: CEQ index
+ * @v_idx: vector index
+ */
+int irdma_vchnl_req_ceq_vec_map(struct irdma_sc_dev *dev, u16 ceq_id, u32 v_idx)
+{
+ struct irdma_vchnl_req_init_info info = {};
+ struct irdma_vchnl_qvlist_info *qvl;
+ struct irdma_vchnl_qv_info *qv;
+ u16 qvl_size, num_vectors = 1;
+ int ret;
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ qvl_size = struct_size(qvl, qv_info, num_vectors);
+
+ qvl = kzalloc(qvl_size, GFP_KERNEL);
+ if (!qvl)
+ return -ENOMEM;
+
+ qvl->num_vectors = num_vectors;
+ qv = qvl->qv_info;
+
+ qv->aeq_idx = IRDMA_Q_INVALID_IDX;
+ qv->ceq_idx = ceq_id;
+ qv->v_idx = v_idx;
+ qv->itr_idx = IRDMA_IDX_ITR0;
+
+ info.op_code = IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP;
+ info.op_ver = IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP_V0;
+ info.req_parm = qvl;
+ info.req_parm_len = qvl_size;
+
+ ret = irdma_vchnl_req_send_sync(dev, &info);
+ kfree(qvl);
+
+ return ret;
+}
+
+/**
+ * irdma_vchnl_req_get_ver - Request Channel version
+ * @dev: RDMA device pointer
+ * @ver_req: Virtual channel version requested
+ * @ver_res: Virtual channel version response
+ */
+int irdma_vchnl_req_get_ver(struct irdma_sc_dev *dev, u16 ver_req, u32 *ver_res)
+{
+ struct irdma_vchnl_req_init_info info = {};
+ int ret;
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ info.op_code = IRDMA_VCHNL_OP_GET_VER;
+ info.op_ver = ver_req;
+ info.resp_parm = ver_res;
+ info.resp_parm_len = sizeof(*ver_res);
+
+ ret = irdma_vchnl_req_send_sync(dev, &info);
+ if (ret)
+ return ret;
+
+ if (*ver_res < IRDMA_VCHNL_CHNL_VER_MIN) {
+ ibdev_dbg(to_ibdev(dev),
+ "VIRT: %s unsupported vchnl version 0x%0x\n",
+ __func__, *ver_res);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_vchnl_req_get_hmc_fcn - Request VF HMC Function
+ * @dev: RDMA device pointer
+ */
+int irdma_vchnl_req_get_hmc_fcn(struct irdma_sc_dev *dev)
+{
+ struct irdma_vchnl_req_hmc_info req_hmc = {};
+ struct irdma_vchnl_resp_hmc_info resp_hmc = {};
+ struct irdma_vchnl_req_init_info info = {};
+ int ret;
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ info.op_code = IRDMA_VCHNL_OP_GET_HMC_FCN;
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ info.op_ver = IRDMA_VCHNL_OP_GET_HMC_FCN_V2;
+ req_hmc.protocol_used = dev->protocol_used;
+ info.req_parm_len = sizeof(req_hmc);
+ info.req_parm = &req_hmc;
+ info.resp_parm = &resp_hmc;
+ info.resp_parm_len = sizeof(resp_hmc);
+ }
+
+ ret = irdma_vchnl_req_send_sync(dev, &info);
+
+ if (ret)
+ return ret;
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ int i;
+
+ dev->hmc_fn_id = resp_hmc.hmc_func;
+
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+ dev->qos[i].qs_handle = resp_hmc.qs_handle[i];
+ dev->qos[i].valid = true;
+ }
+ }
+ return 0;
+}
+
+/**
+ * irdma_vchnl_req_put_hmc_fcn - Free VF HMC Function
+ * @dev: RDMA device pointer
+ */
+int irdma_vchnl_req_put_hmc_fcn(struct irdma_sc_dev *dev)
+{
+ struct irdma_vchnl_req_init_info info = {};
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ info.op_code = IRDMA_VCHNL_OP_PUT_HMC_FCN;
+ info.op_ver = IRDMA_VCHNL_OP_PUT_HMC_FCN_V0;
+
+ return irdma_vchnl_req_send_sync(dev, &info);
+}
+
+/**
+ * irdma_vchnl_req_get_caps - Request RDMA capabilities
+ * @dev: RDMA device pointer
+ */
+int irdma_vchnl_req_get_caps(struct irdma_sc_dev *dev)
+{
+ struct irdma_vchnl_req_init_info info = {};
+ int ret;
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ info.op_code = IRDMA_VCHNL_OP_GET_RDMA_CAPS;
+ info.op_ver = IRDMA_VCHNL_OP_GET_RDMA_CAPS_V0;
+ info.resp_parm = &dev->vc_caps;
+ info.resp_parm_len = sizeof(dev->vc_caps);
+
+ ret = irdma_vchnl_req_send_sync(dev, &info);
+
+ if (ret)
+ return ret;
+
+ if (dev->vc_caps.hw_rev > IRDMA_GEN_MAX ||
+ dev->vc_caps.hw_rev < IRDMA_GEN_2) {
+ ibdev_dbg(to_ibdev(dev),
+ "ERR: %s unsupported hw_rev version 0x%0x\n",
+ __func__, dev->vc_caps.hw_rev);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_vchnl_req_get_resp - Receive the inbound vchnl response.
+ * @dev: Dev pointer
+ * @vchnl_req: Vchannel request
+ */
+int irdma_vchnl_req_get_resp(struct irdma_sc_dev *dev,
+ struct irdma_vchnl_req *vchnl_req)
+{
+ struct irdma_vchnl_resp_buf *vchnl_msg_resp =
+ (struct irdma_vchnl_resp_buf *)dev->vc_recv_buf;
+ u16 resp_len;
+ int ret;
+
+ if ((uintptr_t)vchnl_req != (uintptr_t)vchnl_msg_resp->op_ctx) {
+ ibdev_dbg(to_ibdev(dev),
+ "VIRT: error vchnl context value does not match\n");
+ return -EBADMSG;
+ }
+
+ resp_len = dev->vc_recv_len - sizeof(*vchnl_msg_resp);
+ resp_len = min(resp_len, vchnl_req->parm_len);
+
+ ret = irdma_vchnl_req_verify_resp(vchnl_req, resp_len);
+ if (ret)
+ return ret;
+
+ ret = (int)vchnl_msg_resp->op_ret;
+ if (ret)
+ return ret;
+
+ vchnl_req->resp_len = 0;
+ if (vchnl_req->parm_len && vchnl_req->parm && resp_len) {
+ memcpy(vchnl_req->parm, vchnl_msg_resp->buf, resp_len);
+ vchnl_req->resp_len = resp_len;
+ ibdev_dbg(to_ibdev(dev), "VIRT: Got response, data size %u\n",
+ resp_len);
+ }
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/irdma/virtchnl.h b/drivers/infiniband/hw/irdma/virtchnl.h
new file mode 100644
index 000000000000..aa955a9125bd
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/virtchnl.h
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* Copyright (c) 2015 - 2024 Intel Corporation */
+#ifndef IRDMA_VIRTCHNL_H
+#define IRDMA_VIRTCHNL_H
+
+#include "hmc.h"
+#include "irdma.h"
+
+/* IRDMA_VCHNL_CHNL_VER_V0 is for legacy hw, no longer supported. */
+#define IRDMA_VCHNL_CHNL_VER_V2 2
+#define IRDMA_VCHNL_CHNL_VER_MIN IRDMA_VCHNL_CHNL_VER_V2
+#define IRDMA_VCHNL_CHNL_VER_MAX IRDMA_VCHNL_CHNL_VER_V2
+#define IRDMA_VCHNL_OP_GET_HMC_FCN_V0 0
+#define IRDMA_VCHNL_OP_GET_HMC_FCN_V1 1
+#define IRDMA_VCHNL_OP_GET_HMC_FCN_V2 2
+#define IRDMA_VCHNL_OP_PUT_HMC_FCN_V0 0
+#define IRDMA_VCHNL_OP_GET_REG_LAYOUT_V0 0
+#define IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP_V0 0
+#define IRDMA_VCHNL_OP_QUEUE_VECTOR_UNMAP_V0 0
+#define IRDMA_VCHNL_OP_ADD_VPORT_V0 0
+#define IRDMA_VCHNL_OP_DEL_VPORT_V0 0
+#define IRDMA_VCHNL_OP_GET_RDMA_CAPS_V0 0
+#define IRDMA_VCHNL_OP_GET_RDMA_CAPS_MIN_SIZE 1
+
+#define IRDMA_VCHNL_REG_ID_CQPTAIL 0
+#define IRDMA_VCHNL_REG_ID_CQPDB 1
+#define IRDMA_VCHNL_REG_ID_CCQPSTATUS 2
+#define IRDMA_VCHNL_REG_ID_CCQPHIGH 3
+#define IRDMA_VCHNL_REG_ID_CCQPLOW 4
+#define IRDMA_VCHNL_REG_ID_CQARM 5
+#define IRDMA_VCHNL_REG_ID_CQACK 6
+#define IRDMA_VCHNL_REG_ID_AEQALLOC 7
+#define IRDMA_VCHNL_REG_ID_CQPERRCODES 8
+#define IRDMA_VCHNL_REG_ID_WQEALLOC 9
+#define IRDMA_VCHNL_REG_ID_IPCONFIG0 10
+#define IRDMA_VCHNL_REG_ID_DB_ADDR_OFFSET 11
+#define IRDMA_VCHNL_REG_ID_DYN_CTL 12
+#define IRDMA_VCHNL_REG_ID_AEQITRMASK 13
+#define IRDMA_VCHNL_REG_ID_CEQITRMASK 14
+#define IRDMA_VCHNL_REG_INV_ID 0xFFFF
+#define IRDMA_VCHNL_REG_PAGE_REL 0x8000
+
+#define IRDMA_VCHNL_REGFLD_ID_CCQPSTATUS_CQP_OP_ERR 2
+#define IRDMA_VCHNL_REGFLD_ID_CCQPSTATUS_CCQP_DONE 5
+#define IRDMA_VCHNL_REGFLD_ID_CQPSQ_STAG_PDID 6
+#define IRDMA_VCHNL_REGFLD_ID_CQPSQ_CQ_CEQID 7
+#define IRDMA_VCHNL_REGFLD_ID_CQPSQ_CQ_CQID 8
+#define IRDMA_VCHNL_REGFLD_ID_COMMIT_FPM_CQCNT 9
+#define IRDMA_VCHNL_REGFLD_ID_UPESD_HMCN_ID 10
+#define IRDMA_VCHNL_REGFLD_INV_ID 0xFFFF
+
+#define IRDMA_VCHNL_RESP_MIN_SIZE (sizeof(struct irdma_vchnl_resp_buf))
+
+enum irdma_vchnl_ops {
+ IRDMA_VCHNL_OP_GET_VER = 0,
+ IRDMA_VCHNL_OP_GET_HMC_FCN = 1,
+ IRDMA_VCHNL_OP_PUT_HMC_FCN = 2,
+ IRDMA_VCHNL_OP_GET_REG_LAYOUT = 11,
+ IRDMA_VCHNL_OP_GET_RDMA_CAPS = 13,
+ IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP = 14,
+ IRDMA_VCHNL_OP_QUEUE_VECTOR_UNMAP = 15,
+ IRDMA_VCHNL_OP_ADD_VPORT = 16,
+ IRDMA_VCHNL_OP_DEL_VPORT = 17,
+};
+
+struct irdma_vchnl_req_hmc_info {
+ u8 protocol_used;
+ u8 disable_qos;
+} __packed;
+
+struct irdma_vchnl_resp_hmc_info {
+ u16 hmc_func;
+ u16 qs_handle[IRDMA_MAX_USER_PRIORITY];
+} __packed;
+
+struct irdma_vchnl_qv_info {
+ u32 v_idx;
+ u16 ceq_idx;
+ u16 aeq_idx;
+ u8 itr_idx;
+};
+
+struct irdma_vchnl_qvlist_info {
+ u32 num_vectors;
+ struct irdma_vchnl_qv_info qv_info[];
+};
+
+struct irdma_vchnl_req_vport_info {
+ u16 vport_id;
+ u32 qp1_id;
+};
+
+struct irdma_vchnl_resp_vport_info {
+ u16 qs_handle[IRDMA_MAX_USER_PRIORITY];
+};
+
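+/* Request header sent over the virtual channel: op_ctx carries the
+ * requester's context pointer and is echoed back in the response so the
+ * reply can be matched; op-specific parameters follow in buf[].
+ */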
+struct irdma_vchnl_op_buf {
+ u16 op_code;
+ u16 op_ver;
+ u16 buf_len;
+ u16 rsvd;
+ u64 op_ctx;
+ u8 buf[];
+} __packed;
+
+struct irdma_vchnl_resp_buf {
+ u64 op_ctx;
+ u16 buf_len;
+ s16 op_ret;
+ u16 rsvd[2];
+ u8 buf[];
+} __packed;
+
+struct irdma_vchnl_rdma_caps {
+ u8 hw_rev;
+ u16 cqp_timeout_s;
+ u16 cqp_def_timeout_s;
+ u16 max_hw_push_len;
+} __packed;
+
+struct irdma_vchnl_init_info {
+ struct workqueue_struct *vchnl_wq;
+ enum irdma_vers hw_rev;
+ bool privileged;
+ bool is_pf;
+};
+
+struct irdma_vchnl_reg_info {
+ u32 reg_offset;
+ u16 field_cnt;
+ u16 reg_id; /* High bit of reg_id: bar or page relative */
+};
+
+struct irdma_vchnl_reg_field_info {
+ u8 fld_shift;
+ u8 fld_bits;
+ u16 fld_id;
+};
+
+struct irdma_vchnl_req {
+ struct irdma_vchnl_op_buf *vchnl_msg;
+ void *parm;
+ u32 vf_id;
+ u16 parm_len;
+ u16 resp_len;
+};
+
+struct irdma_vchnl_req_init_info {
+ void *req_parm;
+ void *resp_parm;
+ u16 req_parm_len;
+ u16 resp_parm_len;
+ u16 op_code;
+ u16 op_ver;
+} __packed;
+
+struct irdma_qos;
+
+int irdma_sc_vchnl_init(struct irdma_sc_dev *dev,
+ struct irdma_vchnl_init_info *info);
+int irdma_vchnl_req_get_ver(struct irdma_sc_dev *dev, u16 ver_req,
+ u32 *ver_res);
+int irdma_vchnl_req_get_hmc_fcn(struct irdma_sc_dev *dev);
+int irdma_vchnl_req_put_hmc_fcn(struct irdma_sc_dev *dev);
+int irdma_vchnl_req_get_caps(struct irdma_sc_dev *dev);
+int irdma_vchnl_req_get_resp(struct irdma_sc_dev *dev,
+ struct irdma_vchnl_req *vc_req);
+int irdma_vchnl_req_get_reg_layout(struct irdma_sc_dev *dev);
+int irdma_vchnl_req_aeq_vec_map(struct irdma_sc_dev *dev, u32 v_idx);
+int irdma_vchnl_req_ceq_vec_map(struct irdma_sc_dev *dev, u16 ceq_id,
+ u32 v_idx);
+int irdma_vchnl_req_add_vport(struct irdma_sc_dev *dev, u16 vport_id,
+ u32 qp1_id, struct irdma_qos *qos);
+int irdma_vchnl_req_del_vport(struct irdma_sc_dev *dev, u16 vport_id,
+ u32 qp1_id);
+#endif /* IRDMA_VIRTCHNL_H */
diff --git a/drivers/infiniband/hw/irdma/ws.c b/drivers/infiniband/hw/irdma/ws.c
new file mode 100644
index 000000000000..542bc0b1bb03
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/ws.c
@@ -0,0 +1,406 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2017 - 2021 Intel Corporation */
+#include "osdep.h"
+#include "hmc.h"
+#include "defs.h"
+#include "type.h"
+#include "protos.h"
+
+#include "ws.h"
+
+/**
+ * irdma_alloc_node - Allocate a WS node and init
+ * @vsi: vsi pointer
+ * @user_pri: user priority
+ * @node_type: Type of node, leaf or parent
+ * @parent: parent node pointer
+ */
+static struct irdma_ws_node *irdma_alloc_node(struct irdma_sc_vsi *vsi,
+ u8 user_pri,
+ enum irdma_ws_node_type node_type,
+ struct irdma_ws_node *parent)
+{
+ struct irdma_virt_mem ws_mem;
+ struct irdma_ws_node *node;
+ u16 node_index = 0;
+
+ ws_mem.size = sizeof(struct irdma_ws_node);
+ ws_mem.va = kzalloc(ws_mem.size, GFP_KERNEL);
+ if (!ws_mem.va)
+ return NULL;
+
+ if (parent) {
+ node_index = irdma_alloc_ws_node_id(vsi->dev);
+ if (node_index == IRDMA_WS_NODE_INVALID) {
+ kfree(ws_mem.va);
+ return NULL;
+ }
+ }
+
+ node = ws_mem.va;
+ node->index = node_index;
+ node->vsi_index = vsi->vsi_idx;
+ INIT_LIST_HEAD(&node->child_list_head);
+ if (node_type == WS_NODE_TYPE_LEAF) {
+ node->type_leaf = true;
+ node->traffic_class = vsi->qos[user_pri].traffic_class;
+ node->user_pri = user_pri;
+ node->rel_bw = vsi->qos[user_pri].rel_bw;
+ if (!node->rel_bw)
+ node->rel_bw = 1;
+
+ node->lan_qs_handle = vsi->qos[user_pri].lan_qos_handle;
+ node->prio_type = IRDMA_PRIO_WEIGHTED_RR;
+ } else {
+ node->rel_bw = 1;
+ node->prio_type = IRDMA_PRIO_WEIGHTED_RR;
+ node->enable = true;
+ }
+
+ node->parent = parent;
+
+ return node;
+}
+
+/**
+ * irdma_free_node - Free a WS node
+ * @vsi: VSI structure of device
+ * @node: Pointer to node to free
+ */
+static void irdma_free_node(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *node)
+{
+ struct irdma_virt_mem ws_mem;
+
+ if (node->index)
+ irdma_free_ws_node_id(vsi->dev, node->index);
+
+ ws_mem.va = node;
+ ws_mem.size = sizeof(struct irdma_ws_node);
+ kfree(ws_mem.va);
+}
+
+/**
+ * irdma_ws_cqp_cmd - Post CQP work scheduler node cmd
+ * @vsi: vsi pointer
+ * @node: pointer to node
+ * @cmd: add, remove or modify
+ */
+static int irdma_ws_cqp_cmd(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *node, u8 cmd)
+{
+ struct irdma_ws_node_info node_info = {};
+
+ node_info.id = node->index;
+ node_info.vsi = node->vsi_index;
+ if (node->parent)
+ node_info.parent_id = node->parent->index;
+ else
+ node_info.parent_id = node_info.id;
+
+ node_info.weight = node->rel_bw;
+ node_info.tc = node->traffic_class;
+ node_info.prio_type = node->prio_type;
+ node_info.type_leaf = node->type_leaf;
+ node_info.enable = node->enable;
+ if (irdma_cqp_ws_node_cmd(vsi->dev, cmd, &node_info)) {
+ ibdev_dbg(to_ibdev(vsi->dev), "WS: CQP WS CMD failed\n");
+ return -ENOMEM;
+ }
+
+ if (node->type_leaf && cmd == IRDMA_OP_WS_ADD_NODE) {
+ node->qs_handle = node_info.qs_handle;
+ vsi->qos[node->user_pri].qs_handle = node_info.qs_handle;
+ }
+
+ return 0;
+}
+
+/**
+ * ws_find_node - Find SC WS node based on VSI id or TC
+ * @parent: parent node of First VSI or TC node
+ * @match_val: value to match
+ * @type: match type VSI/TC
+ */
+static struct irdma_ws_node *ws_find_node(struct irdma_ws_node *parent,
+ u16 match_val,
+ enum irdma_ws_match_type type)
+{
+ struct irdma_ws_node *node;
+
+ switch (type) {
+ case WS_MATCH_TYPE_VSI:
+ list_for_each_entry(node, &parent->child_list_head, siblings) {
+ if (node->vsi_index == match_val)
+ return node;
+ }
+ break;
+ case WS_MATCH_TYPE_TC:
+ list_for_each_entry(node, &parent->child_list_head, siblings) {
+ if (node->traffic_class == match_val)
+ return node;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return NULL;
+}
+
+/**
+ * irdma_tc_in_use - Checks to see if a leaf node is in use
+ * @vsi: vsi pointer
+ * @user_pri: user priority
+ */
+static bool irdma_tc_in_use(struct irdma_sc_vsi *vsi, u8 user_pri)
+{
+ int i;
+
+ mutex_lock(&vsi->qos[user_pri].qos_mutex);
+ if (!list_empty(&vsi->qos[user_pri].qplist)) {
+ mutex_unlock(&vsi->qos[user_pri].qos_mutex);
+ return true;
+ }
+
+	/* Check if the traffic class associated with the given user priority
+	 * is in use by any other user priority. If so, there is nothing
+	 * left to do.
+	 */
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+ if (vsi->qos[i].traffic_class == vsi->qos[user_pri].traffic_class &&
+ !list_empty(&vsi->qos[i].qplist)) {
+ mutex_unlock(&vsi->qos[user_pri].qos_mutex);
+ return true;
+ }
+ }
+ mutex_unlock(&vsi->qos[user_pri].qos_mutex);
+
+ return false;
+}
+
+/**
+ * irdma_remove_leaf - Remove leaf node unconditionally
+ * @vsi: vsi pointer
+ * @user_pri: user priority
+ */
+static void irdma_remove_leaf(struct irdma_sc_vsi *vsi, u8 user_pri)
+{
+ struct irdma_ws_node *ws_tree_root, *vsi_node, *tc_node;
+ int i;
+ u16 traffic_class;
+
+ traffic_class = vsi->qos[user_pri].traffic_class;
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++)
+ if (vsi->qos[i].traffic_class == traffic_class)
+ vsi->qos[i].valid = false;
+
+ ws_tree_root = vsi->dev->ws_tree_root;
+ if (!ws_tree_root)
+ return;
+
+ vsi_node = ws_find_node(ws_tree_root, vsi->vsi_idx,
+ WS_MATCH_TYPE_VSI);
+ if (!vsi_node)
+ return;
+
+ tc_node = ws_find_node(vsi_node,
+ vsi->qos[user_pri].traffic_class,
+ WS_MATCH_TYPE_TC);
+ if (!tc_node)
+ return;
+
+ irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_DELETE_NODE);
+ vsi->unregister_qset(vsi, tc_node);
+ list_del(&tc_node->siblings);
+ irdma_free_node(vsi, tc_node);
+ /* Check if VSI node can be freed */
+ if (list_empty(&vsi_node->child_list_head)) {
+ irdma_ws_cqp_cmd(vsi, vsi_node, IRDMA_OP_WS_DELETE_NODE);
+ list_del(&vsi_node->siblings);
+ irdma_free_node(vsi, vsi_node);
+		/* Free head node if there are no remaining VSI nodes */
+ if (list_empty(&ws_tree_root->child_list_head)) {
+ irdma_ws_cqp_cmd(vsi, ws_tree_root,
+ IRDMA_OP_WS_DELETE_NODE);
+ irdma_free_node(vsi, ws_tree_root);
+ vsi->dev->ws_tree_root = NULL;
+ }
+ }
+}
+
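+/* The work scheduler tree has a single root node, one child node per
+ * VSI, and one leaf node per traffic class under each VSI.
+ * irdma_ws_add() creates any missing nodes along that path and programs
+ * them through CQP commands.
+ */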
+/**
+ * irdma_ws_add - Build work scheduler tree, set RDMA qs_handle
+ * @vsi: vsi pointer
+ * @user_pri: user priority
+ */
+int irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri)
+{
+ struct irdma_ws_node *ws_tree_root;
+ struct irdma_ws_node *vsi_node;
+ struct irdma_ws_node *tc_node;
+ u16 traffic_class;
+ int ret = 0;
+ int i;
+
+ mutex_lock(&vsi->dev->ws_mutex);
+ if (vsi->tc_change_pending) {
+ ret = -EBUSY;
+ goto exit;
+ }
+
+ if (vsi->qos[user_pri].valid)
+ goto exit;
+
+ ws_tree_root = vsi->dev->ws_tree_root;
+ if (!ws_tree_root) {
+ ibdev_dbg(to_ibdev(vsi->dev), "WS: Creating root node\n");
+ ws_tree_root = irdma_alloc_node(vsi, user_pri,
+ WS_NODE_TYPE_PARENT, NULL);
+ if (!ws_tree_root) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ ret = irdma_ws_cqp_cmd(vsi, ws_tree_root, IRDMA_OP_WS_ADD_NODE);
+ if (ret) {
+ irdma_free_node(vsi, ws_tree_root);
+ goto exit;
+ }
+
+ vsi->dev->ws_tree_root = ws_tree_root;
+ }
+
+ /* Find a second tier node that matches the VSI */
+ vsi_node = ws_find_node(ws_tree_root, vsi->vsi_idx,
+ WS_MATCH_TYPE_VSI);
+
+ /* If VSI node doesn't exist, add one */
+ if (!vsi_node) {
+ ibdev_dbg(to_ibdev(vsi->dev),
+ "WS: Node not found matching VSI %d\n",
+ vsi->vsi_idx);
+ vsi_node = irdma_alloc_node(vsi, user_pri, WS_NODE_TYPE_PARENT,
+ ws_tree_root);
+ if (!vsi_node) {
+ ret = -ENOMEM;
+ goto vsi_add_err;
+ }
+
+ ret = irdma_ws_cqp_cmd(vsi, vsi_node, IRDMA_OP_WS_ADD_NODE);
+ if (ret) {
+ irdma_free_node(vsi, vsi_node);
+ goto vsi_add_err;
+ }
+
+ list_add(&vsi_node->siblings, &ws_tree_root->child_list_head);
+ }
+
+ ibdev_dbg(to_ibdev(vsi->dev),
+ "WS: Using node %d which represents VSI %d\n",
+ vsi_node->index, vsi->vsi_idx);
+ traffic_class = vsi->qos[user_pri].traffic_class;
+ tc_node = ws_find_node(vsi_node, traffic_class,
+ WS_MATCH_TYPE_TC);
+ if (!tc_node) {
+ /* Add leaf node */
+ ibdev_dbg(to_ibdev(vsi->dev),
+ "WS: Node not found matching VSI %d and TC %d\n",
+ vsi->vsi_idx, traffic_class);
+ tc_node = irdma_alloc_node(vsi, user_pri, WS_NODE_TYPE_LEAF,
+ vsi_node);
+ if (!tc_node) {
+ ret = -ENOMEM;
+ goto leaf_add_err;
+ }
+
+ ret = irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_ADD_NODE);
+ if (ret) {
+ irdma_free_node(vsi, tc_node);
+ goto leaf_add_err;
+ }
+
+ list_add(&tc_node->siblings, &vsi_node->child_list_head);
+		/*
+		 * Call back into the LAN driver to add our node to the
+		 * LAN's scheduler tree.
+		 */
+ ret = vsi->register_qset(vsi, tc_node);
+ if (ret)
+ goto reg_err;
+
+ tc_node->enable = true;
+ ret = irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_MODIFY_NODE);
+ if (ret) {
+ vsi->unregister_qset(vsi, tc_node);
+ goto reg_err;
+ }
+ }
+ ibdev_dbg(to_ibdev(vsi->dev),
+ "WS: Using node %d which represents VSI %d TC %d\n",
+ tc_node->index, vsi->vsi_idx, traffic_class);
+ /*
+ * Iterate through other UPs and update the QS handle if they have
+ * a matching traffic class.
+ */
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+ if (vsi->qos[i].traffic_class == traffic_class) {
+ vsi->qos[i].qs_handle = tc_node->qs_handle;
+ vsi->qos[i].lan_qos_handle = tc_node->lan_qs_handle;
+ vsi->qos[i].l2_sched_node_id = tc_node->l2_sched_node_id;
+ vsi->qos[i].valid = true;
+ }
+ }
+ goto exit;
+
+reg_err:
+ irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_DELETE_NODE);
+ list_del(&tc_node->siblings);
+ irdma_free_node(vsi, tc_node);
+leaf_add_err:
+ if (list_empty(&vsi_node->child_list_head)) {
+ if (irdma_ws_cqp_cmd(vsi, vsi_node, IRDMA_OP_WS_DELETE_NODE))
+ goto exit;
+ list_del(&vsi_node->siblings);
+ irdma_free_node(vsi, vsi_node);
+ }
+
+vsi_add_err:
+	/* Free head node if there are no remaining VSI nodes */
+ if (list_empty(&ws_tree_root->child_list_head)) {
+ irdma_ws_cqp_cmd(vsi, ws_tree_root, IRDMA_OP_WS_DELETE_NODE);
+ vsi->dev->ws_tree_root = NULL;
+ irdma_free_node(vsi, ws_tree_root);
+ }
+
+exit:
+ mutex_unlock(&vsi->dev->ws_mutex);
+ return ret;
+}
+
+/**
+ * irdma_ws_remove - Free WS scheduler node, update WS tree
+ * @vsi: vsi pointer
+ * @user_pri: user priority
+ */
+void irdma_ws_remove(struct irdma_sc_vsi *vsi, u8 user_pri)
+{
+ mutex_lock(&vsi->dev->ws_mutex);
+ if (irdma_tc_in_use(vsi, user_pri))
+ goto exit;
+ irdma_remove_leaf(vsi, user_pri);
+exit:
+ mutex_unlock(&vsi->dev->ws_mutex);
+}
+
+/**
+ * irdma_ws_reset - Reset entire WS tree
+ * @vsi: vsi pointer
+ */
+void irdma_ws_reset(struct irdma_sc_vsi *vsi)
+{
+ u8 i;
+
+ mutex_lock(&vsi->dev->ws_mutex);
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; ++i)
+ irdma_remove_leaf(vsi, i);
+ mutex_unlock(&vsi->dev->ws_mutex);
+}
diff --git a/drivers/infiniband/hw/irdma/ws.h b/drivers/infiniband/hw/irdma/ws.h
new file mode 100644
index 000000000000..45490031a389
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/ws.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2020 Intel Corporation */
+#ifndef IRDMA_WS_H
+#define IRDMA_WS_H
+
+#include "osdep.h"
+
+enum irdma_ws_node_type {
+ WS_NODE_TYPE_PARENT,
+ WS_NODE_TYPE_LEAF,
+};
+
+enum irdma_ws_match_type {
+ WS_MATCH_TYPE_VSI,
+ WS_MATCH_TYPE_TC,
+};
+
+struct irdma_ws_node {
+ struct list_head siblings;
+ struct list_head child_list_head;
+ struct irdma_ws_node *parent;
+ u64 lan_qs_handle; /* opaque handle used by LAN */
+ u32 l2_sched_node_id;
+ u16 index;
+ u16 qs_handle;
+ u16 vsi_index;
+ u8 traffic_class;
+ u8 user_pri;
+ u8 rel_bw;
+ u8 abstraction_layer; /* used for splitting a TC */
+ u8 prio_type;
+ bool type_leaf:1;
+ bool enable:1;
+};
+
+struct irdma_sc_vsi;
+int irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri);
+void irdma_ws_remove(struct irdma_sc_vsi *vsi, u8 user_pri);
+void irdma_ws_reset(struct irdma_sc_vsi *vsi);
+
+#endif /* IRDMA_WS_H */
diff --git a/drivers/infiniband/hw/mana/Kconfig b/drivers/infiniband/hw/mana/Kconfig
new file mode 100644
index 000000000000..546640657bac
--- /dev/null
+++ b/drivers/infiniband/hw/mana/Kconfig
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config MANA_INFINIBAND
+ tristate "Microsoft Azure Network Adapter support"
+ depends on NETDEVICES && ETHERNET && PCI && MICROSOFT_MANA
+ help
+	  This driver provides low-level RDMA support for the Microsoft Azure
+	  Network Adapter (MANA). MANA supports RDMA features that can be used
+	  by workloads (e.g. DPDK, MPI) that use RDMA verbs to directly access
+	  hardware from user-mode processes in the Microsoft Azure cloud
+	  environment.
diff --git a/drivers/infiniband/hw/mana/Makefile b/drivers/infiniband/hw/mana/Makefile
new file mode 100644
index 000000000000..921c05e08b11
--- /dev/null
+++ b/drivers/infiniband/hw/mana/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_MANA_INFINIBAND) += mana_ib.o
+
+mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o ah.o wr.o counters.o
diff --git a/drivers/infiniband/hw/mana/ah.c b/drivers/infiniband/hw/mana/ah.c
new file mode 100644
index 000000000000..f56952eebbaa
--- /dev/null
+++ b/drivers/infiniband/hw/mana/ah.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+int mana_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev = container_of(ibah->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_ah *ah = container_of(ibah, struct mana_ib_ah, ibah);
+ struct rdma_ah_attr *ah_attr = attr->ah_attr;
+ const struct ib_global_route *grh;
+ enum rdma_network_type ntype;
+
+ if (ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE ||
+ !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
+ return -EINVAL;
+
+ if (udata)
+ return -EINVAL;
+
+ ah->av = dma_pool_zalloc(mdev->av_pool, GFP_ATOMIC, &ah->dma_handle);
+ if (!ah->av)
+ return -ENOMEM;
+
+ grh = rdma_ah_read_grh(ah_attr);
+ ntype = rdma_gid_attr_network_type(grh->sgid_attr);
+
+ copy_in_reverse(ah->av->dest_mac, ah_attr->roce.dmac, ETH_ALEN);
+ ah->av->udp_src_port = rdma_flow_label_to_udp_sport(grh->flow_label);
+ ah->av->hop_limit = grh->hop_limit;
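+	/* DSCP is the upper six bits of the GRH traffic class; the low
+	 * two ECN bits are not carried in the AV.
+	 */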
+ ah->av->dscp = (grh->traffic_class >> 2) & 0x3f;
+ ah->av->is_ipv6 = (ntype == RDMA_NETWORK_IPV6);
+
+ if (ah->av->is_ipv6) {
+ copy_in_reverse(ah->av->dest_ip, grh->dgid.raw, 16);
+ copy_in_reverse(ah->av->src_ip, grh->sgid_attr->gid.raw, 16);
+ } else {
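+		/* Encode IPv4 GIDs as IPv4-mapped IPv6 addresses (::ffff:a.b.c.d) */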
+ ah->av->dest_ip[10] = 0xFF;
+ ah->av->dest_ip[11] = 0xFF;
+ copy_in_reverse(&ah->av->dest_ip[12], &grh->dgid.raw[12], 4);
+ copy_in_reverse(&ah->av->src_ip[12], &grh->sgid_attr->gid.raw[12], 4);
+ }
+
+ return 0;
+}
+
+int mana_ib_destroy_ah(struct ib_ah *ibah, u32 flags)
+{
+ struct mana_ib_dev *mdev = container_of(ibah->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_ah *ah = container_of(ibah, struct mana_ib_ah, ibah);
+
+ dma_pool_free(mdev->av_pool, ah->av, ah->dma_handle);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mana/counters.c b/drivers/infiniband/hw/mana/counters.c
new file mode 100644
index 000000000000..e964e74be48d
--- /dev/null
+++ b/drivers/infiniband/hw/mana/counters.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "counters.h"
+
+static const struct rdma_stat_desc mana_ib_port_stats_desc[] = {
+ [MANA_IB_REQUESTER_TIMEOUT].name = "requester_timeout",
+ [MANA_IB_REQUESTER_OOS_NAK].name = "requester_oos_nak",
+ [MANA_IB_REQUESTER_RNR_NAK].name = "requester_rnr_nak",
+ [MANA_IB_RESPONDER_RNR_NAK].name = "responder_rnr_nak",
+ [MANA_IB_RESPONDER_OOS].name = "responder_oos",
+ [MANA_IB_RESPONDER_DUP_REQUEST].name = "responder_dup_request",
+ [MANA_IB_REQUESTER_IMPLICIT_NAK].name = "requester_implicit_nak",
+ [MANA_IB_REQUESTER_READRESP_PSN_MISMATCH].name = "requester_readresp_psn_mismatch",
+ [MANA_IB_NAK_INV_REQ].name = "nak_inv_req",
+ [MANA_IB_NAK_ACCESS_ERR].name = "nak_access_error",
+ [MANA_IB_NAK_OPP_ERR].name = "nak_opp_error",
+ [MANA_IB_NAK_INV_READ].name = "nak_inv_read",
+ [MANA_IB_RESPONDER_LOCAL_LEN_ERR].name = "responder_local_len_error",
+ [MANA_IB_REQUESTOR_LOCAL_PROT_ERR].name = "requestor_local_prot_error",
+ [MANA_IB_RESPONDER_REM_ACCESS_ERR].name = "responder_rem_access_error",
+ [MANA_IB_RESPONDER_LOCAL_QP_ERR].name = "responder_local_qp_error",
+ [MANA_IB_RESPONDER_MALFORMED_WQE].name = "responder_malformed_wqe",
+ [MANA_IB_GENERAL_HW_ERR].name = "general_hw_error",
+ [MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED].name = "requester_rnr_nak_retries_exceeded",
+ [MANA_IB_REQUESTER_RETRIES_EXCEEDED].name = "requester_retries_exceeded",
+ [MANA_IB_TOTAL_FATAL_ERR].name = "total_fatal_error",
+ [MANA_IB_RECEIVED_CNPS].name = "received_cnps",
+ [MANA_IB_NUM_QPS_CONGESTED].name = "num_qps_congested",
+ [MANA_IB_RATE_INC_EVENTS].name = "rate_inc_events",
+ [MANA_IB_NUM_QPS_RECOVERED].name = "num_qps_recovered",
+ [MANA_IB_CURRENT_RATE].name = "current_rate",
+ [MANA_IB_DUP_RX_REQ].name = "dup_rx_requests",
+ [MANA_IB_TX_BYTES].name = "tx_bytes",
+ [MANA_IB_RX_BYTES].name = "rx_bytes",
+ [MANA_IB_RX_SEND_REQ].name = "rx_send_requests",
+ [MANA_IB_RX_WRITE_REQ].name = "rx_write_requests",
+ [MANA_IB_RX_READ_REQ].name = "rx_read_requests",
+ [MANA_IB_TX_PKT].name = "tx_packets",
+ [MANA_IB_RX_PKT].name = "rx_packets",
+};
+
+static const struct rdma_stat_desc mana_ib_device_stats_desc[] = {
+ [MANA_IB_SENT_CNPS].name = "sent_cnps",
+ [MANA_IB_RECEIVED_ECNS].name = "received_ecns",
+ [MANA_IB_RECEIVED_CNP_COUNT].name = "received_cnp_count",
+ [MANA_IB_QP_CONGESTED_EVENTS].name = "qp_congested_events",
+ [MANA_IB_QP_RECOVERED_EVENTS].name = "qp_recovered_events",
+ [MANA_IB_DEV_RATE_INC_EVENTS].name = "rate_inc_events",
+};
+
+struct rdma_hw_stats *mana_ib_alloc_hw_device_stats(struct ib_device *ibdev)
+{
+ return rdma_alloc_hw_stats_struct(mana_ib_device_stats_desc,
+ ARRAY_SIZE(mana_ib_device_stats_desc),
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num)
+{
+ return rdma_alloc_hw_stats_struct(mana_ib_port_stats_desc,
+ ARRAY_SIZE(mana_ib_port_stats_desc),
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int mana_ib_get_hw_device_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats)
+{
+ struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev,
+ ib_dev);
+ struct mana_rnic_query_device_cntrs_resp resp = {};
+ struct mana_rnic_query_device_cntrs_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_QUERY_DEVICE_COUNTERS,
+ sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+
+ err = mana_gd_send_request(mdev_to_gc(mdev), sizeof(req), &req,
+ sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to query device counters err %d",
+ err);
+ return err;
+ }
+
+ stats->value[MANA_IB_SENT_CNPS] = resp.sent_cnps;
+ stats->value[MANA_IB_RECEIVED_ECNS] = resp.received_ecns;
+ stats->value[MANA_IB_RECEIVED_CNP_COUNT] = resp.received_cnp_count;
+ stats->value[MANA_IB_QP_CONGESTED_EVENTS] = resp.qp_congested_events;
+ stats->value[MANA_IB_QP_RECOVERED_EVENTS] = resp.qp_recovered_events;
+ stats->value[MANA_IB_DEV_RATE_INC_EVENTS] = resp.rate_inc_events;
+
+ return ARRAY_SIZE(mana_ib_device_stats_desc);
+}
+
+static int mana_ib_get_hw_port_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port_num)
+{
+ struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev,
+ ib_dev);
+ struct mana_rnic_query_vf_cntrs_resp resp = {};
+ struct mana_rnic_query_vf_cntrs_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_QUERY_VF_COUNTERS,
+ sizeof(req), sizeof(resp));
+ req.hdr.resp.msg_version = GDMA_MESSAGE_V2;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+
+ err = mana_gd_send_request(mdev_to_gc(mdev), sizeof(req), &req,
+ sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to query vf counters err %d",
+ err);
+ return err;
+ }
+
+ stats->value[MANA_IB_REQUESTER_TIMEOUT] = resp.requester_timeout;
+ stats->value[MANA_IB_REQUESTER_OOS_NAK] = resp.requester_oos_nak;
+ stats->value[MANA_IB_REQUESTER_RNR_NAK] = resp.requester_rnr_nak;
+ stats->value[MANA_IB_RESPONDER_RNR_NAK] = resp.responder_rnr_nak;
+ stats->value[MANA_IB_RESPONDER_OOS] = resp.responder_oos;
+ stats->value[MANA_IB_RESPONDER_DUP_REQUEST] = resp.responder_dup_request;
+ stats->value[MANA_IB_REQUESTER_IMPLICIT_NAK] =
+ resp.requester_implicit_nak;
+ stats->value[MANA_IB_REQUESTER_READRESP_PSN_MISMATCH] =
+ resp.requester_readresp_psn_mismatch;
+ stats->value[MANA_IB_NAK_INV_REQ] = resp.nak_inv_req;
+ stats->value[MANA_IB_NAK_ACCESS_ERR] = resp.nak_access_err;
+ stats->value[MANA_IB_NAK_OPP_ERR] = resp.nak_opp_err;
+ stats->value[MANA_IB_NAK_INV_READ] = resp.nak_inv_read;
+ stats->value[MANA_IB_RESPONDER_LOCAL_LEN_ERR] =
+ resp.responder_local_len_err;
+ stats->value[MANA_IB_REQUESTOR_LOCAL_PROT_ERR] =
+ resp.requestor_local_prot_err;
+ stats->value[MANA_IB_RESPONDER_REM_ACCESS_ERR] =
+ resp.responder_rem_access_err;
+ stats->value[MANA_IB_RESPONDER_LOCAL_QP_ERR] =
+ resp.responder_local_qp_err;
+ stats->value[MANA_IB_RESPONDER_MALFORMED_WQE] =
+ resp.responder_malformed_wqe;
+ stats->value[MANA_IB_GENERAL_HW_ERR] = resp.general_hw_err;
+ stats->value[MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED] =
+ resp.requester_rnr_nak_retries_exceeded;
+ stats->value[MANA_IB_REQUESTER_RETRIES_EXCEEDED] =
+ resp.requester_retries_exceeded;
+ stats->value[MANA_IB_TOTAL_FATAL_ERR] = resp.total_fatal_err;
+
+ stats->value[MANA_IB_RECEIVED_CNPS] = resp.received_cnps;
+ stats->value[MANA_IB_NUM_QPS_CONGESTED] = resp.num_qps_congested;
+ stats->value[MANA_IB_RATE_INC_EVENTS] = resp.rate_inc_events;
+ stats->value[MANA_IB_NUM_QPS_RECOVERED] = resp.num_qps_recovered;
+ stats->value[MANA_IB_CURRENT_RATE] = resp.current_rate;
+
+ stats->value[MANA_IB_DUP_RX_REQ] = resp.dup_rx_req;
+ stats->value[MANA_IB_TX_BYTES] = resp.tx_bytes;
+ stats->value[MANA_IB_RX_BYTES] = resp.rx_bytes;
+ stats->value[MANA_IB_RX_SEND_REQ] = resp.rx_send_req;
+ stats->value[MANA_IB_RX_WRITE_REQ] = resp.rx_write_req;
+ stats->value[MANA_IB_RX_READ_REQ] = resp.rx_read_req;
+ stats->value[MANA_IB_TX_PKT] = resp.tx_pkt;
+ stats->value[MANA_IB_RX_PKT] = resp.rx_pkt;
+
+ return ARRAY_SIZE(mana_ib_port_stats_desc);
+}
+
+int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port_num, int index)
+{
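+	/* port_num of 0 selects the device-wide counters; any other value
+	 * selects the per-port counters for that port.
+	 */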
+ if (!port_num)
+ return mana_ib_get_hw_device_stats(ibdev, stats);
+ else
+ return mana_ib_get_hw_port_stats(ibdev, stats, port_num);
+}
diff --git a/drivers/infiniband/hw/mana/counters.h b/drivers/infiniband/hw/mana/counters.h
new file mode 100644
index 000000000000..f68e776bb41d
--- /dev/null
+++ b/drivers/infiniband/hw/mana/counters.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024 Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef _COUNTERS_H_
+#define _COUNTERS_H_
+
+#include "mana_ib.h"
+
+enum mana_ib_port_counters {
+ MANA_IB_REQUESTER_TIMEOUT,
+ MANA_IB_REQUESTER_OOS_NAK,
+ MANA_IB_REQUESTER_RNR_NAK,
+ MANA_IB_RESPONDER_RNR_NAK,
+ MANA_IB_RESPONDER_OOS,
+ MANA_IB_RESPONDER_DUP_REQUEST,
+ MANA_IB_REQUESTER_IMPLICIT_NAK,
+ MANA_IB_REQUESTER_READRESP_PSN_MISMATCH,
+ MANA_IB_NAK_INV_REQ,
+ MANA_IB_NAK_ACCESS_ERR,
+ MANA_IB_NAK_OPP_ERR,
+ MANA_IB_NAK_INV_READ,
+ MANA_IB_RESPONDER_LOCAL_LEN_ERR,
+ MANA_IB_REQUESTOR_LOCAL_PROT_ERR,
+ MANA_IB_RESPONDER_REM_ACCESS_ERR,
+ MANA_IB_RESPONDER_LOCAL_QP_ERR,
+ MANA_IB_RESPONDER_MALFORMED_WQE,
+ MANA_IB_GENERAL_HW_ERR,
+ MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED,
+ MANA_IB_REQUESTER_RETRIES_EXCEEDED,
+ MANA_IB_TOTAL_FATAL_ERR,
+ MANA_IB_RECEIVED_CNPS,
+ MANA_IB_NUM_QPS_CONGESTED,
+ MANA_IB_RATE_INC_EVENTS,
+ MANA_IB_NUM_QPS_RECOVERED,
+ MANA_IB_CURRENT_RATE,
+ MANA_IB_DUP_RX_REQ,
+ MANA_IB_TX_BYTES,
+ MANA_IB_RX_BYTES,
+ MANA_IB_RX_SEND_REQ,
+ MANA_IB_RX_WRITE_REQ,
+ MANA_IB_RX_READ_REQ,
+ MANA_IB_TX_PKT,
+ MANA_IB_RX_PKT,
+};
+
+enum mana_ib_device_counters {
+ MANA_IB_SENT_CNPS,
+ MANA_IB_RECEIVED_ECNS,
+ MANA_IB_RECEIVED_CNP_COUNT,
+ MANA_IB_QP_CONGESTED_EVENTS,
+ MANA_IB_QP_RECOVERED_EVENTS,
+ MANA_IB_DEV_RATE_INC_EVENTS,
+};
+
+struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num);
+struct rdma_hw_stats *mana_ib_alloc_hw_device_stats(struct ib_device *ibdev);
+int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port_num, int index);
+#endif /* _COUNTERS_H_ */
diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
new file mode 100644
index 000000000000..1becc8779123
--- /dev/null
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+ struct mana_ib_create_cq_resp resp = {};
+ struct mana_ib_ucontext *mana_ucontext;
+ struct ib_device *ibdev = ibcq->device;
+ struct mana_ib_create_cq ucmd = {};
+ struct mana_ib_dev *mdev;
+ bool is_rnic_cq;
+ u32 doorbell;
+ u32 buf_size;
+ int err;
+
+ mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors;
+ cq->cq_handle = INVALID_MANA_HANDLE;
+
+ if (udata) {
+ if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
+ return -EINVAL;
+
+ err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to copy from udata for create cq, %d\n", err);
+ return err;
+ }
+
+ is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ);
+
+ if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) ||
+ attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
+ ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
+ return -EINVAL;
+ }
+
+ cq->cqe = attr->cqe;
+ err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
+ &cq->queue);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
+ return err;
+ }
+
+ mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
+ ibucontext);
+ doorbell = mana_ucontext->doorbell;
+ } else {
+ is_rnic_cq = true;
+ buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe * COMP_ENTRY_SIZE));
+ cq->cqe = buf_size / COMP_ENTRY_SIZE;
+ err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ, &cq->queue);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err);
+ return err;
+ }
+ doorbell = mdev->gdma_dev->doorbell;
+ }
+
+ if (is_rnic_cq) {
+ err = mana_ib_gd_create_cq(mdev, cq, doorbell);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to create RNIC cq, %d\n", err);
+ goto err_destroy_queue;
+ }
+
+ err = mana_ib_install_cq_cb(mdev, cq);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to install cq callback, %d\n", err);
+ goto err_destroy_rnic_cq;
+ }
+ }
+
+ if (udata) {
+ resp.cqid = cq->queue.id;
+ err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
+ goto err_remove_cq_cb;
+ }
+ }
+
+ spin_lock_init(&cq->cq_lock);
+ INIT_LIST_HEAD(&cq->list_send_qp);
+ INIT_LIST_HEAD(&cq->list_recv_qp);
+
+ return 0;
+
+err_remove_cq_cb:
+ mana_ib_remove_cq_cb(mdev, cq);
+err_destroy_rnic_cq:
+ mana_ib_gd_destroy_cq(mdev, cq);
+err_destroy_queue:
+ mana_ib_destroy_queue(mdev, &cq->queue);
+
+ return err;
+}
+
+int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+{
+ struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+ struct ib_device *ibdev = ibcq->device;
+ struct mana_ib_dev *mdev;
+
+ mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ mana_ib_remove_cq_cb(mdev, cq);
+
+ /* Ignore return code as there is not much we can do about it.
+ * The error message is printed inside.
+ */
+ mana_ib_gd_destroy_cq(mdev, cq);
+
+ mana_ib_destroy_queue(mdev, &cq->queue);
+
+ return 0;
+}
+
+static void mana_ib_cq_handler(void *ctx, struct gdma_queue *gdma_cq)
+{
+ struct mana_ib_cq *cq = ctx;
+
+ if (cq->ibcq.comp_handler)
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+}
+
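+/* Register the CQ in the GDMA context's CQ table so completions on this
+ * queue invoke mana_ib_cq_handler(). Kernel CQs reuse their existing
+ * gdma_queue; user CQs get a freshly allocated stub entry.
+ */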
+int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct gdma_queue *gdma_cq;
+
+ if (cq->queue.id >= gc->max_num_cqs)
+ return -EINVAL;
+ /* Create CQ table entry */
+ WARN_ON(gc->cq_table[cq->queue.id]);
+ if (cq->queue.kmem)
+ gdma_cq = cq->queue.kmem;
+ else
+ gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
+ if (!gdma_cq)
+ return -ENOMEM;
+
+ gdma_cq->cq.context = cq;
+ gdma_cq->type = GDMA_CQ;
+ gdma_cq->cq.callback = mana_ib_cq_handler;
+ gdma_cq->id = cq->queue.id;
+ gc->cq_table[cq->queue.id] = gdma_cq;
+ return 0;
+}
+
+void mana_ib_remove_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+
+ if (cq->queue.id >= gc->max_num_cqs || cq->queue.id == INVALID_QUEUE_ID)
+ return;
+
+ if (cq->queue.kmem)
+		/* The kernel queue will be cleaned up and removed by the mana core */
+ return;
+
+ kfree(gc->cq_table[cq->queue.id]);
+ gc->cq_table[cq->queue.id] = NULL;
+}
+
+int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+ struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+ struct gdma_queue *gdma_cq = cq->queue.kmem;
+
+ if (!gdma_cq)
+ return -EINVAL;
+
+ mana_gd_ring_cq(gdma_cq, SET_ARM_BIT);
+ return 0;
+}
+
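+/* Complete the oldest outstanding send WQE on a UD/GSI QP: record the
+ * vendor error code, advance the hardware queue tail by the posted WQE
+ * size, and mark the shadow entry as completed so it can be polled.
+ */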
+static inline void handle_ud_sq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe)
+{
+ struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data;
+ struct gdma_queue *wq = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].kmem;
+ struct ud_sq_shadow_wqe *shadow_wqe;
+
+ shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_sq);
+ if (!shadow_wqe)
+ return;
+
+ shadow_wqe->header.error_code = rdma_cqe->ud_send.vendor_error;
+
+ wq->tail += shadow_wqe->header.posted_wqe_size;
+ shadow_queue_advance_next_to_complete(&qp->shadow_sq);
+}
+
+static inline void handle_ud_rq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe)
+{
+ struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data;
+ struct gdma_queue *wq = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem;
+ struct ud_rq_shadow_wqe *shadow_wqe;
+
+ shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_rq);
+ if (!shadow_wqe)
+ return;
+
+ shadow_wqe->byte_len = rdma_cqe->ud_recv.msg_len;
+ shadow_wqe->src_qpn = rdma_cqe->ud_recv.src_qpn;
+ shadow_wqe->header.error_code = IB_WC_SUCCESS;
+
+ wq->tail += shadow_wqe->header.posted_wqe_size;
+ shadow_queue_advance_next_to_complete(&qp->shadow_rq);
+}
+
+static void mana_handle_cqe(struct mana_ib_dev *mdev, struct gdma_comp *cqe)
+{
+ struct mana_ib_qp *qp = mana_get_qp_ref(mdev, cqe->wq_num, cqe->is_sq);
+
+ if (!qp)
+ return;
+
+ if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) {
+ if (cqe->is_sq)
+ handle_ud_sq_cqe(qp, cqe);
+ else
+ handle_ud_rq_cqe(qp, cqe);
+ }
+
+ mana_put_qp_ref(qp);
+}
+
+static void fill_verbs_from_shadow_wqe(struct mana_ib_qp *qp, struct ib_wc *wc,
+ const struct shadow_wqe_header *shadow_wqe)
+{
+ const struct ud_rq_shadow_wqe *ud_wqe = (const struct ud_rq_shadow_wqe *)shadow_wqe;
+
+ wc->wr_id = shadow_wqe->wr_id;
+ wc->status = shadow_wqe->error_code;
+ wc->opcode = shadow_wqe->opcode;
+ wc->vendor_err = shadow_wqe->error_code;
+ wc->wc_flags = 0;
+ wc->qp = &qp->ibqp;
+ wc->pkey_index = 0;
+
+ if (shadow_wqe->opcode == IB_WC_RECV) {
+ wc->byte_len = ud_wqe->byte_len;
+ wc->src_qp = ud_wqe->src_qpn;
+ wc->wc_flags |= IB_WC_GRH;
+ }
+}
+
+static int mana_process_completions(struct mana_ib_cq *cq, int nwc, struct ib_wc *wc)
+{
+ struct shadow_wqe_header *shadow_wqe;
+ struct mana_ib_qp *qp;
+ int wc_index = 0;
+
+ /* process send shadow queue completions */
+ list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
+ while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_sq))
+ != NULL) {
+ if (wc_index >= nwc)
+ goto out;
+
+ fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe);
+ shadow_queue_advance_consumer(&qp->shadow_sq);
+ wc_index++;
+ }
+ }
+
+ /* process recv shadow queue completions */
+ list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
+ while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_rq))
+ != NULL) {
+ if (wc_index >= nwc)
+ goto out;
+
+ fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe);
+ shadow_queue_advance_consumer(&qp->shadow_rq);
+ wc_index++;
+ }
+ }
+
+out:
+ return wc_index;
+}
+
+void mana_drain_gsi_sqs(struct mana_ib_dev *mdev)
+{
+ struct mana_ib_qp *qp = mana_get_qp_ref(mdev, MANA_GSI_QPN, false);
+ struct ud_sq_shadow_wqe *shadow_wqe;
+ struct mana_ib_cq *cq;
+ unsigned long flags;
+
+ if (!qp)
+ return;
+
+ cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ while ((shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_sq))
+ != NULL) {
+ shadow_wqe->header.error_code = IB_WC_GENERAL_ERR;
+ shadow_queue_advance_next_to_complete(&qp->shadow_sq);
+ }
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+
+ if (cq->ibcq.comp_handler)
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+
+ mana_put_qp_ref(qp);
+}
+
+int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+ struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+ struct mana_ib_dev *mdev = container_of(ibcq->device, struct mana_ib_dev, ib_dev);
+ struct gdma_queue *queue = cq->queue.kmem;
+ struct gdma_comp gdma_cqe;
+ unsigned long flags;
+ int num_polled = 0;
+ int comp_read, i;
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ for (i = 0; i < num_entries; i++) {
+ comp_read = mana_gd_poll_cq(queue, &gdma_cqe, 1);
+ if (comp_read < 1)
+ break;
+ mana_handle_cqe(mdev, &gdma_cqe);
+ }
+
+ num_polled = mana_process_completions(cq, num_entries, wc);
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+
+ return num_polled;
+}
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
new file mode 100644
index 000000000000..bdeddb642b87
--- /dev/null
+++ b/drivers/infiniband/hw/mana/device.c
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+#include <net/mana/mana_auxiliary.h>
+#include <net/addrconf.h>
+
+MODULE_DESCRIPTION("Microsoft Azure Network Adapter IB driver");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS("NET_MANA");
+
+static const struct ib_device_ops mana_ib_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_MANA,
+ .uverbs_abi_ver = MANA_IB_UVERBS_ABI_VERSION,
+
+ .add_gid = mana_ib_gd_add_gid,
+ .alloc_pd = mana_ib_alloc_pd,
+ .alloc_ucontext = mana_ib_alloc_ucontext,
+ .create_ah = mana_ib_create_ah,
+ .create_cq = mana_ib_create_cq,
+ .create_qp = mana_ib_create_qp,
+ .create_rwq_ind_table = mana_ib_create_rwq_ind_table,
+ .create_wq = mana_ib_create_wq,
+ .dealloc_pd = mana_ib_dealloc_pd,
+ .dealloc_ucontext = mana_ib_dealloc_ucontext,
+ .del_gid = mana_ib_gd_del_gid,
+ .dereg_mr = mana_ib_dereg_mr,
+ .destroy_ah = mana_ib_destroy_ah,
+ .destroy_cq = mana_ib_destroy_cq,
+ .destroy_qp = mana_ib_destroy_qp,
+ .destroy_rwq_ind_table = mana_ib_destroy_rwq_ind_table,
+ .destroy_wq = mana_ib_destroy_wq,
+ .disassociate_ucontext = mana_ib_disassociate_ucontext,
+ .get_dma_mr = mana_ib_get_dma_mr,
+ .get_link_layer = mana_ib_get_link_layer,
+ .get_port_immutable = mana_ib_get_port_immutable,
+ .mmap = mana_ib_mmap,
+ .modify_qp = mana_ib_modify_qp,
+ .modify_wq = mana_ib_modify_wq,
+ .poll_cq = mana_ib_poll_cq,
+ .post_recv = mana_ib_post_recv,
+ .post_send = mana_ib_post_send,
+ .query_device = mana_ib_query_device,
+ .query_gid = mana_ib_query_gid,
+ .query_pkey = mana_ib_query_pkey,
+ .query_port = mana_ib_query_port,
+ .reg_user_mr = mana_ib_reg_user_mr,
+ .reg_user_mr_dmabuf = mana_ib_reg_user_mr_dmabuf,
+ .req_notify_cq = mana_ib_arm_cq,
+
+ INIT_RDMA_OBJ_SIZE(ib_ah, mana_ib_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, mana_ib_cq, ibcq),
+ INIT_RDMA_OBJ_SIZE(ib_pd, mana_ib_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_qp, mana_ib_qp, ibqp),
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, mana_ib_ucontext, ibucontext),
+ INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mana_ib_rwq_ind_table,
+ ib_ind_table),
+};
+
+static const struct ib_device_ops mana_ib_stats_ops = {
+ .alloc_hw_port_stats = mana_ib_alloc_hw_port_stats,
+ .get_hw_stats = mana_ib_get_hw_stats,
+};
+
+static const struct ib_device_ops mana_ib_device_stats_ops = {
+ .alloc_hw_device_stats = mana_ib_alloc_hw_device_stats,
+};
+
+static int mana_ib_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct mana_ib_dev *dev = container_of(this, struct mana_ib_dev, nb);
+ struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
+ struct gdma_context *gc = dev->gdma_dev->gdma_context;
+ struct mana_context *mc = gc->mana.driver_data;
+ struct net_device *ndev;
+ int i;
+
+ /* Only process events from our parent device */
+ for (i = 0; i < dev->ib_dev.phys_port_cnt; i++)
+ if (event_dev == mc->ports[i]) {
+ switch (event) {
+ case NETDEV_CHANGEUPPER:
+ ndev = mana_get_primary_netdev(mc, i, &dev->dev_tracker);
+ /*
+ * RDMA core will set up the GID based on the updated netdev.
+ * It is not possible to race with the core because the rtnl
+ * lock is held.
+ */
+ ib_device_set_netdev(&dev->ib_dev, ndev, i + 1);
+
+ /* mana_get_primary_netdev() returns ndev with refcount held */
+ if (ndev)
+ netdev_put(ndev, &dev->dev_tracker);
+
+ return NOTIFY_OK;
+ default:
+ return NOTIFY_DONE;
+ }
+ }
+ return NOTIFY_DONE;
+}
+
+static int mana_ib_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct mana_adev *madev = container_of(adev, struct mana_adev, adev);
+ struct gdma_context *gc = madev->mdev->gdma_context;
+ struct mana_context *mc = gc->mana.driver_data;
+ struct gdma_dev *mdev = madev->mdev;
+ struct net_device *ndev;
+ struct mana_ib_dev *dev;
+ u8 mac_addr[ETH_ALEN];
+ int ret, i;
+
+ dev = ib_alloc_device(mana_ib_dev, ib_dev);
+ if (!dev)
+ return -ENOMEM;
+
+ ib_set_device_ops(&dev->ib_dev, &mana_ib_dev_ops);
+ dev->ib_dev.node_type = RDMA_NODE_IB_CA;
+ dev->ib_dev.num_comp_vectors = gc->max_num_queues;
+ dev->ib_dev.dev.parent = gc->dev;
+ dev->gdma_dev = mdev;
+ xa_init_flags(&dev->qp_table_wq, XA_FLAGS_LOCK_IRQ);
+
+ if (mana_ib_is_rnic(dev)) {
+ dev->ib_dev.phys_port_cnt = 1;
+ addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, mc->ports[0]->dev_addr);
+ ret = mana_ib_gd_query_adapter_caps(dev);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d", ret);
+ goto free_ib_device;
+ }
+
+ ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops);
+ if (dev->adapter_caps.feature_flags & MANA_IB_FEATURE_DEV_COUNTERS_SUPPORT)
+ ib_set_device_ops(&dev->ib_dev, &mana_ib_device_stats_ops);
+
+ ret = mana_ib_create_eqs(dev);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret);
+ goto free_ib_device;
+ }
+
+ ret = mana_ib_gd_create_rnic_adapter(dev);
+ if (ret)
+ goto destroy_eqs;
+
+ if (dev->adapter_caps.feature_flags & MANA_IB_FEATURE_MULTI_PORTS_SUPPORT)
+ dev->ib_dev.phys_port_cnt = mc->num_ports;
+
+ for (i = 0; i < dev->ib_dev.phys_port_cnt; i++) {
+ ndev = mana_get_primary_netdev(mc, i, &dev->dev_tracker);
+ if (!ndev) {
+ ret = -ENODEV;
+ ibdev_err(&dev->ib_dev,
+ "Failed to get netdev for IB port %d", i + 1);
+ goto destroy_rnic;
+ }
+ ether_addr_copy(mac_addr, ndev->dev_addr);
+ ret = ib_device_set_netdev(&dev->ib_dev, ndev, i + 1);
+ /* mana_get_primary_netdev() returns ndev with refcount held */
+ netdev_put(ndev, &dev->dev_tracker);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret);
+ goto destroy_rnic;
+ }
+ ret = mana_ib_gd_config_mac(dev, ADDR_OP_ADD, mac_addr);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to add Mac address, ret %d", ret);
+ goto destroy_rnic;
+ }
+ }
+ dev->nb.notifier_call = mana_ib_netdev_event;
+ ret = register_netdevice_notifier(&dev->nb);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to register net notifier, %d", ret);
+ goto destroy_rnic;
+ }
+ } else {
+ dev->ib_dev.phys_port_cnt = mc->num_ports;
+ ret = mana_eth_query_adapter_caps(dev);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to query ETH device caps, ret %d", ret);
+ goto free_ib_device;
+ }
+ }
+
+ dev->av_pool = dma_pool_create("mana_ib_av", gc->dev, MANA_AV_BUFFER_SIZE,
+ MANA_AV_BUFFER_SIZE, 0);
+ if (!dev->av_pool) {
+ ret = -ENOMEM;
+ goto deregister_net_notifier;
+ }
+
+ ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev,
+ mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt);
+
+ ret = ib_register_device(&dev->ib_dev, mana_ib_is_rnic(dev) ? "mana_%d" : "manae_%d",
+ gc->dev);
+ if (ret)
+ goto deallocate_pool;
+
+ dev_set_drvdata(&adev->dev, dev);
+
+ return 0;
+
+deallocate_pool:
+ dma_pool_destroy(dev->av_pool);
+deregister_net_notifier:
+ if (mana_ib_is_rnic(dev))
+ unregister_netdevice_notifier(&dev->nb);
+destroy_rnic:
+ if (mana_ib_is_rnic(dev))
+ mana_ib_gd_destroy_rnic_adapter(dev);
+destroy_eqs:
+ if (mana_ib_is_rnic(dev))
+ mana_ib_destroy_eqs(dev);
+free_ib_device:
+ xa_destroy(&dev->qp_table_wq);
+ ib_dealloc_device(&dev->ib_dev);
+ return ret;
+}
+
+static void mana_ib_remove(struct auxiliary_device *adev)
+{
+ struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev);
+
+ if (mana_ib_is_rnic(dev))
+ mana_drain_gsi_sqs(dev);
+
+ ib_unregister_device(&dev->ib_dev);
+ dma_pool_destroy(dev->av_pool);
+ if (mana_ib_is_rnic(dev)) {
+ unregister_netdevice_notifier(&dev->nb);
+ mana_ib_gd_destroy_rnic_adapter(dev);
+ mana_ib_destroy_eqs(dev);
+ }
+ xa_destroy(&dev->qp_table_wq);
+ ib_dealloc_device(&dev->ib_dev);
+}
+
+static const struct auxiliary_device_id mana_id_table[] = {
+ { .name = "mana.rdma", },
+ { .name = "mana.eth", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mana_id_table);
+
+static struct auxiliary_driver mana_driver = {
+ .probe = mana_ib_probe,
+ .remove = mana_ib_remove,
+ .id_table = mana_id_table,
+};
+
+module_auxiliary_driver(mana_driver);
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
new file mode 100644
index 000000000000..fac159f7128d
--- /dev/null
+++ b/drivers/infiniband/hw/mana/main.c
@@ -0,0 +1,1134 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+#include "linux/pci.h"
+
+void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
+ u32 port)
+{
+ struct mana_port_context *mpc;
+ struct net_device *ndev;
+
+ ndev = mana_ib_get_netdev(&dev->ib_dev, port);
+ mpc = netdev_priv(ndev);
+
+ mutex_lock(&pd->vport_mutex);
+
+ pd->vport_use_count--;
+ WARN_ON(pd->vport_use_count < 0);
+
+ if (!pd->vport_use_count)
+ mana_uncfg_vport(mpc);
+
+ mutex_unlock(&pd->vport_mutex);
+}
+
+int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port, struct mana_ib_pd *pd,
+ u32 doorbell_id)
+{
+ struct mana_port_context *mpc;
+ struct net_device *ndev;
+ int err;
+
+ ndev = mana_ib_get_netdev(&dev->ib_dev, port);
+ mpc = netdev_priv(ndev);
+
+ mutex_lock(&pd->vport_mutex);
+
+ pd->vport_use_count++;
+ if (pd->vport_use_count > 1) {
+ ibdev_dbg(&dev->ib_dev,
+ "Skip as this PD is already configured vport\n");
+ mutex_unlock(&pd->vport_mutex);
+ return 0;
+ }
+
+ err = mana_cfg_vport(mpc, pd->pdn, doorbell_id);
+ if (err) {
+ pd->vport_use_count--;
+ mutex_unlock(&pd->vport_mutex);
+
+ ibdev_dbg(&dev->ib_dev, "Failed to configure vPort %d\n", err);
+ return err;
+ }
+
+ mutex_unlock(&pd->vport_mutex);
+
+ pd->tx_shortform_allowed = mpc->tx_shortform_allowed;
+ pd->tx_vp_offset = mpc->tx_vp_offset;
+
+ ibdev_dbg(&dev->ib_dev, "vport handle %llx pdid %x doorbell_id %x\n",
+ mpc->port_handle, pd->pdn, doorbell_id);
+
+ return 0;
+}
+
+int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+ struct ib_device *ibdev = ibpd->device;
+ struct gdma_create_pd_resp resp = {};
+ struct gdma_create_pd_req req = {};
+ enum gdma_pd_flags flags = 0;
+ struct mana_ib_dev *dev;
+ struct gdma_context *gc;
+ int err;
+
+ dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ gc = mdev_to_gc(dev);
+
+ mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_PD, sizeof(req),
+ sizeof(resp));
+
+ if (!udata)
+ flags |= GDMA_PD_FLAG_ALLOW_GPA_MR;
+
+ req.flags = flags;
+ err = mana_gd_send_request(gc, sizeof(req), &req,
+ sizeof(resp), &resp);
+
+ if (err || resp.hdr.status) {
+ ibdev_dbg(&dev->ib_dev,
+ "Failed to get pd_id err %d status %u\n", err,
+ resp.hdr.status);
+ if (!err)
+ err = -EPROTO;
+
+ return err;
+ }
+
+ pd->pd_handle = resp.pd_handle;
+ pd->pdn = resp.pd_id;
+ ibdev_dbg(&dev->ib_dev, "pd_handle 0x%llx pd_id %d\n",
+ pd->pd_handle, pd->pdn);
+
+ mutex_init(&pd->vport_mutex);
+ pd->vport_use_count = 0;
+ return 0;
+}
+
+int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+ struct ib_device *ibdev = ibpd->device;
+ struct gdma_destory_pd_resp resp = {};
+ struct gdma_destroy_pd_req req = {};
+ struct mana_ib_dev *dev;
+ struct gdma_context *gc;
+ int err;
+
+ dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ gc = mdev_to_gc(dev);
+
+ mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_PD, sizeof(req),
+ sizeof(resp));
+
+ req.pd_handle = pd->pd_handle;
+ err = mana_gd_send_request(gc, sizeof(req), &req,
+ sizeof(resp), &resp);
+
+ if (err || resp.hdr.status) {
+ ibdev_dbg(&dev->ib_dev,
+ "Failed to destroy pd_handle 0x%llx err %d status %u",
+ pd->pd_handle, err, resp.hdr.status);
+ if (!err)
+ err = -EPROTO;
+ }
+
+ return err;
+}
+
+static int mana_gd_destroy_doorbell_page(struct gdma_context *gc,
+ int doorbell_page)
+{
+ struct gdma_destroy_resource_range_req req = {};
+ struct gdma_resp_hdr resp = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_RESOURCE_RANGE,
+ sizeof(req), sizeof(resp));
+
+ req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE;
+ req.num_resources = 1;
+ req.allocated_resources = doorbell_page;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err || resp.status) {
+ dev_err(gc->dev,
+ "Failed to destroy doorbell page: ret %d, 0x%x\n",
+ err, resp.status);
+ return err ?: -EPROTO;
+ }
+
+ return 0;
+}
+
+static int mana_gd_allocate_doorbell_page(struct gdma_context *gc,
+ int *doorbell_page)
+{
+ struct gdma_allocate_resource_range_req req = {};
+ struct gdma_allocate_resource_range_resp resp = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, GDMA_ALLOCATE_RESOURCE_RANGE,
+ sizeof(req), sizeof(resp));
+
+ req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE;
+ req.num_resources = 1;
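+ /* Alignment is expressed in MANA pages so the allocated doorbell index lands on a system-page boundary */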
+ req.alignment = PAGE_SIZE / MANA_PAGE_SIZE;
+
+ /* Have GDMA start searching from 0 */
+ req.allocated_resources = 0;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err || resp.hdr.status) {
+ dev_err(gc->dev,
+ "Failed to allocate doorbell page: ret %d, 0x%x\n",
+ err, resp.hdr.status);
+ return err ?: -EPROTO;
+ }
+
+ *doorbell_page = resp.allocated_resources;
+
+ return 0;
+}
+
+int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext,
+ struct ib_udata *udata)
+{
+ struct mana_ib_ucontext *ucontext =
+ container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
+ struct ib_device *ibdev = ibcontext->device;
+ struct mana_ib_dev *mdev;
+ struct gdma_context *gc;
+ int doorbell_page;
+ int ret;
+
+ mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ gc = mdev_to_gc(mdev);
+
+ /* Allocate a doorbell page index */
+ ret = mana_gd_allocate_doorbell_page(gc, &doorbell_page);
+ if (ret) {
+ ibdev_dbg(ibdev, "Failed to allocate doorbell page %d\n", ret);
+ return ret;
+ }
+
+ ibdev_dbg(ibdev, "Doorbell page allocated %d\n", doorbell_page);
+
+ ucontext->doorbell = doorbell_page;
+
+ return 0;
+}
+
+void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
+{
+ struct mana_ib_ucontext *mana_ucontext =
+ container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
+ struct ib_device *ibdev = ibcontext->device;
+ struct mana_ib_dev *mdev;
+ struct gdma_context *gc;
+ int ret;
+
+ mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ gc = mdev_to_gc(mdev);
+
+ ret = mana_gd_destroy_doorbell_page(gc, mana_ucontext->doorbell);
+ if (ret)
+ ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret);
+}
+
+int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type,
+ struct mana_ib_queue *queue)
+{
+ struct gdma_queue_spec spec = {};
+ int err;
+
+ queue->id = INVALID_QUEUE_ID;
+ queue->gdma_region = GDMA_INVALID_DMA_REGION;
+ spec.type = type;
+ spec.monitor_avl_buf = false;
+ spec.queue_size = size;
+ err = mana_gd_create_mana_wq_cq(mdev->gdma_dev, &spec, &queue->kmem);
+ if (err)
+ return err;
+ /* Transfer ownership of the DMA region from mana to mana_ib */
+ queue->gdma_region = queue->kmem->mem_info.dma_region_handle;
+ queue->kmem->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION;
+ return 0;
+}
+
+int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
+ struct mana_ib_queue *queue)
+{
+ struct ib_umem *umem;
+ int err;
+
+ queue->umem = NULL;
+ queue->id = INVALID_QUEUE_ID;
+ queue->gdma_region = GDMA_INVALID_DMA_REGION;
+
+ umem = ib_umem_get(&mdev->ib_dev, addr, size, IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(umem)) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to get umem, %pe\n", umem);
+ return PTR_ERR(umem);
+ }
+
+ err = mana_ib_create_zero_offset_dma_region(mdev, umem, &queue->gdma_region);
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to create dma region, %d\n", err);
+ goto free_umem;
+ }
+ queue->umem = umem;
+
+ ibdev_dbg(&mdev->ib_dev, "created dma region 0x%llx\n", queue->gdma_region);
+
+ return 0;
+free_umem:
+ ib_umem_release(umem);
+ return err;
+}
+
+void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue)
+{
+ /* Ignore return code as there is not much we can do about it.
+ * The error message is printed inside.
+ */
+ mana_ib_gd_destroy_dma_region(mdev, queue->gdma_region);
+ ib_umem_release(queue->umem);
+ if (queue->kmem)
+ mana_gd_destroy_queue(mdev_to_gc(mdev), queue->kmem);
+}
+
+static int
+mana_ib_gd_first_dma_region(struct mana_ib_dev *dev,
+ struct gdma_context *gc,
+ struct gdma_create_dma_region_req *create_req,
+ size_t num_pages, mana_handle_t *gdma_region,
+ u32 expected_status)
+{
+ struct gdma_create_dma_region_resp create_resp = {};
+ unsigned int create_req_msg_size;
+ int err;
+
+ create_req_msg_size =
+ struct_size(create_req, page_addr_list, num_pages);
+ create_req->page_addr_list_len = num_pages;
+
+ err = mana_gd_send_request(gc, create_req_msg_size, create_req,
+ sizeof(create_resp), &create_resp);
+ if (err || create_resp.hdr.status != expected_status) {
+ ibdev_dbg(&dev->ib_dev,
+ "Failed to create DMA region: %d, 0x%x\n",
+ err, create_resp.hdr.status);
+ if (!err)
+ err = -EPROTO;
+
+ return err;
+ }
+
+ *gdma_region = create_resp.dma_region_handle;
+ ibdev_dbg(&dev->ib_dev, "Created DMA region handle 0x%llx\n",
+ *gdma_region);
+
+ return 0;
+}
+
+static int
+mana_ib_gd_add_dma_region(struct mana_ib_dev *dev, struct gdma_context *gc,
+ struct gdma_dma_region_add_pages_req *add_req,
+ unsigned int num_pages, u32 expected_status)
+{
+ unsigned int add_req_msg_size =
+ struct_size(add_req, page_addr_list, num_pages);
+ struct gdma_general_resp add_resp = {};
+ int err;
+
+ mana_gd_init_req_hdr(&add_req->hdr, GDMA_DMA_REGION_ADD_PAGES,
+ add_req_msg_size, sizeof(add_resp));
+ add_req->page_addr_list_len = num_pages;
+
+ err = mana_gd_send_request(gc, add_req_msg_size, add_req,
+ sizeof(add_resp), &add_resp);
+ if (err || add_resp.hdr.status != expected_status) {
+ ibdev_dbg(&dev->ib_dev,
+ "Failed to create DMA region: %d, 0x%x\n",
+ err, add_resp.hdr.status);
+
+ if (!err)
+ err = -EPROTO;
+
+ return err;
+ }
+
+ return 0;
+}
+
+static int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+ mana_handle_t *gdma_region, unsigned long page_sz)
+{
+ struct gdma_dma_region_add_pages_req *add_req = NULL;
+ size_t num_pages_processed = 0, num_pages_to_handle;
+ struct gdma_create_dma_region_req *create_req;
+ unsigned int create_req_msg_size;
+ struct hw_channel_context *hwc;
+ struct ib_block_iter biter;
+ size_t max_pgs_add_cmd = 0;
+ size_t max_pgs_create_cmd;
+ struct gdma_context *gc;
+ size_t num_pages_total;
+ unsigned int tail = 0;
+ u64 *page_addr_list;
+ void *request_buf;
+ int err = 0;
+
+ gc = mdev_to_gc(dev);
+ hwc = gc->hwc.driver_data;
+
+ num_pages_total = ib_umem_num_dma_blocks(umem, page_sz);
+
+ max_pgs_create_cmd =
+ (hwc->max_req_msg_size - sizeof(*create_req)) / sizeof(u64);
+ num_pages_to_handle =
+ min_t(size_t, num_pages_total, max_pgs_create_cmd);
+ create_req_msg_size =
+ struct_size(create_req, page_addr_list, num_pages_to_handle);
+
+ request_buf = kzalloc(hwc->max_req_msg_size, GFP_KERNEL);
+ if (!request_buf)
+ return -ENOMEM;
+
+ create_req = request_buf;
+ mana_gd_init_req_hdr(&create_req->hdr, GDMA_CREATE_DMA_REGION,
+ create_req_msg_size,
+ sizeof(struct gdma_create_dma_region_resp));
+
+ create_req->length = umem->length;
+ create_req->offset_in_page = ib_umem_dma_offset(umem, page_sz);
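+ /* gdma_page_type encodes the page size as log2(page_sz / MANA_PAGE_SIZE) */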
+ create_req->gdma_page_type = order_base_2(page_sz) - MANA_PAGE_SHIFT;
+ create_req->page_count = num_pages_total;
+
+ ibdev_dbg(&dev->ib_dev, "size_dma_region %lu num_pages_total %lu\n",
+ umem->length, num_pages_total);
+
+ ibdev_dbg(&dev->ib_dev, "page_sz %lu offset_in_page %u\n",
+ page_sz, create_req->offset_in_page);
+
+ ibdev_dbg(&dev->ib_dev, "num_pages_to_handle %lu, gdma_page_type %u",
+ num_pages_to_handle, create_req->gdma_page_type);
+
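+ /* Batch the DMA addresses: one CREATE request followed by ADD_PAGES requests, each sized to the HWC maximum message size */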
+ page_addr_list = create_req->page_addr_list;
+ rdma_umem_for_each_dma_block(umem, &biter, page_sz) {
+ u32 expected_status = 0;
+
+ page_addr_list[tail++] = rdma_block_iter_dma_address(&biter);
+ if (tail < num_pages_to_handle)
+ continue;
+
+ if (num_pages_processed + num_pages_to_handle <
+ num_pages_total)
+ expected_status = GDMA_STATUS_MORE_ENTRIES;
+
+ if (!num_pages_processed) {
+ /* First create message */
+ err = mana_ib_gd_first_dma_region(dev, gc, create_req,
+ tail, gdma_region,
+ expected_status);
+ if (err)
+ goto out;
+
+ max_pgs_add_cmd = (hwc->max_req_msg_size -
+ sizeof(*add_req)) / sizeof(u64);
+
+ add_req = request_buf;
+ add_req->dma_region_handle = *gdma_region;
+ add_req->reserved3 = 0;
+ page_addr_list = add_req->page_addr_list;
+ } else {
+ /* Subsequent ADD_PAGES messages */
+ err = mana_ib_gd_add_dma_region(dev, gc, add_req, tail,
+ expected_status);
+ if (err)
+ break;
+ }
+
+ num_pages_processed += tail;
+ tail = 0;
+
+ /* The remaining pages to create */
+ num_pages_to_handle =
+ min_t(size_t,
+ num_pages_total - num_pages_processed,
+ max_pgs_add_cmd);
+ }
+
+ if (err)
+ mana_ib_gd_destroy_dma_region(dev, *gdma_region);
+
+out:
+ kfree(request_buf);
+ return err;
+}
+
+int mana_ib_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+ mana_handle_t *gdma_region, u64 virt)
+{
+ unsigned long page_sz;
+
+ page_sz = ib_umem_find_best_pgsz(umem, dev->adapter_caps.page_size_cap, virt);
+ if (!page_sz) {
+ ibdev_dbg(&dev->ib_dev, "Failed to find page size.\n");
+ return -EINVAL;
+ }
+
+ return mana_ib_gd_create_dma_region(dev, umem, gdma_region, page_sz);
+}
+
+int mana_ib_create_zero_offset_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+ mana_handle_t *gdma_region)
+{
+ unsigned long page_sz;
+
+ /* Hardware requires the DMA region to be aligned to the chosen page size */
+ page_sz = ib_umem_find_best_pgoff(umem, dev->adapter_caps.page_size_cap, 0);
+ if (!page_sz) {
+ ibdev_dbg(&dev->ib_dev, "Failed to find page size.\n");
+ return -EINVAL;
+ }
+
+ return mana_ib_gd_create_dma_region(dev, umem, gdma_region, page_sz);
+}
+
+int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev, u64 gdma_region)
+{
+ struct gdma_context *gc = mdev_to_gc(dev);
+
+ ibdev_dbg(&dev->ib_dev, "destroy dma region 0x%llx\n", gdma_region);
+
+ return mana_gd_destroy_dma_region(gc, gdma_region);
+}
+
+int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
+{
+ struct mana_ib_ucontext *mana_ucontext =
+ container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
+ struct ib_device *ibdev = ibcontext->device;
+ struct mana_ib_dev *mdev;
+ struct gdma_context *gc;
+ phys_addr_t pfn;
+ pgprot_t prot;
+ int ret;
+
+ mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ gc = mdev_to_gc(mdev);
+
+ if (vma->vm_pgoff != 0) {
+ ibdev_dbg(ibdev, "Unexpected vm_pgoff %lu\n", vma->vm_pgoff);
+ return -EINVAL;
+ }
+
+ /* Map to the page indexed by ucontext->doorbell */
+ pfn = (gc->phys_db_page_base +
+ gc->db_page_size * mana_ucontext->doorbell) >>
+ PAGE_SHIFT;
+ prot = pgprot_writecombine(vma->vm_page_prot);
+
+ ret = rdma_user_mmap_io(ibcontext, vma, pfn, PAGE_SIZE, prot,
+ NULL);
+ if (ret)
+ ibdev_dbg(ibdev, "can't rdma_user_mmap_io ret %d\n", ret);
+ else
+ ibdev_dbg(ibdev, "mapped I/O pfn 0x%llx page_size %lu, ret %d\n",
+ pfn, PAGE_SIZE, ret);
+
+ return ret;
+}
+
+int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ struct ib_port_attr attr;
+ int err;
+
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+
+ if (mana_ib_is_rnic(dev)) {
+ if (port_num == 1) {
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ } else {
+ immutable->core_cap_flags = RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP
+ | RDMA_CORE_CAP_ETH_AH;
+ immutable->max_mad_size = 0;
+ }
+ } else {
+ immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+ }
+
+ return 0;
+}
+
+int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
+ struct ib_udata *uhw)
+{
+ struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ struct pci_dev *pdev = to_pci_dev(mdev_to_gc(dev)->dev);
+
+ memset(props, 0, sizeof(*props));
+ props->vendor_id = pdev->vendor;
+ props->vendor_part_id = dev->gdma_dev->dev_id.type;
+ props->max_mr_size = MANA_IB_MAX_MR_SIZE;
+ props->page_size_cap = dev->adapter_caps.page_size_cap;
+ props->max_qp = dev->adapter_caps.max_qp_count;
+ props->max_qp_wr = dev->adapter_caps.max_qp_wr;
+ props->device_cap_flags = IB_DEVICE_RC_RNR_NAK_GEN;
+ props->max_send_sge = dev->adapter_caps.max_send_sge_count;
+ props->max_recv_sge = dev->adapter_caps.max_recv_sge_count;
+ props->max_sge_rd = dev->adapter_caps.max_recv_sge_count;
+ props->max_cq = dev->adapter_caps.max_cq_count;
+ props->max_cqe = dev->adapter_caps.max_qp_wr;
+ props->max_mr = dev->adapter_caps.max_mr_count;
+ props->max_pd = dev->adapter_caps.max_pd_count;
+ props->max_qp_rd_atom = dev->adapter_caps.max_inbound_read_limit;
+ props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
+ props->max_qp_init_rd_atom = dev->adapter_caps.max_outbound_read_limit;
+ props->atomic_cap = IB_ATOMIC_NONE;
+ props->masked_atomic_cap = IB_ATOMIC_NONE;
+ props->max_ah = INT_MAX;
+ props->max_pkeys = 1;
+ props->local_ca_ack_delay = MANA_CA_ACK_DELAY;
+ if (!mana_ib_is_rnic(dev))
+ props->raw_packet_caps = IB_RAW_PACKET_CAP_IP_CSUM;
+
+ return 0;
+}
+
+int mana_ib_query_port(struct ib_device *ibdev, u32 port,
+ struct ib_port_attr *props)
+{
+ struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ struct net_device *ndev = mana_ib_get_netdev(ibdev, port);
+
+ if (!ndev)
+ return -EINVAL;
+
+ memset(props, 0, sizeof(*props));
+ props->max_mtu = IB_MTU_4096;
+ props->active_mtu = ib_mtu_int_to_enum(ndev->mtu);
+
+ if (netif_carrier_ok(ndev) && netif_running(ndev)) {
+ props->state = IB_PORT_ACTIVE;
+ props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
+ } else {
+ props->state = IB_PORT_DOWN;
+ props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
+ }
+
+ props->active_width = IB_WIDTH_4X;
+ props->active_speed = IB_SPEED_EDR;
+ props->pkey_tbl_len = 1;
+ if (mana_ib_is_rnic(dev)) {
+ props->gid_tbl_len = 16;
+ props->ip_gids = true;
+ if (port == 1)
+ props->port_cap_flags = IB_PORT_CM_SUP;
+ }
+
+ return 0;
+}
+
+enum rdma_link_layer mana_ib_get_link_layer(struct ib_device *device, u32 port_num)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+int mana_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey)
+{
+ if (index != 0)
+ return -EINVAL;
+ *pkey = IB_DEFAULT_PKEY_FULL;
+ return 0;
+}
+
+int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
+ union ib_gid *gid)
+{
+ /* This version doesn't return GID properties */
+ return 0;
+}
+
+void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+}
+
+int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev)
+{
+ struct mana_ib_adapter_caps *caps = &dev->adapter_caps;
+ struct mana_ib_query_adapter_caps_resp resp = {};
+ struct mana_ib_query_adapter_caps_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_GET_ADAPTER_CAP, sizeof(req),
+ sizeof(resp));
+ req.hdr.resp.msg_version = GDMA_MESSAGE_V4;
+ req.hdr.dev_id = dev->gdma_dev->dev_id;
+
+ err = mana_gd_send_request(mdev_to_gc(dev), sizeof(req),
+ &req, sizeof(resp), &resp);
+
+ if (err) {
+ ibdev_err(&dev->ib_dev,
+ "Failed to query adapter caps err %d", err);
+ return err;
+ }
+
+ caps->max_sq_id = resp.max_sq_id;
+ caps->max_rq_id = resp.max_rq_id;
+ caps->max_cq_id = resp.max_cq_id;
+ caps->max_qp_count = resp.max_qp_count;
+ caps->max_cq_count = resp.max_cq_count;
+ caps->max_mr_count = resp.max_mr_count;
+ caps->max_pd_count = resp.max_pd_count;
+ caps->max_inbound_read_limit = resp.max_inbound_read_limit;
+ caps->max_outbound_read_limit = resp.max_outbound_read_limit;
+ caps->mw_count = resp.mw_count;
+ caps->max_srq_count = resp.max_srq_count;
+ caps->max_qp_wr = min_t(u32,
+ resp.max_requester_sq_size / GDMA_MAX_SQE_SIZE,
+ resp.max_requester_rq_size / GDMA_MAX_RQE_SIZE);
+ caps->max_inline_data_size = resp.max_inline_data_size;
+ caps->max_send_sge_count = resp.max_send_sge_count;
+ caps->max_recv_sge_count = resp.max_recv_sge_count;
+ caps->feature_flags = resp.feature_flags;
+
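+ /* Page sizes from 4K to 2M are always advertised; 4M, 1G and 2G are added only when the PF reports large-page support */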
+ caps->page_size_cap = PAGE_SZ_BM;
+ if (mdev_to_gc(dev)->pf_cap_flags1 & GDMA_DRV_CAP_FLAG_1_GDMA_PAGES_4MB_1GB_2GB)
+ caps->page_size_cap |= (SZ_4M | SZ_1G | SZ_2G);
+
+ return 0;
+}
+
+int mana_eth_query_adapter_caps(struct mana_ib_dev *dev)
+{
+ struct mana_ib_adapter_caps *caps = &dev->adapter_caps;
+ struct gdma_query_max_resources_resp resp = {};
+ struct gdma_general_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, GDMA_QUERY_MAX_RESOURCES,
+ sizeof(req), sizeof(resp));
+
+ err = mana_gd_send_request(mdev_to_gc(dev), sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&dev->ib_dev,
+ "Failed to query adapter caps err %d", err);
+ return err;
+ }
+
+ caps->max_qp_count = min_t(u32, resp.max_sq, resp.max_rq);
+ caps->max_cq_count = resp.max_cq;
+ caps->max_mr_count = resp.max_mst;
+ caps->max_pd_count = 0x6000;
+ caps->max_qp_wr = min_t(u32,
+ 0x100000 / GDMA_MAX_SQE_SIZE,
+ 0x100000 / GDMA_MAX_RQE_SIZE);
+ caps->max_send_sge_count = 30;
+ caps->max_recv_sge_count = 15;
+ caps->page_size_cap = PAGE_SZ_BM;
+
+ return 0;
+}
+
+static void
+mana_ib_event_handler(void *ctx, struct gdma_queue *q, struct gdma_event *event)
+{
+ struct mana_ib_dev *mdev = (struct mana_ib_dev *)ctx;
+ struct mana_ib_qp *qp;
+ struct ib_event ev;
+ u32 qpn;
+
+ switch (event->type) {
+ case GDMA_EQE_RNIC_QP_FATAL:
+ qpn = event->details[0];
+ qp = mana_get_qp_ref(mdev, qpn, false);
+ if (!qp)
+ break;
+ if (qp->ibqp.event_handler) {
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_QP_FATAL;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+ mana_put_qp_ref(qp);
+ break;
+ default:
+ break;
+ }
+}
+
+int mana_ib_create_eqs(struct mana_ib_dev *mdev)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct gdma_queue_spec spec = {};
+ int err, i;
+
+ spec.type = GDMA_EQ;
+ spec.monitor_avl_buf = false;
+ spec.queue_size = EQ_SIZE;
+ spec.eq.callback = mana_ib_event_handler;
+ spec.eq.context = mdev;
+ spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
+ spec.eq.msix_index = 0;
+
+ err = mana_gd_create_mana_eq(mdev->gdma_dev, &spec, &mdev->fatal_err_eq);
+ if (err)
+ return err;
+
+ mdev->eqs = kcalloc(mdev->ib_dev.num_comp_vectors, sizeof(struct gdma_queue *),
+ GFP_KERNEL);
+ if (!mdev->eqs) {
+ err = -ENOMEM;
+ goto destroy_fatal_eq;
+ }
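+ /* Completion EQs need no event callback; spread them across the usable MSI-X vectors */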
+ spec.eq.callback = NULL;
+ for (i = 0; i < mdev->ib_dev.num_comp_vectors; i++) {
+ spec.eq.msix_index = (i + 1) % gc->num_msix_usable;
+ err = mana_gd_create_mana_eq(mdev->gdma_dev, &spec, &mdev->eqs[i]);
+ if (err)
+ goto destroy_eqs;
+ }
+
+ return 0;
+
+destroy_eqs:
+ while (i-- > 0)
+ mana_gd_destroy_queue(gc, mdev->eqs[i]);
+ kfree(mdev->eqs);
+destroy_fatal_eq:
+ mana_gd_destroy_queue(gc, mdev->fatal_err_eq);
+ return err;
+}
+
+void mana_ib_destroy_eqs(struct mana_ib_dev *mdev)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ int i;
+
+ mana_gd_destroy_queue(gc, mdev->fatal_err_eq);
+
+ for (i = 0; i < mdev->ib_dev.num_comp_vectors; i++)
+ mana_gd_destroy_queue(gc, mdev->eqs[i]);
+
+ kfree(mdev->eqs);
+}
+
+int mana_ib_gd_create_rnic_adapter(struct mana_ib_dev *mdev)
+{
+ struct mana_rnic_create_adapter_resp resp = {};
+ struct mana_rnic_create_adapter_req req = {};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_ADAPTER, sizeof(req), sizeof(resp));
+ req.hdr.req.msg_version = GDMA_MESSAGE_V2;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.notify_eq_id = mdev->fatal_err_eq->id;
+
+ if (mdev->adapter_caps.feature_flags & MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT)
+ req.feature_flags |= MANA_IB_FEATURE_CLIENT_ERROR_CQE_REQUEST;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create RNIC adapter err %d", err);
+ return err;
+ }
+ mdev->adapter_handle = resp.adapter;
+
+ return 0;
+}
+
+int mana_ib_gd_destroy_rnic_adapter(struct mana_ib_dev *mdev)
+{
+ struct mana_rnic_destroy_adapter_resp resp = {};
+ struct mana_rnic_destroy_adapter_req req = {};
+ struct gdma_context *gc;
+ int err;
+
+ gc = mdev_to_gc(mdev);
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_ADAPTER, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to destroy RNIC adapter err %d", err);
+ return err;
+ }
+
+ return 0;
+}
+
+int mana_ib_gd_add_gid(const struct ib_gid_attr *attr, void **context)
+{
+ struct mana_ib_dev *mdev = container_of(attr->device, struct mana_ib_dev, ib_dev);
+ enum rdma_network_type ntype = rdma_gid_attr_network_type(attr);
+ struct mana_rnic_config_addr_resp resp = {};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_config_addr_req req = {};
+ int err;
+
+ if (ntype != RDMA_NETWORK_IPV4 && ntype != RDMA_NETWORK_IPV6) {
+ ibdev_dbg(&mdev->ib_dev, "Unsupported rdma network type %d", ntype);
+ return -EINVAL;
+ }
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CONFIG_IP_ADDR, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.op = ADDR_OP_ADD;
+ req.sgid_type = (ntype == RDMA_NETWORK_IPV6) ? SGID_TYPE_IPV6 : SGID_TYPE_IPV4;
+ copy_in_reverse(req.ip_addr, attr->gid.raw, sizeof(union ib_gid));
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to config IP addr err %d\n", err);
+ return err;
+ }
+
+ return 0;
+}
+
+int mana_ib_gd_del_gid(const struct ib_gid_attr *attr, void **context)
+{
+ struct mana_ib_dev *mdev = container_of(attr->device, struct mana_ib_dev, ib_dev);
+ enum rdma_network_type ntype = rdma_gid_attr_network_type(attr);
+ struct mana_rnic_config_addr_resp resp = {};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_config_addr_req req = {};
+ int err;
+
+ if (ntype != RDMA_NETWORK_IPV4 && ntype != RDMA_NETWORK_IPV6) {
+ ibdev_dbg(&mdev->ib_dev, "Unsupported rdma network type %d", ntype);
+ return -EINVAL;
+ }
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CONFIG_IP_ADDR, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.op = ADDR_OP_REMOVE;
+ req.sgid_type = (ntype == RDMA_NETWORK_IPV6) ? SGID_TYPE_IPV6 : SGID_TYPE_IPV4;
+ copy_in_reverse(req.ip_addr, attr->gid.raw, sizeof(union ib_gid));
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to config IP addr err %d\n", err);
+ return err;
+ }
+
+ return 0;
+}
+
+int mana_ib_gd_config_mac(struct mana_ib_dev *mdev, enum mana_ib_addr_op op, u8 *mac)
+{
+ struct mana_rnic_config_mac_addr_resp resp = {};
+ struct mana_rnic_config_mac_addr_req req = {};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CONFIG_MAC_ADDR, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.op = op;
+ copy_in_reverse(req.mac_addr, mac, ETH_ALEN);
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to config Mac addr err %d", err);
+ return err;
+ }
+
+ return 0;
+}
+
+int mana_ib_gd_create_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq, u32 doorbell)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_create_cq_resp resp = {};
+ struct mana_rnic_create_cq_req req = {};
+ int err;
+
+ if (!mdev->eqs)
+ return -EINVAL;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_CQ, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.gdma_region = cq->queue.gdma_region;
+ req.eq_id = mdev->eqs[cq->comp_vector]->id;
+ req.doorbell_page = doorbell;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create cq err %d", err);
+ return err;
+ }
+
+ cq->queue.id = resp.cq_id;
+ cq->cq_handle = resp.cq_handle;
+ /* The GDMA region is now owned by the CQ handle */
+ cq->queue.gdma_region = GDMA_INVALID_DMA_REGION;
+
+ return 0;
+}
+
+int mana_ib_gd_destroy_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_destroy_cq_resp resp = {};
+ struct mana_rnic_destroy_cq_req req = {};
+ int err;
+
+ if (cq->cq_handle == INVALID_MANA_HANDLE)
+ return 0;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_CQ, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.cq_handle = cq->cq_handle;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to destroy cq err %d", err);
+ return err;
+ }
+
+ return 0;
+}
+
+int mana_ib_gd_create_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+ struct ib_qp_init_attr *attr, u32 doorbell, u64 flags)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_pd *pd = container_of(qp->ibqp.pd, struct mana_ib_pd, ibpd);
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_create_qp_resp resp = {};
+ struct mana_rnic_create_qp_req req = {};
+ int err, i;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_RC_QP, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.pd_handle = pd->pd_handle;
+ req.send_cq_handle = send_cq->cq_handle;
+ req.recv_cq_handle = recv_cq->cq_handle;
+ for (i = 0; i < MANA_RC_QUEUE_TYPE_MAX; i++)
+ req.dma_region[i] = qp->rc_qp.queues[i].gdma_region;
+ req.doorbell_page = doorbell;
+ req.max_send_wr = attr->cap.max_send_wr;
+ req.max_recv_wr = attr->cap.max_recv_wr;
+ req.max_send_sge = attr->cap.max_send_sge;
+ req.max_recv_sge = attr->cap.max_recv_sge;
+ req.flags = flags;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create rc qp err %d", err);
+ return err;
+ }
+ qp->qp_handle = resp.rc_qp_handle;
+ for (i = 0; i < MANA_RC_QUEUE_TYPE_MAX; i++) {
+ qp->rc_qp.queues[i].id = resp.queue_ids[i];
+ /* The GDMA regions are now owned by the RNIC QP handle */
+ qp->rc_qp.queues[i].gdma_region = GDMA_INVALID_DMA_REGION;
+ }
+ return 0;
+}
+
+int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ struct mana_rnic_destroy_rc_qp_resp resp = {0};
+ struct mana_rnic_destroy_rc_qp_req req = {0};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_RC_QP, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.rc_qp_handle = qp->qp_handle;
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to destroy rc qp err %d", err);
+ return err;
+ }
+ return 0;
+}
+
+int mana_ib_gd_create_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+ struct ib_qp_init_attr *attr, u32 doorbell, u32 type)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_pd *pd = container_of(qp->ibqp.pd, struct mana_ib_pd, ibpd);
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_create_udqp_resp resp = {};
+ struct mana_rnic_create_udqp_req req = {};
+ int err, i;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_UD_QP, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.pd_handle = pd->pd_handle;
+ req.send_cq_handle = send_cq->cq_handle;
+ req.recv_cq_handle = recv_cq->cq_handle;
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; i++)
+ req.dma_region[i] = qp->ud_qp.queues[i].gdma_region;
+ req.doorbell_page = doorbell;
+ req.max_send_wr = attr->cap.max_send_wr;
+ req.max_recv_wr = attr->cap.max_recv_wr;
+ req.max_send_sge = attr->cap.max_send_sge;
+ req.max_recv_sge = attr->cap.max_recv_sge;
+ req.qp_type = type;
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create ud qp err %d", err);
+ return err;
+ }
+ qp->qp_handle = resp.qp_handle;
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; i++) {
+ qp->ud_qp.queues[i].id = resp.queue_ids[i];
+ /* The GDMA regions are now owned by the RNIC QP handle */
+ qp->ud_qp.queues[i].gdma_region = GDMA_INVALID_DMA_REGION;
+ }
+ return 0;
+}
+
+int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ struct mana_rnic_destroy_udqp_resp resp = {0};
+ struct mana_rnic_destroy_udqp_req req = {0};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_UD_QP, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.qp_handle = qp->qp_handle;
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to destroy ud qp err %d", err);
+ return err;
+ }
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
new file mode 100644
index 000000000000..9d36232ed880
--- /dev/null
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -0,0 +1,738 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef _MANA_IB_H_
+#define _MANA_IB_H_
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_umem.h>
+#include <rdma/mana-abi.h>
+#include <rdma/uverbs_ioctl.h>
+#include <linux/dmapool.h>
+
+#include <net/mana/mana.h>
+#include "shadow_queue.h"
+#include "counters.h"
+
+#define PAGE_SZ_BM \
+ (SZ_4K | SZ_8K | SZ_16K | SZ_32K | SZ_64K | SZ_128K | SZ_256K | \
+ SZ_512K | SZ_1M | SZ_2M)
+
+/* MANA doesn't have any limit for MR size */
+#define MANA_IB_MAX_MR_SIZE U64_MAX
+
+/* Send queue ID mask */
+#define MANA_SENDQ_MASK BIT(31)
+
+/*
+ * The hardware limit on the number of MRs is greater than the maximum
+ * number of MRs that can be represented in 24 bits
+ */
+#define MANA_IB_MAX_MR 0xFFFFFFu
+
+/*
+ * The CA timeout is approx. 260ms (4us * 2^(DELAY))
+ */
+#define MANA_CA_ACK_DELAY 16
+
+/*
+ * Size of the buffer used for writing an address vector (AV)
+ */
+#define MANA_AV_BUFFER_SIZE 64
+
+#define MANA_GSI_QPN (1)
+
+struct mana_ib_adapter_caps {
+ u32 max_sq_id;
+ u32 max_rq_id;
+ u32 max_cq_id;
+ u32 max_qp_count;
+ u32 max_cq_count;
+ u32 max_mr_count;
+ u32 max_pd_count;
+ u32 max_inbound_read_limit;
+ u32 max_outbound_read_limit;
+ u32 mw_count;
+ u32 max_srq_count;
+ u32 max_qp_wr;
+ u32 max_send_sge_count;
+ u32 max_recv_sge_count;
+ u32 max_inline_data_size;
+ u64 feature_flags;
+ u64 page_size_cap;
+};
+
+struct mana_ib_queue {
+ struct ib_umem *umem;
+ struct gdma_queue *kmem;
+ u64 gdma_region;
+ u64 id;
+};
+
+struct mana_ib_dev {
+ struct ib_device ib_dev;
+ struct gdma_dev *gdma_dev;
+ mana_handle_t adapter_handle;
+ struct gdma_queue *fatal_err_eq;
+ struct gdma_queue **eqs;
+ struct xarray qp_table_wq;
+ struct mana_ib_adapter_caps adapter_caps;
+ struct dma_pool *av_pool;
+ netdevice_tracker dev_tracker;
+ struct notifier_block nb;
+};
+
+struct mana_ib_wq {
+ struct ib_wq ibwq;
+ struct mana_ib_queue queue;
+ int wqe;
+ u32 wq_buf_size;
+ mana_handle_t rx_object;
+};
+
+struct mana_ib_pd {
+ struct ib_pd ibpd;
+ u32 pdn;
+ mana_handle_t pd_handle;
+
+ /* Mutex for sharing access to vport_use_count */
+ struct mutex vport_mutex;
+ int vport_use_count;
+
+ bool tx_shortform_allowed;
+ u32 tx_vp_offset;
+};
+
+struct mana_ib_av {
+ u8 dest_ip[16];
+ u8 dest_mac[ETH_ALEN];
+ u16 udp_src_port;
+ u8 src_ip[16];
+ u32 hop_limit : 8;
+ u32 reserved1 : 12;
+ u32 dscp : 6;
+ u32 reserved2 : 5;
+ u32 is_ipv6 : 1;
+ u32 reserved3 : 32;
+};
+
+struct mana_ib_ah {
+ struct ib_ah ibah;
+ struct mana_ib_av *av;
+ dma_addr_t dma_handle;
+};
+
+struct mana_ib_mr {
+ struct ib_mr ibmr;
+ struct ib_umem *umem;
+ mana_handle_t mr_handle;
+};
+
+struct mana_ib_cq {
+ struct ib_cq ibcq;
+ struct mana_ib_queue queue;
+ /* protects CQ polling */
+ spinlock_t cq_lock;
+ struct list_head list_send_qp;
+ struct list_head list_recv_qp;
+ int cqe;
+ u32 comp_vector;
+ mana_handle_t cq_handle;
+};
+
+enum mana_rc_queue_type {
+ MANA_RC_SEND_QUEUE_REQUESTER = 0,
+ MANA_RC_SEND_QUEUE_RESPONDER,
+ MANA_RC_SEND_QUEUE_FMR,
+ MANA_RC_RECV_QUEUE_REQUESTER,
+ MANA_RC_RECV_QUEUE_RESPONDER,
+ MANA_RC_QUEUE_TYPE_MAX,
+};
+
+struct mana_ib_rc_qp {
+ struct mana_ib_queue queues[MANA_RC_QUEUE_TYPE_MAX];
+};
+
+enum mana_ud_queue_type {
+ MANA_UD_SEND_QUEUE = 0,
+ MANA_UD_RECV_QUEUE,
+ MANA_UD_QUEUE_TYPE_MAX,
+};
+
+struct mana_ib_ud_qp {
+ struct mana_ib_queue queues[MANA_UD_QUEUE_TYPE_MAX];
+ u32 sq_psn;
+};
+
+struct mana_ib_qp {
+ struct ib_qp ibqp;
+
+ mana_handle_t qp_handle;
+ union {
+ struct mana_ib_queue raw_sq;
+ struct mana_ib_rc_qp rc_qp;
+ struct mana_ib_ud_qp ud_qp;
+ };
+
+ /* The port on the IB device, starting with 1 */
+ u32 port;
+
+ struct list_head cq_send_list;
+ struct list_head cq_recv_list;
+ struct shadow_queue shadow_rq;
+ struct shadow_queue shadow_sq;
+
+ refcount_t refcount;
+ struct completion free;
+};
+
+struct mana_ib_ucontext {
+ struct ib_ucontext ibucontext;
+ u32 doorbell;
+};
+
+struct mana_ib_rwq_ind_table {
+ struct ib_rwq_ind_table ib_ind_table;
+};
+
+enum mana_ib_command_code {
+ MANA_IB_GET_ADAPTER_CAP = 0x30001,
+ MANA_IB_CREATE_ADAPTER = 0x30002,
+ MANA_IB_DESTROY_ADAPTER = 0x30003,
+ MANA_IB_CONFIG_IP_ADDR = 0x30004,
+ MANA_IB_CONFIG_MAC_ADDR = 0x30005,
+ MANA_IB_CREATE_UD_QP = 0x30006,
+ MANA_IB_DESTROY_UD_QP = 0x30007,
+ MANA_IB_CREATE_CQ = 0x30008,
+ MANA_IB_DESTROY_CQ = 0x30009,
+ MANA_IB_CREATE_RC_QP = 0x3000a,
+ MANA_IB_DESTROY_RC_QP = 0x3000b,
+ MANA_IB_SET_QP_STATE = 0x3000d,
+ MANA_IB_QUERY_VF_COUNTERS = 0x30022,
+ MANA_IB_QUERY_DEVICE_COUNTERS = 0x30023,
+};
+
+struct mana_ib_query_adapter_caps_req {
+ struct gdma_req_hdr hdr;
+}; /* HW Data */
+
+enum mana_ib_adapter_features {
+ MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT = BIT(4),
+ MANA_IB_FEATURE_DEV_COUNTERS_SUPPORT = BIT(5),
+ MANA_IB_FEATURE_MULTI_PORTS_SUPPORT = BIT(6),
+};
+
+struct mana_ib_query_adapter_caps_resp {
+ struct gdma_resp_hdr hdr;
+ u32 max_sq_id;
+ u32 max_rq_id;
+ u32 max_cq_id;
+ u32 max_qp_count;
+ u32 max_cq_count;
+ u32 max_mr_count;
+ u32 max_pd_count;
+ u32 max_inbound_read_limit;
+ u32 max_outbound_read_limit;
+ u32 mw_count;
+ u32 max_srq_count;
+ u32 max_requester_sq_size;
+ u32 max_responder_sq_size;
+ u32 max_requester_rq_size;
+ u32 max_responder_rq_size;
+ u32 max_send_sge_count;
+ u32 max_recv_sge_count;
+ u32 max_inline_data_size;
+ u64 feature_flags;
+}; /* HW Data */
+
+enum mana_ib_adapter_features_request {
+ MANA_IB_FEATURE_CLIENT_ERROR_CQE_REQUEST = BIT(1),
+}; /* HW Data */
+
+struct mana_rnic_create_adapter_req {
+ struct gdma_req_hdr hdr;
+ u32 notify_eq_id;
+ u32 reserved;
+ u64 feature_flags;
+}; /* HW Data */
+
+struct mana_rnic_create_adapter_resp {
+ struct gdma_resp_hdr hdr;
+ mana_handle_t adapter;
+}; /* HW Data */
+
+struct mana_rnic_destroy_adapter_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+}; /* HW Data */
+
+struct mana_rnic_destroy_adapter_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+enum mana_ib_addr_op {
+ ADDR_OP_ADD = 1,
+ ADDR_OP_REMOVE = 2,
+};
+
+enum sgid_entry_type {
+ SGID_TYPE_IPV4 = 1,
+ SGID_TYPE_IPV6 = 2,
+};
+
+struct mana_rnic_config_addr_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ enum mana_ib_addr_op op;
+ enum sgid_entry_type sgid_type;
+ u8 ip_addr[16];
+}; /* HW Data */
+
+struct mana_rnic_config_addr_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+struct mana_rnic_config_mac_addr_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ enum mana_ib_addr_op op;
+ u8 mac_addr[ETH_ALEN];
+ u8 reserved[6];
+}; /* HW Data */
+
+struct mana_rnic_config_mac_addr_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+struct mana_rnic_create_cq_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ u64 gdma_region;
+ u32 eq_id;
+ u32 doorbell_page;
+}; /* HW Data */
+
+struct mana_rnic_create_cq_resp {
+ struct gdma_resp_hdr hdr;
+ mana_handle_t cq_handle;
+ u32 cq_id;
+ u32 reserved;
+}; /* HW Data */
+
+struct mana_rnic_destroy_cq_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t cq_handle;
+}; /* HW Data */
+
+struct mana_rnic_destroy_cq_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+enum mana_rnic_create_rc_flags {
+ MANA_RC_FLAG_NO_FMR = 2,
+};
+
+struct mana_rnic_create_qp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t pd_handle;
+ mana_handle_t send_cq_handle;
+ mana_handle_t recv_cq_handle;
+ u64 dma_region[MANA_RC_QUEUE_TYPE_MAX];
+ u64 deprecated[2];
+ u64 flags;
+ u32 doorbell_page;
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+ u32 reserved;
+}; /* HW Data */
+
+struct mana_rnic_create_qp_resp {
+ struct gdma_resp_hdr hdr;
+ mana_handle_t rc_qp_handle;
+ u32 queue_ids[MANA_RC_QUEUE_TYPE_MAX];
+ u32 reserved;
+}; /* HW Data */
+
+struct mana_rnic_destroy_rc_qp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t rc_qp_handle;
+}; /* HW Data */
+
+struct mana_rnic_destroy_rc_qp_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+struct mana_rnic_create_udqp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t pd_handle;
+ mana_handle_t send_cq_handle;
+ mana_handle_t recv_cq_handle;
+ u64 dma_region[MANA_UD_QUEUE_TYPE_MAX];
+ u32 qp_type;
+ u32 doorbell_page;
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+}; /* HW Data */
+
+struct mana_rnic_create_udqp_resp {
+ struct gdma_resp_hdr hdr;
+ mana_handle_t qp_handle;
+ u32 queue_ids[MANA_UD_QUEUE_TYPE_MAX];
+}; /* HW Data */
+
+struct mana_rnic_destroy_udqp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t qp_handle;
+}; /* HW Data */
+
+struct mana_rnic_destroy_udqp_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+struct mana_ib_ah_attr {
+ u8 src_addr[16];
+ u8 dest_addr[16];
+ u8 src_mac[ETH_ALEN];
+ u8 dest_mac[ETH_ALEN];
+ u8 src_addr_type;
+ u8 dest_addr_type;
+ u8 hop_limit;
+ u8 traffic_class;
+ u16 src_port;
+ u16 dest_port;
+ u32 flow_label;
+};
+
+struct mana_rnic_set_qp_state_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t qp_handle;
+ u64 attr_mask;
+ u32 qp_state;
+ u32 path_mtu;
+ u32 rq_psn;
+ u32 sq_psn;
+ u32 dest_qpn;
+ u32 max_dest_rd_atomic;
+ u32 retry_cnt;
+ u32 rnr_retry;
+ u32 min_rnr_timer;
+ u32 rate_limit;
+ struct mana_ib_ah_attr ah_attr;
+ u64 reserved1;
+ u32 qkey;
+ u32 qp_access_flags;
+ u8 local_ack_timeout;
+ u8 max_rd_atomic;
+ u16 reserved2;
+ u32 reserved3;
+}; /* HW Data */
+
+struct mana_rnic_set_qp_state_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+enum WQE_OPCODE_TYPES {
+ WQE_TYPE_UD_SEND = 0,
+ WQE_TYPE_UD_RECV = 8,
+}; /* HW DATA */
+
+struct rdma_send_oob {
+ u32 wqe_type : 5;
+ u32 fence : 1;
+ u32 signaled : 1;
+ u32 solicited : 1;
+ u32 psn : 24;
+
+ u32 ssn_or_rqpn : 24;
+ u32 reserved1 : 8;
+ union {
+ struct {
+ u32 remote_qkey;
+ u32 immediate;
+ u32 reserved1;
+ u32 reserved2;
+ } ud_send;
+ };
+}; /* HW DATA */
+
+struct mana_rdma_cqe {
+ union {
+ struct {
+ u8 cqe_type;
+ u8 data[GDMA_COMP_DATA_SIZE - 1];
+ };
+ struct {
+ u32 cqe_type : 8;
+ u32 vendor_error : 9;
+ u32 reserved1 : 15;
+ u32 sge_offset : 5;
+ u32 tx_wqe_offset : 27;
+ } ud_send;
+ struct {
+ u32 cqe_type : 8;
+ u32 reserved1 : 24;
+ u32 msg_len;
+ u32 src_qpn : 24;
+ u32 reserved2 : 8;
+ u32 imm_data;
+ u32 rx_wqe_offset;
+ } ud_recv;
+ };
+}; /* HW DATA */
+
+struct mana_rnic_query_vf_cntrs_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+}; /* HW Data */
+
+struct mana_rnic_query_vf_cntrs_resp {
+ struct gdma_resp_hdr hdr;
+ u64 requester_timeout;
+ u64 requester_oos_nak;
+ u64 requester_rnr_nak;
+ u64 responder_rnr_nak;
+ u64 responder_oos;
+ u64 responder_dup_request;
+ u64 requester_implicit_nak;
+ u64 requester_readresp_psn_mismatch;
+ u64 nak_inv_req;
+ u64 nak_access_err;
+ u64 nak_opp_err;
+ u64 nak_inv_read;
+ u64 responder_local_len_err;
+ u64 requestor_local_prot_err;
+ u64 responder_rem_access_err;
+ u64 responder_local_qp_err;
+ u64 responder_malformed_wqe;
+ u64 general_hw_err;
+ u64 requester_rnr_nak_retries_exceeded;
+ u64 requester_retries_exceeded;
+ u64 total_fatal_err;
+ u64 received_cnps;
+ u64 num_qps_congested;
+ u64 rate_inc_events;
+ u64 num_qps_recovered;
+ u64 current_rate;
+ u64 dup_rx_req;
+ u64 tx_bytes;
+ u64 rx_bytes;
+ u64 rx_send_req;
+ u64 rx_write_req;
+ u64 rx_read_req;
+ u64 tx_pkt;
+ u64 rx_pkt;
+}; /* HW Data */
+
+struct mana_rnic_query_device_cntrs_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+}; /* HW Data */
+
+struct mana_rnic_query_device_cntrs_resp {
+ struct gdma_resp_hdr hdr;
+ u32 sent_cnps;
+ u32 received_ecns;
+ u32 reserved1;
+ u32 received_cnp_count;
+ u32 qp_congested_events;
+ u32 qp_recovered_events;
+ u32 rate_inc_events;
+ u32 reserved2;
+}; /* HW Data */
+
+static inline struct gdma_context *mdev_to_gc(struct mana_ib_dev *mdev)
+{
+ return mdev->gdma_dev->gdma_context;
+}
+
+static inline struct mana_ib_qp *mana_get_qp_ref(struct mana_ib_dev *mdev,
+ u32 qid, bool is_sq)
+{
+ struct mana_ib_qp *qp;
+ unsigned long flag;
+
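+ /* Send and receive queues share one xarray; bit 31 of the key marks send-queue entries */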
+ if (is_sq)
+ qid |= MANA_SENDQ_MASK;
+
+ xa_lock_irqsave(&mdev->qp_table_wq, flag);
+ qp = xa_load(&mdev->qp_table_wq, qid);
+ if (qp)
+ refcount_inc(&qp->refcount);
+ xa_unlock_irqrestore(&mdev->qp_table_wq, flag);
+ return qp;
+}
+
+static inline void mana_put_qp_ref(struct mana_ib_qp *qp)
+{
+ if (refcount_dec_and_test(&qp->refcount))
+ complete(&qp->free);
+}
+
+static inline bool mana_ib_is_rnic(struct mana_ib_dev *mdev)
+{
+ return mdev->gdma_dev->dev_id.type == GDMA_DEVICE_MANA_IB;
+}
+
+static inline struct net_device *mana_ib_get_netdev(struct ib_device *ibdev, u32 port)
+{
+ struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_context *mc = gc->mana.driver_data;
+
+ if (port < 1 || port > mc->num_ports)
+ return NULL;
+ return mc->ports[port - 1];
+}
+
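+/* Copy size bytes from src to dst with the byte order reversed */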
+static inline void copy_in_reverse(u8 *dst, const u8 *src, u32 size)
+{
+ u32 i;
+
+ for (i = 0; i < size; i++)
+ dst[size - 1 - i] = src[i];
+}
+
+int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq);
+void mana_ib_remove_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq);
+
+int mana_ib_create_zero_offset_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+ mana_handle_t *gdma_region);
+
+int mana_ib_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+ mana_handle_t *gdma_region, u64 virt);
+
+int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev,
+ mana_handle_t gdma_region);
+
+int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type,
+ struct mana_ib_queue *queue);
+int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
+ struct mana_ib_queue *queue);
+void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue);
+
+struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
+ struct ib_wq_init_attr *init_attr,
+ struct ib_udata *udata);
+
+int mana_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
+ u32 wq_attr_mask, struct ib_udata *udata);
+
+int mana_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata);
+
+int mana_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata);
+
+int mana_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl);
+
+struct ib_mr *mana_ib_get_dma_mr(struct ib_pd *ibpd, int access_flags);
+
+struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 iova, int access_flags,
+ struct ib_dmah *dmah,
+ struct ib_udata *udata);
+
+int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
+
+int mana_ib_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata);
+
+int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+
+int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
+
+int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port_id,
+ struct mana_ib_pd *pd, u32 doorbell_id);
+void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
+ u32 port);
+
+int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+
+int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+
+int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+
+int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext,
+ struct ib_udata *udata);
+void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext);
+
+int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma);
+
+int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_immutable *immutable);
+int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
+ struct ib_udata *uhw);
+int mana_ib_query_port(struct ib_device *ibdev, u32 port,
+ struct ib_port_attr *props);
+int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
+ union ib_gid *gid);
+
+void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext);
+
+int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *mdev);
+int mana_eth_query_adapter_caps(struct mana_ib_dev *mdev);
+
+int mana_ib_create_eqs(struct mana_ib_dev *mdev);
+
+void mana_ib_destroy_eqs(struct mana_ib_dev *mdev);
+
+int mana_ib_gd_create_rnic_adapter(struct mana_ib_dev *mdev);
+
+int mana_ib_gd_destroy_rnic_adapter(struct mana_ib_dev *mdev);
+
+int mana_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey);
+
+enum rdma_link_layer mana_ib_get_link_layer(struct ib_device *device, u32 port_num);
+
+int mana_ib_gd_add_gid(const struct ib_gid_attr *attr, void **context);
+
+int mana_ib_gd_del_gid(const struct ib_gid_attr *attr, void **context);
+
+int mana_ib_gd_config_mac(struct mana_ib_dev *mdev, enum mana_ib_addr_op op, u8 *mac);
+
+int mana_ib_gd_create_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq, u32 doorbell);
+
+int mana_ib_gd_destroy_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq);
+
+int mana_ib_gd_create_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+ struct ib_qp_init_attr *attr, u32 doorbell, u64 flags);
+int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp);
+
+int mana_ib_gd_create_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+ struct ib_qp_init_attr *attr, u32 doorbell, u32 type);
+int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp);
+
+int mana_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
+int mana_ib_destroy_ah(struct ib_ah *ah, u32 flags);
+
+int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr);
+int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr);
+
+void mana_drain_gsi_sqs(struct mana_ib_dev *mdev);
+int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+
+struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 iova, int fd, int mr_access_flags,
+ struct ib_dmah *dmah,
+ struct uverbs_attr_bundle *attrs);
+#endif
diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c
new file mode 100644
index 000000000000..3d0245a4c1ed
--- /dev/null
+++ b/drivers/infiniband/hw/mana/mr.c
@@ -0,0 +1,319 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+#define VALID_MR_FLAGS (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ |\
+ IB_ACCESS_REMOTE_ATOMIC | IB_ZERO_BASED)
+
+#define VALID_DMA_MR_FLAGS (IB_ACCESS_LOCAL_WRITE)
+
+static enum gdma_mr_access_flags
+mana_ib_verbs_to_gdma_access_flags(int access_flags)
+{
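+	/* Local read is always granted; map the remaining IB verbs access
+	 * flags onto their GDMA equivalents.
+	 */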
+ enum gdma_mr_access_flags flags = GDMA_ACCESS_FLAG_LOCAL_READ;
+
+ if (access_flags & IB_ACCESS_LOCAL_WRITE)
+ flags |= GDMA_ACCESS_FLAG_LOCAL_WRITE;
+
+ if (access_flags & IB_ACCESS_REMOTE_WRITE)
+ flags |= GDMA_ACCESS_FLAG_REMOTE_WRITE;
+
+ if (access_flags & IB_ACCESS_REMOTE_READ)
+ flags |= GDMA_ACCESS_FLAG_REMOTE_READ;
+
+ if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
+ flags |= GDMA_ACCESS_FLAG_REMOTE_ATOMIC;
+
+ return flags;
+}
+
+static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr,
+ struct gdma_create_mr_params *mr_params)
+{
+ struct gdma_create_mr_response resp = {};
+ struct gdma_create_mr_request req = {};
+ struct gdma_context *gc = mdev_to_gc(dev);
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_MR, sizeof(req),
+ sizeof(resp));
+ req.pd_handle = mr_params->pd_handle;
+ req.mr_type = mr_params->mr_type;
+
+ switch (mr_params->mr_type) {
+ case GDMA_MR_TYPE_GPA:
+ break;
+ case GDMA_MR_TYPE_GVA:
+ req.gva.dma_region_handle = mr_params->gva.dma_region_handle;
+ req.gva.virtual_address = mr_params->gva.virtual_address;
+ req.gva.access_flags = mr_params->gva.access_flags;
+ break;
+ case GDMA_MR_TYPE_ZBVA:
+ req.zbva.dma_region_handle = mr_params->zbva.dma_region_handle;
+ req.zbva.access_flags = mr_params->zbva.access_flags;
+ break;
+ default:
+ ibdev_dbg(&dev->ib_dev,
+ "invalid param (GDMA_MR_TYPE) passed, type %d\n",
+ req.mr_type);
+ return -EINVAL;
+ }
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+
+ if (err || resp.hdr.status) {
+ ibdev_dbg(&dev->ib_dev, "Failed to create mr %d, %u", err,
+ resp.hdr.status);
+ if (!err)
+ err = -EPROTO;
+
+ return err;
+ }
+
+ mr->ibmr.lkey = resp.lkey;
+ mr->ibmr.rkey = resp.rkey;
+ mr->mr_handle = resp.mr_handle;
+
+ return 0;
+}
+
+static int mana_ib_gd_destroy_mr(struct mana_ib_dev *dev, u64 mr_handle)
+{
+ struct gdma_destroy_mr_response resp = {};
+ struct gdma_destroy_mr_request req = {};
+ struct gdma_context *gc = mdev_to_gc(dev);
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_MR, sizeof(req),
+ sizeof(resp));
+
+ req.mr_handle = mr_handle;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err || resp.hdr.status) {
+ dev_err(gc->dev, "Failed to destroy MR: %d, 0x%x\n", err,
+ resp.hdr.status);
+ if (!err)
+ err = -EPROTO;
+ return err;
+ }
+
+ return 0;
+}
+
+struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 iova, int access_flags,
+ struct ib_dmah *dmah,
+ struct ib_udata *udata)
+{
+ struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+ struct gdma_create_mr_params mr_params = {};
+ struct ib_device *ibdev = ibpd->device;
+ struct mana_ib_dev *dev;
+ struct mana_ib_mr *mr;
+ u64 dma_region_handle;
+ int err;
+
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ ibdev_dbg(ibdev,
+ "start 0x%llx, iova 0x%llx length 0x%llx access_flags 0x%x",
+ start, iova, length, access_flags);
+
+ access_flags &= ~IB_ACCESS_OPTIONAL;
+ if (access_flags & ~VALID_MR_FLAGS)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->umem = ib_umem_get(ibdev, start, length, access_flags);
+ if (IS_ERR(mr->umem)) {
+ err = PTR_ERR(mr->umem);
+ ibdev_dbg(ibdev,
+ "Failed to get umem for register user-mr, %pe\n",
+ mr->umem);
+ goto err_free;
+ }
+
+ err = mana_ib_create_dma_region(dev, mr->umem, &dma_region_handle, iova);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed create dma region for user-mr, %d\n",
+ err);
+ goto err_umem;
+ }
+
+ ibdev_dbg(ibdev,
+ "created dma region for user-mr 0x%llx\n",
+ dma_region_handle);
+
+ mr_params.pd_handle = pd->pd_handle;
+ if (access_flags & IB_ZERO_BASED) {
+ mr_params.mr_type = GDMA_MR_TYPE_ZBVA;
+ mr_params.zbva.dma_region_handle = dma_region_handle;
+ mr_params.zbva.access_flags =
+ mana_ib_verbs_to_gdma_access_flags(access_flags);
+ } else {
+ mr_params.mr_type = GDMA_MR_TYPE_GVA;
+ mr_params.gva.dma_region_handle = dma_region_handle;
+ mr_params.gva.virtual_address = iova;
+ mr_params.gva.access_flags =
+ mana_ib_verbs_to_gdma_access_flags(access_flags);
+ }
+
+ err = mana_ib_gd_create_mr(dev, mr, &mr_params);
+ if (err)
+ goto err_dma_region;
+
+ /*
+ * There is no need to keep track of dma_region_handle after MR is
+ * successfully created. The dma_region_handle is tracked in the PF
+ * as part of the lifecycle of this MR.
+ */
+
+ return &mr->ibmr;
+
+err_dma_region:
+ mana_gd_destroy_dma_region(mdev_to_gc(dev), dma_region_handle);
+
+err_umem:
+ ib_umem_release(mr->umem);
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 iova, int fd, int access_flags,
+ struct ib_dmah *dmah,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+ struct gdma_create_mr_params mr_params = {};
+ struct ib_device *ibdev = ibpd->device;
+ struct ib_umem_dmabuf *umem_dmabuf;
+ struct mana_ib_dev *dev;
+ struct mana_ib_mr *mr;
+ u64 dma_region_handle;
+ int err;
+
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ access_flags &= ~IB_ACCESS_OPTIONAL;
+ if (access_flags & ~VALID_MR_FLAGS)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ umem_dmabuf = ib_umem_dmabuf_get_pinned(ibdev, start, length, fd, access_flags);
+ if (IS_ERR(umem_dmabuf)) {
+ err = PTR_ERR(umem_dmabuf);
+ ibdev_dbg(ibdev, "Failed to get dmabuf umem, %pe\n",
+ umem_dmabuf);
+ goto err_free;
+ }
+
+ mr->umem = &umem_dmabuf->umem;
+
+ err = mana_ib_create_dma_region(dev, mr->umem, &dma_region_handle, iova);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed create dma region for user-mr, %d\n",
+ err);
+ goto err_umem;
+ }
+
+ mr_params.pd_handle = pd->pd_handle;
+ mr_params.mr_type = GDMA_MR_TYPE_GVA;
+ mr_params.gva.dma_region_handle = dma_region_handle;
+ mr_params.gva.virtual_address = iova;
+ mr_params.gva.access_flags =
+ mana_ib_verbs_to_gdma_access_flags(access_flags);
+
+ err = mana_ib_gd_create_mr(dev, mr, &mr_params);
+ if (err)
+ goto err_dma_region;
+
+ /*
+ * There is no need to keep track of dma_region_handle after MR is
+ * successfully created. The dma_region_handle is tracked in the PF
+ * as part of the lifecycle of this MR.
+ */
+
+ return &mr->ibmr;
+
+err_dma_region:
+ mana_gd_destroy_dma_region(mdev_to_gc(dev), dma_region_handle);
+
+err_umem:
+ ib_umem_release(mr->umem);
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+struct ib_mr *mana_ib_get_dma_mr(struct ib_pd *ibpd, int access_flags)
+{
+ struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+ struct gdma_create_mr_params mr_params = {};
+ struct ib_device *ibdev = ibpd->device;
+ struct mana_ib_dev *dev;
+ struct mana_ib_mr *mr;
+ int err;
+
+ dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ if (access_flags & ~VALID_DMA_MR_FLAGS)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr_params.pd_handle = pd->pd_handle;
+ mr_params.mr_type = GDMA_MR_TYPE_GPA;
+
+ err = mana_ib_gd_create_mr(dev, mr, &mr_params);
+ if (err)
+ goto err_free;
+
+ return &mr->ibmr;
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+{
+ struct mana_ib_mr *mr = container_of(ibmr, struct mana_ib_mr, ibmr);
+ struct ib_device *ibdev = ibmr->device;
+ struct mana_ib_dev *dev;
+ int err;
+
+ dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ err = mana_ib_gd_destroy_mr(dev, mr->mr_handle);
+ if (err)
+ return err;
+
+ if (mr->umem)
+ ib_umem_release(mr->umem);
+
+ kfree(mr);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
new file mode 100644
index 000000000000..48c1f4977f21
--- /dev/null
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -0,0 +1,921 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
+ struct net_device *ndev,
+ mana_handle_t default_rxobj,
+ mana_handle_t ind_table[],
+ u32 log_ind_tbl_size, u32 rx_hash_key_len,
+ u8 *rx_hash_key)
+{
+ struct mana_port_context *mpc = netdev_priv(ndev);
+ struct mana_cfg_rx_steer_req_v2 *req;
+ struct mana_cfg_rx_steer_resp resp = {};
+ struct gdma_context *gc;
+ u32 req_buf_size;
+ int i, err;
+
+ gc = mdev_to_gc(dev);
+
+ req_buf_size = struct_size(req, indir_tab, MANA_INDIRECT_TABLE_DEF_SIZE);
+ req = kzalloc(req_buf_size, GFP_KERNEL);
+ if (!req)
+ return -ENOMEM;
+
+ mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size,
+ sizeof(resp));
+
+ req->hdr.req.msg_version = GDMA_MESSAGE_V2;
+
+ req->vport = mpc->port_handle;
+ req->rx_enable = 1;
+ req->update_default_rxobj = 1;
+ req->default_rxobj = default_rxobj;
+ req->hdr.dev_id = gc->mana.dev_id;
+
+ /* If there is more than one entry in the indirection table, enable RSS */
+ if (log_ind_tbl_size)
+ req->rss_enable = true;
+
+ req->num_indir_entries = MANA_INDIRECT_TABLE_DEF_SIZE;
+ req->indir_tab_offset = offsetof(struct mana_cfg_rx_steer_req_v2,
+ indir_tab);
+ req->update_indir_tab = true;
+ req->cqe_coalescing_enable = 1;
+
+ /* The ind table passed to the hardware must have
+ * MANA_INDIRECT_TABLE_DEF_SIZE entries. Wrap the verbs ind_table
+ * around to fill MANA_INDIRECT_TABLE_DEF_SIZE entries if required.
+ */
+ ibdev_dbg(&dev->ib_dev, "ind table size %u\n", 1 << log_ind_tbl_size);
+ for (i = 0; i < MANA_INDIRECT_TABLE_DEF_SIZE; i++) {
+ req->indir_tab[i] = ind_table[i % (1 << log_ind_tbl_size)];
+ ibdev_dbg(&dev->ib_dev, "index %u handle 0x%llx\n", i,
+ req->indir_tab[i]);
+ }
+
+ req->update_hashkey = true;
+ if (rx_hash_key_len)
+ memcpy(req->hashkey, rx_hash_key, rx_hash_key_len);
+ else
+ netdev_rss_key_fill(req->hashkey, MANA_HASH_KEY_SIZE);
+
+ ibdev_dbg(&dev->ib_dev, "vport handle %llu default_rxobj 0x%llx\n",
+ req->vport, default_rxobj);
+
+ err = mana_gd_send_request(gc, req_buf_size, req, sizeof(resp), &resp);
+ if (err) {
+ netdev_err(ndev, "Failed to configure vPort RX: %d\n", err);
+ goto out;
+ }
+
+ if (resp.hdr.status) {
+ netdev_err(ndev, "vPort RX configuration failed: 0x%x\n",
+ resp.hdr.status);
+ err = -EPROTO;
+ goto out;
+ }
+
+ netdev_info(ndev, "Configured steering vPort %llu log_entries %u\n",
+ mpc->port_handle, log_ind_tbl_size);
+
+out:
+ kfree(req);
+ return err;
+}
+
+static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
+ struct ib_qp_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ struct mana_ib_dev *mdev =
+ container_of(pd->device, struct mana_ib_dev, ib_dev);
+ struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl;
+ struct mana_ib_create_qp_rss_resp resp = {};
+ struct mana_ib_create_qp_rss ucmd = {};
+ mana_handle_t *mana_ind_table;
+ struct mana_port_context *mpc;
+ unsigned int ind_tbl_size;
+ struct net_device *ndev;
+ struct mana_ib_cq *cq;
+ struct mana_ib_wq *wq;
+ struct mana_eq *eq;
+ struct ib_cq *ibcq;
+ struct ib_wq *ibwq;
+ int i = 0;
+ u32 port;
+ int ret;
+
+ if (!udata || udata->inlen < sizeof(ucmd))
+ return -EINVAL;
+
+ ret = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+ if (ret) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Failed copy from udata for create rss-qp, err %d\n",
+ ret);
+ return ret;
+ }
+
+ if (attr->cap.max_recv_wr > mdev->adapter_caps.max_qp_wr) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Requested max_recv_wr %d exceeding limit\n",
+ attr->cap.max_recv_wr);
+ return -EINVAL;
+ }
+
+ if (attr->cap.max_recv_sge > MAX_RX_WQE_SGL_ENTRIES) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Requested max_recv_sge %d exceeding limit\n",
+ attr->cap.max_recv_sge);
+ return -EINVAL;
+ }
+
+ ind_tbl_size = 1 << ind_tbl->log_ind_tbl_size;
+ if (ind_tbl_size > MANA_INDIRECT_TABLE_DEF_SIZE) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Indirect table size %d exceeding limit\n",
+ ind_tbl_size);
+ return -EINVAL;
+ }
+
+ if (ucmd.rx_hash_function != MANA_IB_RX_HASH_FUNC_TOEPLITZ) {
+ ibdev_dbg(&mdev->ib_dev,
+ "RX Hash function is not supported, %d\n",
+ ucmd.rx_hash_function);
+ return -EINVAL;
+ }
+
+ /* IB ports start at 1, MANA ports start at 0 */
+ port = ucmd.port;
+ ndev = mana_ib_get_netdev(pd->device, port);
+ if (!ndev) {
+ ibdev_dbg(&mdev->ib_dev, "Invalid port %u in creating qp\n",
+ port);
+ return -EINVAL;
+ }
+ mpc = netdev_priv(ndev);
+
+ ibdev_dbg(&mdev->ib_dev, "rx_hash_function %d port %d\n",
+ ucmd.rx_hash_function, port);
+
+ mana_ind_table = kcalloc(ind_tbl_size, sizeof(mana_handle_t),
+ GFP_KERNEL);
+ if (!mana_ind_table) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ qp->port = port;
+
+ for (i = 0; i < ind_tbl_size; i++) {
+ struct mana_obj_spec wq_spec = {};
+ struct mana_obj_spec cq_spec = {};
+
+ ibwq = ind_tbl->ind_tbl[i];
+ wq = container_of(ibwq, struct mana_ib_wq, ibwq);
+
+ ibcq = ibwq->cq;
+ cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+
+ wq_spec.gdma_region = wq->queue.gdma_region;
+ wq_spec.queue_size = wq->wq_buf_size;
+
+ cq_spec.gdma_region = cq->queue.gdma_region;
+ cq_spec.queue_size = cq->cqe * COMP_ENTRY_SIZE;
+ cq_spec.modr_ctx_id = 0;
+ eq = &mpc->ac->eqs[cq->comp_vector];
+ cq_spec.attached_eq = eq->eq->id;
+
+ ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ,
+ &wq_spec, &cq_spec, &wq->rx_object);
+ if (ret) {
+ /* Do cleanup starting with index i-1 */
+ i--;
+ goto fail;
+ }
+
+ /* The GDMA regions are now owned by the WQ object */
+ wq->queue.gdma_region = GDMA_INVALID_DMA_REGION;
+ cq->queue.gdma_region = GDMA_INVALID_DMA_REGION;
+
+ wq->queue.id = wq_spec.queue_index;
+ cq->queue.id = cq_spec.queue_index;
+
+ ibdev_dbg(&mdev->ib_dev,
+ "rx_object 0x%llx wq id %llu cq id %llu\n",
+ wq->rx_object, wq->queue.id, cq->queue.id);
+
+ resp.entries[i].cqid = cq->queue.id;
+ resp.entries[i].wqid = wq->queue.id;
+
+ mana_ind_table[i] = wq->rx_object;
+
+ /* Create CQ table entry */
+ ret = mana_ib_install_cq_cb(mdev, cq);
+ if (ret)
+ goto fail;
+ }
+ resp.num_entries = i;
+
+ ret = mana_ib_cfg_vport_steering(mdev, ndev, wq->rx_object,
+ mana_ind_table,
+ ind_tbl->log_ind_tbl_size,
+ ucmd.rx_hash_key_len,
+ ucmd.rx_hash_key);
+ if (ret)
+ goto fail;
+
+ ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (ret) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Failed to copy to udata create rss-qp, %d\n",
+ ret);
+ goto fail;
+ }
+
+ kfree(mana_ind_table);
+
+ return 0;
+
+fail:
+ while (i-- > 0) {
+ ibwq = ind_tbl->ind_tbl[i];
+ ibcq = ibwq->cq;
+ wq = container_of(ibwq, struct mana_ib_wq, ibwq);
+ cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+
+ mana_ib_remove_cq_cb(mdev, cq);
+ mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object);
+ }
+
+ kfree(mana_ind_table);
+
+ return ret;
+}
+
+static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
+ struct ib_qp_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ struct mana_ib_dev *mdev =
+ container_of(ibpd->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_cq *send_cq =
+ container_of(attr->send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_ucontext *mana_ucontext =
+ rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
+ ibucontext);
+ struct mana_ib_create_qp_resp resp = {};
+ struct mana_ib_create_qp ucmd = {};
+ struct mana_obj_spec wq_spec = {};
+ struct mana_obj_spec cq_spec = {};
+ struct mana_port_context *mpc;
+ struct net_device *ndev;
+ struct mana_eq *eq;
+ int eq_vec;
+ u32 port;
+ int err;
+
+ if (!mana_ucontext || udata->inlen < sizeof(ucmd))
+ return -EINVAL;
+
+ err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Failed to copy from udata create qp-raw, %d\n", err);
+ return err;
+ }
+
+ if (attr->cap.max_send_wr > mdev->adapter_caps.max_qp_wr) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Requested max_send_wr %d exceeding limit\n",
+ attr->cap.max_send_wr);
+ return -EINVAL;
+ }
+
+ if (attr->cap.max_send_sge > MAX_TX_WQE_SGL_ENTRIES) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Requested max_send_sge %d exceeding limit\n",
+ attr->cap.max_send_sge);
+ return -EINVAL;
+ }
+
+ port = ucmd.port;
+ ndev = mana_ib_get_netdev(ibpd->device, port);
+ if (!ndev) {
+ ibdev_dbg(&mdev->ib_dev, "Invalid port %u in creating qp\n",
+ port);
+ return -EINVAL;
+ }
+ mpc = netdev_priv(ndev);
+ ibdev_dbg(&mdev->ib_dev, "port %u ndev %p mpc %p\n", port, ndev, mpc);
+
+ err = mana_ib_cfg_vport(mdev, port, pd, mana_ucontext->doorbell);
+ if (err)
+ return -ENODEV;
+
+ qp->port = port;
+
+ ibdev_dbg(&mdev->ib_dev, "ucmd sq_buf_addr 0x%llx port %u\n",
+ ucmd.sq_buf_addr, ucmd.port);
+
+ err = mana_ib_create_queue(mdev, ucmd.sq_buf_addr, ucmd.sq_buf_size, &qp->raw_sq);
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Failed to create queue for create qp-raw, err %d\n", err);
+ goto err_free_vport;
+ }
+
+ /* Create a WQ on the same port handle used by the Ethernet driver */
+ wq_spec.gdma_region = qp->raw_sq.gdma_region;
+ wq_spec.queue_size = ucmd.sq_buf_size;
+
+ cq_spec.gdma_region = send_cq->queue.gdma_region;
+ cq_spec.queue_size = send_cq->cqe * COMP_ENTRY_SIZE;
+ cq_spec.modr_ctx_id = 0;
+ eq_vec = send_cq->comp_vector;
+ eq = &mpc->ac->eqs[eq_vec];
+ cq_spec.attached_eq = eq->eq->id;
+
+ err = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_SQ, &wq_spec,
+ &cq_spec, &qp->qp_handle);
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Failed to create wq for create raw-qp, err %d\n",
+ err);
+ goto err_destroy_queue;
+ }
+
+ /* The GDMA regions are now owned by the WQ object */
+ qp->raw_sq.gdma_region = GDMA_INVALID_DMA_REGION;
+ send_cq->queue.gdma_region = GDMA_INVALID_DMA_REGION;
+
+ qp->raw_sq.id = wq_spec.queue_index;
+ send_cq->queue.id = cq_spec.queue_index;
+
+ /* Create CQ table entry */
+ err = mana_ib_install_cq_cb(mdev, send_cq);
+ if (err)
+ goto err_destroy_wq_obj;
+
+ ibdev_dbg(&mdev->ib_dev,
+ "qp->qp_handle 0x%llx sq id %llu cq id %llu\n",
+ qp->qp_handle, qp->raw_sq.id, send_cq->queue.id);
+
+ resp.sqid = qp->raw_sq.id;
+ resp.cqid = send_cq->queue.id;
+ resp.tx_vp_offset = pd->tx_vp_offset;
+
+ err = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Failed copy udata for create qp-raw, %d\n",
+ err);
+ goto err_remove_cq_cb;
+ }
+
+ return 0;
+
+err_remove_cq_cb:
+ mana_ib_remove_cq_cb(mdev, send_cq);
+
+err_destroy_wq_obj:
+ mana_destroy_wq_obj(mpc, GDMA_SQ, qp->qp_handle);
+
+err_destroy_queue:
+ mana_ib_destroy_queue(mdev, &qp->raw_sq);
+
+err_free_vport:
+ mana_ib_uncfg_vport(mdev, pd, port);
+
+ return err;
+}
+
+static u32 mana_ib_wqe_size(u32 sge, u32 oob_size)
+{
+ u32 wqe_size = sge * sizeof(struct gdma_sge) + sizeof(struct gdma_wqe) + oob_size;
+
+ return ALIGN(wqe_size, GDMA_WQE_BU_SIZE);
+}
+
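+/* Worst-case ring size for a UD/GSI queue: max WRs times the largest
+ * possible WQE, rounded up to a power of two and page aligned.
+ */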
+static u32 mana_ib_queue_size(struct ib_qp_init_attr *attr, u32 queue_type)
+{
+ u32 queue_size;
+
+ switch (attr->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ if (queue_type == MANA_UD_SEND_QUEUE)
+ queue_size = attr->cap.max_send_wr *
+ mana_ib_wqe_size(attr->cap.max_send_sge, INLINE_OOB_LARGE_SIZE);
+ else
+ queue_size = attr->cap.max_recv_wr *
+ mana_ib_wqe_size(attr->cap.max_recv_sge, INLINE_OOB_SMALL_SIZE);
+ break;
+ default:
+ return 0;
+ }
+
+ return MANA_PAGE_ALIGN(roundup_pow_of_two(queue_size));
+}
+
+static enum gdma_queue_type mana_ib_queue_type(struct ib_qp_init_attr *attr, u32 queue_type)
+{
+ enum gdma_queue_type type;
+
+ switch (attr->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ if (queue_type == MANA_UD_SEND_QUEUE)
+ type = GDMA_SQ;
+ else
+ type = GDMA_RQ;
+ break;
+ default:
+ type = GDMA_INVALID_QUEUE;
+ }
+ return type;
+}
+
+static int mana_table_store_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ return xa_insert_irq(&mdev->qp_table_wq, qp->ibqp.qp_num, qp,
+ GFP_KERNEL);
+}
+
+static void mana_table_remove_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ xa_erase_irq(&mdev->qp_table_wq, qp->ibqp.qp_num);
+}
+
+static int mana_table_store_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ u32 qids = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].id | MANA_SENDQ_MASK;
+ u32 qidr = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
+ int err;
+
+ err = xa_insert_irq(&mdev->qp_table_wq, qids, qp, GFP_KERNEL);
+ if (err)
+ return err;
+
+ err = xa_insert_irq(&mdev->qp_table_wq, qidr, qp, GFP_KERNEL);
+ if (err)
+ goto remove_sq;
+
+ return 0;
+
+remove_sq:
+ xa_erase_irq(&mdev->qp_table_wq, qids);
+ return err;
+}
+
+static void mana_table_remove_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ u32 qids = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].id | MANA_SENDQ_MASK;
+ u32 qidr = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
+
+ xa_erase_irq(&mdev->qp_table_wq, qids);
+ xa_erase_irq(&mdev->qp_table_wq, qidr);
+}
+
+static int mana_table_store_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ refcount_set(&qp->refcount, 1);
+ init_completion(&qp->free);
+
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_RC:
+ return mana_table_store_rc_qp(mdev, qp);
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return mana_table_store_ud_qp(mdev, qp);
+ default:
+ ibdev_dbg(&mdev->ib_dev, "Unknown QP type for storing in mana table, %d\n",
+ qp->ibqp.qp_type);
+ }
+
+ return -EINVAL;
+}
+
+static void mana_table_remove_qp(struct mana_ib_dev *mdev,
+ struct mana_ib_qp *qp)
+{
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_RC:
+ mana_table_remove_rc_qp(mdev, qp);
+ break;
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ mana_table_remove_ud_qp(mdev, qp);
+ break;
+ default:
+ ibdev_dbg(&mdev->ib_dev, "Unknown QP type for removing from mana table, %d\n",
+ qp->ibqp.qp_type);
+ return;
+ }
+ mana_put_qp_ref(qp);
+ wait_for_completion(&qp->free);
+}
+
+static int mana_ib_create_rc_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
+ struct ib_qp_init_attr *attr, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev = container_of(ibpd->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ struct mana_ib_create_rc_qp_resp resp = {};
+ struct mana_ib_ucontext *mana_ucontext;
+ struct mana_ib_create_rc_qp ucmd = {};
+ int i, err, j;
+ u64 flags = 0;
+ u32 doorbell;
+
+ if (!udata || udata->inlen < sizeof(ucmd))
+ return -EINVAL;
+
+ mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext, ibucontext);
+ doorbell = mana_ucontext->doorbell;
+ flags = MANA_RC_FLAG_NO_FMR;
+ err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to copy from udata, %d\n", err);
+ return err;
+ }
+
+ for (i = 0, j = 0; i < MANA_RC_QUEUE_TYPE_MAX; ++i) {
+ /* skip FMR for user-level RC QPs */
+ if (i == MANA_RC_SEND_QUEUE_FMR) {
+ qp->rc_qp.queues[i].id = INVALID_QUEUE_ID;
+ qp->rc_qp.queues[i].gdma_region = GDMA_INVALID_DMA_REGION;
+ continue;
+ }
+ err = mana_ib_create_queue(mdev, ucmd.queue_buf[j], ucmd.queue_size[j],
+ &qp->rc_qp.queues[i]);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create queue %d, err %d\n", i, err);
+ goto destroy_queues;
+ }
+ j++;
+ }
+
+ err = mana_ib_gd_create_rc_qp(mdev, qp, attr, doorbell, flags);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create rc qp %d\n", err);
+ goto destroy_queues;
+ }
+ qp->ibqp.qp_num = qp->rc_qp.queues[MANA_RC_RECV_QUEUE_RESPONDER].id;
+ qp->port = attr->port_num;
+
+ if (udata) {
+ for (i = 0, j = 0; i < MANA_RC_QUEUE_TYPE_MAX; ++i) {
+ if (i == MANA_RC_SEND_QUEUE_FMR)
+ continue;
+ resp.queue_id[j] = qp->rc_qp.queues[i].id;
+ j++;
+ }
+ err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
+ goto destroy_qp;
+ }
+ }
+
+ err = mana_table_store_qp(mdev, qp);
+ if (err)
+ goto destroy_qp;
+
+ return 0;
+
+destroy_qp:
+ mana_ib_gd_destroy_rc_qp(mdev, qp);
+destroy_queues:
+ while (i-- > 0)
+ mana_ib_destroy_queue(mdev, &qp->rc_qp.queues[i]);
+ return err;
+}
+
+static void mana_add_qp_to_cqs(struct mana_ib_qp *qp)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&send_cq->cq_lock, flags);
+ list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
+ spin_unlock_irqrestore(&send_cq->cq_lock, flags);
+
+ spin_lock_irqsave(&recv_cq->cq_lock, flags);
+ list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
+ spin_unlock_irqrestore(&recv_cq->cq_lock, flags);
+}
+
+static void mana_remove_qp_from_cqs(struct mana_ib_qp *qp)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&send_cq->cq_lock, flags);
+ list_del(&qp->cq_send_list);
+ spin_unlock_irqrestore(&send_cq->cq_lock, flags);
+
+ spin_lock_irqsave(&recv_cq->cq_lock, flags);
+ list_del(&qp->cq_recv_list);
+ spin_unlock_irqrestore(&recv_cq->cq_lock, flags);
+}
+
+static int mana_ib_create_ud_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
+ struct ib_qp_init_attr *attr, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev = container_of(ibpd->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ u32 doorbell, queue_size;
+ int i, err;
+
+ if (udata) {
+ ibdev_dbg(&mdev->ib_dev, "User-level UD QPs are not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i) {
+ queue_size = mana_ib_queue_size(attr, i);
+ err = mana_ib_create_kernel_queue(mdev, queue_size, mana_ib_queue_type(attr, i),
+ &qp->ud_qp.queues[i]);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create queue %d, err %d\n",
+ i, err);
+ goto destroy_queues;
+ }
+ }
+ doorbell = mdev->gdma_dev->doorbell;
+
+ err = create_shadow_queue(&qp->shadow_rq, attr->cap.max_recv_wr,
+ sizeof(struct ud_rq_shadow_wqe));
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create shadow rq err %d\n", err);
+ goto destroy_queues;
+ }
+ err = create_shadow_queue(&qp->shadow_sq, attr->cap.max_send_wr,
+ sizeof(struct ud_sq_shadow_wqe));
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create shadow sq err %d\n", err);
+ goto destroy_shadow_queues;
+ }
+
+ err = mana_ib_gd_create_ud_qp(mdev, qp, attr, doorbell, attr->qp_type);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create ud qp %d\n", err);
+ goto destroy_shadow_queues;
+ }
+ qp->ibqp.qp_num = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
+ qp->port = attr->port_num;
+
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i)
+ qp->ud_qp.queues[i].kmem->id = qp->ud_qp.queues[i].id;
+
+ err = mana_table_store_qp(mdev, qp);
+ if (err)
+ goto destroy_qp;
+
+ mana_add_qp_to_cqs(qp);
+
+ return 0;
+
+destroy_qp:
+ mana_ib_gd_destroy_ud_qp(mdev, qp);
+destroy_shadow_queues:
+ destroy_shadow_queue(&qp->shadow_rq);
+ destroy_shadow_queue(&qp->shadow_sq);
+destroy_queues:
+ while (i-- > 0)
+ mana_ib_destroy_queue(mdev, &qp->ud_qp.queues[i]);
+ return err;
+}
+
+int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
+ struct ib_udata *udata)
+{
+ switch (attr->qp_type) {
+ case IB_QPT_RAW_PACKET:
+ /* When rwq_ind_tbl is used, it's for creating WQs for RSS */
+ if (attr->rwq_ind_tbl)
+ return mana_ib_create_qp_rss(ibqp, ibqp->pd, attr,
+ udata);
+
+ return mana_ib_create_qp_raw(ibqp, ibqp->pd, attr, udata);
+ case IB_QPT_RC:
+ return mana_ib_create_rc_qp(ibqp, ibqp->pd, attr, udata);
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return mana_ib_create_ud_qp(ibqp, ibqp->pd, attr, udata);
+ default:
+ ibdev_dbg(ibqp->device, "Creating QP type %u not supported\n",
+ attr->qp_type);
+ }
+
+ return -EINVAL;
+}
+
+static int mana_ib_gd_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev = container_of(ibqp->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ struct mana_rnic_set_qp_state_resp resp = {};
+ struct mana_rnic_set_qp_state_req req = {};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_port_context *mpc;
+ struct net_device *ndev;
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_SET_QP_STATE, sizeof(req), sizeof(resp));
+
+ req.hdr.req.msg_version = GDMA_MESSAGE_V3;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.qp_handle = qp->qp_handle;
+ req.qp_state = attr->qp_state;
+ req.attr_mask = attr_mask;
+ req.path_mtu = attr->path_mtu;
+ req.rq_psn = attr->rq_psn;
+ req.sq_psn = attr->sq_psn;
+ req.dest_qpn = attr->dest_qp_num;
+ req.max_dest_rd_atomic = attr->max_dest_rd_atomic;
+ req.retry_cnt = attr->retry_cnt;
+ req.rnr_retry = attr->rnr_retry;
+ req.min_rnr_timer = attr->min_rnr_timer;
+ req.rate_limit = attr->rate_limit;
+ req.qkey = attr->qkey;
+ req.local_ack_timeout = attr->timeout;
+ req.qp_access_flags = attr->qp_access_flags;
+ req.max_rd_atomic = attr->max_rd_atomic;
+
+ if (attr_mask & IB_QP_AV) {
+ ndev = mana_ib_get_netdev(&mdev->ib_dev, ibqp->port);
+ if (!ndev) {
+ ibdev_dbg(&mdev->ib_dev, "Invalid port %u in QP %u\n",
+ ibqp->port, ibqp->qp_num);
+ return -EINVAL;
+ }
+ mpc = netdev_priv(ndev);
+ copy_in_reverse(req.ah_attr.src_mac, mpc->mac_addr, ETH_ALEN);
+ copy_in_reverse(req.ah_attr.dest_mac, attr->ah_attr.roce.dmac, ETH_ALEN);
+ copy_in_reverse(req.ah_attr.src_addr, attr->ah_attr.grh.sgid_attr->gid.raw,
+ sizeof(union ib_gid));
+ copy_in_reverse(req.ah_attr.dest_addr, attr->ah_attr.grh.dgid.raw,
+ sizeof(union ib_gid));
+ if (rdma_gid_attr_network_type(attr->ah_attr.grh.sgid_attr) == RDMA_NETWORK_IPV4) {
+ req.ah_attr.src_addr_type = SGID_TYPE_IPV4;
+ req.ah_attr.dest_addr_type = SGID_TYPE_IPV4;
+ } else {
+ req.ah_attr.src_addr_type = SGID_TYPE_IPV6;
+ req.ah_attr.dest_addr_type = SGID_TYPE_IPV6;
+ }
+ req.ah_attr.dest_port = ROCE_V2_UDP_DPORT;
+ req.ah_attr.src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
+ ibqp->qp_num, attr->dest_qp_num);
+ req.ah_attr.traffic_class = attr->ah_attr.grh.traffic_class >> 2;
+ req.ah_attr.hop_limit = attr->ah_attr.grh.hop_limit;
+ req.ah_attr.flow_label = attr->ah_attr.grh.flow_label;
+ }
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed modify qp err %d", err);
+ return err;
+ }
+
+ return 0;
+}
+
+int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ switch (ibqp->qp_type) {
+ case IB_QPT_RC:
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return mana_ib_gd_modify_qp(ibqp, attr, attr_mask, udata);
+ default:
+ ibdev_dbg(ibqp->device, "Modify QP type %u not supported", ibqp->qp_type);
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mana_ib_destroy_qp_rss(struct mana_ib_qp *qp,
+ struct ib_rwq_ind_table *ind_tbl,
+ struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev =
+ container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ struct mana_port_context *mpc;
+ struct net_device *ndev;
+ struct mana_ib_wq *wq;
+ struct ib_wq *ibwq;
+ int i;
+
+ ndev = mana_ib_get_netdev(qp->ibqp.device, qp->port);
+ mpc = netdev_priv(ndev);
+
+ for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) {
+ ibwq = ind_tbl->ind_tbl[i];
+ wq = container_of(ibwq, struct mana_ib_wq, ibwq);
+ ibdev_dbg(&mdev->ib_dev, "destroying wq->rx_object %llu\n",
+ wq->rx_object);
+ mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object);
+ }
+
+ return 0;
+}
+
+static int mana_ib_destroy_qp_raw(struct mana_ib_qp *qp, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev =
+ container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ struct ib_pd *ibpd = qp->ibqp.pd;
+ struct mana_port_context *mpc;
+ struct net_device *ndev;
+ struct mana_ib_pd *pd;
+
+ ndev = mana_ib_get_netdev(qp->ibqp.device, qp->port);
+ mpc = netdev_priv(ndev);
+ pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+
+ mana_destroy_wq_obj(mpc, GDMA_SQ, qp->qp_handle);
+
+ mana_ib_destroy_queue(mdev, &qp->raw_sq);
+
+ mana_ib_uncfg_vport(mdev, pd, qp->port);
+
+ return 0;
+}
+
+static int mana_ib_destroy_rc_qp(struct mana_ib_qp *qp, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev =
+ container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ int i;
+
+ mana_table_remove_qp(mdev, qp);
+
+ /* Ignore return code as there is not much we can do about it.
+ * The error message is printed inside.
+ */
+ mana_ib_gd_destroy_rc_qp(mdev, qp);
+ for (i = 0; i < MANA_RC_QUEUE_TYPE_MAX; ++i)
+ mana_ib_destroy_queue(mdev, &qp->rc_qp.queues[i]);
+
+ return 0;
+}
+
+static int mana_ib_destroy_ud_qp(struct mana_ib_qp *qp, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev =
+ container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ int i;
+
+ mana_remove_qp_from_cqs(qp);
+ mana_table_remove_qp(mdev, qp);
+
+ destroy_shadow_queue(&qp->shadow_rq);
+ destroy_shadow_queue(&qp->shadow_sq);
+
+ /* Ignore return code as there is not much we can do about it.
+ * The error message is printed inside.
+ */
+ mana_ib_gd_destroy_ud_qp(mdev, qp);
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i)
+ mana_ib_destroy_queue(mdev, &qp->ud_qp.queues[i]);
+
+ return 0;
+}
+
+int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
+{
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+
+ switch (ibqp->qp_type) {
+ case IB_QPT_RAW_PACKET:
+ if (ibqp->rwq_ind_tbl)
+ return mana_ib_destroy_qp_rss(qp, ibqp->rwq_ind_tbl,
+ udata);
+
+ return mana_ib_destroy_qp_raw(qp, udata);
+ case IB_QPT_RC:
+ return mana_ib_destroy_rc_qp(qp, udata);
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return mana_ib_destroy_ud_qp(qp, udata);
+ default:
+ ibdev_dbg(ibqp->device, "Unexpected QP type %u\n",
+ ibqp->qp_type);
+ }
+
+ return -ENOENT;
+}
diff --git a/drivers/infiniband/hw/mana/shadow_queue.h b/drivers/infiniband/hw/mana/shadow_queue.h
new file mode 100644
index 000000000000..a4b3818f9c39
--- /dev/null
+++ b/drivers/infiniband/hw/mana/shadow_queue.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef _MANA_SHADOW_QUEUE_H_
+#define _MANA_SHADOW_QUEUE_H_
+
+struct shadow_wqe_header {
+ u16 opcode;
+ u16 error_code;
+ u32 posted_wqe_size;
+ u64 wr_id;
+};
+
+struct ud_rq_shadow_wqe {
+ struct shadow_wqe_header header;
+ u32 byte_len;
+ u32 src_qpn;
+};
+
+struct ud_sq_shadow_wqe {
+ struct shadow_wqe_header header;
+};
+
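+/*
+ * Ring of shadow WQEs tracked with three unmasked indices: prod_idx
+ * advances when a WQE is posted, next_to_complete_idx advances when the
+ * hardware reports a completion, and cons_idx advances when that
+ * completion is handed back to the caller.
+ */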
+struct shadow_queue {
+ /* Unmasked producer index, incremented on WQE posting */
+ u64 prod_idx;
+ /* Unmasked consumer index, incremented on CQ polling */
+ u64 cons_idx;
+ /* Unmasked index of next-to-complete (from HW) shadow WQE */
+ u64 next_to_complete_idx;
+ /* queue size in wqes */
+ u32 length;
+ /* distance between elements in bytes */
+ u32 stride;
+ /* ring buffer holding wqes */
+ void *buffer;
+};
+
+static inline int create_shadow_queue(struct shadow_queue *queue, uint32_t length, uint32_t stride)
+{
+ queue->buffer = kvmalloc_array(length, stride, GFP_KERNEL);
+ if (!queue->buffer)
+ return -ENOMEM;
+
+ queue->length = length;
+ queue->stride = stride;
+
+ return 0;
+}
+
+static inline void destroy_shadow_queue(struct shadow_queue *queue)
+{
+ kvfree(queue->buffer);
+}
+
+static inline bool shadow_queue_full(struct shadow_queue *queue)
+{
+ return (queue->prod_idx - queue->cons_idx) >= queue->length;
+}
+
+static inline bool shadow_queue_empty(struct shadow_queue *queue)
+{
+ return queue->prod_idx == queue->cons_idx;
+}
+
+static inline void *
+shadow_queue_get_element(const struct shadow_queue *queue, u64 unmasked_index)
+{
+ u32 index = unmasked_index % queue->length;
+
+ return ((u8 *)queue->buffer + index * queue->stride);
+}
+
+static inline void *
+shadow_queue_producer_entry(struct shadow_queue *queue)
+{
+ return shadow_queue_get_element(queue, queue->prod_idx);
+}
+
+static inline void *
+shadow_queue_get_next_to_consume(const struct shadow_queue *queue)
+{
+ if (queue->cons_idx == queue->next_to_complete_idx)
+ return NULL;
+
+ return shadow_queue_get_element(queue, queue->cons_idx);
+}
+
+static inline void *
+shadow_queue_get_next_to_complete(struct shadow_queue *queue)
+{
+ if (queue->next_to_complete_idx == queue->prod_idx)
+ return NULL;
+
+ return shadow_queue_get_element(queue, queue->next_to_complete_idx);
+}
+
+static inline void shadow_queue_advance_producer(struct shadow_queue *queue)
+{
+ queue->prod_idx++;
+}
+
+static inline void shadow_queue_advance_consumer(struct shadow_queue *queue)
+{
+ queue->cons_idx++;
+}
+
+static inline void shadow_queue_advance_next_to_complete(struct shadow_queue *queue)
+{
+ queue->next_to_complete_idx++;
+}
+
+#endif
diff --git a/drivers/infiniband/hw/mana/wq.c b/drivers/infiniband/hw/mana/wq.c
new file mode 100644
index 000000000000..f959f4b9244f
--- /dev/null
+++ b/drivers/infiniband/hw/mana/wq.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
+ struct ib_wq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev =
+ container_of(pd->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_create_wq ucmd = {};
+ struct mana_ib_wq *wq;
+ int err;
+
+ if (udata->inlen < sizeof(ucmd))
+ return ERR_PTR(-EINVAL);
+
+ err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Failed to copy from udata for create wq, %d\n", err);
+ return ERR_PTR(err);
+ }
+
+ wq = kzalloc(sizeof(*wq), GFP_KERNEL);
+ if (!wq)
+ return ERR_PTR(-ENOMEM);
+
+ ibdev_dbg(&mdev->ib_dev, "ucmd wq_buf_addr 0x%llx\n", ucmd.wq_buf_addr);
+
+ err = mana_ib_create_queue(mdev, ucmd.wq_buf_addr, ucmd.wq_buf_size, &wq->queue);
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Failed to create queue for create wq, %d\n", err);
+ goto err_free_wq;
+ }
+
+ wq->wqe = init_attr->max_wr;
+ wq->wq_buf_size = ucmd.wq_buf_size;
+ wq->rx_object = INVALID_MANA_HANDLE;
+ return &wq->ibwq;
+
+err_free_wq:
+ kfree(wq);
+
+ return ERR_PTR(err);
+}
+
+int mana_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
+ u32 wq_attr_mask, struct ib_udata *udata)
+{
+ /* modify_wq is not supported by this version of the driver */
+ return -EOPNOTSUPP;
+}
+
+int mana_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata)
+{
+ struct mana_ib_wq *wq = container_of(ibwq, struct mana_ib_wq, ibwq);
+ struct ib_device *ib_dev = ibwq->device;
+ struct mana_ib_dev *mdev;
+
+ mdev = container_of(ib_dev, struct mana_ib_dev, ib_dev);
+
+ mana_ib_destroy_queue(mdev, &wq->queue);
+
+ kfree(wq);
+
+ return 0;
+}
+
+int mana_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ /*
+ * There is no additional data in ind_table to be maintained by this
+ * driver, do nothing
+ */
+ return 0;
+}
+
+int mana_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
+{
+ /*
+ * There is no additional data in ind_table to be maintained by this
+ * driver, do nothing
+ */
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mana/wr.c b/drivers/infiniband/hw/mana/wr.c
new file mode 100644
index 000000000000..1813567d3b16
--- /dev/null
+++ b/drivers/infiniband/hw/mana/wr.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+#define MAX_WR_SGL_NUM (2)
+
+static int mana_ib_post_recv_ud(struct mana_ib_qp *qp, const struct ib_recv_wr *wr)
+{
+ struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ struct gdma_queue *queue = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem;
+ struct gdma_posted_wqe_info wqe_info = {0};
+ struct gdma_sge gdma_sgl[MAX_WR_SGL_NUM];
+ struct gdma_wqe_request wqe_req = {0};
+ struct ud_rq_shadow_wqe *shadow_wqe;
+ int err, i;
+
+ if (shadow_queue_full(&qp->shadow_rq))
+ return -EINVAL;
+
+ if (wr->num_sge > MAX_WR_SGL_NUM)
+ return -EINVAL;
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ gdma_sgl[i].address = wr->sg_list[i].addr;
+ gdma_sgl[i].mem_key = wr->sg_list[i].lkey;
+ gdma_sgl[i].size = wr->sg_list[i].length;
+ }
+ wqe_req.num_sge = wr->num_sge;
+ wqe_req.sgl = gdma_sgl;
+
+ err = mana_gd_post_work_request(queue, &wqe_req, &wqe_info);
+ if (err)
+ return err;
+
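+	/* Record wr_id and posted size in the shadow queue until the
+	 * corresponding completion is polled.
+	 */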
+ shadow_wqe = shadow_queue_producer_entry(&qp->shadow_rq);
+ memset(shadow_wqe, 0, sizeof(*shadow_wqe));
+ shadow_wqe->header.opcode = IB_WC_RECV;
+ shadow_wqe->header.wr_id = wr->wr_id;
+ shadow_wqe->header.posted_wqe_size = wqe_info.wqe_size_in_bu;
+ shadow_queue_advance_producer(&qp->shadow_rq);
+
+ mana_gd_wq_ring_doorbell(mdev_to_gc(mdev), queue);
+ return 0;
+}
+
+int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ int err = 0;
+
+ for (; wr; wr = wr->next) {
+ switch (ibqp->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ err = mana_ib_post_recv_ud(qp, wr);
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ return err;
+ }
+ break;
+ default:
+ ibdev_dbg(ibqp->device, "Posting recv wr on qp type %u is not supported\n",
+ ibqp->qp_type);
+ return -EINVAL;
+ }
+ }
+
+ return err;
+}
+
+static int mana_ib_post_send_ud(struct mana_ib_qp *qp, const struct ib_ud_wr *wr)
+{
+ struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_ah *ah = container_of(wr->ah, struct mana_ib_ah, ibah);
+ struct net_device *ndev = mana_ib_get_netdev(&mdev->ib_dev, qp->port);
+ struct gdma_queue *queue = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].kmem;
+ struct gdma_sge gdma_sgl[MAX_WR_SGL_NUM + 1];
+ struct gdma_posted_wqe_info wqe_info = {0};
+ struct gdma_wqe_request wqe_req = {0};
+ struct rdma_send_oob send_oob = {0};
+ struct ud_sq_shadow_wqe *shadow_wqe;
+ int err, i;
+
+ if (!ndev) {
+ ibdev_dbg(&mdev->ib_dev, "Invalid port %u in QP %u\n",
+ qp->port, qp->ibqp.qp_num);
+ return -EINVAL;
+ }
+
+ if (wr->wr.opcode != IB_WR_SEND)
+ return -EINVAL;
+
+ if (shadow_queue_full(&qp->shadow_sq))
+ return -EINVAL;
+
+ if (wr->wr.num_sge > MAX_WR_SGL_NUM)
+ return -EINVAL;
+
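+	/* The first SGE carries the DMA-mapped address vector of the AH;
+	 * the caller's payload SGEs follow it.
+	 */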
+ gdma_sgl[0].address = ah->dma_handle;
+ gdma_sgl[0].mem_key = qp->ibqp.pd->local_dma_lkey;
+ gdma_sgl[0].size = sizeof(struct mana_ib_av);
+ for (i = 0; i < wr->wr.num_sge; ++i) {
+ gdma_sgl[i + 1].address = wr->wr.sg_list[i].addr;
+ gdma_sgl[i + 1].mem_key = wr->wr.sg_list[i].lkey;
+ gdma_sgl[i + 1].size = wr->wr.sg_list[i].length;
+ }
+
+ wqe_req.num_sge = wr->wr.num_sge + 1;
+ wqe_req.sgl = gdma_sgl;
+ wqe_req.inline_oob_size = sizeof(struct rdma_send_oob);
+ wqe_req.inline_oob_data = &send_oob;
+ wqe_req.flags = GDMA_WR_OOB_IN_SGL;
+ wqe_req.client_data_unit = ib_mtu_enum_to_int(ib_mtu_int_to_enum(ndev->mtu));
+
+ send_oob.wqe_type = WQE_TYPE_UD_SEND;
+ send_oob.fence = !!(wr->wr.send_flags & IB_SEND_FENCE);
+ send_oob.signaled = !!(wr->wr.send_flags & IB_SEND_SIGNALED);
+ send_oob.solicited = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
+ send_oob.psn = qp->ud_qp.sq_psn;
+ send_oob.ssn_or_rqpn = wr->remote_qpn;
+ send_oob.ud_send.remote_qkey =
+ qp->ibqp.qp_type == IB_QPT_GSI ? IB_QP1_QKEY : wr->remote_qkey;
+
+ err = mana_gd_post_work_request(queue, &wqe_req, &wqe_info);
+ if (err)
+ return err;
+
+ qp->ud_qp.sq_psn++;
+ shadow_wqe = shadow_queue_producer_entry(&qp->shadow_sq);
+ memset(shadow_wqe, 0, sizeof(*shadow_wqe));
+ shadow_wqe->header.opcode = IB_WC_SEND;
+ shadow_wqe->header.wr_id = wr->wr.wr_id;
+ shadow_wqe->header.posted_wqe_size = wqe_info.wqe_size_in_bu;
+ shadow_queue_advance_producer(&qp->shadow_sq);
+
+ mana_gd_wq_ring_doorbell(mdev_to_gc(mdev), queue);
+ return 0;
+}
+
+int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
+{
+ int err;
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+
+ for (; wr; wr = wr->next) {
+ switch (ibqp->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ err = mana_ib_post_send_ud(qp, ud_wr(wr));
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ return err;
+ }
+ break;
+ default:
+ ibdev_dbg(ibqp->device, "Posting send wr on qp type %u is not supported\n",
+ ibqp->qp_type);
+ return -EINVAL;
+ }
+ }
+
+ return err;
+}
diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig
index fc01deac1d3c..f30ce9dd080a 100644
--- a/drivers/infiniband/hw/mlx4/Kconfig
+++ b/drivers/infiniband/hw/mlx4/Kconfig
@@ -1,9 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
config MLX4_INFINIBAND
tristate "Mellanox ConnectX HCA support"
depends on NETDEVICES && ETHERNET && PCI && INET
select NET_VENDOR_MELLANOX
select MLX4_CORE
- ---help---
+ help
This driver provides low-level InfiniBand support for
Mellanox ConnectX PCI Express host channel adapters (HCAs).
This is required to use InfiniBand protocols such as
diff --git a/drivers/infiniband/hw/mlx4/Makefile b/drivers/infiniband/hw/mlx4/Makefile
index f4213b3a8fe1..7b6757b02857 100644
--- a/drivers/infiniband/hw/mlx4/Makefile
+++ b/drivers/infiniband/hw/mlx4/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o
mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o mcg.o cm.o alias_GUID.o sysfs.o
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 02a169f8027b..7321d6ab5fe1 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -141,10 +141,11 @@ static int create_iboe_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr)
return 0;
}
-int mlx4_ib_create_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr,
- u32 flags, struct ib_udata *udata)
-
+int mlx4_ib_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
{
+ struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
+
if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
return -EINVAL;
@@ -167,12 +168,14 @@ int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr,
int slave_sgid_index, u8 *s_mac, u16 vlan_tag)
{
struct rdma_ah_attr slave_attr = *ah_attr;
+ struct rdma_ah_init_attr init_attr = {};
struct mlx4_ib_ah *mah = to_mah(ah);
int ret;
slave_attr.grh.sgid_attr = NULL;
slave_attr.grh.sgid_index = slave_sgid_index;
- ret = mlx4_ib_create_ah(ah, &slave_attr, 0, NULL);
+ init_attr.ah_attr = &slave_attr;
+ ret = mlx4_ib_create_ah(ah, &init_attr, NULL);
if (ret)
return ret;
@@ -229,8 +232,3 @@ int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
return 0;
}
-
-void mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags)
-{
- return;
-}
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index 2a0b59a4b6eb..d7327735b8d0 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -38,7 +38,6 @@
#include <rdma/ib_sa.h>
#include <rdma/ib_pack.h>
#include <linux/mlx4/cmd.h>
-#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <rdma/ib_user_verbs.h>
@@ -73,12 +72,12 @@ static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
int *resched_delay_sec);
void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
- u8 port_num, u8 *p_data)
+ u32 port_num, u8 *p_data)
{
int i;
u64 guid_indexes;
int slave_id;
- int port_index = port_num - 1;
+ u32 port_index = port_num - 1;
if (!mlx4_is_master(dev->dev))
return;
@@ -86,7 +85,7 @@ void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
ports_guid[port_num - 1].
all_rec_per_port[block_num].guid_indexes);
- pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes);
+ pr_debug("port: %u, guid_indexes: 0x%llx\n", port_num, guid_indexes);
for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
/* The location of the specific index starts from bit number 4
@@ -184,7 +183,7 @@ unlock:
* port_number - 1 or 2
*/
void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
- int block_num, u8 port_num,
+ int block_num, u32 port_num,
u8 *p_data)
{
int i;
@@ -206,7 +205,7 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
ports_guid[port_num - 1].
all_rec_per_port[block_num].guid_indexes);
- pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes);
+ pr_debug("port: %u, guid_indexes: 0x%llx\n", port_num, guid_indexes);
/*calculate the slaves and notify them*/
for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
@@ -260,11 +259,11 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
new_state = set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID,
&gen_event);
- pr_debug("slave: %d, port: %d prev_port_state: %d,"
+ pr_debug("slave: %d, port: %u prev_port_state: %d,"
" new_port_state: %d, gen_event: %d\n",
slave_id, port_num, prev_state, new_state, gen_event);
if (gen_event == SLAVE_PORT_GEN_EVENT_UP) {
- pr_debug("sending PORT_UP event to slave: %d, port: %d\n",
+ pr_debug("sending PORT_UP event to slave: %d, port: %u\n",
slave_id, port_num);
mlx4_gen_port_state_change_eqe(dev->dev, slave_id,
port_num, MLX4_PORT_CHANGE_SUBTYPE_ACTIVE);
@@ -274,7 +273,7 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
&gen_event);
if (gen_event == SLAVE_PORT_GEN_EVENT_DOWN) {
- pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
+ pr_debug("sending PORT DOWN event to slave: %d, port: %u\n",
slave_id, port_num);
mlx4_gen_port_state_change_eqe(dev->dev,
slave_id,
@@ -310,7 +309,7 @@ static void aliasguid_query_handler(int status,
if (status) {
pr_debug("(port: %d) failed: status = %d\n",
cb_ctx->port, status);
- rec->time_to_run = ktime_get_boot_ns() + 1 * NSEC_PER_SEC;
+ rec->time_to_run = ktime_get_boottime_ns() + 1 * NSEC_PER_SEC;
goto out;
}
@@ -416,7 +415,7 @@ next_entry:
be64_to_cpu((__force __be64)rec->guid_indexes),
be64_to_cpu((__force __be64)applied_guid_indexes),
be64_to_cpu((__force __be64)declined_guid_indexes));
- rec->time_to_run = ktime_get_boot_ns() +
+ rec->time_to_run = ktime_get_boottime_ns() +
resched_delay_sec * NSEC_PER_SEC;
} else {
rec->status = MLX4_GUID_INFO_STATUS_SET;
@@ -709,7 +708,7 @@ static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
}
}
if (resched_delay_sec) {
- u64 curr_time = ktime_get_boot_ns();
+ u64 curr_time = ktime_get_boottime_ns();
*resched_delay_sec = (low_record_time < curr_time) ? 0 :
div_u64((low_record_time - curr_time), NSEC_PER_SEC);
@@ -822,17 +821,14 @@ void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev)
}
spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
}
- for (i = 0 ; i < dev->num_ports; i++) {
- flush_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
+ for (i = 0 ; i < dev->num_ports; i++)
destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
- }
ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
kfree(dev->sriov.alias_guid.sa_client);
}
int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
{
- char alias_wq_name[15];
int ret = 0;
int i, j;
union ib_gid gid;
@@ -878,9 +874,8 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid;
dev->sriov.alias_guid.ports_guid[i].port = i;
- snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i);
dev->sriov.alias_guid.ports_guid[i].wq =
- alloc_ordered_workqueue(alias_wq_name, WQ_MEM_RECLAIM);
+ alloc_ordered_workqueue("alias_guid%d", WQ_MEM_RECLAIM, i);
if (!dev->sriov.alias_guid.ports_guid[i].wq) {
ret = -ENOMEM;
goto err_thread;
diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
index ecd6cadd529a..03aacd526860 100644
--- a/drivers/infiniband/hw/mlx4/cm.c
+++ b/drivers/infiniband/hw/mlx4/cm.c
@@ -54,11 +54,20 @@ struct id_map_entry {
struct delayed_work timeout;
};
+struct rej_tmout_entry {
+ int slave;
+ u32 rem_pv_cm_id;
+ struct delayed_work timeout;
+ struct xarray *xa_rej_tmout;
+};
+
struct cm_generic_msg {
struct ib_mad_hdr hdr;
__be32 local_comm_id;
__be32 remote_comm_id;
+ unsigned char unused[2];
+ __be16 rej_reason;
};
struct cm_sidr_generic_msg {
@@ -71,6 +80,7 @@ struct cm_req_msg {
union ib_gid primary_path_sgid;
};
+static struct workqueue_struct *cm_wq;
static void set_local_comm_id(struct ib_mad *mad, u32 cm_id)
{
@@ -186,23 +196,6 @@ out:
kfree(ent);
}
-static void id_map_find_del(struct ib_device *ibdev, int pv_cm_id)
-{
- struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
- struct rb_root *sl_id_map = &sriov->sl_id_map;
- struct id_map_entry *ent, *found_ent;
-
- spin_lock(&sriov->id_map_lock);
- ent = xa_erase(&sriov->pv_id_table, pv_cm_id);
- if (!ent)
- goto out;
- found_ent = id_map_find_by_sl_id(ibdev, ent->slave_id, ent->sl_cm_id);
- if (found_ent && found_ent == ent)
- rb_erase(&found_ent->node, sl_id_map);
-out:
- spin_unlock(&sriov->id_map_lock);
-}
-
static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new)
{
struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
@@ -294,14 +287,18 @@ static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id)
spin_lock(&sriov->id_map_lock);
spin_lock_irqsave(&sriov->going_down_lock, flags);
/*make sure that there is no schedule inside the scheduled work.*/
- if (!sriov->is_going_down) {
+ if (!sriov->is_going_down && !id->scheduled_delete) {
id->scheduled_delete = 1;
- schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
+ queue_delayed_work(cm_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
+ } else if (id->scheduled_delete) {
+ /* Adjust timeout if already scheduled */
+ mod_delayed_work(cm_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
}
spin_unlock_irqrestore(&sriov->going_down_lock, flags);
spin_unlock(&sriov->id_map_lock);
}
+#define REJ_REASON(m) be16_to_cpu(((struct cm_generic_msg *)(m))->rej_reason)
int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
struct ib_mad *mad)
{
@@ -310,8 +307,10 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
int pv_cm_id = -1;
if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
- mad->mad_hdr.attr_id == CM_REP_ATTR_ID ||
- mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+ mad->mad_hdr.attr_id == CM_REP_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_MRA_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID ||
+ (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID && REJ_REASON(mad) == IB_CM_REJ_TIMEOUT)) {
sl_cm_id = get_local_comm_id(mad);
id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id);
if (id)
@@ -331,8 +330,8 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
}
if (!id) {
- pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL!\n",
- slave_id, sl_cm_id);
+ pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL! attr_id: 0x%x\n",
+ slave_id, sl_cm_id, be16_to_cpu(mad->mad_hdr.attr_id));
return -EINVAL;
}
@@ -341,17 +340,97 @@ cont:
if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
schedule_delayed(ibdev, id);
- else if (mad->mad_hdr.attr_id == CM_DREP_ATTR_ID)
- id_map_find_del(ibdev, pv_cm_id);
+ return 0;
+}
+
+static void rej_tmout_timeout(struct work_struct *work)
+{
+ struct delayed_work *delay = to_delayed_work(work);
+ struct rej_tmout_entry *item = container_of(delay, struct rej_tmout_entry, timeout);
+ struct rej_tmout_entry *deleted;
+
+ deleted = xa_cmpxchg(item->xa_rej_tmout, item->rem_pv_cm_id, item, NULL, 0);
+
+ if (deleted != item)
+ pr_debug("deleted(%p) != item(%p)\n", deleted, item);
+
+ kfree(item);
+}
+
+static int alloc_rej_tmout(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id, int slave)
+{
+ struct rej_tmout_entry *item;
+ struct rej_tmout_entry *old;
+ int ret = 0;
+
+ xa_lock(&sriov->xa_rej_tmout);
+ item = xa_load(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id);
+
+ if (item) {
+ if (xa_err(item))
+ ret = xa_err(item);
+ else
+ /* If a retry, adjust delayed work */
+ mod_delayed_work(cm_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
+ goto err_or_exists;
+ }
+ xa_unlock(&sriov->xa_rej_tmout);
+
+ item = kmalloc(sizeof(*item), GFP_KERNEL);
+ if (!item)
+ return -ENOMEM;
+
+ INIT_DELAYED_WORK(&item->timeout, rej_tmout_timeout);
+ item->slave = slave;
+ item->rem_pv_cm_id = rem_pv_cm_id;
+ item->xa_rej_tmout = &sriov->xa_rej_tmout;
+
+ old = xa_cmpxchg(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id, NULL, item, GFP_KERNEL);
+ if (old) {
+ pr_debug(
+ "Non-null old entry (%p) or error (%d) when inserting\n",
+ old, xa_err(old));
+ kfree(item);
+ return xa_err(old);
+ }
+
+ queue_delayed_work(cm_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
return 0;
+
+err_or_exists:
+ xa_unlock(&sriov->xa_rej_tmout);
+ return ret;
+}
+
+static int lookup_rej_tmout_slave(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id)
+{
+ struct rej_tmout_entry *item;
+ int slave;
+
+ xa_lock(&sriov->xa_rej_tmout);
+ item = xa_load(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id);
+
+ if (!item || xa_err(item)) {
+ pr_debug("Could not find slave. rem_pv_cm_id 0x%x error: %d\n",
+ rem_pv_cm_id, xa_err(item));
+ slave = !item ? -ENOENT : xa_err(item);
+ } else {
+ slave = item->slave;
+ }
+ xa_unlock(&sriov->xa_rej_tmout);
+
+ return slave;
}
int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
struct ib_mad *mad)
{
+ struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
+ u32 rem_pv_cm_id = get_local_comm_id(mad);
u32 pv_cm_id;
struct id_map_entry *id;
+ int sts;
if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
@@ -367,6 +446,13 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
be64_to_cpu(gid.global.interface_id));
return -ENOENT;
}
+
+ sts = alloc_rej_tmout(sriov, rem_pv_cm_id, *slave);
+ if (sts)
+ /* Even if this fails, we pass on the REQ to the slave */
+ pr_debug("Could not allocate rej_tmout entry. rem_pv_cm_id 0x%x slave %d status %d\n",
+ rem_pv_cm_id, *slave, sts);
+
return 0;
}
@@ -374,7 +460,14 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
id = id_map_get(ibdev, (int *)&pv_cm_id, -1, -1);
if (!id) {
- pr_debug("Couldn't find an entry for pv_cm_id 0x%x\n", pv_cm_id);
+ if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID &&
+ REJ_REASON(mad) == IB_CM_REJ_TIMEOUT && slave) {
+ *slave = lookup_rej_tmout_slave(sriov, rem_pv_cm_id);
+
+ return (*slave < 0) ? *slave : 0;
+ }
+ pr_debug("Couldn't find an entry for pv_cm_id 0x%x, attr_id 0x%x\n",
+ pv_cm_id, be16_to_cpu(mad->mad_hdr.attr_id));
return -ENOENT;
}
@@ -382,12 +475,9 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
*slave = id->slave_id;
set_remote_comm_id(mad, id->sl_cm_id);
- if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
+ if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_REJ_ATTR_ID)
schedule_delayed(ibdev, id);
- else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID ||
- mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) {
- id_map_find_del(ibdev, (int) pv_cm_id);
- }
return 0;
}
@@ -398,6 +488,34 @@ void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev)
INIT_LIST_HEAD(&dev->sriov.cm_list);
dev->sriov.sl_id_map = RB_ROOT;
xa_init_flags(&dev->sriov.pv_id_table, XA_FLAGS_ALLOC);
+ xa_init(&dev->sriov.xa_rej_tmout);
+}
+
+static void rej_tmout_xa_cleanup(struct mlx4_ib_sriov *sriov, int slave)
+{
+ struct rej_tmout_entry *item;
+ bool flush_needed = false;
+ unsigned long id;
+ int cnt = 0;
+
+ xa_lock(&sriov->xa_rej_tmout);
+ xa_for_each(&sriov->xa_rej_tmout, id, item) {
+ if (slave < 0 || slave == item->slave) {
+ mod_delayed_work(cm_wq, &item->timeout, 0);
+ flush_needed = true;
+ ++cnt;
+ }
+ }
+ xa_unlock(&sriov->xa_rej_tmout);
+
+ if (flush_needed) {
+ flush_workqueue(cm_wq);
+ pr_debug("Deleted %d entries in xarray for slave %d during cleanup\n",
+ cnt, slave);
+ }
+
+ if (slave < 0)
+ WARN_ON(!xa_empty(&sriov->xa_rej_tmout));
}
/* slave = -1 ==> all slaves */
@@ -423,7 +541,7 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
spin_unlock(&sriov->id_map_lock);
if (need_flush)
- flush_scheduled_work(); /* make sure all timers were flushed */
+ flush_workqueue(cm_wq); /* make sure all timers were flushed */
/* now, remove all leftover entries from databases*/
spin_lock(&sriov->id_map_lock);
@@ -467,4 +585,20 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
list_del(&map->list);
kfree(map);
}
+
+ rej_tmout_xa_cleanup(sriov, slave);
+}
+
+int mlx4_ib_cm_init(void)
+{
+ cm_wq = alloc_workqueue("mlx4_ib_cm", WQ_PERCPU, 0);
+ if (!cm_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx4_ib_cm_destroy(void)
+{
+ destroy_workqueue(cm_wq);
}
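/*
 * Editor's note, not part of the patch above: the rej_tmout entries added to
 * cm.c follow a common kernel pattern, an object stored in an xarray under a
 * numeric key plus a delayed work that removes it when it expires, with
 * xa_cmpxchg() keeping insertion and removal race-free.  A minimal sketch;
 * all demo_* names are made up for illustration:
 */
#include <linux/xarray.h>
#include <linux/workqueue.h>
#include <linux/slab.h>

struct demo_entry {
	unsigned long key;
	struct delayed_work timeout;
	struct xarray *xa;
};

static void demo_timeout(struct work_struct *work)
{
	struct demo_entry *e = container_of(to_delayed_work(work),
					    struct demo_entry, timeout);

	/* Drop the entry only if it is still the one this work was armed for. */
	xa_cmpxchg(e->xa, e->key, e, NULL, 0);
	kfree(e);
}

static int demo_arm(struct xarray *xa, struct workqueue_struct *wq,
		    unsigned long key, unsigned long delay)
{
	struct demo_entry *e = kmalloc(sizeof(*e), GFP_KERNEL);

	if (!e)
		return -ENOMEM;
	e->key = key;
	e->xa = xa;
	INIT_DELAYED_WORK(&e->timeout, demo_timeout);

	/* Insert only when the key is still free, as alloc_rej_tmout() does. */
	if (xa_cmpxchg(xa, key, NULL, e, GFP_KERNEL)) {
		kfree(e);
		return -EEXIST;
	}
	queue_delayed_work(wq, &e->timeout, delay);
	return 0;
}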
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 022a0b4ea452..c592374f4a58 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -135,7 +135,7 @@ static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf);
}
-static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_udata *udata,
+static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev,
struct mlx4_ib_cq_buf *buf,
struct ib_umem **umem, u64 buf_addr, int cqe)
{
@@ -144,15 +144,18 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_udata *udata,
int shift;
int n;
- *umem = ib_umem_get(udata, buf_addr, cqe * cqe_size,
- IB_ACCESS_LOCAL_WRITE, 1);
+ *umem = ib_umem_get(&dev->ib_dev, buf_addr, cqe * cqe_size,
+ IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(*umem))
return PTR_ERR(*umem);
- n = ib_umem_page_count(*umem);
shift = mlx4_ib_umem_calc_optimal_mtt_size(*umem, 0, &n);
- err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt);
+ if (shift < 0) {
+ err = shift;
+ goto err_buf;
+ }
+ err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt);
if (err)
goto err_buf;
@@ -172,14 +175,15 @@ err_buf:
}
#define CQ_CREATE_FLAGS_SUPPORTED IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION
-struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs)
{
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
int vector = attr->comp_vector;
struct mlx4_ib_dev *dev = to_mdev(ibdev);
- struct mlx4_ib_cq *cq;
+ struct mlx4_ib_cq *cq = to_mcq(ibcq);
struct mlx4_uar *uar;
void *buf_addr;
int err;
@@ -187,14 +191,10 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
udata, struct mlx4_ib_ucontext, ibucontext);
if (entries < 1 || entries > dev->dev->caps.max_cqes)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED)
- return ERR_PTR(-EINVAL);
-
- cq = kzalloc(sizeof(*cq), GFP_KERNEL);
- if (!cq)
- return ERR_PTR(-ENOMEM);
+ return -EINVAL;
entries = roundup_pow_of_two(entries + 1);
cq->ibcq.cqe = entries - 1;
@@ -215,7 +215,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
}
buf_addr = (void *)(unsigned long)ucmd.buf_addr;
- err = mlx4_ib_get_cq_umem(dev, udata, &cq->buf, &cq->umem,
+ err = mlx4_ib_get_cq_umem(dev, &cq->buf, &cq->umem,
ucmd.buf_addr, entries);
if (err)
goto err_cq;
@@ -269,7 +269,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
goto err_cq_free;
}
- return &cq->ibcq;
+ return 0;
err_cq_free:
mlx4_cq_free(dev->dev, &cq->mcq);
@@ -281,19 +281,15 @@ err_dbmap:
err_mtt:
mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt);
- if (udata)
- ib_umem_release(cq->umem);
- else
+ ib_umem_release(cq->umem);
+ if (!udata)
mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
err_db:
if (!udata)
mlx4_db_free(dev->dev, &cq->db);
-
err_cq:
- kfree(cq);
-
- return ERR_PTR(err);
+ return err;
}
static int mlx4_alloc_resize_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq,
@@ -336,8 +332,8 @@ static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq
if (!cq->resize_buf)
return -ENOMEM;
- err = mlx4_ib_get_cq_umem(dev, udata, &cq->resize_buf->buf,
- &cq->resize_umem, ucmd.buf_addr, entries);
+ err = mlx4_ib_get_cq_umem(dev, &cq->resize_buf->buf, &cq->resize_umem,
+ ucmd.buf_addr, entries);
if (err) {
kfree(cq->resize_buf);
cq->resize_buf = NULL;
@@ -475,11 +471,8 @@ err_buf:
kfree(cq->resize_buf);
cq->resize_buf = NULL;
- if (cq->resize_umem) {
- ib_umem_release(cq->resize_umem);
- cq->resize_umem = NULL;
- }
-
+ ib_umem_release(cq->resize_umem);
+ cq->resize_umem = NULL;
out:
mutex_unlock(&cq->resize_mutex);
@@ -501,14 +494,11 @@ int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
struct mlx4_ib_ucontext,
ibucontext),
&mcq->db);
- ib_umem_release(mcq->umem);
} else {
mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe);
mlx4_db_free(dev->dev, &mcq->db);
}
-
- kfree(mcq);
-
+ ib_umem_release(mcq->umem);
return 0;
}
@@ -583,18 +573,13 @@ static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe,
wc->vendor_err = cqe->vendor_err_syndrome;
}
-static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum)
+static int mlx4_ib_ipoib_csum_ok(__be16 status, u8 badfcs_enc, __be16 checksum)
{
- return ((status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
- MLX4_CQE_STATUS_IPV4F |
- MLX4_CQE_STATUS_IPV4OPT |
- MLX4_CQE_STATUS_IPV6 |
- MLX4_CQE_STATUS_IPOK)) ==
- cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
- MLX4_CQE_STATUS_IPOK)) &&
- (status & cpu_to_be16(MLX4_CQE_STATUS_UDP |
- MLX4_CQE_STATUS_TCP)) &&
- checksum == cpu_to_be16(0xffff);
+ return ((badfcs_enc & MLX4_CQE_STATUS_L4_CSUM) ||
+ ((status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
+ (status & cpu_to_be16(MLX4_CQE_STATUS_TCP |
+ MLX4_CQE_STATUS_UDP)) &&
+ (checksum == cpu_to_be16(0xffff))));
}
static void use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
@@ -785,13 +770,13 @@ repoll:
switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
case MLX4_OPCODE_RDMA_WRITE_IMM:
wc->wc_flags |= IB_WC_WITH_IMM;
- /* fall through */
+ fallthrough;
case MLX4_OPCODE_RDMA_WRITE:
wc->opcode = IB_WC_RDMA_WRITE;
break;
case MLX4_OPCODE_SEND_IMM:
wc->wc_flags |= IB_WC_WITH_IMM;
- /* fall through */
+ fallthrough;
case MLX4_OPCODE_SEND:
case MLX4_OPCODE_SEND_INVAL:
wc->opcode = IB_WC_SEND;
@@ -870,6 +855,7 @@ repoll:
wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0;
wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status,
+ cqe->badfcs_enc,
cqe->checksum) ? IB_WC_IP_CSUM_OK : 0;
if (is_eth) {
wc->slid = 0;
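/*
 * Editor's note, not part of the patch above: the cq.c poll path replaces the
 * old "fall through" comments with the fallthrough pseudo-keyword, so the
 * compiler can verify that each implicit switch fall-through is intentional.
 * A minimal sketch; demo_classify() is made up for illustration:
 */
#include <linux/compiler.h>
#include <linux/errno.h>

static int demo_classify(int opcode)
{
	switch (opcode) {
	case 1:
		/* Opcode 1 shares the handling of opcode 2. */
		fallthrough;
	case 2:
		return 0;
	default:
		return -EINVAL;
	}
}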
diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c
index 0f390351cef0..9bbd695a9fd5 100644
--- a/drivers/infiniband/hw/mlx4/doorbell.c
+++ b/drivers/infiniband/hw/mlx4/doorbell.c
@@ -64,7 +64,8 @@ int mlx4_ib_db_map_user(struct ib_udata *udata, unsigned long virt,
page->user_virt = (virt & PAGE_MASK);
page->refcnt = 0;
- page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0);
+ page->umem = ib_umem_get(context->ibucontext.device, virt & PAGE_MASK,
+ PAGE_SIZE, 0);
if (IS_ERR(page->umem)) {
err = PTR_ERR(page->umem);
kfree(page);
@@ -74,7 +75,8 @@ int mlx4_ib_db_map_user(struct ib_udata *udata, unsigned long virt,
list_add(&page->list, &context->db_page_list);
found:
- db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK);
+ db->dma = sg_dma_address(page->umem->sgt_append.sgt.sgl) +
+ (virt & ~PAGE_MASK);
db->u.user_page = page;
++page->refcnt;
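/*
 * Editor's note, not part of the patch above: the doorbell change tracks two
 * core API moves, ib_umem_get() now takes the ib_device rather than a udata,
 * and the pinned pages sit in the appendable sg-table at umem->sgt_append.sgt.
 * A hedged sketch of reading the first DMA address back out of a umem;
 * demo_first_dma_addr() is a made-up helper:
 */
#include <rdma/ib_umem.h>

static dma_addr_t demo_first_dma_addr(struct ib_umem *umem)
{
	/* First scatterlist entry of the mapped user buffer. */
	return sg_dma_address(umem->sgt_append.sgt.sgl);
}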
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 68c951491a08..91c714f72099 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -88,15 +88,15 @@ struct mlx4_rcv_tunnel_mad {
struct ib_mad mad;
} __packed;
-static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num);
-static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num);
+static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u32 port_num);
+static void handle_lid_change_event(struct mlx4_ib_dev *dev, u32 port_num);
static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
int block, u32 change_bitmap);
__be64 mlx4_ib_gen_node_guid(void)
{
#define NODE_GUID_HI ((u64) (((u64)IB_OPENIB_OUI) << 40))
- return cpu_to_be64(NODE_GUID_HI | prandom_u32());
+ return cpu_to_be64(NODE_GUID_HI | get_random_u32());
}
__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx)
@@ -186,7 +186,7 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
return err;
}
-static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
+static void update_sm_ah(struct mlx4_ib_dev *dev, u32 port_num, u16 lid, u8 sl)
{
struct ib_ah *new_ah;
struct rdma_ah_attr ah_attr;
@@ -217,8 +217,8 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
* Snoop SM MADs for port info, GUID info, and P_Key table sets, so we can
* synthesize LID change, Client-Rereg, GID change, and P_Key change events.
*/
-static void smp_snoop(struct ib_device *ibdev, u8 port_num, const struct ib_mad *mad,
- u16 prev_lid)
+static void smp_snoop(struct ib_device *ibdev, u32 port_num,
+ const struct ib_mad *mad, u16 prev_lid)
{
struct ib_port_info *pinfo;
u16 lid;
@@ -274,7 +274,7 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, const struct ib_mad
be16_to_cpu(base[i]);
}
}
- pr_debug("PKEY Change event: port=%d, "
+ pr_debug("PKEY Change event: port=%u, "
"block=0x%x, change_bitmap=0x%x\n",
port_num, bn, pkey_change_bitmap);
@@ -380,7 +380,8 @@ static void node_desc_override(struct ib_device *dev,
}
}
-static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, const struct ib_mad *mad)
+static void forward_trap(struct mlx4_ib_dev *dev, u32 port_num,
+ const struct ib_mad *mad)
{
int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED;
struct ib_mad_send_buf *send_buf;
@@ -429,7 +430,7 @@ static int mlx4_ib_demux_sa_handler(struct ib_device *ibdev, int port, int slave
return ret;
}
-int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid)
+int mlx4_ib_find_real_gid(struct ib_device *ibdev, u32 port, __be64 guid)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
int i;
@@ -443,7 +444,7 @@ int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid)
static int find_slave_port_pkey_ix(struct mlx4_ib_dev *dev, int slave,
- u8 port, u16 pkey, u16 *ix)
+ u32 port, u16 pkey, u16 *ix)
{
int i, ret;
u8 unassigned_pkey_ix, pkey_ix, partial_ix = 0xFF;
@@ -500,7 +501,14 @@ static int get_gids_from_l3_hdr(struct ib_grh *grh, union ib_gid *sgid,
sgid, dgid);
}
-int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
+static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
+{
+ int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave;
+
+ return (qpn >= proxy_start && qpn <= proxy_start + 1);
+}
+
+int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u32 port,
enum ib_qp_type dest_qpt, struct ib_wc *wc,
struct ib_grh *grh, struct ib_mad *mad)
{
@@ -520,8 +528,10 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
u16 cached_pkey;
u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
- if (dest_qpt > IB_QPT_GSI)
+ if (dest_qpt > IB_QPT_GSI) {
+ pr_debug("dest_qpt (%d) > IB_QPT_GSI\n", dest_qpt);
return -EINVAL;
+ }
tun_ctx = dev->sriov.demux[port-1].tun[slave];
@@ -538,12 +548,20 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
if (dest_qpt) {
u16 pkey_ix;
ret = ib_get_cached_pkey(&dev->ib_dev, port, wc->pkey_index, &cached_pkey);
- if (ret)
+ if (ret) {
+ pr_debug("unable to get %s cached pkey for index %d, ret %d\n",
+ is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
+ wc->pkey_index, ret);
return -EINVAL;
+ }
ret = find_slave_port_pkey_ix(dev, slave, port, cached_pkey, &pkey_ix);
- if (ret)
+ if (ret) {
+ pr_debug("unable to get %s pkey ix for pkey 0x%x, ret %d\n",
+ is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
+ cached_pkey, ret);
return -EINVAL;
+ }
tun_pkey_ix = pkey_ix;
} else
tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
@@ -661,7 +679,7 @@ end:
return ret;
}
-static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
+static int mlx4_ib_demux_mad(struct ib_device *ibdev, u32 port,
struct ib_wc *wc, struct ib_grh *grh,
struct ib_mad *mad)
{
@@ -715,7 +733,8 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
if (err)
- pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
+ pr_debug("failed sending %s to slave %d via tunnel qp (%d)\n",
+ is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
slave, err);
return 0;
}
@@ -794,12 +813,13 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
if (err)
- pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
+ pr_debug("failed sending %s to slave %d via tunnel qp (%d)\n",
+ is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
slave, err);
return 0;
}
-static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const struct ib_mad *in_mad, struct ib_mad *out_mad)
{
@@ -807,27 +827,6 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
int err;
struct ib_port_attr pattr;
- if (in_wc && in_wc->qp) {
- pr_debug("received MAD: port:%d slid:%d sqpn:%d "
- "dlid_bits:%d dqpn:%d wc_flags:0x%x tid:%016llx cls:%x mtd:%x atr:%x\n",
- port_num,
- in_wc->slid, in_wc->src_qp,
- in_wc->dlid_path_bits,
- in_wc->qp->qp_num,
- in_wc->wc_flags,
- be64_to_cpu(in_mad->mad_hdr.tid),
- in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
- be16_to_cpu(in_mad->mad_hdr.attr_id));
- if (in_wc->wc_flags & IB_WC_GRH) {
- pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n",
- be64_to_cpu(in_grh->sgid.global.subnet_prefix),
- be64_to_cpu(in_grh->sgid.global.interface_id));
- pr_debug("dgid_hi:0x%016llx dgid_lo:0x%016llx\n",
- be64_to_cpu(in_grh->dgid.global.subnet_prefix),
- be64_to_cpu(in_grh->dgid.global.interface_id));
- }
- }
-
slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE);
if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) {
@@ -934,9 +933,10 @@ static int iboe_process_mad_port_info(void *out_mad)
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}
-static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
- const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad *in_mad, struct ib_mad *out_mad)
+static int iboe_process_mad(struct ib_device *ibdev, int mad_flags,
+ u32 port_num, const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh,
+ const struct ib_mad *in_mad, struct ib_mad *out_mad)
{
struct mlx4_counter counter_stats;
struct mlx4_ib_dev *dev = to_mdev(ibdev);
@@ -966,7 +966,6 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
}
mutex_unlock(&dev->counters_table[port_num - 1].mutex);
if (stats_avail) {
- memset(out_mad->data, 0, sizeof out_mad->data);
switch (counter_stats.counter_mode & 0xf) {
case 0:
edit_counter(&counter_stats,
@@ -982,40 +981,33 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
return err;
}
-int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in, size_t in_mad_size,
- struct ib_mad_hdr *out, size_t *out_mad_size,
- u16 *out_mad_pkey_index)
+ const struct ib_mad *in, struct ib_mad *out,
+ size_t *out_mad_size, u16 *out_mad_pkey_index)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
- const struct ib_mad *in_mad = (const struct ib_mad *)in;
- struct ib_mad *out_mad = (struct ib_mad *)out;
enum rdma_link_layer link = rdma_port_get_link_layer(ibdev, port_num);
- if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
- *out_mad_size != sizeof(*out_mad)))
- return IB_MAD_RESULT_FAILURE;
-
/* iboe_process_mad() which uses the HCA flow-counters to implement IB PMA
* queries, should be called only by VFs and for that specific purpose
*/
if (link == IB_LINK_LAYER_INFINIBAND) {
if (mlx4_is_slave(dev->dev) &&
- (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
- (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS ||
- in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT ||
- in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO)))
- return iboe_process_mad(ibdev, mad_flags, port_num, in_wc,
- in_grh, in_mad, out_mad);
+ (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
+ (in->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS ||
+ in->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT ||
+ in->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO)))
+ return iboe_process_mad(ibdev, mad_flags, port_num,
+ in_wc, in_grh, in, out);
- return ib_process_mad(ibdev, mad_flags, port_num, in_wc,
- in_grh, in_mad, out_mad);
+ return ib_process_mad(ibdev, mad_flags, port_num, in_wc, in_grh,
+ in, out);
}
if (link == IB_LINK_LAYER_ETHERNET)
return iboe_process_mad(ibdev, mad_flags, port_num, in_wc,
- in_grh, in_mad, out_mad);
+ in_grh, in, out);
return -EINVAL;
}
@@ -1083,7 +1075,7 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
}
}
-static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num)
+static void handle_lid_change_event(struct mlx4_ib_dev *dev, u32 port_num)
{
mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_LID_CHANGE);
@@ -1092,7 +1084,7 @@ static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num)
MLX4_EQ_PORT_INFO_LID_CHANGE_MASK);
}
-static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num)
+static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u32 port_num)
{
/* re-configure the alias-guid and mcg's */
if (mlx4_is_master(dev->dev)) {
@@ -1131,7 +1123,7 @@ static void propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
GET_MASK_FROM_EQE(eqe));
}
-static void handle_slaves_guid_change(struct mlx4_ib_dev *dev, u8 port_num,
+static void handle_slaves_guid_change(struct mlx4_ib_dev *dev, u32 port_num,
u32 guid_tbl_blk_num, u32 change_bitmap)
{
struct ib_smp *in_mad = NULL;
@@ -1187,7 +1179,7 @@ void handle_port_mgmt_change_event(struct work_struct *work)
struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
struct mlx4_ib_dev *dev = ew->ib_dev;
struct mlx4_eqe *eqe = &(ew->ib_eqe);
- u8 port = eqe->event.port_mgmt_change.port;
+ u32 port = eqe->event.port_mgmt_change.port;
u32 changed_attr;
u32 tbl_block;
u32 change_bitmap;
@@ -1284,7 +1276,7 @@ void handle_port_mgmt_change_event(struct work_struct *work)
kfree(ew);
}
-void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
+void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u32 port_num,
enum ib_event_type type)
{
struct ib_event event;
@@ -1307,6 +1299,18 @@ static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg)
spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
}
+static void mlx4_ib_wire_comp_handler(struct ib_cq *cq, void *arg)
+{
+ unsigned long flags;
+ struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context;
+ struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
+
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE)
+ queue_work(ctx->wi_wq, &ctx->work);
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+}
+
static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx,
struct mlx4_ib_demux_pv_qp *tun_qp,
int index)
@@ -1349,15 +1353,7 @@ static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port,
return ret;
}
-static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
-{
- int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave;
-
- return (qpn >= proxy_start && qpn <= proxy_start + 1);
-}
-
-
-int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
+int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u32 port,
enum ib_qp_type dest_qpt, u16 pkey_index,
u32 remote_qpn, u32 qkey, struct rdma_ah_attr *attr,
u8 *s_mac, u16 vlan_id, struct ib_mad *mad)
@@ -1409,10 +1405,10 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
spin_lock(&sqp->tx_lock);
if (sqp->tx_ix_head - sqp->tx_ix_tail >=
- (MLX4_NUM_TUNNEL_BUFS - 1))
+ (MLX4_NUM_WIRE_BUFS - 1))
ret = -EAGAIN;
else
- wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
+ wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_WIRE_BUFS - 1);
spin_unlock(&sqp->tx_lock);
if (ret)
goto out;
@@ -1492,6 +1488,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
u16 vlan_id;
u8 qos;
u8 *dmac;
+ int sts;
/* Get slave that sent this packet */
if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
@@ -1528,6 +1525,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
return;
} else
*slave_id = slave;
+ break;
default:
/* nothing */;
}
@@ -1588,13 +1586,17 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
&vlan_id, &qos))
rdma_ah_set_sl(&ah_attr, qos);
- mlx4_ib_send_to_wire(dev, slave, ctx->port,
- is_proxy_qp0(dev, wc->src_qp, slave) ?
- IB_QPT_SMI : IB_QPT_GSI,
- be16_to_cpu(tunnel->hdr.pkey_index),
- be32_to_cpu(tunnel->hdr.remote_qpn),
- be32_to_cpu(tunnel->hdr.qkey),
- &ah_attr, wc->smac, vlan_id, &tunnel->mad);
+ sts = mlx4_ib_send_to_wire(dev, slave, ctx->port,
+ is_proxy_qp0(dev, wc->src_qp, slave) ?
+ IB_QPT_SMI : IB_QPT_GSI,
+ be16_to_cpu(tunnel->hdr.pkey_index),
+ be32_to_cpu(tunnel->hdr.remote_qpn),
+ be32_to_cpu(tunnel->hdr.qkey),
+ &ah_attr, wc->smac, vlan_id, &tunnel->mad);
+ if (sts)
+ pr_debug("failed sending %s to wire on behalf of slave %d (%d)\n",
+ is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
+ slave, sts);
}
static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
@@ -1603,19 +1605,20 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
int i;
struct mlx4_ib_demux_pv_qp *tun_qp;
int rx_buf_size, tx_buf_size;
+ const int nmbr_bufs = is_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
if (qp_type > IB_QPT_GSI)
return -EINVAL;
tun_qp = &ctx->qp[qp_type];
- tun_qp->ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
+ tun_qp->ring = kcalloc(nmbr_bufs,
sizeof(struct mlx4_ib_buf),
GFP_KERNEL);
if (!tun_qp->ring)
return -ENOMEM;
- tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
+ tun_qp->tx_ring = kcalloc(nmbr_bufs,
sizeof (struct mlx4_ib_tun_tx_buf),
GFP_KERNEL);
if (!tun_qp->tx_ring) {
@@ -1632,7 +1635,7 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
}
- for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ for (i = 0; i < nmbr_bufs; i++) {
tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL);
if (!tun_qp->ring[i].addr)
goto err;
@@ -1646,7 +1649,7 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
}
}
- for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ for (i = 0; i < nmbr_bufs; i++) {
tun_qp->tx_ring[i].buf.addr =
kmalloc(tx_buf_size, GFP_KERNEL);
if (!tun_qp->tx_ring[i].buf.addr)
@@ -1677,9 +1680,7 @@ tx_err:
tx_buf_size, DMA_TO_DEVICE);
kfree(tun_qp->tx_ring[i].buf.addr);
}
- kfree(tun_qp->tx_ring);
- tun_qp->tx_ring = NULL;
- i = MLX4_NUM_TUNNEL_BUFS;
+ i = nmbr_bufs;
err:
while (i > 0) {
--i;
@@ -1687,6 +1688,8 @@ err:
rx_buf_size, DMA_FROM_DEVICE);
kfree(tun_qp->ring[i].addr);
}
+ kfree(tun_qp->tx_ring);
+ tun_qp->tx_ring = NULL;
kfree(tun_qp->ring);
tun_qp->ring = NULL;
return -ENOMEM;
@@ -1698,6 +1701,7 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
int i;
struct mlx4_ib_demux_pv_qp *tun_qp;
int rx_buf_size, tx_buf_size;
+ const int nmbr_bufs = is_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
if (qp_type > IB_QPT_GSI)
return;
@@ -1712,13 +1716,13 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
}
- for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ for (i = 0; i < nmbr_bufs; i++) {
ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
rx_buf_size, DMA_FROM_DEVICE);
kfree(tun_qp->ring[i].addr);
}
- for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ for (i = 0; i < nmbr_bufs; i++) {
ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
tx_buf_size, DMA_TO_DEVICE);
kfree(tun_qp->tx_ring[i].buf.addr);
@@ -1752,9 +1756,6 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
"buf:%lld\n", wc.wr_id);
break;
case IB_WC_SEND:
- pr_debug("received tunnel send completion:"
- "wrid=0x%llx, status=0x%x\n",
- wc.wr_id, wc.status);
rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id &
(MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0);
tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
@@ -1801,6 +1802,7 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
struct mlx4_ib_qp_tunnel_init_attr qp_init_attr;
struct ib_qp_attr attr;
int qp_attr_mask_INIT;
+ const int nmbr_bufs = create_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
if (qp_type > IB_QPT_GSI)
return -EINVAL;
@@ -1811,8 +1813,8 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
qp_init_attr.init_attr.send_cq = ctx->cq;
qp_init_attr.init_attr.recv_cq = ctx->cq;
qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
- qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS;
- qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS;
+ qp_init_attr.init_attr.cap.max_send_wr = nmbr_bufs;
+ qp_init_attr.init_attr.cap.max_recv_wr = nmbr_bufs;
qp_init_attr.init_attr.cap.max_send_sge = 1;
qp_init_attr.init_attr.cap.max_recv_sge = 1;
if (create_tun) {
@@ -1834,9 +1836,9 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
tun_qp->qp = ib_create_qp(ctx->pd, &qp_init_attr.init_attr);
if (IS_ERR(tun_qp->qp)) {
ret = PTR_ERR(tun_qp->qp);
+ pr_err("Couldn't create %s QP (%pe)\n",
+ create_tun ? "tunnel" : "special", tun_qp->qp);
tun_qp->qp = NULL;
- pr_err("Couldn't create %s QP (%d)\n",
- create_tun ? "tunnel" : "special", ret);
return ret;
}
@@ -1874,7 +1876,7 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
goto err_qp;
}
- for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ for (i = 0; i < nmbr_bufs; i++) {
ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i);
if (ret) {
pr_err(" mlx4_ib_post_pv_buf error"
@@ -1910,8 +1912,8 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
switch (wc.opcode) {
case IB_WC_SEND:
kfree(sqp->tx_ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
- sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+ (MLX4_NUM_WIRE_BUFS - 1)].ah);
+ sqp->tx_ring[wc.wr_id & (MLX4_NUM_WIRE_BUFS - 1)].ah
= NULL;
spin_lock(&sqp->tx_lock);
sqp->tx_ix_tail++;
@@ -1920,13 +1922,13 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
case IB_WC_RECV:
mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *)
(sqp->ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload);
+ (MLX4_NUM_WIRE_BUFS - 1)].addr))->payload);
grh = &(((struct mlx4_mad_rcv_buf *)
(sqp->ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh);
+ (MLX4_NUM_WIRE_BUFS - 1)].addr))->grh);
mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad);
if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)))
+ (MLX4_NUM_WIRE_BUFS - 1)))
pr_err("Failed reposting SQP "
"buf:%lld\n", wc.wr_id);
break;
@@ -1939,8 +1941,8 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
ctx->slave, wc.status, wc.wr_id);
if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
kfree(sqp->tx_ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
- sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+ (MLX4_NUM_WIRE_BUFS - 1)].ah);
+ sqp->tx_ring[wc.wr_id & (MLX4_NUM_WIRE_BUFS - 1)].ah
= NULL;
spin_lock(&sqp->tx_lock);
sqp->tx_ix_tail++;
@@ -1980,6 +1982,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
{
int ret, cq_size;
struct ib_cq_init_attr cq_attr = {};
+ const int nmbr_bufs = create_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
if (ctx->state != DEMUX_PV_STATE_DOWN)
return -EEXIST;
@@ -2004,23 +2007,24 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
goto err_out_qp0;
}
- cq_size = 2 * MLX4_NUM_TUNNEL_BUFS;
+ cq_size = 2 * nmbr_bufs;
if (ctx->has_smi)
cq_size *= 2;
cq_attr.cqe = cq_size;
- ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler,
+ ctx->cq = ib_create_cq(ctx->ib_dev,
+ create_tun ? mlx4_ib_tunnel_comp_handler : mlx4_ib_wire_comp_handler,
NULL, ctx, &cq_attr);
if (IS_ERR(ctx->cq)) {
ret = PTR_ERR(ctx->cq);
- pr_err("Couldn't create tunnel CQ (%d)\n", ret);
+ pr_err("Couldn't create tunnel CQ (%pe)\n", ctx->cq);
goto err_buf;
}
ctx->pd = ib_alloc_pd(ctx->ib_dev, 0);
if (IS_ERR(ctx->pd)) {
ret = PTR_ERR(ctx->pd);
- pr_err("Couldn't create tunnel PD (%d)\n", ret);
+ pr_err("Couldn't create tunnel PD (%pe)\n", ctx->pd);
goto err_cq;
}
@@ -2046,6 +2050,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker);
ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq;
+ ctx->wi_wq = to_mdev(ibdev)->sriov.demux[port - 1].wi_wq;
ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
if (ret) {
@@ -2153,7 +2158,6 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
struct mlx4_ib_demux_ctx *ctx,
int port)
{
- char name[12];
int ret = 0;
int i;
@@ -2189,16 +2193,21 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
goto err_mcg;
}
- snprintf(name, sizeof name, "mlx4_ibt%d", port);
- ctx->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
+ ctx->wq = alloc_ordered_workqueue("mlx4_ibt%d", WQ_MEM_RECLAIM, port);
if (!ctx->wq) {
pr_err("Failed to create tunnelling WQ for port %d\n", port);
ret = -ENOMEM;
goto err_wq;
}
- snprintf(name, sizeof name, "mlx4_ibud%d", port);
- ctx->ud_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
+ ctx->wi_wq = alloc_ordered_workqueue("mlx4_ibwi%d", WQ_MEM_RECLAIM, port);
+ if (!ctx->wi_wq) {
+ pr_err("Failed to create wire WQ for port %d\n", port);
+ ret = -ENOMEM;
+ goto err_wiwq;
+ }
+
+ ctx->ud_wq = alloc_ordered_workqueue("mlx4_ibud%d", WQ_MEM_RECLAIM, port);
if (!ctx->ud_wq) {
pr_err("Failed to create up/down WQ for port %d\n", port);
ret = -ENOMEM;
@@ -2208,6 +2217,10 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
return 0;
err_udwq:
+ destroy_workqueue(ctx->wi_wq);
+ ctx->wi_wq = NULL;
+
+err_wiwq:
destroy_workqueue(ctx->wq);
ctx->wq = NULL;
@@ -2255,12 +2268,14 @@ static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx)
ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING;
}
flush_workqueue(ctx->wq);
+ flush_workqueue(ctx->wi_wq);
for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0);
free_pv_object(dev, i, ctx->port);
}
kfree(ctx->tun);
destroy_workqueue(ctx->ud_wq);
+ destroy_workqueue(ctx->wi_wq);
destroy_workqueue(ctx->wq);
}
}
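/*
 * Editor's note, not part of the patch above: several mad.c error messages
 * switch to the %pe printk specifier, which prints the symbolic errno held in
 * an ERR_PTR() (for example "-ENOMEM") instead of a raw pointer value.  A
 * minimal sketch; demo_report() is made up for illustration:
 */
#include <linux/printk.h>
#include <linux/err.h>

static void demo_report(const void *obj)
{
	if (IS_ERR(obj))
		pr_err("demo: object creation failed (%pe)\n", obj);
}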
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 25d09d53b51c..dd35e03402ab 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -81,18 +81,12 @@ static const char mlx4_ib_version[] =
static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
static enum rdma_link_layer mlx4_ib_port_link_layer(struct ib_device *device,
- u8 port_num);
+ u32 port_num);
+static int mlx4_ib_event(struct notifier_block *this, unsigned long event,
+ void *param);
static struct workqueue_struct *wq;
-static void init_query_mad(struct ib_smp *mad)
-{
- mad->base_version = 1;
- mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
- mad->class_version = 1;
- mad->method = IB_MGMT_METHOD_GET;
-}
-
static int check_flow_steering_support(struct mlx4_dev *dev)
{
int eth_num_ports = 0;
@@ -129,17 +123,20 @@ static int num_ib_ports(struct mlx4_dev *dev)
return ib_ports;
}
-static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_num)
+static struct net_device *mlx4_ib_get_netdev(struct ib_device *device,
+ u32 port_num)
{
struct mlx4_ib_dev *ibdev = to_mdev(device);
- struct net_device *dev;
+ struct net_device *dev, *ret = NULL;
rcu_read_lock();
- dev = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port_num);
+ for_each_netdev_rcu(&init_net, dev) {
+ if (dev->dev.parent != ibdev->ib_dev.dev.parent ||
+ dev->dev_port + 1 != port_num)
+ continue;
- if (dev) {
if (mlx4_is_bonded(ibdev->dev)) {
- struct net_device *upper = NULL;
+ struct net_device *upper;
upper = netdev_master_upper_dev_get_rcu(dev);
if (upper) {
@@ -150,17 +147,19 @@ static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_n
dev = active;
}
}
- }
- if (dev)
+
dev_hold(dev);
+ ret = dev;
+ break;
+ }
rcu_read_unlock();
- return dev;
+ return ret;
}
static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
struct mlx4_ib_dev *ibdev,
- u8 port_num)
+ u32 port_num)
{
struct mlx4_cmd_mailbox *mailbox;
int err;
@@ -193,7 +192,7 @@ static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
struct mlx4_ib_dev *ibdev,
- u8 port_num)
+ u32 port_num)
{
struct mlx4_cmd_mailbox *mailbox;
int err;
@@ -238,7 +237,7 @@ static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
static int mlx4_ib_update_gids(struct gid_entry *gids,
struct mlx4_ib_dev *ibdev,
- u8 port_num)
+ u32 port_num)
{
if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num);
@@ -246,6 +245,13 @@ static int mlx4_ib_update_gids(struct gid_entry *gids,
return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
}
+static void free_gid_entry(struct gid_entry *entry)
+{
+ memset(&entry->gid, 0, sizeof(entry->gid));
+ kfree(entry->ctx);
+ entry->ctx = NULL;
+}
+
static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
{
struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
@@ -255,7 +261,9 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
int ret = 0;
int hw_update = 0;
int i;
- struct gid_entry *gids = NULL;
+ struct gid_entry *gids;
+ u16 vlan_id = 0xffff;
+ u8 mac[ETH_ALEN];
if (!rdma_cap_roce_gid_table(attr->device, attr->port_num))
return -EINVAL;
@@ -266,12 +274,16 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
if (!context)
return -EINVAL;
+ ret = rdma_read_gid_l2_fields(attr, &vlan_id, &mac[0]);
+ if (ret)
+ return ret;
port_gid_table = &iboe->gids[attr->port_num - 1];
spin_lock_bh(&iboe->lock);
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
if (!memcmp(&port_gid_table->gids[i].gid,
&attr->gid, sizeof(attr->gid)) &&
- port_gid_table->gids[i].gid_type == attr->gid_type) {
+ port_gid_table->gids[i].gid_type == attr->gid_type &&
+ port_gid_table->gids[i].vlan_id == vlan_id) {
found = i;
break;
}
@@ -288,9 +300,9 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
ret = -ENOMEM;
} else {
*context = port_gid_table->gids[free].ctx;
- memcpy(&port_gid_table->gids[free].gid,
- &attr->gid, sizeof(attr->gid));
+ port_gid_table->gids[free].gid = attr->gid;
port_gid_table->gids[free].gid_type = attr->gid_type;
+ port_gid_table->gids[free].vlan_id = vlan_id;
port_gid_table->gids[free].ctx->real_index = free;
port_gid_table->gids[free].ctx->refcount = 1;
hw_update = 1;
@@ -306,6 +318,8 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
GFP_ATOMIC);
if (!gids) {
ret = -ENOMEM;
+ *context = NULL;
+ free_gid_entry(&port_gid_table->gids[free]);
} else {
for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
@@ -317,6 +331,12 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
if (!ret && hw_update) {
ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num);
+ if (ret) {
+ spin_lock_bh(&iboe->lock);
+ *context = NULL;
+ free_gid_entry(&port_gid_table->gids[free]);
+ spin_unlock_bh(&iboe->lock);
+ }
kfree(gids);
}
@@ -346,10 +366,7 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context)
if (!ctx->refcount) {
unsigned int real_index = ctx->real_index;
- memset(&port_gid_table->gids[real_index].gid, 0,
- sizeof(port_gid_table->gids[real_index].gid));
- kfree(port_gid_table->gids[real_index].ctx);
- port_gid_table->gids[real_index].ctx = NULL;
+ free_gid_entry(&port_gid_table->gids[real_index]);
hw_update = 1;
}
}
@@ -372,10 +389,10 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context)
}
spin_unlock_bh(&iboe->lock);
- if (!ret && hw_update) {
+ if (gids)
ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num);
- kfree(gids);
- }
+
+ kfree(gids);
return ret;
}
@@ -388,7 +405,7 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
int real_index = -EINVAL;
int i;
unsigned long flags;
- u8 port_num = attr->port_num;
+ u32 port_num = attr->port_num;
if (port_num > MLX4_MAX_PORTS)
return -EINVAL;
@@ -415,20 +432,17 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
return real_index;
}
-#define field_avail(type, fld, sz) (offsetof(type, fld) + \
- sizeof(((type *)0)->fld) <= (sz))
-
static int mlx4_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props,
struct ib_udata *uhw)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err;
int have_ib_ports;
struct mlx4_uverbs_ex_query_device cmd;
- struct mlx4_uverbs_ex_query_device_resp resp = {.comp_mask = 0};
+ struct mlx4_uverbs_ex_query_device_resp resp = {};
struct mlx4_clock_params clock_params;
if (uhw->inlen) {
@@ -454,7 +468,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
@@ -470,8 +484,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID |
- IB_DEVICE_RC_RNR_NAK_GEN |
- IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+ IB_DEVICE_RC_RNR_NAK_GEN;
+ props->kernel_cap_flags = IBK_BLOCK_MULTICAST_LOOPBACK;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
@@ -485,9 +499,9 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
if (dev->dev->caps.max_gso_sz &&
(dev->dev->rev_id != MLX4_IB_CARD_REV_A0) &&
(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH))
- props->device_cap_flags |= IB_DEVICE_UD_TSO;
+ props->kernel_cap_flags |= IBK_UD_TSO;
if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
- props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
+ props->kernel_cap_flags |= IBK_LOCAL_DMA_LKEY;
if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
(dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
(dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
@@ -542,7 +556,6 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
- props->max_map_per_fmr = dev->dev->caps.max_fmr_maps;
props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL;
props->timestamp_mask = 0xFFFFFFFFFFFFULL;
props->max_ah = INT_MAX;
@@ -565,12 +578,9 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->cq_caps.max_cq_moderation_count = MLX4_MAX_CQ_COUNT;
props->cq_caps.max_cq_moderation_period = MLX4_MAX_CQ_PERIOD;
- if (!mlx4_is_slave(dev->dev))
- err = mlx4_get_internal_clock_params(dev->dev, &clock_params);
-
if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) {
resp.response_length += sizeof(resp.hca_core_clock_offset);
- if (!err && !mlx4_is_slave(dev->dev)) {
+ if (!mlx4_get_internal_clock_params(dev->dev, &clock_params)) {
resp.comp_mask |= MLX4_IB_QUERY_DEV_RESP_MASK_CORE_CLOCK_OFFSET;
resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE;
}
@@ -583,7 +593,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
sizeof(struct mlx4_wqe_data_seg);
}
- if (field_avail(typeof(resp), rss_caps, uhw->outlen)) {
+ if (offsetofend(typeof(resp), rss_caps) <= uhw->outlen) {
if (props->rss_caps.supported_qpts) {
resp.rss_caps.rx_hash_function =
MLX4_IB_RX_HASH_FUNC_TOEPLITZ;
@@ -607,7 +617,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
sizeof(resp.rss_caps);
}
- if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
+ if (offsetofend(typeof(resp), tso_caps) <= uhw->outlen) {
if (dev->dev->caps.max_gso_sz &&
((mlx4_ib_port_link_layer(ibdev, 1) ==
IB_LINK_LAYER_ETHERNET) ||
@@ -634,7 +644,7 @@ out:
}
static enum rdma_link_layer
-mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
+mlx4_ib_port_link_layer(struct ib_device *device, u32 port_num)
{
struct mlx4_dev *dev = to_mdev(device)->dev;
@@ -642,11 +652,11 @@ mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
}
-static int ib_link_query_port(struct ib_device *ibdev, u8 port,
+static int ib_link_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props, int netw_view)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int ext_active_speed;
int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
@@ -656,7 +666,7 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port,
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
@@ -708,7 +718,7 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port,
/* If reported active speed is QDR, check if is FDR-10 */
if (props->active_speed == IB_SPEED_QDR) {
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
@@ -734,10 +744,11 @@ out:
static u8 state_to_phys_state(enum ib_port_state state)
{
- return state == IB_PORT_ACTIVE ? 5 : 3;
+ return state == IB_PORT_ACTIVE ?
+ IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
}
-static int eth_link_query_port(struct ib_device *ibdev, u8 port,
+static int eth_link_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
@@ -768,7 +779,8 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
props->ip_gids = true;
props->gid_tbl_len = mdev->dev->caps.gid_table_len[port];
props->max_msg_sz = mdev->dev->caps.max_msg_sz;
- props->pkey_tbl_len = 1;
+ if (mdev->dev->caps.pkey_table_len[port])
+ props->pkey_tbl_len = 1;
props->max_mtu = IB_MTU_4096;
props->max_vl_num = 2;
props->state = IB_PORT_DOWN;
@@ -797,7 +809,7 @@ out:
return err;
}
-int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+int __mlx4_ib_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props, int netw_view)
{
int err;
@@ -811,18 +823,18 @@ int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
return err;
}
-static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+static int mlx4_ib_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
/* returns host view */
return __mlx4_ib_query_port(ibdev, port, props, 0);
}
-int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+int __mlx4_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
union ib_gid *gid, int netw_view)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
struct mlx4_ib_dev *dev = to_mdev(ibdev);
int clear = 0;
@@ -833,7 +845,7 @@ int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
@@ -855,7 +867,7 @@ int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
}
}
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
in_mad->attr_mod = cpu_to_be32(index / 8);
@@ -874,7 +886,7 @@ out:
return err;
}
-static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+static int mlx4_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
union ib_gid *gid)
{
if (rdma_protocol_ib(ibdev, port))
@@ -882,11 +894,12 @@ static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
return 0;
}
-static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u8 port, u64 *sl2vl_tbl)
+static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u32 port,
+ u64 *sl2vl_tbl)
{
union sl2vl_tbl_to_u64 sl2vl64;
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
int jj;
@@ -901,7 +914,7 @@ static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u8 port, u64 *sl2vl_tbl)
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_SL_TO_VL_TABLE;
in_mad->attr_mod = 0;
@@ -942,11 +955,11 @@ static void mlx4_init_sl2vl_tbl(struct mlx4_ib_dev *mdev)
}
}
-int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+int __mlx4_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
u16 *pkey, int netw_view)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
@@ -955,7 +968,7 @@ int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
in_mad->attr_mod = cpu_to_be32(index / 32);
@@ -975,7 +988,8 @@ out:
return err;
}
-static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
+static int mlx4_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
+ u16 *pkey)
{
return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0);
}
@@ -1016,8 +1030,8 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
return 0;
}
-static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
- u32 cap_mask)
+static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u32 port,
+ int reset_qkey_viols, u32 cap_mask)
{
struct mlx4_cmd_mailbox *mailbox;
int err;
@@ -1042,7 +1056,7 @@ static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_vio
return err;
}
-static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
+static int mlx4_ib_modify_port(struct ib_device *ibdev, u32 port, int mask,
struct ib_port_modify *props)
{
struct mlx4_ib_dev *mdev = to_mdev(ibdev);
@@ -1089,7 +1103,8 @@ static int mlx4_ib_alloc_ucontext(struct ib_ucontext *uctx,
if (!dev->ib_active)
return -EAGAIN;
- if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
+ if (ibdev->ops.uverbs_abi_ver ==
+ MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
resp_v3.qp_tab_size = dev->dev->caps.num_qps;
resp_v3.bf_reg_size = dev->dev->caps.bf_reg_size;
resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
@@ -1111,7 +1126,7 @@ static int mlx4_ib_alloc_ucontext(struct ib_ucontext *uctx,
INIT_LIST_HEAD(&context->wqn_ranges_list);
mutex_init(&context->wqn_ranges_mutex);
- if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
+ if (ibdev->ops.uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
else
err = ib_copy_to_udata(udata, &resp, sizeof(resp));
@@ -1144,7 +1159,8 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
return rdma_user_mmap_io(context, vma,
to_mucontext(context)->uar.pfn,
PAGE_SIZE,
- pgprot_noncached(vma->vm_page_prot));
+ pgprot_noncached(vma->vm_page_prot),
+ NULL);
case 1:
if (dev->dev->caps.bf_reg_size == 0)
@@ -1153,7 +1169,8 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
context, vma,
to_mucontext(context)->uar.pfn +
dev->dev->caps.num_uars,
- PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot));
+ PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot),
+ NULL);
case 3: {
struct mlx4_clock_params params;
@@ -1169,7 +1186,8 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
params.bar) +
params.offset) >>
PAGE_SHIFT,
- PAGE_SIZE, pgprot_noncached(vma->vm_page_prot));
+ PAGE_SIZE, pgprot_noncached(vma->vm_page_prot),
+ NULL);
}
default:
@@ -1194,51 +1212,46 @@ static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
return 0;
}
-static void mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+static int mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
+ return 0;
}
-static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
- struct ib_udata *udata)
+static int mlx4_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
{
- struct mlx4_ib_xrcd *xrcd;
+ struct mlx4_ib_dev *dev = to_mdev(ibxrcd->device);
+ struct mlx4_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
struct ib_cq_init_attr cq_attr = {};
int err;
- if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
- return ERR_PTR(-ENOSYS);
-
- xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL);
- if (!xrcd)
- return ERR_PTR(-ENOMEM);
+ if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
+ return -EOPNOTSUPP;
- err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn);
+ err = mlx4_xrcd_alloc(dev->dev, &xrcd->xrcdn);
if (err)
- goto err1;
+ return err;
- xrcd->pd = ib_alloc_pd(ibdev, 0);
+ xrcd->pd = ib_alloc_pd(ibxrcd->device, 0);
if (IS_ERR(xrcd->pd)) {
err = PTR_ERR(xrcd->pd);
goto err2;
}
cq_attr.cqe = 1;
- xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, &cq_attr);
+ xrcd->cq = ib_create_cq(ibxrcd->device, NULL, NULL, xrcd, &cq_attr);
if (IS_ERR(xrcd->cq)) {
err = PTR_ERR(xrcd->cq);
goto err3;
}
- return &xrcd->ibxrcd;
+ return 0;
err3:
ib_dealloc_pd(xrcd->pd);
err2:
- mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn);
-err1:
- kfree(xrcd);
- return ERR_PTR(err);
+ mlx4_xrcd_free(dev->dev, xrcd->xrcdn);
+ return err;
}
static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
@@ -1246,8 +1259,6 @@ static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
ib_destroy_cq(to_mxrcd(xrcd)->cq);
ib_dealloc_pd(to_mxrcd(xrcd)->pd);
mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
- kfree(xrcd);
-
return 0;
}
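A hedged sketch of the core-allocated object pattern the xrcd hunks above convert to, mirroring the mlx4_ib_dev_xrc_ops update made later in this diff: ib_core sizes and frees the driver structure from the INIT_RDMA_OBJ_SIZE() entry, so the callbacks stop calling kmalloc()/kfree() themselves. The struct and ops names here are illustrative.

/* the embedded ib_xrcd must sit at offset 0, as INIT_RDMA_OBJ_SIZE() asserts */
struct example_xrcd {
	struct ib_xrcd	ibxrcd;
	u32		xrcdn;
	struct ib_pd	*pd;
	struct ib_cq	*cq;
};

static const struct ib_device_ops example_xrc_ops = {
	.alloc_xrcd	= mlx4_ib_alloc_xrcd,
	.dealloc_xrcd	= mlx4_ib_dealloc_xrcd,
	INIT_RDMA_OBJ_SIZE(ib_xrcd, example_xrcd, ibxrcd),
};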
@@ -1301,8 +1312,7 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
spin_lock_bh(&mdev->iboe.lock);
ndev = mdev->iboe.netdevs[mqp->port - 1];
- if (ndev)
- dev_hold(ndev);
+ dev_hold(ndev);
spin_unlock_bh(&mdev->iboe.lock);
if (ndev) {
@@ -1478,8 +1488,9 @@ static int __mlx4_ib_create_default_rules(
int i;
for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) {
+ union ib_flow_spec ib_spec = {};
int ret;
- union ib_flow_spec ib_spec;
+
switch (pdefault_rules->rules_create_list[i]) {
case 0:
/* no rule */
@@ -1520,23 +1531,11 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
struct mlx4_net_trans_rule_hw_ctrl *ctrl;
int default_flow;
- static const u16 __mlx4_domain[] = {
- [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
- [IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL,
- [IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS,
- [IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC,
- };
-
if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) {
pr_err("Invalid priority value %d\n", flow_attr->priority);
return -EINVAL;
}
- if (domain >= IB_FLOW_DOMAIN_NUM) {
- pr_err("Invalid domain value %d\n", domain);
- return -EINVAL;
- }
-
if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0)
return -EINVAL;
@@ -1545,8 +1544,7 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
return PTR_ERR(mailbox);
ctrl = mailbox->buf;
- ctrl->prio = cpu_to_be16(__mlx4_domain[domain] |
- flow_attr->priority);
+ ctrl->prio = cpu_to_be16(domain | flow_attr->priority);
ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type);
ctrl->port = flow_attr->port;
ctrl->qpn = cpu_to_be32(qp->qp_num);
@@ -1688,8 +1686,8 @@ static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev,
}
static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
- struct ib_flow_attr *flow_attr,
- int domain, struct ib_udata *udata)
+ struct ib_flow_attr *flow_attr,
+ struct ib_udata *udata)
{
int err = 0, i = 0, j = 0;
struct mlx4_ib_flow *mflow;
@@ -1697,9 +1695,6 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
int is_bonded = mlx4_is_bonded(dev);
- if (flow_attr->port < 1 || flow_attr->port > qp->device->phys_port_cnt)
- return ERR_PTR(-EINVAL);
-
if (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)
return ERR_PTR(-EOPNOTSUPP);
@@ -1755,8 +1750,8 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
}
while (i < ARRAY_SIZE(type) && type[i]) {
- err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
- &mflow->reg_id[i].id);
+ err = __mlx4_ib_create_flow(qp, flow_attr, MLX4_DOMAIN_UVERBS,
+ type[i], &mflow->reg_id[i].id);
if (err)
goto err_create_flow;
if (is_bonded) {
@@ -1765,7 +1760,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
*/
flow_attr->port = 2;
err = __mlx4_ib_create_flow(qp, flow_attr,
- domain, type[j],
+ MLX4_DOMAIN_UVERBS, type[j],
&mflow->reg_id[j].mirror);
flow_attr->port = 1;
if (err)
@@ -1964,11 +1959,9 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
if (ge) {
spin_lock_bh(&mdev->iboe.lock);
ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL;
- if (ndev)
- dev_hold(ndev);
+ dev_hold(ndev);
spin_unlock_bh(&mdev->iboe.lock);
- if (ndev)
- dev_put(ndev);
+ dev_put(ndev);
list_del(&ge->list);
kfree(ge);
} else
@@ -1981,8 +1974,8 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
static int init_node_data(struct mlx4_ib_dev *dev)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
@@ -1991,7 +1984,7 @@ static int init_node_data(struct mlx4_ib_dev *dev)
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
if (mlx4_is_master(dev->dev))
mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
@@ -2022,7 +2015,8 @@ static ssize_t hca_type_show(struct device *device,
{
struct mlx4_ib_dev *dev =
rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
- return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
+
+ return sysfs_emit(buf, "MT%d\n", dev->dev->persist->pdev->device);
}
static DEVICE_ATTR_RO(hca_type);
@@ -2031,7 +2025,8 @@ static ssize_t hw_rev_show(struct device *device,
{
struct mlx4_ib_dev *dev =
rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
- return sprintf(buf, "%x\n", dev->dev->rev_id);
+
+ return sysfs_emit(buf, "%x\n", dev->dev->rev_id);
}
static DEVICE_ATTR_RO(hw_rev);
@@ -2041,8 +2036,7 @@ static ssize_t board_id_show(struct device *device,
struct mlx4_ib_dev *dev =
rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
- return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
- dev->dev->board_id);
+ return sysfs_emit(buf, "%.*s\n", MLX4_BOARD_ID_LEN, dev->dev->board_id);
}
static DEVICE_ATTR_RO(board_id);
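A short hedged example of the sysfs_emit() conversion shown in the three attributes above; the attribute name and value are placeholders. sysfs_emit() assumes the one-page sysfs buffer and bounds the write, which a raw sprintf() into that buffer cannot do.

static ssize_t example_show(struct device *device,
			    struct device_attribute *attr, char *buf)
{
	/* placeholder value; real attributes read it from the device */
	return sysfs_emit(buf, "%x\n", 0xa0);
}
static DEVICE_ATTR_RO(example);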
@@ -2099,23 +2093,35 @@ static const struct diag_counter diag_device_only[] = {
DIAG_COUNTER(rq_num_udsdprd, 0x118),
};
-static struct rdma_hw_stats *mlx4_ib_alloc_hw_stats(struct ib_device *ibdev,
- u8 port_num)
+static struct rdma_hw_stats *
+mlx4_ib_alloc_hw_device_stats(struct ib_device *ibdev)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct mlx4_ib_diag_counters *diag = dev->diag_counters;
- if (!diag[!!port_num].name)
+ if (!diag[0].descs)
return NULL;
- return rdma_alloc_hw_stats_struct(diag[!!port_num].name,
- diag[!!port_num].num_counters,
+ return rdma_alloc_hw_stats_struct(diag[0].descs, diag[0].num_counters,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static struct rdma_hw_stats *
+mlx4_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct mlx4_ib_diag_counters *diag = dev->diag_counters;
+
+ if (!diag[1].descs)
+ return NULL;
+
+ return rdma_alloc_hw_stats_struct(diag[1].descs, diag[1].num_counters,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
static int mlx4_ib_get_hw_stats(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
- u8 port, int index)
+ u32 port, int index)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct mlx4_ib_diag_counters *diag = dev->diag_counters;
@@ -2139,10 +2145,8 @@ static int mlx4_ib_get_hw_stats(struct ib_device *ibdev,
}
static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev,
- const char ***name,
- u32 **offset,
- u32 *num,
- bool port)
+ struct rdma_stat_desc **pdescs,
+ u32 **offset, u32 *num, bool port)
{
u32 num_counters;
@@ -2154,53 +2158,59 @@ static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev,
if (!port)
num_counters += ARRAY_SIZE(diag_device_only);
- *name = kcalloc(num_counters, sizeof(**name), GFP_KERNEL);
- if (!*name)
+ *pdescs = kcalloc(num_counters, sizeof(struct rdma_stat_desc),
+ GFP_KERNEL);
+ if (!*pdescs)
return -ENOMEM;
*offset = kcalloc(num_counters, sizeof(**offset), GFP_KERNEL);
if (!*offset)
- goto err_name;
+ goto err;
*num = num_counters;
return 0;
-err_name:
- kfree(*name);
+err:
+ kfree(*pdescs);
return -ENOMEM;
}
static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev,
- const char **name,
- u32 *offset,
- bool port)
+ struct rdma_stat_desc *descs,
+ u32 *offset, bool port)
{
int i;
int j;
for (i = 0, j = 0; i < ARRAY_SIZE(diag_basic); i++, j++) {
- name[i] = diag_basic[i].name;
+ descs[i].name = diag_basic[i].name;
offset[i] = diag_basic[i].offset;
}
if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) {
for (i = 0; i < ARRAY_SIZE(diag_ext); i++, j++) {
- name[j] = diag_ext[i].name;
+ descs[j].name = diag_ext[i].name;
offset[j] = diag_ext[i].offset;
}
}
if (!port) {
for (i = 0; i < ARRAY_SIZE(diag_device_only); i++, j++) {
- name[j] = diag_device_only[i].name;
+ descs[j].name = diag_device_only[i].name;
offset[j] = diag_device_only[i].offset;
}
}
}
static const struct ib_device_ops mlx4_ib_hw_stats_ops = {
- .alloc_hw_stats = mlx4_ib_alloc_hw_stats,
+ .alloc_hw_device_stats = mlx4_ib_alloc_hw_device_stats,
+ .alloc_hw_port_stats = mlx4_ib_alloc_hw_port_stats,
+ .get_hw_stats = mlx4_ib_get_hw_stats,
+};
+
+static const struct ib_device_ops mlx4_ib_hw_stats_ops1 = {
+ .alloc_hw_device_stats = mlx4_ib_alloc_hw_device_stats,
.get_hw_stats = mlx4_ib_get_hw_stats,
};
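A small sketch of the rdma_stat_desc-based allocation the stats ops above switch to; the counter names are placeholders rather than the driver's real diagnostic list.

static const struct rdma_stat_desc example_descs[] = {
	{ .name = "example_counter_a" },
	{ .name = "example_counter_b" },
};

static struct rdma_hw_stats *example_alloc_device_stats(struct ib_device *ibdev)
{
	/* one descriptor per counter, default lifespan from the rdma core */
	return rdma_alloc_hw_stats_struct(example_descs,
					  ARRAY_SIZE(example_descs),
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}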
@@ -2216,17 +2226,24 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
return 0;
for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
- /* i == 1 means we are building port counters */
- if (i && !per_port)
- continue;
+ /*
+ * i == 1 means we are building port counters, set a different
+ * stats ops without port stats callback.
+ */
+ if (i && !per_port) {
+ ib_set_device_ops(&ibdev->ib_dev,
+ &mlx4_ib_hw_stats_ops1);
+
+ return 0;
+ }
- ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].name,
+ ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].descs,
&diag[i].offset,
&diag[i].num_counters, i);
if (ret)
goto err_alloc;
- mlx4_ib_fill_diag_counters(ibdev, diag[i].name,
+ mlx4_ib_fill_diag_counters(ibdev, diag[i].descs,
diag[i].offset, i);
}
@@ -2236,7 +2253,7 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
err_alloc:
if (i) {
- kfree(diag[i - 1].name);
+ kfree(diag[i - 1].descs);
kfree(diag[i - 1].offset);
}
@@ -2249,7 +2266,7 @@ static void mlx4_ib_diag_cleanup(struct mlx4_ib_dev *ibdev)
for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
kfree(ibdev->diag_counters[i].offset);
- kfree(ibdev->diag_counters[i].name);
+ kfree(ibdev->diag_counters[i].descs);
}
}
@@ -2262,10 +2279,7 @@ static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
u64 release_mac = MLX4_IB_INVALID_MAC;
struct mlx4_ib_qp *qp;
- read_lock(&dev_base_lock);
- new_smac = mlx4_mac_to_u64(dev->dev_addr);
- read_unlock(&dev_base_lock);
-
+ new_smac = ether_addr_to_u64(dev->dev_addr);
atomic64_set(&ibdev->iboe.mac[port - 1], new_smac);
/* no need for update QP1 and mac registration in non-SRIOV */
@@ -2311,61 +2325,54 @@ unlock:
mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]);
}
-static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
- struct net_device *dev,
- unsigned long event)
+static void mlx4_ib_scan_netdev(struct mlx4_ib_dev *ibdev,
+ struct net_device *dev,
+ unsigned long event)
{
- struct mlx4_ib_iboe *iboe;
- int update_qps_port = -1;
- int port;
+ struct mlx4_ib_iboe *iboe = &ibdev->iboe;
ASSERT_RTNL();
- iboe = &ibdev->iboe;
+ if (dev->dev.parent != ibdev->ib_dev.dev.parent)
+ return;
spin_lock_bh(&iboe->lock);
- mlx4_foreach_ib_transport_port(port, ibdev->dev) {
- iboe->netdevs[port - 1] =
- mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
+ iboe->netdevs[dev->dev_port] = event != NETDEV_UNREGISTER ? dev : NULL;
- if (dev == iboe->netdevs[port - 1] &&
- (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER ||
- event == NETDEV_UP || event == NETDEV_CHANGE))
- update_qps_port = port;
+ spin_unlock_bh(&iboe->lock);
- if (dev == iboe->netdevs[port - 1] &&
- (event == NETDEV_UP || event == NETDEV_DOWN)) {
- enum ib_port_state port_state;
- struct ib_event ibev = { };
+ if (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER)
+ mlx4_ib_update_qps(ibdev, dev, dev->dev_port + 1);
+}
- if (ib_get_cached_port_state(&ibdev->ib_dev, port,
- &port_state))
- continue;
+static void mlx4_ib_port_event(struct ib_device *ibdev, struct net_device *ndev,
+ unsigned long event)
+{
+ struct mlx4_ib_dev *mlx4_ibdev =
+ container_of(ibdev, struct mlx4_ib_dev, ib_dev);
+ struct mlx4_ib_iboe *iboe = &mlx4_ibdev->iboe;
- if (event == NETDEV_UP &&
- (port_state != IB_PORT_ACTIVE ||
- iboe->last_port_state[port - 1] != IB_PORT_DOWN))
- continue;
- if (event == NETDEV_DOWN &&
- (port_state != IB_PORT_DOWN ||
- iboe->last_port_state[port - 1] != IB_PORT_ACTIVE))
- continue;
- iboe->last_port_state[port - 1] = port_state;
+ if (!net_eq(dev_net(ndev), &init_net))
+ return;
- ibev.device = &ibdev->ib_dev;
- ibev.element.port_num = port;
- ibev.event = event == NETDEV_UP ? IB_EVENT_PORT_ACTIVE :
- IB_EVENT_PORT_ERR;
- ib_dispatch_event(&ibev);
- }
+ ASSERT_RTNL();
+
+ if (ndev->dev.parent != mlx4_ibdev->ib_dev.dev.parent)
+ return;
+
+ spin_lock_bh(&iboe->lock);
+
+ iboe->netdevs[ndev->dev_port] = event != NETDEV_UNREGISTER ? ndev : NULL;
+
+ if (event == NETDEV_UP || event == NETDEV_DOWN)
+ ib_dispatch_port_state_event(&mlx4_ibdev->ib_dev, ndev);
- }
spin_unlock_bh(&iboe->lock);
- if (update_qps_port > 0)
- mlx4_ib_update_qps(ibdev, dev, update_qps_port);
+ if (event == NETDEV_UP || event == NETDEV_CHANGE)
+ mlx4_ib_update_qps(mlx4_ibdev, ndev, ndev->dev_port + 1);
}
static int mlx4_ib_netdev_event(struct notifier_block *this,
@@ -2378,7 +2385,7 @@ static int mlx4_ib_netdev_event(struct notifier_block *this,
return NOTIFY_DONE;
ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
- mlx4_ib_scan_netdevs(ibdev, dev, event);
+ mlx4_ib_scan_netdev(ibdev, dev, event);
return NOTIFY_DONE;
}
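For reference, a minimal netdevice-notifier skeleton of the shape mlx4_ib_netdev_event() keeps after this change; the body is only a placeholder.

static int example_netdev_event(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);

	if (!net_eq(dev_net(ndev), &init_net))
		return NOTIFY_DONE;

	/* inspect 'event' (NETDEV_UP, NETDEV_DOWN, ...) and act on ndev */
	return NOTIFY_DONE;
}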
@@ -2466,7 +2473,7 @@ static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
ibdev->eq_table = NULL;
}
-static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
+static int mlx4_port_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable)
{
struct ib_port_attr attr;
@@ -2509,6 +2516,10 @@ static void get_fw_ver_str(struct ib_device *device, char *str)
}
static const struct ib_device_ops mlx4_ib_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_MLX4,
+ .uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION,
+
.add_gid = mlx4_ib_add_gid,
.alloc_mr = mlx4_ib_alloc_mr,
.alloc_pd = mlx4_ib_alloc_pd,
@@ -2527,6 +2538,7 @@ static const struct ib_device_ops mlx4_ib_dev_ops = {
.destroy_qp = mlx4_ib_destroy_qp,
.destroy_srq = mlx4_ib_destroy_srq,
.detach_mcast = mlx4_ib_mcg_detach,
+ .device_group = &mlx4_attr_group,
.disassociate_ucontext = mlx4_ib_disassociate_ucontext,
.drain_rq = mlx4_ib_drain_rq,
.drain_sq = mlx4_ib_drain_sq,
@@ -2558,9 +2570,12 @@ static const struct ib_device_ops mlx4_ib_dev_ops = {
.req_notify_cq = mlx4_ib_arm_cq,
.rereg_user_mr = mlx4_ib_rereg_user_mr,
.resize_cq = mlx4_ib_resize_cq,
+ .report_port_event = mlx4_ib_port_event,
INIT_RDMA_OBJ_SIZE(ib_ah, mlx4_ib_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, mlx4_ib_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, mlx4_ib_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_qp, mlx4_ib_qp, ibqp),
INIT_RDMA_OBJ_SIZE(ib_srq, mlx4_ib_srq, ibsrq),
INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx4_ib_ucontext, ibucontext),
};
@@ -2571,23 +2586,23 @@ static const struct ib_device_ops mlx4_ib_dev_wq_ops = {
.destroy_rwq_ind_table = mlx4_ib_destroy_rwq_ind_table,
.destroy_wq = mlx4_ib_destroy_wq,
.modify_wq = mlx4_ib_modify_wq,
-};
-static const struct ib_device_ops mlx4_ib_dev_fmr_ops = {
- .alloc_fmr = mlx4_ib_fmr_alloc,
- .dealloc_fmr = mlx4_ib_fmr_dealloc,
- .map_phys_fmr = mlx4_ib_map_phys_fmr,
- .unmap_fmr = mlx4_ib_unmap_fmr,
+ INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx4_ib_rwq_ind_table,
+ ib_rwq_ind_tbl),
};
static const struct ib_device_ops mlx4_ib_dev_mw_ops = {
.alloc_mw = mlx4_ib_alloc_mw,
.dealloc_mw = mlx4_ib_dealloc_mw,
+
+ INIT_RDMA_OBJ_SIZE(ib_mw, mlx4_ib_mw, ibmw),
};
static const struct ib_device_ops mlx4_ib_dev_xrc_ops = {
.alloc_xrcd = mlx4_ib_alloc_xrcd,
.dealloc_xrcd = mlx4_ib_dealloc_xrcd,
+
+ INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx4_ib_xrcd, ibxrcd),
};
static const struct ib_device_ops mlx4_ib_dev_fs_ops = {
@@ -2595,8 +2610,11 @@ static const struct ib_device_ops mlx4_ib_dev_fs_ops = {
.destroy_flow = mlx4_ib_destroy_flow,
};
-static void *mlx4_ib_add(struct mlx4_dev *dev)
+static int mlx4_ib_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
{
+ struct mlx4_adev *madev = container_of(adev, struct mlx4_adev, adev);
+ struct mlx4_dev *dev = madev->mdev;
struct mlx4_ib_dev *ibdev;
int num_ports = 0;
int i, j;
@@ -2606,7 +2624,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
int num_req_counters;
int allocated;
u32 counter_index;
- struct counter_index *new_counter_index = NULL;
+ struct counter_index *new_counter_index;
pr_info_once("%s", mlx4_ib_version);
@@ -2616,33 +2634,36 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
/* No point in registering a device with no ports... */
if (num_ports == 0)
- return NULL;
+ return -ENODEV;
ibdev = ib_alloc_device(mlx4_ib_dev, ib_dev);
if (!ibdev) {
dev_err(&dev->persist->pdev->dev,
"Device struct alloc failed\n");
- return NULL;
+ return -ENOMEM;
}
iboe = &ibdev->iboe;
- if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
+ err = mlx4_pd_alloc(dev, &ibdev->priv_pdn);
+ if (err)
goto err_dealloc;
- if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
+ err = mlx4_uar_alloc(dev, &ibdev->priv_uar);
+ if (err)
goto err_pd;
ibdev->uar_map = ioremap((phys_addr_t) ibdev->priv_uar.pfn << PAGE_SHIFT,
PAGE_SIZE);
- if (!ibdev->uar_map)
+ if (!ibdev->uar_map) {
+ err = -ENOMEM;
goto err_uar;
+ }
MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
ibdev->dev = dev;
ibdev->bond_next_port = 0;
- ibdev->ib_dev.owner = THIS_MODULE;
ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey;
ibdev->num_ports = num_ports;
@@ -2651,89 +2672,38 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev;
- if (dev->caps.userspace_caps)
- ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
- else
- ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
-
- ibdev->ib_dev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_REREG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
- (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
- (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
- (1ull << IB_USER_VERBS_CMD_OPEN_QP);
-
ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_ops);
- ibdev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) |
- (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) &&
((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) ==
IB_LINK_LAYER_ETHERNET) ||
(mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) ==
- IB_LINK_LAYER_ETHERNET))) {
- ibdev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+ IB_LINK_LAYER_ETHERNET)))
ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_wq_ops);
- }
-
- if (!mlx4_is_slave(ibdev->dev))
- ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fmr_ops);
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
- dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
- ibdev->ib_dev.uverbs_cmd_mask |=
- (1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
+ dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_mw_ops);
- }
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
- ibdev->ib_dev.uverbs_cmd_mask |=
- (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
- (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_xrc_ops);
}
if (check_flow_steering_support(dev)) {
ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED;
- ibdev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fs_ops);
}
+ if (!dev->caps.userspace_caps)
+ ibdev->ib_dev.ops.uverbs_abi_ver =
+ MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
+
mlx4_ib_alloc_eqs(dev, ibdev);
spin_lock_init(&iboe->lock);
- if (init_node_data(ibdev))
+ err = init_node_data(ibdev);
+ if (err)
goto err_map;
mlx4_init_sl2vl_tbl(ibdev);
@@ -2765,6 +2735,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
new_counter_index = kmalloc(sizeof(*new_counter_index),
GFP_KERNEL);
if (!new_counter_index) {
+ err = -ENOMEM;
if (allocated)
mlx4_counter_free(ibdev->dev, counter_index);
goto err_counter;
@@ -2782,8 +2753,10 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
new_counter_index =
kmalloc(sizeof(struct counter_index),
GFP_KERNEL);
- if (!new_counter_index)
+ if (!new_counter_index) {
+ err = -ENOMEM;
goto err_counter;
+ }
new_counter_index->index = counter_index;
new_counter_index->allocated = 0;
list_add_tail(&new_counter_index->list,
@@ -2810,12 +2783,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (err)
goto err_counter;
- ibdev->ib_uc_qpns_bitmap =
- kmalloc_array(BITS_TO_LONGS(ibdev->steer_qpn_count),
- sizeof(long),
- GFP_KERNEL);
- if (!ibdev->ib_uc_qpns_bitmap)
+ ibdev->ib_uc_qpns_bitmap = bitmap_alloc(ibdev->steer_qpn_count,
+ GFP_KERNEL);
+ if (!ibdev->ib_uc_qpns_bitmap) {
+ err = -ENOMEM;
goto err_steer_qp_release;
+ }
if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB) {
bitmap_zero(ibdev->ib_uc_qpns_bitmap,
@@ -2835,18 +2808,21 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
for (j = 1; j <= ibdev->dev->caps.num_ports; j++)
atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]);
- if (mlx4_ib_alloc_diag_counters(ibdev))
+ err = mlx4_ib_alloc_diag_counters(ibdev);
+ if (err)
goto err_steer_free_bitmap;
- rdma_set_device_sysfs_group(&ibdev->ib_dev, &mlx4_attr_group);
- ibdev->ib_dev.driver_id = RDMA_DRIVER_MLX4;
- if (ib_register_device(&ibdev->ib_dev, "mlx4_%d"))
+ err = ib_register_device(&ibdev->ib_dev, "mlx4_%d",
+ &dev->persist->pdev->dev);
+ if (err)
goto err_diag_counters;
- if (mlx4_ib_mad_init(ibdev))
+ err = mlx4_ib_mad_init(ibdev);
+ if (err)
goto err_reg;
- if (mlx4_ib_init_sriov(ibdev))
+ err = mlx4_ib_init_sriov(ibdev);
+ if (err)
goto err_mad;
if (!iboe->nb.notifier_call) {
@@ -2880,7 +2856,14 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
do_slave_init(ibdev, j, 1);
}
}
- return ibdev;
+
+ /* register mlx4 core notifier */
+ ibdev->mlx_nb.notifier_call = mlx4_ib_event;
+ err = mlx4_register_event_notifier(dev, &ibdev->mlx_nb);
+ WARN(err, "failed to register mlx4 event notifier (%d)", err);
+
+ auxiliary_set_drvdata(adev, ibdev);
+ return 0;
err_notif:
if (ibdev->iboe.nb.notifier_call) {
@@ -2902,7 +2885,7 @@ err_diag_counters:
mlx4_ib_diag_cleanup(ibdev);
err_steer_free_bitmap:
- kfree(ibdev->ib_uc_qpns_bitmap);
+ bitmap_free(ibdev->ib_uc_qpns_bitmap);
err_steer_qp_release:
mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
@@ -2924,7 +2907,7 @@ err_pd:
err_dealloc:
ib_dealloc_device(&ibdev->ib_dev);
- return NULL;
+ return err;
}
int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
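A hedged sketch of the bitmap_alloc()/bitmap_zero() pairing the probe path above adopts in place of kmalloc_array(BITS_TO_LONGS(...), ...); the helper name and bit count are illustrative, and bitmap_zalloc() would be the zeroing shorthand.

static unsigned long *example_alloc_qpn_bitmap(unsigned int nbits)
{
	unsigned long *map = bitmap_alloc(nbits, GFP_KERNEL);

	if (map)
		bitmap_zero(map, nbits);
	return map;
}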
@@ -2964,7 +2947,7 @@ int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
{
int err;
size_t flow_size;
- struct ib_flow_attr *flow = NULL;
+ struct ib_flow_attr *flow;
struct ib_flow_spec_ib *ib_spec;
if (is_attach) {
@@ -2982,41 +2965,44 @@ int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
/* Add an empty rule for IB L2 */
memset(&ib_spec->mask, 0, sizeof(ib_spec->mask));
- err = __mlx4_ib_create_flow(&mqp->ibqp, flow,
- IB_FLOW_DOMAIN_NIC,
- MLX4_FS_REGULAR,
- &mqp->reg_id);
- } else {
- err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
+ err = __mlx4_ib_create_flow(&mqp->ibqp, flow, MLX4_DOMAIN_NIC,
+ MLX4_FS_REGULAR, &mqp->reg_id);
+ kfree(flow);
+ return err;
}
- kfree(flow);
- return err;
+
+ return __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
}
-static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
+static void mlx4_ib_remove(struct auxiliary_device *adev)
{
- struct mlx4_ib_dev *ibdev = ibdev_ptr;
+ struct mlx4_adev *madev = container_of(adev, struct mlx4_adev, adev);
+ struct mlx4_dev *dev = madev->mdev;
+ struct mlx4_ib_dev *ibdev = auxiliary_get_drvdata(adev);
int p;
int i;
+ mlx4_unregister_event_notifier(dev, &ibdev->mlx_nb);
+
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
devlink_port_type_clear(mlx4_get_devlink_port(dev, i));
ibdev->ib_active = false;
flush_workqueue(wq);
- mlx4_ib_close_sriov(ibdev);
- mlx4_ib_mad_cleanup(ibdev);
- ib_unregister_device(&ibdev->ib_dev);
- mlx4_ib_diag_cleanup(ibdev);
if (ibdev->iboe.nb.notifier_call) {
if (unregister_netdevice_notifier(&ibdev->iboe.nb))
pr_warn("failure unregistering notifier\n");
ibdev->iboe.nb.notifier_call = NULL;
}
+ mlx4_ib_close_sriov(ibdev);
+ mlx4_ib_mad_cleanup(ibdev);
+ ib_unregister_device(&ibdev->ib_dev);
+ mlx4_ib_diag_cleanup(ibdev);
+
mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
ibdev->steer_qpn_count);
- kfree(ibdev->ib_uc_qpns_bitmap);
+ bitmap_free(ibdev->ib_uc_qpns_bitmap);
iounmap(ibdev->uar_map);
for (p = 0; p < ibdev->num_ports; ++p)
@@ -3034,7 +3020,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
{
- struct mlx4_ib_demux_work **dm = NULL;
+ struct mlx4_ib_demux_work **dm;
struct mlx4_dev *dev = ibdev->dev;
int i;
unsigned long flags;
@@ -3218,11 +3204,13 @@ void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
}
}
-static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
- enum mlx4_dev_event event, unsigned long param)
+static int mlx4_ib_event(struct notifier_block *this, unsigned long event,
+ void *param)
{
+ struct mlx4_ib_dev *ibdev =
+ container_of(this, struct mlx4_ib_dev, mlx_nb);
+ struct mlx4_dev *dev = ibdev->dev;
struct ib_event ibev;
- struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
struct mlx4_eqe *eqe = NULL;
struct ib_event_work *ew;
int p = 0;
@@ -3232,22 +3220,28 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
(event == MLX4_DEV_EVENT_PORT_DOWN))) {
ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
if (!ew)
- return;
+ return NOTIFY_DONE;
INIT_WORK(&ew->work, handle_bonded_port_state_event);
ew->ib_dev = ibdev;
queue_work(wq, &ew->work);
- return;
+ return NOTIFY_DONE;
}
- if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
+ switch (event) {
+ case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
+ break;
+ case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
eqe = (struct mlx4_eqe *)param;
- else
- p = (int) param;
+ break;
+ default:
+ p = *(int *)param;
+ break;
+ }
switch (event) {
case MLX4_DEV_EVENT_PORT_UP:
if (p > ibdev->num_ports)
- return;
+ return NOTIFY_DONE;
if (!mlx4_is_slave(dev) &&
rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
IB_LINK_LAYER_INFINIBAND) {
@@ -3262,7 +3256,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
case MLX4_DEV_EVENT_PORT_DOWN:
if (p > ibdev->num_ports)
- return;
+ return NOTIFY_DONE;
ibev.event = IB_EVENT_PORT_ERR;
break;
@@ -3275,7 +3269,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
ew = kmalloc(sizeof *ew, GFP_ATOMIC);
if (!ew)
- break;
+ return NOTIFY_DONE;
INIT_WORK(&ew->work, handle_port_mgmt_change_event);
memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
@@ -3285,7 +3279,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
queue_work(wq, &ew->work);
else
handle_port_mgmt_change_event(&ew->work);
- return;
+ return NOTIFY_DONE;
case MLX4_DEV_EVENT_SLAVE_INIT:
/* here, p is the slave id */
@@ -3301,7 +3295,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
1);
}
}
- return;
+ return NOTIFY_DONE;
case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
if (mlx4_is_master(dev)) {
@@ -3317,22 +3311,33 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
}
/* here, p is the slave id */
do_slave_init(ibdev, p, 0);
- return;
+ return NOTIFY_DONE;
default:
- return;
+ return NOTIFY_DONE;
}
- ibev.device = ibdev_ptr;
+ ibev.device = &ibdev->ib_dev;
ibev.element.port_num = mlx4_is_bonded(ibdev->dev) ? 1 : (u8)p;
ib_dispatch_event(&ibev);
+ return NOTIFY_DONE;
}
-static struct mlx4_interface mlx4_ib_interface = {
- .add = mlx4_ib_add,
- .remove = mlx4_ib_remove,
- .event = mlx4_ib_event,
+static const struct auxiliary_device_id mlx4_ib_id_table[] = {
+ { .name = MLX4_ADEV_NAME ".ib" },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mlx4_ib_id_table);
+
+static struct mlx4_adrv mlx4_ib_adrv = {
+ .adrv = {
+ .name = "ib",
+ .probe = mlx4_ib_probe,
+ .remove = mlx4_ib_remove,
+ .id_table = mlx4_ib_id_table,
+ },
.protocol = MLX4_PROT_IB_IPV6,
.flags = MLX4_INTFF_BONDING
};
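As a point of comparison, a bare auxiliary_driver (without the mlx4_adrv wrapper carrying .protocol and .flags) would be registered roughly as below; the init function name is illustrative.

static struct auxiliary_driver example_adrv = {
	.name	  = "ib",
	.probe	  = mlx4_ib_probe,
	.remove	  = mlx4_ib_remove,
	.id_table = mlx4_ib_id_table,
};

static int __init example_init(void)
{
	/* binds to auxiliary devices named "<parent modname>.ib" */
	return auxiliary_driver_register(&example_adrv);
}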
@@ -3345,11 +3350,19 @@ static int __init mlx4_ib_init(void)
if (!wq)
return -ENOMEM;
- err = mlx4_ib_mcg_init();
+ err = mlx4_ib_qp_event_init();
+ if (err)
+ goto clean_qp_event;
+
+ err = mlx4_ib_cm_init();
if (err)
goto clean_wq;
- err = mlx4_register_interface(&mlx4_ib_interface);
+ err = mlx4_ib_mcg_init();
+ if (err)
+ goto clean_cm;
+
+ err = mlx4_register_auxiliary_driver(&mlx4_ib_adrv);
if (err)
goto clean_mcg;
@@ -3358,15 +3371,23 @@ static int __init mlx4_ib_init(void)
clean_mcg:
mlx4_ib_mcg_destroy();
+clean_cm:
+ mlx4_ib_cm_destroy();
+
clean_wq:
+ mlx4_ib_qp_event_cleanup();
+
+clean_qp_event:
destroy_workqueue(wq);
return err;
}
static void __exit mlx4_ib_cleanup(void)
{
- mlx4_unregister_interface(&mlx4_ib_interface);
+ mlx4_unregister_auxiliary_driver(&mlx4_ib_adrv);
mlx4_ib_mcg_destroy();
+ mlx4_ib_cm_destroy();
+ mlx4_ib_qp_event_cleanup();
destroy_workqueue(wq);
}
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index d844831179cf..e279e69b9a51 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -43,7 +43,7 @@
#define MAX_VFS 80
#define MAX_PEND_REQS_PER_FUNC 4
-#define MAD_TIMEOUT_MS 2000
+#define MAD_TIMEOUT_SEC 2
#define mcg_warn(fmt, arg...) pr_warn("MCG WARNING: " fmt, ##arg)
#define mcg_error(fmt, arg...) pr_err(fmt, ##arg)
@@ -270,7 +270,7 @@ static int send_join_to_wire(struct mcast_group *group, struct ib_sa_mad *sa_mad
if (!ret) {
/* calls mlx4_ib_mcg_timeout_handler */
queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
- msecs_to_jiffies(MAD_TIMEOUT_MS));
+ secs_to_jiffies(MAD_TIMEOUT_SEC));
}
return ret;
@@ -309,7 +309,7 @@ static int send_leave_to_wire(struct mcast_group *group, u8 join_state)
if (!ret) {
/* calls mlx4_ib_mcg_timeout_handler */
queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
- msecs_to_jiffies(MAD_TIMEOUT_MS));
+ secs_to_jiffies(MAD_TIMEOUT_SEC));
}
return ret;
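A brief hedged illustration of the timeout conversion above: for whole-second delays, secs_to_jiffies() gives the same result as the old millisecond detour.

static unsigned long example_mad_timeout(void)
{
	/* equivalent to msecs_to_jiffies(MAD_TIMEOUT_SEC * MSEC_PER_SEC) */
	return secs_to_jiffies(MAD_TIMEOUT_SEC);
}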
@@ -944,7 +944,7 @@ int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port,
switch (sa_mad->mad_hdr.method) {
case IB_MGMT_METHOD_SET:
may_create = 1;
- /* fall through */
+ fallthrough;
case IB_SA_METHOD_DELETE:
req = kzalloc(sizeof *req, GFP_KERNEL);
if (!req)
@@ -988,53 +988,63 @@ int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port,
}
static ssize_t sysfs_show_group(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr, char *buf)
{
struct mcast_group *group =
container_of(attr, struct mcast_group, dentry);
struct mcast_req *req = NULL;
- char pending_str[40];
char state_str[40];
- ssize_t len = 0;
- int f;
+ char pending_str[40];
+ int len;
+ int i;
+ u32 hoplimit;
if (group->state == MCAST_IDLE)
- sprintf(state_str, "%s", get_state_string(group->state));
+ scnprintf(state_str, sizeof(state_str), "%s",
+ get_state_string(group->state));
else
- sprintf(state_str, "%s(TID=0x%llx)",
- get_state_string(group->state),
- be64_to_cpu(group->last_req_tid));
+ scnprintf(state_str, sizeof(state_str), "%s(TID=0x%llx)",
+ get_state_string(group->state),
+ be64_to_cpu(group->last_req_tid));
+
if (list_empty(&group->pending_list)) {
- sprintf(pending_str, "No");
+ scnprintf(pending_str, sizeof(pending_str), "No");
} else {
- req = list_first_entry(&group->pending_list, struct mcast_req, group_list);
- sprintf(pending_str, "Yes(TID=0x%llx)",
- be64_to_cpu(req->sa_mad.mad_hdr.tid));
+ req = list_first_entry(&group->pending_list, struct mcast_req,
+ group_list);
+ scnprintf(pending_str, sizeof(pending_str), "Yes(TID=0x%llx)",
+ be64_to_cpu(req->sa_mad.mad_hdr.tid));
}
- len += sprintf(buf + len, "%1d [%02d,%02d,%02d] %4d %4s %5s ",
- group->rec.scope_join_state & 0xf,
- group->members[2], group->members[1], group->members[0],
- atomic_read(&group->refcount),
- pending_str,
- state_str);
- for (f = 0; f < MAX_VFS; ++f)
- if (group->func[f].state == MCAST_MEMBER)
- len += sprintf(buf + len, "%d[%1x] ",
- f, group->func[f].join_state);
-
- len += sprintf(buf + len, "\t\t(%4hx %4x %2x %2x %2x %2x %2x "
- "%4x %4x %2x %2x)\n",
- be16_to_cpu(group->rec.pkey),
- be32_to_cpu(group->rec.qkey),
- (group->rec.mtusel_mtu & 0xc0) >> 6,
- group->rec.mtusel_mtu & 0x3f,
- group->rec.tclass,
- (group->rec.ratesel_rate & 0xc0) >> 6,
- group->rec.ratesel_rate & 0x3f,
- (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0xf0000000) >> 28,
- (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x0fffff00) >> 8,
- be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x000000ff,
- group->rec.proxy_join);
+
+ len = sysfs_emit(buf, "%1d [%02d,%02d,%02d] %4d %4s %5s ",
+ group->rec.scope_join_state & 0xf,
+ group->members[2],
+ group->members[1],
+ group->members[0],
+ atomic_read(&group->refcount),
+ pending_str,
+ state_str);
+
+ for (i = 0; i < MAX_VFS; i++) {
+ if (group->func[i].state == MCAST_MEMBER)
+ len += sysfs_emit_at(buf, len, "%d[%1x] ", i,
+ group->func[i].join_state);
+ }
+
+ hoplimit = be32_to_cpu(group->rec.sl_flowlabel_hoplimit);
+ len += sysfs_emit_at(buf, len,
+ "\t\t(%4hx %4x %2x %2x %2x %2x %2x %4x %4x %2x %2x)\n",
+ be16_to_cpu(group->rec.pkey),
+ be32_to_cpu(group->rec.qkey),
+ (group->rec.mtusel_mtu & 0xc0) >> 6,
+ (group->rec.mtusel_mtu & 0x3f),
+ group->rec.tclass,
+ (group->rec.ratesel_rate & 0xc0) >> 6,
+ (group->rec.ratesel_rate & 0x3f),
+ (hoplimit & 0xf0000000) >> 28,
+ (hoplimit & 0x0fffff00) >> 8,
+ (hoplimit & 0x000000ff),
+ group->rec.proxy_join);
return len;
}
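A compact, hedged illustration of the sysfs_emit()/sysfs_emit_at() pairing used in the rewritten show function above; format strings and values are placeholders.

static ssize_t example_group_show(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	int len;

	len = sysfs_emit(buf, "%s ", "IDLE");
	/* append at the current offset, still bounded to the sysfs page */
	len += sysfs_emit_at(buf, len, "refcount=%d\n", 1);
	return len;
}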
@@ -1081,7 +1091,7 @@ static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy
for (i = 0; i < MAX_VFS; ++i)
clean_vf_mcast(ctx, i);
- end = jiffies + msecs_to_jiffies(MAD_TIMEOUT_MS + 3000);
+ end = jiffies + secs_to_jiffies(MAD_TIMEOUT_SEC + 3);
do {
count = 0;
mutex_lock(&ctx->mcg_table_lock);
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 26897102057d..5df5b955114e 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -38,6 +38,7 @@
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/idr.h>
+#include <linux/notifier.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
@@ -146,11 +147,6 @@ struct mlx4_ib_mw {
struct mlx4_mw mmw;
};
-struct mlx4_ib_fmr {
- struct ib_fmr ibfmr;
- struct mlx4_fmr mfmr;
-};
-
#define MAX_REGS_PER_FLOW 2
struct mlx4_flow_reg_id {
@@ -238,7 +234,8 @@ enum mlx4_ib_mad_ifc_flags {
};
enum {
- MLX4_NUM_TUNNEL_BUFS = 256,
+ MLX4_NUM_TUNNEL_BUFS = 512,
+ MLX4_NUM_WIRE_BUFS = 2048,
};
struct mlx4_ib_tunnel_header {
@@ -303,6 +300,26 @@ struct mlx4_ib_rss {
u8 rss_key[MLX4_EN_RSS_KEY_SIZE];
};
+enum {
+ /*
+ * Largest possible UD header: send with GRH and immediate
+ * data plus 18 bytes for an Ethernet header with VLAN/802.1Q
+ * tag. (LRH would only use 8 bytes, so Ethernet is the
+ * biggest case)
+ */
+ MLX4_IB_UD_HEADER_SIZE = 82,
+ MLX4_IB_LSO_HEADER_SPARE = 128,
+};
+
+struct mlx4_ib_sqp {
+ int pkey_index;
+ u32 qkey;
+ u32 send_psn;
+ struct ib_ud_header ud_header;
+ u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
+ struct ib_qp *roce_v2_gsi;
+};
+
struct mlx4_ib_qp {
union {
struct ib_qp ibqp;
@@ -348,7 +365,10 @@ struct mlx4_ib_qp {
struct mlx4_wqn_range *wqn_range;
/* Number of RSS QP parents that uses this WQ */
u32 rss_usecnt;
- struct mlx4_ib_rss *rss_ctx;
+ union {
+ struct mlx4_ib_rss *rss_ctx;
+ struct mlx4_ib_sqp *sqp;
+ };
};
struct mlx4_ib_srq {
@@ -371,6 +391,10 @@ struct mlx4_ib_ah {
union mlx4_ext_av av;
};
+struct mlx4_ib_rwq_ind_table {
+ struct ib_rwq_ind_table ib_rwq_ind_tbl;
+};
+
/****************************************/
/* alias guid support */
/****************************************/
@@ -406,7 +430,7 @@ struct mlx4_sriov_alias_guid_port_rec_det {
struct mlx4_sriov_alias_guid_info_rec_det all_rec_per_port[NUM_ALIAS_GUID_REC_IN_PORT];
struct workqueue_struct *wq;
struct delayed_work alias_guid_work;
- u8 port;
+ u32 port;
u32 state_flags;
struct mlx4_sriov_alias_guid *parent;
struct list_head cb_list;
@@ -459,6 +483,7 @@ struct mlx4_ib_demux_pv_ctx {
struct ib_pd *pd;
struct work_struct work;
struct workqueue_struct *wq;
+ struct workqueue_struct *wi_wq;
struct mlx4_ib_demux_pv_qp qp[2];
};
@@ -466,6 +491,7 @@ struct mlx4_ib_demux_ctx {
struct ib_device *ib_dev;
int port;
struct workqueue_struct *wq;
+ struct workqueue_struct *wi_wq;
struct workqueue_struct *ud_wq;
spinlock_t ud_lock;
atomic64_t subnet_prefix;
@@ -497,6 +523,7 @@ struct mlx4_ib_sriov {
spinlock_t id_map_lock;
struct rb_root sl_id_map;
struct list_head cm_list;
+ struct xarray xa_rej_tmout;
};
struct gid_cache_context {
@@ -508,6 +535,7 @@ struct gid_entry {
union ib_gid gid;
enum ib_gid_type gid_type;
struct gid_cache_context *ctx;
+ u16 vlan_id;
};
struct mlx4_port_gid_table {
@@ -574,7 +602,7 @@ struct mlx4_ib_counters {
#define MLX4_DIAG_COUNTERS_TYPES 2
struct mlx4_ib_diag_counters {
- const char **name;
+ struct rdma_stat_desc *descs;
u32 *offset;
u32 num_counters;
};
@@ -617,6 +645,7 @@ struct mlx4_ib_dev {
spinlock_t reset_flow_resource_lock;
struct list_head qp_list;
struct mlx4_ib_diag_counters diag_counters[MLX4_DIAG_COUNTERS_TYPES];
+ struct notifier_block mlx_nb;
};
struct ib_event_work {
@@ -630,7 +659,7 @@ struct mlx4_ib_qp_tunnel_init_attr {
struct ib_qp_init_attr init_attr;
int slave;
enum ib_qp_type proxy_qp_type;
- u8 port;
+ u32 port;
};
struct mlx4_uverbs_ex_query_device {
@@ -638,6 +667,9 @@ struct mlx4_uverbs_ex_query_device {
__u32 reserved;
};
+/* 4k - 4G */
+#define MLX4_PAGE_SIZE_SUPPORTED ((unsigned long)GENMASK_ULL(31, 12))
+
static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
{
return container_of(ibdev, struct mlx4_ib_dev, ib_dev);
@@ -678,11 +710,6 @@ static inline struct mlx4_ib_mw *to_mmw(struct ib_mw *ibmw)
return container_of(ibmw, struct mlx4_ib_mw, ibmw);
}
-static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
-{
- return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr);
-}
-
static inline struct mlx4_ib_flow *to_mflow(struct ib_flow *ibflow)
{
return container_of(ibflow, struct mlx4_ib_flow, ibflow);
@@ -732,46 +759,47 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem);
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata);
int mlx4_ib_dereg_mr(struct ib_mr *mr, struct ib_udata *udata);
-struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata);
+int mlx4_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
int mlx4_ib_dealloc_mw(struct ib_mw *mw);
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
+ u32 max_num_sg);
int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
-struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs);
int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
-int mlx4_ib_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags,
+int mlx4_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr,
int slave_sgid_index, u8 *s_mac, u16 vlan_tag);
int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
-void mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags);
+static inline int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags)
+{
+ return 0;
+}
int mlx4_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr,
struct ib_udata *udata);
int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
-void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
+int mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index);
int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
-struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata);
+int mlx4_ib_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
int mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata);
void mlx4_ib_drain_sq(struct ib_qp *qp);
void mlx4_ib_drain_rq(struct ib_qp *qp);
@@ -787,31 +815,24 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const void *in_mad, void *response_mad);
-int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in, size_t in_mad_size,
- struct ib_mad_hdr *out, size_t *out_mad_size,
- u16 *out_mad_pkey_index);
+ const struct ib_mad *in, struct ib_mad *out,
+ size_t *out_mad_size, u16 *out_mad_pkey_index);
int mlx4_ib_mad_init(struct mlx4_ib_dev *dev);
void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev);
-struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int mr_access_flags,
- struct ib_fmr_attr *fmr_attr);
-int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages,
- u64 iova);
-int mlx4_ib_unmap_fmr(struct list_head *fmr_list);
-int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr);
-int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+int __mlx4_ib_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props, int netw_view);
-int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+int __mlx4_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
u16 *pkey, int netw_view);
-int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+int __mlx4_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
union ib_gid *gid, int netw_view);
static inline bool mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
{
- u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3;
+ u32 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3;
if (rdma_port_get_link_layer(ah->ibah.device, port) == IB_LINK_LAYER_ETHERNET)
return true;
@@ -825,7 +846,7 @@ void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave);
int mlx4_ib_mcg_init(void);
void mlx4_ib_mcg_destroy(void);
-int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid);
+int mlx4_ib_find_real_gid(struct ib_device *ibdev, u32 port, __be64 guid);
int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, int slave,
struct ib_sa_mad *sa_mad);
@@ -835,16 +856,16 @@ int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave,
int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
union ib_gid *gid);
-void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
+void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u32 port_num,
enum ib_event_type type);
void mlx4_ib_tunnels_update_work(struct work_struct *work);
-int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
+int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u32 port,
enum ib_qp_type qpt, struct ib_wc *wc,
struct ib_grh *grh, struct ib_mad *mad);
-int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
+int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u32 port,
enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
u32 qkey, struct rdma_ah_attr *attr, u8 *s_mac,
u16 vlan_id, struct ib_mad *mad);
@@ -868,10 +889,10 @@ void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port);
void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
int block_num,
- u8 port_num, u8 *p_data);
+ u32 port_num, u8 *p_data);
void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev,
- int block_num, u8 port_num,
+ int block_num, u32 port_num,
u8 *p_data);
int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
@@ -892,10 +913,10 @@ int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn);
void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count);
int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
int is_attach);
-int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
- u64 start, u64 length, u64 virt_addr,
- int mr_access_flags, struct ib_pd *pd,
- struct ib_udata *udata);
+struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start,
+ u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_pd *pd,
+ struct ib_udata *udata);
int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
const struct ib_gid_attr *attr);
@@ -911,12 +932,32 @@ int mlx4_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
int mlx4_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
u32 wq_attr_mask, struct ib_udata *udata);
-struct ib_rwq_ind_table
-*mlx4_ib_create_rwq_ind_table(struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr,
- struct ib_udata *udata);
-int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
-int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
- int *num_of_mtts);
+int mlx4_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata);
+static inline int
+mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table)
+{
+ return 0;
+}
+static inline int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
+ u64 start,
+ int *num_of_mtts)
+{
+ unsigned long pg_sz;
+
+ pg_sz = ib_umem_find_best_pgsz(umem, MLX4_PAGE_SIZE_SUPPORTED, start);
+ if (!pg_sz)
+ return -EOPNOTSUPP;
+
+ *num_of_mtts = ib_umem_num_dma_blocks(umem, pg_sz);
+ return order_base_2(pg_sz);
+}
+
+int mlx4_ib_cm_init(void);
+void mlx4_ib_cm_destroy(void);
+
+int mlx4_ib_qp_event_init(void);
+void mlx4_ib_qp_event_cleanup(void);
#endif /* MLX4_IB_H */
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 355205a28544..94464f1694d9 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -87,289 +87,24 @@ err_free:
return ERR_PTR(err);
}
-enum {
- MLX4_MAX_MTT_SHIFT = 31
-};
-
-static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev,
- struct mlx4_mtt *mtt,
- u64 mtt_size, u64 mtt_shift, u64 len,
- u64 cur_start_addr, u64 *pages,
- int *start_index, int *npages)
-{
- u64 cur_end_addr = cur_start_addr + len;
- u64 cur_end_addr_aligned = 0;
- u64 mtt_entries;
- int err = 0;
- int k;
-
- len += (cur_start_addr & (mtt_size - 1ULL));
- cur_end_addr_aligned = round_up(cur_end_addr, mtt_size);
- len += (cur_end_addr_aligned - cur_end_addr);
- if (len & (mtt_size - 1ULL)) {
- pr_warn("write_block: len %llx is not aligned to mtt_size %llx\n",
- len, mtt_size);
- return -EINVAL;
- }
-
- mtt_entries = (len >> mtt_shift);
-
- /*
- * Align the MTT start address to the mtt_size.
- * Required to handle cases when the MR starts in the middle of an MTT
- * record. Was not required in old code since the physical addresses
- * provided by the dma subsystem were page aligned, which was also the
- * MTT size.
- */
- cur_start_addr = round_down(cur_start_addr, mtt_size);
- /* A new block is started ... */
- for (k = 0; k < mtt_entries; ++k) {
- pages[*npages] = cur_start_addr + (mtt_size * k);
- (*npages)++;
- /*
- * Be friendly to mlx4_write_mtt() and pass it chunks of
- * appropriate size.
- */
- if (*npages == PAGE_SIZE / sizeof(u64)) {
- err = mlx4_write_mtt(dev->dev, mtt, *start_index,
- *npages, pages);
- if (err)
- return err;
-
- (*start_index) += *npages;
- *npages = 0;
- }
- }
-
- return 0;
-}
-
-static inline u64 alignment_of(u64 ptr)
-{
- return ilog2(ptr & (~(ptr - 1)));
-}
-
-static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start,
- u64 current_block_end,
- u64 block_shift)
-{
- /* Check whether the alignment of the new block is aligned as well as
- * the previous block.
- * Block address must start with zeros till size of entity_size.
- */
- if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0)
- /*
- * It is not as well aligned as the previous block-reduce the
- * mtt size accordingly. Here we take the last right bit which
- * is 1.
- */
- block_shift = alignment_of(next_block_start);
-
- /*
- * Check whether the alignment of the end of previous block - is it
- * aligned as well as the start of the block
- */
- if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0)
- /*
- * It is not as well aligned as the start of the block -
- * reduce the mtt size accordingly.
- */
- block_shift = alignment_of(current_block_end);
-
- return block_shift;
-}
-
int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem)
{
- u64 *pages;
- u64 len = 0;
- int err = 0;
- u64 mtt_size;
- u64 cur_start_addr = 0;
- u64 mtt_shift;
- int start_index = 0;
- int npages = 0;
- struct scatterlist *sg;
- int i;
-
- pages = (u64 *) __get_free_page(GFP_KERNEL);
- if (!pages)
- return -ENOMEM;
+ struct ib_block_iter biter;
+ int err, i = 0;
+ u64 addr;
- mtt_shift = mtt->page_shift;
- mtt_size = 1ULL << mtt_shift;
-
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
- if (cur_start_addr + len == sg_dma_address(sg)) {
- /* still the same block */
- len += sg_dma_len(sg);
- continue;
- }
- /*
- * A new block is started ...
- * If len is malaligned, write an extra mtt entry to cover the
- * misaligned area (round up the division)
- */
- err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size,
- mtt_shift, len,
- cur_start_addr,
- pages, &start_index,
- &npages);
+ rdma_umem_for_each_dma_block(umem, &biter, BIT(mtt->page_shift)) {
+ addr = rdma_block_iter_dma_address(&biter);
+ err = mlx4_write_mtt(dev->dev, mtt, i++, 1, &addr);
if (err)
- goto out;
-
- cur_start_addr = sg_dma_address(sg);
- len = sg_dma_len(sg);
+ return err;
}
-
- /* Handle the last block */
- if (len > 0) {
- /*
- * If len is malaligned, write an extra mtt entry to cover
- * the misaligned area (round up the division)
- */
- err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size,
- mtt_shift, len,
- cur_start_addr, pages,
- &start_index, &npages);
- if (err)
- goto out;
- }
-
- if (npages)
- err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages);
-
-out:
- free_page((unsigned long) pages);
- return err;
-}
-
-/*
- * Calculate optimal mtt size based on contiguous pages.
- * Function will return also the number of pages that are not aligned to the
- * calculated mtt_size to be added to total number of pages. For that we should
- * check the first chunk length & last chunk length and if not aligned to
- * mtt_size we should increment the non_aligned_pages number. All chunks in the
- * middle already handled as part of mtt shift calculation for both their start
- * & end addresses.
- */
-int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
- int *num_of_mtts)
-{
- u64 block_shift = MLX4_MAX_MTT_SHIFT;
- u64 min_shift = umem->page_shift;
- u64 last_block_aligned_end = 0;
- u64 current_block_start = 0;
- u64 first_block_start = 0;
- u64 current_block_len = 0;
- u64 last_block_end = 0;
- struct scatterlist *sg;
- u64 current_block_end;
- u64 misalignment_bits;
- u64 next_block_start;
- u64 total_len = 0;
- int i;
-
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
- /*
- * Initialization - save the first chunk start as the
- * current_block_start - block means contiguous pages.
- */
- if (current_block_len == 0 && current_block_start == 0) {
- current_block_start = sg_dma_address(sg);
- first_block_start = current_block_start;
- /*
- * Find the bits that are different between the physical
- * address and the virtual address for the start of the
- * MR.
- * umem_get aligned the start_va to a page boundary.
- * Therefore, we need to align the start va to the same
- * boundary.
- * misalignment_bits is needed to handle the case of a
- * single memory region. In this case, the rest of the
- * logic will not reduce the block size. If we use a
- * block size which is bigger than the alignment of the
- * misalignment bits, we might use the virtual page
- * number instead of the physical page number, resulting
- * in access to the wrong data.
- */
- misalignment_bits =
- (start_va & (~(((u64)(BIT(umem->page_shift))) - 1ULL)))
- ^ current_block_start;
- block_shift = min(alignment_of(misalignment_bits),
- block_shift);
- }
-
- /*
- * Go over the scatter entries and check if they continue the
- * previous scatter entry.
- */
- next_block_start = sg_dma_address(sg);
- current_block_end = current_block_start + current_block_len;
- /* If we have a split (non-contig.) between two blocks */
- if (current_block_end != next_block_start) {
- block_shift = mlx4_ib_umem_calc_block_mtt
- (next_block_start,
- current_block_end,
- block_shift);
-
- /*
- * If we reached the minimum shift for 4k page we stop
- * the loop.
- */
- if (block_shift <= min_shift)
- goto end;
-
- /*
- * If not saved yet we are in first block - we save the
- * length of first block to calculate the
- * non_aligned_pages number at the end.
- */
- total_len += current_block_len;
-
- /* Start a new block */
- current_block_start = next_block_start;
- current_block_len = sg_dma_len(sg);
- continue;
- }
- /* The scatter entry is another part of the current block,
- * increase the block size.
- * An entry in the scatter can be larger than 4k (page) as of
- * dma mapping which merge some blocks together.
- */
- current_block_len += sg_dma_len(sg);
- }
-
- /* Account for the last block in the total len */
- total_len += current_block_len;
- /* Add to the first block the misalignment that it suffers from. */
- total_len += (first_block_start & ((1ULL << block_shift) - 1ULL));
- last_block_end = current_block_start + current_block_len;
- last_block_aligned_end = round_up(last_block_end, 1ULL << block_shift);
- total_len += (last_block_aligned_end - last_block_end);
-
- if (total_len & ((1ULL << block_shift) - 1ULL))
- pr_warn("misaligned total length detected (%llu, %llu)!",
- total_len, block_shift);
-
- *num_of_mtts = total_len >> block_shift;
-end:
- if (block_shift < min_shift) {
- /*
- * If shift is less than the min we set a warning and return the
- * min shift.
- */
- pr_warn("umem_calc_optimal_mtt_size - unexpected shift %lld\n", block_shift);
-
- block_shift = min_shift;
- }
- return block_shift;
+ return 0;
}
-static struct ib_umem *mlx4_get_umem_mr(struct ib_udata *udata, u64 start,
- u64 length, u64 virt_addr,
- int access_flags)
+static struct ib_umem *mlx4_get_umem_mr(struct ib_device *device, u64 start,
+ u64 length, int access_flags)
{
/*
* Force registering the memory as writable if the underlying pages
@@ -378,31 +113,33 @@ static struct ib_umem *mlx4_get_umem_mr(struct ib_udata *udata, u64 start,
* again
*/
if (!ib_access_writable(access_flags)) {
+ unsigned long untagged_start = untagged_addr(start);
struct vm_area_struct *vma;
- down_read(&current->mm->mmap_sem);
+ mmap_read_lock(current->mm);
/*
* FIXME: Ideally this would iterate over all the vmas that
* cover the memory, but for now it requires a single vma to
* entirely cover the MR to support RO mappings.
*/
- vma = find_vma(current->mm, start);
- if (vma && vma->vm_end >= start + length &&
- vma->vm_start <= start) {
+ vma = find_vma(current->mm, untagged_start);
+ if (vma && vma->vm_end >= untagged_start + length &&
+ vma->vm_start <= untagged_start) {
if (vma->vm_flags & VM_WRITE)
access_flags |= IB_ACCESS_LOCAL_WRITE;
} else {
access_flags |= IB_ACCESS_LOCAL_WRITE;
}
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
}
- return ib_umem_get(udata, start, length, access_flags, 0);
+ return ib_umem_get(device, start, length, access_flags);
}
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
@@ -411,19 +148,24 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int err;
int n;
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
- mr->umem =
- mlx4_get_umem_mr(udata, start, length, virt_addr, access_flags);
+ mr->umem = mlx4_get_umem_mr(pd->device, start, length, access_flags);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err_free;
}
- n = ib_umem_page_count(mr->umem);
shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n);
+ if (shift < 0) {
+ err = shift;
+ goto err_umem;
+ }
err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
convert_access(access_flags), n, shift, &mr->mmr);
@@ -439,8 +181,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto err_mr;
mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
- mr->ibmr.length = length;
- mr->ibmr.iova = virt_addr;
mr->ibmr.page_size = 1U << shift;
return &mr->ibmr;
@@ -457,10 +197,10 @@ err_free:
return ERR_PTR(err);
}
-int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
- u64 start, u64 length, u64 virt_addr,
- int mr_access_flags, struct ib_pd *pd,
- struct ib_udata *udata)
+struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start,
+ u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_pd *pd,
+ struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(mr->device);
struct mlx4_ib_mr *mmr = to_mmr(mr);
@@ -473,9 +213,8 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
* race exists.
*/
err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry);
-
if (err)
- return err;
+ return ERR_PTR(err);
if (flags & IB_MR_REREG_PD) {
err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry,
@@ -505,7 +244,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
ib_umem_release(mmr->umem);
- mmr->umem = mlx4_get_umem_mr(udata, start, length, virt_addr,
+ mmr->umem = mlx4_get_umem_mr(mr->device, start, length,
mr_access_flags);
if (IS_ERR(mmr->umem)) {
err = PTR_ERR(mmr->umem);
@@ -513,8 +252,8 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
mmr->umem = NULL;
goto release_mpt_entry;
}
- n = ib_umem_page_count(mmr->umem);
- shift = mmr->umem->page_shift;
+ n = ib_umem_num_dma_blocks(mmr->umem, PAGE_SIZE);
+ shift = PAGE_SHIFT;
err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
virt_addr, length, n, shift,
@@ -543,8 +282,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
release_mpt_entry:
mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry);
-
- return err;
+ if (err)
+ return ERR_PTR(err);
+ return NULL;
}
static int
@@ -612,37 +352,27 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
return 0;
}
-struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata)
+int mlx4_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
{
- struct mlx4_ib_dev *dev = to_mdev(pd->device);
- struct mlx4_ib_mw *mw;
+ struct mlx4_ib_dev *dev = to_mdev(ibmw->device);
+ struct mlx4_ib_mw *mw = to_mmw(ibmw);
int err;
- mw = kmalloc(sizeof(*mw), GFP_KERNEL);
- if (!mw)
- return ERR_PTR(-ENOMEM);
-
- err = mlx4_mw_alloc(dev->dev, to_mpd(pd)->pdn,
- to_mlx4_type(type), &mw->mmw);
+ err = mlx4_mw_alloc(dev->dev, to_mpd(ibmw->pd)->pdn,
+ to_mlx4_type(ibmw->type), &mw->mmw);
if (err)
- goto err_free;
+ return err;
err = mlx4_mw_enable(dev->dev, &mw->mmw);
if (err)
goto err_mw;
- mw->ibmw.rkey = mw->mmw.key;
-
- return &mw->ibmw;
+ ibmw->rkey = mw->mmw.key;
+ return 0;
err_mw:
mlx4_mw_free(dev->dev, &mw->mmw);
-
-err_free:
- kfree(mw);
-
- return ERR_PTR(err);
+ return err;
}
int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
@@ -650,13 +380,11 @@ int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
struct mlx4_ib_mw *mw = to_mmw(ibmw);
mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw);
- kfree(mw);
-
return 0;
}
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+ u32 max_num_sg)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
struct mlx4_ib_mr *mr;
@@ -699,99 +427,6 @@ err_free:
return ERR_PTR(err);
}
-struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
- struct ib_fmr_attr *fmr_attr)
-{
- struct mlx4_ib_dev *dev = to_mdev(pd->device);
- struct mlx4_ib_fmr *fmr;
- int err = -ENOMEM;
-
- fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
- if (!fmr)
- return ERR_PTR(-ENOMEM);
-
- err = mlx4_fmr_alloc(dev->dev, to_mpd(pd)->pdn, convert_access(acc),
- fmr_attr->max_pages, fmr_attr->max_maps,
- fmr_attr->page_shift, &fmr->mfmr);
- if (err)
- goto err_free;
-
- err = mlx4_fmr_enable(to_mdev(pd->device)->dev, &fmr->mfmr);
- if (err)
- goto err_mr;
-
- fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mfmr.mr.key;
-
- return &fmr->ibfmr;
-
-err_mr:
- (void) mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr);
-
-err_free:
- kfree(fmr);
-
- return ERR_PTR(err);
-}
-
-int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
- int npages, u64 iova)
-{
- struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
- struct mlx4_ib_dev *dev = to_mdev(ifmr->ibfmr.device);
-
- return mlx4_map_phys_fmr(dev->dev, &ifmr->mfmr, page_list, npages, iova,
- &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
-}
-
-int mlx4_ib_unmap_fmr(struct list_head *fmr_list)
-{
- struct ib_fmr *ibfmr;
- int err;
- struct mlx4_dev *mdev = NULL;
-
- list_for_each_entry(ibfmr, fmr_list, list) {
- if (mdev && to_mdev(ibfmr->device)->dev != mdev)
- return -EINVAL;
- mdev = to_mdev(ibfmr->device)->dev;
- }
-
- if (!mdev)
- return 0;
-
- list_for_each_entry(ibfmr, fmr_list, list) {
- struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
-
- mlx4_fmr_unmap(mdev, &ifmr->mfmr, &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
- }
-
- /*
- * Make sure all MPT status updates are visible before issuing
- * SYNC_TPT firmware command.
- */
- wmb();
-
- err = mlx4_SYNC_TPT(mdev);
- if (err)
- pr_warn("SYNC_TPT error %d when "
- "unmapping FMRs\n", err);
-
- return 0;
-}
-
-int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr)
-{
- struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
- struct mlx4_ib_dev *dev = to_mdev(ibfmr->device);
- int err;
-
- err = mlx4_fmr_free(dev->dev, &ifmr->mfmr);
-
- if (!err)
- kfree(ifmr);
-
- return err;
-}
-
static int mlx4_set_page(struct ib_mr *ibmr, u64 addr)
{
struct mlx4_ib_mr *mr = to_mmr(ibmr);
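
Editor's note on the mr.c hunks above: the read-only registration check in mlx4_get_umem_mr() moves from mmap_sem to the mmap_read_lock() helpers and untags the user address before the VMA lookup. A minimal standalone sketch of that locking pattern follows; everything except the mm APIs (untagged_addr, mmap_read_lock/unlock, find_vma) is illustrative and not part of the driver.

#include <linux/mm.h>
#include <linux/sched/mm.h>

/* Illustrative only: decide whether a user range is mapped writable. */
static bool range_is_writable(unsigned long start, u64 length)
{
	unsigned long addr = untagged_addr(start);	/* strip tag bits (e.g. arm64 TBI) */
	struct vm_area_struct *vma;
	bool writable = false;

	mmap_read_lock(current->mm);
	vma = find_vma(current->mm, addr);
	/* A single VMA must cover the whole range for the check to mean anything. */
	if (vma && vma->vm_start <= addr && vma->vm_end >= addr + length)
		writable = vma->vm_flags & VM_WRITE;
	mmap_read_unlock(current->mm);

	return writable;
}
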
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 5221c0794d1d..f2887ae6390e 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -68,27 +68,6 @@ enum {
};
enum {
- /*
- * Largest possible UD header: send with GRH and immediate
- * data plus 18 bytes for an Ethernet header with VLAN/802.1Q
- * tag. (LRH would only use 8 bytes, so Ethernet is the
- * biggest case)
- */
- MLX4_IB_UD_HEADER_SIZE = 82,
- MLX4_IB_LSO_HEADER_SPARE = 128,
-};
-
-struct mlx4_ib_sqp {
- struct mlx4_ib_qp qp;
- int pkey_index;
- u32 qkey;
- u32 send_psn;
- struct ib_ud_header ud_header;
- u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
- struct ib_qp *roce_v2_gsi;
-};
-
-enum {
MLX4_IB_MIN_SQ_STRIDE = 6,
MLX4_IB_CACHE_LINE_SIZE = 64,
};
@@ -123,10 +102,13 @@ enum mlx4_ib_source_type {
MLX4_IB_RWQ_SRC = 1,
};
-static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
-{
- return container_of(mqp, struct mlx4_ib_sqp, qp);
-}
+struct mlx4_ib_qp_event_work {
+ struct work_struct work;
+ struct mlx4_qp *qp;
+ enum mlx4_event type;
+};
+
+static struct workqueue_struct *mlx4_ib_qp_event_wq;
static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
@@ -226,50 +208,77 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n)
}
}
+static void mlx4_ib_handle_qp_event(struct work_struct *_work)
+{
+ struct mlx4_ib_qp_event_work *qpe_work =
+ container_of(_work, struct mlx4_ib_qp_event_work, work);
+ struct ib_qp *ibqp = &to_mibqp(qpe_work->qp)->ibqp;
+ struct ib_event event = {};
+
+ event.device = ibqp->device;
+ event.element.qp = ibqp;
+
+ switch (qpe_work->type) {
+ case MLX4_EVENT_TYPE_PATH_MIG:
+ event.event = IB_EVENT_PATH_MIG;
+ break;
+ case MLX4_EVENT_TYPE_COMM_EST:
+ event.event = IB_EVENT_COMM_EST;
+ break;
+ case MLX4_EVENT_TYPE_SQ_DRAINED:
+ event.event = IB_EVENT_SQ_DRAINED;
+ break;
+ case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
+ event.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ break;
+ case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
+ event.event = IB_EVENT_QP_FATAL;
+ break;
+ case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
+ event.event = IB_EVENT_PATH_MIG_ERR;
+ break;
+ case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ event.event = IB_EVENT_QP_REQ_ERR;
+ break;
+ case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
+ event.event = IB_EVENT_QP_ACCESS_ERR;
+ break;
+ default:
+ pr_warn("Unexpected event type %d on QP %06x\n",
+ qpe_work->type, qpe_work->qp->qpn);
+ goto out;
+ }
+
+ ibqp->event_handler(&event, ibqp->qp_context);
+
+out:
+ mlx4_put_qp(qpe_work->qp);
+ kfree(qpe_work);
+}
+
static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
{
- struct ib_event event;
struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
+ struct mlx4_ib_qp_event_work *qpe_work;
if (type == MLX4_EVENT_TYPE_PATH_MIG)
to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
- if (ibqp->event_handler) {
- event.device = ibqp->device;
- event.element.qp = ibqp;
- switch (type) {
- case MLX4_EVENT_TYPE_PATH_MIG:
- event.event = IB_EVENT_PATH_MIG;
- break;
- case MLX4_EVENT_TYPE_COMM_EST:
- event.event = IB_EVENT_COMM_EST;
- break;
- case MLX4_EVENT_TYPE_SQ_DRAINED:
- event.event = IB_EVENT_SQ_DRAINED;
- break;
- case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
- event.event = IB_EVENT_QP_LAST_WQE_REACHED;
- break;
- case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
- event.event = IB_EVENT_QP_FATAL;
- break;
- case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
- event.event = IB_EVENT_PATH_MIG_ERR;
- break;
- case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
- event.event = IB_EVENT_QP_REQ_ERR;
- break;
- case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
- event.event = IB_EVENT_QP_ACCESS_ERR;
- break;
- default:
- pr_warn("Unexpected event type %d "
- "on QP %06x\n", type, qp->qpn);
- return;
- }
+ if (!ibqp->event_handler)
+ goto out_no_handler;
- ibqp->event_handler(&event, ibqp->qp_context);
- }
+ qpe_work = kzalloc(sizeof(*qpe_work), GFP_ATOMIC);
+ if (!qpe_work)
+ goto out_no_handler;
+
+ qpe_work->qp = qp;
+ qpe_work->type = type;
+ INIT_WORK(&qpe_work->work, mlx4_ib_handle_qp_event);
+ queue_work(mlx4_ib_qp_event_wq, &qpe_work->work);
+ return;
+
+out_no_handler:
+ mlx4_put_qp(qp);
}
static void mlx4_ib_wq_event(struct mlx4_qp *qp, enum mlx4_event type)
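
Editor's note on the hunk above: QP async-event delivery is moved out of the atomic EQ path and deferred to an ordered workqueue; the driver also keeps a reference on the QP (dropped via mlx4_put_qp()) so the object stays valid until the work item runs. The shape of that pattern, reduced to its essentials with illustrative names:

#include <linux/workqueue.h>
#include <linux/slab.h>

struct my_event_work {
	struct work_struct work;
	int type;			/* payload captured in atomic context */
};

static struct workqueue_struct *my_event_wq;

static void my_event_handler(struct work_struct *work)
{
	struct my_event_work *ew = container_of(work, struct my_event_work, work);

	/* Runs in process context; may sleep, take mutexes, call back into users. */
	pr_info("handling event %d\n", ew->type);
	kfree(ew);
}

/* Called from atomic (IRQ) context: capture the event and defer it. */
static void my_event_notify(int type)
{
	struct my_event_work *ew = kzalloc(sizeof(*ew), GFP_ATOMIC);

	if (!ew)
		return;			/* drop the event if memory is tight */
	ew->type = type;
	INIT_WORK(&ew->work, my_event_handler);
	queue_work(my_event_wq, &ew->work);
}

static int my_event_init(void)
{
	/* Ordered: one event at a time, in submission order. */
	my_event_wq = alloc_ordered_workqueue("my_event_wq", 0);
	return my_event_wq ? 0 : -ENOMEM;
}
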
@@ -325,7 +334,7 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
}
static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
- bool is_user, int has_rq, struct mlx4_ib_qp *qp,
+ bool is_user, bool has_rq, struct mlx4_ib_qp *qp,
u32 inl_recv_sz)
{
/* Sanity check RQ size before proceeding */
@@ -438,9 +447,13 @@ static int set_user_sq_size(struct mlx4_ib_dev *dev,
struct mlx4_ib_qp *qp,
struct mlx4_ib_create_qp *ucmd)
{
+ u32 cnt;
+
/* Sanity check SQ size before proceeding */
- if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes ||
- ucmd->log_sq_stride >
+ if (check_shl_overflow(1, ucmd->log_sq_bb_count, &cnt) ||
+ cnt > dev->dev->caps.max_wqes)
+ return -EINVAL;
+ if (ucmd->log_sq_stride >
ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) ||
ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)
return -EINVAL;
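
Editor's note on the hunk above: set_user_sq_size() now validates the user-supplied log2 count with check_shl_overflow() instead of shifting first and comparing afterwards, so an oversized exponent can no longer wrap before the range check. Roughly, the helper from <linux/overflow.h> is used like this (the surrounding names are illustrative):

#include <linux/overflow.h>

/* 0 if (1 << log_count) fits in a u32 and does not exceed 'max', else -EINVAL. */
static int validate_log_count(u8 log_count, u32 max)
{
	u32 cnt;

	/* check_shl_overflow() is true when 1 << log_count overflows cnt. */
	if (check_shl_overflow(1, log_count, &cnt) || cnt > max)
		return -EINVAL;
	return 0;
}
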
@@ -506,10 +519,10 @@ static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
kfree(qp->sqp_proxy_rcv);
}
-static int qp_has_rq(struct ib_qp_init_attr *attr)
+static bool qp_has_rq(struct ib_qp_init_attr *attr)
{
if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)
- return 0;
+ return false;
return !attr->srq;
}
@@ -552,15 +565,15 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx,
return (-EOPNOTSUPP);
}
- if (ucmd->rx_hash_fields_mask & ~(MLX4_IB_RX_HASH_SRC_IPV4 |
- MLX4_IB_RX_HASH_DST_IPV4 |
- MLX4_IB_RX_HASH_SRC_IPV6 |
- MLX4_IB_RX_HASH_DST_IPV6 |
- MLX4_IB_RX_HASH_SRC_PORT_TCP |
- MLX4_IB_RX_HASH_DST_PORT_TCP |
- MLX4_IB_RX_HASH_SRC_PORT_UDP |
- MLX4_IB_RX_HASH_DST_PORT_UDP |
- MLX4_IB_RX_HASH_INNER)) {
+ if (ucmd->rx_hash_fields_mask & ~(u64)(MLX4_IB_RX_HASH_SRC_IPV4 |
+ MLX4_IB_RX_HASH_DST_IPV4 |
+ MLX4_IB_RX_HASH_SRC_IPV6 |
+ MLX4_IB_RX_HASH_DST_IPV6 |
+ MLX4_IB_RX_HASH_SRC_PORT_TCP |
+ MLX4_IB_RX_HASH_DST_PORT_TCP |
+ MLX4_IB_RX_HASH_SRC_PORT_UDP |
+ MLX4_IB_RX_HASH_DST_PORT_UDP |
+ MLX4_IB_RX_HASH_INNER)) {
pr_debug("RX Hash fields_mask has unsupported mask (0x%llx)\n",
ucmd->rx_hash_fields_mask);
return (-EOPNOTSUPP);
@@ -656,8 +669,6 @@ static int create_qp_rss(struct mlx4_ib_dev *dev,
if (err)
goto err_qpn;
- mutex_init(&qp->mutex);
-
INIT_LIST_HEAD(&qp->gid_list);
INIT_LIST_HEAD(&qp->steering_rules);
@@ -696,80 +707,72 @@ err_qpn:
return err;
}
-static struct ib_qp *_mlx4_ib_create_qp_rss(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+static int _mlx4_ib_create_qp_rss(struct ib_pd *pd, struct mlx4_ib_qp *qp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
{
- struct mlx4_ib_qp *qp;
struct mlx4_ib_create_qp_rss ucmd = {};
size_t required_cmd_sz;
int err;
if (!udata) {
pr_debug("RSS QP with NULL udata\n");
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
if (udata->outlen)
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
required_cmd_sz = offsetof(typeof(ucmd), reserved1) +
sizeof(ucmd.reserved1);
if (udata->inlen < required_cmd_sz) {
pr_debug("invalid inlen\n");
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) {
pr_debug("copy failed\n");
- return ERR_PTR(-EFAULT);
+ return -EFAULT;
}
if (memchr_inv(ucmd.reserved, 0, sizeof(ucmd.reserved)))
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
if (ucmd.comp_mask || ucmd.reserved1)
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
if (udata->inlen > sizeof(ucmd) &&
!ib_is_udata_cleared(udata, sizeof(ucmd),
udata->inlen - sizeof(ucmd))) {
pr_debug("inlen is not supported\n");
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
}
if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
pr_debug("RSS QP with unsupported QP type %d\n",
init_attr->qp_type);
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
}
if (init_attr->create_flags) {
pr_debug("RSS QP doesn't support create flags\n");
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
}
if (init_attr->send_cq || init_attr->cap.max_send_wr) {
pr_debug("RSS QP with unsupported send attributes\n");
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
}
- qp = kzalloc(sizeof(*qp), GFP_KERNEL);
- if (!qp)
- return ERR_PTR(-ENOMEM);
-
qp->pri.vid = 0xFFFF;
qp->alt.vid = 0xFFFF;
err = create_qp_rss(to_mdev(pd->device), init_attr, &ucmd, qp);
- if (err) {
- kfree(qp);
- return ERR_PTR(err);
- }
+ if (err)
+ return err;
qp->ibqp.qp_num = qp->mqp.qpn;
-
- return &qp->ibqp;
+ return 0;
}
/*
@@ -849,28 +852,158 @@ static void mlx4_ib_release_wqn(struct mlx4_ib_ucontext *context,
* reused for further WQN allocations.
* The next created WQ will allocate a new range.
*/
- range->dirty = 1;
+ range->dirty = true;
}
mutex_unlock(&context->wqn_ranges_mutex);
}
-static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
- enum mlx4_ib_source_type src,
- struct ib_qp_init_attr *init_attr,
+static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata, struct mlx4_ib_qp *qp)
+{
+ struct mlx4_ib_dev *dev = to_mdev(pd->device);
+ int qpn;
+ int err;
+ struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mlx4_ib_ucontext, ibucontext);
+ struct mlx4_ib_cq *mcq;
+ unsigned long flags;
+ int range_size;
+ struct mlx4_ib_create_wq wq;
+ size_t copy_len;
+ int shift;
+ int n;
+
+ qp->mlx4_ib_qp_type = MLX4_IB_QPT_RAW_PACKET;
+
+ spin_lock_init(&qp->sq.lock);
+ spin_lock_init(&qp->rq.lock);
+ INIT_LIST_HEAD(&qp->gid_list);
+ INIT_LIST_HEAD(&qp->steering_rules);
+
+ qp->state = IB_QPS_RESET;
+
+ copy_len = min(sizeof(struct mlx4_ib_create_wq), udata->inlen);
+
+ if (ib_copy_from_udata(&wq, udata, copy_len)) {
+ err = -EFAULT;
+ goto err;
+ }
+
+ if (wq.comp_mask || wq.reserved[0] || wq.reserved[1] ||
+ wq.reserved[2]) {
+ pr_debug("user command isn't supported\n");
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+
+ if (wq.log_range_size > ilog2(dev->dev->caps.max_rss_tbl_sz)) {
+ pr_debug("WQN range size must be equal or smaller than %d\n",
+ dev->dev->caps.max_rss_tbl_sz);
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+ range_size = 1 << wq.log_range_size;
+
+ if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS)
+ qp->flags |= MLX4_IB_QP_SCATTER_FCS;
+
+ err = set_rq_size(dev, &init_attr->cap, true, true, qp, qp->inl_recv_sz);
+ if (err)
+ goto err;
+
+ qp->sq_no_prefetch = 1;
+ qp->sq.wqe_cnt = 1;
+ qp->sq.wqe_shift = MLX4_IB_MIN_SQ_STRIDE;
+ qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
+ (qp->sq.wqe_cnt << qp->sq.wqe_shift);
+
+ qp->umem = ib_umem_get(pd->device, wq.buf_addr, qp->buf_size, 0);
+ if (IS_ERR(qp->umem)) {
+ err = PTR_ERR(qp->umem);
+ goto err;
+ }
+
+ shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n);
+ if (shift < 0) {
+ err = shift;
+ goto err_buf;
+ }
+
+ err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
+ if (err)
+ goto err_buf;
+
+ err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
+ if (err)
+ goto err_mtt;
+
+ err = mlx4_ib_db_map_user(udata, wq.db_addr, &qp->db);
+ if (err)
+ goto err_mtt;
+ qp->mqp.usage = MLX4_RES_USAGE_USER_VERBS;
+
+ err = mlx4_ib_alloc_wqn(context, qp, range_size, &qpn);
+ if (err)
+ goto err_wrid;
+
+ err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
+ if (err)
+ goto err_qpn;
+
+ /*
+ * Hardware wants QPN written in big-endian order (after
+ * shifting) for send doorbell. Precompute this value to save
+ * a little bit when posting sends.
+ */
+ qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
+
+ qp->mqp.event = mlx4_ib_wq_event;
+
+ spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+ mlx4_ib_lock_cqs(to_mcq(init_attr->send_cq),
+ to_mcq(init_attr->recv_cq));
+ /* Maintain device to QPs access, needed for further handling
+ * via reset flow
+ */
+ list_add_tail(&qp->qps_list, &dev->qp_list);
+ /* Maintain CQ to QPs access, needed for further handling
+ * via reset flow
+ */
+ mcq = to_mcq(init_attr->send_cq);
+ list_add_tail(&qp->cq_send_list, &mcq->send_qp_list);
+ mcq = to_mcq(init_attr->recv_cq);
+ list_add_tail(&qp->cq_recv_list, &mcq->recv_qp_list);
+ mlx4_ib_unlock_cqs(to_mcq(init_attr->send_cq),
+ to_mcq(init_attr->recv_cq));
+ spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
+ return 0;
+
+err_qpn:
+ mlx4_ib_release_wqn(context, qp, 0);
+err_wrid:
+ mlx4_ib_db_unmap_user(context, &qp->db);
+
+err_mtt:
+ mlx4_mtt_cleanup(dev->dev, &qp->mtt);
+err_buf:
+ ib_umem_release(qp->umem);
+err:
+ return err;
+}
+
+static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
struct ib_udata *udata, int sqpn,
- struct mlx4_ib_qp **caller_qp)
+ struct mlx4_ib_qp *qp)
{
+ struct mlx4_ib_dev *dev = to_mdev(pd->device);
int qpn;
int err;
- struct mlx4_ib_sqp *sqp = NULL;
- struct mlx4_ib_qp *qp;
struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context(
udata, struct mlx4_ib_ucontext, ibucontext);
enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
struct mlx4_ib_cq *mcq;
unsigned long flags;
- int range_size = 0;
/* When tunneling special qps, we use a plain UD qp */
if (sqpn) {
@@ -913,76 +1046,41 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
sqpn = qpn;
}
- if (!*caller_qp) {
- if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
- (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
- MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
- sqp = kzalloc(sizeof(struct mlx4_ib_sqp), GFP_KERNEL);
- if (!sqp)
- return -ENOMEM;
- qp = &sqp->qp;
- qp->pri.vid = 0xFFFF;
- qp->alt.vid = 0xFFFF;
- } else {
- qp = kzalloc(sizeof(struct mlx4_ib_qp), GFP_KERNEL);
- if (!qp)
- return -ENOMEM;
- qp->pri.vid = 0xFFFF;
- qp->alt.vid = 0xFFFF;
- }
- } else
- qp = *caller_qp;
+ if (init_attr->qp_type == IB_QPT_SMI ||
+ init_attr->qp_type == IB_QPT_GSI || qp_type == MLX4_IB_QPT_SMI ||
+ qp_type == MLX4_IB_QPT_GSI ||
+ (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
+ MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
+ qp->sqp = kzalloc(sizeof(struct mlx4_ib_sqp), GFP_KERNEL);
+ if (!qp->sqp)
+ return -ENOMEM;
+ }
qp->mlx4_ib_qp_type = qp_type;
- mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
INIT_LIST_HEAD(&qp->gid_list);
INIT_LIST_HEAD(&qp->steering_rules);
- qp->state = IB_QPS_RESET;
+ qp->state = IB_QPS_RESET;
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
-
if (udata) {
- union {
- struct mlx4_ib_create_qp qp;
- struct mlx4_ib_create_wq wq;
- } ucmd;
+ struct mlx4_ib_create_qp ucmd;
size_t copy_len;
int shift;
int n;
- copy_len = (src == MLX4_IB_QP_SRC) ?
- sizeof(struct mlx4_ib_create_qp) :
- min(sizeof(struct mlx4_ib_create_wq), udata->inlen);
+ copy_len = sizeof(struct mlx4_ib_create_qp);
if (ib_copy_from_udata(&ucmd, udata, copy_len)) {
err = -EFAULT;
goto err;
}
- if (src == MLX4_IB_RWQ_SRC) {
- if (ucmd.wq.comp_mask || ucmd.wq.reserved[0] ||
- ucmd.wq.reserved[1] || ucmd.wq.reserved[2]) {
- pr_debug("user command isn't supported\n");
- err = -EOPNOTSUPP;
- goto err;
- }
-
- if (ucmd.wq.log_range_size >
- ilog2(dev->dev->caps.max_rss_tbl_sz)) {
- pr_debug("WQN range size must be equal or smaller than %d\n",
- dev->dev->caps.max_rss_tbl_sz);
- err = -EOPNOTSUPP;
- goto err;
- }
- range_size = 1 << ucmd.wq.log_range_size;
- } else {
- qp->inl_recv_sz = ucmd.qp.inl_recv_sz;
- }
+ qp->inl_recv_sz = ucmd.inl_recv_sz;
if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS) {
if (!(dev->dev->caps.flags &
@@ -1000,39 +1098,26 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (err)
goto err;
- if (src == MLX4_IB_QP_SRC) {
- qp->sq_no_prefetch = ucmd.qp.sq_no_prefetch;
+ qp->sq_no_prefetch = ucmd.sq_no_prefetch;
- err = set_user_sq_size(dev, qp,
- (struct mlx4_ib_create_qp *)
- &ucmd);
- if (err)
- goto err;
- } else {
- qp->sq_no_prefetch = 1;
- qp->sq.wqe_cnt = 1;
- qp->sq.wqe_shift = MLX4_IB_MIN_SQ_STRIDE;
- /* Allocated buffer expects to have at least that SQ
- * size.
- */
- qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
- (qp->sq.wqe_cnt << qp->sq.wqe_shift);
- }
+ err = set_user_sq_size(dev, qp, &ucmd);
+ if (err)
+ goto err;
qp->umem =
- ib_umem_get(udata,
- (src == MLX4_IB_QP_SRC) ? ucmd.qp.buf_addr :
- ucmd.wq.buf_addr,
- qp->buf_size, 0, 0);
+ ib_umem_get(pd->device, ucmd.buf_addr, qp->buf_size, 0);
if (IS_ERR(qp->umem)) {
err = PTR_ERR(qp->umem);
goto err;
}
- n = ib_umem_page_count(qp->umem);
shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n);
- err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
+ if (shift < 0) {
+ err = shift;
+ goto err_buf;
+ }
+ err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
if (err)
goto err_buf;
@@ -1041,11 +1126,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
goto err_mtt;
if (qp_has_rq(init_attr)) {
- err = mlx4_ib_db_map_user(udata,
- (src == MLX4_IB_QP_SRC) ?
- ucmd.qp.db_addr :
- ucmd.wq.db_addr,
- &qp->db);
+ err = mlx4_ib_db_map_user(udata, ucmd.db_addr, &qp->db);
if (err)
goto err_mtt;
}
@@ -1065,8 +1146,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (dev->steering_support ==
MLX4_STEERING_MODE_DEVICE_MANAGED)
qp->flags |= MLX4_IB_QP_NETIF;
- else
+ else {
+ err = -EINVAL;
goto err;
+ }
}
err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
@@ -1115,10 +1198,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
goto err_wrid;
}
}
- } else if (src == MLX4_IB_RWQ_SRC) {
- err = mlx4_ib_alloc_wqn(context, qp, range_size, &qpn);
- if (err)
- goto err_wrid;
} else {
/* Raw packet QPNs may not have bits 6,7 set in their qp_num;
* otherwise, the WQE BlueFlame setup flow wrongly causes
@@ -1157,11 +1236,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
*/
qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
- qp->mqp.event = (src == MLX4_IB_QP_SRC) ? mlx4_ib_qp_event :
- mlx4_ib_wq_event;
-
- if (!*caller_qp)
- *caller_qp = qp;
+ qp->mqp.event = mlx4_ib_qp_event;
spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
mlx4_ib_lock_cqs(to_mcq(init_attr->send_cq),
@@ -1186,8 +1261,6 @@ err_qpn:
if (!sqpn) {
if (qp->flags & MLX4_IB_QP_NETIF)
mlx4_ib_steer_qp_free(dev, qpn, 1);
- else if (src == MLX4_IB_RWQ_SRC)
- mlx4_ib_release_wqn(context, qp, 0);
else
mlx4_qp_release_range(dev->dev, qpn, 1);
}
@@ -1207,20 +1280,16 @@ err_mtt:
mlx4_mtt_cleanup(dev->dev, &qp->mtt);
err_buf:
- if (qp->umem)
- ib_umem_release(qp->umem);
- else
+ if (!qp->umem)
mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
+ ib_umem_release(qp->umem);
err_db:
if (!udata && qp_has_rq(init_attr))
mlx4_db_free(dev->dev, &qp->db);
err:
- if (!sqp && !*caller_qp)
- kfree(qp);
- kfree(sqp);
-
+ kfree(qp->sqp);
return err;
}
@@ -1334,7 +1403,6 @@ static void destroy_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
mlx4_qp_free(dev->dev, &qp->mqp);
mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
del_gid_entries(qp);
- kfree(qp->rss_ctx);
}
static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
@@ -1421,7 +1489,6 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
mlx4_ib_db_unmap_user(mcontext, &qp->db);
}
- ib_umem_release(qp->umem);
} else {
kvfree(qp->sq.wrid);
kvfree(qp->rq.wrid);
@@ -1432,6 +1499,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
if (qp->rq.wqe_cnt)
mlx4_db_free(dev->dev, &qp->db);
}
+ ib_umem_release(qp->umem);
del_gid_entries(qp);
}
@@ -1453,17 +1521,16 @@ static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
return dev->dev->caps.spec_qps[attr->port_num - 1].qp1_proxy;
}
-static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+static int _mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
{
- struct mlx4_ib_qp *qp = NULL;
int err;
int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
u16 xrcdn = 0;
if (init_attr->rwq_ind_tbl)
- return _mlx4_ib_create_qp_rss(pd, init_attr, udata);
+ return _mlx4_ib_create_qp_rss(pd, qp, init_attr, udata);
/*
* We only support LSO, vendor flag1, and multicast loopback blocking,
@@ -1475,16 +1542,16 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
MLX4_IB_SRIOV_SQP |
MLX4_IB_QP_NETIF |
MLX4_IB_QP_CREATE_ROCE_V2_GSI))
- return ERR_PTR(-EINVAL);
+ return -EOPNOTSUPP;
if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
if (init_attr->qp_type != IB_QPT_UD)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
if (init_attr->create_flags) {
if (udata && init_attr->create_flags & ~(sup_u_create_flags))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
MLX4_IB_QP_CREATE_ROCE_V2_GSI |
@@ -1494,7 +1561,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
init_attr->qp_type > IB_QPT_GSI) ||
(init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI &&
init_attr->qp_type != IB_QPT_GSI))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
switch (init_attr->qp_type) {
@@ -1502,58 +1569,51 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
pd = to_mxrcd(init_attr->xrcd)->pd;
xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
init_attr->send_cq = to_mxrcd(init_attr->xrcd)->cq;
- /* fall through */
+ fallthrough;
case IB_QPT_XRC_INI:
if (!(to_mdev(pd->device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
- return ERR_PTR(-ENOSYS);
+ return -ENOSYS;
init_attr->recv_cq = init_attr->send_cq;
- /* fall through */
+ fallthrough;
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_RAW_PACKET:
- qp = kzalloc(sizeof(*qp), GFP_KERNEL);
- if (!qp)
- return ERR_PTR(-ENOMEM);
+ case IB_QPT_UD:
qp->pri.vid = 0xFFFF;
qp->alt.vid = 0xFFFF;
- /* fall through */
- case IB_QPT_UD:
- {
- err = create_qp_common(to_mdev(pd->device), pd, MLX4_IB_QP_SRC,
- init_attr, udata, 0, &qp);
- if (err) {
- kfree(qp);
- return ERR_PTR(err);
- }
+ err = create_qp_common(pd, init_attr, udata, 0, qp);
+ if (err)
+ return err;
qp->ibqp.qp_num = qp->mqp.qpn;
qp->xrcdn = xrcdn;
-
break;
- }
case IB_QPT_SMI:
case IB_QPT_GSI:
{
int sqpn;
- /* Userspace is not allowed to create special QPs: */
- if (udata)
- return ERR_PTR(-EINVAL);
if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) {
int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev,
1, 1, &sqpn, 0,
MLX4_RES_USAGE_DRIVER);
if (res)
- return ERR_PTR(res);
+ return res;
} else {
sqpn = get_sqp_num(to_mdev(pd->device), init_attr);
}
- err = create_qp_common(to_mdev(pd->device), pd, MLX4_IB_QP_SRC,
- init_attr, udata, sqpn, &qp);
+ qp->pri.vid = 0xFFFF;
+ qp->alt.vid = 0xFFFF;
+ err = create_qp_common(pd, init_attr, udata, sqpn, qp);
if (err)
- return ERR_PTR(err);
+ return err;
+
+ if (init_attr->create_flags &
+ (MLX4_IB_SRIOV_SQP | MLX4_IB_SRIOV_TUNNEL_QP))
+ /* Internal QP created with ib_create_qp */
+ rdma_restrack_no_track(&qp->ibqp.res);
qp->port = init_attr->port_num;
qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 :
@@ -1562,25 +1622,28 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
}
default:
/* Don't support raw QPs */
- return ERR_PTR(-EINVAL);
+ return -EOPNOTSUPP;
}
-
- return &qp->ibqp;
+ return 0;
}
-struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata) {
- struct ib_device *device = pd ? pd->device : init_attr->xrcd->device;
- struct ib_qp *ibqp;
+int mlx4_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct ib_device *device = ibqp->device;
struct mlx4_ib_dev *dev = to_mdev(device);
+ struct mlx4_ib_qp *qp = to_mqp(ibqp);
+ struct ib_pd *pd = ibqp->pd;
+ int ret;
- ibqp = _mlx4_ib_create_qp(pd, init_attr, udata);
+ mutex_init(&qp->mutex);
+ ret = _mlx4_ib_create_qp(pd, qp, init_attr, udata);
+ if (ret)
+ return ret;
- if (!IS_ERR(ibqp) &&
- (init_attr->qp_type == IB_QPT_GSI) &&
+ if (init_attr->qp_type == IB_QPT_GSI &&
!(init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI)) {
- struct mlx4_ib_sqp *sqp = to_msqp((to_mqp(ibqp)));
+ struct mlx4_ib_sqp *sqp = qp->sqp;
int is_eth = rdma_cap_eth_ah(&dev->ib_dev, init_attr->port_num);
if (is_eth &&
@@ -1589,17 +1652,18 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
sqp->roce_v2_gsi = ib_create_qp(pd, init_attr);
if (IS_ERR(sqp->roce_v2_gsi)) {
- pr_err("Failed to create GSI QP for RoCEv2 (%ld)\n", PTR_ERR(sqp->roce_v2_gsi));
+ pr_err("Failed to create GSI QP for RoCEv2 (%pe)\n",
+ sqp->roce_v2_gsi);
sqp->roce_v2_gsi = NULL;
} else {
- sqp = to_msqp(to_mqp(sqp->roce_v2_gsi));
- sqp->qp.flags |= MLX4_IB_ROCE_V2_GSI_QP;
+ to_mqp(sqp->roce_v2_gsi)->flags |=
+ MLX4_IB_ROCE_V2_GSI_QP;
}
init_attr->create_flags &= ~MLX4_IB_QP_CREATE_ROCE_V2_GSI;
}
}
- return ibqp;
+ return 0;
}
static int _mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
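
Editor's note on the hunk above: the GSI error path now prints the error pointer with the %pe format specifier, which renders a symbolic errno (e.g. -ENOMEM) rather than the raw PTR_ERR() value. A minimal, hedged usage sketch:

#include <linux/err.h>
#include <linux/printk.h>

static void report_alloc(void *obj)
{
	if (IS_ERR(obj))
		pr_err("allocation failed (%pe)\n", obj);	/* prints e.g. "-ENOMEM" */
	else
		pr_info("allocation ok\n");
}
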
@@ -1626,11 +1690,7 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, udata);
}
- if (is_sqp(dev, mqp))
- kfree(to_msqp(mqp));
- else
- kfree(mqp);
-
+ kfree(mqp->sqp);
return 0;
}
@@ -1639,7 +1699,7 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
struct mlx4_ib_qp *mqp = to_mqp(qp);
if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
- struct mlx4_ib_sqp *sqp = to_msqp(mqp);
+ struct mlx4_ib_sqp *sqp = mqp->sqp;
if (sqp->roce_v2_gsi)
ib_destroy_qp(sqp->roce_v2_gsi);
@@ -1843,7 +1903,7 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp,
u16 vlan_id, u8 *smac)
{
return _mlx4_set_path(dev, &qp->ah_attr,
- mlx4_mac_to_u64(smac),
+ ether_addr_to_u64(smac),
vlan_id,
path, &mqp->pri, port);
}
@@ -2501,7 +2561,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
qp->alt_port = attr->alt_port_num;
if (is_sqp(dev, qp))
- store_sqp_attrs(to_msqp(qp), attr, attr_mask);
+ store_sqp_attrs(qp->sqp, attr, attr_mask);
/*
* If we moved QP0 to RTR, bring the IB link up; if we moved
@@ -2775,10 +2835,13 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
int ret;
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata);
if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
- struct mlx4_ib_sqp *sqp = to_msqp(mqp);
+ struct mlx4_ib_sqp *sqp = mqp->sqp;
int err = 0;
if (sqp->roce_v2_gsi)
@@ -2803,12 +2866,13 @@ static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
return -EINVAL;
}
-static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
+static int build_sriov_qp0_header(struct mlx4_ib_qp *qp,
const struct ib_ud_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
- struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
- struct ib_device *ib_dev = &mdev->ib_dev;
+ struct mlx4_ib_dev *mdev = to_mdev(qp->ibqp.device);
+ struct mlx4_ib_sqp *sqp = qp->sqp;
+ struct ib_device *ib_dev = qp->ibqp.device;
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
struct mlx4_ib_ah *ah = to_mah(wr->ah);
@@ -2817,6 +2881,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
int send_size;
int header_size;
int spc;
+ int err;
int i;
if (wr->wr.opcode != IB_WR_SEND)
@@ -2829,12 +2894,12 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
/* for proxy-qp0 sends, need to add in size of tunnel header */
/* for tunnel-qp0 sends, tunnel header is already in s/g list */
- if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
send_size += sizeof (struct mlx4_ib_tunnel_header);
ib_ud_header_init(send_size, 1, 0, 0, 0, 0, 0, 0, &sqp->ud_header);
- if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
sqp->ud_header.lrh.service_level =
be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
sqp->ud_header.lrh.destination_lid =
@@ -2851,24 +2916,26 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
sqp->ud_header.lrh.virtual_lane = 0;
sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
- ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
+ err = ib_get_cached_pkey(ib_dev, qp->port, 0, &pkey);
+ if (err)
+ return err;
sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
- if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
else
sqp->ud_header.bth.destination_qpn =
- cpu_to_be32(mdev->dev->caps.spec_qps[sqp->qp.port - 1].qp0_tunnel);
+ cpu_to_be32(mdev->dev->caps.spec_qps[qp->port - 1].qp0_tunnel);
sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
if (mlx4_is_master(mdev->dev)) {
- if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+ if (mlx4_get_parav_qkey(mdev->dev, qp->mqp.qpn, &qkey))
return -EINVAL;
} else {
- if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+ if (vf_get_qp0_qkey(mdev->dev, qp->mqp.qpn, &qkey))
return -EINVAL;
}
sqp->ud_header.deth.qkey = cpu_to_be32(qkey);
- sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn);
+ sqp->ud_header.deth.source_qpn = cpu_to_be32(qp->mqp.qpn);
sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
sqp->ud_header.immediate_present = 0;
@@ -2952,10 +3019,11 @@ static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num,
}
#define MLX4_ROCEV2_QP1_SPORT 0xC000
-static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
+static int build_mlx_header(struct mlx4_ib_qp *qp, const struct ib_ud_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
- struct ib_device *ib_dev = sqp->qp.ibqp.device;
+ struct mlx4_ib_sqp *sqp = qp->sqp;
+ struct ib_device *ib_dev = qp->ibqp.device;
struct mlx4_ib_dev *ibdev = to_mdev(ib_dev);
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_ctrl_seg *ctrl = wqe;
@@ -2979,7 +3047,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
for (i = 0; i < wr->wr.num_sge; ++i)
send_size += wr->wr.sg_list[i].length;
- is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
+ is_eth = rdma_port_get_link_layer(qp->ibqp.device, qp->port) == IB_LINK_LAYER_ETHERNET;
is_grh = mlx4_ib_ah_grh_present(ah);
if (is_eth) {
enum ib_gid_type gid_type;
@@ -2993,9 +3061,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
if (err)
return err;
} else {
- err = fill_gid_by_hw_index(ibdev, sqp->qp.port,
- ah->av.ib.gid_index,
- &sgid, &gid_type);
+ err = fill_gid_by_hw_index(ibdev, qp->port,
+ ah->av.ib.gid_index, &sgid,
+ &gid_type);
if (!err) {
is_udp = gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
if (is_udp) {
@@ -3011,7 +3079,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
}
if (ah->av.eth.vlan != cpu_to_be16(0xffff)) {
vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;
- is_vlan = 1;
+ is_vlan = true;
}
}
err = ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh,
@@ -3040,13 +3108,18 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
* indexes don't necessarily match the hw ones, so
* we must use our own cache
*/
- sqp->ud_header.grh.source_gid.global.subnet_prefix =
- cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov.
- demux[sqp->qp.port - 1].
- subnet_prefix)));
- sqp->ud_header.grh.source_gid.global.interface_id =
- to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
- guid_cache[ah->av.ib.gid_index];
+ sqp->ud_header.grh.source_gid.global
+ .subnet_prefix =
+ cpu_to_be64(atomic64_read(
+ &(to_mdev(ib_dev)
+ ->sriov
+ .demux[qp->port - 1]
+ .subnet_prefix)));
+ sqp->ud_header.grh.source_gid.global
+ .interface_id =
+ to_mdev(ib_dev)
+ ->sriov.demux[qp->port - 1]
+ .guid_cache[ah->av.ib.gid_index];
} else {
sqp->ud_header.grh.source_gid =
ah->ibah.sgid_attr->gid;
@@ -3078,10 +3151,13 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
if (!is_eth) {
- mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
- (sqp->ud_header.lrh.destination_lid ==
- IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
- (sqp->ud_header.lrh.service_level << 8));
+ mlx->flags |=
+ cpu_to_be32((!qp->ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
+ (sqp->ud_header.lrh.destination_lid ==
+ IB_LID_PERMISSIVE ?
+ MLX4_WQE_MLX_SLR :
+ 0) |
+ (sqp->ud_header.lrh.service_level << 8));
if (ah->av.ib.port_pd & cpu_to_be32(0x80000000))
mlx->flags |= cpu_to_be32(0x1); /* force loopback */
mlx->rlid = sqp->ud_header.lrh.destination_lid;
@@ -3102,7 +3178,6 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
}
if (is_eth) {
- struct in6_addr in6;
u16 ether_type;
u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
@@ -3112,11 +3187,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
mlx->sched_prio = cpu_to_be16(pcp);
ether_addr_copy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac);
- memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
+ ether_addr_copy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac);
memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
- memcpy(&in6, sgid.raw, sizeof(in6));
-
if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
@@ -3127,26 +3200,33 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
}
} else {
- sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 :
- sl_to_vl(to_mdev(ib_dev),
- sqp->ud_header.lrh.service_level,
- sqp->qp.port);
- if (sqp->qp.ibqp.qp_num && sqp->ud_header.lrh.virtual_lane == 15)
+ sqp->ud_header.lrh.virtual_lane =
+ !qp->ibqp.qp_num ?
+ 15 :
+ sl_to_vl(to_mdev(ib_dev),
+ sqp->ud_header.lrh.service_level,
+ qp->port);
+ if (qp->ibqp.qp_num && sqp->ud_header.lrh.virtual_lane == 15)
return -EINVAL;
if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
}
sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
- if (!sqp->qp.ibqp.qp_num)
- ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
+ if (!qp->ibqp.qp_num)
+ err = ib_get_cached_pkey(ib_dev, qp->port, sqp->pkey_index,
+ &pkey);
else
- ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index, &pkey);
+ err = ib_get_cached_pkey(ib_dev, qp->port, wr->pkey_index,
+ &pkey);
+ if (err)
+ return err;
+
sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ?
sqp->qkey : wr->remote_qkey);
- sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
+ sqp->ud_header.deth.source_qpn = cpu_to_be32(qp->ibqp.qp_num);
header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
@@ -3459,24 +3539,24 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
int nreq;
int err = 0;
unsigned ind;
- int uninitialized_var(size);
- unsigned uninitialized_var(seglen);
+ int size;
+ unsigned seglen;
__be32 dummy;
__be32 *lso_wqe;
- __be32 uninitialized_var(lso_hdr_sz);
+ __be32 lso_hdr_sz;
__be32 blh;
int i;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
- struct mlx4_ib_sqp *sqp = to_msqp(qp);
+ struct mlx4_ib_sqp *sqp = qp->sqp;
if (sqp->roce_v2_gsi) {
struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah);
enum ib_gid_type gid_type;
union ib_gid gid;
- if (!fill_gid_by_hw_index(mdev, sqp->qp.port,
+ if (!fill_gid_by_hw_index(mdev, qp->port,
ah->av.ib.gid_index,
&gid, &gid_type))
qp = (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
@@ -3596,8 +3676,8 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
break;
case MLX4_IB_QPT_TUN_SMI_OWNER:
- err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr),
- ctrl, &seglen);
+ err = build_sriov_qp0_header(qp, ud_wr(wr), ctrl,
+ &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
@@ -3633,8 +3713,8 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
break;
case MLX4_IB_QPT_PROXY_SMI_OWNER:
- err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr),
- ctrl, &seglen);
+ err = build_sriov_qp0_header(qp, ud_wr(wr), ctrl,
+ &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
@@ -3667,8 +3747,7 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
case MLX4_IB_QPT_SMI:
case MLX4_IB_QPT_GSI:
- err = build_mlx_header(to_msqp(qp), ud_wr(wr), ctrl,
- &seglen);
+ err = build_mlx_header(qp, ud_wr(wr), ctrl, &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
@@ -3976,7 +4055,9 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
qp_attr->qp_access_flags =
to_ib_qp_access_flags(be32_to_cpu(context.params2));
- if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
+ if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC ||
+ qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+ qp->ibqp.qp_type == IB_QPT_XRC_TGT) {
to_rdma_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path);
to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path);
qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
@@ -4048,8 +4129,8 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata)
{
- struct mlx4_ib_dev *dev;
- struct ib_qp_init_attr ib_qp_init_attr;
+ struct mlx4_dev *dev = to_mdev(pd->device)->dev;
+ struct ib_qp_init_attr ib_qp_init_attr = {};
struct mlx4_ib_qp *qp;
struct mlx4_ib_create_wq ucmd;
int err, required_cmd_sz;
@@ -4074,14 +4155,13 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
if (udata->outlen)
return ERR_PTR(-EOPNOTSUPP);
- dev = to_mdev(pd->device);
-
if (init_attr->wq_type != IB_WQT_RQ) {
pr_debug("unsupported wq type %d\n", init_attr->wq_type);
return ERR_PTR(-EOPNOTSUPP);
}
- if (init_attr->create_flags & ~IB_WQ_FLAGS_SCATTER_FCS) {
+ if (init_attr->create_flags & ~IB_WQ_FLAGS_SCATTER_FCS ||
+ !(dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)) {
pr_debug("unsupported create_flags %u\n",
init_attr->create_flags);
return ERR_PTR(-EOPNOTSUPP);
@@ -4091,10 +4171,10 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
if (!qp)
return ERR_PTR(-ENOMEM);
+ mutex_init(&qp->mutex);
qp->pri.vid = 0xFFFF;
qp->alt.vid = 0xFFFF;
- memset(&ib_qp_init_attr, 0, sizeof(ib_qp_init_attr));
ib_qp_init_attr.qp_context = init_attr->wq_context;
ib_qp_init_attr.qp_type = IB_QPT_RAW_PACKET;
ib_qp_init_attr.cap.max_recv_wr = init_attr->max_wr;
@@ -4105,8 +4185,7 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
if (init_attr->create_flags & IB_WQ_FLAGS_SCATTER_FCS)
ib_qp_init_attr.create_flags |= IB_QP_CREATE_SCATTER_FCS;
- err = create_qp_common(dev, pd, MLX4_IB_RWQ_SRC, &ib_qp_init_attr,
- udata, 0, &qp);
+ err = create_rq(pd, &ib_qp_init_attr, udata, qp);
if (err) {
kfree(qp);
return ERR_PTR(err);
@@ -4215,13 +4294,8 @@ int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr,
if (wq_attr_mask & IB_WQ_FLAGS)
return -EOPNOTSUPP;
- cur_state = wq_attr_mask & IB_WQ_CUR_STATE ? wq_attr->curr_wq_state :
- ibwq->state;
- new_state = wq_attr_mask & IB_WQ_STATE ? wq_attr->wq_state : cur_state;
-
- if (cur_state < IB_WQS_RESET || cur_state > IB_WQS_ERR ||
- new_state < IB_WQS_RESET || new_state > IB_WQS_ERR)
- return -EINVAL;
+ cur_state = wq_attr->curr_wq_state;
+ new_state = wq_attr->wq_state;
if ((new_state == IB_WQS_RDY) && (cur_state == IB_WQS_ERR))
return -EINVAL;
@@ -4259,38 +4333,35 @@ int mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata)
destroy_qp_common(dev, qp, MLX4_IB_RWQ_SRC, udata);
kfree(qp);
-
return 0;
}
-struct ib_rwq_ind_table
-*mlx4_ib_create_rwq_ind_table(struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr,
- struct ib_udata *udata)
+int mlx4_ib_create_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata)
{
- struct ib_rwq_ind_table *rwq_ind_table;
struct mlx4_ib_create_rwq_ind_tbl_resp resp = {};
unsigned int ind_tbl_size = 1 << init_attr->log_ind_tbl_size;
+ struct ib_device *device = rwq_ind_table->device;
unsigned int base_wqn;
size_t min_resp_len;
- int i;
- int err;
+ int i, err = 0;
if (udata->inlen > 0 &&
!ib_is_udata_cleared(udata, 0,
udata->inlen))
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
if (udata->outlen && udata->outlen < min_resp_len)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (ind_tbl_size >
device->attrs.rss_caps.max_rwq_indirection_table_size) {
pr_debug("log_ind_tbl_size = %d is bigger than supported = %d\n",
ind_tbl_size,
device->attrs.rss_caps.max_rwq_indirection_table_size);
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
base_wqn = init_attr->ind_tbl[0]->wq_num;
@@ -4298,39 +4369,23 @@ struct ib_rwq_ind_table
if (base_wqn % ind_tbl_size) {
pr_debug("WQN=0x%x isn't aligned with indirection table size\n",
base_wqn);
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
for (i = 1; i < ind_tbl_size; i++) {
if (++base_wqn != init_attr->ind_tbl[i]->wq_num) {
pr_debug("indirection table's WQNs aren't consecutive\n");
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
}
- rwq_ind_table = kzalloc(sizeof(*rwq_ind_table), GFP_KERNEL);
- if (!rwq_ind_table)
- return ERR_PTR(-ENOMEM);
-
if (udata->outlen) {
resp.response_length = offsetof(typeof(resp), response_length) +
sizeof(resp.response_length);
err = ib_copy_to_udata(udata, &resp, resp.response_length);
- if (err)
- goto err;
}
- return rwq_ind_table;
-
-err:
- kfree(rwq_ind_table);
- return ERR_PTR(err);
-}
-
-int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
-{
- kfree(ib_rwq_ind_tbl);
- return 0;
+ return err;
}
struct mlx4_ib_drain_cqe {
@@ -4461,3 +4516,17 @@ void mlx4_ib_drain_rq(struct ib_qp *qp)
handle_drain_completion(cq, &rdrain, dev);
}
+
+int mlx4_ib_qp_event_init(void)
+{
+ mlx4_ib_qp_event_wq = alloc_ordered_workqueue("mlx4_ib_qp_event_wq", 0);
+ if (!mlx4_ib_qp_event_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx4_ib_qp_event_cleanup(void)
+{
+ destroy_workqueue(mlx4_ib_qp_event_wq);
+}
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 4bf2946b9759..c4cf91235eee 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -86,6 +86,10 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq,
int err;
int i;
+ if (init_attr->srq_type != IB_SRQT_BASIC &&
+ init_attr->srq_type != IB_SRQT_XRC)
+ return -EOPNOTSUPP;
+
/* Sanity check SRQ size before proceeding */
if (init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes ||
init_attr->attr.max_sge > dev->dev->caps.max_srq_sge)
@@ -110,12 +114,14 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq,
if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
return -EFAULT;
- srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0);
+ srq->umem =
+ ib_umem_get(ib_srq->device, ucmd.buf_addr, buf_size, 0);
if (IS_ERR(srq->umem))
return PTR_ERR(srq->umem);
- err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem),
- srq->umem->page_shift, &srq->mtt);
+ err = mlx4_mtt_init(
+ dev->dev, ib_umem_num_dma_blocks(srq->umem, PAGE_SIZE),
+ PAGE_SHIFT, &srq->mtt);
if (err)
goto err_buf;
@@ -204,10 +210,9 @@ err_mtt:
mlx4_mtt_cleanup(dev->dev, &srq->mtt);
err_buf:
- if (srq->umem)
- ib_umem_release(srq->umem);
- else
+ if (!srq->umem)
mlx4_buf_free(dev->dev, buf_size, &srq->buf);
+ ib_umem_release(srq->umem);
err_db:
if (!udata)
@@ -260,7 +265,7 @@ int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
return 0;
}
-void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
+int mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(srq->device);
struct mlx4_ib_srq *msrq = to_msrq(srq);
@@ -275,13 +280,14 @@ void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
struct mlx4_ib_ucontext,
ibucontext),
&msrq->db);
- ib_umem_release(msrq->umem);
} else {
kvfree(msrq->wrid);
mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift,
&msrq->buf);
mlx4_db_free(dev->dev, &msrq->db);
}
+ ib_umem_release(msrq->umem);
+ return 0;
}
void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index)
@@ -314,7 +320,6 @@ int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
err = -EIO;
*bad_wr = wr;
- nreq = 0;
goto out;
}
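
Editor's note on the srq.c conversion above: it follows the updated umem API, where ib_umem_get() takes the ib_device rather than the udata, and the MTT count comes from ib_umem_num_dma_blocks() at PAGE_SIZE granularity. In outline, with error handling trimmed and everything but the RDMA core calls illustrative:

#include <rdma/ib_umem.h>

/* Pin a user buffer and report how many PAGE_SIZE DMA blocks it spans. */
static long pin_and_count(struct ib_device *ibdev, u64 addr, size_t len)
{
	struct ib_umem *umem;
	long nblocks;

	umem = ib_umem_get(ibdev, addr, len, 0 /* no special access flags */);
	if (IS_ERR(umem))
		return PTR_ERR(umem);

	nblocks = ib_umem_num_dma_blocks(umem, PAGE_SIZE);
	ib_umem_release(umem);
	return nblocks;
}
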
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index ea1f3a081b05..88f534cf690e 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -56,7 +56,7 @@ static ssize_t show_admin_alias_guid(struct device *dev,
mlx4_ib_iov_dentry->entry_num,
port->num);
- return sprintf(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val));
+ return sysfs_emit(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val));
}
/* store_admin_alias_guid stores the (new) administratively assigned value of that GUID.
@@ -117,22 +117,24 @@ static ssize_t show_port_gid(struct device *dev,
struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
struct mlx4_ib_dev *mdev = port->dev;
union ib_gid gid;
- ssize_t ret;
+ int ret;
+ __be16 *raw;
ret = __mlx4_ib_query_gid(&mdev->ib_dev, port->num,
mlx4_ib_iov_dentry->entry_num, &gid, 1);
if (ret)
return ret;
- ret = sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
- be16_to_cpu(((__be16 *) gid.raw)[0]),
- be16_to_cpu(((__be16 *) gid.raw)[1]),
- be16_to_cpu(((__be16 *) gid.raw)[2]),
- be16_to_cpu(((__be16 *) gid.raw)[3]),
- be16_to_cpu(((__be16 *) gid.raw)[4]),
- be16_to_cpu(((__be16 *) gid.raw)[5]),
- be16_to_cpu(((__be16 *) gid.raw)[6]),
- be16_to_cpu(((__be16 *) gid.raw)[7]));
- return ret;
+
+ raw = (__be16 *)gid.raw;
+ return sysfs_emit(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+ be16_to_cpu(raw[0]),
+ be16_to_cpu(raw[1]),
+ be16_to_cpu(raw[2]),
+ be16_to_cpu(raw[3]),
+ be16_to_cpu(raw[4]),
+ be16_to_cpu(raw[5]),
+ be16_to_cpu(raw[6]),
+ be16_to_cpu(raw[7]));
}
static ssize_t show_phys_port_pkey(struct device *dev,
@@ -151,7 +153,7 @@ static ssize_t show_phys_port_pkey(struct device *dev,
if (ret)
return ret;
- return sprintf(buf, "0x%04x\n", pkey);
+ return sysfs_emit(buf, "0x%04x\n", pkey);
}
#define DENTRY_REMOVE(_dentry) \
@@ -221,7 +223,7 @@ void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
static int add_port_entries(struct mlx4_ib_dev *device, int port_num)
{
int i;
- char buff[11];
+ char buff[12];
struct mlx4_ib_iov_port *port = NULL;
int ret = 0 ;
struct ib_port_attr attr;
@@ -441,16 +443,12 @@ static ssize_t show_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
- ssize_t ret = -ENODEV;
-
- if (p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index] >=
- (p->dev->dev->caps.pkey_table_len[p->port_num]))
- ret = sprintf(buf, "none\n");
- else
- ret = sprintf(buf, "%d\n",
- p->dev->pkeys.virt2phys_pkey[p->slave]
- [p->port_num - 1][tab_attr->index]);
- return ret;
+ struct pkey_mgt *m = &p->dev->pkeys;
+ u8 key = m->virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index];
+
+ if (key >= p->dev->dev->caps.pkey_table_len[p->port_num])
+ return sysfs_emit(buf, "none\n");
+ return sysfs_emit(buf, "%d\n", key);
}
static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
@@ -488,7 +486,7 @@ static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
static ssize_t show_port_gid_idx(struct mlx4_port *p,
struct port_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", p->slave);
+ return sysfs_emit(buf, "%d\n", p->slave);
}
static struct attribute **
@@ -542,14 +540,10 @@ static ssize_t sysfs_show_smi_enabled(struct device *dev,
{
struct mlx4_port *p =
container_of(attr, struct mlx4_port, smi_enabled);
- ssize_t len = 0;
-
- if (mlx4_vf_smi_enabled(p->dev->dev, p->slave, p->port_num))
- len = sprintf(buf, "%d\n", 1);
- else
- len = sprintf(buf, "%d\n", 0);
- return len;
+ return sysfs_emit(buf, "%d\n",
+ !!mlx4_vf_smi_enabled(p->dev->dev, p->slave,
+ p->port_num));
}
static ssize_t sysfs_show_enable_smi_admin(struct device *dev,
@@ -558,14 +552,10 @@ static ssize_t sysfs_show_enable_smi_admin(struct device *dev,
{
struct mlx4_port *p =
container_of(attr, struct mlx4_port, enable_smi_admin);
- ssize_t len = 0;
- if (mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, p->port_num))
- len = sprintf(buf, "%d\n", 1);
- else
- len = sprintf(buf, "%d\n", 0);
-
- return len;
+ return sysfs_emit(buf, "%d\n",
+ !!mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave,
+ p->port_num));
}
static ssize_t sysfs_store_enable_smi_admin(struct device *dev,
@@ -808,7 +798,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device)
int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev)
{
- int i;
+ unsigned int i;
int ret = 0;
if (!mlx4_is_master(dev->dev))
@@ -827,7 +817,7 @@ int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev)
goto err_ports;
}
- for (i = 1; i <= dev->ib_dev.phys_port_cnt; ++i) {
+ rdma_for_each_port(&dev->ib_dev, i) {
ret = add_port_entries(dev, i);
if (ret)
goto err_add_entries;
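The mlx4 sysfs conversions above follow a common kernel pattern: a ->show() callback receives a PAGE_SIZE buffer and should use sysfs_emit(), which bounds its output to a single page and returns the number of bytes written, instead of an open-coded sprintf(). A minimal, self-contained sketch of that pattern (the attribute and value names are hypothetical, not taken from the driver):

/* Hedged sketch of a sysfs ->show() callback using sysfs_emit().
 * my_value / my_show are illustrative names only.
 */
#include <linux/device.h>
#include <linux/sysfs.h>

static unsigned int my_value;

static ssize_t my_show(struct device *dev, struct device_attribute *attr,
		       char *buf)
{
	/* sysfs_emit() never writes past PAGE_SIZE and returns the length. */
	return sysfs_emit(buf, "%u\n", my_value);
}
static DEVICE_ATTR_RO(my);	/* exposes the attribute as read-only "my" */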
diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
index 8d651c05de62..ef1ff42eaec5 100644
--- a/drivers/infiniband/hw/mlx5/Kconfig
+++ b/drivers/infiniband/hw/mlx5/Kconfig
@@ -1,7 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0-only
config MLX5_INFINIBAND
tristate "Mellanox 5th generation network adapters (ConnectX series) support"
depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
- ---help---
+ help
This driver provides low-level InfiniBand support for
Mellanox Connect-IB PCI Express host channel adapters (HCAs).
This is required to use InfiniBand protocols such as
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 33f5adb14e4e..dd7bb377f491 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -1,9 +1,33 @@
-obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
+
+mlx5_ib-y := ah.o \
+ cmd.o \
+ cong.o \
+ counters.o \
+ cq.o \
+ data_direct.o \
+ dm.o \
+ dmah.o \
+ doorbell.o \
+ fs.o \
+ gsi.o \
+ ib_virt.o \
+ mad.o \
+ main.o \
+ mem.o \
+ mr.o \
+ qp.o \
+ qpc.o \
+ restrack.o \
+ srq.o \
+ srq_cmd.o \
+ umr.o \
+ wr.o
-mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq_cmd.o \
- srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o \
- cong.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
-mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o
-mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += flow.o
+mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o \
+ qos.o \
+ std_types.o
+mlx5_ib-$(CONFIG_MLX5_MACSEC) += macsec.o
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index 80642dd359bc..531a57f9ee7e 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -32,10 +32,30 @@
#include "mlx5_ib.h"
-static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
- struct rdma_ah_attr *ah_attr)
+static __be16 mlx5_ah_get_udp_sport(const struct mlx5_ib_dev *dev,
+ const struct rdma_ah_attr *ah_attr)
{
+ enum ib_gid_type gid_type = ah_attr->grh.sgid_attr->gid_type;
+ __be16 sport;
+
+ if ((gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) &&
+ (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) &&
+ (ah_attr->grh.flow_label & IB_GRH_FLOWLABEL_MASK))
+ sport = cpu_to_be16(
+ rdma_flow_label_to_udp_sport(ah_attr->grh.flow_label));
+ else
+ sport = mlx5_get_roce_udp_sport_min(dev,
+ ah_attr->grh.sgid_attr);
+
+ return sport;
+}
+
+static int create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
+ struct rdma_ah_init_attr *init_attr)
+{
+ struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
enum ib_gid_type gid_type;
+ int rate_val;
if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
@@ -48,15 +68,21 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
ah->av.tclass = grh->traffic_class;
}
- ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4);
+ rate_val = mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah_attr));
+ if (rate_val < 0)
+ return rate_val;
+ ah->av.stat_rate_sl = rate_val << 4;
if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
+ if (init_attr->xmit_slave)
+ ah->xmit_port =
+ mlx5_lag_get_slave_port(dev->mdev,
+ init_attr->xmit_slave);
gid_type = ah_attr->grh.sgid_attr->gid_type;
memcpy(ah->av.rmac, ah_attr->roce.dmac,
sizeof(ah_attr->roce.dmac));
- ah->av.udp_sport =
- mlx5_get_roce_udp_sport(dev, ah_attr->grh.sgid_attr);
+ ah->av.udp_sport = mlx5_ah_get_udp_sport(dev, ah_attr);
ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0x7) << 1;
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
#define MLX5_ECN_ENABLED BIT(1)
@@ -66,12 +92,15 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
ah->av.fl_mlid = rdma_ah_get_path_bits(ah_attr) & 0x7f;
ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0xf);
}
+
+ return 0;
}
-int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr,
- u32 flags, struct ib_udata *udata)
+int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
{
+ struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
struct mlx5_ib_ah *ah = to_mah(ibah);
struct mlx5_ib_dev *dev = to_mdev(ibah->device);
enum rdma_ah_attr_type ah_type = ah_attr->type;
@@ -83,8 +112,8 @@ int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr,
if (ah_type == RDMA_AH_ATTR_TYPE_ROCE && udata) {
int err;
struct mlx5_ib_create_ah_resp resp = {};
- u32 min_resp_len = offsetof(typeof(resp), dmac) +
- sizeof(resp.dmac);
+ u32 min_resp_len =
+ offsetofend(struct mlx5_ib_create_ah_resp, dmac);
if (udata->outlen < min_resp_len)
return -EINVAL;
@@ -97,8 +126,7 @@ int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr,
return err;
}
- create_ib_ah(dev, ah, ah_attr);
- return 0;
+ return create_ib_ah(dev, ah, init_attr);
}
int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
@@ -124,8 +152,3 @@ int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
return 0;
}
-
-void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags)
-{
- return;
-}
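For reference, the address-vector handling above packs the static rate and the service level into the single stat_rate_sl byte: the validated rate index goes in the high nibble, and the SL occupies the low bits (three bits shifted left by one for RoCE, four bits for the IB link layer). A standalone sketch of that bit packing, using hypothetical values rather than the driver's structures:

/* Illustrative packing of rate + SL into one byte, mirroring the
 * stat_rate_sl handling in the hunk above. Values are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t rate_val = 3;	/* device rate index, assumed already validated */
	uint8_t sl = 5;		/* service level */
	uint8_t stat_rate_sl;

	stat_rate_sl = rate_val << 4;		/* rate index in the high nibble */
	stat_rate_sl |= (sl & 0x7) << 1;	/* RoCE: 3-bit SL, shifted by one */
	printf("stat_rate_sl = 0x%02x\n", stat_rate_sl);
	return 0;
}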
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index e3ec79b8f7f5..7c08e3008927 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -1,373 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
- * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2017-2020, Mellanox Technologies inc. All rights reserved.
*/
#include "cmd.h"
-int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey)
-{
- u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {0};
- int err;
-
- MLX5_SET(query_special_contexts_in, in, opcode,
- MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
- if (!err)
- *mkey = MLX5_GET(query_special_contexts_out, out,
- dump_fill_mkey);
- return err;
-}
-
-int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey)
+int mlx5r_cmd_query_special_mkeys(struct mlx5_ib_dev *dev)
{
u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {};
- u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {};
+ bool is_terminate, is_dump, is_null;
int err;
- MLX5_SET(query_special_contexts_in, in, opcode,
- MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
- if (!err)
- *null_mkey = MLX5_GET(query_special_contexts_out, out,
- null_mkey);
- return err;
-}
-
-int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
- void *out, int out_size)
-{
- u32 in[MLX5_ST_SZ_DW(query_cong_params_in)] = { };
-
- MLX5_SET(query_cong_params_in, in, opcode,
- MLX5_CMD_OP_QUERY_CONG_PARAMS);
- MLX5_SET(query_cong_params_in, in, cong_protocol, cong_point);
-
- return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
-}
-
-int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev,
- void *in, int in_size)
-{
- u32 out[MLX5_ST_SZ_DW(modify_cong_params_out)] = { };
-
- return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out));
-}
-
-int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
- u64 length, u32 alignment)
-{
- struct mlx5_core_dev *dev = dm->dev;
- u64 num_memic_hw_pages = MLX5_CAP_DEV_MEM(dev, memic_bar_size)
- >> PAGE_SHIFT;
- u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
- u32 max_alignment = MLX5_CAP_DEV_MEM(dev, log_max_memic_addr_alignment);
- u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE);
- u32 out[MLX5_ST_SZ_DW(alloc_memic_out)] = {};
- u32 in[MLX5_ST_SZ_DW(alloc_memic_in)] = {};
- u32 mlx5_alignment;
- u64 page_idx = 0;
- int ret = 0;
-
- if (!length || (length & MLX5_MEMIC_ALLOC_SIZE_MASK))
- return -EINVAL;
-
- /* mlx5 device sets alignment as 64*2^driver_value
- * so normalizing is needed.
- */
- mlx5_alignment = (alignment < MLX5_MEMIC_BASE_ALIGN) ? 0 :
- alignment - MLX5_MEMIC_BASE_ALIGN;
- if (mlx5_alignment > max_alignment)
- return -EINVAL;
-
- MLX5_SET(alloc_memic_in, in, opcode, MLX5_CMD_OP_ALLOC_MEMIC);
- MLX5_SET(alloc_memic_in, in, range_size, num_pages * PAGE_SIZE);
- MLX5_SET(alloc_memic_in, in, memic_size, length);
- MLX5_SET(alloc_memic_in, in, log_memic_addr_alignment,
- mlx5_alignment);
-
- while (page_idx < num_memic_hw_pages) {
- spin_lock(&dm->lock);
- page_idx = bitmap_find_next_zero_area(dm->memic_alloc_pages,
- num_memic_hw_pages,
- page_idx,
- num_pages, 0);
-
- if (page_idx < num_memic_hw_pages)
- bitmap_set(dm->memic_alloc_pages,
- page_idx, num_pages);
-
- spin_unlock(&dm->lock);
-
- if (page_idx >= num_memic_hw_pages)
- break;
-
- MLX5_SET64(alloc_memic_in, in, range_start_addr,
- hw_start_addr + (page_idx * PAGE_SIZE));
-
- ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
- if (ret) {
- spin_lock(&dm->lock);
- bitmap_clear(dm->memic_alloc_pages,
- page_idx, num_pages);
- spin_unlock(&dm->lock);
-
- if (ret == -EAGAIN) {
- page_idx++;
- continue;
- }
-
- return ret;
- }
-
- *addr = dev->bar_addr +
- MLX5_GET64(alloc_memic_out, out, memic_start_addr);
+ is_terminate = MLX5_CAP_GEN(dev->mdev, terminate_scatter_list_mkey);
+ is_dump = MLX5_CAP_GEN(dev->mdev, dump_fill_mkey);
+ is_null = MLX5_CAP_GEN(dev->mdev, null_mkey);
+ dev->mkeys.terminate_scatter_list_mkey = MLX5_TERMINATE_SCATTER_LIST_LKEY;
+ if (!is_terminate && !is_dump && !is_null)
return 0;
- }
-
- return -ENOMEM;
-}
-
-int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
-{
- struct mlx5_core_dev *dev = dm->dev;
- u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
- u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE);
- u32 out[MLX5_ST_SZ_DW(dealloc_memic_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(dealloc_memic_in)] = {0};
- u64 start_page_idx;
- int err;
-
- addr -= dev->bar_addr;
- start_page_idx = (addr - hw_start_addr) >> PAGE_SHIFT;
-
- MLX5_SET(dealloc_memic_in, in, opcode, MLX5_CMD_OP_DEALLOC_MEMIC);
- MLX5_SET64(dealloc_memic_in, in, memic_start_addr, addr);
- MLX5_SET(dealloc_memic_in, in, memic_size, length);
-
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-
- if (!err) {
- spin_lock(&dm->lock);
- bitmap_clear(dm->memic_alloc_pages,
- start_page_idx, num_pages);
- spin_unlock(&dm->lock);
- }
- return err;
-}
-
-int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
- u16 uid, phys_addr_t *addr, u32 *obj_id)
-{
- struct mlx5_core_dev *dev = dm->dev;
- u32 num_blocks = DIV_ROUND_UP(length, MLX5_SW_ICM_BLOCK_SIZE(dev));
- u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
- u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {};
- unsigned long *block_map;
- u64 icm_start_addr;
- u32 log_icm_size;
- u32 max_blocks;
- u64 block_idx;
- void *sw_icm;
- int ret;
-
- MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
- MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
- MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
- MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
-
- switch (type) {
- case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
- icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
- steering_sw_icm_start_address);
- log_icm_size = MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size);
- block_map = dm->steering_sw_icm_alloc_blocks;
- break;
- case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
- icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
- header_modify_sw_icm_start_address);
- log_icm_size = MLX5_CAP_DEV_MEM(dev,
- log_header_modify_sw_icm_size);
- block_map = dm->header_modify_sw_icm_alloc_blocks;
- break;
- default:
- return -EINVAL;
- }
-
- max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
- spin_lock(&dm->lock);
- block_idx = bitmap_find_next_zero_area(block_map,
- max_blocks,
- 0,
- num_blocks, 0);
-
- if (block_idx < max_blocks)
- bitmap_set(block_map,
- block_idx, num_blocks);
-
- spin_unlock(&dm->lock);
-
- if (block_idx >= max_blocks)
- return -ENOMEM;
-
- sw_icm = MLX5_ADDR_OF(create_sw_icm_in, in, sw_icm);
- icm_start_addr += block_idx << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
- MLX5_SET64(sw_icm, sw_icm, sw_icm_start_addr,
- icm_start_addr);
- MLX5_SET(sw_icm, sw_icm, log_sw_icm_size, ilog2(length));
-
- ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
- if (ret) {
- spin_lock(&dm->lock);
- bitmap_clear(block_map,
- block_idx, num_blocks);
- spin_unlock(&dm->lock);
-
- return ret;
- }
-
- *addr = icm_start_addr;
- *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
-
- return 0;
-}
-
-int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
- u16 uid, phys_addr_t addr, u32 obj_id)
-{
- struct mlx5_core_dev *dev = dm->dev;
- u32 num_blocks = DIV_ROUND_UP(length, MLX5_SW_ICM_BLOCK_SIZE(dev));
- u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
- u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
- unsigned long *block_map;
- u64 start_idx;
- int err;
-
- switch (type) {
- case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
- start_idx =
- (addr - MLX5_CAP64_DEV_MEM(
- dev, steering_sw_icm_start_address)) >>
- MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
- block_map = dm->steering_sw_icm_alloc_blocks;
- break;
- case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
- start_idx =
- (addr -
- MLX5_CAP64_DEV_MEM(
- dev, header_modify_sw_icm_start_address)) >>
- MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
- block_map = dm->header_modify_sw_icm_alloc_blocks;
- break;
- default:
- return -EINVAL;
- }
-
- MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
- MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
- MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
- MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
- MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
-
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ MLX5_SET(query_special_contexts_in, in, opcode,
+ MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
+ err = mlx5_cmd_exec_inout(dev->mdev, query_special_contexts, in, out);
if (err)
return err;
- spin_lock(&dm->lock);
- bitmap_clear(block_map,
- start_idx, num_blocks);
- spin_unlock(&dm->lock);
+ if (is_dump)
+ dev->mkeys.dump_fill_mkey = MLX5_GET(query_special_contexts_out,
+ out, dump_fill_mkey);
+
+ if (is_null)
+ dev->mkeys.null_mkey = cpu_to_be32(
+ MLX5_GET(query_special_contexts_out, out, null_mkey));
+
+ if (is_terminate)
+ dev->mkeys.terminate_scatter_list_mkey =
+ cpu_to_be32(MLX5_GET(query_special_contexts_out, out,
+ terminate_scatter_list_mkey));
return 0;
}
-int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
+int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
+ void *out)
{
- u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
- int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ u32 in[MLX5_ST_SZ_DW(query_cong_params_in)] = {};
- MLX5_SET(ppcnt_reg, in, local_port, 1);
+ MLX5_SET(query_cong_params_in, in, opcode,
+ MLX5_CMD_OP_QUERY_CONG_PARAMS);
+ MLX5_SET(query_cong_params_in, in, cong_protocol, cong_point);
- MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
- return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPCNT,
- 0, 0);
+ return mlx5_cmd_exec_inout(dev, query_cong_params, in, out);
}
void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid)
{
- u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {};
- u32 out[MLX5_ST_SZ_DW(destroy_tir_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {};
MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR);
MLX5_SET(destroy_tir_in, in, tirn, tirn);
MLX5_SET(destroy_tir_in, in, uid, uid);
- mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ mlx5_cmd_exec_in(dev, destroy_tir, in);
}
void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid)
{
- u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(destroy_tis_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {};
MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS);
MLX5_SET(destroy_tis_in, in, tisn, tisn);
MLX5_SET(destroy_tis_in, in, uid, uid);
- mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ mlx5_cmd_exec_in(dev, destroy_tis, in);
}
-void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid)
+int mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid)
{
- u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {};
- u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {};
MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
MLX5_SET(destroy_rqt_in, in, rqtn, rqtn);
MLX5_SET(destroy_rqt_in, in, uid, uid);
- mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, destroy_rqt, in);
}
int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn,
u16 uid)
{
- u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {};
int err;
MLX5_SET(alloc_transport_domain_in, in, opcode,
MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN);
MLX5_SET(alloc_transport_domain_in, in, uid, uid);
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ err = mlx5_cmd_exec_inout(dev, alloc_transport_domain, in, out);
if (!err)
*tdn = MLX5_GET(alloc_transport_domain_out, out,
transport_domain);
@@ -378,32 +106,29 @@ int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn,
void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn,
u16 uid)
{
- u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {};
MLX5_SET(dealloc_transport_domain_in, in, opcode,
MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
MLX5_SET(dealloc_transport_domain_in, in, uid, uid);
MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn);
- mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ mlx5_cmd_exec_in(dev, dealloc_transport_domain, in);
}
-void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid)
+int mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid)
{
- u32 out[MLX5_ST_SZ_DW(dealloc_pd_out)] = {};
- u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {};
+ u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {};
MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD);
MLX5_SET(dealloc_pd_in, in, pd, pdn);
MLX5_SET(dealloc_pd_in, in, uid, uid);
- mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, dealloc_pd, in);
}
int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
u32 qpn, u16 uid)
{
- u32 out[MLX5_ST_SZ_DW(attach_to_mcg_out)] = {};
- u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {};
+ u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {};
void *gid;
MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG);
@@ -411,14 +136,13 @@ int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
MLX5_SET(attach_to_mcg_in, in, uid, uid);
gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid);
memcpy(gid, mgid, sizeof(*mgid));
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, attach_to_mcg, in);
}
int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
u32 qpn, u16 uid)
{
- u32 out[MLX5_ST_SZ_DW(detach_from_mcg_out)] = {};
- u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {};
+ u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {};
void *gid;
MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
@@ -426,18 +150,18 @@ int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
MLX5_SET(detach_from_mcg_in, in, uid, uid);
gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid);
memcpy(gid, mgid, sizeof(*mgid));
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ return mlx5_cmd_exec_in(dev, detach_from_mcg, in);
}
int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid)
{
u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {};
- u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)] = {};
int err;
MLX5_SET(alloc_xrcd_in, in, opcode, MLX5_CMD_OP_ALLOC_XRCD);
MLX5_SET(alloc_xrcd_in, in, uid, uid);
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ err = mlx5_cmd_exec_inout(dev, alloc_xrcd, in, out);
if (!err)
*xrcdn = MLX5_GET(alloc_xrcd_out, out, xrcd);
return err;
@@ -445,33 +169,15 @@ int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid)
int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid)
{
- u32 out[MLX5_ST_SZ_DW(dealloc_xrcd_out)] = {};
- u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)] = {};
+ u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)] = {};
MLX5_SET(dealloc_xrcd_in, in, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
MLX5_SET(dealloc_xrcd_in, in, xrcd, xrcdn);
MLX5_SET(dealloc_xrcd_in, in, uid, uid);
- return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id,
- u16 uid)
-{
- u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {0};
- int err;
-
- MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
- MLX5_SET(alloc_q_counter_in, in, uid, uid);
-
- err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
- if (!err)
- *counter_id = MLX5_GET(alloc_q_counter_out, out,
- counter_set_id);
- return err;
+ return mlx5_cmd_exec_in(dev, dealloc_xrcd, in);
}
-int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
+int mlx5_cmd_mad_ifc(struct mlx5_ib_dev *dev, const void *inb, void *outb,
u16 opmod, u8 port)
{
int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out);
@@ -489,12 +195,18 @@ int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC);
MLX5_SET(mad_ifc_in, in, op_mod, opmod);
- MLX5_SET(mad_ifc_in, in, port, port);
+ if (dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) {
+ MLX5_SET(mad_ifc_in, in, plane_index, port);
+ MLX5_SET(mad_ifc_in, in, port,
+ smi_to_native_portnum(dev, port));
+ } else {
+ MLX5_SET(mad_ifc_in, in, port, port);
+ }
data = MLX5_ADDR_OF(mad_ifc_in, in, mad);
memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad));
- err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
+ err = mlx5_cmd_exec_inout(dev->mdev, mad_ifc, in, out);
if (err)
goto out;
@@ -507,3 +219,50 @@ out:
kfree(in);
return err;
}
+
+int mlx5_cmd_uar_alloc(struct mlx5_core_dev *dev, u32 *uarn, u16 uid)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_uar_in)] = {};
+ int err;
+
+ MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR);
+ MLX5_SET(alloc_uar_in, in, uid, uid);
+ err = mlx5_cmd_exec_inout(dev, alloc_uar, in, out);
+ if (err)
+ return err;
+
+ *uarn = MLX5_GET(alloc_uar_out, out, uar);
+ return 0;
+}
+
+int mlx5_cmd_uar_dealloc(struct mlx5_core_dev *dev, u32 uarn, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {};
+
+ MLX5_SET(dealloc_uar_in, in, opcode, MLX5_CMD_OP_DEALLOC_UAR);
+ MLX5_SET(dealloc_uar_in, in, uar, uarn);
+ MLX5_SET(dealloc_uar_in, in, uid, uid);
+ return mlx5_cmd_exec_in(dev, dealloc_uar, in);
+}
+
+int mlx5_cmd_query_vuid(struct mlx5_core_dev *dev, bool data_direct,
+ char *out_vuid)
+{
+ u8 out[MLX5_ST_SZ_BYTES(query_vuid_out) +
+ MLX5_ST_SZ_BYTES(array1024_auto)] = {};
+ u8 in[MLX5_ST_SZ_BYTES(query_vuid_in)] = {};
+ char *vuid;
+ int err;
+
+ MLX5_SET(query_vuid_in, in, opcode, MLX5_CMD_OPCODE_QUERY_VUID);
+ MLX5_SET(query_vuid_in, in, vhca_id, MLX5_CAP_GEN(dev, vhca_id));
+ MLX5_SET(query_vuid_in, in, data_direct, data_direct);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ vuid = MLX5_ADDR_OF(query_vuid_out, out, vuid);
+ memcpy(out_vuid, vuid, MLX5_ST_SZ_BYTES(array1024_auto));
+ return 0;
+}
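Throughout this file, open-coded mlx5_cmd_exec() calls with explicit sizeof() arguments are replaced by the mlx5_cmd_exec_in()/mlx5_cmd_exec_inout() helpers, which derive the input and output lengths from the firmware interface structure names. As a rough illustration only (the authoritative macro definitions live in include/linux/mlx5/driver.h), a call such as mlx5_cmd_exec_inout(dev, alloc_xrcd, in, out) amounts to approximately:

/* Hedged sketch: approximate expansion of mlx5_cmd_exec_inout() for one
 * command; example_exec_inout is a hypothetical wrapper, not a driver API.
 */
#include <linux/mlx5/driver.h>

static inline int example_exec_inout(struct mlx5_core_dev *dev,
				     void *in, void *out)
{
	return mlx5_cmd_exec(dev, in, MLX5_ST_SZ_BYTES(alloc_xrcd_in),
			     out, MLX5_ST_SZ_BYTES(alloc_xrcd_out));
}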
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 0572dcba6eae..e6c88b6ebd0d 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -37,20 +37,13 @@
#include <linux/kernel.h>
#include <linux/mlx5/driver.h>
-int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey);
-int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
+int mlx5r_cmd_query_special_mkeys(struct mlx5_ib_dev *dev);
int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
- void *out, int out_size);
-int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out);
-int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
- void *in, int in_size);
-int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
- u64 length, u32 alignment);
-int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
-void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
+ void *out);
+int mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid);
void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid);
-void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid);
+int mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid);
int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn,
u16 uid);
void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn,
@@ -61,12 +54,10 @@ int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
u32 qpn, u16 uid);
int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid);
int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid);
-int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id,
- u16 uid);
-int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
+int mlx5_cmd_mad_ifc(struct mlx5_ib_dev *dev, const void *inb, void *outb,
u16 opmod, u8 port);
-int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
- u16 uid, phys_addr_t *addr, u32 *obj_id);
-int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
- u16 uid, phys_addr_t addr, u32 obj_id);
+int mlx5_cmd_uar_alloc(struct mlx5_core_dev *dev, u32 *uarn, u16 uid);
+int mlx5_cmd_uar_dealloc(struct mlx5_core_dev *dev, u32 uarn, u16 uid);
+int mlx5_cmd_query_vuid(struct mlx5_core_dev *dev, bool data_direct,
+ char *out_vuid);
#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c
index 8ba439fabf7f..a78a067e3ce7 100644
--- a/drivers/infiniband/hw/mlx5/cong.c
+++ b/drivers/infiniband/hw/mlx5/cong.c
@@ -38,6 +38,7 @@
enum mlx5_ib_cong_node_type {
MLX5_IB_RROCE_ECN_RP = 1,
MLX5_IB_RROCE_ECN_NP = 2,
+ MLX5_IB_RROCE_GENERAL = 3,
};
static const char * const mlx5_ib_dbg_cc_name[] = {
@@ -47,6 +48,7 @@ static const char * const mlx5_ib_dbg_cc_name[] = {
"rp_byte_reset",
"rp_threshold",
"rp_ai_rate",
+ "rp_max_rate",
"rp_hai_rate",
"rp_min_dec_fac",
"rp_min_rate",
@@ -56,9 +58,12 @@ static const char * const mlx5_ib_dbg_cc_name[] = {
"rp_rate_reduce_monitor_period",
"rp_initial_alpha_value",
"rp_gd",
+ "np_min_time_between_cnps",
"np_cnp_dscp",
"np_cnp_prio_mode",
"np_cnp_prio",
+ "rtt_resp_dscp_valid",
+ "rtt_resp_dscp",
};
#define MLX5_IB_RP_CLAMP_TGT_RATE_ATTR BIT(1)
@@ -66,6 +71,7 @@ static const char * const mlx5_ib_dbg_cc_name[] = {
#define MLX5_IB_RP_TIME_RESET_ATTR BIT(3)
#define MLX5_IB_RP_BYTE_RESET_ATTR BIT(4)
#define MLX5_IB_RP_THRESHOLD_ATTR BIT(5)
+#define MLX5_IB_RP_MAX_RATE_ATTR BIT(6)
#define MLX5_IB_RP_AI_RATE_ATTR BIT(7)
#define MLX5_IB_RP_HAI_RATE_ATTR BIT(8)
#define MLX5_IB_RP_MIN_DEC_FAC_ATTR BIT(9)
@@ -77,17 +83,22 @@ static const char * const mlx5_ib_dbg_cc_name[] = {
#define MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR BIT(15)
#define MLX5_IB_RP_GD_ATTR BIT(16)
+#define MLX5_IB_NP_MIN_TIME_BETWEEN_CNPS_ATTR BIT(2)
#define MLX5_IB_NP_CNP_DSCP_ATTR BIT(3)
#define MLX5_IB_NP_CNP_PRIO_MODE_ATTR BIT(4)
+#define MLX5_IB_GENERAL_RTT_RESP_DSCP_ATTR BIT(0)
+
static enum mlx5_ib_cong_node_type
mlx5_ib_param_to_node(enum mlx5_ib_dbg_cc_types param_offset)
{
- if (param_offset >= MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE &&
- param_offset <= MLX5_IB_DBG_CC_RP_GD)
+ if (param_offset <= MLX5_IB_DBG_CC_RP_GD)
return MLX5_IB_RROCE_ECN_RP;
- else
+
+ if (param_offset <= MLX5_IB_DBG_CC_NP_CNP_PRIO)
return MLX5_IB_RROCE_ECN_NP;
+
+ return MLX5_IB_RROCE_GENERAL;
}
static u32 mlx5_get_cc_param_val(void *field, int offset)
@@ -111,6 +122,9 @@ static u32 mlx5_get_cc_param_val(void *field, int offset)
case MLX5_IB_DBG_CC_RP_AI_RATE:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
rpg_ai_rate);
+ case MLX5_IB_DBG_CC_RP_MAX_RATE:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_max_rate);
case MLX5_IB_DBG_CC_RP_HAI_RATE:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
rpg_hai_rate);
@@ -138,6 +152,9 @@ static u32 mlx5_get_cc_param_val(void *field, int offset)
case MLX5_IB_DBG_CC_RP_GD:
return MLX5_GET(cong_control_r_roce_ecn_rp, field,
rpg_gd);
+ case MLX5_IB_DBG_CC_NP_MIN_TIME_BETWEEN_CNPS:
+ return MLX5_GET(cong_control_r_roce_ecn_np, field,
+ min_time_between_cnps);
case MLX5_IB_DBG_CC_NP_CNP_DSCP:
return MLX5_GET(cong_control_r_roce_ecn_np, field,
cnp_dscp);
@@ -147,6 +164,12 @@ static u32 mlx5_get_cc_param_val(void *field, int offset)
case MLX5_IB_DBG_CC_NP_CNP_PRIO:
return MLX5_GET(cong_control_r_roce_ecn_np, field,
cnp_802p_prio);
+ case MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP_VALID:
+ return MLX5_GET(cong_control_r_roce_general, field,
+ rtt_resp_dscp_valid);
+ case MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP:
+ return MLX5_GET(cong_control_r_roce_general, field,
+ rtt_resp_dscp);
default:
return 0;
}
@@ -186,6 +209,11 @@ static void mlx5_ib_set_cc_param_mask_val(void *field, int offset,
MLX5_SET(cong_control_r_roce_ecn_rp, field,
rpg_ai_rate, var);
break;
+ case MLX5_IB_DBG_CC_RP_MAX_RATE:
+ *attr_mask |= MLX5_IB_RP_MAX_RATE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_max_rate, var);
+ break;
case MLX5_IB_DBG_CC_RP_HAI_RATE:
*attr_mask |= MLX5_IB_RP_HAI_RATE_ATTR;
MLX5_SET(cong_control_r_roce_ecn_rp, field,
@@ -231,6 +259,11 @@ static void mlx5_ib_set_cc_param_mask_val(void *field, int offset,
MLX5_SET(cong_control_r_roce_ecn_rp, field,
rpg_gd, var);
break;
+ case MLX5_IB_DBG_CC_NP_MIN_TIME_BETWEEN_CNPS:
+ *attr_mask |= MLX5_IB_NP_MIN_TIME_BETWEEN_CNPS_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_np, field,
+ min_time_between_cnps, var);
+ break;
case MLX5_IB_DBG_CC_NP_CNP_DSCP:
*attr_mask |= MLX5_IB_NP_CNP_DSCP_ATTR;
MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_dscp, var);
@@ -244,10 +277,19 @@ static void mlx5_ib_set_cc_param_mask_val(void *field, int offset,
MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, 0);
MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_802p_prio, var);
break;
+ case MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP_VALID:
+ *attr_mask |= MLX5_IB_GENERAL_RTT_RESP_DSCP_ATTR;
+ MLX5_SET(cong_control_r_roce_general, field, rtt_resp_dscp_valid, var);
+ break;
+ case MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP:
+ *attr_mask |= MLX5_IB_GENERAL_RTT_RESP_DSCP_ATTR;
+ MLX5_SET(cong_control_r_roce_general, field, rtt_resp_dscp_valid, 1);
+ MLX5_SET(cong_control_r_roce_general, field, rtt_resp_dscp, var);
+ break;
}
}
-static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, u8 port_num,
+static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, u32 port_num,
int offset, u32 *var)
{
int outlen = MLX5_ST_SZ_BYTES(query_cong_params_out);
@@ -270,7 +312,7 @@ static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, u8 port_num,
node = mlx5_ib_param_to_node(offset);
- err = mlx5_cmd_query_cong_params(mdev, node, out, outlen);
+ err = mlx5_cmd_query_cong_params(mdev, node, out);
if (err)
goto free;
@@ -284,7 +326,7 @@ alloc_err:
return err;
}
-static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, u8 port_num,
+static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, u32 port_num,
int offset, u32 var)
{
int inlen = MLX5_ST_SZ_BYTES(modify_cong_params_in);
@@ -319,7 +361,7 @@ static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, u8 port_num,
MLX5_SET(field_select_r_roce_rp, field, field_select_r_roce_rp,
attr_mask);
- err = mlx5_cmd_modify_cong_params(mdev, in, inlen);
+ err = mlx5_cmd_exec_in(dev->mdev, modify_cong_params, in);
kvfree(in);
alloc_err:
mlx5_ib_put_native_port_mdev(dev, port_num + 1);
@@ -377,7 +419,7 @@ static const struct file_operations dbg_cc_fops = {
.read = get_param,
};
-void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
+void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num)
{
if (!mlx5_debugfs_root ||
!dev->port[port_num].dbg_cc_params ||
@@ -389,7 +431,7 @@ void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
dev->port[port_num].dbg_cc_params = NULL;
}
-void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
+void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num)
{
struct mlx5_ib_dbg_cc_params *dbg_cc_params;
struct mlx5_core_dev *mdev;
@@ -413,10 +455,15 @@ void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num)
dev->port[port_num].dbg_cc_params = dbg_cc_params;
- dbg_cc_params->root = debugfs_create_dir("cc_params",
- mdev->priv.dbg_root);
+ dbg_cc_params->root = debugfs_create_dir("cc_params", mlx5_debugfs_get_dev_root(mdev));
for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) {
+ if ((i == MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP_VALID ||
+ i == MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP))
+ if (!MLX5_CAP_GEN(mdev, roce) ||
+ !MLX5_CAP_ROCE(mdev, roce_cc_general))
+ continue;
+
dbg_cc_params->params[i].offset = i;
dbg_cc_params->params[i].dev = dev;
dbg_cc_params->params[i].port_num = port_num;
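The hunk above moves the cc_params directory under the per-device debugfs root returned by mlx5_debugfs_get_dev_root() and populates per-parameter entries read through dbg_cc_fops. A minimal, hypothetical sketch of the directory-plus-file debugfs pattern used here (names are illustrative, not the driver's):

/* Hedged sketch of the debugfs pattern used for the cc_params knobs.
 * my_root / my_read / my_fops are hypothetical names.
 */
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/module.h>

static ssize_t my_read(struct file *filp, char __user *buf,
		       size_t count, loff_t *pos)
{
	const char msg[] = "42\n";

	return simple_read_from_buffer(buf, count, pos, msg, sizeof(msg) - 1);
}

static const struct file_operations my_fops = {
	.owner = THIS_MODULE,
	.open  = simple_open,
	.read  = my_read,
};

static struct dentry *my_root;

static void my_debugfs_init(void)
{
	my_root = debugfs_create_dir("cc_params_example", NULL);
	debugfs_create_file("rp_ai_rate", 0600, my_root, NULL, &my_fops);
}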
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
new file mode 100644
index 000000000000..e042e0719ead
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -0,0 +1,1279 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include "mlx5_ib.h"
+#include <linux/mlx5/eswitch.h>
+#include <linux/mlx5/vport.h>
+#include "counters.h"
+#include "ib_rep.h"
+#include "qp.h"
+
+struct mlx5_ib_counter {
+ const char *name;
+ size_t offset;
+ u32 type;
+};
+
+struct mlx5_rdma_counter {
+ struct rdma_counter rdma_counter;
+
+ struct mlx5_fc *fc[MLX5_IB_OPCOUNTER_MAX];
+ struct xarray qpn_opfc_xa;
+};
+
+static struct mlx5_rdma_counter *to_mcounter(struct rdma_counter *counter)
+{
+ return container_of(counter, struct mlx5_rdma_counter, rdma_counter);
+}
+
+#define INIT_Q_COUNTER(_name) \
+ { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
+
+#define INIT_VPORT_Q_COUNTER(_name) \
+ { .name = "vport_" #_name, .offset = \
+ MLX5_BYTE_OFF(query_q_counter_out, _name)}
+
+static const struct mlx5_ib_counter basic_q_cnts[] = {
+ INIT_Q_COUNTER(rx_write_requests),
+ INIT_Q_COUNTER(rx_read_requests),
+ INIT_Q_COUNTER(rx_atomic_requests),
+ INIT_Q_COUNTER(rx_dct_connect),
+ INIT_Q_COUNTER(out_of_buffer),
+};
+
+static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
+ INIT_Q_COUNTER(out_of_sequence),
+};
+
+static const struct mlx5_ib_counter retrans_q_cnts[] = {
+ INIT_Q_COUNTER(duplicate_request),
+ INIT_Q_COUNTER(rnr_nak_retry_err),
+ INIT_Q_COUNTER(packet_seq_err),
+ INIT_Q_COUNTER(implied_nak_seq_err),
+ INIT_Q_COUNTER(local_ack_timeout_err),
+};
+
+static const struct mlx5_ib_counter vport_basic_q_cnts[] = {
+ INIT_VPORT_Q_COUNTER(rx_write_requests),
+ INIT_VPORT_Q_COUNTER(rx_read_requests),
+ INIT_VPORT_Q_COUNTER(rx_atomic_requests),
+ INIT_VPORT_Q_COUNTER(rx_dct_connect),
+ INIT_VPORT_Q_COUNTER(out_of_buffer),
+};
+
+static const struct mlx5_ib_counter vport_out_of_seq_q_cnts[] = {
+ INIT_VPORT_Q_COUNTER(out_of_sequence),
+};
+
+static const struct mlx5_ib_counter vport_retrans_q_cnts[] = {
+ INIT_VPORT_Q_COUNTER(duplicate_request),
+ INIT_VPORT_Q_COUNTER(rnr_nak_retry_err),
+ INIT_VPORT_Q_COUNTER(packet_seq_err),
+ INIT_VPORT_Q_COUNTER(implied_nak_seq_err),
+ INIT_VPORT_Q_COUNTER(local_ack_timeout_err),
+};
+
+#define INIT_CONG_COUNTER(_name) \
+ { .name = #_name, .offset = \
+ MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
+
+static const struct mlx5_ib_counter cong_cnts[] = {
+ INIT_CONG_COUNTER(rp_cnp_ignored),
+ INIT_CONG_COUNTER(rp_cnp_handled),
+ INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
+ INIT_CONG_COUNTER(np_cnp_sent),
+};
+
+static const struct mlx5_ib_counter extended_err_cnts[] = {
+ INIT_Q_COUNTER(resp_local_length_error),
+ INIT_Q_COUNTER(resp_cqe_error),
+ INIT_Q_COUNTER(req_cqe_error),
+ INIT_Q_COUNTER(req_remote_invalid_request),
+ INIT_Q_COUNTER(req_remote_access_errors),
+ INIT_Q_COUNTER(resp_remote_access_errors),
+ INIT_Q_COUNTER(resp_cqe_flush_error),
+ INIT_Q_COUNTER(req_cqe_flush_error),
+ INIT_Q_COUNTER(req_transport_retries_exceeded),
+ INIT_Q_COUNTER(req_rnr_retries_exceeded),
+};
+
+static const struct mlx5_ib_counter roce_accl_cnts[] = {
+ INIT_Q_COUNTER(roce_adp_retrans),
+ INIT_Q_COUNTER(roce_adp_retrans_to),
+ INIT_Q_COUNTER(roce_slow_restart),
+ INIT_Q_COUNTER(roce_slow_restart_cnps),
+ INIT_Q_COUNTER(roce_slow_restart_trans),
+};
+
+static const struct mlx5_ib_counter vport_extended_err_cnts[] = {
+ INIT_VPORT_Q_COUNTER(resp_local_length_error),
+ INIT_VPORT_Q_COUNTER(resp_cqe_error),
+ INIT_VPORT_Q_COUNTER(req_cqe_error),
+ INIT_VPORT_Q_COUNTER(req_remote_invalid_request),
+ INIT_VPORT_Q_COUNTER(req_remote_access_errors),
+ INIT_VPORT_Q_COUNTER(resp_remote_access_errors),
+ INIT_VPORT_Q_COUNTER(resp_cqe_flush_error),
+ INIT_VPORT_Q_COUNTER(req_cqe_flush_error),
+ INIT_VPORT_Q_COUNTER(req_transport_retries_exceeded),
+ INIT_VPORT_Q_COUNTER(req_rnr_retries_exceeded),
+};
+
+static const struct mlx5_ib_counter vport_roce_accl_cnts[] = {
+ INIT_VPORT_Q_COUNTER(roce_adp_retrans),
+ INIT_VPORT_Q_COUNTER(roce_adp_retrans_to),
+ INIT_VPORT_Q_COUNTER(roce_slow_restart),
+ INIT_VPORT_Q_COUNTER(roce_slow_restart_cnps),
+ INIT_VPORT_Q_COUNTER(roce_slow_restart_trans),
+};
+
+#define INIT_EXT_PPCNT_COUNTER(_name) \
+ { .name = #_name, .offset = \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
+
+static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
+ INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
+};
+
+#define INIT_OP_COUNTER(_name, _type) \
+ { .name = #_name, .type = MLX5_IB_OPCOUNTER_##_type}
+
+static const struct mlx5_ib_counter basic_op_cnts[] = {
+ INIT_OP_COUNTER(cc_rx_ce_pkts, CC_RX_CE_PKTS),
+};
+
+static const struct mlx5_ib_counter rdmarx_cnp_op_cnts[] = {
+ INIT_OP_COUNTER(cc_rx_cnp_pkts, CC_RX_CNP_PKTS),
+};
+
+static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = {
+ INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS),
+};
+
+static const struct mlx5_ib_counter packets_op_cnts[] = {
+ INIT_OP_COUNTER(rdma_tx_packets, RDMA_TX_PACKETS),
+ INIT_OP_COUNTER(rdma_tx_bytes, RDMA_TX_BYTES),
+ INIT_OP_COUNTER(rdma_rx_packets, RDMA_RX_PACKETS),
+ INIT_OP_COUNTER(rdma_rx_bytes, RDMA_RX_BYTES),
+};
+
+static int mlx5_ib_read_counters(struct ib_counters *counters,
+ struct ib_counters_read_attr *read_attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+ struct mlx5_read_counters_attr mread_attr = {};
+ struct mlx5_ib_flow_counters_desc *desc;
+ int ret, i;
+
+ mutex_lock(&mcounters->mcntrs_mutex);
+ if (mcounters->cntrs_max_index > read_attr->ncounters) {
+ ret = -EINVAL;
+ goto err_bound;
+ }
+
+ mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
+ GFP_KERNEL);
+ if (!mread_attr.out) {
+ ret = -ENOMEM;
+ goto err_bound;
+ }
+
+ mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
+ mread_attr.flags = read_attr->flags;
+ ret = mcounters->read_counters(counters->device, &mread_attr);
+ if (ret)
+ goto err_read;
+
+ /* Walk the counters data array and assign values according to the
+ * description and indexing pairs.
+ */
+ desc = mcounters->counters_data;
+ for (i = 0; i < mcounters->ncounters; i++)
+ read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
+
+err_read:
+ kfree(mread_attr.out);
+err_bound:
+ mutex_unlock(&mcounters->mcntrs_mutex);
+ return ret;
+}
+
+static int mlx5_ib_destroy_counters(struct ib_counters *counters)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+
+ mlx5_ib_counters_clear_description(counters);
+ if (mcounters->hw_cntrs_hndl)
+ mlx5_fc_destroy(to_mdev(counters->device)->mdev,
+ mcounters->hw_cntrs_hndl);
+ return 0;
+}
+
+static int mlx5_ib_create_counters(struct ib_counters *counters,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+
+ mutex_init(&mcounters->mcntrs_mutex);
+ return 0;
+}
+
+static bool vport_qcounters_supported(struct mlx5_ib_dev *dev)
+{
+ return MLX5_CAP_GEN(dev->mdev, q_counter_other_vport) &&
+ MLX5_CAP_GEN(dev->mdev, q_counter_aggregation);
+}
+
+static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
+ u32 port_num)
+{
+ if ((is_mdev_switchdev_mode(dev->mdev) &&
+ !vport_qcounters_supported(dev)) || !port_num)
+ return &dev->port[0].cnts;
+
+ return is_mdev_switchdev_mode(dev->mdev) ?
+ &dev->port[1].cnts : &dev->port[port_num - 1].cnts;
+}
+
+/**
+ * mlx5_ib_get_counters_id - Returns counters id to use for device+port
+ * @dev: Pointer to mlx5 IB device
+ * @port_num: Zero based port number
+ *
+ * mlx5_ib_get_counters_id() returns the counters set id to use for the
+ * given device and port combination, in both switchdev and non-switchdev
+ * modes of the parent device.
+ */
+u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num)
+{
+ const struct mlx5_ib_counters *cnts = get_counters(dev, port_num + 1);
+
+ return cnts->set_id;
+}
+
+static struct rdma_hw_stats *do_alloc_stats(const struct mlx5_ib_counters *cnts)
+{
+ struct rdma_hw_stats *stats;
+ u32 num_hw_counters;
+ int i;
+
+ num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
+ stats = rdma_alloc_hw_stats_struct(cnts->descs,
+ num_hw_counters +
+ cnts->num_op_counters,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+ if (!stats)
+ return NULL;
+
+ for (i = 0; i < cnts->num_op_counters; i++)
+ set_bit(num_hw_counters + i, stats->is_disabled);
+
+ return stats;
+}
+
+static struct rdma_hw_stats *
+mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ const struct mlx5_ib_counters *cnts = &dev->port[0].cnts;
+
+ return do_alloc_stats(cnts);
+}
+
+static struct rdma_hw_stats *
+mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
+
+ return do_alloc_stats(cnts);
+}
+
+static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
+ const struct mlx5_ib_counters *cnts,
+ struct rdma_hw_stats *stats,
+ u16 set_id)
+{
+ u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
+ __be32 val;
+ int ret, i;
+
+ MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
+ MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
+ ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < cnts->num_q_counters; i++) {
+ val = *(__be32 *)((void *)out + cnts->offsets[i]);
+ stats->value[i] = (u64)be32_to_cpu(val);
+ }
+
+ return 0;
+}
+
+static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_counters *cnts,
+ struct rdma_hw_stats *stats)
+{
+ int offset = cnts->num_q_counters + cnts->num_cong_counters;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ int ret, i;
+ void *out;
+
+ out = kvzalloc(sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
+ ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT,
+ 0, 0);
+ if (ret)
+ goto free;
+
+ for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
+ stats->value[i + offset] =
+ be64_to_cpup((__be64 *)(out +
+ cnts->offsets[i + offset]));
+free:
+ kvfree(out);
+ return ret;
+}
+
+static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev,
+ u32 port_num,
+ const struct mlx5_ib_counters *cnts,
+ struct rdma_hw_stats *stats)
+
+{
+ u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
+ struct mlx5_core_dev *mdev;
+ __be32 val;
+ int ret, i;
+
+ if (!dev->port[port_num].rep ||
+ dev->port[port_num].rep->vport == MLX5_VPORT_UPLINK)
+ return 0;
+
+ mdev = mlx5_eswitch_get_core_dev(dev->port[port_num].rep->esw);
+ if (!mdev)
+ return -EOPNOTSUPP;
+
+ MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
+ MLX5_SET(query_q_counter_in, in, other_vport, 1);
+ MLX5_SET(query_q_counter_in, in, vport_number,
+ dev->port[port_num].rep->vport);
+ MLX5_SET(query_q_counter_in, in, aggregate, 1);
+ ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < cnts->num_q_counters; i++) {
+ val = *(__be32 *)((void *)out + cnts->offsets[i]);
+ stats->value[i] = (u64)be32_to_cpu(val);
+ }
+
+ return 0;
+}
+
+static int do_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u32 port_num, int index)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
+ struct mlx5_core_dev *mdev;
+ int ret, num_counters;
+
+ if (!stats)
+ return -EINVAL;
+
+ num_counters = cnts->num_q_counters +
+ cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
+
+ if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0)
+ ret = mlx5_ib_query_q_counters_vport(dev, port_num - 1, cnts,
+ stats);
+ else
+ ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats,
+ cnts->set_id);
+ if (ret)
+ return ret;
+
+ /* We don't expose device counters over Vports */
+ if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0)
+ goto done;
+
+ if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
+ ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
+ if (ret)
+ return ret;
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ if (!port_num)
+ port_num = 1;
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num, NULL);
+ if (!mdev) {
+ /* If port is not affiliated yet, its in down state
+ * which doesn't have any counters yet, so it would be
+ * zero. So no need to read from the HCA.
+ */
+ goto done;
+ }
+ ret = mlx5_lag_query_cong_counters(mdev,
+ stats->value +
+ cnts->num_q_counters,
+ cnts->num_cong_counters,
+ cnts->offsets +
+ cnts->num_q_counters);
+
+ mlx5_ib_put_native_port_mdev(dev, port_num);
+ if (ret)
+ return ret;
+ }
+
+done:
+ return num_counters;
+}
+
+static bool is_rdma_bytes_counter(u32 type)
+{
+ if (type == MLX5_IB_OPCOUNTER_RDMA_TX_BYTES ||
+ type == MLX5_IB_OPCOUNTER_RDMA_RX_BYTES ||
+ type == MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP ||
+ type == MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP)
+ return true;
+
+ return false;
+}
+
+static int do_per_qp_get_op_stat(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
+ int i, ret, index, num_hw_counters;
+ u64 packets = 0, bytes = 0;
+
+ for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) {
+ if (!mcounter->fc[i])
+ continue;
+
+ ret = mlx5_fc_query(dev->mdev, mcounter->fc[i],
+ &packets, &bytes);
+ if (ret)
+ return ret;
+
+ num_hw_counters = cnts->num_q_counters +
+ cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
+
+ index = i - MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP +
+ num_hw_counters;
+
+ if (is_rdma_bytes_counter(i))
+ counter->stats->value[index] = bytes;
+ else
+ counter->stats->value[index] = packets;
+
+ clear_bit(index, counter->stats->is_disabled);
+ }
+ return 0;
+}
+
+static int do_get_op_stat(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u32 port_num, int index)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ const struct mlx5_ib_counters *cnts;
+ const struct mlx5_ib_op_fc *opfcs;
+ u64 packets, bytes;
+ u32 type;
+ int ret;
+
+ cnts = get_counters(dev, port_num);
+
+ opfcs = cnts->opfcs;
+ type = *(u32 *)cnts->descs[index].priv;
+ if (type >= MLX5_IB_OPCOUNTER_MAX)
+ return -EINVAL;
+
+ if (!opfcs[type].fc)
+ goto out;
+
+ ret = mlx5_fc_query(dev->mdev, opfcs[type].fc,
+ &packets, &bytes);
+ if (ret)
+ return ret;
+
+ if (is_rdma_bytes_counter(type))
+ stats->value[index] = bytes;
+ else
+ stats->value[index] = packets;
+out:
+ return index;
+}
+
+static int do_get_op_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u32 port_num)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ const struct mlx5_ib_counters *cnts;
+ int index, ret, num_hw_counters;
+
+ cnts = get_counters(dev, port_num);
+ num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
+ for (index = num_hw_counters;
+ index < (num_hw_counters + cnts->num_op_counters); index++) {
+ ret = do_get_op_stat(ibdev, stats, port_num, index);
+ if (ret != index)
+ return ret;
+ }
+
+ return cnts->num_op_counters;
+}
+
+static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u32 port_num, int index)
+{
+ int num_counters, num_hw_counters, num_op_counters;
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ const struct mlx5_ib_counters *cnts;
+
+ cnts = get_counters(dev, port_num);
+ num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
+ num_counters = num_hw_counters + cnts->num_op_counters;
+
+ if (index < 0 || index > num_counters)
+ return -EINVAL;
+ else if (index > 0 && index < num_hw_counters)
+ return do_get_hw_stats(ibdev, stats, port_num, index);
+ else if (index >= num_hw_counters && index < num_counters)
+ return do_get_op_stat(ibdev, stats, port_num, index);
+
+ num_hw_counters = do_get_hw_stats(ibdev, stats, port_num, index);
+ if (num_hw_counters < 0)
+ return num_hw_counters;
+
+ num_op_counters = do_get_op_stats(ibdev, stats, port_num);
+ if (num_op_counters < 0)
+ return num_op_counters;
+
+ return num_hw_counters + num_op_counters;
+}
+
+static struct rdma_hw_stats *
+mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
+
+ return do_alloc_stats(cnts);
+}
+
+static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
+ int ret;
+
+ ret = mlx5_ib_query_q_counters(dev->mdev, cnts, counter->stats,
+ counter->id);
+ if (ret)
+ return ret;
+
+ if (!counter->mode.bind_opcnt)
+ return 0;
+
+ return do_per_qp_get_op_stat(counter);
+}
+
+static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
+{
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
+
+ if (!counter->id)
+ return 0;
+
+ WARN_ON(!xa_empty(&mcounter->qpn_opfc_xa));
+ mlx5r_fs_destroy_fcs(dev, mcounter->fc);
+ MLX5_SET(dealloc_q_counter_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+ MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
+ return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
+}
+
+static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
+ struct ib_qp *qp, u32 port)
+{
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ bool new = false;
+ int err;
+
+ if (!counter->id) {
+ u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
+
+ MLX5_SET(alloc_q_counter_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_Q_COUNTER);
+ MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
+ err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
+ if (err)
+ return err;
+ counter->id =
+ MLX5_GET(alloc_q_counter_out, out, counter_set_id);
+ new = true;
+ }
+
+ err = mlx5_ib_qp_set_counter(qp, counter);
+ if (err)
+ goto fail_set_counter;
+
+ if (!counter->mode.bind_opcnt)
+ return 0;
+
+ err = mlx5r_fs_bind_op_fc(qp, mcounter->fc, &mcounter->qpn_opfc_xa,
+ port);
+ if (err)
+ goto fail_bind_op_fc;
+
+ return 0;
+
+fail_bind_op_fc:
+ mlx5_ib_qp_set_counter(qp, NULL);
+fail_set_counter:
+ if (new) {
+ mlx5_ib_counter_dealloc(counter);
+ counter->id = 0;
+ }
+
+ return err;
+}
+
+static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp, u32 port)
+{
+ struct rdma_counter *counter = qp->counter;
+ struct mlx5_rdma_counter *mcounter;
+ int err;
+
+ mcounter = to_mcounter(counter);
+
+ mlx5r_fs_unbind_op_fc(qp, &mcounter->qpn_opfc_xa);
+
+ err = mlx5_ib_qp_set_counter(qp, NULL);
+ if (err)
+ goto fail_set_counter;
+
+ return 0;
+
+fail_set_counter:
+ if (counter->mode.bind_opcnt)
+ mlx5r_fs_bind_op_fc(qp, mcounter->fc,
+ &mcounter->qpn_opfc_xa, port);
+ return err;
+}
+
+static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
+ struct rdma_stat_desc *descs, size_t *offsets,
+ u32 port_num)
+{
+ bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
+ port_num != MLX5_VPORT_PF;
+ const struct mlx5_ib_counter *names;
+ int j = 0, i, size;
+
+ names = is_vport ? vport_basic_q_cnts : basic_q_cnts;
+ size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
+ ARRAY_SIZE(basic_q_cnts);
+ for (i = 0; i < size; i++, j++) {
+ descs[j].name = names[i].name;
+ offsets[j] = names[i].offset;
+ }
+
+ names = is_vport ? vport_out_of_seq_q_cnts : out_of_seq_q_cnts;
+ size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
+ ARRAY_SIZE(out_of_seq_q_cnts);
+ if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
+ for (i = 0; i < size; i++, j++) {
+ descs[j].name = names[i].name;
+ offsets[j] = names[i].offset;
+ }
+ }
+
+ names = is_vport ? vport_retrans_q_cnts : retrans_q_cnts;
+ size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
+ ARRAY_SIZE(retrans_q_cnts);
+ if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
+ for (i = 0; i < size; i++, j++) {
+ descs[j].name = names[i].name;
+ offsets[j] = names[i].offset;
+ }
+ }
+
+ names = is_vport ? vport_extended_err_cnts : extended_err_cnts;
+ size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
+ ARRAY_SIZE(extended_err_cnts);
+ if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
+ for (i = 0; i < size; i++, j++) {
+ descs[j].name = names[i].name;
+ offsets[j] = names[i].offset;
+ }
+ }
+
+ names = is_vport ? vport_roce_accl_cnts : roce_accl_cnts;
+ size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
+ ARRAY_SIZE(roce_accl_cnts);
+ if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
+ for (i = 0; i < size; i++, j++) {
+ descs[j].name = names[i].name;
+ offsets[j] = names[i].offset;
+ }
+ }
+
+ if (is_vport)
+ return;
+
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
+ descs[j].name = cong_cnts[i].name;
+ offsets[j] = cong_cnts[i].offset;
+ }
+ }
+
+ if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
+ for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
+ descs[j].name = ext_ppcnt_cnts[i].name;
+ offsets[j] = ext_ppcnt_cnts[i].offset;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(basic_op_cnts); i++, j++) {
+ descs[j].name = basic_op_cnts[i].name;
+ descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
+ descs[j].priv = &basic_op_cnts[i].type;
+ }
+
+ if (MLX5_CAP_FLOWTABLE(dev->mdev,
+ ft_field_support_2_nic_receive_rdma.bth_opcode)) {
+ for (i = 0; i < ARRAY_SIZE(rdmarx_cnp_op_cnts); i++, j++) {
+ descs[j].name = rdmarx_cnp_op_cnts[i].name;
+ descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
+ descs[j].priv = &rdmarx_cnp_op_cnts[i].type;
+ }
+ }
+
+ if (MLX5_CAP_FLOWTABLE(dev->mdev,
+ ft_field_support_2_nic_transmit_rdma.bth_opcode)) {
+ for (i = 0; i < ARRAY_SIZE(rdmatx_cnp_op_cnts); i++, j++) {
+ descs[j].name = rdmatx_cnp_op_cnts[i].name;
+ descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
+ descs[j].priv = &rdmatx_cnp_op_cnts[i].type;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(packets_op_cnts); i++, j++) {
+ descs[j].name = packets_op_cnts[i].name;
+ descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
+ descs[j].priv = &packets_op_cnts[i].type;
+ }
+}
+
+static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_counters *cnts, u32 port_num)
+{
+ bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
+ port_num != MLX5_VPORT_PF;
+ u32 num_counters, num_op_counters = 0, size;
+
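+ /* This accounting must stay in sync with mlx5_ib_fill_counters() */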
+ size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
+ ARRAY_SIZE(basic_q_cnts);
+ num_counters = size;
+
+ size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
+ ARRAY_SIZE(out_of_seq_q_cnts);
+ if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
+ num_counters += size;
+
+ size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
+ ARRAY_SIZE(retrans_q_cnts);
+ if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
+ num_counters += size;
+
+ size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
+ ARRAY_SIZE(extended_err_cnts);
+ if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
+ num_counters += size;
+
+ size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
+ ARRAY_SIZE(roce_accl_cnts);
+ if (MLX5_CAP_GEN(dev->mdev, roce_accl))
+ num_counters += size;
+
+ cnts->num_q_counters = num_counters;
+
+ if (is_vport)
+ goto skip_non_qcounters;
+
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
+ num_counters += ARRAY_SIZE(cong_cnts);
+ }
+ if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
+ cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
+ num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
+ }
+
+ num_op_counters = ARRAY_SIZE(basic_op_cnts);
+
+ num_op_counters += ARRAY_SIZE(packets_op_cnts);
+
+ if (MLX5_CAP_FLOWTABLE(dev->mdev,
+ ft_field_support_2_nic_receive_rdma.bth_opcode))
+ num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts);
+
+ if (MLX5_CAP_FLOWTABLE(dev->mdev,
+ ft_field_support_2_nic_transmit_rdma.bth_opcode))
+ num_op_counters += ARRAY_SIZE(rdmatx_cnp_op_cnts);
+
+skip_non_qcounters:
+ cnts->num_op_counters = num_op_counters;
+ num_counters += num_op_counters;
+ cnts->descs = kcalloc(num_counters,
+ sizeof(struct rdma_stat_desc), GFP_KERNEL);
+ if (!cnts->descs)
+ return -ENOMEM;
+
+ cnts->offsets = kcalloc(num_counters,
+ sizeof(*cnts->offsets), GFP_KERNEL);
+ if (!cnts->offsets)
+ goto err;
+
+ return 0;
+
+err:
+ kfree(cnts->descs);
+ cnts->descs = NULL;
+ return -ENOMEM;
+}
+
+/*
+ * Check whether the given flow counter type shares its flow counter with
+ * another type and, if so, whether that other type's flow counter has
+ * already been created. If both conditions are met, return true and the
+ * shared counter through @opfc; otherwise return false.
+ */
+bool mlx5r_is_opfc_shared_and_in_use(struct mlx5_ib_op_fc *opfcs, u32 type,
+ struct mlx5_ib_op_fc **opfc)
+{
+ u32 shared_fc_type;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ default:
+ return false;
+ }
+
+ *opfc = &opfcs[shared_fc_type];
+ if (!(*opfc)->fc)
+ return false;
+
+ return true;
+}
+
+static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
+ int num_cnt_ports = dev->num_ports;
+ struct mlx5_ib_op_fc *in_use_opfc;
+ int i, j;
+
+ if (is_mdev_switchdev_mode(dev->mdev))
+ num_cnt_ports = min(2, num_cnt_ports);
+
+ MLX5_SET(dealloc_q_counter_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+
+ for (i = 0; i < num_cnt_ports; i++) {
+ if (dev->port[i].cnts.set_id) {
+ MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
+ dev->port[i].cnts.set_id);
+ mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
+ }
+ kfree(dev->port[i].cnts.descs);
+ kfree(dev->port[i].cnts.offsets);
+
+ for (j = 0; j < MLX5_IB_OPCOUNTER_MAX; j++) {
+ if (!dev->port[i].cnts.opfcs[j].fc)
+ continue;
+
+ if (mlx5r_is_opfc_shared_and_in_use(
+ dev->port[i].cnts.opfcs, j, &in_use_opfc))
+ goto skip;
+
+ mlx5_ib_fs_remove_op_fc(dev,
+ &dev->port[i].cnts.opfcs[j], j);
+ mlx5_fc_destroy(dev->mdev,
+ dev->port[i].cnts.opfcs[j].fc);
+skip:
+ dev->port[i].cnts.opfcs[j].fc = NULL;
+ }
+ }
+}
+
+static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
+ int num_cnt_ports = dev->num_ports;
+ int err = 0;
+ int i;
+ bool is_shared;
+
+ MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
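+ /* Allocate with the shared resource UID when the device supports user contexts */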
+ is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
+
+ /*
+ * In switchdev mode only two ports' counters are allocated: the first
+ * holds this device's real Q_counters, while the second serves as a
+ * helper that lets the PF query the Q_counters of all other vports.
+ */
+ if (is_mdev_switchdev_mode(dev->mdev))
+ num_cnt_ports = min(2, num_cnt_ports);
+
+ for (i = 0; i < num_cnt_ports; i++) {
+ err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts, i);
+ if (err)
+ goto err_alloc;
+
+ mlx5_ib_fill_counters(dev, dev->port[i].cnts.descs,
+ dev->port[i].cnts.offsets, i);
+
+ MLX5_SET(alloc_q_counter_in, in, uid,
+ is_shared ? MLX5_SHARED_RESOURCE_UID : 0);
+
+ err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
+ if (err) {
+ mlx5_ib_warn(dev,
+ "couldn't allocate queue counter for port %d, err %d\n",
+ i + 1, err);
+ goto err_alloc;
+ }
+
+ dev->port[i].cnts.set_id =
+ MLX5_GET(alloc_q_counter_out, out, counter_set_id);
+ }
+ return 0;
+
+err_alloc:
+ mlx5_ib_dealloc_counters(dev);
+ return err;
+}
+
+static int read_flow_counters(struct ib_device *ibdev,
+ struct mlx5_read_counters_attr *read_attr)
+{
+ struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+
+ return mlx5_fc_query(dev->mdev, fc,
+ &read_attr->out[IB_COUNTER_PACKETS],
+ &read_attr->out[IB_COUNTER_BYTES]);
+}
+
+/* flow counters currently expose two counters: packets and bytes */
+#define FLOW_COUNTERS_NUM 2
+static int counters_set_description(
+ struct ib_counters *counters, enum mlx5_ib_counters_type counters_type,
+ struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+ u32 cntrs_max_index = 0;
+ int i;
+
+ if (counters_type != MLX5_IB_COUNTERS_FLOW)
+ return -EINVAL;
+
+ /* init the fields for the object */
+ mcounters->type = counters_type;
+ mcounters->read_counters = read_flow_counters;
+ mcounters->counters_num = FLOW_COUNTERS_NUM;
+ mcounters->ncounters = ncounters;
+ /* each counter entry has both a description and an index */
+ for (i = 0; i < ncounters; i++) {
+ if (desc_data[i].description > IB_COUNTER_BYTES)
+ return -EINVAL;
+
+ if (cntrs_max_index <= desc_data[i].index)
+ cntrs_max_index = desc_data[i].index + 1;
+ }
+
+ mutex_lock(&mcounters->mcntrs_mutex);
+ mcounters->counters_data = desc_data;
+ mcounters->cntrs_max_index = cntrs_max_index;
+ mutex_unlock(&mcounters->mcntrs_mutex);
+
+ return 0;
+}
+
+#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
+int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
+ struct mlx5_ib_create_flow *ucmd)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
+ struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
+ struct mlx5_ib_flow_counters_desc *desc_data = NULL;
+ bool hw_hndl = false;
+ int ret = 0;
+
+ if (ucmd && ucmd->ncounters_data != 0) {
+ cntrs_data = ucmd->data;
+ if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
+ return -EINVAL;
+
+ desc_data = kcalloc(cntrs_data->ncounters,
+ sizeof(*desc_data),
+ GFP_KERNEL);
+ if (!desc_data)
+ return -ENOMEM;
+
+ if (copy_from_user(desc_data,
+ u64_to_user_ptr(cntrs_data->counters_data),
+ sizeof(*desc_data) * cntrs_data->ncounters)) {
+ ret = -EFAULT;
+ goto free;
+ }
+ }
+
+ if (!mcounters->hw_cntrs_hndl) {
+ mcounters->hw_cntrs_hndl = mlx5_fc_create(
+ to_mdev(ibcounters->device)->mdev, false);
+ if (IS_ERR(mcounters->hw_cntrs_hndl)) {
+ ret = PTR_ERR(mcounters->hw_cntrs_hndl);
+ goto free;
+ }
+ hw_hndl = true;
+ }
+
+ if (desc_data) {
+ /* counters already bound to at least one flow */
+ if (mcounters->cntrs_max_index) {
+ ret = -EINVAL;
+ goto free_hndl;
+ }
+
+ ret = counters_set_description(ibcounters,
+ MLX5_IB_COUNTERS_FLOW,
+ desc_data,
+ cntrs_data->ncounters);
+ if (ret)
+ goto free_hndl;
+
+ } else if (!mcounters->cntrs_max_index) {
+ /* counters not bound yet, must have udata passed */
+ ret = -EINVAL;
+ goto free_hndl;
+ }
+
+ return 0;
+
+free_hndl:
+ if (hw_hndl) {
+ mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
+ mcounters->hw_cntrs_hndl);
+ mcounters->hw_cntrs_hndl = NULL;
+ }
+free:
+ kfree(desc_data);
+ return ret;
+}
+
+void mlx5_ib_counters_clear_description(struct ib_counters *counters)
+{
+ struct mlx5_ib_mcounters *mcounters;
+
+ if (!counters || atomic_read(&counters->usecnt) != 1)
+ return;
+
+ mcounters = to_mcounters(counters);
+
+ mutex_lock(&mcounters->mcntrs_mutex);
+ kfree(mcounters->counters_data);
+ mcounters->counters_data = NULL;
+ mcounters->cntrs_max_index = 0;
+ mutex_unlock(&mcounters->mcntrs_mutex);
+}
+
+static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
+ unsigned int index, bool enable)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct mlx5_ib_op_fc *opfc, *in_use_opfc;
+ struct mlx5_ib_counters *cnts;
+ u32 num_hw_counters, type;
+ int ret;
+
+ cnts = &dev->port[port - 1].cnts;
+ num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
+ if (index < num_hw_counters ||
+ index >= (num_hw_counters + cnts->num_op_counters))
+ return -EINVAL;
+
+ if (!(cnts->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
+ return -EINVAL;
+
+ type = *(u32 *)cnts->descs[index].priv;
+ if (type >= MLX5_IB_OPCOUNTER_MAX)
+ return -EINVAL;
+
+ opfc = &cnts->opfcs[type];
+
+ if (enable) {
+ if (opfc->fc)
+ return -EEXIST;
+
+ if (mlx5r_is_opfc_shared_and_in_use(cnts->opfcs, type,
+ &in_use_opfc)) {
+ opfc->fc = in_use_opfc->fc;
+ opfc->rule[0] = in_use_opfc->rule[0];
+ return 0;
+ }
+
+ opfc->fc = mlx5_fc_create(dev->mdev, false);
+ if (IS_ERR(opfc->fc))
+ return PTR_ERR(opfc->fc);
+
+ ret = mlx5_ib_fs_add_op_fc(dev, port, opfc, type);
+ if (ret) {
+ mlx5_fc_destroy(dev->mdev, opfc->fc);
+ opfc->fc = NULL;
+ }
+ return ret;
+ }
+
+ if (!opfc->fc)
+ return -EINVAL;
+
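+ /* If another enabled type shares this flow counter, only drop our reference */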
+ if (mlx5r_is_opfc_shared_and_in_use(cnts->opfcs, type, &in_use_opfc))
+ goto out;
+
+ mlx5_ib_fs_remove_op_fc(dev, opfc, type);
+ mlx5_fc_destroy(dev->mdev, opfc->fc);
+out:
+ opfc->fc = NULL;
+ return 0;
+}
+
+static void mlx5_ib_counter_init(struct rdma_counter *counter)
+{
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
+
+ xa_init(&mcounter->qpn_opfc_xa);
+}
+
+static const struct ib_device_ops hw_stats_ops = {
+ .alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
+ .get_hw_stats = mlx5_ib_get_hw_stats,
+ .counter_bind_qp = mlx5_ib_counter_bind_qp,
+ .counter_unbind_qp = mlx5_ib_counter_unbind_qp,
+ .counter_dealloc = mlx5_ib_counter_dealloc,
+ .counter_alloc_stats = mlx5_ib_counter_alloc_stats,
+ .counter_update_stats = mlx5_ib_counter_update_stats,
+ .modify_hw_stat = mlx5_ib_modify_stat,
+ .counter_init = mlx5_ib_counter_init,
+
+ INIT_RDMA_OBJ_SIZE(rdma_counter, mlx5_rdma_counter, rdma_counter),
+};
+
+static const struct ib_device_ops hw_switchdev_vport_op = {
+ .alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
+};
+
+static const struct ib_device_ops hw_switchdev_stats_ops = {
+ .alloc_hw_device_stats = mlx5_ib_alloc_hw_device_stats,
+ .get_hw_stats = mlx5_ib_get_hw_stats,
+ .counter_bind_qp = mlx5_ib_counter_bind_qp,
+ .counter_unbind_qp = mlx5_ib_counter_unbind_qp,
+ .counter_dealloc = mlx5_ib_counter_dealloc,
+ .counter_alloc_stats = mlx5_ib_counter_alloc_stats,
+ .counter_update_stats = mlx5_ib_counter_update_stats,
+ .counter_init = mlx5_ib_counter_init,
+
+ INIT_RDMA_OBJ_SIZE(rdma_counter, mlx5_rdma_counter, rdma_counter),
+};
+
+static const struct ib_device_ops counters_ops = {
+ .create_counters = mlx5_ib_create_counters,
+ .destroy_counters = mlx5_ib_destroy_counters,
+ .read_counters = mlx5_ib_read_counters,
+
+ INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
+};
+
+int mlx5_ib_counters_init(struct mlx5_ib_dev *dev)
+{
+ ib_set_device_ops(&dev->ib_dev, &counters_ops);
+
+ if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
+ return 0;
+
+ if (is_mdev_switchdev_mode(dev->mdev)) {
+ ib_set_device_ops(&dev->ib_dev, &hw_switchdev_stats_ops);
+ if (vport_qcounters_supported(dev))
+ ib_set_device_ops(&dev->ib_dev, &hw_switchdev_vport_op);
+ } else
+ ib_set_device_ops(&dev->ib_dev, &hw_stats_ops);
+ return mlx5_ib_alloc_counters(dev);
+}
+
+void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
+ return;
+
+ mlx5_ib_dealloc_counters(dev);
+}
diff --git a/drivers/infiniband/hw/mlx5/counters.h b/drivers/infiniband/hw/mlx5/counters.h
new file mode 100644
index 000000000000..a04e7dd59455
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/counters.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_COUNTERS_H
+#define _MLX5_IB_COUNTERS_H
+
+#include "mlx5_ib.h"
+
+int mlx5_ib_counters_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev);
+void mlx5_ib_counters_clear_description(struct ib_counters *counters);
+int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
+ struct mlx5_ib_create_flow *ucmd);
+u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num);
+bool mlx5r_is_opfc_shared_and_in_use(struct mlx5_ib_op_fc *opfcs, u32 type,
+ struct mlx5_ib_op_fc **opfc);
+#endif /* _MLX5_IB_COUNTERS_H */
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 0220736b073e..651d76bca114 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -36,8 +36,12 @@
#include <rdma/ib_cache.h>
#include "mlx5_ib.h"
#include "srq.h"
+#include "qp.h"
-static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe)
{
struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;
@@ -120,13 +124,13 @@ static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) {
case MLX5_OPCODE_RDMA_WRITE_IMM:
wc->wc_flags |= IB_WC_WITH_IMM;
- /* fall through */
+ fallthrough;
case MLX5_OPCODE_RDMA_WRITE:
wc->opcode = IB_WC_RDMA_WRITE;
break;
case MLX5_OPCODE_SEND_IMM:
wc->wc_flags |= IB_WC_WITH_IMM;
- /* fall through */
+ fallthrough;
case MLX5_OPCODE_SEND:
case MLX5_OPCODE_SEND_INVAL:
wc->opcode = IB_WC_SEND;
@@ -167,7 +171,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
{
enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1);
struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
- struct mlx5_ib_srq *srq;
+ struct mlx5_ib_srq *srq = NULL;
struct mlx5_ib_wq *wq;
u16 wqe_ctr;
u8 roce_packet_type;
@@ -179,7 +183,8 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
if (qp->ibqp.xrcd) {
msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn));
- srq = to_mibsrq(msrq);
+ if (msrq)
+ srq = to_mibsrq(msrq);
} else {
srq = to_msrq(qp->ibqp.srq);
}
@@ -201,7 +206,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
case MLX5_CQE_RESP_WR_IMM:
wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
wc->wc_flags = IB_WC_WITH_IMM;
- wc->ex.imm_data = cqe->imm_inval_pkey;
+ wc->ex.imm_data = cqe->immediate;
break;
case MLX5_CQE_RESP_SEND:
wc->opcode = IB_WC_RECV;
@@ -213,20 +218,20 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
case MLX5_CQE_RESP_SEND_IMM:
wc->opcode = IB_WC_RECV;
wc->wc_flags = IB_WC_WITH_IMM;
- wc->ex.imm_data = cqe->imm_inval_pkey;
+ wc->ex.imm_data = cqe->immediate;
break;
case MLX5_CQE_RESP_SEND_INV:
wc->opcode = IB_WC_RECV;
wc->wc_flags = IB_WC_WITH_INVALIDATE;
- wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey);
+ wc->ex.invalidate_rkey = be32_to_cpu(cqe->inval_rkey);
break;
}
wc->src_qp = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
wc->dlid_path_bits = cqe->ml_path;
g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
wc->wc_flags |= g ? IB_WC_GRH : 0;
- if (unlikely(is_qp1(qp->ibqp.qp_type))) {
- u16 pkey = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff;
+ if (is_qp1(qp->type)) {
+ u16 pkey = be32_to_cpu(cqe->pkey) & 0xffff;
ib_find_cached_pkey(&dev->ib_dev, qp->port, pkey,
&wc->pkey_index);
@@ -253,7 +258,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
switch (roce_packet_type) {
case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
- wc->network_hdr_type = RDMA_NETWORK_IB;
+ wc->network_hdr_type = RDMA_NETWORK_ROCE_V1;
break;
case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6:
wc->network_hdr_type = RDMA_NETWORK_IPV6;
@@ -265,17 +270,20 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
}
-static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
+static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe,
+ struct ib_wc *wc, const char *level)
{
- mlx5_ib_warn(dev, "dump error cqe\n");
- mlx5_dump_err_cqe(dev->mdev, cqe);
+ mlx5_ib_log(level, dev, "WC error: %d, Message: %s\n", wc->status,
+ ib_wc_status_msg(wc->status));
+ print_hex_dump(level, "cqe_dump: ", DUMP_PREFIX_OFFSET, 16, 1,
+ cqe, sizeof(*cqe), false);
}
static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
struct mlx5_err_cqe *cqe,
struct ib_wc *wc)
{
- int dump = 1;
+ const char *dump = KERN_WARNING;
switch (cqe->syndrome) {
case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
@@ -285,10 +293,11 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
wc->status = IB_WC_LOC_QP_OP_ERR;
break;
case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
+ dump = KERN_DEBUG;
wc->status = IB_WC_LOC_PROT_ERR;
break;
case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
- dump = 0;
+ dump = NULL;
wc->status = IB_WC_WR_FLUSH_ERR;
break;
case MLX5_CQE_SYNDROME_MW_BIND_ERR:
@@ -304,18 +313,20 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
wc->status = IB_WC_REM_INV_REQ_ERR;
break;
case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
+ dump = KERN_DEBUG;
wc->status = IB_WC_REM_ACCESS_ERR;
break;
case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
+ dump = KERN_DEBUG;
wc->status = IB_WC_REM_OP_ERR;
break;
case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
+ dump = NULL;
wc->status = IB_WC_RETRY_EXC_ERR;
- dump = 0;
break;
case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
+ dump = NULL;
wc->status = IB_WC_RNR_RETRY_EXC_ERR;
- dump = 0;
break;
case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
wc->status = IB_WC_REM_ABORT_ERR;
@@ -327,7 +338,23 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
wc->vendor_err = cqe->vendor_err_synd;
if (dump)
- dump_cqe(dev, cqe);
+ dump_cqe(dev, cqe, wc, dump);
+}
+
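+/*
+ * Advance sq.last_poll past the completed WQE by walking the send w_list
+ * from tail up to the completed index.
+ */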
+static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
+ u16 tail, u16 head)
+{
+ u16 idx;
+
+ do {
+ idx = tail & (qp->sq.wqe_cnt - 1);
+ if (idx == head)
+ break;
+
+ tail = qp->sq.w_list[idx].next;
+ } while (1);
+ tail = qp->sq.w_list[idx].next;
+ qp->sq.last_poll = tail;
}
static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
@@ -368,7 +395,7 @@ static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
}
static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc,
- int *npolled, int is_send)
+ int *npolled, bool is_send)
{
struct mlx5_ib_wq *wq;
unsigned int cur;
@@ -383,10 +410,16 @@ static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc,
return;
for (i = 0; i < cur && np < num_entries; i++) {
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ unsigned int idx;
+
+ idx = (is_send) ? wq->last_poll : wq->tail;
+ idx &= (wq->wqe_cnt - 1);
+ wc->wr_id = wq->wrid[idx];
wc->status = IB_WC_WR_FLUSH_ERR;
wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
wq->tail++;
+ if (is_send)
+ wq->last_poll = wq->w_list[idx].next;
np++;
wc->qp = &qp->ibqp;
wc++;
@@ -423,9 +456,6 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
struct mlx5_cqe64 *cqe64;
struct mlx5_core_qp *mqp;
struct mlx5_ib_wq *wq;
- struct mlx5_sig_err_cqe *sig_err_cqe;
- struct mlx5_core_mkey *mmkey;
- struct mlx5_ib_mr *mr;
uint8_t opcode;
uint32_t qpn;
u16 wqe_ctr;
@@ -460,12 +490,12 @@ repoll:
}
qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
- if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
+ if (!*cur_qp || (qpn != (*cur_qp)->trans_qp.base.mqp.qpn)) {
/* We do not have to take the QP table lock here,
* because CQs will be locked while QPs are removed
* from the table.
*/
- mqp = __mlx5_qp_lookup(dev->mdev, qpn);
+ mqp = radix_tree_lookup(&dev->qp_table.tree, qpn);
*cur_qp = to_mibqp(mqp);
}
@@ -476,6 +506,7 @@ repoll:
wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
idx = wqe_ctr & (wq->wqe_cnt - 1);
handle_good_req(wc, cqe64, wq, idx);
+ handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
wc->wr_id = wq->wrid[idx];
wq->tail = wq->wqe_head[idx] + 1;
wc->status = IB_WC_SUCCESS;
@@ -498,6 +529,10 @@ repoll:
"Requestor" : "Responder", cq->mcq.cqn);
mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
err_cqe->syndrome, err_cqe->vendor_err_synd);
+ if (wc->status != IB_WC_WR_FLUSH_ERR &&
+ (*cur_qp)->type == MLX5_IB_QPT_REG_UMR)
+ dev->umrc.state = MLX5_UMR_STATE_RECOVER;
+
if (opcode == MLX5_CQE_REQ_ERR) {
wq = &(*cur_qp)->sq;
wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
@@ -519,27 +554,29 @@ repoll:
}
}
break;
- case MLX5_CQE_SIG_ERR:
- sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
+ case MLX5_CQE_SIG_ERR: {
+ struct mlx5_sig_err_cqe *sig_err_cqe =
+ (struct mlx5_sig_err_cqe *)cqe64;
+ struct mlx5_core_sig_ctx *sig;
- xa_lock(&dev->mdev->priv.mkey_table);
- mmkey = xa_load(&dev->mdev->priv.mkey_table,
+ xa_lock(&dev->sig_mrs);
+ sig = xa_load(&dev->sig_mrs,
mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
- mr = to_mibmr(mmkey);
- get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
- mr->sig->sig_err_exists = true;
- mr->sig->sigerr_count++;
+ get_sig_err_item(sig_err_cqe, &sig->err_item);
+ sig->sig_err_exists = true;
+ sig->sigerr_count++;
mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n",
- cq->mcq.cqn, mr->sig->err_item.key,
- mr->sig->err_item.err_type,
- mr->sig->err_item.sig_err_offset,
- mr->sig->err_item.expected,
- mr->sig->err_item.actual);
+ cq->mcq.cqn, sig->err_item.key,
+ sig->err_item.err_type,
+ sig->err_item.sig_err_offset,
+ sig->err_item.expected,
+ sig->err_item.actual);
- xa_unlock(&dev->mdev->priv.mkey_table);
+ xa_unlock(&dev->sig_mrs);
goto repoll;
}
+ }
return 0;
}
@@ -611,7 +648,7 @@ int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev;
struct mlx5_ib_cq *cq = to_mcq(ibcq);
- void __iomem *uar_page = mdev->priv.uar->map;
+ void __iomem *uar_page = mdev->priv.bfreg.up->map;
unsigned long irq_flags;
int ret = 0;
@@ -680,50 +717,64 @@ static int mini_cqe_res_format_to_hw(struct mlx5_ib_dev *dev, u8 format)
static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
struct mlx5_ib_cq *cq, int entries, u32 **cqb,
- int *cqe_size, int *index, int *inlen)
+ int *cqe_size, int *index, int *inlen,
+ struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_create_cq ucmd = {};
+ unsigned long page_size;
+ unsigned int page_offset_quantized;
size_t ucmdlen;
- int page_shift;
__be64 *pas;
- int npages;
int ncont;
void *cqc;
int err;
struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
udata, struct mlx5_ib_ucontext, ibucontext);
- ucmdlen = udata->inlen < sizeof(ucmd) ?
- (sizeof(ucmd) - sizeof(ucmd.flags)) : sizeof(ucmd);
+ ucmdlen = min(udata->inlen, sizeof(ucmd));
+ if (ucmdlen < offsetof(struct mlx5_ib_create_cq, flags))
+ return -EINVAL;
if (ib_copy_from_udata(&ucmd, udata, ucmdlen))
return -EFAULT;
- if (ucmdlen == sizeof(ucmd) &&
- (ucmd.flags & ~(MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD)))
+ if ((ucmd.flags & ~(MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD |
+ MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX |
+ MLX5_IB_CREATE_CQ_FLAGS_REAL_TIME_TS)))
return -EINVAL;
- if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)
+ if ((ucmd.cqe_size != 64 && ucmd.cqe_size != 128) ||
+ ucmd.reserved0 || ucmd.reserved1)
return -EINVAL;
*cqe_size = ucmd.cqe_size;
cq->buf.umem =
- ib_umem_get(udata, ucmd.buf_addr, entries * ucmd.cqe_size,
- IB_ACCESS_LOCAL_WRITE, 1);
+ ib_umem_get(&dev->ib_dev, ucmd.buf_addr,
+ entries * ucmd.cqe_size, IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(cq->buf.umem)) {
err = PTR_ERR(cq->buf.umem);
return err;
}
- err = mlx5_ib_db_map_user(context, udata, ucmd.db_addr, &cq->db);
+ page_size = mlx5_umem_find_best_cq_quantized_pgoff(
+ cq->buf.umem, cqc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT,
+ page_offset, 64, &page_offset_quantized);
+ if (!page_size) {
+ err = -EINVAL;
+ goto err_umem;
+ }
+
+ err = mlx5_ib_db_map_user(context, ucmd.db_addr, &cq->db);
if (err)
goto err_umem;
- mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &npages, &page_shift,
- &ncont, NULL);
- mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
- ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);
+ ncont = ib_umem_num_dma_blocks(cq->buf.umem, page_size);
+ mlx5_ib_dbg(
+ dev,
+ "addr 0x%llx, size %u, npages %zu, page_size %lu, ncont %d\n",
+ ucmd.buf_addr, entries * ucmd.cqe_size,
+ ib_umem_num_pages(cq->buf.umem), page_size, ncont);
*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont;
@@ -734,13 +785,25 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
}
pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
- mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0);
+ mlx5_ib_populate_pas(cq->buf.umem, page_size, pas, 0);
cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
MLX5_SET(cqc, cqc, log_page_size,
- page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(cqc, cqc, page_offset, page_offset_quantized);
- *index = context->bfregi.sys_pages[0];
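+ /* UAR index: prefer the ioctl attribute, then the ucmd page index, then
+ * fall back to the context's first static bfreg.
+ */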
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX)) {
+ err = uverbs_copy_from(index, attrs, MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX);
+ if (err)
+ goto err_cqb;
+ } else if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) {
+ *index = ucmd.uar_page_index;
+ } else if (context->bfregi.lib_uar_dyn) {
+ err = -EINVAL;
+ goto err_cqb;
+ } else {
+ *index = context->bfregi.sys_pages[0];
+ }
if (ucmd.cqe_comp_en == 1) {
int mini_cqe_format;
@@ -782,6 +845,9 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD;
}
+ if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_REAL_TIME_TS)
+ cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_REAL_TIME_TS;
+
MLX5_SET(create_cq_in, *cqb, uid, context->devx_uid);
return 0;
@@ -805,15 +871,14 @@ static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_udata *udata)
ib_umem_release(cq->buf.umem);
}
-static void init_cq_frag_buf(struct mlx5_ib_cq *cq,
- struct mlx5_ib_cq_buf *buf)
+static void init_cq_frag_buf(struct mlx5_ib_cq_buf *buf)
{
int i;
void *cqe;
struct mlx5_cqe64 *cqe64;
for (i = 0; i < buf->nent; i++) {
- cqe = get_cqe(cq, i);
+ cqe = mlx5_frag_buf_get_wqe(&buf->fbc, i);
cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
cqe64->op_own = MLX5_CQE_INVALID << 4;
}
@@ -839,7 +904,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
if (err)
goto err_db;
- init_cq_frag_buf(cq, &cq->buf);
+ init_cq_frag_buf(&cq->buf);
*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
@@ -858,7 +923,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
cq->buf.frag_buf.page_shift -
MLX5_ADAPTER_PAGE_SHIFT);
- *index = dev->mdev->priv.uar->index;
+ *index = dev->mdev->priv.bfreg.up->index;
return 0;
@@ -884,37 +949,34 @@ static void notify_soft_wc_handler(struct work_struct *work)
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
-struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs)
{
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
int vector = attr->comp_vector;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_cq *cq;
- int uninitialized_var(index);
- int uninitialized_var(inlen);
+ struct mlx5_ib_cq *cq = to_mcq(ibcq);
+ u32 out[MLX5_ST_SZ_DW(create_cq_out)];
+ int index;
+ int inlen;
u32 *cqb = NULL;
void *cqc;
int cqe_size;
- unsigned int irqn;
int eqn;
int err;
if (entries < 0 ||
(entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (check_cq_create_flags(attr->flags))
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
entries = roundup_pow_of_two(entries + 1);
if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))
- return ERR_PTR(-EINVAL);
-
- cq = kzalloc(sizeof(*cq), GFP_KERNEL);
- if (!cq)
- return ERR_PTR(-ENOMEM);
+ return -EINVAL;
cq->ibcq.cqe = entries - 1;
mutex_init(&cq->resize_mutex);
@@ -927,20 +989,20 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
if (udata) {
err = create_cq_user(dev, udata, cq, entries, &cqb, &cqe_size,
- &index, &inlen);
+ &index, &inlen, attrs);
if (err)
- goto err_create;
+ return err;
} else {
cqe_size = cache_line_size() == 128 ? 128 : 64;
err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
&index, &inlen);
if (err)
- goto err_create;
+ return err;
INIT_WORK(&cq->notify_work, notify_soft_wc_handler);
}
- err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
+ err = mlx5_comp_eqn_get(dev->mdev, vector, &eqn);
if (err)
goto err_cqb;
@@ -953,21 +1015,23 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD));
MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
MLX5_SET(cqc, cqc, uar_page, index);
- MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN)
MLX5_SET(cqc, cqc, oi, 1);
- err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen);
+ if (udata) {
+ cq->mcq.comp = mlx5_add_cq_to_tasklet;
+ cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp;
+ } else {
+ cq->mcq.comp = mlx5_ib_cq_comp;
+ }
+
+ err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen, out, sizeof(out));
if (err)
goto err_cqb;
mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
- cq->mcq.irqn = irqn;
- if (udata)
- cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp;
- else
- cq->mcq.comp = mlx5_ib_cq_comp;
cq->mcq.event = mlx5_ib_cq_event;
INIT_LIST_HEAD(&cq->wc_list);
@@ -980,7 +1044,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
kvfree(cqb);
- return &cq->ibcq;
+ return 0;
err_cmd:
mlx5_core_destroy_cq(dev->mdev, &cq->mcq);
@@ -991,26 +1055,34 @@ err_cqb:
destroy_cq_user(cq, udata);
else
destroy_cq_kernel(dev, cq);
+ return err;
+}
-err_create:
- kfree(cq);
+int mlx5_ib_pre_destroy_cq(struct ib_cq *cq)
+{
+ struct mlx5_ib_dev *dev = to_mdev(cq->device);
+ struct mlx5_ib_cq *mcq = to_mcq(cq);
- return ERR_PTR(err);
+ return mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
+}
+
+void mlx5_ib_post_destroy_cq(struct ib_cq *cq)
+{
+ destroy_cq_kernel(to_mdev(cq->device), to_mcq(cq));
}
int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
- struct mlx5_ib_dev *dev = to_mdev(cq->device);
- struct mlx5_ib_cq *mcq = to_mcq(cq);
+ int ret;
+
+ ret = mlx5_ib_pre_destroy_cq(cq);
+ if (ret)
+ return ret;
- mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
if (udata)
- destroy_cq_user(mcq, udata);
+ destroy_cq_user(to_mcq(cq), udata);
else
- destroy_cq_kernel(dev, mcq);
-
- kfree(mcq);
-
+ mlx5_ib_post_destroy_cq(cq);
return 0;
}
@@ -1101,13 +1173,12 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
}
static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
- int entries, struct ib_udata *udata, int *npas,
- int *page_shift, int *cqe_size)
+ int entries, struct ib_udata *udata,
+ int *cqe_size)
{
struct mlx5_ib_resize_cq ucmd;
struct ib_umem *umem;
int err;
- int npages;
err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
if (err)
@@ -1120,28 +1191,20 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1)
return -EINVAL;
- umem = ib_umem_get(udata, ucmd.buf_addr,
+ umem = ib_umem_get(&dev->ib_dev, ucmd.buf_addr,
(size_t)ucmd.cqe_size * entries,
- IB_ACCESS_LOCAL_WRITE, 1);
+ IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(umem)) {
err = PTR_ERR(umem);
return err;
}
- mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift,
- npas, NULL);
-
cq->resize_umem = umem;
*cqe_size = ucmd.cqe_size;
return 0;
}
-static void un_resize_user(struct mlx5_ib_cq *cq)
-{
- ib_umem_release(cq->resize_umem);
-}
-
static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
int entries, int cqe_size)
{
@@ -1155,7 +1218,7 @@ static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
if (err)
goto ex;
- init_cq_frag_buf(cq, cq->resize_buf);
+ init_cq_frag_buf(cq->resize_buf);
return 0;
@@ -1164,12 +1227,6 @@ ex:
return err;
}
-static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
-{
- free_cq_buf(dev, cq->resize_buf);
- cq->resize_buf = NULL;
-}
-
static int copy_resize_cqes(struct mlx5_ib_cq *cq)
{
struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
@@ -1234,9 +1291,10 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
int err;
int npas;
__be64 *pas;
- int page_shift;
+ unsigned int page_offset_quantized = 0;
+ unsigned int page_shift;
int inlen;
- int uninitialized_var(cqe_size);
+ int cqe_size;
unsigned long flags;
if (!MLX5_CAP_GEN(dev->mdev, cq_resize)) {
@@ -1261,22 +1319,34 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
mutex_lock(&cq->resize_mutex);
if (udata) {
- err = resize_user(dev, cq, entries, udata, &npas, &page_shift,
- &cqe_size);
+ unsigned long page_size;
+
+ err = resize_user(dev, cq, entries, udata, &cqe_size);
+ if (err)
+ goto ex;
+
+ page_size = mlx5_umem_find_best_cq_quantized_pgoff(
+ cq->resize_umem, cqc, log_page_size,
+ MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64,
+ &page_offset_quantized);
+ if (!page_size) {
+ err = -EINVAL;
+ goto ex_resize;
+ }
+ npas = ib_umem_num_dma_blocks(cq->resize_umem, page_size);
+ page_shift = order_base_2(page_size);
} else {
+ struct mlx5_frag_buf *frag_buf;
+
cqe_size = 64;
err = resize_kernel(dev, cq, entries, cqe_size);
- if (!err) {
- struct mlx5_frag_buf *frag_buf = &cq->resize_buf->frag_buf;
-
- npas = frag_buf->npages;
- page_shift = frag_buf->page_shift;
- }
+ if (err)
+ goto ex;
+ frag_buf = &cq->resize_buf->frag_buf;
+ npas = frag_buf->npages;
+ page_shift = frag_buf->page_shift;
}
- if (err)
- goto ex;
-
inlen = MLX5_ST_SZ_BYTES(modify_cq_in) +
MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas;
@@ -1288,8 +1358,8 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas);
if (udata)
- mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
- pas, 0);
+ mlx5_ib_populate_pas(cq->resize_umem, 1UL << page_shift, pas,
+ 0);
else
mlx5_fill_page_frag_array(&cq->resize_buf->frag_buf, pas);
@@ -1303,6 +1373,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
MLX5_SET(cqc, cqc, log_page_size,
page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(cqc, cqc, page_offset, page_offset_quantized);
MLX5_SET(cqc, cqc, cqe_sz,
cqe_sz_to_mlx_sz(cqe_size,
cq->private_flags &
@@ -1350,10 +1421,11 @@ ex_alloc:
kvfree(in);
ex_resize:
- if (udata)
- un_resize_user(cq);
- else
- un_resize_kernel(dev, cq);
+ ib_umem_release(cq->resize_umem);
+ if (!udata) {
+ free_cq_buf(dev, cq->resize_buf);
+ cq->resize_buf = NULL;
+ }
ex:
mutex_unlock(&cq->resize_mutex);
return err;
@@ -1393,3 +1465,17 @@ int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc)
return 0;
}
+
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
+ mlx5_ib_cq_create,
+ UVERBS_OBJECT_CQ,
+ UVERBS_METHOD_CQ_CREATE,
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL));
+
+const struct uapi_definition mlx5_ib_create_cq_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_CQ, &mlx5_ib_cq_create),
+ {},
+};
diff --git a/drivers/infiniband/hw/mlx5/data_direct.c b/drivers/infiniband/hw/mlx5/data_direct.c
new file mode 100644
index 000000000000..b81ac5709b56
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/data_direct.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#include "mlx5_ib.h"
+#include "data_direct.h"
+
+static LIST_HEAD(mlx5_data_direct_dev_list);
+static LIST_HEAD(mlx5_data_direct_reg_list);
+
+/*
+ * This mutex should be held when accessing either of the above lists
+ */
+static DEFINE_MUTEX(mlx5_data_direct_mutex);
+
+struct mlx5_data_direct_registration {
+ struct mlx5_ib_dev *ibdev;
+ char vuid[MLX5_ST_SZ_BYTES(array1024_auto) + 1];
+ struct list_head list;
+};
+
+static const struct pci_device_id mlx5_data_direct_pci_table[] = {
+ { PCI_VDEVICE(MELLANOX, 0x2100) }, /* ConnectX-8 Data Direct */
+ { 0, }
+};
+
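+/* Read the device's unique VUID string from the "VU" read-only VPD keyword */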
+static int mlx5_data_direct_vpd_get_vuid(struct mlx5_data_direct_dev *dev)
+{
+ struct pci_dev *pdev = dev->pdev;
+ unsigned int vpd_size, kw_len;
+ u8 *vpd_data;
+ int start;
+ int ret;
+
+ vpd_data = pci_vpd_alloc(pdev, &vpd_size);
+ if (IS_ERR(vpd_data)) {
+ pci_err(pdev, "Unable to read VPD, err=%pe\n", vpd_data);
+ return PTR_ERR(vpd_data);
+ }
+
+ start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size, "VU", &kw_len);
+ if (start < 0) {
+ ret = start;
+ pci_err(pdev, "VU keyword not found, err=%d\n", ret);
+ goto end;
+ }
+
+ dev->vuid = kmemdup_nul(vpd_data + start, kw_len, GFP_KERNEL);
+ ret = dev->vuid ? 0 : -ENOMEM;
+
+end:
+ kfree(vpd_data);
+ return ret;
+}
+
+static void mlx5_data_direct_shutdown(struct pci_dev *pdev)
+{
+ pci_disable_device(pdev);
+}
+
+static int mlx5_data_direct_set_dma_caps(struct pci_dev *pdev)
+{
+ int err;
+
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_warn(&pdev->dev,
+ "Warning: couldn't set 64-bit PCI DMA mask, err=%d\n", err);
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+ if (err) {
+ dev_err(&pdev->dev, "Can't set PCI DMA mask, err=%d\n", err);
+ return err;
+ }
+ }
+
+ dma_set_max_seg_size(&pdev->dev, SZ_2G);
+ return 0;
+}
+
+int mlx5_data_direct_ib_reg(struct mlx5_ib_dev *ibdev, char *vuid)
+{
+ struct mlx5_data_direct_registration *reg;
+ struct mlx5_data_direct_dev *dev;
+
+ reg = kzalloc(sizeof(*reg), GFP_KERNEL);
+ if (!reg)
+ return -ENOMEM;
+
+ reg->ibdev = ibdev;
+ strcpy(reg->vuid, vuid);
+
+ mutex_lock(&mlx5_data_direct_mutex);
+ list_for_each_entry(dev, &mlx5_data_direct_dev_list, list) {
+ if (strcmp(dev->vuid, vuid) == 0) {
+ mlx5_ib_data_direct_bind(ibdev, dev);
+ break;
+ }
+ }
+
+ /* Add the registration to its global list, to be used upon bind/unbind
+ * of its affiliated data direct device
+ */
+ list_add_tail(&reg->list, &mlx5_data_direct_reg_list);
+ mutex_unlock(&mlx5_data_direct_mutex);
+ return 0;
+}
+
+void mlx5_data_direct_ib_unreg(struct mlx5_ib_dev *ibdev)
+{
+ struct mlx5_data_direct_registration *reg;
+
+ mutex_lock(&mlx5_data_direct_mutex);
+ list_for_each_entry(reg, &mlx5_data_direct_reg_list, list) {
+ if (reg->ibdev == ibdev) {
+ list_del(&reg->list);
+ kfree(reg);
+ goto end;
+ }
+ }
+
+ WARN_ON(true);
+end:
+ mutex_unlock(&mlx5_data_direct_mutex);
+}
+
+static void mlx5_data_direct_dev_reg(struct mlx5_data_direct_dev *dev)
+{
+ struct mlx5_data_direct_registration *reg;
+
+ mutex_lock(&mlx5_data_direct_mutex);
+ list_for_each_entry(reg, &mlx5_data_direct_reg_list, list) {
+ if (strcmp(dev->vuid, reg->vuid) == 0)
+ mlx5_ib_data_direct_bind(reg->ibdev, dev);
+ }
+
+ /* Add the data direct device to the global list so that further IB
+ * devices may use it later as well
+ */
+ list_add_tail(&dev->list, &mlx5_data_direct_dev_list);
+ mutex_unlock(&mlx5_data_direct_mutex);
+}
+
+static void mlx5_data_direct_dev_unreg(struct mlx5_data_direct_dev *dev)
+{
+ struct mlx5_data_direct_registration *reg;
+
+ mutex_lock(&mlx5_data_direct_mutex);
+ /* Prevent any further affiliations */
+ list_del(&dev->list);
+ list_for_each_entry(reg, &mlx5_data_direct_reg_list, list) {
+ if (strcmp(dev->vuid, reg->vuid) == 0)
+ mlx5_ib_data_direct_unbind(reg->ibdev);
+ }
+ mutex_unlock(&mlx5_data_direct_mutex);
+}
+
+static int mlx5_data_direct_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct mlx5_data_direct_dev *dev;
+ int err;
+
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ return -ENOMEM;
+
+ dev->device = &pdev->dev;
+ dev->pdev = pdev;
+
+ pci_set_drvdata(dev->pdev, dev);
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(dev->device, "Cannot enable PCI device, err=%d\n", err);
+ goto err;
+ }
+
+ pci_set_master(pdev);
+ err = mlx5_data_direct_set_dma_caps(pdev);
+ if (err)
+ goto err_disable;
+
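+ /* Best effort: log only when none of the 32/64/128-bit atomic completer
+ * capabilities could be enabled.
+ */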
+ if (pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32) &&
+ pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP64) &&
+ pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128))
+ dev_dbg(dev->device, "Enabling pci atomics failed\n");
+
+ err = mlx5_data_direct_vpd_get_vuid(dev);
+ if (err)
+ goto err_disable;
+
+ mlx5_data_direct_dev_reg(dev);
+ return 0;
+
+err_disable:
+ pci_disable_device(pdev);
+err:
+ kfree(dev);
+ return err;
+}
+
+static void mlx5_data_direct_remove(struct pci_dev *pdev)
+{
+ struct mlx5_data_direct_dev *dev = pci_get_drvdata(pdev);
+
+ mlx5_data_direct_dev_unreg(dev);
+ pci_disable_device(pdev);
+ kfree(dev->vuid);
+ kfree(dev);
+}
+
+static struct pci_driver mlx5_data_direct_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = mlx5_data_direct_pci_table,
+ .probe = mlx5_data_direct_probe,
+ .remove = mlx5_data_direct_remove,
+ .shutdown = mlx5_data_direct_shutdown,
+};
+
+int mlx5_data_direct_driver_register(void)
+{
+ return pci_register_driver(&mlx5_data_direct_driver);
+}
+
+void mlx5_data_direct_driver_unregister(void)
+{
+ pci_unregister_driver(&mlx5_data_direct_driver);
+}
diff --git a/drivers/infiniband/hw/mlx5/data_direct.h b/drivers/infiniband/hw/mlx5/data_direct.h
new file mode 100644
index 000000000000..2fd2bdbe8f69
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/data_direct.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#ifndef _MLX5_IB_DATA_DIRECT_H
+#define _MLX5_IB_DATA_DIRECT_H
+
+struct mlx5_ib_dev;
+
+struct mlx5_data_direct_dev {
+ struct device *device;
+ struct pci_dev *pdev;
+ char *vuid;
+ struct list_head list;
+};
+
+int mlx5_data_direct_ib_reg(struct mlx5_ib_dev *ibdev, char *vuid);
+void mlx5_data_direct_ib_unreg(struct mlx5_ib_dev *ibdev);
+int mlx5_data_direct_driver_register(void);
+void mlx5_data_direct_driver_unregister(void);
+
+#endif
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 931f587dfb8f..d31d7f3005c6 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -13,53 +13,108 @@
#include <rdma/uverbs_std_types.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
+#include <rdma/ib_ucaps.h>
#include "mlx5_ib.h"
+#include "devx.h"
+#include "qp.h"
+#include <linux/xarray.h>
#define UVERBS_MODULE_NAME mlx5_ib
#include <rdma/uverbs_named_ioctl.h>
+static void dispatch_event_fd(struct list_head *fd_list, const void *data);
+
enum devx_obj_flags {
DEVX_OBJ_FLAGS_INDIRECT_MKEY = 1 << 0,
DEVX_OBJ_FLAGS_DCT = 1 << 1,
+ DEVX_OBJ_FLAGS_CQ = 1 << 2,
+ DEVX_OBJ_FLAGS_HW_FREED = 1 << 3,
+};
+
+#define MAX_ASYNC_CMDS 8
+
+struct mlx5_async_cmd {
+ struct ib_uobject *uobject;
+ void *in;
+ int in_size;
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ int err;
+ struct mlx5_async_work cb_work;
+ struct completion comp;
};
struct devx_async_data {
struct mlx5_ib_dev *mdev;
struct list_head list;
- struct ib_uobject *fd_uobj;
+ struct devx_async_cmd_event_file *ev_file;
struct mlx5_async_work cb_work;
u16 cmd_out_len;
/* must be last field in this structure */
struct mlx5_ib_uapi_devx_async_cmd_hdr hdr;
};
-#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
-struct devx_obj {
- struct mlx5_core_dev *mdev;
- u64 obj_id;
- u32 dinlen; /* destroy inbox length */
- u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
- u32 flags;
- union {
- struct mlx5_ib_devx_mr devx_mr;
- struct mlx5_core_dct core_dct;
- };
+struct devx_async_event_data {
+ struct list_head list; /* headed in ev_file->event_list */
+ struct mlx5_ib_uapi_devx_async_event_hdr hdr;
+};
+
+/* first level XA value data structure */
+struct devx_event {
+ struct xarray object_ids; /* second XA level, Key = object id */
+ struct list_head unaffiliated_list;
+};
+
+/* second level XA value data structure */
+struct devx_obj_event {
+ struct rcu_head rcu;
+ struct list_head obj_sub_list;
+};
+
+struct devx_event_subscription {
+ struct list_head file_list; /* headed in ev_file->
+ * subscribed_events_list
+ */
+ struct list_head xa_list; /* headed in devx_event->unaffiliated_list or
+ * devx_obj_event->obj_sub_list
+ */
+ struct list_head obj_list; /* headed in devx_object */
+ struct list_head event_list; /* headed in ev_file->event_list or in
+ * temp list via subscription
+ */
+
+ u8 is_cleaned:1;
+ u32 xa_key_level1;
+ u32 xa_key_level2;
+ struct rcu_head rcu;
+ u64 cookie;
+ struct devx_async_event_file *ev_file;
+ struct eventfd_ctx *eventfd;
+};
+
+struct devx_async_event_file {
+ struct ib_uobject uobj;
+ /* Head of events that are subscribed to this FD */
+ struct list_head subscribed_events_list;
+ spinlock_t lock;
+ wait_queue_head_t poll_wait;
+ struct list_head event_list;
+ struct mlx5_ib_dev *dev;
+ u8 omit_data:1;
+ u8 is_overflow_err:1;
+ u8 is_destroyed:1;
};
struct devx_umem {
struct mlx5_core_dev *mdev;
struct ib_umem *umem;
- u32 page_offset;
- int page_shift;
- int ncont;
u32 dinlen;
- u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)];
+ u32 dinbox[MLX5_ST_SZ_DW(destroy_umem_in)];
};
struct devx_umem_reg_cmd {
void *in;
u32 inlen;
- u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ u32 out[MLX5_ST_SZ_DW(create_umem_out)];
};
static struct mlx5_ib_ucontext *
@@ -68,10 +123,30 @@ devx_ufile2uctx(const struct uverbs_attr_bundle *attrs)
return to_mucontext(ib_uverbs_get_ucontext(attrs));
}
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
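+/*
+ * Translate requested RDMA ucaps into mlx5 UCTX capability bits, returning
+ * -EOPNOTSUPP when the device lacks a requested capability.
+ */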
+static int set_uctx_ucaps(struct mlx5_ib_dev *dev, u64 req_ucaps, u32 *cap)
{
- u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
+ if (UCAP_ENABLED(req_ucaps, RDMA_UCAP_MLX5_CTRL_LOCAL)) {
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL)
+ *cap |= MLX5_UCTX_CAP_RDMA_CTRL;
+ else
+ return -EOPNOTSUPP;
+ }
+
+ if (UCAP_ENABLED(req_ucaps, RDMA_UCAP_MLX5_CTRL_OTHER_VHCA)) {
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
+ MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA)
+ *cap |= MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA;
+ else
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user, u64 req_ucaps)
+{
+ u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(create_uctx_out)] = {};
void *uctx;
int err;
u16 uid;
@@ -82,14 +157,22 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
return -EINVAL;
uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx);
- if (is_user && capable(CAP_NET_RAW) &&
- (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX))
+ if (is_user &&
+ (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX) &&
+ rdma_dev_has_raw_cap(&dev->ib_dev))
cap |= MLX5_UCTX_CAP_RAW_TX;
- if (is_user && capable(CAP_SYS_RAWIO) &&
+ if (is_user &&
(MLX5_CAP_GEN(dev->mdev, uctx_cap) &
- MLX5_UCTX_CAP_INTERNAL_DEV_RES))
+ MLX5_UCTX_CAP_INTERNAL_DEV_RES) &&
+ capable(CAP_SYS_RAWIO))
cap |= MLX5_UCTX_CAP_INTERNAL_DEV_RES;
+ if (req_ucaps) {
+ err = set_uctx_ucaps(dev, req_ucaps, &cap);
+ if (err)
+ return err;
+ }
+
MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX);
MLX5_SET(uctx, uctx, cap, cap);
@@ -97,14 +180,14 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
if (err)
return err;
- uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ uid = MLX5_GET(create_uctx_out, out, uid);
return uid;
}
void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid)
{
- u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(destroy_uctx_out)] = {};
MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX);
MLX5_SET(destroy_uctx_in, in, uid, uid);
@@ -112,41 +195,130 @@ void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid)
mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
}
-bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type)
+static bool is_legacy_unaffiliated_event_num(u16 event_num)
{
- struct devx_obj *devx_obj = obj;
- u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
-
- switch (opcode) {
- case MLX5_CMD_OP_DESTROY_TIR:
- *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
- obj_id);
+ switch (event_num) {
+ case MLX5_EVENT_TYPE_PORT_CHANGE:
return true;
+ default:
+ return false;
+ }
+}
- case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
- *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
- table_id);
+static bool is_legacy_obj_event_num(u16 event_num)
+{
+ switch (event_num) {
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ case MLX5_EVENT_TYPE_COMM_EST:
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ case MLX5_EVENT_TYPE_COMP:
+ case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
+ case MLX5_EVENT_TYPE_XRQ_ERROR:
return true;
default:
return false;
}
}
-bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id)
+static u16 get_legacy_obj_type(u16 opcode)
{
- struct devx_obj *devx_obj = obj;
- u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
+ switch (opcode) {
+ case MLX5_CMD_OP_CREATE_RQ:
+ case MLX5_CMD_OP_CREATE_RMP:
+ return MLX5_EVENT_QUEUE_TYPE_RQ;
+ case MLX5_CMD_OP_CREATE_QP:
+ return MLX5_EVENT_QUEUE_TYPE_QP;
+ case MLX5_CMD_OP_CREATE_SQ:
+ return MLX5_EVENT_QUEUE_TYPE_SQ;
+ case MLX5_CMD_OP_CREATE_DCT:
+ return MLX5_EVENT_QUEUE_TYPE_DCT;
+ default:
+ return 0;
+ }
+}
- if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
- *counter_id = MLX5_GET(dealloc_flow_counter_in,
- devx_obj->dinbox,
- flow_counter_id);
- return true;
+static u16 get_dec_obj_type(struct devx_obj *obj, u16 event_num)
+{
+ u16 opcode;
+
+ opcode = (obj->obj_id >> 32) & 0xffff;
+
+ if (is_legacy_obj_event_num(event_num))
+ return get_legacy_obj_type(opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+ return (obj->obj_id >> 48);
+ case MLX5_CMD_OP_CREATE_RQ:
+ return MLX5_OBJ_TYPE_RQ;
+ case MLX5_CMD_OP_CREATE_QP:
+ return MLX5_OBJ_TYPE_QP;
+ case MLX5_CMD_OP_CREATE_SQ:
+ return MLX5_OBJ_TYPE_SQ;
+ case MLX5_CMD_OP_CREATE_DCT:
+ return MLX5_OBJ_TYPE_DCT;
+ case MLX5_CMD_OP_CREATE_TIR:
+ return MLX5_OBJ_TYPE_TIR;
+ case MLX5_CMD_OP_CREATE_TIS:
+ return MLX5_OBJ_TYPE_TIS;
+ case MLX5_CMD_OP_CREATE_PSV:
+ return MLX5_OBJ_TYPE_PSV;
+ case MLX5_OBJ_TYPE_MKEY:
+ return MLX5_OBJ_TYPE_MKEY;
+ case MLX5_CMD_OP_CREATE_RMP:
+ return MLX5_OBJ_TYPE_RMP;
+ case MLX5_CMD_OP_CREATE_XRC_SRQ:
+ return MLX5_OBJ_TYPE_XRC_SRQ;
+ case MLX5_CMD_OP_CREATE_XRQ:
+ return MLX5_OBJ_TYPE_XRQ;
+ case MLX5_CMD_OP_CREATE_RQT:
+ return MLX5_OBJ_TYPE_RQT;
+ case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+ return MLX5_OBJ_TYPE_FLOW_COUNTER;
+ case MLX5_CMD_OP_CREATE_CQ:
+ return MLX5_OBJ_TYPE_CQ;
+ default:
+ return 0;
+ }
+}
+
+static u16 get_event_obj_type(unsigned long event_type, struct mlx5_eqe *eqe)
+{
+ switch (event_type) {
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX5_EVENT_TYPE_COMM_EST:
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ return eqe->data.qp_srq.type;
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ case MLX5_EVENT_TYPE_XRQ_ERROR:
+ return 0;
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
+ return MLX5_EVENT_QUEUE_TYPE_DCT;
+ default:
+ return MLX5_GET(affiliated_event_header, &eqe->data, obj_type);
}
+}
- return false;
+static u32 get_dec_obj_id(u64 obj_id)
+{
+ return (obj_id & 0xffffffff);
}
/*
@@ -159,6 +331,80 @@ static u64 get_enc_obj_id(u32 opcode, u32 obj_id)
return ((u64)opcode << 32) | obj_id;
}
+static u32 devx_get_created_obj_id(const void *in, const void *out, u16 opcode)
+{
+ switch (opcode) {
+ case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+ return MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ case MLX5_CMD_OP_CREATE_UMEM:
+ return MLX5_GET(create_umem_out, out, umem_id);
+ case MLX5_CMD_OP_CREATE_MKEY:
+ return MLX5_GET(create_mkey_out, out, mkey_index);
+ case MLX5_CMD_OP_CREATE_CQ:
+ return MLX5_GET(create_cq_out, out, cqn);
+ case MLX5_CMD_OP_ALLOC_PD:
+ return MLX5_GET(alloc_pd_out, out, pd);
+ case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
+ return MLX5_GET(alloc_transport_domain_out, out,
+ transport_domain);
+ case MLX5_CMD_OP_CREATE_RMP:
+ return MLX5_GET(create_rmp_out, out, rmpn);
+ case MLX5_CMD_OP_CREATE_SQ:
+ return MLX5_GET(create_sq_out, out, sqn);
+ case MLX5_CMD_OP_CREATE_RQ:
+ return MLX5_GET(create_rq_out, out, rqn);
+ case MLX5_CMD_OP_CREATE_RQT:
+ return MLX5_GET(create_rqt_out, out, rqtn);
+ case MLX5_CMD_OP_CREATE_TIR:
+ return MLX5_GET(create_tir_out, out, tirn);
+ case MLX5_CMD_OP_CREATE_TIS:
+ return MLX5_GET(create_tis_out, out, tisn);
+ case MLX5_CMD_OP_ALLOC_Q_COUNTER:
+ return MLX5_GET(alloc_q_counter_out, out, counter_set_id);
+ case MLX5_CMD_OP_CREATE_FLOW_TABLE:
+ return MLX5_GET(create_flow_table_out, out, table_id);
+ case MLX5_CMD_OP_CREATE_FLOW_GROUP:
+ return MLX5_GET(create_flow_group_out, out, group_id);
+ case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+ return MLX5_GET(set_fte_in, in, flow_index);
+ case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+ return MLX5_GET(alloc_flow_counter_out, out, flow_counter_id);
+ case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
+ return MLX5_GET(alloc_packet_reformat_context_out, out,
+ packet_reformat_id);
+ case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
+ return MLX5_GET(alloc_modify_header_context_out, out,
+ modify_header_id);
+ case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
+ return MLX5_GET(create_scheduling_element_out, out,
+ scheduling_element_id);
+ case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+ return MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
+ case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+ return MLX5_GET(set_l2_table_entry_in, in, table_index);
+ case MLX5_CMD_OP_CREATE_QP:
+ return MLX5_GET(create_qp_out, out, qpn);
+ case MLX5_CMD_OP_CREATE_SRQ:
+ return MLX5_GET(create_srq_out, out, srqn);
+ case MLX5_CMD_OP_CREATE_XRC_SRQ:
+ return MLX5_GET(create_xrc_srq_out, out, xrc_srqn);
+ case MLX5_CMD_OP_CREATE_DCT:
+ return MLX5_GET(create_dct_out, out, dctn);
+ case MLX5_CMD_OP_CREATE_XRQ:
+ return MLX5_GET(create_xrq_out, out, xrqn);
+ case MLX5_CMD_OP_ATTACH_TO_MCG:
+ return MLX5_GET(attach_to_mcg_in, in, qpn);
+ case MLX5_CMD_OP_ALLOC_XRCD:
+ return MLX5_GET(alloc_xrcd_out, out, xrcd);
+ case MLX5_CMD_OP_CREATE_PSV:
+ return MLX5_GET(create_psv_out, out, psv0_index);
+ default:
+ /* The opcode must match one handled by devx_is_obj_create_cmd() */
+ WARN_ON(true);
+ return 0;
+ }
+}
+
static u64 devx_get_obj_id(const void *in)
{
u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
@@ -270,8 +516,8 @@ static u64 devx_get_obj_id(const void *in)
break;
case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT,
- MLX5_GET(general_obj_in_cmd_hdr, in,
- obj_id));
+ MLX5_GET(query_modify_header_context_in,
+ in, modify_header_id));
break;
case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
@@ -306,6 +552,10 @@ static u64 devx_get_obj_id(const void *in)
obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
MLX5_GET(rst2init_qp_in, in, qpn));
break;
+ case MLX5_CMD_OP_INIT2INIT_QP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(init2init_qp_in, in, qpn));
+ break;
case MLX5_CMD_OP_INIT2RTR_QP:
obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
MLX5_GET(init2rtr_qp_in, in, qpn));
@@ -363,6 +613,8 @@ static u64 devx_get_obj_id(const void *in)
break;
case MLX5_CMD_OP_ARM_XRQ:
case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
+ case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
+ case MLX5_CMD_OP_MODIFY_XRQ:
obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
MLX5_GET(arm_xrq_in, in, xrqn));
break;
@@ -421,10 +673,9 @@ static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs,
case UVERBS_OBJECT_QP:
{
struct mlx5_ib_qp *qp = to_mqp(uobj->object);
- enum ib_qp_type qp_type = qp->ibqp.qp_type;
- if (qp_type == IB_QPT_RAW_PACKET ||
- (qp->flags & MLX5_IB_QP_UNDERLAY)) {
+ if (qp->type == IB_QPT_RAW_PACKET ||
+ (qp->flags & IB_QP_CREATE_SOURCE_QPN)) {
struct mlx5_ib_raw_packet_qp *raw_packet_qp =
&qp->raw_packet_qp;
struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
@@ -440,10 +691,9 @@ static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs,
sq->tisn) == obj_id);
}
- if (qp_type == MLX5_IB_QPT_DCT)
+ if (qp->type == MLX5_IB_QPT_DCT)
return get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
qp->dct.mdct.mqp.qpn) == obj_id;
-
return get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
qp->ibqp.qp_num) == obj_id;
}
@@ -459,7 +709,21 @@ static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs,
obj_id;
case MLX5_IB_OBJECT_DEVX_OBJ:
- return ((struct devx_obj *)uobj->object)->obj_id == obj_id;
+ {
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+ struct devx_obj *devx_uobj = uobj->object;
+
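+ /*
+ * A bulk flow counter allocation owns the id range
+ * [obj_id, obj_id + flow_counter_bulk_size), so a query for any
+ * counter inside that range is valid.
+ */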
+ if (opcode == MLX5_CMD_OP_QUERY_FLOW_COUNTER &&
+ devx_uobj->flow_counter_bulk_size) {
+ u64 end;
+
+ end = devx_uobj->obj_id +
+ devx_uobj->flow_counter_bulk_size;
+ return devx_uobj->obj_id <= obj_id && end > obj_id;
+ }
+
+ return devx_uobj->obj_id == obj_id;
+ }
default:
return false;
@@ -597,6 +861,14 @@ static bool devx_is_obj_create_cmd(const void *in, u16 *opcode)
return true;
return false;
}
+ case MLX5_CMD_OP_CREATE_PSV:
+ {
+ u8 num_psv = MLX5_GET(create_psv_in, in, num_psv);
+
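+ /*
+ * Only a single PSV per command is treated as an object here,
+ * since the destroy command built later carries one psv0_index.
+ */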
+ if (num_psv == 1)
+ return true;
+ return false;
+ }
default:
return false;
}
@@ -621,6 +893,7 @@ static bool devx_is_obj_modify_cmd(const void *in)
case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
case MLX5_CMD_OP_RST2INIT_QP:
case MLX5_CMD_OP_INIT2RTR_QP:
+ case MLX5_CMD_OP_INIT2INIT_QP:
case MLX5_CMD_OP_RTR2RTS_QP:
case MLX5_CMD_OP_RTS2RTS_QP:
case MLX5_CMD_OP_SQERR2RTS_QP:
@@ -631,6 +904,8 @@ static bool devx_is_obj_modify_cmd(const void *in)
case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
case MLX5_CMD_OP_ARM_XRQ:
case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
+ case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
+ case MLX5_CMD_OP_MODIFY_XRQ:
return true;
case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
{
@@ -689,6 +964,7 @@ static bool devx_is_whitelist_cmd(void *in)
case MLX5_CMD_OP_QUERY_HCA_CAP:
case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS:
return true;
default:
return false;
@@ -715,12 +991,16 @@ static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in)
return c->devx_uid;
}
-static bool devx_is_general_cmd(void *in)
+
+static bool devx_is_general_cmd(void *in, struct mlx5_ib_dev *dev)
{
u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
- if (opcode >= MLX5_CMD_OP_GENERAL_START &&
- opcode < MLX5_CMD_OP_GENERAL_END)
+ /* Pass all cmds for vhca_tunnel as general, tracking is done in FW */
+ if ((MLX5_CAP_GEN_64(dev->mdev, vhca_tunnel_commands) &&
+ MLX5_GET(general_obj_in_cmd_hdr, in, vhca_tunnel_id)) ||
+ (opcode >= MLX5_CMD_OP_GENERAL_START &&
+ opcode < MLX5_CMD_OP_GENERAL_END))
return true;
switch (opcode) {
@@ -739,6 +1019,8 @@ static bool devx_is_general_cmd(void *in)
case MLX5_CMD_OP_QUERY_CONG_STATUS:
case MLX5_CMD_OP_QUERY_CONG_PARAMS:
case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
+ case MLX5_CMD_OP_QUERY_LAG:
+ case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS:
return true;
default:
return false;
@@ -752,7 +1034,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
struct mlx5_ib_dev *dev;
int user_vector;
int dev_eqn;
- unsigned int irqn;
int err;
if (uverbs_copy_from(&user_vector, attrs,
@@ -764,7 +1045,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
return PTR_ERR(c);
dev = to_mdev(c->ibucontext.device);
- err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn, &irqn);
+ err = mlx5_comp_eqn_get(dev->mdev, user_vector, &dev_eqn);
if (err < 0)
return err;
@@ -833,7 +1114,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
int cmd_out_len = uverbs_attr_get_len(attrs,
MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT);
void *cmd_out;
- int err;
+ int err, err2;
int uid;
c = devx_ufile2uctx(attrs);
@@ -846,7 +1127,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
return uid;
/* Only a white list of general HCA commands is allowed for this method. */
- if (!devx_is_general_cmd(cmd_in))
+ if (!devx_is_general_cmd(cmd_in, dev))
return -EINVAL;
cmd_out = uverbs_zalloc(attrs, cmd_out_len);
@@ -854,171 +1135,205 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
return PTR_ERR(cmd_out);
MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
- err = mlx5_cmd_exec(dev->mdev, cmd_in,
- uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN),
- cmd_out, cmd_out_len);
- if (err)
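+ /*
+ * -EREMOTEIO means the device executed the command but returned a
+ * failure status; the output buffer is still valid and is copied
+ * back to userspace together with the error.
+ */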
+ err = mlx5_cmd_do(dev->mdev, cmd_in,
+ uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN),
+ cmd_out, cmd_out_len);
+ if (err && err != -EREMOTEIO)
return err;
- return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out,
+ err2 = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out,
cmd_out_len);
+
+ return err2 ?: err;
}
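+/*
+ * Pre-build the matching DESTROY/DEALLOC command in din so that the object
+ * can later be torn down (devx_obj_cleanup() or the async path in
+ * mlx5_ib_ufile_hw_cleanup()) without re-parsing the create command.
+ */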
static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
u32 *dinlen,
u32 *obj_id)
{
- u16 obj_type = MLX5_GET(general_obj_in_cmd_hdr, in, obj_type);
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid);
- *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ *obj_id = devx_get_created_obj_id(in, out, opcode);
*dinlen = MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr);
-
- MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id);
MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid);
- switch (MLX5_GET(general_obj_in_cmd_hdr, in, opcode)) {
+ switch (opcode) {
case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
- MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type);
+ MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id);
+ MLX5_SET(general_obj_in_cmd_hdr, din, obj_type,
+ MLX5_GET(general_obj_in_cmd_hdr, in, obj_type));
break;
case MLX5_CMD_OP_CREATE_UMEM:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(destroy_umem_in, din, opcode,
MLX5_CMD_OP_DESTROY_UMEM);
+ MLX5_SET(destroy_umem_in, din, umem_id, *obj_id);
break;
case MLX5_CMD_OP_CREATE_MKEY:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY);
+ MLX5_SET(destroy_mkey_in, din, opcode,
+ MLX5_CMD_OP_DESTROY_MKEY);
+ MLX5_SET(destroy_mkey_in, din, mkey_index, *obj_id);
break;
case MLX5_CMD_OP_CREATE_CQ:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
+ MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
+ MLX5_SET(destroy_cq_in, din, cqn, *obj_id);
break;
case MLX5_CMD_OP_ALLOC_PD:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_PD);
+ MLX5_SET(dealloc_pd_in, din, opcode, MLX5_CMD_OP_DEALLOC_PD);
+ MLX5_SET(dealloc_pd_in, din, pd, *obj_id);
break;
case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(dealloc_transport_domain_in, din, opcode,
MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
+ MLX5_SET(dealloc_transport_domain_in, din, transport_domain,
+ *obj_id);
break;
case MLX5_CMD_OP_CREATE_RMP:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RMP);
+ MLX5_SET(destroy_rmp_in, din, opcode, MLX5_CMD_OP_DESTROY_RMP);
+ MLX5_SET(destroy_rmp_in, din, rmpn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_SQ:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SQ);
+ MLX5_SET(destroy_sq_in, din, opcode, MLX5_CMD_OP_DESTROY_SQ);
+ MLX5_SET(destroy_sq_in, din, sqn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_RQ:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQ);
+ MLX5_SET(destroy_rq_in, din, opcode, MLX5_CMD_OP_DESTROY_RQ);
+ MLX5_SET(destroy_rq_in, din, rqn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_RQT:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT);
+ MLX5_SET(destroy_rqt_in, din, opcode, MLX5_CMD_OP_DESTROY_RQT);
+ MLX5_SET(destroy_rqt_in, din, rqtn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_TIR:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIR);
+ MLX5_SET(destroy_tir_in, din, opcode, MLX5_CMD_OP_DESTROY_TIR);
+ MLX5_SET(destroy_tir_in, din, tirn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_TIS:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS);
+ MLX5_SET(destroy_tis_in, din, opcode, MLX5_CMD_OP_DESTROY_TIS);
+ MLX5_SET(destroy_tis_in, din, tisn, *obj_id);
break;
case MLX5_CMD_OP_ALLOC_Q_COUNTER:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(dealloc_q_counter_in, din, opcode,
MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+ MLX5_SET(dealloc_q_counter_in, din, counter_set_id, *obj_id);
break;
case MLX5_CMD_OP_CREATE_FLOW_TABLE:
*dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in);
- *obj_id = MLX5_GET(create_flow_table_out, out, table_id);
MLX5_SET(destroy_flow_table_in, din, other_vport,
MLX5_GET(create_flow_table_in, in, other_vport));
MLX5_SET(destroy_flow_table_in, din, vport_number,
MLX5_GET(create_flow_table_in, in, vport_number));
+ MLX5_SET(destroy_flow_table_in, din, other_eswitch,
+ MLX5_GET(create_flow_table_in, in, other_eswitch));
+ MLX5_SET(destroy_flow_table_in, din, eswitch_owner_vhca_id,
+ MLX5_GET(create_flow_table_in, in,
+ eswitch_owner_vhca_id));
MLX5_SET(destroy_flow_table_in, din, table_type,
MLX5_GET(create_flow_table_in, in, table_type));
MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id);
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(destroy_flow_table_in, din, opcode,
MLX5_CMD_OP_DESTROY_FLOW_TABLE);
break;
case MLX5_CMD_OP_CREATE_FLOW_GROUP:
*dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in);
- *obj_id = MLX5_GET(create_flow_group_out, out, group_id);
MLX5_SET(destroy_flow_group_in, din, other_vport,
MLX5_GET(create_flow_group_in, in, other_vport));
MLX5_SET(destroy_flow_group_in, din, vport_number,
MLX5_GET(create_flow_group_in, in, vport_number));
+ MLX5_SET(destroy_flow_group_in, din, other_eswitch,
+ MLX5_GET(create_flow_group_in, in, other_eswitch));
+ MLX5_SET(destroy_flow_group_in, din, eswitch_owner_vhca_id,
+ MLX5_GET(create_flow_group_in, in,
+ eswitch_owner_vhca_id));
MLX5_SET(destroy_flow_group_in, din, table_type,
MLX5_GET(create_flow_group_in, in, table_type));
MLX5_SET(destroy_flow_group_in, din, table_id,
MLX5_GET(create_flow_group_in, in, table_id));
MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id);
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(destroy_flow_group_in, din, opcode,
MLX5_CMD_OP_DESTROY_FLOW_GROUP);
break;
case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
*dinlen = MLX5_ST_SZ_BYTES(delete_fte_in);
- *obj_id = MLX5_GET(set_fte_in, in, flow_index);
MLX5_SET(delete_fte_in, din, other_vport,
MLX5_GET(set_fte_in, in, other_vport));
MLX5_SET(delete_fte_in, din, vport_number,
MLX5_GET(set_fte_in, in, vport_number));
+ MLX5_SET(delete_fte_in, din, other_eswitch,
+ MLX5_GET(set_fte_in, in, other_eswitch));
+ MLX5_SET(delete_fte_in, din, eswitch_owner_vhca_id,
+ MLX5_GET(set_fte_in, in, eswitch_owner_vhca_id));
MLX5_SET(delete_fte_in, din, table_type,
MLX5_GET(set_fte_in, in, table_type));
MLX5_SET(delete_fte_in, din, table_id,
MLX5_GET(set_fte_in, in, table_id));
MLX5_SET(delete_fte_in, din, flow_index, *obj_id);
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(delete_fte_in, din, opcode,
MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
break;
case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(dealloc_flow_counter_in, din, opcode,
MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
+ MLX5_SET(dealloc_flow_counter_in, din, flow_counter_id,
+ *obj_id);
break;
case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(dealloc_packet_reformat_context_in, din, opcode,
MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
+ MLX5_SET(dealloc_packet_reformat_context_in, din,
+ packet_reformat_id, *obj_id);
break;
case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(dealloc_modify_header_context_in, din, opcode,
MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
+ MLX5_SET(dealloc_modify_header_context_in, din,
+ modify_header_id, *obj_id);
break;
case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
*dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in);
- *obj_id = MLX5_GET(create_scheduling_element_out, out,
- scheduling_element_id);
MLX5_SET(destroy_scheduling_element_in, din,
scheduling_hierarchy,
MLX5_GET(create_scheduling_element_in, in,
scheduling_hierarchy));
MLX5_SET(destroy_scheduling_element_in, din,
scheduling_element_id, *obj_id);
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(destroy_scheduling_element_in, din, opcode,
MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT);
break;
case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
*dinlen = MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in);
- *obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id);
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(delete_vxlan_udp_dport_in, din, opcode,
MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
break;
case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
*dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in);
- *obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index);
MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id);
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(delete_l2_table_entry_in, din, opcode,
MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
break;
case MLX5_CMD_OP_CREATE_QP:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_QP);
+ MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
+ MLX5_SET(destroy_qp_in, din, qpn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_SRQ:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SRQ);
+ MLX5_SET(destroy_srq_in, din, opcode, MLX5_CMD_OP_DESTROY_SRQ);
+ MLX5_SET(destroy_srq_in, din, srqn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_XRC_SRQ:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_SET(destroy_xrc_srq_in, din, opcode,
MLX5_CMD_OP_DESTROY_XRC_SRQ);
+ MLX5_SET(destroy_xrc_srq_in, din, xrc_srqn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_DCT:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
+ MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
+ MLX5_SET(destroy_dct_in, din, dctn, *obj_id);
break;
case MLX5_CMD_OP_CREATE_XRQ:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_XRQ);
+ MLX5_SET(destroy_xrq_in, din, opcode, MLX5_CMD_OP_DESTROY_XRQ);
+ MLX5_SET(destroy_xrq_in, din, xrqn, *obj_id);
break;
case MLX5_CMD_OP_ATTACH_TO_MCG:
*dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in);
@@ -1027,10 +1342,19 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, multicast_gid),
MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid),
MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid));
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
+ MLX5_SET(detach_from_mcg_in, din, opcode,
+ MLX5_CMD_OP_DETACH_FROM_MCG);
+ MLX5_SET(detach_from_mcg_in, din, qpn, *obj_id);
break;
case MLX5_CMD_OP_ALLOC_XRCD:
- MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
+ MLX5_SET(dealloc_xrcd_in, din, opcode,
+ MLX5_CMD_OP_DEALLOC_XRCD);
+ MLX5_SET(dealloc_xrcd_in, din, xrcd, *obj_id);
+ break;
+ case MLX5_CMD_OP_CREATE_PSV:
+ MLX5_SET(destroy_psv_in, din, opcode,
+ MLX5_CMD_OP_DESTROY_PSV);
+ MLX5_SET(destroy_psv_in, din, psvn, *obj_id);
break;
default:
/* The opcode must match one handled by devx_is_obj_create_cmd() */
@@ -1043,24 +1367,19 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj,
struct mlx5_ib_dev *dev,
void *in, void *out)
{
- struct mlx5_ib_devx_mr *devx_mr = &obj->devx_mr;
- struct mlx5_core_mkey *mkey;
+ struct mlx5_ib_mkey *mkey = &obj->mkey;
void *mkc;
u8 key;
- mkey = &devx_mr->mmkey;
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
key = MLX5_GET(mkc, mkc, mkey_7_0);
mkey->key = mlx5_idx_to_mkey(
MLX5_GET(create_mkey_out, out, mkey_index)) | key;
mkey->type = MLX5_MKEY_INDIRECT_DEVX;
- mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
- mkey->size = MLX5_GET64(mkc, mkc, len);
- mkey->pd = MLX5_GET(mkc, mkc, pd);
- devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);
+ mkey->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);
+ init_waitqueue_head(&mkey->wait);
- return xa_err(xa_store(&dev->mdev->priv.mkey_table,
- mlx5_base_mkey(mkey->key), mkey, GFP_KERNEL));
+ return mlx5r_store_odp_mkey(dev, mkey);
}
static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
@@ -1089,30 +1408,40 @@ static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
}
MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
+ /* TPH is not allowed to bypass the regular kernel's verbs flow */
+ MLX5_SET(mkc, mkc, pcie_tph_en, 0);
+ MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index,
+ MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX);
return 0;
}
-static void devx_free_indirect_mkey(struct rcu_head *rcu)
+static void devx_cleanup_subscription(struct mlx5_ib_dev *dev,
+ struct devx_event_subscription *sub)
{
- kfree(container_of(rcu, struct devx_obj, devx_mr.rcu));
-}
+ struct devx_event *event;
+ struct devx_obj_event *xa_val_level2;
-/* This function to delete from the radix tree needs to be called before
- * destroying the underlying mkey. Otherwise a race might occur in case that
- * other thread will get the same mkey before this one will be deleted,
- * in that case it will fail via inserting to the tree its own data.
- *
- * Note:
- * An error in the destroy is not expected unless there is some other indirect
- * mkey which points to this one. In a kernel cleanup flow it will be just
- * destroyed in the iterative destruction call. In a user flow, in case
- * the application didn't close in the expected order it's its own problem,
- * the mkey won't be part of the tree, in both cases the kernel is safe.
- */
-static void devx_cleanup_mkey(struct devx_obj *obj)
-{
- xa_erase(&obj->mdev->priv.mkey_table,
- mlx5_base_mkey(obj->devx_mr.mmkey.key));
+ if (sub->is_cleaned)
+ return;
+
+ sub->is_cleaned = 1;
+ list_del_rcu(&sub->xa_list);
+
+ if (list_empty(&sub->obj_list))
+ return;
+
+ list_del_rcu(&sub->obj_list);
+ /* if the level-2 entry's obj_sub_list is now empty, remove it */
+ event = xa_load(&dev->devx_event_table.event_xa,
+ sub->xa_key_level1);
+ WARN_ON(!event);
+
+ xa_val_level2 = xa_load(&event->object_ids, sub->xa_key_level2);
+ if (list_empty(&xa_val_level2->obj_sub_list)) {
+ xa_erase(&event->object_ids,
+ sub->xa_key_level2);
+ kfree_rcu(xa_val_level2, rcu);
+ }
}
static int devx_obj_cleanup(struct ib_uobject *uobject,
@@ -1120,33 +1449,78 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
struct uverbs_attr_bundle *attrs)
{
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ struct mlx5_devx_event_table *devx_event_table;
struct devx_obj *obj = uobject->object;
+ struct devx_event_subscription *sub_entry, *tmp;
+ struct mlx5_ib_dev *dev;
int ret;
- if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
- devx_cleanup_mkey(obj);
-
- if (obj->flags & DEVX_OBJ_FLAGS_DCT)
- ret = mlx5_core_destroy_dct(obj->mdev, &obj->core_dct);
+ dev = mlx5_udata_to_mdev(&attrs->driver_udata);
+ if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY &&
+ xa_erase(&obj->ib_dev->odp_mkeys,
+ mlx5_base_mkey(obj->mkey.key)))
+ /*
+ * pagefault_single_data_segment() issues commands against the mmkey;
+ * we must wait for that to stop before freeing the mkey, as another
+ * allocation could get the same mkey #.
+ */
+ mlx5r_deref_wait_odp_mkey(&obj->mkey);
+
+ if (obj->flags & DEVX_OBJ_FLAGS_HW_FREED)
+ ret = 0;
+ else if (obj->flags & DEVX_OBJ_FLAGS_DCT)
+ ret = mlx5_core_destroy_dct(obj->ib_dev, &obj->core_dct);
+ else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
+ ret = mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
else
- ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out,
- sizeof(out));
- if (ib_is_destroy_retryable(ret, why, uobject))
+ ret = mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox,
+ obj->dinlen, out, sizeof(out));
+ if (ret)
return ret;
- if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
- struct mlx5_ib_dev *dev =
- mlx5_udata_to_mdev(&attrs->driver_udata);
+ devx_event_table = &dev->devx_event_table;
- call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu,
- devx_free_indirect_mkey);
- return ret;
- }
+ mutex_lock(&devx_event_table->event_xa_lock);
+ list_for_each_entry_safe(sub_entry, tmp, &obj->event_sub, obj_list)
+ devx_cleanup_subscription(dev, sub_entry);
+ mutex_unlock(&devx_event_table->event_xa_lock);
kfree(obj);
return ret;
}
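+/*
+ * Completion handler for CQs created through DEVX (non-APU CQs): look up
+ * the MLX5_EVENT_TYPE_COMP entry keyed by the cqn and dispatch the EQE to
+ * any subscribed event files.
+ */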
+static void devx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
+{
+ struct devx_obj *obj = container_of(mcq, struct devx_obj, core_cq);
+ struct mlx5_devx_event_table *table;
+ struct devx_event *event;
+ struct devx_obj_event *obj_event;
+ u32 obj_id = mcq->cqn;
+
+ table = &obj->ib_dev->devx_event_table;
+ rcu_read_lock();
+ event = xa_load(&table->event_xa, MLX5_EVENT_TYPE_COMP);
+ if (!event)
+ goto out;
+
+ obj_event = xa_load(&event->object_ids, obj_id);
+ if (!obj_event)
+ goto out;
+
+ dispatch_event_fd(&obj_event->obj_sub_list, eqe);
+out:
+ rcu_read_unlock();
+}
+
+static bool is_apu_cq(struct mlx5_ib_dev *dev, const void *in)
+{
+ if (!MLX5_CAP_GEN(dev->mdev, apu) ||
+ !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), apu_cq))
+ return false;
+
+ return true;
+}
+
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
struct uverbs_attr_bundle *attrs)
{
@@ -1164,11 +1538,14 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
struct devx_obj *obj;
u16 obj_type = 0;
- int err;
+ int err, err2 = 0;
int uid;
u32 obj_id;
u16 opcode;
+ if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
+ return -EINVAL;
+
uid = devx_get_uid(c, cmd_in);
if (uid < 0)
return uid;
@@ -1195,53 +1572,74 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
if (opcode == MLX5_CMD_OP_CREATE_DCT) {
obj->flags |= DEVX_OBJ_FLAGS_DCT;
- err = mlx5_core_create_dct(dev->mdev, &obj->core_dct,
- cmd_in, cmd_in_len,
- cmd_out, cmd_out_len);
+ err = mlx5_core_create_dct(dev, &obj->core_dct, cmd_in,
+ cmd_in_len, cmd_out, cmd_out_len);
+ } else if (opcode == MLX5_CMD_OP_CREATE_CQ &&
+ !is_apu_cq(dev, cmd_in)) {
+ obj->flags |= DEVX_OBJ_FLAGS_CQ;
+ obj->core_cq.comp = devx_cq_comp;
+ err = mlx5_create_cq(dev->mdev, &obj->core_cq,
+ cmd_in, cmd_in_len, cmd_out,
+ cmd_out_len);
} else {
- err = mlx5_cmd_exec(dev->mdev, cmd_in,
- cmd_in_len,
- cmd_out, cmd_out_len);
+ err = mlx5_cmd_do(dev->mdev, cmd_in, cmd_in_len,
+ cmd_out, cmd_out_len);
}
+ if (err == -EREMOTEIO)
+ err2 = uverbs_copy_to(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
+ cmd_out, cmd_out_len);
if (err)
goto obj_free;
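+ /*
+ * Remember how many counters a bulk allocation spans: either
+ * 1 << flow_counter_bulk_log_size or, via the legacy field,
+ * 128 * flow_counter_bulk. devx_is_valid_obj_id() uses this to
+ * accept queries for any counter in the range.
+ */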
+ if (opcode == MLX5_CMD_OP_ALLOC_FLOW_COUNTER) {
+ u32 bulk = MLX5_GET(alloc_flow_counter_in,
+ cmd_in,
+ flow_counter_bulk_log_size);
+
+ if (bulk)
+ bulk = 1 << bulk;
+ else
+ bulk = 128UL * MLX5_GET(alloc_flow_counter_in,
+ cmd_in,
+ flow_counter_bulk);
+ obj->flow_counter_bulk_size = bulk;
+ }
+
uobj->object = obj;
- obj->mdev = dev->mdev;
+ INIT_LIST_HEAD(&obj->event_sub);
+ obj->ib_dev = dev;
devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen,
&obj_id);
WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
- if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
- err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out);
- if (err)
- goto obj_destroy;
- }
-
err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
if (err)
- goto err_copy;
+ goto obj_destroy;
if (opcode == MLX5_CMD_OP_CREATE_GENERAL_OBJECT)
obj_type = MLX5_GET(general_obj_in_cmd_hdr, cmd_in, obj_type);
-
obj->obj_id = get_enc_obj_id(opcode | obj_type << 16, obj_id);
+ if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
+ err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out);
+ if (err)
+ goto obj_destroy;
+ }
return 0;
-err_copy:
- if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
- devx_cleanup_mkey(obj);
obj_destroy:
if (obj->flags & DEVX_OBJ_FLAGS_DCT)
- mlx5_core_destroy_dct(obj->mdev, &obj->core_dct);
+ mlx5_core_destroy_dct(obj->ib_dev, &obj->core_dct);
+ else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
+ mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
else
- mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out,
+ mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox, obj->dinlen, out,
sizeof(out));
obj_free:
kfree(obj);
- return err;
+ return err2 ?: err;
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
@@ -1256,9 +1654,12 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
void *cmd_out;
- int err;
+ int err, err2;
int uid;
+ if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
+ return -EINVAL;
+
uid = devx_get_uid(c, cmd_in);
if (uid < 0)
return uid;
@@ -1276,14 +1677,16 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
devx_set_umem_valid(cmd_in);
- err = mlx5_cmd_exec(mdev->mdev, cmd_in,
- uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
- cmd_out, cmd_out_len);
- if (err)
+ err = mlx5_cmd_do(mdev->mdev, cmd_in,
+ uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
+ cmd_out, cmd_out_len);
+ if (err && err != -EREMOTEIO)
return err;
- return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
+ err2 = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
cmd_out, cmd_out_len);
+
+ return err2 ?: err;
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
@@ -1297,10 +1700,13 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
void *cmd_out;
- int err;
+ int err, err2;
int uid;
struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
+ if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
+ return -EINVAL;
+
uid = devx_get_uid(c, cmd_in);
if (uid < 0)
return uid;
@@ -1316,14 +1722,16 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
return PTR_ERR(cmd_out);
MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
- err = mlx5_cmd_exec(mdev->mdev, cmd_in,
- uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
- cmd_out, cmd_out_len);
- if (err)
+ err = mlx5_cmd_do(mdev->mdev, cmd_in,
+ uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
+ cmd_out, cmd_out_len);
+ if (err && err != -EREMOTEIO)
return err;
- return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
+ err2 = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
cmd_out, cmd_out_len);
+
+ return err2 ?: err;
}
struct devx_async_event_queue {
@@ -1365,25 +1773,56 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)(
return 0;
}
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE);
+ struct devx_async_event_file *ev_file;
+ struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
+ u32 flags;
+ int err;
+
+ err = uverbs_get_flags32(&flags, attrs,
+ MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
+ MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA);
+
+ if (err)
+ return err;
+
+ ev_file = container_of(uobj, struct devx_async_event_file,
+ uobj);
+ spin_lock_init(&ev_file->lock);
+ INIT_LIST_HEAD(&ev_file->event_list);
+ init_waitqueue_head(&ev_file->poll_wait);
+ if (flags & MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA)
+ ev_file->omit_data = 1;
+ INIT_LIST_HEAD(&ev_file->subscribed_events_list);
+ ev_file->dev = dev;
+ get_device(&dev->ib_dev.dev);
+ return 0;
+}
+
static void devx_query_callback(int status, struct mlx5_async_work *context)
{
struct devx_async_data *async_data =
container_of(context, struct devx_async_data, cb_work);
- struct ib_uobject *fd_uobj = async_data->fd_uobj;
- struct devx_async_cmd_event_file *ev_file;
- struct devx_async_event_queue *ev_queue;
+ struct devx_async_cmd_event_file *ev_file = async_data->ev_file;
+ struct devx_async_event_queue *ev_queue = &ev_file->ev_queue;
unsigned long flags;
- ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file,
- uobj);
- ev_queue = &ev_file->ev_queue;
-
+ /*
+ * Note that if the struct devx_async_cmd_event_file uobj begins to be
+ * destroyed it will block at mlx5_cmd_cleanup_async_ctx() until this
+ * routine returns, ensuring that it always remains valid here.
+ */
spin_lock_irqsave(&ev_queue->lock, flags);
list_add_tail(&async_data->list, &ev_queue->event_list);
spin_unlock_irqrestore(&ev_queue->lock, flags);
wake_up_interruptible(&ev_queue->poll_wait);
- fput(fd_uobj->object);
}
#define MAX_ASYNC_BYTES_IN_USE (1024 * 1024) /* 1MB */
@@ -1406,6 +1845,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
struct devx_async_cmd_event_file *ev_file;
struct devx_async_data *async_data;
+ if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
+ return -EINVAL;
+
uid = devx_get_uid(c, cmd_in);
if (uid < 0)
return uid;
@@ -1449,9 +1891,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
async_data->cmd_out_len = cmd_out_len;
async_data->mdev = mdev;
- async_data->fd_uobj = fd_uobj;
+ async_data->ev_file = ev_file;
- get_file(fd_uobj->object);
MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
err = mlx5_cmd_exec_cb(&ev_file->async_ctx, cmd_in,
uverbs_attr_get_len(attrs,
@@ -1461,12 +1902,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
devx_query_callback, &async_data->cb_work);
if (err)
- goto cb_err;
+ goto free_async;
return 0;
-cb_err:
- fput(fd_uobj->object);
free_async:
kvfree(async_data);
sub_bytes:
@@ -1474,80 +1913,463 @@ sub_bytes:
return err;
}
+static void
+subscribe_event_xa_dealloc(struct mlx5_devx_event_table *devx_event_table,
+ u32 key_level1,
+ bool is_level2,
+ u32 key_level2)
+{
+ struct devx_event *event;
+ struct devx_obj_event *xa_val_level2;
+
+ /* Level 1 is valid for future use, no need to free */
+ if (!is_level2)
+ return;
+
+ event = xa_load(&devx_event_table->event_xa, key_level1);
+ WARN_ON(!event);
+
+ xa_val_level2 = xa_load(&event->object_ids,
+ key_level2);
+ if (list_empty(&xa_val_level2->obj_sub_list)) {
+ xa_erase(&event->object_ids,
+ key_level2);
+ kfree_rcu(xa_val_level2, rcu);
+ }
+}
+
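+/*
+ * The event table is a two-level xarray: level 1 is keyed by
+ * event_type | obj_type << 16 and level 2 (affiliated events only) by the
+ * object id. Allocate any entries missing for this subscription.
+ */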
+static int
+subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
+ u32 key_level1,
+ bool is_level2,
+ u32 key_level2)
+{
+ struct devx_obj_event *obj_event;
+ struct devx_event *event;
+ int err;
+
+ event = xa_load(&devx_event_table->event_xa, key_level1);
+ if (!event) {
+ event = kzalloc(sizeof(*event), GFP_KERNEL);
+ if (!event)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&event->unaffiliated_list);
+ xa_init(&event->object_ids);
+
+ err = xa_insert(&devx_event_table->event_xa,
+ key_level1,
+ event,
+ GFP_KERNEL);
+ if (err) {
+ kfree(event);
+ return err;
+ }
+ }
+
+ if (!is_level2)
+ return 0;
+
+ obj_event = xa_load(&event->object_ids, key_level2);
+ if (!obj_event) {
+ obj_event = kzalloc(sizeof(*obj_event), GFP_KERNEL);
+ if (!obj_event)
+ /* Level1 is valid for future use, no need to free */
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&obj_event->obj_sub_list);
+ err = xa_insert(&event->object_ids,
+ key_level2,
+ obj_event,
+ GFP_KERNEL);
+ if (err) {
+ kfree(obj_event);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static bool is_valid_events_legacy(int num_events, u16 *event_type_num_list,
+ struct devx_obj *obj)
+{
+ int i;
+
+ for (i = 0; i < num_events; i++) {
+ if (obj) {
+ if (!is_legacy_obj_event_num(event_type_num_list[i]))
+ return false;
+ } else if (!is_legacy_unaffiliated_event_num(
+ event_type_num_list[i])) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+#define MAX_SUPP_EVENT_NUM 255
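+/*
+ * When the device reports event_cap, each event number indexes a bit in
+ * the 256-bit user_affiliated_events/user_unaffiliated_events masks from
+ * the device event capabilities; otherwise fall back to the legacy lists.
+ */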
+static bool is_valid_events(struct mlx5_core_dev *dev,
+ int num_events, u16 *event_type_num_list,
+ struct devx_obj *obj)
+{
+ __be64 *aff_events;
+ __be64 *unaff_events;
+ int mask_entry;
+ int mask_bit;
+ int i;
+
+ if (MLX5_CAP_GEN(dev, event_cap)) {
+ aff_events = MLX5_CAP_DEV_EVENT(dev,
+ user_affiliated_events);
+ unaff_events = MLX5_CAP_DEV_EVENT(dev,
+ user_unaffiliated_events);
+ } else {
+ return is_valid_events_legacy(num_events, event_type_num_list,
+ obj);
+ }
+
+ for (i = 0; i < num_events; i++) {
+ if (event_type_num_list[i] > MAX_SUPP_EVENT_NUM)
+ return false;
+
+ mask_entry = event_type_num_list[i] / 64;
+ mask_bit = event_type_num_list[i] % 64;
+
+ if (obj) {
+ /* CQ completion */
+ if (event_type_num_list[i] == 0)
+ continue;
+
+ if (!(be64_to_cpu(aff_events[mask_entry]) &
+ (1ull << mask_bit)))
+ return false;
+
+ continue;
+ }
+
+ if (!(be64_to_cpu(unaff_events[mask_entry]) &
+ (1ull << mask_bit)))
+ return false;
+ }
+
+ return true;
+}
+
+#define MAX_NUM_EVENTS 16
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *devx_uobj = uverbs_attr_get_uobject(
+ attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE);
+ struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
+ struct ib_uobject *fd_uobj;
+ struct devx_obj *obj = NULL;
+ struct devx_async_event_file *ev_file;
+ struct mlx5_devx_event_table *devx_event_table = &dev->devx_event_table;
+ u16 *event_type_num_list;
+ struct devx_event_subscription *event_sub, *tmp_sub;
+ struct list_head sub_list;
+ int redirect_fd;
+ bool use_eventfd = false;
+ int num_events;
+ u16 obj_type = 0;
+ u64 cookie = 0;
+ u32 obj_id = 0;
+ int err;
+ int i;
+
+ if (!c->devx_uid)
+ return -EINVAL;
+
+ if (!IS_ERR(devx_uobj)) {
+ obj = (struct devx_obj *)devx_uobj->object;
+ if (obj)
+ obj_id = get_dec_obj_id(obj->obj_id);
+ }
+
+ fd_uobj = uverbs_attr_get_uobject(attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE);
+ if (IS_ERR(fd_uobj))
+ return PTR_ERR(fd_uobj);
+
+ ev_file = container_of(fd_uobj, struct devx_async_event_file,
+ uobj);
+
+ if (uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM)) {
+ err = uverbs_copy_from(&redirect_fd, attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM);
+ if (err)
+ return err;
+
+ use_eventfd = true;
+ }
+
+ if (uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE)) {
+ if (use_eventfd)
+ return -EINVAL;
+
+ err = uverbs_copy_from(&cookie, attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE);
+ if (err)
+ return err;
+ }
+
+ num_events = uverbs_attr_ptr_get_array_size(
+ attrs, MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
+ sizeof(u16));
+
+ if (num_events < 0)
+ return num_events;
+
+ if (num_events > MAX_NUM_EVENTS)
+ return -EINVAL;
+
+ event_type_num_list = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST);
+
+ if (!is_valid_events(dev->mdev, num_events, event_type_num_list, obj))
+ return -EINVAL;
+
+ INIT_LIST_HEAD(&sub_list);
+
+ /* Protect from concurrent subscriptions to same XA entries to allow
+ * both to succeed
+ */
+ mutex_lock(&devx_event_table->event_xa_lock);
+ for (i = 0; i < num_events; i++) {
+ u32 key_level1;
+
+ if (obj)
+ obj_type = get_dec_obj_type(obj,
+ event_type_num_list[i]);
+ key_level1 = event_type_num_list[i] | obj_type << 16;
+
+ err = subscribe_event_xa_alloc(devx_event_table,
+ key_level1,
+ obj,
+ obj_id);
+ if (err)
+ goto err;
+
+ event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL);
+ if (!event_sub) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ list_add_tail(&event_sub->event_list, &sub_list);
+ uverbs_uobject_get(&ev_file->uobj);
+ if (use_eventfd) {
+ event_sub->eventfd =
+ eventfd_ctx_fdget(redirect_fd);
+
+ if (IS_ERR(event_sub->eventfd)) {
+ err = PTR_ERR(event_sub->eventfd);
+ event_sub->eventfd = NULL;
+ goto err;
+ }
+ }
+
+ event_sub->cookie = cookie;
+ event_sub->ev_file = ev_file;
+ /* May be needed when cleaning up the devx object/subscription */
+ event_sub->xa_key_level1 = key_level1;
+ event_sub->xa_key_level2 = obj_id;
+ INIT_LIST_HEAD(&event_sub->obj_list);
+ }
+
+ /* Once all the allocations and the XA data insertions were done we
+ * can go ahead and add all the subscriptions to the relevant lists
+ * without concern of a failure.
+ */
+ list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) {
+ struct devx_event *event;
+ struct devx_obj_event *obj_event;
+
+ list_del_init(&event_sub->event_list);
+
+ spin_lock_irq(&ev_file->lock);
+ list_add_tail_rcu(&event_sub->file_list,
+ &ev_file->subscribed_events_list);
+ spin_unlock_irq(&ev_file->lock);
+
+ event = xa_load(&devx_event_table->event_xa,
+ event_sub->xa_key_level1);
+ WARN_ON(!event);
+
+ if (!obj) {
+ list_add_tail_rcu(&event_sub->xa_list,
+ &event->unaffiliated_list);
+ continue;
+ }
+
+ obj_event = xa_load(&event->object_ids, obj_id);
+ WARN_ON(!obj_event);
+ list_add_tail_rcu(&event_sub->xa_list,
+ &obj_event->obj_sub_list);
+ list_add_tail_rcu(&event_sub->obj_list,
+ &obj->event_sub);
+ }
+
+ mutex_unlock(&devx_event_table->event_xa_lock);
+ return 0;
+
+err:
+ list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) {
+ list_del(&event_sub->event_list);
+
+ subscribe_event_xa_dealloc(devx_event_table,
+ event_sub->xa_key_level1,
+ obj,
+ obj_id);
+
+ if (event_sub->eventfd)
+ eventfd_ctx_put(event_sub->eventfd);
+ uverbs_uobject_put(&event_sub->ev_file->uobj);
+ kfree(event_sub);
+ }
+
+ mutex_unlock(&devx_event_table->event_xa_lock);
+ return err;
+}
+
static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
struct uverbs_attr_bundle *attrs,
- struct devx_umem *obj)
+ struct devx_umem *obj, u32 access_flags)
{
u64 addr;
size_t size;
- u32 access;
- int npages;
int err;
- u32 page_mask;
if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) ||
uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN))
return -EFAULT;
- err = uverbs_get_flags32(&access, attrs,
- MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
- IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_REMOTE_READ);
+ err = ib_check_mr_access(&dev->ib_dev, access_flags);
if (err)
return err;
- err = ib_check_mr_access(access);
- if (err)
- return err;
-
- obj->umem = ib_umem_get(&attrs->driver_udata, addr, size, access, 0);
- if (IS_ERR(obj->umem))
- return PTR_ERR(obj->umem);
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_DMABUF_FD)) {
+ struct ib_umem_dmabuf *umem_dmabuf;
+ int dmabuf_fd;
- mlx5_ib_cont_pages(obj->umem, obj->umem->address,
- MLX5_MKEY_PAGE_SHIFT_MASK, &npages,
- &obj->page_shift, &obj->ncont, NULL);
+ err = uverbs_get_raw_fd(&dmabuf_fd, attrs,
+ MLX5_IB_ATTR_DEVX_UMEM_REG_DMABUF_FD);
+ if (err)
+ return -EFAULT;
- if (!npages) {
- ib_umem_release(obj->umem);
- return -EINVAL;
+ umem_dmabuf = ib_umem_dmabuf_get_pinned(
+ &dev->ib_dev, addr, size, dmabuf_fd, access_flags);
+ if (IS_ERR(umem_dmabuf))
+ return PTR_ERR(umem_dmabuf);
+ obj->umem = &umem_dmabuf->umem;
+ } else {
+ obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access_flags);
+ if (IS_ERR(obj->umem))
+ return PTR_ERR(obj->umem);
}
-
- page_mask = (1 << obj->page_shift) - 1;
- obj->page_offset = obj->umem->address & page_mask;
-
return 0;
}
-static int devx_umem_reg_cmd_alloc(struct uverbs_attr_bundle *attrs,
- struct devx_umem *obj,
- struct devx_umem_reg_cmd *cmd)
+static unsigned int devx_umem_find_best_pgsize(struct ib_umem *umem,
+ unsigned long pgsz_bitmap)
{
- cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) +
- (MLX5_ST_SZ_BYTES(mtt) * obj->ncont);
- cmd->in = uverbs_zalloc(attrs, cmd->inlen);
- return PTR_ERR_OR_ZERO(cmd->in);
+ unsigned long page_size;
+
+ /* Don't bother checking larger page sizes as offset must be zero and
+ * total DEVX umem length must be equal to total umem length.
+ */
+ pgsz_bitmap &= GENMASK_ULL(max_t(u64, order_base_2(umem->length),
+ PAGE_SHIFT),
+ MLX5_ADAPTER_PAGE_SHIFT);
+ if (!pgsz_bitmap)
+ return 0;
+
+ page_size = ib_umem_find_best_pgoff(umem, pgsz_bitmap, U64_MAX);
+ if (!page_size)
+ return 0;
+
+ /* If the page_size is less than the CPU page size then we can use the
+ * offset and create a umem which is a subset of the page list.
+ * For larger page sizes we can't be sure the DMA list reflects the
+ * VA so we must ensure that the umem extent is exactly equal to the
+ * page list. Reduce the page size until one of these cases is true.
+ */
+ while ((ib_umem_dma_offset(umem, page_size) != 0 ||
+ (umem->length % page_size) != 0) &&
+ page_size > PAGE_SIZE)
+ page_size /= 2;
+
+ return page_size;
}
-static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev,
- struct devx_umem *obj,
- struct devx_umem_reg_cmd *cmd)
+static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev,
+ struct uverbs_attr_bundle *attrs,
+ struct devx_umem *obj,
+ struct devx_umem_reg_cmd *cmd,
+ int access)
{
- void *umem;
+ unsigned long pgsz_bitmap;
+ unsigned int page_size;
__be64 *mtt;
+ void *umem;
+ int ret;
+
+ /*
+ * If the user does not pass in pgsz_bitmap then the user promises not
+ * to use umem_offset!=0 in any commands that allocate on top of the
+ * umem.
+ *
+ * If the user wants to use a umem_offset then it must pass in
+ * pgsz_bitmap which guides the maximum page size and thus maximum
+ * object alignment inside the umem. See the PRM.
+ *
+ * Users are not allowed to use IOVA here, mkeys are not supported on
+ * umem.
+ */
+ ret = uverbs_get_const_default(&pgsz_bitmap, attrs,
+ MLX5_IB_ATTR_DEVX_UMEM_REG_PGSZ_BITMAP,
+ GENMASK_ULL(63,
+ min(PAGE_SHIFT, MLX5_ADAPTER_PAGE_SHIFT)));
+ if (ret)
+ return ret;
+
+ page_size = devx_umem_find_best_pgsize(obj->umem, pgsz_bitmap);
+ if (!page_size)
+ return -EINVAL;
+
+ cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) +
+ (MLX5_ST_SZ_BYTES(mtt) *
+ ib_umem_num_dma_blocks(obj->umem, page_size));
+ cmd->in = uverbs_zalloc(attrs, cmd->inlen);
+ if (IS_ERR(cmd->in))
+ return PTR_ERR(cmd->in);
umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem);
mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt);
MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM);
- MLX5_SET64(umem, umem, num_of_mtt, obj->ncont);
- MLX5_SET(umem, umem, log_page_size, obj->page_shift -
- MLX5_ADAPTER_PAGE_SHIFT);
- MLX5_SET(umem, umem, page_offset, obj->page_offset);
- mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt,
+ MLX5_SET64(umem, umem, num_of_mtt,
+ ib_umem_num_dma_blocks(obj->umem, page_size));
+ MLX5_SET(umem, umem, log_page_size,
+ order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(umem, umem, page_offset,
+ ib_umem_dma_offset(obj->umem, page_size));
+
+ if (mlx5_umem_needs_ats(dev, obj->umem, access))
+ MLX5_SET(umem, umem, ats, 1);
+
+ mlx5_ib_populate_pas(obj->umem, page_size, mtt,
(obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) |
- MLX5_IB_MTT_READ);
+ MLX5_IB_MTT_READ);
+ return 0;
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
@@ -1561,25 +2383,33 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
+ int access_flags;
int err;
if (!c->devx_uid)
return -EINVAL;
+ err = uverbs_get_flags32(&access_flags, attrs,
+ MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
+ IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_RELAXED_ORDERING);
+ if (err)
+ return err;
+
obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
if (!obj)
return -ENOMEM;
- err = devx_umem_get(dev, &c->ibucontext, attrs, obj);
+ err = devx_umem_get(dev, &c->ibucontext, attrs, obj, access_flags);
if (err)
goto err_obj_free;
- err = devx_umem_reg_cmd_alloc(attrs, obj, &cmd);
+ err = devx_umem_reg_cmd_alloc(dev, attrs, obj, &cmd, access_flags);
if (err)
goto err_umem_release;
- devx_umem_reg_cmd_build(dev, obj, &cmd);
-
MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid);
err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out,
sizeof(cmd.out));
@@ -1589,14 +2419,12 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
obj->mdev = dev->mdev;
uobj->object = obj;
devx_obj_build_destroy_cmd(cmd.in, cmd.out, obj->dinbox, &obj->dinlen, &obj_id);
- err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, &obj_id, sizeof(obj_id));
- if (err)
- goto err_umem_destroy;
+ uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE);
- return 0;
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, &obj_id,
+ sizeof(obj_id));
+ return err;
-err_umem_destroy:
- mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, cmd.out, sizeof(cmd.out));
err_umem_release:
ib_umem_release(obj->umem);
err_obj_free:
@@ -1613,7 +2441,7 @@ static int devx_umem_cleanup(struct ib_uobject *uobject,
int err;
err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
- if (ib_is_destroy_retryable(err, why, uobject))
+ if (err)
return err;
ib_umem_release(obj->umem);
@@ -1621,6 +2449,297 @@ static int devx_umem_cleanup(struct ib_uobject *uobject,
return 0;
}
+static bool is_unaffiliated_event(struct mlx5_core_dev *dev,
+ unsigned long event_type)
+{
+ __be64 *unaff_events;
+ int mask_entry;
+ int mask_bit;
+
+ if (!MLX5_CAP_GEN(dev, event_cap))
+ return is_legacy_unaffiliated_event_num(event_type);
+
+ unaff_events = MLX5_CAP_DEV_EVENT(dev,
+ user_unaffiliated_events);
+ WARN_ON(event_type > MAX_SUPP_EVENT_NUM);
+
+ mask_entry = event_type / 64;
+ mask_bit = event_type % 64;
+
+ if (!(be64_to_cpu(unaff_events[mask_entry]) & (1ull << mask_bit)))
+ return false;
+
+ return true;
+}
+
+static u32 devx_get_obj_id_from_event(unsigned long event_type, void *data)
+{
+ struct mlx5_eqe *eqe = data;
+ u32 obj_id = 0;
+
+ switch (event_type) {
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ case MLX5_EVENT_TYPE_COMM_EST:
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ obj_id = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+ break;
+ case MLX5_EVENT_TYPE_XRQ_ERROR:
+ obj_id = be32_to_cpu(eqe->data.xrq_err.type_xrqn) & 0xffffff;
+ break;
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
+ obj_id = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
+ break;
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ obj_id = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
+ break;
+ default:
+ obj_id = MLX5_GET(affiliated_event_header, &eqe->data, obj_id);
+ break;
+ }
+
+ return obj_id;
+}
+
+static int deliver_event(struct devx_event_subscription *event_sub,
+ const void *data)
+{
+ struct devx_async_event_file *ev_file;
+ struct devx_async_event_data *event_data;
+ unsigned long flags;
+
+ ev_file = event_sub->ev_file;
+
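+ /*
+ * In omit-data mode only the subscription cookie is queued, and at
+ * most once until it is read; otherwise the full EQE is copied into
+ * a freshly allocated entry on the file's event list.
+ */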
+ if (ev_file->omit_data) {
+ spin_lock_irqsave(&ev_file->lock, flags);
+ if (!list_empty(&event_sub->event_list) ||
+ ev_file->is_destroyed) {
+ spin_unlock_irqrestore(&ev_file->lock, flags);
+ return 0;
+ }
+
+ list_add_tail(&event_sub->event_list, &ev_file->event_list);
+ spin_unlock_irqrestore(&ev_file->lock, flags);
+ wake_up_interruptible(&ev_file->poll_wait);
+ return 0;
+ }
+
+ event_data = kzalloc(sizeof(*event_data) + sizeof(struct mlx5_eqe),
+ GFP_ATOMIC);
+ if (!event_data) {
+ spin_lock_irqsave(&ev_file->lock, flags);
+ ev_file->is_overflow_err = 1;
+ spin_unlock_irqrestore(&ev_file->lock, flags);
+ return -ENOMEM;
+ }
+
+ event_data->hdr.cookie = event_sub->cookie;
+ memcpy(event_data->hdr.out_data, data, sizeof(struct mlx5_eqe));
+
+ spin_lock_irqsave(&ev_file->lock, flags);
+ if (!ev_file->is_destroyed)
+ list_add_tail(&event_data->list, &ev_file->event_list);
+ else
+ kfree(event_data);
+ spin_unlock_irqrestore(&ev_file->lock, flags);
+ wake_up_interruptible(&ev_file->poll_wait);
+
+ return 0;
+}
+
+static void dispatch_event_fd(struct list_head *fd_list,
+ const void *data)
+{
+ struct devx_event_subscription *item;
+
+ list_for_each_entry_rcu(item, fd_list, xa_list) {
+ if (item->eventfd)
+ eventfd_signal(item->eventfd);
+ else
+ deliver_event(item, data);
+ }
+}
+
+static int devx_event_notifier(struct notifier_block *nb,
+ unsigned long event_type, void *data)
+{
+ struct mlx5_devx_event_table *table;
+ struct mlx5_ib_dev *dev;
+ struct devx_event *event;
+ struct devx_obj_event *obj_event;
+ u16 obj_type = 0;
+ bool is_unaffiliated;
+ u32 obj_id;
+
+ /* Explicitly filter out kernel events which may occur frequently */
+ if (event_type == MLX5_EVENT_TYPE_CMD ||
+ event_type == MLX5_EVENT_TYPE_PAGE_REQUEST)
+ return NOTIFY_OK;
+
+ table = container_of(nb, struct mlx5_devx_event_table, devx_nb.nb);
+ dev = container_of(table, struct mlx5_ib_dev, devx_event_table);
+ is_unaffiliated = is_unaffiliated_event(dev->mdev, event_type);
+
+ if (!is_unaffiliated)
+ obj_type = get_event_obj_type(event_type, data);
+
+ rcu_read_lock();
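+ /*
+ * obj_type stays zero for unaffiliated events, so the level-1 key
+ * is just the event type in that case.
+ */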
+ event = xa_load(&table->event_xa, event_type | (obj_type << 16));
+ if (!event) {
+ rcu_read_unlock();
+ return NOTIFY_DONE;
+ }
+
+ if (is_unaffiliated) {
+ dispatch_event_fd(&event->unaffiliated_list, data);
+ rcu_read_unlock();
+ return NOTIFY_OK;
+ }
+
+ obj_id = devx_get_obj_id_from_event(event_type, data);
+ obj_event = xa_load(&event->object_ids, obj_id);
+ if (!obj_event) {
+ rcu_read_unlock();
+ return NOTIFY_DONE;
+ }
+
+ dispatch_event_fd(&obj_event->obj_sub_list, data);
+
+ rcu_read_unlock();
+ return NOTIFY_OK;
+}
+
+int mlx5_ib_devx_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_devx_event_table *table = &dev->devx_event_table;
+ int uid;
+
+ uid = mlx5_ib_devx_create(dev, false, 0);
+ if (uid > 0) {
+ dev->devx_whitelist_uid = uid;
+ xa_init(&table->event_xa);
+ mutex_init(&table->event_xa_lock);
+ MLX5_NB_INIT(&table->devx_nb, devx_event_notifier, NOTIFY_ANY);
+ mlx5_eq_notifier_register(dev->mdev, &table->devx_nb);
+ }
+
+ return 0;
+}
+
+void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_devx_event_table *table = &dev->devx_event_table;
+ struct devx_event_subscription *sub, *tmp;
+ struct devx_event *event;
+ void *entry;
+ unsigned long id;
+
+ if (dev->devx_whitelist_uid) {
+ mlx5_eq_notifier_unregister(dev->mdev, &table->devx_nb);
+ mutex_lock(&dev->devx_event_table.event_xa_lock);
+ xa_for_each(&table->event_xa, id, entry) {
+ event = entry;
+ list_for_each_entry_safe(
+ sub, tmp, &event->unaffiliated_list, xa_list)
+ devx_cleanup_subscription(dev, sub);
+ kfree(entry);
+ }
+ mutex_unlock(&dev->devx_event_table.event_xa_lock);
+ xa_destroy(&table->event_xa);
+
+ mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
+ }
+}
+
+static void devx_async_destroy_cb(int status, struct mlx5_async_work *context)
+{
+ struct mlx5_async_cmd *devx_out = container_of(context,
+ struct mlx5_async_cmd, cb_work);
+ struct devx_obj *obj = devx_out->uobject->object;
+
+ if (!status)
+ obj->flags |= DEVX_OBJ_FLAGS_HW_FREED;
+
+ complete(&devx_out->comp);
+}
+
+static void devx_async_destroy(struct mlx5_ib_dev *dev,
+ struct mlx5_async_cmd *cmd)
+{
+ init_completion(&cmd->comp);
+ cmd->err = mlx5_cmd_exec_cb(&dev->async_ctx, cmd->in, cmd->in_size,
+ &cmd->out, sizeof(cmd->out),
+ devx_async_destroy_cb, &cmd->cb_work);
+}
+
+static void devx_wait_async_destroy(struct mlx5_async_cmd *cmd)
+{
+ if (!cmd->err)
+ wait_for_completion(&cmd->comp);
+ atomic_set(&cmd->uobject->usecnt, 0);
+}
+
+void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile)
+{
+ struct mlx5_async_cmd *async_cmd;
+ struct ib_ucontext *ucontext = ufile->ucontext;
+ struct ib_device *device = ucontext->device;
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct ib_uobject *uobject;
+ struct devx_obj *obj;
+ int head = 0;
+ int tail = 0;
+
+ async_cmd = kcalloc(MAX_ASYNC_CMDS, sizeof(*async_cmd), GFP_KERNEL);
+ if (!async_cmd)
+ return;
+
+ list_for_each_entry(uobject, &ufile->uobjects, list) {
+ WARN_ON(uverbs_try_lock_object(uobject, UVERBS_LOOKUP_WRITE));
+
+ /*
+ * Currently we only support QP destruction, if other objects
+ * are to be destroyed need to add type synchronization to the
+ * cleanup algorithm and handle pre/post FW cleanup for the
+ * new types if needed.
+ */
+ if (uobj_get_object_id(uobject) != MLX5_IB_OBJECT_DEVX_OBJ ||
+ (get_dec_obj_type(uobject->object, MLX5_EVENT_TYPE_MAX) !=
+ MLX5_OBJ_TYPE_QP)) {
+ atomic_set(&uobject->usecnt, 0);
+ continue;
+ }
+
+ obj = uobject->object;
+
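+ /*
+ * Issue the destroy asynchronously, keeping at most MAX_ASYNC_CMDS
+ * commands in flight; wait for the oldest one when the window of
+ * pending slots is full.
+ */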
+ async_cmd[tail % MAX_ASYNC_CMDS].in = obj->dinbox;
+ async_cmd[tail % MAX_ASYNC_CMDS].in_size = obj->dinlen;
+ async_cmd[tail % MAX_ASYNC_CMDS].uobject = uobject;
+
+ devx_async_destroy(dev, &async_cmd[tail % MAX_ASYNC_CMDS]);
+ tail++;
+
+ if (tail - head == MAX_ASYNC_CMDS) {
+ devx_wait_async_destroy(&async_cmd[head % MAX_ASYNC_CMDS]);
+ head++;
+ }
+ }
+
+ while (head != tail) {
+ devx_wait_async_destroy(&async_cmd[head % MAX_ASYNC_CMDS]);
+ head++;
+ }
+
+ kfree(async_cmd);
+}
+
static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
size_t count, loff_t *pos)
{
@@ -1645,11 +2764,11 @@ static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
return -ERESTARTSYS;
}
- if (list_empty(&ev_queue->event_list) &&
- ev_queue->is_destroyed)
- return -EIO;
-
spin_lock_irq(&ev_queue->lock);
+ if (ev_queue->is_destroyed) {
+ spin_unlock_irq(&ev_queue->lock);
+ return -EIO;
+ }
}
event = list_entry(ev_queue->event_list.next,
@@ -1675,23 +2794,6 @@ static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
return ret;
}
-static int devx_async_cmd_event_close(struct inode *inode, struct file *filp)
-{
- struct ib_uobject *uobj = filp->private_data;
- struct devx_async_cmd_event_file *comp_ev_file = container_of(
- uobj, struct devx_async_cmd_event_file, uobj);
- struct devx_async_data *entry, *tmp;
-
- spin_lock_irq(&comp_ev_file->ev_queue.lock);
- list_for_each_entry_safe(entry, tmp,
- &comp_ev_file->ev_queue.event_list, list)
- kvfree(entry);
- spin_unlock_irq(&comp_ev_file->ev_queue.lock);
-
- uverbs_close_fd(filp);
- return 0;
-}
-
static __poll_t devx_async_cmd_event_poll(struct file *filp,
struct poll_table_struct *wait)
{
@@ -1715,27 +2817,193 @@ static const struct file_operations devx_async_cmd_event_fops = {
.owner = THIS_MODULE,
.read = devx_async_cmd_event_read,
.poll = devx_async_cmd_event_poll,
- .release = devx_async_cmd_event_close,
- .llseek = no_llseek,
+ .release = uverbs_uobject_fd_release,
};
-static int devx_hot_unplug_async_cmd_event_file(struct ib_uobject *uobj,
- enum rdma_remove_reason why)
+static ssize_t devx_async_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct devx_async_event_file *ev_file = filp->private_data;
+ struct devx_event_subscription *event_sub;
+ struct devx_async_event_data *event;
+ int ret = 0;
+ size_t eventsz;
+ bool omit_data;
+ void *event_data;
+
+ omit_data = ev_file->omit_data;
+
+ spin_lock_irq(&ev_file->lock);
+
+ if (ev_file->is_overflow_err) {
+ ev_file->is_overflow_err = 0;
+ spin_unlock_irq(&ev_file->lock);
+ return -EOVERFLOW;
+ }
+
+ while (list_empty(&ev_file->event_list)) {
+ spin_unlock_irq(&ev_file->lock);
+
+ if (filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ if (wait_event_interruptible(ev_file->poll_wait,
+ (!list_empty(&ev_file->event_list) ||
+ ev_file->is_destroyed))) {
+ return -ERESTARTSYS;
+ }
+
+ spin_lock_irq(&ev_file->lock);
+ if (ev_file->is_destroyed) {
+ spin_unlock_irq(&ev_file->lock);
+ return -EIO;
+ }
+ }
+
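+ /*
+ * In omit_data mode the subscription entry itself is queued and only
+ * its cookie is reported, so there is no per-event buffer to free;
+ * otherwise a copy of the full EQE is dequeued and freed once it has
+ * been copied to userspace.
+ */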
+ if (omit_data) {
+ event_sub = list_first_entry(&ev_file->event_list,
+ struct devx_event_subscription,
+ event_list);
+ eventsz = sizeof(event_sub->cookie);
+ event_data = &event_sub->cookie;
+ } else {
+ event = list_first_entry(&ev_file->event_list,
+ struct devx_async_event_data, list);
+ eventsz = sizeof(struct mlx5_eqe) +
+ sizeof(struct mlx5_ib_uapi_devx_async_event_hdr);
+ event_data = &event->hdr;
+ }
+
+ if (eventsz > count) {
+ spin_unlock_irq(&ev_file->lock);
+ return -EINVAL;
+ }
+
+ if (omit_data)
+ list_del_init(&event_sub->event_list);
+ else
+ list_del(&event->list);
+
+ spin_unlock_irq(&ev_file->lock);
+
+ if (copy_to_user(buf, event_data, eventsz))
+ /* This points to an application issue, not a kernel concern */
+ ret = -EFAULT;
+ else
+ ret = eventsz;
+
+ if (!omit_data)
+ kfree(event);
+ return ret;
+}
+
+static __poll_t devx_async_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
+{
+ struct devx_async_event_file *ev_file = filp->private_data;
+ __poll_t pollflags = 0;
+
+ poll_wait(filp, &ev_file->poll_wait, wait);
+
+ spin_lock_irq(&ev_file->lock);
+ if (ev_file->is_destroyed)
+ pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
+ else if (!list_empty(&ev_file->event_list))
+ pollflags = EPOLLIN | EPOLLRDNORM;
+ spin_unlock_irq(&ev_file->lock);
+
+ return pollflags;
+}
+
+static void devx_free_subscription(struct rcu_head *rcu)
+{
+ struct devx_event_subscription *event_sub =
+ container_of(rcu, struct devx_event_subscription, rcu);
+
+ if (event_sub->eventfd)
+ eventfd_ctx_put(event_sub->eventfd);
+ uverbs_uobject_put(&event_sub->ev_file->uobj);
+ kfree(event_sub);
+}
+
+static const struct file_operations devx_async_event_fops = {
+ .owner = THIS_MODULE,
+ .read = devx_async_event_read,
+ .poll = devx_async_event_poll,
+ .release = uverbs_uobject_fd_release,
+};
+
+static void devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
{
struct devx_async_cmd_event_file *comp_ev_file =
container_of(uobj, struct devx_async_cmd_event_file,
uobj);
struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
+ struct devx_async_data *entry, *tmp;
spin_lock_irq(&ev_queue->lock);
ev_queue->is_destroyed = 1;
spin_unlock_irq(&ev_queue->lock);
-
- if (why == RDMA_REMOVE_DRIVER_REMOVE)
- wake_up_interruptible(&ev_queue->poll_wait);
+ wake_up_interruptible(&ev_queue->poll_wait);
mlx5_cmd_cleanup_async_ctx(&comp_ev_file->async_ctx);
- return 0;
+
+ spin_lock_irq(&comp_ev_file->ev_queue.lock);
+ list_for_each_entry_safe(entry, tmp,
+ &comp_ev_file->ev_queue.event_list, list) {
+ list_del(&entry->list);
+ kvfree(entry);
+ }
+ spin_unlock_irq(&comp_ev_file->ev_queue.lock);
+}
+
+static void devx_async_event_destroy_uobj(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ struct devx_async_event_file *ev_file =
+ container_of(uobj, struct devx_async_event_file,
+ uobj);
+ struct devx_event_subscription *event_sub, *event_sub_tmp;
+ struct mlx5_ib_dev *dev = ev_file->dev;
+
+ spin_lock_irq(&ev_file->lock);
+ ev_file->is_destroyed = 1;
+
+ /* free the pending events allocation */
+ if (ev_file->omit_data) {
+ struct devx_event_subscription *event_sub, *tmp;
+
+ list_for_each_entry_safe(event_sub, tmp, &ev_file->event_list,
+ event_list)
+ list_del_init(&event_sub->event_list);
+
+ } else {
+ struct devx_async_event_data *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &ev_file->event_list,
+ list) {
+ list_del(&entry->list);
+ kfree(entry);
+ }
+ }
+
+ spin_unlock_irq(&ev_file->lock);
+ wake_up_interruptible(&ev_file->poll_wait);
+
+ mutex_lock(&dev->devx_event_table.event_xa_lock);
+ /* delete the subscriptions which are related to this FD */
+ list_for_each_entry_safe(event_sub, event_sub_tmp,
+ &ev_file->subscribed_events_list, file_list) {
+ devx_cleanup_subscription(dev, event_sub);
+ list_del_rcu(&event_sub->file_list);
+ /* subscription may not be used by the read API any more */
+ call_rcu(&event_sub->rcu, devx_free_subscription);
+ }
+ mutex_unlock(&dev->devx_event_table.event_xa_lock);
+
+ put_device(&dev->ib_dev.dev);
};
DECLARE_UVERBS_NAMED_METHOD(
@@ -1750,8 +3018,12 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
UVERBS_ATTR_TYPE(u64),
UA_MANDATORY),
+ UVERBS_ATTR_RAW_FD(MLX5_IB_ATTR_DEVX_UMEM_REG_DMABUF_FD,
+ UA_OPTIONAL),
UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
enum ib_access_flags),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_PGSZ_BITMAP,
+ u64),
UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
UVERBS_ATTR_TYPE(u32),
UA_MANDATORY));
@@ -1820,7 +3092,7 @@ DECLARE_UVERBS_NAMED_METHOD(
MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
UVERBS_IDR_ANY_OBJECT,
- UVERBS_ACCESS_WRITE,
+ UVERBS_ACCESS_READ,
UA_MANDATORY),
UVERBS_ATTR_PTR_IN(
MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
@@ -1869,10 +3141,32 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_TYPE(u64),
UA_MANDATORY));
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT,
+ UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE,
+ MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
+ UVERBS_ATTR_MIN_SIZE(sizeof(u16)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL));
+
DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR),
- &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN));
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT));
DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup),
@@ -1898,11 +3192,29 @@ DECLARE_UVERBS_NAMED_METHOD(
DECLARE_UVERBS_NAMED_OBJECT(
MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_cmd_event_file),
- devx_hot_unplug_async_cmd_event_file,
+ devx_async_cmd_event_destroy_uobj,
&devx_async_cmd_event_fops, "[devx_async_cmd]",
O_RDONLY),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC));
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC,
+ UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE,
+ MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
+ enum mlx5_ib_uapi_devx_create_event_channel_flags,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
+ UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_event_file),
+ devx_async_event_destroy_uobj,
+ &devx_async_event_fops, "[devx_async_event]",
+ O_RDONLY),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC));
+
static bool devx_is_supported(struct ib_device *device)
{
struct mlx5_ib_dev *dev = to_mdev(device);
@@ -1923,5 +3235,8 @@ const struct uapi_definition mlx5_ib_devx_defs[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
+ UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
{},
};
diff --git a/drivers/infiniband/hw/mlx5/devx.h b/drivers/infiniband/hw/mlx5/devx.h
new file mode 100644
index 000000000000..ee9e7d3af93f
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/devx.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_DEVX_H
+#define _MLX5_IB_DEVX_H
+
+#include "mlx5_ib.h"
+
+#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
+struct devx_obj {
+ struct mlx5_ib_dev *ib_dev;
+ u64 obj_id;
+ u32 dinlen; /* destroy inbox length */
+ u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
+ u32 flags;
+ union {
+ struct mlx5_ib_mkey mkey;
+ struct mlx5_core_dct core_dct;
+ struct mlx5_core_cq core_cq;
+ u32 flow_counter_bulk_size;
+ };
+ struct list_head event_sub; /* holds devx_event_subscription entries */
+};
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user, u64 req_ucaps);
+void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
+int mlx5_ib_devx_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev);
+void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile);
+#else
+static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user,
+ u64 req_ucaps)
+{
+ return -EOPNOTSUPP;
+}
+static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {}
+static inline int mlx5_ib_devx_init(struct mlx5_ib_dev *dev)
+{
+ return 0;
+}
+static inline void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev)
+{
+}
+static inline void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile)
+{
+}
+#endif
+#endif /* _MLX5_IB_DEVX_H */
diff --git a/drivers/infiniband/hw/mlx5/dm.c b/drivers/infiniband/hw/mlx5/dm.c
new file mode 100644
index 000000000000..9ded2b7c1e31
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/dm.c
@@ -0,0 +1,612 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2021, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "dm.h"
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+static int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
+ u64 length, u32 alignment)
+{
+ struct mlx5_core_dev *dev = dm->dev;
+ u64 num_memic_hw_pages = MLX5_CAP_DEV_MEM(dev, memic_bar_size)
+ >> PAGE_SHIFT;
+ u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
+ u32 max_alignment = MLX5_CAP_DEV_MEM(dev, log_max_memic_addr_alignment);
+ u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE);
+ u32 out[MLX5_ST_SZ_DW(alloc_memic_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_memic_in)] = {};
+ u32 mlx5_alignment;
+ u64 page_idx = 0;
+ int ret = 0;
+
+ if (!length || (length & MLX5_MEMIC_ALLOC_SIZE_MASK))
+ return -EINVAL;
+
+ /* The mlx5 device expresses alignment as 64 * 2^driver_value,
+ * so the requested alignment must be normalized to that scale.
+ */
+ mlx5_alignment = (alignment < MLX5_MEMIC_BASE_ALIGN) ? 0 :
+ alignment - MLX5_MEMIC_BASE_ALIGN;
+ if (mlx5_alignment > max_alignment)
+ return -EINVAL;
+
+ MLX5_SET(alloc_memic_in, in, opcode, MLX5_CMD_OP_ALLOC_MEMIC);
+ MLX5_SET(alloc_memic_in, in, range_size, num_pages * PAGE_SIZE);
+ MLX5_SET(alloc_memic_in, in, memic_size, length);
+ MLX5_SET(alloc_memic_in, in, log_memic_addr_alignment,
+ mlx5_alignment);
+
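+ /*
+ * Walk the MEMIC page bitmap for a free range, reserve it and ask
+ * firmware to allocate from it; on -EAGAIN the reservation is
+ * dropped and the scan continues from the next page.
+ */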
+ while (page_idx < num_memic_hw_pages) {
+ spin_lock(&dm->lock);
+ page_idx = bitmap_find_next_zero_area(dm->memic_alloc_pages,
+ num_memic_hw_pages,
+ page_idx,
+ num_pages, 0);
+
+ if (page_idx < num_memic_hw_pages)
+ bitmap_set(dm->memic_alloc_pages,
+ page_idx, num_pages);
+
+ spin_unlock(&dm->lock);
+
+ if (page_idx >= num_memic_hw_pages)
+ break;
+
+ MLX5_SET64(alloc_memic_in, in, range_start_addr,
+ hw_start_addr + (page_idx * PAGE_SIZE));
+
+ ret = mlx5_cmd_exec_inout(dev, alloc_memic, in, out);
+ if (ret) {
+ spin_lock(&dm->lock);
+ bitmap_clear(dm->memic_alloc_pages,
+ page_idx, num_pages);
+ spin_unlock(&dm->lock);
+
+ if (ret == -EAGAIN) {
+ page_idx++;
+ continue;
+ }
+
+ return ret;
+ }
+
+ *addr = dev->bar_addr +
+ MLX5_GET64(alloc_memic_out, out, memic_start_addr);
+
+ return 0;
+ }
+
+ return -ENOMEM;
+}
+
+void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr,
+ u64 length)
+{
+ struct mlx5_core_dev *dev = dm->dev;
+ u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
+ u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE);
+ u32 in[MLX5_ST_SZ_DW(dealloc_memic_in)] = {};
+ u64 start_page_idx;
+ int err;
+
+ addr -= dev->bar_addr;
+ start_page_idx = (addr - hw_start_addr) >> PAGE_SHIFT;
+
+ MLX5_SET(dealloc_memic_in, in, opcode, MLX5_CMD_OP_DEALLOC_MEMIC);
+ MLX5_SET64(dealloc_memic_in, in, memic_start_addr, addr);
+ MLX5_SET(dealloc_memic_in, in, memic_size, length);
+
+ err = mlx5_cmd_exec_in(dev, dealloc_memic, in);
+ if (err)
+ return;
+
+ spin_lock(&dm->lock);
+ bitmap_clear(dm->memic_alloc_pages,
+ start_page_idx, num_pages);
+ spin_unlock(&dm->lock);
+}
+
+void mlx5_cmd_dealloc_memic_op(struct mlx5_dm *dm, phys_addr_t addr,
+ u8 operation)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_memic_in)] = {};
+ struct mlx5_core_dev *dev = dm->dev;
+
+ MLX5_SET(modify_memic_in, in, opcode, MLX5_CMD_OP_MODIFY_MEMIC);
+ MLX5_SET(modify_memic_in, in, op_mod, MLX5_MODIFY_MEMIC_OP_MOD_DEALLOC);
+ MLX5_SET(modify_memic_in, in, memic_operation_type, operation);
+ MLX5_SET64(modify_memic_in, in, memic_start_addr, addr - dev->bar_addr);
+
+ mlx5_cmd_exec_in(dev, modify_memic, in);
+}
+
+static int mlx5_cmd_alloc_memic_op(struct mlx5_dm *dm, phys_addr_t addr,
+ u8 operation, phys_addr_t *op_addr)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_memic_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(modify_memic_in)] = {};
+ struct mlx5_core_dev *dev = dm->dev;
+ int err;
+
+ MLX5_SET(modify_memic_in, in, opcode, MLX5_CMD_OP_MODIFY_MEMIC);
+ MLX5_SET(modify_memic_in, in, op_mod, MLX5_MODIFY_MEMIC_OP_MOD_ALLOC);
+ MLX5_SET(modify_memic_in, in, memic_operation_type, operation);
+ MLX5_SET64(modify_memic_in, in, memic_start_addr, addr - dev->bar_addr);
+
+ err = mlx5_cmd_exec_inout(dev, modify_memic, in, out);
+ if (err)
+ return err;
+
+ *op_addr = dev->bar_addr +
+ MLX5_GET64(modify_memic_out, out, memic_operation_addr);
+ return 0;
+}
+
+static int add_dm_mmap_entry(struct ib_ucontext *context,
+ struct mlx5_user_mmap_entry *mentry, u8 mmap_flag,
+ size_t size, u64 address)
+{
+ mentry->mmap_flag = mmap_flag;
+ mentry->address = address;
+
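+ /*
+ * The mmap page offset encodes MLX5_IB_MMAP_DEVICE_MEM in its upper
+ * 16 bits; the lower 16 bits index the entry and are reported back
+ * to userspace as the page index.
+ */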
+ return rdma_user_mmap_entry_insert_range(
+ context, &mentry->rdma_entry, size,
+ MLX5_IB_MMAP_DEVICE_MEM << 16,
+ (MLX5_IB_MMAP_DEVICE_MEM << 16) + (1UL << 16) - 1);
+}
+
+static void mlx5_ib_dm_memic_free(struct kref *kref)
+{
+ struct mlx5_ib_dm_memic *dm =
+ container_of(kref, struct mlx5_ib_dm_memic, ref);
+ struct mlx5_ib_dev *dev = to_mdev(dm->base.ibdm.device);
+
+ mlx5_cmd_dealloc_memic(&dev->dm, dm->base.dev_addr, dm->base.size);
+ kfree(dm);
+}
+
+static int copy_op_to_user(struct mlx5_ib_dm_op_entry *op_entry,
+ struct uverbs_attr_bundle *attrs)
+{
+ u64 start_offset;
+ u16 page_idx;
+ int err;
+
+ page_idx = op_entry->mentry.rdma_entry.start_pgoff & 0xFFFF;
+ start_offset = op_entry->op_addr & ~PAGE_MASK;
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DM_MAP_OP_ADDR_RESP_PAGE_INDEX,
+ &page_idx, sizeof(page_idx));
+ if (err)
+ return err;
+
+ return uverbs_copy_to(attrs,
+ MLX5_IB_ATTR_DM_MAP_OP_ADDR_RESP_START_OFFSET,
+ &start_offset, sizeof(start_offset));
+}
+
+static int map_existing_op(struct mlx5_ib_dm_memic *dm, u8 op,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_dm_op_entry *op_entry;
+
+ op_entry = xa_load(&dm->ops, op);
+ if (!op_entry)
+ return -ENOENT;
+
+ return copy_op_to_user(op_entry, attrs);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DM_MAP_OP_ADDR)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_DM_MAP_OP_ADDR_REQ_HANDLE);
+ struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
+ struct ib_dm *ibdm = uobj->object;
+ struct mlx5_ib_dm_memic *dm = to_memic(ibdm);
+ struct mlx5_ib_dm_op_entry *op_entry;
+ int err;
+ u8 op;
+
+ err = uverbs_copy_from(&op, attrs, MLX5_IB_ATTR_DM_MAP_OP_ADDR_REQ_OP);
+ if (err)
+ return err;
+
+ if (op >= BITS_PER_TYPE(u32))
+ return -EOPNOTSUPP;
+
+ if (!(MLX5_CAP_DEV_MEM(dev->mdev, memic_operations) & BIT(op)))
+ return -EOPNOTSUPP;
+
+ mutex_lock(&dm->ops_xa_lock);
+ err = map_existing_op(dm, op, attrs);
+ if (!err || err != -ENOENT)
+ goto err_unlock;
+
+ op_entry = kzalloc(sizeof(*op_entry), GFP_KERNEL);
+ if (!op_entry) {
+ /* err still holds -ENOENT from map_existing_op(); report the allocation failure */
+ err = -ENOMEM;
+ goto err_unlock;
+ }
+
+ err = mlx5_cmd_alloc_memic_op(&dev->dm, dm->base.dev_addr, op,
+ &op_entry->op_addr);
+ if (err) {
+ kfree(op_entry);
+ goto err_unlock;
+ }
+ op_entry->op = op;
+ op_entry->dm = dm;
+
+ err = add_dm_mmap_entry(uobj->context, &op_entry->mentry,
+ MLX5_IB_MMAP_TYPE_MEMIC_OP, dm->base.size,
+ op_entry->op_addr & PAGE_MASK);
+ if (err) {
+ mlx5_cmd_dealloc_memic_op(&dev->dm, dm->base.dev_addr, op);
+ kfree(op_entry);
+ goto err_unlock;
+ }
+ /* From this point, entry will be freed by mmap_free */
+ kref_get(&dm->ref);
+
+ err = copy_op_to_user(op_entry, attrs);
+ if (err)
+ goto err_remove;
+
+ err = xa_insert(&dm->ops, op, op_entry, GFP_KERNEL);
+ if (err)
+ goto err_remove;
+ mutex_unlock(&dm->ops_xa_lock);
+
+ return 0;
+
+err_remove:
+ rdma_user_mmap_entry_remove(&op_entry->mentry.rdma_entry);
+err_unlock:
+ mutex_unlock(&dm->ops_xa_lock);
+
+ return err;
+}
+
+static struct ib_dm *handle_alloc_dm_memic(struct ib_ucontext *ctx,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm;
+ struct mlx5_ib_dm_memic *dm;
+ u64 start_offset;
+ u16 page_idx;
+ int err;
+ u64 address;
+
+ if (!dm_db || !MLX5_CAP_DEV_MEM(dm_db->dev, memic))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ dm = kzalloc(sizeof(*dm), GFP_KERNEL);
+ if (!dm)
+ return ERR_PTR(-ENOMEM);
+
+ dm->base.type = MLX5_IB_UAPI_DM_TYPE_MEMIC;
+ dm->base.size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE);
+ dm->base.ibdm.device = ctx->device;
+
+ kref_init(&dm->ref);
+ xa_init(&dm->ops);
+ mutex_init(&dm->ops_xa_lock);
+ dm->req_length = attr->length;
+
+ err = mlx5_cmd_alloc_memic(dm_db, &dm->base.dev_addr,
+ dm->base.size, attr->alignment);
+ if (err) {
+ kfree(dm);
+ return ERR_PTR(err);
+ }
+
+ address = dm->base.dev_addr & PAGE_MASK;
+ err = add_dm_mmap_entry(ctx, &dm->mentry, MLX5_IB_MMAP_TYPE_MEMIC,
+ dm->base.size, address);
+ if (err) {
+ mlx5_cmd_dealloc_memic(dm_db, dm->base.dev_addr, dm->base.size);
+ kfree(dm);
+ return ERR_PTR(err);
+ }
+
+ page_idx = dm->mentry.rdma_entry.start_pgoff & 0xFFFF;
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
+ &page_idx, sizeof(page_idx));
+ if (err)
+ goto err_copy;
+
+ start_offset = dm->base.dev_addr & ~PAGE_MASK;
+ err = uverbs_copy_to(attrs,
+ MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
+ &start_offset, sizeof(start_offset));
+ if (err)
+ goto err_copy;
+
+ return &dm->base.ibdm;
+
+err_copy:
+ rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry);
+ return ERR_PTR(err);
+}
+
+static enum mlx5_sw_icm_type get_icm_type(int uapi_type)
+{
+ switch (uapi_type) {
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ return MLX5_SW_ICM_TYPE_HEADER_MODIFY;
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
+ return MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN;
+ case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM:
+ return MLX5_SW_ICM_TYPE_SW_ENCAP;
+ case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ default:
+ return MLX5_SW_ICM_TYPE_STEERING;
+ }
+}
+
+static struct ib_dm *handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs,
+ int type)
+{
+ struct mlx5_core_dev *dev = to_mdev(ctx->device)->mdev;
+ enum mlx5_sw_icm_type icm_type;
+ struct mlx5_ib_dm_icm *dm;
+ u64 act_size;
+ int err;
+
+ if (!capable(CAP_SYS_RAWIO) || !capable(CAP_NET_RAW))
+ return ERR_PTR(-EPERM);
+
+ switch (type) {
+ case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM:
+ if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner) ||
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner) ||
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) ||
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2)))
+ return ERR_PTR(-EOPNOTSUPP);
+ break;
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
+ if (!MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) ||
+ !MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2))
+ return ERR_PTR(-EOPNOTSUPP);
+ break;
+ default:
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ dm = kzalloc(sizeof(*dm), GFP_KERNEL);
+ if (!dm)
+ return ERR_PTR(-ENOMEM);
+
+ dm->base.type = type;
+ dm->base.ibdm.device = ctx->device;
+
+ /* Allocation size must be a multiple of the basic block size
+ * and a power of 2.
+ */
+ act_size = round_up(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dev));
+ act_size = roundup_pow_of_two(act_size);
+
+ dm->base.size = act_size;
+ icm_type = get_icm_type(type);
+
+ err = mlx5_dm_sw_icm_alloc(dev, icm_type, act_size, attr->alignment,
+ to_mucontext(ctx)->devx_uid,
+ &dm->base.dev_addr, &dm->obj_id);
+ if (err)
+ goto free;
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
+ &dm->base.dev_addr, sizeof(dm->base.dev_addr));
+ if (err) {
+ mlx5_dm_sw_icm_dealloc(dev, icm_type, dm->base.size,
+ to_mucontext(ctx)->devx_uid,
+ dm->base.dev_addr, dm->obj_id);
+ goto free;
+ }
+ return &dm->base.ibdm;
+free:
+ kfree(dm);
+ return ERR_PTR(err);
+}
+
+struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ enum mlx5_ib_uapi_dm_type type;
+ int err;
+
+ err = uverbs_get_const_default(&type, attrs,
+ MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE,
+ MLX5_IB_UAPI_DM_TYPE_MEMIC);
+ if (err)
+ return ERR_PTR(err);
+
+ mlx5_ib_dbg(to_mdev(ibdev), "alloc_dm req: dm_type=%d user_length=0x%llx log_alignment=%d\n",
+ type, attr->length, attr->alignment);
+
+ switch (type) {
+ case MLX5_IB_UAPI_DM_TYPE_MEMIC:
+ return handle_alloc_dm_memic(context, attr, attrs);
+ case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM:
+ return handle_alloc_dm_sw_icm(context, attr, attrs, type);
+ default:
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+}
+
+static void dm_memic_remove_ops(struct mlx5_ib_dm_memic *dm)
+{
+ struct mlx5_ib_dm_op_entry *entry;
+ unsigned long idx;
+
+ mutex_lock(&dm->ops_xa_lock);
+ xa_for_each(&dm->ops, idx, entry) {
+ xa_erase(&dm->ops, idx);
+ rdma_user_mmap_entry_remove(&entry->mentry.rdma_entry);
+ }
+ mutex_unlock(&dm->ops_xa_lock);
+}
+
+static void mlx5_dm_memic_dealloc(struct mlx5_ib_dm_memic *dm)
+{
+ dm_memic_remove_ops(dm);
+ rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry);
+}
+
+static int mlx5_dm_icm_dealloc(struct mlx5_ib_ucontext *ctx,
+ struct mlx5_ib_dm_icm *dm)
+{
+ enum mlx5_sw_icm_type type = get_icm_type(dm->base.type);
+ struct mlx5_core_dev *dev = to_mdev(dm->base.ibdm.device)->mdev;
+ int err;
+
+ err = mlx5_dm_sw_icm_dealloc(dev, type, dm->base.size, ctx->devx_uid,
+ dm->base.dev_addr, dm->obj_id);
+ if (!err)
+ kfree(dm);
+ return err;
+}
+
+static int mlx5_ib_dealloc_dm(struct ib_dm *ibdm,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ struct mlx5_ib_dm *dm = to_mdm(ibdm);
+
+ switch (dm->type) {
+ case MLX5_IB_UAPI_DM_TYPE_MEMIC:
+ mlx5_dm_memic_dealloc(to_memic(ibdm));
+ return 0;
+ case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM:
+ return mlx5_dm_icm_dealloc(ctx, to_icm(ibdm));
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DM_QUERY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_dm *ibdm =
+ uverbs_attr_get_obj(attrs, MLX5_IB_ATTR_QUERY_DM_REQ_HANDLE);
+ struct mlx5_ib_dm *dm = to_mdm(ibdm);
+ struct mlx5_ib_dm_memic *memic;
+ u64 start_offset;
+ u16 page_idx;
+ int err;
+
+ if (dm->type != MLX5_IB_UAPI_DM_TYPE_MEMIC)
+ return -EOPNOTSUPP;
+
+ memic = to_memic(ibdm);
+ page_idx = memic->mentry.rdma_entry.start_pgoff & 0xFFFF;
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_QUERY_DM_RESP_PAGE_INDEX,
+ &page_idx, sizeof(page_idx));
+ if (err)
+ return err;
+
+ start_offset = memic->base.dev_addr & ~PAGE_MASK;
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_QUERY_DM_RESP_START_OFFSET,
+ &start_offset, sizeof(start_offset));
+ if (err)
+ return err;
+
+ return uverbs_copy_to(attrs, MLX5_IB_ATTR_QUERY_DM_RESP_LENGTH,
+ &memic->req_length,
+ sizeof(memic->req_length));
+}
+
+void mlx5_ib_dm_mmap_free(struct mlx5_ib_dev *dev,
+ struct mlx5_user_mmap_entry *mentry)
+{
+ struct mlx5_ib_dm_op_entry *op_entry;
+ struct mlx5_ib_dm_memic *mdm;
+
+ switch (mentry->mmap_flag) {
+ case MLX5_IB_MMAP_TYPE_MEMIC:
+ mdm = container_of(mentry, struct mlx5_ib_dm_memic, mentry);
+ kref_put(&mdm->ref, mlx5_ib_dm_memic_free);
+ break;
+ case MLX5_IB_MMAP_TYPE_MEMIC_OP:
+ op_entry = container_of(mentry, struct mlx5_ib_dm_op_entry,
+ mentry);
+ mdm = op_entry->dm;
+ mlx5_cmd_dealloc_memic_op(&dev->dm, mdm->base.dev_addr,
+ op_entry->op);
+ kfree(op_entry);
+ kref_put(&mdm->ref, mlx5_ib_dm_memic_free);
+ break;
+ default:
+ WARN_ON(true);
+ }
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DM_QUERY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_QUERY_DM_REQ_HANDLE, UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_READ, UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_QUERY_DM_RESP_START_OFFSET,
+ UVERBS_ATTR_TYPE(u64), UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_QUERY_DM_RESP_PAGE_INDEX,
+ UVERBS_ATTR_TYPE(u16), UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_QUERY_DM_RESP_LENGTH,
+ UVERBS_ATTR_TYPE(u64), UA_MANDATORY));
+
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
+ mlx5_ib_dm, UVERBS_OBJECT_DM, UVERBS_METHOD_DM_ALLOC,
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
+ UVERBS_ATTR_TYPE(u64), UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
+ UVERBS_ATTR_TYPE(u16), UA_OPTIONAL),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE,
+ enum mlx5_ib_uapi_dm_type, UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DM_MAP_OP_ADDR,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DM_MAP_OP_ADDR_REQ_HANDLE,
+ UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DM_MAP_OP_ADDR_REQ_OP,
+ UVERBS_ATTR_TYPE(u8),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DM_MAP_OP_ADDR_RESP_START_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DM_MAP_OP_ADDR_RESP_PAGE_INDEX,
+ UVERBS_ATTR_TYPE(u16),
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DM,
+ &UVERBS_METHOD(MLX5_IB_METHOD_DM_MAP_OP_ADDR),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DM_QUERY));
+
+const struct uapi_definition mlx5_ib_dm_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DM),
+ {},
+};
+
+const struct ib_device_ops mlx5_ib_dev_dm_ops = {
+ .alloc_dm = mlx5_ib_alloc_dm,
+ .dealloc_dm = mlx5_ib_dealloc_dm,
+ .reg_dm_mr = mlx5_ib_reg_dm_mr,
+};
diff --git a/drivers/infiniband/hw/mlx5/dm.h b/drivers/infiniband/hw/mlx5/dm.h
new file mode 100644
index 000000000000..9674a80d8d70
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/dm.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2021, Mellanox Technologies inc. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_DM_H
+#define _MLX5_IB_DM_H
+
+#include "mlx5_ib.h"
+
+extern const struct ib_device_ops mlx5_ib_dev_dm_ops;
+extern const struct uapi_definition mlx5_ib_dm_defs[];
+
+struct mlx5_ib_dm {
+ struct ib_dm ibdm;
+ u32 type;
+ phys_addr_t dev_addr;
+ size_t size;
+};
+
+struct mlx5_ib_dm_op_entry {
+ struct mlx5_user_mmap_entry mentry;
+ phys_addr_t op_addr;
+ struct mlx5_ib_dm_memic *dm;
+ u8 op;
+};
+
+struct mlx5_ib_dm_memic {
+ struct mlx5_ib_dm base;
+ struct mlx5_user_mmap_entry mentry;
+ struct xarray ops;
+ struct mutex ops_xa_lock;
+ struct kref ref;
+ size_t req_length;
+};
+
+struct mlx5_ib_dm_icm {
+ struct mlx5_ib_dm base;
+ u32 obj_id;
+};
+
+static inline struct mlx5_ib_dm *to_mdm(struct ib_dm *ibdm)
+{
+ return container_of(ibdm, struct mlx5_ib_dm, ibdm);
+}
+
+static inline struct mlx5_ib_dm_memic *to_memic(struct ib_dm *ibdm)
+{
+ return container_of(ibdm, struct mlx5_ib_dm_memic, base.ibdm);
+}
+
+static inline struct mlx5_ib_dm_icm *to_icm(struct ib_dm *ibdm)
+{
+ return container_of(ibdm, struct mlx5_ib_dm_icm, base.ibdm);
+}
+
+struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+void mlx5_ib_dm_mmap_free(struct mlx5_ib_dev *dev,
+ struct mlx5_user_mmap_entry *mentry);
+void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr,
+ u64 length);
+void mlx5_cmd_dealloc_memic_op(struct mlx5_dm *dm, phys_addr_t addr,
+ u8 operation);
+
+#endif /* _MLX5_IB_DM_H */
diff --git a/drivers/infiniband/hw/mlx5/dmah.c b/drivers/infiniband/hw/mlx5/dmah.c
new file mode 100644
index 000000000000..362a88992ffa
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/dmah.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include <linux/pci-tph.h>
+#include "dmah.h"
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+static int mlx5_ib_alloc_dmah(struct ib_dmah *ibdmah,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_core_dev *mdev = to_mdev(ibdmah->device)->mdev;
+ struct mlx5_ib_dmah *dmah = to_mdmah(ibdmah);
+ u16 st_bits = BIT(IB_DMAH_CPU_ID_EXISTS) |
+ BIT(IB_DMAH_MEM_TYPE_EXISTS);
+ int err;
+
+ /* PH is mandatory for TPH, per PCIe spec 6.2-1.0 */
+ if (!(ibdmah->valid_fields & BIT(IB_DMAH_PH_EXISTS)))
+ return -EINVAL;
+
+ /* ST is optional; however, partial data for it is not allowed */
+ if (ibdmah->valid_fields & st_bits) {
+ if ((ibdmah->valid_fields & st_bits) != st_bits)
+ return -EINVAL;
+ err = mlx5_st_alloc_index(mdev, ibdmah->mem_type,
+ ibdmah->cpu_id, &dmah->st_index);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int mlx5_ib_dealloc_dmah(struct ib_dmah *ibdmah,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_dmah *dmah = to_mdmah(ibdmah);
+ struct mlx5_core_dev *mdev = to_mdev(ibdmah->device)->mdev;
+
+ if (ibdmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS))
+ return mlx5_st_dealloc_index(mdev, dmah->st_index);
+
+ return 0;
+}
+
+const struct ib_device_ops mlx5_ib_dev_dmah_ops = {
+ .alloc_dmah = mlx5_ib_alloc_dmah,
+ .dealloc_dmah = mlx5_ib_dealloc_dmah,
+};
diff --git a/drivers/infiniband/hw/mlx5/dmah.h b/drivers/infiniband/hw/mlx5/dmah.h
new file mode 100644
index 000000000000..68de72b4744a
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/dmah.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#ifndef _MLX5_IB_DMAH_H
+#define _MLX5_IB_DMAH_H
+
+#include "mlx5_ib.h"
+
+extern const struct ib_device_ops mlx5_ib_dev_dmah_ops;
+
+struct mlx5_ib_dmah {
+ struct ib_dmah ibdmah;
+ u16 st_index;
+};
+
+static inline struct mlx5_ib_dmah *to_mdmah(struct ib_dmah *ibdmah)
+{
+ return container_of(ibdmah, struct mlx5_ib_dmah, ibdmah);
+}
+
+#endif /* _MLX5_IB_DMAH_H */
diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c
index 8f4e5f22b84c..e32111117a5e 100644
--- a/drivers/infiniband/hw/mlx5/doorbell.c
+++ b/drivers/infiniband/hw/mlx5/doorbell.c
@@ -32,6 +32,7 @@
#include <linux/kref.h>
#include <linux/slab.h>
+#include <linux/sched/mm.h>
#include <rdma/ib_umem.h>
#include "mlx5_ib.h"
@@ -41,10 +42,10 @@ struct mlx5_ib_user_db_page {
struct ib_umem *umem;
unsigned long user_virt;
int refcnt;
+ struct mm_struct *mm;
};
-int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context,
- struct ib_udata *udata, unsigned long virt,
+int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
struct mlx5_db *db)
{
struct mlx5_ib_user_db_page *page;
@@ -53,7 +54,8 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context,
mutex_lock(&context->db_page_mutex);
list_for_each_entry(page, &context->db_page_list, list)
- if (page->user_virt == (virt & PAGE_MASK))
+ if ((current->mm == page->mm) &&
+ (page->user_virt == (virt & PAGE_MASK)))
goto found;
page = kmalloc(sizeof(*page), GFP_KERNEL);
@@ -64,17 +66,21 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context,
page->user_virt = (virt & PAGE_MASK);
page->refcnt = 0;
- page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0);
+ page->umem = ib_umem_get(context->ibucontext.device, virt & PAGE_MASK,
+ PAGE_SIZE, 0);
if (IS_ERR(page->umem)) {
err = PTR_ERR(page->umem);
kfree(page);
goto out;
}
+ mmgrab(current->mm);
+ page->mm = current->mm;
list_add(&page->list, &context->db_page_list);
found:
- db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK);
+ db->dma = sg_dma_address(page->umem->sgt_append.sgt.sgl) +
+ (virt & ~PAGE_MASK);
db->u.user_page = page;
++page->refcnt;
@@ -90,6 +96,7 @@ void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db)
if (!--db->u.user_page->refcnt) {
list_del(&db->u.user_page->list);
+ mmdrop(db->u.user_page->mm);
ib_umem_release(db->u.user_page->umem);
kfree(db->u.user_page);
}
diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
deleted file mode 100644
index 1fc302d41a53..000000000000
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ /dev/null
@@ -1,682 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
-/*
- * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
- */
-
-#include <rdma/ib_user_verbs.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/uverbs_types.h>
-#include <rdma/uverbs_ioctl.h>
-#include <rdma/uverbs_std_types.h>
-#include <rdma/mlx5_user_ioctl_cmds.h>
-#include <rdma/mlx5_user_ioctl_verbs.h>
-#include <rdma/ib_umem.h>
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/fs.h>
-#include "mlx5_ib.h"
-
-#define UVERBS_MODULE_NAME mlx5_ib
-#include <rdma/uverbs_named_ioctl.h>
-
-static int
-mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
- enum mlx5_flow_namespace_type *namespace)
-{
- switch (table_type) {
- case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX:
- *namespace = MLX5_FLOW_NAMESPACE_BYPASS;
- break;
- case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX:
- *namespace = MLX5_FLOW_NAMESPACE_EGRESS;
- break;
- case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB:
- *namespace = MLX5_FLOW_NAMESPACE_FDB;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
- [MLX5_IB_FLOW_TYPE_NORMAL] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- .u.ptr = {
- .len = sizeof(u16), /* data is priority */
- .min_len = sizeof(u16),
- }
- },
- [MLX5_IB_FLOW_TYPE_SNIFFER] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_NO_DATA(),
- },
- [MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_NO_DATA(),
- },
- [MLX5_IB_FLOW_TYPE_MC_DEFAULT] = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_NO_DATA(),
- },
-};
-
-#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
-static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
- struct mlx5_ib_flow_handler *flow_handler;
- struct mlx5_ib_flow_matcher *fs_matcher;
- struct ib_uobject **arr_flow_actions;
- struct ib_uflow_resources *uflow_res;
- void *devx_obj;
- int dest_id, dest_type;
- void *cmd_in;
- int inlen;
- bool dest_devx, dest_qp;
- struct ib_qp *qp = NULL;
- struct ib_uobject *uobj =
- uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
- struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
- int len, ret, i;
- u32 counter_id = 0;
-
- if (!capable(CAP_NET_RAW))
- return -EPERM;
-
- dest_devx =
- uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
- dest_qp = uverbs_attr_is_valid(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
-
- fs_matcher = uverbs_attr_get_obj(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS &&
- ((dest_devx && dest_qp) || (!dest_devx && !dest_qp)))
- return -EINVAL;
-
- /* Allow only DEVX object as dest when inserting to FDB */
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !dest_devx)
- return -EINVAL;
-
- if (dest_devx) {
- devx_obj = uverbs_attr_get_obj(
- attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
- if (IS_ERR(devx_obj))
- return PTR_ERR(devx_obj);
-
- /* Verify that the given DEVX object is a flow
- * steering destination.
- */
- if (!mlx5_ib_devx_is_flow_dest(devx_obj, &dest_id, &dest_type))
- return -EINVAL;
- /* Allow only flow table as dest when inserting to FDB */
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB &&
- dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
- return -EINVAL;
- } else if (dest_qp) {
- struct mlx5_ib_qp *mqp;
-
- qp = uverbs_attr_get_obj(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
- if (IS_ERR(qp))
- return PTR_ERR(qp);
-
- if (qp->qp_type != IB_QPT_RAW_PACKET)
- return -EINVAL;
-
- mqp = to_mqp(qp);
- if (mqp->flags & MLX5_IB_QP_RSS)
- dest_id = mqp->rss_qp.tirn;
- else
- dest_id = mqp->raw_packet_qp.rq.tirn;
- dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- } else {
- dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
- }
-
- len = uverbs_attr_get_uobjs_arr(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
- if (len) {
- devx_obj = arr_flow_actions[0]->object;
-
- if (!mlx5_ib_devx_is_flow_counter(devx_obj, &counter_id))
- return -EINVAL;
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
- }
-
- if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
- fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS)
- return -EINVAL;
-
- cmd_in = uverbs_attr_get_alloced_ptr(
- attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
- inlen = uverbs_attr_get_len(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
-
- uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
- if (!uflow_res)
- return -ENOMEM;
-
- len = uverbs_attr_get_uobjs_arr(attrs,
- MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
- for (i = 0; i < len; i++) {
- struct mlx5_ib_flow_action *maction =
- to_mflow_act(arr_flow_actions[i]->object);
-
- ret = parse_flow_flow_action(maction, false, &flow_act);
- if (ret)
- goto err_out;
- flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE,
- arr_flow_actions[i]->object);
- }
-
- ret = uverbs_copy_from(&flow_act.flow_tag, attrs,
- MLX5_IB_ATTR_CREATE_FLOW_TAG);
- if (!ret) {
- if (flow_act.flow_tag >= BIT(24)) {
- ret = -EINVAL;
- goto err_out;
- }
- flow_act.flags |= FLOW_ACT_HAS_TAG;
- }
-
- flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, &flow_act,
- counter_id,
- cmd_in, inlen,
- dest_id, dest_type);
- if (IS_ERR(flow_handler)) {
- ret = PTR_ERR(flow_handler);
- goto err_out;
- }
-
- ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res);
-
- return 0;
-err_out:
- ib_uverbs_flow_resources_free(uflow_res);
- return ret;
-}
-
-static int flow_matcher_cleanup(struct ib_uobject *uobject,
- enum rdma_remove_reason why,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_flow_matcher *obj = uobject->object;
- int ret;
-
- ret = ib_destroy_usecnt(&obj->usecnt, why, uobject);
- if (ret)
- return ret;
-
- kfree(obj);
- return 0;
-}
-
-static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
- struct mlx5_ib_flow_matcher *obj)
-{
- enum mlx5_ib_uapi_flow_table_type ft_type =
- MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX;
- u32 flags;
- int err;
-
- /* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older
- * users should switch to it. We leave this to not break userspace
- */
- if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) &&
- uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS))
- return -EINVAL;
-
- if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) {
- err = uverbs_get_const(&ft_type, attrs,
- MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE);
- if (err)
- return err;
-
- err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type);
- if (err)
- return err;
-
- return 0;
- }
-
- if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) {
- err = uverbs_get_flags32(&flags, attrs,
- MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
- IB_FLOW_ATTR_FLAGS_EGRESS);
- if (err)
- return err;
-
- if (flags) {
- mlx5_ib_ft_type_to_namespace(
- MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX,
- &obj->ns_type);
- return 0;
- }
- }
-
- obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;
-
- return 0;
-}
-
-static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uobject *uobj = uverbs_attr_get_uobject(
- attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
- struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
- struct mlx5_ib_flow_matcher *obj;
- int err;
-
- obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
-
- obj->mask_len = uverbs_attr_get_len(
- attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
- err = uverbs_copy_from(&obj->matcher_mask,
- attrs,
- MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
- if (err)
- goto end;
-
- obj->flow_type = uverbs_attr_get_enum_id(
- attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
-
- if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) {
- err = uverbs_copy_from(&obj->priority,
- attrs,
- MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
- if (err)
- goto end;
- }
-
- err = uverbs_copy_from(&obj->match_criteria_enable,
- attrs,
- MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA);
- if (err)
- goto end;
-
- err = mlx5_ib_matcher_ns(attrs, obj);
- if (err)
- goto end;
-
- uobj->object = obj;
- obj->mdev = dev->mdev;
- atomic_set(&obj->usecnt, 0);
- return 0;
-
-end:
- kfree(obj);
- return err;
-}
-
-void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
-{
- switch (maction->flow_action_raw.sub_type) {
- case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
- mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
- maction->flow_action_raw.action_id);
- break;
- case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
- mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
- maction->flow_action_raw.action_id);
- break;
- case MLX5_IB_FLOW_ACTION_DECAP:
- break;
- default:
- break;
- }
-}
-
-static struct ib_flow_action *
-mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
- enum mlx5_ib_uapi_flow_table_type ft_type,
- u8 num_actions, void *in)
-{
- enum mlx5_flow_namespace_type namespace;
- struct mlx5_ib_flow_action *maction;
- int ret;
-
- ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
- if (ret)
- return ERR_PTR(-EINVAL);
-
- maction = kzalloc(sizeof(*maction), GFP_KERNEL);
- if (!maction)
- return ERR_PTR(-ENOMEM);
-
- ret = mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in,
- &maction->flow_action_raw.action_id);
-
- if (ret) {
- kfree(maction);
- return ERR_PTR(ret);
- }
- maction->flow_action_raw.sub_type =
- MLX5_IB_FLOW_ACTION_MODIFY_HEADER;
- maction->flow_action_raw.dev = dev;
-
- return &maction->ib_action;
-}
-
-static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
-{
- return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- max_modify_header_actions) ||
- MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, max_modify_header_actions);
-}
-
-static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uobject *uobj = uverbs_attr_get_uobject(
- attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE);
- struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
- enum mlx5_ib_uapi_flow_table_type ft_type;
- struct ib_flow_action *action;
- int num_actions;
- void *in;
- int ret;
-
- if (!mlx5_ib_modify_header_supported(mdev))
- return -EOPNOTSUPP;
-
- in = uverbs_attr_get_alloced_ptr(attrs,
- MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
-
- num_actions = uverbs_attr_ptr_get_array_size(
- attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
- MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto));
- if (num_actions < 0)
- return num_actions;
-
- ret = uverbs_get_const(&ft_type, attrs,
- MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE);
- if (ret)
- return ret;
- action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in);
- if (IS_ERR(action))
- return PTR_ERR(action);
-
- uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev,
- IB_FLOW_ACTION_UNSPECIFIED);
-
- return 0;
-}
-
-static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev,
- u8 packet_reformat_type,
- u8 ft_type)
-{
- switch (packet_reformat_type) {
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
- if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
- return MLX5_CAP_FLOWTABLE(ibdev->mdev,
- encap_general_header);
- break;
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
- if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
- return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev,
- reformat_l2_to_l3_tunnel);
- break;
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
- if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
- return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev,
- reformat_l3_tunnel_to_l2);
- break;
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2:
- if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
- return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap);
- break;
- default:
- break;
- }
-
- return false;
-}
-
-static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt)
-{
- switch (dv_prt) {
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
- *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
- break;
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
- *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
- break;
- case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
- *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int mlx5_ib_flow_action_create_packet_reformat_ctx(
- struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_action *maction,
- u8 ft_type, u8 dv_prt,
- void *in, size_t len)
-{
- enum mlx5_flow_namespace_type namespace;
- u8 prm_prt;
- int ret;
-
- ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
- if (ret)
- return ret;
-
- ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt);
- if (ret)
- return ret;
-
- ret = mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len,
- in, namespace,
- &maction->flow_action_raw.action_id);
- if (ret)
- return ret;
-
- maction->flow_action_raw.sub_type =
- MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
- maction->flow_action_raw.dev = dev;
-
- return 0;
-}
-
-static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
- struct uverbs_attr_bundle *attrs)
-{
- struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
- MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE);
- struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
- enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt;
- enum mlx5_ib_uapi_flow_table_type ft_type;
- struct mlx5_ib_flow_action *maction;
- int ret;
-
- ret = uverbs_get_const(&ft_type, attrs,
- MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE);
- if (ret)
- return ret;
-
- ret = uverbs_get_const(&dv_prt, attrs,
- MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE);
- if (ret)
- return ret;
-
- if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type))
- return -EOPNOTSUPP;
-
- maction = kzalloc(sizeof(*maction), GFP_KERNEL);
- if (!maction)
- return -ENOMEM;
-
- if (dv_prt ==
- MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) {
- maction->flow_action_raw.sub_type =
- MLX5_IB_FLOW_ACTION_DECAP;
- maction->flow_action_raw.dev = mdev;
- } else {
- void *in;
- int len;
-
- in = uverbs_attr_get_alloced_ptr(attrs,
- MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
- if (IS_ERR(in)) {
- ret = PTR_ERR(in);
- goto free_maction;
- }
-
- len = uverbs_attr_get_len(attrs,
- MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
-
- ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev,
- maction, ft_type, dv_prt, in, len);
- if (ret)
- goto free_maction;
- }
-
- uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev,
- IB_FLOW_ACTION_UNSPECIFIED);
- return 0;
-
-free_maction:
- kfree(maction);
- return ret;
-}
-
-DECLARE_UVERBS_NAMED_METHOD(
- MLX5_IB_METHOD_CREATE_FLOW,
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
- UVERBS_OBJECT_FLOW,
- UVERBS_ACCESS_NEW,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(
- MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
- UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
- UA_MANDATORY,
- UA_ALLOC_AND_COPY),
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
- MLX5_IB_OBJECT_FLOW_MATCHER,
- UVERBS_ACCESS_READ,
- UA_MANDATORY),
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
- UVERBS_OBJECT_QP,
- UVERBS_ACCESS_READ),
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
- MLX5_IB_OBJECT_DEVX_OBJ,
- UVERBS_ACCESS_READ),
- UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_READ, 1,
- MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS,
- UA_OPTIONAL),
- UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
- UVERBS_ATTR_TYPE(u32),
- UA_OPTIONAL),
- UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
- MLX5_IB_OBJECT_DEVX_OBJ,
- UVERBS_ACCESS_READ, 1, 1,
- UA_OPTIONAL));
-
-DECLARE_UVERBS_NAMED_METHOD_DESTROY(
- MLX5_IB_METHOD_DESTROY_FLOW,
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
- UVERBS_OBJECT_FLOW,
- UVERBS_ACCESS_DESTROY,
- UA_MANDATORY));
-
-ADD_UVERBS_METHODS(mlx5_ib_fs,
- UVERBS_OBJECT_FLOW,
- &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW),
- &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
-
-DECLARE_UVERBS_NAMED_METHOD(
- MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER,
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_NEW,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
- UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES(
- set_action_in_add_action_in_auto)),
- UA_MANDATORY,
- UA_ALLOC_AND_COPY),
- UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
- enum mlx5_ib_uapi_flow_table_type,
- UA_MANDATORY));
-
-DECLARE_UVERBS_NAMED_METHOD(
- MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_NEW,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
- UVERBS_ATTR_MIN_SIZE(1),
- UA_ALLOC_AND_COPY,
- UA_OPTIONAL),
- UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
- enum mlx5_ib_uapi_flow_action_packet_reformat_type,
- UA_MANDATORY),
- UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
- enum mlx5_ib_uapi_flow_table_type,
- UA_MANDATORY));
-
-ADD_UVERBS_METHODS(
- mlx5_ib_flow_actions,
- UVERBS_OBJECT_FLOW_ACTION,
- &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER),
- &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT));
-
-DECLARE_UVERBS_NAMED_METHOD(
- MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
- MLX5_IB_OBJECT_FLOW_MATCHER,
- UVERBS_ACCESS_NEW,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(
- MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
- UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
- UA_MANDATORY),
- UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
- mlx5_ib_flow_type,
- UA_MANDATORY),
- UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
- UVERBS_ATTR_TYPE(u8),
- UA_MANDATORY),
- UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
- enum ib_flow_flags,
- UA_OPTIONAL),
- UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
- enum mlx5_ib_uapi_flow_table_type,
- UA_OPTIONAL));
-
-DECLARE_UVERBS_NAMED_METHOD_DESTROY(
- MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
- UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE,
- MLX5_IB_OBJECT_FLOW_MATCHER,
- UVERBS_ACCESS_DESTROY,
- UA_MANDATORY));
-
-DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
- UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup),
- &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
- &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
-
-const struct uapi_definition mlx5_ib_flow_defs[] = {
- UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
- MLX5_IB_OBJECT_FLOW_MATCHER),
- UAPI_DEF_CHAIN_OBJ_TREE(
- UVERBS_OBJECT_FLOW,
- &mlx5_ib_fs),
- UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
- &mlx5_ib_flow_actions),
- {},
-};
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
new file mode 100644
index 000000000000..d17823ce7f38
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -0,0 +1,3516 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/uverbs_std_types.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
+#include <rdma/ib_hdrs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_ucaps.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/fs.h>
+#include <linux/mlx5/fs_helpers.h>
+#include <linux/mlx5/eswitch.h>
+#include <net/inet_ecn.h>
+#include "mlx5_ib.h"
+#include "counters.h"
+#include "devx.h"
+#include "fs.h"
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+enum {
+ MATCH_CRITERIA_ENABLE_OUTER_BIT,
+ MATCH_CRITERIA_ENABLE_MISC_BIT,
+ MATCH_CRITERIA_ENABLE_INNER_BIT,
+ MATCH_CRITERIA_ENABLE_MISC2_BIT
+};
+
+struct mlx5_per_qp_opfc {
+ struct mlx5_ib_op_fc opfcs[MLX5_IB_OPCOUNTER_MAX];
+};
+
+#define HEADER_IS_ZERO(match_criteria, headers) \
+ !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
+ 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \
+
+static u8 get_match_criteria_enable(u32 *match_criteria)
+{
+ u8 match_criteria_enable;
+
+ match_criteria_enable =
+ (!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
+ MATCH_CRITERIA_ENABLE_OUTER_BIT;
+ match_criteria_enable |=
+ (!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
+ MATCH_CRITERIA_ENABLE_MISC_BIT;
+ match_criteria_enable |=
+ (!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
+ MATCH_CRITERIA_ENABLE_INNER_BIT;
+ match_criteria_enable |=
+ (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
+ MATCH_CRITERIA_ENABLE_MISC2_BIT;
+
+ return match_criteria_enable;
+}
+
+static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
+{
+ u8 entry_mask;
+ u8 entry_val;
+ int err = 0;
+
+ if (!mask)
+ goto out;
+
+ entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c,
+ ip_protocol);
+ entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v,
+ ip_protocol);
+ if (!entry_mask) {
+ MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
+ goto out;
+ }
+ /* Don't override existing ip protocol */
+ if (mask != entry_mask || val != entry_val)
+ err = -EINVAL;
+out:
+ return err;
+}
+
+static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
+ bool inner)
+{
+ if (inner) {
+ MLX5_SET(fte_match_set_misc,
+ misc_c, inner_ipv6_flow_label, mask);
+ MLX5_SET(fte_match_set_misc,
+ misc_v, inner_ipv6_flow_label, val);
+ } else {
+ MLX5_SET(fte_match_set_misc,
+ misc_c, outer_ipv6_flow_label, mask);
+ MLX5_SET(fte_match_set_misc,
+ misc_v, outer_ipv6_flow_label, val);
+ }
+}
+
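+/*
+ * The TOS byte is matched as two separate fields in HW: ECN in the
+ * low two bits and DSCP in the upper six.
+ */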
+static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
+{
+ MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
+}
+
+static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
+{
+ if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
+ !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
+ return -EOPNOTSUPP;
+
+ if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
+ !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
+ return -EOPNOTSUPP;
+
+ if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
+ !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
+ return -EOPNOTSUPP;
+
+ if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
+ !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+#define LAST_ETH_FIELD vlan_tag
+#define LAST_IPV4_FIELD tos
+#define LAST_IPV6_FIELD traffic_class
+#define LAST_TCP_UDP_FIELD src_port
+#define LAST_TUNNEL_FIELD tunnel_id
+#define LAST_FLOW_TAG_FIELD tag_id
+#define LAST_DROP_FIELD size
+#define LAST_COUNTERS_FIELD counters
+
+/* Field is the last supported field */
+#define FIELDS_NOT_SUPPORTED(filter, field) \
+ memchr_inv((void *)&filter.field + sizeof(filter.field), 0, \
+ sizeof(filter) - offsetofend(typeof(filter), field))
+
+int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
+ bool is_egress,
+ struct mlx5_flow_act *action)
+{
+ switch (maction->ib_action.type) {
+ case IB_FLOW_ACTION_UNSPECIFIED:
+ if (maction->flow_action_raw.sub_type ==
+ MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
+ if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ return -EINVAL;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ action->modify_hdr =
+ maction->flow_action_raw.modify_hdr;
+ return 0;
+ }
+ if (maction->flow_action_raw.sub_type ==
+ MLX5_IB_FLOW_ACTION_DECAP) {
+ if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
+ return -EINVAL;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ return 0;
+ }
+ if (maction->flow_action_raw.sub_type ==
+ MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
+ if (action->action &
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
+ return -EINVAL;
+ action->action |=
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ action->pkt_reformat =
+ maction->flow_action_raw.pkt_reformat;
+ return 0;
+ }
+ fallthrough;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
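+/* Translate a single ib_flow_spec into the mlx5 match criteria/values and
+ * flow actions. prev_type is the previously parsed spec type and is used to
+ * place MPLS labels relative to the preceding UDP/GRE header.
+ */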
+static int parse_flow_attr(struct mlx5_core_dev *mdev,
+ struct mlx5_flow_spec *spec,
+ const union ib_flow_spec *ib_spec,
+ const struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_act *action, u32 prev_type)
+{
+ struct mlx5_flow_context *flow_context = &spec->flow_context;
+ u32 *match_c = spec->match_criteria;
+ u32 *match_v = spec->match_value;
+ void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ misc_parameters);
+ void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ misc_parameters);
+ void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ misc_parameters_2);
+ void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ misc_parameters_2);
+ void *headers_c;
+ void *headers_v;
+ int match_ipv;
+ int ret;
+
+ if (ib_spec->type & IB_FLOW_SPEC_INNER) {
+ headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ inner_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ inner_headers);
+ match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_ip_version);
+ } else {
+ headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ outer_headers);
+ match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_ip_version);
+ }
+
+ switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
+ case IB_FLOW_SPEC_ETH:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
+ return -EOPNOTSUPP;
+
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dmac_47_16),
+ ib_spec->eth.mask.dst_mac);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dmac_47_16),
+ ib_spec->eth.val.dst_mac);
+
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ smac_47_16),
+ ib_spec->eth.mask.src_mac);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ smac_47_16),
+ ib_spec->eth.val.src_mac);
+
+ if (ib_spec->eth.mask.vlan_tag) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ cvlan_tag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ cvlan_tag, 1);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ first_vid, ntohs(ib_spec->eth.val.vlan_tag));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ first_cfi,
+ ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ first_cfi,
+ ntohs(ib_spec->eth.val.vlan_tag) >> 12);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ first_prio,
+ ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ first_prio,
+ ntohs(ib_spec->eth.val.vlan_tag) >> 13);
+ }
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ethertype, ntohs(ib_spec->eth.mask.ether_type));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ethertype, ntohs(ib_spec->eth.val.ether_type));
+ break;
+ case IB_FLOW_SPEC_IPV4:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
+ return -EOPNOTSUPP;
+
+ if (match_ipv) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ip_version, 0xf);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ip_version, MLX5_FS_IPV4_VERSION);
+ } else {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ethertype, 0xffff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ethertype, ETH_P_IP);
+ }
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.mask.src_ip,
+ sizeof(ib_spec->ipv4.mask.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.val.src_ip,
+ sizeof(ib_spec->ipv4.val.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.mask.dst_ip,
+ sizeof(ib_spec->ipv4.mask.dst_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.val.dst_ip,
+ sizeof(ib_spec->ipv4.val.dst_ip));
+
+ set_tos(headers_c, headers_v,
+ ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
+
+ if (set_proto(headers_c, headers_v,
+ ib_spec->ipv4.mask.proto,
+ ib_spec->ipv4.val.proto))
+ return -EINVAL;
+ break;
+ case IB_FLOW_SPEC_IPV6:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
+ return -EOPNOTSUPP;
+
+ if (match_ipv) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ip_version, 0xf);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ip_version, MLX5_FS_IPV6_VERSION);
+ } else {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ethertype, 0xffff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ethertype, ETH_P_IPV6);
+ }
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.mask.src_ip,
+ sizeof(ib_spec->ipv6.mask.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.val.src_ip,
+ sizeof(ib_spec->ipv6.val.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.mask.dst_ip,
+ sizeof(ib_spec->ipv6.mask.dst_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.val.dst_ip,
+ sizeof(ib_spec->ipv6.val.dst_ip));
+
+ set_tos(headers_c, headers_v,
+ ib_spec->ipv6.mask.traffic_class,
+ ib_spec->ipv6.val.traffic_class);
+
+ if (set_proto(headers_c, headers_v,
+ ib_spec->ipv6.mask.next_hdr,
+ ib_spec->ipv6.val.next_hdr))
+ return -EINVAL;
+
+ set_flow_label(misc_params_c, misc_params_v,
+ ntohl(ib_spec->ipv6.mask.flow_label),
+ ntohl(ib_spec->ipv6.val.flow_label),
+ ib_spec->type & IB_FLOW_SPEC_INNER);
+ break;
+ case IB_FLOW_SPEC_ESP:
+ return -EOPNOTSUPP;
+ case IB_FLOW_SPEC_TCP:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
+ LAST_TCP_UDP_FIELD))
+ return -EOPNOTSUPP;
+
+ if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
+ ntohs(ib_spec->tcp_udp.mask.src_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
+ ntohs(ib_spec->tcp_udp.val.src_port));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
+ ntohs(ib_spec->tcp_udp.mask.dst_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
+ ntohs(ib_spec->tcp_udp.val.dst_port));
+ break;
+ case IB_FLOW_SPEC_UDP:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
+ LAST_TCP_UDP_FIELD))
+ return -EOPNOTSUPP;
+
+ if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
+ ntohs(ib_spec->tcp_udp.mask.src_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
+ ntohs(ib_spec->tcp_udp.val.src_port));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
+ ntohs(ib_spec->tcp_udp.mask.dst_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
+ ntohs(ib_spec->tcp_udp.val.dst_port));
+ break;
+ case IB_FLOW_SPEC_GRE:
+ if (ib_spec->gre.mask.c_ks_res0_ver)
+ return -EOPNOTSUPP;
+
+ if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
+ 0xff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+ IPPROTO_GRE);
+
+ MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
+ ntohs(ib_spec->gre.mask.protocol));
+ MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
+ ntohs(ib_spec->gre.val.protocol));
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
+ gre_key.nvgre.hi),
+ &ib_spec->gre.mask.key,
+ sizeof(ib_spec->gre.mask.key));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
+ gre_key.nvgre.hi),
+ &ib_spec->gre.val.key,
+ sizeof(ib_spec->gre.val.key));
+ break;
+ case IB_FLOW_SPEC_MPLS:
+ switch (prev_type) {
+ case IB_FLOW_SPEC_UDP:
+ if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_first_mpls_over_udp),
+ &ib_spec->mpls.mask.tag))
+ return -EOPNOTSUPP;
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+ outer_first_mpls_over_udp),
+ &ib_spec->mpls.val.tag,
+ sizeof(ib_spec->mpls.val.tag));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+ outer_first_mpls_over_udp),
+ &ib_spec->mpls.mask.tag,
+ sizeof(ib_spec->mpls.mask.tag));
+ break;
+ case IB_FLOW_SPEC_GRE:
+ if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_first_mpls_over_gre),
+ &ib_spec->mpls.mask.tag))
+ return -EOPNOTSUPP;
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+ outer_first_mpls_over_gre),
+ &ib_spec->mpls.val.tag,
+ sizeof(ib_spec->mpls.val.tag));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+ outer_first_mpls_over_gre),
+ &ib_spec->mpls.mask.tag,
+ sizeof(ib_spec->mpls.mask.tag));
+ break;
+ default:
+ if (ib_spec->type & IB_FLOW_SPEC_INNER) {
+ if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_first_mpls),
+ &ib_spec->mpls.mask.tag))
+ return -EOPNOTSUPP;
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+ inner_first_mpls),
+ &ib_spec->mpls.val.tag,
+ sizeof(ib_spec->mpls.val.tag));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+ inner_first_mpls),
+ &ib_spec->mpls.mask.tag,
+ sizeof(ib_spec->mpls.mask.tag));
+ } else {
+ if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_first_mpls),
+ &ib_spec->mpls.mask.tag))
+ return -EOPNOTSUPP;
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+ outer_first_mpls),
+ &ib_spec->mpls.val.tag,
+ sizeof(ib_spec->mpls.val.tag));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+ outer_first_mpls),
+ &ib_spec->mpls.mask.tag,
+ sizeof(ib_spec->mpls.mask.tag));
+ }
+ }
+ break;
+ case IB_FLOW_SPEC_VXLAN_TUNNEL:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
+ LAST_TUNNEL_FIELD))
+ return -EOPNOTSUPP;
+
+ MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
+ ntohl(ib_spec->tunnel.mask.tunnel_id));
+ MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
+ ntohl(ib_spec->tunnel.val.tunnel_id));
+ break;
+ case IB_FLOW_SPEC_ACTION_TAG:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
+ LAST_FLOW_TAG_FIELD))
+ return -EOPNOTSUPP;
+ if (ib_spec->flow_tag.tag_id >= BIT(24))
+ return -EINVAL;
+
+ flow_context->flow_tag = ib_spec->flow_tag.tag_id;
+ flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
+ break;
+ case IB_FLOW_SPEC_ACTION_DROP:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
+ LAST_DROP_FIELD))
+ return -EOPNOTSUPP;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+ break;
+ case IB_FLOW_SPEC_ACTION_HANDLE:
+ ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
+ flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
+ if (ret)
+ return ret;
+ break;
+ case IB_FLOW_SPEC_ACTION_COUNT:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
+ LAST_COUNTERS_FIELD))
+ return -EOPNOTSUPP;
+
+ /* for now, only one counters spec per flow is supported */
+ if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
+ return -EINVAL;
+
+ action->counters = ib_spec->flow_count.counters;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* A flow that could catch both multicast and unicast packets must not be
+ * placed in the multicast flow steering table, since such a rule could
+ * steal packets that belong to other multicast flows.
+ */
+static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
+{
+ union ib_flow_spec *flow_spec;
+
+ if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
+ ib_attr->num_of_specs < 1)
+ return false;
+
+ flow_spec = (union ib_flow_spec *)(ib_attr + 1);
+ if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
+ struct ib_flow_spec_ipv4 *ipv4_spec;
+
+ ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
+ if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
+ return true;
+
+ return false;
+ }
+
+ if (flow_spec->type == IB_FLOW_SPEC_ETH) {
+ struct ib_flow_spec_eth *eth_spec;
+
+ eth_spec = (struct ib_flow_spec_eth *)flow_spec;
+ return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
+ is_multicast_ether_addr(eth_spec->val.dst_mac);
+ }
+
+ return false;
+}
+
+static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
+ const struct ib_flow_attr *flow_attr,
+ bool check_inner)
+{
+ union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
+ int match_ipv = check_inner ?
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_ip_version) :
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_ip_version);
+ int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
+ bool ipv4_spec_valid, ipv6_spec_valid;
+ unsigned int ip_spec_type = 0;
+ bool has_ethertype = false;
+ unsigned int spec_index;
+ bool mask_valid = true;
+ u16 eth_type = 0;
+ bool type_valid;
+
+ /* Validate that ethertype is correct */
+ for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
+ if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
+ ib_spec->eth.mask.ether_type) {
+ mask_valid = (ib_spec->eth.mask.ether_type ==
+ htons(0xffff));
+ has_ethertype = true;
+ eth_type = ntohs(ib_spec->eth.val.ether_type);
+ } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
+ (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
+ ip_spec_type = ib_spec->type;
+ }
+ ib_spec = (void *)ib_spec + ib_spec->size;
+ }
+
+ type_valid = (!has_ethertype) || (!ip_spec_type);
+ if (!type_valid && mask_valid) {
+ ipv4_spec_valid = (eth_type == ETH_P_IP) &&
+ (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
+ ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
+ (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
+
+ type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
+ (((eth_type == ETH_P_MPLS_UC) ||
+ (eth_type == ETH_P_MPLS_MC)) && match_ipv);
+ }
+
+ return type_valid;
+}
+
+static bool is_valid_attr(struct mlx5_core_dev *mdev,
+ const struct ib_flow_attr *flow_attr)
+{
+ return is_valid_ethertype(mdev, flow_attr, false) &&
+ is_valid_ethertype(mdev, flow_attr, true);
+}
+
+static void put_flow_table(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *prio, bool ft_added)
+{
+ prio->refcount -= !!ft_added;
+ if (!prio->refcount) {
+ mlx5_destroy_flow_table(prio->flow_table);
+ prio->flow_table = NULL;
+ }
+}
+
+static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
+{
+ struct mlx5_ib_flow_handler *handler = container_of(flow_id,
+ struct mlx5_ib_flow_handler,
+ ibflow);
+ struct mlx5_ib_flow_handler *iter, *tmp;
+ struct mlx5_ib_dev *dev = handler->dev;
+
+ mutex_lock(&dev->flow_db->lock);
+
+ list_for_each_entry_safe(iter, tmp, &handler->list, list) {
+ mlx5_del_flow_rules(iter->rule);
+ put_flow_table(dev, iter->prio, true);
+ list_del(&iter->list);
+ kfree(iter);
+ }
+
+ mlx5_del_flow_rules(handler->rule);
+ put_flow_table(dev, handler->prio, true);
+ mlx5_ib_counters_clear_description(handler->ibcounters);
+ mutex_unlock(&dev->flow_db->lock);
+ if (handler->flow_matcher)
+ atomic_dec(&handler->flow_matcher->usecnt);
+ kfree(handler);
+
+ return 0;
+}
+
+static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
+{
+ priority *= 2;
+ if (!dont_trap)
+ priority++;
+ return priority;
+}
+
+enum flow_table_type {
+ MLX5_IB_FT_RX,
+ MLX5_IB_FT_TX
+};
+
+#define MLX5_FS_MAX_TYPES 6
+#define MLX5_FS_MAX_ENTRIES BIT(16)
+
+static bool __maybe_unused mlx5_ib_shared_ft_allowed(struct ib_device *device)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+
+ return MLX5_CAP_GEN(dev->mdev, shared_object_to_user_object_allowed);
+}
+
+static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
+ struct mlx5_ib_flow_prio *prio,
+ struct mlx5_flow_table_attr *ft_attr)
+{
+ struct mlx5_flow_table *ft;
+
+ ft = mlx5_create_auto_grouped_flow_table(ns, ft_attr);
+ if (IS_ERR(ft))
+ return ERR_CAST(ft);
+
+ prio->flow_table = ft;
+ prio->refcount = 0;
+ return prio;
+}
+
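+/* Pick the flow table priority for a verbs flow attribute (bypass/egress for
+ * normal flows, leftovers for default rules, sniffer tables otherwise) and
+ * create the auto-grouped flow table on first use.
+ */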
+static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
+ struct ib_flow_attr *flow_attr,
+ enum flow_table_type ft_type)
+{
+ bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *ns = NULL;
+ enum mlx5_flow_namespace_type fn_type;
+ struct mlx5_ib_flow_prio *prio;
+ struct mlx5_flow_table *ft;
+ int max_table_size;
+ int num_entries;
+ int num_groups;
+ bool esw_encap;
+ u32 flags = 0;
+ int priority;
+
+ max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ log_max_ft_size));
+ esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
+ DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+ switch (flow_attr->type) {
+ case IB_FLOW_ATTR_NORMAL:
+ if (flow_is_multicast_only(flow_attr) && !dont_trap)
+ priority = MLX5_IB_FLOW_MCAST_PRIO;
+ else
+ priority = ib_prio_to_core_prio(flow_attr->priority,
+ dont_trap);
+ if (ft_type == MLX5_IB_FT_RX) {
+ fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
+ prio = &dev->flow_db->prios[priority];
+ if (!dev->is_rep && !esw_encap &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (!dev->is_rep && !esw_encap &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ reformat_l3_tunnel_to_l2))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ } else {
+ max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(
+ dev->mdev, log_max_ft_size));
+ fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
+ prio = &dev->flow_db->egress_prios[priority];
+ if (!dev->is_rep && !esw_encap &&
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ }
+ ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
+ num_entries = MLX5_FS_MAX_ENTRIES;
+ num_groups = MLX5_FS_MAX_TYPES;
+ break;
+ case IB_FLOW_ATTR_ALL_DEFAULT:
+ case IB_FLOW_ATTR_MC_DEFAULT:
+ ns = mlx5_get_flow_namespace(dev->mdev,
+ MLX5_FLOW_NAMESPACE_LEFTOVERS);
+ build_leftovers_ft_param(&priority, &num_entries, &num_groups);
+ prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
+ break;
+ case IB_FLOW_ATTR_SNIFFER:
+ if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+ allow_sniffer_and_nic_rx_shared_tir))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ ns = mlx5_get_flow_namespace(
+ dev->mdev, ft_type == MLX5_IB_FT_RX ?
+ MLX5_FLOW_NAMESPACE_SNIFFER_RX :
+ MLX5_FLOW_NAMESPACE_SNIFFER_TX);
+
+ prio = &dev->flow_db->sniffer[ft_type];
+ priority = 0;
+ num_entries = 1;
+ num_groups = 1;
+ break;
+ default:
+ break;
+ }
+
+ if (!ns)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ max_table_size = min_t(int, num_entries, max_table_size);
+
+ ft = prio->flow_table;
+ if (ft)
+ return prio;
+
+ ft_attr.prio = priority;
+ ft_attr.max_fte = max_table_size;
+ ft_attr.flags = flags;
+ ft_attr.autogroup.max_num_groups = num_groups;
+ return _get_prio(ns, prio, &ft_attr);
+}
+
+enum {
+ RDMA_RX_ECN_OPCOUNTER_PER_QP_PRIO,
+ RDMA_RX_CNP_OPCOUNTER_PER_QP_PRIO,
+ RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO,
+ RDMA_RX_ECN_OPCOUNTER_PRIO,
+ RDMA_RX_CNP_OPCOUNTER_PRIO,
+ RDMA_RX_PKTS_BYTES_OPCOUNTER_PRIO,
+};
+
+enum {
+ RDMA_TX_CNP_OPCOUNTER_PER_QP_PRIO,
+ RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO,
+ RDMA_TX_CNP_OPCOUNTER_PRIO,
+ RDMA_TX_PKTS_BYTES_OPCOUNTER_PRIO,
+};
+
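+/* Limit a counter match spec to traffic of a single vHCA port; used when
+ * multiple ports are handled by the same core device.
+ */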
+static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num,
+ struct mlx5_flow_spec *spec)
+{
+ if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
+ ft_field_support.source_vhca_port) ||
+ !MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
+ ft_field_support.source_vhca_port))
+ return -EOPNOTSUPP;
+
+ MLX5_SET_TO_ONES(fte_match_param, &spec->match_criteria,
+ misc_parameters.source_vhca_port);
+ MLX5_SET(fte_match_param, &spec->match_value,
+ misc_parameters.source_vhca_port, port_num);
+
+ return 0;
+}
+
+static int set_ecn_ce_spec(struct mlx5_ib_dev *dev, u32 port_num,
+ struct mlx5_flow_spec *spec, int ipv)
+{
+ if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
+ ft_field_support.outer_ip_version))
+ return -EOPNOTSUPP;
+
+ if (mlx5_core_mp_enabled(dev->mdev) &&
+ set_vhca_port_spec(dev, port_num, spec))
+ return -EOPNOTSUPP;
+
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.ip_ecn);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_ecn,
+ INET_ECN_CE);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version,
+ ipv);
+
+ spec->match_criteria_enable =
+ get_match_criteria_enable(spec->match_criteria);
+
+ return 0;
+}
+
+static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num,
+ struct mlx5_flow_spec *spec)
+{
+ if (mlx5_core_mp_enabled(dev->mdev) &&
+ set_vhca_port_spec(dev, port_num, spec))
+ return -EOPNOTSUPP;
+
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ misc_parameters.bth_opcode);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters.bth_opcode,
+ IB_BTH_OPCODE_CNP);
+
+ spec->match_criteria_enable =
+ get_match_criteria_enable(spec->match_criteria);
+
+ return 0;
+}
+
+/* Return the prio we should use for the given optional counter type. Bytes
+ * counters reuse the prio of their packets counterpart, since the two share
+ * the same resources.
+ */
+static struct mlx5_ib_flow_prio *get_opfc_prio(struct mlx5_ib_dev *dev,
+ u32 type)
+{
+ u32 prio_type;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ prio_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ prio_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
+ prio_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
+ prio_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ default:
+ prio_type = type;
+ }
+
+ return &dev->flow_db->opfcs[prio_type];
+}
+
+static void put_per_qp_prio(struct mlx5_ib_dev *dev,
+ enum mlx5_ib_optional_counter_type type)
+{
+ enum mlx5_ib_optional_counter_type per_qp_type;
+ struct mlx5_ib_flow_prio *prio;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
+ break;
+ default:
+ return;
+ }
+
+ prio = get_opfc_prio(dev, per_qp_type);
+ put_flow_table(dev, prio, true);
+}
+
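+/* Create the per-QP flow table that backs the given optional counter type if
+ * it does not exist yet; a freshly created table starts with one reference.
+ */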
+static int get_per_qp_prio(struct mlx5_ib_dev *dev,
+ enum mlx5_ib_optional_counter_type type)
+{
+ enum mlx5_ib_optional_counter_type per_qp_type;
+ struct mlx5_flow_table_attr ft_attr = {};
+ enum mlx5_flow_namespace_type fn_type;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_ib_flow_prio *prio;
+ int priority;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_ECN_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_CNP_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_CNP_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
+ if (!ns)
+ return -EOPNOTSUPP;
+
+ prio = get_opfc_prio(dev, per_qp_type);
+ if (prio->flow_table)
+ return 0;
+
+ ft_attr.prio = priority;
+ ft_attr.max_fte = MLX5_FS_MAX_POOL_SIZE;
+ ft_attr.autogroup.max_num_groups = 1;
+ prio = _get_prio(ns, prio, &ft_attr);
+ if (IS_ERR(prio))
+ return PTR_ERR(prio);
+
+ prio->refcount = 1;
+
+ return 0;
+}
+
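+/* Return the per-QP counter state for qp_num, allocating a zeroed entry if
+ * none exists yet. *new tells the caller whether it must free the entry on
+ * error; it is only inserted into the xarray once rules are added.
+ */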
+static struct mlx5_per_qp_opfc *get_per_qp_opfc(struct xarray *qpn_opfc_xa,
+ u32 qp_num, bool *new)
+{
+ struct mlx5_per_qp_opfc *per_qp_opfc;
+
+ *new = false;
+
+ per_qp_opfc = xa_load(qpn_opfc_xa, qp_num);
+ if (per_qp_opfc)
+ return per_qp_opfc;
+ per_qp_opfc = kzalloc(sizeof(*per_qp_opfc), GFP_KERNEL);
+
+ if (!per_qp_opfc)
+ return NULL;
+
+ *new = true;
+ return per_qp_opfc;
+}
+
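+/* Attach a per-QP optional counter to the steering: build the match spec for
+ * the counter type, add the counting rule(s) on the per-QP priority table and
+ * remember the state in the QPN xarray. Packets/bytes counters of the same
+ * direction share a single rule and flow counter.
+ */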
+static int add_op_fc_rules(struct mlx5_ib_dev *dev,
+ struct mlx5_fc *fc_arr[MLX5_IB_OPCOUNTER_MAX],
+ struct xarray *qpn_opfc_xa,
+ struct mlx5_per_qp_opfc *per_qp_opfc,
+ struct mlx5_ib_flow_prio *prio,
+ enum mlx5_ib_optional_counter_type type,
+ u32 qp_num, u32 port_num)
+{
+ struct mlx5_ib_op_fc *opfc = &per_qp_opfc->opfcs[type], *in_use_opfc;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_destination dst;
+ struct mlx5_flow_spec *spec;
+ int i, err, spec_num;
+ bool is_tx;
+
+ if (opfc->fc)
+ return -EEXIST;
+
+ if (mlx5r_is_opfc_shared_and_in_use(per_qp_opfc->opfcs, type,
+ &in_use_opfc)) {
+ opfc->fc = in_use_opfc->fc;
+ opfc->rule[0] = in_use_opfc->rule[0];
+ return 0;
+ }
+
+ opfc->fc = fc_arr[type];
+
+ spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto null_fc;
+ }
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP:
+ if (set_ecn_ce_spec(dev, port_num, &spec[0],
+ MLX5_FS_IPV4_VERSION) ||
+ set_ecn_ce_spec(dev, port_num, &spec[1],
+ MLX5_FS_IPV6_VERSION)) {
+ err = -EOPNOTSUPP;
+ goto free_spec;
+ }
+ spec_num = 2;
+ is_tx = false;
+
+ MLX5_SET_TO_ONES(fte_match_param, spec[1].match_criteria,
+ misc_parameters.bth_dst_qp);
+ MLX5_SET(fte_match_param, spec[1].match_value,
+ misc_parameters.bth_dst_qp, qp_num);
+ spec[1].match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP:
+ if (!MLX5_CAP_FLOWTABLE(
+ dev->mdev,
+ ft_field_support_2_nic_receive_rdma.bth_opcode) ||
+ set_cnp_spec(dev, port_num, &spec[0])) {
+ err = -EOPNOTSUPP;
+ goto free_spec;
+ }
+ spec_num = 1;
+ is_tx = false;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP:
+ if (!MLX5_CAP_FLOWTABLE(
+ dev->mdev,
+ ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
+ set_cnp_spec(dev, port_num, &spec[0])) {
+ err = -EOPNOTSUPP;
+ goto free_spec;
+ }
+ spec_num = 1;
+ is_tx = true;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP:
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
+ spec_num = 1;
+ is_tx = true;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP:
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
+ spec_num = 1;
+ is_tx = false;
+ break;
+ default:
+ err = -EINVAL;
+ goto free_spec;
+ }
+
+ if (is_tx) {
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ misc_parameters.source_sqn);
+ MLX5_SET(fte_match_param, spec->match_value,
+ misc_parameters.source_sqn, qp_num);
+ } else {
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ misc_parameters.bth_dst_qp);
+ MLX5_SET(fte_match_param, spec->match_value,
+ misc_parameters.bth_dst_qp, qp_num);
+ }
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+
+ dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dst.counter = opfc->fc;
+
+ flow_act.action =
+ MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+
+ for (i = 0; i < spec_num; i++) {
+ opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
+ &flow_act, &dst, 1);
+ if (IS_ERR(opfc->rule[i])) {
+ err = PTR_ERR(opfc->rule[i]);
+ goto del_rules;
+ }
+ }
+ prio->refcount += spec_num;
+
+ err = xa_err(xa_store(qpn_opfc_xa, qp_num, per_qp_opfc, GFP_KERNEL));
+ if (err)
+ goto del_rules;
+
+ kfree(spec);
+
+ return 0;
+
+del_rules:
+ while (i--)
+ mlx5_del_flow_rules(opfc->rule[i]);
+ put_flow_table(dev, prio, false);
+free_spec:
+ kfree(spec);
+null_fc:
+ opfc->fc = NULL;
+ return err;
+}
+
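+/* Per-QP RDMA packets and bytes counters of the same direction share one
+ * flow counter; return the sibling's mlx5_fc if it was already allocated.
+ */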
+static bool
+is_fc_shared_and_in_use(struct mlx5_fc *fc_arr[MLX5_IB_OPCOUNTER_MAX], u32 type,
+ struct mlx5_fc **fc)
+{
+ u32 shared_fc_type;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ default:
+ return false;
+ }
+
+ *fc = fc_arr[shared_fc_type];
+ if (!(*fc))
+ return false;
+
+ return true;
+}
+
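+/* Free the per-QP flow counters. For a shared packets/bytes pair the first
+ * slot is only cleared, and the counter itself is destroyed when its sibling
+ * slot is reached.
+ */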
+void mlx5r_fs_destroy_fcs(struct mlx5_ib_dev *dev,
+ struct mlx5_fc *fc_arr[MLX5_IB_OPCOUNTER_MAX])
+{
+ struct mlx5_fc *in_use_fc;
+ int i;
+
+ for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) {
+ if (!fc_arr[i])
+ continue;
+
+ if (is_fc_shared_and_in_use(fc_arr, i, &in_use_fc)) {
+ fc_arr[i] = NULL;
+ continue;
+ }
+
+ mlx5_fc_destroy(dev->mdev, fc_arr[i]);
+ fc_arr[i] = NULL;
+ }
+}
+
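+/* Enable a port-wide optional counter: build the match spec(s), create the
+ * counter's flow table (and the matching per-QP table) on first use, and add
+ * rules that count matching traffic into opfc->fc.
+ */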
+int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
+ struct mlx5_ib_op_fc *opfc,
+ enum mlx5_ib_optional_counter_type type)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ enum mlx5_flow_namespace_type fn_type;
+ int priority, i, err, spec_num;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_destination dst;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_ib_flow_prio *prio;
+ struct mlx5_flow_spec *spec;
+
+ spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
+ if (set_ecn_ce_spec(dev, port_num, &spec[0],
+ MLX5_FS_IPV4_VERSION) ||
+ set_ecn_ce_spec(dev, port_num, &spec[1],
+ MLX5_FS_IPV6_VERSION)) {
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+ spec_num = 2;
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_ECN_OPCOUNTER_PRIO;
+ break;
+
+ case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
+ if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+ ft_field_support_2_nic_receive_rdma.bth_opcode) ||
+ set_cnp_spec(dev, port_num, &spec[0])) {
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+ spec_num = 1;
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_CNP_OPCOUNTER_PRIO;
+ break;
+
+ case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
+ if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+ ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
+ set_cnp_spec(dev, port_num, &spec[0])) {
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+ spec_num = 1;
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
+ break;
+
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ spec_num = 1;
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PRIO;
+ break;
+
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ spec_num = 1;
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PRIO;
+ break;
+
+ default:
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+
+ ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
+ if (!ns) {
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+
+ prio = get_opfc_prio(dev, type);
+ if (!prio->flow_table) {
+ err = get_per_qp_prio(dev, type);
+ if (err)
+ goto free;
+
+ ft_attr.prio = priority;
+ ft_attr.max_fte = dev->num_ports * MAX_OPFC_RULES;
+ ft_attr.autogroup.max_num_groups = 1;
+ prio = _get_prio(ns, prio, &ft_attr);
+ if (IS_ERR(prio)) {
+ err = PTR_ERR(prio);
+ goto put_prio;
+ }
+ }
+
+ dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dst.counter = opfc->fc;
+
+ flow_act.action =
+ MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+
+ for (i = 0; i < spec_num; i++) {
+ opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
+ &flow_act, &dst, 1);
+ if (IS_ERR(opfc->rule[i])) {
+ err = PTR_ERR(opfc->rule[i]);
+ goto del_rules;
+ }
+ }
+ prio->refcount += spec_num;
+ kfree(spec);
+
+ return 0;
+
+del_rules:
+ for (i -= 1; i >= 0; i--)
+ mlx5_del_flow_rules(opfc->rule[i]);
+ put_flow_table(dev, prio, false);
+put_prio:
+ put_per_qp_prio(dev, type);
+free:
+ kfree(spec);
+ return err;
+}
+
+void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_op_fc *opfc,
+ enum mlx5_ib_optional_counter_type type)
+{
+ struct mlx5_ib_flow_prio *prio;
+ int i;
+
+ prio = get_opfc_prio(dev, type);
+
+ for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) {
+ mlx5_del_flow_rules(opfc->rule[i]);
+ put_flow_table(dev, prio, true);
+ }
+
+ put_per_qp_prio(dev, type);
+}
+
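+/* Remove all per-QP optional-counter rules installed for this QP, drop the
+ * flow table references they held and erase the QP from the xarray.
+ */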
+void mlx5r_fs_unbind_op_fc(struct ib_qp *qp, struct xarray *qpn_opfc_xa)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_per_qp_opfc *per_qp_opfc;
+ struct mlx5_ib_op_fc *in_use_opfc;
+ struct mlx5_ib_flow_prio *prio;
+ int i, j;
+
+ per_qp_opfc = xa_load(qpn_opfc_xa, qp->qp_num);
+ if (!per_qp_opfc)
+ return;
+
+ for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) {
+ if (!per_qp_opfc->opfcs[i].fc)
+ continue;
+
+ if (mlx5r_is_opfc_shared_and_in_use(per_qp_opfc->opfcs, i,
+ &in_use_opfc)) {
+ per_qp_opfc->opfcs[i].fc = NULL;
+ continue;
+ }
+
+ for (j = 0; j < MAX_OPFC_RULES; j++) {
+ if (!per_qp_opfc->opfcs[i].rule[j])
+ continue;
+ mlx5_del_flow_rules(per_qp_opfc->opfcs[i].rule[j]);
+ prio = get_opfc_prio(dev, i);
+ put_flow_table(dev, prio, true);
+ }
+ per_qp_opfc->opfcs[i].fc = NULL;
+ }
+
+ kfree(per_qp_opfc);
+ xa_erase(qpn_opfc_xa, qp->qp_num);
+}
+
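+/* For every port-wide optional counter that is currently enabled, allocate
+ * (or share) a per-QP flow counter and install the matching per-QP rules for
+ * this QP. On failure everything added so far is unwound.
+ */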
+int mlx5r_fs_bind_op_fc(struct ib_qp *qp,
+ struct mlx5_fc *fc_arr[MLX5_IB_OPCOUNTER_MAX],
+ struct xarray *qpn_opfc_xa, u32 port)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_per_qp_opfc *per_qp_opfc;
+ struct mlx5_ib_flow_prio *prio;
+ struct mlx5_ib_counters *cnts;
+ struct mlx5_ib_op_fc *opfc;
+ struct mlx5_fc *in_use_fc;
+ int i, err, per_qp_type;
+ bool new;
+
+ cnts = &dev->port[port - 1].cnts;
+
+ for (i = 0; i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES; i++) {
+ opfc = &cnts->opfcs[i];
+ if (!opfc->fc)
+ continue;
+
+ per_qp_type = i + MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ prio = get_opfc_prio(dev, per_qp_type);
+ WARN_ON(!prio->flow_table);
+
+ if (is_fc_shared_and_in_use(fc_arr, per_qp_type, &in_use_fc))
+ fc_arr[per_qp_type] = in_use_fc;
+
+ if (!fc_arr[per_qp_type]) {
+ fc_arr[per_qp_type] = mlx5_fc_create(dev->mdev, false);
+ if (IS_ERR(fc_arr[per_qp_type]))
+ return PTR_ERR(fc_arr[per_qp_type]);
+ }
+
+ per_qp_opfc = get_per_qp_opfc(qpn_opfc_xa, qp->qp_num, &new);
+ if (!per_qp_opfc) {
+ err = -ENOMEM;
+ goto free_fc;
+ }
+ err = add_op_fc_rules(dev, fc_arr, qpn_opfc_xa, per_qp_opfc,
+ prio, per_qp_type, qp->qp_num, port);
+ if (err)
+ goto del_rules;
+ }
+
+ return 0;
+
+del_rules:
+ mlx5r_fs_unbind_op_fc(qp, qpn_opfc_xa);
+ if (new)
+ kfree(per_qp_opfc);
+free_fc:
+ if (xa_empty(qpn_opfc_xa))
+ mlx5r_fs_destroy_fcs(dev, fc_arr);
+ return err;
+}
+
+static void set_underlay_qp(struct mlx5_ib_dev *dev,
+ struct mlx5_flow_spec *spec,
+ u32 underlay_qpn)
+{
+ void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
+ spec->match_criteria,
+ misc_parameters);
+ void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ if (underlay_qpn &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ ft_field_support.bth_dst_qp)) {
+ MLX5_SET(fte_match_set_misc,
+ misc_params_v, bth_dst_qp, underlay_qpn);
+ MLX5_SET(fte_match_set_misc,
+ misc_params_c, bth_dst_qp, 0xffffff);
+ }
+}
+
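+/* Restrict a rule to traffic coming from the representor's vport, matching
+ * on metadata_reg_c_0 when vport metadata matching is enabled and on
+ * source_port otherwise.
+ */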
+static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ void *misc;
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(rep->esw,
+ rep->vport));
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ }
+}
+
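+/* Core of verbs flow creation: parse every ib_flow_spec into one match spec,
+ * add the underlay QPN and representor source-port matches where needed,
+ * attach flow counters, and install the rule on the priority's flow table.
+ */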
+static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ const struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst,
+ u32 underlay_qpn,
+ struct mlx5_ib_create_flow *ucmd)
+{
+ struct mlx5_flow_table *ft = ft_prio->flow_table;
+ struct mlx5_ib_flow_handler *handler;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *spec;
+ struct mlx5_flow_destination dest_arr[2] = {};
+ struct mlx5_flow_destination *rule_dst = dest_arr;
+ const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
+ unsigned int spec_index;
+ u32 prev_type = 0;
+ int err = 0;
+ int dest_num = 0;
+ bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
+
+ if (!is_valid_attr(dev->mdev, flow_attr))
+ return ERR_PTR(-EINVAL);
+
+ if (dev->is_rep && is_egress)
+ return ERR_PTR(-EINVAL);
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ handler = kzalloc(sizeof(*handler), GFP_KERNEL);
+ if (!handler || !spec) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ INIT_LIST_HEAD(&handler->list);
+
+ for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
+ err = parse_flow_attr(dev->mdev, spec,
+ ib_flow, flow_attr, &flow_act,
+ prev_type);
+ if (err < 0)
+ goto free;
+
+ prev_type = ((union ib_flow_spec *)ib_flow)->type;
+ ib_flow += ((union ib_flow_spec *)ib_flow)->size;
+ }
+
+ if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
+ memcpy(&dest_arr[0], dst, sizeof(*dst));
+ dest_num++;
+ }
+
+ if (!flow_is_multicast_only(flow_attr))
+ set_underlay_qp(dev, spec, underlay_qpn);
+
+ if (dev->is_rep && flow_attr->type != IB_FLOW_ATTR_SNIFFER) {
+ struct mlx5_eswitch_rep *rep;
+
+ rep = dev->port[flow_attr->port - 1].rep;
+ if (!rep) {
+ err = -EINVAL;
+ goto free;
+ }
+
+ mlx5_ib_set_rule_source_port(dev, spec, rep);
+ }
+
+ spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
+
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ struct mlx5_ib_mcounters *mcounters;
+
+ err = mlx5_ib_flow_counters_set_data(flow_act.counters, ucmd);
+ if (err)
+ goto free;
+
+ mcounters = to_mcounters(flow_act.counters);
+ handler->ibcounters = flow_act.counters;
+ dest_arr[dest_num].type =
+ MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest_arr[dest_num].counter =
+ mcounters->hw_cntrs_hndl;
+ dest_num++;
+ }
+
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
+ if (!dest_num)
+ rule_dst = NULL;
+ } else {
+ if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)
+ flow_act.action |=
+ MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+ if (is_egress)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ else if (dest_num)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ }
+
+ if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) &&
+ (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
+ mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
+ spec->flow_context.flow_tag, flow_attr->type);
+ err = -EINVAL;
+ goto free;
+ }
+ handler->rule = mlx5_add_flow_rules(ft, spec,
+ &flow_act,
+ rule_dst, dest_num);
+
+ if (IS_ERR(handler->rule)) {
+ err = PTR_ERR(handler->rule);
+ goto free;
+ }
+
+ ft_prio->refcount++;
+ handler->prio = ft_prio;
+ handler->dev = dev;
+
+ ft_prio->flow_table = ft;
+free:
+ if (err && handler) {
+ mlx5_ib_counters_clear_description(handler->ibcounters);
+ kfree(handler);
+ }
+ kvfree(spec);
+ return err ? ERR_PTR(err) : handler;
+}
+
+static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ const struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst)
+{
+ return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
+}
+
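+/* Default (leftovers) rules match only on the multicast bit of the
+ * destination MAC: one catch-all rule for multicast, plus a unicast one when
+ * the attribute type is IB_FLOW_ATTR_ALL_DEFAULT.
+ */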
+static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst)
+{
+ struct mlx5_ib_flow_handler *handler_ucast = NULL;
+ struct mlx5_ib_flow_handler *handler = NULL;
+
+ static struct {
+ struct ib_flow_spec_eth eth_flow;
+ struct ib_flow_attr flow_attr;
+ } leftovers_wc = { .flow_attr = { .num_of_specs = 1,
+ .size = sizeof(leftovers_wc) },
+ .eth_flow = {
+ .type = IB_FLOW_SPEC_ETH,
+ .size = sizeof(struct ib_flow_spec_eth),
+ .mask = { .dst_mac = { 0x1 } },
+ .val = { .dst_mac = { 0x1 } } } };
+
+ static struct {
+ struct ib_flow_spec_eth eth_flow;
+ struct ib_flow_attr flow_attr;
+ } leftovers_uc = { .flow_attr = { .num_of_specs = 1,
+ .size = sizeof(leftovers_uc) },
+ .eth_flow = {
+ .type = IB_FLOW_SPEC_ETH,
+ .size = sizeof(struct ib_flow_spec_eth),
+ .mask = { .dst_mac = { 0x1 } },
+ .val = { .dst_mac = {} } } };
+
+ handler = create_flow_rule(dev, ft_prio, &leftovers_wc.flow_attr, dst);
+ if (!IS_ERR(handler) &&
+ flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
+ handler_ucast = create_flow_rule(dev, ft_prio,
+ &leftovers_uc.flow_attr, dst);
+ if (IS_ERR(handler_ucast)) {
+ mlx5_del_flow_rules(handler->rule);
+ ft_prio->refcount--;
+ kfree(handler);
+ handler = handler_ucast;
+ } else {
+ list_add(&handler_ucast->list, &handler->list);
+ }
+ }
+
+ return handler;
+}
+
+static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_rx,
+ struct mlx5_ib_flow_prio *ft_tx,
+ struct mlx5_flow_destination *dst)
+{
+ struct mlx5_ib_flow_handler *handler_rx;
+ struct mlx5_ib_flow_handler *handler_tx;
+ int err;
+ static const struct ib_flow_attr flow_attr = {
+ .num_of_specs = 0,
+ .type = IB_FLOW_ATTR_SNIFFER,
+ .size = sizeof(flow_attr)
+ };
+
+ handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
+ if (IS_ERR(handler_rx)) {
+ err = PTR_ERR(handler_rx);
+ goto err;
+ }
+
+ handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
+ if (IS_ERR(handler_tx)) {
+ err = PTR_ERR(handler_tx);
+ goto err_tx;
+ }
+
+ list_add(&handler_tx->list, &handler_rx->list);
+
+ return handler_rx;
+
+err_tx:
+ mlx5_del_flow_rules(handler_rx->rule);
+ ft_rx->refcount--;
+ kfree(handler_rx);
+err:
+ return ERR_PTR(err);
+}
+
+static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_flow_handler *handler = NULL;
+ struct mlx5_flow_destination *dst = NULL;
+ struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
+ struct mlx5_ib_flow_prio *ft_prio;
+ bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
+ struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
+ size_t min_ucmd_sz, required_ucmd_sz;
+ int err;
+ int underlay_qpn;
+
+ if (udata && udata->inlen) {
+ min_ucmd_sz = offsetofend(struct mlx5_ib_create_flow, reserved);
+ if (udata->inlen < min_ucmd_sz)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
+ if (err)
+ return ERR_PTR(err);
+
+ /* currently only one counters data entry is supported */
+ if (ucmd_hdr.ncounters_data > 1)
+ return ERR_PTR(-EINVAL);
+
+ required_ucmd_sz = min_ucmd_sz +
+ sizeof(struct mlx5_ib_flow_counters_data) *
+ ucmd_hdr.ncounters_data;
+ if (udata->inlen > required_ucmd_sz &&
+ !ib_is_udata_cleared(udata, required_ucmd_sz,
+ udata->inlen - required_ucmd_sz))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
+ if (!ucmd)
+ return ERR_PTR(-ENOMEM);
+
+ err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
+ if (err)
+ goto free_ucmd;
+ }
+
+ if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
+ err = -ENOMEM;
+ goto free_ucmd;
+ }
+
+ if (flow_attr->flags &
+ ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS)) {
+ err = -EINVAL;
+ goto free_ucmd;
+ }
+
+ if (is_egress &&
+ (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
+ err = -EINVAL;
+ goto free_ucmd;
+ }
+
+ dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ if (!dst) {
+ err = -ENOMEM;
+ goto free_ucmd;
+ }
+
+ mutex_lock(&dev->flow_db->lock);
+
+ ft_prio = get_flow_table(dev, flow_attr,
+ is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
+ if (IS_ERR(ft_prio)) {
+ err = PTR_ERR(ft_prio);
+ goto unlock;
+ }
+ if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
+ ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
+ if (IS_ERR(ft_prio_tx)) {
+ err = PTR_ERR(ft_prio_tx);
+ ft_prio_tx = NULL;
+ goto destroy_ft;
+ }
+ }
+
+ if (is_egress) {
+ dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+ } else {
+ dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ if (mqp->is_rss)
+ dst->tir_num = mqp->rss_qp.tirn;
+ else
+ dst->tir_num = mqp->raw_packet_qp.rq.tirn;
+ }
+
+ switch (flow_attr->type) {
+ case IB_FLOW_ATTR_NORMAL:
+ underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ?
+ mqp->underlay_qpn :
+ 0;
+ handler = _create_flow_rule(dev, ft_prio, flow_attr, dst,
+ underlay_qpn, ucmd);
+ break;
+ case IB_FLOW_ATTR_ALL_DEFAULT:
+ case IB_FLOW_ATTR_MC_DEFAULT:
+ handler = create_leftovers_rule(dev, ft_prio, flow_attr, dst);
+ break;
+ case IB_FLOW_ATTR_SNIFFER:
+ handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
+ break;
+ default:
+ err = -EINVAL;
+ goto destroy_ft;
+ }
+
+ if (IS_ERR(handler)) {
+ err = PTR_ERR(handler);
+ handler = NULL;
+ goto destroy_ft;
+ }
+
+ mutex_unlock(&dev->flow_db->lock);
+ kfree(dst);
+ kfree(ucmd);
+
+ return &handler->ibflow;
+
+destroy_ft:
+ put_flow_table(dev, ft_prio, false);
+ if (ft_prio_tx)
+ put_flow_table(dev, ft_prio_tx, false);
+unlock:
+ mutex_unlock(&dev->flow_db->lock);
+ kfree(dst);
+free_ucmd:
+ kfree(ucmd);
+ return ERR_PTR(err);
+}
+
+static int mlx5_ib_fill_transport_ns_info(struct mlx5_ib_dev *dev,
+ enum mlx5_flow_namespace_type type,
+ u32 *flags, u16 *vport_idx,
+ u16 *vport,
+ struct mlx5_core_dev **ft_mdev,
+ u32 ib_port, u16 *esw_owner_vhca_id)
+{
+ struct mlx5_core_dev *esw_mdev;
+
+ if (!is_mdev_switchdev_mode(dev->mdev))
+ return 0;
+
+ if (!MLX5_CAP_ADV_RDMA(dev->mdev, rdma_transport_manager))
+ return -EOPNOTSUPP;
+
+ if (!dev->port[ib_port - 1].rep)
+ return -EINVAL;
+
+ esw_mdev = mlx5_eswitch_get_core_dev(dev->port[ib_port - 1].rep->esw);
+ if (esw_mdev != dev->mdev) {
+ if (!MLX5_CAP_ADV_RDMA(dev->mdev,
+ rdma_transport_manager_other_eswitch))
+ return -EOPNOTSUPP;
+ *flags |= MLX5_FLOW_TABLE_OTHER_ESWITCH;
+ *esw_owner_vhca_id = MLX5_CAP_GEN(esw_mdev, vhca_id);
+ }
+
+ *flags |= MLX5_FLOW_TABLE_OTHER_VPORT;
+ *ft_mdev = esw_mdev;
+ *vport = dev->port[ib_port - 1].rep->vport;
+ *vport_idx = dev->port[ib_port - 1].rep->vport_index;
+
+ return 0;
+}
+
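+/* Matcher (raw flow) variant of get_flow_table(): resolve the namespace and
+ * priority slot for the requested namespace type and user priority, and
+ * create the flow table on first use.
+ */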
+static struct mlx5_ib_flow_prio *
+_get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
+ enum mlx5_flow_namespace_type ns_type,
+ bool mcast, u32 ib_port)
+{
+ struct mlx5_core_dev *ft_mdev = dev->mdev;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *ns = NULL;
+ struct mlx5_ib_flow_prio *prio = NULL;
+ u16 esw_owner_vhca_id = 0;
+ int max_table_size = 0;
+ u16 vport_idx = 0;
+ bool esw_encap;
+ u32 flags = 0;
+ u16 vport = 0;
+ int priority;
+ int ret;
+
+ if (mcast)
+ priority = MLX5_IB_FLOW_MCAST_PRIO;
+ else
+ priority = ib_prio_to_core_prio(user_priority, false);
+
+ esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
+ DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+ switch (ns_type) {
+ case MLX5_FLOW_NAMESPACE_BYPASS:
+ max_table_size = BIT(
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size));
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ reformat_l3_tunnel_to_l2) &&
+ !esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ break;
+ case MLX5_FLOW_NAMESPACE_EGRESS:
+ max_table_size = BIT(
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
+ if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) &&
+ !esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ break;
+ case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
+ max_table_size = BIT(
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev,
+ reformat_l3_tunnel_to_l2) &&
+ esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ priority = user_priority;
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_RX:
+ max_table_size = BIT(
+ MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, log_max_ft_size));
+ priority = user_priority;
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TX:
+ max_table_size = BIT(
+ MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size));
+ priority = user_priority;
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
+ if (ib_port == 0 ||
+ user_priority >= MLX5_RDMA_TRANSPORT_BYPASS_PRIO)
+ return ERR_PTR(-EINVAL);
+ ret = mlx5_ib_fill_transport_ns_info(dev, ns_type, &flags,
+ &vport_idx, &vport,
+ &ft_mdev, ib_port,
+ &esw_owner_vhca_id);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX)
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(
+ ft_mdev, log_max_ft_size));
+ else
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(
+ ft_mdev, log_max_ft_size));
+ priority = user_priority;
+ break;
+ default:
+ break;
+ }
+
+ max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX ||
+ ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX)
+ ns = mlx5_get_flow_vport_namespace(ft_mdev, ns_type, vport_idx);
+ else
+ ns = mlx5_get_flow_namespace(ft_mdev, ns_type);
+
+ if (!ns)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ switch (ns_type) {
+ case MLX5_FLOW_NAMESPACE_BYPASS:
+ prio = &dev->flow_db->prios[priority];
+ break;
+ case MLX5_FLOW_NAMESPACE_EGRESS:
+ prio = &dev->flow_db->egress_prios[priority];
+ break;
+ case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
+ prio = &dev->flow_db->fdb[priority];
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_RX:
+ prio = &dev->flow_db->rdma_rx[priority];
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TX:
+ prio = &dev->flow_db->rdma_tx[priority];
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
+ prio = &dev->flow_db->rdma_transport_rx[priority][ib_port - 1];
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
+ prio = &dev->flow_db->rdma_transport_tx[priority][ib_port - 1];
+ break;
+ default: return ERR_PTR(-EINVAL);
+ }
+
+ if (!prio)
+ return ERR_PTR(-EINVAL);
+
+ if (prio->flow_table)
+ return prio;
+
+ ft_attr.prio = priority;
+ ft_attr.max_fte = max_table_size;
+ ft_attr.flags = flags;
+ ft_attr.vport = vport;
+ ft_attr.esw_owner_vhca_id = esw_owner_vhca_id;
+ ft_attr.autogroup.max_num_groups = MLX5_FS_MAX_TYPES;
+ return _get_prio(ns, prio, &ft_attr);
+}
+
+static struct mlx5_ib_flow_handler *
+_create_raw_flow_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ struct mlx5_flow_destination *dst,
+ struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_context *flow_context,
+ struct mlx5_flow_act *flow_act,
+ void *cmd_in, int inlen,
+ int dst_num)
+{
+ struct mlx5_ib_flow_handler *handler;
+ struct mlx5_flow_spec *spec;
+ struct mlx5_flow_table *ft = ft_prio->flow_table;
+ int err = 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ handler = kzalloc(sizeof(*handler), GFP_KERNEL);
+ if (!handler || !spec) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ INIT_LIST_HEAD(&handler->list);
+
+ memcpy(spec->match_value, cmd_in, inlen);
+ memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
+ fs_matcher->mask_len);
+ spec->match_criteria_enable = fs_matcher->match_criteria_enable;
+ spec->flow_context = *flow_context;
+
+ handler->rule = mlx5_add_flow_rules(ft, spec,
+ flow_act, dst, dst_num);
+
+ if (IS_ERR(handler->rule)) {
+ err = PTR_ERR(handler->rule);
+ goto free;
+ }
+
+ ft_prio->refcount++;
+ handler->prio = ft_prio;
+ handler->dev = dev;
+ ft_prio->flow_table = ft;
+
+free:
+ if (err)
+ kfree(handler);
+ kvfree(spec);
+ return err ? ERR_PTR(err) : handler;
+}
+
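+/* A raw flow is considered multicast when both the matcher mask and the
+ * match value select a multicast destination MAC or a multicast IPv4
+ * destination address.
+ */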
+static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
+ void *match_v)
+{
+ void *match_c;
+ void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4;
+ void *dmac, *dmac_mask;
+ void *ipv4, *ipv4_mask;
+
+ if (!(fs_matcher->match_criteria_enable &
+ (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT)))
+ return false;
+
+ match_c = fs_matcher->matcher_mask.match_params;
+ match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v,
+ outer_headers);
+ match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+
+ dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
+ dmac_47_16);
+ dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
+ dmac_47_16);
+
+ if (is_multicast_ether_addr(dmac) &&
+ is_multicast_ether_addr(dmac_mask))
+ return true;
+
+ ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+
+ ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+
+ if (ipv4_is_multicast(*(__be32 *)(ipv4)) &&
+ ipv4_is_multicast(*(__be32 *)(ipv4_mask)))
+ return true;
+
+ return false;
+}
+
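+/* Add a rule through the raw matcher interface: pick the flow table from the
+ * matcher's namespace type and priority, build the destination list (TIR,
+ * flow table, port and/or counter) and install the rule.
+ */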
+static struct mlx5_ib_flow_handler *raw_fs_rule_add(
+ struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act,
+ struct mlx5_fc *counter, void *cmd_in, int inlen, int dest_id, int dest_type)
+{
+ struct mlx5_flow_destination *dst;
+ struct mlx5_ib_flow_prio *ft_prio;
+ struct mlx5_ib_flow_handler *handler;
+ int dst_num = 0;
+ bool mcast;
+ int err;
+
+ if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
+ return ERR_PTR(-ENOMEM);
+
+ dst = kcalloc(2, sizeof(*dst), GFP_KERNEL);
+ if (!dst)
+ return ERR_PTR(-ENOMEM);
+
+ mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
+ mutex_lock(&dev->flow_db->lock);
+
+ ft_prio = _get_flow_table(dev, fs_matcher->priority,
+ fs_matcher->ns_type, mcast,
+ fs_matcher->ib_port);
+ if (IS_ERR(ft_prio)) {
+ err = PTR_ERR(ft_prio);
+ goto unlock;
+ }
+
+ switch (dest_type) {
+ case MLX5_FLOW_DESTINATION_TYPE_TIR:
+ dst[dst_num].type = dest_type;
+ dst[dst_num++].tir_num = dest_id;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
+ dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
+ dst[dst_num++].ft_num = dest_id;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_PORT:
+ dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ break;
+ default:
+ break;
+ }
+
+ if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ if (WARN_ON(!counter)) {
+ err = -EINVAL;
+ goto unlock;
+ }
+ dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dst[dst_num].counter = counter;
+ dst_num++;
+ }
+
+ handler = _create_raw_flow_rule(dev, ft_prio, dst_num ? dst : NULL,
+ fs_matcher, flow_context, flow_act,
+ cmd_in, inlen, dst_num);
+
+ if (IS_ERR(handler)) {
+ err = PTR_ERR(handler);
+ goto destroy_ft;
+ }
+
+ mutex_unlock(&dev->flow_db->lock);
+ atomic_inc(&fs_matcher->usecnt);
+ handler->flow_matcher = fs_matcher;
+
+ kfree(dst);
+
+ return handler;
+
+destroy_ft:
+ put_flow_table(dev, ft_prio, false);
+unlock:
+ mutex_unlock(&dev->flow_db->lock);
+ kfree(dst);
+
+ return ERR_PTR(err);
+}
+
+static void destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
+{
+ switch (maction->flow_action_raw.sub_type) {
+ case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
+ mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
+ maction->flow_action_raw.modify_hdr);
+ break;
+ case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
+ mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
+ maction->flow_action_raw.pkt_reformat);
+ break;
+ case MLX5_IB_FLOW_ACTION_DECAP:
+ break;
+ default:
+ break;
+ }
+}
+
+static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
+{
+ struct mlx5_ib_flow_action *maction = to_mflow_act(action);
+
+ switch (action->type) {
+ case IB_FLOW_ACTION_UNSPECIFIED:
+ destroy_flow_action_raw(maction);
+ break;
+ default:
+ WARN_ON(true);
+ break;
+ }
+
+ kfree(maction);
+ return 0;
+}
+
+static int
+mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
+ enum mlx5_flow_namespace_type *namespace)
+{
+ switch (table_type) {
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX:
+ *namespace = MLX5_FLOW_NAMESPACE_BYPASS;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX:
+ *namespace = MLX5_FLOW_NAMESPACE_EGRESS;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB:
+ *namespace = MLX5_FLOW_NAMESPACE_FDB_BYPASS;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX:
+ *namespace = MLX5_FLOW_NAMESPACE_RDMA_RX;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
+ *namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_RX:
+ *namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_TX:
+ *namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
+ [MLX5_IB_FLOW_TYPE_NORMAL] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ .u.ptr = {
+ .len = sizeof(u16), /* data is priority */
+ .min_len = sizeof(u16),
+ }
+ },
+ [MLX5_IB_FLOW_TYPE_SNIFFER] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_NO_DATA(),
+ },
+ [MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_NO_DATA(),
+ },
+ [MLX5_IB_FLOW_TYPE_MC_DEFAULT] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_NO_DATA(),
+ },
+};
+
+static bool is_flow_dest(void *obj, int *dest_id, int *dest_type)
+{
+ struct devx_obj *devx_obj = obj;
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_DESTROY_TIR:
+ *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
+ obj_id);
+ return true;
+
+ case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
+ *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
+ table_id);
+ return true;
+ default:
+ return false;
+ }
+}
+
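+/* Decode the destination related attributes (DEVX object, QP and flags) and
+ * validate that the combination is legal for the matcher's namespace,
+ * returning the resolved destination id and type.
+ */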
+static int get_dests(struct uverbs_attr_bundle *attrs,
+ struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id,
+ int *dest_type, struct ib_qp **qp, u32 *flags)
+{
+ bool dest_devx, dest_qp;
+ void *devx_obj;
+ int err;
+
+ dest_devx = uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
+ dest_qp = uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
+
+ *flags = 0;
+ err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
+ MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS |
+ MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP);
+ if (err)
+ return err;
+
+ /* Setting both flags together is not allowed */
+ if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS &&
+ *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
+ return -EINVAL;
+
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
+ if (dest_devx && (dest_qp || *flags))
+ return -EINVAL;
+ else if (dest_qp && *flags)
+ return -EINVAL;
+ }
+
+ /* Allow only a DEVX object or drop as the destination for FDB */
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS &&
+ !(dest_devx || (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)))
+ return -EINVAL;
+
+ /* Allow only a DEVX object or QP as dest when inserting to RDMA_RX
+ * or RDMA_TRANSPORT_RX
+ */
+ if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) &&
+ ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
+ return -EINVAL;
+
+ *qp = NULL;
+ if (dest_devx) {
+ devx_obj =
+ uverbs_attr_get_obj(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
+
+ /* Verify that the given DEVX object is a flow
+ * steering destination.
+ */
+ if (!is_flow_dest(devx_obj, dest_id, dest_type))
+ return -EINVAL;
+ /* Allow only a flow table as dest when inserting to FDB, RDMA_RX
+ * or RDMA_TRANSPORT_RX
+ */
+ if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) &&
+ *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
+ return -EINVAL;
+ } else if (dest_qp) {
+ struct mlx5_ib_qp *mqp;
+
+ *qp = uverbs_attr_get_obj(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
+ if (IS_ERR(*qp))
+ return PTR_ERR(*qp);
+
+ if ((*qp)->qp_type != IB_QPT_RAW_PACKET)
+ return -EINVAL;
+
+ mqp = to_mqp(*qp);
+ if (mqp->is_rss)
+ *dest_id = mqp->rss_qp.tirn;
+ else
+ *dest_id = mqp->raw_packet_qp.rq.tirn;
+ *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ } else if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) &&
+ !(*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)) {
+ *dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+ }
+
+ if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
+ (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX))
+ return -EINVAL;
+
+ return 0;
+}
+
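+/* Check that the DEVX object is a flow counter (identified by its destroy
+ * command opcode) and, if so, return its bulk size and the counter id
+ * adjusted by the requested offset within the bulk.
+ */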
+static bool
+is_flow_counter(void *obj, u32 offset, u32 *counter_id, u32 *fc_bulk_size)
+{
+ struct devx_obj *devx_obj = obj;
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
+
+ if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
+
+ if (offset && offset >= devx_obj->flow_counter_bulk_size)
+ return false;
+
+ *fc_bulk_size = devx_obj->flow_counter_bulk_size;
+ *counter_id = MLX5_GET(dealloc_flow_counter_in,
+ devx_obj->dinbox,
+ flow_counter_id);
+ *counter_id += offset;
+ return true;
+ }
+
+ return false;
+}
+
+#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
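+/* Handler for MLX5_IB_METHOD_CREATE_FLOW: parse the matcher, destinations,
+ * optional counter, flow actions and flow tag from the attribute bundle and
+ * add the raw flow steering rule.
+ */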
+static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_flow_context flow_context = {.flow_tag =
+ MLX5_FS_DEFAULT_FLOW_TAG};
+ int dest_id, dest_type = -1, inlen, len, ret, i;
+ struct mlx5_ib_flow_handler *flow_handler;
+ struct mlx5_ib_flow_matcher *fs_matcher;
+ struct ib_uobject **arr_flow_actions;
+ struct ib_uflow_resources *uflow_res;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_fc *counter = NULL;
+ struct ib_qp *qp = NULL;
+ void *devx_obj, *cmd_in;
+ struct ib_uobject *uobj;
+ struct mlx5_ib_dev *dev;
+ u32 flags;
+
+ if (!rdma_uattrs_has_raw_cap(attrs))
+ return -EPERM;
+
+ fs_matcher = uverbs_attr_get_obj(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
+ uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
+ dev = mlx5_udata_to_mdev(&attrs->driver_udata);
+
+ if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags))
+ return -EINVAL;
+
+ if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
+
+ if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ len = uverbs_attr_get_uobjs_arr(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
+ if (len) {
+ u32 *offset_attr, fc_bulk_size, offset = 0, counter_id = 0;
+ devx_obj = arr_flow_actions[0]->object;
+
+ if (uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) {
+
+ int num_offsets = uverbs_attr_ptr_get_array_size(
+ attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
+ sizeof(u32));
+
+ if (num_offsets != 1)
+ return -EINVAL;
+
+ offset_attr = uverbs_attr_get_alloced_ptr(
+ attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET);
+ offset = *offset_attr;
+ }
+
+ if (!is_flow_counter(devx_obj, offset, &counter_id, &fc_bulk_size))
+ return -EINVAL;
+ counter = mlx5_fc_local_create(counter_id, offset, fc_bulk_size);
+ if (IS_ERR(counter))
+ return PTR_ERR(counter);
+
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ }
+
+ cmd_in = uverbs_attr_get_alloced_ptr(
+ attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
+ inlen = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
+
+ uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
+ if (!uflow_res) {
+ ret = -ENOMEM;
+ goto destroy_counter;
+ }
+
+ len = uverbs_attr_get_uobjs_arr(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
+ for (i = 0; i < len; i++) {
+ struct mlx5_ib_flow_action *maction =
+ to_mflow_act(arr_flow_actions[i]->object);
+
+ ret = parse_flow_flow_action(maction, false, &flow_act);
+ if (ret)
+ goto err_out;
+ flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE,
+ arr_flow_actions[i]->object);
+ }
+
+ ret = uverbs_copy_from(&flow_context.flow_tag, attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_TAG);
+ if (!ret) {
+ if (flow_context.flow_tag >= BIT(24)) {
+ ret = -EINVAL;
+ goto err_out;
+ }
+ flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
+ }
+
+ flow_handler =
+ raw_fs_rule_add(dev, fs_matcher, &flow_context, &flow_act,
+ counter, cmd_in, inlen, dest_id, dest_type);
+ if (IS_ERR(flow_handler)) {
+ ret = PTR_ERR(flow_handler);
+ goto err_out;
+ }
+
+ ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res);
+
+ return 0;
+err_out:
+ ib_uverbs_flow_resources_free(uflow_res);
+destroy_counter:
+ if (counter)
+ mlx5_fc_local_destroy(counter);
+ return ret;
+}
+
+static int flow_matcher_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_flow_matcher *obj = uobject->object;
+
+ if (atomic_read(&obj->usecnt))
+ return -EBUSY;
+
+ kfree(obj);
+ return 0;
+}
+
+static int steering_anchor_create_ft(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table *ft;
+
+ if (ft_prio->anchor.ft)
+ return 0;
+
+ ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
+ if (!ns)
+ return -EOPNOTSUPP;
+
+ ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
+ ft_attr.uid = MLX5_SHARED_RESOURCE_UID;
+ ft_attr.prio = 0;
+ ft_attr.max_fte = 2;
+ ft_attr.level = 1;
+
+ ft = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft))
+ return PTR_ERR(ft);
+
+ ft_prio->anchor.ft = ft;
+
+ return 0;
+}
+
+static void steering_anchor_destroy_ft(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.ft) {
+ mlx5_destroy_flow_table(ft_prio->anchor.ft);
+ ft_prio->anchor.ft = NULL;
+ }
+}
+
+static int
+steering_anchor_create_fg_drop(struct mlx5_ib_flow_prio *ft_prio)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ void *flow_group_in;
+ int err = 0;
+
+ if (ft_prio->anchor.fg_drop)
+ return 0;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+
+ fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in);
+ if (IS_ERR(fg)) {
+ err = PTR_ERR(fg);
+ goto out;
+ }
+
+ ft_prio->anchor.fg_drop = fg;
+
+out:
+ kvfree(flow_group_in);
+
+ return err;
+}
+
+static void
+steering_anchor_destroy_fg_drop(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.fg_drop) {
+ mlx5_destroy_flow_group(ft_prio->anchor.fg_drop);
+ ft_prio->anchor.fg_drop = NULL;
+ }
+}
+
+static int
+steering_anchor_create_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ void *flow_group_in;
+ int err = 0;
+
+ if (ft_prio->anchor.fg_goto_table)
+ return 0;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in);
+ if (IS_ERR(fg)) {
+ err = PTR_ERR(fg);
+ goto out;
+ }
+ ft_prio->anchor.fg_goto_table = fg;
+
+out:
+ kvfree(flow_group_in);
+
+ return err;
+}
+
+static void
+steering_anchor_destroy_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.fg_goto_table) {
+ mlx5_destroy_flow_group(ft_prio->anchor.fg_goto_table);
+ ft_prio->anchor.fg_goto_table = NULL;
+ }
+}
+
+static int
+steering_anchor_create_rule_drop(struct mlx5_ib_flow_prio *ft_prio)
+{
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *handle;
+
+ if (ft_prio->anchor.rule_drop)
+ return 0;
+
+ flow_act.fg = ft_prio->anchor.fg_drop;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act,
+ NULL, 0);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ ft_prio->anchor.rule_drop = handle;
+
+ return 0;
+}
+
+static void steering_anchor_destroy_rule_drop(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.rule_drop) {
+ mlx5_del_flow_rules(ft_prio->anchor.rule_drop);
+ ft_prio->anchor.rule_drop = NULL;
+ }
+}
+
+static int
+steering_anchor_create_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *handle;
+
+ if (ft_prio->anchor.rule_goto_table)
+ return 0;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ flow_act.fg = ft_prio->anchor.fg_goto_table;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = ft_prio->flow_table;
+
+ handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act,
+ &dest, 1);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ ft_prio->anchor.rule_goto_table = handle;
+
+ return 0;
+}
+
+static void
+steering_anchor_destroy_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.rule_goto_table) {
+ mlx5_del_flow_rules(ft_prio->anchor.rule_goto_table);
+ ft_prio->anchor.rule_goto_table = NULL;
+ }
+}
+
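+/* Create the full set of steering anchor resources: the unmanaged flow
+ * table, the drop and goto-table flow groups and their rules. Partially
+ * created resources are torn down in reverse order on failure.
+ */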
+static int steering_anchor_create_res(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ int err;
+
+ err = steering_anchor_create_ft(dev, ft_prio, ns_type);
+ if (err)
+ return err;
+
+ err = steering_anchor_create_fg_drop(ft_prio);
+ if (err)
+ goto destroy_ft;
+
+ err = steering_anchor_create_fg_goto_table(ft_prio);
+ if (err)
+ goto destroy_fg_drop;
+
+ err = steering_anchor_create_rule_drop(ft_prio);
+ if (err)
+ goto destroy_fg_goto_table;
+
+ err = steering_anchor_create_rule_goto_table(ft_prio);
+ if (err)
+ goto destroy_rule_drop;
+
+ return 0;
+
+destroy_rule_drop:
+ steering_anchor_destroy_rule_drop(ft_prio);
+destroy_fg_goto_table:
+ steering_anchor_destroy_fg_goto_table(ft_prio);
+destroy_fg_drop:
+ steering_anchor_destroy_fg_drop(ft_prio);
+destroy_ft:
+ steering_anchor_destroy_ft(ft_prio);
+
+ return err;
+}
+
+static void mlx5_steering_anchor_destroy_res(struct mlx5_ib_flow_prio *ft_prio)
+{
+ steering_anchor_destroy_rule_goto_table(ft_prio);
+ steering_anchor_destroy_rule_drop(ft_prio);
+ steering_anchor_destroy_fg_goto_table(ft_prio);
+ steering_anchor_destroy_fg_drop(ft_prio);
+ steering_anchor_destroy_ft(ft_prio);
+}
+
+static int steering_anchor_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_steering_anchor *obj = uobject->object;
+
+ if (atomic_read(&obj->usecnt))
+ return -EBUSY;
+
+ mutex_lock(&obj->dev->flow_db->lock);
+ if (!--obj->ft_prio->anchor.rule_goto_table_ref)
+ steering_anchor_destroy_rule_goto_table(obj->ft_prio);
+
+ put_flow_table(obj->dev, obj->ft_prio, true);
+ mutex_unlock(&obj->dev->flow_db->lock);
+
+ kfree(obj);
+ return 0;
+}
+
+static void fs_cleanup_anchor(struct mlx5_ib_flow_prio *prio,
+ int count)
+{
+ while (count--)
+ mlx5_steering_anchor_destroy_res(&prio[count]);
+}
+
+void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev)
+{
+ fs_cleanup_anchor(dev->flow_db->prios, MLX5_IB_NUM_FLOW_FT);
+ fs_cleanup_anchor(dev->flow_db->egress_prios, MLX5_IB_NUM_FLOW_FT);
+ fs_cleanup_anchor(dev->flow_db->sniffer, MLX5_IB_NUM_SNIFFER_FTS);
+ fs_cleanup_anchor(dev->flow_db->egress, MLX5_IB_NUM_EGRESS_FTS);
+ fs_cleanup_anchor(dev->flow_db->fdb, MLX5_IB_NUM_FDB_FTS);
+ fs_cleanup_anchor(dev->flow_db->rdma_rx, MLX5_IB_NUM_FLOW_FT);
+ fs_cleanup_anchor(dev->flow_db->rdma_tx, MLX5_IB_NUM_FLOW_FT);
+}
+
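+/* Derive the matcher's flow namespace: prefer the FT_TYPE attribute, fall
+ * back to the legacy FLOW_FLAGS attribute (egress only) and default to the
+ * NIC RX bypass namespace.
+ */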
+static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
+ struct mlx5_ib_flow_matcher *obj)
+{
+ enum mlx5_ib_uapi_flow_table_type ft_type =
+ MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX;
+ u32 flags;
+ int err;
+
+ /* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older
+ * users should switch to it. We keep this attribute so that existing
+ * userspace does not break.
+ */
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) &&
+ uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS))
+ return -EINVAL;
+
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) {
+ err = uverbs_get_const(&ft_type, attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE);
+ if (err)
+ return err;
+
+ err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type);
+ if (err)
+ return err;
+
+ return 0;
+ }
+
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) {
+ err = uverbs_get_flags32(&flags, attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
+ IB_FLOW_ATTR_FLAGS_EGRESS);
+ if (err)
+ return err;
+
+ if (flags)
+ return mlx5_ib_ft_type_to_namespace(
+ MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX,
+ &obj->ns_type);
+ }
+
+ obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;
+
+ return 0;
+}
+
+static bool verify_context_caps(struct mlx5_ib_dev *dev, u64 enabled_caps)
+{
+ if (is_mdev_switchdev_mode(dev->mdev))
+ return UCAP_ENABLED(enabled_caps,
+ RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
+
+ return UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_LOCAL);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
+ struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
+ struct mlx5_ib_flow_matcher *obj;
+ int err;
+
+ obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ obj->mask_len = uverbs_attr_get_len(
+ attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
+ err = uverbs_copy_from(&obj->matcher_mask,
+ attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
+ if (err)
+ goto end;
+
+ obj->flow_type = uverbs_attr_get_enum_id(
+ attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
+
+ if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) {
+ err = uverbs_copy_from(&obj->priority,
+ attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
+ if (err)
+ goto end;
+ }
+
+ err = uverbs_copy_from(&obj->match_criteria_enable,
+ attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA);
+ if (err)
+ goto end;
+
+ err = mlx5_ib_matcher_ns(attrs, obj);
+ if (err)
+ goto end;
+
+ if (obj->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS &&
+ mlx5_eswitch_mode(dev->mdev) != MLX5_ESWITCH_OFFLOADS) {
+ err = -EINVAL;
+ goto end;
+ }
+
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT)) {
+ err = uverbs_copy_from(&obj->ib_port, attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT);
+ if (err)
+ goto end;
+ if (!rdma_is_port_valid(&dev->ib_dev, obj->ib_port)) {
+ err = -EINVAL;
+ goto end;
+ }
+ if (obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX &&
+ obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) {
+ err = -EINVAL;
+ goto end;
+ }
+ if (!verify_context_caps(dev, uobj->context->enabled_caps)) {
+ err = -EOPNOTSUPP;
+ goto end;
+ }
+ }
+
+ uobj->object = obj;
+ obj->mdev = dev->mdev;
+ atomic_set(&obj->usecnt, 0);
+ return 0;
+
+end:
+ kfree(obj);
+ return err;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE);
+ struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
+ enum mlx5_ib_uapi_flow_table_type ib_uapi_ft_type;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5_ib_steering_anchor *obj;
+ struct mlx5_ib_flow_prio *ft_prio;
+ u16 priority;
+ u32 ft_id;
+ int err;
+
+ if (!rdma_dev_has_raw_cap(&dev->ib_dev))
+ return -EPERM;
+
+ err = uverbs_get_const(&ib_uapi_ft_type, attrs,
+ MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE);
+ if (err)
+ return err;
+
+ err = mlx5_ib_ft_type_to_namespace(ib_uapi_ft_type, &ns_type);
+ if (err)
+ return err;
+
+ err = uverbs_copy_from(&priority, attrs,
+ MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY);
+ if (err)
+ return err;
+
+ obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ mutex_lock(&dev->flow_db->lock);
+
+ ft_prio = _get_flow_table(dev, priority, ns_type, 0, 0);
+ if (IS_ERR(ft_prio)) {
+ err = PTR_ERR(ft_prio);
+ goto free_obj;
+ }
+
+ ft_prio->refcount++;
+
+ if (!ft_prio->anchor.rule_goto_table_ref) {
+ err = steering_anchor_create_res(dev, ft_prio, ns_type);
+ if (err)
+ goto put_flow_table;
+ }
+
+ ft_prio->anchor.rule_goto_table_ref++;
+
+ ft_id = mlx5_flow_table_id(ft_prio->anchor.ft);
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
+ &ft_id, sizeof(ft_id));
+ if (err)
+ goto destroy_res;
+
+ mutex_unlock(&dev->flow_db->lock);
+
+ uobj->object = obj;
+ obj->dev = dev;
+ obj->ft_prio = ft_prio;
+ atomic_set(&obj->usecnt, 0);
+
+ return 0;
+
+destroy_res:
+ --ft_prio->anchor.rule_goto_table_ref;
+ mlx5_steering_anchor_destroy_res(ft_prio);
+put_flow_table:
+ put_flow_table(dev, ft_prio, true);
+free_obj:
+ mutex_unlock(&dev->flow_db->lock);
+ kfree(obj);
+
+ return err;
+}
+
+static struct ib_flow_action *
+mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
+ enum mlx5_ib_uapi_flow_table_type ft_type,
+ u8 num_actions, void *in)
+{
+ enum mlx5_flow_namespace_type namespace;
+ struct mlx5_ib_flow_action *maction;
+ int ret;
+
+ ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
+ if (ret)
+ return ERR_PTR(-EINVAL);
+
+ maction = kzalloc(sizeof(*maction), GFP_KERNEL);
+ if (!maction)
+ return ERR_PTR(-ENOMEM);
+
+ maction->flow_action_raw.modify_hdr =
+ mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in);
+
+ if (IS_ERR(maction->flow_action_raw.modify_hdr)) {
+ ret = PTR_ERR(maction->flow_action_raw.modify_hdr);
+ kfree(maction);
+ return ERR_PTR(ret);
+ }
+ maction->flow_action_raw.sub_type =
+ MLX5_IB_FLOW_ACTION_MODIFY_HEADER;
+ maction->flow_action_raw.dev = dev;
+
+ return &maction->ib_action;
+}
+
+static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
+{
+ return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ max_modify_header_actions) ||
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
+ max_modify_header_actions) ||
+ MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
+ max_modify_header_actions);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE);
+ struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
+ enum mlx5_ib_uapi_flow_table_type ft_type;
+ struct ib_flow_action *action;
+ int num_actions;
+ void *in;
+ int ret;
+
+ if (!mlx5_ib_modify_header_supported(mdev))
+ return -EOPNOTSUPP;
+
+ in = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
+
+ num_actions = uverbs_attr_ptr_get_array_size(
+ attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
+ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto));
+ if (num_actions < 0)
+ return num_actions;
+
+ ret = uverbs_get_const(&ft_type, attrs,
+ MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE);
+ if (ret)
+ return ret;
+ action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in);
+ if (IS_ERR(action))
+ return PTR_ERR(action);
+
+ uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev,
+ IB_FLOW_ACTION_UNSPECIFIED);
+
+ return 0;
+}
+
+static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev,
+ u8 packet_reformat_type,
+ u8 ft_type)
+{
+ switch (packet_reformat_type) {
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
+ return MLX5_CAP_FLOWTABLE(ibdev->mdev,
+ encap_general_header);
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
+ return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev,
+ reformat_l2_to_l3_tunnel);
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
+ return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev,
+ reformat_l3_tunnel_to_l2);
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
+ return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap);
+ break;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt)
+{
+ switch (dv_prt) {
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
+ *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
+ *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
+ *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mlx5_ib_flow_action_create_packet_reformat_ctx(
+ struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_action *maction,
+ u8 ft_type, u8 dv_prt,
+ void *in, size_t len)
+{
+ struct mlx5_pkt_reformat_params reformat_params;
+ enum mlx5_flow_namespace_type namespace;
+ u8 prm_prt;
+ int ret;
+
+ ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
+ if (ret)
+ return ret;
+
+ ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt);
+ if (ret)
+ return ret;
+
+ memset(&reformat_params, 0, sizeof(reformat_params));
+ reformat_params.type = prm_prt;
+ reformat_params.size = len;
+ reformat_params.data = in;
+ maction->flow_action_raw.pkt_reformat =
+ mlx5_packet_reformat_alloc(dev->mdev, &reformat_params,
+ namespace);
+ if (IS_ERR(maction->flow_action_raw.pkt_reformat)) {
+ ret = PTR_ERR(maction->flow_action_raw.pkt_reformat);
+ return ret;
+ }
+
+ maction->flow_action_raw.sub_type =
+ MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
+ maction->flow_action_raw.dev = dev;
+
+ return 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE);
+ struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
+ enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt;
+ enum mlx5_ib_uapi_flow_table_type ft_type;
+ struct mlx5_ib_flow_action *maction;
+ int ret;
+
+ ret = uverbs_get_const(&ft_type, attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_const(&dv_prt, attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE);
+ if (ret)
+ return ret;
+
+ if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type))
+ return -EOPNOTSUPP;
+
+ maction = kzalloc(sizeof(*maction), GFP_KERNEL);
+ if (!maction)
+ return -ENOMEM;
+
+ if (dv_prt ==
+ MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) {
+ maction->flow_action_raw.sub_type =
+ MLX5_IB_FLOW_ACTION_DECAP;
+ maction->flow_action_raw.dev = mdev;
+ } else {
+ void *in;
+ int len;
+
+ in = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
+ if (IS_ERR(in)) {
+ ret = PTR_ERR(in);
+ goto free_maction;
+ }
+
+ len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
+
+ ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev,
+ maction, ft_type, dv_prt, in, len);
+ if (ret)
+ goto free_maction;
+ }
+
+ uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev,
+ IB_FLOW_ACTION_UNSPECIFIED);
+ return 0;
+
+free_maction:
+ kfree(maction);
+ return ret;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_CREATE_FLOW,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
+ UVERBS_OBJECT_FLOW,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
+ UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
+ MLX5_IB_OBJECT_FLOW_MATCHER,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
+ UVERBS_OBJECT_QP,
+ UVERBS_ACCESS_READ),
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_READ),
+ UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_READ, 1,
+ MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_READ, 1, 1,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
+ UVERBS_ATTR_MIN_SIZE(sizeof(u32)),
+ UA_OPTIONAL,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
+ enum mlx5_ib_create_flow_flags,
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_DESTROY_FLOW,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
+ UVERBS_OBJECT_FLOW,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+ADD_UVERBS_METHODS(mlx5_ib_fs,
+ UVERBS_OBJECT_FLOW,
+ &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
+ UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES(
+ set_add_copy_action_in_auto)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
+ enum mlx5_ib_uapi_flow_table_type,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
+ UVERBS_ATTR_MIN_SIZE(1),
+ UA_ALLOC_AND_COPY,
+ UA_OPTIONAL),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
+ enum mlx5_ib_uapi_flow_action_packet_reformat_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
+ enum mlx5_ib_uapi_flow_table_type,
+ UA_MANDATORY));
+
+ADD_UVERBS_METHODS(
+ mlx5_ib_flow_actions,
+ UVERBS_OBJECT_FLOW_ACTION,
+ &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER),
+ &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
+ MLX5_IB_OBJECT_FLOW_MATCHER,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
+ UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
+ UA_MANDATORY),
+ UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
+ mlx5_ib_flow_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
+ UVERBS_ATTR_TYPE(u8),
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
+ enum ib_flow_flags,
+ UA_OPTIONAL),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
+ enum mlx5_ib_uapi_flow_table_type,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE,
+ MLX5_IB_OBJECT_FLOW_MATCHER,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
+ UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup),
+ &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
+ &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_STEERING_ANCHOR_CREATE,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE,
+ MLX5_IB_OBJECT_STEERING_ANCHOR,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE,
+ enum mlx5_ib_uapi_flow_table_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY,
+ UVERBS_ATTR_TYPE(u16),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_STEERING_ANCHOR_DESTROY_HANDLE,
+ MLX5_IB_OBJECT_STEERING_ANCHOR,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ MLX5_IB_OBJECT_STEERING_ANCHOR,
+ UVERBS_TYPE_ALLOC_IDR(steering_anchor_cleanup),
+ &UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE),
+ &UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY));
+
+const struct uapi_definition mlx5_ib_flow_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_FLOW_MATCHER),
+ UAPI_DEF_CHAIN_OBJ_TREE(
+ UVERBS_OBJECT_FLOW,
+ &mlx5_ib_fs),
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
+ &mlx5_ib_flow_actions),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_STEERING_ANCHOR,
+ UAPI_DEF_IS_OBJ_SUPPORTED(mlx5_ib_shared_ft_allowed)),
+ {},
+};
+
+static const struct ib_device_ops flow_ops = {
+ .create_flow = mlx5_ib_create_flow,
+ .destroy_flow = mlx5_ib_destroy_flow,
+ .destroy_flow_action = mlx5_ib_destroy_flow_action,
+};
+
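+/* Allocate the per-device flow database, including the per-port
+ * RDMA_TRANSPORT RX/TX priority arrays, and register the flow verbs ops.
+ */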
+int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
+{
+ int i, j;
+
+ dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
+
+ if (!dev->flow_db)
+ return -ENOMEM;
+
+ for (i = 0; i < MLX5_RDMA_TRANSPORT_BYPASS_PRIO; i++) {
+ dev->flow_db->rdma_transport_rx[i] =
+ kcalloc(dev->num_ports,
+ sizeof(struct mlx5_ib_flow_prio), GFP_KERNEL);
+ if (!dev->flow_db->rdma_transport_rx[i])
+ goto free_rdma_transport_rx;
+ }
+
+ for (j = 0; j < MLX5_RDMA_TRANSPORT_BYPASS_PRIO; j++) {
+ dev->flow_db->rdma_transport_tx[j] =
+ kcalloc(dev->num_ports,
+ sizeof(struct mlx5_ib_flow_prio), GFP_KERNEL);
+ if (!dev->flow_db->rdma_transport_tx[j])
+ goto free_rdma_transport_tx;
+ }
+
+ mutex_init(&dev->flow_db->lock);
+
+ ib_set_device_ops(&dev->ib_dev, &flow_ops);
+ return 0;
+
+free_rdma_transport_tx:
+ while (j--)
+ kfree(dev->flow_db->rdma_transport_tx[j]);
+free_rdma_transport_rx:
+ while (i--)
+ kfree(dev->flow_db->rdma_transport_rx[i]);
+ kfree(dev->flow_db);
+ return -ENOMEM;
+}
diff --git a/drivers/infiniband/hw/mlx5/fs.h b/drivers/infiniband/hw/mlx5/fs.h
new file mode 100644
index 000000000000..7abba0e2837c
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/fs.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_FS_H
+#define _MLX5_IB_FS_H
+
+#include "mlx5_ib.h"
+
+int mlx5_ib_fs_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev);
+
+static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev)
+{
+ int i;
+
+ /* When a steering anchor is created, a special flow table is also
+ * created for the user to reference. Since the user can reference it,
+ * the kernel cannot trust that when the user destroys the steering
+ * anchor, they no longer reference the flow table.
+ *
+ * To address this issue, when a user destroys a steering anchor, only
+ * the flow steering rule in the table is destroyed, but the table
+ * itself is kept to deal with the above scenario. The remaining
+ * resources are only removed when the RDMA device is destroyed, at
+ * which point it is safe to assume that all references are gone.
+ */
+ mlx5_ib_fs_cleanup_anchor(dev);
+ for (i = 0; i < MLX5_RDMA_TRANSPORT_BYPASS_PRIO; i++)
+ kfree(dev->flow_db->rdma_transport_tx[i]);
+ for (i = 0; i < MLX5_RDMA_TRANSPORT_BYPASS_PRIO; i++)
+ kfree(dev->flow_db->rdma_transport_rx[i]);
+ kfree(dev->flow_db);
+}
+#endif /* _MLX5_IB_FS_H */
diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c
index 4950df3f71b6..d5487834ed25 100644
--- a/drivers/infiniband/hw/mlx5/gsi.c
+++ b/drivers/infiniband/hw/mlx5/gsi.c
@@ -35,44 +35,19 @@
struct mlx5_ib_gsi_wr {
struct ib_cqe cqe;
struct ib_wc wc;
- int send_flags;
bool completed:1;
};
-struct mlx5_ib_gsi_qp {
- struct ib_qp ibqp;
- struct ib_qp *rx_qp;
- u8 port_num;
- struct ib_qp_cap cap;
- enum ib_sig_type sq_sig_type;
- /* Serialize qp state modifications */
- struct mutex mutex;
- struct ib_cq *cq;
- struct mlx5_ib_gsi_wr *outstanding_wrs;
- u32 outstanding_pi, outstanding_ci;
- int num_qps;
- /* Protects access to the tx_qps. Post send operations synchronize
- * with tx_qp creation in setup_qp(). Also protects the
- * outstanding_wrs array and indices.
- */
- spinlock_t lock;
- struct ib_qp **tx_qps;
-};
-
-static struct mlx5_ib_gsi_qp *gsi_qp(struct ib_qp *qp)
-{
- return container_of(qp, struct mlx5_ib_gsi_qp, ibqp);
-}
-
static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev)
{
return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn);
}
/* Call with gsi->lock locked */
-static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
+static void generate_completions(struct mlx5_ib_qp *mqp)
{
- struct ib_cq *gsi_cq = gsi->ibqp.send_cq;
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
+ struct ib_cq *gsi_cq = mqp->ibqp.send_cq;
struct mlx5_ib_gsi_wr *wr;
u32 index;
@@ -83,10 +58,7 @@ static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
if (!wr->completed)
break;
- if (gsi->sq_sig_type == IB_SIGNAL_ALL_WR ||
- wr->send_flags & IB_SEND_SIGNALED)
- WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc));
-
+ WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc));
wr->completed = false;
}
@@ -98,6 +70,7 @@ static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc)
struct mlx5_ib_gsi_qp *gsi = cq->cq_context;
struct mlx5_ib_gsi_wr *wr =
container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe);
+ struct mlx5_ib_qp *mqp = container_of(gsi, struct mlx5_ib_qp, gsi);
u64 wr_id;
unsigned long flags;
@@ -106,54 +79,43 @@ static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc)
wr_id = wr->wc.wr_id;
wr->wc = *wc;
wr->wc.wr_id = wr_id;
- wr->wc.qp = &gsi->ibqp;
+ wr->wc.qp = &mqp->ibqp;
- generate_completions(gsi);
+ generate_completions(mqp);
spin_unlock_irqrestore(&gsi->lock, flags);
}
-struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr)
+int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp,
+ struct ib_qp_init_attr *attr)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_gsi_qp *gsi;
- struct ib_qp_init_attr hw_init_attr = *init_attr;
- const u8 port_num = init_attr->port_num;
- const int num_pkeys = pd->device->attrs.max_pkeys;
- const int num_qps = mlx5_ib_deth_sqpn_cap(dev) ? num_pkeys : 0;
+ struct ib_qp_init_attr hw_init_attr = *attr;
+ const u8 port_num = attr->port_num;
+ int num_qps = 0;
int ret;
- mlx5_ib_dbg(dev, "creating GSI QP\n");
-
- if (port_num > ARRAY_SIZE(dev->devr.ports) || port_num < 1) {
- mlx5_ib_warn(dev,
- "invalid port number %d during GSI QP creation\n",
- port_num);
- return ERR_PTR(-EINVAL);
+ if (mlx5_ib_deth_sqpn_cap(dev)) {
+ if (MLX5_CAP_GEN(dev->mdev,
+ port_type) == MLX5_CAP_PORT_TYPE_IB)
+ num_qps = pd->device->attrs.max_pkeys;
+ else if (dev->lag_active)
+ num_qps = dev->lag_ports;
}
- gsi = kzalloc(sizeof(*gsi), GFP_KERNEL);
- if (!gsi)
- return ERR_PTR(-ENOMEM);
-
+ gsi = &mqp->gsi;
gsi->tx_qps = kcalloc(num_qps, sizeof(*gsi->tx_qps), GFP_KERNEL);
- if (!gsi->tx_qps) {
- ret = -ENOMEM;
- goto err_free;
- }
+ if (!gsi->tx_qps)
+ return -ENOMEM;
- gsi->outstanding_wrs = kcalloc(init_attr->cap.max_send_wr,
- sizeof(*gsi->outstanding_wrs),
- GFP_KERNEL);
+ gsi->outstanding_wrs =
+ kcalloc(attr->cap.max_send_wr, sizeof(*gsi->outstanding_wrs),
+ GFP_KERNEL);
if (!gsi->outstanding_wrs) {
ret = -ENOMEM;
goto err_free_tx;
}
- mutex_init(&gsi->mutex);
-
- mutex_lock(&dev->devr.mutex);
-
if (dev->devr.ports[port_num - 1].gsi) {
mlx5_ib_warn(dev, "GSI QP already exists on port %d\n",
port_num);
@@ -163,16 +125,15 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
gsi->num_qps = num_qps;
spin_lock_init(&gsi->lock);
- gsi->cap = init_attr->cap;
- gsi->sq_sig_type = init_attr->sq_sig_type;
- gsi->ibqp.qp_num = 1;
+ gsi->cap = attr->cap;
gsi->port_num = port_num;
- gsi->cq = ib_alloc_cq(pd->device, gsi, init_attr->cap.max_send_wr, 0,
+ gsi->cq = ib_alloc_cq(pd->device, gsi, attr->cap.max_send_wr, 0,
IB_POLL_SOFTIRQ);
if (IS_ERR(gsi->cq)) {
- mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n",
- PTR_ERR(gsi->cq));
+ mlx5_ib_warn(dev,
+ "unable to create send CQ for GSI QP. error %pe\n",
+ gsi->cq);
ret = PTR_ERR(gsi->cq);
goto err_free_wrs;
}
@@ -184,52 +145,43 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
hw_init_attr.cap.max_send_sge = 0;
hw_init_attr.cap.max_inline_data = 0;
}
+
gsi->rx_qp = ib_create_qp(pd, &hw_init_attr);
if (IS_ERR(gsi->rx_qp)) {
- mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n",
- PTR_ERR(gsi->rx_qp));
+ mlx5_ib_warn(dev,
+ "unable to create hardware GSI QP. error %pe\n",
+ gsi->rx_qp);
ret = PTR_ERR(gsi->rx_qp);
goto err_destroy_cq;
}
- dev->devr.ports[init_attr->port_num - 1].gsi = gsi;
-
- mutex_unlock(&dev->devr.mutex);
-
- return &gsi->ibqp;
+ dev->devr.ports[attr->port_num - 1].gsi = gsi;
+ return 0;
err_destroy_cq:
ib_free_cq(gsi->cq);
err_free_wrs:
- mutex_unlock(&dev->devr.mutex);
kfree(gsi->outstanding_wrs);
err_free_tx:
kfree(gsi->tx_qps);
-err_free:
- kfree(gsi);
- return ERR_PTR(ret);
+ return ret;
}
-int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp)
+int mlx5_ib_destroy_gsi(struct mlx5_ib_qp *mqp)
{
- struct mlx5_ib_dev *dev = to_mdev(qp->device);
- struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+ struct mlx5_ib_dev *dev = to_mdev(mqp->ibqp.device);
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
const int port_num = gsi->port_num;
int qp_index;
int ret;
- mlx5_ib_dbg(dev, "destroying GSI QP\n");
-
- mutex_lock(&dev->devr.mutex);
ret = ib_destroy_qp(gsi->rx_qp);
if (ret) {
mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. error %d\n",
ret);
- mutex_unlock(&dev->devr.mutex);
return ret;
}
dev->devr.ports[port_num - 1].gsi = NULL;
- mutex_unlock(&dev->devr.mutex);
gsi->rx_qp = NULL;
for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) {
@@ -243,8 +195,6 @@ int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp)
kfree(gsi->outstanding_wrs);
kfree(gsi->tx_qps);
- kfree(gsi);
-
return 0;
}
@@ -261,16 +211,15 @@ static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi)
.max_send_sge = gsi->cap.max_send_sge,
.max_inline_data = gsi->cap.max_inline_data,
},
- .sq_sig_type = gsi->sq_sig_type,
.qp_type = IB_QPT_UD,
- .create_flags = mlx5_ib_create_qp_sqpn_qp1(),
+ .create_flags = MLX5_IB_QP_CREATE_SQPN_QP1,
};
return ib_create_qp(pd, &init_attr);
}
static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp,
- u16 qp_index)
+ u16 pkey_index)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
struct ib_qp_attr attr;
@@ -279,7 +228,7 @@ static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp,
mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY | IB_QP_PORT;
attr.qp_state = IB_QPS_INIT;
- attr.pkey_index = qp_index;
+ attr.pkey_index = pkey_index;
attr.qkey = IB_QP1_QKEY;
attr.port_num = gsi->port_num;
ret = ib_modify_qp(qp, &attr, mask);
@@ -313,12 +262,17 @@ static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index)
{
struct ib_device *device = gsi->rx_qp->device;
struct mlx5_ib_dev *dev = to_mdev(device);
+ int pkey_index = qp_index;
+ struct mlx5_ib_qp *mqp;
struct ib_qp *qp;
unsigned long flags;
u16 pkey;
int ret;
- ret = ib_query_pkey(device, gsi->port_num, qp_index, &pkey);
+ if (MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_IB)
+ pkey_index = 0;
+
+ ret = ib_query_pkey(device, gsi->port_num, pkey_index, &pkey);
if (ret) {
mlx5_ib_warn(dev, "unable to read P_Key at port %d, index %d\n",
gsi->port_num, qp_index);
@@ -342,12 +296,16 @@ static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index)
qp = create_gsi_ud_qp(gsi);
if (IS_ERR(qp)) {
- mlx5_ib_warn(dev, "unable to create hardware UD QP for GSI: %ld\n",
- PTR_ERR(qp));
+ mlx5_ib_warn(dev,
+ "unable to create hardware UD QP for GSI: %pe\n",
+ qp);
return;
}
- ret = modify_to_rts(gsi, qp, qp_index);
+ mqp = to_mqp(qp);
+ if (dev->lag_active)
+ mqp->gsi_lag_port = qp_index + 1;
+ ret = modify_to_rts(gsi, qp, pkey_index);
if (ret)
goto err_destroy_qp;
@@ -362,58 +320,49 @@ err_destroy_qp:
WARN_ON_ONCE(qp);
}
-static void setup_qps(struct mlx5_ib_gsi_qp *gsi)
-{
- u16 qp_index;
-
- for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
- setup_qp(gsi, qp_index);
-}
-
int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
int attr_mask)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
- struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
+ u16 qp_index;
int ret;
mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state);
- mutex_lock(&gsi->mutex);
ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask);
if (ret) {
mlx5_ib_warn(dev, "unable to modify GSI rx QP: %d\n", ret);
- goto unlock;
+ return ret;
}
- if (to_mqp(gsi->rx_qp)->state == IB_QPS_RTS)
- setup_qps(gsi);
+ if (to_mqp(gsi->rx_qp)->state != IB_QPS_RTS)
+ return 0;
-unlock:
- mutex_unlock(&gsi->mutex);
-
- return ret;
+ for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
+ setup_qp(gsi, qp_index);
+ return 0;
}
int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr)
{
- struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
int ret;
- mutex_lock(&gsi->mutex);
ret = ib_query_qp(gsi->rx_qp, qp_attr, qp_attr_mask, qp_init_attr);
qp_init_attr->cap = gsi->cap;
- mutex_unlock(&gsi->mutex);
-
return ret;
}
/* Call with gsi->lock locked */
-static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi,
+static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_qp *mqp,
struct ib_ud_wr *wr, struct ib_wc *wc)
{
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
struct mlx5_ib_gsi_wr *gsi_wr;
@@ -442,22 +391,21 @@ static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi,
}
/* Call with gsi->lock locked */
-static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi,
- struct ib_ud_wr *wr)
+static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_qp *mqp, struct ib_ud_wr *wr)
{
struct ib_wc wc = {
{ .wr_id = wr->wr.wr_id },
.status = IB_WC_SUCCESS,
.opcode = IB_WC_SEND,
- .qp = &gsi->ibqp,
+ .qp = &mqp->ibqp,
};
int ret;
- ret = mlx5_ib_add_outstanding_wr(gsi, wr, &wc);
+ ret = mlx5_ib_add_outstanding_wr(mqp, wr, &wc);
if (ret)
return ret;
- generate_completions(gsi);
+ generate_completions(mqp);
return 0;
}
@@ -466,11 +414,15 @@ static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi,
static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr)
{
struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
+ struct mlx5_ib_ah *ah = to_mah(wr->ah);
int qp_index = wr->pkey_index;
- if (!mlx5_ib_deth_sqpn_cap(dev))
+ if (!gsi->num_qps)
return gsi->rx_qp;
+ if (dev->lag_active && ah->xmit_port)
+ qp_index = ah->xmit_port - 1;
+
if (qp_index >= gsi->num_qps)
return NULL;
@@ -480,7 +432,8 @@ static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr)
int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr)
{
- struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
struct ib_qp *tx_qp;
unsigned long flags;
int ret;
@@ -493,22 +446,21 @@ int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
spin_lock_irqsave(&gsi->lock, flags);
tx_qp = get_tx_qp(gsi, &cur_wr);
if (!tx_qp) {
- ret = mlx5_ib_gsi_silent_drop(gsi, &cur_wr);
+ ret = mlx5_ib_gsi_silent_drop(mqp, &cur_wr);
if (ret)
goto err;
spin_unlock_irqrestore(&gsi->lock, flags);
continue;
}
- ret = mlx5_ib_add_outstanding_wr(gsi, &cur_wr, NULL);
+ ret = mlx5_ib_add_outstanding_wr(mqp, &cur_wr, NULL);
if (ret)
goto err;
ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr);
if (ret) {
/* Undo the effect of adding the outstanding wr */
- gsi->outstanding_pi = (gsi->outstanding_pi - 1) %
- gsi->cap.max_send_wr;
+ gsi->outstanding_pi--;
goto err;
}
spin_unlock_irqrestore(&gsi->lock, flags);
@@ -525,17 +477,16 @@ err:
int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr)
{
- struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
return ib_post_recv(gsi->rx_qp, wr, bad_wr);
}
void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi)
{
- if (!gsi)
- return;
+ u16 qp_index;
- mutex_lock(&gsi->mutex);
- setup_qps(gsi);
- mutex_unlock(&gsi->mutex);
+ for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
+ setup_qp(gsi, qp_index);
}
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index 22e651cb5534..bbecca405171 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -8,19 +8,95 @@
#include "srq.h"
static int
-mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
+ struct mlx5_eswitch_rep *rep,
+ int vport_index)
{
struct mlx5_ib_dev *ibdev;
- int vport_index;
+ struct net_device *ndev;
- ibdev = mlx5_ib_get_uplink_ibdev(dev->priv.eswitch);
- vport_index = ibdev->free_port++;
+ ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB);
+ if (!ibdev)
+ return -EINVAL;
ibdev->port[vport_index].rep = rep;
- write_lock(&ibdev->port[vport_index].roce.netdev_lock);
- ibdev->port[vport_index].roce.netdev =
- mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport);
- write_unlock(&ibdev->port[vport_index].roce.netdev_lock);
+ rep->rep_data[REP_IB].priv = ibdev;
+ ndev = mlx5_ib_get_rep_netdev(rep->esw, rep->vport);
+
+ return ib_device_set_netdev(&ibdev->ib_dev, ndev, vport_index + 1);
+}
+
+static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);
+
+static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
+{
+ struct mlx5_core_dev *peer_dev;
+ int i;
+
+ mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+ u32 peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
+
+ if (mlx5_lag_is_mpesw(peer_dev))
+ *num_ports += peer_num_ports;
+ else
+ /* Only 1 ib port is the representor for all uplinks */
+ *num_ports += peer_num_ports - 1;
+ }
+}
+
+static int mlx5_ib_set_owner_transport(struct mlx5_core_dev *cur_owner,
+ struct mlx5_core_dev *new_owner)
+{
+ int ret;
+
+ if (!MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(cur_owner, ft_support) ||
+ !MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(cur_owner, ft_support))
+ return 0;
+
+ if (!MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager) ||
+ !MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager_other_eswitch))
+ return 0;
+
+ ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
+ FS_FT_RDMA_TRANSPORT_TX);
+ if (ret)
+ return ret;
+
+ ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
+ FS_FT_RDMA_TRANSPORT_RX);
+ if (ret) {
+ mlx5_fs_set_root_dev(cur_owner, cur_owner,
+ FS_FT_RDMA_TRANSPORT_TX);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void mlx5_ib_release_transport(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_dev *peer_dev;
+ int i, ret;
+
+ mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+ ret = mlx5_ib_set_owner_transport(peer_dev, peer_dev);
+ WARN_ON_ONCE(ret);
+ }
+}
+
+static int mlx5_ib_take_transport(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_dev *peer_dev;
+ int ret;
+ int i;
+
+ mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+ ret = mlx5_ib_set_owner_transport(peer_dev, dev);
+ if (ret) {
+ mlx5_ib_release_transport(dev);
+ return ret;
+ }
+ }
return 0;
}
@@ -28,112 +104,186 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
static int
mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
- int num_ports = MLX5_TOTAL_VPORTS(dev);
+ u32 num_ports = mlx5_eswitch_get_total_vports(dev);
+ struct mlx5_core_dev *lag_master = dev;
const struct mlx5_ib_profile *profile;
+ struct mlx5_core_dev *peer_dev;
struct mlx5_ib_dev *ibdev;
+ int new_uplink = false;
int vport_index;
+ int ret;
+ int i;
+
+ vport_index = rep->vport_index;
+
+ if (mlx5_lag_is_shared_fdb(dev)) {
+ if (mlx5_lag_is_master(dev)) {
+ mlx5_ib_num_ports_update(dev, &num_ports);
+ } else {
+ if (rep->vport == MLX5_VPORT_UPLINK) {
+ if (!mlx5_lag_is_mpesw(dev))
+ return 0;
+ new_uplink = true;
+ }
+ mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+ u32 peer_n_ports = mlx5_eswitch_get_total_vports(peer_dev);
+
+ if (mlx5_lag_is_master(peer_dev))
+ lag_master = peer_dev;
+ else if (!mlx5_lag_is_mpesw(dev))
+ /* Only 1 ib port is the representor for all uplinks */
+ peer_n_ports--;
+
+ if (mlx5_get_dev_index(peer_dev) < mlx5_get_dev_index(dev))
+ vport_index += peer_n_ports;
+ }
+ }
+ }
- if (rep->vport == MLX5_VPORT_UPLINK)
- profile = &uplink_rep_profile;
+ if (rep->vport == MLX5_VPORT_UPLINK && !new_uplink)
+ profile = &raw_eth_profile;
else
- return mlx5_ib_set_vport_rep(dev, rep);
+ return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);
- ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev);
- if (!ibdev)
- return -ENOMEM;
+ if (mlx5_lag_is_shared_fdb(dev)) {
+ ret = mlx5_ib_take_transport(lag_master);
+ if (ret)
+ return ret;
+ }
+
+ ibdev = ib_alloc_device_with_net(mlx5_ib_dev, ib_dev,
+ mlx5_core_net(lag_master));
+ if (!ibdev) {
+ ret = -ENOMEM;
+ goto release_transport;
+ }
ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port),
GFP_KERNEL);
if (!ibdev->port) {
- ib_dealloc_device(&ibdev->ib_dev);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail_port;
}
ibdev->is_rep = true;
- vport_index = ibdev->free_port++;
+ vport_index = rep->vport_index;
ibdev->port[vport_index].rep = rep;
- ibdev->port[vport_index].roce.netdev =
- mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport);
- ibdev->mdev = dev;
+ ibdev->mdev = lag_master;
ibdev->num_ports = num_ports;
-
- if (!__mlx5_ib_add(ibdev, profile))
- return -EINVAL;
+ ibdev->ib_dev.phys_port_cnt = num_ports;
+ ret = ib_device_set_netdev(&ibdev->ib_dev,
+ mlx5_ib_get_rep_netdev(lag_master->priv.eswitch,
+ rep->vport),
+ vport_index + 1);
+ if (ret)
+ goto fail_add;
+
+ ret = __mlx5_ib_add(ibdev, profile);
+ if (ret)
+ goto fail_add;
rep->rep_data[REP_IB].priv = ibdev;
+ if (mlx5_lag_is_shared_fdb(lag_master))
+ mlx5_ib_register_peer_vport_reps(lag_master);
return 0;
+
+fail_add:
+ kfree(ibdev->port);
+fail_port:
+ ib_dealloc_device(&ibdev->ib_dev);
+release_transport:
+ if (mlx5_lag_is_shared_fdb(lag_master))
+ mlx5_ib_release_transport(lag_master);
+
+ return ret;
+}
+
+static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
+{
+ return rep->rep_data[REP_IB].priv;
}
static void
mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
{
- struct mlx5_ib_dev *dev;
+ struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(rep->esw);
+ struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
+ int vport_index = rep->vport_index;
+ struct mlx5_ib_port *port;
+ int i;
- if (!rep->rep_data[REP_IB].priv ||
- rep->vport != MLX5_VPORT_UPLINK)
+ if (WARN_ON(!mdev))
return;
- dev = mlx5_ib_rep_to_dev(rep);
- __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
- rep->rep_data[REP_IB].priv = NULL;
-}
+ if (!dev)
+ return;
-static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
-{
- return mlx5_ib_rep_to_dev(rep);
-}
+ if (mlx5_lag_is_shared_fdb(mdev) &&
+ !mlx5_lag_is_master(mdev)) {
+ if (rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(mdev))
+ return;
+ for (i = 0; i < dev->num_ports; i++) {
+ if (dev->port[i].rep == rep)
+ break;
+ }
+ if (WARN_ON(i == dev->num_ports))
+ return;
+ vport_index = i;
+ }
-static const struct mlx5_eswitch_rep_ops rep_ops = {
- .load = mlx5_ib_vport_rep_load,
- .unload = mlx5_ib_vport_rep_unload,
- .get_proto_dev = mlx5_ib_vport_get_proto_dev,
-};
+ port = &dev->port[vport_index];
-void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev)
-{
- struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ ib_device_set_netdev(&dev->ib_dev, NULL, vport_index + 1);
+ rep->rep_data[REP_IB].priv = NULL;
+ port->rep = NULL;
- mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
-}
+ if (rep->vport == MLX5_VPORT_UPLINK) {
-void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev)
-{
- struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ if (mlx5_lag_is_shared_fdb(mdev) && !mlx5_lag_is_master(mdev))
+ return;
- mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
-}
+ if (mlx5_lag_is_shared_fdb(mdev)) {
+ struct mlx5_core_dev *peer_mdev;
+ struct mlx5_eswitch *esw;
-u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
-{
- return mlx5_eswitch_mode(esw);
+ mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
+ esw = peer_mdev->priv.eswitch;
+ mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
+ }
+ mlx5_ib_release_transport(mdev);
+ }
+ __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+ }
}
-struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
- int vport_index)
-{
- return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_IB);
-}
+static const struct mlx5_eswitch_rep_ops rep_ops = {
+ .load = mlx5_ib_vport_rep_load,
+ .unload = mlx5_ib_vport_rep_unload,
+ .get_proto_dev = mlx5_ib_rep_to_dev,
+};
-struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
- int vport_index)
+static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
{
- return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_ETH);
-}
+ struct mlx5_core_dev *peer_mdev;
+ struct mlx5_eswitch *esw;
+ int i;
-struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
-{
- return mlx5_eswitch_uplink_get_proto_dev(esw, REP_IB);
+ mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
+ esw = peer_mdev->priv.eswitch;
+ mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
+ }
}
-struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport)
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+ u16 vport_num)
{
- return mlx5_eswitch_vport_rep(esw, vport);
+ return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_ETH);
}
struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq,
- u16 port)
+ u32 port)
{
struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
struct mlx5_eswitch_rep *rep;
@@ -146,6 +296,51 @@ struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
rep = dev->port[port - 1].rep;
- return mlx5_eswitch_add_send_to_vport_rule(esw, rep->vport,
- sq->base.mqp.qpn);
+ return mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, sq->base.mqp.qpn);
+}
+
+static int mlx5r_rep_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = idev->mdev;
+ struct mlx5_eswitch *esw;
+
+ esw = mdev->priv.eswitch;
+ mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
+ return 0;
+}
+
+static void mlx5r_rep_remove(struct auxiliary_device *adev)
+{
+ struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = idev->mdev;
+ struct mlx5_eswitch *esw;
+
+ esw = mdev->priv.eswitch;
+ mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
+}
+
+static const struct auxiliary_device_id mlx5r_rep_id_table[] = {
+ { .name = MLX5_ADEV_NAME ".rdma-rep", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mlx5r_rep_id_table);
+
+static struct auxiliary_driver mlx5r_rep_driver = {
+ .name = "rep",
+ .probe = mlx5r_rep_probe,
+ .remove = mlx5r_rep_remove,
+ .id_table = mlx5r_rep_id_table,
+};
+
+int mlx5r_rep_init(void)
+{
+ return auxiliary_driver_register(&mlx5r_rep_driver);
+}
+
+void mlx5r_rep_cleanup(void)
+{
+ auxiliary_driver_unregister(&mlx5r_rep_driver);
}
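
Note: mlx5r_rep_probe() and mlx5r_rep_remove() bind to the ".rdma-rep" auxiliary device exposed by the mlx5 core driver, and mlx5r_rep_init()/mlx5r_rep_cleanup() merely register and unregister that auxiliary driver. The skeleton below is a generic sketch of the same auxiliary-bus pattern, not mlx5 code; every demo_* name and the "parent_module.demo" id string are hypothetical.

#include <linux/auxiliary_bus.h>
#include <linux/module.h>

static int demo_probe(struct auxiliary_device *adev,
                      const struct auxiliary_device_id *id)
{
        dev_info(&adev->dev, "bound to %s\n", id->name);
        return 0;
}

static void demo_remove(struct auxiliary_device *adev)
{
        dev_info(&adev->dev, "unbound\n");
}

/* Matching is by "<registering module>.<device name>", which is why the
 * rep driver above uses MLX5_ADEV_NAME ".rdma-rep". */
static const struct auxiliary_device_id demo_id_table[] = {
        { .name = "parent_module.demo", },
        {},
};
MODULE_DEVICE_TABLE(auxiliary, demo_id_table);

static struct auxiliary_driver demo_driver = {
        .name = "demo",
        .probe = demo_probe,
        .remove = demo_remove,
        .id_table = demo_id_table,
};
module_auxiliary_driver(demo_driver);

MODULE_DESCRIPTION("Auxiliary-bus driver skeleton (illustration only)");
MODULE_LICENSE("GPL");
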
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h
index 22adce2d6795..9c55e5c528b4 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.h
+++ b/drivers/infiniband/hw/mlx5/ib_rep.h
@@ -9,69 +9,32 @@
#include <linux/mlx5/eswitch.h>
#include "mlx5_ib.h"
-#ifdef CONFIG_MLX5_ESWITCH
-extern const struct mlx5_ib_profile uplink_rep_profile;
+extern const struct mlx5_ib_profile raw_eth_profile;
-u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw);
-struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
- int vport_index);
-struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw);
-struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
- int vport_index);
-void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev);
-void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev);
+#ifdef CONFIG_MLX5_ESWITCH
+int mlx5r_rep_init(void);
+void mlx5r_rep_cleanup(void);
struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq,
- u16 port);
+ u32 port);
struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
- int vport_index);
+ u16 vport_num);
#else /* CONFIG_MLX5_ESWITCH */
-static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
-{
- return SRIOV_NONE;
-}
-
-static inline
-struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
- int vport_index)
-{
- return NULL;
-}
-
-static inline
-struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw)
-{
- return NULL;
-}
-
-static inline
-struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw,
- int vport_index)
-{
- return NULL;
-}
-
-static inline void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev) {}
-static inline void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev) {}
+static inline int mlx5r_rep_init(void) { return 0; }
+static inline void mlx5r_rep_cleanup(void) {}
static inline
struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq,
- u16 port)
+ u32 port)
{
return NULL;
}
static inline
struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
- int vport_index)
+ u16 vport_num)
{
return NULL;
}
#endif
-
-static inline
-struct mlx5_ib_dev *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
-{
- return rep->rep_data[REP_IB].priv;
-}
#endif /* __MLX5_IB_REP_H__ */
diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c
index 649a3364f838..afeb5e53254f 100644
--- a/drivers/infiniband/hw/mlx5/ib_virt.c
+++ b/drivers/infiniband/hw/mlx5/ib_virt.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-#include <linux/module.h>
#include <linux/mlx5/vport.h>
#include "mlx5_ib.h"
@@ -48,7 +47,7 @@ static inline u32 mlx_to_net_policy(enum port_state_policy mlx_policy)
}
}
-int mlx5_ib_get_vf_config(struct ib_device *device, int vf, u8 port,
+int mlx5_ib_get_vf_config(struct ib_device *device, int vf, u32 port,
struct ifla_vf_info *info)
{
struct mlx5_ib_dev *dev = to_mdev(device);
@@ -91,7 +90,7 @@ static inline enum port_state_policy net_to_mlx_policy(int policy)
}
int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
- u8 port, int state)
+ u32 port, int state)
{
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_core_dev *mdev = dev->mdev;
@@ -119,7 +118,7 @@ out:
}
int mlx5_ib_get_vf_stats(struct ib_device *device, int vf,
- u8 port, struct ifla_vf_stats *stats)
+ u32 port, struct ifla_vf_stats *stats)
{
int out_sz = MLX5_ST_SZ_BYTES(query_vport_counter_out);
struct mlx5_core_dev *mdev;
@@ -134,7 +133,7 @@ int mlx5_ib_get_vf_stats(struct ib_device *device, int vf,
if (!out)
return -ENOMEM;
- err = mlx5_core_query_vport_counter(mdev, true, vf, port, out, out_sz);
+ err = mlx5_core_query_vport_counter(mdev, true, vf, port, out);
if (err)
goto ex;
@@ -149,7 +148,8 @@ ex:
return err;
}
-static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid)
+static int set_vf_node_guid(struct ib_device *device, int vf, u32 port,
+ u64 guid)
{
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_core_dev *mdev = dev->mdev;
@@ -164,13 +164,16 @@ static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid)
in->field_select = MLX5_HCA_VPORT_SEL_NODE_GUID;
in->node_guid = guid;
err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
- if (!err)
+ if (!err) {
vfs_ctx[vf].node_guid = guid;
+ vfs_ctx[vf].node_guid_valid = 1;
+ }
kfree(in);
return err;
}
-static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid)
+static int set_vf_port_guid(struct ib_device *device, int vf, u32 port,
+ u64 guid)
{
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_core_dev *mdev = dev->mdev;
@@ -185,13 +188,15 @@ static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid)
in->field_select = MLX5_HCA_VPORT_SEL_PORT_GUID;
in->port_guid = guid;
err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
- if (!err)
+ if (!err) {
vfs_ctx[vf].port_guid = guid;
+ vfs_ctx[vf].port_guid_valid = 1;
+ }
kfree(in);
return err;
}
-int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
+int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u32 port,
u64 guid, int type)
{
if (type == IFLA_VF_IB_NODE_GUID)
@@ -201,3 +206,19 @@ int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
return -EINVAL;
}
+
+int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u32 port,
+ struct ifla_vf_guid *node_guid,
+ struct ifla_vf_guid *port_guid)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx;
+
+ node_guid->guid =
+ vfs_ctx[vf].node_guid_valid ? vfs_ctx[vf].node_guid : 0;
+ port_guid->guid =
+ vfs_ctx[vf].port_guid_valid ? vfs_ctx[vf].port_guid : 0;
+
+ return 0;
+}
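
Note: set_vf_node_guid() and set_vf_port_guid() now mark the cached GUID valid only after the vport-context modify succeeds, so the new mlx5_ib_get_vf_guid() reports 0 for a GUID that was never administratively set. A standalone sketch of that set-then-mark-valid / read-guarded-by-valid pattern, with purely hypothetical names:

#include <stdint.h>
#include <stdio.h>

struct vf_ctx {
        uint64_t node_guid;
        unsigned int node_guid_valid : 1;
};

/* Mark the cached value valid only once the (simulated) device write
 * succeeded, so readers never see a half-configured GUID. */
static int set_guid(struct vf_ctx *vf, uint64_t guid, int hw_ok)
{
        if (!hw_ok)
                return -1;
        vf->node_guid = guid;
        vf->node_guid_valid = 1;
        return 0;
}

static uint64_t get_guid(const struct vf_ctx *vf)
{
        return vf->node_guid_valid ? vf->node_guid : 0;
}

int main(void)
{
        struct vf_ctx vf = { 0 };

        printf("before set: 0x%llx\n", (unsigned long long)get_guid(&vf));
        set_guid(&vf, 0x1122334455667788ULL, 1);
        printf("after set:  0x%llx\n", (unsigned long long)get_guid(&vf));
        return 0;
}
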
diff --git a/drivers/infiniband/hw/mlx5/macsec.c b/drivers/infiniband/hw/mlx5/macsec.c
new file mode 100644
index 000000000000..3c56eb5eddf3
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/macsec.c
@@ -0,0 +1,364 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
+
+#include "macsec.h"
+#include <linux/mlx5/macsec.h>
+
+struct mlx5_reserved_gids {
+ int macsec_index;
+ const struct ib_gid_attr *physical_gid;
+};
+
+struct mlx5_roce_gids {
+ struct list_head roce_gid_list_entry;
+ u16 gid_idx;
+ union {
+ struct sockaddr_in sockaddr_in;
+ struct sockaddr_in6 sockaddr_in6;
+ } addr;
+};
+
+struct mlx5_macsec_device {
+ struct list_head macsec_devices_list_entry;
+ void *macdev;
+ struct list_head macsec_roce_gids;
+ struct list_head tx_rules_list;
+ struct list_head rx_rules_list;
+};
+
+static void cleanup_macsec_device(struct mlx5_macsec_device *macsec_device)
+{
+ if (!list_empty(&macsec_device->tx_rules_list) ||
+ !list_empty(&macsec_device->rx_rules_list) ||
+ !list_empty(&macsec_device->macsec_roce_gids))
+ return;
+
+ list_del(&macsec_device->macsec_devices_list_entry);
+ kfree(macsec_device);
+}
+
+static struct mlx5_macsec_device *get_macsec_device(void *macdev,
+ struct list_head *macsec_devices_list)
+{
+ struct mlx5_macsec_device *iter, *macsec_device = NULL;
+
+ list_for_each_entry(iter, macsec_devices_list, macsec_devices_list_entry) {
+ if (iter->macdev == macdev) {
+ macsec_device = iter;
+ break;
+ }
+ }
+
+ if (macsec_device)
+ return macsec_device;
+
+ macsec_device = kzalloc(sizeof(*macsec_device), GFP_KERNEL);
+ if (!macsec_device)
+ return NULL;
+
+ macsec_device->macdev = macdev;
+ INIT_LIST_HEAD(&macsec_device->tx_rules_list);
+ INIT_LIST_HEAD(&macsec_device->rx_rules_list);
+ INIT_LIST_HEAD(&macsec_device->macsec_roce_gids);
+ list_add(&macsec_device->macsec_devices_list_entry, macsec_devices_list);
+
+ return macsec_device;
+}
+
+static void mlx5_macsec_del_roce_gid(struct mlx5_macsec_device *macsec_device, u16 gid_idx)
+{
+ struct mlx5_roce_gids *current_gid, *next_gid;
+
+ list_for_each_entry_safe(current_gid, next_gid, &macsec_device->macsec_roce_gids,
+ roce_gid_list_entry)
+ if (current_gid->gid_idx == gid_idx) {
+ list_del(&current_gid->roce_gid_list_entry);
+ kfree(current_gid);
+ }
+}
+
+static void mlx5_macsec_save_roce_gid(struct mlx5_macsec_device *macsec_device,
+ const struct sockaddr *addr, u16 gid_idx)
+{
+ struct mlx5_roce_gids *roce_gids;
+
+ roce_gids = kzalloc(sizeof(*roce_gids), GFP_KERNEL);
+ if (!roce_gids)
+ return;
+
+ roce_gids->gid_idx = gid_idx;
+ if (addr->sa_family == AF_INET)
+ memcpy(&roce_gids->addr.sockaddr_in, addr, sizeof(roce_gids->addr.sockaddr_in));
+ else
+ memcpy(&roce_gids->addr.sockaddr_in6, addr, sizeof(roce_gids->addr.sockaddr_in6));
+
+ list_add_tail(&roce_gids->roce_gid_list_entry, &macsec_device->macsec_roce_gids);
+}
+
+static void handle_macsec_gids(struct list_head *macsec_devices_list,
+ struct mlx5_macsec_event_data *data)
+{
+ struct mlx5_macsec_device *macsec_device;
+ struct mlx5_roce_gids *gid;
+
+ macsec_device = get_macsec_device(data->macdev, macsec_devices_list);
+ if (!macsec_device)
+ return;
+
+ list_for_each_entry(gid, &macsec_device->macsec_roce_gids, roce_gid_list_entry) {
+ mlx5_macsec_add_roce_sa_rules(data->fs_id, (struct sockaddr *)&gid->addr,
+ gid->gid_idx, &macsec_device->tx_rules_list,
+ &macsec_device->rx_rules_list, data->macsec_fs,
+ data->is_tx);
+ }
+}
+
+static void del_sa_roce_rule(struct list_head *macsec_devices_list,
+ struct mlx5_macsec_event_data *data)
+{
+ struct mlx5_macsec_device *macsec_device;
+
+ macsec_device = get_macsec_device(data->macdev, macsec_devices_list);
+ WARN_ON(!macsec_device);
+
+ mlx5_macsec_del_roce_sa_rules(data->fs_id, data->macsec_fs,
+ &macsec_device->tx_rules_list,
+ &macsec_device->rx_rules_list, data->is_tx);
+}
+
+static int macsec_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct mlx5_macsec *macsec = container_of(nb, struct mlx5_macsec, blocking_events_nb);
+
+ mutex_lock(&macsec->lock);
+ switch (event) {
+ case MLX5_DRIVER_EVENT_MACSEC_SA_ADDED:
+ handle_macsec_gids(&macsec->macsec_devices_list, data);
+ break;
+ case MLX5_DRIVER_EVENT_MACSEC_SA_DELETED:
+ del_sa_roce_rule(&macsec->macsec_devices_list, data);
+ break;
+ default:
+ mutex_unlock(&macsec->lock);
+ return NOTIFY_DONE;
+ }
+ mutex_unlock(&macsec->lock);
+ return NOTIFY_OK;
+}
+
+void mlx5r_macsec_event_register(struct mlx5_ib_dev *dev)
+{
+ if (!mlx5_is_macsec_roce_supported(dev->mdev)) {
+ mlx5_ib_dbg(dev, "RoCE MACsec not supported due to capabilities\n");
+ return;
+ }
+
+ dev->macsec.blocking_events_nb.notifier_call = macsec_event;
+ blocking_notifier_chain_register(&dev->mdev->macsec_nh,
+ &dev->macsec.blocking_events_nb);
+}
+
+void mlx5r_macsec_event_unregister(struct mlx5_ib_dev *dev)
+{
+ if (!mlx5_is_macsec_roce_supported(dev->mdev)) {
+ mlx5_ib_dbg(dev, "RoCE MACsec not supported due to capabilities\n");
+ return;
+ }
+
+ blocking_notifier_chain_unregister(&dev->mdev->macsec_nh,
+ &dev->macsec.blocking_events_nb);
+}
+
+int mlx5r_macsec_init_gids_and_devlist(struct mlx5_ib_dev *dev)
+{
+ int i, j, max_gids;
+
+ if (!mlx5_is_macsec_roce_supported(dev->mdev)) {
+ mlx5_ib_dbg(dev, "RoCE MACsec not supported due to capabilities\n");
+ return 0;
+ }
+
+ max_gids = MLX5_CAP_ROCE(dev->mdev, roce_address_table_size);
+ for (i = 0; i < dev->num_ports; i++) {
+ dev->port[i].reserved_gids = kcalloc(max_gids,
+ sizeof(*dev->port[i].reserved_gids),
+ GFP_KERNEL);
+ if (!dev->port[i].reserved_gids)
+ goto err;
+
+ for (j = 0; j < max_gids; j++)
+ dev->port[i].reserved_gids[j].macsec_index = -1;
+ }
+
+ INIT_LIST_HEAD(&dev->macsec.macsec_devices_list);
+ mutex_init(&dev->macsec.lock);
+
+ return 0;
+err:
+ while (i >= 0) {
+ kfree(dev->port[i].reserved_gids);
+ i--;
+ }
+ return -ENOMEM;
+}
+
+void mlx5r_macsec_dealloc_gids(struct mlx5_ib_dev *dev)
+{
+ int i;
+
+ if (!mlx5_is_macsec_roce_supported(dev->mdev))
+ mlx5_ib_dbg(dev, "RoCE MACsec not supported due to capabilities\n");
+
+ for (i = 0; i < dev->num_ports; i++)
+ kfree(dev->port[i].reserved_gids);
+
+ mutex_destroy(&dev->macsec.lock);
+}
+
+int mlx5r_add_gid_macsec_operations(const struct ib_gid_attr *attr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(attr->device);
+ struct mlx5_macsec_device *macsec_device;
+ const struct ib_gid_attr *physical_gid;
+ struct mlx5_reserved_gids *mgids;
+ struct net_device *ndev;
+ int ret = 0;
+ union {
+ struct sockaddr_in sockaddr_in;
+ struct sockaddr_in6 sockaddr_in6;
+ } addr;
+
+ if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+ return 0;
+
+ if (!mlx5_is_macsec_roce_supported(dev->mdev)) {
+ mlx5_ib_dbg(dev, "RoCE MACsec not supported due to capabilities\n");
+ return 0;
+ }
+
+ rcu_read_lock();
+ ndev = rcu_dereference(attr->ndev);
+ if (!ndev) {
+ rcu_read_unlock();
+ return -ENODEV;
+ }
+
+ if (!netif_is_macsec(ndev) || !macsec_netdev_is_offloaded(ndev)) {
+ rcu_read_unlock();
+ return 0;
+ }
+ dev_hold(ndev);
+ rcu_read_unlock();
+
+ mutex_lock(&dev->macsec.lock);
+ macsec_device = get_macsec_device(ndev, &dev->macsec.macsec_devices_list);
+ if (!macsec_device) {
+ ret = -ENOMEM;
+ goto dev_err;
+ }
+
+ physical_gid = rdma_find_gid(attr->device, &attr->gid,
+ attr->gid_type, NULL);
+ if (!IS_ERR(physical_gid)) {
+ ret = set_roce_addr(to_mdev(physical_gid->device),
+ physical_gid->port_num,
+ physical_gid->index, NULL,
+ physical_gid);
+ if (ret)
+ goto gid_err;
+
+ mgids = &dev->port[attr->port_num - 1].reserved_gids[physical_gid->index];
+ mgids->macsec_index = attr->index;
+ mgids->physical_gid = physical_gid;
+ }
+
+ /* Proceed with adding steering rules, regardless of whether there was gid ambiguity. */
+ rdma_gid2ip((struct sockaddr *)&addr, &attr->gid);
+ ret = mlx5_macsec_add_roce_rule(ndev, (struct sockaddr *)&addr, attr->index,
+ &macsec_device->tx_rules_list,
+ &macsec_device->rx_rules_list, dev->mdev->macsec_fs);
+ if (ret && !IS_ERR(physical_gid))
+ goto rule_err;
+
+ mlx5_macsec_save_roce_gid(macsec_device, (struct sockaddr *)&addr, attr->index);
+
+ dev_put(ndev);
+ mutex_unlock(&dev->macsec.lock);
+ return ret;
+
+rule_err:
+ set_roce_addr(to_mdev(physical_gid->device), physical_gid->port_num,
+ physical_gid->index, &physical_gid->gid, physical_gid);
+ mgids->macsec_index = -1;
+gid_err:
+ rdma_put_gid_attr(physical_gid);
+ cleanup_macsec_device(macsec_device);
+dev_err:
+ dev_put(ndev);
+ mutex_unlock(&dev->macsec.lock);
+ return ret;
+}
+
+void mlx5r_del_gid_macsec_operations(const struct ib_gid_attr *attr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(attr->device);
+ struct mlx5_macsec_device *macsec_device;
+ struct mlx5_reserved_gids *mgids;
+ struct net_device *ndev;
+ int i, max_gids;
+
+ if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+ return;
+
+ if (!mlx5_is_macsec_roce_supported(dev->mdev)) {
+ mlx5_ib_dbg(dev, "RoCE MACsec not supported due to capabilities\n");
+ return;
+ }
+
+ mgids = &dev->port[attr->port_num - 1].reserved_gids[attr->index];
+ if (mgids->macsec_index != -1) { /* Checking if physical gid has ambiguous IP */
+ rdma_put_gid_attr(mgids->physical_gid);
+ mgids->macsec_index = -1;
+ return;
+ }
+
+ rcu_read_lock();
+ ndev = rcu_dereference(attr->ndev);
+ if (!ndev) {
+ rcu_read_unlock();
+ return;
+ }
+
+ if (!netif_is_macsec(ndev) || !macsec_netdev_is_offloaded(ndev)) {
+ rcu_read_unlock();
+ return;
+ }
+ dev_hold(ndev);
+ rcu_read_unlock();
+
+ mutex_lock(&dev->macsec.lock);
+ max_gids = MLX5_CAP_ROCE(dev->mdev, roce_address_table_size);
+ for (i = 0; i < max_gids; i++) { /* Checking if macsec gid has ambiguous IP */
+ mgids = &dev->port[attr->port_num - 1].reserved_gids[i];
+ if (mgids->macsec_index == attr->index) {
+ const struct ib_gid_attr *physical_gid = mgids->physical_gid;
+
+ set_roce_addr(to_mdev(physical_gid->device),
+ physical_gid->port_num,
+ physical_gid->index,
+ &physical_gid->gid, physical_gid);
+
+ rdma_put_gid_attr(physical_gid);
+ mgids->macsec_index = -1;
+ break;
+ }
+ }
+ macsec_device = get_macsec_device(ndev, &dev->macsec.macsec_devices_list);
+ mlx5_macsec_del_roce_rule(attr->index, dev->mdev->macsec_fs,
+ &macsec_device->tx_rules_list, &macsec_device->rx_rules_list);
+ mlx5_macsec_del_roce_gid(macsec_device, attr->index);
+ cleanup_macsec_device(macsec_device);
+
+ dev_put(ndev);
+ mutex_unlock(&dev->macsec.lock);
+}
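
Note: macsec_event() is invoked from the mlx5 core's blocking notifier chain, recovers its mlx5_macsec context with container_of() on the embedded notifier_block, and returns NOTIFY_DONE for events it does not consume. The fragment below is a generic sketch of that notifier pattern; apart from the <linux/notifier.h> API, every demo_* name is hypothetical.

#include <linux/notifier.h>

struct demo_ctx {
        struct notifier_block nb;       /* embedded, so container_of() works */
        int handled;
};

static int demo_event(struct notifier_block *nb, unsigned long event,
                      void *data)
{
        struct demo_ctx *ctx = container_of(nb, struct demo_ctx, nb);

        if (event != 1)                 /* only one event of interest here */
                return NOTIFY_DONE;

        ctx->handled++;
        return NOTIFY_OK;
}

static void demo_register(struct blocking_notifier_head *chain,
                          struct demo_ctx *ctx)
{
        ctx->nb.notifier_call = demo_event;
        blocking_notifier_chain_register(chain, &ctx->nb);
}

static void demo_unregister(struct blocking_notifier_head *chain,
                            struct demo_ctx *ctx)
{
        blocking_notifier_chain_unregister(chain, &ctx->nb);
}
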
diff --git a/drivers/infiniband/hw/mlx5/macsec.h b/drivers/infiniband/hw/mlx5/macsec.h
new file mode 100644
index 000000000000..9b77ba90f0f4
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/macsec.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5_MACSEC_H__
+#define __MLX5_MACSEC_H__
+
+#include <net/macsec.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_addr.h>
+#include "mlx5_ib.h"
+
+#ifdef CONFIG_MLX5_MACSEC
+struct mlx5_reserved_gids;
+
+int mlx5r_add_gid_macsec_operations(const struct ib_gid_attr *attr);
+void mlx5r_del_gid_macsec_operations(const struct ib_gid_attr *attr);
+int mlx5r_macsec_init_gids_and_devlist(struct mlx5_ib_dev *dev);
+void mlx5r_macsec_dealloc_gids(struct mlx5_ib_dev *dev);
+void mlx5r_macsec_event_register(struct mlx5_ib_dev *dev);
+void mlx5r_macsec_event_unregister(struct mlx5_ib_dev *dev);
+#else
+static inline int mlx5r_add_gid_macsec_operations(const struct ib_gid_attr *attr) { return 0; }
+static inline void mlx5r_del_gid_macsec_operations(const struct ib_gid_attr *attr) {}
+static inline int mlx5r_macsec_init_gids_and_devlist(struct mlx5_ib_dev *dev) { return 0; }
+static inline void mlx5r_macsec_dealloc_gids(struct mlx5_ib_dev *dev) {}
+static inline void mlx5r_macsec_event_register(struct mlx5_ib_dev *dev) {}
+static inline void mlx5r_macsec_event_unregister(struct mlx5_ib_dev *dev) {}
+#endif
+#endif /* __MLX5_MACSEC_H__ */
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 6c529e6f3a01..2453ae4384a7 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-#include <linux/mlx5/cmd.h>
#include <linux/mlx5/vport.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_smi.h>
@@ -43,17 +42,17 @@ enum {
MLX5_IB_VENDOR_CLASS2 = 0xa
};
-static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num,
+static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u32 port_num,
struct ib_mad *in_mad)
{
if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED &&
in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
return true;
- return dev->mdev->port_caps[port_num - 1].has_smi;
+ return dev->port_caps[port_num - 1].has_smi;
}
static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey,
- int ignore_bkey, u8 port, const struct ib_wc *in_wc,
+ int ignore_bkey, u32 port, const struct ib_wc *in_wc,
const struct ib_grh *in_grh, const void *in_mad,
void *response_mad)
{
@@ -70,62 +69,10 @@ static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey,
if (ignore_bkey || !in_wc)
op_modifier |= 0x2;
- return mlx5_cmd_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier,
+ return mlx5_cmd_mad_ifc(dev, in_mad, response_mad, op_modifier,
port);
}
-static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
- const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad *in_mad, struct ib_mad *out_mad)
-{
- u16 slid;
- int err;
-
- slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE);
-
- if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0)
- return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-
- if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
- in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
- if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
- in_mad->mad_hdr.method != IB_MGMT_METHOD_SET &&
- in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
- return IB_MAD_RESULT_SUCCESS;
-
- /* Don't process SMInfo queries -- the SMA can't handle them.
- */
- if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO)
- return IB_MAD_RESULT_SUCCESS;
- } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
- in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS1 ||
- in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS2 ||
- in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) {
- if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
- in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
- return IB_MAD_RESULT_SUCCESS;
- } else {
- return IB_MAD_RESULT_SUCCESS;
- }
-
- err = mlx5_MAD_IFC(to_mdev(ibdev),
- mad_flags & IB_MAD_IGNORE_MKEY,
- mad_flags & IB_MAD_IGNORE_BKEY,
- port_num, in_wc, in_grh, in_mad, out_mad);
- if (err)
- return IB_MAD_RESULT_FAILURE;
-
- /* set return bit in status of directed route responses */
- if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
- out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
-
- if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
- /* no response for trap repress */
- return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-
- return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
-}
-
static void pma_cnt_ext_assign(struct ib_pma_portcounters_ext *pma_cnt_ext,
void *out)
{
@@ -200,11 +147,91 @@ static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt,
vl_15_dropped);
}
-static int process_pma_cmd(struct mlx5_core_dev *mdev, u8 port_num,
- const struct ib_mad *in_mad, struct ib_mad *out_mad)
+static void pma_cnt_ext_assign_ppcnt(struct ib_pma_portcounters_ext *cnt_ext,
+ void *out)
{
+ void *out_pma = MLX5_ADDR_OF(ppcnt_reg, out,
+ counter_set);
+
+#define MLX5_GET_EXT_CNTR(counter_name) \
+ MLX5_GET64(ib_ext_port_cntrs_grp_data_layout, \
+ out_pma, counter_name##_high)
+
+ cnt_ext->port_xmit_data =
+ cpu_to_be64(MLX5_GET_EXT_CNTR(port_xmit_data) >> 2);
+ cnt_ext->port_rcv_data =
+ cpu_to_be64(MLX5_GET_EXT_CNTR(port_rcv_data) >> 2);
+
+ cnt_ext->port_xmit_packets =
+ cpu_to_be64(MLX5_GET_EXT_CNTR(port_xmit_pkts));
+ cnt_ext->port_rcv_packets =
+ cpu_to_be64(MLX5_GET_EXT_CNTR(port_rcv_pkts));
+
+ cnt_ext->port_unicast_xmit_packets =
+ cpu_to_be64(MLX5_GET_EXT_CNTR(port_unicast_xmit_pkts));
+ cnt_ext->port_unicast_rcv_packets =
+ cpu_to_be64(MLX5_GET_EXT_CNTR(port_unicast_rcv_pkts));
+
+ cnt_ext->port_multicast_xmit_packets =
+ cpu_to_be64(MLX5_GET_EXT_CNTR(port_multicast_xmit_pkts));
+ cnt_ext->port_multicast_rcv_packets =
+ cpu_to_be64(MLX5_GET_EXT_CNTR(port_multicast_rcv_pkts));
+}
+
+static int query_ib_ppcnt(struct mlx5_core_dev *dev, u8 port_num, u8 plane_num,
+ void *out, size_t sz, bool ext)
+{
+ u32 *in;
int err;
+
+ in = kvzalloc(sz, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ return err;
+ }
+
+ MLX5_SET(ppcnt_reg, in, local_port, port_num);
+ MLX5_SET(ppcnt_reg, in, plane_ind, plane_num);
+
+ if (ext)
+ MLX5_SET(ppcnt_reg, in, grp,
+ MLX5_INFINIBAND_EXTENDED_PORT_COUNTERS_GROUP);
+ else
+ MLX5_SET(ppcnt_reg, in, grp,
+ MLX5_INFINIBAND_PORT_COUNTERS_GROUP);
+ err = mlx5_core_access_reg(dev, in, sz, out,
+ sz, MLX5_REG_PPCNT, 0, 0);
+
+ kvfree(in);
+ return err;
+}
+
+static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num,
+ const struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+ struct mlx5_core_dev *mdev;
+ bool native_port = true;
+ u32 mdev_port_num;
void *out_cnt;
+ int err;
+
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
+ if (!mdev) {
+ /* Failed to get the native port, likely because the 2nd port is
+ * still unaffiliated. In such a case, default to the 1st port and
+ * the attached PF device.
+ */
+ native_port = false;
+ mdev = dev->mdev;
+ mdev_port_num = 1;
+ }
+ if (MLX5_CAP_GEN(dev->mdev, num_ports) == 1 &&
+ !mlx5_core_mp_enabled(mdev) &&
+ dev->ib_dev.type != RDMA_DEVICE_TYPE_SMI) {
+ /* set local port to one for Function-Per-Port HCA. */
+ mdev = dev->mdev;
+ mdev_port_num = 1;
+ }
/* Declaring support of extended counters */
if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) {
@@ -212,83 +239,131 @@ static int process_pma_cmd(struct mlx5_core_dev *mdev, u8 port_num,
cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
memcpy((out_mad->data + 40), &cpi, sizeof(cpi));
- return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+ err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+ goto done;
}
if (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT) {
struct ib_pma_portcounters_ext *pma_cnt_ext =
(struct ib_pma_portcounters_ext *)(out_mad->data + 40);
- int sz = MLX5_ST_SZ_BYTES(query_vport_counter_out);
+ int sz = max(MLX5_ST_SZ_BYTES(query_vport_counter_out),
+ MLX5_ST_SZ_BYTES(ppcnt_reg));
out_cnt = kvzalloc(sz, GFP_KERNEL);
- if (!out_cnt)
- return IB_MAD_RESULT_FAILURE;
+ if (!out_cnt) {
+ err = IB_MAD_RESULT_FAILURE;
+ goto done;
+ }
- err = mlx5_core_query_vport_counter(mdev, 0, 0,
- port_num, out_cnt, sz);
- if (!err)
- pma_cnt_ext_assign(pma_cnt_ext, out_cnt);
+ if (dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) {
+ err = query_ib_ppcnt(mdev, mdev_port_num,
+ port_num, out_cnt, sz, 1);
+ if (!err)
+ pma_cnt_ext_assign_ppcnt(pma_cnt_ext, out_cnt);
+ } else {
+ err = mlx5_core_query_vport_counter(mdev, 0, 0,
+ mdev_port_num,
+ out_cnt);
+ if (!err)
+ pma_cnt_ext_assign(pma_cnt_ext, out_cnt);
+ }
} else {
struct ib_pma_portcounters *pma_cnt =
(struct ib_pma_portcounters *)(out_mad->data + 40);
int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
out_cnt = kvzalloc(sz, GFP_KERNEL);
- if (!out_cnt)
- return IB_MAD_RESULT_FAILURE;
+ if (!out_cnt) {
+ err = IB_MAD_RESULT_FAILURE;
+ goto done;
+ }
+
+ if (dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI)
+ err = query_ib_ppcnt(mdev, mdev_port_num, port_num,
+ out_cnt, sz, 0);
+ else
+ err = query_ib_ppcnt(mdev, mdev_port_num, 0,
+ out_cnt, sz, 0);
- err = mlx5_core_query_ib_ppcnt(mdev, port_num,
- out_cnt, sz);
if (!err)
pma_cnt_assign(pma_cnt, out_cnt);
- }
-
+ }
kvfree(out_cnt);
- if (err)
- return IB_MAD_RESULT_FAILURE;
-
- return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+ err = err ? IB_MAD_RESULT_FAILURE :
+ IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+done:
+ if (native_port)
+ mlx5_ib_put_native_port_mdev(dev, port_num);
+ return err;
}
-int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in, size_t in_mad_size,
- struct ib_mad_hdr *out, size_t *out_mad_size,
- u16 *out_mad_pkey_index)
+ const struct ib_mad *in, struct ib_mad *out,
+ size_t *out_mad_size, u16 *out_mad_pkey_index)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- const struct ib_mad *in_mad = (const struct ib_mad *)in;
- struct ib_mad *out_mad = (struct ib_mad *)out;
- struct mlx5_core_dev *mdev;
- u8 mdev_port_num;
- int ret;
+ u8 mgmt_class = in->mad_hdr.mgmt_class;
+ u8 method = in->mad_hdr.method;
+ u16 slid;
+ int err;
- if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
- *out_mad_size != sizeof(*out_mad)))
- return IB_MAD_RESULT_FAILURE;
+ slid = in_wc ? ib_lid_cpu16(in_wc->slid) :
+ be16_to_cpu(IB_LID_PERMISSIVE);
- memset(out_mad->data, 0, sizeof(out_mad->data));
+ if (method == IB_MGMT_METHOD_TRAP && !slid)
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
- mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
- if (!mdev)
- return IB_MAD_RESULT_FAILURE;
+ switch (mgmt_class) {
+ case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+ case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: {
+ if (method != IB_MGMT_METHOD_GET &&
+ method != IB_MGMT_METHOD_SET &&
+ method != IB_MGMT_METHOD_TRAP_REPRESS)
+ return IB_MAD_RESULT_SUCCESS;
- if (MLX5_CAP_GEN(mdev, vport_counters) &&
- in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
- in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) {
- ret = process_pma_cmd(mdev, mdev_port_num, in_mad, out_mad);
- } else {
- ret = process_mad(ibdev, mad_flags, port_num, in_wc, in_grh,
- in_mad, out_mad);
+ /* Don't process SMInfo queries -- the SMA can't handle them.
+ */
+ if (in->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO)
+ return IB_MAD_RESULT_SUCCESS;
+ } break;
+ case IB_MGMT_CLASS_PERF_MGMT:
+ if (MLX5_CAP_GEN(dev->mdev, vport_counters) &&
+ method == IB_MGMT_METHOD_GET)
+ return process_pma_cmd(dev, port_num, in, out);
+ fallthrough;
+ case MLX5_IB_VENDOR_CLASS1:
+ case MLX5_IB_VENDOR_CLASS2:
+ case IB_MGMT_CLASS_CONG_MGMT: {
+ if (method != IB_MGMT_METHOD_GET &&
+ method != IB_MGMT_METHOD_SET)
+ return IB_MAD_RESULT_SUCCESS;
+ } break;
+ default:
+ return IB_MAD_RESULT_SUCCESS;
}
- mlx5_ib_put_native_port_mdev(dev, port_num);
- return ret;
+
+ err = mlx5_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY,
+ mad_flags & IB_MAD_IGNORE_BKEY, port_num, in_wc,
+ in_grh, in, out);
+ if (err)
+ return IB_MAD_RESULT_FAILURE;
+
+ /* set return bit in status of directed route responses */
+ if (mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ out->mad_hdr.status |= cpu_to_be16(1 << 15);
+
+ if (method == IB_MGMT_METHOD_TRAP_REPRESS)
+ /* no response for trap repress */
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}
-int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
+int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, unsigned int port)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
u16 packet_error;
@@ -297,7 +372,7 @@ int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
@@ -305,7 +380,7 @@ int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
packet_error = be16_to_cpu(out_mad->status);
- dev->mdev->port_caps[port - 1].ext_port_cap = (!err && !packet_error) ?
+ dev->port_caps[port - 1].ext_port_cap = (!err && !packet_error) ?
MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0;
out:
@@ -314,17 +389,17 @@ out:
return err;
}
-int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
- struct ib_smp *out_mad)
+static int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
+ struct ib_smp *out_mad)
{
- struct ib_smp *in_mad = NULL;
- int err = -ENOMEM;
+ struct ib_smp *in_mad;
+ int err;
in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
if (!in_mad)
return -ENOMEM;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad,
@@ -337,8 +412,8 @@ int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
int mlx5_query_mad_ifc_system_image_guid(struct ib_device *ibdev,
__be64 *sys_image_guid)
{
- struct ib_smp *out_mad = NULL;
- int err = -ENOMEM;
+ struct ib_smp *out_mad;
+ int err;
out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
if (!out_mad)
@@ -359,8 +434,8 @@ out:
int mlx5_query_mad_ifc_max_pkeys(struct ib_device *ibdev,
u16 *max_pkeys)
{
- struct ib_smp *out_mad = NULL;
- int err = -ENOMEM;
+ struct ib_smp *out_mad;
+ int err;
out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
if (!out_mad)
@@ -381,8 +456,8 @@ out:
int mlx5_query_mad_ifc_vendor_id(struct ib_device *ibdev,
u32 *vendor_id)
{
- struct ib_smp *out_mad = NULL;
- int err = -ENOMEM;
+ struct ib_smp *out_mad;
+ int err;
out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
if (!out_mad)
@@ -402,8 +477,8 @@ out:
int mlx5_query_mad_ifc_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
@@ -411,7 +486,7 @@ int mlx5_query_mad_ifc_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
@@ -427,8 +502,8 @@ out:
int mlx5_query_mad_ifc_node_guid(struct mlx5_ib_dev *dev, __be64 *node_guid)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
@@ -436,7 +511,7 @@ int mlx5_query_mad_ifc_node_guid(struct mlx5_ib_dev *dev, __be64 *node_guid)
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
@@ -450,11 +525,11 @@ out:
return err;
}
-int mlx5_query_mad_ifc_pkey(struct ib_device *ibdev, u8 port, u16 index,
+int mlx5_query_mad_ifc_pkey(struct ib_device *ibdev, u32 port, u16 index,
u16 *pkey)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
@@ -462,7 +537,7 @@ int mlx5_query_mad_ifc_pkey(struct ib_device *ibdev, u8 port, u16 index,
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
in_mad->attr_mod = cpu_to_be32(index / 32);
@@ -479,11 +554,11 @@ out:
return err;
}
-int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u8 port, int index,
+int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u32 port, int index,
union ib_gid *gid)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
@@ -491,7 +566,7 @@ int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u8 port, int index,
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
@@ -502,7 +577,7 @@ int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u8 port, int index,
memcpy(gid->raw, out_mad->data + 8, 8);
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
in_mad->attr_mod = cpu_to_be32(index / 8);
@@ -519,13 +594,13 @@ out:
return err;
}
-int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
+int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int ext_active_speed;
int err = -ENOMEM;
@@ -536,7 +611,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
/* props being zeroed by the caller, avoid zeroing it here */
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
@@ -555,7 +630,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20));
props->gid_tbl_len = out_mad->data[50];
props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
- props->pkey_tbl_len = mdev->port_caps[port - 1].pkey_table_len;
+ props->pkey_tbl_len = dev->pkey_table_len;
props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46));
props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48));
props->active_width = out_mad->data[31] & 0xf;
@@ -590,14 +665,32 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
props->port_cap_flags2 & IB_PORT_LINK_SPEED_HDR_SUP)
props->active_speed = IB_SPEED_HDR;
break;
+ case 8:
+ if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP &&
+ props->port_cap_flags2 & IB_PORT_LINK_SPEED_NDR_SUP)
+ props->active_speed = IB_SPEED_NDR;
+ break;
+ }
+ }
+
+ /* Check if extended speeds 2 (XDR/...) are supported */
+ if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP &&
+ props->port_cap_flags2 & IB_PORT_EXTENDED_SPEEDS2_SUP) {
+ ext_active_speed = (out_mad->data[56] >> 4) & 0x6;
+
+ switch (ext_active_speed) {
+ case 2:
+ if (props->port_cap_flags2 & IB_PORT_LINK_SPEED_XDR_SUP)
+ props->active_speed = IB_SPEED_XDR;
+ break;
}
}
/* If reported active speed is QDR, check if is FDR-10 */
if (props->active_speed == 4) {
- if (mdev->port_caps[port - 1].ext_port_cap &
+ if (dev->port_caps[port - 1].ext_port_cap &
MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) {
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
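
Note: in the reworked mlx5_ib_process_mad() above, directed-route responses get bit 15 set in the big-endian MAD status word (out->mad_hdr.status |= cpu_to_be16(1 << 15)). A tiny standalone sketch of that byte-order-safe read-modify-write, using htons() as a userspace stand-in for cpu_to_be16():

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* Pretend the status word already carries some bits, stored
         * big-endian as it would be on the wire. */
        uint16_t status_be = htons(0x0004);

        /* OR in bit 15 without converting the field to host order,
         * mirroring status |= cpu_to_be16(1 << 15) in the kernel code. */
        status_be |= htons(1 << 15);

        printf("wire value: 0x%04x\n", ntohs(status_be));
        return 0;
}
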
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index abac70ad5c7c..40284bbb45d6 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1,33 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2020, Intel Corporation. All rights reserved.
*/
#include <linux/debugfs.h>
@@ -39,9 +13,7 @@
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
-#if defined(CONFIG_X86)
-#include <asm/pat.h>
-#endif
+#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
@@ -52,35 +24,42 @@
#include <linux/mlx5/port.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
+#include <linux/mlx5/eswitch.h>
+#include <linux/mlx5/driver.h>
#include <linux/list.h>
#include <rdma/ib_smi.h>
-#include <rdma/ib_umem.h>
+#include <rdma/ib_umem_odp.h>
+#include <rdma/lag.h>
#include <linux/in.h>
#include <linux/etherdevice.h>
#include "mlx5_ib.h"
#include "ib_rep.h"
#include "cmd.h"
+#include "devx.h"
+#include "dm.h"
+#include "fs.h"
#include "srq.h"
-#include <linux/mlx5/fs_helpers.h>
-#include <linux/mlx5/accel.h>
+#include "qp.h"
+#include "wr.h"
+#include "restrack.h"
+#include "counters.h"
+#include "umr.h"
#include <rdma/uverbs_std_types.h>
+#include <rdma/uverbs_ioctl.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/ib_ucaps.h>
+#include "macsec.h"
+#include "data_direct.h"
+#include "dmah.h"
#define UVERBS_MODULE_NAME mlx5_ib
#include <rdma/uverbs_named_ioctl.h>
-#define DRIVER_NAME "mlx5_ib"
-#define DRIVER_VERSION "5.0-0"
-
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
-MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
+MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) IB driver");
MODULE_LICENSE("Dual BSD/GPL");
-static char mlx5_version[] =
- DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
- DRIVER_VERSION "\n";
-
struct mlx5_ib_event_work {
struct work_struct work;
union {
@@ -104,12 +83,6 @@ static LIST_HEAD(mlx5_ib_dev_list);
*/
static DEFINE_MUTEX(mlx5_ib_multiport_mutex);
-/* We can't use an array for xlt_emergency_page because dma_map_single
- * doesn't work on kernel modules memory
- */
-static unsigned long xlt_emergency_page;
-static struct mutex xlt_emergency_page_mutex;
-
struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi)
{
struct mlx5_ib_dev *dev;
@@ -134,7 +107,7 @@ mlx5_port_type_cap_to_rdma_ll(int port_type_cap)
}
static enum rdma_link_layer
-mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
+mlx5_ib_port_link_layer(struct ib_device *device, u32 port_num)
{
struct mlx5_ib_dev *dev = to_mdev(device);
int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
@@ -143,7 +116,7 @@ mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
}
static int get_port_state(struct ib_device *ibdev,
- u8 port_num,
+ u32 port_num,
enum ib_port_state *state)
{
struct ib_port_attr attr;
@@ -158,9 +131,9 @@ static int get_port_state(struct ib_device *ibdev,
static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
struct net_device *ndev,
- u8 *port_num)
+ struct net_device *upper,
+ u32 *port_num)
{
- struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
struct net_device *rep_ndev;
struct mlx5_ib_port *port;
int i;
@@ -170,15 +143,59 @@ static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
if (!port->rep)
continue;
- read_lock(&port->roce.netdev_lock);
- rep_ndev = mlx5_ib_get_rep_netdev(esw,
- port->rep->vport);
- if (rep_ndev == ndev) {
- read_unlock(&port->roce.netdev_lock);
+ if (upper == ndev && port->rep->vport == MLX5_VPORT_UPLINK) {
*port_num = i + 1;
return &port->roce;
}
- read_unlock(&port->roce.netdev_lock);
+
+ if (upper && port->rep->vport == MLX5_VPORT_UPLINK)
+ continue;
+ rep_ndev = ib_device_get_netdev(&dev->ib_dev, i + 1);
+ if (rep_ndev && rep_ndev == ndev) {
+ dev_put(rep_ndev);
+ *port_num = i + 1;
+ return &port->roce;
+ }
+
+ dev_put(rep_ndev);
+ }
+
+ return NULL;
+}
+
+static bool mlx5_netdev_send_event(struct mlx5_ib_dev *dev,
+ struct net_device *ndev,
+ struct net_device *upper,
+ struct net_device *ib_ndev)
+{
+ if (!dev->ib_active)
+ return false;
+
+ /* Event is about our upper device */
+ if (upper == ndev)
+ return true;
+
+ /* RDMA device is not in lag and not in switchdev */
+ if (!dev->is_rep && !upper && ndev == ib_ndev)
+ return true;
+
+ /* RDMA device is in switchdev */
+ if (dev->is_rep && ndev == ib_ndev)
+ return true;
+
+ return false;
+}
+
+static struct net_device *mlx5_ib_get_rep_uplink_netdev(struct mlx5_ib_dev *ibdev)
+{
+ struct mlx5_ib_port *port;
+ int i;
+
+ for (i = 0; i < ibdev->num_ports; i++) {
+ port = &ibdev->port[i];
+ if (port->rep && port->rep->vport == MLX5_VPORT_UPLINK) {
+ return ib_device_get_netdev(&ibdev->ib_dev, i + 1);
+ }
}
return NULL;
@@ -189,7 +206,8 @@ static int mlx5_netdev_event(struct notifier_block *this,
{
struct mlx5_roce *roce = container_of(this, struct mlx5_roce, nb);
struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
- u8 port_num = roce->native_port_num;
+ u32 port_num = roce->native_port_num;
+ struct net_device *ib_ndev = NULL;
struct mlx5_core_dev *mdev;
struct mlx5_ib_dev *ibdev;
@@ -203,46 +221,67 @@ static int mlx5_netdev_event(struct notifier_block *this,
/* Should already be registered during the load */
if (ibdev->is_rep)
break;
- write_lock(&roce->netdev_lock);
+
+ ib_ndev = ib_device_get_netdev(&ibdev->ib_dev, port_num);
+ /* Exit if already registered */
+ if (ib_ndev)
+ goto put_ndev;
+
if (ndev->dev.parent == mdev->device)
- roce->netdev = ndev;
- write_unlock(&roce->netdev_lock);
+ ib_device_set_netdev(&ibdev->ib_dev, ndev, port_num);
break;
case NETDEV_UNREGISTER:
/* In case of reps, ib device goes away before the netdevs */
- write_lock(&roce->netdev_lock);
- if (roce->netdev == ndev)
- roce->netdev = NULL;
- write_unlock(&roce->netdev_lock);
- break;
+ if (ibdev->is_rep)
+ break;
+ ib_ndev = ib_device_get_netdev(&ibdev->ib_dev, port_num);
+ if (ib_ndev == ndev)
+ ib_device_set_netdev(&ibdev->ib_dev, NULL, port_num);
+ goto put_ndev;
case NETDEV_CHANGE:
case NETDEV_UP:
case NETDEV_DOWN: {
- struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(mdev);
struct net_device *upper = NULL;
- if (lag_ndev) {
- upper = netdev_master_upper_dev_get(lag_ndev);
- dev_put(lag_ndev);
+ if (!netif_is_lag_master(ndev) && !netif_is_lag_port(ndev) &&
+ !mlx5_core_mp_enabled(mdev))
+ return NOTIFY_DONE;
+
+ if (mlx5_lag_is_roce(mdev) || mlx5_lag_is_sriov(mdev)) {
+ struct net_device *lag_ndev;
+
+ if (mlx5_lag_is_roce(mdev))
+ lag_ndev = ib_device_get_netdev(&ibdev->ib_dev, 1);
+ else /* sriov lag */
+ lag_ndev = mlx5_ib_get_rep_uplink_netdev(ibdev);
+
+ if (lag_ndev) {
+ upper = netdev_master_upper_dev_get(lag_ndev);
+ dev_put(lag_ndev);
+ } else {
+ goto done;
+ }
}
if (ibdev->is_rep)
- roce = mlx5_get_rep_roce(ibdev, ndev, &port_num);
+ roce = mlx5_get_rep_roce(ibdev, ndev, upper, &port_num);
if (!roce)
return NOTIFY_DONE;
- if ((upper == ndev || (!upper && ndev == roce->netdev))
- && ibdev->ib_active) {
+
+ ib_ndev = ib_device_get_netdev(&ibdev->ib_dev, port_num);
+
+ if (mlx5_netdev_send_event(ibdev, ndev, upper, ib_ndev)) {
struct ib_event ibev = { };
enum ib_port_state port_state;
if (get_port_state(&ibdev->ib_dev, port_num,
&port_state))
- goto done;
+ goto put_ndev;
if (roce->last_port_state == port_state)
- goto done;
+ goto put_ndev;
roce->last_port_state = port_state;
ibev.device = &ibdev->ib_dev;
@@ -251,7 +290,7 @@ static int mlx5_netdev_event(struct notifier_block *this,
else if (port_state == IB_PORT_ACTIVE)
ibev.event = IB_EVENT_PORT_ACTIVE;
else
- goto done;
+ goto put_ndev;
ibev.element.port_num = port_num;
ib_dispatch_event(&ibev);
@@ -262,42 +301,16 @@ static int mlx5_netdev_event(struct notifier_block *this,
default:
break;
}
+put_ndev:
+ dev_put(ib_ndev);
done:
mlx5_ib_put_native_port_mdev(ibdev, port_num);
return NOTIFY_DONE;
}
-static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
- u8 port_num)
-{
- struct mlx5_ib_dev *ibdev = to_mdev(device);
- struct net_device *ndev;
- struct mlx5_core_dev *mdev;
-
- mdev = mlx5_ib_get_native_port_mdev(ibdev, port_num, NULL);
- if (!mdev)
- return NULL;
-
- ndev = mlx5_lag_get_roce_netdev(mdev);
- if (ndev)
- goto out;
-
- /* Ensure ndev does not disappear before we invoke dev_hold()
- */
- read_lock(&ibdev->port[port_num - 1].roce.netdev_lock);
- ndev = ibdev->port[port_num - 1].roce.netdev;
- if (ndev)
- dev_hold(ndev);
- read_unlock(&ibdev->port[port_num - 1].roce.netdev_lock);
-
-out:
- mlx5_ib_put_native_port_mdev(ibdev, port_num);
- return ndev;
-}
-
struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
- u8 ib_port_num,
- u8 *native_port_num)
+ u32 ib_port_num,
+ u32 *native_port_num)
{
enum rdma_link_layer ll = mlx5_ib_port_link_layer(&ibdev->ib_dev,
ib_port_num);
@@ -305,6 +318,14 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
struct mlx5_ib_multiport_info *mpi;
struct mlx5_ib_port *port;
+ if (ibdev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) {
+ if (native_port_num)
+ *native_port_num = smi_to_native_portnum(ibdev,
+ ib_port_num);
+ return ibdev->mdev;
+ }
+
if (!mlx5_core_mp_enabled(ibdev->mdev) ||
ll != IB_LINK_LAYER_ETHERNET) {
if (native_port_num)
@@ -316,9 +337,6 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
*native_port_num = 1;
port = &ibdev->port[ib_port_num - 1];
- if (!port)
- return NULL;
-
spin_lock(&port->mp.mpi_lock);
mpi = ibdev->port[ib_port_num - 1].mp.mpi;
if (mpi && !mpi->unaffiliate) {
@@ -334,7 +352,7 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
return mdev;
}
-void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *ibdev, u8 port_num)
+void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *ibdev, u32 port_num)
{
enum rdma_link_layer ll = mlx5_ib_port_link_layer(&ibdev->ib_dev,
port_num);
@@ -358,8 +376,8 @@ out:
spin_unlock(&port->mp.mpi_lock);
}
-static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, u8 *active_speed,
- u8 *active_width)
+static int translate_eth_legacy_proto_oper(u32 eth_proto_oper,
+ u16 *active_speed, u8 *active_width)
{
switch (eth_proto_oper) {
case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII):
@@ -416,7 +434,7 @@ static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, u8 *active_speed,
return 0;
}
-static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed,
+static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed,
u8 *active_width)
{
switch (eth_proto_oper) {
@@ -457,10 +475,46 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed,
*active_width = IB_WIDTH_2X;
*active_speed = IB_SPEED_HDR;
break;
+ case MLX5E_PROT_MASK(MLX5E_100GAUI_1_100GBASE_CR_KR):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_NDR;
+ break;
case MLX5E_PROT_MASK(MLX5E_200GAUI_4_200GBASE_CR4_KR4):
*active_width = IB_WIDTH_4X;
*active_speed = IB_SPEED_HDR;
break;
+ case MLX5E_PROT_MASK(MLX5E_200GAUI_2_200GBASE_CR2_KR2):
+ *active_width = IB_WIDTH_2X;
+ *active_speed = IB_SPEED_NDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_200GAUI_1_200GBASE_CR1_KR1):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_XDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_400GAUI_8_400GBASE_CR8):
+ *active_width = IB_WIDTH_8X;
+ *active_speed = IB_SPEED_HDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_400GAUI_4_400GBASE_CR4_KR4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_NDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_400GAUI_2_400GBASE_CR2_KR2):
+ *active_width = IB_WIDTH_2X;
+ *active_speed = IB_SPEED_XDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_800GAUI_8_800GBASE_CR8_KR8):
+ *active_width = IB_WIDTH_8X;
+ *active_speed = IB_SPEED_NDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_800GAUI_4_800GBASE_CR4_KR4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_XDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_1600TAUI_8_1600TBASE_CR8_KR8):
+ *active_width = IB_WIDTH_8X;
+ *active_speed = IB_SPEED_XDR;
+ break;
default:
return -EINVAL;
}
@@ -468,7 +522,7 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed,
return 0;
}
-static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
+static int translate_eth_proto_oper(u32 eth_proto_oper, u16 *active_speed,
u8 *active_width, bool ext)
{
return ext ?
@@ -478,7 +532,7 @@ static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
active_width);
}
-static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
+static int mlx5_query_port_roce(struct ib_device *device, u32 port_num,
struct ib_port_attr *props)
{
struct mlx5_ib_dev *dev = to_mdev(device);
@@ -487,9 +541,8 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
struct net_device *ndev, *upper;
enum ib_mtu ndev_ib_mtu;
bool put_mdev = true;
- u16 qkey_viol_cntr;
u32 eth_prot_oper;
- u8 mdev_port_num;
+ u32 mdev_port_num;
bool ext;
int err;
@@ -510,13 +563,13 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
*/
if (dev->is_rep)
err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN,
- 1);
+ 1, 0);
else
err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN,
- mdev_port_num);
+ mdev_port_num, 0);
if (err)
goto out;
- ext = MLX5_CAP_PCAM_FEATURE(dev->mdev, ptys_extended_ethernet);
+ ext = !!MLX5_GET_ETH_PROTO(ptys_reg, out, true, eth_proto_capability);
eth_prot_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper);
props->active_width = IB_WIDTH_4X;
@@ -525,29 +578,31 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
&props->active_width, ext);
- props->port_cap_flags |= IB_PORT_CM_SUP;
- props->ip_gids = true;
+ if (!dev->is_rep && dev->mdev->roce.roce_en) {
+ u16 qkey_viol_cntr;
- props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev,
- roce_address_table_size);
+ props->port_cap_flags |= IB_PORT_CM_SUP;
+ props->ip_gids = true;
+ props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev,
+ roce_address_table_size);
+ mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr);
+ props->qkey_viol_cntr = qkey_viol_cntr;
+ }
props->max_mtu = IB_MTU_4096;
props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
props->pkey_tbl_len = 1;
props->state = IB_PORT_DOWN;
- props->phys_state = 3;
-
- mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr);
- props->qkey_viol_cntr = qkey_viol_cntr;
+ props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
/* If this is a stub query for an unaffiliated port stop here */
if (!put_mdev)
goto out;
- ndev = mlx5_ib_get_netdev(device, port_num);
+ ndev = ib_device_get_netdev(device, port_num);
if (!ndev)
goto out;
- if (dev->lag_active) {
+ if (mlx5_lag_is_roce(mdev) || mlx5_lag_is_sriov(mdev)) {
rcu_read_lock();
upper = netdev_master_upper_dev_get_rcu(ndev);
if (upper) {
@@ -560,7 +615,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
if (netif_running(ndev) && netif_carrier_ok(ndev)) {
props->state = IB_PORT_ACTIVE;
- props->phys_state = 5;
+ props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
}
ndev_ib_mtu = iboe_get_mtu(ndev->mtu);
@@ -574,31 +629,31 @@ out:
return err;
}
-static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
- unsigned int index, const union ib_gid *gid,
- const struct ib_gid_attr *attr)
+int set_roce_addr(struct mlx5_ib_dev *dev, u32 port_num,
+ unsigned int index, const union ib_gid *gid,
+ const struct ib_gid_attr *attr)
{
- enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+ enum ib_gid_type gid_type;
u16 vlan_id = 0xffff;
u8 roce_version = 0;
u8 roce_l3_type = 0;
u8 mac[ETH_ALEN];
int ret;
+ gid_type = attr->gid_type;
if (gid) {
- gid_type = attr->gid_type;
ret = rdma_read_gid_l2_fields(attr, &vlan_id, &mac[0]);
if (ret)
return ret;
}
switch (gid_type) {
- case IB_GID_TYPE_IB:
+ case IB_GID_TYPE_ROCE:
roce_version = MLX5_ROCE_VERSION_1;
break;
case IB_GID_TYPE_ROCE_UDP_ENCAP:
roce_version = MLX5_ROCE_VERSION_2;
- if (ipv6_addr_v4mapped((void *)gid))
+ if (gid && ipv6_addr_v4mapped((void *)gid))
roce_l3_type = MLX5_ROCE_L3_TYPE_IPV4;
else
roce_l3_type = MLX5_ROCE_L3_TYPE_IPV6;
@@ -617,6 +672,12 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
static int mlx5_ib_add_gid(const struct ib_gid_attr *attr,
__always_unused void **context)
{
+ int ret;
+
+ ret = mlx5r_add_gid_macsec_operations(attr);
+ if (ret)
+ return ret;
+
return set_roce_addr(to_mdev(attr->device), attr->port_num,
attr->index, &attr->gid, attr);
}
@@ -624,12 +685,19 @@ static int mlx5_ib_add_gid(const struct ib_gid_attr *attr,
static int mlx5_ib_del_gid(const struct ib_gid_attr *attr,
__always_unused void **context)
{
- return set_roce_addr(to_mdev(attr->device), attr->port_num,
- attr->index, NULL, NULL);
+ int ret;
+
+ ret = set_roce_addr(to_mdev(attr->device), attr->port_num,
+ attr->index, NULL, attr);
+ if (ret)
+ return ret;
+
+ mlx5r_del_gid_macsec_operations(attr);
+ return 0;
}
-__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev,
- const struct ib_gid_attr *attr)
+__be16 mlx5_get_roce_udp_sport_min(const struct mlx5_ib_dev *dev,
+ const struct ib_gid_attr *attr)
{
if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
return 0;
@@ -692,21 +760,6 @@ static void get_atomic_caps_qp(struct mlx5_ib_dev *dev,
get_atomic_caps(dev, atomic_size_qp, props);
}
-static void get_atomic_caps_dc(struct mlx5_ib_dev *dev,
- struct ib_device_attr *props)
-{
- u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
-
- get_atomic_caps(dev, atomic_size_qp, props);
-}
-
-bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev)
-{
- struct ib_device_attr props = {};
-
- get_atomic_caps_dc(dev, &props);
- return (props.atomic_cap == IB_ATOMIC_HCA) ? true : false;
-}
static int mlx5_query_system_image_guid(struct ib_device *ibdev,
__be64 *sys_image_guid)
{
@@ -793,7 +846,7 @@ static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
break;
case MLX5_VPORT_ACCESS_METHOD_NIC:
- err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
+ err = mlx5_query_nic_vport_node_guid(dev->mdev, 0, false, &tmp);
break;
default:
@@ -824,10 +877,67 @@ static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
MLX5_REG_NODE_DESC, 0, 0);
}
+static void fill_esw_mgr_reg_c0(struct mlx5_core_dev *mdev,
+ struct mlx5_ib_query_device_resp *resp)
+{
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ u16 vport = mlx5_eswitch_manager_vport(mdev);
+
+ resp->reg_c0.value = mlx5_eswitch_get_vport_metadata_for_match(esw,
+ vport);
+ resp->reg_c0.mask = mlx5_eswitch_get_vport_metadata_mask();
+}
+
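/*
 * Editorial note (not part of the patch): reg_c0 mirrors the E-Switch
 * vport match metadata (value and mask) for the E-Switch manager vport.
 * It is only reported to userspace when eswitch offloads mode and vport
 * metadata matching are enabled; see the reg_c0 branch added to
 * mlx5_ib_query_device() further below.
 */
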
+/*
+ * Calculate maximum SQ overhead across all QP types.
+ * Other QP types (REG_UMR, UC, RC, UD/SMI/GSI, XRC_TGT)
+ * have smaller overhead than the types calculated below,
+ * so they are implicitly included.
+ */
+static u32 mlx5_ib_calc_max_sq_overhead(void)
+{
+ u32 max_overhead_xrc, overhead_ud_lso, a, b;
+
+ /* XRC_INI */
+ max_overhead_xrc = sizeof(struct mlx5_wqe_xrc_seg);
+ max_overhead_xrc += sizeof(struct mlx5_wqe_ctrl_seg);
+ a = sizeof(struct mlx5_wqe_atomic_seg) +
+ sizeof(struct mlx5_wqe_raddr_seg);
+ b = sizeof(struct mlx5_wqe_umr_ctrl_seg) +
+ sizeof(struct mlx5_mkey_seg) +
+ MLX5_IB_SQ_UMR_INLINE_THRESHOLD / MLX5_IB_UMR_OCTOWORD;
+ max_overhead_xrc += max(a, b);
+
+ /* UD with LSO */
+ overhead_ud_lso = sizeof(struct mlx5_wqe_ctrl_seg);
+ overhead_ud_lso += sizeof(struct mlx5_wqe_eth_pad);
+ overhead_ud_lso += sizeof(struct mlx5_wqe_eth_seg);
+ overhead_ud_lso += sizeof(struct mlx5_wqe_datagram_seg);
+
+ return max(max_overhead_xrc, overhead_ud_lso);
+}
+
+static u32 mlx5_ib_calc_max_qp_wr(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_core_dev *mdev = dev->mdev;
+ u32 max_wqe_bb_units = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
+ u32 max_wqe_size;
+ /* max QP overhead + 1 SGE, no inline, no special features */
+ max_wqe_size = mlx5_ib_calc_max_sq_overhead() +
+ sizeof(struct mlx5_wqe_data_seg);
+
+ max_wqe_size = roundup_pow_of_two(max_wqe_size);
+
+ max_wqe_size = ALIGN(max_wqe_size, MLX5_SEND_WQE_BB);
+
+ return (max_wqe_bb_units * MLX5_SEND_WQE_BB) / max_wqe_size;
+}
+
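/*
 * Editorial note (illustrative, not part of the patch): max_qp_wr is now
 * derived from the worst-case WQE footprint rather than from
 * log_max_qp_sz alone. As a hedged example, if the computed overhead
 * plus one 16-byte data segment rounds up to 256 bytes, a device
 * reporting log_max_qp_sz = 15 (32768 basic blocks of
 * MLX5_SEND_WQE_BB = 64 bytes) would advertise
 * max_qp_wr = (32768 * 64) / 256 = 8192 work requests per send queue.
 */
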
static int mlx5_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props,
struct ib_udata *uhw)
{
+ size_t uhw_outlen = (uhw) ? uhw->outlen : 0;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
int err = -ENOMEM;
@@ -841,12 +951,12 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
u64 max_tso;
resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length);
- if (uhw->outlen && uhw->outlen < resp_len)
+ if (uhw_outlen && uhw_outlen < resp_len)
return -EINVAL;
- else
- resp.response_length = resp_len;
- if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen))
+ resp.response_length = resp_len;
+
+ if (uhw && uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen))
return -EINVAL;
memset(props, 0, sizeof(*props));
@@ -855,9 +965,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (err)
return err;
- err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
- if (err)
- return err;
+ props->max_pkeys = dev->pkey_table_len;
err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
if (err)
@@ -884,11 +992,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
IB_DEVICE_MEM_WINDOW_TYPE_2B;
props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
/* We support 'Gappy' memory registration too */
- props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG;
+ props->kernel_cap_flags |= IBK_SG_GAPS_REG;
}
- props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+ /* IB_WR_REG_MR always requires changing the entity size with UMR */
+ if (!MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
+ props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
if (MLX5_CAP_GEN(mdev, sho)) {
- props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
+ props->kernel_cap_flags |= IBK_INTEGRITY_HANDOVER;
/* At this stage no support for signature handover */
props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
IB_PROT_T10DIF_TYPE_2 |
@@ -897,7 +1007,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
IB_GUARD_T10DIF_CSUM;
}
if (MLX5_CAP_GEN(mdev, block_lb_mc))
- props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+ props->kernel_cap_flags |= IBK_BLOCK_MULTICAST_LOOPBACK;
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && raw_support) {
if (MLX5_CAP_ETH(mdev, csum_cap)) {
@@ -910,7 +1020,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->raw_packet_caps |=
IB_RAW_PACKET_CAP_CVLAN_STRIPPING;
- if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
+ if (offsetofend(typeof(resp), tso_caps) <= uhw_outlen) {
max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
if (max_tso) {
resp.tso_caps.max_tso = 1 << max_tso;
@@ -920,7 +1030,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
}
- if (field_avail(typeof(resp), rss_caps, uhw->outlen)) {
+ if (offsetofend(typeof(resp), rss_caps) <= uhw_outlen) {
resp.rss_caps.rx_hash_function =
MLX5_RX_HASH_FUNC_TOEPLITZ;
resp.rss_caps.rx_hash_fields_mask =
@@ -933,22 +1043,18 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
MLX5_RX_HASH_SRC_PORT_UDP |
MLX5_RX_HASH_DST_PORT_UDP |
MLX5_RX_HASH_INNER;
- if (mlx5_accel_ipsec_device_caps(dev->mdev) &
- MLX5_ACCEL_IPSEC_CAP_DEVICE)
- resp.rss_caps.rx_hash_fields_mask |=
- MLX5_RX_HASH_IPSEC_SPI;
resp.response_length += sizeof(resp.rss_caps);
}
} else {
- if (field_avail(typeof(resp), tso_caps, uhw->outlen))
+ if (offsetofend(typeof(resp), tso_caps) <= uhw_outlen)
resp.response_length += sizeof(resp.tso_caps);
- if (field_avail(typeof(resp), rss_caps, uhw->outlen))
+ if (offsetofend(typeof(resp), rss_caps) <= uhw_outlen)
resp.response_length += sizeof(resp.rss_caps);
}
if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
- props->device_cap_flags |= IB_DEVICE_UD_TSO;
+ props->kernel_cap_flags |= IBK_UD_TSO;
}
if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) &&
@@ -985,7 +1091,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->max_mr_size = ~0ull;
props->page_size_cap = ~(min_page_size - 1);
props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
- props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
+ props->max_qp_wr = mlx5_ib_calc_max_qp_wr(dev);
max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
sizeof(struct mlx5_wqe_data_seg);
max_sq_desc = min_t(int, MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
@@ -1008,28 +1114,45 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->max_srq_sge = max_rq_sg - 1;
props->max_fast_reg_page_list_len =
1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size);
+ props->max_pi_fast_reg_page_list_len =
+ props->max_fast_reg_page_list_len / 2;
+ props->max_sgl_rd =
+ MLX5_CAP_GEN(mdev, max_sgl_for_optimized_performance);
get_atomic_caps_qp(dev, props);
props->masked_atomic_cap = IB_ATOMIC_NONE;
props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
- props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
props->max_ah = INT_MAX;
props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
- if (MLX5_CAP_GEN(mdev, pg))
- props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
+ if (dev->odp_caps.general_caps & IB_ODP_SUPPORT)
+ props->kernel_cap_flags |= IBK_ON_DEMAND_PAGING;
props->odp_caps = dev->odp_caps;
+ if (!uhw) {
+ /* ODP for kernel QPs is not implemented for receive
+ * WQEs and SRQ WQEs
+ */
+ props->odp_caps.per_transport_caps.rc_odp_caps &=
+ ~(IB_ODP_SUPPORT_READ |
+ IB_ODP_SUPPORT_SRQ_RECV);
+ props->odp_caps.per_transport_caps.uc_odp_caps &=
+ ~(IB_ODP_SUPPORT_READ |
+ IB_ODP_SUPPORT_SRQ_RECV);
+ props->odp_caps.per_transport_caps.ud_odp_caps &=
+ ~(IB_ODP_SUPPORT_READ |
+ IB_ODP_SUPPORT_SRQ_RECV);
+ props->odp_caps.per_transport_caps.xrc_odp_caps &=
+ ~(IB_ODP_SUPPORT_READ |
+ IB_ODP_SUPPORT_SRQ_RECV);
+ }
}
- if (MLX5_CAP_GEN(mdev, cd))
- props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
-
- if (!mlx5_core_is_pf(mdev))
- props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
+ if (mlx5_core_is_vf(mdev))
+ props->kernel_cap_flags |= IBK_VIRTUAL_FUNCTION;
if (mlx5_ib_port_link_layer(ibdev, 1) ==
IB_LINK_LAYER_ETHERNET && raw_support) {
@@ -1043,15 +1166,19 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
if (MLX5_CAP_GEN(mdev, tag_matching)) {
- props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
props->tm_caps.max_num_tags =
(1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1;
- props->tm_caps.flags = IB_TM_CAP_RC;
props->tm_caps.max_ops =
1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
props->tm_caps.max_sge = MLX5_TM_MAX_SGE;
}
+ if (MLX5_CAP_GEN(mdev, tag_matching) &&
+ MLX5_CAP_GEN(mdev, rndv_offload_rc)) {
+ props->tm_caps.flags = IB_TM_CAP_RNDV_RC;
+ props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
+ }
+
if (MLX5_CAP_GEN(dev->mdev, cq_moderation)) {
props->cq_caps.max_cq_moderation_count =
MLX5_MAX_CQ_COUNT;
@@ -1059,7 +1186,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
MLX5_MAX_CQ_PERIOD;
}
- if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
+ if (offsetofend(typeof(resp), cqe_comp_caps) <= uhw_outlen) {
resp.response_length += sizeof(resp.cqe_comp_caps);
if (MLX5_CAP_GEN(dev->mdev, cqe_compression)) {
@@ -1077,7 +1204,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
}
- if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen) &&
+ if (offsetofend(typeof(resp), packet_pacing_caps) <= uhw_outlen &&
raw_support) {
if (MLX5_CAP_QOS(mdev, packet_pacing) &&
MLX5_CAP_GEN(mdev, qos)) {
@@ -1095,8 +1222,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
resp.response_length += sizeof(resp.packet_pacing_caps);
}
- if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
- uhw->outlen)) {
+ if (offsetofend(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes) <=
+ uhw_outlen) {
if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe))
resp.mlx5_ib_support_multi_pkt_send_wqes =
MLX5_IB_ALLOW_MPW;
@@ -1109,7 +1236,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
}
- if (field_avail(typeof(resp), flags, uhw->outlen)) {
+ if (offsetofend(typeof(resp), flags) <= uhw_outlen) {
resp.response_length += sizeof(resp.flags);
if (MLX5_CAP_GEN(mdev, cqe_compression_128))
@@ -1123,10 +1250,17 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE;
resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT;
+
+ if (MLX5_CAP_GEN_2(mdev, dp_ordering_force) &&
+ (MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_xrc) ||
+ MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_dc) ||
+ MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_rc) ||
+ MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_ud) ||
+ MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_uc)))
+ resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_OOO_DP;
}
- if (field_avail(typeof(resp), sw_parsing_caps,
- uhw->outlen)) {
+ if (offsetofend(typeof(resp), sw_parsing_caps) <= uhw_outlen) {
resp.response_length += sizeof(resp.sw_parsing_caps);
if (MLX5_CAP_ETH(mdev, swp)) {
resp.sw_parsing_caps.sw_parsing_offloads |=
@@ -1146,7 +1280,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
}
- if (field_avail(typeof(resp), striding_rq_caps, uhw->outlen) &&
+ if (offsetofend(typeof(resp), striding_rq_caps) <= uhw_outlen &&
raw_support) {
resp.response_length += sizeof(resp.striding_rq_caps);
if (MLX5_CAP_GEN(mdev, striding_rq)) {
@@ -1154,8 +1288,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES;
resp.striding_rq_caps.max_single_stride_log_num_of_bytes =
MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES;
- resp.striding_rq_caps.min_single_wqe_log_num_of_strides =
- MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
+ if (MLX5_CAP_GEN(dev->mdev, ext_stride_num_range))
+ resp.striding_rq_caps
+ .min_single_wqe_log_num_of_strides =
+ MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
+ else
+ resp.striding_rq_caps
+ .min_single_wqe_log_num_of_strides =
+ MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
resp.striding_rq_caps.max_single_wqe_log_num_of_strides =
MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES;
resp.striding_rq_caps.supported_qpts =
@@ -1163,8 +1303,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
}
- if (field_avail(typeof(resp), tunnel_offloads_caps,
- uhw->outlen)) {
+ if (offsetofend(typeof(resp), tunnel_offloads_caps) <= uhw_outlen) {
resp.response_length += sizeof(resp.tunnel_offloads_caps);
if (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan))
resp.tunnel_offloads_caps |=
@@ -1175,17 +1314,38 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre))
resp.tunnel_offloads_caps |=
MLX5_IB_TUNNELED_OFFLOADS_GRE;
- if (MLX5_CAP_GEN(mdev, flex_parser_protocols) &
- MLX5_FLEX_PROTO_CW_MPLS_GRE)
+ if (MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre))
resp.tunnel_offloads_caps |=
MLX5_IB_TUNNELED_OFFLOADS_MPLS_GRE;
- if (MLX5_CAP_GEN(mdev, flex_parser_protocols) &
- MLX5_FLEX_PROTO_CW_MPLS_UDP)
+ if (MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_udp))
resp.tunnel_offloads_caps |=
MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP;
}
- if (uhw->outlen) {
+ if (offsetofend(typeof(resp), dci_streams_caps) <= uhw_outlen) {
+ resp.response_length += sizeof(resp.dci_streams_caps);
+
+ resp.dci_streams_caps.max_log_num_concurent =
+ MLX5_CAP_GEN(mdev, log_max_dci_stream_channels);
+
+ resp.dci_streams_caps.max_log_num_errored =
+ MLX5_CAP_GEN(mdev, log_max_dci_errored_streams);
+ }
+
+ if (offsetofend(typeof(resp), reserved) <= uhw_outlen)
+ resp.response_length += sizeof(resp.reserved);
+
+ if (offsetofend(typeof(resp), reg_c0) <= uhw_outlen) {
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+ resp.response_length += sizeof(resp.reg_c0);
+
+ if (mlx5_eswitch_mode(mdev) == MLX5_ESWITCH_OFFLOADS &&
+ mlx5_eswitch_vport_match_metadata_enabled(esw))
+ fill_esw_mgr_reg_c0(mdev, &resp);
+ }
+
+ if (uhw_outlen) {
err = ib_copy_to_udata(uhw, &resp, resp.response_length);
if (err)
@@ -1195,32 +1355,24 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
return 0;
}
-enum mlx5_ib_width {
- MLX5_IB_WIDTH_1X = 1 << 0,
- MLX5_IB_WIDTH_2X = 1 << 1,
- MLX5_IB_WIDTH_4X = 1 << 2,
- MLX5_IB_WIDTH_8X = 1 << 3,
- MLX5_IB_WIDTH_12X = 1 << 4
-};
-
-static void translate_active_width(struct ib_device *ibdev, u8 active_width,
- u8 *ib_width)
+static void translate_active_width(struct ib_device *ibdev, u16 active_width,
+ u8 *ib_width)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- if (active_width & MLX5_IB_WIDTH_1X)
+ if (active_width & MLX5_PTYS_WIDTH_1X)
*ib_width = IB_WIDTH_1X;
- else if (active_width & MLX5_IB_WIDTH_2X)
+ else if (active_width & MLX5_PTYS_WIDTH_2X)
*ib_width = IB_WIDTH_2X;
- else if (active_width & MLX5_IB_WIDTH_4X)
+ else if (active_width & MLX5_PTYS_WIDTH_4X)
*ib_width = IB_WIDTH_4X;
- else if (active_width & MLX5_IB_WIDTH_8X)
+ else if (active_width & MLX5_PTYS_WIDTH_8X)
*ib_width = IB_WIDTH_8X;
- else if (active_width & MLX5_IB_WIDTH_12X)
+ else if (active_width & MLX5_PTYS_WIDTH_12X)
*ib_width = IB_WIDTH_12X;
else {
mlx5_ib_dbg(dev, "Invalid active_width %d, setting width to default value: 4x\n",
- (int)active_width);
+ active_width);
*ib_width = IB_WIDTH_4X;
}
@@ -1288,17 +1440,17 @@ static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
return 0;
}
-static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
+static int mlx5_query_hca_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_hca_vport_context *rep;
+ u8 vl_hw_cap, plane_index = 0;
u16 max_mtu;
u16 oper_mtu;
int err;
- u8 ib_link_width_oper;
- u8 vl_hw_cap;
+ u16 ib_link_width_oper;
rep = kzalloc(sizeof(*rep), GFP_KERNEL);
if (!rep) {
@@ -1308,6 +1460,11 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
/* props being zeroed by the caller, avoid zeroing it here */
+ if (ibdev->type == RDMA_DEVICE_TYPE_SMI) {
+ plane_index = port;
+ port = smi_to_native_portnum(dev, port);
+ }
+
err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep);
if (err)
goto out;
@@ -1318,7 +1475,14 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
props->sm_sl = rep->sm_sl;
props->state = rep->vport_state;
props->phys_state = rep->port_physical_state;
- props->port_cap_flags = rep->cap_mask1;
+
+ props->port_cap_flags = rep->cap_mask1;
+ if (dev->num_plane) {
+ props->port_cap_flags |= IB_PORT_SM_DISABLED;
+ props->port_cap_flags &= ~IB_PORT_SM;
+ } else if (ibdev->type == RDMA_DEVICE_TYPE_SMI)
+ props->port_cap_flags &= ~IB_PORT_CM_SUP;
+
props->gid_tbl_len = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
props->pkey_tbl_len = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
@@ -1330,16 +1494,13 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP)
props->port_cap_flags2 = rep->cap_mask2;
- err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
+ err = mlx5_query_ib_port_oper(mdev, &ib_link_width_oper,
+ &props->active_speed, port, plane_index);
if (err)
goto out;
translate_active_width(ibdev, ib_link_width_oper, &props->active_width);
- err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port);
- if (err)
- goto out;
-
mlx5_query_port_max_mtu(mdev, &max_mtu, port);
props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu);
@@ -1359,7 +1520,7 @@ out:
return err;
}
-int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
+int mlx5_ib_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
unsigned int count;
@@ -1404,25 +1565,23 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
return ret;
}
-static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port,
+static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
- int ret;
+ return mlx5_query_port_roce(ibdev, port, props);
+}
- /* Only link layer == ethernet is valid for representors
- * and we always use port 1
+static int mlx5_ib_rep_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
+ u16 *pkey)
+{
+ /* Default special Pkey for representor device port as per the
+ * IB specification 1.3 section 10.9.1.2.
*/
- ret = mlx5_query_port_roce(ibdev, port, props);
- if (ret || !props)
- return ret;
-
- /* We don't support GIDS */
- props->gid_tbl_len = 0;
-
- return ret;
+ *pkey = 0xffff;
+ return 0;
}
-static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+static int mlx5_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
union ib_gid *gid)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
@@ -1441,13 +1600,13 @@ static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
}
-static int mlx5_query_hca_nic_pkey(struct ib_device *ibdev, u8 port,
+static int mlx5_query_hca_nic_pkey(struct ib_device *ibdev, u32 port,
u16 index, u16 *pkey)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev;
bool put_mdev = true;
- u8 mdev_port_num;
+ u32 mdev_port_num;
int err;
mdev = mlx5_ib_get_native_port_mdev(dev, port, &mdev_port_num);
@@ -1468,7 +1627,7 @@ static int mlx5_query_hca_nic_pkey(struct ib_device *ibdev, u8 port,
return err;
}
-static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+static int mlx5_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
u16 *pkey)
{
switch (mlx5_get_vport_access_method(ibdev)) {
@@ -1512,12 +1671,12 @@ static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
return err;
}
-static int set_port_caps_atomic(struct mlx5_ib_dev *dev, u8 port_num, u32 mask,
+static int set_port_caps_atomic(struct mlx5_ib_dev *dev, u32 port_num, u32 mask,
u32 value)
{
struct mlx5_hca_vport_context ctx = {};
struct mlx5_core_dev *mdev;
- u8 mdev_port_num;
+ u32 mdev_port_num;
int err;
mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
@@ -1546,7 +1705,7 @@ out:
return err;
}
-static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
+static int mlx5_ib_modify_port(struct ib_device *ibdev, u32 port, int mask,
struct ib_port_modify *props)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
@@ -1647,7 +1806,8 @@ static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *conte
bfregi = &context->bfregi;
for (i = 0; i < bfregi->num_static_sys_pages; i++) {
- err = mlx5_cmd_alloc_uar(dev->mdev, &bfregi->sys_pages[i]);
+ err = mlx5_cmd_uar_alloc(dev->mdev, &bfregi->sys_pages[i],
+ context->devx_uid);
if (err)
goto error;
@@ -1661,7 +1821,8 @@ static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *conte
error:
for (--i; i >= 0; i--)
- if (mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]))
+ if (mlx5_cmd_uar_dealloc(dev->mdev, bfregi->sys_pages[i],
+ context->devx_uid))
mlx5_ib_warn(dev, "failed to free uar %d\n", i);
return err;
@@ -1677,13 +1838,49 @@ static void deallocate_uars(struct mlx5_ib_dev *dev,
for (i = 0; i < bfregi->num_sys_pages; i++)
if (i < bfregi->num_static_sys_pages ||
bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX)
- mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
+ mlx5_cmd_uar_dealloc(dev->mdev, bfregi->sys_pages[i],
+ context->devx_uid);
+}
+
+static int mlx5_ib_enable_lb_mp(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave,
+ struct mlx5_ib_lb_state *lb_state)
+{
+ int err;
+
+ err = mlx5_nic_vport_update_local_lb(master, true);
+ if (err)
+ return err;
+
+ err = mlx5_nic_vport_update_local_lb(slave, true);
+ if (err)
+ goto out;
+
+ lb_state->force_enable = true;
+ return 0;
+
+out:
+ mlx5_nic_vport_update_local_lb(master, false);
+ return err;
+}
+
+static void mlx5_ib_disable_lb_mp(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave,
+ struct mlx5_ib_lb_state *lb_state)
+{
+ mlx5_nic_vport_update_local_lb(slave, false);
+ mlx5_nic_vport_update_local_lb(master, false);
+
+ lb_state->force_enable = false;
}
int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
{
int err = 0;
+ if (dev->lb.force_enable)
+ return 0;
+
mutex_lock(&dev->lb.mutex);
if (td)
dev->lb.user_td++;
@@ -1705,6 +1902,9 @@ int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
{
+ if (dev->lb.force_enable)
+ return;
+
mutex_lock(&dev->lb.mutex);
if (td)
dev->lb.user_td--;
@@ -1758,6 +1958,90 @@ static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn,
mlx5_ib_disable_lb(dev, true, false);
}
+static int set_ucontext_resp(struct ib_ucontext *uctx,
+ struct mlx5_ib_alloc_ucontext_resp *resp)
+{
+ struct ib_device *ibdev = uctx->device;
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_ib_ucontext *context = to_mucontext(uctx);
+ struct mlx5_bfreg_info *bfregi = &context->bfregi;
+
+ if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
+ resp->dump_fill_mkey = dev->mkeys.dump_fill_mkey;
+ resp->comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY;
+ }
+
+ resp->qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
+ if (mlx5_wc_support_get(dev->mdev))
+ resp->bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev,
+ log_bf_reg_size);
+ resp->cache_line_size = cache_line_size();
+ resp->max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
+ resp->max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
+ resp->max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
+ resp->max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
+ resp->max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
+ resp->cqe_version = context->cqe_version;
+ resp->log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+ MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
+ resp->num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+ MLX5_CAP_GEN(dev->mdev,
+ num_of_uars_per_page) : 1;
+ resp->tot_bfregs = bfregi->lib_uar_dyn ? 0 :
+ bfregi->total_num_bfregs - bfregi->num_dyn_bfregs;
+ resp->num_ports = dev->num_ports;
+ resp->cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
+ MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
+
+ if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) {
+ mlx5_query_min_inline(dev->mdev, &resp->eth_min_inline);
+ resp->eth_min_inline++;
+ }
+
+ if (dev->mdev->clock_info)
+ resp->clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1);
+
+ /*
+ * We don't want to expose information from the PCI bar that is located
+ * after 4096 bytes, so if the arch only supports larger pages, let's
+ * pretend we don't support reading the HCA's core clock. This is also
+ * forced by mmap function.
+ */
+ if (PAGE_SIZE <= 4096) {
+ resp->comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
+ resp->hca_core_clock_offset =
+ offsetof(struct mlx5_init_seg,
+ internal_timer_h) % PAGE_SIZE;
+ }
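/*
 * Editorial note (illustrative, not part of the patch): the reported
 * hca_core_clock_offset is the timer register's offset within its 4 KiB
 * page. For instance, if internal_timer_h happened to sit at byte
 * 0x1020 of the init segment, the value exposed here would be
 * 0x1020 % 4096 = 0x20.
 */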
+
+ if (MLX5_CAP_GEN(dev->mdev, ece_support))
+ resp->comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE;
+
+ if (rt_supported(MLX5_CAP_GEN(dev->mdev, sq_ts_format)) &&
+ rt_supported(MLX5_CAP_GEN(dev->mdev, rq_ts_format)) &&
+ rt_supported(MLX5_CAP_ROCE(dev->mdev, qp_ts_format)))
+ resp->comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_REAL_TIME_TS;
+
+ resp->num_dyn_bfregs = bfregi->num_dyn_bfregs;
+
+ if (MLX5_CAP_GEN(dev->mdev, drain_sigerr))
+ resp->comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_SQD2RTS;
+
+ resp->comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_MKEY_UPDATE_TAG;
+
+ return 0;
+}
+
+static bool uctx_rdma_ctrl_is_enabled(u64 enabled_caps)
+{
+ return UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_LOCAL) ||
+ UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
+}
+
static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
struct ib_udata *udata)
{
@@ -1765,15 +2049,14 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_alloc_ucontext_req_v2 req = {};
struct mlx5_ib_alloc_ucontext_resp resp = {};
- struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_ucontext *context = to_mucontext(uctx);
struct mlx5_bfreg_info *bfregi;
int ver;
int err;
size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
max_cqe_version);
- u32 dump_fill_mkey;
bool lib_uar_4k;
+ bool lib_uar_dyn;
if (!dev->ib_active)
return -EAGAIN;
@@ -1800,44 +2083,33 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
return -EINVAL;
- resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
- if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
- resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
- resp.cache_line_size = cache_line_size();
- resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
- resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
- resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
- resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
- resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
- resp.cqe_version = min_t(__u8,
- (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
- req.max_cqe_version);
- resp.log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
- MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
- resp.num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
- MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1;
- resp.response_length = min(offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length), udata->outlen);
-
- if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) {
- if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_EGRESS))
- resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM;
- if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA)
- resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA;
- if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
- resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING;
- if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN)
- resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN;
- /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */
+ if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
+ err = mlx5_ib_devx_create(dev, true, uctx->enabled_caps);
+ if (err < 0)
+ goto out_ctx;
+ context->devx_uid = err;
+
+ if (uctx_rdma_ctrl_is_enabled(uctx->enabled_caps)) {
+ err = mlx5_cmd_add_privileged_uid(dev->mdev,
+ context->devx_uid);
+ if (err)
+ goto out_devx;
+ }
}
lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
+ lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR;
bfregi = &context->bfregi;
+ if (lib_uar_dyn) {
+ bfregi->lib_uar_dyn = lib_uar_dyn;
+ goto uar_done;
+ }
+
/* updates req->total_num_bfregs */
err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi);
if (err)
- goto out_ctx;
+ goto out_ucap;
mutex_init(&bfregi->lock);
bfregi->lib_uar_4k = lib_uar_4k;
@@ -1845,7 +2117,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
GFP_KERNEL);
if (!bfregi->count) {
err = -ENOMEM;
- goto out_ctx;
+ goto out_ucap;
}
bfregi->sys_pages = kcalloc(bfregi->num_sys_pages,
@@ -1860,105 +2132,35 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
if (err)
goto out_sys_pages;
- if (ibdev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)
- context->ibucontext.invalidate_range =
- &mlx5_ib_invalidate_range;
-
- if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
- err = mlx5_ib_devx_create(dev, true);
- if (err < 0)
- goto out_uars;
- context->devx_uid = err;
- }
-
+uar_done:
err = mlx5_ib_alloc_transport_domain(dev, &context->tdn,
context->devx_uid);
if (err)
- goto out_devx;
-
- if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
- err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey);
- if (err)
- goto out_mdev;
- }
+ goto out_uars;
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
- resp.tot_bfregs = req.total_num_bfregs;
- resp.num_ports = dev->num_ports;
-
- if (field_avail(typeof(resp), cqe_version, udata->outlen))
- resp.response_length += sizeof(resp.cqe_version);
-
- if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) {
- resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
- MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
- resp.response_length += sizeof(resp.cmds_supp_uhw);
- }
-
- if (field_avail(typeof(resp), eth_min_inline, udata->outlen)) {
- if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) {
- mlx5_query_min_inline(dev->mdev, &resp.eth_min_inline);
- resp.eth_min_inline++;
- }
- resp.response_length += sizeof(resp.eth_min_inline);
- }
-
- if (field_avail(typeof(resp), clock_info_versions, udata->outlen)) {
- if (mdev->clock_info)
- resp.clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1);
- resp.response_length += sizeof(resp.clock_info_versions);
- }
-
- /*
- * We don't want to expose information from the PCI bar that is located
- * after 4096 bytes, so if the arch only supports larger pages, let's
- * pretend we don't support reading the HCA's core clock. This is also
- * forced by mmap function.
- */
- if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
- if (PAGE_SIZE <= 4096) {
- resp.comp_mask |=
- MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
- resp.hca_core_clock_offset =
- offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE;
- }
- resp.response_length += sizeof(resp.hca_core_clock_offset);
- }
-
- if (field_avail(typeof(resp), log_uar_size, udata->outlen))
- resp.response_length += sizeof(resp.log_uar_size);
-
- if (field_avail(typeof(resp), num_uars_per_page, udata->outlen))
- resp.response_length += sizeof(resp.num_uars_per_page);
-
- if (field_avail(typeof(resp), num_dyn_bfregs, udata->outlen)) {
- resp.num_dyn_bfregs = bfregi->num_dyn_bfregs;
- resp.response_length += sizeof(resp.num_dyn_bfregs);
- }
+ context->cqe_version = min_t(__u8,
+ (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
+ req.max_cqe_version);
- if (field_avail(typeof(resp), dump_fill_mkey, udata->outlen)) {
- if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
- resp.dump_fill_mkey = dump_fill_mkey;
- resp.comp_mask |=
- MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY;
- }
- resp.response_length += sizeof(resp.dump_fill_mkey);
- }
+ err = set_ucontext_resp(uctx, &resp);
+ if (err)
+ goto out_mdev;
+ resp.response_length = min(udata->outlen, sizeof(resp));
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
goto out_mdev;
bfregi->ver = ver;
bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs;
- context->cqe_version = resp.cqe_version;
context->lib_caps = req.lib_caps;
print_lib_caps(dev, context->lib_caps);
- if (dev->lag_active) {
- u8 port = mlx5_core_native_port_num(dev->mdev) - 1;
+ if (mlx5_ib_lag_should_assign_affinity(dev)) {
+ u32 port = mlx5_core_native_port_num(dev->mdev) - 1;
atomic_set(&context->tx_port_affinity,
atomic_add_return(
@@ -1969,9 +2171,6 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
out_mdev:
mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
-out_devx:
- if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
- mlx5_ib_devx_destroy(dev, context->devx_uid);
out_uars:
deallocate_uars(dev, context);
@@ -1982,30 +2181,61 @@ out_sys_pages:
out_count:
kfree(bfregi->count);
+out_ucap:
+ if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX &&
+ uctx_rdma_ctrl_is_enabled(uctx->enabled_caps))
+ mlx5_cmd_remove_privileged_uid(dev->mdev, context->devx_uid);
+
+out_devx:
+ if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
+ mlx5_ib_devx_destroy(dev, context->devx_uid);
+
out_ctx:
return err;
}
+static int mlx5_ib_query_ucontext(struct ib_ucontext *ibcontext,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_alloc_ucontext_resp uctx_resp = {};
+ int ret;
+
+ ret = set_ucontext_resp(ibcontext, &uctx_resp);
+ if (ret)
+ return ret;
+
+ uctx_resp.response_length =
+ min_t(size_t,
+ uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX),
+ sizeof(uctx_resp));
+
+ ret = uverbs_copy_to_struct_or_zero(attrs,
+ MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX,
+ &uctx_resp,
+ sizeof(uctx_resp));
+ return ret;
+}
+
static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
struct mlx5_bfreg_info *bfregi;
- /* All umem's must be destroyed before destroying the ucontext. */
- mutex_lock(&ibcontext->per_mm_list_lock);
- WARN_ON(!list_empty(&ibcontext->per_mm_list));
- mutex_unlock(&ibcontext->per_mm_list_lock);
-
bfregi = &context->bfregi;
mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
- if (context->devx_uid)
- mlx5_ib_devx_destroy(dev, context->devx_uid);
-
deallocate_uars(dev, context);
kfree(bfregi->sys_pages);
kfree(bfregi->count);
+
+ if (context->devx_uid) {
+ if (uctx_rdma_ctrl_is_enabled(ibcontext->enabled_caps))
+ mlx5_cmd_remove_privileged_uid(dev->mdev,
+ context->devx_uid);
+ mlx5_ib_devx_destroy(dev, context->devx_uid);
+ }
}
static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
@@ -2018,6 +2248,17 @@ static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
return (dev->mdev->bar_addr >> PAGE_SHIFT) + uar_idx / fw_uars_per_page;
}
+static u64 uar_index2paddress(struct mlx5_ib_dev *dev,
+ int uar_idx)
+{
+ unsigned int fw_uars_per_page;
+
+ fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+ MLX5_UARS_IN_PAGE : 1;
+
+ return (dev->mdev->bar_addr + (uar_idx / fw_uars_per_page) * PAGE_SIZE);
+}
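
/*
 * Editorial note (not part of the patch): when the uar_4k capability is
 * set, the firmware packs MLX5_UARS_IN_PAGE UAR indices into each system
 * page, so uar_index2pfn() and uar_index2paddress() both divide the
 * index by fw_uars_per_page before adding it to the BAR base; without
 * uar_4k each index maps to its own page (fw_uars_per_page == 1).
 */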
+
static int get_command(unsigned long offset)
{
return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
@@ -2056,7 +2297,7 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
case MLX5_IB_MMAP_DEVICE_MEM:
return "Device Memory";
default:
- return NULL;
+ return "Unknown";
}
}
@@ -2073,7 +2314,7 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
if (vma->vm_flags & (VM_WRITE | VM_EXEC))
return -EPERM;
- vma->vm_flags &= ~VM_MAYWRITE;
+ vm_flags_clear(vma, VM_MAYWRITE);
if (!dev->mdev->clock_info)
return -EOPNOTSUPP;
@@ -2082,6 +2323,35 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
virt_to_page(dev->mdev->clock_info));
}
+static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
+{
+ struct mlx5_user_mmap_entry *mentry = to_mmmap(entry);
+ struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device);
+ struct mlx5_var_table *var_table = &dev->var_table;
+ struct mlx5_ib_ucontext *context = to_mucontext(entry->ucontext);
+
+ switch (mentry->mmap_flag) {
+ case MLX5_IB_MMAP_TYPE_MEMIC:
+ case MLX5_IB_MMAP_TYPE_MEMIC_OP:
+ mlx5_ib_dm_mmap_free(dev, mentry);
+ break;
+ case MLX5_IB_MMAP_TYPE_VAR:
+ mutex_lock(&var_table->bitmap_lock);
+ clear_bit(mentry->page_idx, var_table->bitmap);
+ mutex_unlock(&var_table->bitmap_lock);
+ kfree(mentry);
+ break;
+ case MLX5_IB_MMAP_TYPE_UAR_WC:
+ case MLX5_IB_MMAP_TYPE_UAR_NC:
+ mlx5_cmd_uar_dealloc(dev->mdev, mentry->page_idx,
+ context->devx_uid);
+ kfree(mentry);
+ break;
+ default:
+ WARN_ON(true);
+ }
+}
+
static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
struct vm_area_struct *vma,
struct mlx5_ib_ucontext *context)
@@ -2097,6 +2367,9 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
int max_valid_idx = dyn_uar ? bfregi->num_sys_pages :
bfregi->num_static_sys_pages;
+ if (bfregi->lib_uar_dyn)
+ return -EINVAL;
+
if (vma->vm_end - vma->vm_start != PAGE_SIZE)
return -EINVAL;
@@ -2114,14 +2387,6 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
switch (cmd) {
case MLX5_IB_MMAP_WC_PAGE:
case MLX5_IB_MMAP_ALLOC_WC:
-/* Some architectures don't support WC memory */
-#if defined(CONFIG_X86)
- if (!pat_enabled())
- return -EPERM;
-#elif !(defined(CONFIG_PPC) || (defined(CONFIG_ARM) && defined(CONFIG_MMU)))
- return -EPERM;
-#endif
- /* fall through */
case MLX5_IB_MMAP_REGULAR_PAGE:
/* For MLX5_IB_MMAP_REGULAR_PAGE, make a best effort to get WC */
prot = pgprot_writecombine(vma->vm_page_prot);
@@ -2157,7 +2422,8 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
bfregi->count[bfreg_dyn_idx]++;
mutex_unlock(&bfregi->lock);
- err = mlx5_cmd_alloc_uar(dev->mdev, &uar_index);
+ err = mlx5_cmd_uar_alloc(dev->mdev, &uar_index,
+ context->devx_uid);
if (err) {
mlx5_ib_warn(dev, "UAR alloc failed\n");
goto free_bfreg;
@@ -2170,7 +2436,7 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
err = rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE,
- prot);
+ prot, NULL);
if (err) {
mlx5_ib_err(dev,
"rdma_user_mmap_io failed with error=%d, mmap_cmd=%s\n",
@@ -2186,7 +2452,7 @@ err:
if (!dyn_uar)
return err;
- mlx5_cmd_free_uar(dev->mdev, idx);
+ mlx5_cmd_uar_dealloc(dev->mdev, idx, context->devx_uid);
free_bfreg:
mlx5_ib_free_bfreg(dev, bfregi, bfreg_dyn_idx);
@@ -2194,25 +2460,55 @@ free_bfreg:
return err;
}
-static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+static unsigned long mlx5_vma_to_pgoff(struct vm_area_struct *vma)
+{
+ unsigned long idx;
+ u8 command;
+
+ command = get_command(vma->vm_pgoff);
+ idx = get_extended_index(vma->vm_pgoff);
+
+ return (command << 16 | idx);
+}
+
+static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev,
+ struct vm_area_struct *vma,
+ struct ib_ucontext *ucontext)
{
- struct mlx5_ib_ucontext *mctx = to_mucontext(context);
- struct mlx5_ib_dev *dev = to_mdev(context->device);
- u16 page_idx = get_extended_index(vma->vm_pgoff);
- size_t map_size = vma->vm_end - vma->vm_start;
- u32 npages = map_size >> PAGE_SHIFT;
+ struct mlx5_user_mmap_entry *mentry;
+ struct rdma_user_mmap_entry *entry;
+ unsigned long pgoff;
+ pgprot_t prot;
phys_addr_t pfn;
+ int ret;
- if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) !=
- page_idx + npages)
+ pgoff = mlx5_vma_to_pgoff(vma);
+ entry = rdma_user_mmap_entry_get_pgoff(ucontext, pgoff);
+ if (!entry)
return -EINVAL;
- pfn = ((dev->mdev->bar_addr +
- MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >>
- PAGE_SHIFT) +
- page_idx;
- return rdma_user_mmap_io(context, vma, pfn, map_size,
- pgprot_writecombine(vma->vm_page_prot));
+ mentry = to_mmmap(entry);
+ pfn = (mentry->address >> PAGE_SHIFT);
+ if (mentry->mmap_flag == MLX5_IB_MMAP_TYPE_VAR ||
+ mentry->mmap_flag == MLX5_IB_MMAP_TYPE_UAR_NC)
+ prot = pgprot_noncached(vma->vm_page_prot);
+ else
+ prot = pgprot_writecombine(vma->vm_page_prot);
+ ret = rdma_user_mmap_io(ucontext, vma, pfn,
+ entry->npages * PAGE_SIZE,
+ prot,
+ entry);
+ rdma_user_mmap_entry_put(&mentry->rdma_entry);
+ return ret;
+}
+
+static u64 mlx5_entry_to_mmap_offset(struct mlx5_user_mmap_entry *entry)
+{
+ u64 cmd = (entry->rdma_entry.start_pgoff >> 16) & 0xFFFF;
+ u64 index = entry->rdma_entry.start_pgoff & 0xFFFF;
+
+ return (((index >> 8) << 16) | (cmd << MLX5_IB_MMAP_CMD_SHIFT) |
+ (index & 0xFF)) << PAGE_SHIFT;
}
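
/*
 * Editorial note (not part of the patch): mlx5_vma_to_pgoff() builds the
 * lookup key as (command << 16 | extended_index), which is what
 * mlx5_ib_mmap_offset() passes to rdma_user_mmap_entry_get_pgoff();
 * mlx5_entry_to_mmap_offset() goes the other way, re-splitting the
 * 16-bit index around the command field (cmd << MLX5_IB_MMAP_CMD_SHIFT)
 * to reconstruct the byte offset that userspace hands to mmap().
 */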
static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
@@ -2225,9 +2521,12 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
command = get_command(vma->vm_pgoff);
switch (command) {
case MLX5_IB_MMAP_WC_PAGE:
+ case MLX5_IB_MMAP_ALLOC_WC:
+ if (!mlx5_wc_support_get(dev->mdev))
+ return -EPERM;
+ fallthrough;
case MLX5_IB_MMAP_NC_PAGE:
case MLX5_IB_MMAP_REGULAR_PAGE:
- case MLX5_IB_MMAP_ALLOC_WC:
return uar_mmap(dev, command, vma, context);
case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
@@ -2239,7 +2538,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
if (vma->vm_flags & VM_WRITE)
return -EPERM;
- vma->vm_flags &= ~VM_MAYWRITE;
+ vm_flags_clear(vma, VM_MAYWRITE);
/* Don't expose to user-space information it shouldn't have */
if (PAGE_SIZE > 4096)
@@ -2250,210 +2549,15 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
PAGE_SHIFT;
return rdma_user_mmap_io(&context->ibucontext, vma, pfn,
PAGE_SIZE,
- pgprot_noncached(vma->vm_page_prot));
+ pgprot_noncached(vma->vm_page_prot),
+ NULL);
case MLX5_IB_MMAP_CLOCK_INFO:
return mlx5_ib_mmap_clock_info_page(dev, vma, context);
- case MLX5_IB_MMAP_DEVICE_MEM:
- return dm_mmap(ibcontext, vma);
-
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static inline int check_dm_type_support(struct mlx5_ib_dev *dev,
- u32 type)
-{
- switch (type) {
- case MLX5_IB_UAPI_DM_TYPE_MEMIC:
- if (!MLX5_CAP_DEV_MEM(dev->mdev, memic))
- return -EOPNOTSUPP;
- break;
- case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
- if (!capable(CAP_SYS_RAWIO) ||
- !capable(CAP_NET_RAW))
- return -EPERM;
-
- if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
- MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner)))
- return -EOPNOTSUPP;
- break;
- }
-
- return 0;
-}
-
-static int handle_alloc_dm_memic(struct ib_ucontext *ctx,
- struct mlx5_ib_dm *dm,
- struct ib_dm_alloc_attr *attr,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm;
- u64 start_offset;
- u32 page_idx;
- int err;
-
- dm->size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE);
-
- err = mlx5_cmd_alloc_memic(dm_db, &dm->dev_addr,
- dm->size, attr->alignment);
- if (err)
- return err;
-
- page_idx = (dm->dev_addr - pci_resource_start(dm_db->dev->pdev, 0) -
- MLX5_CAP64_DEV_MEM(dm_db->dev, memic_bar_start_addr)) >>
- PAGE_SHIFT;
-
- err = uverbs_copy_to(attrs,
- MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
- &page_idx, sizeof(page_idx));
- if (err)
- goto err_dealloc;
-
- start_offset = dm->dev_addr & ~PAGE_MASK;
- err = uverbs_copy_to(attrs,
- MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
- &start_offset, sizeof(start_offset));
- if (err)
- goto err_dealloc;
-
- bitmap_set(to_mucontext(ctx)->dm_pages, page_idx,
- DIV_ROUND_UP(dm->size, PAGE_SIZE));
-
- return 0;
-
-err_dealloc:
- mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size);
-
- return err;
-}
-
-static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
- struct mlx5_ib_dm *dm,
- struct ib_dm_alloc_attr *attr,
- struct uverbs_attr_bundle *attrs,
- int type)
-{
- struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm;
- u64 act_size;
- int err;
-
- /* Allocation size must be a multiple of the basic block size
- * and a power of 2.
- */
- act_size = roundup(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dm_db->dev));
- act_size = roundup_pow_of_two(act_size);
-
- dm->size = act_size;
- err = mlx5_cmd_alloc_sw_icm(dm_db, type, act_size,
- to_mucontext(ctx)->devx_uid, &dm->dev_addr,
- &dm->icm_dm.obj_id);
- if (err)
- return err;
-
- err = uverbs_copy_to(attrs,
- MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
- &dm->dev_addr, sizeof(dm->dev_addr));
- if (err)
- mlx5_cmd_dealloc_sw_icm(dm_db, type, dm->size,
- to_mucontext(ctx)->devx_uid,
- dm->dev_addr, dm->icm_dm.obj_id);
-
- return err;
-}
-
-struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_dm_alloc_attr *attr,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_dm *dm;
- enum mlx5_ib_uapi_dm_type type;
- int err;
-
- err = uverbs_get_const_default(&type, attrs,
- MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE,
- MLX5_IB_UAPI_DM_TYPE_MEMIC);
- if (err)
- return ERR_PTR(err);
-
- mlx5_ib_dbg(to_mdev(ibdev), "alloc_dm req: dm_type=%d user_length=0x%llx log_alignment=%d\n",
- type, attr->length, attr->alignment);
-
- err = check_dm_type_support(to_mdev(ibdev), type);
- if (err)
- return ERR_PTR(err);
-
- dm = kzalloc(sizeof(*dm), GFP_KERNEL);
- if (!dm)
- return ERR_PTR(-ENOMEM);
-
- dm->type = type;
-
- switch (type) {
- case MLX5_IB_UAPI_DM_TYPE_MEMIC:
- err = handle_alloc_dm_memic(context, dm,
- attr,
- attrs);
- break;
- case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
- case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
- err = handle_alloc_dm_sw_icm(context, dm, attr, attrs, type);
- break;
default:
- err = -EOPNOTSUPP;
+ return mlx5_ib_mmap_offset(dev, vma, ibcontext);
}
- if (err)
- goto err_free;
-
- return &dm->ibdm;
-
-err_free:
- kfree(dm);
- return ERR_PTR(err);
-}
-
-int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context(
- &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
- struct mlx5_dm *dm_db = &to_mdev(ibdm->device)->dm;
- struct mlx5_ib_dm *dm = to_mdm(ibdm);
- u32 page_idx;
- int ret;
-
- switch (dm->type) {
- case MLX5_IB_UAPI_DM_TYPE_MEMIC:
- ret = mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size);
- if (ret)
- return ret;
-
- page_idx = (dm->dev_addr -
- pci_resource_start(dm_db->dev->pdev, 0) -
- MLX5_CAP64_DEV_MEM(dm_db->dev,
- memic_bar_start_addr)) >>
- PAGE_SHIFT;
- bitmap_clear(ctx->dm_pages, page_idx,
- DIV_ROUND_UP(dm->size, PAGE_SIZE));
- break;
- case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
- case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
- ret = mlx5_cmd_dealloc_sw_icm(dm_db, dm->type, dm->size,
- ctx->devx_uid, dm->dev_addr,
- dm->icm_dm.obj_id);
- if (ret)
- return ret;
- break;
- default:
- return -EOPNOTSUPP;
- }
-
- kfree(dm);
-
return 0;
}
@@ -2464,7 +2568,7 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
struct mlx5_ib_alloc_pd_resp resp;
int err;
u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {};
- u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {};
u16 uid = 0;
struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
udata, struct mlx5_ib_ucontext, ibucontext);
@@ -2472,8 +2576,7 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
uid = context ? context->devx_uid : 0;
MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD);
MLX5_SET(alloc_pd_in, in, uid, uid);
- err = mlx5_cmd_exec(to_mdev(ibdev)->mdev, in, sizeof(in),
- out, sizeof(out));
+ err = mlx5_cmd_exec_inout(to_mdev(ibdev)->mdev, alloc_pd, in, out);
if (err)
return err;
@@ -2490,1790 +2593,12 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
return 0;
}
-static void mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+static int mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
struct mlx5_ib_dev *mdev = to_mdev(pd->device);
struct mlx5_ib_pd *mpd = to_mpd(pd);
- mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid);
-}
-
-enum {
- MATCH_CRITERIA_ENABLE_OUTER_BIT,
- MATCH_CRITERIA_ENABLE_MISC_BIT,
- MATCH_CRITERIA_ENABLE_INNER_BIT,
- MATCH_CRITERIA_ENABLE_MISC2_BIT
-};
-
-#define HEADER_IS_ZERO(match_criteria, headers) \
- !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
- 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \
-
-static u8 get_match_criteria_enable(u32 *match_criteria)
-{
- u8 match_criteria_enable;
-
- match_criteria_enable =
- (!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
- MATCH_CRITERIA_ENABLE_OUTER_BIT;
- match_criteria_enable |=
- (!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
- MATCH_CRITERIA_ENABLE_MISC_BIT;
- match_criteria_enable |=
- (!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
- MATCH_CRITERIA_ENABLE_INNER_BIT;
- match_criteria_enable |=
- (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
- MATCH_CRITERIA_ENABLE_MISC2_BIT;
-
- return match_criteria_enable;
-}
-
-static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
-{
- u8 entry_mask;
- u8 entry_val;
- int err = 0;
-
- if (!mask)
- goto out;
-
- entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c,
- ip_protocol);
- entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v,
- ip_protocol);
- if (!entry_mask) {
- MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
- MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
- goto out;
- }
- /* Don't override existing ip protocol */
- if (mask != entry_mask || val != entry_val)
- err = -EINVAL;
-out:
- return err;
-}
-
-static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
- bool inner)
-{
- if (inner) {
- MLX5_SET(fte_match_set_misc,
- misc_c, inner_ipv6_flow_label, mask);
- MLX5_SET(fte_match_set_misc,
- misc_v, inner_ipv6_flow_label, val);
- } else {
- MLX5_SET(fte_match_set_misc,
- misc_c, outer_ipv6_flow_label, mask);
- MLX5_SET(fte_match_set_misc,
- misc_v, outer_ipv6_flow_label, val);
- }
-}
-
-static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
-{
- MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
- MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
- MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
- MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
-}
-
-static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
-{
- if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
- !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
- return -EOPNOTSUPP;
-
- if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
- !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
- return -EOPNOTSUPP;
-
- if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
- !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
- return -EOPNOTSUPP;
-
- if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
- !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
- return -EOPNOTSUPP;
-
- return 0;
-}
-
-#define LAST_ETH_FIELD vlan_tag
-#define LAST_IB_FIELD sl
-#define LAST_IPV4_FIELD tos
-#define LAST_IPV6_FIELD traffic_class
-#define LAST_TCP_UDP_FIELD src_port
-#define LAST_TUNNEL_FIELD tunnel_id
-#define LAST_FLOW_TAG_FIELD tag_id
-#define LAST_DROP_FIELD size
-#define LAST_COUNTERS_FIELD counters
-
-/* Field is the last supported field */
-#define FIELDS_NOT_SUPPORTED(filter, field)\
- memchr_inv((void *)&filter.field +\
- sizeof(filter.field), 0,\
- sizeof(filter) -\
- offsetof(typeof(filter), field) -\
- sizeof(filter.field))
-
-int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
- bool is_egress,
- struct mlx5_flow_act *action)
-{
-
- switch (maction->ib_action.type) {
- case IB_FLOW_ACTION_ESP:
- if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT))
- return -EINVAL;
- /* Currently only AES_GCM keymat is supported by the driver */
- action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx;
- action->action |= is_egress ?
- MLX5_FLOW_CONTEXT_ACTION_ENCRYPT :
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT;
- return 0;
- case IB_FLOW_ACTION_UNSPECIFIED:
- if (maction->flow_action_raw.sub_type ==
- MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
- if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
- return -EINVAL;
- action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
- action->modify_id = maction->flow_action_raw.action_id;
- return 0;
- }
- if (maction->flow_action_raw.sub_type ==
- MLX5_IB_FLOW_ACTION_DECAP) {
- if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
- return -EINVAL;
- action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
- return 0;
- }
- if (maction->flow_action_raw.sub_type ==
- MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
- if (action->action &
- MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
- return -EINVAL;
- action->action |=
- MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
- action->reformat_id =
- maction->flow_action_raw.action_id;
- return 0;
- }
- /* fall through */
- default:
- return -EOPNOTSUPP;
- }
-}
-
-static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
- u32 *match_v, const union ib_flow_spec *ib_spec,
- const struct ib_flow_attr *flow_attr,
- struct mlx5_flow_act *action, u32 prev_type)
-{
- void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
- misc_parameters);
- void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
- misc_parameters);
- void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
- misc_parameters_2);
- void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
- misc_parameters_2);
- void *headers_c;
- void *headers_v;
- int match_ipv;
- int ret;
-
- if (ib_spec->type & IB_FLOW_SPEC_INNER) {
- headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
- inner_headers);
- headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
- inner_headers);
- match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.inner_ip_version);
- } else {
- headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
- outer_headers);
- headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
- outer_headers);
- match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_ip_version);
- }
-
- switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
- case IB_FLOW_SPEC_ETH:
- if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
- return -EOPNOTSUPP;
-
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- dmac_47_16),
- ib_spec->eth.mask.dst_mac);
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- dmac_47_16),
- ib_spec->eth.val.dst_mac);
-
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- smac_47_16),
- ib_spec->eth.mask.src_mac);
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- smac_47_16),
- ib_spec->eth.val.src_mac);
-
- if (ib_spec->eth.mask.vlan_tag) {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- cvlan_tag, 1);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- cvlan_tag, 1);
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- first_vid, ntohs(ib_spec->eth.val.vlan_tag));
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- first_cfi,
- ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- first_cfi,
- ntohs(ib_spec->eth.val.vlan_tag) >> 12);
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- first_prio,
- ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- first_prio,
- ntohs(ib_spec->eth.val.vlan_tag) >> 13);
- }
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ethertype, ntohs(ib_spec->eth.mask.ether_type));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ethertype, ntohs(ib_spec->eth.val.ether_type));
- break;
- case IB_FLOW_SPEC_IPV4:
- if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
- return -EOPNOTSUPP;
-
- if (match_ipv) {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ip_version, 0xf);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ip_version, MLX5_FS_IPV4_VERSION);
- } else {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ethertype, 0xffff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ethertype, ETH_P_IP);
- }
-
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- src_ipv4_src_ipv6.ipv4_layout.ipv4),
- &ib_spec->ipv4.mask.src_ip,
- sizeof(ib_spec->ipv4.mask.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- src_ipv4_src_ipv6.ipv4_layout.ipv4),
- &ib_spec->ipv4.val.src_ip,
- sizeof(ib_spec->ipv4.val.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
- &ib_spec->ipv4.mask.dst_ip,
- sizeof(ib_spec->ipv4.mask.dst_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
- &ib_spec->ipv4.val.dst_ip,
- sizeof(ib_spec->ipv4.val.dst_ip));
-
- set_tos(headers_c, headers_v,
- ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
-
- if (set_proto(headers_c, headers_v,
- ib_spec->ipv4.mask.proto,
- ib_spec->ipv4.val.proto))
- return -EINVAL;
- break;
- case IB_FLOW_SPEC_IPV6:
- if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
- return -EOPNOTSUPP;
-
- if (match_ipv) {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ip_version, 0xf);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ip_version, MLX5_FS_IPV6_VERSION);
- } else {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ethertype, 0xffff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ethertype, ETH_P_IPV6);
- }
-
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
- &ib_spec->ipv6.mask.src_ip,
- sizeof(ib_spec->ipv6.mask.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
- &ib_spec->ipv6.val.src_ip,
- sizeof(ib_spec->ipv6.val.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- &ib_spec->ipv6.mask.dst_ip,
- sizeof(ib_spec->ipv6.mask.dst_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- &ib_spec->ipv6.val.dst_ip,
- sizeof(ib_spec->ipv6.val.dst_ip));
-
- set_tos(headers_c, headers_v,
- ib_spec->ipv6.mask.traffic_class,
- ib_spec->ipv6.val.traffic_class);
-
- if (set_proto(headers_c, headers_v,
- ib_spec->ipv6.mask.next_hdr,
- ib_spec->ipv6.val.next_hdr))
- return -EINVAL;
-
- set_flow_label(misc_params_c, misc_params_v,
- ntohl(ib_spec->ipv6.mask.flow_label),
- ntohl(ib_spec->ipv6.val.flow_label),
- ib_spec->type & IB_FLOW_SPEC_INNER);
- break;
- case IB_FLOW_SPEC_ESP:
- if (ib_spec->esp.mask.seq)
- return -EOPNOTSUPP;
-
- MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi,
- ntohl(ib_spec->esp.mask.spi));
- MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
- ntohl(ib_spec->esp.val.spi));
- break;
- case IB_FLOW_SPEC_TCP:
- if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
- LAST_TCP_UDP_FIELD))
- return -EOPNOTSUPP;
-
- if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
- return -EINVAL;
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
- ntohs(ib_spec->tcp_udp.mask.src_port));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
- ntohs(ib_spec->tcp_udp.val.src_port));
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
- ntohs(ib_spec->tcp_udp.mask.dst_port));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
- ntohs(ib_spec->tcp_udp.val.dst_port));
- break;
- case IB_FLOW_SPEC_UDP:
- if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
- LAST_TCP_UDP_FIELD))
- return -EOPNOTSUPP;
-
- if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
- return -EINVAL;
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
- ntohs(ib_spec->tcp_udp.mask.src_port));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
- ntohs(ib_spec->tcp_udp.val.src_port));
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
- ntohs(ib_spec->tcp_udp.mask.dst_port));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
- ntohs(ib_spec->tcp_udp.val.dst_port));
- break;
- case IB_FLOW_SPEC_GRE:
- if (ib_spec->gre.mask.c_ks_res0_ver)
- return -EOPNOTSUPP;
-
- if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
- return -EINVAL;
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
- 0xff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
- IPPROTO_GRE);
-
- MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
- ntohs(ib_spec->gre.mask.protocol));
- MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
- ntohs(ib_spec->gre.val.protocol));
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
- gre_key.nvgre.hi),
- &ib_spec->gre.mask.key,
- sizeof(ib_spec->gre.mask.key));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
- gre_key.nvgre.hi),
- &ib_spec->gre.val.key,
- sizeof(ib_spec->gre.val.key));
- break;
- case IB_FLOW_SPEC_MPLS:
- switch (prev_type) {
- case IB_FLOW_SPEC_UDP:
- if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_first_mpls_over_udp),
- &ib_spec->mpls.mask.tag))
- return -EOPNOTSUPP;
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
- outer_first_mpls_over_udp),
- &ib_spec->mpls.val.tag,
- sizeof(ib_spec->mpls.val.tag));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
- outer_first_mpls_over_udp),
- &ib_spec->mpls.mask.tag,
- sizeof(ib_spec->mpls.mask.tag));
- break;
- case IB_FLOW_SPEC_GRE:
- if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_first_mpls_over_gre),
- &ib_spec->mpls.mask.tag))
- return -EOPNOTSUPP;
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
- outer_first_mpls_over_gre),
- &ib_spec->mpls.val.tag,
- sizeof(ib_spec->mpls.val.tag));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
- outer_first_mpls_over_gre),
- &ib_spec->mpls.mask.tag,
- sizeof(ib_spec->mpls.mask.tag));
- break;
- default:
- if (ib_spec->type & IB_FLOW_SPEC_INNER) {
- if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.inner_first_mpls),
- &ib_spec->mpls.mask.tag))
- return -EOPNOTSUPP;
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
- inner_first_mpls),
- &ib_spec->mpls.val.tag,
- sizeof(ib_spec->mpls.val.tag));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
- inner_first_mpls),
- &ib_spec->mpls.mask.tag,
- sizeof(ib_spec->mpls.mask.tag));
- } else {
- if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_first_mpls),
- &ib_spec->mpls.mask.tag))
- return -EOPNOTSUPP;
-
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
- outer_first_mpls),
- &ib_spec->mpls.val.tag,
- sizeof(ib_spec->mpls.val.tag));
- memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
- outer_first_mpls),
- &ib_spec->mpls.mask.tag,
- sizeof(ib_spec->mpls.mask.tag));
- }
- }
- break;
- case IB_FLOW_SPEC_VXLAN_TUNNEL:
- if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
- LAST_TUNNEL_FIELD))
- return -EOPNOTSUPP;
-
- MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
- ntohl(ib_spec->tunnel.mask.tunnel_id));
- MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
- ntohl(ib_spec->tunnel.val.tunnel_id));
- break;
- case IB_FLOW_SPEC_ACTION_TAG:
- if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
- LAST_FLOW_TAG_FIELD))
- return -EOPNOTSUPP;
- if (ib_spec->flow_tag.tag_id >= BIT(24))
- return -EINVAL;
-
- action->flow_tag = ib_spec->flow_tag.tag_id;
- action->flags |= FLOW_ACT_HAS_TAG;
- break;
- case IB_FLOW_SPEC_ACTION_DROP:
- if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
- LAST_DROP_FIELD))
- return -EOPNOTSUPP;
- action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
- break;
- case IB_FLOW_SPEC_ACTION_HANDLE:
- ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
- flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
- if (ret)
- return ret;
- break;
- case IB_FLOW_SPEC_ACTION_COUNT:
- if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
- LAST_COUNTERS_FIELD))
- return -EOPNOTSUPP;
-
- /* for now support only one counters spec per flow */
- if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
- return -EINVAL;
-
- action->counters = ib_spec->flow_count.counters;
- action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
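/*
 * Editor's sketch (illustration only): in the IB_FLOW_SPEC_ETH branch above,
 * the 16-bit 802.1Q TCI in ib_spec->eth.*.vlan_tag is decomposed into
 * first_vid, first_cfi and first_prio using >> 12 and >> 13 (MLX5_SET()
 * truncates to field width). The same decomposition in plain C:
 */
#include <stdio.h>

int main(void)
{
	unsigned short tci = 0xa064;	/* example TCI: PCP=5, DEI=0, VID=100 */

	printf("vid=%u cfi=%u prio=%u\n",
	       tci & 0x0fff,		/* 12-bit VLAN ID       */
	       (tci >> 12) & 0x1,	/* 1-bit CFI/DEI         */
	       (tci >> 13) & 0x7);	/* 3-bit priority (PCP)  */
	return 0;
}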
-
-/* If a flow could catch both multicast and unicast packets,
- * it won't fall into the multicast flow steering table and this rule
- * could steal other multicast packets.
- */
-static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
-{
- union ib_flow_spec *flow_spec;
-
- if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
- ib_attr->num_of_specs < 1)
- return false;
-
- flow_spec = (union ib_flow_spec *)(ib_attr + 1);
- if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
- struct ib_flow_spec_ipv4 *ipv4_spec;
-
- ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
- if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
- return true;
-
- return false;
- }
-
- if (flow_spec->type == IB_FLOW_SPEC_ETH) {
- struct ib_flow_spec_eth *eth_spec;
-
- eth_spec = (struct ib_flow_spec_eth *)flow_spec;
- return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
- is_multicast_ether_addr(eth_spec->val.dst_mac);
- }
-
- return false;
-}
-
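/*
 * Editor's sketch (illustration only): the Ethernet test above boils down
 * to the I/G bit (bit 0 of the first address octet). Only when that bit is
 * set in both the mask and the value is the rule guaranteed to match
 * multicast frames exclusively and safe for the multicast steering table.
 */
#include <stdbool.h>
#include <stdio.h>

static bool toy_is_multicast_ether_addr(const unsigned char addr[6])
{
	return addr[0] & 0x01;
}

int main(void)
{
	unsigned char mask[6]   = { 0x01, 0, 0, 0, 0, 0 };
	unsigned char val_mc[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };
	unsigned char val_uc[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };

	/* multicast only: both mask and value carry the group bit */
	printf("%d\n", toy_is_multicast_ether_addr(mask) &&
		       toy_is_multicast_ether_addr(val_mc));	/* 1 */
	/* could also catch unicast: the value lacks the group bit */
	printf("%d\n", toy_is_multicast_ether_addr(mask) &&
		       toy_is_multicast_ether_addr(val_uc));	/* 0 */
	return 0;
}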
-enum valid_spec {
- VALID_SPEC_INVALID,
- VALID_SPEC_VALID,
- VALID_SPEC_NA,
-};
-
-static enum valid_spec
-is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
- const struct mlx5_flow_spec *spec,
- const struct mlx5_flow_act *flow_act,
- bool egress)
-{
- const u32 *match_c = spec->match_criteria;
- bool is_crypto =
- (flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
- MLX5_FLOW_CONTEXT_ACTION_DECRYPT));
- bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c);
- bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP;
-
- /*
- * Currently only crypto is supported in egress; when regular egress
- * rules become supported, always return VALID_SPEC_NA.
- */
- if (!is_crypto)
- return VALID_SPEC_NA;
-
- return is_crypto && is_ipsec &&
- (!egress || (!is_drop && !(flow_act->flags & FLOW_ACT_HAS_TAG))) ?
- VALID_SPEC_VALID : VALID_SPEC_INVALID;
-}
-
-static bool is_valid_spec(struct mlx5_core_dev *mdev,
- const struct mlx5_flow_spec *spec,
- const struct mlx5_flow_act *flow_act,
- bool egress)
-{
- /* We currently only support IPsec egress flows */
- return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID;
-}
-
-static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
- const struct ib_flow_attr *flow_attr,
- bool check_inner)
-{
- union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
- int match_ipv = check_inner ?
- MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.inner_ip_version) :
- MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
- ft_field_support.outer_ip_version);
- int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
- bool ipv4_spec_valid, ipv6_spec_valid;
- unsigned int ip_spec_type = 0;
- bool has_ethertype = false;
- unsigned int spec_index;
- bool mask_valid = true;
- u16 eth_type = 0;
- bool type_valid;
-
- /* Validate that ethertype is correct */
- for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
- if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
- ib_spec->eth.mask.ether_type) {
- mask_valid = (ib_spec->eth.mask.ether_type ==
- htons(0xffff));
- has_ethertype = true;
- eth_type = ntohs(ib_spec->eth.val.ether_type);
- } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
- (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
- ip_spec_type = ib_spec->type;
- }
- ib_spec = (void *)ib_spec + ib_spec->size;
- }
-
- type_valid = (!has_ethertype) || (!ip_spec_type);
- if (!type_valid && mask_valid) {
- ipv4_spec_valid = (eth_type == ETH_P_IP) &&
- (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
- ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
- (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
-
- type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
- (((eth_type == ETH_P_MPLS_UC) ||
- (eth_type == ETH_P_MPLS_MC)) && match_ipv);
- }
-
- return type_valid;
-}
-
-static bool is_valid_attr(struct mlx5_core_dev *mdev,
- const struct ib_flow_attr *flow_attr)
-{
- return is_valid_ethertype(mdev, flow_attr, false) &&
- is_valid_ethertype(mdev, flow_attr, true);
-}
-
-static void put_flow_table(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *prio, bool ft_added)
-{
- prio->refcount -= !!ft_added;
- if (!prio->refcount) {
- mlx5_destroy_flow_table(prio->flow_table);
- prio->flow_table = NULL;
- }
-}
-
-static void counters_clear_description(struct ib_counters *counters)
-{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
-
- mutex_lock(&mcounters->mcntrs_mutex);
- kfree(mcounters->counters_data);
- mcounters->counters_data = NULL;
- mcounters->cntrs_max_index = 0;
- mutex_unlock(&mcounters->mcntrs_mutex);
-}
-
-static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
-{
- struct mlx5_ib_flow_handler *handler = container_of(flow_id,
- struct mlx5_ib_flow_handler,
- ibflow);
- struct mlx5_ib_flow_handler *iter, *tmp;
- struct mlx5_ib_dev *dev = handler->dev;
-
- mutex_lock(&dev->flow_db->lock);
-
- list_for_each_entry_safe(iter, tmp, &handler->list, list) {
- mlx5_del_flow_rules(iter->rule);
- put_flow_table(dev, iter->prio, true);
- list_del(&iter->list);
- kfree(iter);
- }
-
- mlx5_del_flow_rules(handler->rule);
- put_flow_table(dev, handler->prio, true);
- if (handler->ibcounters &&
- atomic_read(&handler->ibcounters->usecnt) == 1)
- counters_clear_description(handler->ibcounters);
-
- mutex_unlock(&dev->flow_db->lock);
- if (handler->flow_matcher)
- atomic_dec(&handler->flow_matcher->usecnt);
- kfree(handler);
-
- return 0;
-}
-
-static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
-{
- priority *= 2;
- if (!dont_trap)
- priority++;
- return priority;
-}
-
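/*
 * Editor's sketch (illustration only): ib_prio_to_core_prio() above gives
 * every user priority a pair of core priorities, so that a "don't trap"
 * rule always lands one level above the normal rule of the same priority.
 */
#include <stdio.h>

static int toy_prio_to_core_prio(unsigned int priority, int dont_trap)
{
	priority *= 2;
	if (!dont_trap)
		priority++;
	return priority;
}

int main(void)
{
	unsigned int p;

	for (p = 0; p < 3; p++)
		printf("user %u -> dont_trap %d, normal %d\n", p,
		       toy_prio_to_core_prio(p, 1),
		       toy_prio_to_core_prio(p, 0));
	return 0;
}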
-enum flow_table_type {
- MLX5_IB_FT_RX,
- MLX5_IB_FT_TX
-};
-
-#define MLX5_FS_MAX_TYPES 6
-#define MLX5_FS_MAX_ENTRIES BIT(16)
-
-static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
- struct mlx5_ib_flow_prio *prio,
- int priority,
- int num_entries, int num_groups,
- u32 flags)
-{
- struct mlx5_flow_table *ft;
-
- ft = mlx5_create_auto_grouped_flow_table(ns, priority,
- num_entries,
- num_groups,
- 0, flags);
- if (IS_ERR(ft))
- return ERR_CAST(ft);
-
- prio->flow_table = ft;
- prio->refcount = 0;
- return prio;
-}
-
-static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
- struct ib_flow_attr *flow_attr,
- enum flow_table_type ft_type)
-{
- bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
- struct mlx5_flow_namespace *ns = NULL;
- struct mlx5_ib_flow_prio *prio;
- struct mlx5_flow_table *ft;
- int max_table_size;
- int num_entries;
- int num_groups;
- u32 flags = 0;
- int priority;
-
- max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- log_max_ft_size));
- if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- enum mlx5_flow_namespace_type fn_type;
-
- if (flow_is_multicast_only(flow_attr) &&
- !dont_trap)
- priority = MLX5_IB_FLOW_MCAST_PRIO;
- else
- priority = ib_prio_to_core_prio(flow_attr->priority,
- dont_trap);
- if (ft_type == MLX5_IB_FT_RX) {
- fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
- prio = &dev->flow_db->prios[priority];
- if (!dev->is_rep &&
- MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
- if (!dev->is_rep &&
- MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- reformat_l3_tunnel_to_l2))
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- } else {
- max_table_size =
- BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
- log_max_ft_size));
- fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
- prio = &dev->flow_db->egress_prios[priority];
- if (!dev->is_rep &&
- MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- }
- ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
- num_entries = MLX5_FS_MAX_ENTRIES;
- num_groups = MLX5_FS_MAX_TYPES;
- } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
- ns = mlx5_get_flow_namespace(dev->mdev,
- MLX5_FLOW_NAMESPACE_LEFTOVERS);
- build_leftovers_ft_param(&priority,
- &num_entries,
- &num_groups);
- prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
- } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
- if (!MLX5_CAP_FLOWTABLE(dev->mdev,
- allow_sniffer_and_nic_rx_shared_tir))
- return ERR_PTR(-ENOTSUPP);
-
- ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ?
- MLX5_FLOW_NAMESPACE_SNIFFER_RX :
- MLX5_FLOW_NAMESPACE_SNIFFER_TX);
-
- prio = &dev->flow_db->sniffer[ft_type];
- priority = 0;
- num_entries = 1;
- num_groups = 1;
- }
-
- if (!ns)
- return ERR_PTR(-ENOTSUPP);
-
- max_table_size = min_t(int, num_entries, max_table_size);
-
- ft = prio->flow_table;
- if (!ft)
- return _get_prio(ns, prio, priority, max_table_size, num_groups,
- flags);
-
- return prio;
-}
-
-static void set_underlay_qp(struct mlx5_ib_dev *dev,
- struct mlx5_flow_spec *spec,
- u32 underlay_qpn)
-{
- void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
- spec->match_criteria,
- misc_parameters);
- void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters);
-
- if (underlay_qpn &&
- MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- ft_field_support.bth_dst_qp)) {
- MLX5_SET(fte_match_set_misc,
- misc_params_v, bth_dst_qp, underlay_qpn);
- MLX5_SET(fte_match_set_misc,
- misc_params_c, bth_dst_qp, 0xffffff);
- }
-}
-
-static int read_flow_counters(struct ib_device *ibdev,
- struct mlx5_read_counters_attr *read_attr)
-{
- struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
-
- return mlx5_fc_query(dev->mdev, fc,
- &read_attr->out[IB_COUNTER_PACKETS],
- &read_attr->out[IB_COUNTER_BYTES]);
-}
-
-/* flow counters currently expose two counters: packets and bytes */
-#define FLOW_COUNTERS_NUM 2
-static int counters_set_description(struct ib_counters *counters,
- enum mlx5_ib_counters_type counters_type,
- struct mlx5_ib_flow_counters_desc *desc_data,
- u32 ncounters)
-{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
- u32 cntrs_max_index = 0;
- int i;
-
- if (counters_type != MLX5_IB_COUNTERS_FLOW)
- return -EINVAL;
-
- /* init the fields for the object */
- mcounters->type = counters_type;
- mcounters->read_counters = read_flow_counters;
- mcounters->counters_num = FLOW_COUNTERS_NUM;
- mcounters->ncounters = ncounters;
- /* each counter entry has both a description and an index */
- for (i = 0; i < ncounters; i++) {
- if (desc_data[i].description > IB_COUNTER_BYTES)
- return -EINVAL;
-
- if (cntrs_max_index <= desc_data[i].index)
- cntrs_max_index = desc_data[i].index + 1;
- }
-
- mutex_lock(&mcounters->mcntrs_mutex);
- mcounters->counters_data = desc_data;
- mcounters->cntrs_max_index = cntrs_max_index;
- mutex_unlock(&mcounters->mcntrs_mutex);
-
- return 0;
-}
-
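/*
 * Editor's sketch (illustration only, hypothetical toy_* names): each
 * user-supplied descriptor handled by counters_set_description() pairs a
 * counter description (packets or bytes) with an index into the user's
 * read-back array, and cntrs_max_index ends up one past the highest index.
 */
#include <stdio.h>

enum { TOY_COUNTER_PACKETS, TOY_COUNTER_BYTES };

struct toy_desc {
	unsigned int description;	/* which hardware counter */
	unsigned int index;		/* slot in the user's output array */
};

int main(void)
{
	struct toy_desc desc[] = {
		{ TOY_COUNTER_PACKETS, 0 },
		{ TOY_COUNTER_BYTES,   1 },
	};
	unsigned int i, max_index = 0;

	for (i = 0; i < sizeof(desc) / sizeof(desc[0]); i++)
		if (max_index <= desc[i].index)
			max_index = desc[i].index + 1;

	printf("cntrs_max_index=%u\n", max_index);	/* 2 */
	return 0;
}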
-#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
-static int flow_counters_set_data(struct ib_counters *ibcounters,
- struct mlx5_ib_create_flow *ucmd)
-{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
- struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
- struct mlx5_ib_flow_counters_desc *desc_data = NULL;
- bool hw_hndl = false;
- int ret = 0;
-
- if (ucmd && ucmd->ncounters_data != 0) {
- cntrs_data = ucmd->data;
- if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
- return -EINVAL;
-
- desc_data = kcalloc(cntrs_data->ncounters,
- sizeof(*desc_data),
- GFP_KERNEL);
- if (!desc_data)
- return -ENOMEM;
-
- if (copy_from_user(desc_data,
- u64_to_user_ptr(cntrs_data->counters_data),
- sizeof(*desc_data) * cntrs_data->ncounters)) {
- ret = -EFAULT;
- goto free;
- }
- }
-
- if (!mcounters->hw_cntrs_hndl) {
- mcounters->hw_cntrs_hndl = mlx5_fc_create(
- to_mdev(ibcounters->device)->mdev, false);
- if (IS_ERR(mcounters->hw_cntrs_hndl)) {
- ret = PTR_ERR(mcounters->hw_cntrs_hndl);
- goto free;
- }
- hw_hndl = true;
- }
-
- if (desc_data) {
- /* counters already bound to at least one flow */
- if (mcounters->cntrs_max_index) {
- ret = -EINVAL;
- goto free_hndl;
- }
-
- ret = counters_set_description(ibcounters,
- MLX5_IB_COUNTERS_FLOW,
- desc_data,
- cntrs_data->ncounters);
- if (ret)
- goto free_hndl;
-
- } else if (!mcounters->cntrs_max_index) {
- /* counters not bound yet, must have udata passed */
- ret = -EINVAL;
- goto free_hndl;
- }
-
- return 0;
-
-free_hndl:
- if (hw_hndl) {
- mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
- mcounters->hw_cntrs_hndl);
- mcounters->hw_cntrs_hndl = NULL;
- }
-free:
- kfree(desc_data);
- return ret;
-}
-
-static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- const struct ib_flow_attr *flow_attr,
- struct mlx5_flow_destination *dst,
- u32 underlay_qpn,
- struct mlx5_ib_create_flow *ucmd)
-{
- struct mlx5_flow_table *ft = ft_prio->flow_table;
- struct mlx5_ib_flow_handler *handler;
- struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
- struct mlx5_flow_spec *spec;
- struct mlx5_flow_destination dest_arr[2] = {};
- struct mlx5_flow_destination *rule_dst = dest_arr;
- const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
- unsigned int spec_index;
- u32 prev_type = 0;
- int err = 0;
- int dest_num = 0;
- bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
-
- if (!is_valid_attr(dev->mdev, flow_attr))
- return ERR_PTR(-EINVAL);
-
- if (dev->is_rep && is_egress)
- return ERR_PTR(-EINVAL);
-
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- handler = kzalloc(sizeof(*handler), GFP_KERNEL);
- if (!handler || !spec) {
- err = -ENOMEM;
- goto free;
- }
-
- INIT_LIST_HEAD(&handler->list);
- if (dst) {
- memcpy(&dest_arr[0], dst, sizeof(*dst));
- dest_num++;
- }
-
- for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
- err = parse_flow_attr(dev->mdev, spec->match_criteria,
- spec->match_value,
- ib_flow, flow_attr, &flow_act,
- prev_type);
- if (err < 0)
- goto free;
-
- prev_type = ((union ib_flow_spec *)ib_flow)->type;
- ib_flow += ((union ib_flow_spec *)ib_flow)->size;
- }
-
- if (!flow_is_multicast_only(flow_attr))
- set_underlay_qp(dev, spec, underlay_qpn);
-
- if (dev->is_rep) {
- void *misc;
-
- if (!dev->port[flow_attr->port - 1].rep) {
- err = -EINVAL;
- goto free;
- }
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
- misc_parameters);
- MLX5_SET(fte_match_set_misc, misc, source_port,
- dev->port[flow_attr->port - 1].rep->vport);
- misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
- misc_parameters);
- MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
- }
-
- spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
-
- if (is_egress &&
- !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) {
- err = -EINVAL;
- goto free;
- }
-
- if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- struct mlx5_ib_mcounters *mcounters;
-
- err = flow_counters_set_data(flow_act.counters, ucmd);
- if (err)
- goto free;
-
- mcounters = to_mcounters(flow_act.counters);
- handler->ibcounters = flow_act.counters;
- dest_arr[dest_num].type =
- MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dest_arr[dest_num].counter_id =
- mlx5_fc_id(mcounters->hw_cntrs_hndl);
- dest_num++;
- }
-
- if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
- if (!(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT)) {
- rule_dst = NULL;
- dest_num = 0;
- }
- } else {
- if (is_egress)
- flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
- else
- flow_act.action |=
- dest_num ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
- MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
- }
-
- if ((flow_act.flags & FLOW_ACT_HAS_TAG) &&
- (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
- mlx5_ib_warn(dev, "Flow tag %u and attribute type %x aren't allowed in leftovers\n",
- flow_act.flow_tag, flow_attr->type);
- err = -EINVAL;
- goto free;
- }
- handler->rule = mlx5_add_flow_rules(ft, spec,
- &flow_act,
- rule_dst, dest_num);
-
- if (IS_ERR(handler->rule)) {
- err = PTR_ERR(handler->rule);
- goto free;
- }
-
- ft_prio->refcount++;
- handler->prio = ft_prio;
- handler->dev = dev;
-
- ft_prio->flow_table = ft;
-free:
- if (err && handler) {
- if (handler->ibcounters &&
- atomic_read(&handler->ibcounters->usecnt) == 1)
- counters_clear_description(handler->ibcounters);
- kfree(handler);
- }
- kvfree(spec);
- return err ? ERR_PTR(err) : handler;
-}
-
-static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- const struct ib_flow_attr *flow_attr,
- struct mlx5_flow_destination *dst)
-{
- return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
-}
-
-static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- struct ib_flow_attr *flow_attr,
- struct mlx5_flow_destination *dst)
-{
- struct mlx5_ib_flow_handler *handler_dst = NULL;
- struct mlx5_ib_flow_handler *handler = NULL;
-
- handler = create_flow_rule(dev, ft_prio, flow_attr, NULL);
- if (!IS_ERR(handler)) {
- handler_dst = create_flow_rule(dev, ft_prio,
- flow_attr, dst);
- if (IS_ERR(handler_dst)) {
- mlx5_del_flow_rules(handler->rule);
- ft_prio->refcount--;
- kfree(handler);
- handler = handler_dst;
- } else {
- list_add(&handler_dst->list, &handler->list);
- }
- }
-
- return handler;
-}
-enum {
- LEFTOVERS_MC,
- LEFTOVERS_UC,
-};
-
-static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- struct ib_flow_attr *flow_attr,
- struct mlx5_flow_destination *dst)
-{
- struct mlx5_ib_flow_handler *handler_ucast = NULL;
- struct mlx5_ib_flow_handler *handler = NULL;
-
- static struct {
- struct ib_flow_attr flow_attr;
- struct ib_flow_spec_eth eth_flow;
- } leftovers_specs[] = {
- [LEFTOVERS_MC] = {
- .flow_attr = {
- .num_of_specs = 1,
- .size = sizeof(leftovers_specs[0])
- },
- .eth_flow = {
- .type = IB_FLOW_SPEC_ETH,
- .size = sizeof(struct ib_flow_spec_eth),
- .mask = {.dst_mac = {0x1} },
- .val = {.dst_mac = {0x1} }
- }
- },
- [LEFTOVERS_UC] = {
- .flow_attr = {
- .num_of_specs = 1,
- .size = sizeof(leftovers_specs[0])
- },
- .eth_flow = {
- .type = IB_FLOW_SPEC_ETH,
- .size = sizeof(struct ib_flow_spec_eth),
- .mask = {.dst_mac = {0x1} },
- .val = {.dst_mac = {} }
- }
- }
- };
-
- handler = create_flow_rule(dev, ft_prio,
- &leftovers_specs[LEFTOVERS_MC].flow_attr,
- dst);
- if (!IS_ERR(handler) &&
- flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
- handler_ucast = create_flow_rule(dev, ft_prio,
- &leftovers_specs[LEFTOVERS_UC].flow_attr,
- dst);
- if (IS_ERR(handler_ucast)) {
- mlx5_del_flow_rules(handler->rule);
- ft_prio->refcount--;
- kfree(handler);
- handler = handler_ucast;
- } else {
- list_add(&handler_ucast->list, &handler->list);
- }
- }
-
- return handler;
-}
-
-static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_rx,
- struct mlx5_ib_flow_prio *ft_tx,
- struct mlx5_flow_destination *dst)
-{
- struct mlx5_ib_flow_handler *handler_rx;
- struct mlx5_ib_flow_handler *handler_tx;
- int err;
- static const struct ib_flow_attr flow_attr = {
- .num_of_specs = 0,
- .size = sizeof(flow_attr)
- };
-
- handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
- if (IS_ERR(handler_rx)) {
- err = PTR_ERR(handler_rx);
- goto err;
- }
-
- handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
- if (IS_ERR(handler_tx)) {
- err = PTR_ERR(handler_tx);
- goto err_tx;
- }
-
- list_add(&handler_tx->list, &handler_rx->list);
-
- return handler_rx;
-
-err_tx:
- mlx5_del_flow_rules(handler_rx->rule);
- ft_rx->refcount--;
- kfree(handler_rx);
-err:
- return ERR_PTR(err);
-}
-
-static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
- struct ib_flow_attr *flow_attr,
- int domain,
- struct ib_udata *udata)
-{
- struct mlx5_ib_dev *dev = to_mdev(qp->device);
- struct mlx5_ib_qp *mqp = to_mqp(qp);
- struct mlx5_ib_flow_handler *handler = NULL;
- struct mlx5_flow_destination *dst = NULL;
- struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
- struct mlx5_ib_flow_prio *ft_prio;
- bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
- struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
- size_t min_ucmd_sz, required_ucmd_sz;
- int err;
- int underlay_qpn;
-
- if (udata && udata->inlen) {
- min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) +
- sizeof(ucmd_hdr.reserved);
- if (udata->inlen < min_ucmd_sz)
- return ERR_PTR(-EOPNOTSUPP);
-
- err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
- if (err)
- return ERR_PTR(err);
-
- /* currently supports only one counters data */
- if (ucmd_hdr.ncounters_data > 1)
- return ERR_PTR(-EINVAL);
-
- required_ucmd_sz = min_ucmd_sz +
- sizeof(struct mlx5_ib_flow_counters_data) *
- ucmd_hdr.ncounters_data;
- if (udata->inlen > required_ucmd_sz &&
- !ib_is_udata_cleared(udata, required_ucmd_sz,
- udata->inlen - required_ucmd_sz))
- return ERR_PTR(-EOPNOTSUPP);
-
- ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
- if (!ucmd)
- return ERR_PTR(-ENOMEM);
-
- err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
- if (err)
- goto free_ucmd;
- }
-
- if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
- err = -ENOMEM;
- goto free_ucmd;
- }
-
- if (domain != IB_FLOW_DOMAIN_USER ||
- flow_attr->port > dev->num_ports ||
- (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP |
- IB_FLOW_ATTR_FLAGS_EGRESS))) {
- err = -EINVAL;
- goto free_ucmd;
- }
-
- if (is_egress &&
- (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
- err = -EINVAL;
- goto free_ucmd;
- }
-
- dst = kzalloc(sizeof(*dst), GFP_KERNEL);
- if (!dst) {
- err = -ENOMEM;
- goto free_ucmd;
- }
-
- mutex_lock(&dev->flow_db->lock);
-
- ft_prio = get_flow_table(dev, flow_attr,
- is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
- if (IS_ERR(ft_prio)) {
- err = PTR_ERR(ft_prio);
- goto unlock;
- }
- if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
- ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
- if (IS_ERR(ft_prio_tx)) {
- err = PTR_ERR(ft_prio_tx);
- ft_prio_tx = NULL;
- goto destroy_ft;
- }
- }
-
- if (is_egress) {
- dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
- } else {
- dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- if (mqp->flags & MLX5_IB_QP_RSS)
- dst->tir_num = mqp->rss_qp.tirn;
- else
- dst->tir_num = mqp->raw_packet_qp.rq.tirn;
- }
-
- if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) {
- handler = create_dont_trap_rule(dev, ft_prio,
- flow_attr, dst);
- } else {
- underlay_qpn = (mqp->flags & MLX5_IB_QP_UNDERLAY) ?
- mqp->underlay_qpn : 0;
- handler = _create_flow_rule(dev, ft_prio, flow_attr,
- dst, underlay_qpn, ucmd);
- }
- } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
- handler = create_leftovers_rule(dev, ft_prio, flow_attr,
- dst);
- } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
- handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
- } else {
- err = -EINVAL;
- goto destroy_ft;
- }
-
- if (IS_ERR(handler)) {
- err = PTR_ERR(handler);
- handler = NULL;
- goto destroy_ft;
- }
-
- mutex_unlock(&dev->flow_db->lock);
- kfree(dst);
- kfree(ucmd);
-
- return &handler->ibflow;
-
-destroy_ft:
- put_flow_table(dev, ft_prio, false);
- if (ft_prio_tx)
- put_flow_table(dev, ft_prio_tx, false);
-unlock:
- mutex_unlock(&dev->flow_db->lock);
- kfree(dst);
-free_ucmd:
- kfree(ucmd);
- return ERR_PTR(err);
-}
-
-static struct mlx5_ib_flow_prio *
-_get_flow_table(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_matcher *fs_matcher,
- bool mcast)
-{
- struct mlx5_flow_namespace *ns = NULL;
- struct mlx5_ib_flow_prio *prio = NULL;
- int max_table_size = 0;
- u32 flags = 0;
- int priority;
-
- if (mcast)
- priority = MLX5_IB_FLOW_MCAST_PRIO;
- else
- priority = ib_prio_to_core_prio(fs_matcher->priority, false);
-
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
- max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- log_max_ft_size));
- if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
- if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
- reformat_l3_tunnel_to_l2))
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) {
- max_table_size = BIT(
- MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
- if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
- } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) {
- max_table_size = BIT(
- MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
- priority = FDB_BYPASS_PATH;
- }
-
- max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
-
- ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type);
- if (!ns)
- return ERR_PTR(-ENOTSUPP);
-
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS)
- prio = &dev->flow_db->prios[priority];
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS)
- prio = &dev->flow_db->egress_prios[priority];
- else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB)
- prio = &dev->flow_db->fdb;
-
- if (!prio)
- return ERR_PTR(-EINVAL);
-
- if (prio->flow_table)
- return prio;
-
- return _get_prio(ns, prio, priority, max_table_size,
- MLX5_FS_MAX_TYPES, flags);
-}
-
-static struct mlx5_ib_flow_handler *
-_create_raw_flow_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- struct mlx5_flow_destination *dst,
- struct mlx5_ib_flow_matcher *fs_matcher,
- struct mlx5_flow_act *flow_act,
- void *cmd_in, int inlen,
- int dst_num)
-{
- struct mlx5_ib_flow_handler *handler;
- struct mlx5_flow_spec *spec;
- struct mlx5_flow_table *ft = ft_prio->flow_table;
- int err = 0;
-
- spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
- handler = kzalloc(sizeof(*handler), GFP_KERNEL);
- if (!handler || !spec) {
- err = -ENOMEM;
- goto free;
- }
-
- INIT_LIST_HEAD(&handler->list);
-
- memcpy(spec->match_value, cmd_in, inlen);
- memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
- fs_matcher->mask_len);
- spec->match_criteria_enable = fs_matcher->match_criteria_enable;
-
- handler->rule = mlx5_add_flow_rules(ft, spec,
- flow_act, dst, dst_num);
-
- if (IS_ERR(handler->rule)) {
- err = PTR_ERR(handler->rule);
- goto free;
- }
-
- ft_prio->refcount++;
- handler->prio = ft_prio;
- handler->dev = dev;
- ft_prio->flow_table = ft;
-
-free:
- if (err)
- kfree(handler);
- kvfree(spec);
- return err ? ERR_PTR(err) : handler;
-}
-
-static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
- void *match_v)
-{
- void *match_c;
- void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4;
- void *dmac, *dmac_mask;
- void *ipv4, *ipv4_mask;
-
- if (!(fs_matcher->match_criteria_enable &
- (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT)))
- return false;
-
- match_c = fs_matcher->matcher_mask.match_params;
- match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v,
- outer_headers);
- match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c,
- outer_headers);
-
- dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
- dmac_47_16);
- dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
- dmac_47_16);
-
- if (is_multicast_ether_addr(dmac) &&
- is_multicast_ether_addr(dmac_mask))
- return true;
-
- ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
-
- ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
-
- if (ipv4_is_multicast(*(__be32 *)(ipv4)) &&
- ipv4_is_multicast(*(__be32 *)(ipv4_mask)))
- return true;
-
- return false;
-}
-
-struct mlx5_ib_flow_handler *
-mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_matcher *fs_matcher,
- struct mlx5_flow_act *flow_act,
- u32 counter_id,
- void *cmd_in, int inlen, int dest_id,
- int dest_type)
-{
- struct mlx5_flow_destination *dst;
- struct mlx5_ib_flow_prio *ft_prio;
- struct mlx5_ib_flow_handler *handler;
- int dst_num = 0;
- bool mcast;
- int err;
-
- if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL)
- return ERR_PTR(-EOPNOTSUPP);
-
- if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
- return ERR_PTR(-ENOMEM);
-
- dst = kcalloc(2, sizeof(*dst), GFP_KERNEL);
- if (!dst)
- return ERR_PTR(-ENOMEM);
-
- mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
- mutex_lock(&dev->flow_db->lock);
-
- ft_prio = _get_flow_table(dev, fs_matcher, mcast);
- if (IS_ERR(ft_prio)) {
- err = PTR_ERR(ft_prio);
- goto unlock;
- }
-
- if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
- dst[dst_num].type = dest_type;
- dst[dst_num].tir_num = dest_id;
- flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
- dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
- dst[dst_num].ft_num = dest_id;
- flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
- } else {
- dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
- flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
- }
-
- dst_num++;
-
- if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dst[dst_num].counter_id = counter_id;
- dst_num++;
- }
-
- handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act,
- cmd_in, inlen, dst_num);
-
- if (IS_ERR(handler)) {
- err = PTR_ERR(handler);
- goto destroy_ft;
- }
-
- mutex_unlock(&dev->flow_db->lock);
- atomic_inc(&fs_matcher->usecnt);
- handler->flow_matcher = fs_matcher;
-
- kfree(dst);
-
- return handler;
-
-destroy_ft:
- put_flow_table(dev, ft_prio, false);
-unlock:
- mutex_unlock(&dev->flow_db->lock);
- kfree(dst);
-
- return ERR_PTR(err);
-}
-
-static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags)
-{
- u32 flags = 0;
-
- if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA)
- flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA;
-
- return flags;
-}
-
-#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA
-static struct ib_flow_action *
-mlx5_ib_create_flow_action_esp(struct ib_device *device,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_dev *mdev = to_mdev(device);
- struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm;
- struct mlx5_accel_esp_xfrm_attrs accel_attrs = {};
- struct mlx5_ib_flow_action *action;
- u64 action_flags;
- u64 flags;
- int err = 0;
-
- err = uverbs_get_flags64(
- &action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
- ((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1));
- if (err)
- return ERR_PTR(err);
-
- flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags);
-
- /* We currently only support a subset of the standard features. Only a
- * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn
- * (with overlap). Full offload mode isn't supported.
- */
- if (!attr->keymat || attr->replay || attr->encap ||
- attr->spi || attr->seq || attr->tfc_pad ||
- attr->hard_limit_pkts ||
- (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
- IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)))
- return ERR_PTR(-EOPNOTSUPP);
-
- if (attr->keymat->protocol !=
- IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM)
- return ERR_PTR(-EOPNOTSUPP);
-
- aes_gcm = &attr->keymat->keymat.aes_gcm;
-
- if (aes_gcm->icv_len != 16 ||
- aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
- return ERR_PTR(-EOPNOTSUPP);
-
- action = kmalloc(sizeof(*action), GFP_KERNEL);
- if (!action)
- return ERR_PTR(-ENOMEM);
-
- action->esp_aes_gcm.ib_flags = attr->flags;
- memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key,
- sizeof(accel_attrs.keymat.aes_gcm.aes_key));
- accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8;
- memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt,
- sizeof(accel_attrs.keymat.aes_gcm.salt));
- memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv,
- sizeof(accel_attrs.keymat.aes_gcm.seq_iv));
- accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8;
- accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ;
- accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM;
-
- accel_attrs.esn = attr->esn;
- if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED)
- accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED;
- if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
- accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
-
- if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)
- accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT;
-
- action->esp_aes_gcm.ctx =
- mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags);
- if (IS_ERR(action->esp_aes_gcm.ctx)) {
- err = PTR_ERR(action->esp_aes_gcm.ctx);
- goto err_parse;
- }
-
- action->esp_aes_gcm.ib_flags = attr->flags;
-
- return &action->ib_action;
-
-err_parse:
- kfree(action);
- return ERR_PTR(err);
-}
-
-static int
-mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action,
- const struct ib_flow_action_attrs_esp *attr,
- struct uverbs_attr_bundle *attrs)
-{
- struct mlx5_ib_flow_action *maction = to_mflow_act(action);
- struct mlx5_accel_esp_xfrm_attrs accel_attrs;
- int err = 0;
-
- if (attr->keymat || attr->replay || attr->encap ||
- attr->spi || attr->seq || attr->tfc_pad ||
- attr->hard_limit_pkts ||
- (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
- IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS |
- IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)))
- return -EOPNOTSUPP;
-
- /* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can
- * be modified.
- */
- if (!(maction->esp_aes_gcm.ib_flags &
- IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) &&
- attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
- IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))
- return -EINVAL;
-
- memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs,
- sizeof(accel_attrs));
-
- accel_attrs.esn = attr->esn;
- if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
- accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
- else
- accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
-
- err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx,
- &accel_attrs);
- if (err)
- return err;
-
- maction->esp_aes_gcm.ib_flags &=
- ~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
- maction->esp_aes_gcm.ib_flags |=
- attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
-
- return 0;
-}
-
-static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
-{
- struct mlx5_ib_flow_action *maction = to_mflow_act(action);
-
- switch (action->type) {
- case IB_FLOW_ACTION_ESP:
- /*
- * We only support aes_gcm for now, so we implicitly know this is
- * the underlying crypto.
- */
- mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx);
- break;
- case IB_FLOW_ACTION_UNSPECIFIED:
- mlx5_ib_destroy_flow_action_raw(maction);
- break;
- default:
- WARN_ON(true);
- break;
- }
-
- kfree(maction);
- return 0;
+ return mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid);
}
static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
@@ -4286,7 +2611,7 @@ static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
uid = ibqp->pd ?
to_mpd(ibqp->pd)->uid : 0;
- if (mqp->flags & MLX5_IB_QP_UNDERLAY) {
+ if (mqp->flags & IB_QP_CREATE_SOURCE_QPN) {
mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n");
return -EOPNOTSUPP;
}
@@ -4334,7 +2659,7 @@ static ssize_t fw_pages_show(struct device *device,
struct mlx5_ib_dev *dev =
rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
- return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages);
+ return sysfs_emit(buf, "%d\n", dev->mdev->priv.fw_pages);
}
static DEVICE_ATTR_RO(fw_pages);
@@ -4344,7 +2669,7 @@ static ssize_t reg_pages_show(struct device *device,
struct mlx5_ib_dev *dev =
rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
- return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
+ return sysfs_emit(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
}
static DEVICE_ATTR_RO(reg_pages);
@@ -4354,7 +2679,7 @@ static ssize_t hca_type_show(struct device *device,
struct mlx5_ib_dev *dev =
rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
- return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
+ return sysfs_emit(buf, "MT%d\n", dev->mdev->pdev->device);
}
static DEVICE_ATTR_RO(hca_type);
@@ -4364,7 +2689,7 @@ static ssize_t hw_rev_show(struct device *device,
struct mlx5_ib_dev *dev =
rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
- return sprintf(buf, "%x\n", dev->mdev->rev_id);
+ return sysfs_emit(buf, "%x\n", dev->mdev->rev_id);
}
static DEVICE_ATTR_RO(hw_rev);
@@ -4374,8 +2699,8 @@ static ssize_t board_id_show(struct device *device,
struct mlx5_ib_dev *dev =
rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
- return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
- dev->mdev->board_id);
+ return sysfs_emit(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
+ dev->mdev->board_id);
}
static DEVICE_ATTR_RO(board_id);
@@ -4398,9 +2723,14 @@ static void pkey_change_handler(struct work_struct *work)
container_of(work, struct mlx5_ib_port_resources,
pkey_change_work);
- mutex_lock(&ports->devr->mutex);
+ if (!ports->gsi)
+ /*
+ * We got this event before the device was fully configured
+ * and MAD registration code wasn't called/finished yet.
+ */
+ return;
+
mlx5_ib_gsi_pkey_change(ports->gsi);
- mutex_unlock(&ports->devr->mutex);
}
static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
@@ -4457,7 +2787,7 @@ static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
* lock/unlock above locks. Now need to arm all involved CQs.
*/
list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
- mcq->comp(mcq);
+ mcq->comp(mcq, NULL);
}
spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
}
@@ -4472,8 +2802,7 @@ static void delay_drop_handler(struct work_struct *work)
atomic_inc(&delay_drop->events_cnt);
mutex_lock(&delay_drop->lock);
- err = mlx5_core_set_delay_drop(delay_drop->dev->mdev,
- delay_drop->timeout);
+ err = mlx5_core_set_delay_drop(delay_drop->dev, delay_drop->timeout);
if (err) {
mlx5_ib_warn(delay_drop->dev, "Failed to set delay drop, timeout=%u\n",
delay_drop->timeout);
@@ -4485,7 +2814,7 @@ static void delay_drop_handler(struct work_struct *work)
static void handle_general_event(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe,
struct ib_event *ibev)
{
- u8 port = (eqe->data.port.port >> 4) & 0xf;
+ u32 port = (eqe->data.port.port >> 4) & 0xf;
switch (eqe->sub_type) {
case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT:
@@ -4501,7 +2830,7 @@ static void handle_general_event(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe
static int handle_port_change(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe,
struct ib_event *ibev)
{
- u8 port = (eqe->data.port.port >> 4) & 0xf;
+ u32 port = (eqe->data.port.port >> 4) & 0xf;
ibev->element.port_num = port;
@@ -4572,7 +2901,7 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
break;
case MLX5_EVENT_TYPE_GENERAL_EVENT:
handle_general_event(ibdev, work->param, &ibev);
- /* fall through */
+ fallthrough;
default:
goto out;
}
@@ -4632,392 +2961,304 @@ static int mlx5_ib_event_slave_port(struct notifier_block *nb,
return NOTIFY_OK;
}
-static int set_has_smi_cap(struct mlx5_ib_dev *dev)
+static int mlx5_ib_get_plane_num(struct mlx5_core_dev *mdev, u8 *num_plane)
{
struct mlx5_hca_vport_context vport_ctx;
int err;
- int port;
- for (port = 1; port <= ARRAY_SIZE(dev->mdev->port_caps); port++) {
- dev->mdev->port_caps[port - 1].has_smi = false;
- if (MLX5_CAP_GEN(dev->mdev, port_type) ==
- MLX5_CAP_PORT_TYPE_IB) {
- if (MLX5_CAP_GEN(dev->mdev, ib_virt)) {
- err = mlx5_query_hca_vport_context(dev->mdev, 0,
- port, 0,
- &vport_ctx);
- if (err) {
- mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n",
- port, err);
- return err;
- }
- dev->mdev->port_caps[port - 1].has_smi =
- vport_ctx.has_smi;
- } else {
- dev->mdev->port_caps[port - 1].has_smi = true;
- }
- }
- }
- return 0;
-}
+ *num_plane = 0;
+ if (!MLX5_CAP_GEN(mdev, ib_virt) || !MLX5_CAP_GEN_2(mdev, multiplane))
+ return 0;
-static void get_ext_port_caps(struct mlx5_ib_dev *dev)
-{
- int port;
+ err = mlx5_query_hca_vport_context(mdev, 0, 1, 0, &vport_ctx);
+ if (err)
+ return err;
- for (port = 1; port <= dev->num_ports; port++)
- mlx5_query_ext_port_caps(dev, port);
+ *num_plane = vport_ctx.num_plane;
+ return 0;
}
-static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port)
+static int set_has_smi_cap(struct mlx5_ib_dev *dev)
{
- struct ib_device_attr *dprops = NULL;
- struct ib_port_attr *pprops = NULL;
- int err = -ENOMEM;
- struct ib_udata uhw = {.inlen = 0, .outlen = 0};
-
- pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
- if (!pprops)
- goto out;
+ struct mlx5_hca_vport_context vport_ctx;
+ int err;
+ int port;
- dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
- if (!dprops)
- goto out;
+ if (MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_IB)
+ return 0;
- err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw);
- if (err) {
- mlx5_ib_warn(dev, "query_device failed %d\n", err);
- goto out;
- }
+ for (port = 1; port <= dev->num_ports; port++) {
+ if (dev->num_plane) {
+ dev->port_caps[port - 1].has_smi = false;
+ continue;
+ } else if (!MLX5_CAP_GEN(dev->mdev, ib_virt) ||
+ dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) {
+ dev->port_caps[port - 1].has_smi = true;
+ continue;
+ }
- memset(pprops, 0, sizeof(*pprops));
- err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
- if (err) {
- mlx5_ib_warn(dev, "query_port %d failed %d\n",
- port, err);
- goto out;
+ err = mlx5_query_hca_vport_context(dev->mdev, 0, port, 0,
+ &vport_ctx);
+ if (err) {
+ mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n",
+ port, err);
+ return err;
+ }
+ dev->port_caps[port - 1].has_smi = vport_ctx.has_smi;
}
- dev->mdev->port_caps[port - 1].pkey_table_len =
- dprops->max_pkeys;
- dev->mdev->port_caps[port - 1].gid_table_len =
- pprops->gid_tbl_len;
- mlx5_ib_dbg(dev, "port %d: pkey_table_len %d, gid_table_len %d\n",
- port, dprops->max_pkeys, pprops->gid_tbl_len);
-
-out:
- kfree(pprops);
- kfree(dprops);
-
- return err;
+ return 0;
}
-static int get_port_caps(struct mlx5_ib_dev *dev, u8 port)
+static void get_ext_port_caps(struct mlx5_ib_dev *dev)
{
- /* For representors use port 1, as this is the only native
- * port
- */
- if (dev->is_rep)
- return __get_port_caps(dev, 1);
- return __get_port_caps(dev, port);
+ unsigned int port;
+
+ rdma_for_each_port (&dev->ib_dev, port)
+ mlx5_query_ext_port_caps(dev, port);
}
-static void destroy_umrc_res(struct mlx5_ib_dev *dev)
+static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
{
- int err;
-
- err = mlx5_mr_cache_cleanup(dev);
- if (err)
- mlx5_ib_warn(dev, "mr cache cleanup failed\n");
-
- if (dev->umrc.qp)
- mlx5_ib_destroy_qp(dev->umrc.qp, NULL);
- if (dev->umrc.cq)
- ib_free_cq(dev->umrc.cq);
- if (dev->umrc.pd)
- ib_dealloc_pd(dev->umrc.pd);
+ switch (umr_fence_cap) {
+ case MLX5_CAP_UMR_FENCE_NONE:
+ return MLX5_FENCE_MODE_NONE;
+ case MLX5_CAP_UMR_FENCE_SMALL:
+ return MLX5_FENCE_MODE_INITIATOR_SMALL;
+ default:
+ return MLX5_FENCE_MODE_STRONG_ORDERING;
+ }
}
-enum {
- MAX_UMR_WR = 128,
-};
-
-static int create_umr_res(struct mlx5_ib_dev *dev)
+int mlx5_ib_dev_res_cq_init(struct mlx5_ib_dev *dev)
{
- struct ib_qp_init_attr *init_attr = NULL;
- struct ib_qp_attr *attr = NULL;
+ struct mlx5_ib_resources *devr = &dev->devr;
+ struct ib_cq_init_attr cq_attr = {.cqe = 1};
+ struct ib_device *ibdev;
struct ib_pd *pd;
struct ib_cq *cq;
- struct ib_qp *qp;
- int ret;
+ int ret = 0;
- attr = kzalloc(sizeof(*attr), GFP_KERNEL);
- init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
- if (!attr || !init_attr) {
- ret = -ENOMEM;
- goto error_0;
- }
- pd = ib_alloc_pd(&dev->ib_dev, 0);
+ /*
+ * devr->c0 is set once, never changed until device unload.
+ * Avoid taking the mutex if initialization is already done.
+ */
+ if (devr->c0)
+ return 0;
+
+ mutex_lock(&devr->cq_lock);
+ if (devr->c0)
+ goto unlock;
+
+ ibdev = &dev->ib_dev;
+ pd = ib_alloc_pd(ibdev, 0);
if (IS_ERR(pd)) {
- mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
ret = PTR_ERR(pd);
- goto error_0;
+ mlx5_ib_err(dev, "Couldn't allocate PD for res init, err=%pe\n",
+ pd);
+ goto unlock;
}
- cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
+ cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr);
if (IS_ERR(cq)) {
- mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
ret = PTR_ERR(cq);
- goto error_2;
+ mlx5_ib_err(dev, "Couldn't create CQ for res init, err=%pe\n",
+ cq);
+ ib_dealloc_pd(pd);
+ goto unlock;
}
- init_attr->send_cq = cq;
- init_attr->recv_cq = cq;
- init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
- init_attr->cap.max_send_wr = MAX_UMR_WR;
- init_attr->cap.max_send_sge = 1;
- init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
- init_attr->port_num = 1;
- qp = mlx5_ib_create_qp(pd, init_attr, NULL);
- if (IS_ERR(qp)) {
- mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
- ret = PTR_ERR(qp);
- goto error_3;
- }
- qp->device = &dev->ib_dev;
- qp->real_qp = qp;
- qp->uobject = NULL;
- qp->qp_type = MLX5_IB_QPT_REG_UMR;
- qp->send_cq = init_attr->send_cq;
- qp->recv_cq = init_attr->recv_cq;
-
- attr->qp_state = IB_QPS_INIT;
- attr->port_num = 1;
- ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
- IB_QP_PORT, NULL);
- if (ret) {
- mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
- goto error_4;
- }
+ devr->p0 = pd;
+ devr->c0 = cq;
- memset(attr, 0, sizeof(*attr));
- attr->qp_state = IB_QPS_RTR;
- attr->path_mtu = IB_MTU_256;
+unlock:
+ mutex_unlock(&devr->cq_lock);
+ return ret;
+}
- ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
- if (ret) {
- mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
- goto error_4;
- }
+int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_resources *devr = &dev->devr;
+ struct ib_srq_init_attr attr;
+ struct ib_srq *s0, *s1;
+ int ret = 0;
- memset(attr, 0, sizeof(*attr));
- attr->qp_state = IB_QPS_RTS;
- ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
- if (ret) {
- mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
- goto error_4;
- }
+ /*
+ * devr->s1 is set once, never changed until device unload.
+ * Avoid taking the mutex if initialization is already done.
+ */
+ if (devr->s1)
+ return 0;
- dev->umrc.qp = qp;
- dev->umrc.cq = cq;
- dev->umrc.pd = pd;
+ mutex_lock(&devr->srq_lock);
+ if (devr->s1)
+ goto unlock;
- sema_init(&dev->umrc.sem, MAX_UMR_WR);
- ret = mlx5_mr_cache_init(dev);
- if (ret) {
- mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
- goto error_4;
- }
+ ret = mlx5_ib_dev_res_cq_init(dev);
+ if (ret)
+ goto unlock;
- kfree(attr);
- kfree(init_attr);
+ memset(&attr, 0, sizeof(attr));
+ attr.attr.max_sge = 1;
+ attr.attr.max_wr = 1;
+ attr.srq_type = IB_SRQT_XRC;
+ attr.ext.cq = devr->c0;
- return 0;
+ s0 = ib_create_srq(devr->p0, &attr);
+ if (IS_ERR(s0)) {
+ ret = PTR_ERR(s0);
+ mlx5_ib_err(dev,
+ "Couldn't create SRQ 0 for res init, err=%pe\n",
+ s0);
+ goto unlock;
+ }
-error_4:
- mlx5_ib_destroy_qp(qp, NULL);
- dev->umrc.qp = NULL;
+ memset(&attr, 0, sizeof(attr));
+ attr.attr.max_sge = 1;
+ attr.attr.max_wr = 1;
+ attr.srq_type = IB_SRQT_BASIC;
-error_3:
- ib_free_cq(cq);
- dev->umrc.cq = NULL;
+ s1 = ib_create_srq(devr->p0, &attr);
+ if (IS_ERR(s1)) {
+ ret = PTR_ERR(s1);
+ mlx5_ib_err(dev,
+ "Couldn't create SRQ 1 for res init, err=%pe\n",
+ s1);
+ ib_destroy_srq(s0);
+ goto unlock;
+ }
-error_2:
- ib_dealloc_pd(pd);
- dev->umrc.pd = NULL;
+ devr->s0 = s0;
+ devr->s1 = s1;
-error_0:
- kfree(attr);
- kfree(init_attr);
+unlock:
+ mutex_unlock(&devr->srq_lock);
return ret;
}
-static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
+static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
{
- switch (umr_fence_cap) {
- case MLX5_CAP_UMR_FENCE_NONE:
- return MLX5_FENCE_MODE_NONE;
- case MLX5_CAP_UMR_FENCE_SMALL:
- return MLX5_FENCE_MODE_INITIATOR_SMALL;
- default:
- return MLX5_FENCE_MODE_STRONG_ORDERING;
- }
-}
+ struct mlx5_ib_resources *devr = &dev->devr;
+ int ret;
-static int create_dev_resources(struct mlx5_ib_resources *devr)
-{
- struct ib_srq_init_attr attr;
- struct mlx5_ib_dev *dev;
- struct ib_device *ibdev;
- struct ib_cq_init_attr cq_attr = {.cqe = 1};
- int port;
- int ret = 0;
+ if (!MLX5_CAP_GEN(dev->mdev, xrc))
+ return -EOPNOTSUPP;
- dev = container_of(devr, struct mlx5_ib_dev, devr);
- ibdev = &dev->ib_dev;
+ ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn0, 0);
+ if (ret)
+ return ret;
- mutex_init(&devr->mutex);
+ ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn1, 0);
+ if (ret) {
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
+ return ret;
+ }
- devr->p0 = rdma_zalloc_drv_obj(ibdev, ib_pd);
- if (!devr->p0)
- return -ENOMEM;
+ mutex_init(&devr->cq_lock);
+ mutex_init(&devr->srq_lock);
- devr->p0->device = ibdev;
- devr->p0->uobject = NULL;
- atomic_set(&devr->p0->usecnt, 0);
+ return 0;
+}
- ret = mlx5_ib_alloc_pd(devr->p0, NULL);
- if (ret)
- goto error0;
+static void mlx5_ib_dev_res_cleanup(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_resources *devr = &dev->devr;
- devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL);
- if (IS_ERR(devr->c0)) {
- ret = PTR_ERR(devr->c0);
- goto error1;
+ /* After s0/s1 init, they are not unset during the device lifetime. */
+ if (devr->s1) {
+ ib_destroy_srq(devr->s1);
+ ib_destroy_srq(devr->s0);
}
- devr->c0->device = &dev->ib_dev;
- devr->c0->uobject = NULL;
- devr->c0->comp_handler = NULL;
- devr->c0->event_handler = NULL;
- devr->c0->cq_context = NULL;
- atomic_set(&devr->c0->usecnt, 0);
-
- devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL);
- if (IS_ERR(devr->x0)) {
- ret = PTR_ERR(devr->x0);
- goto error2;
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0);
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
+ /* After p0/c0 init, they are not unset during the device lifetime. */
+ if (devr->c0) {
+ ib_destroy_cq(devr->c0);
+ ib_dealloc_pd(devr->p0);
}
- devr->x0->device = &dev->ib_dev;
- devr->x0->inode = NULL;
- atomic_set(&devr->x0->usecnt, 0);
- mutex_init(&devr->x0->tgt_qp_mutex);
- INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
-
- devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL);
- if (IS_ERR(devr->x1)) {
- ret = PTR_ERR(devr->x1);
- goto error3;
- }
- devr->x1->device = &dev->ib_dev;
- devr->x1->inode = NULL;
- atomic_set(&devr->x1->usecnt, 0);
- mutex_init(&devr->x1->tgt_qp_mutex);
- INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
+ mutex_destroy(&devr->cq_lock);
+ mutex_destroy(&devr->srq_lock);
+}
- memset(&attr, 0, sizeof(attr));
- attr.attr.max_sge = 1;
- attr.attr.max_wr = 1;
- attr.srq_type = IB_SRQT_XRC;
- attr.ext.cq = devr->c0;
- attr.ext.xrc.xrcd = devr->x0;
+static int
+mlx5_ib_create_data_direct_resources(struct mlx5_ib_dev *dev)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ bool ro_supp = false;
+ void *mkc;
+ u32 mkey;
+ u32 pdn;
+ u32 *in;
+ int err;
- devr->s0 = rdma_zalloc_drv_obj(ibdev, ib_srq);
- if (!devr->s0) {
- ret = -ENOMEM;
- goto error4;
+ err = mlx5_core_alloc_pd(mdev, &pdn);
+ if (err)
+ return err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err;
}
- devr->s0->device = &dev->ib_dev;
- devr->s0->pd = devr->p0;
- devr->s0->srq_type = IB_SRQT_XRC;
- devr->s0->ext.xrc.xrcd = devr->x0;
- devr->s0->ext.cq = devr->c0;
- ret = mlx5_ib_create_srq(devr->s0, &attr, NULL);
- if (ret)
- goto err_create;
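+ /* Build a device-wide PA-mode mkey on the new PD: full local and
+ * remote access, length64 so it covers the whole address space.
+ */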
+ MLX5_SET(create_mkey_in, in, data_direct, 1);
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+ MLX5_SET(mkc, mkc, rw, 1);
+ MLX5_SET(mkc, mkc, rr, 1);
+ MLX5_SET(mkc, mkc, a, 1);
+ MLX5_SET(mkc, mkc, pd, pdn);
+ MLX5_SET(mkc, mkc, length64, 1);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ err = mlx5_core_create_mkey(mdev, &mkey, in, inlen);
+ if (err)
+ goto err_mkey;
- atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
- atomic_inc(&devr->s0->ext.cq->usecnt);
- atomic_inc(&devr->p0->usecnt);
- atomic_set(&devr->s0->usecnt, 0);
+ dev->ddr.mkey = mkey;
+ dev->ddr.pdn = pdn;
- memset(&attr, 0, sizeof(attr));
- attr.attr.max_sge = 1;
- attr.attr.max_wr = 1;
- attr.srq_type = IB_SRQT_BASIC;
- devr->s1 = rdma_zalloc_drv_obj(ibdev, ib_srq);
- if (!devr->s1) {
- ret = -ENOMEM;
- goto error5;
+ /* create another mkey with RO support */
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) {
+ MLX5_SET(mkc, mkc, relaxed_ordering_write, 1);
+ ro_supp = true;
}
- devr->s1->device = &dev->ib_dev;
- devr->s1->pd = devr->p0;
- devr->s1->srq_type = IB_SRQT_BASIC;
- devr->s1->ext.cq = devr->c0;
-
- ret = mlx5_ib_create_srq(devr->s1, &attr, NULL);
- if (ret)
- goto error6;
-
- atomic_inc(&devr->p0->usecnt);
- atomic_set(&devr->s1->usecnt, 0);
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) {
+ MLX5_SET(mkc, mkc, relaxed_ordering_read, 1);
+ ro_supp = true;
+ }
- for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) {
- INIT_WORK(&devr->ports[port].pkey_change_work,
- pkey_change_handler);
- devr->ports[port].devr = devr;
+ if (ro_supp) {
+ err = mlx5_core_create_mkey(mdev, &mkey, in, inlen);
+ /* RO is defined as best effort */
+ if (!err) {
+ dev->ddr.mkey_ro = mkey;
+ dev->ddr.mkey_ro_valid = true;
+ }
}
+ kvfree(in);
return 0;
-error6:
- kfree(devr->s1);
-error5:
- mlx5_ib_destroy_srq(devr->s0, NULL);
-err_create:
- kfree(devr->s0);
-error4:
- mlx5_ib_dealloc_xrcd(devr->x1, NULL);
-error3:
- mlx5_ib_dealloc_xrcd(devr->x0, NULL);
-error2:
- mlx5_ib_destroy_cq(devr->c0, NULL);
-error1:
- mlx5_ib_dealloc_pd(devr->p0, NULL);
-error0:
- kfree(devr->p0);
- return ret;
+err_mkey:
+ kvfree(in);
+err:
+ mlx5_core_dealloc_pd(mdev, pdn);
+ return err;
}
-static void destroy_dev_resources(struct mlx5_ib_resources *devr)
+static void
+mlx5_ib_free_data_direct_resources(struct mlx5_ib_dev *dev)
{
- int port;
- mlx5_ib_destroy_srq(devr->s1, NULL);
- kfree(devr->s1);
- mlx5_ib_destroy_srq(devr->s0, NULL);
- kfree(devr->s0);
- mlx5_ib_dealloc_xrcd(devr->x0, NULL);
- mlx5_ib_dealloc_xrcd(devr->x1, NULL);
- mlx5_ib_destroy_cq(devr->c0, NULL);
- mlx5_ib_dealloc_pd(devr->p0, NULL);
- kfree(devr->p0);
-
- /* Make sure no change P_Key work items are still executing */
- for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
- cancel_work_sync(&devr->ports[port].pkey_change_work);
+ if (dev->ddr.mkey_ro_valid)
+ mlx5_core_destroy_mkey(dev->mdev, dev->ddr.mkey_ro);
+
+ mlx5_core_destroy_mkey(dev->mdev, dev->ddr.mkey);
+ mlx5_core_dealloc_pd(dev->mdev, dev->ddr.pdn);
}
static u32 get_core_cap_flags(struct ib_device *ibdev,
@@ -5033,6 +3274,13 @@ static u32 get_core_cap_flags(struct ib_device *ibdev,
if (rep->grh_required)
ret |= RDMA_CORE_CAP_IB_GRH_REQUIRED;
+ if (dev->num_plane)
+ return ret | RDMA_CORE_CAP_PROT_IB | RDMA_CORE_CAP_IB_MAD |
+ RDMA_CORE_CAP_IB_CM | RDMA_CORE_CAP_IB_SA |
+ RDMA_CORE_CAP_AF_IB;
+ else if (ibdev->type == RDMA_DEVICE_TYPE_SMI)
+ return ret | RDMA_CORE_CAP_IB_MAD | RDMA_CORE_CAP_IB_SMI;
+
if (ll == IB_LINK_LAYER_INFINIBAND)
return ret | RDMA_CORE_PORT_IBA_IB;
@@ -5054,7 +3302,7 @@ static u32 get_core_cap_flags(struct ib_device *ibdev,
return ret;
}
-static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
+static int mlx5_port_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable)
{
struct ib_port_attr attr;
@@ -5068,6 +3316,9 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
return err;
if (ll == IB_LINK_LAYER_INFINIBAND) {
+ if (ibdev->type == RDMA_DEVICE_TYPE_SMI)
+ port_num = smi_to_native_portnum(dev, port_num);
+
err = mlx5_query_hca_vport_context(dev->mdev, 0, port_num, 0,
&rep);
if (err)
@@ -5077,13 +3328,12 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
immutable->core_cap_flags = get_core_cap_flags(ibdev, &rep);
- if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce))
- immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
}
-static int mlx5_port_rep_immutable(struct ib_device *ibdev, u8 port_num,
+static int mlx5_port_rep_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable)
{
struct ib_port_attr attr;
@@ -5111,6 +3361,67 @@ static void get_dev_fw_str(struct ib_device *ibdev, char *str)
fw_rev_sub(dev->mdev));
}
+static int lag_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct mlx5_ib_dev *dev = container_of(nb, struct mlx5_ib_dev,
+ lag_events);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct ib_device *ibdev = &dev->ib_dev;
+ struct net_device *old_ndev = NULL;
+ struct mlx5_ib_port *port;
+ struct net_device *ndev;
+ u32 portnum = 0;
+ int ret = 0;
+ int i;
+
+ switch (event) {
+ case MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE:
+ ndev = data;
+ if (ndev) {
+ if (!mlx5_lag_is_roce(mdev)) {
+ /* SR-IOV (switchdev) LAG: the netdev is associated with
+ * the uplink representor port, so find that port's index.
+ */
+ for (i = 0; i < dev->num_ports; i++) {
+ port = &dev->port[i];
+ if (port->rep && port->rep->vport ==
+ MLX5_VPORT_UPLINK) {
+ portnum = i;
+ break;
+ }
+ }
+ }
+ old_ndev = ib_device_get_netdev(ibdev, portnum + 1);
+ ret = ib_device_set_netdev(ibdev, ndev, portnum + 1);
+ if (ret)
+ goto out;
+
+ if (old_ndev)
+ roce_del_all_netdev_gids(ibdev, portnum + 1,
+ old_ndev);
+ rdma_roce_rescan_port(ibdev, portnum + 1);
+ }
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+out:
+ dev_put(old_ndev);
+ return notifier_from_errno(ret);
+}
+
+static void mlx5e_lag_event_register(struct mlx5_ib_dev *dev)
+{
+ dev->lag_events.notifier_call = lag_event;
+ blocking_notifier_chain_register(&dev->mdev->priv.lag_nh,
+ &dev->lag_events);
+}
+
+static void mlx5e_lag_event_unregister(struct mlx5_ib_dev *dev)
+{
+ blocking_notifier_chain_unregister(&dev->mdev->priv.lag_nh,
+ &dev->lag_events);
+}
+
static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
@@ -5119,7 +3430,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
struct mlx5_flow_table *ft;
int err;
- if (!ns || !mlx5_lag_is_roce(mdev))
+ if (!ns || !mlx5_lag_is_active(mdev))
return 0;
err = mlx5_cmd_create_vport_lag(mdev);
@@ -5132,7 +3443,9 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
goto err_destroy_vport_lag;
}
+ mlx5e_lag_event_register(dev);
dev->flow_db->lag_demux_ft = ft;
+ dev->lag_ports = mlx5_lag_get_num_ports(mdev);
dev->lag_active = true;
return 0;
@@ -5148,6 +3461,7 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
if (dev->lag_active) {
dev->lag_active = false;
+ mlx5e_lag_event_unregister(dev);
mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
dev->flow_db->lag_demux_ft = NULL;
@@ -5155,33 +3469,70 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
}
}
-static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num)
+static void mlx5_netdev_notifier_register(struct mlx5_roce *roce,
+ struct net_device *netdev)
{
int err;
- dev->port[port_num].roce.nb.notifier_call = mlx5_netdev_event;
- err = register_netdevice_notifier(&dev->port[port_num].roce.nb);
- if (err) {
- dev->port[port_num].roce.nb.notifier_call = NULL;
- return err;
- }
+ if (roce->tracking_netdev)
+ return;
+ roce->tracking_netdev = netdev;
+ roce->nb.notifier_call = mlx5_netdev_event;
+ err = register_netdevice_notifier_dev_net(netdev, &roce->nb, &roce->nn);
+ WARN_ON(err);
+}
- return 0;
+static void mlx5_netdev_notifier_unregister(struct mlx5_roce *roce)
+{
+ if (!roce->tracking_netdev)
+ return;
+ unregister_netdevice_notifier_dev_net(roce->tracking_netdev, &roce->nb,
+ &roce->nn);
+ roce->tracking_netdev = NULL;
}
-static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num)
+static int mlx5e_mdev_notifier_event(struct notifier_block *nb,
+ unsigned long event, void *data)
{
- if (dev->port[port_num].roce.nb.notifier_call) {
- unregister_netdevice_notifier(&dev->port[port_num].roce.nb);
- dev->port[port_num].roce.nb.notifier_call = NULL;
+ struct mlx5_roce *roce = container_of(nb, struct mlx5_roce, mdev_nb);
+ struct net_device *netdev = data;
+
+ switch (event) {
+ case MLX5_DRIVER_EVENT_UPLINK_NETDEV:
+ if (netdev)
+ mlx5_netdev_notifier_register(roce, netdev);
+ else
+ mlx5_netdev_notifier_unregister(roce);
+ break;
+ default:
+ return NOTIFY_DONE;
}
+
+ return NOTIFY_OK;
+}
+
+static void mlx5_mdev_netdev_track(struct mlx5_ib_dev *dev, u32 port_num)
+{
+ struct mlx5_roce *roce = &dev->port[port_num].roce;
+
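+ /* Register for uplink netdev events and replay the current state so
+ * an already-present netdev is picked up immediately.
+ */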
+ roce->mdev_nb.notifier_call = mlx5e_mdev_notifier_event;
+ mlx5_blocking_notifier_register(dev->mdev, &roce->mdev_nb);
+ mlx5_core_uplink_netdev_event_replay(dev->mdev);
+}
+
+static void mlx5_mdev_netdev_untrack(struct mlx5_ib_dev *dev, u32 port_num)
+{
+ struct mlx5_roce *roce = &dev->port[port_num].roce;
+
+ mlx5_blocking_notifier_unregister(dev->mdev, &roce->mdev_nb);
+ mlx5_netdev_notifier_unregister(roce);
}
static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
{
int err;
- if (MLX5_CAP_GEN(dev->mdev, roce)) {
+ if (!dev->is_rep && dev->profile != &raw_eth_profile) {
err = mlx5_nic_vport_enable_roce(dev->mdev);
if (err)
return err;
@@ -5194,7 +3545,7 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
return 0;
err_disable_roce:
- if (MLX5_CAP_GEN(dev->mdev, roce))
+ if (!dev->is_rep && dev->profile != &raw_eth_profile)
mlx5_nic_vport_disable_roce(dev->mdev);
return err;
@@ -5203,337 +3554,11 @@ err_disable_roce:
static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
{
mlx5_eth_lag_cleanup(dev);
- if (MLX5_CAP_GEN(dev->mdev, roce))
+ if (!dev->is_rep && dev->profile != &raw_eth_profile)
mlx5_nic_vport_disable_roce(dev->mdev);
}
-struct mlx5_ib_counter {
- const char *name;
- size_t offset;
-};
-
-#define INIT_Q_COUNTER(_name) \
- { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
-
-static const struct mlx5_ib_counter basic_q_cnts[] = {
- INIT_Q_COUNTER(rx_write_requests),
- INIT_Q_COUNTER(rx_read_requests),
- INIT_Q_COUNTER(rx_atomic_requests),
- INIT_Q_COUNTER(out_of_buffer),
-};
-
-static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
- INIT_Q_COUNTER(out_of_sequence),
-};
-
-static const struct mlx5_ib_counter retrans_q_cnts[] = {
- INIT_Q_COUNTER(duplicate_request),
- INIT_Q_COUNTER(rnr_nak_retry_err),
- INIT_Q_COUNTER(packet_seq_err),
- INIT_Q_COUNTER(implied_nak_seq_err),
- INIT_Q_COUNTER(local_ack_timeout_err),
-};
-
-#define INIT_CONG_COUNTER(_name) \
- { .name = #_name, .offset = \
- MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
-
-static const struct mlx5_ib_counter cong_cnts[] = {
- INIT_CONG_COUNTER(rp_cnp_ignored),
- INIT_CONG_COUNTER(rp_cnp_handled),
- INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
- INIT_CONG_COUNTER(np_cnp_sent),
-};
-
-static const struct mlx5_ib_counter extended_err_cnts[] = {
- INIT_Q_COUNTER(resp_local_length_error),
- INIT_Q_COUNTER(resp_cqe_error),
- INIT_Q_COUNTER(req_cqe_error),
- INIT_Q_COUNTER(req_remote_invalid_request),
- INIT_Q_COUNTER(req_remote_access_errors),
- INIT_Q_COUNTER(resp_remote_access_errors),
- INIT_Q_COUNTER(resp_cqe_flush_error),
- INIT_Q_COUNTER(req_cqe_flush_error),
-};
-
-#define INIT_EXT_PPCNT_COUNTER(_name) \
- { .name = #_name, .offset = \
- MLX5_BYTE_OFF(ppcnt_reg, \
- counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
-
-static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
- INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
-};
-
-static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
-{
- int i;
-
- for (i = 0; i < dev->num_ports; i++) {
- if (dev->port[i].cnts.set_id_valid)
- mlx5_core_dealloc_q_counter(dev->mdev,
- dev->port[i].cnts.set_id);
- kfree(dev->port[i].cnts.names);
- kfree(dev->port[i].cnts.offsets);
- }
-}
-
-static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
- struct mlx5_ib_counters *cnts)
-{
- u32 num_counters;
-
- num_counters = ARRAY_SIZE(basic_q_cnts);
-
- if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
- num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
-
- if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
- num_counters += ARRAY_SIZE(retrans_q_cnts);
-
- if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
- num_counters += ARRAY_SIZE(extended_err_cnts);
-
- cnts->num_q_counters = num_counters;
-
- if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
- cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
- num_counters += ARRAY_SIZE(cong_cnts);
- }
- if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
- cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
- num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
- }
- cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
- if (!cnts->names)
- return -ENOMEM;
-
- cnts->offsets = kcalloc(num_counters,
- sizeof(cnts->offsets), GFP_KERNEL);
- if (!cnts->offsets)
- goto err_names;
-
- return 0;
-
-err_names:
- kfree(cnts->names);
- cnts->names = NULL;
- return -ENOMEM;
-}
-
-static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
- const char **names,
- size_t *offsets)
-{
- int i;
- int j = 0;
-
- for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
- names[j] = basic_q_cnts[i].name;
- offsets[j] = basic_q_cnts[i].offset;
- }
-
- if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
- for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
- names[j] = out_of_seq_q_cnts[i].name;
- offsets[j] = out_of_seq_q_cnts[i].offset;
- }
- }
-
- if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
- for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
- names[j] = retrans_q_cnts[i].name;
- offsets[j] = retrans_q_cnts[i].offset;
- }
- }
-
- if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
- for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
- names[j] = extended_err_cnts[i].name;
- offsets[j] = extended_err_cnts[i].offset;
- }
- }
-
- if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
- for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
- names[j] = cong_cnts[i].name;
- offsets[j] = cong_cnts[i].offset;
- }
- }
-
- if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
- for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
- names[j] = ext_ppcnt_cnts[i].name;
- offsets[j] = ext_ppcnt_cnts[i].offset;
- }
- }
-}
-
-static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
-{
- int err = 0;
- int i;
- bool is_shared;
-
- is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
-
- for (i = 0; i < dev->num_ports; i++) {
- err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
- if (err)
- goto err_alloc;
-
- mlx5_ib_fill_counters(dev, dev->port[i].cnts.names,
- dev->port[i].cnts.offsets);
-
- err = mlx5_cmd_alloc_q_counter(dev->mdev,
- &dev->port[i].cnts.set_id,
- is_shared ?
- MLX5_SHARED_RESOURCE_UID : 0);
- if (err) {
- mlx5_ib_warn(dev,
- "couldn't allocate queue counter for port %d, err %d\n",
- i + 1, err);
- goto err_alloc;
- }
- dev->port[i].cnts.set_id_valid = true;
- }
-
- return 0;
-
-err_alloc:
- mlx5_ib_dealloc_counters(dev);
- return err;
-}
-
-static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
- u8 port_num)
-{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_port *port = &dev->port[port_num - 1];
-
- /* We support only per port stats */
- if (port_num == 0)
- return NULL;
-
- return rdma_alloc_hw_stats_struct(port->cnts.names,
- port->cnts.num_q_counters +
- port->cnts.num_cong_counters +
- port->cnts.num_ext_ppcnt_counters,
- RDMA_HW_STATS_DEFAULT_LIFESPAN);
-}
-
-static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
- struct mlx5_ib_port *port,
- struct rdma_hw_stats *stats)
-{
- int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
- void *out;
- __be32 val;
- int ret, i;
-
- out = kvzalloc(outlen, GFP_KERNEL);
- if (!out)
- return -ENOMEM;
-
- ret = mlx5_core_query_q_counter(mdev,
- port->cnts.set_id, 0,
- out, outlen);
- if (ret)
- goto free;
-
- for (i = 0; i < port->cnts.num_q_counters; i++) {
- val = *(__be32 *)(out + port->cnts.offsets[i]);
- stats->value[i] = (u64)be32_to_cpu(val);
- }
-
-free:
- kvfree(out);
- return ret;
-}
-
-static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
- struct mlx5_ib_port *port,
- struct rdma_hw_stats *stats)
-{
- int offset = port->cnts.num_q_counters + port->cnts.num_cong_counters;
- int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
- int ret, i;
- void *out;
-
- out = kvzalloc(sz, GFP_KERNEL);
- if (!out)
- return -ENOMEM;
-
- ret = mlx5_cmd_query_ext_ppcnt_counters(dev->mdev, out);
- if (ret)
- goto free;
-
- for (i = 0; i < port->cnts.num_ext_ppcnt_counters; i++) {
- stats->value[i + offset] =
- be64_to_cpup((__be64 *)(out +
- port->cnts.offsets[i + offset]));
- }
-
-free:
- kvfree(out);
- return ret;
-}
-
-static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
- struct rdma_hw_stats *stats,
- u8 port_num, int index)
-{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_port *port = &dev->port[port_num - 1];
- struct mlx5_core_dev *mdev;
- int ret, num_counters;
- u8 mdev_port_num;
-
- if (!stats)
- return -EINVAL;
-
- num_counters = port->cnts.num_q_counters +
- port->cnts.num_cong_counters +
- port->cnts.num_ext_ppcnt_counters;
-
- /* q_counters are per IB device, query the master mdev */
- ret = mlx5_ib_query_q_counters(dev->mdev, port, stats);
- if (ret)
- return ret;
-
- if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
- ret = mlx5_ib_query_ext_ppcnt_counters(dev, port, stats);
- if (ret)
- return ret;
- }
-
- if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
- mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
- &mdev_port_num);
- if (!mdev) {
- /* If the port is not affiliated yet, it's in the down state
- * and has no counters, so the values would be zero anyway.
- * There is no need to read from the HCA.
- */
- goto done;
- }
- ret = mlx5_lag_query_cong_counters(dev->mdev,
- stats->value +
- port->cnts.num_q_counters,
- port->cnts.num_cong_counters,
- port->cnts.offsets +
- port->cnts.num_q_counters);
-
- mlx5_ib_put_native_port_mdev(dev, port_num);
- if (ret)
- return ret;
- }
-
-done:
- return num_counters;
-}
-
-static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num,
+static int mlx5_ib_rn_get_params(struct ib_device *device, u32 port_num,
enum rdma_netdev_t type,
struct rdma_netdev_alloc_params *params)
{
@@ -5543,24 +3568,6 @@ static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num,
return mlx5_rdma_rn_get_params(to_mdev(device)->mdev, device, params);
}
-static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev)
-{
- if (!dev->delay_drop.dbg)
- return;
- debugfs_remove_recursive(dev->delay_drop.dbg->dir_debugfs);
- kfree(dev->delay_drop.dbg);
- dev->delay_drop.dbg = NULL;
-}
-
-static void cancel_delay_drop(struct mlx5_ib_dev *dev)
-{
- if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
- return;
-
- cancel_work_sync(&dev->delay_drop.delay_drop_work);
- delay_drop_debugfs_cleanup(dev);
-}
-
static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf,
size_t count, loff_t *pos)
{
@@ -5600,81 +3607,26 @@ static const struct file_operations fops_delay_drop_timeout = {
.read = delay_drop_timeout_read,
};
-static int delay_drop_debugfs_init(struct mlx5_ib_dev *dev)
-{
- struct mlx5_ib_dbg_delay_drop *dbg;
-
- if (!mlx5_debugfs_root)
- return 0;
-
- dbg = kzalloc(sizeof(*dbg), GFP_KERNEL);
- if (!dbg)
- return -ENOMEM;
-
- dev->delay_drop.dbg = dbg;
-
- dbg->dir_debugfs =
- debugfs_create_dir("delay_drop",
- dev->mdev->priv.dbg_root);
- if (!dbg->dir_debugfs)
- goto out_debugfs;
-
- dbg->events_cnt_debugfs =
- debugfs_create_atomic_t("num_timeout_events", 0400,
- dbg->dir_debugfs,
- &dev->delay_drop.events_cnt);
- if (!dbg->events_cnt_debugfs)
- goto out_debugfs;
-
- dbg->rqs_cnt_debugfs =
- debugfs_create_atomic_t("num_rqs", 0400,
- dbg->dir_debugfs,
- &dev->delay_drop.rqs_cnt);
- if (!dbg->rqs_cnt_debugfs)
- goto out_debugfs;
-
- dbg->timeout_debugfs =
- debugfs_create_file("timeout", 0600,
- dbg->dir_debugfs,
- &dev->delay_drop,
- &fops_delay_drop_timeout);
- if (!dbg->timeout_debugfs)
- goto out_debugfs;
-
- return 0;
-
-out_debugfs:
- delay_drop_debugfs_cleanup(dev);
- return -ENOMEM;
-}
-
-static void init_delay_drop(struct mlx5_ib_dev *dev)
-{
- if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
- return;
-
- mutex_init(&dev->delay_drop.lock);
- dev->delay_drop.dev = dev;
- dev->delay_drop.activate = false;
- dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000;
- INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler);
- atomic_set(&dev->delay_drop.rqs_cnt, 0);
- atomic_set(&dev->delay_drop.events_cnt, 0);
-
- if (delay_drop_debugfs_init(dev))
- mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n");
-}
-
-/* The mlx5_ib_multiport_mutex should be held when calling this function */
static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
struct mlx5_ib_multiport_info *mpi)
{
- u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
+ u32 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
struct mlx5_ib_port *port = &ibdev->port[port_num];
int comps;
int err;
int i;
+ lockdep_assert_held(&mlx5_ib_multiport_mutex);
+
+ mlx5_ib_disable_lb_mp(ibdev->mdev, mpi->mdev, &ibdev->lb);
+
+ mlx5_core_mp_event_replay(ibdev->mdev,
+ MLX5_DRIVER_EVENT_AFFILIATION_REMOVED,
+ NULL);
+ mlx5_core_mp_event_replay(mpi->mdev,
+ MLX5_DRIVER_EVENT_AFFILIATION_REMOVED,
+ NULL);
+
mlx5_ib_cleanup_cong_debugfs(ibdev, port_num);
spin_lock(&port->mp.mpi_lock);
@@ -5683,14 +3635,13 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
return;
}
- if (mpi->mdev_events.notifier_call)
- mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events);
- mpi->mdev_events.notifier_call = NULL;
-
mpi->ibdev = NULL;
spin_unlock(&port->mp.mpi_lock);
- mlx5_remove_netdev_notifier(ibdev, port_num);
+ if (mpi->mdev_events.notifier_call)
+ mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events);
+ mpi->mdev_events.notifier_call = NULL;
+ mlx5_mdev_netdev_untrack(ibdev, port_num);
spin_lock(&port->mp.mpi_lock);
comps = mpi->mdev_refcnt;
@@ -5708,13 +3659,11 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
port->mp.mpi = NULL;
- list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list);
-
spin_unlock(&port->mp.mpi_lock);
err = mlx5_nic_vport_unaffiliate_multiport(mpi->mdev);
- mlx5_ib_dbg(ibdev, "unaffiliated port %d\n", port_num + 1);
+ mlx5_ib_dbg(ibdev, "unaffiliated port %u\n", port_num + 1);
/* Log an error; we still need to clean up the pointers and add
* it back to the list.
*/
@@ -5725,16 +3674,18 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
ibdev->port[port_num].roce.last_port_state = IB_PORT_DOWN;
}
-/* The mlx5_ib_multiport_mutex should be held when calling this function */
static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
struct mlx5_ib_multiport_info *mpi)
{
- u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
+ u32 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
+ u64 key;
int err;
+ lockdep_assert_held(&mlx5_ib_multiport_mutex);
+
spin_lock(&ibdev->port[port_num].mp.mpi_lock);
if (ibdev->port[port_num].mp.mpi) {
- mlx5_ib_dbg(ibdev, "port %d already affiliated.\n",
+ mlx5_ib_dbg(ibdev, "port %u already affiliated.\n",
port_num + 1);
spin_unlock(&ibdev->port[port_num].mp.mpi_lock);
return false;
@@ -5749,22 +3700,25 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
if (err)
goto unbind;
- err = get_port_caps(ibdev, mlx5_core_native_port_num(mpi->mdev));
- if (err)
- goto unbind;
-
- err = mlx5_add_netdev_notifier(ibdev, port_num);
- if (err) {
- mlx5_ib_err(ibdev, "failed adding netdev notifier for port %u\n",
- port_num + 1);
- goto unbind;
- }
+ mlx5_mdev_netdev_track(ibdev, port_num);
mpi->mdev_events.notifier_call = mlx5_ib_event_slave_port;
mlx5_notifier_register(mpi->mdev, &mpi->mdev_events);
mlx5_ib_init_cong_debugfs(ibdev, port_num);
+ key = mpi->mdev->priv.adev_idx;
+ mlx5_core_mp_event_replay(mpi->mdev,
+ MLX5_DRIVER_EVENT_AFFILIATION_DONE,
+ &key);
+ mlx5_core_mp_event_replay(ibdev->mdev,
+ MLX5_DRIVER_EVENT_AFFILIATION_DONE,
+ &key);
+
+ err = mlx5_ib_enable_lb_mp(ibdev->mdev, mpi->mdev, &ibdev->lb);
+ if (err)
+ goto unbind;
+
return true;
unbind:
@@ -5772,14 +3726,49 @@ unbind:
return false;
}
+static int mlx5_ib_data_direct_init(struct mlx5_ib_dev *dev)
+{
+ char vuid[MLX5_ST_SZ_BYTES(array1024_auto) + 1] = {};
+ int ret;
+
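+ /* Data-direct requires both the data_direct and query_vuid caps; the
+ * queried VUID is used for the data-direct registration below.
+ */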
+ if (!MLX5_CAP_GEN(dev->mdev, data_direct) ||
+ !MLX5_CAP_GEN_2(dev->mdev, query_vuid))
+ return 0;
+
+ ret = mlx5_cmd_query_vuid(dev->mdev, true, vuid);
+ if (ret)
+ return ret;
+
+ ret = mlx5_ib_create_data_direct_resources(dev);
+ if (ret)
+ return ret;
+
+ INIT_LIST_HEAD(&dev->data_direct_mr_list);
+ ret = mlx5_data_direct_ib_reg(dev, vuid);
+ if (ret)
+ mlx5_ib_free_data_direct_resources(dev);
+
+ return ret;
+}
+
+static void mlx5_ib_data_direct_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (!MLX5_CAP_GEN(dev->mdev, data_direct) ||
+ !MLX5_CAP_GEN_2(dev->mdev, query_vuid))
+ return;
+
+ mlx5_data_direct_ib_unreg(dev);
+ mlx5_ib_free_data_direct_resources(dev);
+}
+
static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev)
{
- int port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ u32 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev,
port_num + 1);
struct mlx5_ib_multiport_info *mpi;
int err;
- int i;
+ u32 i;
if (!mlx5_core_is_mp_master(dev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
return 0;
@@ -5818,7 +3807,8 @@ static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev)
list_for_each_entry(mpi, &mlx5_ib_unaffiliated_port_list,
list) {
if (dev->sys_image_guid == mpi->sys_image_guid &&
- (mlx5_core_native_port_num(mpi->mdev) - 1) == i) {
+ (mlx5_core_native_port_num(mpi->mdev) - 1) == i &&
+ mlx5_core_same_coredev_type(dev->mdev, mpi->mdev)) {
bound = mlx5_ib_bind_slave_port(dev, mpi);
}
@@ -5830,11 +3820,9 @@ static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev)
break;
}
}
- if (!bound) {
- get_port_caps(dev, i + 1);
+ if (!bound)
mlx5_ib_dbg(dev, "no free port found for port %d\n",
i + 1);
- }
}
list_add_tail(&dev->ib_dev_list, &mlx5_ib_dev_list);
@@ -5844,10 +3832,10 @@ static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev)
static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
{
- int port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ u32 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev,
port_num + 1);
- int i;
+ u32 i;
if (!mlx5_core_is_mp_master(dev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
return;
@@ -5860,8 +3848,12 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
kfree(dev->port[i].mp.mpi);
dev->port[i].mp.mpi = NULL;
} else {
- mlx5_ib_dbg(dev, "unbinding port_num: %d\n", i + 1);
- mlx5_ib_unbind_slave_port(dev, dev->port[i].mp.mpi);
+ mlx5_ib_dbg(dev, "unbinding port_num: %u\n",
+ i + 1);
+ list_add_tail(&dev->port[i].mp.mpi->list,
+ &mlx5_ib_unaffiliated_port_list);
+ mlx5_ib_unbind_slave_port(dev,
+ dev->port[i].mp.mpi);
}
}
}
@@ -5873,297 +3865,432 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
}
-ADD_UVERBS_ATTRIBUTES_SIMPLE(
- mlx5_ib_dm,
- UVERBS_OBJECT_DM,
- UVERBS_METHOD_DM_ALLOC,
- UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
- UVERBS_ATTR_TYPE(u64),
- UA_MANDATORY),
- UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
- UVERBS_ATTR_TYPE(u16),
- UA_OPTIONAL),
- UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE,
- enum mlx5_ib_uapi_dm_type,
- UA_OPTIONAL));
-
-ADD_UVERBS_ATTRIBUTES_SIMPLE(
- mlx5_ib_flow_action,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
- UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
- enum mlx5_ib_uapi_flow_action_flags));
+static int mmap_obj_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_user_mmap_entry *obj = uobject->object;
-static const struct uapi_definition mlx5_ib_defs[] = {
-#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
- UAPI_DEF_CHAIN(mlx5_ib_devx_defs),
- UAPI_DEF_CHAIN(mlx5_ib_flow_defs),
-#endif
+ rdma_user_mmap_entry_remove(&obj->rdma_entry);
+ return 0;
+}
- UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
- &mlx5_ib_flow_action),
- UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm),
- {}
-};
+static int mlx5_rdma_user_mmap_entry_insert(struct mlx5_ib_ucontext *c,
+ struct mlx5_user_mmap_entry *entry,
+ size_t length)
+{
+ return rdma_user_mmap_entry_insert_range(
+ &c->ibucontext, &entry->rdma_entry, length,
+ (MLX5_IB_MMAP_OFFSET_START << 16),
+ ((MLX5_IB_MMAP_OFFSET_END << 16) + (1UL << 16) - 1));
+}
-static int mlx5_ib_read_counters(struct ib_counters *counters,
- struct ib_counters_read_attr *read_attr,
- struct uverbs_attr_bundle *attrs)
+static struct mlx5_user_mmap_entry *
+alloc_var_entry(struct mlx5_ib_ucontext *c)
{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
- struct mlx5_read_counters_attr mread_attr = {};
- struct mlx5_ib_flow_counters_desc *desc;
- int ret, i;
+ struct mlx5_user_mmap_entry *entry;
+ struct mlx5_var_table *var_table;
+ u32 page_idx;
+ int err;
- mutex_lock(&mcounters->mcntrs_mutex);
- if (mcounters->cntrs_max_index > read_attr->ncounters) {
- ret = -EINVAL;
- goto err_bound;
- }
+ var_table = &to_mdev(c->ibucontext.device)->var_table;
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return ERR_PTR(-ENOMEM);
- mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
- GFP_KERNEL);
- if (!mread_attr.out) {
- ret = -ENOMEM;
- goto err_bound;
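+ /* Claim the first free VAR doorbell page; its hardware address is
+ * hw_start_addr plus page_idx strides.
+ */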
+ mutex_lock(&var_table->bitmap_lock);
+ page_idx = find_first_zero_bit(var_table->bitmap,
+ var_table->num_var_hw_entries);
+ if (page_idx >= var_table->num_var_hw_entries) {
+ err = -ENOSPC;
+ mutex_unlock(&var_table->bitmap_lock);
+ goto end;
}
- mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
- mread_attr.flags = read_attr->flags;
- ret = mcounters->read_counters(counters->device, &mread_attr);
- if (ret)
- goto err_read;
+ set_bit(page_idx, var_table->bitmap);
+ mutex_unlock(&var_table->bitmap_lock);
- /* do the pass over the counters data array to assign according to the
- * descriptions and indexing pairs
- */
- desc = mcounters->counters_data;
- for (i = 0; i < mcounters->ncounters; i++)
- read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
-
-err_read:
- kfree(mread_attr.out);
-err_bound:
- mutex_unlock(&mcounters->mcntrs_mutex);
- return ret;
+ entry->address = var_table->hw_start_addr +
+ (page_idx * var_table->stride_size);
+ entry->page_idx = page_idx;
+ entry->mmap_flag = MLX5_IB_MMAP_TYPE_VAR;
+
+ err = mlx5_rdma_user_mmap_entry_insert(c, entry,
+ var_table->stride_size);
+ if (err)
+ goto err_insert;
+
+ return entry;
+
+err_insert:
+ mutex_lock(&var_table->bitmap_lock);
+ clear_bit(page_idx, var_table->bitmap);
+ mutex_unlock(&var_table->bitmap_lock);
+end:
+ kfree(entry);
+ return ERR_PTR(err);
}
-static int mlx5_ib_destroy_counters(struct ib_counters *counters)
+static int UVERBS_HANDLER(MLX5_IB_METHOD_VAR_OBJ_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
{
- struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE);
+ struct mlx5_ib_ucontext *c;
+ struct mlx5_user_mmap_entry *entry;
+ u64 mmap_offset;
+ u32 length;
+ int err;
- counters_clear_description(counters);
- if (mcounters->hw_cntrs_hndl)
- mlx5_fc_destroy(to_mdev(counters->device)->mdev,
- mcounters->hw_cntrs_hndl);
+ c = to_mucontext(ib_uverbs_get_ucontext(attrs));
+ if (IS_ERR(c))
+ return PTR_ERR(c);
- kfree(mcounters);
+ entry = alloc_var_entry(c);
+ if (IS_ERR(entry))
+ return PTR_ERR(entry);
- return 0;
+ mmap_offset = mlx5_entry_to_mmap_offset(entry);
+ length = entry->rdma_entry.npages * PAGE_SIZE;
+ uobj->object = entry;
+ uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE);
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_OFFSET,
+ &mmap_offset, sizeof(mmap_offset));
+ if (err)
+ return err;
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_PAGE_ID,
+ &entry->page_idx, sizeof(entry->page_idx));
+ if (err)
+ return err;
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_LENGTH,
+ &length, sizeof(length));
+ return err;
}
-static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
- struct uverbs_attr_bundle *attrs)
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_VAR_OBJ_ALLOC,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE,
+ MLX5_IB_OBJECT_VAR,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_PAGE_ID,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_LENGTH,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_VAR_OBJ_DESTROY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_VAR_OBJ_DESTROY_HANDLE,
+ MLX5_IB_OBJECT_VAR,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_VAR,
+ UVERBS_TYPE_ALLOC_IDR(mmap_obj_cleanup),
+ &UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_ALLOC),
+ &UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_DESTROY));
+
+static bool var_is_supported(struct ib_device *device)
{
- struct mlx5_ib_mcounters *mcounters;
+ struct mlx5_ib_dev *dev = to_mdev(device);
- mcounters = kzalloc(sizeof(*mcounters), GFP_KERNEL);
- if (!mcounters)
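+ /* VAR doorbells are exposed only when the device supports virtio-net
+ * queue general objects.
+ */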
+ return (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
+ MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q);
+}
+
+static struct mlx5_user_mmap_entry *
+alloc_uar_entry(struct mlx5_ib_ucontext *c,
+ enum mlx5_ib_uapi_uar_alloc_type alloc_type)
+{
+ struct mlx5_user_mmap_entry *entry;
+ struct mlx5_ib_dev *dev;
+ u32 uar_index;
+ int err;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
return ERR_PTR(-ENOMEM);
- mutex_init(&mcounters->mcntrs_mutex);
+ dev = to_mdev(c->ibucontext.device);
+ err = mlx5_cmd_uar_alloc(dev->mdev, &uar_index, c->devx_uid);
+ if (err)
+ goto end;
- return &mcounters->ibcntrs;
+ entry->page_idx = uar_index;
+ entry->address = uar_index2paddress(dev, uar_index);
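+ /* BlueFlame UARs are mapped write-combining; otherwise the UAR is
+ * mapped non-cached.
+ */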
+ if (alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF)
+ entry->mmap_flag = MLX5_IB_MMAP_TYPE_UAR_WC;
+ else
+ entry->mmap_flag = MLX5_IB_MMAP_TYPE_UAR_NC;
+
+ err = mlx5_rdma_user_mmap_entry_insert(c, entry, PAGE_SIZE);
+ if (err)
+ goto err_insert;
+
+ return entry;
+
+err_insert:
+ mlx5_cmd_uar_dealloc(dev->mdev, uar_index, c->devx_uid);
+end:
+ kfree(entry);
+ return ERR_PTR(err);
}
-static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
+static int UVERBS_HANDLER(MLX5_IB_METHOD_UAR_OBJ_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
{
- struct mlx5_core_dev *mdev = dev->mdev;
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE);
+ enum mlx5_ib_uapi_uar_alloc_type alloc_type;
+ struct mlx5_ib_ucontext *c;
+ struct mlx5_user_mmap_entry *entry;
+ u64 mmap_offset;
+ u32 length;
+ int err;
- mlx5_ib_cleanup_multiport_master(dev);
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
- srcu_barrier(&dev->mr_srcu);
- cleanup_srcu_struct(&dev->mr_srcu);
- }
+ c = to_mucontext(ib_uverbs_get_ucontext(attrs));
+ if (IS_ERR(c))
+ return PTR_ERR(c);
- WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES));
+ err = uverbs_get_const(&alloc_type, attrs,
+ MLX5_IB_ATTR_UAR_OBJ_ALLOC_TYPE);
+ if (err)
+ return err;
- WARN_ON(dev->dm.steering_sw_icm_alloc_blocks &&
- !bitmap_empty(
- dev->dm.steering_sw_icm_alloc_blocks,
- BIT(MLX5_CAP_DEV_MEM(mdev, log_steering_sw_icm_size) -
- MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev))));
+ if (alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF &&
+ alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC)
+ return -EOPNOTSUPP;
- kfree(dev->dm.steering_sw_icm_alloc_blocks);
+ if (!mlx5_wc_support_get(to_mdev(c->ibucontext.device)->mdev) &&
+ alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF)
+ return -EOPNOTSUPP;
- WARN_ON(dev->dm.header_modify_sw_icm_alloc_blocks &&
- !bitmap_empty(dev->dm.header_modify_sw_icm_alloc_blocks,
- BIT(MLX5_CAP_DEV_MEM(
- mdev, log_header_modify_sw_icm_size) -
- MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev))));
+ entry = alloc_uar_entry(c, alloc_type);
+ if (IS_ERR(entry))
+ return PTR_ERR(entry);
- kfree(dev->dm.header_modify_sw_icm_alloc_blocks);
+ mmap_offset = mlx5_entry_to_mmap_offset(entry);
+ length = entry->rdma_entry.npages * PAGE_SIZE;
+ uobj->object = entry;
+ uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE);
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_OFFSET,
+ &mmap_offset, sizeof(mmap_offset));
+ if (err)
+ return err;
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_PAGE_ID,
+ &entry->page_idx, sizeof(entry->page_idx));
+ if (err)
+ return err;
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_LENGTH,
+ &length, sizeof(length));
+ return err;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_UAR_OBJ_ALLOC,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE,
+ MLX5_IB_OBJECT_UAR,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_UAR_OBJ_ALLOC_TYPE,
+ enum mlx5_ib_uapi_uar_alloc_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_UAR_OBJ_ALLOC_PAGE_ID,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_LENGTH,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_UAR_OBJ_DESTROY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_UAR_OBJ_DESTROY_HANDLE,
+ MLX5_IB_OBJECT_UAR,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_UAR,
+ UVERBS_TYPE_ALLOC_IDR(mmap_obj_cleanup),
+ &UVERBS_METHOD(MLX5_IB_METHOD_UAR_OBJ_ALLOC),
+ &UVERBS_METHOD(MLX5_IB_METHOD_UAR_OBJ_DESTROY));
+
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
+ mlx5_ib_query_context,
+ UVERBS_OBJECT_DEVICE,
+ UVERBS_METHOD_QUERY_CONTEXT,
+ UVERBS_ATTR_PTR_OUT(
+ MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX,
+ UVERBS_ATTR_STRUCT(struct mlx5_ib_alloc_ucontext_resp,
+ dump_fill_mkey),
+ UA_MANDATORY));
+
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
+ mlx5_ib_reg_dmabuf_mr,
+ UVERBS_OBJECT_MR,
+ UVERBS_METHOD_REG_DMABUF_MR,
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS,
+ enum mlx5_ib_uapi_reg_dmabuf_flags,
+ UA_OPTIONAL));
+
+static const struct uapi_definition mlx5_ib_defs[] = {
+ UAPI_DEF_CHAIN(mlx5_ib_devx_defs),
+ UAPI_DEF_CHAIN(mlx5_ib_flow_defs),
+ UAPI_DEF_CHAIN(mlx5_ib_qos_defs),
+ UAPI_DEF_CHAIN(mlx5_ib_std_types_defs),
+ UAPI_DEF_CHAIN(mlx5_ib_dm_defs),
+ UAPI_DEF_CHAIN(mlx5_ib_create_cq_defs),
+
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DEVICE, &mlx5_ib_query_context),
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_MR, &mlx5_ib_reg_dmabuf_mr),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR,
+ UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_UAR),
+ {}
+};
+
+static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_data_direct_cleanup(dev);
+ mlx5_ib_cleanup_multiport_master(dev);
+ WARN_ON(!xa_empty(&dev->odp_mkeys));
+ mutex_destroy(&dev->cap_mask_mutex);
+ WARN_ON(!xa_empty(&dev->sig_mrs));
+ WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES));
+ mlx5r_macsec_dealloc_gids(dev);
}
static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
- u64 header_modify_icm_blocks = 0;
- u64 steering_icm_blocks = 0;
- int err;
- int i;
+ int err, i;
+
+ dev->ib_dev.node_type = RDMA_NODE_IB_CA;
+ dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
+ dev->ib_dev.dev.parent = mdev->device;
+ dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES;
for (i = 0; i < dev->num_ports; i++) {
spin_lock_init(&dev->port[i].mp.mpi_lock);
- rwlock_init(&dev->port[i].roce.netdev_lock);
dev->port[i].roce.dev = dev;
dev->port[i].roce.native_port_num = i + 1;
dev->port[i].roce.last_port_state = IB_PORT_DOWN;
}
- err = mlx5_ib_init_multiport_master(dev);
+ err = mlx5r_cmd_query_special_mkeys(dev);
if (err)
return err;
- err = set_has_smi_cap(dev);
+ err = mlx5r_macsec_init_gids_and_devlist(dev);
if (err)
return err;
- if (!mlx5_core_mp_enabled(mdev)) {
- for (i = 1; i <= dev->num_ports; i++) {
- err = get_port_caps(dev, i);
- if (err)
- break;
- }
- } else {
- err = get_port_caps(dev, mlx5_core_native_port_num(mdev));
- }
+ err = mlx5_ib_init_multiport_master(dev);
+ if (err)
+ goto err;
+
+ err = set_has_smi_cap(dev);
+ if (err)
+ goto err_mp;
+
+ err = mlx5_query_max_pkeys(&dev->ib_dev, &dev->pkey_table_len);
if (err)
goto err_mp;
if (mlx5_use_mad_ifc(dev))
get_ext_port_caps(dev);
- dev->ib_dev.owner = THIS_MODULE;
- dev->ib_dev.node_type = RDMA_NODE_IB_CA;
- dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
- dev->ib_dev.phys_port_cnt = dev->num_ports;
- dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev);
- dev->ib_dev.dev.parent = mdev->device;
+ dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_max(mdev);
mutex_init(&dev->cap_mask_mutex);
+ mutex_init(&dev->data_direct_lock);
INIT_LIST_HEAD(&dev->qp_list);
spin_lock_init(&dev->reset_flow_resource_lock);
-
- if (MLX5_CAP_GEN_64(mdev, general_obj_types) &
- MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM) {
- if (MLX5_CAP64_DEV_MEM(mdev, steering_sw_icm_start_address)) {
- steering_icm_blocks =
- BIT(MLX5_CAP_DEV_MEM(mdev,
- log_steering_sw_icm_size) -
- MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev));
-
- dev->dm.steering_sw_icm_alloc_blocks =
- kcalloc(BITS_TO_LONGS(steering_icm_blocks),
- sizeof(unsigned long), GFP_KERNEL);
- if (!dev->dm.steering_sw_icm_alloc_blocks)
- goto err_mp;
- }
-
- if (MLX5_CAP64_DEV_MEM(mdev,
- header_modify_sw_icm_start_address)) {
- header_modify_icm_blocks = BIT(
- MLX5_CAP_DEV_MEM(
- mdev, log_header_modify_sw_icm_size) -
- MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev));
-
- dev->dm.header_modify_sw_icm_alloc_blocks =
- kcalloc(BITS_TO_LONGS(header_modify_icm_blocks),
- sizeof(unsigned long), GFP_KERNEL);
- if (!dev->dm.header_modify_sw_icm_alloc_blocks)
- goto err_dm;
- }
- }
+ xa_init(&dev->odp_mkeys);
+ xa_init(&dev->sig_mrs);
+ atomic_set(&dev->mkey_var, 0);
spin_lock_init(&dev->dm.lock);
dev->dm.dev = mdev;
-
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
- err = init_srcu_struct(&dev->mr_srcu);
- if (err)
- goto err_dm;
- }
+ err = mlx5_ib_data_direct_init(dev);
+ if (err)
+ goto err_mp;
return 0;
-
-err_dm:
- kfree(dev->dm.steering_sw_icm_alloc_blocks);
- kfree(dev->dm.header_modify_sw_icm_alloc_blocks);
-
err_mp:
mlx5_ib_cleanup_multiport_master(dev);
-
- return -ENOMEM;
-}
-
-static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev)
-{
- dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
-
- if (!dev->flow_db)
- return -ENOMEM;
-
- mutex_init(&dev->flow_db->lock);
-
- return 0;
+err:
+ mlx5r_macsec_dealloc_gids(dev);
+ return err;
}
-static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
-{
- kfree(dev->flow_db);
-}
+static struct ib_device *mlx5_ib_add_sub_dev(struct ib_device *parent,
+ enum rdma_nl_dev_type type,
+ const char *name);
+static void mlx5_ib_del_sub_dev(struct ib_device *sub_dev);
static const struct ib_device_ops mlx5_ib_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_MLX5,
+ .uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION,
+
.add_gid = mlx5_ib_add_gid,
+ .add_sub_dev = mlx5_ib_add_sub_dev,
.alloc_mr = mlx5_ib_alloc_mr,
+ .alloc_mr_integrity = mlx5_ib_alloc_mr_integrity,
.alloc_pd = mlx5_ib_alloc_pd,
.alloc_ucontext = mlx5_ib_alloc_ucontext,
.attach_mcast = mlx5_ib_mcg_attach,
.check_mr_status = mlx5_ib_check_mr_status,
.create_ah = mlx5_ib_create_ah,
- .create_counters = mlx5_ib_create_counters,
.create_cq = mlx5_ib_create_cq,
- .create_flow = mlx5_ib_create_flow,
.create_qp = mlx5_ib_create_qp,
.create_srq = mlx5_ib_create_srq,
+ .create_user_ah = mlx5_ib_create_ah,
.dealloc_pd = mlx5_ib_dealloc_pd,
.dealloc_ucontext = mlx5_ib_dealloc_ucontext,
.del_gid = mlx5_ib_del_gid,
+ .del_sub_dev = mlx5_ib_del_sub_dev,
.dereg_mr = mlx5_ib_dereg_mr,
.destroy_ah = mlx5_ib_destroy_ah,
- .destroy_counters = mlx5_ib_destroy_counters,
.destroy_cq = mlx5_ib_destroy_cq,
- .destroy_flow = mlx5_ib_destroy_flow,
- .destroy_flow_action = mlx5_ib_destroy_flow_action,
.destroy_qp = mlx5_ib_destroy_qp,
.destroy_srq = mlx5_ib_destroy_srq,
.detach_mcast = mlx5_ib_mcg_detach,
.disassociate_ucontext = mlx5_ib_disassociate_ucontext,
.drain_rq = mlx5_ib_drain_rq,
.drain_sq = mlx5_ib_drain_sq,
+ .device_group = &mlx5_attr_group,
.get_dev_fw_str = get_dev_fw_str,
.get_dma_mr = mlx5_ib_get_dma_mr,
.get_link_layer = mlx5_ib_port_link_layer,
.map_mr_sg = mlx5_ib_map_mr_sg,
+ .map_mr_sg_pi = mlx5_ib_map_mr_sg_pi,
.mmap = mlx5_ib_mmap,
+ .mmap_free = mlx5_ib_mmap_free,
.modify_cq = mlx5_ib_modify_cq,
.modify_device = mlx5_ib_modify_device,
.modify_port = mlx5_ib_modify_port,
.modify_qp = mlx5_ib_modify_qp,
.modify_srq = mlx5_ib_modify_srq,
+ .pre_destroy_cq = mlx5_ib_pre_destroy_cq,
.poll_cq = mlx5_ib_poll_cq,
- .post_recv = mlx5_ib_post_recv,
- .post_send = mlx5_ib_post_send,
+ .post_destroy_cq = mlx5_ib_post_destroy_cq,
+ .post_recv = mlx5_ib_post_recv_nodrain,
+ .post_send = mlx5_ib_post_send_nodrain,
.post_srq_recv = mlx5_ib_post_srq_recv,
.process_mad = mlx5_ib_process_mad,
.query_ah = mlx5_ib_query_ah,
@@ -6172,29 +4299,31 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.query_pkey = mlx5_ib_query_pkey,
.query_qp = mlx5_ib_query_qp,
.query_srq = mlx5_ib_query_srq,
- .read_counters = mlx5_ib_read_counters,
+ .query_ucontext = mlx5_ib_query_ucontext,
.reg_user_mr = mlx5_ib_reg_user_mr,
+ .reg_user_mr_dmabuf = mlx5_ib_reg_user_mr_dmabuf,
.req_notify_cq = mlx5_ib_arm_cq,
.rereg_user_mr = mlx5_ib_rereg_user_mr,
.resize_cq = mlx5_ib_resize_cq,
+ .ufile_hw_cleanup = mlx5_ib_ufile_hw_cleanup,
INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
+ INIT_RDMA_OBJ_SIZE(ib_cq, mlx5_ib_cq, ibcq),
+ INIT_RDMA_OBJ_SIZE(ib_dmah, mlx5_ib_dmah, ibdmah),
INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_qp, mlx5_ib_qp, ibqp),
INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq),
INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext),
};
-static const struct ib_device_ops mlx5_ib_dev_flow_ipsec_ops = {
- .create_flow_action_esp = mlx5_ib_create_flow_action_esp,
- .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp,
-};
-
static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = {
.rdma_netdev_get_params = mlx5_ib_rn_get_params,
};
static const struct ib_device_ops mlx5_ib_dev_sriov_ops = {
.get_vf_config = mlx5_ib_get_vf_config,
+ .get_vf_guid = mlx5_ib_get_vf_guid,
.get_vf_stats = mlx5_ib_get_vf_stats,
.set_vf_guid = mlx5_ib_set_vf_guid,
.set_vf_link_state = mlx5_ib_set_vf_link_state,
@@ -6203,61 +4332,91 @@ static const struct ib_device_ops mlx5_ib_dev_sriov_ops = {
static const struct ib_device_ops mlx5_ib_dev_mw_ops = {
.alloc_mw = mlx5_ib_alloc_mw,
.dealloc_mw = mlx5_ib_dealloc_mw,
+
+ INIT_RDMA_OBJ_SIZE(ib_mw, mlx5_ib_mw, ibmw),
};
static const struct ib_device_ops mlx5_ib_dev_xrc_ops = {
.alloc_xrcd = mlx5_ib_alloc_xrcd,
.dealloc_xrcd = mlx5_ib_dealloc_xrcd,
-};
-static const struct ib_device_ops mlx5_ib_dev_dm_ops = {
- .alloc_dm = mlx5_ib_alloc_dm,
- .dealloc_dm = mlx5_ib_dealloc_dm,
- .reg_dm_mr = mlx5_ib_reg_dm_mr,
+ INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx5_ib_xrcd, ibxrcd),
};
+static int mlx5_ib_init_var_table(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_var_table *var_table = &dev->var_table;
+ u8 log_doorbell_bar_size;
+ u8 log_doorbell_stride;
+ u64 bar_size;
+
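+ /* Carve the doorbell BAR described by the VDPA emulation caps into
+ * fixed-size strides; the bitmap tracks which strides are handed out.
+ */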
+ log_doorbell_bar_size = MLX5_CAP_DEV_VDPA_EMULATION(mdev,
+ log_doorbell_bar_size);
+ log_doorbell_stride = MLX5_CAP_DEV_VDPA_EMULATION(mdev,
+ log_doorbell_stride);
+ var_table->hw_start_addr = dev->mdev->bar_addr +
+ MLX5_CAP64_DEV_VDPA_EMULATION(mdev,
+ doorbell_bar_offset);
+ bar_size = (1ULL << log_doorbell_bar_size) * 4096;
+ var_table->stride_size = 1ULL << log_doorbell_stride;
+ var_table->num_var_hw_entries = div_u64(bar_size,
+ var_table->stride_size);
+ mutex_init(&var_table->bitmap_lock);
+ var_table->bitmap = bitmap_zalloc(var_table->num_var_hw_entries,
+ GFP_KERNEL);
+ return (var_table->bitmap) ? 0 : -ENOMEM;
+}
+
+static void mlx5_ib_cleanup_ucaps(struct mlx5_ib_dev *dev)
+{
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL)
+ ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
+
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
+ MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA)
+ ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
+}
+
+static int mlx5_ib_init_ucaps(struct mlx5_ib_dev *dev)
+{
+ int ret;
+
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL) {
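+ /* Create a ucap only for each RDMA_CTRL capability the firmware
+ * reports.
+ */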
+ ret = ib_create_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
+ if (ret)
+ return ret;
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
+ MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA) {
+ ret = ib_create_ucap(RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
+ if (ret)
+ goto remove_local;
+ }
+
+ return 0;
+
+remove_local:
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL)
+ ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
+ return ret;
+}
+
+static void mlx5_ib_stage_caps_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (MLX5_CAP_GEN_2_64(dev->mdev, general_obj_types_127_64) &
+ MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL)
+ mlx5_ib_cleanup_ucaps(dev);
+
+ bitmap_free(dev->var_table.bitmap);
+}
+
static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
int err;
- dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
- dev->ib_dev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_REREG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
- (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
- (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
- (1ull << IB_USER_VERBS_CMD_OPEN_QP);
- dev->ib_dev.uverbs_ex_cmd_mask =
- (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
-
if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
IS_ENABLED(CONFIG_MLX5_CORE_IPOIB))
ib_set_device_ops(&dev->ib_dev,
@@ -6268,29 +4427,20 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence));
- if (MLX5_CAP_GEN(mdev, imaicl)) {
- dev->ib_dev.uverbs_cmd_mask |=
- (1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
+ if (MLX5_CAP_GEN(mdev, imaicl))
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_mw_ops);
- }
- if (MLX5_CAP_GEN(mdev, xrc)) {
- dev->ib_dev.uverbs_cmd_mask |=
- (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
- (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
+ if (MLX5_CAP_GEN(mdev, xrc))
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops);
- }
if (MLX5_CAP_DEV_MEM(mdev, memic) ||
MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM)
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops);
- if (mlx5_accel_ipsec_device_caps(dev->mdev) &
- MLX5_ACCEL_IPSEC_CAP_DEVICE)
- ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_flow_ipsec_ops);
- dev->ib_dev.driver_id = RDMA_DRIVER_MLX5;
+ if (mdev->st)
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dmah_ops);
+
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops);
if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
@@ -6305,6 +4455,22 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
mutex_init(&dev->lb.mutex);
+ if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
+ MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) {
+ err = mlx5_ib_init_var_table(dev);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN_2_64(dev->mdev, general_obj_types_127_64) &
+ MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL) {
+ err = mlx5_ib_init_ucaps(dev);
+ if (err)
+ return err;
+ }
+
+ dev->ib_dev.use_cq_dim = true;
+
return 0;
}
@@ -6322,9 +4488,10 @@ static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev)
static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = {
.get_port_immutable = mlx5_port_rep_immutable,
.query_port = mlx5_ib_rep_query_port,
+ .query_pkey = mlx5_ib_rep_query_pkey,
};
-static int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev)
+static int mlx5_ib_stage_raw_eth_non_default_cb(struct mlx5_ib_dev *dev)
{
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_rep_ops);
return 0;
@@ -6335,70 +4502,30 @@ static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = {
.create_wq = mlx5_ib_create_wq,
.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table,
.destroy_wq = mlx5_ib_destroy_wq,
- .get_netdev = mlx5_ib_get_netdev,
.modify_wq = mlx5_ib_modify_wq,
-};
-
-static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev)
-{
- u8 port_num;
-
- dev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
- ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops);
-
- port_num = mlx5_core_native_port_num(dev->mdev) - 1;
-
- /* Register only for native ports */
- return mlx5_add_netdev_notifier(dev, port_num);
-}
-
-static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev)
-{
- u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
-
- mlx5_remove_netdev_notifier(dev, port_num);
-}
-
-static int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev)
-{
- struct mlx5_core_dev *mdev = dev->mdev;
- enum rdma_link_layer ll;
- int port_type_cap;
- int err = 0;
-
- port_type_cap = MLX5_CAP_GEN(mdev, port_type);
- ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
- if (ll == IB_LINK_LAYER_ETHERNET)
- err = mlx5_ib_stage_common_roce_init(dev);
-
- return err;
-}
-
-static void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev)
-{
- mlx5_ib_stage_common_roce_cleanup(dev);
-}
+ INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx5_ib_rwq_ind_table,
+ ib_rwq_ind_tbl),
+};
-static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
enum rdma_link_layer ll;
int port_type_cap;
+ u32 port_num = 0;
int err;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
- err = mlx5_ib_stage_common_roce_init(dev);
- if (err)
- return err;
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops);
+
+ port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+
+ /* Register only for native ports */
+ mlx5_mdev_netdev_track(dev, port_num);
err = mlx5_enable_eth(dev);
if (err)
@@ -6407,68 +4534,26 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
return 0;
cleanup:
- mlx5_ib_stage_common_roce_cleanup(dev);
-
+ mlx5_mdev_netdev_untrack(dev, port_num);
return err;
}
-static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
enum rdma_link_layer ll;
int port_type_cap;
+ u32 port_num;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
mlx5_disable_eth(dev);
- mlx5_ib_stage_common_roce_cleanup(dev);
- }
-}
-
-static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
-{
- return create_dev_resources(&dev->devr);
-}
-
-static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
-{
- destroy_dev_resources(&dev->devr);
-}
-
-static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
-{
- mlx5_ib_internal_fill_odp_caps(dev);
-
- return mlx5_ib_odp_init_one(dev);
-}
-
-static void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev)
-{
- mlx5_ib_odp_cleanup_one(dev);
-}
-
-static const struct ib_device_ops mlx5_ib_dev_hw_stats_ops = {
- .alloc_hw_stats = mlx5_ib_alloc_hw_stats,
- .get_hw_stats = mlx5_ib_get_hw_stats,
-};
-static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
-{
- if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
- ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_hw_stats_ops);
-
- return mlx5_ib_alloc_counters(dev);
+ port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ mlx5_mdev_netdev_untrack(dev, port_num);
}
-
- return 0;
-}
-
-static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
-{
- if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
- mlx5_ib_dealloc_counters(dev);
}
static int mlx5_ib_stage_cong_debugfs_init(struct mlx5_ib_dev *dev)
@@ -6484,17 +4569,6 @@ static void mlx5_ib_stage_cong_debugfs_cleanup(struct mlx5_ib_dev *dev)
mlx5_core_native_port_num(dev->mdev) - 1);
}
-static int mlx5_ib_stage_uar_init(struct mlx5_ib_dev *dev)
-{
- dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
- return PTR_ERR_OR_ZERO(dev->mdev->priv.uar);
-}
-
-static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev)
-{
- mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
-}
-
static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
{
int err;
@@ -6505,7 +4579,7 @@ static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true);
if (err)
- mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
+ mlx5_free_bfreg(dev->mdev, &dev->bfreg);
return err;
}
@@ -6520,17 +4594,21 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
{
const char *name;
- rdma_set_device_sysfs_group(&dev->ib_dev, &mlx5_attr_group);
- if (!mlx5_lag_is_roce(dev->mdev))
+ if (dev->sub_dev_name) {
+ name = dev->sub_dev_name;
+ ib_mark_name_assigned_by_user(&dev->ib_dev);
+ } else if (!mlx5_lag_is_active(dev->mdev))
name = "mlx5_%d";
else
name = "mlx5_bond_%d";
- return ib_register_device(&dev->ib_dev, name);
+ return ib_register_device(&dev->ib_dev, name, &dev->mdev->pdev->dev);
}
static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
{
- destroy_umrc_res(dev);
+ mlx5_mkey_cache_cleanup(dev);
+ mlx5r_umr_resource_cleanup(dev);
+ mlx5r_umr_cleanup(dev);
}
static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
@@ -6540,53 +4618,112 @@ static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
{
- return create_umr_res(dev);
+ int ret;
+
+ ret = mlx5r_umr_init(dev);
+ if (ret)
+ return ret;
+
+ ret = mlx5_mkey_cache_init(dev);
+ if (ret)
+ mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
+ return ret;
}
static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev)
{
- init_delay_drop(dev);
+ struct dentry *root;
+
+ if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
+ return 0;
+
+ mutex_init(&dev->delay_drop.lock);
+ dev->delay_drop.dev = dev;
+ dev->delay_drop.activate = false;
+ dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000;
+ INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler);
+ atomic_set(&dev->delay_drop.rqs_cnt, 0);
+ atomic_set(&dev->delay_drop.events_cnt, 0);
+
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ root = debugfs_create_dir("delay_drop", mlx5_debugfs_get_dev_root(dev->mdev));
+ dev->delay_drop.dir_debugfs = root;
+ debugfs_create_atomic_t("num_timeout_events", 0400, root,
+ &dev->delay_drop.events_cnt);
+ debugfs_create_atomic_t("num_rqs", 0400, root,
+ &dev->delay_drop.rqs_cnt);
+ debugfs_create_file("timeout", 0600, root, &dev->delay_drop,
+ &fops_delay_drop_timeout);
return 0;
}
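/*
 * Illustrative resulting layout, assuming debugfs is mounted at
 * /sys/kernel/debug and mlx5_debugfs_get_dev_root() resolves to the
 * per-device mlx5 directory (the exact path depends on the PCI device name):
 *
 *   /sys/kernel/debug/mlx5/<pci-dev>/delay_drop/
 *       num_timeout_events   0400, reads delay_drop.events_cnt
 *       num_rqs              0400, reads delay_drop.rqs_cnt
 *       timeout              0600, backed by fops_delay_drop_timeout
 */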
static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
{
- cancel_delay_drop(dev);
+ if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
+ return;
+
+ cancel_work_sync(&dev->delay_drop.delay_drop_work);
+ if (!dev->delay_drop.dir_debugfs)
+ return;
+
+ debugfs_remove_recursive(dev->delay_drop.dir_debugfs);
+ dev->delay_drop.dir_debugfs = NULL;
}
static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev)
{
+ struct mlx5_ib_resources *devr = &dev->devr;
+ int port;
+
+ for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
+ INIT_WORK(&devr->ports[port].pkey_change_work,
+ pkey_change_handler);
+
dev->mdev_events.notifier_call = mlx5_ib_event;
mlx5_notifier_register(dev->mdev, &dev->mdev_events);
+
+ mlx5r_macsec_event_register(dev);
+
return 0;
}
static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev)
{
+ struct mlx5_ib_resources *devr = &dev->devr;
+ int port;
+
+ mlx5r_macsec_event_unregister(dev);
mlx5_notifier_unregister(dev->mdev, &dev->mdev_events);
+
+ for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
+ cancel_work_sync(&devr->ports[port].pkey_change_work);
}
-static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev)
+void mlx5_ib_data_direct_bind(struct mlx5_ib_dev *ibdev,
+ struct mlx5_data_direct_dev *dev)
{
- int uid;
-
- uid = mlx5_ib_devx_create(dev, false);
- if (uid > 0)
- dev->devx_whitelist_uid = uid;
-
- return 0;
+ mutex_lock(&ibdev->data_direct_lock);
+ ibdev->data_direct_dev = dev;
+ mutex_unlock(&ibdev->data_direct_lock);
}
-static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev)
+
+void mlx5_ib_data_direct_unbind(struct mlx5_ib_dev *ibdev)
{
- if (dev->devx_whitelist_uid)
- mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
+ mutex_lock(&ibdev->data_direct_lock);
+ mlx5_ib_revoke_data_direct_mrs(ibdev);
+ ibdev->data_direct_dev = NULL;
+ mutex_unlock(&ibdev->data_direct_lock);
}
void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
const struct mlx5_ib_profile *profile,
int stage)
{
+ dev->ib_active = false;
+
/* Number of stages to cleanup */
while (stage) {
stage--;
@@ -6598,12 +4735,14 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
ib_dealloc_device(&dev->ib_dev);
}
-void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
- const struct mlx5_ib_profile *profile)
+int __mlx5_ib_add(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile)
{
int err;
int i;
+ dev->profile = profile;
+
for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
if (profile->stage[i].init) {
err = profile->stage[i].init(dev);
@@ -6612,54 +4751,53 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
}
}
- dev->profile = profile;
dev->ib_active = true;
-
- return dev;
+ return 0;
err_out:
- __mlx5_ib_remove(dev, profile, i);
-
- return NULL;
+ /* Clean up stages which were initialized */
+ while (i) {
+ i--;
+ if (profile->stage[i].cleanup)
+ profile->stage[i].cleanup(dev);
+ }
+ return -ENOMEM;
}
static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_INIT,
mlx5_ib_stage_init_init,
mlx5_ib_stage_init_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
- mlx5_ib_stage_flow_db_init,
- mlx5_ib_stage_flow_db_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FS,
+ mlx5_ib_fs_init,
+ mlx5_ib_fs_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CAPS,
mlx5_ib_stage_caps_init,
- NULL),
+ mlx5_ib_stage_caps_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
mlx5_ib_stage_non_default_cb,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
- mlx5_ib_stage_roce_init,
- mlx5_ib_stage_roce_cleanup),
+ mlx5_ib_roce_init,
+ mlx5_ib_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_QP,
+ mlx5_init_qp_table,
+ mlx5_cleanup_qp_table),
STAGE_CREATE(MLX5_IB_STAGE_SRQ,
mlx5_init_srq_table,
mlx5_cleanup_srq_table),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
- mlx5_ib_stage_dev_res_init,
- mlx5_ib_stage_dev_res_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
- mlx5_ib_stage_dev_notifier_init,
- mlx5_ib_stage_dev_notifier_cleanup),
+ mlx5_ib_dev_res_init,
+ mlx5_ib_dev_res_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_ODP,
- mlx5_ib_stage_odp_init,
- mlx5_ib_stage_odp_cleanup),
+ mlx5_ib_odp_init_one,
+ mlx5_ib_odp_cleanup_one),
STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
- mlx5_ib_stage_counters_init,
- mlx5_ib_stage_counters_cleanup),
+ mlx5_ib_counters_init,
+ mlx5_ib_counters_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
mlx5_ib_stage_cong_debugfs_init,
mlx5_ib_stage_cong_debugfs_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_UAR,
- mlx5_ib_stage_uar_init,
- mlx5_ib_stage_uar_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_BFREG,
mlx5_ib_stage_bfrag_init,
mlx5_ib_stage_bfrag_cleanup),
@@ -6667,50 +4805,56 @@ static const struct mlx5_ib_profile pf_profile = {
NULL,
mlx5_ib_stage_pre_ib_reg_umr_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
- mlx5_ib_stage_devx_init,
- mlx5_ib_stage_devx_cleanup),
+ mlx5_ib_devx_init,
+ mlx5_ib_devx_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
+ mlx5_ib_stage_dev_notifier_init,
+ mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
mlx5_ib_stage_post_ib_reg_umr_init,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
mlx5_ib_stage_delay_drop_init,
mlx5_ib_stage_delay_drop_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_RESTRACK,
+ mlx5_ib_restrack_init,
+ NULL),
};
-const struct mlx5_ib_profile uplink_rep_profile = {
+const struct mlx5_ib_profile raw_eth_profile = {
STAGE_CREATE(MLX5_IB_STAGE_INIT,
mlx5_ib_stage_init_init,
mlx5_ib_stage_init_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
- mlx5_ib_stage_flow_db_init,
- mlx5_ib_stage_flow_db_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FS,
+ mlx5_ib_fs_init,
+ mlx5_ib_fs_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_CAPS,
mlx5_ib_stage_caps_init,
- NULL),
+ mlx5_ib_stage_caps_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
- mlx5_ib_stage_rep_non_default_cb,
+ mlx5_ib_stage_raw_eth_non_default_cb,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
- mlx5_ib_stage_rep_roce_init,
- mlx5_ib_stage_rep_roce_cleanup),
+ mlx5_ib_roce_init,
+ mlx5_ib_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_QP,
+ mlx5_init_qp_table,
+ mlx5_cleanup_qp_table),
STAGE_CREATE(MLX5_IB_STAGE_SRQ,
mlx5_init_srq_table,
mlx5_cleanup_srq_table),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
- mlx5_ib_stage_dev_res_init,
- mlx5_ib_stage_dev_res_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
- mlx5_ib_stage_dev_notifier_init,
- mlx5_ib_stage_dev_notifier_cleanup),
+ mlx5_ib_dev_res_init,
+ mlx5_ib_dev_res_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
- mlx5_ib_stage_counters_init,
- mlx5_ib_stage_counters_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_UAR,
- mlx5_ib_stage_uar_init,
- mlx5_ib_stage_uar_cleanup),
+ mlx5_ib_counters_init,
+ mlx5_ib_counters_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
+ mlx5_ib_stage_cong_debugfs_init,
+ mlx5_ib_stage_cong_debugfs_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_BFREG,
mlx5_ib_stage_bfrag_init,
mlx5_ib_stage_bfrag_cleanup),
@@ -6718,18 +4862,115 @@ const struct mlx5_ib_profile uplink_rep_profile = {
NULL,
mlx5_ib_stage_pre_ib_reg_umr_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
- mlx5_ib_stage_devx_init,
- mlx5_ib_stage_devx_cleanup),
+ mlx5_ib_devx_init,
+ mlx5_ib_devx_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
+ mlx5_ib_stage_dev_notifier_init,
+ mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
mlx5_ib_stage_post_ib_reg_umr_init,
NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
+ mlx5_ib_stage_delay_drop_init,
+ mlx5_ib_stage_delay_drop_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_RESTRACK,
+ mlx5_ib_restrack_init,
+ NULL),
};
-static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev)
+static const struct mlx5_ib_profile plane_profile = {
+ STAGE_CREATE(MLX5_IB_STAGE_INIT,
+ mlx5_ib_stage_init_init,
+ mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+ mlx5_ib_stage_caps_init,
+ mlx5_ib_stage_caps_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+ mlx5_ib_stage_non_default_cb,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_QP,
+ mlx5_init_qp_table,
+ mlx5_cleanup_qp_table),
+ STAGE_CREATE(MLX5_IB_STAGE_SRQ,
+ mlx5_init_srq_table,
+ mlx5_cleanup_srq_table),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+ mlx5_ib_dev_res_init,
+ mlx5_ib_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+ mlx5_ib_stage_bfrag_init,
+ mlx5_ib_stage_bfrag_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+ mlx5_ib_stage_ib_reg_init,
+ mlx5_ib_stage_ib_reg_cleanup),
+};
+
+static struct ib_device *mlx5_ib_add_sub_dev(struct ib_device *parent,
+ enum rdma_nl_dev_type type,
+ const char *name)
{
+ struct mlx5_ib_dev *mparent = to_mdev(parent), *mplane;
+ enum rdma_link_layer ll;
+ int ret;
+
+ if (mparent->smi_dev)
+ return ERR_PTR(-EEXIST);
+
+ ll = mlx5_port_type_cap_to_rdma_ll(MLX5_CAP_GEN(mparent->mdev,
+ port_type));
+ if (type != RDMA_DEVICE_TYPE_SMI || !mparent->num_plane ||
+ ll != IB_LINK_LAYER_INFINIBAND ||
+ !MLX5_CAP_GEN_2(mparent->mdev, multiplane_qp_ud))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mplane = ib_alloc_device_with_net(mlx5_ib_dev, ib_dev,
+ mlx5_core_net(mparent->mdev));
+ if (!mplane)
+ return ERR_PTR(-ENOMEM);
+
+ mplane->port = kcalloc(mparent->num_plane * mparent->num_ports,
+ sizeof(*mplane->port), GFP_KERNEL);
+ if (!mplane->port) {
+ ret = -ENOMEM;
+ goto fail_kcalloc;
+ }
+
+ mplane->ib_dev.type = type;
+ mplane->mdev = mparent->mdev;
+ mplane->num_ports = mparent->num_plane;
+ mplane->sub_dev_name = name;
+ mplane->ib_dev.phys_port_cnt = mplane->num_ports;
+
+ ret = __mlx5_ib_add(mplane, &plane_profile);
+ if (ret)
+ goto fail_ib_add;
+
+ mparent->smi_dev = mplane;
+ return &mplane->ib_dev;
+
+fail_ib_add:
+ kfree(mplane->port);
+fail_kcalloc:
+ ib_dealloc_device(&mplane->ib_dev);
+ return ERR_PTR(ret);
+}
+
+static void mlx5_ib_del_sub_dev(struct ib_device *sub_dev)
+{
+ struct mlx5_ib_dev *mdev = to_mdev(sub_dev);
+
+ to_mdev(sub_dev->parent)->smi_dev = NULL;
+ __mlx5_ib_remove(mdev, mdev->profile, MLX5_IB_STAGE_MAX);
+}
+
+static int mlx5r_mp_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = idev->mdev;
struct mlx5_ib_multiport_info *mpi;
struct mlx5_ib_dev *dev;
bool bound = false;
@@ -6737,24 +4978,25 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev)
mpi = kzalloc(sizeof(*mpi), GFP_KERNEL);
if (!mpi)
- return NULL;
+ return -ENOMEM;
mpi->mdev = mdev;
-
err = mlx5_query_nic_vport_system_image_guid(mdev,
&mpi->sys_image_guid);
if (err) {
kfree(mpi);
- return NULL;
+ return err;
}
mutex_lock(&mlx5_ib_multiport_mutex);
list_for_each_entry(dev, &mlx5_ib_dev_list, ib_dev_list) {
- if (dev->sys_image_guid == mpi->sys_image_guid)
+ if (dev->sys_image_guid == mpi->sys_image_guid &&
+ mlx5_core_same_coredev_type(dev->mdev, mpi->mdev))
bound = mlx5_ib_bind_slave_port(dev, mpi);
if (bound) {
rdma_roce_rescan_device(&dev->ib_dev);
+ mpi->ibdev->ib_active = true;
break;
}
}
@@ -6766,119 +5008,173 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev)
}
mutex_unlock(&mlx5_ib_multiport_mutex);
- return mpi;
+ auxiliary_set_drvdata(adev, mpi);
+ return 0;
}
-static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
+static void mlx5r_mp_remove(struct auxiliary_device *adev)
{
- enum rdma_link_layer ll;
- struct mlx5_ib_dev *dev;
- int port_type_cap;
- int num_ports;
+ struct mlx5_ib_multiport_info *mpi;
- printk_once(KERN_INFO "%s", mlx5_version);
+ mpi = auxiliary_get_drvdata(adev);
+ mutex_lock(&mlx5_ib_multiport_mutex);
+ if (mpi->ibdev)
+ mlx5_ib_unbind_slave_port(mpi->ibdev, mpi);
+ else
+ list_del(&mpi->list);
+ mutex_unlock(&mlx5_ib_multiport_mutex);
+ kfree(mpi);
+}
- if (MLX5_ESWITCH_MANAGER(mdev) &&
- mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
- if (!mlx5_core_mp_enabled(mdev))
- mlx5_ib_register_vport_reps(mdev);
- return mdev;
- }
+static int mlx5r_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = idev->mdev;
+ const struct mlx5_ib_profile *profile;
+ int port_type_cap, num_ports, ret;
+ enum rdma_link_layer ll;
+ struct mlx5_ib_dev *dev;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
- if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET)
- return mlx5_ib_add_slave_port(mdev);
-
num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
MLX5_CAP_GEN(mdev, num_vhca_ports));
- dev = ib_alloc_device(mlx5_ib_dev, ib_dev);
+ dev = ib_alloc_device_with_net(mlx5_ib_dev, ib_dev,
+ mlx5_core_net(mdev));
if (!dev)
- return NULL;
+ return -ENOMEM;
+
+ if (ll == IB_LINK_LAYER_INFINIBAND) {
+ ret = mlx5_ib_get_plane_num(mdev, &dev->num_plane);
+ if (ret)
+ goto fail;
+ }
+
dev->port = kcalloc(num_ports, sizeof(*dev->port),
GFP_KERNEL);
if (!dev->port) {
- ib_dealloc_device((struct ib_device *)dev);
- return NULL;
+ ret = -ENOMEM;
+ goto fail;
}
dev->mdev = mdev;
dev->num_ports = num_ports;
+ dev->ib_dev.phys_port_cnt = num_ports;
+
+ if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_get_roce_state(mdev))
+ profile = &raw_eth_profile;
+ else
+ profile = &pf_profile;
+
+ ret = __mlx5_ib_add(dev, profile);
+ if (ret)
+ goto fail_ib_add;
+
+ auxiliary_set_drvdata(adev, dev);
+ return 0;
- return __mlx5_ib_add(dev, &pf_profile);
+fail_ib_add:
+ kfree(dev->port);
+fail:
+ ib_dealloc_device(&dev->ib_dev);
+ return ret;
}
-static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
+static void mlx5r_remove(struct auxiliary_device *adev)
{
- struct mlx5_ib_multiport_info *mpi;
struct mlx5_ib_dev *dev;
- if (MLX5_ESWITCH_MANAGER(mdev) && context == mdev) {
- mlx5_ib_unregister_vport_reps(mdev);
- return;
- }
-
- if (mlx5_core_is_mp_slave(mdev)) {
- mpi = context;
- mutex_lock(&mlx5_ib_multiport_mutex);
- if (mpi->ibdev)
- mlx5_ib_unbind_slave_port(mpi->ibdev, mpi);
- list_del(&mpi->list);
- mutex_unlock(&mlx5_ib_multiport_mutex);
- return;
- }
-
- dev = context;
+ dev = auxiliary_get_drvdata(adev);
__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
}
-static struct mlx5_interface mlx5_ib_interface = {
- .add = mlx5_ib_add,
- .remove = mlx5_ib_remove,
- .protocol = MLX5_INTERFACE_PROTOCOL_IB,
+static const struct auxiliary_device_id mlx5r_mp_id_table[] = {
+ { .name = MLX5_ADEV_NAME ".multiport", },
+ {},
};
-unsigned long mlx5_ib_get_xlt_emergency_page(void)
-{
- mutex_lock(&xlt_emergency_page_mutex);
- return xlt_emergency_page;
-}
+static const struct auxiliary_device_id mlx5r_id_table[] = {
+ { .name = MLX5_ADEV_NAME ".rdma", },
+ {},
+};
-void mlx5_ib_put_xlt_emergency_page(void)
-{
- mutex_unlock(&xlt_emergency_page_mutex);
-}
+MODULE_DEVICE_TABLE(auxiliary, mlx5r_mp_id_table);
+MODULE_DEVICE_TABLE(auxiliary, mlx5r_id_table);
+
+static struct auxiliary_driver mlx5r_mp_driver = {
+ .name = "multiport",
+ .probe = mlx5r_mp_probe,
+ .remove = mlx5r_mp_remove,
+ .id_table = mlx5r_mp_id_table,
+};
+
+static struct auxiliary_driver mlx5r_driver = {
+ .name = "rdma",
+ .probe = mlx5r_probe,
+ .remove = mlx5r_remove,
+ .id_table = mlx5r_id_table,
+};
static int __init mlx5_ib_init(void)
{
- int err;
+ int ret;
- xlt_emergency_page = __get_free_page(GFP_KERNEL);
+ xlt_emergency_page = (void *)__get_free_page(GFP_KERNEL);
if (!xlt_emergency_page)
return -ENOMEM;
- mutex_init(&xlt_emergency_page_mutex);
-
mlx5_ib_event_wq = alloc_ordered_workqueue("mlx5_ib_event_wq", 0);
if (!mlx5_ib_event_wq) {
- free_page(xlt_emergency_page);
+ free_page((unsigned long)xlt_emergency_page);
return -ENOMEM;
}
+ ret = mlx5_ib_qp_event_init();
+ if (ret)
+ goto qp_event_err;
+
mlx5_ib_odp_init();
+ ret = mlx5r_rep_init();
+ if (ret)
+ goto rep_err;
+ ret = mlx5_data_direct_driver_register();
+ if (ret)
+ goto dd_err;
+ ret = auxiliary_driver_register(&mlx5r_mp_driver);
+ if (ret)
+ goto mp_err;
+ ret = auxiliary_driver_register(&mlx5r_driver);
+ if (ret)
+ goto drv_err;
- err = mlx5_register_interface(&mlx5_ib_interface);
+ return 0;
- return err;
+drv_err:
+ auxiliary_driver_unregister(&mlx5r_mp_driver);
+mp_err:
+ mlx5_data_direct_driver_unregister();
+dd_err:
+ mlx5r_rep_cleanup();
+rep_err:
+ mlx5_ib_qp_event_cleanup();
+qp_event_err:
+ destroy_workqueue(mlx5_ib_event_wq);
+ free_page((unsigned long)xlt_emergency_page);
+ return ret;
}
static void __exit mlx5_ib_cleanup(void)
{
- mlx5_unregister_interface(&mlx5_ib_interface);
+ mlx5_data_direct_driver_unregister();
+ auxiliary_driver_unregister(&mlx5r_driver);
+ auxiliary_driver_unregister(&mlx5r_mp_driver);
+ mlx5r_rep_cleanup();
+
+ mlx5_ib_qp_event_cleanup();
destroy_workqueue(mlx5_ib_event_wq);
- mutex_destroy(&xlt_emergency_page_mutex);
- free_page(xlt_emergency_page);
+ free_page((unsigned long)xlt_emergency_page);
}
module_init(mlx5_ib_init);
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 9f90be296ee0..af321f6ef7f5 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -30,201 +30,66 @@
* SOFTWARE.
*/
-#include <linux/module.h>
-#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include "mlx5_ib.h"
-/* @umem: umem object to scan
- * @addr: ib virtual address requested by the user
- * @max_page_shift: high limit for page_shift - 0 means no limit
- * @count: number of PAGE_SIZE pages covered by umem
- * @shift: page shift for the compound pages found in the region
- * @ncont: number of compund pages
- * @order: log2 of the number of compound pages
+/*
+ * Fill in a physical address list. ib_umem_num_dma_blocks() entries will be
+ * filled in the pas array.
*/
-void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
- unsigned long max_page_shift,
- int *count, int *shift,
- int *ncont, int *order)
+void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas,
+ u64 access_flags)
{
- unsigned long tmp;
- unsigned long m;
- u64 base = ~0, p = 0;
- u64 len, pfn;
- int i = 0;
- struct scatterlist *sg;
- int entry;
- unsigned long page_shift = umem->page_shift;
-
- if (umem->is_odp) {
- *ncont = ib_umem_page_count(umem);
- *count = *ncont << (page_shift - PAGE_SHIFT);
- *shift = page_shift;
- if (order)
- *order = ilog2(roundup_pow_of_two(*ncont));
-
- return;
- }
-
- addr = addr >> page_shift;
- tmp = (unsigned long)addr;
- m = find_first_bit(&tmp, BITS_PER_LONG);
- if (max_page_shift)
- m = min_t(unsigned long, max_page_shift - page_shift, m);
-
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- len = sg_dma_len(sg) >> page_shift;
- pfn = sg_dma_address(sg) >> page_shift;
- if (base + p != pfn) {
- /* If either the offset or the new
- * base are unaligned update m
- */
- tmp = (unsigned long)(pfn | p);
- if (!IS_ALIGNED(tmp, 1 << m))
- m = find_first_bit(&tmp, BITS_PER_LONG);
-
- base = pfn;
- p = 0;
- }
-
- p += len;
- i += len;
- }
+ struct ib_block_iter biter;
- if (i) {
- m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);
-
- if (order)
- *order = ilog2(roundup_pow_of_two(i) >> m);
-
- *ncont = DIV_ROUND_UP(i, (1 << m));
- } else {
- m = 0;
-
- if (order)
- *order = 0;
-
- *ncont = 0;
+ rdma_umem_for_each_dma_block (umem, &biter, page_size) {
+ *pas = cpu_to_be64(rdma_block_iter_dma_address(&biter) |
+ access_flags);
+ pas++;
}
- *shift = page_shift + m;
- *count = i;
-}
-
-static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
-{
- u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK;
-
- if (umem_dma & ODP_READ_ALLOWED_BIT)
- mtt_entry |= MLX5_IB_MTT_READ;
- if (umem_dma & ODP_WRITE_ALLOWED_BIT)
- mtt_entry |= MLX5_IB_MTT_WRITE;
-
- return mtt_entry;
}
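/*
 * Sketch of the typical caller pattern (example_fill_pas is a hypothetical
 * helper, not part of this patch): the caller picks a page_size, sizes the
 * pas array from ib_umem_num_dma_blocks(), and lets the helper above fill it.
 */
static inline int example_fill_pas(struct ib_umem *umem, size_t page_size,
				   u64 access_flags, __be64 **out_pas)
{
	size_t nblocks = ib_umem_num_dma_blocks(umem, page_size);
	__be64 *pas = kvcalloc(nblocks, sizeof(*pas), GFP_KERNEL);

	if (!pas)
		return -ENOMEM;
	/* One entry per DMA block of page_size bytes, OR'd with access bits */
	mlx5_ib_populate_pas(umem, page_size, pas, access_flags);
	*out_pas = pas;
	return 0;
}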
/*
- * Populate the given array with bus addresses from the umem.
- *
- * dev - mlx5_ib device
- * umem - umem to use to fill the pages
- * page_shift - determines the page size used in the resulting array
- * offset - offset into the umem to start from,
- * only implemented for ODP umems
- * num_pages - total number of pages to fill
- * pas - bus addresses array to fill
- * access_flags - access flags to set on all present pages.
- use enum mlx5_ib_mtt_access_flags for this.
+ * Compute the page shift and page_offset for mailboxes that use a quantized
+ * page_offset. The granulatity of the page offset scales according to page
+ * size.
*/
-void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
- int page_shift, size_t offset, size_t num_pages,
- __be64 *pas, int access_flags)
+unsigned long __mlx5_umem_find_best_quantized_pgoff(
+ struct ib_umem *umem, unsigned long pgsz_bitmap,
+ unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale,
+ unsigned int *page_offset_quantized)
{
- unsigned long umem_page_shift = umem->page_shift;
- int shift = page_shift - umem_page_shift;
- int mask = (1 << shift) - 1;
- int i, k, idx;
- u64 cur = 0;
- u64 base;
- int len;
- struct scatterlist *sg;
- int entry;
-
- if (umem->is_odp) {
- WARN_ON(shift != 0);
- WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE));
-
- for (i = 0; i < num_pages; ++i) {
- dma_addr_t pa =
- to_ib_umem_odp(umem)->dma_list[offset + i];
-
- pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
- }
- return;
- }
-
- i = 0;
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- len = sg_dma_len(sg) >> umem_page_shift;
- base = sg_dma_address(sg);
-
- /* Skip elements below offset */
- if (i + len < offset << shift) {
- i += len;
- continue;
- }
-
- /* Skip pages below offset */
- if (i < offset << shift) {
- k = (offset << shift) - i;
- i = offset << shift;
- } else {
- k = 0;
- }
-
- for (; k < len; k++) {
- if (!(i & mask)) {
- cur = base + (k << umem_page_shift);
- cur |= access_flags;
- idx = (i >> shift) - offset;
-
- pas[idx] = cpu_to_be64(cur);
- mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
- i >> shift, be64_to_cpu(pas[idx]));
- }
- i++;
-
- /* Stop after num_pages reached */
- if (i >> shift >= offset + num_pages)
- return;
- }
+ const u64 page_offset_mask = (1UL << page_offset_bits) - 1;
+ unsigned long page_size;
+ u64 page_offset;
+
+ page_size = ib_umem_find_best_pgoff(umem, pgsz_bitmap, pgoff_bitmask);
+ if (!page_size)
+ return 0;
+
+ /*
+ * page size is the largest possible page size.
+ *
+ * Reduce the page_size, and thus the page_offset and quanta, until the
+ * page_offset fits into the mailbox field. Once page_size < scale this
+ * loop is guaranteed to terminate.
+ */
+ page_offset = ib_umem_dma_offset(umem, page_size);
+ while (page_offset & ~(u64)(page_offset_mask * (page_size / scale))) {
+ page_size /= 2;
+ page_offset = ib_umem_dma_offset(umem, page_size);
}
-}
-
-void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
- int page_shift, __be64 *pas, int access_flags)
-{
- return __mlx5_ib_populate_pas(dev, umem, page_shift, 0,
- ib_umem_num_pages(umem), pas,
- access_flags);
-}
-int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
-{
- u64 page_size;
- u64 page_mask;
- u64 off_size;
- u64 off_mask;
- u64 buf_off;
-
- page_size = (u64)1 << page_shift;
- page_mask = page_size - 1;
- buf_off = addr & page_mask;
- off_size = page_size >> 6;
- off_mask = off_size - 1;
-
- if (buf_off & off_mask)
- return -EINVAL;
- *offset = buf_off >> ilog2(off_size);
- return 0;
+ /*
+ * The address is not aligned, or otherwise cannot be represented by the
+ * page_offset.
+ */
+ if (!(pgsz_bitmap & page_size))
+ return 0;
+
+ *page_offset_quantized =
+ (unsigned long)page_offset / (page_size / scale);
+ if (WARN_ON(*page_offset_quantized > page_offset_mask))
+ return 0;
+ return page_size;
}
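/*
 * Worked example with hypothetical numbers: with scale = 64 and a umem whose
 * best page size is 4096, the quantum is page_size / scale = 64 bytes. A DMA
 * offset of 320 bytes into that page gives
 *   page_offset_quantized = 320 / 64 = 5
 * provided 5 fits in the page_offset_bits mailbox field; otherwise the loop
 * halves page_size (the quantum becomes 32, 16, ...) until the offset is
 * representable or the resulting page size is no longer in pgsz_bitmap, in
 * which case 0 is returned and the caller must fail or fall back.
 */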
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index a043af7ee366..09d82d5f95e3 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1,33 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2020, Intel Corporation. All rights reserved.
*/
#ifndef MLX5_IB_H
@@ -51,6 +25,8 @@
#include <rdma/mlx5_user_ioctl_verbs.h>
#include "srq.h"
+#include "qp.h"
+#include "macsec.h"
#define mlx5_ib_dbg(_dev, format, arg...) \
dev_dbg(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \
@@ -64,12 +40,74 @@
dev_warn(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \
__LINE__, current->pid, ##arg)
-#define field_avail(type, fld, sz) (offsetof(type, fld) + \
- sizeof(((type *)0)->fld) <= (sz))
+#define mlx5_ib_log(lvl, _dev, format, arg...) \
+ dev_printk(lvl, &(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, ##arg)
+
#define MLX5_IB_DEFAULT_UIDX 0xffffff
#define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)
-#define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size)
+static __always_inline unsigned long
+__mlx5_log_page_size_to_bitmap(unsigned int log_pgsz_bits,
+ unsigned int pgsz_shift)
+{
+ unsigned int largest_pg_shift =
+ min_t(unsigned long, (1ULL << log_pgsz_bits) - 1 + pgsz_shift,
+ BITS_PER_LONG - 1);
+
+ /*
+ * Despite a command allowing it, the device does not support lower than
+ * 4k page size.
+ */
+ pgsz_shift = max_t(unsigned int, MLX5_ADAPTER_PAGE_SHIFT, pgsz_shift);
+ return GENMASK(largest_pg_shift, pgsz_shift);
+}
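/*
 * Worked example with hypothetical field widths: with log_pgsz_bits = 5 and
 * pgsz_shift = MLX5_ADAPTER_PAGE_SHIFT (12), the device can express page
 * shifts 12..43, so the helper returns GENMASK(43, 12) - every power-of-two
 * page size from 4KB up to 8TB. The max_t() clamp keeps the low end at 4KB
 * even if a command nominally allows a smaller pgsz_shift.
 */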
+
+static __always_inline unsigned long
+__mlx5_page_offset_to_bitmask(unsigned int page_offset_bits,
+ unsigned int offset_shift)
+{
+ unsigned int largest_offset_shift =
+ min_t(unsigned long, page_offset_bits - 1 + offset_shift,
+ BITS_PER_LONG - 1);
+
+ return GENMASK(largest_offset_shift, offset_shift);
+}
+
+/*
+ * QP/CQ/WQ/etc type commands take a page offset that satisfies:
+ * page_offset_quantized * (page_size/scale) = page_offset
+ * Which restricts allowed page sizes to ones that satisfy the above.
+ */
+unsigned long __mlx5_umem_find_best_quantized_pgoff(
+ struct ib_umem *umem, unsigned long pgsz_bitmap,
+ unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale,
+ unsigned int *page_offset_quantized);
+#define mlx5_umem_find_best_quantized_pgoff(umem, typ, log_pgsz_fld, \
+ pgsz_shift, page_offset_fld, \
+ scale, page_offset_quantized) \
+ __mlx5_umem_find_best_quantized_pgoff( \
+ umem, \
+ __mlx5_log_page_size_to_bitmap( \
+ __mlx5_bit_sz(typ, log_pgsz_fld), pgsz_shift), \
+ __mlx5_bit_sz(typ, page_offset_fld), \
+ GENMASK(31, order_base_2(scale)), scale, \
+ page_offset_quantized)
+
+#define mlx5_umem_find_best_cq_quantized_pgoff(umem, typ, log_pgsz_fld, \
+ pgsz_shift, page_offset_fld, \
+ scale, page_offset_quantized) \
+ __mlx5_umem_find_best_quantized_pgoff( \
+ umem, \
+ __mlx5_log_page_size_to_bitmap( \
+ __mlx5_bit_sz(typ, log_pgsz_fld), pgsz_shift), \
+ __mlx5_bit_sz(typ, page_offset_fld), 0, scale, \
+ page_offset_quantized)
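/*
 * Illustrative use (a sketch; the qpc field names are assumed from the PRM
 * layout and may differ per command):
 *
 *	unsigned int page_offset_quantized;
 *	unsigned long page_size;
 *
 *	page_size = mlx5_umem_find_best_quantized_pgoff(
 *		umem, qpc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT,
 *		page_offset, 64, &page_offset_quantized);
 *	if (!page_size)
 *		return -EINVAL;
 */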
+
+enum {
+ MLX5_IB_MMAP_OFFSET_START = 9,
+ MLX5_IB_MMAP_OFFSET_END = 255,
+};
enum {
MLX5_IB_MMAP_CMD_SHIFT = 8,
@@ -118,9 +156,31 @@ enum {
MLX5_MEMIC_BASE_SIZE = 1 << MLX5_MEMIC_BASE_ALIGN,
};
-#define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) \
- (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity))
-#define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))
+enum mlx5_ib_mmap_type {
+ MLX5_IB_MMAP_TYPE_MEMIC = 1,
+ MLX5_IB_MMAP_TYPE_VAR = 2,
+ MLX5_IB_MMAP_TYPE_UAR_WC = 3,
+ MLX5_IB_MMAP_TYPE_UAR_NC = 4,
+ MLX5_IB_MMAP_TYPE_MEMIC_OP = 5,
+};
+
+struct mlx5_bfreg_info {
+ u32 *sys_pages;
+ int num_low_latency_bfregs;
+ unsigned int *count;
+
+ /*
+ * protect bfreg allocation data structs
+ */
+ struct mutex lock;
+ u32 ver;
+ u8 lib_uar_4k : 1;
+ u8 lib_uar_dyn : 1;
+ u32 num_sys_pages;
+ u32 num_static_sys_pages;
+ u32 total_num_bfregs;
+ u32 num_dyn_bfregs;
+};
struct mlx5_ib_ucontext {
struct ib_ucontext ibucontext;
@@ -135,7 +195,6 @@ struct mlx5_ib_ucontext {
u32 tdn;
u64 lib_caps;
- DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES);
u16 devx_uid;
/* For RoCE LAG TX affinity */
atomic_t tx_port_affinity;
@@ -168,8 +227,20 @@ enum {
#define MLX5_IB_NUM_FLOW_FT (MLX5_IB_FLOW_LEFTOVERS_PRIO + 1)
#define MLX5_IB_NUM_SNIFFER_FTS 2
#define MLX5_IB_NUM_EGRESS_FTS 1
+#define MLX5_IB_NUM_FDB_FTS MLX5_BY_PASS_NUM_REGULAR_PRIOS
+
+struct mlx5_ib_anchor {
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg_goto_table;
+ struct mlx5_flow_group *fg_drop;
+ struct mlx5_flow_handle *rule_goto_table;
+ struct mlx5_flow_handle *rule_drop;
+ unsigned int rule_goto_table_ref;
+};
+
struct mlx5_ib_flow_prio {
struct mlx5_flow_table *flow_table;
+ struct mlx5_ib_anchor anchor;
unsigned int refcount;
};
@@ -192,6 +263,38 @@ struct mlx5_ib_flow_matcher {
struct mlx5_core_dev *mdev;
atomic_t usecnt;
u8 match_criteria_enable;
+ u32 ib_port;
+};
+
+struct mlx5_ib_steering_anchor {
+ struct mlx5_ib_flow_prio *ft_prio;
+ struct mlx5_ib_dev *dev;
+ atomic_t usecnt;
+};
+
+struct mlx5_ib_pp {
+ u16 index;
+ struct mlx5_core_dev *mdev;
+};
+
+enum mlx5_ib_optional_counter_type {
+ MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS,
+ MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS,
+ MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS,
+ MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS,
+ MLX5_IB_OPCOUNTER_RDMA_TX_BYTES,
+ MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS,
+ MLX5_IB_OPCOUNTER_RDMA_RX_BYTES,
+
+ MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP,
+ MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP,
+ MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP,
+ MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP,
+ MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP,
+ MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP,
+ MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP,
+
+ MLX5_IB_OPCOUNTER_MAX,
};
struct mlx5_ib_flow_db {
@@ -199,8 +302,13 @@ struct mlx5_ib_flow_db {
struct mlx5_ib_flow_prio egress_prios[MLX5_IB_NUM_FLOW_FT];
struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS];
struct mlx5_ib_flow_prio egress[MLX5_IB_NUM_EGRESS_FTS];
- struct mlx5_ib_flow_prio fdb;
+ struct mlx5_ib_flow_prio fdb[MLX5_IB_NUM_FDB_FTS];
+ struct mlx5_ib_flow_prio rdma_rx[MLX5_IB_NUM_FLOW_FT];
+ struct mlx5_ib_flow_prio rdma_tx[MLX5_IB_NUM_FLOW_FT];
+ struct mlx5_ib_flow_prio opfcs[MLX5_IB_OPCOUNTER_MAX];
struct mlx5_flow_table *lag_demux_ft;
+ struct mlx5_ib_flow_prio *rdma_transport_rx[MLX5_RDMA_TRANSPORT_BYPASS_PRIO];
+ struct mlx5_ib_flow_prio *rdma_transport_tx[MLX5_RDMA_TRANSPORT_BYPASS_PRIO];
/* Protect flow steering bypass flow tables
* when add/del flow rules.
* only single add/removal of flow steering rule could be done
@@ -210,16 +318,9 @@ struct mlx5_ib_flow_db {
};
/* Use macros here so that don't have to duplicate
- * enum ib_send_flags and enum ib_qp_type for low-level driver
+ * enum ib_qp_type for low-level driver
*/
-#define MLX5_IB_SEND_UMR_ENABLE_MR (IB_SEND_RESERVED_START << 0)
-#define MLX5_IB_SEND_UMR_DISABLE_MR (IB_SEND_RESERVED_START << 1)
-#define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 2)
-#define MLX5_IB_SEND_UMR_UPDATE_XLT (IB_SEND_RESERVED_START << 3)
-#define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (IB_SEND_RESERVED_START << 4)
-#define MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS IB_SEND_RESERVED_END
-
#define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1
/*
* IB_QPT_GSI creates the software wrapper around GSI, and MLX5_IB_QPT_HW_GSI
@@ -230,9 +331,6 @@ struct mlx5_ib_flow_db {
#define MLX5_IB_QPT_DCT IB_QPT_RESERVED4
#define MLX5_IB_WR_UMR IB_WR_RESERVED1
-#define MLX5_IB_UMR_OCTOWORD 16
-#define MLX5_IB_UMR_XLT_ALIGNMENT 64
-
#define MLX5_IB_UPD_XLT_ZAP BIT(0)
#define MLX5_IB_UPD_XLT_ENABLE BIT(1)
#define MLX5_IB_UPD_XLT_ATOMIC BIT(2)
@@ -240,18 +338,15 @@ struct mlx5_ib_flow_db {
#define MLX5_IB_UPD_XLT_PD BIT(4)
#define MLX5_IB_UPD_XLT_ACCESS BIT(5)
#define MLX5_IB_UPD_XLT_INDIRECT BIT(6)
+#define MLX5_IB_UPD_XLT_DOWNGRADE BIT(7)
+#define MLX5_IB_UPD_XLT_KEEP_PGSZ BIT(8)
/* Private QP creation flags to be passed in ib_qp_init_attr.create_flags.
*
* These flags are intended for internal use by the mlx5_ib driver, and they
* rely on the range reserved for that use in the ib_qp_create_flags enum.
*/
-
-/* Create a UD QP whose source QP number is 1 */
-static inline enum ib_qp_create_flags mlx5_ib_create_qp_sqpn_qp1(void)
-{
- return IB_QP_CREATE_RESERVED_START;
-}
+#define MLX5_IB_QP_CREATE_SQPN_QP1 IB_QP_CREATE_RESERVED_START
struct wr_list {
u16 opcode;
@@ -282,6 +377,7 @@ struct mlx5_ib_wq {
unsigned head;
unsigned tail;
u16 cur_post;
+ u16 last_poll;
void *cur_edge;
};
@@ -294,6 +390,7 @@ enum mlx5_ib_wq_flags {
#define MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES 16
#define MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES 6
#define MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES 13
+#define MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES 3
struct mlx5_ib_rwq {
struct ib_wq ibwq;
@@ -309,7 +406,6 @@ struct mlx5_ib_rwq {
struct ib_umem *umem;
size_t buf_size;
unsigned int page_shift;
- int create_type;
struct mlx5_db db;
u32 user_index;
u32 wqe_count;
@@ -318,17 +414,6 @@ struct mlx5_ib_rwq {
u32 create_flags; /* Use enum mlx5_ib_wq_flags */
};
-enum {
- MLX5_QP_USER,
- MLX5_QP_KERNEL,
- MLX5_QP_EMPTY
-};
-
-enum {
- MLX5_WQ_USER,
- MLX5_WQ_KERNEL
-};
-
struct mlx5_ib_rwq_ind_table {
struct ib_rwq_ind_table ib_rwq_ind_tbl;
u32 rqtn;
@@ -350,7 +435,7 @@ struct mlx5_ib_qp_base {
struct mlx5_ib_qp_trans {
struct mlx5_ib_qp_base base;
u16 xrcdn;
- u8 alt_port;
+ u32 alt_port;
u8 atomic_rd_en;
u8 resp_depth;
};
@@ -395,6 +480,22 @@ struct mlx5_ib_dct {
u32 *in;
};
+struct mlx5_ib_gsi_qp {
+ struct ib_qp *rx_qp;
+ u32 port_num;
+ struct ib_qp_cap cap;
+ struct ib_cq *cq;
+ struct mlx5_ib_gsi_wr *outstanding_wrs;
+ u32 outstanding_pi, outstanding_ci;
+ int num_qps;
+ /* Protects access to the tx_qps. Post send operations synchronize
+ * with tx_qp creation in setup_qp(). Also protects the
+ * outstanding_wrs array and indices.
+ */
+ spinlock_t lock;
+ struct ib_qp **tx_qps;
+};
+
struct mlx5_ib_qp {
struct ib_qp ibqp;
union {
@@ -402,6 +503,7 @@ struct mlx5_ib_qp {
struct mlx5_ib_raw_packet_qp raw_packet_qp;
struct mlx5_ib_rss_qp rss_qp;
struct mlx5_ib_dct dct;
+ struct mlx5_ib_gsi_qp gsi;
};
struct mlx5_frag_buf buf;
@@ -415,33 +517,37 @@ struct mlx5_ib_qp {
/* serialize qp state modifications
*/
struct mutex mutex;
+ /* cached variant of create_flags from struct ib_qp_init_attr */
u32 flags;
- u8 port;
+ u32 port;
u8 state;
- int wq_sig;
- int scat_cqe;
int max_inline_data;
struct mlx5_bf bf;
- int has_rq;
+ u8 has_rq:1;
+ u8 is_rss:1;
+ u8 is_ooo_rq:1;
/* only for user space QPs. For kernel
* we have it from the bf object
*/
int bfregn;
- int create_type;
-
- /* Store signature errors */
- bool signature_en;
-
struct list_head qps_list;
struct list_head cq_recv_list;
struct list_head cq_send_list;
struct mlx5_rate_limit rl;
u32 underlay_qpn;
u32 flags_en;
- /* storage for qp sub type when core qp type is IB_QPT_DRIVER */
- enum ib_qp_type qp_sub_type;
+ /*
+ * IB/core doesn't store low-level QP types, so
+ * store both MLX and IBTA types in the field below.
+ */
+ enum ib_qp_type type;
+ /* A flag to indicate if there's a new counter is configured
+ * but not take effective
+ */
+ u32 counter_pending;
+ u16 gsi_lag_port;
};
struct mlx5_ib_cq_buf {
@@ -452,48 +558,9 @@ struct mlx5_ib_cq_buf {
int nent;
};
-enum mlx5_ib_qp_flags {
- MLX5_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
- MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
- MLX5_IB_QP_CROSS_CHANNEL = IB_QP_CREATE_CROSS_CHANNEL,
- MLX5_IB_QP_MANAGED_SEND = IB_QP_CREATE_MANAGED_SEND,
- MLX5_IB_QP_MANAGED_RECV = IB_QP_CREATE_MANAGED_RECV,
- MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 5,
- /* QP uses 1 as its source QP number */
- MLX5_IB_QP_SQPN_QP1 = 1 << 6,
- MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7,
- MLX5_IB_QP_RSS = 1 << 8,
- MLX5_IB_QP_CVLAN_STRIPPING = 1 << 9,
- MLX5_IB_QP_UNDERLAY = 1 << 10,
- MLX5_IB_QP_PCI_WRITE_END_PADDING = 1 << 11,
- MLX5_IB_QP_TUNNEL_OFFLOAD = 1 << 12,
- MLX5_IB_QP_PACKET_BASED_CREDIT = 1 << 13,
-};
-
-struct mlx5_umr_wr {
- struct ib_send_wr wr;
- u64 virt_addr;
- u64 offset;
- struct ib_pd *pd;
- unsigned int page_shift;
- unsigned int xlt_size;
- u64 length;
- int access_flags;
- u32 mkey;
-};
-
-static inline const struct mlx5_umr_wr *umr_wr(const struct ib_send_wr *wr)
-{
- return container_of(wr, struct mlx5_umr_wr, wr);
-}
-
-struct mlx5_shared_mr_info {
- int mr_id;
- struct ib_umem *umem;
-};
-
enum mlx5_ib_cq_pr_flags {
MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD = 1 << 0,
+ MLX5_IB_CQ_PR_FLAGS_REAL_TIME_TS = 1 << 1,
};
struct mlx5_ib_cq {
@@ -556,17 +623,43 @@ enum mlx5_ib_mtt_access_flags {
MLX5_IB_MTT_WRITE = (1 << 1),
};
-struct mlx5_ib_dm {
- struct ib_dm ibdm;
- phys_addr_t dev_addr;
- u32 type;
- size_t size;
- union {
- struct {
- u32 obj_id;
- } icm_dm;
- /* other dm types specific params should be added here */
- };
+struct mlx5_user_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
+ u8 mmap_flag;
+ u64 address;
+ u32 page_idx;
+};
+
+enum mlx5_mkey_type {
+ MLX5_MKEY_MR = 1,
+ MLX5_MKEY_MW,
+ MLX5_MKEY_INDIRECT_DEVX,
+ MLX5_MKEY_NULL,
+ MLX5_MKEY_IMPLICIT_CHILD,
+};
+
+/* Used for non-existent ph value */
+#define MLX5_IB_NO_PH 0xff
+
+struct mlx5r_cache_rb_key {
+ u8 ats:1;
+ u8 ph;
+ u16 st_index;
+ unsigned int access_mode;
+ unsigned int access_flags;
+ unsigned int ndescs;
+};
+
+struct mlx5_ib_mkey {
+ u32 key;
+ enum mlx5_mkey_type type;
+ unsigned int ndescs;
+ struct wait_queue_head wait;
+ refcount_t usecount;
+ /* Cacheable user Mkey must hold either a rb_key or a cache_ent. */
+ struct mlx5r_cache_rb_key rb_key;
+ struct mlx5_cache_ent *cache_ent;
+ u8 cacheable : 1;
};
#define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
@@ -582,33 +675,68 @@ struct mlx5_ib_dm {
IB_ACCESS_REMOTE_READ |\
IB_ZERO_BASED)
+#define mlx5_update_odp_stats(mr, counter_name, value) \
+ atomic64_add(value, &((mr)->odp_stats.counter_name))
+
+#define mlx5_update_odp_stats_with_handled(mr, counter_name, value) \
+ do { \
+ mlx5_update_odp_stats(mr, counter_name, value); \
+ atomic64_add(1, &((mr)->odp_stats.counter_name##_handled)); \
+ } while (0)
+
struct mlx5_ib_mr {
- struct ib_mr ibmr;
- void *descs;
- dma_addr_t desc_map;
- int ndescs;
- int max_descs;
- int desc_size;
- int access_mode;
- struct mlx5_core_mkey mmkey;
- struct ib_umem *umem;
- struct mlx5_shared_mr_info *smr_info;
- struct list_head list;
- int order;
- bool allocated_from_cache;
- int npages;
- struct mlx5_ib_dev *dev;
- u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
- struct mlx5_core_sig_ctx *sig;
- int live;
- void *descs_alloc;
- int access_flags; /* Needed for rereg MR */
+ struct ib_mr ibmr;
+ struct mlx5_ib_mkey mmkey;
+
+ struct ib_umem *umem;
+ /* The mr is data direct related */
+ u8 data_direct :1;
- struct mlx5_ib_mr *parent;
- atomic_t num_leaf_free;
- wait_queue_head_t q_leaf_free;
- struct mlx5_async_work cb_work;
- atomic_t num_pending_prefetch;
+ union {
+ /* Used only by kernel MRs (umem == NULL) */
+ struct {
+ void *descs;
+ void *descs_alloc;
+ dma_addr_t desc_map;
+ int max_descs;
+ int desc_size;
+ int access_mode;
+
+ /* For Kernel IB_MR_TYPE_INTEGRITY */
+ struct mlx5_core_sig_ctx *sig;
+ struct mlx5_ib_mr *pi_mr;
+ struct mlx5_ib_mr *klm_mr;
+ struct mlx5_ib_mr *mtt_mr;
+ u64 data_iova;
+ u64 pi_iova;
+ int meta_ndescs;
+ int meta_length;
+ int data_length;
+ };
+
+ /* Used only by User MRs (umem != NULL) */
+ struct {
+ unsigned int page_shift;
+ /* Current access_flags */
+ int access_flags;
+
+ /* For User ODP */
+ struct mlx5_ib_mr *parent;
+ struct xarray implicit_children;
+ union {
+ struct work_struct work;
+ } odp_destroy;
+ struct ib_odp_counters odp_stats;
+ bool is_odp_implicit;
+ /* The affiliated data-direct crossed MR */
+ struct mlx5_ib_mr *dd_crossed_mr;
+ struct list_head dd_node;
+ u8 revoked :1;
+ /* Indicates previous dmabuf page fault occurred */
+ u8 dmabuf_faulted:1;
+ struct mlx5_ib_mkey null_mmkey;
+ };
+ };
};
static inline bool is_odp_mr(struct mlx5_ib_mr *mr)
@@ -617,16 +745,15 @@ static inline bool is_odp_mr(struct mlx5_ib_mr *mr)
mr->umem->is_odp;
}
+static inline bool is_dmabuf_mr(struct mlx5_ib_mr *mr)
+{
+ return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem &&
+ mr->umem->is_dmabuf;
+}
+
struct mlx5_ib_mw {
struct ib_mw ibmw;
- struct mlx5_core_mkey mmkey;
- int ndescs;
-};
-
-struct mlx5_ib_devx_mr {
- struct mlx5_core_mkey mmkey;
- int ndescs;
- struct rcu_head rcu;
+ struct mlx5_ib_mkey mmkey;
};
struct mlx5_ib_umr_context {
@@ -635,84 +762,149 @@ struct mlx5_ib_umr_context {
struct completion done;
};
+enum {
+ MLX5_UMR_STATE_UNINIT,
+ MLX5_UMR_STATE_ACTIVE,
+ MLX5_UMR_STATE_RECOVER,
+ MLX5_UMR_STATE_ERR,
+};
+
struct umr_common {
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_qp *qp;
- /* control access to UMR QP
+ /* Protects from UMR QP overflow
*/
struct semaphore sem;
+ /* Protects from using UMR while the UMR is not active
+ */
+ struct mutex lock;
+ unsigned int state;
+ /* Protects from repeat UMR QP creation */
+ struct mutex init_lock;
};
-enum {
- MLX5_FMR_INVALID,
- MLX5_FMR_VALID,
- MLX5_FMR_BUSY,
+#define NUM_MKEYS_PER_PAGE \
+ ((PAGE_SIZE - sizeof(struct list_head)) / sizeof(u32))
+
+struct mlx5_mkeys_page {
+ u32 mkeys[NUM_MKEYS_PER_PAGE];
+ struct list_head list;
};
+static_assert(sizeof(struct mlx5_mkeys_page) == PAGE_SIZE);
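/*
 * Sizing example, assuming 4KB pages and a 64-bit kernel: sizeof(struct
 * list_head) is 16, so NUM_MKEYS_PER_PAGE = (4096 - 16) / 4 = 1020 and
 * sizeof(struct mlx5_mkeys_page) = 1020 * 4 + 16 = 4096, which is exactly
 * what the static_assert above checks.
 */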
-struct mlx5_cache_ent {
- struct list_head head;
- /* sync access to the cahce entry
- */
- spinlock_t lock;
+struct mlx5_mkeys_queue {
+ struct list_head pages_list;
+ u32 num_pages;
+ unsigned long ci;
+ spinlock_t lock; /* sync list ops */
+};
+struct mlx5_cache_ent {
+ struct mlx5_mkeys_queue mkeys_queue;
+ u32 pending;
char name[4];
- u32 order;
- u32 xlt;
- u32 access_mode;
- u32 page;
- u32 size;
- u32 cur;
+ struct rb_node node;
+ struct mlx5r_cache_rb_key rb_key;
+
+ u8 is_tmp:1;
+ u8 disabled:1;
+ u8 fill_to_high_water:1;
+ u8 tmp_cleanup_scheduled:1;
+
+ /*
+ * - limit is the low water mark for stored mkeys, 2* limit is the
+ * upper water mark.
+ */
+ u32 in_use;
+ u32 limit;
+
+ /* Statistics */
u32 miss;
- u32 limit;
struct mlx5_ib_dev *dev;
- struct work_struct work;
struct delayed_work dwork;
- int pending;
- struct completion compl;
};
-struct mlx5_mr_cache {
+struct mlx5r_async_create_mkey {
+ union {
+ u32 in[MLX5_ST_SZ_BYTES(create_mkey_in)];
+ u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
+ };
+ struct mlx5_async_work cb_work;
+ struct mlx5_cache_ent *ent;
+ u32 mkey;
+};
+
+struct mlx5_mkey_cache {
struct workqueue_struct *wq;
- struct mlx5_cache_ent ent[MAX_MR_CACHE_ENTRIES];
- int stopped;
- struct dentry *root;
+ struct rb_root rb_root;
+ struct mutex rb_lock;
+ struct dentry *fs_root;
unsigned long last_add;
};
-struct mlx5_ib_gsi_qp;
-
struct mlx5_ib_port_resources {
- struct mlx5_ib_resources *devr;
struct mlx5_ib_gsi_qp *gsi;
struct work_struct pkey_change_work;
};
+struct mlx5_data_direct_resources {
+ u32 pdn;
+ u32 mkey;
+ u32 mkey_ro;
+ u8 mkey_ro_valid :1;
+};
+
struct mlx5_ib_resources {
struct ib_cq *c0;
- struct ib_xrcd *x0;
- struct ib_xrcd *x1;
+ struct mutex cq_lock;
+ u32 xrcdn0;
+ u32 xrcdn1;
struct ib_pd *p0;
struct ib_srq *s0;
struct ib_srq *s1;
+ struct mutex srq_lock;
struct mlx5_ib_port_resources ports[2];
- /* Protects changes to the port resources */
- struct mutex mutex;
+};
+
+#define MAX_OPFC_RULES 2
+
+struct mlx5_ib_op_fc {
+ struct mlx5_fc *fc;
+ struct mlx5_flow_handle *rule[MAX_OPFC_RULES];
};
struct mlx5_ib_counters {
- const char **names;
+ struct rdma_stat_desc *descs;
size_t *offsets;
u32 num_q_counters;
u32 num_cong_counters;
u32 num_ext_ppcnt_counters;
+ u32 num_op_counters;
u16 set_id;
- bool set_id_valid;
+ struct mlx5_ib_op_fc opfcs[MLX5_IB_OPCOUNTER_MAX];
};
+int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
+ struct mlx5_ib_op_fc *opfc,
+ enum mlx5_ib_optional_counter_type type);
+
+void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_op_fc *opfc,
+ enum mlx5_ib_optional_counter_type type);
+
+int mlx5r_fs_bind_op_fc(struct ib_qp *qp,
+ struct mlx5_fc *fc_arr[MLX5_IB_OPCOUNTER_MAX],
+ struct xarray *qpn_opfc_xa, u32 port);
+
+void mlx5r_fs_unbind_op_fc(struct ib_qp *qp, struct xarray *qpn_opfc_xa);
+
+void mlx5r_fs_destroy_fcs(struct mlx5_ib_dev *dev,
+ struct mlx5_fc *fc_arr[MLX5_IB_OPCOUNTER_MAX]);
+
struct mlx5_ib_multiport_info;
struct mlx5_ib_multiport {
@@ -725,13 +917,14 @@ struct mlx5_roce {
/* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL
* netdev pointer
*/
- rwlock_t netdev_lock;
- struct net_device *netdev;
struct notifier_block nb;
+ struct netdev_net_notifier nn;
+ struct notifier_block mdev_nb;
+ struct net_device *tracking_netdev;
atomic_t tx_port_affinity;
enum ib_port_state last_port_state;
struct mlx5_ib_dev *dev;
- u8 native_port_num;
+ u32 native_port_num;
};
struct mlx5_ib_port {
@@ -740,13 +933,16 @@ struct mlx5_ib_port {
struct mlx5_ib_dbg_cc_params *dbg_cc_params;
struct mlx5_roce roce;
struct mlx5_eswitch_rep *rep;
+#ifdef CONFIG_MLX5_MACSEC
+ struct mlx5_reserved_gids *reserved_gids;
+#endif
};
struct mlx5_ib_dbg_param {
int offset;
struct mlx5_ib_dev *dev;
struct dentry *dentry;
- u8 port_num;
+ u32 port_num;
};
enum mlx5_ib_dbg_cc_types {
@@ -756,6 +952,7 @@ enum mlx5_ib_dbg_cc_types {
MLX5_IB_DBG_CC_RP_BYTE_RESET,
MLX5_IB_DBG_CC_RP_THRESHOLD,
MLX5_IB_DBG_CC_RP_AI_RATE,
+ MLX5_IB_DBG_CC_RP_MAX_RATE,
MLX5_IB_DBG_CC_RP_HAI_RATE,
MLX5_IB_DBG_CC_RP_MIN_DEC_FAC,
MLX5_IB_DBG_CC_RP_MIN_RATE,
@@ -765,9 +962,12 @@ enum mlx5_ib_dbg_cc_types {
MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD,
MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE,
MLX5_IB_DBG_CC_RP_GD,
+ MLX5_IB_DBG_CC_NP_MIN_TIME_BETWEEN_CNPS,
MLX5_IB_DBG_CC_NP_CNP_DSCP,
MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE,
MLX5_IB_DBG_CC_NP_CNP_PRIO,
+ MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP_VALID,
+ MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP,
MLX5_IB_DBG_CC_MAX,
};
@@ -780,13 +980,6 @@ enum {
MLX5_MAX_DELAY_DROP_TIMEOUT_MS = 100,
};
-struct mlx5_ib_dbg_delay_drop {
- struct dentry *dir_debugfs;
- struct dentry *rqs_cnt_debugfs;
- struct dentry *events_cnt_debugfs;
- struct dentry *timeout_debugfs;
-};
-
struct mlx5_ib_delay_drop {
struct mlx5_ib_dev *dev;
struct work_struct delay_drop_work;
@@ -796,29 +989,29 @@ struct mlx5_ib_delay_drop {
bool activate;
atomic_t events_cnt;
atomic_t rqs_cnt;
- struct mlx5_ib_dbg_delay_drop *dbg;
+ struct dentry *dir_debugfs;
};
enum mlx5_ib_stages {
MLX5_IB_STAGE_INIT,
- MLX5_IB_STAGE_FLOW_DB,
+ MLX5_IB_STAGE_FS,
MLX5_IB_STAGE_CAPS,
MLX5_IB_STAGE_NON_DEFAULT_CB,
MLX5_IB_STAGE_ROCE,
+ MLX5_IB_STAGE_QP,
MLX5_IB_STAGE_SRQ,
MLX5_IB_STAGE_DEVICE_RESOURCES,
- MLX5_IB_STAGE_DEVICE_NOTIFIER,
MLX5_IB_STAGE_ODP,
MLX5_IB_STAGE_COUNTERS,
MLX5_IB_STAGE_CONG_DEBUGFS,
- MLX5_IB_STAGE_UAR,
MLX5_IB_STAGE_BFREG,
MLX5_IB_STAGE_PRE_IB_REG_UMR,
MLX5_IB_STAGE_WHITELIST_UID,
MLX5_IB_STAGE_IB_REG,
+ MLX5_IB_STAGE_DEVICE_NOTIFIER,
MLX5_IB_STAGE_POST_IB_REG_UMR,
MLX5_IB_STAGE_DELAY_DROP,
- MLX5_IB_STAGE_CLASS_ATTR,
+ MLX5_IB_STAGE_RESTRACK,
MLX5_IB_STAGE_MAX,
};
@@ -856,7 +1049,10 @@ struct mlx5_ib_flow_action {
struct {
struct mlx5_ib_dev *dev;
u32 sub_type;
- u32 action_id;
+ union {
+ struct mlx5_modify_hdr *modify_hdr;
+ struct mlx5_pkt_reformat *pkt_reformat;
+ };
} flow_action_raw;
};
};
@@ -869,8 +1065,6 @@ struct mlx5_dm {
*/
spinlock_t lock;
DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES);
- unsigned long *steering_sw_icm_alloc_blocks;
- unsigned long *header_modify_sw_icm_alloc_blocks;
};
struct mlx5_read_counters_attr {
@@ -917,6 +1111,7 @@ struct mlx5_ib_lb_state {
u32 user_td;
int qps;
bool enabled;
+ bool force_enable;
};
struct mlx5_ib_pf_eq {
@@ -929,46 +1124,84 @@ struct mlx5_ib_pf_eq {
mempool_t *pool;
};
+struct mlx5_devx_event_table {
+ struct mlx5_nb devx_nb;
+ /* serialize updating the event_xa */
+ struct mutex event_xa_lock;
+ struct xarray event_xa;
+};
+
+struct mlx5_var_table {
+ /* serialize updating the bitmap */
+ struct mutex bitmap_lock;
+ unsigned long *bitmap;
+ u64 hw_start_addr;
+ u32 stride_size;
+ u64 num_var_hw_entries;
+};
+
+struct mlx5_port_caps {
+ bool has_smi;
+ u8 ext_port_cap;
+};
+
+
+struct mlx5_special_mkeys {
+ u32 dump_fill_mkey;
+ __be32 null_mkey;
+ __be32 terminate_scatter_list_mkey;
+};
+
+struct mlx5_macsec {
+ struct mutex lock; /* Protects mlx5_macsec internal contexts */
+ struct list_head macsec_devices_list;
+ struct notifier_block blocking_events_nb;
+};
+
struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
+ struct mlx5_data_direct_dev *data_direct_dev;
+ /* protect accessing data_direct_dev */
+ struct mutex data_direct_lock;
struct notifier_block mdev_events;
+ struct notifier_block lag_events;
int num_ports;
/* serialize update of capability mask
*/
struct mutex cap_mask_mutex;
- bool ib_active;
+ u8 ib_active:1;
+ u8 is_rep:1;
+ u8 lag_active:1;
+ u8 fill_delay;
struct umr_common umrc;
/* sync used page count stats
*/
struct mlx5_ib_resources devr;
- struct mlx5_mr_cache cache;
+
+ atomic_t mkey_var;
+ struct mlx5_mkey_cache cache;
struct timer_list delay_timer;
/* Prevents soft lock on massive reg MRs */
struct mutex slow_path_mutex;
- int fill_delay;
struct ib_odp_caps odp_caps;
u64 odp_max_size;
+ struct mutex odp_eq_mutex;
struct mlx5_ib_pf_eq odp_pf_eq;
- /*
- * Sleepable RCU that prevents destruction of MRs while they are still
- * being used by a page fault handler.
- */
- struct srcu_struct mr_srcu;
- u32 null_mkey;
+ struct xarray odp_mkeys;
+
struct mlx5_ib_flow_db *flow_db;
/* protect resources needed as part of reset flow */
spinlock_t reset_flow_resource_lock;
struct list_head qp_list;
+ struct list_head data_direct_mr_list;
/* Array with num_ports elements */
struct mlx5_ib_port *port;
struct mlx5_sq_bfreg bfreg;
struct mlx5_sq_bfreg fp_bfreg;
struct mlx5_ib_delay_drop delay_drop;
const struct mlx5_ib_profile *profile;
- bool is_rep;
- int lag_active;
struct mlx5_ib_lb_state lb;
u8 umr_fence;
@@ -977,8 +1210,25 @@ struct mlx5_ib_dev {
struct mlx5_dm dm;
u16 devx_whitelist_uid;
struct mlx5_srq_table srq_table;
+ struct mlx5_qp_table qp_table;
struct mlx5_async_ctx async_ctx;
- int free_port;
+ struct mlx5_devx_event_table devx_event_table;
+ struct mlx5_var_table var_table;
+
+ struct xarray sig_mrs;
+ struct mlx5_port_caps port_caps[MLX5_MAX_PORTS];
+ u16 pkey_table_len;
+ u8 lag_ports;
+ struct mlx5_special_mkeys mkeys;
+ struct mlx5_data_direct_resources ddr;
+
+#ifdef CONFIG_MLX5_MACSEC
+ struct mlx5_macsec macsec;
+#endif
+
+ u8 num_plane;
+ struct mlx5_ib_dev *smi_dev;
+ const char *sub_dev_name;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -996,6 +1246,11 @@ static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev)
return container_of(ibdev, struct mlx5_ib_dev, ib_dev);
}
+static inline struct mlx5_ib_dev *mr_to_mdev(struct mlx5_ib_mr *mr)
+{
+ return to_mdev(mr->ibmr.device);
+}
+
static inline struct mlx5_ib_dev *mlx5_udata_to_mdev(struct ib_udata *udata)
{
struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
@@ -1019,11 +1274,6 @@ static inline struct mlx5_ib_rwq *to_mibrwq(struct mlx5_core_qp *core_qp)
return container_of(core_qp, struct mlx5_ib_rwq, core_qp);
}
-static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mkey *mmkey)
-{
- return container_of(mmkey, struct mlx5_ib_mr, mmkey);
-}
-
static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd)
{
return container_of(ibpd, struct mlx5_ib_pd, ibpd);
@@ -1054,11 +1304,6 @@ static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq)
return container_of(msrq, struct mlx5_ib_srq, msrq);
}
-static inline struct mlx5_ib_dm *to_mdm(struct ib_dm *ibdm)
-{
- return container_of(ibdm, struct mlx5_ib_dm, ibdm);
-}
-
static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr)
{
return container_of(ibmr, struct mlx5_ib_mr, ibmr);
@@ -1075,30 +1320,40 @@ to_mflow_act(struct ib_flow_action *ibact)
return container_of(ibact, struct mlx5_ib_flow_action, ib_action);
}
-int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context,
- struct ib_udata *udata, unsigned long virt,
+static inline struct mlx5_user_mmap_entry *
+to_mmmap(struct rdma_user_mmap_entry *rdma_entry)
+{
+ return container_of(rdma_entry,
+ struct mlx5_user_mmap_entry, rdma_entry);
+}
+
+int mlx5_ib_dev_res_cq_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
struct mlx5_db *db);
void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db);
void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
-int mlx5_ib_create_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, u32 flags,
+int mlx5_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
struct ib_udata *udata);
int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
-void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags);
+static inline int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags)
+{
+ return 0;
+}
int mlx5_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr,
struct ib_udata *udata);
int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr);
-void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
+int mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp);
void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp);
-struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata);
+int mlx5_ib_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
@@ -1106,63 +1361,64 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata);
void mlx5_ib_drain_sq(struct ib_qp *qp);
void mlx5_ib_drain_rq(struct ib_qp *qp);
-int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
- const struct ib_send_wr **bad_wr);
-int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
- const struct ib_recv_wr **bad_wr);
-int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
- int buflen, size_t *bc);
-int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
- int buflen, size_t *bc);
-int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index,
- void *buffer, int buflen, size_t *bc);
-struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+int mlx5_ib_read_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
+ size_t buflen, size_t *bc);
+int mlx5_ib_read_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
+ size_t buflen, size_t *bc);
+int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer,
+ size_t buflen, size_t *bc);
+int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs);
int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int mlx5_ib_pre_destroy_cq(struct ib_cq *cq);
+void mlx5_ib_post_destroy_cq(struct ib_cq *cq);
int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc);
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata);
+struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
+ u64 length, u64 virt_addr,
+ int fd, int access_flags,
+ struct ib_dmah *dmah,
+ struct uverbs_attr_bundle *attrs);
int mlx5_ib_advise_mr(struct ib_pd *pd,
enum ib_uverbs_advise_mr_advice advice,
u32 flags,
struct ib_sge *sg_list,
u32 num_sge,
struct uverbs_attr_bundle *attrs);
-struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata);
+int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
int mlx5_ib_dealloc_mw(struct ib_mw *mw);
-int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
- int page_shift, int flags);
struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
- struct ib_udata *udata,
int access_flags);
-void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
-int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
- u64 length, u64 virt_addr, int access_flags,
- struct ib_pd *pd, struct ib_udata *udata);
+void mlx5_ib_free_odp_mr(struct mlx5_ib_mr *mr);
+struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
+ u64 length, u64 virt_addr, int access_flags,
+ struct ib_pd *pd, struct ib_udata *udata);
int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata);
+ u32 max_num_sg);
+struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
+ u32 max_num_sg,
+ u32 max_num_meta_sg);
int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
-int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset);
+int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in, size_t in_mad_size,
- struct ib_mad_hdr *out, size_t *out_mad_size,
- u16 *out_mad_pkey_index);
-struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
- struct ib_udata *udata);
+ const struct ib_mad *in, struct ib_mad *out,
+ size_t *out_mad_size, u16 *out_mad_pkey_index);
+int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
-int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset);
-int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port);
-int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
- struct ib_smp *out_mad);
+int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, unsigned int port);
int mlx5_query_mad_ifc_system_image_guid(struct ib_device *ibdev,
__be64 *sys_image_guid);
int mlx5_query_mad_ifc_max_pkeys(struct ib_device *ibdev,
@@ -1171,32 +1427,28 @@ int mlx5_query_mad_ifc_vendor_id(struct ib_device *ibdev,
u32 *vendor_id);
int mlx5_query_mad_ifc_node_desc(struct mlx5_ib_dev *dev, char *node_desc);
int mlx5_query_mad_ifc_node_guid(struct mlx5_ib_dev *dev, __be64 *node_guid);
-int mlx5_query_mad_ifc_pkey(struct ib_device *ibdev, u8 port, u16 index,
+int mlx5_query_mad_ifc_pkey(struct ib_device *ibdev, u32 port, u16 index,
u16 *pkey);
-int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u8 port, int index,
+int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u32 port, int index,
union ib_gid *gid);
-int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
+int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props);
-int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
+int mlx5_ib_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props);
-int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev);
-void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev);
-void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
- unsigned long max_page_shift,
- int *count, int *shift,
- int *ncont, int *order);
-void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
- int page_shift, size_t offset, size_t num_pages,
- __be64 *pas, int access_flags);
-void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
- int page_shift, __be64 *pas, int access_flags);
-void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
+void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas,
+ u64 access_flags);
int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
-int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
-int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
+int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev);
+void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev);
+struct mlx5_cache_ent *
+mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev,
+ struct mlx5r_cache_rb_key rb_key,
+ bool persistent_entry);
+
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
+ int access_flags, int access_mode,
+ int ndescs);
-struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry);
-void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
struct ib_mr_status *mr_status);
struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
@@ -1205,49 +1457,52 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
u32 wq_attr_mask, struct ib_udata *udata);
-struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr,
- struct ib_udata *udata);
+int mlx5_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata);
int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
-bool mlx5_ib_dc_atomic_is_supported(struct mlx5_ib_dev *dev);
-struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_dm_alloc_attr *attr,
- struct uverbs_attr_bundle *attrs);
-int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs);
struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
struct ib_dm_mr_attr *attr,
struct uverbs_attr_bundle *attrs);
+void mlx5_ib_data_direct_bind(struct mlx5_ib_dev *ibdev,
+ struct mlx5_data_direct_dev *dev);
+void mlx5_ib_data_direct_unbind(struct mlx5_ib_dev *ibdev);
+void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev);
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
+int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq);
void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev);
int __init mlx5_ib_odp_init(void);
void mlx5_ib_odp_cleanup(void);
-void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
- unsigned long end);
-void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent);
-void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
- size_t nentries, struct mlx5_ib_mr *mr, int flags);
+int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev);
+int mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
+ struct mlx5_ib_mr *mr, int flags);
int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
enum ib_uverbs_advise_mr_advice advice,
u32 flags, struct ib_sge *sg_list, u32 num_sge);
+int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr);
+int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
-static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
+static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
+static inline int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_pf_eq *eq)
{
- return;
+ return 0;
}
-
-static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {}
static inline int mlx5_ib_odp_init(void) { return 0; }
static inline void mlx5_ib_odp_cleanup(void) {}
-static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {}
-static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
- size_t nentries, struct mlx5_ib_mr *mr,
- int flags) {}
+static inline int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev)
+{
+ return 0;
+}
+static inline int mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
+ struct mlx5_ib_mr *mr, int flags)
+{
+ return -EOPNOTSUPP;
+}
static inline int
mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
@@ -1256,37 +1511,47 @@ mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
{
return -EOPNOTSUPP;
}
-static inline void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp,
- unsigned long start,
- unsigned long end){};
+static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr)
+{
+ return -EOPNOTSUPP;
+}
+static inline int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr)
+{
+ return -EOPNOTSUPP;
+}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+extern const struct mmu_interval_notifier_ops mlx5_mn_ops;
+
/* Needed for rep profile */
void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
const struct mlx5_ib_profile *profile,
int stage);
-void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
- const struct mlx5_ib_profile *profile);
+int __mlx5_ib_add(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile);
int mlx5_ib_get_vf_config(struct ib_device *device, int vf,
- u8 port, struct ifla_vf_info *info);
+ u32 port, struct ifla_vf_info *info);
int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
- u8 port, int state);
+ u32 port, int state);
int mlx5_ib_get_vf_stats(struct ib_device *device, int vf,
- u8 port, struct ifla_vf_stats *stats);
-int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
+ u32 port, struct ifla_vf_stats *stats);
+int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u32 port,
+ struct ifla_vf_guid *node_guid,
+ struct ifla_vf_guid *port_guid);
+int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u32 port,
u64 guid, int type);
-__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev,
- const struct ib_gid_attr *attr);
+__be16 mlx5_get_roce_udp_sport_min(const struct mlx5_ib_dev *dev,
+ const struct ib_gid_attr *attr);
-void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
-void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
+void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num);
+void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num);
/* GSI QP helper functions */
-struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr);
-int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp);
+int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp,
+ struct ib_qp_init_attr *attr);
+int mlx5_ib_destroy_gsi(struct mlx5_ib_qp *mqp);
int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
int attr_mask);
int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
@@ -1304,66 +1569,22 @@ void mlx5_ib_free_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi,
int bfregn);
struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi);
struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev,
- u8 ib_port_num,
- u8 *native_port_num);
+ u32 ib_port_num,
+ u32 *native_port_num);
void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev,
- u8 port_num);
+ u32 port_num);
-#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
-void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
-const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void);
extern const struct uapi_definition mlx5_ib_devx_defs[];
extern const struct uapi_definition mlx5_ib_flow_defs[];
-struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
- struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
- struct mlx5_flow_act *flow_act, u32 counter_id,
- void *cmd_in, int inlen, int dest_id, int dest_type);
-bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
-bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id);
-int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root);
-void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction);
-#else
-static inline int
-mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
- bool is_user) { return -EOPNOTSUPP; }
-static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {}
-static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id,
- int *dest_type)
-{
- return false;
-}
-static inline void
-mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
-{
- return;
-};
-#endif
-static inline void init_query_mad(struct ib_smp *mad)
-{
- mad->base_version = 1;
- mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
- mad->class_version = 1;
- mad->method = IB_MGMT_METHOD_GET;
-}
-
-static inline u8 convert_access(int acc)
-{
- return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) |
- (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) |
- (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) |
- (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) |
- MLX5_PERM_LOCAL_READ;
-}
+extern const struct uapi_definition mlx5_ib_qos_defs[];
+extern const struct uapi_definition mlx5_ib_std_types_defs[];
+extern const struct uapi_definition mlx5_ib_create_cq_defs[];
static inline int is_qp1(enum ib_qp_type qp_type)
{
- return qp_type == MLX5_IB_QPT_HW_GSI;
+ return qp_type == MLX5_IB_QPT_HW_GSI || qp_type == IB_QPT_GSI;
}
-#define MLX5_MAX_UMR_SHIFT 16
-#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
-
static inline u32 check_cq_create_flags(u32 flags)
{
/*
@@ -1396,12 +1617,11 @@ static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext,
{
u8 cqe_version = ucontext->cqe_version;
- if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) &&
- !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+ if ((offsetofend(typeof(*ucmd), uidx) <= inlen) && !cqe_version &&
+ (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
return 0;
- if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) !=
- !!cqe_version))
+ if ((offsetofend(typeof(*ucmd), uidx) <= inlen) != !!cqe_version)
return -EINVAL;
return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
@@ -1414,12 +1634,11 @@ static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext,
{
u8 cqe_version = ucontext->cqe_version;
- if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) &&
- !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+ if ((offsetofend(typeof(*ucmd), uidx) <= inlen) && !cqe_version &&
+ (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
return 0;
- if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) !=
- !!cqe_version))
+ if ((offsetofend(typeof(*ucmd), uidx) <= inlen) != !!cqe_version)
return -EINVAL;
return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
@@ -1431,16 +1650,176 @@ static inline int get_uars_per_sys_page(struct mlx5_ib_dev *dev, bool lib_suppor
MLX5_UARS_IN_PAGE : 1;
}
-static inline int get_num_static_uars(struct mlx5_ib_dev *dev,
- struct mlx5_bfreg_info *bfregi)
-{
- return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages;
-}
-
-unsigned long mlx5_ib_get_xlt_emergency_page(void);
-void mlx5_ib_put_xlt_emergency_page(void);
+extern void *xlt_emergency_page;
int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
struct mlx5_bfreg_info *bfregi, u32 bfregn,
bool dyn_bfreg);
+
+static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_mkey *mmkey)
+{
+ refcount_set(&mmkey->usecount, 1);
+
+ return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mmkey->key),
+ mmkey, GFP_KERNEL));
+}
+
+/* deref an mkey that can participate in ODP flow */
+static inline void mlx5r_deref_odp_mkey(struct mlx5_ib_mkey *mmkey)
+{
+ if (refcount_dec_and_test(&mmkey->usecount))
+ wake_up(&mmkey->wait);
+}
+
+/* deref an mkey that can participate in ODP flow and wait for release */
+static inline void mlx5r_deref_wait_odp_mkey(struct mlx5_ib_mkey *mmkey)
+{
+ mlx5r_deref_odp_mkey(mmkey);
+ wait_event(mmkey->wait, refcount_read(&mmkey->usecount) == 0);
+}
+
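The three inline helpers above define a small refcount protocol for ODP-capable mkeys: the owner stores the mkey with usecount == 1, lookups take extra references, and teardown must wait for them to drain. A minimal sketch of that lifecycle follows; it is not part of the patch, the example_* wrappers are hypothetical, and only the helpers, xa_erase() and mlx5_base_mkey() are taken from the surrounding code.

/* Illustrative sketch only -- not part of this patch. */
static inline int example_publish_odp_mkey(struct mlx5_ib_dev *dev,
					   struct mlx5_ib_mr *mr)
{
	/* make the mkey visible to the page-fault path, usecount starts at 1 */
	return mlx5r_store_odp_mkey(dev, &mr->mmkey);
}

static inline void example_fault_done(struct mlx5_ib_mkey *mmkey)
{
	/* drop the reference a fault handler took while using the mkey */
	mlx5r_deref_odp_mkey(mmkey);
}

static inline void example_unpublish_odp_mkey(struct mlx5_ib_dev *dev,
					      struct mlx5_ib_mr *mr)
{
	/* hide the mkey, then wait until all in-flight users are done */
	xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key));
	mlx5r_deref_wait_odp_mkey(&mr->mmkey);
}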
+static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev)
+{
+ /*
+ * If the driver is in hash mode and the port_select_flow_table_bypass cap
+ * is supported, it means that the driver no longer needs to assign the port
+ * affinity by default. If a user wants to set the port affinity explicitly,
+ * the user has a dedicated API to do that, so there is no need to assign
+ * the port affinity by default.
+ */
+ if (dev->lag_active &&
+ mlx5_lag_mode_is_hash(dev->mdev) &&
+ MLX5_CAP_PORT_SELECTION(dev->mdev, port_select_flow_table_bypass))
+ return 0;
+
+ if (mlx5_lag_is_lacp_owner(dev->mdev) && !dev->lag_active)
+ return 0;
+
+ return dev->lag_active ||
+ (MLX5_CAP_GEN(dev->mdev, num_lag_ports) > 1 &&
+ MLX5_CAP_GEN(dev->mdev, lag_tx_port_affinity));
+}
+
+static inline bool rt_supported(int ts_cap)
+{
+ return ts_cap == MLX5_TIMESTAMP_FORMAT_CAP_REAL_TIME ||
+ ts_cap == MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME;
+}
+
+/*
+ * PCI Peer to Peer is a trainwreck. If no switch is present then things
+ * sometimes work, depending on the pci_distance_p2p logic for excluding broken
+ * root complexes. However if a switch is present in the path, then things get
+ * really ugly depending on how the switch is set up. This table assumes that the
+ * root complex is strict and is validating that all req/reps are matched
+ * perfectly - so any scenario where it sees only half the transaction is a
+ * failure.
+ *
+ * CR/RR/DT ATS RO P2P
+ * 00X X X OK
+ * 010 X X fails (request is routed to root but root never sees comp)
+ * 011 0 X fails (request is routed to root but root never sees comp)
+ * 011 1 X OK
+ * 10X X 1 OK
+ * 101 X 0 fails (completion is routed to root but root didn't see req)
+ * 110 X 0 SLOW
+ * 111 0 0 SLOW
+ * 111 1 0 fails (completion is routed to root but root didn't see req)
+ * 111 1 1 OK
+ *
+ * Unfortunately we cannot reliably know if a switch is present or what the
+ * CR/RR/DT ACS settings are, as in a VM that is all hidden. Assume that
+ * CR/RR/DT is 111 if the ATS cap is enabled and follow the last three rows.
+ *
+ * For now assume if the umem is a dma_buf then it is P2P.
+ */
+static inline bool mlx5_umem_needs_ats(struct mlx5_ib_dev *dev,
+ struct ib_umem *umem, int access_flags)
+{
+ if (!MLX5_CAP_GEN(dev->mdev, ats) || !umem->is_dmabuf)
+ return false;
+ return access_flags & IB_ACCESS_RELAXED_ORDERING;
+}
+
+int set_roce_addr(struct mlx5_ib_dev *dev, u32 port_num,
+ unsigned int index, const union ib_gid *gid,
+ const struct ib_gid_attr *attr);
+
+static inline u32 smi_to_native_portnum(struct mlx5_ib_dev *dev, u32 port)
+{
+ return (port - 1) / dev->num_ports + 1;
+}
+
+static inline unsigned int get_max_log_entity_size_cap(struct mlx5_ib_dev *dev,
+ int access_mode)
+{
+ int max_log_size = 0;
+
+ if (access_mode == MLX5_MKC_ACCESS_MODE_MTT)
+ max_log_size =
+ MLX5_CAP_GEN_2(dev->mdev, max_mkey_log_entity_size_mtt);
+ else if (access_mode == MLX5_MKC_ACCESS_MODE_KSM)
+ max_log_size = MLX5_CAP_GEN_2(
+ dev->mdev, max_mkey_log_entity_size_fixed_buffer);
+
+ if (!max_log_size ||
+ (max_log_size > 31 &&
+ !MLX5_CAP_GEN_2(dev->mdev, umr_log_entity_size_5)))
+ max_log_size = 31;
+
+ return max_log_size;
+}
+
+static inline unsigned int get_min_log_entity_size_cap(struct mlx5_ib_dev *dev,
+ int access_mode)
+{
+ int min_log_size = 0;
+
+ if (access_mode == MLX5_MKC_ACCESS_MODE_KSM &&
+ MLX5_CAP_GEN_2(dev->mdev,
+ min_mkey_log_entity_size_fixed_buffer_valid))
+ min_log_size = MLX5_CAP_GEN_2(
+ dev->mdev, min_mkey_log_entity_size_fixed_buffer);
+ else
+ min_log_size =
+ MLX5_CAP_GEN_2(dev->mdev, log_min_mkey_entity_size);
+
+ min_log_size = max(min_log_size, MLX5_ADAPTER_PAGE_SHIFT);
+ return min_log_size;
+}
+
+/*
+ * For mkc users, instead of a page_offset the command has a start_iova which
+ * specifies both the page_offset and the on-the-wire IOVA
+ */
+static __always_inline unsigned long
+mlx5_umem_mkc_find_best_pgsz(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+ u64 iova, int access_mode)
+{
+ unsigned int max_log_entity_size_cap, min_log_entity_size_cap;
+ unsigned long bitmap;
+
+ max_log_entity_size_cap = get_max_log_entity_size_cap(dev, access_mode);
+ min_log_entity_size_cap = get_min_log_entity_size_cap(dev, access_mode);
+
+ bitmap = GENMASK_ULL(max_log_entity_size_cap, min_log_entity_size_cap);
+
+ /* In KSM mode HW requires IOVA and mkey's page size to be aligned */
+ if (access_mode == MLX5_MKC_ACCESS_MODE_KSM && iova)
+ bitmap &= GENMASK_ULL(__ffs64(iova), 0);
+
+ return ib_umem_find_best_pgsz(umem, bitmap, iova);
+}
+
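A small worked example of how the page-size bitmap above narrows in KSM mode; it is illustrative only, and the 31/12 cap values are assumed numbers rather than real device capabilities.

/* Illustrative sketch only -- not part of this patch. */
static inline unsigned long example_ksm_pgsz_bitmap(u64 iova)
{
	/* assumed caps: max_log = 31, min_log = 12 -> 4 KiB .. 2 GiB */
	unsigned long bitmap = GENMASK_ULL(31, 12);

	/*
	 * KSM requires the page size to divide the IOVA, e.g. iova = 0x30000
	 * has __ffs64() == 16, leaving only the 4 KiB .. 64 KiB sizes.
	 */
	if (iova)
		bitmap &= GENMASK_ULL(__ffs64(iova), 0);

	/* ib_umem_find_best_pgsz() then picks the largest size that fits */
	return bitmap;
}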
+static inline unsigned long
+mlx5_umem_dmabuf_find_best_pgsz(struct ib_umem_dmabuf *umem_dmabuf,
+ int access_mode)
+{
+ return mlx5_umem_mkc_find_best_pgsz(to_mdev(umem_dmabuf->umem.ibdev),
+ &umem_dmabuf->umem,
+ umem_dmabuf->umem.iova,
+ access_mode);
+}
+
#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 83b452d977d4..325fa04cbe8a 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2020, Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -36,230 +37,346 @@
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/delay.h>
-#include <rdma/ib_umem.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-resv.h>
#include <rdma/ib_umem_odp.h>
-#include <rdma/ib_verbs.h>
+#include "dm.h"
#include "mlx5_ib.h"
+#include "umr.h"
+#include "data_direct.h"
+#include "dmah.h"
enum {
MAX_PENDING_REG_MR = 8,
};
+#define MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS 4
#define MLX5_UMR_ALIGN 2048
-static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
-static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
-static int mr_cache_max_order(struct mlx5_ib_dev *dev);
-static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
-static bool umr_can_modify_entity_size(struct mlx5_ib_dev *dev)
+static void
+create_mkey_callback(int status, struct mlx5_async_work *context);
+static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
+ u64 iova, int access_flags,
+ unsigned long page_size, bool populate,
+ int access_mode, u16 st_index, u8 ph);
+static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr);
+
+static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
+ struct ib_pd *pd)
{
- return !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled);
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+
+ MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
+ MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
+ MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
+ MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
+ MLX5_SET(mkc, mkc, lr, 1);
+
+ if (acc & IB_ACCESS_RELAXED_ORDERING) {
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
+ MLX5_SET(mkc, mkc, relaxed_ordering_write, 1);
+
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) ||
+ (MLX5_CAP_GEN(dev->mdev,
+ relaxed_ordering_read_pci_enabled) &&
+ pcie_relaxed_ordering_enabled(dev->mdev->pdev)))
+ MLX5_SET(mkc, mkc, relaxed_ordering_read, 1);
+ }
+
+ MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET64(mkc, mkc, start_addr, start_addr);
}
-static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
+static void assign_mkey_variant(struct mlx5_ib_dev *dev, u32 *mkey, u32 *in)
{
- return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
+ u8 key = atomic_inc_return(&dev->mkey_var);
+ void *mkc;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, mkey_7_0, key);
+ *mkey = key;
}
-static bool use_umr(struct mlx5_ib_dev *dev, int order)
+static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_mkey *mkey, u32 *in, int inlen)
{
- return order <= mr_cache_max_order(dev) &&
- umr_can_modify_entity_size(dev);
+ int ret;
+
+ assign_mkey_variant(dev, &mkey->key, in);
+ ret = mlx5_core_create_mkey(dev->mdev, &mkey->key, in, inlen);
+ if (!ret)
+ init_waitqueue_head(&mkey->wait);
+
+ return ret;
}
-static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+static int mlx5_ib_create_mkey_cb(struct mlx5r_async_create_mkey *async_create)
{
- int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
+ struct mlx5_ib_dev *dev = async_create->ent->dev;
+ size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ size_t outlen = MLX5_ST_SZ_BYTES(create_mkey_out);
+
+ MLX5_SET(create_mkey_in, async_create->in, opcode,
+ MLX5_CMD_OP_CREATE_MKEY);
+ assign_mkey_variant(dev, &async_create->mkey, async_create->in);
+ return mlx5_cmd_exec_cb(&dev->async_ctx, async_create->in, inlen,
+ async_create->out, outlen, create_mkey_callback,
+ &async_create->cb_work);
+}
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
- /* Wait until all page fault handlers using the mr complete. */
- synchronize_srcu(&dev->mr_srcu);
+static int mkey_cache_max_order(struct mlx5_ib_dev *dev);
+static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent);
- return err;
+static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+{
+ WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)));
+
+ return mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key);
}
-static int order2idx(struct mlx5_ib_dev *dev, int order)
+static void create_mkey_warn(struct mlx5_ib_dev *dev, int status, void *out)
{
- struct mlx5_mr_cache *cache = &dev->cache;
+ if (status == -ENXIO) /* core driver is not available */
+ return;
- if (order < cache->ent[0].order)
- return 0;
- else
- return order - cache->ent[0].order;
+ mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
+ if (status != -EREMOTEIO) /* driver specific failure */
+ return;
+
+ /* Failed in FW, print cmd out failure details */
+ mlx5_cmd_out_err(dev->mdev, MLX5_CMD_OP_CREATE_MKEY, 0, out);
}
-static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
+static int push_mkey_locked(struct mlx5_cache_ent *ent, u32 mkey)
{
- return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
- length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
+ unsigned long tmp = ent->mkeys_queue.ci % NUM_MKEYS_PER_PAGE;
+ struct mlx5_mkeys_page *page;
+
+ lockdep_assert_held(&ent->mkeys_queue.lock);
+ if (ent->mkeys_queue.ci >=
+ ent->mkeys_queue.num_pages * NUM_MKEYS_PER_PAGE) {
+ page = kzalloc(sizeof(*page), GFP_ATOMIC);
+ if (!page)
+ return -ENOMEM;
+ ent->mkeys_queue.num_pages++;
+ list_add_tail(&page->list, &ent->mkeys_queue.pages_list);
+ } else {
+ page = list_last_entry(&ent->mkeys_queue.pages_list,
+ struct mlx5_mkeys_page, list);
+ }
+
+ page->mkeys[tmp] = mkey;
+ ent->mkeys_queue.ci++;
+ return 0;
}
-static void update_odp_mr(struct mlx5_ib_mr *mr)
+static int pop_mkey_locked(struct mlx5_cache_ent *ent)
{
- if (is_odp_mr(mr)) {
- /*
- * This barrier prevents the compiler from moving the
- * setting of umem->odp_data->private to point to our
- * MR, before reg_umr finished, to ensure that the MR
- * initialization have finished before starting to
- * handle invalidations.
- */
- smp_wmb();
- to_ib_umem_odp(mr->umem)->private = mr;
- /*
- * Make sure we will see the new
- * umem->odp_data->private value in the invalidation
- * routines, before we can get page faults on the
- * MR. Page faults can happen once we put the MR in
- * the tree, below this line. Without the barrier,
- * there can be a fault handling and an invalidation
- * before umem->odp_data->private == mr is visible to
- * the invalidation handler.
- */
- smp_wmb();
+ unsigned long tmp = (ent->mkeys_queue.ci - 1) % NUM_MKEYS_PER_PAGE;
+ struct mlx5_mkeys_page *last_page;
+ u32 mkey;
+
+ lockdep_assert_held(&ent->mkeys_queue.lock);
+ last_page = list_last_entry(&ent->mkeys_queue.pages_list,
+ struct mlx5_mkeys_page, list);
+ mkey = last_page->mkeys[tmp];
+ last_page->mkeys[tmp] = 0;
+ ent->mkeys_queue.ci--;
+ if (ent->mkeys_queue.num_pages > 1 && !tmp) {
+ list_del(&last_page->list);
+ ent->mkeys_queue.num_pages--;
+ kfree(last_page);
}
+ return mkey;
}
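push_mkey_locked() and pop_mkey_locked() above treat ent->mkeys_queue as a paged LIFO. The layout below is a sketch reconstructed only from the accesses in this file; the real definitions live in mlx5_ib.h and may differ in detail.

/* Sketch of the layout implied by the two helpers above (assumption). */
struct mlx5_mkeys_page {
	u32 mkeys[NUM_MKEYS_PER_PAGE];	/* fixed-size slab of cached mkeys */
	struct list_head list;		/* linked into mkeys_queue.pages_list */
};

struct mlx5_mkeys_queue {
	struct list_head pages_list;	/* list of mlx5_mkeys_page */
	u32 num_pages;			/* pages currently allocated */
	unsigned long ci;		/* number of mkeys stored in the queue */
	spinlock_t lock;		/* protects ci, num_pages and the list */
};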
-static void reg_mr_callback(int status, struct mlx5_async_work *context)
+static void create_mkey_callback(int status, struct mlx5_async_work *context)
{
- struct mlx5_ib_mr *mr =
- container_of(context, struct mlx5_ib_mr, cb_work);
- struct mlx5_ib_dev *dev = mr->dev;
- struct mlx5_mr_cache *cache = &dev->cache;
- int c = order2idx(dev, mr->order);
- struct mlx5_cache_ent *ent = &cache->ent[c];
- u8 key;
+ struct mlx5r_async_create_mkey *mkey_out =
+ container_of(context, struct mlx5r_async_create_mkey, cb_work);
+ struct mlx5_cache_ent *ent = mkey_out->ent;
+ struct mlx5_ib_dev *dev = ent->dev;
unsigned long flags;
- struct xarray *mkeys = &dev->mdev->priv.mkey_table;
- int err;
- spin_lock_irqsave(&ent->lock, flags);
- ent->pending--;
- spin_unlock_irqrestore(&ent->lock, flags);
if (status) {
- mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
- kfree(mr);
- dev->fill_delay = 1;
+ create_mkey_warn(dev, status, mkey_out->out);
+ kfree(mkey_out);
+ spin_lock_irqsave(&ent->mkeys_queue.lock, flags);
+ ent->pending--;
+ WRITE_ONCE(dev->fill_delay, 1);
+ spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags);
mod_timer(&dev->delay_timer, jiffies + HZ);
return;
}
- mr->mmkey.type = MLX5_MKEY_MR;
- spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
- key = dev->mdev->priv.mkey_key++;
- spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
- mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;
-
- cache->last_add = jiffies;
-
- spin_lock_irqsave(&ent->lock, flags);
- list_add_tail(&mr->list, &ent->head);
- ent->cur++;
- ent->size++;
- spin_unlock_irqrestore(&ent->lock, flags);
-
- xa_lock_irqsave(mkeys, flags);
- err = xa_err(__xa_store(mkeys, mlx5_base_mkey(mr->mmkey.key),
- &mr->mmkey, GFP_ATOMIC));
- xa_unlock_irqrestore(mkeys, flags);
- if (err)
- pr_err("Error inserting to mkey tree. 0x%x\n", -err);
+ mkey_out->mkey |= mlx5_idx_to_mkey(
+ MLX5_GET(create_mkey_out, mkey_out->out, mkey_index));
+ WRITE_ONCE(dev->cache.last_add, jiffies);
- if (!completion_done(&ent->compl))
- complete(&ent->compl);
+ spin_lock_irqsave(&ent->mkeys_queue.lock, flags);
+ push_mkey_locked(ent, mkey_out->mkey);
+ ent->pending--;
+ /* If we are doing fill_to_high_water then keep going. */
+ queue_adjust_cache_locked(ent);
+ spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags);
+ kfree(mkey_out);
}
-static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
+static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs)
{
- struct mlx5_mr_cache *cache = &dev->cache;
- struct mlx5_cache_ent *ent = &cache->ent[c];
- int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
- struct mlx5_ib_mr *mr;
+ int ret = 0;
+
+ switch (access_mode) {
+ case MLX5_MKC_ACCESS_MODE_MTT:
+ ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD /
+ sizeof(struct mlx5_mtt));
+ break;
+ case MLX5_MKC_ACCESS_MODE_KSM:
+ ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD /
+ sizeof(struct mlx5_klm));
+ break;
+ default:
+ WARN_ON(1);
+ }
+ return ret;
+}
+
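A quick worked example for get_mkc_octo_size() above. The descriptor sizes used here are assumptions based on the usual 8-byte MTT and 16-byte KLM layouts with a 16-byte MLX5_IB_UMR_OCTOWORD, not values taken from this patch.

/*
 * Illustrative numbers only: with a 16-byte octoword, an 8-byte mlx5_mtt
 * packs two descriptors per octoword and a 16-byte mlx5_klm packs one, so
 * ndescs = 100 translates to 50 octowords in MTT mode and 100 in KSM mode.
 */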
+static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc)
+{
+ set_mkc_access_pd_addr_fields(mkc, ent->rb_key.access_flags, 0,
+ ent->dev->umrc.pd);
+ MLX5_SET(mkc, mkc, free, 1);
+ MLX5_SET(mkc, mkc, umr_en, 1);
+ MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3);
+ MLX5_SET(mkc, mkc, access_mode_4_2,
+ (ent->rb_key.access_mode >> 2) & 0x7);
+ MLX5_SET(mkc, mkc, ma_translation_mode, !!ent->rb_key.ats);
+
+ MLX5_SET(mkc, mkc, translations_octword_size,
+ get_mkc_octo_size(ent->rb_key.access_mode,
+ ent->rb_key.ndescs));
+ MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
+
+ if (ent->rb_key.ph != MLX5_IB_NO_PH) {
+ MLX5_SET(mkc, mkc, pcie_tph_en, 1);
+ MLX5_SET(mkc, mkc, pcie_tph_ph, ent->rb_key.ph);
+ if (ent->rb_key.st_index != MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX)
+ MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index,
+ ent->rb_key.st_index);
+ }
+}
+
+/* Asynchronously schedule new MRs to be populated in the cache. */
+static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
+{
+ struct mlx5r_async_create_mkey *async_create;
void *mkc;
- u32 *in;
int err = 0;
int i;
- in = kzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
-
- mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
for (i = 0; i < num; i++) {
+ async_create = kzalloc(sizeof(struct mlx5r_async_create_mkey),
+ GFP_KERNEL);
+ if (!async_create)
+ return -ENOMEM;
+ mkc = MLX5_ADDR_OF(create_mkey_in, async_create->in,
+ memory_key_mkey_entry);
+ set_cache_mkc(ent, mkc);
+ async_create->ent = ent;
+
+ spin_lock_irq(&ent->mkeys_queue.lock);
if (ent->pending >= MAX_PENDING_REG_MR) {
err = -EAGAIN;
- break;
+ goto free_async_create;
}
+ ent->pending++;
+ spin_unlock_irq(&ent->mkeys_queue.lock);
- mr = kzalloc(sizeof(*mr), GFP_KERNEL);
- if (!mr) {
- err = -ENOMEM;
- break;
+ err = mlx5_ib_create_mkey_cb(async_create);
+ if (err) {
+ mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err);
+ goto err_create_mkey;
}
- mr->order = ent->order;
- mr->allocated_from_cache = 1;
- mr->dev = dev;
+ }
- MLX5_SET(mkc, mkc, free, 1);
- MLX5_SET(mkc, mkc, umr_en, 1);
- MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
- MLX5_SET(mkc, mkc, access_mode_4_2,
- (ent->access_mode >> 2) & 0x7);
+ return 0;
- MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
- MLX5_SET(mkc, mkc, log_page_size, ent->page);
+err_create_mkey:
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ ent->pending--;
+free_async_create:
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ kfree(async_create);
+ return err;
+}
- spin_lock_irq(&ent->lock);
- ent->pending++;
- spin_unlock_irq(&ent->lock);
- err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
- &dev->async_ctx, in, inlen,
- mr->out, sizeof(mr->out),
- reg_mr_callback, &mr->cb_work);
- if (err) {
- spin_lock_irq(&ent->lock);
- ent->pending--;
- spin_unlock_irq(&ent->lock);
- mlx5_ib_warn(dev, "create mkey failed %d\n", err);
- kfree(mr);
- break;
- }
- }
+/* Synchronously create a MR in the cache */
+static int create_cache_mkey(struct mlx5_cache_ent *ent, u32 *mkey)
+{
+ size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ void *mkc;
+ u32 *in;
+ int err;
+ in = kzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ set_cache_mkc(ent, mkc);
+
+ err = mlx5_core_create_mkey(ent->dev->mdev, mkey, in, inlen);
+ if (err)
+ goto free_in;
+
+ WRITE_ONCE(ent->dev->cache.last_add, jiffies);
+free_in:
kfree(in);
return err;
}
-static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
+static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
{
- struct mlx5_mr_cache *cache = &dev->cache;
- struct mlx5_cache_ent *ent = &cache->ent[c];
- struct mlx5_ib_mr *tmp_mr;
- struct mlx5_ib_mr *mr;
- LIST_HEAD(del_list);
- int i;
+ u32 mkey;
- for (i = 0; i < num; i++) {
- spin_lock_irq(&ent->lock);
- if (list_empty(&ent->head)) {
- spin_unlock_irq(&ent->lock);
- break;
- }
- mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
- list_move(&mr->list, &del_list);
- ent->cur--;
- ent->size--;
- spin_unlock_irq(&ent->lock);
- mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
- }
+ lockdep_assert_held(&ent->mkeys_queue.lock);
+ if (!ent->mkeys_queue.ci)
+ return;
+ mkey = pop_mkey_locked(ent);
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ mlx5_core_destroy_mkey(ent->dev->mdev, mkey);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+}
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
- synchronize_srcu(&dev->mr_srcu);
+static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
+ bool limit_fill)
+ __acquires(&ent->mkeys_queue.lock) __releases(&ent->mkeys_queue.lock)
+{
+ int err;
- list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
- list_del(&mr->list);
- kfree(mr);
+ lockdep_assert_held(&ent->mkeys_queue.lock);
+
+ while (true) {
+ if (limit_fill)
+ target = ent->limit * 2;
+ if (target == ent->pending + ent->mkeys_queue.ci)
+ return 0;
+ if (target > ent->pending + ent->mkeys_queue.ci) {
+ u32 todo = target - (ent->pending + ent->mkeys_queue.ci);
+
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ err = add_keys(ent, todo);
+ if (err == -EAGAIN)
+ usleep_range(3000, 5000);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ if (err) {
+ if (err != -EAGAIN)
+ return err;
+ } else
+ return 0;
+ } else {
+ remove_cache_mr_locked(ent);
+ }
}
}
@@ -267,37 +384,38 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct mlx5_cache_ent *ent = filp->private_data;
- struct mlx5_ib_dev *dev = ent->dev;
- char lbuf[20] = {0};
- u32 var;
+ u32 target;
int err;
- int c;
-
- count = min(count, sizeof(lbuf) - 1);
- if (copy_from_user(lbuf, buf, count))
- return -EFAULT;
-
- c = order2idx(dev, ent->order);
- if (sscanf(lbuf, "%u", &var) != 1)
- return -EINVAL;
-
- if (var < ent->limit)
- return -EINVAL;
-
- if (var > ent->size) {
- do {
- err = add_keys(dev, c, var - ent->size);
- if (err && err != -EAGAIN)
- return err;
+ err = kstrtou32_from_user(buf, count, 0, &target);
+ if (err)
+ return err;
- usleep_range(3000, 5000);
- } while (err);
- } else if (var < ent->size) {
- remove_keys(dev, c, ent->size - var);
+ /*
+ * Target is the new value of total_mrs the user requests; however, we
+ * cannot free MRs that are in use. Compute the target value for stored
+ * mkeys.
+ */
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ if (target < ent->in_use) {
+ err = -EINVAL;
+ goto err_unlock;
}
+ target = target - ent->in_use;
+ if (target < ent->limit || target > ent->limit*2) {
+ err = -EINVAL;
+ goto err_unlock;
+ }
+ err = resize_available_mrs(ent, target, false);
+ if (err)
+ goto err_unlock;
+ spin_unlock_irq(&ent->mkeys_queue.lock);
return count;
+
+err_unlock:
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ return err;
}
static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
@@ -307,7 +425,8 @@ static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
char lbuf[20];
int err;
- err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
+ err = snprintf(lbuf, sizeof(lbuf), "%ld\n",
+ ent->mkeys_queue.ci + ent->in_use);
if (err < 0)
return err;
@@ -325,32 +444,23 @@ static ssize_t limit_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct mlx5_cache_ent *ent = filp->private_data;
- struct mlx5_ib_dev *dev = ent->dev;
- char lbuf[20] = {0};
u32 var;
int err;
- int c;
-
- count = min(count, sizeof(lbuf) - 1);
- if (copy_from_user(lbuf, buf, count))
- return -EFAULT;
-
- c = order2idx(dev, ent->order);
- if (sscanf(lbuf, "%u", &var) != 1)
- return -EINVAL;
-
- if (var > ent->size)
- return -EINVAL;
+ err = kstrtou32_from_user(buf, count, 0, &var);
+ if (err)
+ return err;
+ /*
+ * Upon set, we immediately fill the cache to the high water mark implied by
+ * the limit.
+ */
+ spin_lock_irq(&ent->mkeys_queue.lock);
ent->limit = var;
-
- if (ent->cur < ent->limit) {
- err = add_keys(dev, c, 2 * ent->limit - ent->cur);
- if (err)
- return err;
- }
-
+ err = resize_available_mrs(ent, 0, true);
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ if (err)
+ return err;
return count;
}
@@ -375,68 +485,140 @@ static const struct file_operations limit_fops = {
.read = limit_read,
};
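The size/limit file operations above are the kind of per-entry knobs that end up as debugfs files under the mkey-cache directory. A hedged sketch of how such wiring typically looks is shown below; the helper, the directory argument and size_fops (presumably defined next to size_read()/size_write()) are assumptions, and the real registration lives elsewhere in this driver.

/* Illustrative sketch only -- not part of this patch. */
static void example_expose_cache_ent(struct mlx5_cache_ent *ent,
				     struct dentry *dir)
{
	/* one file per tunable, with the cache entry as private data */
	debugfs_create_file("size", 0600, dir, ent, &size_fops);
	debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
}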
-static int someone_adding(struct mlx5_mr_cache *cache)
+static bool someone_adding(struct mlx5_mkey_cache *cache)
{
- int i;
+ struct mlx5_cache_ent *ent;
+ struct rb_node *node;
+ bool ret;
+
+ mutex_lock(&cache->rb_lock);
+ for (node = rb_first(&cache->rb_root); node; node = rb_next(node)) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ ret = ent->mkeys_queue.ci < ent->limit;
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ if (ret) {
+ mutex_unlock(&cache->rb_lock);
+ return true;
+ }
+ }
+ mutex_unlock(&cache->rb_lock);
+ return false;
+}
- for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
- if (cache->ent[i].cur < cache->ent[i].limit)
- return 1;
+/*
+ * Check if the bucket is outside the high/low water mark and schedule an async
+ * update. The cache refill has hysteresis, once the low water mark is hit it is
+ * refilled up to the high mark.
+ */
+static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
+{
+ lockdep_assert_held(&ent->mkeys_queue.lock);
+
+ if (ent->disabled || READ_ONCE(ent->dev->fill_delay) || ent->is_tmp)
+ return;
+ if (ent->mkeys_queue.ci < ent->limit) {
+ ent->fill_to_high_water = true;
+ mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
+ } else if (ent->fill_to_high_water &&
+ ent->mkeys_queue.ci + ent->pending < 2 * ent->limit) {
+ /*
+ * Once we start populating due to hitting a low water mark
+ * continue until we pass the high water mark.
+ */
+ mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
+ } else if (ent->mkeys_queue.ci == 2 * ent->limit) {
+ ent->fill_to_high_water = false;
+ } else if (ent->mkeys_queue.ci > 2 * ent->limit) {
+ /* Queue deletion of excess entries */
+ ent->fill_to_high_water = false;
+ if (ent->pending)
+ queue_delayed_work(ent->dev->cache.wq, &ent->dwork,
+ secs_to_jiffies(1));
+ else
+ mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
}
+}
- return 0;
+static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent)
+{
+ u32 mkey;
+
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ while (ent->mkeys_queue.ci) {
+ mkey = pop_mkey_locked(ent);
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ mlx5_core_destroy_mkey(dev->mdev, mkey);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ }
+ ent->tmp_cleanup_scheduled = false;
+ spin_unlock_irq(&ent->mkeys_queue.lock);
}
static void __cache_work_func(struct mlx5_cache_ent *ent)
{
struct mlx5_ib_dev *dev = ent->dev;
- struct mlx5_mr_cache *cache = &dev->cache;
- int i = order2idx(dev, ent->order);
+ struct mlx5_mkey_cache *cache = &dev->cache;
int err;
- if (cache->stopped)
- return;
-
- ent = &dev->cache.ent[i];
- if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
- err = add_keys(dev, i, 1);
- if (ent->cur < 2 * ent->limit) {
- if (err == -EAGAIN) {
- mlx5_ib_dbg(dev, "returned eagain, order %d\n",
- i + 2);
- queue_delayed_work(cache->wq, &ent->dwork,
- msecs_to_jiffies(3));
- } else if (err) {
- mlx5_ib_warn(dev, "command failed order %d, err %d\n",
- i + 2, err);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ if (ent->disabled)
+ goto out;
+
+ if (ent->fill_to_high_water &&
+ ent->mkeys_queue.ci + ent->pending < 2 * ent->limit &&
+ !READ_ONCE(dev->fill_delay)) {
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ err = add_keys(ent, 1);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ if (ent->disabled)
+ goto out;
+ if (err) {
+ /*
+ * EAGAIN only happens if there are pending MRs, so we
+ * will be rescheduled when storing them. The only
+ * failure path here is ENOMEM.
+ */
+ if (err != -EAGAIN) {
+ mlx5_ib_warn(
+ dev,
+ "add keys command failed, err %d\n",
+ err);
queue_delayed_work(cache->wq, &ent->dwork,
- msecs_to_jiffies(1000));
- } else {
- queue_work(cache->wq, &ent->work);
+ secs_to_jiffies(1));
}
}
- } else if (ent->cur > 2 * ent->limit) {
+ } else if (ent->mkeys_queue.ci > 2 * ent->limit) {
+ bool need_delay;
+
/*
- * The remove_keys() logic is performed as garbage collection
- * task. Such task is intended to be run when no other active
- * processes are running.
+ * The remove_cache_mr() logic is performed as a garbage
+ * collection task. Such a task is intended to be run when no
+ * other active processes are running.
*
* The need_resched() will return TRUE if there are user tasks
* to be activated in the near future.
*
- * In such case, we don't execute remove_keys() and postpone
- * the garbage collection work to try to run in next cycle,
- * in order to free CPU resources to other tasks.
+ * In such a case, we don't execute remove_cache_mr() and postpone
+ * the garbage collection work to try to run in the next cycle, in
+ * order to free CPU resources to other tasks.
*/
- if (!need_resched() && !someone_adding(cache) &&
- time_after(jiffies, cache->last_add + 300 * HZ)) {
- remove_keys(dev, i, 1);
- if (ent->cur > ent->limit)
- queue_work(cache->wq, &ent->work);
- } else {
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ need_delay = need_resched() || someone_adding(cache) ||
+ !time_after(jiffies,
+ READ_ONCE(cache->last_add) + 300 * HZ);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ if (ent->disabled)
+ goto out;
+ if (need_delay) {
queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
+ goto out;
}
+ remove_cache_mr_locked(ent);
+ queue_adjust_cache_locked(ent);
}
+out:
+ spin_unlock_irq(&ent->mkeys_queue.lock);
}
static void delayed_cache_work_func(struct work_struct *work)
@@ -444,206 +626,353 @@ static void delayed_cache_work_func(struct work_struct *work)
struct mlx5_cache_ent *ent;
ent = container_of(work, struct mlx5_cache_ent, dwork.work);
- __cache_work_func(ent);
+ /* temp entries are never filled, only cleaned */
+ if (ent->is_tmp)
+ clean_keys(ent->dev, ent);
+ else
+ __cache_work_func(ent);
}
-static void cache_work_func(struct work_struct *work)
+static int cache_ent_key_cmp(struct mlx5r_cache_rb_key key1,
+ struct mlx5r_cache_rb_key key2)
{
- struct mlx5_cache_ent *ent;
+ int res;
+
+ res = key1.ats - key2.ats;
+ if (res)
+ return res;
+
+ res = key1.access_mode - key2.access_mode;
+ if (res)
+ return res;
- ent = container_of(work, struct mlx5_cache_ent, work);
- __cache_work_func(ent);
+ res = key1.access_flags - key2.access_flags;
+ if (res)
+ return res;
+
+ res = key1.st_index - key2.st_index;
+ if (res)
+ return res;
+
+ res = key1.ph - key2.ph;
+ if (res)
+ return res;
+
+ /*
+ * keep ndescs last in the compare table since the find function
+ * searches for an exact match on all properties and only the closest
+ * match in size.
+ */
+ return key1.ndescs - key2.ndescs;
}
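/*
 * Illustrative sketch (standalone userspace model, not the kernel code): the
 * comparator above imposes a lexicographic order with ndescs compared last,
 * so a tree walk can demand equality on every property except size and still
 * stop at the nearest entry by size. Field names mirror mlx5r_cache_rb_key;
 * the struct and helper below are simplified stand-ins, and cmp_field()
 * replaces the kernel's subtraction idiom to avoid unsigned wrap.
 */
#include <stdio.h>

struct rb_key_model {
	unsigned int ats;
	unsigned int access_mode;
	unsigned int access_flags;
	unsigned int st_index;
	unsigned int ph;
	unsigned int ndescs;
};

static int cmp_field(unsigned int a, unsigned int b)
{
	return (a > b) - (a < b);	/* -1, 0 or 1 */
}

static int key_cmp_model(struct rb_key_model a, struct rb_key_model b)
{
	int res;

	if ((res = cmp_field(a.ats, b.ats)))
		return res;
	if ((res = cmp_field(a.access_mode, b.access_mode)))
		return res;
	if ((res = cmp_field(a.access_flags, b.access_flags)))
		return res;
	if ((res = cmp_field(a.st_index, b.st_index)))
		return res;
	if ((res = cmp_field(a.ph, b.ph)))
		return res;
	return cmp_field(a.ndescs, b.ndescs);	/* size only breaks the tie */
}

int main(void)
{
	struct rb_key_model want = { .access_mode = 1, .ndescs = 8 };
	struct rb_key_model bigger = { .access_mode = 1, .ndescs = 16 };
	struct rb_key_model other = { .access_mode = 3, .ndescs = 8 };

	/* same properties, larger size: ordered after "want" */
	printf("%d\n", key_cmp_model(bigger, want) > 0);
	/* a different access_mode dominates regardless of size */
	printf("%d\n", key_cmp_model(other, want) > 0);
	return 0;
}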
-struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
+static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache,
+ struct mlx5_cache_ent *ent)
{
- struct mlx5_mr_cache *cache = &dev->cache;
- struct mlx5_cache_ent *ent;
- struct mlx5_ib_mr *mr;
- int err;
-
- if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
- mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
- return NULL;
+ struct rb_node **new = &cache->rb_root.rb_node, *parent = NULL;
+ struct mlx5_cache_ent *cur;
+ int cmp;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ cur = rb_entry(*new, struct mlx5_cache_ent, node);
+ parent = *new;
+ cmp = cache_ent_key_cmp(cur->rb_key, ent->rb_key);
+ if (cmp > 0)
+ new = &((*new)->rb_left);
+ if (cmp < 0)
+ new = &((*new)->rb_right);
+ if (cmp == 0)
+ return -EEXIST;
}
- ent = &cache->ent[entry];
- while (1) {
- spin_lock_irq(&ent->lock);
- if (list_empty(&ent->head)) {
- spin_unlock_irq(&ent->lock);
+ /* Add new node and rebalance tree. */
+ rb_link_node(&ent->node, parent, new);
+ rb_insert_color(&ent->node, &cache->rb_root);
- err = add_keys(dev, entry, 1);
- if (err && err != -EAGAIN)
- return ERR_PTR(err);
+ return 0;
+}
- wait_for_completion(&ent->compl);
- } else {
- mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
- list);
- list_del(&mr->list);
- ent->cur--;
- spin_unlock_irq(&ent->lock);
- if (ent->cur < ent->limit)
- queue_work(cache->wq, &ent->work);
- return mr;
+static struct mlx5_cache_ent *
+mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev,
+ struct mlx5r_cache_rb_key rb_key)
+{
+ struct rb_node *node = dev->cache.rb_root.rb_node;
+ struct mlx5_cache_ent *cur, *smallest = NULL;
+ u64 ndescs_limit;
+ int cmp;
+
+ /*
+ * Find the smallest ent with order >= requested_order.
+ */
+ while (node) {
+ cur = rb_entry(node, struct mlx5_cache_ent, node);
+ cmp = cache_ent_key_cmp(cur->rb_key, rb_key);
+ if (cmp > 0) {
+ smallest = cur;
+ node = node->rb_left;
}
+ if (cmp < 0)
+ node = node->rb_right;
+ if (cmp == 0)
+ return cur;
}
+
+ /*
+ * Limit the usage of mkeys larger than twice the required size, while
+ * still allowing small MRs to use the smallest cache entry.
+ */
+ ndescs_limit = max_t(u64, rb_key.ndescs * 2,
+ MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS);
+
+ return (smallest &&
+ smallest->rb_key.access_mode == rb_key.access_mode &&
+ smallest->rb_key.access_flags == rb_key.access_flags &&
+ smallest->rb_key.ats == rb_key.ats &&
+ smallest->rb_key.st_index == rb_key.st_index &&
+ smallest->rb_key.ph == rb_key.ph &&
+ smallest->rb_key.ndescs <= ndescs_limit) ?
+ smallest :
+ NULL;
}
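/*
 * Illustrative sketch (standalone, with an assumed constant): the lookup
 * above reuses the closest larger entry only when its ndescs stays within
 * max(2 * requested, persistent-entry minimum). The real minimum is
 * MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS; the value 4 below is a
 * placeholder for illustration only.
 */
#include <stdint.h>
#include <stdio.h>

#define PERSISTENT_ENTRY_MIN_DESCS 4ULL	/* placeholder, see note above */

static uint64_t ndescs_limit(uint64_t requested)
{
	uint64_t twice = requested * 2;

	return twice > PERSISTENT_ENTRY_MIN_DESCS ? twice :
						    PERSISTENT_ENTRY_MIN_DESCS;
}

int main(void)
{
	/* a 1-descriptor MR may still be served by the smallest entry */
	printf("limit(1)   = %llu\n", (unsigned long long)ndescs_limit(1));
	/* a 100-descriptor MR tolerates entries of up to 200 descriptors */
	printf("limit(100) = %llu\n", (unsigned long long)ndescs_limit(100));
	return 0;
}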
-static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
+static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
+ struct mlx5_cache_ent *ent)
{
- struct mlx5_mr_cache *cache = &dev->cache;
- struct mlx5_ib_mr *mr = NULL;
- struct mlx5_cache_ent *ent;
- int last_umr_cache_entry;
- int c;
- int i;
-
- c = order2idx(dev, order);
- last_umr_cache_entry = order2idx(dev, mr_cache_max_order(dev));
- if (c < 0 || c > last_umr_cache_entry) {
- mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
- return NULL;
- }
+ struct mlx5_ib_mr *mr;
+ int err;
- for (i = c; i <= last_umr_cache_entry; i++) {
- ent = &cache->ent[i];
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
- mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ ent->in_use++;
- spin_lock_irq(&ent->lock);
- if (!list_empty(&ent->head)) {
- mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
- list);
- list_del(&mr->list);
- ent->cur--;
- spin_unlock_irq(&ent->lock);
- if (ent->cur < ent->limit)
- queue_work(cache->wq, &ent->work);
- break;
+ if (!ent->mkeys_queue.ci) {
+ queue_adjust_cache_locked(ent);
+ ent->miss++;
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ err = create_cache_mkey(ent, &mr->mmkey.key);
+ if (err) {
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ ent->in_use--;
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ kfree(mr);
+ return ERR_PTR(err);
}
- spin_unlock_irq(&ent->lock);
-
- queue_work(cache->wq, &ent->work);
+ } else {
+ mr->mmkey.key = pop_mkey_locked(ent);
+ queue_adjust_cache_locked(ent);
+ spin_unlock_irq(&ent->mkeys_queue.lock);
}
+ mr->mmkey.cache_ent = ent;
+ mr->mmkey.type = MLX5_MKEY_MR;
+ mr->mmkey.rb_key = ent->rb_key;
+ mr->mmkey.cacheable = true;
+ init_waitqueue_head(&mr->mmkey.wait);
+ return mr;
+}
- if (!mr)
- cache->ent[c].miss++;
+static int get_unchangeable_access_flags(struct mlx5_ib_dev *dev,
+ int access_flags)
+{
+ int ret = 0;
- return mr;
+ if ((access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+ MLX5_CAP_GEN(dev->mdev, atomic) &&
+ MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
+ ret |= IB_ACCESS_REMOTE_ATOMIC;
+
+ if ((access_flags & IB_ACCESS_RELAXED_ORDERING) &&
+ MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
+ ret |= IB_ACCESS_RELAXED_ORDERING;
+
+ if ((access_flags & IB_ACCESS_RELAXED_ORDERING) &&
+ (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) ||
+ MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_pci_enabled)) &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
+ ret |= IB_ACCESS_RELAXED_ORDERING;
+
+ return ret;
}
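/*
 * Illustrative sketch (standalone model; the capability names are stand-ins
 * for the MLX5_CAP_GEN() bits queried above, and only two of the three
 * checks are modelled): the helper above extracts the access flags that UMR
 * cannot later reconfigure on this device, so they become part of the cache
 * key instead of being patched at reuse time.
 */
#include <stdbool.h>
#include <stdio.h>

#define ACC_REMOTE_ATOMIC	0x1
#define ACC_RELAXED_ORDERING	0x2

struct caps_model {
	bool atomic;
	bool umr_modify_atomic_disabled;
	bool relaxed_ordering_write;
	bool relaxed_ordering_write_umr;
};

static int unchangeable_flags(struct caps_model c, int access_flags)
{
	int ret = 0;

	if ((access_flags & ACC_REMOTE_ATOMIC) &&
	    c.atomic && c.umr_modify_atomic_disabled)
		ret |= ACC_REMOTE_ATOMIC;

	if ((access_flags & ACC_RELAXED_ORDERING) &&
	    c.relaxed_ordering_write && !c.relaxed_ordering_write_umr)
		ret |= ACC_RELAXED_ORDERING;

	return ret;
}

int main(void)
{
	struct caps_model c = { .atomic = true,
				.umr_modify_atomic_disabled = true,
				.relaxed_ordering_write = true,
				.relaxed_ordering_write_umr = true };

	/* atomic is frozen into the key, relaxed ordering stays mutable */
	printf("0x%x\n", (unsigned)unchangeable_flags(c, ACC_REMOTE_ATOMIC |
						      ACC_RELAXED_ORDERING));
	return 0;
}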
-void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
+ int access_flags, int access_mode,
+ int ndescs)
{
- struct mlx5_mr_cache *cache = &dev->cache;
- struct mlx5_cache_ent *ent;
- int shrink = 0;
- int c;
+ struct mlx5r_cache_rb_key rb_key = {
+ .ndescs = ndescs,
+ .access_mode = access_mode,
+ .access_flags = get_unchangeable_access_flags(dev, access_flags),
+ .ph = MLX5_IB_NO_PH,
+ };
+ struct mlx5_cache_ent *ent = mkey_cache_ent_from_rb_key(dev, rb_key);
+
+ if (!ent)
+ return ERR_PTR(-EOPNOTSUPP);
- if (!mr->allocated_from_cache)
- return;
+ return _mlx5_mr_cache_alloc(dev, ent);
+}
- c = order2idx(dev, mr->order);
- if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
- mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
+static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (!mlx5_debugfs_root || dev->is_rep)
return;
- }
- if (unreg_umr(dev, mr))
+ debugfs_remove_recursive(dev->cache.fs_root);
+ dev->cache.fs_root = NULL;
+}
+
+static void mlx5_mkey_cache_debugfs_add_ent(struct mlx5_ib_dev *dev,
+ struct mlx5_cache_ent *ent)
+{
+ int order = order_base_2(ent->rb_key.ndescs);
+ struct dentry *dir;
+
+ if (!mlx5_debugfs_root || dev->is_rep)
return;
- ent = &cache->ent[c];
- spin_lock_irq(&ent->lock);
- list_add_tail(&mr->list, &ent->head);
- ent->cur++;
- if (ent->cur > 2 * ent->limit)
- shrink = 1;
- spin_unlock_irq(&ent->lock);
+ if (ent->rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM)
+ order = MLX5_IMR_KSM_CACHE_ENTRY + 2;
- if (shrink)
- queue_work(cache->wq, &ent->work);
+ sprintf(ent->name, "%d", order);
+ dir = debugfs_create_dir(ent->name, dev->cache.fs_root);
+ debugfs_create_file("size", 0600, dir, ent, &size_fops);
+ debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
+ debugfs_create_ulong("cur", 0400, dir, &ent->mkeys_queue.ci);
+ debugfs_create_u32("miss", 0600, dir, &ent->miss);
}
-static void clean_keys(struct mlx5_ib_dev *dev, int c)
+static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
- struct mlx5_mr_cache *cache = &dev->cache;
- struct mlx5_cache_ent *ent = &cache->ent[c];
- struct mlx5_ib_mr *tmp_mr;
- struct mlx5_ib_mr *mr;
- LIST_HEAD(del_list);
+ struct dentry *dbg_root = mlx5_debugfs_get_dev_root(dev->mdev);
+ struct mlx5_mkey_cache *cache = &dev->cache;
- cancel_delayed_work(&ent->dwork);
- while (1) {
- spin_lock_irq(&ent->lock);
- if (list_empty(&ent->head)) {
- spin_unlock_irq(&ent->lock);
- break;
- }
- mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
- list_move(&mr->list, &del_list);
- ent->cur--;
- ent->size--;
- spin_unlock_irq(&ent->lock);
- mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
- }
+ if (!mlx5_debugfs_root || dev->is_rep)
+ return;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- synchronize_srcu(&dev->mr_srcu);
-#endif
+ cache->fs_root = debugfs_create_dir("mr_cache", dbg_root);
+}
- list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
- list_del(&mr->list);
- kfree(mr);
- }
+static void delay_time_func(struct timer_list *t)
+{
+ struct mlx5_ib_dev *dev = timer_container_of(dev, t, delay_timer);
+
+ WRITE_ONCE(dev->fill_delay, 0);
}
-static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
+static int mlx5r_mkeys_init(struct mlx5_cache_ent *ent)
{
- if (!mlx5_debugfs_root || dev->is_rep)
- return;
+ struct mlx5_mkeys_page *page;
- debugfs_remove_recursive(dev->cache.root);
- dev->cache.root = NULL;
+ page = kzalloc(sizeof(*page), GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&ent->mkeys_queue.pages_list);
+ spin_lock_init(&ent->mkeys_queue.lock);
+ list_add_tail(&page->list, &ent->mkeys_queue.pages_list);
+ ent->mkeys_queue.num_pages++;
+ return 0;
+}
+
+static void mlx5r_mkeys_uninit(struct mlx5_cache_ent *ent)
+{
+ struct mlx5_mkeys_page *page;
+
+ WARN_ON(ent->mkeys_queue.ci || ent->mkeys_queue.num_pages > 1);
+ page = list_last_entry(&ent->mkeys_queue.pages_list,
+ struct mlx5_mkeys_page, list);
+ list_del(&page->list);
+ kfree(page);
}
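/*
 * Illustrative sketch (standalone userspace model, assumptions noted): the
 * mkeys_queue replaces per-MR list nodes with pages of raw mkey values;
 * pages_list links the pages, num_pages counts them and ci counts stored
 * keys. The model below assumes LIFO push/pop within a fixed page size,
 * which may differ in detail from the kernel's push/pop helpers (they are
 * not part of this hunk).
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define KEYS_PER_PAGE 4	/* kept small; the kernel derives it from PAGE_SIZE */

struct mkeys_page_model {
	uint32_t keys[KEYS_PER_PAGE];
	struct mkeys_page_model *prev;
};

struct mkeys_queue_model {
	struct mkeys_page_model *tail;	/* newest page */
	unsigned long ci;		/* number of stored keys */
};

static int push_mkey(struct mkeys_queue_model *q, uint32_t key)
{
	if (!q->tail || q->ci % KEYS_PER_PAGE == 0) {
		struct mkeys_page_model *p = calloc(1, sizeof(*p));

		if (!p)
			return -1;
		p->prev = q->tail;
		q->tail = p;
	}
	q->tail->keys[q->ci % KEYS_PER_PAGE] = key;
	q->ci++;
	return 0;
}

/* caller must ensure ci > 0, like the locked kernel helpers */
static uint32_t pop_mkey(struct mkeys_queue_model *q)
{
	uint32_t key;

	q->ci--;
	key = q->tail->keys[q->ci % KEYS_PER_PAGE];
	if (q->ci % KEYS_PER_PAGE == 0) {
		struct mkeys_page_model *old = q->tail;

		q->tail = old->prev;
		free(old);
	}
	return key;
}

int main(void)
{
	struct mkeys_queue_model q = { 0 };
	uint32_t a, b;

	for (uint32_t i = 1; i <= 6; i++)
		push_mkey(&q, i);
	a = pop_mkey(&q);
	b = pop_mkey(&q);
	printf("popped %u then %u, ci now %lu\n", (unsigned)a, (unsigned)b, q.ci);
	while (q.ci)
		pop_mkey(&q);
	return 0;
}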
-static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
+struct mlx5_cache_ent *
+mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev,
+ struct mlx5r_cache_rb_key rb_key,
+ bool persistent_entry)
{
- struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
- struct dentry *dir;
- int i;
+ int order;
+ int ret;
- if (!mlx5_debugfs_root || dev->is_rep)
- return;
+ ent = kzalloc(sizeof(*ent), GFP_KERNEL);
+ if (!ent)
+ return ERR_PTR(-ENOMEM);
- cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
+ ret = mlx5r_mkeys_init(ent);
+ if (ret)
+ goto mkeys_err;
+ ent->rb_key = rb_key;
+ ent->dev = dev;
+ ent->is_tmp = !persistent_entry;
+
+ INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
+
+ ret = mlx5_cache_ent_insert(&dev->cache, ent);
+ if (ret)
+ goto ent_insert_err;
+
+ if (persistent_entry) {
+ if (rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM)
+ order = MLX5_IMR_KSM_CACHE_ENTRY;
+ else
+ order = order_base_2(rb_key.ndescs) - 2;
- for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
- ent = &cache->ent[i];
- sprintf(ent->name, "%d", ent->order);
- dir = debugfs_create_dir(ent->name, cache->root);
- debugfs_create_file("size", 0600, dir, ent, &size_fops);
- debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
- debugfs_create_u32("cur", 0400, dir, &ent->cur);
- debugfs_create_u32("miss", 0600, dir, &ent->miss);
+ if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) &&
+ !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
+ mlx5r_umr_can_load_pas(dev, 0))
+ ent->limit = dev->mdev->profile.mr_cache[order].limit;
+ else
+ ent->limit = 0;
+
+ mlx5_mkey_cache_debugfs_add_ent(dev, ent);
}
+
+ return ent;
+ent_insert_err:
+ mlx5r_mkeys_uninit(ent);
+mkeys_err:
+ kfree(ent);
+ return ERR_PTR(ret);
}
-static void delay_time_func(struct timer_list *t)
+static void mlx5r_destroy_cache_entries(struct mlx5_ib_dev *dev)
{
- struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer);
-
- dev->fill_delay = 0;
+ struct rb_root *root = &dev->cache.rb_root;
+ struct mlx5_cache_ent *ent;
+ struct rb_node *node;
+
+ mutex_lock(&dev->cache.rb_lock);
+ node = rb_first(root);
+ while (node) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
+ node = rb_next(node);
+ clean_keys(dev, ent);
+ rb_erase(&ent->node, root);
+ mlx5r_mkeys_uninit(ent);
+ kfree(ent);
+ }
+ mutex_unlock(&dev->cache.rb_lock);
}
-int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
+int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
{
- struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_mkey_cache *cache = &dev->cache;
+ struct rb_root *root = &dev->cache.rb_root;
+ struct mlx5r_cache_rb_key rb_key = {
+ .access_mode = MLX5_MKC_ACCESS_MODE_MTT,
+ .ph = MLX5_IB_NO_PH,
+ };
struct mlx5_cache_ent *ent;
+ struct rb_node *node;
+ int ret;
int i;
mutex_init(&dev->slow_path_mutex);
+ mutex_init(&dev->cache.rb_lock);
+ dev->cache.rb_root = RB_ROOT;
cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
if (!cache->wq) {
mlx5_ib_warn(dev, "failed to create work queue\n");
@@ -652,71 +981,79 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
timer_setup(&dev->delay_timer, delay_time_func, 0);
- for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
- ent = &cache->ent[i];
- INIT_LIST_HEAD(&ent->head);
- spin_lock_init(&ent->lock);
- ent->order = i + 2;
- ent->dev = dev;
- ent->limit = 0;
-
- init_completion(&ent->compl);
- INIT_WORK(&ent->work, cache_work_func);
- INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
-
- if (i > MR_CACHE_LAST_STD_ENTRY) {
- mlx5_odp_init_mr_cache_entry(ent);
- continue;
+ mlx5_mkey_cache_debugfs_init(dev);
+ mutex_lock(&cache->rb_lock);
+ for (i = 0; i <= mkey_cache_max_order(dev); i++) {
+ rb_key.ndescs = MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS << i;
+ ent = mlx5r_cache_create_ent_locked(dev, rb_key, true);
+ if (IS_ERR(ent)) {
+ ret = PTR_ERR(ent);
+ goto err;
}
-
- if (ent->order > mr_cache_max_order(dev))
- continue;
-
- ent->page = PAGE_SHIFT;
- ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
- MLX5_IB_UMR_OCTOWORD;
- ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
- if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
- !dev->is_rep &&
- mlx5_core_is_pf(dev->mdev))
- ent->limit = dev->mdev->profile->mr_cache[i].limit;
- else
- ent->limit = 0;
- queue_work(cache->wq, &ent->work);
}
- mlx5_mr_cache_debugfs_init(dev);
+ ret = mlx5_odp_init_mkey_cache(dev);
+ if (ret)
+ goto err;
+
+ mutex_unlock(&cache->rb_lock);
+ for (node = rb_first(root); node; node = rb_next(node)) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ queue_adjust_cache_locked(ent);
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ }
return 0;
+
+err:
+ mutex_unlock(&cache->rb_lock);
+ mlx5_mkey_cache_debugfs_cleanup(dev);
+ mlx5r_destroy_cache_entries(dev);
+ destroy_workqueue(cache->wq);
+ mlx5_ib_warn(dev, "failed to create mkey cache entry\n");
+ return ret;
}
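/*
 * Illustrative sketch (standalone, assumed constant): the init loop above
 * creates one persistent entry per power-of-two size, starting at the
 * persistent-entry minimum (assumed to be 4 descriptors here) and doubling
 * up to mkey_cache_max_order(). mlx5r_cache_create_ent_locked() maps the
 * size back to a profile index with order_base_2(ndescs) - 2, so loop index
 * i and entry size round-trip as shown below.
 */
#include <stdio.h>

#define PERSISTENT_ENTRY_MIN_DESCS 4U	/* placeholder, see note above */

static unsigned int order_base_2(unsigned int n)
{
	unsigned int order = 0;

	while ((1U << order) < n)
		order++;
	return order;
}

int main(void)
{
	for (unsigned int i = 0; i <= 5; i++) {
		unsigned int ndescs = PERSISTENT_ENTRY_MIN_DESCS << i;
		unsigned int profile_idx = order_base_2(ndescs) - 2;

		printf("i=%u ndescs=%u profile_idx=%u\n", i, ndescs, profile_idx);
	}
	return 0;
}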
-int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
{
- int i;
+ struct rb_root *root = &dev->cache.rb_root;
+ struct mlx5_cache_ent *ent;
+ struct rb_node *node;
if (!dev->cache.wq)
- return 0;
+ return;
- dev->cache.stopped = 1;
+ mutex_lock(&dev->cache.rb_lock);
+ for (node = rb_first(root); node; node = rb_next(node)) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ ent->disabled = true;
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ cancel_delayed_work(&ent->dwork);
+ }
+ mutex_unlock(&dev->cache.rb_lock);
+
+ /*
+ * After all entries are disabled and can no longer reschedule on the
+ * WQ, flush it and all async commands.
+ */
flush_workqueue(dev->cache.wq);
- mlx5_mr_cache_debugfs_cleanup(dev);
+ mlx5_mkey_cache_debugfs_cleanup(dev);
mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
- for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
- clean_keys(dev, i);
+ /* At this point all entries are disabled and have no concurrent work. */
+ mlx5r_destroy_cache_entries(dev);
destroy_workqueue(dev->cache.wq);
- del_timer_sync(&dev->delay_timer);
-
- return 0;
+ timer_delete_sync(&dev->delay_timer);
}
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
- struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_mr *mr;
void *mkc;
u32 *in;
@@ -735,18 +1072,12 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
- MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
- MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
- MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
- MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
- MLX5_SET(mkc, mkc, lr, 1);
-
MLX5_SET(mkc, mkc, length64, 1);
- MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
- MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET64(mkc, mkc, start_addr, 0);
+ set_mkc_access_pd_addr_fields(mkc, acc | IB_ACCESS_RELAXED_ORDERING, 0,
+ pd);
+ MLX5_SET(mkc, mkc, ma_translation_mode, MLX5_CAP_GEN(dev->mdev, ats));
- err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
+ err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
if (err)
goto err_in;
@@ -778,295 +1109,154 @@ static int get_octo_len(u64 addr, u64 len, int page_shift)
return (npages + 1) / 2;
}
-static int mr_cache_max_order(struct mlx5_ib_dev *dev)
+static int mkey_cache_max_order(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
- return MR_CACHE_LAST_STD_ENTRY + 2;
+ return MKEY_CACHE_LAST_STD_ENTRY;
return MLX5_MAX_UMR_SHIFT;
}
-static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata,
- u64 start, u64 length, int access_flags,
- struct ib_umem **umem, int *npages, int *page_shift,
- int *ncont, int *order)
-{
- struct ib_umem *u;
- int err;
-
- *umem = NULL;
-
- u = ib_umem_get(udata, start, length, access_flags, 0);
- err = PTR_ERR_OR_ZERO(u);
- if (err) {
- mlx5_ib_dbg(dev, "umem get failed (%d)\n", err);
- return err;
- }
-
- mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
- page_shift, ncont, order);
- if (!*npages) {
- mlx5_ib_warn(dev, "avoid zero region\n");
- ib_umem_release(u);
- return -EINVAL;
- }
-
- *umem = u;
-
- mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
- *npages, *ncont, *order, *page_shift);
-
- return 0;
-}
-
-static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
-{
- struct mlx5_ib_umr_context *context =
- container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
-
- context->status = wc->status;
- complete(&context->done);
-}
-
-static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
+static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
+ u64 length, int access_flags, u64 iova)
{
- context->cqe.done = mlx5_ib_umr_done;
- context->status = -1;
- init_completion(&context->done);
+ mr->ibmr.lkey = mr->mmkey.key;
+ mr->ibmr.rkey = mr->mmkey.key;
+ mr->ibmr.length = length;
+ mr->ibmr.device = &dev->ib_dev;
+ mr->ibmr.iova = iova;
+ mr->access_flags = access_flags;
}
-static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
- struct mlx5_umr_wr *umrwr)
+static unsigned int mlx5_umem_dmabuf_default_pgsz(struct ib_umem *umem,
+ u64 iova)
{
- struct umr_common *umrc = &dev->umrc;
- const struct ib_send_wr *bad;
- int err;
- struct mlx5_ib_umr_context umr_context;
-
- mlx5_ib_init_umr_context(&umr_context);
- umrwr->wr.wr_cqe = &umr_context.cqe;
-
- down(&umrc->sem);
- err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
- if (err) {
- mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
- } else {
- wait_for_completion(&umr_context.done);
- if (umr_context.status != IB_WC_SUCCESS) {
- mlx5_ib_warn(dev, "reg umr failed (%u)\n",
- umr_context.status);
- err = -EFAULT;
- }
- }
- up(&umrc->sem);
- return err;
+ /*
+ * The alignment of iova has already been checked upon entering
+ * UVERBS_METHOD_REG_DMABUF_MR
+ */
+ umem->iova = iova;
+ return PAGE_SIZE;
}
-static struct mlx5_ib_mr *alloc_mr_from_cache(
- struct ib_pd *pd, struct ib_umem *umem,
- u64 virt_addr, u64 len, int npages,
- int page_shift, int order, int access_flags)
+static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
+ struct ib_umem *umem, u64 iova,
+ int access_flags, int access_mode,
+ u16 st_index, u8 ph)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5r_cache_rb_key rb_key = {};
+ struct mlx5_cache_ent *ent;
struct mlx5_ib_mr *mr;
- int err = 0;
- int i;
+ unsigned long page_size;
- for (i = 0; i < 1; i++) {
- mr = alloc_cached_mr(dev, order);
- if (mr)
- break;
+ if (umem->is_dmabuf)
+ page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova);
+ else
+ page_size = mlx5_umem_mkc_find_best_pgsz(dev, umem, iova,
+ access_mode);
+ if (WARN_ON(!page_size))
+ return ERR_PTR(-EINVAL);
- err = add_keys(dev, order2idx(dev, order), 1);
- if (err && err != -EAGAIN) {
- mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
- break;
- }
+ rb_key.access_mode = access_mode;
+ rb_key.ndescs = ib_umem_num_dma_blocks(umem, page_size);
+ rb_key.ats = mlx5_umem_needs_ats(dev, umem, access_flags);
+ rb_key.access_flags = get_unchangeable_access_flags(dev, access_flags);
+ rb_key.st_index = st_index;
+ rb_key.ph = ph;
+ ent = mkey_cache_ent_from_rb_key(dev, rb_key);
+ /*
+ * If the MR can't come from the cache then synchronously create an uncached
+ * one.
+ */
+ if (!ent) {
+ mutex_lock(&dev->slow_path_mutex);
+ mr = reg_create(pd, umem, iova, access_flags, page_size, false, access_mode,
+ st_index, ph);
+ mutex_unlock(&dev->slow_path_mutex);
+ if (IS_ERR(mr))
+ return mr;
+ mr->mmkey.rb_key = rb_key;
+ mr->mmkey.cacheable = true;
+ return mr;
}
- if (!mr)
- return ERR_PTR(-EAGAIN);
+ mr = _mlx5_mr_cache_alloc(dev, ent);
+ if (IS_ERR(mr))
+ return mr;
mr->ibmr.pd = pd;
mr->umem = umem;
- mr->access_flags = access_flags;
- mr->desc_size = sizeof(struct mlx5_mtt);
- mr->mmkey.iova = virt_addr;
- mr->mmkey.size = len;
- mr->mmkey.pd = to_mpd(pd)->pdn;
+ mr->page_shift = order_base_2(page_size);
+ set_mr_fields(dev, mr, umem->length, access_flags, iova);
return mr;
}
-static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
- void *xlt, int page_shift, size_t size,
- int flags)
-{
- struct mlx5_ib_dev *dev = mr->dev;
- struct ib_umem *umem = mr->umem;
-
- if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
- if (!umr_can_use_indirect_mkey(dev))
- return -EPERM;
- mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
- return npages;
- }
-
- npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx);
-
- if (!(flags & MLX5_IB_UPD_XLT_ZAP)) {
- __mlx5_ib_populate_pas(dev, umem, page_shift,
- idx, npages, xlt,
- MLX5_IB_MTT_PRESENT);
- /* Clear padding after the pages
- * brought from the umem.
- */
- memset(xlt + (npages * sizeof(struct mlx5_mtt)), 0,
- size - npages * sizeof(struct mlx5_mtt));
- }
-
- return npages;
-}
-
-#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
- MLX5_UMR_MTT_ALIGNMENT)
-#define MLX5_SPARE_UMR_CHUNK 0x10000
-
-int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
- int page_shift, int flags)
+static struct ib_mr *
+reg_create_crossing_vhca_mr(struct ib_pd *pd, u64 iova, u64 length, int access_flags,
+ u32 crossed_lkey)
{
- struct mlx5_ib_dev *dev = mr->dev;
- struct device *ddev = dev->ib_dev.dev.parent;
- int size;
- void *xlt;
- dma_addr_t dma;
- struct mlx5_umr_wr wr;
- struct ib_sge sg;
- int err = 0;
- int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
- ? sizeof(struct mlx5_klm)
- : sizeof(struct mlx5_mtt);
- const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
- const int page_mask = page_align - 1;
- size_t pages_mapped = 0;
- size_t pages_to_map = 0;
- size_t pages_iter = 0;
- gfp_t gfp;
- bool use_emergency_page = false;
-
- if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
- !umr_can_use_indirect_mkey(dev))
- return -EPERM;
-
- /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
- * so we need to align the offset and length accordingly
- */
- if (idx & page_mask) {
- npages += idx & page_mask;
- idx &= ~page_mask;
- }
-
- gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL;
- gfp |= __GFP_ZERO | __GFP_NOWARN;
-
- pages_to_map = ALIGN(npages, page_align);
- size = desc_size * pages_to_map;
- size = min_t(int, size, MLX5_MAX_UMR_CHUNK);
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ int access_mode = MLX5_MKC_ACCESS_MODE_CROSSING;
+ struct mlx5_ib_mr *mr;
+ void *mkc;
+ int inlen;
+ u32 *in;
+ int err;
- xlt = (void *)__get_free_pages(gfp, get_order(size));
- if (!xlt && size > MLX5_SPARE_UMR_CHUNK) {
- mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. fallback to spare UMR allocation od %d bytes\n",
- size, get_order(size), MLX5_SPARE_UMR_CHUNK);
+ if (!MLX5_CAP_GEN(dev->mdev, crossing_vhca_mkey))
+ return ERR_PTR(-EOPNOTSUPP);
- size = MLX5_SPARE_UMR_CHUNK;
- xlt = (void *)__get_free_pages(gfp, get_order(size));
- }
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
- if (!xlt) {
- mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
- xlt = (void *)mlx5_ib_get_xlt_emergency_page();
- size = PAGE_SIZE;
- memset(xlt, 0, size);
- use_emergency_page = true;
- }
- pages_iter = size / desc_size;
- dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE);
- if (dma_mapping_error(ddev, dma)) {
- mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
+ inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
err = -ENOMEM;
- goto free_xlt;
- }
-
- sg.addr = dma;
- sg.lkey = dev->umrc.pd->local_dma_lkey;
-
- memset(&wr, 0, sizeof(wr));
- wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
- if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
- wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
- wr.wr.sg_list = &sg;
- wr.wr.num_sge = 1;
- wr.wr.opcode = MLX5_IB_WR_UMR;
-
- wr.pd = mr->ibmr.pd;
- wr.mkey = mr->mmkey.key;
- wr.length = mr->mmkey.size;
- wr.virt_addr = mr->mmkey.iova;
- wr.access_flags = mr->access_flags;
- wr.page_shift = page_shift;
-
- for (pages_mapped = 0;
- pages_mapped < pages_to_map && !err;
- pages_mapped += pages_iter, idx += pages_iter) {
- npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
- dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
- npages = populate_xlt(mr, idx, npages, xlt,
- page_shift, size, flags);
-
- dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
-
- sg.length = ALIGN(npages * desc_size,
- MLX5_UMR_MTT_ALIGNMENT);
-
- if (pages_mapped + pages_iter >= pages_to_map) {
- if (flags & MLX5_IB_UPD_XLT_ENABLE)
- wr.wr.send_flags |=
- MLX5_IB_SEND_UMR_ENABLE_MR |
- MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
- MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
- if (flags & MLX5_IB_UPD_XLT_PD ||
- flags & MLX5_IB_UPD_XLT_ACCESS)
- wr.wr.send_flags |=
- MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
- if (flags & MLX5_IB_UPD_XLT_ADDR)
- wr.wr.send_flags |=
- MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
- }
-
- wr.offset = idx * desc_size;
- wr.xlt_size = sg.length;
-
- err = mlx5_ib_post_send_wait(dev, &wr);
+ goto err_1;
}
- dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
-free_xlt:
- if (use_emergency_page)
- mlx5_ib_put_xlt_emergency_page();
- else
- free_pages((unsigned long)xlt, get_order(size));
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, crossing_target_vhca_id,
+ MLX5_CAP_GEN(dev->mdev, vhca_id));
+ MLX5_SET(mkc, mkc, translations_octword_size, crossed_lkey);
+ MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3);
+ MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7);
+
+ /* for this crossing mkey IOVA should be 0 and len should be IOVA + len */
+ set_mkc_access_pd_addr_fields(mkc, access_flags, 0, pd);
+ MLX5_SET64(mkc, mkc, len, iova + length);
+
+ MLX5_SET(mkc, mkc, free, 0);
+ MLX5_SET(mkc, mkc, umr_en, 0);
+ err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
+ if (err)
+ goto err_2;
- return err;
+ mr->mmkey.type = MLX5_MKEY_MR;
+ set_mr_fields(dev, mr, length, access_flags, iova);
+ mr->ibmr.pd = pd;
+ kvfree(in);
+ mlx5_ib_dbg(dev, "crossing mkey = 0x%x\n", mr->mmkey.key);
+
+ return &mr->ibmr;
+err_2:
+ kvfree(in);
+err_1:
+ kfree(mr);
+ return ERR_PTR(err);
}
/*
* If ibmr is NULL it will be allocated by reg_create.
* Else, the given ibmr will be used.
*/
-static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
- u64 virt_addr, u64 length,
- struct ib_umem *umem, int npages,
- int page_shift, int access_flags,
- bool populate)
+static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
+ u64 iova, int access_flags,
+ unsigned long page_size, bool populate,
+ int access_mode, u16 st_index, u8 ph)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr;
@@ -1075,63 +1265,88 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
int inlen;
u32 *in;
int err;
- bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
+ bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)) &&
+ (access_mode == MLX5_MKC_ACCESS_MODE_MTT) &&
+ (ph == MLX5_IB_NO_PH);
+ bool ksm_mode = (access_mode == MLX5_MKC_ACCESS_MODE_KSM);
- mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!page_size)
+ return ERR_PTR(-EINVAL);
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
mr->ibmr.pd = pd;
mr->access_flags = access_flags;
+ mr->page_shift = order_base_2(page_size);
inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
if (populate)
- inlen += sizeof(*pas) * roundup(npages, 2);
+ inlen += sizeof(*pas) *
+ roundup(ib_umem_num_dma_blocks(umem, page_size), 2);
in = kvzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
goto err_1;
}
pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
- if (populate && !(access_flags & IB_ACCESS_ON_DEMAND))
- mlx5_ib_populate_pas(dev, umem, page_shift, pas,
+ if (populate) {
+ if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND || ksm_mode)) {
+ err = -EINVAL;
+ goto err_2;
+ }
+ mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas,
pg_cap ? MLX5_IB_MTT_PRESENT : 0);
+ }
/* The pg_access bit allows setting the access flags
- * in the page list submitted with the command. */
+ * in the page list submitted with the command.
+ */
MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ set_mkc_access_pd_addr_fields(mkc, access_flags, iova,
+ populate ? pd : dev->umrc.pd);
+ /* In case of a data direct flow, overwrite the pdn field with its internal kernel PD */
+ if (umem->is_dmabuf && ksm_mode)
+ MLX5_SET(mkc, mkc, pd, dev->ddr.pdn);
+
MLX5_SET(mkc, mkc, free, !populate);
- MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
- MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
- MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
- MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
- MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
- MLX5_SET(mkc, mkc, lr, 1);
+ MLX5_SET(mkc, mkc, access_mode_1_0, access_mode);
MLX5_SET(mkc, mkc, umr_en, 1);
- MLX5_SET64(mkc, mkc, start_addr, virt_addr);
- MLX5_SET64(mkc, mkc, len, length);
- MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+ MLX5_SET64(mkc, mkc, len, umem->length);
MLX5_SET(mkc, mkc, bsf_octword_size, 0);
- MLX5_SET(mkc, mkc, translations_octword_size,
- get_octo_len(virt_addr, length, page_shift));
- MLX5_SET(mkc, mkc, log_page_size, page_shift);
- MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ if (ksm_mode)
+ MLX5_SET(mkc, mkc, translations_octword_size,
+ get_octo_len(iova, umem->length, mr->page_shift) * 2);
+ else
+ MLX5_SET(mkc, mkc, translations_octword_size,
+ get_octo_len(iova, umem->length, mr->page_shift));
+ MLX5_SET(mkc, mkc, log_page_size, mr->page_shift);
+ if (mlx5_umem_needs_ats(dev, umem, access_flags))
+ MLX5_SET(mkc, mkc, ma_translation_mode, 1);
if (populate) {
MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
- get_octo_len(virt_addr, length, page_shift));
+ get_octo_len(iova, umem->length, mr->page_shift));
+ }
+
+ if (ph != MLX5_IB_NO_PH) {
+ MLX5_SET(mkc, mkc, pcie_tph_en, 1);
+ MLX5_SET(mkc, mkc, pcie_tph_ph, ph);
+ if (st_index != MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX)
+ MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index, st_index);
}
- err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
+ err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
if (err) {
mlx5_ib_warn(dev, "create mkey failed\n");
goto err_2;
}
mr->mmkey.type = MLX5_MKEY_MR;
- mr->desc_size = sizeof(struct mlx5_mtt);
- mr->dev = dev;
+ mr->mmkey.ndescs = get_octo_len(iova, umem->length, mr->page_shift);
+ mr->umem = umem;
+ set_mr_fields(dev, mr, umem->length, access_flags, iova);
kvfree(in);
mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
@@ -1140,31 +1355,16 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
err_2:
kvfree(in);
-
err_1:
- if (!ibmr)
- kfree(mr);
-
+ kfree(mr);
return ERR_PTR(err);
}
-static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
- int npages, u64 length, int access_flags)
-{
- mr->npages = npages;
- atomic_add(npages, &dev->mdev->priv.reg_pages);
- mr->ibmr.lkey = mr->mmkey.key;
- mr->ibmr.rkey = mr->mmkey.key;
- mr->ibmr.length = length;
- mr->access_flags = access_flags;
-}
-
static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
u64 length, int acc, int mode)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
- struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_mr *mr;
void *mkc;
u32 *in;
@@ -1184,25 +1384,16 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7);
- MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
- MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
- MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
- MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
- MLX5_SET(mkc, mkc, lr, 1);
-
MLX5_SET64(mkc, mkc, len, length);
- MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
- MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET64(mkc, mkc, start_addr, start_addr);
+ set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd);
- err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
+ err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
if (err)
goto err_in;
kfree(in);
- mr->umem = NULL;
- set_mr_fields(dev, mr, 0, length, acc);
+ set_mr_fields(dev, mr, length, acc, start_addr);
return &mr->ibmr;
@@ -1223,7 +1414,8 @@ int mlx5_ib_advise_mr(struct ib_pd *pd,
struct uverbs_attr_bundle *attrs)
{
if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH &&
- advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE)
+ advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE &&
+ advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT)
return -EOPNOTSUPP;
return mlx5_ib_advise_mr_prefetch(pd, advice, flags,
@@ -1249,6 +1441,8 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
break;
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM:
if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS)
return ERR_PTR(-EINVAL);
@@ -1262,257 +1456,519 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
attr->access_flags, mode);
}
-struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
- u64 virt_addr, int access_flags,
- struct ib_udata *udata)
+static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
+ u64 iova, int access_flags,
+ struct ib_dmah *dmah)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr = NULL;
- bool populate_mtts = false;
- struct ib_umem *umem;
- int page_shift;
- int npages;
- int ncont;
- int order;
+ bool xlt_with_umr;
+ u16 st_index = MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX;
+ u8 ph = MLX5_IB_NO_PH;
int err;
- if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
- return ERR_PTR(-EOPNOTSUPP);
+ if (dmah) {
+ struct mlx5_ib_dmah *mdmah = to_mdmah(dmah);
- mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
- start, virt_addr, length, access_flags);
+ ph = dmah->ph;
+ if (dmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS))
+ st_index = mdmah->st_index;
+ }
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start &&
- length == U64_MAX) {
- if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
- !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
- return ERR_PTR(-EINVAL);
+ xlt_with_umr = mlx5r_umr_can_load_pas(dev, umem->length);
+ if (xlt_with_umr) {
+ mr = alloc_cacheable_mr(pd, umem, iova, access_flags,
+ MLX5_MKC_ACCESS_MODE_MTT,
+ st_index, ph);
+ } else {
+ unsigned long page_size = mlx5_umem_mkc_find_best_pgsz(
+ dev, umem, iova, MLX5_MKC_ACCESS_MODE_MTT);
- mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags);
- if (IS_ERR(mr))
- return ERR_CAST(mr);
- return &mr->ibmr;
+ mutex_lock(&dev->slow_path_mutex);
+ mr = reg_create(pd, umem, iova, access_flags, page_size,
+ true, MLX5_MKC_ACCESS_MODE_MTT,
+ st_index, ph);
+ mutex_unlock(&dev->slow_path_mutex);
+ }
+ if (IS_ERR(mr)) {
+ ib_umem_release(umem);
+ return ERR_CAST(mr);
}
- err = mr_umem_get(dev, udata, start, length, access_flags, &umem,
- &npages, &page_shift, &ncont, &order);
+ mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
- if (err < 0)
- return ERR_PTR(err);
+ atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
- if (use_umr(dev, order)) {
- mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,
- page_shift, order, access_flags);
- if (PTR_ERR(mr) == -EAGAIN) {
- mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
- mr = NULL;
- }
- populate_mtts = false;
- } else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) {
- if (access_flags & IB_ACCESS_ON_DEMAND) {
- err = -EINVAL;
- pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
- goto error;
+ if (xlt_with_umr) {
+ /*
+ * If the MR was created with reg_create then it will be
+ * configured properly but left disabled. It is safe to go ahead
+ * and configure it again via UMR while enabling it.
+ */
+ err = mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
+ if (err) {
+ mlx5_ib_dereg_mr(&mr->ibmr, NULL);
+ return ERR_PTR(err);
}
- populate_mtts = true;
}
+ return &mr->ibmr;
+}
- if (!mr) {
- if (!umr_can_modify_entity_size(dev))
- populate_mtts = true;
- mutex_lock(&dev->slow_path_mutex);
- mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
- page_shift, access_flags, populate_mtts);
- mutex_unlock(&dev->slow_path_mutex);
+static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 iova, int access_flags,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct ib_umem_odp *odp;
+ struct mlx5_ib_mr *mr;
+ int err;
+
+ if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ err = mlx5r_odp_create_eq(dev, &dev->odp_pf_eq);
+ if (err)
+ return ERR_PTR(err);
+ if (!start && length == U64_MAX) {
+ if (iova != 0)
+ return ERR_PTR(-EINVAL);
+ if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+ return ERR_PTR(-EINVAL);
+
+ mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
+ if (IS_ERR(mr))
+ return ERR_CAST(mr);
+ return &mr->ibmr;
}
+ /* ODP requires xlt update via umr to work. */
+ if (!mlx5r_umr_can_load_pas(dev, length))
+ return ERR_PTR(-EINVAL);
+
+ odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
+ &mlx5_mn_ops);
+ if (IS_ERR(odp))
+ return ERR_CAST(odp);
+
+ mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags,
+ MLX5_MKC_ACCESS_MODE_MTT,
+ MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX,
+ MLX5_IB_NO_PH);
if (IS_ERR(mr)) {
- err = PTR_ERR(mr);
- goto error;
+ ib_umem_release(&odp->umem);
+ return ERR_CAST(mr);
}
+ xa_init(&mr->implicit_children);
- mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
+ odp->private = mr;
+ err = mlx5r_store_odp_mkey(dev, &mr->mmkey);
+ if (err)
+ goto err_dereg_mr;
- mr->umem = umem;
- set_mr_fields(dev, mr, npages, length, access_flags);
+ err = mlx5_ib_init_odp_mr(mr);
+ if (err)
+ goto err_dereg_mr;
+ return &mr->ibmr;
- update_odp_mr(mr);
+err_dereg_mr:
+ mlx5_ib_dereg_mr(&mr->ibmr, NULL);
+ return ERR_PTR(err);
+}
- if (!populate_mtts) {
- int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
+struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 iova, int access_flags,
+ struct ib_dmah *dmah,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct ib_umem *umem;
+ int err;
- if (access_flags & IB_ACCESS_ON_DEMAND)
- update_xlt_flags |= MLX5_IB_UPD_XLT_ZAP;
+ if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) ||
+ ((access_flags & IB_ACCESS_ON_DEMAND) && dmah))
+ return ERR_PTR(-EOPNOTSUPP);
- err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift,
- update_xlt_flags);
+ mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
+ start, iova, length, access_flags);
- if (err) {
- dereg_mr(dev, mr);
- return ERR_PTR(err);
- }
+ err = mlx5r_umr_resource_init(dev);
+ if (err)
+ return ERR_PTR(err);
+
+ if (access_flags & IB_ACCESS_ON_DEMAND)
+ return create_user_odp_mr(pd, start, length, iova, access_flags,
+ udata);
+ umem = ib_umem_get(&dev->ib_dev, start, length, access_flags);
+ if (IS_ERR(umem))
+ return ERR_CAST(umem);
+ return create_real_mr(pd, umem, iova, access_flags, dmah);
+}
+
+static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach)
+{
+ struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv;
+ struct mlx5_ib_mr *mr = umem_dmabuf->private;
+
+ dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);
+
+ if (!umem_dmabuf->sgt || !mr)
+ return;
+
+ mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP);
+ ib_umem_dmabuf_unmap_pages(umem_dmabuf);
+}
+
+static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = {
+ .allow_peer2peer = 1,
+ .move_notify = mlx5_ib_dmabuf_invalidate_cb,
+};
+
+static struct ib_mr *
+reg_user_mr_dmabuf(struct ib_pd *pd, struct device *dma_device,
+ u64 offset, u64 length, u64 virt_addr,
+ int fd, int access_flags, int access_mode,
+ struct ib_dmah *dmah)
+{
+ bool pinned_mode = (access_mode == MLX5_MKC_ACCESS_MODE_KSM);
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_ib_mr *mr = NULL;
+ struct ib_umem_dmabuf *umem_dmabuf;
+ u16 st_index = MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX;
+ u8 ph = MLX5_IB_NO_PH;
+ int err;
+
+ err = mlx5r_umr_resource_init(dev);
+ if (err)
+ return ERR_PTR(err);
+
+ if (!pinned_mode)
+ umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev,
+ offset, length, fd,
+ access_flags,
+ &mlx5_ib_dmabuf_attach_ops);
+ else
+ umem_dmabuf = ib_umem_dmabuf_get_pinned_with_dma_device(&dev->ib_dev,
+ dma_device, offset, length,
+ fd, access_flags);
+
+ if (IS_ERR(umem_dmabuf)) {
+ mlx5_ib_dbg(dev, "umem_dmabuf get failed (%pe)\n", umem_dmabuf);
+ return ERR_CAST(umem_dmabuf);
}
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
- mr->live = 1;
- atomic_set(&mr->num_pending_prefetch, 0);
+ if (dmah) {
+ struct mlx5_ib_dmah *mdmah = to_mdmah(dmah);
+
+ ph = dmah->ph;
+ if (dmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS))
+ st_index = mdmah->st_index;
+ }
+
+ mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr,
+ access_flags, access_mode,
+ st_index, ph);
+ if (IS_ERR(mr)) {
+ ib_umem_release(&umem_dmabuf->umem);
+ return ERR_CAST(mr);
+ }
+
+ mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
+
+ atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages);
+ umem_dmabuf->private = mr;
+ if (!pinned_mode) {
+ err = mlx5r_store_odp_mkey(dev, &mr->mmkey);
+ if (err)
+ goto err_dereg_mr;
+ } else {
+ mr->data_direct = true;
}
+ err = mlx5_ib_init_dmabuf_mr(mr);
+ if (err)
+ goto err_dereg_mr;
return &mr->ibmr;
-error:
- ib_umem_release(umem);
+
+err_dereg_mr:
+ __mlx5_ib_dereg_mr(&mr->ibmr);
return ERR_PTR(err);
}
-static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+static struct ib_mr *
+reg_user_mr_dmabuf_by_data_direct(struct ib_pd *pd, u64 offset,
+ u64 length, u64 virt_addr,
+ int fd, int access_flags)
{
- struct mlx5_core_dev *mdev = dev->mdev;
- struct mlx5_umr_wr umrwr = {};
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_data_direct_dev *data_direct_dev;
+ struct ib_mr *crossing_mr;
+ struct ib_mr *crossed_mr;
+ int ret = 0;
- if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
- return 0;
+ /* Per HW behaviour, the IOVA must be page aligned in KSM mode */
+ if (!PAGE_ALIGNED(virt_addr) || (access_flags & IB_ACCESS_ON_DEMAND))
+ return ERR_PTR(-EOPNOTSUPP);
- umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
- MLX5_IB_SEND_UMR_FAIL_IF_FREE;
- umrwr.wr.opcode = MLX5_IB_WR_UMR;
- umrwr.mkey = mr->mmkey.key;
+ mutex_lock(&dev->data_direct_lock);
+ data_direct_dev = dev->data_direct_dev;
+ if (!data_direct_dev) {
+ ret = -EINVAL;
+ goto end;
+ }
+
+ /* If the device has no 'data direct mkey' with RO flags,
+ * mask relaxed ordering out of the access flags accordingly.
+ */
+ if (!dev->ddr.mkey_ro_valid)
+ access_flags &= ~IB_ACCESS_RELAXED_ORDERING;
+ crossed_mr = reg_user_mr_dmabuf(pd, &data_direct_dev->pdev->dev,
+ offset, length, virt_addr, fd,
+ access_flags, MLX5_MKC_ACCESS_MODE_KSM,
+ NULL);
+ if (IS_ERR(crossed_mr)) {
+ ret = PTR_ERR(crossed_mr);
+ goto end;
+ }
- return mlx5_ib_post_send_wait(dev, &umrwr);
+ mutex_lock(&dev->slow_path_mutex);
+ crossing_mr = reg_create_crossing_vhca_mr(pd, virt_addr, length, access_flags,
+ crossed_mr->lkey);
+ mutex_unlock(&dev->slow_path_mutex);
+ if (IS_ERR(crossing_mr)) {
+ __mlx5_ib_dereg_mr(crossed_mr);
+ ret = PTR_ERR(crossing_mr);
+ goto end;
+ }
+
+ list_add_tail(&to_mmr(crossed_mr)->dd_node, &dev->data_direct_mr_list);
+ to_mmr(crossing_mr)->dd_crossed_mr = to_mmr(crossed_mr);
+ to_mmr(crossing_mr)->data_direct = true;
+end:
+ mutex_unlock(&dev->data_direct_lock);
+ return ret ? ERR_PTR(ret) : crossing_mr;
}
-static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr,
- int access_flags, int flags)
+struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
+ u64 length, u64 virt_addr,
+ int fd, int access_flags,
+ struct ib_dmah *dmah,
+ struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct mlx5_umr_wr umrwr = {};
+ int mlx5_access_flags = 0;
int err;
- umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;
-
- umrwr.wr.opcode = MLX5_IB_WR_UMR;
- umrwr.mkey = mr->mmkey.key;
+ if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) ||
+ !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ return ERR_PTR(-EOPNOTSUPP);
- if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) {
- umrwr.pd = pd;
- umrwr.access_flags = access_flags;
- umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS)) {
+ err = uverbs_get_flags32(&mlx5_access_flags, attrs,
+ MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS,
+ MLX5_IB_UAPI_REG_DMABUF_ACCESS_DATA_DIRECT);
+ if (err)
+ return ERR_PTR(err);
}
- err = mlx5_ib_post_send_wait(dev, &umrwr);
+ mlx5_ib_dbg(dev,
+ "offset 0x%llx, virt_addr 0x%llx, length 0x%llx, fd %d, access_flags 0x%x, mlx5_access_flags 0x%x\n",
+ offset, virt_addr, length, fd, access_flags, mlx5_access_flags);
- return err;
+ /* dmabuf requires xlt update via umr to work. */
+ if (!mlx5r_umr_can_load_pas(dev, length))
+ return ERR_PTR(-EINVAL);
+
+ if (mlx5_access_flags & MLX5_IB_UAPI_REG_DMABUF_ACCESS_DATA_DIRECT)
+ return reg_user_mr_dmabuf_by_data_direct(pd, offset, length, virt_addr,
+ fd, access_flags);
+
+ return reg_user_mr_dmabuf(pd, pd->device->dma_device,
+ offset, length, virt_addr,
+ fd, access_flags, MLX5_MKC_ACCESS_MODE_MTT,
+ dmah);
}
-int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
- u64 length, u64 virt_addr, int new_access_flags,
- struct ib_pd *new_pd, struct ib_udata *udata)
+/*
+ * True if the change in access flags can be done via UMR, only some access
+ * flags can be updated.
+ */
+static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev,
+ unsigned int current_access_flags,
+ unsigned int target_access_flags)
{
- struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
- struct mlx5_ib_mr *mr = to_mmr(ib_mr);
- struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd;
- int access_flags = flags & IB_MR_REREG_ACCESS ?
- new_access_flags :
- mr->access_flags;
- int page_shift = 0;
- int upd_flags = 0;
- int npages = 0;
- int ncont = 0;
- int order = 0;
- u64 addr, len;
- int err;
+ unsigned int diffs = current_access_flags ^ target_access_flags;
+
+ if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING |
+ IB_ACCESS_REMOTE_ATOMIC))
+ return false;
+ return mlx5r_umr_can_reconfig(dev, current_access_flags,
+ target_access_flags);
+}
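/*
 * Illustrative sketch (standalone model; the flag values are stand-ins for
 * the IB_ACCESS_* bits, and the device-capability check done by
 * mlx5r_umr_can_reconfig() is omitted): the helper above allows a UMR-based
 * rereg only when every flag that actually changes falls inside the mutable
 * set; any other difference forces a full MR re-creation.
 */
#include <stdbool.h>
#include <stdio.h>

#define ACC_LOCAL_WRITE		0x01
#define ACC_REMOTE_WRITE	0x02
#define ACC_REMOTE_READ		0x04
#define ACC_ON_DEMAND		0x10	/* not UMR-mutable */

#define UMR_MUTABLE_MASK (ACC_LOCAL_WRITE | ACC_REMOTE_WRITE | ACC_REMOTE_READ)

static bool rereg_access_ok(unsigned int cur, unsigned int target)
{
	unsigned int diffs = cur ^ target;

	return (diffs & ~UMR_MUTABLE_MASK) == 0;
}

int main(void)
{
	/* toggling remote write only: UMR can do it */
	printf("%d\n", rereg_access_ok(ACC_LOCAL_WRITE,
				       ACC_LOCAL_WRITE | ACC_REMOTE_WRITE));
	/* adding ON_DEMAND: outside the mutable mask, re-create instead */
	printf("%d\n", rereg_access_ok(ACC_LOCAL_WRITE,
				       ACC_LOCAL_WRITE | ACC_ON_DEMAND));
	return 0;
}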
- mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
- start, virt_addr, length, access_flags);
+static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
+ struct ib_umem *new_umem,
+ int new_access_flags, u64 iova,
+ unsigned long *page_size)
+{
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
+
+ /* We only track the allocated sizes of MRs from the cache */
+ if (!mr->mmkey.cache_ent)
+ return false;
+ if (!mlx5r_umr_can_load_pas(dev, new_umem->length))
+ return false;
+
+ *page_size = mlx5_umem_mkc_find_best_pgsz(
+ dev, new_umem, iova, mr->mmkey.cache_ent->rb_key.access_mode);
+ if (WARN_ON(!*page_size))
+ return false;
+ return (mr->mmkey.cache_ent->rb_key.ndescs) >=
+ ib_umem_num_dma_blocks(new_umem, *page_size);
+}
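/*
 * Illustrative sketch (standalone; the block count mirrors roughly how
 * ib_umem_num_dma_blocks() spans from the aligned-down iova to the aligned-up
 * end): a cached mkey can be reused for a new umem only if the entry was
 * sized for at least as many DMA blocks, at the chosen page size, as the new
 * mapping needs.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t num_dma_blocks(uint64_t iova, uint64_t length, uint64_t pgsz)
{
	uint64_t first = iova & ~(pgsz - 1);			   /* ALIGN_DOWN */
	uint64_t last = (iova + length + pgsz - 1) & ~(pgsz - 1); /* ALIGN */

	return (last - first) / pgsz;
}

static bool rereg_pas_fits(uint64_t ent_ndescs, uint64_t iova, uint64_t length,
			   uint64_t pgsz)
{
	return ent_ndescs >= num_dma_blocks(iova, length, pgsz);
}

int main(void)
{
	/* 1 MiB at a 4 KiB page size, starting 512 bytes into a page */
	printf("blocks=%llu\n",
	       (unsigned long long)num_dma_blocks(0x1200, 1 << 20, 4096));
	printf("fits in 512-desc entry: %d\n",
	       rereg_pas_fits(512, 0x1200, 1 << 20, 4096));
	return 0;
}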
- atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
+static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
+ int access_flags, int flags, struct ib_umem *new_umem,
+ u64 iova, unsigned long page_size)
+{
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
+ int upd_flags = MLX5_IB_UPD_XLT_ADDR | MLX5_IB_UPD_XLT_ENABLE;
+ struct ib_umem *old_umem = mr->umem;
+ int err;
- if (!mr->umem)
- return -EINVAL;
+ /*
+ * To keep everything simple, the MR is revoked before we start to mess
+ * with it. This ensures the change is atomic relative to any use of the
+ * MR.
+ */
+ err = mlx5r_umr_revoke_mr(mr);
+ if (err)
+ return err;
- if (flags & IB_MR_REREG_TRANS) {
- addr = virt_addr;
- len = length;
- } else {
- addr = mr->umem->address;
- len = mr->umem->length;
+ if (flags & IB_MR_REREG_PD) {
+ mr->ibmr.pd = pd;
+ upd_flags |= MLX5_IB_UPD_XLT_PD;
+ }
+ if (flags & IB_MR_REREG_ACCESS) {
+ mr->access_flags = access_flags;
+ upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
}
- if (flags != IB_MR_REREG_PD) {
+ mr->ibmr.iova = iova;
+ mr->ibmr.length = new_umem->length;
+ mr->page_shift = order_base_2(page_size);
+ mr->umem = new_umem;
+ err = mlx5r_umr_update_mr_pas(mr, upd_flags);
+ if (err) {
/*
- * Replace umem. This needs to be done whether or not UMR is
- * used.
+ * The MR is revoked at this point, so it is safe to free
+ * new_umem.
*/
- flags |= IB_MR_REREG_TRANS;
- ib_umem_release(mr->umem);
- mr->umem = NULL;
- err = mr_umem_get(dev, udata, addr, len, access_flags,
- &mr->umem, &npages, &page_shift, &ncont,
- &order);
- if (err)
- goto err;
+ mr->umem = old_umem;
+ return err;
}
- if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) {
- /*
- * UMR can't be used - MKey needs to be replaced.
- */
- if (mr->allocated_from_cache)
- err = unreg_umr(dev, mr);
- else
- err = destroy_mkey(dev, mr);
- if (err)
- goto err;
+ atomic_sub(ib_umem_num_pages(old_umem), &dev->mdev->priv.reg_pages);
+ ib_umem_release(old_umem);
+ atomic_add(ib_umem_num_pages(new_umem), &dev->mdev->priv.reg_pages);
+ return 0;
+}
+
+struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
+ u64 length, u64 iova, int new_access_flags,
+ struct ib_pd *new_pd,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
+ struct mlx5_ib_mr *mr = to_mmr(ib_mr);
+ int err;
- mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
- page_shift, access_flags, true);
+ if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || mr->data_direct ||
+ mr->mmkey.rb_key.ph != MLX5_IB_NO_PH)
+ return ERR_PTR(-EOPNOTSUPP);
- if (IS_ERR(mr)) {
- err = PTR_ERR(mr);
- mr = to_mmr(ib_mr);
- goto err;
+ mlx5_ib_dbg(
+ dev,
+ "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
+ start, iova, length, new_access_flags);
+
+ if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (!(flags & IB_MR_REREG_ACCESS))
+ new_access_flags = mr->access_flags;
+ if (!(flags & IB_MR_REREG_PD))
+ new_pd = ib_mr->pd;
+
+ if (!(flags & IB_MR_REREG_TRANS)) {
+ struct ib_umem *umem;
+
+ /* Fast path for PD/access change */
+ if (can_use_umr_rereg_access(dev, mr->access_flags,
+ new_access_flags)) {
+ err = mlx5r_umr_rereg_pd_access(mr, new_pd,
+ new_access_flags);
+ if (err)
+ return ERR_PTR(err);
+ return NULL;
}
+ /* DM or ODP MRs don't have a normal umem, so we can't re-use it */
+ if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr))
+ goto recreate;
- mr->allocated_from_cache = 0;
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
- mr->live = 1;
- } else {
/*
- * Send a UMR WQE
+ * Only one active MR can refer to a umem at one time, revoke
+ * the old MR before assigning the umem to the new one.
*/
- mr->ibmr.pd = pd;
- mr->access_flags = access_flags;
- mr->mmkey.iova = addr;
- mr->mmkey.size = len;
- mr->mmkey.pd = to_mpd(pd)->pdn;
-
- if (flags & IB_MR_REREG_TRANS) {
- upd_flags = MLX5_IB_UPD_XLT_ADDR;
- if (flags & IB_MR_REREG_PD)
- upd_flags |= MLX5_IB_UPD_XLT_PD;
- if (flags & IB_MR_REREG_ACCESS)
- upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
- err = mlx5_ib_update_xlt(mr, 0, npages, page_shift,
- upd_flags);
- } else {
- err = rereg_umr(pd, mr, access_flags, flags);
- }
-
+ err = mlx5r_umr_revoke_mr(mr);
if (err)
- goto err;
- }
-
- set_mr_fields(dev, mr, npages, len, access_flags);
+ return ERR_PTR(err);
+ umem = mr->umem;
+ mr->umem = NULL;
+ atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
- update_odp_mr(mr);
- return 0;
+ return create_real_mr(new_pd, umem, mr->ibmr.iova,
+ new_access_flags, NULL);
+ }
-err:
- if (mr->umem) {
- ib_umem_release(mr->umem);
- mr->umem = NULL;
+ /*
+ * DM doesn't have a PAS list, so we can't re-use it; ODP/dmabuf do,
+ * but the logic around releasing the umem is different.
+ */
+ if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr))
+ goto recreate;
+
+ if (!(new_access_flags & IB_ACCESS_ON_DEMAND) &&
+ can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) {
+ struct ib_umem *new_umem;
+ unsigned long page_size;
+
+ new_umem = ib_umem_get(&dev->ib_dev, start, length,
+ new_access_flags);
+ if (IS_ERR(new_umem))
+ return ERR_CAST(new_umem);
+
+ /* Fast path for PAS change */
+ if (can_use_umr_rereg_pas(mr, new_umem, new_access_flags, iova,
+ &page_size)) {
+ err = umr_rereg_pas(mr, new_pd, new_access_flags, flags,
+ new_umem, iova, page_size);
+ if (err) {
+ ib_umem_release(new_umem);
+ return ERR_PTR(err);
+ }
+ return NULL;
+ }
+ return create_real_mr(new_pd, new_umem, iova, new_access_flags, NULL);
}
- clean_mr(dev, mr);
- return err;
+
+ /*
+ * Everything else has no state we can preserve, just create a new MR
+ * from scratch
+ */
+recreate:
+ return mlx5_ib_reg_user_mr(new_pd, start, length, iova,
+ new_access_flags, NULL, udata);
}
static int
@@ -1521,11 +1977,18 @@ mlx5_alloc_priv_descs(struct ib_device *device,
int ndescs,
int desc_size)
{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct device *ddev = &dev->mdev->pdev->dev;
int size = ndescs * desc_size;
int add_size;
int ret;
add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
+ if (is_power_of_2(MLX5_UMR_ALIGN) && add_size) {
+ int end = max_t(int, MLX5_UMR_ALIGN, roundup_pow_of_two(size));
+
+ add_size = min_t(int, end - size, add_size);
+ }
mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
if (!mr->descs_alloc)
@@ -1533,9 +1996,8 @@ mlx5_alloc_priv_descs(struct ib_device *device,
mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
- mr->desc_map = dma_map_single(device->dev.parent, mr->descs,
- size, DMA_TO_DEVICE);
- if (dma_mapping_error(device->dev.parent, mr->desc_map)) {
+ mr->desc_map = dma_map_single(ddev, mr->descs, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(ddev, mr->desc_map)) {
ret = -ENOMEM;
goto err;
}
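/*
 * Illustrative sketch (standalone, assumed constants, slightly simplified):
 * the hunk above trims the alignment padding added for MLX5_UMR_ALIGN.
 * kmalloc() serves power-of-two buckets that are naturally aligned to their
 * size, so once the request is rounded up to a bucket of at least
 * MLX5_UMR_ALIGN, only the slack left inside that bucket is needed as
 * padding; asking for more would spill into the next, twice as large,
 * bucket. 2048 and 8 below stand in for MLX5_UMR_ALIGN and
 * ARCH_KMALLOC_MINALIGN.
 */
#include <stdio.h>

#define UMR_ALIGN	 2048U	/* stand-in for MLX5_UMR_ALIGN */
#define KMALLOC_MINALIGN    8U	/* stand-in for ARCH_KMALLOC_MINALIGN */

static unsigned int roundup_pow_of_two_u(unsigned int n)
{
	unsigned int r = 1;

	while (r < n)
		r <<= 1;
	return r;
}

static unsigned int padded_alloc_size(unsigned int size)
{
	unsigned int add_size = UMR_ALIGN - KMALLOC_MINALIGN;
	unsigned int end = roundup_pow_of_two_u(size);

	if (end < UMR_ALIGN)
		end = UMR_ALIGN;
	if (end - size < add_size)
		add_size = end - size;
	return size + add_size;
}

int main(void)
{
	/* 3000 bytes of descriptors: pad only up to the 4096-byte bucket */
	printf("%u -> %u\n", 3000U, padded_alloc_size(3000));
	/* 1024 bytes: pad up to the 2048-byte (UMR_ALIGN) bucket */
	printf("%u -> %u\n", 1024U, padded_alloc_size(1024));
	return 0;
}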
@@ -1550,107 +2012,333 @@ err:
static void
mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
{
- if (mr->descs) {
+ if (!mr->umem && !mr->data_direct &&
+ mr->ibmr.type != IB_MR_TYPE_DM && mr->descs) {
struct ib_device *device = mr->ibmr.device;
int size = mr->max_descs * mr->desc_size;
+ struct mlx5_ib_dev *dev = to_mdev(device);
- dma_unmap_single(device->dev.parent, mr->desc_map,
- size, DMA_TO_DEVICE);
+ dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size,
+ DMA_TO_DEVICE);
kfree(mr->descs_alloc);
mr->descs = NULL;
}
}
-static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+static int cache_ent_find_and_store(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_mr *mr)
{
- int allocated_from_cache = mr->allocated_from_cache;
+ struct mlx5_mkey_cache *cache = &dev->cache;
+ struct mlx5_cache_ent *ent;
+ int ret;
+
+ if (mr->mmkey.cache_ent) {
+ spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
+ goto end;
+ }
+
+ mutex_lock(&cache->rb_lock);
+ ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key);
+ if (ent) {
+ if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) {
+ if (ent->disabled) {
+ mutex_unlock(&cache->rb_lock);
+ return -EOPNOTSUPP;
+ }
+ mr->mmkey.cache_ent = ent;
+ spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
+ mutex_unlock(&cache->rb_lock);
+ goto end;
+ }
+ }
+
+ ent = mlx5r_cache_create_ent_locked(dev, mr->mmkey.rb_key, false);
+ mutex_unlock(&cache->rb_lock);
+ if (IS_ERR(ent))
+ return PTR_ERR(ent);
+
+ mr->mmkey.cache_ent = ent;
+ spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
+
+end:
+ ret = push_mkey_locked(mr->mmkey.cache_ent, mr->mmkey.key);
+ spin_unlock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
+ return ret;
+}
+
+static int mlx5_ib_revoke_data_direct_mr(struct mlx5_ib_mr *mr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
+ struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem);
+ int err;
+
+ lockdep_assert_held(&dev->data_direct_lock);
+ mr->revoked = true;
+ err = mlx5r_umr_revoke_mr(mr);
+ if (WARN_ON(err))
+ return err;
+
+ ib_umem_dmabuf_revoke(umem_dmabuf);
+ return 0;
+}
+
+void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_mr *mr, *next;
+
+ lockdep_assert_held(&dev->data_direct_lock);
+
+ list_for_each_entry_safe(mr, next, &dev->data_direct_mr_list, dd_node) {
+ list_del(&mr->dd_node);
+ mlx5_ib_revoke_data_direct_mr(mr);
+ }
+}
+
+static int mlx5_umr_revoke_mr_with_lock(struct mlx5_ib_mr *mr)
+{
+ bool is_odp_dma_buf = is_dmabuf_mr(mr) &&
+ !to_ib_umem_dmabuf(mr->umem)->pinned;
+ bool is_odp = is_odp_mr(mr);
+ int ret;
+
+ if (is_odp)
+ mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex);
+
+ if (is_odp_dma_buf)
+ dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv,
+ NULL);
+
+ ret = mlx5r_umr_revoke_mr(mr);
+
+ if (is_odp) {
+ if (!ret)
+ to_ib_umem_odp(mr->umem)->private = NULL;
+ mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex);
+ }
+
+ if (is_odp_dma_buf) {
+ if (!ret)
+ to_ib_umem_dmabuf(mr->umem)->private = NULL;
+ dma_resv_unlock(
+ to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
+ }
+
+ return ret;
+}
+
+static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr)
+{
+ bool is_odp_dma_buf = is_dmabuf_mr(mr) &&
+ !to_ib_umem_dmabuf(mr->umem)->pinned;
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
+ struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
+ bool is_odp = is_odp_mr(mr);
+ bool from_cache = !!ent;
+ int ret;
+
+ if (mr->mmkey.cacheable && !mlx5_umr_revoke_mr_with_lock(mr) &&
+ !cache_ent_find_and_store(dev, mr)) {
+ ent = mr->mmkey.cache_ent;
+ /* upon storing to a clean temp entry - schedule its cleanup */
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ if (from_cache)
+ ent->in_use--;
+ if (ent->is_tmp && !ent->tmp_cleanup_scheduled) {
+ mod_delayed_work(ent->dev->cache.wq, &ent->dwork,
+ secs_to_jiffies(30));
+ ent->tmp_cleanup_scheduled = true;
+ }
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ return 0;
+ }
+
+ if (ent) {
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ ent->in_use--;
+ mr->mmkey.cache_ent = NULL;
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ }
+
+ if (is_odp)
+ mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex);
+
+ if (is_odp_dma_buf)
+ dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv,
+ NULL);
+ ret = destroy_mkey(dev, mr);
+ if (is_odp) {
+ if (!ret)
+ to_ib_umem_odp(mr->umem)->private = NULL;
+ mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex);
+ }
+
+ if (is_odp_dma_buf) {
+ if (!ret)
+ to_ib_umem_dmabuf(mr->umem)->private = NULL;
+ dma_resv_unlock(
+ to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
+ }
+ return ret;
+}
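/*
 * An illustrative summary of the cleanup policy above:
 *
 *   - If the mkey is cacheable, can be revoked via UMR, and a matching
 *     cache entry exists or can be created, the mkey is parked in the
 *     cache; temporary entries additionally get a delayed cleanup
 *     scheduled.
 *   - Otherwise the cache reference (if any) is dropped and the mkey is
 *     destroyed, holding the ODP umem_mutex or the dmabuf reservation
 *     lock across the destroy so page-fault and invalidation paths
 *     never observe a half-destroyed mkey.
 */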
+
+static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
+ int rc;
+
+ /*
+ * Any async use of the MR must hold the refcount; once the refcount
+ * goes to zero no other thread, such as ODP page faults, prefetch, or any
+ * UMR activity, can touch the mkey. Thus it is safe to destroy it.
+ */
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) &&
+ refcount_read(&mr->mmkey.usecount) != 0 &&
+ xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)))
+ mlx5r_deref_wait_odp_mkey(&mr->mmkey);
+
+ if (ibmr->type == IB_MR_TYPE_INTEGRITY) {
+ xa_cmpxchg(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key),
+ mr->sig, NULL, GFP_KERNEL);
+
+ if (mr->mtt_mr) {
+ rc = mlx5_ib_dereg_mr(&mr->mtt_mr->ibmr, NULL);
+ if (rc)
+ return rc;
+ mr->mtt_mr = NULL;
+ }
+ if (mr->klm_mr) {
+ rc = mlx5_ib_dereg_mr(&mr->klm_mr->ibmr, NULL);
+ if (rc)
+ return rc;
+ mr->klm_mr = NULL;
+ }
- if (mr->sig) {
if (mlx5_core_destroy_psv(dev->mdev,
mr->sig->psv_memory.psv_idx))
mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
mr->sig->psv_memory.psv_idx);
- if (mlx5_core_destroy_psv(dev->mdev,
- mr->sig->psv_wire.psv_idx))
+ if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx))
mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
mr->sig->psv_wire.psv_idx);
kfree(mr->sig);
mr->sig = NULL;
}
- mlx5_free_priv_descs(mr);
+ /* Stop DMA */
+ rc = mlx5r_handle_mkey_cleanup(mr);
+ if (rc)
+ return rc;
+
+ if (mr->umem) {
+ bool is_odp = is_odp_mr(mr);
+
+ if (!is_odp)
+ atomic_sub(ib_umem_num_pages(mr->umem),
+ &dev->mdev->priv.reg_pages);
+ ib_umem_release(mr->umem);
+ if (is_odp)
+ mlx5_ib_free_odp_mr(mr);
+ }
- if (!allocated_from_cache)
- destroy_mkey(dev, mr);
+ if (!mr->mmkey.cache_ent)
+ mlx5_free_priv_descs(mr);
+
+ kfree(mr);
+ return 0;
}
-static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+static int dereg_crossing_data_direct_mr(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_mr *mr)
{
- int npages = mr->npages;
- struct ib_umem *umem = mr->umem;
+ struct mlx5_ib_mr *dd_crossed_mr = mr->dd_crossed_mr;
+ int ret;
- if (is_odp_mr(mr)) {
- struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem);
+ ret = __mlx5_ib_dereg_mr(&mr->ibmr);
+ if (ret)
+ return ret;
- /* Prevent new page faults and
- * prefetch requests from succeeding
- */
- mr->live = 0;
-
- /* dequeue pending prefetch requests for the mr */
- if (atomic_read(&mr->num_pending_prefetch))
- flush_workqueue(system_unbound_wq);
- WARN_ON(atomic_read(&mr->num_pending_prefetch));
-
- /* Wait for all running page-fault handlers to finish. */
- synchronize_srcu(&dev->mr_srcu);
- /* Destroy all page mappings */
- if (umem_odp->page_list)
- mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem),
- ib_umem_end(umem));
- else
- mlx5_ib_free_implicit_mr(mr);
- /*
- * We kill the umem before the MR for ODP,
- * so that there will not be any invalidations in
- * flight, looking at the *mr struct.
- */
- ib_umem_release(umem);
- atomic_sub(npages, &dev->mdev->priv.reg_pages);
+ mutex_lock(&dev->data_direct_lock);
+ if (!dd_crossed_mr->revoked)
+ list_del(&dd_crossed_mr->dd_node);
- /* Avoid double-freeing the umem. */
- umem = NULL;
- }
+ ret = __mlx5_ib_dereg_mr(&dd_crossed_mr->ibmr);
+ mutex_unlock(&dev->data_direct_lock);
+ return ret;
+}
+
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
- clean_mr(dev, mr);
+ if (mr->data_direct)
+ return dereg_crossing_data_direct_mr(dev, mr);
- /*
- * We should unregister the DMA address from the HCA before
- * remove the DMA mapping.
- */
- mlx5_mr_cache_free(dev, mr);
- if (umem) {
- ib_umem_release(umem);
- atomic_sub(npages, &dev->mdev->priv.reg_pages);
- }
- if (!mr->allocated_from_cache)
- kfree(mr);
+ return __mlx5_ib_dereg_mr(ibmr);
}
-int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs,
+ int access_mode, int page_shift)
{
- dereg_mr(to_mdev(ibmr->device), to_mmr(ibmr));
- return 0;
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ void *mkc;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+ /* This is only used from the kernel, so setting the PD is OK. */
+ set_mkc_access_pd_addr_fields(mkc, IB_ACCESS_RELAXED_ORDERING, 0, pd);
+ MLX5_SET(mkc, mkc, free, 1);
+ MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
+ MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3);
+ MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7);
+ MLX5_SET(mkc, mkc, umr_en, 1);
+ MLX5_SET(mkc, mkc, log_page_size, page_shift);
+ if (access_mode == MLX5_MKC_ACCESS_MODE_PA ||
+ access_mode == MLX5_MKC_ACCESS_MODE_MTT)
+ MLX5_SET(mkc, mkc, ma_translation_mode, MLX5_CAP_GEN(dev->mdev, ats));
}
-struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
- u32 max_num_sg, struct ib_udata *udata)
+static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
+ int ndescs, int desc_size, int page_shift,
+ int access_mode, u32 *in, int inlen)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ int err;
+
+ mr->access_mode = access_mode;
+ mr->desc_size = desc_size;
+ mr->max_descs = ndescs;
+
+ err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size);
+ if (err)
+ return err;
+
+ mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift);
+
+ err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
+ if (err)
+ goto err_free_descs;
+
+ mr->mmkey.type = MLX5_MKEY_MR;
+ mr->ibmr.lkey = mr->mmkey.key;
+ mr->ibmr.rkey = mr->mmkey.key;
+
+ return 0;
+
+err_free_descs:
+ mlx5_free_priv_descs(mr);
+ return err;
+}
+
+static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd,
+ u32 max_num_sg, u32 max_num_meta_sg,
+ int desc_size, int access_mode)
+{
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
- int ndescs = ALIGN(max_num_sg, 4);
+ int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4);
+ int page_shift = 0;
struct mlx5_ib_mr *mr;
- void *mkc;
u32 *in;
int err;
@@ -1658,99 +2346,175 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
if (!mr)
return ERR_PTR(-ENOMEM);
+ mr->ibmr.pd = pd;
+ mr->ibmr.device = pd->device;
+
in = kzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
goto err_free;
}
+ if (access_mode == MLX5_MKC_ACCESS_MODE_MTT)
+ page_shift = PAGE_SHIFT;
+
+ err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift,
+ access_mode, in, inlen);
+ if (err)
+ goto err_free_in;
+
+ mr->umem = NULL;
+ kfree(in);
+
+ return mr;
+
+err_free_in:
+ kfree(in);
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
+ int ndescs, u32 *in, int inlen)
+{
+ return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt),
+ PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in,
+ inlen);
+}
+
+static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
+ int ndescs, u32 *in, int inlen)
+{
+ return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_klm),
+ 0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
+}
+
+static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
+ int max_num_sg, int max_num_meta_sg,
+ u32 *in, int inlen)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ u32 psv_index[2];
+ void *mkc;
+ int err;
+
+ mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
+ if (!mr->sig)
+ return -ENOMEM;
+
+ /* create mem & wire PSVs */
+ err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index);
+ if (err)
+ goto err_free_sig;
+
+ mr->sig->psv_memory.psv_idx = psv_index[0];
+ mr->sig->psv_wire.psv_idx = psv_index[1];
+
+ mr->sig->sig_status_checked = true;
+ mr->sig->sig_err_exists = false;
+ /* Next UMR, Arm SIGERR */
+ ++mr->sig->sigerr_count;
+ mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
+ sizeof(struct mlx5_klm),
+ MLX5_MKC_ACCESS_MODE_KLMS);
+ if (IS_ERR(mr->klm_mr)) {
+ err = PTR_ERR(mr->klm_mr);
+ goto err_destroy_psv;
+ }
+ mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
+ sizeof(struct mlx5_mtt),
+ MLX5_MKC_ACCESS_MODE_MTT);
+ if (IS_ERR(mr->mtt_mr)) {
+ err = PTR_ERR(mr->mtt_mr);
+ goto err_free_klm_mr;
+ }
+
+ /* Set bsf descriptors for mkey */
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
- MLX5_SET(mkc, mkc, free, 1);
- MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
- MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(mkc, mkc, bsf_en, 1);
+ MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
- if (mr_type == IB_MR_TYPE_MEM_REG) {
- mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
- MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
- err = mlx5_alloc_priv_descs(pd->device, mr,
- ndescs, sizeof(struct mlx5_mtt));
- if (err)
- goto err_free_in;
+ err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0,
+ MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
+ if (err)
+ goto err_free_mtt_mr;
- mr->desc_size = sizeof(struct mlx5_mtt);
- mr->max_descs = ndescs;
- } else if (mr_type == IB_MR_TYPE_SG_GAPS) {
- mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
+ err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key),
+ mr->sig, GFP_KERNEL));
+ if (err)
+ goto err_free_descs;
+ return 0;
- err = mlx5_alloc_priv_descs(pd->device, mr,
- ndescs, sizeof(struct mlx5_klm));
- if (err)
- goto err_free_in;
- mr->desc_size = sizeof(struct mlx5_klm);
- mr->max_descs = ndescs;
- } else if (mr_type == IB_MR_TYPE_SIGNATURE) {
- u32 psv_index[2];
-
- MLX5_SET(mkc, mkc, bsf_en, 1);
- MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
- mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
- if (!mr->sig) {
- err = -ENOMEM;
- goto err_free_in;
- }
+err_free_descs:
+ destroy_mkey(dev, mr);
+ mlx5_free_priv_descs(mr);
+err_free_mtt_mr:
+ mlx5_ib_dereg_mr(&mr->mtt_mr->ibmr, NULL);
+ mr->mtt_mr = NULL;
+err_free_klm_mr:
+ mlx5_ib_dereg_mr(&mr->klm_mr->ibmr, NULL);
+ mr->klm_mr = NULL;
+err_destroy_psv:
+ if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx))
+ mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
+ mr->sig->psv_memory.psv_idx);
+ if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx))
+ mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
+ mr->sig->psv_wire.psv_idx);
+err_free_sig:
+ kfree(mr->sig);
- /* create mem & wire PSVs */
- err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
- 2, psv_index);
- if (err)
- goto err_free_sig;
+ return err;
+}
- mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
- mr->sig->psv_memory.psv_idx = psv_index[0];
- mr->sig->psv_wire.psv_idx = psv_index[1];
+static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd,
+ enum ib_mr_type mr_type, u32 max_num_sg,
+ u32 max_num_meta_sg)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ int ndescs = ALIGN(max_num_sg, 4);
+ struct mlx5_ib_mr *mr;
+ u32 *in;
+ int err;
- mr->sig->sig_status_checked = true;
- mr->sig->sig_err_exists = false;
- /* Next UMR, Arm SIGERR */
- ++mr->sig->sigerr_count;
- } else {
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ in = kzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ mr->ibmr.device = pd->device;
+ mr->umem = NULL;
+
+ switch (mr_type) {
+ case IB_MR_TYPE_MEM_REG:
+ err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen);
+ break;
+ case IB_MR_TYPE_SG_GAPS:
+ err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen);
+ break;
+ case IB_MR_TYPE_INTEGRITY:
+ err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg,
+ max_num_meta_sg, in, inlen);
+ break;
+ default:
mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
err = -EINVAL;
- goto err_free_in;
}
- MLX5_SET(mkc, mkc, access_mode_1_0, mr->access_mode & 0x3);
- MLX5_SET(mkc, mkc, access_mode_4_2, (mr->access_mode >> 2) & 0x7);
- MLX5_SET(mkc, mkc, umr_en, 1);
-
- mr->ibmr.device = pd->device;
- err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
if (err)
- goto err_destroy_psv;
+ goto err_free_in;
- mr->mmkey.type = MLX5_MKEY_MR;
- mr->ibmr.lkey = mr->mmkey.key;
- mr->ibmr.rkey = mr->mmkey.key;
- mr->umem = NULL;
kfree(in);
return &mr->ibmr;
-err_destroy_psv:
- if (mr->sig) {
- if (mlx5_core_destroy_psv(dev->mdev,
- mr->sig->psv_memory.psv_idx))
- mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
- mr->sig->psv_memory.psv_idx);
- if (mlx5_core_destroy_psv(dev->mdev,
- mr->sig->psv_wire.psv_idx))
- mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
- mr->sig->psv_wire.psv_idx);
- }
- mlx5_free_priv_descs(mr);
-err_free_sig:
- kfree(mr->sig);
err_free_in:
kfree(in);
err_free:
@@ -1758,15 +2522,27 @@ err_free:
return ERR_PTR(err);
}
-struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata)
+struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg)
+{
+ return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0);
+}
+
+struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
+ u32 max_num_sg, u32 max_num_meta_sg)
{
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg,
+ max_num_meta_sg);
+}
+
+int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibmw->device);
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
- struct mlx5_ib_mw *mw = NULL;
+ struct mlx5_ib_mw *mw = to_mmw(ibmw);
+ unsigned int ndescs;
u32 *in = NULL;
void *mkc;
- int ndescs;
int err;
struct mlx5_ib_alloc_mw req = {};
struct {
@@ -1776,73 +2552,79 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
if (err)
- return ERR_PTR(err);
+ return err;
if (req.comp_mask || req.reserved1 || req.reserved2)
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
if (udata->inlen > sizeof(req) &&
!ib_is_udata_cleared(udata, sizeof(req),
udata->inlen - sizeof(req)))
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
- mw = kzalloc(sizeof(*mw), GFP_KERNEL);
in = kzalloc(inlen, GFP_KERNEL);
- if (!mw || !in) {
- err = -ENOMEM;
- goto free;
- }
+ if (!in)
+ return -ENOMEM;
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
MLX5_SET(mkc, mkc, free, 1);
MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
- MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(mkc, mkc, pd, to_mpd(ibmw->pd)->pdn);
MLX5_SET(mkc, mkc, umr_en, 1);
MLX5_SET(mkc, mkc, lr, 1);
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
- MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2)));
+ MLX5_SET(mkc, mkc, en_rinval, !!((ibmw->type == IB_MW_TYPE_2)));
MLX5_SET(mkc, mkc, qpn, 0xffffff);
- err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen);
+ err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen);
if (err)
goto free;
mw->mmkey.type = MLX5_MKEY_MW;
- mw->ibmw.rkey = mw->mmkey.key;
- mw->ndescs = ndescs;
+ ibmw->rkey = mw->mmkey.key;
+ mw->mmkey.ndescs = ndescs;
- resp.response_length = min(offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length), udata->outlen);
+ resp.response_length =
+ min(offsetofend(typeof(resp), response_length), udata->outlen);
if (resp.response_length) {
err = ib_copy_to_udata(udata, &resp, resp.response_length);
- if (err) {
- mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
- goto free;
- }
+ if (err)
+ goto free_mkey;
+ }
+
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+ err = mlx5r_store_odp_mkey(dev, &mw->mmkey);
+ if (err)
+ goto free_mkey;
}
kfree(in);
- return &mw->ibmw;
+ return 0;
+free_mkey:
+ mlx5_core_destroy_mkey(dev->mdev, mw->mmkey.key);
free:
- kfree(mw);
kfree(in);
- return ERR_PTR(err);
+ return err;
}
int mlx5_ib_dealloc_mw(struct ib_mw *mw)
{
+ struct mlx5_ib_dev *dev = to_mdev(mw->device);
struct mlx5_ib_mw *mmw = to_mmw(mw);
- int err;
- err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev,
- &mmw->mmkey);
- if (!err)
- kfree(mmw);
- return err;
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) &&
+ xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key)))
+ /*
+ * pagefault_single_data_segment() may be accessing mmw
+ * if the user bound an ODP MR to this MW.
+ */
+ mlx5r_deref_wait_odp_mkey(&mmw->mmkey);
+
+ return mlx5_core_destroy_mkey(dev->mdev, mmw->mmkey.key);
}
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
@@ -1887,16 +2669,53 @@ done:
}
static int
+mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ unsigned int sg_offset = 0;
+ int n = 0;
+
+ mr->meta_length = 0;
+ if (data_sg_nents == 1) {
+ n++;
+ mr->mmkey.ndescs = 1;
+ if (data_sg_offset)
+ sg_offset = *data_sg_offset;
+ mr->data_length = sg_dma_len(data_sg) - sg_offset;
+ mr->data_iova = sg_dma_address(data_sg) + sg_offset;
+ if (meta_sg_nents == 1) {
+ n++;
+ mr->meta_ndescs = 1;
+ if (meta_sg_offset)
+ sg_offset = *meta_sg_offset;
+ else
+ sg_offset = 0;
+ mr->meta_length = sg_dma_len(meta_sg) - sg_offset;
+ mr->pi_iova = sg_dma_address(meta_sg) + sg_offset;
+ }
+ ibmr->length = mr->data_length + mr->meta_length;
+ }
+
+ return n;
+}
+
+static int
mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
struct scatterlist *sgl,
unsigned short sg_nents,
- unsigned int *sg_offset_p)
+ unsigned int *sg_offset_p,
+ struct scatterlist *meta_sgl,
+ unsigned short meta_sg_nents,
+ unsigned int *meta_sg_offset_p)
{
struct scatterlist *sg = sgl;
struct mlx5_klm *klms = mr->descs;
unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
u32 lkey = mr->ibmr.pd->local_dma_lkey;
- int i;
+ int i, j = 0;
mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
mr->ibmr.length = 0;
@@ -1911,12 +2730,36 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
sg_offset = 0;
}
- mr->ndescs = i;
if (sg_offset_p)
*sg_offset_p = sg_offset;
- return i;
+ mr->mmkey.ndescs = i;
+ mr->data_length = mr->ibmr.length;
+
+ if (meta_sg_nents) {
+ sg = meta_sgl;
+ sg_offset = meta_sg_offset_p ? *meta_sg_offset_p : 0;
+ for_each_sg(meta_sgl, sg, meta_sg_nents, j) {
+ if (unlikely(i + j >= mr->max_descs))
+ break;
+ klms[i + j].va = cpu_to_be64(sg_dma_address(sg) +
+ sg_offset);
+ klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) -
+ sg_offset);
+ klms[i + j].key = cpu_to_be32(lkey);
+ mr->ibmr.length += sg_dma_len(sg) - sg_offset;
+
+ sg_offset = 0;
+ }
+ if (meta_sg_offset_p)
+ *meta_sg_offset_p = sg_offset;
+
+ mr->meta_ndescs = j;
+ mr->meta_length = mr->ibmr.length - mr->data_length;
+ }
+
+ return i + j;
}
static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
@@ -1924,11 +2767,186 @@ static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
struct mlx5_ib_mr *mr = to_mmr(ibmr);
__be64 *descs;
- if (unlikely(mr->ndescs == mr->max_descs))
+ if (unlikely(mr->mmkey.ndescs == mr->max_descs))
return -ENOMEM;
descs = mr->descs;
- descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
+ descs[mr->mmkey.ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
+
+ return 0;
+}
+
+static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ __be64 *descs;
+
+ if (unlikely(mr->mmkey.ndescs + mr->meta_ndescs == mr->max_descs))
+ return -ENOMEM;
+
+ descs = mr->descs;
+ descs[mr->mmkey.ndescs + mr->meta_ndescs++] =
+ cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
+
+ return 0;
+}
+
+static int
+mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct mlx5_ib_mr *pi_mr = mr->mtt_mr;
+ int n;
+
+ pi_mr->mmkey.ndescs = 0;
+ pi_mr->meta_ndescs = 0;
+ pi_mr->meta_length = 0;
+
+ ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
+ pi_mr->desc_size * pi_mr->max_descs,
+ DMA_TO_DEVICE);
+
+ pi_mr->ibmr.page_size = ibmr->page_size;
+ n = ib_sg_to_pages(&pi_mr->ibmr, data_sg, data_sg_nents, data_sg_offset,
+ mlx5_set_page);
+ if (n != data_sg_nents)
+ return n;
+
+ pi_mr->data_iova = pi_mr->ibmr.iova;
+ pi_mr->data_length = pi_mr->ibmr.length;
+ pi_mr->ibmr.length = pi_mr->data_length;
+ ibmr->length = pi_mr->data_length;
+
+ if (meta_sg_nents) {
+ u64 page_mask = ~((u64)ibmr->page_size - 1);
+ u64 iova = pi_mr->data_iova;
+
+ n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents,
+ meta_sg_offset, mlx5_set_page_pi);
+
+ pi_mr->meta_length = pi_mr->ibmr.length;
+ /*
+ * The PI address for the HW is the offset of the metadata address
+ * relative to the first data page address.
+ * It equals the first data page address + the size of the data pages +
+ * the metadata offset within the first metadata page.
+ */
+ pi_mr->pi_iova = (iova & page_mask) +
+ pi_mr->mmkey.ndescs * ibmr->page_size +
+ (pi_mr->ibmr.iova & ~page_mask);
+ /*
+ * In order to use one MTT MR for data and metadata, we also register
+ * the gaps between the end of the data and the start of the metadata
+ * (the sig MR will verify that the HW accesses the right addresses).
+ * This mapping is safe because we use an internal mkey for the
+ * registration.
+ */
+ pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova;
+ pi_mr->ibmr.iova = iova;
+ ibmr->length += pi_mr->meta_length;
+ }
+
+ ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
+ pi_mr->desc_size * pi_mr->max_descs,
+ DMA_TO_DEVICE);
+
+ return n;
+}
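/*
 * A worked example of the pi_iova arithmetic above, with hypothetical
 * values: assume a 4K page size, a data buffer at iova 0x10200 that
 * occupies two MTT entries, and metadata starting 0x180 bytes into its
 * first page. Then
 *
 *     pi_iova = (0x10200 & ~0xfff) + 2 * 0x1000 + 0x180 = 0x12180
 *
 * i.e. the metadata is addressed right after the data pages in the
 * pi_mr's address space, and ibmr.length is stretched to
 * pi_iova + meta_length - 0x10200 so it also covers the gap between
 * the end of the data and the start of the metadata.
 */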
+
+static int
+mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct mlx5_ib_mr *pi_mr = mr->klm_mr;
+ int n;
+
+ pi_mr->mmkey.ndescs = 0;
+ pi_mr->meta_ndescs = 0;
+ pi_mr->meta_length = 0;
+
+ ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
+ pi_mr->desc_size * pi_mr->max_descs,
+ DMA_TO_DEVICE);
+
+ n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset,
+ meta_sg, meta_sg_nents, meta_sg_offset);
+
+ ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
+ pi_mr->desc_size * pi_mr->max_descs,
+ DMA_TO_DEVICE);
+
+ /* This is zero-based memory region */
+ pi_mr->data_iova = 0;
+ pi_mr->ibmr.iova = 0;
+ pi_mr->pi_iova = pi_mr->data_length;
+ ibmr->length = pi_mr->ibmr.length;
+
+ return n;
+}
+
+int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct mlx5_ib_mr *pi_mr = NULL;
+ int n;
+
+ WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY);
+
+ mr->mmkey.ndescs = 0;
+ mr->data_length = 0;
+ mr->data_iova = 0;
+ mr->meta_ndescs = 0;
+ mr->pi_iova = 0;
+ /*
+ * As a performance optimization, when possible, avoid performing a UMR
+ * operation to register the data/metadata buffers.
+ * First try to map the sg lists to PA descriptors with the local_dma_lkey
+ * and fall back to UMR only in case of failure.
+ */
+ n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents,
+ data_sg_offset, meta_sg, meta_sg_nents,
+ meta_sg_offset);
+ if (n == data_sg_nents + meta_sg_nents)
+ goto out;
+ /*
+ * As a performance optimization, if possible, there is no need to map
+ * the sg lists to KLM descriptors. First try to map the sg lists to MTT
+ * descriptors and fall back to KLM only in case of failure.
+ * It's more efficient for the HW to work with MTT descriptors
+ * (especially under high load).
+ * Use KLM (indirect access) only if it's mandatory.
+ */
+ pi_mr = mr->mtt_mr;
+ n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents,
+ data_sg_offset, meta_sg, meta_sg_nents,
+ meta_sg_offset);
+ if (n == data_sg_nents + meta_sg_nents)
+ goto out;
+
+ pi_mr = mr->klm_mr;
+ n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents,
+ data_sg_offset, meta_sg, meta_sg_nents,
+ meta_sg_offset);
+ if (unlikely(n != data_sg_nents + meta_sg_nents))
+ return -ENOMEM;
+
+out:
+ /* This is zero-based memory region */
+ ibmr->iova = 0;
+ mr->pi_mr = pi_mr;
+ if (pi_mr)
+ ibmr->sig_attrs->meta_length = pi_mr->meta_length;
+ else
+ ibmr->sig_attrs->meta_length = mr->meta_length;
return 0;
}
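/*
 * The mapping strategy above, in order of preference (illustrative
 * summary):
 *
 *   1. PA: a single data SGE (plus an optional single metadata SGE) is
 *      described directly through the local_dma_lkey, so no UMR is
 *      needed at all.
 *   2. MTT: layouts that fit page descriptors use the internal MTT
 *      pi_mr, which the HW handles more efficiently than indirect
 *      access.
 *   3. KLM: arbitrary SG layouts fall back to the KLM pi_mr.
 *
 * Whichever pi_mr was used (if any) is recorded in mr->pi_mr, and the
 * resulting region is always zero-based (ibmr->iova = 0).
 */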
@@ -1939,14 +2957,15 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
struct mlx5_ib_mr *mr = to_mmr(ibmr);
int n;
- mr->ndescs = 0;
+ mr->mmkey.ndescs = 0;
ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
mr->desc_size * mr->max_descs,
DMA_TO_DEVICE);
if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
- n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
+ n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0,
+ NULL);
else
n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
mlx5_set_page);
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index c594489eb2d7..e71ee3d52eb0 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -30,19 +30,25 @@
* SOFTWARE.
*/
-#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include <linux/kernel.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-resv.h>
+#include <linux/hmm.h>
+#include <linux/hmm-dma.h>
+#include <linux/pci-p2pdma.h>
#include "mlx5_ib.h"
#include "cmd.h"
+#include "umr.h"
+#include "qp.h"
#include <linux/mlx5/eq.h>
/* Contains the details of a pagefault. */
struct mlx5_pagefault {
u32 bytes_committed;
- u32 token;
+ u64 token;
u8 event_subtype;
u8 type;
union {
@@ -71,6 +77,14 @@ struct mlx5_pagefault {
u32 rdma_op_len;
u64 rdma_va;
} rdma;
+ struct {
+ u64 va;
+ u32 mkey;
+ u32 fault_byte_count;
+ u32 prefetch_before_byte_count;
+ u32 prefetch_after_byte_count;
+ u8 flags;
+ } memory;
};
struct mlx5_ib_pf_eq *eq;
@@ -83,164 +97,203 @@ struct mlx5_pagefault {
* a pagefault. */
#define MMU_NOTIFIER_TIMEOUT 1000
-#define MLX5_IMR_MTT_BITS (30 - PAGE_SHIFT)
-#define MLX5_IMR_MTT_SHIFT (MLX5_IMR_MTT_BITS + PAGE_SHIFT)
-#define MLX5_IMR_MTT_ENTRIES BIT_ULL(MLX5_IMR_MTT_BITS)
-#define MLX5_IMR_MTT_SIZE BIT_ULL(MLX5_IMR_MTT_SHIFT)
-#define MLX5_IMR_MTT_MASK (~(MLX5_IMR_MTT_SIZE - 1))
-
-#define MLX5_KSM_PAGE_SHIFT MLX5_IMR_MTT_SHIFT
-
static u64 mlx5_imr_ksm_entries;
+static u64 mlx5_imr_mtt_entries;
+static u64 mlx5_imr_mtt_size;
+static u8 mlx5_imr_mtt_shift;
+static u8 mlx5_imr_ksm_page_shift;
-static int check_parent(struct ib_umem_odp *odp,
- struct mlx5_ib_mr *parent)
+static void populate_ksm(struct mlx5_ksm *pksm, size_t idx, size_t nentries,
+ struct mlx5_ib_mr *imr, int flags)
{
- struct mlx5_ib_mr *mr = odp->private;
+ struct mlx5_core_dev *dev = mr_to_mdev(imr)->mdev;
+ struct mlx5_ksm *end = pksm + nentries;
+ u64 step = MLX5_CAP_ODP(dev, mem_page_fault) ? mlx5_imr_mtt_size : 0;
+ __be32 key = MLX5_CAP_ODP(dev, mem_page_fault) ?
+ cpu_to_be32(imr->null_mmkey.key) :
+ mr_to_mdev(imr)->mkeys.null_mkey;
+ u64 va =
+ MLX5_CAP_ODP(dev, mem_page_fault) ? idx * mlx5_imr_mtt_size : 0;
- return mr && mr->parent == parent && !odp->dying;
-}
+ if (flags & MLX5_IB_UPD_XLT_ZAP) {
+ for (; pksm != end; pksm++, idx++, va += step) {
+ pksm->key = key;
+ pksm->va = cpu_to_be64(va);
+ }
+ return;
+ }
-static struct ib_ucontext_per_mm *mr_to_per_mm(struct mlx5_ib_mr *mr)
-{
- if (WARN_ON(!mr || !is_odp_mr(mr)))
- return NULL;
+ /*
+ * The locking here is pretty subtle. Ideally the implicit_children
+ * xarray would be protected by the umem_mutex, however that is not
+ * possible. Instead this uses a weaker update-then-lock pattern:
+ *
+ * xa_store()
+ * mutex_lock(umem_mutex)
+ * mlx5r_umr_update_xlt()
+ * mutex_unlock(umem_mutex)
+ * destroy lkey
+ *
+ * i.e. any change to the xarray must be followed by the locked update_xlt
+ * before destroying.
+ *
+ * The umem_mutex provides the acquire/release semantic needed to make
+ * the xa_store() visible to a racing thread.
+ */
+ lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex);
- return to_ib_umem_odp(mr->umem)->per_mm;
-}
+ for (; pksm != end; pksm++, idx++, va += step) {
+ struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx);
-static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp)
-{
- struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent;
- struct ib_ucontext_per_mm *per_mm = odp->per_mm;
- struct rb_node *rb;
-
- down_read(&per_mm->umem_rwsem);
- while (1) {
- rb = rb_next(&odp->interval_tree.rb);
- if (!rb)
- goto not_found;
- odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
- if (check_parent(odp, parent))
- goto end;
+ if (mtt) {
+ pksm->key = cpu_to_be32(mtt->ibmr.lkey);
+ pksm->va = cpu_to_be64(idx * mlx5_imr_mtt_size);
+ } else {
+ pksm->key = key;
+ pksm->va = cpu_to_be64(va);
+ }
}
-not_found:
- odp = NULL;
-end:
- up_read(&per_mm->umem_rwsem);
- return odp;
}
-static struct ib_umem_odp *odp_lookup(u64 start, u64 length,
- struct mlx5_ib_mr *parent)
+static int populate_mtt(__be64 *pas, size_t start, size_t nentries,
+ struct mlx5_ib_mr *mr, int flags)
{
- struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(parent);
- struct ib_umem_odp *odp;
- struct rb_node *rb;
+ struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+ bool downgrade = flags & MLX5_IB_UPD_XLT_DOWNGRADE;
+ struct pci_p2pdma_map_state p2pdma_state = {};
+ struct ib_device *dev = odp->umem.ibdev;
+ size_t i;
- down_read(&per_mm->umem_rwsem);
- odp = rbt_ib_umem_lookup(&per_mm->umem_tree, start, length);
- if (!odp)
- goto end;
+ if (flags & MLX5_IB_UPD_XLT_ZAP)
+ return 0;
- while (1) {
- if (check_parent(odp, parent))
- goto end;
- rb = rb_next(&odp->interval_tree.rb);
- if (!rb)
- goto not_found;
- odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
- if (ib_umem_start(&odp->umem) > start + length)
- goto not_found;
- }
-not_found:
- odp = NULL;
-end:
- up_read(&per_mm->umem_rwsem);
- return odp;
-}
+ for (i = 0; i < nentries; i++) {
+ unsigned long pfn = odp->map.pfn_list[start + i];
+ dma_addr_t dma_addr;
-void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
- size_t nentries, struct mlx5_ib_mr *mr, int flags)
-{
- struct ib_pd *pd = mr->ibmr.pd;
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct ib_umem_odp *odp;
- unsigned long va;
- int i;
+ pfn = odp->map.pfn_list[start + i];
+ if (!(pfn & HMM_PFN_VALID))
+ /* ODP initialization */
+ continue;
- if (flags & MLX5_IB_UPD_XLT_ZAP) {
- for (i = 0; i < nentries; i++, pklm++) {
- pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
- pklm->key = cpu_to_be32(dev->null_mkey);
- pklm->va = 0;
- }
- return;
- }
+ dma_addr = hmm_dma_map_pfn(dev->dma_device, &odp->map,
+ start + i, &p2pdma_state);
+ if (ib_dma_mapping_error(dev, dma_addr))
+ return -EFAULT;
- odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE,
- nentries * MLX5_IMR_MTT_SIZE, mr);
+ dma_addr |= MLX5_IB_MTT_READ;
+ if ((pfn & HMM_PFN_WRITE) && !downgrade)
+ dma_addr |= MLX5_IB_MTT_WRITE;
- for (i = 0; i < nentries; i++, pklm++) {
- pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
- va = (offset + i) * MLX5_IMR_MTT_SIZE;
- if (odp && odp->umem.address == va) {
- struct mlx5_ib_mr *mtt = odp->private;
+ pas[i] = cpu_to_be64(dma_addr);
+ odp->npages++;
+ }
+ return 0;
+}
- pklm->key = cpu_to_be32(mtt->ibmr.lkey);
- odp = odp_next(odp);
- } else {
- pklm->key = cpu_to_be32(dev->null_mkey);
- }
- mlx5_ib_dbg(dev, "[%d] va %lx key %x\n",
- i, va, be32_to_cpu(pklm->key));
+int mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
+ struct mlx5_ib_mr *mr, int flags)
+{
+ if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
+ populate_ksm(xlt, idx, nentries, mr, flags);
+ return 0;
+ } else {
+ return populate_mtt(xlt, idx, nentries, mr, flags);
}
}
-static void mr_leaf_free_action(struct work_struct *work)
+/*
+ * This must be called after the mr has been removed from implicit_children.
+ * NOTE: The MR does not necessarily have to be
+ * empty here, parallel page faults could have raced with the free process and
+ * added pages to it.
+ */
+static void free_implicit_child_mr_work(struct work_struct *work)
{
- struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work);
- int idx = ib_umem_start(&odp->umem) >> MLX5_IMR_MTT_SHIFT;
- struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent;
+ struct mlx5_ib_mr *mr =
+ container_of(work, struct mlx5_ib_mr, odp_destroy.work);
+ struct mlx5_ib_mr *imr = mr->parent;
+ struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
+ struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+
+ mlx5r_deref_wait_odp_mkey(&mr->mmkey);
+
+ mutex_lock(&odp_imr->umem_mutex);
+ mlx5r_umr_update_xlt(mr->parent,
+ ib_umem_start(odp) >> mlx5_imr_mtt_shift, 1, 0,
+ MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC);
+ mutex_unlock(&odp_imr->umem_mutex);
+ mlx5_ib_dereg_mr(&mr->ibmr, NULL);
+
+ mlx5r_deref_odp_mkey(&imr->mmkey);
+}
- mr->parent = NULL;
- synchronize_srcu(&mr->dev->mr_srcu);
+static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
+{
+ struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+ unsigned long idx = ib_umem_start(odp) >> mlx5_imr_mtt_shift;
+ struct mlx5_ib_mr *imr = mr->parent;
- ib_umem_release(&odp->umem);
- if (imr->live)
- mlx5_ib_update_xlt(imr, idx, 1, 0,
- MLX5_IB_UPD_XLT_INDIRECT |
- MLX5_IB_UPD_XLT_ATOMIC);
- mlx5_mr_cache_free(mr->dev, mr);
+ /*
+ * If userspace is racing to free the parent implicit ODP MR then we can
+ * lose the race with parent destruction. In this case
+ * mlx5_ib_free_odp_mr() will free everything in the implicit_children
+ * xarray so NOP is fine. This child MR cannot be destroyed here because
+ * we are under its umem_mutex.
+ */
+ if (!refcount_inc_not_zero(&imr->mmkey.usecount))
+ return;
- if (atomic_dec_and_test(&imr->num_leaf_free))
- wake_up(&imr->q_leaf_free);
+ xa_lock(&imr->implicit_children);
+ if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_KERNEL) !=
+ mr) {
+ xa_unlock(&imr->implicit_children);
+ mlx5r_deref_odp_mkey(&imr->mmkey);
+ return;
+ }
+
+ if (MLX5_CAP_ODP(mr_to_mdev(mr)->mdev, mem_page_fault))
+ xa_erase(&mr_to_mdev(mr)->odp_mkeys,
+ mlx5_base_mkey(mr->mmkey.key));
+ xa_unlock(&imr->implicit_children);
+
+ /* Freeing a MR is a sleeping operation, so bounce to a work queue */
+ INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work);
+ queue_work(system_dfl_wq, &mr->odp_destroy.work);
}
-void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
- unsigned long end)
+static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
+ struct ib_umem_odp *umem_odp =
+ container_of(mni, struct ib_umem_odp, notifier);
struct mlx5_ib_mr *mr;
- const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT /
- sizeof(struct mlx5_mtt)) - 1;
+ const u64 umr_block_mask = MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT - 1;
u64 idx = 0, blk_start_idx = 0;
- struct ib_umem *umem;
+ u64 invalidations = 0;
+ unsigned long start;
+ unsigned long end;
int in_block = 0;
u64 addr;
- if (!umem_odp) {
- pr_err("invalidation called on NULL umem or non-ODP umem\n");
- return;
- }
- umem = &umem_odp->umem;
+ if (!mmu_notifier_range_blockable(range))
+ return false;
+ mutex_lock(&umem_odp->umem_mutex);
+ mmu_interval_set_seq(mni, cur_seq);
+ /*
+ * If npages is zero then umem_odp->private may not be set up yet. This
+ * does not complete until after the first page is mapped for DMA.
+ */
+ if (!umem_odp->npages)
+ goto out;
mr = umem_odp->private;
+ if (!mr)
+ goto out;
- if (!mr || !mr->ibmr.pd)
- return;
-
- start = max_t(u64, ib_umem_start(umem), start);
- end = min_t(u64, ib_umem_end(umem), end);
+ start = max_t(u64, ib_umem_start(umem_odp), range->start);
+ end = min_t(u64, ib_umem_end(umem_odp), range->end);
/*
* Iteration one - zap the HW's MTTs. The notifiers_count ensures that
@@ -248,17 +301,15 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
* overwrite the same MTTs. Concurrent invalidations might race us,
* but they will write 0s as well, so no difference in the end result.
*/
-
- for (addr = start; addr < end; addr += BIT(umem->page_shift)) {
- idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
+ for (addr = start; addr < end; addr += BIT(umem_odp->page_shift)) {
+ idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
/*
* Strive to write the MTTs in chunks, but avoid overwriting
* non-existing MTTs. The heuristic here can be improved to
* estimate the cost of another UMR vs. the cost of a bigger
* UMR.
*/
- if (umem_odp->dma_list[idx] &
- (ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) {
+ if (umem_odp->map.pfn_list[idx] & HMM_PFN_VALID) {
if (!in_block) {
blk_start_idx = idx;
in_block = 1;
@@ -267,19 +318,27 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
u64 umr_offset = idx & umr_block_mask;
if (in_block && umr_offset == 0) {
- mlx5_ib_update_xlt(mr, blk_start_idx,
- idx - blk_start_idx, 0,
- MLX5_IB_UPD_XLT_ZAP |
- MLX5_IB_UPD_XLT_ATOMIC);
+ mlx5r_umr_update_xlt(mr, blk_start_idx,
+ idx - blk_start_idx, 0,
+ MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ATOMIC);
in_block = 0;
+ /* Count page invalidations */
+ invalidations += idx - blk_start_idx + 1;
}
}
}
- if (in_block)
- mlx5_ib_update_xlt(mr, blk_start_idx,
- idx - blk_start_idx + 1, 0,
- MLX5_IB_UPD_XLT_ZAP |
- MLX5_IB_UPD_XLT_ATOMIC);
+ if (in_block) {
+ mlx5r_umr_update_xlt(mr, blk_start_idx,
+ idx - blk_start_idx + 1, 0,
+ MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ATOMIC);
+ /* Count page invalidations */
+ invalidations += idx - blk_start_idx + 1;
+ }
+
+ mlx5_update_odp_stats_with_handled(mr, invalidations, invalidations);
+
/*
* We are now sure that the device will not access the
* memory. We can safely unmap it, and mark it as dirty if
@@ -288,21 +347,24 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
ib_umem_odp_unmap_dma_pages(umem_odp, start, end);
- if (unlikely(!umem_odp->npages && mr->parent &&
- !umem_odp->dying)) {
- WRITE_ONCE(umem_odp->dying, 1);
- atomic_inc(&mr->parent->num_leaf_free);
- schedule_work(&umem_odp->work);
- }
+ if (unlikely(!umem_odp->npages && mr->parent))
+ destroy_unused_implicit_child_mr(mr);
+out:
+ mutex_unlock(&umem_odp->umem_mutex);
+ return true;
}
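/*
 * An illustrative trace of the chunking heuristic above, assuming an
 * alignment of 8 MTT entries: if entries 0-5 and 7-20 of the
 * invalidated range are present and entry 6 is a hole, the whole run
 * 0-20 is zapped with a single UMR and the hole is simply rewritten
 * with zeros. A run is only split when it reaches an absent entry that
 * falls on an alignment boundary, trading a few redundant writes for
 * fewer UMR operations.
 */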
-void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
+const struct mmu_interval_notifier_ops mlx5_mn_ops = {
+ .invalidate = mlx5_ib_invalidate_range,
+};
+
+static void internal_fill_odp_caps(struct mlx5_ib_dev *dev)
{
struct ib_odp_caps *caps = &dev->odp_caps;
memset(caps, 0, sizeof(*caps));
- if (!MLX5_CAP_GEN(dev->mdev, pg))
+ if (!MLX5_CAP_GEN(dev->mdev, pg) || !mlx5r_umr_can_load_pas(dev, 0))
return;
caps->general_caps = IB_ODP_SUPPORT;
@@ -312,54 +374,56 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
else
dev->odp_max_size = BIT_ULL(MLX5_MAX_UMR_SHIFT + PAGE_SHIFT);
- if (MLX5_CAP_ODP(dev->mdev, ud_odp_caps.send))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, ud_odp_caps.send))
caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SEND;
- if (MLX5_CAP_ODP(dev->mdev, ud_odp_caps.srq_receive))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, ud_odp_caps.srq_receive))
caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
- if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.send))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, rc_odp_caps.send))
caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SEND;
- if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.receive))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, rc_odp_caps.receive))
caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_RECV;
- if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.write))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, rc_odp_caps.write))
caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_WRITE;
- if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.read))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, rc_odp_caps.read))
caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_READ;
- if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.atomic))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, rc_odp_caps.atomic))
caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
- if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.srq_receive))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, rc_odp_caps.srq_receive))
caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
- if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.send))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, xrc_odp_caps.send))
caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_SEND;
- if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.receive))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, xrc_odp_caps.receive))
caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_RECV;
- if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.write))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, xrc_odp_caps.write))
caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_WRITE;
- if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.read))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, xrc_odp_caps.read))
caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_READ;
- if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.atomic))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, xrc_odp_caps.atomic))
caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
- if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.srq_receive))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, xrc_odp_caps.srq_receive))
caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) &&
MLX5_CAP_GEN(dev->mdev, null_mkey) &&
- MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
+ MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) &&
+ !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled) &&
+ mlx5_imr_ksm_entries != 0 &&
+ !(mlx5_imr_ksm_page_shift >
+ get_max_log_entity_size_cap(dev, MLX5_MKC_ACCESS_MODE_KSM)))
caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT;
-
- return;
}
static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
@@ -368,339 +432,520 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
{
int wq_num = pfault->event_subtype == MLX5_PFAULT_SUBTYPE_WQE ?
pfault->wqe.wq_num : pfault->token;
- u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = { };
- u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = { };
+ u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {};
+ void *info;
int err;
MLX5_SET(page_fault_resume_in, in, opcode, MLX5_CMD_OP_PAGE_FAULT_RESUME);
- MLX5_SET(page_fault_resume_in, in, page_fault_type, pfault->type);
- MLX5_SET(page_fault_resume_in, in, token, pfault->token);
- MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
- MLX5_SET(page_fault_resume_in, in, error, !!error);
- err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+ if (pfault->event_subtype == MLX5_PFAULT_SUBTYPE_MEMORY) {
+ info = MLX5_ADDR_OF(page_fault_resume_in, in,
+ page_fault_info.mem_page_fault_info);
+ MLX5_SET(mem_page_fault_info, info, fault_token_31_0,
+ pfault->token & 0xffffffff);
+ MLX5_SET(mem_page_fault_info, info, fault_token_47_32,
+ (pfault->token >> 32) & 0xffff);
+ MLX5_SET(mem_page_fault_info, info, error, !!error);
+ } else {
+ info = MLX5_ADDR_OF(page_fault_resume_in, in,
+ page_fault_info.trans_page_fault_info);
+ MLX5_SET(trans_page_fault_info, info, page_fault_type,
+ pfault->type);
+ MLX5_SET(trans_page_fault_info, info, fault_token,
+ pfault->token);
+ MLX5_SET(trans_page_fault_info, info, wq_number, wq_num);
+ MLX5_SET(trans_page_fault_info, info, error, !!error);
+ }
+
+ err = mlx5_cmd_exec_in(dev->mdev, page_fault_resume, in);
if (err)
mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x err %d\n",
wq_num, err);
}
-static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
- struct ib_umem *umem,
- bool ksm, int access_flags)
+static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
+ unsigned long idx)
{
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_ib_dev *dev = mr_to_mdev(imr);
+ struct ib_umem_odp *odp;
struct mlx5_ib_mr *mr;
+ struct mlx5_ib_mr *ret;
int err;
- mr = mlx5_mr_cache_alloc(dev, ksm ? MLX5_IMR_KSM_CACHE_ENTRY :
- MLX5_IMR_MTT_CACHE_ENTRY);
-
- if (IS_ERR(mr))
+ odp = ib_umem_odp_alloc_child(to_ib_umem_odp(imr->umem),
+ idx * mlx5_imr_mtt_size,
+ mlx5_imr_mtt_size, &mlx5_mn_ops);
+ if (IS_ERR(odp))
+ return ERR_CAST(odp);
+
+ mr = mlx5_mr_cache_alloc(dev, imr->access_flags,
+ MLX5_MKC_ACCESS_MODE_MTT,
+ mlx5_imr_mtt_entries);
+ if (IS_ERR(mr)) {
+ ib_umem_odp_release(odp);
return mr;
-
- mr->ibmr.pd = pd;
-
- mr->dev = dev;
- mr->access_flags = access_flags;
- mr->mmkey.iova = 0;
- mr->umem = umem;
-
- if (ksm) {
- err = mlx5_ib_update_xlt(mr, 0,
- mlx5_imr_ksm_entries,
- MLX5_KSM_PAGE_SHIFT,
- MLX5_IB_UPD_XLT_INDIRECT |
- MLX5_IB_UPD_XLT_ZAP |
- MLX5_IB_UPD_XLT_ENABLE);
-
- } else {
- err = mlx5_ib_update_xlt(mr, 0,
- MLX5_IMR_MTT_ENTRIES,
- PAGE_SHIFT,
- MLX5_IB_UPD_XLT_ZAP |
- MLX5_IB_UPD_XLT_ENABLE |
- MLX5_IB_UPD_XLT_ATOMIC);
}
- if (err)
- goto fail;
-
+ mr->access_flags = imr->access_flags;
+ mr->ibmr.pd = imr->ibmr.pd;
+ mr->ibmr.device = &mr_to_mdev(imr)->ib_dev;
+ mr->umem = &odp->umem;
mr->ibmr.lkey = mr->mmkey.key;
mr->ibmr.rkey = mr->mmkey.key;
+ mr->ibmr.iova = idx * mlx5_imr_mtt_size;
+ mr->parent = imr;
+ odp->private = mr;
- mr->live = 1;
+ /*
+ * The first refcount is owned by the xarray and the second refcount
+ * is returned to the caller.
+ */
+ refcount_set(&mr->mmkey.usecount, 2);
- mlx5_ib_dbg(dev, "key %x dev %p mr %p\n",
- mr->mmkey.key, dev->mdev, mr);
+ err = mlx5r_umr_update_xlt(mr, 0,
+ mlx5_imr_mtt_entries,
+ PAGE_SHIFT,
+ MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ENABLE);
+ if (err) {
+ ret = ERR_PTR(err);
+ goto out_mr;
+ }
- return mr;
+ xa_lock(&imr->implicit_children);
+ ret = __xa_cmpxchg(&imr->implicit_children, idx, NULL, mr,
+ GFP_KERNEL);
+ if (unlikely(ret)) {
+ if (xa_is_err(ret)) {
+ ret = ERR_PTR(xa_err(ret));
+ goto out_lock;
+ }
+ /*
+ * Another thread beat us to creating the child mr, use
+ * theirs.
+ */
+ refcount_inc(&ret->mmkey.usecount);
+ goto out_lock;
+ }
-fail:
- mlx5_ib_err(dev, "Failed to register MKEY %d\n", err);
- mlx5_mr_cache_free(dev, mr);
+ if (MLX5_CAP_ODP(dev->mdev, mem_page_fault)) {
+ ret = xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
+ &mr->mmkey, GFP_KERNEL);
+ if (xa_is_err(ret)) {
+ ret = ERR_PTR(xa_err(ret));
+ __xa_erase(&imr->implicit_children, idx);
+ goto out_lock;
+ }
+ mr->mmkey.type = MLX5_MKEY_IMPLICIT_CHILD;
+ }
+ xa_unlock(&imr->implicit_children);
+ mlx5_ib_dbg(mr_to_mdev(imr), "key %x mr %p\n", mr->mmkey.key, mr);
+ return mr;
- return ERR_PTR(err);
+out_lock:
+ xa_unlock(&imr->implicit_children);
+out_mr:
+ mlx5_ib_dereg_mr(&mr->ibmr, NULL);
+ return ret;
}
-static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr,
- u64 io_virt, size_t bcnt)
+/*
+ * When using memory-scheme ODP, implicit MRs can't use the reserved null
+ * mkey; each implicit MR needs its own private null mkey to take page
+ * faults on.
+ * The null mkey is created with properties that trigger a page fault on
+ * every access and with all relevant access flags.
+ */
+static int alloc_implicit_mr_null_mkey(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_mr *imr,
+ struct mlx5_ib_pd *pd)
{
- struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device);
- struct ib_umem_odp *odp, *result = NULL;
- struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
- u64 addr = io_virt & MLX5_IMR_MTT_MASK;
- int nentries = 0, start_idx = 0, ret;
- struct mlx5_ib_mr *mtt;
+ size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + 64;
+ void *mkc;
+ u32 *in;
+ int err;
- mutex_lock(&odp_mr->umem_mutex);
- odp = odp_lookup(addr, 1, mr);
+ in = kzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
- mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n",
- io_virt, bcnt, addr, odp);
+ MLX5_SET(create_mkey_in, in, translations_octword_actual_size, 4);
+ MLX5_SET(create_mkey_in, in, pg_access, 1);
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, a, 1);
+ MLX5_SET(mkc, mkc, rw, 1);
+ MLX5_SET(mkc, mkc, rr, 1);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+ MLX5_SET(mkc, mkc, free, 0);
+ MLX5_SET(mkc, mkc, umr_en, 0);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
+
+ MLX5_SET(mkc, mkc, translations_octword_size, 4);
+ MLX5_SET(mkc, mkc, log_page_size, 61);
+ MLX5_SET(mkc, mkc, length64, 1);
+ MLX5_SET(mkc, mkc, pd, pd->pdn);
+ MLX5_SET64(mkc, mkc, start_addr, 0);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+
+ err = mlx5_core_create_mkey(dev->mdev, &imr->null_mmkey.key, in, inlen);
+ if (err)
+ goto free_in;
-next_mr:
- if (likely(odp)) {
- if (nentries)
- nentries++;
- } else {
- odp = ib_alloc_odp_umem(odp_mr, addr,
- MLX5_IMR_MTT_SIZE);
- if (IS_ERR(odp)) {
- mutex_unlock(&odp_mr->umem_mutex);
- return ERR_CAST(odp);
- }
+ imr->null_mmkey.type = MLX5_MKEY_NULL;
- mtt = implicit_mr_alloc(mr->ibmr.pd, &odp->umem, 0,
- mr->access_flags);
- if (IS_ERR(mtt)) {
- mutex_unlock(&odp_mr->umem_mutex);
- ib_umem_release(&odp->umem);
- return ERR_CAST(mtt);
- }
+free_in:
+ kfree(in);
+ return err;
+}
- odp->private = mtt;
- mtt->umem = &odp->umem;
- mtt->mmkey.iova = addr;
- mtt->parent = mr;
- INIT_WORK(&odp->work, mr_leaf_free_action);
+struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
+ int access_flags)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device);
+ struct ib_umem_odp *umem_odp;
+ struct mlx5_ib_mr *imr;
+ int err;
- if (!nentries)
- start_idx = addr >> MLX5_IMR_MTT_SHIFT;
- nentries++;
- }
+ if (!mlx5r_umr_can_load_pas(dev, mlx5_imr_mtt_entries * PAGE_SIZE))
+ return ERR_PTR(-EOPNOTSUPP);
- /* Return first odp if region not covered by single one */
- if (likely(!result))
- result = odp;
+ umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags);
+ if (IS_ERR(umem_odp))
+ return ERR_CAST(umem_odp);
- addr += MLX5_IMR_MTT_SIZE;
- if (unlikely(addr < io_virt + bcnt)) {
- odp = odp_next(odp);
- if (odp && odp->umem.address != addr)
- odp = NULL;
- goto next_mr;
+ imr = mlx5_mr_cache_alloc(dev, access_flags, MLX5_MKC_ACCESS_MODE_KSM,
+ mlx5_imr_ksm_entries);
+ if (IS_ERR(imr)) {
+ ib_umem_odp_release(umem_odp);
+ return imr;
}
- if (unlikely(nentries)) {
- ret = mlx5_ib_update_xlt(mr, start_idx, nentries, 0,
- MLX5_IB_UPD_XLT_INDIRECT |
- MLX5_IB_UPD_XLT_ATOMIC);
- if (ret) {
- mlx5_ib_err(dev, "Failed to update PAS\n");
- result = ERR_PTR(ret);
- }
+ imr->access_flags = access_flags;
+ imr->ibmr.pd = &pd->ibpd;
+ imr->ibmr.iova = 0;
+ imr->umem = &umem_odp->umem;
+ imr->ibmr.lkey = imr->mmkey.key;
+ imr->ibmr.rkey = imr->mmkey.key;
+ imr->ibmr.device = &dev->ib_dev;
+ imr->is_odp_implicit = true;
+ xa_init(&imr->implicit_children);
+
+ if (MLX5_CAP_ODP(dev->mdev, mem_page_fault)) {
+ err = alloc_implicit_mr_null_mkey(dev, imr, pd);
+ if (err)
+ goto out_mr;
+
+ err = mlx5r_store_odp_mkey(dev, &imr->null_mmkey);
+ if (err)
+ goto out_mr;
}
- mutex_unlock(&odp_mr->umem_mutex);
- return result;
+ err = mlx5r_umr_update_xlt(imr, 0,
+ mlx5_imr_ksm_entries,
+ mlx5_imr_ksm_page_shift,
+ MLX5_IB_UPD_XLT_INDIRECT |
+ MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ENABLE);
+ if (err)
+ goto out_mr;
+
+ err = mlx5r_store_odp_mkey(dev, &imr->mmkey);
+ if (err)
+ goto out_mr;
+
+ mlx5_ib_dbg(dev, "key %x mr %p\n", imr->mmkey.key, imr);
+ return imr;
+out_mr:
+ mlx5_ib_err(dev, "Failed to register MKEY %d\n", err);
+ mlx5_ib_dereg_mr(&imr->ibmr, NULL);
+ return ERR_PTR(err);
}
-struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
- struct ib_udata *udata,
- int access_flags)
+void mlx5_ib_free_odp_mr(struct mlx5_ib_mr *mr)
{
- struct mlx5_ib_mr *imr;
- struct ib_umem *umem;
-
- umem = ib_umem_get(udata, 0, 0, access_flags, 0);
- if (IS_ERR(umem))
- return ERR_CAST(umem);
+ struct mlx5_ib_mr *mtt;
+ unsigned long idx;
- imr = implicit_mr_alloc(&pd->ibpd, umem, 1, access_flags);
- if (IS_ERR(imr)) {
- ib_umem_release(umem);
- return ERR_CAST(imr);
+ /*
+ * If this is an implicit MR it is already invalidated so we can just
+ * delete the children mkeys.
+ */
+ xa_for_each(&mr->implicit_children, idx, mtt) {
+ xa_erase(&mr->implicit_children, idx);
+ mlx5_ib_dereg_mr(&mtt->ibmr, NULL);
}
- imr->umem = umem;
- init_waitqueue_head(&imr->q_leaf_free);
- atomic_set(&imr->num_leaf_free, 0);
- atomic_set(&imr->num_pending_prefetch, 0);
+ if (mr->null_mmkey.key) {
+ xa_erase(&mr_to_mdev(mr)->odp_mkeys,
+ mlx5_base_mkey(mr->null_mmkey.key));
- return imr;
+ mlx5_core_destroy_mkey(mr_to_mdev(mr)->mdev,
+ mr->null_mmkey.key);
+ }
}
-static int mr_leaf_free(struct ib_umem_odp *umem_odp, u64 start, u64 end,
- void *cookie)
+#define MLX5_PF_FLAGS_DOWNGRADE BIT(1)
+#define MLX5_PF_FLAGS_SNAPSHOT BIT(2)
+#define MLX5_PF_FLAGS_ENABLE BIT(3)
+static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
+ u64 user_va, size_t bcnt, u32 *bytes_mapped,
+ u32 flags)
{
- struct mlx5_ib_mr *mr = umem_odp->private, *imr = cookie;
- struct ib_umem *umem = &umem_odp->umem;
+ int page_shift, ret, np;
+ bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
+ u64 access_mask = 0;
+ u64 start_idx;
+ bool fault = !(flags & MLX5_PF_FLAGS_SNAPSHOT);
+ u32 xlt_flags = MLX5_IB_UPD_XLT_ATOMIC;
- if (mr->parent != imr)
- return 0;
+ if (flags & MLX5_PF_FLAGS_ENABLE)
+ xlt_flags |= MLX5_IB_UPD_XLT_ENABLE;
- ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem),
- ib_umem_end(umem));
+ if (flags & MLX5_PF_FLAGS_DOWNGRADE)
+ xlt_flags |= MLX5_IB_UPD_XLT_DOWNGRADE;
- if (umem_odp->dying)
- return 0;
+ page_shift = odp->page_shift;
+ start_idx = (user_va - ib_umem_start(odp)) >> page_shift;
- WRITE_ONCE(umem_odp->dying, 1);
- atomic_inc(&imr->num_leaf_free);
- schedule_work(&umem_odp->work);
+ if (odp->umem.writable && !downgrade)
+ access_mask |= HMM_PFN_WRITE;
- return 0;
-}
+ np = ib_umem_odp_map_dma_and_lock(odp, user_va, bcnt, access_mask, fault);
+ if (np < 0)
+ return np;
-void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
-{
- struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr);
+ /*
+ * No need to check whether the MTTs really belong to this MR, since
+ * ib_umem_odp_map_dma_and_lock already checks this.
+ */
+ ret = mlx5r_umr_update_xlt(mr, start_idx, np, page_shift, xlt_flags);
+ mutex_unlock(&odp->umem_mutex);
- down_read(&per_mm->umem_rwsem);
- rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, 0, ULLONG_MAX,
- mr_leaf_free, true, imr);
- up_read(&per_mm->umem_rwsem);
+ if (ret < 0) {
+ if (ret != -EAGAIN)
+ mlx5_ib_err(mr_to_mdev(mr),
+ "Failed to update mkey page tables\n");
+ goto out;
+ }
- wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
+ if (bytes_mapped) {
+ u32 new_mappings = (np << page_shift) -
+ (user_va - round_down(user_va, 1 << page_shift));
+
+ *bytes_mapped += min_t(u32, new_mappings, bcnt);
+ }
+
+ return np << (page_shift - PAGE_SHIFT);
+
+out:
+ return ret;
}
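/*
 * A worked example of the bytes_mapped arithmetic above, using assumed
 * values rather than anything taken from the patch: with page_shift = 12
 * (4 KiB pages), a faulting user_va of 0x10000234 and np = 3 pages
 * returned by ib_umem_odp_map_dma_and_lock():
 *
 *   round_down(user_va, 1 << page_shift) = 0x10000000
 *   new_mappings = (3 << 12) - (0x10000234 - 0x10000000)
 *                = 12288 - 564 = 11724 bytes
 *   *bytes_mapped += min(11724, bcnt)
 *
 * With page_shift == PAGE_SHIFT the function then returns np = 3, the
 * number of system pages made present for this range.
 */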
-#define MLX5_PF_FLAGS_PREFETCH BIT(0)
-#define MLX5_PF_FLAGS_DOWNGRADE BIT(1)
-static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
- u64 io_virt, size_t bcnt, u32 *bytes_mapped,
- u32 flags)
+static int pagefault_implicit_mr(struct mlx5_ib_mr *imr,
+ struct ib_umem_odp *odp_imr, u64 user_va,
+ size_t bcnt, u32 *bytes_mapped, u32 flags)
{
- int npages = 0, current_seq, page_shift, ret, np;
- bool implicit = false;
- struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
- bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
- bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH;
- u64 access_mask;
- u64 start_idx, page_mask;
- struct ib_umem_odp *odp;
- size_t size;
+ unsigned long end_idx = (user_va + bcnt - 1) >> mlx5_imr_mtt_shift;
+ unsigned long upd_start_idx = end_idx + 1;
+ unsigned long upd_len = 0;
+ unsigned long npages = 0;
+ int err;
+ int ret;
- if (!odp_mr->page_list) {
- odp = implicit_mr_get_data(mr, io_virt, bcnt);
+ if (unlikely(user_va >= mlx5_imr_ksm_entries * mlx5_imr_mtt_size ||
+ mlx5_imr_ksm_entries * mlx5_imr_mtt_size - user_va < bcnt))
+ return -EFAULT;
- if (IS_ERR(odp))
- return PTR_ERR(odp);
- mr = odp->private;
- implicit = true;
- } else {
- odp = odp_mr;
- }
+ /* Fault each child mr that intersects with our interval. */
+ while (bcnt) {
+ unsigned long idx = user_va >> mlx5_imr_mtt_shift;
+ struct ib_umem_odp *umem_odp;
+ struct mlx5_ib_mr *mtt;
+ u64 len;
+
+ xa_lock(&imr->implicit_children);
+ mtt = xa_load(&imr->implicit_children, idx);
+ if (unlikely(!mtt)) {
+ xa_unlock(&imr->implicit_children);
+ mtt = implicit_get_child_mr(imr, idx);
+ if (IS_ERR(mtt)) {
+ ret = PTR_ERR(mtt);
+ goto out;
+ }
+ upd_start_idx = min(upd_start_idx, idx);
+ upd_len = idx - upd_start_idx + 1;
+ } else {
+ refcount_inc(&mtt->mmkey.usecount);
+ xa_unlock(&imr->implicit_children);
+ }
-next_mr:
- size = min_t(size_t, bcnt, ib_umem_end(&odp->umem) - io_virt);
+ umem_odp = to_ib_umem_odp(mtt->umem);
+ len = min_t(u64, user_va + bcnt, ib_umem_end(umem_odp)) -
+ user_va;
- page_shift = mr->umem->page_shift;
- page_mask = ~(BIT(page_shift) - 1);
- start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
- access_mask = ODP_READ_ALLOWED_BIT;
+ ret = pagefault_real_mr(mtt, umem_odp, user_va, len,
+ bytes_mapped, flags);
- if (prefetch && !downgrade && !mr->umem->writable) {
- /* prefetch with write-access must
- * be supported by the MR
- */
- ret = -EINVAL;
- goto out;
+ mlx5r_deref_odp_mkey(&mtt->mmkey);
+
+ if (ret < 0)
+ goto out;
+ user_va += len;
+ bcnt -= len;
+ npages += ret;
}
- if (mr->umem->writable && !downgrade)
- access_mask |= ODP_WRITE_ALLOWED_BIT;
+ ret = npages;
- current_seq = READ_ONCE(odp->notifiers_seq);
/*
- * Ensure the sequence number is valid for some time before we call
- * gup.
+ * Any time the implicit_children are changed we must perform an
+ * update of the XLT before exiting to ensure the HW and the
+ * implicit_children remain synchronized.
*/
- smp_rmb();
+out:
+ if (likely(!upd_len))
+ return ret;
- ret = ib_umem_odp_map_dma_pages(to_ib_umem_odp(mr->umem), io_virt, size,
- access_mask, current_seq);
+ /*
+ * Note that this is not strictly ordered: the KSM is updated after
+ * the implicit_children, so a parallel page fault could see an MR
+ * that is not yet visible in the KSM. This is similar to a parallel
+ * page fault seeing an MR that is being concurrently removed from
+ * the KSM. Both of these improbable situations are resolved safely
+ * by resuming the HW and then taking another page fault. The next
+ * page fault handler will see the new information.
+ */
+ mutex_lock(&odp_imr->umem_mutex);
+ err = mlx5r_umr_update_xlt(imr, upd_start_idx, upd_len, 0,
+ MLX5_IB_UPD_XLT_INDIRECT |
+ MLX5_IB_UPD_XLT_ATOMIC);
+ mutex_unlock(&odp_imr->umem_mutex);
+ if (err) {
+ mlx5_ib_err(mr_to_mdev(imr), "Failed to update PAS\n");
+ return err;
+ }
+ return ret;
+}
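/*
 * A sketch of how the loop above splits one fault across implicit child
 * MRs, assuming mlx5_imr_mtt_shift = 30 (1 GiB per child; the shift is
 * configuration dependent and these addresses are made up):
 *
 *   user_va = 0x3ffff000, bcnt = 0x4000
 *   iteration 1: idx = 0x3ffff000 >> 30 = 0,
 *                len = 0x40000000 - 0x3ffff000 = 0x1000 (tail of child 0)
 *   iteration 2: idx = 1, len = 0x3000 (head of child 1)
 *
 * Each iteration takes a reference on the child mkey, faults it through
 * pagefault_real_mr() and drops the reference before advancing.
 */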
- if (ret < 0)
- goto out;
+static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt,
+ u32 *bytes_mapped, u32 flags)
+{
+ struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem);
+ int access_mode = mr->data_direct ? MLX5_MKC_ACCESS_MODE_KSM :
+ MLX5_MKC_ACCESS_MODE_MTT;
+ unsigned int old_page_shift = mr->page_shift;
+ unsigned int page_shift;
+ unsigned long page_size;
+ u32 xlt_flags = 0;
+ int err;
- np = ret;
+ if (flags & MLX5_PF_FLAGS_ENABLE)
+ xlt_flags |= MLX5_IB_UPD_XLT_ENABLE;
- mutex_lock(&odp->umem_mutex);
- if (!ib_umem_mmu_notifier_retry(to_ib_umem_odp(mr->umem),
- current_seq)) {
- /*
- * No need to check whether the MTTs really belong to
- * this MR, since ib_umem_odp_map_dma_pages already
- * checks this.
- */
- ret = mlx5_ib_update_xlt(mr, start_idx, np,
- page_shift, MLX5_IB_UPD_XLT_ATOMIC);
- } else {
- ret = -EAGAIN;
+ dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL);
+ err = ib_umem_dmabuf_map_pages(umem_dmabuf);
+ if (err) {
+ dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
+ return err;
}
- mutex_unlock(&odp->umem_mutex);
- if (ret < 0) {
- if (ret != -EAGAIN)
- mlx5_ib_err(dev, "Failed to update mkey page tables\n");
- goto out;
+ page_size = mlx5_umem_dmabuf_find_best_pgsz(umem_dmabuf, access_mode);
+ if (!page_size) {
+ ib_umem_dmabuf_unmap_pages(umem_dmabuf);
+ err = -EINVAL;
+ } else {
+ page_shift = order_base_2(page_size);
+ if (page_shift != mr->page_shift && mr->dmabuf_faulted) {
+ err = mlx5r_umr_dmabuf_update_pgsz(mr, xlt_flags,
+ page_shift);
+ } else {
+ mr->page_shift = page_shift;
+ if (mr->data_direct)
+ err = mlx5r_umr_update_data_direct_ksm_pas(
+ mr, xlt_flags);
+ else
+ err = mlx5r_umr_update_mr_pas(mr,
+ xlt_flags);
+ }
}
+ dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
- if (bytes_mapped) {
- u32 new_mappings = (np << page_shift) -
- (io_virt - round_down(io_virt, 1 << page_shift));
- *bytes_mapped += min_t(u32, new_mappings, size);
+ if (err) {
+ mr->page_shift = old_page_shift;
+ return err;
}
- npages += np << (page_shift - PAGE_SHIFT);
- bcnt -= size;
+ mr->dmabuf_faulted = 1;
- if (unlikely(bcnt)) {
- struct ib_umem_odp *next;
+ if (bytes_mapped)
+ *bytes_mapped += bcnt;
- io_virt += size;
- next = odp_next(odp);
- if (unlikely(!next || next->umem.address != io_virt)) {
- mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
- io_virt, next);
- return -EAGAIN;
- }
- odp = next;
- mr = odp->private;
- goto next_mr;
- }
+ return ib_umem_num_pages(mr->umem);
+}
- return npages;
+/*
+ * Returns:
+ * -EFAULT: The io_virt->bcnt is not within the MR, it covers pages that are
+ * not accessible, or the MR is no longer valid.
+ * -EAGAIN/-ENOMEM: The operation should be retried
+ *
+ * -EINVAL/others: General internal malfunction
+ * >0: Number of pages mapped
+ */
+static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
+ u32 *bytes_mapped, u32 flags, bool permissive_fault)
+{
+ struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
-out:
- if (ret == -EAGAIN) {
- if (implicit || !odp->dying) {
- unsigned long timeout =
- msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
-
- if (!wait_for_completion_timeout(
- &odp->notifier_completion,
- timeout)) {
- mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n",
- current_seq, odp->notifiers_seq, odp->notifiers_count);
- }
- } else {
- /* The MR is being killed, kill the QP as well. */
- ret = -EFAULT;
- }
+ if (unlikely(io_virt < mr->ibmr.iova) && !permissive_fault)
+ return -EFAULT;
+
+ if (mr->umem->is_dmabuf)
+ return pagefault_dmabuf_mr(mr, bcnt, bytes_mapped, flags);
+
+ if (!odp->is_implicit_odp) {
+ u64 offset = io_virt < mr->ibmr.iova ? 0 : io_virt - mr->ibmr.iova;
+ u64 user_va;
+
+ if (check_add_overflow(offset, (u64)odp->umem.address,
+ &user_va))
+ return -EFAULT;
+
+ if (permissive_fault) {
+ if (user_va < ib_umem_start(odp))
+ user_va = ib_umem_start(odp);
+ if ((user_va + bcnt) > ib_umem_end(odp))
+ bcnt = ib_umem_end(odp) - user_va;
+ } else if (unlikely(user_va >= ib_umem_end(odp) ||
+ ib_umem_end(odp) - user_va < bcnt))
+ return -EFAULT;
+ return pagefault_real_mr(mr, odp, user_va, bcnt, bytes_mapped,
+ flags);
}
+ return pagefault_implicit_mr(mr, odp, io_virt, bcnt, bytes_mapped,
+ flags);
+}
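/*
 * A worked illustration of the permissive_fault clamping above, with
 * assumed addresses (permissive_fault is only set from the memory-scheme
 * fault handler, where the reported range can include prefetch bytes
 * that fall outside the MR):
 *
 *   umem.address = iova = 0x7f0000000000, length = 0x10000
 *   io_virt = 0x7effffffe000 (0x2000 below iova), bcnt = 0x20000
 *
 *   offset  = 0 (io_virt < iova), so user_va = 0x7f0000000000
 *   user_va + bcnt exceeds ib_umem_end(), so bcnt is clamped to 0x10000
 *
 * The fault is thus restricted to the MR's own [start, end) range instead
 * of failing with -EFAULT as the non-permissive path would.
 */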
- return ret;
+int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr)
+{
+ int ret;
+
+ ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem), mr->umem->address,
+ mr->umem->length, NULL,
+ MLX5_PF_FLAGS_SNAPSHOT | MLX5_PF_FLAGS_ENABLE);
+ return ret >= 0 ? 0 : ret;
+}
+
+int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr)
+{
+ int ret;
+
+ ret = pagefault_dmabuf_mr(mr, mr->umem->length, NULL,
+ MLX5_PF_FLAGS_ENABLE);
+
+ return ret >= 0 ? 0 : ret;
}
struct pf_frame {
@@ -711,35 +956,41 @@ struct pf_frame {
int depth;
};
-static bool mkey_is_eq(struct mlx5_core_mkey *mmkey, u32 key)
+static bool mkey_is_eq(struct mlx5_ib_mkey *mmkey, u32 key)
{
if (!mmkey)
return false;
- if (mmkey->type == MLX5_MKEY_MW)
+ if (mmkey->type == MLX5_MKEY_MW ||
+ mmkey->type == MLX5_MKEY_INDIRECT_DEVX)
return mlx5_base_mkey(mmkey->key) == mlx5_base_mkey(key);
return mmkey->key == key;
}
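/*
 * A short illustration of the base-key comparison above, assuming
 * mlx5_base_mkey() masks off the low 8 "tag" bits of the key (values
 * are made up): a WQE may carry key 0x00abcd07 while the stored
 * mmkey->key is 0x00abcd00. For MWs and indirect DEVX mkeys, whose tag
 * byte can be rotated by userspace, the two are treated as equal because
 * both base keys are 0x00abcd00; for ordinary MRs the full 32-bit keys
 * must match.
 */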
-static int get_indirect_num_descs(struct mlx5_core_mkey *mmkey)
+static struct mlx5_ib_mkey *find_odp_mkey(struct mlx5_ib_dev *dev, u32 key)
{
- struct mlx5_ib_mw *mw;
- struct mlx5_ib_devx_mr *devx_mr;
+ struct mlx5_ib_mkey *mmkey;
- if (mmkey->type == MLX5_MKEY_MW) {
- mw = container_of(mmkey, struct mlx5_ib_mw, mmkey);
- return mw->ndescs;
+ xa_lock(&dev->odp_mkeys);
+ mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(key));
+ if (!mmkey) {
+ mmkey = ERR_PTR(-ENOENT);
+ goto out;
}
+ if (!mkey_is_eq(mmkey, key)) {
+ mmkey = ERR_PTR(-EFAULT);
+ goto out;
+ }
+ refcount_inc(&mmkey->usecount);
+out:
+ xa_unlock(&dev->odp_mkeys);
- devx_mr = container_of(mmkey, struct mlx5_ib_devx_mr,
- mmkey);
- return devx_mr->ndescs;
+ return mmkey;
}
/*
* Handle a single data segment in a page-fault WQE or RDMA region.
*
- * Returns number of OS pages retrieved on success. The caller may continue to
- * the next data segment.
+ * Returns zero on success. The caller may continue to the next data segment.
* Can return the following error codes:
* -EAGAIN to designate a temporary error. The caller will abort handling the
* page fault and resolve it.
@@ -750,93 +1001,77 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
struct ib_pd *pd, u32 key,
u64 io_virt, size_t bcnt,
u32 *bytes_committed,
- u32 *bytes_mapped, u32 flags)
+ u32 *bytes_mapped)
{
- int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0;
- bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH;
+ int ret, i, outlen, cur_outlen = 0, depth = 0, pages_in_range;
struct pf_frame *head = NULL, *frame;
- struct mlx5_core_mkey *mmkey;
+ struct mlx5_ib_mkey *mmkey;
struct mlx5_ib_mr *mr;
struct mlx5_klm *pklm;
u32 *out = NULL;
size_t offset;
- int ndescs;
-
- srcu_key = srcu_read_lock(&dev->mr_srcu);
io_virt += *bytes_committed;
bcnt -= *bytes_committed;
-
next_mr:
- mmkey = xa_load(&dev->mdev->priv.mkey_table, mlx5_base_mkey(key));
- if (!mkey_is_eq(mmkey, key)) {
- mlx5_ib_dbg(dev, "failed to find mkey %x\n", key);
- ret = -EFAULT;
- goto srcu_unlock;
- }
-
- if (prefetch && mmkey->type != MLX5_MKEY_MR) {
- mlx5_ib_dbg(dev, "prefetch is allowed only for MR\n");
- ret = -EINVAL;
- goto srcu_unlock;
+ mmkey = find_odp_mkey(dev, key);
+ if (IS_ERR(mmkey)) {
+ ret = PTR_ERR(mmkey);
+ if (ret == -ENOENT) {
+ mlx5_ib_dbg(
+ dev,
+ "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
+ key);
+ if (bytes_mapped)
+ *bytes_mapped += bcnt;
+ /*
+ * The user could specify an SGL with multiple lkeys of
+ * which only some are ODP. Treat the non-ODP ones as
+ * fully faulted.
+ */
+ ret = 0;
+ }
+ goto end;
}
switch (mmkey->type) {
case MLX5_MKEY_MR:
mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
- if (!mr->live || !mr->ibmr.pd) {
- mlx5_ib_dbg(dev, "got dead MR\n");
- ret = -EFAULT;
- goto srcu_unlock;
- }
- if (prefetch) {
- if (!is_odp_mr(mr) ||
- mr->ibmr.pd != pd) {
- mlx5_ib_dbg(dev, "Invalid prefetch request: %s\n",
- is_odp_mr(mr) ? "MR is not ODP" :
- "PD is not of the MR");
- ret = -EINVAL;
- goto srcu_unlock;
- }
- }
+ pages_in_range = (ALIGN(io_virt + bcnt, PAGE_SIZE) -
+ (io_virt & PAGE_MASK)) >>
+ PAGE_SHIFT;
+ ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0, false);
+ if (ret < 0)
+ goto end;
- if (!is_odp_mr(mr)) {
- mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
- key);
- if (bytes_mapped)
- *bytes_mapped += bcnt;
- ret = 0;
- goto srcu_unlock;
- }
+ mlx5_update_odp_stats_with_handled(mr, faults, ret);
- ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped, flags);
- if (ret < 0)
- goto srcu_unlock;
+ if (ret < pages_in_range) {
+ ret = -EFAULT;
+ goto end;
+ }
- npages += ret;
ret = 0;
break;
case MLX5_MKEY_MW:
case MLX5_MKEY_INDIRECT_DEVX:
- ndescs = get_indirect_num_descs(mmkey);
-
if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) {
mlx5_ib_dbg(dev, "indirection level exceeded\n");
ret = -EFAULT;
- goto srcu_unlock;
+ goto end;
}
outlen = MLX5_ST_SZ_BYTES(query_mkey_out) +
- sizeof(*pklm) * (ndescs - 2);
+ sizeof(*pklm) * (mmkey->ndescs - 2);
if (outlen > cur_outlen) {
kfree(out);
out = kzalloc(outlen, GFP_KERNEL);
if (!out) {
ret = -ENOMEM;
- goto srcu_unlock;
+ goto end;
}
cur_outlen = outlen;
}
@@ -844,14 +1079,14 @@ next_mr:
pklm = (struct mlx5_klm *)MLX5_ADDR_OF(query_mkey_out, out,
bsf0_klm0_pas_mtt0_1);
- ret = mlx5_core_query_mkey(dev->mdev, mmkey, out, outlen);
+ ret = mlx5_core_query_mkey(dev->mdev, mmkey->key, out, outlen);
if (ret)
- goto srcu_unlock;
+ goto end;
offset = io_virt - MLX5_GET64(query_mkey_out, out,
memory_key_mkey_entry.start_addr);
- for (i = 0; bcnt && i < ndescs; i++, pklm++) {
+ for (i = 0; bcnt && i < mmkey->ndescs; i++, pklm++) {
if (offset >= be32_to_cpu(pklm->bcount)) {
offset -= be32_to_cpu(pklm->bcount);
continue;
@@ -860,7 +1095,7 @@ next_mr:
frame = kzalloc(sizeof(*frame), GFP_KERNEL);
if (!frame) {
ret = -ENOMEM;
- goto srcu_unlock;
+ goto end;
}
frame->key = be32_to_cpu(pklm->key);
@@ -879,7 +1114,7 @@ next_mr:
default:
mlx5_ib_dbg(dev, "wrong mkey type %d\n", mmkey->type);
ret = -EFAULT;
- goto srcu_unlock;
+ goto end;
}
if (head) {
@@ -892,10 +1127,13 @@ next_mr:
depth = frame->depth;
kfree(frame);
+ mlx5r_deref_odp_mkey(mmkey);
goto next_mr;
}
-srcu_unlock:
+end:
+ if (!IS_ERR(mmkey))
+ mlx5r_deref_odp_mkey(mmkey);
while (head) {
frame = head;
head = frame->next;
@@ -903,27 +1141,27 @@ srcu_unlock:
}
kfree(out);
- srcu_read_unlock(&dev->mr_srcu, srcu_key);
*bytes_committed = 0;
- return ret ? ret : npages;
+ return ret;
}
-/**
+/*
* Parse a series of data segments for page fault handling.
*
- * @pfault contains page fault information.
- * @wqe points at the first data segment in the WQE.
- * @wqe_end points after the end of the WQE.
- * @bytes_mapped receives the number of bytes that the function was able to
- * map. This allows the caller to decide intelligently whether
- * enough memory was mapped to resolve the page fault
- * successfully (e.g. enough for the next MTU, or the entire
- * WQE).
- * @total_wqe_bytes receives the total data size of this WQE in bytes (minus
- * the committed bytes).
+ * @dev: Pointer to mlx5 IB device
+ * @pfault: contains page fault information.
+ * @wqe: points at the first data segment in the WQE.
+ * @wqe_end: points after the end of the WQE.
+ * @bytes_mapped: receives the number of bytes that the function was able to
+ * map. This allows the caller to decide intelligently whether
+ * enough memory was mapped to resolve the page fault
+ * successfully (e.g. enough for the next MTU, or the entire
+ * WQE).
+ * @total_wqe_bytes: receives the total data size of this WQE in bytes (minus
+ * the committed bytes).
+ * @receive_queue: true for a receive-queue WQE, where the end-of-sg-list
+ *                 sentinel terminates the scatter list
*
- * Returns the number of pages loaded if positive, zero for an empty WQE, or a
- * negative error code.
+ * Returns zero for success or a negative error code.
*/
static int pagefault_data_segments(struct mlx5_ib_dev *dev,
struct mlx5_pagefault *pfault,
@@ -931,9 +1169,9 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev,
void *wqe_end, u32 *bytes_mapped,
u32 *total_wqe_bytes, bool receive_queue)
{
- int ret = 0, npages = 0;
+ int ret = 0;
u64 io_virt;
- u32 key;
+ __be32 key;
u32 byte_count;
size_t bcnt;
int inline_segment;
@@ -947,7 +1185,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev,
struct mlx5_wqe_data_seg *dseg = wqe;
io_virt = be64_to_cpu(dseg->addr);
- key = be32_to_cpu(dseg->lkey);
+ key = dseg->lkey;
byte_count = be32_to_cpu(dseg->byte_count);
inline_segment = !!(byte_count & MLX5_INLINE_SEG);
bcnt = byte_count & ~MLX5_INLINE_SEG;
@@ -961,7 +1199,8 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev,
}
/* receive WQE end of sg list. */
- if (receive_queue && bcnt == 0 && key == MLX5_INVALID_LKEY &&
+ if (receive_queue && bcnt == 0 &&
+ key == dev->mkeys.terminate_scatter_list_mkey &&
io_virt == 0)
break;
@@ -981,29 +1220,17 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev,
continue;
}
- ret = pagefault_single_data_segment(dev, NULL, key,
+ ret = pagefault_single_data_segment(dev, NULL, be32_to_cpu(key),
io_virt, bcnt,
&pfault->bytes_committed,
- bytes_mapped, 0);
+ bytes_mapped);
if (ret < 0)
break;
- npages += ret;
}
- return ret < 0 ? ret : npages;
+ return ret;
}
-static const u32 mlx5_ib_odp_opcode_cap[] = {
- [MLX5_OPCODE_SEND] = IB_ODP_SUPPORT_SEND,
- [MLX5_OPCODE_SEND_IMM] = IB_ODP_SUPPORT_SEND,
- [MLX5_OPCODE_SEND_INVAL] = IB_ODP_SUPPORT_SEND,
- [MLX5_OPCODE_RDMA_WRITE] = IB_ODP_SUPPORT_WRITE,
- [MLX5_OPCODE_RDMA_WRITE_IMM] = IB_ODP_SUPPORT_WRITE,
- [MLX5_OPCODE_RDMA_READ] = IB_ODP_SUPPORT_READ,
- [MLX5_OPCODE_ATOMIC_CS] = IB_ODP_SUPPORT_ATOMIC,
- [MLX5_OPCODE_ATOMIC_FA] = IB_ODP_SUPPORT_ATOMIC,
-};
-
/*
* Parse initiator WQE. Advances the wqe pointer to point at the
* scatter-gather list, and set wqe_end to the end of the WQE.
@@ -1014,12 +1241,8 @@ static int mlx5_ib_mr_initiator_pfault_handler(
{
struct mlx5_wqe_ctrl_seg *ctrl = *wqe;
u16 wqe_index = pfault->wqe.wqe_index;
- u32 transport_caps;
struct mlx5_base_av *av;
unsigned ds, opcode;
-#if defined(DEBUG)
- u32 ctrl_wqe_index, ctrl_qpn;
-#endif
u32 qpn = qp->trans_qp.base.mqp.qpn;
ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
@@ -1035,58 +1258,16 @@ static int mlx5_ib_mr_initiator_pfault_handler(
return -EFAULT;
}
-#if defined(DEBUG)
- ctrl_wqe_index = (be32_to_cpu(ctrl->opmod_idx_opcode) &
- MLX5_WQE_CTRL_WQE_INDEX_MASK) >>
- MLX5_WQE_CTRL_WQE_INDEX_SHIFT;
- if (wqe_index != ctrl_wqe_index) {
- mlx5_ib_err(dev, "Got WQE with invalid wqe_index. wqe_index=0x%x, qpn=0x%x ctrl->wqe_index=0x%x\n",
- wqe_index, qpn,
- ctrl_wqe_index);
- return -EFAULT;
- }
-
- ctrl_qpn = (be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_QPN_MASK) >>
- MLX5_WQE_CTRL_QPN_SHIFT;
- if (qpn != ctrl_qpn) {
- mlx5_ib_err(dev, "Got WQE with incorrect QP number. wqe_index=0x%x, qpn=0x%x ctrl->qpn=0x%x\n",
- wqe_index, qpn,
- ctrl_qpn);
- return -EFAULT;
- }
-#endif /* DEBUG */
-
*wqe_end = *wqe + ds * MLX5_WQE_DS_UNITS;
*wqe += sizeof(*ctrl);
opcode = be32_to_cpu(ctrl->opmod_idx_opcode) &
MLX5_WQE_CTRL_OPCODE_MASK;
- switch (qp->ibqp.qp_type) {
- case IB_QPT_XRC_INI:
+ if (qp->type == IB_QPT_XRC_INI)
*wqe += sizeof(struct mlx5_wqe_xrc_seg);
- transport_caps = dev->odp_caps.per_transport_caps.xrc_odp_caps;
- break;
- case IB_QPT_RC:
- transport_caps = dev->odp_caps.per_transport_caps.rc_odp_caps;
- break;
- case IB_QPT_UD:
- transport_caps = dev->odp_caps.per_transport_caps.ud_odp_caps;
- break;
- default:
- mlx5_ib_err(dev, "ODP fault on QP of an unsupported transport 0x%x\n",
- qp->ibqp.qp_type);
- return -EFAULT;
- }
-
- if (unlikely(opcode >= ARRAY_SIZE(mlx5_ib_odp_opcode_cap) ||
- !(transport_caps & mlx5_ib_odp_opcode_cap[opcode]))) {
- mlx5_ib_err(dev, "ODP fault on QP of an unsupported opcode 0x%x\n",
- opcode);
- return -EFAULT;
- }
- if (qp->ibqp.qp_type == IB_QPT_UD) {
+ if (qp->type == IB_QPT_UD || qp->type == MLX5_IB_QPT_DCI) {
av = *wqe;
if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV))
*wqe += sizeof(struct mlx5_av);
@@ -1139,7 +1320,7 @@ static int mlx5_ib_mr_responder_pfault_handler_rq(struct mlx5_ib_dev *dev,
struct mlx5_ib_wq *wq = &qp->rq;
int wqe_size = 1 << wq->wqe_shift;
- if (qp->wq_sig) {
+ if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) {
mlx5_ib_err(dev, "ODP fault with WQE signatures is not supported\n");
return -EFAULT;
}
@@ -1149,19 +1330,6 @@ static int mlx5_ib_mr_responder_pfault_handler_rq(struct mlx5_ib_dev *dev,
return -EFAULT;
}
- switch (qp->ibqp.qp_type) {
- case IB_QPT_RC:
- if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
- IB_ODP_SUPPORT_RECV))
- goto invalid_transport_or_opcode;
- break;
- default:
-invalid_transport_or_opcode:
- mlx5_ib_err(dev, "ODP fault on QP of an unsupported transport. transport: 0x%x\n",
- qp->ibqp.qp_type);
- return -EFAULT;
- }
-
*wqe_end = wqe + wqe_size;
return 0;
@@ -1182,7 +1350,7 @@ static inline struct mlx5_core_rsc_common *odp_get_rsc(struct mlx5_ib_dev *dev,
case MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE:
case MLX5_WQE_PF_TYPE_RESP:
case MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC:
- common = mlx5_core_res_hold(dev->mdev, wq_num, MLX5_RES_QP);
+ common = mlx5_core_res_hold(dev, wq_num, MLX5_RES_QP);
break;
default:
break;
@@ -1211,7 +1379,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
{
bool sq = pfault->type & MLX5_PFAULT_REQUESTOR;
u16 wqe_index = pfault->wqe.wqe_index;
- void *wqe = NULL, *wqe_end = NULL;
+ void *wqe, *wqe_start = NULL, *wqe_end = NULL;
u32 bytes_mapped, total_wqe_bytes;
struct mlx5_core_rsc_common *res;
int resume_with_error = 1;
@@ -1232,23 +1400,24 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
goto resolve_page_fault;
}
- wqe = (void *)__get_free_page(GFP_KERNEL);
- if (!wqe) {
+ wqe_start = (void *)__get_free_page(GFP_KERNEL);
+ if (!wqe_start) {
mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n");
goto resolve_page_fault;
}
+ wqe = wqe_start;
qp = (res->res == MLX5_RES_QP) ? res_to_qp(res) : NULL;
if (qp && sq) {
- ret = mlx5_ib_read_user_wqe_sq(qp, wqe_index, wqe, PAGE_SIZE,
- &bytes_copied);
+ ret = mlx5_ib_read_wqe_sq(qp, wqe_index, wqe, PAGE_SIZE,
+ &bytes_copied);
if (ret)
goto read_user;
ret = mlx5_ib_mr_initiator_pfault_handler(
dev, pfault, qp, &wqe, &wqe_end, bytes_copied);
} else if (qp && !sq) {
- ret = mlx5_ib_read_user_wqe_rq(qp, wqe_index, wqe, PAGE_SIZE,
- &bytes_copied);
+ ret = mlx5_ib_read_wqe_rq(qp, wqe_index, wqe, PAGE_SIZE,
+ &bytes_copied);
if (ret)
goto read_user;
ret = mlx5_ib_mr_responder_pfault_handler_rq(
@@ -1256,8 +1425,8 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
} else if (!qp) {
struct mlx5_ib_srq *srq = res_to_srq(res);
- ret = mlx5_ib_read_user_wqe_srq(srq, wqe_index, wqe, PAGE_SIZE,
- &bytes_copied);
+ ret = mlx5_ib_read_wqe_srq(srq, wqe_index, wqe, PAGE_SIZE,
+ &bytes_copied);
if (ret)
goto read_user;
ret = mlx5_ib_mr_responder_pfault_handler_srq(
@@ -1283,7 +1452,7 @@ read_user:
if (ret)
mlx5_ib_err(
dev,
- "Failed reading a WQE following page fault, error %d, wqe_index %x, qpn %x\n",
+ "Failed reading a WQE following page fault, error %d, wqe_index %x, qpn %llx\n",
ret, wqe_index, pfault->token);
resolve_page_fault:
@@ -1292,13 +1461,7 @@ resolve_page_fault:
pfault->wqe.wq_num, resume_with_error,
pfault->type);
mlx5_core_res_put(res);
- free_page((unsigned long)wqe);
-}
-
-static int pages_in_range(u64 address, u32 length)
-{
- return (ALIGN(address + length, PAGE_SIZE) -
- (address & PAGE_MASK)) >> PAGE_SHIFT;
+ free_page((unsigned long)wqe_start);
}
static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
@@ -1335,21 +1498,20 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
}
ret = pagefault_single_data_segment(dev, NULL, rkey, address, length,
- &pfault->bytes_committed, NULL,
- 0);
+ &pfault->bytes_committed, NULL);
if (ret == -EAGAIN) {
/* We're racing with an invalidation, don't prefetch */
prefetch_activated = 0;
- } else if (ret < 0 || pages_in_range(address, length) > ret) {
+ } else if (ret < 0) {
mlx5_ib_page_fault_resume(dev, pfault, 1);
if (ret != -ENOENT)
- mlx5_ib_dbg(dev, "PAGE FAULT error %d. QP 0x%x, type: 0x%x\n",
+ mlx5_ib_dbg(dev, "PAGE FAULT error %d. QP 0x%llx, type: 0x%x\n",
ret, pfault->token, pfault->type);
return;
}
mlx5_ib_page_fault_resume(dev, pfault, 0);
- mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x, type: 0x%x, prefetch_activated: %d\n",
+ mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%llx, type: 0x%x, prefetch_activated: %d\n",
pfault->token, pfault->type,
prefetch_activated);
@@ -1363,15 +1525,82 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
ret = pagefault_single_data_segment(dev, NULL, rkey, address,
prefetch_len,
- &bytes_committed, NULL,
- 0);
+ &bytes_committed, NULL);
if (ret < 0 && ret != -EAGAIN) {
- mlx5_ib_dbg(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n",
+ mlx5_ib_dbg(dev, "Prefetch failed. ret: %d, QP 0x%llx, address: 0x%.16llx, length = 0x%.16x\n",
ret, pfault->token, address, prefetch_len);
}
}
}
+#define MLX5_MEMORY_PAGE_FAULT_FLAGS_LAST BIT(7)
+static void mlx5_ib_mr_memory_pfault_handler(struct mlx5_ib_dev *dev,
+ struct mlx5_pagefault *pfault)
+{
+ u64 prefetch_va =
+ pfault->memory.va - pfault->memory.prefetch_before_byte_count;
+ size_t prefetch_size = pfault->memory.prefetch_before_byte_count +
+ pfault->memory.fault_byte_count +
+ pfault->memory.prefetch_after_byte_count;
+ struct mlx5_ib_mkey *mmkey;
+ struct mlx5_ib_mr *mr, *child_mr;
+ int ret = 0;
+
+ mmkey = find_odp_mkey(dev, pfault->memory.mkey);
+ if (IS_ERR(mmkey))
+ goto err;
+
+ switch (mmkey->type) {
+ case MLX5_MKEY_IMPLICIT_CHILD:
+ child_mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
+ mr = child_mr->parent;
+ break;
+ case MLX5_MKEY_NULL:
+ mr = container_of(mmkey, struct mlx5_ib_mr, null_mmkey);
+ break;
+ default:
+ mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
+ break;
+ }
+
+ /* If the prefetch range fails, handle only the demanded page fault */
+ ret = pagefault_mr(mr, prefetch_va, prefetch_size, NULL, 0, true);
+ if (ret < 0) {
+ ret = pagefault_mr(mr, pfault->memory.va,
+ pfault->memory.fault_byte_count, NULL, 0,
+ true);
+ if (ret < 0)
+ goto err;
+ }
+
+ mlx5_update_odp_stats_with_handled(mr, faults, ret);
+ mlx5r_deref_odp_mkey(mmkey);
+
+ if (pfault->memory.flags & MLX5_MEMORY_PAGE_FAULT_FLAGS_LAST)
+ mlx5_ib_page_fault_resume(dev, pfault, 0);
+
+ mlx5_ib_dbg(
+ dev,
+ "PAGE FAULT completed %s. token 0x%llx, mkey: 0x%x, va: 0x%llx, byte_count: 0x%x\n",
+ pfault->memory.flags & MLX5_MEMORY_PAGE_FAULT_FLAGS_LAST ?
+ "" :
+ "without resume cmd",
+ pfault->token, pfault->memory.mkey, pfault->memory.va,
+ pfault->memory.fault_byte_count);
+
+ return;
+
+err:
+ if (!IS_ERR(mmkey))
+ mlx5r_deref_odp_mkey(mmkey);
+ mlx5_ib_page_fault_resume(dev, pfault, 1);
+ mlx5_ib_dbg(
+ dev,
+ "PAGE FAULT error. token 0x%llx, mkey: 0x%x, va: 0x%llx, byte_count: 0x%x, err: %d\n",
+ pfault->token, pfault->memory.mkey, pfault->memory.va,
+ pfault->memory.fault_byte_count, ret);
+}
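/*
 * A worked example of the prefetch window computed at the top of this
 * handler, with assumed EQE fields and the 4 KiB
 * MEMORY_SCHEME_PAGE_FAULT_GRANULARITY used when the EQE is parsed:
 *
 *   prefetch_before_byte_count = 2 * 4096 = 8192
 *   fault_byte_count           = 1 * 4096 = 4096
 *   prefetch_after_byte_count  = 4 * 4096 = 16384
 *
 *   prefetch_va   = va - 8192
 *   prefetch_size = 8192 + 4096 + 16384 = 28672
 *
 * If faulting the whole 28672-byte window fails, the handler falls back
 * to faulting just [va, va + 4096), the strictly demanded range.
 */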
+
static void mlx5_ib_pfault(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault)
{
u8 event_subtype = pfault->event_subtype;
@@ -1383,6 +1612,9 @@ static void mlx5_ib_pfault(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfaul
case MLX5_PFAULT_SUBTYPE_RDMA:
mlx5_ib_mr_rdma_pfault_handler(dev, pfault);
break;
+ case MLX5_PFAULT_SUBTYPE_MEMORY:
+ mlx5_ib_mr_memory_pfault_handler(dev, pfault);
+ break;
default:
mlx5_ib_err(dev, "Invalid page fault event subtype: 0x%x\n",
event_subtype);
@@ -1401,6 +1633,7 @@ static void mlx5_ib_eqe_pf_action(struct work_struct *work)
mempool_free(pfault, eq->pool);
}
+#define MEMORY_SCHEME_PAGE_FAULT_GRANULARITY 4096
static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq)
{
struct mlx5_eqe_page_fault *pf_eqe;
@@ -1417,15 +1650,12 @@ static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq)
pf_eqe = &eqe->data.page_fault;
pfault->event_subtype = eqe->sub_type;
- pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed);
-
- mlx5_ib_dbg(eq->dev,
- "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n",
- eqe->sub_type, pfault->bytes_committed);
switch (eqe->sub_type) {
case MLX5_PFAULT_SUBTYPE_RDMA:
/* RDMA based event */
+ pfault->bytes_committed =
+ be32_to_cpu(pf_eqe->rdma.bytes_committed);
pfault->type =
be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24;
pfault->token =
@@ -1439,10 +1669,12 @@ static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq)
be32_to_cpu(pf_eqe->rdma.rdma_op_len);
pfault->rdma.rdma_va =
be64_to_cpu(pf_eqe->rdma.rdma_va);
- mlx5_ib_dbg(eq->dev,
- "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n",
- pfault->type, pfault->token,
- pfault->rdma.r_key);
+ mlx5_ib_dbg(
+ eq->dev,
+ "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x, type:0x%x, token: 0x%06llx, r_key: 0x%08x\n",
+ eqe->sub_type, pfault->bytes_committed,
+ pfault->type, pfault->token,
+ pfault->rdma.r_key);
mlx5_ib_dbg(eq->dev,
"PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n",
pfault->rdma.rdma_op_len,
@@ -1451,6 +1683,8 @@ static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq)
case MLX5_PFAULT_SUBTYPE_WQE:
/* WQE based event */
+ pfault->bytes_committed =
+ be32_to_cpu(pf_eqe->wqe.bytes_committed);
pfault->type =
(be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7;
pfault->token =
@@ -1462,11 +1696,47 @@ static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq)
be16_to_cpu(pf_eqe->wqe.wqe_index);
pfault->wqe.packet_size =
be16_to_cpu(pf_eqe->wqe.packet_length);
- mlx5_ib_dbg(eq->dev,
- "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n",
- pfault->type, pfault->token,
- pfault->wqe.wq_num,
- pfault->wqe.wqe_index);
+ mlx5_ib_dbg(
+ eq->dev,
+ "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x, type:0x%x, token: 0x%06llx, wq_num: 0x%06x, wqe_index: 0x%04x\n",
+ eqe->sub_type, pfault->bytes_committed,
+ pfault->type, pfault->token, pfault->wqe.wq_num,
+ pfault->wqe.wqe_index);
+ break;
+
+ case MLX5_PFAULT_SUBTYPE_MEMORY:
+ /* Memory based event */
+ pfault->bytes_committed = 0;
+ pfault->token =
+ be32_to_cpu(pf_eqe->memory.token31_0) |
+ ((u64)be16_to_cpu(pf_eqe->memory.token47_32)
+ << 32);
+ pfault->memory.va = be64_to_cpu(pf_eqe->memory.va);
+ pfault->memory.mkey = be32_to_cpu(pf_eqe->memory.mkey);
+ pfault->memory.fault_byte_count = (be32_to_cpu(
+ pf_eqe->memory.demand_fault_pages) >> 12) *
+ MEMORY_SCHEME_PAGE_FAULT_GRANULARITY;
+ pfault->memory.prefetch_before_byte_count =
+ be16_to_cpu(
+ pf_eqe->memory.pre_demand_fault_pages) *
+ MEMORY_SCHEME_PAGE_FAULT_GRANULARITY;
+ pfault->memory.prefetch_after_byte_count =
+ be16_to_cpu(
+ pf_eqe->memory.post_demand_fault_pages) *
+ MEMORY_SCHEME_PAGE_FAULT_GRANULARITY;
+ pfault->memory.flags = pf_eqe->memory.flags;
+ mlx5_ib_dbg(
+ eq->dev,
+ "PAGE_FAULT: subtype: 0x%02x, token: 0x%06llx, mkey: 0x%06x, fault_byte_count: 0x%06x, va: 0x%016llx, flags: 0x%02x\n",
+ eqe->sub_type, pfault->token,
+ pfault->memory.mkey,
+ pfault->memory.fault_byte_count,
+ pfault->memory.va, pfault->memory.flags);
+ mlx5_ib_dbg(
+ eq->dev,
+ "PAGE_FAULT: prefetch size: before: 0x%06x, after 0x%06x\n",
+ pfault->memory.prefetch_before_byte_count,
+ pfault->memory.prefetch_after_byte_count);
break;
default:
@@ -1532,20 +1802,24 @@ enum {
MLX5_IB_NUM_PF_DRAIN = 64,
};
-static int
-mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
+int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
{
struct mlx5_eq_param param = {};
- int err;
+ int err = 0;
+ mutex_lock(&dev->odp_eq_mutex);
+ if (eq->core)
+ goto unlock;
INIT_WORK(&eq->work, mlx5_ib_eq_pf_action);
spin_lock_init(&eq->lock);
eq->dev = dev;
eq->pool = mempool_create_kmalloc_pool(MLX5_IB_NUM_PF_DRAIN,
sizeof(struct mlx5_pagefault));
- if (!eq->pool)
- return -ENOMEM;
+ if (!eq->pool) {
+ err = -ENOMEM;
+ goto unlock;
+ }
eq->wq = alloc_workqueue("mlx5_ib_page_fault",
WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM,
@@ -1557,10 +1831,9 @@ mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int;
param = (struct mlx5_eq_param) {
- .irq_index = 0,
- .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT,
.nent = MLX5_IB_NUM_PF_EQE,
};
+ param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT;
eq->core = mlx5_eq_create_generic(dev->mdev, &param);
if (IS_ERR(eq->core)) {
err = PTR_ERR(eq->core);
@@ -1572,21 +1845,27 @@ mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
goto err_eq;
}
+ mutex_unlock(&dev->odp_eq_mutex);
return 0;
err_eq:
mlx5_eq_destroy_generic(dev->mdev, eq->core);
err_wq:
+ eq->core = NULL;
destroy_workqueue(eq->wq);
err_mempool:
mempool_destroy(eq->pool);
+unlock:
+ mutex_unlock(&dev->odp_eq_mutex);
return err;
}
static int
-mlx5_ib_destroy_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
+mlx5_ib_odp_destroy_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
{
int err;
+ if (!eq->core)
+ return 0;
mlx5_eq_disable(dev->mdev, eq->core, &eq->irq_nb);
err = mlx5_eq_destroy_generic(dev->mdev, eq->core);
cancel_work_sync(&eq->work);
@@ -1596,30 +1875,23 @@ mlx5_ib_destroy_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
return err;
}
-void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
+int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev)
{
- if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
- return;
+ struct mlx5r_cache_rb_key rb_key = {
+ .access_mode = MLX5_MKC_ACCESS_MODE_KSM,
+ .ndescs = mlx5_imr_ksm_entries,
+ .ph = MLX5_IB_NO_PH,
+ };
+ struct mlx5_cache_ent *ent;
- switch (ent->order - 2) {
- case MLX5_IMR_MTT_CACHE_ENTRY:
- ent->page = PAGE_SHIFT;
- ent->xlt = MLX5_IMR_MTT_ENTRIES *
- sizeof(struct mlx5_mtt) /
- MLX5_IB_UMR_OCTOWORD;
- ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
- ent->limit = 0;
- break;
+ if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+ return 0;
- case MLX5_IMR_KSM_CACHE_ENTRY:
- ent->page = MLX5_KSM_PAGE_SHIFT;
- ent->xlt = mlx5_imr_ksm_entries *
- sizeof(struct mlx5_klm) /
- MLX5_IB_UMR_OCTOWORD;
- ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM;
- ent->limit = 0;
- break;
- }
+ ent = mlx5r_cache_create_ent_locked(dev, rb_key, true);
+ if (IS_ERR(ent))
+ return PTR_ERR(ent);
+
+ return 0;
}
static const struct ib_device_ops mlx5_ib_dev_odp_ops = {
@@ -1628,197 +1900,212 @@ static const struct ib_device_ops mlx5_ib_dev_odp_ops = {
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
{
- int ret = 0;
-
- if (dev->odp_caps.general_caps & IB_ODP_SUPPORT)
- ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_odp_ops);
-
- if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) {
- ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey);
- if (ret) {
- mlx5_ib_err(dev, "Error getting null_mkey %d\n", ret);
- return ret;
- }
- }
+ internal_fill_odp_caps(dev);
- if (!MLX5_CAP_GEN(dev->mdev, pg))
- return ret;
+ if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT))
+ return 0;
- ret = mlx5_ib_create_pf_eq(dev, &dev->odp_pf_eq);
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_odp_ops);
- return ret;
+ mutex_init(&dev->odp_eq_mutex);
+ return 0;
}
void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *dev)
{
- if (!MLX5_CAP_GEN(dev->mdev, pg))
+ if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT))
return;
- mlx5_ib_destroy_pf_eq(dev, &dev->odp_pf_eq);
+ mlx5_ib_odp_destroy_eq(dev, &dev->odp_pf_eq);
}
int mlx5_ib_odp_init(void)
{
+ u32 log_va_pages = ilog2(TASK_SIZE) - PAGE_SHIFT;
+ u8 mlx5_imr_mtt_bits;
+
+ /* 48 bits is the default arm64 VA size and also covers x86-64 4-level paging (47 bits) */
+ if (log_va_pages <= 48 - PAGE_SHIFT)
+ mlx5_imr_mtt_shift = 30;
+ /* 56 bits covers x86-64 5-level paging */
+ else if (log_va_pages <= 56 - PAGE_SHIFT)
+ mlx5_imr_mtt_shift = 34;
+ else
+ return 0;
+
+ mlx5_imr_mtt_size = BIT_ULL(mlx5_imr_mtt_shift);
+ mlx5_imr_mtt_bits = mlx5_imr_mtt_shift - PAGE_SHIFT;
+ mlx5_imr_mtt_entries = BIT_ULL(mlx5_imr_mtt_bits);
mlx5_imr_ksm_entries = BIT_ULL(get_order(TASK_SIZE) -
- MLX5_IMR_MTT_BITS);
+ mlx5_imr_mtt_bits);
+ mlx5_imr_ksm_page_shift = mlx5_imr_mtt_shift;
return 0;
}
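/*
 * A worked example of the sizing above, assuming 4 KiB pages
 * (PAGE_SHIFT = 12) and a 47-bit task size (x86-64, 4-level paging):
 *
 *   log_va_pages         = 47 - 12 = 35  <= 48 - 12, so
 *   mlx5_imr_mtt_shift   = 30            (1 GiB per implicit child MR)
 *   mlx5_imr_mtt_bits    = 30 - 12 = 18  (262144 pages per child)
 *   mlx5_imr_ksm_entries = 1 << (35 - 18) = 131072
 *
 * 131072 children of 1 GiB each cover the full 2^47-byte address space.
 */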
struct prefetch_mr_work {
struct work_struct work;
- struct ib_pd *pd;
u32 pf_flags;
u32 num_sge;
- struct ib_sge sg_list[0];
+ struct {
+ u64 io_virt;
+ struct mlx5_ib_mr *mr;
+ size_t length;
+ } frags[];
};
-static void num_pending_prefetch_dec(struct mlx5_ib_dev *dev,
- struct ib_sge *sg_list, u32 num_sge,
- u32 from)
+static void destroy_prefetch_work(struct prefetch_mr_work *work)
{
u32 i;
- int srcu_key;
- srcu_key = srcu_read_lock(&dev->mr_srcu);
+ for (i = 0; i < work->num_sge; ++i)
+ mlx5r_deref_odp_mkey(&work->frags[i].mr->mmkey);
- for (i = from; i < num_sge; ++i) {
- struct mlx5_core_mkey *mmkey;
- struct mlx5_ib_mr *mr;
-
- mmkey = xa_load(&dev->mdev->priv.mkey_table,
- mlx5_base_mkey(sg_list[i].lkey));
- mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
- atomic_dec(&mr->num_pending_prefetch);
- }
-
- srcu_read_unlock(&dev->mr_srcu, srcu_key);
+ kvfree(work);
}
-static bool num_pending_prefetch_inc(struct ib_pd *pd,
- struct ib_sge *sg_list, u32 num_sge)
+static struct mlx5_ib_mr *
+get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
+ u32 lkey)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- bool ret = true;
- u32 i;
+ struct mlx5_ib_mr *mr = NULL;
+ struct mlx5_ib_mkey *mmkey;
- for (i = 0; i < num_sge; ++i) {
- struct mlx5_core_mkey *mmkey;
- struct mlx5_ib_mr *mr;
+ xa_lock(&dev->odp_mkeys);
+ mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey));
+ if (!mmkey || mmkey->key != lkey) {
+ mr = ERR_PTR(-ENOENT);
+ goto end;
+ }
+ if (mmkey->type != MLX5_MKEY_MR) {
+ mr = ERR_PTR(-EINVAL);
+ goto end;
+ }
- mmkey = xa_load(&dev->mdev->priv.mkey_table,
- mlx5_base_mkey(sg_list[i].lkey));
- if (!mmkey || mmkey->key != sg_list[i].lkey) {
- ret = false;
- break;
- }
+ mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
- if (mmkey->type != MLX5_MKEY_MR) {
- ret = false;
- break;
- }
+ if (mr->ibmr.pd != pd) {
+ mr = ERR_PTR(-EPERM);
+ goto end;
+ }
- mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
+ /* prefetch with write-access must be supported by the MR */
+ if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE &&
+ !mr->umem->writable) {
+ mr = ERR_PTR(-EPERM);
+ goto end;
+ }
- if (mr->ibmr.pd != pd) {
- ret = false;
- break;
- }
+ refcount_inc(&mmkey->usecount);
+end:
+ xa_unlock(&dev->odp_mkeys);
+ return mr;
+}
- if (!mr->live) {
- ret = false;
- break;
- }
+static void mlx5_ib_prefetch_mr_work(struct work_struct *w)
+{
+ struct prefetch_mr_work *work =
+ container_of(w, struct prefetch_mr_work, work);
+ u32 bytes_mapped = 0;
+ int ret;
+ u32 i;
- atomic_inc(&mr->num_pending_prefetch);
+ /* We rely on IB/core to execute the work only when num_sge != 0. */
+ WARN_ON(!work->num_sge);
+ for (i = 0; i < work->num_sge; ++i) {
+ ret = pagefault_mr(work->frags[i].mr, work->frags[i].io_virt,
+ work->frags[i].length, &bytes_mapped,
+ work->pf_flags, false);
+ if (ret <= 0)
+ continue;
+ mlx5_update_odp_stats(work->frags[i].mr, prefetch, ret);
}
- if (!ret)
- num_pending_prefetch_dec(dev, sg_list, i, 0);
-
- return ret;
+ destroy_prefetch_work(work);
}
-static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, u32 pf_flags,
- struct ib_sge *sg_list, u32 num_sge)
+static int init_prefetch_work(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice,
+ u32 pf_flags, struct prefetch_mr_work *work,
+ struct ib_sge *sg_list, u32 num_sge)
{
u32 i;
- int ret = 0;
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
+
+ INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work);
+ work->pf_flags = pf_flags;
for (i = 0; i < num_sge; ++i) {
- struct ib_sge *sg = &sg_list[i];
- int bytes_committed = 0;
+ struct mlx5_ib_mr *mr;
- ret = pagefault_single_data_segment(dev, pd, sg->lkey, sg->addr,
- sg->length,
- &bytes_committed, NULL,
- pf_flags);
- if (ret < 0)
- break;
+ mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey);
+ if (IS_ERR(mr)) {
+ work->num_sge = i;
+ return PTR_ERR(mr);
+ }
+ work->frags[i].io_virt = sg_list[i].addr;
+ work->frags[i].length = sg_list[i].length;
+ work->frags[i].mr = mr;
}
-
- return ret < 0 ? ret : 0;
+ work->num_sge = num_sge;
+ return 0;
}
-static void mlx5_ib_prefetch_mr_work(struct work_struct *work)
+static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice,
+ u32 pf_flags, struct ib_sge *sg_list,
+ u32 num_sge)
{
- struct prefetch_mr_work *w =
- container_of(work, struct prefetch_mr_work, work);
+ u32 bytes_mapped = 0;
+ int ret = 0;
+ u32 i;
- if (ib_device_try_get(w->pd->device)) {
- mlx5_ib_prefetch_sg_list(w->pd, w->pf_flags, w->sg_list,
- w->num_sge);
- ib_device_put(w->pd->device);
+ for (i = 0; i < num_sge; ++i) {
+ struct mlx5_ib_mr *mr;
+
+ mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey);
+ if (IS_ERR(mr))
+ return PTR_ERR(mr);
+ ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length,
+ &bytes_mapped, pf_flags, false);
+ if (ret < 0) {
+ mlx5r_deref_odp_mkey(&mr->mmkey);
+ return ret;
+ }
+ mlx5_update_odp_stats(mr, prefetch, ret);
+ mlx5r_deref_odp_mkey(&mr->mmkey);
}
- num_pending_prefetch_dec(to_mdev(w->pd->device), w->sg_list,
- w->num_sge, 0);
- kfree(w);
+ return 0;
}
int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
enum ib_uverbs_advise_mr_advice advice,
u32 flags, struct ib_sge *sg_list, u32 num_sge)
{
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
- u32 pf_flags = MLX5_PF_FLAGS_PREFETCH;
+ u32 pf_flags = 0;
struct prefetch_mr_work *work;
- bool valid_req;
- int srcu_key;
+ int rc;
if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH)
pf_flags |= MLX5_PF_FLAGS_DOWNGRADE;
+ if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT)
+ pf_flags |= MLX5_PF_FLAGS_SNAPSHOT;
+
if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH)
- return mlx5_ib_prefetch_sg_list(pd, pf_flags, sg_list,
+ return mlx5_ib_prefetch_sg_list(pd, advice, pf_flags, sg_list,
num_sge);
- work = kvzalloc(struct_size(work, sg_list, num_sge), GFP_KERNEL);
+ work = kvzalloc(struct_size(work, frags, num_sge), GFP_KERNEL);
if (!work)
return -ENOMEM;
- memcpy(work->sg_list, sg_list, num_sge * sizeof(struct ib_sge));
-
- /* It is guaranteed that the pd when work is executed is the pd when
- * work was queued since pd can't be destroyed while it holds MRs and
- * destroying a MR leads to flushing the workquque
- */
- work->pd = pd;
- work->pf_flags = pf_flags;
- work->num_sge = num_sge;
-
- INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work);
-
- srcu_key = srcu_read_lock(&dev->mr_srcu);
-
- valid_req = num_pending_prefetch_inc(pd, sg_list, num_sge);
- if (valid_req)
- queue_work(system_unbound_wq, &work->work);
- else
- kfree(work);
-
- srcu_read_unlock(&dev->mr_srcu, srcu_key);
-
- return valid_req ? 0 : -EINVAL;
+ rc = init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge);
+ if (rc) {
+ destroy_prefetch_work(work);
+ return rc;
+ }
+ queue_work(system_dfl_wq, &work->work);
+ return 0;
}
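/*
 * A sketch of the two prefetch paths above, with assumed sizes: with
 * IB_UVERBS_ADVISE_MR_FLAG_FLUSH the SG list is faulted synchronously;
 * otherwise the request is snapshotted into work->frags[] (one mkey
 * reference per SGE) and deferred to a workqueue. The allocation uses
 * struct_size(work, frags, num_sge) = sizeof(*work) +
 * num_sge * sizeof(work->frags[0]); on a 64-bit build each frag is
 * roughly 24 bytes (u64 + pointer + size_t), so 4 SGEs add about 96 bytes.
 */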
diff --git a/drivers/infiniband/hw/mlx5/qos.c b/drivers/infiniband/hw/mlx5/qos.c
new file mode 100644
index 000000000000..dce92554142a
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/qos.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_ib.h"
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+static bool pp_is_supported(struct ib_device *device)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+
+ return (MLX5_CAP_GEN(dev->mdev, qos) &&
+ MLX5_CAP_QOS(dev->mdev, packet_pacing) &&
+ MLX5_CAP_QOS(dev->mdev, packet_pacing_uid));
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_PP_OBJ_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
+{
+ u8 rl_raw[MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)] = {};
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
+ MLX5_IB_ATTR_PP_OBJ_ALLOC_HANDLE);
+ struct mlx5_ib_dev *dev;
+ struct mlx5_ib_ucontext *c;
+ struct mlx5_ib_pp *pp_entry;
+ void *in_ctx;
+ u16 uid;
+ int inlen;
+ u32 flags;
+ int err;
+
+ c = to_mucontext(ib_uverbs_get_ucontext(attrs));
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+
+ /* The allocated entry can be used only by a DEVX context */
+ if (!c->devx_uid)
+ return -EINVAL;
+
+ dev = to_mdev(c->ibucontext.device);
+ pp_entry = kzalloc(sizeof(*pp_entry), GFP_KERNEL);
+ if (!pp_entry)
+ return -ENOMEM;
+
+ in_ctx = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_PP_OBJ_ALLOC_CTX);
+ inlen = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_PP_OBJ_ALLOC_CTX);
+ memcpy(rl_raw, in_ctx, inlen);
+ err = uverbs_get_flags32(&flags, attrs,
+ MLX5_IB_ATTR_PP_OBJ_ALLOC_FLAGS,
+ MLX5_IB_UAPI_PP_ALLOC_FLAGS_DEDICATED_INDEX);
+ if (err)
+ goto err;
+
+ uid = (flags & MLX5_IB_UAPI_PP_ALLOC_FLAGS_DEDICATED_INDEX) ?
+ c->devx_uid : MLX5_SHARED_RESOURCE_UID;
+
+ err = mlx5_rl_add_rate_raw(dev->mdev, rl_raw, uid,
+ (flags & MLX5_IB_UAPI_PP_ALLOC_FLAGS_DEDICATED_INDEX),
+ &pp_entry->index);
+ if (err)
+ goto err;
+
+ pp_entry->mdev = dev->mdev;
+ uobj->object = pp_entry;
+ uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_PP_OBJ_ALLOC_HANDLE);
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_PP_OBJ_ALLOC_INDEX,
+ &pp_entry->index, sizeof(pp_entry->index));
+ return err;
+
+err:
+ kfree(pp_entry);
+ return err;
+}
+
+static int pp_obj_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_pp *pp_entry = uobject->object;
+
+ mlx5_rl_remove_rate_raw(pp_entry->mdev, pp_entry->index);
+ kfree(pp_entry);
+ return 0;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_PP_OBJ_ALLOC,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_PP_OBJ_ALLOC_HANDLE,
+ MLX5_IB_OBJECT_PP,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_PP_OBJ_ALLOC_CTX,
+ UVERBS_ATTR_SIZE(1,
+ MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_PP_OBJ_ALLOC_FLAGS,
+ enum mlx5_ib_uapi_pp_alloc_flags,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_PP_OBJ_ALLOC_INDEX,
+ UVERBS_ATTR_TYPE(u16),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_PP_OBJ_DESTROY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_PP_OBJ_DESTROY_HANDLE,
+ MLX5_IB_OBJECT_PP,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_PP,
+ UVERBS_TYPE_ALLOC_IDR(pp_obj_cleanup),
+ &UVERBS_METHOD(MLX5_IB_METHOD_PP_OBJ_ALLOC),
+ &UVERBS_METHOD(MLX5_IB_METHOD_PP_OBJ_DESTROY));
+
+
+const struct uapi_definition mlx5_ib_qos_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_PP,
+ UAPI_DEF_IS_OBJ_SUPPORTED(pp_is_supported)),
+ {},
+};
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index f6623c77443a..69af20790481 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -30,17 +30,19 @@
* SOFTWARE.
*/
-#include <linux/module.h>
+#include <linux/etherdevice.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/rdma_counter.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
#include "ib_rep.h"
+#include "counters.h"
#include "cmd.h"
-
-/* not supported currently */
-static int wq_signature;
+#include "umr.h"
+#include "qp.h"
+#include "wr.h"
enum {
MLX5_IB_ACK_REQ_FREQ = 8,
@@ -53,37 +55,15 @@ enum {
MLX5_IB_LINK_TYPE_ETH = 1
};
-enum {
- MLX5_IB_SQ_STRIDE = 6,
- MLX5_IB_SQ_UMR_INLINE_THRESHOLD = 64,
-};
-
-static const u32 mlx5_ib_opcode[] = {
- [IB_WR_SEND] = MLX5_OPCODE_SEND,
- [IB_WR_LSO] = MLX5_OPCODE_LSO,
- [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM,
- [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE,
- [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM,
- [IB_WR_RDMA_READ] = MLX5_OPCODE_RDMA_READ,
- [IB_WR_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_CS,
- [IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA,
- [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL,
- [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR,
- [IB_WR_REG_MR] = MLX5_OPCODE_UMR,
- [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS,
- [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA,
- [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR,
-};
-
-struct mlx5_wqe_eth_pad {
- u8 rsvd0[16];
-};
-
enum raw_qp_set_mask_map {
MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID = 1UL << 0,
MLX5_RAW_QP_RATE_LIMIT = 1UL << 1,
};
+enum {
+ MLX5_QP_RM_GO_BACK_N = 0x1,
+};
+
struct mlx5_modify_raw_qp_param {
u16 operation;
@@ -92,9 +72,17 @@ struct mlx5_modify_raw_qp_param {
struct mlx5_rate_limit rl;
u8 rq_q_ctr_id;
- u16 port;
+ u32 port;
};
+struct mlx5_ib_qp_event_work {
+ struct work_struct work;
+ struct mlx5_core_qp *qp;
+ int type;
+};
+
+static struct workqueue_struct *mlx5_ib_qp_event_wq;
+
static void get_cqs(enum ib_qp_type qp_type,
struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq);
@@ -128,14 +116,10 @@ static int is_sqp(enum ib_qp_type qp_type)
*
* Return: zero on success, or an error code.
*/
-static int mlx5_ib_read_user_wqe_common(struct ib_umem *umem,
- void *buffer,
- u32 buflen,
- int wqe_index,
- int wq_offset,
- int wq_wqe_cnt,
- int wq_wqe_shift,
- int bcnt,
+static int mlx5_ib_read_user_wqe_common(struct ib_umem *umem, void *buffer,
+ size_t buflen, int wqe_index,
+ int wq_offset, int wq_wqe_cnt,
+ int wq_wqe_shift, int bcnt,
size_t *bytes_copied)
{
size_t offset = wq_offset + ((wqe_index % wq_wqe_cnt) << wq_wqe_shift);
@@ -159,11 +143,43 @@ static int mlx5_ib_read_user_wqe_common(struct ib_umem *umem,
return 0;
}
-int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp,
- int wqe_index,
- void *buffer,
- int buflen,
- size_t *bc)
+static int mlx5_ib_read_kernel_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index,
+ void *buffer, size_t buflen, size_t *bc)
+{
+ struct mlx5_wqe_ctrl_seg *ctrl;
+ size_t bytes_copied = 0;
+ size_t wqe_length;
+ void *p;
+ int ds;
+
+ wqe_index = wqe_index & qp->sq.fbc.sz_m1;
+
+ /* read the control segment first */
+ p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, wqe_index);
+ ctrl = p;
+ ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
+ wqe_length = ds * MLX5_WQE_DS_UNITS;
+
+ /* read rest of WQE if it spreads over more than one stride */
+ while (bytes_copied < wqe_length) {
+ size_t copy_length =
+ min_t(size_t, buflen - bytes_copied, MLX5_SEND_WQE_BB);
+
+ if (!copy_length)
+ break;
+
+ memcpy(buffer + bytes_copied, p, copy_length);
+ bytes_copied += copy_length;
+
+ wqe_index = (wqe_index + 1) & qp->sq.fbc.sz_m1;
+ p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, wqe_index);
+ }
+ *bc = bytes_copied;
+ return 0;
+}
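/*
 * A brief note on the stride walk above, assuming the usual
 * MLX5_WQE_DS_UNITS = 16 and MLX5_SEND_WQE_BB = 64 byte values: a WQE
 * with ds = 6 describes 6 * 16 = 96 bytes, which spans two 64-byte
 * basic blocks. The loop copies whole strides, so with buflen >= 128 it
 * copies 128 bytes and reports *bc = 128, i.e. the copy is rounded up
 * to the stride boundary rather than cut at the DS-derived length.
 */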
+
+static int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index,
+ void *buffer, size_t buflen, size_t *bc)
{
struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct ib_umem *umem = base->ubuffer.umem;
@@ -175,18 +191,10 @@ int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp,
int ret;
int ds;
- if (buflen < sizeof(*ctrl))
- return -EINVAL;
-
/* at first read as much as possible */
- ret = mlx5_ib_read_user_wqe_common(umem,
- buffer,
- buflen,
- wqe_index,
- wq->offset,
- wq->wqe_cnt,
- wq->wqe_shift,
- buflen,
+ ret = mlx5_ib_read_user_wqe_common(umem, buffer, buflen, wqe_index,
+ wq->offset, wq->wqe_cnt,
+ wq->wqe_shift, buflen,
&bytes_copied);
if (ret)
return ret;
@@ -209,13 +217,9 @@ int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp,
* so read the remaining bytes starting
* from wqe_index 0
*/
- ret = mlx5_ib_read_user_wqe_common(umem,
- buffer + bytes_copied,
- buflen - bytes_copied,
- 0,
- wq->offset,
- wq->wqe_cnt,
- wq->wqe_shift,
+ ret = mlx5_ib_read_user_wqe_common(umem, buffer + bytes_copied,
+ buflen - bytes_copied, 0, wq->offset,
+ wq->wqe_cnt, wq->wqe_shift,
wqe_length - bytes_copied,
&bytes_copied2);
@@ -225,11 +229,24 @@ int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp,
return 0;
}
-int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp,
- int wqe_index,
- void *buffer,
- int buflen,
- size_t *bc)
+int mlx5_ib_read_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
+ size_t buflen, size_t *bc)
+{
+ struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
+ struct ib_umem *umem = base->ubuffer.umem;
+
+ if (buflen < sizeof(struct mlx5_wqe_ctrl_seg))
+ return -EINVAL;
+
+ if (!umem)
+ return mlx5_ib_read_kernel_wqe_sq(qp, wqe_index, buffer,
+ buflen, bc);
+
+ return mlx5_ib_read_user_wqe_sq(qp, wqe_index, buffer, buflen, bc);
+}
+
+static int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index,
+ void *buffer, size_t buflen, size_t *bc)
{
struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct ib_umem *umem = base->ubuffer.umem;
@@ -237,14 +254,9 @@ int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp,
size_t bytes_copied;
int ret;
- ret = mlx5_ib_read_user_wqe_common(umem,
- buffer,
- buflen,
- wqe_index,
- wq->offset,
- wq->wqe_cnt,
- wq->wqe_shift,
- buflen,
+ ret = mlx5_ib_read_user_wqe_common(umem, buffer, buflen, wqe_index,
+ wq->offset, wq->wqe_cnt,
+ wq->wqe_shift, buflen,
&bytes_copied);
if (ret)
@@ -253,25 +265,33 @@ int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp,
return 0;
}
-int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq,
- int wqe_index,
- void *buffer,
- int buflen,
- size_t *bc)
+int mlx5_ib_read_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
+ size_t buflen, size_t *bc)
+{
+ struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
+ struct ib_umem *umem = base->ubuffer.umem;
+ struct mlx5_ib_wq *wq = &qp->rq;
+ size_t wqe_size = 1 << wq->wqe_shift;
+
+ if (buflen < wqe_size)
+ return -EINVAL;
+
+ if (!umem)
+ return -EOPNOTSUPP;
+
+ return mlx5_ib_read_user_wqe_rq(qp, wqe_index, buffer, buflen, bc);
+}
+
+static int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index,
+ void *buffer, size_t buflen, size_t *bc)
{
struct ib_umem *umem = srq->umem;
size_t bytes_copied;
int ret;
- ret = mlx5_ib_read_user_wqe_common(umem,
- buffer,
- buflen,
- wqe_index,
- 0,
- srq->msrq.max,
- srq->msrq.wqe_shift,
- buflen,
- &bytes_copied);
+ ret = mlx5_ib_read_user_wqe_common(umem, buffer, buflen, wqe_index, 0,
+ srq->msrq.max, srq->msrq.wqe_shift,
+ buflen, &bytes_copied);
if (ret)
return ret;
@@ -279,51 +299,135 @@ int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq,
return 0;
}
+int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer,
+ size_t buflen, size_t *bc)
+{
+ struct ib_umem *umem = srq->umem;
+ size_t wqe_size = 1 << srq->msrq.wqe_shift;
+
+ if (buflen < wqe_size)
+ return -EINVAL;
+
+ if (!umem)
+ return -EOPNOTSUPP;
+
+ return mlx5_ib_read_user_wqe_srq(srq, wqe_index, buffer, buflen, bc);
+}
+
+static void mlx5_ib_qp_err_syndrome(struct ib_qp *ibqp)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ int outlen = MLX5_ST_SZ_BYTES(query_qp_out);
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ void *pas_ext_union, *err_syn;
+ u32 *outb;
+ int err;
+
+ if (!MLX5_CAP_GEN(dev->mdev, qpc_extension) ||
+ !MLX5_CAP_GEN(dev->mdev, qp_error_syndrome))
+ return;
+
+ outb = kzalloc(outlen, GFP_KERNEL);
+ if (!outb)
+ return;
+
+ err = mlx5_core_qp_query(dev, &qp->trans_qp.base.mqp, outb, outlen,
+ true);
+ if (err)
+ goto out;
+
+ pas_ext_union =
+ MLX5_ADDR_OF(query_qp_out, outb, qp_pas_or_qpc_ext_and_pas);
+ err_syn = MLX5_ADDR_OF(qpc_extension_and_pas_list_in, pas_ext_union,
+ qpc_data_extension.error_syndrome);
+
+ pr_err("%s/%d: QP %d error: %s (0x%x 0x%x 0x%x)\n",
+ ibqp->device->name, ibqp->port, ibqp->qp_num,
+ ib_wc_status_msg(
+ MLX5_GET(cqe_error_syndrome, err_syn, syndrome)),
+ MLX5_GET(cqe_error_syndrome, err_syn, vendor_error_syndrome),
+ MLX5_GET(cqe_error_syndrome, err_syn, hw_syndrome_type),
+ MLX5_GET(cqe_error_syndrome, err_syn, hw_error_syndrome));
+out:
+ kfree(outb);
+}
+
+static void mlx5_ib_handle_qp_event(struct work_struct *_work)
+{
+ struct mlx5_ib_qp_event_work *qpe_work =
+ container_of(_work, struct mlx5_ib_qp_event_work, work);
+ struct ib_qp *ibqp = &to_mibqp(qpe_work->qp)->ibqp;
+ struct ib_event event = {};
+
+ event.device = ibqp->device;
+ event.element.qp = ibqp;
+ switch (qpe_work->type) {
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ event.event = IB_EVENT_PATH_MIG;
+ break;
+ case MLX5_EVENT_TYPE_COMM_EST:
+ event.event = IB_EVENT_COMM_EST;
+ break;
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ event.event = IB_EVENT_SQ_DRAINED;
+ break;
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ event.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ break;
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ event.event = IB_EVENT_QP_FATAL;
+ break;
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ event.event = IB_EVENT_PATH_MIG_ERR;
+ break;
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ event.event = IB_EVENT_QP_REQ_ERR;
+ break;
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ event.event = IB_EVENT_QP_ACCESS_ERR;
+ break;
+ default:
+ pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n",
+ qpe_work->type, qpe_work->qp->qpn);
+ goto out;
+ }
+
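+ /* For fatal and access errors, also dump the HW error syndrome. */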
+ if ((event.event == IB_EVENT_QP_FATAL) ||
+ (event.event == IB_EVENT_QP_ACCESS_ERR))
+ mlx5_ib_qp_err_syndrome(ibqp);
+
+ ibqp->event_handler(&event, ibqp->qp_context);
+
+out:
+ mlx5_core_res_put(&qpe_work->qp->common);
+ kfree(qpe_work);
+}
+
static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
{
struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
- struct ib_event event;
+ struct mlx5_ib_qp_event_work *qpe_work;
if (type == MLX5_EVENT_TYPE_PATH_MIG) {
/* This event is only valid for trans_qps */
to_mibqp(qp)->port = to_mibqp(qp)->trans_qp.alt_port;
}
- if (ibqp->event_handler) {
- event.device = ibqp->device;
- event.element.qp = ibqp;
- switch (type) {
- case MLX5_EVENT_TYPE_PATH_MIG:
- event.event = IB_EVENT_PATH_MIG;
- break;
- case MLX5_EVENT_TYPE_COMM_EST:
- event.event = IB_EVENT_COMM_EST;
- break;
- case MLX5_EVENT_TYPE_SQ_DRAINED:
- event.event = IB_EVENT_SQ_DRAINED;
- break;
- case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
- event.event = IB_EVENT_QP_LAST_WQE_REACHED;
- break;
- case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
- event.event = IB_EVENT_QP_FATAL;
- break;
- case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
- event.event = IB_EVENT_PATH_MIG_ERR;
- break;
- case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
- event.event = IB_EVENT_QP_REQ_ERR;
- break;
- case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
- event.event = IB_EVENT_QP_ACCESS_ERR;
- break;
- default:
- pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n", type, qp->qpn);
- return;
- }
+ if (!ibqp->event_handler)
+ goto out_no_handler;
- ibqp->event_handler(&event, ibqp->qp_context);
- }
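+ /* This callback may run in atomic context, so allocate atomically and
+ * defer the handling (which may sleep) to a workqueue.
+ */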
+ qpe_work = kzalloc(sizeof(*qpe_work), GFP_ATOMIC);
+ if (!qpe_work)
+ goto out_no_handler;
+
+ qpe_work->qp = qp;
+ qpe_work->type = type;
+ INIT_WORK(&qpe_work->work, mlx5_ib_handle_qp_event);
+ queue_work(mlx5_ib_qp_event_wq, &qpe_work->work);
+ return;
+
+out_no_handler:
+ mlx5_core_res_put(&qp->common);
}
static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
@@ -343,17 +447,26 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
cap->max_recv_wr = 0;
cap->max_recv_sge = 0;
} else {
+ int wq_sig = !!(qp->flags_en & MLX5_QP_FLAG_SIGNATURE);
+
if (ucmd) {
qp->rq.wqe_cnt = ucmd->rq_wqe_count;
if (ucmd->rq_wqe_shift > BITS_PER_BYTE * sizeof(ucmd->rq_wqe_shift))
return -EINVAL;
qp->rq.wqe_shift = ucmd->rq_wqe_shift;
- if ((1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) < qp->wq_sig)
+ if ((1 << qp->rq.wqe_shift) /
+ sizeof(struct mlx5_wqe_data_seg) <
+ wq_sig)
return -EINVAL;
- qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
+ qp->rq.max_gs =
+ (1 << qp->rq.wqe_shift) /
+ sizeof(struct mlx5_wqe_data_seg) -
+ wq_sig;
qp->rq.max_post = qp->rq.wqe_cnt;
} else {
- wqe_size = qp->wq_sig ? sizeof(struct mlx5_wqe_signature_seg) : 0;
+ wqe_size =
+ wq_sig ? sizeof(struct mlx5_wqe_signature_seg) :
+ 0;
wqe_size += cap->max_recv_sge * sizeof(struct mlx5_wqe_data_seg);
wqe_size = roundup_pow_of_two(wqe_size);
wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size;
@@ -367,7 +480,10 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
return -EINVAL;
}
qp->rq.wqe_shift = ilog2(wqe_size);
- qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
+ qp->rq.max_gs =
+ (1 << qp->rq.wqe_shift) /
+ sizeof(struct mlx5_wqe_data_seg) -
+ wq_sig;
qp->rq.max_post = qp->rq.wqe_cnt;
}
}
@@ -382,7 +498,7 @@ static int sq_overhead(struct ib_qp_init_attr *attr)
switch (attr->qp_type) {
case IB_QPT_XRC_INI:
size += sizeof(struct mlx5_wqe_xrc_seg);
- /* fall through */
+ fallthrough;
case IB_QPT_RC:
size += sizeof(struct mlx5_wqe_ctrl_seg) +
max(sizeof(struct mlx5_wqe_atomic_seg) +
@@ -407,7 +523,7 @@ static int sq_overhead(struct ib_qp_init_attr *attr)
if (attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
size += sizeof(struct mlx5_wqe_eth_pad) +
sizeof(struct mlx5_wqe_eth_seg);
- /* fall through */
+ fallthrough;
case IB_QPT_SMI:
case MLX5_IB_QPT_HW_GSI:
size += sizeof(struct mlx5_wqe_ctrl_seg) +
@@ -442,9 +558,9 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr)
}
size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
- if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN &&
+ if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN &&
ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE)
- return MLX5_SIG_WQE_SIZE;
+ return MLX5_SIG_WQE_SIZE;
else
return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
}
@@ -496,9 +612,6 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
sizeof(struct mlx5_wqe_inline_seg);
attr->cap.max_inline_data = qp->max_inline_data;
- if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN)
- qp->signature_en = true;
-
wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) {
@@ -550,7 +663,7 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev,
}
if (attr->qp_type == IB_QPT_RAW_PACKET ||
- qp->flags & MLX5_IB_QP_UNDERLAY) {
+ qp->flags & IB_QP_CREATE_SOURCE_QPN) {
base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6;
} else {
@@ -583,7 +696,8 @@ enum {
static int max_bfregs(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi)
{
- return get_num_static_uars(dev, bfregi) * MLX5_NON_FP_BFREGS_PER_UAR;
+ return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) *
+ bfregi->num_static_sys_pages * MLX5_NON_FP_BFREGS_PER_UAR;
}
static int num_med_bfreg(struct mlx5_ib_dev *dev,
@@ -652,6 +766,9 @@ static int alloc_bfreg(struct mlx5_ib_dev *dev,
{
int bfregn = -ENOMEM;
+ if (bfregi->lib_uar_dyn)
+ return -EINVAL;
+
mutex_lock(&bfregi->lock);
if (bfregi->ver >= 2) {
bfregn = alloc_high_class_bfreg(dev, bfregi);
@@ -702,10 +819,7 @@ static int to_mlx5_st(enum ib_qp_type type)
case IB_QPT_SMI: return MLX5_QP_ST_QP0;
case MLX5_IB_QPT_HW_GSI: return MLX5_QP_ST_QP1;
case MLX5_IB_QPT_DCI: return MLX5_QP_ST_DCI;
- case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6;
- case IB_QPT_RAW_PACKET:
- case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE;
- case IB_QPT_MAX:
+ case IB_QPT_RAW_PACKET: return MLX5_QP_ST_RAW_ETHERTYPE;
default: return -EINVAL;
}
}
@@ -723,6 +837,9 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
u32 index_of_sys_page;
u32 offset;
+ if (bfregi->lib_uar_dyn)
+ return -EINVAL;
+
bfregs_per_sys_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k) *
MLX5_NON_FP_BFREGS_PER_UAR;
index_of_sys_page = bfregn / bfregs_per_sys_page;
@@ -744,39 +861,6 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
return bfregi->sys_pages[index_of_sys_page] + offset;
}
-static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata,
- unsigned long addr, size_t size,
- struct ib_umem **umem, int *npages, int *page_shift,
- int *ncont, u32 *offset)
-{
- int err;
-
- *umem = ib_umem_get(udata, addr, size, 0, 0);
- if (IS_ERR(*umem)) {
- mlx5_ib_dbg(dev, "umem_get failed\n");
- return PTR_ERR(*umem);
- }
-
- mlx5_ib_cont_pages(*umem, addr, 0, npages, page_shift, ncont, NULL);
-
- err = mlx5_ib_get_buf_offset(addr, *page_shift, offset);
- if (err) {
- mlx5_ib_warn(dev, "bad offset\n");
- goto err_umem;
- }
-
- mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, ncont %d, offset %d\n",
- addr, size, *npages, *page_shift, *ncont, *offset);
-
- return 0;
-
-err_umem:
- ib_umem_release(*umem);
- *umem = NULL;
-
- return err;
-}
-
static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_rwq *rwq, struct ib_udata *udata)
{
@@ -790,8 +874,7 @@ static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
atomic_dec(&dev->delay_drop.rqs_cnt);
mlx5_ib_db_unmap_user(context, &rwq->db);
- if (rwq->umem)
- ib_umem_release(rwq->umem);
+ ib_umem_release(rwq->umem);
}
static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
@@ -800,47 +883,47 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
{
struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
udata, struct mlx5_ib_ucontext, ibucontext);
- int page_shift = 0;
- int npages;
+ unsigned long page_size = 0;
u32 offset = 0;
- int ncont = 0;
int err;
if (!ucmd->buf_addr)
return -EINVAL;
- rwq->umem = ib_umem_get(udata, ucmd->buf_addr, rwq->buf_size, 0, 0);
+ rwq->umem = ib_umem_get(&dev->ib_dev, ucmd->buf_addr, rwq->buf_size, 0);
if (IS_ERR(rwq->umem)) {
mlx5_ib_dbg(dev, "umem_get failed\n");
err = PTR_ERR(rwq->umem);
return err;
}
- mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, 0, &npages, &page_shift,
- &ncont, NULL);
- err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift,
- &rwq->rq_page_offset);
- if (err) {
+ page_size = mlx5_umem_find_best_quantized_pgoff(
+ rwq->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT,
+ page_offset, 64, &rwq->rq_page_offset);
+ if (!page_size) {
mlx5_ib_warn(dev, "bad offset\n");
+ err = -EINVAL;
goto err_umem;
}
- rwq->rq_num_pas = ncont;
- rwq->page_shift = page_shift;
- rwq->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ rwq->rq_num_pas = ib_umem_num_dma_blocks(rwq->umem, page_size);
+ rwq->page_shift = order_base_2(page_size);
+ rwq->log_page_size = rwq->page_shift - MLX5_ADAPTER_PAGE_SHIFT;
rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE);
- mlx5_ib_dbg(dev, "addr 0x%llx, size %zd, npages %d, page_shift %d, ncont %d, offset %d\n",
- (unsigned long long)ucmd->buf_addr, rwq->buf_size,
- npages, page_shift, ncont, offset);
+ mlx5_ib_dbg(
+ dev,
+ "addr 0x%llx, size %zd, npages %zu, page_size %ld, ncont %d, offset %d\n",
+ (unsigned long long)ucmd->buf_addr, rwq->buf_size,
+ ib_umem_num_pages(rwq->umem), page_size, rwq->rq_num_pas,
+ offset);
- err = mlx5_ib_db_map_user(ucontext, udata, ucmd->db_addr, &rwq->db);
+ err = mlx5_ib_db_map_user(ucontext, ucmd->db_addr, &rwq->db);
if (err) {
mlx5_ib_dbg(dev, "map failed\n");
goto err_umem;
}
- rwq->create_type = MLX5_WQ_USER;
return 0;
err_umem:
@@ -855,53 +938,51 @@ static int adjust_bfregn(struct mlx5_ib_dev *dev,
bfregn % MLX5_NON_FP_BFREGS_PER_UAR;
}
-static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
- struct mlx5_ib_qp *qp, struct ib_udata *udata,
- struct ib_qp_init_attr *attr,
- u32 **in,
- struct mlx5_ib_create_qp_resp *resp, int *inlen,
- struct mlx5_ib_qp_base *base)
+static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_qp *qp, struct ib_udata *udata,
+ struct ib_qp_init_attr *attr, u32 **in,
+ struct mlx5_ib_create_qp_resp *resp, int *inlen,
+ struct mlx5_ib_qp_base *base,
+ struct mlx5_ib_create_qp *ucmd)
{
struct mlx5_ib_ucontext *context;
- struct mlx5_ib_create_qp ucmd;
struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer;
- int page_shift = 0;
+ unsigned int page_offset_quantized = 0;
+ unsigned long page_size = 0;
int uar_index = 0;
- int npages;
- u32 offset = 0;
int bfregn;
int ncont = 0;
__be64 *pas;
void *qpc;
int err;
u16 uid;
-
- err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
- if (err) {
- mlx5_ib_dbg(dev, "copy failed\n");
- return err;
- }
+ u32 uar_flags;
context = rdma_udata_to_drv_context(udata, struct mlx5_ib_ucontext,
ibucontext);
- if (ucmd.flags & MLX5_QP_FLAG_BFREG_INDEX) {
+ uar_flags = qp->flags_en &
+ (MLX5_QP_FLAG_UAR_PAGE_INDEX | MLX5_QP_FLAG_BFREG_INDEX);
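+ /* The user either names a UAR page or a bfreg index explicitly;
+ * otherwise the kernel allocates a bfreg (not valid for cross-channel).
+ */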
+ switch (uar_flags) {
+ case MLX5_QP_FLAG_UAR_PAGE_INDEX:
+ uar_index = ucmd->bfreg_index;
+ bfregn = MLX5_IB_INVALID_BFREG;
+ break;
+ case MLX5_QP_FLAG_BFREG_INDEX:
uar_index = bfregn_to_uar_index(dev, &context->bfregi,
- ucmd.bfreg_index, true);
+ ucmd->bfreg_index, true);
if (uar_index < 0)
return uar_index;
-
bfregn = MLX5_IB_INVALID_BFREG;
- } else if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) {
- /*
- * TBD: should come from the verbs when we have the API
- */
- /* In CROSS_CHANNEL CQ and QP must use the same UAR */
- bfregn = MLX5_CROSS_CHANNEL_BFREG;
- }
- else {
+ break;
+ case 0:
+ if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL)
+ return -EINVAL;
bfregn = alloc_bfreg(dev, &context->bfregi);
if (bfregn < 0)
return bfregn;
+ break;
+ default:
+ return -EINVAL;
}
mlx5_ib_dbg(dev, "bfregn 0x%x, uar_index 0x%x\n", bfregn, uar_index);
@@ -913,17 +994,27 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
- err = set_user_buf_size(dev, qp, &ucmd, base, attr);
+ err = set_user_buf_size(dev, qp, ucmd, base, attr);
if (err)
goto err_bfreg;
- if (ucmd.buf_addr && ubuffer->buf_size) {
- ubuffer->buf_addr = ucmd.buf_addr;
- err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr,
- ubuffer->buf_size, &ubuffer->umem,
- &npages, &page_shift, &ncont, &offset);
- if (err)
+ if (ucmd->buf_addr && ubuffer->buf_size) {
+ ubuffer->buf_addr = ucmd->buf_addr;
+ ubuffer->umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr,
+ ubuffer->buf_size, 0);
+ if (IS_ERR(ubuffer->umem)) {
+ err = PTR_ERR(ubuffer->umem);
goto err_bfreg;
+ }
+ page_size = mlx5_umem_find_best_quantized_pgoff(
+ ubuffer->umem, qpc, log_page_size,
+ MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64,
+ &page_offset_quantized);
+ if (!page_size) {
+ err = -EINVAL;
+ goto err_umem;
+ }
+ ncont = ib_umem_num_dma_blocks(ubuffer->umem, page_size);
} else {
ubuffer->umem = NULL;
}
@@ -936,18 +1027,16 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
goto err_umem;
}
- uid = (attr->qp_type != IB_QPT_XRC_TGT &&
- attr->qp_type != IB_QPT_XRC_INI) ? to_mpd(pd)->uid : 0;
+ uid = (attr->qp_type != IB_QPT_XRC_INI) ? to_mpd(pd)->uid : 0;
MLX5_SET(create_qp_in, *in, uid, uid);
- pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
- if (ubuffer->umem)
- mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0);
-
qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
-
- MLX5_SET(qpc, qpc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT);
- MLX5_SET(qpc, qpc, page_offset, offset);
-
+ pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
+ if (ubuffer->umem) {
+ mlx5_ib_populate_pas(ubuffer->umem, page_size, pas, 0);
+ MLX5_SET(qpc, qpc, log_page_size,
+ order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(qpc, qpc, page_offset, page_offset_quantized);
+ }
MLX5_SET(qpc, qpc, uar_page, uar_index);
if (bfregn != MLX5_IB_INVALID_BFREG)
resp->bfreg_index = adjust_bfregn(dev, &context->bfregi, bfregn);
@@ -955,30 +1044,19 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
resp->bfreg_index = MLX5_IB_INVALID_BFREG;
qp->bfregn = bfregn;
- err = mlx5_ib_db_map_user(context, udata, ucmd.db_addr, &qp->db);
+ err = mlx5_ib_db_map_user(context, ucmd->db_addr, &qp->db);
if (err) {
mlx5_ib_dbg(dev, "map failed\n");
goto err_free;
}
- err = ib_copy_to_udata(udata, resp, min(udata->outlen, sizeof(*resp)));
- if (err) {
- mlx5_ib_dbg(dev, "copy failed\n");
- goto err_unmap;
- }
- qp->create_type = MLX5_QP_USER;
-
return 0;
-err_unmap:
- mlx5_ib_db_unmap_user(context, &qp->db);
-
err_free:
kvfree(*in);
err_umem:
- if (ubuffer->umem)
- ib_umem_release(ubuffer->umem);
+ ib_umem_release(ubuffer->umem);
err_bfreg:
if (bfregn != MLX5_IB_INVALID_BFREG)
@@ -986,69 +1064,47 @@ err_bfreg:
return err;
}
-static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd,
- struct mlx5_ib_qp *qp, struct mlx5_ib_qp_base *base,
- struct ib_udata *udata)
+static void destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct mlx5_ib_qp_base *base, struct ib_udata *udata)
{
- struct mlx5_ib_ucontext *context =
- rdma_udata_to_drv_context(
- udata,
- struct mlx5_ib_ucontext,
- ibucontext);
+ struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
- mlx5_ib_db_unmap_user(context, &qp->db);
- if (base->ubuffer.umem)
+ if (udata) {
+ /* User QP */
+ mlx5_ib_db_unmap_user(context, &qp->db);
ib_umem_release(base->ubuffer.umem);
- /*
- * Free only the BFREGs which are handled by the kernel.
- * BFREGs of UARs allocated dynamically are handled by user.
- */
- if (qp->bfregn != MLX5_IB_INVALID_BFREG)
- mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn);
-}
-
-/* get_sq_edge - Get the next nearby edge.
- *
- * An 'edge' is defined as the first following address after the end
- * of the fragment or the SQ. Accordingly, during the WQE construction
- * which repetitively increases the pointer to write the next data, it
- * simply should check if it gets to an edge.
- *
- * @sq - SQ buffer.
- * @idx - Stride index in the SQ buffer.
- *
- * Return:
- * The new edge.
- */
-static void *get_sq_edge(struct mlx5_ib_wq *sq, u32 idx)
-{
- void *fragment_end;
-
- fragment_end = mlx5_frag_buf_get_wqe
- (&sq->fbc,
- mlx5_frag_buf_get_idx_last_contig_stride(&sq->fbc, idx));
+ /*
+ * Free only the BFREGs which are handled by the kernel.
+ * BFREGs of UARs allocated dynamically are handled by user.
+ */
+ if (qp->bfregn != MLX5_IB_INVALID_BFREG)
+ mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn);
+ return;
+ }
- return fragment_end + MLX5_SEND_WQE_BB;
+ /* Kernel QP */
+ kvfree(qp->sq.wqe_head);
+ kvfree(qp->sq.w_list);
+ kvfree(qp->sq.wrid);
+ kvfree(qp->sq.wr_data);
+ kvfree(qp->rq.wrid);
+ if (qp->db.db)
+ mlx5_db_free(dev->mdev, &qp->db);
+ if (qp->buf.frags)
+ mlx5_frag_buf_free(dev->mdev, &qp->buf);
}
-static int create_kernel_qp(struct mlx5_ib_dev *dev,
- struct ib_qp_init_attr *init_attr,
- struct mlx5_ib_qp *qp,
- u32 **in, int *inlen,
- struct mlx5_ib_qp_base *base)
+static int _create_kernel_qp(struct mlx5_ib_dev *dev,
+ struct ib_qp_init_attr *init_attr,
+ struct mlx5_ib_qp *qp, u32 **in, int *inlen,
+ struct mlx5_ib_qp_base *base)
{
int uar_index;
void *qpc;
int err;
- if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN |
- IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
- IB_QP_CREATE_IPOIB_UD_LSO |
- IB_QP_CREATE_NETIF_QP |
- mlx5_ib_create_qp_sqpn_qp1()))
- return -EINVAL;
-
if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
qp->bf.bfreg = &dev->fp_bfreg;
else
@@ -1103,16 +1159,15 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
MLX5_SET(qpc, qpc, uar_page, uar_index);
+ MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(dev->mdev));
MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
/* Set "fast registration enabled" for all kernel QPs */
MLX5_SET(qpc, qpc, fre, 1);
MLX5_SET(qpc, qpc, rlky, 1);
- if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) {
+ if (qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)
MLX5_SET(qpc, qpc, deth_sqpn, 1);
- qp->flags |= MLX5_IB_QP_SQPN_QP1;
- }
mlx5_fill_page_frag_array(&qp->buf,
(__be64 *)MLX5_ADDR_OF(create_qp_in,
@@ -1140,7 +1195,6 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
err = -ENOMEM;
goto err_wrid;
}
- qp->create_type = MLX5_QP_KERNEL;
return 0;
@@ -1160,36 +1214,15 @@ err_buf:
return err;
}
-static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
-{
- kvfree(qp->sq.wqe_head);
- kvfree(qp->sq.w_list);
- kvfree(qp->sq.wrid);
- kvfree(qp->sq.wr_data);
- kvfree(qp->rq.wrid);
- mlx5_db_free(dev->mdev, &qp->db);
- mlx5_frag_buf_free(dev->mdev, &qp->buf);
-}
-
static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
{
- if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) ||
- (attr->qp_type == MLX5_IB_QPT_DCI) ||
- (attr->qp_type == IB_QPT_XRC_INI))
+ if (attr->srq || (qp->type == IB_QPT_XRC_TGT) ||
+ (qp->type == MLX5_IB_QPT_DCI) || (qp->type == IB_QPT_XRC_INI))
return MLX5_SRQ_RQ;
else if (!qp->has_rq)
return MLX5_ZERO_LEN_RQ;
- else
- return MLX5_NON_ZERO_RQ;
-}
-static int is_connected(enum ib_qp_type qp_type)
-{
- if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC ||
- qp_type == MLX5_IB_QPT_DCI)
- return 1;
-
- return 0;
+ return MLX5_NON_ZERO_RQ;
}
static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
@@ -1197,15 +1230,18 @@ static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq, u32 tdn,
struct ib_pd *pd)
{
- u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid);
MLX5_SET(tisc, tisc, transport_domain, tdn);
- if (qp->flags & MLX5_IB_QP_UNDERLAY)
+ if (!mlx5_ib_lag_should_assign_affinity(dev) &&
+ mlx5_lag_is_lacp_owner(dev->mdev))
+ MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1);
+ if (qp->flags & IB_QP_CREATE_SOURCE_QPN)
MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn);
- return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn);
+ return mlx5_core_create_tis(dev->mdev, in, &sq->tisn);
}
static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
@@ -1221,10 +1257,85 @@ static void destroy_flow_rule_vport_sq(struct mlx5_ib_sq *sq)
sq->flow_rule = NULL;
}
+static bool fr_supported(int ts_cap)
+{
+ return ts_cap == MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING ||
+ ts_cap == MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME;
+}
+
+static int get_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
+ bool fr_sup, bool rt_sup)
+{
+ if (cq->private_flags & MLX5_IB_CQ_PR_FLAGS_REAL_TIME_TS) {
+ if (!rt_sup) {
+ mlx5_ib_dbg(dev,
+ "Real time TS format is not supported\n");
+ return -EOPNOTSUPP;
+ }
+ return MLX5_TIMESTAMP_FORMAT_REAL_TIME;
+ }
+ if (cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) {
+ if (!fr_sup) {
+ mlx5_ib_dbg(dev,
+ "Free running TS format is not supported\n");
+ return -EOPNOTSUPP;
+ }
+ return MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
+ }
+ return fr_sup ? MLX5_TIMESTAMP_FORMAT_FREE_RUNNING :
+ MLX5_TIMESTAMP_FORMAT_DEFAULT;
+}
+
+static int get_rq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *recv_cq)
+{
+ u8 ts_cap = MLX5_CAP_GEN(dev->mdev, rq_ts_format);
+
+ return get_ts_format(dev, recv_cq, fr_supported(ts_cap),
+ rt_supported(ts_cap));
+}
+
+static int get_sq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq)
+{
+ u8 ts_cap = MLX5_CAP_GEN(dev->mdev, sq_ts_format);
+
+ return get_ts_format(dev, send_cq, fr_supported(ts_cap),
+ rt_supported(ts_cap));
+}
+
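+/*
+ * A QP carries a single ts_format, so the timestamp formats requested via
+ * the send and receive CQs must agree (or be left at the default).
+ */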
+static int get_qp_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq,
+ struct mlx5_ib_cq *recv_cq)
+{
+ u8 ts_cap = MLX5_CAP_ROCE(dev->mdev, qp_ts_format);
+ bool fr_sup = fr_supported(ts_cap);
+ bool rt_sup = rt_supported(ts_cap);
+ u8 default_ts = fr_sup ? MLX5_TIMESTAMP_FORMAT_FREE_RUNNING :
+ MLX5_TIMESTAMP_FORMAT_DEFAULT;
+ int send_ts_format =
+ send_cq ? get_ts_format(dev, send_cq, fr_sup, rt_sup) :
+ default_ts;
+ int recv_ts_format =
+ recv_cq ? get_ts_format(dev, recv_cq, fr_sup, rt_sup) :
+ default_ts;
+
+ if (send_ts_format < 0 || recv_ts_format < 0)
+ return -EOPNOTSUPP;
+
+ if (send_ts_format != MLX5_TIMESTAMP_FORMAT_DEFAULT &&
+ recv_ts_format != MLX5_TIMESTAMP_FORMAT_DEFAULT &&
+ send_ts_format != recv_ts_format) {
+ mlx5_ib_dbg(
+ dev,
+ "The send ts_format does not match the receive ts_format\n");
+ return -EOPNOTSUPP;
+ }
+
+ return send_ts_format == default_ts ? recv_ts_format : send_ts_format;
+}
+
static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
struct ib_udata *udata,
struct mlx5_ib_sq *sq, void *qpin,
- struct ib_pd *pd)
+ struct ib_pd *pd, struct mlx5_ib_cq *cq)
{
struct mlx5_ib_ubuffer *ubuffer = &sq->ubuffer;
__be64 *pas;
@@ -1234,18 +1345,29 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
void *wq;
int inlen;
int err;
- int page_shift = 0;
- int npages;
- int ncont = 0;
- u32 offset = 0;
-
- err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, ubuffer->buf_size,
- &sq->ubuffer.umem, &npages, &page_shift, &ncont,
- &offset);
- if (err)
- return err;
+ unsigned int page_offset_quantized;
+ unsigned long page_size;
+ int ts_format;
+
+ ts_format = get_sq_ts_format(dev, cq);
+ if (ts_format < 0)
+ return ts_format;
+
+ sq->ubuffer.umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr,
+ ubuffer->buf_size, 0);
+ if (IS_ERR(sq->ubuffer.umem))
+ return PTR_ERR(sq->ubuffer.umem);
+ page_size = mlx5_umem_find_best_quantized_pgoff(
+ ubuffer->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT,
+ page_offset, 64, &page_offset_quantized);
+ if (!page_size) {
+ err = -EINVAL;
+ goto err_umem;
+ }
- inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont;
+ inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
+ sizeof(u64) *
+ ib_umem_num_dma_blocks(sq->ubuffer.umem, page_size);
in = kvzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
@@ -1258,6 +1380,7 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
if (MLX5_CAP_ETH(dev->mdev, multi_pkt_send_wqe))
MLX5_SET(sqc, sqc, allow_multi_pkt_send_wqe, 1);
MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+ MLX5_SET(sqc, sqc, ts_format, ts_format);
MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index));
MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd));
MLX5_SET(sqc, sqc, tis_lst_sz, 1);
@@ -1273,13 +1396,14 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size));
- MLX5_SET(wq, wq, log_wq_pg_sz, page_shift - MLX5_ADAPTER_PAGE_SHIFT);
- MLX5_SET(wq, wq, page_offset, offset);
+ MLX5_SET(wq, wq, log_wq_pg_sz,
+ order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(wq, wq, page_offset, page_offset_quantized);
pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
- mlx5_ib_populate_pas(dev, sq->ubuffer.umem, page_shift, pas, 0);
+ mlx5_ib_populate_pas(sq->ubuffer.umem, page_size, pas, 0);
- err = mlx5_core_create_sq_tracked(dev->mdev, in, inlen, &sq->base.mqp);
+ err = mlx5_core_create_sq_tracked(dev, in, inlen, &sq->base.mqp);
kvfree(in);
@@ -1299,44 +1423,40 @@ static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq)
{
destroy_flow_rule_vport_sq(sq);
- mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
+ mlx5_core_destroy_sq_tracked(dev, &sq->base.mqp);
ib_umem_release(sq->ubuffer.umem);
}
-static size_t get_rq_pas_size(void *qpc)
-{
- u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12;
- u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride);
- u32 log_rq_size = MLX5_GET(qpc, qpc, log_rq_size);
- u32 page_offset = MLX5_GET(qpc, qpc, page_offset);
- u32 po_quanta = 1 << (log_page_size - 6);
- u32 rq_sz = 1 << (log_rq_size + 4 + log_rq_stride);
- u32 page_size = 1 << log_page_size;
- u32 rq_sz_po = rq_sz + (page_offset * po_quanta);
- u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size;
-
- return rq_num_pas * sizeof(u64);
-}
-
static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
struct mlx5_ib_rq *rq, void *qpin,
- size_t qpinlen, struct ib_pd *pd)
+ struct ib_pd *pd, struct mlx5_ib_cq *cq)
{
struct mlx5_ib_qp *mqp = rq->base.container_mibqp;
__be64 *pas;
- __be64 *qp_pas;
void *in;
void *rqc;
void *wq;
void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
- size_t rq_pas_size = get_rq_pas_size(qpc);
+ struct ib_umem *umem = rq->base.ubuffer.umem;
+ unsigned int page_offset_quantized;
+ unsigned long page_size = 0;
+ int ts_format;
size_t inlen;
int err;
- if (qpinlen < rq_pas_size + MLX5_BYTE_OFF(create_qp_in, pas))
+ ts_format = get_rq_ts_format(dev, cq);
+ if (ts_format < 0)
+ return ts_format;
+
+ page_size = mlx5_umem_find_best_quantized_pgoff(umem, wq, log_wq_pg_sz,
+ MLX5_ADAPTER_PAGE_SHIFT,
+ page_offset, 64,
+ &page_offset_quantized);
+ if (!page_size)
return -EINVAL;
- inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size;
+ inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
+ sizeof(u64) * ib_umem_num_dma_blocks(umem, page_size);
in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
@@ -1347,29 +1467,30 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
MLX5_SET(rqc, rqc, vsd, 1);
MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
+ MLX5_SET(rqc, rqc, ts_format, ts_format);
MLX5_SET(rqc, rqc, flush_in_error_en, 1);
MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index));
MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv));
- if (mqp->flags & MLX5_IB_QP_CAP_SCATTER_FCS)
+ if (mqp->flags & IB_QP_CREATE_SCATTER_FCS)
MLX5_SET(rqc, rqc, scatter_fcs, 1);
wq = MLX5_ADDR_OF(rqc, rqc, wq);
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
if (rq->flags & MLX5_IB_RQ_PCI_WRITE_END_PADDING)
MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
- MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset));
+ MLX5_SET(wq, wq, page_offset, page_offset_quantized);
MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4);
- MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(qpc, qpc, log_page_size));
+ MLX5_SET(wq, wq, log_wq_pg_sz,
+ order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size));
pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
- qp_pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, qpin, pas);
- memcpy(pas, qp_pas, rq_pas_size);
+ mlx5_ib_populate_pas(umem, page_size, pas, 0);
- err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rq->base.mqp);
+ err = mlx5_core_create_rq_tracked(dev, in, inlen, &rq->base.mqp);
kvfree(in);
@@ -1379,14 +1500,7 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
static void destroy_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
struct mlx5_ib_rq *rq)
{
- mlx5_core_destroy_rq_tracked(dev->mdev, &rq->base.mqp);
-}
-
-static bool tunnel_offload_supported(struct mlx5_core_dev *dev)
-{
- return (MLX5_CAP_ETH(dev, tunnel_stateless_vxlan) ||
- MLX5_CAP_ETH(dev, tunnel_stateless_gre) ||
- MLX5_CAP_ETH(dev, tunnel_stateless_geneve_rx));
+ mlx5_core_destroy_rq_tracked(dev, &rq->base.mqp);
}
static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
@@ -1402,9 +1516,8 @@ static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
struct mlx5_ib_rq *rq, u32 tdn,
- u32 *qp_flags_en,
- struct ib_pd *pd,
- u32 *out, int outlen)
+ u32 *qp_flags_en, struct ib_pd *pd,
+ u32 *out)
{
u8 lb_flag = 0;
u32 *in;
@@ -1437,9 +1550,8 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
}
MLX5_SET(tirc, tirc, self_lb_block, lb_flag);
-
- err = mlx5_core_create_tir_out(dev->mdev, in, inlen, out, outlen);
-
+ MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR);
+ err = mlx5_cmd_exec_inout(dev->mdev, create_tir, in, out);
rq->tirn = MLX5_GET(create_tir_out, out, tirn);
if (!err && MLX5_GET(tirc, tirc, self_lb_block)) {
err = mlx5_ib_enable_lb(dev, false, true);
@@ -1453,10 +1565,10 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
}
static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
- u32 *in, size_t inlen,
- struct ib_pd *pd,
+ u32 *in, size_t inlen, struct ib_pd *pd,
struct ib_udata *udata,
- struct mlx5_ib_create_qp_resp *resp)
+ struct mlx5_ib_create_qp_resp *resp,
+ struct ib_qp_init_attr *init_attr)
{
struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
@@ -1468,12 +1580,15 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
u16 uid = to_mpd(pd)->uid;
u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {};
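+ /* A raw packet QP needs at least one of the SQ and RQ. */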
+ if (!qp->sq.wqe_cnt && !qp->rq.wqe_cnt)
+ return -EINVAL;
if (qp->sq.wqe_cnt) {
err = create_raw_packet_qp_tis(dev, qp, sq, tdn, pd);
if (err)
return err;
- err = create_raw_packet_qp_sq(dev, udata, sq, in, pd);
+ err = create_raw_packet_qp_sq(dev, udata, sq, in, pd,
+ to_mcq(init_attr->send_cq));
if (err)
goto err_destroy_tis;
@@ -1491,17 +1606,17 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (qp->rq.wqe_cnt) {
rq->base.container_mibqp = qp;
- if (qp->flags & MLX5_IB_QP_CVLAN_STRIPPING)
+ if (qp->flags & IB_QP_CREATE_CVLAN_STRIPPING)
rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING;
- if (qp->flags & MLX5_IB_QP_PCI_WRITE_END_PADDING)
+ if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING)
rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING;
- err = create_raw_packet_qp_rq(dev, rq, in, inlen, pd);
+ err = create_raw_packet_qp_rq(dev, rq, in, pd,
+ to_mcq(init_attr->recv_cq));
if (err)
goto err_destroy_sq;
- err = create_raw_packet_qp_tir(
- dev, rq, tdn, &qp->flags_en, pd, out,
- MLX5_ST_SZ_BYTES(create_tir_out));
+ err = create_raw_packet_qp_tir(dev, rq, tdn, &qp->flags_en, pd,
+ out);
if (err)
goto err_destroy_rq;
@@ -1510,7 +1625,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_RQN;
resp->tirn = rq->tirn;
resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
- if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner)) {
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2)) {
resp->tir_icm_addr = MLX5_GET(
create_tir_out, out, icm_address_31_0);
resp->tir_icm_addr |=
@@ -1529,14 +1645,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? sq->base.mqp.qpn :
rq->base.mqp.qpn;
- err = ib_copy_to_udata(udata, resp, min(udata->outlen, sizeof(*resp)));
- if (err)
- goto err_destroy_tir;
-
return 0;
-err_destroy_tir:
- destroy_raw_packet_qp_tir(dev, rq, qp->flags_en, pd);
err_destroy_rq:
destroy_raw_packet_qp_rq(dev, rq);
err_destroy_sq:
@@ -1588,14 +1698,27 @@ static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *q
to_mpd(qp->ibqp.pd)->uid);
}
-static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
- struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
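+/* Gathers the create-QP inputs and response so helpers take one argument. */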
+struct mlx5_create_qp_params {
+ struct ib_udata *udata;
+ size_t inlen;
+ size_t outlen;
+ size_t ucmd_size;
+ void *ucmd;
+ u8 is_rss_raw : 1;
+ struct ib_qp_init_attr *attr;
+ u32 uidx;
+ struct mlx5_ib_create_qp_resp resp;
+};
+
+static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_params *params)
{
+ struct ib_qp_init_attr *init_attr = params->attr;
+ struct mlx5_ib_create_qp_rss *ucmd = params->ucmd;
+ struct ib_udata *udata = params->udata;
struct mlx5_ib_ucontext *mucontext = rdma_udata_to_drv_context(
udata, struct mlx5_ib_ucontext, ibucontext);
- struct mlx5_ib_create_qp_resp resp = {};
int inlen;
int outlen;
int err;
@@ -1605,79 +1728,28 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
void *hfso;
u32 selected_fields = 0;
u32 outer_l4;
- size_t min_resp_len;
u32 tdn = mucontext->tdn;
- struct mlx5_ib_create_qp_rss ucmd = {};
- size_t required_cmd_sz;
u8 lb_flag = 0;
- if (init_attr->qp_type != IB_QPT_RAW_PACKET)
- return -EOPNOTSUPP;
-
- if (init_attr->create_flags || init_attr->send_cq)
- return -EINVAL;
-
- min_resp_len = offsetof(typeof(resp), bfreg_index) + sizeof(resp.bfreg_index);
- if (udata->outlen < min_resp_len)
- return -EINVAL;
-
- required_cmd_sz = offsetof(typeof(ucmd), flags) + sizeof(ucmd.flags);
- if (udata->inlen < required_cmd_sz) {
- mlx5_ib_dbg(dev, "invalid inlen\n");
- return -EINVAL;
- }
-
- if (udata->inlen > sizeof(ucmd) &&
- !ib_is_udata_cleared(udata, sizeof(ucmd),
- udata->inlen - sizeof(ucmd))) {
- mlx5_ib_dbg(dev, "inlen is not supported\n");
- return -EOPNOTSUPP;
- }
-
- if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) {
- mlx5_ib_dbg(dev, "copy failed\n");
- return -EFAULT;
- }
-
- if (ucmd.comp_mask) {
+ if (ucmd->comp_mask) {
mlx5_ib_dbg(dev, "invalid comp mask\n");
return -EOPNOTSUPP;
}
- if (ucmd.flags & ~(MLX5_QP_FLAG_TUNNEL_OFFLOADS |
- MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
- MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)) {
- mlx5_ib_dbg(dev, "invalid flags\n");
- return -EOPNOTSUPP;
- }
-
- if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS &&
- !tunnel_offload_supported(dev->mdev)) {
- mlx5_ib_dbg(dev, "tunnel offloads isn't supported\n");
- return -EOPNOTSUPP;
- }
-
- if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_INNER &&
- !(ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)) {
+ if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_INNER &&
+ !(ucmd->flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)) {
mlx5_ib_dbg(dev, "Tunnel offloads must be set for inner RSS\n");
return -EOPNOTSUPP;
}
- if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC || dev->is_rep) {
- lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
+ if (dev->is_rep)
qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
- }
- if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) {
- lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;
- qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC;
- }
+ if (qp->flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC)
+ lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
- err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
- if (err) {
- mlx5_ib_dbg(dev, "copy failed\n");
- return -EINVAL;
- }
+ if (qp->flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)
+ lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;
inlen = MLX5_ST_SZ_BYTES(create_tir_in);
outlen = MLX5_ST_SZ_BYTES(create_tir_out);
@@ -1696,30 +1768,29 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
- if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)
+ if (ucmd->flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)
MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
MLX5_SET(tirc, tirc, self_lb_block, lb_flag);
- if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_INNER)
+ if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_INNER)
hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner);
else
hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
- switch (ucmd.rx_hash_function) {
+ switch (ucmd->rx_hash_function) {
case MLX5_RX_HASH_FUNC_TOEPLITZ:
{
void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key);
- if (len != ucmd.rx_key_len) {
+ if (len != ucmd->rx_key_len) {
err = -EINVAL;
goto err;
}
MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
- MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
- memcpy(rss_key, ucmd.rx_hash_key, len);
+ memcpy(rss_key, ucmd->rx_hash_key, len);
break;
}
default:
@@ -1727,7 +1798,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
goto err;
}
- if (!ucmd.rx_hash_fields_mask) {
+ if (!ucmd->rx_hash_fields_mask) {
/* special case when this TIR serves as steering entry without hashing */
if (!init_attr->rwq_ind_tbl->log_ind_tbl_size)
goto create_tir;
@@ -1735,29 +1806,31 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
goto err;
}
- if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) &&
- ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))) {
+ if (((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) &&
+ ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))) {
err = -EINVAL;
goto err;
}
/* If none of IPV4 & IPV6 SRC/DST was set - this bit field is ignored */
- if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4))
+ if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4))
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV4);
- else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
+ else if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV6);
- outer_l4 = ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) << 0 |
- ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP)) << 1 |
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) << 2;
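+ /* Record which L4 selectors were requested (TCP ports, UDP ports,
+ * IPSEC SPI) so we can reject invalid combinations below.
+ */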
+ outer_l4 = ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP))
+ << 0 |
+ ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
+ << 1 |
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) << 2;
/* Check that only one l4 protocol is set */
if (outer_l4 & (outer_l4 - 1)) {
@@ -1766,38 +1839,39 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
}
/* If none of TCP & UDP SRC/DST was set - this bit field is ignored */
- if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP))
+ if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP))
MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
MLX5_L4_PROT_TYPE_TCP);
- else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
+ else if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
MLX5_L4_PROT_TYPE_UDP);
- if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6))
+ if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6))
selected_fields |= MLX5_HASH_FIELD_SEL_SRC_IP;
- if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
+ if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
selected_fields |= MLX5_HASH_FIELD_SEL_DST_IP;
- if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP))
+ if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP))
selected_fields |= MLX5_HASH_FIELD_SEL_L4_SPORT;
- if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
+ if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
selected_fields |= MLX5_HASH_FIELD_SEL_L4_DPORT;
- if (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI)
+ if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI)
selected_fields |= MLX5_HASH_FIELD_SEL_IPSEC_SPI;
MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
create_tir:
- err = mlx5_core_create_tir_out(dev->mdev, in, inlen, out, outlen);
+ MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR);
+ err = mlx5_cmd_exec_inout(dev->mdev, create_tir, in, out);
qp->rss_qp.tirn = MLX5_GET(create_tir_out, out, tirn);
if (!err && MLX5_GET(tirc, tirc, self_lb_block)) {
@@ -1812,75 +1886,45 @@ create_tir:
goto err;
if (mucontext->devx_uid) {
- resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
- resp.tirn = qp->rss_qp.tirn;
- if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner)) {
- resp.tir_icm_addr =
+ params->resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
+ params->resp.tirn = qp->rss_qp.tirn;
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2)) {
+ params->resp.tir_icm_addr =
MLX5_GET(create_tir_out, out, icm_address_31_0);
- resp.tir_icm_addr |= (u64)MLX5_GET(create_tir_out, out,
- icm_address_39_32)
- << 32;
- resp.tir_icm_addr |= (u64)MLX5_GET(create_tir_out, out,
- icm_address_63_40)
- << 40;
- resp.comp_mask |=
+ params->resp.tir_icm_addr |=
+ (u64)MLX5_GET(create_tir_out, out,
+ icm_address_39_32)
+ << 32;
+ params->resp.tir_icm_addr |=
+ (u64)MLX5_GET(create_tir_out, out,
+ icm_address_63_40)
+ << 40;
+ params->resp.comp_mask |=
MLX5_IB_CREATE_QP_RESP_MASK_TIR_ICM_ADDR;
}
}
- err = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
- if (err)
- goto err_copy;
-
kvfree(in);
/* qpn is reserved for that QP */
qp->trans_qp.base.mqp.qpn = 0;
- qp->flags |= MLX5_IB_QP_RSS;
+ qp->is_rss = true;
return 0;
-err_copy:
- mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn, mucontext->devx_uid);
err:
kvfree(in);
return err;
}
-static void configure_responder_scat_cqe(struct ib_qp_init_attr *init_attr,
- void *qpc)
-{
- int rcqe_sz;
-
- if (init_attr->qp_type == MLX5_IB_QPT_DCI)
- return;
-
- rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq);
-
- if (init_attr->qp_type == MLX5_IB_QPT_DCT) {
- if (rcqe_sz == 128)
- MLX5_SET(dctc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
-
- return;
- }
-
- MLX5_SET(qpc, qpc, cs_res,
- rcqe_sz == 128 ? MLX5_RES_SCAT_DATA64_CQE :
- MLX5_RES_SCAT_DATA32_CQE);
-}
-
static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp,
struct ib_qp_init_attr *init_attr,
- struct mlx5_ib_create_qp *ucmd,
void *qpc)
{
- enum ib_qp_type qpt = init_attr->qp_type;
int scqe_sz;
- bool allow_scat_cqe = 0;
+ bool allow_scat_cqe = false;
- if (qpt == IB_QPT_UC || qpt == IB_QPT_UD)
- return;
-
- if (ucmd)
- allow_scat_cqe = ucmd->flags & MLX5_QP_FLAG_ALLOW_SCATTER_CQE;
+ allow_scat_cqe = qp->flags_en & MLX5_QP_FLAG_ALLOW_SCATTER_CQE;
if (!allow_scat_cqe && init_attr->sq_sig_type != IB_SIGNAL_ALL_WR)
return;
@@ -1916,7 +1960,7 @@ static int atomic_size_to_mode(int size_mask)
}
static int get_atomic_mode(struct mlx5_ib_dev *dev,
- enum ib_qp_type qp_type)
+ struct mlx5_ib_qp *qp)
{
u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
u8 atomic = MLX5_CAP_GEN(dev->mdev, atomic);
@@ -1926,7 +1970,7 @@ static int get_atomic_mode(struct mlx5_ib_dev *dev,
if (!atomic)
return -EOPNOTSUPP;
- if (qp_type == MLX5_IB_QPT_DCT)
+ if (qp->type == MLX5_IB_QPT_DCT)
atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
else
atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
@@ -1940,277 +1984,362 @@ static int get_atomic_mode(struct mlx5_ib_dev *dev,
atomic_operations & MLX5_ATOMIC_OPS_FETCH_ADD))
atomic_mode = MLX5_ATOMIC_MODE_IB_COMP;
+ /* OOO DP QPs do not support larger than 8-Bytes atomic operations */
+ if (atomic_mode > MLX5_ATOMIC_MODE_8B && qp->is_ooo_rq)
+ atomic_mode = MLX5_ATOMIC_MODE_8B;
+
return atomic_mode;
}
-static inline bool check_flags_mask(uint64_t input, uint64_t supported)
+static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_params *params)
{
- return (input & ~supported) == 0;
+ struct ib_qp_init_attr *attr = params->attr;
+ u32 uidx = params->uidx;
+ struct mlx5_ib_resources *devr = &dev->devr;
+ u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
+ int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_ib_qp_base *base;
+ unsigned long flags;
+ void *qpc;
+ u32 *in;
+ int err;
+
+ if (attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+ qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+
+ MLX5_SET(qpc, qpc, st, MLX5_QP_ST_XRC);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+ MLX5_SET(qpc, qpc, pd, to_mpd(devr->p0)->pdn);
+
+ if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
+ MLX5_SET(qpc, qpc, block_lb_mc, 1);
+ if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL)
+ MLX5_SET(qpc, qpc, cd_master, 1);
+ if (qp->flags & IB_QP_CREATE_MANAGED_SEND)
+ MLX5_SET(qpc, qpc, cd_slave_send, 1);
+ if (qp->flags & IB_QP_CREATE_MANAGED_RECV)
+ MLX5_SET(qpc, qpc, cd_slave_receive, 1);
+
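+ /* XRC target QPs have no SQ and hang off the device's default CQ/SRQ. */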
+ MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(dev->mdev));
+ MLX5_SET(qpc, qpc, rq_type, MLX5_SRQ_RQ);
+ MLX5_SET(qpc, qpc, no_sq, 1);
+ MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
+ MLX5_SET(qpc, qpc, cqn_snd, to_mcq(devr->c0)->mcq.cqn);
+ MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
+ MLX5_SET(qpc, qpc, xrcd, to_mxrcd(attr->xrcd)->xrcdn);
+ MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
+
+ /* 0xffffff means we ask to work with cqe version 0 */
+ if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
+ MLX5_SET(qpc, qpc, user_index, uidx);
+
+ if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) {
+ MLX5_SET(qpc, qpc, end_padding_mode,
+ MLX5_WQ_END_PAD_MODE_ALIGN);
+ /* Special case: the flag has been consumed here, so clear it */
+ qp->flags &= ~IB_QP_CREATE_PCI_WRITE_END_PADDING;
+ }
+
+ base = &qp->trans_qp.base;
+ err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out);
+ kvfree(in);
+ if (err)
+ return err;
+
+ base->container_mibqp = qp;
+ base->mqp.event = mlx5_ib_qp_event;
+ if (MLX5_CAP_GEN(mdev, ece_support))
+ params->resp.ece_options = MLX5_GET(create_qp_out, out, ece);
+
+ spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+ list_add_tail(&qp->qps_list, &dev->qp_list);
+ spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
+
+ qp->trans_qp.xrcdn = to_mxrcd(attr->xrcd)->xrcdn;
+ return 0;
}
-static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata, struct mlx5_ib_qp *qp)
+static int create_dci(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_params *params)
{
+ struct ib_qp_init_attr *init_attr = params->attr;
+ struct mlx5_ib_create_qp *ucmd = params->ucmd;
+ u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
+ struct ib_udata *udata = params->udata;
+ u32 uidx = params->uidx;
struct mlx5_ib_resources *devr = &dev->devr;
int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
struct mlx5_core_dev *mdev = dev->mdev;
- struct mlx5_ib_create_qp_resp resp = {};
- struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
- udata, struct mlx5_ib_ucontext, ibucontext);
struct mlx5_ib_cq *send_cq;
struct mlx5_ib_cq *recv_cq;
unsigned long flags;
- u32 uidx = MLX5_IB_DEFAULT_UIDX;
- struct mlx5_ib_create_qp ucmd;
struct mlx5_ib_qp_base *base;
+ int ts_format;
int mlx5_st;
void *qpc;
u32 *in;
int err;
- mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
- mlx5_st = to_mlx5_st(init_attr->qp_type);
+ mlx5_st = to_mlx5_st(qp->type);
if (mlx5_st < 0)
return -EINVAL;
- if (init_attr->rwq_ind_tbl) {
- if (!udata)
- return -ENOSYS;
+ if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+ qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
+
+ base = &qp->trans_qp.base;
- err = create_rss_raw_qp_tir(dev, qp, pd, init_attr, udata);
+ qp->has_rq = qp_has_rq(init_attr);
+ err = set_rq_size(dev, &init_attr->cap, qp->has_rq, qp, ucmd);
+ if (err) {
+ mlx5_ib_dbg(dev, "err %d\n", err);
return err;
}
- if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
- if (!MLX5_CAP_GEN(mdev, block_lb_mc)) {
- mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n");
- return -EINVAL;
- } else {
- qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK;
- }
+ if (ucmd->rq_wqe_shift != qp->rq.wqe_shift ||
+ ucmd->rq_wqe_count != qp->rq.wqe_cnt)
+ return -EINVAL;
+
+ if (ucmd->sq_wqe_count > (1 << MLX5_CAP_GEN(mdev, log_max_qp_sz)))
+ return -EINVAL;
+
+ ts_format = get_qp_ts_format(dev, to_mcq(init_attr->send_cq),
+ to_mcq(init_attr->recv_cq));
+
+ if (ts_format < 0)
+ return ts_format;
+
+ err = _create_user_qp(dev, pd, qp, udata, init_attr, &in, &params->resp,
+ &inlen, base, ucmd);
+ if (err)
+ return err;
+
+ if (MLX5_CAP_GEN(mdev, ece_support))
+ MLX5_SET(create_qp_in, in, ece, ucmd->ece_options);
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+
+ MLX5_SET(qpc, qpc, st, mlx5_st);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+ MLX5_SET(qpc, qpc, pd, to_mpd(pd)->pdn);
+
+ if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE)
+ MLX5_SET(qpc, qpc, wq_signature, 1);
+
+ if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL)
+ MLX5_SET(qpc, qpc, cd_master, 1);
+ if (qp->flags & IB_QP_CREATE_MANAGED_SEND)
+ MLX5_SET(qpc, qpc, cd_slave_send, 1);
+ if (qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE)
+ configure_requester_scat_cqe(dev, qp, init_attr, qpc);
+
+ if (qp->rq.wqe_cnt) {
+ MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
+ MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt));
}
- if (init_attr->create_flags &
- (IB_QP_CREATE_CROSS_CHANNEL |
- IB_QP_CREATE_MANAGED_SEND |
- IB_QP_CREATE_MANAGED_RECV)) {
- if (!MLX5_CAP_GEN(mdev, cd)) {
- mlx5_ib_dbg(dev, "cross-channel isn't supported\n");
- return -EINVAL;
- }
- if (init_attr->create_flags & IB_QP_CREATE_CROSS_CHANNEL)
- qp->flags |= MLX5_IB_QP_CROSS_CHANNEL;
- if (init_attr->create_flags & IB_QP_CREATE_MANAGED_SEND)
- qp->flags |= MLX5_IB_QP_MANAGED_SEND;
- if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV)
- qp->flags |= MLX5_IB_QP_MANAGED_RECV;
- }
-
- if (init_attr->qp_type == IB_QPT_UD &&
- (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO))
- if (!MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
- mlx5_ib_dbg(dev, "ipoib UD lso qp isn't supported\n");
- return -EOPNOTSUPP;
- }
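+ /* Optional DCI stream channel sizing comes straight from the user command. */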
+ if (qp->flags_en & MLX5_QP_FLAG_DCI_STREAM) {
+ MLX5_SET(qpc, qpc, log_num_dci_stream_channels,
+ ucmd->dci_streams.log_num_concurent);
+ MLX5_SET(qpc, qpc, log_num_dci_errored_streams,
+ ucmd->dci_streams.log_num_errored);
+ }
- if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS) {
- if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
- mlx5_ib_dbg(dev, "Scatter FCS is supported only for Raw Packet QPs");
- return -EOPNOTSUPP;
- }
- if (!MLX5_CAP_GEN(dev->mdev, eth_net_offloads) ||
- !MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
- mlx5_ib_dbg(dev, "Scatter FCS isn't supported\n");
- return -EOPNOTSUPP;
- }
- qp->flags |= MLX5_IB_QP_CAP_SCATTER_FCS;
+ MLX5_SET(qpc, qpc, ts_format, ts_format);
+ MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr));
+
+ MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt));
+
+ /* Set default resources */
+ if (init_attr->srq) {
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0);
+ MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
+ to_msrq(init_attr->srq)->msrq.srqn);
+ } else {
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
+ MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
+ to_msrq(devr->s1)->msrq.srqn);
}
- if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
- qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
+ if (init_attr->send_cq)
+ MLX5_SET(qpc, qpc, cqn_snd,
+ to_mcq(init_attr->send_cq)->mcq.cqn);
- if (init_attr->create_flags & IB_QP_CREATE_CVLAN_STRIPPING) {
- if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
- MLX5_CAP_ETH(dev->mdev, vlan_cap)) ||
- (init_attr->qp_type != IB_QPT_RAW_PACKET))
- return -EOPNOTSUPP;
- qp->flags |= MLX5_IB_QP_CVLAN_STRIPPING;
+ if (init_attr->recv_cq)
+ MLX5_SET(qpc, qpc, cqn_rcv,
+ to_mcq(init_attr->recv_cq)->mcq.cqn);
+
+ MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
+
+ /* 0xffffff means we ask to work with cqe version 0 */
+ if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
+ MLX5_SET(qpc, qpc, user_index, uidx);
+
+ if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) {
+ MLX5_SET(qpc, qpc, end_padding_mode,
+ MLX5_WQ_END_PAD_MODE_ALIGN);
+ /* Special case: the flag has been consumed here, so clear it */
+ qp->flags &= ~IB_QP_CREATE_PCI_WRITE_END_PADDING;
}
- if (udata) {
- if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
- mlx5_ib_dbg(dev, "copy failed\n");
- return -EFAULT;
- }
+ err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out);
- if (!check_flags_mask(ucmd.flags,
- MLX5_QP_FLAG_ALLOW_SCATTER_CQE |
- MLX5_QP_FLAG_BFREG_INDEX |
- MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE |
- MLX5_QP_FLAG_SCATTER_CQE |
- MLX5_QP_FLAG_SIGNATURE |
- MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC |
- MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
- MLX5_QP_FLAG_TUNNEL_OFFLOADS |
- MLX5_QP_FLAG_TYPE_DCI |
- MLX5_QP_FLAG_TYPE_DCT))
- return -EINVAL;
+ kvfree(in);
+ if (err)
+ goto err_create;
- err = get_qp_user_index(ucontext, &ucmd, udata->inlen, &uidx);
- if (err)
- return err;
+ base->container_mibqp = qp;
+ base->mqp.event = mlx5_ib_qp_event;
+ if (MLX5_CAP_GEN(mdev, ece_support))
+ params->resp.ece_options = MLX5_GET(create_qp_out, out, ece);
- qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
- if (MLX5_CAP_GEN(dev->mdev, sctr_data_cqe))
- qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
- if (ucmd.flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS) {
- if (init_attr->qp_type != IB_QPT_RAW_PACKET ||
- !tunnel_offload_supported(mdev)) {
- mlx5_ib_dbg(dev, "Tunnel offload isn't supported\n");
- return -EOPNOTSUPP;
- }
- qp->flags_en |= MLX5_QP_FLAG_TUNNEL_OFFLOADS;
- }
+ get_cqs(qp->type, init_attr->send_cq, init_attr->recv_cq,
+ &send_cq, &recv_cq);
+ spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+ mlx5_ib_lock_cqs(send_cq, recv_cq);
+ /* Maintain device to QPs access, needed for further handling via reset
+ * flow
+ */
+ list_add_tail(&qp->qps_list, &dev->qp_list);
+ /* Maintain CQ to QPs access, needed for further handling via reset flow
+ */
+ if (send_cq)
+ list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
+ if (recv_cq)
+ list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
+ mlx5_ib_unlock_cqs(send_cq, recv_cq);
+ spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
- if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC) {
- if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
- mlx5_ib_dbg(dev, "Self-LB UC isn't supported\n");
- return -EOPNOTSUPP;
- }
- qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
- }
+ return 0;
- if (ucmd.flags & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC) {
- if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
- mlx5_ib_dbg(dev, "Self-LB UM isn't supported\n");
- return -EOPNOTSUPP;
- }
- qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC;
- }
+err_create:
+ destroy_qp(dev, qp, base, udata);
+ return err;
+}
- if (ucmd.flags & MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE) {
- if (init_attr->qp_type != IB_QPT_RC ||
- !MLX5_CAP_GEN(dev->mdev, qp_packet_based)) {
- mlx5_ib_dbg(dev, "packet based credit mode isn't supported\n");
- return -EOPNOTSUPP;
- }
- qp->flags |= MLX5_IB_QP_PACKET_BASED_CREDIT;
- }
+static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_params *params)
+{
+ struct ib_qp_init_attr *init_attr = params->attr;
+ struct mlx5_ib_create_qp *ucmd = params->ucmd;
+ u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
+ struct ib_udata *udata = params->udata;
+ u32 uidx = params->uidx;
+ struct mlx5_ib_resources *devr = &dev->devr;
+ int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_ib_cq *send_cq;
+ struct mlx5_ib_cq *recv_cq;
+ unsigned long flags;
+ struct mlx5_ib_qp_base *base;
+ int ts_format;
+ int mlx5_st;
+ void *qpc;
+ u32 *in;
+ int err;
- if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) {
- if (init_attr->qp_type != IB_QPT_UD ||
- (MLX5_CAP_GEN(dev->mdev, port_type) !=
- MLX5_CAP_PORT_TYPE_IB) ||
- !mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS)) {
- mlx5_ib_dbg(dev, "Source QP option isn't supported\n");
- return -EOPNOTSUPP;
- }
+ spin_lock_init(&qp->sq.lock);
+ spin_lock_init(&qp->rq.lock);
- qp->flags |= MLX5_IB_QP_UNDERLAY;
- qp->underlay_qpn = init_attr->source_qpn;
- }
- } else {
- qp->wq_sig = !!wq_signature;
- }
+ mlx5_st = to_mlx5_st(qp->type);
+ if (mlx5_st < 0)
+ return -EINVAL;
+
+ if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+ qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
+
+ if (qp->flags & IB_QP_CREATE_SOURCE_QPN)
+ qp->underlay_qpn = init_attr->source_qpn;
base = (init_attr->qp_type == IB_QPT_RAW_PACKET ||
- qp->flags & MLX5_IB_QP_UNDERLAY) ?
+ qp->flags & IB_QP_CREATE_SOURCE_QPN) ?
&qp->raw_packet_qp.rq.base :
&qp->trans_qp.base;
qp->has_rq = qp_has_rq(init_attr);
- err = set_rq_size(dev, &init_attr->cap, qp->has_rq,
- qp, udata ? &ucmd : NULL);
+ err = set_rq_size(dev, &init_attr->cap, qp->has_rq, qp, ucmd);
if (err) {
mlx5_ib_dbg(dev, "err %d\n", err);
return err;
}
- if (pd) {
- if (udata) {
- __u32 max_wqes =
- 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
- mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count);
- if (ucmd.rq_wqe_shift != qp->rq.wqe_shift ||
- ucmd.rq_wqe_count != qp->rq.wqe_cnt) {
- mlx5_ib_dbg(dev, "invalid rq params\n");
- return -EINVAL;
- }
- if (ucmd.sq_wqe_count > max_wqes) {
- mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n",
- ucmd.sq_wqe_count, max_wqes);
- return -EINVAL;
- }
- if (init_attr->create_flags &
- mlx5_ib_create_qp_sqpn_qp1()) {
- mlx5_ib_dbg(dev, "user-space is not allowed to create UD QPs spoofing as QP1\n");
- return -EINVAL;
- }
- err = create_user_qp(dev, pd, qp, udata, init_attr, &in,
- &resp, &inlen, base);
- if (err)
- mlx5_ib_dbg(dev, "err %d\n", err);
- } else {
- err = create_kernel_qp(dev, init_attr, qp, &in, &inlen,
- base);
- if (err)
- mlx5_ib_dbg(dev, "err %d\n", err);
- }
+ if (ucmd->rq_wqe_shift != qp->rq.wqe_shift ||
+ ucmd->rq_wqe_count != qp->rq.wqe_cnt)
+ return -EINVAL;
- if (err)
- return err;
- } else {
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
+ if (ucmd->sq_wqe_count > (1 << MLX5_CAP_GEN(mdev, log_max_qp_sz)))
+ return -EINVAL;
- qp->create_type = MLX5_QP_EMPTY;
+ if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
+ ts_format = get_qp_ts_format(dev, to_mcq(init_attr->send_cq),
+ to_mcq(init_attr->recv_cq));
+ if (ts_format < 0)
+ return ts_format;
}
+ err = _create_user_qp(dev, pd, qp, udata, init_attr, &in, &params->resp,
+ &inlen, base, ucmd);
+ if (err)
+ return err;
+
if (is_sqp(init_attr->qp_type))
qp->port = init_attr->port_num;
+ if (MLX5_CAP_GEN(mdev, ece_support))
+ MLX5_SET(create_qp_in, in, ece, ucmd->ece_options);
qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
MLX5_SET(qpc, qpc, st, mlx5_st);
MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+ MLX5_SET(qpc, qpc, pd, to_mpd(pd)->pdn);
- if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR)
- MLX5_SET(qpc, qpc, pd, to_mpd(pd ? pd : devr->p0)->pdn);
- else
- MLX5_SET(qpc, qpc, latency_sensitive, 1);
-
-
- if (qp->wq_sig)
+ if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE)
MLX5_SET(qpc, qpc, wq_signature, 1);
- if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
+ if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
MLX5_SET(qpc, qpc, block_lb_mc, 1);
- if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+ if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL)
MLX5_SET(qpc, qpc, cd_master, 1);
- if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
+ if (qp->flags & IB_QP_CREATE_MANAGED_SEND)
MLX5_SET(qpc, qpc, cd_slave_send, 1);
- if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
+ if (qp->flags & IB_QP_CREATE_MANAGED_RECV)
MLX5_SET(qpc, qpc, cd_slave_receive, 1);
- if (qp->flags & MLX5_IB_QP_PACKET_BASED_CREDIT)
+ if (qp->flags_en & MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE)
MLX5_SET(qpc, qpc, req_e2e_credit_mode, 1);
- if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
- configure_responder_scat_cqe(init_attr, qpc);
- configure_requester_scat_cqe(dev, init_attr,
- udata ? &ucmd : NULL,
- qpc);
+ if ((qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) &&
+ (init_attr->qp_type == IB_QPT_RC ||
+ init_attr->qp_type == IB_QPT_UC)) {
+ int rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq);
+
+ MLX5_SET(qpc, qpc, cs_res,
+ rcqe_sz == 128 ? MLX5_RES_SCAT_DATA64_CQE :
+ MLX5_RES_SCAT_DATA32_CQE);
}
+ if ((qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) &&
+ (qp->type == MLX5_IB_QPT_DCI || qp->type == IB_QPT_RC))
+ configure_requester_scat_cqe(dev, qp, init_attr, qpc);
if (qp->rq.wqe_cnt) {
MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt));
}
+ if (init_attr->qp_type != IB_QPT_RAW_PACKET)
+ MLX5_SET(qpc, qpc, ts_format, ts_format);
+
MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr));
if (qp->sq.wqe_cnt) {
@@ -2225,23 +2354,17 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
/* Set default resources */
switch (init_attr->qp_type) {
- case IB_QPT_XRC_TGT:
- MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
- MLX5_SET(qpc, qpc, cqn_snd, to_mcq(devr->c0)->mcq.cqn);
- MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
- MLX5_SET(qpc, qpc, xrcd, to_mxrcd(init_attr->xrcd)->xrcdn);
- break;
case IB_QPT_XRC_INI:
MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
- MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
break;
default:
if (init_attr->srq) {
- MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn);
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0);
MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(init_attr->srq)->msrq.srqn);
} else {
- MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s1)->msrq.srqn);
}
}
@@ -2258,52 +2381,166 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
MLX5_SET(qpc, qpc, user_index, uidx);
- /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicates ipoib qp */
- if (init_attr->qp_type == IB_QPT_UD &&
- (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) {
- MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1);
- qp->flags |= MLX5_IB_QP_LSO;
+ if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING &&
+ init_attr->qp_type != IB_QPT_RAW_PACKET) {
+ MLX5_SET(qpc, qpc, end_padding_mode,
+ MLX5_WQ_END_PAD_MODE_ALIGN);
+ /* Special case to clean flag */
+ qp->flags &= ~IB_QP_CREATE_PCI_WRITE_END_PADDING;
}
- if (init_attr->create_flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) {
- if (!MLX5_CAP_GEN(dev->mdev, end_pad)) {
- mlx5_ib_dbg(dev, "scatter end padding is not supported\n");
- err = -EOPNOTSUPP;
- goto err;
- } else if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
- MLX5_SET(qpc, qpc, end_padding_mode,
- MLX5_WQ_END_PAD_MODE_ALIGN);
- } else {
- qp->flags |= MLX5_IB_QP_PCI_WRITE_END_PADDING;
- }
+ if (init_attr->qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & IB_QP_CREATE_SOURCE_QPN) {
+ qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd->sq_buf_addr;
+ raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
+ err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata,
+ &params->resp, init_attr);
+ } else
+ err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out);
+
+ kvfree(in);
+ if (err)
+ goto err_create;
+
+ base->container_mibqp = qp;
+ base->mqp.event = mlx5_ib_qp_event;
+ if (MLX5_CAP_GEN(mdev, ece_support))
+ params->resp.ece_options = MLX5_GET(create_qp_out, out, ece);
+
+ get_cqs(qp->type, init_attr->send_cq, init_attr->recv_cq,
+ &send_cq, &recv_cq);
+ spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+ mlx5_ib_lock_cqs(send_cq, recv_cq);
+ /* Maintain device to QPs access, needed for further handling via reset
+ * flow
+ */
+ list_add_tail(&qp->qps_list, &dev->qp_list);
+ /* Maintain CQ to QPs access, needed for further handling via reset flow
+ */
+ if (send_cq)
+ list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
+ if (recv_cq)
+ list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
+ mlx5_ib_unlock_cqs(send_cq, recv_cq);
+ spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
+
+ return 0;
+
+err_create:
+ destroy_qp(dev, qp, base, udata);
+ return err;
+}
+
+static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_params *params)
+{
+ struct ib_qp_init_attr *attr = params->attr;
+ u32 uidx = params->uidx;
+ struct mlx5_ib_resources *devr = &dev->devr;
+ u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
+ int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_ib_cq *send_cq;
+ struct mlx5_ib_cq *recv_cq;
+ unsigned long flags;
+ struct mlx5_ib_qp_base *base;
+ int mlx5_st;
+ void *qpc;
+ u32 *in;
+ int err;
+
+ spin_lock_init(&qp->sq.lock);
+ spin_lock_init(&qp->rq.lock);
+
+ mlx5_st = to_mlx5_st(qp->type);
+ if (mlx5_st < 0)
+ return -EINVAL;
+
+ if (attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+ qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
+
+ base = &qp->trans_qp.base;
+
+ qp->has_rq = qp_has_rq(attr);
+ err = set_rq_size(dev, &attr->cap, qp->has_rq, qp, NULL);
+ if (err) {
+ mlx5_ib_dbg(dev, "err %d\n", err);
+ return err;
}
- if (inlen < 0) {
- err = -EINVAL;
- goto err;
+ err = _create_kernel_qp(dev, attr, qp, &in, &inlen, base);
+ if (err)
+ return err;
+
+ if (is_sqp(attr->qp_type))
+ qp->port = attr->port_num;
+
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+
+ MLX5_SET(qpc, qpc, st, mlx5_st);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+
+ if (attr->qp_type != MLX5_IB_QPT_REG_UMR)
+ MLX5_SET(qpc, qpc, pd, to_mpd(pd ? pd : devr->p0)->pdn);
+ else
+ MLX5_SET(qpc, qpc, latency_sensitive, 1);
+
+
+ if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
+ MLX5_SET(qpc, qpc, block_lb_mc, 1);
+
+ if (qp->rq.wqe_cnt) {
+ MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
+ MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt));
}
- if (init_attr->qp_type == IB_QPT_RAW_PACKET ||
- qp->flags & MLX5_IB_QP_UNDERLAY) {
- qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
- raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
- err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata,
- &resp);
+ MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, attr));
+
+ if (qp->sq.wqe_cnt)
+ MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt));
+ else
+ MLX5_SET(qpc, qpc, no_sq, 1);
+
+ if (attr->srq) {
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0);
+ MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
+ to_msrq(attr->srq)->msrq.srqn);
} else {
- err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen);
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
+ MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
+ to_msrq(devr->s1)->msrq.srqn);
}
- if (err) {
- mlx5_ib_dbg(dev, "create qp failed\n");
- goto err_create;
- }
+ if (attr->send_cq)
+ MLX5_SET(qpc, qpc, cqn_snd, to_mcq(attr->send_cq)->mcq.cqn);
+
+ if (attr->recv_cq)
+ MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(attr->recv_cq)->mcq.cqn);
+
+ MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
+
+ /* 0xffffff means we ask to work with cqe version 0 */
+ if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
+ MLX5_SET(qpc, qpc, user_index, uidx);
+
+	/* we use IB_QP_CREATE_IPOIB_UD_LSO to indicate an ipoib qp */
+ if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO)
+ MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1);
+
+ if (qp->flags & IB_QP_CREATE_INTEGRITY_EN &&
+ MLX5_CAP_GEN(mdev, go_back_n))
+ MLX5_SET(qpc, qpc, retry_mode, MLX5_QP_RM_GO_BACK_N);
+ err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out);
kvfree(in);
+ if (err)
+ goto err_create;
base->container_mibqp = qp;
base->mqp.event = mlx5_ib_qp_event;
- get_cqs(init_attr->qp_type, init_attr->send_cq, init_attr->recv_cq,
+ get_cqs(qp->type, attr->send_cq, attr->recv_cq,
&send_cq, &recv_cq);
spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
mlx5_ib_lock_cqs(send_cq, recv_cq);
@@ -2323,13 +2560,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
return 0;
err_create:
- if (qp->create_type == MLX5_QP_USER)
- destroy_qp_user(dev, pd, qp, base, udata);
- else if (qp->create_type == MLX5_QP_KERNEL)
- destroy_qp_kernel(dev, qp);
-
-err:
- kvfree(in);
+ destroy_qp(dev, qp, base, NULL);
return err;
}
@@ -2391,11 +2622,6 @@ static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *re
}
}
-static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp)
-{
- return to_mpd(qp->ibqp.pd);
-}
-
static void get_cqs(enum ib_qp_type qp_type,
struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq)
@@ -2416,14 +2642,10 @@ static void get_cqs(enum ib_qp_type qp_type,
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_UD:
- case IB_QPT_RAW_IPV6:
- case IB_QPT_RAW_ETHERTYPE:
case IB_QPT_RAW_PACKET:
*send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
*recv_cq = ib_recv_cq ? to_mcq(ib_recv_cq) : NULL;
break;
-
- case IB_QPT_MAX:
default:
*send_cq = NULL;
*recv_cq = NULL;
@@ -2443,22 +2665,21 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
unsigned long flags;
int err;
- if (qp->ibqp.rwq_ind_tbl) {
+ if (qp->is_rss) {
destroy_rss_raw_qp_tir(dev, qp);
return;
}
- base = (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
- qp->flags & MLX5_IB_QP_UNDERLAY) ?
- &qp->raw_packet_qp.rq.base :
- &qp->trans_qp.base;
+ base = (qp->type == IB_QPT_RAW_PACKET ||
+ qp->flags & IB_QP_CREATE_SOURCE_QPN) ?
+ &qp->raw_packet_qp.rq.base :
+ &qp->trans_qp.base;
if (qp->state != IB_QPS_RESET) {
- if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET &&
- !(qp->flags & MLX5_IB_QP_UNDERLAY)) {
- err = mlx5_core_qp_modify(dev->mdev,
- MLX5_CMD_OP_2RST_QP, 0,
- NULL, &base->mqp);
+ if (qp->type != IB_QPT_RAW_PACKET &&
+ !(qp->flags & IB_QP_CREATE_SOURCE_QPN)) {
+ err = mlx5_core_qp_modify(dev, MLX5_CMD_OP_2RST_QP, 0,
+ NULL, &base->mqp, NULL);
} else {
struct mlx5_modify_raw_qp_param raw_qp_param = {
.operation = MLX5_CMD_OP_2RST_QP
@@ -2471,8 +2692,8 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
base->mqp.qpn);
}
- get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
- &send_cq, &recv_cq);
+ get_cqs(qp->type, qp->ibqp.send_cq, qp->ibqp.recv_cq, &send_cq,
+ &recv_cq);
spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
mlx5_ib_lock_cqs(send_cq, recv_cq);
@@ -2484,7 +2705,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (recv_cq)
list_del(&qp->cq_recv_list);
- if (qp->create_type == MLX5_QP_KERNEL) {
+ if (!udata) {
__mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
if (send_cq != recv_cq)
@@ -2494,264 +2715,488 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
mlx5_ib_unlock_cqs(send_cq, recv_cq);
spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
- if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
- qp->flags & MLX5_IB_QP_UNDERLAY) {
+ if (qp->type == IB_QPT_RAW_PACKET ||
+ qp->flags & IB_QP_CREATE_SOURCE_QPN) {
destroy_raw_packet_qp(dev, qp);
} else {
- err = mlx5_core_destroy_qp(dev->mdev, &base->mqp);
+ err = mlx5_core_destroy_qp(dev, &base->mqp);
if (err)
mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n",
base->mqp.qpn);
}
- if (qp->create_type == MLX5_QP_KERNEL)
- destroy_qp_kernel(dev, qp);
- else if (qp->create_type == MLX5_QP_USER)
- destroy_qp_user(dev, &get_pd(qp)->ibpd, qp, base, udata);
+ destroy_qp(dev, qp, base, udata);
}
-static const char *ib_qp_type_str(enum ib_qp_type type)
+static int create_dct(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_params *params)
{
- switch (type) {
- case IB_QPT_SMI:
- return "IB_QPT_SMI";
- case IB_QPT_GSI:
- return "IB_QPT_GSI";
+ struct ib_qp_init_attr *attr = params->attr;
+ struct mlx5_ib_create_qp *ucmd = params->ucmd;
+ u32 uidx = params->uidx;
+ void *dctc;
+
+ if (mlx5_lag_is_active(dev->mdev) && !MLX5_CAP_GEN(dev->mdev, lag_dct))
+ return -EOPNOTSUPP;
+
+ qp->dct.in = kzalloc(MLX5_ST_SZ_BYTES(create_dct_in), GFP_KERNEL);
+ if (!qp->dct.in)
+ return -ENOMEM;
+
+ MLX5_SET(create_dct_in, qp->dct.in, uid, to_mpd(pd)->uid);
+ dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry);
+ MLX5_SET(dctc, dctc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(dctc, dctc, srqn_xrqn, to_msrq(attr->srq)->msrq.srqn);
+ MLX5_SET(dctc, dctc, cqn, to_mcq(attr->recv_cq)->mcq.cqn);
+ MLX5_SET64(dctc, dctc, dc_access_key, ucmd->access_key);
+ MLX5_SET(dctc, dctc, user_index, uidx);
+ if (MLX5_CAP_GEN(dev->mdev, ece_support))
+ MLX5_SET(dctc, dctc, ece, ucmd->ece_options);
+
+ if (qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) {
+ int rcqe_sz = mlx5_ib_get_cqe_size(attr->recv_cq);
+
+ if (rcqe_sz == 128)
+ MLX5_SET(dctc, dctc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
+ }
+
+ qp->state = IB_QPS_RESET;
+ return 0;
+}
+
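+/*
+ * Reject QP types the driver does not implement, and make sure the
+ * device exposes the needed capability (dct for IB_QPT_DRIVER, xrc
+ * for the XRC types) before accepting the requested type.
+ */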
+static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
+ enum ib_qp_type *type)
+{
+ if (attr->qp_type == IB_QPT_DRIVER && !MLX5_CAP_GEN(dev->mdev, dct))
+ goto out;
+
+ switch (attr->qp_type) {
+ case IB_QPT_XRC_TGT:
+ case IB_QPT_XRC_INI:
+ if (!MLX5_CAP_GEN(dev->mdev, xrc))
+ goto out;
+ fallthrough;
case IB_QPT_RC:
- return "IB_QPT_RC";
case IB_QPT_UC:
- return "IB_QPT_UC";
- case IB_QPT_UD:
- return "IB_QPT_UD";
- case IB_QPT_RAW_IPV6:
- return "IB_QPT_RAW_IPV6";
- case IB_QPT_RAW_ETHERTYPE:
- return "IB_QPT_RAW_ETHERTYPE";
- case IB_QPT_XRC_INI:
- return "IB_QPT_XRC_INI";
- case IB_QPT_XRC_TGT:
- return "IB_QPT_XRC_TGT";
+ case IB_QPT_SMI:
+ case MLX5_IB_QPT_HW_GSI:
+ case IB_QPT_DRIVER:
+ case IB_QPT_GSI:
case IB_QPT_RAW_PACKET:
- return "IB_QPT_RAW_PACKET";
+ case IB_QPT_UD:
case MLX5_IB_QPT_REG_UMR:
- return "MLX5_IB_QPT_REG_UMR";
- case IB_QPT_DRIVER:
- return "IB_QPT_DRIVER";
- case IB_QPT_MAX:
+ break;
default:
- return "Invalid QP type";
+ goto out;
}
+
+ *type = attr->qp_type;
+ return 0;
+
+out:
+ mlx5_ib_dbg(dev, "Unsupported QP type %d\n", attr->qp_type);
+ return -EOPNOTSUPP;
}
-static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd,
- struct ib_qp_init_attr *attr,
- struct mlx5_ib_create_qp *ucmd,
- struct ib_udata *udata)
+static int check_valid_flow(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct ib_qp_init_attr *attr,
+ struct ib_udata *udata)
{
struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
udata, struct mlx5_ib_ucontext, ibucontext);
- struct mlx5_ib_qp *qp;
- int err = 0;
- u32 uidx = MLX5_IB_DEFAULT_UIDX;
- void *dctc;
- if (!attr->srq || !attr->recv_cq)
- return ERR_PTR(-EINVAL);
+ if (!udata) {
+ /* Kernel create_qp callers */
+ if (attr->rwq_ind_tbl)
+ return -EOPNOTSUPP;
- err = get_qp_user_index(ucontext, ucmd, sizeof(*ucmd), &uidx);
- if (err)
- return ERR_PTR(err);
+ switch (attr->qp_type) {
+ case IB_QPT_RAW_PACKET:
+ case IB_QPT_DRIVER:
+ return -EOPNOTSUPP;
+ default:
+ return 0;
+ }
+ }
- qp = kzalloc(sizeof(*qp), GFP_KERNEL);
- if (!qp)
- return ERR_PTR(-ENOMEM);
+ /* Userspace create_qp callers */
+ if (attr->qp_type == IB_QPT_RAW_PACKET && !ucontext->cqe_version) {
+ mlx5_ib_dbg(dev,
+ "Raw Packet QP is only supported for CQE version > 0\n");
+ return -EINVAL;
+ }
- qp->dct.in = kzalloc(MLX5_ST_SZ_BYTES(create_dct_in), GFP_KERNEL);
- if (!qp->dct.in) {
- err = -ENOMEM;
- goto err_free;
+ if (attr->qp_type != IB_QPT_RAW_PACKET && attr->rwq_ind_tbl) {
+ mlx5_ib_dbg(dev,
+ "Wrong QP type %d for the RWQ indirect table\n",
+ attr->qp_type);
+ return -EINVAL;
}
- MLX5_SET(create_dct_in, qp->dct.in, uid, to_mpd(pd)->uid);
- dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry);
- qp->qp_sub_type = MLX5_IB_QPT_DCT;
- MLX5_SET(dctc, dctc, pd, to_mpd(pd)->pdn);
- MLX5_SET(dctc, dctc, srqn_xrqn, to_msrq(attr->srq)->msrq.srqn);
- MLX5_SET(dctc, dctc, cqn, to_mcq(attr->recv_cq)->mcq.cqn);
- MLX5_SET64(dctc, dctc, dc_access_key, ucmd->access_key);
- MLX5_SET(dctc, dctc, user_index, uidx);
+ /*
+	 * We don't need to see this warning; it means that kernel code
+	 * is missing an ib_pd. Placed here to catch developers' mistakes.
+ */
+ WARN_ONCE(!pd && attr->qp_type != IB_QPT_XRC_TGT,
+ "There is a missing PD pointer assignment\n");
+ return 0;
+}
- if (ucmd->flags & MLX5_QP_FLAG_SCATTER_CQE)
- configure_responder_scat_cqe(attr, dctc);
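+/*
+ * Out-of-order data placement is usable only when the device can force
+ * DP ordering; the per-transport ooo capability bit is then checked
+ * per QP type.
+ */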
+static bool get_dp_ooo_cap(struct mlx5_core_dev *mdev, enum ib_qp_type qp_type)
+{
+ if (!MLX5_CAP_GEN_2(mdev, dp_ordering_force))
+ return false;
- qp->state = IB_QPS_RESET;
+ switch (qp_type) {
+ case IB_QPT_RC:
+ return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_rc);
+ case IB_QPT_XRC_INI:
+ case IB_QPT_XRC_TGT:
+ return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_xrc);
+ case IB_QPT_UC:
+ return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_uc);
+ case IB_QPT_UD:
+ return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_ud);
+ case MLX5_IB_QPT_DCI:
+ case MLX5_IB_QPT_DCT:
+ return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_dc);
+ default:
+ return false;
+ }
+}
- return &qp->ibqp;
-err_free:
- kfree(qp);
- return ERR_PTR(err);
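+/*
+ * Move a single vendor (uverbs) flag from the command flags into
+ * qp->flags_en when the capability condition holds and clear it;
+ * otherwise leave it set so the caller can reject the leftover bits.
+ * The scatter-CQE flags are the exception and are silently dropped.
+ */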
+static void process_vendor_flag(struct mlx5_ib_dev *dev, int *flags, int flag,
+ bool cond, struct mlx5_ib_qp *qp)
+{
+ if (!(*flags & flag))
+ return;
+
+ if (cond) {
+ qp->flags_en |= flag;
+ *flags &= ~flag;
+ return;
+ }
+
+ switch (flag) {
+ case MLX5_QP_FLAG_SCATTER_CQE:
+ case MLX5_QP_FLAG_ALLOW_SCATTER_CQE:
+ /*
+		 * We don't return an error if these flags were provided
+		 * but mlx5 doesn't have the right capability.
+ */
+ *flags &= ~(MLX5_QP_FLAG_SCATTER_CQE |
+ MLX5_QP_FLAG_ALLOW_SCATTER_CQE);
+ return;
+ default:
+ break;
+ }
+ mlx5_ib_dbg(dev, "Vendor create QP flag 0x%X is not supported\n", flag);
}
-static int set_mlx_qp_type(struct mlx5_ib_dev *dev,
- struct ib_qp_init_attr *init_attr,
- struct mlx5_ib_create_qp *ucmd,
- struct ib_udata *udata)
+static int process_vendor_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ void *ucmd, struct ib_qp_init_attr *attr)
{
- enum { MLX_QP_FLAGS = MLX5_QP_FLAG_TYPE_DCT | MLX5_QP_FLAG_TYPE_DCI };
- int err;
+ struct mlx5_core_dev *mdev = dev->mdev;
+ bool cond;
+ int flags;
- if (!udata)
+ if (attr->rwq_ind_tbl)
+ flags = ((struct mlx5_ib_create_qp_rss *)ucmd)->flags;
+ else
+ flags = ((struct mlx5_ib_create_qp *)ucmd)->flags;
+
+ switch (flags & (MLX5_QP_FLAG_TYPE_DCT | MLX5_QP_FLAG_TYPE_DCI)) {
+ case MLX5_QP_FLAG_TYPE_DCI:
+ qp->type = MLX5_IB_QPT_DCI;
+ break;
+ case MLX5_QP_FLAG_TYPE_DCT:
+ qp->type = MLX5_IB_QPT_DCT;
+ break;
+ default:
+ if (qp->type != IB_QPT_DRIVER)
+ break;
+ /*
+		 * It is IB_QPT_DRIVER and either no subtype or a
+		 * wrong subtype was provided.
+ */
return -EINVAL;
+ }
- if (udata->inlen < sizeof(*ucmd)) {
- mlx5_ib_dbg(dev, "create_qp user command is smaller than expected\n");
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TYPE_DCI, true, qp);
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TYPE_DCT, true, qp);
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_DCI_STREAM,
+ MLX5_CAP_GEN(mdev, log_max_dci_stream_channels),
+ qp);
+
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SIGNATURE, true, qp);
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SCATTER_CQE,
+ MLX5_CAP_GEN(mdev, sctr_data_cqe), qp);
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_ALLOW_SCATTER_CQE,
+ MLX5_CAP_GEN(mdev, sctr_data_cqe), qp);
+
+ if (qp->type == IB_QPT_RAW_PACKET) {
+ cond = MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) ||
+ MLX5_CAP_ETH(mdev, tunnel_stateless_gre) ||
+ MLX5_CAP_ETH(mdev, tunnel_stateless_geneve_rx);
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TUNNEL_OFFLOADS,
+ cond, qp);
+ process_vendor_flag(dev, &flags,
+ MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC, true,
+ qp);
+ process_vendor_flag(dev, &flags,
+ MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC, true,
+ qp);
+ }
+
+ if (qp->type == IB_QPT_RC)
+ process_vendor_flag(dev, &flags,
+ MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE,
+ MLX5_CAP_GEN(mdev, qp_packet_based), qp);
+
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_BFREG_INDEX, true, qp);
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_UAR_PAGE_INDEX, true, qp);
+
+ cond = qp->flags_en & ~(MLX5_QP_FLAG_TUNNEL_OFFLOADS |
+ MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
+ MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC);
+ if (attr->rwq_ind_tbl && cond) {
+ mlx5_ib_dbg(dev, "RSS RAW QP has unsupported flags 0x%X\n",
+ cond);
return -EINVAL;
}
- err = ib_copy_from_udata(ucmd, udata, sizeof(*ucmd));
- if (err)
- return err;
- if ((ucmd->flags & MLX_QP_FLAGS) == MLX5_QP_FLAG_TYPE_DCI) {
- init_attr->qp_type = MLX5_IB_QPT_DCI;
- } else {
- if ((ucmd->flags & MLX_QP_FLAGS) == MLX5_QP_FLAG_TYPE_DCT) {
- init_attr->qp_type = MLX5_IB_QPT_DCT;
- } else {
- mlx5_ib_dbg(dev, "Invalid QP flags\n");
- return -EINVAL;
- }
+ if (flags)
+ mlx5_ib_dbg(dev, "udata has unsupported flags 0x%X\n", flags);
+
+ return (flags) ? -EINVAL : 0;
}
- if (!MLX5_CAP_GEN(dev->mdev, dct)) {
- mlx5_ib_dbg(dev, "DC transport is not supported\n");
- return -EOPNOTSUPP;
+static void process_create_flag(struct mlx5_ib_dev *dev, int *flags, int flag,
+ bool cond, struct mlx5_ib_qp *qp)
+{
+ if (!(*flags & flag))
+ return;
+
+ if (cond) {
+ qp->flags |= flag;
+ *flags &= ~flag;
+ return;
}
+ mlx5_ib_dbg(dev, "Verbs create QP flag 0x%X is not supported\n", flag);
+}
+
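+/*
+ * Same idea for the verbs create_flags: each supported flag is accepted
+ * only when the matching device capability is present; anything left
+ * over makes QP creation fail with -EOPNOTSUPP.
+ */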
+static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct ib_qp_init_attr *attr)
+{
+ enum ib_qp_type qp_type = qp->type;
+ struct mlx5_core_dev *mdev = dev->mdev;
+ int create_flags = attr->create_flags;
+ bool cond;
+
+ if (qp_type == MLX5_IB_QPT_DCT)
+ return (create_flags) ? -EINVAL : 0;
+
+ if (qp_type == IB_QPT_RAW_PACKET && attr->rwq_ind_tbl)
+ return (create_flags) ? -EINVAL : 0;
+
+ process_create_flag(dev, &create_flags, IB_QP_CREATE_NETIF_QP,
+ mlx5_get_flow_namespace(dev->mdev,
+ MLX5_FLOW_NAMESPACE_BYPASS),
+ qp);
+ process_create_flag(dev, &create_flags,
+ IB_QP_CREATE_INTEGRITY_EN,
+ MLX5_CAP_GEN(mdev, sho), qp);
+ process_create_flag(dev, &create_flags,
+ IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
+ MLX5_CAP_GEN(mdev, block_lb_mc), qp);
+ process_create_flag(dev, &create_flags, IB_QP_CREATE_CROSS_CHANNEL,
+ MLX5_CAP_GEN(mdev, cd), qp);
+ process_create_flag(dev, &create_flags, IB_QP_CREATE_MANAGED_SEND,
+ MLX5_CAP_GEN(mdev, cd), qp);
+ process_create_flag(dev, &create_flags, IB_QP_CREATE_MANAGED_RECV,
+ MLX5_CAP_GEN(mdev, cd), qp);
+
+ if (qp_type == IB_QPT_UD) {
+ process_create_flag(dev, &create_flags,
+ IB_QP_CREATE_IPOIB_UD_LSO,
+ MLX5_CAP_GEN(mdev, ipoib_basic_offloads),
+ qp);
+ cond = MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_IB;
+ process_create_flag(dev, &create_flags, IB_QP_CREATE_SOURCE_QPN,
+ cond, qp);
+ }
+
+ if (qp_type == IB_QPT_RAW_PACKET) {
+ cond = MLX5_CAP_GEN(mdev, eth_net_offloads) &&
+ MLX5_CAP_ETH(mdev, scatter_fcs);
+ process_create_flag(dev, &create_flags,
+ IB_QP_CREATE_SCATTER_FCS, cond, qp);
+
+ cond = MLX5_CAP_GEN(mdev, eth_net_offloads) &&
+ MLX5_CAP_ETH(mdev, vlan_cap);
+ process_create_flag(dev, &create_flags,
+ IB_QP_CREATE_CVLAN_STRIPPING, cond, qp);
+ }
+
+ process_create_flag(dev, &create_flags,
+ IB_QP_CREATE_PCI_WRITE_END_PADDING,
+ MLX5_CAP_GEN(mdev, end_pad), qp);
+
+ process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1,
+ true, qp);
+
+ if (create_flags) {
+ mlx5_ib_dbg(dev, "Create QP has unsupported flags 0x%X\n",
+ create_flags);
+ return -EOPNOTSUPP;
+ }
return 0;
}
-struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *verbs_init_attr,
- struct ib_udata *udata)
+static int process_udata_size(struct mlx5_ib_dev *dev,
+ struct mlx5_create_qp_params *params)
{
- struct mlx5_ib_dev *dev;
- struct mlx5_ib_qp *qp;
- u16 xrcdn = 0;
- int err;
- struct ib_qp_init_attr mlx_init_attr;
- struct ib_qp_init_attr *init_attr = verbs_init_attr;
- struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
- udata, struct mlx5_ib_ucontext, ibucontext);
+ size_t ucmd = sizeof(struct mlx5_ib_create_qp);
+ struct ib_udata *udata = params->udata;
+ size_t outlen = udata->outlen;
+ size_t inlen = udata->inlen;
- if (pd) {
- dev = to_mdev(pd->device);
+ params->outlen = min(outlen, sizeof(struct mlx5_ib_create_qp_resp));
+ params->ucmd_size = ucmd;
+ if (!params->is_rss_raw) {
+ /* User has old rdma-core, which doesn't support ECE */
+ size_t min_inlen =
+ offsetof(struct mlx5_ib_create_qp, ece_options);
- if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
- if (!ucontext) {
- mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n");
- return ERR_PTR(-EINVAL);
- } else if (!ucontext->cqe_version) {
- mlx5_ib_dbg(dev, "Raw Packet QP is only supported for CQE version > 0\n");
- return ERR_PTR(-EINVAL);
- }
- }
- } else {
- /* being cautious here */
- if (init_attr->qp_type != IB_QPT_XRC_TGT &&
- init_attr->qp_type != MLX5_IB_QPT_REG_UMR) {
- pr_warn("%s: no PD for transport %s\n", __func__,
- ib_qp_type_str(init_attr->qp_type));
- return ERR_PTR(-EINVAL);
- }
- dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
+ /*
+		 * We will check in check_ucmd_data() that the user
+		 * cleared everything after inlen.
+ */
+ params->inlen = (inlen < min_inlen) ? 0 : min(inlen, ucmd);
+ goto out;
}
- if (init_attr->qp_type == IB_QPT_DRIVER) {
- struct mlx5_ib_create_qp ucmd;
+ /* RSS RAW QP */
+ if (inlen < offsetofend(struct mlx5_ib_create_qp_rss, flags))
+ return -EINVAL;
- init_attr = &mlx_init_attr;
- memcpy(init_attr, verbs_init_attr, sizeof(*verbs_init_attr));
- err = set_mlx_qp_type(dev, init_attr, &ucmd, udata);
- if (err)
- return ERR_PTR(err);
+ if (outlen < offsetofend(struct mlx5_ib_create_qp_resp, bfreg_index))
+ return -EINVAL;
- if (init_attr->qp_type == MLX5_IB_QPT_DCI) {
- if (init_attr->cap.max_recv_wr ||
- init_attr->cap.max_recv_sge) {
- mlx5_ib_dbg(dev, "DCI QP requires zero size receive queue\n");
- return ERR_PTR(-EINVAL);
- }
- } else {
- return mlx5_ib_create_dct(pd, init_attr, &ucmd, udata);
- }
+ ucmd = sizeof(struct mlx5_ib_create_qp_rss);
+ params->ucmd_size = ucmd;
+ if (inlen > ucmd && !ib_is_udata_cleared(udata, ucmd, inlen - ucmd))
+ return -EINVAL;
+
+ params->inlen = min(ucmd, inlen);
+out:
+ if (!params->inlen)
+ mlx5_ib_dbg(dev, "udata is too small\n");
+
+ return (params->inlen) ? 0 : -EINVAL;
+}
+
+static int create_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_params *params)
+{
+ int err;
+
+ if (params->is_rss_raw) {
+ err = create_rss_raw_qp_tir(dev, pd, qp, params);
+ goto out;
}
- switch (init_attr->qp_type) {
+ switch (qp->type) {
+ case MLX5_IB_QPT_DCT:
+ err = create_dct(dev, pd, qp, params);
+ break;
+ case MLX5_IB_QPT_DCI:
+ err = create_dci(dev, pd, qp, params);
+ break;
case IB_QPT_XRC_TGT:
- case IB_QPT_XRC_INI:
- if (!MLX5_CAP_GEN(dev->mdev, xrc)) {
- mlx5_ib_dbg(dev, "XRC not supported\n");
- return ERR_PTR(-ENOSYS);
- }
- init_attr->recv_cq = NULL;
- if (init_attr->qp_type == IB_QPT_XRC_TGT) {
- xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
- init_attr->send_cq = NULL;
- }
-
- /* fall through */
- case IB_QPT_RAW_PACKET:
- case IB_QPT_RC:
- case IB_QPT_UC:
- case IB_QPT_UD:
- case IB_QPT_SMI:
+ err = create_xrc_tgt_qp(dev, qp, params);
+ break;
+ case IB_QPT_GSI:
+ err = mlx5_ib_create_gsi(pd, qp, params->attr);
+ break;
case MLX5_IB_QPT_HW_GSI:
+ rdma_restrack_no_track(&qp->ibqp.res);
+ fallthrough;
case MLX5_IB_QPT_REG_UMR:
- case MLX5_IB_QPT_DCI:
- qp = kzalloc(sizeof(*qp), GFP_KERNEL);
- if (!qp)
- return ERR_PTR(-ENOMEM);
-
- err = create_qp_common(dev, pd, init_attr, udata, qp);
- if (err) {
- mlx5_ib_dbg(dev, "create_qp_common failed\n");
- kfree(qp);
- return ERR_PTR(err);
- }
-
- if (is_qp0(init_attr->qp_type))
- qp->ibqp.qp_num = 0;
- else if (is_qp1(init_attr->qp_type))
- qp->ibqp.qp_num = 1;
+ default:
+ if (params->udata)
+ err = create_user_qp(dev, pd, qp, params);
else
- qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn;
+ err = create_kernel_qp(dev, pd, qp, params);
+ }
- mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
- qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn,
- init_attr->recv_cq ? to_mcq(init_attr->recv_cq)->mcq.cqn : -1,
- init_attr->send_cq ? to_mcq(init_attr->send_cq)->mcq.cqn : -1);
+out:
+ if (err) {
+ mlx5_ib_err(dev, "Create QP type %d failed\n", qp->type);
+ return err;
+ }
- qp->trans_qp.xrcdn = xrcdn;
+ if (is_qp0(qp->type))
+ qp->ibqp.qp_num = 0;
+ else if (is_qp1(qp->type))
+ qp->ibqp.qp_num = 1;
+ else
+ qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn;
- break;
+ mlx5_ib_dbg(dev,
+ "QP type %d, ib qpn 0x%X, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x, ece 0x%x\n",
+ qp->type, qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn,
+ params->attr->recv_cq ? to_mcq(params->attr->recv_cq)->mcq.cqn :
+ -1,
+ params->attr->send_cq ? to_mcq(params->attr->send_cq)->mcq.cqn :
+ -1,
+ params->resp.ece_options);
- case IB_QPT_GSI:
- return mlx5_ib_gsi_create_qp(pd, init_attr);
+ return 0;
+}
+
+static int check_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct ib_qp_init_attr *attr)
+{
+ int ret = 0;
- case IB_QPT_RAW_IPV6:
- case IB_QPT_RAW_ETHERTYPE:
- case IB_QPT_MAX:
+ switch (qp->type) {
+ case MLX5_IB_QPT_DCT:
+ ret = (!attr->srq || !attr->recv_cq) ? -EINVAL : 0;
+ break;
+ case MLX5_IB_QPT_DCI:
+ ret = (attr->cap.max_recv_wr || attr->cap.max_recv_sge) ?
+ -EINVAL :
+ 0;
+ break;
+ case IB_QPT_RAW_PACKET:
+ ret = (attr->rwq_ind_tbl && attr->send_cq) ? -EINVAL : 0;
+ break;
default:
- mlx5_ib_dbg(dev, "unsupported qp type %d\n",
- init_attr->qp_type);
- /* Don't support raw QPs */
- return ERR_PTR(-EINVAL);
+ break;
}
- if (verbs_init_attr->qp_type == IB_QPT_DRIVER)
- qp->qp_sub_type = init_attr->qp_type;
+ if (ret)
+ mlx5_ib_dbg(dev, "QP type %d has wrong attributes\n", qp->type);
+
+ return ret;
+}
+
+static int get_qp_uidx(struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_params *params)
+{
+ struct mlx5_ib_create_qp *ucmd = params->ucmd;
+ struct ib_udata *udata = params->udata;
+ struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
+
+ if (params->is_rss_raw)
+ return 0;
- return &qp->ibqp;
+ return get_qp_user_index(ucontext, ucmd, sizeof(*ucmd), &params->uidx);
}
static int mlx5_ib_destroy_dct(struct mlx5_ib_qp *mqp)
@@ -2761,7 +3206,7 @@ static int mlx5_ib_destroy_dct(struct mlx5_ib_qp *mqp)
if (mqp->state == IB_QPS_RTR) {
int err;
- err = mlx5_core_destroy_dct(dev->mdev, &mqp->dct.mdct);
+ err = mlx5_core_destroy_dct(dev, &mqp->dct.mdct);
if (err) {
mlx5_ib_warn(dev, "failed to destroy DCT %d\n", err);
return err;
@@ -2769,36 +3214,165 @@ static int mlx5_ib_destroy_dct(struct mlx5_ib_qp *mqp)
}
kfree(mqp->dct.in);
- kfree(mqp);
return 0;
}
+static int check_ucmd_data(struct mlx5_ib_dev *dev,
+ struct mlx5_create_qp_params *params)
+{
+ struct ib_udata *udata = params->udata;
+ size_t size, last;
+ int ret;
+
+ if (params->is_rss_raw)
+ /*
+		 * These QPs don't have a "reserved" field in their
+		 * create_qp input struct, so their data is always valid.
+ */
+ last = sizeof(struct mlx5_ib_create_qp_rss);
+ else
+ last = offsetof(struct mlx5_ib_create_qp, reserved);
+
+ if (udata->inlen <= last)
+ return 0;
+
+ /*
+	 * The user provides different create_qp structures based on the
+	 * flow, and we need to know whether the memory after our
+	 * struct create_qp ends was cleared.
+ */
+ size = udata->inlen - last;
+ ret = ib_is_udata_cleared(params->udata, last, size);
+ if (!ret)
+ mlx5_ib_dbg(
+ dev,
+ "udata is not cleared, inlen = %zu, ucmd = %zu, last = %zu, size = %zu\n",
+ udata->inlen, params->ucmd_size, last, size);
+ return ret ? 0 : -EINVAL;
+}
+
+int mlx5_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct mlx5_create_qp_params params = {};
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct ib_pd *pd = ibqp->pd;
+ enum ib_qp_type type;
+ int err;
+
+ err = mlx5_ib_dev_res_srq_init(dev);
+ if (err)
+ return err;
+
+ err = check_qp_type(dev, attr, &type);
+ if (err)
+ return err;
+
+ err = check_valid_flow(dev, pd, attr, udata);
+ if (err)
+ return err;
+
+ params.udata = udata;
+ params.uidx = MLX5_IB_DEFAULT_UIDX;
+ params.attr = attr;
+ params.is_rss_raw = !!attr->rwq_ind_tbl;
+
+ if (udata) {
+ err = process_udata_size(dev, &params);
+ if (err)
+ return err;
+
+ err = check_ucmd_data(dev, &params);
+ if (err)
+ return err;
+
+ params.ucmd = kzalloc(params.ucmd_size, GFP_KERNEL);
+ if (!params.ucmd)
+ return -ENOMEM;
+
+ err = ib_copy_from_udata(params.ucmd, udata, params.inlen);
+ if (err)
+ goto free_ucmd;
+ }
+
+ mutex_init(&qp->mutex);
+ qp->type = type;
+ if (udata) {
+ err = process_vendor_flags(dev, qp, params.ucmd, attr);
+ if (err)
+ goto free_ucmd;
+
+ err = get_qp_uidx(qp, &params);
+ if (err)
+ goto free_ucmd;
+ }
+ err = process_create_flags(dev, qp, attr);
+ if (err)
+ goto free_ucmd;
+
+ err = check_qp_attr(dev, qp, attr);
+ if (err)
+ goto free_ucmd;
+
+ err = create_qp(dev, pd, qp, &params);
+ if (err)
+ goto free_ucmd;
+
+ kfree(params.ucmd);
+ params.ucmd = NULL;
+
+ if (udata)
+ /*
+		 * It is safe to copy the response for all user create QP flows,
+ * including MLX5_IB_QPT_DCT, which doesn't need it.
+ * In that case, resp will be filled with zeros.
+ */
+ err = ib_copy_to_udata(udata, &params.resp, params.outlen);
+ if (err)
+ goto destroy_qp;
+
+ return 0;
+
+destroy_qp:
+ switch (qp->type) {
+ case MLX5_IB_QPT_DCT:
+ mlx5_ib_destroy_dct(qp);
+ break;
+ case IB_QPT_GSI:
+ mlx5_ib_destroy_gsi(qp);
+ break;
+ default:
+ destroy_qp_common(dev, qp, udata);
+ }
+
+free_ucmd:
+ kfree(params.ucmd);
+ return err;
+}
+
int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
struct mlx5_ib_qp *mqp = to_mqp(qp);
- if (unlikely(qp->qp_type == IB_QPT_GSI))
- return mlx5_ib_gsi_destroy_qp(qp);
+ if (mqp->type == IB_QPT_GSI)
+ return mlx5_ib_destroy_gsi(mqp);
- if (mqp->qp_sub_type == MLX5_IB_QPT_DCT)
+ if (mqp->type == MLX5_IB_QPT_DCT)
return mlx5_ib_destroy_dct(mqp);
destroy_qp_common(dev, mqp, udata);
-
- kfree(mqp);
-
return 0;
}
-static int to_mlx5_access_flags(struct mlx5_ib_qp *qp,
- const struct ib_qp_attr *attr,
- int attr_mask, __be32 *hw_access_flags_be)
+static int set_qpc_atomic_flags(struct mlx5_ib_qp *qp,
+ const struct ib_qp_attr *attr, int attr_mask,
+ void *qpc)
{
- u8 dest_rd_atomic;
- u32 access_flags, hw_access_flags = 0;
-
struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
+ u8 dest_rd_atomic;
+ u32 access_flags;
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
dest_rd_atomic = attr->max_dest_rd_atomic;
@@ -2813,24 +3387,20 @@ static int to_mlx5_access_flags(struct mlx5_ib_qp *qp,
if (!dest_rd_atomic)
access_flags &= IB_ACCESS_REMOTE_WRITE;
- if (access_flags & IB_ACCESS_REMOTE_READ)
- hw_access_flags |= MLX5_QP_BIT_RRE;
+ MLX5_SET(qpc, qpc, rre, !!(access_flags & IB_ACCESS_REMOTE_READ));
+
if (access_flags & IB_ACCESS_REMOTE_ATOMIC) {
int atomic_mode;
- atomic_mode = get_atomic_mode(dev, qp->ibqp.qp_type);
+ atomic_mode = get_atomic_mode(dev, qp);
if (atomic_mode < 0)
return -EOPNOTSUPP;
- hw_access_flags |= MLX5_QP_BIT_RAE;
- hw_access_flags |= atomic_mode << MLX5_ATOMIC_MODE_OFFSET;
+ MLX5_SET(qpc, qpc, rae, 1);
+ MLX5_SET(qpc, qpc, atomic_mode, atomic_mode);
}
- if (access_flags & IB_ACCESS_REMOTE_WRITE)
- hw_access_flags |= MLX5_QP_BIT_RWE;
-
- *hw_access_flags_be = cpu_to_be32(hw_access_flags);
-
+ MLX5_SET(qpc, qpc, rwe, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
return 0;
}
@@ -2840,20 +3410,60 @@ enum {
MLX5_PATH_FLAG_COUNTER = 1 << 2,
};
-static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
+static int mlx5_to_ib_rate_map(u8 rate)
{
- if (rate == IB_RATE_PORT_CURRENT)
+ static const int rates[] = { IB_RATE_PORT_CURRENT, IB_RATE_56_GBPS,
+ IB_RATE_25_GBPS, IB_RATE_100_GBPS,
+ IB_RATE_200_GBPS, IB_RATE_50_GBPS,
+ IB_RATE_400_GBPS };
+
+ if (rate < ARRAY_SIZE(rates))
+ return rates[rate];
+
+ return rate - MLX5_STAT_RATE_OFFSET;
+}
+
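+/*
+ * The low hardware stat_rate values form an extended enumeration
+ * (port current, 56/25/100/200/50/400 Gbps); all other IB rates keep
+ * the legacy "rate + MLX5_STAT_RATE_OFFSET" encoding.
+ */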
+static int ib_to_mlx5_rate_map(u8 rate)
+{
+ switch (rate) {
+ case IB_RATE_PORT_CURRENT:
return 0;
+ case IB_RATE_56_GBPS:
+ return 1;
+ case IB_RATE_25_GBPS:
+ return 2;
+ case IB_RATE_100_GBPS:
+ return 3;
+ case IB_RATE_200_GBPS:
+ return 4;
+ case IB_RATE_50_GBPS:
+ return 5;
+ case IB_RATE_400_GBPS:
+ return 6;
+ default:
+ return rate + MLX5_STAT_RATE_OFFSET;
+ }
- if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_600_GBPS)
+ return 0;
+}
+
+int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate)
+{
+ u32 stat_rate_support;
+
+ if (rate == IB_RATE_PORT_CURRENT || rate == IB_RATE_800_GBPS ||
+ rate == IB_RATE_1600_GBPS)
+ return 0;
+
+ if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_1600_GBPS)
return -EINVAL;
+ stat_rate_support = MLX5_CAP_GEN(dev->mdev, stat_rate_support);
while (rate != IB_RATE_PORT_CURRENT &&
- !(1 << (rate + MLX5_STAT_RATE_OFFSET) &
- MLX5_CAP_GEN(dev->mdev, stat_rate_support)))
+ !(1 << ib_to_mlx5_rate_map(rate) & stat_rate_support))
--rate;
- return rate ? rate + MLX5_STAT_RATE_OFFSET : rate;
+ return ib_to_mlx5_rate_map(rate);
}
static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
@@ -2876,7 +3486,7 @@ static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1));
- err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen);
+ err = mlx5_core_modify_tis(dev, sq->tisn, in);
kvfree(in);
@@ -2903,18 +3513,29 @@ static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
MLX5_SET(tisc, tisc, lag_tx_port_affinity, tx_affinity);
- err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen);
+ err = mlx5_core_modify_tis(dev, sq->tisn, in);
kvfree(in);
return err;
}
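+/*
+ * RoCEv2 UDP source port: use the AH flow label when one is set,
+ * otherwise derive a flow label from the local/remote QP numbers and
+ * fold it into the udp_sport field of the address path.
+ */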
+static void mlx5_set_path_udp_sport(void *path, const struct rdma_ah_attr *ah,
+ u32 lqpn, u32 rqpn)
+
+{
+ u32 fl = ah->grh.flow_label;
+
+ if (!fl)
+ fl = rdma_calc_flow_label(lqpn, rqpn);
+
+ MLX5_SET(ads, path, udp_sport, rdma_flow_label_to_udp_sport(fl));
+}
+
static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
- const struct rdma_ah_attr *ah,
- struct mlx5_qp_path *path, u8 port, int attr_mask,
- u32 path_flags, const struct ib_qp_attr *attr,
- bool alt)
+ const struct rdma_ah_attr *ah, void *path, u8 port,
+ int attr_mask, u32 path_flags,
+ const struct ib_qp_attr *attr, bool alt)
{
const struct ib_global_route *grh = rdma_ah_read_grh(ah);
int err;
@@ -2923,15 +3544,17 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
u8 sl = rdma_ah_get_sl(ah);
if (attr_mask & IB_QP_PKEY_INDEX)
- path->pkey_index = cpu_to_be16(alt ? attr->alt_pkey_index :
- attr->pkey_index);
+ MLX5_SET(ads, path, pkey_index,
+ alt ? attr->alt_pkey_index : attr->pkey_index);
if (ah_flags & IB_AH_GRH) {
- if (grh->sgid_index >=
- dev->mdev->port_caps[port - 1].gid_table_len) {
+ const struct ib_port_immutable *immutable;
+
+ immutable = ib_port_immutable_read(&dev->ib_dev, port);
+ if (grh->sgid_index >= immutable->gid_tbl_len) {
pr_err("sgid_index (%u) too large. max is %d\n",
grh->sgid_index,
- dev->mdev->port_caps[port - 1].gid_table_len);
+ immutable->gid_tbl_len);
return -EINVAL;
}
}
@@ -2940,47 +3563,51 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (!(ah_flags & IB_AH_GRH))
return -EINVAL;
- memcpy(path->rmac, ah->roce.dmac, sizeof(ah->roce.dmac));
- if (qp->ibqp.qp_type == IB_QPT_RC ||
- qp->ibqp.qp_type == IB_QPT_UC ||
- qp->ibqp.qp_type == IB_QPT_XRC_INI ||
- qp->ibqp.qp_type == IB_QPT_XRC_TGT)
- path->udp_sport =
- mlx5_get_roce_udp_sport(dev, ah->grh.sgid_attr);
- path->dci_cfi_prio_sl = (sl & 0x7) << 4;
+ ether_addr_copy(MLX5_ADDR_OF(ads, path, rmac_47_32),
+ ah->roce.dmac);
+ if ((qp->type == IB_QPT_RC ||
+ qp->type == IB_QPT_UC ||
+ qp->type == IB_QPT_XRC_INI ||
+ qp->type == IB_QPT_XRC_TGT) &&
+ (grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) &&
+ (attr_mask & IB_QP_DEST_QPN))
+ mlx5_set_path_udp_sport(path, ah,
+ qp->ibqp.qp_num,
+ attr->dest_qp_num);
+ MLX5_SET(ads, path, eth_prio, sl & 0x7);
gid_type = ah->grh.sgid_attr->gid_type;
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
- path->ecn_dscp = (grh->traffic_class >> 2) & 0x3f;
+ MLX5_SET(ads, path, dscp, grh->traffic_class >> 2);
} else {
- path->fl_free_ar = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
- path->fl_free_ar |=
- (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x40 : 0;
- path->rlid = cpu_to_be16(rdma_ah_get_dlid(ah));
- path->grh_mlid = rdma_ah_get_path_bits(ah) & 0x7f;
- if (ah_flags & IB_AH_GRH)
- path->grh_mlid |= 1 << 7;
- path->dci_cfi_prio_sl = sl & 0xf;
+ MLX5_SET(ads, path, fl, !!(path_flags & MLX5_PATH_FLAG_FL));
+ MLX5_SET(ads, path, free_ar,
+ !!(path_flags & MLX5_PATH_FLAG_FREE_AR));
+ MLX5_SET(ads, path, rlid, rdma_ah_get_dlid(ah));
+ MLX5_SET(ads, path, mlid, rdma_ah_get_path_bits(ah));
+ MLX5_SET(ads, path, grh, !!(ah_flags & IB_AH_GRH));
+ MLX5_SET(ads, path, sl, sl);
}
if (ah_flags & IB_AH_GRH) {
- path->mgid_index = grh->sgid_index;
- path->hop_limit = grh->hop_limit;
- path->tclass_flowlabel =
- cpu_to_be32((grh->traffic_class << 20) |
- (grh->flow_label));
- memcpy(path->rgid, grh->dgid.raw, 16);
+ MLX5_SET(ads, path, src_addr_index, grh->sgid_index);
+ MLX5_SET(ads, path, hop_limit, grh->hop_limit);
+ MLX5_SET(ads, path, tclass, grh->traffic_class);
+ MLX5_SET(ads, path, flow_label, grh->flow_label);
+ memcpy(MLX5_ADDR_OF(ads, path, rgid_rip), grh->dgid.raw,
+ sizeof(grh->dgid.raw));
}
- err = ib_rate_to_mlx5(dev, rdma_ah_get_static_rate(ah));
+ err = mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah));
if (err < 0)
return err;
- path->static_rate = err;
- path->port = port;
+ MLX5_SET(ads, path, stat_rate, err);
+ MLX5_SET(ads, path, vhca_port_num, port);
if (attr_mask & IB_QP_TIMEOUT)
- path->ackto_lt = (alt ? attr->alt_timeout : attr->timeout) << 3;
+ MLX5_SET(ads, path, ack_timeout,
+ alt ? attr->alt_timeout : attr->timeout);
- if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
+ if ((qp->type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
return modify_raw_packet_eth_prio(dev->mdev,
&qp->raw_packet_qp.sq,
sl & 0xf, qp->ibqp.pd);
@@ -2995,10 +3622,12 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_PKEY_INDEX |
- MLX5_QP_OPTPAR_PRI_PORT,
+ MLX5_QP_OPTPAR_PRI_PORT |
+ MLX5_QP_OPTPAR_LAG_TX_AFF,
[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_PKEY_INDEX |
- MLX5_QP_OPTPAR_PRI_PORT,
+ MLX5_QP_OPTPAR_PRI_PORT |
+ MLX5_QP_OPTPAR_LAG_TX_AFF,
[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX |
MLX5_QP_OPTPAR_Q_KEY |
MLX5_QP_OPTPAR_PRI_PORT,
@@ -3006,17 +3635,20 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_PKEY_INDEX |
- MLX5_QP_OPTPAR_PRI_PORT,
+ MLX5_QP_OPTPAR_PRI_PORT |
+ MLX5_QP_OPTPAR_LAG_TX_AFF,
},
[MLX5_QP_STATE_RTR] = {
[MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
MLX5_QP_OPTPAR_RRE |
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RWE |
- MLX5_QP_OPTPAR_PKEY_INDEX,
+ MLX5_QP_OPTPAR_PKEY_INDEX |
+ MLX5_QP_OPTPAR_LAG_TX_AFF,
[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
MLX5_QP_OPTPAR_RWE |
- MLX5_QP_OPTPAR_PKEY_INDEX,
+ MLX5_QP_OPTPAR_PKEY_INDEX |
+ MLX5_QP_OPTPAR_LAG_TX_AFF,
[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX |
MLX5_QP_OPTPAR_Q_KEY,
[MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX |
@@ -3025,7 +3657,8 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
MLX5_QP_OPTPAR_RRE |
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RWE |
- MLX5_QP_OPTPAR_PKEY_INDEX,
+ MLX5_QP_OPTPAR_PKEY_INDEX |
+ MLX5_QP_OPTPAR_LAG_TX_AFF,
},
},
[MLX5_QP_STATE_RTR] = {
@@ -3085,6 +3718,17 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
MLX5_QP_OPTPAR_RRE,
},
},
+ [MLX5_QP_STATE_SQD] = {
+ [MLX5_QP_STATE_RTS] = {
+ [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
+ [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY,
+ [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE,
+ [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RNR_TIMEOUT |
+ MLX5_QP_OPTPAR_RWE |
+ MLX5_QP_OPTPAR_RAE |
+ MLX5_QP_OPTPAR_RRE,
+ },
+ },
};
static int ib_nr_to_mlx5_nr(int ib_mask)
@@ -3183,7 +3827,7 @@ static int modify_raw_packet_qp_rq(
"RAW PACKET QP counters are not supported on current FW\n");
}
- err = mlx5_core_modify_rq(dev->mdev, rq->base.mqp.qpn, in, inlen);
+ err = mlx5_core_modify_rq(dev->mdev, rq->base.mqp.qpn, in);
if (err)
goto out;
@@ -3246,7 +3890,7 @@ static int modify_raw_packet_qp_sq(
MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index);
}
- err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen);
+ err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in);
if (err) {
/* Remove new rate from table if failed */
if (new_rate_added)
@@ -3255,10 +3899,12 @@ static int modify_raw_packet_qp_sq(
}
/* Only remove the old rate after new rate was set */
- if ((old_rl.rate &&
- !mlx5_rl_are_equal(&old_rl, &new_rl)) ||
- (new_state != MLX5_SQC_STATE_RDY))
+ if ((old_rl.rate && !mlx5_rl_are_equal(&old_rl, &new_rl)) ||
+ (new_state != MLX5_SQC_STATE_RDY)) {
mlx5_rl_remove_rate(dev, &old_rl);
+ if (new_state != MLX5_SQC_STATE_RDY)
+ memset(&new_rl, 0, sizeof(new_rl));
+ }
ibqp->rl = new_rl;
sq->state = new_state;
@@ -3284,7 +3930,7 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
switch (raw_qp_param->operation) {
case MLX5_CMD_OP_RST2INIT_QP:
rq_state = MLX5_RQC_STATE_RDY;
- sq_state = MLX5_SQC_STATE_RDY;
+ sq_state = MLX5_SQC_STATE_RST;
break;
case MLX5_CMD_OP_2ERR_QP:
rq_state = MLX5_RQC_STATE_ERR;
@@ -3296,13 +3942,11 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
break;
case MLX5_CMD_OP_RTR2RTS_QP:
case MLX5_CMD_OP_RTS2RTS_QP:
- if (raw_qp_param->set_mask ==
- MLX5_RAW_QP_RATE_LIMIT) {
- modify_rq = 0;
- sq_state = sq->state;
- } else {
- return raw_qp_param->set_mask ? -EINVAL : 0;
- }
+ if (raw_qp_param->set_mask & ~MLX5_RAW_QP_RATE_LIMIT)
+ return -EINVAL;
+
+ modify_rq = 0;
+ sq_state = MLX5_SQC_STATE_RDY;
break;
case MLX5_CMD_OP_INIT2INIT_QP:
case MLX5_CMD_OP_INIT2RTR_QP:
@@ -3357,33 +4001,121 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
return 0;
}
-static unsigned int get_tx_affinity(struct mlx5_ib_dev *dev,
- struct mlx5_ib_pd *pd,
- struct mlx5_ib_qp_base *qp_base,
- u8 port_num, struct ib_udata *udata)
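+/*
+ * Round-robin the TX port affinity, per user context when one is
+ * available and per device port otherwise.
+ */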
+static unsigned int get_tx_affinity_rr(struct mlx5_ib_dev *dev,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
+ u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ atomic_t *tx_port_affinity;
+
+ if (ucontext)
+ tx_port_affinity = &ucontext->tx_port_affinity;
+ else
+ tx_port_affinity = &dev->port[port_num].roce.tx_port_affinity;
+
+ return (unsigned int)atomic_add_return(1, tx_port_affinity) %
+ (dev->lag_active ? dev->lag_ports : MLX5_CAP_GEN(dev->mdev, num_lag_ports)) + 1;
+}
+
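+/* Only these QP types take part in LAG TX port affinity assignment. */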
+static bool qp_supports_affinity(struct mlx5_ib_qp *qp)
+{
+ if ((qp->type == IB_QPT_RC) || (qp->type == IB_QPT_UD) ||
+ (qp->type == IB_QPT_UC) || (qp->type == IB_QPT_RAW_PACKET) ||
+ (qp->type == IB_QPT_XRC_INI) || (qp->type == IB_QPT_XRC_TGT) ||
+ (qp->type == MLX5_IB_QPT_DCI))
+ return true;
+ return false;
+}
+
+static unsigned int get_tx_affinity(struct ib_qp *qp,
+ const struct ib_qp_attr *attr,
+ int attr_mask, u8 init,
+ struct ib_udata *udata)
{
struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
udata, struct mlx5_ib_ucontext, ibucontext);
- unsigned int tx_port_affinity;
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_qp_base *qp_base;
+ unsigned int tx_affinity;
+
+ if (!(mlx5_ib_lag_should_assign_affinity(dev) &&
+ qp_supports_affinity(mqp)))
+ return 0;
- if (ucontext) {
- tx_port_affinity = (unsigned int)atomic_add_return(
- 1, &ucontext->tx_port_affinity) %
- MLX5_MAX_PORTS +
- 1;
+ if (mqp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)
+ tx_affinity = mqp->gsi_lag_port;
+ else if (init)
+ tx_affinity = get_tx_affinity_rr(dev, udata);
+ else if ((attr_mask & IB_QP_AV) && attr->xmit_slave)
+ tx_affinity =
+ mlx5_lag_get_slave_port(dev->mdev, attr->xmit_slave);
+ else
+ return 0;
+
+ qp_base = &mqp->trans_qp.base;
+ if (ucontext)
mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x ucontext %p\n",
- tx_port_affinity, qp_base->mqp.qpn, ucontext);
- } else {
- tx_port_affinity =
- (unsigned int)atomic_add_return(
- 1, &dev->port[port_num].roce.tx_port_affinity) %
- MLX5_MAX_PORTS +
- 1;
+ tx_affinity, qp_base->mqp.qpn, ucontext);
+ else
mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x\n",
- tx_port_affinity, qp_base->mqp.qpn);
- }
+ tx_affinity, qp_base->mqp.qpn);
+ return tx_affinity;
+}
+
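+/*
+ * Bind a flow counter set to the QP: raw packet QPs update the RQ
+ * counter_set_id through MODIFY_RQ, while other transports use a
+ * RTS2RTS modify with only the counter_set_id opt-param set.
+ */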
+static int __mlx5_ib_qp_set_raw_qp_counter(struct mlx5_ib_qp *qp, u32 set_id,
+ struct mlx5_core_dev *mdev)
+{
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+ u32 in[MLX5_ST_SZ_DW(modify_rq_in)] = {};
+ void *rqc;
+
+ if (!qp->rq.wqe_cnt)
+ return 0;
+
+ MLX5_SET(modify_rq_in, in, rq_state, rq->state);
+ MLX5_SET(modify_rq_in, in, uid, to_mpd(qp->ibqp.pd)->uid);
+
+ rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+ MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY);
+
+ MLX5_SET64(modify_rq_in, in, modify_bitmask,
+ MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
+ MLX5_SET(rqc, rqc, counter_set_id, set_id);
- return tx_port_affinity;
+ return mlx5_core_modify_rq(mdev, rq->base.mqp.qpn, in);
+}
+
+static int __mlx5_ib_qp_set_counter(struct ib_qp *qp,
+ struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ u32 in[MLX5_ST_SZ_DW(rts2rts_qp_in)] = {};
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_qp_base *base;
+ u32 set_id;
+ u32 *qpc;
+
+ if (counter)
+ set_id = counter->id;
+ else
+ set_id = mlx5_ib_get_counters_id(dev, mqp->port - 1);
+
+ if (mqp->type == IB_QPT_RAW_PACKET)
+ return __mlx5_ib_qp_set_raw_qp_counter(mqp, set_id, dev->mdev);
+
+ base = &mqp->trans_qp.base;
+ MLX5_SET(rts2rts_qp_in, in, opcode, MLX5_CMD_OP_RTS2RTS_QP);
+ MLX5_SET(rts2rts_qp_in, in, qpn, base->mqp.qpn);
+ MLX5_SET(rts2rts_qp_in, in, uid, base->mqp.uid);
+ MLX5_SET(rts2rts_qp_in, in, opt_param_mask,
+ MLX5_QP_OPTPAR_COUNTER_SET_ID);
+
+ qpc = MLX5_ADDR_OF(rts2rts_qp_in, in, qpc);
+ MLX5_SET(qpc, qpc, counter_set_id, set_id);
+ return mlx5_cmd_exec_in(dev->mdev, rts2rts_qp, in);
}
static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
@@ -3391,6 +4123,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
enum ib_qp_state cur_state,
enum ib_qp_state new_state,
const struct mlx5_ib_modify_qp *ucmd,
+ struct mlx5_ib_modify_qp_resp *resp,
struct ib_udata *udata)
{
static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
@@ -3418,6 +4151,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
[MLX5_QP_STATE_SQD] = {
[MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
[MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQD_RTS_QP,
},
[MLX5_QP_STATE_SQER] = {
[MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
@@ -3434,67 +4168,60 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct mlx5_ib_cq *send_cq, *recv_cq;
- struct mlx5_qp_context *context;
struct mlx5_ib_pd *pd;
- struct mlx5_ib_port *mibport = NULL;
enum mlx5_qp_state mlx5_cur, mlx5_new;
- enum mlx5_qp_optpar optpar;
+ void *qpc, *pri_path, *alt_path;
+ enum mlx5_qp_optpar optpar = 0;
+ u32 set_id = 0;
int mlx5_st;
int err;
u16 op;
u8 tx_affinity = 0;
- mlx5_st = to_mlx5_st(ibqp->qp_type == IB_QPT_DRIVER ?
- qp->qp_sub_type : ibqp->qp_type);
+ mlx5_st = to_mlx5_st(qp->type);
if (mlx5_st < 0)
return -EINVAL;
- context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
+ qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
+ if (!qpc)
return -ENOMEM;
- pd = get_pd(qp);
- context->flags = cpu_to_be32(mlx5_st << 16);
+ pd = to_mpd(qp->ibqp.pd);
+ MLX5_SET(qpc, qpc, st, mlx5_st);
if (!(attr_mask & IB_QP_PATH_MIG_STATE)) {
- context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
} else {
switch (attr->path_mig_state) {
case IB_MIG_MIGRATED:
- context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
break;
case IB_MIG_REARM:
- context->flags |= cpu_to_be32(MLX5_QP_PM_REARM << 11);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_REARM);
break;
case IB_MIG_ARMED:
- context->flags |= cpu_to_be32(MLX5_QP_PM_ARMED << 11);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_ARMED);
break;
}
}
- if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) {
- if ((ibqp->qp_type == IB_QPT_RC) ||
- (ibqp->qp_type == IB_QPT_UD &&
- !(qp->flags & MLX5_IB_QP_SQPN_QP1)) ||
- (ibqp->qp_type == IB_QPT_UC) ||
- (ibqp->qp_type == IB_QPT_RAW_PACKET) ||
- (ibqp->qp_type == IB_QPT_XRC_INI) ||
- (ibqp->qp_type == IB_QPT_XRC_TGT)) {
- if (dev->lag_active) {
- u8 p = mlx5_core_native_port_num(dev->mdev) - 1;
- tx_affinity = get_tx_affinity(dev, pd, base, p,
- udata);
- context->flags |= cpu_to_be32(tx_affinity << 24);
- }
- }
- }
+ tx_affinity = get_tx_affinity(ibqp, attr, attr_mask,
+ cur_state == IB_QPS_RESET &&
+ new_state == IB_QPS_INIT, udata);
- if (is_sqp(ibqp->qp_type)) {
- context->mtu_msgmax = (IB_MTU_256 << 5) | 8;
- } else if ((ibqp->qp_type == IB_QPT_UD &&
- !(qp->flags & MLX5_IB_QP_UNDERLAY)) ||
- ibqp->qp_type == MLX5_IB_QPT_REG_UMR) {
- context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
+ MLX5_SET(qpc, qpc, lag_tx_port_affinity, tx_affinity);
+ if (tx_affinity && new_state == IB_QPS_RTR &&
+ MLX5_CAP_GEN(dev->mdev, init2_lag_tx_port_affinity))
+ optpar |= MLX5_QP_OPTPAR_LAG_TX_AFF;
+
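/*
 * lag_tx_port_affinity is written unconditionally above (0 meaning no
 * affinity); the MLX5_QP_OPTPAR_LAG_TX_AFF bit is requested only on
 * transitions to RTR and only when the device reports the
 * init2_lag_tx_port_affinity capability.
 */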
+ if (is_sqp(qp->type)) {
+ MLX5_SET(qpc, qpc, mtu, IB_MTU_256);
+ MLX5_SET(qpc, qpc, log_msg_max, 8);
+ } else if ((qp->type == IB_QPT_UD &&
+ !(qp->flags & IB_QP_CREATE_SOURCE_QPN)) ||
+ qp->type == MLX5_IB_QPT_REG_UMR) {
+ MLX5_SET(qpc, qpc, mtu, IB_MTU_4096);
+ MLX5_SET(qpc, qpc, log_msg_max, 12);
} else if (attr_mask & IB_QP_PATH_MTU) {
if (attr->path_mtu < IB_MTU_256 ||
attr->path_mtu > IB_MTU_4096) {
@@ -3502,114 +4229,125 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
err = -EINVAL;
goto out;
}
- context->mtu_msgmax = (attr->path_mtu << 5) |
- (u8)MLX5_CAP_GEN(dev->mdev, log_max_msg);
+ MLX5_SET(qpc, qpc, mtu, attr->path_mtu);
+ MLX5_SET(qpc, qpc, log_msg_max,
+ MLX5_CAP_GEN(dev->mdev, log_max_msg));
}
if (attr_mask & IB_QP_DEST_QPN)
- context->log_pg_sz_remote_qpn = cpu_to_be32(attr->dest_qp_num);
+ MLX5_SET(qpc, qpc, remote_qpn, attr->dest_qp_num);
+
+ pri_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
+ alt_path = MLX5_ADDR_OF(qpc, qpc, secondary_address_path);
if (attr_mask & IB_QP_PKEY_INDEX)
- context->pri_path.pkey_index = cpu_to_be16(attr->pkey_index);
+ MLX5_SET(ads, pri_path, pkey_index, attr->pkey_index);
/* todo implement counter_index functionality */
- if (is_sqp(ibqp->qp_type))
- context->pri_path.port = qp->port;
+ if (dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI && is_qp0(qp->type)) {
+ MLX5_SET(ads, pri_path, vhca_port_num,
+ smi_to_native_portnum(dev, qp->port));
+ if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)
+ MLX5_SET(ads, pri_path, plane_index, qp->port);
+ } else if (is_sqp(qp->type))
+ MLX5_SET(ads, pri_path, vhca_port_num, qp->port);
if (attr_mask & IB_QP_PORT)
- context->pri_path.port = attr->port_num;
+ MLX5_SET(ads, pri_path, vhca_port_num, attr->port_num);
if (attr_mask & IB_QP_AV) {
- err = mlx5_set_path(dev, qp, &attr->ah_attr, &context->pri_path,
- attr_mask & IB_QP_PORT ? attr->port_num : qp->port,
+ err = mlx5_set_path(dev, qp, &attr->ah_attr, pri_path,
+ attr_mask & IB_QP_PORT ? attr->port_num :
+ qp->port,
attr_mask, 0, attr, false);
if (err)
goto out;
}
if (attr_mask & IB_QP_TIMEOUT)
- context->pri_path.ackto_lt |= attr->timeout << 3;
+ MLX5_SET(ads, pri_path, ack_timeout, attr->timeout);
if (attr_mask & IB_QP_ALT_PATH) {
- err = mlx5_set_path(dev, qp, &attr->alt_ah_attr,
- &context->alt_path,
+ err = mlx5_set_path(dev, qp, &attr->alt_ah_attr, alt_path,
attr->alt_port_num,
- attr_mask | IB_QP_PKEY_INDEX | IB_QP_TIMEOUT,
+ attr_mask | IB_QP_PKEY_INDEX |
+ IB_QP_TIMEOUT,
0, attr, true);
if (err)
goto out;
}
- get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
+ get_cqs(qp->type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
&send_cq, &recv_cq);
- context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn);
- context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0;
- context->cqn_recv = recv_cq ? cpu_to_be32(recv_cq->mcq.cqn) : 0;
- context->params1 = cpu_to_be32(MLX5_IB_ACK_REQ_FREQ << 28);
+ MLX5_SET(qpc, qpc, pd, pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn);
+ if (send_cq)
+ MLX5_SET(qpc, qpc, cqn_snd, send_cq->mcq.cqn);
+ if (recv_cq)
+ MLX5_SET(qpc, qpc, cqn_rcv, recv_cq->mcq.cqn);
+
+ MLX5_SET(qpc, qpc, log_ack_req_freq, MLX5_IB_ACK_REQ_FREQ);
if (attr_mask & IB_QP_RNR_RETRY)
- context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
+ MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
if (attr_mask & IB_QP_RETRY_CNT)
- context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
+ MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
- if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
- if (attr->max_rd_atomic)
- context->params1 |=
- cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
- }
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && attr->max_rd_atomic)
+ MLX5_SET(qpc, qpc, log_sra_max, fls(attr->max_rd_atomic - 1));
if (attr_mask & IB_QP_SQ_PSN)
- context->next_send_psn = cpu_to_be32(attr->sq_psn);
+ MLX5_SET(qpc, qpc, next_send_psn, attr->sq_psn);
- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
- if (attr->max_dest_rd_atomic)
- context->params2 |=
- cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
- }
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && attr->max_dest_rd_atomic)
+ MLX5_SET(qpc, qpc, log_rra_max,
+ fls(attr->max_dest_rd_atomic - 1));
if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
- __be32 access_flags;
-
- err = to_mlx5_access_flags(qp, attr, attr_mask, &access_flags);
+ err = set_qpc_atomic_flags(qp, attr, attr_mask, qpc);
if (err)
goto out;
-
- context->params2 |= access_flags;
}
if (attr_mask & IB_QP_MIN_RNR_TIMER)
- context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
+ MLX5_SET(qpc, qpc, min_rnr_nak, attr->min_rnr_timer);
if (attr_mask & IB_QP_RQ_PSN)
- context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
+ MLX5_SET(qpc, qpc, next_rcv_psn, attr->rq_psn);
if (attr_mask & IB_QP_QKEY)
- context->qkey = cpu_to_be32(attr->qkey);
+ MLX5_SET(qpc, qpc, q_key, attr->qkey);
if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
- context->db_rec_addr = cpu_to_be64(qp->db.dma);
+ MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num :
qp->port) - 1;
/* Underlay port should be used - index 0 function per port */
- if (qp->flags & MLX5_IB_QP_UNDERLAY)
+ if (qp->flags & IB_QP_CREATE_SOURCE_QPN)
port_num = 0;
- mibport = &dev->port[port_num];
- context->qp_counter_set_usr_page |=
- cpu_to_be32((u32)(mibport->cnts.set_id) << 24);
+ if (ibqp->counter)
+ set_id = ibqp->counter->id;
+ else
+ set_id = mlx5_ib_get_counters_id(dev, port_num);
+ MLX5_SET(qpc, qpc, counter_set_id, set_id);
}
if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
- context->sq_crq_size |= cpu_to_be16(1 << 4);
+ MLX5_SET(qpc, qpc, rlky, 1);
- if (qp->flags & MLX5_IB_QP_SQPN_QP1)
- context->deth_sqpn = cpu_to_be32(1);
+ if (qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)
+ MLX5_SET(qpc, qpc, deth_sqpn, 1);
+
+ if (qp->is_ooo_rq && cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
+ MLX5_SET(qpc, qpc, dp_ordering_1, 1);
+ MLX5_SET(qpc, qpc, dp_ordering_force, 1);
+ }
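/*
 * When userspace asked for out-of-order data placement
 * (MLX5_IB_MODIFY_QP_OOO_DP, tracked in qp->is_ooo_rq), the ordering
 * bits are programmed only during the INIT->RTR transition.
 */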
mlx5_cur = to_mlx5_state(cur_state);
mlx5_new = to_mlx5_state(new_state);
@@ -3621,16 +4359,16 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
}
op = optab[mlx5_cur][mlx5_new];
- optpar = ib_mask_to_mlx5_opt(attr_mask);
+ optpar |= ib_mask_to_mlx5_opt(attr_mask);
optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
- if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
- qp->flags & MLX5_IB_QP_UNDERLAY) {
+ if (qp->type == IB_QPT_RAW_PACKET ||
+ qp->flags & IB_QP_CREATE_SOURCE_QPN) {
struct mlx5_modify_raw_qp_param raw_qp_param = {};
raw_qp_param.operation = op;
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
- raw_qp_param.rq_q_ctr_id = mibport->cnts.set_id;
+ raw_qp_param.rq_q_ctr_id = set_id;
raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID;
}
@@ -3667,8 +4405,15 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
err = modify_raw_packet_qp(dev, qp, &raw_qp_param, tx_affinity);
} else {
- err = mlx5_core_qp_modify(dev->mdev, op, optpar, context,
- &base->mqp);
+ if (udata) {
+ /* For the kernel flows, the resp will stay zero */
+ resp->ece_options =
+ MLX5_CAP_GEN(dev->mdev, ece_support) ?
+ ucmd->ece_options : 0;
+ resp->response_length = sizeof(*resp);
+ }
+ err = mlx5_core_qp_modify(dev, op, optpar, qpc, &base->mqp,
+ &resp->ece_options);
}
if (err)
@@ -3690,7 +4435,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
* entries and reinitialize the QP.
*/
if (new_state == IB_QPS_RESET &&
- !ibqp->uobject && ibqp->qp_type != IB_QPT_XRC_TGT) {
+ !ibqp->uobject && qp->type != IB_QPT_XRC_TGT) {
mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
ibqp->srq ? to_msrq(ibqp->srq) : NULL);
if (send_cq != recv_cq)
@@ -3703,12 +4448,19 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
qp->sq.cur_post = 0;
if (qp->sq.wqe_cnt)
qp->sq.cur_edge = get_sq_edge(&qp->sq, 0);
+ qp->sq.last_poll = 0;
qp->db.db[MLX5_RCV_DBR] = 0;
qp->db.db[MLX5_SND_DBR] = 0;
}
+ if ((new_state == IB_QPS_RTS) && qp->counter_pending) {
+ err = __mlx5_ib_qp_set_counter(ibqp, ibqp->counter);
+ if (!err)
+ qp->counter_pending = 0;
+ }
+
out:
- kfree(context);
+ kfree(qpc);
return err;
}
@@ -3766,14 +4518,15 @@ static bool modify_dci_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state new
* Other transitions and attributes are illegal
*/
static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_udata *udata)
+ int attr_mask, struct mlx5_ib_modify_qp *ucmd,
+ struct ib_udata *udata)
{
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
enum ib_qp_state cur_state, new_state;
- int err = 0;
int required = IB_QP_STATE;
void *dctc;
+ int err;
if (!(attr_mask & IB_QP_STATE))
return -EINVAL;
@@ -3782,13 +4535,24 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
new_state = attr->qp_state;
dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry);
+ if (MLX5_CAP_GEN(dev->mdev, ece_support) && ucmd->ece_options)
+ /*
+ * DCT doesn't initialize QP till modify command is executed,
+ * so we need to overwrite previously set ECE field if user
+ * provided any value except zero, which means not set/not
+ * valid.
+ */
+ MLX5_SET(dctc, dctc, ece, ucmd->ece_options);
+
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ u16 set_id;
+
required |= IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT;
if (!is_valid_mask(attr_mask, required, 0))
return -EINVAL;
if (attr->port_num == 0 ||
- attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports)) {
+ attr->port_num > dev->num_ports) {
mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n",
attr->port_num, dev->num_ports);
return -EINVAL;
@@ -3800,7 +4564,7 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) {
int atomic_mode;
- atomic_mode = get_atomic_mode(dev, MLX5_IB_QPT_DCT);
+ atomic_mode = get_atomic_mode(dev, qp);
if (atomic_mode < 0)
return -EOPNOTSUPP;
@@ -3808,18 +4572,30 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
MLX5_SET(dctc, dctc, rae, 1);
}
MLX5_SET(dctc, dctc, pkey_index, attr->pkey_index);
- MLX5_SET(dctc, dctc, port, attr->port_num);
- MLX5_SET(dctc, dctc, counter_set_id, dev->port[attr->port_num - 1].cnts.set_id);
+ if (mlx5_lag_is_active(dev->mdev))
+ MLX5_SET(dctc, dctc, port,
+ get_tx_affinity_rr(dev, udata));
+ else
+ MLX5_SET(dctc, dctc, port, attr->port_num);
+ set_id = mlx5_ib_get_counters_id(dev, attr->port_num - 1);
+ MLX5_SET(dctc, dctc, counter_set_id, set_id);
+
+ qp->port = attr->port_num;
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
struct mlx5_ib_modify_qp_resp resp = {};
- u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {0};
- u32 min_resp_len = offsetof(typeof(resp), dctn) +
- sizeof(resp.dctn);
+ u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {};
+ u32 min_resp_len = offsetofend(typeof(resp), dctn);
if (udata->outlen < min_resp_len)
return -EINVAL;
- resp.response_length = min_resp_len;
+ /*
+ * If we don't have enough space for the ECE options,
+ * simply indicate it with resp.response_length.
+ */
+ resp.response_length = (udata->outlen < sizeof(resp)) ?
+ min_resp_len :
+ sizeof(resp);
required |= IB_QP_MIN_RNR_TIMER | IB_QP_AV | IB_QP_PATH_MTU;
if (!is_valid_mask(attr_mask, required, 0))
@@ -3830,48 +4606,104 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
MLX5_SET(dctc, dctc, mtu, attr->path_mtu);
MLX5_SET(dctc, dctc, my_addr_index, attr->ah_attr.grh.sgid_index);
MLX5_SET(dctc, dctc, hop_limit, attr->ah_attr.grh.hop_limit);
+ if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
+ MLX5_SET(dctc, dctc, eth_prio, attr->ah_attr.sl & 0x7);
+ if (qp->is_ooo_rq) {
+ MLX5_SET(dctc, dctc, dp_ordering_1, 1);
+ MLX5_SET(dctc, dctc, dp_ordering_force, 1);
+ }
- err = mlx5_core_create_dct(dev->mdev, &qp->dct.mdct, qp->dct.in,
+ err = mlx5_core_create_dct(dev, &qp->dct.mdct, qp->dct.in,
MLX5_ST_SZ_BYTES(create_dct_in), out,
sizeof(out));
+ err = mlx5_cmd_check(dev->mdev, err, qp->dct.in, out);
if (err)
return err;
resp.dctn = qp->dct.mdct.mqp.qpn;
+ if (MLX5_CAP_GEN(dev->mdev, ece_support))
+ resp.ece_options = MLX5_GET(create_dct_out, out, ece);
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err) {
- mlx5_core_destroy_dct(dev->mdev, &qp->dct.mdct);
+ mlx5_core_destroy_dct(dev, &qp->dct.mdct);
return err;
}
} else {
mlx5_ib_warn(dev, "Modify DCT: Invalid transition from %d to %d\n", cur_state, new_state);
return -EINVAL;
}
- if (err)
- qp->state = IB_QPS_ERR;
- else
- qp->state = new_state;
- return err;
+
+ qp->state = new_state;
+ return 0;
+}
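/*
 * DCT flow above in short: RESET->INIT only stages attributes in
 * qp->dct.in (port, pkey, access flags, counter), while INIT->RTR is
 * where the DCT object is actually created in firmware and its dctn
 * (plus the ECE options, when supported) is copied back to userspace.
 * Any other transition is rejected.
 */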
+
+static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp)
+{
+ if (dev->profile != &raw_eth_profile)
+ return true;
+
+ if (qp->type == IB_QPT_RAW_PACKET || qp->type == MLX5_IB_QPT_REG_UMR)
+ return true;
+
+ return false;
+}
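/*
 * When the device is bound to the raw Ethernet profile, modify_qp is
 * restricted to raw packet QPs and the internal REG_UMR QP.
 */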
+
+static int validate_rd_atomic(struct mlx5_ib_dev *dev, struct ib_qp_attr *attr,
+ int attr_mask, enum ib_qp_type qp_type)
+{
+ int log_max_ra_res;
+ int log_max_ra_req;
+
+ if (qp_type == MLX5_IB_QPT_DCI) {
+ log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev,
+ log_max_ra_res_dc);
+ log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev,
+ log_max_ra_req_dc);
+ } else {
+ log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev,
+ log_max_ra_res_qp);
+ log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev,
+ log_max_ra_req_qp);
+ }
+
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
+ attr->max_rd_atomic > log_max_ra_res) {
+ mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n",
+ attr->max_rd_atomic);
+ return false;
+ }
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
+ attr->max_dest_rd_atomic > log_max_ra_req) {
+ mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n",
+ attr->max_dest_rd_atomic);
+ return false;
+ }
+ return true;
}
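/*
 * Note that despite the "log_" prefix the two locals above hold
 * 1 << cap, i.e. the actual limits. DCI QPs are checked against the
 * DC-specific caps; every other type uses the per-QP caps.
 */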
int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_ib_modify_qp_resp resp = {};
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_ib_modify_qp ucmd = {};
enum ib_qp_type qp_type;
enum ib_qp_state cur_state, new_state;
- size_t required_cmd_sz;
int err = -EINVAL;
- int port;
+
+ if (!mlx5_ib_modify_qp_allowed(dev, qp))
+ return -EOPNOTSUPP;
+
+ if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT))
+ return -EOPNOTSUPP;
if (ibqp->rwq_ind_tbl)
return -ENOSYS;
if (udata && udata->inlen) {
- required_cmd_sz = offsetof(typeof(ucmd), reserved) +
- sizeof(ucmd.reserved);
- if (udata->inlen < required_cmd_sz)
+ if (udata->inlen < offsetofend(typeof(ucmd), ece_options))
return -EINVAL;
if (udata->inlen > sizeof(ucmd) &&
@@ -3883,35 +4715,32 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
min(udata->inlen, sizeof(ucmd))))
return -EFAULT;
- if (ucmd.comp_mask ||
- memchr_inv(&ucmd.reserved, 0, sizeof(ucmd.reserved)) ||
+ if (ucmd.comp_mask & ~MLX5_IB_MODIFY_QP_OOO_DP ||
memchr_inv(&ucmd.burst_info.reserved, 0,
sizeof(ucmd.burst_info.reserved)))
return -EOPNOTSUPP;
+
+ if (ucmd.comp_mask & MLX5_IB_MODIFY_QP_OOO_DP) {
+ if (!get_dp_ooo_cap(dev->mdev, qp->type))
+ return -EOPNOTSUPP;
+ qp->is_ooo_rq = 1;
+ }
}
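/*
 * The only extension currently accepted through ucmd.comp_mask is
 * MLX5_IB_MODIFY_QP_OOO_DP, and it is honoured only if the device
 * reports the matching dp_ooo capability for this QP type.
 */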
- if (unlikely(ibqp->qp_type == IB_QPT_GSI))
+ if (qp->type == IB_QPT_GSI)
return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask);
- if (ibqp->qp_type == IB_QPT_DRIVER)
- qp_type = qp->qp_sub_type;
- else
- qp_type = (unlikely(ibqp->qp_type == MLX5_IB_QPT_HW_GSI)) ?
- IB_QPT_GSI : ibqp->qp_type;
+ qp_type = (qp->type == MLX5_IB_QPT_HW_GSI) ? IB_QPT_GSI : qp->type;
if (qp_type == MLX5_IB_QPT_DCT)
- return mlx5_ib_modify_dct(ibqp, attr, attr_mask, udata);
+ return mlx5_ib_modify_dct(ibqp, attr, attr_mask, &ucmd, udata);
mutex_lock(&qp->mutex);
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
- if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) {
- port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
- }
-
- if (qp->flags & MLX5_IB_QP_UNDERLAY) {
+ if (qp->flags & IB_QP_CREATE_SOURCE_QPN) {
if (attr_mask & ~(IB_QP_STATE | IB_QP_CUR_STATE)) {
mlx5_ib_dbg(dev, "invalid attr_mask 0x%x when underlay QP is used\n",
attr_mask);
@@ -3922,7 +4751,7 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
!ib_modify_qp_is_ok(cur_state, new_state, qp_type,
attr_mask)) {
mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
- cur_state, new_state, ibqp->qp_type, attr_mask);
+ cur_state, new_state, qp->type, attr_mask);
goto out;
} else if (qp_type == MLX5_IB_QPT_DCI &&
!modify_dci_qp_is_ok(cur_state, new_state, attr_mask)) {
@@ -3939,31 +4768,14 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out;
}
- if (attr_mask & IB_QP_PKEY_INDEX) {
- port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
- if (attr->pkey_index >=
- dev->mdev->port_caps[port - 1].pkey_table_len) {
- mlx5_ib_dbg(dev, "invalid pkey index %d\n",
- attr->pkey_index);
- goto out;
- }
- }
-
- if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
- attr->max_rd_atomic >
- (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) {
- mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n",
- attr->max_rd_atomic);
+ if ((attr_mask & IB_QP_PKEY_INDEX) &&
+ attr->pkey_index >= dev->pkey_table_len) {
+ mlx5_ib_dbg(dev, "invalid pkey index %d\n", attr->pkey_index);
goto out;
}
- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
- attr->max_dest_rd_atomic >
- (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) {
- mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n",
- attr->max_dest_rd_atomic);
+ if (!validate_rd_atomic(dev, attr, attr_mask, qp_type))
goto out;
- }
if (cur_state == new_state && cur_state == IB_QPS_RESET) {
err = 0;
@@ -3971,1346 +4783,19 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
}
err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state,
- new_state, &ucmd, udata);
-
-out:
- mutex_unlock(&qp->mutex);
- return err;
-}
-
-static void _handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
- u32 wqe_sz, void **cur_edge)
-{
- u32 idx;
-
- idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1);
- *cur_edge = get_sq_edge(sq, idx);
+ new_state, &ucmd, &resp, udata);
- *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx);
-}
-
-/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the
- * next nearby edge and get new address translation for current WQE position.
- * @sq - SQ buffer.
- * @seg: Current WQE position (16B aligned).
- * @wqe_sz: Total current WQE size [16B].
- * @cur_edge: Updated current edge.
- */
-static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
- u32 wqe_sz, void **cur_edge)
-{
- if (likely(*seg != *cur_edge))
- return;
-
- _handle_post_send_edge(sq, seg, wqe_sz, cur_edge);
-}
-
-/* memcpy_send_wqe - copy data from src to WQE and update the relevant WQ's
- * pointers. At the end @seg is aligned to 16B regardless the copied size.
- * @sq - SQ buffer.
- * @cur_edge: Updated current edge.
- * @seg: Current WQE position (16B aligned).
- * @wqe_sz: Total current WQE size [16B].
- * @src: Pointer to copy from.
- * @n: Number of bytes to copy.
- */
-static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge,
- void **seg, u32 *wqe_sz, const void *src,
- size_t n)
-{
- while (likely(n)) {
- size_t leftlen = *cur_edge - *seg;
- size_t copysz = min_t(size_t, leftlen, n);
- size_t stride;
-
- memcpy(*seg, src, copysz);
-
- n -= copysz;
- src += copysz;
- stride = !n ? ALIGN(copysz, 16) : copysz;
- *seg += stride;
- *wqe_sz += stride >> 4;
- handle_post_send_edge(sq, seg, *wqe_sz, cur_edge);
- }
-}
-
-static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
-{
- struct mlx5_ib_cq *cq;
- unsigned cur;
-
- cur = wq->head - wq->tail;
- if (likely(cur + nreq < wq->max_post))
- return 0;
-
- cq = to_mcq(ib_cq);
- spin_lock(&cq->lock);
- cur = wq->head - wq->tail;
- spin_unlock(&cq->lock);
-
- return cur + nreq >= wq->max_post;
-}
-
-static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
- u64 remote_addr, u32 rkey)
-{
- rseg->raddr = cpu_to_be64(remote_addr);
- rseg->rkey = cpu_to_be32(rkey);
- rseg->reserved = 0;
-}
-
-static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
- void **seg, int *size, void **cur_edge)
-{
- struct mlx5_wqe_eth_seg *eseg = *seg;
-
- memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg));
-
- if (wr->send_flags & IB_SEND_IP_CSUM)
- eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM |
- MLX5_ETH_WQE_L4_CSUM;
-
- if (wr->opcode == IB_WR_LSO) {
- struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
- size_t left, copysz;
- void *pdata = ud_wr->header;
- size_t stride;
-
- left = ud_wr->hlen;
- eseg->mss = cpu_to_be16(ud_wr->mss);
- eseg->inline_hdr.sz = cpu_to_be16(left);
-
- /* memcpy_send_wqe should get a 16B align address. Hence, we
- * first copy up to the current edge and then, if needed,
- * fall-through to memcpy_send_wqe.
- */
- copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start,
- left);
- memcpy(eseg->inline_hdr.start, pdata, copysz);
- stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) -
- sizeof(eseg->inline_hdr.start) + copysz, 16);
- *size += stride / 16;
- *seg += stride;
-
- if (copysz < left) {
- handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
- left -= copysz;
- pdata += copysz;
- memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata,
- left);
- }
-
- return;
- }
-
- *seg += sizeof(struct mlx5_wqe_eth_seg);
- *size += sizeof(struct mlx5_wqe_eth_seg) / 16;
-}
-
-static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
- const struct ib_send_wr *wr)
-{
- memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av));
- dseg->av.dqp_dct = cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV);
- dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey);
-}
-
-static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
-{
- dseg->byte_count = cpu_to_be32(sg->length);
- dseg->lkey = cpu_to_be32(sg->lkey);
- dseg->addr = cpu_to_be64(sg->addr);
-}
-
-static u64 get_xlt_octo(u64 bytes)
-{
- return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) /
- MLX5_IB_UMR_OCTOWORD;
-}
-
-static __be64 frwr_mkey_mask(void)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_LEN |
- MLX5_MKEY_MASK_PAGE_SIZE |
- MLX5_MKEY_MASK_START_ADDR |
- MLX5_MKEY_MASK_EN_RINVAL |
- MLX5_MKEY_MASK_KEY |
- MLX5_MKEY_MASK_LR |
- MLX5_MKEY_MASK_LW |
- MLX5_MKEY_MASK_RR |
- MLX5_MKEY_MASK_RW |
- MLX5_MKEY_MASK_A |
- MLX5_MKEY_MASK_SMALL_FENCE |
- MLX5_MKEY_MASK_FREE;
-
- return cpu_to_be64(result);
-}
-
-static __be64 sig_mkey_mask(void)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_LEN |
- MLX5_MKEY_MASK_PAGE_SIZE |
- MLX5_MKEY_MASK_START_ADDR |
- MLX5_MKEY_MASK_EN_SIGERR |
- MLX5_MKEY_MASK_EN_RINVAL |
- MLX5_MKEY_MASK_KEY |
- MLX5_MKEY_MASK_LR |
- MLX5_MKEY_MASK_LW |
- MLX5_MKEY_MASK_RR |
- MLX5_MKEY_MASK_RW |
- MLX5_MKEY_MASK_SMALL_FENCE |
- MLX5_MKEY_MASK_FREE |
- MLX5_MKEY_MASK_BSF_EN;
-
- return cpu_to_be64(result);
-}
-
-static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
- struct mlx5_ib_mr *mr, bool umr_inline)
-{
- int size = mr->ndescs * mr->desc_size;
-
- memset(umr, 0, sizeof(*umr));
-
- umr->flags = MLX5_UMR_CHECK_NOT_FREE;
- if (umr_inline)
- umr->flags |= MLX5_UMR_INLINE;
- umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
- umr->mkey_mask = frwr_mkey_mask();
-}
-
-static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
-{
- memset(umr, 0, sizeof(*umr));
- umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
- umr->flags = MLX5_UMR_INLINE;
-}
-
-static __be64 get_umr_enable_mr_mask(void)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_KEY |
- MLX5_MKEY_MASK_FREE;
-
- return cpu_to_be64(result);
-}
-
-static __be64 get_umr_disable_mr_mask(void)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_FREE;
-
- return cpu_to_be64(result);
-}
-
-static __be64 get_umr_update_translation_mask(void)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_LEN |
- MLX5_MKEY_MASK_PAGE_SIZE |
- MLX5_MKEY_MASK_START_ADDR;
-
- return cpu_to_be64(result);
-}
-
-static __be64 get_umr_update_access_mask(int atomic)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_LR |
- MLX5_MKEY_MASK_LW |
- MLX5_MKEY_MASK_RR |
- MLX5_MKEY_MASK_RW;
-
- if (atomic)
- result |= MLX5_MKEY_MASK_A;
-
- return cpu_to_be64(result);
-}
-
-static __be64 get_umr_update_pd_mask(void)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_PD;
-
- return cpu_to_be64(result);
-}
-
-static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
-{
- if ((mask & MLX5_MKEY_MASK_PAGE_SIZE &&
- MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) ||
- (mask & MLX5_MKEY_MASK_A &&
- MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)))
- return -EPERM;
- return 0;
-}
-
-static int set_reg_umr_segment(struct mlx5_ib_dev *dev,
- struct mlx5_wqe_umr_ctrl_seg *umr,
- const struct ib_send_wr *wr, int atomic)
-{
- const struct mlx5_umr_wr *umrwr = umr_wr(wr);
-
- memset(umr, 0, sizeof(*umr));
-
- if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE)
- umr->flags = MLX5_UMR_CHECK_FREE; /* fail if free */
- else
- umr->flags = MLX5_UMR_CHECK_NOT_FREE; /* fail if not free */
-
- umr->xlt_octowords = cpu_to_be16(get_xlt_octo(umrwr->xlt_size));
- if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) {
- u64 offset = get_xlt_octo(umrwr->offset);
-
- umr->xlt_offset = cpu_to_be16(offset & 0xffff);
- umr->xlt_offset_47_16 = cpu_to_be32(offset >> 16);
- umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
- }
- if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
- umr->mkey_mask |= get_umr_update_translation_mask();
- if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) {
- umr->mkey_mask |= get_umr_update_access_mask(atomic);
- umr->mkey_mask |= get_umr_update_pd_mask();
- }
- if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR)
- umr->mkey_mask |= get_umr_enable_mr_mask();
- if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
- umr->mkey_mask |= get_umr_disable_mr_mask();
-
- if (!wr->num_sge)
- umr->flags |= MLX5_UMR_INLINE;
-
- return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask));
-}
-
-static u8 get_umr_flags(int acc)
-{
- return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) |
- (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) |
- (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) |
- (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) |
- MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
-}
-
-static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg,
- struct mlx5_ib_mr *mr,
- u32 key, int access)
-{
- int ndescs = ALIGN(mr->ndescs, 8) >> 1;
-
- memset(seg, 0, sizeof(*seg));
-
- if (mr->access_mode == MLX5_MKC_ACCESS_MODE_MTT)
- seg->log2_page_size = ilog2(mr->ibmr.page_size);
- else if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
- /* KLMs take twice the size of MTTs */
- ndescs *= 2;
-
- seg->flags = get_umr_flags(access) | mr->access_mode;
- seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00);
- seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
- seg->start_addr = cpu_to_be64(mr->ibmr.iova);
- seg->len = cpu_to_be64(mr->ibmr.length);
- seg->xlt_oct_size = cpu_to_be32(ndescs);
-}
-
-static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
-{
- memset(seg, 0, sizeof(*seg));
- seg->status = MLX5_MKEY_STATUS_FREE;
-}
-
-static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg,
- const struct ib_send_wr *wr)
-{
- const struct mlx5_umr_wr *umrwr = umr_wr(wr);
-
- memset(seg, 0, sizeof(*seg));
- if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
- seg->status = MLX5_MKEY_STATUS_FREE;
-
- seg->flags = convert_access(umrwr->access_flags);
- if (umrwr->pd)
- seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn);
- if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION &&
- !umrwr->length)
- seg->flags_pd |= cpu_to_be32(MLX5_MKEY_LEN64);
-
- seg->start_addr = cpu_to_be64(umrwr->virt_addr);
- seg->len = cpu_to_be64(umrwr->length);
- seg->log2_page_size = umrwr->page_shift;
- seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
- mlx5_mkey_variant(umrwr->mkey));
-}
-
-static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
- struct mlx5_ib_mr *mr,
- struct mlx5_ib_pd *pd)
-{
- int bcount = mr->desc_size * mr->ndescs;
-
- dseg->addr = cpu_to_be64(mr->desc_map);
- dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64));
- dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
-}
-
-static __be32 send_ieth(const struct ib_send_wr *wr)
-{
- switch (wr->opcode) {
- case IB_WR_SEND_WITH_IMM:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- return wr->ex.imm_data;
-
- case IB_WR_SEND_WITH_INV:
- return cpu_to_be32(wr->ex.invalidate_rkey);
-
- default:
- return 0;
- }
-}
-
-static u8 calc_sig(void *wqe, int size)
-{
- u8 *p = wqe;
- u8 res = 0;
- int i;
-
- for (i = 0; i < size; i++)
- res ^= p[i];
-
- return ~res;
-}
-
-static u8 wq_sig(void *wqe)
-{
- return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4);
-}
-
-static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
- void **wqe, int *wqe_sz, void **cur_edge)
-{
- struct mlx5_wqe_inline_seg *seg;
- size_t offset;
- int inl = 0;
- int i;
-
- seg = *wqe;
- *wqe += sizeof(*seg);
- offset = sizeof(*seg);
-
- for (i = 0; i < wr->num_sge; i++) {
- size_t len = wr->sg_list[i].length;
- void *addr = (void *)(unsigned long)(wr->sg_list[i].addr);
-
- inl += len;
-
- if (unlikely(inl > qp->max_inline_data))
- return -ENOMEM;
-
- while (likely(len)) {
- size_t leftlen;
- size_t copysz;
-
- handle_post_send_edge(&qp->sq, wqe,
- *wqe_sz + (offset >> 4),
- cur_edge);
-
- leftlen = *cur_edge - *wqe;
- copysz = min_t(size_t, leftlen, len);
-
- memcpy(*wqe, addr, copysz);
- len -= copysz;
- addr += copysz;
- *wqe += copysz;
- offset += copysz;
- }
- }
-
- seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
-
- *wqe_sz += ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
-
- return 0;
-}
-
-static u16 prot_field_size(enum ib_signature_type type)
-{
- switch (type) {
- case IB_SIG_TYPE_T10_DIF:
- return MLX5_DIF_SIZE;
- default:
- return 0;
- }
-}
-
-static u8 bs_selector(int block_size)
-{
- switch (block_size) {
- case 512: return 0x1;
- case 520: return 0x2;
- case 4096: return 0x3;
- case 4160: return 0x4;
- case 1073741824: return 0x5;
- default: return 0;
- }
-}
-
-static void mlx5_fill_inl_bsf(struct ib_sig_domain *domain,
- struct mlx5_bsf_inl *inl)
-{
- /* Valid inline section and allow BSF refresh */
- inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID |
- MLX5_BSF_REFRESH_DIF);
- inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag);
- inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag);
- /* repeating block */
- inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK;
- inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ?
- MLX5_DIF_CRC : MLX5_DIF_IPCS;
-
- if (domain->sig.dif.ref_remap)
- inl->dif_inc_ref_guard_check |= MLX5_BSF_INC_REFTAG;
-
- if (domain->sig.dif.app_escape) {
- if (domain->sig.dif.ref_escape)
- inl->dif_inc_ref_guard_check |= MLX5_BSF_APPREF_ESCAPE;
- else
- inl->dif_inc_ref_guard_check |= MLX5_BSF_APPTAG_ESCAPE;
- }
-
- inl->dif_app_bitmask_check =
- cpu_to_be16(domain->sig.dif.apptag_check_mask);
-}
-
-static int mlx5_set_bsf(struct ib_mr *sig_mr,
- struct ib_sig_attrs *sig_attrs,
- struct mlx5_bsf *bsf, u32 data_size)
-{
- struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig;
- struct mlx5_bsf_basic *basic = &bsf->basic;
- struct ib_sig_domain *mem = &sig_attrs->mem;
- struct ib_sig_domain *wire = &sig_attrs->wire;
-
- memset(bsf, 0, sizeof(*bsf));
-
- /* Basic + Extended + Inline */
- basic->bsf_size_sbs = 1 << 7;
- /* Input domain check byte mask */
- basic->check_byte_mask = sig_attrs->check_mask;
- basic->raw_data_size = cpu_to_be32(data_size);
-
- /* Memory domain */
- switch (sig_attrs->mem.sig_type) {
- case IB_SIG_TYPE_NONE:
- break;
- case IB_SIG_TYPE_T10_DIF:
- basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval);
- basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx);
- mlx5_fill_inl_bsf(mem, &bsf->m_inl);
- break;
- default:
- return -EINVAL;
- }
-
- /* Wire domain */
- switch (sig_attrs->wire.sig_type) {
- case IB_SIG_TYPE_NONE:
- break;
- case IB_SIG_TYPE_T10_DIF:
- if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval &&
- mem->sig_type == wire->sig_type) {
- /* Same block structure */
- basic->bsf_size_sbs |= 1 << 4;
- if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
- basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK;
- if (mem->sig.dif.app_tag == wire->sig.dif.app_tag)
- basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK;
- if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag)
- basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK;
- } else
- basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval);
-
- basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx);
- mlx5_fill_inl_bsf(wire, &bsf->w_inl);
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
- struct mlx5_ib_qp *qp, void **seg,
- int *size, void **cur_edge)
-{
- struct ib_sig_attrs *sig_attrs = wr->sig_attrs;
- struct ib_mr *sig_mr = wr->sig_mr;
- struct mlx5_bsf *bsf;
- u32 data_len = wr->wr.sg_list->length;
- u32 data_key = wr->wr.sg_list->lkey;
- u64 data_va = wr->wr.sg_list->addr;
- int ret;
- int wqe_size;
-
- if (!wr->prot ||
- (data_key == wr->prot->lkey &&
- data_va == wr->prot->addr &&
- data_len == wr->prot->length)) {
- /**
- * Source domain doesn't contain signature information
- * or data and protection are interleaved in memory.
- * So need construct:
- * ------------------
- * | data_klm |
- * ------------------
- * | BSF |
- * ------------------
- **/
- struct mlx5_klm *data_klm = *seg;
-
- data_klm->bcount = cpu_to_be32(data_len);
- data_klm->key = cpu_to_be32(data_key);
- data_klm->va = cpu_to_be64(data_va);
- wqe_size = ALIGN(sizeof(*data_klm), 64);
- } else {
- /**
- * Source domain contains signature information
- * So need construct a strided block format:
- * ---------------------------
- * | stride_block_ctrl |
- * ---------------------------
- * | data_klm |
- * ---------------------------
- * | prot_klm |
- * ---------------------------
- * | BSF |
- * ---------------------------
- **/
- struct mlx5_stride_block_ctrl_seg *sblock_ctrl;
- struct mlx5_stride_block_entry *data_sentry;
- struct mlx5_stride_block_entry *prot_sentry;
- u32 prot_key = wr->prot->lkey;
- u64 prot_va = wr->prot->addr;
- u16 block_size = sig_attrs->mem.sig.dif.pi_interval;
- int prot_size;
-
- sblock_ctrl = *seg;
- data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl);
- prot_sentry = (void *)data_sentry + sizeof(*data_sentry);
-
- prot_size = prot_field_size(sig_attrs->mem.sig_type);
- if (!prot_size) {
- pr_err("Bad block size given: %u\n", block_size);
- return -EINVAL;
- }
- sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size +
- prot_size);
- sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP);
- sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size);
- sblock_ctrl->num_entries = cpu_to_be16(2);
-
- data_sentry->bcount = cpu_to_be16(block_size);
- data_sentry->key = cpu_to_be32(data_key);
- data_sentry->va = cpu_to_be64(data_va);
- data_sentry->stride = cpu_to_be16(block_size);
-
- prot_sentry->bcount = cpu_to_be16(prot_size);
- prot_sentry->key = cpu_to_be32(prot_key);
- prot_sentry->va = cpu_to_be64(prot_va);
- prot_sentry->stride = cpu_to_be16(prot_size);
-
- wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) +
- sizeof(*prot_sentry), 64);
- }
-
- *seg += wqe_size;
- *size += wqe_size / 16;
- handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
-
- bsf = *seg;
- ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len);
- if (ret)
- return -EINVAL;
-
- *seg += sizeof(*bsf);
- *size += sizeof(*bsf) / 16;
- handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
-
- return 0;
-}
-
-static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
- const struct ib_sig_handover_wr *wr, u32 size,
- u32 length, u32 pdn)
-{
- struct ib_mr *sig_mr = wr->sig_mr;
- u32 sig_key = sig_mr->rkey;
- u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
-
- memset(seg, 0, sizeof(*seg));
-
- seg->flags = get_umr_flags(wr->access_flags) |
- MLX5_MKC_ACCESS_MODE_KLMS;
- seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
- seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
- MLX5_MKEY_BSF_EN | pdn);
- seg->len = cpu_to_be64(length);
- seg->xlt_oct_size = cpu_to_be32(get_xlt_octo(size));
- seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
-}
-
-static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
- u32 size)
-{
- memset(umr, 0, sizeof(*umr));
-
- umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE;
- umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
- umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE);
- umr->mkey_mask = sig_mkey_mask();
-}
-
-
-static int set_sig_umr_wr(const struct ib_send_wr *send_wr,
- struct mlx5_ib_qp *qp, void **seg, int *size,
- void **cur_edge)
-{
- const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
- struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr);
- u32 pdn = get_pd(qp)->pdn;
- u32 xlt_size;
- int region_len, ret;
-
- if (unlikely(wr->wr.num_sge != 1) ||
- unlikely(wr->access_flags & IB_ACCESS_REMOTE_ATOMIC) ||
- unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) ||
- unlikely(!sig_mr->sig->sig_status_checked))
- return -EINVAL;
-
- /* length of the protected region, data + protection */
- region_len = wr->wr.sg_list->length;
- if (wr->prot &&
- (wr->prot->lkey != wr->wr.sg_list->lkey ||
- wr->prot->addr != wr->wr.sg_list->addr ||
- wr->prot->length != wr->wr.sg_list->length))
- region_len += wr->prot->length;
-
- /**
- * KLM octoword size - if protection was provided
- * then we use strided block format (3 octowords),
- * else we use single KLM (1 octoword)
- **/
- xlt_size = wr->prot ? 0x30 : sizeof(struct mlx5_klm);
-
- set_sig_umr_segment(*seg, xlt_size);
- *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
- *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
-
- set_sig_mkey_segment(*seg, wr, xlt_size, region_len, pdn);
- *seg += sizeof(struct mlx5_mkey_seg);
- *size += sizeof(struct mlx5_mkey_seg) / 16;
- handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
-
- ret = set_sig_data_segment(wr, qp, seg, size, cur_edge);
- if (ret)
- return ret;
-
- sig_mr->sig->sig_status_checked = false;
- return 0;
-}
-
-static int set_psv_wr(struct ib_sig_domain *domain,
- u32 psv_idx, void **seg, int *size)
-{
- struct mlx5_seg_set_psv *psv_seg = *seg;
-
- memset(psv_seg, 0, sizeof(*psv_seg));
- psv_seg->psv_num = cpu_to_be32(psv_idx);
- switch (domain->sig_type) {
- case IB_SIG_TYPE_NONE:
- break;
- case IB_SIG_TYPE_T10_DIF:
- psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 |
- domain->sig.dif.app_tag);
- psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
- break;
- default:
- pr_err("Bad signature type (%d) is given.\n",
- domain->sig_type);
- return -EINVAL;
- }
-
- *seg += sizeof(*psv_seg);
- *size += sizeof(*psv_seg) / 16;
-
- return 0;
-}
-
-static int set_reg_wr(struct mlx5_ib_qp *qp,
- const struct ib_reg_wr *wr,
- void **seg, int *size, void **cur_edge)
-{
- struct mlx5_ib_mr *mr = to_mmr(wr->mr);
- struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
- size_t mr_list_size = mr->ndescs * mr->desc_size;
- bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD;
-
- if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
- mlx5_ib_warn(to_mdev(qp->ibqp.device),
- "Invalid IB_SEND_INLINE send flag\n");
- return -EINVAL;
- }
-
- set_reg_umr_seg(*seg, mr, umr_inline);
- *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
- *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
-
- set_reg_mkey_seg(*seg, mr, wr->key, wr->access);
- *seg += sizeof(struct mlx5_mkey_seg);
- *size += sizeof(struct mlx5_mkey_seg) / 16;
- handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
-
- if (umr_inline) {
- memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs,
- mr_list_size);
- *size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4);
- } else {
- set_reg_data_seg(*seg, mr, pd);
- *seg += sizeof(struct mlx5_wqe_data_seg);
- *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
- }
- return 0;
-}
-
-static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size,
- void **cur_edge)
-{
- set_linv_umr_seg(*seg);
- *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
- *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
- set_linv_mkey_seg(*seg);
- *seg += sizeof(struct mlx5_mkey_seg);
- *size += sizeof(struct mlx5_mkey_seg) / 16;
- handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
-}
-
-static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16)
-{
- __be32 *p = NULL;
- int i, j;
-
- pr_debug("dump WQE index %u:\n", idx);
- for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
- if ((i & 0xf) == 0) {
- p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx);
- pr_debug("WQBB at %p:\n", (void *)p);
- j = 0;
- idx = (idx + 1) & (qp->sq.wqe_cnt - 1);
- }
- pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
- be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]),
- be32_to_cpu(p[j + 3]));
- }
-}
-
-static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
- struct mlx5_wqe_ctrl_seg **ctrl,
- const struct ib_send_wr *wr, unsigned int *idx,
- int *size, void **cur_edge, int nreq,
- bool send_signaled, bool solicited)
-{
- if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
- return -ENOMEM;
-
- *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
- *seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx);
- *ctrl = *seg;
- *(uint32_t *)(*seg + 8) = 0;
- (*ctrl)->imm = send_ieth(wr);
- (*ctrl)->fm_ce_se = qp->sq_signal_bits |
- (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) |
- (solicited ? MLX5_WQE_CTRL_SOLICITED : 0);
-
- *seg += sizeof(**ctrl);
- *size = sizeof(**ctrl) / 16;
- *cur_edge = qp->sq.cur_edge;
-
- return 0;
-}
-
-static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
- struct mlx5_wqe_ctrl_seg **ctrl,
- const struct ib_send_wr *wr, unsigned *idx,
- int *size, void **cur_edge, int nreq)
-{
- return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq,
- wr->send_flags & IB_SEND_SIGNALED,
- wr->send_flags & IB_SEND_SOLICITED);
-}
-
-static void finish_wqe(struct mlx5_ib_qp *qp,
- struct mlx5_wqe_ctrl_seg *ctrl,
- void *seg, u8 size, void *cur_edge,
- unsigned int idx, u64 wr_id, int nreq, u8 fence,
- u32 mlx5_opcode)
-{
- u8 opmod = 0;
-
- ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
- mlx5_opcode | ((u32)opmod << 24));
- ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
- ctrl->fm_ce_se |= fence;
- if (unlikely(qp->wq_sig))
- ctrl->signature = wq_sig(ctrl);
-
- qp->sq.wrid[idx] = wr_id;
- qp->sq.w_list[idx].opcode = mlx5_opcode;
- qp->sq.wqe_head[idx] = qp->sq.head + nreq;
- qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
- qp->sq.w_list[idx].next = qp->sq.cur_post;
-
- /* We save the edge which was possibly updated during the WQE
- * construction, into SQ's cache.
- */
- seg = PTR_ALIGN(seg, MLX5_SEND_WQE_BB);
- qp->sq.cur_edge = (unlikely(seg == cur_edge)) ?
- get_sq_edge(&qp->sq, qp->sq.cur_post &
- (qp->sq.wqe_cnt - 1)) :
- cur_edge;
-}
-
-static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
- const struct ib_send_wr **bad_wr, bool drain)
-{
- struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */
- struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
- struct mlx5_core_dev *mdev = dev->mdev;
- struct mlx5_ib_qp *qp;
- struct mlx5_ib_mr *mr;
- struct mlx5_wqe_xrc_seg *xrc;
- struct mlx5_bf *bf;
- void *cur_edge;
- int uninitialized_var(size);
- unsigned long flags;
- unsigned idx;
- int err = 0;
- int num_sge;
- void *seg;
- int nreq;
- int i;
- u8 next_fence = 0;
- u8 fence;
-
- if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
- !drain)) {
- *bad_wr = wr;
- return -EIO;
- }
-
- if (unlikely(ibqp->qp_type == IB_QPT_GSI))
- return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
-
- qp = to_mqp(ibqp);
- bf = &qp->bf;
-
- spin_lock_irqsave(&qp->sq.lock, flags);
-
- for (nreq = 0; wr; nreq++, wr = wr->next) {
- if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
- mlx5_ib_warn(dev, "\n");
- err = -EINVAL;
- *bad_wr = wr;
- goto out;
- }
-
- num_sge = wr->num_sge;
- if (unlikely(num_sge > qp->sq.max_gs)) {
- mlx5_ib_warn(dev, "\n");
- err = -EINVAL;
- *bad_wr = wr;
- goto out;
- }
-
- err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge,
- nreq);
- if (err) {
- mlx5_ib_warn(dev, "\n");
- err = -ENOMEM;
- *bad_wr = wr;
- goto out;
- }
-
- if (wr->opcode == IB_WR_REG_MR) {
- fence = dev->umr_fence;
- next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
- } else {
- if (wr->send_flags & IB_SEND_FENCE) {
- if (qp->next_fence)
- fence = MLX5_FENCE_MODE_SMALL_AND_FENCE;
- else
- fence = MLX5_FENCE_MODE_FENCE;
- } else {
- fence = qp->next_fence;
- }
- }
-
- switch (ibqp->qp_type) {
- case IB_QPT_XRC_INI:
- xrc = seg;
- seg += sizeof(*xrc);
- size += sizeof(*xrc) / 16;
- /* fall through */
- case IB_QPT_RC:
- switch (wr->opcode) {
- case IB_WR_RDMA_READ:
- case IB_WR_RDMA_WRITE:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- set_raddr_seg(seg, rdma_wr(wr)->remote_addr,
- rdma_wr(wr)->rkey);
- seg += sizeof(struct mlx5_wqe_raddr_seg);
- size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
- break;
-
- case IB_WR_ATOMIC_CMP_AND_SWP:
- case IB_WR_ATOMIC_FETCH_AND_ADD:
- case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
- mlx5_ib_warn(dev, "Atomic operations are not supported yet\n");
- err = -ENOSYS;
- *bad_wr = wr;
- goto out;
-
- case IB_WR_LOCAL_INV:
- qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
- ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
- set_linv_wr(qp, &seg, &size, &cur_edge);
- num_sge = 0;
- break;
-
- case IB_WR_REG_MR:
- qp->sq.wr_data[idx] = IB_WR_REG_MR;
- ctrl->imm = cpu_to_be32(reg_wr(wr)->key);
- err = set_reg_wr(qp, reg_wr(wr), &seg, &size,
- &cur_edge);
- if (err) {
- *bad_wr = wr;
- goto out;
- }
- num_sge = 0;
- break;
-
- case IB_WR_REG_SIG_MR:
- qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR;
- mr = to_mmr(sig_handover_wr(wr)->sig_mr);
-
- ctrl->imm = cpu_to_be32(mr->ibmr.rkey);
- err = set_sig_umr_wr(wr, qp, &seg, &size,
- &cur_edge);
- if (err) {
- mlx5_ib_warn(dev, "\n");
- *bad_wr = wr;
- goto out;
- }
-
- finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
- wr->wr_id, nreq, fence,
- MLX5_OPCODE_UMR);
- /*
- * SET_PSV WQEs are not signaled and solicited
- * on error
- */
- err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
- &size, &cur_edge, nreq, false,
- true);
- if (err) {
- mlx5_ib_warn(dev, "\n");
- err = -ENOMEM;
- *bad_wr = wr;
- goto out;
- }
-
- err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->mem,
- mr->sig->psv_memory.psv_idx, &seg,
- &size);
- if (err) {
- mlx5_ib_warn(dev, "\n");
- *bad_wr = wr;
- goto out;
- }
-
- finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
- wr->wr_id, nreq, fence,
- MLX5_OPCODE_SET_PSV);
- err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
- &size, &cur_edge, nreq, false,
- true);
- if (err) {
- mlx5_ib_warn(dev, "\n");
- err = -ENOMEM;
- *bad_wr = wr;
- goto out;
- }
-
- err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire,
- mr->sig->psv_wire.psv_idx, &seg,
- &size);
- if (err) {
- mlx5_ib_warn(dev, "\n");
- *bad_wr = wr;
- goto out;
- }
-
- finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
- wr->wr_id, nreq, fence,
- MLX5_OPCODE_SET_PSV);
- qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
- num_sge = 0;
- goto skip_psv;
-
- default:
- break;
- }
- break;
-
- case IB_QPT_UC:
- switch (wr->opcode) {
- case IB_WR_RDMA_WRITE:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- set_raddr_seg(seg, rdma_wr(wr)->remote_addr,
- rdma_wr(wr)->rkey);
- seg += sizeof(struct mlx5_wqe_raddr_seg);
- size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
- break;
-
- default:
- break;
- }
- break;
-
- case IB_QPT_SMI:
- if (unlikely(!mdev->port_caps[qp->port - 1].has_smi)) {
- mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n");
- err = -EPERM;
- *bad_wr = wr;
- goto out;
- }
- /* fall through */
- case MLX5_IB_QPT_HW_GSI:
- set_datagram_seg(seg, wr);
- seg += sizeof(struct mlx5_wqe_datagram_seg);
- size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
- handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
-
- break;
- case IB_QPT_UD:
- set_datagram_seg(seg, wr);
- seg += sizeof(struct mlx5_wqe_datagram_seg);
- size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
- handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
-
- /* handle qp that supports ud offload */
- if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) {
- struct mlx5_wqe_eth_pad *pad;
-
- pad = seg;
- memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad));
- seg += sizeof(struct mlx5_wqe_eth_pad);
- size += sizeof(struct mlx5_wqe_eth_pad) / 16;
- set_eth_seg(wr, qp, &seg, &size, &cur_edge);
- handle_post_send_edge(&qp->sq, &seg, size,
- &cur_edge);
- }
- break;
- case MLX5_IB_QPT_REG_UMR:
- if (wr->opcode != MLX5_IB_WR_UMR) {
- err = -EINVAL;
- mlx5_ib_warn(dev, "bad opcode\n");
- goto out;
- }
- qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
- ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey);
- err = set_reg_umr_segment(dev, seg, wr, !!(MLX5_CAP_GEN(mdev, atomic)));
- if (unlikely(err))
- goto out;
- seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
- size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
- set_reg_mkey_segment(seg, wr);
- seg += sizeof(struct mlx5_mkey_seg);
- size += sizeof(struct mlx5_mkey_seg) / 16;
- handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
- break;
-
- default:
- break;
- }
-
- if (wr->send_flags & IB_SEND_INLINE && num_sge) {
- err = set_data_inl_seg(qp, wr, &seg, &size, &cur_edge);
- if (unlikely(err)) {
- mlx5_ib_warn(dev, "\n");
- *bad_wr = wr;
- goto out;
- }
- } else {
- for (i = 0; i < num_sge; i++) {
- handle_post_send_edge(&qp->sq, &seg, size,
- &cur_edge);
- if (likely(wr->sg_list[i].length)) {
- set_data_ptr_seg
- ((struct mlx5_wqe_data_seg *)seg,
- wr->sg_list + i);
- size += sizeof(struct mlx5_wqe_data_seg) / 16;
- seg += sizeof(struct mlx5_wqe_data_seg);
- }
- }
- }
-
- qp->next_fence = next_fence;
- finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq,
- fence, mlx5_ib_opcode[wr->opcode]);
-skip_psv:
- if (0)
- dump_wqe(qp, idx, size);
- }
-
-out:
- if (likely(nreq)) {
- qp->sq.head += nreq;
-
- /* Make sure that descriptors are written before
- * updating doorbell record and ringing the doorbell
- */
- wmb();
-
- qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
-
- /* Make sure doorbell record is visible to the HCA before
- * we hit doorbell */
- wmb();
-
- /* currently we support only regular doorbells */
- mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset);
- /* Make sure doorbells don't leak out of SQ spinlock
- * and reach the HCA out of order.
- */
- bf->offset ^= bf->buf_size;
- }
-
- spin_unlock_irqrestore(&qp->sq.lock, flags);
-
- return err;
-}
-
-int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
- const struct ib_send_wr **bad_wr)
-{
- return _mlx5_ib_post_send(ibqp, wr, bad_wr, false);
-}
-
-static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size)
-{
- sig->signature = calc_sig(sig, size);
-}
-
-static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
- const struct ib_recv_wr **bad_wr, bool drain)
-{
- struct mlx5_ib_qp *qp = to_mqp(ibqp);
- struct mlx5_wqe_data_seg *scat;
- struct mlx5_rwqe_sig *sig;
- struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
- struct mlx5_core_dev *mdev = dev->mdev;
- unsigned long flags;
- int err = 0;
- int nreq;
- int ind;
- int i;
-
- if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
- !drain)) {
- *bad_wr = wr;
- return -EIO;
- }
-
- if (unlikely(ibqp->qp_type == IB_QPT_GSI))
- return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr);
-
- spin_lock_irqsave(&qp->rq.lock, flags);
-
- ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
-
- for (nreq = 0; wr; nreq++, wr = wr->next) {
- if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
- err = -ENOMEM;
- *bad_wr = wr;
- goto out;
- }
-
- if (unlikely(wr->num_sge > qp->rq.max_gs)) {
- err = -EINVAL;
- *bad_wr = wr;
- goto out;
- }
-
- scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind);
- if (qp->wq_sig)
- scat++;
-
- for (i = 0; i < wr->num_sge; i++)
- set_data_ptr_seg(scat + i, wr->sg_list + i);
-
- if (i < qp->rq.max_gs) {
- scat[i].byte_count = 0;
- scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
- scat[i].addr = 0;
- }
-
- if (qp->wq_sig) {
- sig = (struct mlx5_rwqe_sig *)scat;
- set_sig_seg(sig, (qp->rq.max_gs + 1) << 2);
- }
-
- qp->rq.wrid[ind] = wr->wr_id;
-
- ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
- }
+ /* resp.response_length is set in ECE supported flows only */
+ if (!err && resp.response_length &&
+ udata->outlen >= resp.response_length)
+ /* Return -EFAULT to the user and expect him to destroy QP. */
+ err = ib_copy_to_udata(udata, &resp, resp.response_length);
out:
- if (likely(nreq)) {
- qp->rq.head += nreq;
-
- /* Make sure that descriptors are written before
- * doorbell record.
- */
- wmb();
-
- *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
- }
-
- spin_unlock_irqrestore(&qp->rq.lock, flags);
-
+ mutex_unlock(&qp->mutex);
return err;
}
-int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
- const struct ib_recv_wr **bad_wr)
-{
- return _mlx5_ib_post_recv(ibqp, wr, bad_wr, false);
-}
-
static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state)
{
switch (mlx5_state) {
@@ -5336,48 +4821,34 @@ static inline enum ib_mig_state to_ib_mig_state(int mlx5_mig_state)
}
}
-static int to_ib_qp_access_flags(int mlx5_flags)
-{
- int ib_flags = 0;
-
- if (mlx5_flags & MLX5_QP_BIT_RRE)
- ib_flags |= IB_ACCESS_REMOTE_READ;
- if (mlx5_flags & MLX5_QP_BIT_RWE)
- ib_flags |= IB_ACCESS_REMOTE_WRITE;
- if (mlx5_flags & MLX5_QP_BIT_RAE)
- ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
-
- return ib_flags;
-}
-
static void to_rdma_ah_attr(struct mlx5_ib_dev *ibdev,
- struct rdma_ah_attr *ah_attr,
- struct mlx5_qp_path *path)
+ struct rdma_ah_attr *ah_attr, void *path)
{
+ int port = MLX5_GET(ads, path, vhca_port_num);
+ int static_rate;
memset(ah_attr, 0, sizeof(*ah_attr));
- if (!path->port || path->port > ibdev->num_ports)
+ if (!port || port > ibdev->num_ports)
return;
- ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, path->port);
+ ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port);
- rdma_ah_set_port_num(ah_attr, path->port);
- rdma_ah_set_sl(ah_attr, path->dci_cfi_prio_sl & 0xf);
+ rdma_ah_set_port_num(ah_attr, port);
+ rdma_ah_set_sl(ah_attr, MLX5_GET(ads, path, sl));
- rdma_ah_set_dlid(ah_attr, be16_to_cpu(path->rlid));
- rdma_ah_set_path_bits(ah_attr, path->grh_mlid & 0x7f);
- rdma_ah_set_static_rate(ah_attr,
- path->static_rate ? path->static_rate - 5 : 0);
- if (path->grh_mlid & (1 << 7)) {
- u32 tc_fl = be32_to_cpu(path->tclass_flowlabel);
+ rdma_ah_set_dlid(ah_attr, MLX5_GET(ads, path, rlid));
+ rdma_ah_set_path_bits(ah_attr, MLX5_GET(ads, path, mlid));
- rdma_ah_set_grh(ah_attr, NULL,
- tc_fl & 0xfffff,
- path->mgid_index,
- path->hop_limit,
- (tc_fl >> 20) & 0xff);
- rdma_ah_set_dgid_raw(ah_attr, path->rgid);
+ static_rate = MLX5_GET(ads, path, stat_rate);
+ rdma_ah_set_static_rate(ah_attr, mlx5_to_ib_rate_map(static_rate));
+ if (MLX5_GET(ads, path, grh) ||
+ ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
+ rdma_ah_set_grh(ah_attr, NULL, MLX5_GET(ads, path, flow_label),
+ MLX5_GET(ads, path, src_addr_index),
+ MLX5_GET(ads, path, hop_limit),
+ MLX5_GET(ads, path, tclass));
+ rdma_ah_set_dgid_raw(ah_attr, MLX5_ADDR_OF(ads, path, rgid_rip));
}
}
@@ -5434,7 +4905,7 @@ static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state,
[MLX5_SQ_STATE_NA] = IB_QPS_RESET,
},
[MLX5_RQC_STATE_RDY] = {
- [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_RST] = MLX5_QP_STATE,
[MLX5_SQC_STATE_RDY] = MLX5_QP_STATE,
[MLX5_SQC_STATE_ERR] = IB_QPS_SQE,
[MLX5_SQ_STATE_NA] = MLX5_QP_STATE,
@@ -5446,7 +4917,7 @@ static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state,
[MLX5_SQ_STATE_NA] = IB_QPS_ERR,
},
[MLX5_RQ_STATE_NA] = {
- [MLX5_SQC_STATE_RST] = IB_QPS_RESET,
+ [MLX5_SQC_STATE_RST] = MLX5_QP_STATE,
[MLX5_SQC_STATE_RDY] = MLX5_QP_STATE,
[MLX5_SQC_STATE_ERR] = MLX5_QP_STATE,
[MLX5_SQ_STATE_NA] = MLX5_QP_STATE_BAD,
@@ -5499,61 +4970,60 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
struct ib_qp_attr *qp_attr)
{
int outlen = MLX5_ST_SZ_BYTES(query_qp_out);
- struct mlx5_qp_context *context;
- int mlx5_state;
+ void *qpc, *pri_path, *alt_path;
u32 *outb;
- int err = 0;
+ int err;
outb = kzalloc(outlen, GFP_KERNEL);
if (!outb)
return -ENOMEM;
- err = mlx5_core_qp_query(dev->mdev, &qp->trans_qp.base.mqp, outb,
- outlen);
+ err = mlx5_core_qp_query(dev, &qp->trans_qp.base.mqp, outb, outlen,
+ false);
if (err)
goto out;
- /* FIXME: use MLX5_GET rather than mlx5_qp_context manual struct */
- context = (struct mlx5_qp_context *)MLX5_ADDR_OF(query_qp_out, outb, qpc);
+ qpc = MLX5_ADDR_OF(query_qp_out, outb, qpc);
- mlx5_state = be32_to_cpu(context->flags) >> 28;
+ qp->state = to_ib_qp_state(MLX5_GET(qpc, qpc, state));
+ if (MLX5_GET(qpc, qpc, state) == MLX5_QP_STATE_SQ_DRAINING)
+ qp_attr->sq_draining = 1;
- qp->state = to_ib_qp_state(mlx5_state);
- qp_attr->path_mtu = context->mtu_msgmax >> 5;
- qp_attr->path_mig_state =
- to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
- qp_attr->qkey = be32_to_cpu(context->qkey);
- qp_attr->rq_psn = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff;
- qp_attr->sq_psn = be32_to_cpu(context->next_send_psn) & 0xffffff;
- qp_attr->dest_qp_num = be32_to_cpu(context->log_pg_sz_remote_qpn) & 0xffffff;
- qp_attr->qp_access_flags =
- to_ib_qp_access_flags(be32_to_cpu(context->params2));
+ qp_attr->path_mtu = MLX5_GET(qpc, qpc, mtu);
+ qp_attr->path_mig_state = to_ib_mig_state(MLX5_GET(qpc, qpc, pm_state));
+ qp_attr->qkey = MLX5_GET(qpc, qpc, q_key);
+ qp_attr->rq_psn = MLX5_GET(qpc, qpc, next_rcv_psn);
+ qp_attr->sq_psn = MLX5_GET(qpc, qpc, next_send_psn);
+ qp_attr->dest_qp_num = MLX5_GET(qpc, qpc, remote_qpn);
- if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
- to_rdma_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
- to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
- qp_attr->alt_pkey_index =
- be16_to_cpu(context->alt_path.pkey_index);
- qp_attr->alt_port_num =
- rdma_ah_get_port_num(&qp_attr->alt_ah_attr);
- }
+ if (MLX5_GET(qpc, qpc, rre))
+ qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ;
+ if (MLX5_GET(qpc, qpc, rwe))
+ qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_WRITE;
+ if (MLX5_GET(qpc, qpc, rae))
+ qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_ATOMIC;
- qp_attr->pkey_index = be16_to_cpu(context->pri_path.pkey_index);
- qp_attr->port_num = context->pri_path.port;
+ qp_attr->max_rd_atomic = 1 << MLX5_GET(qpc, qpc, log_sra_max);
+ qp_attr->max_dest_rd_atomic = 1 << MLX5_GET(qpc, qpc, log_rra_max);
+ qp_attr->min_rnr_timer = MLX5_GET(qpc, qpc, min_rnr_nak);
+ qp_attr->retry_cnt = MLX5_GET(qpc, qpc, retry_count);
+ qp_attr->rnr_retry = MLX5_GET(qpc, qpc, rnr_retry);
- /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
- qp_attr->sq_draining = mlx5_state == MLX5_QP_STATE_SQ_DRAINING;
+ pri_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
+ alt_path = MLX5_ADDR_OF(qpc, qpc, secondary_address_path);
- qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7);
+ if (qp->type == IB_QPT_RC || qp->type == IB_QPT_UC ||
+ qp->type == IB_QPT_XRC_INI || qp->type == IB_QPT_XRC_TGT) {
+ to_rdma_ah_attr(dev, &qp_attr->ah_attr, pri_path);
+ to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, alt_path);
+ qp_attr->alt_pkey_index = MLX5_GET(ads, alt_path, pkey_index);
+ qp_attr->alt_port_num = MLX5_GET(ads, alt_path, vhca_port_num);
+ }
- qp_attr->max_dest_rd_atomic =
- 1 << ((be32_to_cpu(context->params2) >> 21) & 0x7);
- qp_attr->min_rnr_timer =
- (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f;
- qp_attr->timeout = context->pri_path.ackto_lt >> 3;
- qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7;
- qp_attr->rnr_retry = (be32_to_cpu(context->params1) >> 13) & 0x7;
- qp_attr->alt_timeout = context->alt_path.ackto_lt >> 3;
+ qp_attr->pkey_index = MLX5_GET(ads, pri_path, pkey_index);
+ qp_attr->port_num = MLX5_GET(ads, pri_path, vhca_port_num);
+ qp_attr->timeout = MLX5_GET(ads, pri_path, ack_timeout);
+ qp_attr->alt_timeout = MLX5_GET(ads, alt_path, ack_timeout);
out:
kfree(outb);
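
Note on the hunk above: query_qp_attr() is converted from manual struct mlx5_qp_context parsing (be32_to_cpu plus shift-and-mask, e.g. `(be32_to_cpu(context->params1) >> 16) & 0x7` for the retry count) to the MLX5_GET()/MLX5_ADDR_OF() accessors that read named PRM fields. A minimal sketch of why the named-accessor style is preferable follows; the field offset and width are invented for illustration and are not the real QPC layout.

```c
/* Contrast manual shift/mask extraction with a named-field accessor.
 * The 3-bit "retry count" field at bit offset 16 is a hypothetical layout
 * chosen only to mirror the old open-coded expression in the diff.
 */
#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>		/* ntohl()/htonl() stand in for be32 helpers */

#define FLD_RETRY_CNT_SHIFT	16
#define FLD_RETRY_CNT_MASK	0x7

/* Old style: every caller repeats the shift and the mask. */
static unsigned int retry_cnt_manual(uint32_t params1_be)
{
	return (ntohl(params1_be) >> FLD_RETRY_CNT_SHIFT) & FLD_RETRY_CNT_MASK;
}

/* New style: one macro names the field; callers never see the layout. */
#define GET_FIELD(word_be, name) \
	((ntohl(word_be) >> FLD_##name##_SHIFT) & FLD_##name##_MASK)

int main(void)
{
	uint32_t params1 = htonl(5u << FLD_RETRY_CNT_SHIFT);

	printf("manual=%u accessor=%u\n",
	       retry_cnt_manual(params1), GET_FIELD(params1, RETRY_CNT));
	return 0;
}
```
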
@@ -5587,7 +5057,7 @@ static int mlx5_ib_dct_query_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *mqp,
if (!out)
return -ENOMEM;
- err = mlx5_core_dct_query(dev->mdev, dct, out, outlen);
+ err = mlx5_core_dct_query(dev, dct, out, outlen);
if (err)
goto out;
@@ -5607,7 +5077,7 @@ static int mlx5_ib_dct_query_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *mqp,
}
if (qp_attr_mask & IB_QP_PORT)
- qp_attr->port_num = MLX5_GET(dctc, dctc, port);
+ qp_attr->port_num = mqp->port;
if (qp_attr_mask & IB_QP_MIN_RNR_TIMER)
qp_attr->min_rnr_timer = MLX5_GET(dctc, dctc, min_rnr_nak);
if (qp_attr_mask & IB_QP_AV) {
@@ -5636,7 +5106,7 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
if (ibqp->rwq_ind_tbl)
return -ENOSYS;
- if (unlikely(ibqp->qp_type == IB_QPT_GSI))
+ if (qp->type == IB_QPT_GSI)
return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask,
qp_init_attr);
@@ -5644,14 +5114,14 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
memset(qp_init_attr, 0, sizeof(*qp_init_attr));
memset(qp_attr, 0, sizeof(*qp_attr));
- if (unlikely(qp->qp_sub_type == MLX5_IB_QPT_DCT))
+ if (unlikely(qp->type == MLX5_IB_QPT_DCT))
return mlx5_ib_dct_query_qp(dev, qp, qp_attr,
qp_attr_mask, qp_init_attr);
mutex_lock(&qp->mutex);
- if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
- qp->flags & MLX5_IB_QP_UNDERLAY) {
+ if (qp->type == IB_QPT_RAW_PACKET ||
+ qp->flags & IB_QP_CREATE_SOURCE_QPN) {
err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state);
if (err)
goto out;
@@ -5677,7 +5147,7 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
qp_attr->cap.max_send_sge = 0;
}
- qp_init_attr->qp_type = ibqp->qp_type;
+ qp_init_attr->qp_type = qp->type;
qp_init_attr->recv_cq = ibqp->recv_cq;
qp_init_attr->send_cq = ibqp->send_cq;
qp_init_attr->srq = ibqp->srq;
@@ -5685,18 +5155,7 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
qp_init_attr->cap = qp_attr->cap;
- qp_init_attr->create_flags = 0;
- if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
- qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
-
- if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
- qp_init_attr->create_flags |= IB_QP_CREATE_CROSS_CHANNEL;
- if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
- qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND;
- if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
- qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV;
- if (qp->flags & MLX5_IB_QP_SQPN_QP1)
- qp_init_attr->create_flags |= mlx5_ib_create_qp_sqpn_qp1();
+ qp_init_attr->create_flags = qp->flags;
qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
@@ -5706,41 +5165,23 @@ out:
return err;
}
-struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
- struct ib_udata *udata)
+int mlx5_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_xrcd *xrcd;
- int err;
+ struct mlx5_ib_dev *dev = to_mdev(ibxrcd->device);
+ struct mlx5_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
if (!MLX5_CAP_GEN(dev->mdev, xrc))
- return ERR_PTR(-ENOSYS);
-
- xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL);
- if (!xrcd)
- return ERR_PTR(-ENOMEM);
-
- err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0);
- if (err) {
- kfree(xrcd);
- return ERR_PTR(-ENOMEM);
- }
+ return -EOPNOTSUPP;
- return &xrcd->ibxrcd;
+ return mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0);
}
int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
- int err;
- err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0);
- if (err)
- mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
-
- kfree(xrcd);
- return 0;
+ return mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0);
}
static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type)
@@ -5773,7 +5214,7 @@ static int set_delay_drop(struct mlx5_ib_dev *dev)
if (dev->delay_drop.activate)
goto out;
- err = mlx5_core_set_delay_drop(dev->mdev, dev->delay_drop.timeout);
+ err = mlx5_core_set_delay_drop(dev, dev->delay_drop.timeout);
if (err)
goto out;
@@ -5792,6 +5233,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
struct mlx5_ib_dev *dev;
int has_net_offloads;
__be64 *rq_pas0;
+ int ts_format;
void *in;
void *rqc;
void *wq;
@@ -5800,6 +5242,10 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
dev = to_mdev(pd->device);
+ ts_format = get_rq_ts_format(dev, to_mcq(init_attr->cq));
+ if (ts_format < 0)
+ return ts_format;
+
inlen = MLX5_ST_SZ_BYTES(create_rq_in) + sizeof(u64) * rwq->rq_num_pas;
in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
@@ -5809,6 +5255,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
MLX5_SET(rqc, rqc, mem_rq_type,
MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
+ MLX5_SET(rqc, rqc, ts_format, ts_format);
MLX5_SET(rqc, rqc, user_index, rwq->user_index);
MLX5_SET(rqc, rqc, cqn, to_mcq(init_attr->cq)->mcq.cqn);
MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
@@ -5828,12 +5275,21 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
}
MLX5_SET(wq, wq, log_wq_stride, rwq->log_rq_stride);
if (rwq->create_flags & MLX5_IB_WQ_FLAGS_STRIDING_RQ) {
+ /*
+ * In firmware, the number of strides in each WQE is
+ * "512 * 2^single_wqe_log_num_of_strides".
+ * Values 3 to 8 are accepted as 10 to 15; values 9 to 18
+ * are accepted as 0 to 9.
+ */
+ static const u8 fw_map[] = { 10, 11, 12, 13, 14, 15, 0, 1,
+ 2, 3, 4, 5, 6, 7, 8, 9 };
MLX5_SET(wq, wq, two_byte_shift_en, rwq->two_byte_shift_en);
MLX5_SET(wq, wq, log_wqe_stride_size,
rwq->single_stride_log_num_of_bytes -
MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES);
- MLX5_SET(wq, wq, log_wqe_num_of_strides, rwq->log_num_strides -
- MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES);
+ MLX5_SET(wq, wq, log_wqe_num_of_strides,
+ fw_map[rwq->log_num_strides -
+ MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES]);
}
MLX5_SET(wq, wq, log_wq_sz, rwq->log_rq_size);
MLX5_SET(wq, wq, pd, to_mpd(pd)->pdn);
@@ -5869,14 +5325,14 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
MLX5_SET(rqc, rqc, delay_drop_en, 1);
}
rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
- mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0);
- err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp);
+ mlx5_ib_populate_pas(rwq->umem, 1UL << rwq->page_shift, rq_pas0, 0);
+ err = mlx5_core_create_rq_tracked(dev, in, inlen, &rwq->core_qp);
if (!err && init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) {
err = set_delay_drop(dev);
if (err) {
mlx5_ib_warn(dev, "Failed to enable delay drop err=%d\n",
err);
- mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
+ mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp);
} else {
rwq->create_flags |= MLX5_IB_WQ_FLAGS_DELAY_DROP;
}
@@ -5908,6 +5364,19 @@ static int set_user_rq_size(struct mlx5_ib_dev *dev,
return 0;
}
+static bool log_of_strides_valid(struct mlx5_ib_dev *dev, u32 log_num_strides)
+{
+ if ((log_num_strides > MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES) ||
+ (log_num_strides < MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES))
+ return false;
+
+ if (!MLX5_CAP_GEN(dev->mdev, ext_stride_num_range) &&
+ (log_num_strides < MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES))
+ return false;
+
+ return true;
+}
+
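
Note on the two hunks above: create_rq() and the new log_of_strides_valid() helper encode the WQE stride count for firmware through the fw_map table, so the extended range starting at 3 is reported as 10..15 and the legacy range starting at 9 as 0.., with the extended values only accepted when the ext_stride_num_range capability is set. The sketch below replays that mapping in plain C; the MIN/MAX/EXT_MIN constants (9, 16, 3) are assumed values used only to make the demo self-contained.

```c
/* Replay of the log_num_strides -> firmware encoding from create_rq().
 * Assumed bounds: EXT_MIN = 3, MIN = 9, MAX = 16 (not taken from the diff;
 * only the fw_map table itself is).
 */
#include <stdbool.h>
#include <stdio.h>

#define EXT_MIN_LOG_NUM_STRIDES	3
#define MIN_LOG_NUM_STRIDES	9
#define MAX_LOG_NUM_STRIDES	16

static const unsigned char fw_map[] = { 10, 11, 12, 13, 14, 15, 0, 1,
					2,  3,  4,  5,  6,  7,  8, 9 };

static bool log_of_strides_valid(bool ext_range_cap, unsigned int log_num)
{
	if (log_num > MAX_LOG_NUM_STRIDES || log_num < EXT_MIN_LOG_NUM_STRIDES)
		return false;
	/* Without the extended capability only the legacy range is allowed. */
	if (!ext_range_cap && log_num < MIN_LOG_NUM_STRIDES)
		return false;
	return true;
}

int main(void)
{
	for (unsigned int log_num = EXT_MIN_LOG_NUM_STRIDES;
	     log_num <= MAX_LOG_NUM_STRIDES; log_num++)
		printf("log_num_strides=%2u fw=%2u valid(no ext cap)=%d\n",
		       log_num, fw_map[log_num - EXT_MIN_LOG_NUM_STRIDES],
		       log_of_strides_valid(false, log_num));
	return 0;
}
```
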
static int prepare_user_rq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata,
@@ -5918,8 +5387,8 @@ static int prepare_user_rq(struct ib_pd *pd,
int err;
size_t required_cmd_sz;
- required_cmd_sz = offsetof(typeof(ucmd), single_stride_log_num_of_bytes)
- + sizeof(ucmd.single_stride_log_num_of_bytes);
+ required_cmd_sz = offsetofend(struct mlx5_ib_create_wq,
+ single_stride_log_num_of_bytes);
if (udata->inlen < required_cmd_sz) {
mlx5_ib_dbg(dev, "invalid inlen\n");
return -EINVAL;
@@ -5955,14 +5424,16 @@ static int prepare_user_rq(struct ib_pd *pd,
MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES);
return -EINVAL;
}
- if ((ucmd.single_wqe_log_num_of_strides >
- MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES) ||
- (ucmd.single_wqe_log_num_of_strides <
- MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES)) {
- mlx5_ib_dbg(dev, "Invalid log num strides (%u. Range is %u - %u)\n",
- ucmd.single_wqe_log_num_of_strides,
- MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES,
- MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES);
+ if (!log_of_strides_valid(dev,
+ ucmd.single_wqe_log_num_of_strides)) {
+ mlx5_ib_dbg(
+ dev,
+ "Invalid log num strides (%u. Range is %u - %u)\n",
+ ucmd.single_wqe_log_num_of_strides,
+ MLX5_CAP_GEN(dev->mdev, ext_stride_num_range) ?
+ MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES :
+ MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES,
+ MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES);
return -EINVAL;
}
rwq->single_stride_log_num_of_bytes =
@@ -6001,10 +5472,14 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
if (!udata)
return ERR_PTR(-ENOSYS);
- min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
+ min_resp_len = offsetofend(struct mlx5_ib_create_wq_resp, reserved);
if (udata->outlen && udata->outlen < min_resp_len)
return ERR_PTR(-EINVAL);
+ if (!capable(CAP_SYS_RAWIO) &&
+ init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP)
+ return ERR_PTR(-EPERM);
+
dev = to_mdev(pd->device);
switch (init_attr->wq_type) {
case IB_WQT_RQ:
@@ -6027,8 +5502,8 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
rwq->ibwq.wq_num = rwq->core_qp.qpn;
rwq->ibwq.state = IB_WQS_RESET;
if (udata->outlen) {
- resp.response_length = offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length);
+ resp.response_length = offsetofend(
+ struct mlx5_ib_create_wq_resp, response_length);
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
goto err_copy;
@@ -6039,7 +5514,7 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
return &rwq->ibwq;
err_copy:
- mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
+ mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp);
err_user_rq:
destroy_user_rq(dev, pd, rwq, udata);
err:
@@ -6051,20 +5526,23 @@ int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(wq->device);
struct mlx5_ib_rwq *rwq = to_mrwq(wq);
+ int ret;
- mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
+ ret = mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp);
+ if (ret)
+ return ret;
destroy_user_rq(dev, wq->pd, rwq, udata);
kfree(rwq);
-
return 0;
}
-struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr,
- struct ib_udata *udata)
+int mlx5_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata)
{
- struct mlx5_ib_dev *dev = to_mdev(device);
- struct mlx5_ib_rwq_ind_table *rwq_ind_tbl;
+ struct mlx5_ib_rwq_ind_table *rwq_ind_tbl =
+ to_mrwq_ind_table(ib_rwq_ind_table);
+ struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_table->device);
int sz = 1 << init_attr->log_ind_tbl_size;
struct mlx5_ib_create_rwq_ind_tbl_resp resp = {};
size_t min_resp_len;
@@ -6077,30 +5555,25 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
if (udata->inlen > 0 &&
!ib_is_udata_cleared(udata, 0,
udata->inlen))
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
if (init_attr->log_ind_tbl_size >
MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)) {
mlx5_ib_dbg(dev, "log_ind_tbl_size = %d is bigger than supported = %d\n",
init_attr->log_ind_tbl_size,
MLX5_CAP_GEN(dev->mdev, log_max_rqt_size));
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
- min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
+ min_resp_len =
+ offsetofend(struct mlx5_ib_create_rwq_ind_tbl_resp, reserved);
if (udata->outlen && udata->outlen < min_resp_len)
- return ERR_PTR(-EINVAL);
-
- rwq_ind_tbl = kzalloc(sizeof(*rwq_ind_tbl), GFP_KERNEL);
- if (!rwq_ind_tbl)
- return ERR_PTR(-ENOMEM);
+ return -EINVAL;
inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
in = kvzalloc(inlen, GFP_KERNEL);
- if (!in) {
- err = -ENOMEM;
- goto err;
- }
+ if (!in)
+ return -ENOMEM;
rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
@@ -6115,26 +5588,24 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn);
kvfree(in);
-
if (err)
- goto err;
+ return err;
rwq_ind_tbl->ib_rwq_ind_tbl.ind_tbl_num = rwq_ind_tbl->rqtn;
if (udata->outlen) {
- resp.response_length = offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length);
+ resp.response_length =
+ offsetofend(struct mlx5_ib_create_rwq_ind_tbl_resp,
+ response_length);
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
goto err_copy;
}
- return &rwq_ind_tbl->ib_rwq_ind_tbl;
+ return 0;
err_copy:
mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
-err:
- kfree(rwq_ind_tbl);
- return ERR_PTR(err);
+ return err;
}
int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
@@ -6142,10 +5613,7 @@ int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl);
struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device);
- mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
-
- kfree(rwq_ind_tbl);
- return 0;
+ return mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
}
int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
@@ -6162,7 +5630,7 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
void *rqc;
void *in;
- required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved);
+ required_cmd_sz = offsetofend(struct mlx5_ib_modify_wq, reserved);
if (udata->inlen < required_cmd_sz)
return -EINVAL;
@@ -6184,10 +5652,8 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
- curr_wq_state = (wq_attr_mask & IB_WQ_CUR_STATE) ?
- wq_attr->curr_wq_state : wq->state;
- wq_state = (wq_attr_mask & IB_WQ_STATE) ?
- wq_attr->wq_state : curr_wq_state;
+ curr_wq_state = wq_attr->curr_wq_state;
+ wq_state = wq_attr->wq_state;
if (curr_wq_state == IB_WQS_ERR)
curr_wq_state = MLX5_RQC_STATE_ERR;
if (wq_state == IB_WQS_ERR)
@@ -6200,8 +5666,7 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
if (wq_attr->flags_mask & IB_WQ_FLAGS_CVLAN_STRIPPING) {
if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
MLX5_CAP_ETH(dev->mdev, vlan_cap))) {
- mlx5_ib_dbg(dev, "VLAN offloads are not "
- "supported\n");
+ mlx5_ib_dbg(dev, "VLAN offloads are not supported\n");
err = -EOPNOTSUPP;
goto out;
}
@@ -6219,18 +5684,20 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
}
if (curr_wq_state == IB_WQS_RESET && wq_state == IB_WQS_RDY) {
+ u16 set_id;
+
+ set_id = mlx5_ib_get_counters_id(dev, 0);
if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
MLX5_SET64(modify_rq_in, in, modify_bitmask,
MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
- MLX5_SET(rqc, rqc, counter_set_id,
- dev->port->cnts.set_id);
+ MLX5_SET(rqc, rqc, counter_set_id, set_id);
} else
dev_info_once(
&dev->ib_dev.dev,
"Receive WQ counters are not supported on current FW\n");
}
- err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in, inlen);
+ err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in);
if (!err)
rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state;
@@ -6297,7 +5764,7 @@ static void handle_drain_completion(struct ib_cq *cq,
/* Run the CQ handler - this makes sure that the drain WR will
* be processed if wasn't processed yet.
*/
- mcq->mcq.comp(&mcq->mcq);
+ mcq->mcq.comp(&mcq->mcq, NULL);
}
wait_for_completion(&sdrain->done);
@@ -6329,7 +5796,7 @@ void mlx5_ib_drain_sq(struct ib_qp *qp)
sdrain.cqe.done = mlx5_ib_drain_qp_done;
init_completion(&sdrain.done);
- ret = _mlx5_ib_post_send(qp, &swr.wr, &bad_swr, true);
+ ret = mlx5_ib_post_send_drain(qp, &swr.wr, &bad_swr);
if (ret) {
WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
return;
@@ -6359,7 +5826,7 @@ void mlx5_ib_drain_rq(struct ib_qp *qp)
rdrain.cqe.done = mlx5_ib_drain_qp_done;
init_completion(&rdrain.done);
- ret = _mlx5_ib_post_recv(qp, &rwr, &bad_rwr, true);
+ ret = mlx5_ib_post_recv_drain(qp, &rwr, &bad_rwr);
if (ret) {
WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
return;
@@ -6367,3 +5834,54 @@ void mlx5_ib_drain_rq(struct ib_qp *qp)
handle_drain_completion(cq, &rdrain, dev);
}
+
+/*
+ * Bind a qp to a counter. If @counter is NULL then bind the qp to
+ * the default counter
+ */
+int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ int err = 0;
+
+ mutex_lock(&mqp->mutex);
+ if (mqp->state == IB_QPS_RESET) {
+ qp->counter = counter;
+ goto out;
+ }
+
+ if (!MLX5_CAP_GEN(dev->mdev, rts2rts_qp_counters_set_id)) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ if (mqp->state == IB_QPS_RTS) {
+ err = __mlx5_ib_qp_set_counter(qp, counter);
+ if (!err)
+ qp->counter = counter;
+
+ goto out;
+ }
+
+ mqp->counter_pending = 1;
+ qp->counter = counter;
+
+out:
+ mutex_unlock(&mqp->mutex);
+ return err;
+}
+
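
Note on mlx5_ib_qp_set_counter() above: the counter is bound in firmware immediately only when the QP is already in RTS; in RESET the pointer is merely recorded, and in intermediate states counter_pending is set so the bind is applied on the next state transition. A small hedged sketch of that defer-until-ready pattern follows; the state names and the apply_bind() helper are illustrative stand-ins, not driver API.

```c
/* Defer-until-ready binding, modelled after mlx5_ib_qp_set_counter()
 * (illustrative userspace code, not the driver implementation). */
#include <stdbool.h>
#include <stdio.h>

enum qp_state { QPS_RESET, QPS_INIT, QPS_RTS };

struct qp {
	enum qp_state state;
	int counter_id;		/* 0 means "default counter" */
	bool counter_pending;	/* bind deferred to the next transition */
};

static void apply_bind(struct qp *qp)
{
	printf("firmware bind: qp -> counter %d\n", qp->counter_id);
}

static void set_counter(struct qp *qp, int counter_id)
{
	qp->counter_id = counter_id;
	if (qp->state == QPS_RESET)
		return;			/* nothing to tell firmware yet */
	if (qp->state == QPS_RTS) {
		apply_bind(qp);		/* bind right away */
		return;
	}
	qp->counter_pending = true;	/* bind on the next modify */
}

static void modify_to_rts(struct qp *qp)
{
	qp->state = QPS_RTS;
	if (qp->counter_pending) {
		qp->counter_pending = false;
		apply_bind(qp);
	}
}

int main(void)
{
	struct qp qp = { .state = QPS_INIT };

	set_counter(&qp, 7);	/* deferred: QP not yet in RTS */
	modify_to_rts(&qp);	/* pending bind is applied here */
	return 0;
}
```
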
+int mlx5_ib_qp_event_init(void)
+{
+ mlx5_ib_qp_event_wq = alloc_ordered_workqueue("mlx5_ib_qp_event_wq", 0);
+ if (!mlx5_ib_qp_event_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx5_ib_qp_event_cleanup(void)
+{
+ destroy_workqueue(mlx5_ib_qp_event_wq);
+}
diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h
new file mode 100644
index 000000000000..2530e7730635
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/qp.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_QP_H
+#define _MLX5_IB_QP_H
+
+struct mlx5_ib_dev;
+
+struct mlx5_qp_table {
+ struct notifier_block nb;
+ struct xarray dct_xa;
+
+ /* protect radix tree */
+ spinlock_t lock;
+ struct radix_tree_root tree;
+};
+
+int mlx5_init_qp_table(struct mlx5_ib_dev *dev);
+void mlx5_cleanup_qp_table(struct mlx5_ib_dev *dev);
+
+int mlx5_core_create_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *qp,
+ u32 *in, int inlen, u32 *out, int outlen);
+int mlx5_qpc_create_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp,
+ u32 *in, int inlen, u32 *out);
+int mlx5_core_qp_modify(struct mlx5_ib_dev *dev, u16 opcode, u32 opt_param_mask,
+ void *qpc, struct mlx5_core_qp *qp, u32 *ece);
+int mlx5_core_destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp);
+int mlx5_core_destroy_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct);
+int mlx5_core_qp_query(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp,
+ u32 *out, int outlen, bool qpc_ext);
+int mlx5_core_dct_query(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct,
+ u32 *out, int outlen);
+
+int mlx5_core_set_delay_drop(struct mlx5_ib_dev *dev, u32 timeout_usec);
+
+int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
+ struct mlx5_core_qp *rq);
+int mlx5_core_create_sq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen,
+ struct mlx5_core_qp *sq);
+void mlx5_core_destroy_sq_tracked(struct mlx5_ib_dev *dev,
+ struct mlx5_core_qp *sq);
+
+int mlx5_core_create_rq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen,
+ struct mlx5_core_qp *rq);
+
+struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_ib_dev *dev,
+ int res_num,
+ enum mlx5_res_type res_type);
+void mlx5_core_res_put(struct mlx5_core_rsc_common *res);
+
+int mlx5_core_xrcd_alloc(struct mlx5_ib_dev *dev, u32 *xrcdn);
+int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn);
+int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter);
+int mlx5_ib_qp_event_init(void);
+void mlx5_ib_qp_event_cleanup(void);
+int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate);
+#endif /* _MLX5_IB_QP_H */
diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c
new file mode 100644
index 000000000000..146d03ae40bd
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/qpc.c
@@ -0,0 +1,697 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <linux/gfp.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_ib.h"
+#include "qp.h"
+
+static int mlx5_core_drain_dct(struct mlx5_ib_dev *dev,
+ struct mlx5_core_dct *dct);
+
+static struct mlx5_core_rsc_common *
+mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn)
+{
+ struct mlx5_core_rsc_common *common;
+ unsigned long flags;
+
+ spin_lock_irqsave(&table->lock, flags);
+
+ common = radix_tree_lookup(&table->tree, rsn);
+ if (common && !common->invalid)
+ refcount_inc(&common->refcount);
+ else
+ common = NULL;
+
+ spin_unlock_irqrestore(&table->lock, flags);
+
+ return common;
+}
+
+void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common)
+{
+ if (refcount_dec_and_test(&common->refcount))
+ complete(&common->free);
+}
+
+static u64 qp_allowed_event_types(void)
+{
+ u64 mask;
+
+ mask = BIT(MLX5_EVENT_TYPE_PATH_MIG) |
+ BIT(MLX5_EVENT_TYPE_COMM_EST) |
+ BIT(MLX5_EVENT_TYPE_SQ_DRAINED) |
+ BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
+ BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR) |
+ BIT(MLX5_EVENT_TYPE_PATH_MIG_FAILED) |
+ BIT(MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) |
+ BIT(MLX5_EVENT_TYPE_WQ_ACCESS_ERROR);
+
+ return mask;
+}
+
+static u64 rq_allowed_event_types(void)
+{
+ u64 mask;
+
+ mask = BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
+ BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
+
+ return mask;
+}
+
+static u64 sq_allowed_event_types(void)
+{
+ return BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
+}
+
+static u64 dct_allowed_event_types(void)
+{
+ return BIT(MLX5_EVENT_TYPE_DCT_DRAINED);
+}
+
+static bool is_event_type_allowed(int rsc_type, int event_type)
+{
+ switch (rsc_type) {
+ case MLX5_EVENT_QUEUE_TYPE_QP:
+ return BIT(event_type) & qp_allowed_event_types();
+ case MLX5_EVENT_QUEUE_TYPE_RQ:
+ return BIT(event_type) & rq_allowed_event_types();
+ case MLX5_EVENT_QUEUE_TYPE_SQ:
+ return BIT(event_type) & sq_allowed_event_types();
+ case MLX5_EVENT_QUEUE_TYPE_DCT:
+ return BIT(event_type) & dct_allowed_event_types();
+ default:
+ WARN(1, "Event arrived for unknown resource type");
+ return false;
+ }
+}
+
+static int dct_event_notifier(struct mlx5_ib_dev *dev, struct mlx5_eqe *eqe)
+{
+ struct mlx5_core_dct *dct;
+ unsigned long flags;
+ u32 qpn;
+
+ qpn = be32_to_cpu(eqe->data.dct.dctn) & 0xFFFFFF;
+ xa_lock_irqsave(&dev->qp_table.dct_xa, flags);
+ dct = xa_load(&dev->qp_table.dct_xa, qpn);
+ if (dct)
+ complete(&dct->drained);
+ xa_unlock_irqrestore(&dev->qp_table.dct_xa, flags);
+ return NOTIFY_OK;
+}
+
+static int rsc_event_notifier(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_ib_dev *dev =
+ container_of(nb, struct mlx5_ib_dev, qp_table.nb);
+ struct mlx5_core_rsc_common *common;
+ struct mlx5_eqe *eqe = data;
+ u8 event_type = (u8)type;
+ struct mlx5_core_qp *qp;
+ u32 rsn;
+
+ switch (event_type) {
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ return dct_event_notifier(dev, eqe);
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ case MLX5_EVENT_TYPE_COMM_EST:
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+ rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ common = mlx5_get_rsc(&dev->qp_table, rsn);
+ if (!common)
+ return NOTIFY_OK;
+
+ if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type))
+ goto out;
+
+ switch (common->res) {
+ case MLX5_RES_QP:
+ case MLX5_RES_RQ:
+ case MLX5_RES_SQ:
+ qp = (struct mlx5_core_qp *)common;
+ qp->event(qp, event_type);
+ /* Need to put resource in event handler */
+ return NOTIFY_OK;
+ default:
+ break;
+ }
+out:
+ mlx5_core_put_rsc(common);
+
+ return NOTIFY_OK;
+}
+
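
Note on rsc_event_notifier() above: it rebuilds a resource serial number by packing the resource type into the bits above the user index (`rsn = qpn | (type << MLX5_USER_INDEX_LEN)`), looks the resource up by that key, and then filters the event against a per-type allowlist built from BIT() masks. A compact userspace model of both steps is below; the 24-bit index width and the event numbers are assumptions made only for the demo.

```c
/* Pack a resource type above a 24-bit index and filter events per type. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define USER_INDEX_LEN	24			/* assumed width */
#define BIT(n)		(1ULL << (n))

enum rsc_type { RSC_QP = 1, RSC_RQ = 2, RSC_SQ = 3 };
/* Illustrative event numbers, not the PRM values. */
enum event    { EV_COMM_EST = 2, EV_SQ_DRAINED = 3, EV_CATAS_ERR = 8 };

static uint32_t make_rsn(uint32_t qpn, enum rsc_type type)
{
	return (qpn & 0xffffff) | ((uint32_t)type << USER_INDEX_LEN);
}

static bool event_allowed(enum rsc_type type, enum event ev)
{
	uint64_t mask;

	switch (type) {
	case RSC_QP: mask = BIT(EV_COMM_EST) | BIT(EV_SQ_DRAINED) |
			    BIT(EV_CATAS_ERR); break;
	case RSC_RQ:
	case RSC_SQ: mask = BIT(EV_CATAS_ERR); break;
	default:     return false;
	}
	return mask & BIT(ev);
}

int main(void)
{
	uint32_t rsn = make_rsn(0x1234, RSC_RQ);
	enum rsc_type type = rsn >> USER_INDEX_LEN;

	printf("rsn=0x%08x index=0x%06x type=%d\n",
	       rsn, rsn & 0xffffff, type);
	printf("SQ_DRAINED allowed on RQ? %d\n",
	       event_allowed(type, EV_SQ_DRAINED));
	return 0;
}
```
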
+static int create_resource_common(struct mlx5_ib_dev *dev,
+ struct mlx5_core_qp *qp, int rsc_type)
+{
+ struct mlx5_qp_table *table = &dev->qp_table;
+ int err;
+
+ qp->common.res = rsc_type;
+ spin_lock_irq(&table->lock);
+ err = radix_tree_insert(&table->tree,
+ qp->qpn | (rsc_type << MLX5_USER_INDEX_LEN),
+ qp);
+ spin_unlock_irq(&table->lock);
+ if (err)
+ return err;
+
+ refcount_set(&qp->common.refcount, 1);
+ init_completion(&qp->common.free);
+ qp->pid = current->pid;
+
+ return 0;
+}
+
+static void modify_resource_common_state(struct mlx5_ib_dev *dev,
+ struct mlx5_core_qp *qp,
+ bool invalid)
+{
+ struct mlx5_qp_table *table = &dev->qp_table;
+ unsigned long flags;
+
+ spin_lock_irqsave(&table->lock, flags);
+ qp->common.invalid = invalid;
+ spin_unlock_irqrestore(&table->lock, flags);
+}
+
+static void destroy_resource_common(struct mlx5_ib_dev *dev,
+ struct mlx5_core_qp *qp)
+{
+ struct mlx5_qp_table *table = &dev->qp_table;
+ unsigned long flags;
+
+ spin_lock_irqsave(&table->lock, flags);
+ radix_tree_delete(&table->tree,
+ qp->qpn | (qp->common.res << MLX5_USER_INDEX_LEN));
+ spin_unlock_irqrestore(&table->lock, flags);
+ mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp);
+ wait_for_completion(&qp->common.free);
+}
+
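
Note on destroy_resource_common() above: it follows the reference-count-plus-completion teardown rule. The entry is removed from the radix tree so no new lookups can find it, the table's own reference is dropped, and the caller then sleeps on the completion until the last concurrent user (an event handler that took a reference through mlx5_get_rsc()) drops theirs. The pthread sketch below models the same lifetime rule in user space; it is a simplified illustration, not the kernel implementation.

```c
/* Refcount + completion teardown (userspace model): destroy waits for the
 * last user before the memory may be freed. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

struct rsc {
	atomic_int refcount;
	pthread_mutex_t lock;
	pthread_cond_t freed;		/* stands in for struct completion */
	int done;
};

static void put_rsc(struct rsc *r)
{
	if (atomic_fetch_sub(&r->refcount, 1) == 1) {
		pthread_mutex_lock(&r->lock);
		r->done = 1;		/* complete(&common->free) */
		pthread_cond_signal(&r->freed);
		pthread_mutex_unlock(&r->lock);
	}
}

static void *event_handler(void *arg)
{
	struct rsc *r = arg;		/* reference taken by the lookup */

	usleep(10000);			/* pretend to handle the event */
	put_rsc(r);
	return NULL;
}

int main(void)
{
	struct rsc r = { .refcount = 2,	/* table ref + one in-flight user */
			 .lock = PTHREAD_MUTEX_INITIALIZER,
			 .freed = PTHREAD_COND_INITIALIZER };
	pthread_t t;

	pthread_create(&t, NULL, event_handler, &r);
	put_rsc(&r);			/* drop the table's reference */
	pthread_mutex_lock(&r.lock);	/* wait_for_completion() */
	while (!r.done)
		pthread_cond_wait(&r.freed, &r.lock);
	pthread_mutex_unlock(&r.lock);
	pthread_join(t, NULL);
	printf("all references dropped, safe to free\n");
	return 0;
}
```
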
+static int _mlx5_core_destroy_dct(struct mlx5_ib_dev *dev,
+ struct mlx5_core_dct *dct)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_dct_in)] = {};
+ struct mlx5_core_qp *qp = &dct->mqp;
+
+ MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT);
+ MLX5_SET(destroy_dct_in, in, dctn, qp->qpn);
+ MLX5_SET(destroy_dct_in, in, uid, qp->uid);
+ return mlx5_cmd_exec_in(dev->mdev, destroy_dct, in);
+}
+
+int mlx5_core_create_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct,
+ u32 *in, int inlen, u32 *out, int outlen)
+{
+ struct mlx5_core_qp *qp = &dct->mqp;
+ int err;
+
+ init_completion(&dct->drained);
+ MLX5_SET(create_dct_in, in, opcode, MLX5_CMD_OP_CREATE_DCT);
+
+ err = mlx5_cmd_do(dev->mdev, in, inlen, out, outlen);
+ if (err)
+ return err;
+
+ qp->qpn = MLX5_GET(create_dct_out, out, dctn);
+ qp->uid = MLX5_GET(create_dct_in, in, uid);
+ err = xa_err(xa_store_irq(&dev->qp_table.dct_xa, qp->qpn, dct, GFP_KERNEL));
+ if (err)
+ goto err_cmd;
+
+ return 0;
+err_cmd:
+ _mlx5_core_destroy_dct(dev, dct);
+ return err;
+}
+
+int mlx5_qpc_create_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp,
+ u32 *in, int inlen, u32 *out)
+{
+ u32 din[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
+ int err;
+
+ MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
+
+ err = mlx5_cmd_exec(dev->mdev, in, inlen, out,
+ MLX5_ST_SZ_BYTES(create_qp_out));
+ if (err)
+ return err;
+
+ qp->uid = MLX5_GET(create_qp_in, in, uid);
+ qp->qpn = MLX5_GET(create_qp_out, out, qpn);
+
+ err = create_resource_common(dev, qp, MLX5_RES_QP);
+ if (err)
+ goto err_cmd;
+
+ if (dev->ib_dev.type != RDMA_DEVICE_TYPE_SMI)
+ mlx5_debug_qp_add(dev->mdev, qp);
+
+ return 0;
+
+err_cmd:
+ MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
+ MLX5_SET(destroy_qp_in, din, qpn, qp->qpn);
+ MLX5_SET(destroy_qp_in, din, uid, qp->uid);
+ mlx5_cmd_exec_in(dev->mdev, destroy_qp, din);
+ return err;
+}
+
+static int mlx5_core_drain_dct(struct mlx5_ib_dev *dev,
+ struct mlx5_core_dct *dct)
+{
+ u32 in[MLX5_ST_SZ_DW(drain_dct_in)] = {};
+ struct mlx5_core_qp *qp = &dct->mqp;
+
+ MLX5_SET(drain_dct_in, in, opcode, MLX5_CMD_OP_DRAIN_DCT);
+ MLX5_SET(drain_dct_in, in, dctn, qp->qpn);
+ MLX5_SET(drain_dct_in, in, uid, qp->uid);
+ return mlx5_cmd_exec_in(dev->mdev, drain_dct, in);
+}
+
+int mlx5_core_destroy_dct(struct mlx5_ib_dev *dev,
+ struct mlx5_core_dct *dct)
+{
+ struct mlx5_qp_table *table = &dev->qp_table;
+ struct mlx5_core_dct *tmp;
+ int err;
+
+ err = mlx5_core_drain_dct(dev, dct);
+ if (err) {
+ if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ goto destroy;
+
+ return err;
+ }
+ wait_for_completion(&dct->drained);
+
+destroy:
+ tmp = xa_cmpxchg_irq(&table->dct_xa, dct->mqp.qpn, dct, XA_ZERO_ENTRY, GFP_KERNEL);
+ if (WARN_ON(tmp != dct))
+ return xa_err(tmp) ?: -EINVAL;
+
+ err = _mlx5_core_destroy_dct(dev, dct);
+ if (err) {
+ xa_cmpxchg_irq(&table->dct_xa, dct->mqp.qpn, XA_ZERO_ENTRY, dct, 0);
+ return err;
+ }
+ xa_erase_irq(&table->dct_xa, dct->mqp.qpn);
+ return 0;
+}
+
+int mlx5_core_destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
+
+ if (dev->ib_dev.type != RDMA_DEVICE_TYPE_SMI)
+ mlx5_debug_qp_remove(dev->mdev, qp);
+
+ destroy_resource_common(dev, qp);
+
+ MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
+ MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+ MLX5_SET(destroy_qp_in, in, uid, qp->uid);
+ return mlx5_cmd_exec_in(dev->mdev, destroy_qp, in);
+}
+
+int mlx5_core_set_delay_drop(struct mlx5_ib_dev *dev,
+ u32 timeout_usec)
+{
+ u32 in[MLX5_ST_SZ_DW(set_delay_drop_params_in)] = {};
+
+ MLX5_SET(set_delay_drop_params_in, in, opcode,
+ MLX5_CMD_OP_SET_DELAY_DROP_PARAMS);
+ MLX5_SET(set_delay_drop_params_in, in, delay_drop_timeout,
+ timeout_usec / 100);
+ return mlx5_cmd_exec_in(dev->mdev, set_delay_drop_params, in);
+}
+
+struct mbox_info {
+ u32 *in;
+ u32 *out;
+ int inlen;
+ int outlen;
+};
+
+static int mbox_alloc(struct mbox_info *mbox, int inlen, int outlen)
+{
+ mbox->inlen = inlen;
+ mbox->outlen = outlen;
+ mbox->in = kzalloc(mbox->inlen, GFP_KERNEL);
+ mbox->out = kzalloc(mbox->outlen, GFP_KERNEL);
+ if (!mbox->in || !mbox->out) {
+ kfree(mbox->in);
+ kfree(mbox->out);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void mbox_free(struct mbox_info *mbox)
+{
+ kfree(mbox->in);
+ kfree(mbox->out);
+}
+
+static int get_ece_from_mbox(void *out, u16 opcode)
+{
+ int ece = 0;
+
+ switch (opcode) {
+ case MLX5_CMD_OP_INIT2INIT_QP:
+ ece = MLX5_GET(init2init_qp_out, out, ece);
+ break;
+ case MLX5_CMD_OP_INIT2RTR_QP:
+ ece = MLX5_GET(init2rtr_qp_out, out, ece);
+ break;
+ case MLX5_CMD_OP_RTR2RTS_QP:
+ ece = MLX5_GET(rtr2rts_qp_out, out, ece);
+ break;
+ case MLX5_CMD_OP_RTS2RTS_QP:
+ ece = MLX5_GET(rts2rts_qp_out, out, ece);
+ break;
+ case MLX5_CMD_OP_RST2INIT_QP:
+ ece = MLX5_GET(rst2init_qp_out, out, ece);
+ break;
+ default:
+ break;
+ }
+
+ return ece;
+}
+
+static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn,
+ u32 opt_param_mask, void *qpc,
+ struct mbox_info *mbox, u16 uid, u32 ece)
+{
+ mbox->out = NULL;
+ mbox->in = NULL;
+
+#define MBOX_ALLOC(mbox, typ) \
+ mbox_alloc(mbox, MLX5_ST_SZ_BYTES(typ##_in), MLX5_ST_SZ_BYTES(typ##_out))
+
+#define MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid) \
+ do { \
+ MLX5_SET(typ##_in, in, opcode, _opcode); \
+ MLX5_SET(typ##_in, in, qpn, _qpn); \
+ MLX5_SET(typ##_in, in, uid, _uid); \
+ } while (0)
+
+#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc, _uid) \
+ do { \
+ MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid); \
+ MLX5_SET(typ##_in, in, opt_param_mask, _opt_p); \
+ memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc, \
+ MLX5_ST_SZ_BYTES(qpc)); \
+ } while (0)
+
+ switch (opcode) {
+ /* 2RST & 2ERR */
+ case MLX5_CMD_OP_2RST_QP:
+ if (MBOX_ALLOC(mbox, qp_2rst))
+ return -ENOMEM;
+ MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn, uid);
+ break;
+ case MLX5_CMD_OP_2ERR_QP:
+ if (MBOX_ALLOC(mbox, qp_2err))
+ return -ENOMEM;
+ MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn, uid);
+ break;
+
+ /* MODIFY with QPC */
+ case MLX5_CMD_OP_RST2INIT_QP:
+ if (MBOX_ALLOC(mbox, rst2init_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc, uid);
+ MLX5_SET(rst2init_qp_in, mbox->in, ece, ece);
+ break;
+ case MLX5_CMD_OP_INIT2RTR_QP:
+ if (MBOX_ALLOC(mbox, init2rtr_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(init2rtr_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc, uid);
+ MLX5_SET(init2rtr_qp_in, mbox->in, ece, ece);
+ break;
+ case MLX5_CMD_OP_RTR2RTS_QP:
+ if (MBOX_ALLOC(mbox, rtr2rts_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(rtr2rts_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc, uid);
+ MLX5_SET(rtr2rts_qp_in, mbox->in, ece, ece);
+ break;
+ case MLX5_CMD_OP_RTS2RTS_QP:
+ if (MBOX_ALLOC(mbox, rts2rts_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(rts2rts_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc, uid);
+ MLX5_SET(rts2rts_qp_in, mbox->in, ece, ece);
+ break;
+ case MLX5_CMD_OP_SQERR2RTS_QP:
+ if (MBOX_ALLOC(mbox, sqerr2rts_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(sqerr2rts_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc, uid);
+ break;
+ case MLX5_CMD_OP_SQD_RTS_QP:
+ if (MBOX_ALLOC(mbox, sqd2rts_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(sqd2rts_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc, uid);
+ break;
+ case MLX5_CMD_OP_INIT2INIT_QP:
+ if (MBOX_ALLOC(mbox, init2init_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(init2init_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc, uid);
+ MLX5_SET(init2init_qp_in, mbox->in, ece, ece);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
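
Note on modify_qp_mbox_alloc() above: it relies on token-pasting macros (MBOX_ALLOC, MOD_QP_IN_SET, MOD_QP_IN_SET_QPC) so each QP state-transition opcode gets correctly sized input/output mailboxes and the shared header fields without repeating the boilerplate per command. The sketch below shows the `typ##_in` / `typ##_out` trick on two invented command layouts; the struct names are illustrative only.

```c
/* Token pasting to size and fill per-command mailboxes, as in MBOX_ALLOC(). */
#include <stdio.h>
#include <stdlib.h>

/* Invented command layouts; only their sizes matter here. */
struct rst2init_in  { int opcode, qpn, uid; char qpc[32]; };
struct rst2init_out { int status; };
struct qp_2rst_in   { int opcode, qpn, uid; };
struct qp_2rst_out  { int status; };

struct mbox { void *in, *out; size_t inlen, outlen; };

static int mbox_alloc(struct mbox *m, size_t inlen, size_t outlen)
{
	m->inlen = inlen;
	m->outlen = outlen;
	m->in = calloc(1, inlen);
	m->out = calloc(1, outlen);
	if (!m->in || !m->out) {
		free(m->in);
		free(m->out);
		return -1;
	}
	return 0;
}

/* One macro picks both sizes from the command type name. */
#define MBOX_ALLOC(m, typ) \
	mbox_alloc(m, sizeof(struct typ##_in), sizeof(struct typ##_out))

/* And one fills the header fields shared by every command. */
#define MOD_QP_IN_SET(typ, m, _opcode, _qpn, _uid)			\
	do {								\
		struct typ##_in *__in = (m)->in;			\
		__in->opcode = (_opcode);				\
		__in->qpn = (_qpn);					\
		__in->uid = (_uid);					\
	} while (0)

int main(void)
{
	struct mbox m;

	if (MBOX_ALLOC(&m, rst2init))
		return 1;
	MOD_QP_IN_SET(rst2init, &m, 0x2a, 17, 1);
	printf("rst2init mailbox: in=%zu out=%zu bytes\n", m.inlen, m.outlen);
	free(m.in);
	free(m.out);
	return 0;
}
```
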
+int mlx5_core_qp_modify(struct mlx5_ib_dev *dev, u16 opcode, u32 opt_param_mask,
+ void *qpc, struct mlx5_core_qp *qp, u32 *ece)
+{
+ struct mbox_info mbox;
+ int err;
+
+ err = modify_qp_mbox_alloc(dev->mdev, opcode, qp->qpn, opt_param_mask,
+ qpc, &mbox, qp->uid, (ece) ? *ece : 0);
+ if (err)
+ return err;
+
+ err = mlx5_cmd_exec(dev->mdev, mbox.in, mbox.inlen, mbox.out,
+ mbox.outlen);
+
+ if (ece)
+ *ece = get_ece_from_mbox(mbox.out, opcode);
+
+ mbox_free(&mbox);
+ return err;
+}
+
+int mlx5_init_qp_table(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_qp_table *table = &dev->qp_table;
+
+ spin_lock_init(&table->lock);
+ INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
+ xa_init(&table->dct_xa);
+
+ if (dev->ib_dev.type != RDMA_DEVICE_TYPE_SMI)
+ mlx5_qp_debugfs_init(dev->mdev);
+
+ table->nb.notifier_call = rsc_event_notifier;
+ mlx5_notifier_register(dev->mdev, &table->nb);
+
+ return 0;
+}
+
+void mlx5_cleanup_qp_table(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_qp_table *table = &dev->qp_table;
+
+ mlx5_notifier_unregister(dev->mdev, &table->nb);
+ if (dev->ib_dev.type != RDMA_DEVICE_TYPE_SMI)
+ mlx5_qp_debugfs_cleanup(dev->mdev);
+}
+
+int mlx5_core_qp_query(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp,
+ u32 *out, int outlen, bool qpc_ext)
+{
+ u32 in[MLX5_ST_SZ_DW(query_qp_in)] = {};
+
+ MLX5_SET(query_qp_in, in, opcode, MLX5_CMD_OP_QUERY_QP);
+ MLX5_SET(query_qp_in, in, qpn, qp->qpn);
+ MLX5_SET(query_qp_in, in, qpc_ext, qpc_ext);
+
+ return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, outlen);
+}
+
+int mlx5_core_dct_query(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct,
+ u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_dct_in)] = {};
+ struct mlx5_core_qp *qp = &dct->mqp;
+
+ MLX5_SET(query_dct_in, in, opcode, MLX5_CMD_OP_QUERY_DCT);
+ MLX5_SET(query_dct_in, in, dctn, qp->qpn);
+
+ return mlx5_cmd_exec(dev->mdev, (void *)&in, sizeof(in), (void *)out,
+ outlen);
+}
+
+int mlx5_core_xrcd_alloc(struct mlx5_ib_dev *dev, u32 *xrcdn)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)] = {};
+ int err;
+
+ MLX5_SET(alloc_xrcd_in, in, opcode, MLX5_CMD_OP_ALLOC_XRCD);
+ err = mlx5_cmd_exec_inout(dev->mdev, alloc_xrcd, in, out);
+ if (!err)
+ *xrcdn = MLX5_GET(alloc_xrcd_out, out, xrcd);
+ return err;
+}
+
+int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)] = {};
+
+ MLX5_SET(dealloc_xrcd_in, in, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
+ MLX5_SET(dealloc_xrcd_in, in, xrcd, xrcdn);
+ return mlx5_cmd_exec_in(dev->mdev, dealloc_xrcd, in);
+}
+
+static int destroy_rq_tracked(struct mlx5_ib_dev *dev, u32 rqn, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_rq_in)] = {};
+
+ MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ);
+ MLX5_SET(destroy_rq_in, in, rqn, rqn);
+ MLX5_SET(destroy_rq_in, in, uid, uid);
+ return mlx5_cmd_exec_in(dev->mdev, destroy_rq, in);
+}
+
+int mlx5_core_create_rq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen,
+ struct mlx5_core_qp *rq)
+{
+ int err;
+ u32 rqn;
+
+ err = mlx5_core_create_rq(dev->mdev, in, inlen, &rqn);
+ if (err)
+ return err;
+
+ rq->uid = MLX5_GET(create_rq_in, in, uid);
+ rq->qpn = rqn;
+ err = create_resource_common(dev, rq, MLX5_RES_RQ);
+ if (err)
+ goto err_destroy_rq;
+
+ return 0;
+
+err_destroy_rq:
+ destroy_rq_tracked(dev, rq->qpn, rq->uid);
+
+ return err;
+}
+
+int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
+ struct mlx5_core_qp *rq)
+{
+ int ret;
+
+ /* The rq destruction can be called again in case it fails, hence we
+ * mark the common resource as invalid and only destroy the resources
+ * once FW destruction has completed successfully.
+ */
+ modify_resource_common_state(dev, rq, true);
+ ret = destroy_rq_tracked(dev, rq->qpn, rq->uid);
+ if (ret) {
+ modify_resource_common_state(dev, rq, false);
+ return ret;
+ }
+ destroy_resource_common(dev, rq);
+ return 0;
+}
+
+static void destroy_sq_tracked(struct mlx5_ib_dev *dev, u32 sqn, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_sq_in)] = {};
+
+ MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ);
+ MLX5_SET(destroy_sq_in, in, sqn, sqn);
+ MLX5_SET(destroy_sq_in, in, uid, uid);
+ mlx5_cmd_exec_in(dev->mdev, destroy_sq, in);
+}
+
+int mlx5_core_create_sq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen,
+ struct mlx5_core_qp *sq)
+{
+ u32 out[MLX5_ST_SZ_DW(create_sq_out)] = {};
+ int err;
+
+ MLX5_SET(create_sq_in, in, opcode, MLX5_CMD_OP_CREATE_SQ);
+ err = mlx5_cmd_exec(dev->mdev, in, inlen, out, sizeof(out));
+ if (err)
+ return err;
+
+ sq->qpn = MLX5_GET(create_sq_out, out, sqn);
+ sq->uid = MLX5_GET(create_sq_in, in, uid);
+ err = create_resource_common(dev, sq, MLX5_RES_SQ);
+ if (err)
+ goto err_destroy_sq;
+
+ return 0;
+
+err_destroy_sq:
+ destroy_sq_tracked(dev, sq->qpn, sq->uid);
+
+ return err;
+}
+
+void mlx5_core_destroy_sq_tracked(struct mlx5_ib_dev *dev,
+ struct mlx5_core_qp *sq)
+{
+ destroy_resource_common(dev, sq);
+ destroy_sq_tracked(dev, sq->qpn, sq->uid);
+}
+
+struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_ib_dev *dev,
+ int res_num,
+ enum mlx5_res_type res_type)
+{
+ u32 rsn = res_num | (res_type << MLX5_USER_INDEX_LEN);
+ struct mlx5_qp_table *table = &dev->qp_table;
+
+ return mlx5_get_rsc(table, rsn);
+}
+
+void mlx5_core_res_put(struct mlx5_core_rsc_common *res)
+{
+ mlx5_core_put_rsc(res);
+}
diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c
new file mode 100644
index 000000000000..67841922c7b8
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/restrack.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2019-2020, Mellanox Technologies Ltd. All rights reserved.
+ */
+
+#include <uapi/rdma/rdma_netlink.h>
+#include <linux/mlx5/rsc_dump.h>
+#include <rdma/ib_umem_odp.h>
+#include <rdma/restrack.h>
+#include "mlx5_ib.h"
+#include "restrack.h"
+
+#define MAX_DUMP_SIZE 1024
+
+static int dump_rsc(struct mlx5_core_dev *dev, enum mlx5_sgmt_type type,
+ int index, void *data, int *data_len)
+{
+ struct mlx5_core_dev *mdev = dev;
+ struct mlx5_rsc_dump_cmd *cmd;
+ struct mlx5_rsc_key key = {};
+ struct page *page;
+ int offset = 0;
+ int err = 0;
+ int cmd_err;
+ int size;
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ key.size = PAGE_SIZE;
+ key.rsc = type;
+ key.index1 = index;
+ key.num_of_obj1 = 1;
+
+ cmd = mlx5_rsc_dump_cmd_create(mdev, &key);
+ if (IS_ERR(cmd)) {
+ err = PTR_ERR(cmd);
+ goto free_page;
+ }
+
+ do {
+ cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size);
+ if (cmd_err < 0 || size + offset > MAX_DUMP_SIZE) {
+ err = cmd_err;
+ goto destroy_cmd;
+ }
+ memcpy(data + offset, page_address(page), size);
+ offset += size;
+ } while (cmd_err > 0);
+ *data_len = offset;
+
+destroy_cmd:
+ mlx5_rsc_dump_cmd_destroy(cmd);
+free_page:
+ __free_page(page);
+ return err;
+}
+
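
Note on dump_rsc() above: it pulls a firmware resource dump one page at a time and appends each chunk to the caller's buffer, stopping either when the dump command reports completion or when the next chunk would overflow MAX_DUMP_SIZE. A small hedged model of that bounded accumulation loop is below; the fake read_chunk() source replaces the real rsc-dump command interface.

```c
/* Bounded chunk-accumulation loop, modelled on dump_rsc(). */
#include <stdio.h>
#include <string.h>

#define MAX_DUMP_SIZE	64
#define CHUNK		16

/* Hypothetical source: returns >0 while more chunks follow, 0 on the last. */
static int read_chunk(char *page, int *size, int *remaining)
{
	*size = *remaining > CHUNK ? CHUNK : *remaining;
	memset(page, 'x', *size);
	*remaining -= *size;
	return *remaining > 0;
}

static int dump(char *data, int *data_len, int total)
{
	char page[CHUNK];
	int offset = 0, size, more;

	do {
		more = read_chunk(page, &size, &total);
		if (size + offset > MAX_DUMP_SIZE)
			return -1;	/* would overflow the caller's buffer */
		memcpy(data + offset, page, size);
		offset += size;
	} while (more);
	*data_len = offset;
	return 0;
}

int main(void)
{
	char buf[MAX_DUMP_SIZE];
	int len;

	printf("48 bytes: %s\n", dump(buf, &len, 48) ? "fail" : "ok");
	printf("80 bytes: %s\n", dump(buf, &len, 80) ? "fail" : "ok");
	return 0;
}
```
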
+static int fill_res_raw(struct sk_buff *msg, struct mlx5_ib_dev *dev,
+ enum mlx5_sgmt_type type, u32 key)
+{
+ int len = 0;
+ void *data;
+ int err;
+
+ data = kzalloc(MAX_DUMP_SIZE, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ err = dump_rsc(dev->mdev, type, key, data, &len);
+ if (err)
+ goto out;
+
+ err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
+out:
+ kfree(data);
+ return err;
+}
+
+static int fill_stat_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct nlattr *table_attr;
+
+ if (!(mr->access_flags & IB_ACCESS_ON_DEMAND))
+ return 0;
+
+ table_attr = nla_nest_start(msg,
+ RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
+
+ if (!table_attr)
+ goto err;
+
+ if (rdma_nl_stat_hwcounter_entry(msg, "page_faults",
+ atomic64_read(&mr->odp_stats.faults)))
+ goto err_table;
+ if (rdma_nl_stat_hwcounter_entry(
+ msg, "page_faults_handled",
+ atomic64_read(&mr->odp_stats.faults_handled)))
+ goto err_table;
+ if (rdma_nl_stat_hwcounter_entry(
+ msg, "page_invalidations",
+ atomic64_read(&mr->odp_stats.invalidations)))
+ goto err_table;
+ if (rdma_nl_stat_hwcounter_entry(
+ msg, "page_invalidations_handled",
+ atomic64_read(&mr->odp_stats.invalidations_handled)))
+ goto err_table;
+
+ if (rdma_nl_stat_hwcounter_entry(msg, "page_prefetch",
+ atomic64_read(&mr->odp_stats.prefetch)))
+ goto err_table;
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err_table:
+ nla_nest_cancel(msg, table_attr);
+err:
+ return -EMSGSIZE;
+}
+
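
Note on fill_stat_mr_entry() above: it follows the usual netlink nesting discipline, where nla_nest_start() opens a container for the ODP counters, any failed put unwinds with nla_nest_cancel(), and only a fully built table is committed with nla_nest_end(). The sketch below models that open/cancel/commit flow with a plain buffer and an offset snapshot; it is an analogy for the control flow, not the netlink API.

```c
/* Open / cancel / commit flow, modelled on nla_nest_start()/cancel()/end(). */
#include <stdio.h>
#include <string.h>

struct msg { char buf[64]; size_t len; };

static size_t nest_start(struct msg *m)           { return m->len; }  /* snapshot */
static void   nest_cancel(struct msg *m, size_t s) { m->len = s; }    /* roll back */
static void   nest_end(struct msg *m)              { (void)m; }       /* commit */

static int put(struct msg *m, const char *s)
{
	size_t n = strlen(s);

	if (m->len + n > sizeof(m->buf))
		return -1;		/* message full: caller must cancel */
	memcpy(m->buf + m->len, s, n);
	m->len += n;
	return 0;
}

static int fill_counters(struct msg *m)
{
	size_t nest = nest_start(m);

	if (put(m, "[faults=3]") || put(m, "[invalidations=1]")) {
		nest_cancel(m, nest);	/* drop the half-built container */
		return -1;
	}
	nest_end(m);
	return 0;
}

int main(void)
{
	struct msg m = { .len = 0 };

	fill_counters(&m);
	printf("message: %.*s\n", (int)m.len, m.buf);
	return 0;
}
```
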
+static int fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ibmr)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+
+ return fill_res_raw(msg, mr_to_mdev(mr), MLX5_SGMT_TYPE_PRM_QUERY_MKEY,
+ mlx5_mkey_to_idx(mr->mmkey.key));
+}
+
+static int fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct nlattr *table_attr;
+
+ if (!(mr->access_flags & IB_ACCESS_ON_DEMAND))
+ return 0;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ goto err;
+
+ if (mr->is_odp_implicit) {
+ if (rdma_nl_put_driver_string(msg, "odp", "implicit"))
+ goto err;
+ } else {
+ if (rdma_nl_put_driver_string(msg, "odp", "explicit"))
+ goto err;
+ }
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
+}
+
+static int fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ibcq)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
+ struct mlx5_ib_cq *cq = to_mcq(ibcq);
+
+ return fill_res_raw(msg, dev, MLX5_SGMT_TYPE_PRM_QUERY_CQ, cq->mcq.cqn);
+}
+
+static int fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ibqp)
+{
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ int ret;
+
+ if (qp->type < IB_QPT_DRIVER)
+ return 0;
+
+ switch (qp->type) {
+ case MLX5_IB_QPT_REG_UMR:
+ ret = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUBTYPE,
+ "REG_UMR");
+ break;
+ case MLX5_IB_QPT_DCT:
+ ret = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUBTYPE, "DCT");
+ break;
+ case MLX5_IB_QPT_DCI:
+ ret = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUBTYPE, "DCI");
+ break;
+ default:
+ return 0;
+ }
+ if (ret)
+ return ret;
+
+ return nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, IB_QPT_DRIVER);
+}
+
+static int fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+
+ return fill_res_raw(msg, dev, MLX5_SGMT_TYPE_PRM_QUERY_QP,
+ ibqp->qp_num);
+}
+
+static const struct ib_device_ops restrack_ops = {
+ .fill_res_cq_entry_raw = fill_res_cq_entry_raw,
+ .fill_res_mr_entry = fill_res_mr_entry,
+ .fill_res_mr_entry_raw = fill_res_mr_entry_raw,
+ .fill_res_qp_entry = fill_res_qp_entry,
+ .fill_res_qp_entry_raw = fill_res_qp_entry_raw,
+ .fill_stat_mr_entry = fill_stat_mr_entry,
+};
+
+int mlx5_ib_restrack_init(struct mlx5_ib_dev *dev)
+{
+ ib_set_device_ops(&dev->ib_dev, &restrack_ops);
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/restrack.h b/drivers/infiniband/hw/mlx5/restrack.h
new file mode 100644
index 000000000000..e8d81270f1b6
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/restrack.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies Ltd. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_RESTRACK_H
+#define _MLX5_IB_RESTRACK_H
+
+#include "mlx5_ib.h"
+
+int mlx5_ib_restrack_init(struct mlx5_ib_dev *dev);
+
+#endif /* _MLX5_IB_RESTRACK_H */
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 4e7fde86c96b..bcb6b324af50 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -3,7 +3,6 @@
* Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved.
*/
-#include <linux/module.h>
#include <linux/mlx5/qp.h>
#include <linux/slab.h>
#include <rdma/ib_umem.h>
@@ -51,10 +50,6 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
udata, struct mlx5_ib_ucontext, ibucontext);
size_t ucmdlen;
int err;
- int npages;
- int page_shift;
- int ncont;
- u32 offset;
u32 uidx = MLX5_IB_DEFAULT_UIDX;
ucmdlen = min(udata->inlen, sizeof(ucmd));
@@ -80,38 +75,20 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
- srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0);
+ srq->umem = ib_umem_get(pd->device, ucmd.buf_addr, buf_size, 0);
if (IS_ERR(srq->umem)) {
mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size);
err = PTR_ERR(srq->umem);
return err;
}
+ in->umem = srq->umem;
- mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &npages,
- &page_shift, &ncont, NULL);
- err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift,
- &offset);
- if (err) {
- mlx5_ib_warn(dev, "bad offset\n");
- goto err_umem;
- }
-
- in->pas = kvcalloc(ncont, sizeof(*in->pas), GFP_KERNEL);
- if (!in->pas) {
- err = -ENOMEM;
- goto err_umem;
- }
-
- mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0);
-
- err = mlx5_ib_db_map_user(ucontext, udata, ucmd.db_addr, &srq->db);
+ err = mlx5_ib_db_map_user(ucontext, ucmd.db_addr, &srq->db);
if (err) {
mlx5_ib_dbg(dev, "map doorbell failed\n");
- goto err_in;
+ goto err_umem;
}
- in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
- in->page_offset = offset;
in->uid = (in->type != IB_SRQT_XRC) ? to_mpd(pd)->uid : 0;
if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
in->type != IB_SRQT_BASIC)
@@ -119,9 +96,6 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
return 0;
-err_in:
- kvfree(in->pas);
-
err_umem:
ib_umem_release(srq->umem);
@@ -225,15 +199,27 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq,
int err;
struct mlx5_srq_attr in = {};
__u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
-
- /* Sanity check SRQ size before proceeding */
- if (init_attr->attr.max_wr >= max_srq_wqes) {
- mlx5_ib_dbg(dev, "max_wr %d, cap %d\n",
- init_attr->attr.max_wr,
- max_srq_wqes);
+ __u32 max_sge_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq) /
+ sizeof(struct mlx5_wqe_data_seg);
+
+ if (init_attr->srq_type != IB_SRQT_BASIC &&
+ init_attr->srq_type != IB_SRQT_XRC &&
+ init_attr->srq_type != IB_SRQT_TM)
+ return -EOPNOTSUPP;
+
+ /* Sanity check SRQ and sge size before proceeding */
+ if (init_attr->attr.max_wr >= max_srq_wqes ||
+ init_attr->attr.max_sge > max_sge_sz) {
+ mlx5_ib_dbg(dev, "max_wr %d,wr_cap %d,max_sge %d, sge_cap:%d\n",
+ init_attr->attr.max_wr, max_srq_wqes,
+ init_attr->attr.max_sge, max_sge_sz);
return -EINVAL;
}
+ err = mlx5_ib_dev_res_cq_init(dev);
+ if (err)
+ return err;
+
mutex_init(&srq->mutex);
spin_lock_init(&srq->lock);
srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1);
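
Note on the SRQ create path above: requests are now rejected when max_wr or max_sge exceed the device limits (1 << log_max_srq_sz WQEs; max_wqe_sz_rq / sizeof(struct mlx5_wqe_data_seg) SGEs), and the accepted queue depth is rounded up to a power of two. The arithmetic is replayed below with illustrative capability values; the 16-byte data-segment size matches a typical scatter entry but is stated here as an assumption.

```c
/* SRQ sizing checks: cap max_wr and max_sge, then round up to a power of two. */
#include <stdint.h>
#include <stdio.h>

static uint32_t roundup_pow_of_two(uint32_t n)
{
	uint32_t r = 1;

	while (r < n)
		r <<= 1;
	return r;
}

int main(void)
{
	/* Illustrative device capabilities, not real query results. */
	unsigned int log_max_srq_sz = 15;	/* up to 32768 WQEs */
	unsigned int max_wqe_sz_rq = 512;	/* bytes per receive WQE */
	unsigned int data_seg_sz = 16;		/* assumed scatter entry size */

	uint32_t max_srq_wqes = 1u << log_max_srq_sz;
	uint32_t max_sge_sz = max_wqe_sz_rq / data_seg_sz;
	uint32_t max_wr = 1000, max_sge = 4;

	if (max_wr >= max_srq_wqes || max_sge > max_sge_sz) {
		printf("rejected: max_wr %u (cap %u), max_sge %u (cap %u)\n",
		       max_wr, max_srq_wqes, max_sge, max_sge_sz);
		return 1;
	}
	printf("queue depth rounds up to %u entries\n",
	       roundup_pow_of_two(max_wr + 1));
	return 0;
}
```
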
@@ -274,10 +260,10 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq,
if (srq->wq_sig)
in.flags |= MLX5_SRQ_FLAG_WQ_SIG;
- if (init_attr->srq_type == IB_SRQT_XRC)
+ if (init_attr->srq_type == IB_SRQT_XRC && init_attr->ext.xrc.xrcd)
in.xrcd = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn;
else
- in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn;
+ in.xrcd = dev->devr.xrcdn0;
if (init_attr->srq_type == IB_SRQT_TM) {
in.tm_log_list_size =
@@ -310,12 +296,18 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq,
srq->msrq.event = mlx5_ib_srq_event;
srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
- if (udata)
- if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) {
+ if (udata) {
+ struct mlx5_ib_create_srq_resp resp = {
+ .srqn = srq->msrq.srqn,
+ };
+
+ if (ib_copy_to_udata(udata, &resp, min(udata->outlen,
+ sizeof(resp)))) {
mlx5_ib_dbg(dev, "copy to user failed\n");
err = -EFAULT;
goto err_core;
}
+ }
init_attr->attr.max_wr = srq->msrq.max - 1;
@@ -383,24 +375,21 @@ out_box:
return ret;
}
-void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
+int mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(srq->device);
struct mlx5_ib_srq *msrq = to_msrq(srq);
+ int ret;
+
+ ret = mlx5_cmd_destroy_srq(dev, &msrq->msrq);
+ if (ret)
+ return ret;
- mlx5_cmd_destroy_srq(dev, &msrq->msrq);
-
- if (srq->uobject) {
- mlx5_ib_db_unmap_user(
- rdma_udata_to_drv_context(
- udata,
- struct mlx5_ib_ucontext,
- ibucontext),
- &msrq->db);
- ib_umem_release(msrq->umem);
- } else {
+ if (udata)
+ destroy_srq_user(srq->pd, msrq, udata);
+ else
destroy_srq_kernel(dev, msrq);
- }
+ return 0;
}
void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index)
@@ -465,7 +454,7 @@ int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
if (i < srq->msrq.max_avail_gather) {
scat[i].byte_count = 0;
- scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
+ scat[i].lkey = dev->mkeys.terminate_scatter_list_mkey;
scat[i].addr = 0;
}
}
diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h
index af197c36d757..a7e3dc5564ac 100644
--- a/drivers/infiniband/hw/mlx5/srq.h
+++ b/drivers/infiniband/hw/mlx5/srq.h
@@ -28,6 +28,7 @@ struct mlx5_srq_attr {
u32 user_index;
u64 db_record;
__be64 *pas;
+ struct ib_umem *umem;
u32 tm_log_list_size;
u32 tm_next_tag;
u32 tm_hw_phase_cnt;
@@ -56,7 +57,7 @@ struct mlx5_srq_table {
int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *in);
-void mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq);
+int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq);
int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
struct mlx5_srq_attr *out);
int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c
index b0d0687c7a68..8b3385396599 100644
--- a/drivers/infiniband/hw/mlx5/srq_cmd.c
+++ b/drivers/infiniband/hw/mlx5/srq_cmd.c