summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-09-09 08:33:31 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-09-09 08:33:31 -0700
commit26d2177e977c912863ac04f6c1a967e793ca3a56 (patch)
tree48da04fb0b947cfa404747690d7081b657e33221 /include
parenta794b4f3292160bb3fd0f1f90ec8df454e3b17b3 (diff)
parentd1178cbcdcf91900ccf10a177350d7945703c151 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull inifiniband/rdma updates from Doug Ledford: "This is a fairly sizeable set of changes. I've put them through a decent amount of testing prior to sending the pull request due to that. There are still a few fixups that I know are coming, but I wanted to go ahead and get the big, sizable chunk into your hands sooner rather than waiting for those last few fixups. Of note is the fact that this creates what is intended to be a temporary area in the drivers/staging tree specifically for some cleanups and additions that are coming for the RDMA stack. We deprecated two drivers (ipath and amso1100) and are waiting to hear back if we can deprecate another one (ehca). We also put Intel's new hfi1 driver into this area because it needs to be refactored and a transfer library created out of the factored out code, and then it and the qib driver and the soft-roce driver should all be modified to use that library. I expect drivers/staging/rdma to be around for three or four kernel releases and then to go away as all of the work is completed and final deletions of deprecated drivers are done. Summary of changes for 4.3: - Create drivers/staging/rdma - Move amso1100 driver to staging/rdma and schedule for deletion - Move ipath driver to staging/rdma and schedule for deletion - Add hfi1 driver to staging/rdma and set TODO for move to regular tree - Initial support for namespaces to be used on RDMA devices - Add RoCE GID table handling to the RDMA core caching code - Infrastructure to support handling of devices with differing read and write scatter gather capabilities - Various iSER updates - Kill off unsafe usage of global mr registrations - Update SRP driver - Misc mlx4 driver updates - Support for the mr_alloc verb - Support for a netlink interface between kernel and user space cache daemon to speed path record queries and route resolution - Ininitial support for safe hot removal of verbs devices" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (136 commits) IB/ipoib: Suppress warning for send only join failures IB/ipoib: Clean up send-only multicast joins IB/srp: Fix possible protection fault IB/core: Move SM class defines from ib_mad.h to ib_smi.h IB/core: Remove unnecessary defines from ib_mad.h IB/hfi1: Add PSM2 user space header to header_install IB/hfi1: Add CSRs for CONFIG_SDMA_VERBOSITY mlx5: Fix incorrect wc pkey_index assignment for GSI messages IB/mlx5: avoid destroying a NULL mr in reg_user_mr error flow IB/uverbs: reject invalid or unknown opcodes IB/cxgb4: Fix if statement in pick_local_ip6adddrs IB/sa: Fix rdma netlink message flags IB/ucma: HW Device hot-removal support IB/mlx4_ib: Disassociate support IB/uverbs: Enable device removal when there are active user space applications IB/uverbs: Explicitly pass ib_dev to uverbs commands IB/uverbs: Fix race between ib_uverbs_open and remove_one IB/uverbs: Fix reference counting usage of event files IB/core: Make ib_dealloc_pd return void IB/srp: Create an insecure all physical rkey only if needed ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/mlx4/device.h3
-rw-r--r--include/linux/mlx4/driver.h1
-rw-r--r--include/linux/mlx5/device.h11
-rw-r--r--include/linux/mlx5/driver.h1
-rw-r--r--include/linux/sunrpc/svc_rdma.h1
-rw-r--r--include/net/addrconf.h31
-rw-r--r--include/net/bonding.h7
-rw-r--r--include/rdma/ib_cm.h25
-rw-r--r--include/rdma/ib_mad.h82
-rw-r--r--include/rdma/ib_pack.h2
-rw-r--r--include/rdma/ib_smi.h47
-rw-r--r--include/rdma/ib_verbs.h203
-rw-r--r--include/rdma/opa_port_info.h433
-rw-r--r--include/rdma/opa_smi.h47
-rw-r--r--include/rdma/rdma_netlink.h7
-rw-r--r--include/uapi/rdma/Kbuild1
-rw-r--r--include/uapi/rdma/hfi/Kbuild2
-rw-r--r--include/uapi/rdma/hfi/hfi1_user.h427
-rw-r--r--include/uapi/rdma/rdma_netlink.h82
19 files changed, 1312 insertions, 101 deletions
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index bcbf8c72a77b..baad4cb8e9b0 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -79,7 +79,8 @@ enum {
enum {
MLX4_MAX_PORTS = 2,
- MLX4_MAX_PORT_PKEYS = 128
+ MLX4_MAX_PORT_PKEYS = 128,
+ MLX4_MAX_PORT_GIDS = 128
};
/* base qkey for use in sriov tunnel-qp/proxy-qp communication.
diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h
index 9553a73d2049..5a06d969338e 100644
--- a/include/linux/mlx4/driver.h
+++ b/include/linux/mlx4/driver.h
@@ -59,6 +59,7 @@ struct mlx4_interface {
void (*event) (struct mlx4_dev *dev, void *context,
enum mlx4_dev_event event, unsigned long param);
void * (*get_dev)(struct mlx4_dev *dev, void *context, u8 port);
+ void (*activate)(struct mlx4_dev *dev, void *context);
struct list_head list;
enum mlx4_protocol protocol;
int flags;
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 250b1ff8b48d..8eb3b19af2a4 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -402,6 +402,17 @@ struct mlx5_cmd_teardown_hca_mbox_out {
u8 rsvd[8];
};
+struct mlx5_cmd_query_special_contexts_mbox_in {
+ struct mlx5_inbox_hdr hdr;
+ u8 rsvd[8];
+};
+
+struct mlx5_cmd_query_special_contexts_mbox_out {
+ struct mlx5_outbox_hdr hdr;
+ __be32 dump_fill_mkey;
+ __be32 resd_lkey;
+};
+
struct mlx5_cmd_layout {
u8 type;
u8 rsvd0[3];
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 8b6d6f2154a4..27b53f9a24ad 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -845,6 +845,7 @@ void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol);
int mlx5_register_interface(struct mlx5_interface *intf);
void mlx5_unregister_interface(struct mlx5_interface *intf);
int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id);
+int mlx5_core_query_special_context(struct mlx5_core_dev *dev, u32 *rsvd_lkey);
struct mlx5_profile {
u64 mask;
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index d5ee6d8b7c58..7ccc961f33e9 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -132,6 +132,7 @@ struct svcxprt_rdma {
struct list_head sc_accept_q; /* Conn. waiting accept */
int sc_ord; /* RDMA read limit */
int sc_max_sge;
+ int sc_max_sge_rd; /* max sge for read target */
int sc_sq_depth; /* Depth of SQ */
atomic_t sc_sq_count; /* Number of SQ WR on queue */
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 0c3ac5acb85f..b5474b1fcd83 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -91,6 +91,37 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2);
void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr);
void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr);
+static inline int addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
+{
+ if (dev->addr_len != ETH_ALEN)
+ return -1;
+ memcpy(eui, dev->dev_addr, 3);
+ memcpy(eui + 5, dev->dev_addr + 3, 3);
+
+ /*
+ * The zSeries OSA network cards can be shared among various
+ * OS instances, but the OSA cards have only one MAC address.
+ * This leads to duplicate address conflicts in conjunction
+ * with IPv6 if more than one instance uses the same card.
+ *
+ * The driver for these cards can deliver a unique 16-bit
+ * identifier for each instance sharing the same card. It is
+ * placed instead of 0xFFFE in the interface identifier. The
+ * "u" bit of the interface identifier is not inverted in this
+ * case. Hence the resulting interface identifier has local
+ * scope according to RFC2373.
+ */
+ if (dev->dev_id) {
+ eui[3] = (dev->dev_id >> 8) & 0xFF;
+ eui[4] = dev->dev_id & 0xFF;
+ } else {
+ eui[3] = 0xFF;
+ eui[4] = 0xFE;
+ eui[0] ^= 2;
+ }
+ return 0;
+}
+
static inline unsigned long addrconf_timeout_fixup(u32 timeout,
unsigned int unit)
{
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 20defc0353d1..c1740a2794a3 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -310,6 +310,13 @@ static inline bool bond_uses_primary(struct bonding *bond)
return bond_mode_uses_primary(BOND_MODE(bond));
}
+static inline struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond)
+{
+ struct slave *slave = rcu_dereference(bond->curr_active_slave);
+
+ return bond_uses_primary(bond) && slave ? slave->dev : NULL;
+}
+
static inline bool bond_slave_is_up(struct slave *slave)
{
return netif_running(slave->dev) && netif_carrier_ok(slave->dev);
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index 39ed2d2fbd51..92a7d85917b4 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -105,14 +105,16 @@ enum ib_cm_data_size {
IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE = 216,
IB_CM_SIDR_REP_PRIVATE_DATA_SIZE = 136,
IB_CM_SIDR_REP_INFO_LENGTH = 72,
- /* compare done u32 at a time */
- IB_CM_COMPARE_SIZE = (64 / sizeof(u32))
};
struct ib_cm_id;
struct ib_cm_req_event_param {
struct ib_cm_id *listen_id;
+
+ /* P_Key that was used by the GMP's BTH header */
+ u16 bth_pkey;
+
u8 port;
struct ib_sa_path_rec *primary_path;
@@ -223,6 +225,9 @@ struct ib_cm_apr_event_param {
struct ib_cm_sidr_req_event_param {
struct ib_cm_id *listen_id;
+ __be64 service_id;
+ /* P_Key that was used by the GMP's BTH header */
+ u16 bth_pkey;
u8 port;
u16 pkey;
};
@@ -337,11 +342,6 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id);
#define IB_SDP_SERVICE_ID cpu_to_be64(0x0000000000010000ULL)
#define IB_SDP_SERVICE_ID_MASK cpu_to_be64(0xFFFFFFFFFFFF0000ULL)
-struct ib_cm_compare_data {
- u32 data[IB_CM_COMPARE_SIZE];
- u32 mask[IB_CM_COMPARE_SIZE];
-};
-
/**
* ib_cm_listen - Initiates listening on the specified service ID for
* connection and service ID resolution requests.
@@ -354,12 +354,13 @@ struct ib_cm_compare_data {
* range of service IDs. If set to 0, the service ID is matched
* exactly. This parameter is ignored if %service_id is set to
* IB_CM_ASSIGN_SERVICE_ID.
- * @compare_data: This parameter is optional. It specifies data that must
- * appear in the private data of a connection request for the specified
- * listen request.
*/
-int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask,
- struct ib_cm_compare_data *compare_data);
+int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
+ __be64 service_mask);
+
+struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
+ ib_cm_handler cm_handler,
+ __be64 service_id);
struct ib_cm_req_param {
struct ib_sa_path_rec *primary_path;
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index c8422d5a5a91..188df91d5851 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -127,6 +127,23 @@
#define IB_DEFAULT_PKEY_PARTIAL 0x7FFF
#define IB_DEFAULT_PKEY_FULL 0xFFFF
+/*
+ * Generic trap/notice types
+ */
+#define IB_NOTICE_TYPE_FATAL 0x80
+#define IB_NOTICE_TYPE_URGENT 0x81
+#define IB_NOTICE_TYPE_SECURITY 0x82
+#define IB_NOTICE_TYPE_SM 0x83
+#define IB_NOTICE_TYPE_INFO 0x84
+
+/*
+ * Generic trap/notice producers
+ */
+#define IB_NOTICE_PROD_CA cpu_to_be16(1)
+#define IB_NOTICE_PROD_SWITCH cpu_to_be16(2)
+#define IB_NOTICE_PROD_ROUTER cpu_to_be16(3)
+#define IB_NOTICE_PROD_CLASS_MGR cpu_to_be16(4)
+
enum {
IB_MGMT_MAD_HDR = 24,
IB_MGMT_MAD_DATA = 232,
@@ -240,6 +257,70 @@ struct ib_class_port_info {
__be32 trap_qkey;
};
+struct ib_mad_notice_attr {
+ u8 generic_type;
+ u8 prod_type_msb;
+ __be16 prod_type_lsb;
+ __be16 trap_num;
+ __be16 issuer_lid;
+ __be16 toggle_count;
+
+ union {
+ struct {
+ u8 details[54];
+ } raw_data;
+
+ struct {
+ __be16 reserved;
+ __be16 lid; /* where violation happened */
+ u8 port_num; /* where violation happened */
+ } __packed ntc_129_131;
+
+ struct {
+ __be16 reserved;
+ __be16 lid; /* LID where change occurred */
+ u8 reserved2;
+ u8 local_changes; /* low bit - local changes */
+ __be32 new_cap_mask; /* new capability mask */
+ u8 reserved3;
+ u8 change_flags; /* low 3 bits only */
+ } __packed ntc_144;
+
+ struct {
+ __be16 reserved;
+ __be16 lid; /* lid where sys guid changed */
+ __be16 reserved2;
+ __be64 new_sys_guid;
+ } __packed ntc_145;
+
+ struct {
+ __be16 reserved;
+ __be16 lid;
+ __be16 dr_slid;
+ u8 method;
+ u8 reserved2;
+ __be16 attr_id;
+ __be32 attr_mod;
+ __be64 mkey;
+ u8 reserved3;
+ u8 dr_trunc_hop;
+ u8 dr_rtn_path[30];
+ } __packed ntc_256;
+
+ struct {
+ __be16 reserved;
+ __be16 lid1;
+ __be16 lid2;
+ __be32 key;
+ __be32 sl_qp1; /* SL: high 4 bits */
+ __be32 qp2; /* high 8 bits reserved */
+ union ib_gid gid1;
+ union ib_gid gid2;
+ } __packed ntc_257_258;
+
+ } details;
+};
+
/**
* ib_mad_send_buf - MAD data buffer and work request for sends.
* @next: A pointer used to chain together MADs for posting.
@@ -388,7 +469,6 @@ enum {
struct ib_mad_agent {
struct ib_device *device;
struct ib_qp *qp;
- struct ib_mr *mr;
ib_mad_recv_handler recv_handler;
ib_mad_send_handler send_handler;
ib_mad_snoop_handler snoop_handler;
diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
index b1f7592e02e4..709a5331e6b9 100644
--- a/include/rdma/ib_pack.h
+++ b/include/rdma/ib_pack.h
@@ -76,6 +76,8 @@ enum {
IB_OPCODE_UC = 0x20,
IB_OPCODE_RD = 0x40,
IB_OPCODE_UD = 0x60,
+ /* per IBTA 3.1 Table 38, A10.3.2 */
+ IB_OPCODE_CNP = 0x80,
/* operations -- just used to define real constants */
IB_OPCODE_SEND_FIRST = 0x00,
diff --git a/include/rdma/ib_smi.h b/include/rdma/ib_smi.h
index 98b9086d769a..b439e988408e 100644
--- a/include/rdma/ib_smi.h
+++ b/include/rdma/ib_smi.h
@@ -119,10 +119,57 @@ struct ib_port_info {
u8 link_roundtrip_latency[3];
};
+struct ib_node_info {
+ u8 base_version;
+ u8 class_version;
+ u8 node_type;
+ u8 num_ports;
+ __be64 sys_guid;
+ __be64 node_guid;
+ __be64 port_guid;
+ __be16 partition_cap;
+ __be16 device_id;
+ __be32 revision;
+ u8 local_port_num;
+ u8 vendor_id[3];
+} __packed;
+
+struct ib_vl_weight_elem {
+ u8 vl; /* IB: VL is low 4 bits, upper 4 bits reserved */
+ /* OPA: VL is low 5 bits, upper 3 bits reserved */
+ u8 weight;
+};
+
static inline u8
ib_get_smp_direction(struct ib_smp *smp)
{
return ((smp->status & IB_SMP_DIRECTION) == IB_SMP_DIRECTION);
}
+/*
+ * SM Trap/Notice numbers
+ */
+#define IB_NOTICE_TRAP_LLI_THRESH cpu_to_be16(129)
+#define IB_NOTICE_TRAP_EBO_THRESH cpu_to_be16(130)
+#define IB_NOTICE_TRAP_FLOW_UPDATE cpu_to_be16(131)
+#define IB_NOTICE_TRAP_CAP_MASK_CHG cpu_to_be16(144)
+#define IB_NOTICE_TRAP_SYS_GUID_CHG cpu_to_be16(145)
+#define IB_NOTICE_TRAP_BAD_MKEY cpu_to_be16(256)
+#define IB_NOTICE_TRAP_BAD_PKEY cpu_to_be16(257)
+#define IB_NOTICE_TRAP_BAD_QKEY cpu_to_be16(258)
+
+/*
+ * Other local changes flags (trap 144).
+ */
+#define IB_NOTICE_TRAP_LSE_CHG 0x04 /* Link Speed Enable changed */
+#define IB_NOTICE_TRAP_LWE_CHG 0x02 /* Link Width Enable changed */
+#define IB_NOTICE_TRAP_NODE_DESC_CHG 0x01
+
+/*
+ * M_Key volation flags in dr_trunc_hop (trap 256).
+ */
+#define IB_NOTICE_TRAP_DR_NOTICE 0x80
+#define IB_NOTICE_TRAP_DR_TRUNC 0x40
+
+
#endif /* IB_SMI_H */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 43c1cf01c84b..7845fae6f2df 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -48,6 +48,7 @@
#include <linux/rwsem.h>
#include <linux/scatterlist.h>
#include <linux/workqueue.h>
+#include <linux/socket.h>
#include <uapi/linux/if_ether.h>
#include <linux/atomic.h>
@@ -64,6 +65,12 @@ union ib_gid {
} global;
};
+extern union ib_gid zgid;
+
+struct ib_gid_attr {
+ struct net_device *ndev;
+};
+
enum rdma_node_type {
/* IB values map to NodeInfo:NodeType. */
RDMA_NODE_IB_CA = 1,
@@ -284,7 +291,7 @@ enum ib_port_cap_flags {
IB_PORT_BOOT_MGMT_SUP = 1 << 23,
IB_PORT_LINK_LATENCY_SUP = 1 << 24,
IB_PORT_CLIENT_REG_SUP = 1 << 25,
- IB_PORT_IP_BASED_GIDS = 1 << 26
+ IB_PORT_IP_BASED_GIDS = 1 << 26,
};
enum ib_port_width {
@@ -556,20 +563,18 @@ __attribute_const__ int ib_rate_to_mult(enum ib_rate rate);
*/
__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate);
-enum ib_mr_create_flags {
- IB_MR_SIGNATURE_EN = 1,
-};
/**
- * ib_mr_init_attr - Memory region init attributes passed to routine
- * ib_create_mr.
- * @max_reg_descriptors: max number of registration descriptors that
- * may be used with registration work requests.
- * @flags: MR creation flags bit mask.
+ * enum ib_mr_type - memory region type
+ * @IB_MR_TYPE_MEM_REG: memory region that is used for
+ * normal registration
+ * @IB_MR_TYPE_SIGNATURE: memory region that is used for
+ * signature operations (data-integrity
+ * capable regions)
*/
-struct ib_mr_init_attr {
- int max_reg_descriptors;
- u32 flags;
+enum ib_mr_type {
+ IB_MR_TYPE_MEM_REG,
+ IB_MR_TYPE_SIGNATURE,
};
/**
@@ -1252,9 +1257,11 @@ struct ib_udata {
};
struct ib_pd {
+ u32 local_dma_lkey;
struct ib_device *device;
struct ib_uobject *uobject;
atomic_t usecnt; /* count all resources */
+ struct ib_mr *local_mr;
};
struct ib_xrcd {
@@ -1488,7 +1495,7 @@ struct ib_cache {
rwlock_t lock;
struct ib_event_handler event_handler;
struct ib_pkey_cache **pkey_cache;
- struct ib_gid_cache **gid_cache;
+ struct ib_gid_table **gid_cache;
u8 *lmc_cache;
};
@@ -1550,6 +1557,8 @@ struct ib_device {
spinlock_t client_data_lock;
struct list_head core_list;
+ /* Access to the client_data_list is protected by the client_data_lock
+ * spinlock and the lists_rwsem read-write semaphore */
struct list_head client_data_list;
struct ib_cache cache;
@@ -1572,9 +1581,47 @@ struct ib_device {
struct ib_port_attr *port_attr);
enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
u8 port_num);
+ /* When calling get_netdev, the HW vendor's driver should return the
+ * net device of device @device at port @port_num or NULL if such
+ * a net device doesn't exist. The vendor driver should call dev_hold
+ * on this net device. The HW vendor's device driver must guarantee
+ * that this function returns NULL before the net device reaches
+ * NETDEV_UNREGISTER_FINAL state.
+ */
+ struct net_device *(*get_netdev)(struct ib_device *device,
+ u8 port_num);
int (*query_gid)(struct ib_device *device,
u8 port_num, int index,
union ib_gid *gid);
+ /* When calling add_gid, the HW vendor's driver should
+ * add the gid of device @device at gid index @index of
+ * port @port_num to be @gid. Meta-info of that gid (for example,
+ * the network device related to this gid is available
+ * at @attr. @context allows the HW vendor driver to store extra
+ * information together with a GID entry. The HW vendor may allocate
+ * memory to contain this information and store it in @context when a
+ * new GID entry is written to. Params are consistent until the next
+ * call of add_gid or delete_gid. The function should return 0 on
+ * success or error otherwise. The function could be called
+ * concurrently for different ports. This function is only called
+ * when roce_gid_table is used.
+ */
+ int (*add_gid)(struct ib_device *device,
+ u8 port_num,
+ unsigned int index,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr,
+ void **context);
+ /* When calling del_gid, the HW vendor's driver should delete the
+ * gid of device @device at gid index @index of port @port_num.
+ * Upon the deletion of a GID entry, the HW vendor must free any
+ * allocated memory. The caller will clear @context afterwards.
+ * This function is only called when roce_gid_table is used.
+ */
+ int (*del_gid)(struct ib_device *device,
+ u8 port_num,
+ unsigned int index,
+ void **context);
int (*query_pkey)(struct ib_device *device,
u8 port_num, u16 index, u16 *pkey);
int (*modify_device)(struct ib_device *device,
@@ -1668,11 +1715,9 @@ struct ib_device {
int (*query_mr)(struct ib_mr *mr,
struct ib_mr_attr *mr_attr);
int (*dereg_mr)(struct ib_mr *mr);
- int (*destroy_mr)(struct ib_mr *mr);
- struct ib_mr * (*create_mr)(struct ib_pd *pd,
- struct ib_mr_init_attr *mr_init_attr);
- struct ib_mr * (*alloc_fast_reg_mr)(struct ib_pd *pd,
- int max_page_list_len);
+ struct ib_mr * (*alloc_mr)(struct ib_pd *pd,
+ enum ib_mr_type mr_type,
+ u32 max_num_sg);
struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device,
int page_list_len);
void (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list);
@@ -1724,6 +1769,7 @@ struct ib_device {
int (*destroy_flow)(struct ib_flow *flow_id);
int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status);
+ void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
struct ib_dma_mapping_ops *dma_ops;
@@ -1761,8 +1807,30 @@ struct ib_device {
struct ib_client {
char *name;
void (*add) (struct ib_device *);
- void (*remove)(struct ib_device *);
-
+ void (*remove)(struct ib_device *, void *client_data);
+
+ /* Returns the net_dev belonging to this ib_client and matching the
+ * given parameters.
+ * @dev: An RDMA device that the net_dev use for communication.
+ * @port: A physical port number on the RDMA device.
+ * @pkey: P_Key that the net_dev uses if applicable.
+ * @gid: A GID that the net_dev uses to communicate.
+ * @addr: An IP address the net_dev is configured with.
+ * @client_data: The device's client data set by ib_set_client_data().
+ *
+ * An ib_client that implements a net_dev on top of RDMA devices
+ * (such as IP over IB) should implement this callback, allowing the
+ * rdma_cm module to find the right net_dev for a given request.
+ *
+ * The caller is responsible for calling dev_put on the returned
+ * netdev. */
+ struct net_device *(*get_net_dev_by_params)(
+ struct ib_device *dev,
+ u8 port,
+ u16 pkey,
+ const union ib_gid *gid,
+ const struct sockaddr *addr,
+ void *client_data);
struct list_head list;
};
@@ -2071,34 +2139,6 @@ static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num)
}
/**
- * rdma_cap_read_multi_sge - Check if the port of device has the capability
- * RDMA Read Multiple Scatter-Gather Entries.
- * @device: Device to check
- * @port_num: Port number to check
- *
- * iWARP has a restriction that RDMA READ requests may only have a single
- * Scatter/Gather Entry (SGE) in the work request.
- *
- * NOTE: although the linux kernel currently assumes all devices are either
- * single SGE RDMA READ devices or identical SGE maximums for RDMA READs and
- * WRITEs, according to Tom Talpey, this is not accurate. There are some
- * devices out there that support more than a single SGE on RDMA READ
- * requests, but do not support the same number of SGEs as they do on
- * RDMA WRITE requests. The linux kernel would need rearchitecting to
- * support these imbalanced READ/WRITE SGEs allowed devices. So, for now,
- * suffice with either the device supports the same READ/WRITE SGEs, or
- * it only gets one READ sge.
- *
- * Return: true for any device that allows more than one SGE in RDMA READ
- * requests.
- */
-static inline bool rdma_cap_read_multi_sge(struct ib_device *device,
- u8 port_num)
-{
- return !(device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IWARP);
-}
-
-/**
* rdma_max_mad_size - Return the max MAD size required by this RDMA Port.
*
* @device: Device
@@ -2115,6 +2155,26 @@ static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_n
return device->port_immutable[port_num].max_mad_size;
}
+/**
+ * rdma_cap_roce_gid_table - Check if the port of device uses roce_gid_table
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * RoCE GID table mechanism manages the various GIDs for a device.
+ *
+ * NOTE: if allocating the port's GID table has failed, this call will still
+ * return true, but any RoCE GID table API will fail.
+ *
+ * Return: true if the port uses RoCE GID table mechanism in order to manage
+ * its GIDs.
+ */
+static inline bool rdma_cap_roce_gid_table(const struct ib_device *device,
+ u8 port_num)
+{
+ return rdma_protocol_roce(device, port_num) &&
+ device->add_gid && device->del_gid;
+}
+
int ib_query_gid(struct ib_device *device,
u8 port_num, int index, union ib_gid *gid);
@@ -2135,20 +2195,9 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid,
int ib_find_pkey(struct ib_device *device,
u8 port_num, u16 pkey, u16 *index);
-/**
- * ib_alloc_pd - Allocates an unused protection domain.
- * @device: The device on which to allocate the protection domain.
- *
- * A protection domain object provides an association between QPs, shared
- * receive queues, address handles, memory regions, and memory windows.
- */
struct ib_pd *ib_alloc_pd(struct ib_device *device);
-/**
- * ib_dealloc_pd - Deallocates a protection domain.
- * @pd: The protection domain to deallocate.
- */
-int ib_dealloc_pd(struct ib_pd *pd);
+void ib_dealloc_pd(struct ib_pd *pd);
/**
* ib_create_ah - Creates an address handle for the given address vector.
@@ -2775,33 +2824,9 @@ int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
*/
int ib_dereg_mr(struct ib_mr *mr);
-
-/**
- * ib_create_mr - Allocates a memory region that may be used for
- * signature handover operations.
- * @pd: The protection domain associated with the region.
- * @mr_init_attr: memory region init attributes.
- */
-struct ib_mr *ib_create_mr(struct ib_pd *pd,
- struct ib_mr_init_attr *mr_init_attr);
-
-/**
- * ib_destroy_mr - Destroys a memory region that was created using
- * ib_create_mr and removes it from HW translation tables.
- * @mr: The memory region to destroy.
- *
- * This function can fail, if the memory region has memory windows bound to it.
- */
-int ib_destroy_mr(struct ib_mr *mr);
-
-/**
- * ib_alloc_fast_reg_mr - Allocates memory region usable with the
- * IB_WR_FAST_REG_MR send work request.
- * @pd: The protection domain associated with the region.
- * @max_page_list_len: requested max physical buffer list length to be
- * used with fast register work requests for this MR.
- */
-struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len);
+struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
+ enum ib_mr_type mr_type,
+ u32 max_num_sg);
/**
* ib_alloc_fast_reg_page_list - Allocates a page list array
@@ -2994,4 +3019,8 @@ static inline int ib_check_mr_access(int flags)
int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status);
+struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
+ u16 pkey, const union ib_gid *gid,
+ const struct sockaddr *addr);
+
#endif /* IB_VERBS_H */
diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h
new file mode 100644
index 000000000000..391dae1931c0
--- /dev/null
+++ b/include/rdma/opa_port_info.h
@@ -0,0 +1,433 @@
+/*
+ * Copyright (c) 2014 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if !defined(OPA_PORT_INFO_H)
+#define OPA_PORT_INFO_H
+
+/* Temporary until HFI driver is updated */
+#ifndef USE_PI_LED_ENABLE
+#define USE_PI_LED_ENABLE 0
+#endif
+
+#define OPA_PORT_LINK_MODE_NOP 0 /* No change */
+#define OPA_PORT_LINK_MODE_OPA 4 /* Port mode is OPA */
+
+#define OPA_PORT_PACKET_FORMAT_NOP 0 /* No change */
+#define OPA_PORT_PACKET_FORMAT_8B 1 /* Format 8B */
+#define OPA_PORT_PACKET_FORMAT_9B 2 /* Format 9B */
+#define OPA_PORT_PACKET_FORMAT_10B 4 /* Format 10B */
+#define OPA_PORT_PACKET_FORMAT_16B 8 /* Format 16B */
+
+#define OPA_PORT_LTP_CRC_MODE_NONE 0 /* No change */
+#define OPA_PORT_LTP_CRC_MODE_14 1 /* 14-bit LTP CRC mode (optional) */
+#define OPA_PORT_LTP_CRC_MODE_16 2 /* 16-bit LTP CRC mode */
+#define OPA_PORT_LTP_CRC_MODE_48 4 /* 48-bit LTP CRC mode (optional) */
+#define OPA_PORT_LTP_CRC_MODE_PER_LANE 8 /* 12/16-bit per lane LTP CRC mode */
+
+/* Link Down / Neighbor Link Down Reason; indicated as follows: */
+#define OPA_LINKDOWN_REASON_NONE 0 /* No specified reason */
+#define OPA_LINKDOWN_REASON_RCV_ERROR_0 1
+#define OPA_LINKDOWN_REASON_BAD_PKT_LEN 2
+#define OPA_LINKDOWN_REASON_PKT_TOO_LONG 3
+#define OPA_LINKDOWN_REASON_PKT_TOO_SHORT 4
+#define OPA_LINKDOWN_REASON_BAD_SLID 5
+#define OPA_LINKDOWN_REASON_BAD_DLID 6
+#define OPA_LINKDOWN_REASON_BAD_L2 7
+#define OPA_LINKDOWN_REASON_BAD_SC 8
+#define OPA_LINKDOWN_REASON_RCV_ERROR_8 9
+#define OPA_LINKDOWN_REASON_BAD_MID_TAIL 10
+#define OPA_LINKDOWN_REASON_RCV_ERROR_10 11
+#define OPA_LINKDOWN_REASON_PREEMPT_ERROR 12
+#define OPA_LINKDOWN_REASON_PREEMPT_VL15 13
+#define OPA_LINKDOWN_REASON_BAD_VL_MARKER 14
+#define OPA_LINKDOWN_REASON_RCV_ERROR_14 15
+#define OPA_LINKDOWN_REASON_RCV_ERROR_15 16
+#define OPA_LINKDOWN_REASON_BAD_HEAD_DIST 17
+#define OPA_LINKDOWN_REASON_BAD_TAIL_DIST 18
+#define OPA_LINKDOWN_REASON_BAD_CTRL_DIST 19
+#define OPA_LINKDOWN_REASON_BAD_CREDIT_ACK 20
+#define OPA_LINKDOWN_REASON_UNSUPPORTED_VL_MARKER 21
+#define OPA_LINKDOWN_REASON_BAD_PREEMPT 22
+#define OPA_LINKDOWN_REASON_BAD_CONTROL_FLIT 23
+#define OPA_LINKDOWN_REASON_EXCEED_MULTICAST_LIMIT 24
+#define OPA_LINKDOWN_REASON_RCV_ERROR_24 25
+#define OPA_LINKDOWN_REASON_RCV_ERROR_25 26
+#define OPA_LINKDOWN_REASON_RCV_ERROR_26 27
+#define OPA_LINKDOWN_REASON_RCV_ERROR_27 28
+#define OPA_LINKDOWN_REASON_RCV_ERROR_28 29
+#define OPA_LINKDOWN_REASON_RCV_ERROR_29 30
+#define OPA_LINKDOWN_REASON_RCV_ERROR_30 31
+#define OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN 32
+#define OPA_LINKDOWN_REASON_UNKNOWN 33
+/* 34 -reserved */
+#define OPA_LINKDOWN_REASON_REBOOT 35
+#define OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN 36
+/* 37-38 reserved */
+#define OPA_LINKDOWN_REASON_FM_BOUNCE 39
+#define OPA_LINKDOWN_REASON_SPEED_POLICY 40
+#define OPA_LINKDOWN_REASON_WIDTH_POLICY 41
+/* 42-48 reserved */
+#define OPA_LINKDOWN_REASON_DISCONNECTED 49
+#define OPA_LINKDOWN_REASONLOCAL_MEDIA_NOT_INSTALLED 50
+#define OPA_LINKDOWN_REASON_NOT_INSTALLED 51
+#define OPA_LINKDOWN_REASON_CHASSIS_CONFIG 52
+/* 53 reserved */
+#define OPA_LINKDOWN_REASON_END_TO_END_NOT_INSTALLED 54
+/* 55 reserved */
+#define OPA_LINKDOWN_REASON_POWER_POLICY 56
+#define OPA_LINKDOWN_REASON_LINKSPEED_POLICY 57
+#define OPA_LINKDOWN_REASON_LINKWIDTH_POLICY 58
+/* 59 reserved */
+#define OPA_LINKDOWN_REASON_SWITCH_MGMT 60
+#define OPA_LINKDOWN_REASON_SMA_DISABLED 61
+/* 62 reserved */
+#define OPA_LINKDOWN_REASON_TRANSIENT 63
+/* 64-255 reserved */
+
+/* OPA Link Init reason; indicated as follows: */
+/* 3-7; 11-15 reserved; 8-15 cleared on Polling->LinkUp */
+#define OPA_LINKINIT_REASON_NOP 0
+#define OPA_LINKINIT_REASON_LINKUP (1 << 4)
+#define OPA_LINKINIT_REASON_FLAPPING (2 << 4)
+#define OPA_LINKINIT_REASON_CLEAR (8 << 4)
+#define OPA_LINKINIT_OUTSIDE_POLICY (8 << 4)
+#define OPA_LINKINIT_QUARANTINED (9 << 4)
+#define OPA_LINKINIT_INSUFIC_CAPABILITY (10 << 4)
+
+#define OPA_LINK_SPEED_NOP 0x0000 /* Reserved (1-5 Gbps) */
+#define OPA_LINK_SPEED_12_5G 0x0001 /* 12.5 Gbps */
+#define OPA_LINK_SPEED_25G 0x0002 /* 25.78125? Gbps (EDR) */
+
+#define OPA_LINK_WIDTH_1X 0x0001
+#define OPA_LINK_WIDTH_2X 0x0002
+#define OPA_LINK_WIDTH_3X 0x0004
+#define OPA_LINK_WIDTH_4X 0x0008
+
+#define OPA_CAP_MASK3_IsSnoopSupported (1 << 7)
+#define OPA_CAP_MASK3_IsAsyncSC2VLSupported (1 << 6)
+#define OPA_CAP_MASK3_IsAddrRangeConfigSupported (1 << 5)
+#define OPA_CAP_MASK3_IsPassThroughSupported (1 << 4)
+#define OPA_CAP_MASK3_IsSharedSpaceSupported (1 << 3)
+/* reserved (1 << 2) */
+#define OPA_CAP_MASK3_IsVLMarkerSupported (1 << 1)
+#define OPA_CAP_MASK3_IsVLrSupported (1 << 0)
+
+/**
+ * new MTU values
+ */
+enum {
+ OPA_MTU_8192 = 6,
+ OPA_MTU_10240 = 7,
+};
+
+enum {
+ OPA_PORT_PHYS_CONF_DISCONNECTED = 0,
+ OPA_PORT_PHYS_CONF_STANDARD = 1,
+ OPA_PORT_PHYS_CONF_FIXED = 2,
+ OPA_PORT_PHYS_CONF_VARIABLE = 3,
+ OPA_PORT_PHYS_CONF_SI_PHOTO = 4
+};
+
+enum port_info_field_masks {
+ /* vl.cap */
+ OPA_PI_MASK_VL_CAP = 0x1F,
+ /* port_states.ledenable_offlinereason */
+ OPA_PI_MASK_OFFLINE_REASON = 0x0F,
+ OPA_PI_MASK_LED_ENABLE = 0x40,
+ /* port_states.unsleepstate_downdefstate */
+ OPA_PI_MASK_UNSLEEP_STATE = 0xF0,
+ OPA_PI_MASK_DOWNDEF_STATE = 0x0F,
+ /* port_states.portphysstate_portstate */
+ OPA_PI_MASK_PORT_PHYSICAL_STATE = 0xF0,
+ OPA_PI_MASK_PORT_STATE = 0x0F,
+ /* port_phys_conf */
+ OPA_PI_MASK_PORT_PHYSICAL_CONF = 0x0F,
+ /* collectivemask_multicastmask */
+ OPA_PI_MASK_COLLECT_MASK = 0x38,
+ OPA_PI_MASK_MULTICAST_MASK = 0x07,
+ /* mkeyprotect_lmc */
+ OPA_PI_MASK_MKEY_PROT_BIT = 0xC0,
+ OPA_PI_MASK_LMC = 0x0F,
+ /* smsl */
+ OPA_PI_MASK_SMSL = 0x1F,
+ /* partenforce_filterraw */
+ /* Filter Raw In/Out bits 1 and 2 were removed */
+ OPA_PI_MASK_LINKINIT_REASON = 0xF0,
+ OPA_PI_MASK_PARTITION_ENFORCE_IN = 0x08,
+ OPA_PI_MASK_PARTITION_ENFORCE_OUT = 0x04,
+ /* operational_vls */
+ OPA_PI_MASK_OPERATIONAL_VL = 0x1F,
+ /* sa_qp */
+ OPA_PI_MASK_SA_QP = 0x00FFFFFF,
+ /* sm_trap_qp */
+ OPA_PI_MASK_SM_TRAP_QP = 0x00FFFFFF,
+ /* localphy_overrun_errors */
+ OPA_PI_MASK_LOCAL_PHY_ERRORS = 0xF0,
+ OPA_PI_MASK_OVERRUN_ERRORS = 0x0F,
+ /* clientrereg_subnettimeout */
+ OPA_PI_MASK_CLIENT_REREGISTER = 0x80,
+ OPA_PI_MASK_SUBNET_TIMEOUT = 0x1F,
+ /* port_link_mode */
+ OPA_PI_MASK_PORT_LINK_SUPPORTED = (0x001F << 10),
+ OPA_PI_MASK_PORT_LINK_ENABLED = (0x001F << 5),
+ OPA_PI_MASK_PORT_LINK_ACTIVE = (0x001F << 0),
+ /* port_link_crc_mode */
+ OPA_PI_MASK_PORT_LINK_CRC_SUPPORTED = 0x0F00,
+ OPA_PI_MASK_PORT_LINK_CRC_ENABLED = 0x00F0,
+ OPA_PI_MASK_PORT_LINK_CRC_ACTIVE = 0x000F,
+ /* port_mode */
+ OPA_PI_MASK_PORT_MODE_SECURITY_CHECK = 0x0001,
+ OPA_PI_MASK_PORT_MODE_16B_TRAP_QUERY = 0x0002,
+ OPA_PI_MASK_PORT_MODE_PKEY_CONVERT = 0x0004,
+ OPA_PI_MASK_PORT_MODE_SC2SC_MAPPING = 0x0008,
+ OPA_PI_MASK_PORT_MODE_VL_MARKER = 0x0010,
+ OPA_PI_MASK_PORT_PASS_THROUGH = 0x0020,
+ OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE = 0x0040,
+ /* flit_control.interleave */
+ OPA_PI_MASK_INTERLEAVE_DIST_SUP = (0x0003 << 12),
+ OPA_PI_MASK_INTERLEAVE_DIST_ENABLE = (0x0003 << 10),
+ OPA_PI_MASK_INTERLEAVE_MAX_NEST_TX = (0x001F << 5),
+ OPA_PI_MASK_INTERLEAVE_MAX_NEST_RX = (0x001F << 0),
+
+ /* port_error_action */
+ OPA_PI_MASK_EX_BUFFER_OVERRUN = 0x80000000,
+ /* 7 bits reserved */
+ OPA_PI_MASK_FM_CFG_ERR_EXCEED_MULTICAST_LIMIT = 0x00800000,
+ OPA_PI_MASK_FM_CFG_BAD_CONTROL_FLIT = 0x00400000,
+ OPA_PI_MASK_FM_CFG_BAD_PREEMPT = 0x00200000,
+ OPA_PI_MASK_FM_CFG_UNSUPPORTED_VL_MARKER = 0x00100000,
+ OPA_PI_MASK_FM_CFG_BAD_CRDT_ACK = 0x00080000,
+ OPA_PI_MASK_FM_CFG_BAD_CTRL_DIST = 0x00040000,
+ OPA_PI_MASK_FM_CFG_BAD_TAIL_DIST = 0x00020000,
+ OPA_PI_MASK_FM_CFG_BAD_HEAD_DIST = 0x00010000,
+ /* 2 bits reserved */
+ OPA_PI_MASK_PORT_RCV_BAD_VL_MARKER = 0x00002000,
+ OPA_PI_MASK_PORT_RCV_PREEMPT_VL15 = 0x00001000,
+ OPA_PI_MASK_PORT_RCV_PREEMPT_ERROR = 0x00000800,
+ /* 1 bit reserved */
+ OPA_PI_MASK_PORT_RCV_BAD_MidTail = 0x00000200,
+ /* 1 bit reserved */
+ OPA_PI_MASK_PORT_RCV_BAD_SC = 0x00000080,
+ OPA_PI_MASK_PORT_RCV_BAD_L2 = 0x00000040,
+ OPA_PI_MASK_PORT_RCV_BAD_DLID = 0x00000020,
+ OPA_PI_MASK_PORT_RCV_BAD_SLID = 0x00000010,
+ OPA_PI_MASK_PORT_RCV_PKTLEN_TOOSHORT = 0x00000008,
+ OPA_PI_MASK_PORT_RCV_PKTLEN_TOOLONG = 0x00000004,
+ OPA_PI_MASK_PORT_RCV_BAD_PKTLEN = 0x00000002,
+ OPA_PI_MASK_PORT_RCV_BAD_LT = 0x00000001,
+
+ /* pass_through.res_drctl */
+ OPA_PI_MASK_PASS_THROUGH_DR_CONTROL = 0x01,
+
+ /* buffer_units */
+ OPA_PI_MASK_BUF_UNIT_VL15_INIT = (0x00000FFF << 11),
+ OPA_PI_MASK_BUF_UNIT_VL15_CREDIT_RATE = (0x0000001F << 6),
+ OPA_PI_MASK_BUF_UNIT_CREDIT_ACK = (0x00000003 << 3),
+ OPA_PI_MASK_BUF_UNIT_BUF_ALLOC = (0x00000003 << 0),
+
+ /* neigh_mtu.pvlx_to_mtu */
+ OPA_PI_MASK_NEIGH_MTU_PVL0 = 0xF0,
+ OPA_PI_MASK_NEIGH_MTU_PVL1 = 0x0F,
+
+ /* neigh_mtu.vlstall_hoq_life */
+ OPA_PI_MASK_VL_STALL = (0x03 << 5),
+ OPA_PI_MASK_HOQ_LIFE = (0x1F << 0),
+
+ /* port_neigh_mode */
+ OPA_PI_MASK_NEIGH_MGMT_ALLOWED = (0x01 << 3),
+ OPA_PI_MASK_NEIGH_FW_AUTH_BYPASS = (0x01 << 2),
+ OPA_PI_MASK_NEIGH_NODE_TYPE = (0x03 << 0),
+
+ /* resptime_value */
+ OPA_PI_MASK_RESPONSE_TIME_VALUE = 0x1F,
+
+ /* mtucap */
+ OPA_PI_MASK_MTU_CAP = 0x0F,
+};
+
+#if USE_PI_LED_ENABLE
+struct opa_port_states {
+ u8 reserved;
+ u8 ledenable_offlinereason; /* 1 res, 1 bit, 6 bits */
+ u8 reserved2;
+ u8 portphysstate_portstate; /* 4 bits, 4 bits */
+};
+#define PI_LED_ENABLE_SUP 1
+#else
+struct opa_port_states {
+ u8 reserved;
+ u8 offline_reason; /* 2 res, 6 bits */
+ u8 reserved2;
+ u8 portphysstate_portstate; /* 4 bits, 4 bits */
+};
+#define PI_LED_ENABLE_SUP 0
+#endif
+
+struct opa_port_state_info {
+ struct opa_port_states port_states;
+ u16 link_width_downgrade_tx_active;
+ u16 link_width_downgrade_rx_active;
+};
+
+struct opa_port_info {
+ __be32 lid;
+ __be32 flow_control_mask;
+
+ struct {
+ u8 res; /* was inittype */
+ u8 cap; /* 3 res, 5 bits */
+ __be16 high_limit;
+ __be16 preempt_limit;
+ u8 arb_high_cap;
+ u8 arb_low_cap;
+ } vl;
+
+ struct opa_port_states port_states;
+ u8 port_phys_conf; /* 4 res, 4 bits */
+ u8 collectivemask_multicastmask; /* 2 res, 3, 3 */
+ u8 mkeyprotect_lmc; /* 2 bits, 2 res, 4 bits */
+ u8 smsl; /* 3 res, 5 bits */
+
+ u8 partenforce_filterraw; /* bit fields */
+ u8 operational_vls; /* 3 res, 5 bits */
+ __be16 pkey_8b;
+ __be16 pkey_10b;
+ __be16 mkey_violations;
+
+ __be16 pkey_violations;
+ __be16 qkey_violations;
+ __be32 sm_trap_qp; /* 8 bits, 24 bits */
+
+ __be32 sa_qp; /* 8 bits, 24 bits */
+ u8 neigh_port_num;
+ u8 link_down_reason;
+ u8 neigh_link_down_reason;
+ u8 clientrereg_subnettimeout; /* 1 bit, 2 bits, 5 */
+
+ struct {
+ __be16 supported;
+ __be16 enabled;
+ __be16 active;
+ } link_speed;
+ struct {
+ __be16 supported;
+ __be16 enabled;
+ __be16 active;
+ } link_width;
+ struct {
+ __be16 supported;
+ __be16 enabled;
+ __be16 tx_active;
+ __be16 rx_active;
+ } link_width_downgrade;
+ __be16 port_link_mode; /* 1 res, 5 bits, 5 bits, 5 bits */
+ __be16 port_ltp_crc_mode; /* 4 res, 4 bits, 4 bits, 4 bits */
+
+ __be16 port_mode; /* 9 res, bit fields */
+ struct {
+ __be16 supported;
+ __be16 enabled;
+ } port_packet_format;
+ struct {
+ __be16 interleave; /* 2 res, 2,2,5,5 */
+ struct {
+ __be16 min_initial;
+ __be16 min_tail;
+ u8 large_pkt_limit;
+ u8 small_pkt_limit;
+ u8 max_small_pkt_limit;
+ u8 preemption_limit;
+ } preemption;
+ } flit_control;
+
+ __be32 reserved4;
+ __be32 port_error_action; /* bit field */
+
+ struct {
+ u8 egress_port;
+ u8 res_drctl; /* 7 res, 1 */
+ } pass_through;
+ __be16 mkey_lease_period;
+ __be32 buffer_units; /* 9 res, 12, 5, 3, 3 */
+
+ __be32 reserved5;
+ __be32 sm_lid;
+
+ __be64 mkey;
+
+ __be64 subnet_prefix;
+
+ struct {
+ u8 pvlx_to_mtu[OPA_MAX_VLS/2]; /* 4 bits, 4 bits */
+ } neigh_mtu;
+
+ struct {
+ u8 vlstall_hoqlife; /* 3 bits, 5 bits */
+ } xmit_q[OPA_MAX_VLS];
+
+ struct {
+ u8 addr[16];
+ } ipaddr_ipv6;
+
+ struct {
+ u8 addr[4];
+ } ipaddr_ipv4;
+
+ u32 reserved6;
+ u32 reserved7;
+ u32 reserved8;
+
+ __be64 neigh_node_guid;
+
+ __be32 ib_cap_mask;
+ __be16 reserved9; /* was ib_cap_mask2 */
+ __be16 opa_cap_mask;
+
+ __be32 reserved10; /* was link_roundtrip_latency */
+ __be16 overall_buffer_space;
+ __be16 reserved11; /* was max_credit_hint */
+
+ __be16 diag_code;
+ struct {
+ u8 buffer;
+ u8 wire;
+ } replay_depth;
+ u8 port_neigh_mode;
+ u8 mtucap; /* 4 res, 4 bits */
+
+ u8 resptimevalue; /* 3 res, 5 bits */
+ u8 local_port_num;
+ u8 reserved12;
+ u8 reserved13; /* was guid_cap */
+} __attribute__ ((packed));
+
+#endif /* OPA_PORT_INFO_H */
diff --git a/include/rdma/opa_smi.h b/include/rdma/opa_smi.h
index 29063e84c253..4a529ef47995 100644
--- a/include/rdma/opa_smi.h
+++ b/include/rdma/opa_smi.h
@@ -40,6 +40,10 @@
#define OPA_SMP_DR_DATA_SIZE 1872
#define OPA_SMP_MAX_PATH_HOPS 64
+#define OPA_MAX_VLS 32
+#define OPA_MAX_SLS 32
+#define OPA_MAX_SCS 32
+
#define OPA_SMI_CLASS_VERSION 0x80
#define OPA_LID_PERMISSIVE cpu_to_be32(0xFFFFFFFF)
@@ -73,6 +77,49 @@ struct opa_smp {
} __packed;
+/* Subnet management attributes */
+/* ... */
+#define OPA_ATTRIB_ID_NODE_DESCRIPTION cpu_to_be16(0x0010)
+#define OPA_ATTRIB_ID_NODE_INFO cpu_to_be16(0x0011)
+#define OPA_ATTRIB_ID_PORT_INFO cpu_to_be16(0x0015)
+#define OPA_ATTRIB_ID_PARTITION_TABLE cpu_to_be16(0x0016)
+#define OPA_ATTRIB_ID_SL_TO_SC_MAP cpu_to_be16(0x0017)
+#define OPA_ATTRIB_ID_VL_ARBITRATION cpu_to_be16(0x0018)
+#define OPA_ATTRIB_ID_SM_INFO cpu_to_be16(0x0020)
+#define OPA_ATTRIB_ID_CABLE_INFO cpu_to_be16(0x0032)
+#define OPA_ATTRIB_ID_AGGREGATE cpu_to_be16(0x0080)
+#define OPA_ATTRIB_ID_SC_TO_SL_MAP cpu_to_be16(0x0082)
+#define OPA_ATTRIB_ID_SC_TO_VLR_MAP cpu_to_be16(0x0083)
+#define OPA_ATTRIB_ID_SC_TO_VLT_MAP cpu_to_be16(0x0084)
+#define OPA_ATTRIB_ID_SC_TO_VLNT_MAP cpu_to_be16(0x0085)
+/* ... */
+#define OPA_ATTRIB_ID_PORT_STATE_INFO cpu_to_be16(0x0087)
+/* ... */
+#define OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE cpu_to_be16(0x008A)
+/* ... */
+
+struct opa_node_description {
+ u8 data[64];
+} __attribute__ ((packed));
+
+struct opa_node_info {
+ u8 base_version;
+ u8 class_version;
+ u8 node_type;
+ u8 num_ports;
+ __be32 reserved;
+ __be64 system_image_guid;
+ __be64 node_guid;
+ __be64 port_guid;
+ __be16 partition_cap;
+ __be16 device_id;
+ __be32 revision;
+ u8 local_port_num;
+ u8 vendor_id[3]; /* network byte order */
+} __attribute__ ((packed));
+
+#define OPA_PARTITION_TABLE_BLK_SIZE 32
+
static inline u8
opa_get_smp_direction(struct opa_smp *smp)
{
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index 0790882e0c9b..585266144329 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -77,4 +77,11 @@ int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh,
int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh,
unsigned int group, gfp_t flags);
+/**
+ * Check if there are any listeners to the netlink group
+ * @group: the netlink group ID
+ * Returns 0 on success or a negative for no listeners.
+ */
+int ibnl_chk_listeners(unsigned int group);
+
#endif /* _RDMA_NETLINK_H */
diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild
index 687ae332200f..231901b08f6c 100644
--- a/include/uapi/rdma/Kbuild
+++ b/include/uapi/rdma/Kbuild
@@ -5,3 +5,4 @@ header-y += ib_user_sa.h
header-y += ib_user_verbs.h
header-y += rdma_netlink.h
header-y += rdma_user_cm.h
+header-y += hfi/
diff --git a/include/uapi/rdma/hfi/Kbuild b/include/uapi/rdma/hfi/Kbuild
new file mode 100644
index 000000000000..ef23c294fc71
--- /dev/null
+++ b/include/uapi/rdma/hfi/Kbuild
@@ -0,0 +1,2 @@
+# UAPI Header export list
+header-y += hfi1_user.h
diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h
new file mode 100644
index 000000000000..78c442fbf263
--- /dev/null
+++ b/include/uapi/rdma/hfi/hfi1_user.h
@@ -0,0 +1,427 @@
+/*
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * This file contains defines, structures, etc. that are used
+ * to communicate between kernel and user code.
+ */
+
+#ifndef _LINUX__HFI1_USER_H
+#define _LINUX__HFI1_USER_H
+
+#include <linux/types.h>
+
+/*
+ * This version number is given to the driver by the user code during
+ * initialization in the spu_userversion field of hfi1_user_info, so
+ * the driver can check for compatibility with user code.
+ *
+ * The major version changes when data structures change in an incompatible
+ * way. The driver must be the same for initialization to succeed.
+ */
+#define HFI1_USER_SWMAJOR 4
+
+/*
+ * Minor version differences are always compatible
+ * a within a major version, however if user software is larger
+ * than driver software, some new features and/or structure fields
+ * may not be implemented; the user code must deal with this if it
+ * cares, or it must abort after initialization reports the difference.
+ */
+#define HFI1_USER_SWMINOR 0
+
+/*
+ * Set of HW and driver capability/feature bits.
+ * These bit values are used to configure enabled/disabled HW and
+ * driver features. The same set of bits are communicated to user
+ * space.
+ */
+#define HFI1_CAP_DMA_RTAIL (1UL << 0) /* Use DMA'ed RTail value */
+#define HFI1_CAP_SDMA (1UL << 1) /* Enable SDMA support */
+#define HFI1_CAP_SDMA_AHG (1UL << 2) /* Enable SDMA AHG support */
+#define HFI1_CAP_EXTENDED_PSN (1UL << 3) /* Enable Extended PSN support */
+#define HFI1_CAP_HDRSUPP (1UL << 4) /* Enable Header Suppression */
+/* 1UL << 5 reserved */
+#define HFI1_CAP_USE_SDMA_HEAD (1UL << 6) /* DMA Hdr Q tail vs. use CSR */
+#define HFI1_CAP_MULTI_PKT_EGR (1UL << 7) /* Enable multi-packet Egr buffs*/
+#define HFI1_CAP_NODROP_RHQ_FULL (1UL << 8) /* Don't drop on Hdr Q full */
+#define HFI1_CAP_NODROP_EGR_FULL (1UL << 9) /* Don't drop on EGR buffs full */
+#define HFI1_CAP_TID_UNMAP (1UL << 10) /* Enable Expected TID caching */
+#define HFI1_CAP_PRINT_UNIMPL (1UL << 11) /* Show for unimplemented feats */
+#define HFI1_CAP_ALLOW_PERM_JKEY (1UL << 12) /* Allow use of permissive JKEY */
+#define HFI1_CAP_NO_INTEGRITY (1UL << 13) /* Enable ctxt integrity checks */
+#define HFI1_CAP_PKEY_CHECK (1UL << 14) /* Enable ctxt PKey checking */
+#define HFI1_CAP_STATIC_RATE_CTRL (1UL << 15) /* Allow PBC.StaticRateControl */
+#define HFI1_CAP_QSFP_ENABLED (1UL << 16) /* Enable QSFP check during LNI */
+#define HFI1_CAP_SDMA_HEAD_CHECK (1UL << 17) /* SDMA head checking */
+#define HFI1_CAP_EARLY_CREDIT_RETURN (1UL << 18) /* early credit return */
+
+#define HFI1_RCVHDR_ENTSIZE_2 (1UL << 0)
+#define HFI1_RCVHDR_ENTSIZE_16 (1UL << 1)
+#define HFI1_RCVDHR_ENTSIZE_32 (1UL << 2)
+
+/*
+ * If the unit is specified via open, HFI choice is fixed. If port is
+ * specified, it's also fixed. Otherwise we try to spread contexts
+ * across ports and HFIs, using different algorithms. WITHIN is
+ * the old default, prior to this mechanism.
+ */
+#define HFI1_ALG_ACROSS 0 /* round robin contexts across HFIs, then
+ * ports; this is the default */
+#define HFI1_ALG_WITHIN 1 /* use all contexts on an HFI (round robin
+ * active ports within), then next HFI */
+#define HFI1_ALG_COUNT 2 /* number of algorithm choices */
+
+
+/* User commands. */
+#define HFI1_CMD_ASSIGN_CTXT 1 /* allocate HFI and context */
+#define HFI1_CMD_CTXT_INFO 2 /* find out what resources we got */
+#define HFI1_CMD_USER_INFO 3 /* set up userspace */
+#define HFI1_CMD_TID_UPDATE 4 /* update expected TID entries */
+#define HFI1_CMD_TID_FREE 5 /* free expected TID entries */
+#define HFI1_CMD_CREDIT_UPD 6 /* force an update of PIO credit */
+#define HFI1_CMD_SDMA_STATUS_UPD 7 /* force update of SDMA status ring */
+
+#define HFI1_CMD_RECV_CTRL 8 /* control receipt of packets */
+#define HFI1_CMD_POLL_TYPE 9 /* set the kind of polling we want */
+#define HFI1_CMD_ACK_EVENT 10 /* ack & clear user status bits */
+#define HFI1_CMD_SET_PKEY 11 /* set context's pkey */
+#define HFI1_CMD_CTXT_RESET 12 /* reset context's HW send context */
+/* separate EPROM commands from normal PSM commands */
+#define HFI1_CMD_EP_INFO 64 /* read EPROM device ID */
+#define HFI1_CMD_EP_ERASE_CHIP 65 /* erase whole EPROM */
+#define HFI1_CMD_EP_ERASE_P0 66 /* erase EPROM partition 0 */
+#define HFI1_CMD_EP_ERASE_P1 67 /* erase EPROM partition 1 */
+#define HFI1_CMD_EP_READ_P0 68 /* read EPROM partition 0 */
+#define HFI1_CMD_EP_READ_P1 69 /* read EPROM partition 1 */
+#define HFI1_CMD_EP_WRITE_P0 70 /* write EPROM partition 0 */
+#define HFI1_CMD_EP_WRITE_P1 71 /* write EPROM partition 1 */
+
+#define _HFI1_EVENT_FROZEN_BIT 0
+#define _HFI1_EVENT_LINKDOWN_BIT 1
+#define _HFI1_EVENT_LID_CHANGE_BIT 2
+#define _HFI1_EVENT_LMC_CHANGE_BIT 3
+#define _HFI1_EVENT_SL2VL_CHANGE_BIT 4
+#define _HFI1_MAX_EVENT_BIT _HFI1_EVENT_SL2VL_CHANGE_BIT
+
+#define HFI1_EVENT_FROZEN (1UL << _HFI1_EVENT_FROZEN_BIT)
+#define HFI1_EVENT_LINKDOWN_BIT (1UL << _HFI1_EVENT_LINKDOWN_BIT)
+#define HFI1_EVENT_LID_CHANGE_BIT (1UL << _HFI1_EVENT_LID_CHANGE_BIT)
+#define HFI1_EVENT_LMC_CHANGE_BIT (1UL << _HFI1_EVENT_LMC_CHANGE_BIT)
+#define HFI1_EVENT_SL2VL_CHANGE_BIT (1UL << _HFI1_EVENT_SL2VL_CHANGE_BIT)
+
+/*
+ * These are the status bits readable (in ASCII form, 64bit value)
+ * from the "status" sysfs file. For binary compatibility, values
+ * must remain as is; removed states can be reused for different
+ * purposes.
+ */
+#define HFI1_STATUS_INITTED 0x1 /* basic initialization done */
+/* Chip has been found and initialized */
+#define HFI1_STATUS_CHIP_PRESENT 0x20
+/* IB link is at ACTIVE, usable for data traffic */
+#define HFI1_STATUS_IB_READY 0x40
+/* link is configured, LID, MTU, etc. have been set */
+#define HFI1_STATUS_IB_CONF 0x80
+/* A Fatal hardware error has occurred. */
+#define HFI1_STATUS_HWERROR 0x200
+
+/*
+ * Number of supported shared contexts.
+ * This is the maximum number of software contexts that can share
+ * a hardware send/receive context.
+ */
+#define HFI1_MAX_SHARED_CTXTS 8
+
+/*
+ * Poll types
+ */
+#define HFI1_POLL_TYPE_ANYRCV 0x0
+#define HFI1_POLL_TYPE_URGENT 0x1
+
+/*
+ * This structure is passed to the driver to tell it where
+ * user code buffers are, sizes, etc. The offsets and sizes of the
+ * fields must remain unchanged, for binary compatibility. It can
+ * be extended, if userversion is changed so user code can tell, if needed
+ */
+struct hfi1_user_info {
+ /*
+ * version of user software, to detect compatibility issues.
+ * Should be set to HFI1_USER_SWVERSION.
+ */
+ __u32 userversion;
+ __u16 pad;
+ /* HFI selection algorithm, if unit has not selected */
+ __u16 hfi1_alg;
+ /*
+ * If two or more processes wish to share a context, each process
+ * must set the subcontext_cnt and subcontext_id to the same
+ * values. The only restriction on the subcontext_id is that
+ * it be unique for a given node.
+ */
+ __u16 subctxt_cnt;
+ __u16 subctxt_id;
+ /* 128bit UUID passed in by PSM. */
+ __u8 uuid[16];
+};
+
+struct hfi1_ctxt_info {
+ __u64 runtime_flags; /* chip/drv runtime flags (HFI1_CAP_*) */
+ __u32 rcvegr_size; /* size of each eager buffer */
+ __u16 num_active; /* number of active units */
+ __u16 unit; /* unit (chip) assigned to caller */
+ __u16 ctxt; /* ctxt on unit assigned to caller */
+ __u16 subctxt; /* subctxt on unit assigned to caller */
+ __u16 rcvtids; /* number of Rcv TIDs for this context */
+ __u16 credits; /* number of PIO credits for this context */
+ __u16 numa_node; /* NUMA node of the assigned device */
+ __u16 rec_cpu; /* cpu # for affinity (0xffff if none) */
+ __u16 send_ctxt; /* send context in use by this user context */
+ __u16 egrtids; /* number of RcvArray entries for Eager Rcvs */
+ __u16 rcvhdrq_cnt; /* number of RcvHdrQ entries */
+ __u16 rcvhdrq_entsize; /* size (in bytes) for each RcvHdrQ entry */
+ __u16 sdma_ring_size; /* number of entries in SDMA request ring */
+};
+
+struct hfi1_tid_info {
+ /* virtual address of first page in transfer */
+ __u64 vaddr;
+ /* pointer to tid array. this array is big enough */
+ __u64 tidlist;
+ /* number of tids programmed by this request */
+ __u32 tidcnt;
+ /* length of transfer buffer programmed by this request */
+ __u32 length;
+ /*
+ * pointer to bitmap of TIDs used for this call;
+ * checked for being large enough at open
+ */
+ __u64 tidmap;
+};
+
+struct hfi1_cmd {
+ __u32 type; /* command type */
+ __u32 len; /* length of struct pointed to by add */
+ __u64 addr; /* pointer to user structure */
+};
+
+enum hfi1_sdma_comp_state {
+ FREE = 0,
+ QUEUED,
+ COMPLETE,
+ ERROR
+};
+
+/*
+ * SDMA completion ring entry
+ */
+struct hfi1_sdma_comp_entry {
+ __u32 status;
+ __u32 errcode;
+};
+
+/*
+ * Device status and notifications from driver to user-space.
+ */
+struct hfi1_status {
+ __u64 dev; /* device/hw status bits */
+ __u64 port; /* port state and status bits */
+ char freezemsg[0];
+};
+
+/*
+ * This structure is returned by the driver immediately after
+ * open to get implementation-specific info, and info specific to this
+ * instance.
+ *
+ * This struct must have explicit pad fields where type sizes
+ * may result in different alignments between 32 and 64 bit
+ * programs, since the 64 bit * bit kernel requires the user code
+ * to have matching offsets
+ */
+struct hfi1_base_info {
+ /* version of hardware, for feature checking. */
+ __u32 hw_version;
+ /* version of software, for feature checking. */
+ __u32 sw_version;
+ /* Job key */
+ __u16 jkey;
+ __u16 padding1;
+ /*
+ * The special QP (queue pair) value that identifies PSM
+ * protocol packet from standard IB packets.
+ */
+ __u32 bthqp;
+ /* PIO credit return address, */
+ __u64 sc_credits_addr;
+ /*
+ * Base address of write-only pio buffers for this process.
+ * Each buffer has sendpio_credits*64 bytes.
+ */
+ __u64 pio_bufbase_sop;
+ /*
+ * Base address of write-only pio buffers for this process.
+ * Each buffer has sendpio_credits*64 bytes.
+ */
+ __u64 pio_bufbase;
+ /* address where receive buffer queue is mapped into */
+ __u64 rcvhdr_bufbase;
+ /* base address of Eager receive buffers. */
+ __u64 rcvegr_bufbase;
+ /* base address of SDMA completion ring */
+ __u64 sdma_comp_bufbase;
+ /*
+ * User register base for init code, not to be used directly by
+ * protocol or applications. Always maps real chip register space.
+ * the register addresses are:
+ * ur_rcvhdrhead, ur_rcvhdrtail, ur_rcvegrhead, ur_rcvegrtail,
+ * ur_rcvtidflow
+ */
+ __u64 user_regbase;
+ /* notification events */
+ __u64 events_bufbase;
+ /* status page */
+ __u64 status_bufbase;
+ /* rcvhdrtail update */
+ __u64 rcvhdrtail_base;
+ /*
+ * shared memory pages for subctxts if ctxt is shared; these cover
+ * all the processes in the group sharing a single context.
+ * all have enough space for the num_subcontexts value on this job.
+ */
+ __u64 subctxt_uregbase;
+ __u64 subctxt_rcvegrbuf;
+ __u64 subctxt_rcvhdrbuf;
+};
+
+enum sdma_req_opcode {
+ EXPECTED = 0,
+ EAGER
+};
+
+#define HFI1_SDMA_REQ_VERSION_MASK 0xF
+#define HFI1_SDMA_REQ_VERSION_SHIFT 0x0
+#define HFI1_SDMA_REQ_OPCODE_MASK 0xF
+#define HFI1_SDMA_REQ_OPCODE_SHIFT 0x4
+#define HFI1_SDMA_REQ_IOVCNT_MASK 0xFF
+#define HFI1_SDMA_REQ_IOVCNT_SHIFT 0x8
+
+struct sdma_req_info {
+ /*
+ * bits 0-3 - version (currently unused)
+ * bits 4-7 - opcode (enum sdma_req_opcode)
+ * bits 8-15 - io vector count
+ */
+ __u16 ctrl;
+ /*
+ * Number of fragments contained in this request.
+ * User-space has already computed how many
+ * fragment-sized packet the user buffer will be
+ * split into.
+ */
+ __u16 npkts;
+ /*
+ * Size of each fragment the user buffer will be
+ * split into.
+ */
+ __u16 fragsize;
+ /*
+ * Index of the slot in the SDMA completion ring
+ * this request should be using. User-space is
+ * in charge of managing its own ring.
+ */
+ __u16 comp_idx;
+} __packed;
+
+/*
+ * SW KDETH header.
+ * swdata is SW defined portion.
+ */
+struct hfi1_kdeth_header {
+ __le32 ver_tid_offset;
+ __le16 jkey;
+ __le16 hcrc;
+ __le32 swdata[7];
+} __packed;
+
+/*
+ * Structure describing the headers that User space uses. The
+ * structure above is a subset of this one.
+ */
+struct hfi1_pkt_header {
+ __le16 pbc[4];
+ __be16 lrh[4];
+ __be32 bth[3];
+ struct hfi1_kdeth_header kdeth;
+} __packed;
+
+
+/*
+ * The list of usermode accessible registers.
+ */
+enum hfi1_ureg {
+ /* (RO) DMA RcvHdr to be used next. */
+ ur_rcvhdrtail = 0,
+ /* (RW) RcvHdr entry to be processed next by host. */
+ ur_rcvhdrhead = 1,
+ /* (RO) Index of next Eager index to use. */
+ ur_rcvegrindextail = 2,
+ /* (RW) Eager TID to be processed next */
+ ur_rcvegrindexhead = 3,
+ /* (RO) Receive Eager Offset Tail */
+ ur_rcvegroffsettail = 4,
+ /* For internal use only; max register number. */
+ ur_maxreg,
+ /* (RW) Receive TID flow table */
+ ur_rcvtidflowtable = 256
+};
+
+#endif /* _LINIUX__HFI1_USER_H */
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 6e4bb4270ca2..c19a5dc1531a 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -7,12 +7,14 @@ enum {
RDMA_NL_RDMA_CM = 1,
RDMA_NL_NES,
RDMA_NL_C4IW,
+ RDMA_NL_LS, /* RDMA Local Services */
RDMA_NL_NUM_CLIENTS
};
enum {
RDMA_NL_GROUP_CM = 1,
RDMA_NL_GROUP_IWPM,
+ RDMA_NL_GROUP_LS,
RDMA_NL_NUM_GROUPS
};
@@ -128,5 +130,85 @@ enum {
IWPM_NLA_ERR_MAX
};
+/*
+ * Local service operations:
+ * RESOLVE - The client requests the local service to resolve a path.
+ * SET_TIMEOUT - The local service requests the client to set the timeout.
+ */
+enum {
+ RDMA_NL_LS_OP_RESOLVE = 0,
+ RDMA_NL_LS_OP_SET_TIMEOUT,
+ RDMA_NL_LS_NUM_OPS
+};
+
+/* Local service netlink message flags */
+#define RDMA_NL_LS_F_ERR 0x0100 /* Failed response */
+
+/*
+ * Local service resolve operation family header.
+ * The layout for the resolve operation:
+ * nlmsg header
+ * family header
+ * attributes
+ */
+
+/*
+ * Local service path use:
+ * Specify how the path(s) will be used.
+ * ALL - For connected CM operation (6 pathrecords)
+ * UNIDIRECTIONAL - For unidirectional UD (1 pathrecord)
+ * GMP - For miscellaneous GMP like operation (at least 1 reversible
+ * pathrecord)
+ */
+enum {
+ LS_RESOLVE_PATH_USE_ALL = 0,
+ LS_RESOLVE_PATH_USE_UNIDIRECTIONAL,
+ LS_RESOLVE_PATH_USE_GMP,
+ LS_RESOLVE_PATH_USE_MAX
+};
+
+#define LS_DEVICE_NAME_MAX 64
+
+struct rdma_ls_resolve_header {
+ __u8 device_name[LS_DEVICE_NAME_MAX];
+ __u8 port_num;
+ __u8 path_use;
+};
+
+/* Local service attribute type */
+#define RDMA_NLA_F_MANDATORY (1 << 13)
+#define RDMA_NLA_TYPE_MASK (~(NLA_F_NESTED | NLA_F_NET_BYTEORDER | \
+ RDMA_NLA_F_MANDATORY))
+
+/*
+ * Local service attributes:
+ * Attr Name Size Byte order
+ * -----------------------------------------------------
+ * PATH_RECORD struct ib_path_rec_data
+ * TIMEOUT u32 cpu
+ * SERVICE_ID u64 cpu
+ * DGID u8[16] BE
+ * SGID u8[16] BE
+ * TCLASS u8
+ * PKEY u16 cpu
+ * QOS_CLASS u16 cpu
+ */
+enum {
+ LS_NLA_TYPE_UNSPEC = 0,
+ LS_NLA_TYPE_PATH_RECORD,
+ LS_NLA_TYPE_TIMEOUT,
+ LS_NLA_TYPE_SERVICE_ID,
+ LS_NLA_TYPE_DGID,
+ LS_NLA_TYPE_SGID,
+ LS_NLA_TYPE_TCLASS,
+ LS_NLA_TYPE_PKEY,
+ LS_NLA_TYPE_QOS_CLASS,
+ LS_NLA_TYPE_MAX
+};
+
+/* Local service DGID/SGID attribute: big endian */
+struct rdma_nla_ls_gid {
+ __u8 gid[16];
+};
#endif /* _UAPI_RDMA_NETLINK_H */