From c7c122ed67e47a7d8033ae21f0cb22a21686194a Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 19 Jul 2016 20:54:56 +0300 Subject: net/mlx4: Add diagnostic counters capability bit Add a bit that indicates if the firmware supports per port diagnostic counters. Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 80dec87a94f8..680d110aff05 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -220,6 +220,7 @@ enum { MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32, MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33, MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER = 1ULL << 34, + MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT = 1ULL << 35, }; enum { -- cgit From bfaf31687c2628ebb95c58a472d46db3f94c554f Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 19 Jul 2016 20:54:57 +0300 Subject: net/mlx4: Query performance and diagnostics counters Add a function to query diagnostics counters from the firmware. Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx4/device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 680d110aff05..abcce821ac00 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1381,6 +1381,9 @@ void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr, int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr); int mlx4_SYNC_TPT(struct mlx4_dev *dev); int mlx4_test_interrupts(struct mlx4_dev *dev); +int mlx4_query_diag_counters(struct mlx4_dev *dev, u8 op_modifier, + const u32 offset[], u32 value[], + size_t array_len, u8 port); u32 mlx4_get_eqs_per_port(struct mlx4_dev *dev, u8 port); bool mlx4_is_eq_vector_valid(struct mlx4_dev *dev, u8 port, int vector); struct cpu_rmap *mlx4_get_cpu_rmap(struct mlx4_dev *dev, int port); -- cgit From 3f85f2aaabf785e53bbcd242cb92aeda28990ef5 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 19 Jul 2016 20:54:58 +0300 Subject: IB/mlx4: Add diagnostic hardware counters Expose IB diagnostic hardware counters. The counters count IB events and are applicable for IB and RoCE. The counters can be divided into two groups, per device and per port. Device counters are always exposed. Port counters are exposed only if the firmware supports per port counters. rq_num_dup and sq_num_to are only exposed if we have firmware support for them, if we do, we expose them per device and per port. rq_num_udsdprd and num_cqovf are device only counters. rq - denotes responder. sq - denotes requester. |-----------------------|---------------------------------------| | Name | Description | |-----------------------|---------------------------------------| |rq_num_lle | Number of local length errors | |-----------------------|---------------------------------------| |sq_num_lle | number of local length errors | |-----------------------|---------------------------------------| |rq_num_lqpoe | Number of local QP operation errors | |-----------------------|---------------------------------------| |sq_num_lqpoe | Number of local QP operation errors | |-----------------------|---------------------------------------| |rq_num_lpe | Number of local protection errors | |-----------------------|---------------------------------------| |sq_num_lpe | Number of local protection errors | |-----------------------|---------------------------------------| |rq_num_wrfe | Number of CQEs with error | |-----------------------|---------------------------------------| |sq_num_wrfe | Number of CQEs with error | |-----------------------|---------------------------------------| |sq_num_mwbe | Number of Memory Window bind errors | |-----------------------|---------------------------------------| |sq_num_bre | Number of bad response errors | |-----------------------|---------------------------------------| |sq_num_rire | Number of Remote Invalid request | | | errors | |-----------------------|---------------------------------------| |rq_num_rire | Number of Remote Invalid request | | | errors | |-----------------------|---------------------------------------| |sq_num_rae | Number of remote access errors | |-----------------------|---------------------------------------| |rq_num_rae | Number of remote access errors | |-----------------------|---------------------------------------| |sq_num_roe | Number of remote operation errors | |-----------------------|---------------------------------------| |sq_num_tree | Number of transport retries exceeded | | | errors | |-----------------------|---------------------------------------| |sq_num_rree | Number of RNR NAK retries exceeded | | | errors | |-----------------------|---------------------------------------| |rq_num_rnr | Number of RNR NAKs sent | |-----------------------|---------------------------------------| |sq_num_rnr | Number of RNR NAKs received | |-----------------------|---------------------------------------| |rq_num_oos | Number of Out of Sequence requests | | | received | |-----------------------|---------------------------------------| |sq_num_oos | Number of Out of Sequence NAKs | | | received | |-----------------------|---------------------------------------| |rq_num_udsdprd | Number of UD packets silently | | | discarded on the Receive Queue due to | | | lack of receive descriptor | |-----------------------|---------------------------------------| |rq_num_dup | Number of duplicate requests received | |-----------------------|---------------------------------------| |sq_num_to | Number of time out received | |-----------------------|---------------------------------------| |num_cqovf | Number of CQ overflows | |-----------------------|---------------------------------------| Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx4/device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index abcce821ac00..d73d8e4d3e09 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1341,6 +1341,9 @@ enum { VXLAN_STEER_BY_INNER_VLAN = 1 << 4, }; +enum { + MLX4_OP_MOD_QUERY_TRANSPORT_CI_ERRORS = 0x2, +}; int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn, enum mlx4_net_trans_promisc_mode mode); -- cgit From ab15c95a17b3fe8c0e01bb7ce1dd0b657598eb61 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Wed, 6 Jul 2016 16:36:35 +0300 Subject: IB/core: Support for CMA multicast join flags Added UCMA and CMA support for multicast join flags. Flags are passed using UCMA CM join command previously reserved fields. Currently supporting two join flags indicating two different multicast JoinStates: 1. Full Member: The initiator creates the Multicast group(MCG) if it wasn't previously created, can send Multicast messages to the group and receive messages from the MCG. 2. Send Only Full Member: The initiator creates the Multicast group(MCG) if it wasn't previously created, can send Multicast messages to the group but doesn't receive any messages from the MCG. IB: Send Only Full Member requires a query of ClassPortInfo to determine if SM/SA supports this option. If SM/SA doesn't support Send-Only there will be no join request sent and an error will be returned. ETH: When Send Only Full Member is requested no IGMP join will be sent. Signed-off-by: Alex Vesker Reviewed by: Hal Rosenstock Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/rdma/ib_sa.h | 13 +++++++++++++ include/rdma/rdma_cm.h | 4 +++- include/uapi/rdma/rdma_user_cm.h | 9 ++++++++- 3 files changed, 24 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 384041669489..5ee7aab95eb8 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -94,6 +94,19 @@ enum ib_sa_selector { IB_SA_BEST = 3 }; +/* + * There are 4 types of join states: + * FullMember, NonMember, SendOnlyNonMember, SendOnlyFullMember. + * The order corresponds to JoinState bits in MCMemberRecord. + */ +enum ib_sa_mc_join_states { + FULLMEMBER_JOIN, + NONMEMBER_JOIN, + SENDONLY_NONMEBER_JOIN, + SENDONLY_FULLMEMBER_JOIN, + NUM_JOIN_MEMBERSHIP_TYPES, +}; + #define IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT BIT(12) /* diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index afe44fde72a5..81fb1d15e8bb 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -333,11 +333,13 @@ int rdma_disconnect(struct rdma_cm_id *id); * address. * @id: Communication identifier associated with the request. * @addr: Multicast address identifying the group to join. + * @join_state: Multicast JoinState bitmap requested by port. + * Bitmap is based on IB_SA_MCMEMBER_REC_JOIN_STATE bits. * @context: User-defined context associated with the join request, returned * to the user through the private_data pointer in multicast events. */ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, - void *context); + u8 join_state, void *context); /** * rdma_leave_multicast - Leave the multicast group specified by the given diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index 3066718eb120..01923d463673 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -244,12 +244,19 @@ struct rdma_ucm_join_ip_mcast { __u32 id; }; +/* Multicast join flags */ +enum { + RDMA_MC_JOIN_FLAG_FULLMEMBER, + RDMA_MC_JOIN_FLAG_SENDONLY_FULLMEMBER, + RDMA_MC_JOIN_FLAG_RESERVED, +}; + struct rdma_ucm_join_mcast { __u64 response; /* rdma_ucma_create_id_resp */ __u64 uid; __u32 id; __u16 addr_size; - __u16 reserved; + __u16 join_flags; struct sockaddr_storage addr; }; -- cgit From 8700e3e7c4857d28ebaa824509934556da0b3e76 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 16 Jun 2016 16:45:23 +0300 Subject: Soft RoCE driver Soft RoCE (RXE) - The software RoCE driver ib_rxe implements the RDMA transport and registers to the RDMA core device as a kernel verbs provider. It also implements the packet IO layer. On the other hand ib_rxe registers to the Linux netdev stack as a udp encapsulating protocol, in that case RDMA, for sending and receiving packets over any Ethernet device. This yields a RDMA transport over the UDP/Ethernet network layer forming a RoCEv2 compatible device. The configuration procedure of the Soft RoCE drivers requires binding to any existing Ethernet network device. This is done with /sys interface. A userspace Soft RoCE library (librxe) provides user applications the ability to run with Soft RoCE devices. The use of rxe verbs ins user space requires the inclusion of librxe as a device specifics plug-in to libibverbs. librxe is packaged separately. Architecture: +-----------------------------------------------------------+ | Application | +-----------------------------------------------------------+ +-----------------------------------+ | libibverbs | User +-----------------------------------+ +----------------+ +----------------+ | librxe | | HW RoCE lib | +----------------+ +----------------+ +---------------------------------------------------------------+ +--------------+ +------------+ | Sockets | | RDMA ULP | +--------------+ +------------+ +--------------+ +---------------------+ | TCP/IP | | ib_core | +--------------+ +---------------------+ +------------+ +----------------+ Kernel | ib_rxe | | HW RoCE driver | +------------+ +----------------+ +------------------------------------+ | NIC driver | +------------------------------------+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-----------------------------------------------------------+ | Application | +-----------------------------------------------------------+ +-----------------------------------+ | libibverbs | User +-----------------------------------+ +----------------+ +----------------+ | librxe | | HW RoCE lib | +----------------+ +----------------+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +--------------+ +------------+ | Sockets | | RDMA ULP | +--------------+ +------------+ +--------------+ +---------------------+ | TCP/IP | | ib_core | +--------------+ +---------------------+ +------------+ +----------------+ Kernel | ib_rxe | | HW RoCE driver | +------------+ +----------------+ +------------------------------------+ | NIC driver | +------------------------------------+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Soft RoCE resources: [1[ https://github.com/SoftRoCE/librxe-dev librxe - source code in Github [2] https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home - Soft RoCE Wiki page [3] https://github.com/SoftRoCE/librxe-dev - Soft RoCE userspace library Signed-off-by: Kamal Heib Signed-off-by: Amir Vadai Signed-off-by: Moni Shoua Reviewed-by: Haggai Eran Signed-off-by: Doug Ledford --- include/uapi/rdma/Kbuild | 1 + include/uapi/rdma/rdma_user_rxe.h | 144 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+) create mode 100644 include/uapi/rdma/rdma_user_rxe.h (limited to 'include') diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild index 231901b08f6c..4edb0f2b4f9f 100644 --- a/include/uapi/rdma/Kbuild +++ b/include/uapi/rdma/Kbuild @@ -6,3 +6,4 @@ header-y += ib_user_verbs.h header-y += rdma_netlink.h header-y += rdma_user_cm.h header-y += hfi/ +header-y += rdma_user_rxe.h diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h new file mode 100644 index 000000000000..1de99cfdaf7d --- /dev/null +++ b/include/uapi/rdma/rdma_user_rxe.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef RDMA_USER_RXE_H +#define RDMA_USER_RXE_H + +#include + +union rxe_gid { + __u8 raw[16]; + struct { + __be64 subnet_prefix; + __be64 interface_id; + } global; +}; + +struct rxe_global_route { + union rxe_gid dgid; + __u32 flow_label; + __u8 sgid_index; + __u8 hop_limit; + __u8 traffic_class; +}; + +struct rxe_av { + __u8 port_num; + __u8 network_type; + struct rxe_global_route grh; + union { + struct sockaddr _sockaddr; + struct sockaddr_in _sockaddr_in; + struct sockaddr_in6 _sockaddr_in6; + } sgid_addr, dgid_addr; +}; + +struct rxe_send_wr { + __u64 wr_id; + __u32 num_sge; + __u32 opcode; + __u32 send_flags; + union { + __be32 imm_data; + __u32 invalidate_rkey; + } ex; + union { + struct { + __u64 remote_addr; + __u32 rkey; + } rdma; + struct { + __u64 remote_addr; + __u64 compare_add; + __u64 swap; + __u32 rkey; + } atomic; + struct { + __u32 remote_qpn; + __u32 remote_qkey; + __u16 pkey_index; + } ud; + struct { + struct ib_mr *mr; + __u32 key; + int access; + } reg; + } wr; +}; + +struct rxe_sge { + __u64 addr; + __u32 length; + __u32 lkey; +}; + +struct mminfo { + __u64 offset; + __u32 size; + __u32 pad; +}; + +struct rxe_dma_info { + __u32 length; + __u32 resid; + __u32 cur_sge; + __u32 num_sge; + __u32 sge_offset; + union { + __u8 inline_data[0]; + struct rxe_sge sge[0]; + }; +}; + +struct rxe_send_wqe { + struct rxe_send_wr wr; + struct rxe_av av; + __u32 status; + __u32 state; + __u64 iova; + __u32 mask; + __u32 first_psn; + __u32 last_psn; + __u32 ack_length; + __u32 ssn; + __u32 has_rd_atomic; + struct rxe_dma_info dma; +}; + +struct rxe_recv_wqe { + __u64 wr_id; + __u32 num_sge; + __u32 padding; + struct rxe_dma_info dma; +}; + +#endif /* RDMA_USER_RXE_H */ -- cgit