diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-31 12:05:10 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-31 12:05:10 -0800 |
commit | 7b1cd95d65eb3b1e13f8a90eb757e0ea232c7899 (patch) | |
tree | cbc3ec5d45b04666c24f7c0b1df04a85d29c7d0f /include/rdma | |
parent | 2155e69a9d9acd42488ef994a4e1ff535438c128 (diff) | |
parent | e7996a9a77fc669387da43ff4823b91cc4872bd0 (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull RDMA subsystem updates from Jason Gunthorpe:
"Overall this cycle did not have any major excitement, and did not
require any shared branch with netdev.
Lots of driver updates, particularly of the scale-up and performance
variety. The largest body of core work was Parav's patches fixing and
restructuring some of the core code to make way for future RDMA
containerization.
Summary:
- misc small driver fixups to
bnxt_re/hfi1/qib/hns/ocrdma/rdmavt/vmw_pvrdma/nes
- several major feature adds to bnxt_re driver: SRIOV VF RoCE
support, HugePages support, extended hardware stats support, and
SRQ support
- a notable number of fixes to the i40iw driver from debugging scale
up testing
- more work to enable the new hip08 chip in the hns driver
- misc small ULP fixups to srp/srpt/ipoib
- preparation for srp initiator and target to support the RDMA-CM
protocol for connections
- add RDMA-CM support to srp initiator, srp target is still a WIP
- fixes for a couple of places where ipoib could spam the dmesg log
- fix encode/decode of FDR/EDR data rates in the core
- many patches from Parav with ongoing work to clean up
inconsistencies and bugs in RoCE support around the rdma_cm
- mlx5 driver support for the userspace features 'thread domain',
'wallclock timestamps' and 'DV Direct Connected transport'. Support
for the firmware dual port RoCE capability
- core support for more than 32 rdma devices in the char dev
allocation
- kernel doc updates from Randy Dunlap
- new netlink uAPI for inspecting RDMA objects similar in spirit to 'ss'
- one minor change to the kobject code acked by Greg KH"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (259 commits)
RDMA/nldev: Provide detailed QP information
RDMA/nldev: Provide global resource utilization
RDMA/core: Add resource tracking for create and destroy PDs
RDMA/core: Add resource tracking for create and destroy CQs
RDMA/core: Add resource tracking for create and destroy QPs
RDMA/restrack: Add general infrastructure to track RDMA resources
RDMA/core: Save kernel caller name when creating PD and CQ objects
RDMA/core: Use the MODNAME instead of the function name for pd callers
RDMA: Move enum ib_cq_creation_flags to uapi headers
IB/rxe: Change RDMA_RXE kconfig to use select
IB/qib: remove qib_keys.c
IB/mthca: remove mthca_user.h
RDMA/cm: Fix access to uninitialized variable
RDMA/cma: Use existing netif_is_bond_master function
IB/core: Avoid SGID attributes query while converting GID from OPA to IB
RDMA/mlx5: Avoid memory leak in case of XRCD dealloc failure
IB/umad: Fix use of unprotected device pointer
IB/iser: Combine substrings for three messages
IB/iser: Delete an unnecessary variable initialisation in iser_send_data_out()
IB/iser: Delete an error message for a failed memory allocation in iser_send_data_out()
...
Diffstat (limited to 'include/rdma')
-rw-r--r-- | include/rdma/ib_addr.h | 38 | ||||
-rw-r--r-- | include/rdma/ib_sa.h | 10 | ||||
-rw-r--r-- | include/rdma/ib_verbs.h | 64 | ||||
-rw-r--r-- | include/rdma/opa_addr.h | 16 | ||||
-rw-r--r-- | include/rdma/rdma_cm.h | 19 | ||||
-rw-r--r-- | include/rdma/rdma_cm_ib.h | 8 | ||||
-rw-r--r-- | include/rdma/rdma_vt.h | 31 | ||||
-rw-r--r-- | include/rdma/restrack.h | 157 |
8 files changed, 280 insertions, 63 deletions
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 18c564f60e93..d656809f1217 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -94,7 +94,7 @@ struct rdma_dev_addr { * The dev_addr->net field must be initialized. */ int rdma_translate_ip(const struct sockaddr *addr, - struct rdma_dev_addr *dev_addr, u16 *vlan_id); + struct rdma_dev_addr *dev_addr); /** * rdma_resolve_ip - Resolve source and destination IP addresses to @@ -131,10 +131,9 @@ void rdma_copy_addr(struct rdma_dev_addr *dev_addr, int rdma_addr_size(struct sockaddr *addr); -int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id); int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, - u8 *smac, u16 *vlan_id, int *if_index, + u8 *dmac, const struct net_device *ndev, int *hoplimit); static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr) @@ -198,34 +197,15 @@ static inline void rdma_gid2ip(struct sockaddr *out, const union ib_gid *gid) } } -static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) -{ - struct net_device *dev; - struct in_device *ip4; - - dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); - if (dev) { - ip4 = in_dev_get(dev); - if (ip4 && ip4->ifa_list && ip4->ifa_list->ifa_address) - ipv6_addr_set_v4mapped(ip4->ifa_list->ifa_address, - (struct in6_addr *)gid); - - if (ip4) - in_dev_put(ip4); - - dev_put(dev); - } -} - +/* + * rdma_get/set_sgid/dgid() APIs are applicable to IB, and iWarp. + * They are not applicable to RoCE. + * RoCE GIDs are derived from the IP addresses. 
+ */ static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - if (dev_addr->transport == RDMA_TRANSPORT_IB && - dev_addr->dev_type != ARPHRD_INFINIBAND) - iboe_addr_get_sgid(dev_addr, gid); - else - memcpy(gid, dev_addr->src_dev_addr + - rdma_addr_gid_offset(dev_addr), sizeof *gid); + memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), + sizeof(*gid)); } static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 1f7f604db5aa..811cfcfcbe3d 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -549,12 +549,12 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num, struct rdma_ah_attr *ah_attr); /** - * ib_init_ah_from_path - Initialize address handle attributes based on an SA - * path record. + * ib_init_ah_attr_from_path - Initialize address handle attributes based on + * an SA path record. */ -int ib_init_ah_from_path(struct ib_device *device, u8 port_num, - struct sa_path_rec *rec, - struct rdma_ah_attr *ah_attr); +int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num, + struct sa_path_rec *rec, + struct rdma_ah_attr *ah_attr); /** * ib_sa_pack_path - Conert a path record from struct ib_sa_path_rec diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index fd84cda5ed7c..5263c86fd103 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -63,6 +63,7 @@ #include <linux/uaccess.h> #include <linux/cgroup_rdma.h> #include <uapi/rdma/ib_user_verbs.h> +#include <rdma/restrack.h> #define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN @@ -300,11 +301,6 @@ struct ib_tm_caps { u32 max_sge; }; -enum ib_cq_creation_flags { - IB_CQ_FLAGS_TIMESTAMP_COMPLETION = 1 << 0, - IB_CQ_FLAGS_IGNORE_OVERRUN = 1 << 1, -}; - struct ib_cq_init_attr { unsigned int cqe; int comp_vector; @@ -983,9 +979,9 @@ struct ib_wc { u32 invalidate_rkey; } ex; u32 src_qp; + u32 slid; int wc_flags; u16 
pkey_index; - u32 slid; u8 sl; u8 dlid_path_bits; u8 port_num; /* valid only for DR SMPs on switches */ @@ -1082,6 +1078,7 @@ enum ib_qp_type { IB_QPT_XRC_INI = 9, IB_QPT_XRC_TGT, IB_QPT_MAX, + IB_QPT_DRIVER = 0xFF, /* Reserve a range for qp types internal to the low level driver. * These qp types will not be visible at the IB core layer, so the * IB_QPT_MAX usages should not be affected in the core layer @@ -1529,6 +1526,7 @@ struct ib_pd { * Implementation details of the RDMA core, don't use in drivers: */ struct ib_mr *__internal_mr; + struct rdma_restrack_entry res; }; struct ib_xrcd { @@ -1538,6 +1536,10 @@ struct ib_xrcd { struct mutex tgt_qp_mutex; struct list_head tgt_qp_list; + /* + * Implementation details of the RDMA core, don't use in drivers: + */ + struct rdma_restrack_entry res; }; struct ib_ah { @@ -1569,6 +1571,10 @@ struct ib_cq { struct irq_poll iop; struct work_struct work; }; + /* + * Implementation details of the RDMA core, don't use in drivers: + */ + struct rdma_restrack_entry res; }; struct ib_srq { @@ -1745,6 +1751,11 @@ struct ib_qp { struct ib_rwq_ind_table *rwq_ind_tbl; struct ib_qp_security *qp_sec; u8 port; + + /* + * Implementation details of the RDMA core, don't use in drivers: + */ + struct rdma_restrack_entry res; }; struct ib_mr { @@ -2351,6 +2362,10 @@ struct ib_device { #endif u32 index; + /* + * Implementation details of the RDMA core, don't use in drivers + */ + struct rdma_restrack_root res; /** * The following mandatory functions are used only at device @@ -2836,8 +2851,7 @@ int ib_modify_port(struct ib_device *device, struct ib_port_modify *port_modify); int ib_find_gid(struct ib_device *device, union ib_gid *gid, - enum ib_gid_type gid_type, struct net_device *ndev, - u8 *port_num, u16 *index); + struct net_device *ndev, u8 *port_num, u16 *index); int ib_find_pkey(struct ib_device *device, u8 port_num, u16 pkey, u16 *index); @@ -2858,7 +2872,7 @@ enum ib_pd_flags { struct ib_pd *__ib_alloc_pd(struct ib_device *device, 
unsigned int flags, const char *caller); #define ib_alloc_pd(device, flags) \ - __ib_alloc_pd((device), (flags), __func__) + __ib_alloc_pd((device), (flags), KBUILD_MODNAME) void ib_dealloc_pd(struct ib_pd *pd); /** @@ -2905,7 +2919,7 @@ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr, int ib_get_rdma_header_version(const union rdma_network_hdr *hdr); /** - * ib_init_ah_from_wc - Initializes address handle attributes from a + * ib_init_ah_attr_from_wc - Initializes address handle attributes from a * work completion. * @device: Device on which the received message arrived. * @port_num: Port on which the received message arrived. @@ -2915,9 +2929,9 @@ int ib_get_rdma_header_version(const union rdma_network_hdr *hdr); * @ah_attr: Returned attributes that can be used when creating an address * handle for replying to the message. */ -int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, - const struct ib_wc *wc, const struct ib_grh *grh, - struct rdma_ah_attr *ah_attr); +int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num, + const struct ib_wc *wc, const struct ib_grh *grh, + struct rdma_ah_attr *ah_attr); /** * ib_create_ah_from_wc - Creates an address handle associated with the @@ -3135,8 +3149,12 @@ static inline int ib_post_recv(struct ib_qp *qp, return qp->device->post_recv(qp, recv_wr, bad_recv_wr); } -struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private, - int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx); +struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, + int nr_cqe, int comp_vector, + enum ib_poll_context poll_ctx, const char *caller); +#define ib_alloc_cq(device, priv, nr_cqe, comp_vect, poll_ctx) \ + __ib_alloc_cq((device), (priv), (nr_cqe), (comp_vect), (poll_ctx), KBUILD_MODNAME) + void ib_free_cq(struct ib_cq *cq); int ib_process_cq_direct(struct ib_cq *cq, int budget); @@ -3560,8 +3578,11 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); /** * ib_alloc_xrcd - 
Allocates an XRC domain. * @device: The device on which to allocate the XRC domain. + * @caller: Module name for kernel consumers */ -struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device); +struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller); +#define ib_alloc_xrcd(device) \ + __ib_alloc_xrcd((device), KBUILD_MODNAME) /** * ib_dealloc_xrcd - Deallocates an XRC domain. @@ -3793,8 +3814,7 @@ static inline void rdma_ah_set_grh(struct rdma_ah_attr *attr, static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev, u32 port_num) { - if ((rdma_protocol_roce(dev, port_num)) || - (rdma_protocol_iwarp(dev, port_num))) + if (rdma_protocol_roce(dev, port_num)) return RDMA_AH_ATTR_TYPE_ROCE; else if ((rdma_protocol_ib(dev, port_num)) && (rdma_cap_opa_ah(dev, port_num))) @@ -3850,4 +3870,12 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector) } +/** + * rdma_roce_rescan_device - Rescan all of the network devices in the system + * and add their gids, as needed, to the relevant RoCE devices. 
+ * + * @device: the rdma device + */ +void rdma_roce_rescan_device(struct ib_device *ibdev); + #endif /* IB_VERBS_H */ diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h index f68fca296631..2bbb7a67e643 100644 --- a/include/rdma/opa_addr.h +++ b/include/rdma/opa_addr.h @@ -114,4 +114,20 @@ static inline u32 opa_get_mcast_base(u32 nr_top_bits) return (be32_to_cpu(OPA_LID_PERMISSIVE) << (32 - nr_top_bits)); } +/* Check for a valid unicast LID for non-SM traffic types */ +static inline bool rdma_is_valid_unicast_lid(struct rdma_ah_attr *attr) +{ + if (attr->type == RDMA_AH_ATTR_TYPE_IB) { + if (!rdma_ah_get_dlid(attr) || + rdma_ah_get_dlid(attr) >= + be32_to_cpu(IB_MULTICAST_LID_BASE)) + return false; + } else if (attr->type == RDMA_AH_ATTR_TYPE_OPA) { + if (!rdma_ah_get_dlid(attr) || + rdma_ah_get_dlid(attr) >= + opa_get_mcast_base(OPA_MCAST_NR)) + return false; + } + return true; +} #endif /* OPA_ADDR_H */ diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 3d2eed3c4e75..6538a5cc27b6 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -413,4 +413,23 @@ bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason); const void *rdma_consumer_reject_data(struct rdma_cm_id *id, struct rdma_cm_event *ev, u8 *data_len); +/** + * rdma_read_gids - Return the SGID and DGID used for establishing + * connection. This can be used after rdma_resolve_addr() + * on client side. This can be use on new connection + * on server side. This is applicable to IB, RoCE, iWarp. + * If cm_id is not bound yet to the RDMA device, it doesn't + * copy and SGID or DGID to the given pointers. + * @id: Communication identifier whose GIDs are queried. + * @sgid: Pointer to SGID where SGID will be returned. It is optional. + * @dgid: Pointer to DGID where DGID will be returned. It is optional. + * Note: This API should not be used by any new ULPs or new code. 
+ * Instead, users interested in querying GIDs should refer to path record + * of the rdma_cm_id to query the GIDs. + * This API is provided for compatibility for existing users. + */ + +void rdma_read_gids(struct rdma_cm_id *cm_id, union ib_gid *sgid, + union ib_gid *dgid); + #endif /* RDMA_CM_H */ diff --git a/include/rdma/rdma_cm_ib.h b/include/rdma/rdma_cm_ib.h index 6947a6ba2557..6a69d71a21a5 100644 --- a/include/rdma/rdma_cm_ib.h +++ b/include/rdma/rdma_cm_ib.h @@ -36,17 +36,17 @@ #include <rdma/rdma_cm.h> /** - * rdma_set_ib_paths - Manually sets the path records used to establish a + * rdma_set_ib_path - Manually sets the path record used to establish a * connection. * @id: Connection identifier associated with the request. * @path_rec: Reference to the path record * * This call permits a user to specify routing information for rdma_cm_id's - * bound to Infiniband devices. It is called on the client side of a + * bound to InfiniBand devices. It is called on the client side of a * connection and replaces the call to rdma_resolve_route. */ -int rdma_set_ib_paths(struct rdma_cm_id *id, - struct sa_path_rec *path_rec, int num_paths); +int rdma_set_ib_path(struct rdma_cm_id *id, + struct sa_path_rec *path_rec); /* Global qkey for UDP QPs and multicast groups. */ #define RDMA_UDP_QKEY 0x01234567 diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 1ba84a78f1c5..4118324a0310 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -228,13 +228,6 @@ struct rvt_driver_provided { int (*port_callback)(struct ib_device *, u8, struct kobject *); /* - * Returns a string to represent the device for which is being - * registered. This is primarily used for error and debug messages on - * the console. - */ - const char * (*get_card_name)(struct rvt_dev_info *rdi); - - /* * Returns a pointer to the undelying hardware's PCI device. 
This is * used to display information as to what hardware is being referenced * in an output message @@ -419,6 +412,30 @@ struct rvt_dev_info { }; +/** + * rvt_set_ibdev_name - Craft an IB device name from client info + * @rdi: pointer to the client rvt_dev_info structure + * @name: client specific name + * @unit: client specific unit number. + */ +static inline void rvt_set_ibdev_name(struct rvt_dev_info *rdi, + const char *fmt, const char *name, + const int unit) +{ + snprintf(rdi->ibdev.name, sizeof(rdi->ibdev.name), fmt, name, unit); +} + +/** + * rvt_get_ibdev_name - return the IB name + * @rdi: rdmavt device + * + * Return the registered name of the device. + */ +static inline const char *rvt_get_ibdev_name(const struct rvt_dev_info *rdi) +{ + return rdi->ibdev.name; +} + static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) { return container_of(ibpd, struct rvt_pd, ibpd); diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h new file mode 100644 index 000000000000..c2d81167c858 --- /dev/null +++ b/include/rdma/restrack.h @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* + * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved. 
+ */ + +#ifndef _RDMA_RESTRACK_H_ +#define _RDMA_RESTRACK_H_ + +#include <linux/typecheck.h> +#include <linux/rwsem.h> +#include <linux/sched.h> +#include <linux/kref.h> +#include <linux/completion.h> + +/** + * enum rdma_restrack_type - HW objects to track + */ +enum rdma_restrack_type { + /** + * @RDMA_RESTRACK_PD: Protection domain (PD) + */ + RDMA_RESTRACK_PD, + /** + * @RDMA_RESTRACK_CQ: Completion queue (CQ) + */ + RDMA_RESTRACK_CQ, + /** + * @RDMA_RESTRACK_QP: Queue pair (QP) + */ + RDMA_RESTRACK_QP, + /** + * @RDMA_RESTRACK_XRCD: XRC domain (XRCD) + */ + RDMA_RESTRACK_XRCD, + /** + * @RDMA_RESTRACK_MAX: Last entry, used for array dclarations + */ + RDMA_RESTRACK_MAX +}; + +#define RDMA_RESTRACK_HASH_BITS 8 +/** + * struct rdma_restrack_root - main resource tracking management + * entity, per-device + */ +struct rdma_restrack_root { + /* + * @rwsem: Read/write lock to protect lists + */ + struct rw_semaphore rwsem; + /** + * @hash: global database for all resources per-device + */ + DECLARE_HASHTABLE(hash, RDMA_RESTRACK_HASH_BITS); +}; + +/** + * struct rdma_restrack_entry - metadata per-entry + */ +struct rdma_restrack_entry { + /** + * @valid: validity indicator + * + * The entries are filled during rdma_restrack_add, + * can be attempted to be free during rdma_restrack_del. + * + * As an example for that, see mlx5 QPs with type MLX5_IB_QPT_HW_GSI + */ + bool valid; + /* + * @kref: Protect destroy of the resource + */ + struct kref kref; + /* + * @comp: Signal that all consumers of resource are completed their work + */ + struct completion comp; + /** + * @task: owner of resource tracking entity + * + * There are two types of entities: created by user and created + * by kernel. + * + * This is relevant for the entities created by users. + * For the entities created by kernel, this pointer will be NULL. + */ + struct task_struct *task; + /** + * @kern_name: name of owner for the kernel created entities. 
+ */ + const char *kern_name; + /** + * @node: hash table entry + */ + struct hlist_node node; + /** + * @type: various objects in restrack database + */ + enum rdma_restrack_type type; +}; + +/** + * rdma_restrack_init() - initialize resource tracking + * @res: resource tracking root + */ +void rdma_restrack_init(struct rdma_restrack_root *res); + +/** + * rdma_restrack_clean() - clean resource tracking + * @res: resource tracking root + */ +void rdma_restrack_clean(struct rdma_restrack_root *res); + +/** + * rdma_restrack_count() - the current usage of specific object + * @res: resource entry + * @type: actual type of object to operate + * @ns: PID namespace + */ +int rdma_restrack_count(struct rdma_restrack_root *res, + enum rdma_restrack_type type, + struct pid_namespace *ns); + +/** + * rdma_restrack_add() - add object to the reource tracking database + * @res: resource entry + */ +void rdma_restrack_add(struct rdma_restrack_entry *res); + +/** + * rdma_restrack_del() - delete object from the reource tracking database + * @res: resource entry + * @type: actual type of object to operate + */ +void rdma_restrack_del(struct rdma_restrack_entry *res); + +/** + * rdma_is_kernel_res() - check the owner of resource + * @res: resource entry + */ +static inline bool rdma_is_kernel_res(struct rdma_restrack_entry *res) +{ + return !res->task; +} + +/** + * rdma_restrack_get() - grab to protect resource from release + * @res: resource entry + */ +int __must_check rdma_restrack_get(struct rdma_restrack_entry *res); + +/** + * rdma_restrack_put() - relase resource + * @res: resource entry + */ +int rdma_restrack_put(struct rdma_restrack_entry *res); +#endif /* _RDMA_RESTRACK_H_ */ |