summaryrefslogtreecommitdiff
path: root/drivers/infiniband/core/cma.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/core/cma.c')
-rw-r--r--drivers/infiniband/core/cma.c1003
1 files changed, 767 insertions, 236 deletions
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 515a7e95a421..95e89f5c147c 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -11,6 +11,7 @@
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
+#include <linux/rbtree.h>
#include <linux/igmp.h>
#include <linux/xarray.h>
#include <linux/inetdevice.h>
@@ -20,6 +21,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/netevent.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/ip_fib.h>
@@ -44,8 +46,7 @@ MODULE_LICENSE("Dual BSD/GPL");
#define CMA_CM_RESPONSE_TIMEOUT 20
#define CMA_MAX_CM_RETRIES 15
-#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
-#define CMA_IBOE_PACKET_LIFETIME 18
+#define CMA_IBOE_PACKET_LIFETIME 16
#define CMA_PREFERRED_ROCE_GID_TYPE IB_GID_TYPE_ROCE_UDP_ENCAP
static const char * const cma_events[] = {
@@ -67,8 +68,10 @@ static const char * const cma_events[] = {
[RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit",
};
-static void cma_set_mgid(struct rdma_id_private *id_priv, struct sockaddr *addr,
- union ib_gid *mgid);
+static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
+ enum ib_gid_type gid_type);
+
+static void cma_netevent_work_handler(struct work_struct *_work);
const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
{
@@ -142,19 +145,6 @@ struct iw_cm_id *rdma_iw_cm_id(struct rdma_cm_id *id)
}
EXPORT_SYMBOL(rdma_iw_cm_id);
-/**
- * rdma_res_to_id() - return the rdma_cm_id pointer for this restrack.
- * @res: rdma resource tracking entry pointer
- */
-struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res)
-{
- struct rdma_id_private *id_priv =
- container_of(res, struct rdma_id_private, res);
-
- return &id_priv->id;
-}
-EXPORT_SYMBOL(rdma_res_to_id);
-
static int cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device, void *client_data);
@@ -168,6 +158,9 @@ static struct ib_sa_client sa_client;
static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
+static struct rb_root id_table = RB_ROOT;
+/* Serialize operations of id_table tree */
+static DEFINE_SPINLOCK(id_table_lock);
static struct workqueue_struct *cma_wq;
static unsigned int cma_pernet_id;
@@ -202,6 +195,11 @@ struct xarray *cma_pernet_xa(struct net *net, enum rdma_ucm_port_space ps)
}
}
+struct id_table_entry {
+ struct list_head id_list;
+ struct rb_node rb_node;
+};
+
struct cma_device {
struct list_head list;
struct ib_device *device;
@@ -420,11 +418,21 @@ static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
return hdr->ip_version >> 4;
}
-static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
+static void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
{
hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}
+static struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
+{
+ return (struct sockaddr *)&id_priv->id.route.addr.src_addr;
+}
+
+static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
+{
+ return (struct sockaddr *)&id_priv->id.route.addr.dst_addr;
+}
+
static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
{
struct in_device *in_dev = NULL;
@@ -445,6 +453,124 @@ static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
return (in_dev) ? 0 : -ENODEV;
}
+static int compare_netdev_and_ip(int ifindex_a, struct sockaddr *sa,
+ struct id_table_entry *entry_b)
+{
+ struct rdma_id_private *id_priv = list_first_entry(
+ &entry_b->id_list, struct rdma_id_private, id_list_entry);
+ int ifindex_b = id_priv->id.route.addr.dev_addr.bound_dev_if;
+ struct sockaddr *sb = cma_dst_addr(id_priv);
+
+ if (ifindex_a != ifindex_b)
+ return (ifindex_a > ifindex_b) ? 1 : -1;
+
+ if (sa->sa_family != sb->sa_family)
+ return sa->sa_family - sb->sa_family;
+
+ if (sa->sa_family == AF_INET &&
+ __builtin_object_size(sa, 0) >= sizeof(struct sockaddr_in)) {
+ return memcmp(&((struct sockaddr_in *)sa)->sin_addr,
+ &((struct sockaddr_in *)sb)->sin_addr,
+ sizeof(((struct sockaddr_in *)sa)->sin_addr));
+ }
+
+ if (sa->sa_family == AF_INET6 &&
+ __builtin_object_size(sa, 0) >= sizeof(struct sockaddr_in6)) {
+ return ipv6_addr_cmp(&((struct sockaddr_in6 *)sa)->sin6_addr,
+ &((struct sockaddr_in6 *)sb)->sin6_addr);
+ }
+
+ return -1;
+}
+
+static int cma_add_id_to_tree(struct rdma_id_private *node_id_priv)
+{
+ struct rb_node **new, *parent = NULL;
+ struct id_table_entry *this, *node;
+ unsigned long flags;
+ int result;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&id_table_lock, flags);
+ new = &id_table.rb_node;
+ while (*new) {
+ this = container_of(*new, struct id_table_entry, rb_node);
+ result = compare_netdev_and_ip(
+ node_id_priv->id.route.addr.dev_addr.bound_dev_if,
+ cma_dst_addr(node_id_priv), this);
+
+ parent = *new;
+ if (result < 0)
+ new = &((*new)->rb_left);
+ else if (result > 0)
+ new = &((*new)->rb_right);
+ else {
+ list_add_tail(&node_id_priv->id_list_entry,
+ &this->id_list);
+ kfree(node);
+ goto unlock;
+ }
+ }
+
+ INIT_LIST_HEAD(&node->id_list);
+ list_add_tail(&node_id_priv->id_list_entry, &node->id_list);
+
+ rb_link_node(&node->rb_node, parent, new);
+ rb_insert_color(&node->rb_node, &id_table);
+
+unlock:
+ spin_unlock_irqrestore(&id_table_lock, flags);
+ return 0;
+}
+
+static struct id_table_entry *
+node_from_ndev_ip(struct rb_root *root, int ifindex, struct sockaddr *sa)
+{
+ struct rb_node *node = root->rb_node;
+ struct id_table_entry *data;
+ int result;
+
+ while (node) {
+ data = container_of(node, struct id_table_entry, rb_node);
+ result = compare_netdev_and_ip(ifindex, sa, data);
+ if (result < 0)
+ node = node->rb_left;
+ else if (result > 0)
+ node = node->rb_right;
+ else
+ return data;
+ }
+
+ return NULL;
+}
+
+static void cma_remove_id_from_tree(struct rdma_id_private *id_priv)
+{
+ struct id_table_entry *data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&id_table_lock, flags);
+ if (list_empty(&id_priv->id_list_entry))
+ goto out;
+
+ data = node_from_ndev_ip(&id_table,
+ id_priv->id.route.addr.dev_addr.bound_dev_if,
+ cma_dst_addr(id_priv));
+ if (!data)
+ goto out;
+
+ list_del_init(&id_priv->id_list_entry);
+ if (list_empty(&data->id_list)) {
+ rb_erase(&data->rb_node, &id_table);
+ kfree(data);
+ }
+out:
+ spin_unlock_irqrestore(&id_table_lock, flags);
+}
+
static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev)
{
@@ -453,7 +579,7 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
id_priv->id.device = cma_dev->device;
id_priv->id.route.addr.dev_addr.transport =
rdma_node_get_transport(cma_dev->device->node_type);
- list_add_tail(&id_priv->list, &cma_dev->id_list);
+ list_add_tail(&id_priv->device_item, &cma_dev->id_list);
trace_cm_id_attach(id_priv, cma_dev->device);
}
@@ -470,7 +596,7 @@ static void cma_attach_to_dev(struct rdma_id_private *id_priv,
static void cma_release_dev(struct rdma_id_private *id_priv)
{
mutex_lock(&lock);
- list_del(&id_priv->list);
+ list_del_init(&id_priv->device_item);
cma_dev_put(id_priv->cma_dev);
id_priv->cma_dev = NULL;
id_priv->id.device = NULL;
@@ -481,37 +607,16 @@ static void cma_release_dev(struct rdma_id_private *id_priv)
mutex_unlock(&lock);
}
-static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
-{
- return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
-}
-
-static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
-{
- return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
-}
-
static inline unsigned short cma_family(struct rdma_id_private *id_priv)
{
return id_priv->id.route.addr.src_addr.ss_family;
}
-static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
+static int cma_set_default_qkey(struct rdma_id_private *id_priv)
{
struct ib_sa_mcmember_rec rec;
int ret = 0;
- if (id_priv->qkey) {
- if (qkey && id_priv->qkey != qkey)
- return -EINVAL;
- return 0;
- }
-
- if (qkey) {
- id_priv->qkey = qkey;
- return 0;
- }
-
switch (id_priv->id.ps) {
case RDMA_PS_UDP:
case RDMA_PS_IB:
@@ -531,6 +636,16 @@ static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
return ret;
}
+static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
+{
+ if (!qkey ||
+ (id_priv->qkey && (id_priv->qkey != qkey)))
+ return -EINVAL;
+
+ id_priv->qkey = qkey;
+ return 0;
+}
+
static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
{
dev_addr->dev_type = ARPHRD_INFINIBAND;
@@ -559,31 +674,84 @@ cma_validate_port(struct ib_device *device, u32 port,
struct rdma_id_private *id_priv)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ const struct ib_gid_attr *sgid_attr = ERR_PTR(-ENODEV);
int bound_if_index = dev_addr->bound_dev_if;
- const struct ib_gid_attr *sgid_attr;
int dev_type = dev_addr->dev_type;
struct net_device *ndev = NULL;
+ struct net_device *pdev = NULL;
if (!rdma_dev_access_netns(device, id_priv->id.route.addr.dev_addr.net))
- return ERR_PTR(-ENODEV);
+ goto out;
if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
- return ERR_PTR(-ENODEV);
+ goto out;
if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
- return ERR_PTR(-ENODEV);
+ goto out;
+
+ /*
+ * For drivers that do not associate more than one net device with
+ * their gid tables, such as iWARP drivers, it is sufficient to
+ * return the first table entry.
+ *
+ * Other driver classes might be included in the future.
+ */
+ if (rdma_protocol_iwarp(device, port)) {
+ sgid_attr = rdma_get_gid_attr(device, port, 0);
+ if (IS_ERR(sgid_attr))
+ goto out;
+
+ rcu_read_lock();
+ ndev = rcu_dereference(sgid_attr->ndev);
+ if (ndev->ifindex != bound_if_index) {
+ pdev = dev_get_by_index_rcu(dev_addr->net, bound_if_index);
+ if (pdev) {
+ if (is_vlan_dev(pdev)) {
+ pdev = vlan_dev_real_dev(pdev);
+ if (ndev->ifindex == pdev->ifindex)
+ bound_if_index = pdev->ifindex;
+ }
+ if (is_vlan_dev(ndev)) {
+ pdev = vlan_dev_real_dev(ndev);
+ if (bound_if_index == pdev->ifindex)
+ bound_if_index = ndev->ifindex;
+ }
+ }
+ }
+ if (!net_eq(dev_net(ndev), dev_addr->net) ||
+ ndev->ifindex != bound_if_index) {
+ rdma_put_gid_attr(sgid_attr);
+ sgid_attr = ERR_PTR(-ENODEV);
+ }
+ rcu_read_unlock();
+ goto out;
+ }
- if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
- ndev = dev_get_by_index(dev_addr->net, bound_if_index);
- if (!ndev)
- return ERR_PTR(-ENODEV);
+ /*
+ * For a RXE device, it should work with TUN device and normal ethernet
+ * devices. Use driver_id to check if a device is a RXE device or not.
+ * ARPHDR_NONE means a TUN device.
+ */
+ if (device->ops.driver_id == RDMA_DRIVER_RXE) {
+ if ((dev_type == ARPHRD_NONE || dev_type == ARPHRD_ETHER)
+ && rdma_protocol_roce(device, port)) {
+ ndev = dev_get_by_index(dev_addr->net, bound_if_index);
+ if (!ndev)
+ goto out;
+ }
} else {
- gid_type = IB_GID_TYPE_IB;
+ if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
+ ndev = dev_get_by_index(dev_addr->net, bound_if_index);
+ if (!ndev)
+ goto out;
+ } else {
+ gid_type = IB_GID_TYPE_IB;
+ }
}
sgid_attr = rdma_find_gid_by_port(device, gid, gid_type, port, ndev);
- if (ndev)
- dev_put(ndev);
+ dev_put(ndev);
+out:
return sgid_attr;
}
@@ -766,6 +934,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
unsigned int p;
u16 pkey, index;
enum ib_port_state port_state;
+ int ret;
int i;
cma_dev = NULL;
@@ -784,9 +953,14 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
continue;
- for (i = 0; !rdma_query_gid(cur_dev->device,
- p, i, &gid);
- i++) {
+
+ for (i = 0; i < cur_dev->device->port_data[p].immutable.gid_tbl_len;
+ ++i) {
+ ret = rdma_query_gid(cur_dev->device, p, i,
+ &gid);
+ if (ret)
+ continue;
+
if (!memcmp(&gid, dgid, sizeof(gid))) {
cma_dev = cur_dev;
sgid = gid;
@@ -854,11 +1028,14 @@ __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler,
init_completion(&id_priv->comp);
refcount_set(&id_priv->refcount, 1);
mutex_init(&id_priv->handler_mutex);
+ INIT_LIST_HEAD(&id_priv->device_item);
+ INIT_LIST_HEAD(&id_priv->id_list_entry);
INIT_LIST_HEAD(&id_priv->listen_list);
INIT_LIST_HEAD(&id_priv->mc_list);
get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
id_priv->id.route.addr.dev_addr.net = get_net(net);
id_priv->seq_num &= 0x00ffffff;
+ INIT_WORK(&id_priv->id.net_work, cma_netevent_work_handler);
rdma_restrack_new(&id_priv->res, RDMA_RESTRACK_CM_ID);
if (parent)
@@ -926,12 +1103,25 @@ static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
return ret;
}
+static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
+{
+ struct ib_qp_attr qp_attr;
+ int qp_attr_mask, ret;
+
+ qp_attr.qp_state = IB_QPS_INIT;
+ ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
+ if (ret)
+ return ret;
+
+ return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+}
+
int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
struct ib_qp_init_attr *qp_init_attr)
{
struct rdma_id_private *id_priv;
struct ib_qp *qp;
- int ret = 0;
+ int ret;
id_priv = container_of(id, struct rdma_id_private, id);
if (id->device != pd->device) {
@@ -948,6 +1138,8 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
if (id->qp_type == IB_QPT_UD)
ret = cma_init_ud_qp(id_priv, qp);
+ else
+ ret = cma_init_conn_qp(id_priv, qp);
if (ret)
goto out_destroy;
@@ -1078,7 +1270,7 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
if (id_priv->id.qp_type == IB_QPT_UD) {
- ret = cma_set_qkey(id_priv, 0);
+ ret = cma_set_default_qkey(id_priv);
if (ret)
return ret;
@@ -1412,7 +1604,7 @@ static bool validate_ipv4_net_dev(struct net_device *net_dev,
return false;
memset(&fl4, 0, sizeof(fl4));
- fl4.flowi4_iif = net_dev->ifindex;
+ fl4.flowi4_oif = net_dev->ifindex;
fl4.daddr = daddr;
fl4.saddr = saddr;
@@ -1632,7 +1824,7 @@ static struct rdma_id_private *cma_find_listener(
return id_priv;
list_for_each_entry(id_priv_dev,
&id_priv->listen_list,
- listen_list) {
+ listen_item) {
if (id_priv_dev->id.device == cm_id->device &&
cma_match_net_dev(&id_priv_dev->id,
net_dev, req))
@@ -1697,8 +1889,8 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id,
}
if (!validate_net_dev(*net_dev,
- (struct sockaddr *)&req->listen_addr_storage,
- (struct sockaddr *)&req->src_addr_storage)) {
+ (struct sockaddr *)&req->src_addr_storage,
+ (struct sockaddr *)&req->listen_addr_storage)) {
id_priv = ERR_PTR(-EHOSTUNREACH);
goto err;
}
@@ -1731,28 +1923,36 @@ static void cma_cancel_route(struct rdma_id_private *id_priv)
}
}
-static void cma_cancel_listens(struct rdma_id_private *id_priv)
+static void _cma_cancel_listens(struct rdma_id_private *id_priv)
{
struct rdma_id_private *dev_id_priv;
+ lockdep_assert_held(&lock);
+
/*
* Remove from listen_any_list to prevent added devices from spawning
* additional listen requests.
*/
- mutex_lock(&lock);
- list_del(&id_priv->list);
+ list_del_init(&id_priv->listen_any_item);
while (!list_empty(&id_priv->listen_list)) {
- dev_id_priv = list_entry(id_priv->listen_list.next,
- struct rdma_id_private, listen_list);
+ dev_id_priv =
+ list_first_entry(&id_priv->listen_list,
+ struct rdma_id_private, listen_item);
/* sync with device removal to avoid duplicate destruction */
- list_del_init(&dev_id_priv->list);
- list_del(&dev_id_priv->listen_list);
+ list_del_init(&dev_id_priv->device_item);
+ list_del_init(&dev_id_priv->listen_item);
mutex_unlock(&lock);
rdma_destroy_id(&dev_id_priv->id);
mutex_lock(&lock);
}
+}
+
+static void cma_cancel_listens(struct rdma_id_private *id_priv)
+{
+ mutex_lock(&lock);
+ _cma_cancel_listens(id_priv);
mutex_unlock(&lock);
}
@@ -1761,6 +1961,14 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv,
{
switch (state) {
case RDMA_CM_ADDR_QUERY:
+ /*
+ * We can avoid doing the rdma_addr_cancel() based on state,
+ * only RDMA_CM_ADDR_QUERY has a work that could still execute.
+ * Notice that the addr_handler work could still be exiting
+ * outside this state, however due to the interaction with the
+ * handler_mutex the work is guaranteed not to touch id_priv
+ * during exit.
+ */
rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
break;
case RDMA_CM_ROUTE_QUERY:
@@ -1795,6 +2003,8 @@ static void cma_release_port(struct rdma_id_private *id_priv)
static void destroy_mc(struct rdma_id_private *id_priv,
struct cma_multicast *mc)
{
+ bool send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
+
if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num))
ib_sa_free_multicast(mc->sa_mc);
@@ -1806,14 +2016,19 @@ static void destroy_mc(struct rdma_id_private *id_priv,
if (dev_addr->bound_dev_if)
ndev = dev_get_by_index(dev_addr->net,
dev_addr->bound_dev_if);
- if (ndev) {
+ if (ndev && !send_only) {
+ enum ib_gid_type gid_type;
union ib_gid mgid;
- cma_set_mgid(id_priv, (struct sockaddr *)&mc->addr,
- &mgid);
+ gid_type = id_priv->cma_dev->default_gid_type
+ [id_priv->id.port_num -
+ rdma_start_port(
+ id_priv->cma_dev->device)];
+ cma_iboe_set_mgid((struct sockaddr *)&mc->addr, &mgid,
+ gid_type);
cma_igmp_send(ndev, &mgid, false);
- dev_put(ndev);
}
+ dev_put(ndev);
cancel_work_sync(&mc->iboe_join.work);
}
@@ -1838,6 +2053,7 @@ static void _destroy_id(struct rdma_id_private *id_priv,
cma_cancel_operation(id_priv, state);
rdma_restrack_del(&id_priv->res);
+ cma_remove_id_from_tree(id_priv);
if (id_priv->cma_dev) {
if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
if (id_priv->cm_id.ib)
@@ -1858,6 +2074,9 @@ static void _destroy_id(struct rdma_id_private *id_priv,
cma_id_put(id_priv->id.context);
kfree(id_priv->id.route.path_rec);
+ kfree(id_priv->id.route.path_rec_inbound);
+ kfree(id_priv->id.route.path_rec_outbound);
+ kfree(id_priv->id.route.service_recs);
put_net(id_priv->id.route.addr.dev_addr.net);
kfree(id_priv);
@@ -1982,8 +2201,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id,
case IB_CM_REP_RECEIVED:
if (state == RDMA_CM_CONNECT &&
(id_priv->id.qp_type != IB_QPT_UD)) {
- trace_cm_send_mra(id_priv);
- ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
+ trace_cm_prepare_mra(id_priv);
+ ib_prepare_cm_mra(cm_id);
}
if (id_priv->id.qp) {
event.status = cma_rep_recv(id_priv);
@@ -2073,14 +2292,14 @@ cma_ib_new_conn_id(const struct rdma_cm_id *listen_id,
goto err;
rt = &id->route;
- rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
- rt->path_rec = kmalloc_array(rt->num_paths, sizeof(*rt->path_rec),
- GFP_KERNEL);
+ rt->num_pri_alt_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
+ rt->path_rec = kmalloc_array(rt->num_pri_alt_paths,
+ sizeof(*rt->path_rec), GFP_KERNEL);
if (!rt->path_rec)
goto err;
rt->path_rec[0] = *path;
- if (rt->num_paths == 2)
+ if (rt->num_pri_alt_paths == 2)
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
if (net_dev) {
@@ -2244,8 +2463,8 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
if (READ_ONCE(conn_id->state) == RDMA_CM_CONNECT &&
conn_id->id.qp_type != IB_QPT_UD) {
- trace_cm_send_mra(cm_id->context);
- ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
+ trace_cm_prepare_mra(cm_id->context);
+ ib_prepare_cm_mra(cm_id);
}
mutex_unlock(&conn_id->handler_mutex);
@@ -2253,8 +2472,7 @@ err_unlock:
mutex_unlock(&listen_id->handler_mutex);
net_dev_put:
- if (net_dev)
- dev_put(net_dev);
+ dev_put(net_dev);
return ret;
}
@@ -2529,7 +2747,7 @@ static int cma_listen_on_dev(struct rdma_id_private *id_priv,
ret = rdma_listen(&dev_id_priv->id, id_priv->backlog);
if (ret)
goto err_listen;
- list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
+ list_add_tail(&dev_id_priv->listen_item, &id_priv->listen_list);
return 0;
err_listen:
/* Caller must destroy this after releasing lock */
@@ -2545,13 +2763,13 @@ static int cma_listen_on_all(struct rdma_id_private *id_priv)
int ret;
mutex_lock(&lock);
- list_add_tail(&id_priv->list, &listen_any_list);
+ list_add_tail(&id_priv->listen_any_item, &listen_any_list);
list_for_each_entry(cma_dev, &dev_list, list) {
ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy);
if (ret) {
/* Prevent racing with cma_process_remove() */
if (to_destroy)
- list_del_init(&to_destroy->list);
+ list_del_init(&to_destroy->device_item);
goto err_listen;
}
}
@@ -2559,7 +2777,7 @@ static int cma_listen_on_all(struct rdma_id_private *id_priv)
return 0;
err_listen:
- list_del(&id_priv->list);
+ _cma_cancel_listens(id_priv);
mutex_unlock(&lock);
if (to_destroy)
rdma_destroy_id(&to_destroy->id);
@@ -2597,7 +2815,7 @@ int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
{
struct rdma_id_private *id_priv;
- if (id->qp_type != IB_QPT_RC)
+ if (id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_INI)
return -EINVAL;
id_priv = container_of(id, struct rdma_id_private, id);
@@ -2649,26 +2867,76 @@ int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer)
}
EXPORT_SYMBOL(rdma_set_min_rnr_timer);
+static int route_set_path_rec_inbound(struct cma_work *work,
+ struct sa_path_rec *path_rec)
+{
+ struct rdma_route *route = &work->id->id.route;
+
+ if (!route->path_rec_inbound) {
+ route->path_rec_inbound =
+ kzalloc(sizeof(*route->path_rec_inbound), GFP_KERNEL);
+ if (!route->path_rec_inbound)
+ return -ENOMEM;
+ }
+
+ *route->path_rec_inbound = *path_rec;
+ return 0;
+}
+
+static int route_set_path_rec_outbound(struct cma_work *work,
+ struct sa_path_rec *path_rec)
+{
+ struct rdma_route *route = &work->id->id.route;
+
+ if (!route->path_rec_outbound) {
+ route->path_rec_outbound =
+ kzalloc(sizeof(*route->path_rec_outbound), GFP_KERNEL);
+ if (!route->path_rec_outbound)
+ return -ENOMEM;
+ }
+
+ *route->path_rec_outbound = *path_rec;
+ return 0;
+}
+
static void cma_query_handler(int status, struct sa_path_rec *path_rec,
- void *context)
+ unsigned int num_prs, void *context)
{
struct cma_work *work = context;
struct rdma_route *route;
+ int i;
route = &work->id->id.route;
- if (!status) {
- route->num_paths = 1;
- *route->path_rec = *path_rec;
- } else {
- work->old_state = RDMA_CM_ROUTE_QUERY;
- work->new_state = RDMA_CM_ADDR_RESOLVED;
- work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
- work->event.status = status;
- pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n",
- status);
+ if (status)
+ goto fail;
+
+ for (i = 0; i < num_prs; i++) {
+ if (!path_rec[i].flags || (path_rec[i].flags & IB_PATH_GMP))
+ *route->path_rec = path_rec[i];
+ else if (path_rec[i].flags & IB_PATH_INBOUND)
+ status = route_set_path_rec_inbound(work, &path_rec[i]);
+ else if (path_rec[i].flags & IB_PATH_OUTBOUND)
+ status = route_set_path_rec_outbound(work,
+ &path_rec[i]);
+ else
+ status = -EINVAL;
+
+ if (status)
+ goto fail;
}
+ route->num_pri_alt_paths = 1;
+ queue_work(cma_wq, &work->work);
+ return;
+
+fail:
+ work->old_state = RDMA_CM_ROUTE_QUERY;
+ work->new_state = RDMA_CM_ADDR_RESOLVED;
+ work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
+ work->event.status = status;
+ pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n",
+ status);
queue_work(cma_wq, &work->work);
}
@@ -2913,7 +3181,7 @@ int rdma_set_ib_path(struct rdma_cm_id *id,
dev_put(ndev);
}
- id->route.num_paths = 1;
+ id->route.num_pri_alt_paths = 1;
return 0;
err_free:
@@ -3046,7 +3314,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
goto err1;
}
- route->num_paths = 1;
+ route->num_pri_alt_paths = 1;
ndev = cma_iboe_set_path_rec_l2_fields(id_priv);
if (!ndev) {
@@ -3071,7 +3339,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
route->path_rec->traffic_class = tos;
route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
route->path_rec->rate_selector = IB_SA_EQ;
- route->path_rec->rate = iboe_get_rate(ndev);
+ route->path_rec->rate = IB_RATE_PORT_CURRENT;
dev_put(ndev);
route->path_rec->packet_life_time_selector = IB_SA_EQ;
/* In case ACK timeout is set, use this value to calculate
@@ -3106,7 +3374,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
err2:
kfree(route->path_rec);
route->path_rec = NULL;
- route->num_paths = 0;
+ route->num_pri_alt_paths = 0;
err1:
kfree(work);
return ret;
@@ -3115,17 +3383,28 @@ err1:
int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
{
struct rdma_id_private *id_priv;
+ enum rdma_cm_state state;
int ret;
+ if (!timeout_ms)
+ return -EINVAL;
+
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY))
+ state = id_priv->state;
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
+ RDMA_CM_ROUTE_QUERY) &&
+ !cma_comp_exch(id_priv, RDMA_CM_ADDRINFO_RESOLVED,
+ RDMA_CM_ROUTE_QUERY))
return -EINVAL;
cma_id_get(id_priv);
if (rdma_cap_ib_sa(id->device, id->port_num))
ret = cma_resolve_ib_route(id_priv, timeout_ms);
- else if (rdma_protocol_roce(id->device, id->port_num))
+ else if (rdma_protocol_roce(id->device, id->port_num)) {
ret = cma_resolve_iboe_route(id_priv);
+ if (!ret)
+ cma_add_id_to_tree(id_priv);
+ }
else if (rdma_protocol_iwarp(id->device, id->port_num))
ret = cma_resolve_iw_route(id_priv);
else
@@ -3136,7 +3415,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
return 0;
err:
- cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED);
+ cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, state);
cma_id_put(id_priv);
return ret;
}
@@ -3319,98 +3598,6 @@ err:
return ret;
}
-static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
- const struct sockaddr *dst_addr)
-{
- if (!src_addr || !src_addr->sa_family) {
- src_addr = (struct sockaddr *) &id->route.addr.src_addr;
- src_addr->sa_family = dst_addr->sa_family;
- if (IS_ENABLED(CONFIG_IPV6) &&
- dst_addr->sa_family == AF_INET6) {
- struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr;
- struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr;
- src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
- if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
- id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id;
- } else if (dst_addr->sa_family == AF_IB) {
- ((struct sockaddr_ib *) src_addr)->sib_pkey =
- ((struct sockaddr_ib *) dst_addr)->sib_pkey;
- }
- }
- return rdma_bind_addr(id, src_addr);
-}
-
-/*
- * If required, resolve the source address for bind and leave the id_priv in
- * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior
- * calls made by ULP, a previously bound ID will not be re-bound and src_addr is
- * ignored.
- */
-static int resolve_prepare_src(struct rdma_id_private *id_priv,
- struct sockaddr *src_addr,
- const struct sockaddr *dst_addr)
-{
- int ret;
-
- memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
- /* For a well behaved ULP state will be RDMA_CM_IDLE */
- ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr);
- if (ret)
- goto err_dst;
- if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
- RDMA_CM_ADDR_QUERY))) {
- ret = -EINVAL;
- goto err_dst;
- }
- }
-
- if (cma_family(id_priv) != dst_addr->sa_family) {
- ret = -EINVAL;
- goto err_state;
- }
- return 0;
-
-err_state:
- cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
-err_dst:
- memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
- return ret;
-}
-
-int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
- const struct sockaddr *dst_addr, unsigned long timeout_ms)
-{
- struct rdma_id_private *id_priv =
- container_of(id, struct rdma_id_private, id);
- int ret;
-
- ret = resolve_prepare_src(id_priv, src_addr, dst_addr);
- if (ret)
- return ret;
-
- if (cma_any_addr(dst_addr)) {
- ret = cma_resolve_loopback(id_priv);
- } else {
- if (dst_addr->sa_family == AF_IB) {
- ret = cma_resolve_ib_addr(id_priv);
- } else {
- ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
- &id->route.addr.dev_addr,
- timeout_ms, addr_handler,
- false, id_priv);
- }
- }
- if (ret)
- goto err;
-
- return 0;
-err:
- cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
- return ret;
-}
-EXPORT_SYMBOL(rdma_resolve_addr);
-
int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
{
struct rdma_id_private *id_priv;
@@ -3562,7 +3749,7 @@ static int cma_alloc_any_port(enum rdma_ucm_port_space ps,
inet_get_local_port_range(net, &low, &high);
remaining = (high - low) + 1;
- rover = prandom_u32() % remaining + low;
+ rover = get_random_u32_inclusive(low, remaining + low - 1);
retry:
if (last_used_port != rover) {
struct rdma_bind_list *bind_list;
@@ -3753,9 +3940,13 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
int ret;
if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) {
+ struct sockaddr_in any_in = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_ANY),
+ };
+
/* For a well behaved ULP state will be RDMA_CM_IDLE */
- id->route.addr.src_addr.ss_family = AF_INET;
- ret = rdma_bind_addr(id, cma_src_addr(id_priv));
+ ret = rdma_bind_addr(id, (struct sockaddr *)&any_in);
if (ret)
return ret;
if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
@@ -3809,27 +4000,26 @@ err:
}
EXPORT_SYMBOL(rdma_listen);
-int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
+static int rdma_bind_addr_dst(struct rdma_id_private *id_priv,
+ struct sockaddr *addr, const struct sockaddr *daddr)
{
- struct rdma_id_private *id_priv;
+ struct sockaddr *id_daddr;
int ret;
- struct sockaddr *daddr;
if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 &&
addr->sa_family != AF_IB)
return -EAFNOSUPPORT;
- id_priv = container_of(id, struct rdma_id_private, id);
if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND))
return -EINVAL;
- ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
+ ret = cma_check_linklocal(&id_priv->id.route.addr.dev_addr, addr);
if (ret)
goto err1;
memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));
if (!cma_any_addr(addr)) {
- ret = cma_translate_addr(addr, &id->route.addr.dev_addr);
+ ret = cma_translate_addr(addr, &id_priv->id.route.addr.dev_addr);
if (ret)
goto err1;
@@ -3849,8 +4039,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
}
#endif
}
- daddr = cma_dst_addr(id_priv);
- daddr->sa_family = addr->sa_family;
+ id_daddr = cma_dst_addr(id_priv);
+ if (daddr != id_daddr)
+ memcpy(id_daddr, daddr, rdma_addr_size(addr));
+ id_daddr->sa_family = addr->sa_family;
ret = cma_get_port(id_priv);
if (ret)
@@ -3866,6 +4058,129 @@ err1:
cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
return ret;
}
+
+static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+ struct sockaddr_storage zero_sock = {};
+
+ if (src_addr && src_addr->sa_family)
+ return rdma_bind_addr_dst(id_priv, src_addr, dst_addr);
+
+ /*
+ * When the src_addr is not specified, automatically supply an any addr
+ */
+ zero_sock.ss_family = dst_addr->sa_family;
+ if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *src_addr6 =
+ (struct sockaddr_in6 *)&zero_sock;
+ struct sockaddr_in6 *dst_addr6 =
+ (struct sockaddr_in6 *)dst_addr;
+
+ src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
+ if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ id->route.addr.dev_addr.bound_dev_if =
+ dst_addr6->sin6_scope_id;
+ } else if (dst_addr->sa_family == AF_IB) {
+ ((struct sockaddr_ib *)&zero_sock)->sib_pkey =
+ ((struct sockaddr_ib *)dst_addr)->sib_pkey;
+ }
+ return rdma_bind_addr_dst(id_priv, (struct sockaddr *)&zero_sock, dst_addr);
+}
+
+/*
+ * If required, resolve the source address for bind and leave the id_priv in
+ * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior
+ * calls made by ULP, a previously bound ID will not be re-bound and src_addr is
+ * ignored.
+ */
+static int resolve_prepare_src(struct rdma_id_private *id_priv,
+ struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr)
+{
+ int ret;
+
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
+ /* For a well behaved ULP state will be RDMA_CM_IDLE */
+ ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr);
+ if (ret)
+ return ret;
+ if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
+ RDMA_CM_ADDR_QUERY)))
+ return -EINVAL;
+
+ } else {
+ memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
+ }
+
+ if (cma_family(id_priv) != dst_addr->sa_family) {
+ ret = -EINVAL;
+ goto err_state;
+ }
+ return 0;
+
+err_state:
+ cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
+ return ret;
+}
+
+int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr, unsigned long timeout_ms)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+ int ret;
+
+ ret = resolve_prepare_src(id_priv, src_addr, dst_addr);
+ if (ret)
+ return ret;
+
+ if (cma_any_addr(dst_addr)) {
+ ret = cma_resolve_loopback(id_priv);
+ } else {
+ if (dst_addr->sa_family == AF_IB) {
+ ret = cma_resolve_ib_addr(id_priv);
+ } else {
+ /*
+ * The FSM can return back to RDMA_CM_ADDR_BOUND after
+ * rdma_resolve_ip() is called, eg through the error
+ * path in addr_handler(). If this happens the existing
+ * request must be canceled before issuing a new one.
+ * Since canceling a request is a bit slow and this
+ * oddball path is rare, keep track once a request has
+ * been issued. The track turns out to be a permanent
+ * state since this is the only cancel as it is
+ * immediately before rdma_resolve_ip().
+ */
+ if (id_priv->used_resolve_ip)
+ rdma_addr_cancel(&id->route.addr.dev_addr);
+ else
+ id_priv->used_resolve_ip = 1;
+ ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
+ &id->route.addr.dev_addr,
+ timeout_ms, addr_handler,
+ false, id_priv);
+ }
+ }
+ if (ret)
+ goto err;
+
+ return 0;
+err:
+ cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_resolve_addr);
+
+int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ return rdma_bind_addr_dst(id_priv, addr, cma_dst_addr(id_priv));
+}
EXPORT_SYMBOL(rdma_bind_addr);
static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv)
@@ -3974,8 +4289,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
memset(&req, 0, sizeof req);
offset = cma_user_data_offset(id_priv);
- req.private_data_len = offset + conn_param->private_data_len;
- if (req.private_data_len < conn_param->private_data_len)
+ if (check_add_overflow(offset, conn_param->private_data_len, &req.private_data_len))
return -EINVAL;
if (req.private_data_len) {
@@ -4034,8 +4348,7 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
memset(&req, 0, sizeof req);
offset = cma_user_data_offset(id_priv);
- req.private_data_len = offset + conn_param->private_data_len;
- if (req.private_data_len < conn_param->private_data_len)
+ if (check_add_overflow(offset, conn_param->private_data_len, &req.private_data_len))
return -EINVAL;
if (req.private_data_len) {
@@ -4066,7 +4379,9 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
}
req.primary_path = &route->path_rec[0];
- if (route->num_paths == 2)
+ req.primary_path_inbound = route->path_rec_inbound;
+ req.primary_path_outbound = route->path_rec_outbound;
+ if (route->num_pri_alt_paths == 2)
req.alternate_path = &route->path_rec[1];
req.ppath_sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr;
@@ -4160,6 +4475,8 @@ int rdma_connect_locked(struct rdma_cm_id *id,
container_of(id, struct rdma_id_private, id);
int ret;
+ lockdep_assert_held(&id_priv->handler_mutex);
+
if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))
return -EINVAL;
@@ -4302,7 +4619,10 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
memset(&rep, 0, sizeof rep);
rep.status = status;
if (status == IB_SIDR_SUCCESS) {
- ret = cma_set_qkey(id_priv, qkey);
+ if (qkey)
+ ret = cma_set_qkey(id_priv, qkey);
+ else
+ ret = cma_set_default_qkey(id_priv);
if (ret)
return ret;
rep.qp_num = id_priv->qp_num;
@@ -4507,9 +4827,7 @@ static void cma_make_mc_event(int status, struct rdma_id_private *id_priv,
enum ib_gid_type gid_type;
struct net_device *ndev;
- if (!status)
- status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
- else
+ if (status)
pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n",
status);
@@ -4537,11 +4855,10 @@ static void cma_make_mc_event(int status, struct rdma_id_private *id_priv,
}
event->param.ud.qp_num = 0xFFFFFF;
- event->param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
+ event->param.ud.qkey = id_priv->qkey;
out:
- if (ndev)
- dev_put(ndev);
+ dev_put(ndev);
}
static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
@@ -4556,8 +4873,11 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING)
goto out;
- cma_make_mc_event(status, id_priv, multicast, &event, mc);
- ret = cma_cm_event_handler(id_priv, &event);
+ ret = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
+ if (!ret) {
+ cma_make_mc_event(status, id_priv, multicast, &event, mc);
+ ret = cma_cm_event_handler(id_priv, &event);
+ }
rdma_destroy_ah_attr(&event.param.ud.ah_attr);
WARN_ON(ret);
@@ -4610,9 +4930,11 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
if (ret)
return ret;
- ret = cma_set_qkey(id_priv, 0);
- if (ret)
- return ret;
+ if (!id_priv->qkey) {
+ ret = cma_set_default_qkey(id_priv);
+ if (ret)
+ return ret;
+ }
cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
rec.qkey = cpu_to_be32(id_priv->qkey);
@@ -4675,7 +4997,7 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
int err = 0;
struct sockaddr *addr = (struct sockaddr *)&mc->addr;
struct net_device *ndev = NULL;
- struct ib_sa_multicast ib;
+ struct ib_sa_multicast ib = {};
enum ib_gid_type gid_type;
bool send_only;
@@ -4689,15 +5011,12 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type);
ib.rec.pkey = cpu_to_be16(0xffff);
- if (id_priv->id.ps == RDMA_PS_UDP)
- ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
-
if (dev_addr->bound_dev_if)
ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
if (!ndev)
return -ENODEV;
- ib.rec.rate = iboe_get_rate(ndev);
+ ib.rec.rate = IB_RATE_PORT_CURRENT;
ib.rec.hop_limit = 1;
ib.rec.mtu = iboe_get_mtu(ndev->mtu);
@@ -4717,6 +5036,9 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
if (err || !ib.rec.mtu)
return err ?: -EINVAL;
+ if (!id_priv->qkey)
+ cma_set_default_qkey(id_priv);
+
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&ib.rec.port_gid);
INIT_WORK(&mc->iboe_join.work, cma_iboe_join_work_handler);
@@ -4742,6 +5064,9 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
READ_ONCE(id_priv->state) != RDMA_CM_ADDR_RESOLVED))
return -EINVAL;
+ if (id_priv->id.qp_type != IB_QPT_UD)
+ return -EINVAL;
+
mc = kzalloc(sizeof(*mc), GFP_KERNEL);
if (!mc)
return -ENOMEM;
@@ -4838,7 +5163,7 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
mutex_lock(&lock);
list_for_each_entry(cma_dev, &dev_list, list)
- list_for_each_entry(id_priv, &cma_dev->id_list, list) {
+ list_for_each_entry(id_priv, &cma_dev->id_list, device_item) {
ret = cma_netdev_change(ndev, id_priv);
if (ret)
goto out;
@@ -4849,10 +5174,87 @@ out:
return ret;
}
+static void cma_netevent_work_handler(struct work_struct *_work)
+{
+ struct rdma_id_private *id_priv =
+ container_of(_work, struct rdma_id_private, id.net_work);
+ struct rdma_cm_event event = {};
+
+ mutex_lock(&id_priv->handler_mutex);
+
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
+ goto out_unlock;
+
+ event.event = RDMA_CM_EVENT_UNREACHABLE;
+ event.status = -ETIMEDOUT;
+
+ if (cma_cm_event_handler(id_priv, &event)) {
+ __acquire(&id_priv->handler_mutex);
+ id_priv->cm_id.ib = NULL;
+ cma_id_put(id_priv);
+ destroy_id_handler_unlock(id_priv);
+ return;
+ }
+
+out_unlock:
+ mutex_unlock(&id_priv->handler_mutex);
+ cma_id_put(id_priv);
+}
+
+static int cma_netevent_callback(struct notifier_block *self,
+ unsigned long event, void *ctx)
+{
+ struct id_table_entry *ips_node = NULL;
+ struct rdma_id_private *current_id;
+ struct neighbour *neigh = ctx;
+ unsigned long flags;
+
+ if (event != NETEVENT_NEIGH_UPDATE)
+ return NOTIFY_DONE;
+
+ spin_lock_irqsave(&id_table_lock, flags);
+ if (neigh->tbl->family == AF_INET6) {
+ struct sockaddr_in6 neigh_sock_6;
+
+ neigh_sock_6.sin6_family = AF_INET6;
+ neigh_sock_6.sin6_addr = *(struct in6_addr *)neigh->primary_key;
+ ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
+ (struct sockaddr *)&neigh_sock_6);
+ } else if (neigh->tbl->family == AF_INET) {
+ struct sockaddr_in neigh_sock_4;
+
+ neigh_sock_4.sin_family = AF_INET;
+ neigh_sock_4.sin_addr.s_addr = *(__be32 *)(neigh->primary_key);
+ ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
+ (struct sockaddr *)&neigh_sock_4);
+ } else
+ goto out;
+
+ if (!ips_node)
+ goto out;
+
+ list_for_each_entry(current_id, &ips_node->id_list, id_list_entry) {
+ if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr,
+ neigh->ha, ETH_ALEN))
+ continue;
+ cma_id_get(current_id);
+ if (!queue_work(cma_wq, &current_id->id.net_work))
+ cma_id_put(current_id);
+ }
+out:
+ spin_unlock_irqrestore(&id_table_lock, flags);
+ return NOTIFY_DONE;
+}
+
static struct notifier_block cma_nb = {
.notifier_call = cma_netdev_callback
};
+static struct notifier_block cma_netevent_cb = {
+ .notifier_call = cma_netevent_callback
+};
+
static void cma_send_device_removal_put(struct rdma_id_private *id_priv)
{
struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL };
@@ -4898,10 +5300,10 @@ static void cma_process_remove(struct cma_device *cma_dev)
mutex_lock(&lock);
while (!list_empty(&cma_dev->id_list)) {
struct rdma_id_private *id_priv = list_first_entry(
- &cma_dev->id_list, struct rdma_id_private, list);
+ &cma_dev->id_list, struct rdma_id_private, device_item);
- list_del(&id_priv->listen_list);
- list_del_init(&id_priv->list);
+ list_del_init(&id_priv->listen_item);
+ list_del_init(&id_priv->device_item);
cma_id_get(id_priv);
mutex_unlock(&lock);
@@ -4978,7 +5380,7 @@ static int cma_add_one(struct ib_device *device)
mutex_lock(&lock);
list_add_tail(&cma_dev->list, &dev_list);
- list_for_each_entry(id_priv, &listen_any_list, list) {
+ list_for_each_entry(id_priv, &listen_any_list, listen_any_item) {
ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy);
if (ret)
goto free_listen;
@@ -5075,6 +5477,7 @@ static int __init cma_init(void)
ib_sa_register_client(&sa_client);
register_netdevice_notifier(&cma_nb);
+ register_netevent_notifier(&cma_netevent_cb);
ret = ib_register_client(&cma_client);
if (ret)
@@ -5089,6 +5492,7 @@ static int __init cma_init(void)
err_ib:
ib_unregister_client(&cma_client);
err:
+ unregister_netevent_notifier(&cma_netevent_cb);
unregister_netdevice_notifier(&cma_nb);
ib_sa_unregister_client(&sa_client);
unregister_pernet_subsys(&cma_pernet_operations);
@@ -5101,6 +5505,7 @@ static void __exit cma_cleanup(void)
{
cma_configfs_exit();
ib_unregister_client(&cma_client);
+ unregister_netevent_notifier(&cma_netevent_cb);
unregister_netdevice_notifier(&cma_nb);
ib_sa_unregister_client(&sa_client);
unregister_pernet_subsys(&cma_pernet_operations);
@@ -5109,3 +5514,129 @@ static void __exit cma_cleanup(void)
module_init(cma_init);
module_exit(cma_cleanup);
+
+static void cma_query_ib_service_handler(int status,
+ struct sa_service_rec *recs,
+ unsigned int num_recs, void *context)
+{
+ struct cma_work *work = context;
+ struct rdma_id_private *id_priv = work->id;
+ struct sockaddr_ib *addr;
+
+ if (status)
+ goto fail;
+
+ if (!num_recs) {
+ status = -ENOENT;
+ goto fail;
+ }
+
+ if (id_priv->id.route.service_recs) {
+ status = -EALREADY;
+ goto fail;
+ }
+
+ id_priv->id.route.service_recs =
+ kmalloc_array(num_recs, sizeof(*recs), GFP_KERNEL);
+ if (!id_priv->id.route.service_recs) {
+ status = -ENOMEM;
+ goto fail;
+ }
+
+ id_priv->id.route.num_service_recs = num_recs;
+ memcpy(id_priv->id.route.service_recs, recs, sizeof(*recs) * num_recs);
+
+ addr = (struct sockaddr_ib *)&id_priv->id.route.addr.dst_addr;
+ addr->sib_family = AF_IB;
+ addr->sib_addr = *(struct ib_addr *)&recs->gid;
+ addr->sib_pkey = recs->pkey;
+ addr->sib_sid = recs->id;
+ rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr,
+ (union ib_gid *)&addr->sib_addr);
+ ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr,
+ ntohs(addr->sib_pkey));
+
+ queue_work(cma_wq, &work->work);
+ return;
+
+fail:
+ work->old_state = RDMA_CM_ADDRINFO_QUERY;
+ work->new_state = RDMA_CM_ADDR_BOUND;
+ work->event.event = RDMA_CM_EVENT_ADDRINFO_ERROR;
+ work->event.status = status;
+ pr_debug_ratelimited(
+ "RDMA CM: SERVICE_ERROR: failed to query service record. status %d\n",
+ status);
+ queue_work(cma_wq, &work->work);
+}
+
+static int cma_resolve_ib_service(struct rdma_id_private *id_priv,
+ struct rdma_ucm_ib_service *ibs)
+{
+ struct sa_service_rec sr = {};
+ ib_sa_comp_mask mask = 0;
+ struct cma_work *work;
+
+ work = kzalloc(sizeof(*work), GFP_KERNEL);
+ if (!work)
+ return -ENOMEM;
+
+ cma_id_get(id_priv);
+
+ work->id = id_priv;
+ INIT_WORK(&work->work, cma_work_handler);
+ work->old_state = RDMA_CM_ADDRINFO_QUERY;
+ work->new_state = RDMA_CM_ADDRINFO_RESOLVED;
+ work->event.event = RDMA_CM_EVENT_ADDRINFO_RESOLVED;
+
+ if (ibs->flags & RDMA_USER_CM_IB_SERVICE_FLAG_ID) {
+ sr.id = cpu_to_be64(ibs->service_id);
+ mask |= IB_SA_SERVICE_REC_SERVICE_ID;
+ }
+ if (ibs->flags & RDMA_USER_CM_IB_SERVICE_FLAG_NAME) {
+ strscpy(sr.name, ibs->service_name, sizeof(sr.name));
+ mask |= IB_SA_SERVICE_REC_SERVICE_NAME;
+ }
+
+ id_priv->query_id = ib_sa_service_rec_get(&sa_client,
+ id_priv->id.device,
+ id_priv->id.port_num,
+ &sr, mask,
+ 2000, GFP_KERNEL,
+ cma_query_ib_service_handler,
+ work, &id_priv->query);
+
+ if (id_priv->query_id < 0) {
+ cma_id_put(id_priv);
+ kfree(work);
+ return id_priv->query_id;
+ }
+
+ return 0;
+}
+
+int rdma_resolve_ib_service(struct rdma_cm_id *id,
+ struct rdma_ucm_ib_service *ibs)
+{
+ struct rdma_id_private *id_priv;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (!id_priv->cma_dev ||
+ !cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDRINFO_QUERY))
+ return -EINVAL;
+
+ if (rdma_cap_ib_sa(id->device, id->port_num))
+ ret = cma_resolve_ib_service(id_priv, ibs);
+ else
+ ret = -EOPNOTSUPP;
+
+ if (ret)
+ goto err;
+
+ return 0;
+err:
+ cma_comp_exch(id_priv, RDMA_CM_ADDRINFO_QUERY, RDMA_CM_ADDR_BOUND);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_resolve_ib_service);