diff options
Diffstat (limited to 'drivers/infiniband/core/addr.c')
| -rw-r--r-- | drivers/infiniband/core/addr.c | 151 |
1 files changed, 72 insertions, 79 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 0dce94e3c495..61596cda2b65 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -37,14 +37,14 @@ #include <linux/inetdevice.h> #include <linux/slab.h> #include <linux/workqueue.h> -#include <linux/module.h> #include <net/arp.h> #include <net/neighbour.h> #include <net/route.h> #include <net/netevent.h> -#include <net/addrconf.h> +#include <net/ipv6_stubs.h> #include <net/ip6_route.h> #include <rdma/ib_addr.h> +#include <rdma/ib_cache.h> #include <rdma/ib_sa.h> #include <rdma/ib.h> #include <rdma/rdma_netlink.h> @@ -75,7 +75,9 @@ static struct workqueue_struct *addr_wq; static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = { [LS_NLA_TYPE_DGID] = {.type = NLA_BINARY, - .len = sizeof(struct rdma_nla_ls_gid)}, + .len = sizeof(struct rdma_nla_ls_gid), + .validation_type = NLA_VALIDATE_MIN, + .min = sizeof(struct rdma_nla_ls_gid)}, }; static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh) @@ -86,8 +88,8 @@ static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh) if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR) return false; - ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), - nlmsg_len(nlh), ib_nl_addr_policy, NULL); + ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), + nlmsg_len(nlh), ib_nl_addr_policy, NULL); if (ret) return false; @@ -138,7 +140,7 @@ int ib_nl_handle_ip_res_resp(struct sk_buff *skb, if (ib_nl_is_good_ip_resp(nlh)) ib_nl_process_good_ip_rsep(nlh); - return skb->len; + return 0; } static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr, @@ -182,7 +184,7 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr, /* Repair the nlmsg header length */ nlmsg_end(skb, nlh); - rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, GFP_KERNEL); + rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, GFP_KERNEL); /* Make the request retry, so when we get the response from userspace * we will have something. @@ -336,7 +338,7 @@ static int dst_fetch_ha(const struct dst_entry *dst, neigh_event_send(n, NULL); ret = -ENODATA; } else { - memcpy(dev_addr->dst_dev_addr, n->ha, MAX_ADDR_LEN); + neigh_ha_snapshot(dev_addr->dst_dev_addr, n, dst->dev); } neigh_release(n); @@ -346,16 +348,10 @@ static int dst_fetch_ha(const struct dst_entry *dst, static bool has_gateway(const struct dst_entry *dst, sa_family_t family) { - struct rtable *rt; - struct rt6_info *rt6; - - if (family == AF_INET) { - rt = container_of(dst, struct rtable, dst); - return rt->rt_uses_gateway; - } + if (family == AF_INET) + return dst_rtable(dst)->rt_uses_gateway; - rt6 = container_of(dst, struct rt6_info, dst); - return rt6->rt6i_flags & RTF_GATEWAY; + return dst_rt6_info(dst)->rt6i_flags & RTF_GATEWAY; } static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr, @@ -370,6 +366,8 @@ static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr, (const void *)&dst_in6->sin6_addr; sa_family_t family = dst_in->sa_family; + might_sleep(); + /* If we have a gateway in IB mode then it must be an IB network */ if (has_gateway(dst, family) && dev_addr->network == RDMA_NETWORK_IB) return ib_nl_fetch_ha(dev_addr, daddr, seq, family); @@ -420,16 +418,15 @@ static int addr6_resolve(struct sockaddr *src_sock, (const struct sockaddr_in6 *)dst_sock; struct flowi6 fl6; struct dst_entry *dst; - int ret; memset(&fl6, 0, sizeof fl6); fl6.daddr = dst_in->sin6_addr; fl6.saddr = src_in->sin6_addr; fl6.flowi6_oif = addr->bound_dev_if; - ret = ipv6_stub->ipv6_dst_lookup(addr->net, NULL, &dst, &fl6); - if (ret < 0) - return ret; + dst = ipv6_stub->ipv6_dst_lookup_flow(addr->net, NULL, &fl6, NULL); + if (IS_ERR(dst)) + return PTR_ERR(dst); if (ipv6_addr_any(&src_in->sin6_addr)) src_in->sin6_addr = fl6.saddr; @@ -449,63 +446,41 @@ static int addr6_resolve(struct sockaddr *src_sock, } #endif +static bool is_dst_local(const struct dst_entry *dst) +{ + if (dst->ops->family == AF_INET) + return !!(dst_rtable(dst)->rt_type & RTN_LOCAL); + else if (dst->ops->family == AF_INET6) + return !!(dst_rt6_info(dst)->rt6i_flags & RTF_LOCAL); + else + return false; +} + static int addr_resolve_neigh(const struct dst_entry *dst, const struct sockaddr *dst_in, struct rdma_dev_addr *addr, - unsigned int ndev_flags, u32 seq) { - int ret = 0; - - if (ndev_flags & IFF_LOOPBACK) { + if (is_dst_local(dst)) { + /* When the destination is local entry, source and destination + * are same. Skip the neighbour lookup. + */ memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); - } else { - if (!(ndev_flags & IFF_NOARP)) { - /* If the device doesn't do ARP internally */ - ret = fetch_ha(dst, addr, dst_in, seq); - } + return 0; } - return ret; -} - -static int copy_src_l2_addr(struct rdma_dev_addr *dev_addr, - const struct sockaddr *dst_in, - const struct dst_entry *dst, - const struct net_device *ndev) -{ - int ret = 0; - - if (dst->dev->flags & IFF_LOOPBACK) - ret = rdma_translate_ip(dst_in, dev_addr); - else - rdma_copy_src_l2_addr(dev_addr, dst->dev); - - /* - * If there's a gateway and type of device not ARPHRD_INFINIBAND, - * we're definitely in RoCE v2 (as RoCE v1 isn't routable) set the - * network type accordingly. - */ - if (has_gateway(dst, dst_in->sa_family) && - ndev->type != ARPHRD_INFINIBAND) - dev_addr->network = dst_in->sa_family == AF_INET ? - RDMA_NETWORK_IPV4 : - RDMA_NETWORK_IPV6; - else - dev_addr->network = RDMA_NETWORK_IB; - return ret; + return fetch_ha(dst, addr, dst_in, seq); } static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr, - unsigned int *ndev_flags, const struct sockaddr *dst_in, const struct dst_entry *dst) { struct net_device *ndev = READ_ONCE(dst->dev); - *ndev_flags = ndev->flags; /* A physical device must be the RDMA device to use */ - if (ndev->flags & IFF_LOOPBACK) { + if (is_dst_local(dst)) { + int ret; /* * RDMA (IB/RoCE, iWarp) doesn't run on lo interface or * loopback IP address. So if route is resolved to loopback @@ -515,9 +490,27 @@ static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr, ndev = rdma_find_ndev_for_src_ip_rcu(dev_net(ndev), dst_in); if (IS_ERR(ndev)) return -ENODEV; + ret = rdma_translate_ip(dst_in, dev_addr); + if (ret) + return ret; + } else { + rdma_copy_src_l2_addr(dev_addr, dst->dev); } - return copy_src_l2_addr(dev_addr, dst_in, dst, ndev); + /* + * If there's a gateway and type of device not ARPHRD_INFINIBAND, + * we're definitely in RoCE v2 (as RoCE v1 isn't routable) set the + * network type accordingly. + */ + if (has_gateway(dst, dst_in->sa_family) && + ndev->type != ARPHRD_INFINIBAND) + dev_addr->network = dst_in->sa_family == AF_INET ? + RDMA_NETWORK_IPV4 : + RDMA_NETWORK_IPV6; + else + dev_addr->network = RDMA_NETWORK_IB; + + return 0; } static int set_addr_netns_by_gid_rcu(struct rdma_dev_addr *addr) @@ -554,7 +547,6 @@ static int addr_resolve(struct sockaddr *src_in, u32 seq) { struct dst_entry *dst = NULL; - unsigned int ndev_flags = 0; struct rtable *rt = NULL; int ret; @@ -591,7 +583,7 @@ static int addr_resolve(struct sockaddr *src_in, rcu_read_unlock(); goto done; } - ret = rdma_set_src_addr_rcu(addr, &ndev_flags, dst_in, dst); + ret = rdma_set_src_addr_rcu(addr, dst_in, dst); rcu_read_unlock(); /* @@ -599,7 +591,7 @@ static int addr_resolve(struct sockaddr *src_in, * only if src addr translation didn't fail. */ if (!ret && resolve_neigh) - ret = addr_resolve_neigh(dst, dst_in, addr, ndev_flags, seq); + ret = addr_resolve_neigh(dst, dst_in, addr, seq); if (src_in->sa_family == AF_INET) ip_rt_put(rt); @@ -645,13 +637,12 @@ static void process_one_req(struct work_struct *_work) req->callback = NULL; spin_lock_bh(&lock); + /* + * Although the work will normally have been canceled by the workqueue, + * it can still be requeued as long as it is on the req_list. + */ + cancel_delayed_work(&req->work); if (!list_empty(&req->list)) { - /* - * Although the work will normally have been canceled by the - * workqueue, it can still be requeued as long as it is on the - * req_list. - */ - cancel_delayed_work(&req->work); list_del_init(&req->list); kfree(req); } @@ -727,11 +718,13 @@ int roce_resolve_route_from_path(struct sa_path_rec *rec, struct rdma_dev_addr dev_addr = {}; int ret; + might_sleep(); + if (rec->roce.route_resolved) return 0; - rdma_gid2ip(&sgid._sockaddr, &rec->sgid); - rdma_gid2ip(&dgid._sockaddr, &rec->dgid); + rdma_gid2ip((struct sockaddr *)&sgid, &rec->sgid); + rdma_gid2ip((struct sockaddr *)&dgid, &rec->dgid); if (sgid._sockaddr.sa_family != dgid._sockaddr.sa_family) return -EINVAL; @@ -742,7 +735,7 @@ int roce_resolve_route_from_path(struct sa_path_rec *rec, dev_addr.net = &init_net; dev_addr.sgid_attr = attr; - ret = addr_resolve(&sgid._sockaddr, &dgid._sockaddr, + ret = addr_resolve((struct sockaddr *)&sgid, (struct sockaddr *)&dgid, &dev_addr, false, true, 0); if (ret) return ret; @@ -814,22 +807,22 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, struct rdma_dev_addr dev_addr; struct resolve_cb_context ctx; union { - struct sockaddr _sockaddr; struct sockaddr_in _sockaddr_in; struct sockaddr_in6 _sockaddr_in6; } sgid_addr, dgid_addr; int ret; - rdma_gid2ip(&sgid_addr._sockaddr, sgid); - rdma_gid2ip(&dgid_addr._sockaddr, dgid); + rdma_gid2ip((struct sockaddr *)&sgid_addr, sgid); + rdma_gid2ip((struct sockaddr *)&dgid_addr, dgid); memset(&dev_addr, 0, sizeof(dev_addr)); dev_addr.net = &init_net; dev_addr.sgid_attr = sgid_attr; init_completion(&ctx.comp); - ret = rdma_resolve_ip(&sgid_addr._sockaddr, &dgid_addr._sockaddr, - &dev_addr, 1000, resolve_cb, true, &ctx); + ret = rdma_resolve_ip((struct sockaddr *)&sgid_addr, + (struct sockaddr *)&dgid_addr, &dev_addr, 1000, + resolve_cb, true, &ctx); if (ret) return ret; |
