summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorParav Pandit <parav@nvidia.com>2025-09-16 14:11:02 +0300
committerLeon Romanovsky <leon@kernel.org>2025-09-18 05:20:35 -0400
commitc31e4038c97f355967bf906c0b6914edb4f20d75 (patch)
tree6f962cb2be69b86b501cbde4689643980ef3ff56
parent200651b9b8aadfbbec852f0e5d042d9abe75e2ab (diff)
RDMA/core: Use route entry flag to decide on loopback traffic
addr_resolve() considers a destination to be local if the next-hop device of the resolved route for the destination is the loopback netdevice. This fails when the source and destination IP addresses belong to a netdev enslaved to a VRF netdev. In this case the next-hop device is the VRF itself: $ ip link add name myvrf up type vrf table 100 $ ip link set ens2f0np0 master myvrf up $ ip addr add 192.168.1.1/24 dev ens2f0np0 $ ip route get 192.168.1.1 oif myvrf local 192.168.1.1 dev myvrf table 100 src 192.168.1.1 uid 0 cache <local> This results in packets being generated with an incorrect destination MAC, that of the VRF netdevice, and in ib_write_bw failing with a timeout. Solve this by determining whether a destination is local based on the resolved route's type rather than on the loopback flag of its next-hop netdevice. This allows loopback traffic to be resolved in a uniform way both with and without VRF configurations. Signed-off-by: Parav Pandit <parav@nvidia.com> Reviewed-by: Vlad Dumitrescu <vdumitrescu@nvidia.com> Signed-off-by: Edward Srouji <edwards@nvidia.com> Link: https://patch.msgid.link/20250916111103.84069-4-edwards@nvidia.com Signed-off-by: Leon Romanovsky <leon@kernel.org>
-rw-r--r--drivers/infiniband/core/addr.c34
1 files changed, 21 insertions, 13 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index ca86c482662f..61596cda2b65 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -446,31 +446,40 @@ static int addr6_resolve(struct sockaddr *src_sock,
}
#endif
+/*
+ * is_dst_local - report whether a resolved route is a local route.
+ * @dst: resolved route entry; dst->ops->family selects the IPv4/IPv6 check.
+ *
+ * IPv4: rt_type carries a single RTN_* route-type value, not a bitmask, so
+ * it is compared for equality. Masking it with RTN_LOCAL (2) would also
+ * match RTN_BROADCAST (3), RTN_BLACKHOLE (6), RTN_UNREACHABLE (7), etc.
+ * IPv6: rt6i_flags is a flag bitmask, so RTF_LOCAL is tested with '&'.
+ *
+ * Returns true for a local route and false otherwise, including for any
+ * address family other than AF_INET/AF_INET6.
+ */
+static bool is_dst_local(const struct dst_entry *dst)
+{
+	if (dst->ops->family == AF_INET)
+		return dst_rtable(dst)->rt_type == RTN_LOCAL;
+	if (dst->ops->family == AF_INET6)
+		return !!(dst_rt6_info(dst)->rt6i_flags & RTF_LOCAL);
+	return false;
+}
+
static int addr_resolve_neigh(const struct dst_entry *dst,
const struct sockaddr *dst_in,
struct rdma_dev_addr *addr,
- unsigned int ndev_flags,
u32 seq)
{
- int ret = 0;
-
- if (ndev_flags & IFF_LOOPBACK)
+ if (is_dst_local(dst)) {
+ /* When the route to the destination is a local entry, source
+ * and destination are the same, so the source device address
+ * is copied as the destination device address and the
+ * neighbour lookup is skipped.
+ */
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
- else
- ret = fetch_ha(dst, addr, dst_in, seq);
- return ret;
+ return 0;
+ }
+
+ return fetch_ha(dst, addr, dst_in, seq);
}
static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr,
- unsigned int *ndev_flags,
const struct sockaddr *dst_in,
const struct dst_entry *dst)
{
struct net_device *ndev = READ_ONCE(dst->dev);
- *ndev_flags = ndev->flags;
/* A physical device must be the RDMA device to use */
- if (ndev->flags & IFF_LOOPBACK) {
+ if (is_dst_local(dst)) {
int ret;
/*
* RDMA (IB/RoCE, iWarp) doesn't run on lo interface or
@@ -538,7 +547,6 @@ static int addr_resolve(struct sockaddr *src_in,
u32 seq)
{
struct dst_entry *dst = NULL;
- unsigned int ndev_flags = 0;
struct rtable *rt = NULL;
int ret;
@@ -575,7 +583,7 @@ static int addr_resolve(struct sockaddr *src_in,
rcu_read_unlock();
goto done;
}
- ret = rdma_set_src_addr_rcu(addr, &ndev_flags, dst_in, dst);
+ ret = rdma_set_src_addr_rcu(addr, dst_in, dst);
rcu_read_unlock();
/*
@@ -583,7 +591,7 @@ static int addr_resolve(struct sockaddr *src_in,
* only if src addr translation didn't fail.
*/
if (!ret && resolve_neigh)
- ret = addr_resolve_neigh(dst, dst_in, addr, ndev_flags, seq);
+ ret = addr_resolve_neigh(dst, dst_in, addr, seq);
if (src_in->sa_family == AF_INET)
ip_rt_put(rt);