diff options
author | David S. Miller <davem@davemloft.net> | 2022-04-22 13:06:03 +0100 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2022-04-22 13:06:03 +0100 |
commit | fb799dd49a25625db05af51dd141371f6f64d3d1 (patch) | |
tree | aa3ca5138109769f191846bf2102a23d473dbfc0 | |
parent | cb1e6bf42bce63158884ab83fb171b518693c771 (diff) | |
parent | b1ad41384866aafadf24c6b0f7e7701c86bdddc2 (diff) |
Merge branch 'ipv6-RT_ONLINK-remove-prep'
Guillaume Nault says:
====================
ipv4: First steps toward removing RTO_ONLINK
RTO_ONLINK is a flag that allows to reduce the scope of route lookups.
It's stored in a normally unused bit of the ->flowi4_tos field, in
struct flowi4. However it has several problems:
* This bit is also used by ECN. Although ECN bits are supposed to be
cleared before doing a route lookup, it happened that some code
paths didn't properly sanitise their ->flowi4_tos. So this mechanism
is fragile and we had bugs in the past where ECN bits slipped in and
could end up being erroneously interpreted as RTO_ONLINK.
* A dscp_t type was recently introduced to ensure ECN bits are cleared
during route lookups. ->flowi4_tos is the most important structure
field to convert, but RTO_ONLINK prevents such conversion, as dscp_t
mandates that ECN bits (where RTO_ONLINK is stored) be zero.
Therefore we need to stop using RTO_ONLINK altogether. Fortunately
RTO_ONLINK isn't a necessity. Instead of passing a flag in ->flowi4_tos
to tell the route lookup function to restrict the scope, we can simply
initialise the scope correctly.
Patch 1 does some preparatory work: it stops resetting ->flowi4_scope
automatically before a route lookup, thus allowing callers to set their
desired scope without having to rely on the RTO_ONLINK flag.
Patch 2-3 convert a few code paths to avoid relying on RTO_ONLINK.
More conversions will have to take place before we can eventually
remove this flag.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/route.h | 36 | ||||
-rw-r--r-- | net/dccp/ipv4.c | 5 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 6 | ||||
-rw-r--r-- | net/ipv4/datagram.c | 7 | ||||
-rw-r--r-- | net/ipv4/route.c | 41 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 5 |
6 files changed, 53 insertions, 47 deletions
diff --git a/include/net/route.h b/include/net/route.h index 25404fc2b483..991a3985712d 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -43,6 +43,19 @@ #define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE)) #define RT_CONN_FLAGS_TOS(sk,tos) (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE)) +static inline __u8 ip_sock_rt_scope(const struct sock *sk) +{ + if (sock_flag(sk, SOCK_LOCALROUTE)) + return RT_SCOPE_LINK; + + return RT_SCOPE_UNIVERSE; +} + +static inline __u8 ip_sock_rt_tos(const struct sock *sk) +{ + return RT_TOS(inet_sk(sk)->tos); +} + struct ip_tunnel_info; struct fib_nh; struct fib_info; @@ -289,39 +302,38 @@ static inline char rt_tos2priority(u8 tos) * ip_route_newports() calls. */ -static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 src, - u32 tos, int oif, u8 protocol, +static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, + __be32 src, int oif, u8 protocol, __be16 sport, __be16 dport, - struct sock *sk) + const struct sock *sk) { __u8 flow_flags = 0; if (inet_sk(sk)->transparent) flow_flags |= FLOWI_FLAG_ANYSRC; - flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - protocol, flow_flags, dst, src, dport, sport, - sk->sk_uid); + flowi4_init_output(fl4, oif, sk->sk_mark, ip_sock_rt_tos(sk), + ip_sock_rt_scope(sk), protocol, flow_flags, dst, + src, dport, sport, sk->sk_uid); } -static inline struct rtable *ip_route_connect(struct flowi4 *fl4, - __be32 dst, __be32 src, u32 tos, - int oif, u8 protocol, +static inline struct rtable *ip_route_connect(struct flowi4 *fl4, __be32 dst, + __be32 src, int oif, u8 protocol, __be16 sport, __be16 dport, struct sock *sk) { struct net *net = sock_net(sk); struct rtable *rt; - ip_route_connect_init(fl4, dst, src, tos, oif, protocol, - sport, dport, sk); + ip_route_connect_init(fl4, dst, src, oif, protocol, sport, dport, sk); if (!dst || !src) { rt = __ip_route_output_key(net, fl4); if (IS_ERR(rt)) return rt; ip_rt_put(rt); - flowi4_update_output(fl4, oif, tos, fl4->daddr, fl4->saddr); + flowi4_update_output(fl4, oif, fl4->flowi4_tos, fl4->daddr, + fl4->saddr); } security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(net, fl4, sk); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index ae662567a6cb..82696ab86f74 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -76,9 +76,8 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) orig_dport = usin->sin_port; fl4 = &inet->cork.fl.u.ip4; rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, - RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, - IPPROTO_DCCP, - orig_sport, orig_dport, sk); + sk->sk_bound_dev_if, IPPROTO_DCCP, orig_sport, + orig_dport, sk); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 195ecfa2f000..93da9f783bec 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1233,9 +1233,9 @@ static int inet_sk_reselect_saddr(struct sock *sk) /* Query new route. */ fl4 = &inet->cork.fl.u.ip4; - rt = ip_route_connect(fl4, daddr, 0, RT_CONN_FLAGS(sk), - sk->sk_bound_dev_if, sk->sk_protocol, - inet->inet_sport, inet->inet_dport, sk); + rt = ip_route_connect(fl4, daddr, 0, sk->sk_bound_dev_if, + sk->sk_protocol, inet->inet_sport, + inet->inet_dport, sk); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 48f337ccf949..ffd57523331f 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -44,10 +44,9 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len saddr = inet->mc_addr; } fl4 = &inet->cork.fl.u.ip4; - rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr, - RT_CONN_FLAGS(sk), oif, - sk->sk_protocol, - inet->inet_sport, usin->sin_port, sk); + rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr, oif, + sk->sk_protocol, inet->inet_sport, + usin->sin_port, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); if (err == -ENETUNREACH) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e839d424b861..ffbe2e4f8c89 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -503,28 +503,29 @@ static void ip_rt_fix_tos(struct flowi4 *fl4) __u8 tos = RT_FL_TOS(fl4); fl4->flowi4_tos = tos & IPTOS_RT_MASK; - fl4->flowi4_scope = tos & RTO_ONLINK ? - RT_SCOPE_LINK : RT_SCOPE_UNIVERSE; + if (tos & RTO_ONLINK) + fl4->flowi4_scope = RT_SCOPE_LINK; } static void __build_flow_key(const struct net *net, struct flowi4 *fl4, - const struct sock *sk, - const struct iphdr *iph, - int oif, u8 tos, - u8 prot, u32 mark, int flow_flags) + const struct sock *sk, const struct iphdr *iph, + int oif, __u8 tos, u8 prot, u32 mark, + int flow_flags) { + __u8 scope = RT_SCOPE_UNIVERSE; + if (sk) { const struct inet_sock *inet = inet_sk(sk); oif = sk->sk_bound_dev_if; mark = sk->sk_mark; - tos = RT_CONN_FLAGS(sk); + tos = ip_sock_rt_tos(sk); + scope = ip_sock_rt_scope(sk); prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol; } - flowi4_init_output(fl4, oif, mark, tos, - RT_SCOPE_UNIVERSE, prot, - flow_flags, - iph->daddr, iph->saddr, 0, 0, + + flowi4_init_output(fl4, oif, mark, tos & IPTOS_RT_MASK, scope, + prot, flow_flags, iph->daddr, iph->saddr, 0, 0, sock_net_uid(net, sk)); } @@ -534,9 +535,9 @@ static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, const struct net *net = dev_net(skb->dev); const struct iphdr *iph = ip_hdr(skb); int oif = skb->dev->ifindex; - u8 tos = RT_TOS(iph->tos); u8 prot = iph->protocol; u32 mark = skb->mark; + __u8 tos = iph->tos; __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0); } @@ -552,7 +553,8 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) if (inet_opt && inet_opt->opt.srr) daddr = inet_opt->opt.faddr; flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, - RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, + ip_sock_rt_tos(sk) & IPTOS_RT_MASK, + ip_sock_rt_scope(sk), inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk), daddr, inet->inet_saddr, 0, 0, sk->sk_uid); @@ -825,14 +827,13 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf const struct iphdr *iph = (const struct iphdr *) skb->data; struct net *net = dev_net(skb->dev); int oif = skb->dev->ifindex; - u8 tos = RT_TOS(iph->tos); u8 prot = iph->protocol; u32 mark = skb->mark; + __u8 tos = iph->tos; rt = (struct rtable *) dst; __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0); - ip_rt_fix_tos(&fl4); __ip_do_redirect(rt, skb, &fl4, true); } @@ -1061,7 +1062,6 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct flowi4 fl4; ip_rt_build_flow_key(&fl4, sk, skb); - ip_rt_fix_tos(&fl4); /* Don't make lookup fail for bridged encapsulations */ if (skb && netif_is_any_bridge_port(skb->dev)) @@ -1078,8 +1078,8 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, struct rtable *rt; u32 mark = IP4_REPLY_MARK(net, skb->mark); - __build_flow_key(net, &fl4, NULL, iph, oif, - RT_TOS(iph->tos), protocol, mark, 0); + __build_flow_key(net, &fl4, NULL, iph, oif, iph->tos, protocol, mark, + 0); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { __ip_rt_update_pmtu(rt, &fl4, mtu); @@ -1136,8 +1136,6 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) goto out; new = true; - } else { - ip_rt_fix_tos(&fl4); } __ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu); @@ -1169,8 +1167,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net, struct flowi4 fl4; struct rtable *rt; - __build_flow_key(net, &fl4, NULL, iph, oif, - RT_TOS(iph->tos), protocol, 0, 0); + __build_flow_key(net, &fl4, NULL, iph, oif, iph->tos, protocol, 0, 0); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { __ip_do_redirect(rt, skb, &fl4, false); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 157265aecbed..2c2d42142555 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -229,9 +229,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) orig_dport = usin->sin_port; fl4 = &inet->cork.fl.u.ip4; rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, - RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, - IPPROTO_TCP, - orig_sport, orig_dport, sk); + sk->sk_bound_dev_if, IPPROTO_TCP, orig_sport, + orig_dport, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); if (err == -ENETUNREACH) |