summary refs log tree commit diff
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r-- net/ipv6/Makefile 1
-rw-r--r-- net/ipv6/addrconf.c 57
-rw-r--r-- net/ipv6/af_inet6.c 20
-rw-r--r-- net/ipv6/datagram.c 15
-rw-r--r-- net/ipv6/esp6.c 6
-rw-r--r-- net/ipv6/esp6_offload.c 10
-rw-r--r-- net/ipv6/icmp.c 6
-rw-r--r-- net/ipv6/inet6_connection_sock.c 2
-rw-r--r-- net/ipv6/ioam6_iptunnel.c 2
-rw-r--r-- net/ipv6/ip6_flowlabel.c 8
-rw-r--r-- net/ipv6/ip6_output.c 171
-rw-r--r-- net/ipv6/ip6_udp_tunnel.c 75
-rw-r--r-- net/ipv6/ip6_vti.c 4
-rw-r--r-- net/ipv6/ipv6_sockglue.c 242
-rw-r--r-- net/ipv6/mcast.c 11
-rw-r--r-- net/ipv6/ndisc.c 6
-rw-r--r-- net/ipv6/netfilter.c 2
-rw-r--r-- net/ipv6/netfilter/ip6table_mangle.c 9
-rw-r--r-- net/ipv6/ping.c 6
-rw-r--r-- net/ipv6/proc.c 3
-rw-r--r-- net/ipv6/raw.c 18
-rw-r--r-- net/ipv6/route.c 6
-rw-r--r-- net/ipv6/syncookies.c 5
-rw-r--r-- net/ipv6/tcp_ao.c 168
-rw-r--r-- net/ipv6/tcp_ipv6.c 415
-rw-r--r-- net/ipv6/udp.c 52
-rw-r--r-- net/ipv6/udplite.c 1
-rw-r--r-- net/ipv6/xfrm6_input.c 107
-rw-r--r-- net/ipv6/xfrm6_output.c 2
-rw-r--r-- net/ipv6/xfrm6_policy.c 4
30 files changed, 940 insertions, 494 deletions
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 3036a45e8a1e..d283c59df4c1 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -52,4 +52,5 @@ obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o
ifneq ($(CONFIG_IPV6),)
obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o
obj-y += mcast_snoop.o
+obj-$(CONFIG_TCP_AO) += tcp_ao.o
endif
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 0b6ee962c84e..3aaea56b5166 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -236,6 +236,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.ioam6_id = IOAM6_DEFAULT_IF_ID,
.ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
.ndisc_evict_nocarrier = 1,
+ .ra_honor_pio_life = 0,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -297,6 +298,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.ioam6_id = IOAM6_DEFAULT_IF_ID,
.ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
.ndisc_evict_nocarrier = 1,
+ .ra_honor_pio_life = 0,
};
/* Check if link is ready: is it up and is a valid qdisc available */
@@ -1397,6 +1399,7 @@ retry:
idev->cnf.temp_valid_lft + age);
cfg.preferred_lft = cnf_temp_preferred_lft + age - idev->desync_factor;
cfg.preferred_lft = min_t(__u32, ifp->prefered_lft, cfg.preferred_lft);
+ cfg.preferred_lft = min_t(__u32, cfg.valid_lft, cfg.preferred_lft);
cfg.plen = ifp->prefix_len;
tmp_tstamp = ifp->tstamp;
@@ -1404,15 +1407,23 @@ retry:
write_unlock_bh(&idev->lock);
- /* A temporary address is created only if this calculated Preferred
- * Lifetime is greater than REGEN_ADVANCE time units. In particular,
- * an implementation must not create a temporary address with a zero
- * Preferred Lifetime.
+ /* From RFC 4941:
+ *
+ * A temporary address is created only if this calculated Preferred
+ * Lifetime is greater than REGEN_ADVANCE time units. In
+ * particular, an implementation must not create a temporary address
+ * with a zero Preferred Lifetime.
+ *
+ * Clamp the preferred lifetime to a minimum of regen_advance, unless
+ * that would exceed valid_lft.
+ *
* Use age calculation as in addrconf_verify to avoid unnecessary
* temporary addresses being generated.
*/
age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
- if (cfg.preferred_lft <= regen_advance + age) {
+ if (cfg.preferred_lft <= regen_advance + age)
+ cfg.preferred_lft = regen_advance + age + 1;
+ if (cfg.preferred_lft > cfg.valid_lft) {
in6_ifa_put(ifp);
in6_dev_put(idev);
ret = -1;
@@ -2657,22 +2668,23 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
else
stored_lft = 0;
- if (!create && stored_lft) {
+
+ /* RFC4862 Section 5.5.3e:
+ * "Note that the preferred lifetime of the
+ * corresponding address is always reset to
+ * the Preferred Lifetime in the received
+ * Prefix Information option, regardless of
+ * whether the valid lifetime is also reset or
+ * ignored."
+ *
+ * So we should always update prefered_lft here.
+ */
+ update_lft = !create && stored_lft;
+
+ if (update_lft && !in6_dev->cnf.ra_honor_pio_life) {
const u32 minimum_lft = min_t(u32,
stored_lft, MIN_VALID_LIFETIME);
valid_lft = max(valid_lft, minimum_lft);
-
- /* RFC4862 Section 5.5.3e:
- * "Note that the preferred lifetime of the
- * corresponding address is always reset to
- * the Preferred Lifetime in the received
- * Prefix Information option, regardless of
- * whether the valid lifetime is also reset or
- * ignored."
- *
- * So we should always update prefered_lft here.
- */
- update_lft = 1;
}
if (update_lft) {
@@ -6846,6 +6858,15 @@ static const struct ctl_table addrconf_sysctl[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "ra_honor_pio_life",
+ .data = &ipv6_devconf.ra_honor_pio_life,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
#ifdef CONFIG_IPV6_ROUTER_PREF
{
.procname = "accept_ra_rtr_pref",
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 368824fe9719..13a1833a4df5 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -217,10 +217,11 @@ lookup_protocol:
inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk);
np->hop_limit = -1;
np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
- np->mc_loop = 1;
- np->mc_all = 1;
+ inet6_set_bit(MC6_LOOP, sk);
+ inet6_set_bit(MC6_ALL, sk);
np->pmtudisc = IPV6_PMTUDISC_WANT;
- np->repflow = net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED;
+ inet6_assign_bit(REPFLOW, sk, net->ipv6.sysctl.flowlabel_reflect &
+ FLOWLABEL_REFLECT_ESTABLISHED);
sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash);
@@ -453,7 +454,7 @@ int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* BPF prog is run before any checks are done so that if the prog
* changes context in a wrong way it will be caught.
*/
- err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
+ err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len,
CGROUP_INET6_BIND, &flags);
if (err)
return err;
@@ -519,6 +520,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
int peer)
{
struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr;
+ int sin_addr_len = sizeof(*sin);
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -536,9 +538,9 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
}
sin->sin6_port = inet->inet_dport;
sin->sin6_addr = sk->sk_v6_daddr;
- if (np->sndflow)
+ if (inet6_test_bit(SNDFLOW, sk))
sin->sin6_flowinfo = np->flow_label;
- BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+ BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len,
CGROUP_INET6_GETPEERNAME);
} else {
if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
@@ -546,13 +548,13 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
else
sin->sin6_addr = sk->sk_v6_rcv_saddr;
sin->sin6_port = inet->inet_sport;
- BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+ BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len,
CGROUP_INET6_GETSOCKNAME);
}
sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
sk->sk_bound_dev_if);
release_sock(sk);
- return sizeof(*sin);
+ return sin_addr_len;
}
EXPORT_SYMBOL(inet6_getname);
@@ -1048,6 +1050,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
#if IS_ENABLED(CONFIG_XFRM)
.xfrm6_local_rxpmtu = xfrm6_local_rxpmtu,
.xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv,
+ .xfrm6_gro_udp_encap_rcv = xfrm6_gro_udp_encap_rcv,
.xfrm6_rcv_encap = xfrm6_rcv_encap,
#endif
.nd_tbl = &nd_tbl,
@@ -1060,6 +1063,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
.udp6_lib_lookup = __udp6_lib_lookup,
.ipv6_setsockopt = do_ipv6_setsockopt,
.ipv6_getsockopt = do_ipv6_getsockopt,
+ .ipv6_dev_get_saddr = ipv6_dev_get_saddr,
};
static int __init inet6_init(void)
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 41ebc4e57473..cc6a502db39d 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -80,7 +80,8 @@ int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr)
struct flowi6 fl6;
int err = 0;
- if (np->sndflow && (np->flow_label & IPV6_FLOWLABEL_MASK)) {
+ if (inet6_test_bit(SNDFLOW, sk) &&
+ (np->flow_label & IPV6_FLOWLABEL_MASK)) {
flowlabel = fl6_sock_lookup(sk, np->flow_label);
if (IS_ERR(flowlabel))
return -EINVAL;
@@ -163,7 +164,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
if (usin->sin6_family != AF_INET6)
return -EAFNOSUPPORT;
- if (np->sndflow)
+ if (inet6_test_bit(SNDFLOW, sk))
fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
if (ipv6_addr_any(&usin->sin6_addr)) {
@@ -305,11 +306,10 @@ static void ipv6_icmp_error_rfc4884(const struct sk_buff *skb,
void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
__be16 port, u32 info, u8 *payload)
{
- struct ipv6_pinfo *np = inet6_sk(sk);
struct icmp6hdr *icmph = icmp6_hdr(skb);
struct sock_exterr_skb *serr;
- if (!np->recverr)
+ if (!inet6_test_bit(RECVERR6, sk))
return;
skb = skb_clone(skb, GFP_ATOMIC);
@@ -332,7 +332,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
__skb_pull(skb, payload - skb->data);
- if (inet6_sk(sk)->recverr_rfc4884)
+ if (inet6_test_bit(RECVERR6_RFC4884, sk))
ipv6_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884);
skb_reset_transport_header(skb);
@@ -344,12 +344,11 @@ EXPORT_SYMBOL_GPL(ipv6_icmp_error);
void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info)
{
- const struct ipv6_pinfo *np = inet6_sk(sk);
struct sock_exterr_skb *serr;
struct ipv6hdr *iph;
struct sk_buff *skb;
- if (!np->recverr)
+ if (!inet6_test_bit(RECVERR6, sk))
return;
skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
@@ -493,7 +492,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset),
struct ipv6hdr, daddr);
sin->sin6_addr = ip6h->daddr;
- if (np->sndflow)
+ if (inet6_test_bit(SNDFLOW, sk))
sin->sin6_flowinfo = ip6_flowinfo(ip6h);
sin->sin6_scope_id =
ipv6_iface_scope_id(&sin->sin6_addr,
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index fddd0cbdede1..2cc1a45742d8 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -770,7 +770,9 @@ static inline int esp_remove_trailer(struct sk_buff *skb)
skb->csum = csum_block_sub(skb->csum, csumdiff,
skb->len - trimlen);
}
- pskb_trim(skb, skb->len - trimlen);
+ ret = pskb_trim(skb, skb->len - trimlen);
+ if (unlikely(ret))
+ return ret;
ret = nexthdr[1];
@@ -831,7 +833,7 @@ int esp6_input_done2(struct sk_buff *skb, int err)
/*
* 1) if the NAT-T peer's IP or port changed then
- * advertize the change to the keying daemon.
+ * advertise the change to the keying daemon.
* This is an inbound SA, so just compare
* SRC ports.
*/
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index a189e08370a5..527b7caddbc6 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -34,7 +34,9 @@ static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen)
int off = sizeof(struct ipv6hdr);
struct ipv6_opt_hdr *exthdr;
- if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP))
+ /* ESP or ESPINUDP */
+ if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP ||
+ ipv6_hdr->nexthdr == NEXTHDR_UDP))
return offsetof(struct ipv6hdr, nexthdr);
while (off < nhlen) {
@@ -54,10 +56,14 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
int offset = skb_gro_offset(skb);
struct xfrm_offload *xo;
struct xfrm_state *x;
+ int encap_type = 0;
__be32 seq;
__be32 spi;
int nhoff;
+ if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
+ encap_type = UDP_ENCAP_ESPINUDP;
+
if (!pskb_pull(skb, offset))
return NULL;
@@ -104,7 +110,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
/* We don't need to handle errors from xfrm_input, it does all
* the error handling and frees the resources on error. */
- xfrm_input(skb, IPPROTO_ESP, spi, -2);
+ xfrm_input(skb, IPPROTO_ESP, spi, encap_type);
return ERR_PTR(-EINPROGRESS);
out_reset:
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 93a594a901d1..f62427097126 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -385,7 +385,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net,
return dst;
}
- err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
+ err = xfrm_decode_session_reverse(net, skb, flowi6_to_flowi(&fl2), AF_INET6);
if (err)
goto relookup_failed;
@@ -588,7 +588,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
- ipcm6_init_sk(&ipc6, np);
+ ipcm6_init_sk(&ipc6, sk);
ipc6.sockc.mark = mark;
fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
@@ -791,7 +791,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
msg.offset = 0;
msg.type = type;
- ipcm6_init_sk(&ipc6, np);
+ ipcm6_init_sk(&ipc6, sk);
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
ipc6.sockc.mark = mark;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 0c50dcd35fe8..80043e46117c 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -133,7 +133,7 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused
fl6.daddr = sk->sk_v6_daddr;
res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt),
- np->tclass, sk->sk_priority);
+ np->tclass, READ_ONCE(sk->sk_priority));
rcu_read_unlock();
return res;
}
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index f6f5b83dd954..7563f8c6aa87 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -46,7 +46,7 @@ struct ioam6_lwt {
struct ioam6_lwt_encap tuninfo;
};
-static struct netlink_range_validation freq_range = {
+static const struct netlink_range_validation freq_range = {
.min = IOAM6_IPTUNNEL_FREQ_MIN,
.max = IOAM6_IPTUNNEL_FREQ_MAX,
};
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index b3ca4beb4405..eca07e10e21f 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -513,7 +513,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
return 0;
}
- if (np->repflow) {
+ if (inet6_test_bit(REPFLOW, sk)) {
freq->flr_label = np->flow_label;
return 0;
}
@@ -551,10 +551,10 @@ static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq)
if (freq->flr_flags & IPV6_FL_F_REFLECT) {
if (sk->sk_protocol != IPPROTO_TCP)
return -ENOPROTOOPT;
- if (!np->repflow)
+ if (!inet6_test_bit(REPFLOW, sk))
return -ESRCH;
np->flow_label = 0;
- np->repflow = 0;
+ inet6_clear_bit(REPFLOW, sk);
return 0;
}
@@ -626,7 +626,7 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq,
if (sk->sk_protocol != IPPROTO_TCP)
return -ENOPROTOOPT;
- np->repflow = 1;
+ inet6_set_bit(REPFLOW, sk);
return 0;
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 54fc4c711f2c..a722a43dd668 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -117,6 +117,8 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
return res;
}
+ IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
+
rcu_read_lock();
nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
@@ -162,7 +164,13 @@ ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
int err;
skb_mark_not_on_list(segs);
- err = ip6_fragment(net, sk, segs, ip6_finish_output2);
+ /* Last GSO segment can be smaller than gso_size (and MTU).
+ * Adding a fragment header would produce an "atomic fragment",
+ * which is considered harmful (RFC-8021). Avoid that.
+ */
+ err = segs->len > mtu ?
+ ip6_fragment(net, sk, segs, ip6_finish_output2) :
+ ip6_finish_output2(net, sk, segs);
if (err && ret == 0)
ret = err;
}
@@ -170,6 +178,16 @@ ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
return ret;
}
+static int ip6_finish_output_gso(struct net *net, struct sock *sk,
+ struct sk_buff *skb, unsigned int mtu)
+{
+ if (!(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) &&
+ !skb_gso_validate_network_len(skb, mtu))
+ return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
+
+ return ip6_finish_output2(net, sk, skb);
+}
+
static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
unsigned int mtu;
@@ -183,17 +201,14 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff
#endif
mtu = ip6_skb_dst_mtu(skb);
- if (skb_is_gso(skb) &&
- !(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) &&
- !skb_gso_validate_network_len(skb, mtu))
- return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
+ if (skb_is_gso(skb))
+ return ip6_finish_output_gso(net, sk, skb, mtu);
- if ((skb->len > mtu && !skb_is_gso(skb)) ||
- dst_allfrag(skb_dst(skb)) ||
+ if (skb->len > mtu ||
(IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
return ip6_fragment(net, sk, skb, ip6_finish_output2);
- else
- return ip6_finish_output2(net, sk, skb);
+
+ return ip6_finish_output2(net, sk, skb);
}
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -232,12 +247,11 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(ip6_output);
-bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
+bool ip6_autoflowlabel(struct net *net, const struct sock *sk)
{
- if (!np->autoflowlabel_set)
+ if (!inet6_test_bit(AUTOFLOWLABEL_SET, sk))
return ip6_default_np_autolabel(net);
- else
- return np->autoflowlabel;
+ return inet6_test_bit(AUTOFLOWLABEL, sk);
}
/*
@@ -309,12 +323,12 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
* Fill in the IPv6 header
*/
if (np)
- hlimit = np->hop_limit;
+ hlimit = READ_ONCE(np->hop_limit);
if (hlimit < 0)
hlimit = ip6_dst_hoplimit(dst);
ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
- ip6_autoflowlabel(net, np), fl6));
+ ip6_autoflowlabel(net, sk), fl6));
hdr->payload_len = htons(seg_len);
hdr->nexthdr = proto;
@@ -329,7 +343,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
mtu = dst_mtu(dst);
if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
- IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
/* if egress device is enslaved to an L3 master device pass the
* skb to its handler for processing
@@ -369,9 +383,8 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
if (sk && ra->sel == sel &&
(!sk->sk_bound_dev_if ||
sk->sk_bound_dev_if == skb->dev->ifindex)) {
- struct ipv6_pinfo *np = inet6_sk(sk);
- if (np && np->rtalert_isolate &&
+ if (inet6_test_bit(RTALERT_ISOLATE, sk) &&
!net_eq(sock_net(sk), dev_net(skb->dev))) {
continue;
}
@@ -448,10 +461,6 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
- struct dst_entry *dst = skb_dst(skb);
-
- __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
-
#ifdef CONFIG_NET_SWITCHDEV
if (skb->offload_l3_fwd_mark) {
consume_skb(skb);
@@ -619,6 +628,8 @@ int ip6_forward(struct sk_buff *skb)
}
}
+ __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
+
mtu = ip6_dst_mtu_maybe_forward(dst, true);
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
@@ -881,9 +892,11 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
mtu = IPV6_MIN_MTU;
}
- if (np && np->frag_size < mtu) {
- if (np->frag_size)
- mtu = np->frag_size;
+ if (np) {
+ u32 frag_size = READ_ONCE(np->frag_size);
+
+ if (frag_size && frag_size < mtu)
+ mtu = frag_size;
}
if (mtu < hlen + sizeof(struct frag_hdr) + 8)
goto fail_toobig;
@@ -1017,9 +1030,6 @@ slow_path:
return err;
fail_toobig:
- if (skb->sk && dst_allfrag(skb_dst(skb)))
- sk_gso_disable(skb->sk);
-
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
err = -EMSGSIZE;
@@ -1113,7 +1123,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
rcu_read_lock();
from = rt ? rcu_dereference(rt->from) : NULL;
err = ip6_route_get_saddr(net, from, &fl6->daddr,
- sk ? inet6_sk(sk)->srcprefs : 0,
+ sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0,
&fl6->saddr);
rcu_read_unlock();
@@ -1283,74 +1293,6 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
-/**
- * ip6_dst_lookup_tunnel - perform route lookup on tunnel
- * @skb: Packet for which lookup is done
- * @dev: Tunnel device
- * @net: Network namespace of tunnel device
- * @sock: Socket which provides route info
- * @saddr: Memory to store the src ip address
- * @info: Tunnel information
- * @protocol: IP protocol
- * @use_cache: Flag to enable cache usage
- * This function performs a route lookup on a tunnel
- *
- * It returns a valid dst pointer and stores src address to be used in
- * tunnel in param saddr on success, else a pointer encoded error code.
- */
-
-struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
- struct net_device *dev,
- struct net *net,
- struct socket *sock,
- struct in6_addr *saddr,
- const struct ip_tunnel_info *info,
- u8 protocol,
- bool use_cache)
-{
- struct dst_entry *dst = NULL;
-#ifdef CONFIG_DST_CACHE
- struct dst_cache *dst_cache;
-#endif
- struct flowi6 fl6;
- __u8 prio;
-
-#ifdef CONFIG_DST_CACHE
- dst_cache = (struct dst_cache *)&info->dst_cache;
- if (use_cache) {
- dst = dst_cache_get_ip6(dst_cache, saddr);
- if (dst)
- return dst;
- }
-#endif
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_mark = skb->mark;
- fl6.flowi6_proto = protocol;
- fl6.daddr = info->key.u.ipv6.dst;
- fl6.saddr = info->key.u.ipv6.src;
- prio = info->key.tos;
- fl6.flowlabel = ip6_make_flowinfo(prio, info->key.label);
-
- dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
- NULL);
- if (IS_ERR(dst)) {
- netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
- return ERR_PTR(-ENETUNREACH);
- }
- if (dst->dev == dev) { /* is this necessary? */
- netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
- dst_release(dst);
- return ERR_PTR(-ELOOP);
- }
-#ifdef CONFIG_DST_CACHE
- if (use_cache)
- dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
-#endif
- *saddr = fl6.saddr;
- return dst;
-}
-EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);
-
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
gfp_t gfp)
{
@@ -1392,7 +1334,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
struct rt6_info *rt)
{
struct ipv6_pinfo *np = inet6_sk(sk);
- unsigned int mtu;
+ unsigned int mtu, frag_size;
struct ipv6_txoptions *nopt, *opt = ipc6->opt;
/* callers pass dst together with a reference, set it first so
@@ -1436,25 +1378,23 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
v6_cork->hop_limit = ipc6->hlimit;
v6_cork->tclass = ipc6->tclass;
if (rt->dst.flags & DST_XFRM_TUNNEL)
- mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
+ mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
else
- mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
+ mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
- if (np->frag_size < mtu) {
- if (np->frag_size)
- mtu = np->frag_size;
- }
+
+ frag_size = READ_ONCE(np->frag_size);
+ if (frag_size && frag_size < mtu)
+ mtu = frag_size;
+
cork->base.fragsize = mtu;
cork->base.gso_size = ipc6->gso_size;
cork->base.tx_flags = 0;
cork->base.mark = ipc6->sockc.mark;
sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
- if (dst_allfrag(xfrm_dst_path(&rt->dst)))
- cork->base.flags |= IPCORK_ALLFRAG;
cork->base.length = 0;
-
cork->base.transmit_time = ipc6->sockc.transmit_time;
return 0;
@@ -1511,8 +1451,6 @@ static int __ip6_append_data(struct sock *sk,
headersize = sizeof(struct ipv6hdr) +
(opt ? opt->opt_flen + opt->opt_nflen : 0) +
- (dst_allfrag(&rt->dst) ?
- sizeof(struct frag_hdr) : 0) +
rt->rt6i_nfheader_len;
if (mtu <= fragheaderlen ||
@@ -1622,7 +1560,7 @@ emsgsize:
while (length > 0) {
/* Check if the remaining data fits into current packet. */
- copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
+ copy = (cork->length <= mtu ? mtu : maxfraglen) - skb->len;
if (copy < length)
copy = maxfraglen - skb->len;
@@ -1653,7 +1591,7 @@ alloc_new_skb:
*/
datalen = length + fraggap;
- if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
+ if (datalen > (cork->length <= mtu ? mtu : maxfraglen) - fragheaderlen)
datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
fraglen = datalen + fragheaderlen;
pagedlen = 0;
@@ -1902,7 +1840,6 @@ static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork)
struct dst_entry *dst = cork->base.dst;
cork->base.dst = NULL;
- cork->base.flags &= ~IPCORK_ALLFRAG;
skb_dst_set(skb, dst);
}
@@ -1923,7 +1860,6 @@ static void ip6_cork_release(struct inet_cork_full *cork,
if (cork->base.dst) {
dst_release(cork->base.dst);
cork->base.dst = NULL;
- cork->base.flags &= ~IPCORK_ALLFRAG;
}
}
@@ -1935,7 +1871,6 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
struct sk_buff *skb, *tmp_skb;
struct sk_buff **tail_skb;
struct in6_addr *final_dst;
- struct ipv6_pinfo *np = inet6_sk(sk);
struct net *net = sock_net(sk);
struct ipv6hdr *hdr;
struct ipv6_txoptions *opt = v6_cork->opt;
@@ -1978,18 +1913,18 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
ip6_flow_hdr(hdr, v6_cork->tclass,
ip6_make_flowlabel(net, skb, fl6->flowlabel,
- ip6_autoflowlabel(net, np), fl6));
+ ip6_autoflowlabel(net, sk), fl6));
hdr->hop_limit = v6_cork->hop_limit;
hdr->nexthdr = proto;
hdr->saddr = fl6->saddr;
hdr->daddr = *final_dst;
- skb->priority = sk->sk_priority;
+ skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = cork->base.mark;
skb->tstamp = cork->base.transmit_time;
ip6_cork_steal_dst(skb, cork);
- IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
+ IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
if (proto == IPPROTO_ICMPV6) {
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
u8 icmp6_type;
@@ -2091,7 +2026,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
return ERR_PTR(err);
}
if (ipc6->dontfrag < 0)
- ipc6->dontfrag = inet6_sk(sk)->dontfrag;
+ ipc6->dontfrag = inet6_test_bit(DONTFRAG, sk);
err = __ip6_append_data(sk, &queue, cork, &v6_cork,
&current->task_frag, getfrag, from,
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index cdc4d4ee2420..a7bf0327b380 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -1,3 +1,4 @@
+
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/module.h>
#include <linux/errno.h>
@@ -75,8 +76,9 @@ EXPORT_SYMBOL_GPL(udp_sock_create6);
int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb,
- struct net_device *dev, struct in6_addr *saddr,
- struct in6_addr *daddr,
+ struct net_device *dev,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
__u8 prio, __u8 ttl, __be32 label,
__be16 src_port, __be16 dst_port, bool nocheck)
{
@@ -111,4 +113,73 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
}
EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb);
+/**
+ * udp_tunnel6_dst_lookup - perform route lookup on UDP tunnel
+ * @skb: Packet for which lookup is done
+ * @dev: Tunnel device
+ * @net: Network namespace of tunnel device
+ * @sock: Socket which provides route info
+ * @oif: Index of the output interface
+ * @saddr: Memory to store the src ip address
+ * @key: Tunnel key supplying the IPv6 source/destination addresses and
+ *       the flow label used for the lookup
+ * @sport: UDP source port
+ * @dport: UDP destination port
+ * @dsfield: The traffic class field
+ * @dst_cache: The dst cache to use for lookup, or NULL to skip caching
+ *
+ * This function performs a route lookup on a UDP tunnel.
+ *
+ * Return: a valid dst pointer, with the src address to be used in the
+ * tunnel stored in @saddr, on success; else a pointer encoded error code.
+ */
+
+struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb,
+ struct net_device *dev,
+ struct net *net,
+ struct socket *sock,
+ int oif,
+ struct in6_addr *saddr,
+ const struct ip_tunnel_key *key,
+ __be16 sport, __be16 dport, u8 dsfield,
+ struct dst_cache *dst_cache)
+{
+ struct dst_entry *dst = NULL;
+ struct flowi6 fl6;
+
+#ifdef CONFIG_DST_CACHE
+ if (dst_cache) {
+ dst = dst_cache_get_ip6(dst_cache, saddr);
+ if (dst)
+ return dst;
+ }
+#endif
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_mark = skb->mark;
+ fl6.flowi6_proto = IPPROTO_UDP;
+ fl6.flowi6_oif = oif;
+ fl6.daddr = key->u.ipv6.dst;
+ fl6.saddr = key->u.ipv6.src;
+ fl6.fl6_sport = sport;
+ fl6.fl6_dport = dport;
+ fl6.flowlabel = ip6_make_flowinfo(dsfield, key->label);
+
+ dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
+ NULL);
+ if (IS_ERR(dst)) {
+ netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
+ return ERR_PTR(-ENETUNREACH);
+ }
+ if (dst->dev == dev) { /* is this necessary? */
+ netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
+ dst_release(dst);
+ return ERR_PTR(-ELOOP);
+ }
+#ifdef CONFIG_DST_CACHE
+ if (dst_cache)
+ dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
+#endif
+ *saddr = fl6.saddr;
+ return dst;
+}
+EXPORT_SYMBOL_GPL(udp_tunnel6_dst_lookup);
+
MODULE_LICENSE("GPL");
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 73c85d4e0e9c..e550240c85e1 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -569,11 +569,11 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
goto tx_err;
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
- xfrm_decode_session(skb, &fl, AF_INET6);
+ xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6);
break;
case htons(ETH_P_IP):
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
- xfrm_decode_session(skb, &fl, AF_INET);
+ xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET);
break;
default:
goto tx_err;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 0e2a0847b387..7d661735cb9d 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -415,6 +415,101 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (ip6_mroute_opt(optname))
return ip6_mroute_setsockopt(sk, optname, optval, optlen);
+ /* Handle options that can be set without locking the socket. */
+ switch (optname) {
+ case IPV6_UNICAST_HOPS:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val > 255 || val < -1)
+ return -EINVAL;
+ WRITE_ONCE(np->hop_limit, val);
+ return 0;
+ case IPV6_MULTICAST_LOOP:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val != valbool)
+ return -EINVAL;
+ inet6_assign_bit(MC6_LOOP, sk, valbool);
+ return 0;
+ case IPV6_MULTICAST_HOPS:
+ if (sk->sk_type == SOCK_STREAM)
+ return retv;
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val > 255 || val < -1)
+ return -EINVAL;
+ WRITE_ONCE(np->mcast_hops,
+ val == -1 ? IPV6_DEFAULT_MCASTHOPS : val);
+ return 0;
+ case IPV6_MTU:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val && val < IPV6_MIN_MTU)
+ return -EINVAL;
+ WRITE_ONCE(np->frag_size, val);
+ return 0;
+ case IPV6_MINHOPCOUNT:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val < 0 || val > 255)
+ return -EINVAL;
+
+ if (val)
+ static_branch_enable(&ip6_min_hopcount);
+
+ /* tcp_v6_err() and tcp_v6_rcv() might read min_hopcount
+ * while we are changing it.
+ */
+ WRITE_ONCE(np->min_hopcount, val);
+ return 0;
+ case IPV6_RECVERR_RFC4884:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val < 0 || val > 1)
+ return -EINVAL;
+ inet6_assign_bit(RECVERR6_RFC4884, sk, valbool);
+ return 0;
+ case IPV6_MULTICAST_ALL:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ inet6_assign_bit(MC6_ALL, sk, valbool);
+ return 0;
+ case IPV6_AUTOFLOWLABEL:
+ inet6_assign_bit(AUTOFLOWLABEL, sk, valbool);
+ inet6_set_bit(AUTOFLOWLABEL_SET, sk);
+ return 0;
+ case IPV6_DONTFRAG:
+ inet6_assign_bit(DONTFRAG, sk, valbool);
+ return 0;
+ case IPV6_RECVERR:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ inet6_assign_bit(RECVERR6, sk, valbool);
+ if (!val)
+ skb_errqueue_purge(&sk->sk_error_queue);
+ return 0;
+ case IPV6_ROUTER_ALERT_ISOLATE:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ inet6_assign_bit(RTALERT_ISOLATE, sk, valbool);
+ return 0;
+ case IPV6_MTU_DISCOVER:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)
+ return -EINVAL;
+ WRITE_ONCE(np->pmtudisc, val);
+ return 0;
+ case IPV6_FLOWINFO_SEND:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ inet6_assign_bit(SNDFLOW, sk, valbool);
+ return 0;
+ case IPV6_ADDR_PREFERENCES:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ return ip6_sock_set_addr_preferences(sk, val);
+ }
if (needs_rtnl)
rtnl_lock();
sockopt_lock_sock(sk);
@@ -733,34 +828,7 @@ done:
}
break;
}
- case IPV6_UNICAST_HOPS:
- if (optlen < sizeof(int))
- goto e_inval;
- if (val > 255 || val < -1)
- goto e_inval;
- np->hop_limit = val;
- retv = 0;
- break;
- case IPV6_MULTICAST_HOPS:
- if (sk->sk_type == SOCK_STREAM)
- break;
- if (optlen < sizeof(int))
- goto e_inval;
- if (val > 255 || val < -1)
- goto e_inval;
- np->mcast_hops = (val == -1 ? IPV6_DEFAULT_MCASTHOPS : val);
- retv = 0;
- break;
-
- case IPV6_MULTICAST_LOOP:
- if (optlen < sizeof(int))
- goto e_inval;
- if (val != valbool)
- goto e_inval;
- np->mc_loop = valbool;
- retv = 0;
- break;
case IPV6_UNICAST_IF:
{
@@ -862,13 +930,6 @@ done:
retv = ipv6_sock_ac_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr);
break;
}
- case IPV6_MULTICAST_ALL:
- if (optlen < sizeof(int))
- goto e_inval;
- np->mc_all = valbool;
- retv = 0;
- break;
-
case MCAST_JOIN_GROUP:
case MCAST_LEAVE_GROUP:
if (in_compat_syscall())
@@ -896,42 +957,6 @@ done:
goto e_inval;
retv = ip6_ra_control(sk, val);
break;
- case IPV6_ROUTER_ALERT_ISOLATE:
- if (optlen < sizeof(int))
- goto e_inval;
- np->rtalert_isolate = valbool;
- retv = 0;
- break;
- case IPV6_MTU_DISCOVER:
- if (optlen < sizeof(int))
- goto e_inval;
- if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)
- goto e_inval;
- np->pmtudisc = val;
- retv = 0;
- break;
- case IPV6_MTU:
- if (optlen < sizeof(int))
- goto e_inval;
- if (val && val < IPV6_MIN_MTU)
- goto e_inval;
- np->frag_size = val;
- retv = 0;
- break;
- case IPV6_RECVERR:
- if (optlen < sizeof(int))
- goto e_inval;
- np->recverr = valbool;
- if (!val)
- skb_errqueue_purge(&sk->sk_error_queue);
- retv = 0;
- break;
- case IPV6_FLOWINFO_SEND:
- if (optlen < sizeof(int))
- goto e_inval;
- np->sndflow = valbool;
- retv = 0;
- break;
case IPV6_FLOWLABEL_MGR:
retv = ipv6_flowlabel_opt(sk, optval, optlen);
break;
@@ -943,47 +968,10 @@ done:
retv = xfrm_user_policy(sk, optname, optval, optlen);
break;
- case IPV6_ADDR_PREFERENCES:
- if (optlen < sizeof(int))
- goto e_inval;
- retv = __ip6_sock_set_addr_preferences(sk, val);
- break;
- case IPV6_MINHOPCOUNT:
- if (optlen < sizeof(int))
- goto e_inval;
- if (val < 0 || val > 255)
- goto e_inval;
-
- if (val)
- static_branch_enable(&ip6_min_hopcount);
-
- /* tcp_v6_err() and tcp_v6_rcv() might read min_hopcount
- * while we are changing it.
- */
- WRITE_ONCE(np->min_hopcount, val);
- retv = 0;
- break;
- case IPV6_DONTFRAG:
- np->dontfrag = valbool;
- retv = 0;
- break;
- case IPV6_AUTOFLOWLABEL:
- np->autoflowlabel = valbool;
- np->autoflowlabel_set = 1;
- retv = 0;
- break;
case IPV6_RECVFRAGSIZE:
np->rxopt.bits.recvfragsize = valbool;
retv = 0;
break;
- case IPV6_RECVERR_RFC4884:
- if (optlen < sizeof(int))
- goto e_inval;
- if (val < 0 || val > 1)
- goto e_inval;
- np->recverr_rfc4884 = valbool;
- retv = 0;
- break;
}
unlock:
@@ -1180,7 +1168,8 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
}
if (np->rxopt.bits.rxhlim) {
- int hlim = np->mcast_hops;
+ int hlim = READ_ONCE(np->mcast_hops);
+
put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
}
if (np->rxopt.bits.rxtclass) {
@@ -1197,7 +1186,8 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
}
if (np->rxopt.bits.rxohlim) {
- int hlim = np->mcast_hops;
+ int hlim = READ_ONCE(np->mcast_hops);
+
put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
}
if (np->rxopt.bits.rxflow) {
@@ -1347,9 +1337,9 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
struct dst_entry *dst;
if (optname == IPV6_UNICAST_HOPS)
- val = np->hop_limit;
+ val = READ_ONCE(np->hop_limit);
else
- val = np->mcast_hops;
+ val = READ_ONCE(np->mcast_hops);
if (val < 0) {
rcu_read_lock();
@@ -1365,7 +1355,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
case IPV6_MULTICAST_LOOP:
- val = np->mc_loop;
+ val = inet6_test_bit(MC6_LOOP, sk);
break;
case IPV6_MULTICAST_IF:
@@ -1373,7 +1363,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_MULTICAST_ALL:
- val = np->mc_all;
+ val = inet6_test_bit(MC6_ALL, sk);
break;
case IPV6_UNICAST_IF:
@@ -1381,15 +1371,15 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_MTU_DISCOVER:
- val = np->pmtudisc;
+ val = READ_ONCE(np->pmtudisc);
break;
case IPV6_RECVERR:
- val = np->recverr;
+ val = inet6_test_bit(RECVERR6, sk);
break;
case IPV6_FLOWINFO_SEND:
- val = np->sndflow;
+ val = inet6_test_bit(SNDFLOW, sk);
break;
case IPV6_FLOWLABEL_MGR:
@@ -1424,33 +1414,35 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
case IPV6_ADDR_PREFERENCES:
+ {
+ u8 srcprefs = READ_ONCE(np->srcprefs);
val = 0;
- if (np->srcprefs & IPV6_PREFER_SRC_TMP)
+ if (srcprefs & IPV6_PREFER_SRC_TMP)
val |= IPV6_PREFER_SRC_TMP;
- else if (np->srcprefs & IPV6_PREFER_SRC_PUBLIC)
+ else if (srcprefs & IPV6_PREFER_SRC_PUBLIC)
val |= IPV6_PREFER_SRC_PUBLIC;
else {
/* XXX: should we return system default? */
val |= IPV6_PREFER_SRC_PUBTMP_DEFAULT;
}
- if (np->srcprefs & IPV6_PREFER_SRC_COA)
+ if (srcprefs & IPV6_PREFER_SRC_COA)
val |= IPV6_PREFER_SRC_COA;
else
val |= IPV6_PREFER_SRC_HOME;
break;
-
+ }
case IPV6_MINHOPCOUNT:
- val = np->min_hopcount;
+ val = READ_ONCE(np->min_hopcount);
break;
case IPV6_DONTFRAG:
- val = np->dontfrag;
+ val = inet6_test_bit(DONTFRAG, sk);
break;
case IPV6_AUTOFLOWLABEL:
- val = ip6_autoflowlabel(sock_net(sk), np);
+ val = ip6_autoflowlabel(sock_net(sk), sk);
break;
case IPV6_RECVFRAGSIZE:
@@ -1458,11 +1450,11 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_ROUTER_ALERT_ISOLATE:
- val = np->rtalert_isolate;
+ val = inet6_test_bit(RTALERT_ISOLATE, sk);
break;
case IPV6_RECVERR_RFC4884:
- val = np->recverr_rfc4884;
+ val = inet6_test_bit(RECVERR6_RFC4884, sk);
break;
default:
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 5ce25bcb9974..b75d3c9d41bb 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -642,7 +642,7 @@ bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr,
}
if (!mc) {
rcu_read_unlock();
- return np->mc_all;
+ return inet6_test_bit(MC6_ALL, sk);
}
psl = rcu_dereference(mc->sflist);
if (!psl) {
@@ -1716,7 +1716,7 @@ static void ip6_mc_hdr(const struct sock *sk, struct sk_buff *skb,
hdr->payload_len = htons(len);
hdr->nexthdr = proto;
- hdr->hop_limit = inet6_sk(sk)->hop_limit;
+ hdr->hop_limit = READ_ONCE(inet6_sk(sk)->hop_limit);
hdr->saddr = *saddr;
hdr->daddr = *daddr;
@@ -1789,7 +1789,7 @@ static void mld_sendpack(struct sk_buff *skb)
rcu_read_lock();
idev = __in6_dev_get(skb->dev);
- IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
payload_len = (skb_tail_pointer(skb) - skb_network_header(skb)) -
sizeof(*pip6);
@@ -2147,8 +2147,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
full_len = sizeof(struct ipv6hdr) + payload_len;
rcu_read_lock();
- IP6_UPD_PO_STATS(net, __in6_dev_get(dev),
- IPSTATS_MIB_OUT, full_len);
+ IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_OUTREQUESTS);
rcu_read_unlock();
skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err);
@@ -3011,8 +3010,6 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s
continue;
state->im = rcu_dereference(state->idev->mc_list);
}
- if (!state->im)
- break;
psf = rcu_dereference(state->im->mca_sources);
}
out:
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 553c8664e0a7..a19999b30bc0 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -500,11 +500,11 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
csum_partial(icmp6h,
skb->len, 0));
- ip6_nd_hdr(skb, saddr, daddr, inet6_sk(sk)->hop_limit, skb->len);
+ ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len);
rcu_read_lock();
idev = __in6_dev_get(dst->dev);
- IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
net, sk, skb, NULL, dst->dev,
@@ -1996,7 +1996,7 @@ static int __net_init ndisc_net_init(struct net *net)
np = inet6_sk(sk);
np->hop_limit = 255;
/* Do not loopback ndisc messages */
- np->mc_loop = 0;
+ inet6_clear_bit(MC6_LOOP, sk);
return 0;
}
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 857713d7a38a..53d255838e6a 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -61,7 +61,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
#ifdef CONFIG_XFRM
if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
- xfrm_decode_session(skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
+ xfrm_decode_session(net, skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
skb_dst_set(skb, NULL);
dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
if (IS_ERR(dst))
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index a88b2ce4a3cb..8dd4cd0c47bd 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -31,10 +31,10 @@ static const struct xt_table packet_mangler = {
static unsigned int
ip6t_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
{
- unsigned int ret;
struct in6_addr saddr, daddr;
- u_int8_t hop_limit;
- u_int32_t flowlabel, mark;
+ unsigned int ret, verdict;
+ u32 flowlabel, mark;
+ u8 hop_limit;
int err;
/* save source/dest address, mark, hoplimit, flowlabel, priority, */
@@ -47,8 +47,9 @@ ip6t_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *sta
flowlabel = *((u_int32_t *)ipv6_hdr(skb));
ret = ip6t_do_table(priv, skb, state);
+ verdict = ret & NF_VERDICT_MASK;
- if (ret != NF_DROP && ret != NF_STOLEN &&
+ if (verdict != NF_DROP && verdict != NF_STOLEN &&
(!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) ||
!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) ||
skb->mark != mark ||
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 5831aaa53d75..d2098dd4ceae 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -56,7 +56,7 @@ static int ping_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
- return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr);
+ return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len);
}
static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
@@ -89,7 +89,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
return -EAFNOSUPPORT;
}
daddr = &(u->sin6_addr);
- if (np->sndflow)
+ if (inet6_test_bit(SNDFLOW, sk))
fl6.flowlabel = u->sin6_flowinfo & IPV6_FLOWINFO_MASK;
if (__ipv6_addr_needs_scope_id(ipv6_addr_type(daddr)))
oif = u->sin6_scope_id;
@@ -118,7 +118,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
l3mdev_master_ifindex_by_index(sock_net(sk), oif) != sk->sk_bound_dev_if))
return -EINVAL;
- ipcm6_init_sk(&ipc6, np);
+ ipcm6_init_sk(&ipc6, sk);
ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index e20b3705c2d2..6d1d9221649d 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -61,7 +61,7 @@ static const struct snmp_mib snmp6_ipstats_list[] = {
SNMP_MIB_ITEM("Ip6InDiscards", IPSTATS_MIB_INDISCARDS),
SNMP_MIB_ITEM("Ip6InDelivers", IPSTATS_MIB_INDELIVERS),
SNMP_MIB_ITEM("Ip6OutForwDatagrams", IPSTATS_MIB_OUTFORWDATAGRAMS),
- SNMP_MIB_ITEM("Ip6OutRequests", IPSTATS_MIB_OUTPKTS),
+ SNMP_MIB_ITEM("Ip6OutRequests", IPSTATS_MIB_OUTREQUESTS),
SNMP_MIB_ITEM("Ip6OutDiscards", IPSTATS_MIB_OUTDISCARDS),
SNMP_MIB_ITEM("Ip6OutNoRoutes", IPSTATS_MIB_OUTNOROUTES),
SNMP_MIB_ITEM("Ip6ReasmTimeout", IPSTATS_MIB_REASMTIMEOUT),
@@ -84,6 +84,7 @@ static const struct snmp_mib snmp6_ipstats_list[] = {
SNMP_MIB_ITEM("Ip6InECT1Pkts", IPSTATS_MIB_ECT1PKTS),
SNMP_MIB_ITEM("Ip6InECT0Pkts", IPSTATS_MIB_ECT0PKTS),
SNMP_MIB_ITEM("Ip6InCEPkts", IPSTATS_MIB_CEPKTS),
+ SNMP_MIB_ITEM("Ip6OutTransmits", IPSTATS_MIB_OUTPKTS),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 42fcec3ecf5e..dd0a4e73e602 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -291,6 +291,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
+ bool recverr = inet6_test_bit(RECVERR6, sk);
struct ipv6_pinfo *np = inet6_sk(sk);
int err;
int harderr;
@@ -300,26 +301,26 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
2. Socket is connected (otherwise the error indication
is useless without recverr and error is hard.
*/
- if (!np->recverr && sk->sk_state != TCP_ESTABLISHED)
+ if (!recverr && sk->sk_state != TCP_ESTABLISHED)
return;
harderr = icmpv6_err_convert(type, code, &err);
if (type == ICMPV6_PKT_TOOBIG) {
ip6_sk_update_pmtu(skb, sk, info);
- harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
+ harderr = (READ_ONCE(np->pmtudisc) == IPV6_PMTUDISC_DO);
}
if (type == NDISC_REDIRECT) {
ip6_sk_redirect(skb, sk);
return;
}
- if (np->recverr) {
+ if (recverr) {
u8 *payload = skb->data;
if (!inet_test_bit(HDRINCL, sk))
payload += offset;
ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload);
}
- if (np->recverr || harderr) {
+ if (recverr || harderr) {
sk->sk_err = err;
sk_error_report(sk);
}
@@ -587,7 +588,6 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
struct flowi6 *fl6, struct dst_entry **dstp,
unsigned int flags, const struct sockcm_cookie *sockc)
{
- struct ipv6_pinfo *np = inet6_sk(sk);
struct net *net = sock_net(sk);
struct ipv6hdr *iph;
struct sk_buff *skb;
@@ -651,7 +651,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
* have been queued for deletion.
*/
rcu_read_lock();
- IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
+ IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
NULL, rt->dst.dev, dst_output);
if (err > 0)
@@ -668,7 +668,7 @@ out:
error:
IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
error_check:
- if (err == -ENOBUFS && !np->recverr)
+ if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk))
err = 0;
return err;
}
@@ -795,7 +795,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
return -EINVAL;
daddr = &sin6->sin6_addr;
- if (np->sndflow) {
+ if (inet6_test_bit(SNDFLOW, sk)) {
fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
@@ -898,7 +898,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
if (ipc6.dontfrag < 0)
- ipc6.dontfrag = np->dontfrag;
+ ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
if (msg->msg_flags&MSG_CONFIRM)
goto do_confirm;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9c687b357e6a..b132feae3393 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -341,7 +341,7 @@ struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
int flags)
{
struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
- 1, DST_OBSOLETE_FORCE_CHK, flags);
+ DST_OBSOLETE_FORCE_CHK, flags);
if (rt) {
rt6_info_init(rt);
@@ -2622,7 +2622,7 @@ static struct dst_entry *ip6_route_output_flags_noref(struct net *net,
if (!any_src)
flags |= RT6_LOOKUP_F_HAS_SADDR;
else if (sk)
- flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
+ flags |= rt6_srcprefs2flags(READ_ONCE(inet6_sk(sk)->srcprefs));
return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
}
@@ -2655,7 +2655,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
struct net_device *loopback_dev = net->loopback_dev;
struct dst_entry *new = NULL;
- rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
+ rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev,
DST_OBSOLETE_DEAD, 0);
if (rt) {
rt6_info_init(rt);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 5014aa663452..500f6ed3b8cf 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -140,6 +140,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
struct dst_entry *dst;
__u8 rcv_wscale;
u32 tsoff = 0;
+ int l3index;
if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
!th->ack || th->rst)
@@ -214,6 +215,10 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
treq->snt_isn = cookie;
treq->ts_off = 0;
treq->txhash = net_tx_rndhash();
+
+ l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
+ tcp_ao_syncookie(sk, skb, treq, AF_INET6, l3index);
+
if (IS_ENABLED(CONFIG_SMC))
ireq->smc_ok = 0;
diff --git a/net/ipv6/tcp_ao.c b/net/ipv6/tcp_ao.c
new file mode 100644
index 000000000000..3c09ac26206e
--- /dev/null
+++ b/net/ipv6/tcp_ao.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * INET An implementation of the TCP Authentication Option (TCP-AO).
+ * See RFC5925.
+ *
+ * Authors: Dmitry Safonov <dima@arista.com>
+ * Francesco Ruggeri <fruggeri@arista.com>
+ * Salam Noureddine <noureddine@arista.com>
+ */
+#include <crypto/hash.h>
+#include <linux/tcp.h>
+
+#include <net/tcp.h>
+#include <net/ipv6.h>
+
+static int tcp_v6_ao_calc_key(struct tcp_ao_key *mkt, u8 *key,
+ const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __be16 sport, __be16 dport,
+ __be32 sisn, __be32 disn)
+{
+ struct kdf_input_block {
+ u8 counter;
+ u8 label[6];
+ struct tcp6_ao_context ctx;
+ __be16 outlen;
+ } __packed * tmp;
+ struct tcp_sigpool hp;
+ int err;
+
+ err = tcp_sigpool_start(mkt->tcp_sigpool_id, &hp);
+ if (err)
+ return err;
+
+ tmp = hp.scratch;
+ tmp->counter = 1;
+ memcpy(tmp->label, "TCP-AO", 6);
+ tmp->ctx.saddr = *saddr;
+ tmp->ctx.daddr = *daddr;
+ tmp->ctx.sport = sport;
+ tmp->ctx.dport = dport;
+ tmp->ctx.sisn = sisn;
+ tmp->ctx.disn = disn;
+ tmp->outlen = htons(tcp_ao_digest_size(mkt) * 8); /* in bits */
+
+ err = tcp_ao_calc_traffic_key(mkt, key, tmp, sizeof(*tmp), &hp);
+ tcp_sigpool_end(&hp);
+
+ return err;
+}
+
+int tcp_v6_ao_calc_key_skb(struct tcp_ao_key *mkt, u8 *key,
+ const struct sk_buff *skb,
+ __be32 sisn, __be32 disn)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
+
+ return tcp_v6_ao_calc_key(mkt, key, &iph->saddr,
+ &iph->daddr, th->source,
+ th->dest, sisn, disn);
+}
+
+int tcp_v6_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key,
+ const struct sock *sk, __be32 sisn,
+ __be32 disn, bool send)
+{
+ if (send)
+ return tcp_v6_ao_calc_key(mkt, key, &sk->sk_v6_rcv_saddr,
+ &sk->sk_v6_daddr, htons(sk->sk_num),
+ sk->sk_dport, sisn, disn);
+ else
+ return tcp_v6_ao_calc_key(mkt, key, &sk->sk_v6_daddr,
+ &sk->sk_v6_rcv_saddr, sk->sk_dport,
+ htons(sk->sk_num), disn, sisn);
+}
+
+int tcp_v6_ao_calc_key_rsk(struct tcp_ao_key *mkt, u8 *key,
+ struct request_sock *req)
+{
+ struct inet_request_sock *ireq = inet_rsk(req);
+
+ return tcp_v6_ao_calc_key(mkt, key,
+ &ireq->ir_v6_loc_addr, &ireq->ir_v6_rmt_addr,
+ htons(ireq->ir_num), ireq->ir_rmt_port,
+ htonl(tcp_rsk(req)->snt_isn),
+ htonl(tcp_rsk(req)->rcv_isn));
+}
+
+struct tcp_ao_key *tcp_v6_ao_lookup(const struct sock *sk,
+ struct sock *addr_sk,
+ int sndid, int rcvid)
+{
+ int l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
+ addr_sk->sk_bound_dev_if);
+ struct in6_addr *addr = &addr_sk->sk_v6_daddr;
+
+ return tcp_ao_do_lookup(sk, l3index, (union tcp_ao_addr *)addr,
+ AF_INET6, sndid, rcvid);
+}
+
+struct tcp_ao_key *tcp_v6_ao_lookup_rsk(const struct sock *sk,
+ struct request_sock *req,
+ int sndid, int rcvid)
+{
+ struct inet_request_sock *ireq = inet_rsk(req);
+ struct in6_addr *addr = &ireq->ir_v6_rmt_addr;
+ int l3index;
+
+ l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
+ return tcp_ao_do_lookup(sk, l3index, (union tcp_ao_addr *)addr,
+ AF_INET6, sndid, rcvid);
+}
+
+int tcp_v6_ao_hash_pseudoheader(struct tcp_sigpool *hp,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr, int nbytes)
+{
+ struct tcp6_pseudohdr *bp;
+ struct scatterlist sg;
+
+ bp = hp->scratch;
+ /* 1. TCP pseudo-header (RFC2460) */
+ bp->saddr = *saddr;
+ bp->daddr = *daddr;
+ bp->len = cpu_to_be32(nbytes);
+ bp->protocol = cpu_to_be32(IPPROTO_TCP);
+
+ sg_init_one(&sg, bp, sizeof(*bp));
+ ahash_request_set_crypt(hp->req, &sg, NULL, sizeof(*bp));
+ return crypto_ahash_update(hp->req);
+}
+
+int tcp_v6_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key,
+ const struct sock *sk, const struct sk_buff *skb,
+ const u8 *tkey, int hash_offset, u32 sne)
+{
+ return tcp_ao_hash_skb(AF_INET6, ao_hash, key, sk, skb, tkey,
+ hash_offset, sne);
+}
+
+int tcp_v6_parse_ao(struct sock *sk, int cmd,
+ sockptr_t optval, int optlen)
+{
+ return tcp_parse_ao(sk, cmd, AF_INET6, optval, optlen);
+}
+
+int tcp_v6_ao_synack_hash(char *ao_hash, struct tcp_ao_key *ao_key,
+ struct request_sock *req, const struct sk_buff *skb,
+ int hash_offset, u32 sne)
+{
+ void *hash_buf = NULL;
+ int err;
+
+ hash_buf = kmalloc(tcp_ao_digest_size(ao_key), GFP_ATOMIC);
+ if (!hash_buf)
+ return -ENOMEM;
+
+ err = tcp_v6_ao_calc_key_rsk(ao_key, hash_buf, req);
+ if (err)
+ goto out;
+
+ err = tcp_ao_hash_skb(AF_INET6, ao_hash, ao_key, req_to_sk(req), skb,
+ hash_buf, hash_offset, sne);
+out:
+ kfree(hash_buf);
+ return err;
+}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3a88545a265d..937a02c2e534 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -76,16 +76,9 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
-#ifdef CONFIG_TCP_MD5SIG
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
-#else
-static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
- const struct in6_addr *addr,
- int l3index)
-{
- return NULL;
-}
#endif
/* Helper returning the inet6 address from a given tcp socket.
@@ -135,7 +128,7 @@ static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
sock_owned_by_me(sk);
- return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
+ return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
}
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
@@ -163,7 +156,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
memset(&fl6, 0, sizeof(fl6));
- if (np->sndflow) {
+ if (inet6_test_bit(SNDFLOW, sk)) {
fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
IP6_ECN_flow_init(fl6.flowlabel);
if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
@@ -239,7 +232,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (sk_is_mptcp(sk))
mptcpv6_handle_mapped(sk, true);
sk->sk_backlog_rcv = tcp_v4_do_rcv;
-#ifdef CONFIG_TCP_MD5SIG
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif
@@ -252,7 +245,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (sk_is_mptcp(sk))
mptcpv6_handle_mapped(sk, false);
sk->sk_backlog_rcv = tcp_v6_do_rcv;
-#ifdef CONFIG_TCP_MD5SIG
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
tp->af_specific = &tcp_sock_ipv6_specific;
#endif
goto failure;
@@ -286,6 +279,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
goto failure;
}
+ tp->tcp_usec_ts = dst_tcp_usec_ts(dst);
tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
if (!saddr) {
@@ -402,6 +396,8 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
if (sk->sk_state == TCP_TIME_WAIT) {
+ /* To increase the counter of ignored icmps for TCP-AO */
+ tcp_ao_ignore_icmp(sk, AF_INET6, type, code);
inet_twsk_put(inet_twsk(sk));
return 0;
}
@@ -412,6 +408,11 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return 0;
}
+ if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) {
+ sock_put(sk);
+ return 0;
+ }
+
bh_lock_sock(sk);
if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
@@ -508,7 +509,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
tcp_ld_RTO_revert(sk, seq);
}
- if (!sock_owned_by_user(sk) && np->recverr) {
+ if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
WRITE_ONCE(sk->sk_err, err);
sk_error_report(sk);
} else {
@@ -548,7 +549,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
&ireq->ir_v6_rmt_addr);
fl6->daddr = ireq->ir_v6_rmt_addr;
- if (np->repflow && ireq->pktopts)
+ if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts)
fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
@@ -565,7 +566,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
if (!opt)
opt = rcu_dereference(np->opt);
err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
- opt, tclass, sk->sk_priority);
+ opt, tclass, READ_ONCE(sk->sk_priority));
rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -606,8 +607,10 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
{
struct tcp_md5sig cmd;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
+ union tcp_ao_addr *addr;
int l3index = 0;
u8 prefixlen;
+ bool l3flag;
u8 flags;
if (optlen < sizeof(cmd))
@@ -620,6 +623,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
return -EINVAL;
flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
+ l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
if (optname == TCP_MD5SIG_EXT &&
cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
@@ -660,17 +664,33 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
return -EINVAL;
- if (ipv6_addr_v4mapped(&sin6->sin6_addr))
- return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
+ if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
+ addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3];
+
+ /* Don't allow keys for peers that have a matching TCP-AO key.
+ * See the comment in tcp_ao_add_cmd()
+ */
+ if (tcp_ao_required(sk, addr, AF_INET,
+ l3flag ? l3index : -1, false))
+ return -EKEYREJECTED;
+ return tcp_md5_do_add(sk, addr,
AF_INET, prefixlen, l3index, flags,
cmd.tcpm_key, cmd.tcpm_keylen);
+ }
+
+ addr = (union tcp_md5_addr *)&sin6->sin6_addr;
- return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
- AF_INET6, prefixlen, l3index, flags,
+ /* Don't allow keys for peers that have a matching TCP-AO key.
+ * See the comment in tcp_ao_add_cmd()
+ */
+ if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? l3index : -1, false))
+ return -EKEYREJECTED;
+
+ return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags,
cmd.tcpm_key, cmd.tcpm_keylen);
}
-static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
+static int tcp_v6_md5_hash_headers(struct tcp_sigpool *hp,
const struct in6_addr *daddr,
const struct in6_addr *saddr,
const struct tcphdr *th, int nbytes)
@@ -691,39 +711,36 @@ static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
_th->check = 0;
sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
- ahash_request_set_crypt(hp->md5_req, &sg, NULL,
+ ahash_request_set_crypt(hp->req, &sg, NULL,
sizeof(*bp) + sizeof(*th));
- return crypto_ahash_update(hp->md5_req);
+ return crypto_ahash_update(hp->req);
}
static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
const struct in6_addr *daddr, struct in6_addr *saddr,
const struct tcphdr *th)
{
- struct tcp_md5sig_pool *hp;
- struct ahash_request *req;
+ struct tcp_sigpool hp;
- hp = tcp_get_md5sig_pool();
- if (!hp)
- goto clear_hash_noput;
- req = hp->md5_req;
+ if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp))
+ goto clear_hash_nostart;
- if (crypto_ahash_init(req))
+ if (crypto_ahash_init(hp.req))
goto clear_hash;
- if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
+ if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2))
goto clear_hash;
- if (tcp_md5_hash_key(hp, key))
+ if (tcp_md5_hash_key(&hp, key))
goto clear_hash;
- ahash_request_set_crypt(req, NULL, md5_hash, 0);
- if (crypto_ahash_final(req))
+ ahash_request_set_crypt(hp.req, NULL, md5_hash, 0);
+ if (crypto_ahash_final(hp.req))
goto clear_hash;
- tcp_put_md5sig_pool();
+ tcp_sigpool_end(&hp);
return 0;
clear_hash:
- tcp_put_md5sig_pool();
-clear_hash_noput:
+ tcp_sigpool_end(&hp);
+clear_hash_nostart:
memset(md5_hash, 0, 16);
return 1;
}
@@ -733,10 +750,9 @@ static int tcp_v6_md5_hash_skb(char *md5_hash,
const struct sock *sk,
const struct sk_buff *skb)
{
- const struct in6_addr *saddr, *daddr;
- struct tcp_md5sig_pool *hp;
- struct ahash_request *req;
const struct tcphdr *th = tcp_hdr(skb);
+ const struct in6_addr *saddr, *daddr;
+ struct tcp_sigpool hp;
if (sk) { /* valid for establish/request sockets */
saddr = &sk->sk_v6_rcv_saddr;
@@ -747,34 +763,31 @@ static int tcp_v6_md5_hash_skb(char *md5_hash,
daddr = &ip6h->daddr;
}
- hp = tcp_get_md5sig_pool();
- if (!hp)
- goto clear_hash_noput;
- req = hp->md5_req;
+ if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp))
+ goto clear_hash_nostart;
- if (crypto_ahash_init(req))
+ if (crypto_ahash_init(hp.req))
goto clear_hash;
- if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
+ if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, skb->len))
goto clear_hash;
- if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
+ if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2))
goto clear_hash;
- if (tcp_md5_hash_key(hp, key))
+ if (tcp_md5_hash_key(&hp, key))
goto clear_hash;
- ahash_request_set_crypt(req, NULL, md5_hash, 0);
- if (crypto_ahash_final(req))
+ ahash_request_set_crypt(hp.req, NULL, md5_hash, 0);
+ if (crypto_ahash_final(hp.req))
goto clear_hash;
- tcp_put_md5sig_pool();
+ tcp_sigpool_end(&hp);
return 0;
clear_hash:
- tcp_put_md5sig_pool();
-clear_hash_noput:
+ tcp_sigpool_end(&hp);
+clear_hash_nostart:
memset(md5_hash, 0, 16);
return 1;
}
-
#endif
static void tcp_v6_init_req(struct request_sock *req,
@@ -797,7 +810,7 @@ static void tcp_v6_init_req(struct request_sock *req,
(ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
np->rxopt.bits.rxinfo ||
np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
- np->rxopt.bits.rxohlim || np->repflow)) {
+ np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) {
refcount_inc(&skb->users);
ireq->pktopts = skb;
}
@@ -833,6 +846,11 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
.req_md5_lookup = tcp_v6_md5_lookup,
.calc_md5_hash = tcp_v6_md5_hash_skb,
#endif
+#ifdef CONFIG_TCP_AO
+ .ao_lookup = tcp_v6_ao_lookup_rsk,
+ .ao_calc_key = tcp_v6_ao_calc_key_rsk,
+ .ao_synack_hash = tcp_v6_ao_synack_hash,
+#endif
#ifdef CONFIG_SYN_COOKIES
.cookie_init_seq = cookie_v6_init_sequence,
#endif
@@ -844,8 +862,8 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr,
- int oif, struct tcp_md5sig_key *key, int rst,
- u8 tclass, __be32 label, u32 priority, u32 txhash)
+ int oif, int rst, u8 tclass, __be32 label,
+ u32 priority, u32 txhash, struct tcp_key *key)
{
const struct tcphdr *th = tcp_hdr(skb);
struct tcphdr *t1;
@@ -860,13 +878,13 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
if (tsecr)
tot_len += TCPOLEN_TSTAMP_ALIGNED;
-#ifdef CONFIG_TCP_MD5SIG
- if (key)
+ if (tcp_key_is_md5(key))
tot_len += TCPOLEN_MD5SIG_ALIGNED;
-#endif
+ if (tcp_key_is_ao(key))
+ tot_len += tcp_ao_len(key->ao_key);
#ifdef CONFIG_MPTCP
- if (rst && !key) {
+ if (rst && !tcp_key_is_md5(key)) {
mrst = mptcp_reset_option(skb);
if (mrst)
@@ -907,14 +925,28 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
*topt++ = mrst;
#ifdef CONFIG_TCP_MD5SIG
- if (key) {
+ if (tcp_key_is_md5(key)) {
*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
- tcp_v6_md5_hash_hdr((__u8 *)topt, key,
+ tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key,
&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr, t1);
}
#endif
+#ifdef CONFIG_TCP_AO
+ if (tcp_key_is_ao(key)) {
+ *topt++ = htonl((TCPOPT_AO << 24) |
+ (tcp_ao_len(key->ao_key) << 16) |
+ (key->ao_key->sndid << 8) |
+ (key->rcv_next));
+
+ tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key,
+ key->traffic_key,
+ (union tcp_ao_addr *)&ipv6_hdr(skb)->saddr,
+ (union tcp_ao_addr *)&ipv6_hdr(skb)->daddr,
+ t1, key->sne);
+ }
+#endif
memset(&fl6, 0, sizeof(fl6));
fl6.daddr = ipv6_hdr(skb)->saddr;
@@ -977,19 +1009,23 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
const struct tcphdr *th = tcp_hdr(skb);
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
- u32 seq = 0, ack_seq = 0;
- struct tcp_md5sig_key *key = NULL;
-#ifdef CONFIG_TCP_MD5SIG
- const __u8 *hash_location = NULL;
- unsigned char newhash[16];
- int genhash;
- struct sock *sk1 = NULL;
+ const __u8 *md5_hash_location = NULL;
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
+ bool allocated_traffic_key = false;
#endif
+ const struct tcp_ao_hdr *aoh;
+ struct tcp_key key = {};
+ u32 seq = 0, ack_seq = 0;
__be32 label = 0;
u32 priority = 0;
struct net *net;
u32 txhash = 0;
int oif = 0;
+#ifdef CONFIG_TCP_MD5SIG
+ unsigned char newhash[16];
+ int genhash;
+ struct sock *sk1 = NULL;
+#endif
if (th->rst)
return;
@@ -1001,9 +1037,13 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
return;
net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
-#ifdef CONFIG_TCP_MD5SIG
+ /* Invalid TCP option size or twice included auth */
+ if (tcp_parse_auth_options(th, &md5_hash_location, &aoh))
+ return;
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
rcu_read_lock();
- hash_location = tcp_parse_md5sig_option(th);
+#endif
+#ifdef CONFIG_TCP_MD5SIG
if (sk && sk_fullsock(sk)) {
int l3index;
@@ -1011,8 +1051,10 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
* in an L3 domain and inet_iif is set to it.
*/
l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
- key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
- } else if (hash_location) {
+ key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
+ if (key.md5_key)
+ key.type = TCP_KEY_MD5;
+ } else if (md5_hash_location) {
int dif = tcp_v6_iif_l3_slave(skb);
int sdif = tcp_v6_sdif(skb);
int l3index;
@@ -1036,12 +1078,13 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
*/
l3index = tcp_v6_sdif(skb) ? dif : 0;
- key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
- if (!key)
+ key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
+ if (!key.md5_key)
goto out;
+ key.type = TCP_KEY_MD5;
- genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
- if (genhash || memcmp(hash_location, newhash, 16) != 0)
+ genhash = tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb);
+ if (genhash || memcmp(md5_hash_location, newhash, 16) != 0)
goto out;
}
#endif
@@ -1052,15 +1095,27 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
(th->doff << 2);
+#ifdef CONFIG_TCP_AO
+ if (aoh) {
+ int l3index;
+
+ l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
+ if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq,
+ &key.ao_key, &key.traffic_key,
+ &allocated_traffic_key,
+ &key.rcv_next, &key.sne))
+ goto out;
+ key.type = TCP_KEY_AO;
+ }
+#endif
+
if (sk) {
oif = sk->sk_bound_dev_if;
if (sk_fullsock(sk)) {
- const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
-
trace_tcp_send_reset(sk, skb);
- if (np->repflow)
+ if (inet6_test_bit(REPFLOW, sk))
label = ip6_flowlabel(ipv6h);
- priority = sk->sk_priority;
+ priority = READ_ONCE(sk->sk_priority);
txhash = sk->sk_txhash;
}
if (sk->sk_state == TCP_TIME_WAIT) {
@@ -1073,45 +1128,141 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
label = ip6_flowlabel(ipv6h);
}
- tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
- ipv6_get_dsfield(ipv6h), label, priority, txhash);
+ tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
+ ipv6_get_dsfield(ipv6h), label, priority, txhash,
+ &key);
-#ifdef CONFIG_TCP_MD5SIG
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
out:
+ if (allocated_traffic_key)
+ kfree(key.traffic_key);
rcu_read_unlock();
#endif
}
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
- struct tcp_md5sig_key *key, u8 tclass,
+ struct tcp_key *key, u8 tclass,
__be32 label, u32 priority, u32 txhash)
{ /* Thin wrapper: forwards every argument to tcp_v6_send_response() with the "rst" argument fixed to 0. */
- tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
- tclass, label, priority, txhash);
+ tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0, /* the 0 after oif is "rst" */
+ tclass, label, priority, txhash, key); /* key is now the last parameter of tcp_v6_send_response() */
}
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{ /* Send an ACK on behalf of a timewait socket, passing along a TCP-AO or TCP-MD5 key when one is found. */
struct inet_timewait_sock *tw = inet_twsk(sk);
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
+ struct tcp_key key = {}; /* stays all-zero unless the AO or MD5 branch below fills it in */
+#ifdef CONFIG_TCP_AO
+ struct tcp_ao_info *ao_info;
+
+ if (static_branch_unlikely(&tcp_ao_needed.key)) {
+
+ /* FIXME: the segment to-be-acked is not verified yet */
+ ao_info = rcu_dereference(tcptw->ao_info);
+ if (ao_info) {
+ const struct tcp_ao_hdr *aoh;
+
+ /* Invalid TCP option size or twice included auth */
+ if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
+ goto out; /* no ACK is sent, but the inet_twsk_put() at "out" still runs */
+ if (aoh)
+ key.ao_key = tcp_ao_established_key(ao_info,
+ aoh->rnext_keyid, -1); /* look up by the RNextKeyID carried in the incoming segment */
+ }
+ }
+ if (key.ao_key) { /* only non-NULL if ao_info was set above, so ao_info is safe to use here */
+ struct tcp_ao_key *rnext_key;
+
+ key.traffic_key = snd_other_key(key.ao_key);
+ /* rcv_next switches to our rcv_next */
+ rnext_key = READ_ONCE(ao_info->rnext_key);
+ key.rcv_next = rnext_key->rcvid;
+ key.sne = READ_ONCE(ao_info->snd_sne);
+ key.type = TCP_KEY_AO;
+#else
+ if (0) { /* placeholder so the "} else if" / "}" lines below still balance without CONFIG_TCP_AO */
+#endif
+#ifdef CONFIG_TCP_MD5SIG
+ } else if (static_branch_unlikely(&tcp_md5_needed.key)) { /* MD5 is consulted only when no AO key was selected */
+ key.md5_key = tcp_twsk_md5_key(tcptw);
+ if (key.md5_key)
+ key.type = TCP_KEY_MD5;
+#endif
+ }
tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
- tcp_time_stamp_raw() + tcptw->tw_ts_offset,
- tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
+ tcp_tw_tsval(tcptw),
+ tcptw->tw_ts_recent, tw->tw_bound_dev_if, &key,
tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
tw->tw_txhash);
+#ifdef CONFIG_TCP_AO
+out: /* label exists only with CONFIG_TCP_AO because the only "goto out" is in the AO block */
+#endif
inet_twsk_put(tw);
}
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
{
- int l3index;
+ struct tcp_key key = {};
- l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
+#ifdef CONFIG_TCP_AO
+ if (static_branch_unlikely(&tcp_ao_needed.key) &&
+ tcp_rsk_used_ao(req)) {
+ const struct in6_addr *addr = &ipv6_hdr(skb)->saddr;
+ const struct tcp_ao_hdr *aoh;
+ int l3index;
+
+ l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
+ /* Invalid TCP option size or twice included auth */
+ if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh))
+ return;
+ if (!aoh)
+ return;
+ key.ao_key = tcp_ao_do_lookup(sk, l3index,
+ (union tcp_ao_addr *)addr,
+ AF_INET6, aoh->rnext_keyid, -1);
+ if (unlikely(!key.ao_key)) {
+ /* Send ACK with any matching MKT for the peer */
+ key.ao_key = tcp_ao_do_lookup(sk, l3index,
+ (union tcp_ao_addr *)addr,
+ AF_INET6, -1, -1);
+ /* Matching key disappeared (user removed the key?)
+ * let the handshake timeout.
+ */
+ if (!key.ao_key) {
+ net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n",
+ addr,
+ ntohs(tcp_hdr(skb)->source),
+ &ipv6_hdr(skb)->daddr,
+ ntohs(tcp_hdr(skb)->dest));
+ return;
+ }
+ }
+ key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC);
+ if (!key.traffic_key)
+ return;
+
+ key.type = TCP_KEY_AO;
+ key.rcv_next = aoh->keyid;
+ tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req);
+#else
+ if (0) {
+#endif
+#ifdef CONFIG_TCP_MD5SIG
+ } else if (static_branch_unlikely(&tcp_md5_needed.key)) {
+ int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
+
+ key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr,
+ l3index);
+ if (key.md5_key)
+ key.type = TCP_KEY_MD5;
+#endif
+ }
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
@@ -1125,12 +1276,13 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
tcp_rsk(req)->rcv_nxt,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
- tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
+ tcp_rsk_tsval(tcp_rsk(req)),
READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
- tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
- ipv6_get_dsfield(ipv6_hdr(skb)), 0,
+ &key, ipv6_get_dsfield(ipv6_hdr(skb)), 0,
READ_ONCE(sk->sk_priority),
READ_ONCE(tcp_rsk(req)->txhash));
+ if (tcp_key_is_ao(&key))
+ kfree(key.traffic_key);
}
@@ -1235,7 +1387,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
if (sk_is_mptcp(newsk))
mptcpv6_handle_mapped(newsk, true);
newsk->sk_backlog_rcv = tcp_v4_do_rcv;
-#ifdef CONFIG_TCP_MD5SIG
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif
@@ -1247,7 +1399,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newnp->mcast_oif = inet_iif(skb);
newnp->mcast_hops = ip_hdr(skb)->ttl;
newnp->rcv_flowinfo = 0;
- if (np->repflow)
+ if (inet6_test_bit(REPFLOW, sk))
newnp->flow_label = 0;
/*
@@ -1320,7 +1472,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newnp->mcast_oif = tcp_v6_iif(skb);
newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
- if (np->repflow)
+ if (inet6_test_bit(REPFLOW, sk))
newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
/* Set ToS of the new socket based upon the value of incoming SYN.
@@ -1360,19 +1512,26 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
#ifdef CONFIG_TCP_MD5SIG
l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
- /* Copy over the MD5 key from the original socket */
- key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
- if (key) {
- const union tcp_md5_addr *addr;
-
- addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
- if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) {
- inet_csk_prepare_forced_close(newsk);
- tcp_done(newsk);
- goto out;
+ if (!tcp_rsk_used_ao(req)) {
+ /* Copy over the MD5 key from the original socket */
+ key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
+ if (key) {
+ const union tcp_md5_addr *addr;
+
+ addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
+ if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) {
+ inet_csk_prepare_forced_close(newsk);
+ tcp_done(newsk);
+ goto out;
+ }
}
}
#endif
+#ifdef CONFIG_TCP_AO
+ /* Copy over tcp_ao_info if any */
+ if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6))
+ goto out; /* OOM */
+#endif
if (__inet_inherit_port(sk, newsk) < 0) {
inet_csk_prepare_forced_close(newsk);
@@ -1542,10 +1701,11 @@ ipv6_pktoptions:
if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
np->mcast_oif = tcp_v6_iif(opt_skb);
if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
- np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
+ WRITE_ONCE(np->mcast_hops,
+ ipv6_hdr(opt_skb)->hop_limit);
if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
- if (np->repflow)
+ if (inet6_test_bit(REPFLOW, sk))
np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
tcp_v6_restore_cb(opt_skb);
@@ -1640,9 +1800,12 @@ process:
struct sock *nsk;
sk = req->rsk_listener;
- drop_reason = tcp_inbound_md5_hash(sk, skb,
- &hdr->saddr, &hdr->daddr,
- AF_INET6, dif, sdif);
+ if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
+ drop_reason = SKB_DROP_REASON_XFRM_POLICY;
+ else
+ drop_reason = tcp_inbound_hash(sk, req, skb,
+ &hdr->saddr, &hdr->daddr,
+ AF_INET6, dif, sdif);
if (drop_reason) {
sk_drops_add(sk, skb);
reqsk_put(req);
@@ -1689,6 +1852,7 @@ process:
}
goto discard_and_relse;
}
+ nf_reset_ct(skb);
if (nsk == sk) {
reqsk_put(req);
tcp_v6_restore_cb(skb);
@@ -1715,8 +1879,8 @@ process:
goto discard_and_relse;
}
- drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
- AF_INET6, dif, sdif);
+ drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr,
+ AF_INET6, dif, sdif);
if (drop_reason)
goto discard_and_relse;
@@ -1891,7 +2055,6 @@ const struct inet_connection_sock_af_ops ipv6_specific = {
.conn_request = tcp_v6_conn_request,
.syn_recv_sock = tcp_v6_syn_recv_sock,
.net_header_len = sizeof(struct ipv6hdr),
- .net_frag_header_len = sizeof(struct frag_hdr),
.setsockopt = ipv6_setsockopt,
.getsockopt = ipv6_getsockopt,
.addr2sockaddr = inet6_csk_addr2sockaddr,
@@ -1899,11 +2062,19 @@ const struct inet_connection_sock_af_ops ipv6_specific = {
.mtu_reduced = tcp_v6_mtu_reduced,
};
-#ifdef CONFIG_TCP_MD5SIG
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { /* TCP authentication hooks for native IPv6 sockets; tcp_v6_init_sock() stores this in tcp_sk(sk)->af_specific */
+#ifdef CONFIG_TCP_MD5SIG /* the table is now built when either MD5 or AO is enabled, so each group needs its own guard */
.md5_lookup = tcp_v6_md5_lookup,
.calc_md5_hash = tcp_v6_md5_hash_skb,
.md5_parse = tcp_v6_parse_md5_keys,
+#endif /* CONFIG_TCP_MD5SIG */
+#ifdef CONFIG_TCP_AO
+ .ao_lookup = tcp_v6_ao_lookup,
+ .calc_ao_hash = tcp_v6_ao_hash_skb,
+ .ao_parse = tcp_v6_parse_ao,
+ .ao_calc_key_sk = tcp_v6_ao_calc_key_sk,
+#endif /* CONFIG_TCP_AO */
};
#endif
@@ -1925,11 +2096,19 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
.mtu_reduced = tcp_v4_mtu_reduced,
};
-#ifdef CONFIG_TCP_MD5SIG
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { /* variant that tcp_v6_syn_recv_sock() installs on the child socket whose sk_backlog_rcv is tcp_v4_do_rcv */
+#ifdef CONFIG_TCP_MD5SIG /* MD5: IPv4 lookup and hash, IPv6 key parser */
.md5_lookup = tcp_v4_md5_lookup,
.calc_md5_hash = tcp_v4_md5_hash_skb,
.md5_parse = tcp_v6_parse_md5_keys,
+#endif /* CONFIG_TCP_MD5SIG */
+#ifdef CONFIG_TCP_AO /* AO: IPv4 hash and key derivation, IPv6 lookup and parser */
+ .ao_lookup = tcp_v6_ao_lookup,
+ .calc_ao_hash = tcp_v4_ao_hash_skb,
+ .ao_parse = tcp_v6_parse_ao,
+ .ao_calc_key_sk = tcp_v4_ao_calc_key_sk,
+#endif /* CONFIG_TCP_AO */
};
#endif
@@ -1944,7 +2123,7 @@ static int tcp_v6_init_sock(struct sock *sk)
icsk->icsk_af_ops = &ipv6_specific;
-#ifdef CONFIG_TCP_MD5SIG
+#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 86b5d509a468..622b10a549f7 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -410,10 +410,11 @@ try_again:
*addr_len = sizeof(*sin6);
BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk,
- (struct sockaddr *)sin6);
+ (struct sockaddr *)sin6,
+ addr_len);
}
- if (udp_sk(sk)->gro_enabled)
+ if (udp_test_bit(GRO_ENABLED, sk))
udp_cmsg_recv(msg, sk, skb);
if (np->rxopt.all)
@@ -571,7 +572,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
inet6_iif(skb), inet6_sdif(skb), udptable, NULL);
- if (!sk || udp_sk(sk)->encap_type) {
+ if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) {
/* No socket for error: try tunnels before discarding */
if (static_branch_unlikely(&udpv6_encap_needed_key)) {
sk = __udp6_lib_err_encap(net, hdr, offset, uh,
@@ -598,7 +599,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!ip6_sk_accept_pmtu(sk))
goto out;
ip6_sk_update_pmtu(skb, sk, info);
- if (np->pmtudisc != IPV6_PMTUDISC_DONT)
+ if (READ_ONCE(np->pmtudisc) != IPV6_PMTUDISC_DONT)
harderr = 1;
}
if (type == NDISC_REDIRECT) {
@@ -619,7 +620,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
goto out;
}
- if (!np->recverr) {
+ if (!inet6_test_bit(RECVERR6, sk)) {
if (!harderr || sk->sk_state != TCP_ESTABLISHED)
goto out;
} else {
@@ -688,7 +689,8 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
}
nf_reset_ct(skb);
- if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) {
+ if (static_branch_unlikely(&udpv6_encap_needed_key) &&
+ READ_ONCE(up->encap_type)) {
int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
/*
@@ -726,16 +728,17 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
/*
* UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
*/
- if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
+ if (udp_test_bit(UDPLITE_RECV_CC, sk) && UDP_SKB_CB(skb)->partial_cov) {
+ u16 pcrlen = READ_ONCE(up->pcrlen);
- if (up->pcrlen == 0) { /* full coverage was set */
+ if (pcrlen == 0) { /* full coverage was set */
net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n",
UDP_SKB_CB(skb)->cscov, skb->len);
goto drop;
}
- if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
+ if (UDP_SKB_CB(skb)->cscov < pcrlen) {
net_dbg_ratelimited("UDPLITE6: coverage %d too small, need min %d\n",
- UDP_SKB_CB(skb)->cscov, up->pcrlen);
+ UDP_SKB_CB(skb)->cscov, pcrlen);
goto drop;
}
}
@@ -858,7 +861,7 @@ start_lookup:
/* If zero checksum and no_check is not on for
* the socket then skip it.
*/
- if (!uh->check && !udp_sk(sk)->no_check6_rx)
+ if (!uh->check && !udp_get_no_check6_rx(sk))
continue;
if (!first) {
first = sk;
@@ -980,7 +983,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst))
udp6_sk_rx_dst_set(sk, dst);
- if (!uh->check && !udp_sk(sk)->no_check6_rx) {
+ if (!uh->check && !udp_get_no_check6_rx(sk)) {
if (refcounted)
sock_put(sk);
goto report_csum_error;
@@ -1002,7 +1005,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
/* Unicast */
sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
if (sk) {
- if (!uh->check && !udp_sk(sk)->no_check6_rx)
+ if (!uh->check && !udp_get_no_check6_rx(sk))
goto report_csum_error;
return udp6_unicast_rcv_skb(sk, skb, uh);
}
@@ -1155,7 +1158,7 @@ static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
- return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr);
+ return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr, &addr_len);
}
/**
@@ -1241,7 +1244,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
kfree_skb(skb);
return -EINVAL;
}
- if (udp_sk(sk)->no_check6_tx) {
+ if (udp_get_no_check6_tx(sk)) {
kfree_skb(skb);
return -EINVAL;
}
@@ -1262,7 +1265,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
if (is_udplite)
csum = udplite_csum(skb);
- else if (udp_sk(sk)->no_check6_tx) { /* UDP csum disabled */
+ else if (udp_get_no_check6_tx(sk)) { /* UDP csum disabled */
skb->ip_summed = CHECKSUM_NONE;
goto send;
} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
@@ -1281,7 +1284,7 @@ csum_partial:
send:
err = ip6_send_skb(skb);
if (err) {
- if (err == -ENOBUFS && !inet6_sk(sk)->recverr) {
+ if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) {
UDP6_INC_STATS(sock_net(sk),
UDP_MIB_SNDBUFERRORS, is_udplite);
err = 0;
@@ -1332,7 +1335,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int addr_len = msg->msg_namelen;
bool connected = false;
int ulen = len;
- int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
+ int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE;
int err;
int is_udplite = IS_UDPLITE(sk);
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
@@ -1427,7 +1430,7 @@ do_udp_sendmsg:
fl6->fl6_dport = sin6->sin6_port;
daddr = &sin6->sin6_addr;
- if (np->sndflow) {
+ if (inet6_test_bit(SNDFLOW, sk)) {
fl6->flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) {
flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
@@ -1508,6 +1511,7 @@ do_udp_sendmsg:
if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) {
err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,
(struct sockaddr *)sin6,
+ &addr_len,
&fl6->saddr);
if (err)
goto out_no_dst;
@@ -1593,7 +1597,7 @@ back_from_confirm:
do_append_data:
if (ipc6.dontfrag < 0)
- ipc6.dontfrag = np->dontfrag;
+ ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk);
up->len += ulen;
err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
&ipc6, fl6, (struct rt6_info *)dst,
@@ -1606,7 +1610,7 @@ do_append_data:
up->pending = 0;
if (err > 0)
- err = np->recverr ? net_xmit_errno(err) : 0;
+ err = inet6_test_bit(RECVERR6, sk) ? net_xmit_errno(err) : 0;
release_sock(sk);
out:
@@ -1644,11 +1648,11 @@ static void udpv6_splice_eof(struct socket *sock)
struct sock *sk = sock->sk;
struct udp_sock *up = udp_sk(sk);
- if (!up->pending || READ_ONCE(up->corkflag))
+ if (!up->pending || udp_test_bit(CORK, sk))
return;
lock_sock(sk);
- if (up->pending && !READ_ONCE(up->corkflag))
+ if (up->pending && !udp_test_bit(CORK, sk))
udp_v6_push_pending_frames(sk);
release_sock(sk);
}
@@ -1670,7 +1674,7 @@ void udpv6_destroy_sock(struct sock *sk)
if (encap_destroy)
encap_destroy(sk);
}
- if (up->encap_enabled) {
+ if (udp_test_bit(ENCAP_ENABLED, sk)) {
static_branch_dec(&udpv6_encap_needed_key);
udp_encap_disable();
}
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 267d491e9707..a60bec9b14f1 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -17,7 +17,6 @@
static int udplitev6_sk_init(struct sock *sk)
{
udpv6_init_sock(sk);
- udp_sk(sk)->pcflag = UDPLITE_BIT;
pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, "
"please contact the netdev mailing list\n");
return 0;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 4907ab241d6b..6e36e5047fba 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -16,6 +16,8 @@
#include <linux/netfilter_ipv6.h>
#include <net/ipv6.h>
#include <net/xfrm.h>
+#include <net/protocol.h>
+#include <net/gro.h>
int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
struct ip6_tnl *t)
@@ -67,28 +69,18 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
return 0;
}
-/* If it's a keepalive packet, then just eat it.
- * If it's an encapsulated packet, then pass it to the
- * IPsec xfrm input.
- * Returns 0 if skb passed to xfrm or was dropped.
- * Returns >0 if skb should be passed to UDP.
- * Returns <0 if skb should be resubmitted (-ret is protocol)
- */
-int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+static int __xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
{
struct udp_sock *up = udp_sk(sk);
struct udphdr *uh;
struct ipv6hdr *ip6h;
int len;
int ip6hlen = sizeof(struct ipv6hdr);
-
__u8 *udpdata;
__be32 *udpdata32;
- __u16 encap_type = up->encap_type;
-
- if (skb->protocol == htons(ETH_P_IP))
- return xfrm4_udp_encap_rcv(sk, skb);
+ u16 encap_type;
+ encap_type = READ_ONCE(up->encap_type);
/* if this is not encapsulated socket, then just return now */
if (!encap_type)
return 1;
@@ -109,7 +101,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
case UDP_ENCAP_ESPINUDP:
/* Check if this is a keepalive packet. If so, eat it. */
if (len == 1 && udpdata[0] == 0xff) {
- goto drop;
+ return -EINVAL;
} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
/* ESP Packet without Non-ESP header */
len = sizeof(struct udphdr);
@@ -120,7 +112,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
case UDP_ENCAP_ESPINUDP_NON_IKE:
/* Check if this is a keepalive packet. If so, eat it. */
if (len == 1 && udpdata[0] == 0xff) {
- goto drop;
+ return -EINVAL;
} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
udpdata32[0] == 0 && udpdata32[1] == 0) {
@@ -138,31 +130,100 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
* protocol to ESP, and then call into the transform receiver.
*/
if (skb_unclone(skb, GFP_ATOMIC))
- goto drop;
+ return -EINVAL;
/* Now we can update and verify the packet length... */
ip6h = ipv6_hdr(skb);
ip6h->payload_len = htons(ntohs(ip6h->payload_len) - len);
if (skb->len < ip6hlen + len) {
/* packet is too small!?! */
- goto drop;
+ return -EINVAL;
}
/* pull the data buffer up to the ESP header and set the
* transport header to point to ESP. Keep UDP on the stack
* for later.
*/
- __skb_pull(skb, len);
- skb_reset_transport_header(skb);
+ if (pull) {
+ __skb_pull(skb, len);
+ skb_reset_transport_header(skb);
+ } else {
+ skb_set_transport_header(skb, len);
+ }
/* process ESP */
- return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
-
-drop:
- kfree_skb(skb);
return 0;
}
+/* UDP encap_rcv entry point for ESP-in-UDP.
+ *
+ * If it's a keepalive packet, then just eat it.
+ * If it's an encapsulated packet, then pass it to the
+ * IPsec xfrm input.
+ *
+ * Packets whose skb->protocol is ETH_P_IP are delegated to
+ * xfrm4_udp_encap_rcv().  Otherwise __xfrm6_udp_encap_rcv() is called
+ * with pull == true and its result is mapped as follows:
+ *    0 - hand the skb to xfrm6_rcv_encap() and return its result
+ *   <0 - keepalive, unclone failure or short packet: free the skb, return 0
+ *   >0 - return the value unchanged so the caller keeps processing the skb
+ *
+ * Returns 0 if skb passed to xfrm or was dropped.
+ * Returns >0 if skb should be passed to UDP.
+ * Returns <0 if skb should be resubmitted (-ret is protocol)
+ */
+int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	int ret;
+
+	if (skb->protocol == htons(ETH_P_IP))
+		return xfrm4_udp_encap_rcv(sk, skb);
+
+	ret = __xfrm6_udp_encap_rcv(sk, skb, true);
+	if (!ret)
+		/* encap_type is read without the socket lock; the helper
+		 * above and the other readers use READ_ONCE(), so annotate
+		 * this load the same way instead of using a plain access.
+		 */
+		return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0,
+				       READ_ONCE(udp_sk(sk)->encap_type));
+
+	if (ret < 0) {
+		/* The helper no longer frees the skb itself; do it here. */
+		kfree_skb(skb);
+		return 0;
+	}
+
+	return ret;
+}
+
+struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+ struct sk_buff *skb)
+{ /* GRO receive hook for ESP-in-UDP: once __xfrm6_udp_encap_rcv() accepts
+   * the packet, it is handed to the gro_receive callback registered for
+   * IPPROTO_ESP in inet6_offloads[]. Returns that callback's result, or
+   * NULL when the packet is not taken (the "out" path also sets flush and
+   * clears same_flow in NAPI_GRO_CB).
+   */
+ int offset = skb_gro_offset(skb);
+ const struct net_offload *ops;
+ struct sk_buff *pp = NULL;
+ int ret;
+
+ if (skb->protocol == htons(ETH_P_IP))
+ return xfrm4_gro_udp_encap_rcv(sk, head, skb); /* IPv4 packet: delegate to the IPv4 variant */
+
+ offset = offset - sizeof(struct udphdr); /* back up one UDP header from the GRO offset; presumably skb->data then lands on the UDP header - TODO confirm */
+
+ if (!pskb_pull(skb, offset))
+ return NULL; /* NOTE(review): unlike the "out" path, this return leaves the flush/same_flow flags untouched */
+
+ rcu_read_lock();
+ ops = rcu_dereference(inet6_offloads[IPPROTO_ESP]);
+ if (!ops || !ops->callbacks.gro_receive)
+ goto out; /* no ESP GRO handler registered */
+
+ ret = __xfrm6_udp_encap_rcv(sk, skb, false); /* pull == false: the helper only sets the transport header offset, it does not advance skb->data */
+ if (ret)
+ goto out; /* non-zero (1 or -EINVAL in the visible helper paths): not handled here */
+
+ skb_push(skb, offset); /* undo the pskb_pull() above before calling the ESP handler */
+ NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
+
+ pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+ rcu_read_unlock();
+
+ return pp;
+
+out:
+ rcu_read_unlock();
+ skb_push(skb, offset); /* undo the pskb_pull() above */
+ NAPI_GRO_CB(skb)->same_flow = 0;
+ NAPI_GRO_CB(skb)->flush = 1;
+
+ return NULL;
+}
+
int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t)
{
return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index ad07904642ca..5f7b1fdbffe6 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -95,7 +95,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
return -EMSGSIZE;
}
- if (toobig || dst_allfrag(skb_dst(skb)))
+ if (toobig)
return ip6_fragment(net, sk, skb,
__xfrm6_output_finish);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 41a680c76d2e..42fb6996b077 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -117,10 +117,10 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
{
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
- if (likely(xdst->u.rt6.rt6i_idev))
- in6_dev_put(xdst->u.rt6.rt6i_idev);
dst_destroy_metrics_generic(dst);
rt6_uncached_list_del(&xdst->u.rt6);
+ if (likely(xdst->u.rt6.rt6i_idev))
+ in6_dev_put(xdst->u.rt6.rt6i_idev);
xfrm_dst_destroy(xdst);
}