summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-09-07 18:33:07 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-09-07 18:33:07 -0700
commit73be7fb14e83d24383f840a22f24d3ed222ca319 (patch)
treec03cb0058aa829aca0b30cad5e82f400e4561a00 /net/ipv4
parent2ab35ce202f8ba56d4b0930985426214341638a7 (diff)
parent1b36955cc048c8ff6ba448dbf4be0e52f59f2963 (diff)
Merge tag 'net-6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking updates from Jakub Kicinski: "Including fixes from netfilter and bpf. Current release - regressions: - eth: stmmac: fix failure to probe without MAC interface specified Current release - new code bugs: - docs: netlink: fix missing classic_netlink doc reference Previous releases - regressions: - deal with integer overflows in kmalloc_reserve() - use sk_forward_alloc_get() in sk_get_meminfo() - bpf_sk_storage: fix the missing uncharge in sk_omem_alloc - fib: avoid warn splat in flow dissector after packet mangling - skb_segment: call zero copy functions before using skbuff frags - eth: sfc: check for zero length in EF10 RX prefix Previous releases - always broken: - af_unix: fix msg_controllen test in scm_pidfd_recv() for MSG_CMSG_COMPAT - xsk: fix xsk_build_skb() dereferencing possible ERR_PTR() - netfilter: - nft_exthdr: fix non-linear header modification - xt_u32, xt_sctp: validate user space input - nftables: exthdr: fix 4-byte stack OOB write - nfnetlink_osf: avoid OOB read - one more fix for the garbage collection work from last release - igmp: limit igmpv3_newpack() packet size to IP_MAX_MTU - bpf, sockmap: fix preempt_rt splat when using raw_spin_lock_t - handshake: fix null-deref in handshake_nl_done_doit() - ip: ignore dst hint for multipath routes to ensure packets are hashed across the nexthops - phy: micrel: - correct bit assignments for cable test errata - disable EEE according to the KSZ9477 errata Misc: - docs/bpf: document compile-once-run-everywhere (CO-RE) relocations - Revert "net: macsec: preserve ingress frame ordering", it appears to have been developed against an older kernel, problem doesn't exist upstream" * tag 'net-6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (95 commits) net: enetc: distinguish error from valid pointers in enetc_fixup_clear_rss_rfs() Revert "net: team: do not use dynamic lockdep key" net: hns3: remove GSO partial feature bit net: hns3: fix the port information display when sfp is absent net: hns3: fix invalid mutex between tc qdisc and dcb ets command issue net: hns3: fix debugfs concurrency issue between kfree buffer and read net: hns3: fix byte order conversion issue in hclge_dbg_fd_tcam_read() net: hns3: Support query tx timeout threshold by debugfs net: hns3: fix tx timeout issue net: phy: Provide Module 4 KSZ9477 errata (DS80000754C) netfilter: nf_tables: Unbreak audit log reset netfilter: ipset: add the missing IP_SET_HASH_WITH_NET0 macro for ip_set_hash_netportnet.c netfilter: nft_set_rbtree: skip sync GC for new elements in this transaction netfilter: nf_tables: uapi: Describe NFTA_RULE_CHAIN_ID netfilter: nfnetlink_osf: avoid OOB read netfilter: nftables: exthdr: fix 4-byte stack OOB write selftests/bpf: Check bpf_sk_storage has uncharged sk_omem_alloc bpf: bpf_sk_storage: Fix the missing uncharge in sk_omem_alloc bpf: bpf_sk_storage: Fix invalid wait context lockdep report s390/bpf: Pass through tail call counter in trampolines ...
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/fib_semantics.c5
-rw-r--r--net/ipv4/fib_trie.c3
-rw-r--r--net/ipv4/igmp.c3
-rw-r--r--net/ipv4/ip_forward.c1
-rw-r--r--net/ipv4/ip_input.c3
-rw-r--r--net/ipv4/ip_output.c9
-rw-r--r--net/ipv4/ip_sockglue.c2
-rw-r--r--net/ipv4/ipmr.c1
-rw-r--r--net/ipv4/route.c1
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/ipv4/tcp_output.c2
-rw-r--r--net/ipv4/udp.c6
12 files changed, 22 insertions, 18 deletions
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 65ba18a91865..eafa4a033515 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -278,7 +278,8 @@ void fib_release_info(struct fib_info *fi)
hlist_del(&nexthop_nh->nh_hash);
} endfor_nexthops(fi)
}
- fi->fib_dead = 1;
+ /* Paired with READ_ONCE() from fib_table_lookup() */
+ WRITE_ONCE(fi->fib_dead, 1);
fib_info_put(fi);
}
spin_unlock_bh(&fib_info_lock);
@@ -1581,6 +1582,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
link_it:
ofi = fib_find_info(fi);
if (ofi) {
+ /* fib_table_lookup() should not see @fi yet. */
fi->fib_dead = 1;
free_fib_info(fi);
refcount_inc(&ofi->fib_treeref);
@@ -1619,6 +1621,7 @@ err_inval:
failure:
if (fi) {
+ /* fib_table_lookup() should not see @fi yet. */
fi->fib_dead = 1;
free_fib_info(fi);
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 74d403dbd2b4..d13fb9e76b97 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1582,7 +1582,8 @@ found:
if (fa->fa_dscp &&
inet_dscp_to_dsfield(fa->fa_dscp) != flp->flowi4_tos)
continue;
- if (fi->fib_dead)
+ /* Paired with WRITE_ONCE() in fib_release_info() */
+ if (READ_ONCE(fi->fib_dead))
continue;
if (fa->fa_info->fib_scope < flp->flowi4_scope)
continue;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 0c9e768e5628..418e5fb58fd3 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -353,8 +353,9 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
struct flowi4 fl4;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
- unsigned int size = mtu;
+ unsigned int size;
+ size = min(mtu, IP_MAX_MTU);
while (1) {
skb = alloc_skb(size + hlen + tlen,
GFP_ATOMIC | __GFP_NOWARN);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index e18931a6d153..66fac1216d46 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -67,7 +67,6 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s
struct ip_options *opt = &(IPCB(skb)->opt);
__IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
- __IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
#ifdef CONFIG_NET_SWITCHDEV
if (skb->offload_l3_fwd_mark) {
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index fe9ead9ee863..5e9c8156656a 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -584,7 +584,8 @@ static void ip_sublist_rcv_finish(struct list_head *head)
static struct sk_buff *ip_extract_route_hint(const struct net *net,
struct sk_buff *skb, int rt_type)
{
- if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST)
+ if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST ||
+ IPCB(skb)->flags & IPSKB_MULTIPATH)
return NULL;
return skb;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 43ba4b77b248..4ab877cf6d35 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -207,6 +207,9 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
} else if (rt->rt_type == RTN_BROADCAST)
IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len);
+ /* OUTOCTETS should be counted after fragment */
+ IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
+
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
skb = skb_expand_head(skb, hh_len);
if (!skb)
@@ -366,8 +369,6 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
/*
* If the indicated interface is up and running, send the packet.
*/
- IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
-
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
@@ -424,8 +425,6 @@ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
- IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
-
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
@@ -982,7 +981,7 @@ static int __ip_append_data(struct sock *sk,
paged = !!cork->gso_size;
if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
- sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+ READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
tskey = atomic_inc_return(&sk->sk_tskey) - 1;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index d1c73660b844..cce9cb25f3b3 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -511,7 +511,7 @@ static bool ipv4_datagram_support_cmsg(const struct sock *sk,
* or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
*/
info = PKTINFO_SKB_CB(skb);
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
+ if (!(READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_CMSG) ||
!info->ipi_ifindex)
return false;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 3f0c6d602fb7..9e222a57bc2b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1804,7 +1804,6 @@ static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
struct ip_options *opt = &(IPCB(skb)->opt);
IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
- IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
if (unlikely(opt->optlen))
ip_forward_options(skb);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d8c99bdc6170..66f419e7f9a7 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2144,6 +2144,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
fib_select_multipath(res, h);
+ IPCB(skb)->flags |= IPSKB_MULTIPATH;
}
#endif
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b1559481898d..0c3040a63ebd 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2256,14 +2256,14 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
}
}
- if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE)
+ if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_SOFTWARE)
has_timestamping = true;
else
tss->ts[0] = (struct timespec64) {0};
}
if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
- if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)
+ if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_RAW_HARDWARE)
has_timestamping = true;
else
tss->ts[2] = (struct timespec64) {0};
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e6b4fbd642f7..ccfc8bbf7455 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3474,7 +3474,7 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
if (delta <= 0)
return;
amt = sk_mem_pages(delta);
- sk->sk_forward_alloc += amt << PAGE_SHIFT;
+ sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
sk_memory_allocated_add(sk, amt);
if (mem_cgroup_sockets_enabled && sk->sk_memcg)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0794a2c46a56..f39b9c844580 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1414,9 +1414,9 @@ static void udp_rmem_release(struct sock *sk, int size, int partial,
spin_lock(&sk_queue->lock);
- sk->sk_forward_alloc += size;
+ sk_forward_alloc_add(sk, size);
amt = (sk->sk_forward_alloc - partial) & ~(PAGE_SIZE - 1);
- sk->sk_forward_alloc -= amt;
+ sk_forward_alloc_add(sk, -amt);
if (amt)
__sk_mem_reduce_allocated(sk, amt >> PAGE_SHIFT);
@@ -1527,7 +1527,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
goto uncharge_drop;
}
- sk->sk_forward_alloc -= size;
+ sk_forward_alloc_add(sk, -size);
/* no need to setup a destructor, we will explicitly release the
* forward allocated memory on dequeue