summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2023-08-03 14:29:50 -0700
committerJakub Kicinski <kuba@kernel.org>2023-08-03 14:34:37 -0700
commit35b1b1fd96388d5e3cf179bf36bd8a4153baf4a3 (patch)
tree0c0debbc24d6633ec6f8980f94e70f924aeee74e /net/ipv4
parent36e68eadd303dce58018b503186a89bcb27d527e (diff)
parent999f6631866e9ea81add935b9c6ebaab0579d259 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR. Conflicts: net/dsa/port.c 9945c1fb03a3 ("net: dsa: fix older DSA drivers using phylink") a88dd7538461 ("net: dsa: remove legacy_pre_march2020 detection") https://lore.kernel.org/all/20230731102254.2c9868ca@canb.auug.org.au/ net/xdp/xsk.c 3c5b4d69c358 ("net: annotate data-races around sk->sk_mark") b7f72a30e9ac ("xsk: introduce wrappers and helpers for supporting multi-buffer in Tx path") https://lore.kernel.org/all/20230731102631.39988412@canb.auug.org.au/ drivers/net/ethernet/broadcom/bnxt/bnxt.c 37b61cda9c16 ("bnxt: don't handle XDP in netpoll") 2b56b3d99241 ("eth: bnxt: handle invalid Tx completions more gracefully") https://lore.kernel.org/all/20230801101708.1dc7faac@canb.auug.org.au/ Adjacent changes: drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c 62da08331f1a ("net/mlx5e: Set proper IPsec source port in L4 selector") fbd517549c32 ("net/mlx5e: Add function to get IPsec offload namespace") drivers/net/ethernet/sfc/selftest.c 55c1528f9b97 ("sfc: fix field-spanning memcpy in selftest") ae9d445cd41f ("sfc: Miscellaneous comment removals") Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/inet_diag.c4
-rw-r--r--net/ipv4/ip_output.c17
-rw-r--r--net/ipv4/ip_sockglue.c2
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/route.c4
-rw-r--r--net/ipv4/tcp_ipv4.c4
-rw-r--r--net/ipv4/tcp_metrics.c70
-rw-r--r--net/ipv4/udp.c8
-rw-r--r--net/ipv4/udp_offload.c7
9 files changed, 76 insertions, 42 deletions
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index b812eb36f0e3..f7426926a104 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -150,7 +150,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
}
#endif
- if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark))
+ if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, READ_ONCE(sk->sk_mark)))
goto errout;
if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) ||
@@ -799,7 +799,7 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
entry.ifindex = sk->sk_bound_dev_if;
entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
if (sk_fullsock(sk))
- entry.mark = sk->sk_mark;
+ entry.mark = READ_ONCE(sk->sk_mark);
else if (sk->sk_state == TCP_NEW_SYN_RECV)
entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
else if (sk->sk_state == TCP_TIME_WAIT)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6e70839257f7..6ba1a0fafbaa 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -184,9 +184,9 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
ip_options_build(skb, &opt->opt, daddr, rt);
}
- skb->priority = sk->sk_priority;
+ skb->priority = READ_ONCE(sk->sk_priority);
if (!skb->mark)
- skb->mark = sk->sk_mark;
+ skb->mark = READ_ONCE(sk->sk_mark);
/* Send it out. */
return ip_local_out(net, skb->sk, skb);
@@ -528,8 +528,8 @@ packet_routed:
skb_shinfo(skb)->gso_segs ?: 1);
/* TODO : should we use skb->sk here instead of sk ? */
- skb->priority = sk->sk_priority;
- skb->mark = sk->sk_mark;
+ skb->priority = READ_ONCE(sk->sk_priority);
+ skb->mark = READ_ONCE(sk->sk_mark);
res = ip_local_out(net, sk, skb);
rcu_read_unlock();
@@ -1158,10 +1158,15 @@ alloc_new_skb:
}
copy = datalen - transhdrlen - fraggap - pagedlen;
+ /* [!] NOTE: copy will be negative if pagedlen>0
+ * because then the equation reduces to -fraggap.
+ */
if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
goto error;
+ } else if (flags & MSG_SPLICE_PAGES) {
+ copy = 0;
}
offset += copy;
@@ -1209,6 +1214,10 @@ alloc_new_skb:
} else if (flags & MSG_SPLICE_PAGES) {
struct msghdr *msg = from;
+ err = -EIO;
+ if (WARN_ON_ONCE(copy > msg->msg_iter.count))
+ goto error;
+
err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
sk->sk_allocation);
if (err < 0)
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 8e97d8d4cc9d..d41bce8927b2 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -592,7 +592,7 @@ void __ip_sock_set_tos(struct sock *sk, int val)
}
if (inet_sk(sk)->tos != val) {
inet_sk(sk)->tos = val;
- sk->sk_priority = rt_tos2priority(val);
+ WRITE_ONCE(sk->sk_priority, rt_tos2priority(val));
sk_dst_reset(sk);
}
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 7782ff5e6539..cb381f5aa464 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -348,7 +348,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
goto error;
skb_reserve(skb, hlen);
- skb->priority = sk->sk_priority;
+ skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc->mark;
skb->tstamp = sockc->transmit_time;
skb_dst_set(skb, &rt->dst);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 98d7e6ba7493..92fede388d52 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -518,7 +518,7 @@ static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
const struct inet_sock *inet = inet_sk(sk);
oif = sk->sk_bound_dev_if;
- mark = sk->sk_mark;
+ mark = READ_ONCE(sk->sk_mark);
tos = ip_sock_rt_tos(sk);
scope = ip_sock_rt_scope(sk);
prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
@@ -552,7 +552,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
inet_opt = rcu_dereference(inet->inet_opt);
if (inet_opt && inet_opt->opt.srr)
daddr = inet_opt->opt.faddr;
- flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
+ flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark),
ip_sock_rt_tos(sk) & IPTOS_RT_MASK,
ip_sock_rt_scope(sk),
inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 09cffbc82d32..5b18a048f613 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -932,9 +932,9 @@ static void tcp_v4_send_ack(const struct sock *sk,
ctl_sk = this_cpu_read(ipv4_tcp_sk);
sock_net_set(ctl_sk, net);
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
- inet_twsk(sk)->tw_mark : sk->sk_mark;
+ inet_twsk(sk)->tw_mark : READ_ONCE(sk->sk_mark);
ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
- inet_twsk(sk)->tw_priority : sk->sk_priority;
+ inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority);
transmit_time = tcp_transmit_time(sk);
ip_send_unicast_reply(ctl_sk,
skb, &TCP_SKB_CB(skb)->header.h4.opt,
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 82f4575f9cd9..99ac5efe244d 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -40,7 +40,7 @@ struct tcp_fastopen_metrics {
struct tcp_metrics_block {
struct tcp_metrics_block __rcu *tcpm_next;
- possible_net_t tcpm_net;
+ struct net *tcpm_net;
struct inetpeer_addr tcpm_saddr;
struct inetpeer_addr tcpm_daddr;
unsigned long tcpm_stamp;
@@ -51,34 +51,38 @@ struct tcp_metrics_block {
struct rcu_head rcu_head;
};
-static inline struct net *tm_net(struct tcp_metrics_block *tm)
+static inline struct net *tm_net(const struct tcp_metrics_block *tm)
{
- return read_pnet(&tm->tcpm_net);
+ /* Paired with the WRITE_ONCE() in tcpm_new() */
+ return READ_ONCE(tm->tcpm_net);
}
static bool tcp_metric_locked(struct tcp_metrics_block *tm,
enum tcp_metric_index idx)
{
- return tm->tcpm_lock & (1 << idx);
+ /* Paired with WRITE_ONCE() in tcpm_suck_dst() */
+ return READ_ONCE(tm->tcpm_lock) & (1 << idx);
}
-static u32 tcp_metric_get(struct tcp_metrics_block *tm,
+static u32 tcp_metric_get(const struct tcp_metrics_block *tm,
enum tcp_metric_index idx)
{
- return tm->tcpm_vals[idx];
+ /* Paired with WRITE_ONCE() in tcp_metric_set() */
+ return READ_ONCE(tm->tcpm_vals[idx]);
}
static void tcp_metric_set(struct tcp_metrics_block *tm,
enum tcp_metric_index idx,
u32 val)
{
- tm->tcpm_vals[idx] = val;
+ /* Paired with READ_ONCE() in tcp_metric_get() */
+ WRITE_ONCE(tm->tcpm_vals[idx], val);
}
static bool addr_same(const struct inetpeer_addr *a,
const struct inetpeer_addr *b)
{
- return inetpeer_addr_cmp(a, b) == 0;
+ return (a->family == b->family) && !inetpeer_addr_cmp(a, b);
}
struct tcpm_hash_bucket {
@@ -89,6 +93,7 @@ static struct tcpm_hash_bucket *tcp_metrics_hash __read_mostly;
static unsigned int tcp_metrics_hash_log __read_mostly;
static DEFINE_SPINLOCK(tcp_metrics_lock);
+static DEFINE_SEQLOCK(fastopen_seqlock);
static void tcpm_suck_dst(struct tcp_metrics_block *tm,
const struct dst_entry *dst,
@@ -97,7 +102,7 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
u32 msval;
u32 val;
- tm->tcpm_stamp = jiffies;
+ WRITE_ONCE(tm->tcpm_stamp, jiffies);
val = 0;
if (dst_metric_locked(dst, RTAX_RTT))
@@ -110,30 +115,42 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
val |= 1 << TCP_METRIC_CWND;
if (dst_metric_locked(dst, RTAX_REORDERING))
val |= 1 << TCP_METRIC_REORDERING;
- tm->tcpm_lock = val;
+ /* Paired with READ_ONCE() in tcp_metric_locked() */
+ WRITE_ONCE(tm->tcpm_lock, val);
msval = dst_metric_raw(dst, RTAX_RTT);
- tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC;
+ tcp_metric_set(tm, TCP_METRIC_RTT, msval * USEC_PER_MSEC);
msval = dst_metric_raw(dst, RTAX_RTTVAR);
- tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC;
- tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
- tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
- tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
+ tcp_metric_set(tm, TCP_METRIC_RTTVAR, msval * USEC_PER_MSEC);
+ tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
+ dst_metric_raw(dst, RTAX_SSTHRESH));
+ tcp_metric_set(tm, TCP_METRIC_CWND,
+ dst_metric_raw(dst, RTAX_CWND));
+ tcp_metric_set(tm, TCP_METRIC_REORDERING,
+ dst_metric_raw(dst, RTAX_REORDERING));
if (fastopen_clear) {
+ write_seqlock(&fastopen_seqlock);
tm->tcpm_fastopen.mss = 0;
tm->tcpm_fastopen.syn_loss = 0;
tm->tcpm_fastopen.try_exp = 0;
tm->tcpm_fastopen.cookie.exp = false;
tm->tcpm_fastopen.cookie.len = 0;
+ write_sequnlock(&fastopen_seqlock);
}
}
#define TCP_METRICS_TIMEOUT (60 * 60 * HZ)
-static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst)
+static void tcpm_check_stamp(struct tcp_metrics_block *tm,
+ const struct dst_entry *dst)
{
- if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT)))
+ unsigned long limit;
+
+ if (!tm)
+ return;
+ limit = READ_ONCE(tm->tcpm_stamp) + TCP_METRICS_TIMEOUT;
+ if (unlikely(time_after(jiffies, limit)))
tcpm_suck_dst(tm, dst, false);
}
@@ -174,20 +191,23 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
oldest = deref_locked(tcp_metrics_hash[hash].chain);
for (tm = deref_locked(oldest->tcpm_next); tm;
tm = deref_locked(tm->tcpm_next)) {
- if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp))
+ if (time_before(READ_ONCE(tm->tcpm_stamp),
+ READ_ONCE(oldest->tcpm_stamp)))
oldest = tm;
}
tm = oldest;
} else {
- tm = kmalloc(sizeof(*tm), GFP_ATOMIC);
+ tm = kzalloc(sizeof(*tm), GFP_ATOMIC);
if (!tm)
goto out_unlock;
}
- write_pnet(&tm->tcpm_net, net);
+ /* Paired with the READ_ONCE() in tm_net() */
+ WRITE_ONCE(tm->tcpm_net, net);
+
tm->tcpm_saddr = *saddr;
tm->tcpm_daddr = *daddr;
- tcpm_suck_dst(tm, dst, true);
+ tcpm_suck_dst(tm, dst, reclaim);
if (likely(!reclaim)) {
tm->tcpm_next = tcp_metrics_hash[hash].chain;
@@ -434,7 +454,7 @@ void tcp_update_metrics(struct sock *sk)
tp->reordering);
}
}
- tm->tcpm_stamp = jiffies;
+ WRITE_ONCE(tm->tcpm_stamp, jiffies);
out_unlock:
rcu_read_unlock();
}
@@ -539,8 +559,6 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
return ret;
}
-static DEFINE_SEQLOCK(fastopen_seqlock);
-
void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie)
{
@@ -647,7 +665,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
}
if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE,
- jiffies - tm->tcpm_stamp,
+ jiffies - READ_ONCE(tm->tcpm_stamp),
TCP_METRICS_ATTR_PAD) < 0)
goto nla_put_failure;
@@ -658,7 +676,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
if (!nest)
goto nla_put_failure;
for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) {
- u32 val = tm->tcpm_vals[i];
+ u32 val = tcp_metric_get(tm, i);
if (!val)
continue;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8c3ebd95f5b9..1ee9e56dc79a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -114,6 +114,7 @@
#include <net/sock_reuseport.h>
#include <net/addrconf.h>
#include <net/udp_tunnel.h>
+#include <net/gro.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6_stubs.h>
#endif
@@ -555,10 +556,13 @@ struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb,
{
const struct iphdr *iph = ip_hdr(skb);
struct net *net = dev_net(skb->dev);
+ int iif, sdif;
+
+ inet_get_iif_sdif(skb, &iif, &sdif);
return __udp4_lib_lookup(net, iph->saddr, sport,
- iph->daddr, dport, inet_iif(skb),
- inet_sdif(skb), net->ipv4.udp_table, NULL);
+ iph->daddr, dport, iif,
+ sdif, net->ipv4.udp_table, NULL);
}
/* Must be called under rcu_read_lock().
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index f402946da344..0f46b3c2e4ac 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -609,10 +609,13 @@ static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
{
const struct iphdr *iph = skb_gro_network_header(skb);
struct net *net = dev_net(skb->dev);
+ int iif, sdif;
+
+ inet_get_iif_sdif(skb, &iif, &sdif);
return __udp4_lib_lookup(net, iph->saddr, sport,
- iph->daddr, dport, inet_iif(skb),
- inet_sdif(skb), net->ipv4.udp_table, NULL);
+ iph->daddr, dport, iif,
+ sdif, net->ipv4.udp_table, NULL);
}
INDIRECT_CALLABLE_SCOPE