Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/icmp.c                      | 23
-rw-r--r--  net/ipv4/inet_hashtables.c           |  4
-rw-r--r--  net/ipv4/netfilter/nf_defrag_ipv4.c  | 30
-rw-r--r--  net/ipv4/tcp.c                       |  5
-rw-r--r--  net/ipv4/tcp_bpf.c                   | 27
-rw-r--r--  net/ipv4/tcp_ipv4.c                  | 45
-rw-r--r--  net/ipv4/udp.c                       |  6
-rw-r--r--  net/ipv4/udp_bpf.c                   |  1
8 files changed, 75 insertions, 66 deletions
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 8b30cadff708..b7e277d8a84d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1054,14 +1054,19 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)
iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr), &_iio);
if (!ext_hdr || !iio)
goto send_mal_query;
- if (ntohs(iio->extobj_hdr.length) <= sizeof(iio->extobj_hdr))
+ if (ntohs(iio->extobj_hdr.length) <= sizeof(iio->extobj_hdr) ||
+ ntohs(iio->extobj_hdr.length) > sizeof(_iio))
goto send_mal_query;
ident_len = ntohs(iio->extobj_hdr.length) - sizeof(iio->extobj_hdr);
+ iio = skb_header_pointer(skb, sizeof(_ext_hdr),
+ sizeof(iio->extobj_hdr) + ident_len, &_iio);
+ if (!iio)
+ goto send_mal_query;
+
status = 0;
dev = NULL;
switch (iio->extobj_hdr.class_type) {
case ICMP_EXT_ECHO_CTYPE_NAME:
- iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(_iio), &_iio);
if (ident_len >= IFNAMSIZ)
goto send_mal_query;
memset(buff, 0, sizeof(buff));
@@ -1069,30 +1074,24 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)
dev = dev_get_by_name(net, buff);
break;
case ICMP_EXT_ECHO_CTYPE_INDEX:
- iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr) +
- sizeof(iio->ident.ifindex), &_iio);
if (ident_len != sizeof(iio->ident.ifindex))
goto send_mal_query;
dev = dev_get_by_index(net, ntohl(iio->ident.ifindex));
break;
case ICMP_EXT_ECHO_CTYPE_ADDR:
- if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
+ if (ident_len < sizeof(iio->ident.addr.ctype3_hdr) ||
+ ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
iio->ident.addr.ctype3_hdr.addrlen)
goto send_mal_query;
switch (ntohs(iio->ident.addr.ctype3_hdr.afi)) {
case ICMP_AFI_IP:
- iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr) +
- sizeof(struct in_addr), &_iio);
- if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
- sizeof(struct in_addr))
+ if (iio->ident.addr.ctype3_hdr.addrlen != sizeof(struct in_addr))
goto send_mal_query;
dev = ip_dev_find(net, iio->ident.addr.ip_addr.ipv4_addr);
break;
#if IS_ENABLED(CONFIG_IPV6)
case ICMP_AFI_IP6:
- iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(_iio), &_iio);
- if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
- sizeof(struct in6_addr))
+ if (iio->ident.addr.ctype3_hdr.addrlen != sizeof(struct in6_addr))
goto send_mal_query;
dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev);
dev_hold(dev);
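
As an aside for readers skimming the icmp.c hunk above: the fix first validates the advertised object length against the on-stack buffer and only then re-reads the object with skb_header_pointer() at its real size. A rough userspace analogue of that pattern is sketched below; struct obj_hdr, parse_ident() and the sample packet are invented for illustration and are not kernel APIs.

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical userspace model of the two-step parse now used by
 * icmp_build_probe(): read the fixed-size extension object header,
 * bound-check its length field, then read the variable-length
 * identification object in full. */
struct obj_hdr {
    uint16_t length;      /* network byte order, includes the header itself */
    uint8_t class_num;
    uint8_t class_type;
};

static int parse_ident(const uint8_t *pkt, size_t pktlen,
                       uint8_t *ident, size_t ident_max)
{
    struct obj_hdr hdr;
    size_t ident_len;

    if (pktlen < sizeof(hdr))
        return -1;
    memcpy(&hdr, pkt, sizeof(hdr));

    /* Reject a length that is too small or larger than what the
     * caller's buffer can hold, before using it as a copy size. */
    if (ntohs(hdr.length) <= sizeof(hdr) ||
        ntohs(hdr.length) > sizeof(hdr) + ident_max)
        return -1;
    ident_len = ntohs(hdr.length) - sizeof(hdr);

    if (pktlen < sizeof(hdr) + ident_len)
        return -1;
    memcpy(ident, pkt + sizeof(hdr), ident_len);
    return (int)ident_len;
}

int main(void)
{
    /* length = 8 (header plus 4 bytes of data), class/type 3.1, ident "eth0" */
    uint8_t pkt[] = { 0x00, 0x08, 0x03, 0x01, 'e', 't', 'h', '0' };
    uint8_t ident[16];

    printf("ident_len = %d\n",
           parse_ident(pkt, sizeof(pkt), ident, sizeof(ident)));
    return 0;
}
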
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 80aeaf9e6e16..bfb522e51346 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -242,8 +242,10 @@ static inline int compute_score(struct sock *sk, struct net *net,
if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
return -1;
-		score = sk->sk_family == PF_INET ? 2 : 1;
+		score = sk->sk_bound_dev_if ? 2 : 1;
+		if (sk->sk_family == PF_INET)
+			score++;
if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
score++;
}
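
The inet_hashtables.c change above re-weights compute_score() so that binding to a device is worth more than the address family. The following is a minimal standalone sketch of that ordering, assuming invented names (struct model_sock, model_score()) rather than kernel types.

#include <stdio.h>

/* Hypothetical model of the new compute_score() ordering: a non-zero
 * bound device index sets the base score, PF_INET only adds one. */
struct model_sock {
    int bound_dev_if;    /* 0 means not bound to a device */
    int is_pf_inet;      /* 1 for PF_INET, 0 for a dual-stack PF_INET6 socket */
};

static int model_score(const struct model_sock *sk)
{
    int score = sk->bound_dev_if ? 2 : 1;

    if (sk->is_pf_inet)
        score++;
    return score;
}

int main(void)
{
    struct model_sock bound   = { .bound_dev_if = 4, .is_pf_inet = 1 };
    struct model_sock unbound = { .bound_dev_if = 0, .is_pf_inet = 1 };

    /* With the old formula both sockets would score 2; now the
     * device-bound one scores 3 and wins the lookup. */
    printf("bound=%d unbound=%d\n", model_score(&bound), model_score(&unbound));
    return 0;
}
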
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 613432a36f0a..e61ea428ea18 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -20,13 +20,8 @@
#endif
#include <net/netfilter/nf_conntrack_zones.h>
-static unsigned int defrag4_pernet_id __read_mostly;
static DEFINE_MUTEX(defrag4_mutex);
-struct defrag4_pernet {
- unsigned int users;
-};
-
static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb,
u_int32_t user)
{
@@ -111,19 +106,15 @@ static const struct nf_hook_ops ipv4_defrag_ops[] = {
static void __net_exit defrag4_net_exit(struct net *net)
{
- struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id);
-
- if (nf_defrag->users) {
+ if (net->nf.defrag_ipv4_users) {
nf_unregister_net_hooks(net, ipv4_defrag_ops,
ARRAY_SIZE(ipv4_defrag_ops));
- nf_defrag->users = 0;
+ net->nf.defrag_ipv4_users = 0;
}
}
static struct pernet_operations defrag4_net_ops = {
.exit = defrag4_net_exit,
- .id = &defrag4_pernet_id,
- .size = sizeof(struct defrag4_pernet),
};
static int __init nf_defrag_init(void)
@@ -138,24 +129,23 @@ static void __exit nf_defrag_fini(void)
int nf_defrag_ipv4_enable(struct net *net)
{
- struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id);
int err = 0;
mutex_lock(&defrag4_mutex);
- if (nf_defrag->users == UINT_MAX) {
+ if (net->nf.defrag_ipv4_users == UINT_MAX) {
err = -EOVERFLOW;
goto out_unlock;
}
- if (nf_defrag->users) {
- nf_defrag->users++;
+ if (net->nf.defrag_ipv4_users) {
+ net->nf.defrag_ipv4_users++;
goto out_unlock;
}
err = nf_register_net_hooks(net, ipv4_defrag_ops,
ARRAY_SIZE(ipv4_defrag_ops));
if (err == 0)
- nf_defrag->users = 1;
+ net->nf.defrag_ipv4_users = 1;
out_unlock:
mutex_unlock(&defrag4_mutex);
@@ -165,12 +155,10 @@ EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable);
void nf_defrag_ipv4_disable(struct net *net)
{
- struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id);
-
mutex_lock(&defrag4_mutex);
- if (nf_defrag->users) {
- nf_defrag->users--;
- if (nf_defrag->users == 0)
+ if (net->nf.defrag_ipv4_users) {
+ net->nf.defrag_ipv4_users--;
+ if (net->nf.defrag_ipv4_users == 0)
nf_unregister_net_hooks(net, ipv4_defrag_ops,
ARRAY_SIZE(ipv4_defrag_ops));
}
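
The nf_defrag_ipv4.c hunks above move the use counter into struct net (net->nf.defrag_ipv4_users) but keep the same enable/disable discipline: register the hooks on the 0 -> 1 transition, unregister on the last disable, and refuse to wrap the counter. A hypothetical userspace model of that discipline, with a pthread mutex standing in for defrag4_mutex and stub register/unregister functions, is sketched below.

#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int users;

static int register_hooks(void)    { puts("hooks registered");   return 0; }
static void unregister_hooks(void) { puts("hooks unregistered"); }

static int model_enable(void)
{
    int err = 0;

    pthread_mutex_lock(&lock);
    if (users == UINT_MAX) {
        err = -EOVERFLOW;          /* refuse to wrap the counter */
    } else if (users) {
        users++;                   /* already enabled, just take a reference */
    } else {
        err = register_hooks();    /* first user registers the hooks */
        if (err == 0)
            users = 1;
    }
    pthread_mutex_unlock(&lock);
    return err;
}

static void model_disable(void)
{
    pthread_mutex_lock(&lock);
    if (users && --users == 0)     /* last user unregisters the hooks */
        unregister_hooks();
    pthread_mutex_unlock(&lock);
}

int main(void)
{
    model_enable();    /* registers */
    model_enable();    /* only bumps the counter */
    model_disable();
    model_disable();   /* unregisters */
    return 0;
}
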
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e8b48df73c85..f5c336f8b0c8 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -486,10 +486,7 @@ static bool tcp_stream_is_readable(struct sock *sk, int target)
{
if (tcp_epollin_ready(sk, target))
return true;
-
- if (sk->sk_prot->stream_memory_read)
- return sk->sk_prot->stream_memory_read(sk);
- return false;
+ return sk_is_readable(sk);
}
/*
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index d3e9386b493e..5f4d6f45d87f 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -150,19 +150,6 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg,
EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir);
#ifdef CONFIG_BPF_SYSCALL
-static bool tcp_bpf_stream_read(const struct sock *sk)
-{
- struct sk_psock *psock;
- bool empty = true;
-
- rcu_read_lock();
- psock = sk_psock(sk);
- if (likely(psock))
- empty = list_empty(&psock->ingress_msg);
- rcu_read_unlock();
- return !empty;
-}
-
static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
long timeo)
{
@@ -232,6 +219,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
bool cork = false, enospc = sk_msg_full(msg);
struct sock *sk_redir;
u32 tosend, delta = 0;
+ u32 eval = __SK_NONE;
int ret;
more_data:
@@ -275,13 +263,24 @@ more_data:
case __SK_REDIRECT:
sk_redir = psock->sk_redir;
sk_msg_apply_bytes(psock, tosend);
+ if (!psock->apply_bytes) {
+ /* Clean up before releasing the sock lock. */
+ eval = psock->eval;
+ psock->eval = __SK_NONE;
+ psock->sk_redir = NULL;
+ }
if (psock->cork) {
cork = true;
psock->cork = NULL;
}
sk_msg_return(sk, msg, tosend);
release_sock(sk);
+
ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags);
+
+ if (eval == __SK_REDIRECT)
+ sock_put(sk_redir);
+
lock_sock(sk);
if (unlikely(ret < 0)) {
int free = sk_msg_free_nocharge(sk, msg);
@@ -479,7 +478,7 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
prot[TCP_BPF_BASE].unhash = sock_map_unhash;
prot[TCP_BPF_BASE].close = sock_map_close;
prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg;
- prot[TCP_BPF_BASE].stream_memory_read = tcp_bpf_stream_read;
+ prot[TCP_BPF_BASE].sock_is_readable = sk_msg_is_readable;
prot[TCP_BPF_TX] = prot[TCP_BPF_BASE];
prot[TCP_BPF_TX].sendmsg = tcp_bpf_sendmsg;
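
Several hunks in this series (tcp.c above, tcp_bpf.c here, and udp.c/udp_bpf.c further down) replace the old stream_memory_read hook with a generic sock_is_readable callback consulted by sk_is_readable(). The sketch below models only the shape of that dispatch with invented userspace types (struct model_proto, struct model_sock); it is not the kernel API.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical model of the sock_is_readable indirection: the protocol
 * ops may supply an optional readability callback, and the generic
 * helper falls back to "not readable" when none is set. */
struct model_proto {
    bool (*sock_is_readable)(void *sk);
};

struct model_sock {
    const struct model_proto *prot;
    int queued_msgs;   /* stand-in for psock->ingress_msg */
};

static bool msg_is_readable(void *sk)
{
    return ((struct model_sock *)sk)->queued_msgs > 0;
}

static bool model_sk_is_readable(struct model_sock *sk)
{
    if (sk->prot->sock_is_readable)
        return sk->prot->sock_is_readable(sk);
    return false;
}

int main(void)
{
    const struct model_proto plain_proto = { .sock_is_readable = NULL };
    const struct model_proto bpf_proto   = { .sock_is_readable = msg_is_readable };
    struct model_sock plain = { .prot = &plain_proto, .queued_msgs = 1 };
    struct model_sock bpf   = { .prot = &bpf_proto,   .queued_msgs = 1 };

    /* Only the socket whose proto installed the callback reports readable. */
    printf("plain=%d bpf=%d\n",
           model_sk_is_readable(&plain), model_sk_is_readable(&bpf));
    return 0;
}
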
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2e62e0d6373a..5b8ce65dfc06 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1037,6 +1037,20 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
DEFINE_STATIC_KEY_FALSE(tcp_md5_needed);
EXPORT_SYMBOL(tcp_md5_needed);
+static bool better_md5_match(struct tcp_md5sig_key *old, struct tcp_md5sig_key *new)
+{
+ if (!old)
+ return true;
+
+ /* l3index always overrides non-l3index */
+ if (old->l3index && new->l3index == 0)
+ return false;
+ if (old->l3index == 0 && new->l3index)
+ return true;
+
+ return old->prefixlen < new->prefixlen;
+}
+
/* Find the Key structure for an address. */
struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
const union tcp_md5_addr *addr,
@@ -1059,7 +1073,7 @@ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
lockdep_sock_is_held(sk)) {
if (key->family != family)
continue;
- if (key->l3index && key->l3index != l3index)
+ if (key->flags & TCP_MD5SIG_FLAG_IFINDEX && key->l3index != l3index)
continue;
if (family == AF_INET) {
mask = inet_make_mask(key->prefixlen);
@@ -1074,8 +1088,7 @@ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
match = false;
}
- if (match && (!best_match ||
- key->prefixlen > best_match->prefixlen))
+ if (match && better_md5_match(best_match, key))
best_match = key;
}
return best_match;
@@ -1085,7 +1098,7 @@ EXPORT_SYMBOL(__tcp_md5_do_lookup);
static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
const union tcp_md5_addr *addr,
int family, u8 prefixlen,
- int l3index)
+ int l3index, u8 flags)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_key *key;
@@ -1105,7 +1118,9 @@ static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
lockdep_sock_is_held(sk)) {
if (key->family != family)
continue;
- if (key->l3index && key->l3index != l3index)
+ if ((key->flags & TCP_MD5SIG_FLAG_IFINDEX) != (flags & TCP_MD5SIG_FLAG_IFINDEX))
+ continue;
+ if (key->l3index != l3index)
continue;
if (!memcmp(&key->addr, addr, size) &&
key->prefixlen == prefixlen)
@@ -1129,7 +1144,7 @@ EXPORT_SYMBOL(tcp_v4_md5_lookup);
/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen, int l3index,
+ int family, u8 prefixlen, int l3index, u8 flags,
const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
/* Add Key to the list */
@@ -1137,7 +1152,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_info *md5sig;
- key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index);
+ key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index, flags);
if (key) {
/* Pre-existing entry - just update that one.
* Note that the key might be used concurrently.
@@ -1182,6 +1197,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
key->family = family;
key->prefixlen = prefixlen;
key->l3index = l3index;
+ key->flags = flags;
memcpy(&key->addr, addr,
(family == AF_INET6) ? sizeof(struct in6_addr) :
sizeof(struct in_addr));
@@ -1191,11 +1207,11 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
EXPORT_SYMBOL(tcp_md5_do_add);
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
- u8 prefixlen, int l3index)
+ u8 prefixlen, int l3index, u8 flags)
{
struct tcp_md5sig_key *key;
- key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index);
+ key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index, flags);
if (!key)
return -ENOENT;
hlist_del_rcu(&key->node);
@@ -1229,6 +1245,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
const union tcp_md5_addr *addr;
u8 prefixlen = 32;
int l3index = 0;
+ u8 flags;
if (optlen < sizeof(cmd))
return -EINVAL;
@@ -1239,6 +1256,8 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
if (sin->sin_family != AF_INET)
return -EINVAL;
+ flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
+
if (optname == TCP_MD5SIG_EXT &&
cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
prefixlen = cmd.tcpm_prefixlen;
@@ -1246,7 +1265,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
return -EINVAL;
}
- if (optname == TCP_MD5SIG_EXT &&
+ if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
struct net_device *dev;
@@ -1267,12 +1286,12 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
addr = (union tcp_md5_addr *)&sin->sin_addr.s_addr;
if (!cmd.tcpm_keylen)
- return tcp_md5_do_del(sk, addr, AF_INET, prefixlen, l3index);
+ return tcp_md5_do_del(sk, addr, AF_INET, prefixlen, l3index, flags);
if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
return -EINVAL;
- return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index,
+ return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, flags,
cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}
@@ -1596,7 +1615,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
* memory, then we end up not copying the key
* across. Shucks.
*/
- tcp_md5_do_add(newsk, addr, AF_INET, 32, l3index,
+ tcp_md5_do_add(newsk, addr, AF_INET, 32, l3index, key->flags,
key->key, key->keylen, GFP_ATOMIC);
sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
}
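
The better_md5_match() helper added above prefers a key bound to an L3 device over an unbound key and only then falls back to the longest prefix. A standalone sketch of that selection order follows; struct model_key and the sample keys are invented for illustration.

#include <stdbool.h>
#include <stdio.h>

struct model_key {
    int l3index;              /* 0 means not bound to an L3 device */
    unsigned char prefixlen;
    const char *name;
};

/* Hypothetical model of the new preference: an l3index-bound key always
 * beats an unbound one, and prefix length only breaks ties after that. */
static bool better_match(const struct model_key *old, const struct model_key *new)
{
    if (!old)
        return true;
    if (old->l3index && new->l3index == 0)
        return false;
    if (old->l3index == 0 && new->l3index)
        return true;
    return old->prefixlen < new->prefixlen;
}

int main(void)
{
    struct model_key keys[] = {
        { .l3index = 0, .prefixlen = 32, .name = "unbound /32" },
        { .l3index = 5, .prefixlen = 16, .name = "vrf-bound /16" },
    };
    const struct model_key *best = NULL;

    for (unsigned int i = 0; i < sizeof(keys) / sizeof(keys[0]); i++)
        if (better_match(best, &keys[i]))
            best = &keys[i];

    /* The VRF-bound /16 is preferred over the unbound /32. */
    printf("best: %s\n", best->name);
    return 0;
}
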
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2a7825a5b842..2fffcf2b54f3 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -390,7 +390,8 @@ static int compute_score(struct sock *sk, struct net *net,
dif, sdif);
if (!dev_match)
return -1;
- score += 4;
+ if (sk->sk_bound_dev_if)
+ score += 4;
if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
score++;
@@ -2866,6 +2867,9 @@ __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
!(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
mask &= ~(EPOLLIN | EPOLLRDNORM);
+ /* psock ingress_msg queue should not contain any bad checksum frames */
+ if (sk_is_readable(sk))
+ mask |= EPOLLIN | EPOLLRDNORM;
return mask;
}
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index 7a1d5f473878..bbe6569c9ad3 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -114,6 +114,7 @@ static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
*prot = *base;
prot->close = sock_map_close;
prot->recvmsg = udp_bpf_recvmsg;
+ prot->sock_is_readable = sk_msg_is_readable;
}
static void udp_bpf_check_v6_needs_rebuild(struct proto *ops)