summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c1
-rw-r--r--net/ipv4/devinet.c19
-rw-r--r--net/ipv4/icmp.c2
-rw-r--r--net/ipv4/igmp.c6
-rw-r--r--net/ipv4/inet_connection_sock.c42
-rw-r--r--net/ipv4/inet_hashtables.c10
-rw-r--r--net/ipv4/inet_timewait_sock.c31
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv4/route.c4
-rw-r--r--net/ipv4/syncookies.c7
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/ipv4/tcp_bbr.c2
-rw-r--r--net/ipv4/tcp_input.c3
-rw-r--r--net/ipv4/tcp_plb.c2
-rw-r--r--net/ipv4/tcp_ulp.c4
-rw-r--r--net/ipv4/udp_tunnel_core.c1
16 files changed, 104 insertions, 36 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ab4a06be489b..6c0ec2789943 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1665,6 +1665,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
if (rc == 0) {
*sk = sock->sk;
(*sk)->sk_allocation = GFP_ATOMIC;
+ (*sk)->sk_use_task_frag = false;
/*
* Unhash it so that IP input processing does not even see it,
* we do not wish this socket to see incoming packets.
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e8b9a9202fec..b0acf6e19aed 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -234,13 +234,20 @@ static void inet_free_ifa(struct in_ifaddr *ifa)
call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
+static void in_dev_free_rcu(struct rcu_head *head)
+{
+ struct in_device *idev = container_of(head, struct in_device, rcu_head);
+
+ kfree(rcu_dereference_protected(idev->mc_hash, 1));
+ kfree(idev);
+}
+
void in_dev_finish_destroy(struct in_device *idev)
{
struct net_device *dev = idev->dev;
WARN_ON(idev->ifa_list);
WARN_ON(idev->mc_list);
- kfree(rcu_dereference_protected(idev->mc_hash, 1));
#ifdef NET_REFCNT_DEBUG
pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
#endif
@@ -248,7 +255,7 @@ void in_dev_finish_destroy(struct in_device *idev)
if (!idev->dead)
pr_err("Freeing alive in_device %p\n", idev);
else
- kfree(idev);
+ call_rcu(&idev->rcu_head, in_dev_free_rcu);
}
EXPORT_SYMBOL(in_dev_finish_destroy);
@@ -298,12 +305,6 @@ out_kfree:
goto out;
}
-static void in_dev_rcu_put(struct rcu_head *head)
-{
- struct in_device *idev = container_of(head, struct in_device, rcu_head);
- in_dev_put(idev);
-}
-
static void inetdev_destroy(struct in_device *in_dev)
{
struct net_device *dev;
@@ -328,7 +329,7 @@ static void inetdev_destroy(struct in_device *in_dev)
neigh_parms_release(&arp_tbl, in_dev->arp_parms);
arp_ifdown(dev);
- call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
+ in_dev_put(in_dev);
}
int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index d5d745c3e345..46aa2d65e40a 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -263,7 +263,7 @@ bool icmp_global_allow(void)
/* We want to use a credit of one in average, but need to randomize
* it for security reasons.
*/
- credit = max_t(int, credit - prandom_u32_max(3), 0);
+ credit = max_t(int, credit - get_random_u32_below(3), 0);
rc = true;
}
WRITE_ONCE(icmp_global.credit, credit);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 81be3e0f0e70..c920aa9a62a9 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -213,7 +213,7 @@ static void igmp_stop_timer(struct ip_mc_list *im)
/* It must be called with locked im->lock */
static void igmp_start_timer(struct ip_mc_list *im, int max_delay)
{
- int tv = prandom_u32_max(max_delay);
+ int tv = get_random_u32_below(max_delay);
im->tm_running = 1;
if (!mod_timer(&im->timer, jiffies+tv+2))
@@ -222,7 +222,7 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay)
static void igmp_gq_start_timer(struct in_device *in_dev)
{
- int tv = prandom_u32_max(in_dev->mr_maxdelay);
+ int tv = get_random_u32_below(in_dev->mr_maxdelay);
unsigned long exp = jiffies + tv + 2;
if (in_dev->mr_gq_running &&
@@ -236,7 +236,7 @@ static void igmp_gq_start_timer(struct in_device *in_dev)
static void igmp_ifc_start_timer(struct in_device *in_dev, int delay)
{
- int tv = prandom_u32_max(delay);
+ int tv = get_random_u32_below(delay);
if (!mod_timer(&in_dev->mr_ifc_timer, jiffies+tv+2))
in_dev_hold(in_dev);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 4a34bc7cb15e..d1f837579398 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -173,22 +173,40 @@ static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2,
return false;
}
+static bool __inet_bhash2_conflict(const struct sock *sk, struct sock *sk2,
+ kuid_t sk_uid, bool relax,
+ bool reuseport_cb_ok, bool reuseport_ok)
+{
+ if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))
+ return false;
+
+ return inet_bind_conflict(sk, sk2, sk_uid, relax,
+ reuseport_cb_ok, reuseport_ok);
+}
+
static bool inet_bhash2_conflict(const struct sock *sk,
const struct inet_bind2_bucket *tb2,
kuid_t sk_uid,
bool relax, bool reuseport_cb_ok,
bool reuseport_ok)
{
+ struct inet_timewait_sock *tw2;
struct sock *sk2;
sk_for_each_bound_bhash2(sk2, &tb2->owners) {
- if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))
- continue;
+ if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax,
+ reuseport_cb_ok, reuseport_ok))
+ return true;
+ }
- if (inet_bind_conflict(sk, sk2, sk_uid, relax,
- reuseport_cb_ok, reuseport_ok))
+ twsk_for_each_bound_bhash2(tw2, &tb2->deathrow) {
+ sk2 = (struct sock *)tw2;
+
+ if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax,
+ reuseport_cb_ok, reuseport_ok))
return true;
}
+
return false;
}
@@ -314,7 +332,7 @@ other_half_scan:
if (likely(remaining > 1))
remaining &= ~1U;
- offset = prandom_u32_max(remaining);
+ offset = get_random_u32_below(remaining);
/* __inet_hash_connect() favors ports having @low parity
* We do the opposite to not pollute connect() users.
*/
@@ -1182,12 +1200,26 @@ void inet_csk_prepare_forced_close(struct sock *sk)
}
EXPORT_SYMBOL(inet_csk_prepare_forced_close);
+static int inet_ulp_can_listen(const struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
+ if (icsk->icsk_ulp_ops && !icsk->icsk_ulp_ops->clone)
+ return -EINVAL;
+
+ return 0;
+}
+
int inet_csk_listen_start(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet = inet_sk(sk);
int err;
+ err = inet_ulp_can_listen(sk);
+ if (unlikely(err))
+ return err;
+
reqsk_queue_alloc(&icsk->icsk_accept_queue);
sk->sk_ack_backlog = 0;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 3cec471a2cd2..24a38b56fab9 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -116,6 +116,7 @@ static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb,
#endif
tb->rcv_saddr = sk->sk_rcv_saddr;
INIT_HLIST_HEAD(&tb->owners);
+ INIT_HLIST_HEAD(&tb->deathrow);
hlist_add_head(&tb->node, &head->chain);
}
@@ -137,7 +138,7 @@ struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep,
/* Caller must hold hashbucket lock for this tb with local BH disabled */
void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb)
{
- if (hlist_empty(&tb->owners)) {
+ if (hlist_empty(&tb->owners) && hlist_empty(&tb->deathrow)) {
__hlist_del(&tb->node);
kmem_cache_free(cachep, tb);
}
@@ -1097,21 +1098,22 @@ ok:
* on low contention the randomness is maximal and on high contention
* it may be inexistent.
*/
- i = max_t(int, i, prandom_u32_max(8) * 2);
+ i = max_t(int, i, get_random_u32_below(8) * 2);
WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
/* Head lock still held and bh's disabled */
inet_bind_hash(sk, tb, tb2, port);
- spin_unlock(&head2->lock);
-
if (sk_unhashed(sk)) {
inet_sk(sk)->inet_sport = htons(port);
inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
}
if (tw)
inet_twsk_bind_unhash(tw, hinfo);
+
+ spin_unlock(&head2->lock);
spin_unlock(&head->lock);
+
if (tw)
inet_twsk_deschedule_put(tw);
local_bh_enable();
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 66fc940f9521..1d77d992e6e7 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -29,6 +29,7 @@
void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
struct inet_hashinfo *hashinfo)
{
+ struct inet_bind2_bucket *tb2 = tw->tw_tb2;
struct inet_bind_bucket *tb = tw->tw_tb;
if (!tb)
@@ -37,6 +38,11 @@ void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
__hlist_del(&tw->tw_bind_node);
tw->tw_tb = NULL;
inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
+
+ __hlist_del(&tw->tw_bind2_node);
+ tw->tw_tb2 = NULL;
+ inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2);
+
__sock_put((struct sock *)tw);
}
@@ -45,7 +51,7 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
{
struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo;
spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
- struct inet_bind_hashbucket *bhead;
+ struct inet_bind_hashbucket *bhead, *bhead2;
spin_lock(lock);
sk_nulls_del_node_init_rcu((struct sock *)tw);
@@ -54,9 +60,13 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
/* Disassociate with bind bucket. */
bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
hashinfo->bhash_size)];
+ bhead2 = inet_bhashfn_portaddr(hashinfo, (struct sock *)tw,
+ twsk_net(tw), tw->tw_num);
spin_lock(&bhead->lock);
+ spin_lock(&bhead2->lock);
inet_twsk_bind_unhash(tw, hashinfo);
+ spin_unlock(&bhead2->lock);
spin_unlock(&bhead->lock);
refcount_dec(&tw->tw_dr->tw_refcount);
@@ -93,6 +103,12 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
hlist_add_head(&tw->tw_bind_node, list);
}
+static void inet_twsk_add_bind2_node(struct inet_timewait_sock *tw,
+ struct hlist_head *list)
+{
+ hlist_add_head(&tw->tw_bind2_node, list);
+}
+
/*
* Enter the time wait state. This is called with locally disabled BH.
* Essentially we whip up a timewait bucket, copy the relevant info into it
@@ -105,17 +121,28 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
const struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash);
spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
- struct inet_bind_hashbucket *bhead;
+ struct inet_bind_hashbucket *bhead, *bhead2;
+
/* Step 1: Put TW into bind hash. Original socket stays there too.
Note, that any socket with inet->num != 0 MUST be bound in
binding cache, even if it is closed.
*/
bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
hashinfo->bhash_size)];
+ bhead2 = inet_bhashfn_portaddr(hashinfo, sk, twsk_net(tw), inet->inet_num);
+
spin_lock(&bhead->lock);
+ spin_lock(&bhead2->lock);
+
tw->tw_tb = icsk->icsk_bind_hash;
WARN_ON(!icsk->icsk_bind_hash);
inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
+
+ tw->tw_tb2 = icsk->icsk_bind2_hash;
+ WARN_ON(!icsk->icsk_bind2_hash);
+ inet_twsk_add_bind2_node(tw, &tw->tw_tb2->deathrow);
+
+ spin_unlock(&bhead2->lock);
spin_unlock(&bhead->lock);
spin_lock(lock);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index b58df3c1bf7d..eec1f6df80d8 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -412,7 +412,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
static void ipmr_free_table(struct mr_table *mrt)
{
- del_timer_sync(&mrt->ipmr_expire_timer);
+ timer_shutdown_sync(&mrt->ipmr_expire_timer);
mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC |
MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC);
rhltable_destroy(&mrt->mfc_hash);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index cd1fa9f70f1a..de6e3515ab4f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -471,7 +471,7 @@ static u32 ip_idents_reserve(u32 hash, int segs)
old = READ_ONCE(*p_tstamp);
if (old != now && cmpxchg(p_tstamp, old, now) == old)
- delta = prandom_u32_max(now - old);
+ delta = get_random_u32_below(now - old);
/* If UBSAN reports an error there, please make sure your compiler
* supports -fno-strict-overflow before reporting it that was a bug
@@ -689,7 +689,7 @@ static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
} else {
/* Randomize max depth to avoid some side channels attacks. */
int max_depth = FNHE_RECLAIM_DEPTH +
- prandom_u32_max(FNHE_RECLAIM_DEPTH);
+ get_random_u32_below(FNHE_RECLAIM_DEPTH);
while (depth > max_depth) {
fnhe_remove_oldest(hash);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 942d2dfa1115..26fb97d1d4d9 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -288,12 +288,11 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
struct tcp_request_sock *treq;
struct request_sock *req;
-#ifdef CONFIG_MPTCP
if (sk_is_mptcp(sk))
- ops = &mptcp_subflow_request_sock_ops;
-#endif
+ req = mptcp_subflow_reqsk_alloc(ops, sk, false);
+ else
+ req = inet_reqsk_alloc(ops, sk, false);
- req = inet_reqsk_alloc(ops, sk, false);
if (!req)
return NULL;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 001947136b0a..c567d5e8053e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2000,7 +2000,7 @@ static int receive_fallback_to_copy(struct sock *sk,
if (copy_address != zc->copybuf_address)
return -EINVAL;
- err = import_single_range(READ, (void __user *)copy_address,
+ err = import_single_range(ITER_DEST, (void __user *)copy_address,
inq, &iov, &msg.msg_iter);
if (err)
return err;
@@ -2034,7 +2034,7 @@ static int tcp_copy_straggler_data(struct tcp_zerocopy_receive *zc,
if (copy_address != zc->copybuf_address)
return -EINVAL;
- err = import_single_range(READ, (void __user *)copy_address,
+ err = import_single_range(ITER_DEST, (void __user *)copy_address,
copylen, &iov, &msg.msg_iter);
if (err)
return err;
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 54eec33c6e1c..d2c470524e58 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -618,7 +618,7 @@ static void bbr_reset_probe_bw_mode(struct sock *sk)
struct bbr *bbr = inet_csk_ca(sk);
bbr->mode = BBR_PROBE_BW;
- bbr->cycle_idx = CYCLE_LEN - 1 - prandom_u32_max(bbr_cycle_rand);
+ bbr->cycle_idx = CYCLE_LEN - 1 - get_random_u32_below(bbr_cycle_rand);
bbr_advance_cycle_phase(sk); /* flip to next phase of gain cycle */
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1efacbe948da..cc072d2cfcd8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3646,7 +3646,8 @@ static void tcp_send_challenge_ack(struct sock *sk)
u32 half = (ack_limit + 1) >> 1;
WRITE_ONCE(net->ipv4.tcp_challenge_timestamp, now);
- WRITE_ONCE(net->ipv4.tcp_challenge_count, half + prandom_u32_max(ack_limit));
+ WRITE_ONCE(net->ipv4.tcp_challenge_count,
+ get_random_u32_inclusive(half, ack_limit + half - 1));
}
count = READ_ONCE(net->ipv4.tcp_challenge_count);
if (count > 0) {
diff --git a/net/ipv4/tcp_plb.c b/net/ipv4/tcp_plb.c
index bb1a08fda113..4bcf7eff95e3 100644
--- a/net/ipv4/tcp_plb.c
+++ b/net/ipv4/tcp_plb.c
@@ -97,7 +97,7 @@ void tcp_plb_update_state_upon_rto(struct sock *sk, struct tcp_plb_state *plb)
return;
pause = READ_ONCE(net->ipv4.sysctl_tcp_plb_suspend_rto_sec) * HZ;
- pause += prandom_u32_max(pause);
+ pause += get_random_u32_below(pause);
plb->pause_until = tcp_jiffies32 + pause;
/* Reset PLB state upon RTO, since an RTO causes a sk_rethink_txhash() call
diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
index 9ae50b1bd844..05b6077b9f2c 100644
--- a/net/ipv4/tcp_ulp.c
+++ b/net/ipv4/tcp_ulp.c
@@ -139,6 +139,10 @@ static int __tcp_set_ulp(struct sock *sk, const struct tcp_ulp_ops *ulp_ops)
if (sk->sk_socket)
clear_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
+ err = -EINVAL;
+ if (!ulp_ops->clone && sk->sk_state == TCP_LISTEN)
+ goto out_err;
+
err = ulp_ops->init(sk);
if (err)
goto out_err;
diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
index 8242c8947340..5f8104cf082d 100644
--- a/net/ipv4/udp_tunnel_core.c
+++ b/net/ipv4/udp_tunnel_core.c
@@ -176,6 +176,7 @@ EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb);
void udp_tunnel_sock_release(struct socket *sock)
{
rcu_assign_sk_user_data(sock->sk, NULL);
+ synchronize_rcu();
kernel_sock_shutdown(sock, SHUT_RDWR);
sock_release(sock);
}