From 14afee4b6092fde451ee17604e5f5c89da33e71e Mon Sep 17 00:00:00 2001 From: "Reshetova, Elena" Date: Fri, 30 Jun 2017 13:08:00 +0300 Subject: net: convert sock.sk_wmem_alloc from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 2 +- net/ipv4/esp4.c | 2 +- net/ipv4/ip_output.c | 6 +++--- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_offload.c | 2 +- net/ipv4/tcp_output.c | 15 +++++++-------- 6 files changed, 15 insertions(+), 16 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 58925b6597de..76c2077c3f5b 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -150,7 +150,7 @@ void inet_sock_destruct(struct sock *sk) } WARN_ON(atomic_read(&sk->sk_rmem_alloc)); - WARN_ON(atomic_read(&sk->sk_wmem_alloc)); + WARN_ON(refcount_read(&sk->sk_wmem_alloc)); WARN_ON(sk->sk_wmem_queued); WARN_ON(sk->sk_forward_alloc); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 1f18b4650253..0cbee0a666ff 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -307,7 +307,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * skb->data_len += tailen; skb->truesize += tailen; if (sk) - atomic_add(tailen, &sk->sk_wmem_alloc); + refcount_add(tailen, &sk->sk_wmem_alloc); goto out; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 532b36e9ce2a..2e61e2af251a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1037,7 +1037,7 @@ alloc_new_skb: (flags & MSG_DONTWAIT), &err); } else { skb = NULL; - if (atomic_read(&sk->sk_wmem_alloc) <= + if (refcount_read(&sk->sk_wmem_alloc) <= 2 * sk->sk_sndbuf) skb = sock_wmalloc(sk, alloclen + hh_len + 15, 1, @@ -1145,7 +1145,7 @@ alloc_new_skb: skb->len += copy; skb->data_len += copy; skb->truesize += copy; - atomic_add(copy, &sk->sk_wmem_alloc); + refcount_add(copy, &sk->sk_wmem_alloc); } offset += copy; length -= copy; @@ -1369,7 +1369,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, skb->len += len; skb->data_len += len; skb->truesize += len; - atomic_add(len, &sk->sk_wmem_alloc); + refcount_add(len, &sk->sk_wmem_alloc); offset += len; size -= len; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4793fb78d93b..fae45e402742 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -664,7 +664,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, return skb->len < size_goal && sysctl_tcp_autocorking && skb != tcp_write_queue_head(sk) && - atomic_read(&sk->sk_wmem_alloc) > skb->truesize; + refcount_read(&sk->sk_wmem_alloc) > skb->truesize; } static void tcp_push(struct sock *sk, int flags, int mss_now, @@ -692,7 +692,7 @@ static void tcp_push(struct sock *sk, int flags, int mss_now, /* It is possible TX completion already happened * before we set TSQ_THROTTLED. */ - if (atomic_read(&sk->sk_wmem_alloc) > skb->truesize) + if (refcount_read(&sk->sk_wmem_alloc) > skb->truesize) return; } diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index bc68da38ea86..11f69bbf9307 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -152,7 +152,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, swap(gso_skb->sk, skb->sk); swap(gso_skb->destructor, skb->destructor); sum_truesize += skb->truesize; - atomic_add(sum_truesize - gso_skb->truesize, + refcount_add(sum_truesize - gso_skb->truesize, &skb->sk->sk_wmem_alloc); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9a9c395b6235..1d79137f3795 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -861,12 +861,11 @@ void tcp_wfree(struct sk_buff *skb) struct sock *sk = skb->sk; struct tcp_sock *tp = tcp_sk(sk); unsigned long flags, nval, oval; - int wmem; /* Keep one reference on sk_wmem_alloc. * Will be released by sk_free() from here or tcp_tasklet_func() */ - wmem = atomic_sub_return(skb->truesize - 1, &sk->sk_wmem_alloc); + WARN_ON(refcount_sub_and_test(skb->truesize - 1, &sk->sk_wmem_alloc)); /* If this softirq is serviced by ksoftirqd, we are likely under stress. * Wait until our queues (qdisc + devices) are drained. @@ -875,7 +874,7 @@ void tcp_wfree(struct sk_buff *skb) * - chance for incoming ACK (processed by another cpu maybe) * to migrate this flow (skb->ooo_okay will be eventually set) */ - if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current) + if (refcount_read(&sk->sk_wmem_alloc) >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current) goto out; for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) { @@ -925,7 +924,7 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer) if (nval != oval) continue; - if (!atomic_inc_not_zero(&sk->sk_wmem_alloc)) + if (!refcount_inc_not_zero(&sk->sk_wmem_alloc)) break; /* queue this socket to tasklet queue */ tsq = this_cpu_ptr(&tsq_tasklet); @@ -1045,7 +1044,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb->sk = sk; skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree; skb_set_hash_from_sk(skb, sk); - atomic_add(skb->truesize, &sk->sk_wmem_alloc); + refcount_add(skb->truesize, &sk->sk_wmem_alloc); skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm); @@ -2176,7 +2175,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes); limit <<= factor; - if (atomic_read(&sk->sk_wmem_alloc) > limit) { + if (refcount_read(&sk->sk_wmem_alloc) > limit) { /* Always send the 1st or 2nd skb in write queue. * No need to wait for TX completion to call us back, * after softirq/tasklet schedule. @@ -2192,7 +2191,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, * test again the condition. */ smp_mb__after_atomic(); - if (atomic_read(&sk->sk_wmem_alloc) > limit) + if (refcount_read(&sk->sk_wmem_alloc) > limit) return true; } return false; @@ -2812,7 +2811,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) /* Do not sent more than we queued. 1/4 is reserved for possible * copying overhead: fragmentation, tunneling, mangling etc. */ - if (atomic_read(&sk->sk_wmem_alloc) > + if (refcount_read(&sk->sk_wmem_alloc) > min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) return -EAGAIN; -- cgit