diff options
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
| -rw-r--r-- | net/ipv4/tcp_ipv4.c | 137 | 
1 files changed, 85 insertions, 52 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5c8fa7f1e327..10172487921b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -97,11 +97,7 @@ struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)  }  #endif -struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { -	.lhash_lock  = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock), -	.lhash_users = ATOMIC_INIT(0), -	.lhash_wait  = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), -}; +struct inet_hashinfo tcp_hashinfo;  static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)  { @@ -492,7 +488,7 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)  		skb->csum_offset = offsetof(struct tcphdr, check);  	} else {  		th->check = tcp_v4_check(len, inet->saddr, inet->daddr, -					 csum_partial((char *)th, +					 csum_partial(th,  						      th->doff << 2,  						      skb->csum));  	} @@ -726,7 +722,7 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,  		th->check = tcp_v4_check(skb->len,  					 ireq->loc_addr,  					 ireq->rmt_addr, -					 csum_partial((char *)th, skb->len, +					 csum_partial(th, skb->len,  						      skb->csum));  		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, @@ -1139,10 +1135,9 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)  	if (genhash || memcmp(hash_location, newhash, 16) != 0) {  		if (net_ratelimit()) { -			printk(KERN_INFO "MD5 Hash failed for " -			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n", -			       NIPQUAD(iph->saddr), ntohs(th->source), -			       NIPQUAD(iph->daddr), ntohs(th->dest), +			printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", +			       &iph->saddr, ntohs(th->source), +			       &iph->daddr, ntohs(th->dest),  			       genhash ? " tcp_v4_calc_md5_hash failed" : "");  		}  		return 1; @@ -1297,10 +1292,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)  			 * to destinations, already remembered  			 * to the moment of synflood.  			 */ -			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open " -				       "request from " NIPQUAD_FMT "/%u\n", -				       NIPQUAD(saddr), -				       ntohs(tcp_hdr(skb)->source)); +			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n", +				       &saddr, ntohs(tcp_hdr(skb)->source));  			goto drop_and_release;  		} @@ -1804,7 +1797,7 @@ static int tcp_v4_init_sock(struct sock *sk)  	sk->sk_sndbuf = sysctl_tcp_wmem[1];  	sk->sk_rcvbuf = sysctl_tcp_rmem[1]; -	atomic_inc(&tcp_sockets_allocated); +	percpu_counter_inc(&tcp_sockets_allocated);  	return 0;  } @@ -1852,7 +1845,7 @@ void tcp_v4_destroy_sock(struct sock *sk)  		sk->sk_sndmsg_page = NULL;  	} -	atomic_dec(&tcp_sockets_allocated); +	percpu_counter_dec(&tcp_sockets_allocated);  }  EXPORT_SYMBOL(tcp_v4_destroy_sock); @@ -1860,32 +1853,35 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);  #ifdef CONFIG_PROC_FS  /* Proc filesystem TCP sock list dumping. */ -static inline struct inet_timewait_sock *tw_head(struct hlist_head *head) +static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)  { -	return hlist_empty(head) ? NULL : +	return hlist_nulls_empty(head) ? NULL :  		list_entry(head->first, struct inet_timewait_sock, tw_node);  }  static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)  { -	return tw->tw_node.next ? -		hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; +	return !is_a_nulls(tw->tw_node.next) ? +		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;  }  static void *listening_get_next(struct seq_file *seq, void *cur)  {  	struct inet_connection_sock *icsk; -	struct hlist_node *node; +	struct hlist_nulls_node *node;  	struct sock *sk = cur; -	struct tcp_iter_state* st = seq->private; +	struct inet_listen_hashbucket *ilb; +	struct tcp_iter_state *st = seq->private;  	struct net *net = seq_file_net(seq);  	if (!sk) {  		st->bucket = 0; -		sk = sk_head(&tcp_hashinfo.listening_hash[0]); +		ilb = &tcp_hashinfo.listening_hash[0]; +		spin_lock_bh(&ilb->lock); +		sk = sk_nulls_head(&ilb->head);  		goto get_sk;  	} - +	ilb = &tcp_hashinfo.listening_hash[st->bucket];  	++st->num;  	if (st->state == TCP_SEQ_STATE_OPENREQ) { @@ -1918,7 +1914,7 @@ get_req:  		sk = sk_next(sk);  	}  get_sk: -	sk_for_each_from(sk, node) { +	sk_nulls_for_each_from(sk, node) {  		if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {  			cur = sk;  			goto out; @@ -1935,8 +1931,11 @@ start_req:  		}  		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);  	} +	spin_unlock_bh(&ilb->lock);  	if (++st->bucket < INET_LHTABLE_SIZE) { -		sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]); +		ilb = &tcp_hashinfo.listening_hash[st->bucket]; +		spin_lock_bh(&ilb->lock); +		sk = sk_nulls_head(&ilb->head);  		goto get_sk;  	}  	cur = NULL; @@ -1957,28 +1956,28 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos)  static inline int empty_bucket(struct tcp_iter_state *st)  { -	return hlist_empty(&tcp_hashinfo.ehash[st->bucket].chain) && -		hlist_empty(&tcp_hashinfo.ehash[st->bucket].twchain); +	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) && +		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);  }  static void *established_get_first(struct seq_file *seq)  { -	struct tcp_iter_state* st = seq->private; +	struct tcp_iter_state *st = seq->private;  	struct net *net = seq_file_net(seq);  	void *rc = NULL;  	for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {  		struct sock *sk; -		struct hlist_node *node; +		struct hlist_nulls_node *node;  		struct inet_timewait_sock *tw; -		rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); +		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);  		/* Lockless fast path for the common case of empty buckets */  		if (empty_bucket(st))  			continue; -		read_lock_bh(lock); -		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { +		spin_lock_bh(lock); +		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {  			if (sk->sk_family != st->family ||  			    !net_eq(sock_net(sk), net)) {  				continue; @@ -1996,7 +1995,7 @@ static void *established_get_first(struct seq_file *seq)  			rc = tw;  			goto out;  		} -		read_unlock_bh(lock); +		spin_unlock_bh(lock);  		st->state = TCP_SEQ_STATE_ESTABLISHED;  	}  out: @@ -2007,8 +2006,8 @@ static void *established_get_next(struct seq_file *seq, void *cur)  {  	struct sock *sk = cur;  	struct inet_timewait_sock *tw; -	struct hlist_node *node; -	struct tcp_iter_state* st = seq->private; +	struct hlist_nulls_node *node; +	struct tcp_iter_state *st = seq->private;  	struct net *net = seq_file_net(seq);  	++st->num; @@ -2024,7 +2023,7 @@ get_tw:  			cur = tw;  			goto out;  		} -		read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); +		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));  		st->state = TCP_SEQ_STATE_ESTABLISHED;  		/* Look for next non empty bucket */ @@ -2034,12 +2033,12 @@ get_tw:  		if (st->bucket >= tcp_hashinfo.ehash_size)  			return NULL; -		read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); -		sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); +		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); +		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);  	} else -		sk = sk_next(sk); +		sk = sk_nulls_next(sk); -	sk_for_each_from(sk, node) { +	sk_nulls_for_each_from(sk, node) {  		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))  			goto found;  	} @@ -2067,14 +2066,12 @@ static void *established_get_idx(struct seq_file *seq, loff_t pos)  static void *tcp_get_idx(struct seq_file *seq, loff_t pos)  {  	void *rc; -	struct tcp_iter_state* st = seq->private; +	struct tcp_iter_state *st = seq->private; -	inet_listen_lock(&tcp_hashinfo);  	st->state = TCP_SEQ_STATE_LISTENING;  	rc	  = listening_get_idx(seq, &pos);  	if (!rc) { -		inet_listen_unlock(&tcp_hashinfo);  		st->state = TCP_SEQ_STATE_ESTABLISHED;  		rc	  = established_get_idx(seq, pos);  	} @@ -2084,7 +2081,7 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)  static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)  { -	struct tcp_iter_state* st = seq->private; +	struct tcp_iter_state *st = seq->private;  	st->state = TCP_SEQ_STATE_LISTENING;  	st->num = 0;  	return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; @@ -2093,7 +2090,7 @@ static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)  static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)  {  	void *rc = NULL; -	struct tcp_iter_state* st; +	struct tcp_iter_state *st;  	if (v == SEQ_START_TOKEN) {  		rc = tcp_get_idx(seq, 0); @@ -2106,7 +2103,6 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)  	case TCP_SEQ_STATE_LISTENING:  		rc = listening_get_next(seq, v);  		if (!rc) { -			inet_listen_unlock(&tcp_hashinfo);  			st->state = TCP_SEQ_STATE_ESTABLISHED;  			rc	  = established_get_first(seq);  		} @@ -2123,7 +2119,7 @@ out:  static void tcp_seq_stop(struct seq_file *seq, void *v)  { -	struct tcp_iter_state* st = seq->private; +	struct tcp_iter_state *st = seq->private;  	switch (st->state) {  	case TCP_SEQ_STATE_OPENREQ: @@ -2133,12 +2129,12 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)  		}  	case TCP_SEQ_STATE_LISTENING:  		if (v != SEQ_START_TOKEN) -			inet_listen_unlock(&tcp_hashinfo); +			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);  		break;  	case TCP_SEQ_STATE_TIME_WAIT:  	case TCP_SEQ_STATE_ESTABLISHED:  		if (v) -			read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); +			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));  		break;  	}  } @@ -2284,7 +2280,7 @@ static void get_timewait4_sock(struct inet_timewait_sock *tw,  static int tcp4_seq_show(struct seq_file *seq, void *v)  { -	struct tcp_iter_state* st; +	struct tcp_iter_state *st;  	int len;  	if (v == SEQ_START_TOKEN) { @@ -2350,6 +2346,41 @@ void tcp4_proc_exit(void)  }  #endif /* CONFIG_PROC_FS */ +struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) +{ +	struct iphdr *iph = ip_hdr(skb); + +	switch (skb->ip_summed) { +	case CHECKSUM_COMPLETE: +		if (!tcp_v4_check(skb->len, iph->saddr, iph->daddr, +				  skb->csum)) { +			skb->ip_summed = CHECKSUM_UNNECESSARY; +			break; +		} + +		/* fall through */ +	case CHECKSUM_NONE: +		NAPI_GRO_CB(skb)->flush = 1; +		return NULL; +	} + +	return tcp_gro_receive(head, skb); +} +EXPORT_SYMBOL(tcp4_gro_receive); + +int tcp4_gro_complete(struct sk_buff *skb) +{ +	struct iphdr *iph = ip_hdr(skb); +	struct tcphdr *th = tcp_hdr(skb); + +	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), +				  iph->saddr, iph->daddr, 0); +	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + +	return tcp_gro_complete(skb); +} +EXPORT_SYMBOL(tcp4_gro_complete); +  struct proto tcp_prot = {  	.name			= "TCP",  	.owner			= THIS_MODULE, @@ -2378,6 +2409,7 @@ struct proto tcp_prot = {  	.sysctl_rmem		= sysctl_tcp_rmem,  	.max_header		= MAX_TCP_HEADER,  	.obj_size		= sizeof(struct tcp_sock), +	.slab_flags		= SLAB_DESTROY_BY_RCU,  	.twsk_prot		= &tcp_timewait_sock_ops,  	.rsk_prot		= &tcp_request_sock_ops,  	.h.hashinfo		= &tcp_hashinfo, @@ -2407,6 +2439,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {  void __init tcp_v4_init(void)  { +	inet_hashinfo_init(&tcp_hashinfo);  	if (register_pernet_device(&tcp_sk_ops))  		panic("Failed to create the TCP control socket.\n");  }  | 
