From 2e6599cb899ba4b133f42cbf9d2b1883d2dc583a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 18 Jun 2005 22:46:52 -0700 Subject: [NET] Generalise TCP's struct open_request minisock infrastructure Kept this first changeset minimal, without changing existing names to ease peer review. Basicaly tcp_openreq_alloc now receives the or_calltable, that in turn has two new members: ->slab, that replaces tcp_openreq_cachep ->obj_size, to inform the size of the openreq descendant for a specific protocol The protocol specific fields in struct open_request were moved to a class hierarchy, with the things that are common to all connection oriented PF_INET protocols in struct inet_request_sock, the TCP ones in tcp_request_sock, that is an inet_request_sock, that is an open_request. I.e. this uses the same approach used for the struct sock class hierarchy, with sk_prot indicating if the protocol wants to use the open_request infrastructure by filling in sk_prot->rsk_prot with an or_calltable. Results? Performance is improved and TCP v4 now uses only 64 bytes per open request minisock, down from 96 without this patch :-) Next changeset will rename some of the structs, fields and functions mentioned above, struct or_calltable is way unclear, better name it struct request_sock_ops, s/struct open_request/struct request_sock/g, etc. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index fa24e7ae1f40..f3c8747caf91 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1358,6 +1358,7 @@ int tcp_send_synack(struct sock *sk) struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct open_request *req) { + struct inet_request_sock *ireq = inet_rsk(req); struct tcp_sock *tp = tcp_sk(sk); struct tcphdr *th; int tcp_header_size; @@ -1373,47 +1374,47 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, skb->dst = dst_clone(dst); tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS + - (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) + - (req->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) + + (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) + + (ireq->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) + /* SACK_PERM is in the place of NOP NOP of TS */ - ((req->sack_ok && !req->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0)); + ((ireq->sack_ok && !ireq->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0)); skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size); memset(th, 0, sizeof(struct tcphdr)); th->syn = 1; th->ack = 1; if (dst->dev->features&NETIF_F_TSO) - req->ecn_ok = 0; + ireq->ecn_ok = 0; TCP_ECN_make_synack(req, th); th->source = inet_sk(sk)->sport; - th->dest = req->rmt_port; - TCP_SKB_CB(skb)->seq = req->snt_isn; + th->dest = ireq->rmt_port; + TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn; TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; TCP_SKB_CB(skb)->sacked = 0; skb_shinfo(skb)->tso_segs = 1; skb_shinfo(skb)->tso_size = 0; th->seq = htonl(TCP_SKB_CB(skb)->seq); - th->ack_seq = htonl(req->rcv_isn + 1); + th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ __u8 rcv_wscale; /* Set this up on the first call only */ req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); /* tcp_full_space because it is guaranteed to be the first packet */ tcp_select_initial_window(tcp_full_space(sk), - dst_metric(dst, RTAX_ADVMSS) - (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), + dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), &req->rcv_wnd, &req->window_clamp, - req->wscale_ok, + ireq->wscale_ok, &rcv_wscale); - req->rcv_wscale = rcv_wscale; + ireq->rcv_wscale = rcv_wscale; } /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ th->window = htons(req->rcv_wnd); TCP_SKB_CB(skb)->when = tcp_time_stamp; - tcp_syn_build_options((__u32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), req->tstamp_ok, - req->sack_ok, req->wscale_ok, req->rcv_wscale, + tcp_syn_build_options((__u32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok, + ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale, TCP_SKB_CB(skb)->when, req->ts_recent); -- cgit From 60236fdd08b2169045a3bbfc5ffe1576e6c3c17b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 18 Jun 2005 22:47:21 -0700 Subject: [NET] Rename open_request to request_sock Ok, this one just renames some stuff to have a better namespace and to dissassociate it from TCP: struct open_request -> struct request_sock tcp_openreq_alloc -> reqsk_alloc tcp_openreq_free -> reqsk_free tcp_openreq_fastfree -> __reqsk_free With this most of the infrastructure closely resembles a struct sock methods subset. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f3c8747caf91..f17c6577e337 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1356,7 +1356,7 @@ int tcp_send_synack(struct sock *sk) * Prepare a SYN-ACK. */ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, - struct open_request *req) + struct request_sock *req) { struct inet_request_sock *ireq = inet_rsk(req); struct tcp_sock *tp = tcp_sk(sk); -- cgit From 317a76f9a44b437d6301718f4e5d08bd93f98da7 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 23 Jun 2005 12:19:55 -0700 Subject: [TCP]: Add pluggable congestion control algorithm infrastructure. Allow TCP to have multiple pluggable congestion control algorithms. Algorithms are defined by a set of operations and can be built in or modules. The legacy "new RENO" algorithm is used as a starting point and fallback. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f17c6577e337..0e17c244875c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -111,8 +111,7 @@ static void tcp_cwnd_restart(struct tcp_sock *tp, struct dst_entry *dst) u32 restart_cwnd = tcp_init_cwnd(tp, dst); u32 cwnd = tp->snd_cwnd; - if (tcp_is_vegas(tp)) - tcp_vegas_enable(tp); + tcp_ca_event(tp, CA_EVENT_CWND_RESTART); tp->snd_ssthresh = tcp_current_ssthresh(tp); restart_cwnd = min(restart_cwnd, cwnd); @@ -280,6 +279,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) #define SYSCTL_FLAG_WSCALE 0x2 #define SYSCTL_FLAG_SACK 0x4 + /* If congestion control is doing timestamping */ + if (tp->ca_ops->rtt_sample) + do_gettimeofday(&skb->stamp); + sysctl_flags = 0; if (tcb->flags & TCPCB_FLAG_SYN) { tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS; @@ -304,17 +307,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK)); } - /* - * If the connection is idle and we are restarting, - * then we don't want to do any Vegas calculations - * until we get fresh RTT samples. So when we - * restart, we reset our Vegas state to a clean - * slate. After we get acks for this flight of - * packets, _then_ we can make Vegas calculations - * again. - */ - if (tcp_is_vegas(tp) && tcp_packets_in_flight(tp) == 0) - tcp_vegas_enable(tp); + if (tcp_packets_in_flight(tp) == 0) + tcp_ca_event(tp, CA_EVENT_TX_START); th = (struct tcphdr *) skb_push(skb, tcp_header_size); skb->h.th = th; @@ -521,6 +515,7 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len) * skbs, which it never sent before. --ANK */ TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when; + buff->stamp = skb->stamp; if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { tp->lost_out -= tcp_skb_pcount(skb); @@ -1449,7 +1444,6 @@ static inline void tcp_connect_init(struct sock *sk) tp->window_clamp = dst_metric(dst, RTAX_WINDOW); tp->advmss = dst_metric(dst, RTAX_ADVMSS); tcp_initialize_rcv_mss(sk); - tcp_ca_init(tp); tcp_select_initial_window(tcp_full_space(sk), tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), @@ -1503,7 +1497,6 @@ int tcp_connect(struct sock *sk) TCP_SKB_CB(buff)->end_seq = tp->write_seq; tp->snd_nxt = tp->write_seq; tp->pushed_seq = tp->write_seq; - tcp_ca_init(tp); /* Send it off. */ TCP_SKB_CB(buff)->when = tcp_time_stamp; -- cgit