 net/ipv4/tcp_bbr.c | 77 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 65 insertions(+), 12 deletions(-)
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index b88081285fd1..9277abdd822a 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -369,6 +369,39 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
 	return cwnd;
 }
 
+/* With pacing at lower layers, there's often less data "in the network" than
+ * "in flight". With TSQ and departure time pacing at lower layers (e.g. fq),
+ * we often have several skbs queued in the pacing layer with a pre-scheduled
+ * earliest departure time (EDT). BBR adapts its pacing rate based on the
+ * inflight level that it estimates has already been "baked in" by previous
+ * departure time decisions. We calculate a rough estimate of the number of our
+ * packets that might be in the network at the earliest departure time for the
+ * next skb scheduled:
+ *   in_network_at_edt = inflight_at_edt - (EDT - now) * bw
+ * If we're increasing inflight, then we want to know if the transmit of the
+ * EDT skb will push inflight above the target, so inflight_at_edt includes
+ * bbr_tso_segs_goal() from the skb departing at EDT. If decreasing inflight,
+ * then estimate if inflight will sink too low just before the EDT transmit.
+ */
+static u32 bbr_packets_in_net_at_edt(struct sock *sk, u32 inflight_now)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u64 now_ns, edt_ns, interval_us;
+	u32 interval_delivered, inflight_at_edt;
+
+	now_ns = tp->tcp_clock_cache;
+	edt_ns = max(tp->tcp_wstamp_ns, now_ns);
+	interval_us = div_u64(edt_ns - now_ns, NSEC_PER_USEC);
+	interval_delivered = (u64)bbr_bw(sk) * interval_us >> BW_SCALE;
+	inflight_at_edt = inflight_now;
+	if (bbr->pacing_gain > BBR_UNIT)              /* increasing inflight */
+		inflight_at_edt += bbr_tso_segs_goal(sk);  /* include EDT skb */
+	if (interval_delivered >= inflight_at_edt)
+		return 0;
+	return inflight_at_edt - interval_delivered;
+}
+
 /* An optimization in BBR to reduce losses: On the first round of recovery, we
  * follow the packet conservation principle: send P packets per P packets acked.
  * After that, we slow-start and send at most 2*P packets per P packets acked.
@@ -460,7 +493,7 @@ static bool bbr_is_next_cycle_phase(struct sock *sk,
 	if (bbr->pacing_gain == BBR_UNIT)
 		return is_full_length;		/* just use wall clock time */
 
-	inflight = rs->prior_in_flight;  /* what was in-flight before ACK? */
+	inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight);
 	bw = bbr_max_bw(sk);
 
 	/* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at
@@ -488,8 +521,6 @@ static void bbr_advance_cycle_phase(struct sock *sk)
 
 	bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1);
 	bbr->cycle_mstamp = tp->delivered_mstamp;
-	bbr->pacing_gain = bbr->lt_use_bw ? BBR_UNIT :
-					    bbr_pacing_gain[bbr->cycle_idx];
 }
 
 /* Gain cycling: cycle pacing gain to converge to fair share of available bw. */
@@ -507,8 +538,6 @@ static void bbr_reset_startup_mode(struct sock *sk)
 	struct bbr *bbr = inet_csk_ca(sk);
 
 	bbr->mode = BBR_STARTUP;
-	bbr->pacing_gain = bbr_high_gain;
-	bbr->cwnd_gain	 = bbr_high_gain;
 }
 
 static void bbr_reset_probe_bw_mode(struct sock *sk)
@@ -516,8 +545,6 @@ static void bbr_reset_probe_bw_mode(struct sock *sk)
 	struct bbr *bbr = inet_csk_ca(sk);
 
 	bbr->mode = BBR_PROBE_BW;
-	bbr->pacing_gain = BBR_UNIT;
-	bbr->cwnd_gain = bbr_cwnd_gain;
 	bbr->cycle_idx = CYCLE_LEN - 1 - prandom_u32_max(bbr_cycle_rand);
 	bbr_advance_cycle_phase(sk);	/* flip to next phase of gain cycle */
 }
@@ -735,13 +762,11 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
 
 	if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) {
 		bbr->mode = BBR_DRAIN;	/* drain queue we created */
-		bbr->pacing_gain = bbr_drain_gain;	/* pace slow to drain */
-		bbr->cwnd_gain = bbr_high_gain;	/* maintain cwnd */
 		tcp_sk(sk)->snd_ssthresh =
 				bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT);
 	}	/* fall through to check if in-flight is already small: */
 	if (bbr->mode == BBR_DRAIN &&
-	    tcp_packets_in_flight(tcp_sk(sk)) <=
+	    bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <=
 	    bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT))
 		bbr_reset_probe_bw_mode(sk);  /* we estimate queue is drained */
 }
@@ -798,8 +823,6 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
 	if (bbr_probe_rtt_mode_ms > 0 && filter_expired &&
 	    !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) {
 		bbr->mode = BBR_PROBE_RTT;  /* dip, drain queue */
-		bbr->pacing_gain = BBR_UNIT;
-		bbr->cwnd_gain = BBR_UNIT;
 		bbr_save_cwnd(sk);  /* note cwnd so we can restore it */
 		bbr->probe_rtt_done_stamp = 0;
 	}
@@ -827,6 +850,35 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
 		bbr->idle_restart = 0;
 }
 
+static void bbr_update_gains(struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	switch (bbr->mode) {
+	case BBR_STARTUP:
+		bbr->pacing_gain = bbr_high_gain;
+		bbr->cwnd_gain	 = bbr_high_gain;
+		break;
+	case BBR_DRAIN:
+		bbr->pacing_gain = bbr_drain_gain;	/* slow, to drain */
+		bbr->cwnd_gain	 = bbr_high_gain;	/* keep cwnd */
+		break;
+	case BBR_PROBE_BW:
+		bbr->pacing_gain = (bbr->lt_use_bw ?
+				    BBR_UNIT :
+				    bbr_pacing_gain[bbr->cycle_idx]);
+		bbr->cwnd_gain	 = bbr_cwnd_gain;
+		break;
+	case BBR_PROBE_RTT:
+		bbr->pacing_gain = BBR_UNIT;
+		bbr->cwnd_gain	 = BBR_UNIT;
+		break;
+	default:
+		WARN_ONCE(1, "BBR bad mode: %u\n", bbr->mode);
+		break;
+	}
+}
+
 static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
 {
 	bbr_update_bw(sk, rs);
@@ -834,6 +886,7 @@ static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
 	bbr_check_full_bw_reached(sk, rs);
 	bbr_check_drain(sk, rs);
 	bbr_update_min_rtt(sk, rs);
+	bbr_update_gains(sk);
 }
 
 static void bbr_main(struct sock *sk, const struct rate_sample *rs)
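
A quick way to sanity-check the fixed-point arithmetic in the new bbr_packets_in_net_at_edt() is the standalone userspace sketch below. It is not kernel code; the helper name and the sample numbers (roughly 100 Mbit/s of 1500-byte packets, an EDT about 2 ms in the future, 40 packets in flight, a TSO goal of 2 segments) are illustrative assumptions.

/*
 * Userspace sketch of in_network_at_edt = inflight_at_edt - (EDT - now) * bw,
 * using the same BW_SCALE convention as tcp_bbr.c (bw in pkts per usec << 24).
 * All sample values below are assumptions for illustration.
 */
#include <stdint.h>
#include <stdio.h>

#define BW_SCALE 24

static uint32_t packets_in_net_at_edt(uint64_t bw_scaled, uint64_t edt_minus_now_ns,
				      uint32_t inflight_now, uint32_t tso_segs,
				      int increasing_inflight)
{
	uint64_t interval_us = edt_minus_now_ns / 1000;		/* ns -> us */
	uint64_t delivered = (bw_scaled * interval_us) >> BW_SCALE;
	uint32_t inflight_at_edt = inflight_now;

	if (increasing_inflight)		/* i.e. pacing_gain > BBR_UNIT */
		inflight_at_edt += tso_segs;	/* count the skb departing at EDT */
	if (delivered >= inflight_at_edt)
		return 0;
	return inflight_at_edt - (uint32_t)delivered;
}

int main(void)
{
	/* ~8333 pkts/s == ~0.00833 pkts/us; << BW_SCALE gives ~139810 */
	uint64_t bw_scaled = 139810;
	/* EDT 2 ms out, 40 packets in flight, TSO goal 2, probing upward */
	uint32_t est = packets_in_net_at_edt(bw_scaled, 2000000ULL, 40, 2, 1);

	printf("estimated packets in network at EDT: %u\n", (unsigned)est);
	return 0;
}

With those inputs roughly 16 packets drain from the network before the EDT, so the estimate comes out to about 42 - 16 = 26 packets, noticeably below the raw in-flight count of 40.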

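For the PROBE_BW change in bbr_is_next_cycle_phase(), the sketch below is a simplified floating-point model (again not the kernel's fixed-point code) of how the EDT-adjusted inflight gates advancing the gain cycle; the struct, the function names, and the plain bw * min_rtt BDP target are assumptions for illustration, and the loss check and the quanta headroom of bbr_target_cwnd() are omitted.

#include <stdbool.h>
#include <stdio.h>

struct cycle_state {
	double pacing_gain;	/* e.g. 1.25 (probe), 0.75 (drain), 1.0 (cruise) */
	double bw_pkts_per_us;	/* max filtered bandwidth estimate */
	double min_rtt_us;	/* min RTT estimate */
};

/* Should PROBE_BW advance to the next gain-cycle phase? */
static bool advance_cycle_phase(const struct cycle_state *cs,
				double inflight_at_edt, bool full_length)
{
	double bdp = cs->bw_pkts_per_us * cs->min_rtt_us;	/* packets */

	if (cs->pacing_gain == 1.0)
		return full_length;	/* cruising: wall-clock time only */
	if (cs->pacing_gain > 1.0)	/* probing: must actually reach the raised target */
		return full_length && inflight_at_edt >= cs->pacing_gain * bdp;
	/* draining (< 1.0): may end early once inflight is back near the BDP */
	return full_length || inflight_at_edt <= bdp;
}

int main(void)
{
	/* ~0.00833 pkts/us and a 20 ms min RTT give a BDP of ~167 packets */
	struct cycle_state cs = { 1.25, 0.00833, 20000.0 };

	/* An EDT-adjusted inflight of 150 has not yet reached 1.25 * BDP (~208) */
	printf("advance: %d\n", advance_cycle_phase(&cs, 150.0, true));
	return 0;
}

As the added comment notes, there is often less data in the network than in flight once skbs sit in the pacing layer with pre-scheduled departure times, which is why both this check and the DRAIN-exit check now key off the EDT-adjusted estimate instead of the raw in-flight counters.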