Diffstat (limited to 'net/rxrpc/rtt.c')
-rw-r--r--  net/rxrpc/rtt.c | 208 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 208 insertions(+), 0 deletions(-)
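In outline, the new file carries TCP's RFC 6298 estimator over to rxrpc calls: for each RTT sample R' it maintains

    SRTT   <- 7/8 * SRTT   + 1/8 * R'
    RTTVAR <- 3/4 * RTTVAR + 1/4 * |SRTT - R'|
    RTO    <- SRTT + 4 * RTTVAR

and then bounds the resulting RTO (here with a 100ms margin, a floor of roughly 200ms, and the 120-second RXRPC_RTO_MAX cap). The code stores the terms in fixed point (srtt_us holds 8 times the smoothed RTT) so the updates reduce to shifts and adds.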
diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c
new file mode 100644
index 000000000000..7474f88d7b18
--- /dev/null
+++ b/net/rxrpc/rtt.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0
+/* RTT/RTO calculation.
+ *
+ * Adapted from TCP for AF_RXRPC by David Howells (dhowells@redhat.com)
+ *
+ * https://tools.ietf.org/html/rfc6298
+ * https://tools.ietf.org/html/rfc1122#section-4.2.3.1
+ * http://ccr.sigcomm.org/archive/1995/jan95/ccr-9501-partridge87.pdf
+ */
+
+#include <linux/net.h>
+#include "ar-internal.h"
+
+#define RXRPC_RTO_MAX (120 * USEC_PER_SEC)
+#define RXRPC_TIMEOUT_INIT ((unsigned int)(1 * USEC_PER_SEC)) /* RFC6298 2.1 initial RTO value */
+#define rxrpc_jiffies32 ((u32)jiffies) /* As rxrpc_jiffies32 */
+
+static u32 rxrpc_rto_min_us(struct rxrpc_call *call)
+{
+        return 200;
+}
+
+static u32 __rxrpc_set_rto(const struct rxrpc_call *call)
+{
+        return (call->srtt_us >> 3) + call->rttvar_us;
+}
+
+static u32 rxrpc_bound_rto(u32 rto)
+{
+        return clamp(200000, rto + 100000, RXRPC_RTO_MAX);
+}
+
+/*
+ * Called to compute a smoothed rtt estimate. The data fed to this
+ * routine either comes from timestamps, or from segments that were
+ * known _not_ to have been retransmitted [see Karn/Partridge
+ * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
+ * piece by Van Jacobson.
+ * NOTE: the next three routines used to be one big routine.
+ * To save cycles in the RFC 1323 implementation it was better to break
+ * it up into three procedures. -- erics
+ */
+static void rxrpc_rtt_estimator(struct rxrpc_call *call, long sample_rtt_us)
+{
+        long m = sample_rtt_us; /* RTT */
+        u32 srtt = call->srtt_us;
+
+        /* The following amusing code comes from Jacobson's
+         * article in SIGCOMM '88. Note that rtt and mdev
+         * are scaled versions of rtt and mean deviation.
+         * This is designed to be as fast as possible
+         * m stands for "measurement".
+         *
+         * On a 1990 paper the rto value is changed to:
+         * RTO = rtt + 4 * mdev
+         *
+         * Funny. This algorithm seems to be very broken.
+         * These formulae increase RTO, when it should be decreased, increase
+         * too slowly, when it should be increased quickly, decrease too quickly
+         * etc. I guess in BSD RTO takes ONE value, so that it is absolutely
+         * does not matter how to _calculate_ it. Seems, it was trap
+         * that VJ failed to avoid. 8)
+         */
+        if (srtt != 0) {
+                m -= (srtt >> 3);               /* m is now error in rtt est */
+                srtt += m;                      /* rtt = 7/8 rtt + 1/8 new */
+                if (m < 0) {
+                        m = -m;                 /* m is now abs(error) */
+                        m -= (call->mdev_us >> 2); /* similar update on mdev */
+                        /* This is similar to one of Eifel findings.
+                         * Eifel blocks mdev updates when rtt decreases.
+                         * This solution is a bit different: we use finer gain
+                         * for mdev in this case (alpha*beta).
+                         * Like Eifel it also prevents growth of rto,
+                         * but also it limits too fast rto decreases,
+                         * happening in pure Eifel.
+                         */
+                        if (m > 0)
+                                m >>= 3;
+                } else {
+                        m -= (call->mdev_us >> 2); /* similar update on mdev */
+                }
+
+                call->mdev_us += m;             /* mdev = 3/4 mdev + 1/4 new */
+                if (call->mdev_us > call->mdev_max_us) {
+                        call->mdev_max_us = call->mdev_us;
+                        if (call->mdev_max_us > call->rttvar_us)
+                                call->rttvar_us = call->mdev_max_us;
+                }
+        } else {
+                /* no previous measure. */
+                srtt = m << 3;                  /* take the measured time to be rtt */
+                call->mdev_us = m << 1;         /* make sure rto = 3*rtt */
+                call->rttvar_us = umax(call->mdev_us, rxrpc_rto_min_us(call));
+                call->mdev_max_us = call->rttvar_us;
+        }
+
+        call->srtt_us = umax(srtt, 1);
+}
+
+/*
+ * Calculate rto without backoff. This is the second half of Van Jacobson's
+ * routine referred to above.
+ */
+static void rxrpc_set_rto(struct rxrpc_call *call)
+{
+        u32 rto;
+
+        /* 1. If rtt variance happened to be less 50msec, it is hallucination.
+         *    It cannot be less due to utterly erratic ACK generation made
+         *    at least by solaris and freebsd. "Erratic ACKs" has _nothing_
+         *    to do with delayed acks, because at cwnd>2 true delack timeout
+         *    is invisible. Actually, Linux-2.4 also generates erratic
+         *    ACKs in some circumstances.
+         */
+        rto = __rxrpc_set_rto(call);
+
+        /* 2. Fixups made earlier cannot be right.
+         *    If we do not estimate RTO correctly without them,
+         *    all the algo is pure shit and should be replaced
+         *    with correct one. It is exactly, which we pretend to do.
+         */
+
+        /* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo
+         * guarantees that rto is higher.
+         */
+        call->rto_us = rxrpc_bound_rto(rto);
+}
+
+static void rxrpc_update_rtt_min(struct rxrpc_call *call, ktime_t resp_time, long rtt_us)
+{
+        /* Window size 5mins in approx usec (ipv4.sysctl_tcp_min_rtt_wlen) */
+        u32 wlen_us = 5ULL * NSEC_PER_SEC / 1024;
+
+        minmax_running_min(&call->min_rtt, wlen_us, resp_time / 1024,
+                           (u32)rtt_us ? : jiffies_to_usecs(1));
+}
+
+static void rxrpc_ack_update_rtt(struct rxrpc_call *call, ktime_t resp_time, long rtt_us)
+{
+        if (rtt_us < 0)
+                return;
+
+        /* Update RACK min RTT [RFC8985 6.1 Step 1]. */
+        rxrpc_update_rtt_min(call, resp_time, rtt_us);
+
+        rxrpc_rtt_estimator(call, rtt_us);
+        rxrpc_set_rto(call);
+
+        /* Only reset backoff on valid RTT measurement [RFC6298]. */
+        call->backoff = 0;
+}
+
+/*
+ * Add RTT information to cache. This is called in softirq mode and has
+ * exclusive access to the call RTT data.
+ */
+void rxrpc_call_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
+                        int rtt_slot,
+                        rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
+                        ktime_t send_time, ktime_t resp_time)
+{
+        s64 rtt_us;
+
+        rtt_us = ktime_to_us(ktime_sub(resp_time, send_time));
+        if (rtt_us < 0)
+                return;
+
+        rxrpc_ack_update_rtt(call, resp_time, rtt_us);
+        if (call->rtt_count < 3)
+                call->rtt_count++;
+        call->rtt_taken++;
+
+        WRITE_ONCE(call->peer->recent_srtt_us, call->srtt_us / 8);
+        WRITE_ONCE(call->peer->recent_rto_us, call->rto_us);
+
+        trace_rxrpc_rtt_rx(call, why, rtt_slot, send_serial, resp_serial,
+                           rtt_us, call->srtt_us, call->rto_us);
+}
+
+/*
+ * Get the retransmission timeout to set in nanoseconds, backing it off each
+ * time we retransmit.
+ */
+ktime_t rxrpc_get_rto_backoff(struct rxrpc_call *call, bool retrans)
+{
+        u64 timo_us;
+        u32 backoff = READ_ONCE(call->backoff);
+
+        timo_us = call->rto_us;
+        timo_us <<= backoff;
+        if (retrans && timo_us * 2 <= RXRPC_RTO_MAX)
+                WRITE_ONCE(call->backoff, backoff + 1);
+
+        if (timo_us < 1)
+                timo_us = 1;
+
+        return ns_to_ktime(timo_us * NSEC_PER_USEC);
+}
+
+void rxrpc_call_init_rtt(struct rxrpc_call *call)
+{
+        call->rtt_last_req = KTIME_MIN;
+        call->rto_us = RXRPC_TIMEOUT_INIT;
+        call->mdev_us = RXRPC_TIMEOUT_INIT;
+        call->backoff = 0;
+        //minmax_reset(&call->rtt_min, rxrpc_jiffies32, ~0U);
+}
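To make the estimator easy to poke at outside the kernel, here is a minimal standalone C sketch of the same scaled Jacobson/Karels update. It is an illustration, not the kernel code: it keeps the fixed-point convention used above (srtt_us holds 8x the smoothed RTT) but substitutes the textbook RFC 6298 variance term for the kernel's finer-grained mdev/mdev_max/rttvar tracking; struct rtt_est and rtt_sample() are hypothetical names.

/* Standalone sketch of the scaled RTT estimator above; simplified to the
 * textbook RFC 6298 form RTO = SRTT + 4*RTTVAR. Not the kernel code. */
#include <stdint.h>
#include <stdio.h>

struct rtt_est {
        uint32_t srtt_us;       /* 8 * smoothed RTT (usec), as in rtt.c */
        uint32_t rttvar_us;     /* 4 * mean deviation (usec) */
        uint32_t rto_us;        /* resulting timeout (usec) */
};

static void rtt_sample(struct rtt_est *e, int64_t m /* measured RTT, usec */)
{
        if (e->srtt_us == 0) {
                /* First sample [RFC6298 2.2]: SRTT = R, RTTVAR = R/2,
                 * so the initial RTO comes out at 3 * RTT. */
                e->srtt_us = m << 3;
                e->rttvar_us = m << 1;
        } else {
                int64_t err = m - (e->srtt_us >> 3);

                e->srtt_us += err;      /* SRTT = 7/8 SRTT + 1/8 R' */
                if (err < 0)
                        err = -err;
                /* RTTVAR = 3/4 RTTVAR + 1/4 |SRTT - R'|, kept scaled by 4 */
                e->rttvar_us += ((err << 2) - (int64_t)e->rttvar_us) >> 2;
        }
        e->rto_us = (e->srtt_us >> 3) + e->rttvar_us;   /* SRTT + 4*RTTVAR */
}

int main(void)
{
        struct rtt_est e = { 0, 0, 0 };
        const int64_t samples[] = { 30000, 32000, 28000, 90000, 31000 };

        for (unsigned int i = 0; i < 5; i++) {
                rtt_sample(&e, samples[i]);
                printf("sample %6d us -> srtt %6u us, rto %6u us\n",
                       (int)samples[i], e.srtt_us >> 3, e.rto_us);
        }
        return 0;
}

Running it over a stream of ~30ms samples with a single 90ms outlier shows the deviation term briefly inflating the RTO before it decays again, which is the behaviour the "Funny." comment in rxrpc_rtt_estimator() is grumbling about.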

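rxrpc_get_rto_backoff() applies the classic RFC 6298 exponential backoff: the timeout doubles on each retransmission, but the shift count is only advanced while another doubling would still fit under RXRPC_RTO_MAX, so the returned timeout pins at the last value below the cap. A standalone sketch of that capped doubling (hypothetical names, not the kernel API):

/* Capped exponential backoff in the style of rxrpc_get_rto_backoff().
 * Standalone illustration with hypothetical names. */
#include <stdint.h>
#include <stdio.h>

#define RTO_MAX_US (120ULL * 1000000ULL)        /* 120s, as RXRPC_RTO_MAX */

static uint64_t rto_backoff_us(uint64_t rto_us, uint32_t *backoff, int retrans)
{
        uint64_t timo_us = rto_us << *backoff;

        /* Only widen the shift while one more doubling stays under the cap. */
        if (retrans && timo_us * 2 <= RTO_MAX_US)
                (*backoff)++;

        return timo_us ? timo_us : 1;   /* never hand back a zero timeout */
}

int main(void)
{
        uint32_t backoff = 0;

        /* A 1-second base RTO backs off 1s, 2s, 4s, ... then pins at 64s,
         * because doubling 64s would overshoot the 120s ceiling. */
        for (int i = 0; i < 9; i++)
                printf("retransmit %d: timeout %llu us\n", i,
                       (unsigned long long)rto_backoff_us(1000000, &backoff, 1));
        return 0;
}

Note that, as in the kernel function, the cap gates the growth of the shift rather than clamping the returned value, and a successful RTT measurement (rxrpc_ack_update_rtt() above) resets the backoff to zero.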