diff options
| author | Alexei Starovoitov <ast@kernel.org> | 2020-09-10 20:53:01 -0700 |
|---|---|---|
| committer | Alexei Starovoitov <ast@kernel.org> | 2020-09-10 20:53:15 -0700 |
| commit | 2bab48c5bef00d103085da71a01da27ec7200c08 (patch) | |
| tree | ac322ae7e14ca53346ebf620db5f5dd438ff0aa2 /net/ipv4/tcp_cong.c | |
| parent | 18841da98100c936990ac9013d55bf6b40e1f609 (diff) | |
| parent | 5050bef8736facac341fb7e2c0eda5002619acf8 (diff) | |
Merge branch 'improve-bpf-tcp-cc-init'
Neal Cardwell says:
====================
This patch series reorganizes TCP congestion control initialization so that if
EBPF code called by tcp_init_transfer() sets the congestion control algorithm
by calling setsockopt(TCP_CONGESTION) then the TCP stack initializes the
congestion control module immediately, instead of having tcp_init_transfer()
later initialize the congestion control module.
This increases flexibility for the EBPF code that runs at connection
establishment time, and simplifies the code.
This has the following benefits:
(1) This allows CC module customizations made by the EBPF called in
tcp_init_transfer() to persist, and not be wiped out by a later
call to tcp_init_congestion_control() in tcp_init_transfer().
(2) Does not flip the order of EBPF and CC init, to avoid causing bugs
for existing code upstream that depends on the current order.
(3) Does not cause 2 initializations for for CC in the case where the
EBPF called in tcp_init_transfer() wants to set the CC to a new CC
algorithm.
(4) Allows follow-on simplifications to the code in net/core/filter.c
and net/ipv4/tcp_cong.c, which currently both have some complexity
to special-case CC initialization to avoid double CC
initialization if EBPF sets the CC.
changes in v2:
o rebase onto bpf-next
o add another follow-on simplification suggested by Martin KaFai Lau:
"tcp: simplify tcp_set_congestion_control() load=false case"
changes in v3:
o no change in commits
o resent patch series from @gmail.com, since mail from ncardwell@google.com
stopped being accepted at netdev@vger.kernel.org mid-way through processing
the v2 patch series (between patches 2 and 3), confusing patchwork about
which patches belonged to the v2 patch series
====================
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'net/ipv4/tcp_cong.c')
| -rw-r--r-- | net/ipv4/tcp_cong.c | 27 |
1 files changed, 7 insertions, 20 deletions
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 62878cf26d9c..db47ac24d057 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -176,7 +176,7 @@ void tcp_assign_congestion_control(struct sock *sk) void tcp_init_congestion_control(struct sock *sk) { - const struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); tcp_sk(sk)->prior_ssthresh = 0; if (icsk->icsk_ca_ops->init) @@ -185,6 +185,7 @@ void tcp_init_congestion_control(struct sock *sk) INET_ECN_xmit(sk); else INET_ECN_dontxmit(sk); + icsk->icsk_ca_initialized = 1; } static void tcp_reinit_congestion_control(struct sock *sk, @@ -340,7 +341,7 @@ out: * already initialized. */ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, - bool reinit, bool cap_net_admin) + bool cap_net_admin) { struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_congestion_ops *ca; @@ -361,28 +362,14 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, goto out; } - if (!ca) { + if (!ca) err = -ENOENT; - } else if (!load) { - const struct tcp_congestion_ops *old_ca = icsk->icsk_ca_ops; - - if (bpf_try_module_get(ca, ca->owner)) { - if (reinit) { - tcp_reinit_congestion_control(sk, ca); - } else { - icsk->icsk_ca_ops = ca; - bpf_module_put(old_ca, old_ca->owner); - } - } else { - err = -EBUSY; - } - } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || cap_net_admin)) { + else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || cap_net_admin)) err = -EPERM; - } else if (!bpf_try_module_get(ca, ca->owner)) { + else if (!bpf_try_module_get(ca, ca->owner)) err = -EBUSY; - } else { + else tcp_reinit_congestion_control(sk, ca); - } out: rcu_read_unlock(); return err; |
