summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorDaniel Borkmann <daniel@iogearbox.net>2020-09-30 17:18:17 +0200
committerAlexei Starovoitov <ast@kernel.org>2020-09-30 11:50:35 -0700
commitb4ab31414970a7a03a5d55d75083f2c101a30592 (patch)
tree513b1e81e65d103278b626750a47644452bd7881 /tools
parent92acdc58ab11af66fcaef485433fde61b5e32fac (diff)
bpf: Add redirect_neigh helper as redirect drop-in
Add a redirect_neigh() helper as redirect() drop-in replacement for the xmit side. Main idea for the helper is to be very similar in semantics to the latter just that the skb gets injected into the neighboring subsystem in order to let the stack do the work it knows best anyway to populate the L2 addresses of the packet and then hand over to dev_queue_xmit() as redirect() does. This solves two bigger items: i) skbs don't need to go up to the stack on the host facing veth ingress side for traffic egressing the container to achieve the same for populating L2 which also has the huge advantage that ii) the skb->sk won't get orphaned in ip_rcv_core() when entering the IP routing layer on the host stack. Given that skb->sk neither gets orphaned when crossing the netns as per 9c4c325252c5 ("skbuff: preserve sock reference when scrubbing the skb.") the helper can then push the skbs directly to the phys device where FQ scheduler can do its work and TCP stack gets proper backpressure given we hold on to skb->sk as long as skb is still residing in queues. With the helper used in BPF data path to then push the skb to the phys device, I observed a stable/consistent TCP_STREAM improvement on veth devices for traffic going container -> host -> host -> container from ~10Gbps to ~15Gbps for a single stream in my test environment. Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Reviewed-by: David Ahern <dsahern@gmail.com> Acked-by: Martin KaFai Lau <kafai@fb.com> Cc: David Ahern <dsahern@kernel.org> Link: https://lore.kernel.org/bpf/f207de81629e1724899b73b8112e0013be782d35.1601477936.git.daniel@iogearbox.net
Diffstat (limited to 'tools')
-rw-r--r--tools/include/uapi/linux/bpf.h14
1 files changed, 14 insertions, 0 deletions
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 6116a7f54c8f..1f17c6752deb 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3652,6 +3652,19 @@ union bpf_attr {
* associated socket instead of the current process.
* Return
* The id is returned or 0 in case the id could not be retrieved.
+ *
+ * long bpf_redirect_neigh(u32 ifindex, u64 flags)
+ * Description
+ * Redirect the packet to another net device of index *ifindex*
+ * and fill in L2 addresses from neighboring subsystem. This helper
+ * is somewhat similar to **bpf_redirect**\ (), except that it
+ * fills in e.g. MAC addresses based on the L3 information from
+ * the packet. This helper is supported for IPv4 and IPv6 protocols.
+ * The *flags* argument is reserved and must be 0. The helper is
+ * currently only supported for tc BPF program types.
+ * Return
+ * The helper returns **TC_ACT_REDIRECT** on success or
+ * **TC_ACT_SHOT** on error.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3806,6 +3819,7 @@ union bpf_attr {
FN(snprintf_btf), \
FN(seq_printf_btf), \
FN(skb_cgroup_classid), \
+ FN(redirect_neigh), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper