diff options
| author | Daniel Borkmann <daniel@iogearbox.net> | 2016-01-07 15:50:23 +0100 | 
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2016-01-10 17:54:28 -0500 | 
| commit | f8ffad69c9f8b8dfb0b633425d4ef4d2493ba61a (patch) | |
| tree | 3b01575dd9d774bcfbeb65f0ca302d606a26f895 | |
| parent | fdc5432a7b44ab7de17141beec19d946b9344e91 (diff) | |
bpf: add skb_postpush_rcsum and fix dev_forward_skb occasions
Add a small helper skb_postpush_rcsum() and fix up redirect locations
that need CHECKSUM_COMPLETE fixups on ingress. dev_forward_skb() expects
a proper csum that also covers the Ethernet header, e.g. since 2c26d34bbcc0
("net/core: Handle csum for CHECKSUM_COMPLETE VXLAN forwarding"), we
also do skb_postpull_rcsum() after pulling the Ethernet header off via
eth_type_trans().
When using eBPF in a netns setup, e.g. with vxlan in collect metadata mode,
I can trigger the following csum issue with an IPv6 setup:
  [  505.144065] dummy1: hw csum failure
  [...]
  [  505.144108] Call Trace:
  [  505.144112]  <IRQ>  [<ffffffff81372f08>] dump_stack+0x44/0x5c
  [  505.144134]  [<ffffffff81607cea>] netdev_rx_csum_fault+0x3a/0x40
  [  505.144142]  [<ffffffff815fee3f>] __skb_checksum_complete+0xcf/0xe0
  [  505.144149]  [<ffffffff816f0902>] nf_ip6_checksum+0xb2/0x120
  [  505.144161]  [<ffffffffa08c0e0e>] icmpv6_error+0x17e/0x328 [nf_conntrack_ipv6]
  [  505.144170]  [<ffffffffa0898eca>] ? ip6t_do_table+0x2fa/0x645 [ip6_tables]
  [  505.144177]  [<ffffffffa08c0725>] ? ipv6_get_l4proto+0x65/0xd0 [nf_conntrack_ipv6]
  [  505.144189]  [<ffffffffa06c9a12>] nf_conntrack_in+0xc2/0x5a0 [nf_conntrack]
  [  505.144196]  [<ffffffffa08c039c>] ipv6_conntrack_in+0x1c/0x20 [nf_conntrack_ipv6]
  [  505.144204]  [<ffffffff8164385d>] nf_iterate+0x5d/0x70
  [  505.144210]  [<ffffffff816438d6>] nf_hook_slow+0x66/0xc0
  [  505.144218]  [<ffffffff816bd302>] ipv6_rcv+0x3f2/0x4f0
  [  505.144225]  [<ffffffff816bca40>] ? ip6_make_skb+0x1b0/0x1b0
  [  505.144232]  [<ffffffff8160b77b>] __netif_receive_skb_core+0x36b/0x9a0
  [  505.144239]  [<ffffffff8160bdc8>] ? __netif_receive_skb+0x18/0x60
  [  505.144245]  [<ffffffff8160bdc8>] __netif_receive_skb+0x18/0x60
  [  505.144252]  [<ffffffff8160ccff>] process_backlog+0x9f/0x140
  [  505.144259]  [<ffffffff8160c4a5>] net_rx_action+0x145/0x320
  [...]
What happens is that on ingress, we push the Ethernet header back in, either
from cls_bpf or right before skb_do_redirect(), but without updating the csum.
The "hw csum failure" can be fixed by using the new skb_postpush_rcsum()
helper for the dev_forward_skb() case to correct the csum diff again.
Thanks to Hannes Frederic Sowa for the csum_partial() idea!
Fixes: 3896d655f4d4 ("bpf: introduce bpf_clone_redirect() helper")
Fixes: 27b29f63058d ("bpf: add bpf_redirect() helper")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
| -rw-r--r-- | include/linux/skbuff.h | 17 | ||||
| -rw-r--r-- | net/core/filter.c | 17 | 
2 files changed, 30 insertions, 4 deletions
| diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6b6bd42d6134..07f9ccd28654 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2805,6 +2805,23 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb,  unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); +static inline void skb_postpush_rcsum(struct sk_buff *skb, +				      const void *start, unsigned int len) +{ +	/* For performing the reverse operation to skb_postpull_rcsum(), +	 * we can instead of ... +	 * +	 *   skb->csum = csum_add(skb->csum, csum_partial(start, len, 0)); +	 * +	 * ... just use this equivalent version here to save a few +	 * instructions. Feeding csum of 0 in csum_partial() and later +	 * on adding skb->csum is equivalent to feed skb->csum in the +	 * first place. +	 */ +	if (skb->ip_summed == CHECKSUM_COMPLETE) +		skb->csum = csum_partial(start, len, skb->csum); +} +  /**   *	pskb_trim_rcsum - trim received skb and update checksum   *	@skb: buffer to trim diff --git a/net/core/filter.c b/net/core/filter.c index 35e6fed28709..0db92b5e2cbf 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1368,8 +1368,9 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)  		/* skb_store_bits cannot return -EFAULT here */  		skb_store_bits(skb, offset, ptr, len); -	if (BPF_RECOMPUTE_CSUM(flags) && skb->ip_summed == CHECKSUM_COMPLETE) -		skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0)); +	if (BPF_RECOMPUTE_CSUM(flags)) +		skb_postpush_rcsum(skb, ptr, len); +  	return 0;  } @@ -1525,8 +1526,12 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)  	if (unlikely(!skb2))  		return -ENOMEM; -	if (BPF_IS_REDIRECT_INGRESS(flags)) +	if (BPF_IS_REDIRECT_INGRESS(flags)) { +		if (skb_at_tc_ingress(skb2)) +			skb_postpush_rcsum(skb2, skb_mac_header(skb2), +					   skb2->mac_len);  		return dev_forward_skb(dev, skb2); +	}  	skb2->dev = dev;  	skb_sender_cpu_clear(skb2); @@ -1569,8 +1574,12 @@ int 
skb_do_redirect(struct sk_buff *skb)  		return -EINVAL;  	} -	if (BPF_IS_REDIRECT_INGRESS(ri->flags)) +	if (BPF_IS_REDIRECT_INGRESS(ri->flags)) { +		if (skb_at_tc_ingress(skb)) +			skb_postpush_rcsum(skb, skb_mac_header(skb), +					   skb->mac_len);  		return dev_forward_skb(dev, skb); +	}  	skb->dev = dev;  	skb_sender_cpu_clear(skb); | 
