From 4c3024debf62de4c6ac6d3cb4c0063be21d4f652 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 6 Mar 2019 14:35:15 -0500 Subject: bpf: only test gso type on gso packets BPF can adjust gso only for tcp bytestreams. Fail on other gso types. But only on gso packets. It does not touch this field if !gso_size. Fixes: b90efd225874 ("bpf: only adjust gso_size on bytestream protocols") Signed-off-by: Willem de Bruijn Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- net/core/filter.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 5ceba98069d4..f274620945ff 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2804,7 +2804,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) u32 off = skb_mac_header_len(skb); int ret; - if (!skb_is_gso_tcp(skb)) + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_cow(skb, len_diff); @@ -2845,7 +2845,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) u32 off = skb_mac_header_len(skb); int ret; - if (!skb_is_gso_tcp(skb)) + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_unclone(skb, GFP_ATOMIC); @@ -2970,7 +2970,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff) u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); int ret; - if (!skb_is_gso_tcp(skb)) + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_cow(skb, len_diff); @@ -2999,7 +2999,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff) u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); int ret; - if (!skb_is_gso_tcp(skb)) + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_unclone(skb, GFP_ATOMIC); -- cgit From 915905f8b1d452e70ee6d8637c3f0fb55a39691d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Mar 2019 09:31:26 -0800 Subject: xsk: fix potential crash in xsk_diag_put_umem() MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes two typos in xsk_diag_put_umem() syzbot reported the following crash : kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 7641 Comm: syz-executor946 Not tainted 5.0.0-rc7+ #95 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:xsk_diag_put_umem net/xdp/xsk_diag.c:71 [inline] RIP: 0010:xsk_diag_fill net/xdp/xsk_diag.c:113 [inline] RIP: 0010:xsk_diag_dump+0xdcb/0x13a0 net/xdp/xsk_diag.c:143 Code: 8d be c0 04 00 00 48 89 f8 48 c1 e8 03 42 80 3c 20 00 0f 85 39 04 00 00 49 8b 96 c0 04 00 00 48 8d 7a 14 48 89 f8 48 c1 e8 03 <42> 0f b6 0c 20 48 89 f8 83 e0 07 83 c0 03 38 c8 7c 08 84 c9 0f 85 RSP: 0018:ffff888090bcf2d8 EFLAGS: 00010203 RAX: 0000000000000002 RBX: ffff8880a0aacbc0 RCX: ffffffff86ffdc3c RDX: 0000000000000000 RSI: ffffffff86ffdc70 RDI: 0000000000000014 RBP: ffff888090bcf438 R08: ffff88808e04a700 R09: ffffed1011c74174 R10: ffffed1011c74173 R11: ffff88808e3a0b9f R12: dffffc0000000000 R13: ffff888093a6d818 R14: ffff88808e365240 R15: ffff88808e3a0b40 FS: 00000000011ea880(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000080 CR3: 000000008fa13000 CR4: 00000000001406e0 Call Trace: netlink_dump+0x55d/0xfb0 net/netlink/af_netlink.c:2252 __netlink_dump_start+0x5b4/0x7e0 net/netlink/af_netlink.c:2360 netlink_dump_start include/linux/netlink.h:226 [inline] xsk_diag_handler_dump+0x1b2/0x250 net/xdp/xsk_diag.c:170 __sock_diag_cmd net/core/sock_diag.c:232 [inline] sock_diag_rcv_msg+0x322/0x410 net/core/sock_diag.c:263 netlink_rcv_skb+0x17a/0x460 net/netlink/af_netlink.c:2485 sock_diag_rcv+0x2b/0x40 net/core/sock_diag.c:274 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0x536/0x720 net/netlink/af_netlink.c:1336 
netlink_sendmsg+0x8ae/0xd70 net/netlink/af_netlink.c:1925 sock_sendmsg_nosec net/socket.c:622 [inline] sock_sendmsg+0xdd/0x130 net/socket.c:632 sock_write_iter+0x27c/0x3e0 net/socket.c:923 call_write_iter include/linux/fs.h:1863 [inline] do_iter_readv_writev+0x5e0/0x8e0 fs/read_write.c:680 do_iter_write fs/read_write.c:956 [inline] do_iter_write+0x184/0x610 fs/read_write.c:937 vfs_writev+0x1b3/0x2f0 fs/read_write.c:1001 do_writev+0xf6/0x290 fs/read_write.c:1036 __do_sys_writev fs/read_write.c:1109 [inline] __se_sys_writev fs/read_write.c:1106 [inline] __x64_sys_writev+0x75/0xb0 fs/read_write.c:1106 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x440139 Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007ffcc966cc18 EFLAGS: 00000246 ORIG_RAX: 0000000000000014 RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000440139 RDX: 0000000000000001 RSI: 0000000020000080 RDI: 0000000000000003 RBP: 00000000006ca018 R08: 00000000004002c8 R09: 00000000004002c8 R10: 0000000000000004 R11: 0000000000000246 R12: 00000000004019c0 R13: 0000000000401a50 R14: 0000000000000000 R15: 0000000000000000 Modules linked in: ---[ end trace 460a3c24d0a656c9 ]--- RIP: 0010:xsk_diag_put_umem net/xdp/xsk_diag.c:71 [inline] RIP: 0010:xsk_diag_fill net/xdp/xsk_diag.c:113 [inline] RIP: 0010:xsk_diag_dump+0xdcb/0x13a0 net/xdp/xsk_diag.c:143 Code: 8d be c0 04 00 00 48 89 f8 48 c1 e8 03 42 80 3c 20 00 0f 85 39 04 00 00 49 8b 96 c0 04 00 00 48 8d 7a 14 48 89 f8 48 c1 e8 03 <42> 0f b6 0c 20 48 89 f8 83 e0 07 83 c0 03 38 c8 7c 08 84 c9 0f 85 RSP: 0018:ffff888090bcf2d8 EFLAGS: 00010203 RAX: 0000000000000002 RBX: ffff8880a0aacbc0 RCX: ffffffff86ffdc3c RDX: 0000000000000000 RSI: ffffffff86ffdc70 RDI: 0000000000000014 RBP: ffff888090bcf438 R08: ffff88808e04a700 R09: ffffed1011c74174 R10: 
ffffed1011c74173 R11: ffff88808e3a0b9f R12: dffffc0000000000 R13: ffff888093a6d818 R14: ffff88808e365240 R15: ffff88808e3a0b40 FS: 00000000011ea880(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000001d22000 CR3: 000000008fa13000 CR4: 00000000001406f0 Fixes: a36b38aa2af6 ("xsk: add sock_diag interface for AF_XDP") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Björn Töpel Cc: Daniel Borkmann Cc: Magnus Karlsson Acked-by: Björn Töpel Signed-off-by: Daniel Borkmann --- net/xdp/xsk_diag.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c index 661d007c3b28..d5e06c8e0cbf 100644 --- a/net/xdp/xsk_diag.c +++ b/net/xdp/xsk_diag.c @@ -68,9 +68,9 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb) err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du); if (!err && umem->fq) - err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_FILL_RING, nlskb); + err = xsk_diag_put_ring(umem->fq, XDP_DIAG_UMEM_FILL_RING, nlskb); if (!err && umem->cq) { - err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_COMPLETION_RING, + err = xsk_diag_put_ring(umem->cq, XDP_DIAG_UMEM_COMPLETION_RING, nlskb); } return err; -- cgit From ea0371f7879987cff70e21d808e3e9fea624c051 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Mon, 4 Mar 2019 16:27:08 -0800 Subject: net: fix GSO in bpf_lwt_push_ip_encap GSO needs inner headers and inner protocol set properly to work. skb->inner_mac_header: skb_reset_inner_headers() assigns the current mac header value to inner_mac_header; but the mac header is not yet set at that point, so we need to call skb_reset_inner_mac_header, otherwise gre_gso_segment fails: it does int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); ... if (unlikely(!pskb_may_pull(skb, tnl_hlen))) ... skb->inner_protocol should also be correctly set. 
Fixes: ca78801a81e0 ("bpf: handle GSO in bpf_lwt_push_encap") Signed-off-by: Peter Oskolkov Reviewed-by: David Ahern Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- net/core/lwt_bpf.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index cf2f8897ca19..126d31ff5ee3 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -625,6 +625,8 @@ int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress) /* push the encap headers and fix pointers */ skb_reset_inner_headers(skb); + skb_reset_inner_mac_header(skb); /* mac header is not yet set */ + skb_set_inner_protocol(skb, skb->protocol); skb->encapsulation = 1; skb_push(skb, len); if (ingress) -- cgit From e8e3437762ad938880dd48a3c52d702e7cf3c124 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 7 Mar 2019 11:35:43 +0100 Subject: bpf: Stop the psock parser before canceling its work We might have never enabled (started) the psock's parser, in which case it will not get stopped when destroying the psock. 
This leads to a warning when trying to cancel parser's work from psock's deferred destructor: [ 405.325769] WARNING: CPU: 1 PID: 3216 at net/strparser/strparser.c:526 strp_done+0x3c/0x40 [ 405.326712] Modules linked in: [last unloaded: test_bpf] [ 405.327359] CPU: 1 PID: 3216 Comm: kworker/1:164 Tainted: G W 5.0.0 #42 [ 405.328294] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20180531_142017-buildhw-08.phx2.fedoraproject.org-1.fc28 04/01/2014 [ 405.329712] Workqueue: events sk_psock_destroy_deferred [ 405.330254] RIP: 0010:strp_done+0x3c/0x40 [ 405.330706] Code: 28 e8 b8 d5 6b ff 48 8d bb 80 00 00 00 e8 9c d5 6b ff 48 8b 7b 18 48 85 ff 74 0d e8 1e a5 e8 ff 48 c7 43 18 00 00 00 00 5b c3 <0f> 0b eb cf 66 66 66 66 90 55 89 f5 53 48 89 fb 48 83 c7 28 e8 0b [ 405.332862] RSP: 0018:ffffc900026bbe50 EFLAGS: 00010246 [ 405.333482] RAX: ffffffff819323e0 RBX: ffff88812cb83640 RCX: ffff88812cb829e8 [ 405.334228] RDX: 0000000000000001 RSI: ffff88812cb837e8 RDI: ffff88812cb83640 [ 405.335366] RBP: ffff88813fd22680 R08: 0000000000000000 R09: 000073746e657665 [ 405.336472] R10: 8080808080808080 R11: 0000000000000001 R12: ffff88812cb83600 [ 405.337760] R13: 0000000000000000 R14: ffff88811f401780 R15: ffff88812cb837e8 [ 405.338777] FS: 0000000000000000(0000) GS:ffff88813fd00000(0000) knlGS:0000000000000000 [ 405.339903] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 405.340821] CR2: 00007fb11489a6b8 CR3: 000000012d4d6000 CR4: 00000000000406e0 [ 405.341981] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 405.343131] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 405.344415] Call Trace: [ 405.344821] sk_psock_destroy_deferred+0x23/0x1b0 [ 405.345585] process_one_work+0x1ae/0x3e0 [ 405.346110] worker_thread+0x3c/0x3b0 [ 405.346576] ? pwq_unbound_release_workfn+0xd0/0xd0 [ 405.347187] kthread+0x11d/0x140 [ 405.347601] ? 
__kthread_parkme+0x80/0x80 [ 405.348108] ret_from_fork+0x35/0x40 [ 405.348566] ---[ end trace a4a3af4026a327d4 ]--- Stop psock's parser just before canceling its work. Fixes: 1d79895aef18 ("sk_msg: Always cancel strp work before freeing the psock") Reported-by: kernel test robot Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann --- net/core/skmsg.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/skmsg.c b/net/core/skmsg.c index ae6f06e45737..cc94d921476c 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -554,6 +554,7 @@ static void sk_psock_destroy_deferred(struct work_struct *gc) struct sk_psock *psock = container_of(gc, struct sk_psock, gc); /* No sk_callback_lock since already detached. */ + strp_stop(&psock->parser.strp); strp_done(&psock->parser.strp); cancel_work_sync(&psock->work); -- cgit From f54ba391d88f5a5d032175b4c308c176e34b80b7 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Fri, 8 Mar 2019 08:57:26 +0100 Subject: xsk: fix to reject invalid flags in xsk_bind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Passing a non-existing flag in the sxdp_flags member of struct sockaddr_xdp was, incorrectly, silently ignored. This patch addresses that behavior, and rejects any non-existing flags. We have examined existing user space code, and to our best knowledge, no one is relying on the current incorrect behavior. AF_XDP is still in its infancy, so from our perspective, the risk of breakage is very low, and addressing this problem now is important. 
Fixes: 965a99098443 ("xsk: add support for bind for Rx") Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- net/xdp/xsk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 6697084e3fdf..a14e8864e4fa 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -407,6 +407,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) if (sxdp->sxdp_family != AF_XDP) return -EINVAL; + flags = sxdp->sxdp_flags; + if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY)) + return -EINVAL; + mutex_lock(&xs->mutex); if (xs->dev) { err = -EBUSY; @@ -425,7 +429,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) } qid = sxdp->sxdp_queue_id; - flags = sxdp->sxdp_flags; if (flags & XDP_SHARED_UMEM) { struct xdp_sock *umem_xs; -- cgit From c57b557b644da624982c36b74f608cdb7b902868 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Fri, 8 Mar 2019 08:57:27 +0100 Subject: xsk: fix to reject invalid options in Tx descriptor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Passing a non-existing option in the options member of struct xdp_desc was, incorrectly, silently ignored. This patch addresses that behavior, and drops any Tx descriptor with non-existing options. We have examined existing user space code, and to our best knowledge, no one is relying on the current incorrect behavior. AF_XDP is still in its infancy, so from our perspective, the risk of breakage is very low, and addressing this problem now is important. 
Fixes: 35fcde7f8deb ("xsk: support for Tx") Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- net/xdp/xsk_queue.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index bcb5cbb40419..610c0bdc0c2b 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -174,8 +174,8 @@ static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d) if (!xskq_is_valid_addr(q, d->addr)) return false; - if (((d->addr + d->len) & q->chunk_mask) != - (d->addr & q->chunk_mask)) { + if (((d->addr + d->len) & q->chunk_mask) != (d->addr & q->chunk_mask) || + d->options) { q->invalid_descs++; return false; } -- cgit From 71b91a506bb05f9aef3acd57af2e835d85721942 Mon Sep 17 00:00:00 2001 From: Bo YU Date: Fri, 8 Mar 2019 01:45:51 -0500 Subject: bpf: fix warning about using plain integer as NULL Sparse warning below: sudo make C=2 CF=-D__CHECK_ENDIAN__ M=net/bpf/ CHECK net/bpf//test_run.c net/bpf//test_run.c:19:77: warning: Using plain integer as NULL pointer ./include/linux/bpf-cgroup.h:295:77: warning: Using plain integer as NULL pointer Fixes: 8bad74f9840f ("bpf: extend cgroup bpf core to allow multiple cgroup storage types") Acked-by: Yonghong Song Signed-off-by: Bo YU Signed-off-by: Daniel Borkmann --- net/bpf/test_run.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index da7051d62727..fab142b796ef 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -16,7 +16,7 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *retval, u32 *time) { - struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { 0 }; + struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL }; enum bpf_cgroup_storage_type stype; u64 time_start, time_spent = 0; int ret = 0; -- cgit