From 2baae3545327632167c0180e9ca1d467416f1919 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 May 2019 09:59:16 -0700 Subject: bpf: devmap: fix use-after-free Read in __dev_map_entry_free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit synchronize_rcu() is fine when the rcu callbacks only need to free memory (kfree_rcu() or direct kfree() call rcu call backs) __dev_map_entry_free() is a bit more complex, so we need to make sure that call queued __dev_map_entry_free() callbacks have completed. sysbot report: BUG: KASAN: use-after-free in dev_map_flush_old kernel/bpf/devmap.c:365 [inline] BUG: KASAN: use-after-free in __dev_map_entry_free+0x2a8/0x300 kernel/bpf/devmap.c:379 Read of size 8 at addr ffff8801b8da38c8 by task ksoftirqd/1/18 CPU: 1 PID: 18 Comm: ksoftirqd/1 Not tainted 4.17.0+ #39 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1b9/0x294 lib/dump_stack.c:113 print_address_description+0x6c/0x20b mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433 dev_map_flush_old kernel/bpf/devmap.c:365 [inline] __dev_map_entry_free+0x2a8/0x300 kernel/bpf/devmap.c:379 __rcu_reclaim kernel/rcu/rcu.h:178 [inline] rcu_do_batch kernel/rcu/tree.c:2558 [inline] invoke_rcu_callbacks kernel/rcu/tree.c:2818 [inline] __rcu_process_callbacks kernel/rcu/tree.c:2785 [inline] rcu_process_callbacks+0xe9d/0x1760 kernel/rcu/tree.c:2802 __do_softirq+0x2e0/0xaf5 kernel/softirq.c:284 run_ksoftirqd+0x86/0x100 kernel/softirq.c:645 smpboot_thread_fn+0x417/0x870 kernel/smpboot.c:164 kthread+0x345/0x410 kernel/kthread.c:240 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412 Allocated by task 6675: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xc4/0xe0 mm/kasan/kasan.c:553 kmem_cache_alloc_trace+0x152/0x780 mm/slab.c:3620 kmalloc include/linux/slab.h:513 [inline] kzalloc include/linux/slab.h:706 [inline] dev_map_alloc+0x208/0x7f0 kernel/bpf/devmap.c:102 find_and_alloc_map kernel/bpf/syscall.c:129 [inline] map_create+0x393/0x1010 kernel/bpf/syscall.c:453 __do_sys_bpf kernel/bpf/syscall.c:2351 [inline] __se_sys_bpf kernel/bpf/syscall.c:2328 [inline] __x64_sys_bpf+0x303/0x510 kernel/bpf/syscall.c:2328 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 26: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x11a/0x170 mm/kasan/kasan.c:521 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 __cache_free mm/slab.c:3498 [inline] kfree+0xd9/0x260 mm/slab.c:3813 dev_map_free+0x4fa/0x670 kernel/bpf/devmap.c:191 bpf_map_free_deferred+0xba/0xf0 kernel/bpf/syscall.c:262 process_one_work+0xc64/0x1b70 kernel/workqueue.c:2153 worker_thread+0x181/0x13a0 kernel/workqueue.c:2296 kthread+0x345/0x410 kernel/kthread.c:240 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412 The buggy address belongs to the object at ffff8801b8da37c0 which belongs to the cache kmalloc-512 of size 512 The buggy address is located 264 bytes inside of 512-byte region [ffff8801b8da37c0, ffff8801b8da39c0) The buggy address belongs to the page: page:ffffea0006e368c0 count:1 mapcount:0 mapping:ffff8801da800940 index:0xffff8801b8da3540 flags: 0x2fffc0000000100(slab) raw: 02fffc0000000100 ffffea0007217b88 ffffea0006e30cc8 ffff8801da800940 raw: ffff8801b8da3540 ffff8801b8da3040 0000000100000004 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8801b8da3780: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb ffff8801b8da3800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb > ffff8801b8da3880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8801b8da3900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8801b8da3980: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc Fixes: 546ac1ffb70d ("bpf: add devmap, a map for storing net device references") Signed-off-by: Eric Dumazet Reported-by: syzbot+457d3e2ffbcf31aee5c0@syzkaller.appspotmail.com Acked-by: Toke Høiland-Jørgensen Acked-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- kernel/bpf/devmap.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/bpf/devmap.c') diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 191b79948424..1e525d70f833 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -164,6 +164,9 @@ static void dev_map_free(struct bpf_map *map) bpf_clear_redirect_map(map); synchronize_rcu(); + /* Make sure prior __dev_map_entry_free() have completed. */ + rcu_barrier(); + /* To ensure all pending flush operations have completed wait for flush * bitmap to indicate all flush_needed bits to be zero on _all_ cpus. * Because the above synchronize_rcu() ensures the map is disconnected -- cgit From 5b497af42fab12cadc0e29bcb7052cf9963603f5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 29 May 2019 07:18:09 -0700 Subject: treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 295 Based on 1 normalized pattern(s): this program is free software you can redistribute it and or modify it under the terms of version 2 of the gnu general public license as published by the free software foundation this program is distributed in the hope that it will be useful but without any warranty without even the implied warranty of merchantability or fitness for a particular purpose see the gnu general public license for more details extracted by the scancode license scanner the SPDX license identifier GPL-2.0-only has been chosen to replace the boilerplate/reference in 64 file(s). Signed-off-by: Thomas Gleixner Reviewed-by: Alexios Zavras Reviewed-by: Allison Randal Cc: linux-spdx@vger.kernel.org Link: https://lkml.kernel.org/r/20190529141901.894819585@linutronix.de Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/devmap.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'kernel/bpf/devmap.c') diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 1e525d70f833..15dbc15c5b0c 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -1,13 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. */ /* Devmaps primary use is as a backend map for XDP BPF helper call -- cgit From d4dd153d551634683fccf8881f606fa9f3dfa1ef Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Fri, 14 Jun 2019 17:20:13 +0900 Subject: bpf, devmap: Fix premature entry free on destroying map dev_map_free() waits for flush_needed bitmap to be empty in order to ensure all flush operations have completed before freeing its entries. However the corresponding clear_bit() was called before using the entries, so the entries could be used after free. All access to the entries needs to be done before clearing the bit. It seems commit a5e2da6e9787 ("bpf: netdev is never null in __dev_map_flush") accidentally changed the clear_bit() and memory access order. Note that the problem happens only in __dev_map_flush(), not in dev_map_flush_old(). dev_map_flush_old() is called only after nulling out the corresponding netdev_map entry, so dev_map_free() never frees the entry thus no such race happens there. Fixes: a5e2da6e9787 ("bpf: netdev is never null in __dev_map_flush") Signed-off-by: Toshiaki Makita Signed-off-by: Daniel Borkmann --- kernel/bpf/devmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/bpf/devmap.c') diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 1e525d70f833..e001fb1a96b1 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -291,10 +291,10 @@ void __dev_map_flush(struct bpf_map *map) if (unlikely(!dev)) continue; - __clear_bit(bit, bitmap); - bq = this_cpu_ptr(dev->bulkq); bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, true); + + __clear_bit(bit, bitmap); } } -- cgit From edabf4d9dd905acd60048ea1579943801e3a4876 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Fri, 14 Jun 2019 17:20:14 +0900 Subject: bpf, devmap: Add missing bulk queue free dev_map_free() forgot to free bulk queue when freeing its entries. Fixes: 5d053f9da431 ("bpf: devmap prepare xdp frames for bulking") Signed-off-by: Toshiaki Makita Acked-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- kernel/bpf/devmap.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/bpf/devmap.c') diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index e001fb1a96b1..a126d95d12de 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -186,6 +186,7 @@ static void dev_map_free(struct bpf_map *map) if (!dev) continue; + free_percpu(dev->bulkq); dev_put(dev->dev); kfree(dev); } -- cgit From 86723c8640633bee4b4588d3c7784ee7a0032f65 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Fri, 14 Jun 2019 17:20:15 +0900 Subject: bpf, devmap: Add missing RCU read lock on flush .ndo_xdp_xmit() assumes it is called under RCU. For example virtio_net uses RCU to detect it has setup the resources for tx. The assumption accidentally broke when introducing bulk queue in devmap. Fixes: 5d053f9da431 ("bpf: devmap prepare xdp frames for bulking") Reported-by: David Ahern Signed-off-by: Toshiaki Makita Signed-off-by: Daniel Borkmann --- kernel/bpf/devmap.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/bpf/devmap.c') diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index a126d95d12de..1defea4b2755 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -282,6 +282,7 @@ void __dev_map_flush(struct bpf_map *map) unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed); u32 bit; + rcu_read_lock(); for_each_set_bit(bit, bitmap, map->max_entries) { struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]); struct xdp_bulk_queue *bq; @@ -297,6 +298,7 @@ void __dev_map_flush(struct bpf_map *map) __clear_bit(bit, bitmap); } + rcu_read_unlock(); } /* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or @@ -389,6 +391,7 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev) int cpu; + rcu_read_lock(); for_each_online_cpu(cpu) { bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu); __clear_bit(dev->bit, bitmap); @@ -396,6 +399,7 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev) bq = per_cpu_ptr(dev->bulkq, cpu); bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, false); } + rcu_read_unlock(); } } -- cgit