From 16b2f264983dc264c1560cc0170e760dec1bf54f Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 21 Dec 2023 15:23:23 -0800 Subject: bpf: sockmap, fix proto update hook to avoid dup calls When sockets are added to a sockmap or sockhash we allocate and init a psock. Then update the proto ops with sock_map_init_proto the flow is sock_hash_update_common sock_map_link psock = sock_map_psock_get_checked() <-returns existing psock sock_map_init_proto(sk, psock) <- updates sk_proto If the socket is already in a map this results in the sock_map_init_proto being called multiple times on the same socket. We do this because when a socket is added to multiple maps this might result in a new set of BPF programs being attached to the socket requiring an updated ops struct. This creates a rule where it must be safe to call psock_update_sk_prot multiple times. When we added a fix for UAF through unix sockets in patch 4dd9a38a753fc we broke this rule by adding a sock_hold in that path to ensure the sock is not released. The result is if a af_unix stream sock is placed in multiple maps it results in a memory leak because we call sock_hold multiple times with only a single sock_put on it. Fixes: 8866730aed51 ("bpf, sockmap: af_unix stream sockets need to hold ref for pair sock") Reported-by: Xingwei Lee Signed-off-by: John Fastabend Signed-off-by: Martin KaFai Lau Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20231221232327.43678-2-john.fastabend@gmail.com --- net/unix/unix_bpf.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index 7ea7c3a0d0d0..bd84785bf8d6 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -161,15 +161,30 @@ int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool r { struct sock *sk_pair; + /* Restore does not decrement the sk_pair reference yet because we must + * keep the a reference to the socket until after an RCU grace period + * and any pending sends have completed. + */ if (restore) { sk->sk_write_space = psock->saved_write_space; sock_replace_proto(sk, psock->sk_proto); return 0; } - sk_pair = unix_peer(sk); - sock_hold(sk_pair); - psock->sk_pair = sk_pair; + /* psock_update_sk_prot can be called multiple times if psock is + * added to multiple maps and/or slots in the same map. There is + * also an edge case where replacing a psock with itself can trigger + * an extra psock_update_sk_prot during the insert process. So it + * must be safe to do multiple calls. Here we need to ensure we don't + * increment the refcnt through sock_hold many times. There will only + * be a single matching destroy operation. + */ + if (!psock->sk_pair) { + sk_pair = unix_peer(sk); + sock_hold(sk_pair); + psock->sk_pair = sk_pair; + } + unix_stream_bpf_check_needs_rebuild(psock->sk_proto); sock_replace_proto(sk, &unix_stream_bpf_prot); return 0; -- cgit From 98e20e5e13d2811898921f999288be7151a11954 Mon Sep 17 00:00:00 2001 From: Quentin Deslandes Date: Tue, 26 Dec 2023 14:07:42 +0100 Subject: bpfilter: remove bpfilter bpfilter was supposed to convert iptables filtering rules into BPF programs on the fly, from the kernel, through a usermode helper. The base code for the UMH was introduced in 2018, and couple of attempts (2, 3) tried to introduce the BPF program generate features but were abandoned. bpfilter now sits in a kernel tree unused and unusable, occasionally causing confusion amongst Linux users (4, 5). As bpfilter is now developed in a dedicated repository on GitHub (6), it was suggested a couple of times this year (LSFMM/BPF 2023, LPC 2023) to remove the deprecated kernel part of the project. This is the purpose of this patch. [1]: https://lore.kernel.org/lkml/20180522022230.2492505-1-ast@kernel.org/ [2]: https://lore.kernel.org/bpf/20210829183608.2297877-1-me@ubique.spb.ru/#t [3]: https://lore.kernel.org/lkml/20221224000402.476079-1-qde@naccy.de/ [4]: https://dxuuu.xyz/bpfilter.html [5]: https://github.com/linuxkit/linuxkit/pull/3904 [6]: https://github.com/facebook/bpfilter Signed-off-by: Quentin Deslandes Link: https://lore.kernel.org/r/20231226130745.465988-1-qde@naccy.de Signed-off-by: Alexei Starovoitov --- net/Kconfig | 2 - net/Makefile | 1 - net/bpfilter/.gitignore | 2 - net/bpfilter/Kconfig | 23 ------- net/bpfilter/Makefile | 20 ------ net/bpfilter/bpfilter_kern.c | 136 --------------------------------------- net/bpfilter/bpfilter_umh_blob.S | 7 -- net/bpfilter/main.c | 64 ------------------ net/bpfilter/msgfmt.h | 17 ----- net/ipv4/Makefile | 2 - net/ipv4/bpfilter/Makefile | 2 - net/ipv4/bpfilter/sockopt.c | 71 -------------------- net/ipv4/ip_sockglue.c | 12 ---- 13 files changed, 359 deletions(-) delete mode 100644 net/bpfilter/.gitignore delete mode 100644 net/bpfilter/Kconfig delete mode 100644 net/bpfilter/Makefile delete mode 100644 net/bpfilter/bpfilter_kern.c delete mode 100644 net/bpfilter/bpfilter_umh_blob.S delete mode 100644 net/bpfilter/main.c delete mode 100644 net/bpfilter/msgfmt.h delete mode 100644 net/ipv4/bpfilter/Makefile delete mode 100644 net/ipv4/bpfilter/sockopt.c (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index 3ec6bc98fa05..4adc47d0c9c2 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -233,8 +233,6 @@ source "net/bridge/netfilter/Kconfig" endif -source "net/bpfilter/Kconfig" - source "net/dccp/Kconfig" source "net/sctp/Kconfig" source "net/rds/Kconfig" diff --git a/net/Makefile b/net/Makefile index 4c4dc535453d..b06b5539e7a6 100644 --- a/net/Makefile +++ b/net/Makefile @@ -19,7 +19,6 @@ obj-$(CONFIG_TLS) += tls/ obj-$(CONFIG_XFRM) += xfrm/ obj-$(CONFIG_UNIX_SCM) += unix/ obj-y += ipv6/ -obj-$(CONFIG_BPFILTER) += bpfilter/ obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ obj-$(CONFIG_BRIDGE) += bridge/ diff --git a/net/bpfilter/.gitignore b/net/bpfilter/.gitignore deleted file mode 100644 index f34e85ee8204..000000000000 --- a/net/bpfilter/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -bpfilter_umh diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig deleted file mode 100644 index 3d4a21462458..000000000000 --- a/net/bpfilter/Kconfig +++ /dev/null @@ -1,23 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -menuconfig BPFILTER - bool "BPF based packet filtering framework (BPFILTER)" - depends on BPF && INET - select USERMODE_DRIVER - help - This builds experimental bpfilter framework that is aiming to - provide netfilter compatible functionality via BPF - -if BPFILTER -config BPFILTER_UMH - tristate "bpfilter kernel module with user mode helper" - depends on CC_CAN_LINK - depends on m || CC_CAN_LINK_STATIC - default m - help - This builds bpfilter kernel module with embedded user mode helper - - Note: To compile this as built-in, your toolchain must support - building static binaries, since rootfs isn't mounted at the time - when __init functions are called and do_execv won't be able to find - the elf interpreter. -endif diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile deleted file mode 100644 index cdac82b8c53a..000000000000 --- a/net/bpfilter/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Makefile for the Linux BPFILTER layer. -# - -userprogs := bpfilter_umh -bpfilter_umh-objs := main.o -userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi - -ifeq ($(CONFIG_BPFILTER_UMH), y) -# builtin bpfilter_umh should be linked with -static -# since rootfs isn't mounted at the time of __init -# function is called and do_execv won't find elf interpreter -userldflags += -static -endif - -$(obj)/bpfilter_umh_blob.o: $(obj)/bpfilter_umh - -obj-$(CONFIG_BPFILTER_UMH) += bpfilter.o -bpfilter-objs += bpfilter_kern.o bpfilter_umh_blob.o diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c deleted file mode 100644 index 97e129e3f31c..000000000000 --- a/net/bpfilter/bpfilter_kern.c +++ /dev/null @@ -1,136 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include -#include -#include -#include -#include -#include -#include -#include -#include "msgfmt.h" - -extern char bpfilter_umh_start; -extern char bpfilter_umh_end; - -static void shutdown_umh(void) -{ - struct umd_info *info = &bpfilter_ops.info; - struct pid *tgid = info->tgid; - - if (tgid) { - kill_pid(tgid, SIGKILL, 1); - wait_event(tgid->wait_pidfd, thread_group_exited(tgid)); - umd_cleanup_helper(info); - } -} - -static void __stop_umh(void) -{ - if (IS_ENABLED(CONFIG_INET)) - shutdown_umh(); -} - -static int bpfilter_send_req(struct mbox_request *req) -{ - struct mbox_reply reply; - loff_t pos = 0; - ssize_t n; - - if (!bpfilter_ops.info.tgid) - return -EFAULT; - pos = 0; - n = kernel_write(bpfilter_ops.info.pipe_to_umh, req, sizeof(*req), - &pos); - if (n != sizeof(*req)) { - pr_err("write fail %zd\n", n); - goto stop; - } - pos = 0; - n = kernel_read(bpfilter_ops.info.pipe_from_umh, &reply, sizeof(reply), - &pos); - if (n != sizeof(reply)) { - pr_err("read fail %zd\n", n); - goto stop; - } - return reply.status; -stop: - __stop_umh(); - return -EFAULT; -} - -static int bpfilter_process_sockopt(struct sock *sk, int optname, - sockptr_t optval, unsigned int optlen, - bool is_set) -{ - struct mbox_request req = { - .is_set = is_set, - .pid = current->pid, - .cmd = optname, - .addr = (uintptr_t)optval.user, - .len = optlen, - }; - if (sockptr_is_kernel(optval)) { - pr_err("kernel access not supported\n"); - return -EFAULT; - } - return bpfilter_send_req(&req); -} - -static int start_umh(void) -{ - struct mbox_request req = { .pid = current->pid }; - int err; - - /* fork usermode process */ - err = fork_usermode_driver(&bpfilter_ops.info); - if (err) - return err; - pr_info("Loaded bpfilter_umh pid %d\n", pid_nr(bpfilter_ops.info.tgid)); - - /* health check that usermode process started correctly */ - if (bpfilter_send_req(&req) != 0) { - shutdown_umh(); - return -EFAULT; - } - - return 0; -} - -static int __init load_umh(void) -{ - int err; - - err = umd_load_blob(&bpfilter_ops.info, - &bpfilter_umh_start, - &bpfilter_umh_end - &bpfilter_umh_start); - if (err) - return err; - - mutex_lock(&bpfilter_ops.lock); - err = start_umh(); - if (!err && IS_ENABLED(CONFIG_INET)) { - bpfilter_ops.sockopt = &bpfilter_process_sockopt; - bpfilter_ops.start = &start_umh; - } - mutex_unlock(&bpfilter_ops.lock); - if (err) - umd_unload_blob(&bpfilter_ops.info); - return err; -} - -static void __exit fini_umh(void) -{ - mutex_lock(&bpfilter_ops.lock); - if (IS_ENABLED(CONFIG_INET)) { - shutdown_umh(); - bpfilter_ops.start = NULL; - bpfilter_ops.sockopt = NULL; - } - mutex_unlock(&bpfilter_ops.lock); - - umd_unload_blob(&bpfilter_ops.info); -} -module_init(load_umh); -module_exit(fini_umh); -MODULE_LICENSE("GPL"); diff --git a/net/bpfilter/bpfilter_umh_blob.S b/net/bpfilter/bpfilter_umh_blob.S deleted file mode 100644 index 40311d10d2f2..000000000000 --- a/net/bpfilter/bpfilter_umh_blob.S +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - .section .init.rodata, "a" - .global bpfilter_umh_start -bpfilter_umh_start: - .incbin "net/bpfilter/bpfilter_umh" - .global bpfilter_umh_end -bpfilter_umh_end: diff --git a/net/bpfilter/main.c b/net/bpfilter/main.c deleted file mode 100644 index 291a92546246..000000000000 --- a/net/bpfilter/main.c +++ /dev/null @@ -1,64 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include "../../include/uapi/linux/bpf.h" -#include -#include "msgfmt.h" - -FILE *debug_f; - -static int handle_get_cmd(struct mbox_request *cmd) -{ - switch (cmd->cmd) { - case 0: - return 0; - default: - break; - } - return -ENOPROTOOPT; -} - -static int handle_set_cmd(struct mbox_request *cmd) -{ - return -ENOPROTOOPT; -} - -static void loop(void) -{ - while (1) { - struct mbox_request req; - struct mbox_reply reply; - int n; - - n = read(0, &req, sizeof(req)); - if (n != sizeof(req)) { - fprintf(debug_f, "invalid request %d\n", n); - return; - } - - reply.status = req.is_set ? - handle_set_cmd(&req) : - handle_get_cmd(&req); - - n = write(1, &reply, sizeof(reply)); - if (n != sizeof(reply)) { - fprintf(debug_f, "reply failed %d\n", n); - return; - } - } -} - -int main(void) -{ - debug_f = fopen("/dev/kmsg", "w"); - setvbuf(debug_f, 0, _IOLBF, 0); - fprintf(debug_f, "<5>Started bpfilter\n"); - loop(); - fclose(debug_f); - return 0; -} diff --git a/net/bpfilter/msgfmt.h b/net/bpfilter/msgfmt.h deleted file mode 100644 index 98d121c62945..000000000000 --- a/net/bpfilter/msgfmt.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NET_BPFILTER_MSGFMT_H -#define _NET_BPFILTER_MSGFMT_H - -struct mbox_request { - __u64 addr; - __u32 len; - __u32 is_set; - __u32 cmd; - __u32 pid; -}; - -struct mbox_reply { - __u32 status; -}; - -#endif diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index e144a02a6a61..ec36d2ec059e 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -16,8 +16,6 @@ obj-y := route.o inetpeer.o protocol.o \ inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \ metrics.o netlink.o nexthop.o udp_tunnel_stub.o -obj-$(CONFIG_BPFILTER) += bpfilter/ - obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o obj-$(CONFIG_PROC_FS) += proc.o diff --git a/net/ipv4/bpfilter/Makefile b/net/ipv4/bpfilter/Makefile deleted file mode 100644 index 00af5305e05a..000000000000 --- a/net/ipv4/bpfilter/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_BPFILTER) += sockopt.o diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c deleted file mode 100644 index 193bcc2acccc..000000000000 --- a/net/ipv4/bpfilter/sockopt.c +++ /dev/null @@ -1,71 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct bpfilter_umh_ops bpfilter_ops; -EXPORT_SYMBOL_GPL(bpfilter_ops); - -static int bpfilter_mbox_request(struct sock *sk, int optname, sockptr_t optval, - unsigned int optlen, bool is_set) -{ - int err; - mutex_lock(&bpfilter_ops.lock); - if (!bpfilter_ops.sockopt) { - mutex_unlock(&bpfilter_ops.lock); - request_module("bpfilter"); - mutex_lock(&bpfilter_ops.lock); - - if (!bpfilter_ops.sockopt) { - err = -ENOPROTOOPT; - goto out; - } - } - if (bpfilter_ops.info.tgid && - thread_group_exited(bpfilter_ops.info.tgid)) - umd_cleanup_helper(&bpfilter_ops.info); - - if (!bpfilter_ops.info.tgid) { - err = bpfilter_ops.start(); - if (err) - goto out; - } - err = bpfilter_ops.sockopt(sk, optname, optval, optlen, is_set); -out: - mutex_unlock(&bpfilter_ops.lock); - return err; -} - -int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval, - unsigned int optlen) -{ - return bpfilter_mbox_request(sk, optname, optval, optlen, true); -} - -int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, - int __user *optlen) -{ - int len; - - if (get_user(len, optlen)) - return -EFAULT; - - return bpfilter_mbox_request(sk, optname, USER_SOCKPTR(optval), len, - false); -} - -static int __init bpfilter_sockopt_init(void) -{ - mutex_init(&bpfilter_ops.lock); - bpfilter_ops.info.tgid = NULL; - bpfilter_ops.info.driver_name = "bpfilter_umh"; - - return 0; -} -device_initcall(bpfilter_sockopt_init); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 66247e8b429e..7aa9dc0e6760 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -47,8 +47,6 @@ #include #include -#include - /* * SOL_IP control messages. */ @@ -1411,11 +1409,6 @@ int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, return -ENOPROTOOPT; err = do_ip_setsockopt(sk, level, optname, optval, optlen); -#if IS_ENABLED(CONFIG_BPFILTER_UMH) - if (optname >= BPFILTER_IPT_SO_SET_REPLACE && - optname < BPFILTER_IPT_SET_MAX) - err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen); -#endif #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_HDRINCL && @@ -1763,11 +1756,6 @@ int ip_getsockopt(struct sock *sk, int level, err = do_ip_getsockopt(sk, level, optname, USER_SOCKPTR(optval), USER_SOCKPTR(optlen)); -#if IS_ENABLED(CONFIG_BPFILTER_UMH) - if (optname >= BPFILTER_IPT_SO_GET_INFO && - optname < BPFILTER_IPT_GET_MAX) - err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen); -#endif #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && -- cgit