From 068d9d1eba72423e99162aad3586727180715c2a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 11 Aug 2020 19:29:23 -0700 Subject: bpf: Fix XDP FD-based attach/detach logic around XDP_FLAGS_UPDATE_IF_NOEXIST MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enforce XDP_FLAGS_UPDATE_IF_NOEXIST only if new BPF program to be attached is non-NULL (i.e., we are not detaching a BPF program). Fixes: d4baa9368a5e ("bpf, xdp: Extract common XDP program attachment logic") Reported-by: Stanislav Fomichev Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Tested-by: Stanislav Fomichev Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20200812022923.1217922-1-andriin@fb.com --- net/core/dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 7df6c9617321..b5d1129d8310 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8913,10 +8913,6 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack NL_SET_ERR_MSG(extack, "Active program does not match expected"); return -EEXIST; } - if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && cur_prog) { - NL_SET_ERR_MSG(extack, "XDP program already attached"); - return -EBUSY; - } /* put effective new program into new_prog */ if (link) @@ -8927,6 +8923,10 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack enum bpf_xdp_mode other_mode = mode == XDP_MODE_SKB ? XDP_MODE_DRV : XDP_MODE_SKB; + if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && cur_prog) { + NL_SET_ERR_MSG(extack, "XDP program already attached"); + return -EBUSY; + } if (!offload && dev_xdp_prog(dev, other_mode)) { NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time"); return -EEXIST; -- cgit From fd09af010788a884de1c39537c288830c3d305db Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 11 Aug 2020 15:04:37 -0700 Subject: bpf: sock_ops ctx access may stomp registers in corner case I had a sockmap program that after doing some refactoring started spewing this splat at me: [18610.807284] BUG: unable to handle kernel NULL pointer dereference at 0000000000000001 [...] [18610.807359] Call Trace: [18610.807370] ? 0xffffffffc114d0d5 [18610.807382] __cgroup_bpf_run_filter_sock_ops+0x7d/0xb0 [18610.807391] tcp_connect+0x895/0xd50 [18610.807400] tcp_v4_connect+0x465/0x4e0 [18610.807407] __inet_stream_connect+0xd6/0x3a0 [18610.807412] ? __inet_stream_connect+0x5/0x3a0 [18610.807417] inet_stream_connect+0x3b/0x60 [18610.807425] __sys_connect+0xed/0x120 After some debugging I was able to build this simple reproducer, __section("sockops/reproducer_bad") int bpf_reproducer_bad(struct bpf_sock_ops *skops) { volatile __maybe_unused __u32 i = skops->snd_ssthresh; return 0; } And along the way noticed that below program ran without splat, __section("sockops/reproducer_good") int bpf_reproducer_good(struct bpf_sock_ops *skops) { volatile __maybe_unused __u32 i = skops->snd_ssthresh; volatile __maybe_unused __u32 family; compiler_barrier(); family = skops->family; return 0; } So I decided to check out the code we generate for the above two programs and noticed each generates the BPF code you would expect, 0000000000000000 : ; volatile __maybe_unused __u32 i = skops->snd_ssthresh; 0: r1 = *(u32 *)(r1 + 96) 1: *(u32 *)(r10 - 4) = r1 ; return 0; 2: r0 = 0 3: exit 0000000000000000 : ; volatile __maybe_unused __u32 i = skops->snd_ssthresh; 0: r2 = *(u32 *)(r1 + 96) 1: *(u32 *)(r10 - 4) = r2 ; family = skops->family; 2: r1 = *(u32 *)(r1 + 20) 3: *(u32 *)(r10 - 8) = r1 ; return 0; 4: r0 = 0 5: exit So we get reasonable assembly, but still something was causing the null pointer dereference. So, we load the programs and dump the xlated version observing that line 0 above 'r* = *(u32 *)(r1 +96)' is going to be translated by the skops access helpers. int bpf_reproducer_bad(struct bpf_sock_ops * skops): ; volatile __maybe_unused __u32 i = skops->snd_ssthresh; 0: (61) r1 = *(u32 *)(r1 +28) 1: (15) if r1 == 0x0 goto pc+2 2: (79) r1 = *(u64 *)(r1 +0) 3: (61) r1 = *(u32 *)(r1 +2340) ; volatile __maybe_unused __u32 i = skops->snd_ssthresh; 4: (63) *(u32 *)(r10 -4) = r1 ; return 0; 5: (b7) r0 = 0 6: (95) exit int bpf_reproducer_good(struct bpf_sock_ops * skops): ; volatile __maybe_unused __u32 i = skops->snd_ssthresh; 0: (61) r2 = *(u32 *)(r1 +28) 1: (15) if r2 == 0x0 goto pc+2 2: (79) r2 = *(u64 *)(r1 +0) 3: (61) r2 = *(u32 *)(r2 +2340) ; volatile __maybe_unused __u32 i = skops->snd_ssthresh; 4: (63) *(u32 *)(r10 -4) = r2 ; family = skops->family; 5: (79) r1 = *(u64 *)(r1 +0) 6: (69) r1 = *(u16 *)(r1 +16) ; family = skops->family; 7: (63) *(u32 *)(r10 -8) = r1 ; return 0; 8: (b7) r0 = 0 9: (95) exit Then we look at lines 0 and 2 above. In the good case we do the zero check in r2 and then load 'r1 + 0' at line 2. Do a quick cross-check into the bpf_sock_ops check and we can confirm that is the 'struct sock *sk' pointer field. But, in the bad case, 0: (61) r1 = *(u32 *)(r1 +28) 1: (15) if r1 == 0x0 goto pc+2 2: (79) r1 = *(u64 *)(r1 +0) Oh no, we read 'r1 +28' into r1, this is skops->fullsock and then in line 2 we read the 'r1 +0' as a pointer. Now jumping back to our spat, [18610.807284] BUG: unable to handle kernel NULL pointer dereference at 0000000000000001 The 0x01 makes sense because that is exactly the fullsock value. And its not a valid dereference so we splat. To fix we need to guard the case when a program is doing a sock_ops field access with src_reg == dst_reg. This is already handled in the load case where the ctx_access handler uses a tmp register being careful to store the old value and restore it. To fix the get case test if src_reg == dst_reg and in this case do the is_fullsock test in the temporary register. Remembering to restore the temporary register before writing to either dst_reg or src_reg to avoid smashing the pointer into the struct holding the tmp variable. Adding this inline code to test_tcpbpf_kern will now be generated correctly from, 9: r2 = *(u32 *)(r2 + 96) to xlated code, 12: (7b) *(u64 *)(r2 +32) = r9 13: (61) r9 = *(u32 *)(r2 +28) 14: (15) if r9 == 0x0 goto pc+4 15: (79) r9 = *(u64 *)(r2 +32) 16: (79) r2 = *(u64 *)(r2 +0) 17: (61) r2 = *(u32 *)(r2 +2348) 18: (05) goto pc+1 19: (79) r9 = *(u64 *)(r2 +32) And in the normal case we keep the original code, because really this is an edge case. From this, 9: r2 = *(u32 *)(r6 + 96) to xlated code, 22: (61) r2 = *(u32 *)(r6 +28) 23: (15) if r2 == 0x0 goto pc+2 24: (79) r2 = *(u64 *)(r6 +0) 25: (61) r2 = *(u32 *)(r2 +2348) So three additional instructions if dst == src register, but I scanned my current code base and did not see this pattern anywhere so should not be a big deal. Further, it seems no one else has hit this or at least reported it so it must a fairly rare pattern. Fixes: 9b1f3d6e5af29 ("bpf: Refactor sock_ops_convert_ctx_access") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Acked-by: Song Liu Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/159718347772.4728.2781381670567919577.stgit@john-Precision-5820-Tower --- net/core/filter.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 7124f0fe6974..1baeeff2fcd2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8317,15 +8317,31 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, /* Helper macro for adding read access to tcp_sock or sock fields. */ #define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ do { \ + int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 2; \ BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) > \ sizeof_field(struct bpf_sock_ops, BPF_FIELD)); \ + if (si->dst_reg == reg || si->src_reg == reg) \ + reg--; \ + if (si->dst_reg == reg || si->src_reg == reg) \ + reg--; \ + if (si->dst_reg == si->src_reg) { \ + *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, \ + offsetof(struct bpf_sock_ops_kern, \ + temp)); \ + fullsock_reg = reg; \ + jmp += 2; \ + } \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ struct bpf_sock_ops_kern, \ is_fullsock), \ - si->dst_reg, si->src_reg, \ + fullsock_reg, si->src_reg, \ offsetof(struct bpf_sock_ops_kern, \ is_fullsock)); \ - *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \ + *insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \ + if (si->dst_reg == si->src_reg) \ + *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + temp)); \ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ struct bpf_sock_ops_kern, sk),\ si->dst_reg, si->src_reg, \ @@ -8334,6 +8350,12 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, OBJ_FIELD), \ si->dst_reg, si->dst_reg, \ offsetof(OBJ, OBJ_FIELD)); \ + if (si->dst_reg == si->src_reg) { \ + *insn++ = BPF_JMP_A(1); \ + *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + temp)); \ + } \ } while (0) #define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \ -- cgit From 84f44df664e9f0e261157e16ee1acd77cc1bb78d Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 11 Aug 2020 15:04:56 -0700 Subject: bpf: sock_ops sk access may stomp registers when dst_reg = src_reg Similar to patch ("bpf: sock_ops ctx access may stomp registers") if the src_reg = dst_reg when reading the sk field of a sock_ops struct we generate xlated code, 53: (61) r9 = *(u32 *)(r9 +28) 54: (15) if r9 == 0x0 goto pc+3 56: (79) r9 = *(u64 *)(r9 +0) This stomps on the r9 reg to do the sk_fullsock check and then when reading the skops->sk field instead of the sk pointer we get the sk_fullsock. To fix use similar pattern noted in the previous fix and use the temp field to save/restore a register used to do sk_fullsock check. After the fix the generated xlated code reads, 52: (7b) *(u64 *)(r9 +32) = r8 53: (61) r8 = *(u32 *)(r9 +28) 54: (15) if r9 == 0x0 goto pc+3 55: (79) r8 = *(u64 *)(r9 +32) 56: (79) r9 = *(u64 *)(r9 +0) 57: (05) goto pc+1 58: (79) r8 = *(u64 *)(r9 +32) Here r9 register was in-use so r8 is chosen as the temporary register. In line 52 r8 is saved in temp variable and at line 54 restored in case fullsock != 0. Finally we handle fullsock == 0 case by restoring at line 58. This adds a new macro SOCK_OPS_GET_SK it is almost possible to merge this with SOCK_OPS_GET_FIELD, but I found the extra branch logic a bit more confusing than just adding a new macro despite a bit of duplicating code. Fixes: 1314ef561102e ("bpf: export bpf_sock for BPF_PROG_TYPE_SOCK_OPS prog type") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Acked-by: Song Liu Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/159718349653.4728.6559437186853473612.stgit@john-Precision-5820-Tower --- net/core/filter.c | 49 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 1baeeff2fcd2..b2df52086445 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8358,6 +8358,43 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, } \ } while (0) +#define SOCK_OPS_GET_SK() \ + do { \ + int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 1; \ + if (si->dst_reg == reg || si->src_reg == reg) \ + reg--; \ + if (si->dst_reg == reg || si->src_reg == reg) \ + reg--; \ + if (si->dst_reg == si->src_reg) { \ + *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, \ + offsetof(struct bpf_sock_ops_kern, \ + temp)); \ + fullsock_reg = reg; \ + jmp += 2; \ + } \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ + struct bpf_sock_ops_kern, \ + is_fullsock), \ + fullsock_reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + is_fullsock)); \ + *insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \ + if (si->dst_reg == si->src_reg) \ + *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + temp)); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \ + struct bpf_sock_ops_kern, sk),\ + si->dst_reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, sk));\ + if (si->dst_reg == si->src_reg) { \ + *insn++ = BPF_JMP_A(1); \ + *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ + offsetof(struct bpf_sock_ops_kern, \ + temp)); \ + } \ + } while (0) + #define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \ SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock) @@ -8642,17 +8679,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_acked); break; case offsetof(struct bpf_sock_ops, sk): - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( - struct bpf_sock_ops_kern, - is_fullsock), - si->dst_reg, si->src_reg, - offsetof(struct bpf_sock_ops_kern, - is_fullsock)); - *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1); - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( - struct bpf_sock_ops_kern, sk), - si->dst_reg, si->src_reg, - offsetof(struct bpf_sock_ops_kern, sk)); + SOCK_OPS_GET_SK(); break; } return insn - insn_buf; -- cgit