From e688c3db7ca69bea1872c5706aec6a7fdf89df17 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov <ast@kernel.org> Date: Wed, 14 Oct 2020 10:56:08 -0700 Subject: bpf: Fix register equivalence tracking. The 64-bit JEQ/JNE handling in reg_set_min_max() was clearing reg->id in either the true or the false branch. In the case where the 'if (reg->id)' check was done on the other branch, the counterpart register would have reg->id == 0 when passed into find_equal_scalars(). In such a case the helper would incorrectly identify other registers with id == 0 as equivalent and propagate the state incorrectly. Fix it by preserving the ID across reg_set_min_max(). In other words, any kind of comparison operator on the scalar register should preserve its ID to recognize: r1 = r2 if (r1 == 20) { #1 here both r1 and r2 == 20 } else if (r2 < 20) { #2 here both r1 and r2 < 20 } The patch addresses case #1. Case #2 was already working correctly. Fixes: 75748837b7e5 ("bpf: Propagate scalar ranges through register assignments.") Signed-off-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Andrii Nakryiko <andrii@kernel.org> Acked-by: John Fastabend <john.fastabend@gmail.com> Tested-by: Yonghong Song <yhs@fb.com> Link: https://lore.kernel.org/bpf/20201014175608.1416-1-alexei.starovoitov@gmail.com --- kernel/bpf/verifier.c | 38 ++++++++++++++++--------- tools/testing/selftests/bpf/verifier/regalloc.c | 26 +++++++++++++++++ 2 files changed, 51 insertions(+), 13 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c43a5e8f0818..39d7f44e7c92 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1010,14 +1010,9 @@ static const int caller_saved[CALLER_SAVED_REGS] = { static void __mark_reg_not_init(const struct bpf_verifier_env *env, struct bpf_reg_state *reg); -/* Mark the unknown part of a register (variable offset or scalar value) as - * known to have the value @imm. 
- */ -static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm) +/* This helper doesn't clear reg->id */ +static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm) { - /* Clear id, off, and union(map_ptr, range) */ - memset(((u8 *)reg) + sizeof(reg->type), 0, - offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type)); reg->var_off = tnum_const(imm); reg->smin_value = (s64)imm; reg->smax_value = (s64)imm; @@ -1030,6 +1025,17 @@ static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm) reg->u32_max_value = (u32)imm; } +/* Mark the unknown part of a register (variable offset or scalar value) as + * known to have the value @imm. + */ +static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm) +{ + /* Clear id, off, and union(map_ptr, range) */ + memset(((u8 *)reg) + sizeof(reg->type), 0, + offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type)); + ___mark_reg_known(reg, imm); +} + static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm) { reg->var_off = tnum_const_subreg(reg->var_off, imm); @@ -7001,14 +7007,18 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, struct bpf_reg_state *reg = opcode == BPF_JEQ ? true_reg : false_reg; - /* For BPF_JEQ, if this is false we know nothing Jon Snow, but - * if it is true we know the value for sure. Likewise for - * BPF_JNE. + /* JEQ/JNE comparison doesn't change the register equivalence. + * r1 = r2; + * if (r1 == 42) goto label; + * ... + * label: // here both r1 and r2 are known to be 42. + * + * Hence when marking register as known preserve it's ID. 
*/ if (is_jmp32) __mark_reg32_known(reg, val32); else - __mark_reg_known(reg, val); + ___mark_reg_known(reg, val); break; } case BPF_JSET: @@ -7551,7 +7561,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, reg_combine_min_max(&other_branch_regs[insn->src_reg], &other_branch_regs[insn->dst_reg], src_reg, dst_reg, opcode); - if (src_reg->id) { + if (src_reg->id && + !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) { find_equal_scalars(this_branch, src_reg); find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]); } @@ -7563,7 +7574,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, opcode, is_jmp32); } - if (dst_reg->type == SCALAR_VALUE && dst_reg->id) { + if (dst_reg->type == SCALAR_VALUE && dst_reg->id && + !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) { find_equal_scalars(this_branch, dst_reg); find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]); } diff --git a/tools/testing/selftests/bpf/verifier/regalloc.c b/tools/testing/selftests/bpf/verifier/regalloc.c index ac71b824f97a..4ad7e05de706 100644 --- a/tools/testing/selftests/bpf/verifier/regalloc.c +++ b/tools/testing/selftests/bpf/verifier/regalloc.c @@ -241,3 +241,29 @@ .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, +{ + "regalloc, spill, JEQ", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), /* spill r0 */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 0), + /* The verifier will walk the rest twice with r0 == 0 and r0 == map_value */ + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 20, 0), + /* The verifier will walk the rest two more times with r0 == 20 and r0 == unknown */ + BPF_LDX_MEM(BPF_DW, 
BPF_REG_3, BPF_REG_10, -8), /* fill r3 with map_value */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 1), /* skip ldx if map_value == NULL */ + /* Buggy verifier will think that r3 == 20 here */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), /* read from map_value */ + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, -- cgit From f58423aeab28f861b67933206f322f764f05787d Mon Sep 17 00:00:00 2001 From: Lorenz Bauer <lmb@cloudflare.com> Date: Mon, 12 Oct 2020 11:18:50 +0200 Subject: bpf, sockmap: Add locking annotations to iterator The sparse checker currently outputs the following warnings: include/linux/rcupdate.h:632:9: sparse: sparse: context imbalance in 'sock_hash_seq_start' - wrong count at exit include/linux/rcupdate.h:632:9: sparse: sparse: context imbalance in 'sock_map_seq_start' - wrong count at exit Add the necessary __acquires and __releases annotations to make the iterator locking scheme palatable to sparse. Also add __must_hold for good measure. The kernel codebase uses both __acquires(rcu) and __acquires(RCU). I couldn't find any guidance on which one is preferred, so I used what is easier to type out. 
Fixes: 0365351524d7 ("net: Allow iterating sockmap and sockhash") Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: Lorenz Bauer <lmb@cloudflare.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: John Fastabend <john.fastabend@gmail.com> Acked-by: Jakub Sitnicki <jakub@cloudflare.com> Link: https://lore.kernel.org/bpf/20201012091850.67452-1-lmb@cloudflare.com --- net/core/sock_map.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index df09c39a4dd2..203900a6ca5f 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -745,6 +745,7 @@ static void *sock_map_seq_lookup_elem(struct sock_map_seq_info *info) } static void *sock_map_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(rcu) { struct sock_map_seq_info *info = seq->private; @@ -757,6 +758,7 @@ static void *sock_map_seq_start(struct seq_file *seq, loff_t *pos) } static void *sock_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) + __must_hold(rcu) { struct sock_map_seq_info *info = seq->private; @@ -767,6 +769,7 @@ static void *sock_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static int sock_map_seq_show(struct seq_file *seq, void *v) + __must_hold(rcu) { struct sock_map_seq_info *info = seq->private; struct bpf_iter__sockmap ctx = {}; @@ -789,6 +792,7 @@ static int sock_map_seq_show(struct seq_file *seq, void *v) } static void sock_map_seq_stop(struct seq_file *seq, void *v) + __releases(rcu) { if (!v) (void)sock_map_seq_show(seq, NULL); @@ -1353,6 +1357,7 @@ static void *sock_hash_seq_find_next(struct sock_hash_seq_info *info, } static void *sock_hash_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(rcu) { struct sock_hash_seq_info *info = seq->private; @@ -1365,6 +1370,7 @@ static void *sock_hash_seq_start(struct seq_file *seq, loff_t *pos) } static void *sock_hash_seq_next(struct seq_file *seq, void *v, loff_t *pos) + __must_hold(rcu) { struct sock_hash_seq_info *info = seq->private; 
@@ -1373,6 +1379,7 @@ static void *sock_hash_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static int sock_hash_seq_show(struct seq_file *seq, void *v) + __must_hold(rcu) { struct sock_hash_seq_info *info = seq->private; struct bpf_iter__sockmap ctx = {}; @@ -1396,6 +1403,7 @@ static int sock_hash_seq_show(struct seq_file *seq, void *v) } static void sock_hash_seq_stop(struct seq_file *seq, void *v) + __releases(rcu) { if (!v) (void)sock_hash_seq_show(seq, NULL); -- cgit From 8a3feed90e75283f15f830b1051bf048e4127c91 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau <kafai@fb.com> Date: Mon, 12 Oct 2020 16:49:40 -0700 Subject: bpf, selftest: Fix flaky tcp_hdr_options test when adding addr to lo The tcp_hdr_options test adds a "::eB9F" addr to the lo dev. However, this non-loopback address is subject to a race with IPv6 DAD (duplicate address detection), which may lead to an EADDRNOTAVAIL error from time to time. Even if nodad is used in the iproute2 command, there is still a race on when the route will be added. This will then lead to ENETUNREACH from time to time. To avoid the above, this patch uses the default loopback address "::1" to do the test. 
Fixes: ad2f8eb0095e ("bpf: selftests: Tcp header options") Reported-by: Andrii Nakryiko <andriin@fb.com> Signed-off-by: Martin KaFai Lau <kafai@fb.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Andrii Nakryiko <andrii@kernel.org> Link: https://lore.kernel.org/bpf/20201012234940.1707941-1-kafai@fb.com --- .../selftests/bpf/prog_tests/tcp_hdr_options.c | 26 +--------------------- .../bpf/progs/test_misc_tcp_hdr_options.c | 2 +- 2 files changed, 2 insertions(+), 26 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c index c86e67214a9e..c85174cdcb77 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -15,7 +15,7 @@ #include "test_tcp_hdr_options.skel.h" #include "test_misc_tcp_hdr_options.skel.h" -#define LO_ADDR6 "::eB9F" +#define LO_ADDR6 "::1" #define CG_NAME "/tcpbpf-hdr-opt-test" struct bpf_test_option exp_passive_estab_in; @@ -40,27 +40,6 @@ struct sk_fds { int active_lport; }; -static int add_lo_addr(void) -{ - char ip_addr_cmd[256]; - int cmdlen; - - cmdlen = snprintf(ip_addr_cmd, sizeof(ip_addr_cmd), - "ip -6 addr add %s/128 dev lo scope host", - LO_ADDR6); - - if (CHECK(cmdlen >= sizeof(ip_addr_cmd), "compile ip cmd", - "failed to add host addr %s to lo. 
ip cmdlen is too long\n", - LO_ADDR6)) - return -1; - - if (CHECK(system(ip_addr_cmd), "run ip cmd", - "failed to add host addr %s to lo\n", LO_ADDR6)) - return -1; - - return 0; -} - static int create_netns(void) { if (CHECK(unshare(CLONE_NEWNET), "create netns", @@ -72,9 +51,6 @@ static int create_netns(void) "failed to bring lo link up\n")) return -1; - if (add_lo_addr()) - return -1; - return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c index 72ec0178f653..6077a025092c 100644 --- a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c @@ -16,7 +16,7 @@ #define BPF_PROG_TEST_TCP_HDR_OPTIONS #include "test_tcp_hdr_options.h" -__u16 last_addr16_n = __bpf_htons(0xeB9F); +__u16 last_addr16_n = __bpf_htons(1); __u16 active_lport_n = 0; __u16 active_lport_h = 0; __u16 passive_lport_n = 0; -- cgit From 83c11c17553c0fca217105c17444c4ef5ab2403f Mon Sep 17 00:00:00 2001 From: Alex Dewar <alex.dewar90@gmail.com> Date: Mon, 12 Oct 2020 18:09:53 +0100 Subject: net, sockmap: Don't call bpf_prog_put() on NULL pointer If bpf_prog_inc_not_zero() fails for skb_parser, then bpf_prog_put() is called unconditionally on skb_verdict, even though it may be NULL. Fix and tidy up error path. 
Fixes: 743df8b7749f ("bpf, sockmap: Check skb_verdict and skb_parser programs explicitly") Addresses-Coverity-ID: 1497799: Null pointer dereferences (FORWARD_NULL) Signed-off-by: Alex Dewar <alex.dewar90@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Jakub Sitnicki <jakub@cloudflare.com> Acked-by: John Fastabend <john.fastabend@gmail.com> Link: https://lore.kernel.org/bpf/20201012170952.60750-1-alex.dewar90@gmail.com --- net/core/sock_map.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 203900a6ca5f..ddc899e83313 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -238,17 +238,18 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs, int ret; skb_verdict = READ_ONCE(progs->skb_verdict); - skb_parser = READ_ONCE(progs->skb_parser); if (skb_verdict) { skb_verdict = bpf_prog_inc_not_zero(skb_verdict); if (IS_ERR(skb_verdict)) return PTR_ERR(skb_verdict); } + + skb_parser = READ_ONCE(progs->skb_parser); if (skb_parser) { skb_parser = bpf_prog_inc_not_zero(skb_parser); if (IS_ERR(skb_parser)) { - bpf_prog_put(skb_verdict); - return PTR_ERR(skb_parser); + ret = PTR_ERR(skb_parser); + goto out_put_skb_verdict; } } @@ -257,7 +258,7 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs, msg_parser = bpf_prog_inc_not_zero(msg_parser); if (IS_ERR(msg_parser)) { ret = PTR_ERR(msg_parser); - goto out; + goto out_put_skb_parser; } } @@ -311,11 +312,12 @@ out_drop: out_progs: if (msg_parser) bpf_prog_put(msg_parser); -out: - if (skb_verdict) - bpf_prog_put(skb_verdict); +out_put_skb_parser: if (skb_parser) bpf_prog_put(skb_parser); +out_put_skb_verdict: + if (skb_verdict) + bpf_prog_put(skb_verdict); return ret; } -- cgit