From e688c3db7ca69bea1872c5706aec6a7fdf89df17 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Wed, 14 Oct 2020 10:56:08 -0700
Subject: bpf: Fix register equivalence tracking.

The 64-bit JEQ/JNE handling in reg_set_min_max() was clearing reg->id in either
true or false branch. In the case 'if (reg->id)' check was done on the other
branch the counter part register would have reg->id == 0 when called into
find_equal_scalars(). In such case the helper would incorrectly identify other
registers with id == 0 as equivalent and propagate the state incorrectly.
Fix it by preserving ID across reg_set_min_max().

In other words any kind of comparison operator on the scalar register
should preserve its ID to recognize:

r1 = r2
if (r1 == 20) {
  #1 here both r1 and r2 == 20
} else if (r2 < 20) {
  #2 here both r1 and r2 < 20
}

The patch is addressing #1 case. The #2 was working correctly already.

Fixes: 75748837b7e5 ("bpf: Propagate scalar ranges through register assignments.")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Tested-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20201014175608.1416-1-alexei.starovoitov@gmail.com
---
 kernel/bpf/verifier.c                           | 38 ++++++++++++++++---------
 tools/testing/selftests/bpf/verifier/regalloc.c | 26 +++++++++++++++++
 2 files changed, 51 insertions(+), 13 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c43a5e8f0818..39d7f44e7c92 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1010,14 +1010,9 @@ static const int caller_saved[CALLER_SAVED_REGS] = {
 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
 				struct bpf_reg_state *reg);
 
-/* Mark the unknown part of a register (variable offset or scalar value) as
- * known to have the value @imm.
- */
-static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
+/* This helper doesn't clear reg->id */
+static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
 {
-	/* Clear id, off, and union(map_ptr, range) */
-	memset(((u8 *)reg) + sizeof(reg->type), 0,
-	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
 	reg->var_off = tnum_const(imm);
 	reg->smin_value = (s64)imm;
 	reg->smax_value = (s64)imm;
@@ -1030,6 +1025,17 @@ static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
 	reg->u32_max_value = (u32)imm;
 }
 
+/* Mark the unknown part of a register (variable offset or scalar value) as
+ * known to have the value @imm.
+ */
+static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
+{
+	/* Clear id, off, and union(map_ptr, range) */
+	memset(((u8 *)reg) + sizeof(reg->type), 0,
+	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
+	___mark_reg_known(reg, imm);
+}
+
 static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
 {
 	reg->var_off = tnum_const_subreg(reg->var_off, imm);
@@ -7001,14 +7007,18 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 		struct bpf_reg_state *reg =
 			opcode == BPF_JEQ ? true_reg : false_reg;
 
-		/* For BPF_JEQ, if this is false we know nothing Jon Snow, but
-		 * if it is true we know the value for sure. Likewise for
-		 * BPF_JNE.
+		/* JEQ/JNE comparison doesn't change the register equivalence.
+		 * r1 = r2;
+		 * if (r1 == 42) goto label;
+		 * ...
+		 * label: // here both r1 and r2 are known to be 42.
+		 *
+		 * Hence when marking register as known preserve it's ID.
 		 */
 		if (is_jmp32)
 			__mark_reg32_known(reg, val32);
 		else
-			__mark_reg_known(reg, val);
+			___mark_reg_known(reg, val);
 		break;
 	}
 	case BPF_JSET:
@@ -7551,7 +7561,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 				reg_combine_min_max(&other_branch_regs[insn->src_reg],
 						    &other_branch_regs[insn->dst_reg],
 						    src_reg, dst_reg, opcode);
-			if (src_reg->id) {
+			if (src_reg->id &&
+			    !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
 				find_equal_scalars(this_branch, src_reg);
 				find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
 			}
@@ -7563,7 +7574,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 					opcode, is_jmp32);
 	}
 
-	if (dst_reg->type == SCALAR_VALUE && dst_reg->id) {
+	if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
+	    !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
 		find_equal_scalars(this_branch, dst_reg);
 		find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
 	}
diff --git a/tools/testing/selftests/bpf/verifier/regalloc.c b/tools/testing/selftests/bpf/verifier/regalloc.c
index ac71b824f97a..4ad7e05de706 100644
--- a/tools/testing/selftests/bpf/verifier/regalloc.c
+++ b/tools/testing/selftests/bpf/verifier/regalloc.c
@@ -241,3 +241,29 @@
 	.result = ACCEPT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 },
+{
+	"regalloc, spill, JEQ",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), /* spill r0 */
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 0),
+	/* The verifier will walk the rest twice with r0 == 0 and r0 == map_value */
+	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 20, 0),
+	/* The verifier will walk the rest two more times with r0 == 20 and r0 == unknown */
+	BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 with map_value */
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 1), /* skip ldx if map_value == NULL */
+	/* Buggy verifier will think that r3 == 20 here */
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), /* read from map_value */
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_48b = { 4 },
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
-- 
cgit 


From f58423aeab28f861b67933206f322f764f05787d Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Mon, 12 Oct 2020 11:18:50 +0200
Subject: bpf, sockmap: Add locking annotations to iterator

The sparse checker currently outputs the following warnings:

    include/linux/rcupdate.h:632:9: sparse: sparse: context imbalance in 'sock_hash_seq_start' - wrong count at exit
    include/linux/rcupdate.h:632:9: sparse: sparse: context imbalance in 'sock_map_seq_start' - wrong count at exit

Add the necessary __acquires and __release annotations to make the
iterator locking schema palatable to sparse. Also add __must_hold
for good measure.

The kernel codebase uses both __acquires(rcu) and __acquires(RCU).
I couldn't find any guidance which one is preferred, so I used
what is easier to type out.

Fixes: 0365351524d7 ("net: Allow iterating sockmap and sockhash")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/20201012091850.67452-1-lmb@cloudflare.com
---
 net/core/sock_map.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index df09c39a4dd2..203900a6ca5f 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -745,6 +745,7 @@ static void *sock_map_seq_lookup_elem(struct sock_map_seq_info *info)
 }
 
 static void *sock_map_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(rcu)
 {
 	struct sock_map_seq_info *info = seq->private;
 
@@ -757,6 +758,7 @@ static void *sock_map_seq_start(struct seq_file *seq, loff_t *pos)
 }
 
 static void *sock_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+	__must_hold(rcu)
 {
 	struct sock_map_seq_info *info = seq->private;
 
@@ -767,6 +769,7 @@ static void *sock_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static int sock_map_seq_show(struct seq_file *seq, void *v)
+	__must_hold(rcu)
 {
 	struct sock_map_seq_info *info = seq->private;
 	struct bpf_iter__sockmap ctx = {};
@@ -789,6 +792,7 @@ static int sock_map_seq_show(struct seq_file *seq, void *v)
 }
 
 static void sock_map_seq_stop(struct seq_file *seq, void *v)
+	__releases(rcu)
 {
 	if (!v)
 		(void)sock_map_seq_show(seq, NULL);
@@ -1353,6 +1357,7 @@ static void *sock_hash_seq_find_next(struct sock_hash_seq_info *info,
 }
 
 static void *sock_hash_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(rcu)
 {
 	struct sock_hash_seq_info *info = seq->private;
 
@@ -1365,6 +1370,7 @@ static void *sock_hash_seq_start(struct seq_file *seq, loff_t *pos)
 }
 
 static void *sock_hash_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+	__must_hold(rcu)
 {
 	struct sock_hash_seq_info *info = seq->private;
 
@@ -1373,6 +1379,7 @@ static void *sock_hash_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static int sock_hash_seq_show(struct seq_file *seq, void *v)
+	__must_hold(rcu)
 {
 	struct sock_hash_seq_info *info = seq->private;
 	struct bpf_iter__sockmap ctx = {};
@@ -1396,6 +1403,7 @@ static int sock_hash_seq_show(struct seq_file *seq, void *v)
 }
 
 static void sock_hash_seq_stop(struct seq_file *seq, void *v)
+	__releases(rcu)
 {
 	if (!v)
 		(void)sock_hash_seq_show(seq, NULL);
-- 
cgit 


From 8a3feed90e75283f15f830b1051bf048e4127c91 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Mon, 12 Oct 2020 16:49:40 -0700
Subject: bpf, selftest: Fix flaky tcp_hdr_options test when adding addr to lo

The tcp_hdr_options test adds a "::eB9F" addr to the lo dev.
However, this non loopback address will have a race on ipv6 dad
which may lead to EADDRNOTAVAIL error from time to time.

Even nodad is used in the iproute2 command, there is still a race in
when the route will be added.  This will then lead to ENETUNREACH from
time to time.

To avoid the above, this patch uses the default loopback address "::1"
to do the test.

Fixes: ad2f8eb0095e ("bpf: selftests: Tcp header options")
Reported-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201012234940.1707941-1-kafai@fb.com
---
 .../selftests/bpf/prog_tests/tcp_hdr_options.c     | 26 +---------------------
 .../bpf/progs/test_misc_tcp_hdr_options.c          |  2 +-
 2 files changed, 2 insertions(+), 26 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
index c86e67214a9e..c85174cdcb77 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
@@ -15,7 +15,7 @@
 #include "test_tcp_hdr_options.skel.h"
 #include "test_misc_tcp_hdr_options.skel.h"
 
-#define LO_ADDR6 "::eB9F"
+#define LO_ADDR6 "::1"
 #define CG_NAME "/tcpbpf-hdr-opt-test"
 
 struct bpf_test_option exp_passive_estab_in;
@@ -40,27 +40,6 @@ struct sk_fds {
 	int active_lport;
 };
 
-static int add_lo_addr(void)
-{
-	char ip_addr_cmd[256];
-	int cmdlen;
-
-	cmdlen = snprintf(ip_addr_cmd, sizeof(ip_addr_cmd),
-			  "ip -6 addr add %s/128 dev lo scope host",
-			  LO_ADDR6);
-
-	if (CHECK(cmdlen >= sizeof(ip_addr_cmd), "compile ip cmd",
-		  "failed to add host addr %s to lo. ip cmdlen is too long\n",
-		  LO_ADDR6))
-		return -1;
-
-	if (CHECK(system(ip_addr_cmd), "run ip cmd",
-		  "failed to add host addr %s to lo\n", LO_ADDR6))
-		return -1;
-
-	return 0;
-}
-
 static int create_netns(void)
 {
 	if (CHECK(unshare(CLONE_NEWNET), "create netns",
@@ -72,9 +51,6 @@ static int create_netns(void)
 		  "failed to bring lo link up\n"))
 		return -1;
 
-	if (add_lo_addr())
-		return -1;
-
 	return 0;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
index 72ec0178f653..6077a025092c 100644
--- a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
@@ -16,7 +16,7 @@
 #define BPF_PROG_TEST_TCP_HDR_OPTIONS
 #include "test_tcp_hdr_options.h"
 
-__u16 last_addr16_n = __bpf_htons(0xeB9F);
+__u16 last_addr16_n = __bpf_htons(1);
 __u16 active_lport_n = 0;
 __u16 active_lport_h = 0;
 __u16 passive_lport_n = 0;
-- 
cgit 


From 83c11c17553c0fca217105c17444c4ef5ab2403f Mon Sep 17 00:00:00 2001
From: Alex Dewar <alex.dewar90@gmail.com>
Date: Mon, 12 Oct 2020 18:09:53 +0100
Subject: net, sockmap: Don't call bpf_prog_put() on NULL pointer

If bpf_prog_inc_not_zero() fails for skb_parser, then bpf_prog_put() is
called unconditionally on skb_verdict, even though it may be NULL. Fix
and tidy up error path.

Fixes: 743df8b7749f ("bpf, sockmap: Check skb_verdict and skb_parser programs explicitly")
Addresses-Coverity-ID: 1497799: Null pointer dereferences (FORWARD_NULL)
Signed-off-by: Alex Dewar <alex.dewar90@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20201012170952.60750-1-alex.dewar90@gmail.com
---
 net/core/sock_map.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 203900a6ca5f..ddc899e83313 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -238,17 +238,18 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
 	int ret;
 
 	skb_verdict = READ_ONCE(progs->skb_verdict);
-	skb_parser = READ_ONCE(progs->skb_parser);
 	if (skb_verdict) {
 		skb_verdict = bpf_prog_inc_not_zero(skb_verdict);
 		if (IS_ERR(skb_verdict))
 			return PTR_ERR(skb_verdict);
 	}
+
+	skb_parser = READ_ONCE(progs->skb_parser);
 	if (skb_parser) {
 		skb_parser = bpf_prog_inc_not_zero(skb_parser);
 		if (IS_ERR(skb_parser)) {
-			bpf_prog_put(skb_verdict);
-			return PTR_ERR(skb_parser);
+			ret = PTR_ERR(skb_parser);
+			goto out_put_skb_verdict;
 		}
 	}
 
@@ -257,7 +258,7 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
 		msg_parser = bpf_prog_inc_not_zero(msg_parser);
 		if (IS_ERR(msg_parser)) {
 			ret = PTR_ERR(msg_parser);
-			goto out;
+			goto out_put_skb_parser;
 		}
 	}
 
@@ -311,11 +312,12 @@ out_drop:
 out_progs:
 	if (msg_parser)
 		bpf_prog_put(msg_parser);
-out:
-	if (skb_verdict)
-		bpf_prog_put(skb_verdict);
+out_put_skb_parser:
 	if (skb_parser)
 		bpf_prog_put(skb_parser);
+out_put_skb_verdict:
+	if (skb_verdict)
+		bpf_prog_put(skb_verdict);
 	return ret;
 }
 
-- 
cgit