From e9a8e5a587ca55fec6c58e4881742705d45bee54 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Thu, 22 Feb 2024 17:41:20 +0200
Subject: bpf: check bpf_func_state->callback_depth when pruning states

When comparing current and cached states verifier should consider
bpf_func_state->callback_depth. Current state cannot be pruned against
cached state, when current states has more iterations left compared to
cached state. Current state has more iterations left when it's
callback_depth is smaller.

Below is an example illustrating this bug, minimized from mailing list
discussion [0] (assume that BPF_F_TEST_STATE_FREQ is set).
The example is not a safe program: if loop_cb point (1) is followed by
loop_cb point (2), then division by zero is possible at point (4).

    struct ctx {
    	__u64 a;
    	__u64 b;
    	__u64 c;
    };

    static void loop_cb(int i, struct ctx *ctx)
    {
    	/* assume that generated code is "fallthrough-first":
    	 * if ... == 1 goto
    	 * if ... == 2 goto
    	 * <default>
    	 */
    	switch (bpf_get_prandom_u32()) {
    	case 1:  /* 1 */ ctx->a = 42; return 0; break;
    	case 2:  /* 2 */ ctx->b = 42; return 0; break;
    	default: /* 3 */ ctx->c = 42; return 0; break;
    	}
    }

    SEC("tc")
    __failure
    __flag(BPF_F_TEST_STATE_FREQ)
    int test(struct __sk_buff *skb)
    {
    	struct ctx ctx = { 7, 7, 7 };

    	bpf_loop(2, loop_cb, &ctx, 0);              /* 0 */
    	/* assume generated checks are in-order: .a first */
    	if (ctx.a == 42 && ctx.b == 42 && ctx.c == 7)
    		asm volatile("r0 /= 0;":::"r0");    /* 4 */
    	return 0;
    }

Prior to this commit verifier built the following checkpoint tree for
this example:

 .------------------------------------- Checkpoint / State name
 |    .-------------------------------- Code point number
 |    |   .---------------------------- Stack state {ctx.a,ctx.b,ctx.c}
 |    |   |        .------------------- Callback depth in frame #0
 v    v   v        v
   - (0) {7P,7P,7},depth=0
     - (3) {7P,7P,7},depth=1
       - (0) {7P,7P,42},depth=1
         - (3) {7P,7,42},depth=2
           - (0) {7P,7,42},depth=2      loop terminates because of depth limit
             - (4) {7P,7,42},depth=0    predicted false, ctx.a marked precise
             - (6) exit
(a)      - (2) {7P,7,42},depth=2
           - (0) {7P,42,42},depth=2     loop terminates because of depth limit
             - (4) {7P,42,42},depth=0   predicted false, ctx.a marked precise
             - (6) exit
(b)      - (1) {7P,7P,42},depth=2
           - (0) {42P,7P,42},depth=2    loop terminates because of depth limit
             - (4) {42P,7P,42},depth=0  predicted false, ctx.{a,b} marked precise
             - (6) exit
     - (2) {7P,7,7},depth=1             considered safe, pruned using checkpoint (a)
(c)  - (1) {7P,7P,7},depth=1            considered safe, pruned using checkpoint (b)

Here checkpoint (b) has callback_depth of 2, meaning that it would
never reach state {42,42,7}.
While checkpoint (c) has callback_depth of 1, and thus
could yet explore the state {42,42,7} if not pruned prematurely.
This commit makes forbids such premature pruning,
allowing verifier to explore states sub-tree starting at (c):

(c)  - (1) {7,7,7P},depth=1
       - (0) {42P,7,7P},depth=1
         ...
         - (2) {42,7,7},depth=2
           - (0) {42,42,7},depth=2      loop terminates because of depth limit
             - (4) {42,42,7},depth=0    predicted true, ctx.{a,b,c} marked precise
               - (5) division by zero

[0] https://lore.kernel.org/bpf/9b251840-7cb8-4d17-bd23-1fc8071d8eef@linux.dev/

Fixes: bb124da69c47 ("bpf: keep track of max number of bpf_loop callback iterations")
Suggested-by: Yonghong Song <yonghong.song@linux.dev>
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Acked-by: Yonghong Song <yonghong.song@linux.dev>
Link: https://lore.kernel.org/r/20240222154121.6991-2-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 3 +++
 1 file changed, 3 insertions(+)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b263f093ee76..ddea9567f755 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -16602,6 +16602,9 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat
 {
 	int i;
 
+	if (old->callback_depth > cur->callback_depth)
+		return false;
+
 	for (i = 0; i < MAX_BPF_REG; i++)
 		if (!regsafe(env, &old->regs[i], &cur->regs[i],
 			     &env->idmap_scratch, exact))
-- 
cgit 


From 5c2bc5e2f81d3344095ae241032dde20a4ea2b48 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman <eddyz87@gmail.com>
Date: Thu, 22 Feb 2024 17:41:21 +0200
Subject: selftests/bpf: test case for callback_depth states pruning logic

The test case was minimized from mailing list discussion [0].
It is equivalent to the following C program:

    struct iter_limit_bug_ctx { __u64 a; __u64 b; __u64 c; };

    static __naked void iter_limit_bug_cb(void)
    {
    	switch (bpf_get_prandom_u32()) {
    	case 1:  ctx->a = 42; break;
    	case 2:  ctx->b = 42; break;
    	default: ctx->c = 42; break;
    	}
    }

    int iter_limit_bug(struct __sk_buff *skb)
    {
    	struct iter_limit_bug_ctx ctx = { 7, 7, 7 };

    	bpf_loop(2, iter_limit_bug_cb, &ctx, 0);
    	if (ctx.a == 42 && ctx.b == 42 && ctx.c == 7)
    	  asm volatile("r1 /= 0;":::"r1");
    	return 0;
    }

The main idea is that each loop iteration changes one of the state
variables in a non-deterministic manner. Hence it is premature to
prune the states that have two iterations left comparing them to
states with one iteration left.
E.g. {{7,7,7}, callback_depth=0} can reach state {42,42,7},
while {{7,7,7}, callback_depth=1} can't.

[0] https://lore.kernel.org/bpf/9b251840-7cb8-4d17-bd23-1fc8071d8eef@linux.dev/

Acked-by: Yonghong Song <yonghong.song@linux.dev>
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20240222154121.6991-3-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../bpf/progs/verifier_iterating_callbacks.c       | 70 ++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
index 5905e036e0ea..a955a6358206 100644
--- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
+++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
@@ -239,4 +239,74 @@ int bpf_loop_iter_limit_nested(void *unused)
 	return 1000 * a + b + c;
 }
 
+struct iter_limit_bug_ctx {
+	__u64 a;
+	__u64 b;
+	__u64 c;
+};
+
+static __naked void iter_limit_bug_cb(void)
+{
+	/* This is the same as C code below, but written
+	 * in assembly to control which branches are fall-through.
+	 *
+	 *   switch (bpf_get_prandom_u32()) {
+	 *   case 1:  ctx->a = 42; break;
+	 *   case 2:  ctx->b = 42; break;
+	 *   default: ctx->c = 42; break;
+	 *   }
+	 */
+	asm volatile (
+	"r9 = r2;"
+	"call %[bpf_get_prandom_u32];"
+	"r1 = r0;"
+	"r2 = 42;"
+	"r0 = 0;"
+	"if r1 == 0x1 goto 1f;"
+	"if r1 == 0x2 goto 2f;"
+	"*(u64 *)(r9 + 16) = r2;"
+	"exit;"
+	"1: *(u64 *)(r9 + 0) = r2;"
+	"exit;"
+	"2: *(u64 *)(r9 + 8) = r2;"
+	"exit;"
+	:
+	: __imm(bpf_get_prandom_u32)
+	: __clobber_all
+	);
+}
+
+SEC("tc")
+__failure
+__flag(BPF_F_TEST_STATE_FREQ)
+int iter_limit_bug(struct __sk_buff *skb)
+{
+	struct iter_limit_bug_ctx ctx = { 7, 7, 7 };
+
+	bpf_loop(2, iter_limit_bug_cb, &ctx, 0);
+
+	/* This is the same as C code below,
+	 * written in assembly to guarantee checks order.
+	 *
+	 *   if (ctx.a == 42 && ctx.b == 42 && ctx.c == 7)
+	 *     asm volatile("r1 /= 0;":::"r1");
+	 */
+	asm volatile (
+	"r1 = *(u64 *)%[ctx_a];"
+	"if r1 != 42 goto 1f;"
+	"r1 = *(u64 *)%[ctx_b];"
+	"if r1 != 42 goto 1f;"
+	"r1 = *(u64 *)%[ctx_c];"
+	"if r1 != 7 goto 1f;"
+	"r1 /= 0;"
+	"1:"
+	:
+	: [ctx_a]"m"(ctx.a),
+	  [ctx_b]"m"(ctx.b),
+	  [ctx_c]"m"(ctx.c)
+	: "r1"
+	);
+	return 0;
+}
+
 char _license[] SEC("license") = "GPL";
-- 
cgit 


From f267f262815033452195f46c43b572159262f533 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 5 Mar 2024 10:08:28 +0100
Subject: xdp, bonding: Fix feature flags when there are no slave devs anymore
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 9b0ed890ac2a ("bonding: do not report NETDEV_XDP_ACT_XSK_ZEROCOPY")
changed the driver from reporting everything as supported before a device
was bonded into having the driver report that no XDP feature is supported
until a real device is bonded as it seems to be more truthful given
eventually real underlying devices decide what XDP features are supported.

The change however did not take into account when all slave devices get
removed from the bond device. In this case after 9b0ed890ac2a, the driver
keeps reporting a feature mask of 0x77, that is, NETDEV_XDP_ACT_MASK &
~NETDEV_XDP_ACT_XSK_ZEROCOPY whereas it should have reported a feature
mask of 0.

Fix it by resetting XDP feature flags in the same way as if no XDP program
is attached to the bond device. This was uncovered by the XDP bond selftest
which let BPF CI fail. After adjusting the starting masks on the latter
to 0 instead of NETDEV_XDP_ACT_MASK the test passes again together with
this fix.

Fixes: 9b0ed890ac2a ("bonding: do not report NETDEV_XDP_ACT_XSK_ZEROCOPY")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Magnus Karlsson <magnus.karlsson@intel.com>
Cc: Prashant Batra <prbatra.mail@gmail.com>
Cc: Toke Høiland-Jørgensen <toke@redhat.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Message-ID: <20240305090829.17131-1-daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/bonding/bond_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index a11748b8d69b..cd0683bcca03 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1811,7 +1811,7 @@ void bond_xdp_set_features(struct net_device *bond_dev)
 
 	ASSERT_RTNL();
 
-	if (!bond_xdp_check(bond)) {
+	if (!bond_xdp_check(bond) || !bond_has_slaves(bond)) {
 		xdp_clear_features_flag(bond_dev);
 		return;
 	}
-- 
cgit 


From 0bfc0336e1348883fdab4689f0c8c56458f36dd8 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 5 Mar 2024 10:08:29 +0100
Subject: selftests/bpf: Fix up xdp bonding test wrt feature flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adjust the XDP feature flags for the bond device when no bond slave
devices are attached. After 9b0ed890ac2a ("bonding: do not report
NETDEV_XDP_ACT_XSK_ZEROCOPY"), the empty bond device must report 0
as flags instead of NETDEV_XDP_ACT_MASK.

  # ./vmtest.sh -- ./test_progs -t xdp_bond
  [...]
  [    3.983311] bond1 (unregistering): (slave veth1_1): Releasing backup interface
  [    3.995434] bond1 (unregistering): Released all slaves
  [    4.022311] bond2: (slave veth2_1): Releasing backup interface
  #507/1   xdp_bonding/xdp_bonding_attach:OK
  #507/2   xdp_bonding/xdp_bonding_nested:OK
  #507/3   xdp_bonding/xdp_bonding_features:OK
  #507/4   xdp_bonding/xdp_bonding_roundrobin:OK
  #507/5   xdp_bonding/xdp_bonding_activebackup:OK
  #507/6   xdp_bonding/xdp_bonding_xor_layer2:OK
  #507/7   xdp_bonding/xdp_bonding_xor_layer23:OK
  #507/8   xdp_bonding/xdp_bonding_xor_layer34:OK
  #507/9   xdp_bonding/xdp_bonding_redirect_multi:OK
  #507     xdp_bonding:OK
  Summary: 1/9 PASSED, 0 SKIPPED, 0 FAILED
  [    4.185255] bond2 (unregistering): Released all slaves
  [...]

Fixes: 9b0ed890ac2a ("bonding: do not report NETDEV_XDP_ACT_XSK_ZEROCOPY")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Message-ID: <20240305090829.17131-2-daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/xdp_bonding.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
index c3b45745cbcc..6d8b54124cb3 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
@@ -511,7 +511,7 @@ static void test_xdp_bonding_features(struct skeletons *skeletons)
 	if (!ASSERT_OK(err, "bond bpf_xdp_query"))
 		goto out;
 
-	if (!ASSERT_EQ(query_opts.feature_flags, NETDEV_XDP_ACT_MASK,
+	if (!ASSERT_EQ(query_opts.feature_flags, 0,
 		       "bond query_opts.feature_flags"))
 		goto out;
 
@@ -601,7 +601,7 @@ static void test_xdp_bonding_features(struct skeletons *skeletons)
 	if (!ASSERT_OK(err, "bond bpf_xdp_query"))
 		goto out;
 
-	ASSERT_EQ(query_opts.feature_flags, NETDEV_XDP_ACT_MASK,
+	ASSERT_EQ(query_opts.feature_flags, 0,
 		  "bond query_opts.feature_flags");
 out:
 	bpf_link__destroy(link);
-- 
cgit 


From 2487007aa3b9fafbd2cb14068f49791ce1d7ede5 Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Tue, 5 Mar 2024 22:31:32 +0100
Subject: cpumap: Zero-initialise xdp_rxq_info struct before running XDP
 program
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When running an XDP program that is attached to a cpumap entry, we don't
initialise the xdp_rxq_info data structure being used in the xdp_buff
that backs the XDP program invocation. Tobias noticed that this leads to
random values being returned as the xdp_md->rx_queue_index value for XDP
programs running in a cpumap.

This means we're basically returning the contents of the uninitialised
memory, which is bad. Fix this by zero-initialising the rxq data
structure before running the XDP program.

Fixes: 9216477449f3 ("bpf: cpumap: Add the possibility to attach an eBPF program to cpumap")
Reported-by: Tobias Böhm <tobias@aibor.de>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/r/20240305213132.11955-1-toke@redhat.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 kernel/bpf/cpumap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 8a0bb80fe48a..ef82ffc90cbe 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -178,7 +178,7 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
 				    void **frames, int n,
 				    struct xdp_cpumap_stats *stats)
 {
-	struct xdp_rxq_info rxq;
+	struct xdp_rxq_info rxq = {};
 	struct xdp_buff xdp;
 	int i, nframes = 0;
 
-- 
cgit