From 13a9c48a85ccf1417b527975c0a12b47fbfaf625 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 20 Nov 2017 15:21:51 -0800
Subject: bpf: offload: add comment warning developers about double destroy

Offload state may get destroyed either because the device for which
it was constructed is going away, or because the refcount of bpf
program itself has reached 0.  In both of those cases we will call
__bpf_prog_offload_destroy() to unlink the offload from the device.
We may in fact call it twice, which works just fine, but we should
make clear this is intended and caution others trying to extend the
function.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/offload.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'kernel')

diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 2816feb38be1..fd696d3dd429 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -85,6 +85,10 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
 	struct bpf_dev_offload *offload = prog->aux->offload;
 	struct netdev_bpf data = {};
 
+	/* Caution - if netdev is destroyed before the program, this function
+	 * will be called twice.
+	 */
+
 	data.offload.prog = prog;
 
 	if (offload->verifier_running)
-- 
cgit 


From 649f11dcd19a5f0d00fdbc760fbdccdd98e56a43 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 20 Nov 2017 15:21:52 -0800
Subject: bpf: offload: limit offload to cls_bpf and xdp programs only

We are currently only allowing attachment of device-bound
cls_bpf and XDP programs.  Make this restriction explicit in
the BPF offload code.  This way we can potentially reuse the
ifindex field in the future.

Since XDP and cls_bpf programs can only be loaded by admin,
we can drop the explicit capability check from offload code.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/offload.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index fd696d3dd429..ac187f9ee182 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -14,8 +14,9 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
 	struct net *net = current->nsproxy->net_ns;
 	struct bpf_dev_offload *offload;
 
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
+	if (attr->prog_type != BPF_PROG_TYPE_SCHED_CLS &&
+	    attr->prog_type != BPF_PROG_TYPE_XDP)
+		return -EINVAL;
 
 	if (attr->prog_flags)
 		return -EINVAL;
-- 
cgit 


From 1f6f4cb7ba219b00a3fa9afe8049fa16444d8b52 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 20 Nov 2017 15:21:53 -0800
Subject: bpf: offload: rename the ifindex field

bpf_target_prog seems long and clunky, rename it to prog_ifindex.
We don't want to call this field just ifindex, because maps
may need a similar field in the future and bpf_attr members for
programs and maps are unnamed.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/offload.c | 2 +-
 kernel/bpf/syscall.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index ac187f9ee182..a778e5df7e26 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -29,7 +29,7 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
 	init_waitqueue_head(&offload->verifier_done);
 
 	rtnl_lock();
-	offload->netdev = __dev_get_by_index(net, attr->prog_target_ifindex);
+	offload->netdev = __dev_get_by_index(net, attr->prog_ifindex);
 	if (!offload->netdev) {
 		rtnl_unlock();
 		kfree(offload);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 09badc37e864..8e9d065bb7cd 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1118,7 +1118,7 @@ struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
 EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev);
 
 /* last field in 'union bpf_attr' used by this command */
-#define	BPF_PROG_LOAD_LAST_FIELD prog_target_ifindex
+#define	BPF_PROG_LOAD_LAST_FIELD prog_ifindex
 
 static int bpf_prog_load(union bpf_attr *attr)
 {
@@ -1181,7 +1181,7 @@ static int bpf_prog_load(union bpf_attr *attr)
 	atomic_set(&prog->aux->refcnt, 1);
 	prog->gpl_compatible = is_gpl ? 1 : 0;
 
-	if (attr->prog_target_ifindex) {
+	if (attr->prog_ifindex) {
 		err = bpf_prog_offload_init(prog, attr);
 		if (err)
 			goto free_prog;
-- 
cgit 


From 288b3de55aace830f13280985ec9e6bcbff33b1b Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 20 Nov 2017 15:21:54 -0800
Subject: bpf: offload: move offload device validation out to the drivers

With TC shared block changes we can't depend on correct netdev
pointer being available in cls_bpf.  Move the device validation
to the driver.  Core will only make sure that offloaded programs
are always attached in the driver (or in HW by the driver).  We
trust that drivers which implement offload callbacks will perform
necessary checks.

Moving the checks to the driver is generally a useful thing,
in practice the check should be against a switchdev instance,
not a netdev, given that most ASICs will probably allow using
the same program on many ports.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/syscall.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 8e9d065bb7cd..38da55905ab0 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1057,22 +1057,23 @@ struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
 }
 EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);
 
-static bool bpf_prog_can_attach(struct bpf_prog *prog,
-				enum bpf_prog_type *attach_type,
-				struct net_device *netdev)
+static bool bpf_prog_get_ok(struct bpf_prog *prog,
+			    enum bpf_prog_type *attach_type, bool attach_drv)
 {
-	struct bpf_dev_offload *offload = prog->aux->offload;
+	/* not an attachment, just a refcount inc, always allow */
+	if (!attach_type)
+		return true;
 
 	if (prog->type != *attach_type)
 		return false;
-	if (offload && offload->netdev != netdev)
+	if (bpf_prog_is_dev_bound(prog->aux) && !attach_drv)
 		return false;
 
 	return true;
 }
 
 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type,
-				       struct net_device *netdev)
+				       bool attach_drv)
 {
 	struct fd f = fdget(ufd);
 	struct bpf_prog *prog;
@@ -1080,7 +1081,7 @@ static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type,
 	prog = ____bpf_prog_get(f);
 	if (IS_ERR(prog))
 		return prog;
-	if (attach_type && !bpf_prog_can_attach(prog, attach_type, netdev)) {
+	if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) {
 		prog = ERR_PTR(-EINVAL);
 		goto out;
 	}
@@ -1093,12 +1094,12 @@ out:
 
 struct bpf_prog *bpf_prog_get(u32 ufd)
 {
-	return __bpf_prog_get(ufd, NULL, NULL);
+	return __bpf_prog_get(ufd, NULL, false);
 }
 
 struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
 {
-	struct bpf_prog *prog = __bpf_prog_get(ufd, &type, NULL);
+	struct bpf_prog *prog = __bpf_prog_get(ufd, &type, false);
 
 	if (!IS_ERR(prog))
 		trace_bpf_prog_get_type(prog);
@@ -1107,9 +1108,9 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
 EXPORT_SYMBOL_GPL(bpf_prog_get_type);
 
 struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
-				       struct net_device *netdev)
+				       bool attach_drv)
 {
-	struct bpf_prog *prog = __bpf_prog_get(ufd, &type, netdev);
+	struct bpf_prog *prog = __bpf_prog_get(ufd, &type, attach_drv);
 
 	if (!IS_ERR(prog))
 		trace_bpf_prog_get_type(prog);
-- 
cgit 


From 479321e9c31a6c05426790b11888427400f75ac8 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 20 Nov 2017 15:21:56 -0800
Subject: bpf: turn bpf_prog_get_type() into a wrapper

bpf_prog_get_type() is identical to bpf_prog_get_type_dev(),
with false passed as attach_drv.  Instead of keeping it as
an exported symbol turn it into static inline wrapper.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/syscall.c | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 38da55905ab0..41509cf825d8 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1097,16 +1097,6 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
 	return __bpf_prog_get(ufd, NULL, false);
 }
 
-struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
-{
-	struct bpf_prog *prog = __bpf_prog_get(ufd, &type, false);
-
-	if (!IS_ERR(prog))
-		trace_bpf_prog_get_type(prog);
-	return prog;
-}
-EXPORT_SYMBOL_GPL(bpf_prog_get_type);
-
 struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
 				       bool attach_drv)
 {
-- 
cgit 


From 62c71b45e8537b8cb746cc929ea05ba0258e0b5a Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 20 Nov 2017 15:21:57 -0800
Subject: bpf: offload: ignore namespace moves

We are currently destroying the device offload state when device
moves to another net namespace.  This doesn't break with current
NFP code, because offload state is not used on program removal,
but it's not correct behaviour.

Ignore the device unregister notifications on namespace move.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/offload.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'kernel')

diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index a778e5df7e26..d4267c674fec 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -174,6 +174,10 @@ static int bpf_offload_notification(struct notifier_block *notifier,
 
 	switch (event) {
 	case NETDEV_UNREGISTER:
+		/* ignore namespace changes */
+		if (netdev->reg_state != NETREG_UNREGISTERING)
+			break;
+
 		list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs,
 					 offloads) {
 			if (offload->netdev == netdev)
-- 
cgit 


From 1ee640095f049e7ac4ec36b985abada497b98cc2 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 20 Nov 2017 15:21:59 -0800
Subject: bpf: revert report offload info to user space

This reverts commit bd601b6ada11 ("bpf: report offload info to user
space").  The ifindex by itself is not sufficient, we should provide
information on which network namespace this ifindex belongs to.
After considering some options we concluded that it's best to just
remove this API for now, and rework it in -next.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/offload.c | 12 ------------
 kernel/bpf/syscall.c |  5 -----
 2 files changed, 17 deletions(-)

(limited to 'kernel')

diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index d4267c674fec..68ec884440b7 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -149,18 +149,6 @@ int bpf_prog_offload_compile(struct bpf_prog *prog)
 	return bpf_prog_offload_translate(prog);
 }
 
-u32 bpf_prog_offload_ifindex(struct bpf_prog *prog)
-{
-	struct bpf_dev_offload *offload = prog->aux->offload;
-	u32 ifindex;
-
-	rtnl_lock();
-	ifindex = offload->netdev ? offload->netdev->ifindex : 0;
-	rtnl_unlock();
-
-	return ifindex;
-}
-
 const struct bpf_prog_ops bpf_offload_prog_ops = {
 };
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 41509cf825d8..2c4cfeaa8d5e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1616,11 +1616,6 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 			return -EFAULT;
 	}
 
-	if (bpf_prog_is_dev_bound(prog->aux)) {
-		info.status |= BPF_PROG_STATUS_DEV_BOUND;
-		info.ifindex = bpf_prog_offload_ifindex(prog);
-	}
-
 done:
 	if (copy_to_user(uinfo, &info, info_len) ||
 	    put_user(info_len, &uattr->info.info_len))
-- 
cgit 


From db1ac4964fa172803a0fea83033cd35d380a8a77 Mon Sep 17 00:00:00 2001
From: Gianluca Borello <g.borello@gmail.com>
Date: Wed, 22 Nov 2017 18:32:53 +0000
Subject: bpf: introduce ARG_PTR_TO_MEM_OR_NULL

With the current ARG_PTR_TO_MEM/ARG_PTR_TO_UNINIT_MEM semantics, an helper
argument can be NULL when the next argument type is ARG_CONST_SIZE_OR_ZERO
and the verifier can prove the value of this next argument is 0. However,
most helpers are just interested in handling <!NULL, 0>, so forcing them to
deal with <NULL, 0> makes the implementation of those helpers more
complicated for no apparent benefits, requiring them to explicitly handle
those corner cases with checks that bpf programs could start relying upon,
preventing the possibility of removing them later.

Solve this by making ARG_PTR_TO_MEM/ARG_PTR_TO_UNINIT_MEM never accept NULL
even when ARG_CONST_SIZE_OR_ZERO is set, and introduce a new argument type
ARG_PTR_TO_MEM_OR_NULL to explicitly deal with the NULL case.

Currently, the only helper that needs this is bpf_csum_diff_proto(), so
change arg1 and arg3 to this new type as well.

Also add a new battery of tests that explicitly test the
!ARG_PTR_TO_MEM_OR_NULL combination: all the current ones testing the
various <NULL, 0> variations are focused on bpf_csum_diff, so cover also
other helpers.

Signed-off-by: Gianluca Borello <g.borello@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/verifier.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index dd54d20ace2f..308b0638ec5d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1384,13 +1384,15 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		if (type != expected_type)
 			goto err_type;
 	} else if (arg_type == ARG_PTR_TO_MEM ||
+		   arg_type == ARG_PTR_TO_MEM_OR_NULL ||
 		   arg_type == ARG_PTR_TO_UNINIT_MEM) {
 		expected_type = PTR_TO_STACK;
 		/* One exception here. In case function allows for NULL to be
 		 * passed in as argument, it's a SCALAR_VALUE type. Final test
 		 * happens during stack boundary checking.
 		 */
-		if (register_is_null(*reg))
+		if (register_is_null(*reg) &&
+		    arg_type == ARG_PTR_TO_MEM_OR_NULL)
 			/* final test in check_stack_boundary() */;
 		else if (!type_is_pkt_pointer(type) &&
 			 type != PTR_TO_MAP_VALUE &&
-- 
cgit 


From eb33f2cca49ec49a1b893b5af546e7c042ca6365 Mon Sep 17 00:00:00 2001
From: Gianluca Borello <g.borello@gmail.com>
Date: Wed, 22 Nov 2017 18:32:54 +0000
Subject: bpf: remove explicit handling of 0 for arg2 in bpf_probe_read

Commit 9c019e2bc4b2 ("bpf: change helper bpf_probe_read arg2 type to
ARG_CONST_SIZE_OR_ZERO") changed arg2 type to ARG_CONST_SIZE_OR_ZERO to
simplify writing bpf programs by taking advantage of the new semantics
introduced for ARG_CONST_SIZE_OR_ZERO which allows <!NULL, 0> arguments.

In order to prevent the helper from actually passing a NULL pointer to
probe_kernel_read, which can happen when <NULL, 0> is passed to the helper,
the commit also introduced an explicit check against size == 0.

After the recent introduction of the ARG_PTR_TO_MEM_OR_NULL type,
bpf_probe_read can not receive a pair of <NULL, 0> arguments anymore, thus
the check is not needed anymore and can be removed, since probe_kernel_read
can correctly handle a <!NULL, 0> call. This also fixes the semantics of
the helper before it gets officially released and bpf programs start
relying on this check.

Fixes: 9c019e2bc4b2 ("bpf: change helper bpf_probe_read arg2 type to ARG_CONST_SIZE_OR_ZERO")
Signed-off-by: Gianluca Borello <g.borello@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/trace/bpf_trace.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index a5580c670866..728909f7951c 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -78,16 +78,12 @@ EXPORT_SYMBOL_GPL(trace_call_bpf);
 
 BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
 {
-	int ret = 0;
-
-	if (unlikely(size == 0))
-		goto out;
+	int ret;
 
 	ret = probe_kernel_read(dst, unsafe_ptr, size);
 	if (unlikely(ret < 0))
 		memset(dst, 0, size);
 
- out:
 	return ret;
 }
 
-- 
cgit 


From 5c4e1201740ceae9bd6f622851a9bf7c66debe3a Mon Sep 17 00:00:00 2001
From: Gianluca Borello <g.borello@gmail.com>
Date: Wed, 22 Nov 2017 18:32:55 +0000
Subject: bpf: change bpf_probe_read_str arg2 type to ARG_CONST_SIZE_OR_ZERO

Commit 9fd29c08e520 ("bpf: improve verifier ARG_CONST_SIZE_OR_ZERO
semantics") relaxed the treatment of ARG_CONST_SIZE_OR_ZERO due to the way
the compiler generates optimized BPF code when checking boundaries of an
argument from C code. A typical example of this optimized code can be
generated using the bpf_probe_read_str helper when operating on variable
memory:

/* len is a generic scalar */
if (len > 0 && len <= 0x7fff)
        bpf_probe_read_str(p, len, s);

251: (79) r1 = *(u64 *)(r10 -88)
252: (07) r1 += -1
253: (25) if r1 > 0x7ffe goto pc-42
254: (bf) r1 = r7
255: (79) r2 = *(u64 *)(r10 -88)
256: (bf) r8 = r4
257: (85) call bpf_probe_read_str#45
R2 min value is negative, either use unsigned or 'var &= const'

With this code, the verifier loses track of the variable.

Replacing arg2 with ARG_CONST_SIZE_OR_ZERO is thus desirable since it
avoids this quite common case which leads to usability issues, and the
compiler generates code that the verifier can more easily test:

if (len <= 0x7fff)
        bpf_probe_read_str(p, len, s);

or

bpf_probe_read_str(p, len & 0x7fff, s);

No changes to the bpf_probe_read_str helper are necessary since
strncpy_from_unsafe itself immediately returns if the size passed is 0.

Signed-off-by: Gianluca Borello <g.borello@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/trace/bpf_trace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 728909f7951c..ed8601a1a861 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -494,7 +494,7 @@ static const struct bpf_func_proto bpf_probe_read_str_proto = {
 	.gpl_only	= true,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
-	.arg2_type	= ARG_CONST_SIZE,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
 	.arg3_type	= ARG_ANYTHING,
 };
 
-- 
cgit 


From a60dd35d2e39209fa7645945e1192bf9769872c6 Mon Sep 17 00:00:00 2001
From: Gianluca Borello <g.borello@gmail.com>
Date: Wed, 22 Nov 2017 18:32:56 +0000
Subject: bpf: change bpf_perf_event_output arg5 type to ARG_CONST_SIZE_OR_ZERO

Commit 9fd29c08e520 ("bpf: improve verifier ARG_CONST_SIZE_OR_ZERO
semantics") relaxed the treatment of ARG_CONST_SIZE_OR_ZERO due to the way
the compiler generates optimized BPF code when checking boundaries of an
argument from C code. A typical example of this optimized code can be
generated using the bpf_perf_event_output helper when operating on variable
memory:

/* len is a generic scalar */
if (len > 0 && len <= 0x7fff)
        bpf_perf_event_output(ctx, &perf_map, 0, buf, len);

110: (79) r5 = *(u64 *)(r10 -40)
111: (bf) r1 = r5
112: (07) r1 += -1
113: (25) if r1 > 0x7ffe goto pc+6
114: (bf) r1 = r6
115: (18) r2 = 0xffff94e5f166c200
117: (b7) r3 = 0
118: (bf) r4 = r7
119: (85) call bpf_perf_event_output#25
R5 min value is negative, either use unsigned or 'var &= const'

With this code, the verifier loses track of the variable.

Replacing arg5 with ARG_CONST_SIZE_OR_ZERO is thus desirable since it
avoids this quite common case which leads to usability issues, and the
compiler generates code that the verifier can more easily test:

if (len <= 0x7fff)
        bpf_perf_event_output(ctx, &perf_map, 0, buf, len);

or

bpf_perf_event_output(ctx, &perf_map, 0, buf, len & 0x7fff);

No changes to the bpf_perf_event_output helper are necessary since it can
handle a case where size is 0, and an empty frame is pushed.

Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Gianluca Borello <g.borello@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/trace/bpf_trace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index ed8601a1a861..27d1f4ffa3de 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -403,7 +403,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
 	.arg2_type	= ARG_CONST_MAP_PTR,
 	.arg3_type	= ARG_ANYTHING,
 	.arg4_type	= ARG_PTR_TO_MEM,
-	.arg5_type	= ARG_CONST_SIZE,
+	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
 };
 
 static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
@@ -605,7 +605,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
 	.arg2_type	= ARG_CONST_MAP_PTR,
 	.arg3_type	= ARG_ANYTHING,
 	.arg4_type	= ARG_PTR_TO_MEM,
-	.arg5_type	= ARG_CONST_SIZE,
+	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
 };
 
 BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
-- 
cgit 


From c131187db2d3fa2f8bf32fdf4e9a4ef805168467 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Wed, 22 Nov 2017 16:42:05 -0800
Subject: bpf: fix branch pruning logic

when the verifier detects that register contains a runtime constant
and it's compared with another constant it will prune exploration
of the branch that is guaranteed not to be taken at runtime.
This is all correct, but malicious program may be constructed
in such a way that it always has a constant comparison and
the other branch is never taken under any conditions.
In this case such path through the program will not be explored
by the verifier. It won't be taken at run-time either, but since
all instructions are JITed the malicious program may cause JITs
to complain about using reserved fields, etc.
To fix the issue we have to track the instructions explored by
the verifier and sanitize instructions that are dead at run time
with NOPs. We cannot reject such dead code, since llvm generates
it for valid C code, since it doesn't do as much data flow
analysis as the verifier does.

Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/verifier.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'kernel')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 308b0638ec5d..d4593571c404 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3827,6 +3827,7 @@ static int do_check(struct bpf_verifier_env *env)
 			return err;
 
 		regs = cur_regs(env);
+		env->insn_aux_data[insn_idx].seen = true;
 		if (class == BPF_ALU || class == BPF_ALU64) {
 			err = check_alu_op(env, insn);
 			if (err)
@@ -4022,6 +4023,7 @@ process_bpf_exit:
 					return err;
 
 				insn_idx++;
+				env->insn_aux_data[insn_idx].seen = true;
 			} else {
 				verbose(env, "invalid BPF_LD mode\n");
 				return -EINVAL;
@@ -4204,6 +4206,7 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
 				u32 off, u32 cnt)
 {
 	struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
+	int i;
 
 	if (cnt == 1)
 		return 0;
@@ -4213,6 +4216,8 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
 	memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
 	memcpy(new_data + off + cnt - 1, old_data + off,
 	       sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
+	for (i = off; i < off + cnt - 1; i++)
+		new_data[i].seen = true;
 	env->insn_aux_data = new_data;
 	vfree(old_data);
 	return 0;
@@ -4231,6 +4236,25 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
 	return new_prog;
 }
 
+/* The verifier does more data flow analysis than llvm and will not explore
+ * branches that are dead at run time. Malicious programs can have dead code
+ * too. Therefore replace all dead at-run-time code with nops.
+ */
+static void sanitize_dead_code(struct bpf_verifier_env *env)
+{
+	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+	struct bpf_insn nop = BPF_MOV64_REG(BPF_REG_0, BPF_REG_0);
+	struct bpf_insn *insn = env->prog->insnsi;
+	const int insn_cnt = env->prog->len;
+	int i;
+
+	for (i = 0; i < insn_cnt; i++) {
+		if (aux_data[i].seen)
+			continue;
+		memcpy(insn + i, &nop, sizeof(nop));
+	}
+}
+
 /* convert load instructions that access fields of 'struct __sk_buff'
  * into sequence of instructions that access fields of 'struct sk_buff'
  */
@@ -4557,6 +4581,9 @@ skip_full_check:
 	while (!pop_stack(env, NULL, NULL));
 	free_states(env);
 
+	if (ret == 0)
+		sanitize_dead_code(env);
+
 	if (ret == 0)
 		/* program is valid, convert *(u32*)(ctx + off) accesses */
 		ret = convert_ctx_accesses(env);
-- 
cgit