author    David S. Miller <davem@davemloft.net>  2021-05-11 16:05:56 -0700
committer David S. Miller <davem@davemloft.net>  2021-05-11 16:05:56 -0700
commit    df6f8237036938d48b7705681c170566c00593fa (patch)
tree      343db1e2b6ba0f11485473f6d5579701b53e9038 /kernel
parent    9fe37a80c9298936a535a242355d4ef536f82dd9 (diff)
parent    569c484f9995f489f2b80dd134269fe07d2b900d (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Daniel Borkmann says:

====================
pull-request: bpf 2021-05-11

The following pull-request contains BPF updates for your *net* tree.

We've added 13 non-merge commits during the last 8 day(s) which contain
a total of 21 files changed, 817 insertions(+), 382 deletions(-).

The main changes are:

1) Fix multiple ringbuf bugs, in particular to prevent writable mmap of
   read-only pages, from Andrii Nakryiko & Thadeu Lima de Souza Cascardo.

2) Fix verifier alu32 known-const subregister bound tracking for bitwise
   operations and/or/xor, from Daniel Borkmann.

3) Reject trampoline attachment for functions with variable arguments,
   and also add a deny list of other forbidden functions, from Jiri Olsa.

4) Fix nested bpf_bprintf_prepare() calls used by various helpers by
   switching to per-CPU buffers, from Florent Revest.

5) Fix kernel compilation with BTF debug info on ppc64 due to pahole
   missing TCP-CC functions like cubictcp_init, from Martin KaFai Lau.

6) Add a kconfig entry to provide an option to disallow unprivileged
   BPF by default, from Daniel Borkmann.

7) Fix libbpf compilation for older libelf when GELF_ST_VISIBILITY()
   macro is not available, from Arnaldo Carvalho de Melo.

8) Migrate test_tc_redirect to test_progs framework as prep work for
   the upcoming skb_change_head() fix & selftest, from Jussi Maki.

9) Fix a libbpf segfault in add_dummy_ksym_var() if BTF is not present,
   from Ian Rogers.

10) Fix tx_only micro-benchmark in xdpsock BPF sample with proper frame
    size, from Magnus Karlsson.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/Kconfig     |  88
-rw-r--r--  kernel/bpf/btf.c       |  12
-rw-r--r--  kernel/bpf/helpers.c   |  27
-rw-r--r--  kernel/bpf/ringbuf.c   |  24
-rw-r--r--  kernel/bpf/syscall.c   |   3
-rw-r--r--  kernel/bpf/verifier.c  |  36
-rw-r--r--  kernel/sysctl.c        |  29

7 files changed, 174 insertions(+), 45 deletions(-)
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
new file mode 100644
index 000000000000..26b591e23f16
--- /dev/null
+++ b/kernel/bpf/Kconfig
@@ -0,0 +1,88 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+# BPF interpreter that, for example, classic socket filters depend on.
+config BPF
+ bool
+
+# Used by archs to indicate that they support a BPF JIT compiler plus
+# which flavour. Only one of the two can be selected for a specific
+# arch since the eBPF JIT supersedes the cBPF JIT.
+
+# Classic BPF JIT (cBPF)
+config HAVE_CBPF_JIT
+ bool
+
+# Extended BPF JIT (eBPF)
+config HAVE_EBPF_JIT
+ bool
+
+# Used by archs to indicate that they want the BPF JIT compiler enabled
+# by default for kernels that were compiled with BPF JIT support.
+config ARCH_WANT_DEFAULT_BPF_JIT
+ bool
+
+menu "BPF subsystem"
+
+config BPF_SYSCALL
+ bool "Enable bpf() system call"
+ select BPF
+ select IRQ_WORK
+ select TASKS_TRACE_RCU
+ select BINARY_PRINTF
+ select NET_SOCK_MSG if INET
+ default n
+ help
+ Enable the bpf() system call that allows manipulating BPF
+ programs and maps via file descriptors.
+
+config BPF_JIT
+ bool "Enable BPF Just In Time compiler"
+ depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT
+ depends on MODULES
+ help
+ BPF programs are normally handled by a BPF interpreter. This option
+ allows the kernel to generate native code when a program is loaded
+ into the kernel. This will significantly speed up the processing
+ of BPF programs.
+
+ Note that an admin should enable this feature by changing:
+ /proc/sys/net/core/bpf_jit_enable
+ /proc/sys/net/core/bpf_jit_harden (optional)
+ /proc/sys/net/core/bpf_jit_kallsyms (optional)
+
+config BPF_JIT_ALWAYS_ON
+ bool "Permanently enable BPF JIT and remove BPF interpreter"
+ depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
+ help
+ Enables BPF JIT and removes BPF interpreter to avoid speculative
+ execution of BPF instructions by the interpreter.
+
+config BPF_JIT_DEFAULT_ON
+ def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON
+ depends on HAVE_EBPF_JIT && BPF_JIT
+
+config BPF_UNPRIV_DEFAULT_OFF
+ bool "Disable unprivileged BPF by default"
+ depends on BPF_SYSCALL
+ help
+ Disables unprivileged BPF by default by setting the corresponding
+ /proc/sys/kernel/unprivileged_bpf_disabled knob to 2. An admin can
+ still reenable it by setting it to 0 later on, or permanently
+ disable it by setting it to 1 (from which no transition back to
+ 0 or 2 is possible anymore).
+
+source "kernel/bpf/preload/Kconfig"
+
+config BPF_LSM
+ bool "Enable BPF LSM Instrumentation"
+ depends on BPF_EVENTS
+ depends on BPF_SYSCALL
+ depends on SECURITY
+ depends on BPF_JIT
+ help
+ Enables instrumentation of the security hooks with BPF programs for
+ implementing dynamic MAC and Audit Policies.
+
+ If you are unsure how to answer this question, answer N.
+
+endmenu # "BPF subsystem"
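
For context: a minimal sketch (not the exact upstream code) of how the
new knob takes effect at runtime. Any non-zero value of
sysctl_unprivileged_bpf_disabled rejects unprivileged callers in the
bpf() syscall path; 1 and 2 only differ in whether the sysctl stays
writable.

  /* simplified sketch of the unprivileged gate; the exact placement
   * of this check in kernel/bpf/syscall.c may differ */
  if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
          return -EPERM;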
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 0600ed325fa0..f982a9f0dbc4 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5206,6 +5206,12 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
m->ret_size = ret;
for (i = 0; i < nargs; i++) {
+ if (i == nargs - 1 && args[i].type == 0) {
+ bpf_log(log,
+ "The function %s with variable args is unsupported.\n",
+ tname);
+ return -EINVAL;
+ }
ret = __get_type_size(btf, args[i].type, &t);
if (ret < 0) {
bpf_log(log,
@@ -5213,6 +5219,12 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
tname, i, btf_kind_str[BTF_INFO_KIND(t->info)]);
return -EINVAL;
}
+ if (ret == 0) {
+ bpf_log(log,
+ "The function %s has malformed void argument.\n",
+ tname);
+ return -EINVAL;
+ }
m->arg_size[i] = ret;
}
m->nr_args = nargs;
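
The vararg rejection keys off how BTF encodes variadic prototypes: the
parameters of a FUNC_PROTO follow the struct btf_type directly, and a
trailing parameter with type == 0 stands for the C "...". A sketch of
that detection, under the same assumptions as the check above (proto
points at the FUNC_PROTO type):

  const struct btf_param *args = (const struct btf_param *)(proto + 1);
  u16 nargs = BTF_INFO_VLEN(proto->info);

  /* a last parameter of type 0 marks a variadic prototype */
  bool is_variadic = nargs && args[nargs - 1].type == 0;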
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 544773970dbc..ef658a9ea5c9 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -696,34 +696,35 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
*/
#define MAX_PRINTF_BUF_LEN 512
-struct bpf_printf_buf {
- char tmp_buf[MAX_PRINTF_BUF_LEN];
+/* Support executing three nested bprintf helper calls on a given CPU */
+struct bpf_bprintf_buffers {
+ char tmp_bufs[3][MAX_PRINTF_BUF_LEN];
};
-static DEFINE_PER_CPU(struct bpf_printf_buf, bpf_printf_buf);
-static DEFINE_PER_CPU(int, bpf_printf_buf_used);
+static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs);
+static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);
static int try_get_fmt_tmp_buf(char **tmp_buf)
{
- struct bpf_printf_buf *bufs;
- int used;
+ struct bpf_bprintf_buffers *bufs;
+ int nest_level;
preempt_disable();
- used = this_cpu_inc_return(bpf_printf_buf_used);
- if (WARN_ON_ONCE(used > 1)) {
- this_cpu_dec(bpf_printf_buf_used);
+ nest_level = this_cpu_inc_return(bpf_bprintf_nest_level);
+ if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bufs->tmp_bufs))) {
+ this_cpu_dec(bpf_bprintf_nest_level);
preempt_enable();
return -EBUSY;
}
- bufs = this_cpu_ptr(&bpf_printf_buf);
- *tmp_buf = bufs->tmp_buf;
+ bufs = this_cpu_ptr(&bpf_bprintf_bufs);
+ *tmp_buf = bufs->tmp_bufs[nest_level - 1];
return 0;
}
void bpf_bprintf_cleanup(void)
{
- if (this_cpu_read(bpf_printf_buf_used)) {
- this_cpu_dec(bpf_printf_buf_used);
+ if (this_cpu_read(bpf_bprintf_nest_level)) {
+ this_cpu_dec(bpf_bprintf_nest_level);
preempt_enable();
}
}
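
The rework keeps the previous acquire/release contract, just with up to
three per-CPU buffers instead of one. A usage sketch of the expected
pairing, using the names from the hunk above:

  char *buf;

  /* fails with -EBUSY past three nested calls on this CPU */
  if (try_get_fmt_tmp_buf(&buf))
          return -EBUSY;
  /* ... format at most MAX_PRINTF_BUF_LEN bytes into buf ... */
  bpf_bprintf_cleanup();  /* drop nest level, re-enable preemption */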
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index f25b719ac786..84b3b35fc0d0 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -221,25 +221,20 @@ static int ringbuf_map_get_next_key(struct bpf_map *map, void *key,
return -ENOTSUPP;
}
-static size_t bpf_ringbuf_mmap_page_cnt(const struct bpf_ringbuf *rb)
-{
- size_t data_pages = (rb->mask + 1) >> PAGE_SHIFT;
-
- /* consumer page + producer page + 2 x data pages */
- return RINGBUF_POS_PAGES + 2 * data_pages;
-}
-
static int ringbuf_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
{
struct bpf_ringbuf_map *rb_map;
- size_t mmap_sz;
rb_map = container_of(map, struct bpf_ringbuf_map, map);
- mmap_sz = bpf_ringbuf_mmap_page_cnt(rb_map->rb) << PAGE_SHIFT;
-
- if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) > mmap_sz)
- return -EINVAL;
+ if (vma->vm_flags & VM_WRITE) {
+ /* allow writable mapping for the consumer_pos only */
+ if (vma->vm_pgoff != 0 || vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EPERM;
+ } else {
+ vma->vm_flags &= ~VM_MAYWRITE;
+ }
+ /* remap_vmalloc_range() checks size and offset constraints */
return remap_vmalloc_range(vma, rb_map->rb,
vma->vm_pgoff + RINGBUF_PGOFF);
}
@@ -315,6 +310,9 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
return NULL;
len = round_up(size + BPF_RINGBUF_HDR_SZ, 8);
+ if (len > rb->mask + 1)
+ return NULL;
+
cons_pos = smp_load_acquire(&rb->consumer_pos);
if (in_nmi()) {
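
Seen from user space, the fix pins down which parts of the ring buffer
may be mapped writable. A hedged sketch of the usual mapping layout
(map_fd, page_size and data_sz are assumed to be set up by the caller;
error handling elided):

  #include <sys/mman.h>

  /* consumer_pos page at offset 0: the only region that may still
   * be mapped writable */
  void *cons = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
                    MAP_SHARED, map_fd, 0);

  /* producer_pos page plus the doubly-mapped data pages: read-only.
   * Requesting PROT_WRITE here now fails with -EPERM, and clearing
   * VM_MAYWRITE prevents a later mprotect() upgrade. */
  void *prod = mmap(NULL, page_size + 2 * data_sz, PROT_READ,
                    MAP_SHARED, map_fd, page_size);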
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 941ca06d9dfa..ea04b0deb5ce 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -50,7 +50,8 @@ static DEFINE_SPINLOCK(map_idr_lock);
static DEFINE_IDR(link_idr);
static DEFINE_SPINLOCK(link_idr_lock);
-int sysctl_unprivileged_bpf_disabled __read_mostly;
+int sysctl_unprivileged_bpf_disabled __read_mostly =
+ IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;
static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
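
IS_BUILTIN() from include/linux/kconfig.h evaluates to 1 only for
built-in (=y) options, so the initializer above behaves like the
following spelled-out sketch (equivalent here because
BPF_UNPRIV_DEFAULT_OFF is a bool and can never be =m):

  #ifdef CONFIG_BPF_UNPRIV_DEFAULT_OFF
  int sysctl_unprivileged_bpf_disabled __read_mostly = 2;
  #else
  int sysctl_unprivileged_bpf_disabled __read_mostly = 0;
  #endif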
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 757476c91c98..c58598ef4b5b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7084,11 +7084,10 @@ static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
s32 smin_val = src_reg->s32_min_value;
u32 umax_val = src_reg->u32_max_value;
- /* Assuming scalar64_min_max_and will be called so its safe
- * to skip updating register for known 32-bit case.
- */
- if (src_known && dst_known)
+ if (src_known && dst_known) {
+ __mark_reg32_known(dst_reg, var32_off.value);
return;
+ }
/* We get our minimum from the var_off, since that's inherently
* bitwise. Our maximum is the minimum of the operands' maxima.
@@ -7108,7 +7107,6 @@ static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
dst_reg->s32_min_value = dst_reg->u32_min_value;
dst_reg->s32_max_value = dst_reg->u32_max_value;
}
-
}
static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
@@ -7155,11 +7153,10 @@ static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
s32 smin_val = src_reg->s32_min_value;
u32 umin_val = src_reg->u32_min_value;
- /* Assuming scalar64_min_max_or will be called so it is safe
- * to skip updating register for known case.
- */
- if (src_known && dst_known)
+ if (src_known && dst_known) {
+ __mark_reg32_known(dst_reg, var32_off.value);
return;
+ }
/* We get our maximum from the var_off, and our minimum is the
* maximum of the operands' minima
@@ -7224,11 +7221,10 @@ static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
struct tnum var32_off = tnum_subreg(dst_reg->var_off);
s32 smin_val = src_reg->s32_min_value;
- /* Assuming scalar64_min_max_xor will be called so it is safe
- * to skip updating register for known case.
- */
- if (src_known && dst_known)
+ if (src_known && dst_known) {
+ __mark_reg32_known(dst_reg, var32_off.value);
return;
+ }
/* We get both minimum and maximum from the var32_off. */
dst_reg->u32_min_value = var32_off.value;
@@ -13200,6 +13196,17 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
return 0;
}
+BTF_SET_START(btf_id_deny)
+BTF_ID_UNUSED
+#ifdef CONFIG_SMP
+BTF_ID(func, migrate_disable)
+BTF_ID(func, migrate_enable)
+#endif
+#if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
+BTF_ID(func, rcu_read_unlock_strict)
+#endif
+BTF_SET_END(btf_id_deny)
+
static int check_attach_btf_id(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog;
@@ -13259,6 +13266,9 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
ret = bpf_lsm_verify_prog(&env->log, prog);
if (ret < 0)
return ret;
+ } else if (prog->type == BPF_PROG_TYPE_TRACING &&
+ btf_id_set_contains(&btf_id_deny, btf_id)) {
+ return -EINVAL;
}
key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
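
For the subreg bound fix above, a worked illustration (register values
are examples, not taken from a specific reproducer):

  /*
   *   r2.var_off = {mask = 0xffffffff00000000, value = 0x0}
   *                ; upper 32 bits unknown, lower 32 bits known == 0
   *   w2 &= w3     ; 32-bit BPF_AND, w3 a known 32-bit constant
   *
   * Both subregisters are known constants, so the old code returned
   * early and relied on the 64-bit handler to sync the 32-bit bounds.
   * But with the upper bits unknown, the 64-bit handler cannot mark
   * the register fully known, leaving the u32/s32 bounds stale and out
   * of sync with var32_off. __mark_reg32_known() now pins all four
   * 32-bit bounds to var32_off.value before returning; the same
   * reasoning applies to the or and xor variants.
   */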
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 14edf84cc571..d4a78e08f6d8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -225,7 +225,27 @@ static int bpf_stats_handler(struct ctl_table *table, int write,
mutex_unlock(&bpf_stats_enabled_mutex);
return ret;
}
-#endif
+
+static int bpf_unpriv_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret, unpriv_enable = *(int *)table->data;
+ bool locked_state = unpriv_enable == 1;
+ struct ctl_table tmp = *table;
+
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ tmp.data = &unpriv_enable;
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+ if (write && !ret) {
+ if (locked_state && unpriv_enable != 1)
+ return -EPERM;
+ *(int *)table->data = unpriv_enable;
+ }
+ return ret;
+}
+#endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */
/*
* /proc/sys support
@@ -2600,10 +2620,9 @@ static struct ctl_table kern_table[] = {
.data = &sysctl_unprivileged_bpf_disabled,
.maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
.mode = 0644,
- /* only handle a transition from default "0" to "1" */
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ONE,
- .extra2 = SYSCTL_ONE,
+ .proc_handler = bpf_unpriv_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &two,
},
{
.procname = "bpf_stats_enabled",
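
Taken together, the handler turns the knob into a small state machine:
writes outside the 0..2 range are rejected, writes require
CAP_SYS_ADMIN, and once the knob reads 1 the only accepted write is 1
again. A hedged user-space sketch (write_knob() is an illustrative
helper, not an existing API):

  #include <stdio.h>

  /* write a value to the sysctl knob; returns 0 on success */
  static int write_knob(const char *val)
  {
          FILE *f = fopen("/proc/sys/kernel/unprivileged_bpf_disabled", "w");
          int ret = 0;

          if (!f)
                  return -1;
          if (fputs(val, f) < 0)
                  ret = -1;
          if (fclose(f) != 0)     /* buffered write is flushed here */
                  ret = -1;
          return ret;
  }

  int main(void)
  {
          write_knob("2");        /* disable unprivileged BPF, still undoable */
          write_knob("0");        /* allowed: 2 -> 0 re-enables it */
          write_knob("1");        /* one-way lock */
          write_knob("0");        /* rejected with EPERM: no way back from 1 */
          return 0;
  }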