summary | refs | log | tree | commit | diff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/auditsc.c 2
-rw-r--r-- kernel/bpf/Makefile 1
-rw-r--r-- kernel/bpf/arraymap.c 58
-rw-r--r-- kernel/bpf/btf.c 558
-rw-r--r-- kernel/bpf/core.c 15
-rw-r--r-- kernel/bpf/inode.c 7
-rw-r--r-- kernel/bpf/map_in_map.c 2
-rw-r--r-- kernel/bpf/syscall.c 282
-rw-r--r-- kernel/bpf/trampoline.c 253
-rw-r--r-- kernel/bpf/verifier.c 137
-rw-r--r-- kernel/bpf/xskmap.c 6
-rw-r--r-- kernel/events/core.c 7
12 files changed, 1187 insertions, 141 deletions
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4effe01ebbe2..9bf1045fedfa 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2545,7 +2545,7 @@ void __audit_ntp_log(const struct audit_ntp_data *ad)
audit_log_ntp_val(ad, "adjust", AUDIT_NTP_ADJUST);
}
-static void audit_log_task(struct audit_buffer *ab)
+void audit_log_task(struct audit_buffer *ab)
{
kuid_t auid, uid;
kgid_t gid;
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index e1d9adb212f9..3f671bf617e8 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
+obj-$(CONFIG_BPF_JIT) += trampoline.o
obj-$(CONFIG_BPF_SYSCALL) += btf.o
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 1c65ce0098a9..633c8c701ff6 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -14,7 +14,7 @@
#include "map_in_map.h"
#define ARRAY_CREATE_FLAG_MASK \
- (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
+ (BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK)
static void bpf_array_free_percpu(struct bpf_array *array)
{
@@ -59,6 +59,10 @@ int array_map_alloc_check(union bpf_attr *attr)
(percpu && numa_node != NUMA_NO_NODE))
return -EINVAL;
+ if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
+ attr->map_flags & BPF_F_MMAPABLE)
+ return -EINVAL;
+
if (attr->value_size > KMALLOC_MAX_SIZE)
/* if value_size is bigger, the user space won't be able to
* access the elements.
@@ -102,10 +106,19 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
}
array_size = sizeof(*array);
- if (percpu)
+ if (percpu) {
array_size += (u64) max_entries * sizeof(void *);
- else
- array_size += (u64) max_entries * elem_size;
+ } else {
+ /* rely on vmalloc() to return page-aligned memory and
+ * ensure array->value is exactly page-aligned
+ */
+ if (attr->map_flags & BPF_F_MMAPABLE) {
+ array_size = PAGE_ALIGN(array_size);
+ array_size += PAGE_ALIGN((u64) max_entries * elem_size);
+ } else {
+ array_size += (u64) max_entries * elem_size;
+ }
+ }
/* make sure there is no u32 overflow later in round_up() */
cost = array_size;
@@ -117,7 +130,20 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
return ERR_PTR(ret);
/* allocate all map elements and zero-initialize them */
- array = bpf_map_area_alloc(array_size, numa_node);
+ if (attr->map_flags & BPF_F_MMAPABLE) {
+ void *data;
+
+ /* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */
+ data = bpf_map_area_mmapable_alloc(array_size, numa_node);
+ if (!data) {
+ bpf_map_charge_finish(&mem);
+ return ERR_PTR(-ENOMEM);
+ }
+ array = data + PAGE_ALIGN(sizeof(struct bpf_array))
+ - offsetof(struct bpf_array, value);
+ } else {
+ array = bpf_map_area_alloc(array_size, numa_node);
+ }
if (!array) {
bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);
@@ -350,6 +376,11 @@ static int array_map_delete_elem(struct bpf_map *map, void *key)
return -EINVAL;
}
+/* Return @array rounded down to a PAGE_SIZE boundary. Callers in this file
+ * hand the result to bpf_map_area_free() and remap_vmalloc_range() for
+ * BPF_F_MMAPABLE arrays.
+ */
+static void *array_map_vmalloc_addr(struct bpf_array *array)
+{
+	unsigned long addr = (unsigned long)array;
+
+	return (void *)round_down(addr, PAGE_SIZE);
+}
+
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
@@ -365,7 +396,10 @@ static void array_map_free(struct bpf_map *map)
if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
bpf_array_free_percpu(array);
- bpf_map_area_free(array);
+ if (array->map.map_flags & BPF_F_MMAPABLE)
+ bpf_map_area_free(array_map_vmalloc_addr(array));
+ else
+ bpf_map_area_free(array);
}
static void array_map_seq_show_elem(struct bpf_map *map, void *key,
@@ -444,6 +478,17 @@ static int array_map_check_btf(const struct bpf_map *map,
return 0;
}
+/* ->map_mmap callback for array maps.
+ *
+ * Returns -EINVAL unless the map was created with BPF_F_MMAPABLE; otherwise
+ * returns the result of remap_vmalloc_range() on the map's page-aligned
+ * allocation, starting after the page-aligned struct bpf_array header.
+ */
+static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
+{
+	struct bpf_array *array;
+	pgoff_t data_pgoff;
+	void *area;
+
+	if ((map->map_flags & BPF_F_MMAPABLE) == 0)
+		return -EINVAL;
+
+	array = container_of(map, struct bpf_array, map);
+	/* pages taken by the page-aligned struct bpf_array header */
+	data_pgoff = PAGE_ALIGN(sizeof(*array)) >> PAGE_SHIFT;
+	area = array_map_vmalloc_addr(array);
+
+	return remap_vmalloc_range(vma, area, data_pgoff);
+}
+
const struct bpf_map_ops array_map_ops = {
.map_alloc_check = array_map_alloc_check,
.map_alloc = array_map_alloc,
@@ -455,6 +500,7 @@ const struct bpf_map_ops array_map_ops = {
.map_gen_lookup = array_map_gen_lookup,
.map_direct_value_addr = array_map_direct_value_addr,
.map_direct_value_meta = array_map_direct_value_meta,
+ .map_mmap = array_map_mmap,
.map_seq_show_elem = array_map_seq_show_elem,
.map_check_btf = array_map_check_btf,
};
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 128d89601d73..40efde5eedcb 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -2,6 +2,8 @@
/* Copyright (c) 2018 Facebook */
#include <uapi/linux/btf.h>
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/bpf_perf_event.h>
#include <uapi/linux/types.h>
#include <linux/seq_file.h>
#include <linux/compiler.h>
@@ -16,6 +18,9 @@
#include <linux/sort.h>
#include <linux/bpf_verifier.h>
#include <linux/btf.h>
+#include <linux/skmsg.h>
+#include <linux/perf_event.h>
+#include <net/sock.h>
/* BTF (BPF Type Format) is the meta data format which describes
* the data types of BPF program/map. Hence, it basically focus
@@ -1036,6 +1041,82 @@ static const struct resolve_vertex *env_stack_peak(struct btf_verifier_env *env)
return env->top_stack ? &env->stack[env->top_stack - 1] : NULL;
}
+/* Resolve the size of a passed-in "type"
+ *
+ * type: is an array (e.g. u32 array[x][y])
+ * return type: type "u32[x][y]", i.e. BTF_KIND_ARRAY,
+ * *type_size: (x * y * sizeof(u32)). Hence, *type_size always
+ * corresponds to the return type.
+ * *elem_type: u32
+ * *total_nelems: (x * y). Hence, individual elem size is
+ * (*type_size / *total_nelems)
+ *
+ * type: is not an array (e.g. const struct X)
+ * return type: type "struct X"
+ * *type_size: sizeof(struct X)
+ * *elem_type: same as return type ("struct X")
+ * *total_nelems: 1
+ *
+ * On failure ERR_PTR(-EINVAL) is returned and the three output
+ * parameters are left untouched. That happens when a kind without a
+ * size is reached, when the element count or the total size would not
+ * fit in a u32, or when no sized type is found within
+ * MAX_RESOLVE_DEPTH steps.
+ */
+static const struct btf_type *
+btf_resolve_size(const struct btf *btf, const struct btf_type *type,
+ u32 *type_size, const struct btf_type **elem_type,
+ u32 *total_nelems)
+{
+ const struct btf_type *array_type = NULL;
+ const struct btf_array *array;
+ u32 i, size, nelems = 1;
+
+ for (i = 0; i < MAX_RESOLVE_DEPTH; i++) {
+ switch (BTF_INFO_KIND(type->info)) {
+ /* type->size can be used */
+ case BTF_KIND_INT:
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ case BTF_KIND_ENUM:
+ size = type->size;
+ goto resolved;
+
+ case BTF_KIND_PTR:
+ size = sizeof(void *);
+ goto resolved;
+
+ /* Modifiers */
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ type = btf_type_by_id(btf, type->type);
+ break;
+
+ case BTF_KIND_ARRAY:
+ /* remember only the outermost array; it is the return value */
+ if (!array_type)
+ array_type = type;
+ array = btf_type_array(type);
+ /* reject if nelems * array->nelems would overflow u32 */
+ if (nelems && array->nelems > U32_MAX / nelems)
+ return ERR_PTR(-EINVAL);
+ nelems *= array->nelems;
+ type = btf_type_by_id(btf, array->type);
+ break;
+
+ /* type without size */
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ /* no sized type found within MAX_RESOLVE_DEPTH steps */
+ return ERR_PTR(-EINVAL);
+
+resolved:
+ /* reject if nelems * size would overflow u32 */
+ if (nelems && size > U32_MAX / nelems)
+ return ERR_PTR(-EINVAL);
+
+ *type_size = nelems * size;
+ *total_nelems = nelems;
+ *elem_type = type;
+
+ /* outermost array if one was seen, otherwise the resolved type */
+ return array_type ? : type;
+}
+
/* The input param "type_id" must point to a needs_resolve type */
static const struct btf_type *btf_type_id_resolve(const struct btf *btf,
u32 *type_id)
@@ -3363,13 +3444,112 @@ errout:
extern char __weak _binary__btf_vmlinux_bin_start[];
extern char __weak _binary__btf_vmlinux_bin_end[];
+extern struct btf *btf_vmlinux;
+
+/* Tables generated from <linux/bpf_types.h>.
+ *
+ * BPF_MAP_TYPE() is defined empty for the duration of this section, so
+ * only the BPF_PROG_TYPE() entries of the header expand to anything:
+ * - struct bpf_ctx_convert gets two consecutive members per entry,
+ * "<id>_prog" of the prog ctx type followed by "<id>_kern" of the
+ * kernel ctx type;
+ * - the anonymous enum assigns one sequential __ctx_convert<id> value per
+ * entry;
+ * - bpf_ctx_convert_map[] translates a prog type id into that value.
+ */
+#define BPF_MAP_TYPE(_id, _ops)
+static union {
+ struct bpf_ctx_convert {
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
+ prog_ctx_type _id##_prog; \
+ kern_ctx_type _id##_kern;
+#include <linux/bpf_types.h>
+#undef BPF_PROG_TYPE
+ } *__t;
+ /* 't' is written once under lock. Read many times. */
+ const struct btf_type *t;
+} bpf_ctx_convert;
+enum {
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
+ __ctx_convert##_id,
+#include <linux/bpf_types.h>
+#undef BPF_PROG_TYPE
+};
+static u8 bpf_ctx_convert_map[] = {
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
+ [_id] = __ctx_convert##_id,
+#include <linux/bpf_types.h>
+#undef BPF_PROG_TYPE
+};
+#undef BPF_MAP_TYPE
+
+/* Check whether @t, a type in @btf, refers to the context struct that
+ * the kernel expects for programs of @prog_type.
+ *
+ * The type that @t references is followed through modifiers and must be
+ * a named struct. Its name is compared with the name of the "<id>_prog"
+ * member type of struct bpf_ctx_convert for @prog_type, looked up in
+ * btf_vmlinux.
+ *
+ * Returns that struct bpf_ctx_convert member on a name match, NULL
+ * otherwise (including when bpf_ctx_convert.t has not been set).
+ */
+static const struct btf_member *
+btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf,
+ const struct btf_type *t, enum bpf_prog_type prog_type)
+{
+ const struct btf_type *conv_struct;
+ const struct btf_type *ctx_struct;
+ const struct btf_member *ctx_type;
+ const char *tname, *ctx_tname;
+
+ conv_struct = bpf_ctx_convert.t;
+ if (!conv_struct) {
+ bpf_log(log, "btf_vmlinux is malformed\n");
+ return NULL;
+ }
+ t = btf_type_by_id(btf, t->type);
+ while (btf_type_is_modifier(t))
+ t = btf_type_by_id(btf, t->type);
+ if (!btf_type_is_struct(t)) {
+ /* Only pointer to struct is supported for now.
+ * That means that BPF_PROG_TYPE_TRACEPOINT with BTF
+ * is not supported yet.
+ * BPF_PROG_TYPE_RAW_TRACEPOINT is fine.
+ */
+ bpf_log(log, "BPF program ctx type is not a struct\n");
+ return NULL;
+ }
+ tname = btf_name_by_offset(btf, t->name_off);
+ if (!tname) {
+ bpf_log(log, "BPF program ctx struct doesn't have a name\n");
+ return NULL;
+ }
+ /* prog_type is valid bpf program type. No need for bounds check. */
+ /* two members per prog type: index * 2 selects the "<id>_prog" one */
+ ctx_type = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2;
+ /* ctx_struct is a pointer to prog_ctx_type in vmlinux.
+ * Like 'struct __sk_buff'
+ */
+ ctx_struct = btf_type_by_id(btf_vmlinux, ctx_type->type);
+ if (!ctx_struct)
+ /* should not happen */
+ return NULL;
+ ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_struct->name_off);
+ if (!ctx_tname) {
+ /* should not happen */
+ bpf_log(log, "Please fix kernel include/linux/bpf_types.h\n");
+ return NULL;
+ }
+ /* only compare that prog's ctx type name is the same as
+ * kernel expects. No need to compare field by field.
+ * It's ok for bpf prog to do:
+ * struct __sk_buff {};
+ * int socket_filter_bpf_prog(struct __sk_buff *skb)
+ * { // no fields of skb are ever used }
+ */
+ if (strcmp(ctx_tname, tname))
+ return NULL;
+ return ctx_type;
+}
+
+/* Map the program-side ctx type @t of @btf to the type id of the matching
+ * kernel-side ctx member of struct bpf_ctx_convert.
+ *
+ * Returns that member's type id, or -ENOENT when @t is not the ctx type
+ * expected for @prog_type.
+ */
+static int btf_translate_to_vmlinux(struct bpf_verifier_log *log,
+				    struct btf *btf,
+				    const struct btf_type *t,
+				    enum bpf_prog_type prog_type)
+{
+	const struct btf_member *ctx_member;
+
+	ctx_member = btf_get_prog_ctx_type(log, btf, t, prog_type);
+	if (ctx_member == NULL)
+		return -ENOENT;
+
+	/* the kernel-side member directly follows the prog-side one */
+	return ctx_member[1].type;
+}
struct btf *btf_parse_vmlinux(void)
{
struct btf_verifier_env *env = NULL;
struct bpf_verifier_log *log;
struct btf *btf = NULL;
- int err;
+ int err, i;
env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
if (!env)
@@ -3403,6 +3583,26 @@ struct btf *btf_parse_vmlinux(void)
if (err)
goto errout;
+ /* find struct bpf_ctx_convert for type checking later */
+ for (i = 1; i <= btf->nr_types; i++) {
+ const struct btf_type *t;
+ const char *tname;
+
+ t = btf_type_by_id(btf, i);
+ if (!__btf_type_is_struct(t))
+ continue;
+ tname = __btf_name_by_offset(btf, t->name_off);
+ if (!strcmp(tname, "bpf_ctx_convert")) {
+ /* btf_parse_vmlinux() runs under bpf_verifier_lock */
+ bpf_ctx_convert.t = t;
+ break;
+ }
+ }
+ if (i > btf->nr_types) {
+ err = -ENOENT;
+ goto errout;
+ }
+
btf_verifier_env_free(env);
refcount_set(&btf->refcnt, 1);
return btf;
@@ -3416,17 +3616,29 @@ errout:
return ERR_PTR(err);
}
-extern struct btf *btf_vmlinux;
+/* Return the BTF of the program that @prog is linked to, or btf_vmlinux
+ * when @prog has no linked program.
+ */
+struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
+{
+	const struct bpf_prog *target = prog->aux->linked_prog;
+
+	return target ? target->aux->btf : btf_vmlinux;
+}
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
const struct btf_type *t = prog->aux->attach_func_proto;
+ struct bpf_prog *tgt_prog = prog->aux->linked_prog;
+ struct btf *btf = bpf_prog_get_target_btf(prog);
const char *tname = prog->aux->attach_func_name;
struct bpf_verifier_log *log = info->log;
const struct btf_param *args;
u32 nr_args, arg;
+ int ret;
if (off % 8) {
bpf_log(log, "func '%s' offset %d is not multiple of 8\n",
@@ -3435,22 +3647,34 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
}
arg = off / 8;
args = (const struct btf_param *)(t + 1);
- nr_args = btf_type_vlen(t);
+ /* if (t == NULL) Fall back to default BPF prog with 5 u64 arguments */
+ nr_args = t ? btf_type_vlen(t) : 5;
if (prog->aux->attach_btf_trace) {
/* skip first 'void *__data' argument in btf_trace_##name typedef */
args++;
nr_args--;
}
- if (arg >= nr_args) {
+
+ if (prog->expected_attach_type == BPF_TRACE_FEXIT &&
+ arg == nr_args) {
+ if (!t)
+ /* Default prog with 5 args. 6th arg is retval. */
+ return true;
+ /* function return type */
+ t = btf_type_by_id(btf, t->type);
+ } else if (arg >= nr_args) {
bpf_log(log, "func '%s' doesn't have %d-th argument\n",
- tname, arg);
+ tname, arg + 1);
return false;
+ } else {
+ if (!t)
+ /* Default prog with 5 args */
+ return true;
+ t = btf_type_by_id(btf, args[arg].type);
}
-
- t = btf_type_by_id(btf_vmlinux, args[arg].type);
/* skip modifiers */
while (btf_type_is_modifier(t))
- t = btf_type_by_id(btf_vmlinux, t->type);
+ t = btf_type_by_id(btf, t->type);
if (btf_type_is_int(t))
/* accessing a scalar */
return true;
@@ -3458,7 +3682,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
bpf_log(log,
"func '%s' arg%d '%s' has type %s. Only pointer access is allowed\n",
tname, arg,
- __btf_name_by_offset(btf_vmlinux, t->name_off),
+ __btf_name_by_offset(btf, t->name_off),
btf_kind_str[BTF_INFO_KIND(t->info)]);
return false;
}
@@ -3473,10 +3697,19 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
info->reg_type = PTR_TO_BTF_ID;
info->btf_id = t->type;
- t = btf_type_by_id(btf_vmlinux, t->type);
+ if (tgt_prog) {
+ ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type);
+ if (ret > 0) {
+ info->btf_id = ret;
+ return true;
+ } else {
+ return false;
+ }
+ }
+ t = btf_type_by_id(btf, t->type);
/* skip modifiers */
while (btf_type_is_modifier(t))
- t = btf_type_by_id(btf_vmlinux, t->type);
+ t = btf_type_by_id(btf, t->type);
if (!btf_type_is_struct(t)) {
bpf_log(log,
"func '%s' arg%d type %s is not a struct\n",
@@ -3485,7 +3718,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
}
bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n",
tname, arg, info->btf_id, btf_kind_str[BTF_INFO_KIND(t->info)],
- __btf_name_by_offset(btf_vmlinux, t->name_off));
+ __btf_name_by_offset(btf, t->name_off));
return true;
}
@@ -3494,10 +3727,10 @@ int btf_struct_access(struct bpf_verifier_log *log,
enum bpf_access_type atype,
u32 *next_btf_id)
{
+ u32 i, moff, mtrue_end, msize = 0, total_nelems = 0;
+ const struct btf_type *mtype, *elem_type = NULL;
const struct btf_member *member;
- const struct btf_type *mtype;
const char *tname, *mname;
- int i, moff = 0, msize;
again:
tname = __btf_name_by_offset(btf_vmlinux, t->name_off);
@@ -3507,40 +3740,88 @@ again:
}
for_each_member(i, t, member) {
- /* offset of the field in bits */
- moff = btf_member_bit_offset(t, member);
-
if (btf_member_bitfield_size(t, member))
/* bitfields are not supported yet */
continue;
- if (off + size <= moff / 8)
+ /* offset of the field in bytes */
+ moff = btf_member_bit_offset(t, member) / 8;
+ if (off + size <= moff)
/* won't find anything, field is already too far */
break;
+ /* In case of "off" is pointing to holes of a struct */
+ if (off < moff)
+ continue;
/* type of the field */
mtype = btf_type_by_id(btf_vmlinux, member->type);
mname = __btf_name_by_offset(btf_vmlinux, member->name_off);
- /* skip modifiers */
- while (btf_type_is_modifier(mtype))
- mtype = btf_type_by_id(btf_vmlinux, mtype->type);
-
- if (btf_type_is_array(mtype))
- /* array deref is not supported yet */
- continue;
-
- if (!btf_type_has_size(mtype) && !btf_type_is_ptr(mtype)) {
+ mtype = btf_resolve_size(btf_vmlinux, mtype, &msize,
+ &elem_type, &total_nelems);
+ if (IS_ERR(mtype)) {
bpf_log(log, "field %s doesn't have size\n", mname);
return -EFAULT;
}
- if (btf_type_is_ptr(mtype))
- msize = 8;
- else
- msize = mtype->size;
- if (off >= moff / 8 + msize)
+
+ mtrue_end = moff + msize;
+ if (off >= mtrue_end)
/* no overlap with member, keep iterating */
continue;
+
+ if (btf_type_is_array(mtype)) {
+ u32 elem_idx;
+
+ /* btf_resolve_size() above helps to
+ * linearize a multi-dimensional array.
+ *
+ * The logic here is treating an array
+ * in a struct as the following way:
+ *
+ * struct outer {
+ * struct inner array[2][2];
+ * };
+ *
+ * looks like:
+ *
+ * struct outer {
+ * struct inner array_elem0;
+ * struct inner array_elem1;
+ * struct inner array_elem2;
+ * struct inner array_elem3;
+ * };
+ *
+ * When accessing outer->array[1][0], it moves
+ * moff to "array_elem2", set mtype to
+ * "struct inner", and msize also becomes
+ * sizeof(struct inner). Then most of the
+ * remaining logic will fall through without
+ * caring the current member is an array or
+ * not.
+ *
+ * Unlike mtype/msize/moff, mtrue_end does not
+ * change. The naming difference ("_true") tells
+ * that it is not always corresponding to
+ * the current mtype/msize/moff.
+ * It is the true end of the current
+ * member (i.e. array in this case). That
+ * will allow an int array to be accessed like
+ * a scratch space,
+ * i.e. allow access beyond the size of
+ * the array's element as long as it is
+ * within the mtrue_end boundary.
+ */
+
+ /* skip empty array */
+ if (moff == mtrue_end)
+ continue;
+
+ msize /= total_nelems;
+ elem_idx = (off - moff) / msize;
+ moff += elem_idx * msize;
+ mtype = elem_type;
+ }
+
/* the 'off' we're looking for is either equal to start
* of this field or inside of this struct
*/
@@ -3549,20 +3830,20 @@ again:
t = mtype;
/* adjust offset we're looking for */
- off -= moff / 8;
+ off -= moff;
goto again;
}
- if (msize != size) {
- /* field access size doesn't match */
- bpf_log(log,
- "cannot access %d bytes in struct %s field %s that has size %d\n",
- size, tname, mname, msize);
- return -EACCES;
- }
if (btf_type_is_ptr(mtype)) {
const struct btf_type *stype;
+ if (msize != size || off != moff) {
+ bpf_log(log,
+ "cannot access ptr member %s with moff %u in struct %s with off %u size %u\n",
+ mname, moff, tname, off, size);
+ return -EACCES;
+ }
+
stype = btf_type_by_id(btf_vmlinux, mtype->type);
/* skip modifiers */
while (btf_type_is_modifier(stype))
@@ -3572,14 +3853,28 @@ again:
return PTR_TO_BTF_ID;
}
}
- /* all other fields are treated as scalars */
+
+ /* Allow more flexible access within an int as long as
+ * it is within mtrue_end.
+ * Since mtrue_end could be the end of an array,
+ * that also allows using an array of int as a scratch
+ * space. e.g. skb->cb[].
+ */
+ if (off + size > mtrue_end) {
+ bpf_log(log,
+ "access beyond the end of member %s (mend:%u) in struct %s with off %u size %u\n",
+ mname, mtrue_end, tname, off, size);
+ return -EACCES;
+ }
+
return SCALAR_VALUE;
}
bpf_log(log, "struct %s doesn't have field at offset %d\n", tname, off);
return -EINVAL;
}
-u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn, int arg)
+static int __btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn,
+ int arg)
{
char fnname[KSYM_SYMBOL_LEN + 4] = "btf_";
const struct btf_param *args;
@@ -3647,6 +3942,185 @@ u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn, int arg)
return btf_id;
}
+/* Resolve the BTF type id of argument @arg of helper @fn and cache it in
+ * fn->btf_id[arg].
+ *
+ * Returns -EINVAL if that argument is not ARG_PTR_TO_BTF_ID, the cached
+ * value when fn->btf_id[arg] is already non-zero, -EFAULT when the lookup
+ * yields 0, and otherwise the result of __btf_resolve_helper_id().
+ * NOTE(review): any non-zero lookup result is cached, which would include
+ * a negative one if __btf_resolve_helper_id() can return an error - confirm
+ * that caching errors is intended.
+ */
+int btf_resolve_helper_id(struct bpf_verifier_log *log,
+ const struct bpf_func_proto *fn, int arg)
+{
+ int *btf_id = &fn->btf_id[arg];
+ int ret;
+
+ if (fn->arg_type[arg] != ARG_PTR_TO_BTF_ID)
+ return -EINVAL;
+
+ /* fast path: already resolved by an earlier call */
+ ret = READ_ONCE(*btf_id);
+ if (ret)
+ return ret;
+ /* ok to race the search. The result is the same */
+ ret = __btf_resolve_helper_id(log, fn->func, arg);
+ if (!ret) {
+ /* Function argument cannot be type 'void' */
+ bpf_log(log, "BTF resolution bug\n");
+ return -EFAULT;
+ }
+ WRITE_ONCE(*btf_id, ret);
+ return ret;
+}
+
+/* Kind name of @t for log messages. @t is NULL when the offending type
+ * could not be looked up at all.
+ */
+static const char *__bad_type_kind_str(const struct btf_type *t)
+{
+	return t ? btf_kind_str[BTF_INFO_KIND(t->info)] : "UNKNOWN";
+}
+
+/* Return the size in bytes of the type @btf_id of @btf as seen by the
+ * kernel: 0 for void (id 0), sizeof(void *) for pointers and t->size for
+ * ints and enums, after skipping modifiers.
+ *
+ * Returns -EINVAL for anything else. *bad_type is always written: it is
+ * the unsupported type when one was found, and NULL when the type lookup
+ * itself failed (or when the call succeeds), so callers may inspect it on
+ * every error path.
+ */
+static int __get_type_size(struct btf *btf, u32 btf_id,
+			   const struct btf_type **bad_type)
+{
+	const struct btf_type *t;
+
+	/* never leave the caller's pointer uninitialized */
+	*bad_type = NULL;
+	if (!btf_id)
+		/* void */
+		return 0;
+	t = btf_type_by_id(btf, btf_id);
+	while (t && btf_type_is_modifier(t))
+		t = btf_type_by_id(btf, t->type);
+	if (!t)
+		return -EINVAL;
+	if (btf_type_is_ptr(t))
+		/* kernel size of pointer. Not BPF's size of pointer */
+		return sizeof(void *);
+	if (btf_type_is_int(t) || btf_type_is_enum(t))
+		return t->size;
+	*bad_type = t;
+	return -EINVAL;
+}
+
+/* Fill the function model @m (argument sizes, return size, argument count)
+ * from the BTF function prototype @func of @btf. @tname is only used in
+ * log messages.
+ *
+ * With a NULL @func the model falls back to five 8-byte arguments and an
+ * 8-byte return value. Returns 0 on success and -EINVAL when the prototype
+ * has MAX_BPF_FUNC_ARGS or more arguments, or when the return type or an
+ * argument type is not void/pointer/int/enum.
+ */
+int btf_distill_func_proto(struct bpf_verifier_log *log,
+			   struct btf *btf,
+			   const struct btf_type *func,
+			   const char *tname,
+			   struct btf_func_model *m)
+{
+	const struct btf_param *args;
+	const struct btf_type *t;
+	u32 i, nargs;
+	int ret;
+
+	if (!func) {
+		/* BTF function prototype doesn't match the verifier types.
+		 * Fall back to 5 u64 args.
+		 */
+		for (i = 0; i < 5; i++)
+			m->arg_size[i] = 8;
+		m->ret_size = 8;
+		m->nr_args = 5;
+		return 0;
+	}
+	/* the parameter array directly follows the struct btf_type */
+	args = (const struct btf_param *)(func + 1);
+	nargs = btf_type_vlen(func);
+	if (nargs >= MAX_BPF_FUNC_ARGS) {
+		bpf_log(log,
+			"The function %s has %d arguments. Too many.\n",
+			tname, nargs);
+		return -EINVAL;
+	}
+	ret = __get_type_size(btf, func->type, &t);
+	if (ret < 0) {
+		/* t may be NULL here; the helper copes with that */
+		bpf_log(log,
+			"The function %s return type %s is unsupported.\n",
+			tname, __bad_type_kind_str(t));
+		return -EINVAL;
+	}
+	m->ret_size = ret;
+
+	for (i = 0; i < nargs; i++) {
+		ret = __get_type_size(btf, args[i].type, &t);
+		if (ret < 0) {
+			bpf_log(log,
+				"The function %s arg%d type %s is unsupported.\n",
+				tname, i, __bad_type_kind_str(t));
+			return -EINVAL;
+		}
+		m->arg_size[i] = ret;
+	}
+	m->nr_args = nargs;
+	return 0;
+}
+
+/* Compare the BTF prototype of subprogram @subprog with the register
+ * state of the current frame (R1..R5).
+ *
+ * Returns 0 without checking when the program has no func_info, when the
+ * subprog has no BTF type id, or when it is already marked unreliable.
+ * Returns -EINVAL when the BTF id is not a FUNC or its type is not a
+ * FUNC_PROTO. Any argument mismatch is only logged: the subprog's
+ * func_info_aux entry is marked unreliable and 0 is returned.
+ */
+int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog)
+{
+ struct bpf_verifier_state *st = env->cur_state;
+ struct bpf_func_state *func = st->frame[st->curframe];
+ struct bpf_reg_state *reg = func->regs;
+ struct bpf_verifier_log *log = &env->log;
+ struct bpf_prog *prog = env->prog;
+ struct btf *btf = prog->aux->btf;
+ const struct btf_param *args;
+ const struct btf_type *t;
+ u32 i, nargs, btf_id;
+ const char *tname;
+
+ if (!prog->aux->func_info)
+ return 0;
+
+ btf_id = prog->aux->func_info[subprog].type_id;
+ if (!btf_id)
+ return 0;
+
+ if (prog->aux->func_info_aux[subprog].unreliable)
+ return 0;
+
+ t = btf_type_by_id(btf, btf_id);
+ if (!t || !btf_type_is_func(t)) {
+ bpf_log(log, "BTF of subprog %d doesn't point to KIND_FUNC\n",
+ subprog);
+ return -EINVAL;
+ }
+ tname = btf_name_by_offset(btf, t->name_off);
+
+ t = btf_type_by_id(btf, t->type);
+ if (!t || !btf_type_is_func_proto(t)) {
+ bpf_log(log, "Invalid type of func %s\n", tname);
+ return -EINVAL;
+ }
+ /* the parameter array directly follows the struct btf_type */
+ args = (const struct btf_param *)(t + 1);
+ nargs = btf_type_vlen(t);
+ if (nargs > 5) {
+ bpf_log(log, "Function %s has %d > 5 args\n", tname, nargs);
+ goto out;
+ }
+ /* check that BTF function arguments match actual types that the
+ * verifier sees.
+ */
+ for (i = 0; i < nargs; i++) {
+ t = btf_type_by_id(btf, args[i].type);
+ while (btf_type_is_modifier(t))
+ t = btf_type_by_id(btf, t->type);
+ /* argument i is checked against register R(i + 1) */
+ if (btf_type_is_int(t) || btf_type_is_enum(t)) {
+ if (reg[i + 1].type == SCALAR_VALUE)
+ continue;
+ bpf_log(log, "R%d is not a scalar\n", i + 1);
+ goto out;
+ }
+ if (btf_type_is_ptr(t)) {
+ if (reg[i + 1].type == SCALAR_VALUE) {
+ bpf_log(log, "R%d is not a pointer\n", i + 1);
+ goto out;
+ }
+ /* If program is passing PTR_TO_CTX into subprogram
+ * check that BTF type matches.
+ */
+ if (reg[i + 1].type == PTR_TO_CTX &&
+ !btf_get_prog_ctx_type(log, btf, t, prog->type))
+ goto out;
+ /* All other pointers are ok */
+ continue;
+ }
+ bpf_log(log, "Unrecognized argument type %s\n",
+ btf_kind_str[BTF_INFO_KIND(t->info)]);
+ goto out;
+ }
+ return 0;
+out:
+ /* LLVM optimizations can remove arguments from static functions. */
+ bpf_log(log,
+ "Type info disagrees with actual arguments due to compiler optimizations\n");
+ prog->aux->func_info_aux[subprog].unreliable = true;
+ return 0;
+}
+
void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
struct seq_file *m)
{
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 97e37d82a1cc..b5945c3aaa8e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -31,6 +31,7 @@
#include <linux/rcupdate.h>
#include <linux/perf_event.h>
#include <linux/extable.h>
+#include <linux/log2.h>
#include <asm/unaligned.h>
/* Registers */
@@ -815,6 +816,9 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
struct bpf_binary_header *hdr;
u32 size, hole, start, pages;
+ WARN_ON_ONCE(!is_power_of_2(alignment) ||
+ alignment > BPF_IMAGE_ALIGNMENT);
+
/* Most of BPF filters are really small, but if some of them
* fill a page, allow at least 128 extra bytes to insert a
* random section of illegal instructions.
@@ -1569,7 +1573,7 @@ out:
#undef LDST
#define LDX_PROBE(SIZEOP, SIZE) \
LDX_PROBE_MEM_##SIZEOP: \
- bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) SRC); \
+ bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) (SRC + insn->off)); \
CONT;
LDX_PROBE(B, 1)
LDX_PROBE(H, 2)
@@ -2011,6 +2015,7 @@ static void bpf_prog_free_deferred(struct work_struct *work)
if (aux->prog->has_callchain_buf)
put_callchain_buffers();
#endif
+ bpf_trampoline_put(aux->trampoline);
for (i = 0; i < aux->func_cnt; i++)
bpf_jit_free(aux->func[i]);
if (aux->func_cnt) {
@@ -2026,6 +2031,8 @@ void bpf_prog_free(struct bpf_prog *fp)
{
struct bpf_prog_aux *aux = fp->aux;
+ if (aux->linked_prog)
+ bpf_prog_put(aux->linked_prog);
INIT_WORK(&aux->work, bpf_prog_free_deferred);
schedule_work(&aux->work);
}
@@ -2140,6 +2147,12 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
return -EFAULT;
}
+/* Weak default implementation: ignores its arguments and reports
+ * -ENOTSUPP. Presumably overridden by architectures that implement text
+ * poking - TODO confirm against the arch code.
+ */
+int __weak bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+ void *addr1, void *addr2)
+{
+ return -ENOTSUPP;
+}
+
DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
EXPORT_SYMBOL(bpf_stats_enabled_key);
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index a70f7209cda3..ecf42bec38c0 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -31,10 +31,10 @@ static void *bpf_any_get(void *raw, enum bpf_type type)
{
switch (type) {
case BPF_TYPE_PROG:
- raw = bpf_prog_inc(raw);
+ bpf_prog_inc(raw);
break;
case BPF_TYPE_MAP:
- raw = bpf_map_inc(raw, true);
+ bpf_map_inc_with_uref(raw);
break;
default:
WARN_ON_ONCE(1);
@@ -534,7 +534,8 @@ static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type
if (!bpf_prog_get_ok(prog, &type, false))
return ERR_PTR(-EINVAL);
- return bpf_prog_inc(prog);
+ bpf_prog_inc(prog);
+ return prog;
}
struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type)
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index fab4fb134547..4cbe987be35b 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -98,7 +98,7 @@ void *bpf_map_fd_get_ptr(struct bpf_map *map,
return inner_map;
if (bpf_map_meta_equal(map->inner_map_meta, inner_map))
- inner_map = bpf_map_inc(inner_map, false);
+ bpf_map_inc(inner_map);
else
inner_map = ERR_PTR(-EINVAL);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index d447b5e343bf..b51ecb9644d0 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -23,6 +23,7 @@
#include <linux/timekeeping.h>
#include <linux/ctype.h>
#include <linux/nospec.h>
+#include <linux/audit.h>
#include <uapi/linux/btf.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
@@ -43,7 +44,7 @@ static DEFINE_SPINLOCK(map_idr_lock);
int sysctl_unprivileged_bpf_disabled __read_mostly;
static const struct bpf_map_ops * const bpf_map_types[] = {
-#define BPF_PROG_TYPE(_id, _ops)
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
#define BPF_MAP_TYPE(_id, _ops) \
[_id] = &_ops,
#include <linux/bpf_types.h>
@@ -127,7 +128,7 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
return map;
}
-void *bpf_map_area_alloc(u64 size, int numa_node)
+static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
{
/* We really just want to fail instead of triggering OOM killer
* under memory pressure, therefore we set __GFP_NORETRY to kmalloc,
@@ -145,18 +146,33 @@ void *bpf_map_area_alloc(u64 size, int numa_node)
if (size >= SIZE_MAX)
return NULL;
- if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
+ /* kmalloc()'ed memory can't be mmap()'ed */
+ if (!mmapable && size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
area = kmalloc_node(size, GFP_USER | __GFP_NORETRY | flags,
numa_node);
if (area != NULL)
return area;
}
-
+ if (mmapable) {
+ BUG_ON(!PAGE_ALIGNED(size));
+ return vmalloc_user_node_flags(size, numa_node, GFP_KERNEL |
+ __GFP_RETRY_MAYFAIL | flags);
+ }
return __vmalloc_node_flags_caller(size, numa_node,
GFP_KERNEL | __GFP_RETRY_MAYFAIL |
flags, __builtin_return_address(0));
}
+/* Allocate @size bytes for a map on @numa_node via __bpf_map_area_alloc()
+ * with mmapable == false, i.e. kmalloc may be tried before vmalloc.
+ */
+void *bpf_map_area_alloc(u64 size, int numa_node)
+{
+ return __bpf_map_area_alloc(size, numa_node, false);
+}
+
+/* Allocate @size bytes for a map on @numa_node via __bpf_map_area_alloc()
+ * with mmapable == true: kmalloc is skipped and @size must be
+ * page-aligned (the helper BUG()s otherwise).
+ */
+void *bpf_map_area_mmapable_alloc(u64 size, int numa_node)
+{
+ return __bpf_map_area_alloc(size, numa_node, true);
+}
+
void bpf_map_area_free(void *area)
{
kvfree(area);
@@ -314,7 +330,7 @@ static void bpf_map_free_deferred(struct work_struct *work)
static void bpf_map_put_uref(struct bpf_map *map)
{
- if (atomic_dec_and_test(&map->usercnt)) {
+ if (atomic64_dec_and_test(&map->usercnt)) {
if (map->ops->map_release_uref)
map->ops->map_release_uref(map);
}
@@ -325,7 +341,7 @@ static void bpf_map_put_uref(struct bpf_map *map)
*/
static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
{
- if (atomic_dec_and_test(&map->refcnt)) {
+ if (atomic64_dec_and_test(&map->refcnt)) {
/* bpf_map_free_id() must be called first */
bpf_map_free_id(map, do_idr_lock);
btf_put(map->btf);
@@ -428,6 +444,74 @@ static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf,
return -EINVAL;
}
+/* called for any extra memory-mapped regions (except initial)
+ *
+ * Takes one more map reference plus user reference for the new region
+ * and, for a writable region, bumps map->writecnt under freeze_mutex.
+ */
+static void bpf_map_mmap_open(struct vm_area_struct *vma)
+{
+ struct bpf_map *map = vma->vm_file->private_data;
+
+ bpf_map_inc_with_uref(map);
+
+ if (vma->vm_flags & VM_WRITE) {
+ mutex_lock(&map->freeze_mutex);
+ map->writecnt++;
+ mutex_unlock(&map->freeze_mutex);
+ }
+}
+
+/* called for every unmapped memory region (including the initial one)
+ *
+ * Drops map->writecnt under freeze_mutex for a writable region, then
+ * releases the map reference and user reference held for the region.
+ */
+static void bpf_map_mmap_close(struct vm_area_struct *vma)
+{
+ struct bpf_map *map = vma->vm_file->private_data;
+
+ if (vma->vm_flags & VM_WRITE) {
+ mutex_lock(&map->freeze_mutex);
+ map->writecnt--;
+ mutex_unlock(&map->freeze_mutex);
+ }
+
+ /* put last: the map is still dereferenced above */
+ bpf_map_put_with_uref(map);
+}
+
+/* vm_ops installed by bpf_map_mmap() on every mapping of a BPF map */
+static const struct vm_operations_struct bpf_map_default_vmops = {
+ .open = bpf_map_mmap_open,
+ .close = bpf_map_mmap_close,
+};
+
+/* ->mmap file operation for BPF map fds.
+ *
+ * Returns -ENOTSUPP if the map type has no ->map_mmap op or the map value
+ * contains a spin lock, -EINVAL for a mapping without VM_SHARED, -EPERM
+ * for a writable mapping of a frozen map, or the error returned by the
+ * map's ->map_mmap op. On success one map reference plus user reference
+ * is taken and, for a writable mapping, map->writecnt is incremented.
+ * The frozen check, the op call and the writecnt update all happen under
+ * map->freeze_mutex.
+ */
+static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct bpf_map *map = filp->private_data;
+ int err;
+
+ if (!map->ops->map_mmap || map_value_has_spin_lock(map))
+ return -ENOTSUPP;
+
+ if (!(vma->vm_flags & VM_SHARED))
+ return -EINVAL;
+
+ mutex_lock(&map->freeze_mutex);
+
+ if ((vma->vm_flags & VM_WRITE) && map->frozen) {
+ err = -EPERM;
+ goto out;
+ }
+
+ /* set default open/close callbacks */
+ vma->vm_ops = &bpf_map_default_vmops;
+ vma->vm_private_data = map;
+
+ err = map->ops->map_mmap(map, vma);
+ if (err)
+ goto out;
+
+ /* reference for the initial region; dropped in bpf_map_mmap_close() */
+ bpf_map_inc_with_uref(map);
+
+ if (vma->vm_flags & VM_WRITE)
+ map->writecnt++;
+out:
+ mutex_unlock(&map->freeze_mutex);
+ return err;
+}
+
const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
.show_fdinfo = bpf_map_show_fdinfo,
@@ -435,6 +519,7 @@ const struct file_operations bpf_map_fops = {
.release = bpf_map_release,
.read = bpf_dummy_read,
.write = bpf_dummy_write,
+ .mmap = bpf_map_mmap,
};
int bpf_map_new_fd(struct bpf_map *map, int flags)
@@ -578,8 +663,9 @@ static int map_create(union bpf_attr *attr)
if (err)
goto free_map;
- atomic_set(&map->refcnt, 1);
- atomic_set(&map->usercnt, 1);
+ atomic64_set(&map->refcnt, 1);
+ atomic64_set(&map->usercnt, 1);
+ mutex_init(&map->freeze_mutex);
if (attr->btf_key_type_id || attr->btf_value_type_id) {
struct btf *btf;
@@ -656,21 +742,19 @@ struct bpf_map *__bpf_map_get(struct fd f)
return f.file->private_data;
}
-/* prog's and map's refcnt limit */
-#define BPF_MAX_REFCNT 32768
-
-struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
+void bpf_map_inc(struct bpf_map *map)
{
- if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
- atomic_dec(&map->refcnt);
- return ERR_PTR(-EBUSY);
- }
- if (uref)
- atomic_inc(&map->usercnt);
- return map;
+ atomic64_inc(&map->refcnt);
}
EXPORT_SYMBOL_GPL(bpf_map_inc);
+void bpf_map_inc_with_uref(struct bpf_map *map)
+{
+ atomic64_inc(&map->refcnt);
+ atomic64_inc(&map->usercnt);
+}
+EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref);
+
struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
struct fd f = fdget(ufd);
@@ -680,38 +764,30 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd)
if (IS_ERR(map))
return map;
- map = bpf_map_inc(map, true);
+ bpf_map_inc_with_uref(map);
fdput(f);
return map;
}
/* map_idr_lock should have been held */
-static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map,
- bool uref)
+static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
{
int refold;
- refold = atomic_fetch_add_unless(&map->refcnt, 1, 0);
-
- if (refold >= BPF_MAX_REFCNT) {
- __bpf_map_put(map, false);
- return ERR_PTR(-EBUSY);
- }
-
+ refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0);
if (!refold)
return ERR_PTR(-ENOENT);
-
if (uref)
- atomic_inc(&map->usercnt);
+ atomic64_inc(&map->usercnt);
return map;
}
-struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
+struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map)
{
spin_lock_bh(&map_idr_lock);
- map = __bpf_map_inc_not_zero(map, uref);
+ map = __bpf_map_inc_not_zero(map, false);
spin_unlock_bh(&map_idr_lock);
return map;
@@ -1176,6 +1252,13 @@ static int map_freeze(const union bpf_attr *attr)
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
+
+ mutex_lock(&map->freeze_mutex);
+
+ if (map->writecnt) {
+ err = -EBUSY;
+ goto err_put;
+ }
if (READ_ONCE(map->frozen)) {
err = -EBUSY;
goto err_put;
@@ -1187,12 +1270,13 @@ static int map_freeze(const union bpf_attr *attr)
WRITE_ONCE(map->frozen, true);
err_put:
+ mutex_unlock(&map->freeze_mutex);
fdput(f);
return err;
}
static const struct bpf_prog_ops * const bpf_prog_types[] = {
-#define BPF_PROG_TYPE(_id, _name) \
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
[_id] = & _name ## _prog_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
@@ -1238,6 +1322,34 @@ static void free_used_maps(struct bpf_prog_aux *aux)
kfree(aux->used_maps);
}
+enum bpf_event {
+ BPF_EVENT_LOAD,
+ BPF_EVENT_UNLOAD,
+};
+
+static const char * const bpf_event_audit_str[] = {
+ [BPF_EVENT_LOAD] = "LOAD",
+ [BPF_EVENT_UNLOAD] = "UNLOAD",
+};
+
+static void bpf_audit_prog(const struct bpf_prog *prog, enum bpf_event event)
+{
+ bool has_task_context = event == BPF_EVENT_LOAD;
+ struct audit_buffer *ab;
+
+ if (audit_enabled == AUDIT_OFF)
+ return;
+ ab = audit_log_start(audit_context(), GFP_ATOMIC, AUDIT_BPF);
+ if (unlikely(!ab))
+ return;
+ if (has_task_context)
+ audit_log_task(ab);
+ audit_log_format(ab, "%sprog-id=%u event=%s",
+ has_task_context ? " " : "",
+ prog->aux->id, bpf_event_audit_str[event]);
+ audit_log_end(ab);
+}
+
int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
@@ -1331,6 +1443,7 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
kvfree(aux->func_info);
+ kfree(aux->func_info_aux);
free_used_maps(aux);
bpf_prog_uncharge_memlock(aux->prog);
security_bpf_prog_free(aux);
@@ -1351,8 +1464,9 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
- if (atomic_dec_and_test(&prog->aux->refcnt)) {
+ if (atomic64_dec_and_test(&prog->aux->refcnt)) {
perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
+ bpf_audit_prog(prog, BPF_EVENT_UNLOAD);
/* bpf_prog_free_id() must be called first */
bpf_prog_free_id(prog, do_idr_lock);
__bpf_prog_put_noref(prog, true);
@@ -1457,13 +1571,9 @@ static struct bpf_prog *____bpf_prog_get(struct fd f)
return f.file->private_data;
}
-struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
+void bpf_prog_add(struct bpf_prog *prog, int i)
{
- if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
- atomic_sub(i, &prog->aux->refcnt);
- return ERR_PTR(-EBUSY);
- }
- return prog;
+ atomic64_add(i, &prog->aux->refcnt);
}
EXPORT_SYMBOL_GPL(bpf_prog_add);
@@ -1474,13 +1584,13 @@ void bpf_prog_sub(struct bpf_prog *prog, int i)
* path holds a reference to the program, thus atomic_sub() can
* be safely used in such cases!
*/
- WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
+ WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0);
}
EXPORT_SYMBOL_GPL(bpf_prog_sub);
-struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
+void bpf_prog_inc(struct bpf_prog *prog)
{
- return bpf_prog_add(prog, 1);
+ atomic64_inc(&prog->aux->refcnt);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);
@@ -1489,12 +1599,7 @@ struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
{
int refold;
- refold = atomic_fetch_add_unless(&prog->aux->refcnt, 1, 0);
-
- if (refold >= BPF_MAX_REFCNT) {
- __bpf_prog_put(prog, false);
- return ERR_PTR(-EBUSY);
- }
+ refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0);
if (!refold)
return ERR_PTR(-ENOENT);
@@ -1532,7 +1637,7 @@ static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type,
goto out;
}
- prog = bpf_prog_inc(prog);
+ bpf_prog_inc(prog);
out:
fdput(f);
return prog;
@@ -1579,7 +1684,7 @@ static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
static int
bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
enum bpf_attach_type expected_attach_type,
- u32 btf_id)
+ u32 btf_id, u32 prog_fd)
{
switch (prog_type) {
case BPF_PROG_TYPE_TRACING:
@@ -1587,7 +1692,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
return -EINVAL;
break;
default:
- if (btf_id)
+ if (btf_id || prog_fd)
return -EINVAL;
break;
}
@@ -1638,7 +1743,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
}
/* last field in 'union bpf_attr' used by this command */
-#define BPF_PROG_LOAD_LAST_FIELD attach_btf_id
+#define BPF_PROG_LOAD_LAST_FIELD attach_prog_fd
static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
{
@@ -1681,7 +1786,8 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
bpf_prog_load_fixup_attach_type(attr);
if (bpf_prog_load_check_attach(type, attr->expected_attach_type,
- attr->attach_btf_id))
+ attr->attach_btf_id,
+ attr->attach_prog_fd))
return -EINVAL;
/* plain bpf_prog allocation */
@@ -1691,6 +1797,16 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
prog->expected_attach_type = attr->expected_attach_type;
prog->aux->attach_btf_id = attr->attach_btf_id;
+ if (attr->attach_prog_fd) {
+ struct bpf_prog *tgt_prog;
+
+ tgt_prog = bpf_prog_get(attr->attach_prog_fd);
+ if (IS_ERR(tgt_prog)) {
+ err = PTR_ERR(tgt_prog);
+ goto free_prog_nouncharge;
+ }
+ prog->aux->linked_prog = tgt_prog;
+ }
prog->aux->offload_requested = !!attr->prog_ifindex;
@@ -1712,7 +1828,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
prog->orig_prog = NULL;
prog->jited = 0;
- atomic_set(&prog->aux->refcnt, 1);
+ atomic64_set(&prog->aux->refcnt, 1);
prog->gpl_compatible = is_gpl ? 1 : 0;
if (bpf_prog_is_dev_bound(prog->aux)) {
@@ -1760,6 +1876,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
*/
bpf_prog_kallsyms_add(prog);
perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0);
+ bpf_audit_prog(prog, BPF_EVENT_LOAD);
err = bpf_prog_new_fd(prog);
if (err < 0)
@@ -1802,6 +1919,49 @@ static int bpf_obj_get(const union bpf_attr *attr)
attr->file_flags);
}
+static int bpf_tracing_prog_release(struct inode *inode, struct file *filp)
+{
+ struct bpf_prog *prog = filp->private_data;
+
+ WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
+ bpf_prog_put(prog);
+ return 0;
+}
+
+static const struct file_operations bpf_tracing_prog_fops = {
+ .release = bpf_tracing_prog_release,
+ .read = bpf_dummy_read,
+ .write = bpf_dummy_write,
+};
+
+static int bpf_tracing_prog_attach(struct bpf_prog *prog)
+{
+ int tr_fd, err;
+
+ if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
+ prog->expected_attach_type != BPF_TRACE_FEXIT) {
+ err = -EINVAL;
+ goto out_put_prog;
+ }
+
+ err = bpf_trampoline_link_prog(prog);
+ if (err)
+ goto out_put_prog;
+
+ tr_fd = anon_inode_getfd("bpf-tracing-prog", &bpf_tracing_prog_fops,
+ prog, O_CLOEXEC);
+ if (tr_fd < 0) {
+ WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
+ err = tr_fd;
+ goto out_put_prog;
+ }
+ return tr_fd;
+
+out_put_prog:
+ bpf_prog_put(prog);
+ return err;
+}
+
struct bpf_raw_tracepoint {
struct bpf_raw_event_map *btp;
struct bpf_prog *prog;
@@ -1853,14 +2013,16 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
if (prog->type == BPF_PROG_TYPE_TRACING) {
if (attr->raw_tracepoint.name) {
- /* raw_tp name should not be specified in raw_tp
- * programs that were verified via in-kernel BTF info
+ /* The attach point for this category of programs
+ * should be specified via btf_id during program load.
*/
err = -EINVAL;
goto out_put_prog;
}
- /* raw_tp name is taken from type name instead */
- tp_name = prog->aux->attach_func_name;
+ if (prog->expected_attach_type == BPF_TRACE_RAW_TP)
+ tp_name = prog->aux->attach_func_name;
+ else
+ return bpf_tracing_prog_attach(prog);
} else {
if (strncpy_from_user(buf,
u64_to_user_ptr(attr->raw_tracepoint.name),
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
new file mode 100644
index 000000000000..10ae59d65f13
--- /dev/null
+++ b/kernel/bpf/trampoline.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2019 Facebook */
+#include <linux/hash.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+
+/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
+#define TRAMPOLINE_HASH_BITS 10
+#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)
+
+static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
+
+/* serializes access to trampoline_table */
+static DEFINE_MUTEX(trampoline_mutex);
+
+struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
+{
+ struct bpf_trampoline *tr;
+ struct hlist_head *head;
+ void *image;
+ int i;
+
+ mutex_lock(&trampoline_mutex);
+ head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
+ hlist_for_each_entry(tr, head, hlist) {
+ if (tr->key == key) {
+ refcount_inc(&tr->refcnt);
+ goto out;
+ }
+ }
+ tr = kzalloc(sizeof(*tr), GFP_KERNEL);
+ if (!tr)
+ goto out;
+
+ /* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
+ image = bpf_jit_alloc_exec(PAGE_SIZE);
+ if (!image) {
+ kfree(tr);
+ tr = NULL;
+ goto out;
+ }
+
+ tr->key = key;
+ INIT_HLIST_NODE(&tr->hlist);
+ hlist_add_head(&tr->hlist, head);
+ refcount_set(&tr->refcnt, 1);
+ mutex_init(&tr->mutex);
+ for (i = 0; i < BPF_TRAMP_MAX; i++)
+ INIT_HLIST_HEAD(&tr->progs_hlist[i]);
+
+ set_vm_flush_reset_perms(image);
+ /* Keep image as writeable. The alternative is to keep flipping ro/rw
+ * every time a new program is attached or detached.
+ */
+ set_memory_x((long)image, 1);
+ tr->image = image;
+out:
+ mutex_unlock(&trampoline_mutex);
+ return tr;
+}
+
+/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
+ * bytes on x86. Pick a number to fit into PAGE_SIZE / 2
+ */
+#define BPF_MAX_TRAMP_PROGS 40
+
+static int bpf_trampoline_update(struct bpf_trampoline *tr)
+{
+ void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
+ void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
+ struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS];
+ int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY];
+ int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT];
+ struct bpf_prog **progs, **fentry, **fexit;
+ u32 flags = BPF_TRAMP_F_RESTORE_REGS;
+ struct bpf_prog_aux *aux;
+ int err;
+
+ if (fentry_cnt + fexit_cnt == 0) {
+ err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_CALL_TO_NOP,
+ old_image, NULL);
+ tr->selector = 0;
+ goto out;
+ }
+
+ /* populate fentry progs */
+ fentry = progs = progs_to_run;
+ hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FENTRY], tramp_hlist)
+ *progs++ = aux->prog;
+
+ /* populate fexit progs */
+ fexit = progs;
+ hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FEXIT], tramp_hlist)
+ *progs++ = aux->prog;
+
+ if (fexit_cnt)
+ flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
+
+ err = arch_prepare_bpf_trampoline(new_image, &tr->func.model, flags,
+ fentry, fentry_cnt,
+ fexit, fexit_cnt,
+ tr->func.addr);
+ if (err)
+ goto out;
+
+ if (tr->selector)
+ /* progs already running at this address */
+ err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_CALL_TO_CALL,
+ old_image, new_image);
+ else
+ /* first time registering */
+ err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_NOP_TO_CALL,
+ NULL, new_image);
+ if (err)
+ goto out;
+ tr->selector++;
+out:
+ return err;
+}
+
+static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t)
+{
+ switch (t) {
+ case BPF_TRACE_FENTRY:
+ return BPF_TRAMP_FENTRY;
+ default:
+ return BPF_TRAMP_FEXIT;
+ }
+}
+
+int bpf_trampoline_link_prog(struct bpf_prog *prog)
+{
+ enum bpf_tramp_prog_type kind;
+ struct bpf_trampoline *tr;
+ int err = 0;
+
+ tr = prog->aux->trampoline;
+ kind = bpf_attach_type_to_tramp(prog->expected_attach_type);
+ mutex_lock(&tr->mutex);
+ if (tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT]
+ >= BPF_MAX_TRAMP_PROGS) {
+ err = -E2BIG;
+ goto out;
+ }
+ if (!hlist_unhashed(&prog->aux->tramp_hlist)) {
+ /* prog already linked */
+ err = -EBUSY;
+ goto out;
+ }
+ hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]);
+ tr->progs_cnt[kind]++;
+ err = bpf_trampoline_update(prog->aux->trampoline);
+ if (err) {
+ hlist_del(&prog->aux->tramp_hlist);
+ tr->progs_cnt[kind]--;
+ }
+out:
+ mutex_unlock(&tr->mutex);
+ return err;
+}
+
+/* bpf_trampoline_unlink_prog() should never fail. */
+int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
+{
+ enum bpf_tramp_prog_type kind;
+ struct bpf_trampoline *tr;
+ int err;
+
+ tr = prog->aux->trampoline;
+ kind = bpf_attach_type_to_tramp(prog->expected_attach_type);
+ mutex_lock(&tr->mutex);
+ hlist_del(&prog->aux->tramp_hlist);
+ tr->progs_cnt[kind]--;
+ err = bpf_trampoline_update(prog->aux->trampoline);
+ mutex_unlock(&tr->mutex);
+ return err;
+}
+
+void bpf_trampoline_put(struct bpf_trampoline *tr)
+{
+ if (!tr)
+ return;
+ mutex_lock(&trampoline_mutex);
+ if (!refcount_dec_and_test(&tr->refcnt))
+ goto out;
+ WARN_ON_ONCE(mutex_is_locked(&tr->mutex));
+ if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY])))
+ goto out;
+ if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
+ goto out;
+ bpf_jit_free_exec(tr->image);
+ hlist_del(&tr->hlist);
+ kfree(tr);
+out:
+ mutex_unlock(&trampoline_mutex);
+}
+
+/* The logic is similar to BPF_PROG_RUN, but with explicit rcu and preempt that
+ * are needed for trampoline. The macro is split into
+ * call __bpf_prog_enter
+ * call prog->bpf_func
+ * call __bpf_prog_exit
+ */
+u64 notrace __bpf_prog_enter(void)
+{
+ u64 start = 0;
+
+ rcu_read_lock();
+ preempt_disable();
+ if (static_branch_unlikely(&bpf_stats_enabled_key))
+ start = sched_clock();
+ return start;
+}
+
+void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
+{
+ struct bpf_prog_stats *stats;
+
+ if (static_branch_unlikely(&bpf_stats_enabled_key) &&
+ /* static_key could be enabled in __bpf_prog_enter
+ * and disabled in __bpf_prog_exit.
+ * And vice versa.
+ * Hence check that 'start' is not zero.
+ */
+ start) {
+ stats = this_cpu_ptr(prog->aux->stats);
+ u64_stats_update_begin(&stats->syncp);
+ stats->cnt++;
+ stats->nsecs += sched_clock() - start;
+ u64_stats_update_end(&stats->syncp);
+ }
+ preempt_enable();
+ rcu_read_unlock();
+}
+
+int __weak
+arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags,
+ struct bpf_prog **fentry_progs, int fentry_cnt,
+ struct bpf_prog **fexit_progs, int fexit_cnt,
+ void *orig_call)
+{
+ return -ENOTSUPP;
+}
+
+static int __init init_trampolines(void)
+{
+ int i;
+
+ for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
+ INIT_HLIST_HEAD(&trampoline_table[i]);
+ return 0;
+}
+late_initcall(init_trampolines);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2f2374967b36..9f59f7a19dd0 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -23,7 +23,7 @@
#include "disasm.h"
static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
-#define BPF_PROG_TYPE(_id, _name) \
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
[_id] = & _name ## _verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
@@ -3970,6 +3970,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
/* only increment it after check_reg_arg() finished */
state->curframe++;
+ if (btf_check_func_arg_match(env, subprog))
+ return -EINVAL;
+
/* and go analyze first insn of the callee */
*insn_idx = target_insn;
@@ -4147,11 +4150,9 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
meta.func_id = func_id;
/* check args */
for (i = 0; i < 5; i++) {
- if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID) {
- if (!fn->btf_id[i])
- fn->btf_id[i] = btf_resolve_helper_id(&env->log, fn->func, i);
- meta.btf_id = fn->btf_id[i];
- }
+ err = btf_resolve_helper_id(&env->log, fn, i);
+ if (err > 0)
+ meta.btf_id = err;
err = check_func_arg(env, BPF_REG_1 + i, fn->arg_type[i], &meta);
if (err)
return err;
@@ -6566,6 +6567,7 @@ static int check_btf_func(struct bpf_verifier_env *env,
u32 i, nfuncs, urec_size, min_size;
u32 krec_size = sizeof(struct bpf_func_info);
struct bpf_func_info *krecord;
+ struct bpf_func_info_aux *info_aux = NULL;
const struct btf_type *type;
struct bpf_prog *prog;
const struct btf *btf;
@@ -6599,6 +6601,9 @@ static int check_btf_func(struct bpf_verifier_env *env,
krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
if (!krecord)
return -ENOMEM;
+ info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
+ if (!info_aux)
+ goto err_free;
for (i = 0; i < nfuncs; i++) {
ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
@@ -6650,29 +6655,31 @@ static int check_btf_func(struct bpf_verifier_env *env,
ret = -EINVAL;
goto err_free;
}
-
prev_offset = krecord[i].insn_off;
urecord += urec_size;
}
prog->aux->func_info = krecord;
prog->aux->func_info_cnt = nfuncs;
+ prog->aux->func_info_aux = info_aux;
return 0;
err_free:
kvfree(krecord);
+ kfree(info_aux);
return ret;
}
static void adjust_btf_func(struct bpf_verifier_env *env)
{
+ struct bpf_prog_aux *aux = env->prog->aux;
int i;
- if (!env->prog->aux->func_info)
+ if (!aux->func_info)
return;
for (i = 0; i < env->subprog_cnt; i++)
- env->prog->aux->func_info[i].insn_off = env->subprog_info[i].start;
+ aux->func_info[i].insn_off = env->subprog_info[i].start;
}
#define MIN_BPF_LINEINFO_SIZE (offsetof(struct bpf_line_info, line_col) + \
@@ -7653,6 +7660,9 @@ static int do_check(struct bpf_verifier_env *env)
0 /* frameno */,
0 /* subprogno, zero == main subprog */);
+ if (btf_check_func_arg_match(env, 0))
+ return -EINVAL;
+
for (;;) {
struct bpf_insn *insn;
u8 class;
@@ -8169,11 +8179,7 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
* will be used by the valid program until it's unloaded
* and all maps are released in free_used_maps()
*/
- map = bpf_map_inc(map, false);
- if (IS_ERR(map)) {
- fdput(f);
- return PTR_ERR(map);
- }
+ bpf_map_inc(map);
aux->map_index = env->used_map_cnt;
env->used_maps[env->used_map_cnt++] = map;
@@ -9380,10 +9386,17 @@ static void print_verification_stats(struct bpf_verifier_env *env)
static int check_attach_btf_id(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog;
+ struct bpf_prog *tgt_prog = prog->aux->linked_prog;
u32 btf_id = prog->aux->attach_btf_id;
const char prefix[] = "btf_trace_";
+ int ret = 0, subprog = -1, i;
+ struct bpf_trampoline *tr;
const struct btf_type *t;
+ bool conservative = true;
const char *tname;
+ struct btf *btf;
+ long addr;
+ u64 key;
if (prog->type != BPF_PROG_TYPE_TRACING)
return 0;
@@ -9392,19 +9405,47 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
verbose(env, "Tracing programs must provide btf_id\n");
return -EINVAL;
}
- t = btf_type_by_id(btf_vmlinux, btf_id);
+ btf = bpf_prog_get_target_btf(prog);
+ if (!btf) {
+ verbose(env,
+ "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
+ return -EINVAL;
+ }
+ t = btf_type_by_id(btf, btf_id);
if (!t) {
verbose(env, "attach_btf_id %u is invalid\n", btf_id);
return -EINVAL;
}
- tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+ tname = btf_name_by_offset(btf, t->name_off);
if (!tname) {
verbose(env, "attach_btf_id %u doesn't have a name\n", btf_id);
return -EINVAL;
}
+ if (tgt_prog) {
+ struct bpf_prog_aux *aux = tgt_prog->aux;
+
+ for (i = 0; i < aux->func_info_cnt; i++)
+ if (aux->func_info[i].type_id == btf_id) {
+ subprog = i;
+ break;
+ }
+ if (subprog == -1) {
+ verbose(env, "Subprog %s doesn't exist\n", tname);
+ return -EINVAL;
+ }
+ conservative = aux->func_info_aux[subprog].unreliable;
+ key = ((u64)aux->id) << 32 | btf_id;
+ } else {
+ key = btf_id;
+ }
switch (prog->expected_attach_type) {
case BPF_TRACE_RAW_TP:
+ if (tgt_prog) {
+ verbose(env,
+ "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
+ return -EINVAL;
+ }
if (!btf_type_is_typedef(t)) {
verbose(env, "attach_btf_id %u is not a typedef\n",
btf_id);
@@ -9416,11 +9457,11 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
return -EINVAL;
}
tname += sizeof(prefix) - 1;
- t = btf_type_by_id(btf_vmlinux, t->type);
+ t = btf_type_by_id(btf, t->type);
if (!btf_type_is_ptr(t))
/* should never happen in valid vmlinux build */
return -EINVAL;
- t = btf_type_by_id(btf_vmlinux, t->type);
+ t = btf_type_by_id(btf, t->type);
if (!btf_type_is_func_proto(t))
/* should never happen in valid vmlinux build */
return -EINVAL;
@@ -9432,6 +9473,66 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
prog->aux->attach_func_proto = t;
prog->aux->attach_btf_trace = true;
return 0;
+ case BPF_TRACE_FENTRY:
+ case BPF_TRACE_FEXIT:
+ if (!btf_type_is_func(t)) {
+ verbose(env, "attach_btf_id %u is not a function\n",
+ btf_id);
+ return -EINVAL;
+ }
+ t = btf_type_by_id(btf, t->type);
+ if (!btf_type_is_func_proto(t))
+ return -EINVAL;
+ tr = bpf_trampoline_lookup(key);
+ if (!tr)
+ return -ENOMEM;
+ prog->aux->attach_func_name = tname;
+ /* t is either vmlinux type or another program's type */
+ prog->aux->attach_func_proto = t;
+ mutex_lock(&tr->mutex);
+ if (tr->func.addr) {
+ prog->aux->trampoline = tr;
+ goto out;
+ }
+ if (tgt_prog && conservative) {
+ prog->aux->attach_func_proto = NULL;
+ t = NULL;
+ }
+ ret = btf_distill_func_proto(&env->log, btf, t,
+ tname, &tr->func.model);
+ if (ret < 0)
+ goto out;
+ if (tgt_prog) {
+ if (!tgt_prog->jited) {
+ /* for now */
+ verbose(env, "Can trace only JITed BPF progs\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ if (tgt_prog->type == BPF_PROG_TYPE_TRACING) {
+ /* prevent cycles */
+ verbose(env, "Cannot recursively attach\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
+ } else {
+ addr = kallsyms_lookup_name(tname);
+ if (!addr) {
+ verbose(env,
+ "The address of function %s cannot be found\n",
+ tname);
+ ret = -ENOENT;
+ goto out;
+ }
+ }
+ tr->func.addr = (void *)addr;
+ prog->aux->trampoline = tr;
+out:
+ mutex_unlock(&tr->mutex);
+ if (ret)
+ bpf_trampoline_put(tr);
+ return ret;
default:
return -EINVAL;
}
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index da16c30868f3..90c4fce1c981 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -11,10 +11,8 @@
int xsk_map_inc(struct xsk_map *map)
{
- struct bpf_map *m = &map->map;
-
- m = bpf_map_inc(m, false);
- return PTR_ERR_OR_ZERO(m);
+ bpf_map_inc(&map->map);
+ return 0;
}
void xsk_map_put(struct xsk_map *map)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 00a014670ed0..834640057c93 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10477,12 +10477,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
context = parent_event->overflow_handler_context;
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_EVENT_TRACING)
if (overflow_handler == bpf_overflow_handler) {
- struct bpf_prog *prog = bpf_prog_inc(parent_event->prog);
+ struct bpf_prog *prog = parent_event->prog;
- if (IS_ERR(prog)) {
- err = PTR_ERR(prog);
- goto err_ns;
- }
+ bpf_prog_inc(prog);
event->prog = prog;
event->orig_overflow_handler =
parent_event->orig_overflow_handler;