summaryrefslogtreecommitdiff
path: root/kernel/bpf
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2019-11-02 15:27:42 -0700
committerDavid S. Miller <davem@davemloft.net>2019-11-02 15:29:58 -0700
commitae8a76fb8b5d03fa2adc7249dc6131ba6a0c6119 (patch)
treeb197a7452b46abf51ffab8485236ccab69664d5c /kernel/bpf
parentd31e95585ca697fb31440c6fe30113adc85ecfbd (diff)
parent358fdb456288d48874d44a064a82bfb0d9963fa0 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says: ==================== pull-request: bpf-next 2019-11-02 The following pull-request contains BPF updates for your *net-next* tree. We've added 30 non-merge commits during the last 7 day(s) which contain a total of 41 files changed, 1864 insertions(+), 474 deletions(-). The main changes are: 1) Fix long standing user vs kernel access issue by introducing bpf_probe_read_user() and bpf_probe_read_kernel() helpers, from Daniel. 2) Accelerated xskmap lookup, from Björn and Maciej. 3) Support for automatic map pinning in libbpf, from Toke. 4) Cleanup of BTF-enabled raw tracepoints, from Alexei. 5) Various fixes to libbpf and selftests. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel/bpf')
-rw-r--r--kernel/bpf/core.c12
-rw-r--r--kernel/bpf/syscall.c6
-rw-r--r--kernel/bpf/verifier.c39
-rw-r--r--kernel/bpf/xskmap.c112
4 files changed, 73 insertions, 96 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 658d68d409a4..97e37d82a1cc 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -668,9 +668,6 @@ static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr)
{
struct latch_tree_node *n;
- if (!bpf_jit_kallsyms_enabled())
- return NULL;
-
n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops);
return n ?
container_of(n, struct bpf_prog_aux, ksym_tnode)->prog :
@@ -1309,11 +1306,12 @@ bool bpf_opcode_in_insntable(u8 code)
}
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
-u64 __weak bpf_probe_read(void * dst, u32 size, const void * unsafe_ptr)
+u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
{
memset(dst, 0, size);
return -EFAULT;
}
+
/**
* __bpf_prog_run - run eBPF program on a given context
* @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
@@ -1569,9 +1567,9 @@ out:
LDST(W, u32)
LDST(DW, u64)
#undef LDST
-#define LDX_PROBE(SIZEOP, SIZE) \
- LDX_PROBE_MEM_##SIZEOP: \
- bpf_probe_read(&DST, SIZE, (const void *)(long) SRC); \
+#define LDX_PROBE(SIZEOP, SIZE) \
+ LDX_PROBE_MEM_##SIZEOP: \
+ bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) SRC); \
CONT;
LDX_PROBE(B, 1)
LDX_PROBE(H, 2)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 57eacd5fc24a..6d9ce95e5a8d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1579,7 +1579,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
u32 btf_id)
{
switch (prog_type) {
- case BPF_PROG_TYPE_RAW_TRACEPOINT:
+ case BPF_PROG_TYPE_TRACING:
if (btf_id > BTF_MAX_TYPE)
return -EINVAL;
break;
@@ -1842,13 +1842,13 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
return PTR_ERR(prog);
if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT &&
+ prog->type != BPF_PROG_TYPE_TRACING &&
prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) {
err = -EINVAL;
goto out_put_prog;
}
- if (prog->type == BPF_PROG_TYPE_RAW_TRACEPOINT &&
- prog->aux->attach_btf_id) {
+ if (prog->type == BPF_PROG_TYPE_TRACING) {
if (attr->raw_tracepoint.name) {
/* raw_tp name should not be specified in raw_tp
* programs that were verified via in-kernel BTF info
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c59778c0fc4d..2f2374967b36 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6279,6 +6279,11 @@ static int check_return_code(struct bpf_verifier_env *env)
case BPF_PROG_TYPE_CGROUP_SYSCTL:
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
break;
+ case BPF_PROG_TYPE_RAW_TRACEPOINT:
+ if (!env->prog->aux->attach_btf_id)
+ return 0;
+ range = tnum_const(0);
+ break;
default:
return 0;
}
@@ -9376,24 +9381,36 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog;
u32 btf_id = prog->aux->attach_btf_id;
+ const char prefix[] = "btf_trace_";
const struct btf_type *t;
const char *tname;
- if (prog->type == BPF_PROG_TYPE_RAW_TRACEPOINT && btf_id) {
- const char prefix[] = "btf_trace_";
+ if (prog->type != BPF_PROG_TYPE_TRACING)
+ return 0;
- t = btf_type_by_id(btf_vmlinux, btf_id);
- if (!t) {
- verbose(env, "attach_btf_id %u is invalid\n", btf_id);
- return -EINVAL;
- }
+ if (!btf_id) {
+ verbose(env, "Tracing programs must provide btf_id\n");
+ return -EINVAL;
+ }
+ t = btf_type_by_id(btf_vmlinux, btf_id);
+ if (!t) {
+ verbose(env, "attach_btf_id %u is invalid\n", btf_id);
+ return -EINVAL;
+ }
+ tname = btf_name_by_offset(btf_vmlinux, t->name_off);
+ if (!tname) {
+ verbose(env, "attach_btf_id %u doesn't have a name\n", btf_id);
+ return -EINVAL;
+ }
+
+ switch (prog->expected_attach_type) {
+ case BPF_TRACE_RAW_TP:
if (!btf_type_is_typedef(t)) {
verbose(env, "attach_btf_id %u is not a typedef\n",
btf_id);
return -EINVAL;
}
- tname = btf_name_by_offset(btf_vmlinux, t->name_off);
- if (!tname || strncmp(prefix, tname, sizeof(prefix) - 1)) {
+ if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
verbose(env, "attach_btf_id %u points to wrong type name %s\n",
btf_id, tname);
return -EINVAL;
@@ -9414,8 +9431,10 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
prog->aux->attach_func_name = tname;
prog->aux->attach_func_proto = t;
prog->aux->attach_btf_trace = true;
+ return 0;
+ default:
+ return -EINVAL;
}
- return 0;
}
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index 82a1ffe15dfa..da16c30868f3 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -9,13 +9,6 @@
#include <linux/slab.h>
#include <linux/sched.h>
-struct xsk_map {
- struct bpf_map map;
- struct xdp_sock **xsk_map;
- struct list_head __percpu *flush_list;
- spinlock_t lock; /* Synchronize map updates */
-};
-
int xsk_map_inc(struct xsk_map *map)
{
struct bpf_map *m = &map->map;
@@ -80,9 +73,10 @@ static void xsk_map_sock_delete(struct xdp_sock *xs,
static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
{
+ struct bpf_map_memory mem;
+ int cpu, err, numa_node;
struct xsk_map *m;
- int cpu, err;
- u64 cost;
+ u64 cost, size;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
@@ -92,44 +86,35 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY))
return ERR_PTR(-EINVAL);
- m = kzalloc(sizeof(*m), GFP_USER);
- if (!m)
+ numa_node = bpf_map_attr_numa_node(attr);
+ size = struct_size(m, xsk_map, attr->max_entries);
+ cost = size + array_size(sizeof(*m->flush_list), num_possible_cpus());
+
+ err = bpf_map_charge_init(&mem, cost);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ m = bpf_map_area_alloc(size, numa_node);
+ if (!m) {
+ bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);
+ }
bpf_map_init_from_attr(&m->map, attr);
+ bpf_map_charge_move(&m->map.memory, &mem);
spin_lock_init(&m->lock);
- cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *);
- cost += sizeof(struct list_head) * num_possible_cpus();
-
- /* Notice returns -EPERM on if map size is larger than memlock limit */
- err = bpf_map_charge_init(&m->map.memory, cost);
- if (err)
- goto free_m;
-
- err = -ENOMEM;
-
m->flush_list = alloc_percpu(struct list_head);
- if (!m->flush_list)
- goto free_charge;
+ if (!m->flush_list) {
+ bpf_map_charge_finish(&m->map.memory);
+ bpf_map_area_free(m);
+ return ERR_PTR(-ENOMEM);
+ }
for_each_possible_cpu(cpu)
INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu));
- m->xsk_map = bpf_map_area_alloc(m->map.max_entries *
- sizeof(struct xdp_sock *),
- m->map.numa_node);
- if (!m->xsk_map)
- goto free_percpu;
return &m->map;
-
-free_percpu:
- free_percpu(m->flush_list);
-free_charge:
- bpf_map_charge_finish(&m->map.memory);
-free_m:
- kfree(m);
- return ERR_PTR(err);
}
static void xsk_map_free(struct bpf_map *map)
@@ -139,8 +124,7 @@ static void xsk_map_free(struct bpf_map *map)
bpf_clear_redirect_map(map);
synchronize_net();
free_percpu(m->flush_list);
- bpf_map_area_free(m->xsk_map);
- kfree(m);
+ bpf_map_area_free(m);
}
static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
@@ -160,45 +144,20 @@ static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
return 0;
}
-struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
-{
- struct xsk_map *m = container_of(map, struct xsk_map, map);
- struct xdp_sock *xs;
-
- if (key >= map->max_entries)
- return NULL;
-
- xs = READ_ONCE(m->xsk_map[key]);
- return xs;
-}
-
-int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
- struct xdp_sock *xs)
-{
- struct xsk_map *m = container_of(map, struct xsk_map, map);
- struct list_head *flush_list = this_cpu_ptr(m->flush_list);
- int err;
-
- err = xsk_rcv(xs, xdp);
- if (err)
- return err;
-
- if (!xs->flush_node.prev)
- list_add(&xs->flush_node, flush_list);
-
- return 0;
-}
-
-void __xsk_map_flush(struct bpf_map *map)
+static u32 xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
- struct xsk_map *m = container_of(map, struct xsk_map, map);
- struct list_head *flush_list = this_cpu_ptr(m->flush_list);
- struct xdp_sock *xs, *tmp;
-
- list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
- xsk_flush(xs);
- __list_del_clearprev(&xs->flush_node);
- }
+ const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2;
+ struct bpf_insn *insn = insn_buf;
+
+ *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
+ *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
+ *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(sizeof(struct xsk_sock *)));
+ *insn++ = BPF_ALU64_IMM(BPF_ADD, mp, offsetof(struct xsk_map, xsk_map));
+ *insn++ = BPF_ALU64_REG(BPF_ADD, ret, mp);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(struct xsk_sock *), ret, ret, 0);
+ *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+ *insn++ = BPF_MOV64_IMM(ret, 0);
+ return insn - insn_buf;
}
static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
@@ -312,6 +271,7 @@ const struct bpf_map_ops xsk_map_ops = {
.map_free = xsk_map_free,
.map_get_next_key = xsk_map_get_next_key,
.map_lookup_elem = xsk_map_lookup_elem,
+ .map_gen_lookup = xsk_map_gen_lookup,
.map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only,
.map_update_elem = xsk_map_update_elem,
.map_delete_elem = xsk_map_delete_elem,