diff options
Diffstat (limited to 'kernel/bpf')
-rw-r--r-- | kernel/bpf/arraymap.c | 25 | ||||
-rw-r--r-- | kernel/bpf/btf.c | 2 | ||||
-rw-r--r-- | kernel/bpf/local_storage.c | 5 | ||||
-rw-r--r-- | kernel/bpf/sockmap.c | 97 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 8 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 60 |
6 files changed, 160 insertions, 37 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index f9d24121be99..dded84cbe814 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -553,6 +553,29 @@ static void bpf_fd_array_map_clear(struct bpf_map *map) fd_array_map_delete_elem(map, &i); } +static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key, + struct seq_file *m) +{ + void **elem, *ptr; + u32 prog_id; + + rcu_read_lock(); + + elem = array_map_lookup_elem(map, key); + if (elem) { + ptr = READ_ONCE(*elem); + if (ptr) { + seq_printf(m, "%u: ", *(u32 *)key); + prog_id = prog_fd_array_sys_lookup_elem(ptr); + btf_type_seq_show(map->btf, map->btf_value_type_id, + &prog_id, m); + seq_puts(m, "\n"); + } + } + + rcu_read_unlock(); +} + const struct bpf_map_ops prog_array_map_ops = { .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_map_alloc, @@ -564,7 +587,7 @@ const struct bpf_map_ops prog_array_map_ops = { .map_fd_put_ptr = prog_fd_array_put_ptr, .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, .map_release_uref = bpf_fd_array_map_clear, - .map_check_btf = map_check_no_btf, + .map_seq_show_elem = prog_array_map_seq_show_elem, }; static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file, diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 2590700237c1..138f0302692e 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -1844,7 +1844,7 @@ static int btf_check_all_metas(struct btf_verifier_env *env) hdr = &btf->hdr; cur = btf->nohdr_data + hdr->type_off; - end = btf->nohdr_data + hdr->type_len; + end = cur + hdr->type_len; env->log_type_id = 1; while (cur < end) { diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 22ad967d1e5f..830d7f095748 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -129,7 +129,7 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key, struct bpf_cgroup_storage *storage; struct bpf_storage_buffer *new; - if (flags & BPF_NOEXIST) + if (flags != BPF_ANY && flags != BPF_EXIST) return -EINVAL; storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map, @@ -195,6 +195,9 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) if (attr->key_size != sizeof(struct bpf_cgroup_storage_key)) return ERR_PTR(-EINVAL); + if (attr->value_size == 0) + return ERR_PTR(-EINVAL); + if (attr->value_size > PAGE_SIZE) return ERR_PTR(-E2BIG); diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 488ef9663c01..d37a1a0a6e1e 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -132,6 +132,7 @@ struct smap_psock { struct work_struct gc_work; struct proto *sk_proto; + void (*save_unhash)(struct sock *sk); void (*save_close)(struct sock *sk, long timeout); void (*save_data_ready)(struct sock *sk); void (*save_write_space)(struct sock *sk); @@ -143,6 +144,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); static int bpf_tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); +static void bpf_tcp_unhash(struct sock *sk); static void bpf_tcp_close(struct sock *sk, long timeout); static inline struct smap_psock *smap_psock_sk(const struct sock *sk) @@ -184,6 +186,7 @@ static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS], struct proto *base) { prot[SOCKMAP_BASE] = *base; + prot[SOCKMAP_BASE].unhash = bpf_tcp_unhash; prot[SOCKMAP_BASE].close = bpf_tcp_close; prot[SOCKMAP_BASE].recvmsg = bpf_tcp_recvmsg; prot[SOCKMAP_BASE].stream_memory_read = bpf_tcp_stream_read; @@ -217,6 +220,7 @@ static int bpf_tcp_init(struct sock *sk) return -EBUSY; } + psock->save_unhash = sk->sk_prot->unhash; psock->save_close = sk->sk_prot->close; psock->sk_proto = sk->sk_prot; @@ -305,30 +309,12 @@ static struct smap_psock_map_entry *psock_map_pop(struct sock *sk, return e; } -static void bpf_tcp_close(struct sock *sk, long timeout) +static void bpf_tcp_remove(struct sock *sk, struct smap_psock *psock) { - void (*close_fun)(struct sock *sk, long timeout); struct smap_psock_map_entry *e; struct sk_msg_buff *md, *mtmp; - struct smap_psock *psock; struct sock *osk; - lock_sock(sk); - rcu_read_lock(); - psock = smap_psock_sk(sk); - if (unlikely(!psock)) { - rcu_read_unlock(); - release_sock(sk); - return sk->sk_prot->close(sk, timeout); - } - - /* The psock may be destroyed anytime after exiting the RCU critial - * section so by the time we use close_fun the psock may no longer - * be valid. However, bpf_tcp_close is called with the sock lock - * held so the close hook and sk are still valid. - */ - close_fun = psock->save_close; - if (psock->cork) { free_start_sg(psock->sock, psock->cork, true); kfree(psock->cork); @@ -379,6 +365,42 @@ static void bpf_tcp_close(struct sock *sk, long timeout) kfree(e); e = psock_map_pop(sk, psock); } +} + +static void bpf_tcp_unhash(struct sock *sk) +{ + void (*unhash_fun)(struct sock *sk); + struct smap_psock *psock; + + rcu_read_lock(); + psock = smap_psock_sk(sk); + if (unlikely(!psock)) { + rcu_read_unlock(); + if (sk->sk_prot->unhash) + sk->sk_prot->unhash(sk); + return; + } + unhash_fun = psock->save_unhash; + bpf_tcp_remove(sk, psock); + rcu_read_unlock(); + unhash_fun(sk); +} + +static void bpf_tcp_close(struct sock *sk, long timeout) +{ + void (*close_fun)(struct sock *sk, long timeout); + struct smap_psock *psock; + + lock_sock(sk); + rcu_read_lock(); + psock = smap_psock_sk(sk); + if (unlikely(!psock)) { + rcu_read_unlock(); + release_sock(sk); + return sk->sk_prot->close(sk, timeout); + } + close_fun = psock->save_close; + bpf_tcp_remove(sk, psock); rcu_read_unlock(); release_sock(sk); close_fun(sk, timeout); @@ -590,8 +612,7 @@ static int free_sg(struct sock *sk, int start, if (i == MAX_SKB_FRAGS) i = 0; } - if (md->skb) - consume_skb(md->skb); + consume_skb(md->skb); return free; } @@ -973,8 +994,7 @@ bytes_ready: if (!sg->length && md->sg_start == md->sg_end) { list_del(&md->list); - if (md->skb) - consume_skb(md->skb); + consume_skb(md->skb); kfree(md); } } @@ -2097,8 +2117,12 @@ static int sock_map_update_elem(struct bpf_map *map, return -EINVAL; } + /* ULPs are currently supported only for TCP sockets in ESTABLISHED + * state. + */ if (skops.sk->sk_type != SOCK_STREAM || - skops.sk->sk_protocol != IPPROTO_TCP) { + skops.sk->sk_protocol != IPPROTO_TCP || + skops.sk->sk_state != TCP_ESTABLISHED) { fput(socket->file); return -EOPNOTSUPP; } @@ -2453,6 +2477,16 @@ static int sock_hash_update_elem(struct bpf_map *map, return -EINVAL; } + /* ULPs are currently supported only for TCP sockets in ESTABLISHED + * state. + */ + if (skops.sk->sk_type != SOCK_STREAM || + skops.sk->sk_protocol != IPPROTO_TCP || + skops.sk->sk_state != TCP_ESTABLISHED) { + fput(socket->file); + return -EOPNOTSUPP; + } + lock_sock(skops.sk); preempt_disable(); rcu_read_lock(); @@ -2543,10 +2577,22 @@ const struct bpf_map_ops sock_hash_ops = { .map_check_btf = map_check_no_btf, }; +static bool bpf_is_valid_sock_op(struct bpf_sock_ops_kern *ops) +{ + return ops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB || + ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB; +} BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock, struct bpf_map *, map, void *, key, u64, flags) { WARN_ON_ONCE(!rcu_read_lock_held()); + + /* ULPs are currently supported only for TCP sockets in ESTABLISHED + * state. This checks that the sock ops triggering the update is + * one indicating we are (or will be soon) in an ESTABLISHED state. + */ + if (!bpf_is_valid_sock_op(bpf_sock)) + return -EOPNOTSUPP; return sock_map_ctx_update_elem(bpf_sock, map, key, flags); } @@ -2565,6 +2611,9 @@ BPF_CALL_4(bpf_sock_hash_update, struct bpf_sock_ops_kern *, bpf_sock, struct bpf_map *, map, void *, key, u64, flags) { WARN_ON_ONCE(!rcu_read_lock_held()); + + if (!bpf_is_valid_sock_op(bpf_sock)) + return -EOPNOTSUPP; return sock_hash_ctx_update_elem(bpf_sock, map, key, flags); } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 3c9636f03bb2..b3c2d09bcf7a 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1615,6 +1615,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_LIRC_MODE2: ptype = BPF_PROG_TYPE_LIRC_MODE2; break; + case BPF_FLOW_DISSECTOR: + ptype = BPF_PROG_TYPE_FLOW_DISSECTOR; + break; default: return -EINVAL; } @@ -1636,6 +1639,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_PROG_TYPE_LIRC_MODE2: ret = lirc_prog_attach(attr, prog); break; + case BPF_PROG_TYPE_FLOW_DISSECTOR: + ret = skb_flow_dissector_bpf_prog_attach(attr, prog); + break; default: ret = cgroup_bpf_prog_attach(attr, ptype, prog); } @@ -1688,6 +1694,8 @@ static int bpf_prog_detach(const union bpf_attr *attr) return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, NULL); case BPF_LIRC_MODE2: return lirc_prog_detach(attr); + case BPF_FLOW_DISSECTOR: + return skb_flow_dissector_bpf_prog_detach(attr); default: return -EINVAL; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f4ff0c569e54..3584ab27d25c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -261,6 +261,7 @@ static const char * const reg_type_str[] = { [PTR_TO_PACKET] = "pkt", [PTR_TO_PACKET_META] = "pkt_meta", [PTR_TO_PACKET_END] = "pkt_end", + [PTR_TO_FLOW_KEYS] = "flow_keys", }; static char slot_type_char[] = { @@ -570,7 +571,9 @@ static void __mark_reg_not_init(struct bpf_reg_state *reg); */ static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm) { - reg->id = 0; + /* Clear id, off, and union(map_ptr, range) */ + memset(((u8 *)reg) + sizeof(reg->type), 0, + offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type)); reg->var_off = tnum_const(imm); reg->smin_value = (s64)imm; reg->smax_value = (s64)imm; @@ -589,7 +592,6 @@ static void __mark_reg_known_zero(struct bpf_reg_state *reg) static void __mark_reg_const_zero(struct bpf_reg_state *reg) { __mark_reg_known(reg, 0); - reg->off = 0; reg->type = SCALAR_VALUE; } @@ -700,9 +702,12 @@ static void __mark_reg_unbounded(struct bpf_reg_state *reg) /* Mark a register as having a completely unknown (scalar) value. */ static void __mark_reg_unknown(struct bpf_reg_state *reg) { + /* + * Clear type, id, off, and union(map_ptr, range) and + * padding between 'type' and union + */ + memset(reg, 0, offsetof(struct bpf_reg_state, var_off)); reg->type = SCALAR_VALUE; - reg->id = 0; - reg->off = 0; reg->var_off = tnum_unknown; reg->frameno = 0; __mark_reg_unbounded(reg); @@ -961,6 +966,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_PACKET: case PTR_TO_PACKET_META: case PTR_TO_PACKET_END: + case PTR_TO_FLOW_KEYS: case CONST_PTR_TO_MAP: return true; default: @@ -1234,6 +1240,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, case BPF_PROG_TYPE_LWT_XMIT: case BPF_PROG_TYPE_SK_SKB: case BPF_PROG_TYPE_SK_MSG: + case BPF_PROG_TYPE_FLOW_DISSECTOR: if (meta) return meta->pkt_access; @@ -1317,6 +1324,18 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, return -EACCES; } +static int check_flow_keys_access(struct bpf_verifier_env *env, int off, + int size) +{ + if (size < 0 || off < 0 || + (u64)off + size > sizeof(struct bpf_flow_keys)) { + verbose(env, "invalid access to flow keys off=%d size=%d\n", + off, size); + return -EACCES; + } + return 0; +} + static bool __is_pointer_value(bool allow_ptr_leaks, const struct bpf_reg_state *reg) { @@ -1418,6 +1437,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, * right in front, treat it the very same way. */ return check_pkt_ptr_alignment(env, reg, off, size, strict); + case PTR_TO_FLOW_KEYS: + pointer_desc = "flow keys "; + break; case PTR_TO_MAP_VALUE: pointer_desc = "value "; break; @@ -1640,9 +1662,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn else mark_reg_known_zero(env, regs, value_regno); - regs[value_regno].id = 0; - regs[value_regno].off = 0; - regs[value_regno].range = 0; regs[value_regno].type = reg_type; } @@ -1691,6 +1710,17 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn err = check_packet_access(env, regno, off, size, false); if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); + } else if (reg->type == PTR_TO_FLOW_KEYS) { + if (t == BPF_WRITE && value_regno >= 0 && + is_pointer_value(env, value_regno)) { + verbose(env, "R%d leaks addr into flow keys\n", + value_regno); + return -EACCES; + } + + err = check_flow_keys_access(env, off, size); + if (!err && t == BPF_READ && value_regno >= 0) + mark_reg_unknown(env, regs, value_regno); } else { verbose(env, "R%d invalid mem access '%s'\n", regno, reg_type_str[reg->type]); @@ -1838,6 +1868,8 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, case PTR_TO_PACKET_META: return check_packet_access(env, regno, reg->off, access_size, zero_size_allowed); + case PTR_TO_FLOW_KEYS: + return check_flow_keys_access(env, reg->off, access_size); case PTR_TO_MAP_VALUE: return check_map_access(env, regno, reg->off, access_size, zero_size_allowed); @@ -2495,7 +2527,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; /* There is no offset yet applied, variable or fixed */ mark_reg_known_zero(env, regs, BPF_REG_0); - regs[BPF_REG_0].off = 0; /* remember map_ptr, so that check_map_access() * can check 'value_size' boundary of memory access * to map element returned from bpf_map_lookup_elem() @@ -2811,6 +2842,15 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, u64 umin_val, umax_val; u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; + if (insn_bitness == 32) { + /* Relevant for 32-bit RSH: Information can propagate towards + * LSB, so it isn't sufficient to only truncate the output to + * 32 bits. + */ + coerce_reg_to_size(dst_reg, 4); + coerce_reg_to_size(&src_reg, 4); + } + smin_val = src_reg.smin_value; smax_val = src_reg.smax_value; umin_val = src_reg.umin_value; @@ -3046,7 +3086,6 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, if (BPF_CLASS(insn->code) != BPF_ALU64) { /* 32-bit ALU ops are (32,32)->32 */ coerce_reg_to_size(dst_reg, 4); - coerce_reg_to_size(&src_reg, 4); } __reg_deduce_bounds(dst_reg); @@ -3078,7 +3117,7 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, * an arbitrary scalar. Disallow all math except * pointer subtraction */ - if (opcode == BPF_SUB){ + if (opcode == BPF_SUB && env->allow_ptr_leaks) { mark_reg_unknown(env, regs, insn->dst_reg); return 0; } @@ -4366,6 +4405,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, case PTR_TO_CTX: case CONST_PTR_TO_MAP: case PTR_TO_PACKET_END: + case PTR_TO_FLOW_KEYS: /* Only valid matches are exact, which memcmp() above * would have accepted */ |