summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2020-01-16 10:04:40 +0100
committerDavid S. Miller <davem@davemloft.net>2020-01-16 10:04:40 +0100
commit3981f955eb27fd4f52b8cef198091530811229f2 (patch)
tree5e85999c1a8c4a7ee73f025ffef47f649312a8dc
parent567110f147b352020138573a90476a1522210f62 (diff)
parent85ddd9c3173102930c16b0cfe8dbb771af434532 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Daniel Borkmann says: ==================== pull-request: bpf 2020-01-15 The following pull-request contains BPF updates for your *net* tree. We've added 12 non-merge commits during the last 9 day(s) which contain a total of 13 files changed, 95 insertions(+), 43 deletions(-). The main changes are: 1) Fix refcount leak for TCP time wait and request sockets for socket lookup related BPF helpers, from Lorenz Bauer. 2) Fix wrong verification of ARSH instruction under ALU32, from Daniel Borkmann. 3) Batch of several sockmap and related TLS fixes found while operating more complex BPF programs with Cilium and OpenSSL, from John Fastabend. 4) Fix sockmap to read psock's ingress_msg queue before regular sk_receive_queue() to avoid purging data upon teardown, from Lingpeng Chen. 5) Fix printing incorrect pointer in bpftool's btf_dump_ptr() in order to properly dump a BPF map's value with BTF, from Martin KaFai Lau. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/skmsg.h13
-rw-r--r--include/linux/tnum.h2
-rw-r--r--include/net/tcp.h6
-rw-r--r--kernel/bpf/tnum.c9
-rw-r--r--kernel/bpf/verifier.c13
-rw-r--r--net/core/filter.c20
-rw-r--r--net/core/skmsg.c2
-rw-r--r--net/core/sock_map.c7
-rw-r--r--net/ipv4/tcp_bpf.c17
-rw-r--r--net/ipv4/tcp_ulp.c6
-rw-r--r--net/tls/tls_main.c10
-rw-r--r--net/tls/tls_sw.c31
-rw-r--r--tools/bpf/bpftool/btf_dumper.c2
13 files changed, 95 insertions, 43 deletions
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index ef7031f8a304..14d61bba0b79 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -358,17 +358,22 @@ static inline void sk_psock_update_proto(struct sock *sk,
static inline void sk_psock_restore_proto(struct sock *sk,
struct sk_psock *psock)
{
- sk->sk_write_space = psock->saved_write_space;
+ sk->sk_prot->unhash = psock->saved_unhash;
if (psock->sk_proto) {
struct inet_connection_sock *icsk = inet_csk(sk);
bool has_ulp = !!icsk->icsk_ulp_data;
- if (has_ulp)
- tcp_update_ulp(sk, psock->sk_proto);
- else
+ if (has_ulp) {
+ tcp_update_ulp(sk, psock->sk_proto,
+ psock->saved_write_space);
+ } else {
sk->sk_prot = psock->sk_proto;
+ sk->sk_write_space = psock->saved_write_space;
+ }
psock->sk_proto = NULL;
+ } else {
+ sk->sk_write_space = psock->saved_write_space;
}
}
diff --git a/include/linux/tnum.h b/include/linux/tnum.h
index c17af77f3fae..ea627d1ab7e3 100644
--- a/include/linux/tnum.h
+++ b/include/linux/tnum.h
@@ -30,7 +30,7 @@ struct tnum tnum_lshift(struct tnum a, u8 shift);
/* Shift (rsh) a tnum right (by a fixed shift) */
struct tnum tnum_rshift(struct tnum a, u8 shift);
/* Shift (arsh) a tnum right (by a fixed min_shift) */
-struct tnum tnum_arshift(struct tnum a, u8 min_shift);
+struct tnum tnum_arshift(struct tnum a, u8 min_shift, u8 insn_bitness);
/* Add two tnums, return @a + @b */
struct tnum tnum_add(struct tnum a, struct tnum b);
/* Subtract two tnums, return @a - @b */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e460ea7f767b..e6f48384dc71 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2147,7 +2147,8 @@ struct tcp_ulp_ops {
/* initialize ulp */
int (*init)(struct sock *sk);
/* update ulp */
- void (*update)(struct sock *sk, struct proto *p);
+ void (*update)(struct sock *sk, struct proto *p,
+ void (*write_space)(struct sock *sk));
/* cleanup ulp */
void (*release)(struct sock *sk);
/* diagnostic */
@@ -2162,7 +2163,8 @@ void tcp_unregister_ulp(struct tcp_ulp_ops *type);
int tcp_set_ulp(struct sock *sk, const char *name);
void tcp_get_available_ulp(char *buf, size_t len);
void tcp_cleanup_ulp(struct sock *sk);
-void tcp_update_ulp(struct sock *sk, struct proto *p);
+void tcp_update_ulp(struct sock *sk, struct proto *p,
+ void (*write_space)(struct sock *sk));
#define MODULE_ALIAS_TCP_ULP(name) \
__MODULE_INFO(alias, alias_userspace, name); \
diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c
index ca52b9642943..d4f335a9a899 100644
--- a/kernel/bpf/tnum.c
+++ b/kernel/bpf/tnum.c
@@ -44,14 +44,19 @@ struct tnum tnum_rshift(struct tnum a, u8 shift)
return TNUM(a.value >> shift, a.mask >> shift);
}
-struct tnum tnum_arshift(struct tnum a, u8 min_shift)
+struct tnum tnum_arshift(struct tnum a, u8 min_shift, u8 insn_bitness)
{
/* if a.value is negative, arithmetic shifting by minimum shift
* will have larger negative offset compared to more shifting.
* If a.value is nonnegative, arithmetic shifting by minimum shift
* will have larger positive offset compare to more shifting.
*/
- return TNUM((s64)a.value >> min_shift, (s64)a.mask >> min_shift);
+ if (insn_bitness == 32)
+ return TNUM((u32)(((s32)a.value) >> min_shift),
+ (u32)(((s32)a.mask) >> min_shift));
+ else
+ return TNUM((s64)a.value >> min_shift,
+ (s64)a.mask >> min_shift);
}
struct tnum tnum_add(struct tnum a, struct tnum b)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ce85e7041f0c..7d530ce8719d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5049,9 +5049,16 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
/* Upon reaching here, src_known is true and
* umax_val is equal to umin_val.
*/
- dst_reg->smin_value >>= umin_val;
- dst_reg->smax_value >>= umin_val;
- dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val);
+ if (insn_bitness == 32) {
+ dst_reg->smin_value = (u32)(((s32)dst_reg->smin_value) >> umin_val);
+ dst_reg->smax_value = (u32)(((s32)dst_reg->smax_value) >> umin_val);
+ } else {
+ dst_reg->smin_value >>= umin_val;
+ dst_reg->smax_value >>= umin_val;
+ }
+
+ dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val,
+ insn_bitness);
/* blow away the dst_reg umin_value/umax_value and rely on
* dst_reg var_off to refine the result.
diff --git a/net/core/filter.c b/net/core/filter.c
index 28b3c258188c..538f6a735a19 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2231,10 +2231,10 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
/* First find the starting scatterlist element */
i = msg->sg.start;
do {
+ offset += len;
len = sk_msg_elem(msg, i)->length;
if (start < offset + len)
break;
- offset += len;
sk_msg_iter_var_next(i);
} while (i != msg->sg.end);
@@ -2346,7 +2346,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
u32, len, u64, flags)
{
struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge;
- u32 new, i = 0, l, space, copy = 0, offset = 0;
+ u32 new, i = 0, l = 0, space, copy = 0, offset = 0;
u8 *raw, *to, *from;
struct page *page;
@@ -2356,11 +2356,11 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
/* First find the starting scatterlist element */
i = msg->sg.start;
do {
+ offset += l;
l = sk_msg_elem(msg, i)->length;
if (start < offset + l)
break;
- offset += l;
sk_msg_iter_var_next(i);
} while (i != msg->sg.end);
@@ -2415,6 +2415,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
sk_msg_iter_var_next(i);
sg_unmark_end(psge);
+ sg_unmark_end(&rsge);
sk_msg_iter_next(msg, end);
}
@@ -2506,7 +2507,7 @@ static void sk_msg_shift_right(struct sk_msg *msg, int i)
BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
u32, len, u64, flags)
{
- u32 i = 0, l, space, offset = 0;
+ u32 i = 0, l = 0, space, offset = 0;
u64 last = start + len;
int pop;
@@ -2516,11 +2517,11 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
/* First find the starting scatterlist element */
i = msg->sg.start;
do {
+ offset += l;
l = sk_msg_elem(msg, i)->length;
if (start < offset + l)
break;
- offset += l;
sk_msg_iter_var_next(i);
} while (i != msg->sg.end);
@@ -5318,8 +5319,7 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
if (sk) {
sk = sk_to_full_sk(sk);
if (!sk_fullsock(sk)) {
- if (!sock_flag(sk, SOCK_RCU_FREE))
- sock_gen_put(sk);
+ sock_gen_put(sk);
return NULL;
}
}
@@ -5356,8 +5356,7 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
if (sk) {
sk = sk_to_full_sk(sk);
if (!sk_fullsock(sk)) {
- if (!sock_flag(sk, SOCK_RCU_FREE))
- sock_gen_put(sk);
+ sock_gen_put(sk);
return NULL;
}
}
@@ -5424,7 +5423,8 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
BPF_CALL_1(bpf_sk_release, struct sock *, sk)
{
- if (!sock_flag(sk, SOCK_RCU_FREE))
+ /* Only full sockets have sk->sk_flags. */
+ if (!sk_fullsock(sk) || !sock_flag(sk, SOCK_RCU_FREE))
sock_gen_put(sk);
return 0;
}
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index ded2d5227678..3866d7e20c07 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -594,6 +594,8 @@ EXPORT_SYMBOL_GPL(sk_psock_destroy);
void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
{
+ sock_owned_by_me(sk);
+
sk_psock_cork_free(psock);
sk_psock_zap_ingress(psock);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index eb114ee419b6..8998e356f423 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -241,8 +241,11 @@ static void sock_map_free(struct bpf_map *map)
struct sock *sk;
sk = xchg(psk, NULL);
- if (sk)
+ if (sk) {
+ lock_sock(sk);
sock_map_unref(sk, psk);
+ release_sock(sk);
+ }
}
raw_spin_unlock_bh(&stab->lock);
rcu_read_unlock();
@@ -862,7 +865,9 @@ static void sock_hash_free(struct bpf_map *map)
raw_spin_lock_bh(&bucket->lock);
hlist_for_each_entry_safe(elem, node, &bucket->head, node) {
hlist_del_rcu(&elem->node);
+ lock_sock(elem->sk);
sock_map_unref(elem->sk, elem);
+ release_sock(elem->sk);
}
raw_spin_unlock_bh(&bucket->lock);
}
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index e38705165ac9..8a01428f80c1 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -121,14 +121,14 @@ int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
struct sk_psock *psock;
int copied, ret;
- if (unlikely(flags & MSG_ERRQUEUE))
- return inet_recv_error(sk, msg, len, addr_len);
- if (!skb_queue_empty(&sk->sk_receive_queue))
- return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
-
psock = sk_psock_get(sk);
if (unlikely(!psock))
return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+ if (unlikely(flags & MSG_ERRQUEUE))
+ return inet_recv_error(sk, msg, len, addr_len);
+ if (!skb_queue_empty(&sk->sk_receive_queue) &&
+ sk_psock_queue_empty(psock))
+ return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
lock_sock(sk);
msg_bytes_ready:
copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags);
@@ -139,7 +139,7 @@ msg_bytes_ready:
timeo = sock_rcvtimeo(sk, nonblock);
data = tcp_bpf_wait_data(sk, psock, flags, timeo, &err);
if (data) {
- if (skb_queue_empty(&sk->sk_receive_queue))
+ if (!sk_psock_queue_empty(psock))
goto msg_bytes_ready;
release_sock(sk);
sk_psock_put(sk, psock);
@@ -315,10 +315,7 @@ more_data:
*/
delta = msg->sg.size;
psock->eval = sk_psock_msg_verdict(sk, psock, msg);
- if (msg->sg.size < delta)
- delta -= msg->sg.size;
- else
- delta = 0;
+ delta -= msg->sg.size;
}
if (msg->cork_bytes &&
diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
index 12ab5db2b71c..38d3ad141161 100644
--- a/net/ipv4/tcp_ulp.c
+++ b/net/ipv4/tcp_ulp.c
@@ -99,17 +99,19 @@ void tcp_get_available_ulp(char *buf, size_t maxlen)
rcu_read_unlock();
}
-void tcp_update_ulp(struct sock *sk, struct proto *proto)
+void tcp_update_ulp(struct sock *sk, struct proto *proto,
+ void (*write_space)(struct sock *sk))
{
struct inet_connection_sock *icsk = inet_csk(sk);
if (!icsk->icsk_ulp_ops) {
+ sk->sk_write_space = write_space;
sk->sk_prot = proto;
return;
}
if (icsk->icsk_ulp_ops->update)
- icsk->icsk_ulp_ops->update(sk, proto);
+ icsk->icsk_ulp_ops->update(sk, proto, write_space);
}
void tcp_cleanup_ulp(struct sock *sk)
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index dac24c7aa7d4..94774c0e5ff3 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -732,15 +732,19 @@ out:
return rc;
}
-static void tls_update(struct sock *sk, struct proto *p)
+static void tls_update(struct sock *sk, struct proto *p,
+ void (*write_space)(struct sock *sk))
{
struct tls_context *ctx;
ctx = tls_get_ctx(sk);
- if (likely(ctx))
+ if (likely(ctx)) {
+ ctx->sk_write_space = write_space;
ctx->sk_proto = p;
- else
+ } else {
sk->sk_prot = p;
+ sk->sk_write_space = write_space;
+ }
}
static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 5c7c00429f8e..c98e602a1a2d 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -680,12 +680,32 @@ static int tls_push_record(struct sock *sk, int flags,
split_point = msg_pl->apply_bytes;
split = split_point && split_point < msg_pl->sg.size;
+ if (unlikely((!split &&
+ msg_pl->sg.size +
+ prot->overhead_size > msg_en->sg.size) ||
+ (split &&
+ split_point +
+ prot->overhead_size > msg_en->sg.size))) {
+ split = true;
+ split_point = msg_en->sg.size;
+ }
if (split) {
rc = tls_split_open_record(sk, rec, &tmp, msg_pl, msg_en,
split_point, prot->overhead_size,
&orig_end);
if (rc < 0)
return rc;
+ /* This can happen if above tls_split_open_record allocates
+ * a single large encryption buffer instead of two smaller
+ * ones. In this case adjust pointers and continue without
+ * split.
+ */
+ if (!msg_pl->sg.size) {
+ tls_merge_open_record(sk, rec, tmp, orig_end);
+ msg_pl = &rec->msg_plaintext;
+ msg_en = &rec->msg_encrypted;
+ split = false;
+ }
sk_msg_trim(sk, msg_en, msg_pl->sg.size +
prot->overhead_size);
}
@@ -707,6 +727,12 @@ static int tls_push_record(struct sock *sk, int flags,
sg_mark_end(sk_msg_elem(msg_pl, i));
}
+ if (msg_pl->sg.end < msg_pl->sg.start) {
+ sg_chain(&msg_pl->sg.data[msg_pl->sg.start],
+ MAX_SKB_FRAGS - msg_pl->sg.start + 1,
+ msg_pl->sg.data);
+ }
+
i = msg_pl->sg.start;
sg_chain(rec->sg_aead_in, 2, &msg_pl->sg.data[i]);
@@ -781,10 +807,7 @@ more_data:
if (psock->eval == __SK_NONE) {
delta = msg->sg.size;
psock->eval = sk_psock_msg_verdict(sk, psock, msg);
- if (delta < msg->sg.size)
- delta -= msg->sg.size;
- else
- delta = 0;
+ delta -= msg->sg.size;
}
if (msg->cork_bytes && msg->cork_bytes > msg->sg.size &&
!enospc && !full_record) {
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index d66131f69689..397e5716ab6d 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -26,7 +26,7 @@ static void btf_dumper_ptr(const void *data, json_writer_t *jw,
bool is_plain_text)
{
if (is_plain_text)
- jsonw_printf(jw, "%p", data);
+ jsonw_printf(jw, "%p", *(void **)data);
else
jsonw_printf(jw, "%lu", *(unsigned long *)data);
}