summary refs log tree commit diff
path: root/net
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2020-07-04 17:48:34 -0700
committer David S. Miller <davem@davemloft.net> 2020-07-04 17:48:34 -0700
commit f91c031e6528f1656e7a1f76c98e3c1d7620820c (patch)
tree 3e4c990b6b0d8bf6eaa59364754a83d5f741ae10 /net
parent 418e787e54a638eb2bf09212a323d920229ee5ef (diff)
parent 9ff79af3331277c69ac61cc75b2392eb3284e305 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:

====================
pull-request: bpf-next 2020-07-04

The following pull-request contains BPF updates for your *net-next* tree.

We've added 73 non-merge commits during the last 17 day(s) which contain
a total of 106 files changed, 5233 insertions(+), 1283 deletions(-).

The main changes are:

1) bpftool ability to show PIDs of processes having open file descriptors
   for BPF map/program/link/BTF objects, relying on BPF iterator progs
   to extract this info efficiently, from Andrii Nakryiko.

2) Addition of BPF iterator progs for dumping TCP and UDP sockets to
   seq_files, from Yonghong Song.

3) Support access to BPF map fields in struct bpf_map from programs
   through BTF struct access, from Andrey Ignatov.

4) Add a bpf_get_task_stack() helper to be able to dump /proc/*/stack
   via seq_file from BPF iterator progs, from Song Liu.

5) Make SO_KEEPALIVE and related options available to bpf_setsockopt()
   helper, from Dmitry Yakunin.

6) Optimize BPF sk_storage selection of its caching index, from Martin
   KaFai Lau.

7) Removal of redundant synchronize_rcu()s from BPF map destruction which
   has been a historic leftover, from Alexei Starovoitov.

8) Several improvements to test_progs to make it easier to create a shell
   loop that invokes each test individually which is useful for some CIs,
   from Jesper Dangaard Brouer.

9) Fix bpftool prog dump segfault when compiled without skeleton code on
   older clang versions, from John Fastabend.

10) Bunch of cleanups and minor improvements, from various others.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/core/bpf_sk_storage.c 44
-rw-r--r-- net/core/filter.c 206
-rw-r--r-- net/core/sock.c 9
-rw-r--r-- net/core/sock_map.c 88
-rw-r--r-- net/ipv4/tcp.c 6
-rw-r--r-- net/ipv4/tcp_ipv4.c 153
-rw-r--r-- net/ipv4/udp.c 144
-rw-r--r-- net/xdp/xskmap.c 3
8 files changed, 584 insertions, 69 deletions
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index d2c4d16dadba..6f921c4ddc2c 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -11,8 +11,6 @@
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
-static atomic_t cache_idx;
-
#define SK_STORAGE_CREATE_FLAG_MASK \
(BPF_F_NO_PREALLOC | BPF_F_CLONE)
@@ -81,6 +79,9 @@ struct bpf_sk_storage_elem {
#define SDATA(_SELEM) (&(_SELEM)->sdata)
#define BPF_SK_STORAGE_CACHE_SIZE 16
+static DEFINE_SPINLOCK(cache_idx_lock);
+static u64 cache_idx_usage_counts[BPF_SK_STORAGE_CACHE_SIZE];
+
struct bpf_sk_storage {
struct bpf_sk_storage_data __rcu *cache[BPF_SK_STORAGE_CACHE_SIZE];
struct hlist_head list; /* List of bpf_sk_storage_elem */
@@ -512,6 +513,37 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map)
return 0;
}
+static u16 cache_idx_get(void)
+{
+ u64 min_usage = U64_MAX;
+ u16 i, res = 0;
+
+ spin_lock(&cache_idx_lock);
+
+ for (i = 0; i < BPF_SK_STORAGE_CACHE_SIZE; i++) {
+ if (cache_idx_usage_counts[i] < min_usage) {
+ min_usage = cache_idx_usage_counts[i];
+ res = i;
+
+ /* Found a free cache_idx */
+ if (!min_usage)
+ break;
+ }
+ }
+ cache_idx_usage_counts[res]++;
+
+ spin_unlock(&cache_idx_lock);
+
+ return res;
+}
+
+static void cache_idx_free(u16 idx)
+{
+ spin_lock(&cache_idx_lock);
+ cache_idx_usage_counts[idx]--;
+ spin_unlock(&cache_idx_lock);
+}
+
/* Called by __sk_destruct() & bpf_sk_storage_clone() */
void bpf_sk_storage_free(struct sock *sk)
{
@@ -560,6 +592,8 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
smap = (struct bpf_sk_storage_map *)map;
+ cache_idx_free(smap->cache_idx);
+
/* Note that this map might be concurrently cloned from
* bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
* RCU read section to finish before proceeding. New RCU
@@ -673,8 +707,7 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
}
smap->elem_size = sizeof(struct bpf_sk_storage_elem) + attr->value_size;
- smap->cache_idx = (unsigned int)atomic_inc_return(&cache_idx) %
- BPF_SK_STORAGE_CACHE_SIZE;
+ smap->cache_idx = cache_idx_get();
return &smap->map;
}
@@ -886,6 +919,7 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
return -ENOENT;
}
+static int sk_storage_map_btf_id;
const struct bpf_map_ops sk_storage_map_ops = {
.map_alloc_check = bpf_sk_storage_map_alloc_check,
.map_alloc = bpf_sk_storage_map_alloc,
@@ -895,6 +929,8 @@ const struct bpf_map_ops sk_storage_map_ops = {
.map_update_elem = bpf_fd_sk_storage_update_elem,
.map_delete_elem = bpf_fd_sk_storage_delete_elem,
.map_check_btf = bpf_sk_storage_map_check_btf,
+ .map_btf_name = "bpf_sk_storage_map",
+ .map_btf_id = &sk_storage_map_btf_id,
};
const struct bpf_func_proto bpf_sk_storage_get_proto = {
diff --git a/net/core/filter.c b/net/core/filter.c
index 73395384afe2..c5e696e6c315 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -47,6 +47,7 @@
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
+#include <linux/btf.h>
#include <net/sch_generic.h>
#include <net/cls_cgroup.h>
#include <net/dst_metadata.h>
@@ -73,6 +74,7 @@
#include <net/lwtunnel.h>
#include <net/ipv6_stubs.h>
#include <net/bpf_sk_storage.h>
+#include <net/transp_v6.h>
/**
* sk_filter_trim_cap - run a packet through a socket filter
@@ -4289,10 +4291,10 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
char *optval, int optlen, u32 flags)
{
char devname[IFNAMSIZ];
+ int val, valbool;
struct net *net;
int ifindex;
int ret = 0;
- int val;
if (!sk_fullsock(sk))
return -EINVAL;
@@ -4303,6 +4305,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
if (optlen != sizeof(int) && optname != SO_BINDTODEVICE)
return -EINVAL;
val = *((int *)optval);
+ valbool = val ? 1 : 0;
/* Only some socketops are supported */
switch (optname) {
@@ -4361,6 +4364,11 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
}
ret = sock_bindtoindex(sk, ifindex, false);
break;
+ case SO_KEEPALIVE:
+ if (sk->sk_prot->keepalive)
+ sk->sk_prot->keepalive(sk, valbool);
+ sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
+ break;
default:
ret = -EINVAL;
}
@@ -4421,6 +4429,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
ret = tcp_set_congestion_control(sk, name, false,
reinit, true);
} else {
+ struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
if (optlen != sizeof(int))
@@ -4449,6 +4458,33 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
else
tp->save_syn = val;
break;
+ case TCP_KEEPIDLE:
+ ret = tcp_sock_set_keepidle_locked(sk, val);
+ break;
+ case TCP_KEEPINTVL:
+ if (val < 1 || val > MAX_TCP_KEEPINTVL)
+ ret = -EINVAL;
+ else
+ tp->keepalive_intvl = val * HZ;
+ break;
+ case TCP_KEEPCNT:
+ if (val < 1 || val > MAX_TCP_KEEPCNT)
+ ret = -EINVAL;
+ else
+ tp->keepalive_probes = val;
+ break;
+ case TCP_SYNCNT:
+ if (val < 1 || val > MAX_TCP_SYNCNT)
+ ret = -EINVAL;
+ else
+ icsk->icsk_syn_retries = val;
+ break;
+ case TCP_USER_TIMEOUT:
+ if (val < 0)
+ ret = -EINVAL;
+ else
+ icsk->icsk_user_timeout = val;
+ break;
default:
ret = -EINVAL;
}
@@ -9191,3 +9227,171 @@ void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
{
bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
}
+
+/* Define a list of socket types which can be the argument for
+ * skc_to_*_sock() helpers. All these sockets should have
+ * sock_common as the first argument in its memory layout.
+ */
+#define BTF_SOCK_TYPE_xxx \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET, "inet_sock") \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_CONN, "inet_connection_sock") \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_REQ, "inet_request_sock") \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_INET_TW, "inet_timewait_sock") \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_REQ, "request_sock") \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK, "sock") \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCK_COMMON, "sock_common") \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP, "tcp_sock") \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_REQ, "tcp_request_sock") \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, "tcp_timewait_sock") \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, "tcp6_sock") \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, "udp_sock") \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, "udp6_sock")
+
+enum {
+#define BTF_SOCK_TYPE(name, str) name,
+BTF_SOCK_TYPE_xxx
+#undef BTF_SOCK_TYPE
+MAX_BTF_SOCK_TYPE,
+};
+
+static int btf_sock_ids[MAX_BTF_SOCK_TYPE];
+
+#ifdef CONFIG_BPF_SYSCALL
+static const char *bpf_sock_types[] = {
+#define BTF_SOCK_TYPE(name, str) str,
+BTF_SOCK_TYPE_xxx
+#undef BTF_SOCK_TYPE
+};
+
+void init_btf_sock_ids(struct btf *btf)
+{
+ int i, btf_id;
+
+ for (i = 0; i < MAX_BTF_SOCK_TYPE; i++) {
+ btf_id = btf_find_by_name_kind(btf, bpf_sock_types[i],
+ BTF_KIND_STRUCT);
+ if (btf_id > 0)
+ btf_sock_ids[i] = btf_id;
+ }
+}
+#endif
+
+static bool check_arg_btf_id(u32 btf_id, u32 arg)
+{
+ int i;
+
+ /* only one argument, no need to check arg */
+ for (i = 0; i < MAX_BTF_SOCK_TYPE; i++)
+ if (btf_sock_ids[i] == btf_id)
+ return true;
+ return false;
+}
+
+BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk)
+{
+ /* tcp6_sock type is not generated in dwarf and hence btf,
+ * trigger an explicit type generation here.
+ */
+ BTF_TYPE_EMIT(struct tcp6_sock);
+ if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP &&
+ sk->sk_family == AF_INET6)
+ return (unsigned long)sk;
+
+ return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = {
+ .func = bpf_skc_to_tcp6_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .check_btf_id = check_arg_btf_id,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP6],
+};
+
+BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk)
+{
+ if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
+ return (unsigned long)sk;
+
+ return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = {
+ .func = bpf_skc_to_tcp_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .check_btf_id = check_arg_btf_id,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
+};
+
+BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk)
+{
+#ifdef CONFIG_INET
+ if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT)
+ return (unsigned long)sk;
+#endif
+
+#if IS_BUILTIN(CONFIG_IPV6)
+ if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT)
+ return (unsigned long)sk;
+#endif
+
+ return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = {
+ .func = bpf_skc_to_tcp_timewait_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .check_btf_id = check_arg_btf_id,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_TW],
+};
+
+BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk)
+{
+#ifdef CONFIG_INET
+ if (sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV)
+ return (unsigned long)sk;
+#endif
+
+#if IS_BUILTIN(CONFIG_IPV6)
+ if (sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV)
+ return (unsigned long)sk;
+#endif
+
+ return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = {
+ .func = bpf_skc_to_tcp_request_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .check_btf_id = check_arg_btf_id,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ],
+};
+
+BPF_CALL_1(bpf_skc_to_udp6_sock, struct sock *, sk)
+{
+ /* udp6_sock type is not generated in dwarf and hence btf,
+ * trigger an explicit type generation here.
+ */
+ BTF_TYPE_EMIT(struct udp6_sock);
+ if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP &&
+ sk->sk_type == SOCK_DGRAM && sk->sk_family == AF_INET6)
+ return (unsigned long)sk;
+
+ return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = {
+ .func = bpf_skc_to_udp6_sock,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .check_btf_id = check_arg_btf_id,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6],
+};
diff --git a/net/core/sock.c b/net/core/sock.c
index d832c650287c..f5b5fdd61c88 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -695,15 +695,6 @@ out:
return ret;
}
-static inline void sock_valbool_flag(struct sock *sk, enum sock_flags bit,
- int valbool)
-{
- if (valbool)
- sock_set_flag(sk, bit);
- else
- sock_reset_flag(sk, bit);
-}
-
bool sk_mc_loop(struct sock *sk)
{
if (dev_recursion_level())
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 4059f94e9bb5..4c1123c749bb 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -643,6 +643,7 @@ const struct bpf_func_proto bpf_msg_redirect_map_proto = {
.arg4_type = ARG_ANYTHING,
};
+static int sock_map_btf_id;
const struct bpf_map_ops sock_map_ops = {
.map_alloc = sock_map_alloc,
.map_free = sock_map_free,
@@ -653,9 +654,11 @@ const struct bpf_map_ops sock_map_ops = {
.map_lookup_elem = sock_map_lookup,
.map_release_uref = sock_map_release_progs,
.map_check_btf = map_check_no_btf,
+ .map_btf_name = "bpf_stab",
+ .map_btf_id = &sock_map_btf_id,
};
-struct bpf_htab_elem {
+struct bpf_shtab_elem {
struct rcu_head rcu;
u32 hash;
struct sock *sk;
@@ -663,14 +666,14 @@ struct bpf_htab_elem {
u8 key[];
};
-struct bpf_htab_bucket {
+struct bpf_shtab_bucket {
struct hlist_head head;
raw_spinlock_t lock;
};
-struct bpf_htab {
+struct bpf_shtab {
struct bpf_map map;
- struct bpf_htab_bucket *buckets;
+ struct bpf_shtab_bucket *buckets;
u32 buckets_num;
u32 elem_size;
struct sk_psock_progs progs;
@@ -682,17 +685,17 @@ static inline u32 sock_hash_bucket_hash(const void *key, u32 len)
return jhash(key, len, 0);
}
-static struct bpf_htab_bucket *sock_hash_select_bucket(struct bpf_htab *htab,
- u32 hash)
+static struct bpf_shtab_bucket *sock_hash_select_bucket(struct bpf_shtab *htab,
+ u32 hash)
{
return &htab->buckets[hash & (htab->buckets_num - 1)];
}
-static struct bpf_htab_elem *
+static struct bpf_shtab_elem *
sock_hash_lookup_elem_raw(struct hlist_head *head, u32 hash, void *key,
u32 key_size)
{
- struct bpf_htab_elem *elem;
+ struct bpf_shtab_elem *elem;
hlist_for_each_entry_rcu(elem, head, node) {
if (elem->hash == hash &&
@@ -705,10 +708,10 @@ sock_hash_lookup_elem_raw(struct hlist_head *head, u32 hash, void *key,
static struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
u32 key_size = map->key_size, hash;
- struct bpf_htab_bucket *bucket;
- struct bpf_htab_elem *elem;
+ struct bpf_shtab_bucket *bucket;
+ struct bpf_shtab_elem *elem;
WARN_ON_ONCE(!rcu_read_lock_held());
@@ -719,8 +722,8 @@ static struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
return elem ? elem->sk : NULL;
}
-static void sock_hash_free_elem(struct bpf_htab *htab,
- struct bpf_htab_elem *elem)
+static void sock_hash_free_elem(struct bpf_shtab *htab,
+ struct bpf_shtab_elem *elem)
{
atomic_dec(&htab->count);
kfree_rcu(elem, rcu);
@@ -729,9 +732,9 @@ static void sock_hash_free_elem(struct bpf_htab *htab,
static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
void *link_raw)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct bpf_htab_elem *elem_probe, *elem = link_raw;
- struct bpf_htab_bucket *bucket;
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
+ struct bpf_shtab_elem *elem_probe, *elem = link_raw;
+ struct bpf_shtab_bucket *bucket;
WARN_ON_ONCE(!rcu_read_lock_held());
bucket = sock_hash_select_bucket(htab, elem->hash);
@@ -753,10 +756,10 @@ static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
static int sock_hash_delete_elem(struct bpf_map *map, void *key)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
u32 hash, key_size = map->key_size;
- struct bpf_htab_bucket *bucket;
- struct bpf_htab_elem *elem;
+ struct bpf_shtab_bucket *bucket;
+ struct bpf_shtab_elem *elem;
int ret = -ENOENT;
hash = sock_hash_bucket_hash(key, key_size);
@@ -774,12 +777,12 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key)
return ret;
}
-static struct bpf_htab_elem *sock_hash_alloc_elem(struct bpf_htab *htab,
- void *key, u32 key_size,
- u32 hash, struct sock *sk,
- struct bpf_htab_elem *old)
+static struct bpf_shtab_elem *sock_hash_alloc_elem(struct bpf_shtab *htab,
+ void *key, u32 key_size,
+ u32 hash, struct sock *sk,
+ struct bpf_shtab_elem *old)
{
- struct bpf_htab_elem *new;
+ struct bpf_shtab_elem *new;
if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
if (!old) {
@@ -803,10 +806,10 @@ static struct bpf_htab_elem *sock_hash_alloc_elem(struct bpf_htab *htab,
static int sock_hash_update_common(struct bpf_map *map, void *key,
struct sock *sk, u64 flags)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
u32 key_size = map->key_size, hash;
- struct bpf_htab_elem *elem, *elem_new;
- struct bpf_htab_bucket *bucket;
+ struct bpf_shtab_elem *elem, *elem_new;
+ struct bpf_shtab_bucket *bucket;
struct sk_psock_link *link;
struct sk_psock *psock;
int ret;
@@ -916,8 +919,8 @@ out:
static int sock_hash_get_next_key(struct bpf_map *map, void *key,
void *key_next)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct bpf_htab_elem *elem, *elem_next;
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
+ struct bpf_shtab_elem *elem, *elem_next;
u32 hash, key_size = map->key_size;
struct hlist_head *head;
int i = 0;
@@ -931,7 +934,7 @@ static int sock_hash_get_next_key(struct bpf_map *map, void *key,
goto find_first_elem;
elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&elem->node)),
- struct bpf_htab_elem, node);
+ struct bpf_shtab_elem, node);
if (elem_next) {
memcpy(key_next, elem_next->key, key_size);
return 0;
@@ -943,7 +946,7 @@ find_first_elem:
for (; i < htab->buckets_num; i++) {
head = &sock_hash_select_bucket(htab, i)->head;
elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
- struct bpf_htab_elem, node);
+ struct bpf_shtab_elem, node);
if (elem_next) {
memcpy(key_next, elem_next->key, key_size);
return 0;
@@ -955,7 +958,7 @@ find_first_elem:
static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
{
- struct bpf_htab *htab;
+ struct bpf_shtab *htab;
int i, err;
u64 cost;
@@ -977,15 +980,15 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
bpf_map_init_from_attr(&htab->map, attr);
htab->buckets_num = roundup_pow_of_two(htab->map.max_entries);
- htab->elem_size = sizeof(struct bpf_htab_elem) +
+ htab->elem_size = sizeof(struct bpf_shtab_elem) +
round_up(htab->map.key_size, 8);
if (htab->buckets_num == 0 ||
- htab->buckets_num > U32_MAX / sizeof(struct bpf_htab_bucket)) {
+ htab->buckets_num > U32_MAX / sizeof(struct bpf_shtab_bucket)) {
err = -EINVAL;
goto free_htab;
}
- cost = (u64) htab->buckets_num * sizeof(struct bpf_htab_bucket) +
+ cost = (u64) htab->buckets_num * sizeof(struct bpf_shtab_bucket) +
(u64) htab->elem_size * htab->map.max_entries;
if (cost >= U32_MAX - PAGE_SIZE) {
err = -EINVAL;
@@ -996,7 +999,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
goto free_htab;
htab->buckets = bpf_map_area_alloc(htab->buckets_num *
- sizeof(struct bpf_htab_bucket),
+ sizeof(struct bpf_shtab_bucket),
htab->map.numa_node);
if (!htab->buckets) {
bpf_map_charge_finish(&htab->map.memory);
@@ -1017,10 +1020,10 @@ free_htab:
static void sock_hash_free(struct bpf_map *map)
{
- struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- struct bpf_htab_bucket *bucket;
+ struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
+ struct bpf_shtab_bucket *bucket;
struct hlist_head unlink_list;
- struct bpf_htab_elem *elem;
+ struct bpf_shtab_elem *elem;
struct hlist_node *node;
int i;
@@ -1096,7 +1099,7 @@ static void *sock_hash_lookup(struct bpf_map *map, void *key)
static void sock_hash_release_progs(struct bpf_map *map)
{
- psock_progs_drop(&container_of(map, struct bpf_htab, map)->progs);
+ psock_progs_drop(&container_of(map, struct bpf_shtab, map)->progs);
}
BPF_CALL_4(bpf_sock_hash_update, struct bpf_sock_ops_kern *, sops,
@@ -1176,6 +1179,7 @@ const struct bpf_func_proto bpf_msg_redirect_hash_proto = {
.arg4_type = ARG_ANYTHING,
};
+static int sock_hash_map_btf_id;
const struct bpf_map_ops sock_hash_ops = {
.map_alloc = sock_hash_alloc,
.map_free = sock_hash_free,
@@ -1186,6 +1190,8 @@ const struct bpf_map_ops sock_hash_ops = {
.map_lookup_elem_sys_only = sock_hash_lookup_sys,
.map_release_uref = sock_hash_release_progs,
.map_check_btf = map_check_no_btf,
+ .map_btf_name = "bpf_shtab",
+ .map_btf_id = &sock_hash_map_btf_id,
};
static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
@@ -1194,7 +1200,7 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
case BPF_MAP_TYPE_SOCKMAP:
return &container_of(map, struct bpf_stab, map)->progs;
case BPF_MAP_TYPE_SOCKHASH:
- return &container_of(map, struct bpf_htab, map)->progs;
+ return &container_of(map, struct bpf_shtab, map)->progs;
default:
break;
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 810cc164f795..de36c91d32ea 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2957,7 +2957,7 @@ void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
}
EXPORT_SYMBOL(tcp_sock_set_user_timeout);
-static int __tcp_sock_set_keepidle(struct sock *sk, int val)
+int tcp_sock_set_keepidle_locked(struct sock *sk, int val)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2984,7 +2984,7 @@ int tcp_sock_set_keepidle(struct sock *sk, int val)
int err;
lock_sock(sk);
- err = __tcp_sock_set_keepidle(sk, val);
+ err = tcp_sock_set_keepidle_locked(sk, val);
release_sock(sk);
return err;
}
@@ -3183,7 +3183,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break;
case TCP_KEEPIDLE:
- err = __tcp_sock_set_keepidle(sk, val);
+ err = tcp_sock_set_keepidle_locked(sk, val);
break;
case TCP_KEEPINTVL:
if (val < 1 || val > MAX_TCP_KEEPINTVL)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ad6435ba6d72..ea0df9fd7618 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2211,13 +2211,18 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
*/
static void *listening_get_next(struct seq_file *seq, void *cur)
{
- struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
+ struct tcp_seq_afinfo *afinfo;
struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq);
struct inet_listen_hashbucket *ilb;
struct hlist_nulls_node *node;
struct sock *sk = cur;
+ if (st->bpf_seq_afinfo)
+ afinfo = st->bpf_seq_afinfo;
+ else
+ afinfo = PDE_DATA(file_inode(seq->file));
+
if (!sk) {
get_head:
ilb = &tcp_hashinfo.listening_hash[st->bucket];
@@ -2235,7 +2240,8 @@ get_sk:
sk_nulls_for_each_from(sk, node) {
if (!net_eq(sock_net(sk), net))
continue;
- if (sk->sk_family == afinfo->family)
+ if (afinfo->family == AF_UNSPEC ||
+ sk->sk_family == afinfo->family)
return sk;
}
spin_unlock(&ilb->lock);
@@ -2272,11 +2278,16 @@ static inline bool empty_bucket(const struct tcp_iter_state *st)
*/
static void *established_get_first(struct seq_file *seq)
{
- struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
+ struct tcp_seq_afinfo *afinfo;
struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq);
void *rc = NULL;
+ if (st->bpf_seq_afinfo)
+ afinfo = st->bpf_seq_afinfo;
+ else
+ afinfo = PDE_DATA(file_inode(seq->file));
+
st->offset = 0;
for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
struct sock *sk;
@@ -2289,7 +2300,8 @@ static void *established_get_first(struct seq_file *seq)
spin_lock_bh(lock);
sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
- if (sk->sk_family != afinfo->family ||
+ if ((afinfo->family != AF_UNSPEC &&
+ sk->sk_family != afinfo->family) ||
!net_eq(sock_net(sk), net)) {
continue;
}
@@ -2304,19 +2316,25 @@ out:
static void *established_get_next(struct seq_file *seq, void *cur)
{
- struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
+ struct tcp_seq_afinfo *afinfo;
struct sock *sk = cur;
struct hlist_nulls_node *node;
struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq);
+ if (st->bpf_seq_afinfo)
+ afinfo = st->bpf_seq_afinfo;
+ else
+ afinfo = PDE_DATA(file_inode(seq->file));
+
++st->num;
++st->offset;
sk = sk_nulls_next(sk);
sk_nulls_for_each_from(sk, node) {
- if (sk->sk_family == afinfo->family &&
+ if ((afinfo->family == AF_UNSPEC ||
+ sk->sk_family == afinfo->family) &&
net_eq(sock_net(sk), net))
return sk;
}
@@ -2595,6 +2613,74 @@ out:
return 0;
}
+#ifdef CONFIG_BPF_SYSCALL
+struct bpf_iter__tcp {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct sock_common *, sk_common);
+ uid_t uid __aligned(8);
+};
+
+static int tcp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
+ struct sock_common *sk_common, uid_t uid)
+{
+ struct bpf_iter__tcp ctx;
+
+ meta->seq_num--; /* skip SEQ_START_TOKEN */
+ ctx.meta = meta;
+ ctx.sk_common = sk_common;
+ ctx.uid = uid;
+ return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+ struct sock *sk = v;
+ uid_t uid;
+
+ if (v == SEQ_START_TOKEN)
+ return 0;
+
+ if (sk->sk_state == TCP_TIME_WAIT) {
+ uid = 0;
+ } else if (sk->sk_state == TCP_NEW_SYN_RECV) {
+ const struct request_sock *req = v;
+
+ uid = from_kuid_munged(seq_user_ns(seq),
+ sock_i_uid(req->rsk_listener));
+ } else {
+ uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
+ }
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, false);
+ return tcp_prog_seq_show(prog, &meta, v, uid);
+}
+
+static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+
+ if (!v) {
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, true);
+ if (prog)
+ (void)tcp_prog_seq_show(prog, &meta, v, 0);
+ }
+
+ tcp_seq_stop(seq, v);
+}
+
+static const struct seq_operations bpf_iter_tcp_seq_ops = {
+ .show = bpf_iter_tcp_seq_show,
+ .start = tcp_seq_start,
+ .next = tcp_seq_next,
+ .stop = bpf_iter_tcp_seq_stop,
+};
+#endif
+
static const struct seq_operations tcp4_seq_ops = {
.show = tcp4_seq_show,
.start = tcp_seq_start,
@@ -2826,8 +2912,63 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
.exit_batch = tcp_sk_exit_batch,
};
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+DEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta,
+ struct sock_common *sk_common, uid_t uid)
+
+static int bpf_iter_init_tcp(void *priv_data)
+{
+ struct tcp_iter_state *st = priv_data;
+ struct tcp_seq_afinfo *afinfo;
+ int ret;
+
+ afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN);
+ if (!afinfo)
+ return -ENOMEM;
+
+ afinfo->family = AF_UNSPEC;
+ st->bpf_seq_afinfo = afinfo;
+ ret = bpf_iter_init_seq_net(priv_data);
+ if (ret)
+ kfree(afinfo);
+ return ret;
+}
+
+static void bpf_iter_fini_tcp(void *priv_data)
+{
+ struct tcp_iter_state *st = priv_data;
+
+ kfree(st->bpf_seq_afinfo);
+ bpf_iter_fini_seq_net(priv_data);
+}
+
+static const struct bpf_iter_reg tcp_reg_info = {
+ .target = "tcp",
+ .seq_ops = &bpf_iter_tcp_seq_ops,
+ .init_seq_private = bpf_iter_init_tcp,
+ .fini_seq_private = bpf_iter_fini_tcp,
+ .seq_priv_size = sizeof(struct tcp_iter_state),
+ .ctx_arg_info_size = 1,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__tcp, sk_common),
+ PTR_TO_BTF_ID_OR_NULL },
+ },
+};
+
+static void __init bpf_iter_register(void)
+{
+ if (bpf_iter_reg_target(&tcp_reg_info))
+ pr_warn("Warning: could not register bpf iterator tcp\n");
+}
+
+#endif
+
void __init tcp_v4_init(void)
{
if (register_pernet_subsys(&tcp_sk_ops))
panic("Failed to create the TCP control socket.\n");
+
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+ bpf_iter_register();
+#endif
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1b7ebbcae497..31530129f137 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2826,10 +2826,15 @@ EXPORT_SYMBOL(udp_prot);
static struct sock *udp_get_first(struct seq_file *seq, int start)
{
struct sock *sk;
- struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
+ struct udp_seq_afinfo *afinfo;
struct udp_iter_state *state = seq->private;
struct net *net = seq_file_net(seq);
+ if (state->bpf_seq_afinfo)
+ afinfo = state->bpf_seq_afinfo;
+ else
+ afinfo = PDE_DATA(file_inode(seq->file));
+
for (state->bucket = start; state->bucket <= afinfo->udp_table->mask;
++state->bucket) {
struct udp_hslot *hslot = &afinfo->udp_table->hash[state->bucket];
@@ -2841,7 +2846,8 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
sk_for_each(sk, &hslot->head) {
if (!net_eq(sock_net(sk), net))
continue;
- if (sk->sk_family == afinfo->family)
+ if (afinfo->family == AF_UNSPEC ||
+ sk->sk_family == afinfo->family)
goto found;
}
spin_unlock_bh(&hslot->lock);
@@ -2853,13 +2859,20 @@ found:
static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
{
- struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
+ struct udp_seq_afinfo *afinfo;
struct udp_iter_state *state = seq->private;
struct net *net = seq_file_net(seq);
+ if (state->bpf_seq_afinfo)
+ afinfo = state->bpf_seq_afinfo;
+ else
+ afinfo = PDE_DATA(file_inode(seq->file));
+
do {
sk = sk_next(sk);
- } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != afinfo->family));
+ } while (sk && (!net_eq(sock_net(sk), net) ||
+ (afinfo->family != AF_UNSPEC &&
+ sk->sk_family != afinfo->family)));
if (!sk) {
if (state->bucket <= afinfo->udp_table->mask)
@@ -2904,9 +2917,14 @@ EXPORT_SYMBOL(udp_seq_next);
void udp_seq_stop(struct seq_file *seq, void *v)
{
- struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
+ struct udp_seq_afinfo *afinfo;
struct udp_iter_state *state = seq->private;
+ if (state->bpf_seq_afinfo)
+ afinfo = state->bpf_seq_afinfo;
+ else
+ afinfo = PDE_DATA(file_inode(seq->file));
+
if (state->bucket <= afinfo->udp_table->mask)
spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
}
@@ -2950,6 +2968,67 @@ int udp4_seq_show(struct seq_file *seq, void *v)
return 0;
}
+#ifdef CONFIG_BPF_SYSCALL
+struct bpf_iter__udp {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct udp_sock *, udp_sk);
+ uid_t uid __aligned(8);
+ int bucket __aligned(8);
+};
+
+static int udp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
+ struct udp_sock *udp_sk, uid_t uid, int bucket)
+{
+ struct bpf_iter__udp ctx;
+
+ meta->seq_num--; /* skip SEQ_START_TOKEN */
+ ctx.meta = meta;
+ ctx.udp_sk = udp_sk;
+ ctx.uid = uid;
+ ctx.bucket = bucket;
+ return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int bpf_iter_udp_seq_show(struct seq_file *seq, void *v)
+{
+ struct udp_iter_state *state = seq->private;
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+ struct sock *sk = v;
+ uid_t uid;
+
+ if (v == SEQ_START_TOKEN)
+ return 0;
+
+ uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, false);
+ return udp_prog_seq_show(prog, &meta, v, uid, state->bucket);
+}
+
+static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+
+ if (!v) {
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, true);
+ if (prog)
+ (void)udp_prog_seq_show(prog, &meta, v, 0, 0);
+ }
+
+ udp_seq_stop(seq, v);
+}
+
+static const struct seq_operations bpf_iter_udp_seq_ops = {
+ .start = udp_seq_start,
+ .next = udp_seq_next,
+ .stop = bpf_iter_udp_seq_stop,
+ .show = bpf_iter_udp_seq_show,
+};
+#endif
+
const struct seq_operations udp_seq_ops = {
.start = udp_seq_start,
.next = udp_seq_next,
@@ -3067,6 +3146,57 @@ static struct pernet_operations __net_initdata udp_sysctl_ops = {
.init = udp_sysctl_init,
};
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta,
+ struct udp_sock *udp_sk, uid_t uid, int bucket)
+
+static int bpf_iter_init_udp(void *priv_data)
+{
+ struct udp_iter_state *st = priv_data;
+ struct udp_seq_afinfo *afinfo;
+ int ret;
+
+ afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN);
+ if (!afinfo)
+ return -ENOMEM;
+
+ afinfo->family = AF_UNSPEC;
+ afinfo->udp_table = &udp_table;
+ st->bpf_seq_afinfo = afinfo;
+ ret = bpf_iter_init_seq_net(priv_data);
+ if (ret)
+ kfree(afinfo);
+ return ret;
+}
+
+static void bpf_iter_fini_udp(void *priv_data)
+{
+ struct udp_iter_state *st = priv_data;
+
+ kfree(st->bpf_seq_afinfo);
+ bpf_iter_fini_seq_net(priv_data);
+}
+
+static const struct bpf_iter_reg udp_reg_info = {
+ .target = "udp",
+ .seq_ops = &bpf_iter_udp_seq_ops,
+ .init_seq_private = bpf_iter_init_udp,
+ .fini_seq_private = bpf_iter_fini_udp,
+ .seq_priv_size = sizeof(struct udp_iter_state),
+ .ctx_arg_info_size = 1,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__udp, udp_sk),
+ PTR_TO_BTF_ID_OR_NULL },
+ },
+};
+
+static void __init bpf_iter_register(void)
+{
+ if (bpf_iter_reg_target(&udp_reg_info))
+ pr_warn("Warning: could not register bpf iterator udp\n");
+}
+#endif
+
void __init udp_init(void)
{
unsigned long limit;
@@ -3092,4 +3222,8 @@ void __init udp_init(void)
if (register_pernet_subsys(&udp_sysctl_ops))
panic("UDP: failed to init sysctl parameters.\n");
+
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+ bpf_iter_register();
+#endif
}
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index 1dc7208c71ba..8367adbbe9df 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -254,6 +254,7 @@ void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
spin_unlock_bh(&map->lock);
}
+static int xsk_map_btf_id;
const struct bpf_map_ops xsk_map_ops = {
.map_alloc = xsk_map_alloc,
.map_free = xsk_map_free,
@@ -264,4 +265,6 @@ const struct bpf_map_ops xsk_map_ops = {
.map_update_elem = xsk_map_update_elem,
.map_delete_elem = xsk_map_delete_elem,
.map_check_btf = map_check_no_btf,
+ .map_btf_name = "xsk_map",
+ .map_btf_id = &xsk_map_btf_id,
};