summaryrefslogtreecommitdiff
path: root/kernel/bpf
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2018-07-17 09:27:43 +0200
committerIngo Molnar <mingo@kernel.org>2018-07-17 09:27:43 +0200
commit52b544bd386688177c41d53e748111c29d0ccc98 (patch)
treec2083582176e773084364af93cc44e0f94b6cde8 /kernel/bpf
parentafed7bcf9487bb28e2e2b016a195085c07416c0b (diff)
parent9d3cce1e8b8561fed5f383d22a4d6949db4eadbe (diff)
Merge tag 'v4.18-rc5' into locking/core, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/bpf')
-rw-r--r--kernel/bpf/cgroup.c54
-rw-r--r--kernel/bpf/core.c30
-rw-r--r--kernel/bpf/sockmap.c254
-rw-r--r--kernel/bpf/syscall.c99
4 files changed, 260 insertions, 177 deletions
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index f7c00bd6f8e4..3d83ee7df381 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -428,6 +428,60 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
return ret;
}
+int cgroup_bpf_prog_attach(const union bpf_attr *attr,
+ enum bpf_prog_type ptype, struct bpf_prog *prog)
+{
+ struct cgroup *cgrp;
+ int ret;
+
+ cgrp = cgroup_get_from_fd(attr->target_fd);
+ if (IS_ERR(cgrp))
+ return PTR_ERR(cgrp);
+
+ ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
+ attr->attach_flags);
+ cgroup_put(cgrp);
+ return ret;
+}
+
+int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
+{
+ struct bpf_prog *prog;
+ struct cgroup *cgrp;
+ int ret;
+
+ cgrp = cgroup_get_from_fd(attr->target_fd);
+ if (IS_ERR(cgrp))
+ return PTR_ERR(cgrp);
+
+ prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
+ if (IS_ERR(prog))
+ prog = NULL;
+
+ ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
+ if (prog)
+ bpf_prog_put(prog);
+
+ cgroup_put(cgrp);
+ return ret;
+}
+
+int cgroup_bpf_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ struct cgroup *cgrp;
+ int ret;
+
+ cgrp = cgroup_get_from_fd(attr->query.target_fd);
+ if (IS_ERR(cgrp))
+ return PTR_ERR(cgrp);
+
+ ret = cgroup_bpf_query(cgrp, attr, uattr);
+
+ cgroup_put(cgrp);
+ return ret;
+}
+
/**
* __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
* @sk: The socket sending or receiving traffic
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index a9e6c04d0f4a..1e5625d46414 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -598,8 +598,6 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
bpf_fill_ill_insns(hdr, size);
hdr->pages = size / PAGE_SIZE;
- hdr->locked = 0;
-
hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
PAGE_SIZE - sizeof(*hdr));
start = (get_random_int() % hole) & ~(alignment - 1);
@@ -1450,22 +1448,6 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
return 0;
}
-static int bpf_prog_check_pages_ro_locked(const struct bpf_prog *fp)
-{
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
- int i, err;
-
- for (i = 0; i < fp->aux->func_cnt; i++) {
- err = bpf_prog_check_pages_ro_single(fp->aux->func[i]);
- if (err)
- return err;
- }
-
- return bpf_prog_check_pages_ro_single(fp);
-#endif
- return 0;
-}
-
static void bpf_prog_select_func(struct bpf_prog *fp)
{
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
@@ -1524,17 +1506,7 @@ finalize:
* all eBPF JITs might immediately support all features.
*/
*err = bpf_check_tail_call(fp);
- if (*err)
- return fp;
-
- /* Checkpoint: at this point onwards any cBPF -> eBPF or
- * native eBPF program is read-only. If we failed to change
- * the page attributes (e.g. allocation failure from
- * splitting large pages), then reject the whole program
- * in order to guarantee not ending up with any W+X pages
- * from BPF side in kernel.
- */
- *err = bpf_prog_check_pages_ro_locked(fp);
+
return fp;
}
EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 52a91d816c0e..cf7b6a6dbd1f 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -72,6 +72,7 @@ struct bpf_htab {
u32 n_buckets;
u32 elem_size;
struct bpf_sock_progs progs;
+ struct rcu_head rcu;
};
struct htab_elem {
@@ -89,8 +90,8 @@ enum smap_psock_state {
struct smap_psock_map_entry {
struct list_head list;
struct sock **entry;
- struct htab_elem *hash_link;
- struct bpf_htab *htab;
+ struct htab_elem __rcu *hash_link;
+ struct bpf_htab __rcu *htab;
};
struct smap_psock {
@@ -120,6 +121,7 @@ struct smap_psock {
struct bpf_prog *bpf_parse;
struct bpf_prog *bpf_verdict;
struct list_head maps;
+ spinlock_t maps_lock;
/* Back reference used when sock callback trigger sockmap operations */
struct sock *sock;
@@ -140,6 +142,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
+static void bpf_tcp_close(struct sock *sk, long timeout);
static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
{
@@ -161,7 +164,42 @@ out:
return !empty;
}
-static struct proto tcp_bpf_proto;
+enum {
+ SOCKMAP_IPV4,
+ SOCKMAP_IPV6,
+ SOCKMAP_NUM_PROTS,
+};
+
+enum {
+ SOCKMAP_BASE,
+ SOCKMAP_TX,
+ SOCKMAP_NUM_CONFIGS,
+};
+
+static struct proto *saved_tcpv6_prot __read_mostly;
+static DEFINE_SPINLOCK(tcpv6_prot_lock);
+static struct proto bpf_tcp_prots[SOCKMAP_NUM_PROTS][SOCKMAP_NUM_CONFIGS];
+static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS],
+ struct proto *base)
+{
+ prot[SOCKMAP_BASE] = *base;
+ prot[SOCKMAP_BASE].close = bpf_tcp_close;
+ prot[SOCKMAP_BASE].recvmsg = bpf_tcp_recvmsg;
+ prot[SOCKMAP_BASE].stream_memory_read = bpf_tcp_stream_read;
+
+ prot[SOCKMAP_TX] = prot[SOCKMAP_BASE];
+ prot[SOCKMAP_TX].sendmsg = bpf_tcp_sendmsg;
+ prot[SOCKMAP_TX].sendpage = bpf_tcp_sendpage;
+}
+
+static void update_sk_prot(struct sock *sk, struct smap_psock *psock)
+{
+ int family = sk->sk_family == AF_INET6 ? SOCKMAP_IPV6 : SOCKMAP_IPV4;
+ int conf = psock->bpf_tx_msg ? SOCKMAP_TX : SOCKMAP_BASE;
+
+ sk->sk_prot = &bpf_tcp_prots[family][conf];
+}
+
static int bpf_tcp_init(struct sock *sk)
{
struct smap_psock *psock;
@@ -181,14 +219,17 @@ static int bpf_tcp_init(struct sock *sk)
psock->save_close = sk->sk_prot->close;
psock->sk_proto = sk->sk_prot;
- if (psock->bpf_tx_msg) {
- tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg;
- tcp_bpf_proto.sendpage = bpf_tcp_sendpage;
- tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg;
- tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read;
+ /* Build IPv6 sockmap whenever the address of tcpv6_prot changes */
+ if (sk->sk_family == AF_INET6 &&
+ unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) {
+ spin_lock_bh(&tcpv6_prot_lock);
+ if (likely(sk->sk_prot != saved_tcpv6_prot)) {
+ build_protos(bpf_tcp_prots[SOCKMAP_IPV6], sk->sk_prot);
+ smp_store_release(&saved_tcpv6_prot, sk->sk_prot);
+ }
+ spin_unlock_bh(&tcpv6_prot_lock);
}
-
- sk->sk_prot = &tcp_bpf_proto;
+ update_sk_prot(sk, psock);
rcu_read_unlock();
return 0;
}
@@ -219,16 +260,54 @@ out:
rcu_read_unlock();
}
+static struct htab_elem *lookup_elem_raw(struct hlist_head *head,
+ u32 hash, void *key, u32 key_size)
+{
+ struct htab_elem *l;
+
+ hlist_for_each_entry_rcu(l, head, hash_node) {
+ if (l->hash == hash && !memcmp(&l->key, key, key_size))
+ return l;
+ }
+
+ return NULL;
+}
+
+static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
+{
+ return &htab->buckets[hash & (htab->n_buckets - 1)];
+}
+
+static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
+{
+ return &__select_bucket(htab, hash)->head;
+}
+
static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
{
atomic_dec(&htab->count);
kfree_rcu(l, rcu);
}
+static struct smap_psock_map_entry *psock_map_pop(struct sock *sk,
+ struct smap_psock *psock)
+{
+ struct smap_psock_map_entry *e;
+
+ spin_lock_bh(&psock->maps_lock);
+ e = list_first_entry_or_null(&psock->maps,
+ struct smap_psock_map_entry,
+ list);
+ if (e)
+ list_del(&e->list);
+ spin_unlock_bh(&psock->maps_lock);
+ return e;
+}
+
static void bpf_tcp_close(struct sock *sk, long timeout)
{
void (*close_fun)(struct sock *sk, long timeout);
- struct smap_psock_map_entry *e, *tmp;
+ struct smap_psock_map_entry *e;
struct sk_msg_buff *md, *mtmp;
struct smap_psock *psock;
struct sock *osk;
@@ -247,7 +326,6 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
*/
close_fun = psock->save_close;
- write_lock_bh(&sk->sk_callback_lock);
if (psock->cork) {
free_start_sg(psock->sock, psock->cork);
kfree(psock->cork);
@@ -260,20 +338,38 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
kfree(md);
}
- list_for_each_entry_safe(e, tmp, &psock->maps, list) {
+ e = psock_map_pop(sk, psock);
+ while (e) {
if (e->entry) {
osk = cmpxchg(e->entry, sk, NULL);
if (osk == sk) {
- list_del(&e->list);
smap_release_sock(psock, sk);
}
} else {
- hlist_del_rcu(&e->hash_link->hash_node);
- smap_release_sock(psock, e->hash_link->sk);
- free_htab_elem(e->htab, e->hash_link);
+ struct htab_elem *link = rcu_dereference(e->hash_link);
+ struct bpf_htab *htab = rcu_dereference(e->htab);
+ struct hlist_head *head;
+ struct htab_elem *l;
+ struct bucket *b;
+
+ b = __select_bucket(htab, link->hash);
+ head = &b->head;
+ raw_spin_lock_bh(&b->lock);
+ l = lookup_elem_raw(head,
+ link->hash, link->key,
+ htab->map.key_size);
+ /* If another thread deleted this object skip deletion.
+ * The refcnt on psock may or may not be zero.
+ */
+ if (l) {
+ hlist_del_rcu(&link->hash_node);
+ smap_release_sock(psock, link->sk);
+ free_htab_elem(htab, link);
+ }
+ raw_spin_unlock_bh(&b->lock);
}
+ e = psock_map_pop(sk, psock);
}
- write_unlock_bh(&sk->sk_callback_lock);
rcu_read_unlock();
close_fun(sk, timeout);
}
@@ -1111,8 +1207,7 @@ static void bpf_tcp_msg_add(struct smap_psock *psock,
static int bpf_tcp_ulp_register(void)
{
- tcp_bpf_proto = tcp_prot;
- tcp_bpf_proto.close = bpf_tcp_close;
+ build_protos(bpf_tcp_prots[SOCKMAP_IPV4], &tcp_prot);
/* Once BPF TX ULP is registered it is never unregistered. It
* will be in the ULP list for the lifetime of the system. Doing
* duplicate registers is not a problem.
@@ -1357,7 +1452,9 @@ static void smap_release_sock(struct smap_psock *psock, struct sock *sock)
{
if (refcount_dec_and_test(&psock->refcnt)) {
tcp_cleanup_ulp(sock);
+ write_lock_bh(&sock->sk_callback_lock);
smap_stop_sock(psock, sock);
+ write_unlock_bh(&sock->sk_callback_lock);
clear_bit(SMAP_TX_RUNNING, &psock->state);
rcu_assign_sk_user_data(sock, NULL);
call_rcu_sched(&psock->rcu, smap_destroy_psock);
@@ -1508,6 +1605,7 @@ static struct smap_psock *smap_init_psock(struct sock *sock, int node)
INIT_LIST_HEAD(&psock->maps);
INIT_LIST_HEAD(&psock->ingress);
refcount_set(&psock->refcnt, 1);
+ spin_lock_init(&psock->maps_lock);
rcu_assign_sk_user_data(sock, psock);
sock_hold(sock);
@@ -1564,18 +1662,32 @@ free_stab:
return ERR_PTR(err);
}
-static void smap_list_remove(struct smap_psock *psock,
- struct sock **entry,
- struct htab_elem *hash_link)
+static void smap_list_map_remove(struct smap_psock *psock,
+ struct sock **entry)
{
struct smap_psock_map_entry *e, *tmp;
+ spin_lock_bh(&psock->maps_lock);
list_for_each_entry_safe(e, tmp, &psock->maps, list) {
- if (e->entry == entry || e->hash_link == hash_link) {
+ if (e->entry == entry)
list_del(&e->list);
- break;
- }
}
+ spin_unlock_bh(&psock->maps_lock);
+}
+
+static void smap_list_hash_remove(struct smap_psock *psock,
+ struct htab_elem *hash_link)
+{
+ struct smap_psock_map_entry *e, *tmp;
+
+ spin_lock_bh(&psock->maps_lock);
+ list_for_each_entry_safe(e, tmp, &psock->maps, list) {
+ struct htab_elem *c = rcu_dereference(e->hash_link);
+
+ if (c == hash_link)
+ list_del(&e->list);
+ }
+ spin_unlock_bh(&psock->maps_lock);
}
static void sock_map_free(struct bpf_map *map)
@@ -1601,7 +1713,6 @@ static void sock_map_free(struct bpf_map *map)
if (!sock)
continue;
- write_lock_bh(&sock->sk_callback_lock);
psock = smap_psock_sk(sock);
/* This check handles a racing sock event that can get the
* sk_callback_lock before this case but after xchg happens
@@ -1609,10 +1720,9 @@ static void sock_map_free(struct bpf_map *map)
* to be null and queued for garbage collection.
*/
if (likely(psock)) {
- smap_list_remove(psock, &stab->sock_map[i], NULL);
+ smap_list_map_remove(psock, &stab->sock_map[i]);
smap_release_sock(psock, sock);
}
- write_unlock_bh(&sock->sk_callback_lock);
}
rcu_read_unlock();
@@ -1661,17 +1771,15 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key)
if (!sock)
return -EINVAL;
- write_lock_bh(&sock->sk_callback_lock);
psock = smap_psock_sk(sock);
if (!psock)
goto out;
if (psock->bpf_parse)
smap_stop_sock(psock, sock);
- smap_list_remove(psock, &stab->sock_map[k], NULL);
+ smap_list_map_remove(psock, &stab->sock_map[k]);
smap_release_sock(psock, sock);
out:
- write_unlock_bh(&sock->sk_callback_lock);
return 0;
}
@@ -1752,7 +1860,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
}
}
- write_lock_bh(&sock->sk_callback_lock);
psock = smap_psock_sk(sock);
/* 2. Do not allow inheriting programs if psock exists and has
@@ -1809,7 +1916,9 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
if (err)
goto out_free;
smap_init_progs(psock, verdict, parse);
+ write_lock_bh(&sock->sk_callback_lock);
smap_start_sock(psock, sock);
+ write_unlock_bh(&sock->sk_callback_lock);
}
/* 4. Place psock in sockmap for use and stop any programs on
@@ -1819,9 +1928,10 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
*/
if (map_link) {
e->entry = map_link;
+ spin_lock_bh(&psock->maps_lock);
list_add_tail(&e->list, &psock->maps);
+ spin_unlock_bh(&psock->maps_lock);
}
- write_unlock_bh(&sock->sk_callback_lock);
return err;
out_free:
smap_release_sock(psock, sock);
@@ -1832,7 +1942,6 @@ out_progs:
}
if (tx_msg)
bpf_prog_put(tx_msg);
- write_unlock_bh(&sock->sk_callback_lock);
kfree(e);
return err;
}
@@ -1869,10 +1978,8 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
if (osock) {
struct smap_psock *opsock = smap_psock_sk(osock);
- write_lock_bh(&osock->sk_callback_lock);
- smap_list_remove(opsock, &stab->sock_map[i], NULL);
+ smap_list_map_remove(opsock, &stab->sock_map[i]);
smap_release_sock(opsock, osock);
- write_unlock_bh(&osock->sk_callback_lock);
}
out:
return err;
@@ -1915,6 +2022,24 @@ int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type)
return 0;
}
+int sockmap_get_from_fd(const union bpf_attr *attr, int type,
+ struct bpf_prog *prog)
+{
+ int ufd = attr->target_fd;
+ struct bpf_map *map;
+ struct fd f;
+ int err;
+
+ f = fdget(ufd);
+ map = __bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ err = sock_map_prog(map, prog, attr->attach_type);
+ fdput(f);
+ return err;
+}
+
static void *sock_map_lookup(struct bpf_map *map, void *key)
{
return NULL;
@@ -2043,14 +2168,13 @@ free_htab:
return ERR_PTR(err);
}
-static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
+static void __bpf_htab_free(struct rcu_head *rcu)
{
- return &htab->buckets[hash & (htab->n_buckets - 1)];
-}
+ struct bpf_htab *htab;
-static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
-{
- return &__select_bucket(htab, hash)->head;
+ htab = container_of(rcu, struct bpf_htab, rcu);
+ bpf_map_area_free(htab->buckets);
+ kfree(htab);
}
static void sock_hash_free(struct bpf_map *map)
@@ -2069,16 +2193,18 @@ static void sock_hash_free(struct bpf_map *map)
*/
rcu_read_lock();
for (i = 0; i < htab->n_buckets; i++) {
- struct hlist_head *head = select_bucket(htab, i);
+ struct bucket *b = __select_bucket(htab, i);
+ struct hlist_head *head;
struct hlist_node *n;
struct htab_elem *l;
+ raw_spin_lock_bh(&b->lock);
+ head = &b->head;
hlist_for_each_entry_safe(l, n, head, hash_node) {
struct sock *sock = l->sk;
struct smap_psock *psock;
hlist_del_rcu(&l->hash_node);
- write_lock_bh(&sock->sk_callback_lock);
psock = smap_psock_sk(sock);
/* This check handles a racing sock event that can get
* the sk_callback_lock before this case but after xchg
@@ -2086,16 +2212,15 @@ static void sock_hash_free(struct bpf_map *map)
* (psock) to be null and queued for garbage collection.
*/
if (likely(psock)) {
- smap_list_remove(psock, NULL, l);
+ smap_list_hash_remove(psock, l);
smap_release_sock(psock, sock);
}
- write_unlock_bh(&sock->sk_callback_lock);
- kfree(l);
+ free_htab_elem(htab, l);
}
+ raw_spin_unlock_bh(&b->lock);
}
rcu_read_unlock();
- bpf_map_area_free(htab->buckets);
- kfree(htab);
+ call_rcu(&htab->rcu, __bpf_htab_free);
}
static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab,
@@ -2122,19 +2247,6 @@ static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab,
return l_new;
}
-static struct htab_elem *lookup_elem_raw(struct hlist_head *head,
- u32 hash, void *key, u32 key_size)
-{
- struct htab_elem *l;
-
- hlist_for_each_entry_rcu(l, head, hash_node) {
- if (l->hash == hash && !memcmp(&l->key, key, key_size))
- return l;
- }
-
- return NULL;
-}
-
static inline u32 htab_map_hash(const void *key, u32 key_len)
{
return jhash(key, key_len, 0);
@@ -2254,9 +2366,12 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
goto bucket_err;
}
- e->hash_link = l_new;
- e->htab = container_of(map, struct bpf_htab, map);
+ rcu_assign_pointer(e->hash_link, l_new);
+ rcu_assign_pointer(e->htab,
+ container_of(map, struct bpf_htab, map));
+ spin_lock_bh(&psock->maps_lock);
list_add_tail(&e->list, &psock->maps);
+ spin_unlock_bh(&psock->maps_lock);
/* add new element to the head of the list, so that
* concurrent search will find it before old elem
@@ -2266,7 +2381,7 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
psock = smap_psock_sk(l_old->sk);
hlist_del_rcu(&l_old->hash_node);
- smap_list_remove(psock, NULL, l_old);
+ smap_list_hash_remove(psock, l_old);
smap_release_sock(psock, l_old->sk);
free_htab_elem(htab, l_old);
}
@@ -2326,7 +2441,6 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key)
struct smap_psock *psock;
hlist_del_rcu(&l->hash_node);
- write_lock_bh(&sock->sk_callback_lock);
psock = smap_psock_sk(sock);
/* This check handles a racing sock event that can get the
* sk_callback_lock before this case but after xchg happens
@@ -2334,10 +2448,9 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key)
* to be null and queued for garbage collection.
*/
if (likely(psock)) {
- smap_list_remove(psock, NULL, l);
+ smap_list_hash_remove(psock, l);
smap_release_sock(psock, sock);
}
- write_unlock_bh(&sock->sk_callback_lock);
free_htab_elem(htab, l);
ret = 0;
}
@@ -2383,6 +2496,7 @@ const struct bpf_map_ops sock_hash_ops = {
.map_get_next_key = sock_hash_get_next_key,
.map_update_elem = sock_hash_update_elem,
.map_delete_elem = sock_hash_delete_elem,
+ .map_release_uref = sock_map_release,
};
BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f12db70d3bf3..8363fd83d399 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1483,8 +1483,6 @@ out_free_tp:
return err;
}
-#ifdef CONFIG_CGROUP_BPF
-
static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
enum bpf_attach_type attach_type)
{
@@ -1499,40 +1497,6 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
-static int sockmap_get_from_fd(const union bpf_attr *attr,
- int type, bool attach)
-{
- struct bpf_prog *prog = NULL;
- int ufd = attr->target_fd;
- struct bpf_map *map;
- struct fd f;
- int err;
-
- f = fdget(ufd);
- map = __bpf_map_get(f);
- if (IS_ERR(map))
- return PTR_ERR(map);
-
- if (attach) {
- prog = bpf_prog_get_type(attr->attach_bpf_fd, type);
- if (IS_ERR(prog)) {
- fdput(f);
- return PTR_ERR(prog);
- }
- }
-
- err = sock_map_prog(map, prog, attr->attach_type);
- if (err) {
- fdput(f);
- if (prog)
- bpf_prog_put(prog);
- return err;
- }
-
- fdput(f);
- return 0;
-}
-
#define BPF_F_ATTACH_MASK \
(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)
@@ -1540,7 +1504,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
{
enum bpf_prog_type ptype;
struct bpf_prog *prog;
- struct cgroup *cgrp;
int ret;
if (!capable(CAP_NET_ADMIN))
@@ -1577,12 +1540,15 @@ static int bpf_prog_attach(const union bpf_attr *attr)
ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
break;
case BPF_SK_MSG_VERDICT:
- return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, true);
+ ptype = BPF_PROG_TYPE_SK_MSG;
+ break;
case BPF_SK_SKB_STREAM_PARSER:
case BPF_SK_SKB_STREAM_VERDICT:
- return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, true);
+ ptype = BPF_PROG_TYPE_SK_SKB;
+ break;
case BPF_LIRC_MODE2:
- return lirc_prog_attach(attr);
+ ptype = BPF_PROG_TYPE_LIRC_MODE2;
+ break;
default:
return -EINVAL;
}
@@ -1596,18 +1562,20 @@ static int bpf_prog_attach(const union bpf_attr *attr)
return -EINVAL;
}
- cgrp = cgroup_get_from_fd(attr->target_fd);
- if (IS_ERR(cgrp)) {
- bpf_prog_put(prog);
- return PTR_ERR(cgrp);
+ switch (ptype) {
+ case BPF_PROG_TYPE_SK_SKB:
+ case BPF_PROG_TYPE_SK_MSG:
+ ret = sockmap_get_from_fd(attr, ptype, prog);
+ break;
+ case BPF_PROG_TYPE_LIRC_MODE2:
+ ret = lirc_prog_attach(attr, prog);
+ break;
+ default:
+ ret = cgroup_bpf_prog_attach(attr, ptype, prog);
}
- ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
- attr->attach_flags);
if (ret)
bpf_prog_put(prog);
- cgroup_put(cgrp);
-
return ret;
}
@@ -1616,9 +1584,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
static int bpf_prog_detach(const union bpf_attr *attr)
{
enum bpf_prog_type ptype;
- struct bpf_prog *prog;
- struct cgroup *cgrp;
- int ret;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
@@ -1651,29 +1616,17 @@ static int bpf_prog_detach(const union bpf_attr *attr)
ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
break;
case BPF_SK_MSG_VERDICT:
- return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, false);
+ return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, NULL);
case BPF_SK_SKB_STREAM_PARSER:
case BPF_SK_SKB_STREAM_VERDICT:
- return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, false);
+ return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, NULL);
case BPF_LIRC_MODE2:
return lirc_prog_detach(attr);
default:
return -EINVAL;
}
- cgrp = cgroup_get_from_fd(attr->target_fd);
- if (IS_ERR(cgrp))
- return PTR_ERR(cgrp);
-
- prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
- if (IS_ERR(prog))
- prog = NULL;
-
- ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
- if (prog)
- bpf_prog_put(prog);
- cgroup_put(cgrp);
- return ret;
+ return cgroup_bpf_prog_detach(attr, ptype);
}
#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt
@@ -1681,9 +1634,6 @@ static int bpf_prog_detach(const union bpf_attr *attr)
static int bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
- struct cgroup *cgrp;
- int ret;
-
if (!capable(CAP_NET_ADMIN))
return -EPERM;
if (CHECK_ATTR(BPF_PROG_QUERY))
@@ -1711,14 +1661,9 @@ static int bpf_prog_query(const union bpf_attr *attr,
default:
return -EINVAL;
}
- cgrp = cgroup_get_from_fd(attr->query.target_fd);
- if (IS_ERR(cgrp))
- return PTR_ERR(cgrp);
- ret = cgroup_bpf_query(cgrp, attr, uattr);
- cgroup_put(cgrp);
- return ret;
+
+ return cgroup_bpf_prog_query(attr, uattr);
}
-#endif /* CONFIG_CGROUP_BPF */
#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
@@ -2365,7 +2310,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_OBJ_GET:
err = bpf_obj_get(&attr);
break;
-#ifdef CONFIG_CGROUP_BPF
case BPF_PROG_ATTACH:
err = bpf_prog_attach(&attr);
break;
@@ -2375,7 +2319,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_PROG_QUERY:
err = bpf_prog_query(&attr, uattr);
break;
-#endif
case BPF_PROG_TEST_RUN:
err = bpf_prog_test_run(&attr, uattr);
break;