summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/bpf-cgroup.h79
-rw-r--r--include/linux/cgroup-defs.h4
-rw-r--r--include/uapi/linux/bpf.h17
-rw-r--r--init/Kconfig12
-rw-r--r--kernel/bpf/Makefile1
-rw-r--r--kernel/bpf/cgroup.c167
-rw-r--r--kernel/bpf/syscall.c81
-rw-r--r--kernel/cgroup.c18
-rw-r--r--net/core/filter.c27
-rw-r--r--net/ipv4/ip_output.c26
-rw-r--r--net/ipv6/ip6_output.c9
-rw-r--r--samples/bpf/Makefile2
-rw-r--r--samples/bpf/libbpf.c21
-rw-r--r--samples/bpf/libbpf.h3
-rw-r--r--samples/bpf/test_cgrp2_attach.c147
15 files changed, 612 insertions, 2 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
new file mode 100644
index 000000000000..ec80d0c0953e
--- /dev/null
+++ b/include/linux/bpf-cgroup.h
@@ -0,0 +1,79 @@
+#ifndef _BPF_CGROUP_H
+#define _BPF_CGROUP_H
+
+#include <linux/bpf.h>
+#include <linux/jump_label.h>
+#include <uapi/linux/bpf.h>
+
+struct sock;
+struct cgroup;
+struct sk_buff;
+
+#ifdef CONFIG_CGROUP_BPF
+
+extern struct static_key_false cgroup_bpf_enabled_key;
+#define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
+
+struct cgroup_bpf {
+ /*
+ * Store two sets of bpf_prog pointers, one for programs that are
+ * pinned directly to this cgroup, and one for those that are effective
+ * when this cgroup is accessed.
+ */
+ struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE];
+ struct bpf_prog *effective[MAX_BPF_ATTACH_TYPE];
+};
+
+void cgroup_bpf_put(struct cgroup *cgrp);
+void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent);
+
+void __cgroup_bpf_update(struct cgroup *cgrp,
+ struct cgroup *parent,
+ struct bpf_prog *prog,
+ enum bpf_attach_type type);
+
+/* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */
+void cgroup_bpf_update(struct cgroup *cgrp,
+ struct bpf_prog *prog,
+ enum bpf_attach_type type);
+
+int __cgroup_bpf_run_filter(struct sock *sk,
+ struct sk_buff *skb,
+ enum bpf_attach_type type);
+
+/* Wrappers for __cgroup_bpf_run_filter() guarded by cgroup_bpf_enabled. */
+#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) \
+({ \
+ int __ret = 0; \
+ if (cgroup_bpf_enabled) \
+ __ret = __cgroup_bpf_run_filter(sk, skb, \
+ BPF_CGROUP_INET_INGRESS); \
+ \
+ __ret; \
+})
+
+#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) \
+({ \
+ int __ret = 0; \
+ if (cgroup_bpf_enabled && sk && sk == skb->sk) { \
+ typeof(sk) __sk = sk_to_full_sk(sk); \
+ if (sk_fullsock(__sk)) \
+ __ret = __cgroup_bpf_run_filter(__sk, skb, \
+ BPF_CGROUP_INET_EGRESS); \
+ } \
+ __ret; \
+})
+
+#else
+
+struct cgroup_bpf {};
+static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
+static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
+ struct cgroup *parent) {}
+
+#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
+
+#endif /* CONFIG_CGROUP_BPF */
+
+#endif /* _BPF_CGROUP_H */
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 5b17de62c962..861b4677fc5b 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -16,6 +16,7 @@
#include <linux/percpu-refcount.h>
#include <linux/percpu-rwsem.h>
#include <linux/workqueue.h>
+#include <linux/bpf-cgroup.h>
#ifdef CONFIG_CGROUPS
@@ -300,6 +301,9 @@ struct cgroup {
/* used to schedule release agent */
struct work_struct release_agent_work;
+ /* used to store eBPF programs */
+ struct cgroup_bpf bpf;
+
/* ids of the ancestors at each level including self */
int ancestor_ids[];
};
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7d9b2832c280..1370a9d1456f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -73,6 +73,8 @@ enum bpf_cmd {
BPF_PROG_LOAD,
BPF_OBJ_PIN,
BPF_OBJ_GET,
+ BPF_PROG_ATTACH,
+ BPF_PROG_DETACH,
};
enum bpf_map_type {
@@ -98,8 +100,17 @@ enum bpf_prog_type {
BPF_PROG_TYPE_TRACEPOINT,
BPF_PROG_TYPE_XDP,
BPF_PROG_TYPE_PERF_EVENT,
+ BPF_PROG_TYPE_CGROUP_SKB,
};
+enum bpf_attach_type {
+ BPF_CGROUP_INET_INGRESS,
+ BPF_CGROUP_INET_EGRESS,
+ __MAX_BPF_ATTACH_TYPE
+};
+
+#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
+
#define BPF_PSEUDO_MAP_FD 1
/* flags for BPF_MAP_UPDATE_ELEM command */
@@ -150,6 +161,12 @@ union bpf_attr {
__aligned_u64 pathname;
__u32 bpf_fd;
};
+
+ struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
+ __u32 target_fd; /* container object to attach to */
+ __u32 attach_bpf_fd; /* eBPF program to attach */
+ __u32 attach_type;
+ };
} __attribute__((aligned(8)));
/* BPF helper function descriptions:
diff --git a/init/Kconfig b/init/Kconfig
index 34407f15e6d3..405120b5f13e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1154,6 +1154,18 @@ config CGROUP_PERF
Say N if unsure.
+config CGROUP_BPF
+ bool "Support for eBPF programs attached to cgroups"
+ depends on BPF_SYSCALL && SOCK_CGROUP_DATA
+ help
+ Allow attaching eBPF programs to a cgroup using the bpf(2)
+ syscall command BPF_PROG_ATTACH.
+
+ In which context these programs are accessed depends on the type
+ of attachment. For instance, programs that are attached using
+ BPF_CGROUP_INET_INGRESS will be executed on the ingress path of
+ inet sockets.
+
config CGROUP_DEBUG
bool "Example controller"
default n
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index c4d89d6e2058..1276474ac3cd 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -5,3 +5,4 @@ obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list
ifeq ($(CONFIG_PERF_EVENTS),y)
obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
endif
+obj-$(CONFIG_CGROUP_BPF) += cgroup.o
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
new file mode 100644
index 000000000000..a0ab43f264b0
--- /dev/null
+++ b/kernel/bpf/cgroup.c
@@ -0,0 +1,167 @@
+/*
+ * Functions to manage eBPF programs attached to cgroups
+ *
+ * Copyright (c) 2016 Daniel Mack
+ *
+ * This file is subject to the terms and conditions of version 2 of the GNU
+ * General Public License. See the file COPYING in the main directory of the
+ * Linux distribution for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/atomic.h>
+#include <linux/cgroup.h>
+#include <linux/slab.h>
+#include <linux/bpf.h>
+#include <linux/bpf-cgroup.h>
+#include <net/sock.h>
+
+DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
+EXPORT_SYMBOL(cgroup_bpf_enabled_key);
+
+/**
+ * cgroup_bpf_put() - put references of all bpf programs
+ * @cgrp: the cgroup to modify
+ */
+void cgroup_bpf_put(struct cgroup *cgrp)
+{
+ unsigned int type;
+
+ for (type = 0; type < ARRAY_SIZE(cgrp->bpf.prog); type++) {
+ struct bpf_prog *prog = cgrp->bpf.prog[type];
+
+ if (prog) {
+ bpf_prog_put(prog);
+ static_branch_dec(&cgroup_bpf_enabled_key);
+ }
+ }
+}
+
+/**
+ * cgroup_bpf_inherit() - inherit effective programs from parent
+ * @cgrp: the cgroup to modify
+ * @parent: the parent to inherit from
+ */
+void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
+{
+ unsigned int type;
+
+ for (type = 0; type < ARRAY_SIZE(cgrp->bpf.effective); type++) {
+ struct bpf_prog *e;
+
+ e = rcu_dereference_protected(parent->bpf.effective[type],
+ lockdep_is_held(&cgroup_mutex));
+ rcu_assign_pointer(cgrp->bpf.effective[type], e);
+ }
+}
+
+/**
+ * __cgroup_bpf_update() - Update the pinned program of a cgroup, and
+ * propagate the change to descendants
+ * @cgrp: The cgroup which descendants to traverse
+ * @parent: The parent of @cgrp, or %NULL if @cgrp is the root
+ * @prog: A new program to pin
+ * @type: Type of pinning operation (ingress/egress)
+ *
+ * Each cgroup has a set of two pointers for bpf programs; one for eBPF
+ * programs it owns, and which is effective for execution.
+ *
+ * If @prog is %NULL, this function attaches a new program to the cgroup and
+ * releases the one that is currently attached, if any. @prog is then made
+ * the effective program of type @type in that cgroup.
+ *
+ * If @prog is %NULL, the currently attached program of type @type is released,
+ * and the effective program of the parent cgroup (if any) is inherited to
+ * @cgrp.
+ *
+ * Then, the descendants of @cgrp are walked and the effective program for
+ * each of them is set to the effective program of @cgrp unless the
+ * descendant has its own program attached, in which case the subbranch is
+ * skipped. This ensures that delegated subcgroups with own programs are left
+ * untouched.
+ *
+ * Must be called with cgroup_mutex held.
+ */
+void __cgroup_bpf_update(struct cgroup *cgrp,
+ struct cgroup *parent,
+ struct bpf_prog *prog,
+ enum bpf_attach_type type)
+{
+ struct bpf_prog *old_prog, *effective;
+ struct cgroup_subsys_state *pos;
+
+ old_prog = xchg(cgrp->bpf.prog + type, prog);
+
+ effective = (!prog && parent) ?
+ rcu_dereference_protected(parent->bpf.effective[type],
+ lockdep_is_held(&cgroup_mutex)) :
+ prog;
+
+ css_for_each_descendant_pre(pos, &cgrp->self) {
+ struct cgroup *desc = container_of(pos, struct cgroup, self);
+
+ /* skip the subtree if the descendant has its own program */
+ if (desc->bpf.prog[type] && desc != cgrp)
+ pos = css_rightmost_descendant(pos);
+ else
+ rcu_assign_pointer(desc->bpf.effective[type],
+ effective);
+ }
+
+ if (prog)
+ static_branch_inc(&cgroup_bpf_enabled_key);
+
+ if (old_prog) {
+ bpf_prog_put(old_prog);
+ static_branch_dec(&cgroup_bpf_enabled_key);
+ }
+}
+
+/**
+ * __cgroup_bpf_run_filter() - Run a program for packet filtering
+ * @sk: The socken sending or receiving traffic
+ * @skb: The skb that is being sent or received
+ * @type: The type of program to be exectuted
+ *
+ * If no socket is passed, or the socket is not of type INET or INET6,
+ * this function does nothing and returns 0.
+ *
+ * The program type passed in via @type must be suitable for network
+ * filtering. No further check is performed to assert that.
+ *
+ * This function will return %-EPERM if any if an attached program was found
+ * and if it returned != 1 during execution. In all other cases, 0 is returned.
+ */
+int __cgroup_bpf_run_filter(struct sock *sk,
+ struct sk_buff *skb,
+ enum bpf_attach_type type)
+{
+ struct bpf_prog *prog;
+ struct cgroup *cgrp;
+ int ret = 0;
+
+ if (!sk || !sk_fullsock(sk))
+ return 0;
+
+ if (sk->sk_family != AF_INET &&
+ sk->sk_family != AF_INET6)
+ return 0;
+
+ cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+
+ rcu_read_lock();
+
+ prog = rcu_dereference(cgrp->bpf.effective[type]);
+ if (prog) {
+ unsigned int offset = skb->data - skb_network_header(skb);
+
+ __skb_push(skb, offset);
+ ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM;
+ __skb_pull(skb, offset);
+ }
+
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(__cgroup_bpf_run_filter);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index eb15498b8d55..1090d16a31c1 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -835,6 +835,77 @@ static int bpf_obj_get(const union bpf_attr *attr)
return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
}
+#ifdef CONFIG_CGROUP_BPF
+
+#define BPF_PROG_ATTACH_LAST_FIELD attach_type
+
+static int bpf_prog_attach(const union bpf_attr *attr)
+{
+ struct bpf_prog *prog;
+ struct cgroup *cgrp;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (CHECK_ATTR(BPF_PROG_ATTACH))
+ return -EINVAL;
+
+ switch (attr->attach_type) {
+ case BPF_CGROUP_INET_INGRESS:
+ case BPF_CGROUP_INET_EGRESS:
+ prog = bpf_prog_get_type(attr->attach_bpf_fd,
+ BPF_PROG_TYPE_CGROUP_SKB);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ cgrp = cgroup_get_from_fd(attr->target_fd);
+ if (IS_ERR(cgrp)) {
+ bpf_prog_put(prog);
+ return PTR_ERR(cgrp);
+ }
+
+ cgroup_bpf_update(cgrp, prog, attr->attach_type);
+ cgroup_put(cgrp);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+#define BPF_PROG_DETACH_LAST_FIELD attach_type
+
+static int bpf_prog_detach(const union bpf_attr *attr)
+{
+ struct cgroup *cgrp;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (CHECK_ATTR(BPF_PROG_DETACH))
+ return -EINVAL;
+
+ switch (attr->attach_type) {
+ case BPF_CGROUP_INET_INGRESS:
+ case BPF_CGROUP_INET_EGRESS:
+ cgrp = cgroup_get_from_fd(attr->target_fd);
+ if (IS_ERR(cgrp))
+ return PTR_ERR(cgrp);
+
+ cgroup_bpf_update(cgrp, NULL, attr->attach_type);
+ cgroup_put(cgrp);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+#endif /* CONFIG_CGROUP_BPF */
+
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
union bpf_attr attr = {};
@@ -901,6 +972,16 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_OBJ_GET:
err = bpf_obj_get(&attr);
break;
+
+#ifdef CONFIG_CGROUP_BPF
+ case BPF_PROG_ATTACH:
+ err = bpf_prog_attach(&attr);
+ break;
+ case BPF_PROG_DETACH:
+ err = bpf_prog_detach(&attr);
+ break;
+#endif
+
default:
err = -EINVAL;
break;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 85bc9beb046d..2ee9ec3051b2 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -5074,6 +5074,8 @@ static void css_release_work_fn(struct work_struct *work)
if (cgrp->kn)
RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
NULL);
+
+ cgroup_bpf_put(cgrp);
}
mutex_unlock(&cgroup_mutex);
@@ -5281,6 +5283,9 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
if (!cgroup_on_dfl(cgrp))
cgrp->subtree_control = cgroup_control(cgrp);
+ if (parent)
+ cgroup_bpf_inherit(cgrp, parent);
+
cgroup_propagate_control(cgrp);
/* @cgrp doesn't have dir yet so the following will only create csses */
@@ -6495,6 +6500,19 @@ static __init int cgroup_namespaces_init(void)
}
subsys_initcall(cgroup_namespaces_init);
+#ifdef CONFIG_CGROUP_BPF
+void cgroup_bpf_update(struct cgroup *cgrp,
+ struct bpf_prog *prog,
+ enum bpf_attach_type type)
+{
+ struct cgroup *parent = cgroup_parent(cgrp);
+
+ mutex_lock(&cgroup_mutex);
+ __cgroup_bpf_update(cgrp, parent, prog, type);
+ mutex_unlock(&cgroup_mutex);
+}
+#endif /* CONFIG_CGROUP_BPF */
+
#ifdef CONFIG_CGROUP_DEBUG
static struct cgroup_subsys_state *
debug_css_alloc(struct cgroup_subsys_state *parent_css)
diff --git a/net/core/filter.c b/net/core/filter.c
index dece94fef005..ea315af56511 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -78,6 +78,10 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
return -ENOMEM;
+ err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
+ if (err)
+ return err;
+
err = security_sock_rcv_skb(sk, skb);
if (err)
return err;
@@ -2630,6 +2634,17 @@ xdp_func_proto(enum bpf_func_id func_id)
}
}
+static const struct bpf_func_proto *
+cg_skb_func_proto(enum bpf_func_id func_id)
+{
+ switch (func_id) {
+ case BPF_FUNC_skb_load_bytes:
+ return &bpf_skb_load_bytes_proto;
+ default:
+ return sk_filter_func_proto(func_id);
+ }
+}
+
static bool __is_valid_access(int off, int size, enum bpf_access_type type)
{
if (off < 0 || off >= sizeof(struct __sk_buff))
@@ -2992,6 +3007,12 @@ static const struct bpf_verifier_ops xdp_ops = {
.convert_ctx_access = xdp_convert_ctx_access,
};
+static const struct bpf_verifier_ops cg_skb_ops = {
+ .get_func_proto = cg_skb_func_proto,
+ .is_valid_access = sk_filter_is_valid_access,
+ .convert_ctx_access = sk_filter_convert_ctx_access,
+};
+
static struct bpf_prog_type_list sk_filter_type __read_mostly = {
.ops = &sk_filter_ops,
.type = BPF_PROG_TYPE_SOCKET_FILTER,
@@ -3012,12 +3033,18 @@ static struct bpf_prog_type_list xdp_type __read_mostly = {
.type = BPF_PROG_TYPE_XDP,
};
+static struct bpf_prog_type_list cg_skb_type __read_mostly = {
+ .ops = &cg_skb_ops,
+ .type = BPF_PROG_TYPE_CGROUP_SKB,
+};
+
static int __init register_sk_filter_ops(void)
{
bpf_register_prog_type(&sk_filter_type);
bpf_register_prog_type(&sched_cls_type);
bpf_register_prog_type(&sched_act_type);
bpf_register_prog_type(&xdp_type);
+ bpf_register_prog_type(&cg_skb_type);
return 0;
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 358f2c82b030..9af2b7853be4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -74,6 +74,7 @@
#include <net/checksum.h>
#include <net/inetpeer.h>
#include <net/lwtunnel.h>
+#include <linux/bpf-cgroup.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_bridge.h>
@@ -285,6 +286,13 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
unsigned int mtu;
+ int ret;
+
+ ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
+ if (ret) {
+ kfree_skb(skb);
+ return ret;
+ }
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
/* Policy lookup after SNAT yielded a new policy */
@@ -303,6 +311,20 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk
return ip_finish_output2(net, sk, skb);
}
+static int ip_mc_finish_output(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ int ret;
+
+ ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
+ if (ret) {
+ kfree_skb(skb);
+ return ret;
+ }
+
+ return dev_loopback_xmit(net, sk, skb);
+}
+
int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct rtable *rt = skb_rtable(skb);
@@ -340,7 +362,7 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (newskb)
NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
net, sk, newskb, NULL, newskb->dev,
- dev_loopback_xmit);
+ ip_mc_finish_output);
}
/* Multicasts with ttl 0 must not go beyond the host */
@@ -356,7 +378,7 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (newskb)
NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
net, sk, newskb, NULL, newskb->dev,
- dev_loopback_xmit);
+ ip_mc_finish_output);
}
return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 312cbd0e5038..70d0de404197 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -39,6 +39,7 @@
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
@@ -131,6 +132,14 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ int ret;
+
+ ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
+ if (ret) {
+ kfree_skb(skb);
+ return ret;
+ }
+
if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
dst_allfrag(skb_dst(skb)) ||
(IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index f394ac616ed8..fb17206ddb57 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -22,6 +22,7 @@ hostprogs-y += spintest
hostprogs-y += map_perf_test
hostprogs-y += test_overhead
hostprogs-y += test_cgrp2_array_pin
+hostprogs-y += test_cgrp2_attach
hostprogs-y += xdp1
hostprogs-y += xdp2
hostprogs-y += test_current_task_under_cgroup
@@ -49,6 +50,7 @@ spintest-objs := bpf_load.o libbpf.o spintest_user.o
map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o
test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o
test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o
+test_cgrp2_attach-objs := libbpf.o test_cgrp2_attach.o
xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
# reuse xdp1 source intentionally
xdp2-objs := bpf_load.o libbpf.o xdp1_user.o
diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c
index 9969e35550c3..9ce707bf02a7 100644
--- a/samples/bpf/libbpf.c
+++ b/samples/bpf/libbpf.c
@@ -104,6 +104,27 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}
+int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type)
+{
+ union bpf_attr attr = {
+ .target_fd = target_fd,
+ .attach_bpf_fd = prog_fd,
+ .attach_type = type,
+ };
+
+ return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
+}
+
+int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
+{
+ union bpf_attr attr = {
+ .target_fd = target_fd,
+ .attach_type = type,
+ };
+
+ return syscall(__NR_bpf, BPF_PROG_DETACH, &attr, sizeof(attr));
+}
+
int bpf_obj_pin(int fd, const char *pathname)
{
union bpf_attr attr = {
diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h
index de96a935068d..94a901d86fc2 100644
--- a/samples/bpf/libbpf.h
+++ b/samples/bpf/libbpf.h
@@ -15,6 +15,9 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
const struct bpf_insn *insns, int insn_len,
const char *license, int kern_version);
+int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type);
+int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
+
int bpf_obj_pin(int fd, const char *pathname);
int bpf_obj_get(const char *pathname);
diff --git a/samples/bpf/test_cgrp2_attach.c b/samples/bpf/test_cgrp2_attach.c
new file mode 100644
index 000000000000..63ef2083f766
--- /dev/null
+++ b/samples/bpf/test_cgrp2_attach.c
@@ -0,0 +1,147 @@
+/* eBPF example program:
+ *
+ * - Creates arraymap in kernel with 4 bytes keys and 8 byte values
+ *
+ * - Loads eBPF program
+ *
+ * The eBPF program accesses the map passed in to store two pieces of
+ * information. The number of invocations of the program, which maps
+ * to the number of packets received, is stored to key 0. Key 1 is
+ * incremented on each iteration by the number of bytes stored in
+ * the skb.
+ *
+ * - Detaches any eBPF program previously attached to the cgroup
+ *
+ * - Attaches the new program to a cgroup using BPF_PROG_ATTACH
+ *
+ * - Every second, reads map[0] and map[1] to see how many bytes and
+ * packets were seen on any socket of tasks in the given cgroup.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include <linux/bpf.h>
+
+#include "libbpf.h"
+
+enum {
+ MAP_KEY_PACKETS,
+ MAP_KEY_BYTES,
+};
+
+static int prog_load(int map_fd, int verdict)
+{
+ struct bpf_insn prog[] = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), /* save r6 so it's not clobbered by BPF_CALL */
+
+ /* Count packets */
+ BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_PACKETS), /* r0 = 0 */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+ BPF_LD_MAP_FD(BPF_REG_1, map_fd), /* load map fd to r1 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+ /* Count bytes */
+ BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_BYTES), /* r0 = 1 */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+ BPF_LD_MAP_FD(BPF_REG_1, map_fd),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, offsetof(struct __sk_buff, len)), /* r1 = skb->len */
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+ BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
+ BPF_EXIT_INSN(),
+ };
+
+ return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SKB,
+ prog, sizeof(prog), "GPL", 0);
+}
+
+static int usage(const char *argv0)
+{
+ printf("Usage: %s <cg-path> <egress|ingress> [drop]\n", argv0);
+ return EXIT_FAILURE;
+}
+
+int main(int argc, char **argv)
+{
+ int cg_fd, map_fd, prog_fd, key, ret;
+ long long pkt_cnt, byte_cnt;
+ enum bpf_attach_type type;
+ int verdict = 1;
+
+ if (argc < 3)
+ return usage(argv[0]);
+
+ if (strcmp(argv[2], "ingress") == 0)
+ type = BPF_CGROUP_INET_INGRESS;
+ else if (strcmp(argv[2], "egress") == 0)
+ type = BPF_CGROUP_INET_EGRESS;
+ else
+ return usage(argv[0]);
+
+ if (argc > 3 && strcmp(argv[3], "drop") == 0)
+ verdict = 0;
+
+ cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY);
+ if (cg_fd < 0) {
+ printf("Failed to open cgroup path: '%s'\n", strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY,
+ sizeof(key), sizeof(byte_cnt),
+ 256, 0);
+ if (map_fd < 0) {
+ printf("Failed to create map: '%s'\n", strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+ prog_fd = prog_load(map_fd, verdict);
+ printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
+
+ if (prog_fd < 0) {
+ printf("Failed to load prog: '%s'\n", strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+ ret = bpf_prog_detach(cg_fd, type);
+ printf("bpf_prog_detach() returned '%s' (%d)\n", strerror(errno), errno);
+
+ ret = bpf_prog_attach(prog_fd, cg_fd, type);
+ if (ret < 0) {
+ printf("Failed to attach prog to cgroup: '%s'\n",
+ strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+ while (1) {
+ key = MAP_KEY_PACKETS;
+ assert(bpf_lookup_elem(map_fd, &key, &pkt_cnt) == 0);
+
+ key = MAP_KEY_BYTES;
+ assert(bpf_lookup_elem(map_fd, &key, &byte_cnt) == 0);
+
+ printf("cgroup received %lld packets, %lld bytes\n",
+ pkt_cnt, byte_cnt);
+ sleep(1);
+ }
+
+ return EXIT_SUCCESS;
+}