summaryrefslogtreecommitdiff
path: root/samples/bpf
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-07-27 12:03:20 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2016-07-27 12:03:20 -0700
commit468fc7ed5537615efe671d94248446ac24679773 (patch)
tree27bc9de792e863d6ec1630927b77ac9e7dabb38a /samples/bpf
parent08fd8c17686c6b09fa410a26d516548dd80ff147 (diff)
parent36232012344b8db67052432742deaf17f82e70e6 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) Unified UDP encapsulation offload methods for drivers, from Alexander Duyck. 2) Make DSA binding more sane, from Andrew Lunn. 3) Support QCA9888 chips in ath10k, from Anilkumar Kolli. 4) Several workqueue usage cleanups, from Bhaktipriya Shridhar. 5) Add XDP (eXpress Data Path), essentially running BPF programs on RX packets as soon as the device sees them, with the option to mirror the packet on TX via the same interface. From Brenden Blanco and others. 6) Allow qdisc/class stats dumps to run lockless, from Eric Dumazet. 7) Add VLAN support to b53 and bcm_sf2, from Florian Fainelli. 8) Simplify netlink conntrack entry layout, from Florian Westphal. 9) Add ipv4 forwarding support to mlxsw spectrum driver, from Ido Schimmel, Yotam Gigi, and Jiri Pirko. 10) Add SKB array infrastructure and convert tun and macvtap over to it. From Michael S Tsirkin and Jason Wang. 11) Support qdisc packet injection in pktgen, from John Fastabend. 12) Add neighbour monitoring framework to TIPC, from Jon Paul Maloy. 13) Add NV congestion control support to TCP, from Lawrence Brakmo. 14) Add GSO support to SCTP, from Marcelo Ricardo Leitner. 15) Allow GRO and RPS to function on macsec devices, from Paolo Abeni. 16) Support MPLS over IPV4, from Simon Horman. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1622 commits) xgene: Fix build warning with ACPI disabled. be2net: perform temperature query in adapter regardless of its interface state l2tp: Correctly return -EBADF from pppol2tp_getname. 
net/mlx5_core/health: Remove deprecated create_singlethread_workqueue net: ipmr/ip6mr: update lastuse on entry change macsec: ensure rx_sa is set when validation is disabled tipc: dump monitor attributes tipc: add a function to get the bearer name tipc: get monitor threshold for the cluster tipc: make cluster size threshold for monitoring configurable tipc: introduce constants for tipc address validation net: neigh: disallow transition to NUD_STALE if lladdr is unchanged in neigh_update() MAINTAINERS: xgene: Add driver and documentation path Documentation: dtb: xgene: Add MDIO node dtb: xgene: Add MDIO node drivers: net: xgene: ethtool: Use phy_ethtool_gset and sset drivers: net: xgene: Use exported functions drivers: net: xgene: Enable MDIO driver drivers: net: xgene: Add backward compatibility drivers: net: phy: xgene: Add MDIO driver ...
Diffstat (limited to 'samples/bpf')
-rw-r--r--samples/bpf/Makefile16
-rw-r--r--samples/bpf/bpf_helpers.h4
-rw-r--r--samples/bpf/bpf_load.c8
-rw-r--r--samples/bpf/sockex2_user.c3
-rw-r--r--samples/bpf/sockex3_user.c3
-rw-r--r--samples/bpf/test_cgrp2_array_pin.c109
-rwxr-xr-xsamples/bpf/test_cgrp2_tc.sh184
-rw-r--r--samples/bpf/test_cgrp2_tc_kern.c69
-rw-r--r--samples/bpf/test_probe_write_user_kern.c52
-rw-r--r--samples/bpf/test_probe_write_user_user.c78
-rw-r--r--samples/bpf/xdp1_kern.c93
-rw-r--r--samples/bpf/xdp1_user.c181
-rw-r--r--samples/bpf/xdp2_kern.c114
13 files changed, 914 insertions, 0 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 0bf2478cb7df..90ebf7d35c07 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -14,12 +14,16 @@ hostprogs-y += tracex3
hostprogs-y += tracex4
hostprogs-y += tracex5
hostprogs-y += tracex6
+hostprogs-y += test_probe_write_user
hostprogs-y += trace_output
hostprogs-y += lathist
hostprogs-y += offwaketime
hostprogs-y += spintest
hostprogs-y += map_perf_test
hostprogs-y += test_overhead
+hostprogs-y += test_cgrp2_array_pin
+hostprogs-y += xdp1
+hostprogs-y += xdp2
test_verifier-objs := test_verifier.o libbpf.o
test_maps-objs := test_maps.o libbpf.o
@@ -34,12 +38,17 @@ tracex3-objs := bpf_load.o libbpf.o tracex3_user.o
tracex4-objs := bpf_load.o libbpf.o tracex4_user.o
tracex5-objs := bpf_load.o libbpf.o tracex5_user.o
tracex6-objs := bpf_load.o libbpf.o tracex6_user.o
+test_probe_write_user-objs := bpf_load.o libbpf.o test_probe_write_user_user.o
trace_output-objs := bpf_load.o libbpf.o trace_output_user.o
lathist-objs := bpf_load.o libbpf.o lathist_user.o
offwaketime-objs := bpf_load.o libbpf.o offwaketime_user.o
spintest-objs := bpf_load.o libbpf.o spintest_user.o
map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o
test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o
+test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o
+xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
+# reuse xdp1 source intentionally
+xdp2-objs := bpf_load.o libbpf.o xdp1_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -52,6 +61,7 @@ always += tracex3_kern.o
always += tracex4_kern.o
always += tracex5_kern.o
always += tracex6_kern.o
+always += test_probe_write_user_kern.o
always += trace_output_kern.o
always += tcbpf1_kern.o
always += lathist_kern.o
@@ -61,6 +71,9 @@ always += map_perf_test_kern.o
always += test_overhead_tp_kern.o
always += test_overhead_kprobe_kern.o
always += parse_varlen.o parse_simple.o parse_ldabs.o
+always += test_cgrp2_tc_kern.o
+always += xdp1_kern.o
+always += xdp2_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
@@ -75,12 +88,15 @@ HOSTLOADLIBES_tracex3 += -lelf
HOSTLOADLIBES_tracex4 += -lelf -lrt
HOSTLOADLIBES_tracex5 += -lelf
HOSTLOADLIBES_tracex6 += -lelf
+HOSTLOADLIBES_test_probe_write_user += -lelf
HOSTLOADLIBES_trace_output += -lelf -lrt
HOSTLOADLIBES_lathist += -lelf
HOSTLOADLIBES_offwaketime += -lelf
HOSTLOADLIBES_spintest += -lelf
HOSTLOADLIBES_map_perf_test += -lelf -lrt
HOSTLOADLIBES_test_overhead += -lelf -lrt
+HOSTLOADLIBES_xdp1 += -lelf
+HOSTLOADLIBES_xdp2 += -lelf
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 7904a2a493de..217c8d507f2e 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -41,6 +41,8 @@ static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data,
(void *) BPF_FUNC_perf_event_output;
static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
(void *) BPF_FUNC_get_stackid;
+static int (*bpf_probe_write_user)(void *dst, void *src, int size) =
+ (void *) BPF_FUNC_probe_write_user;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
@@ -70,6 +72,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag
(void *) BPF_FUNC_l3_csum_replace;
static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
(void *) BPF_FUNC_l4_csum_replace;
+static int (*bpf_skb_in_cgroup)(void *ctx, void *map, int index) =
+ (void *) BPF_FUNC_skb_in_cgroup;
#if defined(__x86_64__)
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 022af71c2bb5..0cfda2320320 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -50,6 +50,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
bool is_kprobe = strncmp(event, "kprobe/", 7) == 0;
bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0;
+ bool is_xdp = strncmp(event, "xdp", 3) == 0;
enum bpf_prog_type prog_type;
char buf[256];
int fd, efd, err, id;
@@ -66,6 +67,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
prog_type = BPF_PROG_TYPE_KPROBE;
} else if (is_tracepoint) {
prog_type = BPF_PROG_TYPE_TRACEPOINT;
+ } else if (is_xdp) {
+ prog_type = BPF_PROG_TYPE_XDP;
} else {
printf("Unknown event '%s'\n", event);
return -1;
@@ -79,6 +82,9 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
prog_fd[prog_cnt++] = fd;
+ if (is_xdp)
+ return 0;
+
if (is_socket) {
event += 6;
if (*event != '/')
@@ -319,6 +325,7 @@ int load_bpf_file(char *path)
if (memcmp(shname_prog, "kprobe/", 7) == 0 ||
memcmp(shname_prog, "kretprobe/", 10) == 0 ||
memcmp(shname_prog, "tracepoint/", 11) == 0 ||
+ memcmp(shname_prog, "xdp", 3) == 0 ||
memcmp(shname_prog, "socket", 6) == 0)
load_and_attach(shname_prog, insns, data_prog->d_size);
}
@@ -336,6 +343,7 @@ int load_bpf_file(char *path)
if (memcmp(shname, "kprobe/", 7) == 0 ||
memcmp(shname, "kretprobe/", 10) == 0 ||
memcmp(shname, "tracepoint/", 11) == 0 ||
+ memcmp(shname, "xdp", 3) == 0 ||
memcmp(shname, "socket", 6) == 0)
load_and_attach(shname, data->d_buf, data->d_size);
}
diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c
index 29a276d766fc..8a4085c2d117 100644
--- a/samples/bpf/sockex2_user.c
+++ b/samples/bpf/sockex2_user.c
@@ -5,6 +5,7 @@
#include "bpf_load.h"
#include <unistd.h>
#include <arpa/inet.h>
+#include <sys/resource.h>
struct pair {
__u64 packets;
@@ -13,11 +14,13 @@ struct pair {
int main(int ac, char **argv)
{
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
char filename[256];
FILE *f;
int i, sock;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ setrlimit(RLIMIT_MEMLOCK, &r);
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c
index 2617772d060d..d4184ab5f3ac 100644
--- a/samples/bpf/sockex3_user.c
+++ b/samples/bpf/sockex3_user.c
@@ -5,6 +5,7 @@
#include "bpf_load.h"
#include <unistd.h>
#include <arpa/inet.h>
+#include <sys/resource.h>
struct flow_keys {
__be32 src;
@@ -23,11 +24,13 @@ struct pair {
int main(int argc, char **argv)
{
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
char filename[256];
FILE *f;
int i, sock;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ setrlimit(RLIMIT_MEMLOCK, &r);
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
diff --git a/samples/bpf/test_cgrp2_array_pin.c b/samples/bpf/test_cgrp2_array_pin.c
new file mode 100644
index 000000000000..70e86f7be69d
--- /dev/null
+++ b/samples/bpf/test_cgrp2_array_pin.c
@@ -0,0 +1,109 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/unistd.h>
+#include <linux/bpf.h>
+
+#include <stdio.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include "libbpf.h"
+
+/* Print the command synopsis and the option summary to stdout. */
+static void usage(void)
+{
+	static const char *const help_lines[] = {
+		"Usage: test_cgrp2_array_pin [...]\n",
+		" -F <file> File to pin an BPF cgroup array\n",
+		" -U <file> Update an already pinned BPF cgroup array\n",
+		" -v <value> Full path of the cgroup2\n",
+		" -h Display this help\n",
+	};
+	unsigned int i;
+
+	for (i = 0; i < sizeof(help_lines) / sizeof(help_lines[0]); i++)
+		printf("%s", help_lines[i]);
+}
+
+/*
+ * Store a cgroup2 directory in slot 0 of a BPF cgroup array.
+ *
+ * -F <file>  create a new one-entry cgroup array and pin it at <file>
+ * -U <file>  open the array already pinned at <file> instead of creating one
+ * -v <path>  cgroup2 directory whose file descriptor is stored in the array
+ * -h         print the usage text and exit successfully
+ *
+ * Returns 0 on success (and for -h); -1 on a usage error or a failed open,
+ * map creation or bpf_obj_get; otherwise the non-zero result of the failing
+ * bpf_update_elem/bpf_obj_pin call.
+ */
+int main(int argc, char **argv)
+{
+	const char *pinned_file = NULL, *cg2 = NULL;
+	int create_array = 1;
+	int array_key = 0;
+	int array_fd = -1;
+	int cg2_fd = -1;
+	int ret = -1;
+	int opt;
+
+	/* 'h' is listed so that the -h advertised by usage() is accepted. */
+	while ((opt = getopt(argc, argv, "F:U:v:h")) != -1) {
+		switch (opt) {
+		/* General args */
+		case 'F':
+			pinned_file = optarg;
+			break;
+		case 'U':
+			pinned_file = optarg;
+			create_array = 0;
+			break;
+		case 'v':
+			cg2 = optarg;
+			break;
+		case 'h':
+			/* Asking for help is not an error. */
+			usage();
+			ret = 0;
+			goto out;
+		default:
+			usage();
+			goto out;
+		}
+	}
+
+	/* Both the cgroup path and the pin location are mandatory. */
+	if (!cg2 || !pinned_file) {
+		usage();
+		goto out;
+	}
+
+	cg2_fd = open(cg2, O_RDONLY);
+	if (cg2_fd < 0) {
+		fprintf(stderr, "open(%s,...): %s(%d)\n",
+			cg2, strerror(errno), errno);
+		goto out;
+	}
+
+	if (create_array) {
+		array_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY,
+					  sizeof(uint32_t), sizeof(uint32_t),
+					  1, 0);
+		if (array_fd < 0) {
+			fprintf(stderr,
+				"bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY,...): %s(%d)\n",
+				strerror(errno), errno);
+			goto out;
+		}
+	} else {
+		array_fd = bpf_obj_get(pinned_file);
+		if (array_fd < 0) {
+			fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n",
+				pinned_file, strerror(errno), errno);
+			goto out;
+		}
+	}
+
+	/* The value written into the array is the cgroup's file descriptor. */
+	ret = bpf_update_elem(array_fd, &array_key, &cg2_fd, 0);
+	if (ret) {
+		perror("bpf_update_elem");
+		goto out;
+	}
+
+	/* A freshly created array is pinned only after it has been filled. */
+	if (create_array) {
+		ret = bpf_obj_pin(array_fd, pinned_file);
+		if (ret) {
+			fprintf(stderr, "bpf_obj_pin(..., %s): %s(%d)\n",
+				pinned_file, strerror(errno), errno);
+			goto out;
+		}
+	}
+
+out:
+	if (array_fd != -1)
+		close(array_fd);
+	if (cg2_fd != -1)
+		close(cg2_fd);
+	return ret;
+}
diff --git a/samples/bpf/test_cgrp2_tc.sh b/samples/bpf/test_cgrp2_tc.sh
new file mode 100755
index 000000000000..0b119eeaf85c
--- /dev/null
+++ b/samples/bpf/test_cgrp2_tc.sh
@@ -0,0 +1,184 @@
+#!/bin/bash
+
+MY_DIR=$(dirname $0)
+# Details on the bpf prog
+BPF_CGRP2_ARRAY_NAME='test_cgrp2_array_pin'
+BPF_PROG="$MY_DIR/test_cgrp2_tc_kern.o"
+BPF_SECTION='filter'
+
+[ -z "$TC" ] && TC='tc'
+[ -z "$IP" ] && IP='ip'
+
+# Names of the veth interface, net namespace...etc.
+HOST_IFC='ve'
+NS_IFC='vens'
+NS='ns'
+
+# Print the mount point of the first /proc/mounts entry whose filesystem
+# type (third field) equals $1. An empty line is printed when none matches.
+find_mnt() {
+	awk '{ if ($3 == "'$1'" && mnt == "") { mnt = $2 }} END { print mnt }' \
+		< /proc/mounts
+}
+
+# Set CGRP2_ROOT, CGRP2_TC and CGRP2_TC_LEAF. When no cgroup2 mount is found,
+# fall back to /mnt/cgroup2 and set MOUNT_CGRP2 to "yes".
+init_cgrp2_vars() {
+	CGRP2_ROOT=$(find_mnt cgroup2)
+	if [ -z "$CGRP2_ROOT" ]; then
+		MOUNT_CGRP2="yes"
+		CGRP2_ROOT='/mnt/cgroup2'
+	fi
+	CGRP2_TC="$CGRP2_ROOT/tc"
+	CGRP2_TC_LEAF="$CGRP2_ROOT/tc/leaf"
+}
+
+# Set BPF_FS_TC_SHARE to <bpf mount point>/tc/globals.
+# Returns 255 and leaves BPF_FS_TC_SHARE untouched when no bpf filesystem
+# is mounted.
+init_bpf_fs_vars() {
+	local bpf_fs_root=$(find_mnt bpf)
+	# 255 is spelled out because older bash releases parse "return -1"
+	# as an invalid option instead of a status.
+	[ -n "$bpf_fs_root" ] || return 255
+	BPF_FS_TC_SHARE="$bpf_fs_root/tc/globals"
+}
+
+# $1 == start: create $CGRP2_TC_LEAF, first mounting cgroup2 on $CGRP2_ROOT
+# when MOUNT_CGRP2 is 'yes' (a failed mount is returned to the caller).
+# Any other $1: remove the leaf and tc directories, then unmount
+# $CGRP2_ROOT when MOUNT_CGRP2 is 'yes'.
+setup_cgrp2() {
+ case $1 in
+ start)
+ if [ "$MOUNT_CGRP2" == 'yes' ]
+ then
+ [ -d $CGRP2_ROOT ] || mkdir -p $CGRP2_ROOT
+ mount -t cgroup2 none $CGRP2_ROOT || return $?
+ fi
+ mkdir -p $CGRP2_TC_LEAF
+ ;;
+ *)
+ # The parent is removed only if removing the leaf succeeded.
+ rmdir $CGRP2_TC_LEAF && rmdir $CGRP2_TC
+ [ "$MOUNT_CGRP2" == 'yes' ] && umount $CGRP2_ROOT
+ ;;
+ esac
+}
+
+# $1 == start: run test_cgrp2_array_pin with -U on the file
+# $BPF_FS_TC_SHARE/$BPF_CGRP2_ARRAY_NAME and -v $CGRP2_TC.
+# Any other $1: delete that file, provided $BPF_FS_TC_SHARE is a directory.
+# NOTE(review): -U expects the file to exist already; presumably it is
+# created when tc loads $BPF_PROG in setup_net -- confirm.
+setup_bpf_cgrp2_array() {
+ local bpf_cgrp2_array="$BPF_FS_TC_SHARE/$BPF_CGRP2_ARRAY_NAME"
+ case $1 in
+ start)
+ $MY_DIR/test_cgrp2_array_pin -U $bpf_cgrp2_array -v $CGRP2_TC
+ ;;
+ *)
+ [ -d "$BPF_FS_TC_SHARE" ] && rm -f $bpf_cgrp2_array
+ ;;
+ esac
+}
+
+# $1 == start: create the veth pair $HOST_IFC/$NS_IFC, move $NS_IFC into the
+# network namespace $NS, bring both ends up and attach section $BPF_SECTION
+# of $BPF_PROG as an egress bpf filter on $HOST_IFC. The status of the first
+# failing ip/tc step is returned; the two sysctl calls are not checked.
+# Any other $1: delete the namespace and the host-side interface.
+setup_net() {
+	case $1 in
+	start)
+		$IP link add $HOST_IFC type veth peer name $NS_IFC || return $?
+		$IP link set dev $HOST_IFC up || return $?
+		sysctl -q net.ipv6.conf.$HOST_IFC.accept_dad=0
+
+		# Always refer to the namespace through $NS so that creating
+		# it agrees with the commands that later use and delete it.
+		$IP netns add $NS || return $?
+		$IP link set dev $NS_IFC netns $NS || return $?
+		$IP -n $NS link set dev $NS_IFC up || return $?
+		$IP netns exec $NS sysctl -q net.ipv6.conf.$NS_IFC.accept_dad=0
+		$TC qdisc add dev $HOST_IFC clsact || return $?
+		$TC filter add dev $HOST_IFC egress bpf da obj $BPF_PROG sec $BPF_SECTION || return $?
+		;;
+	*)
+		$IP netns del $NS
+		$IP link del $HOST_IFC
+		;;
+	esac
+}
+
+# Run command line $2 from a child bash that first writes its own pid to
+# $1/cgroup.procs and then exec's the command. Using a separate shell keeps
+# this script out of the cgroup, which simplifies cleanup after the test.
+run_in_cgrp() {
+	cmd='echo $$ > '" $1/cgroup.procs; exec $2"
+	bash -c "$cmd"
+}
+
+# Send three IPv6 pings to ff02::1 on $HOST_IFC from a shell placed in
+# $CGRP2_TC_LEAF, then read a counter from 'tc -s qdisc show'.
+# Prints FAIL and returns 1 when that counter evaluates to 0; otherwise
+# reports the count and returns 0.
+do_test() {
+ run_in_cgrp $CGRP2_TC_LEAF "ping -6 -c3 ff02::1%$HOST_IFC >& /dev/null"
+ # From the last three output lines, take field 7 of the line containing
+ # "drop" and keep the text before its first comma.
+ local dropped=$($TC -s qdisc show dev $HOST_IFC | tail -3 | \
+ awk '/drop/{print substr($7, 0, index($7, ",")-1)}')
+ if [[ $dropped -eq 0 ]]
+ then
+ echo "FAIL"
+ return 1
+ else
+ echo "Successfully filtered $dropped packets"
+ return 0
+ fi
+}
+
+# Exit handler. When DEBUG is "yes" and MODE is not 'cleanuponly', dump the
+# mounts, cgroup directory, bpf share directory and network state. Then,
+# unless MODE is 'nocleanup', tear down the network, the pinned array and
+# the cgroup directories, in that order.
+do_exit() {
+ if [ "$DEBUG" == "yes" ] && [ "$MODE" != 'cleanuponly' ]
+ then
+ echo "------ DEBUG ------"
+ echo "mount: "; mount | egrep '(cgroup2|bpf)'; echo
+ echo "$CGRP2_TC_LEAF: "; ls -l $CGRP2_TC_LEAF; echo
+ if [ -d "$BPF_FS_TC_SHARE" ]
+ then
+ echo "$BPF_FS_TC_SHARE: "; ls -l $BPF_FS_TC_SHARE; echo
+ fi
+ echo "Host net:"
+ $IP netns
+ $IP link show dev $HOST_IFC
+ $IP -6 a show dev $HOST_IFC
+ $TC -s qdisc show dev $HOST_IFC
+ echo
+ echo "$NS net:"
+ $IP -n $NS link show dev $NS_IFC
+ # NOTE(review): the host section shows "-6 a" here; "-6 link" may be a
+ # copy-paste slip -- confirm which output was intended.
+ $IP -n $NS -6 link show dev $NS_IFC
+ echo "------ DEBUG ------"
+ echo
+ fi
+
+ if [ "$MODE" != 'nocleanup' ]
+ then
+ setup_net stop
+ setup_bpf_cgrp2_array stop
+ setup_cgrp2 stop
+ fi
+}
+
+# Work out the cgroup2 and bpf fs locations up front so that cleanup-only
+# mode has them too. The bpf fs lookup is allowed to fail at this point.
+init_cgrp2_vars
+init_bpf_fs_vars
+
+# Parse the optional mode words; anything unknown prints the help text.
+while [[ $# -ge 1 ]]
+do
+	a="$1"
+	case $a in
+	debug)
+		DEBUG='yes'
+		shift 1
+		;;
+	cleanup-only)
+		MODE='cleanuponly'
+		shift 1
+		;;
+	no-cleanup)
+		MODE='nocleanup'
+		shift 1
+		;;
+	*)
+		echo "test_cgrp2_tc [debug] [cleanup-only | no-cleanup]"
+		echo " debug: Print cgrp and network setup details at the end of the test"
+		echo " cleanup-only: Try to cleanup things from last test. No test will be run"
+		echo " no-cleanup: Run the test but don't do cleanup at the end"
+		echo "[Note: If no arg is given, it will run the test and do cleanup at the end]"
+		echo
+		exit -1
+		;;
+	esac
+done
+
+# do_exit runs on every exit path from here on.
+trap do_exit 0
+
+[ "$MODE" == 'cleanuponly' ] && exit
+
+setup_cgrp2 start || exit $?
+setup_net start || exit $?
+init_bpf_fs_vars || exit $?
+setup_bpf_cgrp2_array start || exit $?
+
+# Keep the test verdict: the trailing echo would otherwise reset the exit
+# status to 0 even after do_test printed FAIL.
+do_test
+rc=$?
+echo
+exit $rc
diff --git a/samples/bpf/test_cgrp2_tc_kern.c b/samples/bpf/test_cgrp2_tc_kern.c
new file mode 100644
index 000000000000..2732c37c8d5b
--- /dev/null
+++ b/samples/bpf/test_cgrp2_tc_kern.c
@@ -0,0 +1,69 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/in6.h>
+#include <uapi/linux/ipv6.h>
+#include <uapi/linux/pkt_cls.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+/*
+ * Copy of 'struct ethhdr' without __packed. handle_egress() overlays it on
+ * the first bytes of the packet data.
+ */
+struct eth_hdr {
+ unsigned char h_dest[ETH_ALEN];
+ unsigned char h_source[ETH_ALEN];
+ unsigned short h_proto; /* compared against htons(ETH_P_IPV6) */
+};
+
+/* Value assigned to bpf_elf_map.pinning by the map defined in this file. */
+#define PIN_GLOBAL_NS 2
+/*
+ * Map description emitted into the "maps" ELF section of this object.
+ * NOTE(review): the field order presumably has to match what the tc loader
+ * reads from the object file -- confirm against iproute2 before changing.
+ */
+struct bpf_elf_map {
+ __u32 type;
+ __u32 size_key;
+ __u32 size_value;
+ __u32 max_elem;
+ __u32 flags;
+ __u32 id;
+ __u32 pinning;
+};
+
+/*
+ * Single-entry cgroup array queried by handle_egress(). Key and value are
+ * both 32 bits wide; pinning is PIN_GLOBAL_NS.
+ */
+struct bpf_elf_map SEC("maps") test_cgrp2_array_pin = {
+	.type		= BPF_MAP_TYPE_CGROUP_ARRAY,
+	.size_key	= sizeof(uint32_t),
+	.size_value	= sizeof(uint32_t),
+	.max_elem	= 1,
+	.pinning	= PIN_GLOBAL_NS,
+};
+
+/*
+ * Egress classifier. Returns TC_ACT_SHOT for an ICMPv6-over-IPv6 packet for
+ * which bpf_skb_in_cgroup() reports 1 against slot 0 of
+ * test_cgrp2_array_pin, and TC_ACT_OK for everything else. Every decision
+ * made after the length check is logged with bpf_trace_printk().
+ */
+SEC("filter")
+int handle_egress(struct __sk_buff *skb)
+{
+	char reject_msg[] = "reject\n";
+	char pass_msg[] = "pass\n";
+	char dont_care_msg[] = "dont care %04x %d\n";
+	void *pkt_end = (void *)(long)skb->data_end;
+	void *pkt = (void *)(long)skb->data;
+	struct eth_hdr *l2 = pkt;
+	struct ipv6hdr *l3 = pkt + sizeof(*l2);
+
+	/* one bounds check covers both headers */
+	if (pkt + sizeof(*l2) + sizeof(*l3) > pkt_end)
+		return TC_ACT_OK;
+
+	/* only ICMPv6 carried directly in IPv6 is of interest */
+	if (l2->h_proto != htons(ETH_P_IPV6) ||
+	    l3->nexthdr != IPPROTO_ICMPV6) {
+		bpf_trace_printk(dont_care_msg, sizeof(dont_care_msg),
+				 l2->h_proto, l3->nexthdr);
+		return TC_ACT_OK;
+	}
+
+	if (bpf_skb_in_cgroup(skb, &test_cgrp2_array_pin, 0) == 1) {
+		bpf_trace_printk(reject_msg, sizeof(reject_msg));
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(pass_msg, sizeof(pass_msg));
+	return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_probe_write_user_kern.c b/samples/bpf/test_probe_write_user_kern.c
new file mode 100644
index 000000000000..3a677c807044
--- /dev/null
+++ b/samples/bpf/test_probe_write_user_kern.c
@@ -0,0 +1,52 @@
+/* Copyright (c) 2016 Sargun Dhillon <sargun@sargun.me>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <uapi/linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+
+/*
+ * Hash map with room for 256 entries whose key and value are both a
+ * struct sockaddr_in. bpf_prog1() looks up the address handed to connect
+ * and, on a hit, writes the stored value over it.
+ */
+struct bpf_map_def SEC("maps") dnat_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct sockaddr_in),
+ .value_size = sizeof(struct sockaddr_in),
+ .max_entries = 256,
+};
+
+/* kprobe is NOT a stable ABI.
+ * Kernel functions can be removed, renamed or completely change semantics.
+ * The number of arguments and their positions can change, etc.
+ * In such a case this bpf+kprobe example will no longer be meaningful.
+ *
+ * This example sits on a syscall. The syscall ABI is relatively stable,
+ * but it still differs across platforms and may change over time.
+ *
+ * The program reads the sockaddr passed as the second argument of
+ * sys_connect, looks it up in dnat_map and, when an entry exists, overwrites
+ * the caller's sockaddr with the mapped one. It always returns 0.
+ */
+SEC("kprobe/sys_connect")
+int bpf_prog1(struct pt_regs *ctx)
+{
+ struct sockaddr_in new_addr, orig_addr = {};
+ struct sockaddr_in *mapped_addr;
+ void *sockaddr_arg = (void *)PT_REGS_PARM2(ctx);
+ int sockaddr_len = (int)PT_REGS_PARM3(ctx);
+
+ /* Ignore addresses longer than a sockaddr_in. */
+ if (sockaddr_len > sizeof(orig_addr))
+ return 0;
+
+ /* Copy the caller's address; give up if it cannot be read. */
+ if (bpf_probe_read(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0)
+ return 0;
+
+ mapped_addr = bpf_map_lookup_elem(&dnat_map, &orig_addr);
+ if (mapped_addr != NULL) {
+ /* Stage the map value in a local before writing it out. */
+ memcpy(&new_addr, mapped_addr, sizeof(new_addr));
+ bpf_probe_write_user(sockaddr_arg, &new_addr,
+ sizeof(new_addr));
+ }
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/test_probe_write_user_user.c b/samples/bpf/test_probe_write_user_user.c
new file mode 100644
index 000000000000..a44bf347bedd
--- /dev/null
+++ b/samples/bpf/test_probe_write_user_user.c
@@ -0,0 +1,78 @@
+#include <stdio.h>
+#include <assert.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+#include <sys/socket.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+/*
+ * User-space driver for the bpf_probe_write_user() sample.
+ *
+ * Loads "<argv[0]>_kern.o", binds a TCP server to an ephemeral port on the
+ * loopback address, stores the pair (255.255.255.255:5555 -> server address)
+ * in map_fd[0] and then connects a client to 255.255.255.255:5555. The test
+ * passes when the connection reaches the server, i.e. when the client's
+ * peer address equals the server's bound address.
+ *
+ * Returns 0 on success and 1 when the BPF object cannot be loaded; every
+ * other failure trips an assert().
+ */
+int main(int ac, char **argv)
+{
+	int serverfd, serverconnfd, clientfd;
+	socklen_t sockaddr_len;
+	struct sockaddr serv_addr, mapped_addr, tmp_addr;
+	struct sockaddr_in *serv_addr_in, *mapped_addr_in, *tmp_addr_in;
+	char filename[256];
+	char *ip;
+
+	serv_addr_in = (struct sockaddr_in *)&serv_addr;
+	mapped_addr_in = (struct sockaddr_in *)&mapped_addr;
+	tmp_addr_in = (struct sockaddr_in *)&tmp_addr;
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	assert((serverfd = socket(AF_INET, SOCK_STREAM, 0)) > 0);
+	assert((clientfd = socket(AF_INET, SOCK_STREAM, 0)) > 0);
+
+	/* Bind server to ephemeral port on lo */
+	memset(&serv_addr, 0, sizeof(serv_addr));
+	serv_addr_in->sin_family = AF_INET;
+	serv_addr_in->sin_port = 0;
+	serv_addr_in->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+
+	assert(bind(serverfd, &serv_addr, sizeof(serv_addr)) == 0);
+
+	/* Read back the port that was actually assigned. */
+	sockaddr_len = sizeof(serv_addr);
+	assert(getsockname(serverfd, &serv_addr, &sockaddr_len) == 0);
+	ip = inet_ntoa(serv_addr_in->sin_addr);
+	printf("Server bound to: %s:%d\n", ip, ntohs(serv_addr_in->sin_port));
+
+	/* Address the client will ask for; the map redirects it to the server. */
+	memset(&mapped_addr, 0, sizeof(mapped_addr));
+	mapped_addr_in->sin_family = AF_INET;
+	mapped_addr_in->sin_port = htons(5555);
+	mapped_addr_in->sin_addr.s_addr = inet_addr("255.255.255.255");
+
+	assert(!bpf_update_elem(map_fd[0], &mapped_addr, &serv_addr, BPF_ANY));
+
+	assert(listen(serverfd, 5) == 0);
+
+	ip = inet_ntoa(mapped_addr_in->sin_addr);
+	printf("Client connecting to: %s:%d\n",
+	       ip, ntohs(mapped_addr_in->sin_port));
+	assert(connect(clientfd, &mapped_addr, sizeof(mapped_addr)) == 0);
+
+	sockaddr_len = sizeof(tmp_addr);
+	assert((serverconnfd = accept(serverfd, &tmp_addr, &sockaddr_len)) > 0);
+	/* Format the peer address only after accept() has filled tmp_addr. */
+	ip = inet_ntoa(tmp_addr_in->sin_addr);
+	printf("Server received connection from: %s:%d\n",
+	       ip, ntohs(tmp_addr_in->sin_port));
+
+	sockaddr_len = sizeof(tmp_addr);
+	assert(getpeername(clientfd, &tmp_addr, &sockaddr_len) == 0);
+	ip = inet_ntoa(tmp_addr_in->sin_addr);
+	printf("Client's peer address: %s:%d\n",
+	       ip, ntohs(tmp_addr_in->sin_port));
+
+	/* Is the server's getsockname = the socket getpeername */
+	assert(memcmp(&serv_addr, &tmp_addr, sizeof(struct sockaddr_in)) == 0);
+
+	return 0;
+}
diff --git a/samples/bpf/xdp1_kern.c b/samples/bpf/xdp1_kern.c
new file mode 100644
index 000000000000..219742106bfd
--- /dev/null
+++ b/samples/bpf/xdp1_kern.c
@@ -0,0 +1,93 @@
+/* Copyright (c) 2016 PLUMgrid
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include "bpf_helpers.h"
+
+/*
+ * Per-CPU array of 256 long counters. xdp_prog1() increments the slot whose
+ * index is the protocol number it parsed (0 when nothing was parsed).
+ */
+struct bpf_map_def SEC("maps") rxcnt = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(long),
+ .max_entries = 256,
+};
+
+/*
+ * Return the protocol field of the IPv4 header that starts nh_off bytes
+ * into the packet, or 0 when that header would extend past data_end.
+ */
+static int parse_ipv4(void *data, u64 nh_off, void *data_end)
+{
+	struct iphdr *hdr = data + nh_off;
+
+	/* the bounds test is evaluated before the field is read */
+	return (hdr + 1 > data_end) ? 0 : hdr->protocol;
+}
+
+/*
+ * Return the nexthdr field of the IPv6 header that starts nh_off bytes
+ * into the packet, or 0 when that header would extend past data_end.
+ */
+static int parse_ipv6(void *data, u64 nh_off, void *data_end)
+{
+	struct ipv6hdr *hdr = data + nh_off;
+
+	/* the bounds test is evaluated before the field is read */
+	return (hdr + 1 > data_end) ? 0 : hdr->nexthdr;
+}
+
+/*
+ * Count received packets per L4 protocol in rxcnt and drop every one of
+ * them: rc starts as XDP_DROP and is never changed. Up to two stacked
+ * VLAN headers are skipped before the IP header is parsed. Packets that
+ * are neither IPv4 nor IPv6 are counted under key 0.
+ */
+SEC("xdp1")
+int xdp_prog1(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ int rc = XDP_DROP;
+ long *value;
+ u16 h_proto;
+ u64 nh_off;
+ u32 ipproto;
+
+ /* The Ethernet header must fit before h_proto is read. */
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return rc;
+
+ h_proto = eth->h_proto;
+
+ /* First VLAN tag, if any: bounds-check it, then take the inner type. */
+ if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
+ struct vlan_hdr *vhdr;
+
+ vhdr = data + nh_off;
+ nh_off += sizeof(struct vlan_hdr);
+ if (data + nh_off > data_end)
+ return rc;
+ h_proto = vhdr->h_vlan_encapsulated_proto;
+ }
+ /* Second VLAN tag, handled the same way; the block is repeated, not looped. */
+ if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
+ struct vlan_hdr *vhdr;
+
+ vhdr = data + nh_off;
+ nh_off += sizeof(struct vlan_hdr);
+ if (data + nh_off > data_end)
+ return rc;
+ h_proto = vhdr->h_vlan_encapsulated_proto;
+ }
+
+ if (h_proto == htons(ETH_P_IP))
+ ipproto = parse_ipv4(data, nh_off, data_end);
+ else if (h_proto == htons(ETH_P_IPV6))
+ ipproto = parse_ipv6(data, nh_off, data_end);
+ else
+ ipproto = 0;
+
+ /* The lookup result is checked before the counter is incremented. */
+ value = bpf_map_lookup_elem(&rxcnt, &ipproto);
+ if (value)
+ *value += 1;
+
+ return rc;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c
new file mode 100644
index 000000000000..a5e109e398a1
--- /dev/null
+++ b/samples/bpf/xdp1_user.c
@@ -0,0 +1,181 @@
+/* Copyright (c) 2016 PLUMgrid
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include "bpf_load.h"
+#include "libbpf.h"
+
+/*
+ * Send an RTM_SETLINK netlink request for interface ifindex that carries a
+ * nested attribute (type 43, IFLA_XDP) holding one attribute (type 1,
+ * IFLA_XDP_FD) whose payload is fd, then wait for the acknowledgement.
+ *
+ * Returns 0 when the reply contains no error, -1 otherwise; the reason is
+ * printed to stdout. The netlink socket is closed on every path after it
+ * has been opened.
+ */
+static int set_link_xdp_fd(int ifindex, int fd)
+{
+ struct sockaddr_nl sa;
+ int sock, seq = 0, len, ret = -1;
+ char buf[4096];
+ struct nlattr *nla, *nla_xdp;
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg ifinfo;
+ char attrbuf[64];
+ } req;
+ struct nlmsghdr *nh;
+ struct nlmsgerr *err;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.nl_family = AF_NETLINK;
+
+ sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (sock < 0) {
+ printf("open netlink socket: %s\n", strerror(errno));
+ return -1;
+ }
+
+ if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+ printf("bind to netlink: %s\n", strerror(errno));
+ goto cleanup;
+ }
+
+ /* Header: a SETLINK request that asks for an ACK. */
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_type = RTM_SETLINK;
+ req.nh.nlmsg_pid = 0;
+ req.nh.nlmsg_seq = ++seq;
+ req.ifinfo.ifi_family = AF_UNSPEC;
+ req.ifinfo.ifi_index = ifindex;
+ /* The outer attribute starts right after the aligned ifinfomsg. */
+ nla = (struct nlattr *)(((char *)&req)
+ + NLMSG_ALIGN(req.nh.nlmsg_len));
+ nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
+
+ /* The inner attribute sits in the outer one's payload and carries fd. */
+ nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN);
+ nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
+ nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
+ memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
+ nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len;
+
+ /* Grow the message by the aligned size of the outer attribute. */
+ req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ printf("send to netlink: %s\n", strerror(errno));
+ goto cleanup;
+ }
+
+ len = recv(sock, buf, sizeof(buf), 0);
+ if (len < 0) {
+ printf("recv from netlink: %s\n", strerror(errno));
+ goto cleanup;
+ }
+
+ /* Walk every message in the reply; each must match our pid and seq. */
+ for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+ nh = NLMSG_NEXT(nh, len)) {
+ if (nh->nlmsg_pid != getpid()) {
+ printf("Wrong pid %d, expected %d\n",
+ nh->nlmsg_pid, getpid());
+ goto cleanup;
+ }
+ if (nh->nlmsg_seq != seq) {
+ printf("Wrong seq %d, expected %d\n",
+ nh->nlmsg_seq, seq);
+ goto cleanup;
+ }
+ switch (nh->nlmsg_type) {
+ case NLMSG_ERROR:
+ err = (struct nlmsgerr *)NLMSG_DATA(nh);
+ /* An error message with code 0 is not a failure. */
+ if (!err->error)
+ continue;
+ printf("nlmsg error %s\n", strerror(-err->error));
+ goto cleanup;
+ case NLMSG_DONE:
+ break;
+ }
+ }
+
+ ret = 0;
+
+cleanup:
+ close(sock);
+ return ret;
+}
+
+/* Interface index; set by main() from argv[1] and read by int_exit(). */
+static int ifindex;
+
+/*
+ * Signal handler installed by main(): calls set_link_xdp_fd() with fd -1
+ * for ifindex, then exits with status 0. The sig argument is unused.
+ * NOTE(review): fd -1 presumably detaches the program -- confirm.
+ */
+static void int_exit(int sig)
+{
+ set_link_xdp_fd(ifindex, -1);
+ exit(0);
+}
+
+/*
+ * Simple per-protocol drop counter. Every 'interval' seconds, read all 256
+ * keys of map_fd[0], sum the per-CPU growth since the previous pass and
+ * print a rate for each key that changed. Never returns.
+ */
+static void poll_stats(int interval)
+{
+	unsigned int cpu_cnt = sysconf(_SC_NPROCESSORS_CONF);
+	const unsigned int key_cnt = 256;
+	__u64 cur[cpu_cnt], last[key_cnt][cpu_cnt];
+	unsigned int cpu;
+	__u32 key;
+
+	memset(last, 0, sizeof(last));
+
+	for (;;) {
+		sleep(interval);
+
+		for (key = 0; key < key_cnt; key++) {
+			__u64 delta = 0;
+
+			assert(bpf_lookup_elem(map_fd[0], &key, cur) == 0);
+
+			/* accumulate the growth and remember the new values */
+			for (cpu = 0; cpu < cpu_cnt; cpu++) {
+				delta += cur[cpu] - last[key][cpu];
+				last[key][cpu] = cur[cpu];
+			}
+
+			if (delta)
+				printf("proto %u: %10llu pkt/s\n",
+				       key, delta / interval);
+		}
+	}
+}
+
+/*
+ * Load "<argv[0]>_kern.o", attach its first program to the interface whose
+ * index is given as the only argument, and print per-protocol packet rates
+ * every two seconds until interrupted.
+ *
+ * Returns 1 on a usage error or when loading/attaching fails. On success
+ * the process only ends through int_exit().
+ */
+int main(int ac, char **argv)
+{
+	char filename[256];
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	if (ac != 2) {
+		printf("usage: %s IFINDEX\n", argv[0]);
+		return 1;
+	}
+
+	ifindex = strtoul(argv[1], NULL, 0);
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	if (!prog_fd[0]) {
+		printf("load_bpf_file: %s\n", strerror(errno));
+		return 1;
+	}
+
+	/*
+	 * Run the handler on SIGTERM as well as SIGINT, so that a plain
+	 * kill does not leave the program attached to the interface.
+	 */
+	signal(SIGINT, int_exit);
+	signal(SIGTERM, int_exit);
+
+	if (set_link_xdp_fd(ifindex, prog_fd[0]) < 0) {
+		printf("link set xdp fd failed\n");
+		return 1;
+	}
+
+	poll_stats(2);
+
+	return 0;
+}
diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c
new file mode 100644
index 000000000000..e01288867d15
--- /dev/null
+++ b/samples/bpf/xdp2_kern.c
@@ -0,0 +1,114 @@
+/* Copyright (c) 2016 PLUMgrid
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include "bpf_helpers.h"
+
+/*
+ * Per-CPU array of 256 long counters. xdp_prog1() increments the slot whose
+ * index is the protocol number it parsed (0 when nothing was parsed).
+ */
+struct bpf_map_def SEC("maps") rxcnt = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(long),
+ .max_entries = 256,
+};
+
+/*
+ * Exchange the first six bytes at 'data' with the following six, working
+ * in 16-bit units. xdp_prog1() passes the start of the Ethernet header, so
+ * this swaps the destination and source MAC addresses.
+ */
+static void swap_src_dst_mac(void *data)
+{
+	unsigned short *mac = data;
+	unsigned short tmp;
+
+	tmp = mac[0];
+	mac[0] = mac[3];
+	mac[3] = tmp;
+
+	tmp = mac[1];
+	mac[1] = mac[4];
+	mac[4] = tmp;
+
+	tmp = mac[2];
+	mac[2] = mac[5];
+	mac[5] = tmp;
+}
+
+/*
+ * Return the protocol field of the IPv4 header that starts nh_off bytes
+ * into the packet, or 0 when that header would extend past data_end.
+ */
+static int parse_ipv4(void *data, u64 nh_off, void *data_end)
+{
+	struct iphdr *hdr = data + nh_off;
+
+	/* the bounds test is evaluated before the field is read */
+	return (hdr + 1 > data_end) ? 0 : hdr->protocol;
+}
+
+/*
+ * Return the nexthdr field of the IPv6 header that starts nh_off bytes
+ * into the packet, or 0 when that header would extend past data_end.
+ */
+static int parse_ipv6(void *data, u64 nh_off, void *data_end)
+{
+	struct ipv6hdr *hdr = data + nh_off;
+
+	/* the bounds test is evaluated before the field is read */
+	return (hdr + 1 > data_end) ? 0 : hdr->nexthdr;
+}
+
+/*
+ * Count received packets per L4 protocol in rxcnt. Packets whose parsed
+ * protocol is IPPROTO_UDP get their MAC addresses swapped and are returned
+ * with XDP_TX; everything else is returned with XDP_DROP. Up to two stacked
+ * VLAN headers are skipped before the IP header is parsed.
+ */
+SEC("xdp1")
+int xdp_prog1(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ int rc = XDP_DROP;
+ long *value;
+ u16 h_proto;
+ u64 nh_off;
+ u32 ipproto;
+
+ /* The Ethernet header must fit before h_proto is read. */
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return rc;
+
+ h_proto = eth->h_proto;
+
+ /* First VLAN tag, if any: bounds-check it, then take the inner type. */
+ if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
+ struct vlan_hdr *vhdr;
+
+ vhdr = data + nh_off;
+ nh_off += sizeof(struct vlan_hdr);
+ if (data + nh_off > data_end)
+ return rc;
+ h_proto = vhdr->h_vlan_encapsulated_proto;
+ }
+ /* Second VLAN tag, handled the same way; the block is repeated, not looped. */
+ if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
+ struct vlan_hdr *vhdr;
+
+ vhdr = data + nh_off;
+ nh_off += sizeof(struct vlan_hdr);
+ if (data + nh_off > data_end)
+ return rc;
+ h_proto = vhdr->h_vlan_encapsulated_proto;
+ }
+
+ if (h_proto == htons(ETH_P_IP))
+ ipproto = parse_ipv4(data, nh_off, data_end);
+ else if (h_proto == htons(ETH_P_IPV6))
+ ipproto = parse_ipv6(data, nh_off, data_end);
+ else
+ ipproto = 0;
+
+ /* The lookup result is checked before the counter is incremented. */
+ value = bpf_map_lookup_elem(&rxcnt, &ipproto);
+ if (value)
+ *value += 1;
+
+ /* UDP is sent back out with source and destination MAC exchanged. */
+ if (ipproto == IPPROTO_UDP) {
+ swap_src_dst_mac(data);
+ rc = XDP_TX;
+ }
+
+ return rc;
+}
+
+char _license[] SEC("license") = "GPL";